Skip to content

Commit

Permalink
Remove sel_points (#3261)
Browse files Browse the repository at this point in the history
* remove .[i ]sel_points

* dangling decorator

* whatsnew
  • Loading branch information
max-sixty authored Aug 25, 2019
1 parent 3f1b879 commit 5f55d41
Show file tree
Hide file tree
Showing 6 changed files with 8 additions and 411 deletions.
8 changes: 0 additions & 8 deletions doc/indexing.rst
Original file line number Diff line number Diff line change
Expand Up @@ -392,14 +392,6 @@ These methods may also be applied to ``Dataset`` objects
You may find increased performance by loading your data into memory first,
e.g., with :py:meth:`~xarray.Dataset.load`.

.. note::

Vectorized indexing is a new feature in v0.10.
In older versions of xarray, dimensions of indexers are ignored.
Dedicated methods for some advanced indexing use cases,
``isel_points`` and ``sel_points`` are now deprecated.
See :ref:`more_advanced_indexing` for their alternative.

.. note::

If an indexer is a :py:meth:`~xarray.DataArray`, its coordinates should not
Expand Down
8 changes: 8 additions & 0 deletions doc/whats-new.rst
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,14 @@ v0.13.0 (unreleased)
This release increases the minimum required Python version from 3.5.0 to 3.5.3
(:issue:`3089`). By `Guido Imperiale <https://github.com/crusaderky>`_.

Breaking changes
~~~~~~~~~~~~~~~~

The ``isel_points`` and ``sel_points`` methods are removed, having been deprecated
since v0.10.0. These are redundant with the ``isel`` / ``sel`` methods.
See :ref:`vectorized_indexing` for the details.
By `Maximilian Roos <https://github.com/max-sixty>`_.

New functions/methods
~~~~~~~~~~~~~~~~~~~~~

Expand Down
26 changes: 0 additions & 26 deletions xarray/core/dataarray.py
Original file line number Diff line number Diff line change
Expand Up @@ -1026,32 +1026,6 @@ def sel(
)
return self._from_temp_dataset(ds)

def isel_points(self, dim="points", **indexers) -> "DataArray":
    """Return a new DataArray whose data is given by pointwise integer
    indexing along the specified dimension(s).

    Delegates to the Dataset implementation via a temporary
    single-variable dataset.

    See Also
    --------
    Dataset.isel_points
    """
    temp = self._to_temp_dataset()
    indexed = temp.isel_points(dim=dim, **indexers)
    return self._from_temp_dataset(indexed)

def sel_points(
    self, dim="points", method=None, tolerance=None, **indexers
) -> "DataArray":
    """Return a new DataArray whose dataset is given by pointwise selection
    of index labels along the specified dimension(s).

    Delegates to the Dataset implementation via a temporary
    single-variable dataset.

    See Also
    --------
    Dataset.sel_points
    """
    temp = self._to_temp_dataset()
    selected = temp.sel_points(
        dim=dim, method=method, tolerance=tolerance, **indexers
    )
    return self._from_temp_dataset(selected)

def broadcast_like(
self, other: Union["DataArray", Dataset], exclude: Iterable[Hashable] = None
) -> "DataArray":
Expand Down
209 changes: 0 additions & 209 deletions xarray/core/dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,6 @@
duck_array_ops,
formatting,
groupby,
indexing,
ops,
pdcompat,
resample,
Expand Down Expand Up @@ -1999,214 +1998,6 @@ def sel(
result = self.isel(indexers=pos_indexers, drop=drop)
return result._overwrite_indexes(new_indexes)

def isel_points(self, dim: Any = "points", **indexers: Any) -> "Dataset":
    """Returns a new dataset with each array indexed pointwise along the
    specified dimension(s).

    This method selects pointwise values from each array and is akin to
    the NumPy indexing behavior of `arr[[0, 1], [0, 1]]`, except this
    method does not require knowing the order of each array's dimensions.

    Parameters
    ----------
    dim : hashable or DataArray or pandas.Index or other list-like object,
        optional
        Name of the dimension to concatenate along. If dim is provided as a
        hashable, it must be a new dimension name, in which case it is added
        along axis=0. If dim is provided as a DataArray or Index or
        list-like object, its name, which must not be present in the
        dataset, is used as the dimension to concatenate along and the
        values are added as a coordinate.
    **indexers : {dim: indexer, ...}
        Keyword arguments with names matching dimensions and values given
        by array-like objects. All indexers must be the same length and
        1 dimensional.

    Returns
    -------
    obj : Dataset
        A new Dataset with the same contents as this dataset, except each
        array and dimension is indexed by the appropriate indexers. With
        pointwise indexing, the new Dataset will always be a copy of the
        original.

    See Also
    --------
    Dataset.sel
    Dataset.isel
    Dataset.sel_points
    DataArray.isel_points
    """  # noqa
    warnings.warn(
        # Fixed: the previous adjacent string literals rendered as
        # "...use Dataset.isel()instead." (missing space).
        "Dataset.isel_points is deprecated: use Dataset.isel() instead.",
        DeprecationWarning,
        stacklevel=2,
    )

    indexer_dims = set(indexers)

    def take(variable, slices):
        # Note: remove helper function when once when numpy
        # supports vindex https://github.com/numpy/numpy/pull/6075
        if hasattr(variable.data, "vindex"):
            # Special case for dask backed arrays to use vectorised list
            # indexing
            sel = variable.data.vindex[slices]
        else:
            # Otherwise assume backend is numpy array with 'fancy' indexing
            sel = variable.data[slices]
        return sel

    def relevant_keys(mapping):
        # Keys whose variables touch at least one indexed dimension.
        return [
            k for k, v in mapping.items() if any(d in indexer_dims for d in v.dims)
        ]

    coords = relevant_keys(self.coords)
    indexers = {k: np.asarray(v) for k, v in indexers.items()}
    non_indexed_dims = set(self.dims) - indexer_dims
    non_indexed_coords = set(self.coords) - set(coords)

    # All the indexers should be iterables
    # Check that indexers are valid dims, integers, and 1D
    for k, v in indexers.items():
        if k not in self.dims:
            raise ValueError("dimension %s does not exist" % k)
        if v.dtype.kind != "i":  # type: ignore
            raise TypeError("Indexers must be integers")
        if v.ndim != 1:  # type: ignore
            raise ValueError("Indexers must be 1 dimensional")

    # all the indexers should have the same length
    lengths = {len(v) for k, v in indexers.items()}
    if len(lengths) > 1:
        raise ValueError("All indexers must be the same length")

    # Existing dimensions are not valid choices for the dim argument
    if isinstance(dim, str):
        if dim in self.dims:
            # dim is an invalid string
            raise ValueError(
                # Fixed: message previously said "sel_points" even though
                # this is isel_points.
                "Existing dimension names are not valid "
                "choices for the dim argument in isel_points"
            )

    elif hasattr(dim, "dims"):
        # dim is a DataArray or Coordinate
        if dim.name in self.dims:
            # dim already exists
            raise ValueError(
                "Existing dimensions are not valid choices "
                "for the dim argument in isel_points"
            )

    # Set the new dim_name, and optionally the new dim coordinate
    # dim is either an array-like or a string
    if not utils.is_scalar(dim):
        # dim is array like get name or assign 'points', get as variable
        dim_name = "points" if not hasattr(dim, "name") else dim.name
        dim_coord = as_variable(dim, name=dim_name)
    else:
        # dim is a string
        dim_name = dim
        dim_coord = None  # type: ignore

    # Put indexed dims first so pointwise results concatenate along axis=0.
    reordered = self.transpose(*list(indexer_dims), *list(non_indexed_dims))

    variables = OrderedDict()  # type: ignore

    for name, var in reordered.variables.items():
        if name in indexers or any(d in indexer_dims for d in var.dims):
            # slice if var is an indexer or depends on an indexed dim
            slc = [indexers.get(k, slice(None)) for k in var.dims]

            var_dims = [dim_name] + [d for d in var.dims if d in non_indexed_dims]
            selection = take(var, tuple(slc))
            var_subset = type(var)(var_dims, selection, var.attrs)
            variables[name] = var_subset
        else:
            # If not indexed just add it back to variables or coordinates
            variables[name] = var

    coord_names = (set(coords) & set(variables)) | non_indexed_coords

    dset = self._replace_vars_and_dims(variables, coord_names=coord_names)
    # Add the dim coord to the new dset. Must be done after creation
    # because _replace_vars_and_dims can only access existing coords,
    # not add new ones
    if dim_coord is not None:
        dset.coords[dim_name] = dim_coord
    return dset

def sel_points(
    self,
    dim: Any = "points",
    method: str = None,
    tolerance: Number = None,
    **indexers: Any
):
    """Returns a new dataset with each array indexed pointwise by tick
    labels along the specified dimension(s).

    In contrast to `Dataset.isel_points`, indexers for this method should
    use labels instead of integers.

    In contrast to `Dataset.sel`, this method selects points along the
    diagonal of multi-dimensional arrays, not the intersection.

    Parameters
    ----------
    dim : hashable or DataArray or pandas.Index or other list-like object,
        optional
        Name of the dimension to concatenate along. If dim is provided as a
        hashable, it must be a new dimension name, in which case it is added
        along axis=0. If dim is provided as a DataArray or Index or
        list-like object, its name, which must not be present in the
        dataset, is used as the dimension to concatenate along and the
        values are added as a coordinate.
    method : {None, 'nearest', 'pad'/'ffill', 'backfill'/'bfill'}, optional
        Method to use for inexact matches (requires pandas>=0.16):

        * None (default): only exact matches
        * pad / ffill: propagate last valid index value forward
        * backfill / bfill: propagate next valid index value backward
        * nearest: use nearest valid index value
    tolerance : optional
        Maximum distance between original and new labels for inexact
        matches. The values of the index at the matching locations must
        satisfy the equation ``abs(index[indexer] - target) <= tolerance``.
        Requires pandas>=0.17.
    **indexers : {dim: indexer, ...}
        Keyword arguments with names matching dimensions and values given
        by array-like objects. All indexers must be the same length and
        1 dimensional.

    Returns
    -------
    obj : Dataset
        A new Dataset with the same contents as this dataset, except each
        array and dimension is indexed by the appropriate indexers. With
        pointwise indexing, the new Dataset will always be a copy of the
        original.

    See Also
    --------
    Dataset.sel
    Dataset.isel
    Dataset.isel_points
    DataArray.sel_points
    """  # noqa
    warnings.warn(
        # Fixed: the previous adjacent string literals rendered as
        # "...use Dataset.sel()instead." (missing space).
        "Dataset.sel_points is deprecated: use Dataset.sel() instead.",
        DeprecationWarning,
        stacklevel=2,
    )

    # Convert label-based indexers to positional ones, then reuse the
    # pointwise integer-indexing path.
    pos_indexers, _ = indexing.remap_label_indexers(
        self, indexers, method=method, tolerance=tolerance
    )
    return self.isel_points(dim=dim, **pos_indexers)

def broadcast_like(
self, other: Union["Dataset", "DataArray"], exclude: Iterable[Hashable] = None
) -> "Dataset":
Expand Down
58 changes: 0 additions & 58 deletions xarray/tests/test_dataarray.py
Original file line number Diff line number Diff line change
Expand Up @@ -1001,64 +1001,6 @@ def test_isel_drop(self):
selected = data.isel(x=0, drop=False)
assert_identical(expected, selected)

@pytest.mark.filterwarnings("ignore:Dataset.isel_points")
def test_isel_points(self):
    """Exercise the deprecated DataArray.isel_points pointwise indexer:
    selection semantics, dim naming, argument order, and error paths."""
    shape = (10, 5, 6)
    np_array = np.random.random(shape)
    da = DataArray(
        np_array, dims=["time", "y", "x"], coords={"time": np.arange(0, 100, 10)}
    )
    y = [1, 3]
    x = [3, 0]

    # Pointwise ("diagonal") selection corresponds to numpy fancy indexing.
    expected = da.values[:, y, x]

    actual = da.isel_points(y=y, x=x, dim="test_coord")
    assert actual.coords["test_coord"].shape == (len(y),)
    assert list(actual.coords) == ["time"]
    assert actual.dims == ("test_coord", "time")

    # Default dim name is "points" when none is given.
    actual = da.isel_points(y=y, x=x)
    assert "points" in actual.dims
    # Note that because xarray always concatenates along the first
    # dimension, We must transpose the result to match the numpy style of
    # concatenation.
    np.testing.assert_equal(actual.T, expected)

    # a few corner cases
    da.isel_points(time=[1, 2], x=[2, 2], y=[3, 4])
    np.testing.assert_allclose(
        da.isel_points(time=[1], x=[2], y=[4]).values.squeeze(),
        np_array[1, 4, 2].squeeze(),
    )
    da.isel_points(time=[1, 2])
    # Negative integer indexers follow numpy semantics.
    y = [-1, 0]
    x = [-2, 2]
    expected = da.values[:, y, x]
    actual = da.isel_points(x=x, y=y).values
    np.testing.assert_equal(actual.T, expected)

    # test that the order of the indexers doesn't matter
    assert_identical(da.isel_points(y=y, x=x), da.isel_points(x=x, y=y))

    # make sure we're raising errors in the right places
    with raises_regex(ValueError, "All indexers must be the same length"):
        da.isel_points(y=[1, 2], x=[1, 2, 3])
    with raises_regex(ValueError, "dimension bad_key does not exist"):
        da.isel_points(bad_key=[1, 2])
    with raises_regex(TypeError, "Indexers must be integers"):
        da.isel_points(y=[1.5, 2.2])
    with raises_regex(TypeError, "Indexers must be integers"):
        da.isel_points(x=[1, 2, 3], y=slice(3))
    with raises_regex(ValueError, "Indexers must be 1 dimensional"):
        da.isel_points(y=1, x=2)
    with raises_regex(ValueError, "Existing dimension names are not"):
        da.isel_points(y=[1, 2], x=[1, 2], dim="x")

    # using non string dims
    actual = da.isel_points(y=[1, 2], x=[1, 2], dim=["A", "B"])
    assert "points" in actual.coords

def test_loc(self):
self.ds["x"] = ("x", np.array(list("abcdefghij")))
da = self.ds["foo"]
Expand Down
Loading

0 comments on commit 5f55d41

Please sign in to comment.