Skip to content

Commit

Permalink
Fix in vectorized item assignment (#1746)
Browse files Browse the repository at this point in the history
* Fix in NumpyVindexAdapter.__setitem__ and DataArray.__setitem__

* Update what's new

* Broadcasting in setitem

* Small clean up. Revert unintended change.

* Check coordinate consistency for DataArray.__setitem__

* Only pass a dict of variables to `assert_coordinate_consistent`

* Update docs.

* still working

* Coordinate validation in .loc.__setitem__

* remove extra parenthesis

* Refactoring. remap_label_indexers is moved to coordinates.py

* Added an exception in doc for coordinate confliction

* Added a TODO for unused indexing part in Loc.__setitem__
  • Loading branch information
fujiisoup authored Dec 9, 2017
1 parent ea72303 commit 5e80189
Show file tree
Hide file tree
Showing 9 changed files with 286 additions and 53 deletions.
16 changes: 15 additions & 1 deletion doc/indexing.rst
Original file line number Diff line number Diff line change
Expand Up @@ -277,7 +277,7 @@ Like numpy and pandas, xarray supports indexing many array elements at once in a
If you only provide integers, slices, or unlabeled arrays (array without
dimension names, such as ``np.ndarray``, ``list``, but not
:py:meth:`~xarray.DataArray` or :py:meth:`~xarray.Variable`) indexing can be
understand as orthogonally. Each indexer component selects independently along
understood as orthogonal. Each indexer component selects independently along
the corresponding dimension, similar to how vector indexing works in Fortran or
MATLAB, or after using the :py:func:`numpy.ix_` helper:

Expand Down Expand Up @@ -357,6 +357,14 @@ These methods may and also be applied to ``Dataset`` objects
``isel_points`` and ``sel_points`` are now deprecated.
See :ref:`more_advanced_indexing` for their alternative.

.. note::

If an indexer is a :py:meth:`~xarray.DataArray`, its coordinates should not
conflict with the selected subpart of the target array (except for the
explicitly indexed dimensions with ``.loc``/``.sel``).
Otherwise, ``IndexError`` will be raised.


.. _assigning_values:

Assigning values with indexing
Expand Down Expand Up @@ -401,6 +409,11 @@ __ https://docs.scipy.org/doc/numpy/user/basics.indexing.html#assigning-values-t
Dask array does not support value assignment
(see :ref:`dask` for the details).

.. note::

Coordinates in both the left- and right-hand-side arrays should not
conflict with each other.
Otherwise, ``IndexError`` will be raised.

.. warning::

Expand Down Expand Up @@ -457,6 +470,7 @@ method:
arr.sel(space=xr.DataArray(['IA', 'IL', 'IN'], dims=['new_time']),
time=times)
.. _align and reindex:

Align and reindex
Expand Down
7 changes: 7 additions & 0 deletions doc/whats-new.rst
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,13 @@ Enhancements
Bug fixes
~~~~~~~~~

- Bug fix in vectorized assignment (:issue:`1743`, :issue:`1744`).
  Now item assignment to :py:meth:`~DataArray.__setitem__` checks
  coordinates of target, destination and keys. If there is any conflict among
  these coordinates, ``IndexError`` will be raised.
By `Keisuke Fujii <https://github.com/fujiisoup>`_.


.. _whats-new.0.10.0:

- Properly point DataArray.__dask_scheduler__ to dask.threaded.get
Expand Down
54 changes: 53 additions & 1 deletion xarray/core/coordinates.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,11 +5,12 @@
from contextlib import contextmanager
import pandas as pd

from . import formatting
from . import formatting, indexing
from .utils import Frozen
from .merge import (
merge_coords, expand_and_merge_variables, merge_coords_for_inplace_math)
from .pycompat import OrderedDict
from .variable import Variable


class AbstractCoordinates(Mapping, formatting.ReprMixin):
Expand Down Expand Up @@ -301,3 +302,54 @@ def __getitem__(self, key):

def __unicode__(self):
return formatting.indexes_repr(self)


def assert_coordinate_consistent(obj, coords):
    """Make sure the dimension coordinates of ``obj`` agree with ``coords``.

    Parameters
    ----------
    obj : DataArray or Dataset
        The indexed (target) object whose dimension coordinates are checked.
    coords : dict-like of Variable
        Coordinate variables coming from the indexers.

    Raises
    ------
    IndexError
        If a dimension coordinate present in both ``obj`` and ``coords``
        differs between the two.
    """
    for dim in obj.dims:
        # Only dimensions carrying a coordinate on BOTH sides can conflict.
        if dim not in coords or dim not in obj.coords:
            continue
        if coords[dim].equals(obj[dim].variable):
            continue
        raise IndexError(
            'dimension coordinate {!r} conflicts between '
            'indexed and indexing objects:\n{}\nvs.\n{}'
            .format(dim, obj[dim], coords[dim]))


def remap_label_indexers(obj, method=None, tolerance=None, **indexers):
    """Remap ``**indexers`` from label space to positional space via
    ``obj.coords``.

    If an indexer is a DataArray with its own coordinates, those coordinates
    are re-attached to the resulting positional indexer.

    Returns
    -------
    pos_indexers : dict
        Same keys as ``indexers``; values are np.ndarray, Variable or
        DataArray positional indexers.
    new_indexes : mapping of new dimensional-coordinate.
    """
    from .dataarray import DataArray

    # Strip labels down to raw data before the low-level remapping.
    v_indexers = {}
    for name, label in indexers.items():
        if isinstance(label, DataArray):
            v_indexers[name] = label.variable.data
        else:
            v_indexers[name] = label

    pos_indexers, new_indexes = indexing.remap_label_indexers(
        obj, v_indexers, method=method, tolerance=tolerance
    )

    # Re-attach each indexer's dims/coordinates to its positional result.
    for name, label in indexers.items():
        if isinstance(label, Variable):
            pos_indexers[name] = Variable(label.dims, pos_indexers[name])
        elif isinstance(label, DataArray):
            # drop coordinates found in indexers since .sel() already
            # ensures alignments
            coords = OrderedDict(
                (c, v) for c, v in label._coords.items()
                if c not in indexers)
            pos_indexers[name] = DataArray(
                pos_indexers[name], coords=coords, dims=label.dims)
    return pos_indexers, new_indexes
23 changes: 15 additions & 8 deletions xarray/core/dataarray.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,8 @@
from .alignment import align, reindex_like_indexers
from .common import AbstractArray, BaseDataObject
from .coordinates import (DataArrayCoordinates, LevelCoordinatesSource,
Indexes)
Indexes, assert_coordinate_consistent,
remap_label_indexers)
from .dataset import Dataset, merge_indexes, split_indexes
from .pycompat import iteritems, basestring, OrderedDict, zip, range
from .variable import (as_variable, Variable, as_compatible_data,
Expand Down Expand Up @@ -102,22 +103,20 @@ class _LocIndexer(object):
def __init__(self, data_array):
self.data_array = data_array

def _remap_key(self, key):
def __getitem__(self, key):
if not utils.is_dict_like(key):
# expand the indexer so we can handle Ellipsis
labels = indexing.expanded_indexer(key, self.data_array.ndim)
key = dict(zip(self.data_array.dims, labels))
return indexing.remap_label_indexers(self.data_array, key)
return self.data_array.sel(**key)

def __getitem__(self, key):
def __setitem__(self, key, value):
if not utils.is_dict_like(key):
# expand the indexer so we can handle Ellipsis
labels = indexing.expanded_indexer(key, self.data_array.ndim)
key = dict(zip(self.data_array.dims, labels))
return self.data_array.sel(**key)

def __setitem__(self, key, value):
pos_indexers, _ = self._remap_key(key)
pos_indexers, _ = remap_label_indexers(self.data_array, **key)
self.data_array[pos_indexers] = value


Expand Down Expand Up @@ -484,7 +483,15 @@ def __setitem__(self, key, value):
if isinstance(key, basestring):
self.coords[key] = value
else:
# xarray-style array indexing
# Coordinates in key, value and self[key] should be consistent.
# TODO Coordinate consistency in key is checked here, but it
# causes unnecessary indexing. It should be optimized.
obj = self[key]
if isinstance(value, DataArray):
assert_coordinate_consistent(value, obj.coords.variables)
# DataArray key -> Variable key
key = {k: v.variable if isinstance(v, DataArray) else v
for k, v in self._item_key_to_dict(key).items()}
self.variable[key] = value

def __delitem__(self, key):
Expand Down
34 changes: 5 additions & 29 deletions xarray/core/dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,8 @@
from . import duck_array_ops
from .. import conventions
from .alignment import align
from .coordinates import DatasetCoordinates, LevelCoordinatesSource, Indexes
from .coordinates import (DatasetCoordinates, LevelCoordinatesSource, Indexes,
assert_coordinate_consistent, remap_label_indexers)
from .common import ImplementsDatasetReduce, BaseDataObject
from .dtypes import is_datetime_like
from .merge import (dataset_update_method, dataset_merge_method,
Expand Down Expand Up @@ -1305,15 +1306,7 @@ def _get_indexers_coordinates(self, indexers):
# we don't need to call align() explicitly, because merge_variables
# already checks for exact alignment between dimension coordinates
coords = merge_variables(coord_list)

for k in self.dims:
# make sure there are not conflict in dimension coordinates
if (k in coords and k in self._variables and
not coords[k].equals(self._variables[k])):
raise IndexError(
'dimension coordinate {!r} conflicts between '
'indexed and indexing objects:\n{}\nvs.\n{}'
.format(k, self._variables[k], coords[k]))
assert_coordinate_consistent(self, coords)

attached_coords = OrderedDict()
for k, v in coords.items(): # silently drop the conflicted variables.
Expand Down Expand Up @@ -1437,25 +1430,8 @@ def sel(self, method=None, tolerance=None, drop=False, **indexers):
Dataset.isel
DataArray.sel
"""
from .dataarray import DataArray

v_indexers = {k: v.variable.data if isinstance(v, DataArray) else v
for k, v in indexers.items()}

pos_indexers, new_indexes = indexing.remap_label_indexers(
self, v_indexers, method=method, tolerance=tolerance
)
# attach indexer's coordinate to pos_indexers
for k, v in indexers.items():
if isinstance(v, Variable):
pos_indexers[k] = Variable(v.dims, pos_indexers[k])
elif isinstance(v, DataArray):
# drop coordinates found in indexers since .sel() already
# ensures alignments
coords = OrderedDict((k, v) for k, v in v._coords.items()
if k not in indexers)
pos_indexers[k] = DataArray(pos_indexers[k],
coords=coords, dims=v.dims)
pos_indexers, new_indexes = remap_label_indexers(self, method,
tolerance, **indexers)
result = self.isel(drop=drop, **pos_indexers)
return result._replace_indexes(new_indexes)

Expand Down
15 changes: 10 additions & 5 deletions xarray/core/variable.py
Original file line number Diff line number Diff line change
Expand Up @@ -637,17 +637,22 @@ def __setitem__(self, key, value):
"""
dims, index_tuple, new_order = self._broadcast_indexes(key)

if isinstance(value, Variable):
value = value.set_dims(dims).data

if new_order:
value = duck_array_ops.asarray(value)
if not isinstance(value, Variable):
value = as_compatible_data(value)
if value.ndim > len(dims):
raise ValueError(
'shape mismatch: value array of shape %s could not be'
'broadcast to indexing result with %s dimensions'
% (value.shape, len(dims)))
if value.ndim == 0:
value = Variable((), value)
else:
value = Variable(dims[-value.ndim:], value)
# broadcast to become assignable
value = value.set_dims(dims).data

if new_order:
value = duck_array_ops.asarray(value)
value = value[(len(dims) - value.ndim) * (np.newaxis,) +
(Ellipsis,)]
value = np.moveaxis(value, new_order, range(len(new_order)))
Expand Down
Loading

0 comments on commit 5e80189

Please sign in to comment.