diff --git a/doc/indexing.rst b/doc/indexing.rst
index 74aef454c8d..6b01471ecfb 100644
--- a/doc/indexing.rst
+++ b/doc/indexing.rst
@@ -277,7 +277,7 @@ Like numpy and pandas, xarray supports indexing many array elements at once in a
 If you only provide integers, slices, or unlabeled arrays (array without
 dimension names, such as ``np.ndarray``, ``list``, but not
 :py:meth:`~xarray.DataArray` or :py:meth:`~xarray.Variable`) indexing can be
-understand as orthogonally. Each indexer component selects independently along
+understood as orthogonally. Each indexer component selects independently along
 the corresponding dimension, similar to how vector indexing works in Fortran
 or MATLAB, or after using the :py:func:`numpy.ix_` helper:
@@ -357,6 +357,14 @@ These methods may also be applied to ``Dataset`` objects
 ``isel_points`` and ``sel_points`` are now deprecated.
 See :ref:`more_advanced_indexing` for their alternative.
 
+.. note::
+
+   If an indexer is a :py:meth:`~xarray.DataArray`, its coordinates should not
+   conflict with the selected subpart of the target array (except for the
+   explicitly indexed dimensions with ``.loc``/``.sel``).
+   Otherwise, ``IndexError`` will be raised.
+
+
 .. _assigning_values:
 
 Assigning values with indexing
@@ -401,6 +409,11 @@ __ https://docs.scipy.org/doc/numpy/user/basics.indexing.html#assigning-values-t
 
 Dask array does not support value assignment (see :ref:`dask` for the details).
 
+.. note::
+
+   Coordinates in both the left- and right-hand-side arrays should not
+   conflict with each other.
+   Otherwise, ``IndexError`` will be raised.
 
 .. warning::
@@ -457,6 +470,7 @@ method:
 
     arr.sel(space=xr.DataArray(['IA', 'IL', 'IN'], dims=['new_time']),
             time=times)
+
 .. _align and reindex:
 
 Align and reindex
diff --git a/doc/whats-new.rst b/doc/whats-new.rst
index 86b891654ef..12c479ed339 100644
--- a/doc/whats-new.rst
+++ b/doc/whats-new.rst
@@ -33,6 +33,13 @@ Enhancements
 
 Bug fixes
 ~~~~~~~~~
+- Bug fix in vectorized assignment (:issue:`1743`, `1744`).
+  Now item assignment to :py:meth:`~DataArray.__setitem__` checks
+  coordinates of target, destination and keys. If there are any conflicts among
+  these coordinates, ``IndexError`` will be raised.
+  By `Keisuke Fujii `_.
+
+
 .. _whats-new.0.10.0:
 
 - Properly point DataArray.__dask_scheduler__ to dask.threaded.get
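Illustration (not part of the patch): the behaviour described by the two notes above, with made-up data. An indexer whose dimension coordinate agrees with the target is accepted, a conflicting one raises ``IndexError``:

    import numpy as np
    import xarray as xr

    da = xr.DataArray(np.ones((4, 3)), dims=['x', 'y'],
                      coords={'x': np.arange(4)})

    # positional indexer whose own 'x' coordinate matches da['x'] at
    # positions 1 and 2: assignment works
    ind = xr.DataArray([1, 2], dims=['x'], coords={'x': [1, 2]})
    da[dict(x=ind)] = 0

    # indexer whose 'x' coordinate disagrees with the target: IndexError
    bad = xr.DataArray([1, 2], dims=['x'], coords={'x': [10, 20]})
    try:
        da[dict(x=bad)] = 0
    except IndexError as err:
        print(err)  # dimension coordinate 'x' conflicts between indexed and indexing objects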
diff --git a/xarray/core/coordinates.py b/xarray/core/coordinates.py
index 473611806b5..6b83577a90a 100644
--- a/xarray/core/coordinates.py
+++ b/xarray/core/coordinates.py
@@ -5,11 +5,12 @@
 from contextlib import contextmanager
 
 import pandas as pd
 
-from . import formatting
+from . import formatting, indexing
 from .utils import Frozen
 from .merge import (
     merge_coords, expand_and_merge_variables, merge_coords_for_inplace_math)
 from .pycompat import OrderedDict
+from .variable import Variable
 
 
 class AbstractCoordinates(Mapping, formatting.ReprMixin):
@@ -301,3 +302,54 @@ def __getitem__(self, key):
 
     def __unicode__(self):
         return formatting.indexes_repr(self)
+
+
+def assert_coordinate_consistent(obj, coords):
+    """ Make sure the dimension coordinate of obj is
+    consistent with coords.
+
+    obj: DataArray or Dataset
+    coords: Dict-like of variables
+    """
+    for k in obj.dims:
+        # make sure there are no conflicts in dimension coordinates
+        if k in coords and k in obj.coords:
+            if not coords[k].equals(obj[k].variable):
+                raise IndexError(
+                    'dimension coordinate {!r} conflicts between '
+                    'indexed and indexing objects:\n{}\nvs.\n{}'
+                    .format(k, obj[k], coords[k]))
+
+
+def remap_label_indexers(obj, method=None, tolerance=None, **indexers):
+    """
+    Remap **indexers from obj.coords.
+    If an indexer is an instance of DataArray and it has coordinates, then
+    these coordinates will be attached to pos_indexers.
+
+    Returns
+    -------
+    pos_indexers: Same type of indexers.
+        np.ndarray or Variable or DataArray
+    new_indexes: mapping of new dimensional-coordinate.
+    """
+    from .dataarray import DataArray
+
+    v_indexers = {k: v.variable.data if isinstance(v, DataArray) else v
+                  for k, v in indexers.items()}
+
+    pos_indexers, new_indexes = indexing.remap_label_indexers(
+        obj, v_indexers, method=method, tolerance=tolerance
+    )
+    # attach indexer's coordinate to pos_indexers
+    for k, v in indexers.items():
+        if isinstance(v, Variable):
+            pos_indexers[k] = Variable(v.dims, pos_indexers[k])
+        elif isinstance(v, DataArray):
+            # drop coordinates found in indexers since .sel() already
+            # ensures alignments
+            coords = OrderedDict((k, v) for k, v in v._coords.items()
+                                 if k not in indexers)
+            pos_indexers[k] = DataArray(pos_indexers[k],
+                                        coords=coords, dims=v.dims)
+    return pos_indexers, new_indexes
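Illustration (not part of the patch): what the new helpers amount to in user-facing terms. ``remap_label_indexers`` turns label indexers into positional ones and keeps a ``DataArray`` indexer's own dimension and coordinates so ``.sel`` can attach them to the result; ``assert_coordinate_consistent`` rejects conflicting dimension coordinates. The array and station labels below are made up:

    import numpy as np
    import xarray as xr

    arr = xr.DataArray(np.arange(12).reshape(4, 3), dims=['x', 'y'],
                       coords={'x': [10, 20, 30, 40]})

    # a DataArray indexer: labels are remapped to positions, and the indexer's
    # own dimension and coordinates are carried over to the result
    ind = xr.DataArray([10, 30, 40], dims=['station'],
                       coords={'station': ['a', 'b', 'c']})
    sel = arr.sel(x=ind)
    print(sel.dims)               # ('station', 'y')
    print(sel['station'].values)  # ['a' 'b' 'c']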
diff --git a/xarray/core/dataarray.py b/xarray/core/dataarray.py
index 263860dbb74..4ab5136d071 100644
--- a/xarray/core/dataarray.py
+++ b/xarray/core/dataarray.py
@@ -20,7 +20,8 @@
 from .alignment import align, reindex_like_indexers
 from .common import AbstractArray, BaseDataObject
 from .coordinates import (DataArrayCoordinates, LevelCoordinatesSource,
-                          Indexes)
+                          Indexes, assert_coordinate_consistent,
+                          remap_label_indexers)
 from .dataset import Dataset, merge_indexes, split_indexes
 from .pycompat import iteritems, basestring, OrderedDict, zip, range
 from .variable import (as_variable, Variable, as_compatible_data,
@@ -102,22 +103,20 @@ class _LocIndexer(object):
     def __init__(self, data_array):
         self.data_array = data_array
 
-    def _remap_key(self, key):
+    def __getitem__(self, key):
         if not utils.is_dict_like(key):
             # expand the indexer so we can handle Ellipsis
             labels = indexing.expanded_indexer(key, self.data_array.ndim)
             key = dict(zip(self.data_array.dims, labels))
-        return indexing.remap_label_indexers(self.data_array, key)
+        return self.data_array.sel(**key)
 
-    def __getitem__(self, key):
+    def __setitem__(self, key, value):
         if not utils.is_dict_like(key):
             # expand the indexer so we can handle Ellipsis
             labels = indexing.expanded_indexer(key, self.data_array.ndim)
             key = dict(zip(self.data_array.dims, labels))
-        return self.data_array.sel(**key)
 
-    def __setitem__(self, key, value):
-        pos_indexers, _ = self._remap_key(key)
+        pos_indexers, _ = remap_label_indexers(self.data_array, **key)
         self.data_array[pos_indexers] = value
@@ -484,7 +483,15 @@ def __setitem__(self, key, value):
         if isinstance(key, basestring):
             self.coords[key] = value
         else:
-            # xarray-style array indexing
+            # Coordinates in key, value and self[key] should be consistent.
+            # TODO Coordinate consistency in key is checked here, but it
+            # causes unnecessary indexing. It should be optimized.
+            obj = self[key]
+            if isinstance(value, DataArray):
+                assert_coordinate_consistent(value, obj.coords.variables)
+            # DataArray key -> Variable key
+            key = {k: v.variable if isinstance(v, DataArray) else v
+                   for k, v in self._item_key_to_dict(key).items()}
             self.variable[key] = value
 
     def __delitem__(self, key):
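Illustration (not part of the patch): ``.loc`` assignment now goes through the shared ``remap_label_indexers`` helper and then positional ``__setitem__``, so label-based and positional assignment share the same coordinate checks. Example data are made up:

    import numpy as np
    import xarray as xr

    da = xr.DataArray(np.zeros((3, 4)), dims=['x', 'y'],
                      coords={'x': ['a', 'b', 'c'], 'y': np.arange(4)})

    # label-based assignment: the key is remapped to positions, then assigned
    da.loc[dict(x='b', y=2)] = 1.0
    assert da.sel(x='b', y=2).item() == 1.0

    # a non-dict key is expanded over the leading dimensions first
    da.loc['c'] = -1.0
    assert float(da.sel(x='c').sum()) == -4.0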
diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py
index 9c99213dc23..681390f8504 100644
--- a/xarray/core/dataset.py
+++ b/xarray/core/dataset.py
@@ -23,7 +23,8 @@
 from . import duck_array_ops
 from .. import conventions
 from .alignment import align
-from .coordinates import DatasetCoordinates, LevelCoordinatesSource, Indexes
+from .coordinates import (DatasetCoordinates, LevelCoordinatesSource, Indexes,
+                          assert_coordinate_consistent, remap_label_indexers)
 from .common import ImplementsDatasetReduce, BaseDataObject
 from .dtypes import is_datetime_like
 from .merge import (dataset_update_method, dataset_merge_method,
@@ -1305,15 +1306,7 @@ def _get_indexers_coordinates(self, indexers):
         # we don't need to call align() explicitly, because merge_variables
         # already checks for exact alignment between dimension coordinates
         coords = merge_variables(coord_list)
-
-        for k in self.dims:
-            # make sure there are not conflict in dimension coordinates
-            if (k in coords and k in self._variables and
-                    not coords[k].equals(self._variables[k])):
-                raise IndexError(
-                    'dimension coordinate {!r} conflicts between '
-                    'indexed and indexing objects:\n{}\nvs.\n{}'
-                    .format(k, self._variables[k], coords[k]))
+        assert_coordinate_consistent(self, coords)
 
         attached_coords = OrderedDict()
         for k, v in coords.items():
             # silently drop the conflicted variables.
@@ -1437,25 +1430,8 @@ def sel(self, method=None, tolerance=None, drop=False, **indexers):
         Dataset.isel
         DataArray.sel
         """
-        from .dataarray import DataArray
-
-        v_indexers = {k: v.variable.data if isinstance(v, DataArray) else v
-                      for k, v in indexers.items()}
-
-        pos_indexers, new_indexes = indexing.remap_label_indexers(
-            self, v_indexers, method=method, tolerance=tolerance
-        )
-        # attach indexer's coordinate to pos_indexers
-        for k, v in indexers.items():
-            if isinstance(v, Variable):
-                pos_indexers[k] = Variable(v.dims, pos_indexers[k])
-            elif isinstance(v, DataArray):
-                # drop coordinates found in indexers since .sel() already
-                # ensures alignments
-                coords = OrderedDict((k, v) for k, v in v._coords.items()
-                                     if k not in indexers)
-                pos_indexers[k] = DataArray(pos_indexers[k],
-                                            coords=coords, dims=v.dims)
+        pos_indexers, new_indexes = remap_label_indexers(self, method,
+                                                         tolerance, **indexers)
         result = self.isel(drop=drop, **pos_indexers)
         return result._replace_indexes(new_indexes)
diff --git a/xarray/core/variable.py b/xarray/core/variable.py
index 53b4bf60c5c..5c1a2bbe1a7 100644
--- a/xarray/core/variable.py
+++ b/xarray/core/variable.py
@@ -637,17 +637,22 @@ def __setitem__(self, key, value):
         """
         dims, index_tuple, new_order = self._broadcast_indexes(key)
 
-        if isinstance(value, Variable):
-            value = value.set_dims(dims).data
-
-        if new_order:
-            value = duck_array_ops.asarray(value)
+        if not isinstance(value, Variable):
+            value = as_compatible_data(value)
             if value.ndim > len(dims):
                 raise ValueError(
                     'shape mismatch: value array of shape %s could not be'
                     'broadcast to indexing result with %s dimensions'
                     % (value.shape, len(dims)))
+            if value.ndim == 0:
+                value = Variable((), value)
+            else:
+                value = Variable(dims[-value.ndim:], value)
+        # broadcast to become assignable
+        value = value.set_dims(dims).data
+        if new_order:
+            value = duck_array_ops.asarray(value)
             value = value[(len(dims) - value.ndim) * (np.newaxis,) +
                           (Ellipsis,)]
             value = np.moveaxis(value, new_order, range(len(new_order)))
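Illustration (not part of the patch): the reworked ``Variable.__setitem__`` wraps a plain value in a ``Variable`` over the trailing dimensions of the indexing result and broadcasts it with ``set_dims``, so a value carrying the indexer's dimension can be assigned across a vectorized key. Example data are made up:

    import numpy as np
    from xarray import Variable

    v = Variable(['x', 'y'], np.ones((3, 2)))
    ind = Variable(['a'], [0, 1])

    # the value carries the indexer's dimension 'a'; it is broadcast against
    # the indexing result before being assigned
    v[dict(x=ind)] = Variable(['a'], [10, 20])
    print(v.values)
    # [[10. 10.]
    #  [20. 20.]
    #  [ 1.  1.]]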
diff --git a/xarray/tests/test_dataarray.py b/xarray/tests/test_dataarray.py
index c86f706f2ce..2a14742c948 100644
--- a/xarray/tests/test_dataarray.py
+++ b/xarray/tests/test_dataarray.py
@@ -526,6 +526,93 @@ def test_setitem(self):
             expected[t] = 1
         self.assertArrayEqual(orig.values, expected)
 
+    def test_setitem_fancy(self):
+        # vectorized indexing
+        da = DataArray(np.ones((3, 2)), dims=['x', 'y'])
+        ind = Variable(['a'], [0, 1])
+        da[dict(x=ind, y=ind)] = 0
+        expected = DataArray([[0, 1], [1, 0], [1, 1]], dims=['x', 'y'])
+        self.assertDataArrayIdentical(expected, da)
+        # assign another 0d-variable
+        da[dict(x=ind, y=ind)] = Variable((), 0)
+        expected = DataArray([[0, 1], [1, 0], [1, 1]], dims=['x', 'y'])
+        self.assertDataArrayIdentical(expected, da)
+        # assign another 1d-variable
+        da[dict(x=ind, y=ind)] = Variable(['a'], [2, 3])
+        expected = DataArray([[2, 1], [1, 3], [1, 1]], dims=['x', 'y'])
+        self.assertVariableIdentical(expected, da)
+
+        # 2d-vectorized indexing
+        da = DataArray(np.ones((3, 2)), dims=['x', 'y'])
+        ind_x = DataArray([[0, 1]], dims=['a', 'b'])
+        ind_y = DataArray([[1, 0]], dims=['a', 'b'])
+        da[dict(x=ind_x, y=ind_y)] = 0
+        expected = DataArray([[1, 0], [0, 1], [1, 1]], dims=['x', 'y'])
+        self.assertVariableIdentical(expected, da)
+
+        da = DataArray(np.ones((3, 2)), dims=['x', 'y'])
+        ind = Variable(['a'], [0, 1])
+        da[ind] = 0
+        expected = DataArray([[0, 0], [0, 0], [1, 1]], dims=['x', 'y'])
+        self.assertVariableIdentical(expected, da)
+
+    def test_setitem_dataarray(self):
+        def get_data():
+            return DataArray(np.ones((4, 3, 2)), dims=['x', 'y', 'z'],
+                             coords={'x': np.arange(4), 'y': ['a', 'b', 'c'],
+                                     'non-dim': ('x', [1, 3, 4, 2])})
+
+        da = get_data()
+        # indexer with inconsistent coordinates.
+        ind = DataArray(np.arange(1, 4), dims=['x'],
+                        coords={'x': np.random.randn(3)})
+        with raises_regex(IndexError, "dimension coordinate 'x'"):
+            da[dict(x=ind)] = 0
+
+        # indexer with consistent coordinates.
+        ind = DataArray(np.arange(1, 4), dims=['x'],
+                        coords={'x': np.arange(1, 4)})
+        da[dict(x=ind)] = 0  # should not raise
+        assert np.allclose(da[dict(x=ind)].values, 0)
+        self.assertDataArrayIdentical(da['x'], get_data()['x'])
+        self.assertDataArrayIdentical(da['non-dim'], get_data()['non-dim'])
+
+        da = get_data()
+        # conflict in the assigning values
+        value = xr.DataArray(np.zeros((3, 3, 2)), dims=['x', 'y', 'z'],
+                             coords={'x': [0, 1, 2],
+                                     'non-dim': ('x', [0, 2, 4])})
+        with raises_regex(IndexError, "dimension coordinate 'x'"):
+            da[dict(x=ind)] = value
+
+        # consistent coordinate in the assigning values
+        value = xr.DataArray(np.zeros((3, 3, 2)), dims=['x', 'y', 'z'],
+                             coords={'x': [1, 2, 3],
+                                     'non-dim': ('x', [0, 2, 4])})
+        da[dict(x=ind)] = value
+        assert np.allclose(da[dict(x=ind)].values, 0)
+        self.assertDataArrayIdentical(da['x'], get_data()['x'])
+        self.assertDataArrayIdentical(da['non-dim'], get_data()['non-dim'])
+
+        # Conflict in the non-dimension coordinate
+        value = xr.DataArray(np.zeros((3, 3, 2)), dims=['x', 'y', 'z'],
+                             coords={'x': [1, 2, 3],
+                                     'non-dim': ('x', [0, 2, 4])})
+        da[dict(x=ind)] = value  # should not raise
+
+        # conflict in the assigning values
+        value = xr.DataArray(np.zeros((3, 3, 2)), dims=['x', 'y', 'z'],
+                             coords={'x': [0, 1, 2],
+                                     'non-dim': ('x', [0, 2, 4])})
+        with raises_regex(IndexError, "dimension coordinate 'x'"):
+            da[dict(x=ind)] = value
+
+        # consistent coordinate in the assigning values
+        value = xr.DataArray(np.zeros((3, 3, 2)), dims=['x', 'y', 'z'],
+                             coords={'x': [1, 2, 3],
+                                     'non-dim': ('x', [0, 2, 4])})
+        da[dict(x=ind)] = value  # should not raise
+
     def test_contains(self):
         data_array = DataArray(1, coords={'x': 2})
         with pytest.warns(FutureWarning):
@@ -837,6 +924,44 @@ def test_loc_assign(self):
         self.assertTrue(np.all(da.values[0] == np.zeros(4)))
         assert da.values[1, 0] != 0
 
+    def test_loc_assign_dataarray(self):
+        def get_data():
+            return DataArray(np.ones((4, 3, 2)), dims=['x', 'y', 'z'],
+                             coords={'x': np.arange(4), 'y': ['a', 'b', 'c'],
+                                     'non-dim': ('x', [1, 3, 4, 2])})
+
+        da = get_data()
+        # indexer with inconsistent coordinates.
+        ind = DataArray(np.arange(1, 4), dims=['y'],
+                        coords={'y': np.random.randn(3)})
+        with raises_regex(IndexError, "dimension coordinate 'y'"):
+            da.loc[dict(x=ind)] = 0
+
+        # indexer with consistent coordinates.
+        ind = DataArray(np.arange(1, 4), dims=['x'],
+                        coords={'x': np.arange(1, 4)})
+        da.loc[dict(x=ind)] = 0  # should not raise
+        assert np.allclose(da[dict(x=ind)].values, 0)
+        self.assertDataArrayIdentical(da['x'], get_data()['x'])
+        self.assertDataArrayIdentical(da['non-dim'], get_data()['non-dim'])
+
+        da = get_data()
+        # conflict in the assigning values
+        value = xr.DataArray(np.zeros((3, 3, 2)), dims=['x', 'y', 'z'],
+                             coords={'x': [0, 1, 2],
+                                     'non-dim': ('x', [0, 2, 4])})
+        with raises_regex(IndexError, "dimension coordinate 'x'"):
+            da.loc[dict(x=ind)] = value
+
+        # consistent coordinate in the assigning values
+        value = xr.DataArray(np.zeros((3, 3, 2)), dims=['x', 'y', 'z'],
+                             coords={'x': [1, 2, 3],
+                                     'non-dim': ('x', [0, 2, 4])})
+        da.loc[dict(x=ind)] = value
+        assert np.allclose(da[dict(x=ind)].values, 0)
+        self.assertDataArrayIdentical(da['x'], get_data()['x'])
+        self.assertDataArrayIdentical(da['non-dim'], get_data()['non-dim'])
+
     def test_loc_single_boolean(self):
         data = DataArray([0, 1], coords=[[True, False]])
         self.assertEqual(data.loc[True], 0)
diff --git a/xarray/tests/test_dataset.py b/xarray/tests/test_dataset.py
index 192624b64f3..d0b6ed55f45 100644
--- a/xarray/tests/test_dataset.py
+++ b/xarray/tests/test_dataset.py
@@ -1001,15 +1001,12 @@ def test_isel_dataarray(self):
         # Conflict in the dimension coordinate
         indexing_da = DataArray(np.arange(1, 4), dims=['dim2'],
                                 coords={'dim2': np.random.randn(3)})
-        with raises_regex(
-                IndexError, "dimension coordinate 'dim2'"):
+        with raises_regex(IndexError, "dimension coordinate 'dim2'"):
             actual = data.isel(dim2=indexing_da)
         # Also the case for DataArray
-        with raises_regex(
-                IndexError, "dimension coordinate 'dim2'"):
+        with raises_regex(IndexError, "dimension coordinate 'dim2'"):
             actual = data['var2'].isel(dim2=indexing_da)
-        with raises_regex(
-                IndexError, "dimension coordinate 'dim2'"):
+        with raises_regex(IndexError, "dimension coordinate 'dim2'"):
             data['dim2'].isel(dim2=indexing_da)
 
         # same name coordinate which does not conflict
@@ -1502,7 +1499,7 @@ def test_reindex_like(self):
         expected = data.copy(deep=True)
         expected['dim3'] = ('dim3', list('cdefghijkl'))
-        expected['var3'][:-2] = expected['var3'][2:]
+        expected['var3'][:-2] = expected['var3'][2:].values
         expected['var3'][-2:] = np.nan
         expected['letters'] = expected['letters'].astype(object)
         expected['letters'][-2:] = np.nan
@@ -1614,9 +1611,9 @@ def test_align(self):
         left = create_test_data()
         right = left.copy(deep=True)
         right['dim3'] = ('dim3', list('cdefghijkl'))
-        right['var3'][:-2] = right['var3'][2:]
+        right['var3'][:-2] = right['var3'][2:].values
         right['var3'][-2:] = np.random.randn(*right['var3'][-2:].shape)
-        right['numbers'][:-2] = right['numbers'][2:]
+        right['numbers'][:-2] = right['numbers'][2:].values
         right['numbers'][-2:] = -10
 
         intersection = list('cdefghij')
diff --git a/xarray/tests/test_variable.py b/xarray/tests/test_variable.py
index bdaee5edb7a..2e5446bcafc 100644
--- a/xarray/tests/test_variable.py
+++ b/xarray/tests/test_variable.py
@@ -1440,6 +1440,56 @@ def test_setitem(self):
         v[dict(x=[True, False], y=[False, True, False])] = 1
         self.assertTrue(v[0, 1] == 1)
 
+    def test_setitem_fancy(self):
+        # assignment which should work as np.ndarray does
+        def assert_assigned_2d(array, key_x, key_y, values):
+            expected = array.copy()
+            expected[key_x, key_y] = values
+            v = Variable(['x', 'y'], array)
+            v[dict(x=key_x, y=key_y)] = values
+            self.assertArrayEqual(expected, v)
+
+        # 1d vectorized indexing
+        assert_assigned_2d(np.random.randn(4, 3),
+                           key_x=Variable(['a'], [0, 1]),
+                           key_y=Variable(['a'], [0, 1]),
+                           values=0)
+        assert_assigned_2d(np.random.randn(4, 3),
+                           key_x=Variable(['a'], [0, 1]),
+                           key_y=Variable(['a'], [0, 1]),
+                           values=Variable((), 0))
+        assert_assigned_2d(np.random.randn(4, 3),
+                           key_x=Variable(['a'], [0, 1]),
+                           key_y=Variable(['a'], [0, 1]),
+                           values=Variable(('a'), [3, 2]))
+        assert_assigned_2d(np.random.randn(4, 3),
+                           key_x=slice(None),
+                           key_y=Variable(['a'], [0, 1]),
+                           values=Variable(('a'), [3, 2]))
+
+        # 2d-vectorized indexing
+        assert_assigned_2d(np.random.randn(4, 3),
+                           key_x=Variable(['a', 'b'], [[0, 1]]),
+                           key_y=Variable(['a', 'b'], [[1, 0]]),
+                           values=0)
+        assert_assigned_2d(np.random.randn(4, 3),
+                           key_x=Variable(['a', 'b'], [[0, 1]]),
+                           key_y=Variable(['a', 'b'], [[1, 0]]),
+                           values=[0])
+        assert_assigned_2d(np.random.randn(5, 4),
+                           key_x=Variable(['a', 'b'], [[0, 1], [2, 3]]),
+                           key_y=Variable(['a', 'b'], [[1, 0], [3, 3]]),
+                           values=[2, 3])
+
+        # vindex with slice
+        v = Variable(['x', 'y', 'z'], np.ones((4, 3, 2)))
+        ind = Variable(['a'], [0, 1])
+        v[dict(x=ind, z=ind)] = 0
+        expected = Variable(['x', 'y', 'z'], np.ones((4, 3, 2)))
+        expected[0, :, 0] = 0
+        expected[1, :, 1] = 0
+        self.assertVariableIdentical(expected, v)
+
         # dimension broadcast
         v = Variable(['x', 'y'], np.ones((3, 2)))
         ind = Variable(['a', 'b'], [[0, 1]])