pydata · shoyer · Jul 7, 2017 · Jul 10, 2017 · Jul 12, 2017 · Jul 15, 2017
diff --git a/xarray/core/indexing.py b/xarray/core/indexing.py
@@ -371,7 +371,7 @@ def shape(self):
         return tuple(shape)
 
     def __array__(self, dtype=None):
-        array = orthogonally_indexable(self.array)
+        array = broadcasted_indexable(self.array)
         return np.asarray(array[self.key], dtype=None)
 
     def __getitem__(self, key):
@@ -434,7 +434,7 @@ def __setitem__(self, key, value):
         self.array[key] = value
 
 
-def orthogonally_indexable(array):
+def broadcasted_indexable(array):
     if isinstance(array, np.ndarray):
         return NumpyIndexingAdapter(array)
     if isinstance(array, pd.Index):
@@ -445,24 +445,10 @@ def orthogonally_indexable(array):
 
 
 class NumpyIndexingAdapter(utils.NDArrayMixin):
-    """Wrap a NumPy array to use orthogonal indexing (array indexing
-    accesses different dimensions independently, like netCDF4-python variables)
+    """Wrap a NumPy array to use broadcasted indexing
     """
-    # note: this object is somewhat similar to biggus.NumpyArrayAdapter in that
-    # it implements orthogonal indexing, except it casts to a numpy array,
-    # isn't lazy and supports writing values.
     def __init__(self, array):
-        self.array = np.asarray(array)
-
-    def __array__(self, dtype=None):
-        return np.asarray(self.array, dtype=dtype)
-
-    def _convert_key(self, key):
-        key = expanded_indexer(key, self.ndim)
-        if any(not isinstance(k, integer_types + (slice,)) for k in key):
-            # key would trigger fancy indexing
-            key = orthogonal_indexer(key, self.shape)
-        return key
+        self.array = array
 
     def _ensure_ndarray(self, value):
         # We always want the result of indexing to be a NumPy array. If it's
@@ -474,29 +460,37 @@ def _ensure_ndarray(self, value):
         return value
 
     def __getitem__(self, key):
-        key = self._convert_key(key)
         return self._ensure_ndarray(self.array[key])
 
     def __setitem__(self, key, value):
-        key = self._convert_key(key)
         self.array[key] = value
 
 
 class DaskIndexingAdapter(utils.NDArrayMixin):
-    """Wrap a dask array to support orthogonal indexing
+    """Wrap a dask array to support broadcasted-indexing.
     """
     def __init__(self, array):
         self.array = array
 
     def __getitem__(self, key):
-        key = expanded_indexer(key, self.ndim)
-        if any(not isinstance(k, integer_types + (slice,)) for k in key):
+        """ key: tuple of Variable, slice, integer """
+        # basic or orthogonal indexing
+        if all(isinstance(k, (integer_types, slice)) or k.squeeze().ndim <= 1
+               for k in key):
             value = self.array
             for axis, subkey in reversed(list(enumerate(key))):
+                if hasattr(subkey, 'squeeze'):
+                    subkey = subkey.squeeze()
+                    if subkey.ndim == 0:  # make at least 1-d array
+                        subkey = subkey.flatten()
                 value = value[(slice(None),) * axis + (subkey,)]
+            return value
         else:
-            value = self.array[key]
-        return value
+            # TODO: dask does not support indexing with n-d arrays.
+            # flatten() -> .vindex[] -> reshape() should be used
+            # instead of loading the whole array into memory.
+            value = np.asarray(self.array)[key]
+            return value
 
 
 class PandasIndexAdapter(utils.NDArrayMixin):

diff --git a/xarray/core/variable.py b/xarray/core/variable.py
@@ -18,7 +18,8 @@
 from . import utils
 from .pycompat import (basestring, OrderedDict, zip, integer_types,
                        dask_array_type)
-from .indexing import (PandasIndexAdapter, orthogonally_indexable)
+from .indexing import (DaskIndexingAdapter, PandasIndexAdapter,
+                       broadcasted_indexable)
 
 import xarray as xr  # only for Dataset and DataArray
 
@@ -297,7 +298,7 @@ def data(self, data):
 
     @property
     def _indexable_data(self):
-        return orthogonally_indexable(self._data)
+        return broadcasted_indexable(self._data)
 
     def load(self):
         """Manually trigger loading of this variable's data from disk or a
@@ -376,29 +377,89 @@ def _item_key_to_tuple(self, key):
         else:
             return key
 
+    def _broadcast_indexes(self, key):
+        """Prepare an indexing key for broadcasted indexing.
+
+        Parameters
+        ----------
+        key : array, or mapping of dimension names to indexers
+            Integers and slices (or a tuple of indexers) are accepted as well.
+
+        Returns
+        -------
+        dims : tuple of str
+            Dimensions of the resultant variable.
+        indexers : tuple of integer, array-like, or slice
+            Indexers aligned along self.dims.
+        """
+        key = self._item_key_to_tuple(key)  # key is a tuple
+        # key is a tuple of full size
+        key = indexing.expanded_indexer(key, self.ndim)
+        basic_indexing_types = integer_types + (slice,)
+        if all([isinstance(k, basic_indexing_types) for k in key]):
+            return self._broadcast_indexes_basic(key)
+        else:
+            return self._broadcast_indexes_advanced(key)
+
+    def _broadcast_indexes_basic(self, key):
+        dims = tuple(dim for k, dim in zip(key, self.dims)
+                     if not isinstance(k, integer_types))
+        return dims, key
+
+    def nonzero(self):
+        """Equivalent to numpy's nonzero, but returns a tuple of Variables."""
+        if isinstance(self._data, (np.ndarray, pd.Index, PandasIndexAdapter)):
+            nonzeros = np.nonzero(self._data)
+        elif isinstance(self._data, dask_array_type):
+            # TODO: switch to dask's native nonzero once
+            # https://github.com/dask/dask/issues/1076 is implemented.
+            nonzeros = np.nonzero(self.load()._data)
+
+        return tuple([as_variable(nz, name=dim) for nz, dim
+                      in zip(nonzeros, self.dims)])
+
+    def _isbool_type(self):
+        """Return whether the variable's dtype is bool."""
+        if isinstance(self._data, (np.ndarray, PandasIndexAdapter, pd.Index)):
+            return self._data.dtype is np.dtype('bool')
+        elif isinstance(self._data, dask_array_type):
+            raise NotImplementedError
+
+    def _broadcast_indexes_advanced(self, key):
+        variables = []
+
+        for dim, value in zip(self.dims, key):
+            if isinstance(value, slice):
+                value = np.arange(self.sizes[dim])[value]
+
+            try:  # TODO we need our own Exception.
+                variable = as_variable(value, name=dim)
+            except ValueError as e:
+                if "cannot set variable" in str(e):
+                    raise IndexError("Unlabelled multi-dimensional array "
+                                     "cannot be used for indexing.")
+                else:
+                    raise e
+            if variable._isbool_type():  # boolean indexing case
+                variables.extend(list(variable.nonzero()))
+            else:
+                variables.append(variable)
+        variables = _broadcast_compat_variables(*variables)
+        dims = variables[0].dims  # all variables have the same dims
+        key = tuple(variable.data for variable in variables)
+        return dims, key
+
     def __getitem__(self, key):
         """Return a new Array object whose contents are consistent with
         getting the provided key from the underlying data.
 
-        NB. __getitem__ and __setitem__ implement "orthogonal indexing" like
-        netCDF4-python, where the key can only include integers, slices
-        (including `Ellipsis`) and 1d arrays, each of which are applied
-        orthogonally along their respective dimensions.
+        NB. __getitem__ and __setitem__ implement "broadcasted indexing" like
+        np.ndarray.
 
-        The difference does not matter in most cases unless you are using
-        numpy's "fancy indexing," which can otherwise result in data arrays
-        whose shapes is inconsistent (or just uninterpretable with) with the
-        variable's dimensions.
-
-        If you really want to do indexing like `x[x > 0]`, manipulate the numpy
-        array `x.values` directly.
+        This method will replace __getitem__ after we make sure its stability.
         """
-        key = self._item_key_to_tuple(key)
-        key = indexing.expanded_indexer(key, self.ndim)
-        dims = tuple(dim for k, dim in zip(key, self.dims)
-                     if not isinstance(k, integer_types))
-        values = self._indexable_data[key]
-        # orthogonal indexing should ensure the dimensionality is consistent
+        dims, index_tuple = self._broadcast_indexes(key)
+        values = self._indexable_data[index_tuple]
         if hasattr(values, 'ndim'):
             assert values.ndim == len(dims), (values.ndim, len(dims))
         else:
@@ -412,15 +473,15 @@ def __setitem__(self, key, value):
 
         See __getitem__ for more details.
         """
-        key = self._item_key_to_tuple(key)
+        dims, index_tuple = self._broadcast_indexes(key)
         if isinstance(self._data, dask_array_type):
             raise TypeError("this variable's data is stored in a dask array, "
                             'which does not support item assignment. To '
                             'assign to this variable, you must first load it '
                             'into memory explicitly using the .load_data() '
                             'method or accessing its .values attribute.')
-        data = orthogonally_indexable(self._data)
-        data[key] = value
+        data = broadcasted_indexable(self._data)
+        data[index_tuple] = value
 
     @property
     def attrs(self):