Merge remote-tracking branch 'upstream/master' into chunk-unique-token

* upstream/master: Fix map_blocks HLG layering (pydata#3598) Silence sphinx warnings: Round 2 (pydata#3592) 2x~5x speed up for isel() in most cases (pydata#3533) remove xarray again (pydata#3591) fix plotting with transposed nondim coords. (pydata#3441) make coarsen reductions consistent with reductions on other classes (pydata#3500) Resolve the version issues on RTD (pydata#3589) Add bottleneck & rasterio git tip to upstream-dev CI (pydata#3585)
dcherian · Dec 7, 2019 · 3038caa · 3038caa
2 parents f0d73e1 + cafcaee
commit 3038caa
Show file tree

Hide file tree

Showing 25 changed files with 294 additions and 98 deletions.
diff --git a/azure-pipelines.yml b/azure-pipelines.yml
@@ -110,5 +110,5 @@ jobs:
   - bash: |
       source activate xarray-tests
       cd doc
-      sphinx-build -n -j auto -b html -d _build/doctrees . _build/html
+      sphinx-build -W --keep-going -j auto -b html -d _build/doctrees . _build/html
     displayName: Build HTML docs
diff --git a/ci/azure/install.yml b/ci/azure/install.yml
@@ -25,7 +25,9 @@ steps:
         git+https://github.com/dask/dask \
         git+https://github.com/dask/distributed \
         git+https://github.com/zarr-developers/zarr \
-        git+https://github.com/Unidata/cftime
+        git+https://github.com/Unidata/cftime \
+        git+https://github.com/mapbox/rasterio \
+        git+https://github.com/pydata/bottleneck
   condition: eq(variables['UPSTREAM_DEV'], 'true')
   displayName: Install upstream dev dependencies
 

diff --git a/ci/requirements/doc.yml b/ci/requirements/doc.yml
@@ -6,7 +6,7 @@ dependencies:
   - python=3.7
   - bottleneck
   - cartopy
-  - eccodes
+  - cfgrib
   - h5netcdf
   - ipykernel
   - ipython
@@ -22,7 +22,3 @@ dependencies:
   - sphinx
   - sphinx_rtd_theme
   - zarr
-  - pip
-  - pip:
-    - cfgrib
-
diff --git a/doc/conf.py b/doc/conf.py
@@ -15,10 +15,16 @@
 
 import datetime
 import os
+import pathlib
 import subprocess
 import sys
 from contextlib import suppress
 
+# make sure the source version is preferred (#3567)
+root = pathlib.Path(__file__).absolute().parent.parent
+os.environ["PYTHONPATH"] = str(root)
+sys.path.insert(0, str(root))
+
 import xarray
 
 allowed_failures = set()

diff --git a/doc/whats-new.rst b/doc/whats-new.rst
@@ -28,7 +28,9 @@ New Features
 - :py:meth:`Dataset.quantile`, :py:meth:`DataArray.quantile` and ``GroupBy.quantile``
   now work with dask Variables.
   By `Deepak Cherian <https://github.com/dcherian>`_.
-
+- Added the :py:meth:`count` reduction method to both :py:class:`DatasetCoarsen`
+  and :py:class:`DataArrayCoarsen` objects. (:pull:`3500`)
+  By `Deepak Cherian <https://github.com/dcherian>`_.
 
 Bug fixes
 ~~~~~~~~~
@@ -37,29 +39,35 @@ Bug fixes
   stay the same when rechunking by the same chunk size. (:issue:`3350`)
   By `Deepak Cherian <https://github.com/dcherian>`_.
 
+- Fix plotting with transposed 2D non-dimensional coordinates. (:issue:`3138`, :pull:`3441`)
+  By `Deepak Cherian <https://github.com/dcherian>`_.
+- Fix issue with Dask-backed datasets raising a ``KeyError`` on some computations involving ``map_blocks``. (:pull:`3598`)
+  By `Tom Augspurger <https://github.com/TomAugspurger>`_.
 
 Documentation
 ~~~~~~~~~~~~~
 - Switch doc examples to use nbsphinx and replace sphinx_gallery with
   notebook.
   (:pull:`3105`, :pull:`3106`, :pull:`3121`)
-  By `Ryan Abernathey <https://github.com/rabernat>`
+  By `Ryan Abernathey <https://github.com/rabernat>`_
 - Added example notebook demonstrating use of xarray with Regional Ocean
   Modeling System (ROMS) ocean hydrodynamic model output.
   (:pull:`3116`).
-  By `Robert Hetland <https://github.com/hetland>`
+  By `Robert Hetland <https://github.com/hetland>`_
 - Added example notebook demonstrating the visualization of ERA5 GRIB
   data. (:pull:`3199`)
-  By `Zach Bruick <https://github.com/zbruick>` and
-  `Stephan Siemen <https://github.com/StephanSiemen>`
-- Added examples for `DataArray.quantile`, `Dataset.quantile` and
-  `GroupBy.quantile`. (:pull:`3576`)
+  By `Zach Bruick <https://github.com/zbruick>`_ and
+  `Stephan Siemen <https://github.com/StephanSiemen>`_
+- Added examples for :py:meth:`DataArray.quantile`, :py:meth:`Dataset.quantile` and
+  ``GroupBy.quantile``. (:pull:`3576`)
   By `Justus Magin <https://github.com/keewis>`_.
 
 Internal Changes
 ~~~~~~~~~~~~~~~~
-
-
+- 2x to 5x speed boost (on small arrays) for :py:meth:`Dataset.isel`,
+  :py:meth:`DataArray.isel`, and :py:meth:`DataArray.__getitem__` when indexing by int,
+  slice, list of int, scalar ndarray, or 1-dimensional ndarray.
+  (:pull:`3533`) by `Guido Imperiale <https://github.com/crusaderky>`_.
 - Removed internal method ``Dataset._from_vars_and_coord_names``, 
   which was dominated by ``Dataset._construct_direct``. (:pull:`3565`)
   By `Maximilian Roos <https://github.com/max-sixty>`_
@@ -190,6 +198,7 @@ Documentation
 
 Internal Changes
 ~~~~~~~~~~~~~~~~
+
 - Added integration tests against `pint <https://pint.readthedocs.io/>`_.
   (:pull:`3238`, :pull:`3447`, :pull:`3493`, :pull:`3508`)
   by `Justus Magin <https://github.com/keewis>`_.

diff --git a/readthedocs.yml b/readthedocs.yml
@@ -4,5 +4,5 @@ conda:
     file: ci/requirements/doc.yml
 python:
     version: 3.7
-    setup_py_install: true
+    setup_py_install: false
 formats: []
diff --git a/xarray/coding/cftime_offsets.py b/xarray/coding/cftime_offsets.py
@@ -42,6 +42,7 @@
 
 import re
 from datetime import timedelta
+from distutils.version import LooseVersion
 from functools import partial
 from typing import ClassVar, Optional
 
@@ -50,7 +51,6 @@
 from ..core.pdcompat import count_not_none
 from .cftimeindex import CFTimeIndex, _parse_iso8601_with_reso
 from .times import format_cftime_datetime
-from distutils.version import LooseVersion
 
 
 def get_date_type(calendar):

diff --git a/xarray/core/dataarray.py b/xarray/core/dataarray.py
@@ -50,7 +50,8 @@
 )
 from .dataset import Dataset, split_indexes
 from .formatting import format_item
-from .indexes import Indexes, propagate_indexes, default_indexes
+from .indexes import Indexes, default_indexes, propagate_indexes
+from .indexing import is_fancy_indexer
 from .merge import PANDAS_TYPES, _extract_indexes_from_coords
 from .options import OPTIONS
 from .utils import Default, ReprObject, _check_inplace, _default, either_dict_or_kwargs
@@ -234,19 +235,6 @@ class DataArray(AbstractArray, DataWithCoords):
 
     Getting items from or doing mathematical operations with a DataArray
     always returns another DataArray.
-
-    Attributes
-    ----------
-    dims : tuple
-        Dimension names associated with this array.
-    values : numpy.ndarray
-        Access or modify DataArray values as a numpy array.
-    coords : dict-like
-        Dictionary of DataArray objects that label values along each dimension.
-    name : str or None
-        Name of this array.
-    attrs : dict
-        Dictionary for holding arbitrary metadata.
     """
 
     _cache: Dict[str, Any]
@@ -1027,8 +1015,27 @@ def isel(
         DataArray.sel
         """
         indexers = either_dict_or_kwargs(indexers, indexers_kwargs, "isel")
-        ds = self._to_temp_dataset().isel(drop=drop, indexers=indexers)
-        return self._from_temp_dataset(ds)
+        if any(is_fancy_indexer(idx) for idx in indexers.values()):
+            ds = self._to_temp_dataset()._isel_fancy(indexers, drop=drop)
+            return self._from_temp_dataset(ds)
+
+        # Much faster algorithm for when all indexers are ints, slices, one-dimensional
+        # lists, or zero- or one-dimensional np.ndarrays
+
+        variable = self._variable.isel(indexers)
+
+        coords = {}
+        for coord_name, coord_value in self._coords.items():
+            coord_indexers = {
+                k: v for k, v in indexers.items() if k in coord_value.dims
+            }
+            if coord_indexers:
+                coord_value = coord_value.isel(coord_indexers)
+                if drop and coord_value.ndim == 0:
+                    continue
+            coords[coord_name] = coord_value
+
+        return self._replace(variable=variable, coords=coords)
 
     def sel(
         self,
@@ -2980,8 +2987,6 @@ def quantile(
         ...     coords={"x": [7, 9], "y": [1, 1.5, 2, 2.5]},
         ...     dims=("x", "y"),
         ... )
-
-        Single quantile
         >>> da.quantile(0)  # or da.quantile(0, dim=...)
         <xarray.DataArray ()>
         array(0.7)
@@ -2993,8 +2998,6 @@ def quantile(
         Coordinates:
           * y         (y) float64 1.0 1.5 2.0 2.5
             quantile  float64 0.0
-
-        Multiple quantiles
         >>> da.quantile([0, 0.5, 1])
         <xarray.DataArray (quantile: 3)>
         array([0.7, 3.4, 9.4])

diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py
@@ -66,6 +66,7 @@
     propagate_indexes,
     roll_index,
 )
+from .indexing import is_fancy_indexer
 from .merge import (
     dataset_merge_method,
     dataset_update_method,
@@ -78,8 +79,8 @@
     Default,
     Frozen,
     SortedKeysDict,
-    _default,
     _check_inplace,
+    _default,
     decode_numpy_dict_values,
     either_dict_or_kwargs,
     hashable,
@@ -1910,6 +1911,48 @@ def isel(
         DataArray.isel
         """
         indexers = either_dict_or_kwargs(indexers, indexers_kwargs, "isel")
+        if any(is_fancy_indexer(idx) for idx in indexers.values()):
+            return self._isel_fancy(indexers, drop=drop)
+
+        # Much faster algorithm for when all indexers are ints, slices, one-dimensional
+        # lists, or zero- or one-dimensional np.ndarrays
+        invalid = indexers.keys() - self.dims.keys()
+        if invalid:
+            raise ValueError("dimensions %r do not exist" % invalid)
+
+        variables = {}
+        dims: Dict[Hashable, Tuple[int, ...]] = {}
+        coord_names = self._coord_names.copy()
+        indexes = self._indexes.copy() if self._indexes is not None else None
+
+        for var_name, var_value in self._variables.items():
+            var_indexers = {k: v for k, v in indexers.items() if k in var_value.dims}
+            if var_indexers:
+                var_value = var_value.isel(var_indexers)
+                if drop and var_value.ndim == 0 and var_name in coord_names:
+                    coord_names.remove(var_name)
+                    if indexes:
+                        indexes.pop(var_name, None)
+                    continue
+                if indexes and var_name in indexes:
+                    if var_value.ndim == 1:
+                        indexes[var_name] = var_value.to_index()
+                    else:
+                        del indexes[var_name]
+            variables[var_name] = var_value
+            dims.update(zip(var_value.dims, var_value.shape))
+
+        return self._construct_direct(
+            variables=variables,
+            coord_names=coord_names,
+            dims=dims,
+            attrs=self._attrs,
+            indexes=indexes,
+            encoding=self._encoding,
+            file_obj=self._file_obj,
+        )
+
+    def _isel_fancy(self, indexers: Mapping[Hashable, Any], *, drop: bool) -> "Dataset":
         # Note: we need to preserve the original indexers variable in order to merge the
         # coords below
         indexers_list = list(self._validate_indexers(indexers))
@@ -5127,8 +5170,6 @@ def quantile(
         ...     {"a": (("x", "y"), [[0.7, 4.2, 9.4, 1.5], [6.5, 7.3, 2.6, 1.9]])},
         ...     coords={"x": [7, 9], "y": [1, 1.5, 2, 2.5]},
         ... )
-
-        Single quantile
         >>> ds.quantile(0)  # or ds.quantile(0, dim=...)
         <xarray.Dataset>
         Dimensions:   ()
@@ -5144,8 +5185,6 @@ def quantile(
             quantile  float64 0.0
         Data variables:
             a         (y) float64 0.7 4.2 2.6 1.5
-
-        Multiple quantiles
         >>> ds.quantile([0, 0.5, 1])
         <xarray.Dataset>
         Dimensions:   (quantile: 3)

diff --git a/xarray/core/formatting_html.py b/xarray/core/formatting_html.py
@@ -1,11 +1,11 @@
 import uuid
-import pkg_resources
 from collections import OrderedDict
 from functools import partial
 from html import escape
 
-from .formatting import inline_variable_array_repr, short_data_repr
+import pkg_resources
 
+from .formatting import inline_variable_array_repr, short_data_repr
 
 CSS_FILE_PATH = "/".join(("static", "css", "style.css"))
 CSS_STYLE = pkg_resources.resource_string("xarray", CSS_FILE_PATH).decode("utf8")

diff --git a/xarray/core/groupby.py b/xarray/core/groupby.py
@@ -607,8 +607,6 @@ def quantile(self, q, dim=None, interpolation="linear", keep_attrs=None):
         ...     dims=("y", "y"),
         ... )
         >>> ds = xr.Dataset({"a": da})
-
-        Single quantile
         >>> da.groupby("x").quantile(0)
         <xarray.DataArray (x: 2, y: 4)>
         array([[0.7, 4.2, 0.7, 1.5],
@@ -625,15 +623,12 @@ def quantile(self, q, dim=None, interpolation="linear", keep_attrs=None):
           * y         (y) int64 1 2
         Data variables:
             a         (y) float64 0.7 0.7
-
-        Multiple quantiles
         >>> da.groupby("x").quantile([0, 0.5, 1])
         <xarray.DataArray (x: 2, y: 4, quantile: 3)>
         array([[[0.7 , 1.  , 1.3 ],
                 [4.2 , 6.3 , 8.4 ],
                 [0.7 , 5.05, 9.4 ],
                 [1.5 , 4.2 , 6.9 ]],
-
                [[6.5 , 6.5 , 6.5 ],
                 [7.3 , 7.3 , 7.3 ],
                 [2.6 , 2.6 , 2.6 ],

diff --git a/xarray/core/indexing.py b/xarray/core/indexing.py
@@ -1213,6 +1213,19 @@ def posify_mask_indexer(indexer):
     return type(indexer)(key)
 
 
+def is_fancy_indexer(indexer: Any) -> bool:
+    """Return False if indexer is an int, a slice, a 1-dimensional list, or a 0- or
+    1-dimensional ndarray; in all other cases return True.
+    """
+    if isinstance(indexer, (int, slice)):
+        return False
+    if isinstance(indexer, np.ndarray):
+        return indexer.ndim > 1
+    if isinstance(indexer, list):
+        return bool(indexer) and not isinstance(indexer[0], int)
+    return True
+
+
 class NumpyIndexingAdapter(ExplicitlyIndexedNDArrayMixin):
     """Wrap a NumPy array to use explicit indexing."""
 

diff --git a/xarray/core/nanops.py b/xarray/core/nanops.py
@@ -25,7 +25,7 @@ def _maybe_null_out(result, axis, mask, min_count=1):
     """
     if hasattr(axis, "__len__"):  # if tuple or list
         raise ValueError(
-            "min_count is not available for reduction " "with more than one dimensions."
+            "min_count is not available for reduction with more than one dimensions."
         )
 
     if axis is not None and getattr(result, "ndim", False):

diff --git a/xarray/core/ops.py b/xarray/core/ops.py
@@ -347,13 +347,3 @@ def inject_all_ops_and_reduce_methods(cls, priority=50, array_only=True):
 
     inject_reduce_methods(cls)
     inject_cum_methods(cls)
-
-
-def inject_coarsen_methods(cls):
-    # standard numpy reduce methods
-    methods = [(name, getattr(duck_array_ops, name)) for name in NAN_REDUCE_METHODS]
-    for name, f in methods:
-        func = cls._reduce_method(f)
-        func.__name__ = name
-        func.__doc__ = _COARSEN_REDUCE_DOCSTRING_TEMPLATE.format(name=func.__name__)
-        setattr(cls, name, func)