diff --git a/azure-pipelines.yml b/azure-pipelines.yml
index 90de0705a27..d6ee76c7d3f 100644
--- a/azure-pipelines.yml
+++ b/azure-pipelines.yml
@@ -110,5 +110,5 @@ jobs:
   - bash: |
       source activate xarray-tests
       cd doc
-      sphinx-build -n -j auto -b html -d _build/doctrees . _build/html
+      sphinx-build -W --keep-going -j auto -b html -d _build/doctrees . _build/html
     displayName: Build HTML docs
diff --git a/ci/azure/install.yml b/ci/azure/install.yml
index baa69bcc8d5..e4f3a0b9e16 100644
--- a/ci/azure/install.yml
+++ b/ci/azure/install.yml
@@ -25,7 +25,9 @@ steps:
       git+https://github.com/dask/dask \
       git+https://github.com/dask/distributed \
       git+https://github.com/zarr-developers/zarr \
-      git+https://github.com/Unidata/cftime
+      git+https://github.com/Unidata/cftime \
+      git+https://github.com/mapbox/rasterio \
+      git+https://github.com/pydata/bottleneck
   condition: eq(variables['UPSTREAM_DEV'], 'true')
   displayName: Install upstream dev dependencies
diff --git a/ci/requirements/doc.yml b/ci/requirements/doc.yml
index f2c09ed6fef..a0c27a30b01 100644
--- a/ci/requirements/doc.yml
+++ b/ci/requirements/doc.yml
@@ -6,7 +6,7 @@ dependencies:
   - python=3.7
   - bottleneck
   - cartopy
-  - eccodes
+  - cfgrib
   - h5netcdf
   - ipykernel
   - ipython
@@ -22,7 +22,3 @@ dependencies:
   - sphinx
   - sphinx_rtd_theme
   - zarr
-  - pip
-  - pip:
-    - cfgrib
-
diff --git a/doc/conf.py b/doc/conf.py
index 65635353e93..11abda6bb63 100644
--- a/doc/conf.py
+++ b/doc/conf.py
@@ -15,10 +15,16 @@
 import datetime
 import os
+import pathlib
 import subprocess
 import sys
 from contextlib import suppress
 
+# make sure the source version is preferred (#3567)
+root = pathlib.Path(__file__).absolute().parent.parent
+os.environ["PYTHONPATH"] = str(root)
+sys.path.insert(0, str(root))
+
 import xarray
 
 allowed_failures = set()
diff --git a/doc/whats-new.rst b/doc/whats-new.rst
index a332a25e841..ce796c4e5ee 100644
--- a/doc/whats-new.rst
+++ b/doc/whats-new.rst
@@ -28,7 +28,9 @@ New Features
 - :py:meth:`Dataset.quantile`, :py:meth:`DataArray.quantile` and ``GroupBy.quantile``
   now work with dask Variables.
   By `Deepak Cherian`_.
-
+- Added the :py:meth:`count` reduction method to both :py:class:`DatasetCoarsen`
+  and :py:class:`DataArrayCoarsen` objects. (:pull:`3500`)
+  By `Deepak Cherian`_
 
 Bug fixes
 ~~~~~~~~~
@@ -37,29 +39,35 @@ Bug fixes
   stay the same when rechunking by the same chunk size. (:issue:`3350`)
   By `Deepak Cherian`_.
 
+- Fix plotting with transposed 2D non-dimensional coordinates. (:issue:`3138`, :pull:`3441`)
+  By `Deepak Cherian`_.
+- Fix issue with Dask-backed datasets raising a ``KeyError`` on some computations
+  involving ``map_blocks`` (:pull:`3598`)
+  By `Tom Augspurger`_.
 
 Documentation
 ~~~~~~~~~~~~~
 
 - Switch doc examples to use nbsphinx and replace sphinx_gallery with notebook.
   (:pull:`3105`, :pull:`3106`, :pull:`3121`)
-  By `Ryan Abernathey`
+  By `Ryan Abernathey`_
 - Added example notebook demonstrating use of xarray with Regional Ocean
   Modeling System (ROMS) ocean hydrodynamic model output. (:pull:`3116`).
-  By `Robert Hetland`
+  By `Robert Hetland`_
 - Added example notebook demonstrating the visualization of ERA5 GRIB
   data. (:pull:`3199`)
-  By `Zach Bruick` and
-  `Stephan Siemen`
-- Added examples for `DataArray.quantile`, `Dataset.quantile` and
-  `GroupBy.quantile`. (:pull:`3576`)
+  By `Zach Bruick`_ and
+  `Stephan Siemen`_
+- Added examples for :py:meth:`DataArray.quantile`, :py:meth:`Dataset.quantile` and
+  ``GroupBy.quantile``. (:pull:`3576`)
   By `Justus Magin`_.
 Internal Changes
 ~~~~~~~~~~~~~~~~
 
-
-
+- 2x to 5x speed boost (on small arrays) for :py:meth:`Dataset.isel`,
+  :py:meth:`DataArray.isel`, and :py:meth:`DataArray.__getitem__` when indexing by int,
+  slice, list of int, scalar ndarray, or 1-dimensional ndarray.
+  (:pull:`3533`) by `Guido Imperiale`_.
 - Removed internal method ``Dataset._from_vars_and_coord_names``,
   which was dominated by ``Dataset._construct_direct``. (:pull:`3565`)
   By `Maximilian Roos`_
@@ -190,6 +198,7 @@ Documentation
 
 Internal Changes
 ~~~~~~~~~~~~~~~~
+
 - Added integration tests against `pint`_.
   (:pull:`3238`, :pull:`3447`, :pull:`3493`, :pull:`3508`)
   by `Justus Magin`_.
diff --git a/readthedocs.yml b/readthedocs.yml
index 6429780e7d7..c64fa1b7b02 100644
--- a/readthedocs.yml
+++ b/readthedocs.yml
@@ -4,5 +4,5 @@ conda:
   file: ci/requirements/doc.yml
 python:
   version: 3.7
-  setup_py_install: true
+  setup_py_install: false
 formats: []
diff --git a/xarray/coding/cftime_offsets.py b/xarray/coding/cftime_offsets.py
index 8471ed1a558..eeb68508527 100644
--- a/xarray/coding/cftime_offsets.py
+++ b/xarray/coding/cftime_offsets.py
@@ -42,6 +42,7 @@
 
 import re
 from datetime import timedelta
+from distutils.version import LooseVersion
 from functools import partial
 from typing import ClassVar, Optional
 
@@ -50,7 +51,6 @@
 from ..core.pdcompat import count_not_none
 from .cftimeindex import CFTimeIndex, _parse_iso8601_with_reso
 from .times import format_cftime_datetime
-from distutils.version import LooseVersion
 
 
 def get_date_type(calendar):
diff --git a/xarray/core/dataarray.py b/xarray/core/dataarray.py
index 64f21b0eb01..b649df6dd56 100644
--- a/xarray/core/dataarray.py
+++ b/xarray/core/dataarray.py
@@ -50,7 +50,8 @@
 )
 from .dataset import Dataset, split_indexes
 from .formatting import format_item
-from .indexes import Indexes, propagate_indexes, default_indexes
+from .indexes import Indexes, default_indexes, propagate_indexes
+from .indexing import is_fancy_indexer
 from .merge import PANDAS_TYPES, _extract_indexes_from_coords
 from .options import OPTIONS
 from .utils import Default, ReprObject, _check_inplace, _default, either_dict_or_kwargs
@@ -234,19 +235,6 @@ class DataArray(AbstractArray, DataWithCoords):
     Getting items from or doing mathematical operations with a DataArray
     always returns another DataArray.
-
-    Attributes
-    ----------
-    dims : tuple
-        Dimension names associated with this array.
-    values : numpy.ndarray
-        Access or modify DataArray values as a numpy array.
-    coords : dict-like
-        Dictionary of DataArray objects that label values along each dimension.
-    name : str or None
-        Name of this array.
-    attrs : dict
-        Dictionary for holding arbitrary metadata.
""" _cache: Dict[str, Any] @@ -1027,8 +1015,27 @@ def isel( DataArray.sel """ indexers = either_dict_or_kwargs(indexers, indexers_kwargs, "isel") - ds = self._to_temp_dataset().isel(drop=drop, indexers=indexers) - return self._from_temp_dataset(ds) + if any(is_fancy_indexer(idx) for idx in indexers.values()): + ds = self._to_temp_dataset()._isel_fancy(indexers, drop=drop) + return self._from_temp_dataset(ds) + + # Much faster algorithm for when all indexers are ints, slices, one-dimensional + # lists, or zero or one-dimensional np.ndarray's + + variable = self._variable.isel(indexers) + + coords = {} + for coord_name, coord_value in self._coords.items(): + coord_indexers = { + k: v for k, v in indexers.items() if k in coord_value.dims + } + if coord_indexers: + coord_value = coord_value.isel(coord_indexers) + if drop and coord_value.ndim == 0: + continue + coords[coord_name] = coord_value + + return self._replace(variable=variable, coords=coords) def sel( self, @@ -2980,8 +2987,6 @@ def quantile( ... coords={"x": [7, 9], "y": [1, 1.5, 2, 2.5]}, ... dims=("x", "y"), ... ) - - Single quantile >>> da.quantile(0) # or da.quantile(0, dim=...) array(0.7) @@ -2993,8 +2998,6 @@ def quantile( Coordinates: * y (y) float64 1.0 1.5 2.0 2.5 quantile float64 0.0 - - Multiple quantiles >>> da.quantile([0, 0.5, 1]) array([0.7, 3.4, 9.4]) diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index b2524526dfd..6e406ff5c8e 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -66,6 +66,7 @@ propagate_indexes, roll_index, ) +from .indexing import is_fancy_indexer from .merge import ( dataset_merge_method, dataset_update_method, @@ -78,8 +79,8 @@ Default, Frozen, SortedKeysDict, - _default, _check_inplace, + _default, decode_numpy_dict_values, either_dict_or_kwargs, hashable, @@ -1910,6 +1911,48 @@ def isel( DataArray.isel """ indexers = either_dict_or_kwargs(indexers, indexers_kwargs, "isel") + if any(is_fancy_indexer(idx) for idx in indexers.values()): + return self._isel_fancy(indexers, drop=drop) + + # Much faster algorithm for when all indexers are ints, slices, one-dimensional + # lists, or zero or one-dimensional np.ndarray's + invalid = indexers.keys() - self.dims.keys() + if invalid: + raise ValueError("dimensions %r do not exist" % invalid) + + variables = {} + dims: Dict[Hashable, Tuple[int, ...]] = {} + coord_names = self._coord_names.copy() + indexes = self._indexes.copy() if self._indexes is not None else None + + for var_name, var_value in self._variables.items(): + var_indexers = {k: v for k, v in indexers.items() if k in var_value.dims} + if var_indexers: + var_value = var_value.isel(var_indexers) + if drop and var_value.ndim == 0 and var_name in coord_names: + coord_names.remove(var_name) + if indexes: + indexes.pop(var_name, None) + continue + if indexes and var_name in indexes: + if var_value.ndim == 1: + indexes[var_name] = var_value.to_index() + else: + del indexes[var_name] + variables[var_name] = var_value + dims.update(zip(var_value.dims, var_value.shape)) + + return self._construct_direct( + variables=variables, + coord_names=coord_names, + dims=dims, + attrs=self._attrs, + indexes=indexes, + encoding=self._encoding, + file_obj=self._file_obj, + ) + + def _isel_fancy(self, indexers: Mapping[Hashable, Any], *, drop: bool) -> "Dataset": # Note: we need to preserve the original indexers variable in order to merge the # coords below indexers_list = list(self._validate_indexers(indexers)) @@ -5127,8 +5170,6 @@ def quantile( ... 
{"a": (("x", "y"), [[0.7, 4.2, 9.4, 1.5], [6.5, 7.3, 2.6, 1.9]])}, ... coords={"x": [7, 9], "y": [1, 1.5, 2, 2.5]}, ... ) - - Single quantile >>> ds.quantile(0) # or ds.quantile(0, dim=...) Dimensions: () @@ -5144,8 +5185,6 @@ def quantile( quantile float64 0.0 Data variables: a (y) float64 0.7 4.2 2.6 1.5 - - Multiple quantiles >>> ds.quantile([0, 0.5, 1]) Dimensions: (quantile: 3) diff --git a/xarray/core/formatting_html.py b/xarray/core/formatting_html.py index dbebbcf4fbe..8ceda8bfbfa 100644 --- a/xarray/core/formatting_html.py +++ b/xarray/core/formatting_html.py @@ -1,11 +1,11 @@ import uuid -import pkg_resources from collections import OrderedDict from functools import partial from html import escape -from .formatting import inline_variable_array_repr, short_data_repr +import pkg_resources +from .formatting import inline_variable_array_repr, short_data_repr CSS_FILE_PATH = "/".join(("static", "css", "style.css")) CSS_STYLE = pkg_resources.resource_string("xarray", CSS_FILE_PATH).decode("utf8") diff --git a/xarray/core/groupby.py b/xarray/core/groupby.py index cb8f6538820..5b52f48413d 100644 --- a/xarray/core/groupby.py +++ b/xarray/core/groupby.py @@ -607,8 +607,6 @@ def quantile(self, q, dim=None, interpolation="linear", keep_attrs=None): ... dims=("y", "y"), ... ) >>> ds = xr.Dataset({"a": da}) - - Single quantile >>> da.groupby("x").quantile(0) array([[0.7, 4.2, 0.7, 1.5], @@ -625,15 +623,12 @@ def quantile(self, q, dim=None, interpolation="linear", keep_attrs=None): * y (y) int64 1 2 Data variables: a (y) float64 0.7 0.7 - - Multiple quantiles >>> da.groupby("x").quantile([0, 0.5, 1]) array([[[0.7 , 1. , 1.3 ], [4.2 , 6.3 , 8.4 ], [0.7 , 5.05, 9.4 ], [1.5 , 4.2 , 6.9 ]], - [[6.5 , 6.5 , 6.5 ], [7.3 , 7.3 , 7.3 ], [2.6 , 2.6 , 2.6 ], diff --git a/xarray/core/indexing.py b/xarray/core/indexing.py index f48c9e72af1..8e851b39c3e 100644 --- a/xarray/core/indexing.py +++ b/xarray/core/indexing.py @@ -1213,6 +1213,19 @@ def posify_mask_indexer(indexer): return type(indexer)(key) +def is_fancy_indexer(indexer: Any) -> bool: + """Return False if indexer is a int, slice, a 1-dimensional list, or a 0 or + 1-dimensional ndarray; in all other cases return True + """ + if isinstance(indexer, (int, slice)): + return False + if isinstance(indexer, np.ndarray): + return indexer.ndim > 1 + if isinstance(indexer, list): + return bool(indexer) and not isinstance(indexer[0], int) + return True + + class NumpyIndexingAdapter(ExplicitlyIndexedNDArrayMixin): """Wrap a NumPy array to use explicit indexing.""" diff --git a/xarray/core/nanops.py b/xarray/core/nanops.py index 17240faf007..f70e96217e8 100644 --- a/xarray/core/nanops.py +++ b/xarray/core/nanops.py @@ -25,7 +25,7 @@ def _maybe_null_out(result, axis, mask, min_count=1): """ if hasattr(axis, "__len__"): # if tuple or list raise ValueError( - "min_count is not available for reduction " "with more than one dimensions." + "min_count is not available for reduction with more than one dimensions." 
         )
 
     if axis is not None and getattr(result, "ndim", False):
diff --git a/xarray/core/ops.py b/xarray/core/ops.py
index 78c4466faed..b789f93b4f1 100644
--- a/xarray/core/ops.py
+++ b/xarray/core/ops.py
@@ -347,13 +347,3 @@ def inject_all_ops_and_reduce_methods(cls, priority=50, array_only=True):
 
     inject_reduce_methods(cls)
     inject_cum_methods(cls)
-
-
-def inject_coarsen_methods(cls):
-    # standard numpy reduce methods
-    methods = [(name, getattr(duck_array_ops, name)) for name in NAN_REDUCE_METHODS]
-    for name, f in methods:
-        func = cls._reduce_method(f)
-        func.__name__ = name
-        func.__doc__ = _COARSEN_REDUCE_DOCSTRING_TEMPLATE.format(name=func.__name__)
-        setattr(cls, name, func)
diff --git a/xarray/core/parallel.py b/xarray/core/parallel.py
index fbb5ef94ca2..dd6c67338d8 100644
--- a/xarray/core/parallel.py
+++ b/xarray/core/parallel.py
@@ -7,12 +7,14 @@
 except ImportError:
     pass
 
+import collections
 import itertools
 import operator
 from typing import (
     Any,
     Callable,
     Dict,
+    DefaultDict,
     Hashable,
     Mapping,
     Sequence,
@@ -221,7 +223,12 @@ def _wrapper(func, obj, to_array, args, kwargs):
     indexes = {dim: dataset.indexes[dim] for dim in preserved_indexes}
     indexes.update({k: template.indexes[k] for k in new_indexes})
 
+    # We're building a new HighLevelGraph hlg. We'll have one new layer
+    # for each variable in the dataset, which is the result of the
+    # func applied to the values.
+
     graph: Dict[Any, Any] = {}
+    new_layers: DefaultDict[str, Dict[Any, Any]] = collections.defaultdict(dict)
     gname = "{}-{}".format(
         dask.utils.funcname(func), dask.base.tokenize(dataset, args, kwargs)
     )
@@ -310,9 +317,20 @@ def _wrapper(func, obj, to_array, args, kwargs):
                 # unchunked dimensions in the input have one chunk in the result
                 key += (0,)
-            graph[key] = (operator.getitem, from_wrapper, name)
+            # We're adding multiple new layers to the graph:
+            # The first new layer is the result of the computation on
+            # the array.
+            # Then we add one layer per variable, which extracts the
+            # result for that variable, and depends on just the first new
+            # layer.
+            new_layers[gname_l][key] = (operator.getitem, from_wrapper, name)
+
+    hlg = HighLevelGraph.from_collections(gname, graph, dependencies=[dataset])
 
-    graph = HighLevelGraph.from_collections(gname, graph, dependencies=[dataset])
+    for gname_l, layer in new_layers.items():
+        # This adds in the getitems for each variable in the dataset.
+        hlg.dependencies[gname_l] = {gname}
+        hlg.layers[gname_l] = layer
 
     result = Dataset(coords=indexes, attrs=template.attrs)
     for name, gname_l in var_key_map.items():
@@ -325,7 +343,7 @@ def _wrapper(func, obj, to_array, args, kwargs):
             var_chunks.append((len(indexes[dim]),))
 
         data = dask.array.Array(
-            graph, name=gname_l, chunks=var_chunks, dtype=template[name].dtype
+            hlg, name=gname_l, chunks=var_chunks, dtype=template[name].dtype
         )
         result[name] = (dims, data, template[name].attrs)
diff --git a/xarray/core/rolling.py b/xarray/core/rolling.py
index a1864332f4d..ea6d72b2e03 100644
--- a/xarray/core/rolling.py
+++ b/xarray/core/rolling.py
@@ -1,12 +1,12 @@
 import functools
 import warnings
-from typing import Callable
+from typing import Any, Callable, Dict
 
 import numpy as np
 
 from . import dtypes, duck_array_ops, utils
 from .dask_array_ops import dask_rolling_wrapper
-from .ops import inject_coarsen_methods
+from .ops import inject_reduce_methods
 from .pycompat import dask_array_type
 
 try:
@@ -542,6 +542,11 @@ def __init__(self, obj, windows, boundary, side, coord_func):
         self.side = side
         self.boundary = boundary
 
+        absent_dims = [dim for dim in windows.keys() if dim not in self.obj.dims]
+        if absent_dims:
+            raise ValueError(
+                f"Dimensions {absent_dims!r} not found in {self.obj.__class__.__name__}."
+            )
         if not utils.is_dict_like(coord_func):
             coord_func = {d: coord_func for d in self.obj.dims}
         for c in self.obj.coords:
@@ -565,18 +570,23 @@ def __repr__(self):
 class DataArrayCoarsen(Coarsen):
     __slots__ = ()
 
+    _reduce_extra_args_docstring = """"""
+
     @classmethod
-    def _reduce_method(cls, func):
+    def _reduce_method(cls, func: Callable, include_skipna: bool, numeric_only: bool):
         """
-        Return a wrapped function for injecting numpy methods.
-        see ops.inject_coarsen_methods
+        Return a wrapped function for injecting reduction methods.
+        see ops.inject_reduce_methods
         """
+        kwargs: Dict[str, Any] = {}
+        if include_skipna:
+            kwargs["skipna"] = None
 
         def wrapped_func(self, **kwargs):
             from .dataarray import DataArray
 
             reduced = self.obj.variable.coarsen(
-                self.windows, func, self.boundary, self.side
+                self.windows, func, self.boundary, self.side, **kwargs
             )
             coords = {}
             for c, v in self.obj.coords.items():
@@ -585,7 +595,11 @@ def wrapped_func(self, **kwargs):
                 else:
                     if any(d in self.windows for d in v.dims):
                         coords[c] = v.variable.coarsen(
-                            self.windows, self.coord_func[c], self.boundary, self.side
+                            self.windows,
+                            self.coord_func[c],
+                            self.boundary,
+                            self.side,
+                            **kwargs,
                         )
                     else:
                         coords[c] = v
@@ -597,12 +611,17 @@ def wrapped_func(self, **kwargs):
 class DatasetCoarsen(Coarsen):
     __slots__ = ()
 
+    _reduce_extra_args_docstring = """"""
+
     @classmethod
-    def _reduce_method(cls, func):
+    def _reduce_method(cls, func: Callable, include_skipna: bool, numeric_only: bool):
         """
-        Return a wrapped function for injecting numpy methods.
-        see ops.inject_coarsen_methods
+        Return a wrapped function for injecting reduction methods.
+        see ops.inject_reduce_methods
         """
+        kwargs: Dict[str, Any] = {}
+        if include_skipna:
+            kwargs["skipna"] = None
 
         def wrapped_func(self, **kwargs):
             from .dataset import Dataset
 
@@ -610,14 +629,18 @@ def wrapped_func(self, **kwargs):
             reduced = {}
             for key, da in self.obj.data_vars.items():
                 reduced[key] = da.variable.coarsen(
-                    self.windows, func, self.boundary, self.side
+                    self.windows, func, self.boundary, self.side, **kwargs
                 )
 
             coords = {}
             for c, v in self.obj.coords.items():
                 if any(d in self.windows for d in v.dims):
                     coords[c] = v.variable.coarsen(
-                        self.windows, self.coord_func[c], self.boundary, self.side
+                        self.windows,
+                        self.coord_func[c],
+                        self.boundary,
+                        self.side,
+                        **kwargs,
                     )
                 else:
                     coords[c] = v.variable
@@ -626,5 +649,5 @@ def wrapped_func(self, **kwargs):
 
         return wrapped_func
 
 
-inject_coarsen_methods(DataArrayCoarsen)
-inject_coarsen_methods(DatasetCoarsen)
+inject_reduce_methods(DataArrayCoarsen)
+inject_reduce_methods(DatasetCoarsen)
diff --git a/xarray/core/variable.py b/xarray/core/variable.py
index 041c303dd3a..aa04cffb5ea 100644
--- a/xarray/core/variable.py
+++ b/xarray/core/variable.py
@@ -617,7 +617,10 @@ def _broadcast_indexes_outer(self, key):
                 k = k.data
             if not isinstance(k, BASIC_INDEXING_TYPES):
                 k = np.asarray(k)
-                if k.dtype.kind == "b":
+                if k.size == 0:
+                    # Slice by empty list; numpy could not infer the dtype
+                    k = k.astype(int)
+                elif k.dtype.kind == "b":
                     (k,) = np.nonzero(k)
             new_key.append(k)
 
@@ -1864,9 +1867,9 @@ def rolling_window(
             ),
         )
 
-    def coarsen(self, windows, func, boundary="exact", side="left"):
+    def coarsen(self, windows, func, boundary="exact", side="left", **kwargs):
         """
-        Apply
+        Apply reduction function.
         """
         windows = {k: v for k, v in windows.items() if k in self.dims}
         if not windows:
@@ -1878,11 +1881,11 @@ def coarsen(self, windows, func, boundary="exact", side="left"):
             func = getattr(duck_array_ops, name, None)
             if func is None:
                 raise NameError(f"{name} is not a valid method.")
-        return type(self)(self.dims, func(reshaped, axis=axes), self._attrs)
+        return self._replace(data=func(reshaped, axis=axes, **kwargs))
 
     def _coarsen_reshape(self, windows, boundary, side):
         """
-        Construct a reshaped-array for corsen
+        Construct a reshaped-array for coarsen
         """
         if not utils.is_dict_like(boundary):
             boundary = {d: boundary for d in windows.keys()}
diff --git a/xarray/plot/plot.py b/xarray/plot/plot.py
index 16a4943627e..d38c9765352 100644
--- a/xarray/plot/plot.py
+++ b/xarray/plot/plot.py
@@ -672,10 +672,22 @@ def newplotfunc(
 
         # check if we need to broadcast one dimension
         if xval.ndim < yval.ndim:
-            xval = np.broadcast_to(xval, yval.shape)
+            dims = darray[ylab].dims
+            if xval.shape[0] == yval.shape[0]:
+                xval = np.broadcast_to(xval[:, np.newaxis], yval.shape)
+            else:
+                xval = np.broadcast_to(xval[np.newaxis, :], yval.shape)
 
-        if yval.ndim < xval.ndim:
-            yval = np.broadcast_to(yval, xval.shape)
+        elif yval.ndim < xval.ndim:
+            dims = darray[xlab].dims
+            if yval.shape[0] == xval.shape[0]:
+                yval = np.broadcast_to(yval[:, np.newaxis], xval.shape)
+            else:
+                yval = np.broadcast_to(yval[np.newaxis, :], xval.shape)
+        elif xval.ndim == 2:
+            dims = darray[xlab].dims
+        else:
+            dims = (darray[ylab].dims[0], darray[xlab].dims[0])
 
         # May need to transpose for correct x, y labels
         # xlab may be the name of a coord, we have to check for dim names
@@ -685,10 +697,9 @@ def newplotfunc(
             # we transpose to (y, x, color) to make this work.
             yx_dims = (ylab, xlab)
             dims = yx_dims + tuple(d for d in darray.dims if d not in yx_dims)
-            if dims != darray.dims:
-                darray = darray.transpose(*dims, transpose_coords=True)
-        elif darray[xlab].dims[-1] == darray.dims[0]:
-            darray = darray.transpose(transpose_coords=True)
+
+        if dims != darray.dims:
+            darray = darray.transpose(*dims, transpose_coords=True)
 
         # Pass the data as a masked ndarray too
         zval = darray.to_masked_array(copy=False)
diff --git a/xarray/tests/test_backends.py b/xarray/tests/test_backends.py
index de3a7eadab0..1e135ebd3e1 100644
--- a/xarray/tests/test_backends.py
+++ b/xarray/tests/test_backends.py
@@ -3951,6 +3951,7 @@ def test_rasterio_environment(self):
             with xr.open_rasterio(tmp_file) as actual:
                 assert_allclose(actual, expected)
 
+    @pytest.mark.xfail(reason="rasterio 1.1.1 is broken. GH3573")
     def test_rasterio_vrt(self):
         import rasterio
 
diff --git a/xarray/tests/test_dask.py b/xarray/tests/test_dask.py
index ab66093c25d..0e5a3c6822e 100644
--- a/xarray/tests/test_dask.py
+++ b/xarray/tests/test_dask.py
@@ -16,6 +16,7 @@
 from xarray.testing import assert_chunks_equal
 from xarray.tests import mock
 
+from ..core.duck_array_ops import lazy_array_equiv
 from . import (
     assert_allclose,
     assert_array_equal,
@@ -25,7 +26,6 @@
     raises_regex,
     requires_scipy_or_netCDF4,
 )
-from ..core.duck_array_ops import lazy_array_equiv
 from .test_backends import create_tmp_file
 
 dask = pytest.importorskip("dask")
@@ -1189,6 +1189,19 @@ def func(obj):
     assert_identical(expected, actual)
 
 
+def test_map_blocks_hlg_layers():
+    # regression test for #3599
+    ds = xr.Dataset(
+        {
+            "x": (("a",), dask.array.ones(10, chunks=(5,))),
+            "z": (("b",), dask.array.ones(10, chunks=(5,))),
+        }
+    )
+    mapped = ds.map_blocks(lambda x: x)
+
+    xr.testing.assert_equal(mapped, ds)
+
+
 def test_make_meta(map_ds):
     from ..core.parallel import make_meta
 
diff --git a/xarray/tests/test_dataarray.py b/xarray/tests/test_dataarray.py
index 7e511af84b6..a019d68dc0b 100644
--- a/xarray/tests/test_dataarray.py
+++ b/xarray/tests/test_dataarray.py
@@ -16,7 +16,6 @@
 from xarray.core.common import full_like
 from xarray.core.indexes import propagate_indexes
 from xarray.core.utils import is_scalar
-
 from xarray.tests import (
     LooseVersion,
     ReturnItem,
diff --git a/xarray/tests/test_dataset.py b/xarray/tests/test_dataset.py
index ce65d04cde1..23eaa9424b3 100644
--- a/xarray/tests/test_dataset.py
+++ b/xarray/tests/test_dataset.py
@@ -5513,6 +5513,11 @@ def ds(request):
     )
 
 
+def test_coarsen_absent_dims_error(ds):
+    with raises_regex(ValueError, "not found in Dataset."):
+        ds.coarsen(foo=2)
+
+
 @pytest.mark.parametrize("dask", [True, False])
 @pytest.mark.parametrize(("boundary", "side"), [("trim", "left"), ("pad", "right")])
 def test_coarsen(ds, dask, boundary, side):
@@ -5521,12 +5526,11 @@ def test_coarsen(ds, dask, boundary, side):
 
     actual = ds.coarsen(time=2, x=3, boundary=boundary, side=side).max()
     assert_equal(
-        actual["z1"], ds["z1"].coarsen(time=2, x=3, boundary=boundary, side=side).max()
+        actual["z1"], ds["z1"].coarsen(x=3, boundary=boundary, side=side).max()
     )
     # coordinate should be mean by default
     assert_equal(
-        actual["time"],
-        ds["time"].coarsen(time=2, x=3, boundary=boundary, side=side).mean(),
+        actual["time"], ds["time"].coarsen(time=2, boundary=boundary, side=side).mean()
     )
 
 
@@ -5537,8 +5541,8 @@ def test_coarsen_coords(ds, dask):
 
     # check if coord_func works
     actual = ds.coarsen(time=2, x=3, boundary="trim", coord_func={"time": "max"}).max()
-    assert_equal(actual["z1"], ds["z1"].coarsen(time=2, x=3, boundary="trim").max())
- assert_equal(actual["time"], ds["time"].coarsen(time=2, x=3, boundary="trim").max()) + assert_equal(actual["z1"], ds["z1"].coarsen(x=3, boundary="trim").max()) + assert_equal(actual["time"], ds["time"].coarsen(time=2, boundary="trim").max()) # raise if exact with pytest.raises(ValueError): diff --git a/xarray/tests/test_missing.py b/xarray/tests/test_missing.py index 0b410383a34..1cd0319a9a5 100644 --- a/xarray/tests/test_missing.py +++ b/xarray/tests/test_missing.py @@ -9,8 +9,8 @@ NumpyInterpolator, ScipyInterpolator, SplineInterpolator, - get_clean_interp_index, _get_nan_block_lengths, + get_clean_interp_index, ) from xarray.core.pycompat import dask_array_type from xarray.tests import ( diff --git a/xarray/tests/test_plot.py b/xarray/tests/test_plot.py index 6e283ea01da..a10f0d9a67e 100644 --- a/xarray/tests/test_plot.py +++ b/xarray/tests/test_plot.py @@ -265,6 +265,7 @@ def test2d_1d_2d_coordinates_contourf(self): ) a.plot.contourf(x="time", y="depth") + a.plot.contourf(x="depth", y="time") def test3d(self): self.darray.plot() @@ -2149,3 +2150,31 @@ def test_yticks_kwarg(self, da): da.plot(yticks=np.arange(5)) expected = np.arange(5) assert np.all(plt.gca().get_yticks() == expected) + + +@requires_matplotlib +@pytest.mark.parametrize("plotfunc", ["pcolormesh", "contourf", "contour"]) +def test_plot_transposed_nondim_coord(plotfunc): + x = np.linspace(0, 10, 101) + h = np.linspace(3, 7, 101) + s = np.linspace(0, 1, 51) + z = s[:, np.newaxis] * h[np.newaxis, :] + da = xr.DataArray( + np.sin(x) * np.cos(z), + dims=["s", "x"], + coords={"x": x, "s": s, "z": (("s", "x"), z), "zt": (("x", "s"), z.T)}, + ) + getattr(da.plot, plotfunc)(x="x", y="zt") + getattr(da.plot, plotfunc)(x="zt", y="x") + + +@requires_matplotlib +@pytest.mark.parametrize("plotfunc", ["pcolormesh", "imshow"]) +def test_plot_transposes_properly(plotfunc): + # test that we aren't mistakenly transposing when the 2 dimensions have equal sizes. + da = xr.DataArray([np.sin(2 * np.pi / 10 * np.arange(10))] * 10, dims=("y", "x")) + hdl = getattr(da.plot, plotfunc)(x="x", y="y") + # get_array doesn't work for contour, contourf. It returns the colormap intervals. 
+    # pcolormesh returns 1D array but imshow returns a 2D array so it is necessary
+    # to ravel() on the LHS
+    assert np.all(hdl.get_array().ravel() == da.to_masked_array().ravel())
diff --git a/xarray/tests/test_variable.py b/xarray/tests/test_variable.py
index 245dc1acc42..1d83e16a5bd 100644
--- a/xarray/tests/test_variable.py
+++ b/xarray/tests/test_variable.py
@@ -1156,6 +1156,26 @@ def test_items(self):
     def test_getitem_basic(self):
         v = self.cls(["x", "y"], [[0, 1, 2], [3, 4, 5]])
 
+        # int argument
+        v_new = v[0]
+        assert v_new.dims == ("y",)
+        assert_array_equal(v_new, v._data[0])
+
+        # slice argument
+        v_new = v[:2]
+        assert v_new.dims == ("x", "y")
+        assert_array_equal(v_new, v._data[:2])
+
+        # list arguments
+        v_new = v[[0]]
+        assert v_new.dims == ("x", "y")
+        assert_array_equal(v_new, v._data[[0]])
+
+        v_new = v[[]]
+        assert v_new.dims == ("x", "y")
+        assert_array_equal(v_new, v._data[[]])
+
+        # dict arguments
         v_new = v[dict(x=0)]
         assert v_new.dims == ("y",)
         assert_array_equal(v_new, v._data[0])
@@ -1196,6 +1216,8 @@ def test_isel(self):
         assert_identical(v.isel(time=0), v[0])
         assert_identical(v.isel(time=slice(0, 3)), v[:3])
         assert_identical(v.isel(x=0), v[:, 0])
+        assert_identical(v.isel(x=[0, 2]), v[:, [0, 2]])
+        assert_identical(v.isel(time=[]), v[[]])
         with raises_regex(ValueError, "do not exist"):
             v.isel(not_a_dim=0)
 
@@ -1833,6 +1855,26 @@ def test_coarsen_2d(self):
             expected[1, 1] *= 12 / 11
         assert_allclose(actual, expected)
 
+        v = self.cls(("x", "y"), np.arange(4 * 4, dtype=np.float32).reshape(4, 4))
+        actual = v.coarsen(dict(x=2, y=2), func="count", boundary="exact")
+        expected = self.cls(("x", "y"), 4 * np.ones((2, 2)))
+        assert_equal(actual, expected)
+
+        v[0, 0] = np.nan
+        v[-1, -1] = np.nan
+        expected[0, 0] = 3
+        expected[-1, -1] = 3
+        actual = v.coarsen(dict(x=2, y=2), func="count", boundary="exact")
+        assert_equal(actual, expected)
+
+        actual = v.coarsen(dict(x=2, y=2), func="sum", boundary="exact", skipna=False)
+        expected = self.cls(("x", "y"), [[np.nan, 18], [42, np.nan]])
+        assert_equal(actual, expected)
+
+        actual = v.coarsen(dict(x=2, y=2), func="sum", boundary="exact", skipna=True)
+        expected = self.cls(("x", "y"), [[10, 18], [42, 35]])
+        assert_equal(actual, expected)
+
 
 @requires_dask
 class TestVariableWithDask(VariableSubclassobjects):
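
A quick sketch of what the combined changes enable, for anyone reviewing this diff locally. This is illustrative only and not part of the patch: it exercises the coarsen reductions now injected via ``inject_reduce_methods`` (``count`` and ``skipna``-aware ``sum``), the empty-list indexing fix in ``variable.py``, and the new missing-dimension error raised by ``Coarsen.__init__``. The array values below are made up.

    import numpy as np
    import xarray as xr

    # Coarsen objects now expose the full set of injected reductions, so
    # count() and skipna-aware sum() work on DataArrayCoarsen/DatasetCoarsen.
    da = xr.DataArray(np.arange(16.0).reshape(4, 4), dims=("x", "y"))
    da[0, 0] = np.nan

    print(da.coarsen(x=2, y=2).count())            # 3 in the NaN block, 4 elsewhere
    print(da.coarsen(x=2, y=2).sum(skipna=False))  # NaN propagates in that block

    # Indexing by an empty list no longer trips numpy's dtype inference.
    print(da.isel(x=[]).shape)  # (0, 4)

    # Coarsening over an absent dimension now fails loudly instead of silently.
    try:
        da.coarsen(foo=2)
    except ValueError as err:
        print(err)  # Dimensions ['foo'] not found in DataArray.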