From b084064fa62d3dedc3706c2f6c2dff90940fec27 Mon Sep 17 00:00:00 2001 From: Spencer Clark Date: Sat, 28 Mar 2020 10:26:45 -0400 Subject: [PATCH 01/39] Un-xfail test_dayofyear_after_cftime_range (#3907) --- xarray/tests/test_cftime_offsets.py | 1 - 1 file changed, 1 deletion(-) diff --git a/xarray/tests/test_cftime_offsets.py b/xarray/tests/test_cftime_offsets.py index 2352f9e8cdd..343e059f53c 100644 --- a/xarray/tests/test_cftime_offsets.py +++ b/xarray/tests/test_cftime_offsets.py @@ -1176,7 +1176,6 @@ def test_dayofweek_after_cftime_range(freq): np.testing.assert_array_equal(result, expected) -@pytest.mark.xfail(reason="See GH3885") @pytest.mark.parametrize("freq", ["A", "M", "D"]) def test_dayofyear_after_cftime_range(freq): pytest.importorskip("cftime", minversion="1.0.2.1") From 6852d01b2e2315b2d7244315f61bf7ecfbc19206 Mon Sep 17 00:00:00 2001 From: keewis Date: Sat, 28 Mar 2020 19:18:20 +0100 Subject: [PATCH 02/39] update the docstring of diff (#3909) * don't document the dim parameter as optional * update whats-new.rst * fix the reference syntax --- doc/whats-new.rst | 4 ++++ xarray/core/dataarray.py | 2 +- xarray/core/dataset.py | 2 +- 3 files changed, 6 insertions(+), 2 deletions(-) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index a138dee4128..9d83c5e4207 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -46,6 +46,10 @@ Bug fixes Documentation ~~~~~~~~~~~~~ +- update the docstring of :py:meth:`Dataset.diff` and + :py:meth:`DataArray.diff` so it does document the ``dim`` + parameter as required. (:issue:`1040`, :pull:`3909`) + By `Justus Magin `_. Internal Changes diff --git a/xarray/core/dataarray.py b/xarray/core/dataarray.py index 070886cfc34..94c4026d4a1 100644 --- a/xarray/core/dataarray.py +++ b/xarray/core/dataarray.py @@ -2696,7 +2696,7 @@ def diff(self, dim: Hashable, n: int = 1, label: Hashable = "upper") -> "DataArr Parameters ---------- - dim : hashable, optional + dim : hashable Dimension over which to calculate the finite difference. n : int, optional The number of times values are differenced. diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index c49694b1fc0..bd8f0ef3948 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -4901,7 +4901,7 @@ def diff(self, dim, n=1, label="upper"): Parameters ---------- - dim : str, optional + dim : str Dimension over which to calculate the finite difference. n : int, optional The number of times values are differenced. From 79513b7453f0b284ac5abcacf96fede702d6150c Mon Sep 17 00:00:00 2001 From: Maximilian Roos <5635139+max-sixty@users.noreply.github.com> Date: Sat, 28 Mar 2020 16:01:23 -0400 Subject: [PATCH 03/39] Revert "Use `fixes` in PR template (#3886)" (#3912) This reverts commit ee3c87659d1687a86d406065a5af1b4b87beec17. --- .github/PULL_REQUEST_TEMPLATE.md | 2 +- doc/whats-new.rst | 3 --- 2 files changed, 1 insertion(+), 4 deletions(-) diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md index c30202ac046..a921bddaa23 100644 --- a/.github/PULL_REQUEST_TEMPLATE.md +++ b/.github/PULL_REQUEST_TEMPLATE.md @@ -1,6 +1,6 @@ - - [ ] Fixes #xxxx + - [ ] Closes #xxxx - [ ] Tests added - [ ] Passes `isort -rc . && black . && mypy . 
&& flake8` - [ ] Fully documented, including `whats-new.rst` for all changes and `api.rst` for new API diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 9d83c5e4207..bd0851d2b39 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -54,9 +54,6 @@ Documentation Internal Changes ~~~~~~~~~~~~~~~~ -- Use ``Fixes`` rather than ``Closes`` in GitHub Pull Request template, allowing - linking to issues. - By `Maximilian Roos `_ .. _whats-new.0.15.1: From ca6bb8561f2977509fc5bf53eae0efd080c0a952 Mon Sep 17 00:00:00 2001 From: keewis Date: Sat, 28 Mar 2020 21:58:14 +0100 Subject: [PATCH 04/39] Update pre-commit-config.yaml (#3911) * only run isort on python source files * update the flake8 hook * update whats-new.rst --- .pre-commit-config.yaml | 5 +++-- doc/whats-new.rst | 3 +++ 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 9df95648774..26bf4803ef6 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -5,13 +5,14 @@ repos: rev: 4.3.21-2 hooks: - id: isort + files: .+\.py$ # https://github.com/python/black#version-control-integration - repo: https://github.com/python/black rev: stable hooks: - id: black - - repo: https://github.com/pre-commit/pre-commit-hooks - rev: v2.2.3 + - repo: https://gitlab.com/pycqa/flake8 + rev: 3.7.9 hooks: - id: flake8 - repo: https://github.com/pre-commit/mirrors-mypy diff --git a/doc/whats-new.rst b/doc/whats-new.rst index bd0851d2b39..39aaa0e4fd3 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -54,6 +54,9 @@ Documentation Internal Changes ~~~~~~~~~~~~~~~~ +- Run the ``isort`` pre-commit hook only on python source files + and update the ``flake8`` version. (:issue:`3750`, :pull:`3711`) + By `Justus Magin `_. .. _whats-new.0.15.1: From 1416d5ae475c0875e7a5d76fa4a8278838958162 Mon Sep 17 00:00:00 2001 From: Todd Date: Sat, 28 Mar 2020 21:54:24 -0400 Subject: [PATCH 05/39] Implement idxmax and idxmin functions (#3871) * drop numpy 1.12 compat code that can hide other errors * deep copy _indexes (#3899) * implement idxmax and idxmin --- doc/api.rst | 4 + doc/whats-new.rst | 6 + xarray/core/computation.py | 66 ++- xarray/core/dataarray.py | 193 +++++++- xarray/core/dataset.py | 191 ++++++++ xarray/core/duck_array_ops.py | 20 +- xarray/tests/test_dataarray.py | 802 +++++++++++++++++++++++++++++++++ xarray/tests/test_dataset.py | 9 + 8 files changed, 1277 insertions(+), 14 deletions(-) diff --git a/doc/api.rst b/doc/api.rst index 216f47f988f..b37c84e7a81 100644 --- a/doc/api.rst +++ b/doc/api.rst @@ -180,6 +180,8 @@ Computation :py:attr:`~Dataset.any` :py:attr:`~Dataset.argmax` :py:attr:`~Dataset.argmin` +:py:attr:`~Dataset.idxmax` +:py:attr:`~Dataset.idxmin` :py:attr:`~Dataset.max` :py:attr:`~Dataset.mean` :py:attr:`~Dataset.median` @@ -362,6 +364,8 @@ Computation :py:attr:`~DataArray.any` :py:attr:`~DataArray.argmax` :py:attr:`~DataArray.argmin` +:py:attr:`~DataArray.idxmax` +:py:attr:`~DataArray.idxmin` :py:attr:`~DataArray.max` :py:attr:`~DataArray.mean` :py:attr:`~DataArray.median` diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 39aaa0e4fd3..c70dfd4f3f6 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -38,10 +38,16 @@ New Features - Limited the length of array items with long string reprs to a reasonable width (:pull:`3900`) By `Maximilian Roos `_ +- Implement :py:meth:`DataArray.idxmax`, :py:meth:`DataArray.idxmin`, + :py:meth:`Dataset.idxmax`, :py:meth:`Dataset.idxmin`. 
(:issue:`60`, :pull:`3871`) + By `Todd Jennings `_ Bug fixes ~~~~~~~~~ +- Fix a regression where deleting a coordinate from a copied :py:class:`DataArray` + can affect the original :py:class:`Dataarray`. (:issue:`3899`, :pull:`3871`) + By `Todd Jennings `_ Documentation diff --git a/xarray/core/computation.py b/xarray/core/computation.py index 13bf6248331..6cf4178b5bf 100644 --- a/xarray/core/computation.py +++ b/xarray/core/computation.py @@ -23,9 +23,10 @@ import numpy as np -from . import duck_array_ops, utils +from . import dtypes, duck_array_ops, utils from .alignment import deep_align from .merge import merge_coordinates_without_align +from .nanops import dask_array from .options import OPTIONS from .pycompat import dask_array_type from .utils import is_dict_like @@ -1338,3 +1339,66 @@ def polyval(coord, coeffs, degree_dim="degree"): coords={coord.name: coord, degree_dim: np.arange(deg_coord.max() + 1)[::-1]}, ) return (lhs * coeffs).sum(degree_dim) + + +def _calc_idxminmax( + *, + array, + func: Callable, + dim: Hashable = None, + skipna: bool = None, + fill_value: Any = dtypes.NA, + keep_attrs: bool = None, +): + """Apply common operations for idxmin and idxmax.""" + # This function doesn't make sense for scalars so don't try + if not array.ndim: + raise ValueError("This function does not apply for scalars") + + if dim is not None: + pass # Use the dim if available + elif array.ndim == 1: + # it is okay to guess the dim if there is only 1 + dim = array.dims[0] + else: + # The dim is not specified and ambiguous. Don't guess. + raise ValueError("Must supply 'dim' argument for multidimensional arrays") + + if dim not in array.dims: + raise KeyError(f'Dimension "{dim}" not in dimension') + if dim not in array.coords: + raise KeyError(f'Dimension "{dim}" does not have coordinates') + + # These are dtypes with NaN values argmin and argmax can handle + na_dtypes = "cfO" + + if skipna or (skipna is None and array.dtype.kind in na_dtypes): + # Need to skip NaN values since argmin and argmax can't handle them + allna = array.isnull().all(dim) + array = array.where(~allna, 0) + + # This will run argmin or argmax. + indx = func(array, dim=dim, axis=None, keep_attrs=keep_attrs, skipna=skipna) + + # Get the coordinate we want. + coordarray = array[dim] + + # Handle dask arrays. + if isinstance(array, dask_array_type): + res = dask_array.map_blocks(coordarray, indx, dtype=indx.dtype) + else: + res = coordarray[ + indx, + ] + + if skipna or (skipna is None and array.dtype.kind in na_dtypes): + # Put the NaN values back in after removing them + res = res.where(~allna, fill_value) + + # The dim is gone but we need to remove the corresponding coordinate. 
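+ # (indexing ``coordarray`` above carried its index coordinate along, so
+ # without this the result would keep a stray coordinate named after the
+ # reduced dimension)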
+ del res.coords[dim] + + # Copy attributes from argmin/argmax, if any + res.attrs = indx.attrs + + return res diff --git a/xarray/core/dataarray.py b/xarray/core/dataarray.py index 94c4026d4a1..b7e0333dcd9 100644 --- a/xarray/core/dataarray.py +++ b/xarray/core/dataarray.py @@ -930,7 +930,10 @@ def copy(self, deep: bool = True, data: Any = None) -> "DataArray": """ variable = self.variable.copy(deep=deep, data=data) coords = {k: v.copy(deep=deep) for k, v in self._coords.items()} - indexes = self._indexes + if self._indexes is None: + indexes = self._indexes + else: + indexes = {k: v.copy(deep=deep) for k, v in self._indexes.items()} return self._replace(variable, coords, indexes=indexes) def __copy__(self) -> "DataArray": @@ -3505,6 +3508,194 @@ def pad( ) return self._from_temp_dataset(ds) + def idxmin( + self, + dim: Hashable = None, + skipna: bool = None, + fill_value: Any = dtypes.NA, + keep_attrs: bool = None, + ) -> "DataArray": + """Return the coordinate label of the minimum value along a dimension. + + Returns a new `DataArray` named after the dimension with the values of + the coordinate labels along that dimension corresponding to minimum + values along that dimension. + + In comparison to :py:meth:`~DataArray.argmin`, this returns the + coordinate label while :py:meth:`~DataArray.argmin` returns the index. + + Parameters + ---------- + dim : str, optional + Dimension over which to apply `idxmin`. This is optional for 1D + arrays, but required for arrays with 2 or more dimensions. + skipna : bool or None, default None + If True, skip missing values (as marked by NaN). By default, only + skips missing values for ``float``, ``complex``, and ``object`` + dtypes; other dtypes either do not have a sentinel missing value + (``int``) or ``skipna=True`` has not been implemented + (``datetime64`` or ``timedelta64``). + fill_value : Any, default NaN + Value to be filled in case all of the values along a dimension are + null. By default this is NaN. The fill value and result are + automatically converted to a compatible dtype if possible. + Ignored if ``skipna`` is False. + keep_attrs : bool, default False + If True, the attributes (``attrs``) will be copied from the + original object to the new one. If False (default), the new object + will be returned without attributes. + + Returns + ------- + reduced : DataArray + New `DataArray` object with `idxmin` applied to its data and the + indicated dimension removed. + + See also + -------- + Dataset.idxmin, DataArray.idxmax, DataArray.min, DataArray.argmin + + Examples + -------- + + >>> array = xr.DataArray([0, 2, 1, 0, -2], dims="x", + ... coords={"x": ['a', 'b', 'c', 'd', 'e']}) + >>> array.min() + + array(-2) + >>> array.argmin() + + array(4) + >>> array.idxmin() + + array('e', dtype='>> array = xr.DataArray([[2.0, 1.0, 2.0, 0.0, -2.0], + ... [-4.0, np.NaN, 2.0, np.NaN, -2.0], + ... [np.NaN, np.NaN, 1., np.NaN, np.NaN]], + ... dims=["y", "x"], + ... coords={"y": [-1, 0, 1], + ... "x": np.arange(5.)**2} + ... 
) + >>> array.min(dim="x") + + array([-2., -4., 1.]) + Coordinates: + * y (y) int64 -1 0 1 + >>> array.argmin(dim="x") + + array([4, 0, 2]) + Coordinates: + * y (y) int64 -1 0 1 + >>> array.idxmin(dim="x") + + array([16., 0., 4.]) + Coordinates: + * y (y) int64 -1 0 1 + """ + return computation._calc_idxminmax( + array=self, + func=lambda x, *args, **kwargs: x.argmin(*args, **kwargs), + dim=dim, + skipna=skipna, + fill_value=fill_value, + keep_attrs=keep_attrs, + ) + + def idxmax( + self, + dim: Hashable = None, + skipna: bool = None, + fill_value: Any = dtypes.NA, + keep_attrs: bool = None, + ) -> "DataArray": + """Return the coordinate label of the maximum value along a dimension. + + Returns a new `DataArray` named after the dimension with the values of + the coordinate labels along that dimension corresponding to maximum + values along that dimension. + + In comparison to :py:meth:`~DataArray.argmax`, this returns the + coordinate label while :py:meth:`~DataArray.argmax` returns the index. + + Parameters + ---------- + dim : str, optional + Dimension over which to apply `idxmax`. This is optional for 1D + arrays, but required for arrays with 2 or more dimensions. + skipna : bool or None, default None + If True, skip missing values (as marked by NaN). By default, only + skips missing values for ``float``, ``complex``, and ``object`` + dtypes; other dtypes either do not have a sentinel missing value + (``int``) or ``skipna=True`` has not been implemented + (``datetime64`` or ``timedelta64``). + fill_value : Any, default NaN + Value to be filled in case all of the values along a dimension are + null. By default this is NaN. The fill value and result are + automatically converted to a compatible dtype if possible. + Ignored if ``skipna`` is False. + keep_attrs : bool, default False + If True, the attributes (``attrs``) will be copied from the + original object to the new one. If False (default), the new object + will be returned without attributes. + + Returns + ------- + reduced : DataArray + New `DataArray` object with `idxmax` applied to its data and the + indicated dimension removed. + + See also + -------- + Dataset.idxmax, DataArray.idxmin, DataArray.max, DataArray.argmax + + Examples + -------- + + >>> array = xr.DataArray([0, 2, 1, 0, -2], dims="x", + ... coords={"x": ['a', 'b', 'c', 'd', 'e']}) + >>> array.max() + + array(2) + >>> array.argmax() + + array(1) + >>> array.idxmax() + + array('b', dtype='>> array = xr.DataArray([[2.0, 1.0, 2.0, 0.0, -2.0], + ... [-4.0, np.NaN, 2.0, np.NaN, -2.0], + ... [np.NaN, np.NaN, 1., np.NaN, np.NaN]], + ... dims=["y", "x"], + ... coords={"y": [-1, 0, 1], + ... "x": np.arange(5.)**2} + ... 
) + >>> array.max(dim="x") + + array([2., 2., 1.]) + Coordinates: + * y (y) int64 -1 0 1 + >>> array.argmax(dim="x") + + array([0, 2, 2]) + Coordinates: + * y (y) int64 -1 0 1 + >>> array.idxmax(dim="x") + + array([0., 4., 4.]) + Coordinates: + * y (y) int64 -1 0 1 + """ + return computation._calc_idxminmax( + array=self, + func=lambda x, *args, **kwargs: x.argmax(*args, **kwargs), + dim=dim, + skipna=skipna, + fill_value=fill_value, + keep_attrs=keep_attrs, + ) + # this needs to be at the end, or mypy will confuse with `str` # https://mypy.readthedocs.io/en/latest/common_issues.html#dealing-with-conflicting-names str = property(StringAccessor) diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index bd8f0ef3948..c515d781db1 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -6,6 +6,7 @@ from collections import defaultdict from html import escape from numbers import Number +from operator import methodcaller from pathlib import Path from typing import ( TYPE_CHECKING, @@ -6093,5 +6094,195 @@ def pad( return self._replace_vars_and_dims(variables) + def idxmin( + self, + dim: Hashable = None, + skipna: bool = None, + fill_value: Any = dtypes.NA, + keep_attrs: bool = None, + ) -> "Dataset": + """Return the coordinate label of the minimum value along a dimension. + + Returns a new `Dataset` named after the dimension with the values of + the coordinate labels along that dimension corresponding to minimum + values along that dimension. + + In comparison to :py:meth:`~Dataset.argmin`, this returns the + coordinate label while :py:meth:`~Dataset.argmin` returns the index. + + Parameters + ---------- + dim : str, optional + Dimension over which to apply `idxmin`. This is optional for 1D + variables, but required for variables with 2 or more dimensions. + skipna : bool or None, default None + If True, skip missing values (as marked by NaN). By default, only + skips missing values for ``float``, ``complex``, and ``object`` + dtypes; other dtypes either do not have a sentinel missing value + (``int``) or ``skipna=True`` has not been implemented + (``datetime64`` or ``timedelta64``). + fill_value : Any, default NaN + Value to be filled in case all of the values along a dimension are + null. By default this is NaN. The fill value and result are + automatically converted to a compatible dtype if possible. + Ignored if ``skipna`` is False. + keep_attrs : bool, default False + If True, the attributes (``attrs``) will be copied from the + original object to the new one. If False (default), the new object + will be returned without attributes. + + Returns + ------- + reduced : Dataset + New `Dataset` object with `idxmin` applied to its data and the + indicated dimension removed. + + See also + -------- + DataArray.idxmin, Dataset.idxmax, Dataset.min, Dataset.argmin + + Examples + -------- + + >>> array1 = xr.DataArray([0, 2, 1, 0, -2], dims="x", + ... coords={"x": ['a', 'b', 'c', 'd', 'e']}) + >>> array2 = xr.DataArray([[2.0, 1.0, 2.0, 0.0, -2.0], + ... [-4.0, np.NaN, 2.0, np.NaN, -2.0], + ... [np.NaN, np.NaN, 1., np.NaN, np.NaN]], + ... dims=["y", "x"], + ... coords={"y": [-1, 0, 1], + ... "x": ['a', 'b', 'c', 'd', 'e']} + ... 
) + >>> ds = xr.Dataset({'int': array1, 'float': array2}) + >>> ds.min(dim='x') + + Dimensions: (y: 3) + Coordinates: + * y (y) int64 -1 0 1 + Data variables: + int int64 -2 + float (y) float64 -2.0 -4.0 1.0 + >>> ds.argmin(dim='x') + + Dimensions: (y: 3) + Coordinates: + * y (y) int64 -1 0 1 + Data variables: + int int64 4 + float (y) int64 4 0 2 + >>> ds.idxmin(dim='x') + + Dimensions: (y: 3) + Coordinates: + * y (y) int64 -1 0 1 + Data variables: + int "Dataset": + """Return the coordinate label of the maximum value along a dimension. + + Returns a new `Dataset` named after the dimension with the values of + the coordinate labels along that dimension corresponding to maximum + values along that dimension. + + In comparison to :py:meth:`~Dataset.argmax`, this returns the + coordinate label while :py:meth:`~Dataset.argmax` returns the index. + + Parameters + ---------- + dim : str, optional + Dimension over which to apply `idxmax`. This is optional for 1D + variables, but required for variables with 2 or more dimensions. + skipna : bool or None, default None + If True, skip missing values (as marked by NaN). By default, only + skips missing values for ``float``, ``complex``, and ``object`` + dtypes; other dtypes either do not have a sentinel missing value + (``int``) or ``skipna=True`` has not been implemented + (``datetime64`` or ``timedelta64``). + fill_value : Any, default NaN + Value to be filled in case all of the values along a dimension are + null. By default this is NaN. The fill value and result are + automatically converted to a compatible dtype if possible. + Ignored if ``skipna`` is False. + keep_attrs : bool, default False + If True, the attributes (``attrs``) will be copied from the + original object to the new one. If False (default), the new object + will be returned without attributes. + + Returns + ------- + reduced : Dataset + New `Dataset` object with `idxmax` applied to its data and the + indicated dimension removed. + + See also + -------- + DataArray.idxmax, Dataset.idxmin, Dataset.max, Dataset.argmax + + Examples + -------- + + >>> array1 = xr.DataArray([0, 2, 1, 0, -2], dims="x", + ... coords={"x": ['a', 'b', 'c', 'd', 'e']}) + >>> array2 = xr.DataArray([[2.0, 1.0, 2.0, 0.0, -2.0], + ... [-4.0, np.NaN, 2.0, np.NaN, -2.0], + ... [np.NaN, np.NaN, 1., np.NaN, np.NaN]], + ... dims=["y", "x"], + ... coords={"y": [-1, 0, 1], + ... "x": ['a', 'b', 'c', 'd', 'e']} + ... 
) + >>> ds = xr.Dataset({'int': array1, 'float': array2}) + >>> ds.max(dim='x') + + Dimensions: (y: 3) + Coordinates: + * y (y) int64 -1 0 1 + Data variables: + int int64 2 + float (y) float64 2.0 2.0 1.0 + >>> ds.argmax(dim='x') + + Dimensions: (y: 3) + Coordinates: + * y (y) int64 -1 0 1 + Data variables: + int int64 1 + float (y) int64 0 2 2 + >>> ds.idxmax(dim='x') + + Dimensions: (y: 3) + Coordinates: + * y (y) int64 -1 0 1 + Data variables: + int Date: Sun, 29 Mar 2020 23:12:34 +0200 Subject: [PATCH 06/39] add pint to the output of show_versions() (#3918) --- xarray/util/print_versions.py | 1 + 1 file changed, 1 insertion(+) diff --git a/xarray/util/print_versions.py b/xarray/util/print_versions.py index 6a0e62cc9dc..223b59b9675 100755 --- a/xarray/util/print_versions.py +++ b/xarray/util/print_versions.py @@ -118,6 +118,7 @@ def show_versions(file=sys.stdout): ("cartopy", lambda mod: mod.__version__), ("seaborn", lambda mod: mod.__version__), ("numbagg", lambda mod: mod.__version__), + ("pint", lambda mod: mod.__version__), # xarray setup/test ("setuptools", lambda mod: mod.__version__), ("pip", lambda mod: mod.__version__), From eb8e43b717651cdbf65b09226d97661ce60ba446 Mon Sep 17 00:00:00 2001 From: keewis Date: Mon, 30 Mar 2020 00:13:25 +0200 Subject: [PATCH 07/39] reactivate the macos CI (#3920) --- azure-pipelines.yml | 19 +++++++++---------- 1 file changed, 9 insertions(+), 10 deletions(-) diff --git a/azure-pipelines.yml b/azure-pipelines.yml index 8d43de7b1d5..ce95fca1ba1 100644 --- a/azure-pipelines.yml +++ b/azure-pipelines.yml @@ -32,16 +32,15 @@ jobs: steps: - template: ci/azure/unit-tests.yml -# excluded while waiting for https://github.com/conda-forge/libwebp-feedstock/issues/26 -# - job: MacOSX -# strategy: -# matrix: -# py38: -# conda_env: py38 -# pool: -# vmImage: 'macOS-10.15' -# steps: -# - template: ci/azure/unit-tests.yml +- job: MacOSX + strategy: + matrix: + py38: + conda_env: py38 + pool: + vmImage: 'macOS-10.15' + steps: + - template: ci/azure/unit-tests.yml - job: Windows strategy: From b3bafeefbd6e6d70bce505ae1f0d9d5a2b015089 Mon Sep 17 00:00:00 2001 From: Spencer Hill Date: Tue, 31 Mar 2020 16:10:09 +0000 Subject: [PATCH 08/39] Fix for stack+groupby+apply w/ non-increasing coord (#3906) * WIP fix for stack+groupby+apply w/ non-increasing coord * Only sort if multiindex * store safe_cast_to_index call to avoid double call * black formatting --- doc/whats-new.rst | 11 +++++++---- xarray/core/groupby.py | 4 +++- xarray/tests/test_dataarray.py | 19 +++++++++++++++++++ 3 files changed, 29 insertions(+), 5 deletions(-) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index c70dfd4f3f6..b9c81ad3474 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -45,6 +45,9 @@ New Features Bug fixes ~~~~~~~~~ +- Fix renaming of coords when one or more stacked coords is not in + sorted order during stack+groupby+apply operations. (:issue:`3287`, + :pull:`3906`) By `Spencer Hill `_ - Fix a regression where deleting a coordinate from a copied :py:class:`DataArray` can affect the original :py:class:`Dataarray`. (:issue:`3899`, :pull:`3871`) By `Todd Jennings `_ @@ -96,13 +99,13 @@ New Features - Added support for :py:class:`pandas.DatetimeIndex`-style rounding of ``cftime.datetime`` objects directly via a :py:class:`CFTimeIndex` or via the :py:class:`~core.accessor_dt.DatetimeAccessor`. - By `Spencer Clark `_ + By `Spencer Clark `_ - Support new h5netcdf backend keyword `phony_dims` (available from h5netcdf v0.8.0 for :py:class:`~xarray.backends.H5NetCDFStore`. 
By `Kai Mühlbauer `_. - Add partial support for unit aware arrays with pint. (:pull:`3706`, :pull:`3611`) By `Justus Magin `_. -- :py:meth:`Dataset.groupby` and :py:meth:`DataArray.groupby` now raise a +- :py:meth:`Dataset.groupby` and :py:meth:`DataArray.groupby` now raise a `TypeError` on multiple string arguments. Receiving multiple string arguments often means a user is attempting to pass multiple dimensions as separate arguments and should instead pass a single list of dimensions. @@ -120,7 +123,7 @@ New Features By `Maximilian Roos `_. - ``skipna`` is available in :py:meth:`Dataset.quantile`, :py:meth:`DataArray.quantile`, :py:meth:`core.groupby.DatasetGroupBy.quantile`, :py:meth:`core.groupby.DataArrayGroupBy.quantile` - (:issue:`3843`, :pull:`3844`) + (:issue:`3843`, :pull:`3844`) By `Aaron Spring `_. Bug fixes @@ -814,7 +817,7 @@ Bug fixes Documentation ~~~~~~~~~~~~~ -- Created a `PR checklist `_ +- Created a `PR checklist `_ as a quick reference for tasks before creating a new PR or pushing new commits. By `Gregory Gundersen `_. diff --git a/xarray/core/groupby.py b/xarray/core/groupby.py index 67e8f0588b3..5a5f4c0d296 100644 --- a/xarray/core/groupby.py +++ b/xarray/core/groupby.py @@ -370,8 +370,10 @@ def __init__( group = group.dropna(group_dim) # look through group to find the unique values + group_as_index = safe_cast_to_index(group) + sort = bins is None and (not isinstance(group_as_index, pd.MultiIndex)) unique_values, group_indices = unique_value_groups( - safe_cast_to_index(group), sort=(bins is None) + group_as_index, sort=sort ) unique_coord = IndexVariable(group.name, unique_values) diff --git a/xarray/tests/test_dataarray.py b/xarray/tests/test_dataarray.py index 5b3e122bf72..ced72d1bc06 100644 --- a/xarray/tests/test_dataarray.py +++ b/xarray/tests/test_dataarray.py @@ -1202,6 +1202,25 @@ def test_selection_multiindex_from_level(self): expected = data.isel(xy=[0, 1]).unstack("xy").squeeze("y").drop_vars("y") assert_equal(actual, expected) + def test_stack_groupby_unsorted_coord(self): + data = [[0, 1], [2, 3]] + data_flat = [0, 1, 2, 3] + dims = ["x", "y"] + y_vals = [2, 3] + + arr = xr.DataArray(data, dims=dims, coords={"y": y_vals}) + actual1 = arr.stack(z=dims).groupby("z").first() + midx1 = pd.MultiIndex.from_product([[0, 1], [2, 3]], names=dims) + expected1 = xr.DataArray(data_flat, dims=["z"], coords={"z": midx1}) + xr.testing.assert_equal(actual1, expected1) + + # GH: 3287. Note that y coord values are not in sorted order. 
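+ # (the fix stops groupby from re-sorting a stacked MultiIndex, so the
+ # expected index below keeps y in its original, descending order)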
+ arr = xr.DataArray(data, dims=dims, coords={"y": y_vals[::-1]}) + actual2 = arr.stack(z=dims).groupby("z").first() + midx2 = pd.MultiIndex.from_product([[0, 1], [3, 2]], names=dims) + expected2 = xr.DataArray(data_flat, dims=["z"], coords={"z": midx2}) + xr.testing.assert_equal(actual2, expected2) + def test_virtual_default_coords(self): array = DataArray(np.zeros((5,)), dims="x") expected = DataArray(range(5), dims="x", name="x") From 1ed4f4d6d967d8b9435368444d9af6247748a047 Mon Sep 17 00:00:00 2001 From: Srijan Saurav Date: Fri, 3 Apr 2020 03:05:04 +0530 Subject: [PATCH 09/39] Fix minor code quality issues (#3626) * Fix minor code quality issues Changes: - Use `_` for unused variables - Use `"""` for docstrings - Add `.deepsource.toml` file to run continuous static analysis on repository with DeepSource * Remove deepsource @sauravsrijan best of luck with your project, even if we won't be the first library to take this up Co-authored-by: Maximilian Roos <5635139+max-sixty@users.noreply.github.com> --- xarray/util/print_versions.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/xarray/util/print_versions.py b/xarray/util/print_versions.py index 223b59b9675..32051bb6843 100755 --- a/xarray/util/print_versions.py +++ b/xarray/util/print_versions.py @@ -9,7 +9,7 @@ def get_sys_info(): - "Returns system information as a dict" + """Returns system information as a dict""" blob = [] @@ -22,7 +22,7 @@ def get_sys_info(): stdout=subprocess.PIPE, stderr=subprocess.PIPE, ) - so, serr = pipe.communicate() + so, _ = pipe.communicate() except Exception: pass else: @@ -37,7 +37,7 @@ def get_sys_info(): blob.append(("commit", commit)) try: - (sysname, nodename, release, version, machine, processor) = platform.uname() + (sysname, _nodename, release, _version, machine, processor) = platform.uname() blob.extend( [ ("python", sys.version), From 6bccbff975d59530a8c9cb1979cfcd5c8327254e Mon Sep 17 00:00:00 2001 From: Spencer Clark Date: Fri, 3 Apr 2020 15:35:17 -0400 Subject: [PATCH 10/39] Only fail if a specific warning occurs (#3930) --- xarray/tests/test_backends.py | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/xarray/tests/test_backends.py b/xarray/tests/test_backends.py index 82fe1b38149..b26a3b37fc8 100644 --- a/xarray/tests/test_backends.py +++ b/xarray/tests/test_backends.py @@ -4334,6 +4334,12 @@ def test_source_encoding_always_present(): assert ds.encoding["source"] == tmp +def _assert_no_dates_out_of_range_warning(record): + undesired_message = "dates out of range" + for warning in record: + assert undesired_message not in str(warning.message) + + @requires_scipy_or_netCDF4 @pytest.mark.parametrize("calendar", _STANDARD_CALENDARS) def test_use_cftime_standard_calendar_default_in_range(calendar): @@ -4360,7 +4366,7 @@ def test_use_cftime_standard_calendar_default_in_range(calendar): with open_dataset(tmp_file) as ds: assert_identical(expected_x, ds.x) assert_identical(expected_time, ds.time) - assert not record + _assert_no_dates_out_of_range_warning(record) @requires_cftime @@ -4423,7 +4429,7 @@ def test_use_cftime_true(calendar, units_year): with open_dataset(tmp_file, use_cftime=True) as ds: assert_identical(expected_x, ds.x) assert_identical(expected_time, ds.time) - assert not record + _assert_no_dates_out_of_range_warning(record) @requires_scipy_or_netCDF4 @@ -4452,7 +4458,7 @@ def test_use_cftime_false_standard_calendar_in_range(calendar): with open_dataset(tmp_file, use_cftime=False) as ds: assert_identical(expected_x, ds.x) 
assert_identical(expected_time, ds.time) - assert not record + _assert_no_dates_out_of_range_warning(record) @requires_scipy_or_netCDF4 From 9b5140e0711247c373987b56726282140b406d7f Mon Sep 17 00:00:00 2001 From: johnomotani Date: Fri, 3 Apr 2020 20:47:07 +0100 Subject: [PATCH 11/39] Add missing_dims argument allowing isel() to ignore missing dimensions (#3923) * Add missing_dims argument allowing isel() to ignore missing dimensions * Add missing_dims to whats-new.rst * Fix typos in TestVariable.test_isel() * Change values for missing_dims argument to {'raise', 'warn', 'ignore'} Matches the possible values used elsewhere for drop_vars arguments. * Add missing_dims argument Dataset.isel() * Mention Dataset.isel in whats-new.rst description of missing_dims --- doc/whats-new.rst | 5 ++++ xarray/core/dataarray.py | 30 +++++++++++++++++++-- xarray/core/dataset.py | 30 ++++++++++++++------- xarray/core/utils.py | 49 ++++++++++++++++++++++++++++++++++ xarray/core/variable.py | 14 ++++++---- xarray/tests/test_dataarray.py | 13 +++++++++ xarray/tests/test_dataset.py | 15 +++++++++++ xarray/tests/test_variable.py | 13 ++++++++- 8 files changed, 151 insertions(+), 18 deletions(-) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index b9c81ad3474..46a67f9ae4f 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -35,6 +35,11 @@ New Features :py:func:`combine_by_coords` and :py:func:`combine_nested` using combine_attrs keyword argument. (:issue:`3865`, :pull:`3877`) By `John Omotani `_ +- 'missing_dims' argument to :py:meth:`Dataset.isel`, + `:py:meth:`DataArray.isel` and :py:meth:`Variable.isel` to allow replacing + the exception when a dimension passed to ``isel`` is not present with a + warning, or just ignore the dimension. (:issue:`3866`, :pull:`3923`) + By `John Omotani `_ - Limited the length of array items with long string reprs to a reasonable width (:pull:`3900`) By `Maximilian Roos `_ diff --git a/xarray/core/dataarray.py b/xarray/core/dataarray.py index b7e0333dcd9..63cba53b689 100644 --- a/xarray/core/dataarray.py +++ b/xarray/core/dataarray.py @@ -1007,25 +1007,51 @@ def isel( self, indexers: Mapping[Hashable, Any] = None, drop: bool = False, + missing_dims: str = "raise", **indexers_kwargs: Any, ) -> "DataArray": """Return a new DataArray whose data is given by integer indexing along the specified dimension(s). + Parameters + ---------- + indexers : dict, optional + A dict with keys matching dimensions and values given + by integers, slice objects or arrays. + indexer can be a integer, slice, array-like or DataArray. + If DataArrays are passed as indexers, xarray-style indexing will be + carried out. See :ref:`indexing` for the details. + One of indexers or indexers_kwargs must be provided. + drop : bool, optional + If ``drop=True``, drop coordinates variables indexed by integers + instead of making them scalar. + missing_dims : {"raise", "warn", "ignore"}, default "raise" + What to do if dimensions that should be selected from are not present in the + DataArray: + - "exception": raise an exception + - "warning": raise a warning, and ignore the missing dimensions + - "ignore": ignore the missing dimensions + **indexers_kwargs : {dim: indexer, ...}, optional + The keyword arguments form of ``indexers``. 
+ See Also -------- Dataset.isel DataArray.sel """ + indexers = either_dict_or_kwargs(indexers, indexers_kwargs, "isel") + if any(is_fancy_indexer(idx) for idx in indexers.values()): - ds = self._to_temp_dataset()._isel_fancy(indexers, drop=drop) + ds = self._to_temp_dataset()._isel_fancy( + indexers, drop=drop, missing_dims=missing_dims + ) return self._from_temp_dataset(ds) # Much faster algorithm for when all indexers are ints, slices, one-dimensional # lists, or zero or one-dimensional np.ndarray's - variable = self._variable.isel(indexers) + variable = self._variable.isel(indexers, missing_dims=missing_dims) coords = {} for coord_name, coord_value in self._coords.items(): diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index c515d781db1..97b3caf2b6e 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -87,6 +87,7 @@ _check_inplace, _default, decode_numpy_dict_values, + drop_dims_from_indexers, either_dict_or_kwargs, hashable, infix_dims, @@ -1767,7 +1768,7 @@ def maybe_chunk(name, var, chunks): return self._replace(variables) def _validate_indexers( - self, indexers: Mapping[Hashable, Any] + self, indexers: Mapping[Hashable, Any], missing_dims: str = "raise", ) -> Iterator[Tuple[Hashable, Union[int, slice, np.ndarray, Variable]]]: """ Here we make sure + indexer has a valid keys @@ -1777,9 +1778,7 @@ def _validate_indexers( """ from .dataarray import DataArray - invalid = indexers.keys() - self.dims.keys() - if invalid: - raise ValueError("dimensions %r do not exist" % invalid) + indexers = drop_dims_from_indexers(indexers, self.dims, missing_dims) # all indexers should be int, slice, np.ndarrays, or Variable for k, v in indexers.items(): @@ -1875,6 +1874,7 @@ def isel( self, indexers: Mapping[Hashable, Any] = None, drop: bool = False, + missing_dims: str = "raise", **indexers_kwargs: Any, ) -> "Dataset": """Returns a new dataset with each array indexed along the specified @@ -1896,6 +1896,12 @@ def isel( drop : bool, optional If ``drop=True``, drop coordinates variables indexed by integers instead of making them scalar. + missing_dims : {"raise", "warn", "ignore"}, default "raise" + What to do if dimensions that should be selected from are not present in the + Dataset: + - "exception": raise an exception + - "warning": raise a warning, and ignore the missing dimensions + - "ignore": ignore the missing dimensions **indexers_kwargs : {dim: indexer, ...}, optional The keyword arguments form of ``indexers``. One of indexers or indexers_kwargs must be provided. 
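A minimal sketch of the three ``missing_dims`` modes documented above; the
dataset and the ``not_a_dim`` name are illustrative and not taken from the
patch:

>>> ds = xr.Dataset({"a": (("x", "y"), [[1, 2], [3, 4]])})
>>> ds.isel(x=0, not_a_dim=0)  # default "raise": ValueError
>>> ds.isel(x=0, not_a_dim=0, missing_dims="warn")  # warns, then ignores it
>>> ds.isel(x=0, not_a_dim=0, missing_dims="ignore")  # silently ignores it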
@@ -1918,13 +1924,11 @@ def isel( """ indexers = either_dict_or_kwargs(indexers, indexers_kwargs, "isel") if any(is_fancy_indexer(idx) for idx in indexers.values()): - return self._isel_fancy(indexers, drop=drop) + return self._isel_fancy(indexers, drop=drop, missing_dims=missing_dims) # Much faster algorithm for when all indexers are ints, slices, one-dimensional # lists, or zero or one-dimensional np.ndarray's - invalid = indexers.keys() - self.dims.keys() - if invalid: - raise ValueError("dimensions %r do not exist" % invalid) + indexers = drop_dims_from_indexers(indexers, self.dims, missing_dims) variables = {} dims: Dict[Hashable, Tuple[int, ...]] = {} @@ -1958,10 +1962,16 @@ def isel( file_obj=self._file_obj, ) - def _isel_fancy(self, indexers: Mapping[Hashable, Any], *, drop: bool) -> "Dataset": + def _isel_fancy( + self, + indexers: Mapping[Hashable, Any], + *, + drop: bool, + missing_dims: str = "raise", + ) -> "Dataset": # Note: we need to preserve the original indexers variable in order to merge the # coords below - indexers_list = list(self._validate_indexers(indexers)) + indexers_list = list(self._validate_indexers(indexers, missing_dims)) variables: Dict[Hashable, Variable] = {} indexes: Dict[Hashable, pd.Index] = {} diff --git a/xarray/core/utils.py b/xarray/core/utils.py index 896ee31ab5c..1126cf3037f 100644 --- a/xarray/core/utils.py +++ b/xarray/core/utils.py @@ -24,6 +24,7 @@ Sequence, Tuple, TypeVar, + Union, cast, ) @@ -738,6 +739,54 @@ def get_temp_dimname(dims: Container[Hashable], new_dim: Hashable) -> Hashable: return new_dim +def drop_dims_from_indexers( + indexers: Mapping[Hashable, Any], + dims: Union[list, Mapping[Hashable, int]], + missing_dims: str, +) -> Mapping[Hashable, Any]: + """ Depending on the setting of missing_dims, drop any dimensions from indexers that + are not present in dims. + + Parameters + ---------- + indexers : dict + dims : sequence + missing_dims : {"raise", "warn", "ignore"} + """ + + if missing_dims == "raise": + invalid = indexers.keys() - set(dims) + if invalid: + raise ValueError( + f"dimensions {invalid} do not exist. Expected one or more of {dims}" + ) + + return indexers + + elif missing_dims == "warn": + + # don't modify input + indexers = dict(indexers) + + invalid = indexers.keys() - set(dims) + if invalid: + warnings.warn( + f"dimensions {invalid} do not exist. Expected one or more of {dims}" + ) + for key in invalid: + indexers.pop(key) + + return indexers + + elif missing_dims == "ignore": + return {key: val for key, val in indexers.items() if key in dims} + + else: + raise ValueError( + f"Unrecognised option {missing_dims} for missing_dims argument" + ) + + # Singleton type, as per https://github.com/python/typing/pull/240 class Default(Enum): token = 0 diff --git a/xarray/core/variable.py b/xarray/core/variable.py index c9addeefb04..68e823ca426 100644 --- a/xarray/core/variable.py +++ b/xarray/core/variable.py @@ -28,6 +28,7 @@ OrderedSet, _default, decode_numpy_dict_values, + drop_dims_from_indexers, either_dict_or_kwargs, ensure_us_time_resolution, infix_dims, @@ -1030,6 +1031,7 @@ def _to_dense(self): def isel( self: VariableType, indexers: Mapping[Hashable, Any] = None, + missing_dims: str = "raise", **indexers_kwargs: Any, ) -> VariableType: """Return a new array indexed along the specified dimension(s). @@ -1039,6 +1041,12 @@ def isel( **indexers : {dim: indexer, ...} Keyword arguments with names matching dimensions and values given by integers, slice objects or arrays. 
+ missing_dims : {"raise", "warn", "ignore"}, default "raise" + What to do if dimensions that should be selected from are not present in the + DataArray: + - "exception": raise an exception + - "warning": raise a warning, and ignore the missing dimensions + - "ignore": ignore the missing dimensions Returns ------- @@ -1050,11 +1058,7 @@ def isel( """ indexers = either_dict_or_kwargs(indexers, indexers_kwargs, "isel") - invalid = indexers.keys() - set(self.dims) - if invalid: - raise ValueError( - f"dimensions {invalid} do not exist. Expected one or more of {self.dims}" - ) + indexers = drop_dims_from_indexers(indexers, self.dims, missing_dims) key = tuple(indexers.get(dim, slice(None)) for dim in self.dims) return self[key] diff --git a/xarray/tests/test_dataarray.py b/xarray/tests/test_dataarray.py index ced72d1bc06..cf31182ed30 100644 --- a/xarray/tests/test_dataarray.py +++ b/xarray/tests/test_dataarray.py @@ -781,6 +781,19 @@ def test_isel(self): assert_identical(self.dv, self.dv.isel(x=slice(None))) assert_identical(self.dv[:3], self.dv.isel(x=slice(3))) assert_identical(self.dv[:3, :5], self.dv.isel(x=slice(3), y=slice(5))) + with raises_regex( + ValueError, + r"dimensions {'not_a_dim'} do not exist. Expected " + r"one or more of \('x', 'y'\)", + ): + self.dv.isel(not_a_dim=0) + with pytest.warns( + UserWarning, + match=r"dimensions {'not_a_dim'} do not exist. " + r"Expected one or more of \('x', 'y'\)", + ): + self.dv.isel(not_a_dim=0, missing_dims="warn") + assert_identical(self.dv, self.dv.isel(not_a_dim=0, missing_dims="ignore")) def test_isel_types(self): # regression test for #1405 diff --git a/xarray/tests/test_dataset.py b/xarray/tests/test_dataset.py index 237c315583c..a1cb7361e77 100644 --- a/xarray/tests/test_dataset.py +++ b/xarray/tests/test_dataset.py @@ -1023,6 +1023,21 @@ def test_isel(self): with pytest.raises(ValueError): data.isel(not_a_dim=slice(0, 2)) + with raises_regex( + ValueError, + r"dimensions {'not_a_dim'} do not exist. Expected " + r"one or more of " + r"[\w\W]*'time'[\w\W]*'dim\d'[\w\W]*'dim\d'[\w\W]*'dim\d'[\w\W]*", + ): + data.isel(not_a_dim=slice(0, 2)) + with pytest.warns( + UserWarning, + match=r"dimensions {'not_a_dim'} do not exist. " + r"Expected one or more of " + r"[\w\W]*'time'[\w\W]*'dim\d'[\w\W]*'dim\d'[\w\W]*'dim\d'[\w\W]*", + ): + data.isel(not_a_dim=slice(0, 2), missing_dims="warn") + assert_identical(data, data.isel(not_a_dim=slice(0, 2), missing_dims="ignore")) ret = data.isel(dim1=0) assert {"time": 20, "dim2": 9, "dim3": 10} == ret.dims diff --git a/xarray/tests/test_variable.py b/xarray/tests/test_variable.py index 116466e112d..78e3848b8fb 100644 --- a/xarray/tests/test_variable.py +++ b/xarray/tests/test_variable.py @@ -1254,8 +1254,19 @@ def test_isel(self): assert_identical(v.isel(x=0), v[:, 0]) assert_identical(v.isel(x=[0, 2]), v[:, [0, 2]]) assert_identical(v.isel(time=[]), v[[]]) - with raises_regex(ValueError, "do not exist"): + with raises_regex( + ValueError, + r"dimensions {'not_a_dim'} do not exist. Expected one or more of " + r"\('time', 'x'\)", + ): v.isel(not_a_dim=0) + with pytest.warns( + UserWarning, + match=r"dimensions {'not_a_dim'} do not exist. 
Expected one or more of " + r"\('time', 'x'\)", + ): + v.isel(not_a_dim=0, missing_dims="warn") + assert_identical(v, v.isel(not_a_dim=0, missing_dims="ignore")) def test_index_0d_numpy_string(self): # regression test to verify our work around for indexing 0d strings From 8a1c933d8c351eae58ef0af2fef74134463d2544 Mon Sep 17 00:00:00 2001 From: keewis Date: Fri, 3 Apr 2020 21:48:18 +0200 Subject: [PATCH 12/39] add a CI that tests xarray with all optional dependencies but dask (#3919) * add a new ci that does not install dask * remove iris since it depends on dask * rename the new CI to py38-all-but-dask * decorate the some of the failing tests with requires_dask * decorate the zarr tests using dask or obj.chunk with requires_dask * don't chunk if dask is not available * pass an indexer instead of a tuple to _arrayize_vectorized_indexer * xfail the remaining tests if dask is not available * update whats-new.rst --- azure-pipelines.yml | 2 ++ ci/requirements/py38-all-but-dask.yml | 44 +++++++++++++++++++++++++++ doc/whats-new.rst | 3 ++ xarray/backends/zarr.py | 8 ++++- xarray/tests/test_backends.py | 18 +++++++++++ xarray/tests/test_sparse.py | 1 + 6 files changed, 75 insertions(+), 1 deletion(-) create mode 100644 ci/requirements/py38-all-but-dask.yml diff --git a/azure-pipelines.yml b/azure-pipelines.yml index ce95fca1ba1..ff85501c555 100644 --- a/azure-pipelines.yml +++ b/azure-pipelines.yml @@ -20,6 +20,8 @@ jobs: conda_env: py37 py38: conda_env: py38 + py38-all-but-dask: + conda_env: py38-all-but-dask py38-upstream-dev: conda_env: py38 upstream_dev: true diff --git a/ci/requirements/py38-all-but-dask.yml b/ci/requirements/py38-all-but-dask.yml new file mode 100644 index 00000000000..a375d9e1e5a --- /dev/null +++ b/ci/requirements/py38-all-but-dask.yml @@ -0,0 +1,44 @@ +name: xarray-tests +channels: + - conda-forge +dependencies: + - python=3.8 + - black + - boto3 + - bottleneck + - cartopy + - cdms2 + - cfgrib + - cftime + - coveralls + - flake8 + - h5netcdf + - h5py + - hdf5 + - hypothesis + - isort + - lxml # Optional dep of pydap + - matplotlib + - mypy=0.761 # Must match .pre-commit-config.yaml + - nc-time-axis + - netcdf4 + - numba + - numpy + - pandas + - pint + - pip + - pseudonetcdf + - pydap + - pynio + - pytest + - pytest-cov + - pytest-env + - rasterio + - scipy + - seaborn + - setuptools + - sparse + - toolz + - zarr + - pip: + - numbagg diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 46a67f9ae4f..408863d9a91 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -71,6 +71,9 @@ Internal Changes - Run the ``isort`` pre-commit hook only on python source files and update the ``flake8`` version. (:issue:`3750`, :pull:`3711`) By `Justus Magin `_. +- Add a CI job that runs the tests with every optional dependency + except ``dask``. (:issue:`3794`, :pull:`3919`) + By `Justus Magin `_. .. _whats-new.0.15.1: diff --git a/xarray/backends/zarr.py b/xarray/backends/zarr.py index cdc74e06882..c262dae2811 100644 --- a/xarray/backends/zarr.py +++ b/xarray/backends/zarr.py @@ -56,7 +56,7 @@ def __getitem__(self, key): return array[key.tuple] elif isinstance(key, indexing.VectorizedIndexer): return array.vindex[ - indexing._arrayize_vectorized_indexer(key.tuple, self.shape).tuple + indexing._arrayize_vectorized_indexer(key, self.shape).tuple ] else: assert isinstance(key, indexing.OuterIndexer) @@ -586,6 +586,12 @@ def open_zarr( "Instead found %s. 
" % chunks ) + if chunks == "auto": + try: + import dask.array # noqa + except ImportError: + chunks = None + if not decode_cf: mask_and_scale = False decode_times = False diff --git a/xarray/tests/test_backends.py b/xarray/tests/test_backends.py index b26a3b37fc8..3fde292c04f 100644 --- a/xarray/tests/test_backends.py +++ b/xarray/tests/test_backends.py @@ -581,6 +581,10 @@ def test_orthogonal_indexing(self): actual = on_disk.isel(**indexers) assert_identical(expected, actual) + @pytest.mark.xfail( + not has_dask, + reason="the code for indexing without dask handles negative steps in slices incorrectly", + ) def test_vectorized_indexing(self): in_memory = create_test_data() with self.roundtrip(in_memory) as on_disk: @@ -1539,6 +1543,7 @@ def test_roundtrip_consolidated(self): self.check_dtypes_roundtripped(expected, actual) assert_identical(expected, actual) + @requires_dask def test_auto_chunk(self): original = create_test_data().chunk() @@ -1556,6 +1561,7 @@ def test_auto_chunk(self): # chunk size should be the same as original assert v.chunks == original[k].chunks + @requires_dask @pytest.mark.filterwarnings("ignore:Specified Dask chunks") def test_manual_chunk(self): original = create_test_data().chunk({"dim1": 3, "dim2": 4, "dim3": 3}) @@ -1598,6 +1604,7 @@ def test_manual_chunk(self): assert_identical(actual, auto) assert_identical(actual.load(), auto.load()) + @requires_dask def test_warning_on_bad_chunks(self): original = create_test_data().chunk({"dim1": 4, "dim2": 3, "dim3": 5}) @@ -1620,6 +1627,7 @@ def test_warning_on_bad_chunks(self): assert v._in_memory == (k in actual.dims) assert len(record) == 0 + @requires_dask def test_deprecate_auto_chunk(self): original = create_test_data().chunk() with pytest.warns(FutureWarning): @@ -1638,6 +1646,7 @@ def test_deprecate_auto_chunk(self): # there should be no chunks assert v.chunks is None + @requires_dask def test_write_uneven_dask_chunks(self): # regression for GH#2225 original = create_test_data().chunk({"dim1": 3, "dim2": 4, "dim3": 3}) @@ -1662,6 +1671,7 @@ def test_chunk_encoding(self): with self.roundtrip(data) as actual: pass + @requires_dask def test_chunk_encoding_with_dask(self): # These datasets DO have dask chunks. 
Need to check for various # interactions between dask and zarr chunks @@ -1896,6 +1906,7 @@ def test_append_with_new_variable(self): combined["new_var"] = ds_with_new_var["new_var"] assert_identical(combined, xr.open_zarr(store_target)) + @requires_dask def test_to_zarr_compute_false_roundtrip(self): from dask.delayed import Delayed @@ -1915,6 +1926,7 @@ def test_to_zarr_compute_false_roundtrip(self): with self.open(store) as actual: assert_identical(original, actual) + @requires_dask def test_to_zarr_append_compute_false_roundtrip(self): from dask.delayed import Delayed @@ -1951,6 +1963,7 @@ def test_to_zarr_append_compute_false_roundtrip(self): with self.open(store) as actual: assert_identical(xr.concat([ds, ds_to_append], dim="time"), actual) + @requires_dask def test_encoding_chunksizes(self): # regression test for GH2278 # see also test_encoding_chunksizes_unlimited @@ -3513,6 +3526,7 @@ def test_uamiv_format_read(self): assert_allclose(expected, actual) camxfile.close() + @requires_dask def test_uamiv_format_mfread(self): """ Open a CAMx file and test data variables @@ -3939,6 +3953,9 @@ def test_chunks(self): ex = expected.sel(band=1).mean(dim="x") assert_allclose(ac, ex) + @pytest.mark.xfail( + not has_dask, reason="without dask, a non-serializable lock is used" + ) def test_pickle_rasterio(self): # regression test for https://github.com/pydata/xarray/issues/2121 with create_tmp_geotiff() as (tmp_file, expected): @@ -4012,6 +4029,7 @@ def test_geotiff_tags(self): with xr.open_rasterio(tmp_file) as rioda: assert isinstance(rioda.attrs["AREA_OR_POINT"], str) + @requires_dask def test_no_mftime(self): # rasterio can accept "filename" urguments that are actually urls, # including paths to remote files. diff --git a/xarray/tests/test_sparse.py b/xarray/tests/test_sparse.py index 09ab1be9af9..f3c09ba6a5f 100644 --- a/xarray/tests/test_sparse.py +++ b/xarray/tests/test_sparse.py @@ -837,6 +837,7 @@ def test_sparse_coords(self): ) +@requires_dask def test_chunk(): s = sparse.COO.from_numpy(np.array([0, 0, 1, 2])) a = DataArray(s) From 19cb99d0a26a66c4da6d21d74c76b3f79d3352d5 Mon Sep 17 00:00:00 2001 From: Deepak Cherian Date: Fri, 3 Apr 2020 19:48:54 +0000 Subject: [PATCH 13/39] facetgrid: fix case when vmin == vmax (#3916) * facetgrid: fix case when vmin == vmax Fixes #3734 * minor fix. --- doc/whats-new.rst | 3 ++- xarray/plot/utils.py | 12 ++++++------ xarray/tests/test_plot.py | 15 +++++++++++---- 3 files changed, 19 insertions(+), 11 deletions(-) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 408863d9a91..e5a574d692b 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -56,7 +56,8 @@ Bug fixes - Fix a regression where deleting a coordinate from a copied :py:class:`DataArray` can affect the original :py:class:`Dataarray`. (:issue:`3899`, :pull:`3871`) By `Todd Jennings `_ - +- Fix ``FacetGrid`` when ``vmin == vmax``. (:issue:`3734`) + By `Deepak Cherian `_ Documentation ~~~~~~~~~~~~~ diff --git a/xarray/plot/utils.py b/xarray/plot/utils.py index e6c15037cb8..9fdfcc2c318 100644 --- a/xarray/plot/utils.py +++ b/xarray/plot/utils.py @@ -237,18 +237,14 @@ def _determine_cmap_params( norm.vmin = vmin else: if not vmin_was_none and vmin != norm.vmin: - raise ValueError( - "Cannot supply vmin and a norm" + " with a different vmin." 
- ) + raise ValueError("Cannot supply vmin and a norm with a different vmin.") vmin = norm.vmin if norm.vmax is None: norm.vmax = vmax else: if not vmax_was_none and vmax != norm.vmax: - raise ValueError( - "Cannot supply vmax and a norm" + " with a different vmax." - ) + raise ValueError("Cannot supply vmax and a norm with a different vmax.") vmax = norm.vmax # if BoundaryNorm, then set levels @@ -275,6 +271,10 @@ def _determine_cmap_params( levels = ticker.tick_values(vmin, vmax) vmin, vmax = levels[0], levels[-1] + # GH3734 + if vmin == vmax: + vmin, vmax = mpl.ticker.LinearLocator(2).tick_values(vmin, vmax) + if extend is None: extend = _determine_extend(calc_data, vmin, vmax) diff --git a/xarray/tests/test_plot.py b/xarray/tests/test_plot.py index 7f3f1620133..46556c14517 100644 --- a/xarray/tests/test_plot.py +++ b/xarray/tests/test_plot.py @@ -833,10 +833,10 @@ def test_norm_sets_vmin_vmax(self): for norm, extend in zip( [ - mpl.colors.LogNorm(), - mpl.colors.LogNorm(vmin + 1, vmax - 1), - mpl.colors.LogNorm(None, vmax - 1), - mpl.colors.LogNorm(vmin + 1, None), + mpl.colors.Normalize(), + mpl.colors.Normalize(vmin + 0.1, vmax - 0.1), + mpl.colors.Normalize(None, vmax - 0.1), + mpl.colors.Normalize(vmin + 0.1, None), ], ["neither", "both", "max", "min"], ): @@ -1752,6 +1752,13 @@ def test_can_set_vmin_vmax(self): clim = np.array(image.get_clim()) assert np.allclose(expected, clim) + @pytest.mark.slow + def test_vmin_vmax_equal(self): + # regression test for GH3734 + fg = self.g.map_dataarray(xplt.imshow, "x", "y", vmin=50, vmax=50) + for mappable in fg._mappables: + assert mappable.norm.vmin != mappable.norm.vmax + @pytest.mark.slow @pytest.mark.filterwarnings("ignore") def test_can_set_norm(self): From 0181aa5741ce376a9dad45750754f3cf1ed41dde Mon Sep 17 00:00:00 2001 From: Marek Jacob Date: Sat, 4 Apr 2020 01:19:22 +0200 Subject: [PATCH 14/39] Allow plotting bool data (#3766) * Allow plotting bool data Fixes #3722 . > matplotlib can plot `bool` values so we should add that to the check in `_ensure_plottable`. * Add tests + raise nicer error when asked to plot unsupported types * Add whats-new Co-authored-by: dcherian --- doc/whats-new.rst | 3 ++- xarray/plot/plot.py | 2 +- xarray/plot/utils.py | 8 ++++---- xarray/tests/test_plot.py | 13 +++++++++++++ 4 files changed, 20 insertions(+), 6 deletions(-) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index e5a574d692b..512cd40defb 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -46,7 +46,8 @@ New Features - Implement :py:meth:`DataArray.idxmax`, :py:meth:`DataArray.idxmin`, :py:meth:`Dataset.idxmax`, :py:meth:`Dataset.idxmin`. (:issue:`60`, :pull:`3871`) By `Todd Jennings `_ - +- Allow plotting of boolean arrays. (:pull:`3766`) + By `Marek Jacob `_ Bug fixes ~~~~~~~~~ diff --git a/xarray/plot/plot.py b/xarray/plot/plot.py index 302cac05b05..71a05627692 100644 --- a/xarray/plot/plot.py +++ b/xarray/plot/plot.py @@ -689,7 +689,7 @@ def newplotfunc( xplt, xlab_extra = _resolve_intervals_2dplot(xval, plotfunc.__name__) yplt, ylab_extra = _resolve_intervals_2dplot(yval, plotfunc.__name__) - _ensure_plottable(xplt, yplt) + _ensure_plottable(xplt, yplt, zval) cmap_params, cbar_kwargs = _process_cmap_cbar_kwargs( plotfunc, zval.data, **locals() diff --git a/xarray/plot/utils.py b/xarray/plot/utils.py index 9fdfcc2c318..2734e446728 100644 --- a/xarray/plot/utils.py +++ b/xarray/plot/utils.py @@ -534,7 +534,7 @@ def _ensure_plottable(*args): Raise exception if there is anything in args that can't be plotted on an axis by matplotlib. 
""" - numpy_types = [np.floating, np.integer, np.timedelta64, np.datetime64] + numpy_types = [np.floating, np.integer, np.timedelta64, np.datetime64, np.bool_] other_types = [datetime] try: import cftime @@ -549,10 +549,10 @@ def _ensure_plottable(*args): or _valid_other_type(np.array(x), other_types) ): raise TypeError( - "Plotting requires coordinates to be numeric " - "or dates of type np.datetime64, " + "Plotting requires coordinates to be numeric, boolean, " + "or dates of type numpy.datetime64, " "datetime.datetime, cftime.datetime or " - "pd.Interval." + f"pandas.Interval. Received data of type {np.array(x).dtype} instead." ) if ( _valid_other_type(np.array(x), cftime_datetime) diff --git a/xarray/tests/test_plot.py b/xarray/tests/test_plot.py index 46556c14517..02eaf09b73e 100644 --- a/xarray/tests/test_plot.py +++ b/xarray/tests/test_plot.py @@ -139,6 +139,12 @@ def test1d(self): with raises_regex(ValueError, "None"): self.darray[:, 0, 0].plot(x="dim_1") + with raises_regex(TypeError, "complex128"): + (self.darray[:, 0, 0] + 1j).plot() + + def test_1d_bool(self): + xr.ones_like(self.darray[:, 0, 0], dtype=np.bool).plot() + def test_1d_x_y_kw(self): z = np.arange(10) da = DataArray(np.cos(z), dims=["z"], coords=[z], name="f") @@ -989,6 +995,13 @@ def test_1d_raises_valueerror(self): with raises_regex(ValueError, r"DataArray must be 2d"): self.plotfunc(self.darray[0, :]) + def test_bool(self): + xr.ones_like(self.darray, dtype=np.bool).plot() + + def test_complex_raises_typeerror(self): + with raises_regex(TypeError, "complex128"): + (self.darray + 1j).plot() + def test_3d_raises_valueerror(self): a = DataArray(easy_array((2, 3, 4))) if self.plotfunc.__name__ == "imshow": From 8d280cd7b1d80567cfdc6ae55165c522a5d4c2ce Mon Sep 17 00:00:00 2001 From: Tom Nicholas <35968931+TomNicholas@users.noreply.github.com> Date: Sat, 4 Apr 2020 18:57:19 +0100 Subject: [PATCH 15/39] Bugfix for plotting transposed 2d coords (#3934) * always transpose y to match x * test based on issue example * updated what's new * isort --- doc/whats-new.rst | 3 +++ xarray/plot/plot.py | 1 + xarray/tests/test_plot.py | 11 +++++++++++ 3 files changed, 15 insertions(+) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 512cd40defb..5e890022e36 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -59,6 +59,9 @@ Bug fixes By `Todd Jennings `_ - Fix ``FacetGrid`` when ``vmin == vmax``. (:issue:`3734`) By `Deepak Cherian `_ +- Fix bug where plotting line plots with 2D coordinates depended on dimension + order. (:issue:`3933`) + By `Tom Nicholas `_. 
Documentation ~~~~~~~~~~~~~ diff --git a/xarray/plot/plot.py b/xarray/plot/plot.py index 71a05627692..0fe302833d4 100644 --- a/xarray/plot/plot.py +++ b/xarray/plot/plot.py @@ -93,6 +93,7 @@ def _infer_line_data(darray, x, y, hue): otherindex = 1 if darray.dims.index(huename) == 0 else 0 otherdim = darray.dims[otherindex] xplt = darray.transpose(otherdim, huename, transpose_coords=False) + yplt = yplt.transpose(otherdim, huename, transpose_coords=False) else: raise ValueError( "For 2D inputs, hue must be a dimension" diff --git a/xarray/tests/test_plot.py b/xarray/tests/test_plot.py index 02eaf09b73e..6482aa2400c 100644 --- a/xarray/tests/test_plot.py +++ b/xarray/tests/test_plot.py @@ -257,6 +257,17 @@ def test_2d_coords_line_plot(self): with pytest.raises(ValueError, match="For 2D inputs, hue must be a dimension"): da.plot.line(x="lon", hue="lat") + def test_2d_coord_line_plot_coords_transpose_invariant(self): + # checks for bug reported in GH #3933 + x = np.arange(10) + y = np.arange(20) + ds = xr.Dataset(coords={"x": x, "y": y}) + + for z in [ds.y + ds.x, ds.x + ds.y]: + ds = ds.assign_coords(z=z) + ds["v"] = ds.x + ds.y + ds["v"].plot.line(y="z", hue="x") + def test_2d_before_squeeze(self): a = DataArray(easy_array((1, 5))) a.plot() From 782e0e2a835b8f3302d3114bd9d45f6e549b9b5f Mon Sep 17 00:00:00 2001 From: Deepak Cherian Date: Sun, 5 Apr 2020 13:41:24 +0000 Subject: [PATCH 16/39] Use divergent colormap if lowest and highest level span 0 (#3913) * Use divergent colormap if lowest and highest level span 0 Fixes #3524 * sort levels by default. * better levels check --- doc/whats-new.rst | 2 ++ xarray/plot/utils.py | 10 +++++++++- xarray/tests/test_plot.py | 6 ++++++ 3 files changed, 17 insertions(+), 1 deletion(-) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 5e890022e36..dac63600679 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -57,6 +57,8 @@ Bug fixes - Fix a regression where deleting a coordinate from a copied :py:class:`DataArray` can affect the original :py:class:`Dataarray`. (:issue:`3899`, :pull:`3871`) By `Todd Jennings `_ +- Use divergent colormap if ``levels`` spans 0. (:issue:`3524`) + By `Deepak Cherian `_ - Fix ``FacetGrid`` when ``vmin == vmax``. 
(:issue:`3734`) By `Deepak Cherian `_ - Fix bug where plotting line plots with 2D coordinates depended on dimension diff --git a/xarray/plot/utils.py b/xarray/plot/utils.py index 2734e446728..a756a1c1fc8 100644 --- a/xarray/plot/utils.py +++ b/xarray/plot/utils.py @@ -169,6 +169,9 @@ def _determine_cmap_params( """ import matplotlib as mpl + if isinstance(levels, Iterable): + levels = sorted(levels) + calc_data = np.ravel(plot_data[np.isfinite(plot_data)]) # Handle all-NaN input data gracefully @@ -216,8 +219,13 @@ def _determine_cmap_params( vlim = abs(vmax - center) if possibly_divergent: + levels_are_divergent = ( + isinstance(levels, Iterable) and levels[0] * levels[-1] < 0 + ) # kwargs not specific about divergent or not: infer defaults from data - divergent = ((vmin < 0) and (vmax > 0)) or not center_is_none + divergent = ( + ((vmin < 0) and (vmax > 0)) or not center_is_none or levels_are_divergent + ) else: divergent = False diff --git a/xarray/tests/test_plot.py b/xarray/tests/test_plot.py index 6482aa2400c..d0a8a02a3a4 100644 --- a/xarray/tests/test_plot.py +++ b/xarray/tests/test_plot.py @@ -844,6 +844,12 @@ def test_divergentcontrol(self): assert cmap_params["vmax"] == 0.6 assert cmap_params["cmap"] == "viridis" + # regression test for GH3524 + # infer diverging colormap from divergent levels + cmap_params = _determine_cmap_params(pos, levels=[-0.1, 0, 1]) + # specifying levels makes cmap a Colormap object + assert cmap_params["cmap"].name == "RdBu_r" + def test_norm_sets_vmin_vmax(self): vmin = self.data.min() vmax = self.data.max() From ba9f82227a9ec0c42928d5d0b978b101eda756cb Mon Sep 17 00:00:00 2001 From: Hossein Madadi Date: Mon, 6 Apr 2020 00:22:27 +0430 Subject: [PATCH 17/39] Modify code in "Grouped arithmetic" (#3904) `results.append(group - alt.sel(x=label))` changed to: `results.append(group - alt.sel(letters=label))` --- doc/groupby.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/groupby.rst b/doc/groupby.rst index 927e192eb6c..223185bd0d5 100644 --- a/doc/groupby.rst +++ b/doc/groupby.rst @@ -157,7 +157,7 @@ This last line is roughly equivalent to the following:: results = [] for label, group in ds.groupby('letters'): - results.append(group - alt.sel(x=label)) + results.append(group - alt.sel(letters=label)) xr.concat(results, dim='x') Squeezing From 604835603c83618dbe101331813cc6ae428d8be1 Mon Sep 17 00:00:00 2001 From: Spencer Clark Date: Mon, 6 Apr 2020 10:02:10 -0400 Subject: [PATCH 18/39] Add a days_in_month accessor to CFTimeIndex (#3935) * Add days_in_month accessor to CFTimeIndex * Add pull request number * Add version-dependent skip for test * Fix pull request number * Strip outputs from notebook; add documentation update note * typo --- doc/examples/monthly-means.ipynb | 122 +++++-------------------------- doc/weather-climate.rst | 3 +- doc/whats-new.rst | 12 ++- xarray/coding/cftimeindex.py | 3 + xarray/tests/__init__.py | 1 + xarray/tests/test_cftimeindex.py | 9 ++- 6 files changed, 43 insertions(+), 107 deletions(-) diff --git a/doc/examples/monthly-means.ipynb b/doc/examples/monthly-means.ipynb index fad40e019de..bc88f4a9fc9 100644 --- a/doc/examples/monthly-means.ipynb +++ b/doc/examples/monthly-means.ipynb @@ -29,89 +29,9 @@ "import numpy as np\n", "import pandas as pd\n", "import xarray as xr\n", - "from netCDF4 import num2date\n", "import matplotlib.pyplot as plt " ] }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "#### Some calendar information so we can support any netCDF calendar. 
" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "ExecuteTime": { - "end_time": "2018-11-28T20:51:35.991620Z", - "start_time": "2018-11-28T20:51:35.960336Z" - } - }, - "outputs": [], - "source": [ - "dpm = {'noleap': [0, 31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31],\n", - " '365_day': [0, 31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31],\n", - " 'standard': [0, 31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31],\n", - " 'gregorian': [0, 31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31],\n", - " 'proleptic_gregorian': [0, 31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31],\n", - " 'all_leap': [0, 31, 29, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31],\n", - " '366_day': [0, 31, 29, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31],\n", - " '360_day': [0, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30]} " - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "#### A few calendar functions to determine the number of days in each month\n", - "If you were just using the standard calendar, it would be easy to use the `calendar.month_range` function." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "ExecuteTime": { - "end_time": "2018-11-28T20:51:36.015151Z", - "start_time": "2018-11-28T20:51:35.994079Z" - } - }, - "outputs": [], - "source": [ - "def leap_year(year, calendar='standard'):\n", - " \"\"\"Determine if year is a leap year\"\"\"\n", - " leap = False\n", - " if ((calendar in ['standard', 'gregorian',\n", - " 'proleptic_gregorian', 'julian']) and\n", - " (year % 4 == 0)):\n", - " leap = True\n", - " if ((calendar == 'proleptic_gregorian') and\n", - " (year % 100 == 0) and\n", - " (year % 400 != 0)):\n", - " leap = False\n", - " elif ((calendar in ['standard', 'gregorian']) and\n", - " (year % 100 == 0) and (year % 400 != 0) and\n", - " (year < 1583)):\n", - " leap = False\n", - " return leap\n", - "\n", - "def get_dpm(time, calendar='standard'):\n", - " \"\"\"\n", - " return a array of days per month corresponding to the months provided in `months`\n", - " \"\"\"\n", - " month_length = np.zeros(len(time), dtype=np.int)\n", - " \n", - " cal_days = dpm[calendar]\n", - " \n", - " for i, (month, year) in enumerate(zip(time.month, time.year)):\n", - " month_length[i] = cal_days[month]\n", - " if leap_year(year, calendar=calendar) and month == 2:\n", - " month_length[i] += 1\n", - " return month_length" - ] - }, { "cell_type": "markdown", "metadata": {}, @@ -131,7 +51,7 @@ "outputs": [], "source": [ "ds = xr.tutorial.open_dataset('rasm').load()\n", - "print(ds)" + "ds" ] }, { @@ -143,7 +63,17 @@ "- calculate the month lengths for each monthly data record\n", "- calculate weights using `groupby('time.season')`\n", "\n", - "Finally, we just need to multiply our weights by the `Dataset` and sum allong the time dimension. " + "Finally, we just need to multiply our weights by the `Dataset` and sum allong the time dimension. Creating a `DataArray` for the month length is as easy as using the `days_in_month` accessor on the time coordinate. The calendar type, in this case `'noleap'`, is automatically considered in this operation." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "month_length = ds.time.dt.days_in_month\n", + "month_length" ] }, { @@ -157,13 +87,8 @@ }, "outputs": [], "source": [ - "# Make a DataArray with the number of days in each month, size = len(time)\n", - "month_length = xr.DataArray(get_dpm(ds.time.to_index(), calendar='noleap'),\n", - " coords=[ds.time], name='month_length')\n", - "\n", "# Calculate the weights by grouping by 'time.season'.\n", - "# Conversion to float type ('astype(float)') only necessary for Python 2.x\n", - "weights = month_length.groupby('time.season') / month_length.astype(float).groupby('time.season').sum()\n", + "weights = month_length.groupby('time.season') / month_length.groupby('time.season').sum()\n", "\n", "# Test that the sum of the weights for each season is 1.0\n", "np.testing.assert_allclose(weights.groupby('time.season').sum().values, np.ones(4))\n", @@ -183,7 +108,7 @@ }, "outputs": [], "source": [ - "print(ds_weighted)" + "ds_weighted" ] }, { @@ -262,13 +187,9 @@ "source": [ "# Wrap it into a simple function\n", "def season_mean(ds, calendar='standard'):\n", - " # Make a DataArray of season/year groups\n", - " year_season = xr.DataArray(ds.time.to_index().to_period(freq='Q-NOV').to_timestamp(how='E'),\n", - " coords=[ds.time], name='year_season')\n", - "\n", " # Make a DataArray with the number of days in each month, size = len(time)\n", - " month_length = xr.DataArray(get_dpm(ds.time.to_index(), calendar=calendar),\n", - " coords=[ds.time], name='month_length')\n", + " month_length = ds.time.dt.days_in_month\n", + "\n", " # Calculate the weights by grouping by 'time.season'\n", " weights = month_length.groupby('time.season') / month_length.groupby('time.season').sum()\n", "\n", @@ -278,13 +199,6 @@ " # Calculate the weighted average\n", " return (ds * weights).groupby('time.season').sum(dim='time')" ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] } ], "metadata": { @@ -304,7 +218,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.6.8" + "version": "3.7.3" }, "toc": { "base_numbering": 1, @@ -321,5 +235,5 @@ } }, "nbformat": 4, - "nbformat_minor": 2 + "nbformat_minor": 4 } diff --git a/doc/weather-climate.rst b/doc/weather-climate.rst index 9e7c0f1d51d..768cf6556f9 100644 --- a/doc/weather-climate.rst +++ b/doc/weather-climate.rst @@ -95,7 +95,7 @@ For data indexed by a :py:class:`~xarray.CFTimeIndex` xarray currently supports: - Access of basic datetime components via the ``dt`` accessor (in this case just "year", "month", "day", "hour", "minute", "second", "microsecond", - "season", "dayofyear", and "dayofweek"): + "season", "dayofyear", "dayofweek", and "days_in_month"): .. ipython:: python @@ -104,6 +104,7 @@ For data indexed by a :py:class:`~xarray.CFTimeIndex` xarray currently supports: da.time.dt.season da.time.dt.dayofyear da.time.dt.dayofweek + da.time.dt.days_in_month - Rounding of datetimes to fixed frequencies via the ``dt`` accessor: diff --git a/doc/whats-new.rst b/doc/whats-new.rst index dac63600679..f8db5865852 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -48,6 +48,13 @@ New Features By `Todd Jennings `_ - Allow plotting of boolean arrays. 
(:pull:`3766`) By `Marek Jacob `_ +- A ``days_in_month`` accessor for :py:class:`xarray.CFTimeIndex`, analogous to + the ``days_in_month`` accessor for a :py:class:`pandas.DatetimeIndex`, which + returns the days in the month each datetime in the index. Now days in month + weights for both standard and non-standard calendars can be obtained using + the :py:class:`~core.accessor_dt.DatetimeAccessor` (:pull:`3935`). This + feature requires cftime version 1.1.0 or greater. By + `Spencer Clark `_. Bug fixes ~~~~~~~~~ @@ -71,7 +78,10 @@ Documentation :py:meth:`DataArray.diff` so it does document the ``dim`` parameter as required. (:issue:`1040`, :pull:`3909`) By `Justus Magin `_. - +- Updated :doc:`Calculating Seasonal Averages from Timeseries of Monthly Means + ` example notebook to take advantage of the new + ``days_in_month`` accessor for :py:class:`xarray.CFTimeIndex` + (:pull:`3935`). By `Spencer Clark `_. Internal Changes ~~~~~~~~~~~~~~~~ diff --git a/xarray/coding/cftimeindex.py b/xarray/coding/cftimeindex.py index 2e42702caac..6fc28d213dd 100644 --- a/xarray/coding/cftimeindex.py +++ b/xarray/coding/cftimeindex.py @@ -243,6 +243,9 @@ class CFTimeIndex(pd.Index): "dayofyr", "The ordinal day of year of the datetime", "1.0.2.1" ) dayofweek = _field_accessor("dayofwk", "The day of week of the datetime", "1.0.2.1") + days_in_month = _field_accessor( + "daysinmonth", "The number of days in the month of the datetime", "1.1.0.0" + ) date_type = property(get_date_type) def __new__(cls, data, name=None): diff --git a/xarray/tests/__init__.py b/xarray/tests/__init__.py index df86b5715e9..40c5cfa267c 100644 --- a/xarray/tests/__init__.py +++ b/xarray/tests/__init__.py @@ -62,6 +62,7 @@ def LooseVersion(vstring): has_pynio, requires_pynio = _importorskip("Nio") has_pseudonetcdf, requires_pseudonetcdf = _importorskip("PseudoNetCDF") has_cftime, requires_cftime = _importorskip("cftime") +has_cftime_1_1_0, requires_cftime_1_1_0 = _importorskip("cftime", minversion="1.1.0.0") has_dask, requires_dask = _importorskip("dask") has_bottleneck, requires_bottleneck = _importorskip("bottleneck") has_nc_time_axis, requires_nc_time_axis = _importorskip("nc_time_axis") diff --git a/xarray/tests/test_cftimeindex.py b/xarray/tests/test_cftimeindex.py index d31bf9471ea..b30e32c92ad 100644 --- a/xarray/tests/test_cftimeindex.py +++ b/xarray/tests/test_cftimeindex.py @@ -15,7 +15,7 @@ ) from xarray.tests import assert_array_equal, assert_identical -from . import raises_regex, requires_cftime +from . 
import raises_regex, requires_cftime, requires_cftime_1_1_0 from .test_coding_times import ( _ALL_CALENDARS, _NON_STANDARD_CALENDARS, @@ -229,6 +229,13 @@ def test_cftimeindex_dayofweek_accessor(index): assert_array_equal(result, expected) +@requires_cftime_1_1_0 +def test_cftimeindex_days_in_month_accessor(index): + result = index.days_in_month + expected = [date.daysinmonth for date in index] + assert_array_equal(result, expected) + + @requires_cftime @pytest.mark.parametrize( ("string", "date_args", "reso"), From c97e7998b69c5b41e863dfb0a43450491fb8de45 Mon Sep 17 00:00:00 2001 From: dmey Date: Tue, 7 Apr 2020 16:33:45 +0200 Subject: [PATCH 19/39] typo (#3947) --- doc/io.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/io.rst b/doc/io.rst index 6064aa3568a..0c666099df8 100644 --- a/doc/io.rst +++ b/doc/io.rst @@ -281,7 +281,7 @@ library:: # glob expands paths with * to a list of files, like the unix shell paths = sorted(glob(files)) datasets = [xr.open_dataset(p) for p in paths] - combined = xr.concat(dataset, dim) + combined = xr.concat(datasets, dim) return combined combined = read_netcdfs('/all/my/files/*.nc', dim='time') From f07adb293e67ae01d305fd1c8fb42f5bad2238e7 Mon Sep 17 00:00:00 2001 From: Tom Nicholas <35968931+TomNicholas@users.noreply.github.com> Date: Tue, 7 Apr 2020 20:28:24 +0100 Subject: [PATCH 20/39] Updated list of core developers (#3943) * updated core dev list * what's new entry --- doc/roadmap.rst | 5 +++++ doc/whats-new.rst | 2 ++ 2 files changed, 7 insertions(+) diff --git a/doc/roadmap.rst b/doc/roadmap.rst index e336b35b2bc..401dac779ad 100644 --- a/doc/roadmap.rst +++ b/doc/roadmap.rst @@ -219,6 +219,11 @@ Current core developers - Fabien Maussion - Keisuke Fujii - Maximilian Roos +- Deepak Cherian +- Spencer Clark +- Tom Nicholas +- Guido Imperiale +- Justus Magin NumFOCUS ~~~~~~~~ diff --git a/doc/whats-new.rst b/doc/whats-new.rst index f8db5865852..4d9d78cf83e 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -82,6 +82,8 @@ Documentation ` example notebook to take advantage of the new ``days_in_month`` accessor for :py:class:`xarray.CFTimeIndex` (:pull:`3935`). By `Spencer Clark `_. +- Updated the list of current core developers. (:issue:`3892`) + By `Tom Nicholas `_. Internal Changes ~~~~~~~~~~~~~~~~ From 1eedc5c146d9e6ebd46ab2cc8b271e51b3a25959 Mon Sep 17 00:00:00 2001 From: Keisuke Fujii Date: Wed, 8 Apr 2020 11:19:10 +0900 Subject: [PATCH 21/39] Fix wrong order of coordinate converted from pd.series with MultiIndex (#3953) * Fix 3951 * lint * black * Updata whatsnew --- doc/whats-new.rst | 2 ++ xarray/core/dataset.py | 3 +-- xarray/core/indexes.py | 8 ++++++-- xarray/tests/test_dataarray.py | 12 ++++++++++-- 4 files changed, 19 insertions(+), 6 deletions(-) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 4d9d78cf83e..1d00cb369d1 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -58,6 +58,8 @@ New Features Bug fixes ~~~~~~~~~ +- Fix wrong order in converting a ``pd.Series`` with a MultiIndex to ``DataArray``. (:issue:`3951`) + By `Keisuke Fujii `_. - Fix renaming of coords when one or more stacked coords is not in sorted order during stack+groupby+apply operations. 
(:issue:`3287`, :pull:`3906`) By `Spencer Hill `_ diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index 97b3caf2b6e..dd1e31cc61a 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -4604,8 +4604,7 @@ def from_dataframe(cls, dataframe: pd.DataFrame, sparse: bool = False) -> "Datas if not dataframe.columns.is_unique: raise ValueError("cannot convert DataFrame with non-unique columns") - idx = remove_unused_levels_categories(dataframe.index) - dataframe = dataframe.set_index(idx) + idx, dataframe = remove_unused_levels_categories(dataframe.index, dataframe) obj = cls() if isinstance(idx, pd.MultiIndex): diff --git a/xarray/core/indexes.py b/xarray/core/indexes.py index dea1767d50c..a4a5fa2c466 100644 --- a/xarray/core/indexes.py +++ b/xarray/core/indexes.py @@ -9,7 +9,7 @@ from .variable import Variable -def remove_unused_levels_categories(index): +def remove_unused_levels_categories(index, dataframe=None): """ Remove unused levels from MultiIndex and unused categories from CategoricalIndex """ @@ -28,7 +28,11 @@ def remove_unused_levels_categories(index): index = pd.MultiIndex.from_arrays(levels, names=index.names) elif isinstance(index, pd.CategoricalIndex): index = index.remove_unused_categories() - return index + + if dataframe is None: + return index + dataframe = dataframe.set_index(index) + return dataframe.index, dataframe class Indexes(collections.abc.Mapping): diff --git a/xarray/tests/test_dataarray.py b/xarray/tests/test_dataarray.py index cf31182ed30..c3e5aafabfe 100644 --- a/xarray/tests/test_dataarray.py +++ b/xarray/tests/test_dataarray.py @@ -3515,6 +3515,14 @@ def test_to_and_from_series(self): expected_da, DataArray.from_series(actual).drop_vars(["x", "y"]) ) + def test_from_series_multiindex(self): + # GH:3951 + df = pd.DataFrame({"B": [1, 2, 3], "A": [4, 5, 6]}) + df = df.rename_axis("num").rename_axis("alpha", axis=1) + actual = df.stack("alpha").to_xarray() + assert (actual.sel(alpha="B") == [1, 2, 3]).all() + assert (actual.sel(alpha="A") == [4, 5, 6]).all() + @requires_sparse def test_from_series_sparse(self): import sparse @@ -4524,7 +4532,7 @@ def test_argmax(self, x, minindex, maxindex, nanindex): def test_idxmin(self, x, minindex, maxindex, nanindex): ar0 = xr.DataArray( - x, dims=["x"], coords={"x": np.arange(x.size) * 4}, attrs=self.attrs, + x, dims=["x"], coords={"x": np.arange(x.size) * 4}, attrs=self.attrs ) # dim doesn't exist @@ -4620,7 +4628,7 @@ def test_idxmin(self, x, minindex, maxindex, nanindex): def test_idxmax(self, x, minindex, maxindex, nanindex): ar0 = xr.DataArray( - x, dims=["x"], coords={"x": np.arange(x.size) * 4}, attrs=self.attrs, + x, dims=["x"], coords={"x": np.arange(x.size) * 4}, attrs=self.attrs ) # dim doesn't exist From e4f7900886e0be7fbe16a86d38604ce764008dda Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Aur=C3=A9lien=20Ponte?= Date: Thu, 9 Apr 2020 08:26:56 +0200 Subject: [PATCH 22/39] implement a more threadsafe call to colorbar (#3944) Co-authored-by: Aurelien Ponte --- xarray/plot/utils.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/xarray/plot/utils.py b/xarray/plot/utils.py index a756a1c1fc8..fb744e43fc3 100644 --- a/xarray/plot/utils.py +++ b/xarray/plot/utils.py @@ -581,14 +581,15 @@ def _is_numeric(arr): def _add_colorbar(primitive, ax, cbar_ax, cbar_kwargs, cmap_params): - plt = import_matplotlib_pyplot() + cbar_kwargs.setdefault("extend", cmap_params["extend"]) if cbar_ax is None: cbar_kwargs.setdefault("ax", ax) else: cbar_kwargs.setdefault("cax", cbar_ax) - cbar = 
plt.colorbar(primitive, **cbar_kwargs)
+    fig = ax.get_figure()
+    cbar = fig.colorbar(primitive, **cbar_kwargs)
     return cbar
 

From 7a037d5030881c7b52101515863b7fab56a739fc Mon Sep 17 00:00:00 2001
From: Etienne Combrisson 
Date: Fri, 10 Apr 2020 15:17:12 +0200
Subject: [PATCH 23/39] New coords to existing dim (doc) (#3958)

* new coords to existing dim (doc)

* Update xarray/core/common.py

Co-Authored-By: Maximilian Roos <5635139+max-sixty@users.noreply.github.com>

* rm spaces

* update whatsnew

* swap issue and PR

* Update xarray/core/common.py

add space

Co-Authored-By: keewis 

* Update xarray/core/common.py

Co-Authored-By: keewis 

* Update xarray/core/common.py

Co-Authored-By: keewis 

Co-authored-by: Maximilian Roos <5635139+max-sixty@users.noreply.github.com>
Co-authored-by: keewis 
---
 doc/whats-new.rst     |  4 ++++
 xarray/core/common.py | 29 +++++++++++++++++++++++------
 2 files changed, 27 insertions(+), 6 deletions(-)

diff --git a/doc/whats-new.rst b/doc/whats-new.rst
index 1d00cb369d1..9ee350e084d 100644
--- a/doc/whats-new.rst
+++ b/doc/whats-new.rst
@@ -76,6 +76,10 @@ Bug fixes
 
 Documentation
 ~~~~~~~~~~~~~
+- update the docstring of :py:meth:`DataArray.assign_coords` : clarify how to
+  add a new coordinate to an existing dimension and illustrative example
+  (:issue:`3952`, :pull:`3958`) By
+  `Etienne Combrisson `_.
 - update the docstring of :py:meth:`Dataset.diff` and
   :py:meth:`DataArray.diff` so it does document the ``dim``
   parameter as required. (:issue:`1040`, :pull:`3909`)
diff --git a/xarray/core/common.py b/xarray/core/common.py
index a003642076f..e8c43e964aa 100644
--- a/xarray/core/common.py
+++ b/xarray/core/common.py
@@ -399,10 +399,14 @@ def assign_coords(self, coords=None, **coords_kwargs):
         Parameters
         ----------
         coords : dict, optional
-            A dict with keys which are variables names. If the values are
-            callable, they are computed on this object and assigned to new
-            coordinate variables. If the values are not callable,
-            (e.g. a ``DataArray``, scalar, or array), they are simply assigned.
+            A dict where the keys are the names of the coordinates
+            with the new values to assign. If the values are callable, they are
+            computed on this object and assigned to new coordinate variables.
+            If the values are not callable, (e.g. a ``DataArray``, scalar, or
+            array), they are simply assigned. A new coordinate can also be
+            defined and attached to an existing dimension using a tuple with
+            the first element the dimension name and the second element the
+            values for this new coordinate.
         **coords_kwargs : keyword, value pairs, optional
             The keyword arguments form of ``coords``.
 
@@ -440,10 +444,23 @@ def assign_coords(self, coords=None, **coords_kwargs):
         Coordinates:
           * lon      (lon) int64 -2 -1 0 1
 
+        New coordinate can also be attached to an existing dimension:
+
+        >>> lon_2 = np.array([300, 289, 0, 1])
+        >>> da.assign_coords(lon_2=('lon', lon_2))
+        <xarray.DataArray (lon: 4)>
+        array([0.28298 , 0.667347, 0.657938, 0.177683])
+        Coordinates:
+          * lon      (lon) int64 358 359 0 1
+            lon_2    (lon) int64 300 289 0 1
+
+        Note that the same result can also be obtained with a dict e.g.
+        >>> _ = da.assign_coords({"lon_2": ('lon', lon_2)})
+
         Notes
         -----
-        Since ``coords_kwargs`` is a dictionary, the order of your arguments may
-        not be preserved, and so the order of the new variables is not well
+        Since ``coords_kwargs`` is a dictionary, the order of your arguments
+        may not be preserved, and so the order of the new variables is not well
         defined.
Assigning multiple variables within the same ``assign_coords`` is possible, but you cannot reference other variables created within the same ``assign_coords`` call. From c4e88829a644d6f6d4828069a06bbefaaefd5984 Mon Sep 17 00:00:00 2001 From: Etienne Combrisson Date: Fri, 10 Apr 2020 17:20:34 +0200 Subject: [PATCH 24/39] Empty line missing for DataArray.assign_coords doc (#3963) --- xarray/core/common.py | 1 + 1 file changed, 1 insertion(+) diff --git a/xarray/core/common.py b/xarray/core/common.py index e8c43e964aa..8f6d57e9f12 100644 --- a/xarray/core/common.py +++ b/xarray/core/common.py @@ -455,6 +455,7 @@ def assign_coords(self, coords=None, **coords_kwargs): lon_2 (lon) int64 300 289 0 1 Note that the same result can also be obtained with a dict e.g. + >>> _ = da.assign_coords({"lon_2": ('lon', lon_2)}) Notes From 745658b449439e7f10077a36eb1477100100e356 Mon Sep 17 00:00:00 2001 From: Taher Chegini Date: Fri, 10 Apr 2020 22:34:58 -0500 Subject: [PATCH 25/39] RasterioDeprecationWarning (#3964) * RasterioDeprecationWarning When a `vrt` is opened, a deprecation warning will be shown: ```python /home/taher/.local/apps/miniconda/envs/hydrodata/lib/python3.8/site-packages/xarray/backends/rasterio_.py:241: RasterioDeprecationWarning: dst_nodata will be removed in 1.1, use nodata riods = WarpedVRT(riods, **vrt_params) /home/taher/.local/apps/miniconda/envs/hydrodata/lib/python3.8/site-packages/xarray/backends/rasterio_.py:35: RasterioDeprecationWarning: dst_nodata will be removed in 1.1, use nodata riods = WarpedVRT(riods, **vrt_params) /home/taher/.local/apps/miniconda/envs/hydrodata/lib/python3.8/site-packages/xarray/backends/rasterio_.py:122: RasterioDeprecationWarning: dst_nodata will be removed in 1.1, use nodata riods = WarpedVRT(riods, **self.vrt_params) ``` This change can be confirmed in `rasterio`'s documentation where `dst_nodata` is replaced with nodata. https://rasterio.readthedocs.io/en/latest/api/rasterio.vrt.html * added changes in #3964 * Update whats-new.rst --- doc/whats-new.rst | 2 ++ xarray/backends/rasterio_.py | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 9ee350e084d..2838e0b8e80 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -73,6 +73,8 @@ Bug fixes - Fix bug where plotting line plots with 2D coordinates depended on dimension order. (:issue:`3933`) By `Tom Nicholas `_. +- Fix ``RasterioDeprecationWarning`` when using a ``vrt`` in ``open_rasterio``. (:issue:`3964`) + By `Taher Chegini `_. Documentation ~~~~~~~~~~~~~ diff --git a/xarray/backends/rasterio_.py b/xarray/backends/rasterio_.py index d041e430db9..77beffd09b1 100644 --- a/xarray/backends/rasterio_.py +++ b/xarray/backends/rasterio_.py @@ -224,7 +224,7 @@ def open_rasterio(filename, parse_coordinates=None, chunks=None, cache=None, loc crs=vrt.crs.to_string(), resampling=vrt.resampling, src_nodata=vrt.src_nodata, - dst_nodata=vrt.dst_nodata, + nodata=vrt.nodata, tolerance=vrt.tolerance, transform=vrt.transform, width=vrt.width, From 2c77eb531b6689f9f1d2adbde0d8bf852f1f7362 Mon Sep 17 00:00:00 2001 From: Deepak Cherian Date: Sat, 11 Apr 2020 10:11:51 -0600 Subject: [PATCH 26/39] facetgrid: Ensure that colormap params are only determined once. (#3915) * facetgrid: Ensure that colormap params are only determined once. 
Fixes #3569 * blacken * Add test * update whats-new --- doc/whats-new.rst | 4 +++- xarray/plot/facetgrid.py | 4 +++- xarray/plot/plot.py | 5 ++++- xarray/plot/utils.py | 10 +++++++++- xarray/tests/test_plot.py | 12 ++++++++++++ 5 files changed, 31 insertions(+), 4 deletions(-) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 2838e0b8e80..e4c3a4d533f 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -66,9 +66,11 @@ Bug fixes - Fix a regression where deleting a coordinate from a copied :py:class:`DataArray` can affect the original :py:class:`Dataarray`. (:issue:`3899`, :pull:`3871`) By `Todd Jennings `_ +- Fix :py:class:`~xarray.plot.FacetGrid` plots with a single contour. (:issue:`3569`, :pull:`3915`). + By `Deepak Cherian `_ - Use divergent colormap if ``levels`` spans 0. (:issue:`3524`) By `Deepak Cherian `_ -- Fix ``FacetGrid`` when ``vmin == vmax``. (:issue:`3734`) +- Fix :py:class:`~xarray.plot.FacetGrid` when ``vmin == vmax``. (:issue:`3734`) By `Deepak Cherian `_ - Fix bug where plotting line plots with 2D coordinates depended on dimension order. (:issue:`3933`) diff --git a/xarray/plot/facetgrid.py b/xarray/plot/facetgrid.py index 4f3268c1203..819eded694e 100644 --- a/xarray/plot/facetgrid.py +++ b/xarray/plot/facetgrid.py @@ -273,7 +273,9 @@ def map_dataarray(self, func, x, y, **kwargs): # None is the sentinel value if d is not None: subset = self.data.loc[d] - mappable = func(subset, x=x, y=y, ax=ax, **func_kwargs) + mappable = func( + subset, x=x, y=y, ax=ax, **func_kwargs, _is_facetgrid=True + ) self._mappables.append(mappable) self._finalize_grid(x, y) diff --git a/xarray/plot/plot.py b/xarray/plot/plot.py index 0fe302833d4..4657bee9415 100644 --- a/xarray/plot/plot.py +++ b/xarray/plot/plot.py @@ -693,7 +693,10 @@ def newplotfunc( _ensure_plottable(xplt, yplt, zval) cmap_params, cbar_kwargs = _process_cmap_cbar_kwargs( - plotfunc, zval.data, **locals() + plotfunc, + zval.data, + **locals(), + _is_facetgrid=kwargs.pop("_is_facetgrid", False), ) if "contour" in plotfunc.__name__: diff --git a/xarray/plot/utils.py b/xarray/plot/utils.py index fb744e43fc3..c3512828888 100644 --- a/xarray/plot/utils.py +++ b/xarray/plot/utils.py @@ -153,6 +153,7 @@ def _determine_cmap_params( levels=None, filled=True, norm=None, + _is_facetgrid=False, ): """ Use some heuristics to set good defaults for colorbar and range. 
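
For context between these two hunks, a minimal sketch of what the
``_is_facetgrid`` plumbing fixes (synthetic data mirroring the GH3569
regression test further below; illustrative only, not part of the diff):

    import numpy as np
    import xarray as xr

    x, y = np.meshgrid(np.arange(12), np.arange(12))
    da = xr.concat(
        [xr.DataArray(np.hypot(x, y)), xr.DataArray(np.hypot(x, y) + 1)],
        dim="time",
    )
    da["time"] = [0, 1]
    # colormap parameters are now determined once by the FacetGrid; each
    # per-facet call passes _is_facetgrid=True and simply reuses them, so a
    # single shared contour level no longer fails on individual facets
    da.plot.contour(col="time", levels=[4], colors="k")
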
@@ -736,6 +737,7 @@ def _process_cmap_cbar_kwargs( colors=None, cbar_kwargs: Union[Iterable[Tuple[str, Any]], Mapping[str, Any]] = None, levels=None, + _is_facetgrid=False, **kwargs, ): """ @@ -782,6 +784,12 @@ def _process_cmap_cbar_kwargs( cmap_args = getfullargspec(_determine_cmap_params).args cmap_kwargs.update((a, kwargs[a]) for a in cmap_args if a in kwargs) - cmap_params = _determine_cmap_params(**cmap_kwargs) + if not _is_facetgrid: + cmap_params = _determine_cmap_params(**cmap_kwargs) + else: + cmap_params = { + k: cmap_kwargs[k] + for k in ["vmin", "vmax", "cmap", "extend", "levels", "norm"] + } return cmap_params, cbar_kwargs diff --git a/xarray/tests/test_plot.py b/xarray/tests/test_plot.py index d0a8a02a3a4..bf1f9ed60bb 100644 --- a/xarray/tests/test_plot.py +++ b/xarray/tests/test_plot.py @@ -2319,3 +2319,15 @@ def test_plot_transposes_properly(plotfunc): # pcolormesh returns 1D array but imshow returns a 2D array so it is necessary # to ravel() on the LHS assert np.all(hdl.get_array().ravel() == da.to_masked_array().ravel()) + + +@requires_matplotlib +def test_facetgrid_single_contour(): + # regression test for GH3569 + x, y = np.meshgrid(np.arange(12), np.arange(12)) + z = xr.DataArray(np.sqrt(x ** 2 + y ** 2)) + z2 = xr.DataArray(np.sqrt(x ** 2 + y ** 2) + 1) + ds = xr.concat([z, z2], dim="time") + ds["time"] = [0, 1] + + ds.plot.contour(col="time", levels=[4], colors=["k"]) From cb3326e0b9d80efdf099013d3d9664a6797d74af Mon Sep 17 00:00:00 2001 From: keewis Date: Fri, 17 Apr 2020 07:59:13 +0200 Subject: [PATCH 27/39] actually use preformatted text in the details summary (#3978) --- .github/ISSUE_TEMPLATE/bug_report.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/ISSUE_TEMPLATE/bug_report.md b/.github/ISSUE_TEMPLATE/bug_report.md index 37dbcd2ebb0..c712cf27979 100644 --- a/.github/ISSUE_TEMPLATE/bug_report.md +++ b/.github/ISSUE_TEMPLATE/bug_report.md @@ -27,7 +27,7 @@ assignees: '' #### Versions -
<details><summary>Output of `xr.show_versions()`</summary>
+<details><summary>Output of <tt>xr.show_versions()</tt></summary>
 
 <!-- Paste the output here xr.show_versions() here -->
 
 </details>

From 2c92f69ab208c02344b8d5e5edef3e71e3495d3b Mon Sep 17 00:00:00 2001
From: Tom Nicholas <35968931+TomNicholas@users.noreply.github.com>
Date: Fri, 17 Apr 2020 21:16:26 +0100
Subject: [PATCH 28/39] keep attrs in interpolate_na (#3970)

* interp_na now preserves attrs

* added test

* Added keep_attrs kwarg to interpolate_na

* updated what's new

* changed default to true
---
 doc/whats-new.rst            |  4 ++++
 xarray/core/dataarray.py     |  6 ++++++
 xarray/core/missing.py       | 12 +++++++++---
 xarray/tests/test_missing.py | 11 +++++++++++
 4 files changed, 30 insertions(+), 3 deletions(-)

diff --git a/doc/whats-new.rst b/doc/whats-new.rst
index e4c3a4d533f..8684e9c60a7 100644
--- a/doc/whats-new.rst
+++ b/doc/whats-new.rst
@@ -77,6 +77,10 @@ Bug fixes
   By `Tom Nicholas `_.
 - Fix ``RasterioDeprecationWarning`` when using a ``vrt`` in ``open_rasterio``. (:issue:`3964`)
   By `Taher Chegini `_.
+- Fix bug causing :py:meth:`DataArray.interpolate_na` to always drop attributes,
+  and added `keep_attrs` argument. (:issue:`3968`)
+  By `Tom Nicholas `_.
+
 
 Documentation
 ~~~~~~~~~~~~~
diff --git a/xarray/core/dataarray.py b/xarray/core/dataarray.py
index 63cba53b689..9d45dd15887 100644
--- a/xarray/core/dataarray.py
+++ b/xarray/core/dataarray.py
@@ -2098,6 +2098,7 @@ def interpolate_na(
         max_gap: Union[
             int, float, str, pd.Timedelta, np.timedelta64, datetime.timedelta
         ] = None,
+        keep_attrs: bool = None,
         **kwargs: Any,
     ) -> "DataArray":
         """Fill in NaNs by interpolating according to different methods.
@@ -2152,6 +2153,10 @@ def interpolate_na(
             * x        (x) int64 0 1 2 3 4 5 6 7 8
           The gap lengths are 3-0 = 3; 6-3 = 3; and 8-6 = 2 respectively
 
+        keep_attrs : bool, default True
+            If True, the dataarray's attributes (`attrs`) will be copied from
+            the original object to the new one. If False, the new
+            object will be returned without attributes.
         kwargs : dict, optional
             parameters passed verbatim to the underlying interpolation function
 
@@ -2174,6 +2179,7 @@ def interpolate_na(
             limit=limit,
             use_coordinate=use_coordinate,
             max_gap=max_gap,
+            keep_attrs=keep_attrs,
             **kwargs,
         )
 
diff --git a/xarray/core/missing.py b/xarray/core/missing.py
index 40f010b3514..f973b4a5468 100644
--- a/xarray/core/missing.py
+++ b/xarray/core/missing.py
@@ -11,6 +11,7 @@
 from .common import _contains_datetime_like_objects, ones_like
 from .computation import apply_ufunc
 from .duck_array_ops import dask_array_type, datetime_to_numeric, timedelta_to_numeric
+from .options import _get_keep_attrs
 from .utils import OrderedSet, is_scalar
 from .variable import Variable, broadcast_variables
 
@@ -294,6 +295,7 @@ def interp_na(
     method: str = "linear",
     limit: int = None,
     max_gap: Union[int, float, str, pd.Timedelta, np.timedelta64, dt.timedelta] = None,
+    keep_attrs: bool = None,
     **kwargs,
 ):
     """Interpolate values according to different methods.
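
The effect of the new ``keep_attrs`` keyword, sketched on invented data
between these two hunks (note that preserving attributes is also the default,
via the ``_get_keep_attrs(default=True)`` call added below; this snippet is
not part of the diff):

    import numpy as np
    import xarray as xr

    da = xr.DataArray([0.0, np.nan, 2.0], dims="x", attrs={"units": "m"})
    filled = da.interpolate_na(dim="x", keep_attrs=True)
    assert filled.attrs == {"units": "m"}  # attrs are no longer dropped
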
@@ -330,19 +332,22 @@ def interp_na( interp_class, kwargs = _get_interpolator(method, **kwargs) interpolator = partial(func_interpolate_na, interp_class, **kwargs) + if keep_attrs is None: + keep_attrs = _get_keep_attrs(default=True) + with warnings.catch_warnings(): warnings.filterwarnings("ignore", "overflow", RuntimeWarning) warnings.filterwarnings("ignore", "invalid value", RuntimeWarning) arr = apply_ufunc( interpolator, - index, self, + index, input_core_dims=[[dim], [dim]], output_core_dims=[[dim]], output_dtypes=[self.dtype], dask="parallelized", vectorize=True, - keep_attrs=True, + keep_attrs=keep_attrs, ).transpose(*self.dims) if limit is not None: @@ -359,8 +364,9 @@ def interp_na( return arr -def func_interpolate_na(interpolator, x, y, **kwargs): +def func_interpolate_na(interpolator, y, x, **kwargs): """helper function to apply interpolation along 1 dimension""" + # reversed arguments are so that attrs are preserved from da, not index # it would be nice if this wasn't necessary, works around: # "ValueError: assignment destination is read-only" in assignment below out = y.copy() diff --git a/xarray/tests/test_missing.py b/xarray/tests/test_missing.py index 35c71c2854c..731cd165244 100644 --- a/xarray/tests/test_missing.py +++ b/xarray/tests/test_missing.py @@ -231,6 +231,17 @@ def test_interpolate_kwargs(): assert_equal(actual, expected) +def test_interpolate_keep_attrs(): + vals = np.array([1, 2, 3, 4, 5, 6], dtype=np.float64) + mvals = vals.copy() + mvals[2] = np.nan + missing = xr.DataArray(mvals, dims="x") + missing.attrs = {"test": "value"} + + actual = missing.interpolate_na(dim="x", keep_attrs=True) + assert actual.attrs == {"test": "value"} + + def test_interpolate(): vals = np.array([1, 2, 3, 4, 5, 6], dtype=np.float64) From 6a6f2c8748464c89a61dbdbc9636bce78a965369 Mon Sep 17 00:00:00 2001 From: risebell Date: Sat, 18 Apr 2020 17:39:08 +0200 Subject: [PATCH 29/39] Add multi-dimensional extrapolation example and mention different behavior of kwargs in interp (#3956) * Add note for different behavior of kwargs in 1-d and n-d interpolation * Add multi-dimensional extrapolation example * Update whats-new.rst --- doc/interpolation.rst | 8 +++++++- doc/whats-new.rst | 4 ++++ xarray/core/dataarray.py | 4 +++- xarray/core/dataset.py | 4 +++- 4 files changed, 17 insertions(+), 3 deletions(-) diff --git a/doc/interpolation.rst b/doc/interpolation.rst index 63e9a7cd35e..4cf39807e5a 100644 --- a/doc/interpolation.rst +++ b/doc/interpolation.rst @@ -150,8 +150,14 @@ Additional keyword arguments can be passed to scipy's functions. # fill 0 for the outside of the original coordinates. da.interp(x=np.linspace(-0.5, 1.5, 10), kwargs={'fill_value': 0.0}) - # extrapolation + # 1-dimensional extrapolation da.interp(x=np.linspace(-0.5, 1.5, 10), kwargs={'fill_value': 'extrapolate'}) + # multi-dimensional extrapolation + da = xr.DataArray(np.sin(0.3 * np.arange(12).reshape(4, 3)), + [('time', np.arange(4)), + ('space', [0.1, 0.2, 0.3])]) + + da.interp(time=4, space=np.linspace(-0.1, 0.5, 10), kwargs={'fill_value': None}) Advanced Interpolation diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 8684e9c60a7..d9573f32690 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -98,6 +98,10 @@ Documentation (:pull:`3935`). By `Spencer Clark `_. - Updated the list of current core developers. (:issue:`3892`) By `Tom Nicholas `_. 
+- Add example for multi-dimensional extrapolation and note different behavior + of ``kwargs`` in :py:meth:`Dataset.interp` and :py:meth:`DataArray.interp` + for 1-d and n-d interpolation (:pull:`3956`). + By `Matthias Riße `_. Internal Changes ~~~~~~~~~~~~~~~~ diff --git a/xarray/core/dataarray.py b/xarray/core/dataarray.py index 9d45dd15887..ffa05ca64f0 100644 --- a/xarray/core/dataarray.py +++ b/xarray/core/dataarray.py @@ -1366,7 +1366,9 @@ def interp( first. If True, x has to be an array of monotonically increasing values. kwargs: dictionary - Additional keyword passed to scipy's interpolator. + Additional keyword arguments passed to scipy's interpolator. Valid + options and their behavior depend on if 1-dimensional or + multi-dimensional interpolation is used. ``**coords_kwargs`` : {dim: coordinate, ...}, optional The keyword arguments form of ``coords``. One of coords or coords_kwargs must be provided. diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index dd1e31cc61a..d811d54847f 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -2570,7 +2570,9 @@ def interp( coordinates are assumed to be an array of monotonically increasing values. kwargs: dictionary, optional - Additional keyword passed to scipy's interpolator. + Additional keyword arguments passed to scipy's interpolator. Valid + options and their behavior depend on if 1-dimensional or + multi-dimensional interpolation is used. **coords_kwargs : {dim: coordinate, ...}, optional The keyword arguments form of ``coords``. One of coords or coords_kwargs must be provided. From b62a3fb2e96e6d88bb866453958a902fd782adc8 Mon Sep 17 00:00:00 2001 From: keewis Date: Tue, 21 Apr 2020 14:53:44 +0200 Subject: [PATCH 30/39] Fix distributed tests on upstream-dev (#3989) * switch from pre-3.5 style coroutines to async def / await * update whats-new.rst --- doc/whats-new.rst | 4 +++- xarray/tests/test_distributed.py | 8 ++++---- 2 files changed, 7 insertions(+), 5 deletions(-) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index d9573f32690..46319730d21 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -111,7 +111,9 @@ Internal Changes - Add a CI job that runs the tests with every optional dependency except ``dask``. (:issue:`3794`, :pull:`3919`) By `Justus Magin `_. - +- Use ``async`` / ``await`` for the asynchronous distributed + tests. (:issue:`3987`, :pull:`3989`) + By `Justus Magin `_. .. 
_whats-new.0.15.1: diff --git a/xarray/tests/test_distributed.py b/xarray/tests/test_distributed.py index b3c0ce37a54..8011171d223 100644 --- a/xarray/tests/test_distributed.py +++ b/xarray/tests/test_distributed.py @@ -186,7 +186,7 @@ def test_dask_distributed_cfgrib_integration_test(loop): reason="Need recent distributed version to clean up get", ) @gen_cluster(client=True, timeout=None) -def test_async(c, s, a, b): +async def test_async(c, s, a, b): x = create_test_data() assert not dask.is_dask_collection(x) y = x.chunk({"dim2": 4}) + 10 @@ -206,7 +206,7 @@ def test_async(c, s, a, b): assert futures_of(z) future = c.compute(z) - w = yield future + w = await future assert not dask.is_dask_collection(w) assert_allclose(x + 10, w) @@ -218,7 +218,7 @@ def test_hdf5_lock(): @gen_cluster(client=True) -def test_serializable_locks(c, s, a, b): +async def test_serializable_locks(c, s, a, b): def f(x, lock=None): with lock: return x + 1 @@ -233,7 +233,7 @@ def f(x, lock=None): ]: futures = c.map(f, list(range(10)), lock=lock) - yield c.gather(futures) + await c.gather(futures) lock2 = pickle.loads(pickle.dumps(lock)) assert type(lock) == type(lock2) From 0cd14a564cbc46b33e7c12e9a254c3b287dd3993 Mon Sep 17 00:00:00 2001 From: keewis Date: Wed, 22 Apr 2020 12:21:38 +0200 Subject: [PATCH 31/39] Silence sphinx warnings (#3990) * generate documentation pages for the idxmin / idxmax methods * fix a few links * convert the mention of coarsen to double backtick quoted and add rolling --- doc/api-hidden.rst | 4 ++++ doc/whats-new.rst | 8 ++++---- 2 files changed, 8 insertions(+), 4 deletions(-) diff --git a/doc/api-hidden.rst b/doc/api-hidden.rst index cc9517a98ba..313428c29d2 100644 --- a/doc/api-hidden.rst +++ b/doc/api-hidden.rst @@ -18,6 +18,8 @@ Dataset.any Dataset.argmax Dataset.argmin + Dataset.idxmax + Dataset.idxmin Dataset.max Dataset.min Dataset.mean @@ -160,6 +162,8 @@ DataArray.any DataArray.argmax DataArray.argmin + DataArray.idxmax + DataArray.idxmin DataArray.max DataArray.min DataArray.mean diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 46319730d21..8b15e57873b 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -64,7 +64,7 @@ Bug fixes sorted order during stack+groupby+apply operations. (:issue:`3287`, :pull:`3906`) By `Spencer Hill `_ - Fix a regression where deleting a coordinate from a copied :py:class:`DataArray` - can affect the original :py:class:`Dataarray`. (:issue:`3899`, :pull:`3871`) + can affect the original :py:class:`DataArray`. (:issue:`3899`, :pull:`3871`) By `Todd Jennings `_ - Fix :py:class:`~xarray.plot.FacetGrid` plots with a single contour. (:issue:`3569`, :pull:`3915`). By `Deepak Cherian `_ @@ -200,13 +200,13 @@ Bug fixes - xarray now respects the over, under and bad colors if set on a provided colormap. (:issue:`3590`, :pull:`3601`) By `johnomotani `_. -- :py:func:`coarsen` now respects ``xr.set_options(keep_attrs=True)`` +- ``coarsen`` and ``rolling`` now respect ``xr.set_options(keep_attrs=True)`` to preserve attributes. :py:meth:`Dataset.coarsen` accepts a keyword argument ``keep_attrs`` to change this setting. (:issue:`3376`, :pull:`3801`) By `Andrew Thomas `_. - Delete associated indexes when deleting coordinate variables. (:issue:`3746`). By `Deepak Cherian `_. -- Fix :py:meth:`xarray.core.dataset.Dataset.to_zarr` when using `append_dim` and `group` +- Fix :py:meth:`Dataset.to_zarr` when using ``append_dim`` and ``group`` simultaneously. (:issue:`3170`). By `Matthias Meyer `_. 
- Fix html repr on :py:class:`Dataset` with non-string keys (:pull:`3807`). By `Maximilian Roos `_. @@ -244,7 +244,7 @@ Internal Changes By `Maximilian Roos `_ - Remove xfails for scipy 1.0.1 for tests that append to netCDF files (:pull:`3805`). By `Mathias Hauser `_. -- Remove conversion to :py:class:`pandas.Panel`, given its removal in pandas +- Remove conversion to ``pandas.Panel``, given its removal in pandas in favor of xarray's objects. By `Maximilian Roos `_ From e1f0f987c76eb170f5b7ca26c1153c4e34760f0e Mon Sep 17 00:00:00 2001 From: Deepak Cherian Date: Wed, 22 Apr 2020 19:27:58 +0000 Subject: [PATCH 32/39] Better chunking error messages for zarr backend (#3983) --- doc/whats-new.rst | 2 ++ xarray/backends/zarr.py | 50 ++++++++++++++++++++--------------- xarray/tests/test_backends.py | 22 ++++++++++++--- 3 files changed, 50 insertions(+), 24 deletions(-) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 8b15e57873b..fc95e26dabd 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -105,6 +105,8 @@ Documentation Internal Changes ~~~~~~~~~~~~~~~~ +- Raise more informative error messages for chunk size conflicts when writing to zarr files. + By `Deepak Cherian `_. - Run the ``isort`` pre-commit hook only on python source files and update the ``flake8`` version. (:issue:`3750`, :pull:`3711`) By `Justus Magin `_. diff --git a/xarray/backends/zarr.py b/xarray/backends/zarr.py index c262dae2811..973c167911e 100644 --- a/xarray/backends/zarr.py +++ b/xarray/backends/zarr.py @@ -65,7 +65,7 @@ def __getitem__(self, key): # could possibly have a work-around for 0d data here -def _determine_zarr_chunks(enc_chunks, var_chunks, ndim): +def _determine_zarr_chunks(enc_chunks, var_chunks, ndim, name): """ Given encoding chunks (possibly None) and variable chunks (possibly None) """ @@ -88,15 +88,16 @@ def _determine_zarr_chunks(enc_chunks, var_chunks, ndim): if var_chunks and enc_chunks is None: if any(len(set(chunks[:-1])) > 1 for chunks in var_chunks): raise ValueError( - "Zarr requires uniform chunk sizes except for final chunk." - " Variable dask chunks %r are incompatible. Consider " - "rechunking using `chunk()`." % (var_chunks,) + "Zarr requires uniform chunk sizes except for final chunk. " + f"Variable named {name!r} has incompatible dask chunks: {var_chunks!r}. " + "Consider rechunking using `chunk()`." ) if any((chunks[0] < chunks[-1]) for chunks in var_chunks): raise ValueError( "Final chunk of Zarr array must be the same size or smaller " - "than the first. Variable Dask chunks %r are incompatible. " - "Consider rechunking using `chunk()`." % var_chunks + f"than the first. Variable named {name!r} has incompatible Dask chunks {var_chunks!r}." + "Consider either rechunking using `chunk()` or instead deleting " + "or modifying `encoding['chunks']`." ) # return the first chunk for each dimension return tuple(chunk[0] for chunk in var_chunks) @@ -114,13 +115,15 @@ def _determine_zarr_chunks(enc_chunks, var_chunks, ndim): if len(enc_chunks_tuple) != ndim: # throw away encoding chunks, start over - return _determine_zarr_chunks(None, var_chunks, ndim) + return _determine_zarr_chunks(None, var_chunks, ndim, name) for x in enc_chunks_tuple: if not isinstance(x, int): raise TypeError( - "zarr chunks must be an int or a tuple of ints. " - "Instead found %r" % (enc_chunks_tuple,) + "zarr chunk sizes specified in `encoding['chunks']` " + "must be an int or a tuple of ints. " + f"Instead found encoding['chunks']={enc_chunks_tuple!r} " + f"for variable named {name!r}." 
) # if there are chunks in encoding and the variable data is a numpy array, @@ -142,19 +145,22 @@ def _determine_zarr_chunks(enc_chunks, var_chunks, ndim): for dchunk in dchunks[:-1]: if dchunk % zchunk: raise NotImplementedError( - "Specified zarr chunks %r would overlap multiple dask " - "chunks %r. This is not implemented in xarray yet. " - " Consider rechunking the data using " - "`chunk()` or specifying different chunks in encoding." - % (enc_chunks_tuple, var_chunks) + f"Specified zarr chunks encoding['chunks']={enc_chunks_tuple!r} for " + f"variable named {name!r} would overlap multiple dask chunks {var_chunks!r}. " + "This is not implemented in xarray yet. " + "Consider either rechunking using `chunk()` or instead deleting " + "or modifying `encoding['chunks']`." ) if dchunks[-1] > zchunk: raise ValueError( "Final chunk of Zarr array must be the same size or " - "smaller than the first. The specified Zarr chunk " - "encoding is %r, but %r in variable Dask chunks %r is " - "incompatible. Consider rechunking using `chunk()`." - % (enc_chunks_tuple, dchunks, var_chunks) + "smaller than the first. " + f"Specified Zarr chunk encoding['chunks']={enc_chunks_tuple}, " + f"for variable named {name!r} " + f"but {dchunks} in the variable's Dask chunks {var_chunks} is " + "incompatible with this encoding. " + "Consider either rechunking using `chunk()` or instead deleting " + "or modifying `encoding['chunks']`." ) return enc_chunks_tuple @@ -177,7 +183,7 @@ def _get_zarr_dims_and_attrs(zarr_obj, dimension_key): return dimensions, attributes -def extract_zarr_variable_encoding(variable, raise_on_invalid=False): +def extract_zarr_variable_encoding(variable, raise_on_invalid=False, name=None): """ Extract zarr encoding dictionary from xarray Variable @@ -207,7 +213,7 @@ def extract_zarr_variable_encoding(variable, raise_on_invalid=False): del encoding[k] chunks = _determine_zarr_chunks( - encoding.get("chunks"), variable.chunks, variable.ndim + encoding.get("chunks"), variable.chunks, variable.ndim, name ) encoding["chunks"] = chunks return encoding @@ -453,7 +459,9 @@ def set_variables(self, variables, check_encoding_set, writer, unlimited_dims=No writer.add(v.data, zarr_array, region=tuple(new_region)) else: # new variable - encoding = extract_zarr_variable_encoding(v, raise_on_invalid=check) + encoding = extract_zarr_variable_encoding( + v, raise_on_invalid=check, name=vn + ) encoded_attrs = {} # the magic for storing the hidden dimension data encoded_attrs[DIMENSION_KEY] = dims diff --git a/xarray/tests/test_backends.py b/xarray/tests/test_backends.py index 3fde292c04f..916c29ba7bd 100644 --- a/xarray/tests/test_backends.py +++ b/xarray/tests/test_backends.py @@ -1685,11 +1685,27 @@ def test_chunk_encoding_with_dask(self): # should fail if dask_chunks are irregular... 
ds_chunk_irreg = ds.chunk({"x": (5, 4, 3)}) - with pytest.raises(ValueError) as e_info: + with raises_regex(ValueError, "uniform chunk sizes."): with self.roundtrip(ds_chunk_irreg) as actual: pass - # make sure this error message is correct and not some other error - assert e_info.match("chunks") + + # should fail if encoding["chunks"] clashes with dask_chunks + badenc = ds.chunk({"x": 4}) + badenc.var1.encoding["chunks"] = (6,) + with raises_regex(NotImplementedError, "named 'var1' would overlap"): + with self.roundtrip(badenc) as actual: + pass + + badenc.var1.encoding["chunks"] = (2,) + with raises_regex(ValueError, "Specified Zarr chunk encoding"): + with self.roundtrip(badenc) as actual: + pass + + badenc = badenc.chunk({"x": (3, 3, 6)}) + badenc.var1.encoding["chunks"] = (3,) + with raises_regex(ValueError, "incompatible with this encoding"): + with self.roundtrip(badenc) as actual: + pass # ... except if the last chunk is smaller than the first ds_chunk_irreg = ds.chunk({"x": (5, 5, 2)}) From c788ee44008cdd65c8b6de40c737f1b28e173496 Mon Sep 17 00:00:00 2001 From: Ray Bell Date: Thu, 23 Apr 2020 03:58:09 -0400 Subject: [PATCH 33/39] DOC: add pandas.DataFrame.to_xarray (#3994) Co-authored-by: Ray Bell --- xarray/core/dataset.py | 1 + 1 file changed, 1 insertion(+) diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index d811d54847f..53aa00f22ce 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -4598,6 +4598,7 @@ def from_dataframe(cls, dataframe: pd.DataFrame, sparse: bool = False) -> "Datas See also -------- xarray.DataArray.from_series + pandas.DataFrame.to_xarray """ # TODO: Add an option to remove dimensions along which the variables # are constant, to enable consistent serialization to/from a dataframe, From 37551da5ebc7861439ac3eefddefb534b76f2895 Mon Sep 17 00:00:00 2001 From: Prajjwal Nijhara Date: Fri, 24 Apr 2020 12:44:54 +0530 Subject: [PATCH 34/39] Fix some code quality and bug-risk issues (#3999) --- .deepsource.toml | 18 ++++++++++++++++++ xarray/convert.py | 6 +++--- xarray/core/computation.py | 4 ++-- xarray/core/formatting.py | 12 +++++------- xarray/core/groupby.py | 4 +++- xarray/plot/plot.py | 2 +- 6 files changed, 32 insertions(+), 14 deletions(-) create mode 100644 .deepsource.toml diff --git a/.deepsource.toml b/.deepsource.toml new file mode 100644 index 00000000000..e37b41de303 --- /dev/null +++ b/.deepsource.toml @@ -0,0 +1,18 @@ +version = 1 + +test_patterns = [ + "*/tests/**", + "*/test_*.py" +] + +exclude_patterns = [ + "doc/**", + "ci/**" +] + +[[analyzers]] +name = "python" +enabled = true + + [analyzers.meta] + runtime_version = "3.x.x" \ No newline at end of file diff --git a/xarray/convert.py b/xarray/convert.py index 4974a55d8e2..0c86b090f34 100644 --- a/xarray/convert.py +++ b/xarray/convert.py @@ -229,11 +229,11 @@ def _iris_cell_methods_to_str(cell_methods_obj): """ cell_methods = [] for cell_method in cell_methods_obj: - names = "".join([f"{n}: " for n in cell_method.coord_names]) + names = "".join(f"{n}: " for n in cell_method.coord_names) intervals = " ".join( - [f"interval: {interval}" for interval in cell_method.intervals] + f"interval: {interval}" for interval in cell_method.intervals ) - comments = " ".join([f"comment: {comment}" for comment in cell_method.comments]) + comments = " ".join(f"comment: {comment}" for comment in cell_method.comments) extra = " ".join([intervals, comments]).strip() if extra: extra = f" ({extra})" diff --git a/xarray/core/computation.py b/xarray/core/computation.py index 
6cf4178b5bf..a3723ea9db9 100644 --- a/xarray/core/computation.py +++ b/xarray/core/computation.py @@ -1192,10 +1192,10 @@ def dot(*arrays, dims=None, **kwargs): # construct einsum subscripts, such as '...abc,...ab->...c' # Note: input_core_dims are always moved to the last position subscripts_list = [ - "..." + "".join([dim_map[d] for d in ds]) for ds in input_core_dims + "..." + "".join(dim_map[d] for d in ds) for ds in input_core_dims ] subscripts = ",".join(subscripts_list) - subscripts += "->..." + "".join([dim_map[d] for d in output_core_dims[0]]) + subscripts += "->..." + "".join(dim_map[d] for d in output_core_dims[0]) join = OPTIONS["arithmetic_join"] # using "inner" emulates `(a * b).sum()` for all joins (except "exact") diff --git a/xarray/core/formatting.py b/xarray/core/formatting.py index 534d253ecc8..d6732fc182e 100644 --- a/xarray/core/formatting.py +++ b/xarray/core/formatting.py @@ -298,12 +298,10 @@ def _summarize_coord_multiindex(coord, col_width, marker): def _summarize_coord_levels(coord, col_width, marker="-"): return "\n".join( - [ - summarize_variable( - lname, coord.get_level_variable(lname), col_width, marker=marker - ) - for lname in coord.level_names - ] + summarize_variable( + lname, coord.get_level_variable(lname), col_width, marker=marker + ) + for lname in coord.level_names ) @@ -562,7 +560,7 @@ def extra_items_repr(extra_keys, mapping, ab_side): for m in (a_mapping, b_mapping): attr_s = "\n".join( - [summarize_attr(ak, av) for ak, av in m[k].attrs.items()] + summarize_attr(ak, av) for ak, av in m[k].attrs.items() ) attrs_summary.append(attr_s) diff --git a/xarray/core/groupby.py b/xarray/core/groupby.py index 5a5f4c0d296..148e16863d1 100644 --- a/xarray/core/groupby.py +++ b/xarray/core/groupby.py @@ -273,7 +273,7 @@ def __init__( grouper=None, bins=None, restore_coord_dims=None, - cut_kwargs={}, + cut_kwargs=None, ): """Create a GroupBy object @@ -299,6 +299,8 @@ def __init__( Extra keyword arguments to pass to `pandas.cut` """ + if cut_kwargs is None: + cut_kwargs = {} from .dataarray import DataArray if grouper is not None and bins is not None: diff --git a/xarray/plot/plot.py b/xarray/plot/plot.py index 4657bee9415..4d6033bf00d 100644 --- a/xarray/plot/plot.py +++ b/xarray/plot/plot.py @@ -30,7 +30,7 @@ def _infer_line_data(darray, x, y, hue): error_msg = "must be either None or one of ({:s})".format( - ", ".join([repr(dd) for dd in darray.dims]) + ", ".join(repr(dd) for dd in darray.dims) ) ndims = len(darray.dims) From 6ca3bd7148748fbf03d3ede653a83287f852e472 Mon Sep 17 00:00:00 2001 From: Huite Date: Fri, 24 Apr 2020 09:15:43 +0200 Subject: [PATCH 35/39] full_like: error on non-scalar fill_value (#3979) * Avoid multiplication DeprecationWarning in rasterio backend * full_like: error on non-scalar fill_value Fixes #3977 * Added test * Updated what's new * core.utils.is_scalar instead of numpy.is_scalar * More informative error message * raises_regex for error test --- doc/whats-new.rst | 2 ++ xarray/core/common.py | 5 ++++- xarray/tests/test_variable.py | 4 ++++ 3 files changed, 10 insertions(+), 1 deletion(-) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index fc95e26dabd..7b2b3530c41 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -58,6 +58,8 @@ New Features Bug fixes ~~~~~~~~~ +- ``ValueError`` is raised when ``fill_value`` is not a scalar in :py:meth:`full_like`. (:issue`3977`) + By `Huite Bootsma `_. - Fix wrong order in converting a ``pd.Series`` with a MultiIndex to ``DataArray``. (:issue:`3951`) By `Keisuke Fujii `_. 
- Fix renaming of coords when one or more stacked coords is not in
diff --git a/xarray/core/common.py b/xarray/core/common.py
index 8f6d57e9f12..1e7069ec51f 100644
--- a/xarray/core/common.py
+++ b/xarray/core/common.py
@@ -25,7 +25,7 @@
 from .options import OPTIONS, _get_keep_attrs
 from .pycompat import dask_array_type
 from .rolling_exp import RollingExp
-from .utils import Frozen, either_dict_or_kwargs
+from .utils import Frozen, either_dict_or_kwargs, is_scalar

 # Used as a sentinel value to indicate all dimensions
 ALL_DIMS = ...

@@ -1397,6 +1397,9 @@ def full_like(other, fill_value, dtype: DTypeLike = None):
     from .dataset import Dataset
     from .variable import Variable

+    if not is_scalar(fill_value):
+        raise ValueError(f"fill_value must be scalar. Received {fill_value} instead.")
+
     if isinstance(other, Dataset):
         data_vars = {
             k: _full_like_variable(v, fill_value, dtype)
diff --git a/xarray/tests/test_variable.py b/xarray/tests/test_variable.py
index 78e3848b8fb..3003e0d66f3 100644
--- a/xarray/tests/test_variable.py
+++ b/xarray/tests/test_variable.py
@@ -2213,6 +2213,10 @@ def test_full_like(self):
         assert expect.dtype == bool
         assert_identical(expect, full_like(orig, True, dtype=bool))

+        # raise error on non-scalar fill_value
+        with raises_regex(ValueError, "must be scalar"):
+            full_like(orig, [1.0, 2.0])
+
     @requires_dask
     def test_full_like_dask(self):
         orig = Variable(

From 33a66d6380c26a59923922ee11e8ffcf0b4f379f Mon Sep 17 00:00:00 2001
From: Ryan May
Date: Fri, 24 Apr 2020 01:16:09 -0600
Subject: [PATCH 36/39] Fix handling of abbreviated units like msec (#3998)

* Fix handling of abbreviated units like msec

By default, xarray tries to decode times with pandas and falls back to
cftime. This fixes the exception handler to fall back properly when an
unhandled abbreviated unit is passed in.

* Add what's new entry
---
 doc/whats-new.rst                 |  4 +++-
 xarray/coding/times.py            |  2 +-
 xarray/tests/test_coding_times.py | 12 ++++++++++++
 3 files changed, 16 insertions(+), 2 deletions(-)

diff --git a/doc/whats-new.rst b/doc/whats-new.rst
index 7b2b3530c41..6fc3260f10d 100644
--- a/doc/whats-new.rst
+++ b/doc/whats-new.rst
@@ -82,7 +82,9 @@ Bug fixes
 - Fix bug causing :py:meth:`DataArray.interpolate_na` to always drop attributes,
   and added `keep_attrs` argument. (:issue:`3968`)
   By `Tom Nicholas `_.
-
+- Fix bug in time parsing failing to fall back to cftime. This was causing time
+  variables with a time unit of ``'msecs'`` to fail to parse. (:pull:`3998`)
+  By `Ryan May `_.
Documentation
~~~~~~~~~~~~~
diff --git a/xarray/coding/times.py b/xarray/coding/times.py
index 965ddd8f043..d923f1ad088 100644
--- a/xarray/coding/times.py
+++ b/xarray/coding/times.py
@@ -155,7 +155,7 @@ def decode_cf_datetime(num_dates, units, calendar=None, use_cftime=None):
     if use_cftime is None:
         try:
             dates = _decode_datetime_with_pandas(flat_num_dates, units, calendar)
-        except (OutOfBoundsDatetime, OverflowError):
+        except (KeyError, OutOfBoundsDatetime, OverflowError):
             dates = _decode_datetime_with_cftime(
                 flat_num_dates.astype(np.float), units, calendar
             )
diff --git a/xarray/tests/test_coding_times.py b/xarray/tests/test_coding_times.py
index 00c34940ce4..1efd4b02bf8 100644
--- a/xarray/tests/test_coding_times.py
+++ b/xarray/tests/test_coding_times.py
@@ -432,6 +432,18 @@ def test_decode_360_day_calendar():
     assert_array_equal(actual, expected)


+@requires_cftime
+def test_decode_abbreviation():
+    """Test making sure we properly fall back to cftime on abbreviated units."""
+    import cftime
+
+    val = np.array([1586628000000.0])
+    units = "msecs since 1970-01-01T00:00:00Z"
+    actual = coding.times.decode_cf_datetime(val, units)
+    expected = coding.times.cftime_to_nptime(cftime.num2date(val, units))
+    assert_array_equal(actual, expected)
+
+
 @arm_xfail
 @requires_cftime
 @pytest.mark.parametrize(

From 4e196f74dccabbc82f43df7806dc0c7810ba526a Mon Sep 17 00:00:00 2001
From: arabidopsis
Date: Wed, 29 Apr 2020 23:54:22 +0800
Subject: [PATCH 37/39] ensure Variable._repr_html_ works (#3973)

* ensure Variable._repr_html_ works

* added PR 3972 to Bug fixes

* better attribute access

* moved Variable._repr_html_ test to better location

Co-authored-by: Stephan Hoyer
Co-authored-by: Deepak Cherian
---
 doc/whats-new.rst                    |  3 +++
 xarray/core/formatting_html.py       |  3 ++-
 xarray/tests/test_formatting_html.py | 12 ++++++++++++
 3 files changed, 17 insertions(+), 1 deletion(-)

diff --git a/doc/whats-new.rst b/doc/whats-new.rst
index 6fc3260f10d..b71e0baa655 100644
--- a/doc/whats-new.rst
+++ b/doc/whats-new.rst
@@ -79,6 +79,9 @@ Bug fixes
   By `Tom Nicholas `_.
 - Fix ``RasterioDeprecationWarning`` when using a ``vrt`` in ``open_rasterio``. (:issue:`3964`)
   By `Taher Chegini `_.
+- Fix ``AttributeError`` on displaying a :py:class:`Variable`
+  in a notebook context. (:issue:`3972`, :pull:`3973`)
+  By `Ian Castleden `_.
 - Fix bug causing :py:meth:`DataArray.interpolate_na` to always drop attributes,
   and added `keep_attrs` argument. (:issue:`3968`)
   By `Tom Nicholas `_.
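For context, a minimal sketch of the failure this patch addresses (a
hypothetical session, assuming only core xarray; this snippet is not part of
the diff below):

    import xarray as xr

    v = xr.Variable(["x"], [1, 2, 3])
    with xr.set_options(display_style="html"):
        # array_section() used to access obj.variable, which exists on
        # DataArray but not on Variable, so this raised AttributeError (GH 3972)
        v._repr_html_()

The ``getattr`` fallback below lets plain Variables take the same code path
as DataArrays.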
diff --git a/xarray/core/formatting_html.py b/xarray/core/formatting_html.py
index 8678a58b381..6e345582ed0 100644
--- a/xarray/core/formatting_html.py
+++ b/xarray/core/formatting_html.py
@@ -183,7 +183,8 @@ def array_section(obj):
     # "unique" id to expand/collapse the section
     data_id = "section-" + str(uuid.uuid4())
     collapsed = ""
-    preview = escape(inline_variable_array_repr(obj.variable, max_width=70))
+    variable = getattr(obj, "variable", obj)
+    preview = escape(inline_variable_array_repr(variable, max_width=70))
     data_repr = short_data_repr_html(obj)
     data_icon = _icon("icon-database")

diff --git a/xarray/tests/test_formatting_html.py b/xarray/tests/test_formatting_html.py
index 239f339208d..94653016416 100644
--- a/xarray/tests/test_formatting_html.py
+++ b/xarray/tests/test_formatting_html.py
@@ -137,3 +137,15 @@ def test_repr_of_dataset(dataset):
     )
     assert "&lt;U4" in formatted or "&gt;U4" in formatted
     assert "&lt;IA&gt;" in formatted
+
+
+def test_variable_repr_html():
+    v = xr.Variable(["time", "x"], [[1, 2, 3], [4, 5, 6]], {"foo": "bar"})
+    assert hasattr(v, "_repr_html_")
+    with xr.set_options(display_style="html"):
+        html = v._repr_html_().strip()
+    # We don't do a complete string identity since
+    # html output is probably subject to change, is long and... reasons.
+    # Just test that something reasonable was produced.
+    assert html.startswith("<div")
+    assert "xarray.Variable" in html

From 8834afa9f617bd201eba00374bb55d96dccec96b Mon Sep 17 00:00:00 2001
From: keewis
Date: Wed, 29 Apr 2020 18:10:09 +0200
Subject: [PATCH 38/39] Apply blackdoc to the documentation (#4012)

* replace tabs with spaces

* fix some invalid code

* add missing prompts

* apply blackdoc

* reformat the plotting docs code

* whats-new.rst entry
---
 doc/combining.rst        |  62 ++++++------
 doc/computation.rst      | 165 +++++++++++++++---------
 doc/contributing.rst     |  28 +++---
 doc/dask.rst             |  66 ++++++++-----
 doc/data-structures.rst  | 144 +++++++++++++++-------------
 doc/faq.rst              |   9 +-
 doc/groupby.rst          |  56 ++++++-----
 doc/indexing.rst         |  99 +++++++++----------
 doc/internals.rst        |  21 +++--
 doc/interpolation.rst    | 118 ++++++++++++-----------
 doc/io.rst               | 116 ++++++++++++++---------
 doc/pandas.rst           |  36 ++++---
 doc/plotting.rst         | 151 ++++++++++++++++-------------
 doc/quick-overview.rst   |  39 ++++----
 doc/reshaping.rst        | 112 +++++++++++-----------
 doc/time-series.rst      |  54 ++++++-----
 doc/weather-climate.rst  |  72 +++++++-------
 doc/whats-new.rst        | 199 ++++++++++++++++++++-------------------
 xarray/core/common.py    |   4 +-
 xarray/core/dataarray.py |  55 ++++++-----
 xarray/core/dataset.py   |  66 +++++++------
 21 files changed, 917 insertions(+), 755 deletions(-)

diff --git a/doc/combining.rst b/doc/combining.rst
index 05b7f2efc50..ffc6575c579 100644
--- a/doc/combining.rst
+++ b/doc/combining.rst
@@ -4,11 +4,12 @@ Combining data
 --------------

 .. ipython:: python
    :suppress:

    import numpy as np
    import pandas as pd
    import xarray as xr
+
    np.random.seed(123456)

 * For combining datasets or data arrays along a single dimension, see concatenate_.
@@ -28,11 +29,10 @@ that dimension:

 ..
ipython:: python - arr = xr.DataArray(np.random.randn(2, 3), - [('x', ['a', 'b']), ('y', [10, 20, 30])]) + arr = xr.DataArray(np.random.randn(2, 3), [("x", ["a", "b"]), ("y", [10, 20, 30])]) arr[:, :1] # this resembles how you would use np.concatenate - xr.concat([arr[:, :1], arr[:, 1:]], dim='y') + xr.concat([arr[:, :1], arr[:, 1:]], dim="y") In addition to combining along an existing dimension, ``concat`` can create a new dimension by stacking lower dimensional arrays together: @@ -41,7 +41,7 @@ new dimension by stacking lower dimensional arrays together: arr[0] # to combine these 1d arrays into a 2d array in numpy, you would use np.array - xr.concat([arr[0], arr[1]], 'x') + xr.concat([arr[0], arr[1]], "x") If the second argument to ``concat`` is a new dimension name, the arrays will be concatenated along that new dimension, which is always inserted as the first @@ -49,7 +49,7 @@ dimension: .. ipython:: python - xr.concat([arr[0], arr[1]], 'new_dim') + xr.concat([arr[0], arr[1]], "new_dim") The second argument to ``concat`` can also be an :py:class:`~pandas.Index` or :py:class:`~xarray.DataArray` object as well as a string, in which case it is @@ -57,14 +57,14 @@ used to label the values along the new dimension: .. ipython:: python - xr.concat([arr[0], arr[1]], pd.Index([-90, -100], name='new_dim')) + xr.concat([arr[0], arr[1]], pd.Index([-90, -100], name="new_dim")) Of course, ``concat`` also works on ``Dataset`` objects: .. ipython:: python - ds = arr.to_dataset(name='foo') - xr.concat([ds.sel(x='a'), ds.sel(x='b')], 'x') + ds = arr.to_dataset(name="foo") + xr.concat([ds.sel(x="a"), ds.sel(x="b")], "x") :py:func:`~xarray.concat` has a number of options which provide deeper control over which variables are concatenated and how it handles conflicting variables @@ -84,8 +84,8 @@ To combine variables and coordinates between multiple ``DataArray`` and/or .. ipython:: python - xr.merge([ds, ds.rename({'foo': 'bar'})]) - xr.merge([xr.DataArray(n, name='var%d' % n) for n in range(5)]) + xr.merge([ds, ds.rename({"foo": "bar"})]) + xr.merge([xr.DataArray(n, name="var%d" % n) for n in range(5)]) If you merge another dataset (or a dictionary including data array objects), by default the resulting dataset will be aligned on the **union** of all index @@ -93,7 +93,7 @@ coordinates: .. ipython:: python - other = xr.Dataset({'bar': ('x', [1, 2, 3, 4]), 'x': list('abcd')}) + other = xr.Dataset({"bar": ("x", [1, 2, 3, 4]), "x": list("abcd")}) xr.merge([ds, other]) This ensures that ``merge`` is non-destructive. ``xarray.MergeError`` is raised @@ -116,7 +116,7 @@ used in the :py:class:`~xarray.Dataset` constructor: .. ipython:: python - xr.Dataset({'a': arr[:-1], 'b': arr[1:]}) + xr.Dataset({"a": arr[:-1], "b": arr[1:]}) .. _combine: @@ -131,8 +131,8 @@ are filled with ``NaN``. For example: .. ipython:: python - ar0 = xr.DataArray([[0, 0], [0, 0]], [('x', ['a', 'b']), ('y', [-1, 0])]) - ar1 = xr.DataArray([[1, 1], [1, 1]], [('x', ['b', 'c']), ('y', [0, 1])]) + ar0 = xr.DataArray([[0, 0], [0, 0]], [("x", ["a", "b"]), ("y", [-1, 0])]) + ar1 = xr.DataArray([[1, 1], [1, 1]], [("x", ["b", "c"]), ("y", [0, 1])]) ar0.combine_first(ar1) ar1.combine_first(ar0) @@ -152,7 +152,7 @@ variables with new values: .. ipython:: python - ds.update({'space': ('space', [10.2, 9.4, 3.9])}) + ds.update({"space": ("space", [10.2, 9.4, 3.9])}) However, dimensions are still required to be consistent between different Dataset variables, so you cannot change the size of a dimension unless you @@ -170,7 +170,7 @@ syntax: .. 
ipython:: python - ds['baz'] = xr.DataArray([9, 9, 9, 9, 9], coords=[('x', list('abcde'))]) + ds["baz"] = xr.DataArray([9, 9, 9, 9, 9], coords=[("x", list("abcde"))]) ds.baz Equals and identical @@ -193,7 +193,7 @@ object: .. ipython:: python - arr.identical(arr.rename('bar')) + arr.identical(arr.rename("bar")) :py:attr:`~xarray.Dataset.broadcast_equals` does a more relaxed form of equality check that allows variables to have different dimensions, as long as values @@ -201,8 +201,8 @@ are constant along those new dimensions: .. ipython:: python - left = xr.Dataset(coords={'x': 0}) - right = xr.Dataset({'x': [0, 0, 0]}) + left = xr.Dataset(coords={"x": 0}) + right = xr.Dataset({"x": [0, 0, 0]}) left.broadcast_equals(right) Like pandas objects, two xarray objects are still equal or identical if they have @@ -231,9 +231,9 @@ coordinates as long as any non-missing values agree or are disjoint: .. ipython:: python - ds1 = xr.Dataset({'a': ('x', [10, 20, 30, np.nan])}, {'x': [1, 2, 3, 4]}) - ds2 = xr.Dataset({'a': ('x', [np.nan, 30, 40, 50])}, {'x': [2, 3, 4, 5]}) - xr.merge([ds1, ds2], compat='no_conflicts') + ds1 = xr.Dataset({"a": ("x", [10, 20, 30, np.nan])}, {"x": [1, 2, 3, 4]}) + ds2 = xr.Dataset({"a": ("x", [np.nan, 30, 40, 50])}, {"x": [2, 3, 4, 5]}) + xr.merge([ds1, ds2], compat="no_conflicts") Note that due to the underlying representation of missing values as floating point numbers (``NaN``), variable data type is not always preserved when merging @@ -273,10 +273,12 @@ datasets into a doubly-nested list, e.g: .. ipython:: python - arr = xr.DataArray(name='temperature', data=np.random.randint(5, size=(2, 2)), dims=['x', 'y']) + arr = xr.DataArray( + name="temperature", data=np.random.randint(5, size=(2, 2)), dims=["x", "y"] + ) arr ds_grid = [[arr, arr], [arr, arr]] - xr.combine_nested(ds_grid, concat_dim=['x', 'y']) + xr.combine_nested(ds_grid, concat_dim=["x", "y"]) :py:func:`~xarray.combine_nested` can also be used to explicitly merge datasets with different variables. For example if we have 4 datasets, which are divided @@ -286,10 +288,10 @@ we wish to use ``merge`` instead of ``concat``: .. ipython:: python - temp = xr.DataArray(name='temperature', data=np.random.randn(2), dims=['t']) - precip = xr.DataArray(name='precipitation', data=np.random.randn(2), dims=['t']) + temp = xr.DataArray(name="temperature", data=np.random.randn(2), dims=["t"]) + precip = xr.DataArray(name="precipitation", data=np.random.randn(2), dims=["t"]) ds_grid = [[temp, precip], [temp, precip]] - xr.combine_nested(ds_grid, concat_dim=['t', None]) + xr.combine_nested(ds_grid, concat_dim=["t", None]) :py:func:`~xarray.combine_by_coords` is for combining objects which have dimension coordinates which specify their relationship to and order relative to one @@ -302,8 +304,8 @@ coordinates, not on their position in the list passed to ``combine_by_coords``. .. 
ipython:: python :okwarning: - x1 = xr.DataArray(name='foo', data=np.random.randn(3), coords=[('x', [0, 1, 2])]) - x2 = xr.DataArray(name='foo', data=np.random.randn(3), coords=[('x', [3, 4, 5])]) + x1 = xr.DataArray(name="foo", data=np.random.randn(3), coords=[("x", [0, 1, 2])]) + x2 = xr.DataArray(name="foo", data=np.random.randn(3), coords=[("x", [3, 4, 5])]) xr.combine_by_coords([x2, x1]) These functions can be used by :py:func:`~xarray.open_mfdataset` to open many diff --git a/doc/computation.rst b/doc/computation.rst index 4b8014c4782..3660aed93ed 100644 --- a/doc/computation.rst +++ b/doc/computation.rst @@ -18,17 +18,19 @@ Arithmetic operations with a single DataArray automatically vectorize (like numpy) over all array values: .. ipython:: python - :suppress: + :suppress: import numpy as np import pandas as pd import xarray as xr + np.random.seed(123456) .. ipython:: python - arr = xr.DataArray(np.random.RandomState(0).randn(2, 3), - [('x', ['a', 'b']), ('y', [10, 20, 30])]) + arr = xr.DataArray( + np.random.RandomState(0).randn(2, 3), [("x", ["a", "b"]), ("y", [10, 20, 30])] + ) arr - 3 abs(arr) @@ -45,7 +47,7 @@ Use :py:func:`~xarray.where` to conditionally switch between values: .. ipython:: python - xr.where(arr > 0, 'positive', 'negative') + xr.where(arr > 0, "positive", "negative") Use `@` to perform matrix multiplication: @@ -73,14 +75,14 @@ methods for working with missing data from pandas: .. ipython:: python - x = xr.DataArray([0, 1, np.nan, np.nan, 2], dims=['x']) + x = xr.DataArray([0, 1, np.nan, np.nan, 2], dims=["x"]) x.isnull() x.notnull() x.count() - x.dropna(dim='x') + x.dropna(dim="x") x.fillna(-1) - x.ffill('x') - x.bfill('x') + x.ffill("x") + x.bfill("x") Like pandas, xarray uses the float value ``np.nan`` (not-a-number) to represent missing values. @@ -90,9 +92,12 @@ for filling missing values via 1D interpolation. .. ipython:: python - x = xr.DataArray([0, 1, np.nan, np.nan, 2], dims=['x'], - coords={'xx': xr.Variable('x', [0, 1, 1.1, 1.9, 3])}) - x.interpolate_na(dim='x', method='linear', use_coordinate='xx') + x = xr.DataArray( + [0, 1, np.nan, np.nan, 2], + dims=["x"], + coords={"xx": xr.Variable("x", [0, 1, 1.1, 1.9, 3])}, + ) + x.interpolate_na(dim="x", method="linear", use_coordinate="xx") Note that xarray slightly diverges from the pandas ``interpolate`` syntax by providing the ``use_coordinate`` keyword which facilitates a clear specification @@ -110,8 +115,8 @@ applied along particular dimension(s): .. ipython:: python - arr.sum(dim='x') - arr.std(['x', 'y']) + arr.sum(dim="x") + arr.std(["x", "y"]) arr.min() @@ -121,7 +126,7 @@ for wrapping code designed to work with numpy arrays), you can use the .. ipython:: python - arr.get_axis_num('y') + arr.get_axis_num("y") These operations automatically skip missing values, like in pandas: @@ -142,8 +147,7 @@ method supports rolling window aggregation: .. ipython:: python - arr = xr.DataArray(np.arange(0, 7.5, 0.5).reshape(3, 5), - dims=('x', 'y')) + arr = xr.DataArray(np.arange(0, 7.5, 0.5).reshape(3, 5), dims=("x", "y")) arr :py:meth:`~xarray.DataArray.rolling` is applied along one dimension using the @@ -194,8 +198,9 @@ We can also manually iterate through ``Rolling`` objects: .. code:: python - for label, arr_window in r: - # arr_window is a view of x + for label, arr_window in r: + # arr_window is a view of x + ... .. _comput.rolling_exp: @@ -222,9 +227,9 @@ windowed rolling, convolution, short-time FFT etc. .. 
ipython:: python

     # rolling with 2-point stride
-    rolling_da = r.construct('window_dim', stride=2)
+    rolling_da = r.construct("window_dim", stride=2)
     rolling_da
-    rolling_da.mean('window_dim', skipna=False)
+    rolling_da.mean("window_dim", skipna=False)

 Because the ``DataArray`` given by ``r.construct('window_dim')`` is a view of the
 original array, it is memory efficient.
 You can also use ``construct`` to compute a weighted rolling sum:

 .. ipython:: python

-    weight = xr.DataArray([0.25, 0.5, 0.25], dims=['window'])
-    arr.rolling(y=3).construct('window').dot(weight)
+    weight = xr.DataArray([0.25, 0.5, 0.25], dims=["window"])
+    arr.rolling(y=3).construct("window").dot(weight)

 .. note::
   numpy's Nan-aggregation functions such as ``nansum`` copy the original array.
@@ -254,52 +259,52 @@ support weighted ``sum`` and weighted ``mean``.

 .. ipython:: python

-    coords = dict(month=('month', [1, 2, 3]))
-
-    prec = xr.DataArray([1.1, 1.0, 0.9], dims=('month', ), coords=coords)
-    weights = xr.DataArray([31, 28, 31], dims=('month', ), coords=coords)
+    coords = dict(month=("month", [1, 2, 3]))
+
+    prec = xr.DataArray([1.1, 1.0, 0.9], dims=("month",), coords=coords)
+    weights = xr.DataArray([31, 28, 31], dims=("month",), coords=coords)

 Create a weighted object:

 .. ipython:: python

-    weighted_prec = prec.weighted(weights)
-    weighted_prec
+    weighted_prec = prec.weighted(weights)
+    weighted_prec

 Calculate the weighted sum:

 .. ipython:: python

-    weighted_prec.sum()
+    weighted_prec.sum()

 Calculate the weighted mean:

 .. ipython:: python

-    weighted_prec.mean(dim="month")
+    weighted_prec.mean(dim="month")

 The weighted sum corresponds to:

 .. ipython:: python

-    weighted_sum = (prec * weights).sum()
-    weighted_sum
+    weighted_sum = (prec * weights).sum()
+    weighted_sum

 and the weighted mean to:

 .. ipython:: python

-    weighted_mean = weighted_sum / weights.sum()
-    weighted_mean
+    weighted_mean = weighted_sum / weights.sum()
+    weighted_mean

 However, the functions also take missing values in the data into account:

 .. ipython:: python

-    data = xr.DataArray([np.NaN, 2, 4])
-    weights = xr.DataArray([8, 1, 1])
+    data = xr.DataArray([np.NaN, 2, 4])
+    weights = xr.DataArray([8, 1, 1])

-    data.weighted(weights).mean()
+    data.weighted(weights).mean()

 Using ``(data * weights).sum() / weights.sum()`` would (incorrectly) result
 in 0.6.

@@ -309,16 +314,16 @@ If the weights add up to 0, ``sum`` returns 0:

 .. ipython:: python

-    data = xr.DataArray([1.0, 1.0])
-    weights = xr.DataArray([-1.0, 1.0])
+    data = xr.DataArray([1.0, 1.0])
+    weights = xr.DataArray([-1.0, 1.0])

-    data.weighted(weights).sum()
+    data.weighted(weights).sum()

 and ``mean`` returns ``NaN``:

 .. ipython:: python

-    data.weighted(weights).mean()
+    data.weighted(weights).mean()

 .. note::

@@ -336,18 +341,21 @@ methods. This supports the block aggregation along multiple dimensions,

 .. ipython:: python

-    x = np.linspace(0, 10, 300)
-    t = pd.date_range('15/12/1999', periods=364)
-    da = xr.DataArray(np.sin(x) * np.cos(np.linspace(0, 1, 364)[:, np.newaxis]),
-                      dims=['time', 'x'], coords={'time': t, 'x': x})
-    da
+    x = np.linspace(0, 10, 300)
+    t = pd.date_range("15/12/1999", periods=364)
+    da = xr.DataArray(
+        np.sin(x) * np.cos(np.linspace(0, 1, 364)[:, np.newaxis]),
+        dims=["time", "x"],
+        coords={"time": t, "x": x},
+    )
+    da

 In order to take a block mean for every 7 days along the ``time`` dimension and
 every 2 points along the ``x`` dimension,

 ..
ipython:: python

     da.coarsen(time=7, x=2).mean()

 :py:meth:`~xarray.DataArray.coarsen` raises a ``ValueError`` if the data length
 is not a multiple of the corresponding window size. You can choose
 ``boundary='trim'`` or ``boundary='pad'`` options for trimming
 the excess entries or padding ``nan`` to insufficient entries,

 .. ipython:: python

-    da.coarsen(time=30, x=2, boundary='trim').mean()
+    da.coarsen(time=30, x=2, boundary="trim").mean()

 If you want to apply a specific function to a coordinate, you can pass the
 function or method name to the ``coord_func`` option,

 .. ipython:: python

-    da.coarsen(time=7, x=2, coord_func={'time': 'min'}).mean()
+    da.coarsen(time=7, x=2, coord_func={"time": "min"}).mean()

 .. _compute.using_coordinates:

@@ -377,24 +385,25 @@ central finite differences using their coordinates,

 .. ipython:: python

-    a = xr.DataArray([0, 1, 2, 3], dims=['x'], coords=[[0.1, 0.11, 0.2, 0.3]])
+    a = xr.DataArray([0, 1, 2, 3], dims=["x"], coords=[[0.1, 0.11, 0.2, 0.3]])
     a
-    a.differentiate('x')
+    a.differentiate("x")

 This method can also be used for multidimensional arrays,

 .. ipython:: python

-    a = xr.DataArray(np.arange(8).reshape(4, 2), dims=['x', 'y'],
-                     coords={'x': [0.1, 0.11, 0.2, 0.3]})
-    a.differentiate('x')
+    a = xr.DataArray(
+        np.arange(8).reshape(4, 2), dims=["x", "y"], coords={"x": [0.1, 0.11, 0.2, 0.3]}
+    )
+    a.differentiate("x")

 :py:meth:`~xarray.DataArray.integrate` computes integration based on the
 trapezoidal rule using their coordinates,

 .. ipython:: python

-    a.integrate('x')
+    a.integrate("x")

 .. note::
     These methods are limited to simple cartesian geometry. Differentiation
@@ -412,9 +421,9 @@ best fitting coefficients along a given dimension and for a given order,

 .. ipython:: python

-    x = xr.DataArray(np.arange(10), dims=['x'], name='x')
-    a = xr.DataArray(3 + 4 * x, dims=['x'], coords={'x': x})
-    out = a.polyfit(dim='x', deg=1, full=True)
+    x = xr.DataArray(np.arange(10), dims=["x"], name="x")
+    a = xr.DataArray(3 + 4 * x, dims=["x"], coords={"x": x})
+    out = a.polyfit(dim="x", deg=1, full=True)
     out

 The method outputs a dataset containing the coefficients (and more if `full=True`).
@@ -443,9 +452,9 @@ arrays with different sizes aligned along different dimensions:

 .. ipython:: python

-    a = xr.DataArray([1, 2], [('x', ['a', 'b'])])
+    a = xr.DataArray([1, 2], [("x", ["a", "b"])])
     a
-    b = xr.DataArray([-1, -2, -3], [('y', [10, 20, 30])])
+    b = xr.DataArray([-1, -2, -3], [("y", [10, 20, 30])])
     b

 With xarray, we can apply binary mathematical operations to these arrays, and
@@ -460,7 +469,7 @@ appeared:

 .. ipython:: python

-    c = xr.DataArray(np.arange(6).reshape(3, 2), [b['y'], a['x']])
+    c = xr.DataArray(np.arange(6).reshape(3, 2), [b["y"], a["x"]])
     c
     a + c

@@ -494,7 +503,7 @@ operations. The default result of a binary operation is by the *intersection*

 .. ipython:: python

-    arr = xr.DataArray(np.arange(3), [('x', range(3))])
+    arr = xr.DataArray(np.arange(3), [("x", range(3))])
     arr + arr[:-1]

 If coordinate values for a dimension are missing on either argument, all
@@ -503,7 +512,7 @@ matching dimensions must have the same size:

 .. ipython::
    :verbatim:

-    In [1]: arr + xr.DataArray([1, 2], dims='x')
+    In [1]: arr + xr.DataArray([1, 2], dims="x")
     ValueError: arguments without labels along dimension 'x' cannot be
     aligned because they have different dimension size(s) {2} than the size of
     the aligned dimension labels: 3
@@ -562,16 +571,20 @@ variables:

 ..
ipython:: python - ds = xr.Dataset({'x_and_y': (('x', 'y'), np.random.randn(3, 5)), - 'x_only': ('x', np.random.randn(3))}, - coords=arr.coords) + ds = xr.Dataset( + { + "x_and_y": (("x", "y"), np.random.randn(3, 5)), + "x_only": ("x", np.random.randn(3)), + }, + coords=arr.coords, + ) ds > 0 Datasets support most of the same methods found on data arrays: .. ipython:: python - ds.mean(dim='x') + ds.mean(dim="x") abs(ds) Datasets also support NumPy ufuncs (requires NumPy v1.13 or newer), or @@ -594,7 +607,7 @@ Arithmetic between two datasets matches data variables of the same name: .. ipython:: python - ds2 = xr.Dataset({'x_and_y': 0, 'x_only': 100}) + ds2 = xr.Dataset({"x_and_y": 0, "x_only": 100}) ds - ds2 Similarly to index based alignment, the result has the intersection of all @@ -638,7 +651,7 @@ any additional arguments: .. ipython:: python squared_error = lambda x, y: (x - y) ** 2 - arr1 = xr.DataArray([0, 1, 2, 3], dims='x') + arr1 = xr.DataArray([0, 1, 2, 3], dims="x") xr.apply_ufunc(squared_error, arr1, 1) For using more complex operations that consider some array values collectively, @@ -658,21 +671,21 @@ to set ``axis=-1``. As an example, here is how we would wrap .. code-block:: python def vector_norm(x, dim, ord=None): - return xr.apply_ufunc(np.linalg.norm, x, - input_core_dims=[[dim]], - kwargs={'ord': ord, 'axis': -1}) + return xr.apply_ufunc( + np.linalg.norm, x, input_core_dims=[[dim]], kwargs={"ord": ord, "axis": -1} + ) .. ipython:: python - :suppress: + :suppress: def vector_norm(x, dim, ord=None): - return xr.apply_ufunc(np.linalg.norm, x, - input_core_dims=[[dim]], - kwargs={'ord': ord, 'axis': -1}) + return xr.apply_ufunc( + np.linalg.norm, x, input_core_dims=[[dim]], kwargs={"ord": ord, "axis": -1} + ) .. ipython:: python - vector_norm(arr1, dim='x') + vector_norm(arr1, dim="x") Because ``apply_ufunc`` follows a standard convention for ufuncs, it plays nicely with tools for building vectorized functions, like diff --git a/doc/contributing.rst b/doc/contributing.rst index f581bcd9741..51dba2bb0cc 100644 --- a/doc/contributing.rst +++ b/doc/contributing.rst @@ -261,13 +261,13 @@ Some other important things to know about the docs: .. ipython:: python x = 2 - x**3 + x ** 3 will be rendered as:: In [1]: x = 2 - In [2]: x**3 + In [2]: x ** 3 Out[2]: 8 Almost all code examples in the docs are run (and the output saved) during the @@ -467,7 +467,7 @@ typically find tests wrapped in a class. .. code-block:: python class TestReallyCoolFeature: - .... + ... Going forward, we are moving to a more *functional* style using the `pytest `__ framework, which offers a richer @@ -477,7 +477,7 @@ writing test classes, we will write test functions like this: .. code-block:: python def test_really_cool_feature(): - .... + ... 
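A test in this style is fully self-contained: it builds its own small objects
and asserts on the result. For example (an illustrative sketch only; the test
name and data here are invented, not taken from the xarray test suite):

.. code-block:: python

    import numpy as np
    import xarray as xr


    def test_mean_over_named_dimension():
        # build a tiny DataArray inline rather than relying on shared state
        da = xr.DataArray(np.arange(6).reshape(2, 3), dims=["x", "y"])
        # reductions accept dimension names instead of axis numbers
        assert da.mean(dim="y").shape == (2,)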
Using ``pytest`` ~~~~~~~~~~~~~~~~ @@ -508,17 +508,23 @@ We would name this file ``test_cool_feature.py`` and put in an appropriate place from xarray.testing import assert_equal - @pytest.mark.parametrize('dtype', ['int8', 'int16', 'int32', 'int64']) + @pytest.mark.parametrize("dtype", ["int8", "int16", "int32", "int64"]) def test_dtypes(dtype): assert str(np.dtype(dtype)) == dtype - @pytest.mark.parametrize('dtype', ['float32', - pytest.param('int16', marks=pytest.mark.skip), - pytest.param('int32', marks=pytest.mark.xfail( - reason='to show how it works'))]) + @pytest.mark.parametrize( + "dtype", + [ + "float32", + pytest.param("int16", marks=pytest.mark.skip), + pytest.param( + "int32", marks=pytest.mark.xfail(reason="to show how it works") + ), + ], + ) def test_mark(dtype): - assert str(np.dtype(dtype)) == 'float32' + assert str(np.dtype(dtype)) == "float32" @pytest.fixture @@ -526,7 +532,7 @@ We would name this file ``test_cool_feature.py`` and put in an appropriate place return xr.DataArray([1, 2, 3]) - @pytest.fixture(params=['int8', 'int16', 'int32', 'int64']) + @pytest.fixture(params=["int8", "int16", "int32", "int64"]) def dtype(request): return request.param diff --git a/doc/dask.rst b/doc/dask.rst index 07b3939af6e..2248de9c0d8 100644 --- a/doc/dask.rst +++ b/doc/dask.rst @@ -56,19 +56,26 @@ argument to :py:func:`~xarray.open_dataset` or using the import numpy as np import pandas as pd import xarray as xr + np.random.seed(123456) np.set_printoptions(precision=3, linewidth=100, threshold=100, edgeitems=3) - ds = xr.Dataset({'temperature': (('time', 'latitude', 'longitude'), - np.random.randn(30, 180, 180)), - 'time': pd.date_range('2015-01-01', periods=30), - 'longitude': np.arange(180), - 'latitude': np.arange(89.5, -90.5, -1)}) - ds.to_netcdf('example-data.nc') + ds = xr.Dataset( + { + "temperature": ( + ("time", "latitude", "longitude"), + np.random.randn(30, 180, 180), + ), + "time": pd.date_range("2015-01-01", periods=30), + "longitude": np.arange(180), + "latitude": np.arange(89.5, -90.5, -1), + } + ) + ds.to_netcdf("example-data.nc") .. ipython:: python - ds = xr.open_dataset('example-data.nc', chunks={'time': 10}) + ds = xr.open_dataset("example-data.nc", chunks={"time": 10}) ds In this example ``latitude`` and ``longitude`` do not appear in the ``chunks`` @@ -106,7 +113,7 @@ usual way. .. ipython:: python - ds.to_netcdf('manipulated-example-data.nc') + ds.to_netcdf("manipulated-example-data.nc") By setting the ``compute`` argument to ``False``, :py:meth:`~xarray.Dataset.to_netcdf` will return a ``dask.delayed`` object that can be computed later. @@ -114,8 +121,9 @@ will return a ``dask.delayed`` object that can be computed later. .. ipython:: python from dask.diagnostics import ProgressBar + # or distributed.progress when using the distributed scheduler - delayed_obj = ds.to_netcdf('manipulated-example-data.nc', compute=False) + delayed_obj = ds.to_netcdf("manipulated-example-data.nc", compute=False) with ProgressBar(): results = delayed_obj.compute() @@ -141,8 +149,9 @@ Dask DataFrames do not support multi-indexes so the coordinate variables from th :suppress: import os - os.remove('example-data.nc') - os.remove('manipulated-example-data.nc') + + os.remove("example-data.nc") + os.remove("manipulated-example-data.nc") Using Dask with xarray ---------------------- @@ -199,7 +208,7 @@ Dask arrays using the :py:meth:`~xarray.Dataset.persist` method: .. 
ipython:: python - ds = ds.persist() + ds = ds.persist() :py:meth:`~xarray.Dataset.persist` is particularly useful when using a distributed cluster because the data will be loaded into distributed memory @@ -224,11 +233,11 @@ sizes of Dask arrays is done with the :py:meth:`~xarray.Dataset.chunk` method: .. ipython:: python :suppress: - ds = ds.chunk({'time': 10}) + ds = ds.chunk({"time": 10}) .. ipython:: python - rechunked = ds.chunk({'latitude': 100, 'longitude': 100}) + rechunked = ds.chunk({"latitude": 100, "longitude": 100}) You can view the size of existing chunks on an array by viewing the :py:attr:`~xarray.Dataset.chunks` attribute: @@ -256,6 +265,7 @@ lazy Dask arrays, in the :ref:`xarray.ufuncs ` module: .. ipython:: python import xarray.ufuncs as xu + xu.sin(rechunked) To access Dask arrays directly, use the new @@ -302,24 +312,32 @@ we use to calculate `Spearman's rank-correlation coefficient ` and @@ -453,15 +470,15 @@ dataset variables: .. ipython:: python - ds.rename({'temperature': 'temp', 'precipitation': 'precip'}) + ds.rename({"temperature": "temp", "precipitation": "precip"}) The related :py:meth:`~xarray.Dataset.swap_dims` method allows you do to swap dimension and non-dimension variables: .. ipython:: python - ds.coords['day'] = ('time', [6, 7, 8]) - ds.swap_dims({'time': 'day'}) + ds.coords["day"] = ("time", [6, 7, 8]) + ds.swap_dims({"time": "day"}) .. _coordinates: @@ -519,8 +536,8 @@ To convert back and forth between data and coordinates, you can use the .. ipython:: python ds.reset_coords() - ds.set_coords(['temperature', 'precipitation']) - ds['temperature'].reset_coords(drop=True) + ds.set_coords(["temperature", "precipitation"]) + ds["temperature"].reset_coords(drop=True) Notice that these operations skip coordinates with names given by dimensions, as used for indexing. This mostly because we are not entirely sure how to @@ -544,7 +561,7 @@ logic used for merging coordinates in arithmetic operations .. ipython:: python - alt = xr.Dataset(coords={'z': [10], 'lat': 0, 'lon': 0}) + alt = xr.Dataset(coords={"z": [10], "lat": 0, "lon": 0}) ds.coords.merge(alt.coords) The ``coords.merge`` method may be useful if you want to implement your own @@ -560,7 +577,7 @@ To convert a coordinate (or any ``DataArray``) into an actual .. ipython:: python - ds['time'].to_index() + ds["time"].to_index() A useful shortcut is the ``indexes`` property (on both ``DataArray`` and ``Dataset``), which lazily constructs a dictionary whose keys are given by each @@ -577,9 +594,10 @@ Xarray supports labeling coordinate values with a :py:class:`pandas.MultiIndex`: .. ipython:: python - midx = pd.MultiIndex.from_arrays([['R', 'R', 'V', 'V'], [.1, .2, .7, .9]], - names=('band', 'wn')) - mda = xr.DataArray(np.random.rand(4), coords={'spec': midx}, dims='spec') + midx = pd.MultiIndex.from_arrays( + [["R", "R", "V", "V"], [0.1, 0.2, 0.7, 0.9]], names=("band", "wn") + ) + mda = xr.DataArray(np.random.rand(4), coords={"spec": midx}, dims="spec") mda For convenience multi-index levels are directly accessible as "virtual" or @@ -587,8 +605,8 @@ For convenience multi-index levels are directly accessible as "virtual" or .. ipython:: python - mda['band'] - mda.wn + mda["band"] + mda.wn Indexing with multi-index levels is also possible using the ``sel`` method (see :ref:`multi-level indexing`). diff --git a/doc/faq.rst b/doc/faq.rst index 576cec5c2b1..a2b8be47e06 100644 --- a/doc/faq.rst +++ b/doc/faq.rst @@ -4,11 +4,12 @@ Frequently Asked Questions ========================== .. 
ipython:: python - :suppress: + :suppress: import numpy as np import pandas as pd import xarray as xr + np.random.seed(123456) @@ -103,21 +104,21 @@ code fragment .. ipython:: python arr = xr.DataArray([1, 2, 3]) - pd.Series({'x': arr[0], 'mean': arr.mean(), 'std': arr.std()}) + pd.Series({"x": arr[0], "mean": arr.mean(), "std": arr.std()}) does not yield the pandas DataFrame we expected. We need to specify the type conversion ourselves: .. ipython:: python - pd.Series({'x': arr[0], 'mean': arr.mean(), 'std': arr.std()}, dtype=float) + pd.Series({"x": arr[0], "mean": arr.mean(), "std": arr.std()}, dtype=float) Alternatively, we could use the ``item`` method or the ``float`` constructor to convert values one at a time .. ipython:: python - pd.Series({'x': arr[0].item(), 'mean': float(arr.mean())}) + pd.Series({"x": arr[0].item(), "mean": float(arr.mean())}) .. _approach to metadata: diff --git a/doc/groupby.rst b/doc/groupby.rst index 223185bd0d5..c72a26c45ea 100644 --- a/doc/groupby.rst +++ b/doc/groupby.rst @@ -26,11 +26,12 @@ Split Let's create a simple example dataset: .. ipython:: python - :suppress: + :suppress: import numpy as np import pandas as pd import xarray as xr + np.random.seed(123456) .. ipython:: python @@ -47,20 +48,20 @@ use a DataArray directly), we get back a ``GroupBy`` object: .. ipython:: python - ds.groupby('letters') + ds.groupby("letters") This object works very similarly to a pandas GroupBy object. You can view the group indices with the ``groups`` attribute: .. ipython:: python - ds.groupby('letters').groups + ds.groupby("letters").groups You can also iterate over groups in ``(label, group)`` pairs: .. ipython:: python - list(ds.groupby('letters')) + list(ds.groupby("letters")) Just like in pandas, creating a GroupBy object is cheap: it does not actually split the data until you access particular values. @@ -75,8 +76,8 @@ a customized coordinate, but xarray facilitates this via the .. ipython:: python - x_bins = [0,25,50] - ds.groupby_bins('x', x_bins).groups + x_bins = [0, 25, 50] + ds.groupby_bins("x", x_bins).groups The binning is implemented via :func:`pandas.cut`, whose documentation details how the bins are assigned. As seen in the example above, by default, the bins are @@ -86,8 +87,8 @@ choose `float` labels which identify the bin centers: .. ipython:: python - x_bin_labels = [12.5,37.5] - ds.groupby_bins('x', x_bins, labels=x_bin_labels).groups + x_bin_labels = [12.5, 37.5] + ds.groupby_bins("x", x_bins, labels=x_bin_labels).groups Apply @@ -102,7 +103,8 @@ concatenated back together along the group axis: def standardize(x): return (x - x.mean()) / x.std() - arr.groupby('letters').map(standardize) + + arr.groupby("letters").map(standardize) GroupBy objects also have a :py:meth:`~xarray.core.groupby.DatasetGroupBy.reduce` method and methods like :py:meth:`~xarray.core.groupby.DatasetGroupBy.mean` as shortcuts for applying an @@ -110,14 +112,14 @@ aggregation function: .. ipython:: python - arr.groupby('letters').mean(dim='x') + arr.groupby("letters").mean(dim="x") Using a groupby is thus also a convenient shortcut for aggregating over all dimensions *other than* the provided one: .. ipython:: python - ds.groupby('x').std(...) + ds.groupby("x").std(...) .. note:: @@ -134,7 +136,7 @@ values for group along the grouped dimension: .. ipython:: python - ds.groupby('letters').first(...) + ds.groupby("letters").first(...) By default, they skip missing values (control this with ``skipna``). @@ -149,9 +151,9 @@ coordinates. For example: .. 
ipython:: python - alt = arr.groupby('letters').mean(...) + alt = arr.groupby("letters").mean(...) alt - ds.groupby('letters') - alt + ds.groupby("letters") - alt This last line is roughly equivalent to the following:: @@ -169,11 +171,11 @@ the ``squeeze`` parameter: .. ipython:: python - next(iter(arr.groupby('x'))) + next(iter(arr.groupby("x"))) .. ipython:: python - next(iter(arr.groupby('x', squeeze=False))) + next(iter(arr.groupby("x", squeeze=False))) Although xarray will attempt to automatically :py:attr:`~xarray.DataArray.transpose` dimensions back into their original order @@ -197,13 +199,17 @@ __ http://cfconventions.org/cf-conventions/v1.6.0/cf-conventions.html#_two_dimen .. ipython:: python - da = xr.DataArray([[0,1],[2,3]], - coords={'lon': (['ny','nx'], [[30,40],[40,50]] ), - 'lat': (['ny','nx'], [[10,10],[20,20]] ),}, - dims=['ny','nx']) + da = xr.DataArray( + [[0, 1], [2, 3]], + coords={ + "lon": (["ny", "nx"], [[30, 40], [40, 50]]), + "lat": (["ny", "nx"], [[10, 10], [20, 20]]), + }, + dims=["ny", "nx"], + ) da - da.groupby('lon').sum(...) - da.groupby('lon').map(lambda x: x - x.mean(), shortcut=False) + da.groupby("lon").sum(...) + da.groupby("lon").map(lambda x: x - x.mean(), shortcut=False) Because multidimensional groups have the ability to generate a very large number of bins, coarse-binning via :py:meth:`~xarray.Dataset.groupby_bins` @@ -211,7 +217,7 @@ may be desirable: .. ipython:: python - da.groupby_bins('lon', [0,45,50]).sum() + da.groupby_bins("lon", [0, 45, 50]).sum() These methods group by `lon` values. It is also possible to groupby each cell in a grid, regardless of value, by stacking multiple dimensions, @@ -219,5 +225,5 @@ applying your function, and then unstacking the result: .. ipython:: python - stacked = da.stack(gridcell=['ny', 'nx']) - stacked.groupby('gridcell').sum(...).unstack('gridcell') + stacked = da.stack(gridcell=["ny", "nx"]) + stacked.groupby("gridcell").sum(...).unstack("gridcell") \ No newline at end of file diff --git a/doc/indexing.rst b/doc/indexing.rst index cfbb84a8343..af8e44fb80b 100644 --- a/doc/indexing.rst +++ b/doc/indexing.rst @@ -4,11 +4,12 @@ Indexing and selecting data =========================== .. ipython:: python - :suppress: + :suppress: import numpy as np import pandas as pd import xarray as xr + np.random.seed(123456) xarray offers extremely flexible indexing routines that combine the best @@ -60,9 +61,13 @@ DataArray: .. ipython:: python - da = xr.DataArray(np.random.rand(4, 3), - [('time', pd.date_range('2000-01-01', periods=4)), - ('space', ['IA', 'IL', 'IN'])]) + da = xr.DataArray( + np.random.rand(4, 3), + [ + ("time", pd.date_range("2000-01-01", periods=4)), + ("space", ["IA", "IL", "IN"]), + ], + ) da[:2] da[0, 0] da[:, [2, 1]] @@ -81,7 +86,7 @@ fast. To do label based indexing, use the :py:attr:`~xarray.DataArray.loc` attri .. ipython:: python - da.loc['2000-01-01':'2000-01-02', 'IA'] + da.loc["2000-01-01":"2000-01-02", "IA"] In this example, the selected is a subpart of the array in the range '2000-01-01':'2000-01-02' along the first coordinate `time` @@ -98,7 +103,7 @@ Setting values with label based indexing is also supported: .. ipython:: python - da.loc['2000-01-01', ['IL', 'IN']] = -10 + da.loc["2000-01-01", ["IL", "IN"]] = -10 da @@ -117,7 +122,7 @@ use them explicitly to slice data. There are two ways to do this: da[dict(space=0, time=slice(None, 2))] # index by dimension coordinate labels - da.loc[dict(time=slice('2000-01-01', '2000-01-02'))] + da.loc[dict(time=slice("2000-01-01", "2000-01-02"))] 2. 
Use the :py:meth:`~xarray.DataArray.sel` and :py:meth:`~xarray.DataArray.isel` convenience methods: @@ -128,7 +133,7 @@ use them explicitly to slice data. There are two ways to do this: da.isel(space=0, time=slice(None, 2)) # index by dimension coordinate labels - da.sel(time=slice('2000-01-01', '2000-01-02')) + da.sel(time=slice("2000-01-01", "2000-01-02")) The arguments to these methods can be any objects that could index the array along the dimension given by the keyword, e.g., labels for an individual value, @@ -156,16 +161,16 @@ enabling nearest neighbor (inexact) lookups by use of the methods ``'pad'``, .. ipython:: python - da = xr.DataArray([1, 2, 3], [('x', [0, 1, 2])]) - da.sel(x=[1.1, 1.9], method='nearest') - da.sel(x=0.1, method='backfill') - da.reindex(x=[0.5, 1, 1.5, 2, 2.5], method='pad') + da = xr.DataArray([1, 2, 3], [("x", [0, 1, 2])]) + da.sel(x=[1.1, 1.9], method="nearest") + da.sel(x=0.1, method="backfill") + da.reindex(x=[0.5, 1, 1.5, 2, 2.5], method="pad") Tolerance limits the maximum distance for valid matches with an inexact lookup: .. ipython:: python - da.reindex(x=[1.1, 1.5], method='nearest', tolerance=0.2) + da.reindex(x=[1.1, 1.5], method="nearest", tolerance=0.2) The method parameter is not yet supported if any of the arguments to ``.sel()`` is a ``slice`` object: @@ -173,7 +178,7 @@ to ``.sel()`` is a ``slice`` object: .. ipython:: :verbatim: - In [1]: da.sel(x=slice(1, 3), method='nearest') + In [1]: da.sel(x=slice(1, 3), method="nearest") NotImplementedError However, you don't need to use ``method`` to do inexact slicing. Slicing @@ -182,15 +187,15 @@ labels are monotonic increasing: .. ipython:: python - da.sel(x=slice(0.9, 3.1)) + da.sel(x=slice(0.9, 3.1)) Indexing axes with monotonic decreasing labels also works, as long as the ``slice`` or ``.loc`` arguments are also decreasing: .. ipython:: python - reversed_da = da[::-1] - reversed_da.loc[3.1:0.9] + reversed_da = da[::-1] + reversed_da.loc[3.1:0.9] .. note:: @@ -227,7 +232,7 @@ arrays). However, you can do normal indexing with dimension names: .. ipython:: python ds[dict(space=[0], time=[0])] - ds.loc[dict(time='2000-01-01')] + ds.loc[dict(time="2000-01-01")] Using indexing to *assign* values to a subset of dataset (e.g., ``ds[dict(space=0)] = 1``) is not yet supported. @@ -240,7 +245,7 @@ index labels along a dimension dropped: .. ipython:: python - ds.drop_sel(space=['IN', 'IL']) + ds.drop_sel(space=["IN", "IL"]) ``drop_sel`` is both a ``Dataset`` and ``DataArray`` method. @@ -249,7 +254,7 @@ Any variables with these dimensions are also dropped: .. ipython:: python - ds.drop_dims('time') + ds.drop_dims("time") .. _masking with where: @@ -263,7 +268,7 @@ xarray, use :py:meth:`~xarray.DataArray.where`: .. ipython:: python - da = xr.DataArray(np.arange(16).reshape(4, 4), dims=['x', 'y']) + da = xr.DataArray(np.arange(16).reshape(4, 4), dims=["x", "y"]) da.where(da.x + da.y < 4) This is particularly useful for ragged indexing of multi-dimensional data, @@ -296,7 +301,7 @@ multiple values, use :py:meth:`~xarray.DataArray.isin`: .. ipython:: python - da = xr.DataArray([1, 2, 3, 4, 5], dims=['x']) + da = xr.DataArray([1, 2, 3, 4, 5], dims=["x"]) da.isin([2, 4]) :py:meth:`~xarray.DataArray.isin` works particularly well with @@ -305,7 +310,7 @@ already labels of an array: .. 
ipython:: python - lookup = xr.DataArray([-1, -2, -3, -4, -5], dims=['x']) + lookup = xr.DataArray([-1, -2, -3, -4, -5], dims=["x"]) da.where(lookup.isin([-2, -4]), drop=True) However, some caution is in order: when done repeatedly, this type of indexing @@ -328,7 +333,6 @@ MATLAB, or after using the :py:func:`numpy.ix_` helper: .. ipython:: python - da = xr.DataArray( np.arange(12).reshape((3, 4)), dims=["x", "y"], @@ -344,8 +348,8 @@ dimensions: .. ipython:: python - ind_x = xr.DataArray([0, 1], dims=['x']) - ind_y = xr.DataArray([0, 1], dims=['y']) + ind_x = xr.DataArray([0, 1], dims=["x"]) + ind_y = xr.DataArray([0, 1], dims=["y"]) da[ind_x, ind_y] # orthogonal indexing da[ind_x, ind_x] # vectorized indexing @@ -364,7 +368,7 @@ indexers' dimension: .. ipython:: python - ind = xr.DataArray([[0, 1], [0, 1]], dims=['a', 'b']) + ind = xr.DataArray([[0, 1], [0, 1]], dims=["a", "b"]) da[ind] Similar to how NumPy's `advanced indexing`_ works, vectorized @@ -378,18 +382,18 @@ Vectorized indexing also works with ``isel``, ``loc``, and ``sel``: .. ipython:: python - ind = xr.DataArray([[0, 1], [0, 1]], dims=['a', 'b']) + ind = xr.DataArray([[0, 1], [0, 1]], dims=["a", "b"]) da.isel(y=ind) # same as da[:, ind] - ind = xr.DataArray([['a', 'b'], ['b', 'a']], dims=['a', 'b']) + ind = xr.DataArray([["a", "b"], ["b", "a"]], dims=["a", "b"]) da.loc[:, ind] # same as da.sel(y=ind) These methods may also be applied to ``Dataset`` objects .. ipython:: python - ds = da.to_dataset(name='bar') - ds.isel(x=xr.DataArray([0, 1, 2], dims=['points'])) + ds = da.to_dataset(name="bar") + ds.isel(x=xr.DataArray([0, 1, 2], dims=["points"])) .. tip:: @@ -476,8 +480,8 @@ Like ``numpy.ndarray``, value assignment sometimes works differently from what o .. ipython:: python - da = xr.DataArray([0, 1, 2, 3], dims=['x']) - ind = xr.DataArray([0, 0, 0], dims=['x']) + da = xr.DataArray([0, 1, 2, 3], dims=["x"]) + ind = xr.DataArray([0, 0, 0], dims=["x"]) da[ind] -= 1 da @@ -511,7 +515,7 @@ __ https://docs.scipy.org/doc/numpy/user/basics.indexing.html#assigning-values-t .. ipython:: python - da = xr.DataArray([0, 1, 2, 3], dims=['x']) + da = xr.DataArray([0, 1, 2, 3], dims=["x"]) # DO NOT do this da.isel(x=[0, 1, 2])[1] = -1 da @@ -581,15 +585,15 @@ To reindex a particular dimension, use :py:meth:`~xarray.DataArray.reindex`: .. ipython:: python - da.reindex(space=['IA', 'CA']) + da.reindex(space=["IA", "CA"]) The :py:meth:`~xarray.DataArray.reindex_like` method is a useful shortcut. To demonstrate, we will make a subset DataArray with new values: .. ipython:: python - foo = da.rename('foo') - baz = (10 * da[:2, :2]).rename('baz') + foo = da.rename("foo") + baz = (10 * da[:2, :2]).rename("baz") baz Reindexing ``foo`` with ``baz`` selects out the first two values along each @@ -611,8 +615,8 @@ The :py:func:`~xarray.align` function lets us perform more flexible database-lik .. 
ipython:: python - xr.align(foo, baz, join='inner') - xr.align(foo, baz, join='outer') + xr.align(foo, baz, join="inner") + xr.align(foo, baz, join="outer") Both ``reindex_like`` and ``align`` work interchangeably between :py:class:`~xarray.DataArray` and :py:class:`~xarray.Dataset` objects, and with any number of matching dimension names: @@ -621,7 +625,7 @@ Both ``reindex_like`` and ``align`` work interchangeably between ds ds.reindex_like(baz) - other = xr.DataArray(['a', 'b', 'c'], dims='other') + other = xr.DataArray(["a", "b", "c"], dims="other") # this is a no-op, because there are no shared dimension names ds.reindex_like(other) @@ -636,7 +640,7 @@ integer-based indexing as a fallback for dimensions without a coordinate label: .. ipython:: python - da = xr.DataArray([1, 2, 3], dims='x') + da = xr.DataArray([1, 2, 3], dims="x") da.sel(x=[0, -1]) Alignment between xarray objects where one or both do not have coordinate labels @@ -675,9 +679,9 @@ labels: .. ipython:: python - da = xr.DataArray([1, 2, 3], dims='x') + da = xr.DataArray([1, 2, 3], dims="x") da - da.get_index('x') + da.get_index("x") .. _copies_vs_views: @@ -721,7 +725,6 @@ pandas: .. ipython:: python - midx = pd.MultiIndex.from_product([list("abc"), [0, 1]], names=("one", "two")) mda = xr.DataArray(np.random.rand(6, 3), [("x", midx), ("y", range(3))]) mda @@ -732,20 +735,20 @@ a slice of tuples: .. ipython:: python - mda.sel(x=[('a', 0), ('b', 1)]) + mda.sel(x=[("a", 0), ("b", 1)]) Additionally, xarray supports dictionaries: .. ipython:: python - mda.sel(x={'one': 'a', 'two': 0}) + mda.sel(x={"one": "a", "two": 0}) For convenience, ``sel`` also accepts multi-index levels directly as keyword arguments: .. ipython:: python - mda.sel(one='a', two=0) + mda.sel(one="a", two=0) Note that using ``sel`` it is not possible to mix a dimension indexer with level indexers for that dimension @@ -757,7 +760,7 @@ multi-index is reduced to a single index. .. ipython:: python - mda.loc[{'one': 'a'}, ...] + mda.loc[{"one": "a"}, ...] Unlike pandas, xarray does not guess whether you provide index levels or dimensions when using ``loc`` in some ambiguous cases. For example, for diff --git a/doc/internals.rst b/doc/internals.rst index a4870f2316a..c23aab8c5d7 100644 --- a/doc/internals.rst +++ b/doc/internals.rst @@ -46,11 +46,12 @@ Extending xarray ---------------- .. ipython:: python - :suppress: + :suppress: import numpy as np import pandas as pd import xarray as xr + np.random.seed(123456) xarray is designed as a general purpose library, and hence tries to avoid @@ -87,11 +88,12 @@ defined that returns an instance of your class: .. code-block:: python - class Dataset: - ... - @property - def geo(self) - return GeoAccessor(self) + class Dataset: + ... + + @property + def geo(self): + return GeoAccessor(self) However, using the register accessor decorators is preferable to simply adding your own ad-hoc property (i.e., ``Dataset.geo = property(...)``), for several @@ -116,14 +118,13 @@ reasons: Back in an interactive IPython session, we can use these properties: .. ipython:: python - :suppress: + :suppress: - exec(open("examples/_code/accessor_example.py").read()) + exec(open("examples/_code/accessor_example.py").read()) .. 
ipython:: python - ds = xr.Dataset({'longitude': np.linspace(0, 10), - 'latitude': np.linspace(0, 20)}) + ds = xr.Dataset({"longitude": np.linspace(0, 10), "latitude": np.linspace(0, 20)}) ds.geo.center ds.geo.plot() diff --git a/doc/interpolation.rst b/doc/interpolation.rst index 4cf39807e5a..c2922813e15 100644 --- a/doc/interpolation.rst +++ b/doc/interpolation.rst @@ -4,11 +4,12 @@ Interpolating data ================== .. ipython:: python - :suppress: + :suppress: import numpy as np import pandas as pd import xarray as xr + np.random.seed(123456) xarray offers flexible interpolation routines, which have a similar interface @@ -27,9 +28,10 @@ indexing of a :py:class:`~xarray.DataArray`, .. ipython:: python - da = xr.DataArray(np.sin(0.3 * np.arange(12).reshape(4, 3)), - [('time', np.arange(4)), - ('space', [0.1, 0.2, 0.3])]) + da = xr.DataArray( + np.sin(0.3 * np.arange(12).reshape(4, 3)), + [("time", np.arange(4)), ("space", [0.1, 0.2, 0.3])], + ) # label lookup da.sel(time=3) @@ -52,16 +54,17 @@ To interpolate data with a :py:doc:`numpy.datetime64 .. ipython:: python - da_dt64 = xr.DataArray([1, 3], - [('time', pd.date_range('1/1/2000', '1/3/2000', periods=2))]) - da_dt64.interp(time='2000-01-02') + da_dt64 = xr.DataArray( + [1, 3], [("time", pd.date_range("1/1/2000", "1/3/2000", periods=2))] + ) + da_dt64.interp(time="2000-01-02") The interpolated data can be merged into the original :py:class:`~xarray.DataArray` by specifying the time periods required. .. ipython:: python - da_dt64.interp(time=pd.date_range('1/1/2000', '1/3/2000', periods=3)) + da_dt64.interp(time=pd.date_range("1/1/2000", "1/3/2000", periods=3)) Interpolation of data indexed by a :py:class:`~xarray.CFTimeIndex` is also allowed. See :ref:`CFTimeIndex` for examples. @@ -108,9 +111,10 @@ different coordinates, .. ipython:: python - other = xr.DataArray(np.sin(0.4 * np.arange(9).reshape(3, 3)), - [('time', [0.9, 1.9, 2.9]), - ('space', [0.15, 0.25, 0.35])]) + other = xr.DataArray( + np.sin(0.4 * np.arange(9).reshape(3, 3)), + [("time", [0.9, 1.9, 2.9]), ("space", [0.15, 0.25, 0.35])], + ) it might be a good idea to first interpolate ``da`` so that it will stay on the same coordinates of ``other``, and then subtract it. @@ -118,9 +122,9 @@ same coordinates of ``other``, and then subtract it. .. ipython:: python - # interpolate da along other's coordinates - interpolated = da.interp_like(other) - interpolated + # interpolate da along other's coordinates + interpolated = da.interp_like(other) + interpolated It is now possible to safely compute the difference ``other - interpolated``. @@ -135,12 +139,15 @@ The interpolation method can be specified by the optional ``method`` argument. .. ipython:: python - da = xr.DataArray(np.sin(np.linspace(0, 2 * np.pi, 10)), dims='x', - coords={'x': np.linspace(0, 1, 10)}) + da = xr.DataArray( + np.sin(np.linspace(0, 2 * np.pi, 10)), + dims="x", + coords={"x": np.linspace(0, 1, 10)}, + ) - da.plot.line('o', label='original') - da.interp(x=np.linspace(0, 1, 100)).plot.line(label='linear (default)') - da.interp(x=np.linspace(0, 1, 100), method='cubic').plot.line(label='cubic') + da.plot.line("o", label="original") + da.interp(x=np.linspace(0, 1, 100)).plot.line(label="linear (default)") + da.interp(x=np.linspace(0, 1, 100), method="cubic").plot.line(label="cubic") @savefig interpolation_sample1.png width=4in plt.legend() @@ -149,15 +156,16 @@ Additional keyword arguments can be passed to scipy's functions. .. ipython:: python # fill 0 for the outside of the original coordinates. 
- da.interp(x=np.linspace(-0.5, 1.5, 10), kwargs={'fill_value': 0.0}) + da.interp(x=np.linspace(-0.5, 1.5, 10), kwargs={"fill_value": 0.0}) # 1-dimensional extrapolation - da.interp(x=np.linspace(-0.5, 1.5, 10), kwargs={'fill_value': 'extrapolate'}) + da.interp(x=np.linspace(-0.5, 1.5, 10), kwargs={"fill_value": "extrapolate"}) # multi-dimensional extrapolation - da = xr.DataArray(np.sin(0.3 * np.arange(12).reshape(4, 3)), - [('time', np.arange(4)), - ('space', [0.1, 0.2, 0.3])]) + da = xr.DataArray( + np.sin(0.3 * np.arange(12).reshape(4, 3)), + [("time", np.arange(4)), ("space", [0.1, 0.2, 0.3])], + ) - da.interp(time=4, space=np.linspace(-0.1, 0.5, 10), kwargs={'fill_value': None}) + da.interp(time=4, space=np.linspace(-0.1, 0.5, 10), kwargs={"fill_value": None}) Advanced Interpolation @@ -181,17 +189,18 @@ For example: .. ipython:: python - da = xr.DataArray(np.sin(0.3 * np.arange(20).reshape(5, 4)), - [('x', np.arange(5)), - ('y', [0.1, 0.2, 0.3, 0.4])]) + da = xr.DataArray( + np.sin(0.3 * np.arange(20).reshape(5, 4)), + [("x", np.arange(5)), ("y", [0.1, 0.2, 0.3, 0.4])], + ) # advanced indexing - x = xr.DataArray([0, 2, 4], dims='z') - y = xr.DataArray([0.1, 0.2, 0.3], dims='z') + x = xr.DataArray([0, 2, 4], dims="z") + y = xr.DataArray([0.1, 0.2, 0.3], dims="z") da.sel(x=x, y=y) # advanced interpolation - x = xr.DataArray([0.5, 1.5, 2.5], dims='z') - y = xr.DataArray([0.15, 0.25, 0.35], dims='z') + x = xr.DataArray([0.5, 1.5, 2.5], dims="z") + y = xr.DataArray([0.15, 0.25, 0.35], dims="z") da.interp(x=x, y=y) where values on the original coordinates @@ -203,9 +212,8 @@ If you want to add a coordinate to the new dimension ``z``, you can supply .. ipython:: python - x = xr.DataArray([0.5, 1.5, 2.5], dims='z', coords={'z': ['a', 'b','c']}) - y = xr.DataArray([0.15, 0.25, 0.35], dims='z', - coords={'z': ['a', 'b','c']}) + x = xr.DataArray([0.5, 1.5, 2.5], dims="z", coords={"z": ["a", "b", "c"]}) + y = xr.DataArray([0.15, 0.25, 0.35], dims="z", coords={"z": ["a", "b", "c"]}) da.interp(x=x, y=y) For the details of the advanced indexing, @@ -224,19 +232,18 @@ while other methods such as ``cubic`` or ``quadratic`` return all NaN arrays. .. ipython:: python - da = xr.DataArray([0, 2, np.nan, 3, 3.25], dims='x', - coords={'x': range(5)}) + da = xr.DataArray([0, 2, np.nan, 3, 3.25], dims="x", coords={"x": range(5)}) da.interp(x=[0.5, 1.5, 2.5]) - da.interp(x=[0.5, 1.5, 2.5], method='cubic') + da.interp(x=[0.5, 1.5, 2.5], method="cubic") To avoid this, you can drop NaN by :py:meth:`~xarray.DataArray.dropna`, and then make the interpolation .. ipython:: python - dropped = da.dropna('x') + dropped = da.dropna("x") dropped - dropped.interp(x=[0.5, 1.5, 2.5], method='cubic') + dropped.interp(x=[0.5, 1.5, 2.5], method="cubic") If NaNs are distributed randomly in your multidimensional array, dropping all the columns containing more than one NaNs by @@ -246,7 +253,7 @@ which is similar to :py:meth:`pandas.Series.interpolate`. .. ipython:: python - filled = da.interpolate_na(dim='x') + filled = da.interpolate_na(dim="x") filled This fills NaN by interpolating along the specified dimension. @@ -254,7 +261,7 @@ After filling NaNs, you can interpolate: .. ipython:: python - filled.interp(x=[0.5, 1.5, 2.5], method='cubic') + filled.interp(x=[0.5, 1.5, 2.5], method="cubic") For the details of :py:meth:`~xarray.DataArray.interpolate_na`, see :ref:`Missing values `. @@ -268,18 +275,18 @@ Let's see how :py:meth:`~xarray.DataArray.interp` works on real data. .. 
ipython:: python # Raw data - ds = xr.tutorial.open_dataset('air_temperature').isel(time=0) + ds = xr.tutorial.open_dataset("air_temperature").isel(time=0) fig, axes = plt.subplots(ncols=2, figsize=(10, 4)) ds.air.plot(ax=axes[0]) - axes[0].set_title('Raw data') + axes[0].set_title("Raw data") # Interpolated data - new_lon = np.linspace(ds.lon[0], ds.lon[-1], ds.dims['lon'] * 4) - new_lat = np.linspace(ds.lat[0], ds.lat[-1], ds.dims['lat'] * 4) + new_lon = np.linspace(ds.lon[0], ds.lon[-1], ds.dims["lon"] * 4) + new_lat = np.linspace(ds.lat[0], ds.lat[-1], ds.dims["lat"] * 4) dsi = ds.interp(lat=new_lat, lon=new_lon) dsi.air.plot(ax=axes[1]) @savefig interpolation_sample3.png width=8in - axes[1].set_title('Interpolated data') + axes[1].set_title("Interpolated data") Our advanced interpolation can be used to remap the data to the new coordinate. Consider the new coordinates x and z on the two dimensional plane. @@ -291,20 +298,23 @@ The remapping can be done as follows x = np.linspace(240, 300, 100) z = np.linspace(20, 70, 100) # relation between new and original coordinates - lat = xr.DataArray(z, dims=['z'], coords={'z': z}) - lon = xr.DataArray((x[:, np.newaxis]-270)/np.cos(z*np.pi/180)+270, - dims=['x', 'z'], coords={'x': x, 'z': z}) + lat = xr.DataArray(z, dims=["z"], coords={"z": z}) + lon = xr.DataArray( + (x[:, np.newaxis] - 270) / np.cos(z * np.pi / 180) + 270, + dims=["x", "z"], + coords={"x": x, "z": z}, + ) fig, axes = plt.subplots(ncols=2, figsize=(10, 4)) ds.air.plot(ax=axes[0]) # draw the new coordinate on the original coordinates. for idx in [0, 33, 66, 99]: - axes[0].plot(lon.isel(x=idx), lat, '--k') + axes[0].plot(lon.isel(x=idx), lat, "--k") for idx in [0, 33, 66, 99]: - axes[0].plot(*xr.broadcast(lon.isel(z=idx), lat.isel(z=idx)), '--k') - axes[0].set_title('Raw data') + axes[0].plot(*xr.broadcast(lon.isel(z=idx), lat.isel(z=idx)), "--k") + axes[0].set_title("Raw data") dsi = ds.interp(lon=lon, lat=lat) dsi.air.plot(ax=axes[1]) @savefig interpolation_sample4.png width=8in - axes[1].set_title('Remapped data') + axes[1].set_title("Remapped data") \ No newline at end of file diff --git a/doc/io.rst b/doc/io.rst index 0c666099df8..738d8d2b7ab 100644 --- a/doc/io.rst +++ b/doc/io.rst @@ -9,11 +9,12 @@ simple :ref:`io.pickle` files to the more flexible :ref:`io.netcdf` format (recommended). .. ipython:: python - :suppress: + :suppress: import numpy as np import pandas as pd import xarray as xr + np.random.seed(123456) .. _io.netcdf: @@ -52,12 +53,16 @@ We can save a Dataset to disk using the .. ipython:: python - ds = xr.Dataset({'foo': (('x', 'y'), np.random.rand(4, 5))}, - coords={'x': [10, 20, 30, 40], - 'y': pd.date_range('2000-01-01', periods=5), - 'z': ('x', list('abcd'))}) + ds = xr.Dataset( + {"foo": (("x", "y"), np.random.rand(4, 5))}, + coords={ + "x": [10, 20, 30, 40], + "y": pd.date_range("2000-01-01", periods=5), + "z": ("x", list("abcd")), + }, + ) - ds.to_netcdf('saved_on_disk.nc') + ds.to_netcdf("saved_on_disk.nc") By default, the file is saved as netCDF4 (assuming netCDF4-Python is installed). You can control the format and engine used to write the file with @@ -76,7 +81,7 @@ We can load netCDF files to create a new Dataset using .. ipython:: python - ds_disk = xr.open_dataset('saved_on_disk.nc') + ds_disk = xr.open_dataset("saved_on_disk.nc") ds_disk Similarly, a DataArray can be saved to disk using the @@ -117,7 +122,7 @@ netCDF file. However, it's often cleaner to use a ``with`` statement: .. 
ipython:: python # this automatically closes the dataset after use - with xr.open_dataset('saved_on_disk.nc') as ds: + with xr.open_dataset("saved_on_disk.nc") as ds: print(ds.keys()) Although xarray provides reasonable support for incremental reads of files on @@ -171,7 +176,7 @@ You can view this encoding information (among others) in the .. ipython:: :verbatim: - In [1]: ds_disk['y'].encoding + In [1]: ds_disk["y"].encoding Out[1]: {'zlib': False, 'shuffle': False, @@ -469,7 +474,7 @@ and currently raises a warning unless ``invalid_netcdf=True`` is set: :okwarning: # Writing complex valued data - da = xr.DataArray([1.+1.j, 2.+2.j, 3.+3.j]) + da = xr.DataArray([1.0 + 1.0j, 2.0 + 2.0j, 3.0 + 3.0j]) da.to_netcdf("complex.nc", engine="h5netcdf", invalid_netcdf=True) # Reading it back @@ -479,7 +484,8 @@ and currently raises a warning unless ``invalid_netcdf=True`` is set: :suppress: import os - os.remove('complex.nc') + + os.remove("complex.nc") .. warning:: @@ -499,9 +505,11 @@ installed xarray can convert a ``DataArray`` into a ``Cube`` using .. ipython:: python - da = xr.DataArray(np.random.rand(4, 5), dims=['x', 'y'], - coords=dict(x=[10, 20, 30, 40], - y=pd.date_range('2000-01-01', periods=5))) + da = xr.DataArray( + np.random.rand(4, 5), + dims=["x", "y"], + coords=dict(x=[10, 20, 30, 40], y=pd.date_range("2000-01-01", periods=5)), + ) cube = da.to_iris() cube @@ -548,8 +556,9 @@ __ http://iri.columbia.edu/ :verbatim: In [3]: remote_data = xr.open_dataset( - ...: 'http://iridl.ldeo.columbia.edu/SOURCES/.OSU/.PRISM/.monthly/dods', - ...: decode_times=False) + ...: "http://iridl.ldeo.columbia.edu/SOURCES/.OSU/.PRISM/.monthly/dods", + ...: decode_times=False, + ...: ) In [4]: remote_data Out[4]: @@ -587,7 +596,7 @@ over the network until we look at particular values: .. ipython:: :verbatim: - In [4]: tmax = remote_data['tmax'][:500, ::3, ::3] + In [4]: tmax = remote_data["tmax"][:500, ::3, ::3] In [5]: tmax Out[5]: @@ -715,7 +724,8 @@ search indices or other automated data discovery tools. :suppress: import os - os.remove('saved_on_disk.nc') + + os.remove("saved_on_disk.nc") .. _io.rasterio: @@ -729,7 +739,7 @@ rasterio is installed. Here is an example of how to use .. ipython:: :verbatim: - In [7]: rio = xr.open_rasterio('RGB.byte.tif') + In [7]: rio = xr.open_rasterio("RGB.byte.tif") In [8]: rio Out[8]: @@ -769,7 +779,7 @@ GDAL readable raster data using `rasterio`_ as well as for exporting to a geoTIF In [1]: import rioxarray - In [2]: rds = rioxarray.open_rasterio('RGB.byte.tif') + In [2]: rds = rioxarray.open_rasterio("RGB.byte.tif") In [3]: rds Out[3]: @@ -799,7 +809,7 @@ GDAL readable raster data using `rasterio`_ as well as for exporting to a geoTIF In [6]: rds4326.rio.crs Out[6]: CRS.from_epsg(4326) - In [7]: rds4326.rio.to_raster('RGB.byte.4326.tif') + In [7]: rds4326.rio.to_raster("RGB.byte.4326.tif") .. _rasterio: https://rasterio.readthedocs.io/en/latest/ @@ -831,17 +841,21 @@ xarray. To write a dataset with zarr, we use the :py:attr:`Dataset.to_zarr` meth To write to a local directory, we pass a path to a directory .. ipython:: python - :suppress: + :suppress: ! rm -rf path/to/directory.zarr .. 
ipython:: python - ds = xr.Dataset({'foo': (('x', 'y'), np.random.rand(4, 5))}, - coords={'x': [10, 20, 30, 40], - 'y': pd.date_range('2000-01-01', periods=5), - 'z': ('x', list('abcd'))}) - ds.to_zarr('path/to/directory.zarr') + ds = xr.Dataset( + {"foo": (("x", "y"), np.random.rand(4, 5))}, + coords={ + "x": [10, 20, 30, 40], + "y": pd.date_range("2000-01-01", periods=5), + "z": ("x", list("abcd")), + }, + ) + ds.to_zarr("path/to/directory.zarr") (The suffix ``.zarr`` is optional--just a reminder that a zarr store lives there.) If the directory does not exist, it will be created. If a zarr @@ -854,22 +868,30 @@ It is also possible to append to an existing store. For that, set can be omitted as it will internally be set to ``'a'``. .. ipython:: python - :suppress: + :suppress: ! rm -rf path/to/directory.zarr .. ipython:: python - ds1 = xr.Dataset({'foo': (('x', 'y', 't'), np.random.rand(4, 5, 2))}, - coords={'x': [10, 20, 30, 40], - 'y': [1,2,3,4,5], - 't': pd.date_range('2001-01-01', periods=2)}) - ds1.to_zarr('path/to/directory.zarr') - ds2 = xr.Dataset({'foo': (('x', 'y', 't'), np.random.rand(4, 5, 2))}, - coords={'x': [10, 20, 30, 40], - 'y': [1,2,3,4,5], - 't': pd.date_range('2001-01-03', periods=2)}) - ds2.to_zarr('path/to/directory.zarr', append_dim='t') + ds1 = xr.Dataset( + {"foo": (("x", "y", "t"), np.random.rand(4, 5, 2))}, + coords={ + "x": [10, 20, 30, 40], + "y": [1, 2, 3, 4, 5], + "t": pd.date_range("2001-01-01", periods=2), + }, + ) + ds1.to_zarr("path/to/directory.zarr") + ds2 = xr.Dataset( + {"foo": (("x", "y", "t"), np.random.rand(4, 5, 2))}, + coords={ + "x": [10, 20, 30, 40], + "y": [1, 2, 3, 4, 5], + "t": pd.date_range("2001-01-03", periods=2), + }, + ) + ds2.to_zarr("path/to/directory.zarr", append_dim="t") To store variable length strings use ``dtype=object``. @@ -878,7 +900,7 @@ To read back a zarr dataset that has been created this way, we use the .. ipython:: python - ds_zarr = xr.open_zarr('path/to/directory.zarr') + ds_zarr = xr.open_zarr("path/to/directory.zarr") ds_zarr Cloud Storage Buckets @@ -912,15 +934,16 @@ These options can be passed to the ``to_zarr`` method as variable encoding. For example: .. ipython:: python - :suppress: + :suppress: ! rm -rf foo.zarr .. ipython:: python import zarr - compressor = zarr.Blosc(cname='zstd', clevel=3, shuffle=2) - ds.to_zarr('foo.zarr', encoding={'foo': {'compressor': compressor}}) + + compressor = zarr.Blosc(cname="zstd", clevel=3, shuffle=2) + ds.to_zarr("foo.zarr", encoding={"foo": {"compressor": compressor}}) .. note:: @@ -959,11 +982,12 @@ be done directly from zarr, as described in the .. _io.cfgrib: .. ipython:: python - :suppress: + :suppress: import shutil - shutil.rmtree('foo.zarr') - shutil.rmtree('path/to/directory.zarr') + + shutil.rmtree("foo.zarr") + shutil.rmtree("path/to/directory.zarr") GRIB format via cfgrib ---------------------- @@ -975,7 +999,7 @@ to :py:func:`open_dataset`: .. ipython:: :verbatim: - In [1]: ds_grib = xr.open_dataset('example.grib', engine='cfgrib') + In [1]: ds_grib = xr.open_dataset("example.grib", engine="cfgrib") We recommend installing ecCodes via conda:: diff --git a/doc/pandas.rst b/doc/pandas.rst index b0ec2a117dc..acf1d16b6ee 100644 --- a/doc/pandas.rst +++ b/doc/pandas.rst @@ -20,6 +20,7 @@ __ http://seaborn.pydata.org/ import numpy as np import pandas as pd import xarray as xr + np.random.seed(123456) Hierarchical and tidy data @@ -47,10 +48,15 @@ To convert any dataset to a ``DataFrame`` in tidy form, use the .. 
ipython:: python

- ds = xr.Dataset({'foo': (('x', 'y'), np.random.randn(2, 3))},
- coords={'x': [10, 20], 'y': ['a', 'b', 'c'],
- 'along_x': ('x', np.random.randn(2)),
- 'scalar': 123})
+ ds = xr.Dataset(
+ {"foo": (("x", "y"), np.random.randn(2, 3))},
+ coords={
+ "x": [10, 20],
+ "y": ["a", "b", "c"],
+ "along_x": ("x", np.random.randn(2)),
+ "scalar": 123,
+ },
+ )
 ds
 df = ds.to_dataframe()
 df

@@ -91,7 +97,7 @@ DataFrames:

 .. ipython:: python

- s = ds['foo'].to_series()
+ s = ds["foo"].to_series()
 s
 # or equivalently, with Series.to_xarray()
 xr.DataArray.from_series(s)

@@ -117,8 +123,9 @@ available in pandas (i.e., a 1D array is converted to a

 .. ipython:: python

- arr = xr.DataArray(np.random.randn(2, 3),
- coords=[('x', [10, 20]), ('y', ['a', 'b', 'c'])])
+ arr = xr.DataArray(
+ np.random.randn(2, 3), coords=[("x", [10, 20]), ("y", ["a", "b", "c"])]
+ )
 df = arr.to_pandas()
 df

@@ -136,9 +143,10 @@ preserve all use of multi-indexes:

 .. ipython:: python

- index = pd.MultiIndex.from_arrays([['a', 'a', 'b'], [0, 1, 2]],
- names=['one', 'two'])
- df = pd.DataFrame({'x': 1, 'y': 2}, index=index)
+ index = pd.MultiIndex.from_arrays(
+ [["a", "a", "b"], [0, 1, 2]], names=["one", "two"]
+ )
+ df = pd.DataFrame({"x": 1, "y": 2}, index=index)
 ds = xr.Dataset(df)
 ds

@@ -175,9 +183,9 @@ Let's take a look:

 .. ipython:: python

 data = np.random.RandomState(0).rand(2, 3, 4)
- items = list('ab')
- major_axis = list('mno')
- minor_axis = pd.date_range(start='2000', periods=4, name='date')
+ items = list("ab")
+ major_axis = list("mno")
+ minor_axis = pd.date_range(start="2000", periods=4, name="date")

 With old versions of pandas (prior to 0.25), this could be stored in a ``Panel``:

@@ -207,7 +215,7 @@ You can also easily convert this data into ``Dataset``:

 .. ipython:: python

- array.to_dataset(dim='dim_0')
+ array.to_dataset(dim="dim_0")

 Here, there are two data variables, each representing a DataFrame on panel's
 ``items`` axis, and labeled as such. Each variable is a 2D array of the
diff --git a/doc/plotting.rst b/doc/plotting.rst
index f3d9c0213de..fb30417e2c6 100644
--- a/doc/plotting.rst
+++ b/doc/plotting.rst
@@ -56,6 +56,7 @@ Imports

 # Use defaults so we don't get gridlines in generated docs
 import matplotlib as mpl
+
 mpl.rcdefaults()

 The following imports are necessary for all of the examples.

@@ -71,7 +72,7 @@ For these examples we'll use the North American air temperature dataset.

 .. ipython:: python

- airtemps = xr.tutorial.open_dataset('air_temperature')
+ airtemps = xr.tutorial.open_dataset("air_temperature")
 airtemps

 # Convert to celsius

@@ -79,7 +80,7 @@ For these examples we'll use the North American air temperature dataset.

 # copy attributes to get nice figure labels and change Kelvin to Celsius
 air.attrs = airtemps.air.attrs
- air.attrs['units'] = 'deg C'
+ air.attrs["units"] = "deg C"

.. note:: Until :issue:`1614` is solved, you might need to copy over the metadata in ``attrs`` to get informative figure labels (as was done above).

@@ -126,7 +127,7 @@ can be used:

.. ipython:: python

 @savefig plotting_1d_additional_args.png width=4in
- air1d[:200].plot.line('b-^')
+ air1d[:200].plot.line("b-^")

.. note:: Not all xarray plotting methods support passing positional arguments

@@ -138,7 +139,7 @@ Keyword arguments work the same way, and are more explicit.

.. 
ipython:: python @savefig plotting_example_sin3.png width=4in - air1d[:200].plot.line(color='purple', marker='o') + air1d[:200].plot.line(color="purple", marker="o") ========================= Adding to Existing Axis @@ -219,7 +220,7 @@ plots to check the variation of air temperature at three different latitudes alo .. ipython:: python @savefig plotting_example_multiple_lines_x_kwarg.png - air.isel(lon=10, lat=[19,21,22]).plot.line(x='time') + air.isel(lon=10, lat=[19, 21, 22]).plot.line(x="time") It is required to explicitly specify either @@ -240,7 +241,7 @@ It is also possible to make line plots such that the data are on the x-axis and .. ipython:: python @savefig plotting_example_xy_kwarg.png - air.isel(time=10, lon=[10, 11]).plot(y='lat', hue='lon') + air.isel(time=10, lon=[10, 11]).plot(y="lat", hue="lon") ============ Step plots @@ -253,7 +254,7 @@ made using 1D data. :okwarning: @savefig plotting_example_step.png width=4in - air1d[:20].plot.step(where='mid') + air1d[:20].plot.step(where="mid") The argument ``where`` defines where the steps should be placed, options are ``'pre'`` (default), ``'post'``, and ``'mid'``. This is particularly handy @@ -261,15 +262,15 @@ when plotting data grouped with :py:meth:`Dataset.groupby_bins`. .. ipython:: python - air_grp = air.mean(['time','lon']).groupby_bins('lat',[0,23.5,66.5,90]) + air_grp = air.mean(["time", "lon"]).groupby_bins("lat", [0, 23.5, 66.5, 90]) air_mean = air_grp.mean() air_std = air_grp.std() air_mean.plot.step() - (air_mean + air_std).plot.step(ls=':') - (air_mean - air_std).plot.step(ls=':') - plt.ylim(-20,30) + (air_mean + air_std).plot.step(ls=":") + (air_mean - air_std).plot.step(ls=":") + plt.ylim(-20, 30) @savefig plotting_example_step_groupby.png width=4in - plt.title('Zonal mean temperature') + plt.title("Zonal mean temperature") In this case, the actual boundaries of the bins are used and the ``where`` argument is ignored. @@ -284,7 +285,9 @@ The keyword arguments ``xincrease`` and ``yincrease`` let you control the axes d .. ipython:: python @savefig plotting_example_xincrease_yincrease_kwarg.png - air.isel(time=10, lon=[10, 11]).plot.line(y='lat', hue='lon', xincrease=False, yincrease=False) + air.isel(time=10, lon=[10, 11]).plot.line( + y="lat", hue="lon", xincrease=False, yincrease=False + ) In addition, one can use ``xscale, yscale`` to set axes scaling; ``xticks, yticks`` to set axes ticks and ``xlim, ylim`` to set axes limits. These accept the same values as the matplotlib methods ``Axes.set_(x,y)scale()``, ``Axes.set_(x,y)ticks()``, ``Axes.set_(x,y)lim()`` respectively. @@ -348,7 +351,7 @@ produce plots with nonuniform coordinates. b = air2d.copy() # Apply a nonlinear transformation to one of the coords - b.coords['lat'] = np.log(b.coords['lat']) + b.coords["lat"] = np.log(b.coords["lat"]) @savefig plotting_nonuniform_coords.png width=4in b.plot() @@ -363,9 +366,9 @@ matplotlib is available. .. ipython:: python air2d.plot(cmap=plt.cm.Blues) - plt.title('These colors prove North America\nhas fallen in the ocean') - plt.ylabel('latitude') - plt.xlabel('longitude') + plt.title("These colors prove North America\nhas fallen in the ocean") + plt.ylabel("latitude") + plt.xlabel("longitude") plt.tight_layout() @savefig plotting_2d_call_matplotlib.png width=4in @@ -381,7 +384,7 @@ matplotlib is available. .. 
ipython:: python - plt.xlabel('Never gonna see this.') + plt.xlabel("Never gonna see this.") air2d.plot() @savefig plotting_2d_call_matplotlib2.png width=4in @@ -473,10 +476,10 @@ if using ``imshow`` or ``pcolormesh`` (but not with ``contour`` or ``contourf``, since levels are chosen automatically). .. ipython:: python - :okwarning: + :okwarning: @savefig plotting_seaborn_palette.png width=4in - air2d.plot(levels=10, cmap='husl') + air2d.plot(levels=10, cmap="husl") plt.draw() .. _plotting.faceting: @@ -520,14 +523,16 @@ arguments to the xarray plotting methods/functions. This returns a .. ipython:: python @savefig plot_facet_dataarray.png - g_simple = t.plot(x='lon', y='lat', col='time', col_wrap=3) + g_simple = t.plot(x="lon", y="lat", col="time", col_wrap=3) Faceting also works for line plots. .. ipython:: python @savefig plot_facet_dataarray_line.png - g_simple_line = t.isel(lat=slice(0,None,4)).plot(x='lon', hue='lat', col='time', col_wrap=3) + g_simple_line = t.isel(lat=slice(0, None, 4)).plot( + x="lon", hue="lat", col="time", col_wrap=3 + ) =============== 4 dimensional @@ -541,12 +546,12 @@ one were much hotter. .. ipython:: python t2 = t.isel(time=slice(0, 2)) - t4d = xr.concat([t2, t2 + 40], pd.Index(['normal', 'hot'], name='fourth_dim')) + t4d = xr.concat([t2, t2 + 40], pd.Index(["normal", "hot"], name="fourth_dim")) # This is a 4d array t4d.coords @savefig plot_facet_4d.png - t4d.plot(x='lon', y='lat', col='time', row='fourth_dim') + t4d.plot(x="lon", y="lat", col="time", row="fourth_dim") ================ Other features @@ -555,9 +560,9 @@ one were much hotter. Faceted plotting supports other arguments common to xarray 2d plots. .. ipython:: python - :suppress: + :suppress: - plt.close('all') + plt.close("all") .. ipython:: python @@ -566,9 +571,15 @@ Faceted plotting supports other arguments common to xarray 2d plots. hasoutliers[-1, -1, -1] = 400 @savefig plot_facet_robust.png - g = hasoutliers.plot.pcolormesh('lon', 'lat', col='time', col_wrap=3, - robust=True, cmap='viridis', - cbar_kwargs={'label': 'this has outliers'}) + g = hasoutliers.plot.pcolormesh( + "lon", + "lat", + col="time", + col_wrap=3, + robust=True, + cmap="viridis", + cbar_kwargs={"label": "this has outliers"}, + ) =================== FacetGrid Objects @@ -594,20 +605,20 @@ It's possible to select the :py:class:`xarray.DataArray` or .. ipython:: python - g.data.loc[g.name_dicts[0, 0]] + g.data.loc[g.name_dicts[0, 0]] Here is an example of using the lower level API and then modifying the axes after they have been plotted. .. ipython:: python - g = t.plot.imshow('lon', 'lat', col='time', col_wrap=3, robust=True) + g = t.plot.imshow("lon", "lat", col="time", col_wrap=3, robust=True) for i, ax in enumerate(g.axes.flat): - ax.set_title('Air Temperature %d' % i) + ax.set_title("Air Temperature %d" % i) bottomright = g.axes[-1, -1] - bottomright.annotate('bottom right', (240, 40)) + bottomright.annotate("bottom right", (240, 40)) @savefig plot_facet_iterator.png plt.draw() @@ -632,8 +643,8 @@ Consider this dataset .. ipython:: python - ds = xr.tutorial.scatter_example_dataset() - ds + ds = xr.tutorial.scatter_example_dataset() + ds Suppose we want to scatter ``A`` against ``B`` @@ -641,14 +652,14 @@ Suppose we want to scatter ``A`` against ``B`` .. ipython:: python @savefig ds_simple_scatter.png - ds.plot.scatter(x='A', y='B') + ds.plot.scatter(x="A", y="B") The ``hue`` kwarg lets you vary the color by variable value .. 
ipython:: python @savefig ds_hue_scatter.png - ds.plot.scatter(x='A', y='B', hue='w') + ds.plot.scatter(x="A", y="B", hue="w") When ``hue`` is specified, a colorbar is added for numeric ``hue`` DataArrays by default and a legend is added for non-numeric ``hue`` DataArrays (as above). @@ -659,21 +670,21 @@ Additionally, the boolean kwarg ``add_guide`` can be used to prevent the display ds = ds.assign(w=[1, 2, 3, 5]) @savefig ds_discrete_legend_hue_scatter.png - ds.plot.scatter(x='A', y='B', hue='w', hue_style='discrete') + ds.plot.scatter(x="A", y="B", hue="w", hue_style="discrete") The ``markersize`` kwarg lets you vary the point's size by variable value. You can additionally pass ``size_norm`` to control how the variable's values are mapped to point sizes. .. ipython:: python @savefig ds_hue_size_scatter.png - ds.plot.scatter(x='A', y='B', hue='z', hue_style='discrete', markersize='z') + ds.plot.scatter(x="A", y="B", hue="z", hue_style="discrete", markersize="z") Faceting is also possible .. ipython:: python @savefig ds_facet_scatter.png - ds.plot.scatter(x='A', y='B', col='x', row='z', hue='w', hue_style='discrete') + ds.plot.scatter(x="A", y="B", col="x", row="z", hue="w", hue_style="discrete") For more advanced scatter plots, we recommend converting the relevant data variables to a pandas DataFrame and using the extensive plotting capabilities of ``seaborn``. @@ -691,11 +702,15 @@ This script will plot the air temperature on a map. .. ipython:: python import cartopy.crs as ccrs - air = xr.tutorial.open_dataset('air_temperature').air + + air = xr.tutorial.open_dataset("air_temperature").air + ax = plt.axes(projection=ccrs.Orthographic(-80, 35)) - air.isel(time=0).plot.contourf(ax=ax, transform=ccrs.PlateCarree()); + air.isel(time=0).plot.contourf(ax=ax, transform=ccrs.PlateCarree()) + ax.set_global() + @savefig plotting_maps_cartopy.png width=100% - ax.set_global(); ax.coastlines(); + ax.coastlines() When faceting on maps, the projection can be transferred to the ``plot`` function using the ``subplot_kws`` keyword. The axes for the subplots created @@ -703,13 +718,16 @@ by faceting are accessible in the object returned by ``plot``: .. ipython:: python - p = air.isel(time=[0, 4]).plot(transform=ccrs.PlateCarree(), col='time', - subplot_kws={'projection': ccrs.Orthographic(-80, 35)}) + p = air.isel(time=[0, 4]).plot( + transform=ccrs.PlateCarree(), + col="time", + subplot_kws={"projection": ccrs.Orthographic(-80, 35)}, + ) for ax in p.axes.flat: ax.coastlines() ax.gridlines() @savefig plotting_maps_cartopy_facetting.png width=100% - plt.draw(); + plt.draw() Details @@ -732,6 +750,7 @@ These are provided for user convenience; they all call the same code. .. ipython:: python import xarray.plot as xplt + da = xr.DataArray(range(5)) fig, axes = plt.subplots(ncols=2, nrows=2) da.plot(ax=axes[0, 0]) @@ -766,8 +785,7 @@ read on. .. ipython:: python - a0 = xr.DataArray(np.zeros((4, 3, 2)), dims=('y', 'x', 'z'), - name='temperature') + a0 = xr.DataArray(np.zeros((4, 3, 2)), dims=("y", "x", "z"), name="temperature") a0[0, 0, 0] = 1 a = a0.isel(z=0) a @@ -801,14 +819,16 @@ instead of the default ones: .. 
ipython:: python lon, lat = np.meshgrid(np.linspace(-20, 20, 5), np.linspace(0, 30, 4)) - lon += lat/10 - lat += lon/10 - da = xr.DataArray(np.arange(20).reshape(4, 5), dims=['y', 'x'], - coords = {'lat': (('y', 'x'), lat), - 'lon': (('y', 'x'), lon)}) + lon += lat / 10 + lat += lon / 10 + da = xr.DataArray( + np.arange(20).reshape(4, 5), + dims=["y", "x"], + coords={"lat": (("y", "x"), lat), "lon": (("y", "x"), lon)}, + ) @savefig plotting_example_2d_irreg.png width=4in - da.plot.pcolormesh('lon', 'lat'); + da.plot.pcolormesh("lon", "lat") Note that in this case, xarray still follows the pixel centered convention. This might be undesirable in some cases, for example when your data is defined @@ -818,22 +838,25 @@ this convention when plotting on a map: .. ipython:: python import cartopy.crs as ccrs - ax = plt.subplot(projection=ccrs.PlateCarree()); - da.plot.pcolormesh('lon', 'lat', ax=ax); - ax.scatter(lon, lat, transform=ccrs.PlateCarree()); + + ax = plt.subplot(projection=ccrs.PlateCarree()) + da.plot.pcolormesh("lon", "lat", ax=ax) + ax.scatter(lon, lat, transform=ccrs.PlateCarree()) + ax.coastlines() @savefig plotting_example_2d_irreg_map.png width=4in - ax.coastlines(); ax.gridlines(draw_labels=True); + ax.gridlines(draw_labels=True) You can however decide to infer the cell boundaries and use the ``infer_intervals`` keyword: .. ipython:: python - ax = plt.subplot(projection=ccrs.PlateCarree()); - da.plot.pcolormesh('lon', 'lat', ax=ax, infer_intervals=True); - ax.scatter(lon, lat, transform=ccrs.PlateCarree()); + ax = plt.subplot(projection=ccrs.PlateCarree()) + da.plot.pcolormesh("lon", "lat", ax=ax, infer_intervals=True) + ax.scatter(lon, lat, transform=ccrs.PlateCarree()) + ax.coastlines() @savefig plotting_example_2d_irreg_map_infer.png width=4in - ax.coastlines(); ax.gridlines(draw_labels=True); + ax.gridlines(draw_labels=True) .. note:: The data model of xarray does not support datasets with `cell boundaries`_ @@ -847,6 +870,6 @@ One can also make line plots with multidimensional coordinates. In this case, `` .. ipython:: python f, ax = plt.subplots(2, 1) - da.plot.line(x='lon', hue='y', ax=ax[0]); + da.plot.line(x="lon", hue="y", ax=ax[0]) @savefig plotting_example_2d_hue_xy.png - da.plot.line(x='lon', hue='x', ax=ax[1]); + da.plot.line(x="lon", hue="x", ax=ax[1]) diff --git a/doc/quick-overview.rst b/doc/quick-overview.rst index 741b3d1a5fe..09b0d4c6fbb 100644 --- a/doc/quick-overview.rst +++ b/doc/quick-overview.rst @@ -22,16 +22,14 @@ array or list, with optional *dimensions* and *coordinates*: .. ipython:: python - data = xr.DataArray(np.random.randn(2, 3), - dims=('x', 'y'), - coords={'x': [10, 20]}) + data = xr.DataArray(np.random.randn(2, 3), dims=("x", "y"), coords={"x": [10, 20]}) data In this case, we have generated a 2D array, assigned the names *x* and *y* to the two dimensions respectively and associated two *coordinate labels* '10' and '20' with the two locations along the x dimension. If you supply a pandas :py:class:`~pandas.Series` or :py:class:`~pandas.DataFrame`, metadata is copied directly: .. ipython:: python - xr.DataArray(pd.Series(range(3), index=list('abc'), name='foo')) + xr.DataArray(pd.Series(range(3), index=list("abc"), name="foo")) Here are the key properties for a ``DataArray``: @@ -75,13 +73,13 @@ While you're setting up your DataArray, it's often a good idea to set metadata a .. 
ipython:: python

- data.attrs['long_name'] = 'random velocity'
- data.attrs['units'] = 'metres/sec'
- data.attrs['description'] = 'A random variable created as an example.'
- data.attrs['random_attribute'] = 123
+ data.attrs["long_name"] = "random velocity"
+ data.attrs["units"] = "metres/sec"
+ data.attrs["description"] = "A random variable created as an example."
+ data.attrs["random_attribute"] = 123
 data.attrs
 # you can add metadata to coordinates too
- data.x.attrs['units'] = 'x units'
+ data.x.attrs["units"] = "x units"


 Computation
@@ -102,15 +100,15 @@ numbers:

 .. ipython:: python

- data.mean(dim='x')
+ data.mean(dim="x")

 Arithmetic operations broadcast based on dimension name. This means you don't
 need to insert dummy dimensions for alignment:

 .. ipython:: python

- a = xr.DataArray(np.random.randn(3), [data.coords['y']])
- b = xr.DataArray(np.random.randn(4), dims='z')
+ a = xr.DataArray(np.random.randn(3), [data.coords["y"]])
+ b = xr.DataArray(np.random.randn(4), dims="z")
 a
 b

@@ -139,9 +137,9 @@ xarray supports grouped operations using a very similar API to pandas (see :ref:

 .. ipython:: python

- labels = xr.DataArray(['E', 'F', 'E'], [data.coords['y']], name='labels')
+ labels = xr.DataArray(["E", "F", "E"], [data.coords["y"]], name="labels")
 labels
- data.groupby(labels).mean('y')
+ data.groupby(labels).mean("y")
 data.groupby(labels).map(lambda x: x - x.min())

 Plotting
@@ -178,7 +176,7 @@ objects. You can think of it as a multi-dimensional generalization of the

 .. ipython:: python

- ds = xr.Dataset({'foo': data, 'bar': ('x', [1, 2]), 'baz': np.pi})
+ ds = xr.Dataset({"foo": data, "bar": ("x", [1, 2]), "baz": np.pi})
 ds


This creates a dataset with three DataArrays named ``foo``, ``bar`` and ``baz``.

 .. ipython:: python

- ds['foo']
+ ds["foo"]
 ds.foo


You can directly read and write xarray objects to disk using :py:meth:`~xarray.D

 .. ipython:: python

- ds.to_netcdf('example.nc')
- xr.open_dataset('example.nc')
+ ds.to_netcdf("example.nc")
+ xr.open_dataset("example.nc")

 .. ipython:: python
- :suppress:
+ :suppress:

 import os
- os.remove('example.nc')
+
+ os.remove("example.nc")

It is common for datasets to be distributed across multiple files (commonly one file per timestep). xarray supports this use-case by providing the :py:meth:`~xarray.open_mfdataset` and the :py:meth:`~xarray.save_mfdataset` methods. For more, see :ref:`io`.
diff --git a/doc/reshaping.rst b/doc/reshaping.rst
index 465ca14dfc2..40de9ea799a 100644
--- a/doc/reshaping.rst
+++ b/doc/reshaping.rst
@@ -7,11 +7,12 @@ Reshaping and reorganizing data
 These methods allow you to reorganize

 .. ipython:: python
- :suppress:
+ :suppress:

 import numpy as np
 import pandas as pd
 import xarray as xr
+
 np.random.seed(123456)

 Reordering dimensions

ellipsis (`...`) can be used to represent all other dimensions:

 .. ipython:: python

- ds = xr.Dataset({'foo': (('x', 'y', 'z'), [[[42]]]), 'bar': (('y', 'z'), [[24]])})
- ds.transpose('y', 'z', 'x')
- ds.transpose(..., 'x') # equivalent
+ ds = xr.Dataset({"foo": (("x", "y", "z"), [[[42]]]), "bar": (("y", "z"), [[24]])})
+ ds.transpose("y", "z", "x")
+ ds.transpose(..., "x") # equivalent
 ds.transpose() # reverses all dimensions

 Expand and squeeze dimensions

use :py:meth:`~xarray.DataArray.expand_dims`

 .. ipython:: python

- expanded = ds.expand_dims('w')
+ expanded = ds.expand_dims("w")
 expanded

 This method attaches a new dimension with size 1 to all data variables. 
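
For instance, a round trip through ``expand_dims`` and the ``squeeze`` method
shown in the next hunk leaves the original dimensions intact. A minimal
sketch, assuming only the ``xr`` import used throughout these docs; the
one-variable dataset is illustrative, not taken from the files being diffed:

.. ipython:: python

    tiny = xr.Dataset({"foo": ("x", [1, 2, 3])})
    tiny.expand_dims("w").dims  # a new dimension "w" of size 1 is attached
    tiny.expand_dims("w").squeeze("w").dims  # squeezing "w" restores the shape
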
@@ -48,7 +49,7 @@ use :py:meth:`~xarray.DataArray.squeeze` .. ipython:: python - expanded.squeeze('w') + expanded.squeeze("w") Converting between datasets and arrays -------------------------------------- @@ -69,14 +70,14 @@ To convert back from a DataArray to a Dataset, use .. ipython:: python - arr.to_dataset(dim='variable') + arr.to_dataset(dim="variable") The broadcasting behavior of ``to_array`` means that the resulting array includes the union of data variable dimensions: .. ipython:: python - ds2 = xr.Dataset({'a': 0, 'b': ('x', [3, 4, 5])}) + ds2 = xr.Dataset({"a": 0, "b": ("x", [3, 4, 5])}) # the input dataset has 4 elements ds2 @@ -90,7 +91,7 @@ If you use ``to_dataset`` without supplying the ``dim`` argument, the DataArray .. ipython:: python - arr.to_dataset(name='combined') + arr.to_dataset(name="combined") .. _reshape.stack: @@ -103,11 +104,12 @@ implemented :py:meth:`~xarray.DataArray.stack` and .. ipython:: python - array = xr.DataArray(np.random.randn(2, 3), - coords=[('x', ['a', 'b']), ('y', [0, 1, 2])]) - stacked = array.stack(z=('x', 'y')) + array = xr.DataArray( + np.random.randn(2, 3), coords=[("x", ["a", "b"]), ("y", [0, 1, 2])] + ) + stacked = array.stack(z=("x", "y")) stacked - stacked.unstack('z') + stacked.unstack("z") As elsewhere in xarray, an ellipsis (`...`) can be used to represent all unlisted dimensions: @@ -128,15 +130,15 @@ possible levels. Missing levels are filled in with ``NaN`` in the resulting obje stacked2 = stacked[::2] stacked2 - stacked2.unstack('z') + stacked2.unstack("z") However, xarray's ``stack`` has an important difference from pandas: unlike pandas, it does not automatically drop missing values. Compare: .. ipython:: python - array = xr.DataArray([[np.nan, 1], [2, 3]], dims=['x', 'y']) - array.stack(z=('x', 'y')) + array = xr.DataArray([[np.nan, 1], [2, 3]], dims=["x", "y"]) + array.stack(z=("x", "y")) array.to_pandas().stack() We departed from pandas's behavior here because predictable shapes for new @@ -166,16 +168,15 @@ like this: .. ipython:: python - data = xr.Dataset( - data_vars={'a': (('x', 'y'), [[0, 1, 2], [3, 4, 5]]), - 'b': ('x', [6, 7])}, - coords={'y': ['u', 'v', 'w']} - ) - data - stacked = data.to_stacked_array("z", sample_dims=['x']) - stacked - unstacked = stacked.to_unstacked_dataset("z") - unstacked + data = xr.Dataset( + data_vars={"a": (("x", "y"), [[0, 1, 2], [3, 4, 5]]), "b": ("x", [6, 7])}, + coords={"y": ["u", "v", "w"]}, + ) + data + stacked = data.to_stacked_array("z", sample_dims=["x"]) + stacked + unstacked = stacked.to_unstacked_dataset("z") + unstacked In this example, ``stacked`` is a two dimensional array that we can easily pass to a scikit-learn or another generic numerical method. @@ -202,19 +203,23 @@ coordinates using :py:meth:`~xarray.DataArray.set_index`: .. ipython:: python - da = xr.DataArray(np.random.rand(4), - coords={'band': ('x', ['a', 'a', 'b', 'b']), - 'wavenumber': ('x', np.linspace(200, 400, 4))}, - dims='x') - da - mda = da.set_index(x=['band', 'wavenumber']) - mda + da = xr.DataArray( + np.random.rand(4), + coords={ + "band": ("x", ["a", "a", "b", "b"]), + "wavenumber": ("x", np.linspace(200, 400, 4)), + }, + dims="x", + ) + da + mda = da.set_index(x=["band", "wavenumber"]) + mda These coordinates can now be used for indexing, e.g., .. ipython:: python - mda.sel(band='a') + mda.sel(band="a") Conversely, you can use :py:meth:`~xarray.DataArray.reset_index` to extract multi-index levels as coordinates (this is mainly useful @@ -222,14 +227,14 @@ for serialization): .. 
ipython:: python

- mda.reset_index('x')
+ mda.reset_index("x")

:py:meth:`~xarray.DataArray.reorder_levels` allows changing the order
of multi-index levels:

 .. ipython:: python

- mda.reorder_levels(x=['wavenumber', 'band'])
+ mda.reorder_levels(x=["wavenumber", "band"])

As of xarray v0.9 coordinate labels for each dimension are optional.
You can also use ``.set_index`` / ``.reset_index`` to add / remove
labels for one or several dimensions:

 .. ipython:: python

- array = xr.DataArray([1, 2, 3], dims='x')
+ array = xr.DataArray([1, 2, 3], dims="x")
 array
- array['c'] = ('x', ['a', 'b', 'c'])
- array.set_index(x='c')
- array = array.set_index(x='c')
- array = array.reset_index('x', drop=True)
+ array["c"] = ("x", ["a", "b", "c"])
+ array.set_index(x="c")
+ array = array.set_index(x="c")
+ array = array.reset_index("x", drop=True)

 .. _reshape.shift_and_roll:

To adjust coordinate labels, you can use the :py:meth:`~xarray.Dataset.shift` an

 .. ipython:: python

- array = xr.DataArray([1, 2, 3, 4], dims='x')
- array.shift(x=2)
- array.roll(x=2, roll_coords=True)
+ array = xr.DataArray([1, 2, 3, 4], dims="x")
+ array.shift(x=2)
+ array.roll(x=2, roll_coords=True)

 .. _reshape.sort:

One may sort a DataArray/Dataset via :py:meth:`~xarray.DataArray.sortby` and

 .. ipython:: python

- ds = xr.Dataset({'A': (('x', 'y'), [[1, 2], [3, 4]]),
- 'B': (('x', 'y'), [[5, 6], [7, 8]])},
- coords={'x': ['b', 'a'], 'y': [1, 0]})
- dax = xr.DataArray([100, 99], [('x', [0, 1])])
- day = xr.DataArray([90, 80], [('y', [0, 1])])
- ds.sortby([day, dax])
+ ds = xr.Dataset(
+ {"A": (("x", "y"), [[1, 2], [3, 4]]), "B": (("x", "y"), [[5, 6], [7, 8]])},
+ coords={"x": ["b", "a"], "y": [1, 0]},
+ )
+ dax = xr.DataArray([100, 99], [("x", [0, 1])])
+ day = xr.DataArray([90, 80], [("y", [0, 1])])
+ ds.sortby([day, dax])

As a shortcut, you can refer to existing coordinates by name:

 .. ipython:: python

- ds.sortby('x')
- ds.sortby(['y', 'x'])
- ds.sortby(['y', 'x'], ascending=False)
+ ds.sortby("x")
+ ds.sortby(["y", "x"])
+ ds.sortby(["y", "x"], ascending=False)
\ No newline at end of file
diff --git a/doc/time-series.rst b/doc/time-series.rst
index d838dbbd4cd..96a2edc0ea5 100644
--- a/doc/time-series.rst
+++ b/doc/time-series.rst
@@ -10,11 +10,12 @@ data in pandas such a joy to xarray. In most cases, we rely on pandas
for the core functionality.

 .. ipython:: python
- :suppress:
+ :suppress:

 import numpy as np
 import pandas as pd
 import xarray as xr
+
 np.random.seed(123456)

 Creating datetime64 data

using :py:func:`pandas.to_datetime` and :py:func:`pandas.date_range`:

 .. ipython:: python

- pd.to_datetime(['2000-01-01', '2000-02-02'])
- pd.date_range('2000-01-01', periods=365)
+ pd.to_datetime(["2000-01-01", "2000-02-02"])
+ pd.date_range("2000-01-01", periods=365)

Alternatively, you can supply arrays of Python ``datetime`` objects. These get
converted automatically when used as arguments in xarray objects:

 .. ipython:: python

 import datetime
-
- xr.Dataset({'time': datetime.datetime(2000, 1, 1)})
+
+ xr.Dataset({"time": datetime.datetime(2000, 1, 1)})

When reading or writing netCDF files, xarray automatically decodes datetime and
timedelta arrays using `CF conventions`_ (that is, by using a ``units``
attribute

You can manually decode arrays in this form by passing a dataset to

.. 
ipython:: python - attrs = {'units': 'hours since 2000-01-01'} - ds = xr.Dataset({'time': ('time', [0, 1, 2, 3], attrs)}) + attrs = {"units": "hours since 2000-01-01"} + ds = xr.Dataset({"time": ("time", [0, 1, 2, 3], attrs)}) xr.decode_cf(ds) One unfortunate limitation of using ``datetime64[ns]`` is that it limits the @@ -87,10 +89,10 @@ items and with the `slice` object: .. ipython:: python - time = pd.date_range('2000-01-01', freq='H', periods=365 * 24) - ds = xr.Dataset({'foo': ('time', np.arange(365 * 24)), 'time': time}) - ds.sel(time='2000-01') - ds.sel(time=slice('2000-06-01', '2000-06-10')) + time = pd.date_range("2000-01-01", freq="H", periods=365 * 24) + ds = xr.Dataset({"foo": ("time", np.arange(365 * 24)), "time": time}) + ds.sel(time="2000-01") + ds.sel(time=slice("2000-06-01", "2000-06-10")) You can also select a particular time by indexing with a :py:class:`datetime.time` object: @@ -113,8 +115,8 @@ given ``DataArray`` can be quickly computed using a special ``.dt`` accessor. .. ipython:: python - time = pd.date_range('2000-01-01', freq='6H', periods=365 * 4) - ds = xr.Dataset({'foo': ('time', np.arange(365 * 4)), 'time': time}) + time = pd.date_range("2000-01-01", freq="6H", periods=365 * 4) + ds = xr.Dataset({"foo": ("time", np.arange(365 * 4)), "time": time}) ds.time.dt.hour ds.time.dt.dayofweek @@ -130,16 +132,16 @@ __ http://pandas.pydata.org/pandas-docs/stable/api.html#time-date-components .. ipython:: python - ds['time.month'] - ds['time.dayofyear'] + ds["time.month"] + ds["time.dayofyear"] For use as a derived coordinate, xarray adds ``'season'`` to the list of datetime components supported by pandas: .. ipython:: python - ds['time.season'] - ds['time'].dt.season + ds["time.season"] + ds["time"].dt.season The set of valid seasons consists of 'DJF', 'MAM', 'JJA' and 'SON', labeled by the first letters of the corresponding months. @@ -152,7 +154,7 @@ __ http://pandas.pydata.org/pandas-docs/stable/timeseries.html#offset-aliases .. ipython:: python - ds['time'].dt.floor('D') + ds["time"].dt.floor("D") The ``.dt`` accessor can also be used to generate formatted datetime strings for arrays utilising the same formatting as the standard `datetime.strftime`_. @@ -161,7 +163,7 @@ for arrays utilising the same formatting as the standard `datetime.strftime`_. .. ipython:: python - ds['time'].dt.strftime('%a, %b %d %H:%M') + ds["time"].dt.strftime("%a, %b %d %H:%M") .. _resampling: @@ -173,9 +175,9 @@ Datetime components couple particularly well with grouped operations (see calculate the mean by time of day: .. ipython:: python - :okwarning: + :okwarning: - ds.groupby('time.hour').mean() + ds.groupby("time.hour").mean() For upsampling or downsampling temporal resolutions, xarray offers a :py:meth:`~xarray.Dataset.resample` method building on the core functionality @@ -187,25 +189,25 @@ same api as ``resample`` `in pandas`_. For example, we can downsample our dataset from hourly to 6-hourly: .. ipython:: python - :okwarning: + :okwarning: - ds.resample(time='6H') + ds.resample(time="6H") This will create a specialized ``Resample`` object which saves information necessary for resampling. All of the reduction methods which work with ``Resample`` objects can also be used for resampling: .. ipython:: python - :okwarning: + :okwarning: - ds.resample(time='6H').mean() + ds.resample(time="6H").mean() You can also supply an arbitrary reduction function to aggregate over each resampling group: .. 
ipython:: python - ds.resample(time='6H').reduce(np.mean) + ds.resample(time="6H").reduce(np.mean) For upsampling, xarray provides six methods: ``asfreq``, ``ffill``, ``bfill``, ``pad``, ``nearest`` and ``interpolate``. ``interpolate`` extends ``scipy.interpolate.interp1d`` @@ -218,7 +220,7 @@ Data that has indices outside of the given ``tolerance`` are set to ``NaN``. .. ipython:: python - ds.resample(time='1H').nearest(tolerance='1H') + ds.resample(time="1H").nearest(tolerance="1H") For more examples of using grouped operations on a time dimension, see diff --git a/doc/weather-climate.rst b/doc/weather-climate.rst index 768cf6556f9..1eb63d24630 100644 --- a/doc/weather-climate.rst +++ b/doc/weather-climate.rst @@ -4,7 +4,7 @@ Weather and climate data ======================== .. ipython:: python - :suppress: + :suppress: import xarray as xr @@ -56,11 +56,14 @@ coordinate with dates from a no-leap calendar and a .. ipython:: python - from itertools import product - from cftime import DatetimeNoLeap - dates = [DatetimeNoLeap(year, month, 1) for year, month in - product(range(1, 3), range(1, 13))] - da = xr.DataArray(np.arange(24), coords=[dates], dims=['time'], name='foo') + from itertools import product + from cftime import DatetimeNoLeap + + dates = [ + DatetimeNoLeap(year, month, 1) + for year, month in product(range(1, 3), range(1, 13)) + ] + da = xr.DataArray(np.arange(24), coords=[dates], dims=["time"], name="foo") xarray also includes a :py:func:`~xarray.cftime_range` function, which enables creating a :py:class:`~xarray.CFTimeIndex` with regularly-spaced dates. For @@ -68,8 +71,8 @@ instance, we can create the same dates and DataArray we created above using: .. ipython:: python - dates = xr.cftime_range(start='0001', periods=24, freq='MS', calendar='noleap') - da = xr.DataArray(np.arange(24), coords=[dates], dims=['time'], name='foo') + dates = xr.cftime_range(start="0001", periods=24, freq="MS", calendar="noleap") + da = xr.DataArray(np.arange(24), coords=[dates], dims=["time"], name="foo") With :py:meth:`~xarray.CFTimeIndex.strftime` we can also easily generate formatted strings from the datetime values of a :py:class:`~xarray.CFTimeIndex` directly or through the @@ -80,8 +83,8 @@ using the same formatting as the standard `datetime.strftime`_ convention . .. ipython:: python - dates.strftime('%c') - da['time'].dt.strftime('%Y%m%d') + dates.strftime("%c") + da["time"].dt.strftime("%Y%m%d") For data indexed by a :py:class:`~xarray.CFTimeIndex` xarray currently supports: @@ -90,8 +93,8 @@ For data indexed by a :py:class:`~xarray.CFTimeIndex` xarray currently supports: .. ipython:: python - da.sel(time='0001') - da.sel(time=slice('0001-05', '0002-02')) + da.sel(time="0001") + da.sel(time=slice("0001-05", "0002-02")) - Access of basic datetime components via the ``dt`` accessor (in this case just "year", "month", "day", "hour", "minute", "second", "microsecond", @@ -99,64 +102,65 @@ For data indexed by a :py:class:`~xarray.CFTimeIndex` xarray currently supports: .. ipython:: python - da.time.dt.year - da.time.dt.month - da.time.dt.season - da.time.dt.dayofyear - da.time.dt.dayofweek - da.time.dt.days_in_month + da.time.dt.year + da.time.dt.month + da.time.dt.season + da.time.dt.dayofyear + da.time.dt.dayofweek + da.time.dt.days_in_month - Rounding of datetimes to fixed frequencies via the ``dt`` accessor: .. 
ipython:: python

- da.time.dt.ceil('3D')
- da.time.dt.floor('5D')
- da.time.dt.round('2D')
+ da.time.dt.ceil("3D")
+ da.time.dt.floor("5D")
+ da.time.dt.round("2D")

- Group-by operations based on datetime accessor attributes (e.g. by month of
 the year):

 .. ipython:: python

- da.groupby('time.month').sum()
+ da.groupby("time.month").sum()

- Interpolation using :py:class:`cftime.datetime` objects:

 .. ipython:: python

- da.interp(time=[DatetimeNoLeap(1, 1, 15), DatetimeNoLeap(1, 2, 15)])
+ da.interp(time=[DatetimeNoLeap(1, 1, 15), DatetimeNoLeap(1, 2, 15)])

- Interpolation using datetime strings:

 .. ipython:: python

- da.interp(time=['0001-01-15', '0001-02-15'])
+ da.interp(time=["0001-01-15", "0001-02-15"])

- Differentiation:

 .. ipython:: python

- da.differentiate('time')
+ da.differentiate("time")

- Serialization:

 .. ipython:: python

- da.to_netcdf('example-no-leap.nc')
- xr.open_dataset('example-no-leap.nc')
+ da.to_netcdf("example-no-leap.nc")
+ xr.open_dataset("example-no-leap.nc")

 .. ipython:: python
 :suppress:

 import os
- os.remove('example-no-leap.nc')
+
+ os.remove("example-no-leap.nc")

- And resampling along the time dimension for data indexed by a
 :py:class:`~xarray.CFTimeIndex`:

 .. ipython:: python

- da.resample(time='81T', closed='right', label='right', base=3).mean()
+ da.resample(time="81T", closed="right", label="right", base=3).mean()

 .. note::

@@ -168,13 +172,13 @@ For data indexed by a :py:class:`~xarray.CFTimeIndex` xarray currently supports:
 method:

 .. ipython:: python
- :okwarning:
+ :okwarning:

- modern_times = xr.cftime_range('2000', periods=24, freq='MS', calendar='noleap')
- da = xr.DataArray(range(24), [('time', modern_times)])
+ modern_times = xr.cftime_range("2000", periods=24, freq="MS", calendar="noleap")
+ da = xr.DataArray(range(24), [("time", modern_times)])
 da
- datetimeindex = da.indexes['time'].to_datetimeindex()
- da['time'] = datetimeindex
+ datetimeindex = da.indexes["time"].to_datetimeindex()
+ da["time"] = datetimeindex

 However in this case one should use caution to only perform operations which
 do not depend on differences between dates (e.g. differentiation,
diff --git a/doc/whats-new.rst b/doc/whats-new.rst
index b71e0baa655..42e20bbf1bd 100644
--- a/doc/whats-new.rst
+++ b/doc/whats-new.rst
@@ -4,13 +4,14 @@ What's New
 ==========

 .. ipython:: python
- :suppress:
+ :suppress:

 import numpy as np
 import pandas as pd
 import xarray as xray
 import xarray
 import xarray as xr
+
 np.random.seed(123456)

 .. _whats-new.0.16.0:

@@ -109,6 +110,8 @@ Documentation
 of ``kwargs`` in :py:meth:`Dataset.interp` and :py:meth:`DataArray.interp`
 for 1-d and n-d interpolation (:pull:`3956`).
 By `Matthias Riße `_.
+- Apply ``black`` to all the code in the documentation (:pull:`4012`)
+ By `Justus Magin `_.

 Internal Changes
 ~~~~~~~~~~~~~~~~

@@ -1965,8 +1968,8 @@ Enhancements

 .. ipython:: python

- ds = xr.Dataset({'a': 1})
- np.sin(ds)
+ ds = xr.Dataset({"a": 1})
+ np.sin(ds)

 This obviates the need for the ``xarray.ufuncs`` module, which will be
 deprecated in the future when xarray drops support for older versions of

@@ -2057,8 +2060,8 @@ Enhancements

 .. ipython:: python

- da = xr.DataArray(np.array([True, False, np.nan], dtype=object), dims='x')
- da.sum()
+ da = xr.DataArray(np.array([True, False, np.nan], dtype=object), dims="x")
+ da.sum()

 (:issue:`1866`)
 By `Keisuke Fujii `_.

@@ -2212,7 +2215,7 @@ Breaking changes

 .. ipython::
 :verbatim:

- In [1]: ds.resample('24H', dim='time', how='max')
+ In [1]: ds.resample("24H", dim="time", how="max")
 Out[1]:
 
 [...]
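
The groupby-like form that replaced the deprecated call above, shown in the
next hunk, takes the frequency as a keyword argument. A minimal runnable
sketch with a toy hourly dataset (the data is illustrative, and the
``np``/``pd``/``xr`` imports are the ones used throughout these docs):

.. ipython:: python

    times = pd.date_range("2000-01-01", freq="H", periods=48)
    toy = xr.Dataset({"foo": ("time", np.arange(48)), "time": times})
    toy.resample(time="24H").max()  # groupby-like equivalent of how="max"
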
@@ -2222,7 +2225,7 @@ Breaking changes .. ipython:: :verbatim: - In [1]: ds.resample(time='24H').max() + In [1]: ds.resample(time="24H").max() Out[1]: [...] @@ -2292,9 +2295,9 @@ Enhancements In [1]: import xarray as xr - In [2]: arr = xr.DataArray([[1, 2, 3], [4, 5, 6]], dims=('x', 'y')) + In [2]: arr = xr.DataArray([[1, 2, 3], [4, 5, 6]], dims=("x", "y")) - In [3]: xr.where(arr % 2, 'even', 'odd') + In [3]: xr.where(arr % 2, "even", "odd") Out[3]: array([['even', 'odd', 'even'], @@ -2815,7 +2818,7 @@ Breaking changes .. ipython:: :verbatim: - In [1]: xr.Dataset({'foo': (('x', 'y'), [[1, 2]])}) + In [1]: xr.Dataset({"foo": (("x", "y"), [[1, 2]])}) Out[1]: Dimensions: (x: 1, y: 2) @@ -3272,10 +3275,10 @@ Enhancements .. ipython:: :verbatim: - In [1]: import xarray as xr; import numpy as np + In [1]: import xarray as xr + ...: import numpy as np - In [2]: arr = xr.DataArray(np.arange(0, 7.5, 0.5).reshape(3, 5), - dims=('x', 'y')) + In [2]: arr = xr.DataArray(np.arange(0, 7.5, 0.5).reshape(3, 5), dims=("x", "y")) In [3]: arr Out[3]: @@ -3414,7 +3417,7 @@ Breaking changes .. ipython:: :verbatim: - In [2]: xray.DataArray([4, 5, 6], dims='x', name='x') + In [2]: xray.DataArray([4, 5, 6], dims="x", name="x") Out[2]: array([4, 5, 6]) @@ -3426,7 +3429,7 @@ Breaking changes .. ipython:: :verbatim: - In [2]: xray.DataArray([4, 5, 6], dims='x', name='x') + In [2]: xray.DataArray([4, 5, 6], dims="x", name="x") Out[2]: array([4, 5, 6]) @@ -3449,13 +3452,11 @@ Enhancements .. ipython:: :verbatim: - In [7]: df = pd.DataFrame({'foo': range(3), - ...: 'x': ['a', 'b', 'b'], - ...: 'y': [0, 0, 1]}) + In [7]: df = pd.DataFrame({"foo": range(3), "x": ["a", "b", "b"], "y": [0, 0, 1]}) - In [8]: s = df.set_index(['x', 'y'])['foo'] + In [8]: s = df.set_index(["x", "y"])["foo"] - In [12]: arr = xray.DataArray(s, dims='z') + In [12]: arr = xray.DataArray(s, dims="z") In [13]: arr Out[13]: @@ -3464,13 +3465,13 @@ Enhancements Coordinates: * z (z) object ('a', 0) ('b', 0) ('b', 1) - In [19]: arr.indexes['z'] + In [19]: arr.indexes["z"] Out[19]: MultiIndex(levels=[[u'a', u'b'], [0, 1]], labels=[[0, 1, 1], [0, 0, 1]], names=[u'x', u'y']) - In [14]: arr.unstack('z') + In [14]: arr.unstack("z") Out[14]: array([[ 0., nan], @@ -3479,7 +3480,7 @@ Enhancements * x (x) object 'a' 'b' * y (y) int64 0 1 - In [26]: arr.unstack('z').stack(z=('x', 'y')) + In [26]: arr.unstack("z").stack(z=("x", "y")) Out[26]: array([ 0., nan, 1., 2.]) @@ -3507,9 +3508,9 @@ Enhancements for shifting/rotating datasets or arrays along a dimension: .. ipython:: python - :okwarning: + :okwarning: - array = xray.DataArray([5, 6, 7, 8], dims='x') + array = xray.DataArray([5, 6, 7, 8], dims="x") array.shift(x=2) array.roll(x=2) @@ -3524,8 +3525,8 @@ Enhancements .. ipython:: python - a = xray.DataArray([1, 2, 3], dims='x') - b = xray.DataArray([5, 6], dims='y') + a = xray.DataArray([1, 2, 3], dims="x") + b = xray.DataArray([5, 6], dims="y") a b a2, b2 = xray.broadcast(a, b) @@ -3595,9 +3596,9 @@ Enhancements .. ipython:: :verbatim: - In [5]: array = xray.DataArray([1, 2, 3], dims='x') + In [5]: array = xray.DataArray([1, 2, 3], dims="x") - In [6]: array.reindex(x=[0.9, 1.5], method='nearest', tolerance=0.2) + In [6]: array.reindex(x=[0.9, 1.5], method="nearest", tolerance=0.2) Out[6]: array([ 2., nan]) @@ -3677,10 +3678,11 @@ Enhancements .. 
ipython:: :verbatim: - In [1]: da = xray.DataArray(np.arange(56).reshape((7, 8)), - ...: coords={'x': list('abcdefg'), - ...: 'y': 10 * np.arange(8)}, - ...: dims=['x', 'y']) + In [1]: da = xray.DataArray( + ...: np.arange(56).reshape((7, 8)), + ...: coords={"x": list("abcdefg"), "y": 10 * np.arange(8)}, + ...: dims=["x", "y"], + ...: ) In [2]: da Out[2]: @@ -3697,7 +3699,7 @@ Enhancements * x (x) |S1 'a' 'b' 'c' 'd' 'e' 'f' 'g' # we can index by position along each dimension - In [3]: da.isel_points(x=[0, 1, 6], y=[0, 1, 0], dim='points') + In [3]: da.isel_points(x=[0, 1, 6], y=[0, 1, 0], dim="points") Out[3]: array([ 0, 9, 48]) @@ -3707,7 +3709,7 @@ Enhancements * points (points) int64 0 1 2 # or equivalently by label - In [9]: da.sel_points(x=['a', 'b', 'g'], y=[0, 10, 0], dim='points') + In [9]: da.sel_points(x=["a", "b", "g"], y=[0, 10, 0], dim="points") Out[9]: array([ 0, 9, 48]) @@ -3721,11 +3723,11 @@ Enhancements .. ipython:: python - ds = xray.Dataset(coords={'x': range(100), 'y': range(100)}) - ds['distance'] = np.sqrt(ds.x ** 2 + ds.y ** 2) + ds = xray.Dataset(coords={"x": range(100), "y": range(100)}) + ds["distance"] = np.sqrt(ds.x ** 2 + ds.y ** 2) - @savefig where_example.png width=4in height=4in - ds.distance.where(ds.distance < 100).plot() + @savefig where_example.png width=4in height=4in + ds.distance.where(ds.distance < 100).plot() - Added new methods ``xray.DataArray.diff`` and ``xray.Dataset.diff`` for finite difference calculations along a given axis. @@ -3735,9 +3737,9 @@ Enhancements .. ipython:: python - da = xray.DataArray(np.random.random_sample(size=(5, 4))) - da.where(da < 0.5) - da.where(da < 0.5).to_masked_array(copy=True) + da = xray.DataArray(np.random.random_sample(size=(5, 4))) + da.where(da < 0.5) + da.where(da < 0.5).to_masked_array(copy=True) - Added new flag "drop_variables" to ``xray.open_dataset`` for excluding variables from being parsed. This may be useful to drop @@ -3795,9 +3797,9 @@ Enhancements .. ipython:: :verbatim: - In [1]: years, datasets = zip(*ds.groupby('time.year')) + In [1]: years, datasets = zip(*ds.groupby("time.year")) - In [2]: paths = ['%s.nc' % y for y in years] + In [2]: paths = ["%s.nc" % y for y in years] In [3]: xray.save_mfdataset(datasets, paths) @@ -3870,9 +3872,9 @@ Backwards incompatible changes .. ipython:: :verbatim: - In [1]: ds = xray.Dataset({'x': 0}) + In [1]: ds = xray.Dataset({"x": 0}) - In [2]: xray.concat([ds, ds], dim='y') + In [2]: xray.concat([ds, ds], dim="y") Out[2]: Dimensions: () @@ -3884,13 +3886,13 @@ Backwards incompatible changes Now, the default always concatenates data variables: .. ipython:: python - :suppress: + :suppress: - ds = xray.Dataset({'x': 0}) + ds = xray.Dataset({"x": 0}) .. ipython:: python - xray.concat([ds, ds], dim='y') + xray.concat([ds, ds], dim="y") To obtain the old behavior, supply the argument ``concat_over=[]``. @@ -3903,17 +3905,20 @@ Enhancements .. ipython:: python - ds = xray.Dataset({'a': 1, 'b': ('x', [1, 2, 3])}, - coords={'c': 42}, attrs={'Conventions': 'None'}) + ds = xray.Dataset( + {"a": 1, "b": ("x", [1, 2, 3])}, + coords={"c": 42}, + attrs={"Conventions": "None"}, + ) ds.to_array() - ds.to_array().to_dataset(dim='variable') + ds.to_array().to_dataset(dim="variable") - New ``xray.Dataset.fillna`` method to fill missing values, modeled off the pandas method of the same name: .. 
ipython:: python - array = xray.DataArray([np.nan, 1, np.nan, 3], dims='x') + array = xray.DataArray([np.nan, 1, np.nan, 3], dims="x") array.fillna(0) ``fillna`` works on both ``Dataset`` and ``DataArray`` objects, and uses @@ -3926,9 +3931,9 @@ Enhancements .. ipython:: python - ds = xray.Dataset({'y': ('x', [1, 2, 3])}) - ds.assign(z = lambda ds: ds.y ** 2) - ds.assign_coords(z = ('x', ['a', 'b', 'c'])) + ds = xray.Dataset({"y": ("x", [1, 2, 3])}) + ds.assign(z=lambda ds: ds.y ** 2) + ds.assign_coords(z=("x", ["a", "b", "c"])) These methods return a new Dataset (or DataArray) with updated data or coordinate variables. @@ -3941,7 +3946,7 @@ Enhancements .. ipython:: :verbatim: - In [12]: ds.sel(x=1.1, method='nearest') + In [12]: ds.sel(x=1.1, method="nearest") Out[12]: Dimensions: () @@ -3950,7 +3955,7 @@ Enhancements Data variables: y int64 2 - In [13]: ds.sel(x=[1.1, 2.1], method='pad') + In [13]: ds.sel(x=[1.1, 2.1], method="pad") Out[13]: Dimensions: (x: 2) @@ -3976,7 +3981,7 @@ Enhancements .. ipython:: python - ds = xray.Dataset({'x': np.arange(1000)}) + ds = xray.Dataset({"x": np.arange(1000)}) with xray.set_options(display_width=40): print(ds) @@ -4014,42 +4019,42 @@ Enhancements need to supply the time dimension explicitly: .. ipython:: python - :verbatim: + :verbatim: - time = pd.date_range('2000-01-01', freq='6H', periods=10) - array = xray.DataArray(np.arange(10), [('time', time)]) - array.resample('1D', dim='time') + time = pd.date_range("2000-01-01", freq="6H", periods=10) + array = xray.DataArray(np.arange(10), [("time", time)]) + array.resample("1D", dim="time") You can specify how to do the resampling with the ``how`` argument and other options such as ``closed`` and ``label`` let you control labeling: .. ipython:: python - :verbatim: + :verbatim: - array.resample('1D', dim='time', how='sum', label='right') + array.resample("1D", dim="time", how="sum", label="right") If the desired temporal resolution is higher than the original data (upsampling), xray will insert missing values: .. ipython:: python - :verbatim: + :verbatim: - array.resample('3H', 'time') + array.resample("3H", "time") - ``first`` and ``last`` methods on groupby objects let you take the first or last examples from each group along the grouped axis: .. ipython:: python - :verbatim: + :verbatim: - array.groupby('time.day').first() + array.groupby("time.day").first() These methods combine well with ``resample``: .. ipython:: python - :verbatim: + :verbatim: - array.resample('1D', dim='time', how='first') + array.resample("1D", dim="time", how="first") - ``xray.Dataset.swap_dims`` allows for easily swapping one dimension @@ -4057,9 +4062,9 @@ Enhancements .. ipython:: python - ds = xray.Dataset({'x': range(3), 'y': ('x', list('abc'))}) - ds - ds.swap_dims({'x': 'y'}) + ds = xray.Dataset({"x": range(3), "y": ("x", list("abc"))}) + ds + ds.swap_dims({"x": "y"}) This was possible in earlier versions of xray, but required some contortions. - ``xray.open_dataset`` and ``xray.Dataset.to_netcdf`` now @@ -4105,8 +4110,8 @@ Breaking changes .. ipython:: python - lhs = xray.DataArray([1, 2, 3], [('x', [0, 1, 2])]) - rhs = xray.DataArray([2, 3, 4], [('x', [1, 2, 3])]) + lhs = xray.DataArray([1, 2, 3], [("x", [0, 1, 2])]) + rhs = xray.DataArray([2, 3, 4], [("x", [1, 2, 3])]) lhs + rhs :ref:`For dataset construction and merging`, we align based on the @@ -4114,14 +4119,14 @@ Breaking changes .. 
ipython:: python - xray.Dataset({'foo': lhs, 'bar': rhs}) + xray.Dataset({"foo": lhs, "bar": rhs}) :ref:`For update and __setitem__`, we align based on the **original** object: .. ipython:: python - lhs.coords['rhs'] = rhs + lhs.coords["rhs"] = rhs lhs - Aggregations like ``mean`` or ``median`` now skip missing values by default: @@ -4144,8 +4149,8 @@ Breaking changes .. ipython:: python - a = xray.DataArray([1, 2], coords={'c': 0}, dims='x') - b = xray.DataArray([1, 2], coords={'c': ('x', [0, 0])}, dims='x') + a = xray.DataArray([1, 2], coords={"c": 0}, dims="x") + b = xray.DataArray([1, 2], coords={"c": ("x", [0, 0])}, dims="x") (a + b).coords This functionality can be controlled through the ``compat`` option, which @@ -4156,9 +4161,10 @@ Breaking changes .. ipython:: python - time = xray.DataArray(pd.date_range('2000-01-01', periods=365), - dims='time', name='time') - counts = time.groupby('time.month').count() + time = xray.DataArray( + pd.date_range("2000-01-01", periods=365), dims="time", name="time" + ) + counts = time.groupby("time.month").count() counts.sel(month=2) Previously, you would need to use something like @@ -4168,8 +4174,8 @@ Breaking changes .. ipython:: python - ds = xray.Dataset({'t': pd.date_range('2000-01-01', periods=12, freq='M')}) - ds['t.season'] + ds = xray.Dataset({"t": pd.date_range("2000-01-01", periods=12, freq="M")}) + ds["t.season"] Previously, it returned numbered seasons 1 through 4. - We have updated our use of the terms of "coordinates" and "variables". What @@ -4192,8 +4198,8 @@ Enhancements .. ipython:: python - data = xray.DataArray([1, 2, 3], [('x', range(3))]) - data.reindex(x=[0.5, 1, 1.5, 2, 2.5], method='pad') + data = xray.DataArray([1, 2, 3], [("x", range(3))]) + data.reindex(x=[0.5, 1, 1.5, 2, 2.5], method="pad") This will be especially useful once pandas 0.16 is released, at which point xray will immediately support reindexing with @@ -4212,15 +4218,15 @@ Enhancements makes it easy to drop explicitly listed variables or index labels: .. ipython:: python - :okwarning: + :okwarning: # drop variables - ds = xray.Dataset({'x': 0, 'y': 1}) - ds.drop('x') + ds = xray.Dataset({"x": 0, "y": 1}) + ds.drop("x") # drop index labels - arr = xray.DataArray([1, 2, 3], coords=[('x', list('abc'))]) - arr.drop(['a', 'c'], dim='x') + arr = xray.DataArray([1, 2, 3], coords=[("x", list("abc"))]) + arr.drop(["a", "c"], dim="x") - ``xray.Dataset.broadcast_equals`` has been added to correspond to the new ``compat`` option. @@ -4288,7 +4294,8 @@ Backwards incompatible changes .. ipython:: python from datetime import datetime - xray.Dataset({'t': [datetime(2000, 1, 1)]}) + + xray.Dataset({"t": [datetime(2000, 1, 1)]}) - xray now has support (including serialization to netCDF) for :py:class:`~pandas.TimedeltaIndex`. :py:class:`datetime.timedelta` objects @@ -4304,8 +4311,8 @@ Enhancements .. ipython:: python - ds = xray.Dataset({'tmin': ([], 25, {'units': 'celsius'})}) - ds.tmin.units + ds = xray.Dataset({"tmin": ([], 25, {"units": "celsius"})}) + ds.tmin.units Tab-completion for these variables should work in editors such as IPython. However, setting variables or attributes in this fashion is not yet @@ -4315,7 +4322,7 @@ Enhancements .. 
ipython:: python - array = xray.DataArray(np.zeros(5), dims=['x']) + array = xray.DataArray(np.zeros(5), dims=["x"]) array[dict(x=slice(3))] = 1 array diff --git a/xarray/core/common.py b/xarray/core/common.py index 1e7069ec51f..e343f342040 100644 --- a/xarray/core/common.py +++ b/xarray/core/common.py @@ -447,7 +447,7 @@ def assign_coords(self, coords=None, **coords_kwargs): New coordinate can also be attached to an existing dimension: >>> lon_2 = np.array([300, 289, 0, 1]) - >>> da.assign_coords(lon_2=('lon', lon_2)) + >>> da.assign_coords(lon_2=("lon", lon_2)) array([0.28298 , 0.667347, 0.657938, 0.177683]) Coordinates: @@ -456,7 +456,7 @@ def assign_coords(self, coords=None, **coords_kwargs): Note that the same result can also be obtained with a dict e.g. - >>> _ = da.assign_coords({"lon_2": ('lon', lon_2)}) + >>> _ = da.assign_coords({"lon_2": ("lon", lon_2)}) Notes ----- diff --git a/xarray/core/dataarray.py b/xarray/core/dataarray.py index ffa05ca64f0..5ced7e251c4 100644 --- a/xarray/core/dataarray.py +++ b/xarray/core/dataarray.py @@ -3495,17 +3495,18 @@ def pad( Examples -------- - >>> arr = xr.DataArray([5, 6, 7], coords=[("x", [0,1,2])]) - >>> arr.pad(x=(1,2), constant_values=0) + >>> arr = xr.DataArray([5, 6, 7], coords=[("x", [0, 1, 2])]) + >>> arr.pad(x=(1, 2), constant_values=0) array([0, 5, 6, 7, 0, 0]) Coordinates: * x (x) float64 nan 0.0 1.0 2.0 nan nan - >>> da = xr.DataArray([[0,1,2,3], [10,11,12,13]], - dims=["x", "y"], - coords={"x": [0,1], "y": [10, 20 ,30, 40], "z": ("x", [100, 200])} - ) + >>> da = xr.DataArray( + ... [[0, 1, 2, 3], [10, 11, 12, 13]], + ... dims=["x", "y"], + ... coords={"x": [0, 1], "y": [10, 20, 30, 40], "z": ("x", [100, 200])}, + ... ) >>> da.pad(x=1) array([[nan, nan, nan, nan], @@ -3592,8 +3593,9 @@ def idxmin( Examples -------- - >>> array = xr.DataArray([0, 2, 1, 0, -2], dims="x", - ... coords={"x": ['a', 'b', 'c', 'd', 'e']}) + >>> array = xr.DataArray( + ... [0, 2, 1, 0, -2], dims="x", coords={"x": ["a", "b", "c", "d", "e"]} + ... ) >>> array.min() array(-2) @@ -3604,13 +3606,15 @@ def idxmin( array('e', dtype='>> array = xr.DataArray([[2.0, 1.0, 2.0, 0.0, -2.0], - ... [-4.0, np.NaN, 2.0, np.NaN, -2.0], - ... [np.NaN, np.NaN, 1., np.NaN, np.NaN]], - ... dims=["y", "x"], - ... coords={"y": [-1, 0, 1], - ... "x": np.arange(5.)**2} - ... ) + >>> array = xr.DataArray( + ... [ + ... [2.0, 1.0, 2.0, 0.0, -2.0], + ... [-4.0, np.NaN, 2.0, np.NaN, -2.0], + ... [np.NaN, np.NaN, 1.0, np.NaN, np.NaN], + ... ], + ... dims=["y", "x"], + ... coords={"y": [-1, 0, 1], "x": np.arange(5.0) ** 2}, + ... ) >>> array.min(dim="x") array([-2., -4., 1.]) @@ -3686,8 +3690,9 @@ def idxmax( Examples -------- - >>> array = xr.DataArray([0, 2, 1, 0, -2], dims="x", - ... coords={"x": ['a', 'b', 'c', 'd', 'e']}) + >>> array = xr.DataArray( + ... [0, 2, 1, 0, -2], dims="x", coords={"x": ["a", "b", "c", "d", "e"]} + ... ) >>> array.max() array(2) @@ -3698,13 +3703,15 @@ def idxmax( array('b', dtype='>> array = xr.DataArray([[2.0, 1.0, 2.0, 0.0, -2.0], - ... [-4.0, np.NaN, 2.0, np.NaN, -2.0], - ... [np.NaN, np.NaN, 1., np.NaN, np.NaN]], - ... dims=["y", "x"], - ... coords={"y": [-1, 0, 1], - ... "x": np.arange(5.)**2} - ... ) + >>> array = xr.DataArray( + ... [ + ... [2.0, 1.0, 2.0, 0.0, -2.0], + ... [-4.0, np.NaN, 2.0, np.NaN, -2.0], + ... [np.NaN, np.NaN, 1.0, np.NaN, np.NaN], + ... ], + ... dims=["y", "x"], + ... coords={"y": [-1, 0, 1], "x": np.arange(5.0) ** 2}, + ... 
) >>> array.max(dim="x") array([2., 2., 1.]) diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index 53aa00f22ce..dd7871eaf3a 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -1055,9 +1055,7 @@ def copy(self, deep: bool = False, data: Mapping = None) -> "Dataset": structure of the original object, but with the new data. Original object is unaffected. - >>> ds.copy( - ... data={"foo": np.arange(6).reshape(2, 3), "bar": ["a", "b"]} - ... ) + >>> ds.copy(data={"foo": np.arange(6).reshape(2, 3), "bar": ["a", "b"]}) Dimensions: (dim_0: 2, dim_1: 3, x: 2) Coordinates: @@ -6061,8 +6059,8 @@ def pad( Examples -------- - >>> ds = xr.Dataset({'foo': ('x', range(5))}) - >>> ds.pad(x=(1,2)) + >>> ds = xr.Dataset({"foo": ("x", range(5))}) + >>> ds.pad(x=(1, 2)) Dimensions: (x: 8) Dimensions without coordinates: x @@ -6156,17 +6154,20 @@ def idxmin( Examples -------- - >>> array1 = xr.DataArray([0, 2, 1, 0, -2], dims="x", - ... coords={"x": ['a', 'b', 'c', 'd', 'e']}) - >>> array2 = xr.DataArray([[2.0, 1.0, 2.0, 0.0, -2.0], - ... [-4.0, np.NaN, 2.0, np.NaN, -2.0], - ... [np.NaN, np.NaN, 1., np.NaN, np.NaN]], - ... dims=["y", "x"], - ... coords={"y": [-1, 0, 1], - ... "x": ['a', 'b', 'c', 'd', 'e']} - ... ) - >>> ds = xr.Dataset({'int': array1, 'float': array2}) - >>> ds.min(dim='x') + >>> array1 = xr.DataArray( + ... [0, 2, 1, 0, -2], dims="x", coords={"x": ["a", "b", "c", "d", "e"]} + ... ) + >>> array2 = xr.DataArray( + ... [ + ... [2.0, 1.0, 2.0, 0.0, -2.0], + ... [-4.0, np.NaN, 2.0, np.NaN, -2.0], + ... [np.NaN, np.NaN, 1.0, np.NaN, np.NaN], + ... ], + ... dims=["y", "x"], + ... coords={"y": [-1, 0, 1], "x": ["a", "b", "c", "d", "e"]}, + ... ) + >>> ds = xr.Dataset({"int": array1, "float": array2}) + >>> ds.min(dim="x") Dimensions: (y: 3) Coordinates: @@ -6174,7 +6175,7 @@ def idxmin( Data variables: int int64 -2 float (y) float64 -2.0 -4.0 1.0 - >>> ds.argmin(dim='x') + >>> ds.argmin(dim="x") Dimensions: (y: 3) Coordinates: @@ -6182,7 +6183,7 @@ def idxmin( Data variables: int int64 4 float (y) int64 4 0 2 - >>> ds.idxmin(dim='x') + >>> ds.idxmin(dim="x") Dimensions: (y: 3) Coordinates: @@ -6251,17 +6252,20 @@ def idxmax( Examples -------- - >>> array1 = xr.DataArray([0, 2, 1, 0, -2], dims="x", - ... coords={"x": ['a', 'b', 'c', 'd', 'e']}) - >>> array2 = xr.DataArray([[2.0, 1.0, 2.0, 0.0, -2.0], - ... [-4.0, np.NaN, 2.0, np.NaN, -2.0], - ... [np.NaN, np.NaN, 1., np.NaN, np.NaN]], - ... dims=["y", "x"], - ... coords={"y": [-1, 0, 1], - ... "x": ['a', 'b', 'c', 'd', 'e']} - ... ) - >>> ds = xr.Dataset({'int': array1, 'float': array2}) - >>> ds.max(dim='x') + >>> array1 = xr.DataArray( + ... [0, 2, 1, 0, -2], dims="x", coords={"x": ["a", "b", "c", "d", "e"]} + ... ) + >>> array2 = xr.DataArray( + ... [ + ... [2.0, 1.0, 2.0, 0.0, -2.0], + ... [-4.0, np.NaN, 2.0, np.NaN, -2.0], + ... [np.NaN, np.NaN, 1.0, np.NaN, np.NaN], + ... ], + ... dims=["y", "x"], + ... coords={"y": [-1, 0, 1], "x": ["a", "b", "c", "d", "e"]}, + ... 
) + >>> ds = xr.Dataset({"int": array1, "float": array2}) + >>> ds.max(dim="x") Dimensions: (y: 3) Coordinates: @@ -6269,7 +6273,7 @@ def idxmax( Data variables: int int64 2 float (y) float64 2.0 2.0 1.0 - >>> ds.argmax(dim='x') + >>> ds.argmax(dim="x") Dimensions: (y: 3) Coordinates: @@ -6277,7 +6281,7 @@ def idxmax( Data variables: int int64 1 float (y) int64 0 2 2 - >>> ds.idxmax(dim='x') + >>> ds.idxmax(dim="x") Dimensions: (y: 3) Coordinates: From 3820fb77256682d909c1e41d962e29bec0edd62d Mon Sep 17 00:00:00 2001 From: keewis Date: Wed, 29 Apr 2020 18:12:23 +0200 Subject: [PATCH 39/39] Pint support for DataArray (#3643) * remove xfail marks from median and cumprod * remove all xfails not related to indexes or external packages * switch away from using assert_equal_with_units * use assert_allclose in a few cases instead * don't use a kwarg for searchsorted normally, this should work, but the documentation mismatches the implementation of searchsorted and names the keys as `keys` instead of `v` * move the tests for item into their own test function * move the searchsorted tests into their own test function * remove a wrapping pytest.param * treat objects implementing __array_function__ the same as ndarray * mark numpy.median as xfailing * remove the xfail marks for the all and any tests * use assert_units_equal to check the resulting units * don't attempt to use interpolate_na with int dtype arrays * update the xfail reason for DataArray.interpolate_na * xfail the compatible units bivariate_ufunc test and don't use 0 * combine and expand the reindex and interp tests * combine and expand the reindex_like and interp_like tests * xfail the quantile tests if pint is not recent enough * xfail the rolling tests * don't xfail combine_first it currently does not test indexing, so probably will need a new test for that. * use numpy's assert_allclose * don't add dimension coordinates if they're not necessary * add the PR to the list of related PRs * move the whats-new.rst entry to 0.16.0 * check for __array_ufunc__ to decide if the type is supported * xfail the bivariate ufunc tests * remove the check for __array_ufunc__ * skip the DataArray.identical tests * use pytest.param --- doc/whats-new.rst | 3 + xarray/tests/test_units.py | 520 ++++++++++++++++++++----------------- 2 files changed, 289 insertions(+), 234 deletions(-) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 42e20bbf1bd..051a41a57e5 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -47,6 +47,9 @@ New Features - Implement :py:meth:`DataArray.idxmax`, :py:meth:`DataArray.idxmin`, :py:meth:`Dataset.idxmax`, :py:meth:`Dataset.idxmin`. (:issue:`60`, :pull:`3871`) By `Todd Jennings `_ +- More support for unit aware arrays with pint (:pull:`3643`) + By `Justus Magin `_. + - Allow plotting of boolean arrays. 
(:pull:`3766`) By `Marek Jacob `_ - A ``days_in_month`` accessor for :py:class:`xarray.CFTimeIndex`, analogous to diff --git a/xarray/tests/test_units.py b/xarray/tests/test_units.py index 2826dc2479c..5dd4a42cff0 100644 --- a/xarray/tests/test_units.py +++ b/xarray/tests/test_units.py @@ -1660,7 +1660,7 @@ def test_missing_value_fillna(self, unit, error): method("equals"), pytest.param( method("identical"), - marks=pytest.mark.skip(reason="behaviour of identical is unclear"), + marks=pytest.mark.skip(reason="behavior of identical is undecided"), ), ), ids=repr, @@ -1885,7 +1885,10 @@ def test_squeeze(self, dtype): method("coarsen", windows={"y": 2}, func=np.mean), pytest.param( method("quantile", q=[0.25, 0.75]), - marks=pytest.mark.xfail(reason="nanquantile not implemented"), + marks=pytest.mark.xfail( + LooseVersion(pint.__version__) < "0.12", + reason="quantile / nanquantile not implemented yet", + ), ), pytest.param( method("rank", dim="x"), @@ -2161,8 +2164,8 @@ class TestDataArray: "with_dims", marks=pytest.mark.xfail(reason="units in indexes are not supported"), ), - pytest.param("with_coords"), - pytest.param("without_coords"), + "with_coords", + "without_coords", ), ) def test_init(self, variant, dtype): @@ -2224,21 +2227,17 @@ def test_repr(self, func, variant, dtype): @pytest.mark.parametrize( "func", ( - pytest.param( - function("all"), - marks=pytest.mark.xfail(reason="not implemented by pint yet"), - ), - pytest.param( - function("any"), - marks=pytest.mark.xfail(reason="not implemented by pint yet"), - ), + function("all"), + function("any"), function("argmax"), function("argmin"), function("max"), function("mean"), pytest.param( function("median"), - marks=pytest.mark.xfail(reason="not implemented by xarray"), + marks=pytest.mark.xfail( + reason="median does not work with dataarrays yet" + ), ), function("min"), pytest.param( @@ -2249,18 +2248,9 @@ def test_repr(self, func, variant, dtype): function("std"), function("var"), function("cumsum"), - pytest.param( - function("cumprod"), - marks=pytest.mark.xfail(reason="not implemented by pint yet"), - ), - pytest.param( - method("all"), - marks=pytest.mark.xfail(reason="not implemented by pint yet"), - ), - pytest.param( - method("any"), - marks=pytest.mark.xfail(reason="not implemented by pint yet"), - ), + function("cumprod"), + method("all"), + method("any"), method("argmax"), method("argmin"), method("max"), @@ -2269,18 +2259,13 @@ def test_repr(self, func, variant, dtype): method("min"), pytest.param( method("prod"), - marks=pytest.mark.xfail( - reason="comparison of quantity with ndarrays in nanops not implemented" - ), + marks=pytest.mark.xfail(reason="not implemented by pint yet"), ), method("sum"), method("std"), method("var"), method("cumsum"), - pytest.param( - method("cumprod"), - marks=pytest.mark.xfail(reason="pint does not implement cumprod yet"), - ), + method("cumprod"), ), ids=repr, ) @@ -2296,7 +2281,8 @@ def test_aggregation(self, func, dtype): expected = attach_units(func(strip_units(data_array)), units) actual = func(data_array) - assert_equal_with_units(expected, actual) + assert_units_equal(expected, actual) + xr.testing.assert_allclose(expected, actual) @pytest.mark.parametrize( "func", @@ -2314,7 +2300,8 @@ def test_unary_operations(self, func, dtype): expected = attach_units(func(strip_units(data_array)), units) actual = func(data_array) - assert_equal_with_units(expected, actual) + assert_units_equal(expected, actual) + xr.testing.assert_identical(expected, actual) @pytest.mark.parametrize( "func", 
@@ -2333,7 +2320,8 @@ def test_binary_operations(self, func, dtype): expected = attach_units(func(strip_units(data_array)), units) actual = func(data_array) - assert_equal_with_units(expected, actual) + assert_units_equal(expected, actual) + xr.testing.assert_identical(expected, actual) @pytest.mark.parametrize( "comparison", @@ -2383,7 +2371,8 @@ def test_comparison_operations(self, comparison, unit, error, dtype): strip_units(convert_units(to_compare_with, expected_units)), ) - assert_equal_with_units(expected, actual) + assert_units_equal(expected, actual) + xr.testing.assert_identical(expected, actual) @pytest.mark.parametrize( "units,error", @@ -2411,9 +2400,10 @@ def test_univariate_ufunc(self, units, error, dtype): ) actual = func(data_array) - assert_equal_with_units(expected, actual) + assert_units_equal(expected, actual) + xr.testing.assert_identical(expected, actual) - @pytest.mark.xfail(reason="xarray's `np.maximum` strips units") + @pytest.mark.xfail(reason="needs the type register system for __array_ufunc__") @pytest.mark.parametrize( "unit,error", ( @@ -2422,7 +2412,12 @@ def test_univariate_ufunc(self, units, error, dtype): unit_registry.dimensionless, DimensionalityError, id="dimensionless" ), pytest.param(unit_registry.s, DimensionalityError, id="incompatible_unit"), - pytest.param(unit_registry.mm, None, id="compatible_unit"), + pytest.param( + unit_registry.mm, + None, + id="compatible_unit", + marks=pytest.mark.xfail(reason="pint converts to the wrong units"), + ), pytest.param(unit_registry.m, None, id="identical_unit"), ), ) @@ -2433,7 +2428,7 @@ def test_bivariate_ufunc(self, unit, error, dtype): if error is not None: with pytest.raises(error): - np.maximum(data_array, 0 * unit) + np.maximum(data_array, 1 * unit) return @@ -2441,16 +2436,18 @@ def test_bivariate_ufunc(self, unit, error, dtype): expected = attach_units( np.maximum( strip_units(data_array), - strip_units(convert_units(0 * unit, expected_units)), + strip_units(convert_units(1 * unit, expected_units)), ), expected_units, ) - actual = np.maximum(data_array, 0 * unit) - assert_equal_with_units(expected, actual) + actual = np.maximum(data_array, 1 * unit) + assert_units_equal(expected, actual) + xr.testing.assert_identical(expected, actual) - actual = np.maximum(0 * unit, data_array) - assert_equal_with_units(expected, actual) + actual = np.maximum(1 * unit, data_array) + assert_units_equal(expected, actual) + xr.testing.assert_identical(expected, actual) @pytest.mark.parametrize("property", ("T", "imag", "real")) def test_numpy_properties(self, property, dtype): @@ -2466,7 +2463,8 @@ def test_numpy_properties(self, property, dtype): ) actual = getattr(data_array, property) - assert_equal_with_units(expected, actual) + assert_units_equal(expected, actual) + xr.testing.assert_identical(expected, actual) @pytest.mark.parametrize( "func", @@ -2481,16 +2479,86 @@ def test_numpy_methods(self, func, dtype): expected = attach_units(strip_units(data_array), units) actual = func(data_array) - assert_equal_with_units(expected, actual) + assert_units_equal(expected, actual) + xr.testing.assert_identical(expected, actual) + + def test_item(self, dtype): + array = np.arange(10).astype(dtype) * unit_registry.m + data_array = xr.DataArray(data=array) + + func = method("item", 2) + + expected = func(strip_units(data_array)) * unit_registry.m + actual = func(data_array) + + np.testing.assert_allclose(expected, actual) + + @pytest.mark.parametrize( + "unit,error", + ( + pytest.param(1, DimensionalityError, id="no_unit"), + 
pytest.param( + unit_registry.dimensionless, DimensionalityError, id="dimensionless" + ), + pytest.param(unit_registry.s, DimensionalityError, id="incompatible_unit"), + pytest.param(unit_registry.cm, None, id="compatible_unit"), + pytest.param(unit_registry.m, None, id="identical_unit"), + ), + ) + @pytest.mark.parametrize( + "func", + ( + method("searchsorted", 5), + pytest.param( + function("searchsorted", 5), + marks=pytest.mark.xfail( + reason="xarray does not implement __array_function__" + ), + ), + ), + ids=repr, + ) + def test_searchsorted(self, func, unit, error, dtype): + array = np.arange(10).astype(dtype) * unit_registry.m + data_array = xr.DataArray(data=array) + + scalar_types = (int, float) + args = list(value * unit for value in func.args) + kwargs = { + key: (value * unit if isinstance(value, scalar_types) else value) + for key, value in func.kwargs.items() + } + + if error is not None: + with pytest.raises(error): + func(data_array, *args, **kwargs) + + return + + units = extract_units(data_array) + expected_units = extract_units(func(array, *args, **kwargs)) + stripped_args = [strip_units(convert_units(value, units)) for value in args] + stripped_kwargs = { + key: strip_units(convert_units(value, units)) + for key, value in kwargs.items() + } + expected = attach_units( + func(strip_units(data_array), *stripped_args, **stripped_kwargs), + expected_units, + ) + actual = func(data_array, *args, **kwargs) + + assert_units_equal(expected, actual) + np.testing.assert_allclose(expected, actual) @pytest.mark.parametrize( "func", ( method("clip", min=3, max=8), pytest.param( - method("searchsorted", v=5), + function("clip", a_min=3, a_max=8), marks=pytest.mark.xfail( - reason="searchsorted somehow requires a undocumented `keys` argument" + reason="xarray does not implement __array_function__" ), ), ), @@ -2513,28 +2581,32 @@ def test_numpy_methods_with_args(self, func, unit, error, dtype): data_array = xr.DataArray(data=array) scalar_types = (int, float) + args = list(value * unit for value in func.args) kwargs = { key: (value * unit if isinstance(value, scalar_types) else value) for key, value in func.kwargs.items() } if error is not None: with pytest.raises(error): - func(data_array, **kwargs) + func(data_array, *args, **kwargs) return units = extract_units(data_array) - expected_units = extract_units(func(array, **kwargs)) + expected_units = extract_units(func(array, *args, **kwargs)) + stripped_args = [strip_units(convert_units(value, units)) for value in args] stripped_kwargs = { key: strip_units(convert_units(value, units)) for key, value in kwargs.items() } expected = attach_units( - func(strip_units(data_array), **stripped_kwargs), expected_units + func(strip_units(data_array), *stripped_args, **stripped_kwargs), + expected_units, ) - actual = func(data_array, **kwargs) + actual = func(data_array, *args, **kwargs) - assert_equal_with_units(expected, actual) + assert_units_equal(expected, actual) + xr.testing.assert_identical(expected, actual) @pytest.mark.parametrize( "func", (method("isnull"), method("notnull"), method("count")), ids=repr @@ -2551,15 +2623,13 @@ def test_missing_value_detection(self, func, dtype): ) * unit_registry.degK ) - x = np.arange(array.shape[0]) * unit_registry.m - y = np.arange(array.shape[1]) * unit_registry.m - - data_array = xr.DataArray(data=array, coords={"x": x, "y": y}, dims=("x", "y")) + data_array = xr.DataArray(data=array) expected = func(strip_units(data_array)) actual = func(data_array) - assert_equal_with_units(expected, actual) + 
assert_units_equal(expected, actual) + xr.testing.assert_identical(expected, actual) @pytest.mark.xfail(reason="ffill and bfill lose units in data") @pytest.mark.parametrize("func", (method("ffill"), method("bfill")), ids=repr) @@ -2576,7 +2646,8 @@ def test_missing_value_filling(self, func, dtype): ) actual = func(data_array, dim="x") - assert_equal_with_units(expected, actual) + assert_units_equal(expected, actual) + xr.testing.assert_identical(expected, actual) @pytest.mark.parametrize( "unit,error", @@ -2586,12 +2657,7 @@ def test_missing_value_filling(self, func, dtype): unit_registry.dimensionless, DimensionalityError, id="dimensionless" ), pytest.param(unit_registry.s, DimensionalityError, id="incompatible_unit"), - pytest.param( - unit_registry.cm, - None, - id="compatible_unit", - marks=pytest.mark.xfail(reason="fillna converts to value's unit"), - ), + pytest.param(unit_registry.cm, None, id="compatible_unit"), pytest.param(unit_registry.m, None, id="identical_unit"), ), ) @@ -2629,7 +2695,8 @@ def test_fillna(self, fill_value, unit, error, dtype): ) actual = func(data_array, value=value) - assert_equal_with_units(expected, actual) + assert_units_equal(expected, actual) + xr.testing.assert_identical(expected, actual) def test_dropna(self, dtype): array = ( @@ -2643,18 +2710,13 @@ def test_dropna(self, dtype): expected = attach_units(strip_units(data_array).dropna(dim="x"), units) actual = data_array.dropna(dim="x") - assert_equal_with_units(expected, actual) + assert_units_equal(expected, actual) + xr.testing.assert_identical(expected, actual) @pytest.mark.parametrize( "unit", ( - pytest.param( - 1, - id="no_unit", - marks=pytest.mark.xfail( - reason="pint's isin implementation does not work well with mixed args" - ), - ), + pytest.param(1, id="no_unit"), pytest.param(unit_registry.dimensionless, id="dimensionless"), pytest.param(unit_registry.s, id="incompatible_unit"), pytest.param(unit_registry.cm, id="compatible_unit"), @@ -2677,22 +2739,11 @@ def test_isin(self, unit, dtype): ) & array.check(unit) actual = data_array.isin(values) - assert_equal_with_units(expected, actual) + assert_units_equal(expected, actual) + xr.testing.assert_identical(expected, actual) @pytest.mark.parametrize( - "variant", - ( - pytest.param( - "masking", - marks=pytest.mark.xfail(reason="array(nan) is not a quantity"), - ), - "replacing_scalar", - "replacing_array", - pytest.param( - "dropping", - marks=pytest.mark.xfail(reason="array(nan) is not a quantity"), - ), - ), + "variant", ("masking", "replacing_scalar", "replacing_array", "dropping") ) @pytest.mark.parametrize( "unit,error", @@ -2742,22 +2793,24 @@ def test_where(self, variant, unit, error, dtype): ) actual = data_array.where(**kwargs) - assert_equal_with_units(expected, actual) + assert_units_equal(expected, actual) + xr.testing.assert_identical(expected, actual) - @pytest.mark.xfail(reason="interpolate strips units") - def test_interpolate_na(self, dtype): + @pytest.mark.xfail(reason="uses numpy.vectorize") + def test_interpolate_na(self): array = ( np.array([-1.03, 0.1, 1.4, np.nan, 2.3, np.nan, np.nan, 9.1]) * unit_registry.m ) x = np.arange(len(array)) - data_array = xr.DataArray(data=array, coords={"x": x}, dims="x").astype(dtype) + data_array = xr.DataArray(data=array, coords={"x": x}, dims="x") units = extract_units(data_array) expected = attach_units(strip_units(data_array).interpolate_na(dim="x"), units) actual = data_array.interpolate_na(dim="x") - assert_equal_with_units(expected, actual) + assert_units_equal(expected, actual) 
+ xr.testing.assert_identical(expected, actual) @pytest.mark.parametrize( "unit,error", @@ -2767,18 +2820,8 @@ def test_interpolate_na(self, dtype): unit_registry.dimensionless, DimensionalityError, id="dimensionless" ), pytest.param(unit_registry.s, DimensionalityError, id="incompatible_unit"), - pytest.param( - unit_registry.cm, - None, - id="compatible_unit", - marks=pytest.mark.xfail(reason="depends on reindex"), - ), - pytest.param( - unit_registry.m, - None, - id="identical_unit", - marks=pytest.mark.xfail(reason="depends on reindex"), - ), + pytest.param(unit_registry.cm, None, id="compatible_unit",), + pytest.param(unit_registry.m, None, id="identical_unit",), ), ) def test_combine_first(self, unit, error, dtype): @@ -2807,7 +2850,8 @@ def test_combine_first(self, unit, error, dtype): ) actual = data_array.combine_first(other) - assert_equal_with_units(expected, actual) + assert_units_equal(expected, actual) + xr.testing.assert_identical(expected, actual) @pytest.mark.parametrize( "unit", @@ -2829,7 +2873,17 @@ def test_combine_first(self, unit, error, dtype): "coords", ), ) - @pytest.mark.parametrize("func", (method("equals"), method("identical")), ids=repr) + @pytest.mark.parametrize( + "func", + ( + method("equals"), + pytest.param( + method("identical"), + marks=pytest.mark.skip(reason="the behavior of identical is undecided"), + ), + ), + ids=repr, + ) def test_comparisons(self, func, variation, unit, dtype): def is_compatible(a, b): a = a if a is not None else 1 @@ -2903,7 +2957,8 @@ def test_broadcast_like(self, unit, dtype): ) actual = arr1.broadcast_like(arr2) - assert_equal_with_units(expected, actual) + assert_units_equal(expected, actual) + xr.testing.assert_identical(expected, actual) @pytest.mark.parametrize( "unit", @@ -2950,7 +3005,6 @@ def test_broadcast_equals(self, unit, dtype): method("reset_coords", names="x2"), method("copy"), method("astype", np.float32), - method("item", 1), ), ids=repr, ) @@ -2978,7 +3032,8 @@ def test_content_manipulation(self, func, dtype): expected = attach_units(func(strip_units(data_array), **stripped_kwargs), units) actual = func(data_array) - assert_equal_with_units(expected, actual) + assert_units_equal(expected, actual) + xr.testing.assert_identical(expected, actual) @pytest.mark.parametrize( "func", (pytest.param(method("copy", data=np.arange(20))),), ids=repr @@ -3004,7 +3059,9 @@ def test_content_manipulation_with_units(self, func, unit, dtype): ) actual = func(data_array, **kwargs) - assert_equal_with_units(expected, actual) + + assert_units_equal(expected, actual) + xr.testing.assert_identical(expected, actual) @pytest.mark.parametrize( "indices", @@ -3024,7 +3081,8 @@ def test_isel(self, indices, dtype): ) actual = data_array.isel(x=indices) - assert_equal_with_units(expected, actual) + assert_units_equal(expected, actual) + xr.testing.assert_identical(expected, actual) @pytest.mark.xfail(reason="indexes don't support units") @pytest.mark.parametrize( @@ -3067,7 +3125,9 @@ def test_sel(self, raw_values, unit, error, dtype): extract_units(data_array), ) actual = data_array.sel(x=values) - assert_equal_with_units(expected, actual) + + assert_units_equal(expected, actual) + xr.testing.assert_identical(expected, actual) @pytest.mark.xfail(reason="indexes don't support units") @pytest.mark.parametrize( @@ -3110,7 +3170,9 @@ def test_loc(self, raw_values, unit, error, dtype): extract_units(data_array), ) actual = data_array.loc[{"x": values}] - assert_equal_with_units(expected, actual) + + assert_units_equal(expected, actual) + 
xr.testing.assert_identical(expected, actual) @pytest.mark.xfail(reason="indexes don't support units") @pytest.mark.parametrize( @@ -3153,7 +3215,9 @@ def test_drop_sel(self, raw_values, unit, error, dtype): extract_units(data_array), ) actual = data_array.drop_sel(x=values) - assert_equal_with_units(expected, actual) + + assert_units_equal(expected, actual) + xr.testing.assert_identical(expected, actual) @pytest.mark.parametrize( "shape", @@ -3181,7 +3245,9 @@ def test_squeeze(self, shape, dtype): strip_units(data_array).squeeze(), extract_units(data_array) ) actual = data_array.squeeze() - assert_equal_with_units(expected, actual) + + assert_units_equal(expected, actual) + xr.testing.assert_identical(expected, actual) # try squeezing the dimensions separately names = tuple(dim for dim, coord in coords.items() if len(coord) == 1) @@ -3190,7 +3256,9 @@ def test_squeeze(self, shape, dtype): strip_units(data_array).squeeze(dim=name), extract_units(data_array) ) actual = data_array.squeeze(dim=name) - assert_equal_with_units(expected, actual) + + assert_units_equal(expected, actual) + xr.testing.assert_identical(expected, actual) @pytest.mark.parametrize( "func", @@ -3212,49 +3280,42 @@ def test_head_tail_thin(self, func, dtype): ) actual = func(data_array) - assert_equal_with_units(expected, actual) + assert_units_equal(expected, actual) + xr.testing.assert_identical(expected, actual) - @pytest.mark.xfail(reason="indexes don't support units") + @pytest.mark.parametrize("variant", ("data", "coords")) @pytest.mark.parametrize( - "unit,error", + "func", ( - pytest.param(1, DimensionalityError, id="no_unit"), pytest.param( - unit_registry.dimensionless, DimensionalityError, id="dimensionless" + method("interp"), marks=pytest.mark.xfail(reason="uses scipy") ), - pytest.param(unit_registry.s, DimensionalityError, id="incompatible_unit"), - pytest.param(unit_registry.cm, None, id="compatible_unit"), - pytest.param(unit_registry.m, None, id="identical_unit"), + method("reindex"), ), + ids=repr, ) - def test_interp(self, unit, error): - array = np.linspace(1, 2, 10 * 5).reshape(10, 5) * unit_registry.degK - new_coords = (np.arange(10) + 0.5) * unit - coords = { - "x": np.arange(10) * unit_registry.m, - "y": np.arange(5) * unit_registry.m, + def test_interp_reindex(self, variant, func, dtype): + variants = { + "data": (unit_registry.m, 1), + "coords": (1, unit_registry.m), } + data_unit, coord_unit = variants.get(variant) - data_array = xr.DataArray(array, coords=coords, dims=("x", "y")) + array = np.linspace(1, 2, 10).astype(dtype) * data_unit + y = np.arange(10) * coord_unit - if error is not None: - with pytest.raises(error): - data_array.interp(x=new_coords) - - return + x = np.arange(10) + new_x = np.arange(10) + 0.5 + data_array = xr.DataArray(array, coords={"x": x, "y": ("x", y)}, dims="x") units = extract_units(data_array) - expected = attach_units( - strip_units(data_array).interp( - x=strip_units(convert_units(new_coords, {None: unit_registry.m})) - ), - units, - ) - actual = data_array.interp(x=new_coords) + expected = attach_units(func(strip_units(data_array), x=new_x), units) + actual = func(data_array, x=new_x) - assert_equal_with_units(expected, actual) + assert_units_equal(expected, actual) + xr.testing.assert_allclose(expected, actual) - @pytest.mark.xfail(reason="indexes strip units") + @pytest.mark.xfail(reason="indexes don't support units") @pytest.mark.parametrize( "unit,error", ( @@ -3267,79 +3328,66 @@ def test_interp(self, unit, error): pytest.param(unit_registry.m, None, 
id="identical_unit"), ), ) - def test_interp_like(self, unit, error): - array = np.linspace(1, 2, 10 * 5).reshape(10, 5) * unit_registry.degK - coords = { - "x": (np.arange(10) + 0.3) * unit_registry.m, - "y": (np.arange(5) + 0.3) * unit_registry.m, - } - - data_array = xr.DataArray(array, coords=coords, dims=("x", "y")) - other = xr.DataArray( - data=np.empty((20, 10)) * unit_registry.degK, - coords={"x": np.arange(20) * unit, "y": np.arange(10) * unit}, - dims=("x", "y"), - ) + @pytest.mark.parametrize( + "func", (method("interp"), method("reindex")), ids=repr, + ) + def test_interp_reindex_indexing(self, func, unit, error, dtype): + array = np.linspace(1, 2, 10).astype(dtype) + x = np.arange(10) * unit_registry.m + new_x = (np.arange(10) + 0.5) * unit + data_array = xr.DataArray(array, coords={"x": x}, dims="x") if error is not None: with pytest.raises(error): - data_array.interp_like(other) + func(data_array, x=new_x) return units = extract_units(data_array) expected = attach_units( - strip_units(data_array).interp_like( - strip_units(convert_units(other, units)) + func( + strip_units(data_array), + x=strip_units(convert_units(new_x, {None: unit_registry.m})), ), units, ) - actual = data_array.interp_like(other) + actual = func(data_array, x=new_x) - assert_equal_with_units(expected, actual) + assert_units_equal(expected, actual) + xr.testing.assert_identical(expected, actual) - @pytest.mark.xfail(reason="indexes don't support units") + @pytest.mark.parametrize("variant", ("data", "coords")) @pytest.mark.parametrize( - "unit,error", + "func", ( - pytest.param(1, DimensionalityError, id="no_unit"), pytest.param( - unit_registry.dimensionless, DimensionalityError, id="dimensionless" + method("interp_like"), marks=pytest.mark.xfail(reason="uses scipy") ), - pytest.param(unit_registry.s, DimensionalityError, id="incompatible_unit"), - pytest.param(unit_registry.cm, None, id="compatible_unit"), - pytest.param(unit_registry.m, None, id="identical_unit"), + method("reindex_like"), ), + ids=repr, ) - def test_reindex(self, unit, error, dtype): - array = ( - np.linspace(1, 2, 10 * 5).reshape(10, 5).astype(dtype) * unit_registry.degK - ) - new_coords = (np.arange(10) + 0.5) * unit - coords = { - "x": np.arange(10) * unit_registry.m, - "y": np.arange(5) * unit_registry.m, + def test_interp_reindex_like(self, variant, func, dtype): + variants = { + "data": (unit_registry.m, 1), + "coords": (1, unit_registry.m), } + data_unit, coord_unit = variants.get(variant) - data_array = xr.DataArray(array, coords=coords, dims=("x", "y")) - func = method("reindex") - - if error is not None: - with pytest.raises(error): - func(data_array, x=new_coords) + array = np.linspace(1, 2, 10).astype(dtype) * data_unit + coord = np.arange(10) * coord_unit - return + x = np.arange(10) + new_x = np.arange(-2, 2) + 0.5 + data_array = xr.DataArray(array, coords={"x": x, "y": ("x", coord)}, dims="x") + other = xr.DataArray(np.empty_like(new_x), coords={"x": new_x}, dims="x") - expected = attach_units( - func( - strip_units(data_array), - x=strip_units(convert_units(new_coords, {None: unit_registry.m})), - ), - {None: unit_registry.degK}, - ) - actual = func(data_array, x=new_coords) + units = extract_units(data_array) + expected = attach_units(func(strip_units(data_array), other), units) + actual = func(data_array, other) - assert_equal_with_units(expected, actual) + assert_units_equal(expected, actual) + xr.testing.assert_allclose(expected, actual) @pytest.mark.xfail(reason="indexes don't support units") 
@pytest.mark.parametrize( @@ -3354,38 +3402,35 @@ def test_reindex(self, unit, error, dtype): pytest.param(unit_registry.m, None, id="identical_unit"), ), ) - def test_reindex_like(self, unit, error, dtype): - array = ( - np.linspace(1, 2, 10 * 5).reshape(10, 5).astype(dtype) * unit_registry.degK - ) - coords = { - "x": (np.arange(10) + 0.3) * unit_registry.m, - "y": (np.arange(5) + 0.3) * unit_registry.m, - } + @pytest.mark.parametrize( + "func", (method("interp_like"), method("reindex_like")), ids=repr, + ) + def test_interp_reindex_like_indexing(self, func, unit, error, dtype): + array = np.linspace(1, 2, 10).astype(dtype) + x = np.arange(10) * unit_registry.m + new_x = (np.arange(-2, 2) + 0.5) * unit - data_array = xr.DataArray(array, coords=coords, dims=("x", "y")) - other = xr.DataArray( - data=np.empty((20, 10)) * unit_registry.degK, - coords={"x": np.arange(20) * unit, "y": np.arange(10) * unit}, - dims=("x", "y"), - ) + data_array = xr.DataArray(array, coords={"x": x}, dims="x") + other = xr.DataArray(np.empty_like(new_x), {"x": new_x}, dims="x") if error is not None: with pytest.raises(error): - data_array.reindex_like(other) + func(data_array, other) return units = extract_units(data_array) expected = attach_units( - strip_units(data_array).reindex_like( - strip_units(convert_units(other, units)) + func( + strip_units(data_array), + strip_units(convert_units(other, {None: unit_registry.m})), ), units, ) - actual = data_array.reindex_like(other) + actual = func(data_array, other) - assert_equal_with_units(expected, actual) + assert_units_equal(expected, actual) + xr.testing.assert_identical(expected, actual) @pytest.mark.parametrize( "func", @@ -3407,7 +3452,8 @@ def test_stacking_stacked(self, func, dtype): expected = attach_units(func(strip_units(stacked)), {"data": unit_registry.m}) actual = func(stacked) - assert_equal_with_units(expected, actual) + assert_units_equal(expected, actual) + xr.testing.assert_identical(expected, actual) @pytest.mark.xfail(reason="indexes don't support units") def test_to_unstacked_dataset(self, dtype): @@ -3430,7 +3476,8 @@ def test_to_unstacked_dataset(self, dtype): ).rename({elem.magnitude: elem for elem in x}) actual = func(data_array) - assert_equal_with_units(expected, actual) + assert_units_equal(expected, actual) + xr.testing.assert_identical(expected, actual) @pytest.mark.parametrize( "func", @@ -3438,9 +3485,7 @@ def test_to_unstacked_dataset(self, dtype): method("transpose", "y", "x", "z"), method("stack", a=("x", "y")), method("set_index", x="x2"), - pytest.param( - method("shift", x=2), marks=pytest.mark.xfail(reason="strips units") - ), + method("shift", x=2), method("roll", x=2, roll_coords=False), method("sortby", "x2"), ), @@ -3466,7 +3511,8 @@ def test_stacking_reordering(self, func, dtype): expected = attach_units(func(strip_units(data_array)), {None: unit_registry.m}) actual = func(data_array) - assert_equal_with_units(expected, actual) + assert_units_equal(expected, actual) + xr.testing.assert_identical(expected, actual) @pytest.mark.parametrize( "func", @@ -3476,16 +3522,13 @@ def test_stacking_reordering(self, func, dtype): method("integrate", dim="x"), pytest.param( method("quantile", q=[0.25, 0.75]), - marks=pytest.mark.xfail(reason="nanquantile not implemented"), - ), - method("reduce", func=np.sum, dim="x"), - pytest.param( - lambda x: x.dot(x), - id="method_dot", marks=pytest.mark.xfail( - reason="pint does not implement the dot method" + LooseVersion(pint.__version__) < "0.12", + reason="quantile / nanquantile not 
implemented yet", ), ), + method("reduce", func=np.sum, dim="x"), + pytest.param(lambda x: x.dot(x), id="method_dot"), ), ids=repr, ) @@ -3512,7 +3555,8 @@ def test_computation(self, func, dtype): expected = attach_units(func(strip_units(data_array)), units) actual = func(data_array) - assert_equal_with_units(expected, actual) + assert_units_equal(expected, actual) + xr.testing.assert_identical(expected, actual) @pytest.mark.parametrize( "func", @@ -3522,7 +3566,9 @@ def test_computation(self, func, dtype): method("coarsen", y=2), pytest.param( method("rolling", y=3), - marks=pytest.mark.xfail(reason="rolling strips units"), + marks=pytest.mark.xfail( + reason="numpy.lib.stride_tricks.as_strided converts to ndarray" + ), ), pytest.param( method("rolling_exp", y=3), @@ -3545,7 +3591,8 @@ def test_computation_objects(self, func, dtype): expected = attach_units(func(strip_units(data_array)).mean(), units) actual = func(data_array).mean() - assert_equal_with_units(expected, actual) + assert_units_equal(expected, actual) + xr.testing.assert_allclose(expected, actual) def test_resample(self, dtype): array = np.linspace(0, 5, 10).astype(dtype) * unit_registry.m @@ -3559,7 +3606,8 @@ def test_resample(self, dtype): expected = attach_units(func(strip_units(data_array)).mean(), units) actual = func(data_array).mean() - assert_equal_with_units(expected, actual) + assert_units_equal(expected, actual) + xr.testing.assert_identical(expected, actual) @pytest.mark.parametrize( "func", @@ -3569,7 +3617,10 @@ def test_resample(self, dtype): method("last"), pytest.param( method("quantile", q=[0.25, 0.5, 0.75], dim="x"), - marks=pytest.mark.xfail(reason="nanquantile not implemented"), + marks=pytest.mark.xfail( + LooseVersion(pint.__version__) < "0.12", + reason="quantile / nanquantile not implemented yet", + ), ), ), ids=repr, @@ -3598,7 +3649,8 @@ def test_grouped_operations(self, func, dtype): ) actual = func(data_array.groupby("y")) - assert_equal_with_units(expected, actual) + assert_units_equal(expected, actual) + xr.testing.assert_identical(expected, actual) class TestDataset: