From 1eefcdcdf0c18745b7858331af8e89bde8626b65 Mon Sep 17 00:00:00 2001 From: Deepak Cherian Date: Fri, 31 Jan 2020 17:10:01 +0000 Subject: [PATCH 01/75] Fix RTD build (#3737) * pin some requirements to reduce conda's memory usage * remove python section in readthedocs.yml --- ci/requirements/doc.yml | 21 +++++++++++---------- readthedocs.yml | 4 ---- 2 files changed, 11 insertions(+), 14 deletions(-) diff --git a/ci/requirements/doc.yml b/ci/requirements/doc.yml index a8b72dc0956..16cce5782e0 100644 --- a/ci/requirements/doc.yml +++ b/ci/requirements/doc.yml @@ -6,20 +6,21 @@ dependencies: - python=3.7 - bottleneck - cartopy - - cfgrib - - h5netcdf + - cfgrib>=0.9 + - dask>=2.10 + - h5netcdf>=0.7.4 - ipykernel - ipython - - iris + - iris>=2.3 - jupyter_client - nbsphinx - - netcdf4 + - netcdf4>=1.5 - numba - - numpy + - numpy>=1.17 - numpydoc - - pandas - - rasterio + - pandas>=1.0 + - rasterio>=1.1 - seaborn - - sphinx - - sphinx_rtd_theme - - zarr + - sphinx>=2.3 + - sphinx_rtd_theme>=0.4 + - zarr>=2.4 diff --git a/readthedocs.yml b/readthedocs.yml index 9ed8d28eaf2..88aee82a44b 100644 --- a/readthedocs.yml +++ b/readthedocs.yml @@ -6,8 +6,4 @@ build: conda: environment: ci/requirements/doc.yml -python: - version: 3.7 - install: [] - formats: [] From 90e734a55792f3c19e795df110fc3501c609d191 Mon Sep 17 00:00:00 2001 From: Bruno Pagani Date: Wed, 19 Feb 2020 18:24:42 +0000 Subject: [PATCH 02/75] Avoid running test_open_mfdataset_list_attr without dask (#3780) Fixes GH-3777. --- doc/whats-new.rst | 3 +++ xarray/tests/test_backends.py | 1 + 2 files changed, 4 insertions(+) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index bf8e63eb926..e3e4eca7a01 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -37,6 +37,9 @@ Internal Changes - Removed the internal ``import_seaborn`` function which handled the deprecation of the ``seaborn.apionly`` entry point (:issue:`3747`). By `Mathias Hauser `_. +- Changed test_open_mfdataset_list_attr to only run with dask installed + (:issue:`3777`, :pull:`3780`). + By `Bruno Pagani `_. .. _whats-new.0.15.0: diff --git a/xarray/tests/test_backends.py b/xarray/tests/test_backends.py index bb77cbb94fe..b7ba70ef6c4 100644 --- a/xarray/tests/test_backends.py +++ b/xarray/tests/test_backends.py @@ -2540,6 +2540,7 @@ def test_open_mfdataset_manyfiles( @requires_netCDF4 +@requires_dask def test_open_mfdataset_list_attr(): """ Case when an attribute of type list differs across the multiple files From 1667e4c2223b125845c6aad756881c3ead1510cc Mon Sep 17 00:00:00 2001 From: Maximilian Roos <5635139+max-sixty@users.noreply.github.com> Date: Sat, 22 Feb 2020 03:33:36 -0500 Subject: [PATCH 03/75] Format issue template comment as md comment (#3790) --- .github/ISSUE_TEMPLATE/bug_report.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/ISSUE_TEMPLATE/bug_report.md b/.github/ISSUE_TEMPLATE/bug_report.md index f24884c617a..31fef19b32a 100644 --- a/.github/ISSUE_TEMPLATE/bug_report.md +++ b/.github/ISSUE_TEMPLATE/bug_report.md @@ -24,6 +24,6 @@ assignees: '' #### Output of ``xr.show_versions()``
-# Paste the output here xr.show_versions() here
+<!-- Paste the output here xr.show_versions() here -->
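An aside on the ``requires_dask`` marker used in PATCH 02 above: xarray's test suite builds markers like this from a small import-or-skip helper. The sketch below shows the general pattern only; the helper name, layout, and the toy test are illustrative, not a copy of xarray's actual test utilities.

```python
import importlib

import pytest


def _importorskip(modname):
    """Return (module_is_available, skipif_marker) for an optional dependency."""
    try:
        importlib.import_module(modname)
        has = True
    except ImportError:
        has = False
    return has, pytest.mark.skipif(not has, reason=f"requires {modname}")


has_dask, requires_dask = _importorskip("dask")


@requires_dask
def test_needs_dask():
    # only runs when dask is importable
    import dask.array as da

    assert da.zeros(3).sum().compute() == 0
```

Tests decorated this way are reported as skipped on installations without the optional dependency, instead of failing with an ``ImportError``.
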
From 47476eb400497215b114c2abddc457a057205f63 Mon Sep 17 00:00:00 2001 From: keewis Date: Sun, 23 Feb 2020 20:13:07 +0100 Subject: [PATCH 04/75] Pint support for variables (#3706) * get fillna tests to pass * get the _getitem_with_mask tests to pass * silence the behavior change warning of pint * don't use 0 as fill value since that has special behaviour * use concat as a class method * use np.pad after trimming instead of concatenating a filled array * rewrite the concat test to pass appropriate arrays * use da.pad when dealing with dask arrays * mark the failing pad tests as xfail when on a current pint version * update whats-new.rst * fix the import order * test using pint master * fix the install command * reimplement the pad test to really work with units * use np.logical_not instead * use duck_array_ops to provide pad * add comments explaining the order of the arguments to where * mark the flipped parameter changes with a todo * skip the identical tests * remove the warnings filter --- ci/azure/install.yml | 1 + doc/whats-new.rst | 2 + xarray/core/duck_array_ops.py | 6 +- xarray/core/variable.py | 31 ++++------ xarray/tests/test_units.py | 112 +++++++++++++++++++++------------- 5 files changed, 89 insertions(+), 63 deletions(-) diff --git a/ci/azure/install.yml b/ci/azure/install.yml index e11a8b54db3..958e3c180fa 100644 --- a/ci/azure/install.yml +++ b/ci/azure/install.yml @@ -29,6 +29,7 @@ steps: git+https://github.com/zarr-developers/zarr \ git+https://github.com/Unidata/cftime \ git+https://github.com/mapbox/rasterio \ + git+https://github.com/hgrecco/pint \ git+https://github.com/pydata/bottleneck condition: eq(variables['UPSTREAM_DEV'], 'true') displayName: Install upstream dev dependencies diff --git a/doc/whats-new.rst b/doc/whats-new.rst index e3e4eca7a01..b447e2c2048 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -24,6 +24,8 @@ Breaking changes New Features ~~~~~~~~~~~~ +- implement pint support. (:issue:`3594`, :pull:`3706`) + By `Justus Magin `_. Bug fixes ~~~~~~~~~ diff --git a/xarray/core/duck_array_ops.py b/xarray/core/duck_array_ops.py index c2fe604a9d3..06e12e83abd 100644 --- a/xarray/core/duck_array_ops.py +++ b/xarray/core/duck_array_ops.py @@ -121,6 +121,7 @@ def notnull(data): isin = _dask_or_eager_func("isin", array_args=slice(2)) take = _dask_or_eager_func("take") broadcast_to = _dask_or_eager_func("broadcast_to") +pad = _dask_or_eager_func("pad") _concatenate = _dask_or_eager_func("concatenate", list_of_args=True) _stack = _dask_or_eager_func("stack", list_of_args=True) @@ -261,7 +262,10 @@ def where_method(data, cond, other=dtypes.NA): def fillna(data, other): - return where(isnull(data), other, data) + # we need to pass data first so pint has a chance of returning the + # correct unit + # TODO: revert after https://github.com/hgrecco/pint/issues/1019 is fixed + return where(notnull(data), data, other) def concatenate(arrays, axis=0): diff --git a/xarray/core/variable.py b/xarray/core/variable.py index 74d5d57e6f6..058b7bf52d4 100644 --- a/xarray/core/variable.py +++ b/xarray/core/variable.py @@ -742,7 +742,10 @@ def _getitem_with_mask(self, key, fill_value=dtypes.NA): data = as_indexable(self._data)[actual_indexer] mask = indexing.create_mask(indexer, self.shape, data) - data = duck_array_ops.where(mask, fill_value, data) + # we need to invert the mask in order to pass data first. 
This helps + # pint to choose the correct unit + # TODO: revert after https://github.com/hgrecco/pint/issues/1019 is fixed + data = duck_array_ops.where(np.logical_not(mask), data, fill_value) else: # array cannot be indexed along dimensions of size 0, so just # build the mask directly instead. @@ -1099,24 +1102,16 @@ def _shift_one_dim(self, dim, count, fill_value=dtypes.NA): else: dtype = self.dtype - shape = list(self.shape) - shape[axis] = min(abs(count), shape[axis]) + width = min(abs(count), self.shape[axis]) + dim_pad = (width, 0) if count >= 0 else (0, width) + pads = [(0, 0) if d != dim else dim_pad for d in self.dims] - if isinstance(trimmed_data, dask_array_type): - chunks = list(trimmed_data.chunks) - chunks[axis] = (shape[axis],) - full = functools.partial(da.full, chunks=chunks) - else: - full = np.full - - filler = full(shape, fill_value, dtype=dtype) - - if count > 0: - arrays = [filler, trimmed_data] - else: - arrays = [trimmed_data, filler] - - data = duck_array_ops.concatenate(arrays, axis) + data = duck_array_ops.pad( + trimmed_data.astype(dtype), + pads, + mode="constant", + constant_values=fill_value, + ) if isinstance(data, dask_array_type): # chunked data should come out with the same chunks; this makes diff --git a/xarray/tests/test_units.py b/xarray/tests/test_units.py index d98e5e23516..75e743c3455 100644 --- a/xarray/tests/test_units.py +++ b/xarray/tests/test_units.py @@ -1,4 +1,5 @@ import operator +from distutils.version import LooseVersion import numpy as np import pandas as pd @@ -19,6 +20,7 @@ unit_registry = pint.UnitRegistry(force_ndarray=True) Quantity = unit_registry.Quantity + pytestmark = [ pytest.mark.skipif( not IS_NEP18_ACTIVE, reason="NUMPY_EXPERIMENTAL_ARRAY_FUNCTION is not enabled" @@ -1536,27 +1538,17 @@ def test_missing_value_detection(self, func): @pytest.mark.parametrize( "unit,error", ( - pytest.param( - 1, - DimensionalityError, - id="no_unit", - marks=pytest.mark.xfail(reason="uses 0 as a replacement"), - ), + pytest.param(1, DimensionalityError, id="no_unit"), pytest.param( unit_registry.dimensionless, DimensionalityError, id="dimensionless" ), pytest.param(unit_registry.s, DimensionalityError, id="incompatible_unit"), - pytest.param( - unit_registry.cm, - None, - id="compatible_unit", - marks=pytest.mark.xfail(reason="converts to fill value's unit"), - ), + pytest.param(unit_registry.cm, None, id="compatible_unit"), pytest.param(unit_registry.m, None, id="identical_unit"), ), ) def test_missing_value_fillna(self, unit, error): - value = 0 + value = 10 array = ( np.array( [ @@ -1595,13 +1587,7 @@ def test_missing_value_fillna(self, unit, error): pytest.param(1, id="no_unit"), pytest.param(unit_registry.dimensionless, id="dimensionless"), pytest.param(unit_registry.s, id="incompatible_unit"), - pytest.param( - unit_registry.cm, - id="compatible_unit", - marks=pytest.mark.xfail( - reason="checking for identical units does not work properly, yet" - ), - ), + pytest.param(unit_registry.cm, id="compatible_unit",), pytest.param(unit_registry.m, id="identical_unit"), ), ) @@ -1612,7 +1598,17 @@ def test_missing_value_fillna(self, unit, error): pytest.param(True, id="with_conversion"), ), ) - @pytest.mark.parametrize("func", (method("equals"), method("identical")), ids=repr) + @pytest.mark.parametrize( + "func", + ( + method("equals"), + pytest.param( + method("identical"), + marks=pytest.mark.skip(reason="behaviour of identical is unclear"), + ), + ), + ids=repr, + ) def test_comparisons(self, func, unit, convert_data, dtype): array = 
np.linspace(0, 1, 9).astype(dtype) quantity1 = array * unit_registry.m @@ -1762,14 +1758,7 @@ def test_1d_math(self, func, unit, error, dtype): unit_registry.dimensionless, DimensionalityError, id="dimensionless" ), pytest.param(unit_registry.s, DimensionalityError, id="incompatible_unit"), - pytest.param( - unit_registry.cm, - None, - id="compatible_unit", - marks=pytest.mark.xfail( - reason="getitem_with_mask converts to the unit of other" - ), - ), + pytest.param(unit_registry.cm, None, id="compatible_unit"), pytest.param(unit_registry.m, None, id="identical_unit"), ), ) @@ -1853,12 +1842,7 @@ def test_squeeze(self, dtype): ), method("reduce", np.std, "x"), method("round", 2), - pytest.param( - method("shift", {"x": -2}), - marks=pytest.mark.xfail( - reason="trying to concatenate ndarray to quantity" - ), - ), + method("shift", {"x": -2}), method("transpose", "y", "x"), ), ids=repr, @@ -1933,7 +1917,6 @@ def test_unstack(self, dtype): assert_units_equal(expected, actual) xr.testing.assert_identical(expected, actual) - @pytest.mark.xfail(reason="ignores units") @pytest.mark.parametrize( "unit,error", ( @@ -1948,25 +1931,28 @@ def test_unstack(self, dtype): ) def test_concat(self, unit, error, dtype): array1 = ( - np.linspace(0, 5, 3 * 10).reshape(3, 10).astype(dtype) * unit_registry.m + np.linspace(0, 5, 9 * 10).reshape(3, 6, 5).astype(dtype) * unit_registry.m ) - array2 = np.linspace(5, 10, 10 * 2).reshape(10, 2).astype(dtype) * unit + array2 = np.linspace(5, 10, 10 * 3).reshape(3, 2, 5).astype(dtype) * unit - variable = xr.Variable(("x", "y"), array1) - other = xr.Variable(("y", "z"), array2) + variable = xr.Variable(("x", "y", "z"), array1) + other = xr.Variable(("x", "y", "z"), array2) if error is not None: with pytest.raises(error): - variable.concat(other) + xr.Variable.concat([variable, other], dim="y") return units = extract_units(variable) expected = attach_units( - strip_units(variable).concat(strip_units(convert_units(other, units))), + xr.Variable.concat( + [strip_units(variable), strip_units(convert_units(other, units))], + dim="y", + ), units, ) - actual = variable.concat(other) + actual = xr.Variable.concat([variable, other], dim="y") assert_units_equal(expected, actual) xr.testing.assert_identical(expected, actual) @@ -2036,6 +2022,43 @@ def test_no_conflicts(self, unit, dtype): assert expected == actual + def test_pad(self, dtype): + data = np.arange(4 * 3 * 2).reshape(4, 3, 2).astype(dtype) * unit_registry.m + v = xr.Variable(["x", "y", "z"], data) + + xr_args = [{"x": (2, 1)}, {"y": (0, 3)}, {"x": (3, 1), "z": (2, 0)}] + np_args = [ + ((2, 1), (0, 0), (0, 0)), + ((0, 0), (0, 3), (0, 0)), + ((3, 1), (0, 0), (2, 0)), + ] + for xr_arg, np_arg in zip(xr_args, np_args): + actual = v.pad_with_fill_value(**xr_arg) + expected = xr.Variable( + v.dims, + np.pad( + v.data.astype(float), + np_arg, + mode="constant", + constant_values=np.nan, + ), + ) + xr.testing.assert_identical(expected, actual) + assert_units_equal(expected, actual) + assert isinstance(actual._data, type(v._data)) + + # for the boolean array, we pad False + data = np.full_like(data, False, dtype=bool).reshape(4, 3, 2) + v = xr.Variable(["x", "y", "z"], data) + for xr_arg, np_arg in zip(xr_args, np_args): + actual = v.pad_with_fill_value(fill_value=data.flat[0], **xr_arg) + expected = xr.Variable( + v.dims, + np.pad(v.data, np_arg, mode="constant", constant_values=v.data.flat[0]), + ) + xr.testing.assert_identical(actual, expected) + assert_units_equal(expected, actual) + @pytest.mark.parametrize( "unit,error", 
( @@ -2044,7 +2067,8 @@ def test_no_conflicts(self, unit, dtype): DimensionalityError, id="no_unit", marks=pytest.mark.xfail( - reason="is not treated the same as dimensionless" + LooseVersion(pint.__version__) < LooseVersion("0.10.2"), + reason="bug in pint's implementation of np.pad", ), ), pytest.param( From 858eba6f1a99b4b1e37ab16f76d4bd060c5598fb Mon Sep 17 00:00:00 2001 From: keewis Date: Sun, 23 Feb 2020 20:13:45 +0100 Subject: [PATCH 05/75] allow formatting the diff of ndarray attributes (#3728) * allow comparing with ndarrays * add a test for the attrs diff repr * use array_equiv instead of using all since the comparison may warn --- xarray/core/formatting.py | 13 ++++++++++- xarray/tests/test_formatting.py | 39 +++++++++++++++++++++++++++++++++ 2 files changed, 51 insertions(+), 1 deletion(-) diff --git a/xarray/core/formatting.py b/xarray/core/formatting.py index 520fa9b9f1b..89246ff228d 100644 --- a/xarray/core/formatting.py +++ b/xarray/core/formatting.py @@ -500,6 +500,13 @@ def diff_dim_summary(a, b): def _diff_mapping_repr(a_mapping, b_mapping, compat, title, summarizer, col_width=None): + def is_array_like(value): + return ( + hasattr(value, "ndim") + and hasattr(value, "shape") + and hasattr(value, "dtype") + ) + def extra_items_repr(extra_keys, mapping, ab_side): extra_repr = [summarizer(k, mapping[k], col_width) for k in extra_keys] if extra_repr: @@ -522,7 +529,11 @@ def extra_items_repr(extra_keys, mapping, ab_side): is_variable = True except AttributeError: # compare attribute value - compatible = a_mapping[k] == b_mapping[k] + if is_array_like(a_mapping[k]) or is_array_like(b_mapping[k]): + compatible = array_equiv(a_mapping[k], b_mapping[k]) + else: + compatible = a_mapping[k] == b_mapping[k] + is_variable = False if not compatible: diff --git a/xarray/tests/test_formatting.py b/xarray/tests/test_formatting.py index 9a1f0bbd975..61ecf46b79b 100644 --- a/xarray/tests/test_formatting.py +++ b/xarray/tests/test_formatting.py @@ -3,6 +3,7 @@ import numpy as np import pandas as pd +import pytest import xarray as xr from xarray.core import formatting @@ -275,6 +276,44 @@ def test_diff_array_repr(self): except AssertionError: assert actual == expected.replace(", dtype=int64", "") + @pytest.mark.filterwarnings("error") + def test_diff_attrs_repr_with_array(self): + attrs_a = {"attr": np.array([0, 1])} + + attrs_b = {"attr": 1} + expected = dedent( + """\ + Differing attributes: + L attr: [0 1] + R attr: 1 + """ + ).strip() + actual = formatting.diff_attrs_repr(attrs_a, attrs_b, "equals") + assert expected == actual + + attrs_b = {"attr": np.array([-3, 5])} + expected = dedent( + """\ + Differing attributes: + L attr: [0 1] + R attr: [-3 5] + """ + ).strip() + actual = formatting.diff_attrs_repr(attrs_a, attrs_b, "equals") + assert expected == actual + + # should not raise a warning + attrs_b = {"attr": np.array([0, 1, 2])} + expected = dedent( + """\ + Differing attributes: + L attr: [0 1] + R attr: [0 1 2] + """ + ).strip() + actual = formatting.diff_attrs_repr(attrs_a, attrs_b, "equals") + assert expected == actual + def test_diff_dataset_repr(self): ds_a = xr.Dataset( data_vars={ From 66625c9947a647a4a547dc151f45b192dbb2bd20 Mon Sep 17 00:00:00 2001 From: keewis Date: Sun, 23 Feb 2020 20:34:34 +0100 Subject: [PATCH 06/75] always use dask_array_type for isinstance calls (#3787) * always use dask_array_type for instance checking * update whats-new.rst --- doc/whats-new.rst | 3 +++ xarray/core/dask_array_compat.py | 4 +++- xarray/core/duck_array_ops.py | 6 +++--- 3 files 
changed, 9 insertions(+), 4 deletions(-) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index b447e2c2048..c5c46022dc0 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -29,6 +29,9 @@ New Features Bug fixes ~~~~~~~~~ +- Use ``dask_array_type`` instead of ``dask_array.Array`` for type + checking. (:issue:`3779`, :pull:`3787`) + By `Justus Magin `_. Documentation ~~~~~~~~~~~~~ diff --git a/xarray/core/dask_array_compat.py b/xarray/core/dask_array_compat.py index de55de89f0c..05f750a1355 100644 --- a/xarray/core/dask_array_compat.py +++ b/xarray/core/dask_array_compat.py @@ -3,6 +3,8 @@ import numpy as np +from .pycompat import dask_array_type + try: import dask.array as da from dask import __version__ as dask_version @@ -36,7 +38,7 @@ def meta_from_array(x, ndim=None, dtype=None): """ # If using x._meta, x must be a Dask Array, some libraries (e.g. zarr) # implement a _meta attribute that are incompatible with Dask Array._meta - if hasattr(x, "_meta") and isinstance(x, da.Array): + if hasattr(x, "_meta") and isinstance(x, dask_array_type): x = x._meta if dtype is None and x is None: diff --git a/xarray/core/duck_array_ops.py b/xarray/core/duck_array_ops.py index 06e12e83abd..bc2db93a0a8 100644 --- a/xarray/core/duck_array_ops.py +++ b/xarray/core/duck_array_ops.py @@ -37,7 +37,7 @@ def f(*args, **kwargs): dispatch_args = args[0] else: dispatch_args = args[array_args] - if any(isinstance(a, dask_array.Array) for a in dispatch_args): + if any(isinstance(a, dask_array_type) for a in dispatch_args): try: wrapped = getattr(dask_module, name) except AttributeError as e: @@ -190,8 +190,8 @@ def lazy_array_equiv(arr1, arr2): return False if ( dask_array - and isinstance(arr1, dask_array.Array) - and isinstance(arr2, dask_array.Array) + and isinstance(arr1, dask_array_type) + and isinstance(arr2, dask_array_type) ): # GH3068 if arr1.name == arr2.name: From 58b11a63732e3066ad38dc1e63a733f4cce6425f Mon Sep 17 00:00:00 2001 From: Graham Inggs Date: Sun, 23 Feb 2020 21:39:39 +0200 Subject: [PATCH 07/75] Let test_repr_of_dataset pass on big-endian systems (#3772) --- xarray/tests/test_formatting_html.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/xarray/tests/test_formatting_html.py b/xarray/tests/test_formatting_html.py index fea24ff93f8..01357000b20 100644 --- a/xarray/tests/test_formatting_html.py +++ b/xarray/tests/test_formatting_html.py @@ -130,5 +130,5 @@ def test_repr_of_dataset(dataset): assert ( formatted.count("class='xr-section-summary-in' type='checkbox' checked>") == 3 ) - assert "<U4" in formatted + assert "<U4" in formatted or ">U4" in formatted assert "<IA>" in formatted From 24cfdd2414169248183c6839f2b39021746b978e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kai=20M=C3=BChlbauer?= Date: Sun, 23 Feb 2020 20:41:13 +0100 Subject: [PATCH 08/75] Add new h5netcdf backend phony_dims kwarg (#3753) * ADD: add `phony_dims` keyword arg to h5netcdf backend available from h5netcdf v0.8.0 * ADD: add `whats-new.rst` entry * FIX: raise ValueError Co-authored-by: Deepak Cherian --- doc/whats-new.rst | 5 +++++ xarray/backends/h5netcdf_.py | 10 ++++++++++ 2 files changed, 15 insertions(+) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index c5c46022dc0..9d2981d098d 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -24,9 +24,14 @@ Breaking changes New Features ~~~~~~~~~~~~ + +- Support new h5netcdf backend keyword `phony_dims` (available from h5netcdf + v0.8.0 for :py:class:`~xarray.backends.H5NetCDFStore`. + By `Kai Mühlbauer `_. - implement pint support. 
(:issue:`3594`, :pull:`3706`) By `Justus Magin `_. + Bug fixes ~~~~~~~~~ - Use ``dask_array_type`` instead of ``dask_array.Array`` for type diff --git a/xarray/backends/h5netcdf_.py b/xarray/backends/h5netcdf_.py index 2b7c2d9057c..393db14a7e9 100644 --- a/xarray/backends/h5netcdf_.py +++ b/xarray/backends/h5netcdf_.py @@ -1,4 +1,5 @@ import functools +from distutils.version import LooseVersion import numpy as np @@ -117,6 +118,7 @@ def open( lock=None, autoclose=False, invalid_netcdf=None, + phony_dims=None, ): import h5netcdf @@ -124,6 +126,14 @@ def open( raise ValueError("invalid format for h5netcdf backend") kwargs = {"invalid_netcdf": invalid_netcdf} + if phony_dims is not None: + if LooseVersion(h5netcdf.__version__) >= LooseVersion("0.8.0"): + kwargs["phony_dims"] = phony_dims + else: + raise ValueError( + "h5netcdf backend keyword argument 'phony_dims' needs " + "h5netcdf >= 0.8.0." + ) if lock is None: if mode == "r": From 3ef75aeffaf89aa1c7ea9957e5bdf8d1b36aac54 Mon Sep 17 00:00:00 2001 From: Deepak Cherian Date: Sun, 23 Feb 2020 19:45:18 +0000 Subject: [PATCH 09/75] concat now handles non-dim coordinates only present in one dataset (#3769) * concat can now deal with non-dim coordinates only present in one dataset. * fix test * minor fixes. --- doc/whats-new.rst | 4 ++++ xarray/core/concat.py | 18 +++++++++++++++++- xarray/tests/test_combine.py | 7 ++++--- xarray/tests/test_concat.py | 21 +++++++++++++++++++++ 4 files changed, 46 insertions(+), 4 deletions(-) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 9d2981d098d..1d7c425e554 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -38,6 +38,10 @@ Bug fixes checking. (:issue:`3779`, :pull:`3787`) By `Justus Magin `_. +- :py:func:`concat` can now handle coordinate variables only present in one of + the objects to be concatenated when ``coords="different"``. + By `Deepak Cherian `_. + Documentation ~~~~~~~~~~~~~ diff --git a/xarray/core/concat.py b/xarray/core/concat.py index 302f7afcec6..96b4be15d1b 100644 --- a/xarray/core/concat.py +++ b/xarray/core/concat.py @@ -194,7 +194,23 @@ def process_subset_opt(opt, subset): for k in getattr(datasets[0], subset): if k not in concat_over: equals[k] = None - variables = [ds.variables[k] for ds in datasets] + + variables = [] + for ds in datasets: + if k in ds.variables: + variables.append(ds.variables[k]) + + if len(variables) == 1: + # coords="different" doesn't make sense when only one object + # contains a particular variable. + break + elif len(variables) != len(datasets) and opt == "different": + raise ValueError( + f"{k!r} not present in all datasets and coords='different'. " + f"Either add {k!r} to datasets where it is missing or " + "specify coords='minimal'." + ) + # first check without comparing values i.e. 
no computes for var in variables[1:]: equals[k] = getattr(variables[0], compat)( diff --git a/xarray/tests/test_combine.py b/xarray/tests/test_combine.py index d907e1c5e46..eb2c6e1dbf7 100644 --- a/xarray/tests/test_combine.py +++ b/xarray/tests/test_combine.py @@ -365,9 +365,10 @@ def test_nested_concat(self): expected = Dataset({"x": ("a", [0, 1]), "y": ("a", [0, 1])}) assert_identical(expected, actual) - objs = [Dataset({"x": [0], "y": [0]}), Dataset({"x": [0]})] - with pytest.raises(KeyError): - combine_nested(objs, concat_dim="x") + objs = [Dataset({"x": [0], "y": [0]}), Dataset({"x": [1]})] + actual = combine_nested(objs, concat_dim="x") + expected = Dataset({"x": [0, 1], "y": [0]}) + assert_identical(expected, actual) @pytest.mark.parametrize( "join, expected", diff --git a/xarray/tests/test_concat.py b/xarray/tests/test_concat.py index def5abc942f..bd99181a947 100644 --- a/xarray/tests/test_concat.py +++ b/xarray/tests/test_concat.py @@ -475,3 +475,24 @@ def test_concat_attrs_first_variable(attr1, attr2): concat_attrs = concat(arrs, "y").attrs assert concat_attrs == attr1 + + +def test_concat_merge_single_non_dim_coord(): + da1 = DataArray([1, 2, 3], dims="x", coords={"x": [1, 2, 3], "y": 1}) + da2 = DataArray([4, 5, 6], dims="x", coords={"x": [4, 5, 6]}) + + expected = DataArray(range(1, 7), dims="x", coords={"x": range(1, 7), "y": 1}) + + for coords in ["different", "minimal"]: + actual = concat([da1, da2], "x", coords=coords) + assert_identical(actual, expected) + + with raises_regex(ValueError, "'y' is not present in all datasets."): + concat([da1, da2], dim="x", coords="all") + + da1 = DataArray([1, 2, 3], dims="x", coords={"x": [1, 2, 3], "y": 1}) + da2 = DataArray([4, 5, 6], dims="x", coords={"x": [4, 5, 6]}) + da3 = DataArray([7, 8, 9], dims="x", coords={"x": [7, 8, 9], "y": 1}) + for coords in ["different", "all"]: + with raises_regex(ValueError, "'y' not present in all datasets"): + concat([da1, da2, da3], dim="x") From 5e41b607b367bc49c37f34704923aba4bf164c13 Mon Sep 17 00:00:00 2001 From: Deepak Cherian Date: Mon, 24 Feb 2020 03:22:10 +0000 Subject: [PATCH 10/75] pin msgpack (#3793) * pin msgpack * fix package name --- ci/requirements/py36-min-all-deps.yml | 1 + ci/requirements/py36-min-nep18.yml | 1 + 2 files changed, 2 insertions(+) diff --git a/ci/requirements/py36-min-all-deps.yml b/ci/requirements/py36-min-all-deps.yml index 2781e551f23..86540197dcc 100644 --- a/ci/requirements/py36-min-all-deps.yml +++ b/ci/requirements/py36-min-all-deps.yml @@ -26,6 +26,7 @@ dependencies: - isort - lxml=4.4 # Optional dep of pydap - matplotlib=3.1 + - msgpack-python=0.6 # remove once distributed is bumped. distributed GH3491 - mypy=0.761 # Must match .pre-commit-config.yaml - nc-time-axis=1.2 - netcdf4=1.4 diff --git a/ci/requirements/py36-min-nep18.yml b/ci/requirements/py36-min-nep18.yml index 286b11c0de1..c10fdf67dc4 100644 --- a/ci/requirements/py36-min-nep18.yml +++ b/ci/requirements/py36-min-nep18.yml @@ -8,6 +8,7 @@ dependencies: - coveralls - dask=2.4 - distributed=2.4 + - msgpack-python=0.6 # remove once distributed is bumped. distributed GH3491 - numpy=1.17 - pandas=0.25 - pint=0.9 # Actually not enough as it doesn't implement __array_function__yet! From f468a0676b2411a53e4bb94d67918092d06b50b8 Mon Sep 17 00:00:00 2001 From: Deepak Cherian Date: Mon, 24 Feb 2020 10:26:29 -0800 Subject: [PATCH 11/75] Optimize isel for lazy array equality checking (#3588) * Add some xfailed tests. * Only xfail failing tests. 
* Add DataArray.rename_dims, DataArray.rename_vars * Update tests. * Fix isel. Tests pass. * todos * All tests pass. * Add comments. * wip * cleanup * Revert "Add DataArray.rename_dims, DataArray.rename_vars" This reverts commit 61b73347d71612e87d1478c0721b5c86ec6ee4bc. * more tests * Add comment * Add optimization to DaskIndexingAdapter * Update xarray/core/variable.py Co-Authored-By: crusaderky * minor. Co-authored-by: crusaderky --- xarray/core/indexing.py | 20 +++++++++++++- xarray/core/variable.py | 4 ++- xarray/tests/test_dask.py | 55 +++++++++++++++++++++++++++++++++++++++ 3 files changed, 77 insertions(+), 2 deletions(-) diff --git a/xarray/core/indexing.py b/xarray/core/indexing.py index 4e58be1ad2f..ab049a0a4b4 100644 --- a/xarray/core/indexing.py +++ b/xarray/core/indexing.py @@ -4,7 +4,7 @@ from collections import defaultdict from contextlib import suppress from datetime import timedelta -from typing import Any, Callable, Sequence, Tuple, Union +from typing import Any, Callable, Iterable, Sequence, Tuple, Union import numpy as np import pandas as pd @@ -1314,6 +1314,24 @@ def __init__(self, array): self.array = array def __getitem__(self, key): + + if not isinstance(key, VectorizedIndexer): + # if possible, short-circuit when keys are effectively slice(None) + # This preserves dask name and passes lazy array equivalence checks + # (see duck_array_ops.lazy_array_equiv) + rewritten_indexer = False + new_indexer = [] + for idim, k in enumerate(key.tuple): + if isinstance(k, Iterable) and duck_array_ops.array_equiv( + k, np.arange(self.array.shape[idim]) + ): + new_indexer.append(slice(None)) + rewritten_indexer = True + else: + new_indexer.append(k) + if rewritten_indexer: + key = type(key)(tuple(new_indexer)) + if isinstance(key, BasicIndexer): return self.array[key.tuple] elif isinstance(key, VectorizedIndexer): diff --git a/xarray/core/variable.py b/xarray/core/variable.py index 058b7bf52d4..daa8678157b 100644 --- a/xarray/core/variable.py +++ b/xarray/core/variable.py @@ -1057,7 +1057,9 @@ def isel( invalid = indexers.keys() - set(self.dims) if invalid: - raise ValueError("dimensions %r do not exist" % invalid) + raise ValueError( + f"dimensions {invalid} do not exist. 
Expected one or more of {self.dims}" + ) key = tuple(indexers.get(dim, slice(None)) for dim in self.dims) return self[key] diff --git a/xarray/tests/test_dask.py b/xarray/tests/test_dask.py index cc554850839..8fb54c4ee84 100644 --- a/xarray/tests/test_dask.py +++ b/xarray/tests/test_dask.py @@ -1390,3 +1390,58 @@ def test_lazy_array_equiv_merge(compat): xr.merge([da1, da3], compat=compat) with raise_if_dask_computes(max_computes=2): xr.merge([da1, da2 / 2], compat=compat) + + +@pytest.mark.filterwarnings("ignore::FutureWarning") # transpose_coords +@pytest.mark.parametrize("obj", [make_da(), make_ds()]) +@pytest.mark.parametrize( + "transform", + [ + lambda a: a.assign_attrs(new_attr="anew"), + lambda a: a.assign_coords(cxy=a.cxy), + lambda a: a.copy(), + lambda a: a.isel(x=np.arange(a.sizes["x"])), + lambda a: a.isel(x=slice(None)), + lambda a: a.loc[dict(x=slice(None))], + lambda a: a.loc[dict(x=np.arange(a.sizes["x"]))], + lambda a: a.loc[dict(x=a.x)], + lambda a: a.sel(x=a.x), + lambda a: a.sel(x=a.x.values), + lambda a: a.transpose(...), + lambda a: a.squeeze(), # no dimensions to squeeze + lambda a: a.sortby("x"), # "x" is already sorted + lambda a: a.reindex(x=a.x), + lambda a: a.reindex_like(a), + lambda a: a.rename({"cxy": "cnew"}).rename({"cnew": "cxy"}), + lambda a: a.pipe(lambda x: x), + lambda a: xr.align(a, xr.zeros_like(a))[0], + # assign + # swap_dims + # set_index / reset_index + ], +) +def test_transforms_pass_lazy_array_equiv(obj, transform): + with raise_if_dask_computes(): + assert_equal(obj, transform(obj)) + + +def test_more_transforms_pass_lazy_array_equiv(map_da, map_ds): + with raise_if_dask_computes(): + assert_equal(map_ds.cxy.broadcast_like(map_ds.cxy), map_ds.cxy) + assert_equal(xr.broadcast(map_ds.cxy, map_ds.cxy)[0], map_ds.cxy) + assert_equal(map_ds.map(lambda x: x), map_ds) + assert_equal(map_ds.set_coords("a").reset_coords("a"), map_ds) + assert_equal(map_ds.update({"a": map_ds.a}), map_ds) + + # fails because of index error + # assert_equal( + # map_ds.rename_dims({"x": "xnew"}).rename_dims({"xnew": "x"}), map_ds + # ) + + assert_equal( + map_ds.rename_vars({"cxy": "cnew"}).rename_vars({"cnew": "cxy"}), map_ds + ) + + assert_equal(map_da._from_temp_dataset(map_da._to_temp_dataset()), map_da) + assert_equal(map_da.astype(map_da.dtype), map_da) + assert_equal(map_da.transpose("y", "x", transpose_coords=False).cxy, map_da.cxy) From dc8a8dcc7d76398264c4a8d6ea5836e9f06aa8e9 Mon Sep 17 00:00:00 2001 From: Deepak Cherian Date: Mon, 24 Feb 2020 10:52:03 -0800 Subject: [PATCH 12/75] Add twine check and readthedocs reminder to HOW_TO_RELEASE (#3738) --- HOW_TO_RELEASE.md | 29 +++++++++++++++++------------ 1 file changed, 17 insertions(+), 12 deletions(-) diff --git a/HOW_TO_RELEASE.md b/HOW_TO_RELEASE.md index cdeb0e19a3e..4ef7342a5ed 100644 --- a/HOW_TO_RELEASE.md +++ b/HOW_TO_RELEASE.md @@ -1,4 +1,4 @@ -How to issue an xarray release in 14 easy steps +How to issue an xarray release in 16 easy steps Time required: about an hour. @@ -20,32 +20,37 @@ Time required: about an hour. ``` pytest ``` - 4. On the master branch, commit the release in git: + 4. Check that the ReadTheDocs build is passing. + 5. On the master branch, commit the release in git: ``` git commit -a -m 'Release v0.X.Y' ``` - 5. Tag the release: + 6. Tag the release: ``` git tag -a v0.X.Y -m 'v0.X.Y' ``` - 6. Build source and binary wheels for pypi: + 7. Build source and binary wheels for pypi: ``` git clean -xdf # this deletes all uncommited changes! python setup.py bdist_wheel sdist ``` - 7. 
Use twine to register and upload the release on pypi. Be careful, you can't + 8. Use twine to check the package build: + ``` + twine check dist/xarray-0.X.Y* + ``` + 9. Use twine to register and upload the release on pypi. Be careful, you can't take this back! ``` twine upload dist/xarray-0.X.Y* ``` You will need to be listed as a package owner at https://pypi.python.org/pypi/xarray for this to work. - 8. Push your changes to master: +10. Push your changes to master: ``` git push upstream master git push upstream --tags ``` - 9. Update the stable branch (used by ReadTheDocs) and switch back to master: +11. Update the stable branch (used by ReadTheDocs) and switch back to master: ``` git checkout stable git rebase master @@ -55,20 +60,20 @@ Time required: about an hour. It's OK to force push to 'stable' if necessary. (We also update the stable branch with `git cherrypick` for documentation only fixes that apply the current released version.) -10. Add a section for the next release (v.X.(Y+1)) to doc/whats-new.rst. -11. Commit your changes and push to master again: +12. Add a section for the next release (v.X.(Y+1)) to doc/whats-new.rst. +13. Commit your changes and push to master again: ``` git commit -a -m 'New whatsnew section' git push upstream master ``` You're done pushing to master! -12. Issue the release on GitHub. Click on "Draft a new release" at +14. Issue the release on GitHub. Click on "Draft a new release" at https://github.com/pydata/xarray/releases. Type in the version number, but don't bother to describe it -- we maintain that on the docs instead. -13. Update the docs. Login to https://readthedocs.org/projects/xray/versions/ +15. Update the docs. Login to https://readthedocs.org/projects/xray/versions/ and switch your new release tag (at the bottom) from "Inactive" to "Active". It should now build automatically. -14. Issue the release announcement! For bug fix releases, I usually only email +16. Issue the release announcement! For bug fix releases, I usually only email xarray@googlegroups.com. For major/feature releases, I will email a broader list (no more than once every 3-6 months): - pydata@googlegroups.com From b14eea2f06bbcf1a02c4ae4cba9ed981aef69292 Mon Sep 17 00:00:00 2001 From: johnomotani Date: Mon, 24 Feb 2020 20:20:07 +0000 Subject: [PATCH 13/75] Fix contourf set under (#3601) * Copy colors for bad, under, and over values in _build_discrete_cmap() Copies the cmap._rgba_bad, cmap._rgba_under, and cmap._rgba_over values to new_cmap, in case they have been set to non-default values. Allows the user to customize plots more by using matplotlib methods on a cmap before passing as an argument to xarray's plotting methods. Previously these settings were overridden by defaults when creating the cmap actually used to make the plot. * Tests that cmap.set_bad, cmap.set_under, cmap.set_over not overridden * Add defaults in case cmap is a str in _build_discrete_cmap If the input cmap is a str, getting _rgba_bad, _rgba_under, or _rgba_over attributes from it will fail. To fix this, provide defaults taken from new_cmap. * Consolidate tests of cmap.set_bad() cmap.set_under() and cmap.set_over() Make one unit test test all three properties, rather than having a separate test for each. * Do not read/modify private members for bad, under and over colors * Only change under, over colors if they were set by user When modifying a colormap, we only want to preserve the under and over colors of the original colormap if they were explicitly set by the user. 
In _build_discrete_cmap this makes no difference, as the new_cmap returned by mpl.colors.from_levels_and_colors has the same minimum and maximum colors as its input, so the default under and over colors would not change anyway and could be copied regardless. However, for clarity and in case the same pattern is needed in future elsewhere, it is nicer to add a checks for: whether the under color is the same as cmap(0), only setting under for new_cmap if it is not; and whether the over color is the same as cmap(cmap.N - 1), only setting over for new_cmap if it is not. * Remove temporary variable that is only used once * Use deepcopy instead of copy for cmaps cmaps contain tuples member variables, so safer to deepcopy instead of just copy to make sure we never change the copied variable. * Set different colors for bad, under and over when testing * Extra test checking bad, under and over colors when not set explicitly * Comment on why test uses deepcopy * Pass vmin and vmax in test_contourf_cmap_set() Set vmin and vmax so that _build_discrete_colormap is called with extend='both'. extend is passed to mpl.colors.from_levels_and_colors(), which returns a result with sensible under and over values if extend='both', but not if extend='neither' (but if extend='neither' the under and over values would not be used because the data would all be within the plotted range) * Don't use private members in tests * Add whats-new * Fix isort * Update doc/whats-new.rst Co-authored-by: Deepak Cherian --- doc/whats-new.rst | 3 ++ xarray/plot/utils.py | 24 ++++++++++++++ xarray/tests/test_plot.py | 66 +++++++++++++++++++++++++++++++++++++++ 3 files changed, 93 insertions(+) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 1d7c425e554..8a2666d25fa 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -41,6 +41,9 @@ Bug fixes - :py:func:`concat` can now handle coordinate variables only present in one of the objects to be concatenated when ``coords="different"``. By `Deepak Cherian `_. +- xarray now respects the over, under and bad colors if set on a provided colormap. + (:issue:`3590`, :pull:`3601`) + By `johnomotani `_. Documentation ~~~~~~~~~~~~~ diff --git a/xarray/plot/utils.py b/xarray/plot/utils.py index 341ff730e01..cb3bef6d409 100644 --- a/xarray/plot/utils.py +++ b/xarray/plot/utils.py @@ -78,6 +78,30 @@ def _build_discrete_cmap(cmap, levels, extend, filled): # copy the old cmap name, for easier testing new_cmap.name = getattr(cmap, "name", cmap) + # copy colors to use for bad, under, and over values in case they have been + # set to non-default values + try: + # matplotlib<3.2 only uses bad color for masked values + bad = cmap(np.ma.masked_invalid([np.nan]))[0] + except TypeError: + # cmap was a str or list rather than a color-map object, so there are + # no bad, under or over values to check or copy + pass + else: + under = cmap(-np.inf) + over = cmap(np.inf) + + new_cmap.set_bad(bad) + + # Only update under and over if they were explicitly changed by the user + # (i.e. are different from the lowest or highest values in cmap). Otherwise + # leave unchanged so new_cmap uses its default values (its own lowest and + # highest values). 
+ if under != cmap(0): + new_cmap.set_under(under) + if over != cmap(cmap.N - 1): + new_cmap.set_over(over) + return new_cmap, cnorm diff --git a/xarray/tests/test_plot.py b/xarray/tests/test_plot.py index dda9e5de3b2..9ffbcd9c85e 100644 --- a/xarray/tests/test_plot.py +++ b/xarray/tests/test_plot.py @@ -1,4 +1,5 @@ import inspect +from copy import deepcopy from datetime import datetime import numpy as np @@ -275,6 +276,71 @@ def test2d_1d_2d_coordinates_contourf(self): a.plot.contourf(x="time", y="depth") a.plot.contourf(x="depth", y="time") + def test_contourf_cmap_set(self): + a = DataArray(easy_array((4, 4)), dims=["z", "time"]) + + cmap = mpl.cm.viridis + + # deepcopy to ensure cmap is not changed by contourf() + # Set vmin and vmax so that _build_discrete_colormap is called with + # extend='both'. extend is passed to + # mpl.colors.from_levels_and_colors(), which returns a result with + # sensible under and over values if extend='both', but not if + # extend='neither' (but if extend='neither' the under and over values + # would not be used because the data would all be within the plotted + # range) + pl = a.plot.contourf(cmap=deepcopy(cmap), vmin=0.1, vmax=0.9) + + # check the set_bad color + assert np.all( + pl.cmap(np.ma.masked_invalid([np.nan]))[0] + == cmap(np.ma.masked_invalid([np.nan]))[0] + ) + + # check the set_under color + assert pl.cmap(-np.inf) == cmap(-np.inf) + + # check the set_over color + assert pl.cmap(np.inf) == cmap(np.inf) + + def test_contourf_cmap_set_with_bad_under_over(self): + a = DataArray(easy_array((4, 4)), dims=["z", "time"]) + + # Make a copy here because we want a local cmap that we will modify. + # Use deepcopy because matplotlib Colormap objects have tuple members + # and we want to ensure we do not change the original. 
+ cmap = deepcopy(mpl.cm.viridis) + + cmap.set_bad("w") + # check we actually changed the set_bad color + assert np.all( + cmap(np.ma.masked_invalid([np.nan]))[0] + != mpl.cm.viridis(np.ma.masked_invalid([np.nan]))[0] + ) + + cmap.set_under("r") + # check we actually changed the set_under color + assert cmap(-np.inf) != mpl.cm.viridis(-np.inf) + + cmap.set_over("g") + # check we actually changed the set_over color + assert cmap(np.inf) != mpl.cm.viridis(-np.inf) + + # deepcopy to ensure cmap is not changed by contourf() + pl = a.plot.contourf(cmap=deepcopy(cmap)) + + # check the set_bad color has been kept + assert np.all( + pl.cmap(np.ma.masked_invalid([np.nan]))[0] + == cmap(np.ma.masked_invalid([np.nan]))[0] + ) + + # check the set_under color has been kept + assert pl.cmap(-np.inf) == cmap(-np.inf) + + # check the set_over color has been kept + assert pl.cmap(np.inf) == cmap(np.inf) + def test3d(self): self.darray.plot() From 18e34cc8746ea2ce7be785edcaeee0bd7d4a0200 Mon Sep 17 00:00:00 2001 From: Joseph K Aicher <4666753+jaicher@users.noreply.github.com> Date: Mon, 24 Feb 2020 17:34:58 -0500 Subject: [PATCH 14/75] Fix swap_dims() index names (issue #3748) (#3752) * Added test for GH3748 * Rename newly created index in swap_dims() to dim name if not multiindex Fixes GH3748 * Updated whats-new.rst with pull request information for swap_dims fix * Move tests for GH3748 into existing swap_dims tests + integrated new tests for GH3748 for DataArray into existing swap_dims tests + added similar tests for Dataset + added test for multiindex case Co-authored-by: Deepak Cherian --- doc/whats-new.rst | 6 +++++- xarray/core/dataset.py | 6 +++++- xarray/tests/test_dataarray.py | 19 +++++++++++++++++++ xarray/tests/test_dataset.py | 12 +++++++++++- 4 files changed, 40 insertions(+), 3 deletions(-) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 8a2666d25fa..32c86563a57 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -34,10 +34,14 @@ New Features Bug fixes ~~~~~~~~~ + +- Fix :py:meth:`Dataset.swap_dims` and :py:meth:`DataArray.swap_dims` producing + index with name reflecting the previous dimension name instead of the new one + (:issue:`3748`, :pull:`3752`). By `Joseph K Aicher + `_. - Use ``dask_array_type`` instead of ``dask_array.Array`` for type checking. (:issue:`3779`, :pull:`3787`) By `Justus Magin `_. - - :py:func:`concat` can now handle coordinate variables only present in one of the objects to be concatenated when ``coords="different"``. By `Deepak Cherian `_. 
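To make the swap_dims fix described in the whats-new entry above concrete, here is a condensed sketch of the behavior that the tests later in this patch verify (it assumes this fix is applied; the coordinate values are arbitrary):

```python
import numpy as np
import pandas as pd
import xarray as xr

# a non-dimension coordinate "y" along dimension "x"
array = xr.DataArray(np.random.randn(3), {"y": ("x", list("abc"))}, dims="x")

swapped = array.swap_dims({"x": "y"})

# the promoted index is named after the new dimension "y", not the old "x"
pd.testing.assert_index_equal(swapped.indexes["y"], pd.Index(list("abc"), name="y"))
```

Before this change, the index created for ``y`` kept a name reflecting the previous dimension, so the name comparison above would fail.
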
diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index 07bea6dac19..7252dd2f3df 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -2948,7 +2948,11 @@ def swap_dims( if k in self.indexes: indexes[k] = self.indexes[k] else: - indexes[k] = var.to_index() + new_index = var.to_index() + if new_index.nlevels == 1: + # make sure index name matches dimension name + new_index = new_index.rename(k) + indexes[k] = new_index else: var = v.to_base_variable() var.dims = dims diff --git a/xarray/tests/test_dataarray.py b/xarray/tests/test_dataarray.py index b9b719e8af9..0a622d279ba 100644 --- a/xarray/tests/test_dataarray.py +++ b/xarray/tests/test_dataarray.py @@ -1536,11 +1536,30 @@ def test_swap_dims(self): expected = DataArray(array.values, {"y": list("abc")}, dims="y") actual = array.swap_dims({"x": "y"}) assert_identical(expected, actual) + for dim_name in set().union(expected.indexes.keys(), actual.indexes.keys()): + pd.testing.assert_index_equal( + expected.indexes[dim_name], actual.indexes[dim_name] + ) array = DataArray(np.random.randn(3), {"x": list("abc")}, "x") expected = DataArray(array.values, {"x": ("y", list("abc"))}, dims="y") actual = array.swap_dims({"x": "y"}) assert_identical(expected, actual) + for dim_name in set().union(expected.indexes.keys(), actual.indexes.keys()): + pd.testing.assert_index_equal( + expected.indexes[dim_name], actual.indexes[dim_name] + ) + + # multiindex case + idx = pd.MultiIndex.from_arrays([list("aab"), list("yzz")], names=["y1", "y2"]) + array = DataArray(np.random.randn(3), {"y": ("x", idx)}, "x") + expected = DataArray(array.values, {"y": idx}, "y") + actual = array.swap_dims({"x": "y"}) + assert_identical(expected, actual) + for dim_name in set().union(expected.indexes.keys(), actual.indexes.keys()): + pd.testing.assert_index_equal( + expected.indexes[dim_name], actual.indexes[dim_name] + ) def test_expand_dims_error(self): array = DataArray( diff --git a/xarray/tests/test_dataset.py b/xarray/tests/test_dataset.py index 4e51e229b29..5e254c37e44 100644 --- a/xarray/tests/test_dataset.py +++ b/xarray/tests/test_dataset.py @@ -2596,7 +2596,7 @@ def test_swap_dims(self): assert_identical(expected, actual) assert isinstance(actual.variables["y"], IndexVariable) assert isinstance(actual.variables["x"], Variable) - assert actual.indexes["y"].equals(pd.Index(list("abc"))) + pd.testing.assert_index_equal(actual.indexes["y"], expected.indexes["y"]) roundtripped = actual.swap_dims({"y": "x"}) assert_identical(original.set_coords("y"), roundtripped) @@ -2612,6 +2612,16 @@ def test_swap_dims(self): actual = original.swap_dims({"x": "u"}) assert_identical(expected, actual) + # handle multiindex case + idx = pd.MultiIndex.from_arrays([list("aab"), list("yzz")], names=["y1", "y2"]) + original = Dataset({"x": [1, 2, 3], "y": ("x", idx), "z": 42}) + expected = Dataset({"z": 42}, {"x": ("y", [1, 2, 3]), "y": idx}) + actual = original.swap_dims({"x": "y"}) + assert_identical(expected, actual) + assert isinstance(actual.variables["y"], IndexVariable) + assert isinstance(actual.variables["x"], Variable) + pd.testing.assert_index_equal(actual.indexes["y"], expected.indexes["y"]) + def test_expand_dims_error(self): original = Dataset( { From b6c8162724b4f828361204a8c0759b8437d80290 Mon Sep 17 00:00:00 2001 From: Maximilian Roos <5635139+max-sixty@users.noreply.github.com> Date: Thu, 27 Feb 2020 22:09:46 -0500 Subject: [PATCH 15/75] Add short summary at the top of issue template (#3799) --- .github/ISSUE_TEMPLATE/bug_report.md | 3 +++ 1 file 
changed, 3 insertions(+) diff --git a/.github/ISSUE_TEMPLATE/bug_report.md b/.github/ISSUE_TEMPLATE/bug_report.md index 31fef19b32a..df5b2304bc3 100644 --- a/.github/ISSUE_TEMPLATE/bug_report.md +++ b/.github/ISSUE_TEMPLATE/bug_report.md @@ -7,6 +7,9 @@ assignees: '' --- + + + #### MCVE Code Sample From fd08842e81576f5ea6b826e31bc2031bcca79de2 Mon Sep 17 00:00:00 2001 From: Spencer Clark Date: Fri, 28 Feb 2020 08:39:58 -0500 Subject: [PATCH 16/75] xfail tests due to #3751 (#3808) --- xarray/tests/test_cftimeindex.py | 17 +++++++++++++++-- xarray/tests/test_interp.py | 4 ++++ 2 files changed, 19 insertions(+), 2 deletions(-) diff --git a/xarray/tests/test_cftimeindex.py b/xarray/tests/test_cftimeindex.py index a8ee3c97042..8025766529e 100644 --- a/xarray/tests/test_cftimeindex.py +++ b/xarray/tests/test_cftimeindex.py @@ -450,6 +450,7 @@ def test_sel_date_scalar(da, date_type, index): assert_identical(result, expected) +@pytest.mark.xfail(reason="https://github.com/pydata/xarray/issues/3751") @requires_cftime @pytest.mark.parametrize( "sel_kwargs", @@ -501,7 +502,12 @@ def test_sel_date_scalar_backfill(da, date_type, index, sel_kwargs): [ {"method": "pad", "tolerance": timedelta(days=20)}, {"method": "backfill", "tolerance": timedelta(days=20)}, - {"method": "nearest", "tolerance": timedelta(days=20)}, + pytest.param( + {"method": "nearest", "tolerance": timedelta(days=20)}, + marks=pytest.mark.xfail( + reason="https://github.com/pydata/xarray/issues/3751" + ), + ), ], ) def test_sel_date_scalar_tolerance_raises(da, date_type, sel_kwargs): @@ -509,6 +515,7 @@ def test_sel_date_scalar_tolerance_raises(da, date_type, sel_kwargs): da.sel(time=date_type(1, 5, 1), **sel_kwargs) +@pytest.mark.xfail(reason="https://github.com/pydata/xarray/issues/3751") @requires_cftime @pytest.mark.parametrize( "sel_kwargs", @@ -556,7 +563,12 @@ def test_sel_date_list_backfill(da, date_type, index, sel_kwargs): [ {"method": "pad", "tolerance": timedelta(days=20)}, {"method": "backfill", "tolerance": timedelta(days=20)}, - {"method": "nearest", "tolerance": timedelta(days=20)}, + pytest.param( + {"method": "nearest", "tolerance": timedelta(days=20)}, + marks=pytest.mark.xfail( + reason="https://github.com/pydata/xarray/issues/3751" + ), + ), ], ) def test_sel_date_list_tolerance_raises(da, date_type, sel_kwargs): @@ -591,6 +603,7 @@ def range_args(date_type): ] +@pytest.mark.xfail(reason="https://github.com/pydata/xarray/issues/3751") @requires_cftime def test_indexing_in_series_getitem(series, index, scalar_args, range_args): for arg in scalar_args: diff --git a/xarray/tests/test_interp.py b/xarray/tests/test_interp.py index e3af8b5873a..c2bec2166c8 100644 --- a/xarray/tests/test_interp.py +++ b/xarray/tests/test_interp.py @@ -556,6 +556,7 @@ def test_datetime_single_string(): assert_allclose(actual.drop_vars("time"), expected) +@pytest.mark.xfail(reason="https://github.com/pydata/xarray/issues/3751") @requires_cftime @requires_scipy def test_cftime(): @@ -582,6 +583,7 @@ def test_cftime_type_error(): da.interp(time=times_new) +@pytest.mark.xfail(reason="https://github.com/pydata/xarray/issues/3751") @requires_cftime @requires_scipy def test_cftime_list_of_strings(): @@ -603,6 +605,7 @@ def test_cftime_list_of_strings(): assert_allclose(actual, expected) +@pytest.mark.xfail(reason="https://github.com/pydata/xarray/issues/3751") @requires_cftime @requires_scipy def test_cftime_single_string(): @@ -664,6 +667,7 @@ def test_datetime_interp_noerror(): a.interp(x=xi, time=xi.time) # should not raise an error 
+@pytest.mark.xfail(reason="https://github.com/pydata/xarray/issues/3751") @requires_cftime def test_3641(): times = xr.cftime_range("0001", periods=3, freq="500Y") From 20e6236f250d1507d22daf06d38b283a83c12e44 Mon Sep 17 00:00:00 2001 From: keewis Date: Fri, 28 Feb 2020 16:16:13 +0100 Subject: [PATCH 17/75] remove datetime tests with pint (#3788) * don't try to wrap datetime objects in pint * make sure we actually decide based on dask.array.Array * update whats-new.rst * directly use isnat * always use numpy's isnat --- doc/whats-new.rst | 2 ++ xarray/core/duck_array_ops.py | 9 +-------- xarray/tests/test_units.py | 10 ++++++++++ 3 files changed, 13 insertions(+), 8 deletions(-) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 32c86563a57..6a5491e34dd 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -58,6 +58,8 @@ Internal Changes - Removed the internal ``import_seaborn`` function which handled the deprecation of the ``seaborn.apionly`` entry point (:issue:`3747`). By `Mathias Hauser `_. +- Don't test pint integration in combination with datetime objects. (:issue:`3778`, :pull:`3788`) + By `Justus Magin `_. - Changed test_open_mfdataset_list_attr to only run with dask installed (:issue:`3777`, :pull:`3780`). By `Bruno Pagani `_. diff --git a/xarray/core/duck_array_ops.py b/xarray/core/duck_array_ops.py index bc2db93a0a8..6d0abe9a6fc 100644 --- a/xarray/core/duck_array_ops.py +++ b/xarray/core/duck_array_ops.py @@ -71,14 +71,7 @@ def fail_on_dask_array_input(values, msg=None, func_name=None): isclose = _dask_or_eager_func("isclose") -if hasattr(np, "isnat") and ( - dask_array is None or hasattr(dask_array_type, "__array_ufunc__") -): - # np.isnat is available since NumPy 1.13, so __array_ufunc__ is always - # supported. - isnat = np.isnat -else: - isnat = _dask_or_eager_func("isnull", eager_module=pd) +isnat = np.isnat isnan = _dask_or_eager_func("isnan") zeros_like = _dask_or_eager_func("zeros_like") diff --git a/xarray/tests/test_units.py b/xarray/tests/test_units.py index 75e743c3455..9f63ebb1d42 100644 --- a/xarray/tests/test_units.py +++ b/xarray/tests/test_units.py @@ -1335,6 +1335,7 @@ def wrapper(cls): "test_index_0d_datetime", "test_index_0d_timedelta64", "test_0d_time_data", + "test_index_0d_not_a_time", "test_datetime64_conversion", "test_timedelta64_conversion", "test_pandas_period_index", @@ -1357,6 +1358,15 @@ def cls(dims, data, *args, **kwargs): dims, unit_registry.Quantity(data, unit_registry.m), *args, **kwargs ) + def example_1d_objects(self): + for data in [ + range(3), + 0.5 * np.arange(3), + 0.5 * np.arange(3, dtype=np.float32), + np.array(["a", "b", "c"], dtype=object), + ]: + yield (self.cls("x", data), data) + @pytest.mark.parametrize( "func", ( From 016a77d626338efc5a59fc50b7d82d153144d086 Mon Sep 17 00:00:00 2001 From: Maximilian Roos <5635139+max-sixty@users.noreply.github.com> Date: Sat, 29 Feb 2020 15:47:11 -0500 Subject: [PATCH 18/75] raise on multiple string args to groupby (#3802) --- doc/whats-new.rst | 6 +++++- xarray/core/common.py | 11 +++++++++++ xarray/tests/test_groupby.py | 5 +++++ 3 files changed, 21 insertions(+), 1 deletion(-) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 6a5491e34dd..1deb77eecfc 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -30,7 +30,11 @@ New Features By `Kai Mühlbauer `_. - implement pint support. (:issue:`3594`, :pull:`3706`) By `Justus Magin `_. - +- :py:meth:`Dataset.groupby` and :py:meth:`DataArray.groupby` now raise a + `TypeError` on multiple string arguments. 
Receiving multiple string arguments + often means a user is attempting to pass multiple dimensions to group over + and should instead pass a list. + By `Maximilian Roos `_ Bug fixes ~~~~~~~~~ diff --git a/xarray/core/common.py b/xarray/core/common.py index e908c69dd14..582ae310061 100644 --- a/xarray/core/common.py +++ b/xarray/core/common.py @@ -660,6 +660,17 @@ def groupby(self, group, squeeze: bool = True, restore_coord_dims: bool = None): core.groupby.DataArrayGroupBy core.groupby.DatasetGroupBy """ + # While we don't generally check the type of every arg, passing + # multiple dimensions as multiple arguments is common enough, and the + # consequences hidden enough (strings evaluate as true) to warrant + # checking here. + # A future version could make squeeze kwarg only, but would face + # backward-compat issues. + if not isinstance(squeeze, bool): + raise TypeError( + f"`squeeze` must be True or False, but {squeeze} was supplied" + ) + return self._groupby_cls( self, group, squeeze=squeeze, restore_coord_dims=restore_coord_dims ) diff --git a/xarray/tests/test_groupby.py b/xarray/tests/test_groupby.py index 97bd31ae050..77558e741be 100644 --- a/xarray/tests/test_groupby.py +++ b/xarray/tests/test_groupby.py @@ -483,6 +483,11 @@ def test_groupby_reduce_dimension_error(array): assert_allclose(array.mean(["x", "z"]), grouped.reduce(np.mean, ["x", "z"])) +def test_groupby_multiple_string_args(array): + with pytest.raises(TypeError): + array.groupby("x", "y") + + def test_groupby_bins_timeseries(): ds = xr.Dataset() ds["time"] = xr.DataArray( From 45d88fc4b2524ecb0c1236cd31767d00f72b0ea1 Mon Sep 17 00:00:00 2001 From: Spencer Clark Date: Mon, 2 Mar 2020 04:41:19 -0500 Subject: [PATCH 19/75] Enable pandas-style rounding of cftime.datetime objects (#3792) * Initial progress on implementing cftime floor/ceil/round * Improve tests and docstrings * Add tests of rounding cftime datetimes via dt accessor * Add documentation * docstring edits * Test rounding raises error with non-fixed frequency * black * typo * A couple cleanup items: - Fix floating point issue in asi8 and add tests - Ensure dask only computes once when using the rounding accessors * black --- doc/weather-climate.rst | 8 ++ doc/whats-new.rst | 4 + xarray/coding/cftimeindex.py | 135 +++++++++++++++++++++++++++++++ xarray/core/accessor_dt.py | 28 ++++--- xarray/tests/test_accessor_dt.py | 104 ++++++++++++++++++++++++ xarray/tests/test_cftimeindex.py | 89 ++++++++++++++++++++ 6 files changed, 359 insertions(+), 9 deletions(-) diff --git a/doc/weather-climate.rst b/doc/weather-climate.rst index 96641c2b97e..9e7c0f1d51d 100644 --- a/doc/weather-climate.rst +++ b/doc/weather-climate.rst @@ -105,6 +105,14 @@ For data indexed by a :py:class:`~xarray.CFTimeIndex` xarray currently supports: da.time.dt.dayofyear da.time.dt.dayofweek +- Rounding of datetimes to fixed frequencies via the ``dt`` accessor: + +.. ipython:: python + + da.time.dt.ceil('3D') + da.time.dt.floor('5D') + da.time.dt.round('2D') + - Group-by operations based on datetime accessor attributes (e.g. by month of the year): diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 1deb77eecfc..579719cb8d7 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -25,6 +25,10 @@ Breaking changes New Features ~~~~~~~~~~~~ +- Added support for :py:class:`pandas.DatetimeIndex`-style rounding of + ``cftime.datetime`` objects directly via a :py:class:`CFTimeIndex` or via the + :py:class:`~core.accessor_dt.DatetimeAccessor`. 
+ By `Spencer Clark `_ - Support new h5netcdf backend keyword `phony_dims` (available from h5netcdf v0.8.0 for :py:class:`~xarray.backends.H5NetCDFStore`. By `Kai Mühlbauer `_. diff --git a/xarray/coding/cftimeindex.py b/xarray/coding/cftimeindex.py index 8b440812ca9..99f90430e91 100644 --- a/xarray/coding/cftimeindex.py +++ b/xarray/coding/cftimeindex.py @@ -528,6 +528,83 @@ def strftime(self, date_format): """ return pd.Index([date.strftime(date_format) for date in self._data]) + @property + def asi8(self): + """Convert to integers with units of microseconds since 1970-01-01.""" + from ..core.resample_cftime import exact_cftime_datetime_difference + + epoch = self.date_type(1970, 1, 1) + return np.array( + [ + _total_microseconds(exact_cftime_datetime_difference(epoch, date)) + for date in self.values + ] + ) + + def _round_via_method(self, freq, method): + """Round dates using a specified method.""" + from .cftime_offsets import CFTIME_TICKS, to_offset + + offset = to_offset(freq) + if not isinstance(offset, CFTIME_TICKS): + raise ValueError(f"{offset} is a non-fixed frequency") + + unit = _total_microseconds(offset.as_timedelta()) + values = self.asi8 + rounded = method(values, unit) + return _cftimeindex_from_i8(rounded, self.date_type, self.name) + + def floor(self, freq): + """Round dates down to fixed frequency. + + Parameters + ---------- + freq : str or CFTimeOffset + The frequency level to round the index to. Must be a fixed + frequency like 'S' (second) not 'ME' (month end). See `frequency + aliases `_ + for a list of possible values. + + Returns + ------- + CFTimeIndex + """ + return self._round_via_method(freq, _floor_int) + + def ceil(self, freq): + """Round dates up to fixed frequency. + + Parameters + ---------- + freq : str or CFTimeOffset + The frequency level to round the index to. Must be a fixed + frequency like 'S' (second) not 'ME' (month end). See `frequency + aliases `_ + for a list of possible values. + + Returns + ------- + CFTimeIndex + """ + return self._round_via_method(freq, _ceil_int) + + def round(self, freq): + """Round dates to a fixed frequency. + + Parameters + ---------- + freq : str or CFTimeOffset + The frequency level to round the index to. Must be a fixed + frequency like 'S' (second) not 'ME' (month end). See `frequency + aliases `_ + for a list of possible values. + + Returns + ------- + CFTimeIndex + """ + return self._round_via_method(freq, _round_to_nearest_half_even) + def _parse_iso8601_without_reso(date_type, datetime_str): date, _ = _parse_iso8601_with_reso(date_type, datetime_str) @@ -554,3 +631,61 @@ def _parse_array_of_cftime_strings(strings, date_type): return np.array( [_parse_iso8601_without_reso(date_type, s) for s in strings.ravel()] ).reshape(strings.shape) + + +def _cftimeindex_from_i8(values, date_type, name): + """Construct a CFTimeIndex from an array of integers. + + Parameters + ---------- + values : np.array + Integers representing microseconds since 1970-01-01. + date_type : cftime.datetime + Type of date for the index. + name : str + Name of the index. + + Returns + ------- + CFTimeIndex + """ + epoch = date_type(1970, 1, 1) + dates = np.array([epoch + timedelta(microseconds=int(value)) for value in values]) + return CFTimeIndex(dates, name=name) + + +def _total_microseconds(delta): + """Compute the total number of microseconds of a datetime.timedelta. + + Parameters + ---------- + delta : datetime.timedelta + Input timedelta. 
+ + Returns + ------- + int + """ + return delta / timedelta(microseconds=1) + + +def _floor_int(values, unit): + """Copied from pandas.""" + return values - np.remainder(values, unit) + + +def _ceil_int(values, unit): + """Copied from pandas.""" + return values + np.remainder(-values, unit) + + +def _round_to_nearest_half_even(values, unit): + """Copied from pandas.""" + if unit % 2: + return _ceil_int(values - unit // 2, unit) + quotient, remainder = np.divmod(values, unit) + mask = np.logical_or( + remainder > (unit // 2), np.logical_and(remainder == (unit // 2), quotient % 2) + ) + quotient[mask] += 1 + return quotient * unit diff --git a/xarray/core/accessor_dt.py b/xarray/core/accessor_dt.py index c407371f9f0..de0e332b26c 100644 --- a/xarray/core/accessor_dt.py +++ b/xarray/core/accessor_dt.py @@ -78,20 +78,27 @@ def _get_date_field(values, name, dtype): return access_method(values, name) -def _round_series(values, name, freq): - """Coerce an array of datetime-like values to a pandas Series and - apply requested rounding +def _round_through_series_or_index(values, name, freq): + """Coerce an array of datetime-like values to a pandas Series or xarray + CFTimeIndex and apply requested rounding """ - values_as_series = pd.Series(values.ravel()) - method = getattr(values_as_series.dt, name) + from ..coding.cftimeindex import CFTimeIndex + + if is_np_datetime_like(values.dtype): + values_as_series = pd.Series(values.ravel()) + method = getattr(values_as_series.dt, name) + else: + values_as_cftimeindex = CFTimeIndex(values.ravel()) + method = getattr(values_as_cftimeindex, name) + field_values = method(freq=freq).values return field_values.reshape(values.shape) def _round_field(values, name, freq): - """Indirectly access pandas rounding functions by wrapping data - as a Series and calling through `.dt` attribute. + """Indirectly access rounding functions by wrapping data + as a Series or CFTimeIndex Parameters ---------- @@ -110,9 +117,12 @@ def _round_field(values, name, freq): if isinstance(values, dask_array_type): from dask.array import map_blocks - return map_blocks(_round_series, values, name, freq=freq, dtype=np.datetime64) + dtype = np.datetime64 if is_np_datetime_like(values.dtype) else np.dtype("O") + return map_blocks( + _round_through_series_or_index, values, name, freq=freq, dtype=dtype + ) else: - return _round_series(values, name, freq) + return _round_through_series_or_index(values, name, freq) def _strftime_through_cftimeindex(values, date_format): diff --git a/xarray/tests/test_accessor_dt.py b/xarray/tests/test_accessor_dt.py index f178720a6e1..1a8a2732eeb 100644 --- a/xarray/tests/test_accessor_dt.py +++ b/xarray/tests/test_accessor_dt.py @@ -7,6 +7,7 @@ from . 
import ( assert_array_equal, assert_equal, + assert_identical, raises_regex, requires_cftime, requires_dask, @@ -435,3 +436,106 @@ def test_seasons(cftime_date_type): seasons = xr.DataArray(seasons) assert_array_equal(seasons.values, dates.dt.season.values) + + +@pytest.fixture +def cftime_rounding_dataarray(cftime_date_type): + return xr.DataArray( + [ + [cftime_date_type(1, 1, 1, 1), cftime_date_type(1, 1, 1, 15)], + [cftime_date_type(1, 1, 1, 23), cftime_date_type(1, 1, 2, 1)], + ] + ) + + +@requires_cftime +@requires_dask +@pytest.mark.parametrize("use_dask", [False, True]) +def test_cftime_floor_accessor(cftime_rounding_dataarray, cftime_date_type, use_dask): + import dask.array as da + + freq = "D" + expected = xr.DataArray( + [ + [cftime_date_type(1, 1, 1, 0), cftime_date_type(1, 1, 1, 0)], + [cftime_date_type(1, 1, 1, 0), cftime_date_type(1, 1, 2, 0)], + ], + name="floor", + ) + + if use_dask: + chunks = {"dim_0": 1} + # Currently a compute is done to inspect a single value of the array + # if it is of object dtype to check if it is a cftime.datetime (if not + # we raise an error when using the dt accessor). + with raise_if_dask_computes(max_computes=1): + result = cftime_rounding_dataarray.chunk(chunks).dt.floor(freq) + expected = expected.chunk(chunks) + assert isinstance(result.data, da.Array) + assert result.chunks == expected.chunks + else: + result = cftime_rounding_dataarray.dt.floor(freq) + + assert_identical(result, expected) + + +@requires_cftime +@requires_dask +@pytest.mark.parametrize("use_dask", [False, True]) +def test_cftime_ceil_accessor(cftime_rounding_dataarray, cftime_date_type, use_dask): + import dask.array as da + + freq = "D" + expected = xr.DataArray( + [ + [cftime_date_type(1, 1, 2, 0), cftime_date_type(1, 1, 2, 0)], + [cftime_date_type(1, 1, 2, 0), cftime_date_type(1, 1, 3, 0)], + ], + name="ceil", + ) + + if use_dask: + chunks = {"dim_0": 1} + # Currently a compute is done to inspect a single value of the array + # if it is of object dtype to check if it is a cftime.datetime (if not + # we raise an error when using the dt accessor). + with raise_if_dask_computes(max_computes=1): + result = cftime_rounding_dataarray.chunk(chunks).dt.ceil(freq) + expected = expected.chunk(chunks) + assert isinstance(result.data, da.Array) + assert result.chunks == expected.chunks + else: + result = cftime_rounding_dataarray.dt.ceil(freq) + + assert_identical(result, expected) + + +@requires_cftime +@requires_dask +@pytest.mark.parametrize("use_dask", [False, True]) +def test_cftime_round_accessor(cftime_rounding_dataarray, cftime_date_type, use_dask): + import dask.array as da + + freq = "D" + expected = xr.DataArray( + [ + [cftime_date_type(1, 1, 1, 0), cftime_date_type(1, 1, 2, 0)], + [cftime_date_type(1, 1, 2, 0), cftime_date_type(1, 1, 2, 0)], + ], + name="round", + ) + + if use_dask: + chunks = {"dim_0": 1} + # Currently a compute is done to inspect a single value of the array + # if it is of object dtype to check if it is a cftime.datetime (if not + # we raise an error when using the dt accessor). 
+ with raise_if_dask_computes(max_computes=1): + result = cftime_rounding_dataarray.chunk(chunks).dt.round(freq) + expected = expected.chunk(chunks) + assert isinstance(result.data, da.Array) + assert result.chunks == expected.chunks + else: + result = cftime_rounding_dataarray.dt.round(freq) + + assert_identical(result, expected) diff --git a/xarray/tests/test_cftimeindex.py b/xarray/tests/test_cftimeindex.py index 8025766529e..8d83b833ca3 100644 --- a/xarray/tests/test_cftimeindex.py +++ b/xarray/tests/test_cftimeindex.py @@ -904,3 +904,92 @@ def test_multiindex(): index = xr.cftime_range("2001-01-01", periods=100, calendar="360_day") mindex = pd.MultiIndex.from_arrays([index]) assert mindex.get_loc("2001-01") == slice(0, 30) + + +@requires_cftime +@pytest.mark.parametrize("freq", ["3663S", "33T", "2H"]) +@pytest.mark.parametrize("method", ["floor", "ceil", "round"]) +def test_rounding_methods_against_datetimeindex(freq, method): + expected = pd.date_range("2000-01-02T01:03:51", periods=10, freq="1777S") + expected = getattr(expected, method)(freq) + result = xr.cftime_range("2000-01-02T01:03:51", periods=10, freq="1777S") + result = getattr(result, method)(freq).to_datetimeindex() + assert result.equals(expected) + + +@requires_cftime +@pytest.mark.parametrize("method", ["floor", "ceil", "round"]) +def test_rounding_methods_invalid_freq(method): + index = xr.cftime_range("2000-01-02T01:03:51", periods=10, freq="1777S") + with pytest.raises(ValueError, match="fixed"): + getattr(index, method)("MS") + + +@pytest.fixture +def rounding_index(date_type): + return xr.CFTimeIndex( + [ + date_type(1, 1, 1, 1, 59, 59, 999512), + date_type(1, 1, 1, 3, 0, 1, 500001), + date_type(1, 1, 1, 7, 0, 6, 499999), + ] + ) + + +@requires_cftime +def test_ceil(rounding_index, date_type): + result = rounding_index.ceil("S") + expected = xr.CFTimeIndex( + [ + date_type(1, 1, 1, 2, 0, 0, 0), + date_type(1, 1, 1, 3, 0, 2, 0), + date_type(1, 1, 1, 7, 0, 7, 0), + ] + ) + assert result.equals(expected) + + +@requires_cftime +def test_floor(rounding_index, date_type): + result = rounding_index.floor("S") + expected = xr.CFTimeIndex( + [ + date_type(1, 1, 1, 1, 59, 59, 0), + date_type(1, 1, 1, 3, 0, 1, 0), + date_type(1, 1, 1, 7, 0, 6, 0), + ] + ) + assert result.equals(expected) + + +@requires_cftime +def test_round(rounding_index, date_type): + result = rounding_index.round("S") + expected = xr.CFTimeIndex( + [ + date_type(1, 1, 1, 2, 0, 0, 0), + date_type(1, 1, 1, 3, 0, 2, 0), + date_type(1, 1, 1, 7, 0, 6, 0), + ] + ) + assert result.equals(expected) + + +@requires_cftime +def test_asi8(date_type): + index = xr.CFTimeIndex([date_type(1970, 1, 1), date_type(1970, 1, 2)]) + result = index.asi8 + expected = 1000000 * 86400 * np.array([0, 1]) + np.testing.assert_array_equal(result, expected) + + +@requires_cftime +def test_asi8_distant_date(): + """Test that asi8 conversion is truly exact.""" + import cftime + + date_type = cftime.DatetimeProlepticGregorian + index = xr.CFTimeIndex([date_type(10731, 4, 22, 3, 25, 45, 123456)]) + result = index.asi8 + expected = np.array([1000000 * 86400 * 400 * 8000 + 12345 * 1000000 + 123456]) + np.testing.assert_array_equal(result, expected) From 8512b7bf498c0c300f146447c0b05545842e9404 Mon Sep 17 00:00:00 2001 From: niowniow Date: Mon, 2 Mar 2020 13:19:16 +0100 Subject: [PATCH 20/75] Fix zarr append with groups (#3610) * bug fixed and added zarr group tests * black . 
* added info to whats-new Co-authored-by: Ryan Abernathey --- doc/whats-new.rst | 2 ++ xarray/backends/zarr.py | 4 ++-- xarray/tests/test_backends.py | 35 ++++++++++++++++++++++++----------- 3 files changed, 28 insertions(+), 13 deletions(-) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 579719cb8d7..2cc92c78ac8 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -56,6 +56,8 @@ Bug fixes - xarray now respects the over, under and bad colors if set on a provided colormap. (:issue:`3590`, :pull:`3601`) By `johnomotani `_. +- Fix :py:meth:`xarray.core.dataset.Dataset.to_zarr` when using `append_dim` and `group` + simultaneously. (:issue:`3170`). By `Matthias Meyer `_. Documentation ~~~~~~~~~~~~~ diff --git a/xarray/backends/zarr.py b/xarray/backends/zarr.py index 763769dac74..2469a31a3d9 100644 --- a/xarray/backends/zarr.py +++ b/xarray/backends/zarr.py @@ -373,7 +373,7 @@ def store( if len(existing_variables) > 0: # there are variables to append # their encoding must be the same as in the store - ds = open_zarr(self.ds.store, chunks=None) + ds = open_zarr(self.ds.store, group=self.ds.path, chunks=None) variables_with_encoding = {} for vn in existing_variables: variables_with_encoding[vn] = variables[vn].copy(deep=False) @@ -487,7 +487,7 @@ def open_zarr( directory in file system where a Zarr DirectoryStore has been stored. synchronizer : object, optional Array synchronizer provided to zarr - group : str, obtional + group : str, optional Group path. (a.k.a. `path` in zarr terminology.) chunks : int or dict or tuple or {None, 'auto'}, optional Chunk sizes along each dimension, e.g., ``5`` or diff --git a/xarray/tests/test_backends.py b/xarray/tests/test_backends.py index b7ba70ef6c4..015d2cbfdeb 100644 --- a/xarray/tests/test_backends.py +++ b/xarray/tests/test_backends.py @@ -1729,39 +1729,52 @@ def test_hidden_zarr_keys(self): pass @pytest.mark.skipif(LooseVersion(dask_version) < "2.4", reason="dask GH5334") - def test_write_persistence_modes(self): + @pytest.mark.parametrize("group", [None, "group1"]) + def test_write_persistence_modes(self, group): original = create_test_data() # overwrite mode - with self.roundtrip(original, save_kwargs={"mode": "w"}) as actual: + with self.roundtrip( + original, + save_kwargs={"mode": "w", "group": group}, + open_kwargs={"group": group}, + ) as actual: assert_identical(original, actual) # don't overwrite mode - with self.roundtrip(original, save_kwargs={"mode": "w-"}) as actual: + with self.roundtrip( + original, + save_kwargs={"mode": "w-", "group": group}, + open_kwargs={"group": group}, + ) as actual: assert_identical(original, actual) # make sure overwriting works as expected with self.create_zarr_target() as store: self.save(original, store) # should overwrite with no error - self.save(original, store, mode="w") - with self.open(store) as actual: + self.save(original, store, mode="w", group=group) + with self.open(store, group=group) as actual: assert_identical(original, actual) with pytest.raises(ValueError): self.save(original, store, mode="w-") # check append mode for normal write - with self.roundtrip(original, save_kwargs={"mode": "a"}) as actual: + with self.roundtrip( + original, + save_kwargs={"mode": "a", "group": group}, + open_kwargs={"group": group}, + ) as actual: assert_identical(original, actual) - ds, ds_to_append, _ = create_append_test_data() - # check append mode for append write + ds, ds_to_append, _ = create_append_test_data() with self.create_zarr_target() as store_target: - ds.to_zarr(store_target, mode="w") - 
ds_to_append.to_zarr(store_target, append_dim="time") + ds.to_zarr(store_target, mode="w", group=group) + ds_to_append.to_zarr(store_target, append_dim="time", group=group) original = xr.concat([ds, ds_to_append], dim="time") - assert_identical(original, xr.open_zarr(store_target)) + actual = xr.open_zarr(store_target, group=group) + assert_identical(original, actual) def test_compressor_encoding(self): original = create_test_data() From b155853ff6e17172b1b6b16c0da31522718e9409 Mon Sep 17 00:00:00 2001 From: Julia Signell Date: Mon, 2 Mar 2020 18:01:43 -0500 Subject: [PATCH 21/75] Turn on html repr by default (#3812) * Turn on html repr by default * Add By line to release docs * Change tests to expect html as the default display_style --- doc/whats-new.rst | 5 +++++ xarray/core/options.py | 2 +- xarray/tests/test_options.py | 22 ++++++++++++---------- 3 files changed, 18 insertions(+), 11 deletions(-) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 2cc92c78ac8..151ba917cce 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -39,6 +39,11 @@ New Features often means a user is attempting to pass multiple dimensions to group over and should instead pass a list. By `Maximilian Roos `_ +- The new ``Dataset._repr_html_`` and ``DataArray._repr_html_`` (introduced + in 0.14.1) is now on by default. To disable, use + ``xarray.set_options(display_style="text")``. + By `Julia Signell `_. + Bug fixes ~~~~~~~~~ diff --git a/xarray/core/options.py b/xarray/core/options.py index 72f9ad8e1fa..15d05159d6d 100644 --- a/xarray/core/options.py +++ b/xarray/core/options.py @@ -20,7 +20,7 @@ CMAP_SEQUENTIAL: "viridis", CMAP_DIVERGENT: "RdBu_r", KEEP_ATTRS: "default", - DISPLAY_STYLE: "text", + DISPLAY_STYLE: "html", } _JOIN_OPTIONS = frozenset(["inner", "outer", "left", "right", "exact"]) diff --git a/xarray/tests/test_options.py b/xarray/tests/test_options.py index f155acbf494..19f74476ced 100644 --- a/xarray/tests/test_options.py +++ b/xarray/tests/test_options.py @@ -68,12 +68,12 @@ def test_nested_options(): def test_display_style(): - original = "text" + original = "html" assert OPTIONS["display_style"] == original with pytest.raises(ValueError): xarray.set_options(display_style="invalid_str") - with xarray.set_options(display_style="html"): - assert OPTIONS["display_style"] == "html" + with xarray.set_options(display_style="text"): + assert OPTIONS["display_style"] == "text" assert OPTIONS["display_style"] == original @@ -177,10 +177,11 @@ def test_merge_attr_retention(self): def test_display_style_text(self): ds = create_test_dataset_attrs() - text = ds._repr_html_() - assert text.startswith("
")
-        assert "'nested'" in text
-        assert "<xarray.Dataset>" in text
+        with xarray.set_options(display_style="text"):
+            text = ds._repr_html_()
+            assert text.startswith("
")
+            assert "'nested'" in text
+            assert "<xarray.Dataset>" in text
 
     def test_display_style_html(self):
         ds = create_test_dataset_attrs()
@@ -191,9 +192,10 @@ def test_display_style_html(self):
 
     def test_display_dataarray_style_text(self):
         da = create_test_dataarray_attrs()
-        text = da._repr_html_()
-        assert text.startswith("<pre>")
-        assert "&lt;xarray.DataArray &#x27;var1&#x27;" in text
+        with xarray.set_options(display_style="text"):
+            text = da._repr_html_()
+            assert text.startswith("<pre>")
+            assert "&lt;xarray.DataArray &#x27;var1&#x27;" in text
 
     def test_display_dataarray_style_html(self):
         da = create_test_dataarray_attrs()

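For illustration, a minimal usage sketch of the new default (not part of the
patch; assumes a build with #3812 applied):

    import xarray as xr

    ds = xr.Dataset({"a": ("x", [1, 2, 3])})
    # HTML is now the default notebook repr; opt back into plain text with:
    with xr.set_options(display_style="text"):
        html = ds._repr_html_()  # text repr wrapped in <pre>...</pre>
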
From 1c5e1cd022a0ff91275c50a50d1c6f88a7abff7d Mon Sep 17 00:00:00 2001
From: Andrew Thomas 
Date: Mon, 2 Mar 2020 18:02:55 -0500
Subject: [PATCH 22/75] Coarsen keep attrs 3376 (#3801)

* Add test of DataWithCoords.coarsen() for #3376

* Add test of Variable.coarsen() for #3376

* Add keep_attrs kwarg to DataWithCoords.coarsen() for #3376

* Style and spelling fixes (#3376)

* Fix test_coarsen_keep_attrs by removing self from input

* Pass keep_attrs through to _coarsen_cls and _rolling_cls returns (#3376)

* Move keyword from coarsen to mean in test_coarsen_keep_attrs

* Start handling keep_attrs in rolling class constructors (#3376)

* Update Coarsen constructor and DatasetCoarsen class method (GH3376)

Assign keep_attrs keyword value to Coarsen objects in constructor
Add conditional inside _reduce_method.wrapped_func branching on self.keep_attrs and pass back to returned Dataset

* Incorporate code review from @max-sixty

* Fix Dataset.coarsen and Variable.coarsen for GH3376

Handle global keep_attrs setting inside Variable._coarsen_reshape

Pass attrs through consistently inside DatasetCoarsen._reduce_method

Don't pass Variable.coarsen a keyword argument it doesn't expect inside DataArrayCoarsen._reduce_method

* Update tests for GH3376

* Incorporate review changes to test_dataset for GH3376

Remove commented-out test from test_coarsen_keep_attrs

Add test_rolling_keep_attrs

* Change Rolling._dataset_implementation for GH3376

Return a Dataset object that results in test_rolling_keep_attrs Passing

* style fixes

* Remove duplicate variable assignment and document change (GH3376)
---
 doc/whats-new.rst             |  5 +++
 xarray/core/common.py         | 29 +++++++++++++--
 xarray/core/rolling.py        | 67 ++++++++++++++++++++++++++++-------
 xarray/core/variable.py       |  3 ++
 xarray/tests/test_dataset.py  | 56 +++++++++++++++++++++++++++++
 xarray/tests/test_variable.py | 22 +++++++++++-
 6 files changed, 165 insertions(+), 17 deletions(-)

diff --git a/doc/whats-new.rst b/doc/whats-new.rst
index 151ba917cce..089cbbe1be3 100644
--- a/doc/whats-new.rst
+++ b/doc/whats-new.rst
@@ -61,6 +61,11 @@ Bug fixes
 - xarray now respects the over, under and bad colors if set on a provided colormap.
   (:issue:`3590`, :pull:`3601`)
   By `johnomotani `_.
+- :py:func:`coarsen` now respects ``xr.set_options(keep_attrs=True)``
+  to preserve attributes. :py:meth:`Dataset.coarsen` accepts a keyword
+  argument ``keep_attrs`` to change this setting. (:issue:`3376`,
+  :pull:`3801`) By `Andrew Thomas `_.
+  
 - Fix :py:meth:`xarray.core.dataset.Dataset.to_zarr` when using `append_dim` and `group`
   simultaneously. (:issue:`3170`). By `Matthias Meyer `_.
 
diff --git a/xarray/core/common.py b/xarray/core/common.py
index 582ae310061..e3739d6d039 100644
--- a/xarray/core/common.py
+++ b/xarray/core/common.py
@@ -753,6 +753,7 @@ def rolling(
         dim: Mapping[Hashable, int] = None,
         min_periods: int = None,
         center: bool = False,
+        keep_attrs: bool = None,
         **window_kwargs: int,
     ):
         """
@@ -769,6 +770,10 @@ def rolling(
             setting min_periods equal to the size of the window.
         center : boolean, default False
             Set the labels at the center of the window.
+        keep_attrs : bool, optional
+            If True, the object's attributes (`attrs`) will be copied from
+            the original object to the new one.  If False (default), the new
+            object will be returned without attributes.
         **window_kwargs : optional
             The keyword arguments form of ``dim``.
             One of dim or window_kwargs must be provided.
@@ -810,8 +815,13 @@ def rolling(
         core.rolling.DataArrayRolling
         core.rolling.DatasetRolling
         """
+        if keep_attrs is None:
+            keep_attrs = _get_keep_attrs(default=False)
+
         dim = either_dict_or_kwargs(dim, window_kwargs, "rolling")
-        return self._rolling_cls(self, dim, min_periods=min_periods, center=center)
+        return self._rolling_cls(
+            self, dim, min_periods=min_periods, center=center, keep_attrs=keep_attrs
+        )
 
     def rolling_exp(
         self,
@@ -859,6 +869,7 @@ def coarsen(
         boundary: str = "exact",
         side: Union[str, Mapping[Hashable, str]] = "left",
         coord_func: str = "mean",
+        keep_attrs: bool = None,
         **window_kwargs: int,
     ):
         """
@@ -879,8 +890,12 @@ def coarsen(
             multiple of the window size. If 'trim', the excess entries are
             dropped. If 'pad', NA will be padded.
         side : 'left' or 'right' or mapping from dimension to 'left' or 'right'
-        coord_func : function (name) that is applied to the coordintes,
+        coord_func : function (name) that is applied to the coordinates,
             or a mapping from coordinate name to function (name).
+        keep_attrs : bool, optional
+            If True, the object's attributes (`attrs`) will be copied from
+            the original object to the new one.  If False (default), the new
+            object will be returned without attributes.
 
         Returns
         -------
@@ -915,9 +930,17 @@ def coarsen(
         core.rolling.DataArrayCoarsen
         core.rolling.DatasetCoarsen
         """
+        if keep_attrs is None:
+            keep_attrs = _get_keep_attrs(default=False)
+
         dim = either_dict_or_kwargs(dim, window_kwargs, "coarsen")
         return self._coarsen_cls(
-            self, dim, boundary=boundary, side=side, coord_func=coord_func
+            self,
+            dim,
+            boundary=boundary,
+            side=side,
+            coord_func=coord_func,
+            keep_attrs=keep_attrs,
         )
 
     def resample(
diff --git a/xarray/core/rolling.py b/xarray/core/rolling.py
index ea6d72b2e03..61178cfb15f 100644
--- a/xarray/core/rolling.py
+++ b/xarray/core/rolling.py
@@ -7,6 +7,7 @@
 from . import dtypes, duck_array_ops, utils
 from .dask_array_ops import dask_rolling_wrapper
 from .ops import inject_reduce_methods
+from .options import _get_keep_attrs
 from .pycompat import dask_array_type
 
 try:
@@ -42,10 +43,10 @@ class Rolling:
     DataArray.rolling
     """
 
-    __slots__ = ("obj", "window", "min_periods", "center", "dim")
-    _attributes = ("window", "min_periods", "center", "dim")
+    __slots__ = ("obj", "window", "min_periods", "center", "dim", "keep_attrs")
+    _attributes = ("window", "min_periods", "center", "dim", "keep_attrs")
 
-    def __init__(self, obj, windows, min_periods=None, center=False):
+    def __init__(self, obj, windows, min_periods=None, center=False, keep_attrs=None):
         """
         Moving window object.
 
@@ -65,6 +66,10 @@ def __init__(self, obj, windows, min_periods=None, center=False):
             setting min_periods equal to the size of the window.
         center : boolean, default False
             Set the labels at the center of the window.
+        keep_attrs : bool, optional
+            If True, the object's attributes (`attrs`) will be copied from
+            the original object to the new one.  If False (default), the new
+            object will be returned without attributes.
 
         Returns
         -------
@@ -89,6 +94,10 @@ def __init__(self, obj, windows, min_periods=None, center=False):
         self.center = center
         self.dim = dim
 
+        if keep_attrs is None:
+            keep_attrs = _get_keep_attrs(default=False)
+        self.keep_attrs = keep_attrs
+
     @property
     def _min_periods(self):
         return self.min_periods if self.min_periods is not None else self.window
@@ -143,7 +152,7 @@ def count(self):
 class DataArrayRolling(Rolling):
     __slots__ = ("window_labels",)
 
-    def __init__(self, obj, windows, min_periods=None, center=False):
+    def __init__(self, obj, windows, min_periods=None, center=False, keep_attrs=None):
         """
         Moving window object for DataArray.
         You should use DataArray.rolling() method to construct this object
@@ -165,6 +174,10 @@ def __init__(self, obj, windows, min_periods=None, center=False):
             setting min_periods equal to the size of the window.
         center : boolean, default False
             Set the labels at the center of the window.
+        keep_attrs : bool, optional
+            If True, the object's attributes (`attrs`) will be copied from
+            the original object to the new one.  If False (default), the new
+            object will be returned without attributes.
 
         Returns
         -------
@@ -177,7 +190,11 @@ def __init__(self, obj, windows, min_periods=None, center=False):
         Dataset.rolling
         Dataset.groupby
         """
-        super().__init__(obj, windows, min_periods=min_periods, center=center)
+        if keep_attrs is None:
+            keep_attrs = _get_keep_attrs(default=False)
+        super().__init__(
+            obj, windows, min_periods=min_periods, center=center, keep_attrs=keep_attrs
+        )
 
         self.window_labels = self.obj[self.dim]
 
@@ -374,7 +391,7 @@ def _numpy_or_bottleneck_reduce(
 class DatasetRolling(Rolling):
     __slots__ = ("rollings",)
 
-    def __init__(self, obj, windows, min_periods=None, center=False):
+    def __init__(self, obj, windows, min_periods=None, center=False, keep_attrs=None):
         """
         Moving window object for Dataset.
         You should use Dataset.rolling() method to construct this object
@@ -396,6 +413,10 @@ def __init__(self, obj, windows, min_periods=None, center=False):
             setting min_periods equal to the size of the window.
         center : boolean, default False
             Set the labels at the center of the window.
+        keep_attrs : bool, optional
+            If True, the object's attributes (`attrs`) will be copied from
+            the original object to the new one.  If False (default), the new
+            object will be returned without attributes.
 
         Returns
         -------
@@ -408,7 +429,7 @@ def __init__(self, obj, windows, min_periods=None, center=False):
         Dataset.groupby
         DataArray.groupby
         """
-        super().__init__(obj, windows, min_periods, center)
+        super().__init__(obj, windows, min_periods, center, keep_attrs)
         if self.dim not in self.obj.dims:
             raise KeyError(self.dim)
         # Keep each Rolling object as a dictionary
@@ -416,7 +437,9 @@ def __init__(self, obj, windows, min_periods=None, center=False):
         for key, da in self.obj.data_vars.items():
             # keeps rollings only for the dataset depending on slf.dim
             if self.dim in da.dims:
-                self.rollings[key] = DataArrayRolling(da, windows, min_periods, center)
+                self.rollings[key] = DataArrayRolling(
+                    da, windows, min_periods, center, keep_attrs
+                )
 
     def _dataset_implementation(self, func, **kwargs):
         from .dataset import Dataset
@@ -427,7 +450,8 @@ def _dataset_implementation(self, func, **kwargs):
                 reduced[key] = func(self.rollings[key], **kwargs)
             else:
                 reduced[key] = self.obj[key]
-        return Dataset(reduced, coords=self.obj.coords)
+        attrs = self.obj.attrs if self.keep_attrs else {}
+        return Dataset(reduced, coords=self.obj.coords, attrs=attrs)
 
     def reduce(self, func, **kwargs):
         """Reduce the items in this group by applying `func` along some
@@ -466,7 +490,7 @@ def _numpy_or_bottleneck_reduce(
             **kwargs,
         )
 
-    def construct(self, window_dim, stride=1, fill_value=dtypes.NA):
+    def construct(self, window_dim, stride=1, fill_value=dtypes.NA, keep_attrs=None):
         """
         Convert this rolling object to xr.Dataset,
         where the window dimension is stacked as a new dimension
@@ -487,6 +511,9 @@ def construct(self, window_dim, stride=1, fill_value=dtypes.NA):
 
         from .dataset import Dataset
 
+        if keep_attrs is None:
+            keep_attrs = _get_keep_attrs(default=True)
+
         dataset = {}
         for key, da in self.obj.data_vars.items():
             if self.dim in da.dims:
@@ -509,10 +536,18 @@ class Coarsen:
     DataArray.coarsen
     """
 
-    __slots__ = ("obj", "boundary", "coord_func", "windows", "side", "trim_excess")
+    __slots__ = (
+        "obj",
+        "boundary",
+        "coord_func",
+        "windows",
+        "side",
+        "trim_excess",
+        "keep_attrs",
+    )
     _attributes = ("windows", "side", "trim_excess")
 
-    def __init__(self, obj, windows, boundary, side, coord_func):
+    def __init__(self, obj, windows, boundary, side, coord_func, keep_attrs):
         """
         Moving window object.
 
@@ -541,6 +576,7 @@ def __init__(self, obj, windows, boundary, side, coord_func):
         self.windows = windows
         self.side = side
         self.boundary = boundary
+        self.keep_attrs = keep_attrs
 
         absent_dims = [dim for dim in windows.keys() if dim not in self.obj.dims]
         if absent_dims:
@@ -626,6 +662,11 @@ def _reduce_method(cls, func: Callable, include_skipna: bool, numeric_only: bool
         def wrapped_func(self, **kwargs):
             from .dataset import Dataset
 
+            if self.keep_attrs:
+                attrs = self.obj.attrs
+            else:
+                attrs = {}
+
             reduced = {}
             for key, da in self.obj.data_vars.items():
                 reduced[key] = da.variable.coarsen(
@@ -644,7 +685,7 @@ def wrapped_func(self, **kwargs):
                     )
                 else:
                     coords[c] = v.variable
-            return Dataset(reduced, coords=coords)
+            return Dataset(reduced, coords=coords, attrs=attrs)
 
         return wrapped_func
 
diff --git a/xarray/core/variable.py b/xarray/core/variable.py
index daa8678157b..62f9fde6a2e 100644
--- a/xarray/core/variable.py
+++ b/xarray/core/variable.py
@@ -1949,6 +1949,9 @@ def _coarsen_reshape(self, windows, boundary, side):
             else:
                 shape.append(variable.shape[i])
 
+        keep_attrs = _get_keep_attrs(default=False)
+        variable.attrs = variable._attrs if keep_attrs else {}
+
         return variable.data.reshape(shape), tuple(axes)
 
     @property
diff --git a/xarray/tests/test_dataset.py b/xarray/tests/test_dataset.py
index 5e254c37e44..7bcf9379ae8 100644
--- a/xarray/tests/test_dataset.py
+++ b/xarray/tests/test_dataset.py
@@ -5664,6 +5664,62 @@ def test_coarsen_coords_cftime():
     np.testing.assert_array_equal(actual.time, expected_times)
 
 
+def test_coarsen_keep_attrs():
+    _attrs = {"units": "test", "long_name": "testing"}
+
+    var1 = np.linspace(10, 15, 100)
+    var2 = np.linspace(5, 10, 100)
+    coords = np.linspace(1, 10, 100)
+
+    ds = Dataset(
+        data_vars={"var1": ("coord", var1), "var2": ("coord", var2)},
+        coords={"coord": coords},
+        attrs=_attrs,
+    )
+
+    # Test dropped attrs
+    dat = ds.coarsen(coord=5).mean()
+    assert dat.attrs == {}
+
+    # Test kept attrs using dataset keyword
+    dat = ds.coarsen(coord=5, keep_attrs=True).mean()
+    assert dat.attrs == _attrs
+
+    # Test kept attrs using global option
+    with set_options(keep_attrs=True):
+        dat = ds.coarsen(coord=5).mean()
+    assert dat.attrs == _attrs
+
+
+def test_rolling_keep_attrs():
+    _attrs = {"units": "test", "long_name": "testing"}
+
+    var1 = np.linspace(10, 15, 100)
+    var2 = np.linspace(5, 10, 100)
+    coords = np.linspace(1, 10, 100)
+
+    ds = Dataset(
+        data_vars={"var1": ("coord", var1), "var2": ("coord", var2)},
+        coords={"coord": coords},
+        attrs=_attrs,
+    )
+
+    # Test dropped attrs
+    dat = ds.rolling(dim={"coord": 5}, min_periods=None, center=False).mean()
+    assert dat.attrs == {}
+
+    # Test kept attrs using dataset keyword
+    dat = ds.rolling(
+        dim={"coord": 5}, min_periods=None, center=False, keep_attrs=True
+    ).mean()
+    assert dat.attrs == _attrs
+
+    # Test kept attrs using global option
+    with set_options(keep_attrs=True):
+        dat = ds.rolling(dim={"coord": 5}, min_periods=None, center=False).mean()
+    assert dat.attrs == _attrs
+
+
 def test_rolling_properties(ds):
     # catching invalid args
     with pytest.raises(ValueError, match="exactly one dim/window should"):
diff --git a/xarray/tests/test_variable.py b/xarray/tests/test_variable.py
index 62fde920b1e..c86ecd0121f 100644
--- a/xarray/tests/test_variable.py
+++ b/xarray/tests/test_variable.py
@@ -9,7 +9,7 @@
 import pytz
 
 from xarray import Coordinate, Dataset, IndexVariable, Variable, set_options
-from xarray.core import dtypes, indexing
+from xarray.core import dtypes, duck_array_ops, indexing
 from xarray.core.common import full_like, ones_like, zeros_like
 from xarray.core.indexing import (
     BasicIndexer,
@@ -1879,6 +1879,26 @@ def test_coarsen_2d(self):
         expected = self.cls(("x", "y"), [[10, 18], [42, 35]])
         assert_equal(actual, expected)
 
+    # perhaps @pytest.mark.parametrize("operation", [f for f in duck_array_ops])
+    def test_coarsen_keep_attrs(self, operation="mean"):
+        _attrs = {"units": "test", "long_name": "testing"}
+
+        test_func = getattr(duck_array_ops, operation, None)
+
+        # Test dropped attrs
+        with set_options(keep_attrs=False):
+            new = Variable(["coord"], np.linspace(1, 10, 100), attrs=_attrs).coarsen(
+                windows={"coord": 1}, func=test_func, boundary="exact", side="left"
+            )
+        assert new.attrs == {}
+
+        # Test kept attrs
+        with set_options(keep_attrs=True):
+            new = Variable(["coord"], np.linspace(1, 10, 100), attrs=_attrs).coarsen(
+                windows={"coord": 1}, func=test_func, boundary="exact", side="left"
+            )
+        assert new.attrs == _attrs
+
 
 @requires_dask
 class TestVariableWithDask(VariableSubclassobjects):

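For illustration, a minimal sketch of the new ``keep_attrs`` keyword (not part
of the patch; mirrors the tests added above):

    import numpy as np
    import xarray as xr

    ds = xr.Dataset(
        {"var1": ("coord", np.linspace(10, 15, 100))},
        coords={"coord": np.linspace(1, 10, 100)},
        attrs={"units": "test"},
    )
    ds.coarsen(coord=5).mean().attrs                   # {} -- dropped by default
    ds.coarsen(coord=5, keep_attrs=True).mean().attrs  # {'units': 'test'}
    with xr.set_options(keep_attrs=True):              # global option works too
        assert ds.rolling(coord=5).mean().attrs == {"units": "test"}
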
From a333a5c73db078fa34324475f9d74d71d74d4659 Mon Sep 17 00:00:00 2001
From: Sander 
Date: Tue, 3 Mar 2020 01:38:04 +0100
Subject: [PATCH 23/75] =?UTF-8?q?removed=20mention=20that=20'dims'=20are?=
 =?UTF-8?q?=20inferred=20from=20'coords'-dict=20when=20omit=E2=80=A6=20(#3?=
 =?UTF-8?q?821)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

* removed mention that 'dims' are inferred from 'coords'-dict when omitted in DataArray (fixes #3820)

* added summary of PR #3821 to whats-new
---
 doc/whats-new.rst        | 3 +++
 xarray/core/dataarray.py | 3 +--
 2 files changed, 4 insertions(+), 2 deletions(-)

diff --git a/doc/whats-new.rst b/doc/whats-new.rst
index 089cbbe1be3..4a6083522ba 100644
--- a/doc/whats-new.rst
+++ b/doc/whats-new.rst
@@ -71,6 +71,9 @@ Bug fixes
 
 Documentation
 ~~~~~~~~~~~~~
+- Fix documentation of :py:class:`DataArray` removing the deprecated mention
+  that when omitted, `dims` are inferred from a `coords`-dict. (:pull:`3821`)
+  By `Sander van Rijn `_.
 
 Internal Changes
 ~~~~~~~~~~~~~~~~
diff --git a/xarray/core/dataarray.py b/xarray/core/dataarray.py
index 062cc6342df..b1da0ca1448 100644
--- a/xarray/core/dataarray.py
+++ b/xarray/core/dataarray.py
@@ -304,8 +304,7 @@ def __init__(
             Name(s) of the data dimension(s). Must be either a hashable (only
             for 1D data) or a sequence of hashables with length equal to the
             number of dimensions. If this argument is omitted, dimension names
-            are taken from ``coords`` (if possible) and otherwise default to
-            ``['dim_0', ... 'dim_n']``.
+            default to ``['dim_0', ... 'dim_n']``.
         name : str or None, optional
             Name of this array.
         attrs : dict_like or None, optional

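For reference, a short sketch of the documented behavior (not part of the
patch): when ``dims`` is omitted, default names are generated rather than
inferred from ``coords``:

    import numpy as np
    import xarray as xr

    da = xr.DataArray(np.zeros((2, 3)))
    da.dims  # ('dim_0', 'dim_1')
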
From 01462d65c7213e5e1cddf36492c6a34a7e53ce55 Mon Sep 17 00:00:00 2001
From: dcherian 
Date: Wed, 4 Mar 2020 07:05:14 +0530
Subject: [PATCH 24/75] Use stable RTD image.

---
 readthedocs.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/readthedocs.yml b/readthedocs.yml
index 88aee82a44b..173d61ec6f3 100644
--- a/readthedocs.yml
+++ b/readthedocs.yml
@@ -1,7 +1,7 @@
 version: 2
 
 build:
-    image: latest
+    image: stable
 
 conda:
     environment: ci/requirements/doc.yml

From b2f06cb9d36a2520fa4f3aee6c38cae9972e702e Mon Sep 17 00:00:00 2001
From: Deepak Cherian 
Date: Thu, 5 Mar 2020 18:26:11 +0530
Subject: [PATCH 25/75] DOC: Add rioxarray and other external examples (#3757)

* DOC: Add rioxarray link to examples and add example in file IO

* Add more external examples.

* fix spacing for ipython docs

* minor fixes

* fix bad edit

Co-authored-by: Deepak Cherian 
---
 doc/examples.rst |  9 +++++++++
 doc/io.rst       | 46 +++++++++++++++++++++++++++++++++++++++++++++-
 2 files changed, 54 insertions(+), 1 deletion(-)

diff --git a/doc/examples.rst b/doc/examples.rst
index 3067ca824be..805395808e0 100644
--- a/doc/examples.rst
+++ b/doc/examples.rst
@@ -17,3 +17,12 @@ Using apply_ufunc
     :maxdepth: 2
 
     examples/apply_ufunc_vectorize_1d
+
+External Examples
+-----------------
+.. toctree::
+    :maxdepth: 2
+
+    Managing raster data with rioxarray 
+    Xarray with dask 
+    Xarray and dask on the cloud with Pangeo 
diff --git a/doc/io.rst b/doc/io.rst
index e910943236f..6064aa3568a 100644
--- a/doc/io.rst
+++ b/doc/io.rst
@@ -759,9 +759,53 @@ for an example of how to convert these to longitudes and latitudes.
     considered as being experimental. Please report any bug you may find
     on xarray's github repository.
 
+
+Additionally, you can use `rioxarray`_ for reading in GeoTiff, netCDF or other
+GDAL readable raster data using `rasterio`_ as well as for exporting to a geoTIFF.
+`rioxarray`_ can also handle geospatial related tasks such as re-projecting and clipping.
+
+.. ipython::
+    :verbatim:
+
+    In [1]: import rioxarray
+
+    In [2]: rds = rioxarray.open_rasterio('RGB.byte.tif')
+
+    In [3]: rds
+    Out[3]:
+    <xarray.DataArray (band: 3, y: 718, x: 791)>
+    [1703814 values with dtype=uint8]
+    Coordinates:
+      * band         (band) int64 1 2 3
+      * y            (y) float64 2.827e+06 2.826e+06 ... 2.612e+06 2.612e+06
+      * x            (x) float64 1.021e+05 1.024e+05 ... 3.389e+05 3.392e+05
+        spatial_ref  int64 0
+    Attributes:
+        STATISTICS_MAXIMUM:  255
+        STATISTICS_MEAN:     29.947726688477
+        STATISTICS_MINIMUM:  0
+        STATISTICS_STDDEV:   52.340921626611
+        transform:           (300.0379266750948, 0.0, 101985.0, 0.0, -300.0417827...
+        _FillValue:          0.0
+        scale_factor:        1.0
+        add_offset:          0.0
+        grid_mapping:        spatial_ref
+
+    In [4]: rds.rio.crs
+    Out[4]: CRS.from_epsg(32618)
+
+    In [5]: rds4326 = rds.rio.reproject("epsg:4326")
+
+    In [6]: rds4326.rio.crs
+    Out[6]: CRS.from_epsg(4326)
+
+    In [7]: rds4326.rio.to_raster('RGB.byte.4326.tif')
+
+
 .. _rasterio: https://rasterio.readthedocs.io/en/latest/
+.. _rioxarray: https://corteva.github.io/rioxarray/stable/
 .. _test files: https://github.com/mapbox/rasterio/blob/master/tests/data/RGB.byte.tif
-.. _pyproj: https://github.com/jswhit/pyproj
+.. _pyproj: https://github.com/pyproj4/pyproj
 
 .. _io.zarr:
 

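A hypothetical sketch of the clipping mentioned above, reusing ``rds`` from the
example (the GeoJSON-style geometry is made up, and ``rio.clip``'s exact
signature should be checked against the rioxarray docs):

    # coordinates chosen inside the raster's extent shown above
    geometries = [
        {
            "type": "Polygon",
            "coordinates": [[
                [102000.0, 2612000.0],
                [339000.0, 2612000.0],
                [339000.0, 2827000.0],
                [102000.0, 2827000.0],
                [102000.0, 2612000.0],
            ]],
        }
    ]
    clipped = rds.rio.clip(geometries, crs=rds.rio.crs)  # assumed API
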
From 8fb47f282555fd1430b9621abedbed82cdac7d4a Mon Sep 17 00:00:00 2001
From: Deepak Cherian 
Date: Thu, 5 Mar 2020 18:26:54 +0530
Subject: [PATCH 26/75] Add note on diff's n differing from pandas (#3822)

* note that n != periods in diff docstring

* better wording based on feedback
---
 xarray/core/dataarray.py | 6 ++++++
 xarray/core/dataset.py   | 5 +++++
 2 files changed, 11 insertions(+)

diff --git a/xarray/core/dataarray.py b/xarray/core/dataarray.py
index b1da0ca1448..4e80ef222c2 100644
--- a/xarray/core/dataarray.py
+++ b/xarray/core/dataarray.py
@@ -2692,6 +2692,12 @@ def diff(self, dim: Hashable, n: int = 1, label: Hashable = "upper") -> "DataArr
         difference : same type as caller
             The n-th order finite difference of this object.
 
+        .. note::
+
+            `n` matches numpy's behavior and is different from pandas' first
+            argument named `periods`.
+
+
         Examples
         --------
         >>> arr = xr.DataArray([5, 5, 6, 6], [[1, 2, 3, 4]], ['x'])
diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py
index 7252dd2f3df..52940e98b27 100644
--- a/xarray/core/dataset.py
+++ b/xarray/core/dataset.py
@@ -4879,6 +4879,11 @@ def diff(self, dim, n=1, label="upper"):
         difference : same type as caller
             The n-th order finite difference of this object.
 
+        .. note::
+
+            `n` matches numpy's behavior and is different from pandas' first
+            argument named `periods`.
+
         Examples
         --------
         >>> ds = xr.Dataset({'foo': ('x', [5, 5, 6, 6])})

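A small worked example of the note above (illustrative, not part of the patch):

    import xarray as xr

    arr = xr.DataArray([5, 5, 6, 6], dims="x")
    arr.diff("x", n=1).values  # array([0, 1, 0])
    arr.diff("x", n=2).values  # array([ 1, -1]) -- n is the differencing
                               # order, not a shift like pandas' ``periods``
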
From 69723ebf34cb9c37917b44b2ac1ab92ae553fecc Mon Sep 17 00:00:00 2001
From: Maximilian Roos <5635139+max-sixty@users.noreply.github.com>
Date: Thu, 5 Mar 2020 22:36:07 -0500
Subject: [PATCH 27/75] Label "Installed Versions" item in Issue template
 (#3832)

* Label Installed Versions details in GH Issue template

* Update bug_report.md
---
 .github/ISSUE_TEMPLATE/bug_report.md | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/.github/ISSUE_TEMPLATE/bug_report.md b/.github/ISSUE_TEMPLATE/bug_report.md
index df5b2304bc3..83c3aea53a8 100644
--- a/.github/ISSUE_TEMPLATE/bug_report.md
+++ b/.github/ISSUE_TEMPLATE/bug_report.md
@@ -25,8 +25,9 @@ assignees: ''
 
 
 
-#### Output of ``xr.show_versions()``
-<details>
+#### Versions
+
+<details><summary>Output of `xr.show_versions()`</summary>
 
 
 </details>
From 00e5b367c483656c67c63c47a2a9e07112bbc885 Mon Sep 17 00:00:00 2001 From: Maximilian Roos <5635139+max-sixty@users.noreply.github.com> Date: Fri, 6 Mar 2020 08:57:17 -0500 Subject: [PATCH 28/75] update macos image (#3838) * update macos image * whatsnew --- azure-pipelines.yml | 2 +- doc/whats-new.rst | 2 ++ 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/azure-pipelines.yml b/azure-pipelines.yml index 5789161c966..ce95fca1ba1 100644 --- a/azure-pipelines.yml +++ b/azure-pipelines.yml @@ -38,7 +38,7 @@ jobs: py38: conda_env: py38 pool: - vmImage: 'macOS-10.13' + vmImage: 'macOS-10.15' steps: - template: ci/azure/unit-tests.yml diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 4a6083522ba..99ee66fad67 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -86,6 +86,8 @@ Internal Changes - Changed test_open_mfdataset_list_attr to only run with dask installed (:issue:`3777`, :pull:`3780`). By `Bruno Pagani `_. +- Updated Azure CI MacOS image, given pending removal. + By `Maximilian Roos `_ .. _whats-new.0.15.0: From 9fbb4170c1732fe2f3cd57b2b96d770a5bac50ed Mon Sep 17 00:00:00 2001 From: Maximilian Roos <5635139+max-sixty@users.noreply.github.com> Date: Fri, 6 Mar 2020 23:38:11 -0500 Subject: [PATCH 29/75] Allow `where` to receive a callable (#3827) * allow where to receive a callable * Update xarray/core/common.py Co-Authored-By: keewis * docstring * whatsnew Co-authored-by: keewis --- doc/whats-new.rst | 4 +++- xarray/core/common.py | 22 ++++++++++++++++++++++ xarray/tests/test_dataarray.py | 6 ++++++ xarray/tests/test_dataset.py | 9 +++++++++ 4 files changed, 40 insertions(+), 1 deletion(-) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 99ee66fad67..24120270444 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -43,7 +43,9 @@ New Features in 0.14.1) is now on by default. To disable, use ``xarray.set_options(display_style="text")``. By `Julia Signell `_. - +- :py:meth:`Dataset.where` and :py:meth:`DataArray.where` accept a lambda as a + first argument, which is then called on the input; replicating pandas' behavior. 
+ By `Maximilian Roos `_ Bug fixes ~~~~~~~~~ diff --git a/xarray/core/common.py b/xarray/core/common.py index e3739d6d039..c80cb24c5b5 100644 --- a/xarray/core/common.py +++ b/xarray/core/common.py @@ -1119,6 +1119,15 @@ def where(self, cond, other=dtypes.NA, drop: bool = False): >>> import numpy as np >>> a = xr.DataArray(np.arange(25).reshape(5, 5), dims=('x', 'y')) + >>> a + + array([[ 0, 1, 2, 3, 4], + [ 5, 6, 7, 8, 9], + [10, 11, 12, 13, 14], + [15, 16, 17, 18, 19], + [20, 21, 22, 23, 24]]) + Dimensions without coordinates: x, y + >>> a.where(a.x + a.y < 4) array([[ 0., 1., 2., 3., nan], @@ -1127,6 +1136,7 @@ def where(self, cond, other=dtypes.NA, drop: bool = False): [ 15., nan, nan, nan, nan], [ nan, nan, nan, nan, nan]]) Dimensions without coordinates: x, y + >>> a.where(a.x + a.y < 5, -1) array([[ 0, 1, 2, 3, 4], @@ -1135,6 +1145,7 @@ def where(self, cond, other=dtypes.NA, drop: bool = False): [15, 16, -1, -1, -1], [20, -1, -1, -1, -1]]) Dimensions without coordinates: x, y + >>> a.where(a.x + a.y < 4, drop=True) array([[ 0., 1., 2., 3.], @@ -1143,6 +1154,14 @@ def where(self, cond, other=dtypes.NA, drop: bool = False): [ 15., nan, nan, nan]]) Dimensions without coordinates: x, y + >>> a.where(lambda x: x.x + x.y < 4, drop=True) + + array([[ 0., 1., 2., 3.], + [ 5., 6., 7., nan], + [ 10., 11., nan, nan], + [ 15., nan, nan, nan]]) + Dimensions without coordinates: x, y + See also -------- numpy.where : corresponding numpy function @@ -1152,6 +1171,9 @@ def where(self, cond, other=dtypes.NA, drop: bool = False): from .dataarray import DataArray from .dataset import Dataset + if callable(cond): + cond = cond(self) + if drop: if other is not dtypes.NA: raise ValueError("cannot set `other` if drop=True") diff --git a/xarray/tests/test_dataarray.py b/xarray/tests/test_dataarray.py index 0a622d279ba..b8a9c5edaf9 100644 --- a/xarray/tests/test_dataarray.py +++ b/xarray/tests/test_dataarray.py @@ -2215,6 +2215,12 @@ def test_where(self): actual = arr.where(arr.x < 2, drop=True) assert_identical(actual, expected) + def test_where_lambda(self): + arr = DataArray(np.arange(4), dims="y") + expected = arr.sel(y=slice(2)) + actual = arr.where(lambda x: x.y < 2, drop=True) + assert_identical(actual, expected) + def test_where_string(self): array = DataArray(["a", "b"]) expected = DataArray(np.array(["a", np.nan], dtype=object)) diff --git a/xarray/tests/test_dataset.py b/xarray/tests/test_dataset.py index 7bcf9379ae8..44ffafb23b1 100644 --- a/xarray/tests/test_dataset.py +++ b/xarray/tests/test_dataset.py @@ -4349,6 +4349,12 @@ def test_where(self): assert actual.a.name == "a" assert actual.a.attrs == ds.a.attrs + # lambda + ds = Dataset({"a": ("x", range(5))}) + expected = Dataset({"a": ("x", [np.nan, np.nan, 2, 3, 4])}) + actual = ds.where(lambda x: x > 1) + assert_identical(expected, actual) + def test_where_other(self): ds = Dataset({"a": ("x", range(5))}, {"x": range(5)}) expected = Dataset({"a": ("x", [-1, -1, 2, 3, 4])}, {"x": range(5)}) @@ -4356,6 +4362,9 @@ def test_where_other(self): assert_equal(expected, actual) assert actual.a.dtype == int + actual = ds.where(lambda x: x > 1, -1) + assert_equal(expected, actual) + with raises_regex(ValueError, "cannot set"): ds.where(ds > 1, other=0, drop=True) From cdaac64fa528222d947bbc821ac6c919f7fa7fa8 Mon Sep 17 00:00:00 2001 From: Aaron Spring Date: Sun, 8 Mar 2020 18:42:43 +0100 Subject: [PATCH 30/75] Implement skipna kwarg in xr.quantile (#3844) * quick fix, no docs, no tests * added tests * docstrings * added whatsnew * Update 
doc/whats-new.rst Co-Authored-By: Maximilian Roos <5635139+max-sixty@users.noreply.github.com> * Update doc/whats-new.rst Co-Authored-By: keewis Co-authored-by: Maximilian Roos <5635139+max-sixty@users.noreply.github.com> Co-authored-by: keewis --- doc/whats-new.rst | 7 ++++++- xarray/core/dataarray.py | 11 +++++++++-- xarray/core/dataset.py | 13 +++++++++++-- xarray/core/groupby.py | 9 +++++++-- xarray/core/variable.py | 8 ++++++-- xarray/tests/test_dataarray.py | 8 +++++--- xarray/tests/test_dataset.py | 24 ++++++++++++++++++++---- xarray/tests/test_variable.py | 8 +++++--- 8 files changed, 69 insertions(+), 19 deletions(-) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 24120270444..2c30db99bcd 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -45,7 +45,12 @@ New Features By `Julia Signell `_. - :py:meth:`Dataset.where` and :py:meth:`DataArray.where` accept a lambda as a first argument, which is then called on the input; replicating pandas' behavior. - By `Maximilian Roos `_ + By `Maximilian Roos `_. +- Implement ``skipna`` in :py:meth:`Dataset.quantile`, :py:meth:`DataArray.quantile`, + :py:meth:`core.groupby.DatasetGroupBy.quantile`, :py:meth:`core.groupby.DataArrayGroupBy.quantile` + (:issue:`3843`, :pull:`3844`) + By `Aaron Spring `_. + Bug fixes ~~~~~~~~~ diff --git a/xarray/core/dataarray.py b/xarray/core/dataarray.py index 4e80ef222c2..7fcb42bf9d2 100644 --- a/xarray/core/dataarray.py +++ b/xarray/core/dataarray.py @@ -2939,6 +2939,7 @@ def quantile( dim: Union[Hashable, Sequence[Hashable], None] = None, interpolation: str = "linear", keep_attrs: bool = None, + skipna: bool = True, ) -> "DataArray": """Compute the qth quantile of the data along the specified dimension. @@ -2966,6 +2967,8 @@ def quantile( If True, the dataset's attributes (`attrs`) will be copied from the original object to the new one. If False (default), the new object will be returned without attributes. + skipna : bool, optional + Whether to skip missing values when aggregating. Returns ------- @@ -2978,7 +2981,7 @@ def quantile( See Also -------- - numpy.nanquantile, pandas.Series.quantile, Dataset.quantile + numpy.nanquantile, numpy.quantile, pandas.Series.quantile, Dataset.quantile Examples -------- @@ -3015,7 +3018,11 @@ def quantile( """ ds = self._to_temp_dataset().quantile( - q, dim=dim, keep_attrs=keep_attrs, interpolation=interpolation + q, + dim=dim, + keep_attrs=keep_attrs, + interpolation=interpolation, + skipna=skipna, ) return self._from_temp_dataset(ds) diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index 52940e98b27..f286236dd45 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -5140,7 +5140,13 @@ def sortby(self, variables, ascending=True): return aligned_self.isel(**indices) def quantile( - self, q, dim=None, interpolation="linear", numeric_only=False, keep_attrs=None + self, + q, + dim=None, + interpolation="linear", + numeric_only=False, + keep_attrs=None, + skipna=True, ): """Compute the qth quantile of the data along the specified dimension. @@ -5171,6 +5177,8 @@ def quantile( object will be returned without attributes. numeric_only : bool, optional If True, only apply ``func`` to variables with a numeric dtype. + skipna : bool, optional + Whether to skip missing values when aggregating. 
Returns ------- @@ -5183,7 +5191,7 @@ def quantile( See Also -------- - numpy.nanquantile, pandas.Series.quantile, DataArray.quantile + numpy.nanquantile, numpy.quantile, pandas.Series.quantile, DataArray.quantile Examples -------- @@ -5258,6 +5266,7 @@ def quantile( dim=reduce_dims, interpolation=interpolation, keep_attrs=keep_attrs, + skipna=skipna, ) else: diff --git a/xarray/core/groupby.py b/xarray/core/groupby.py index f2a9ebac6eb..4223d9dc255 100644 --- a/xarray/core/groupby.py +++ b/xarray/core/groupby.py @@ -558,7 +558,9 @@ def fillna(self, value): out = ops.fillna(self, value) return out - def quantile(self, q, dim=None, interpolation="linear", keep_attrs=None): + def quantile( + self, q, dim=None, interpolation="linear", keep_attrs=None, skipna=True + ): """Compute the qth quantile over each array in the groups and concatenate them together into a new array. @@ -582,6 +584,8 @@ def quantile(self, q, dim=None, interpolation="linear", keep_attrs=None): * higher: ``j``. * nearest: ``i`` or ``j``, whichever is nearest. * midpoint: ``(i + j) / 2``. + skipna : bool, optional + Whether to skip missing values when aggregating. Returns ------- @@ -595,7 +599,7 @@ def quantile(self, q, dim=None, interpolation="linear", keep_attrs=None): See Also -------- - numpy.nanquantile, pandas.Series.quantile, Dataset.quantile, + numpy.nanquantile, numpy.quantile, pandas.Series.quantile, Dataset.quantile, DataArray.quantile Examples @@ -656,6 +660,7 @@ def quantile(self, q, dim=None, interpolation="linear", keep_attrs=None): dim=dim, interpolation=interpolation, keep_attrs=keep_attrs, + skipna=skipna, ) return out diff --git a/xarray/core/variable.py b/xarray/core/variable.py index 62f9fde6a2e..435edb6f014 100644 --- a/xarray/core/variable.py +++ b/xarray/core/variable.py @@ -1678,7 +1678,9 @@ def no_conflicts(self, other, equiv=duck_array_ops.array_notnull_equiv): """ return self.broadcast_equals(other, equiv=equiv) - def quantile(self, q, dim=None, interpolation="linear", keep_attrs=None): + def quantile( + self, q, dim=None, interpolation="linear", keep_attrs=None, skipna=True + ): """Compute the qth quantile of the data along the specified dimension. Returns the qth quantiles(s) of the array elements. @@ -1725,6 +1727,8 @@ def quantile(self, q, dim=None, interpolation="linear", keep_attrs=None): from .computation import apply_ufunc + _quantile_func = np.nanquantile if skipna else np.quantile + if keep_attrs is None: keep_attrs = _get_keep_attrs(default=False) @@ -1739,7 +1743,7 @@ def quantile(self, q, dim=None, interpolation="linear", keep_attrs=None): def _wrapper(npa, **kwargs): # move quantile axis to end. 
required for apply_ufunc - return np.moveaxis(np.nanquantile(npa, **kwargs), 0, -1) + return np.moveaxis(_quantile_func(npa, **kwargs), 0, -1) axis = np.arange(-1, -1 * len(dim) - 1, -1) result = apply_ufunc( diff --git a/xarray/tests/test_dataarray.py b/xarray/tests/test_dataarray.py index b8a9c5edaf9..33f1b403eb8 100644 --- a/xarray/tests/test_dataarray.py +++ b/xarray/tests/test_dataarray.py @@ -2368,13 +2368,15 @@ def test_reduce_out(self): with pytest.raises(TypeError): orig.mean(out=np.ones(orig.shape)) + @pytest.mark.parametrize("skipna", [True, False]) @pytest.mark.parametrize("q", [0.25, [0.50], [0.25, 0.75]]) @pytest.mark.parametrize( "axis, dim", zip([None, 0, [0], [0, 1]], [None, "x", ["x"], ["x", "y"]]) ) - def test_quantile(self, q, axis, dim): - actual = DataArray(self.va).quantile(q, dim=dim, keep_attrs=True) - expected = np.nanpercentile(self.dv.values, np.array(q) * 100, axis=axis) + def test_quantile(self, q, axis, dim, skipna): + actual = DataArray(self.va).quantile(q, dim=dim, keep_attrs=True, skipna=skipna) + _percentile_func = np.nanpercentile if skipna else np.percentile + expected = _percentile_func(self.dv.values, np.array(q) * 100, axis=axis) np.testing.assert_allclose(actual.values, expected) if is_scalar(q): assert "quantile" not in actual.dims diff --git a/xarray/tests/test_dataset.py b/xarray/tests/test_dataset.py index 44ffafb23b1..d2e8c6b7609 100644 --- a/xarray/tests/test_dataset.py +++ b/xarray/tests/test_dataset.py @@ -4697,12 +4697,13 @@ def test_reduce_keepdims(self): ) assert_identical(expected, actual) + @pytest.mark.parametrize("skipna", [True, False]) @pytest.mark.parametrize("q", [0.25, [0.50], [0.25, 0.75]]) - def test_quantile(self, q): + def test_quantile(self, q, skipna): ds = create_test_data(seed=123) for dim in [None, "dim1", ["dim1"]]: - ds_quantile = ds.quantile(q, dim=dim) + ds_quantile = ds.quantile(q, dim=dim, skipna=skipna) if is_scalar(q): assert "quantile" not in ds_quantile.dims else: @@ -4710,12 +4711,27 @@ def test_quantile(self, q): for var, dar in ds.data_vars.items(): assert var in ds_quantile - assert_identical(ds_quantile[var], dar.quantile(q, dim=dim)) + assert_identical( + ds_quantile[var], dar.quantile(q, dim=dim, skipna=skipna) + ) dim = ["dim1", "dim2"] - ds_quantile = ds.quantile(q, dim=dim) + ds_quantile = ds.quantile(q, dim=dim, skipna=skipna) assert "dim3" in ds_quantile.dims assert all(d not in ds_quantile.dims for d in dim) + @pytest.mark.parametrize("skipna", [True, False]) + def test_quantile_skipna(self, skipna): + q = 0.1 + dim = "time" + ds = Dataset({"a": ([dim], np.arange(0, 11))}) + ds = ds.where(ds >= 1) + + result = ds.quantile(q=q, dim=dim, skipna=skipna) + + value = 1.9 if skipna else np.nan + expected = Dataset({"a": value}, coords={"quantile": q}) + assert_identical(result, expected) + @requires_bottleneck def test_rank(self): ds = create_test_data(seed=1234) diff --git a/xarray/tests/test_variable.py b/xarray/tests/test_variable.py index c86ecd0121f..c600f7a77d0 100644 --- a/xarray/tests/test_variable.py +++ b/xarray/tests/test_variable.py @@ -1511,14 +1511,16 @@ def test_reduce(self): with pytest.warns(DeprecationWarning, match="allow_lazy is deprecated"): v.mean(dim="x", allow_lazy=False) + @pytest.mark.parametrize("skipna", [True, False]) @pytest.mark.parametrize("q", [0.25, [0.50], [0.25, 0.75]]) @pytest.mark.parametrize( "axis, dim", zip([None, 0, [0], [0, 1]], [None, "x", ["x"], ["x", "y"]]) ) - def test_quantile(self, q, axis, dim): + def test_quantile(self, q, axis, dim, skipna): v = 
Variable(["x", "y"], self.d) - actual = v.quantile(q, dim=dim) - expected = np.nanpercentile(self.d, np.array(q) * 100, axis=axis) + actual = v.quantile(q, dim=dim, skipna=skipna) + _percentile_func = np.nanpercentile if skipna else np.percentile + expected = _percentile_func(self.d, np.array(q) * 100, axis=axis) np.testing.assert_allclose(actual.values, expected) @requires_dask From 603b0ad3f8a02a9e1180eb8dfc72f7f885f0e19a Mon Sep 17 00:00:00 2001 From: Mirko Panighel <30869713+mpanighel@users.noreply.github.com> Date: Sun, 8 Mar 2020 18:43:36 +0100 Subject: [PATCH 31/75] Add nxarray to related-projects.rst (#3848) --- doc/related-projects.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/doc/related-projects.rst b/doc/related-projects.rst index 3188751366f..edee80b72b8 100644 --- a/doc/related-projects.rst +++ b/doc/related-projects.rst @@ -61,6 +61,7 @@ Extend xarray capabilities - `Collocate `_: Collocate xarray trajectories in arbitrary physical dimensions - `eofs `_: EOF analysis in Python. - `hypothesis-gufunc `_: Extension to hypothesis. Makes it easy to write unit tests with xarray objects as input. +- `nxarray `_: NeXus input/output capability for xarray. - `xarray_extras `_: Advanced algorithms for xarray objects (e.g. integrations/interpolations). - `xrft `_: Fourier transforms for xarray data. - `xr-scipy `_: A lightweight scipy wrapper for xarray. From 203c3f4ee1b4220b3fa3a073b5412fb7bd72525b Mon Sep 17 00:00:00 2001 From: Maximilian Roos <5635139+max-sixty@users.noreply.github.com> Date: Mon, 9 Mar 2020 03:11:55 -0400 Subject: [PATCH 32/75] remove panel conversion (#3845) --- doc/whats-new.rst | 3 +++ xarray/core/dataarray.py | 10 ++-------- xarray/tests/test_dataarray.py | 8 ++------ 3 files changed, 7 insertions(+), 14 deletions(-) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 2c30db99bcd..ed94b84feea 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -95,6 +95,9 @@ Internal Changes By `Bruno Pagani `_. - Updated Azure CI MacOS image, given pending removal. By `Maximilian Roos `_ +- Removed conversion to :py:class:`pandas.Panel`, given its removal in pandas + in favor of xarray's objects. + By `Maximilian Roos `_ .. _whats-new.0.15.0: diff --git a/xarray/core/dataarray.py b/xarray/core/dataarray.py index 7fcb42bf9d2..7a95aedc2f7 100644 --- a/xarray/core/dataarray.py +++ b/xarray/core/dataarray.py @@ -2243,20 +2243,14 @@ def to_pandas(self) -> Union["DataArray", pd.Series, pd.DataFrame]: * 0D -> `xarray.DataArray` * 1D -> `pandas.Series` * 2D -> `pandas.DataFrame` - * 3D -> `pandas.Panel` *(deprecated)* - Only works for arrays with 3 or fewer dimensions. + Only works for arrays with 2 or fewer dimensions. The DataArray constructor performs the inverse transformation. """ # TODO: consolidate the info about pandas constructors and the # attributes that correspond to their indexes into a separate module? 
- constructors = { - 0: lambda x: x, - 1: pd.Series, - 2: pd.DataFrame, - 3: pdcompat.Panel, - } + constructors = {0: lambda x: x, 1: pd.Series, 2: pd.DataFrame} try: constructor = constructors[self.ndim] except KeyError: diff --git a/xarray/tests/test_dataarray.py b/xarray/tests/test_dataarray.py index 33f1b403eb8..dfaf8fd4e28 100644 --- a/xarray/tests/test_dataarray.py +++ b/xarray/tests/test_dataarray.py @@ -3411,14 +3411,10 @@ def test_to_pandas(self): assert_array_equal(actual.columns, [0, 1]) # roundtrips - for shape in [(3,), (3, 4), (3, 4, 5)]: - if len(shape) > 2 and LooseVersion(pd.__version__) >= "0.25.0": - continue + for shape in [(3,), (3, 4)]: dims = list("abc")[: len(shape)] da = DataArray(np.random.randn(*shape), dims=dims) - with warnings.catch_warnings(): - warnings.filterwarnings("ignore", r"\W*Panel is deprecated") - roundtripped = DataArray(da.to_pandas()).drop_vars(dims) + roundtripped = DataArray(da.to_pandas()).drop_vars(dims) assert_identical(da, roundtripped) with raises_regex(ValueError, "cannot convert"): From f4ebbfef8f317205fba9edecadaac843dfa131f7 Mon Sep 17 00:00:00 2001 From: Mathias Hauser Date: Mon, 9 Mar 2020 08:18:06 +0100 Subject: [PATCH 33/75] un-xfail tests that append to netCDF files with scipy (#3805) * remove ScipyWriteBase class * add whats new Co-authored-by: Deepak Cherian --- doc/whats-new.rst | 2 ++ xarray/tests/test_backends.py | 24 ++++-------------------- 2 files changed, 6 insertions(+), 20 deletions(-) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index ed94b84feea..bc0e5092d5b 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -95,6 +95,8 @@ Internal Changes By `Bruno Pagani `_. - Updated Azure CI MacOS image, given pending removal. By `Maximilian Roos `_ +- Removed xfails for scipy 1.0.1 for tests that append to netCDF files (:pull:`3805`). + By `Mathias Hauser `_. - Removed conversion to :py:class:`pandas.Panel`, given its removal in pandas in favor of xarray's objects. 
By `Maximilian Roos `_ diff --git a/xarray/tests/test_backends.py b/xarray/tests/test_backends.py index 015d2cbfdeb..59ed8e690cc 100644 --- a/xarray/tests/test_backends.py +++ b/xarray/tests/test_backends.py @@ -1979,24 +1979,8 @@ def create_zarr_target(self): yield tmp -class ScipyWriteBase(CFEncodedBase, NetCDF3Only): - def test_append_write(self): - import scipy - - if scipy.__version__ == "1.0.1": - pytest.xfail("https://github.com/scipy/scipy/issues/8625") - super().test_append_write() - - def test_append_overwrite_values(self): - import scipy - - if scipy.__version__ == "1.0.1": - pytest.xfail("https://github.com/scipy/scipy/issues/8625") - super().test_append_overwrite_values() - - @requires_scipy -class TestScipyInMemoryData(ScipyWriteBase): +class TestScipyInMemoryData(CFEncodedBase, NetCDF3Only): engine = "scipy" @contextlib.contextmanager @@ -2017,7 +2001,7 @@ def test_bytes_pickle(self): @requires_scipy -class TestScipyFileObject(ScipyWriteBase): +class TestScipyFileObject(CFEncodedBase, NetCDF3Only): engine = "scipy" @contextlib.contextmanager @@ -2050,7 +2034,7 @@ def test_pickle_dataarray(self): @requires_scipy -class TestScipyFilePath(ScipyWriteBase): +class TestScipyFilePath(CFEncodedBase, NetCDF3Only): engine = "scipy" @contextlib.contextmanager @@ -3317,7 +3301,7 @@ def test_session(self): @requires_scipy @requires_pynio -class TestPyNio(ScipyWriteBase): +class TestPyNio(CFEncodedBase, NetCDF3Only): def test_write_store(self): # pynio is read-only for now pass From 9f97c4384f6456a5582f2bf7277c90be110fce92 Mon Sep 17 00:00:00 2001 From: keewis Date: Mon, 9 Mar 2020 08:40:45 +0100 Subject: [PATCH 34/75] Pint support for top-level functions (#3611) * get the align tests to pass * add pint to the upstream-dev ci job * special case for booleans * silence the pint behaviour change warning * preprocess the unit mapping parameter to convert_units * use assert_allclose and assert_identical instead * clean up a few tests * remove some xfails * use the unit registry's quantity class * explain the catch_warnings block * don't use the function wrapper class if we don't need arguments * whats-new.rst * require the new pint version * use functools.partial instead of function * remove the convert_from parameter of array_attach_units * make sure every top-level function test uses assert_units_equal * hide the traceback of the unit comparison function * considerably simplify the merge_dataarray test * simplify the merge_dataset test --- ci/requirements/py36-min-nep18.yml | 2 +- doc/whats-new.rst | 2 +- xarray/tests/test_units.py | 356 ++++++++++++++++------------- 3 files changed, 203 insertions(+), 157 deletions(-) diff --git a/ci/requirements/py36-min-nep18.yml b/ci/requirements/py36-min-nep18.yml index c10fdf67dc4..a5eded49cd4 100644 --- a/ci/requirements/py36-min-nep18.yml +++ b/ci/requirements/py36-min-nep18.yml @@ -11,7 +11,7 @@ dependencies: - msgpack-python=0.6 # remove once distributed is bumped. distributed GH3491 - numpy=1.17 - pandas=0.25 - - pint=0.9 # Actually not enough as it doesn't implement __array_function__yet! + - pint=0.11 - pip - pytest - pytest-cov diff --git a/doc/whats-new.rst b/doc/whats-new.rst index bc0e5092d5b..00c63b81260 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -32,7 +32,7 @@ New Features - Support new h5netcdf backend keyword `phony_dims` (available from h5netcdf v0.8.0 for :py:class:`~xarray.backends.H5NetCDFStore`. By `Kai Mühlbauer `_. -- implement pint support. (:issue:`3594`, :pull:`3706`) +- Support unit aware arrays with pint. 
(:issue:`3594`, :pull:`3706`, :pull:`3611`) By `Justus Magin `_. - :py:meth:`Dataset.groupby` and :py:meth:`DataArray.groupby` now raise a `TypeError` on multiple string arguments. Receiving multiple string arguments diff --git a/xarray/tests/test_units.py b/xarray/tests/test_units.py index 9f63ebb1d42..bef3af62d74 100644 --- a/xarray/tests/test_units.py +++ b/xarray/tests/test_units.py @@ -1,3 +1,4 @@ +import functools import operator from distutils.version import LooseVersion @@ -8,6 +9,7 @@ import xarray as xr from xarray.core import formatting from xarray.core.npcompat import IS_NEP18_ACTIVE +from xarray.testing import assert_allclose, assert_identical from .test_variable import VariableSubclassobjects @@ -70,53 +72,17 @@ def array_strip_units(array): return array -def array_attach_units(data, unit, convert_from=None): - try: - unit, convert_from = unit - except TypeError: - pass - +def array_attach_units(data, unit): if isinstance(data, Quantity): - if not convert_from: - raise ValueError( - "cannot attach unit {unit} to quantity ({data.units})".format( - unit=unit, data=data - ) - ) - elif isinstance(convert_from, unit_registry.Unit): - data = data.magnitude - elif convert_from is True: # intentionally accept exactly true - if data.check(unit): - convert_from = data.units - data = data.magnitude - else: - raise ValueError( - "cannot convert quantity ({data.units}) to {unit}".format( - unit=unit, data=data - ) - ) - else: - raise ValueError( - "cannot convert from invalid unit {convert_from}".format( - convert_from=convert_from - ) - ) + raise ValueError(f"cannot attach unit {unit} to quantity {data}") - # to make sure we also encounter the case of "equal if converted" - if convert_from is not None: - quantity = (data * convert_from).to( - unit - if isinstance(unit, unit_registry.Unit) - else unit_registry.dimensionless - ) - else: - try: - quantity = data * unit - except np.core._exceptions.UFuncTypeError: - if unit != 1: - raise + try: + quantity = data * unit + except np.core._exceptions.UFuncTypeError: + if isinstance(unit, unit_registry.Unit): + raise - quantity = data + quantity = data return quantity @@ -241,6 +207,11 @@ def attach_units(obj, units): def convert_units(obj, to): + # preprocess + to = { + key: None if not isinstance(value, unit_registry.Unit) else value + for key, value in to.items() + } if isinstance(obj, xr.Dataset): data_vars = { name: convert_units(array.variable, {None: to.get(name)}) @@ -282,6 +253,7 @@ def convert_units(obj, to): def assert_units_equal(a, b): + __tracebackhide__ = True assert extract_units(a) == extract_units(b) @@ -414,9 +386,8 @@ def __repr__(self): return f"function_{self.name}" -@pytest.mark.xfail(reason="test bug: apply_ufunc should not be called that way") def test_apply_ufunc_dataarray(dtype): - func = function( + func = functools.partial( xr.apply_ufunc, np.mean, input_core_dims=[["x"]], kwargs={"axis": -1} ) @@ -427,12 +398,12 @@ def test_apply_ufunc_dataarray(dtype): expected = attach_units(func(strip_units(data_array)), extract_units(data_array)) actual = func(data_array) - assert_equal_with_units(expected, actual) + assert_units_equal(expected, actual) + assert_identical(expected, actual) -@pytest.mark.xfail(reason="test bug: apply_ufunc should not be called that way") def test_apply_ufunc_dataset(dtype): - func = function( + func = functools.partial( xr.apply_ufunc, np.mean, input_core_dims=[["x"]], kwargs={"axis": -1} ) @@ -450,10 +421,10 @@ def test_apply_ufunc_dataset(dtype): expected = attach_units(func(strip_units(ds)), 
extract_units(ds)) actual = func(ds) - assert_equal_with_units(expected, actual) + assert_units_equal(expected, actual) + assert_identical(expected, actual) -@pytest.mark.xfail(reason="blocked by `reindex` / `where`") @pytest.mark.parametrize( "unit,error", ( @@ -475,36 +446,40 @@ def test_apply_ufunc_dataset(dtype): "coords", ), ) -@pytest.mark.parametrize("fill_value", (np.float64(10), np.float64(np.nan))) +@pytest.mark.parametrize("fill_value", (10, np.nan)) def test_align_dataarray(fill_value, variant, unit, error, dtype): original_unit = unit_registry.m variants = { - "data": (unit, 1, 1), - "dims": (original_unit, unit, 1), - "coords": (original_unit, 1, unit), + "data": (unit, original_unit, original_unit), + "dims": (original_unit, unit, original_unit), + "coords": (original_unit, original_unit, unit), } data_unit, dim_unit, coord_unit = variants.get(variant) array1 = np.linspace(0, 10, 2 * 5).reshape(2, 5).astype(dtype) * original_unit array2 = np.linspace(0, 8, 2 * 5).reshape(2, 5).astype(dtype) * data_unit x = np.arange(2) * original_unit - x_a1 = np.array([10, 5]) * original_unit - x_a2 = np.array([10, 5]) * coord_unit y1 = np.arange(5) * original_unit y2 = np.arange(2, 7) * dim_unit + y_a1 = np.array([3, 5, 7, 8, 9]) * original_unit + y_a2 = np.array([7, 8, 9, 11, 13]) * coord_unit - data_array1 = xr.DataArray( - data=array1, coords={"x": x, "x_a": ("x", x_a1), "y": y1}, dims=("x", "y") - ) - data_array2 = xr.DataArray( - data=array2, coords={"x": x, "x_a": ("x", x_a2), "y": y2}, dims=("x", "y") - ) + coords1 = {"x": x, "y": y1} + coords2 = {"x": x, "y": y2} + if variant == "coords": + coords1["y_a"] = ("y", y_a1) + coords2["y_a"] = ("y", y_a2) + + data_array1 = xr.DataArray(data=array1, coords=coords1, dims=("x", "y")) + data_array2 = xr.DataArray(data=array2, coords=coords2, dims=("x", "y")) fill_value = fill_value * data_unit func = function(xr.align, join="outer", fill_value=fill_value) - if error is not None: + if error is not None and not ( + np.isnan(fill_value) and not isinstance(fill_value, Quantity) + ): with pytest.raises(error): func(data_array1, data_array2) @@ -524,15 +499,19 @@ def test_align_dataarray(fill_value, variant, unit, error, dtype): **stripped_kwargs, ) expected_a = attach_units(expected_a, units_a) - expected_b = convert_units(attach_units(expected_b, units_a), units_b) + if isinstance(array2, Quantity): + expected_b = convert_units(attach_units(expected_b, units_a), units_b) + else: + expected_b = attach_units(expected_b, units_b) actual_a, actual_b = func(data_array1, data_array2) - assert_equal_with_units(expected_a, actual_a) - assert_equal_with_units(expected_b, actual_b) + assert_units_equal(expected_a, actual_a) + assert_allclose(expected_a, actual_a) + assert_units_equal(expected_b, actual_b) + assert_allclose(expected_b, actual_b) -@pytest.mark.xfail(reason="blocked by `reindex` / `where`") @pytest.mark.parametrize( "unit,error", ( @@ -558,31 +537,37 @@ def test_align_dataarray(fill_value, variant, unit, error, dtype): def test_align_dataset(fill_value, unit, variant, error, dtype): original_unit = unit_registry.m - variants = {"data": (unit, 1, 1), "dims": (1, unit, 1), "coords": (1, 1, unit)} + variants = { + "data": (unit, original_unit, original_unit), + "dims": (original_unit, unit, original_unit), + "coords": (original_unit, original_unit, unit), + } data_unit, dim_unit, coord_unit = variants.get(variant) array1 = np.linspace(0, 10, 2 * 5).reshape(2, 5).astype(dtype) * original_unit array2 = np.linspace(0, 10, 2 * 5).reshape(2, 
5).astype(dtype) * data_unit x = np.arange(2) * original_unit - x_a1 = np.array([10, 5]) * original_unit - x_a2 = np.array([10, 5]) * coord_unit y1 = np.arange(5) * original_unit y2 = np.arange(2, 7) * dim_unit + y_a1 = np.array([3, 5, 7, 8, 9]) * original_unit + y_a2 = np.array([7, 8, 9, 11, 13]) * coord_unit - ds1 = xr.Dataset( - data_vars={"a": (("x", "y"), array1)}, - coords={"x": x, "x_a": ("x", x_a1), "y": y1}, - ) - ds2 = xr.Dataset( - data_vars={"a": (("x", "y"), array2)}, - coords={"x": x, "x_a": ("x", x_a2), "y": y2}, - ) + coords1 = {"x": x, "y": y1} + coords2 = {"x": x, "y": y2} + if variant == "coords": + coords1["y_a"] = ("y", y_a1) + coords2["y_a"] = ("y", y_a2) + + ds1 = xr.Dataset(data_vars={"a": (("x", "y"), array1)}, coords=coords1) + ds2 = xr.Dataset(data_vars={"a": (("x", "y"), array2)}, coords=coords2) fill_value = fill_value * data_unit func = function(xr.align, join="outer", fill_value=fill_value) - if error is not None: + if error is not None and not ( + np.isnan(fill_value) and not isinstance(fill_value, Quantity) + ): with pytest.raises(error): func(ds1, ds2) @@ -600,12 +585,17 @@ def test_align_dataset(fill_value, unit, variant, error, dtype): strip_units(ds1), strip_units(convert_units(ds2, units_a)), **stripped_kwargs ) expected_a = attach_units(expected_a, units_a) - expected_b = convert_units(attach_units(expected_b, units_a), units_b) + if isinstance(array2, Quantity): + expected_b = convert_units(attach_units(expected_b, units_a), units_b) + else: + expected_b = attach_units(expected_b, units_b) actual_a, actual_b = func(ds1, ds2) - assert_equal_with_units(expected_a, actual_a) - assert_equal_with_units(expected_b, actual_b) + assert_units_equal(expected_a, actual_a) + assert_allclose(expected_a, actual_a) + assert_units_equal(expected_b, actual_b) + assert_allclose(expected_b, actual_b) def test_broadcast_dataarray(dtype): @@ -615,28 +605,53 @@ def test_broadcast_dataarray(dtype): a = xr.DataArray(data=array1, dims="x") b = xr.DataArray(data=array2, dims="y") - expected_a, expected_b = tuple( - attach_units(elem, extract_units(a)) - for elem in xr.broadcast(strip_units(a), strip_units(b)) - ) + units_a = extract_units(a) + units_b = extract_units(b) + expected_a, expected_b = xr.broadcast(strip_units(a), strip_units(b)) + expected_a = attach_units(expected_a, units_a) + expected_b = convert_units(attach_units(expected_b, units_a), units_b) + actual_a, actual_b = xr.broadcast(a, b) - assert_equal_with_units(expected_a, actual_a) - assert_equal_with_units(expected_b, actual_b) + assert_units_equal(expected_a, actual_a) + assert_identical(expected_a, actual_a) + assert_units_equal(expected_b, actual_b) + assert_identical(expected_b, actual_b) def test_broadcast_dataset(dtype): array1 = np.linspace(0, 10, 2) * unit_registry.Pa array2 = np.linspace(0, 10, 3) * unit_registry.Pa - ds = xr.Dataset(data_vars={"a": ("x", array1), "b": ("y", array2)}) + x1 = np.arange(2) + y1 = np.arange(3) + + x2 = np.arange(2, 4) + y2 = np.arange(3, 6) - (expected,) = tuple( - attach_units(elem, extract_units(ds)) for elem in xr.broadcast(strip_units(ds)) + ds = xr.Dataset( + data_vars={"a": ("x", array1), "b": ("y", array2)}, coords={"x": x1, "y": y1} + ) + other = xr.Dataset( + data_vars={ + "a": ("x", array1.to(unit_registry.hPa)), + "b": ("y", array2.to(unit_registry.hPa)), + }, + coords={"x": x2, "y": y2}, ) - (actual,) = xr.broadcast(ds) - assert_equal_with_units(expected, actual) + units_a = extract_units(ds) + units_b = extract_units(other) + expected_a, expected_b = 
xr.broadcast(strip_units(ds), strip_units(other)) + expected_a = attach_units(expected_a, units_a) + expected_b = attach_units(expected_b, units_b) + + actual_a, actual_b = xr.broadcast(ds, other) + + assert_units_equal(expected_a, actual_a) + assert_identical(expected_a, actual_a) + assert_units_equal(expected_b, actual_b) + assert_identical(expected_b, actual_b) @pytest.mark.parametrize( @@ -706,7 +721,8 @@ def test_combine_by_coords(variant, unit, error, dtype): ) actual = xr.combine_by_coords([ds, other]) - assert_equal_with_units(expected, actual) + assert_units_equal(expected, actual) + assert_identical(expected, actual) @pytest.mark.parametrize( @@ -717,12 +733,7 @@ def test_combine_by_coords(variant, unit, error, dtype): unit_registry.dimensionless, DimensionalityError, id="dimensionless" ), pytest.param(unit_registry.s, DimensionalityError, id="incompatible_unit"), - pytest.param( - unit_registry.mm, - None, - id="compatible_unit", - marks=pytest.mark.xfail(reason="wrong order of arguments to `where`"), - ), + pytest.param(unit_registry.mm, None, id="compatible_unit"), pytest.param(unit_registry.m, None, id="identical_unit"), ), ids=repr, @@ -810,7 +821,8 @@ def test_combine_nested(variant, unit, error, dtype): ) actual = func([[ds1, ds2], [ds3, ds4]]) - assert_equal_with_units(expected, actual) + assert_units_equal(expected, actual) + assert_identical(expected, actual) @pytest.mark.parametrize( @@ -862,7 +874,8 @@ def test_concat_dataarray(variant, unit, error, dtype): ) actual = xr.concat([arr1, arr2], dim="x") - assert_equal_with_units(expected, actual) + assert_units_equal(expected, actual) + assert_identical(expected, actual) @pytest.mark.parametrize( @@ -912,10 +925,10 @@ def test_concat_dataset(variant, unit, error, dtype): ) actual = xr.concat([ds1, ds2], dim="x") - assert_equal_with_units(expected, actual) + assert_units_equal(expected, actual) + assert_identical(expected, actual) -@pytest.mark.xfail(reason="blocked by `reindex` / `where`") @pytest.mark.parametrize( "unit,error", ( @@ -948,64 +961,81 @@ def test_merge_dataarray(variant, unit, error, dtype): data_unit, dim_unit, coord_unit = variants.get(variant) array1 = np.linspace(0, 1, 2 * 3).reshape(2, 3).astype(dtype) * original_unit + x1 = np.arange(2) * original_unit + y1 = np.arange(3) * original_unit + u1 = np.linspace(10, 20, 2) * original_unit + v1 = np.linspace(10, 20, 3) * original_unit + array2 = np.linspace(1, 2, 2 * 4).reshape(2, 4).astype(dtype) * data_unit - array3 = np.linspace(0, 2, 3 * 4).reshape(3, 4).astype(dtype) * data_unit + x2 = np.arange(2, 4) * dim_unit + z2 = np.arange(4) * original_unit + u2 = np.linspace(20, 30, 2) * coord_unit + w2 = np.linspace(10, 20, 4) * original_unit - x = np.arange(2) * original_unit - y = np.arange(3) * original_unit - z = np.arange(4) * original_unit - u = np.linspace(10, 20, 2) * original_unit - v = np.linspace(10, 20, 3) * original_unit - w = np.linspace(10, 20, 4) * original_unit + array3 = np.linspace(0, 2, 3 * 4).reshape(3, 4).astype(dtype) * data_unit + y3 = np.arange(3, 6) * dim_unit + z3 = np.arange(4, 8) * dim_unit + v3 = np.linspace(10, 20, 3) * coord_unit + w3 = np.linspace(10, 20, 4) * coord_unit arr1 = xr.DataArray( name="a", data=array1, - coords={"x": x, "y": y, "u": ("x", u), "v": ("y", v)}, + coords={"x": x1, "y": y1, "u": ("x", u1), "v": ("y", v1)}, dims=("x", "y"), ) arr2 = xr.DataArray( - name="b", + name="a", data=array2, - coords={ - "x": np.arange(2, 4) * dim_unit, - "z": z, - "u": ("x", np.linspace(20, 30, 2) * coord_unit), - "w": ("z", w), 
- }, + coords={"x": x2, "z": z2, "u": ("x", u2), "w": ("z", w2)}, dims=("x", "z"), ) arr3 = xr.DataArray( - name="c", + name="a", data=array3, - coords={ - "y": np.arange(3, 6) * dim_unit, - "z": np.arange(4, 8) * dim_unit, - "v": ("y", np.linspace(10, 20, 3) * coord_unit), - "w": ("z", np.linspace(10, 20, 4) * coord_unit), - }, + coords={"y": y3, "z": z3, "v": ("y", v3), "w": ("z", w3)}, dims=("y", "z"), ) - func = function(xr.merge) if error is not None: with pytest.raises(error): - func([arr1, arr2, arr3]) + xr.merge([arr1, arr2, arr3]) return - units = {name: original_unit for name in list("abcuvwxyz")} + units = {name: original_unit for name in list("axyzuvw")} + convert_and_strip = lambda arr: strip_units(convert_units(arr, units)) - expected = attach_units( - func([strip_units(arr1), convert_and_strip(arr2), convert_and_strip(arr3)]), - units, + expected_units = { + "a": original_unit, + "u": original_unit, + "v": original_unit, + "w": original_unit, + "x": original_unit, + "y": original_unit, + "z": original_unit, + } + + expected = convert_units( + attach_units( + xr.merge( + [ + convert_and_strip(arr1), + convert_and_strip(arr2), + convert_and_strip(arr3), + ] + ), + units, + ), + expected_units, ) - actual = func([arr1, arr2, arr3]) - assert_equal_with_units(expected, actual) + actual = xr.merge([arr1, arr2, arr3]) + + assert_units_equal(expected, actual) + assert_allclose(expected, actual) -@pytest.mark.xfail(reason="blocked by `reindex` / `where`") @pytest.mark.parametrize( "unit,error", ( @@ -1046,7 +1076,7 @@ def test_merge_dataset(variant, unit, error, dtype): ds1 = xr.Dataset( data_vars={"a": (("y", "x"), array1), "b": (("y", "x"), array2)}, - coords={"x": x, "y": y, "z": ("x", z)}, + coords={"x": x, "y": y, "u": ("x", z)}, ) ds2 = xr.Dataset( data_vars={ @@ -1056,18 +1086,18 @@ def test_merge_dataset(variant, unit, error, dtype): coords={ "x": np.arange(3) * dim_unit, "y": np.arange(2, 4) * dim_unit, - "z": ("x", np.arange(-3, 0) * coord_unit), + "u": ("x", np.arange(-3, 0) * coord_unit), }, ) ds3 = xr.Dataset( data_vars={ - "a": (("y", "x"), np.zeros_like(array1) * np.nan * data_unit), - "b": (("y", "x"), np.zeros_like(array2) * np.nan * data_unit), + "a": (("y", "x"), np.full_like(array1, np.nan) * data_unit), + "b": (("y", "x"), np.full_like(array2, np.nan) * data_unit), }, coords={ "x": np.arange(3, 6) * dim_unit, "y": np.arange(4, 6) * dim_unit, - "z": ("x", np.arange(3, 6) * coord_unit), + "u": ("x", np.arange(3, 6) * coord_unit), }, ) @@ -1080,12 +1110,20 @@ def test_merge_dataset(variant, unit, error, dtype): units = extract_units(ds1) convert_and_strip = lambda ds: strip_units(convert_units(ds, units)) - expected = attach_units( - func([strip_units(ds1), convert_and_strip(ds2), convert_and_strip(ds3)]), units + expected_units = {name: original_unit for name in list("abxyzu")} + expected = convert_units( + attach_units( + func( + [convert_and_strip(ds1), convert_and_strip(ds2), convert_and_strip(ds3)] + ), + units, + ), + expected_units, ) actual = func([ds1, ds2, ds3]) - assert_equal_with_units(expected, actual) + assert_units_equal(expected, actual) + assert_allclose(expected, actual) @pytest.mark.parametrize("func", (xr.zeros_like, xr.ones_like)) @@ -1094,10 +1132,12 @@ def test_replication_dataarray(func, dtype): data_array = xr.DataArray(data=array, dims="x") numpy_func = getattr(np, func.__name__) - expected = xr.DataArray(data=numpy_func(array), dims="x") + units = extract_units(numpy_func(data_array)) + expected = attach_units(func(data_array), units) 
actual = func(data_array) - assert_equal_with_units(expected, actual) + assert_units_equal(expected, actual) + assert_identical(expected, actual) @pytest.mark.parametrize("func", (xr.zeros_like, xr.ones_like)) @@ -1114,12 +1154,13 @@ def test_replication_dataset(func, dtype): ) numpy_func = getattr(np, func.__name__) - expected = ds.copy( - data={name: numpy_func(array.data) for name, array in ds.data_vars.items()} - ) + units = extract_units(ds.map(numpy_func)) + expected = attach_units(func(strip_units(ds)), units) + actual = func(ds) - assert_equal_with_units(expected, actual) + assert_units_equal(expected, actual) + assert_identical(expected, actual) @pytest.mark.xfail( @@ -1158,7 +1199,8 @@ def test_replication_full_like_dataarray(unit, error, dtype): ) actual = xr.full_like(data_array, fill_value=fill_value) - assert_equal_with_units(expected, actual) + assert_units_equal(expected, actual) + assert_identical(expected, actual) @pytest.mark.xfail( @@ -1208,7 +1250,8 @@ def test_replication_full_like_dataset(unit, error, dtype): ) actual = xr.full_like(ds, fill_value=fill_value) - assert_equal_with_units(expected, actual) + assert_units_equal(expected, actual) + assert_identical(expected, actual) @pytest.mark.parametrize( @@ -1250,7 +1293,8 @@ def test_where_dataarray(fill_value, unit, error, dtype): ) actual = xr.where(cond, x, fill_value) - assert_equal_with_units(expected, actual) + assert_units_equal(expected, actual) + assert_identical(expected, actual) @pytest.mark.parametrize( @@ -1294,7 +1338,8 @@ def test_where_dataset(fill_value, unit, error, dtype): ) actual = xr.where(cond, ds, fill_value) - assert_equal_with_units(expected, actual) + assert_units_equal(expected, actual) + assert_identical(expected, actual) def test_dot_dataarray(dtype): @@ -1315,7 +1360,8 @@ def test_dot_dataarray(dtype): ) actual = xr.dot(data_array, other) - assert_equal_with_units(expected, actual) + assert_units_equal(expected, actual) + assert_identical(expected, actual) def delete_attrs(*to_delete): From 1db010bb1f84c63c45c1317a78e89362587e1423 Mon Sep 17 00:00:00 2001 From: Mathias Hauser Date: Mon, 9 Mar 2020 15:07:02 +0100 Subject: [PATCH 35/75] update installation instruction (#3849) * installing.rst: update instructions * whats-new * explicit link and anchor * :doc: -> :ref: --- doc/installing.rst | 15 ++++++++------- doc/whats-new.rst | 3 +++ 2 files changed, 11 insertions(+), 7 deletions(-) diff --git a/doc/installing.rst b/doc/installing.rst index dfc2841a956..a25bf65e342 100644 --- a/doc/installing.rst +++ b/doc/installing.rst @@ -11,6 +11,8 @@ Required dependencies - `numpy `__ (1.15 or later) - `pandas `__ (0.25 or later) +.. _optional-dependencies: + Optional dependencies --------------------- @@ -24,7 +26,7 @@ For netCDF and IO - `h5netcdf `__: an alternative library for reading and writing netCDF4 files that does not use the netCDF-C libraries - `pynio `__: for reading GRIB and other - geoscience specific file formats + geoscience specific file formats. Note that pynio is not available for Windows. - `zarr `__: for chunked, compressed, N-dimensional arrays. - `cftime `__: recommended if you want to encode/decode datetimes for non-standard calendars or dates before @@ -121,16 +123,15 @@ xarray itself is a pure Python package, but its dependencies are not. The easiest way to get everything installed is to use conda_. 
To install xarray with its recommended dependencies using the conda command line tool:: - $ conda install xarray dask netCDF4 bottleneck + $ conda install -c conda-forge xarray dask netCDF4 bottleneck .. _conda: http://conda.io/ -We recommend using the community maintained `conda-forge `__ channel if you need difficult\-to\-build dependencies such as cartopy, pynio or PseudoNetCDF:: - - $ conda install -c conda-forge xarray cartopy pynio pseudonetcdf +If you require other :ref:`optional-dependencies` add them to the line above. -New releases may also appear in conda-forge before being updated in the default -channel. +We recommend using the community maintained `conda-forge `__ channel, +as some of the dependencies are difficult to build. New releases may also appear in conda-forge before +being updated in the default channel. If you don't use conda, be sure you have the required dependencies (numpy and pandas) installed first. Then, install xarray with pip:: diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 00c63b81260..3f04ba4ec57 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -81,6 +81,9 @@ Documentation - Fix documentation of :py:class:`DataArray` removing the deprecated mention that when omitted, `dims` are inferred from a `coords`-dict. (:pull:`3821`) By `Sander van Rijn `_. +- Update the installation instructions: only explicitly list recommended dependencies + (:issue:`3756`). + By `Mathias Hauser `_. Internal Changes ~~~~~~~~~~~~~~~~ From 7927c2b79e4dd7ecebb648e8e64e2647405b08db Mon Sep 17 00:00:00 2001 From: Joe Hamman Date: Mon, 9 Mar 2020 23:06:07 -0700 Subject: [PATCH 36/75] add xpublish to related projects (#3850) --- doc/related-projects.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/doc/related-projects.rst b/doc/related-projects.rst index edee80b72b8..57b8da0c447 100644 --- a/doc/related-projects.rst +++ b/doc/related-projects.rst @@ -63,6 +63,7 @@ Extend xarray capabilities - `hypothesis-gufunc `_: Extension to hypothesis. Makes it easy to write unit tests with xarray objects as input. - `nxarray `_: NeXus input/output capability for xarray. - `xarray_extras `_: Advanced algorithms for xarray objects (e.g. integrations/interpolations). +- `xpublish `_: Publish Xarray Datasets via a Zarr compatible REST API. - `xrft `_: Fourier transforms for xarray data. - `xr-scipy `_: A lightweight scipy wrapper for xarray. - `X-regression `_: Multiple linear regression from Statsmodels library coupled with Xarray library. 
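
A short usage sketch for the pint support introduced in the patches above may help tie them together. This is illustrative only — it assumes pint>=0.11 (the minimum pinned in ci/requirements/py36-min-nep18.yml) and numpy>=1.17 with NEP 18 enabled; the registry name ``ureg`` and the sample values do not appear anywhere in the patches:

    >>> import numpy as np
    >>> import pint
    >>> import xarray as xr
    >>> ureg = pint.UnitRegistry(force_ndarray=True)  # wrap magnitudes as ndarrays
    >>> da = xr.DataArray(np.linspace(0, 10, 5) * ureg.m, dims="x")
    >>> # arithmetic converts compatible units (cm -> m) instead of stripping them
    >>> (da + np.array(200.0) * ureg.cm).data.units
    <Unit('meter')>

As the test changes above reflect, arithmetic and the top-level functions (``where``, ``concat``, ``merge``, ``align``, ...) now propagate and convert units, while indexed coordinate lookups still strip units at this point in the series.
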
From 739b34767ddd19b6168af05ee749b527266c104d Mon Sep 17 00:00:00 2001
From: Maximilian Roos <5635139+max-sixty@users.noreply.github.com>
Date: Tue, 10 Mar 2020 10:02:59 -0400
Subject: [PATCH 37/75] Doctests fixes (#3846)

* start of doctest fixes

* start of doctest fixes
---
 conftest.py              | 11 +++++++++++
 doc/contributing.rst     |  8 ++++----
 xarray/core/dataarray.py | 19 ++++++++++++-------
 xarray/core/dataset.py   | 11 ++++++++---
 xarray/core/rolling.py   | 15 ++++++++-------
 5 files changed, 43 insertions(+), 21 deletions(-)

diff --git a/conftest.py b/conftest.py
index 25dc284975e..712af1d3759 100644
--- a/conftest.py
+++ b/conftest.py
@@ -21,3 +21,14 @@ def pytest_runtest_setup(item):
         pytest.skip(
             "set --run-network-tests to run test requiring an " "internet connection"
         )
+
+
+@pytest.fixture(autouse=True)
+def add_standard_imports(doctest_namespace):
+    import numpy as np
+    import pandas as pd
+    import xarray as xr
+
+    doctest_namespace["np"] = np
+    doctest_namespace["pd"] = pd
+    doctest_namespace["xr"] = xr
diff --git a/doc/contributing.rst b/doc/contributing.rst
index eb31db24591..f581bcd9741 100644
--- a/doc/contributing.rst
+++ b/doc/contributing.rst
@@ -51,8 +51,8 @@ Bug reports must:
    `_::

       ```python
-      >>> from xarray import Dataset
-      >>> df = Dataset(...)
+      >>> import xarray as xr
+      >>> df = xr.Dataset(...)
       ...
       ```

@@ -378,8 +378,8 @@ and then running::

     pre-commit install

-from the root of the xarray repository. You can skip the pre-commit checks with
-``git commit --no-verify``.
+from the root of the xarray repository. You can skip the pre-commit checks
+with ``git commit --no-verify``.


 Backwards Compatibility
diff --git a/xarray/core/dataarray.py b/xarray/core/dataarray.py
index 7a95aedc2f7..6782070da0b 100644
--- a/xarray/core/dataarray.py
+++ b/xarray/core/dataarray.py
@@ -1475,20 +1475,23 @@ def swap_dims(self, dims_dict: Mapping[Hashable, Hashable]) -> "DataArray":

         Examples
         --------
+
         >>> arr = xr.DataArray(data=[0, 1], dims="x",
-                            coords={"x": ["a", "b"], "y": ("x", [0, 1])})
+        ...                    coords={"x": ["a", "b"], "y": ("x", [0, 1])})
         >>> arr
         <xarray.DataArray (x: 2)>
         array([0, 1])
         Coordinates:
           * x        (x) <U1 'a' 'b'
             y        (x) int64 0 1
+
         >>> arr.swap_dims({"x": "y"})
         <xarray.DataArray (y: 2)>
         array([0, 1])
         Coordinates:
             x        (y) <U1 'a' 'b'
           * y        (y) int64 0 1
+
         >>> arr.swap_dims({"x": "z"})
         <xarray.DataArray (z: 2)>
         array([0, 1])
@@ -1718,7 +1721,7 @@ def stack(

         Examples
         --------
-        >>> arr = DataArray(np.arange(6).reshape(2, 3),
+        >>> arr = xr.DataArray(np.arange(6).reshape(2, 3),
         ...                  coords=[('x', ['a', 'b']), ('y', [0, 1, 2])])
         >>> arr
         <xarray.DataArray (x: 2, y: 3)>
@@ -1768,7 +1771,7 @@ def unstack(

         Examples
         --------
-        >>> arr = DataArray(np.arange(6).reshape(2, 3),
+        >>> arr = xr.DataArray(np.arange(6).reshape(2, 3),
         ...                  coords=[('x', ['a', 'b']), ('y', [0, 1, 2])])
         >>> arr
         <xarray.DataArray (x: 2, y: 3)>
@@ -1817,7 +1820,7 @@ def to_unstacked_dataset(self, dim, level=0):
         Examples
         --------
         >>> import xarray as xr
-        >>> arr = DataArray(np.arange(6).reshape(2, 3),
+        >>> arr = xr.DataArray(np.arange(6).reshape(2, 3),
         ...                  coords=[('x', ['a', 'b']), ('y', [0, 1, 2])])
         >>> data = xr.Dataset({'a': arr, 'b': arr.isel(y=0)})
         >>> data
@@ -2623,7 +2626,7 @@ def plot(self) -> _PlotMethods:
         """
         Access plotting functions for DataArray's

-        >>> d = DataArray([[1, 2], [3, 4]])
+        >>> d = xr.DataArray([[1, 2], [3, 4]])

         For convenience just call this directly
@@ -2849,18 +2852,20 @@ def dot(
         --------

         >>> da_vals = np.arange(6 * 5 * 4).reshape((6, 5, 4))
-        >>> da = DataArray(da_vals, dims=['x', 'y', 'z'])
+        >>> da = xr.DataArray(da_vals, dims=['x', 'y', 'z'])
         >>> dm_vals = np.arange(4)
-        >>> dm = DataArray(dm_vals, dims=['z'])
+        >>> dm = xr.DataArray(dm_vals, dims=['z'])

         >>> dm.dims
         ('z')
+
         >>> da.dims
         ('x', 'y', 'z')

         >>> dot_result = da.dot(dm)
         >>> dot_result.dims
         ('x', 'y')
+
         """
         if isinstance(other, Dataset):
             raise NotImplementedError(
diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py
index f286236dd45..a4d20a79b7c 100644
--- a/xarray/core/dataset.py
+++ b/xarray/core/dataset.py
@@ -1011,7 +1011,7 @@ def copy(self, deep: bool = False, data: Mapping = None) -> "Dataset":

         >>> da = xr.DataArray(np.random.randn(2, 3))
         >>> ds = xr.Dataset({'foo': da, 'bar': ('x', [-1, 2])},
-                            coords={'x': ['one', 'two']})
+        ...                 coords={'x': ['one', 'two']})
         >>> ds.copy()
         <xarray.Dataset>
         Dimensions:  (dim_0: 2, dim_1: 3, x: 2)
@@ -1021,6 +1021,7 @@ def copy(self, deep: bool = False, data: Mapping = None) -> "Dataset":
         Data variables:
             foo      (dim_0, dim_1) float64 -0.8079 0.3897 -1.862 -0.6091 -1.051 -0.3003
             bar      (x) int64 -1 2
+
         >>> ds_0 = ds.copy(deep=False)
         >>> ds_0['foo'][0, 0] = 7
         >>> ds_0
@@ -1032,6 +1033,7 @@ def copy(self, deep: bool = False, data: Mapping = None) -> "Dataset":
         Data variables:
             foo      (dim_0, dim_1) float64 7.0 0.3897 -1.862 -0.6091 -1.051 -0.3003
             bar      (x) int64 -1 2
+
         >>> ds
         <xarray.Dataset>
         Dimensions:  (dim_0: 2, dim_1: 3, x: 2)
@@ -1055,6 +1057,7 @@ def copy(self, deep: bool = False, data: Mapping = None) -> "Dataset":
         Data variables:
             foo      (dim_0, dim_1) int64 0 1 2 3 4 5
             bar      (x) <U1 'a' 'b'
+
         >>> ds
         <xarray.Dataset>
         Dimensions:  (dim_0: 2, dim_1: 3, x: 2)
@@ -2883,7 +2886,7 @@ def swap_dims(
         Examples
         --------
         >>> ds = xr.Dataset(data_vars={"a": ("x", [5, 7]), "b": ("x", [0.1, 2.4])},
-                            coords={"x": ["a", "b"], "y": ("x", [0, 1])})
+        ...                 coords={"x": ["a", "b"], "y": ("x", [0, 1])})
         >>> ds
         <xarray.Dataset>
         Dimensions:  (x: 2)
@@ -2893,6 +2896,7 @@ def swap_dims(
         Data variables:
             a        (x) int64 5 7
             b        (x) float64 0.1 2.4
+
         >>> ds.swap_dims({"x": "y"})
         <xarray.Dataset>
         Dimensions:  (y: 2)
@@ -2902,6 +2906,7 @@ def swap_dims(
         Data variables:
             a        (y) int64 5 7
             b        (y) float64 0.1 2.4
+
         >>> ds.swap_dims({"x": "z"})
         <xarray.Dataset>
         Dimensions:  (z: 2)
@@ -3341,7 +3346,7 @@ def to_stacked_array(

         Examples
         --------
-        >>> data = Dataset(
+        >>> data = xr.Dataset(
         ...     data_vars={'a': (('x', 'y'), [[0, 1, 2], [3, 4, 5]]),
         ...                'b': ('x', [6, 7])},
         ...     coords={'y': ['u', 'v', 'w']}
diff --git a/xarray/core/rolling.py b/xarray/core/rolling.py
index 61178cfb15f..5f633abbde6 100644
--- a/xarray/core/rolling.py
+++ b/xarray/core/rolling.py
@@ -231,21 +231,22 @@ def construct(self, window_dim, stride=1, fill_value=dtypes.NA):

         Examples
         --------
-        >>> da = DataArray(np.arange(8).reshape(2, 4), dims=('a', 'b'))
-        >>>
+        >>> da = xr.DataArray(np.arange(8).reshape(2, 4), dims=('a', 'b'))
+
         >>> rolling = da.rolling(b=3)
         >>> rolling.construct('window_dim')
         <xarray.DataArray (a: 2, b: 4, window_dim: 3)>
         array([[[np.nan, np.nan, 0], [np.nan, 0, 1], [0, 1, 2], [1, 2, 3]],
                [[np.nan, np.nan, 4], [np.nan, 4, 5], [4, 5, 6], [5, 6, 7]]])
         Dimensions without coordinates: a, b, window_dim
-        >>>
+
         >>> rolling = da.rolling(b=3, center=True)
         >>> rolling.construct('window_dim')
         <xarray.DataArray (a: 2, b: 4, window_dim: 3)>
         array([[[np.nan, 0, 1], [0, 1, 2], [1, 2, 3], [2, 3, np.nan]],
                [[np.nan, 4, 5], [4, 5, 6], [5, 6, 7], [6, 7, np.nan]]])
         Dimensions without coordinates: a, b, window_dim
+
         """
         from .dataarray import DataArray

@@ -278,26 +279,26 @@ def reduce(self, func, **kwargs):

         Examples
         --------
-        >>> da = DataArray(np.arange(8).reshape(2, 4), dims=('a', 'b'))
-        >>>
+        >>> da = xr.DataArray(np.arange(8).reshape(2, 4), dims=('a', 'b'))
         >>> rolling = da.rolling(b=3)
         >>> rolling.construct('window_dim')
         <xarray.DataArray (a: 2, b: 4, window_dim: 3)>
         array([[[np.nan, np.nan, 0], [np.nan, 0, 1], [0, 1, 2], [1, 2, 3]],
                [[np.nan, np.nan, 4], [np.nan, 4, 5], [4, 5, 6], [5, 6, 7]]])
         Dimensions without coordinates: a, b, window_dim
-        >>>
+
         >>> rolling.reduce(np.sum)
         <xarray.DataArray (a: 2, b: 4)>
         array([[nan, nan,  3.,  6.],
                [nan, nan, 15., 18.]])
         Dimensions without coordinates: a, b
-        >>>
+
         >>> rolling = da.rolling(b=3, min_periods=1)
         >>> rolling.reduce(np.nansum)
         <xarray.DataArray (a: 2, b: 4)>
         array([[ 0.,  1.,  3.,  6.],
                [ 4.,  9., 15., 18.]])
+
         """
         rolling_dim = utils.get_temp_dimname(self.obj.dims, "_rolling_dim")
         windows = self.construct(rolling_dim)

From 650a981734ce3291f5aaa68648ebde451339f28a Mon Sep 17 00:00:00 2001
From: Spencer Clark
Date: Fri, 13 Mar 2020 02:14:41 -0400
Subject: [PATCH 38/75] Fix CFTimeIndex-related errors stemming from updates in pandas (#3764)

* Allow subtraction of a generic Index of cftime.datetimes from a CFTimeIndex

* black

* Test that NotImplemented logic works

* Vendor _get_nearest_indexer and _filter_indexer_tolerance

* Test OverflowError in __rsub__

* Fix name of pandas method in docstring

* Add what's new entries

* Enable use of tolerance greater than 292 years

* newline

Co-authored-by: Deepak Cherian
---
 doc/whats-new.rst                |  6 +++
 xarray/coding/cftimeindex.py     | 54 +++++++++++++++++++++++----
 xarray/tests/test_cftimeindex.py | 63 +++++++++++++++++++++++++++++++-
 3 files changed, 113 insertions(+), 10 deletions(-)

diff --git a/doc/whats-new.rst b/doc/whats-new.rst
index 3f04ba4ec57..80309dc4673 100644
--- a/doc/whats-new.rst
+++ b/doc/whats-new.rst
@@ -96,6 +96,12 @@ Internal Changes
 - Changed test_open_mfdataset_list_attr to only run with dask installed
   (:issue:`3777`, :pull:`3780`).
   By `Bruno Pagani `_.
+- Preserved the ability to index with ``method="nearest"`` with a
+  :py:class:`CFTimeIndex` with pandas versions greater than 1.0.1
+  (:issue:`3751`). By `Spencer Clark `_.
+- Greater flexibility and improved test coverage of subtracting various types
+  of objects from a :py:class:`CFTimeIndex`. By `Spencer Clark
+  `_.
 - Updated Azure CI MacOS image, given pending removal.
   By `Maximilian Roos `_
 - Removed xfails for scipy 1.0.1 for tests that append to netCDF files (:pull:`3805`).
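
The subtraction semantics that the code changes below pin down can be sketched as follows (illustrative only; assumes cftime is installed, and the reprs follow the pandas of the time):

    >>> import pandas as pd
    >>> import xarray as xr
    >>> times = xr.cftime_range("2000", periods=3, calendar="noleap")
    >>> times - times[0]  # CFTimeIndex minus a cftime.datetime -> TimedeltaIndex
    TimedeltaIndex(['0 days', '1 days', '2 days'], dtype='timedelta64[ns]', freq=None)
    >>> times - pd.Index(times.values)  # a generic pd.Index of cftime datetimes now works too
    TimedeltaIndex(['0 days', '0 days', '0 days'], dtype='timedelta64[ns]', freq=None)

Unsupported operands (e.g. ``times - 1``) now make ``__sub__`` return ``NotImplemented``, surfacing as a ``TypeError``, and differences overflowing the nanosecond resolution raise a ``ValueError`` — both exercised by the new tests below.
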
diff --git a/xarray/coding/cftimeindex.py b/xarray/coding/cftimeindex.py index 99f90430e91..1ea5d3a7d11 100644 --- a/xarray/coding/cftimeindex.py +++ b/xarray/coding/cftimeindex.py @@ -49,6 +49,7 @@ from xarray.core.utils import is_scalar +from ..core.common import _contains_cftime_datetimes from .times import _STANDARD_CALENDARS, cftime_to_nptime, infer_calendar_name @@ -326,6 +327,32 @@ def _get_string_slice(self, key): raise KeyError(key) return loc + def _get_nearest_indexer(self, target, limit, tolerance): + """Adapted from pandas.Index._get_nearest_indexer""" + left_indexer = self.get_indexer(target, "pad", limit=limit) + right_indexer = self.get_indexer(target, "backfill", limit=limit) + left_distances = abs(self.values[left_indexer] - target.values) + right_distances = abs(self.values[right_indexer] - target.values) + + if self.is_monotonic_increasing: + condition = (left_distances < right_distances) | (right_indexer == -1) + else: + condition = (left_distances <= right_distances) | (right_indexer == -1) + indexer = np.where(condition, left_indexer, right_indexer) + + if tolerance is not None: + indexer = self._filter_indexer_tolerance(target, indexer, tolerance) + return indexer + + def _filter_indexer_tolerance(self, target, indexer, tolerance): + """Adapted from pandas.Index._filter_indexer_tolerance""" + if isinstance(target, pd.Index): + distance = abs(self.values[indexer] - target.values) + else: + distance = abs(self.values[indexer] - target) + indexer = np.where(distance <= tolerance, indexer, -1) + return indexer + def get_loc(self, key, method=None, tolerance=None): """Adapted from pandas.tseries.index.DatetimeIndex.get_loc""" if isinstance(key, str): @@ -427,9 +454,11 @@ def __radd__(self, other): return CFTimeIndex(other + np.array(self)) def __sub__(self, other): - import cftime - - if isinstance(other, (CFTimeIndex, cftime.datetime)): + if _contains_datetime_timedeltas(other): + return CFTimeIndex(np.array(self) - other) + elif isinstance(other, pd.TimedeltaIndex): + return CFTimeIndex(np.array(self) - other.to_pytimedelta()) + elif _contains_cftime_datetimes(np.array(other)): try: return pd.TimedeltaIndex(np.array(self) - np.array(other)) except OverflowError: @@ -437,14 +466,17 @@ def __sub__(self, other): "The time difference exceeds the range of values " "that can be expressed at the nanosecond resolution." ) - - elif isinstance(other, pd.TimedeltaIndex): - return CFTimeIndex(np.array(self) - other.to_pytimedelta()) else: - return CFTimeIndex(np.array(self) - other) + return NotImplemented def __rsub__(self, other): - return pd.TimedeltaIndex(other - np.array(self)) + try: + return pd.TimedeltaIndex(other - np.array(self)) + except OverflowError: + raise ValueError( + "The time difference exceeds the range of values " + "that can be expressed at the nanosecond resolution." + ) def to_datetimeindex(self, unsafe=False): """If possible, convert this index to a pandas.DatetimeIndex. @@ -633,6 +665,12 @@ def _parse_array_of_cftime_strings(strings, date_type): ).reshape(strings.shape) +def _contains_datetime_timedeltas(array): + """Check if an input array contains datetime.timedelta objects.""" + array = np.atleast_1d(array) + return isinstance(array[0], timedelta) + + def _cftimeindex_from_i8(values, date_type, name): """Construct a CFTimeIndex from an array of integers. 
diff --git a/xarray/tests/test_cftimeindex.py b/xarray/tests/test_cftimeindex.py index 8d83b833ca3..43d6d7b068e 100644 --- a/xarray/tests/test_cftimeindex.py +++ b/xarray/tests/test_cftimeindex.py @@ -451,10 +451,21 @@ def test_sel_date_scalar(da, date_type, index): @pytest.mark.xfail(reason="https://github.com/pydata/xarray/issues/3751") +@requires_cftime +def test_sel_date_distant_date(da, date_type, index): + expected = xr.DataArray(4).assign_coords(time=index[3]) + result = da.sel(time=date_type(2000, 1, 1), method="nearest") + assert_identical(result, expected) + + @requires_cftime @pytest.mark.parametrize( "sel_kwargs", - [{"method": "nearest"}, {"method": "nearest", "tolerance": timedelta(days=70)}], + [ + {"method": "nearest"}, + {"method": "nearest", "tolerance": timedelta(days=70)}, + {"method": "nearest", "tolerance": timedelta(days=1800000)}, + ], ) def test_sel_date_scalar_nearest(da, date_type, index, sel_kwargs): expected = xr.DataArray(2).assign_coords(time=index[1]) @@ -738,7 +749,7 @@ def test_timedeltaindex_add_cftimeindex(calendar): @requires_cftime -def test_cftimeindex_sub(index): +def test_cftimeindex_sub_timedelta(index): date_type = index.date_type expected_dates = [ date_type(1, 1, 2), @@ -753,6 +764,27 @@ def test_cftimeindex_sub(index): assert isinstance(result, CFTimeIndex) +@requires_cftime +@pytest.mark.parametrize( + "other", + [np.array(4 * [timedelta(days=1)]), np.array(timedelta(days=1))], + ids=["1d-array", "scalar-array"], +) +def test_cftimeindex_sub_timedelta_array(index, other): + date_type = index.date_type + expected_dates = [ + date_type(1, 1, 2), + date_type(1, 2, 2), + date_type(2, 1, 2), + date_type(2, 2, 2), + ] + expected = CFTimeIndex(expected_dates) + result = index + timedelta(days=2) + result = result - other + assert result.equals(expected) + assert isinstance(result, CFTimeIndex) + + @requires_cftime @pytest.mark.parametrize("calendar", _CFTIME_CALENDARS) def test_cftimeindex_sub_cftimeindex(calendar): @@ -784,6 +816,14 @@ def test_cftime_datetime_sub_cftimeindex(calendar): assert isinstance(result, pd.TimedeltaIndex) +@requires_cftime +@pytest.mark.parametrize("calendar", _CFTIME_CALENDARS) +def test_distant_cftime_datetime_sub_cftimeindex(calendar): + a = xr.cftime_range("2000", periods=5, calendar=calendar) + with pytest.raises(ValueError, match="difference exceeds"): + a.date_type(1, 1, 1) - a + + @requires_cftime @pytest.mark.parametrize("calendar", _CFTIME_CALENDARS) def test_cftimeindex_sub_timedeltaindex(calendar): @@ -795,6 +835,25 @@ def test_cftimeindex_sub_timedeltaindex(calendar): assert isinstance(result, CFTimeIndex) +@requires_cftime +@pytest.mark.parametrize("calendar", _CFTIME_CALENDARS) +def test_cftimeindex_sub_index_of_cftime_datetimes(calendar): + a = xr.cftime_range("2000", periods=5, calendar=calendar) + b = pd.Index(a.values) + expected = a - a + result = a - b + assert result.equals(expected) + assert isinstance(result, pd.TimedeltaIndex) + + +@requires_cftime +@pytest.mark.parametrize("calendar", _CFTIME_CALENDARS) +def test_cftimeindex_sub_not_implemented(calendar): + a = xr.cftime_range("2000", periods=5, calendar=calendar) + with pytest.raises(TypeError, match="unsupported operand"): + a - 1 + + @requires_cftime def test_cftimeindex_rsub(index): with pytest.raises(TypeError): From 7f4f027e69b42ae1eb93fce2df708d65c70c0a10 Mon Sep 17 00:00:00 2001 From: Deepak Cherian Date: Fri, 13 Mar 2020 13:25:12 +0000 Subject: [PATCH 39/75] Fix alignment with join="override" when some dims are unindexed (#3839) --- 
doc/whats-new.rst | 2 ++ xarray/core/alignment.py | 2 +- xarray/tests/test_concat.py | 7 +++++++ 3 files changed, 10 insertions(+), 1 deletion(-) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 80309dc4673..34d4342b028 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -55,6 +55,8 @@ New Features Bug fixes ~~~~~~~~~ +- Fix alignment with ``join="override"`` when some dimensions are unindexed. (:issue:`3681`). + By `Deepak Cherian `_. - Fix :py:meth:`Dataset.swap_dims` and :py:meth:`DataArray.swap_dims` producing index with name reflecting the previous dimension name instead of the new one (:issue:`3748`, :pull:`3752`). By `Joseph K Aicher diff --git a/xarray/core/alignment.py b/xarray/core/alignment.py index 908119f7995..a83b1b87aa4 100644 --- a/xarray/core/alignment.py +++ b/xarray/core/alignment.py @@ -50,7 +50,7 @@ def _override_indexes(objects, all_indexes, exclude): objects = list(objects) for idx, obj in enumerate(objects[1:]): new_indexes = {} - for dim in obj.dims: + for dim in obj.indexes: if dim not in exclude: new_indexes[dim] = all_indexes[dim][0] objects[idx + 1] = obj._overwrite_indexes(new_indexes) diff --git a/xarray/tests/test_concat.py b/xarray/tests/test_concat.py index bd99181a947..77c030198ac 100644 --- a/xarray/tests/test_concat.py +++ b/xarray/tests/test_concat.py @@ -250,6 +250,13 @@ def test_concat_join_kwarg(self): actual = concat([ds1, ds2], join=join, dim="x") assert_equal(actual, expected[join]) + # regression test for #3681 + actual = concat([ds1.drop("x"), ds2.drop("x")], join="override", dim="y") + expected = Dataset( + {"a": (("x", "y"), np.array([0, 0], ndmin=2))}, coords={"y": [0, 0.0001]} + ) + assert_identical(actual, expected) + def test_concat_promote_shape(self): # mixed dims within variables objs = [Dataset({}, {"x": 0}), Dataset({"x": [1]})] From 0d95ebac19faa3af25ac369d1e8177535022c0d9 Mon Sep 17 00:00:00 2001 From: David Huard Date: Fri, 13 Mar 2020 09:58:37 -0400 Subject: [PATCH 40/75] Fix interp bug when indexer shares coordinates with array (#3758) * added test demonstrating interp bug for nd indexes sharing coordinate with array * fix test so it works with sel * support shared dimensions in interp * isort fixes * update whats new * Revert "isort fixes" This reverts commit 5df6c9c0f99376dbc43f2f30567661ee49c00655. * test requires scipy --- doc/whats-new.rst | 3 +++ xarray/core/dataset.py | 11 +++++++++++ xarray/tests/test_interp.py | 30 ++++++++++++++++++++++++++++++ 3 files changed, 44 insertions(+) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 34d4342b028..df28837dcfa 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -54,6 +54,9 @@ New Features Bug fixes ~~~~~~~~~ +- Fix :py:meth:`Dataset.interp` when indexing array shares coordinates with the + indexed variable (:issue:`3252`). + By `David Huard `_. - Fix alignment with ``join="override"`` when some dimensions are unindexed. (:issue:`3681`). By `Deepak Cherian `_. diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index a4d20a79b7c..880c574c9cb 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -2574,6 +2574,17 @@ def interp( coords = either_dict_or_kwargs(coords, coords_kwargs, "interp") indexers = dict(self._validate_interp_indexers(coords)) + if coords: + # This avoids broadcasting over coordinates that are both in + # the original array AND in the indexing array. It essentially + # forces interpolation along the shared coordinates. 
+ sdims = ( + set(self.dims) + .intersection(*[set(nx.dims) for nx in indexers.values()]) + .difference(coords.keys()) + ) + indexers.update({d: self.variables[d] for d in sdims}) + obj = self if assume_sorted else self.sortby([k for k in coords]) def maybe_variable(obj, k): diff --git a/xarray/tests/test_interp.py b/xarray/tests/test_interp.py index c2bec2166c8..9cc4933f462 100644 --- a/xarray/tests/test_interp.py +++ b/xarray/tests/test_interp.py @@ -244,6 +244,36 @@ def test_interpolate_nd(case): assert_allclose(actual.transpose("y", "z"), expected) +@requires_scipy +def test_interpolate_nd_nd(): + """Interpolate nd array with an nd indexer sharing coordinates.""" + # Create original array + a = [0, 2] + x = [0, 1, 2] + da = xr.DataArray( + np.arange(6).reshape(2, 3), dims=("a", "x"), coords={"a": a, "x": x} + ) + + # Create indexer into `a` with dimensions (y, x) + y = [10] + c = {"x": x, "y": y} + ia = xr.DataArray([[1, 2, 2]], dims=("y", "x"), coords=c) + out = da.interp(a=ia) + expected = xr.DataArray([[1.5, 4, 5]], dims=("y", "x"), coords=c) + xr.testing.assert_allclose(out.drop_vars("a"), expected) + + # If the *shared* indexing coordinates do not match, interp should fail. + with pytest.raises(ValueError): + c = {"x": [1], "y": y} + ia = xr.DataArray([[1]], dims=("y", "x"), coords=c) + da.interp(a=ia) + + with pytest.raises(ValueError): + c = {"x": [5, 6, 7], "y": y} + ia = xr.DataArray([[1]], dims=("y", "x"), coords=c) + da.interp(a=ia) + + @pytest.mark.parametrize("method", ["linear"]) @pytest.mark.parametrize("case", [0, 1]) def test_interpolate_scalar(method, case): From ae03616dbd30544cadf4ff85e66cffb582ab3481 Mon Sep 17 00:00:00 2001 From: Matthieu Ancellin <31126826+mancellin@users.noreply.github.com> Date: Fri, 13 Mar 2020 20:55:06 +0100 Subject: [PATCH 41/75] Fix multi-index with categorical values. (#3860) * Fix bug for multi-index with categorical values. See issue #3674. * Blacked. * Add line in whats-new.rst. * Remove forgotten print. Co-authored-by: Matthieu Ancellin --- doc/whats-new.rst | 3 +++ xarray/core/indexes.py | 2 ++ xarray/tests/test_dataset.py | 11 +++++++++++ 3 files changed, 16 insertions(+) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index df28837dcfa..9b78d046148 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -58,6 +58,9 @@ Bug fixes indexed variable (:issue:`3252`). By `David Huard `_. + +- Fix use of multi-index with categorical values (:issue:`3674`). + By `Matthieu Ancellin `_. - Fix alignment with ``join="override"`` when some dimensions are unindexed. (:issue:`3681`). By `Deepak Cherian `_. 
- Fix :py:meth:`Dataset.swap_dims` and :py:meth:`DataArray.swap_dims` producing diff --git a/xarray/core/indexes.py b/xarray/core/indexes.py index 06bf08cefd2..dea1767d50c 100644 --- a/xarray/core/indexes.py +++ b/xarray/core/indexes.py @@ -22,6 +22,8 @@ def remove_unused_levels_categories(index): for i, level in enumerate(index.levels): if isinstance(level, pd.CategoricalIndex): level = level[index.codes[i]].remove_unused_categories() + else: + level = level[index.codes[i]] levels.append(level) index = pd.MultiIndex.from_arrays(levels, names=index.names) elif isinstance(index, pd.CategoricalIndex): diff --git a/xarray/tests/test_dataset.py b/xarray/tests/test_dataset.py index d2e8c6b7609..6a6c496591a 100644 --- a/xarray/tests/test_dataset.py +++ b/xarray/tests/test_dataset.py @@ -1458,6 +1458,17 @@ def test_categorical_reindex(self): actual = ds.reindex(cat=["foo"])["cat"].values assert (actual == np.array(["foo"])).all() + def test_categorical_multiindex(self): + i1 = pd.Series([0, 0]) + cat = pd.CategoricalDtype(categories=["foo", "baz", "bar"]) + i2 = pd.Series(["baz", "bar"], dtype=cat) + + df = pd.DataFrame({"i1": i1, "i2": i2, "values": [1, 2]}).set_index( + ["i1", "i2"] + ) + actual = df.to_xarray() + assert actual["values"].shape == (1, 2) + def test_sel_drop(self): data = Dataset({"foo": ("x", [1, 2, 3])}, {"x": [0, 1, 2]}) expected = Dataset({"foo": 1}) From cafab46aac8f7a073a32ec5aa47e213a9810ed54 Mon Sep 17 00:00:00 2001 From: keewis Date: Sat, 14 Mar 2020 22:25:46 +0100 Subject: [PATCH 42/75] Blacken the doctest code in docstrings (#3857) * fix a few erroneous doctest blocks * blacken the doctest code * manually remove the trailing comma from doctest lines --- xarray/backends/api.py | 4 +- xarray/coding/cftime_offsets.py | 2 +- xarray/coding/cftimeindex.py | 46 ++++++----- xarray/coding/strings.py | 2 +- xarray/conventions.py | 4 +- xarray/core/accessor_dt.py | 10 +-- xarray/core/accessor_str.py | 2 +- xarray/core/alignment.py | 34 ++++---- xarray/core/combine.py | 6 +- xarray/core/common.py | 114 +++++++++++++++------------ xarray/core/computation.py | 31 ++++---- xarray/core/dataarray.py | 101 +++++++++++++----------- xarray/core/dataset.py | 133 ++++++++++++++++++-------------- xarray/core/extensions.py | 5 +- xarray/core/merge.py | 16 ++-- xarray/core/nputils.py | 2 +- xarray/core/options.py | 2 +- xarray/core/parallel.py | 15 ++-- xarray/core/rolling.py | 10 +-- xarray/core/rolling_exp.py | 4 +- xarray/core/variable.py | 8 +- 21 files changed, 307 insertions(+), 244 deletions(-) diff --git a/xarray/backends/api.py b/xarray/backends/api.py index 56cd0649989..e828faabc27 100644 --- a/xarray/backends/api.py +++ b/xarray/backends/api.py @@ -1196,8 +1196,8 @@ def save_mfdataset( Save a dataset into one netCDF per year of data: - >>> years, datasets = zip(*ds.groupby('time.year')) - >>> paths = ['%s.nc' % y for y in years] + >>> years, datasets = zip(*ds.groupby("time.year")) + >>> paths = ["%s.nc" % y for y in years] >>> xr.save_mfdataset(datasets, paths) """ if mode == "w" and len(set(paths)) < len(paths): diff --git a/xarray/coding/cftime_offsets.py b/xarray/coding/cftime_offsets.py index eeb68508527..a2306331ca7 100644 --- a/xarray/coding/cftime_offsets.py +++ b/xarray/coding/cftime_offsets.py @@ -938,7 +938,7 @@ def cftime_range( This function returns a ``CFTimeIndex``, populated with ``cftime.datetime`` objects associated with the specified calendar type, e.g. 
- >>> xr.cftime_range(start='2000', periods=6, freq='2MS', calendar='noleap') + >>> xr.cftime_range(start="2000", periods=6, freq="2MS", calendar="noleap") CFTimeIndex([2000-01-01 00:00:00, 2000-03-01 00:00:00, 2000-05-01 00:00:00, 2000-07-01 00:00:00, 2000-09-01 00:00:00, 2000-11-01 00:00:00], dtype='object') diff --git a/xarray/coding/cftimeindex.py b/xarray/coding/cftimeindex.py index 1ea5d3a7d11..c680a7e0bcf 100644 --- a/xarray/coding/cftimeindex.py +++ b/xarray/coding/cftimeindex.py @@ -269,29 +269,32 @@ def _partial_date_slice(self, resolution, parsed): >>> from cftime import DatetimeNoLeap >>> import pandas as pd >>> import xarray as xr - >>> da = xr.DataArray([1, 2], - coords=[[DatetimeNoLeap(2001, 1, 1), - DatetimeNoLeap(2001, 2, 1)]], - dims=['time']) - >>> da.sel(time='2001-01-01') + >>> da = xr.DataArray( + ... [1, 2], + ... coords=[[DatetimeNoLeap(2001, 1, 1), DatetimeNoLeap(2001, 2, 1)]], + ... dims=["time"], + ... ) + >>> da.sel(time="2001-01-01") array([1]) Coordinates: * time (time) object 2001-01-01 00:00:00 - >>> da = xr.DataArray([1, 2], - coords=[[pd.Timestamp(2001, 1, 1), - pd.Timestamp(2001, 2, 1)]], - dims=['time']) - >>> da.sel(time='2001-01-01') + >>> da = xr.DataArray( + ... [1, 2], + ... coords=[[pd.Timestamp(2001, 1, 1), pd.Timestamp(2001, 2, 1)]], + ... dims=["time"], + ... ) + >>> da.sel(time="2001-01-01") array(1) Coordinates: time datetime64[ns] 2001-01-01 - >>> da = xr.DataArray([1, 2], - coords=[[pd.Timestamp(2001, 1, 1, 1), - pd.Timestamp(2001, 2, 1)]], - dims=['time']) - >>> da.sel(time='2001-01-01') + >>> da = xr.DataArray( + ... [1, 2], + ... coords=[[pd.Timestamp(2001, 1, 1, 1), pd.Timestamp(2001, 2, 1)]], + ... dims=["time"], + ... ) + >>> da.sel(time="2001-01-01") array([1]) Coordinates: @@ -423,10 +426,10 @@ def shift(self, n, freq): Examples -------- - >>> index = xr.cftime_range('2000', periods=1, freq='M') + >>> index = xr.cftime_range("2000", periods=1, freq="M") >>> index CFTimeIndex([2000-01-31 00:00:00], dtype='object') - >>> index.shift(1, 'M') + >>> index.shift(1, "M") CFTimeIndex([2000-02-29 00:00:00], dtype='object') """ from .cftime_offsets import to_offset @@ -511,7 +514,7 @@ def to_datetimeindex(self, unsafe=False): Examples -------- >>> import xarray as xr - >>> times = xr.cftime_range('2000', periods=2, calendar='gregorian') + >>> times = xr.cftime_range("2000", periods=2, calendar="gregorian") >>> times CFTimeIndex([2000-01-01 00:00:00, 2000-01-02 00:00:00], dtype='object') >>> times.to_datetimeindex() @@ -550,9 +553,10 @@ def strftime(self, date_format): Examples -------- - >>> rng = xr.cftime_range(start='2000', periods=5, freq='2MS', - ... calendar='noleap') - >>> rng.strftime('%B %d, %Y, %r') + >>> rng = xr.cftime_range( + ... start="2000", periods=5, freq="2MS", calendar="noleap" + ... ) + >>> rng.strftime("%B %d, %Y, %r") Index(['January 01, 2000, 12:00:00 AM', 'March 01, 2000, 12:00:00 AM', 'May 01, 2000, 12:00:00 AM', 'July 01, 2000, 12:00:00 AM', 'September 01, 2000, 12:00:00 AM'], diff --git a/xarray/coding/strings.py b/xarray/coding/strings.py index 6d383fcf318..35cc190ffe3 100644 --- a/xarray/coding/strings.py +++ b/xarray/coding/strings.py @@ -201,7 +201,7 @@ class StackedBytesArray(indexing.ExplicitlyIndexedNDArrayMixin): """Wrapper around array-like objects to create a new indexable object where values, when accessed, are automatically stacked along the last dimension. 
- >>> StackedBytesArray(np.array(['a', 'b', 'c']))[:] + >>> StackedBytesArray(np.array(["a", "b", "c"]))[:] array('abc', dtype='|S3') """ diff --git a/xarray/conventions.py b/xarray/conventions.py index a8b9906c153..df24d0d3d8d 100644 --- a/xarray/conventions.py +++ b/xarray/conventions.py @@ -19,7 +19,7 @@ class NativeEndiannessArray(indexing.ExplicitlyIndexedNDArrayMixin): big endian) into native endianness, so they can be used with Cython functions, such as those found in bottleneck and pandas. - >>> x = np.arange(5, dtype='>i2') + >>> x = np.arange(5, dtype=">i2") >>> x.dtype dtype('>i2') @@ -50,7 +50,7 @@ class BoolTypeArray(indexing.ExplicitlyIndexedNDArrayMixin): This is useful for decoding boolean arrays from integer typed netCDF variables. - >>> x = np.array([1, 0, 1, 1, 0], dtype='i1') + >>> x = np.array([1, 0, 1, 1, 0], dtype="i1") >>> x.dtype dtype('int8') diff --git a/xarray/core/accessor_dt.py b/xarray/core/accessor_dt.py index de0e332b26c..2977596036c 100644 --- a/xarray/core/accessor_dt.py +++ b/xarray/core/accessor_dt.py @@ -250,8 +250,8 @@ class DatetimeAccessor(Properties): --------- >>> import xarray as xr >>> import pandas as pd - >>> dates = pd.date_range(start='2000/01/01', freq='D', periods=10) - >>> ts = xr.DataArray(dates, dims=('time')) + >>> dates = pd.date_range(start="2000/01/01", freq="D", periods=10) + >>> ts = xr.DataArray(dates, dims=("time")) >>> ts array(['2000-01-01T00:00:00.000000000', '2000-01-02T00:00:00.000000000', @@ -296,8 +296,8 @@ def strftime(self, date_format): Examples -------- - >>> rng = xr.Dataset({'time': datetime.datetime(2000, 1, 1)}) - >>> rng['time'].dt.strftime('%B %d, %Y, %r') + >>> rng = xr.Dataset({"time": datetime.datetime(2000, 1, 1)}) + >>> rng["time"].dt.strftime("%B %d, %Y, %r") array('January 01, 2000, 12:00:00 AM', dtype=object) """ @@ -400,7 +400,7 @@ class TimedeltaAccessor(Properties): >>> import pandas as pd >>> import xarray as xr >>> dates = pd.timedelta_range(start="1 day", freq="6H", periods=20) - >>> ts = xr.DataArray(dates, dims=('time')) + >>> ts = xr.DataArray(dates, dims=("time")) >>> ts array([ 86400000000000, 108000000000000, 129600000000000, 151200000000000, diff --git a/xarray/core/accessor_str.py b/xarray/core/accessor_str.py index 6a975b948eb..5502ba72855 100644 --- a/xarray/core/accessor_str.py +++ b/xarray/core/accessor_str.py @@ -67,7 +67,7 @@ class StringAccessor: Similar to pandas, fields can be accessed through the `.str` attribute for applicable DataArrays. - >>> da = xr.DataArray(['some', 'text', 'in', 'an', 'array']) + >>> da = xr.DataArray(["some", "text", "in", "an", "array"]) >>> da.str.len() array([4, 4, 2, 2, 5]) diff --git a/xarray/core/alignment.py b/xarray/core/alignment.py index a83b1b87aa4..abc180e049c 100644 --- a/xarray/core/alignment.py +++ b/xarray/core/alignment.py @@ -121,10 +121,16 @@ def align( -------- >>> import xarray as xr - >>> x = xr.DataArray([[25, 35], [10, 24]], dims=('lat', 'lon'), - ... coords={'lat': [35., 40.], 'lon': [100., 120.]}) - >>> y = xr.DataArray([[20, 5], [7, 13]], dims=('lat', 'lon'), - ... coords={'lat': [35., 42.], 'lon': [100., 120.]}) + >>> x = xr.DataArray( + ... [[25, 35], [10, 24]], + ... dims=("lat", "lon"), + ... coords={"lat": [35.0, 40.0], "lon": [100.0, 120.0]}, + ... ) + >>> y = xr.DataArray( + ... [[20, 5], [7, 13]], + ... dims=("lat", "lon"), + ... coords={"lat": [35.0, 42.0], "lon": [100.0, 120.0]}, + ... 
) >>> x @@ -156,7 +162,7 @@ def align( * lat (lat) float64 35.0 * lon (lon) float64 100.0 120.0 - >>> a, b = xr.align(x, y, join='outer') + >>> a, b = xr.align(x, y, join="outer") >>> a array([[25., 35.], @@ -174,7 +180,7 @@ def align( * lat (lat) float64 35.0 40.0 42.0 * lon (lon) float64 100.0 120.0 - >>> a, b = xr.align(x, y, join='outer', fill_value=-999) + >>> a, b = xr.align(x, y, join="outer", fill_value=-999) >>> a array([[ 25, 35], @@ -192,7 +198,7 @@ def align( * lat (lat) float64 35.0 40.0 42.0 * lon (lon) float64 100.0 120.0 - >>> a, b = xr.align(x, y, join='left') + >>> a, b = xr.align(x, y, join="left") >>> a array([[25, 35], @@ -208,7 +214,7 @@ def align( * lat (lat) float64 35.0 40.0 * lon (lon) float64 100.0 120.0 - >>> a, b = xr.align(x, y, join='right') + >>> a, b = xr.align(x, y, join="right") >>> a array([[25., 35.], @@ -224,13 +230,13 @@ def align( * lat (lat) float64 35.0 42.0 * lon (lon) float64 100.0 120.0 - >>> a, b = xr.align(x, y, join='exact') + >>> a, b = xr.align(x, y, join="exact") Traceback (most recent call last): ... "indexes along dimension {!r} are not equal".format(dim) ValueError: indexes along dimension 'lat' are not equal - >>> a, b = xr.align(x, y, join='override') + >>> a, b = xr.align(x, y, join="override") >>> a array([[25, 35], @@ -674,8 +680,8 @@ def broadcast(*args, exclude=None): Broadcast two data arrays against one another to fill out their dimensions: - >>> a = xr.DataArray([1, 2, 3], dims='x') - >>> b = xr.DataArray([5, 6], dims='y') + >>> a = xr.DataArray([1, 2, 3], dims="x") + >>> b = xr.DataArray([5, 6], dims="y") >>> a array([1, 2, 3]) @@ -706,8 +712,8 @@ def broadcast(*args, exclude=None): Fill out the dimensions of all data variables in a dataset: - >>> ds = xr.Dataset({'a': a, 'b': b}) - >>> ds2, = xr.broadcast(ds) # use tuple unpacking to extract one dataset + >>> ds = xr.Dataset({"a": a, "b": b}) + >>> (ds2,) = xr.broadcast(ds) # use tuple unpacking to extract one dataset >>> ds2 Dimensions: (x: 3, y: 2) diff --git a/xarray/core/combine.py b/xarray/core/combine.py index 3f6e0e79351..1fa2df00352 100644 --- a/xarray/core/combine.py +++ b/xarray/core/combine.py @@ -412,7 +412,7 @@ def combine_nested( precipitation (x, y) float64 5.904 2.453 3.404 ... >>> ds_grid = [[x1y1, x1y2], [x2y1, x2y2]] - >>> combined = xr.combine_nested(ds_grid, concat_dim=['x', 'y']) + >>> combined = xr.combine_nested(ds_grid, concat_dim=["x", "y"]) Dimensions: (x: 4, y: 4) Dimensions without coordinates: x, y @@ -441,7 +441,7 @@ def combine_nested( precipitation (t) float64 5.904 2.453 3.404 ... >>> ds_grid = [[t1temp, t1precip], [t2temp, t2precip]] - >>> combined = xr.combine_nested(ds_grid, concat_dim=['t', None]) + >>> combined = xr.combine_nested(ds_grid, concat_dim=["t", None]) Dimensions: (t: 10) Dimensions without coordinates: t @@ -650,7 +650,7 @@ def combine_by_coords( temperature (y, x) float64 1.654 10.63 7.015 nan ... nan 12.46 2.22 15.96 precipitation (y, x) float64 0.2136 0.9974 0.7603 ... 0.6125 0.4654 0.5953 - >>> xr.combine_by_coords([x3, x1], join='override') + >>> xr.combine_by_coords([x3, x1], join="override") Dimensions: (x: 3, y: 4) Coordinates: diff --git a/xarray/core/common.py b/xarray/core/common.py index c80cb24c5b5..39aa7982091 100644 --- a/xarray/core/common.py +++ b/xarray/core/common.py @@ -418,9 +418,9 @@ def assign_coords(self, coords=None, **coords_kwargs): -------- Convert longitude coordinates from 0-359 to -180-179: - >>> da = xr.DataArray(np.random.rand(4), - ... coords=[np.array([358, 359, 0, 1])], - ... 
dims='lon') + >>> da = xr.DataArray( + ... np.random.rand(4), coords=[np.array([358, 359, 0, 1])], dims="lon", + ... ) >>> da array([0.28298 , 0.667347, 0.657938, 0.177683]) @@ -434,7 +434,7 @@ def assign_coords(self, coords=None, **coords_kwargs): The function also accepts dictionary arguments: - >>> da.assign_coords({'lon': (((da.lon + 180) % 360) - 180)}) + >>> da.assign_coords({"lon": (((da.lon + 180) % 360) - 180)}) array([0.28298 , 0.667347, 0.657938, 0.177683]) Coordinates: @@ -518,19 +518,13 @@ def pipe( You can write - >>> (ds.pipe(h) - ... .pipe(g, arg1=a) - ... .pipe(f, arg2=b, arg3=c) - ... ) + >>> (ds.pipe(h).pipe(g, arg1=a).pipe(f, arg2=b, arg3=c)) If you have a function that takes the data as (say) the second argument, pass a tuple indicating which keyword expects the data. For example, suppose ``f`` takes its data as ``arg2``: - >>> (ds.pipe(h) - ... .pipe(g, arg1=a) - ... .pipe((f, 'arg2'), arg1=a, arg3=c) - ... ) + >>> (ds.pipe(h).pipe(g, arg1=a).pipe((f, "arg2"), arg1=a, arg3=c)) Examples -------- @@ -539,7 +533,10 @@ def pipe( >>> import xarray as xr >>> x = xr.Dataset( ... { - ... "temperature_c": (("lat", "lon"), 20 * np.random.rand(4).reshape(2, 2)), + ... "temperature_c": ( + ... ("lat", "lon"), + ... 20 * np.random.rand(4).reshape(2, 2), + ... ), ... "precipitation": (("lat", "lon"), np.random.rand(4).reshape(2, 2)), ... }, ... coords={"lat": [10, 20], "lon": [150, 160]}, @@ -584,10 +581,9 @@ def pipe( precipitation (lat, lon) float64 2.731 2.719 2.848 2.467 >>> ( - ... x - ... .pipe(adder, arg=2) - ... .pipe(div, arg=2) - ... .pipe(sub_mult, sub_arg=2, mult_arg=2) + ... x.pipe(adder, arg=2) + ... .pipe(div, arg=2) + ... .pipe(sub_mult, sub_arg=2, mult_arg=2) ... ) Dimensions: (lat: 2, lon: 2) @@ -639,16 +635,17 @@ def groupby(self, group, squeeze: bool = True, restore_coord_dims: bool = None): -------- Calculate daily anomalies for daily data: - >>> da = xr.DataArray(np.linspace(0, 1826, num=1827), - ... coords=[pd.date_range('1/1/2000', '31/12/2004', - ... freq='D')], - ... dims='time') + >>> da = xr.DataArray( + ... np.linspace(0, 1826, num=1827), + ... coords=[pd.date_range("1/1/2000", "31/12/2004", freq="D")], + ... dims="time", + ... ) >>> da array([0.000e+00, 1.000e+00, 2.000e+00, ..., 1.824e+03, 1.825e+03, 1.826e+03]) Coordinates: * time (time) datetime64[ns] 2000-01-01 2000-01-02 2000-01-03 ... - >>> da.groupby('time.dayofyear') - da.groupby('time.dayofyear').mean('time') + >>> da.groupby("time.dayofyear") - da.groupby("time.dayofyear").mean("time") array([-730.8, -730.8, -730.8, ..., 730.2, 730.2, 730.5]) Coordinates: @@ -787,10 +784,15 @@ def rolling( -------- Create rolling seasonal average of monthly data e.g. DJF, JFM, ..., SON: - >>> da = xr.DataArray(np.linspace(0, 11, num=12), - ... coords=[pd.date_range('15/12/1999', - ... periods=12, freq=pd.DateOffset(months=1))], - ... dims='time') + >>> da = xr.DataArray( + ... np.linspace(0, 11, num=12), + ... coords=[ + ... pd.date_range( + ... "15/12/1999", periods=12, freq=pd.DateOffset(months=1), + ... ) + ... ], + ... dims="time", + ... ) >>> da array([ 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11.]) @@ -804,7 +806,7 @@ def rolling( Remove the NaNs using ``dropna()``: - >>> da.rolling(time=3, center=True).mean().dropna('time') + >>> da.rolling(time=3, center=True).mean().dropna("time") array([ 1., 2., 3., 4., 5., 6., 7., 8., 9., 10.]) Coordinates: @@ -906,10 +908,11 @@ def coarsen( -------- Coarsen the long time series by averaging over every four days. 
- >>> da = xr.DataArray(np.linspace(0, 364, num=364), - ... dims='time', - ... coords={'time': pd.date_range( - ... '15/12/1999', periods=364)}) + >>> da = xr.DataArray( + ... np.linspace(0, 364, num=364), + ... dims="time", + ... coords={"time": pd.date_range("15/12/1999", periods=364)}, + ... ) >>> da array([ 0. , 1.002755, 2.00551 , ..., 361.99449 , 362.997245, @@ -917,7 +920,7 @@ def coarsen( Coordinates: * time (time) datetime64[ns] 1999-12-15 1999-12-16 ... 2000-12-12 >>> - >>> da.coarsen(time=3, boundary='trim').mean() + >>> da.coarsen(time=3, boundary="trim").mean() array([ 1.002755, 4.011019, 7.019284, ..., 358.986226, 361.99449 ]) @@ -1000,10 +1003,15 @@ def resample( -------- Downsample monthly time-series data to seasonal data: - >>> da = xr.DataArray(np.linspace(0, 11, num=12), - ... coords=[pd.date_range('15/12/1999', - ... periods=12, freq=pd.DateOffset(months=1))], - ... dims='time') + >>> da = xr.DataArray( + ... np.linspace(0, 11, num=12), + ... coords=[ + ... pd.date_range( + ... "15/12/1999", periods=12, freq=pd.DateOffset(months=1), + ... ) + ... ], + ... dims="time", + ... ) >>> da array([ 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11.]) @@ -1017,7 +1025,7 @@ def resample( Upsample monthly time-series data to daily data: - >>> da.resample(time='1D').interpolate('linear') + >>> da.resample(time="1D").interpolate("linear") array([ 0. , 0.032258, 0.064516, ..., 10.935484, 10.967742, 11. ]) Coordinates: @@ -1025,7 +1033,7 @@ def resample( Limit scope of upsampling method - >>> da.resample(time='1D').nearest(tolerance='1D') + >>> da.resample(time="1D").nearest(tolerance="1D") array([ 0., 0., nan, ..., nan, 11., 11.]) Coordinates: @@ -1118,7 +1126,7 @@ def where(self, cond, other=dtypes.NA, drop: bool = False): -------- >>> import numpy as np - >>> a = xr.DataArray(np.arange(25).reshape(5, 5), dims=('x', 'y')) + >>> a = xr.DataArray(np.arange(25).reshape(5, 5), dims=("x", "y")) >>> a array([[ 0, 1, 2, 3, 4], @@ -1227,7 +1235,7 @@ def isin(self, test_elements): Examples -------- - >>> array = xr.DataArray([1, 2, 3], dims='x') + >>> array = xr.DataArray([1, 2, 3], dims="x") >>> array.isin([1, 3]) array([ True, False, True]) @@ -1296,9 +1304,11 @@ def full_like(other, fill_value, dtype: DTypeLike = None): >>> import numpy as np >>> import xarray as xr - >>> x = xr.DataArray(np.arange(6).reshape(2, 3), - ... dims=['lat', 'lon'], - ... coords={'lat': [1, 2], 'lon': [0, 1, 2]}) + >>> x = xr.DataArray( + ... np.arange(6).reshape(2, 3), + ... dims=["lat", "lon"], + ... coords={"lat": [1, 2], "lon": [0, 1, 2]}, + ... ) >>> x array([[0, 1, 2], @@ -1410,9 +1420,11 @@ def zeros_like(other, dtype: DTypeLike = None): >>> import numpy as np >>> import xarray as xr - >>> x = xr.DataArray(np.arange(6).reshape(2, 3), - ... dims=['lat', 'lon'], - ... coords={'lat': [1, 2], 'lon': [0, 1, 2]}) + >>> x = xr.DataArray( + ... np.arange(6).reshape(2, 3), + ... dims=["lat", "lon"], + ... coords={"lat": [1, 2], "lon": [0, 1, 2]}, + ... ) >>> x array([[0, 1, 2], @@ -1468,9 +1480,11 @@ def ones_like(other, dtype: DTypeLike = None): >>> import numpy as np >>> import xarray as xr - >>> x = xr.DataArray(np.arange(6).reshape(2, 3), - ... dims=['lat', 'lon'], - ... coords={'lat': [1, 2], 'lon': [0, 1, 2]}) + >>> x = xr.DataArray( + ... np.arange(6).reshape(2, 3), + ... dims=["lat", "lon"], + ... coords={"lat": [1, 2], "lon": [0, 1, 2]}, + ... 
) >>> x array([[0, 1, 2], @@ -1479,7 +1493,7 @@ def ones_like(other, dtype: DTypeLike = None): * lat (lat) int64 1 2 * lon (lon) int64 0 1 2 - >>> >>> xr.ones_like(x) + >>> xr.ones_like(x) array([[1, 1, 1], [1, 1, 1]]) diff --git a/xarray/core/computation.py b/xarray/core/computation.py index d2c5c32bc00..f99764448da 100644 --- a/xarray/core/computation.py +++ b/xarray/core/computation.py @@ -889,7 +889,7 @@ def apply_ufunc( You can now apply ``magnitude()`` to ``xr.DataArray`` and ``xr.Dataset`` objects, with automatically preserved dimensions and coordinates, e.g., - >>> array = xr.DataArray([1, 2, 3], coords=[('x', [0.1, 0.2, 0.3])]) + >>> array = xr.DataArray([1, 2, 3], coords=[("x", [0.1, 0.2, 0.3])]) >>> magnitude(array, -array) array([1.414214, 2.828427, 4.242641]) @@ -1093,10 +1093,9 @@ def dot(*arrays, dims=None, **kwargs): >>> import numpy as np >>> import xarray as xr - >>> da_a = xr.DataArray(np.arange(3 * 2).reshape(3, 2), dims=['a', 'b']) - >>> da_b = xr.DataArray(np.arange(3 * 2 * 2).reshape(3, 2, 2), - ... dims=['a', 'b', 'c']) - >>> da_c = xr.DataArray(np.arange(2 * 3).reshape(2, 3), dims=['c', 'd']) + >>> da_a = xr.DataArray(np.arange(3 * 2).reshape(3, 2), dims=["a", "b"]) + >>> da_b = xr.DataArray(np.arange(3 * 2 * 2).reshape(3, 2, 2), dims=["a", "b", "c"]) + >>> da_c = xr.DataArray(np.arange(2 * 3).reshape(2, 3), dims=["c", "d"]) >>> da_a @@ -1121,18 +1120,18 @@ def dot(*arrays, dims=None, **kwargs): [3, 4, 5]]) Dimensions without coordinates: c, d - >>> xr.dot(da_a, da_b, dims=['a', 'b']) + >>> xr.dot(da_a, da_b, dims=["a", "b"]) array([110, 125]) Dimensions without coordinates: c - >>> xr.dot(da_a, da_b, dims=['a']) + >>> xr.dot(da_a, da_b, dims=["a"]) array([[40, 46], [70, 79]]) Dimensions without coordinates: b, c - >>> xr.dot(da_a, da_b, da_c, dims=['b', 'c']) + >>> xr.dot(da_a, da_b, da_c, dims=["b", "c"]) array([[ 9, 14, 19], [ 93, 150, 207], @@ -1238,21 +1237,25 @@ def where(cond, x, y): -------- >>> import xarray as xr >>> import numpy as np - >>> x = xr.DataArray(0.1 * np.arange(10), dims=['lat'], - ... coords={'lat': np.arange(10)}, name='sst') + >>> x = xr.DataArray( + ... 0.1 * np.arange(10), + ... dims=["lat"], + ... coords={"lat": np.arange(10)}, + ... name="sst", + ... ) >>> x array([0. , 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9]) Coordinates: * lat (lat) int64 0 1 2 3 4 5 6 7 8 9 - >>> xr.where(x < 0.5, x, 100*x) + >>> xr.where(x < 0.5, x, 100 * x) array([ 0. , 0.1, 0.2, 0.3, 0.4, 50. , 60. , 70. , 80. , 90. ]) Coordinates: * lat (lat) int64 0 1 2 3 4 5 6 7 8 9 - >>> >>> y = xr.DataArray( + >>> y = xr.DataArray( ... 0.1 * np.arange(9).reshape(3, 3), ... dims=["lat", "lon"], ... coords={"lat": np.arange(3), "lon": 10 + np.arange(3)}, @@ -1276,8 +1279,8 @@ def where(cond, x, y): * lat (lat) int64 0 1 2 * lon (lon) int64 10 11 12 - >>> cond = xr.DataArray([True, False], dims=['x']) - >>> x = xr.DataArray([1, 2], dims=['y']) + >>> cond = xr.DataArray([True, False], dims=["x"]) + >>> x = xr.DataArray([1, 2], dims=["y"]) >>> xr.where(cond, x, 0) array([[1, 2], diff --git a/xarray/core/dataarray.py b/xarray/core/dataarray.py index 6782070da0b..b335eeb293b 100644 --- a/xarray/core/dataarray.py +++ b/xarray/core/dataarray.py @@ -875,8 +875,7 @@ def copy(self, deep: bool = True, data: Any = None) -> "DataArray": Shallow versus deep copy - >>> array = xr.DataArray([1, 2, 3], dims='x', - ... 
coords={'x': ['a', 'b', 'c']}) + >>> array = xr.DataArray([1, 2, 3], dims="x", coords={"x": ["a", "b", "c"]}) >>> array.copy() array([1, 2, 3]) @@ -1344,7 +1343,7 @@ def interp( Examples -------- - >>> da = xr.DataArray([1, 3], [('x', np.arange(2))]) + >>> da = xr.DataArray([1, 3], [("x", np.arange(2))]) >>> da.interp(x=0.5) array(2.0) @@ -1476,8 +1475,9 @@ def swap_dims(self, dims_dict: Mapping[Hashable, Hashable]) -> "DataArray": Examples -------- - >>> arr = xr.DataArray(data=[0, 1], dims="x", - ... coords={"x": ["a", "b"], "y": ("x", [0, 1])}) + >>> arr = xr.DataArray( + ... data=[0, 1], dims="x", coords={"x": ["a", "b"], "y": ("x", [0, 1])}, + ... ) >>> arr array([0, 1]) @@ -1592,12 +1592,11 @@ def set_index( Examples -------- - >>> arr = xr.DataArray(data=np.ones((2, 3)), - ... dims=['x', 'y'], - ... coords={'x': - ... range(2), 'y': - ... range(3), 'a': ('x', [3, 4]) - ... }) + >>> arr = xr.DataArray( + ... data=np.ones((2, 3)), + ... dims=["x", "y"], + ... coords={"x": range(2), "y": range(3), "a": ("x", [3, 4])}, + ... ) >>> arr array([[1., 1., 1.], @@ -1606,7 +1605,7 @@ def set_index( * x (x) int64 0 1 * y (y) int64 0 1 2 a (x) int64 3 4 - >>> arr.set_index(x='a') + >>> arr.set_index(x="a") array([[1., 1., 1.], [1., 1., 1.]]) @@ -1721,8 +1720,10 @@ def stack( Examples -------- - >>> arr = xr.DataArray(np.arange(6).reshape(2, 3), - ... coords=[('x', ['a', 'b']), ('y', [0, 1, 2])]) + >>> arr = xr.DataArray( + ... np.arange(6).reshape(2, 3), + ... coords=[("x", ["a", "b"]), ("y", [0, 1, 2])], + ... ) >>> arr array([[0, 1, 2], @@ -1730,8 +1731,8 @@ def stack( Coordinates: * x (x) |S1 'a' 'b' * y (y) int64 0 1 2 - >>> stacked = arr.stack(z=('x', 'y')) - >>> stacked.indexes['z'] + >>> stacked = arr.stack(z=("x", "y")) + >>> stacked.indexes["z"] MultiIndex(levels=[['a', 'b'], [0, 1, 2]], codes=[[0, 0, 0, 1, 1, 1], [0, 1, 2, 0, 1, 2]], names=['x', 'y']) @@ -1771,8 +1772,10 @@ def unstack( Examples -------- - >>> arr = xr.DataArray(np.arange(6).reshape(2, 3), - ... coords=[('x', ['a', 'b']), ('y', [0, 1, 2])]) + >>> arr = xr.DataArray( + ... np.arange(6).reshape(2, 3), + ... coords=[("x", ["a", "b"]), ("y", [0, 1, 2])], + ... ) >>> arr array([[0, 1, 2], @@ -1780,8 +1783,8 @@ def unstack( Coordinates: * x (x) |S1 'a' 'b' * y (y) int64 0 1 2 - >>> stacked = arr.stack(z=('x', 'y')) - >>> stacked.indexes['z'] + >>> stacked = arr.stack(z=("x", "y")) + >>> stacked.indexes["z"] MultiIndex(levels=[['a', 'b'], [0, 1, 2]], codes=[[0, 0, 0, 1, 1, 1], [0, 1, 2, 0, 1, 2]], names=['x', 'y']) @@ -1820,9 +1823,11 @@ def to_unstacked_dataset(self, dim, level=0): Examples -------- >>> import xarray as xr - >>> arr = xr.DataArray(np.arange(6).reshape(2, 3), - ... coords=[('x', ['a', 'b']), ('y', [0, 1, 2])]) - >>> data = xr.Dataset({'a': arr, 'b': arr.isel(y=0)}) + >>> arr = xr.DataArray( + ... np.arange(6).reshape(2, 3), + ... coords=[("x", ["a", "b"]), ("y", [0, 1, 2])], + ... 
) + >>> data = xr.Dataset({"a": arr, "b": arr.isel(y=0)}) >>> data Dimensions: (x: 2, y: 3) @@ -1832,12 +1837,12 @@ def to_unstacked_dataset(self, dim, level=0): Data variables: a (x, y) int64 0 1 2 3 4 5 b (x) int64 0 3 - >>> stacked = data.to_stacked_array("z", ['y']) - >>> stacked.indexes['z'] + >>> stacked = data.to_stacked_array("z", ["y"]) + >>> stacked.indexes["z"] MultiIndex(levels=[['a', 'b'], [0, 1, 2]], labels=[[0, 0, 0, 1], [0, 1, 2, -1]], names=['variable', 'y']) - >>> roundtripped = stacked.to_unstacked_dataset(dim='z') + >>> roundtripped = stacked.to_unstacked_dataset(dim="z") >>> data.identical(roundtripped) True @@ -2697,13 +2702,13 @@ def diff(self, dim: Hashable, n: int = 1, label: Hashable = "upper") -> "DataArr Examples -------- - >>> arr = xr.DataArray([5, 5, 6, 6], [[1, 2, 3, 4]], ['x']) - >>> arr.diff('x') + >>> arr = xr.DataArray([5, 5, 6, 6], [[1, 2, 3, 4]], ["x"]) + >>> arr.diff("x") array([0, 1, 0]) Coordinates: * x (x) int64 2 3 4 - >>> arr.diff('x', 2) + >>> arr.diff("x", 2) array([ 1, -1]) Coordinates: @@ -2753,7 +2758,7 @@ def shift( Examples -------- - >>> arr = xr.DataArray([5, 6, 7], dims='x') + >>> arr = xr.DataArray([5, 6, 7], dims="x") >>> arr.shift(x=1) array([ nan, 5., 6.]) @@ -2803,7 +2808,7 @@ def roll( Examples -------- - >>> arr = xr.DataArray([5, 6, 7], dims='x') + >>> arr = xr.DataArray([5, 6, 7], dims="x") >>> arr.roll(x=1) array([7, 5, 6]) @@ -2852,9 +2857,9 @@ def dot( -------- >>> da_vals = np.arange(6 * 5 * 4).reshape((6, 5, 4)) - >>> da = xr.DataArray(da_vals, dims=['x', 'y', 'z']) + >>> da = xr.DataArray(da_vals, dims=["x", "y", "z"]) >>> dm_vals = np.arange(4) - >>> dm = xr.DataArray(dm_vals, dims=['z']) + >>> dm = xr.DataArray(dm_vals, dims=["z"]) >>> dm.dims ('z') @@ -2914,9 +2919,11 @@ def sortby( Examples -------- - >>> da = xr.DataArray(np.random.rand(5), - ... coords=[pd.date_range('1/1/2000', periods=5)], - ... dims='time') + >>> da = xr.DataArray( + ... np.random.rand(5), + ... coords=[pd.date_range("1/1/2000", periods=5)], + ... dims="time", + ... ) >>> da array([ 0.965471, 0.615637, 0.26532 , 0.270962, 0.552878]) @@ -3057,8 +3064,8 @@ def rank( Examples -------- - >>> arr = xr.DataArray([5, 6, 7], dims='x') - >>> arr.rank('x') + >>> arr = xr.DataArray([5, 6, 7], dims="x") + >>> arr.rank("x") array([ 1., 2., 3.]) Dimensions without coordinates: x @@ -3098,8 +3105,11 @@ def differentiate( Examples -------- - >>> da = xr.DataArray(np.arange(12).reshape(4, 3), dims=['x', 'y'], - ... coords={'x': [0, 0.1, 1.1, 1.2]}) + >>> da = xr.DataArray( + ... np.arange(12).reshape(4, 3), + ... dims=["x", "y"], + ... coords={"x": [0, 0.1, 1.1, 1.2]}, + ... ) >>> da array([[ 0, 1, 2], @@ -3110,7 +3120,7 @@ def differentiate( * x (x) float64 0.0 0.1 1.1 1.2 Dimensions without coordinates: y >>> - >>> da.differentiate('x') + >>> da.differentiate("x") array([[30. , 30. , 30. ], [27.545455, 27.545455, 27.545455], @@ -3152,8 +3162,11 @@ def integrate( Examples -------- - >>> da = xr.DataArray(np.arange(12).reshape(4, 3), dims=['x', 'y'], - ... coords={'x': [0, 0.1, 1.1, 1.2]}) + >>> da = xr.DataArray( + ... np.arange(12).reshape(4, 3), + ... dims=["x", "y"], + ... coords={"x": [0, 0.1, 1.1, 1.2]}, + ... 
) >>> da array([[ 0, 1, 2], @@ -3164,7 +3177,7 @@ def integrate( * x (x) float64 0.0 0.1 1.1 1.2 Dimensions without coordinates: y >>> - >>> da.integrate('x') + >>> da.integrate("x") array([5.4, 6.6, 7.8]) Dimensions without coordinates: y diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index 880c574c9cb..d5ad1123a54 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -1010,8 +1010,9 @@ def copy(self, deep: bool = False, data: Mapping = None) -> "Dataset": Shallow copy versus deep copy >>> da = xr.DataArray(np.random.randn(2, 3)) - >>> ds = xr.Dataset({'foo': da, 'bar': ('x', [-1, 2])}, - ... coords={'x': ['one', 'two']}) + >>> ds = xr.Dataset( + ... {"foo": da, "bar": ("x", [-1, 2])}, coords={"x": ["one", "two"]}, + ... ) >>> ds.copy() Dimensions: (dim_0: 2, dim_1: 3, x: 2) @@ -1023,7 +1024,7 @@ def copy(self, deep: bool = False, data: Mapping = None) -> "Dataset": bar (x) int64 -1 2 >>> ds_0 = ds.copy(deep=False) - >>> ds_0['foo'][0, 0] = 7 + >>> ds_0["foo"][0, 0] = 7 >>> ds_0 Dimensions: (dim_0: 2, dim_1: 3, x: 2) @@ -1048,7 +1049,9 @@ def copy(self, deep: bool = False, data: Mapping = None) -> "Dataset": structure of the original object, but with the new data. Original object is unaffected. - >>> ds.copy(data={'foo': np.arange(6).reshape(2, 3), 'bar': ['a', 'b']}) + >>> ds.copy( + ... data={"foo": np.arange(6).reshape(2, 3), "bar": ["a", "b"]} + ... ) Dimensions: (dim_0: 2, dim_1: 3, x: 2) Coordinates: @@ -2358,9 +2361,10 @@ def reindex( >>> x = xr.Dataset( ... { ... "temperature": ("station", 20 * np.random.rand(4)), - ... "pressure": ("station", 500 * np.random.rand(4)) + ... "pressure": ("station", 500 * np.random.rand(4)), ... }, - ... coords={"station": ["boston", "nyc", "seattle", "denver"]}) + ... coords={"station": ["boston", "nyc", "seattle", "denver"]}, + ... ) >>> x Dimensions: (station: 4) @@ -2375,8 +2379,8 @@ def reindex( Create a new index and reindex the dataset. By default values in the new index that do not have corresponding records in the dataset are assigned `NaN`. - >>> new_index = ['boston', 'austin', 'seattle', 'lincoln'] - >>> x.reindex({'station': new_index}) + >>> new_index = ["boston", "austin", "seattle", "lincoln"] + >>> x.reindex({"station": new_index}) Dimensions: (station: 4) Coordinates: @@ -2387,7 +2391,7 @@ def reindex( We can fill in the missing values by passing a value to the keyword `fill_value`. - >>> x.reindex({'station': new_index}, fill_value=0) + >>> x.reindex({"station": new_index}, fill_value=0) Dimensions: (station: 4) Coordinates: @@ -2399,7 +2403,7 @@ def reindex( Because the index is not monotonically increasing or decreasing, we cannot use arguments to the keyword method to fill the `NaN` values. - >>> x.reindex({'station': new_index}, method='nearest') + >>> x.reindex({"station": new_index}, method="nearest") Traceback (most recent call last): ... raise ValueError('index must be monotonic increasing or decreasing') @@ -2410,10 +2414,14 @@ def reindex( >>> x2 = xr.Dataset( ... { - ... "temperature": ("time", [15.57, 12.77, np.nan, 0.3081, 16.59, 15.12]), - ... "pressure": ("time", 500 * np.random.rand(6)) + ... "temperature": ( + ... "time", + ... [15.57, 12.77, np.nan, 0.3081, 16.59, 15.12], + ... ), + ... "pressure": ("time", 500 * np.random.rand(6)), ... }, - ... coords={"time": pd.date_range('01/01/2019', periods=6, freq='D')}) + ... coords={"time": pd.date_range("01/01/2019", periods=6, freq="D")}, + ... 
) >>> x2 Dimensions: (time: 6) @@ -2425,8 +2433,8 @@ def reindex( Suppose we decide to expand the dataset to cover a wider date range. - >>> time_index2 = pd.date_range('12/29/2018', periods=10, freq='D') - >>> x2.reindex({'time': time_index2}) + >>> time_index2 = pd.date_range("12/29/2018", periods=10, freq="D") + >>> x2.reindex({"time": time_index2}) Dimensions: (time: 10) Coordinates: @@ -2441,7 +2449,7 @@ def reindex( For example, to back-propagate the last valid value to fill the `NaN` values, pass `bfill` as an argument to the `method` keyword. - >>> x3 = x2.reindex({'time': time_index2}, method='bfill') + >>> x3 = x2.reindex({"time": time_index2}, method="bfill") >>> x3 Dimensions: (time: 10) @@ -2896,8 +2904,10 @@ def swap_dims( Examples -------- - >>> ds = xr.Dataset(data_vars={"a": ("x", [5, 7]), "b": ("x", [0.1, 2.4])}, - ... coords={"x": ["a", "b"], "y": ("x", [0, 1])}) + >>> ds = xr.Dataset( + ... data_vars={"a": ("x", [5, 7]), "b": ("x", [0.1, 2.4])}, + ... coords={"x": ["a", "b"], "y": ("x", [0, 1])}, + ... ) >>> ds Dimensions: (x: 2) @@ -3138,13 +3148,12 @@ def set_index( Examples -------- - >>> arr = xr.DataArray(data=np.ones((2, 3)), - ... dims=['x', 'y'], - ... coords={'x': - ... range(2), 'y': - ... range(3), 'a': ('x', [3, 4]) - ... }) - >>> ds = xr.Dataset({'v': arr}) + >>> arr = xr.DataArray( + ... data=np.ones((2, 3)), + ... dims=["x", "y"], + ... coords={"x": range(2), "y": range(3), "a": ("x", [3, 4])}, + ... ) + >>> ds = xr.Dataset({"v": arr}) >>> ds Dimensions: (x: 2, y: 3) @@ -3154,7 +3163,7 @@ def set_index( a (x) int64 3 4 Data variables: v (x, y) float64 1.0 1.0 1.0 1.0 1.0 1.0 - >>> ds.set_index(x='a') + >>> ds.set_index(x="a") Dimensions: (x: 2, y: 3) Coordinates: @@ -3358,9 +3367,11 @@ def to_stacked_array( Examples -------- >>> data = xr.Dataset( - ... data_vars={'a': (('x', 'y'), [[0, 1, 2], [3, 4, 5]]), - ... 'b': ('x', [6, 7])}, - ... coords={'y': ['u', 'v', 'w']} + ... data_vars={ + ... "a": (("x", "y"), [[0, 1, 2], [3, 4, 5]]), + ... "b": ("x", [6, 7]), + ... }, + ... coords={"y": ["u", "v", "w"]}, ... ) >>> data @@ -3373,7 +3384,7 @@ def to_stacked_array( a (x, y) int64 0 1 2 3 4 5 b (x) int64 6 7 - >>> data.to_stacked_array("z", sample_dims=['x']) + >>> data.to_stacked_array("z", sample_dims=["x"]) array([[0, 1, 2, 6], [3, 4, 5, 7]]) @@ -3744,9 +3755,9 @@ def drop_sel(self, labels=None, *, errors="raise", **labels_kwargs): Examples -------- >>> data = np.random.randn(2, 3) - >>> labels = ['a', 'b', 'c'] - >>> ds = xr.Dataset({'A': (['x', 'y'], data), 'y': labels}) - >>> ds.drop_sel(y=['a', 'c']) + >>> labels = ["a", "b", "c"] + >>> ds = xr.Dataset({"A": (["x", "y"], data), "y": labels}) + >>> ds.drop_sel(y=["a", "c"]) Dimensions: (x: 2, y: 1) Coordinates: @@ -3754,7 +3765,7 @@ def drop_sel(self, labels=None, *, errors="raise", **labels_kwargs): Dimensions without coordinates: x Data variables: A (x, y) float64 -0.3454 0.1734 - >>> ds.drop_sel(y='b') + >>> ds.drop_sel(y="b") Dimensions: (x: 2, y: 2) Coordinates: @@ -3959,9 +3970,10 @@ def fillna(self, value: Any) -> "Dataset": ... "A": ("x", [np.nan, 2, np.nan, 0]), ... "B": ("x", [3, 4, np.nan, 1]), ... "C": ("x", [np.nan, np.nan, np.nan, 5]), - ... "D": ("x", [np.nan, 3, np.nan, 4]) + ... "D": ("x", [np.nan, 3, np.nan, 4]), ... }, - ... coords={"x": [0, 1, 2, 3]}) + ... coords={"x": [0, 1, 2, 3]}, + ... ) >>> ds Dimensions: (x: 4) @@ -3988,7 +4000,7 @@ def fillna(self, value: Any) -> "Dataset": Replace all `NaN` elements in column ‘A’, ‘B’, ‘C’, and ‘D’, with 0, 1, 2, and 3 respectively. 
- >>> values = {'A': 0, 'B': 1, 'C': 2, 'D': 3} + >>> values = {"A": 0, "B": 1, "C": 2, "D": 3} >>> ds.fillna(value=values) Dimensions: (x: 4) @@ -4295,7 +4307,7 @@ def map( Examples -------- >>> da = xr.DataArray(np.random.randn(2, 3)) - >>> ds = xr.Dataset({'foo': da, 'bar': ('x', [-1, 2])}) + >>> ds = xr.Dataset({"foo": da, "bar": ("x", [-1, 2])}) >>> ds Dimensions: (dim_0: 2, dim_1: 3, x: 2) @@ -4382,7 +4394,10 @@ def assign( >>> import xarray as xr >>> x = xr.Dataset( ... { - ... "temperature_c": (("lat", "lon"), 20 * np.random.rand(4).reshape(2, 2)), + ... "temperature_c": ( + ... ("lat", "lon"), + ... 20 * np.random.rand(4).reshape(2, 2), + ... ), ... "precipitation": (("lat", "lon"), np.random.rand(4).reshape(2, 2)), ... }, ... coords={"lat": [10, 20], "lon": [150, 160]}, @@ -4399,7 +4414,7 @@ def assign( Where the value is a callable, evaluated on dataset: - >>> x.assign(temperature_f = lambda x: x.temperature_c * 9 / 5 + 32) + >>> x.assign(temperature_f=lambda x: x.temperature_c * 9 / 5 + 32) Dimensions: (lat: 2, lon: 2) Coordinates: @@ -4902,15 +4917,15 @@ def diff(self, dim, n=1, label="upper"): Examples -------- - >>> ds = xr.Dataset({'foo': ('x', [5, 5, 6, 6])}) - >>> ds.diff('x') + >>> ds = xr.Dataset({"foo": ("x", [5, 5, 6, 6])}) + >>> ds.diff("x") Dimensions: (x: 3) Coordinates: * x (x) int64 1 2 3 Data variables: foo (x) int64 0 1 0 - >>> ds.diff('x', 2) + >>> ds.diff("x", 2) Dimensions: (x: 2) Coordinates: @@ -4994,7 +5009,7 @@ def shift(self, shifts=None, fill_value=dtypes.NA, **shifts_kwargs): Examples -------- - >>> ds = xr.Dataset({'foo': ('x', list('abcde'))}) + >>> ds = xr.Dataset({"foo": ("x", list("abcde"))}) >>> ds.shift(x=2) Dimensions: (x: 5) @@ -5053,7 +5068,7 @@ def roll(self, shifts=None, roll_coords=None, **shifts_kwargs): Examples -------- - >>> ds = xr.Dataset({'foo': ('x', list('abcde'))}) + >>> ds = xr.Dataset({"foo": ("x", list("abcde"))}) >>> ds.roll(x=2) Dimensions: (x: 5) @@ -5566,19 +5581,23 @@ def filter_by_attrs(self, **kwargs): >>> precip = 10 * np.random.rand(2, 2, 3) >>> lon = [[-99.83, -99.32], [-99.79, -99.23]] >>> lat = [[42.25, 42.21], [42.63, 42.59]] - >>> dims = ['x', 'y', 'time'] - >>> temp_attr = dict(standard_name='air_potential_temperature') - >>> precip_attr = dict(standard_name='convective_precipitation_flux') - >>> ds = xr.Dataset({ - ... 'temperature': (dims, temp, temp_attr), - ... 'precipitation': (dims, precip, precip_attr)}, - ... coords={ - ... 'lon': (['x', 'y'], lon), - ... 'lat': (['x', 'y'], lat), - ... 'time': pd.date_range('2014-09-06', periods=3), - ... 'reference_time': pd.Timestamp('2014-09-05')}) + >>> dims = ["x", "y", "time"] + >>> temp_attr = dict(standard_name="air_potential_temperature") + >>> precip_attr = dict(standard_name="convective_precipitation_flux") + >>> ds = xr.Dataset( + ... { + ... "temperature": (dims, temp, temp_attr), + ... "precipitation": (dims, precip, precip_attr), + ... }, + ... coords={ + ... "lon": (["x", "y"], lon), + ... "lat": (["x", "y"], lat), + ... "time": pd.date_range("2014-09-06", periods=3), + ... "reference_time": pd.Timestamp("2014-09-05"), + ... }, + ... ) >>> # Get variables matching a specific standard_name. 
- >>> ds.filter_by_attrs(standard_name='convective_precipitation_flux') + >>> ds.filter_by_attrs(standard_name="convective_precipitation_flux") Dimensions: (time: 3, x: 2, y: 2) Coordinates: diff --git a/xarray/core/extensions.py b/xarray/core/extensions.py index 79abbccea39..e81070d18fd 100644 --- a/xarray/core/extensions.py +++ b/xarray/core/extensions.py @@ -110,8 +110,9 @@ def plot(self): Back in an interactive IPython session: - >>> ds = xarray.Dataset({'longitude': np.linspace(0, 10), - ... 'latitude': np.linspace(0, 20)}) + >>> ds = xarray.Dataset( + ... {"longitude": np.linspace(0, 10), "latitude": np.linspace(0, 20)} + ... ) >>> ds.geo.center (5.0, 10.0) >>> ds.geo.plot() diff --git a/xarray/core/merge.py b/xarray/core/merge.py index 10c7804d718..1d1b8d39a20 100644 --- a/xarray/core/merge.py +++ b/xarray/core/merge.py @@ -678,7 +678,7 @@ def merge( var2 (lat, lon) float64 5.0 nan 6.0 nan nan nan 7.0 nan 8.0 var3 (time, lon) float64 0.0 nan 3.0 4.0 nan 9.0 - >>> xr.merge([x, y, z], compat='identical') + >>> xr.merge([x, y, z], compat="identical") Dimensions: (lat: 3, lon: 3, time: 2) Coordinates: @@ -690,7 +690,7 @@ def merge( var2 (lat, lon) float64 5.0 nan 6.0 nan nan nan 7.0 nan 8.0 var3 (time, lon) float64 0.0 nan 3.0 4.0 nan 9.0 - >>> xr.merge([x, y, z], compat='equals') + >>> xr.merge([x, y, z], compat="equals") Dimensions: (lat: 3, lon: 3, time: 2) Coordinates: @@ -702,7 +702,7 @@ def merge( var2 (lat, lon) float64 5.0 nan 6.0 nan nan nan 7.0 nan 8.0 var3 (time, lon) float64 0.0 nan 3.0 4.0 nan 9.0 - >>> xr.merge([x, y, z], compat='equals', fill_value=-999.) + >>> xr.merge([x, y, z], compat="equals", fill_value=-999.0) Dimensions: (lat: 3, lon: 3, time: 2) Coordinates: @@ -714,7 +714,7 @@ def merge( var2 (lat, lon) float64 5.0 -999.0 6.0 -999.0 ... -999.0 7.0 -999.0 8.0 var3 (time, lon) float64 0.0 -999.0 3.0 4.0 -999.0 9.0 - >>> xr.merge([x, y, z], join='override') + >>> xr.merge([x, y, z], join="override") Dimensions: (lat: 2, lon: 2, time: 2) Coordinates: @@ -726,7 +726,7 @@ def merge( var2 (lat, lon) float64 5.0 6.0 7.0 8.0 var3 (time, lon) float64 0.0 3.0 4.0 9.0 - >>> xr.merge([x, y, z], join='inner') + >>> xr.merge([x, y, z], join="inner") Dimensions: (lat: 1, lon: 1, time: 2) Coordinates: @@ -738,7 +738,7 @@ def merge( var2 (lat, lon) float64 5.0 var3 (time, lon) float64 0.0 4.0 - >>> xr.merge([x, y, z], compat='identical', join='inner') + >>> xr.merge([x, y, z], compat="identical", join="inner") Dimensions: (lat: 1, lon: 1, time: 2) Coordinates: @@ -750,7 +750,7 @@ def merge( var2 (lat, lon) float64 5.0 var3 (time, lon) float64 0.0 4.0 - >>> xr.merge([x, y, z], compat='broadcast_equals', join='outer') + >>> xr.merge([x, y, z], compat="broadcast_equals", join="outer") Dimensions: (lat: 3, lon: 3, time: 2) Coordinates: @@ -762,7 +762,7 @@ def merge( var2 (lat, lon) float64 5.0 nan 6.0 nan nan nan 7.0 nan 8.0 var3 (time, lon) float64 0.0 nan 3.0 4.0 nan 9.0 - >>> xr.merge([x, y, z], join='exact') + >>> xr.merge([x, y, z], join="exact") Traceback (most recent call last): ... 
ValueError: indexes along dimension 'lat' are not equal diff --git a/xarray/core/nputils.py b/xarray/core/nputils.py index cf189e471cc..5dd8219ebca 100644 --- a/xarray/core/nputils.py +++ b/xarray/core/nputils.py @@ -165,7 +165,7 @@ def _rolling_window(a, window, axis=-1): Examples -------- - >>> x=np.arange(10).reshape((2,5)) + >>> x = np.arange(10).reshape((2, 5)) >>> np.rolling_window(x, 3, axis=-1) array([[[0, 1, 2], [1, 2, 3], [2, 3, 4]], [[5, 6, 7], [6, 7, 8], [7, 8, 9]]]) diff --git a/xarray/core/options.py b/xarray/core/options.py index 15d05159d6d..5d81ca40a6e 100644 --- a/xarray/core/options.py +++ b/xarray/core/options.py @@ -108,7 +108,7 @@ class set_options: You can use ``set_options`` either as a context manager: - >>> ds = xr.Dataset({'x': np.arange(1000)}) + >>> ds = xr.Dataset({"x": np.arange(1000)}) >>> with xr.set_options(display_width=40): ... print(ds) diff --git a/xarray/core/parallel.py b/xarray/core/parallel.py index facfa06b23c..8429d0f71ad 100644 --- a/xarray/core/parallel.py +++ b/xarray/core/parallel.py @@ -162,18 +162,19 @@ def map_blocks( ``xr.map_blocks()`` allows for parallel operations with knowledge of ``xarray``, its indices, and its methods like ``.groupby()``. - >>> def calculate_anomaly(da, groupby_type='time.month'): + >>> def calculate_anomaly(da, groupby_type="time.month"): ... # Necessary workaround to xarray's check with zero dimensions ... # https://github.com/pydata/xarray/issues/3575 ... if sum(da.shape) == 0: ... return da ... gb = da.groupby(groupby_type) - ... clim = gb.mean(dim='time') + ... clim = gb.mean(dim="time") ... return gb - clim - >>> time = xr.cftime_range('1990-01', '1992-01', freq='M') + >>> time = xr.cftime_range("1990-01", "1992-01", freq="M") >>> np.random.seed(123) - >>> array = xr.DataArray(np.random.rand(len(time)), - ... dims="time", coords=[time]).chunk() + >>> array = xr.DataArray( + ... np.random.rand(len(time)), dims="time", coords=[time] + ... ).chunk() >>> xr.map_blocks(calculate_anomaly, array).compute() array([ 0.12894847, 0.11323072, -0.0855964 , -0.09334032, 0.26848862, @@ -187,7 +188,9 @@ def map_blocks( Note that one must explicitly use ``args=[]`` and ``kwargs={}`` to pass arguments to the function being applied in ``xr.map_blocks()``: - >>> xr.map_blocks(calculate_anomaly, array, kwargs={'groupby_type': 'time.year'}) + >>> xr.map_blocks( + ... calculate_anomaly, array, kwargs={"groupby_type": "time.year"}, + ... 
) array([ 0.15361741, -0.25671244, -0.31600032, 0.008463 , 0.1766172 , -0.11974531, 0.43791243, 0.14197797, -0.06191987, -0.15073425, diff --git a/xarray/core/rolling.py b/xarray/core/rolling.py index 5f633abbde6..58f0b275b21 100644 --- a/xarray/core/rolling.py +++ b/xarray/core/rolling.py @@ -231,17 +231,17 @@ def construct(self, window_dim, stride=1, fill_value=dtypes.NA): Examples -------- - >>> da = xr.DataArray(np.arange(8).reshape(2, 4), dims=('a', 'b')) + >>> da = xr.DataArray(np.arange(8).reshape(2, 4), dims=("a", "b")) >>> rolling = da.rolling(b=3) - >>> rolling.construct('window_dim') + >>> rolling.construct("window_dim") array([[[np.nan, np.nan, 0], [np.nan, 0, 1], [0, 1, 2], [1, 2, 3]], [[np.nan, np.nan, 4], [np.nan, 4, 5], [4, 5, 6], [5, 6, 7]]]) Dimensions without coordinates: a, b, window_dim >>> rolling = da.rolling(b=3, center=True) - >>> rolling.construct('window_dim') + >>> rolling.construct("window_dim") array([[[np.nan, 0, 1], [0, 1, 2], [1, 2, 3], [2, 3, np.nan]], [[np.nan, 4, 5], [4, 5, 6], [5, 6, 7], [6, 7, np.nan]]]) @@ -279,9 +279,9 @@ def reduce(self, func, **kwargs): Examples -------- - >>> da = xr.DataArray(np.arange(8).reshape(2, 4), dims=('a', 'b')) + >>> da = xr.DataArray(np.arange(8).reshape(2, 4), dims=("a", "b")) >>> rolling = da.rolling(b=3) - >>> rolling.construct('window_dim') + >>> rolling.construct("window_dim") array([[[np.nan, np.nan, 0], [np.nan, 0, 1], [0, 1, 2], [1, 2, 3]], [[np.nan, np.nan, 4], [np.nan, 4, 5], [4, 5, 6], [5, 6, 7]]]) diff --git a/xarray/core/rolling_exp.py b/xarray/core/rolling_exp.py index ac6768e8a9c..6ef63e42291 100644 --- a/xarray/core/rolling_exp.py +++ b/xarray/core/rolling_exp.py @@ -94,8 +94,8 @@ def mean(self): Examples -------- - >>> da = xr.DataArray([1,1,2,2,2], dims='x') - >>> da.rolling_exp(x=2, window_type='span').mean() + >>> da = xr.DataArray([1, 1, 2, 2, 2], dims="x") + >>> da.rolling_exp(x=2, window_type="span").mean() array([1. , 1. 
, 1.692308, 1.9 , 1.966942]) Dimensions without coordinates: x diff --git a/xarray/core/variable.py b/xarray/core/variable.py index 435edb6f014..01f816941b5 100644 --- a/xarray/core/variable.py +++ b/xarray/core/variable.py @@ -843,7 +843,7 @@ def copy(self, deep=True, data=None): Shallow copy versus deep copy - >>> var = xr.Variable(data=[1, 2, 3], dims='x') + >>> var = xr.Variable(data=[1, 2, 3], dims="x") >>> var.copy() array([1, 2, 3]) @@ -1844,13 +1844,13 @@ def rolling_window( Examples -------- - >>> v=Variable(('a', 'b'), np.arange(8).reshape((2,4))) - >>> v.rolling_window(x, 'b', 3, 'window_dim') + >>> v = Variable(("a", "b"), np.arange(8).reshape((2, 4))) + >>> v.rolling_window(x, "b", 3, "window_dim") array([[[nan, nan, 0], [nan, 0, 1], [0, 1, 2], [1, 2, 3]], [[nan, nan, 4], [nan, 4, 5], [4, 5, 6], [5, 6, 7]]]) - >>> v.rolling_window(x, 'b', 3, 'window_dim', center=True) + >>> v.rolling_window(x, "b", 3, "window_dim", center=True) array([[[nan, 0, 1], [0, 1, 2], [1, 2, 3], [2, 3, nan]], [[nan, 4, 5], [4, 5, 6], [5, 6, 7], [6, 7, nan]]]) From 65a5bff79479c4b56d6f733236fe544b7f4120a8 Mon Sep 17 00:00:00 2001 From: Eric Jansen Date: Tue, 17 Mar 2020 17:34:36 +0100 Subject: [PATCH 43/75] Fix recombination in groupby when changing size along the grouped dimension (#3807) * Fix recombination in groupby when changing size along the grouped dimension * cleanup tests * minor test rename * minor fix Co-authored-by: dcherian Co-authored-by: Deepak Cherian --- doc/whats-new.rst | 6 ++++-- xarray/core/groupby.py | 8 +++++--- xarray/tests/test_groupby.py | 33 +++++++++++++++++++++++++++++++++ 3 files changed, 42 insertions(+), 5 deletions(-) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 9b78d046148..aad0e083a8c 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -57,8 +57,10 @@ Bug fixes - Fix :py:meth:`Dataset.interp` when indexing array shares coordinates with the indexed variable (:issue:`3252`). By `David Huard `_. - - +- Fix recombination of groups in :py:meth:`Dataset.groupby` and + :py:meth:`DataArray.groupby` when performing an operation that changes the + size of the groups along the grouped dimension. By `Eric Jansen + `_. - Fix use of multi-index with categorical values (:issue:`3674`). By `Matthieu Ancellin `_. - Fix alignment with ``join="override"`` when some dimensions are unindexed. (:issue:`3681`). 
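A minimal sketch of the behaviour this patch fixes, mirroring the new tests added below (the data values are illustrative):

    >>> da = xr.DataArray([1, 2, 3, 4, 5, 6], [("x", [1, 1, 1, 2, 2, 2])])
    >>> shrunk = da.groupby("x").map(lambda g: g.isel(x=[0, 1]))
    >>> shrunk.values  # each 3-element group is shrunk to its first 2 elements
    array([1, 2, 4, 5])

Previously the recombination step reordered the result with a permutation computed from the original group positions, which no longer matched the new size along ``x``.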
diff --git a/xarray/core/groupby.py b/xarray/core/groupby.py index 4223d9dc255..67e8f0588b3 100644 --- a/xarray/core/groupby.py +++ b/xarray/core/groupby.py @@ -720,7 +720,7 @@ def assign_coords(self, coords=None, **coords_kwargs): def _maybe_reorder(xarray_obj, dim, positions): order = _inverse_permutation_indices(positions) - if order is None: + if order is None or len(order) != xarray_obj.sizes[dim]: return xarray_obj else: return xarray_obj[{dim: order}] @@ -838,7 +838,8 @@ def _combine(self, applied, restore_coord_dims=False, shortcut=False): if isinstance(combined, type(self._obj)): # only restore dimension order for arrays combined = self._restore_dim_order(combined) - if coord is not None: + # assign coord when the applied function does not return that coord + if coord is not None and dim not in applied_example.dims: if shortcut: coord_var = as_variable(coord) combined._coords[coord.name] = coord_var @@ -954,7 +955,8 @@ def _combine(self, applied): coord, dim, positions = self._infer_concat_args(applied_example) combined = concat(applied, dim) combined = _maybe_reorder(combined, dim, positions) - if coord is not None: + # assign coord when the applied function does not return that coord + if coord is not None and dim not in applied_example.dims: combined[coord.name] = coord combined = self._maybe_restore_empty_groups(combined) combined = self._maybe_unstack(combined) diff --git a/xarray/tests/test_groupby.py b/xarray/tests/test_groupby.py index 77558e741be..8ab4b7b2f80 100644 --- a/xarray/tests/test_groupby.py +++ b/xarray/tests/test_groupby.py @@ -107,6 +107,39 @@ def test_groupby_input_mutation(): assert_identical(array, array_copy) # should not modify inputs +@pytest.mark.parametrize( + "obj", + [ + xr.DataArray([1, 2, 3, 4, 5, 6], [("x", [1, 1, 1, 2, 2, 2])]), + xr.Dataset({"foo": ("x", [1, 2, 3, 4, 5, 6])}, {"x": [1, 1, 1, 2, 2, 2]}), + ], +) +def test_groupby_map_shrink_groups(obj): + expected = obj.isel(x=[0, 1, 3, 4]) + actual = obj.groupby("x").map(lambda f: f.isel(x=[0, 1])) + assert_identical(expected, actual) + + +@pytest.mark.parametrize( + "obj", + [ + xr.DataArray([1, 2, 3], [("x", [1, 2, 2])]), + xr.Dataset({"foo": ("x", [1, 2, 3])}, {"x": [1, 2, 2]}), + ], +) +def test_groupby_map_change_group_size(obj): + def func(group): + if group.sizes["x"] == 1: + result = group.isel(x=[0, 0]) + else: + result = group.isel(x=[0]) + return result + + expected = obj.isel(x=[0, 0, 1]) + actual = obj.groupby("x").map(func) + assert_identical(expected, actual) + + def test_da_groupby_map_func_args(): def func(arg1, arg2, arg3=0): return arg1 + arg2 + arg3 From df614b96082b38966a329b115082cd8dddf9fb29 Mon Sep 17 00:00:00 2001 From: Mathias Hauser Date: Thu, 19 Mar 2020 15:29:41 +0100 Subject: [PATCH 44/75] Feature/weighted (#2922) * weighted for DataArray * remove some commented code * pep8 and faulty import tests * add weighted sum, replace 0s in sum_of_wgt * weighted: overhaul tests * weighted: pep8 * weighted: pep8 lines * weighted update docs * weighted: fix typo * weighted: pep8 * undo changes to avoid merge conflict * add weighted to dataarray again * remove super * overhaul core/weighted.py * add DatasetWeighted class * _maybe_get_all_dims return sorted tuple * work on: test_weighted * black and flake8 * Apply suggestions from code review (docs) * restructure interim * restructure classes * update weighted.py * black * use map; add keep_attrs * implement expected_weighted; update tests * add whats new * undo changes to whats-new * F811: noqa where? 
* api.rst * add to computation * small updates * add example to gallery * typo * another typo * correct docstring in core/common.py * typos * adjust review * clean tests * add test nonequal coords * comment on use of dot * fix erroneous merge * update tests * move example to notebook * move whats-new entry to 15.1 * some doc updates * dot to own function * simplify some tests * Doc updates * very minor changes. * fix & add references * doc: return 0/NaN on 0 weights * Update xarray/core/common.py Co-authored-by: dcherian Co-authored-by: Deepak Cherian --- doc/api.rst | 18 ++ doc/computation.rst | 86 ++++- doc/examples.rst | 1 + doc/examples/area_weighted_temperature.ipynb | 226 ++++++++++++++ doc/whats-new.rst | 3 + xarray/core/common.py | 19 ++ xarray/core/dataarray.py | 2 + xarray/core/dataset.py | 2 + xarray/core/weighted.py | 255 +++++++++++++++ xarray/tests/test_weighted.py | 311 +++++++++++++++++++ 10 files changed, 922 insertions(+), 1 deletion(-) create mode 100644 doc/examples/area_weighted_temperature.ipynb create mode 100644 xarray/core/weighted.py create mode 100644 xarray/tests/test_weighted.py diff --git a/doc/api.rst b/doc/api.rst index 4492d882355..43a9cf53ead 100644 --- a/doc/api.rst +++ b/doc/api.rst @@ -165,6 +165,7 @@ Computation Dataset.groupby_bins Dataset.rolling Dataset.rolling_exp + Dataset.weighted Dataset.coarsen Dataset.resample Dataset.diff @@ -340,6 +341,7 @@ Computation DataArray.groupby_bins DataArray.rolling DataArray.rolling_exp + DataArray.weighted DataArray.coarsen DataArray.dt DataArray.resample @@ -577,6 +579,22 @@ Rolling objects core.rolling.DatasetRolling.reduce core.rolling_exp.RollingExp +Weighted objects +================ + +.. autosummary:: + :toctree: generated/ + + core.weighted.DataArrayWeighted + core.weighted.DataArrayWeighted.mean + core.weighted.DataArrayWeighted.sum + core.weighted.DataArrayWeighted.sum_of_weights + core.weighted.DatasetWeighted + core.weighted.DatasetWeighted.mean + core.weighted.DatasetWeighted.sum + core.weighted.DatasetWeighted.sum_of_weights + + Coarsen objects =============== diff --git a/doc/computation.rst b/doc/computation.rst index 1ac30f55ee7..5309f27e9b6 100644 --- a/doc/computation.rst +++ b/doc/computation.rst @@ -1,3 +1,5 @@ +.. currentmodule:: xarray + .. _comput: ########### @@ -241,12 +243,94 @@ You can also use ``construct`` to compute a weighted rolling sum: To avoid this, use ``skipna=False`` as the above example. +.. _comput.weighted: + +Weighted array reductions +========================= + +:py:class:`DataArray` and :py:class:`Dataset` objects include :py:meth:`DataArray.weighted` +and :py:meth:`Dataset.weighted` array reduction methods. They currently +support weighted ``sum`` and weighted ``mean``. + +.. ipython:: python + + coords = dict(month=('month', [1, 2, 3])) + + prec = xr.DataArray([1.1, 1.0, 0.9], dims=('month', ), coords=coords) + weights = xr.DataArray([31, 28, 31], dims=('month', ), coords=coords) + +Create a weighted object: + +.. ipython:: python + + weighted_prec = prec.weighted(weights) + weighted_prec + +Calculate the weighted sum: + +.. ipython:: python + + weighted_prec.sum() + +Calculate the weighted mean: + +.. ipython:: python + + weighted_prec.mean(dim="month") + +The weighted sum corresponds to: + +.. ipython:: python + + weighted_sum = (prec * weights).sum() + weighted_sum + +and the weighted mean to: + +.. ipython:: python + + weighted_mean = weighted_sum / weights.sum() + weighted_mean + +However, the functions also take missing values in the data into account: + +.. 
ipython:: python + + data = xr.DataArray([np.NaN, 2, 4]) + weights = xr.DataArray([8, 1, 1]) + + data.weighted(weights).mean() + +Using ``(data * weights).sum() / weights.sum()`` would (incorrectly) result +in 0.6. + + +If the weights add up to 0, ``sum`` returns 0: + +.. ipython:: python + + data = xr.DataArray([1.0, 1.0]) + weights = xr.DataArray([-1.0, 1.0]) + + data.weighted(weights).sum() + +and ``mean`` returns ``NaN``: + +.. ipython:: python + + data.weighted(weights).mean() + + +.. note:: + ``weights`` must be a :py:class:`DataArray` and cannot contain missing values. + Missing values can be replaced manually by ``weights.fillna(0)``. + .. _comput.coarsen: Coarsen large arrays ==================== -``DataArray`` and ``Dataset`` objects include a +:py:class:`DataArray` and :py:class:`Dataset` objects include :py:meth:`~xarray.DataArray.coarsen` and :py:meth:`~xarray.Dataset.coarsen` methods. This supports the block aggregation along multiple dimensions, diff --git a/doc/examples.rst b/doc/examples.rst index 805395808e0..1d48d29bcc5 100644 --- a/doc/examples.rst +++ b/doc/examples.rst @@ -6,6 +6,7 @@ Examples examples/weather-data examples/monthly-means + examples/area_weighted_temperature examples/multidimensional-coords examples/visualization_gallery examples/ROMS_ocean_model diff --git a/doc/examples/area_weighted_temperature.ipynb b/doc/examples/area_weighted_temperature.ipynb new file mode 100644 index 00000000000..72876e3fc29 --- /dev/null +++ b/doc/examples/area_weighted_temperature.ipynb @@ -0,0 +1,226 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "toc": true + }, + "source": [ + "
<h1>Table of Contents<span class=\"tocSkip\"></span></h1>
\n", + "" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# Compare weighted and unweighted mean temperature\n", "\n", "\n", "Author: [Mathias Hauser](https://github.com/mathause/)\n", "\n", "\n", "We use the `air_temperature` example dataset to calculate the area-weighted temperature over its domain. This dataset has a regular latitude/longitude grid, thus the grid cell area decreases towards the pole. For this grid we can use the cosine of the latitude as a proxy for the grid cell area.\n" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "ExecuteTime": { "end_time": "2020-03-17T14:43:57.222351Z", "start_time": "2020-03-17T14:43:56.147541Z" } }, "outputs": [], "source": [ "%matplotlib inline\n", "\n", "import cartopy.crs as ccrs\n", "import matplotlib.pyplot as plt\n", "import numpy as np\n", "\n", "import xarray as xr" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Data\n", "\n", "Load the data, convert to Celsius, and resample to daily values" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "ExecuteTime": { "end_time": "2020-03-17T14:43:57.831734Z", "start_time": "2020-03-17T14:43:57.651845Z" } }, "outputs": [], "source": [ "ds = xr.tutorial.load_dataset(\"air_temperature\")\n", "\n", "# to Celsius\n", "air = ds.air - 273.15\n", "\n", "# resample from 6-hourly to daily values\n", "air = air.resample(time=\"D\").mean()\n", "\n", "air" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Plot the first timestep:" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "ExecuteTime": { "end_time": "2020-03-17T14:43:59.887120Z", "start_time": "2020-03-17T14:43:59.582894Z" } }, "outputs": [], "source": [ "projection = ccrs.LambertConformal(central_longitude=-95, central_latitude=45)\n", "\n", "f, ax = plt.subplots(subplot_kw=dict(projection=projection))\n", "\n", "air.isel(time=0).plot(transform=ccrs.PlateCarree(), cbar_kwargs=dict(shrink=0.7))\n", "ax.coastlines()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Creating weights\n", "\n", "For a rectangular grid the cosine of the latitude is proportional to the grid cell area."
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "ExecuteTime": { + "end_time": "2020-03-17T14:44:18.777092Z", + "start_time": "2020-03-17T14:44:18.736587Z" + } + }, + "outputs": [], + "source": [ + "weights = np.cos(np.deg2rad(air.lat))\n", + "weights.name = \"weights\"\n", + "weights" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Weighted mean" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "ExecuteTime": { + "end_time": "2020-03-17T14:44:52.607120Z", + "start_time": "2020-03-17T14:44:52.564674Z" + } + }, + "outputs": [], + "source": [ + "air_weighted = air.weighted(weights)\n", + "air_weighted" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "ExecuteTime": { + "end_time": "2020-03-17T14:44:54.334279Z", + "start_time": "2020-03-17T14:44:54.280022Z" + } + }, + "outputs": [], + "source": [ + "weighted_mean = air_weighted.mean((\"lon\", \"lat\"))\n", + "weighted_mean" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Plot: comparison with unweighted mean\n", + "\n", + "Note how the weighted mean temperature is higher than the unweighted." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "ExecuteTime": { + "end_time": "2020-03-17T14:45:08.877307Z", + "start_time": "2020-03-17T14:45:08.673383Z" + } + }, + "outputs": [], + "source": [ + "weighted_mean.plot(label=\"weighted\")\n", + "air.mean((\"lon\", \"lat\")).plot(label=\"unweighted\")\n", + "\n", + "plt.legend()" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.6" + }, + "toc": { + "base_numbering": 1, + "nav_menu": {}, + "number_sections": true, + "sideBar": true, + "skip_h1_title": false, + "title_cell": "Table of Contents", + "title_sidebar": "Contents", + "toc_cell": true, + "toc_position": {}, + "toc_section_display": true, + "toc_window_display": true + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/doc/whats-new.rst b/doc/whats-new.rst index aad0e083a8c..5640e872bea 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -25,6 +25,9 @@ Breaking changes New Features ~~~~~~~~~~~~ +- Weighted array reductions are now supported via the new :py:meth:`DataArray.weighted` + and :py:meth:`Dataset.weighted` methods. See :ref:`comput.weighted`. (:issue:`422`, :pull:`2922`). + By `Mathias Hauser `_ - Added support for :py:class:`pandas.DatetimeIndex`-style rounding of ``cftime.datetime`` objects directly via a :py:class:`CFTimeIndex` or via the :py:class:`~core.accessor_dt.DatetimeAccessor`. diff --git a/xarray/core/common.py b/xarray/core/common.py index 39aa7982091..a003642076f 100644 --- a/xarray/core/common.py +++ b/xarray/core/common.py @@ -745,6 +745,25 @@ def groupby_bins( }, ) + def weighted(self, weights): + """ + Weighted operations. + + Parameters + ---------- + weights : DataArray + An array of weights associated with the values in this Dataset. + Each value in the data contributes to the reduction operation + according to its associated weight. + + Notes + ----- + ``weights`` must be a DataArray and cannot contain missing values. + Missing values can be replaced by ``weights.fillna(0)``. 
+ """ + + return self._weighted_cls(self, weights) + def rolling( self, dim: Mapping[Hashable, int] = None, diff --git a/xarray/core/dataarray.py b/xarray/core/dataarray.py index b335eeb293b..4b3ecb2744c 100644 --- a/xarray/core/dataarray.py +++ b/xarray/core/dataarray.py @@ -33,6 +33,7 @@ resample, rolling, utils, + weighted, ) from .accessor_dt import CombinedDatetimelikeAccessor from .accessor_str import StringAccessor @@ -258,6 +259,7 @@ class DataArray(AbstractArray, DataWithCoords): _rolling_cls = rolling.DataArrayRolling _coarsen_cls = rolling.DataArrayCoarsen _resample_cls = resample.DataArrayResample + _weighted_cls = weighted.DataArrayWeighted dt = property(CombinedDatetimelikeAccessor) diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index d5ad1123a54..c10447f6d11 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -46,6 +46,7 @@ resample, rolling, utils, + weighted, ) from .alignment import _broadcast_helper, _get_broadcast_dims_map_common_coords, align from .common import ( @@ -457,6 +458,7 @@ class Dataset(Mapping, ImplementsDatasetReduce, DataWithCoords): _rolling_cls = rolling.DatasetRolling _coarsen_cls = rolling.DatasetCoarsen _resample_cls = resample.DatasetResample + _weighted_cls = weighted.DatasetWeighted def __init__( self, diff --git a/xarray/core/weighted.py b/xarray/core/weighted.py new file mode 100644 index 00000000000..996d2e4c43e --- /dev/null +++ b/xarray/core/weighted.py @@ -0,0 +1,255 @@ +from typing import TYPE_CHECKING, Hashable, Iterable, Optional, Union, overload + +from .computation import dot +from .options import _get_keep_attrs + +if TYPE_CHECKING: + from .dataarray import DataArray, Dataset + +_WEIGHTED_REDUCE_DOCSTRING_TEMPLATE = """ + Reduce this {cls}'s data by a weighted ``{fcn}`` along some dimension(s). + + Parameters + ---------- + dim : str or sequence of str, optional + Dimension(s) over which to apply the weighted ``{fcn}``. + skipna : bool, optional + If True, skip missing values (as marked by NaN). By default, only + skips missing values for float dtypes; other dtypes either do not + have a sentinel missing value (int) or skipna=True has not been + implemented (object, datetime64 or timedelta64). + keep_attrs : bool, optional + If True, the attributes (``attrs``) will be copied from the original + object to the new one. If False (default), the new object will be + returned without attributes. + + Returns + ------- + reduced : {cls} + New {cls} object with weighted ``{fcn}`` applied to its data and + the indicated dimension(s) removed. + + Notes + ----- + Returns {on_zero} if the ``weights`` sum to 0.0 along the reduced + dimension(s). + """ + +_SUM_OF_WEIGHTS_DOCSTRING = """ + Calculate the sum of weights, accounting for missing values in the data + + Parameters + ---------- + dim : str or sequence of str, optional + Dimension(s) over which to sum the weights. + keep_attrs : bool, optional + If True, the attributes (``attrs``) will be copied from the original + object to the new one. If False (default), the new object will be + returned without attributes. + + Returns + ------- + reduced : {cls} + New {cls} object with the sum of the weights over the given dimension. + """ + + +class Weighted: + """An object that implements weighted operations. + + You should create a Weighted object by using the ``DataArray.weighted`` or + ``Dataset.weighted`` methods. 
+ + See Also + -------- + Dataset.weighted + DataArray.weighted + """ + + __slots__ = ("obj", "weights") + + @overload + def __init__(self, obj: "DataArray", weights: "DataArray") -> None: + ... + + @overload # noqa: F811 + def __init__(self, obj: "Dataset", weights: "DataArray") -> None: # noqa: F811 + ... + + def __init__(self, obj, weights): # noqa: F811 + """ + Create a Weighted object + + Parameters + ---------- + obj : DataArray or Dataset + Object over which the weighted reduction operation is applied. + weights : DataArray + An array of weights associated with the values in the obj. + Each value in the obj contributes to the reduction operation + according to its associated weight. + + Notes + ----- + ``weights`` must be a ``DataArray`` and cannot contain missing values. + Missing values can be replaced by ``weights.fillna(0)``. + """ + + from .dataarray import DataArray + + if not isinstance(weights, DataArray): + raise ValueError("`weights` must be a DataArray") + + if weights.isnull().any(): + raise ValueError( + "`weights` cannot contain missing values. " + "Missing values can be replaced by `weights.fillna(0)`." + ) + + self.obj = obj + self.weights = weights + + @staticmethod + def _reduce( + da: "DataArray", + weights: "DataArray", + dim: Optional[Union[Hashable, Iterable[Hashable]]] = None, + skipna: Optional[bool] = None, + ) -> "DataArray": + """reduce using dot; equivalent to (da * weights).sum(dim, skipna) + + for internal use only + """ + + # need to infer dims as we use `dot` + if dim is None: + dim = ... + + # need to mask invalid values in da, as `dot` does not implement skipna + if skipna or (skipna is None and da.dtype.kind in "cfO"): + da = da.fillna(0.0) + + # `dot` does not broadcast arrays, so this avoids creating a large + # DataArray (if `weights` has additional dimensions) + # maybe add fasttrack (`(da * weights).sum(dims=dim, skipna=skipna)`) + return dot(da, weights, dims=dim) + + def _sum_of_weights( + self, da: "DataArray", dim: Optional[Union[Hashable, Iterable[Hashable]]] = None + ) -> "DataArray": + """ Calculate the sum of weights, accounting for missing values """ + + # we need to mask data values that are nan; else the weights are wrong + mask = da.notnull() + + sum_of_weights = self._reduce(mask, self.weights, dim=dim, skipna=False) + + # 0-weights are not valid + valid_weights = sum_of_weights != 0.0 + + return sum_of_weights.where(valid_weights) + + def _weighted_sum( + self, + da: "DataArray", + dim: Optional[Union[Hashable, Iterable[Hashable]]] = None, + skipna: Optional[bool] = None, + ) -> "DataArray": + """Reduce a DataArray by a weighted ``sum`` along some dimension(s).""" + + return self._reduce(da, self.weights, dim=dim, skipna=skipna) + + def _weighted_mean( + self, + da: "DataArray", + dim: Optional[Union[Hashable, Iterable[Hashable]]] = None, + skipna: Optional[bool] = None, + ) -> "DataArray": + """Reduce a DataArray by a weighted ``mean`` along some dimension(s).""" + + weighted_sum = self._weighted_sum(da, dim=dim, skipna=skipna) + + sum_of_weights = self._sum_of_weights(da, dim=dim) + + return weighted_sum / sum_of_weights + + def _implementation(self, func, dim, **kwargs): + + raise NotImplementedError("Use `Dataset.weighted` or `DataArray.weighted`") + + def sum_of_weights( + self, + dim: Optional[Union[Hashable, Iterable[Hashable]]] = None, + keep_attrs: Optional[bool] = None, + ) -> Union["DataArray", "Dataset"]: + + return self._implementation( + self._sum_of_weights, dim=dim, keep_attrs=keep_attrs + ) + + def sum( + 
self, + dim: Optional[Union[Hashable, Iterable[Hashable]]] = None, + skipna: Optional[bool] = None, + keep_attrs: Optional[bool] = None, + ) -> Union["DataArray", "Dataset"]: + + return self._implementation( + self._weighted_sum, dim=dim, skipna=skipna, keep_attrs=keep_attrs + ) + + def mean( + self, + dim: Optional[Union[Hashable, Iterable[Hashable]]] = None, + skipna: Optional[bool] = None, + keep_attrs: Optional[bool] = None, + ) -> Union["DataArray", "Dataset"]: + + return self._implementation( + self._weighted_mean, dim=dim, skipna=skipna, keep_attrs=keep_attrs + ) + + def __repr__(self): + """provide a nice str repr of our Weighted object""" + + klass = self.__class__.__name__ + weight_dims = ", ".join(self.weights.dims) + return f"{klass} with weights along dimensions: {weight_dims}" + + +class DataArrayWeighted(Weighted): + def _implementation(self, func, dim, **kwargs): + + keep_attrs = kwargs.pop("keep_attrs") + if keep_attrs is None: + keep_attrs = _get_keep_attrs(default=False) + + weighted = func(self.obj, dim=dim, **kwargs) + + if keep_attrs: + weighted.attrs = self.obj.attrs + + return weighted + + +class DatasetWeighted(Weighted): + def _implementation(self, func, dim, **kwargs) -> "Dataset": + + return self.obj.map(func, dim=dim, **kwargs) + + +def _inject_docstring(cls, cls_name): + + cls.sum_of_weights.__doc__ = _SUM_OF_WEIGHTS_DOCSTRING.format(cls=cls_name) + + cls.sum.__doc__ = _WEIGHTED_REDUCE_DOCSTRING_TEMPLATE.format( + cls=cls_name, fcn="sum", on_zero="0" + ) + + cls.mean.__doc__ = _WEIGHTED_REDUCE_DOCSTRING_TEMPLATE.format( + cls=cls_name, fcn="mean", on_zero="NaN" + ) + + +_inject_docstring(DataArrayWeighted, "DataArray") +_inject_docstring(DatasetWeighted, "Dataset") diff --git a/xarray/tests/test_weighted.py b/xarray/tests/test_weighted.py new file mode 100644 index 00000000000..24531215dfb --- /dev/null +++ b/xarray/tests/test_weighted.py @@ -0,0 +1,311 @@ +import numpy as np +import pytest + +import xarray as xr +from xarray import DataArray +from xarray.tests import assert_allclose, assert_equal, raises_regex + + +@pytest.mark.parametrize("as_dataset", (True, False)) +def test_weighted_non_DataArray_weights(as_dataset): + + data = DataArray([1, 2]) + if as_dataset: + data = data.to_dataset(name="data") + + with raises_regex(ValueError, "`weights` must be a DataArray"): + data.weighted([1, 2]) + + +@pytest.mark.parametrize("as_dataset", (True, False)) +@pytest.mark.parametrize("weights", ([np.nan, 2], [np.nan, np.nan])) +def test_weighted_weights_nan_raises(as_dataset, weights): + + data = DataArray([1, 2]) + if as_dataset: + data = data.to_dataset(name="data") + + with pytest.raises(ValueError, match="`weights` cannot contain missing values."): + data.weighted(DataArray(weights)) + + +@pytest.mark.parametrize( + ("weights", "expected"), + (([1, 2], 3), ([2, 0], 2), ([0, 0], np.nan), ([-1, 1], np.nan)), +) +def test_weighted_sum_of_weights_no_nan(weights, expected): + + da = DataArray([1, 2]) + weights = DataArray(weights) + result = da.weighted(weights).sum_of_weights() + + expected = DataArray(expected) + + assert_equal(expected, result) + + +@pytest.mark.parametrize( + ("weights", "expected"), + (([1, 2], 2), ([2, 0], np.nan), ([0, 0], np.nan), ([-1, 1], 1)), +) +def test_weighted_sum_of_weights_nan(weights, expected): + + da = DataArray([np.nan, 2]) + weights = DataArray(weights) + result = da.weighted(weights).sum_of_weights() + + expected = DataArray(expected) + + assert_equal(expected, result) + + +@pytest.mark.parametrize("da", ([1.0, 2], [1, np.nan], 
[np.nan, np.nan])) +@pytest.mark.parametrize("factor", [0, 1, 3.14]) +@pytest.mark.parametrize("skipna", (True, False)) +def test_weighted_sum_equal_weights(da, factor, skipna): + # if all weights equal a factor ``f``, the weighted sum is ``f`` times the ordinary sum + + da = DataArray(da) + weights = xr.full_like(da, factor) + + expected = da.sum(skipna=skipna) * factor + result = da.weighted(weights).sum(skipna=skipna) + + assert_equal(expected, result) + + +@pytest.mark.parametrize( + ("weights", "expected"), (([1, 2], 5), ([0, 2], 4), ([0, 0], 0)) +) +def test_weighted_sum_no_nan(weights, expected): + + da = DataArray([1, 2]) + + weights = DataArray(weights) + result = da.weighted(weights).sum() + expected = DataArray(expected) + + assert_equal(expected, result) + + +@pytest.mark.parametrize( + ("weights", "expected"), (([1, 2], 4), ([0, 2], 4), ([1, 0], 0), ([0, 0], 0)) +) +@pytest.mark.parametrize("skipna", (True, False)) +def test_weighted_sum_nan(weights, expected, skipna): + + da = DataArray([np.nan, 2]) + + weights = DataArray(weights) + result = da.weighted(weights).sum(skipna=skipna) + + if skipna: + expected = DataArray(expected) + else: + expected = DataArray(np.nan) + + assert_equal(expected, result) + + +@pytest.mark.filterwarnings("ignore:Mean of empty slice") +@pytest.mark.parametrize("da", ([1.0, 2], [1, np.nan], [np.nan, np.nan])) +@pytest.mark.parametrize("skipna", (True, False)) +@pytest.mark.parametrize("factor", [1, 2, 3.14]) +def test_weighted_mean_equal_weights(da, skipna, factor): + # if all weights are equal (!= 0), should yield the same result as mean + + da = DataArray(da) + + # all weights equal to ``factor`` + weights = xr.full_like(da, factor) + + expected = da.mean(skipna=skipna) + result = da.weighted(weights).mean(skipna=skipna) + + assert_equal(expected, result) + + +@pytest.mark.parametrize( + ("weights", "expected"), (([4, 6], 1.6), ([1, 0], 1.0), ([0, 0], np.nan)) +) +def test_weighted_mean_no_nan(weights, expected): + + da = DataArray([1, 2]) + weights = DataArray(weights) + expected = DataArray(expected) + + result = da.weighted(weights).mean() + + assert_equal(expected, result) + + +@pytest.mark.parametrize( + ("weights", "expected"), (([4, 6], 2.0), ([1, 0], np.nan), ([0, 0], np.nan)) +) +@pytest.mark.parametrize("skipna", (True, False)) +def test_weighted_mean_nan(weights, expected, skipna): + + da = DataArray([np.nan, 2]) + weights = DataArray(weights) + + if skipna: + expected = DataArray(expected) + else: + expected = DataArray(np.nan) + + result = da.weighted(weights).mean(skipna=skipna) + + assert_equal(expected, result) + + +def expected_weighted(da, weights, dim, skipna, operation): + """ + Generate expected result using ``*`` and ``sum``. 
This is checked against + the result of da.weighted which uses ``dot`` + """ + + weighted_sum = (da * weights).sum(dim=dim, skipna=skipna) + + if operation == "sum": + return weighted_sum + + masked_weights = weights.where(da.notnull()) + sum_of_weights = masked_weights.sum(dim=dim, skipna=True) + valid_weights = sum_of_weights != 0 + sum_of_weights = sum_of_weights.where(valid_weights) + + if operation == "sum_of_weights": + return sum_of_weights + + weighted_mean = weighted_sum / sum_of_weights + + if operation == "mean": + return weighted_mean + + +@pytest.mark.parametrize("dim", ("a", "b", "c", ("a", "b"), ("a", "b", "c"), None)) +@pytest.mark.parametrize("operation", ("sum_of_weights", "sum", "mean")) +@pytest.mark.parametrize("add_nans", (True, False)) +@pytest.mark.parametrize("skipna", (None, True, False)) +@pytest.mark.parametrize("as_dataset", (True, False)) +def test_weighted_operations_3D(dim, operation, add_nans, skipna, as_dataset): + + dims = ("a", "b", "c") + coords = dict(a=[0, 1, 2, 3], b=[0, 1, 2, 3], c=[0, 1, 2, 3]) + + weights = DataArray(np.random.randn(4, 4, 4), dims=dims, coords=coords) + + data = np.random.randn(4, 4, 4) + + # add approximately 25 % NaNs (https://stackoverflow.com/a/32182680/3010700) + if add_nans: + c = int(data.size * 0.25) + data.ravel()[np.random.choice(data.size, c, replace=False)] = np.NaN + + data = DataArray(data, dims=dims, coords=coords) + + if as_dataset: + data = data.to_dataset(name="data") + + if operation == "sum_of_weights": + result = data.weighted(weights).sum_of_weights(dim) + else: + result = getattr(data.weighted(weights), operation)(dim, skipna=skipna) + + expected = expected_weighted(data, weights, dim, skipna, operation) + + assert_allclose(expected, result) + + +@pytest.mark.parametrize("operation", ("sum_of_weights", "sum", "mean")) +@pytest.mark.parametrize("as_dataset", (True, False)) +def test_weighted_operations_nonequal_coords(operation, as_dataset): + + weights = DataArray(np.random.randn(4), dims=("a",), coords=dict(a=[0, 1, 2, 3])) + data = DataArray(np.random.randn(4), dims=("a",), coords=dict(a=[1, 2, 3, 4])) + + if as_dataset: + data = data.to_dataset(name="data") + + expected = expected_weighted( + data, weights, dim="a", skipna=None, operation=operation + ) + result = getattr(data.weighted(weights), operation)(dim="a") + + assert_allclose(expected, result) + + +@pytest.mark.parametrize("dim", ("dim_0", None)) +@pytest.mark.parametrize("shape_data", ((4,), (4, 4), (4, 4, 4))) +@pytest.mark.parametrize("shape_weights", ((4,), (4, 4), (4, 4, 4))) +@pytest.mark.parametrize("operation", ("sum_of_weights", "sum", "mean")) +@pytest.mark.parametrize("add_nans", (True, False)) +@pytest.mark.parametrize("skipna", (None, True, False)) +@pytest.mark.parametrize("as_dataset", (True, False)) +def test_weighted_operations_different_shapes( + dim, shape_data, shape_weights, operation, add_nans, skipna, as_dataset +): + + weights = DataArray(np.random.randn(*shape_weights)) + + data = np.random.randn(*shape_data) + + # add approximately 25 % NaNs + if add_nans: + c = int(data.size * 0.25) + data.ravel()[np.random.choice(data.size, c, replace=False)] = np.NaN + + data = DataArray(data) + + if as_dataset: + data = data.to_dataset(name="data") + + if operation == "sum_of_weights": + result = getattr(data.weighted(weights), operation)(dim) + else: + result = getattr(data.weighted(weights), operation)(dim, skipna=skipna) + + expected = expected_weighted(data, weights, dim, skipna, operation) + + assert_allclose(expected, result) + 
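# --- Editor's illustrative sketch (not part of this patch) -------------------
# The identity that ``expected_weighted`` above encodes: the ``dot``-based
# implementation in ``xarray/core/weighted.py`` must agree with the naive
# mask-multiply-divide formula. A minimal, runnable check assuming only numpy
# and xarray with this feature installed; all names below are hypothetical.
import numpy as np
import xarray as xr

da_sketch = xr.DataArray([np.nan, 2.0, 4.0])
weights_sketch = xr.DataArray([8.0, 1.0, 1.0])

# dot-based path: NaNs in the data are filled with 0 before the dot product
via_dot = da_sketch.weighted(weights_sketch).mean()

# naive reference: mask the weights where the data is NaN, then divide
masked_weights = weights_sketch.where(da_sketch.notnull())
naive = (da_sketch.fillna(0.0) * weights_sketch).sum() / masked_weights.sum()

np.testing.assert_allclose(via_dot, naive)  # both evaluate to 3.0
# ------------------------------------------------------------------------------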
+ +@pytest.mark.parametrize("operation", ("sum_of_weights", "sum", "mean")) +@pytest.mark.parametrize("as_dataset", (True, False)) +@pytest.mark.parametrize("keep_attrs", (True, False, None)) +def test_weighted_operations_keep_attr(operation, as_dataset, keep_attrs): + + weights = DataArray(np.random.randn(2, 2), attrs=dict(attr="weights")) + data = DataArray(np.random.randn(2, 2)) + + if as_dataset: + data = data.to_dataset(name="data") + + data.attrs = dict(attr="weights") + + result = getattr(data.weighted(weights), operation)(keep_attrs=True) + + if operation == "sum_of_weights": + assert weights.attrs == result.attrs + else: + assert data.attrs == result.attrs + + result = getattr(data.weighted(weights), operation)(keep_attrs=None) + assert not result.attrs + + result = getattr(data.weighted(weights), operation)(keep_attrs=False) + assert not result.attrs + + +@pytest.mark.xfail(reason="xr.Dataset.map does not copy attrs of DataArrays GH: 3595") +@pytest.mark.parametrize("operation", ("sum", "mean")) +def test_weighted_operations_keep_attr_da_in_ds(operation): + # GH #3595 + + weights = DataArray(np.random.randn(2, 2)) + data = DataArray(np.random.randn(2, 2), attrs=dict(attr="data")) + data = data.to_dataset(name="a") + + result = getattr(data.weighted(weights), operation)(keep_attrs=True) + + assert data.a.attrs == result.a.attrs From beea37e90ac9d6410ae696dec4d6b052bdb05ba7 Mon Sep 17 00:00:00 2001 From: Deepak Cherian Date: Thu, 19 Mar 2020 08:32:39 -0600 Subject: [PATCH 45/75] Fix some warnings (#3864) * Fix some warnings * Update xarray/backends/api.py Co-Authored-By: keewis * fix test Co-authored-by: keewis --- xarray/backends/api.py | 2 +- xarray/tests/test_accessor_dt.py | 1 + xarray/tests/test_backends.py | 38 ++++++++++++++++------------- xarray/tests/test_concat.py | 3 +-- xarray/tests/test_dask.py | 1 + xarray/tests/test_dataarray.py | 4 +-- xarray/tests/test_dataset.py | 2 +- xarray/tests/test_duck_array_ops.py | 1 + xarray/tests/test_groupby.py | 3 ++- xarray/tests/test_plot.py | 1 + 10 files changed, 32 insertions(+), 24 deletions(-) diff --git a/xarray/backends/api.py b/xarray/backends/api.py index e828faabc27..c7481e22b59 100644 --- a/xarray/backends/api.py +++ b/xarray/backends/api.py @@ -1253,7 +1253,7 @@ def check_dtype(var): if ( not np.issubdtype(var.dtype, np.number) and not np.issubdtype(var.dtype, np.datetime64) - and not np.issubdtype(var.dtype, np.bool) + and not np.issubdtype(var.dtype, np.bool_) and not coding.strings.is_unicode_dtype(var.dtype) and not var.dtype == object ): diff --git a/xarray/tests/test_accessor_dt.py b/xarray/tests/test_accessor_dt.py index 1a8a2732eeb..20a9283e32c 100644 --- a/xarray/tests/test_accessor_dt.py +++ b/xarray/tests/test_accessor_dt.py @@ -347,6 +347,7 @@ def test_field_access(data, field): @requires_cftime +@pytest.mark.filterwarnings("ignore::RuntimeWarning") def test_cftime_strftime_access(data): """ compare cftime formatting against datetime formatting """ date_format = "%Y%m%d%H" diff --git a/xarray/tests/test_backends.py b/xarray/tests/test_backends.py index 59ed8e690cc..5f8ba83c330 100644 --- a/xarray/tests/test_backends.py +++ b/xarray/tests/test_backends.py @@ -1921,33 +1921,36 @@ def test_to_zarr_append_compute_false_roundtrip(self): ds, ds_to_append, _ = create_append_test_data() ds, ds_to_append = ds.chunk(), ds_to_append.chunk() - with self.create_zarr_target() as store: - delayed_obj = self.save(ds, store, compute=False, mode="w") - assert isinstance(delayed_obj, Delayed) + with 
pytest.warns(SerializationWarning): + with self.create_zarr_target() as store: + delayed_obj = self.save(ds, store, compute=False, mode="w") + assert isinstance(delayed_obj, Delayed) + + with pytest.raises(AssertionError): + with self.open(store) as actual: + assert_identical(ds, actual) + + delayed_obj.compute() - with pytest.raises(AssertionError): with self.open(store) as actual: assert_identical(ds, actual) - delayed_obj.compute() + delayed_obj = self.save( + ds_to_append, store, compute=False, append_dim="time" + ) + assert isinstance(delayed_obj, Delayed) - with self.open(store) as actual: - assert_identical(ds, actual) + with pytest.raises(AssertionError): + with self.open(store) as actual: + assert_identical( + xr.concat([ds, ds_to_append], dim="time"), actual + ) - delayed_obj = self.save( - ds_to_append, store, compute=False, append_dim="time" - ) - assert isinstance(delayed_obj, Delayed) + delayed_obj.compute() - with pytest.raises(AssertionError): with self.open(store) as actual: assert_identical(xr.concat([ds, ds_to_append], dim="time"), actual) - delayed_obj.compute() - - with self.open(store) as actual: - assert_identical(xr.concat([ds, ds_to_append], dim="time"), actual) - def test_encoding_chunksizes(self): # regression test for GH2278 # see also test_encoding_chunksizes_unlimited @@ -3519,6 +3522,7 @@ def test_uamiv_format_mfread(self): ["example.uamiv", "example.uamiv"], engine="pseudonetcdf", concat_dim="TSTEP", + combine="nested", backend_kwargs={"format": "uamiv"}, ) diff --git a/xarray/tests/test_concat.py b/xarray/tests/test_concat.py index 77c030198ac..1a498496c03 100644 --- a/xarray/tests/test_concat.py +++ b/xarray/tests/test_concat.py @@ -40,8 +40,7 @@ def test_concat_compat(): assert_equal(ds2.no_x_y, result.no_x_y.transpose()) for var in ["has_x", "no_x_y"]: - assert "y" not in result[var] - + assert "y" not in result[var].dims and "y" not in result[var].coords with raises_regex(ValueError, "coordinates in some datasets but not others"): concat([ds1, ds2], dim="q") with raises_regex(ValueError, "'q' is not present in all datasets"): diff --git a/xarray/tests/test_dask.py b/xarray/tests/test_dask.py index 8fb54c4ee84..4f7e3910f82 100644 --- a/xarray/tests/test_dask.py +++ b/xarray/tests/test_dask.py @@ -1344,6 +1344,7 @@ def test_normalize_token_with_backend(map_ds): map_ds.to_netcdf(tmp_file) read = xr.open_dataset(tmp_file) assert not dask.base.tokenize(map_ds) == dask.base.tokenize(read) + read.close() @pytest.mark.parametrize( diff --git a/xarray/tests/test_dataarray.py b/xarray/tests/test_dataarray.py index dfaf8fd4e28..ef3da5a3b94 100644 --- a/xarray/tests/test_dataarray.py +++ b/xarray/tests/test_dataarray.py @@ -2035,7 +2035,7 @@ def test_stack_unstack(self): codes=[[], []], names=["x", "y"], ) - pd.util.testing.assert_index_equal(a, b) + pd.testing.assert_index_equal(a, b) actual = orig.stack(z=["x", "y"]).unstack("z").drop_vars(["x", "y"]) assert_identical(orig, actual) @@ -3488,7 +3488,7 @@ def test_from_series_sparse(self): def test_to_and_from_empty_series(self): # GH697 - expected = pd.Series([]) + expected = pd.Series([], dtype=np.float64) da = DataArray.from_series(expected) assert len(da) == 0 actual = da.to_series() diff --git a/xarray/tests/test_dataset.py b/xarray/tests/test_dataset.py index 6a6c496591a..c7f39108477 100644 --- a/xarray/tests/test_dataset.py +++ b/xarray/tests/test_dataset.py @@ -6042,7 +6042,7 @@ def test_integrate(dask): actual = da.integrate("x") # coordinate that contains x should be dropped. 
expected_x = xr.DataArray( - np.trapz(da, da["x"], axis=0), + np.trapz(da.compute(), da["x"], axis=0), dims=["y"], coords={k: v for k, v in da.coords.items() if "x" not in v.dims}, ) diff --git a/xarray/tests/test_duck_array_ops.py b/xarray/tests/test_duck_array_ops.py index f4f11473e48..157cd16cba6 100644 --- a/xarray/tests/test_duck_array_ops.py +++ b/xarray/tests/test_duck_array_ops.py @@ -279,6 +279,7 @@ def assert_dask_array(da, dask): @arm_xfail +@pytest.mark.filterwarnings("ignore::RuntimeWarning") @pytest.mark.parametrize("dask", [False, True] if has_dask else [False]) def test_datetime_mean(dask): # Note: only testing numpy, as dask is broken upstream diff --git a/xarray/tests/test_groupby.py b/xarray/tests/test_groupby.py index 8ab4b7b2f80..866d5fb0899 100644 --- a/xarray/tests/test_groupby.py +++ b/xarray/tests/test_groupby.py @@ -447,7 +447,8 @@ def test_groupby_drops_nans(): # reduction operation along a different dimension actual = grouped.mean("time") - expected = ds.mean("time").where(ds.id.notnull()) + with pytest.warns(RuntimeWarning): # mean of empty slice + expected = ds.mean("time").where(ds.id.notnull()) assert_identical(actual, expected) # NaN in non-dimensional coordinate diff --git a/xarray/tests/test_plot.py b/xarray/tests/test_plot.py index 9ffbcd9c85e..c1549c62038 100644 --- a/xarray/tests/test_plot.py +++ b/xarray/tests/test_plot.py @@ -1749,6 +1749,7 @@ def test_can_set_vmin_vmax(self): assert np.allclose(expected, clim) @pytest.mark.slow + @pytest.mark.filterwarnings("ignore") def test_can_set_norm(self): norm = mpl.colors.SymLogNorm(0.1) self.g.map_dataarray(xplt.imshow, "x", "y", norm=norm) From e7d6e12662ae113a57eaf38eb2a19ab9ff92b9a8 Mon Sep 17 00:00:00 2001 From: Mark Boer Date: Thu, 19 Mar 2020 15:41:49 +0100 Subject: [PATCH 46/75] Add DataArray.pad, Dataset.pad, Variable.pad (#3596) * add pad method to Variable and add corresponding test * move pad_with_fill value to dask_array_compat.py and make it default to dask.array.pad * add pad method to dataarray * add docstrings for variable.pad and dataarray.pad * add tests for DataArray.pad * improve pad method signature and support dictionaries as pad_options instead of list of tuples * fix linting errors and remove typo from tests * implement suggested changes: pad_width => padwidths, use pytest.mark.parametrize in test_variable.test_pad * move pad method to dataset * add helper function to variable.pad and fix some mypy errors * add some more tests for DataArray.pad and add docstrings to all pad methods * add workaround for dask.pad mode=mean that converts integers to floats, and add an additional check if the shape of output * disable linear_ramp test and add pad to whats-new.rst and api.rst * fix small mege issue in test_unit * fix DataArray.pad and Dataset.pad docstrings * implement suggested changes from code review: add option of integer pad_width, add a warning and exception to dask_array_compad.pad * apply isort and and set linear_ramp to xfail * Minor fixes. 1. Add warning category 2. Use variable for pad arguments when testing 3. Add example. 
* fix merge issue and make some minor changes as suggested in the code review * fix test_unit.test_pad_constant_values * Keewis review comments * Add experimental warning Co-authored-by: dcherian --- doc/api-hidden.rst | 2 - doc/api.rst | 2 + doc/whats-new.rst | 2 + xarray/core/dask_array_compat.py | 47 +++++++++ xarray/core/dataarray.py | 168 +++++++++++++++++++++++++++++++ xarray/core/dataset.py | 166 ++++++++++++++++++++++++++++++ xarray/core/duck_array_ops.py | 2 +- xarray/core/rolling.py | 2 +- xarray/core/variable.py | 161 ++++++++++++++++++----------- xarray/tests/test_dataarray.py | 107 ++++++++++++++++++++ xarray/tests/test_dataset.py | 13 +++ xarray/tests/test_sparse.py | 2 +- xarray/tests/test_units.py | 60 +++++------ xarray/tests/test_variable.py | 107 +++++++++++++++----- 14 files changed, 717 insertions(+), 124 deletions(-) diff --git a/doc/api-hidden.rst b/doc/api-hidden.rst index 437f53b1a91..cc9517a98ba 100644 --- a/doc/api-hidden.rst +++ b/doc/api-hidden.rst @@ -379,7 +379,6 @@ Variable.min Variable.no_conflicts Variable.notnull - Variable.pad_with_fill_value Variable.prod Variable.quantile Variable.rank @@ -453,7 +452,6 @@ IndexVariable.min IndexVariable.no_conflicts IndexVariable.notnull - IndexVariable.pad_with_fill_value IndexVariable.prod IndexVariable.quantile IndexVariable.rank diff --git a/doc/api.rst b/doc/api.rst index 43a9cf53ead..b9c3e3bdd33 100644 --- a/doc/api.rst +++ b/doc/api.rst @@ -221,6 +221,7 @@ Reshaping and reorganizing Dataset.to_stacked_array Dataset.shift Dataset.roll + Dataset.pad Dataset.sortby Dataset.broadcast_like @@ -401,6 +402,7 @@ Reshaping and reorganizing DataArray.to_unstacked_dataset DataArray.shift DataArray.roll + DataArray.pad DataArray.sortby DataArray.broadcast_like diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 5640e872bea..8140288f350 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -157,6 +157,8 @@ Breaking changes New Features ~~~~~~~~~~~~ +- Implement :py:meth:`DataArray.pad` and :py:meth:`Dataset.pad`. (:issue:`2605`, :pull:`3596`). + By `Mark Boer `_. - :py:meth:`DataArray.sel` and :py:meth:`Dataset.sel` now support :py:class:`pandas.CategoricalIndex`. (:issue:`3669`) By `Keisuke Fujii `_. - Support using an existing, opened h5netcdf ``File`` with diff --git a/xarray/core/dask_array_compat.py b/xarray/core/dask_array_compat.py index 05f750a1355..94c50d90e84 100644 --- a/xarray/core/dask_array_compat.py +++ b/xarray/core/dask_array_compat.py @@ -1,3 +1,4 @@ +import warnings from distutils.version import LooseVersion from typing import Iterable @@ -99,6 +100,52 @@ def meta_from_array(x, ndim=None, dtype=None): return meta +def _validate_pad_output_shape(input_shape, pad_width, output_shape): + """ Validates the output shape of dask.array.pad, raising a RuntimeError if they do not match. + In the current versions of dask (2.2/2.4), dask.array.pad with mode='reflect' sometimes returns + an invalid shape. 
+ """ + isint = lambda i: isinstance(i, int) + + if isint(pad_width): + pass + elif len(pad_width) == 2 and all(map(isint, pad_width)): + pad_width = sum(pad_width) + elif ( + len(pad_width) == len(input_shape) + and all(map(lambda x: len(x) == 2, pad_width)) + and all((isint(i) for p in pad_width for i in p)) + ): + pad_width = np.sum(pad_width, axis=1) + else: + # unreachable: dask.array.pad should already have thrown an error + raise ValueError("Invalid value for `pad_width`") + + if not np.array_equal(np.array(input_shape) + pad_width, output_shape): + raise RuntimeError( + "There seems to be something wrong with the shape of the output of dask.array.pad, " + "try upgrading Dask, use a different pad mode e.g. mode='constant' or first convert " + "your DataArray/Dataset to one backed by a numpy array by calling the `compute()` method." + "See: https://github.com/dask/dask/issues/5303" + ) + + +def pad(array, pad_width, mode="constant", **kwargs): + padded = da.pad(array, pad_width, mode=mode, **kwargs) + # workaround for inconsistency between numpy and dask: https://github.com/dask/dask/issues/5303 + if mode == "mean" and issubclass(array.dtype.type, np.integer): + warnings.warn( + 'dask.array.pad(mode="mean") converts integers to floats. xarray converts ' + "these floats back to integers to keep the interface consistent. There is a chance that " + "this introduces rounding errors. If you wish to keep the values as floats, first change " + "the dtype to a float before calling pad.", + UserWarning, + ) + return da.round(padded).astype(array.dtype) + _validate_pad_output_shape(array.shape, pad_width, padded.shape) + return padded + + if LooseVersion(dask_version) >= LooseVersion("2.8.1"): median = da.median else: diff --git a/xarray/core/dataarray.py b/xarray/core/dataarray.py index 4b3ecb2744c..bd956553929 100644 --- a/xarray/core/dataarray.py +++ b/xarray/core/dataarray.py @@ -3260,6 +3260,174 @@ def map_blocks( return map_blocks(func, self, args, kwargs) + def pad( + self, + pad_width: Mapping[Hashable, Union[int, Tuple[int, int]]] = None, + mode: str = "constant", + stat_length: Union[ + int, Tuple[int, int], Mapping[Hashable, Tuple[int, int]] + ] = None, + constant_values: Union[ + int, Tuple[int, int], Mapping[Hashable, Tuple[int, int]] + ] = None, + end_values: Union[ + int, Tuple[int, int], Mapping[Hashable, Tuple[int, int]] + ] = None, + reflect_type: str = None, + **pad_width_kwargs: Any, + ) -> "DataArray": + """Pad this array along one or more dimensions. + + .. warning:: + This function is experimental and its behaviour is likely to change + especially regarding padding of dimension coordinates (or IndexVariables). + + When using one of the modes ("edge", "reflect", "symmetric", "wrap"), + coordinates will be padded with the same mode, otherwise coordinates + are padded using the "constant" mode with fill_value dtypes.NA. + + Parameters + ---------- + pad_width : Mapping with the form of {dim: (pad_before, pad_after)} + Number of values padded along each dimension. + {dim: pad} is a shortcut for pad_before = pad_after = pad + mode : str + One of the following string values (taken from numpy docs) + + 'constant' (default) + Pads with a constant value. + 'edge' + Pads with the edge values of array. + 'linear_ramp' + Pads with the linear ramp between end_value and the + array edge value. + 'maximum' + Pads with the maximum value of all or part of the + vector along each axis. + 'mean' + Pads with the mean value of all or part of the + vector along each axis. 
+ 'median' + Pads with the median value of all or part of the + vector along each axis. + 'minimum' + Pads with the minimum value of all or part of the + vector along each axis. + 'reflect' + Pads with the reflection of the vector mirrored on + the first and last values of the vector along each + axis. + 'symmetric' + Pads with the reflection of the vector mirrored + along the edge of the array. + 'wrap' + Pads with the wrap of the vector along the axis. + The first values are used to pad the end and the + end values are used to pad the beginning. + stat_length : int, tuple or mapping of the form {dim: tuple} + Used in 'maximum', 'mean', 'median', and 'minimum'. Number of + values at edge of each axis used to calculate the statistic value. + {dim_1: (before_1, after_1), ... dim_N: (before_N, after_N)} unique + statistic lengths along each dimension. + ((before, after),) yields same before and after statistic lengths + for each dimension. + (stat_length,) or int is a shortcut for before = after = statistic + length for all axes. + Default is ``None``, to use the entire axis. + constant_values : scalar, tuple or mapping of the form {dim: tuple} + Used in 'constant'. The values to set the padded values for each + axis. + ``{dim_1: (before_1, after_1), ... dim_N: (before_N, after_N)}`` unique + pad constants along each dimension. + ``((before, after),)`` yields same before and after constants for each + dimension. + ``(constant,)`` or ``constant`` is a shortcut for ``before = after = constant`` for + all dimensions. + Default is 0. + end_values : scalar, tuple or mapping of the form {dim: tuple} + Used in 'linear_ramp'. The values used for the ending value of the + linear_ramp and that will form the edge of the padded array. + ``{dim_1: (before_1, after_1), ... dim_N: (before_N, after_N)}`` unique + end values along each dimension. + ``((before, after),)`` yields same before and after end values for each + axis. + ``(constant,)`` or ``constant`` is a shortcut for ``before = after = constant`` for + all axes. + Default is 0. + reflect_type : {'even', 'odd'}, optional + Used in 'reflect', and 'symmetric'. The 'even' style is the + default with an unaltered reflection around the edge value. For + the 'odd' style, the extended part of the array is created by + subtracting the reflected values from two times the edge value. + **pad_width_kwargs: + The keyword arguments form of ``pad_width``. + One of ``pad_width`` or ``pad_width_kwargs`` must be provided. + + Returns + ------- + padded : DataArray + DataArray with the padded coordinates and data. + + See also + -------- + DataArray.shift, DataArray.roll, DataArray.bfill, DataArray.ffill, numpy.pad, dask.array.pad + + Notes + ----- + By default when ``mode="constant"`` and ``constant_values=None``, integer types will be + promoted to ``float`` and padded with ``np.nan``. 
To avoid type promotion + specify ``constant_values=np.nan`` + + Examples + -------- + + >>> arr = xr.DataArray([5, 6, 7], coords=[("x", [0,1,2])]) + >>> arr.pad(x=(1,2), constant_values=0) + <xarray.DataArray (x: 6)> + array([0, 5, 6, 7, 0, 0]) + Coordinates: + * x (x) float64 nan 0.0 1.0 2.0 nan nan + + >>> da = xr.DataArray([[0,1,2,3], [10,11,12,13]], + ... dims=["x", "y"], + ... coords={"x": [0,1], "y": [10, 20, 30, 40], "z": ("x", [100, 200])} + ... ) + >>> da.pad(x=1) + <xarray.DataArray (x: 4, y: 4)> + array([[nan, nan, nan, nan], + [ 0., 1., 2., 3.], + [10., 11., 12., 13.], + [nan, nan, nan, nan]]) + Coordinates: + * x (x) float64 nan 0.0 1.0 nan + * y (y) int64 10 20 30 40 + z (x) float64 nan 100.0 200.0 nan + >>> da.pad(x=1, constant_values=np.nan) + <xarray.DataArray (x: 4, y: 4)> + array([[-9223372036854775808, -9223372036854775808, -9223372036854775808, + -9223372036854775808], + [ 0, 1, 2, + 3], + [ 10, 11, 12, + 13], + [-9223372036854775808, -9223372036854775808, -9223372036854775808, + -9223372036854775808]]) + Coordinates: + * x (x) float64 nan 0.0 1.0 nan + * y (y) int64 10 20 30 40 + z (x) float64 nan 100.0 200.0 nan + """ + ds = self._to_temp_dataset().pad( + pad_width=pad_width, + mode=mode, + stat_length=stat_length, + constant_values=constant_values, + end_values=end_values, + reflect_type=reflect_type, + **pad_width_kwargs, + ) + return self._from_temp_dataset(ds) + # this needs to be at the end, or mypy will confuse with `str` # https://mypy.readthedocs.io/en/latest/common_issues.html#dealing-with-conflicting-names str = property(StringAccessor) diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index c10447f6d11..7c218e209cb 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -5745,5 +5745,171 @@ def map_blocks( return map_blocks(func, self, args, kwargs) + def pad( + self, + pad_width: Mapping[Hashable, Union[int, Tuple[int, int]]] = None, + mode: str = "constant", + stat_length: Union[ + int, Tuple[int, int], Mapping[Hashable, Tuple[int, int]] + ] = None, + constant_values: Union[ + int, Tuple[int, int], Mapping[Hashable, Tuple[int, int]] + ] = None, + end_values: Union[ + int, Tuple[int, int], Mapping[Hashable, Tuple[int, int]] + ] = None, + reflect_type: str = None, + **pad_width_kwargs: Any, + ) -> "Dataset": + """Pad this dataset along one or more dimensions. + + .. warning:: + This function is experimental and its behaviour is likely to change + especially regarding padding of dimension coordinates (or IndexVariables). + + When using one of the modes ("edge", "reflect", "symmetric", "wrap"), + coordinates will be padded with the same mode, otherwise coordinates + are padded using the "constant" mode with fill_value dtypes.NA. + + Parameters + ---------- + pad_width : Mapping with the form of {dim: (pad_before, pad_after)} + Number of values padded along each dimension. + {dim: pad} is a shortcut for pad_before = pad_after = pad + mode : str + One of the following string values (taken from numpy docs). + + 'constant' (default) + Pads with a constant value. + 'edge' + Pads with the edge values of array. + 'linear_ramp' + Pads with the linear ramp between end_value and the + array edge value. + 'maximum' + Pads with the maximum value of all or part of the + vector along each axis. + 'mean' + Pads with the mean value of all or part of the + vector along each axis. + 'median' + Pads with the median value of all or part of the + vector along each axis. + 'minimum' + Pads with the minimum value of all or part of the + vector along each axis. 
+ 'reflect' + Pads with the reflection of the vector mirrored on + the first and last values of the vector along each + axis. + 'symmetric' + Pads with the reflection of the vector mirrored + along the edge of the array. + 'wrap' + Pads with the wrap of the vector along the axis. + The first values are used to pad the end and the + end values are used to pad the beginning. + stat_length : int, tuple or mapping of the form {dim: tuple} + Used in 'maximum', 'mean', 'median', and 'minimum'. Number of + values at edge of each axis used to calculate the statistic value. + {dim_1: (before_1, after_1), ... dim_N: (before_N, after_N)} unique + statistic lengths along each dimension. + ((before, after),) yields same before and after statistic lengths + for each dimension. + (stat_length,) or int is a shortcut for before = after = statistic + length for all axes. + Default is ``None``, to use the entire axis. + constant_values : scalar, tuple or mapping of the form {dim: tuple} + Used in 'constant'. The values to set the padded values for each + axis. + ``{dim_1: (before_1, after_1), ... dim_N: (before_N, after_N)}`` unique + pad constants along each dimension. + ``((before, after),)`` yields same before and after constants for each + dimension. + ``(constant,)`` or ``constant`` is a shortcut for ``before = after = constant`` for + all dimensions. + Default is 0. + end_values : scalar, tuple or mapping of the form {dim: tuple} + Used in 'linear_ramp'. The values used for the ending value of the + linear_ramp and that will form the edge of the padded array. + ``{dim_1: (before_1, after_1), ... dim_N: (before_N, after_N)}`` unique + end values along each dimension. + ``((before, after),)`` yields same before and after end values for each + axis. + ``(constant,)`` or ``constant`` is a shortcut for ``before = after = constant`` for + all axes. + Default is 0. + reflect_type : {'even', 'odd'}, optional + Used in 'reflect', and 'symmetric'. The 'even' style is the + default with an unaltered reflection around the edge value. For + the 'odd' style, the extended part of the array is created by + subtracting the reflected values from two times the edge value. + **pad_width_kwargs: + The keyword arguments form of ``pad_width``. + One of ``pad_width`` or ``pad_width_kwargs`` must be provided. + + Returns + ------- + padded : Dataset + Dataset with the padded coordinates and data. + + See also + -------- + Dataset.shift, Dataset.roll, Dataset.bfill, Dataset.ffill, numpy.pad, dask.array.pad + + Notes + ----- + By default when ``mode="constant"`` and ``constant_values=None``, integer types will be + promoted to ``float`` and padded with ``np.nan``. 
To avoid type promotion + specify ``constant_values=np.nan`` + + Examples + -------- + + >>> ds = xr.Dataset({'foo': ('x', range(5))}) + >>> ds.pad(x=(1,2)) + <xarray.Dataset> + Dimensions: (x: 8) + Dimensions without coordinates: x + Data variables: + foo (x) float64 nan 0.0 1.0 2.0 3.0 4.0 nan nan + """ + pad_width = either_dict_or_kwargs(pad_width, pad_width_kwargs, "pad") + + if mode in ("edge", "reflect", "symmetric", "wrap"): + coord_pad_mode = mode + coord_pad_options = { + "stat_length": stat_length, + "constant_values": constant_values, + "end_values": end_values, + "reflect_type": reflect_type, + } + else: + coord_pad_mode = "constant" + coord_pad_options = {} + + variables = {} + for name, var in self.variables.items(): + var_pad_width = {k: v for k, v in pad_width.items() if k in var.dims} + if not var_pad_width: + variables[name] = var + elif name in self.data_vars: + variables[name] = var.pad( + pad_width=var_pad_width, + mode=mode, + stat_length=stat_length, + constant_values=constant_values, + end_values=end_values, + reflect_type=reflect_type, + ) + else: + variables[name] = var.pad( + pad_width=var_pad_width, + mode=coord_pad_mode, + **coord_pad_options, # type: ignore + ) + + return self._replace_vars_and_dims(variables) + ops.inject_all_ops_and_reduce_methods(Dataset, array_only=False) diff --git a/xarray/core/duck_array_ops.py b/xarray/core/duck_array_ops.py index 6d0abe9a6fc..ff2d0af63ed 100644 --- a/xarray/core/duck_array_ops.py +++ b/xarray/core/duck_array_ops.py @@ -114,7 +114,7 @@ def notnull(data): isin = _dask_or_eager_func("isin", array_args=slice(2)) take = _dask_or_eager_func("take") broadcast_to = _dask_or_eager_func("broadcast_to") -pad = _dask_or_eager_func("pad") +pad = _dask_or_eager_func("pad", dask_module=dask_array_compat) _concatenate = _dask_or_eager_func("concatenate", list_of_args=True) _stack = _dask_or_eager_func("stack", list_of_args=True) diff --git a/xarray/core/rolling.py b/xarray/core/rolling.py index 58f0b275b21..ecba5307680 100644 --- a/xarray/core/rolling.py +++ b/xarray/core/rolling.py @@ -349,7 +349,7 @@ def _bottleneck_reduce(self, func, **kwargs): else: shift = (-self.window // 2) + 1 valid = (slice(None),) * axis + (slice(-shift, None),) - padded = padded.pad_with_fill_value({self.dim: (0, -shift)}) + padded = padded.pad({self.dim: (0, -shift)}, mode="constant") if isinstance(padded.data, dask_array_type): raise AssertionError("should not be reachable") diff --git a/xarray/core/variable.py b/xarray/core/variable.py index 01f816941b5..1ec6512e4fb 100644 --- a/xarray/core/variable.py +++ b/xarray/core/variable.py @@ -1,11 +1,12 @@ import copy import functools import itertools +import numbers import warnings from collections import defaultdict from datetime import timedelta from distutils.version import LooseVersion -from typing import Any, Dict, Hashable, Mapping, TypeVar, Union +from typing import Any, Dict, Hashable, Mapping, Tuple, TypeVar, Union import numpy as np import pandas as pd @@ -32,12 +33,6 @@ infix_dims, ) -try: - import dask.array as da -except ImportError: - pass - - NON_NUMPY_SUPPORTED_ARRAY_TYPES = ( indexing.ExplicitlyIndexed, pd.Index, @@ -1150,66 +1145,114 @@ def shift(self, shifts=None, fill_value=dtypes.NA, **shifts_kwargs): result = result._shift_one_dim(dim, count, fill_value=fill_value) return result - def pad_with_fill_value( - self, pad_widths=None, fill_value=dtypes.NA, **pad_widths_kwargs + def _pad_options_dim_to_index( + self, + pad_option: Mapping[Hashable, Union[int, Tuple[int, int]]], + fill_with_shape=False, + ):
if fill_with_shape: + return [ + (n, n) if d not in pad_option else pad_option[d] + for d, n in zip(self.dims, self.data.shape) + ] + return [(0, 0) if d not in pad_option else pad_option[d] for d in self.dims] + + def pad( + self, + pad_width: Mapping[Hashable, Union[int, Tuple[int, int]]] = None, + mode: str = "constant", + stat_length: Union[ + int, Tuple[int, int], Mapping[Hashable, Tuple[int, int]] + ] = None, + constant_values: Union[ + int, Tuple[int, int], Mapping[Hashable, Tuple[int, int]] + ] = None, + end_values: Union[ + int, Tuple[int, int], Mapping[Hashable, Tuple[int, int]] + ] = None, + reflect_type: str = None, + **pad_width_kwargs: Any, ): """ - Return a new Variable with paddings. + Return a new Variable with padded data. Parameters ---------- - pad_width: Mapping of the form {dim: (before, after)} - Number of values padded to the edges of each dimension. - **pad_widths_kwargs: - Keyword argument for pad_widths + pad_width: Mapping with the form of {dim: (pad_before, pad_after)} + Number of values padded along each dimension. + {dim: pad} is a shortcut for pad_before = pad_after = pad + mode: (str) + See numpy / Dask docs + stat_length : int, tuple or mapping of the form {dim: tuple} + Used in 'maximum', 'mean', 'median', and 'minimum'. Number of + values at edge of each axis used to calculate the statistic value. + constant_values : scalar, tuple or mapping of the form {dim: tuple} + Used in 'constant'. The values to set the padded values for each + axis. + end_values : scalar, tuple or mapping of the form {dim: tuple} + Used in 'linear_ramp'. The values used for the ending value of the + linear_ramp and that will form the edge of the padded array. + reflect_type : {'even', 'odd'}, optional + Used in 'reflect', and 'symmetric'. The 'even' style is the + default with an unaltered reflection around the edge value. For + the 'odd' style, the extended part of the array is created by + subtracting the reflected values from two times the edge value. + **pad_width_kwargs: + One of pad_width or pad_width_kwargs must be provided. + + Returns + ------- + padded : Variable + Variable with the same dimensions and attributes but padded data. """ - pad_widths = either_dict_or_kwargs(pad_widths, pad_widths_kwargs, "pad") + pad_width = either_dict_or_kwargs(pad_width, pad_width_kwargs, "pad") - if fill_value is dtypes.NA: - dtype, fill_value = dtypes.maybe_promote(self.dtype) + # change default behaviour of pad with mode constant + if mode == "constant" and ( + constant_values is None or constant_values is dtypes.NA + ): + dtype, constant_values = dtypes.maybe_promote(self.dtype) else: dtype = self.dtype - if isinstance(self.data, dask_array_type): - array = self.data - - # Dask does not yet support pad. We manually implement it. 
- # https://github.com/dask/dask/issues/1926 - for d, pad in pad_widths.items(): - axis = self.get_axis_num(d) - before_shape = list(array.shape) - before_shape[axis] = pad[0] - before_chunks = list(array.chunks) - before_chunks[axis] = (pad[0],) - after_shape = list(array.shape) - after_shape[axis] = pad[1] - after_chunks = list(array.chunks) - after_chunks[axis] = (pad[1],) - - arrays = [] - if pad[0] > 0: - arrays.append( - da.full( - before_shape, fill_value, dtype=dtype, chunks=before_chunks - ) - ) - arrays.append(array) - if pad[1] > 0: - arrays.append( - da.full( - after_shape, fill_value, dtype=dtype, chunks=after_chunks - ) - ) - if len(arrays) > 1: - array = da.concatenate(arrays, axis=axis) - else: - pads = [(0, 0) if d not in pad_widths else pad_widths[d] for d in self.dims] - array = np.pad( - self.data.astype(dtype, copy=False), - pads, - mode="constant", - constant_values=fill_value, + # create pad_options_kwargs, numpy requires only relevant kwargs to be nonempty + if isinstance(stat_length, dict): + stat_length = self._pad_options_dim_to_index( + stat_length, fill_with_shape=True ) + if isinstance(constant_values, dict): + constant_values = self._pad_options_dim_to_index(constant_values) + if isinstance(end_values, dict): + end_values = self._pad_options_dim_to_index(end_values) + + # workaround for bug in Dask's default value of stat_length https://github.com/dask/dask/issues/5303 + if stat_length is None and mode in ["maximum", "mean", "median", "minimum"]: + stat_length = [(n, n) for n in self.data.shape] # type: ignore + + # change integer values to a tuple of two of those values and change pad_width to index + for k, v in pad_width.items(): + if isinstance(v, numbers.Number): + pad_width[k] = (v, v) + pad_width_by_index = self._pad_options_dim_to_index(pad_width) + + # create pad_options_kwargs, numpy/dask requires only relevant kwargs to be nonempty + pad_option_kwargs = {} + if stat_length is not None: + pad_option_kwargs["stat_length"] = stat_length + if constant_values is not None: + pad_option_kwargs["constant_values"] = constant_values + if end_values is not None: + pad_option_kwargs["end_values"] = end_values + if reflect_type is not None: + pad_option_kwargs["reflect_type"] = reflect_type # type: ignore + + array = duck_array_ops.pad( + self.data.astype(dtype, copy=False), + pad_width_by_index, + mode=mode, + **pad_option_kwargs, + ) + return type(self)(self.dims, array) def _roll_one_dim(self, dim, count): @@ -1930,10 +1973,10 @@ def _coarsen_reshape(self, windows, boundary, side): if pad < 0: pad += window if side[d] == "left": - pad_widths = {d: (0, pad)} + pad_width = {d: (0, pad)} else: - pad_widths = {d: (pad, 0)} - variable = variable.pad_with_fill_value(pad_widths) + pad_width = {d: (pad, 0)} + variable = variable.pad(pad_width, mode="constant") else: raise TypeError( "{} is invalid for boundary. 
Valid option is 'exact', " diff --git a/xarray/tests/test_dataarray.py b/xarray/tests/test_dataarray.py index ef3da5a3b94..de02f8e059d 100644 --- a/xarray/tests/test_dataarray.py +++ b/xarray/tests/test_dataarray.py @@ -4175,6 +4175,113 @@ def test_rank(self): y = DataArray([0.75, 0.25, np.nan, 0.5, 1.0], dims=("z",)) assert_equal(y.rank("z", pct=True), y) + def test_pad_constant(self): + ar = DataArray(np.arange(3 * 4 * 5).reshape(3, 4, 5)) + actual = ar.pad(dim_0=(1, 3)) + expected = DataArray( + np.pad( + np.arange(3 * 4 * 5).reshape(3, 4, 5).astype(np.float32), + mode="constant", + pad_width=((1, 3), (0, 0), (0, 0)), + constant_values=np.nan, + ) + ) + assert actual.shape == (7, 4, 5) + assert_identical(actual, expected) + + def test_pad_coords(self): + ar = DataArray( + np.arange(3 * 4 * 5).reshape(3, 4, 5), + [("x", np.arange(3)), ("y", np.arange(4)), ("z", np.arange(5))], + ) + actual = ar.pad(x=(1, 3), constant_values=1) + expected = DataArray( + np.pad( + np.arange(3 * 4 * 5).reshape(3, 4, 5), + mode="constant", + pad_width=((1, 3), (0, 0), (0, 0)), + constant_values=1, + ), + [ + ( + "x", + np.pad( + np.arange(3).astype(np.float32), + mode="constant", + pad_width=(1, 3), + constant_values=np.nan, + ), + ), + ("y", np.arange(4)), + ("z", np.arange(5)), + ], + ) + assert_identical(actual, expected) + + @pytest.mark.parametrize("mode", ("minimum", "maximum", "mean", "median")) + @pytest.mark.parametrize( + "stat_length", (None, 3, (1, 3), {"dim_0": (2, 1), "dim_2": (4, 2)}) + ) + def test_pad_stat_length(self, mode, stat_length): + ar = DataArray(np.arange(3 * 4 * 5).reshape(3, 4, 5)) + actual = ar.pad(dim_0=(1, 3), dim_2=(2, 2), mode=mode, stat_length=stat_length) + if isinstance(stat_length, dict): + stat_length = (stat_length["dim_0"], (4, 4), stat_length["dim_2"]) + expected = DataArray( + np.pad( + np.arange(3 * 4 * 5).reshape(3, 4, 5), + pad_width=((1, 3), (0, 0), (2, 2)), + mode=mode, + stat_length=stat_length, + ) + ) + assert actual.shape == (7, 4, 9) + assert_identical(actual, expected) + + @pytest.mark.parametrize( + "end_values", (None, 3, (3, 5), {"dim_0": (2, 1), "dim_2": (4, 2)}) + ) + def test_pad_linear_ramp(self, end_values): + ar = DataArray(np.arange(3 * 4 * 5).reshape(3, 4, 5)) + actual = ar.pad( + dim_0=(1, 3), dim_2=(2, 2), mode="linear_ramp", end_values=end_values + ) + if end_values is None: + end_values = 0 + elif isinstance(end_values, dict): + end_values = (end_values["dim_0"], (4, 4), end_values["dim_2"]) + expected = DataArray( + np.pad( + np.arange(3 * 4 * 5).reshape(3, 4, 5), + pad_width=((1, 3), (0, 0), (2, 2)), + mode="linear_ramp", + end_values=end_values, + ) + ) + assert actual.shape == (7, 4, 9) + assert_identical(actual, expected) + + @pytest.mark.parametrize("mode", ("reflect", "symmetric")) + @pytest.mark.parametrize("reflect_type", (None, "even", "odd")) + def test_pad_reflect(self, mode, reflect_type): + + ar = DataArray(np.arange(3 * 4 * 5).reshape(3, 4, 5)) + actual = ar.pad( + dim_0=(1, 3), dim_2=(2, 2), mode=mode, reflect_type=reflect_type + ) + np_kwargs = { + "array": np.arange(3 * 4 * 5).reshape(3, 4, 5), + "pad_width": ((1, 3), (0, 0), (2, 2)), + "mode": mode, + } + # numpy does not support reflect_type=None + if reflect_type is not None: + np_kwargs["reflect_type"] = reflect_type + expected = DataArray(np.pad(**np_kwargs)) + + assert actual.shape == (7, 4, 9) + assert_identical(actual, expected) + @pytest.fixture(params=[1]) def da(request): diff --git a/xarray/tests/test_dataset.py b/xarray/tests/test_dataset.py index 
c7f39108477..74173e71af6 100644 --- a/xarray/tests/test_dataset.py +++ b/xarray/tests/test_dataset.py @@ -5484,6 +5484,19 @@ def test_ipython_key_completion(self): ds.data_vars[item] # should not raise assert sorted(actual) == sorted(expected) + def test_pad(self): + ds = create_test_data(seed=1) + padded = ds.pad(dim2=(1, 1), constant_values=42) + + assert padded["dim2"].shape == (11,) + assert padded["var1"].shape == (8, 11) + assert padded["var2"].shape == (8, 11) + assert padded["var3"].shape == (10, 8) + assert dict(padded.dims) == {"dim1": 8, "dim2": 11, "dim3": 10, "time": 20} + + np.testing.assert_equal(padded["var1"].isel(dim2=[0, -1]).data, 42) + np.testing.assert_equal(padded["dim2"][[0, -1]].data, np.nan) + # Py.test tests diff --git a/xarray/tests/test_sparse.py b/xarray/tests/test_sparse.py index 21a212c29b3..09ab1be9af9 100644 --- a/xarray/tests/test_sparse.py +++ b/xarray/tests/test_sparse.py @@ -175,7 +175,7 @@ def test_variable_property(prop): marks=xfail(reason="mixed sparse-dense operation"), ), param( - do("pad_with_fill_value", pad_widths={"x": (1, 1)}, fill_value=5), + do("pad", mode="constant", pad_widths={"x": (1, 1)}, fill_value=5), True, marks=xfail(reason="Missing implementation for np.pad"), ), diff --git a/xarray/tests/test_units.py b/xarray/tests/test_units.py index bef3af62d74..2826dc2479c 100644 --- a/xarray/tests/test_units.py +++ b/xarray/tests/test_units.py @@ -11,7 +11,7 @@ from xarray.core.npcompat import IS_NEP18_ACTIVE from xarray.testing import assert_allclose, assert_identical -from .test_variable import VariableSubclassobjects +from .test_variable import _PAD_XR_NP_ARGS, VariableSubclassobjects pint = pytest.importorskip("pint") DimensionalityError = pint.errors.DimensionalityError @@ -2078,42 +2078,32 @@ def test_no_conflicts(self, unit, dtype): assert expected == actual - def test_pad(self, dtype): + @pytest.mark.parametrize("xr_arg, np_arg", _PAD_XR_NP_ARGS) + def test_pad_constant_values(self, dtype, xr_arg, np_arg): data = np.arange(4 * 3 * 2).reshape(4, 3, 2).astype(dtype) * unit_registry.m v = xr.Variable(["x", "y", "z"], data) - xr_args = [{"x": (2, 1)}, {"y": (0, 3)}, {"x": (3, 1), "z": (2, 0)}] - np_args = [ - ((2, 1), (0, 0), (0, 0)), - ((0, 0), (0, 3), (0, 0)), - ((3, 1), (0, 0), (2, 0)), - ] - for xr_arg, np_arg in zip(xr_args, np_args): - actual = v.pad_with_fill_value(**xr_arg) - expected = xr.Variable( - v.dims, - np.pad( - v.data.astype(float), - np_arg, - mode="constant", - constant_values=np.nan, - ), - ) - xr.testing.assert_identical(expected, actual) - assert_units_equal(expected, actual) - assert isinstance(actual._data, type(v._data)) + actual = v.pad(**xr_arg, mode="constant") + expected = xr.Variable( + v.dims, + np.pad( + v.data.astype(float), np_arg, mode="constant", constant_values=np.nan, + ), + ) + xr.testing.assert_identical(expected, actual) + assert_units_equal(expected, actual) + assert isinstance(actual._data, type(v._data)) # for the boolean array, we pad False data = np.full_like(data, False, dtype=bool).reshape(4, 3, 2) v = xr.Variable(["x", "y", "z"], data) - for xr_arg, np_arg in zip(xr_args, np_args): - actual = v.pad_with_fill_value(fill_value=data.flat[0], **xr_arg) - expected = xr.Variable( - v.dims, - np.pad(v.data, np_arg, mode="constant", constant_values=v.data.flat[0]), - ) - xr.testing.assert_identical(actual, expected) - assert_units_equal(expected, actual) + actual = v.pad(**xr_arg, mode="constant", constant_values=data.flat[0]) + expected = xr.Variable( + v.dims, + np.pad(v.data, np_arg, 
mode="constant", constant_values=v.data.flat[0]), + ) + xr.testing.assert_identical(actual, expected) + assert_units_equal(expected, actual) @pytest.mark.parametrize( "unit,error", @@ -2135,16 +2125,16 @@ def test_pad(self, dtype): pytest.param(unit_registry.m, None, id="identical_unit"), ), ) - def test_pad_with_fill_value(self, unit, error, dtype): + def test_pad_unit_constant_value(self, unit, error, dtype): array = np.linspace(0, 5, 3 * 10).reshape(3, 10).astype(dtype) * unit_registry.m variable = xr.Variable(("x", "y"), array) fill_value = -100 * unit - func = method("pad_with_fill_value", x=(2, 3), y=(1, 4)) + func = method("pad", mode="constant", x=(2, 3), y=(1, 4)) if error is not None: with pytest.raises(error): - func(variable, fill_value=fill_value) + func(variable, constant_values=fill_value) return @@ -2152,11 +2142,11 @@ def test_pad_with_fill_value(self, unit, error, dtype): expected = attach_units( func( strip_units(variable), - fill_value=strip_units(convert_units(fill_value, units)), + constant_values=strip_units(convert_units(fill_value, units)), ), units, ) - actual = func(variable, fill_value=fill_value) + actual = func(variable, constant_values=fill_value) assert_units_equal(expected, actual) xr.testing.assert_identical(expected, actual) diff --git a/xarray/tests/test_variable.py b/xarray/tests/test_variable.py index c600f7a77d0..525a005c601 100644 --- a/xarray/tests/test_variable.py +++ b/xarray/tests/test_variable.py @@ -38,6 +38,14 @@ source_ndarray, ) +_PAD_XR_NP_ARGS = [ + [{"x": (2, 1)}, ((2, 1), (0, 0), (0, 0))], + [{"x": 1}, ((1, 1), (0, 0), (0, 0))], + [{"y": (0, 3)}, ((0, 0), (0, 3), (0, 0))], + [{"x": (3, 1), "z": (2, 0)}, ((3, 1), (0, 0), (2, 0))], + [{"x": (3, 1), "z": 2}, ((3, 1), (0, 0), (2, 2))], +] + class VariableSubclassobjects: def test_properties(self): @@ -785,36 +793,65 @@ def test_getitem_error(self): with raises_regex(IndexError, "Dimensions of indexers mis"): v[:, ind] - def test_pad(self): + @pytest.mark.parametrize( + "mode", + [ + "mean", + pytest.param( + "median", + marks=pytest.mark.xfail(reason="median is not implemented by Dask"), + ), + pytest.param( + "reflect", marks=pytest.mark.xfail(reason="dask.array.pad bug") + ), + "edge", + pytest.param( + "linear_ramp", + marks=pytest.mark.xfail( + reason="pint bug: https://github.com/hgrecco/pint/issues/1026" + ), + ), + "maximum", + "minimum", + "symmetric", + "wrap", + ], + ) + @pytest.mark.parametrize("xr_arg, np_arg", _PAD_XR_NP_ARGS) + def test_pad(self, mode, xr_arg, np_arg): data = np.arange(4 * 3 * 2).reshape(4, 3, 2) v = self.cls(["x", "y", "z"], data) - xr_args = [{"x": (2, 1)}, {"y": (0, 3)}, {"x": (3, 1), "z": (2, 0)}] - np_args = [ - ((2, 1), (0, 0), (0, 0)), - ((0, 0), (0, 3), (0, 0)), - ((3, 1), (0, 0), (2, 0)), - ] - for xr_arg, np_arg in zip(xr_args, np_args): - actual = v.pad_with_fill_value(**xr_arg) - expected = np.pad( - np.array(v.data.astype(float)), - np_arg, - mode="constant", - constant_values=np.nan, - ) - assert_array_equal(actual, expected) - assert isinstance(actual._data, type(v._data)) + actual = v.pad(mode=mode, **xr_arg) + expected = np.pad(data, np_arg, mode=mode) + + assert_array_equal(actual, expected) + assert isinstance(actual._data, type(v._data)) + + @pytest.mark.parametrize("xr_arg, np_arg", _PAD_XR_NP_ARGS) + def test_pad_constant_values(self, xr_arg, np_arg): + data = np.arange(4 * 3 * 2).reshape(4, 3, 2) + v = self.cls(["x", "y", "z"], data) + + actual = v.pad(**xr_arg) + expected = np.pad( + np.array(v.data.astype(float)), + np_arg, + 
mode="constant", + constant_values=np.nan, + ) + assert_array_equal(actual, expected) + assert isinstance(actual._data, type(v._data)) # for the boolean array, we pad False data = np.full_like(data, False, dtype=bool).reshape(4, 3, 2) v = self.cls(["x", "y", "z"], data) - for xr_arg, np_arg in zip(xr_args, np_args): - actual = v.pad_with_fill_value(fill_value=False, **xr_arg) - expected = np.pad( - np.array(v.data), np_arg, mode="constant", constant_values=False - ) - assert_array_equal(actual, expected) + + actual = v.pad(mode="constant", constant_values=False, **xr_arg) + expected = np.pad( + np.array(v.data), np_arg, mode="constant", constant_values=False + ) + assert_array_equal(actual, expected) def test_rolling_window(self): # Just a working test. See test_nputils for the algorithm validation @@ -2056,8 +2093,28 @@ def test_getitem_uint(self): super().test_getitem_fancy() @pytest.mark.xfail - def test_pad(self): - super().test_rolling_window() + @pytest.mark.parametrize( + "mode", + [ + "mean", + "median", + "reflect", + "edge", + "linear_ramp", + "maximum", + "minimum", + "symmetric", + "wrap", + ], + ) + @pytest.mark.parametrize("xr_arg, np_arg", _PAD_XR_NP_ARGS) + def test_pad(self, mode, xr_arg, np_arg): + super().test_pad(mode, xr_arg, np_arg) + + @pytest.mark.xfail + @pytest.mark.parametrize("xr_arg, np_arg", _PAD_XR_NP_ARGS) + def test_pad_constant_values(self, xr_arg, np_arg): + super().test_pad_constant_values(xr_arg, np_arg) @pytest.mark.xfail def test_rolling_window(self): From 5548c1e13ad076196d06a9b99cff0dcc4ef2be5e Mon Sep 17 00:00:00 2001 From: Maximilian Roos <5635139+max-sixty@users.noreply.github.com> Date: Thu, 19 Mar 2020 17:29:58 -0400 Subject: [PATCH 47/75] Improve where docstring (#3836) * improve the where docstring * whatsnew * improve assign docstring * changes from @dcherian --- doc/whats-new.rst | 2 ++ xarray/core/computation.py | 12 ++++++++---- xarray/core/dataset.py | 2 -- 3 files changed, 10 insertions(+), 6 deletions(-) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 8140288f350..6ae7398626f 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -94,6 +94,8 @@ Documentation - Fix documentation of :py:class:`DataArray` removing the deprecated mention that when omitted, `dims` are inferred from a `coords`-dict. (:pull:`3821`) By `Sander van Rijn `_. +- Improve the :py:func:`where` docstring. + By `Maximilian Roos `_ - Update the installation instructions: only explicitly list recommended dependencies (:issue:`3756`). By `Mathias Hauser `_. diff --git a/xarray/core/computation.py b/xarray/core/computation.py index f99764448da..f2941a3d0ba 100644 --- a/xarray/core/computation.py +++ b/xarray/core/computation.py @@ -1224,9 +1224,13 @@ def where(cond, x, y): ---------- cond : scalar, array, Variable, DataArray or Dataset with boolean dtype When True, return values from `x`, otherwise returns values from `y`. - x, y : scalar, array, Variable, DataArray or Dataset - Values from which to choose. All dimension coordinates on these objects - must be aligned with each other and with `cond`. + x : scalar, array, Variable, DataArray or Dataset + values to choose from where `cond` is True + y : scalar, array, Variable, DataArray or Dataset + values to choose from where `cond` is False + + All dimension coordinates on these objects must be aligned with each + other and with `cond`. 
Returns ------- @@ -1249,7 +1253,7 @@ def where(cond, x, y): Coordinates: * lat (lat) int64 0 1 2 3 4 5 6 7 8 9 - >>> xr.where(x < 0.5, x, 100 * x) + >>> xr.where(x < 0.5, x, x * 100) array([ 0. , 0.1, 0.2, 0.3, 0.4, 50. , 60. , 70. , 80. , 90. ]) Coordinates: diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index 7c218e209cb..a607f1aa164 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -4392,8 +4392,6 @@ def assign( Examples -------- - >>> import numpy as np - >>> import xarray as xr >>> x = xr.Dataset( ... { ... "temperature_c": ( From e8a284f341645a63a4d83676a6b268394c721bbc Mon Sep 17 00:00:00 2001 From: Maximilian Roos <5635139+max-sixty@users.noreply.github.com> Date: Thu, 19 Mar 2020 18:55:08 -0400 Subject: [PATCH 48/75] Allow ellipsis to be used in stack (#3826) * allow ellipsis to be used in stack * doc fix * support ellipsis only as part of an iterable * docs * whatsnew * docstring, whatsnew * docstring, whatsnew * add passing a partial list of dims * more wording changes * improvement from @dcherian --- doc/reshaping.rst | 8 ++++++++ doc/whats-new.rst | 10 ++++++++-- xarray/core/dataarray.py | 4 +++- xarray/core/dataset.py | 7 ++++++- xarray/tests/test_dataarray.py | 3 +++ xarray/tests/test_dataset.py | 11 +++++++++++ 6 files changed, 39 insertions(+), 4 deletions(-) diff --git a/doc/reshaping.rst b/doc/reshaping.rst index 455a24f9216..465ca14dfc2 100644 --- a/doc/reshaping.rst +++ b/doc/reshaping.rst @@ -109,6 +109,13 @@ implemented :py:meth:`~xarray.DataArray.stack` and stacked stacked.unstack('z') +As elsewhere in xarray, an ellipsis (`...`) can be used to represent all unlisted dimensions: + +.. ipython:: python + + stacked = array.stack(z=[..., "x"]) + stacked + These methods are modeled on the :py:class:`pandas.DataFrame` methods of the same name, although in xarray they always create new dimensions rather than adding to the existing index or columns. @@ -164,6 +171,7 @@ like this: 'b': ('x', [6, 7])}, coords={'y': ['u', 'v', 'w']} ) + data stacked = data.to_stacked_array("z", sample_dims=['x']) stacked unstacked = stacked.to_unstacked_dataset("z") diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 6ae7398626f..6863d52d9bf 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -39,13 +39,19 @@ New Features By `Justus Magin `_. - :py:meth:`Dataset.groupby` and :py:meth:`DataArray.groupby` now raise a `TypeError` on multiple string arguments. Receiving multiple string arguments - often means a user is attempting to pass multiple dimensions to group over - and should instead pass a list. + often means a user is attempting to pass multiple dimensions as separate + arguments and should instead pass a single list of dimensions. + (:pull:`3802`) By `Maximilian Roos `_ - The new ``Dataset._repr_html_`` and ``DataArray._repr_html_`` (introduced in 0.14.1) is now on by default. To disable, use ``xarray.set_options(display_style="text")``. By `Julia Signell `_. +- An ellipsis (``...``) is now supported in the ``dims`` argument of + :py:meth:`Dataset.stack` and :py:meth:`DataArray.stack`, meaning all + unlisted dimensions, similar to its meaning in :py:meth:`DataArray.transpose`. + (:pull:`3826`) + By `Maximilian Roos `_ - :py:meth:`Dataset.where` and :py:meth:`DataArray.where` accept a lambda as a first argument, which is then called on the input; replicating pandas' behavior. By `Maximilian Roos `_. 
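A short sketch of the two behaviours these entries describe (hypothetical array; assumes a build including this patch):

```python
import numpy as np
import xarray as xr

da = xr.DataArray(np.arange(12).reshape(3, 4), dims=("x", "y"))

# `...` expands to every dimension not listed explicitly, so both calls
# stack over ("x", "y"):
assert da.stack(z=[...]).sizes["z"] == 12
assert da.stack(z=[..., "y"]).sizes["z"] == 12

# `where` now also accepts a callable, which is applied to the object itself:
masked = da.where(lambda a: a > 5)
assert int(masked.count()) == 6  # values 6..11 survive the mask
```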
diff --git a/xarray/core/dataarray.py b/xarray/core/dataarray.py index bd956553929..324e7ccd290 100644 --- a/xarray/core/dataarray.py +++ b/xarray/core/dataarray.py @@ -1709,7 +1709,9 @@ def stack( ---------- dimensions : Mapping of the form new_name=(dim1, dim2, ...) Names of new dimensions, and the existing dimensions that they - replace. + replace. An ellipsis (`...`) will be replaced by all unlisted dimensions. + Passing a list containing an ellipsis (`stacked_dim=[...]`) will stack over + all dimensions. **dimensions_kwargs: The keyword arguments form of ``dimensions``. One of dimensions or dimensions_kwargs must be provided. diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index a607f1aa164..b7ce0ec4e1e 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -87,6 +87,7 @@ decode_numpy_dict_values, either_dict_or_kwargs, hashable, + infix_dims, is_dict_like, is_scalar, maybe_wrap_array, @@ -3262,6 +3263,8 @@ def reorder_levels( return self._replace(variables, indexes=indexes) def _stack_once(self, dims, new_dim): + if ... in dims: + dims = list(infix_dims(dims, self.dims)) variables = {} for name, var in self.variables.items(): if name not in dims: @@ -3304,7 +3307,9 @@ def stack( ---------- dimensions : Mapping of the form new_name=(dim1, dim2, ...) Names of new dimensions, and the existing dimensions that they - replace. + replace. An ellipsis (`...`) will be replaced by all unlisted dimensions. + Passing a list containing an ellipsis (`stacked_dim=[...]`) will stack over + all dimensions. **dimensions_kwargs: The keyword arguments form of ``dimensions``. One of dimensions or dimensions_kwargs must be provided. diff --git a/xarray/tests/test_dataarray.py b/xarray/tests/test_dataarray.py index de02f8e059d..6f065c9daed 100644 --- a/xarray/tests/test_dataarray.py +++ b/xarray/tests/test_dataarray.py @@ -2040,6 +2040,9 @@ def test_stack_unstack(self): actual = orig.stack(z=["x", "y"]).unstack("z").drop_vars(["x", "y"]) assert_identical(orig, actual) + actual = orig.stack(z=[...]).unstack("z").drop_vars(["x", "y"]) + assert_identical(orig, actual) + dims = ["a", "b", "c", "d", "e"] orig = xr.DataArray(np.random.rand(1, 2, 3, 2, 1), dims=dims) stacked = orig.stack(ab=["a", "b"], cd=["c", "d"]) diff --git a/xarray/tests/test_dataset.py b/xarray/tests/test_dataset.py index 74173e71af6..d2e7bcdabf8 100644 --- a/xarray/tests/test_dataset.py +++ b/xarray/tests/test_dataset.py @@ -2879,6 +2879,17 @@ def test_stack(self): actual = ds.stack(z=["x", "y"]) assert_identical(expected, actual) + actual = ds.stack(z=[...]) + assert_identical(expected, actual) + + # non list dims with ellipsis + actual = ds.stack(z=(...,)) + assert_identical(expected, actual) + + # ellipsis with given dim + actual = ds.stack(z=[..., "y"]) + assert_identical(expected, actual) + exp_index = pd.MultiIndex.from_product([["a", "b"], [0, 1]], names=["y", "x"]) expected = Dataset( {"a": ("z", [0, 1, 0, 1]), "b": ("z", [0, 2, 1, 3]), "z": exp_index} From 564a291b13db73a31c15c4cf2a9ff5ec1ad2498c Mon Sep 17 00:00:00 2001 From: Maximilian Roos <5635139+max-sixty@users.noreply.github.com> Date: Fri, 20 Mar 2020 13:04:26 -0400 Subject: [PATCH 49/75] Fix html repr on non-str keys (#3870) * fix html repr on non-str keys * whatsnew * Update doc/whats-new.rst Co-Authored-By: keewis Co-authored-by: Deepak Cherian Co-authored-by: keewis --- doc/whats-new.rst | 2 ++ xarray/core/formatting_html.py | 2 +- xarray/tests/test_formatting_html.py | 5 +++++ 3 files changed, 8 insertions(+), 1 deletion(-) diff --git 
a/doc/whats-new.rst b/doc/whats-new.rst index 6863d52d9bf..5bdf6536d3d 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -94,6 +94,8 @@ Bug fixes - Fix :py:meth:`xarray.core.dataset.Dataset.to_zarr` when using `append_dim` and `group` simultaneously. (:issue:`3170`). By `Matthias Meyer `_. +- Fix html repr on :py:class:`Dataset` with non-string keys (:pull:`3807`). + By `Maximilian Roos `_. Documentation ~~~~~~~~~~~~~ diff --git a/xarray/core/formatting_html.py b/xarray/core/formatting_html.py index 8ceda8bfbfa..8678a58b381 100644 --- a/xarray/core/formatting_html.py +++ b/xarray/core/formatting_html.py @@ -95,7 +95,7 @@ def summarize_variable(name, var, is_index=False, dtype=None, preview=None): cssclass_idx = " class='xr-has-index'" if is_index else "" dims_str = f"({', '.join(escape(dim) for dim in var.dims)})" - name = escape(name) + name = escape(str(name)) dtype = dtype or escape(str(var.dtype)) # "unique" ids required to expand/collapse subsections diff --git a/xarray/tests/test_formatting_html.py b/xarray/tests/test_formatting_html.py index 01357000b20..239f339208d 100644 --- a/xarray/tests/test_formatting_html.py +++ b/xarray/tests/test_formatting_html.py @@ -51,6 +51,11 @@ def test_short_data_repr_html(dataarray): assert data_repr.startswith("array") +def test_short_data_repr_html_non_str_keys(dataset): + ds = dataset.assign({2: lambda x: x["tmin"]}) + fh.dataset_repr(ds) + + def test_short_data_repr_html_dask(dask_dataarray): import dask From 889240bcd1cc81747beef941002125c597f48b14 Mon Sep 17 00:00:00 2001 From: Maximilian Roos <5635139+max-sixty@users.noreply.github.com> Date: Sat, 21 Mar 2020 13:54:56 -0400 Subject: [PATCH 50/75] remove macos build while waiting for libwebp fix (#3875) --- azure-pipelines.yml | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/azure-pipelines.yml b/azure-pipelines.yml index ce95fca1ba1..8d43de7b1d5 100644 --- a/azure-pipelines.yml +++ b/azure-pipelines.yml @@ -32,15 +32,16 @@ jobs: steps: - template: ci/azure/unit-tests.yml -- job: MacOSX - strategy: - matrix: - py38: - conda_env: py38 - pool: - vmImage: 'macOS-10.15' - steps: - - template: ci/azure/unit-tests.yml +# excluded while waiting for https://github.com/conda-forge/libwebp-feedstock/issues/26 +# - job: MacOSX +# strategy: +# matrix: +# py38: +# conda_env: py38 +# pool: +# vmImage: 'macOS-10.15' +# steps: +# - template: ci/azure/unit-tests.yml - job: Windows strategy: From 5354679579d46d3bcb620817125c5bde3c4f1cff Mon Sep 17 00:00:00 2001 From: Deepak Cherian Date: Sat, 21 Mar 2020 19:03:51 +0000 Subject: [PATCH 51/75] Delete associated indexes when deleting coordinate variables. (#3840) * Delete associated indexes when deleting coordinate variables. Fixes #3746 * review * fix tests --- doc/whats-new.rst | 3 ++- xarray/core/coordinates.py | 11 ++++++++--- xarray/tests/test_dataarray.py | 6 ++++++ xarray/tests/test_dataset.py | 4 ++++ 4 files changed, 20 insertions(+), 4 deletions(-) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 5bdf6536d3d..ac80524a3c4 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -91,7 +91,8 @@ Bug fixes to preserve attributes. :py:meth:`Dataset.coarsen` accepts a keyword argument ``keep_attrs`` to change this setting. (:issue:`3376`, :pull:`3801`) By `Andrew Thomas `_. - +- Delete associated indexes when deleting coordinate variables. (:issue:`3746`). + By `Deepak Cherian `_. - Fix :py:meth:`xarray.core.dataset.Dataset.to_zarr` when using `append_dim` and `group` simultaneously. (:issue:`3170`). 
By `Matthias Meyer `_. - Fix html repr on :py:class:`Dataset` with non-string keys (:pull:`3807`). diff --git a/xarray/core/coordinates.py b/xarray/core/coordinates.py index 3d51c9b4271..83c4d2a8636 100644 --- a/xarray/core/coordinates.py +++ b/xarray/core/coordinates.py @@ -247,7 +247,7 @@ def __delitem__(self, key: Hashable) -> None: if key in self: del self._data[key] else: - raise KeyError(key) + raise KeyError(f"{key!r} is not a coordinate variable.") def _ipython_key_completions_(self): """Provide method for the key-autocompletions in IPython. """ @@ -291,7 +291,7 @@ def _update_coords( dims = calculate_dimensions(coords_plus_data) if not set(dims) <= set(self.dims): raise ValueError( - "cannot add coordinates with new dimensions to " "a DataArray" + "cannot add coordinates with new dimensions to a DataArray" ) self._data._coords = coords @@ -312,7 +312,12 @@ def to_dataset(self) -> "Dataset": return Dataset._construct_direct(coords, set(coords)) def __delitem__(self, key: Hashable) -> None: - del self._data._coords[key] + if key in self: + del self._data._coords[key] + if self._data._indexes is not None and key in self._data._indexes: + del self._data._indexes[key] + else: + raise KeyError(f"{key!r} is not a coordinate variable.") def _ipython_key_completions_(self): """Provide method for the key-autocompletions in IPython. """ diff --git a/xarray/tests/test_dataarray.py b/xarray/tests/test_dataarray.py index 6f065c9daed..fbd9810f285 100644 --- a/xarray/tests/test_dataarray.py +++ b/xarray/tests/test_dataarray.py @@ -1412,6 +1412,12 @@ def test_coords_non_string(self): expected = DataArray(2, coords={1: 2}, name=1) assert_identical(actual, expected) + def test_coords_delitem_delete_indexes(self): + # regression test for GH3746 + arr = DataArray(np.ones((2,)), dims="x", coords={"x": [0, 1]}) + del arr.coords["x"] + assert "x" not in arr.indexes + def test_broadcast_like(self): arr1 = DataArray( np.ones((2, 3)), diff --git a/xarray/tests/test_dataset.py b/xarray/tests/test_dataset.py index d2e7bcdabf8..20b814a25c7 100644 --- a/xarray/tests/test_dataset.py +++ b/xarray/tests/test_dataset.py @@ -744,6 +744,10 @@ def test_coords_modify(self): expected = data.merge({"c": 11}).set_coords("c") assert_identical(expected, actual) + # regression test for GH3746 + del actual.coords["x"] + assert "x" not in actual.indexes + def test_update_index(self): actual = Dataset(coords={"x": [1, 2, 3]}) actual["x"] = ["a", "b", "c"] From b6409f0627d813065b58f67e6244cbe47f84090c Mon Sep 17 00:00:00 2001 From: Deepak Cherian Date: Sat, 21 Mar 2020 19:51:06 +0000 Subject: [PATCH 52/75] map_blocks: allow user function to add new unindexed dimension. (#3817) --- doc/whats-new.rst | 3 ++- xarray/core/parallel.py | 3 +++ xarray/tests/test_dask.py | 2 ++ 3 files changed, 7 insertions(+), 1 deletion(-) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index ac80524a3c4..86272cf8710 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -43,6 +43,8 @@ New Features arguments and should instead pass a single list of dimensions. (:pull:`3802`) By `Maximilian Roos `_ +- :py:func:`map_blocks` can now apply functions that add new unindexed dimensions. + By `Deepak Cherian `_ - The new ``Dataset._repr_html_`` and ``DataArray._repr_html_`` (introduced in 0.14.1) is now on by default. To disable, use ``xarray.set_options(display_style="text")``. @@ -60,7 +62,6 @@ New Features (:issue:`3843`, :pull:`3844`) By `Aaron Spring `_. 
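Condensed into a runnable sketch, the behaviour the regression test above pins down (the caught message mirrors the new ``KeyError`` text in ``coordinates.py``):

```python
import numpy as np
import xarray as xr

arr = xr.DataArray(np.ones((2,)), dims="x", coords={"x": [0, 1]})

del arr.coords["x"]       # previously left a stale entry in arr.indexes
assert "x" not in arr.indexes

try:
    del arr.coords["y"]   # not a coordinate -> clearer KeyError
except KeyError as err:
    print(err)            # 'y' is not a coordinate variable.
```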
- Bug fixes ~~~~~~~~~ - Fix :py:meth:`Dataset.interp` when indexing array shares coordinates with the diff --git a/xarray/core/parallel.py b/xarray/core/parallel.py index 8429d0f71ad..6f1668f698f 100644 --- a/xarray/core/parallel.py +++ b/xarray/core/parallel.py @@ -386,6 +386,9 @@ def _wrapper(func, obj, to_array, args, kwargs): var_chunks.append(input_chunks[dim]) elif dim in indexes: var_chunks.append((len(indexes[dim]),)) + elif dim in template.dims: + # new unindexed dimension + var_chunks.append((template.sizes[dim],)) data = dask.array.Array( hlg, name=gname_l, chunks=var_chunks, dtype=template[name].dtype diff --git a/xarray/tests/test_dask.py b/xarray/tests/test_dask.py index 4f7e3910f82..923b35e5946 100644 --- a/xarray/tests/test_dask.py +++ b/xarray/tests/test_dask.py @@ -1147,6 +1147,7 @@ def test_map_blocks_to_array(map_ds): lambda x: x.to_dataset(), lambda x: x.drop_vars("x"), lambda x: x.expand_dims(k=[1, 2, 3]), + lambda x: x.expand_dims(k=3), lambda x: x.assign_coords(new_coord=("y", x.y * 2)), lambda x: x.astype(np.int32), # TODO: [lambda x: x.isel(x=1).drop_vars("x"), map_da], @@ -1167,6 +1168,7 @@ def test_map_blocks_da_transformations(func, map_da): lambda x: x.drop_vars("a"), lambda x: x.drop_vars("x"), lambda x: x.expand_dims(k=[1, 2, 3]), + lambda x: x.expand_dims(k=3), lambda x: x.rename({"a": "new1", "b": "new2"}), # TODO: [lambda x: x.isel(x=1)], ], From 6c19aab1ae52ba820e7b61cad8636d7af37830aa Mon Sep 17 00:00:00 2001 From: Maximilian Roos <5635139+max-sixty@users.noreply.github.com> Date: Sun, 22 Mar 2020 02:27:13 -0400 Subject: [PATCH 53/75] add spacing in the versions section of the issue report (#3876) --- .github/ISSUE_TEMPLATE/bug_report.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.github/ISSUE_TEMPLATE/bug_report.md b/.github/ISSUE_TEMPLATE/bug_report.md index 83c3aea53a8..37dbcd2ebb0 100644 --- a/.github/ISSUE_TEMPLATE/bug_report.md +++ b/.github/ISSUE_TEMPLATE/bug_report.md @@ -28,6 +28,8 @@ assignees: '' #### Versions
Output of `xr.show_versions()` + +
From 2d0b85e84fa1d3d540ead8be04fc27703041b2cb Mon Sep 17 00:00:00 2001 From: Spencer Clark Date: Sun, 22 Mar 2020 18:19:41 -0400 Subject: [PATCH 54/75] Re-enable tests xfailed in #3808 and fix new CFTimeIndex failures due to upstream changes (#3874) * Re-enable tests xfailed in #3808 * Add _cache attribute to CFTimeIndex * Temporarily install pandas master from GitHub instead of wheel * Fix pandas url --- ci/azure/install.yml | 4 ++-- xarray/coding/cftimeindex.py | 1 + xarray/tests/test_cftimeindex.py | 17 ++--------------- xarray/tests/test_interp.py | 4 ---- 4 files changed, 5 insertions(+), 21 deletions(-) diff --git a/ci/azure/install.yml b/ci/azure/install.yml index 958e3c180fa..60559dd2064 100644 --- a/ci/azure/install.yml +++ b/ci/azure/install.yml @@ -19,7 +19,6 @@ steps: --upgrade \ matplotlib \ numpy \ - pandas \ scipy python -m pip install \ --no-deps \ @@ -30,7 +29,8 @@ steps: git+https://github.com/Unidata/cftime \ git+https://github.com/mapbox/rasterio \ git+https://github.com/hgrecco/pint \ - git+https://github.com/pydata/bottleneck + git+https://github.com/pydata/bottleneck \ + git+https://github.com/pandas-dev/pandas condition: eq(variables['UPSTREAM_DEV'], 'true') displayName: Install upstream dev dependencies diff --git a/xarray/coding/cftimeindex.py b/xarray/coding/cftimeindex.py index c680a7e0bcf..2e42702caac 100644 --- a/xarray/coding/cftimeindex.py +++ b/xarray/coding/cftimeindex.py @@ -253,6 +253,7 @@ def __new__(cls, data, name=None): result = object.__new__(cls) result._data = np.array(data, dtype="O") result.name = name + result._cache = {} return result def _partial_date_slice(self, resolution, parsed): diff --git a/xarray/tests/test_cftimeindex.py b/xarray/tests/test_cftimeindex.py index 43d6d7b068e..d31bf9471ea 100644 --- a/xarray/tests/test_cftimeindex.py +++ b/xarray/tests/test_cftimeindex.py @@ -450,7 +450,6 @@ def test_sel_date_scalar(da, date_type, index): assert_identical(result, expected) -@pytest.mark.xfail(reason="https://github.com/pydata/xarray/issues/3751") @requires_cftime def test_sel_date_distant_date(da, date_type, index): expected = xr.DataArray(4).assign_coords(time=index[3]) @@ -513,12 +512,7 @@ def test_sel_date_scalar_backfill(da, date_type, index, sel_kwargs): [ {"method": "pad", "tolerance": timedelta(days=20)}, {"method": "backfill", "tolerance": timedelta(days=20)}, - pytest.param( - {"method": "nearest", "tolerance": timedelta(days=20)}, - marks=pytest.mark.xfail( - reason="https://github.com/pydata/xarray/issues/3751" - ), - ), + {"method": "nearest", "tolerance": timedelta(days=20)}, ], ) def test_sel_date_scalar_tolerance_raises(da, date_type, sel_kwargs): @@ -526,7 +520,6 @@ def test_sel_date_scalar_tolerance_raises(da, date_type, sel_kwargs): da.sel(time=date_type(1, 5, 1), **sel_kwargs) -@pytest.mark.xfail(reason="https://github.com/pydata/xarray/issues/3751") @requires_cftime @pytest.mark.parametrize( "sel_kwargs", @@ -574,12 +567,7 @@ def test_sel_date_list_backfill(da, date_type, index, sel_kwargs): [ {"method": "pad", "tolerance": timedelta(days=20)}, {"method": "backfill", "tolerance": timedelta(days=20)}, - pytest.param( - {"method": "nearest", "tolerance": timedelta(days=20)}, - marks=pytest.mark.xfail( - reason="https://github.com/pydata/xarray/issues/3751" - ), - ), + {"method": "nearest", "tolerance": timedelta(days=20)}, ], ) def test_sel_date_list_tolerance_raises(da, date_type, sel_kwargs): @@ -614,7 +602,6 @@ def range_args(date_type): ] -@pytest.mark.xfail(reason="https://github.com/pydata/xarray/issues/3751") 
@requires_cftime def test_indexing_in_series_getitem(series, index, scalar_args, range_args): for arg in scalar_args: diff --git a/xarray/tests/test_interp.py b/xarray/tests/test_interp.py index 9cc4933f462..0502348160e 100644 --- a/xarray/tests/test_interp.py +++ b/xarray/tests/test_interp.py @@ -586,7 +586,6 @@ def test_datetime_single_string(): assert_allclose(actual.drop_vars("time"), expected) -@pytest.mark.xfail(reason="https://github.com/pydata/xarray/issues/3751") @requires_cftime @requires_scipy def test_cftime(): @@ -613,7 +612,6 @@ def test_cftime_type_error(): da.interp(time=times_new) -@pytest.mark.xfail(reason="https://github.com/pydata/xarray/issues/3751") @requires_cftime @requires_scipy def test_cftime_list_of_strings(): @@ -635,7 +633,6 @@ def test_cftime_list_of_strings(): assert_allclose(actual, expected) -@pytest.mark.xfail(reason="https://github.com/pydata/xarray/issues/3751") @requires_cftime @requires_scipy def test_cftime_single_string(): @@ -697,7 +694,6 @@ def test_datetime_interp_noerror(): a.interp(x=xi, time=xi.time) # should not raise an error -@pytest.mark.xfail(reason="https://github.com/pydata/xarray/issues/3751") @requires_cftime def test_3641(): times = xr.cftime_range("0001", periods=3, freq="500Y") From 9eec56c833da6dca02c3e6c593586fd201a534a0 Mon Sep 17 00:00:00 2001 From: Deepak Cherian Date: Mon, 23 Mar 2020 07:42:49 -0600 Subject: [PATCH 55/75] Raise error when assigning to IndexVariable.values & IndexVariable.data (#3862) * Raise error when assigning IndexVariable.values, IndexVariable.data Fixes #3470 * fix existing tests * Add new test * whats-new * Fix more existing tests * Update doc/whats-new.rst * fix docs * update whats-new --- doc/plotting.rst | 2 +- doc/whats-new.rst | 6 ++++++ xarray/core/variable.py | 14 +++++++++++--- xarray/tests/test_accessor_dt.py | 4 ++-- xarray/tests/test_dask.py | 2 +- xarray/tests/test_variable.py | 7 +++++-- 6 files changed, 26 insertions(+), 9 deletions(-) diff --git a/doc/plotting.rst b/doc/plotting.rst index ea9816780a7..f3d9c0213de 100644 --- a/doc/plotting.rst +++ b/doc/plotting.rst @@ -657,7 +657,7 @@ Additionally, the boolean kwarg ``add_guide`` can be used to prevent the display .. ipython:: python - ds.w.values = [1, 2, 3, 5] + ds = ds.assign(w=[1, 2, 3, 5]) @savefig ds_discrete_legend_hue_scatter.png ds.plot.scatter(x='A', y='B', hue='w', hue_style='discrete') diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 86272cf8710..40307827bc9 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -22,6 +22,12 @@ v0.15.1 (unreleased) Breaking changes ~~~~~~~~~~~~~~~~ +- Raise an error when assigning to the ``.values`` or ``.data`` attribute of + dimension coordinates i.e. ``IndexVariable`` objects. This has been broken since + v0.12.0. Please use :py:meth:`DataArray.assign_coords` or :py:meth:`Dataset.assign_coords` + instead. (:issue:`3470`, :pull:`3862`) + By `Deepak Cherian `_ + New Features ~~~~~~~~~~~~ diff --git a/xarray/core/variable.py b/xarray/core/variable.py index 1ec6512e4fb..c9addeefb04 100644 --- a/xarray/core/variable.py +++ b/xarray/core/variable.py @@ -2104,9 +2104,17 @@ def load(self): # https://github.com/python/mypy/issues/1465 @Variable.data.setter # type: ignore def data(self, data): - Variable.data.fset(self, data) - if not isinstance(self._data, PandasIndexAdapter): - self._data = PandasIndexAdapter(self._data) + raise ValueError( + f"Cannot assign to the .data attribute of dimension coordinate a.k.a IndexVariable {self.name!r}. 
" + f"Please use DataArray.assign_coords, Dataset.assign_coords or Dataset.assign as appropriate." + ) + + @Variable.values.setter # type: ignore + def values(self, values): + raise ValueError( + f"Cannot assign to the .values attribute of dimension coordinate a.k.a IndexVariable {self.name!r}. " + f"Please use DataArray.assign_coords, Dataset.assign_coords or Dataset.assign as appropriate." + ) def chunk(self, chunks=None, name=None, lock=False): # Dummy - do not chunk. This method is invoked e.g. by Dataset.chunk() diff --git a/xarray/tests/test_accessor_dt.py b/xarray/tests/test_accessor_dt.py index 20a9283e32c..b3640722106 100644 --- a/xarray/tests/test_accessor_dt.py +++ b/xarray/tests/test_accessor_dt.py @@ -80,7 +80,7 @@ def test_strftime(self): def test_not_datetime_type(self): nontime_data = self.data.copy() int_data = np.arange(len(self.data.time)).astype("int8") - nontime_data["time"].values = int_data + nontime_data = nontime_data.assign_coords(time=int_data) with raises_regex(TypeError, "dt"): nontime_data.time.dt @@ -213,7 +213,7 @@ def setup(self): def test_not_datetime_type(self): nontime_data = self.data.copy() int_data = np.arange(len(self.data.time)).astype("int8") - nontime_data["time"].values = int_data + nontime_data = nontime_data.assign_coords(time=int_data) with raises_regex(TypeError, "dt"): nontime_data.time.dt diff --git a/xarray/tests/test_dask.py b/xarray/tests/test_dask.py index 923b35e5946..538dbbfb58b 100644 --- a/xarray/tests/test_dask.py +++ b/xarray/tests/test_dask.py @@ -1276,7 +1276,7 @@ def test_token_changes_when_data_changes(obj): assert t3 != t2 # Change IndexVariable - obj.coords["x"] *= 2 + obj = obj.assign_coords(x=obj.x * 2) with raise_if_dask_computes(): t4 = dask.base.tokenize(obj) assert t4 != t3 diff --git a/xarray/tests/test_variable.py b/xarray/tests/test_variable.py index 525a005c601..116466e112d 100644 --- a/xarray/tests/test_variable.py +++ b/xarray/tests/test_variable.py @@ -538,8 +538,7 @@ def test_copy_index_with_data(self): orig = IndexVariable("x", np.arange(5)) new_data = np.arange(5, 10) actual = orig.copy(data=new_data) - expected = orig.copy() - expected.data = new_data + expected = IndexVariable("x", np.arange(5, 10)) assert_identical(expected, actual) def test_copy_index_with_data_errors(self): @@ -547,6 +546,10 @@ def test_copy_index_with_data_errors(self): new_data = np.arange(5, 20) with raises_regex(ValueError, "must match shape of object"): orig.copy(data=new_data) + with raises_regex(ValueError, "Cannot assign to the .data"): + orig.data = new_data + with raises_regex(ValueError, "Cannot assign to the .values"): + orig.values = new_data def test_replace(self): var = Variable(("x", "y"), [[1.5, 2.0], [3.1, 4.3]], {"foo": "bar"}) From c32d7bdda1ab00f37989e57605a851ca07c30d82 Mon Sep 17 00:00:00 2001 From: keewis Date: Mon, 23 Mar 2020 19:03:04 +0100 Subject: [PATCH 56/75] reword the whats-new entry for unit support (#3878) * reword the whats-new entry for unit support of top-level functions and Variable --- doc/whats-new.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 40307827bc9..aac2ca7bbf8 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -41,7 +41,7 @@ New Features - Support new h5netcdf backend keyword `phony_dims` (available from h5netcdf v0.8.0 for :py:class:`~xarray.backends.H5NetCDFStore`. By `Kai Mühlbauer `_. -- Support unit aware arrays with pint. 
(:issue:`3594`, :pull:`3706`, :pull:`3611`) +- Add partial support for unit aware arrays with pint. (:pull:`3706`, :pull:`3611`) By `Justus Magin `_. - :py:meth:`Dataset.groupby` and :py:meth:`DataArray.groupby` now raise a `TypeError` on multiple string arguments. Receiving multiple string arguments From 6c27ef24616c050ee1d0c510e13d33c7378c9fe2 Mon Sep 17 00:00:00 2001 From: Maximilian Roos <5635139+max-sixty@users.noreply.github.com> Date: Mon, 23 Mar 2020 15:14:12 -0400 Subject: [PATCH 57/75] update panel documentation (#3880) * update panel documentation * @keewis feedback --- doc/pandas.rst | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/doc/pandas.rst b/doc/pandas.rst index b1660e48dd2..b0ec2a117dc 100644 --- a/doc/pandas.rst +++ b/doc/pandas.rst @@ -110,10 +110,10 @@ Multi-dimensional data Tidy data is great, but it sometimes you want to preserve dimensions instead of automatically stacking them into a ``MultiIndex``. -:py:meth:`DataArray.to_pandas()` is a shortcut that -lets you convert a DataArray directly into a pandas object with the same -dimensionality (i.e., a 1D array is converted to a :py:class:`~pandas.Series`, -2D to :py:class:`~pandas.DataFrame` and 3D to ``pandas.Panel``): +:py:meth:`DataArray.to_pandas()` is a shortcut that lets you convert a +DataArray directly into a pandas object with the same dimensionality, if +available in pandas (i.e., a 1D array is converted to a +:py:class:`~pandas.Series` and 2D to :py:class:`~pandas.DataFrame`): .. ipython:: python @@ -151,13 +151,13 @@ However, you will need to set dimension names explicitly, either with the Transitioning from pandas.Panel to xarray ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -``Panel``, pandas' data structure for 3D arrays, has always -been a second class data structure compared to the Series and DataFrame. To -allow pandas developers to focus more on its core functionality built around -the DataFrame, pandas has deprecated ``Panel``. It will be removed in pandas -0.25. +``Panel``, pandas' data structure for 3D arrays, was always a second class +data structure compared to the Series and DataFrame. To allow pandas +developers to focus more on its core functionality built around the +DataFrame, pandas removed ``Panel`` in favor of directing users who use +multi-dimensional arrays to xarray. -xarray has most of ``Panel``'s features, a more explicit API (particularly around +Xarray has most of ``Panel``'s features, a more explicit API (particularly around indexing), and the ability to scale to >3 dimensions with the same interface. As discussed :ref:`elsewhere ` in the docs, there are two primary data structures in @@ -210,7 +210,7 @@ You can also easily convert this data into ``Dataset``: array.to_dataset(dim='dim_0') Here, there are two data variables, each representing a DataFrame on panel's -``items`` axis, and labelled as such. Each variable is a 2D array of the +``items`` axis, and labeled as such. Each variable is a 2D array of the respective values along the ``items`` dimension. 
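As a runnable sketch of the conversion described here (axis names chosen to echo the old Panel layout; the values are arbitrary):

```python
import numpy as np
import xarray as xr

# A stand-in for what would once have been a pandas.Panel:
array = xr.DataArray(
    np.random.randn(2, 3, 4),
    dims=("items", "major_axis", "minor_axis"),
    coords={"items": ["a", "b"]},
)

# One Dataset variable per label along "items":
ds = array.to_dataset(dim="items")
assert set(ds.data_vars) == {"a", "b"}
assert ds["a"].dims == ("major_axis", "minor_axis")
```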
While the xarray docs are relatively complete, a few items stand out for Panel users: From 321f2e55253b61a251cd5d2db5329dd37d39a471 Mon Sep 17 00:00:00 2001 From: Maximilian Roos <5635139+max-sixty@users.noreply.github.com> Date: Mon, 23 Mar 2020 15:14:32 -0400 Subject: [PATCH 58/75] whatsnew for 0.15.1 (#3879) * whatsnew for 0.15.1 * title formatting --- doc/whats-new.rst | 30 ++++++++++++++++++------------ 1 file changed, 18 insertions(+), 12 deletions(-) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index aac2ca7bbf8..db50d09f431 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -16,9 +16,13 @@ What's New .. _whats-new.0.15.1: -v0.15.1 (unreleased) +v0.15.1 (23 Mar 2020) --------------------- +This release brings many new features such as :py:meth:`Dataset.weighted` methods for weighted array +reductions, a new jupyter repr by default, and the start of units integration with pint. There's also +the usual batch of usability improvements, documentation additions, and bug fixes. + Breaking changes ~~~~~~~~~~~~~~~~ @@ -34,6 +38,10 @@ New Features - Weighted array reductions are now supported via the new :py:meth:`DataArray.weighted` and :py:meth:`Dataset.weighted` methods. See :ref:`comput.weighted`. (:issue:`422`, :pull:`2922`). By `Mathias Hauser `_ +- The new jupyter notebook repr (``Dataset._repr_html_`` and + ``DataArray._repr_html_``) (introduced in 0.14.1) is now on by default. To + disable, use ``xarray.set_options(display_style="text")``. + By `Julia Signell `_. - Added support for :py:class:`pandas.DatetimeIndex`-style rounding of ``cftime.datetime`` objects directly via a :py:class:`CFTimeIndex` or via the :py:class:`~core.accessor_dt.DatetimeAccessor`. @@ -51,10 +59,6 @@ New Features By `Maximilian Roos `_ - :py:func:`map_blocks` can now apply functions that add new unindexed dimensions. By `Deepak Cherian `_ -- The new ``Dataset._repr_html_`` and ``DataArray._repr_html_`` (introduced - in 0.14.1) is now on by default. To disable, use - ``xarray.set_options(display_style="text")``. - By `Julia Signell `_. - An ellipsis (``...``) is now supported in the ``dims`` argument of :py:meth:`Dataset.stack` and :py:meth:`DataArray.stack`, meaning all unlisted dimensions, similar to its meaning in :py:meth:`DataArray.transpose`. @@ -63,13 +67,14 @@ New Features - :py:meth:`Dataset.where` and :py:meth:`DataArray.where` accept a lambda as a first argument, which is then called on the input; replicating pandas' behavior. By `Maximilian Roos `_. -- Implement ``skipna`` in :py:meth:`Dataset.quantile`, :py:meth:`DataArray.quantile`, +- ``skipna`` is available in :py:meth:`Dataset.quantile`, :py:meth:`DataArray.quantile`, :py:meth:`core.groupby.DatasetGroupBy.quantile`, :py:meth:`core.groupby.DataArrayGroupBy.quantile` (:issue:`3843`, :pull:`3844`) By `Aaron Spring `_. Bug fixes ~~~~~~~~~ + - Fix :py:meth:`Dataset.interp` when indexing array shares coordinates with the indexed variable (:issue:`3252`). By `David Huard `_. @@ -107,6 +112,7 @@ Bug fixes Documentation ~~~~~~~~~~~~~ + - Fix documentation of :py:class:`DataArray` removing the deprecated mention that when omitted, `dims` are inferred from a `coords`-dict. (:pull:`3821`) By `Sander van Rijn `_. @@ -119,25 +125,25 @@ Documentation Internal Changes ~~~~~~~~~~~~~~~~ -- Removed the internal ``import_seaborn`` function which handled the deprecation of +- Remove the internal ``import_seaborn`` function which handled the deprecation of the ``seaborn.apionly`` entry point (:issue:`3747`). By `Mathias Hauser `_. 
- Don't test pint integration in combination with datetime objects. (:issue:`3778`, :pull:`3788`) By `Justus Magin `_. -- Changed test_open_mfdataset_list_attr to only run with dask installed +- Change test_open_mfdataset_list_attr to only run with dask installed (:issue:`3777`, :pull:`3780`). By `Bruno Pagani `_. -- Preserved the ability to index with ``method="nearest"`` with a +- Preserve the ability to index with ``method="nearest"`` with a :py:class:`CFTimeIndex` with pandas versions greater than 1.0.1 (:issue:`3751`). By `Spencer Clark `_. - Greater flexibility and improved test coverage of subtracting various types of objects from a :py:class:`CFTimeIndex`. By `Spencer Clark `_. -- Updated Azure CI MacOS image, given pending removal. +- Update Azure CI MacOS image, given pending removal. By `Maximilian Roos `_ -- Removed xfails for scipy 1.0.1 for tests that append to netCDF files (:pull:`3805`). +- Remove xfails for scipy 1.0.1 for tests that append to netCDF files (:pull:`3805`). By `Mathias Hauser `_. -- Removed conversion to :py:class:`pandas.Panel`, given its removal in pandas +- Remove conversion to :py:class:`pandas.Panel`, given its removal in pandas in favor of xarray's objects. By `Maximilian Roos `_ From 732b6cd6248ce715da74f3cd7a0e211eaa1d0aa2 Mon Sep 17 00:00:00 2001 From: Maximilian Roos Date: Mon, 23 Mar 2020 16:41:44 -0400 Subject: [PATCH 59/75] Release v0.15.1 From a2cccd641d7f4c66e0a517a721a3e06f415ae0ee Mon Sep 17 00:00:00 2001 From: Maximilian Roos Date: Mon, 23 Mar 2020 17:05:09 -0400 Subject: [PATCH 60/75] whatsnew section for 0.16.0 --- doc/whats-new.rst | 26 +++++++++++++++++++++++++- 1 file changed, 25 insertions(+), 1 deletion(-) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index db50d09f431..10f6b23ca66 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -13,9 +13,33 @@ What's New import xarray as xr np.random.seed(123456) -.. _whats-new.0.15.1: +.. _whats-new.0.16.0: + +v0.16.0 (unreleased) +--------------------- + +Breaking changes +~~~~~~~~~~~~~~~~ + + +New Features +~~~~~~~~~~~~ + + +Bug fixes +~~~~~~~~~ + + +Documentation +~~~~~~~~~~~~~ +Internal Changes +~~~~~~~~~~~~~~~~ + + +.. _whats-new.0.15.1: + v0.15.1 (23 Mar 2020) --------------------- From c707b337a0f75224ee3b3e3b65a08da792df2fa6 Mon Sep 17 00:00:00 2001 From: Maximilian Roos <5635139+max-sixty@users.noreply.github.com> Date: Mon, 23 Mar 2020 20:52:13 -0400 Subject: [PATCH 61/75] Tweaks to "how_to_release" (#3882) * tweaks to how_to_release * previous release * copy paste list of names looks terrible * should not show --- HOW_TO_RELEASE.md | 39 +++++++++++++++++++++++++++++++++------ 1 file changed, 33 insertions(+), 6 deletions(-) diff --git a/HOW_TO_RELEASE.md b/HOW_TO_RELEASE.md index 4ef7342a5ed..3fdd1d7236d 100644 --- a/HOW_TO_RELEASE.md +++ b/HOW_TO_RELEASE.md @@ -23,7 +23,7 @@ Time required: about an hour. 4. Check that the ReadTheDocs build is passing. 5. On the master branch, commit the release in git: ``` - git commit -a -m 'Release v0.X.Y' + git commit -am 'Release v0.X.Y' ``` 6. Tag the release: ``` @@ -60,10 +60,35 @@ Time required: about an hour. It's OK to force push to 'stable' if necessary. (We also update the stable branch with `git cherrypick` for documentation only fixes that apply the current released version.) -12. Add a section for the next release (v.X.(Y+1)) to doc/whats-new.rst. +12. Add a section for the next release (v.X.Y+1) to doc/whats-new.rst: + ``` + .. 
_whats-new.0.X.Y+1: + + v0.X.Y+1 (unreleased) + --------------------- + + Breaking changes + ~~~~~~~~~~~~~~~~ + + + New Features + ~~~~~~~~~~~~ + + + Bug fixes + ~~~~~~~~~ + + + Documentation + ~~~~~~~~~~~~~ + + + Internal Changes + ~~~~~~~~~~~~~~~~ + ``` 13. Commit your changes and push to master again: ``` - git commit -a -m 'New whatsnew section' + git commit -am 'New whatsnew section' git push upstream master ``` You're done pushing to master! @@ -88,15 +113,17 @@ Time required: about an hour. ``` git log "$(git tag --sort="v:refname" | sed -n 'x;$p').." --format="%aN" | sort -u ``` - or by replacing `v0.X.Y` with the _previous_ release in: + or by substituting the _previous_ release in: ``` - git log v0.X.Y.. --format="%aN" | sort -u + git log v0.X.Y-1.. --format="%aN" | sort -u ``` + NB: copying this output into a Google Groups form can cause + [issues](https://groups.google.com/forum/#!topic/xarray/hK158wAviPs) with line breaks, so take care Note on version numbering: We follow a rough approximation of semantic version. Only major releases (0.X.0) -show include breaking changes. Minor releases (0.X.Y) are for bug fixes and +should include breaking changes. Minor releases (0.X.Y) are for bug fixes and backwards compatible new features, but if a sufficient number of new features have arrived we will issue a major release even if there are no compatibility breaks. From ee3c87659d1687a86d406065a5af1b4b87beec17 Mon Sep 17 00:00:00 2001 From: Maximilian Roos <5635139+max-sixty@users.noreply.github.com> Date: Tue, 24 Mar 2020 14:48:35 -0400 Subject: [PATCH 62/75] Use `fixes` in PR template (#3886) * Use `fixes` in PR template * whatsnew --- .github/PULL_REQUEST_TEMPLATE.md | 2 +- doc/whats-new.rst | 3 +++ 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md index a921bddaa23..c30202ac046 100644 --- a/.github/PULL_REQUEST_TEMPLATE.md +++ b/.github/PULL_REQUEST_TEMPLATE.md @@ -1,6 +1,6 @@ - - [ ] Closes #xxxx + - [ ] Fixes #xxxx - [ ] Tests added - [ ] Passes `isort -rc . && black . && mypy . && flake8` - [ ] Fully documented, including `whats-new.rst` for all changes and `api.rst` for new API diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 10f6b23ca66..d40ca82ba85 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -36,6 +36,9 @@ Documentation Internal Changes ~~~~~~~~~~~~~~~~ +- Use ``Fixes`` rather than ``Closes`` in GitHub Pull Request template, allowing + linking to issues. + By `Maximilian Roos `_ .. 
_whats-new.0.15.1: From c10c9928d8800e32a4c127429b1fa11bdb68aca1 Mon Sep 17 00:00:00 2001 From: Maximilian Roos <5635139+max-sixty@users.noreply.github.com> Date: Tue, 24 Mar 2020 15:24:42 -0400 Subject: [PATCH 63/75] xfail test_uamiv_format_write (#3885) * xfail test_uamiv_format_write * add reason kwarg * xfail test_dayofyear_after_cftime_range --- xarray/tests/test_backends.py | 1 + xarray/tests/test_cftime_offsets.py | 1 + 2 files changed, 2 insertions(+) diff --git a/xarray/tests/test_backends.py b/xarray/tests/test_backends.py index 5f8ba83c330..a4585985bdc 100644 --- a/xarray/tests/test_backends.py +++ b/xarray/tests/test_backends.py @@ -3548,6 +3548,7 @@ def test_uamiv_format_mfread(self): assert_allclose(expected, actual) camxfile.close() + @pytest.mark.xfail(reason="Flaky; see GH3711") def test_uamiv_format_write(self): fmtkw = {"format": "uamiv"} diff --git a/xarray/tests/test_cftime_offsets.py b/xarray/tests/test_cftime_offsets.py index 343e059f53c..2352f9e8cdd 100644 --- a/xarray/tests/test_cftime_offsets.py +++ b/xarray/tests/test_cftime_offsets.py @@ -1176,6 +1176,7 @@ def test_dayofweek_after_cftime_range(freq): np.testing.assert_array_equal(result, expected) +@pytest.mark.xfail(reason="See GH3885") @pytest.mark.parametrize("freq", ["A", "M", "D"]) def test_dayofyear_after_cftime_range(freq): pytest.importorskip("cftime", minversion="1.0.2.1") From d8bb6204dc6a4bacdfca25b02ba62bb7f1bb5795 Mon Sep 17 00:00:00 2001 From: johnomotani Date: Tue, 24 Mar 2020 20:40:17 +0000 Subject: [PATCH 64/75] Control attrs of result in `merge()`, `concat()`, `combine_by_coords()` and `combine_nested()` (#3877) * Optionally promote attrs from DataArray to Dataset in to_dataset Adds option 'promote_attrs' to DataArray.to_dataset(). By default promote_attrs=False, maintaining current behaviour. If promote_attrs=True, the attrs of the DataArray are shallow-copied to the Dataset returned by to_dataset(). * utils.ordered_dict_union returns the union of two compatible dicts If the values of any shared key are not equivalent, then raises an error. * combine_attrs argument for merge() Provides several options for how to combine the attributes of the passed objects and give them to the returned Dataset. * combine_attrs argument for concat() Provides several options for how to combine the attributes of the passed objects and give them to the returned DataArray or Dataset. * combine_attrs argument for combine_by_coords() and combine_nested() Provides several options for how to combine the attributes of the passed objects and give them to the returned Dataset. * Add combine_attrs changes to whats-new.rst * Update docstrings to note default values Apply suggestions from code review Co-Authored-By: Maximilian Roos <5635139+max-sixty@users.noreply.github.com> * First argument of update_safety_check and ordered_dict_union not mutable No need for these arguments to be MutableMapping rather than just Mapping. * Rename ordered_dict_union -> compat_dict_union Do not use OrderedDicts any more, so name did not make sense. 
* Move combine_attrs to v0.16.0 in whats-new.rst * Fix merge of whats-new.rst Co-authored-by: Maximilian Roos <5635139+max-sixty@users.noreply.github.com> --- doc/whats-new.rst | 4 ++ xarray/core/combine.py | 50 +++++++++++++++++-- xarray/core/concat.py | 34 ++++++++++--- xarray/core/dataarray.py | 19 ++++++-- xarray/core/dataset.py | 2 +- xarray/core/merge.py | 85 +++++++++++++++++++++++++++++--- xarray/core/utils.py | 34 ++++++++++++- xarray/tests/test_combine.py | 89 ++++++++++++++++++++++++++++++++++ xarray/tests/test_concat.py | 46 ++++++++++++++++++ xarray/tests/test_dataarray.py | 11 ++++- xarray/tests/test_merge.py | 60 +++++++++++++++++++++++ xarray/tests/test_utils.py | 11 ++++- 12 files changed, 420 insertions(+), 25 deletions(-) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index d40ca82ba85..4515f552812 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -24,6 +24,10 @@ Breaking changes New Features ~~~~~~~~~~~~ +- Control over attributes of result in :py:func:`merge`, :py:func:`concat`, + :py:func:`combine_by_coords` and :py:func:`combine_nested` using + combine_attrs keyword argument. (:issue:`3865`, :pull:`3877`) + By `John Omotani `_ Bug fixes diff --git a/xarray/core/combine.py b/xarray/core/combine.py index 1fa2df00352..1f990457798 100644 --- a/xarray/core/combine.py +++ b/xarray/core/combine.py @@ -155,6 +155,7 @@ def _combine_nd( compat="no_conflicts", fill_value=dtypes.NA, join="outer", + combine_attrs="drop", ): """ Combines an N-dimensional structure of datasets into one by applying a @@ -202,13 +203,21 @@ def _combine_nd( compat=compat, fill_value=fill_value, join=join, + combine_attrs=combine_attrs, ) (combined_ds,) = combined_ids.values() return combined_ds def _combine_all_along_first_dim( - combined_ids, dim, data_vars, coords, compat, fill_value=dtypes.NA, join="outer" + combined_ids, + dim, + data_vars, + coords, + compat, + fill_value=dtypes.NA, + join="outer", + combine_attrs="drop", ): # Group into lines of datasets which must be combined along dim @@ -223,7 +232,7 @@ def _combine_all_along_first_dim( combined_ids = dict(sorted(group)) datasets = combined_ids.values() new_combined_ids[new_id] = _combine_1d( - datasets, dim, compat, data_vars, coords, fill_value, join + datasets, dim, compat, data_vars, coords, fill_value, join, combine_attrs ) return new_combined_ids @@ -236,6 +245,7 @@ def _combine_1d( coords="different", fill_value=dtypes.NA, join="outer", + combine_attrs="drop", ): """ Applies either concat or merge to 1D list of datasets depending on value @@ -252,6 +262,7 @@ def _combine_1d( compat=compat, fill_value=fill_value, join=join, + combine_attrs=combine_attrs, ) except ValueError as err: if "encountered unexpected variable" in str(err): @@ -265,7 +276,13 @@ def _combine_1d( else: raise else: - combined = merge(datasets, compat=compat, fill_value=fill_value, join=join) + combined = merge( + datasets, + compat=compat, + fill_value=fill_value, + join=join, + combine_attrs=combine_attrs, + ) return combined @@ -284,6 +301,7 @@ def _nested_combine( ids, fill_value=dtypes.NA, join="outer", + combine_attrs="drop", ): if len(datasets) == 0: @@ -311,6 +329,7 @@ def _nested_combine( coords=coords, fill_value=fill_value, join=join, + combine_attrs=combine_attrs, ) return combined @@ -323,6 +342,7 @@ def combine_nested( coords="different", fill_value=dtypes.NA, join="outer", + combine_attrs="drop", ): """ Explicitly combine an N-dimensional grid of datasets into one by using a @@ -390,6 +410,16 @@ def combine_nested( - 'override': if indexes 
      are of same size, rewrite indexes to be those of the first object with
       that dimension. Indexes for the same dimension must have the same size
       in all objects.
+    combine_attrs : {'drop', 'identical', 'no_conflicts', 'override'},
+                    default 'drop'
+        String indicating how to combine attrs of the objects being merged:
+
+        - 'drop': empty attrs on returned Dataset.
+        - 'identical': all attrs must be the same on every object.
+        - 'no_conflicts': attrs from all objects are combined, any that have
+          the same name must also have the same value.
+        - 'override': skip comparing and copy attrs from the first dataset to
+          the result.
 
     Returns
     -------
@@ -468,6 +498,7 @@ def combine_nested(
         ids=False,
         fill_value=fill_value,
         join=join,
+        combine_attrs=combine_attrs,
     )
 
 
@@ -482,6 +513,7 @@ def combine_by_coords(
     coords="different",
     fill_value=dtypes.NA,
     join="outer",
+    combine_attrs="no_conflicts",
 ):
     """
     Attempt to auto-magically combine the given datasets into one by using
@@ -557,6 +589,16 @@ def combine_by_coords(
         - 'override': if indexes are of same size, rewrite indexes to be those
           of the first object with that dimension. Indexes for the same
           dimension must have the same size in all objects.
+    combine_attrs : {'drop', 'identical', 'no_conflicts', 'override'},
+                    default 'no_conflicts'
+        String indicating how to combine attrs of the objects being merged:
+
+        - 'drop': empty attrs on returned Dataset.
+        - 'identical': all attrs must be the same on every object.
+        - 'no_conflicts': attrs from all objects are combined, any that have
+          the same name must also have the same value.
+        - 'override': skip comparing and copy attrs from the first dataset to
+          the result.
 
     Returns
     -------
@@ -700,6 +742,7 @@ def combine_by_coords(
             compat=compat,
             fill_value=fill_value,
             join=join,
+            combine_attrs=combine_attrs,
         )
 
         # Check the overall coordinates are monotonically increasing
@@ -717,6 +760,7 @@ def combine_by_coords(
         compat=compat,
         fill_value=fill_value,
         join=join,
+        combine_attrs=combine_attrs,
     )
 
 
diff --git a/xarray/core/concat.py b/xarray/core/concat.py
index 96b4be15d1b..7741cbb826b 100644
--- a/xarray/core/concat.py
+++ b/xarray/core/concat.py
@@ -3,7 +3,7 @@
 from . import dtypes, utils
 from .alignment import align
 from .duck_array_ops import lazy_array_equiv
-from .merge import _VALID_COMPAT, unique_variable
+from .merge import _VALID_COMPAT, merge_attrs, unique_variable
 from .variable import IndexVariable, Variable, as_variable
 from .variable import concat as concat_vars
 
@@ -17,6 +17,7 @@ def concat(
     positions=None,
     fill_value=dtypes.NA,
     join="outer",
+    combine_attrs="override",
 ):
     """Concatenate xarray objects along a new or existing dimension.
 
@@ -92,15 +93,21 @@ def concat(
         - 'override': if indexes are of same size, rewrite indexes to be those
           of the first object with that dimension. Indexes for the same
           dimension must have the same size in all objects.
+    combine_attrs : {'drop', 'identical', 'no_conflicts', 'override'},
+                    default 'override'
+        String indicating how to combine attrs of the objects being merged:
+
+        - 'drop': empty attrs on returned Dataset.
+        - 'identical': all attrs must be the same on every object.
+        - 'no_conflicts': attrs from all objects are combined, any that have
+          the same name must also have the same value.
+        - 'override': skip comparing and copy attrs from the first dataset to
+          the result.
 
     Returns
     -------
     concatenated : type of objs
 
-    Notes
-    -----
-    Each concatenated Variable preserves corresponding ``attrs`` from the
-    first element of ``objs``.
- See also -------- merge @@ -132,7 +139,9 @@ def concat( "can only concatenate xarray Dataset and DataArray " "objects, got %s" % type(first_obj) ) - return f(objs, dim, data_vars, coords, compat, positions, fill_value, join) + return f( + objs, dim, data_vars, coords, compat, positions, fill_value, join, combine_attrs + ) def _calc_concat_dim_coord(dim): @@ -306,6 +315,7 @@ def _dataset_concat( positions, fill_value=dtypes.NA, join="outer", + combine_attrs="override", ): """ Concatenate a sequence of datasets along a new or existing dimension @@ -362,7 +372,7 @@ def _dataset_concat( result_vars.update(dim_coords) # assign attrs and encoding from first dataset - result_attrs = datasets[0].attrs + result_attrs = merge_attrs([ds.attrs for ds in datasets], combine_attrs) result_encoding = datasets[0].encoding # check that global attributes are fixed across all datasets if necessary @@ -425,6 +435,7 @@ def _dataarray_concat( positions, fill_value=dtypes.NA, join="outer", + combine_attrs="override", ): arrays = list(arrays) @@ -453,5 +464,12 @@ def _dataarray_concat( positions, fill_value=fill_value, join=join, + combine_attrs="drop", ) - return arrays[0]._from_temp_dataset(ds, name) + + merged_attrs = merge_attrs([da.attrs for da in arrays], combine_attrs) + + result = arrays[0]._from_temp_dataset(ds, name) + result.attrs = merged_attrs + + return result diff --git a/xarray/core/dataarray.py b/xarray/core/dataarray.py index 324e7ccd290..232fb86144e 100644 --- a/xarray/core/dataarray.py +++ b/xarray/core/dataarray.py @@ -475,7 +475,13 @@ def _to_dataset_whole( dataset = Dataset._construct_direct(variables, coord_names, indexes=indexes) return dataset - def to_dataset(self, dim: Hashable = None, *, name: Hashable = None) -> Dataset: + def to_dataset( + self, + dim: Hashable = None, + *, + name: Hashable = None, + promote_attrs: bool = False, + ) -> Dataset: """Convert a DataArray to a Dataset. Parameters @@ -487,6 +493,8 @@ def to_dataset(self, dim: Hashable = None, *, name: Hashable = None) -> Dataset: name : hashable, optional Name to substitute for this array's name. Only valid if ``dim`` is not provided. + promote_attrs : bool, default False + Set to True to shallow copy attrs of DataArray to returned Dataset. Returns ------- @@ -500,9 +508,14 @@ def to_dataset(self, dim: Hashable = None, *, name: Hashable = None) -> Dataset: if dim is not None: if name is not None: raise TypeError("cannot supply both dim and name arguments") - return self._to_dataset_split(dim) + result = self._to_dataset_split(dim) else: - return self._to_dataset_whole(name) + result = self._to_dataset_whole(name) + + if promote_attrs: + result.attrs = dict(self.attrs) + + return result @property def name(self) -> Optional[Hashable]: diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index b7ce0ec4e1e..6f96e4f469c 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -536,7 +536,7 @@ def __init__( if isinstance(coords, Dataset): coords = coords.variables - variables, coord_names, dims, indexes = merge_data_and_coords( + variables, coord_names, dims, indexes, _ = merge_data_and_coords( data_vars, coords, compat="broadcast_equals" ) diff --git a/xarray/core/merge.py b/xarray/core/merge.py index 1d1b8d39a20..fea94246471 100644 --- a/xarray/core/merge.py +++ b/xarray/core/merge.py @@ -20,7 +20,7 @@ from . 
import dtypes, pdcompat from .alignment import deep_align from .duck_array_ops import lazy_array_equiv -from .utils import Frozen, dict_equiv +from .utils import Frozen, compat_dict_union, dict_equiv from .variable import Variable, as_variable, assert_unique_multiindex_level_names if TYPE_CHECKING: @@ -491,17 +491,54 @@ def assert_valid_explicit_coords(variables, dims, explicit_coords): ) +def merge_attrs(variable_attrs, combine_attrs): + """Combine attributes from different variables according to combine_attrs + """ + if not variable_attrs: + # no attributes to merge + return None + + if combine_attrs == "drop": + return {} + elif combine_attrs == "override": + return variable_attrs[0] + elif combine_attrs == "no_conflicts": + result = dict(variable_attrs[0]) + for attrs in variable_attrs[1:]: + try: + result = compat_dict_union(result, attrs) + except ValueError: + raise MergeError( + "combine_attrs='no_conflicts', but some values are not " + "the same. Merging %s with %s" % (str(result), str(attrs)) + ) + return result + elif combine_attrs == "identical": + result = dict(variable_attrs[0]) + for attrs in variable_attrs[1:]: + if not dict_equiv(result, attrs): + raise MergeError( + "combine_attrs='identical', but attrs differ. First is %s " + ", other is %s." % (str(result), str(attrs)) + ) + return result + else: + raise ValueError("Unrecognised value for combine_attrs=%s" % combine_attrs) + + class _MergeResult(NamedTuple): variables: Dict[Hashable, Variable] coord_names: Set[Hashable] dims: Dict[Hashable, int] indexes: Dict[Hashable, pd.Index] + attrs: Dict[Hashable, Any] def merge_core( objects: Iterable["CoercibleMapping"], compat: str = "broadcast_equals", join: str = "outer", + combine_attrs: Optional[str] = "override", priority_arg: Optional[int] = None, explicit_coords: Optional[Sequence] = None, indexes: Optional[Mapping[Hashable, pd.Index]] = None, @@ -519,6 +556,8 @@ def merge_core( Compatibility checks to use when merging variables. join : {'outer', 'inner', 'left', 'right'}, optional How to combine objects with different indexes. + combine_attrs : {'drop', 'identical', 'no_conflicts', 'override'}, optional + How to combine attributes of objects priority_arg : integer, optional Optional argument in `objects` that takes precedence over the others. explicit_coords : set, optional @@ -536,12 +575,15 @@ def merge_core( Set of coordinate names. dims : dict Dictionary mapping from dimension names to sizes. + attrs : dict + Dictionary of attributes Raises ------ MergeError if the merge cannot be done successfully. """ - from .dataset import calculate_dimensions + from .dataarray import DataArray + from .dataset import Dataset, calculate_dimensions _assert_compat_valid(compat) @@ -571,7 +613,16 @@ def merge_core( "coordinates or not in the merged result: %s" % ambiguous_coords ) - return _MergeResult(variables, coord_names, dims, out_indexes) + attrs = merge_attrs( + [ + var.attrs + for var in coerced + if isinstance(var, Dataset) or isinstance(var, DataArray) + ], + combine_attrs, + ) + + return _MergeResult(variables, coord_names, dims, out_indexes, attrs) def merge( @@ -579,6 +630,7 @@ def merge( compat: str = "no_conflicts", join: str = "outer", fill_value: object = dtypes.NA, + combine_attrs: str = "drop", ) -> "Dataset": """Merge any number of xarray objects into a single Dataset as variables. @@ -614,6 +666,16 @@ def merge( dimension must have the same size in all objects. 
fill_value : scalar, optional Value to use for newly missing values + combine_attrs : {'drop', 'identical', 'no_conflicts', 'override'}, + default 'drop' + String indicating how to combine attrs of the objects being merged: + + - 'drop': empty attrs on returned Dataset. + - 'identical': all attrs must be the same on every object. + - 'no_conflicts': attrs from all objects are combined, any that have + the same name must also have the same value. + - 'override': skip comparing and copy attrs from the first dataset to + the result. Returns ------- @@ -787,10 +849,16 @@ def merge( "Dataset(s), DataArray(s), and dictionaries." ) - obj = obj.to_dataset() if isinstance(obj, DataArray) else obj + obj = obj.to_dataset(promote_attrs=True) if isinstance(obj, DataArray) else obj dict_like_objects.append(obj) - merge_result = merge_core(dict_like_objects, compat, join, fill_value=fill_value) + merge_result = merge_core( + dict_like_objects, + compat, + join, + combine_attrs=combine_attrs, + fill_value=fill_value, + ) merged = Dataset._construct_direct(**merge_result._asdict()) return merged @@ -861,4 +929,9 @@ def dataset_update_method( if coord_names: other[key] = value.drop_vars(coord_names) - return merge_core([dataset, other], priority_arg=1, indexes=dataset.indexes) + return merge_core( + [dataset, other], + priority_arg=1, + indexes=dataset.indexes, + combine_attrs="override", + ) diff --git a/xarray/core/utils.py b/xarray/core/utils.py index e335365d5ca..5570f9e9a80 100644 --- a/xarray/core/utils.py +++ b/xarray/core/utils.py @@ -184,7 +184,7 @@ def peek_at(iterable: Iterable[T]) -> Tuple[T, Iterator[T]]: def update_safety_check( - first_dict: MutableMapping[K, V], + first_dict: Mapping[K, V], second_dict: Mapping[K, V], compat: Callable[[V, V], bool] = equivalent, ) -> None: @@ -361,6 +361,9 @@ def ordered_dict_intersection( Binary operator to determine if two values are compatible. By default, checks for equivalence. + # TODO: Rename to compat_dict_intersection, as we do not use OrderedDicts + # any more. + Returns ------- intersection : dict @@ -371,6 +374,35 @@ def ordered_dict_intersection( return new_dict +def compat_dict_union( + first_dict: Mapping[K, V], + second_dict: Mapping[K, V], + compat: Callable[[V, V], bool] = equivalent, +) -> MutableMapping[K, V]: + """Return the union of two dictionaries as a new dictionary. + + An exception is raised if any keys are found in both dictionaries and the + values are not compatible. + + Parameters + ---------- + first_dict, second_dict : dict-like + Mappings to merge. + compat : function, optional + Binary operator to determine if two values are compatible. By default, + checks for equivalence. + + Returns + ------- + union : dict + union of the contents. + """ + new_dict = dict(first_dict) + update_safety_check(first_dict, second_dict, compat) + new_dict.update(second_dict) + return new_dict + + class Frozen(Mapping[K, V]): """Wrapper around an object implementing the mapping interface to make it immutable. 
If you really want to modify the mapping, the mutable version is diff --git a/xarray/tests/test_combine.py b/xarray/tests/test_combine.py index eb2c6e1dbf7..c3f981f10d1 100644 --- a/xarray/tests/test_combine.py +++ b/xarray/tests/test_combine.py @@ -503,6 +503,49 @@ def test_auto_combine_2d(self): result = combine_nested(datasets, concat_dim=["dim1", "dim2"]) assert_equal(result, expected) + def test_auto_combine_2d_combine_attrs_kwarg(self): + ds = create_test_data + + partway1 = concat([ds(0), ds(3)], dim="dim1") + partway2 = concat([ds(1), ds(4)], dim="dim1") + partway3 = concat([ds(2), ds(5)], dim="dim1") + expected = concat([partway1, partway2, partway3], dim="dim2") + + expected_dict = {} + expected_dict["drop"] = expected.copy(deep=True) + expected_dict["drop"].attrs = {} + expected_dict["no_conflicts"] = expected.copy(deep=True) + expected_dict["no_conflicts"].attrs = { + "a": 1, + "b": 2, + "c": 3, + "d": 4, + "e": 5, + "f": 6, + } + expected_dict["override"] = expected.copy(deep=True) + expected_dict["override"].attrs = {"a": 1} + + datasets = [[ds(0), ds(1), ds(2)], [ds(3), ds(4), ds(5)]] + + datasets[0][0].attrs = {"a": 1} + datasets[0][1].attrs = {"a": 1, "b": 2} + datasets[0][2].attrs = {"a": 1, "c": 3} + datasets[1][0].attrs = {"a": 1, "d": 4} + datasets[1][1].attrs = {"a": 1, "e": 5} + datasets[1][2].attrs = {"a": 1, "f": 6} + + with raises_regex(ValueError, "combine_attrs='identical'"): + result = combine_nested( + datasets, concat_dim=["dim1", "dim2"], combine_attrs="identical" + ) + + for combine_attrs in expected_dict: + result = combine_nested( + datasets, concat_dim=["dim1", "dim2"], combine_attrs=combine_attrs + ) + assert_identical(result, expected_dict[combine_attrs]) + def test_combine_nested_missing_data_new_dim(self): # Your data includes "time" and "station" dimensions, and each year's # data has a different set of stations. 
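The parametrized expectations above cover each ``combine_attrs`` mode; as a minimal usage sketch of the same behaviour through the public API (dataset contents and attribute values are illustrative, not taken from the tests):

    import xarray as xr

    ds_a = xr.Dataset({"a": ("x", [1])}, attrs={"source": "sensor-1"})
    ds_b = xr.Dataset({"a": ("x", [2])}, attrs={"source": "sensor-1", "note": "qc"})

    # "no_conflicts" unions the attrs because the shared key "source" agrees;
    # the combine_nested default ("drop") would return empty attrs instead
    combined = xr.combine_nested([ds_a, ds_b], concat_dim="x",
                                 combine_attrs="no_conflicts")
    assert combined.attrs == {"source": "sensor-1", "note": "qc"}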
@@ -642,6 +685,52 @@ def test_combine_coords_join_exact(self): with raises_regex(ValueError, "indexes along dimension"): combine_nested(objs, concat_dim="x", join="exact") + @pytest.mark.parametrize( + "combine_attrs, expected", + [ + ("drop", Dataset({"x": [0, 1], "y": [0, 1]}, attrs={})), + ( + "no_conflicts", + Dataset({"x": [0, 1], "y": [0, 1]}, attrs={"a": 1, "b": 2}), + ), + ("override", Dataset({"x": [0, 1], "y": [0, 1]}, attrs={"a": 1})), + ], + ) + def test_combine_coords_combine_attrs(self, combine_attrs, expected): + objs = [ + Dataset({"x": [0], "y": [0]}, attrs={"a": 1}), + Dataset({"x": [1], "y": [1]}, attrs={"a": 1, "b": 2}), + ] + actual = combine_nested( + objs, concat_dim="x", join="outer", combine_attrs=combine_attrs + ) + assert_identical(expected, actual) + + if combine_attrs == "no_conflicts": + objs[1].attrs["a"] = 2 + with raises_regex(ValueError, "combine_attrs='no_conflicts'"): + actual = combine_nested( + objs, concat_dim="x", join="outer", combine_attrs=combine_attrs + ) + + def test_combine_coords_combine_attrs_identical(self): + objs = [ + Dataset({"x": [0], "y": [0]}, attrs={"a": 1}), + Dataset({"x": [1], "y": [1]}, attrs={"a": 1}), + ] + expected = Dataset({"x": [0, 1], "y": [0, 1]}, attrs={"a": 1}) + actual = combine_nested( + objs, concat_dim="x", join="outer", combine_attrs="identical" + ) + assert_identical(expected, actual) + + objs[1].attrs["b"] = 2 + + with raises_regex(ValueError, "combine_attrs='identical'"): + actual = combine_nested( + objs, concat_dim="x", join="outer", combine_attrs="identical" + ) + def test_infer_order_from_coords(self): data = create_test_data() objs = [data.isel(dim2=slice(4, 9)), data.isel(dim2=slice(4))] diff --git a/xarray/tests/test_concat.py b/xarray/tests/test_concat.py index 1a498496c03..e5038dd4af2 100644 --- a/xarray/tests/test_concat.py +++ b/xarray/tests/test_concat.py @@ -256,6 +256,28 @@ def test_concat_join_kwarg(self): ) assert_identical(actual, expected) + def test_concat_combine_attrs_kwarg(self): + ds1 = Dataset({"a": ("x", [0])}, coords={"x": [0]}, attrs={"b": 42}) + ds2 = Dataset({"a": ("x", [0])}, coords={"x": [1]}, attrs={"b": 42, "c": 43}) + + expected = {} + expected["drop"] = Dataset({"a": ("x", [0, 0])}, {"x": [0, 1]}) + expected["no_conflicts"] = Dataset( + {"a": ("x", [0, 0])}, {"x": [0, 1]}, {"b": 42, "c": 43} + ) + expected["override"] = Dataset({"a": ("x", [0, 0])}, {"x": [0, 1]}, {"b": 42}) + + with raises_regex(ValueError, "combine_attrs='identical'"): + actual = concat([ds1, ds2], dim="x", combine_attrs="identical") + with raises_regex(ValueError, "combine_attrs='no_conflicts'"): + ds3 = ds2.copy(deep=True) + ds3.attrs["b"] = 44 + actual = concat([ds1, ds3], dim="x", combine_attrs="no_conflicts") + + for combine_attrs in expected: + actual = concat([ds1, ds2], dim="x", combine_attrs=combine_attrs) + assert_identical(actual, expected[combine_attrs]) + def test_concat_promote_shape(self): # mixed dims within variables objs = [Dataset({}, {"x": 0}), Dataset({"x": [1]})] @@ -469,6 +491,30 @@ def test_concat_join_kwarg(self): actual = concat([ds1, ds2], join=join, dim="x") assert_equal(actual, expected[join].to_array()) + def test_concat_combine_attrs_kwarg(self): + da1 = DataArray([0], coords=[("x", [0])], attrs={"b": 42}) + da2 = DataArray([0], coords=[("x", [1])], attrs={"b": 42, "c": 43}) + + expected = {} + expected["drop"] = DataArray([0, 0], coords=[("x", [0, 1])]) + expected["no_conflicts"] = DataArray( + [0, 0], coords=[("x", [0, 1])], attrs={"b": 42, "c": 43} + ) + expected["override"] 
= DataArray( + [0, 0], coords=[("x", [0, 1])], attrs={"b": 42} + ) + + with raises_regex(ValueError, "combine_attrs='identical'"): + actual = concat([da1, da2], dim="x", combine_attrs="identical") + with raises_regex(ValueError, "combine_attrs='no_conflicts'"): + da3 = da2.copy(deep=True) + da3.attrs["b"] = 44 + actual = concat([da1, da3], dim="x", combine_attrs="no_conflicts") + + for combine_attrs in expected: + actual = concat([da1, da2], dim="x", combine_attrs=combine_attrs) + assert_identical(actual, expected[combine_attrs]) + @pytest.mark.parametrize("attr1", ({"a": {"meta": [10, 20, 30]}}, {"a": [1, 2, 3]}, {})) @pytest.mark.parametrize("attr2", ({"a": [1, 2, 3]}, {})) diff --git a/xarray/tests/test_dataarray.py b/xarray/tests/test_dataarray.py index fbd9810f285..4f19dc2a9cf 100644 --- a/xarray/tests/test_dataarray.py +++ b/xarray/tests/test_dataarray.py @@ -3750,9 +3750,16 @@ def test_to_dataset_whole(self): expected = Dataset({"foo": ("x", [1, 2])}) assert_identical(expected, actual) - named = DataArray([1, 2], dims="x", name="foo") + named = DataArray([1, 2], dims="x", name="foo", attrs={"y": "testattr"}) actual = named.to_dataset() - expected = Dataset({"foo": ("x", [1, 2])}) + expected = Dataset({"foo": ("x", [1, 2], {"y": "testattr"})}) + assert_identical(expected, actual) + + # Test promoting attrs + actual = named.to_dataset(promote_attrs=True) + expected = Dataset( + {"foo": ("x", [1, 2], {"y": "testattr"})}, attrs={"y": "testattr"} + ) assert_identical(expected, actual) with pytest.raises(TypeError): diff --git a/xarray/tests/test_merge.py b/xarray/tests/test_merge.py index 6c8f3f65657..9057575b38c 100644 --- a/xarray/tests/test_merge.py +++ b/xarray/tests/test_merge.py @@ -3,6 +3,7 @@ import xarray as xr from xarray.core import dtypes, merge +from xarray.core.merge import MergeError from xarray.testing import assert_identical from . 
import raises_regex @@ -49,6 +50,65 @@ def test_merge_dataarray_unnamed(self): with raises_regex(ValueError, "without providing an explicit name"): xr.merge([data]) + def test_merge_arrays_attrs_default(self): + var1_attrs = {"a": 1, "b": 2} + var2_attrs = {"a": 1, "c": 3} + expected_attrs = {} + + data = create_test_data() + data.var1.attrs = var1_attrs + data.var2.attrs = var2_attrs + actual = xr.merge([data.var1, data.var2]) + expected = data[["var1", "var2"]] + expected.attrs = expected_attrs + assert actual.identical(expected) + + @pytest.mark.parametrize( + "combine_attrs, var1_attrs, var2_attrs, expected_attrs, " "expect_exception", + [ + ( + "no_conflicts", + {"a": 1, "b": 2}, + {"a": 1, "c": 3}, + {"a": 1, "b": 2, "c": 3}, + False, + ), + ("no_conflicts", {"a": 1, "b": 2}, {}, {"a": 1, "b": 2}, False), + ("no_conflicts", {}, {"a": 1, "c": 3}, {"a": 1, "c": 3}, False), + ( + "no_conflicts", + {"a": 1, "b": 2}, + {"a": 4, "c": 3}, + {"a": 1, "b": 2, "c": 3}, + True, + ), + ("drop", {"a": 1, "b": 2}, {"a": 1, "c": 3}, {}, False), + ("identical", {"a": 1, "b": 2}, {"a": 1, "b": 2}, {"a": 1, "b": 2}, False), + ("identical", {"a": 1, "b": 2}, {"a": 1, "c": 3}, {"a": 1, "b": 2}, True), + ( + "override", + {"a": 1, "b": 2}, + {"a": 4, "b": 5, "c": 3}, + {"a": 1, "b": 2}, + False, + ), + ], + ) + def test_merge_arrays_attrs( + self, combine_attrs, var1_attrs, var2_attrs, expected_attrs, expect_exception + ): + data = create_test_data() + data.var1.attrs = var1_attrs + data.var2.attrs = var2_attrs + if expect_exception: + with raises_regex(MergeError, "combine_attrs"): + actual = xr.merge([data.var1, data.var2], combine_attrs=combine_attrs) + else: + actual = xr.merge([data.var1, data.var2], combine_attrs=combine_attrs) + expected = data[["var1", "var2"]] + expected.attrs = expected_attrs + assert actual.identical(expected) + def test_merge_dicts_simple(self): actual = xr.merge([{"foo": 0}, {"bar": "one"}, {"baz": 3.5}]) expected = xr.Dataset({"foo": 0, "bar": "one", "baz": 3.5}) diff --git a/xarray/tests/test_utils.py b/xarray/tests/test_utils.py index af87b94393d..ddca6c57064 100644 --- a/xarray/tests/test_utils.py +++ b/xarray/tests/test_utils.py @@ -9,7 +9,7 @@ from xarray.core import duck_array_ops, utils from xarray.core.utils import either_dict_or_kwargs -from . import assert_array_equal, requires_cftime, requires_dask +from . import assert_array_equal, raises_regex, requires_cftime, requires_dask from .test_coding_times import _all_cftime_date_types @@ -124,6 +124,15 @@ def test_ordered_dict_intersection(self): assert {"b": "B"} == utils.ordered_dict_intersection(self.x, self.y) assert {} == utils.ordered_dict_intersection(self.x, self.z) + def test_compat_dict_union(self): + assert {"a": "A", "b": "B", "c": "C"} == utils.compat_dict_union(self.x, self.y) + with raises_regex( + ValueError, + "unsafe to merge dictionaries without " + "overriding values; conflicting key", + ): + utils.compat_dict_union(self.x, self.z) + def test_dict_equiv(self): x = {} x["a"] = 3 From 009aa66620b3437cf0de675013fa7d1ff231963c Mon Sep 17 00:00:00 2001 From: johnomotani Date: Tue, 24 Mar 2020 22:59:06 +0000 Subject: [PATCH 65/75] Rename ordered_dict_intersection -> compat_dict_intersection (#3887) Do not use OrderedDicts any more, so name did not make sense. 
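A quick sketch of the renamed helper next to its new sibling (toy dicts mirroring the test fixtures; both helpers are internal API in ``xarray.core.utils``):

    from xarray.core.utils import compat_dict_intersection, compat_dict_union

    x = {"a": "A", "b": "B"}
    y = {"b": "B", "c": "C"}

    # the intersection keeps keys present in both mappings with compatible values
    assert compat_dict_intersection(x, y) == {"b": "B"}
    # the union merges both mappings, raising ValueError on incompatible values
    assert compat_dict_union(x, y) == {"a": "A", "b": "B", "c": "C"}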
--- xarray/core/utils.py | 5 +---- xarray/tests/test_utils.py | 6 +++--- 2 files changed, 4 insertions(+), 7 deletions(-) diff --git a/xarray/core/utils.py b/xarray/core/utils.py index 5570f9e9a80..896ee31ab5c 100644 --- a/xarray/core/utils.py +++ b/xarray/core/utils.py @@ -343,7 +343,7 @@ def dict_equiv( return True -def ordered_dict_intersection( +def compat_dict_intersection( first_dict: Mapping[K, V], second_dict: Mapping[K, V], compat: Callable[[V, V], bool] = equivalent, @@ -361,9 +361,6 @@ def ordered_dict_intersection( Binary operator to determine if two values are compatible. By default, checks for equivalence. - # TODO: Rename to compat_dict_intersection, as we do not use OrderedDicts - # any more. - Returns ------- intersection : dict diff --git a/xarray/tests/test_utils.py b/xarray/tests/test_utils.py index ddca6c57064..5f8b1770bd3 100644 --- a/xarray/tests/test_utils.py +++ b/xarray/tests/test_utils.py @@ -120,9 +120,9 @@ def test_unsafe(self): with pytest.raises(ValueError): utils.update_safety_check(self.x, self.z) - def test_ordered_dict_intersection(self): - assert {"b": "B"} == utils.ordered_dict_intersection(self.x, self.y) - assert {} == utils.ordered_dict_intersection(self.x, self.z) + def test_compat_dict_intersection(self): + assert {"b": "B"} == utils.compat_dict_intersection(self.x, self.y) + assert {} == utils.compat_dict_intersection(self.x, self.z) def test_compat_dict_union(self): assert {"a": "A", "b": "B", "c": "C"} == utils.compat_dict_union(self.x, self.y) From f583ac7b125cf33f11dba9d948d6cfffac47317e Mon Sep 17 00:00:00 2001 From: Maximilian Roos <5635139+max-sixty@users.noreply.github.com> Date: Wed, 25 Mar 2020 11:34:31 -0400 Subject: [PATCH 66/75] misplaced quote in whatsnew (#3889) --- doc/whats-new.rst | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 4515f552812..594dcad5a19 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -789,12 +789,13 @@ Bug fixes - Plots in 2 dimensions (pcolormesh, contour) now allow to specify levels as numpy array (:issue:`3284`). By `Mathias Hauser `_. - Fixed bug in :meth:`DataArray.quantile` failing to keep attributes when - `keep_attrs` was True (:issue:`3304`). By David Huard ``_. + `keep_attrs` was True (:issue:`3304`). By `David Huard `_. Documentation ~~~~~~~~~~~~~ -- Created a `PR checklist `_ as a quick reference for tasks before creating a new PR +- Created a `PR checklist `_ + as a quick reference for tasks before creating a new PR or pushing new commits. By `Gregory Gundersen `_. 
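Taken together, the preceding patches make attribute handling controllable from the public entry points; a minimal sketch at the :py:func:`xarray.merge` level (attribute values are hypothetical):

    import xarray as xr

    da1 = xr.DataArray([1], dims="x", name="v1", attrs={"units": "m"})
    da2 = xr.DataArray([2], dims="x", name="v2", attrs={"units": "m", "history": "raw"})

    # DataArrays are coerced via to_dataset(promote_attrs=True), so their attrs
    # take part in the merge; "no_conflicts" unions keys with compatible values
    merged = xr.merge([da1, da2], combine_attrs="no_conflicts")
    assert merged.attrs == {"units": "m", "history": "raw"}

    # a conflicting value for a shared key raises MergeError in this mode
    da2.attrs["units"] = "km"
    try:
        xr.merge([da1, da2], combine_attrs="no_conflicts")
    except xr.MergeError:
        pass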
From ec215daecec642db94102dc24156448f8440f52d Mon Sep 17 00:00:00 2001 From: Pascal Bourgault Date: Wed, 25 Mar 2020 13:17:44 -0400 Subject: [PATCH 67/75] Implementation of polyfit and polyval (#3733) * [WIP] Implementation of polyfit and polyval - minimum testing - no docs * Formatting with black, flake8 * Fix failing test * More intelligent skipna switching * Add docs | Change coeff order to fit numpy | move polyval * Move doc patching to class * conditional doc patching * Fix windows fail - more efficient nan skipping * Fix typo in least_squares * Move polyfit to dataset * Add more tests | fix some edge cases * Skip test without dask * Fix 1D case | add docs * skip polyval test without dask * Explicit docs | More restrictive polyval * Small typo in polyfit docstrings * Apply suggestions from code review Co-Authored-By: Maximilian Roos <5635139+max-sixty@users.noreply.github.com> * Polyfit : fix style in docstring | add see also section * Clean up docstrings and documentation. * Move whats new entry to 0.16 | fix PEP8 issue in test_dataarray Co-authored-by: Maximilian Roos <5635139+max-sixty@users.noreply.github.com> --- doc/api.rst | 3 + doc/computation.rst | 26 ++++ doc/whats-new.rst | 2 + xarray/__init__.py | 3 +- xarray/core/computation.py | 32 +++++ xarray/core/dask_array_ops.py | 27 +++++ xarray/core/dataarray.py | 62 ++++++++++ xarray/core/dataset.py | 179 ++++++++++++++++++++++++++++ xarray/core/duck_array_ops.py | 9 ++ xarray/core/nputils.py | 33 +++++ xarray/tests/test_computation.py | 32 +++++ xarray/tests/test_dataarray.py | 50 ++++++++ xarray/tests/test_dataset.py | 13 ++ xarray/tests/test_duck_array_ops.py | 18 +++ 14 files changed, 488 insertions(+), 1 deletion(-) diff --git a/doc/api.rst b/doc/api.rst index b9c3e3bdd33..216f47f988f 100644 --- a/doc/api.rst +++ b/doc/api.rst @@ -30,6 +30,7 @@ Top-level functions zeros_like ones_like dot + polyval map_blocks show_versions set_options @@ -172,6 +173,7 @@ Computation Dataset.quantile Dataset.differentiate Dataset.integrate + Dataset.polyfit **Aggregation**: :py:attr:`~Dataset.all` @@ -352,6 +354,7 @@ Computation DataArray.quantile DataArray.differentiate DataArray.integrate + DataArray.polyfit DataArray.str **Aggregation**: diff --git a/doc/computation.rst b/doc/computation.rst index 5309f27e9b6..4b8014c4782 100644 --- a/doc/computation.rst +++ b/doc/computation.rst @@ -401,6 +401,32 @@ trapezoidal rule using their coordinates, and integration along multidimensional coordinate are not supported. +.. _compute.polyfit: + +Fitting polynomials +=================== + +Xarray objects provide an interface for performing linear or polynomial regressions +using the least-squares method. :py:meth:`~xarray.DataArray.polyfit` computes the +best fitting coefficients along a given dimension and for a given order, + +.. ipython:: python + + x = xr.DataArray(np.arange(10), dims=['x'], name='x') + a = xr.DataArray(3 + 4 * x, dims=['x'], coords={'x': x}) + out = a.polyfit(dim='x', deg=1, full=True) + out + +The method outputs a dataset containing the coefficients (and more if `full=True`). +The inverse operation is done with :py:meth:`~xarray.polyval`, + +.. ipython:: python + + xr.polyval(coord=x, coeffs=out.polyfit_coefficients) + +.. note:: + These methods replicate the behaviour of :py:func:`numpy.polyfit` and :py:func:`numpy.polyval`. + .. 
_compute.broadcasting: Broadcasting by dimension name diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 594dcad5a19..eebd04123d1 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -24,6 +24,8 @@ Breaking changes New Features ~~~~~~~~~~~~ +- Added :py:meth:`DataArray.polyfit` and :py:func:`xarray.polyval` for fitting polynomials. (:issue:`3349`) + By `Pascal Bourgault `_. - Control over attributes of result in :py:func:`merge`, :py:func:`concat`, :py:func:`combine_by_coords` and :py:func:`combine_nested` using combine_attrs keyword argument. (:issue:`3865`, :pull:`3877`) diff --git a/xarray/__init__.py b/xarray/__init__.py index 331d8ecb09a..0fead57e5fb 100644 --- a/xarray/__init__.py +++ b/xarray/__init__.py @@ -17,7 +17,7 @@ from .core.alignment import align, broadcast from .core.combine import auto_combine, combine_by_coords, combine_nested from .core.common import ALL_DIMS, full_like, ones_like, zeros_like -from .core.computation import apply_ufunc, dot, where +from .core.computation import apply_ufunc, dot, polyval, where from .core.concat import concat from .core.dataarray import DataArray from .core.dataset import Dataset @@ -65,6 +65,7 @@ "open_mfdataset", "open_rasterio", "open_zarr", + "polyval", "register_dataarray_accessor", "register_dataset_accessor", "save_mfdataset", diff --git a/xarray/core/computation.py b/xarray/core/computation.py index f2941a3d0ba..13bf6248331 100644 --- a/xarray/core/computation.py +++ b/xarray/core/computation.py @@ -1306,3 +1306,35 @@ def where(cond, x, y): dataset_join="exact", dask="allowed", ) + + +def polyval(coord, coeffs, degree_dim="degree"): + """Evaluate a polynomial at specific values + + Parameters + ---------- + coord : DataArray + The 1D coordinate along which to evaluate the polynomial. + coeffs : DataArray + Coefficients of the polynomials. + degree_dim : str, default "degree" + Name of the polynomial degree dimension in `coeffs`. + + See also + -------- + xarray.DataArray.polyfit + numpy.polyval + """ + from .dataarray import DataArray + from .missing import get_clean_interp_index + + x = get_clean_interp_index(coord, coord.name) + + deg_coord = coeffs[degree_dim] + + lhs = DataArray( + np.vander(x, int(deg_coord.max()) + 1), + dims=(coord.name, degree_dim), + coords={coord.name: coord, degree_dim: np.arange(deg_coord.max() + 1)[::-1]}, + ) + return (lhs * coeffs).sum(degree_dim) diff --git a/xarray/core/dask_array_ops.py b/xarray/core/dask_array_ops.py index 37f261cc3ad..87f646352eb 100644 --- a/xarray/core/dask_array_ops.py +++ b/xarray/core/dask_array_ops.py @@ -95,3 +95,30 @@ def func(x, window, axis=-1): # crop boundary. index = (slice(None),) * axis + (slice(drop_size, drop_size + orig_shape[axis]),) return out[index] + + +def least_squares(lhs, rhs, rcond=None, skipna=False): + import dask.array as da + + lhs_da = da.from_array(lhs, chunks=(rhs.chunks[0], lhs.shape[1])) + if skipna: + added_dim = rhs.ndim == 1 + if added_dim: + rhs = rhs.reshape(rhs.shape[0], 1) + results = da.apply_along_axis( + nputils._nanpolyfit_1d, + 0, + rhs, + lhs_da, + dtype=float, + shape=(lhs.shape[1] + 1,), + rcond=rcond, + ) + coeffs = results[:-1, ...] + residuals = results[-1, ...] 
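+        # if a 1-D rhs was promoted to a column vector above, reshape the
+        # outputs back so they match the dimensionality of the input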
+        if added_dim:
+            coeffs = coeffs.reshape(coeffs.shape[0])
+            residuals = residuals.reshape(residuals.shape[0])
+    else:
+        coeffs, residuals, _, _ = da.linalg.lstsq(lhs_da, rhs)
+    return coeffs, residuals
diff --git a/xarray/core/dataarray.py b/xarray/core/dataarray.py
index 232fb86144e..070886cfc34 100644
--- a/xarray/core/dataarray.py
+++ b/xarray/core/dataarray.py
@@ -3275,6 +3275,68 @@ def map_blocks(
 
         return map_blocks(func, self, args, kwargs)
 
+    def polyfit(
+        self,
+        dim: Hashable,
+        deg: int,
+        skipna: bool = None,
+        rcond: float = None,
+        w: Union[Hashable, Any] = None,
+        full: bool = False,
+        cov: bool = False,
+    ):
+        """
+        Least squares polynomial fit.
+
+        This replicates the behaviour of `numpy.polyfit` but differs by skipping
+        invalid values when `skipna = True`.
+
+        Parameters
+        ----------
+        dim : hashable
+            Coordinate along which to fit the polynomials.
+        deg : int
+            Degree of the fitting polynomial.
+        skipna : bool, optional
+            If True, removes all invalid values before fitting each 1D slice of the array.
+            Default is True if data is stored in a dask.array or if there are any
+            invalid values, False otherwise.
+        rcond : float, optional
+            Relative condition number of the fit.
+        w : Union[Hashable, Any], optional
+            Weights to apply to the y-coordinate of the sample points.
+            Can be an array-like object or the name of a coordinate in the dataset.
+        full : bool, optional
+            Whether to return the residuals, matrix rank and singular values in addition
+            to the coefficients.
+        cov : Union[bool, str], optional
+            Whether to return the covariance matrix in addition to the coefficients.
+            The matrix is not scaled if `cov='unscaled'`.
+
+        Returns
+        -------
+        polyfit_results : Dataset
+            A single dataset which contains:
+
+            polyfit_coefficients
+                The coefficients of the best fit.
+            polyfit_residuals
+                The residuals of the least-square computation (only included if `full=True`)
+            [dim]_matrix_rank
+                The effective rank of the scaled Vandermonde coefficient matrix (only included if `full=True`)
+            [dim]_singular_values
+                The singular values of the scaled Vandermonde coefficient matrix (only included if `full=True`)
+            polyfit_covariance
+                The covariance matrix of the polynomial coefficient estimates (only included if `full=False` and `cov=True`)
+
+        See also
+        --------
+        numpy.polyfit
+        """
+        return self._to_temp_dataset().polyfit(
+            dim, deg, skipna=skipna, rcond=rcond, w=w, full=full, cov=cov
+        )
+
     def pad(
         self,
         pad_width: Mapping[Hashable, Union[int, Tuple[int, int]]] = None,
diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py
index 6f96e4f469c..c49694b1fc0 100644
--- a/xarray/core/dataset.py
+++ b/xarray/core/dataset.py
@@ -76,6 +76,7 @@
     merge_coordinates_without_align,
     merge_data_and_coords,
 )
+from .missing import get_clean_interp_index
 from .options import OPTIONS, _get_keep_attrs
 from .pycompat import dask_array_type
 from .utils import (
@@ -5748,6 +5749,184 @@ def map_blocks(
 
         return map_blocks(func, self, args, kwargs)
 
+    def polyfit(
+        self,
+        dim: Hashable,
+        deg: int,
+        skipna: bool = None,
+        rcond: float = None,
+        w: Union[Hashable, Any] = None,
+        full: bool = False,
+        cov: Union[bool, str] = False,
+    ):
+        """
+        Least squares polynomial fit.
+
+        This replicates the behaviour of `numpy.polyfit` but differs by skipping
+        invalid values when `skipna = True`.
+
+        Parameters
+        ----------
+        dim : hashable
+            Coordinate along which to fit the polynomials.
+        deg : int
+            Degree of the fitting polynomial.
+        skipna : bool, optional
+            If True, removes all invalid values before fitting each 1D slice of the array.
+            Default is True if data is stored in a dask.array or if there are any
+            invalid values, False otherwise.
+        rcond : float, optional
+            Relative condition number of the fit.
+        w : Union[Hashable, Any], optional
+            Weights to apply to the y-coordinate of the sample points.
+            Can be an array-like object or the name of a coordinate in the dataset.
+        full : bool, optional
+            Whether to return the residuals, matrix rank and singular values in addition
+            to the coefficients.
+        cov : Union[bool, str], optional
+            Whether to return the covariance matrix in addition to the coefficients.
+            The matrix is not scaled if `cov='unscaled'`.
+
+        Returns
+        -------
+        polyfit_results : Dataset
+            A single dataset which contains (for each "var" in the input dataset):
+
+            [var]_polyfit_coefficients
+                The coefficients of the best fit for each variable in this dataset.
+            [var]_polyfit_residuals
+                The residuals of the least-square computation for each variable (only included if `full=True`)
+            [dim]_matrix_rank
+                The effective rank of the scaled Vandermonde coefficient matrix (only included if `full=True`)
+            [dim]_singular_values
+                The singular values of the scaled Vandermonde coefficient matrix (only included if `full=True`)
+            [var]_polyfit_covariance
+                The covariance matrix of the polynomial coefficient estimates (only included if `full=False` and `cov=True`)
+
+        See also
+        --------
+        numpy.polyfit
+        """
+        variables = {}
+        skipna_da = skipna
+
+        x = get_clean_interp_index(self, dim)
+        xname = "{}_".format(self[dim].name)
+        order = int(deg) + 1
+        lhs = np.vander(x, order)
+
+        if rcond is None:
+            rcond = x.shape[0] * np.core.finfo(x.dtype).eps
+
+        # Weights:
+        if w is not None:
+            if isinstance(w, Hashable):
+                w = self.coords[w]
+            w = np.asarray(w)
+            if w.ndim != 1:
+                raise TypeError("Expected a 1-d array for weights.")
+            if w.shape[0] != lhs.shape[0]:
+                raise TypeError("Expected w and {} to have the same length".format(dim))
+            lhs *= w[:, np.newaxis]
+
+        # Scaling
+        scale = np.sqrt((lhs * lhs).sum(axis=0))
+        lhs /= scale
+
+        degree_dim = utils.get_temp_dimname(self.dims, "degree")
+
+        rank = np.linalg.matrix_rank(lhs)
+        if rank != order and not full:
+            warnings.warn(
+                "Polyfit may be poorly conditioned", np.RankWarning, stacklevel=4
+            )
+
+        if full:
+            rank = xr.DataArray(rank, name=xname + "matrix_rank")
+            variables[rank.name] = rank
+            sing = np.linalg.svd(lhs, compute_uv=False)
+            sing = xr.DataArray(
+                sing,
+                dims=(degree_dim,),
+                coords={degree_dim: np.arange(order)[::-1]},
+                name=xname + "singular_values",
+            )
+            variables[sing.name] = sing
+
+        for name, da in self.data_vars.items():
+            if dim not in da.dims:
+                continue
+
+            if skipna is None:
+                if isinstance(da.data, dask_array_type):
+                    skipna_da = True
+                else:
+                    skipna_da = np.any(da.isnull())
+
+            dims_to_stack = [dimname for dimname in da.dims if dimname != dim]
+            stacked_coords = {}
+            if dims_to_stack:
+                stacked_dim = utils.get_temp_dimname(dims_to_stack, "stacked")
+                rhs = da.transpose(dim, *dims_to_stack).stack(
+                    {stacked_dim: dims_to_stack}
+                )
+                stacked_coords = {stacked_dim: rhs[stacked_dim]}
+                scale_da = scale[:, np.newaxis]
+            else:
+                rhs = da
+                scale_da = scale
+
+            if w is not None:
+                rhs *= w[:, np.newaxis]
+
+            coeffs, residuals = duck_array_ops.least_squares(
+                lhs, rhs.data, rcond=rcond, skipna=skipna_da
+            )
+
+            if isinstance(name, str):
+                name = "{}_".format(name)
+            else:
+                # name is a ReprObject => polyfit was called on a DataArray
+                name = ""
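+            # the prefix yields per-variable output names such as
+            # "var1_polyfit_coefficients"; it is empty for the DataArray case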
+ + coeffs = xr.DataArray( + coeffs / scale_da, + dims=[degree_dim] + list(stacked_coords.keys()), + coords={degree_dim: np.arange(order)[::-1], **stacked_coords}, + name=name + "polyfit_coefficients", + ) + if dims_to_stack: + coeffs = coeffs.unstack(stacked_dim) + variables[coeffs.name] = coeffs + + if full or (cov is True): + residuals = xr.DataArray( + residuals if dims_to_stack else residuals.squeeze(), + dims=list(stacked_coords.keys()), + coords=stacked_coords, + name=name + "polyfit_residuals", + ) + if dims_to_stack: + residuals = residuals.unstack(stacked_dim) + variables[residuals.name] = residuals + + if cov: + Vbase = np.linalg.inv(np.dot(lhs.T, lhs)) + Vbase /= np.outer(scale, scale) + if cov == "unscaled": + fac = 1 + else: + if x.shape[0] <= order: + raise ValueError( + "The number of data points must exceed order to scale the covariance matrix." + ) + fac = residuals / (x.shape[0] - order) + covariance = xr.DataArray(Vbase, dims=("cov_i", "cov_j"),) * fac + variables[name + "polyfit_covariance"] = covariance + + return Dataset(data_vars=variables, attrs=self.attrs.copy()) + def pad( self, pad_width: Mapping[Hashable, Union[int, Tuple[int, int]]] = None, diff --git a/xarray/core/duck_array_ops.py b/xarray/core/duck_array_ops.py index ff2d0af63ed..4047a1e68e1 100644 --- a/xarray/core/duck_array_ops.py +++ b/xarray/core/duck_array_ops.py @@ -597,3 +597,12 @@ def rolling_window(array, axis, window, center, fill_value): return dask_array_ops.rolling_window(array, axis, window, center, fill_value) else: # np.ndarray return nputils.rolling_window(array, axis, window, center, fill_value) + + +def least_squares(lhs, rhs, rcond=None, skipna=False): + """Return the coefficients and residuals of a least-squares fit. + """ + if isinstance(rhs, dask_array_type): + return dask_array_ops.least_squares(lhs, rhs, rcond=rcond, skipna=skipna) + else: + return nputils.least_squares(lhs, rhs, rcond=rcond, skipna=skipna) diff --git a/xarray/core/nputils.py b/xarray/core/nputils.py index 5dd8219ebca..fa6df63e0ea 100644 --- a/xarray/core/nputils.py +++ b/xarray/core/nputils.py @@ -220,6 +220,39 @@ def f(values, axis=None, **kwargs): return f +def _nanpolyfit_1d(arr, x, rcond=None): + out = np.full((x.shape[1] + 1,), np.nan) + mask = np.isnan(arr) + if not np.all(mask): + out[:-1], out[-1], _, _ = np.linalg.lstsq(x[~mask, :], arr[~mask], rcond=rcond) + return out + + +def least_squares(lhs, rhs, rcond=None, skipna=False): + if skipna: + added_dim = rhs.ndim == 1 + if added_dim: + rhs = rhs.reshape(rhs.shape[0], 1) + nan_cols = np.any(np.isnan(rhs), axis=0) + out = np.empty((lhs.shape[1] + 1, rhs.shape[1])) + if np.any(nan_cols): + out[:, nan_cols] = np.apply_along_axis( + _nanpolyfit_1d, 0, rhs[:, nan_cols], lhs + ) + if np.any(~nan_cols): + out[:-1, ~nan_cols], out[-1, ~nan_cols], _, _ = np.linalg.lstsq( + lhs, rhs[:, ~nan_cols], rcond=rcond + ) + coeffs = out[:-1, :] + residuals = out[-1, :] + if added_dim: + coeffs = coeffs.reshape(coeffs.shape[0]) + residuals = residuals.reshape(residuals.shape[0]) + else: + coeffs, residuals, _, _ = np.linalg.lstsq(lhs, rhs, rcond=rcond) + return coeffs, residuals + + nanmin = _create_bottleneck_method("nanmin") nanmax = _create_bottleneck_method("nanmax") nanmean = _create_bottleneck_method("nanmean") diff --git a/xarray/tests/test_computation.py b/xarray/tests/test_computation.py index 369903552ad..4eed464d2dc 100644 --- a/xarray/tests/test_computation.py +++ b/xarray/tests/test_computation.py @@ -1120,3 +1120,35 @@ def test_where(): actual = xr.where(cond, 
1, 0) expected = xr.DataArray([1, 0], dims="x") assert_identical(expected, actual) + + +@pytest.mark.parametrize("use_dask", [True, False]) +@pytest.mark.parametrize("use_datetime", [True, False]) +def test_polyval(use_dask, use_datetime): + if use_dask and not has_dask: + pytest.skip("requires dask") + + if use_datetime: + xcoord = xr.DataArray( + pd.date_range("2000-01-01", freq="D", periods=10), dims=("x",), name="x" + ) + x = xr.core.missing.get_clean_interp_index(xcoord, "x") + else: + xcoord = x = np.arange(10) + + da = xr.DataArray( + np.stack((1.0 + x + 2.0 * x ** 2, 1.0 + 2.0 * x + 3.0 * x ** 2)), + dims=("d", "x"), + coords={"x": xcoord, "d": [0, 1]}, + ) + coeffs = xr.DataArray( + [[2, 1, 1], [3, 2, 1]], + dims=("d", "degree"), + coords={"d": [0, 1], "degree": [2, 1, 0]}, + ) + if use_dask: + coeffs = coeffs.chunk({"d": 2}) + + da_pv = xr.polyval(da.x, coeffs) + + xr.testing.assert_allclose(da, da_pv.T) diff --git a/xarray/tests/test_dataarray.py b/xarray/tests/test_dataarray.py index 4f19dc2a9cf..e23ff2f7e31 100644 --- a/xarray/tests/test_dataarray.py +++ b/xarray/tests/test_dataarray.py @@ -23,6 +23,7 @@ assert_array_equal, assert_equal, assert_identical, + has_dask, raises_regex, requires_bottleneck, requires_dask, @@ -4191,6 +4192,55 @@ def test_rank(self): y = DataArray([0.75, 0.25, np.nan, 0.5, 1.0], dims=("z",)) assert_equal(y.rank("z", pct=True), y) + @pytest.mark.parametrize("use_dask", [True, False]) + @pytest.mark.parametrize("use_datetime", [True, False]) + def test_polyfit(self, use_dask, use_datetime): + if use_dask and not has_dask: + pytest.skip("requires dask") + xcoord = xr.DataArray( + pd.date_range("1970-01-01", freq="D", periods=10), dims=("x",), name="x" + ) + x = xr.core.missing.get_clean_interp_index(xcoord, "x") + if not use_datetime: + xcoord = x + + da_raw = DataArray( + np.stack( + (10 + 1e-15 * x + 2e-28 * x ** 2, 30 + 2e-14 * x + 1e-29 * x ** 2) + ), + dims=("d", "x"), + coords={"x": xcoord, "d": [0, 1]}, + ) + + if use_dask: + da = da_raw.chunk({"d": 1}) + else: + da = da_raw + + out = da.polyfit("x", 2) + expected = DataArray( + [[2e-28, 1e-15, 10], [1e-29, 2e-14, 30]], + dims=("d", "degree"), + coords={"degree": [2, 1, 0], "d": [0, 1]}, + ).T + assert_allclose(out.polyfit_coefficients, expected, rtol=1e-3) + + # With NaN + da_raw[0, 1] = np.nan + if use_dask: + da = da_raw.chunk({"d": 1}) + else: + da = da_raw + out = da.polyfit("x", 2, skipna=True, cov=True) + assert_allclose(out.polyfit_coefficients, expected, rtol=1e-3) + assert "polyfit_covariance" in out + + # Skipna + Full output + out = da.polyfit("x", 2, skipna=True, full=True) + assert_allclose(out.polyfit_coefficients, expected, rtol=1e-3) + assert out.x_matrix_rank == 3 + np.testing.assert_almost_equal(out.polyfit_residuals, [0, 0]) + def test_pad_constant(self): ar = DataArray(np.arange(3 * 4 * 5).reshape(3, 4, 5)) actual = ar.pad(dim_0=(1, 3)) diff --git a/xarray/tests/test_dataset.py b/xarray/tests/test_dataset.py index 20b814a25c7..02698253e5d 100644 --- a/xarray/tests/test_dataset.py +++ b/xarray/tests/test_dataset.py @@ -5499,6 +5499,19 @@ def test_ipython_key_completion(self): ds.data_vars[item] # should not raise assert sorted(actual) == sorted(expected) + def test_polyfit_output(self): + ds = create_test_data(seed=1) + + out = ds.polyfit("dim2", 2, full=False) + assert "var1_polyfit_coefficients" in out + + out = ds.polyfit("dim1", 2, full=True) + assert "var1_polyfit_coefficients" in out + assert "dim1_matrix_rank" in out + + out = ds.polyfit("time", 2) + assert 
len(out.data_vars) == 0 + def test_pad(self): ds = create_test_data(seed=1) padded = ds.pad(dim2=(1, 1), constant_values=42) diff --git a/xarray/tests/test_duck_array_ops.py b/xarray/tests/test_duck_array_ops.py index 157cd16cba6..e61881cfce3 100644 --- a/xarray/tests/test_duck_array_ops.py +++ b/xarray/tests/test_duck_array_ops.py @@ -16,6 +16,7 @@ first, gradient, last, + least_squares, mean, np_timedelta64_to_float, pd_timedelta_to_float, @@ -761,3 +762,20 @@ def test_timedelta_to_numeric(td): out = timedelta_to_numeric(td, "ns") np.testing.assert_allclose(out, 86400 * 1e9) assert isinstance(out, float) + + +@pytest.mark.parametrize("use_dask", [True, False]) +@pytest.mark.parametrize("skipna", [True, False]) +def test_least_squares(use_dask, skipna): + if use_dask and not has_dask: + pytest.skip("requires dask") + lhs = np.array([[1, 2], [1, 2], [3, 2]]) + rhs = DataArray(np.array([3, 5, 7]), dims=("y",)) + + if use_dask: + rhs = rhs.chunk({"y": 1}) + + coeffs, residuals = least_squares(lhs, rhs.data, skipna=skipna) + + np.testing.assert_allclose(coeffs, [1.5, 1.25]) + np.testing.assert_allclose(residuals, [2.0]) From 6378a711d50ba7f1ba9b2a451d4d1f5e1fb37353 Mon Sep 17 00:00:00 2001 From: Elliott Sales de Andrade Date: Wed, 25 Mar 2020 20:47:42 -0400 Subject: [PATCH 68/75] Use drawstyle instead of linestyle in plot.step. (#3274) Mixing the two is deprecated in Matplotlib 3.1, and breaks the doc build if warnings are set to errors (which they are in new IPython sphinx extensions.) --- doc/whats-new.rst | 7 ++++++- xarray/plot/plot.py | 18 +++++++++--------- xarray/plot/utils.py | 4 ++-- xarray/tests/test_plot.py | 4 ++++ 4 files changed, 21 insertions(+), 12 deletions(-) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index eebd04123d1..14941228c88 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -20,7 +20,12 @@ v0.16.0 (unreleased) Breaking changes ~~~~~~~~~~~~~~~~ - +- Alternate draw styles for :py:meth:`plot.step` must be passed using the + ``drawstyle`` (or ``ds``) keyword argument, instead of the ``linestyle`` (or + ``ls``) keyword argument, in line with the `upstream change in Matplotlib + `_. 
+ (:pull:`3274`) + By `Elliott Sales de Andrade `_ New Features ~~~~~~~~~~~~ diff --git a/xarray/plot/plot.py b/xarray/plot/plot.py index 98131887e28..302cac05b05 100644 --- a/xarray/plot/plot.py +++ b/xarray/plot/plot.py @@ -329,7 +329,7 @@ def line( return primitive -def step(darray, *args, where="pre", linestyle=None, ls=None, **kwargs): +def step(darray, *args, where="pre", drawstyle=None, ds=None, **kwargs): """ Step plot of DataArray index against values @@ -359,16 +359,16 @@ def step(darray, *args, where="pre", linestyle=None, ls=None, **kwargs): if where not in {"pre", "post", "mid"}: raise ValueError("'where' argument to step must be " "'pre', 'post' or 'mid'") - if ls is not None: - if linestyle is None: - linestyle = ls + if ds is not None: + if drawstyle is None: + drawstyle = ds else: - raise TypeError("ls and linestyle are mutually exclusive") - if linestyle is None: - linestyle = "" - linestyle = "steps-" + where + linestyle + raise TypeError("ds and drawstyle are mutually exclusive") + if drawstyle is None: + drawstyle = "" + drawstyle = "steps-" + where + drawstyle - return line(darray, *args, linestyle=linestyle, **kwargs) + return line(darray, *args, drawstyle=drawstyle, **kwargs) def hist( diff --git a/xarray/plot/utils.py b/xarray/plot/utils.py index cb3bef6d409..e6c15037cb8 100644 --- a/xarray/plot/utils.py +++ b/xarray/plot/utils.py @@ -465,7 +465,7 @@ def _resolve_intervals_1dplot(xval, yval, xlabel, ylabel, kwargs): """ # Is it a step plot? (see matplotlib.Axes.step) - if kwargs.get("linestyle", "").startswith("steps-"): + if kwargs.get("drawstyle", "").startswith("steps-"): # Convert intervals to double points if _valid_other_type(np.array([xval, yval]), [pd.Interval]): @@ -476,7 +476,7 @@ def _resolve_intervals_1dplot(xval, yval, xlabel, ylabel, kwargs): yval, xval = _interval_to_double_bound_points(yval, xval) # Remove steps-* to be sure that matplotlib is not confused - del kwargs["linestyle"] + del kwargs["drawstyle"] # Is it another kind of plot? 
else: diff --git a/xarray/tests/test_plot.py b/xarray/tests/test_plot.py index c1549c62038..7f3f1620133 100644 --- a/xarray/tests/test_plot.py +++ b/xarray/tests/test_plot.py @@ -591,6 +591,10 @@ def setUp(self): def test_step(self): self.darray[0, 0].plot.step() + @pytest.mark.parametrize("ds", ["pre", "post", "mid"]) + def test_step_with_drawstyle(self, ds): + self.darray[0, 0].plot.step(drawstyle=ds) + def test_coord_with_interval_step(self): """Test step plot with intervals.""" bins = [-1, 0, 1, 2] From b3066746efd412cbc9b6c6aafd64229f4c9122f3 Mon Sep 17 00:00:00 2001 From: Joe Hamman Date: Fri, 27 Mar 2020 15:37:25 -0700 Subject: [PATCH 69/75] expose a few zarr backend functions as semi-public api (#3897) * expose a few zarr backend functions as semi-public api * black * update equality check for chunks --- xarray/backends/zarr.py | 44 +++++++++++++++++++++++--------- xarray/tests/test_backends.py | 47 +++++++++++++++++++++++++++++++++++ 2 files changed, 79 insertions(+), 12 deletions(-) diff --git a/xarray/backends/zarr.py b/xarray/backends/zarr.py index 2469a31a3d9..cdc74e06882 100644 --- a/xarray/backends/zarr.py +++ b/xarray/backends/zarr.py @@ -10,13 +10,20 @@ from .common import AbstractWritableDataStore, BackendArray, _encode_variable_name # need some special secret attributes to tell us the dimensions -_DIMENSION_KEY = "_ARRAY_DIMENSIONS" +DIMENSION_KEY = "_ARRAY_DIMENSIONS" -# zarr attributes have to be serializable as json -# many xarray datasets / variables have numpy arrays and values -# these functions handle encoding / decoding of such items -def _encode_zarr_attr_value(value): +def encode_zarr_attr_value(value): + """ + Encode a attribute value as something that can be serialized as json + + Many xarray datasets / variables have numpy arrays and values. This + function handles encoding / decoding of such items. + + ndarray -> list + scalar array -> scalar + other -> other (no change) + """ if isinstance(value, np.ndarray): encoded = value.tolist() # this checks if it's a scalar number @@ -170,7 +177,20 @@ def _get_zarr_dims_and_attrs(zarr_obj, dimension_key): return dimensions, attributes -def _extract_zarr_variable_encoding(variable, raise_on_invalid=False): +def extract_zarr_variable_encoding(variable, raise_on_invalid=False): + """ + Extract zarr encoding dictionary from xarray Variable + + Parameters + ---------- + variable : xarray.Variable + raise_on_invalid : bool, optional + + Returns + ------- + encoding : dict + Zarr encoding for `variable` + """ encoding = variable.encoding.copy() valid_encodings = {"chunks", "compressor", "filters", "cache_metadata"} @@ -271,7 +291,7 @@ def __init__(self, zarr_group, consolidate_on_close=False): def open_store_variable(self, name, zarr_array): data = indexing.LazilyOuterIndexedArray(ZarrArrayWrapper(name, self)) - dimensions, attributes = _get_zarr_dims_and_attrs(zarr_array, _DIMENSION_KEY) + dimensions, attributes = _get_zarr_dims_and_attrs(zarr_array, DIMENSION_KEY) attributes = dict(attributes) encoding = { "chunks": zarr_array.chunks, @@ -298,7 +318,7 @@ def get_dimensions(self): dimensions = {} for k, v in self.ds.arrays(): try: - for d, s in zip(v.attrs[_DIMENSION_KEY], v.shape): + for d, s in zip(v.attrs[DIMENSION_KEY], v.shape): if d in dimensions and dimensions[d] != s: raise ValueError( "found conflicting lengths for dimension %s " @@ -310,7 +330,7 @@ def get_dimensions(self): raise KeyError( "Zarr object is missing the attribute `%s`, " "which is required for xarray to determine " - "variable dimensions." 
% (_DIMENSION_KEY) + "variable dimensions." % (DIMENSION_KEY) ) return dimensions @@ -328,7 +348,7 @@ def encode_variable(self, variable): return variable def encode_attribute(self, a): - return _encode_zarr_attr_value(a) + return encode_zarr_attr_value(a) def store( self, @@ -433,10 +453,10 @@ def set_variables(self, variables, check_encoding_set, writer, unlimited_dims=No writer.add(v.data, zarr_array, region=tuple(new_region)) else: # new variable - encoding = _extract_zarr_variable_encoding(v, raise_on_invalid=check) + encoding = extract_zarr_variable_encoding(v, raise_on_invalid=check) encoded_attrs = {} # the magic for storing the hidden dimension data - encoded_attrs[_DIMENSION_KEY] = dims + encoded_attrs[DIMENSION_KEY] = dims for k2, v2 in attrs.items(): encoded_attrs[k2] = self.encode_attribute(v2) diff --git a/xarray/tests/test_backends.py b/xarray/tests/test_backends.py index a4585985bdc..82fe1b38149 100644 --- a/xarray/tests/test_backends.py +++ b/xarray/tests/test_backends.py @@ -4498,3 +4498,50 @@ def test_invalid_netcdf_raises(engine): data = create_test_data() with raises_regex(ValueError, "unrecognized option 'invalid_netcdf'"): data.to_netcdf("foo.nc", engine=engine, invalid_netcdf=True) + + +@requires_zarr +def test_encode_zarr_attr_value(): + # array -> list + arr = np.array([1, 2, 3]) + expected = [1, 2, 3] + actual = backends.zarr.encode_zarr_attr_value(arr) + assert isinstance(actual, list) + assert actual == expected + + # scalar array -> scalar + sarr = np.array(1)[()] + expected = 1 + actual = backends.zarr.encode_zarr_attr_value(sarr) + assert isinstance(actual, int) + assert actual == expected + + # string -> string (no change) + expected = "foo" + actual = backends.zarr.encode_zarr_attr_value(expected) + assert isinstance(actual, str) + assert actual == expected + + +@requires_zarr +def test_extract_zarr_variable_encoding(): + + var = xr.Variable("x", [1, 2]) + actual = backends.zarr.extract_zarr_variable_encoding(var) + assert "chunks" in actual + assert actual["chunks"] is None + + var = xr.Variable("x", [1, 2], encoding={"chunks": (1,)}) + actual = backends.zarr.extract_zarr_variable_encoding(var) + assert actual["chunks"] == (1,) + + # does not raise on invalid + var = xr.Variable("x", [1, 2], encoding={"foo": (1,)}) + actual = backends.zarr.extract_zarr_variable_encoding(var) + + # raises on invalid + var = xr.Variable("x", [1, 2], encoding={"foo": (1,)}) + with raises_regex(ValueError, "unexpected encoding parameters"): + actual = backends.zarr.extract_zarr_variable_encoding( + var, raise_on_invalid=True + ) From acf7d4157ca44f05c85a92d1b914b68738988773 Mon Sep 17 00:00:00 2001 From: Maximilian Roos <5635139+max-sixty@users.noreply.github.com> Date: Fri, 27 Mar 2020 22:22:11 -0400 Subject: [PATCH 70/75] Limit repr of arrays containing long strings (#3900) * limit repr of arrays containing long strings * whatsnew --- doc/whats-new.rst | 3 +++ xarray/core/formatting.py | 34 ++++++++++++++++++++++++--------- xarray/tests/test_formatting.py | 25 +++++++++++++++++------- 3 files changed, 46 insertions(+), 16 deletions(-) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 14941228c88..a138dee4128 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -35,6 +35,9 @@ New Features :py:func:`combine_by_coords` and :py:func:`combine_nested` using combine_attrs keyword argument. 
(:issue:`3865`, :pull:`3877`)
   By `John Omotani `_
+- Limited the length of array items with long string reprs to a
+  reasonable width (:pull:`3900`)
+  By `Maximilian Roos `_
 
 
 Bug fixes
diff --git a/xarray/core/formatting.py b/xarray/core/formatting.py
index 89246ff228d..534d253ecc8 100644
--- a/xarray/core/formatting.py
+++ b/xarray/core/formatting.py
@@ -4,6 +4,7 @@
 import functools
 from datetime import datetime, timedelta
 from itertools import zip_longest
+from typing import Hashable
 
 import numpy as np
 import pandas as pd
@@ -14,7 +15,7 @@
 from .pycompat import dask_array_type, sparse_array_type
 
 
-def pretty_print(x, numchars):
+def pretty_print(x, numchars: int):
    """Given an object `x`, call `str(x)` and format the returned string so
    that it is numchars long, padding with trailing spaces or truncating with
    ellipses as necessary
@@ -163,7 +164,7 @@ def format_items(x):
     return formatted
 
 
-def format_array_flat(array, max_width):
+def format_array_flat(array, max_width: int):
     """Return a formatted string for as many items in the flattened version of
     array that will fit within max_width characters.
     """
@@ -198,11 +199,20 @@ def format_array_flat(array, max_width):
     num_back = count - num_front
     # note that num_back is 0 <--> array.size is 0 or 1
     # <--> relevant_back_items is []
-    pprint_str = (
-        " ".join(relevant_front_items[:num_front])
-        + padding
-        + " ".join(relevant_back_items[-num_back:])
+    pprint_str = "".join(
+        [
+            " ".join(relevant_front_items[:num_front]),
+            padding,
+            " ".join(relevant_back_items[-num_back:]),
+        ]
     )
+
+    # As a final check, if it's still too long even with the limit in values,
+    # replace the end with an ellipsis
+    # NB: this will still return a full 3-character ellipsis when max_width < 3
+    if len(pprint_str) > max_width:
+        pprint_str = pprint_str[: max(max_width - 3, 0)] + "..."
+
     return pprint_str
 
 
@@ -258,10 +268,16 @@ def inline_variable_array_repr(var, max_width):
         return "..."
 
 
-def summarize_variable(name, var, col_width, marker=" ", max_width=None):
+def summarize_variable(
+    name: Hashable, var, col_width: int, marker: str = " ", max_width: int = None
+):
     """Summarize a variable in one line, e.g., for the Dataset.__repr__."""
     if max_width is None:
-        max_width = OPTIONS["display_width"]
+        max_width_options = OPTIONS["display_width"]
+        if not isinstance(max_width_options, int):
+            raise TypeError(f"`max_width` value of `{max_width}` is not a valid int")
+        else:
+            max_width = max_width_options
     first_col = pretty_print(f"  {marker} {name} ", col_width)
     if var.dims:
         dims_str = "({}) ".format(", ".join(map(str, var.dims)))
@@ -295,7 +311,7 @@ def summarize_datavar(name, var, col_width):
     return summarize_variable(name, var.variable, col_width)
 
 
-def summarize_coord(name, var, col_width):
+def summarize_coord(name: Hashable, var, col_width: int):
     is_index = name in var.dims
     marker = "*" if is_index else " "
     if is_index:
diff --git a/xarray/tests/test_formatting.py b/xarray/tests/test_formatting.py
index 61ecf46b79b..6881c0bc0ff 100644
--- a/xarray/tests/test_formatting.py
+++ b/xarray/tests/test_formatting.py
@@ -115,7 +115,7 @@ def test_format_items(self):
 
     def test_format_array_flat(self):
         actual = formatting.format_array_flat(np.arange(100), 2)
-        expected = "0 ... 99"
+        expected = "..."
         assert expected == actual
 
         actual = formatting.format_array_flat(np.arange(100), 9)
@@ -134,11 +134,13 @@ def test_format_array_flat(self):
         expected = "0 1 2 ... 
98 99" assert expected == actual + # NB: Probably not ideal; an alternative would be cutting after the + # first ellipsis actual = formatting.format_array_flat(np.arange(100.0), 11) - expected = "0.0 ... 99.0" + expected = "0.0 ... ..." assert expected == actual - actual = formatting.format_array_flat(np.arange(100.0), 1) + actual = formatting.format_array_flat(np.arange(100.0), 12) expected = "0.0 ... 99.0" assert expected == actual @@ -154,16 +156,25 @@ def test_format_array_flat(self): expected = "" assert expected == actual - actual = formatting.format_array_flat(np.arange(1), 0) + actual = formatting.format_array_flat(np.arange(1), 1) expected = "0" assert expected == actual - actual = formatting.format_array_flat(np.arange(2), 0) + actual = formatting.format_array_flat(np.arange(2), 3) expected = "0 1" assert expected == actual - actual = formatting.format_array_flat(np.arange(4), 0) - expected = "0 ... 3" + actual = formatting.format_array_flat(np.arange(4), 7) + expected = "0 1 2 3" + assert expected == actual + + actual = formatting.format_array_flat(np.arange(5), 7) + expected = "0 ... 4" + assert expected == actual + + long_str = [" ".join(["hello world" for _ in range(100)])] + actual = formatting.format_array_flat(np.asarray([long_str]), 21) + expected = "'hello world hello..." assert expected == actual def test_pretty_print(self): From b084064fa62d3dedc3706c2f6c2dff90940fec27 Mon Sep 17 00:00:00 2001 From: Spencer Clark Date: Sat, 28 Mar 2020 10:26:45 -0400 Subject: [PATCH 71/75] Un-xfail test_dayofyear_after_cftime_range (#3907) --- xarray/tests/test_cftime_offsets.py | 1 - 1 file changed, 1 deletion(-) diff --git a/xarray/tests/test_cftime_offsets.py b/xarray/tests/test_cftime_offsets.py index 2352f9e8cdd..343e059f53c 100644 --- a/xarray/tests/test_cftime_offsets.py +++ b/xarray/tests/test_cftime_offsets.py @@ -1176,7 +1176,6 @@ def test_dayofweek_after_cftime_range(freq): np.testing.assert_array_equal(result, expected) -@pytest.mark.xfail(reason="See GH3885") @pytest.mark.parametrize("freq", ["A", "M", "D"]) def test_dayofyear_after_cftime_range(freq): pytest.importorskip("cftime", minversion="1.0.2.1") From 6852d01b2e2315b2d7244315f61bf7ecfbc19206 Mon Sep 17 00:00:00 2001 From: keewis Date: Sat, 28 Mar 2020 19:18:20 +0100 Subject: [PATCH 72/75] update the docstring of diff (#3909) * don't document the dim parameter as optional * update whats-new.rst * fix the reference syntax --- doc/whats-new.rst | 4 ++++ xarray/core/dataarray.py | 2 +- xarray/core/dataset.py | 2 +- 3 files changed, 6 insertions(+), 2 deletions(-) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index a138dee4128..9d83c5e4207 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -46,6 +46,10 @@ Bug fixes Documentation ~~~~~~~~~~~~~ +- update the docstring of :py:meth:`Dataset.diff` and + :py:meth:`DataArray.diff` so it does document the ``dim`` + parameter as required. (:issue:`1040`, :pull:`3909`) + By `Justus Magin `_. Internal Changes diff --git a/xarray/core/dataarray.py b/xarray/core/dataarray.py index 070886cfc34..94c4026d4a1 100644 --- a/xarray/core/dataarray.py +++ b/xarray/core/dataarray.py @@ -2696,7 +2696,7 @@ def diff(self, dim: Hashable, n: int = 1, label: Hashable = "upper") -> "DataArr Parameters ---------- - dim : hashable, optional + dim : hashable Dimension over which to calculate the finite difference. n : int, optional The number of times values are differenced. 
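
For reference, a minimal sketch of the behaviour the corrected docstrings describe (assuming ``import xarray as xr``; the array values are illustrative) — ``dim`` must always be given, while ``n`` and ``label`` keep their defaults:

>>> arr = xr.DataArray([5, 5, 6, 6], dims="x", coords={"x": [0, 1, 2, 3]})
>>> arr.diff("x")  # n=1, label="upper": differences labelled x = 1 2 3
<xarray.DataArray (x: 3)>
array([0, 1, 0])
Coordinates:
  * x        (x) int64 1 2 3
>>> arr.diff("x", n=2)  # the difference applied twice
<xarray.DataArray (x: 2)>
array([ 1, -1])
Coordinates:
  * x        (x) int64 2 3
>>> arr.diff("x", label="lower")  # same values, labelled x = 0 1 2
<xarray.DataArray (x: 3)>
array([0, 1, 0])
Coordinates:
  * x        (x) int64 0 1 2
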
diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index c49694b1fc0..bd8f0ef3948 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -4901,7 +4901,7 @@ def diff(self, dim, n=1, label="upper"): Parameters ---------- - dim : str, optional + dim : str Dimension over which to calculate the finite difference. n : int, optional The number of times values are differenced. From 79513b7453f0b284ac5abcacf96fede702d6150c Mon Sep 17 00:00:00 2001 From: Maximilian Roos <5635139+max-sixty@users.noreply.github.com> Date: Sat, 28 Mar 2020 16:01:23 -0400 Subject: [PATCH 73/75] Revert "Use `fixes` in PR template (#3886)" (#3912) This reverts commit ee3c87659d1687a86d406065a5af1b4b87beec17. --- .github/PULL_REQUEST_TEMPLATE.md | 2 +- doc/whats-new.rst | 3 --- 2 files changed, 1 insertion(+), 4 deletions(-) diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md index c30202ac046..a921bddaa23 100644 --- a/.github/PULL_REQUEST_TEMPLATE.md +++ b/.github/PULL_REQUEST_TEMPLATE.md @@ -1,6 +1,6 @@ - - [ ] Fixes #xxxx + - [ ] Closes #xxxx - [ ] Tests added - [ ] Passes `isort -rc . && black . && mypy . && flake8` - [ ] Fully documented, including `whats-new.rst` for all changes and `api.rst` for new API diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 9d83c5e4207..bd0851d2b39 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -54,9 +54,6 @@ Documentation Internal Changes ~~~~~~~~~~~~~~~~ -- Use ``Fixes`` rather than ``Closes`` in GitHub Pull Request template, allowing - linking to issues. - By `Maximilian Roos `_ .. _whats-new.0.15.1: From ca6bb8561f2977509fc5bf53eae0efd080c0a952 Mon Sep 17 00:00:00 2001 From: keewis Date: Sat, 28 Mar 2020 21:58:14 +0100 Subject: [PATCH 74/75] Update pre-commit-config.yaml (#3911) * only run isort on python source files * update the flake8 hook * update whats-new.rst --- .pre-commit-config.yaml | 5 +++-- doc/whats-new.rst | 3 +++ 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 9df95648774..26bf4803ef6 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -5,13 +5,14 @@ repos: rev: 4.3.21-2 hooks: - id: isort + files: .+\.py$ # https://github.com/python/black#version-control-integration - repo: https://github.com/python/black rev: stable hooks: - id: black - - repo: https://github.com/pre-commit/pre-commit-hooks - rev: v2.2.3 + - repo: https://gitlab.com/pycqa/flake8 + rev: 3.7.9 hooks: - id: flake8 - repo: https://github.com/pre-commit/mirrors-mypy diff --git a/doc/whats-new.rst b/doc/whats-new.rst index bd0851d2b39..39aaa0e4fd3 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -54,6 +54,9 @@ Documentation Internal Changes ~~~~~~~~~~~~~~~~ +- Run the ``isort`` pre-commit hook only on python source files + and update the ``flake8`` version. (:issue:`3750`, :pull:`3711`) + By `Justus Magin `_. .. 
_whats-new.0.15.1:

From 1416d5ae475c0875e7a5d76fa4a8278838958162 Mon Sep 17 00:00:00 2001
From: Todd
Date: Sat, 28 Mar 2020 21:54:24 -0400
Subject: [PATCH 75/75] Implement idxmax and idxmin functions (#3871)

* drop numpy 1.12 compat code that can hide other errors

* deep copy _indexes (#3899)

* implement idxmax and idxmin
---
 doc/api.rst                    |   4 +
 doc/whats-new.rst              |   6 +
 xarray/core/computation.py     |  66 ++-
 xarray/core/dataarray.py       | 193 +++++++-
 xarray/core/dataset.py         | 191 ++++++++
 xarray/core/duck_array_ops.py  |  20 +-
 xarray/tests/test_dataarray.py | 802 +++++++++++++++++++++++++++++++++
 xarray/tests/test_dataset.py   |   9 +
 8 files changed, 1277 insertions(+), 14 deletions(-)

diff --git a/doc/api.rst b/doc/api.rst
index 216f47f988f..b37c84e7a81 100644
--- a/doc/api.rst
+++ b/doc/api.rst
@@ -180,6 +180,8 @@ Computation
 :py:attr:`~Dataset.any`
 :py:attr:`~Dataset.argmax`
 :py:attr:`~Dataset.argmin`
+:py:attr:`~Dataset.idxmax`
+:py:attr:`~Dataset.idxmin`
 :py:attr:`~Dataset.max`
 :py:attr:`~Dataset.mean`
 :py:attr:`~Dataset.median`
@@ -362,6 +364,8 @@ Computation
 :py:attr:`~DataArray.any`
 :py:attr:`~DataArray.argmax`
 :py:attr:`~DataArray.argmin`
+:py:attr:`~DataArray.idxmax`
+:py:attr:`~DataArray.idxmin`
 :py:attr:`~DataArray.max`
 :py:attr:`~DataArray.mean`
 :py:attr:`~DataArray.median`
diff --git a/doc/whats-new.rst b/doc/whats-new.rst
index 39aaa0e4fd3..c70dfd4f3f6 100644
--- a/doc/whats-new.rst
+++ b/doc/whats-new.rst
@@ -38,10 +38,16 @@ New Features
 - Limited the length of array items with long string reprs to a
   reasonable width (:pull:`3900`)
   By `Maximilian Roos `_
+- Implement :py:meth:`DataArray.idxmax`, :py:meth:`DataArray.idxmin`,
+  :py:meth:`Dataset.idxmax`, :py:meth:`Dataset.idxmin`. (:issue:`60`, :pull:`3871`)
+  By `Todd Jennings `_
 
 
 Bug fixes
 ~~~~~~~~~
+- Fix a regression where deleting a coordinate from a copied :py:class:`DataArray`
+  can affect the original :py:class:`DataArray`. (:issue:`3899`, :pull:`3871`)
+  By `Todd Jennings `_
 
 
 Documentation
diff --git a/xarray/core/computation.py b/xarray/core/computation.py
index 13bf6248331..6cf4178b5bf 100644
--- a/xarray/core/computation.py
+++ b/xarray/core/computation.py
@@ -23,9 +23,10 @@
 
 import numpy as np
 
-from . import duck_array_ops, utils
+from . import dtypes, duck_array_ops, utils
 from .alignment import deep_align
 from .merge import merge_coordinates_without_align
+from .nanops import dask_array
 from .options import OPTIONS
 from .pycompat import dask_array_type
 from .utils import is_dict_like
@@ -1338,3 +1339,66 @@ def polyval(coord, coeffs, degree_dim="degree"):
         coords={coord.name: coord, degree_dim: np.arange(deg_coord.max() + 1)[::-1]},
     )
     return (lhs * coeffs).sum(degree_dim)
+
+
+def _calc_idxminmax(
+    *,
+    array,
+    func: Callable,
+    dim: Hashable = None,
+    skipna: bool = None,
+    fill_value: Any = dtypes.NA,
+    keep_attrs: bool = None,
+):
+    """Apply common operations for idxmin and idxmax."""
+    # This function doesn't make sense for scalars so don't try
+    if not array.ndim:
+        raise ValueError("This function does not apply for scalars")
+
+    if dim is not None:
+        pass  # Use the dim if available
+    elif array.ndim == 1:
+        # it is okay to guess the dim if there is only 1
+        dim = array.dims[0]
+    else:
+        # The dim is not specified and ambiguous. Don't guess.
+ raise ValueError("Must supply 'dim' argument for multidimensional arrays") + + if dim not in array.dims: + raise KeyError(f'Dimension "{dim}" not in dimension') + if dim not in array.coords: + raise KeyError(f'Dimension "{dim}" does not have coordinates') + + # These are dtypes with NaN values argmin and argmax can handle + na_dtypes = "cfO" + + if skipna or (skipna is None and array.dtype.kind in na_dtypes): + # Need to skip NaN values since argmin and argmax can't handle them + allna = array.isnull().all(dim) + array = array.where(~allna, 0) + + # This will run argmin or argmax. + indx = func(array, dim=dim, axis=None, keep_attrs=keep_attrs, skipna=skipna) + + # Get the coordinate we want. + coordarray = array[dim] + + # Handle dask arrays. + if isinstance(array, dask_array_type): + res = dask_array.map_blocks(coordarray, indx, dtype=indx.dtype) + else: + res = coordarray[ + indx, + ] + + if skipna or (skipna is None and array.dtype.kind in na_dtypes): + # Put the NaN values back in after removing them + res = res.where(~allna, fill_value) + + # The dim is gone but we need to remove the corresponding coordinate. + del res.coords[dim] + + # Copy attributes from argmin/argmax, if any + res.attrs = indx.attrs + + return res diff --git a/xarray/core/dataarray.py b/xarray/core/dataarray.py index 94c4026d4a1..b7e0333dcd9 100644 --- a/xarray/core/dataarray.py +++ b/xarray/core/dataarray.py @@ -930,7 +930,10 @@ def copy(self, deep: bool = True, data: Any = None) -> "DataArray": """ variable = self.variable.copy(deep=deep, data=data) coords = {k: v.copy(deep=deep) for k, v in self._coords.items()} - indexes = self._indexes + if self._indexes is None: + indexes = self._indexes + else: + indexes = {k: v.copy(deep=deep) for k, v in self._indexes.items()} return self._replace(variable, coords, indexes=indexes) def __copy__(self) -> "DataArray": @@ -3505,6 +3508,194 @@ def pad( ) return self._from_temp_dataset(ds) + def idxmin( + self, + dim: Hashable = None, + skipna: bool = None, + fill_value: Any = dtypes.NA, + keep_attrs: bool = None, + ) -> "DataArray": + """Return the coordinate label of the minimum value along a dimension. + + Returns a new `DataArray` named after the dimension with the values of + the coordinate labels along that dimension corresponding to minimum + values along that dimension. + + In comparison to :py:meth:`~DataArray.argmin`, this returns the + coordinate label while :py:meth:`~DataArray.argmin` returns the index. + + Parameters + ---------- + dim : str, optional + Dimension over which to apply `idxmin`. This is optional for 1D + arrays, but required for arrays with 2 or more dimensions. + skipna : bool or None, default None + If True, skip missing values (as marked by NaN). By default, only + skips missing values for ``float``, ``complex``, and ``object`` + dtypes; other dtypes either do not have a sentinel missing value + (``int``) or ``skipna=True`` has not been implemented + (``datetime64`` or ``timedelta64``). + fill_value : Any, default NaN + Value to be filled in case all of the values along a dimension are + null. By default this is NaN. The fill value and result are + automatically converted to a compatible dtype if possible. + Ignored if ``skipna`` is False. + keep_attrs : bool, default False + If True, the attributes (``attrs``) will be copied from the + original object to the new one. If False (default), the new object + will be returned without attributes. 
+
+        Returns
+        -------
+        reduced : DataArray
+            New `DataArray` object with `idxmin` applied to its data and the
+            indicated dimension removed.
+
+        See also
+        --------
+        Dataset.idxmin, DataArray.idxmax, DataArray.min, DataArray.argmin
+
+        Examples
+        --------
+
+        >>> array = xr.DataArray([0, 2, 1, 0, -2], dims="x",
+        ...                      coords={"x": ['a', 'b', 'c', 'd', 'e']})
+        >>> array.min()
+        <xarray.DataArray ()>
+        array(-2)
+        >>> array.argmin()
+        <xarray.DataArray ()>
+        array(4)
+        >>> array.idxmin()
+        <xarray.DataArray 'x' ()>
+        array('e', dtype='<U1')
+
+        >>> array = xr.DataArray([[2.0, 1.0, 2.0, 0.0, -2.0],
+        ...                       [-4.0, np.NaN, 2.0, np.NaN, -2.0],
+        ...                       [np.NaN, np.NaN, 1., np.NaN, np.NaN]],
+        ...                      dims=["y", "x"],
+        ...                      coords={"y": [-1, 0, 1],
+        ...                              "x": np.arange(5.)**2}
+        ...                      )
+        >>> array.min(dim="x")
+        <xarray.DataArray (y: 3)>
+        array([-2., -4.,  1.])
+        Coordinates:
+          * y        (y) int64 -1 0 1
+        >>> array.argmin(dim="x")
+        <xarray.DataArray (y: 3)>
+        array([4, 0, 2])
+        Coordinates:
+          * y        (y) int64 -1 0 1
+        >>> array.idxmin(dim="x")
+        <xarray.DataArray 'x' (y: 3)>
+        array([16.,  0.,  4.])
+        Coordinates:
+          * y        (y) int64 -1 0 1
+        """
+        return computation._calc_idxminmax(
+            array=self,
+            func=lambda x, *args, **kwargs: x.argmin(*args, **kwargs),
+            dim=dim,
+            skipna=skipna,
+            fill_value=fill_value,
+            keep_attrs=keep_attrs,
+        )
+
+    def idxmax(
+        self,
+        dim: Hashable = None,
+        skipna: bool = None,
+        fill_value: Any = dtypes.NA,
+        keep_attrs: bool = None,
+    ) -> "DataArray":
+        """Return the coordinate label of the maximum value along a dimension.
+
+        Returns a new `DataArray` named after the dimension with the values of
+        the coordinate labels along that dimension corresponding to maximum
+        values along that dimension.
+
+        In comparison to :py:meth:`~DataArray.argmax`, this returns the
+        coordinate label while :py:meth:`~DataArray.argmax` returns the index.
+
+        Parameters
+        ----------
+        dim : str, optional
+            Dimension over which to apply `idxmax`. This is optional for 1D
+            arrays, but required for arrays with 2 or more dimensions.
+        skipna : bool or None, default None
+            If True, skip missing values (as marked by NaN). By default, only
+            skips missing values for ``float``, ``complex``, and ``object``
+            dtypes; other dtypes either do not have a sentinel missing value
+            (``int``) or ``skipna=True`` has not been implemented
+            (``datetime64`` or ``timedelta64``).
+        fill_value : Any, default NaN
+            Value to be filled in case all of the values along a dimension are
+            null. By default this is NaN. The fill value and result are
+            automatically converted to a compatible dtype if possible.
+            Ignored if ``skipna`` is False.
+        keep_attrs : bool, default False
+            If True, the attributes (``attrs``) will be copied from the
+            original object to the new one. If False (default), the new object
+            will be returned without attributes.
+
+        Returns
+        -------
+        reduced : DataArray
+            New `DataArray` object with `idxmax` applied to its data and the
+            indicated dimension removed.
+
+        See also
+        --------
+        Dataset.idxmax, DataArray.idxmin, DataArray.max, DataArray.argmax
+
+        Examples
+        --------
+
+        >>> array = xr.DataArray([0, 2, 1, 0, -2], dims="x",
+        ...                      coords={"x": ['a', 'b', 'c', 'd', 'e']})
+        >>> array.max()
+        <xarray.DataArray ()>
+        array(2)
+        >>> array.argmax()
+        <xarray.DataArray ()>
+        array(1)
+        >>> array.idxmax()
+        <xarray.DataArray 'x' ()>
+        array('b', dtype='<U1')
+
+        >>> array = xr.DataArray([[2.0, 1.0, 2.0, 0.0, -2.0],
+        ...                       [-4.0, np.NaN, 2.0, np.NaN, -2.0],
+        ...                       [np.NaN, np.NaN, 1., np.NaN, np.NaN]],
+        ...                      dims=["y", "x"],
+        ...                      coords={"y": [-1, 0, 1],
+        ...                              "x": np.arange(5.)**2}
+        ...                      )
+        >>> array.max(dim="x")
+        <xarray.DataArray (y: 3)>
+        array([2., 2., 1.])
+        Coordinates:
+          * y        (y) int64 -1 0 1
+        >>> array.argmax(dim="x")
+        <xarray.DataArray (y: 3)>
+        array([0, 2, 2])
+        Coordinates:
+          * y        (y) int64 -1 0 1
+        >>> array.idxmax(dim="x")
+        <xarray.DataArray 'x' (y: 3)>
+        array([0., 4., 4.])
+        Coordinates:
+          * y        (y) int64 -1 0 1
+        """
+        return computation._calc_idxminmax(
+            array=self,
+            func=lambda x, *args, **kwargs: x.argmax(*args, **kwargs),
+            dim=dim,
+            skipna=skipna,
+            fill_value=fill_value,
+            keep_attrs=keep_attrs,
+        )
+
     # this needs to be at the end, or mypy will confuse with `str`
     # https://mypy.readthedocs.io/en/latest/common_issues.html#dealing-with-conflicting-names
     str = property(StringAccessor)
diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py
index bd8f0ef3948..c515d781db1 100644
--- a/xarray/core/dataset.py
+++ b/xarray/core/dataset.py
@@ -6,6 +6,7 @@
 from collections import defaultdict
 from html import escape
 from numbers import Number
+from operator import methodcaller
 from pathlib import Path
 from typing import (
     TYPE_CHECKING,
@@ -6093,5 +6094,195 @@
 
         return self._replace_vars_and_dims(variables)
 
+    def idxmin(
+        self,
+        dim: Hashable = None,
+        skipna: bool = None,
+        fill_value: Any = dtypes.NA,
+        keep_attrs: bool = None,
+    ) -> "Dataset":
+        """Return the coordinate label of the minimum value along a dimension.
+
+        Returns a new `Dataset` named after the dimension with the values of
+        the coordinate labels along that dimension corresponding to minimum
+        values along that dimension.
+
+        In comparison to :py:meth:`~Dataset.argmin`, this returns the
+        coordinate label while :py:meth:`~Dataset.argmin` returns the index.
+
+        Parameters
+        ----------
+        dim : str, optional
+            Dimension over which to apply `idxmin`. This is optional for 1D
+            variables, but required for variables with 2 or more dimensions.
+        skipna : bool or None, default None
+            If True, skip missing values (as marked by NaN). By default, only
+            skips missing values for ``float``, ``complex``, and ``object``
+            dtypes; other dtypes either do not have a sentinel missing value
+            (``int``) or ``skipna=True`` has not been implemented
+            (``datetime64`` or ``timedelta64``).
+        fill_value : Any, default NaN
+            Value to be filled in case all of the values along a dimension are
+            null. By default this is NaN. The fill value and result are
+            automatically converted to a compatible dtype if possible.
+            Ignored if ``skipna`` is False.
+        keep_attrs : bool, default False
+            If True, the attributes (``attrs``) will be copied from the
+            original object to the new one. If False (default), the new object
+            will be returned without attributes.
+
+        Returns
+        -------
+        reduced : Dataset
+            New `Dataset` object with `idxmin` applied to its data and the
+            indicated dimension removed.
+
+        See also
+        --------
+        DataArray.idxmin, Dataset.idxmax, Dataset.min, Dataset.argmin
+
+        Examples
+        --------
+
+        >>> array1 = xr.DataArray([0, 2, 1, 0, -2], dims="x",
+        ...                       coords={"x": ['a', 'b', 'c', 'd', 'e']})
+        >>> array2 = xr.DataArray([[2.0, 1.0, 2.0, 0.0, -2.0],
+        ...                        [-4.0, np.NaN, 2.0, np.NaN, -2.0],
+        ...                        [np.NaN, np.NaN, 1., np.NaN, np.NaN]],
+        ...                       dims=["y", "x"],
+        ...                       coords={"y": [-1, 0, 1],
+        ...                               "x": ['a', 'b', 'c', 'd', 'e']}
+        ...                       
)
+        >>> ds = xr.Dataset({'int': array1, 'float': array2})
+        >>> ds.min(dim='x')
+        <xarray.Dataset>
+        Dimensions:  (y: 3)
+        Coordinates:
+          * y        (y) int64 -1 0 1
+        Data variables:
+            int      int64 -2
+            float    (y) float64 -2.0 -4.0 1.0
+        >>> ds.argmin(dim='x')
+        <xarray.Dataset>
+        Dimensions:  (y: 3)
+        Coordinates:
+          * y        (y) int64 -1 0 1
+        Data variables:
+            int      int64 4
+            float    (y) int64 4 0 2
+        >>> ds.idxmin(dim='x')
+        <xarray.Dataset>
+        Dimensions:  (y: 3)
+        Coordinates:
+          * y        (y) int64 -1 0 1
+        Data variables:
+            int      <U1 'e'
+            float    (y) object 'e' 'a' 'c'
+        """
+        return self.map(
+            methodcaller(
+                "idxmin",
+                dim=dim,
+                skipna=skipna,
+                fill_value=fill_value,
+                keep_attrs=keep_attrs,
+            )
+        )
+
+    def idxmax(
+        self,
+        dim: Hashable = None,
+        skipna: bool = None,
+        fill_value: Any = dtypes.NA,
+        keep_attrs: bool = None,
+    ) -> "Dataset":
+        """Return the coordinate label of the maximum value along a dimension.
+
+        Returns a new `Dataset` named after the dimension with the values of
+        the coordinate labels along that dimension corresponding to maximum
+        values along that dimension.
+
+        In comparison to :py:meth:`~Dataset.argmax`, this returns the
+        coordinate label while :py:meth:`~Dataset.argmax` returns the index.
+
+        Parameters
+        ----------
+        dim : str, optional
+            Dimension over which to apply `idxmax`. This is optional for 1D
+            variables, but required for variables with 2 or more dimensions.
+        skipna : bool or None, default None
+            If True, skip missing values (as marked by NaN). By default, only
+            skips missing values for ``float``, ``complex``, and ``object``
+            dtypes; other dtypes either do not have a sentinel missing value
+            (``int``) or ``skipna=True`` has not been implemented
+            (``datetime64`` or ``timedelta64``).
+        fill_value : Any, default NaN
+            Value to be filled in case all of the values along a dimension are
+            null. By default this is NaN. The fill value and result are
+            automatically converted to a compatible dtype if possible.
+            Ignored if ``skipna`` is False.
+        keep_attrs : bool, default False
+            If True, the attributes (``attrs``) will be copied from the
+            original object to the new one. If False (default), the new object
+            will be returned without attributes.
+
+        Returns
+        -------
+        reduced : Dataset
+            New `Dataset` object with `idxmax` applied to its data and the
+            indicated dimension removed.
+
+        See also
+        --------
+        DataArray.idxmax, Dataset.idxmin, Dataset.max, Dataset.argmax
+
+        Examples
+        --------
+
+        >>> array1 = xr.DataArray([0, 2, 1, 0, -2], dims="x",
+        ...                       coords={"x": ['a', 'b', 'c', 'd', 'e']})
+        >>> array2 = xr.DataArray([[2.0, 1.0, 2.0, 0.0, -2.0],
+        ...                        [-4.0, np.NaN, 2.0, np.NaN, -2.0],
+        ...                        [np.NaN, np.NaN, 1., np.NaN, np.NaN]],
+        ...                       dims=["y", "x"],
+        ...                       coords={"y": [-1, 0, 1],
+        ...                               "x": ['a', 'b', 'c', 'd', 'e']}
+        ...                       )
+        >>> ds = xr.Dataset({'int': array1, 'float': array2})
+        >>> ds.max(dim='x')
+        <xarray.Dataset>
+        Dimensions:  (y: 3)
+        Coordinates:
+          * y        (y) int64 -1 0 1
+        Data variables:
+            int      int64 2
+            float    (y) float64 2.0 2.0 1.0
+        >>> ds.argmax(dim='x')
+        <xarray.Dataset>
+        Dimensions:  (y: 3)
+        Coordinates:
+          * y        (y) int64 -1 0 1
+        Data variables:
+            int      int64 1
+            float    (y) int64 0 2 2
+        >>> ds.idxmax(dim='x')
+        <xarray.Dataset>
+        Dimensions:  (y: 3)
+        Coordinates:
+          * y        (y) int64 -1 0 1
+        Data variables:
+            int
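
A compact sketch of the semantics implemented by ``_calc_idxminmax`` above — ``argmax`` yields integer positions, ``idxmax`` the coordinate labels at those positions, and ``fill_value`` covers all-NaN slices (assuming ``import numpy as np`` and ``import xarray as xr``; values are illustrative):

>>> da = xr.DataArray(
...     [[0.0, 3.0], [1.0, np.nan]],
...     dims=("y", "x"),
...     coords={"y": [0, 1], "x": [10, 20]},
... )
>>> da.argmax(dim="x").values  # integer positions along x
array([1, 0])
>>> da.idxmax(dim="x").values  # the "x" labels at those positions
array([20., 10.])
>>> allnan = xr.DataArray(
...     [[np.nan, np.nan], [1.0, 2.0]],
...     dims=("y", "x"),
...     coords={"y": [0, 1], "x": [10, 20]},
... )
>>> allnan.idxmax(dim="x").values  # an all-NaN slice maps to the fill value (NaN by default)
array([nan, 20.])
>>> allnan.idxmax(dim="x", fill_value=-1).values
array([-1., 20.])

On the ``Dataset`` side, the ``operator.methodcaller`` import added above reflects that the same per-variable operation is mapped over each data variable.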