From de66daec91aa277bf3330c66e0e7f70a8d2f5acc Mon Sep 17 00:00:00 2001 From: Martin Raspaud Date: Tue, 12 Sep 2023 01:05:48 +0200 Subject: [PATCH 01/10] Implement preferred_chunks for netcdf 4 backends (#7948) * Write failing test * Add preferred chunks to netcdf 4 backends * Add unit tests for preferred chunking * Fix formatting * Require dask for a couple of chunking tests * Use xarray's interface to create a test chunked nc file * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * Fix type annotations * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * Import Generator * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * Use roundtrip * Add news about the new feature * Update xarray/tests/test_backends.py * Update xarray/tests/test_backends.py * Move whats new line --------- Co-authored-by: Deepak Cherian Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Co-authored-by: Maximilian Roos <5635139+max-sixty@users.noreply.github.com> Co-authored-by: Illviljan <14371165+Illviljan@users.noreply.github.com> --- doc/whats-new.rst | 1 + xarray/backends/h5netcdf_.py | 2 + xarray/backends/netCDF4_.py | 1 + xarray/tests/test_backends.py | 79 ++++++++++++++++++++++++++++++++++- 4 files changed, 82 insertions(+), 1 deletion(-) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index a25f87468af..164f01e236c 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -26,6 +26,7 @@ New Features different collections of coordinates prior to assign them to a Dataset or DataArray (:pull:`8102`) at once. By `Benoît Bovy `_. +- Provide `preferred_chunks` for data read from netcdf files (:issue:`1440`, :pull:`7948`) Breaking changes ~~~~~~~~~~~~~~~~ diff --git a/xarray/backends/h5netcdf_.py b/xarray/backends/h5netcdf_.py index 59f6c362491..19748084625 100644 --- a/xarray/backends/h5netcdf_.py +++ b/xarray/backends/h5netcdf_.py @@ -198,6 +198,8 @@ def open_store_variable(self, name, var): "fletcher32": var.fletcher32, "shuffle": var.shuffle, } + if var.chunks: + encoding["preferred_chunks"] = dict(zip(var.dimensions, var.chunks)) # Convert h5py-style compression options to NetCDF4-Python # style, if possible if var.compression == "gzip": diff --git a/xarray/backends/netCDF4_.py b/xarray/backends/netCDF4_.py index 7cbfa5b5e4e..f21f15bf795 100644 --- a/xarray/backends/netCDF4_.py +++ b/xarray/backends/netCDF4_.py @@ -426,6 +426,7 @@ def open_store_variable(self, name, var): else: encoding["contiguous"] = False encoding["chunksizes"] = tuple(chunking) + encoding["preferred_chunks"] = dict(zip(var.dimensions, chunking)) # TODO: figure out how to round-trip "endian-ness" without raising # warnings from netCDF4 # encoding['endian'] = var.endian() diff --git a/xarray/tests/test_backends.py b/xarray/tests/test_backends.py index e2ae34f94f2..4799b619efd 100644 --- a/xarray/tests/test_backends.py +++ b/xarray/tests/test_backends.py @@ -13,7 +13,7 @@ import tempfile import uuid import warnings -from collections.abc import Iterator +from collections.abc import Generator, Iterator from contextlib import ExitStack from io import BytesIO from os import listdir @@ -1536,6 +1536,83 @@ def test_keep_chunksizes_if_no_original_shape(self) -> None: ds["x"].encoding["chunksizes"], actual["x"].encoding["chunksizes"] ) + def test_preferred_chunks_is_present(self) -> None: + ds = Dataset({"x": [1, 2, 3]}) + chunksizes = (2,) + ds.variables["x"].encoding = {"chunksizes": chunksizes} + + with self.roundtrip(ds) as actual: + assert actual["x"].encoding["preferred_chunks"] == {"x": 2} + + @requires_dask + def test_auto_chunking_is_based_on_disk_chunk_sizes(self) -> None: + x_size = y_size = 1000 + y_chunksize = y_size + x_chunksize = 10 + + with dask.config.set({"array.chunk-size": "100KiB"}): + with self.chunked_roundtrip( + (1, y_size, x_size), + (1, y_chunksize, x_chunksize), + open_kwargs={"chunks": "auto"}, + ) as ds: + t_chunks, y_chunks, x_chunks = ds["image"].data.chunks + assert all(np.asanyarray(y_chunks) == y_chunksize) + # Check that the chunk size is a multiple of the file chunk size + assert all(np.asanyarray(x_chunks) % x_chunksize == 0) + + @requires_dask + def test_base_chunking_uses_disk_chunk_sizes(self) -> None: + x_size = y_size = 1000 + y_chunksize = y_size + x_chunksize = 10 + + with self.chunked_roundtrip( + (1, y_size, x_size), + (1, y_chunksize, x_chunksize), + open_kwargs={"chunks": {}}, + ) as ds: + for chunksizes, expected in zip( + ds["image"].data.chunks, (1, y_chunksize, x_chunksize) + ): + assert all(np.asanyarray(chunksizes) == expected) + + @contextlib.contextmanager + def chunked_roundtrip( + self, + array_shape: tuple[int, int, int], + chunk_sizes: tuple[int, int, int], + open_kwargs: dict[str, Any] | None = None, + ) -> Generator[Dataset, None, None]: + t_size, y_size, x_size = array_shape + t_chunksize, y_chunksize, x_chunksize = chunk_sizes + + image = xr.DataArray( + np.arange(t_size * x_size * y_size, dtype=np.int16).reshape( + (t_size, y_size, x_size) + ), + dims=["t", "y", "x"], + ) + image.encoding = {"chunksizes": (t_chunksize, y_chunksize, x_chunksize)} + dataset = xr.Dataset(dict(image=image)) + + with self.roundtrip(dataset, open_kwargs=open_kwargs) as ds: + yield ds + + def test_preferred_chunks_are_disk_chunk_sizes(self) -> None: + x_size = y_size = 1000 + y_chunksize = y_size + x_chunksize = 10 + + with self.chunked_roundtrip( + (1, y_size, x_size), (1, y_chunksize, x_chunksize) + ) as ds: + assert ds["image"].encoding["preferred_chunks"] == { + "t": 1, + "y": y_chunksize, + "x": x_chunksize, + } + def test_encoding_chunksizes_unlimited(self) -> None: # regression test for GH1225 ds = Dataset({"x": [1, 2, 3], "y": ("x", [2, 3, 4])}) From 8215911a8e1c97e58c3db071276a185b931b4a35 Mon Sep 17 00:00:00 2001 From: Darsh Ranjan Date: Tue, 12 Sep 2023 11:17:08 -0700 Subject: [PATCH 02/10] Fix assignment with .loc (#8067) * Add unit test for Variable[...] = DataArray (#7030) * Check for DataArray instance (#7030) * Update whats-new.rst (#7030) * Revert Variable and fix DataArray instead (#7030) * Add test case for .loc[...] = dataarray (#7030) * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * Fix whats-new.rst (#7030) * Add regression test for Dataset (#7030) * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --------- Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Co-authored-by: Maximilian Roos <5635139+max-sixty@users.noreply.github.com> --- doc/whats-new.rst | 3 +++ xarray/core/dataarray.py | 1 + xarray/tests/test_dataarray.py | 21 +++++++++++++++++++++ xarray/tests/test_dataset.py | 23 +++++++++++++++++++++++ 4 files changed, 48 insertions(+) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 164f01e236c..e71c7df49d0 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -60,6 +60,9 @@ Bug fixes names were not updated properly internally (:issue:`7405`, :issue:`7588`, :pull:`8104`). By `Benoît Bovy `_. +- Fix bug where :py:class:`DataArray` instances on the right-hand side + of :py:meth:`DataArray.__setitem__` lose dimension names. + (:issue:`7030`, :pull:`8067`) By `Darsh Ranjan `_. Documentation ~~~~~~~~~~~~~ diff --git a/xarray/core/dataarray.py b/xarray/core/dataarray.py index fd3ff60cb6c..2e0a3a7089d 100644 --- a/xarray/core/dataarray.py +++ b/xarray/core/dataarray.py @@ -856,6 +856,7 @@ def __setitem__(self, key: Any, value: Any) -> None: obj = self[key] if isinstance(value, DataArray): assert_coordinate_consistent(value, obj.coords.variables) + value = value.variable # DataArray key -> Variable key key = { k: v.variable if isinstance(v, DataArray) else v diff --git a/xarray/tests/test_dataarray.py b/xarray/tests/test_dataarray.py index 1f4d259d320..e8a4259e500 100644 --- a/xarray/tests/test_dataarray.py +++ b/xarray/tests/test_dataarray.py @@ -841,6 +841,27 @@ def get_data(): ) da[dict(x=ind)] = value # should not raise + def test_setitem_vectorized(self) -> None: + # Regression test for GH:7030 + # Positional indexing + v = xr.DataArray(np.r_[:120].reshape(2, 3, 4, 5), dims=["a", "b", "c", "d"]) + b = xr.DataArray([[0, 0], [1, 0]], dims=["u", "v"]) + c = xr.DataArray([[0, 1], [2, 3]], dims=["u", "v"]) + w = xr.DataArray([-1, -2], dims=["u"]) + index = dict(b=b, c=c) + v[index] = w + assert (v[index] == w).all() + + # Indexing with coordinates + v = xr.DataArray(np.r_[:120].reshape(2, 3, 4, 5), dims=["a", "b", "c", "d"]) + v.coords["b"] = [2, 4, 6] + b = xr.DataArray([[2, 2], [4, 2]], dims=["u", "v"]) + c = xr.DataArray([[0, 1], [2, 3]], dims=["u", "v"]) + w = xr.DataArray([-1, -2], dims=["u"]) + index = dict(b=b, c=c) + v.loc[index] = w + assert (v.loc[index] == w).all() + def test_contains(self) -> None: data_array = DataArray([1, 2]) assert 1 in data_array diff --git a/xarray/tests/test_dataset.py b/xarray/tests/test_dataset.py index 882285ac8ec..89825ac0996 100644 --- a/xarray/tests/test_dataset.py +++ b/xarray/tests/test_dataset.py @@ -4202,6 +4202,29 @@ def test_setitem_align_new_indexes(self) -> None: ) assert_identical(ds, expected) + def test_setitem_vectorized(self) -> None: + # Regression test for GH:7030 + # Positional indexing + da = xr.DataArray(np.r_[:120].reshape(2, 3, 4, 5), dims=["a", "b", "c", "d"]) + ds = xr.Dataset({"da": da}) + b = xr.DataArray([[0, 0], [1, 0]], dims=["u", "v"]) + c = xr.DataArray([[0, 1], [2, 3]], dims=["u", "v"]) + w = xr.DataArray([-1, -2], dims=["u"]) + index = dict(b=b, c=c) + ds[index] = xr.Dataset({"da": w}) + assert (ds[index]["da"] == w).all() + + # Indexing with coordinates + da = xr.DataArray(np.r_[:120].reshape(2, 3, 4, 5), dims=["a", "b", "c", "d"]) + ds = xr.Dataset({"da": da}) + ds.coords["b"] = [2, 4, 6] + b = xr.DataArray([[2, 2], [4, 2]], dims=["u", "v"]) + c = xr.DataArray([[0, 1], [2, 3]], dims=["u", "v"]) + w = xr.DataArray([-1, -2], dims=["u"]) + index = dict(b=b, c=c) + ds.loc[index] = xr.Dataset({"da": w}, coords={"b": ds.coords["b"]}) + assert (ds.loc[index]["da"] == w).all() + @pytest.mark.parametrize("dtype", [str, bytes]) def test_setitem_str_dtype(self, dtype) -> None: ds = xr.Dataset(coords={"x": np.array(["x", "y"], dtype=dtype)}) From 218ea21d79b5eaf04c982c235deb437a529bf35a Mon Sep 17 00:00:00 2001 From: Richard Kleijn <32801740+rhkleijn@users.noreply.github.com> Date: Thu, 14 Sep 2023 14:13:57 +0200 Subject: [PATCH 03/10] improve typing of DataArray and Dataset reductions (#6746) * improve typing of DataArray and Dataset reductions * fix some mypy errors * import Self from xarray.core.types * fix typing errors * fix remaining mypy error * fix some typing issues * add whats-new * re-add type ignore --------- Co-authored-by: Michael Niklas --- doc/whats-new.rst | 5 ++- xarray/core/_aggregations.py | 58 ++++++++++++++-------------- xarray/core/computation.py | 4 +- xarray/core/rolling.py | 8 ++-- xarray/tests/test_groupby.py | 2 +- xarray/util/generate_aggregations.py | 6 +-- 6 files changed, 43 insertions(+), 40 deletions(-) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index e71c7df49d0..91d64ddb7a3 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -26,7 +26,10 @@ New Features different collections of coordinates prior to assign them to a Dataset or DataArray (:pull:`8102`) at once. By `Benoît Bovy `_. -- Provide `preferred_chunks` for data read from netcdf files (:issue:`1440`, :pull:`7948`) +- Provide `preferred_chunks` for data read from netcdf files (:issue:`1440`, :pull:`7948`). + By `Martin Raspaud `_. +- Improved static typing of reduction methods (:pull:`6746`). + By `Richard Kleijn `_. Breaking changes ~~~~~~~~~~~~~~~~ diff --git a/xarray/core/_aggregations.py b/xarray/core/_aggregations.py index d5070f97c6a..89cec94e24f 100644 --- a/xarray/core/_aggregations.py +++ b/xarray/core/_aggregations.py @@ -8,7 +8,7 @@ from xarray.core import duck_array_ops from xarray.core.options import OPTIONS -from xarray.core.types import Dims +from xarray.core.types import Dims, Self from xarray.core.utils import contains_only_chunked_or_numpy, module_available if TYPE_CHECKING: @@ -30,7 +30,7 @@ def reduce( keep_attrs: bool | None = None, keepdims: bool = False, **kwargs: Any, - ) -> Dataset: + ) -> Self: raise NotImplementedError() def count( @@ -39,7 +39,7 @@ def count( *, keep_attrs: bool | None = None, **kwargs: Any, - ) -> Dataset: + ) -> Self: """ Reduce this Dataset's data by applying ``count`` along some dimension(s). @@ -111,7 +111,7 @@ def all( *, keep_attrs: bool | None = None, **kwargs: Any, - ) -> Dataset: + ) -> Self: """ Reduce this Dataset's data by applying ``all`` along some dimension(s). @@ -183,7 +183,7 @@ def any( *, keep_attrs: bool | None = None, **kwargs: Any, - ) -> Dataset: + ) -> Self: """ Reduce this Dataset's data by applying ``any`` along some dimension(s). @@ -256,7 +256,7 @@ def max( skipna: bool | None = None, keep_attrs: bool | None = None, **kwargs: Any, - ) -> Dataset: + ) -> Self: """ Reduce this Dataset's data by applying ``max`` along some dimension(s). @@ -343,7 +343,7 @@ def min( skipna: bool | None = None, keep_attrs: bool | None = None, **kwargs: Any, - ) -> Dataset: + ) -> Self: """ Reduce this Dataset's data by applying ``min`` along some dimension(s). @@ -430,7 +430,7 @@ def mean( skipna: bool | None = None, keep_attrs: bool | None = None, **kwargs: Any, - ) -> Dataset: + ) -> Self: """ Reduce this Dataset's data by applying ``mean`` along some dimension(s). @@ -522,7 +522,7 @@ def prod( min_count: int | None = None, keep_attrs: bool | None = None, **kwargs: Any, - ) -> Dataset: + ) -> Self: """ Reduce this Dataset's data by applying ``prod`` along some dimension(s). @@ -629,7 +629,7 @@ def sum( min_count: int | None = None, keep_attrs: bool | None = None, **kwargs: Any, - ) -> Dataset: + ) -> Self: """ Reduce this Dataset's data by applying ``sum`` along some dimension(s). @@ -736,7 +736,7 @@ def std( ddof: int = 0, keep_attrs: bool | None = None, **kwargs: Any, - ) -> Dataset: + ) -> Self: """ Reduce this Dataset's data by applying ``std`` along some dimension(s). @@ -840,7 +840,7 @@ def var( ddof: int = 0, keep_attrs: bool | None = None, **kwargs: Any, - ) -> Dataset: + ) -> Self: """ Reduce this Dataset's data by applying ``var`` along some dimension(s). @@ -943,7 +943,7 @@ def median( skipna: bool | None = None, keep_attrs: bool | None = None, **kwargs: Any, - ) -> Dataset: + ) -> Self: """ Reduce this Dataset's data by applying ``median`` along some dimension(s). @@ -1034,7 +1034,7 @@ def cumsum( skipna: bool | None = None, keep_attrs: bool | None = None, **kwargs: Any, - ) -> Dataset: + ) -> Self: """ Reduce this Dataset's data by applying ``cumsum`` along some dimension(s). @@ -1127,7 +1127,7 @@ def cumprod( skipna: bool | None = None, keep_attrs: bool | None = None, **kwargs: Any, - ) -> Dataset: + ) -> Self: """ Reduce this Dataset's data by applying ``cumprod`` along some dimension(s). @@ -1226,7 +1226,7 @@ def reduce( keep_attrs: bool | None = None, keepdims: bool = False, **kwargs: Any, - ) -> DataArray: + ) -> Self: raise NotImplementedError() def count( @@ -1235,7 +1235,7 @@ def count( *, keep_attrs: bool | None = None, **kwargs: Any, - ) -> DataArray: + ) -> Self: """ Reduce this DataArray's data by applying ``count`` along some dimension(s). @@ -1301,7 +1301,7 @@ def all( *, keep_attrs: bool | None = None, **kwargs: Any, - ) -> DataArray: + ) -> Self: """ Reduce this DataArray's data by applying ``all`` along some dimension(s). @@ -1367,7 +1367,7 @@ def any( *, keep_attrs: bool | None = None, **kwargs: Any, - ) -> DataArray: + ) -> Self: """ Reduce this DataArray's data by applying ``any`` along some dimension(s). @@ -1434,7 +1434,7 @@ def max( skipna: bool | None = None, keep_attrs: bool | None = None, **kwargs: Any, - ) -> DataArray: + ) -> Self: """ Reduce this DataArray's data by applying ``max`` along some dimension(s). @@ -1513,7 +1513,7 @@ def min( skipna: bool | None = None, keep_attrs: bool | None = None, **kwargs: Any, - ) -> DataArray: + ) -> Self: """ Reduce this DataArray's data by applying ``min`` along some dimension(s). @@ -1592,7 +1592,7 @@ def mean( skipna: bool | None = None, keep_attrs: bool | None = None, **kwargs: Any, - ) -> DataArray: + ) -> Self: """ Reduce this DataArray's data by applying ``mean`` along some dimension(s). @@ -1676,7 +1676,7 @@ def prod( min_count: int | None = None, keep_attrs: bool | None = None, **kwargs: Any, - ) -> DataArray: + ) -> Self: """ Reduce this DataArray's data by applying ``prod`` along some dimension(s). @@ -1773,7 +1773,7 @@ def sum( min_count: int | None = None, keep_attrs: bool | None = None, **kwargs: Any, - ) -> DataArray: + ) -> Self: """ Reduce this DataArray's data by applying ``sum`` along some dimension(s). @@ -1870,7 +1870,7 @@ def std( ddof: int = 0, keep_attrs: bool | None = None, **kwargs: Any, - ) -> DataArray: + ) -> Self: """ Reduce this DataArray's data by applying ``std`` along some dimension(s). @@ -1964,7 +1964,7 @@ def var( ddof: int = 0, keep_attrs: bool | None = None, **kwargs: Any, - ) -> DataArray: + ) -> Self: """ Reduce this DataArray's data by applying ``var`` along some dimension(s). @@ -2057,7 +2057,7 @@ def median( skipna: bool | None = None, keep_attrs: bool | None = None, **kwargs: Any, - ) -> DataArray: + ) -> Self: """ Reduce this DataArray's data by applying ``median`` along some dimension(s). @@ -2140,7 +2140,7 @@ def cumsum( skipna: bool | None = None, keep_attrs: bool | None = None, **kwargs: Any, - ) -> DataArray: + ) -> Self: """ Reduce this DataArray's data by applying ``cumsum`` along some dimension(s). @@ -2229,7 +2229,7 @@ def cumprod( skipna: bool | None = None, keep_attrs: bool | None = None, **kwargs: Any, - ) -> DataArray: + ) -> Self: """ Reduce this DataArray's data by applying ``cumprod`` along some dimension(s). diff --git a/xarray/core/computation.py b/xarray/core/computation.py index 235b52402f1..97a1e15b1d9 100644 --- a/xarray/core/computation.py +++ b/xarray/core/computation.py @@ -1402,14 +1402,14 @@ def _cov_corr( ) / (valid_count) if method == "cov": - return cov # type: ignore[return-value] + return cov else: # compute std + corr da_a_std = da_a.std(dim=dim) da_b_std = da_b.std(dim=dim) corr = cov / (da_a_std * da_b_std) - return corr # type: ignore[return-value] + return corr def cross( diff --git a/xarray/core/rolling.py b/xarray/core/rolling.py index 67389527a98..d49cb6e13a4 100644 --- a/xarray/core/rolling.py +++ b/xarray/core/rolling.py @@ -475,9 +475,8 @@ def reduce( obj, rolling_dim, keep_attrs=keep_attrs, fill_value=fillna ) - result = windows.reduce( - func, dim=list(rolling_dim.values()), keep_attrs=keep_attrs, **kwargs - ) + dim = list(rolling_dim.values()) + result = windows.reduce(func, dim=dim, keep_attrs=keep_attrs, **kwargs) # Find valid windows based on count. counts = self._counts(keep_attrs=False) @@ -494,6 +493,7 @@ def _counts(self, keep_attrs: bool | None) -> DataArray: # array is faster to be reduced than object array. # The use of skipna==False is also faster since it does not need to # copy the strided array. + dim = list(rolling_dim.values()) counts = ( self.obj.notnull(keep_attrs=keep_attrs) .rolling( @@ -501,7 +501,7 @@ def _counts(self, keep_attrs: bool | None) -> DataArray: center={d: self.center[i] for i, d in enumerate(self.dim)}, ) .construct(rolling_dim, fill_value=False, keep_attrs=keep_attrs) - .sum(dim=list(rolling_dim.values()), skipna=False, keep_attrs=keep_attrs) + .sum(dim=dim, skipna=False, keep_attrs=keep_attrs) ) return counts diff --git a/xarray/tests/test_groupby.py b/xarray/tests/test_groupby.py index b961abef5db..e143e2b8e03 100644 --- a/xarray/tests/test_groupby.py +++ b/xarray/tests/test_groupby.py @@ -728,7 +728,7 @@ def test_groupby_dataset_iter() -> None: def test_groupby_dataset_errors() -> None: data = create_test_data() with pytest.raises(TypeError, match=r"`group` must be"): - data.groupby(np.arange(10)) # type: ignore + data.groupby(np.arange(10)) # type: ignore[arg-type,unused-ignore] with pytest.raises(ValueError, match=r"length does not match"): data.groupby(data["dim1"][:3]) with pytest.raises(TypeError, match=r"`group` must be"): diff --git a/xarray/util/generate_aggregations.py b/xarray/util/generate_aggregations.py index 312f5722f8e..873f6015b5c 100644 --- a/xarray/util/generate_aggregations.py +++ b/xarray/util/generate_aggregations.py @@ -27,7 +27,7 @@ from xarray.core import duck_array_ops from xarray.core.options import OPTIONS -from xarray.core.types import Dims +from xarray.core.types import Dims, Self from xarray.core.utils import contains_only_chunked_or_numpy, module_available if TYPE_CHECKING: @@ -50,7 +50,7 @@ def reduce( keep_attrs: bool | None = None, keepdims: bool = False, **kwargs: Any, - ) -> {obj}: + ) -> Self: raise NotImplementedError()""" GROUPBY_PREAMBLE = """ @@ -108,7 +108,7 @@ def {method}( *,{extra_kwargs} keep_attrs: bool | None = None, **kwargs: Any, - ) -> {obj}: + ) -> Self: """ Reduce this {obj}'s data by applying ``{method}`` along some dimension(s). From 0c4f16574a881e865e39e4621cbf6b95b8d1f0f2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kai=20M=C3=BChlbauer?= Date: Thu, 14 Sep 2023 15:57:36 +0200 Subject: [PATCH 04/10] FIX: handle NaT values in dt-accessor (#8084) * FIX: handle NaT values in dt-accessor * FIX: only use isnat-check for datetimes not cftime * check for and apply needed dtype for isocalendar * Keep current version without NaT * add whats-new.rst entry --- doc/whats-new.rst | 4 ++++ xarray/core/accessor_dt.py | 25 ++++++++++++++++++++++--- 2 files changed, 26 insertions(+), 3 deletions(-) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 91d64ddb7a3..f60941033f4 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -66,6 +66,10 @@ Bug fixes - Fix bug where :py:class:`DataArray` instances on the right-hand side of :py:meth:`DataArray.__setitem__` lose dimension names. (:issue:`7030`, :pull:`8067`) By `Darsh Ranjan `_. +- Return ``float64`` in presence of ``NaT`` in :py:class:`~core.accessor_dt.DatetimeAccessor` and + special case ``NaT`` handling in :py:meth:`~core.accessor_dt.DatetimeAccessor.isocalendar()` + (:issue:`7928`, :pull:`8084`). + By `Kai Mühlbauer `_. Documentation ~~~~~~~~~~~~~ diff --git a/xarray/core/accessor_dt.py b/xarray/core/accessor_dt.py index 77976fe610e..4c1ce4b5c48 100644 --- a/xarray/core/accessor_dt.py +++ b/xarray/core/accessor_dt.py @@ -74,11 +74,25 @@ def _access_through_series(values, name): months = values_as_series.dt.month.values field_values = _season_from_months(months) elif name == "isocalendar": + # special NaT-handling can be removed when + # https://github.com/pandas-dev/pandas/issues/54657 is resolved + field_values = values_as_series.dt.isocalendar() + # test for and apply needed dtype + hasna = any(field_values.year.isnull()) + if hasna: + field_values = np.dstack( + [ + getattr(field_values, name).astype(np.float64, copy=False).values + for name in ["year", "week", "day"] + ] + ) + else: + field_values = np.array(field_values, dtype=np.int64) # isocalendar returns iso- year, week, and weekday -> reshape - field_values = np.array(values_as_series.dt.isocalendar(), dtype=np.int64) return field_values.T.reshape(3, *values.shape) else: field_values = getattr(values_as_series.dt, name).values + return field_values.reshape(values.shape) @@ -110,7 +124,7 @@ def _get_date_field(values, name, dtype): from dask.array import map_blocks new_axis = chunks = None - # isocalendar adds adds an axis + # isocalendar adds an axis if name == "isocalendar": chunks = (3,) + values.chunksize new_axis = 0 @@ -119,7 +133,12 @@ def _get_date_field(values, name, dtype): access_method, values, name, dtype=dtype, new_axis=new_axis, chunks=chunks ) else: - return access_method(values, name).astype(dtype, copy=False) + out = access_method(values, name) + # cast only for integer types to keep float64 in presence of NaT + # see https://github.com/pydata/xarray/issues/7928 + if np.issubdtype(out.dtype, np.integer): + out = out.astype(dtype, copy=False) + return out def _round_through_series_or_index(values, name, freq): From 756eee610ddf6bc2c6869dfed5035cd720b4655d Mon Sep 17 00:00:00 2001 From: Maximilian Roos <5635139+max-sixty@users.noreply.github.com> Date: Thu, 14 Sep 2023 08:32:49 -0700 Subject: [PATCH 05/10] Fix `PeriodIndex` deprecation in xarray tests (#8182) --- asv_bench/benchmarks/pandas.py | 2 +- xarray/tests/test_dataset.py | 2 +- xarray/tests/test_variable.py | 6 +++--- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/asv_bench/benchmarks/pandas.py b/asv_bench/benchmarks/pandas.py index 9bda5970a4c..ebe61081916 100644 --- a/asv_bench/benchmarks/pandas.py +++ b/asv_bench/benchmarks/pandas.py @@ -13,7 +13,7 @@ def setup(self, dtype, subset): [ list("abcdefhijk"), list("abcdefhijk"), - pd.date_range(start="2000-01-01", periods=1000, freq="B"), + pd.date_range(start="2000-01-01", periods=1000, freq="D"), ] ) series = pd.Series(data, index) diff --git a/xarray/tests/test_dataset.py b/xarray/tests/test_dataset.py index 89825ac0996..d794603b341 100644 --- a/xarray/tests/test_dataset.py +++ b/xarray/tests/test_dataset.py @@ -380,7 +380,7 @@ def test_repr_multiindex(self) -> None: def test_repr_period_index(self) -> None: data = create_test_data(seed=456) - data.coords["time"] = pd.period_range("2000-01-01", periods=20, freq="B") + data.coords["time"] = pd.period_range("2000-01-01", periods=20, freq="D") # check that creating the repr doesn't raise an error #GH645 repr(data) diff --git a/xarray/tests/test_variable.py b/xarray/tests/test_variable.py index c4bac3b2c61..3c40d0a2361 100644 --- a/xarray/tests/test_variable.py +++ b/xarray/tests/test_variable.py @@ -338,10 +338,10 @@ def test_pandas_data(self): assert v[0].values == v.values[0] def test_pandas_period_index(self): - v = self.cls(["x"], pd.period_range(start="2000", periods=20, freq="B")) + v = self.cls(["x"], pd.period_range(start="2000", periods=20, freq="D")) v = v.load() # for dask-based Variable - assert v[0] == pd.Period("2000", freq="B") - assert "Period('2000-01-03', 'B')" in repr(v) + assert v[0] == pd.Period("2000", freq="D") + assert "Period('2000-01-01', 'D')" in repr(v) @pytest.mark.parametrize("dtype", [float, int]) def test_1d_math(self, dtype: np.typing.DTypeLike) -> None: From 2a02052dd808bbfe768a2001ffea1fd5d08aac35 Mon Sep 17 00:00:00 2001 From: Maximilian Roos <5635139+max-sixty@users.noreply.github.com> Date: Thu, 14 Sep 2023 13:06:16 -0700 Subject: [PATCH 06/10] Display data returned in `apply_ufunc` error message (#8179) * Display data returned in ufunc error message This makes debugging much easier! * Use `short_array_repr` * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --------- Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> --- xarray/core/computation.py | 25 +++++++++++++------------ xarray/core/formatting.py | 8 ++++++-- xarray/tests/test_computation.py | 10 ++++++++-- 3 files changed, 27 insertions(+), 16 deletions(-) diff --git a/xarray/core/computation.py b/xarray/core/computation.py index 97a1e15b1d9..2068cdcadd2 100644 --- a/xarray/core/computation.py +++ b/xarray/core/computation.py @@ -659,6 +659,7 @@ def apply_variable_ufunc( dask_gufunc_kwargs=None, ) -> Variable | tuple[Variable, ...]: """Apply a ndarray level function over Variable and/or ndarray objects.""" + from xarray.core.formatting import short_array_repr from xarray.core.variable import Variable, as_compatible_data dim_sizes = unified_dim_sizes( @@ -766,11 +767,10 @@ def func(*arrays): not isinstance(result_data, tuple) or len(result_data) != signature.num_outputs ): raise ValueError( - "applied function does not have the number of " - "outputs specified in the ufunc signature. " - "Result is not a tuple of {} elements: {!r}".format( - signature.num_outputs, result_data - ) + f"applied function does not have the number of " + f"outputs specified in the ufunc signature. " + f"Result is not a tuple of {signature.num_outputs} elements:\n\n" + f"{short_array_repr(result_data)}" ) objs = _all_of_type(args, Variable) @@ -784,21 +784,22 @@ def func(*arrays): data = as_compatible_data(data) if data.ndim != len(dims): raise ValueError( - "applied function returned data with unexpected " + "applied function returned data with an unexpected " f"number of dimensions. Received {data.ndim} dimension(s) but " - f"expected {len(dims)} dimensions with names: {dims!r}" + f"expected {len(dims)} dimensions with names {dims!r}, from:\n\n" + f"{short_array_repr(data)}" ) var = Variable(dims, data, fastpath=True) for dim, new_size in var.sizes.items(): if dim in dim_sizes and new_size != dim_sizes[dim]: raise ValueError( - "size of dimension {!r} on inputs was unexpectedly " - "changed by applied function from {} to {}. Only " + f"size of dimension '{dim}' on inputs was unexpectedly " + f"changed by applied function from {dim_sizes[dim]} to {new_size}. Only " "dimensions specified in ``exclude_dims`` with " - "xarray.apply_ufunc are allowed to change size.".format( - dim, dim_sizes[dim], new_size - ) + "xarray.apply_ufunc are allowed to change size. " + "The data returned was:\n\n" + f"{short_array_repr(data)}" ) var.attrs = attrs diff --git a/xarray/core/formatting.py b/xarray/core/formatting.py index 1f2bf720a10..3bfe902f0a3 100644 --- a/xarray/core/formatting.py +++ b/xarray/core/formatting.py @@ -10,6 +10,7 @@ from datetime import datetime, timedelta from itertools import chain, zip_longest from reprlib import recursive_repr +from typing import TYPE_CHECKING import numpy as np import pandas as pd @@ -21,6 +22,9 @@ from xarray.core.pycompat import array_type from xarray.core.utils import is_duck_array +if TYPE_CHECKING: + from xarray.core.coordinates import AbstractCoordinates + def pretty_print(x, numchars: int): """Given an object `x`, call `str(x)` and format the returned string so @@ -398,7 +402,7 @@ def _mapping_repr( ) -def coords_repr(coords, col_width=None, max_rows=None): +def coords_repr(coords: AbstractCoordinates, col_width=None, max_rows=None): if col_width is None: col_width = _calculate_col_width(coords) return _mapping_repr( @@ -412,7 +416,7 @@ def coords_repr(coords, col_width=None, max_rows=None): ) -def inline_index_repr(index, max_width=None): +def inline_index_repr(index: pd.Index, max_width=None): if hasattr(index, "_repr_inline_"): repr_ = index._repr_inline_(max_width=max_width) else: diff --git a/xarray/tests/test_computation.py b/xarray/tests/test_computation.py index 3c10e3f27ab..372998a66bc 100644 --- a/xarray/tests/test_computation.py +++ b/xarray/tests/test_computation.py @@ -1666,7 +1666,10 @@ def identity(x): def tuple3x(x): return (x, x, x) - with pytest.raises(ValueError, match=r"number of outputs"): + with pytest.raises( + ValueError, + match=r"number of outputs.*Result is not a tuple of 2 elements:\n\narray\(\[0", + ): apply_ufunc(identity, variable, output_core_dims=[(), ()]) with pytest.raises(ValueError, match=r"number of outputs"): @@ -1682,7 +1685,10 @@ def add_dim(x): def remove_dim(x): return x[..., 0] - with pytest.raises(ValueError, match=r"unexpected number of dimensions"): + with pytest.raises( + ValueError, + match=r"unexpected number of dimensions.*from:\n\n.*array\(\[\[0", + ): apply_ufunc(add_dim, variable, output_core_dims=[("y", "z")]) with pytest.raises(ValueError, match=r"unexpected number of dimensions"): From 5007e5febeab9c3a0cefaf58113d113e626ad120 Mon Sep 17 00:00:00 2001 From: Maximilian Roos <5635139+max-sixty@users.noreply.github.com> Date: Thu, 14 Sep 2023 13:47:07 -0700 Subject: [PATCH 07/10] Set dev version above released version (#8181) * Set dev version above released version --- pyproject.toml | 32 ++++++++++++++------------------ setup.py | 2 +- 2 files changed, 15 insertions(+), 19 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 15c6bf194a5..8e63cb033a0 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -6,7 +6,7 @@ requires = [ ] [tool.setuptools_scm] -fallback_version = "999" +fallback_version = "9999" [tool.coverage.run] omit = [ @@ -64,7 +64,7 @@ module = [ "sparse.*", "toolz.*", "zarr.*", - "numpy.exceptions.*", # remove once support for `numpy<2.0` has been dropped + "numpy.exceptions.*", # remove once support for `numpy<2.0` has been dropped ] [[tool.mypy.overrides]] @@ -72,31 +72,27 @@ ignore_errors = true module = [] [tool.ruff] -target-version = "py39" builtins = ["ellipsis"] exclude = [ - ".eggs", - "doc", - "_typed_ops.pyi", + ".eggs", + "doc", + "_typed_ops.pyi", ] +target-version = "py39" # E402: module level import not at top of file # E501: line too long - let black worry about that # E731: do not assign a lambda expression, use a def ignore = [ - "E402", - "E501", - "E731", + "E402", + "E501", + "E731", ] select = [ - # Pyflakes - "F", - # Pycodestyle - "E", - "W", - # isort - "I", - # Pyupgrade - "UP", + "F", # Pyflakes + "E", # Pycodestyle + "W", # isort + "I", # Pyupgrade + "UP", ] [tool.ruff.isort] diff --git a/setup.py b/setup.py index 088d7e4eac6..69343515fd5 100755 --- a/setup.py +++ b/setup.py @@ -1,4 +1,4 @@ #!/usr/bin/env python from setuptools import setup -setup(use_scm_version={"fallback_version": "999"}) +setup(use_scm_version={"fallback_version": "9999"}) From 64660b6422d0b1c7a57c6800d58ab34b3e20c2fd Mon Sep 17 00:00:00 2001 From: Maximilian Roos <5635139+max-sixty@users.noreply.github.com> Date: Thu, 14 Sep 2023 13:52:39 -0700 Subject: [PATCH 08/10] Remove `setup.cfg` in favor of `pyproject.toml` (#8183) * Remove `setup.cfg` in favor of `pyproject.toml` --- pyproject.toml | 64 +++++++++++++++++++ setup.cfg | 154 --------------------------------------------- xarray/__init__.py | 2 +- 3 files changed, 65 insertions(+), 155 deletions(-) delete mode 100644 setup.cfg diff --git a/pyproject.toml b/pyproject.toml index 8e63cb033a0..b075382985e 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,3 +1,52 @@ +[project] +authors = [ + {name = "xarray Developers", email = "xarray@googlegroups.com"}, +] +classifiers = [ + "Development Status :: 5 - Production/Stable", + "License :: OSI Approved :: Apache Software License", + "Operating System :: OS Independent", + "Intended Audience :: Science/Research", + "Programming Language :: Python", + "Programming Language :: Python :: 3", + "Programming Language :: Python :: 3.9", + "Programming Language :: Python :: 3.10", + "Programming Language :: Python :: 3.11", + "Topic :: Scientific/Engineering", +] +description = "N-D labeled arrays and datasets in Python" +dynamic = ["version"] +license = {text = "Apache-2.0"} +name = "xarray" +readme = "README.md" +requires-python = ">=3.9" + +dependencies = [ + "numpy>=1.21", + "packaging>=21.3", + "pandas>=1.4", +] + +[project.optional-dependencies] +accel = ["scipy", "bottleneck", "numbagg", "flox"] +complete = ["xarray[accel,io,parallel,viz]"] +io = ["netCDF4", "h5netcdf", "scipy", 'pydap; python_version<"3.10"', "zarr", "fsspec", "cftime", "pooch"] +parallel = ["dask[complete]"] +viz = ["matplotlib", "seaborn", "nc-time-axis"] + +[tool.setuptools] +packages = ["xarray"] + +[project.entry-points."xarray.chunkmanagers"] +dask = "xarray.core.daskmanager:DaskManager" + +[project.urls] +Documentation = "https://docs.xarray.dev" +SciPy2015-talk = "https://www.youtube.com/watch?v=X0pAhJgySxk" +homepage = "https://xarray.dev/" +issue-tracker = "https://github.com/pydata/xarray/issues" +source-code = "https://github.com/pydata/xarray" + [build-system] build-backend = "setuptools.build_meta" requires = [ @@ -97,3 +146,18 @@ select = [ [tool.ruff.isort] known-first-party = ["xarray"] + +[tool.pytest.ini-options] +filterwarnings = [ + "ignore:Using a non-tuple sequence for multidimensional indexing is deprecated:FutureWarning", +] +markers = [ + "flaky: flaky tests", + "network: tests requiring a network connection", + "slow: slow tests", +] +python_files = "test_*.py" +testpaths = ["xarray/tests", "properties"] + +[tool.aliases] +test = "pytest" diff --git a/setup.cfg b/setup.cfg deleted file mode 100644 index 85ac8e259e5..00000000000 --- a/setup.cfg +++ /dev/null @@ -1,154 +0,0 @@ -[metadata] -name = xarray -author = xarray Developers -author_email = xarray@googlegroups.com -license = Apache-2.0 -description = N-D labeled arrays and datasets in Python -long_description_content_type=text/x-rst -long_description = - **xarray** (formerly **xray**) is an open source project and Python package - that makes working with labelled multi-dimensional arrays simple, - efficient, and fun! - - xarray introduces labels in the form of dimensions, coordinates and - attributes on top of raw NumPy_-like arrays, which allows for a more - intuitive, more concise, and less error-prone developer experience. - The package includes a large and growing library of domain-agnostic functions - for advanced analytics and visualization with these data structures. - - xarray was inspired by and borrows heavily from pandas_, the popular data - analysis package focused on labelled tabular data. - It is particularly tailored to working with netCDF_ files, which were the - source of xarray's data model, and integrates tightly with dask_ for parallel - computing. - - .. _NumPy: https://www.numpy.org - .. _pandas: https://pandas.pydata.org - .. _dask: https://dask.org - .. _netCDF: https://www.unidata.ucar.edu/software/netcdf - - Why xarray? - ----------- - Multi-dimensional (a.k.a. N-dimensional, ND) arrays (sometimes called - "tensors") are an essential part of computational science. - They are encountered in a wide range of fields, including physics, astronomy, - geoscience, bioinformatics, engineering, finance, and deep learning. - In Python, NumPy_ provides the fundamental data structure and API for - working with raw ND arrays. - However, real-world datasets are usually more than just raw numbers; - they have labels which encode information about how the array values map - to locations in space, time, etc. - - xarray doesn't just keep track of labels on arrays -- it uses them to provide a - powerful and concise interface. For example: - - - Apply operations over dimensions by name: ``x.sum('time')``. - - Select values by label instead of integer location: ``x.loc['2014-01-01']`` or ``x.sel(time='2014-01-01')``. - - Mathematical operations (e.g., ``x - y``) vectorize across multiple dimensions (array broadcasting) based on dimension names, not shape. - - Flexible split-apply-combine operations with groupby: ``x.groupby('time.dayofyear').mean()``. - - Database like alignment based on coordinate labels that smoothly handles missing values: ``x, y = xr.align(x, y, join='outer')``. - - Keep track of arbitrary metadata in the form of a Python dictionary: ``x.attrs``. - - Learn more - ---------- - - Documentation: ``_ - - Issue tracker: ``_ - - Source code: ``_ - - SciPy2015 talk: ``_ - -url = https://github.com/pydata/xarray -classifiers = - Development Status :: 5 - Production/Stable - License :: OSI Approved :: Apache Software License - Operating System :: OS Independent - Intended Audience :: Science/Research - Programming Language :: Python - Programming Language :: Python :: 3 - Programming Language :: Python :: 3.9 - Programming Language :: Python :: 3.10 - Programming Language :: Python :: 3.11 - Topic :: Scientific/Engineering - -[options] -packages = find: -zip_safe = False # https://mypy.readthedocs.io/en/latest/installed_packages.html -include_package_data = True -python_requires = >=3.9 -install_requires = - numpy >= 1.21 # recommended to use >= 1.22 for full quantile method support - pandas >= 1.4 - packaging >= 21.3 - -[options.extras_require] -io = - netCDF4 - h5netcdf - scipy - pydap; python_version<"3.10" # see https://github.com/pydap/pydap/issues/268 - zarr - fsspec - cftime - pooch - ## Scitools packages & dependencies (e.g: cartopy, cf-units) can be hard to install - # scitools-iris - -accel = - scipy - bottleneck - numbagg - flox - -parallel = - dask[complete] - -viz = - matplotlib - seaborn - nc-time-axis - ## Cartopy requires 3rd party libraries and only provides source distributions - ## See: https://github.com/SciTools/cartopy/issues/805 - # cartopy - -complete = - %(io)s - %(accel)s - %(parallel)s - %(viz)s - -docs = - %(complete)s - sphinx-autosummary-accessors - sphinx_rtd_theme - ipython - ipykernel - jupyter-client - nbsphinx - scanpydoc - -[options.package_data] -xarray = - py.typed - tests/data/* - static/css/* - static/html/* - -[options.entry_points] -xarray.chunkmanagers = - dask = xarray.core.daskmanager:DaskManager - -[tool:pytest] -python_files = test_*.py -testpaths = xarray/tests properties -# Fixed upstream in https://github.com/pydata/bottleneck/pull/199 -filterwarnings = - ignore:Using a non-tuple sequence for multidimensional indexing is deprecated:FutureWarning -markers = - flaky: flaky tests - network: tests requiring a network connection - slow: slow tests - -[aliases] -test = pytest - -[pytest-watch] -nobeep = True diff --git a/xarray/__init__.py b/xarray/__init__.py index 830bc254a71..b63b0d81470 100644 --- a/xarray/__init__.py +++ b/xarray/__init__.py @@ -52,7 +52,7 @@ except Exception: # Local copy or not installed with setuptools. # Disable minimum version checks on downstream libraries. - __version__ = "999" + __version__ = "9999" # A hardcoded __all__ variable is necessary to appease # `mypy --strict` running in projects that import xarray. From c537238ae9ac905b8fe9a4630d03a329cab3a1df Mon Sep 17 00:00:00 2001 From: Maximilian Roos <5635139+max-sixty@users.noreply.github.com> Date: Thu, 14 Sep 2023 13:54:51 -0700 Subject: [PATCH 09/10] Fix comment alignment in `pyproject.toml` (#8185) I wasn't sufficiently careful in #8181 --- pyproject.toml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index b075382985e..96d5361552f 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -139,9 +139,9 @@ ignore = [ select = [ "F", # Pyflakes "E", # Pycodestyle - "W", # isort - "I", # Pyupgrade - "UP", + "W", + "I", # isort + "UP", # Pyupgrade ] [tool.ruff.isort] From e00e31b9ea7fcd562f75f6c657d02c4f81867095 Mon Sep 17 00:00:00 2001 From: Justus Magin Date: Fri, 15 Sep 2023 15:26:24 +0200 Subject: [PATCH 10/10] fix the failing docs (#8188) * don't try to build with `pandas=2.1.0` one of the docstrings we inherit from `pandas` is malformatted in that version * reorder the project metadata sections * source the exhaustive list of optional deps from `pyproject.toml` * link to the new location of the colormap tutorial in matplotlib * also convert the narrative documentation to `pyproject.toml` --- ci/requirements/doc.yml | 2 +- doc/getting-started-guide/installing.rst | 12 +++++------ pyproject.toml | 26 ++++++++++++------------ xarray/plot/dataarray_plot.py | 4 ++-- xarray/plot/dataset_plot.py | 2 +- 5 files changed, 23 insertions(+), 23 deletions(-) diff --git a/ci/requirements/doc.yml b/ci/requirements/doc.yml index fe1fe91bb51..bc35d0db894 100644 --- a/ci/requirements/doc.yml +++ b/ci/requirements/doc.yml @@ -21,7 +21,7 @@ dependencies: - numba - numpy>=1.21 - packaging>=21.3 - - pandas>=1.4 + - pandas>=1.4,!=2.1.0 - pooch - pip - pre-commit diff --git a/doc/getting-started-guide/installing.rst b/doc/getting-started-guide/installing.rst index ff8650bc0ff..e8c498b6664 100644 --- a/doc/getting-started-guide/installing.rst +++ b/doc/getting-started-guide/installing.rst @@ -135,13 +135,13 @@ We also maintain other dependency sets for different subsets of functionality:: The above commands should install most of the `optional dependencies`_. However, some packages which are either not listed on PyPI or require extra installation steps are excluded. To know which dependencies would be -installed, take a look at the ``[options.extras_require]`` section in -``setup.cfg``: +installed, take a look at the ``[project.optional-dependencies]`` section in +``pyproject.toml``: -.. literalinclude:: ../../setup.cfg - :language: ini - :start-at: [options.extras_require] - :end-before: [options.package_data] +.. literalinclude:: ../../pyproject.toml + :language: toml + :start-at: [project.optional-dependencies] + :end-before: [build-system] Development versions -------------------- diff --git a/pyproject.toml b/pyproject.toml index 96d5361552f..c820c933def 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -27,19 +27,6 @@ dependencies = [ "pandas>=1.4", ] -[project.optional-dependencies] -accel = ["scipy", "bottleneck", "numbagg", "flox"] -complete = ["xarray[accel,io,parallel,viz]"] -io = ["netCDF4", "h5netcdf", "scipy", 'pydap; python_version<"3.10"', "zarr", "fsspec", "cftime", "pooch"] -parallel = ["dask[complete]"] -viz = ["matplotlib", "seaborn", "nc-time-axis"] - -[tool.setuptools] -packages = ["xarray"] - -[project.entry-points."xarray.chunkmanagers"] -dask = "xarray.core.daskmanager:DaskManager" - [project.urls] Documentation = "https://docs.xarray.dev" SciPy2015-talk = "https://www.youtube.com/watch?v=X0pAhJgySxk" @@ -47,6 +34,16 @@ homepage = "https://xarray.dev/" issue-tracker = "https://github.com/pydata/xarray/issues" source-code = "https://github.com/pydata/xarray" +[project.entry-points."xarray.chunkmanagers"] +dask = "xarray.core.daskmanager:DaskManager" + +[project.optional-dependencies] +accel = ["scipy", "bottleneck", "numbagg", "flox"] +complete = ["xarray[accel,io,parallel,viz]"] +io = ["netCDF4", "h5netcdf", "scipy", 'pydap; python_version<"3.10"', "zarr", "fsspec", "cftime", "pooch"] +parallel = ["dask[complete]"] +viz = ["matplotlib", "seaborn", "nc-time-axis"] + [build-system] build-backend = "setuptools.build_meta" requires = [ @@ -54,6 +51,9 @@ requires = [ "setuptools-scm>=7", ] +[tool.setuptools] +packages = ["xarray"] + [tool.setuptools_scm] fallback_version = "9999" diff --git a/xarray/plot/dataarray_plot.py b/xarray/plot/dataarray_plot.py index 4df1938f89f..1e9324791da 100644 --- a/xarray/plot/dataarray_plot.py +++ b/xarray/plot/dataarray_plot.py @@ -789,7 +789,7 @@ def _plot1d(plotfunc): be either ``'viridis'`` (if the function infers a sequential dataset) or ``'RdBu_r'`` (if the function infers a diverging dataset). - See :doc:`Choosing Colormaps in Matplotlib ` + See :doc:`Choosing Colormaps in Matplotlib ` for more information. If *seaborn* is installed, ``cmap`` may also be a @@ -1325,7 +1325,7 @@ def _plot2d(plotfunc): The mapping from data values to color space. If not provided, this will be either be ``'viridis'`` (if the function infers a sequential dataset) or ``'RdBu_r'`` (if the function infers a diverging dataset). - See :doc:`Choosing Colormaps in Matplotlib ` + See :doc:`Choosing Colormaps in Matplotlib ` for more information. If *seaborn* is installed, ``cmap`` may also be a diff --git a/xarray/plot/dataset_plot.py b/xarray/plot/dataset_plot.py index 4f703045f17..edb32b73d98 100644 --- a/xarray/plot/dataset_plot.py +++ b/xarray/plot/dataset_plot.py @@ -103,7 +103,7 @@ def _dsplot(plotfunc): be either ``'viridis'`` (if the function infers a sequential dataset) or ``'RdBu_r'`` (if the function infers a diverging dataset). - See :doc:`Choosing Colormaps in Matplotlib ` + See :doc:`Choosing Colormaps in Matplotlib ` for more information. If *seaborn* is installed, ``cmap`` may also be a