From deb2082ab6e648b7e87cd26a74d084262bd1cfdf Mon Sep 17 00:00:00 2001 From: Maximilian Roos <5635139+max-sixty@users.noreply.github.com> Date: Sat, 22 Jun 2024 11:03:37 -0700 Subject: [PATCH 01/13] Improve zarr chunks docs (#9140) * Improve zarr chunks docs Makes them more structure, consistent. I think removes a mistake re the default chunks arg in `open_zarr` (it's not `None`, it's `auto`). Adds a comment re performance with `chunks=None`, closing https://github.com/pydata/xarray/issues/9111 --- doc/whats-new.rst | 2 ++ xarray/backends/api.py | 43 +++++++++++++++++++++++++---------------- xarray/backends/zarr.py | 18 +++++++++++------ 3 files changed, 40 insertions(+), 23 deletions(-) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index e7a48458ae2..51a2c98fb9c 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -40,6 +40,8 @@ Bug fixes Documentation ~~~~~~~~~~~~~ +- Improvements to Zarr & chunking docs (:pull:`9139`, :pull:`9140`, :pull:`9132`) + By `Maximilian Roos `_ Internal Changes ~~~~~~~~~~~~~~~~ diff --git a/xarray/backends/api.py b/xarray/backends/api.py index ea3639db5c4..7054c62126e 100644 --- a/xarray/backends/api.py +++ b/xarray/backends/api.py @@ -425,15 +425,19 @@ def open_dataset( is chosen based on available dependencies, with a preference for "netcdf4". A custom backend class (a subclass of ``BackendEntrypoint``) can also be used. - chunks : int, dict, 'auto' or None, optional - If chunks is provided, it is used to load the new dataset into dask - arrays. ``chunks=-1`` loads the dataset with dask using a single - chunk for all arrays. ``chunks={}`` loads the dataset with dask using - engine preferred chunks if exposed by the backend, otherwise with - a single chunk for all arrays. In order to reproduce the default behavior - of ``xr.open_zarr(...)`` use ``xr.open_dataset(..., engine='zarr', chunks={})``. - ``chunks='auto'`` will use dask ``auto`` chunking taking into account the - engine preferred chunks. 
See dask chunking for more details. + chunks : int, dict, 'auto' or None, default: None + If provided, used to load the data into dask arrays. + + - ``chunks="auto"`` will use dask ``auto`` chunking taking into account the + engine preferred chunks. + - ``chunks=None`` skips using dask, which is generally faster for + small arrays. + - ``chunks=-1`` loads the data with dask using a single chunk for all arrays. + - ``chunks={}`` loads the data with dask using the engine's preferred chunk + size, generally identical to the format's chunk size. If not available, a + single chunk for all arrays. + + See dask chunking for more details. cache : bool, optional If True, cache data loaded from the underlying datastore in memory as NumPy arrays when accessed to avoid reading from the underlying data- @@ -631,14 +635,19 @@ def open_dataarray( Engine to use when reading files. If not provided, the default engine is chosen based on available dependencies, with a preference for "netcdf4". - chunks : int, dict, 'auto' or None, optional - If chunks is provided, it is used to load the new dataset into dask - arrays. ``chunks=-1`` loads the dataset with dask using a single - chunk for all arrays. `chunks={}`` loads the dataset with dask using - engine preferred chunks if exposed by the backend, otherwise with - a single chunk for all arrays. - ``chunks='auto'`` will use dask ``auto`` chunking taking into account the - engine preferred chunks. See dask chunking for more details. + chunks : int, dict, 'auto' or None, default: None + If provided, used to load the data into dask arrays. + + - ``chunks='auto'`` will use dask ``auto`` chunking taking into account the + engine preferred chunks. + - ``chunks=None`` skips using dask, which is generally faster for + small arrays. + - ``chunks=-1`` loads the data with dask using a single chunk for all arrays. 
+ - ``chunks={}`` loads the data with dask using engine preferred chunks if + exposed by the backend, otherwise with a single chunk for all arrays. + + See dask chunking for more details. + cache : bool, optional If True, cache data loaded from the underlying datastore in memory as NumPy arrays when accessed to avoid reading from the underlying data- diff --git a/xarray/backends/zarr.py b/xarray/backends/zarr.py index 5f6aa0f119c..9796fcbf9e2 100644 --- a/xarray/backends/zarr.py +++ b/xarray/backends/zarr.py @@ -973,12 +973,18 @@ def open_zarr( Array synchronizer provided to zarr group : str, optional Group path. (a.k.a. `path` in zarr terminology.) - chunks : int or dict or tuple or {None, 'auto'}, optional - Chunk sizes along each dimension, e.g., ``5`` or - ``{'x': 5, 'y': 5}``. If `chunks='auto'`, dask chunks are created - based on the variable's zarr chunks. If `chunks=None`, zarr array - data will lazily convert to numpy arrays upon access. This accepts - all the chunk specifications as Dask does. + chunks : int, dict, 'auto' or None, default: 'auto' + If provided, used to load the data into dask arrays. + + - ``chunks='auto'`` will use dask ``auto`` chunking taking into account the + engine preferred chunks. + - ``chunks=None`` skips using dask, which is generally faster for + small arrays. + - ``chunks=-1`` loads the data with dask using a single chunk for all arrays. + - ``chunks={}`` loads the data with dask using engine preferred chunks if + exposed by the backend, otherwise with a single chunk for all arrays. + + See dask chunking for more details. 
overwrite_encoded_chunks : bool, optional Whether to drop the zarr chunks encoded for each variable when a dataset is loaded with specified chunk sizes (default: False) From fe4fb061499f77681dd330cffb116c24388fe3d9 Mon Sep 17 00:00:00 2001 From: Maximilian Roos <5635139+max-sixty@users.noreply.github.com> Date: Sun, 23 Jun 2024 20:39:25 -0700 Subject: [PATCH 02/13] Include numbagg in type checks (#9159) * Include numbagg in type checks --- pyproject.toml | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index db64d7a18c5..2081f7f87bc 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -118,7 +118,6 @@ module = [ "matplotlib.*", "mpl_toolkits.*", "nc_time_axis.*", - "numbagg.*", "netCDF4.*", "netcdftime.*", "opt_einsum.*", @@ -329,8 +328,7 @@ filterwarnings = [ "default:the `pandas.MultiIndex` object:FutureWarning:xarray.tests.test_variable", "default:Using a non-tuple sequence for multidimensional indexing is deprecated:FutureWarning", "default:Duplicate dimension names present:UserWarning:xarray.namedarray.core", - "default:::xarray.tests.test_strategies", - # TODO: remove once we know how to deal with a changed signature in protocols + "default:::xarray.tests.test_strategies", # TODO: remove once we know how to deal with a changed signature in protocols "ignore:__array__ implementation doesn't accept a copy keyword, so passing copy=False failed.", ] From c8ff731aa83b5b555b1c75bf72120e9f1ca043d9 Mon Sep 17 00:00:00 2001 From: Maximilian Roos <5635139+max-sixty@users.noreply.github.com> Date: Sun, 23 Jun 2024 20:41:41 -0700 Subject: [PATCH 03/13] Remove mypy exclusions for a couple more libraries (#9160) * Remove mypy exclusions for a couple more libraries Also (unrelated) allow mypy passing without `array_api_strict` installed, which isn't in our dev dependencies... 
--- pyproject.toml | 2 -- xarray/tests/test_dtypes.py | 6 +++--- 2 files changed, 3 insertions(+), 5 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 2081f7f87bc..1815fa6dd5d 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -110,7 +110,6 @@ module = [ "cloudpickle.*", "cubed.*", "cupy.*", - "dask.types.*", "fsspec.*", "h5netcdf.*", "h5py.*", @@ -126,7 +125,6 @@ module = [ "pooch.*", "pyarrow.*", "pydap.*", - "pytest.*", "scipy.*", "seaborn.*", "setuptools", diff --git a/xarray/tests/test_dtypes.py b/xarray/tests/test_dtypes.py index e817bfdb330..498ba2ce59f 100644 --- a/xarray/tests/test_dtypes.py +++ b/xarray/tests/test_dtypes.py @@ -11,9 +11,9 @@ except ImportError: class DummyArrayAPINamespace: - bool = None - int32 = None - float64 = None + bool = None # type: ignore[unused-ignore,var-annotated] + int32 = None # type: ignore[unused-ignore,var-annotated] + float64 = None # type: ignore[unused-ignore,var-annotated] array_api_strict = DummyArrayAPINamespace From 872c1c576dc4bc1724e1c526ddc45cb420394ce3 Mon Sep 17 00:00:00 2001 From: Maximilian Roos <5635139+max-sixty@users.noreply.github.com> Date: Sun, 23 Jun 2024 21:48:59 -0700 Subject: [PATCH 04/13] Add test for #9155 (#9161) * Add test for #9155 I can't get this to fail locally, so adding a test to assess what's going on. 
Also excludes matplotlib from type exclusions * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --------- Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> --- pyproject.toml | 1 - xarray/tests/test_plot.py | 10 ++++++++++ 2 files changed, 10 insertions(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 1815fa6dd5d..2ada0c1c171 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -114,7 +114,6 @@ module = [ "h5netcdf.*", "h5py.*", "iris.*", - "matplotlib.*", "mpl_toolkits.*", "nc_time_axis.*", "netCDF4.*", diff --git a/xarray/tests/test_plot.py b/xarray/tests/test_plot.py index a44b621a981..b302ad3af93 100644 --- a/xarray/tests/test_plot.py +++ b/xarray/tests/test_plot.py @@ -3406,3 +3406,13 @@ def test_plot1d_filtered_nulls() -> None: actual = pc.get_offsets().shape[0] assert expected == actual + + +@requires_matplotlib +def test_9155() -> None: + # A test for types from issue #9155 + + with figure_context(): + data = xr.DataArray([1, 2, 3], dims=["x"]) + fig, ax = plt.subplots(ncols=1, nrows=1) + data.plot(ax=ax) From 56209bd9a3192e4f1e82c21e5ffcf4c3bacaaae3 Mon Sep 17 00:00:00 2001 From: Jessica Scheick Date: Mon, 24 Jun 2024 11:31:30 -0400 Subject: [PATCH 05/13] Docs: Add page with figure for navigating help resources (#9147) * add config to build mermaid diagrams in docs --------- Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Co-authored-by: Deepak Cherian --- ci/requirements/doc.yml | 1 + doc/conf.py | 5 +++ doc/help-diagram.rst | 75 +++++++++++++++++++++++++++++++++++++++++ doc/index.rst | 4 ++- doc/whats-new.rst | 3 ++ 5 files changed, 87 insertions(+), 1 deletion(-) create mode 100644 doc/help-diagram.rst diff --git a/ci/requirements/doc.yml b/ci/requirements/doc.yml index 066d085ec53..39c2d4d6e88 100644 --- a/ci/requirements/doc.yml +++ b/ci/requirements/doc.yml @@ -42,5 +42,6 @@ dependencies: - 
sphinxext-rediraffe - zarr>=2.10 - pip: + - sphinxcontrib-mermaid # relative to this file. Needs to be editable to be accepted. - -e ../.. diff --git a/doc/conf.py b/doc/conf.py index 80b24445f71..91bcdf8b8f8 100644 --- a/doc/conf.py +++ b/doc/conf.py @@ -59,6 +59,7 @@ ) nbsphinx_allow_errors = False +nbsphinx_requirejs_path = "" # -- General configuration ------------------------------------------------ @@ -68,7 +69,9 @@ # Add any Sphinx extension module names here, as strings. They can be # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom # ones. + extensions = [ + "sphinxcontrib.mermaid", "sphinx.ext.autodoc", "sphinx.ext.autosummary", "sphinx.ext.intersphinx", @@ -175,6 +178,8 @@ "pd.NaT": "~pandas.NaT", } +# mermaid config +mermaid_version = "10.9.1" # Add any paths that contain templates here, relative to this directory. templates_path = ["_templates", sphinx_autosummary_accessors.templates_path] diff --git a/doc/help-diagram.rst b/doc/help-diagram.rst new file mode 100644 index 00000000000..a42a2f0936a --- /dev/null +++ b/doc/help-diagram.rst @@ -0,0 +1,75 @@ +Getting Help +============ + +Navigating the wealth of resources available for Xarray can be overwhelming. +We've created this flow chart to help guide you towards the best way to get help, depending on what you're working towards. +The links to each resource are provided below the diagram. +Regardless of how you interact with us, we're always thrilled to hear from you! + +.. mermaid:: + :alt: Flowchart illustrating the different ways to access help using or contributing to Xarray. + + flowchart TD + intro[Welcome to Xarray! 
How can we help?]:::quesNodefmt + usage(["fa:fa-chalkboard-user Xarray Tutorials + fab:fa-readme Xarray Docs + fab:fa-google Google/fab:fa-stack-overflow Stack Exchange + fa:fa-robot Ask AI/a Large Language Model (LLM)"]):::ansNodefmt + API([fab:fa-readme Xarray Docs + fab:fa-readme extension's docs]):::ansNodefmt + help([fab:fa-github Xarray Discussions + fab:fa-discord Xarray Discord + fa:fa-users Xarray Office Hours + fa:fa-globe Pangeo Discourse]):::ansNodefmt + bug([Report and Propose here: + fab:fa-github Xarray Issues]):::ansNodefmt + contrib([fa:fa-book-open Xarray Contributor's Guide]):::ansNodefmt + pr(["fab:fa-github Pull Request (PR)"]):::ansNodefmt + dev([fab:fa-github Comment on your PR + fa:fa-users Developer's Meeting]):::ansNodefmt + report[Thanks for letting us know!]:::quesNodefmt + merged[fa:fa-hands-clapping Your PR was merged. + Thanks for contributing to Xarray!]:::quesNodefmt + + + intro -->|How do I use Xarray?| usage + usage -->|"with extensions (like Dask)"| API + + usage -->|I'd like some more help| help + intro -->|I found a bug| bug + intro -->|I'd like to make a small change| contrib + subgraph bugcontrib[Bugs and Contributions] + bug + contrib + bug -->|I just wanted to tell you| report + bug<-->|I'd like to fix the bug!| contrib + pr -->|my PR was approved| merged + end + + + intro -->|I wish Xarray could...| bug + + + pr <-->|my PR is quiet| dev + contrib -->pr + + classDef quesNodefmt fill:#9DEEF4,stroke:#206C89 + + classDef ansNodefmt fill:#FFAA05,stroke:#E37F17 + + classDef boxfmt fill:#FFF5ED,stroke:#E37F17 + class bugcontrib boxfmt + + linkStyle default font-size:20pt,color:#206C89 + + +- `Xarray Tutorials `__ +- `Xarray Docs `__ +- `Google/Stack Exchange `__ +- `Xarray Discussions `__ +- `Xarray Discord `__ +- `Xarray Office Hours `__ +- `Pangeo Discourse `__ +- `Xarray Issues `__ +- `Xarray Contributor's Guide `__ +- `Developer's Meeting `__ diff --git a/doc/index.rst b/doc/index.rst index 138e9d91601..4a5fe4ee080 100644 
--- a/doc/index.rst +++ b/doc/index.rst @@ -14,7 +14,8 @@ efficient, and fun! `Releases `__ | `Stack Overflow `__ | `Mailing List `__ | -`Blog `__ +`Blog `__ | +`Tutorials `__ .. grid:: 1 1 2 2 @@ -65,6 +66,7 @@ efficient, and fun! Tutorials & Videos API Reference How do I ... + Getting Help Ecosystem .. toctree:: diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 51a2c98fb9c..c3383a5648a 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -40,9 +40,12 @@ Bug fixes Documentation ~~~~~~~~~~~~~ +- Adds a flow-chart diagram to help users navigate help resources (`Discussion #8990 `_). + By `Jessica Scheick `_. - Improvements to Zarr & chunking docs (:pull:`9139`, :pull:`9140`, :pull:`9132`) By `Maximilian Roos `_ + Internal Changes ~~~~~~~~~~~~~~~~ From b5180749d351f8b85fd39677bf137caaa90288a7 Mon Sep 17 00:00:00 2001 From: Justus Magin Date: Tue, 25 Jun 2024 15:18:53 +0200 Subject: [PATCH 06/13] switch to unit `"D"` (#9170) --- xarray/tests/test_missing.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/xarray/tests/test_missing.py b/xarray/tests/test_missing.py index 3adcc132b61..da9513a7c71 100644 --- a/xarray/tests/test_missing.py +++ b/xarray/tests/test_missing.py @@ -84,7 +84,7 @@ def make_interpolate_example_data(shape, frac_nan, seed=12345, non_uniform=False if non_uniform: # construct a datetime index that has irregular spacing - deltas = pd.to_timedelta(rs.normal(size=shape[0], scale=10), unit="d") + deltas = pd.to_timedelta(rs.normal(size=shape[0], scale=10), unit="D") coords = {"time": (pd.Timestamp("2000-01-01") + deltas).sort_values()} else: coords = {"time": pd.date_range("2000-01-01", freq="D", periods=shape[0])} From 07b175633eba30dbfcd6eb0cf514ef1b1da9cf64 Mon Sep 17 00:00:00 2001 From: Stephan Hoyer Date: Wed, 26 Jun 2024 11:05:23 -0700 Subject: [PATCH 07/13] Slightly improve DataTree repr (#9064) * Improve DataTree repr * Adjust DataTree repr to include full path * More tweaks * Use "Group:" in repr instead of 
"DataTree:" * Fix errors in new repr tests * Fix repr on windows --- xarray/core/datatree.py | 11 ++++--- xarray/core/datatree_render.py | 11 ++++--- xarray/core/formatting.py | 15 +++------ xarray/core/iterators.py | 19 +++++------ xarray/tests/test_datatree.py | 57 +++++++++++++++++++++++++++++++++ xarray/tests/test_formatting.py | 18 ++++++----- 6 files changed, 94 insertions(+), 37 deletions(-) diff --git a/xarray/core/datatree.py b/xarray/core/datatree.py index 4e4d30885a3..c923ca2eb87 100644 --- a/xarray/core/datatree.py +++ b/xarray/core/datatree.py @@ -1314,11 +1314,12 @@ def match(self, pattern: str) -> DataTree: ... } ... ) >>> dt.match("*/B") - DataTree('None', parent=None) - ├── DataTree('a') - │ └── DataTree('B') - └── DataTree('b') - └── DataTree('B') + + Group: / + ├── Group: /a + │ └── Group: /a/B + └── Group: /b + └── Group: /b/B """ matching_nodes = { node.path: node.ds diff --git a/xarray/core/datatree_render.py b/xarray/core/datatree_render.py index d069071495e..f10f2540952 100644 --- a/xarray/core/datatree_render.py +++ b/xarray/core/datatree_render.py @@ -57,11 +57,12 @@ def __init__(self): >>> s0a = DataTree(name="sub0A", parent=s0) >>> s1 = DataTree(name="sub1", parent=root) >>> print(RenderDataTree(root)) - DataTree('root', parent=None) - ├── DataTree('sub0') - │ ├── DataTree('sub0B') - │ └── DataTree('sub0A') - └── DataTree('sub1') + + Group: / + ├── Group: /sub0 + │ ├── Group: /sub0/sub0B + │ └── Group: /sub0/sub0A + └── Group: /sub1 """ super().__init__("\u2502 ", "\u251c\u2500\u2500 ", "\u2514\u2500\u2500 ") diff --git a/xarray/core/formatting.py b/xarray/core/formatting.py index ad65a44d7d5..c15df34b5b1 100644 --- a/xarray/core/formatting.py +++ b/xarray/core/formatting.py @@ -1023,20 +1023,21 @@ def diff_datatree_repr(a: DataTree, b: DataTree, compat): def _single_node_repr(node: DataTree) -> str: """Information about this node, not including its relationships to other nodes.""" - node_info = f"DataTree('{node.name}')" - if 
node.has_data or node.has_attrs: ds_info = "\n" + repr(node.ds) else: ds_info = "" - return node_info + ds_info + return f"Group: {node.path}{ds_info}" def datatree_repr(dt: DataTree): """A printable representation of the structure of this entire tree.""" renderer = RenderDataTree(dt) - lines = [] + name_info = "" if dt.name is None else f" {dt.name!r}" + header = f"" + + lines = [header] for pre, fill, node in renderer: node_repr = _single_node_repr(node) @@ -1051,12 +1052,6 @@ def datatree_repr(dt: DataTree): else: lines.append(f"{fill}{' ' * len(renderer.style.vertical)}{line}") - # Tack on info about whether or not root node has a parent at the start - first_line = lines[0] - parent = f'"{dt.parent.name}"' if dt.parent is not None else "None" - first_line_with_parent = first_line[:-1] + f", parent={parent})" - lines[0] = first_line_with_parent - return "\n".join(lines) diff --git a/xarray/core/iterators.py b/xarray/core/iterators.py index dd5fa7ee97a..ae748b0066c 100644 --- a/xarray/core/iterators.py +++ b/xarray/core/iterators.py @@ -39,15 +39,16 @@ class LevelOrderIter(Iterator): >>> i = DataTree(name="i", parent=g) >>> h = DataTree(name="h", parent=i) >>> print(f) - DataTree('f', parent=None) - ├── DataTree('b') - │ ├── DataTree('a') - │ └── DataTree('d') - │ ├── DataTree('c') - │ └── DataTree('e') - └── DataTree('g') - └── DataTree('i') - └── DataTree('h') + + Group: / + ├── Group: /b + │ ├── Group: /b/a + │ └── Group: /b/d + │ ├── Group: /b/d/c + │ └── Group: /b/d/e + └── Group: /g + └── Group: /g/i + └── Group: /g/i/h >>> [node.name for node in LevelOrderIter(f)] ['f', 'b', 'g', 'a', 'd', 'i', 'c', 'e', 'h'] >>> [node.name for node in LevelOrderIter(f, maxlevel=3)] diff --git a/xarray/tests/test_datatree.py b/xarray/tests/test_datatree.py index 58fec20d4c6..b0dc2accd3e 100644 --- a/xarray/tests/test_datatree.py +++ b/xarray/tests/test_datatree.py @@ -623,6 +623,63 @@ def test_operation_with_attrs_but_no_data(self): dt.sel(dim_0=0) +class TestRepr: + def 
test_repr(self): + dt: DataTree = DataTree.from_dict( + { + "/": xr.Dataset( + {"e": (("x",), [1.0, 2.0])}, + coords={"x": [2.0, 3.0]}, + ), + "/b": xr.Dataset({"f": (("y",), [3.0])}), + "/b/c": xr.Dataset(), + "/b/d": xr.Dataset({"g": 4.0}), + } + ) + + result = repr(dt) + expected = dedent( + """ + + Group: / + │ Dimensions: (x: 2) + │ Coordinates: + │ * x (x) float64 16B 2.0 3.0 + │ Data variables: + │ e (x) float64 16B 1.0 2.0 + └── Group: /b + │ Dimensions: (y: 1) + │ Dimensions without coordinates: y + │ Data variables: + │ f (y) float64 8B 3.0 + ├── Group: /b/c + └── Group: /b/d + Dimensions: () + Data variables: + g float64 8B 4.0 + """ + ).strip() + assert result == expected + + result = repr(dt.b) + expected = dedent( + """ + + Group: /b + │ Dimensions: (y: 1) + │ Dimensions without coordinates: y + │ Data variables: + │ f (y) float64 8B 3.0 + ├── Group: /b/c + └── Group: /b/d + Dimensions: () + Data variables: + g float64 8B 4.0 + """ + ).strip() + assert result == expected + + class TestRestructuring: def test_drop_nodes(self): sue = DataTree.from_dict({"Mary": None, "Kate": None, "Ashley": None}) diff --git a/xarray/tests/test_formatting.py b/xarray/tests/test_formatting.py index b9d5f401a4a..d7a46eeaefc 100644 --- a/xarray/tests/test_formatting.py +++ b/xarray/tests/test_formatting.py @@ -555,16 +555,17 @@ def test_array_scalar_format(self) -> None: def test_datatree_print_empty_node(self): dt: DataTree = DataTree(name="root") - printout = dt.__str__() - assert printout == "DataTree('root', parent=None)" + printout = str(dt) + assert printout == "\nGroup: /" def test_datatree_print_empty_node_with_attrs(self): dat = xr.Dataset(attrs={"note": "has attrs"}) dt: DataTree = DataTree(name="root", data=dat) - printout = dt.__str__() + printout = str(dt) assert printout == dedent( """\ - DataTree('root', parent=None) + + Group: / Dimensions: () Data variables: *empty* @@ -575,9 +576,10 @@ def test_datatree_print_empty_node_with_attrs(self): def 
test_datatree_print_node_with_data(self): dat = xr.Dataset({"a": [0, 2]}) dt: DataTree = DataTree(name="root", data=dat) - printout = dt.__str__() + printout = str(dt) expected = [ - "DataTree('root', parent=None)", + "", + "Group: /", "Dimensions", "Coordinates", "a", @@ -591,8 +593,8 @@ def test_datatree_printout_nested_node(self): dat = xr.Dataset({"a": [0, 2]}) root: DataTree = DataTree(name="root") DataTree(name="results", data=dat, parent=root) - printout = root.__str__() - assert printout.splitlines()[2].startswith(" ") + printout = str(root) + assert printout.splitlines()[3].startswith(" ") def test_datatree_repr_of_node_with_data(self): dat = xr.Dataset({"a": [0, 2]}) From 19d0fbfcbd3bd74f5846569a78ded68810446c48 Mon Sep 17 00:00:00 2001 From: David Hoese Date: Wed, 26 Jun 2024 13:14:25 -0500 Subject: [PATCH 08/13] Fix example code formatting for CachingFileManager (#9178) --- xarray/backends/file_manager.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/xarray/backends/file_manager.py b/xarray/backends/file_manager.py index df901f9a1d9..86d84f532b1 100644 --- a/xarray/backends/file_manager.py +++ b/xarray/backends/file_manager.py @@ -63,7 +63,7 @@ class CachingFileManager(FileManager): FileManager.close(), which ensures that closed files are removed from the cache as well. 
- Example usage: + Example usage:: manager = FileManager(open, 'example.txt', mode='w') f = manager.acquire() @@ -71,7 +71,7 @@ class CachingFileManager(FileManager): manager.close() # ensures file is closed Note that as long as previous files are still cached, acquiring a file - multiple times from the same FileManager is essentially free: + multiple times from the same FileManager is essentially free:: f1 = manager.acquire() f2 = manager.acquire() From 651bd12749e56b0b2f992c8cae51dae0ece29c65 Mon Sep 17 00:00:00 2001 From: Pontus Lurcock Date: Wed, 26 Jun 2024 20:16:09 +0200 Subject: [PATCH 09/13] Change np.core.defchararray to np.char (#9165) (#9166) * Change np.core.defchararray to np.char.chararray (#9165) Replace a reference to np.core.defchararray with np.char.chararray in xarray.testing.assertions, since the former no longer works on NumPy 2.0.0 and the latter is the "preferred alias" according to NumPy docs. See Issue #9165. * Add test for assert_allclose on dtype S (#9165) * Use np.char.decode, not np.char.chararray.decode ... in assertions._decode_string_data. See #9166. * List #9165 fix in whats-new.rst * cross-link the fixed function * Improve a parameter ID in tests.test_assertions Co-authored-by: Justus Magin * whats-new normalization --------- Co-authored-by: Justus Magin --- doc/whats-new.rst | 2 ++ xarray/testing/assertions.py | 2 +- xarray/tests/test_assertions.py | 5 +++++ 3 files changed, 8 insertions(+), 1 deletion(-) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index c3383a5648a..97631b4c324 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -35,6 +35,8 @@ Deprecations Bug fixes ~~~~~~~~~ +- Make :py:func:`testing.assert_allclose` work with numpy 2.0 (:issue:`9165`, :pull:`9166`). + By `Pontus Lurcock `_. 
Documentation diff --git a/xarray/testing/assertions.py b/xarray/testing/assertions.py index 69885868f83..2a4c17e115a 100644 --- a/xarray/testing/assertions.py +++ b/xarray/testing/assertions.py @@ -36,7 +36,7 @@ def wrapper(*args, **kwargs): def _decode_string_data(data): if data.dtype.kind == "S": - return np.core.defchararray.decode(data, "utf-8", "replace") + return np.char.decode(data, "utf-8", "replace") return data diff --git a/xarray/tests/test_assertions.py b/xarray/tests/test_assertions.py index aa0ea46f7db..20b5e163662 100644 --- a/xarray/tests/test_assertions.py +++ b/xarray/tests/test_assertions.py @@ -52,6 +52,11 @@ def test_allclose_regression() -> None: xr.Dataset({"a": ("x", [0, 2]), "b": ("y", [0, 1])}), id="Dataset", ), + pytest.param( + xr.DataArray(np.array("a", dtype="|S1")), + xr.DataArray(np.array("b", dtype="|S1")), + id="DataArray_with_character_dtype", + ), ), ) def test_assert_allclose(obj1, obj2) -> None: From fa41cc0454e6daf47d1417f97a9e72ebb56e3add Mon Sep 17 00:00:00 2001 From: Justus Magin Date: Thu, 27 Jun 2024 12:23:55 +0200 Subject: [PATCH 10/13] temporarily pin `numpy<2` (#9181) --- ci/requirements/doc.yml | 2 +- ci/requirements/environment-windows.yml | 2 +- ci/requirements/environment.yml | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/ci/requirements/doc.yml b/ci/requirements/doc.yml index 39c2d4d6e88..116eee7f702 100644 --- a/ci/requirements/doc.yml +++ b/ci/requirements/doc.yml @@ -21,7 +21,7 @@ dependencies: - nbsphinx - netcdf4>=1.5 - numba - - numpy>=1.21 + - numpy>=1.21,<2 - packaging>=21.3 - pandas>=1.4,!=2.1.0 - pooch diff --git a/ci/requirements/environment-windows.yml b/ci/requirements/environment-windows.yml index 3b2e6dc62e6..4cdddc676eb 100644 --- a/ci/requirements/environment-windows.yml +++ b/ci/requirements/environment-windows.yml @@ -23,7 +23,7 @@ dependencies: - netcdf4 - numba - numbagg - - numpy + - numpy<2 - packaging - pandas # - pint>=0.22 diff --git 
a/ci/requirements/environment.yml b/ci/requirements/environment.yml index 01521e950f4..f1a10bc040b 100644 --- a/ci/requirements/environment.yml +++ b/ci/requirements/environment.yml @@ -26,7 +26,7 @@ dependencies: - numba - numbagg - numexpr - - numpy + - numpy<2 - opt_einsum - packaging - pandas From 48a4f7ac6cf20a8b6d0247c701647c67251ded78 Mon Sep 17 00:00:00 2001 From: Justus Magin Date: Thu, 27 Jun 2024 14:28:48 +0200 Subject: [PATCH 11/13] temporarily remove `pydap` from CI (#9183) (the issue is that with `numpy>=2` `import pydap` succeeds, but `import pydap.lib` raises) --- ci/requirements/all-but-dask.yml | 2 +- ci/requirements/environment-windows.yml | 2 +- ci/requirements/environment.yml | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/ci/requirements/all-but-dask.yml b/ci/requirements/all-but-dask.yml index 2f47643cc87..119db282ad9 100644 --- a/ci/requirements/all-but-dask.yml +++ b/ci/requirements/all-but-dask.yml @@ -27,7 +27,7 @@ dependencies: - pandas - pint>=0.22 - pip - - pydap + # - pydap - pytest - pytest-cov - pytest-env diff --git a/ci/requirements/environment-windows.yml b/ci/requirements/environment-windows.yml index 4cdddc676eb..2eedc9b0621 100644 --- a/ci/requirements/environment-windows.yml +++ b/ci/requirements/environment-windows.yml @@ -29,7 +29,7 @@ dependencies: # - pint>=0.22 - pip - pre-commit - - pydap + # - pydap - pytest - pytest-cov - pytest-env diff --git a/ci/requirements/environment.yml b/ci/requirements/environment.yml index f1a10bc040b..317e1fe5f41 100644 --- a/ci/requirements/environment.yml +++ b/ci/requirements/environment.yml @@ -35,7 +35,7 @@ dependencies: - pooch - pre-commit - pyarrow # pandas raises a deprecation warning without this, breaking doctests - - pydap + # - pydap - pytest - pytest-cov - pytest-env From f4183ec043de97273efdfdd4a33df2c3dc08ddff Mon Sep 17 00:00:00 2001 From: Justus Magin Date: Thu, 27 Jun 2024 19:04:16 +0200 Subject: [PATCH 12/13] also pin `numpy` in the all-but-dask CI 
(#9184) --- ci/requirements/all-but-dask.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ci/requirements/all-but-dask.yml b/ci/requirements/all-but-dask.yml index 119db282ad9..abf6a88690a 100644 --- a/ci/requirements/all-but-dask.yml +++ b/ci/requirements/all-but-dask.yml @@ -22,7 +22,7 @@ dependencies: - netcdf4 - numba - numbagg - - numpy + - numpy<2 - packaging - pandas - pint>=0.22 From 42ed6d30e81dce5b9922ac82f76c5b3cd748b19e Mon Sep 17 00:00:00 2001 From: Justus Magin Date: Fri, 28 Jun 2024 10:18:55 +0200 Subject: [PATCH 13/13] promote floating-point numeric datetimes to 64-bit before decoding (#9182) * promote floating-point dates to 64-bit while decoding * add a test to make sure we don't regress * whats-new entry --- doc/whats-new.rst | 2 ++ xarray/coding/times.py | 2 ++ xarray/tests/test_coding_times.py | 16 ++++++++++++++++ 3 files changed, 20 insertions(+) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 97631b4c324..c58f73cb1fa 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -37,6 +37,8 @@ Bug fixes ~~~~~~~~~ - Make :py:func:`testing.assert_allclose` work with numpy 2.0 (:issue:`9165`, :pull:`9166`). By `Pontus Lurcock `_. +- Promote floating-point numeric datetimes before decoding (:issue:`9179`, :pull:`9182`). + By `Justus Magin `_. Documentation diff --git a/xarray/coding/times.py b/xarray/coding/times.py index 466e847e003..34d4f9a23ad 100644 --- a/xarray/coding/times.py +++ b/xarray/coding/times.py @@ -278,6 +278,8 @@ def _decode_datetime_with_pandas( # timedelta64 value, and therefore would raise an error in the lines above. if flat_num_dates.dtype.kind in "iu": flat_num_dates = flat_num_dates.astype(np.int64) + elif flat_num_dates.dtype.kind in "f": + flat_num_dates = flat_num_dates.astype(np.float64) # Cast input ordinals to integers of nanoseconds because pd.to_timedelta # works much faster when dealing with integers (GH 1399). 
diff --git a/xarray/tests/test_coding_times.py b/xarray/tests/test_coding_times.py index 09221d66066..393f8400c46 100644 --- a/xarray/tests/test_coding_times.py +++ b/xarray/tests/test_coding_times.py @@ -1182,6 +1182,22 @@ def test_decode_0size_datetime(use_cftime): np.testing.assert_equal(expected, actual) +def test_decode_float_datetime(): + num_dates = np.array([1867128, 1867134, 1867140], dtype="float32") + units = "hours since 1800-01-01" + calendar = "standard" + + expected = np.array( + ["2013-01-01T00:00:00", "2013-01-01T06:00:00", "2013-01-01T12:00:00"], + dtype="datetime64[ns]", + ) + + actual = decode_cf_datetime( + num_dates, units=units, calendar=calendar, use_cftime=False + ) + np.testing.assert_equal(actual, expected) + + @requires_cftime def test_scalar_unit() -> None: # test that a scalar units (often NaN when using to_netcdf) does not raise an error