From 97887fd9bbfb2be58b491155c6bb08498ce294ca Mon Sep 17 00:00:00 2001 From: Joseph K Aicher <4666753+jaicher@users.noreply.github.com> Date: Sat, 9 Oct 2021 20:02:41 -0400 Subject: [PATCH] Fix saving chunked datasets with zero length dimensions (#5742) * Added test_save_emptydim for zarr backends, which fails when chunking Added test that fails when saving to zarr a dataset with a chunked array that has a dimension of length zero (Issue #5741) * Load all variables with zero entries before saving to_zarr This addresses Issue #5741 and allows `test_save_emptydim` to pass. We get around `to_zarr` not liking dask arrays with zero length dimensions by giving it numpy arrays, which works for some reason * Updated whats-new.rst with information about fix for #5741 Co-authored-by: Maximilian Roos <5635139+max-sixty@users.noreply.github.com> --- doc/whats-new.rst | 3 +++ xarray/backends/api.py | 5 +++++ xarray/tests/test_backends.py | 10 ++++++++++ 3 files changed, 18 insertions(+) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 9c2554c5e75..87c85d45454 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -68,6 +68,9 @@ Deprecations Bug fixes ~~~~~~~~~ + +- Fix ZeroDivisionError from saving dask array with empty dimension (:issue:`5741`). + By `Joseph K Aicher <https://github.com/jaicher>`_. - Fixed performance bug where ``cftime`` import attempted within various core operations if ``cftime`` not installed (:pull:`5640`). By `Luke Sewell `_ diff --git a/xarray/backends/api.py b/xarray/backends/api.py index a101ce27ef5..b85b5015553 100644 --- a/xarray/backends/api.py +++ b/xarray/backends/api.py @@ -1322,6 +1322,11 @@ def to_zarr( See `Dataset.to_zarr` for full API docs. 
""" + # Load empty arrays to avoid bug saving zero length dimensions (Issue #5741) + for v in dataset.variables.values(): + if v.size == 0: + v.load() + # expand str and Path arguments store = _normalize_path(store) chunk_store = _normalize_path(chunk_store) diff --git a/xarray/tests/test_backends.py b/xarray/tests/test_backends.py index 0dcdc3cb28e..13ce6af316d 100644 --- a/xarray/tests/test_backends.py +++ b/xarray/tests/test_backends.py @@ -2186,6 +2186,16 @@ def test_to_zarr_append_compute_false_roundtrip(self): with self.open(store) as actual: assert_identical(xr.concat([ds, ds_to_append], dim="time"), actual) + @pytest.mark.parametrize("chunk", [False, True]) + def test_save_emptydim(self, chunk): + if chunk and not has_dask: + pytest.skip("requires dask") + ds = Dataset({"x": (("a", "b"), np.empty((5, 0))), "y": ("a", [1, 2, 5, 8, 9])}) + if chunk: + ds = ds.chunk({}) # chunk dataset to save dask array + with self.roundtrip(ds) as ds_reload: + assert_identical(ds, ds_reload) + @pytest.mark.parametrize("consolidated", [False, True]) @pytest.mark.parametrize("compute", [False, True]) @pytest.mark.parametrize("use_dask", [False, True])