From 10f2aa10a981d2a9ad3f4741cec538b70c7d73d1 Mon Sep 17 00:00:00 2001 From: Metamess Date: Fri, 3 Nov 2023 00:20:07 +0100 Subject: [PATCH] Fix for Dataset.to_zarr with both `consolidated` and `write_empty_chunks` (#8326) * Fixed an issue where Dataset.to_zarr with both `consolidated` and `write_empty_chunks` failed with a ReadOnlyError --- doc/whats-new.rst | 5 +++++ xarray/backends/zarr.py | 17 ++++++++++++++++- xarray/tests/test_backends.py | 13 +++++++++---- 3 files changed, 30 insertions(+), 5 deletions(-) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 3d9d3f6f310..3a9be494db2 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -142,6 +142,11 @@ Bug fixes (:issue:`8290`, :pull:`8297`). By `Oliver McCormack `_. +- Fix to_zarr ending in a ReadOnlyError when consolidated metadata was used and the + write_empty_chunks was provided. + (:issue:`8323`, :pull:`8326`) + By `Matthijs Amesz `_. + Documentation ~~~~~~~~~~~~~ diff --git a/xarray/backends/zarr.py b/xarray/backends/zarr.py index d6ad15f4f87..2b41fa5224e 100644 --- a/xarray/backends/zarr.py +++ b/xarray/backends/zarr.py @@ -675,8 +675,23 @@ def set_variables(self, variables, check_encoding_set, writer, unlimited_dims=No # metadata. This would need some case work properly with region # and append_dim. if self._write_empty is not None: + # Write to zarr_group.chunk_store instead of zarr_group.store + # See https://github.com/pydata/xarray/pull/8326#discussion_r1365311316 for a longer explanation + # The open_consolidated() enforces a mode of r or r+ + # (and to_zarr with region provided enforces a read mode of r+), + # and this function makes sure the resulting Group has a store of type ConsolidatedMetadataStore + # and a 'normal Store subtype for chunk_store. + # The exact type depends on if a local path was used, or a URL of some sort, + # but the point is that it's not a read-only ConsolidatedMetadataStore. + # It is safe to write chunk data to the chunk_store because no metadata would be changed by + # to_zarr with the region parameter: + # - Because the write mode is enforced to be r+, no new variables can be added to the store + # (this is also checked and enforced in xarray.backends.api.py::to_zarr()). + # - Existing variables already have their attrs included in the consolidated metadata file. + # - The size of dimensions can not be expanded, that would require a call using `append_dim` + # which is mutually exclusive with `region` zarr_array = zarr.open( - store=self.zarr_group.store, + store=self.zarr_group.chunk_store, path=f"{self.zarr_group.name}/{name}", write_empty_chunks=self._write_empty, ) diff --git a/xarray/tests/test_backends.py b/xarray/tests/test_backends.py index 59e9f655b2e..73352c3f7e1 100644 --- a/xarray/tests/test_backends.py +++ b/xarray/tests/test_backends.py @@ -2459,7 +2459,8 @@ def test_no_warning_from_open_emptydim_with_chunks(self) -> None: @pytest.mark.parametrize("consolidated", [False, True, None]) @pytest.mark.parametrize("compute", [False, True]) @pytest.mark.parametrize("use_dask", [False, True]) - def test_write_region(self, consolidated, compute, use_dask) -> None: + @pytest.mark.parametrize("write_empty", [False, True, None]) + def test_write_region(self, consolidated, compute, use_dask, write_empty) -> None: if (use_dask or not compute) and not has_dask: pytest.skip("requires dask") if consolidated and self.zarr_version > 2: @@ -2491,6 +2492,7 @@ def test_write_region(self, consolidated, compute, use_dask) -> None: store, region=region, consolidated=consolidated, + write_empty_chunks=write_empty, **self.version_kwargs, ) with xr.open_zarr( @@ -2772,9 +2774,12 @@ def roundtrip_dir( ) as ds: yield ds - @pytest.mark.parametrize("write_empty", [True, False]) - def test_write_empty(self, write_empty: bool) -> None: - if not write_empty: + @pytest.mark.parametrize("consolidated", [True, False, None]) + @pytest.mark.parametrize("write_empty", [True, False, None]) + def test_write_empty( + self, consolidated: bool | None, write_empty: bool | None + ) -> None: + if write_empty is False: expected = ["0.1.0", "1.1.0"] else: expected = [