diff --git a/ome_zarr/writer.py b/ome_zarr/writer.py index 137c5e3c..5f0be71d 100644 --- a/ome_zarr/writer.py +++ b/ome_zarr/writer.py @@ -235,6 +235,7 @@ def write_multiscale( Please use the 'storage_options' argument instead.""" warnings.warn(msg, DeprecationWarning) datasets: List[dict] = [] + is_zipstore = isinstance(group.store, zarr.storage.ZipStore) for path, data in enumerate(pyramid): options = _resolve_storage_options(storage_options, path) @@ -251,11 +252,13 @@ def write_multiscale( if chunks_opt is not None: data = da.array(data).rechunk(chunks=chunks_opt) options["chunks"] = chunks_opt + # storage options with chunks results in duplicate key error for ZipStore + # TODO: do other stores also cause this error with da.to_zarr? da_delayed = da.to_zarr( arr=data, url=group.store, component=str(Path(group.path, str(path))), - storage_options=options, + storage_options=None if is_zipstore else options, compressor=options.get("compressor", zarr.storage.default_compressor), dimension_separator=group._store._dimension_separator, compute=compute, @@ -585,6 +588,7 @@ def _write_dask_image( # for path, data in enumerate(pyramid): max_layer: int = scaler.max_layer if scaler is not None else 0 shapes = [] + is_zipstore = isinstance(group.store, zarr.storage.ZipStore) for path in range(0, max_layer + 1): # LOGGER.debug(f"write_image path: {path}") options = _resolve_storage_options(storage_options, path) @@ -609,12 +613,14 @@ def _write_dask_image( LOGGER.debug( "write dask.array to_zarr shape: %s, dtype: %s", image.shape, image.dtype ) + # storage options with chunks results in duplicate key error for ZipStore + # TODO: do other stores also cause this error with da.to_zarr? delayed.append( da.to_zarr( arr=image, url=group.store, component=str(Path(group.path, str(path))), - storage_options=options, + storage_options=None if is_zipstore else options, compute=False, compressor=options.get("compressor", zarr.storage.default_compressor), dimension_separator=group._store._dimension_separator, diff --git a/tests/test_writer.py b/tests/test_writer.py index 14a8ed50..b1419350 100644 --- a/tests/test_writer.py +++ b/tests/test_writer.py @@ -39,6 +39,8 @@ class TestWriter: @pytest.fixture(autouse=True) def initdir(self, tmpdir): self.path = pathlib.Path(tmpdir.mkdir("data")) + self.zip_path = pathlib.Path(tmpdir.mkdir("data_zip"), "data.zip") + self.zip_store = zarr.storage.ZipStore(self.zip_path, mode="w") self.store = parse_url(self.path, mode="w").store self.root = zarr.group(store=self.store) self.group = self.root.create_group("test") @@ -146,6 +148,71 @@ def test_write_image_current(self, array_constructor): for value in transfs[0]["scale"]: assert value >= 1 + # write_image/write_multiscale ZipStore when the storage_options is not None + @pytest.mark.parametrize("array_constructor", [np.array, da.from_array]) + @pytest.mark.parametrize("storage_options", [None, {"chunks": (1, 100, 100)}]) + def test_write_image_zipstore(self, array_constructor, storage_options): + # Initialize the Zarr group from ZipStore + group = zarr.group(store=self.zip_store, overwrite=True) + + shape = (3, 300, 300) + data = self.create_data(shape) + if array_constructor == da.from_array: + data = array_constructor(data, chunks=(1, 50, 50)) + else: + data = array_constructor(data) + + write_image(data, group, axes="cyx", storage_options=storage_options) + self.zip_store.close() + # load the data from the ZipStore + store = zarr.storage.ZipStore(self.zip_path, mode="r") + group = zarr.open_group(store=store) + # check the chunksize of the array + if array_constructor == da.from_array: + if storage_options is None: + assert group[0].chunks == (1, 50, 50) + else: + assert group[0].chunks == storage_options["chunks"] + else: + if storage_options is None: + pass + else: + assert group[0].chunks == storage_options["chunks"] + + @pytest.mark.parametrize("array_constructor", [np.array, da.from_array]) + @pytest.mark.parametrize("storage_options", [None, {"chunks": (1, 100, 100)}]) + def test_write_multiscale_zipstore(self, array_constructor, storage_options): + # Initialize the Zarr group from ZipStore + group = zarr.group(store=self.zip_store, overwrite=True) + + shape = (3, 300, 300) + data_arrs = [] + for i in range(2): + data = self.create_data(shape) + if array_constructor == da.from_array: + data = array_constructor(data, chunks=(1, 50, 50)) + else: + data = array_constructor(data) + data_arrs.append(data.copy()) + + write_multiscale(data_arrs, group, axes="cyx", storage_options=storage_options) + self.zip_store.close() + # load the data from the ZipStore + store = zarr.storage.ZipStore(self.zip_path, mode="r") + group = zarr.open_group(store=store) + # check the chunksize of the array + for i in range(2): + if array_constructor == da.from_array: + if storage_options is None: + assert group[i].chunks == (1, 50, 50) + else: + assert group[i].chunks == storage_options["chunks"] + else: + if storage_options is None: + pass + else: + assert group[i].chunks == storage_options["chunks"] + @pytest.mark.parametrize("read_from_zarr", [True, False]) @pytest.mark.parametrize("compute", [True, False]) def test_write_image_dask(self, read_from_zarr, compute):