diff --git a/src/spatialdata/_core/spatialdata.py b/src/spatialdata/_core/spatialdata.py
index 1d8a2c03..3b0729a6 100644
--- a/src/spatialdata/_core/spatialdata.py
+++ b/src/spatialdata/_core/spatialdata.py
@@ -1059,27 +1059,34 @@ def _validate_can_safely_write_to_path(
                     "Overwriting non-Zarr stores is not supported to prevent accidental data loss."
                 )
             if not overwrite:
-                raise ValueError("The Zarr store already exists. Use `overwrite=True` to try overwriting the store.")
+                raise ValueError(
+                    "The Zarr store already exists. Use `overwrite=True` to try overwriting the store. "
+                    "Please note that only Zarr stores not currently in use by the SpatialData object can be "
+                    "overwritten."
+                )
+        ERROR_MSG = (
+            "Cannot overwrite. The target path of the write operation is in use. Please save the data to a "
+            "different location."
+        )
+        WORKAROUND = (
+            "\nWorkaround: please see discussion here https://github.com/scverse/spatialdata/discussions/520."
+        )
         if any(_backed_elements_contained_in_path(path=file_path, object=self)):
             raise ValueError(
-                "The file path specified is a parent directory of one or more files used for backing for one or "
-                "more elements in the SpatialData object. Please save the data to a different location."
+                ERROR_MSG + "\nDetails: the target path contains one or more files that Dask uses for "
+                "backing elements in the SpatialData object." + WORKAROUND
            )
         if self.path is not None and (
             _is_subfolder(parent=self.path, child=file_path) or _is_subfolder(parent=file_path, child=self.path)
         ):
             if saving_an_element and _is_subfolder(parent=self.path, child=file_path):
                 raise ValueError(
-                    "Currently, overwriting existing elements is not supported. It is recommended to manually save "
-                    "the element in a different location and then eventually copy it to the original desired "
-                    "location. Deleting the old element and saving the new element to the same location is not "
-                    "advised because data loss may occur if the execution is interrupted during writing."
+                    ERROR_MSG + "\nDetails: the target path in which to save an element is a subfolder "
+                    "of the current Zarr store." + WORKAROUND
                 )
             raise ValueError(
-                "The file path specified either contains either is contained in the one used for backing. "
-                "Currently, overwriting the backing Zarr store is not supported to prevent accidental data loss "
-                "that could occur if the execution is interrupted during writing. Please save the data to a "
-                "different location."
+                ERROR_MSG + "\nDetails: the target path either contains, coincides with, or is contained "
+                "in the current Zarr store." + WORKAROUND
             )
 
     def write(
@@ -2080,6 +2087,18 @@ def __setitem__(self, key: str, value: SpatialElement | AnnData) -> None:
         else:
             raise TypeError(f"Unknown element type with schema: {schema!r}.")
 
+    def __delitem__(self, key: str) -> None:
+        """
+        Delete the element from the SpatialData object.
+
+        Parameters
+        ----------
+        key
+            The name of the element to delete.
+ """ + element_type, _, _ = self._find_element(key) + getattr(self, element_type).__delitem__(key) + class QueryManager: """Perform queries on SpatialData objects.""" diff --git a/tests/core/operations/test_spatialdata_operations.py b/tests/core/operations/test_spatialdata_operations.py index 938feffa..ece57f98 100644 --- a/tests/core/operations/test_spatialdata_operations.py +++ b/tests/core/operations/test_spatialdata_operations.py @@ -316,6 +316,15 @@ def test_set_item(full_sdata: SpatialData) -> None: full_sdata[name] = full_sdata[name] +def test_del_item(full_sdata: SpatialData) -> None: + for name in ["image2d", "labels2d", "points_0", "circles", "poly"]: + del full_sdata[name] + with pytest.raises(KeyError): + del full_sdata[name] + with pytest.raises(KeyError, match="Could not find element with name"): + _ = full_sdata["not_present"] + + def test_no_shared_transformations() -> None: """Test transformation dictionary copy for transformations not to be shared.""" sdata = blobs() diff --git a/tests/io/test_readwrite.py b/tests/io/test_readwrite.py index d94e7fb1..a9839f72 100644 --- a/tests/io/test_readwrite.py +++ b/tests/io/test_readwrite.py @@ -8,7 +8,7 @@ import pytest from anndata import AnnData from numpy.random import default_rng -from spatialdata import SpatialData +from spatialdata import SpatialData, read_zarr from spatialdata._io._utils import _are_directories_identical, get_dask_backing_files from spatialdata.datasets import blobs from spatialdata.models import Image2DModel @@ -131,7 +131,20 @@ def test_incremental_io_in_memory( with pytest.raises(KeyError, match="Key `table` already exists."): sdata["table"] = v - def test_incremental_io_on_disk(self, tmp_path: str, full_sdata: SpatialData): + @pytest.mark.parametrize("dask_backed", [True, False]) + @pytest.mark.parametrize("workaround", [1, 2]) + def test_incremental_io_on_disk( + self, tmp_path: str, full_sdata: SpatialData, dask_backed: bool, workaround: int + ) -> None: + """ + This tests shows workaround on how to rewrite existing data on disk. + + The user is recommended to study them, understand the implications and eventually adapt them to their use case. + We are working on simpler workarounds and on a more robust solution to this problem, but unfortunately it is not + yet available. + + In particular the complex "dask-backed" case for workaround 1 could be simplified once + """ tmpdir = Path(tmp_path) / "incremental_io.zarr" sdata = SpatialData() sdata.write(tmpdir) @@ -147,32 +160,63 @@ def test_incremental_io_on_disk(self, tmp_path: str, full_sdata: SpatialData): ]: sdata[name] = full_sdata[name] sdata.write_element(name) + if dask_backed: + # this forces the element to write to be dask-backed from disk. In this case, overwriting the data is + # more laborious because we are writing the data to the same location that defines the data! + sdata = read_zarr(sdata.path) with pytest.raises( ValueError, match="The Zarr store already exists. Use `overwrite=True` to try overwriting the store." ): sdata.write_element(name) - with pytest.raises(ValueError, match="Currently, overwriting existing elements is not supported."): + with pytest.raises(ValueError, match="Cannot overwrite."): sdata.write_element(name, overwrite=True) - # workaround 1, mostly safe (untested for Windows platform, network drives, multi-threaded - # setups, ...) 
- new_name = f"{name}_new_place" - # write a copy of the data - sdata[new_name] = sdata[name] - sdata.write_element(new_name) - # rewrite the original data - sdata.delete_element_from_disk(name) - sdata.write_element(name) - # remove the copy - sdata.delete_element_from_disk(new_name) - element_type = sdata._element_type_from_element_name(new_name) - del getattr(sdata, element_type)[new_name] - - # workaround 2, unsafe but sometimes acceptable depending on the user's workflow - sdata.delete_element_from_disk(name) - sdata.write_element(name) + if workaround == 1: + new_name = f"{name}_new_place" + # workaround 1, mostly safe (untested for Windows platform, network drives, multi-threaded + # setups, ...). If the scenario matches your use case, please use with caution. + + if not dask_backed: # easier case + # a. write a backup copy of the data + sdata[new_name] = sdata[name] + sdata.write_element(new_name) + # b. rewrite the original data + sdata.delete_element_from_disk(name) + sdata.write_element(name) + # c. remove the backup copy + del sdata[new_name] + sdata.delete_element_from_disk(new_name) + else: # dask-backed case, more complex + # a. write a backup copy of the data + sdata[new_name] = sdata[name] + sdata.write_element(new_name) + # a2. remove the in-memory copy from the SpatialData object (note, + # at this point the backup copy still exists on-disk) + del sdata[new_name] + del sdata[name] + # a3 load the backup copy into memory + sdata_copy = read_zarr(sdata.path) + # b1. rewrite the original data + sdata.delete_element_from_disk(name) + sdata[name] = sdata_copy[new_name] + sdata.write_element(name) + # b2. reload the new data into memory (because it has been written but in-memory it still points + # from the backup location) + sdata = read_zarr(sdata.path) + # c. remove the backup copy + del sdata[new_name] + sdata.delete_element_from_disk(new_name) + elif workaround == 2: + # workaround 2, unsafe but sometimes acceptable depending on the user's workflow. + + # this works only if the data is not dask-backed, otherwise an exception will be raised because the code + # would be trying to delete the data that the Dask object is pointing to! + if not dask_backed: + # a. rewrite the original data (risky!) + sdata.delete_element_from_disk(name) + sdata.write_element(name) def test_incremental_io_table_legacy(self, table_single_annotation: SpatialData) -> None: s = table_single_annotation @@ -291,8 +335,7 @@ def test_overwrite_fails_when_no_zarr_store_bug_dask_backed_data(self, full_sdat with pytest.raises( ValueError, - match="The file path specified is a parent directory of one or more files used for backing for one " - "or ", + match="Cannot overwrite.", ): full_sdata.write(f, overwrite=True) @@ -310,7 +353,7 @@ def test_overwrite_fails_when_zarr_store_present(self, full_sdata): with pytest.raises( ValueError, - match="The file path specified either contains either is contained in the one used for backing.", + match="Cannot overwrite.", ): full_sdata.write(f, overwrite=True)