From 8a7618fd27ea531b29ec3f0e25c09778673d426f Mon Sep 17 00:00:00 2001
From: Ivan Ivanov
Date: Tue, 3 Dec 2024 15:28:53 -0800
Subject: [PATCH] Add conversion of ImageArray to dask (#262)

* add conversion to dask

* add testing

* expand testing

* add documentation

* rename `ImageArray.dask` to `ImageArray.dask_array`

* add explicit dask[array] requirement, matches ndtiff
---
Reviewer notes (this region is not part of the commit message):

  * `ImageArray.dask_array()` calls
    `da.from_zarr(self.store.path, component=self.path)`, so it depends on
    the store exposing a `path` attribute; the in-code note says it is
    designed to work with zarr DirectoryStore.
    NOTE(review): which store types the test helper `_temp_ome_zarr`
    exercises is not visible in this patch; confirm before relying on
    other store types.
  * `dask.array` is imported inside the method body, and `dask[array]` is
    added to `install_requires` in setup.cfg.
  * `test_ome_zarr_to_dask` compares `dask_array().compute()` against the
    source data for the array named "0" (via `dataset.data`) and for a
    generated `arr_name` (via `dataset[arr_name]`).

 iohub/ngff/nodes.py     |  7 +++++++
 setup.cfg               |  1 +
 tests/ngff/test_ngff.py | 21 +++++++++++++++++++++
 3 files changed, 29 insertions(+)

diff --git a/iohub/ngff/nodes.py b/iohub/ngff/nodes.py
index 311d02e7..32ec1e7f 100644
--- a/iohub/ngff/nodes.py
+++ b/iohub/ngff/nodes.py
@@ -340,6 +340,13 @@ def numpy(self):
         `self.numpy()` is equivalent to `self[:]`."""
         return self[:]
 
+    def dask_array(self):
+        """Return as a dask array"""
+        import dask.array as da
+
+        # Note: Designed to work with zarr DirectoryStore
+        return da.from_zarr(self.store.path, component=self.path)
+
     def downscale(self):
         raise NotImplementedError
 
diff --git a/setup.cfg b/setup.cfg
index 947878a0..1aa7e9d5 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -43,6 +43,7 @@ install_requires =
     pillow>=9.4.0
     blosc2
     xarray>=2024.1.1
+    dask[array]
 
 [options.extras_require]
 dev =
diff --git a/tests/ngff/test_ngff.py b/tests/ngff/test_ngff.py
index 4ec371a4..cf0ce4f0 100644
--- a/tests/ngff/test_ngff.py
+++ b/tests/ngff/test_ngff.py
@@ -270,6 +270,27 @@ def test_create_zeros(ch_shape_dtype, arr_name):
         assert dataset[arr_name].dtype == dtype
 
 
+@given(
+    channels_and_random_5d=_channels_and_random_5d(),
+    arr_name=short_alpha_numeric,
+)
+@settings(
+    max_examples=16,
+    suppress_health_check=[HealthCheck.data_too_large],
+)
+def test_ome_zarr_to_dask(channels_and_random_5d, arr_name):
+    """Test `iohub.ngff.Position.data` to dask"""
+    channel_names, random_5d = channels_and_random_5d
+    with _temp_ome_zarr(random_5d, channel_names, "0") as dataset:
+        assert_array_almost_equal(
+            dataset.data.dask_array().compute(), random_5d
+        )
+    with _temp_ome_zarr(random_5d, channel_names, arr_name) as dataset:
+        assert_array_almost_equal(
+            dataset[arr_name].dask_array().compute(), random_5d
+        )
+
+
 @given(
     channels_and_random_5d=_channels_and_random_5d(),
     arr_name=short_alpha_numeric,