Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Zarr v3 #404

Open
wants to merge 38 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
38 commits
Select commit Hold shift + click to select a range
b0d6b3d
Basic read example working (no labels)
will-moore Oct 26, 2024
da8c32f
cli_tests passing
will-moore Oct 30, 2024
19b89a8
Passing all 6 test_io.py
will-moore Oct 31, 2024
a954161
Passing tests/test_io.py and test_node.py
will-moore Nov 4, 2024
80f6e01
Include dtype in group.create_array()
will-moore Nov 4, 2024
e568911
Uncomment labels spec. Fixes test_ome_zarr.py download
will-moore Nov 4, 2024
b49ecc8
Fix test_scaler
will-moore Nov 4, 2024
18abe02
Add dimension_separator to existing v2 data .zarray to fix test_upgra…
will-moore Nov 4, 2024
86142c3
Fixed test_write_image_dask
will-moore Nov 4, 2024
31584bf
Pin zarr==v3.0.0-beta.1
will-moore Nov 7, 2024
daa3546
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Nov 7, 2024
fa29ccc
Remove python 3.9 and 3.10 from build.yml
will-moore Nov 7, 2024
8fc02b4
Remove unused imports
will-moore Nov 7, 2024
29890b8
remove fsspec from .isort.cfg
will-moore Nov 7, 2024
35bc979
mypy fix
will-moore Nov 7, 2024
75ba690
Use Blosc compression by default
will-moore Nov 11, 2024
52aceb0
Black formatting fixes
will-moore Nov 11, 2024
55d4ba9
Use group.array_values() for iterating arrays
will-moore Nov 11, 2024
0ea21bc
Use zarr_format=2 for zarr.open() in test_writer.py
will-moore Nov 11, 2024
7fc113b
Fix return type RemoteStore | LocalStore
will-moore Nov 12, 2024
94f7ace
Support reading of Zarr v3 data
will-moore Nov 12, 2024
d140c6d
Hard-code zarr_version=2 in parse_url()
will-moore Nov 12, 2024
f7b5f98
Use read_only instead of mode when creating Stores
will-moore Nov 13, 2024
c527c77
Pin zarr-python to specific commit on main branch
will-moore Nov 13, 2024
d8d5378
Fix test_write_image_compressed
will-moore Nov 13, 2024
2138160
Support READING of zarr v3 data
will-moore Dec 9, 2024
af2648d
Merge remote-tracking branch 'origin/master' into zarr_v3. Use zarr v…
will-moore Dec 9, 2024
1ea9e1a
Check that PR is green IF we skip test_writer with 3D-scale-True-from…
will-moore Dec 9, 2024
7754774
Bump dependencies including zarr==v3.0.0-beta.3 in docs/requirements.txt
will-moore Dec 9, 2024
499531f
Specify python 3.12 in .readthedocs.yml
will-moore Dec 9, 2024
c0fe50d
Merge remote-tracking branch 'origin/master' into zarr_v3
will-moore Dec 17, 2024
e021c13
Merge remote-tracking branch 'origin/master' into zarr_v3
joshmoore Dec 18, 2024
0a8d0b4
test fixes
will-moore Jan 13, 2025
c953723
Merge remote-tracking branch 'gh/zarr_v3' into zarr_v3
will-moore Jan 14, 2025
4f2a4b1
Merge remote-tracking branch 'origin/master' into zarr_v3
will-moore Jan 14, 2025
50e43c1
Rename zarr.storage.RemoteStore to FsspecStore
will-moore Jan 14, 2025
6c4ba92
_blosc_compressor() helper and other zarr-python fixes
will-moore Jan 14, 2025
872ce11
Use zarr_format=2 for download dask.to_zarr()
will-moore Jan 16, 2025
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .isort.cfg
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
[settings]
known_third_party = dask,fsspec,numcodecs,numpy,pytest,scipy,skimage,zarr
known_third_party = dask,numcodecs,numpy,pytest,scipy,skimage,zarr
multi_line_output = 3
include_trailing_comma = True
force_grid_wrap = 0
Expand Down
2 changes: 1 addition & 1 deletion .readthedocs.yml
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ version: 2
build:
os: ubuntu-22.04
tools:
python: "3.10"
python: "3.12"

# Build documentation in the docs/ directory with Sphinx
sphinx:
Expand Down
4 changes: 2 additions & 2 deletions docs/requirements.txt
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
sphinx==7.1.2
sphinx==8.1.3
sphinx-rtd-theme==3.0.2
fsspec
zarr
zarr>=v3.0.0
dask
numpy
scipy
Expand Down
2 changes: 1 addition & 1 deletion ome_zarr/data.py
Original file line number Diff line number Diff line change
Expand Up @@ -129,7 +129,7 @@ def create_zarr(

loc = parse_url(zarr_directory, mode="w")
assert loc
grp = zarr.group(loc.store)
grp = zarr.group(loc.store, zarr_format=2)
axes = None
size_c = 1
if fmt.version not in ("0.1", "0.2"):
Expand Down
49 changes: 18 additions & 31 deletions ome_zarr/format.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
from collections.abc import Iterator
from typing import Any, Optional

from zarr.storage import FSStore
from zarr.storage import FsspecStore, LocalStore

LOGGER = logging.getLogger("ome_zarr.format")

Expand Down Expand Up @@ -60,7 +60,7 @@ def matches(self, metadata: dict) -> bool: # pragma: no cover
raise NotImplementedError()

@abstractmethod
def init_store(self, path: str, mode: str = "r") -> FSStore:
def init_store(self, path: str, mode: str = "r") -> FsspecStore | LocalStore:
raise NotImplementedError()

# @abstractmethod
Expand Down Expand Up @@ -134,9 +134,22 @@ def matches(self, metadata: dict) -> bool:
LOGGER.debug("%s matches %s?", self.version, version)
return version == self.version

def init_store(self, path: str, mode: str = "r") -> FSStore:
store = FSStore(path, mode=mode, dimension_separator=".")
LOGGER.debug("Created legacy flat FSStore(%s, %s)", path, mode)
def init_store(self, path: str, mode: str = "r") -> FsspecStore | LocalStore:
    """
    Create the zarr store used to access ``path``.

    Not ideal. Stores should remain hidden.
    "dimension_separator" is specified at array creation time, so it is
    not passed to the store here.

    :param path: location of the data. A value starting with "http" or
        "s3" gets an ``FsspecStore`` built via ``FsspecStore.from_url``;
        anything else gets a ``LocalStore``.
    :param mode: access mode requested by the caller; it only decides the
        ``read_only`` flag handed to the store.
    :return: the newly created store.
    """
    # NOTE(review): "r+" and "a" are mapped to read_only=True as well as
    # "r". This mapping is kept exactly as it was -- confirm it is intended.
    read_only = mode in ("r", "r+", "a")

    if path.startswith(("http", "s3")):
        store = FsspecStore.from_url(
            path,
            storage_options=None,
            read_only=read_only,
        )
    else:
        # No other kwargs supported
        store = LocalStore(path, read_only=read_only)
    # Log the class that was really created; the previous fixed text
    # claimed an FsspecStore even when a LocalStore was built.
    LOGGER.debug("Created %s(%s, %s)", type(store).__name__, path, mode)
    return store

def generate_well_dict(
Expand Down Expand Up @@ -180,32 +193,6 @@ class FormatV02(FormatV01):
def version(self) -> str:
return "0.2"

def init_store(self, path: str, mode: str = "r") -> FSStore:
"""
Not ideal. Stores should remain hidden
TODO: could also check dimension_separator
"""

kwargs = {
"dimension_separator": "/",
"normalize_keys": False,
}

mkdir = True
if "r" in mode or path.startswith(("http", "s3")):
# Could be simplified on the fsspec side
mkdir = False
if mkdir:
kwargs["auto_mkdir"] = True

store = FSStore(
path,
mode=mode,
**kwargs,
) # TODO: open issue for using Path
LOGGER.debug("Created nested FSStore(%s, %s, %s)", path, mode, kwargs)
return store


class FormatV03(FormatV02): # inherits from V02 to avoid code duplication
"""
Expand Down
79 changes: 53 additions & 26 deletions ome_zarr/io.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,14 +3,14 @@
Primary entry point is the :func:`~ome_zarr.io.parse_url` method.
"""

import json
import logging
from pathlib import Path
from typing import Optional, Union
from urllib.parse import urljoin

import dask.array as da
from zarr.storage import FSStore
import zarr
from zarr.storage import FsspecStore, LocalStore, StoreLike

from .format import CurrentFormat, Format, detect_format
from .types import JSONDict
Expand All @@ -20,7 +20,7 @@

class ZarrLocation:
"""
IO primitive for reading and writing Zarr data. Uses FSStore for all
IO primitive for reading and writing Zarr data. Uses a store for all
data access.

No assumptions about the existence of the given path string are made.
Expand All @@ -29,7 +29,7 @@ class ZarrLocation:

def __init__(
self,
path: Union[Path, str, FSStore],
path: StoreLike,
mode: str = "r",
fmt: Format = CurrentFormat(),
) -> None:
Expand All @@ -40,18 +40,21 @@ def __init__(
self.__path = str(path.resolve())
elif isinstance(path, str):
self.__path = path
elif isinstance(path, FSStore):
elif isinstance(path, FsspecStore):
self.__path = path.path
elif isinstance(path, LocalStore):
self.__path = str(path.root)
else:
raise TypeError(f"not expecting: {type(path)}")

loader = fmt
if loader is None:
loader = CurrentFormat()
self.__store: FSStore = (
path if isinstance(path, FSStore) else loader.init_store(self.__path, mode)
self.__store: FsspecStore = (
path
if isinstance(path, FsspecStore)
else loader.init_store(self.__path, mode)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

would also checking for LocalStore here work?

)

self.__init_metadata()
detected = detect_format(self.__metadata, loader)
LOGGER.debug("ZarrLocation.__init__ %s detected: %s", path, detected)
Expand All @@ -67,16 +70,41 @@ def __init_metadata(self) -> None:
"""
Load the Zarr metadata files for the given location.
"""
self.zarray: JSONDict = self.get_json(".zarray")
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

can get_json() now be deprecated?

self.zgroup: JSONDict = self.get_json(".zgroup")
self.zgroup: JSONDict = {}
self.zarray: JSONDict = {}
self.__metadata: JSONDict = {}
self.__exists: bool = True
if self.zgroup:
self.__metadata = self.get_json(".zattrs")
elif self.zarray:
self.__metadata = self.get_json(".zattrs")
else:
self.__exists = False
# If we want to *create* a new zarr v2 group, we need to specify
# zarr_format. This is not needed for reading.
zarr_format = None
if self.__mode == "w":
# For now, let's support writing of zarr v2
# TODO: handle writing of zarr v2 OR zarr v3
zarr_format = 2
try:
group = zarr.open_group(
store=self.__store, path="/", mode=self.__mode, zarr_format=zarr_format
)
self.zgroup = group.attrs.asdict()
# For zarr v3, everything is under the "ome" namespace
if "ome" in self.zgroup:
self.zgroup = self.zgroup["ome"]
self.__metadata = self.zgroup
except (ValueError, FileNotFoundError):
try:
array = zarr.open_array(
store=self.__store,
path="/",
mode=self.__mode,
zarr_format=zarr_format,
)
self.zarray = array.attrs.asdict()
self.__metadata = self.zarray
except (ValueError, FileNotFoundError):
# We actually get a ValueError when the file is not found
# /zarr-python/src/zarr/abc/store.py", line 189, in _check_writable
# raise ValueError("store mode does not support writing")
self.__exists = False

def __repr__(self) -> str:
"""Print the path as well as whether this is a group or an array."""
Expand Down Expand Up @@ -104,7 +132,7 @@ def path(self) -> str:
return self.__path

@property
def store(self) -> FSStore:
def store(self) -> FsspecStore:
"""Return the initialized store for this location"""
assert self.__store is not None
return self.__store
Expand Down Expand Up @@ -154,11 +182,9 @@ def get_json(self, subpath: str) -> JSONDict:
All other exceptions log at the ERROR level.
"""
try:
data = self.__store.get(subpath)
if not data:
return {}
return json.loads(data)
except KeyError:
array_or_group = zarr.open_group(store=self.__store, path="/")
return array_or_group.attrs.asdict()
except (KeyError, FileNotFoundError):
LOGGER.debug("JSON not found: %s", subpath)
return {}
except Exception:
Expand Down Expand Up @@ -193,10 +219,11 @@ def _isfile(self) -> bool:
Return whether the current underlying implementation
points to a local file or not.
"""
return self.__store.fs.protocol == "file" or self.__store.fs.protocol == (
"file",
"local",
)
# return self.__store.fs.protocol == "file" or self.__store.fs.protocol == (
# "file",
# "local",
# )
return isinstance(self.__store, LocalStore)

def _ishttp(self) -> bool:
"""
Expand Down
2 changes: 1 addition & 1 deletion ome_zarr/scale.py
Original file line number Diff line number Diff line change
Expand Up @@ -138,7 +138,7 @@ def __assert_values(self, pyramid: list[np.ndarray]) -> None:

def __create_group(
self, store: MutableMapping, base: np.ndarray, pyramid: list[np.ndarray]
) -> zarr.hierarchy.Group:
) -> zarr.Group:
"""Create group and datasets."""
grp = zarr.group(store)
grp.create_dataset("base", data=base)
Expand Down
4 changes: 3 additions & 1 deletion ome_zarr/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -130,7 +130,9 @@ def download(input_path: str, output_dir: str = ".") -> None:
LOGGER.info("resolution %s...", dataset)
with pbar:
data.to_zarr(
str(target_path / dataset), dimension_separator="/"
str(target_path / dataset),
zarr_format=2,
dimension_separator="/",
)
else:
# Assume a group that needs metadata, like labels
Expand Down
Loading
Loading