Merge branch 'main' into document-group

brokkoli71 authored Oct 30, 2024
2 parents 2172ffe + 4c3081c commit a027f0d
Showing 21 changed files with 525 additions and 133 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/gpu_test.yml
@@ -25,7 +25,7 @@ jobs:
     strategy:
       matrix:
         python-version: ['3.11']
-        numpy-version: ['2.0']
+        numpy-version: ['2.1']
         dependency-set: ["minimal"]
 
     steps:
2 changes: 1 addition & 1 deletion .github/workflows/hypothesis.yaml
@@ -26,7 +26,7 @@ jobs:
     strategy:
       matrix:
         python-version: ['3.11']
-        numpy-version: ['1.26']
+        numpy-version: ['2.1']
        dependency-set: ["optional"]
 
     steps:
25 changes: 22 additions & 3 deletions .github/workflows/test.yml
@@ -16,14 +16,33 @@ concurrency:
 
 jobs:
   test:
-    name: py=${{ matrix.python-version }}, np=${{ matrix.numpy-version }}, deps=${{ matrix.dependency-set }}
+    name: os=${{ matrix.os }}, py=${{ matrix.python-version }}, np=${{ matrix.numpy-version }}, deps=${{ matrix.dependency-set }}
 
-    runs-on: ubuntu-latest
     strategy:
       matrix:
         python-version: ['3.11', '3.12', '3.13']
-        numpy-version: ['1.25', '1.26', '2.0']
+        numpy-version: ['1.25', '2.1']
         dependency-set: ["minimal", "optional"]
+        os: ["ubuntu-latest"]
+        include:
+          - python-version: '3.11'
+            numpy-version: '1.25'
+            dependency-set: 'optional'
+            os: 'macos-latest'
+          - python-version: '3.13'
+            numpy-version: '2.1'
+            dependency-set: 'optional'
+            os: 'macos-latest'
+          # https://github.com/zarr-developers/zarr-python/issues/2438
+          # - python-version: '3.11'
+          #   numpy-version: '1.25'
+          #   dependency-set: 'optional'
+          #   os: 'windows-latest'
+          # - python-version: '3.13'
+          #   numpy-version: '2.1'
+          #   dependency-set: 'optional'
+          #   os: 'windows-latest'
+    runs-on: ${{ matrix.os }}
 
     steps:
       - uses: actions/checkout@v4
6 changes: 2 additions & 4 deletions .pre-commit-config.yaml
@@ -7,7 +7,7 @@ default_language_version:
   python: python3
 repos:
   - repo: https://github.com/astral-sh/ruff-pre-commit
-    rev: v0.7.0
+    rev: v0.7.1
     hooks:
       - id: ruff
         args: ["--fix", "--show-fixes"]
@@ -22,7 +22,7 @@ repos:
     hooks:
       - id: check-yaml
   - repo: https://github.com/pre-commit/mirrors-mypy
-    rev: v1.12.1
+    rev: v1.13.0
     hooks:
       - id: mypy
         files: src|tests
@@ -37,8 +37,6 @@ repos:
           - universal-pathlib
           # Tests
          - pytest
-          # Zarr v2
-          - types-redis
   - repo: https://github.com/scientific-python/cookie
     rev: 2024.08.19
     hooks:
13 changes: 5 additions & 8 deletions pyproject.toml
@@ -61,7 +61,6 @@ test = [
     "pytest",
     "pytest-cov",
     "msgpack",
-    "lmdb",
     "s3fs",
     "pytest-asyncio",
     "moto[s3]",
@@ -84,21 +83,19 @@ gpu = [
 docs = [
     'sphinx==8.1.3',
     'sphinx-autobuild>=2021.3.14',
-    'sphinx-autoapi==3.3.2',
+    'sphinx-autoapi==3.3.3',
     'sphinx_design',
     'sphinx-issues',
     'sphinx-copybutton',
     'pydata-sphinx-theme',
     'numpydoc',
     'numcodecs[msgpack]',
     'msgpack',
-    'lmdb',
 ]
 extra = [
     'msgpack',
 ]
 optional = [
-    'lmdb',
     'universal-pathlib>=0.0.22',
 ]
 
@@ -135,17 +132,17 @@ features = ["test", "extra"]
 
 [[tool.hatch.envs.test.matrix]]
 python = ["3.11", "3.12", "3.13"]
-numpy = ["1.25", "1.26", "2.0"]
+numpy = ["1.25", "2.1"]
 version = ["minimal"]
 
 [[tool.hatch.envs.test.matrix]]
 python = ["3.11", "3.12", "3.13"]
-numpy = ["1.25", "1.26", "2.0"]
+numpy = ["1.25", "2.1"]
 features = ["optional"]
 
 [[tool.hatch.envs.test.matrix]]
 python = ["3.11", "3.12", "3.13"]
-numpy = ["1.25", "1.26", "2.0"]
+numpy = ["1.25", "2.1"]
 features = ["gpu"]
 
 [tool.hatch.envs.test.scripts]
@@ -166,7 +163,7 @@ features = ["test", "extra", "gpu"]
 
 [[tool.hatch.envs.gputest.matrix]]
 python = ["3.11", "3.12", "3.13"]
-numpy = ["1.25", "1.26", "2.0"]
+numpy = ["1.25", "2.1"]
 version = ["minimal"]
 
 [tool.hatch.envs.gputest.scripts]
8 changes: 6 additions & 2 deletions src/zarr/api/asynchronous.py
@@ -396,12 +396,16 @@ async def save_array(
 
     mode = kwargs.pop("mode", None)
     store_path = await make_store_path(store, path=path, mode=mode, storage_options=storage_options)
+    if np.isscalar(arr):
+        arr = np.array(arr)
+    shape = arr.shape
+    chunks = getattr(arr, "chunks", None)  # for array-likes with chunks attribute
     new = await AsyncArray.create(
         store_path,
         zarr_format=zarr_format,
-        shape=arr.shape,
+        shape=shape,
         dtype=arr.dtype,
-        chunks=arr.shape,
+        chunks=chunks,
         **kwargs,
     )
     await new.setitem(slice(None), arr)
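For illustration, a minimal sketch (not part of the diff) of what the save_array change enables: a bare scalar is wrapped in a 0-d array before shape and chunks are derived, and array-likes that expose a chunks attribute (for example dask arrays) keep their chunking instead of being written as a single chunk equal to the full shape. The local store path and the read-back call are assumptions for the example.

    import zarr

    # scalars are now wrapped via np.array(...) inside save_array,
    # so deriving shape/dtype from a bare number no longer fails
    zarr.save_array("example.zarr", 42)

    # read back; indexing the 0-d array with ... yields the stored value
    loaded = zarr.open_array("example.zarr")
    print(loaded[...])  # expected: 42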
115 changes: 48 additions & 67 deletions src/zarr/codecs/_v2.py
@@ -5,20 +5,21 @@
 from typing import TYPE_CHECKING
 
 import numcodecs
-from numcodecs.compat import ensure_bytes, ensure_ndarray
+from numcodecs.compat import ensure_ndarray_like
 
-from zarr.abc.codec import ArrayArrayCodec, ArrayBytesCodec
-from zarr.core.buffer import Buffer, NDBuffer, default_buffer_prototype
+from zarr.abc.codec import ArrayBytesCodec
 from zarr.registry import get_ndbuffer_class
 
 if TYPE_CHECKING:
     import numcodecs.abc
 
     from zarr.core.array_spec import ArraySpec
+    from zarr.core.buffer import Buffer, NDBuffer
 
 
 @dataclass(frozen=True)
-class V2Compressor(ArrayBytesCodec):
+class V2Codec(ArrayBytesCodec):
+    filters: tuple[numcodecs.abc.Codec, ...] | None
     compressor: numcodecs.abc.Codec | None
 
     is_fixed_size = False
@@ -28,81 +29,61 @@ async def _decode_single(
         chunk_bytes: Buffer,
         chunk_spec: ArraySpec,
     ) -> NDBuffer:
-        if self.compressor is not None:
-            chunk_numpy_array = ensure_ndarray(
-                await asyncio.to_thread(self.compressor.decode, chunk_bytes.as_array_like())
-            )
+        cdata = chunk_bytes.as_array_like()
+        # decompress
+        if self.compressor:
+            chunk = await asyncio.to_thread(self.compressor.decode, cdata)
         else:
-            chunk_numpy_array = ensure_ndarray(chunk_bytes.as_array_like())
+            chunk = cdata
 
-        # ensure correct dtype
-        if str(chunk_numpy_array.dtype) != chunk_spec.dtype and not chunk_spec.dtype.hasobject:
-            chunk_numpy_array = chunk_numpy_array.view(chunk_spec.dtype)
+        # apply filters
+        if self.filters:
+            for f in reversed(self.filters):
+                chunk = await asyncio.to_thread(f.decode, chunk)
+
+        # view as numpy array with correct dtype
+        chunk = ensure_ndarray_like(chunk)
+        # special case object dtype, because incorrect handling can lead to
+        # segfaults and other bad things happening
+        if chunk_spec.dtype != object:
+            chunk = chunk.view(chunk_spec.dtype)
+        elif chunk.dtype != object:
+            # If we end up here, someone must have hacked around with the filters.
+            # We cannot deal with object arrays unless there is an object
+            # codec in the filter chain, i.e., a filter that converts from object
+            # array to something else during encoding, and converts back to object
+            # array during decoding.
+            raise RuntimeError("cannot read object array without object codec")
+
+        # ensure correct chunk shape
+        chunk = chunk.reshape(-1, order="A")
+        chunk = chunk.reshape(chunk_spec.shape, order=chunk_spec.order)
 
-        return get_ndbuffer_class().from_numpy_array(chunk_numpy_array)
+        return get_ndbuffer_class().from_ndarray_like(chunk)
 
     async def _encode_single(
         self,
         chunk_array: NDBuffer,
-        _chunk_spec: ArraySpec,
+        chunk_spec: ArraySpec,
     ) -> Buffer | None:
-        chunk_numpy_array = chunk_array.as_numpy_array()
-        if self.compressor is not None:
-            if (
-                not chunk_numpy_array.flags.c_contiguous
-                and not chunk_numpy_array.flags.f_contiguous
-            ):
-                chunk_numpy_array = chunk_numpy_array.copy(order="A")
-            encoded_chunk_bytes = ensure_bytes(
-                await asyncio.to_thread(self.compressor.encode, chunk_numpy_array)
-            )
-        else:
-            encoded_chunk_bytes = ensure_bytes(chunk_numpy_array)
+        chunk = chunk_array.as_ndarray_like()
 
-        return default_buffer_prototype().buffer.from_bytes(encoded_chunk_bytes)
+        # apply filters
+        if self.filters:
+            for f in self.filters:
+                chunk = await asyncio.to_thread(f.encode, chunk)
+
+        # check object encoding
+        if ensure_ndarray_like(chunk).dtype == object:
+            raise RuntimeError("cannot write object array without object codec")
+
+        # compress
+        if self.compressor:
+            cdata = await asyncio.to_thread(self.compressor.encode, chunk)
+        else:
+            cdata = chunk
+
+        return chunk_spec.prototype.buffer.from_bytes(cdata)
 
     def compute_encoded_size(self, _input_byte_length: int, _chunk_spec: ArraySpec) -> int:
         raise NotImplementedError
-
-
-@dataclass(frozen=True)
-class V2Filters(ArrayArrayCodec):
-    filters: tuple[numcodecs.abc.Codec, ...] | None
-
-    is_fixed_size = False
-
-    async def _decode_single(
-        self,
-        chunk_array: NDBuffer,
-        chunk_spec: ArraySpec,
-    ) -> NDBuffer:
-        chunk_ndarray = chunk_array.as_ndarray_like()
-        # apply filters in reverse order
-        if self.filters is not None:
-            for filter in self.filters[::-1]:
-                chunk_ndarray = await asyncio.to_thread(filter.decode, chunk_ndarray)
-
-        # ensure correct chunk shape
-        if chunk_ndarray.shape != chunk_spec.shape:
-            chunk_ndarray = chunk_ndarray.reshape(
-                chunk_spec.shape,
-                order=chunk_spec.order,
-            )
-
-        return get_ndbuffer_class().from_ndarray_like(chunk_ndarray)
-
-    async def _encode_single(
-        self,
-        chunk_array: NDBuffer,
-        chunk_spec: ArraySpec,
-    ) -> NDBuffer | None:
-        chunk_ndarray = chunk_array.as_ndarray_like().ravel(order=chunk_spec.order)
-
-        if self.filters is not None:
-            for filter in self.filters:
-                chunk_ndarray = await asyncio.to_thread(filter.encode, chunk_ndarray)
-
-        return get_ndbuffer_class().from_ndarray_like(chunk_ndarray)
-
-    def compute_encoded_size(self, _input_byte_length: int, _chunk_spec: ArraySpec) -> int:
-        raise NotImplementedError
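To make the merged codec's ordering concrete, here is a minimal sketch (not part of the commit) of the pipeline V2Codec implements, written with plain numcodecs calls: on encode, filters run in order and the compressor runs last; on decode, the compressor's decode runs first and the filters run in reverse. Delta and Zlib are stand-ins for any v2 filter/compressor pair.

    import numpy as np
    from numcodecs import Delta, Zlib

    filters = (Delta(dtype="i4"),)  # example filter chain
    compressor = Zlib(level=1)      # example compressor

    arr = np.arange(16, dtype="i4")

    # encode: apply filters in order, then compress
    chunk = arr
    for f in filters:
        chunk = f.encode(chunk)
    cdata = compressor.encode(chunk)

    # decode: decompress first, then apply filters in reverse order
    chunk = compressor.decode(cdata)
    for f in reversed(filters):
        chunk = f.decode(chunk)

    restored = np.asarray(chunk, dtype="i4").reshape(arr.shape)
    assert (restored == arr).all()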
(14 more changed files not shown)
