Merge branch 'main' into document-group

brokkoli71 authored Oct 30, 2024
2 parents 2172ffe + 4c3081c commit a027f0d
Showing 21 changed files with 525 additions and 133 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/gpu_test.yml
@@ -25,7 +25,7 @@ jobs:
     strategy:
       matrix:
         python-version: ['3.11']
-        numpy-version: ['2.0']
+        numpy-version: ['2.1']
         dependency-set: ["minimal"]
 
     steps:
2 changes: 1 addition & 1 deletion .github/workflows/hypothesis.yaml
@@ -26,7 +26,7 @@ jobs:
     strategy:
       matrix:
         python-version: ['3.11']
-        numpy-version: ['1.26']
+        numpy-version: ['2.1']
        dependency-set: ["optional"]
 
     steps:
25 changes: 22 additions & 3 deletions .github/workflows/test.yml
@@ -16,14 +16,33 @@ concurrency:
 
 jobs:
   test:
-    name: py=${{ matrix.python-version }}, np=${{ matrix.numpy-version }}, deps=${{ matrix.dependency-set }}
+    name: os=${{ matrix.os }}, py=${{ matrix.python-version }}, np=${{ matrix.numpy-version }}, deps=${{ matrix.dependency-set }}
 
-    runs-on: ubuntu-latest
     strategy:
       matrix:
         python-version: ['3.11', '3.12', '3.13']
-        numpy-version: ['1.25', '1.26', '2.0']
+        numpy-version: ['1.25', '2.1']
         dependency-set: ["minimal", "optional"]
+        os: ["ubuntu-latest"]
+        include:
+          - python-version: '3.11'
+            numpy-version: '1.25'
+            dependency-set: 'optional'
+            os: 'macos-latest'
+          - python-version: '3.13'
+            numpy-version: '2.1'
+            dependency-set: 'optional'
+            os: 'macos-latest'
+          # https://github.com/zarr-developers/zarr-python/issues/2438
+          # - python-version: '3.11'
+          #   numpy-version: '1.25'
+          #   dependency-set: 'optional'
+          #   os: 'windows-latest'
+          # - python-version: '3.13'
+          #   numpy-version: '2.1'
+          #   dependency-set: 'optional'
+          #   os: 'windows-latest'
+    runs-on: ${{ matrix.os }}
 
     steps:
       - uses: actions/checkout@v4
6 changes: 2 additions & 4 deletions .pre-commit-config.yaml
@@ -7,7 +7,7 @@ default_language_version:
   python: python3
 repos:
   - repo: https://github.com/astral-sh/ruff-pre-commit
-    rev: v0.7.0
+    rev: v0.7.1
     hooks:
       - id: ruff
         args: ["--fix", "--show-fixes"]
@@ -22,7 +22,7 @@ repos:
     hooks:
       - id: check-yaml
   - repo: https://github.com/pre-commit/mirrors-mypy
-    rev: v1.12.1
+    rev: v1.13.0
     hooks:
       - id: mypy
         files: src|tests
@@ -37,8 +37,6 @@ repos:
           - universal-pathlib
           # Tests
          - pytest
-          # Zarr v2
-          - types-redis
   - repo: https://github.com/scientific-python/cookie
     rev: 2024.08.19
     hooks:
13 changes: 5 additions & 8 deletions pyproject.toml
@@ -61,7 +61,6 @@ test = [
     "pytest",
     "pytest-cov",
     "msgpack",
-    "lmdb",
     "s3fs",
     "pytest-asyncio",
     "moto[s3]",
@@ -84,21 +83,19 @@ gpu = [
 docs = [
     'sphinx==8.1.3',
     'sphinx-autobuild>=2021.3.14',
-    'sphinx-autoapi==3.3.2',
+    'sphinx-autoapi==3.3.3',
     'sphinx_design',
     'sphinx-issues',
     'sphinx-copybutton',
     'pydata-sphinx-theme',
     'numpydoc',
     'numcodecs[msgpack]',
     'msgpack',
-    'lmdb',
 ]
 extra = [
     'msgpack',
 ]
 optional = [
-    'lmdb',
     'universal-pathlib>=0.0.22',
 ]
 
@@ -135,17 +132,17 @@ features = ["test", "extra"]
 
 [[tool.hatch.envs.test.matrix]]
 python = ["3.11", "3.12", "3.13"]
-numpy = ["1.25", "1.26", "2.0"]
+numpy = ["1.25", "2.1"]
 version = ["minimal"]
 
 [[tool.hatch.envs.test.matrix]]
 python = ["3.11", "3.12", "3.13"]
-numpy = ["1.25", "1.26", "2.0"]
+numpy = ["1.25", "2.1"]
 features = ["optional"]
 
 [[tool.hatch.envs.test.matrix]]
 python = ["3.11", "3.12", "3.13"]
-numpy = ["1.25", "1.26", "2.0"]
+numpy = ["1.25", "2.1"]
 features = ["gpu"]
 
 [tool.hatch.envs.test.scripts]
@@ -166,7 +163,7 @@ features = ["test", "extra", "gpu"]
 
 [[tool.hatch.envs.gputest.matrix]]
 python = ["3.11", "3.12", "3.13"]
-numpy = ["1.25", "1.26", "2.0"]
+numpy = ["1.25", "2.1"]
 version = ["minimal"]
 
 [tool.hatch.envs.gputest.scripts]
8 changes: 6 additions & 2 deletions src/zarr/api/asynchronous.py
@@ -396,12 +396,16 @@ async def save_array(
 
     mode = kwargs.pop("mode", None)
     store_path = await make_store_path(store, path=path, mode=mode, storage_options=storage_options)
+    if np.isscalar(arr):
+        arr = np.array(arr)
+    shape = arr.shape
+    chunks = getattr(arr, "chunks", None)  # for array-likes with chunks attribute
     new = await AsyncArray.create(
         store_path,
         zarr_format=zarr_format,
-        shape=arr.shape,
+        shape=shape,
         dtype=arr.dtype,
-        chunks=arr.shape,
+        chunks=chunks,
         **kwargs,
     )
     await new.setitem(slice(None), arr)
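For illustration, a minimal sketch (not part of the diff) of what the save_array change enables: a bare scalar is wrapped in a 0-d array before shape and chunks are derived, and array-likes that expose a chunks attribute (for example dask arrays) keep their chunking instead of being written as a single chunk equal to the full shape. The local store path and the read-back call are assumptions for the example.

    import zarr

    # scalars are now wrapped via np.array(...) inside save_array,
    # so deriving shape/dtype from a bare number no longer fails
    zarr.save_array("example.zarr", 42)

    # read back; indexing the 0-d array with ... yields the stored value
    loaded = zarr.open_array("example.zarr")
    print(loaded[...])  # expected: 42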
115 changes: 48 additions & 67 deletions src/zarr/codecs/_v2.py
@@ -5,20 +5,21 @@
 from typing import TYPE_CHECKING
 
 import numcodecs
-from numcodecs.compat import ensure_bytes, ensure_ndarray
+from numcodecs.compat import ensure_ndarray_like
 
-from zarr.abc.codec import ArrayArrayCodec, ArrayBytesCodec
-from zarr.core.buffer import Buffer, NDBuffer, default_buffer_prototype
+from zarr.abc.codec import ArrayBytesCodec
 from zarr.registry import get_ndbuffer_class
 
 if TYPE_CHECKING:
     import numcodecs.abc
 
     from zarr.core.array_spec import ArraySpec
+    from zarr.core.buffer import Buffer, NDBuffer
 
 
 @dataclass(frozen=True)
-class V2Compressor(ArrayBytesCodec):
+class V2Codec(ArrayBytesCodec):
+    filters: tuple[numcodecs.abc.Codec, ...] | None
     compressor: numcodecs.abc.Codec | None
 
     is_fixed_size = False
@@ -28,81 +29,61 @@ async def _decode_single(
         chunk_bytes: Buffer,
         chunk_spec: ArraySpec,
     ) -> NDBuffer:
-        if self.compressor is not None:
-            chunk_numpy_array = ensure_ndarray(
-                await asyncio.to_thread(self.compressor.decode, chunk_bytes.as_array_like())
-            )
+        cdata = chunk_bytes.as_array_like()
+        # decompress
+        if self.compressor:
+            chunk = await asyncio.to_thread(self.compressor.decode, cdata)
         else:
-            chunk_numpy_array = ensure_ndarray(chunk_bytes.as_array_like())
+            chunk = cdata
 
-        # ensure correct dtype
-        if str(chunk_numpy_array.dtype) != chunk_spec.dtype and not chunk_spec.dtype.hasobject:
-            chunk_numpy_array = chunk_numpy_array.view(chunk_spec.dtype)
+        # apply filters
+        if self.filters:
+            for f in reversed(self.filters):
+                chunk = await asyncio.to_thread(f.decode, chunk)
+
+        # view as numpy array with correct dtype
+        chunk = ensure_ndarray_like(chunk)
+        # special case object dtype, because incorrect handling can lead to
+        # segfaults and other bad things happening
+        if chunk_spec.dtype != object:
+            chunk = chunk.view(chunk_spec.dtype)
+        elif chunk.dtype != object:
+            # If we end up here, someone must have hacked around with the filters.
+            # We cannot deal with object arrays unless there is an object
+            # codec in the filter chain, i.e., a filter that converts from object
+            # array to something else during encoding, and converts back to object
+            # array during decoding.
+            raise RuntimeError("cannot read object array without object codec")
+
+        # ensure correct chunk shape
+        chunk = chunk.reshape(-1, order="A")
+        chunk = chunk.reshape(chunk_spec.shape, order=chunk_spec.order)
 
-        return get_ndbuffer_class().from_numpy_array(chunk_numpy_array)
+        return get_ndbuffer_class().from_ndarray_like(chunk)
 
     async def _encode_single(
         self,
         chunk_array: NDBuffer,
-        _chunk_spec: ArraySpec,
+        chunk_spec: ArraySpec,
     ) -> Buffer | None:
-        chunk_numpy_array = chunk_array.as_numpy_array()
-        if self.compressor is not None:
-            if (
-                not chunk_numpy_array.flags.c_contiguous
-                and not chunk_numpy_array.flags.f_contiguous
-            ):
-                chunk_numpy_array = chunk_numpy_array.copy(order="A")
-            encoded_chunk_bytes = ensure_bytes(
-                await asyncio.to_thread(self.compressor.encode, chunk_numpy_array)
-            )
-        else:
-            encoded_chunk_bytes = ensure_bytes(chunk_numpy_array)
+        chunk = chunk_array.as_ndarray_like()
 
-        return default_buffer_prototype().buffer.from_bytes(encoded_chunk_bytes)
+        # apply filters
+        if self.filters:
+            for f in self.filters:
+                chunk = await asyncio.to_thread(f.encode, chunk)
+
+        # check object encoding
+        if ensure_ndarray_like(chunk).dtype == object:
+            raise RuntimeError("cannot write object array without object codec")
+
+        # compress
+        if self.compressor:
+            cdata = await asyncio.to_thread(self.compressor.encode, chunk)
+        else:
+            cdata = chunk
+
+        return chunk_spec.prototype.buffer.from_bytes(cdata)
 
     def compute_encoded_size(self, _input_byte_length: int, _chunk_spec: ArraySpec) -> int:
         raise NotImplementedError
-
-
-@dataclass(frozen=True)
-class V2Filters(ArrayArrayCodec):
-    filters: tuple[numcodecs.abc.Codec, ...] | None
-
-    is_fixed_size = False
-
-    async def _decode_single(
-        self,
-        chunk_array: NDBuffer,
-        chunk_spec: ArraySpec,
-    ) -> NDBuffer:
-        chunk_ndarray = chunk_array.as_ndarray_like()
-        # apply filters in reverse order
-        if self.filters is not None:
-            for filter in self.filters[::-1]:
-                chunk_ndarray = await asyncio.to_thread(filter.decode, chunk_ndarray)
-
-        # ensure correct chunk shape
-        if chunk_ndarray.shape != chunk_spec.shape:
-            chunk_ndarray = chunk_ndarray.reshape(
-                chunk_spec.shape,
-                order=chunk_spec.order,
-            )
-
-        return get_ndbuffer_class().from_ndarray_like(chunk_ndarray)
-
-    async def _encode_single(
-        self,
-        chunk_array: NDBuffer,
-        chunk_spec: ArraySpec,
-    ) -> NDBuffer | None:
-        chunk_ndarray = chunk_array.as_ndarray_like().ravel(order=chunk_spec.order)
-
-        if self.filters is not None:
-            for filter in self.filters:
-                chunk_ndarray = await asyncio.to_thread(filter.encode, chunk_ndarray)
-
-        return get_ndbuffer_class().from_ndarray_like(chunk_ndarray)
-
-    def compute_encoded_size(self, _input_byte_length: int, _chunk_spec: ArraySpec) -> int:
-        raise NotImplementedError
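To make the merged codec's ordering concrete, here is a minimal sketch (not part of the commit) of the pipeline V2Codec implements, written with plain numcodecs calls: on encode, filters run in order and the compressor runs last; on decode, the compressor's decode runs first and the filters run in reverse. Delta and Zlib are stand-ins for any v2 filter/compressor pair.

    import numpy as np
    from numcodecs import Delta, Zlib

    filters = (Delta(dtype="i4"),)  # example filter chain
    compressor = Zlib(level=1)      # example compressor

    arr = np.arange(16, dtype="i4")

    # encode: apply filters in order, then compress
    chunk = arr
    for f in filters:
        chunk = f.encode(chunk)
    cdata = compressor.encode(chunk)

    # decode: decompress first, then apply filters in reverse order
    chunk = compressor.decode(cdata)
    for f in reversed(filters):
        chunk = f.decode(chunk)

    restored = np.asarray(chunk, dtype="i4").reshape(arr.shape)
    assert (restored == arr).all()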
(14 more changed files not shown)
