Skip to content

Commit

Permalink
Zarr chunking (GH2300) (#2487)
Browse files Browse the repository at this point in the history
* fixed typo

* added test for saving opened zarr dataset

* modified test for saving opened zarr dataset

* allow different last chunk

* removed whitespace

* modified error messages

* fixed pep8 issues

* updated whats-new
  • Loading branch information
lilyminium authored and shoyer committed Nov 2, 2018
1 parent cf798c5 commit f788084
Show file tree
Hide file tree
Showing 3 changed files with 21 additions and 6 deletions.
4 changes: 4 additions & 0 deletions doc/whats-new.rst
Original file line number Diff line number Diff line change
Expand Up @@ -152,6 +152,10 @@ Bug fixes
the dates must be encoded using cftime rather than NumPy (:issue:`2272`).
By `Spencer Clark <https://github.com/spencerkclark>`_.

- Chunked datasets can now be round-tripped to Zarr storage repeatedly
with ``to_zarr`` and ``open_zarr`` (:issue:`2300`).
By `Lily Wang <https://github.com/lilyminium>`_.

.. _whats-new.0.10.9:

v0.10.9 (21 September 2018)
Expand Down
19 changes: 13 additions & 6 deletions xarray/backends/zarr.py
Original file line number Diff line number Diff line change
Expand Up @@ -79,14 +79,14 @@ def _determine_zarr_chunks(enc_chunks, var_chunks, ndim):
if var_chunks and enc_chunks is None:
if any(len(set(chunks[:-1])) > 1 for chunks in var_chunks):
raise ValueError(
"Zarr requires uniform chunk sizes excpet for final chunk."
" Variable %r has incompatible chunks. Consider "
"Zarr requires uniform chunk sizes except for final chunk."
" Variable dask chunks %r are incompatible. Consider "
"rechunking using `chunk()`." % (var_chunks,))
if any((chunks[0] < chunks[-1]) for chunks in var_chunks):
raise ValueError(
"Final chunk of Zarr array must be smaller than first. "
"Variable %r has incompatible chunks. Consider rechunking "
"using `chunk()`." % var_chunks)
"Final chunk of Zarr array must be the same size or smaller "
"than the first. Variable Dask chunks %r are incompatible. "
"Consider rechunking using `chunk()`." % var_chunks)
# return the first chunk for each dimension
return tuple(chunk[0] for chunk in var_chunks)

Expand Down Expand Up @@ -126,14 +126,21 @@ def _determine_zarr_chunks(enc_chunks, var_chunks, ndim):
# threads
if var_chunks and enc_chunks_tuple:
for zchunk, dchunks in zip(enc_chunks_tuple, var_chunks):
for dchunk in dchunks:
for dchunk in dchunks[:-1]:
if dchunk % zchunk:
raise NotImplementedError(
"Specified zarr chunks %r would overlap multiple dask "
"chunks %r. This is not implemented in xarray yet. "
" Consider rechunking the data using "
"`chunk()` or specifying different chunks in encoding."
% (enc_chunks_tuple, var_chunks))
if dchunks[-1] > zchunk:
raise ValueError(
"Final chunk of Zarr array must be the same size or "
"smaller than the first. The specified Zarr chunk "
"encoding is %r, but %r in variable Dask chunks %r is "
"incompatible. Consider rechunking using `chunk()`."
% (enc_chunks_tuple, dchunks, var_chunks))
return enc_chunks_tuple

raise AssertionError(
Expand Down
4 changes: 4 additions & 0 deletions xarray/tests/test_backends.py
Original file line number Diff line number Diff line change
Expand Up @@ -1388,6 +1388,10 @@ def test_chunk_encoding_with_dask(self):
ds_chunk_irreg = ds.chunk({'x': (5, 5, 2)})
with self.roundtrip(ds_chunk_irreg) as actual:
assert (5,) == actual['var1'].encoding['chunks']
# re-save Zarr arrays
with self.roundtrip(ds_chunk_irreg) as original:
with self.roundtrip(original) as actual:
assert_identical(original, actual)

# - encoding specified -
# specify compatible encodings
Expand Down

0 comments on commit f788084

Please sign in to comment.