Skip to content

Commit

Permalink
Make xarray an optional dependency (#123)
Browse files Browse the repository at this point in the history
* Make xarray an optional dependency

* Pin to Prefect 1.0 since 2.0 is incompatible

* Fix min-dependencies GitHub Action

* More selective xarray skipping
  • Loading branch information
tomwhite authored Sep 19, 2022
1 parent 9d12932 commit 96fcf41
Show file tree
Hide file tree
Showing 3 changed files with 32 additions and 15 deletions.
25 changes: 16 additions & 9 deletions rechunker/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,15 +6,7 @@

import dask
import dask.array
import xarray
import zarr
from xarray.backends.zarr import (
DIMENSION_KEY,
encode_zarr_attr_value,
encode_zarr_variable,
extract_zarr_variable_encoding,
)
from xarray.conventions import encode_dataset_coordinates

from rechunker.algorithm import rechunking_plan
from rechunker.pipeline import CopySpecToPipelinesMixin
Expand Down Expand Up @@ -143,6 +135,8 @@ def _get_dims_from_zarr_array(z_array):


def _encode_zarr_attributes(attrs):
    """Return a new dict with each value of ``attrs`` encoded for Zarr.

    Every value is passed through xarray's ``encode_zarr_attr_value``;
    the keys are kept as they are.
    """
    # xarray is an optional dependency, so it is imported only when this
    # helper is actually called.
    from xarray.backends.zarr import encode_zarr_attr_value

    encoded = {}
    for name, value in attrs.items():
        encoded[name] = encode_zarr_attr_value(value)
    return encoded


Expand Down Expand Up @@ -364,7 +358,20 @@ def _setup_rechunk(
target_options = target_options or {}
temp_options = temp_options or {}

if isinstance(source, xarray.Dataset):
# import xarray dynamically since it is not a required dependency
try:
import xarray
from xarray.backends.zarr import (
DIMENSION_KEY,
encode_zarr_attr_value,
encode_zarr_variable,
extract_zarr_variable_encoding,
)
from xarray.conventions import encode_dataset_coordinates
except ImportError:
xarray = None

if xarray and isinstance(source, xarray.Dataset):
if not isinstance(target_chunks, dict):
raise ValueError(
"You must specify ``target-chunks`` as a dict when rechunking a dataset."
Expand Down
4 changes: 2 additions & 2 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,6 @@
install_requires = [
"dask[array,diagnostics]",
"zarr>=2.11",
"xarray>=2022.3",
"mypy_extensions",
]
doc_requires = [
Expand All @@ -24,7 +23,8 @@
test_requires = ["pytest", "hypothesis"]

extras_require = {
"complete": install_requires + ["apache_beam", "pyyaml", "fsspec", "prefect"],
"complete": install_requires
+ ["apache_beam", "pyyaml", "fsspec", "prefect<2", "xarray>=2022.3"],
"docs": doc_requires,
"test": test_requires,
}
Expand Down
18 changes: 14 additions & 4 deletions tests/test_rechunk.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,6 @@
import numpy
import numpy as np
import pytest
import xarray
import zarr

from rechunker import api
Expand Down Expand Up @@ -43,6 +42,8 @@ def test_invalid_executor():

@pytest.fixture(scope="session")
def chunk_ds():
xarray = pytest.importorskip("xarray")

lon = numpy.arange(-180, 180)
lat = numpy.arange(-90, 90)
time = numpy.arange(365)
Expand All @@ -66,6 +67,7 @@ def example_dataset(shape):
# TODO: simplify the creation of datasets here
# TODO: See https://github.com/pangeo-data/rechunker/pull/93#discussion_r713939185
# TODO: Maybe it is best to refactor tests to use `chunk_ds`
xarray = pytest.importorskip("xarray")

a = numpy.arange(numpy.prod(shape)).reshape(shape).astype("f4")
a[-1] = numpy.nan
Expand Down Expand Up @@ -136,9 +138,7 @@ def example_dataset(shape):
),
],
)
def test_parse_target_chunks_from_dim_chunks(
chunk_ds: xarray.Dataset, target_chunks, expected
) -> None:
def test_parse_target_chunks_from_dim_chunks(chunk_ds, target_chunks, expected) -> None:
result = api.parse_target_chunks_from_dim_chunks(
ds=chunk_ds, target_chunks=target_chunks
)
Expand Down Expand Up @@ -205,6 +205,8 @@ def test_rechunk_dataset(
target_store,
temp_store,
):
xarray = pytest.importorskip("xarray")

if target_store.startswith("mapper"):
fsspec = pytest.importorskip("fsspec")
target_store = fsspec.get_mapper(str(tmp_path) + target_store)
Expand Down Expand Up @@ -273,6 +275,8 @@ def test_rechunk_dataset_dimchunks(
target_chunks,
max_mem,
):
xarray = pytest.importorskip("xarray")

temp_store = "temp.zarr"
target_store = "target.zarr"
target_store = str(tmp_path / target_store)
Expand Down Expand Up @@ -484,6 +488,8 @@ def test_rechunk_group(tmp_path, executor, source_store, target_store, temp_stor


def sample_xarray_dataset():
xarray = pytest.importorskip("xarray")

return xarray.Dataset(
dict(
a=xarray.DataArray(
Expand Down Expand Up @@ -580,6 +586,8 @@ def test_repr_html(rechunked):


def _is_collection(source):
xarray = pytest.importorskip("xarray")

assert isinstance(
source,
(dask.array.Array, zarr.core.Array, zarr.hierarchy.Group, xarray.Dataset),
Expand Down Expand Up @@ -640,6 +648,8 @@ def rechunk(compressor):


def test_rechunk_invalid_option(rechunk_args):
xarray = pytest.importorskip("xarray")

if isinstance(rechunk_args["source"], xarray.Dataset):
# Options are essentially unbounded for Xarray (for CF encoding params),
# so check only options with special error cases
Expand Down

0 comments on commit 96fcf41

Please sign in to comment.