From 848b3519461ad48bb843928ce1ff64e264d890ec Mon Sep 17 00:00:00 2001
From: Tom White <tom.e.white@gmail.com>
Date: Mon, 12 Aug 2024 12:04:18 +0100
Subject: [PATCH] Add an Xarray test suite adapted from
 xarray/tests/test_dask.py (#19)

* Add an Xarray test suite adapted from xarray/tests/test_dask.py

* Add dill to the test dependencies
---
 cubed_xarray/tests/test_xarray.py | 717 ++++++++++++++++++++++++++++++
 pyproject.toml                    |   4 +-
 2 files changed, 720 insertions(+), 1 deletion(-)
 create mode 100644 cubed_xarray/tests/test_xarray.py

diff --git a/cubed_xarray/tests/test_xarray.py b/cubed_xarray/tests/test_xarray.py
new file mode 100644
index 0000000..55663d1
--- /dev/null
+++ b/cubed_xarray/tests/test_xarray.py
@@ -0,0 +1,717 @@
+# Adapted from xarray/tests/test_dask.py
+
+from __future__ import annotations
+
+import operator
+from contextlib import suppress
+from textwrap import dedent
+
+import cubed
+import cubed.random
+import dill
+import numpy as np
+import pandas as pd
+import pytest
+import xarray as xr
+from xarray import DataArray, Dataset, Variable
+from xarray.tests import (
+    assert_allclose,
+    assert_array_equal,
+    assert_equal,
+    assert_identical,
+    mock,
+)
+
+try:
+    from cubed.testing import raise_if_computes as raise_if_cubed_computes
+except ImportError:
+    from contextlib import nullcontext
+
+    raise_if_cubed_computes = nullcontext
+
+
+@pytest.mark.xfail(reason="needs https://github.com/cubed-dev/cubed/pull/545")
+def test_raise_if_cubed_computes():
+    data = cubed.from_array(np.random.RandomState(0).randn(4, 6), chunks=(2, 2))
+    with pytest.raises(RuntimeError, match=r"'compute' was called"):
+        with raise_if_cubed_computes():
+            data.compute()
+
+
+class CubedTestCase:
+    def assertLazyAnd(self, expected, actual, test):
+        test(actual, expected)
+
+        if isinstance(actual, Dataset):
+            for k, v in actual.variables.items():
+                if k in actual.xindexes:
+                    assert isinstance(v.data, np.ndarray)
+                else:
+                    assert isinstance(v.data, cubed.Array)
+        elif isinstance(actual, DataArray):
+            assert isinstance(actual.data, cubed.Array)
+            for k, v in actual.coords.items():
+                if k in actual.xindexes:
+                    assert isinstance(v.data, np.ndarray)
+                else:
+                    assert isinstance(v.data, cubed.Array)
+        elif isinstance(actual, Variable):
+            assert isinstance(actual.data, cubed.Array)
+        else:
+            assert False
+
+
+class TestVariable(CubedTestCase):
+    def assertLazyAndIdentical(self, expected, actual):
+        self.assertLazyAnd(expected, actual, assert_identical)
+
+    def assertLazyAndAllClose(self, expected, actual):
+        self.assertLazyAnd(expected, actual, assert_allclose)
+
+    @pytest.fixture(autouse=True)
+    def setUp(self):
+        self.values = np.random.RandomState(0).randn(4, 6)
+        self.data = cubed.from_array(self.values, chunks=(2, 2))
+
+        self.eager_var = Variable(("x", "y"), self.values)
+        self.lazy_var = Variable(("x", "y"), self.data)
+
+    def test_basics(self):
+        v = self.lazy_var
+        assert self.data is v.data
+        assert self.data.chunks == v.chunks
+        assert_array_equal(self.values, v)
+
+    def test_copy(self):
+        self.assertLazyAndIdentical(self.eager_var, self.lazy_var.copy())
+        self.assertLazyAndIdentical(self.eager_var, self.lazy_var.copy(deep=True))
+
+    @pytest.mark.xfail(
+        reason="cubed rechunk handles chunks={} incorrectly, see https://github.com/cubed-dev/cubed/pull/546"
+    )
+    def test_chunk(self):
+        for chunks, expected in [
+            ({}, ((2, 2), (2, 2, 2))),
+            (3, ((3, 1), (3, 3))),
+            ({"x": 3, "y": 3}, ((3, 1), (3, 3))),
+            ({"x": 3}, ((3, 1), (2, 2, 2))),
+            ({"x": (3, 1)}, ((3, 1), (2, 2, 2))),
+        ]:
+            rechunked = self.lazy_var.chunk(chunks, chunked_array_type="cubed")
+            assert rechunked.chunks == expected
+            self.assertLazyAndIdentical(self.eager_var, rechunked)
+
+            expected_chunksizes = {
+                dim: chunks for dim, chunks in zip(self.lazy_var.dims, expected)
+            }
+            assert rechunked.chunksizes == expected_chunksizes
+
+    def test_indexing(self):
+        u = self.eager_var
+        v = self.lazy_var
+        self.assertLazyAndIdentical(u[0], v[0])
+        self.assertLazyAndIdentical(u[:1], v[:1])
+        self.assertLazyAndIdentical(u[[0, 1], [0, 1, 2]], v[[0, 1], [0, 1, 2]])
+
+    def test_squeeze(self):
+        u = self.eager_var
+        v = self.lazy_var
+        self.assertLazyAndIdentical(u[0].squeeze(), v[0].squeeze())
+
+    def test_equals(self):
+        v = self.lazy_var
+        assert v.equals(v)
+        assert isinstance(v.data, cubed.Array)
+        assert v.identical(v)
+        assert isinstance(v.data, cubed.Array)
+
+    def test_transpose(self):
+        u = self.eager_var
+        v = self.lazy_var
+        self.assertLazyAndIdentical(u.T, v.T)
+
+    @pytest.mark.xfail(
+        reason="xarray uses np.pad which is not yet wired up to cubed.pad"
+    )
+    def test_shift(self):
+        u = self.eager_var
+        v = self.lazy_var
+        self.assertLazyAndIdentical(u.shift(x=2), v.shift(x=2))
+        self.assertLazyAndIdentical(u.shift(x=-2), v.shift(x=-2))
+        assert v.data.chunks == v.shift(x=1).data.chunks
+
+    def test_roll(self):
+        u = self.eager_var
+        v = self.lazy_var
+        self.assertLazyAndIdentical(u.roll(x=2), v.roll(x=2))
+        # assert v.data.chunks == v.roll(x=1).data.chunks  # TODO: fails
+
+    def test_unary_op(self):
+        u = self.eager_var
+        v = self.lazy_var
+        self.assertLazyAndIdentical(-u, -v)
+        self.assertLazyAndIdentical(abs(u), abs(v))
+        # self.assertLazyAndIdentical(u.round(), v.round())  # TODO: fails, see https://github.com/pydata/xarray/pull/9326
+
+    def test_binary_op(self):
+        u = self.eager_var
+        v = self.lazy_var
+        self.assertLazyAndIdentical(2 * u, 2 * v)
+        self.assertLazyAndIdentical(u + u, v + v)
+        # self.assertLazyAndIdentical(u[0] + u, v[0] + v)  # TODO: fails
+
+    def test_binary_op_bitshift(self) -> None:
+        # bit shifts only work on ints so we need to generate
+        # new eager and lazy vars
+        rng = np.random.default_rng(0)
+        values = rng.integers(low=-10000, high=10000, size=(4, 6))
+        data = cubed.from_array(values, chunks=(2, 2))
+        u = Variable(("x", "y"), values)
+        v = Variable(("x", "y"), data)
+        self.assertLazyAndIdentical(u << 2, v << 2)
+        self.assertLazyAndIdentical(u << 5, v << 5)
+        self.assertLazyAndIdentical(u >> 2, v >> 2)
+        self.assertLazyAndIdentical(u >> 5, v >> 5)
+
+    def test_repr(self):
+        expected = dedent(
+            f"""\
+            <xarray.Variable (x: 4, y: 6)> Size: 192B
+            {self.lazy_var.data!r}"""
+        )
+        assert expected == repr(self.lazy_var)
+
+    @pytest.mark.xfail(reason="duck array ops problem in xarray")
+    def test_pickle(self):
+        # Test that pickling/unpickling does not convert the cubed
+        # backend to numpy
+        # Use dill since pickle can't handle cubed functions
+        a1 = self.lazy_var
+        a1.compute()
+        assert not a1._in_memory
+        a2 = dill.loads(dill.dumps(a1))
+        assert_identical(a1, a2)
+        assert not a1._in_memory
+        assert not a2._in_memory
+
+    def test_reduce(self):
+        u = self.eager_var
+        v = self.lazy_var
+        # TODO: remove skipna=False (https://github.com/cubed-dev/cubed/issues/153)
+        self.assertLazyAndAllClose(u.mean(skipna=False), v.mean(skipna=False))
+        # TODO: other reduce functions need work
+        # self.assertLazyAndAllClose(u.mean(), v.mean())
+        # self.assertLazyAndAllClose(u.std(), v.std())
+        # with raise_if_cubed_computes():
+        #     actual = v.argmax(dim="x")
+        # self.assertLazyAndAllClose(u.argmax(dim="x"), actual)
+        # with raise_if_cubed_computes():
+        #     actual = v.argmin(dim="x")
+        # self.assertLazyAndAllClose(u.argmin(dim="x"), actual)
+        # self.assertLazyAndAllClose((u > 1).any(), (v > 1).any())
+        # self.assertLazyAndAllClose((u < 1).all("x"), (v < 1).all("x"))
+        # with pytest.raises(NotImplementedError, match=r"only works along an axis"):
+        #     v.median()
+        # with pytest.raises(NotImplementedError, match=r"only works along an axis"):
+        #     v.median(v.dims)
+        # with raise_if_cubed_computes():
+        #     v.reduce(duck_array_ops.mean)
+
+    def test_missing_values(self):
+        values = np.array([0, 1, np.nan, 3])
+        data = cubed.from_array(values, chunks=(2,))
+
+        eager_var = Variable("x", values)
+        lazy_var = Variable("x", data)
+        self.assertLazyAndIdentical(eager_var, lazy_var.fillna(lazy_var))
+        self.assertLazyAndIdentical(Variable("x", range(4)), lazy_var.fillna(2))
+        # self.assertLazyAndIdentical(eager_var.count(), lazy_var.count())  # TODO: doesn't use array API
+
+    def test_concat(self):
+        u = self.eager_var
+        v = self.lazy_var
+        self.assertLazyAndIdentical(u, Variable.concat([v[:2], v[2:]], "x"))
+        self.assertLazyAndIdentical(u[:2], Variable.concat([v[0], v[1]], "x"))
+        # TODO: following fail
+        # self.assertLazyAndIdentical(u[:2], Variable.concat([u[0], v[1]], "x"))
+        # self.assertLazyAndIdentical(u[:2], Variable.concat([v[0], u[1]], "x"))
+        # self.assertLazyAndIdentical(
+        #     u[:3], Variable.concat([v[[0, 2]], v[[1]]], "x", positions=[[0, 2], [1]])
+        # )
+
+    def test_missing_methods(self):
+        v = self.lazy_var
+        with pytest.raises(AttributeError):
+            v.argsort()
+        with pytest.raises(AttributeError):
+            v[0].item()
+
+    @pytest.mark.xfail(reason="np ufuncs don't delegate to cubed")
+    def test_univariate_ufunc(self):
+        u = self.eager_var
+        v = self.lazy_var
+        self.assertLazyAndAllClose(np.sin(u), np.sin(v))
+
+    @pytest.mark.xfail(reason="np ufuncs don't delegate to cubed")
+    def test_bivariate_ufunc(self):
+        u = self.eager_var
+        v = self.lazy_var
+        self.assertLazyAndAllClose(np.maximum(u, 0), np.maximum(v, 0))
+        self.assertLazyAndAllClose(np.maximum(u, 0), np.maximum(0, v))
+
+    @pytest.mark.skip(reason="can't call cubed.compute on anything except cubed arrays")
+    def test_compute(self):
+        u = self.eager_var
+        v = self.lazy_var
+
+        # assert dask.is_dask_collection(v)
+        (v2,) = cubed.compute(v + 1)
+        # assert not dask.is_dask_collection(v2)
+
+        assert ((u + 1).data == v2.data).all()
+
+
+class TestDataArrayAndDataset(CubedTestCase):
+    def assertLazyAndIdentical(self, expected, actual):
+        self.assertLazyAnd(expected, actual, assert_identical)
+
+    def assertLazyAndAllClose(self, expected, actual):
+        self.assertLazyAnd(expected, actual, assert_allclose)
+
+    def assertLazyAndEqual(self, expected, actual):
+        self.assertLazyAnd(expected, actual, assert_equal)
+
+    @pytest.fixture(autouse=True)
+    def setUp(self):
+        self.values = np.random.randn(4, 6)
+        self.data = cubed.from_array(self.values, chunks=(2, 2))
+        self.eager_array = DataArray(
+            self.values, coords={"x": range(4)}, dims=("x", "y"), name="foo"
+        )
+        self.lazy_array = DataArray(
+            self.data, coords={"x": range(4)}, dims=("x", "y"), name="foo"
+        )
+
+    @pytest.mark.xfail(
+        reason="cubed rechunk handles chunks={} incorrectly, see https://github.com/cubed-dev/cubed/pull/546"
+    )
+    def test_chunk(self) -> None:
+        for chunks, expected in [
+            ({}, ((2, 2), (2, 2, 2))),
+            (3, ((3, 1), (3, 3))),
+            ({"x": 3, "y": 3}, ((3, 1), (3, 3))),
+            ({"x": 3}, ((3, 1), (2, 2, 2))),
+            ({"x": (3, 1)}, ((3, 1), (2, 2, 2))),
+            ({"x": "16B"}, ((1, 1, 1, 1), (2, 2, 2))),
+            ("16B", ((1, 1, 1, 1), (1,) * 6)),
+            ("16MB", ((4,), (6,))),
+        ]:
+            # Test DataArray
+            rechunked = self.lazy_array.chunk(chunks, chunked_array_type="cubed")
+            assert rechunked.chunks == expected
+            self.assertLazyAndIdentical(self.eager_array, rechunked)
+
+            expected_chunksizes = {
+                dim: chunks for dim, chunks in zip(self.lazy_array.dims, expected)
+            }
+            assert rechunked.chunksizes == expected_chunksizes
+
+            # Test Dataset
+            lazy_dataset = self.lazy_array.to_dataset()
+            eager_dataset = self.eager_array.to_dataset()
+            expected_chunksizes = {
+                dim: chunks for dim, chunks in zip(lazy_dataset.dims, expected)
+            }
+            rechunked = lazy_dataset.chunk(chunks, chunked_array_type="cubed")
+
+            # Dataset.chunks has a different return type to DataArray.chunks - see issue #5843
+            assert rechunked.chunks == expected_chunksizes
+            self.assertLazyAndIdentical(eager_dataset, rechunked)
+
+            assert rechunked.chunksizes == expected_chunksizes
+
+    @pytest.mark.xfail(reason="cubed rechunk bug")
+    def test_rechunk(self):
+        chunked = self.eager_array.chunk({"x": 2}).chunk(
+            {"y": 2}, chunked_array_type="cubed"
+        )
+        assert chunked.chunks == ((2,) * 2, (2,) * 3)
+        self.assertLazyAndIdentical(self.lazy_array, chunked)
+
+    def test_new_chunk(self):
+        chunked = self.eager_array.chunk(chunked_array_type="cubed")
+        assert chunked.data.name.startswith("array-")
+
+    def test_lazy_dataset(self):
+        lazy_ds = Dataset({"foo": (("x", "y"), self.data)})
+        assert isinstance(lazy_ds.foo.variable.data, cubed.Array)
+
+    def test_lazy_array(self):
+        u = self.eager_array
+        v = self.lazy_array
+
+        self.assertLazyAndAllClose(u, v)
+        self.assertLazyAndAllClose(-u, -v)
+        self.assertLazyAndAllClose(u.T, v.T)
+        # self.assertLazyAndAllClose(u.mean(), v.mean())  # TODO: isn't lazy
+        self.assertLazyAndAllClose(1 + u, 1 + v)
+
+        actual = xr.concat([v[:2], v[2:]], "x")
+        self.assertLazyAndAllClose(u, actual)
+
+    @pytest.mark.skip(reason="can't call cubed.compute on anything except cubed arrays")
+    def test_compute(self):
+        u = self.eager_array
+        v = self.lazy_array
+
+        # assert dask.is_dask_collection(v)
+        (v2,) = cubed.compute(v + 1)
+        # assert not dask.is_dask_collection(v2)
+
+        assert ((u + 1).data == v2.data).all()
+
+    @pytest.mark.xfail(reason="isn't lazy")
+    def test_groupby(self):
+        u = self.eager_array
+        v = self.lazy_array
+
+        expected = u.groupby("x").mean(...)
+        with raise_if_cubed_computes():
+            actual = v.groupby("x").mean(...)
+        self.assertLazyAndAllClose(expected, actual)
+
+    @pytest.mark.xfail(reason="isn't lazy")
+    def test_rolling(self):
+        u = self.eager_array
+        v = self.lazy_array
+
+        expected = u.rolling(x=2).mean()
+        with raise_if_cubed_computes():
+            actual = v.rolling(x=2).mean()
+        self.assertLazyAndAllClose(expected, actual)
+
+    @pytest.mark.xfail(reason="failure in cubed")
+    @pytest.mark.parametrize("func", ["first", "last"])
+    def test_groupby_first_last(self, func):
+        method = operator.methodcaller(func)
+        u = self.eager_array
+        v = self.lazy_array
+
+        for coords in [u.coords, v.coords]:
+            coords["ab"] = ("x", ["a", "a", "b", "b"])
+        expected = method(u.groupby("ab"))
+
+        with raise_if_cubed_computes():
+            actual = method(v.groupby("ab"))
+        self.assertLazyAndAllClose(expected, actual)
+
+        with raise_if_cubed_computes():
+            actual = method(v.groupby("ab"))
+        self.assertLazyAndAllClose(expected, actual)
+
+    @pytest.mark.xfail(reason="isn't lazy")
+    def test_reindex(self):
+        u = self.eager_array.assign_coords(y=range(6))
+        v = self.lazy_array.assign_coords(y=range(6))
+
+        for kwargs in [
+            {"x": [2, 3, 4]},
+            {"x": [1, 100, 2, 101, 3]},
+            {"x": [2.5, 3, 3.5], "y": [2, 2.5, 3]},
+        ]:
+            expected = u.reindex(**kwargs)
+            actual = v.reindex(**kwargs)
+            self.assertLazyAndAllClose(expected, actual)
+
+    def test_to_dataset_roundtrip(self):
+        u = self.eager_array
+        v = self.lazy_array
+
+        expected = u.assign_coords(x=u["x"])
+        self.assertLazyAndEqual(expected, v.to_dataset("x").to_dataarray("x"))
+
+    def test_merge(self):
+        def duplicate_and_merge(array):
+            return xr.merge([array, array.rename("bar")]).to_dataarray()
+
+        expected = duplicate_and_merge(self.eager_array)
+        actual = duplicate_and_merge(self.lazy_array)
+        self.assertLazyAndEqual(expected, actual)
+
+    @pytest.mark.xfail(reason="np ufuncs don't delegate to cubed")
+    def test_ufuncs(self):
+        u = self.eager_array
+        v = self.lazy_array
+        self.assertLazyAndAllClose(np.sin(u), np.sin(v))
+
+    @pytest.mark.xfail(reason="failure in cubed")
+    def test_where_dispatching(self):
+        a = np.arange(10)
+        b = a > 3
+        x = cubed.from_array(a, 5)
+        y = cubed.from_array(b, 5)
+        expected = DataArray(a).where(b)
+        self.assertLazyAndEqual(expected, DataArray(a).where(y))
+        self.assertLazyAndEqual(expected, DataArray(x).where(b))
+        self.assertLazyAndEqual(expected, DataArray(x).where(y))
+
+    def test_duplicate_dims(self):
+        data = np.random.normal(size=(4, 4))
+        arr = DataArray(data, dims=("x", "x"))
+        chunked_array = arr.chunk({"x": 2})
+        assert chunked_array.chunks == ((2, 2), (2, 2))
+        assert chunked_array.chunksizes == {"x": (2, 2)}
+
+    @pytest.mark.xfail(reason="duck array ops problem in xarray")
+    def test_stack(self):
+        data = cubed.random.random(size=(2, 3, 4), chunks=(1, 3, 4))
+        arr = DataArray(data, dims=("w", "x", "y"))
+        stacked = arr.stack(z=("x", "y"))
+        z = pd.MultiIndex.from_product([np.arange(3), np.arange(4)], names=["x", "y"])
+        expected = DataArray(cubed.reshape(data, (2, -1)), {"z": z}, dims=["w", "z"])
+        assert stacked.data.chunks == expected.data.chunks
+        self.assertLazyAndEqual(expected, stacked)
+
+    @pytest.mark.xfail(reason="relies on np.einsum which is not in cubed")
+    def test_dot(self):
+        eager = self.eager_array.dot(self.eager_array[0])
+        lazy = self.lazy_array.dot(self.lazy_array[0])
+        self.assertLazyAndAllClose(eager, lazy)
+
+    def test_dataarray_repr(self):
+        data = cubed.asarray([1], chunks=1)
+        data.name = "array-0"  # change name to something fixed for the repr
+        nonindex_coord = cubed.asarray([1], chunks=1)
+        a = DataArray(data, dims=["x"], coords={"y": ("x", nonindex_coord)})
+        expected = dedent(
+            f"""\
+            <xarray.DataArray 'array-0' (x: 1)> Size: 8B
+            {data!r}
+            Coordinates:
+                y        (x) int64 8B cubed.Array<chunksize=(1,)>
+            Dimensions without coordinates: x"""
+        )
+        assert expected == repr(a)
+
+    def test_dataset_repr(self):
+        data = cubed.asarray([1], chunks=1)
+        nonindex_coord = cubed.asarray([1], chunks=1)
+        ds = Dataset(data_vars={"a": ("x", data)}, coords={"y": ("x", nonindex_coord)})
+        expected = dedent(
+            """\
+            <xarray.Dataset> Size: 16B
+            Dimensions:  (x: 1)
+            Coordinates:
+                y        (x) int64 8B cubed.Array<chunksize=(1,)>
+            Dimensions without coordinates: x
+            Data variables:
+                a        (x) int64 8B cubed.Array<chunksize=(1,)>"""
+        )
+        assert expected == repr(ds)
+
+    def test_dataarray_pickle(self):
+        # Test that pickling/unpickling converts the cubed backend
+        # to numpy in neither the data variable nor the non-index coords
+        data = cubed.asarray([1], chunks=1)
+        nonindex_coord = cubed.asarray([1], chunks=1)
+        a1 = DataArray(data, dims=["x"], coords={"y": ("x", nonindex_coord)})
+        a1.compute()
+        assert not a1._in_memory
+        assert not a1.coords["y"]._in_memory
+        a2 = dill.loads(dill.dumps(a1))
+        assert_identical(a1, a2)
+        assert not a1._in_memory
+        assert not a2._in_memory
+        assert not a1.coords["y"]._in_memory
+        assert not a2.coords["y"]._in_memory
+
+    def test_dataset_pickle(self):
+        # Test that pickling/unpickling converts the cubed backend
+        # to numpy in neither the data variables nor the non-index coords
+        data = cubed.asarray([1], chunks=1)
+        nonindex_coord = cubed.asarray([1], chunks=1)
+        ds1 = Dataset(data_vars={"a": ("x", data)}, coords={"y": ("x", nonindex_coord)})
+        ds1.compute()
+        assert not ds1["a"]._in_memory
+        assert not ds1["y"]._in_memory
+        ds2 = dill.loads(dill.dumps(ds1))
+        assert_identical(ds1, ds2)
+        assert not ds1["a"]._in_memory
+        assert not ds2["a"]._in_memory
+        assert not ds1["y"]._in_memory
+        assert not ds2["y"]._in_memory
+
+    def test_dataarray_getattr(self):
+        # ipython/jupyter does a long list of getattr() calls to when trying to
+        # represent an object.
+        # Make sure we're not accidentally computing cubed variables.
+        data = cubed.asarray([1], chunks=1)
+        nonindex_coord = cubed.asarray([1], chunks=1)
+        a = DataArray(data, dims=["x"], coords={"y": ("x", nonindex_coord)})
+        with raise_if_cubed_computes():
+            with suppress(AttributeError):
+                getattr(a, "NOTEXIST")
+
+    def test_dataset_getattr(self):
+        # Test that pickling/unpickling converts the cubed backend
+        # to numpy in neither the data variables nor the non-index coords
+        data = cubed.asarray([1], chunks=1)
+        nonindex_coord = cubed.asarray([1], chunks=1)
+        ds = Dataset(data_vars={"a": ("x", data)}, coords={"y": ("x", nonindex_coord)})
+        with raise_if_cubed_computes():
+            with suppress(AttributeError):
+                getattr(ds, "NOTEXIST")
+
+    def test_values(self):
+        # Test that invoking the values property does not convert the cubed
+        # backend to numpy
+        a = DataArray([1, 2]).chunk()
+        assert not a._in_memory
+        assert a.values.tolist() == [1, 2]
+        assert not a._in_memory
+
+    def test_from_cubed_variable(self):
+        # Test array creation from Variable with cubed backend.
+        # This is used e.g. in broadcast()
+        a = DataArray(self.lazy_array.variable, coords={"x": range(4)}, name="foo")
+        self.assertLazyAndIdentical(self.lazy_array, a)
+
+
+@pytest.mark.parametrize("method", ["load", "compute"])
+def test_cubed_kwargs_variable(method):
+    chunked_array = cubed.from_array(np.arange(3), chunks=(2,))
+    x = Variable("y", chunked_array)
+    # args should be passed on to cubed.compute() (via CubedManager.compute())
+    with mock.patch.object(
+        cubed, "compute", return_value=(np.arange(3),)
+    ) as mock_compute:
+        getattr(x, method)(foo="bar")
+    mock_compute.assert_called_with(chunked_array, foo="bar")
+
+
+@pytest.mark.parametrize("method", ["load", "compute"])
+def test_cubed_kwargs_dataarray(method):
+    data = cubed.from_array(np.arange(3), chunks=(2,))
+    x = DataArray(data)
+    if method in ["load", "compute"]:
+        cubed_func = "cubed.compute"
+    # args should be passed on to "cubed_func"
+    with mock.patch(cubed_func) as mock_func:
+        getattr(x, method)(foo="bar")
+    mock_func.assert_called_with(data, foo="bar")
+
+
+@pytest.mark.parametrize("method", ["load", "compute"])
+def test_cubed_kwargs_dataset(method):
+    data = cubed.from_array(np.arange(3), chunks=(2,))
+    x = Dataset({"x": (("y"), data)})
+    if method in ["load", "compute"]:
+        cubed_func = "cubed.compute"
+    # args should be passed on to "cubed_func"
+    with mock.patch(cubed_func) as mock_func:
+        getattr(x, method)(foo="bar")
+    mock_func.assert_called_with(data, foo="bar")
+
+
+def test_basic_compute():
+    ds = Dataset({"foo": ("x", range(5)), "bar": ("x", range(5))}).chunk({"x": 2})
+    ds.compute()
+    ds.foo.compute()
+    ds.foo.variable.compute()
+
+
+def make_da():
+    da = xr.DataArray(
+        np.ones((10, 20)),
+        dims=["x", "y"],
+        coords={"x": np.arange(10), "y": np.arange(100, 120)},
+        name="a",
+    ).chunk({"x": 4, "y": 5})
+    da.x.attrs["long_name"] = "x"
+    da.attrs["test"] = "test"
+    da.coords["c2"] = 0.5
+    da.coords["ndcoord"] = da.x * 2
+    da.coords["cxy"] = (da.x * da.y).chunk({"x": 4, "y": 5})
+
+    return da
+
+
+def make_ds():
+    map_ds = xr.Dataset()
+    map_ds["a"] = make_da()
+    map_ds["b"] = map_ds.a + 50
+    map_ds["c"] = map_ds.x + 20
+    map_ds = map_ds.chunk({"x": 4, "y": 5})
+    map_ds["d"] = ("z", [1, 1, 1, 1])
+    map_ds["z"] = [0, 1, 2, 3]
+    map_ds["e"] = map_ds.x + map_ds.y
+    map_ds.coords["c1"] = 0.5
+    map_ds.coords["cx"] = ("x", np.arange(len(map_ds.x)))
+    map_ds.coords["cx"].attrs["test2"] = "test2"
+    map_ds.attrs["test"] = "test"
+    map_ds.coords["xx"] = map_ds["a"] * map_ds.y
+
+    map_ds.x.attrs["long_name"] = "x"
+    map_ds.y.attrs["long_name"] = "y"
+
+    return map_ds
+
+
+# fixtures cannot be used in parametrize statements
+# instead use this workaround
+# https://docs.pytest.org/en/latest/deprecations.html#calling-fixtures-directly
+@pytest.fixture
+def map_da():
+    return make_da()
+
+
+@pytest.fixture
+def map_ds():
+    return make_ds()
+
+
+def test_unify_chunks(map_ds):
+    ds_copy = map_ds.copy()
+    ds_copy["cxy"] = ds_copy.cxy.chunk({"y": 10})
+
+    with pytest.raises(ValueError, match=r"inconsistent chunks"):
+        ds_copy.chunks
+
+    expected_chunks = {"x": (4, 4, 2), "y": (5, 5, 5, 5)}
+    with raise_if_cubed_computes():
+        actual_chunks = ds_copy.unify_chunks().chunks
+    assert actual_chunks == expected_chunks
+    assert_identical(map_ds, ds_copy.unify_chunks())
+
+    out_a, out_b = xr.unify_chunks(ds_copy.cxy, ds_copy.drop_vars("cxy"))
+    assert out_a.chunks == ((4, 4, 2), (5, 5, 5, 5))
+    assert out_b.chunks == expected_chunks
+
+    # TODO: following fails
+    # # Test unordered dims
+    # da = ds_copy["cxy"]
+    # out_a, out_b = xr.unify_chunks(da.chunk({"x": -1}), da.T.chunk({"y": -1}))
+    # assert out_a.chunks == ((4, 4, 2), (5, 5, 5, 5))
+    # assert out_b.chunks == ((5, 5, 5, 5), (4, 4, 2))
+
+    # # Test mismatch
+    # with pytest.raises(ValueError, match=r"Dimension 'x' size mismatch: 10 != 2"):
+    #     xr.unify_chunks(da, da.isel(x=slice(2)))
+
+
+@pytest.mark.parametrize("obj", [make_ds(), make_da()])
+@pytest.mark.parametrize(
+    "transform", [lambda x: x.compute(), lambda x: x.unify_chunks()]
+)
+def test_unify_chunks_shallow_copy(obj, transform):
+    obj = transform(obj)
+    unified = obj.unify_chunks()
+    assert_identical(obj, unified) and obj is not obj.unify_chunks()
+
+
+@pytest.mark.parametrize("obj", [make_da()])
+def test_auto_chunk_da(obj):
+    actual = obj.chunk("auto").data
+    expected = obj.data.rechunk("auto")
+    np.testing.assert_array_equal(actual, expected)
+    assert actual.chunks == expected.chunks
diff --git a/pyproject.toml b/pyproject.toml
index f3a82cb..9557751 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -30,11 +30,13 @@ dependencies = [
 
 [project.optional-dependencies]
 test = [
+    "dill",
     "pre-commit",
     "ruff",
     "pytest-mypy",
     "pytest-cov",
-    "pytest"]
+    "pytest",
+]
 
 
 [project.urls]