Skip to content

Commit

Permalink
Merge pull request #57 from TomNicholas/chunk_tests
Browse files Browse the repository at this point in the history
Test chunking (including Hypothesis tests)
  • Loading branch information
TomNicholas authored Jun 22, 2021
2 parents 5c12e17 + 6fc4161 commit 3144ebd
Show file tree
Hide file tree
Showing 6 changed files with 260 additions and 0 deletions.
1 change: 1 addition & 0 deletions ci/environment-3.7.yml
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ dependencies:
- dask
- numpy=1.17
- pytest
- hypothesis
- pip
- pip:
- codecov
Expand Down
1 change: 1 addition & 0 deletions ci/environment-3.8.yml
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ dependencies:
- dask
- numpy=1.18
- pytest
- hypothesis
- pip
- pip:
- codecov
Expand Down
1 change: 1 addition & 0 deletions ci/environment-3.9.yml
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ dependencies:
- dask
- numpy
- pytest
- hypothesis
- pip
- pip:
- codecov
Expand Down
25 changes: 25 additions & 0 deletions xhistogram/test/fixtures.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,8 @@
import uuid
import dask
import dask.array as dsa
import numpy as np
import xarray as xr


def empty_dask_array(shape, dtype=float, chunks=None):
Expand All @@ -12,3 +15,25 @@ def raise_if_computed():
a = a.rechunk(chunks)

return a


def example_dataarray(shape=(5, 20)):
    """Random DataArray named "T" with one ``dim_<i>`` dimension per axis.

    Parameters
    ----------
    shape : tuple of int
        Shape of the generated array; values are drawn with
        ``np.random.randn``.

    Returns
    -------
    xarray.DataArray
    """
    dim_names = [f"dim_{axis}" for axis, _ in enumerate(shape)]
    values = np.random.randn(*shape)
    return xr.DataArray(values, dims=dim_names, name="T")


def example_dataset(n_dim=2, n_vars=2):
    """Random dataset with every variable having the same shape.

    The shared shape is ``(8, 9, ..., 8 + n_dim - 1)`` with dimensions named
    ``dim_0 .. dim_<n_dim - 1>``.  Each of the ``n_vars`` variables is filled
    with ``np.random.randn`` values and named with a random ``uuid4`` hex
    string.
    """
    shape = tuple(range(8, 8 + n_dim))
    dim_names = [f"dim_{axis}" for axis, _ in enumerate(shape)]

    dataset = xr.Dataset()
    for var_name in [uuid.uuid4().hex for _ in range(n_vars)]:
        values = np.random.randn(*shape)
        dataset[var_name] = xr.DataArray(values, dims=dim_names, name=var_name)
    return dataset
146 changes: 146 additions & 0 deletions xhistogram/test/test_chunking.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,146 @@
import numpy as np
import pytest

from .fixtures import example_dataarray
from ..xarray import histogram


@pytest.mark.parametrize("weights", [False, True])
@pytest.mark.parametrize("chunksize", [1, 2, 3, 10])
@pytest.mark.parametrize("shape", [(10,), (10, 4)])
def test_chunked_weights(chunksize, shape, weights):
    """Chunked (optionally weighted) input must agree with ``np.histogram``."""
    chunking = (chunksize,)
    data_a = example_dataarray(shape).chunk(chunking)

    # ``weights`` is only the parametrized on/off flag; the actual weight
    # arrays are kept under separate names.
    weights_da = example_dataarray(shape).chunk(chunking) if weights else None
    weights_np = None if weights_da is None else weights_da.values

    nbins_a = 6
    bins_a = np.linspace(-4, 4, nbins_a + 1)

    h = histogram(data_a, bins=[bins_a], weights=weights_da)
    assert h.shape == (nbins_a,)

    expected = np.histogram(data_a.values, bins=bins_a, weights=weights_np)[0]
    np.testing.assert_allclose(expected, h.values)


@pytest.mark.parametrize("xchunksize", [1, 2, 3, 10])
@pytest.mark.parametrize("ychunksize", [1, 2, 3, 12])
class TestFixedSize2DChunks:
    """Histogram (10, 12) arrays under each combination of x/y chunk sizes."""

    def test_2d_chunks(self, xchunksize, ychunksize):
        """Reducing over every dimension matches ``np.histogram``."""
        chunking = (xchunksize, ychunksize)
        data_a = example_dataarray(shape=(10, 12)).chunk(chunking)

        nbins_a = 8
        bins_a = np.linspace(-4, 4, nbins_a + 1)

        h = histogram(data_a, bins=[bins_a])
        assert h.shape == (nbins_a,)

        expected = np.histogram(data_a.values, bins=bins_a)[0]
        np.testing.assert_allclose(expected, h.values)

    @pytest.mark.parametrize("reduce_dim", ["dim_0", "dim_1"])
    def test_2d_chunks_broadcast_dim(
        self,
        xchunksize,
        ychunksize,
        reduce_dim,
    ):
        """Reducing over one dimension keeps the other as a broadcast axis."""
        chunking = (xchunksize, ychunksize)
        data_a = example_dataarray(shape=(10, 12)).chunk(chunking)
        dims = list(data_a.dims)
        reduce_axis = dims.index(reduce_dim)
        broadcast_dim = next(name for name in dims if name != reduce_dim)

        nbins_a = 8
        bins_a = np.linspace(-4, 4, nbins_a + 1)

        h = histogram(data_a, bins=[bins_a], dim=(reduce_dim,))
        assert h.shape == (data_a.sizes[broadcast_dim], nbins_a)

        def _counts_only(*args, **kwargs):
            # np.histogram returns (counts, edges); only the counts are compared.
            return np.histogram(*args, **kwargs)[0]

        expected = np.apply_along_axis(
            _counts_only, axis=reduce_axis, arr=data_a.values, bins=bins_a
        )

        # apply_along_axis puts the bin axis where the reduced axis was, so
        # when axis 0 is reduced the reference is (bins, broadcast) and the
        # histogram result has to be transposed to line up with it.
        actual = h.transpose() if reduce_dim == "dim_0" else h
        np.testing.assert_allclose(expected, actual.values)

    def test_2d_chunks_2d_hist(self, xchunksize, ychunksize):
        """A joint histogram of two arrays matches ``np.histogram2d``."""
        chunking = (xchunksize, ychunksize)
        data_a = example_dataarray(shape=(10, 12)).chunk(chunking)
        data_b = example_dataarray(shape=(10, 12)).chunk(chunking)

        nbins_a, nbins_b = 8, 9
        bins_a = np.linspace(-4, 4, nbins_a + 1)
        bins_b = np.linspace(-4, 4, nbins_b + 1)

        h = histogram(data_a, data_b, bins=[bins_a, bins_b])
        assert h.shape == (nbins_a, nbins_b)

        flat_a = data_a.values.ravel()
        flat_b = data_b.values.ravel()
        expected = np.histogram2d(flat_a, flat_b, bins=[bins_a, bins_b])[0]
        np.testing.assert_allclose(expected, h.values)


@pytest.mark.parametrize("xchunksize", [1, 2, 3, 10])
@pytest.mark.parametrize("ychunksize", [1, 2, 3, 12])
class TestUnalignedChunks:
    """Inputs whose chunk sizes differ by one along each dimension."""

    def test_unaligned_data_chunks(self, xchunksize, ychunksize):
        """Two data arrays with different chunking match ``np.histogram2d``."""
        base = (xchunksize, ychunksize)
        shifted = (xchunksize + 1, ychunksize + 1)
        data_a = example_dataarray(shape=(10, 12)).chunk(base)
        data_b = example_dataarray(shape=(10, 12)).chunk(shifted)

        nbins_a, nbins_b = 8, 9
        bins_a = np.linspace(-4, 4, nbins_a + 1)
        bins_b = np.linspace(-4, 4, nbins_b + 1)

        h = histogram(data_a, data_b, bins=[bins_a, bins_b])
        assert h.shape == (nbins_a, nbins_b)

        flat_a = data_a.values.ravel()
        flat_b = data_b.values.ravel()
        expected = np.histogram2d(flat_a, flat_b, bins=[bins_a, bins_b])[0]
        np.testing.assert_allclose(expected, h.values)

    def test_unaligned_weights_chunks(self, xchunksize, ychunksize):
        """Weights chunked differently from the data match ``np.histogram``."""
        base = (xchunksize, ychunksize)
        shifted = (xchunksize + 1, ychunksize + 1)
        data_a = example_dataarray(shape=(10, 12)).chunk(base)
        weights = example_dataarray(shape=(10, 12)).chunk(shifted)

        nbins_a = 8
        bins_a = np.linspace(-4, 4, nbins_a + 1)

        h = histogram(data_a, bins=[bins_a], weights=weights)
        assert h.shape == (nbins_a,)

        expected = np.histogram(
            data_a.values, bins=bins_a, weights=weights.values
        )[0]
        np.testing.assert_allclose(expected, h.values)
86 changes: 86 additions & 0 deletions xhistogram/test/test_chunking_hypotheses.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,86 @@
import numpy as np
import pytest

from .fixtures import example_dataarray, example_dataset
from ..xarray import histogram

pytest.importorskip("hypothesis")

import hypothesis.strategies as st # noqa
from hypothesis import given # noqa


@st.composite
def chunk_shapes(draw, n_dim=3, max_arr_len=10):
    """Generate different chunking patterns for an N-D array of data.

    Draws one integer chunk length in ``[1, max_arr_len]`` per dimension and
    returns them as a tuple of length ``n_dim``.
    """
    chunk_length = st.integers(min_value=1, max_value=max_arr_len)
    return tuple(draw(chunk_length) for _ in range(n_dim))


class TestChunkingHypotheses:
    """Property-based tests: results must not depend on how input is chunked.

    Each test receives a tuple of per-dimension chunk lengths drawn from the
    ``chunk_shapes`` strategy, histograms randomly generated data chunked that
    way, and compares the result with the equivalent NumPy histogram of the
    same values.
    """

    @given(chunk_shapes(n_dim=1, max_arr_len=20))
    def test_all_chunking_patterns_1d(self, chunks):
        """1-D data: any chunk length from 1 to 20 matches ``np.histogram``."""
        data = example_dataarray(shape=(20,)).chunk(chunks)

        nbins_a = 8
        bins = np.linspace(-4, 4, nbins_a + 1)

        h = histogram(data, bins=[bins])

        assert h.shape == (nbins_a,)

        hist, _ = np.histogram(data.values, bins=bins)

        # Compare against the underlying array, as the other tests here do.
        np.testing.assert_allclose(hist, h.values)

    # TODO mark as slow?
    @given(chunk_shapes(n_dim=2, max_arr_len=8))
    def test_all_chunking_patterns_2d(self, chunks):
        """Two 2-D arrays with equal chunking match ``np.histogram2d``."""
        data_a = example_dataarray(shape=(5, 20)).chunk(chunks)
        data_b = example_dataarray(shape=(5, 20)).chunk(chunks)

        nbins_a = 8
        nbins_b = 9
        bins_a = np.linspace(-4, 4, nbins_a + 1)
        bins_b = np.linspace(-4, 4, nbins_b + 1)

        h = histogram(data_a, data_b, bins=[bins_a, bins_b])

        assert h.shape == (nbins_a, nbins_b)

        hist, _, _ = np.histogram2d(
            data_a.values.ravel(),
            data_b.values.ravel(),
            bins=[bins_a, bins_b],
        )

        np.testing.assert_allclose(hist, h.values)

    # TODO mark as slow?
    @pytest.mark.parametrize("n_vars", [1, 2, 3, 4])
    @given(chunk_shapes(n_dim=2, max_arr_len=7))
    def test_all_chunking_patterns_dd_hist(self, n_vars, chunks):
        """``n_vars`` dataset variables match ``np.histogramdd``."""
        # The drawn value is named ``chunks`` (not ``chunk_shapes``) so it does
        # not shadow the strategy function; the strategy is passed to ``given``
        # positionally, so the parameter name is free to change.
        ds = example_dataset(n_dim=2, n_vars=n_vars)
        # ``sizes`` is the dimension-name -> length mapping; iterating it
        # yields the dimension names to pair with the drawn chunk lengths.
        ds = ds.chunk(dict(zip(ds.sizes, chunks)))

        n_bins = (7, 8, 9, 10)[:n_vars]
        bins = [np.linspace(-4, 4, n + 1) for n in n_bins]

        # Materialise once so both the histogram call and the NumPy reference
        # see the variables in the same order.
        variables = list(ds.data_vars.values())

        h = histogram(*variables, bins=bins)

        assert h.shape == n_bins

        # histogramdd expects samples as rows: shape (n_points, n_vars).
        input_data = np.stack([da.values.ravel() for da in variables], axis=-1)
        hist, _ = np.histogramdd(input_data, bins=bins)

        np.testing.assert_allclose(hist, h.values)

0 comments on commit 3144ebd

Please sign in to comment.