diff --git a/ci/environment-3.7.yml b/ci/environment-3.7.yml
index 355cfc8..376b2e4 100644
--- a/ci/environment-3.7.yml
+++ b/ci/environment-3.7.yml
@@ -7,6 +7,7 @@ dependencies:
   - dask
   - numpy=1.16
   - pytest
+  - hypothesis
   - pip
   - pip:
     - codecov
diff --git a/ci/environment-3.8.yml b/ci/environment-3.8.yml
index f09af4a..5c7d84b 100644
--- a/ci/environment-3.8.yml
+++ b/ci/environment-3.8.yml
@@ -7,6 +7,7 @@ dependencies:
   - dask
   - numpy=1.18
   - pytest
+  - hypothesis
   - pip
   - pip:
     - codecov
diff --git a/ci/environment-3.9.yml b/ci/environment-3.9.yml
index 92d153a..de46895 100644
--- a/ci/environment-3.9.yml
+++ b/ci/environment-3.9.yml
@@ -7,6 +7,7 @@ dependencies:
   - dask
   - numpy
   - pytest
+  - hypothesis
   - pip
   - pip:
     - codecov
diff --git a/xhistogram/test/fixtures.py b/xhistogram/test/fixtures.py
index 330d07b..fd1cf44 100644
--- a/xhistogram/test/fixtures.py
+++ b/xhistogram/test/fixtures.py
@@ -1,5 +1,8 @@
+import uuid
 import dask
 import dask.array as dsa
+import numpy as np
+import xarray as xr
 
 
 def empty_dask_array(shape, dtype=float, chunks=None):
@@ -12,3 +15,25 @@ def raise_if_computed():
         a = a.rechunk(chunks)
 
     return a
+
+
+def example_dataarray(shape=(5, 20)):
+    """Return a random-normal DataArray named "T" with dims dim_0, dim_1, ..."""
+    values = np.random.randn(*shape)
+    dim_names = [f"dim_{axis}" for axis, _ in enumerate(shape)]
+    return xr.DataArray(values, dims=dim_names, name="T")
+
+
+def example_dataset(n_dim=2, n_vars=2):
+    """Build a Dataset of ``n_vars`` same-shaped random variables.
+
+    The shape is ``(8, 9, ...)`` over dims ``dim_0, dim_1, ...``; each
+    variable is named with a fresh ``uuid4`` hex string.
+    """
+    sizes = tuple(8 + axis for axis in range(n_dim))
+    dim_names = [f"dim_{axis}" for axis, _ in enumerate(sizes)]
+    dataset = xr.Dataset()
+    for var_name in [uuid.uuid4().hex for _ in range(n_vars)]:
+        values = np.random.randn(*sizes)
+        dataset[var_name] = xr.DataArray(values, dims=dim_names, name=var_name)
+    return dataset
diff --git a/xhistogram/test/test_chunking.py b/xhistogram/test/test_chunking.py
new file mode 100644
index 0000000..796afaf
--- /dev/null
+++ b/xhistogram/test/test_chunking.py
@@ -0,0 +1,146 @@
+import numpy as np
+import pytest
+
+from .fixtures import example_dataarray
+from ..xarray import histogram
+
+
+@pytest.mark.parametrize("weights", [False, True])
+@pytest.mark.parametrize("chunksize", [1, 2, 3, 10])
+@pytest.mark.parametrize("shape", [(10,), (10, 4)])
+def test_chunked_weights(chunksize, shape, weights):
+    """Chunked (optionally weighted) histogram must match ``np.histogram``."""
+    chunking = (chunksize,)
+    data_a = example_dataarray(shape).chunk(chunking)
+
+    # The boolean parameter selects whether a chunked weights array is used.
+    weights_da = example_dataarray(shape).chunk(chunking) if weights else None
+    weights_np = None if weights_da is None else weights_da.values
+
+    n_bins = 6
+    edges = np.linspace(-4, 4, n_bins + 1)
+
+    result = histogram(data_a, bins=[edges], weights=weights_da)
+
+    assert result.shape == (n_bins,)
+
+    # Reference result from NumPy on the in-memory values.
+    expected, _ = np.histogram(data_a.values, bins=edges, weights=weights_np)
+
+    np.testing.assert_allclose(expected, result.values)
+
+
+@pytest.mark.parametrize("xchunksize", [1, 2, 3, 10])
+@pytest.mark.parametrize("ychunksize", [1, 2, 3, 12])
+class TestFixedSize2DChunks:
+    """Histograms of a (10, 12) array over a grid of 2D chunk sizes."""
+
+    def test_2d_chunks(self, xchunksize, ychunksize):
+        """Histogram of the whole array must match ``np.histogram``."""
+        chunking = (xchunksize, ychunksize)
+        data_a = example_dataarray(shape=(10, 12)).chunk(chunking)
+
+        n_bins = 8
+        edges = np.linspace(-4, 4, n_bins + 1)
+
+        result = histogram(data_a, bins=[edges])
+        assert result.shape == (n_bins,)
+
+        expected, _ = np.histogram(data_a.values, bins=edges)
+        np.testing.assert_allclose(expected, result.values)
+
+    @pytest.mark.parametrize("reduce_dim", ["dim_0", "dim_1"])
+    def test_2d_chunks_broadcast_dim(self, xchunksize, ychunksize, reduce_dim):
+        """Histogram along one dim must match a per-slice ``np.histogram``."""
+        chunking = (xchunksize, ychunksize)
+        data_a = example_dataarray(shape=(10, 12)).chunk(chunking)
+        dim_names = list(data_a.dims)
+        kept_dim = next(name for name in dim_names if name != reduce_dim)
+
+        n_bins = 8
+        edges = np.linspace(-4, 4, n_bins + 1)
+
+        result = histogram(data_a, bins=[edges], dim=(reduce_dim,))
+        assert result.shape == (data_a.sizes[kept_dim], n_bins)
+
+        def counts_only(*args, **kwargs):
+            # np.histogram returns (counts, edges); keep only the counts.
+            return np.histogram(*args, **kwargs)[0]
+
+        expected = np.apply_along_axis(
+            counts_only,
+            axis=dim_names.index(reduce_dim),
+            arr=data_a.values,
+            bins=edges,
+        )
+
+        # apply_along_axis puts the bin axis where the reduced axis was, so
+        # the result is transposed to line up when dim_0 was reduced.
+        if reduce_dim == "dim_0":
+            result = result.transpose()
+        np.testing.assert_allclose(expected, result.values)
+
+    def test_2d_chunks_2d_hist(self, xchunksize, ychunksize):
+        """Joint histogram of two arrays must match ``np.histogram2d``."""
+        chunking = (xchunksize, ychunksize)
+        data_a = example_dataarray(shape=(10, 12)).chunk(chunking)
+        data_b = example_dataarray(shape=(10, 12)).chunk(chunking)
+
+        n_bins_a, n_bins_b = 8, 9
+        edges_a = np.linspace(-4, 4, n_bins_a + 1)
+        edges_b = np.linspace(-4, 4, n_bins_b + 1)
+
+        result = histogram(data_a, data_b, bins=[edges_a, edges_b])
+        assert result.shape == (n_bins_a, n_bins_b)
+
+        # Reference: NumPy's 2D histogram of the flattened values.
+        flat_a = data_a.values.ravel()
+        flat_b = data_b.values.ravel()
+        expected, _, _ = np.histogram2d(flat_a, flat_b, bins=[edges_a, edges_b])
+
+        np.testing.assert_allclose(expected, result.values)
+
+
+@pytest.mark.parametrize("xchunksize", [1, 2, 3, 10])
+@pytest.mark.parametrize("ychunksize", [1, 2, 3, 12])
+class TestUnalignedChunks:
+    """Histograms whose inputs are chunked differently from one another."""
+
+    def test_unaligned_data_chunks(self, xchunksize, ychunksize):
+        """Differently-chunked data arrays must match ``np.histogram2d``."""
+        # data_b's chunks are one element larger along each dim than data_a's.
+        chunks_a = (xchunksize, ychunksize)
+        chunks_b = (xchunksize + 1, ychunksize + 1)
+        data_a = example_dataarray(shape=(10, 12)).chunk(chunks_a)
+        data_b = example_dataarray(shape=(10, 12)).chunk(chunks_b)
+
+        n_bins_a, n_bins_b = 8, 9
+        edges_a = np.linspace(-4, 4, n_bins_a + 1)
+        edges_b = np.linspace(-4, 4, n_bins_b + 1)
+
+        result = histogram(data_a, data_b, bins=[edges_a, edges_b])
+        assert result.shape == (n_bins_a, n_bins_b)
+
+        flat_a = data_a.values.ravel()
+        flat_b = data_b.values.ravel()
+        expected, _, _ = np.histogram2d(flat_a, flat_b, bins=[edges_a, edges_b])
+
+        np.testing.assert_allclose(expected, result.values)
+
+    def test_unaligned_weights_chunks(self, xchunksize, ychunksize):
+        """Differently-chunked weights must match ``np.histogram``."""
+        # The weights' chunks are one element larger along each dim.
+        data_chunks = (xchunksize, ychunksize)
+        weight_chunks = (xchunksize + 1, ychunksize + 1)
+        data_a = example_dataarray(shape=(10, 12)).chunk(data_chunks)
+        weights = example_dataarray(shape=(10, 12)).chunk(weight_chunks)
+
+        n_bins = 8
+        edges = np.linspace(-4, 4, n_bins + 1)
+
+        result = histogram(data_a, bins=[edges], weights=weights)
+        assert result.shape == (n_bins,)
+
+        expected, _ = np.histogram(data_a.values, bins=edges, weights=weights.values)
+
+        np.testing.assert_allclose(expected, result.values)
diff --git a/xhistogram/test/test_chunking_hypotheses.py b/xhistogram/test/test_chunking_hypotheses.py
new file mode 100644
index 0000000..7feb333
--- /dev/null
+++ b/xhistogram/test/test_chunking_hypotheses.py
@@ -0,0 +1,86 @@
+import numpy as np
+import pytest
+
+from .fixtures import example_dataarray, example_dataset
+from ..xarray import histogram
+
+pytest.importorskip("hypothesis")
+
+import hypothesis.strategies as st  # noqa
+from hypothesis import given  # noqa
+
+
+@st.composite
+def chunk_shapes(draw, n_dim=3, max_arr_len=10):
+    """Draw a tuple of ``n_dim`` chunk sizes for an N-D array.
+
+    Each size is an integer in ``[1, max_arr_len]``, one draw per dimension.
+    """
+    one_size = st.integers(min_value=1, max_value=max_arr_len)
+    return tuple(draw(one_size) for _ in range(n_dim))
+
+
+class TestChunkingHypotheses:
+    """Property-based checks over hypothesis-generated chunk sizes."""
+
+    @given(chunk_shapes(n_dim=1, max_arr_len=20))
+    def test_all_chunking_patterns_1d(self, chunks):
+        """1D histogram of a length-20 array must match ``np.histogram``."""
+        data = example_dataarray(shape=(20,)).chunk(chunks)
+
+        nbins_a = 8
+        bins = np.linspace(-4, 4, nbins_a + 1)
+
+        h = histogram(data, bins=[bins])
+
+        assert h.shape == (nbins_a,)
+
+        hist, _ = np.histogram(data.values, bins=bins)
+
+        # Compare plain arrays, as every other test in this class does.
+        np.testing.assert_allclose(hist, h.values)
+
+    # TODO mark as slow?
+    @given(chunk_shapes(n_dim=2, max_arr_len=8))
+    def test_all_chunking_patterns_2d(self, chunks):
+        """2D histogram of two (5, 20) arrays must match ``np.histogram2d``."""
+        data_a = example_dataarray(shape=(5, 20)).chunk(chunks)
+        data_b = example_dataarray(shape=(5, 20)).chunk(chunks)
+
+        nbins_a = 8
+        nbins_b = 9
+        bins_a = np.linspace(-4, 4, nbins_a + 1)
+        bins_b = np.linspace(-4, 4, nbins_b + 1)
+
+        h = histogram(data_a, data_b, bins=[bins_a, bins_b])
+
+        assert h.shape == (nbins_a, nbins_b)
+
+        hist, _, _ = np.histogram2d(
+            data_a.values.ravel(),
+            data_b.values.ravel(),
+            bins=[bins_a, bins_b],
+        )
+
+        np.testing.assert_allclose(hist, h.values)
+
+    # TODO mark as slow?
+    @pytest.mark.parametrize("n_vars", [1, 2, 3, 4])
+    @given(chunk_shapes(n_dim=2, max_arr_len=7))
+    def test_all_chunking_patterns_dd_hist(self, n_vars, chunks):
+        """N-D histogram of ``n_vars`` variables must match ``np.histogramdd``."""
+        # The drawn value is named ``chunks`` so that it does not shadow the
+        # module-level ``chunk_shapes`` strategy inside this test.
+        ds = example_dataset(n_dim=2, n_vars=n_vars)
+        ds = ds.chunk(dict(zip(ds.dims, chunks)))
+
+        n_bins = (7, 8, 9, 10)[:n_vars]
+        bins = [np.linspace(-4, 4, n + 1) for n in n_bins]
+
+        h = histogram(*ds.data_vars.values(), bins=bins)
+        assert h.shape == n_bins
+
+        columns = [da.values.ravel() for da in ds.data_vars.values()]
+        hist, _ = np.histogramdd(np.stack(columns, axis=-1), bins=bins)
+
+        np.testing.assert_allclose(hist, h.values)