jakirkham · jakirkham · Oct 7, 2017 · Oct 7, 2017 · Oct 7, 2017 · Oct 7, 2017
diff --git a/dask_distance/__init__.py b/dask_distance/__init__.py
@@ -160,12 +160,9 @@ def pdist(X, metric="euclidean", **kwargs):
 
     result = cdist(X, X, metric, **kwargs)
 
-    result = dask.array.triu(result, 1)
-
-    indices = _compat._indices(result.shape, chunks=result.chunks)
-    mask = (indices[1] > indices[0])
-
-    result = _compat._ravel(result)[_compat._ravel(mask)]
+    result = dask.array.concatenate([
+        result[i, i + 1:] for i in range(0, len(result) - 1)
+    ])
 
     return result
 

diff --git a/dask_distance/_compat.py b/dask_distance/_compat.py
@@ -1,8 +1,6 @@
 # -*- coding: utf-8 -*-
 
 
-import itertools
-
 import numpy
 
 import dask
@@ -28,77 +26,3 @@ def _asarray(a):
         a = dask.array.from_array(a, a.shape)
 
     return a
-
-
-def _indices(dimensions, dtype=int, chunks=None):
-    """
-    Implements NumPy's ``indices`` for Dask Arrays.
-    Generates a grid of indices covering the dimensions provided.
-    The final array has the shape ``(len(dimensions), *dimensions)``. The
-    chunks are used to specify the chunking for axis 1 up to
-    ``len(dimensions)``. The 0th axis always has chunks of length 1.
-
-    Parameters
-    ----------
-    dimensions : sequence of ints
-        The shape of the index grid.
-    dtype : dtype, optional
-        Type to use for the array. Default is ``int``.
-    chunks : sequence of ints
-        The number of samples on each block. Note that the last block will
-        have fewer samples if ``len(array) % chunks != 0``.
-
-    Returns
-    -------
-    grid : dask array
-
-    Notes
-    -----
-    Borrowed from my Dask Array contribution.
-    """
-    if chunks is None:
-        raise ValueError("Must supply a chunks= keyword argument")
-
-    dimensions = tuple(dimensions)
-    dtype = numpy.dtype(dtype)
-    chunks = tuple(chunks)
-
-    if len(dimensions) != len(chunks):
-        raise ValueError("Need one more chunk than dimensions.")
-
-    grid = []
-    if numpy.prod(dimensions):
-        for i in range(len(dimensions)):
-            s = len(dimensions) * [None]
-            s[i] = slice(None)
-            s = tuple(s)
-
-            r = dask.array.arange(dimensions[i], dtype=dtype, chunks=chunks[i])
-            r = r[s]
-
-            for j in itertools.chain(range(i), range(i + 1, len(dimensions))):
-                r = r.repeat(dimensions[j], axis=j)
-
-            grid.append(r)
-
-    if grid:
-        grid = dask.array.stack(grid)
-    else:
-        grid = dask.array.empty(
-            (len(dimensions),) + dimensions, dtype=dtype, chunks=(1,) + chunks
-        )
-
-    return grid
-
-
-def _ravel(a):
-    a = _asarray(a)
-
-    r = a
-    try:
-        r = r.ravel()
-    except ValueError:
-        # Fallback for Dask pre-0.14.1.
-        r = r.rechunk(r.chunks[:1] + r.shape[1:]).ravel()
-
-    return r
diff --git a/tests/test__compat.py b/tests/test__compat.py
@@ -32,85 +32,3 @@ def test_asarray(x):
         x = np.asarray(x)
 
     dau.assert_eq(d, x)
-
-
-def test_indices_no_chunks():
-    with pytest.raises(ValueError):
-        dask_distance._compat._indices((1,))
-
-
-def test_indices_wrong_chunks():
-    with pytest.raises(ValueError):
-        dask_distance._compat._indices((1,), chunks=tuple())
-
-
-@pytest.mark.parametrize(
-    "dimensions, dtype, chunks",
-    [
-        (tuple(), int, tuple()),
-        (tuple(), float, tuple()),
-        ((0,), float, (1,)),
-        ((0, 1, 2), float, (1, 1, 2)),
-    ]
-)
-def test_empty_indicies(dimensions, dtype, chunks):
-    darr = dask_distance._compat._indices(dimensions, dtype, chunks=chunks)
-    nparr = np.indices(dimensions, dtype)
-
-    assert darr.shape == nparr.shape
-    assert darr.dtype == nparr.dtype
-
-    try:
-        dau.assert_eq(darr, nparr)
-    except IndexError:
-        if len(dimensions) and old_dask:
-            pytest.skip(
-                "Dask pre-0.14.0 is unable to compute this empty array."
-            )
-        else:
-            raise
-
-
-def test_indicies():
-    darr = dask_distance._compat._indices((1,), chunks=(1,))
-    nparr = np.indices((1,))
-    dau.assert_eq(darr, nparr)
-
-    darr = dask_distance._compat._indices((1,), float, chunks=(1,))
-    nparr = np.indices((1,), float)
-    dau.assert_eq(darr, nparr)
-
-    darr = dask_distance._compat._indices((2, 1), chunks=(2, 1))
-    nparr = np.indices((2, 1))
-    dau.assert_eq(darr, nparr)
-
-    darr = dask_distance._compat._indices((2, 3), chunks=(1, 2))
-    nparr = np.indices((2, 3))
-    dau.assert_eq(darr, nparr)
-
-
-@pytest.mark.parametrize(
-    "shape, dtype, chunks",
-    [
-        ((10, 11, 12), int, (3, 5, 5)),
-        ((10, 11, 12), float, (3, 5, 5)),
-        ((10, 11, 12), float, (3, 2, 2)),
-        ((20, 17, 31), float, (6, 5, 10)),
-    ]
-)
-@pytest.mark.parametrize(
-    "seed",
-    [
-        153,
-    ]
-)
-def test_ravel(shape, dtype, chunks, seed):
-    np.random.random(seed)
-
-    a = np.random.randint(0, 10, shape).astype(dtype)
-    d = da.from_array(a, chunks=chunks)
-
-    r_a = np.ravel(a)
-    r_d = dask_distance._compat._ravel(d)
-
-    dau.assert_eq(r_d, r_a)
diff --git a/tests/test_dask_distance.py b/tests/test_dask_distance.py
@@ -214,6 +214,7 @@ def test_2d_pdist(metric, kw, seed, u_shape, u_chunks):
     a_r = spdist.pdist(a_u, metric, **kw)
     d_r = dask_distance.pdist(d_u, metric, **kw)
 
+    assert d_r.shape == a_r.shape
     assert np.allclose(np.array(d_r)[()], a_r, equal_nan=True)
 
 
@@ -362,4 +363,5 @@ def test_2d_bool_pdist(metric, seed, u_shape, u_chunks):
     a_r = spdist.pdist(a_u, metric)
     d_r = dask_distance.pdist(d_u, metric)
 
+    assert d_r.shape == a_r.shape
     assert np.allclose(np.array(d_r)[()], a_r, equal_nan=True)