From 85d582474388d65276af37fddf2b951dedd56c87 Mon Sep 17 00:00:00 2001
From: Isaac Virshup
Date: Wed, 17 Mar 2021 21:38:01 +1100
Subject: [PATCH] Initial CSC/ CSR classes (#442)

* Initial csc/ csr

* CSR/CSC from scipy conversion

* black

* Remove tocsc, tocsr

* Share __init__ between compressed 2d and GCXS

* Added support for GCXS(spmatrix)

* CSR/ CSC transpose

* Add test for transpose failure

* Add a few missing tests

* Remove instance check in GCXS init
---
Review notes (text in this section is not part of the commit message):
* GCXS.__init__: the new GCXS branch now passes the requested
  `compressed_axes` argument to `change_compressed_axes`. It previously
  read `self.compressed_axes`, which is only assigned further down in
  the same `__init__`.
* CSR/CSC: the `compressed_axes` property is annotated `Tuple[int]`
  (it returns a tuple, not an int), and the bare `raise ValueError()`
  calls now carry a message.
* Follow-up: `indices`, `data` and `random` are imported at the top of
  test_compressed_2d.py but never referenced in that file.

 sparse/_compressed/__init__.py     |   2 +-
 sparse/_compressed/compressed.py   | 102 +++++++++++++++++++++++-
 sparse/tests/test_compressed_2d.py | 124 +++++++++++++++++++++++++++++
 3 files changed, 226 insertions(+), 2 deletions(-)
 create mode 100644 sparse/tests/test_compressed_2d.py

diff --git a/sparse/_compressed/__init__.py b/sparse/_compressed/__init__.py
index 6d2b1853..76d0d803 100644
--- a/sparse/_compressed/__init__.py
+++ b/sparse/_compressed/__init__.py
@@ -1,2 +1,2 @@
-from .compressed import GCXS
+from .compressed import GCXS, CSC, CSR
 from .common import stack, concatenate
diff --git a/sparse/_compressed/compressed.py b/sparse/_compressed/compressed.py
index fd24b30e..3d04876f 100644
--- a/sparse/_compressed/compressed.py
+++ b/sparse/_compressed/compressed.py
@@ -5,6 +5,8 @@
 from functools import reduce
 from collections.abc import Iterable
 import scipy.sparse as ss
+from scipy.sparse import compressed
+from typing import Tuple
 
 from .._sparse_array import SparseArray, _reduce_super_ufunc
 from .._coo.common import linear_loc
@@ -136,6 +138,9 @@ def __init__(
         idx_dtype=None,
     ):
 
+        if isinstance(arg, ss.spmatrix):
+            arg = self.from_scipy_sparse(arg)
+
         if isinstance(arg, np.ndarray):
             (arg, shape, compressed_axes, fill_value) = _from_coo(
                 COO(arg), compressed_axes
@@ -146,6 +151,16 @@ def __init__(
                 arg, compressed_axes, idx_dtype
             )
 
+        elif isinstance(arg, GCXS):
+            if compressed_axes is not None and arg.compressed_axes != compressed_axes:
+                arg = arg.change_compressed_axes(compressed_axes)
+            (arg, shape, compressed_axes, fill_value) = (
+                (arg.data, arg.indices, arg.indptr),
+                arg.shape,
+                arg.compressed_axes,
+                arg.fill_value,
+            )
+
         if shape is None:
             raise ValueError("missing `shape` argument")
 
@@ -160,6 +175,7 @@ def __init__(
             raise ValueError("data must be a scalar or 1-dimensional.")
 
         self.shape = shape
+
         self.compressed_axes = (
             tuple(compressed_axes) if isinstance(compressed_axes, Iterable) else None
         )
@@ -440,7 +456,7 @@ def todense(self):
 
     def todok(self):
 
-        from ..dok import DOK
+        from .. import DOK
 
         return DOK.from_coo(self.tocoo())  # probably a temporary solution
 
@@ -496,6 +512,10 @@ def asformat(self, format, compressed_axes=None):
             return self.tocoo()
         elif format == "dok":
             return self.todok()
+        elif format == "csr":
+            return CSR(self)
+        elif format == "csc":
+            return CSC(self)
         elif format == "gcxs":
             if compressed_axes is None:
                 compressed_axes = self.compressed_axes
@@ -817,3 +837,83 @@ def _prune(self):
             self.indptr = indptr
         else:
             self.indices = self.indices[mask]
+
+
+class Compressed2d(GCXS):
+    def __init__(self, arg, shape=None, prune=False, fill_value=0):
+        if not hasattr(arg, "shape") and shape is None:
+            raise ValueError("missing `shape` argument")
+        if shape is not None and hasattr(arg, "shape"):
+            raise NotImplementedError("Cannot change shape in constructor")
+        nd = len(shape if shape is not None else arg.shape)
+        if nd != 2:
+            raise ValueError(f"{type(self).__name__} must be 2-d, passed {nd}-d shape.")
+
+        super().__init__(
+            arg,
+            shape=shape,
+            compressed_axes=self.compressed_axes,
+            prune=prune,
+            fill_value=fill_value,
+        )
+
+    def __str__(self):
+        return "<{}: shape={}, dtype={}, nnz={}, fill_value={}>".format(
+            type(self).__name__,
+            self.shape,
+            self.dtype,
+            self.nnz,
+            self.fill_value,
+        )
+
+    __repr__ = __str__
+
+    @property
+    def ndim(self) -> int:
+        return 2
+
+
+class CSR(Compressed2d):
+    @classmethod
+    def from_scipy_sparse(cls, x):
+        x = x.asformat("csr", copy=False)
+        return cls((x.data, x.indices, x.indptr), shape=x.shape)
+
+    @property
+    def compressed_axes(self) -> Tuple[int]:
+        return (0,)
+
+    @compressed_axes.setter
+    def compressed_axes(self, val):
+        if val != self.compressed_axes:
+            raise ValueError(f"compressed_axes must be {self.compressed_axes}.")
+
+    def transpose(self, axes: None = None, copy: bool = False) -> "CSC":
+        if axes is not None:
+            raise ValueError("`axes` is not supported for 2-d compressed arrays.")
+        if copy:
+            self = self.copy()
+        return CSC((self.data, self.indices, self.indptr), self.shape[::-1])
+
+
+class CSC(Compressed2d):
+    @classmethod
+    def from_scipy_sparse(cls, x):
+        x = x.asformat("csc", copy=False)
+        return cls((x.data, x.indices, x.indptr), shape=x.shape)
+
+    @property
+    def compressed_axes(self) -> Tuple[int]:
+        return (1,)
+
+    @compressed_axes.setter
+    def compressed_axes(self, val):
+        if val != self.compressed_axes:
+            raise ValueError(f"compressed_axes must be {self.compressed_axes}.")
+
+    def transpose(self, axes: None = None, copy: bool = False) -> CSR:
+        if axes is not None:
+            raise ValueError("`axes` is not supported for 2-d compressed arrays.")
+        if copy:
+            self = self.copy()
+        return CSR((self.data, self.indices, self.indptr), self.shape[::-1])
diff --git a/sparse/tests/test_compressed_2d.py b/sparse/tests/test_compressed_2d.py
new file mode 100644
index 00000000..d116dc16
--- /dev/null
+++ b/sparse/tests/test_compressed_2d.py
@@ -0,0 +1,124 @@
+import numpy as np
+from numpy.core.numeric import indices
+import pytest
+import scipy.sparse
+from scipy.sparse import data
+from scipy.sparse.construct import random
+import scipy.stats
+
+import sparse
+from sparse import COO
+from sparse._compressed.compressed import GCXS, CSR, CSC
+from sparse._utils import assert_eq
+
+
+@pytest.fixture(scope="module", params=[CSR, CSC])
+def cls(request):
+    return request.param
+
+
+@pytest.fixture(scope="module", params=["f8", "f4", "i8", "i4"])
+def dtype(request):
+    return request.param
+
+
+@pytest.fixture(scope="module")
+def random_sparse(cls, dtype):
+    if np.issubdtype(dtype, np.integer):
+
+        def data_rvs(n):
+            return np.random.randint(-1000, 1000, n)
+
+    else:
+        data_rvs = None
+    return cls(sparse.random((20, 30), density=0.25, data_rvs=data_rvs).astype(dtype))
+
+
+@pytest.fixture(scope="module")
+def random_sparse_small(cls, dtype):
+    if np.issubdtype(dtype, np.integer):
+
+        def data_rvs(n):
+            return np.random.randint(-10, 10, n)
+
+    else:
+        data_rvs = None
+    return cls(
+        sparse.random((20, 30, 40), density=0.25, data_rvs=data_rvs).astype(dtype)
+    )
+
+
+def test_repr(random_sparse):
+    cls = type(random_sparse).__name__
+
+    str_repr = repr(random_sparse)
+    assert cls in str_repr
+
+
+def test_bad_constructor_input(cls):
+    with pytest.raises(ValueError, match=r".*shape.*"):
+        cls(arg="hello world")
+
+
+@pytest.mark.parametrize("n", [0, 1, 3])
+def test_bad_nd_input(cls, n):
+    a = np.ones(shape=tuple(5 for _ in range(n)))
+    with pytest.raises(ValueError, match=f"{n}-d"):
+        cls(a)
+
+
+@pytest.mark.parametrize("source_type", ["gcxs", "coo"])
+def test_from_sparse(cls, source_type):
+    gcxs = sparse.random((20, 30), density=0.25, format=source_type)
+    result = cls(gcxs)
+
+    assert_eq(result, gcxs)
+
+
+@pytest.mark.parametrize("scipy_type", ["coo", "csr", "csc", "lil"])
+@pytest.mark.parametrize("CLS", [CSR, CSC, GCXS])
+def test_from_scipy_sparse(scipy_type, CLS, dtype):
+    orig = scipy.sparse.random(20, 30, density=0.2, format=scipy_type, dtype=dtype)
+    ref = COO.from_scipy_sparse(orig)
+    result = CLS.from_scipy_sparse(orig)
+
+    assert_eq(ref, result)
+
+    result_via_init = CLS(orig)
+
+    assert_eq(ref, result_via_init)
+
+
+@pytest.mark.parametrize("cls_str", ["coo", "dok", "csr", "csc", "gcxs"])
+def test_to_sparse(cls_str, random_sparse):
+    result = random_sparse.asformat(cls_str)
+
+    assert_eq(random_sparse, result)
+
+
+@pytest.mark.parametrize("copy", [True, False])
+def test_transpose(random_sparse, copy):
+    from operator import is_, is_not
+
+    t = random_sparse.transpose(copy=copy)
+    tt = t.transpose(copy=copy)
+
+    # Check if a copy was made
+    if copy:
+        check = is_not
+    else:
+        check = is_
+
+    assert check(random_sparse.data, t.data)
+    assert check(random_sparse.indices, t.indices)
+    assert check(random_sparse.indptr, t.indptr)
+
+    assert random_sparse.shape == t.shape[::-1]
+
+    assert_eq(random_sparse, tt)
+    assert type(random_sparse) == type(tt)
+
+
+def test_transpose_error(random_sparse):
+    with pytest.raises(ValueError):
+        random_sparse.transpose(axes=1)