Skip to content

Commit

Permalink
Initial CSC/ CSR classes (#442)
Browse files Browse the repository at this point in the history
* Initial csc/ csr

* CSR/CSC from scipy conversion

* black

* Remove tocsc, tocsr

* Share __init__ between compressed 2d and GCXS

* Added support for GCXS(spmatrix)

* CSR/ CSC transpose

* Add test for transpose failure

* Add a few missing tests

* Remove instance check in GCXS init
  • Loading branch information
ivirshup authored Mar 17, 2021
1 parent d0a4074 commit 85d5824
Show file tree
Hide file tree
Showing 3 changed files with 226 additions and 2 deletions.
2 changes: 1 addition & 1 deletion sparse/_compressed/__init__.py
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
from .compressed import GCXS
from .compressed import GCXS, CSC, CSR
from .common import stack, concatenate
102 changes: 101 additions & 1 deletion sparse/_compressed/compressed.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@
from functools import reduce
from collections.abc import Iterable
import scipy.sparse as ss
from scipy.sparse import compressed
from typing import Tuple

from .._sparse_array import SparseArray, _reduce_super_ufunc
from .._coo.common import linear_loc
Expand Down Expand Up @@ -136,6 +138,9 @@ def __init__(
idx_dtype=None,
):

if isinstance(arg, ss.spmatrix):
arg = self.from_scipy_sparse(arg)

if isinstance(arg, np.ndarray):
(arg, shape, compressed_axes, fill_value) = _from_coo(
COO(arg), compressed_axes
Expand All @@ -146,6 +151,16 @@ def __init__(
arg, compressed_axes, idx_dtype
)

elif isinstance(arg, GCXS):
if compressed_axes is not None and arg.compressed_axes != compressed_axes:
arg = arg.change_compressed_axes(self.compressed_axes)
(arg, shape, compressed_axes, fill_value) = (
(arg.data, arg.indices, arg.indptr),
arg.shape,
arg.compressed_axes,
arg.fill_value,
)

if shape is None:
raise ValueError("missing `shape` argument")

Expand All @@ -160,6 +175,7 @@ def __init__(
raise ValueError("data must be a scalar or 1-dimensional.")

self.shape = shape

self.compressed_axes = (
tuple(compressed_axes) if isinstance(compressed_axes, Iterable) else None
)
Expand Down Expand Up @@ -440,7 +456,7 @@ def todense(self):

def todok(self):
    """Convert this array to DOK format by way of COO.

    Returns
    -------
    DOK
        The result of ``DOK.from_coo(self.tocoo())``.
    """
    # Only the package-level import is kept; the older ``from ..dok import DOK``
    # line was superseded by it.
    # NOTE(review): the import is local, presumably to avoid a circular
    # import at module load time -- confirm.
    from .. import DOK

    return DOK.from_coo(self.tocoo())  # probably a temporary solution

Expand Down Expand Up @@ -496,6 +512,10 @@ def asformat(self, format, compressed_axes=None):
return self.tocoo()
elif format == "dok":
return self.todok()
elif format == "csr":
return CSR(self)
elif format == "csc":
return CSC(self)
elif format == "gcxs":
if compressed_axes is None:
compressed_axes = self.compressed_axes
Expand Down Expand Up @@ -817,3 +837,83 @@ def _prune(self):
self.indptr = indptr
else:
self.indices = self.indices[mask]


class Compressed2d(GCXS):
    """Common base for strictly two-dimensional compressed arrays.

    The constructor reads ``self.compressed_axes`` before delegating to
    ``GCXS.__init__``, so a concrete subclass has to provide that attribute.
    """

    def __init__(self, arg, shape=None, prune=False, fill_value=0):
        """Validate that the input is 2-d, then defer to ``GCXS.__init__``.

        Parameters
        ----------
        arg
            Source data. If it has a ``shape`` attribute, that shape is used.
        shape : optional
            Required when ``arg`` has no ``shape`` attribute; must not be
            given when it does.
        prune, fill_value
            Forwarded unchanged to ``GCXS.__init__``.

        Raises
        ------
        ValueError
            If no shape can be determined, or the shape is not 2-d.
        NotImplementedError
            If ``shape`` is given for an ``arg`` that already has a shape.
        """
        arg_has_shape = hasattr(arg, "shape")
        if shape is None:
            if not arg_has_shape:
                raise ValueError("missing `shape` argument")
            effective_shape = arg.shape
        else:
            if arg_has_shape:
                raise NotImplementedError("Cannot change shape in constructor")
            effective_shape = shape

        nd = len(effective_shape)
        if nd != 2:
            raise ValueError(f"{type(self).__name__} must be 2-d, passed {nd}-d shape.")

        super().__init__(
            arg,
            shape=shape,
            compressed_axes=self.compressed_axes,
            prune=prune,
            fill_value=fill_value,
        )

    def __str__(self):
        """Return a one-line summary: class name, shape, dtype, nnz, fill value."""
        template = "<{}: shape={}, dtype={}, nnz={}, fill_value={}>"
        fields = (
            type(self).__name__,
            self.shape,
            self.dtype,
            self.nnz,
            self.fill_value,
        )
        return template.format(*fields)

    __repr__ = __str__

    @property
    def ndim(self) -> int:
        """Number of dimensions; always 2 for this class."""
        return 2


class CSR(Compressed2d):
    """Two-dimensional compressed array whose compressed axis is fixed at ``(0,)``."""

    @classmethod
    def from_scipy_sparse(cls, x):
        """Build an instance from a SciPy sparse matrix.

        ``x`` is converted with ``x.asformat("csr", copy=False)``; the
        resulting ``data``, ``indices``, ``indptr`` and ``shape`` are passed
        to the constructor.
        """
        csr = x.asformat("csr", copy=False)
        return cls((csr.data, csr.indices, csr.indptr), shape=csr.shape)

    @property
    def compressed_axes(self) -> Tuple[int]:
        """The compressed axes; always ``(0,)``."""
        return (0,)

    @compressed_axes.setter
    def compressed_axes(self, val):
        """Accept only the fixed value.

        ``GCXS.__init__`` assigns ``self.compressed_axes``, so a setter must
        exist; assigning the fixed value is a no-op and anything else is
        rejected.

        Raises
        ------
        ValueError
            If ``val`` differs from ``self.compressed_axes``.
        """
        if val != self.compressed_axes:
            raise ValueError(
                f"{type(self).__name__} has fixed compressed_axes="
                f"{self.compressed_axes}; cannot set it to {val!r}."
            )

    def transpose(self, axes: None = None, copy: bool = False) -> "CSC":
        """Return the transpose as a ``CSC`` with the shape reversed.

        Parameters
        ----------
        axes : None
            Must be ``None``; any other value is rejected.
        copy : bool
            If true, ``self.copy()`` is taken first and the result is built
            from the copy's ``data``, ``indices`` and ``indptr``.

        Raises
        ------
        ValueError
            If ``axes`` is not ``None``.
        """
        if axes is not None:
            raise ValueError(
                f"{type(self).__name__}.transpose does not support `axes`; "
                f"got {axes!r}."
            )
        source = self.copy() if copy else self
        return CSC((source.data, source.indices, source.indptr), source.shape[::-1])


class CSC(Compressed2d):
    """Two-dimensional compressed array whose compressed axis is fixed at ``(1,)``."""

    @classmethod
    def from_scipy_sparse(cls, x):
        """Build an instance from a SciPy sparse matrix.

        ``x`` is converted with ``x.asformat("csc", copy=False)``; the
        resulting ``data``, ``indices``, ``indptr`` and ``shape`` are passed
        to the constructor.
        """
        csc = x.asformat("csc", copy=False)
        return cls((csc.data, csc.indices, csc.indptr), shape=csc.shape)

    @property
    def compressed_axes(self) -> Tuple[int]:
        """The compressed axes; always ``(1,)``."""
        return (1,)

    @compressed_axes.setter
    def compressed_axes(self, val):
        """Accept only the fixed value.

        ``GCXS.__init__`` assigns ``self.compressed_axes``, so a setter must
        exist; assigning the fixed value is a no-op and anything else is
        rejected.

        Raises
        ------
        ValueError
            If ``val`` differs from ``self.compressed_axes``.
        """
        if val != self.compressed_axes:
            raise ValueError(
                f"{type(self).__name__} has fixed compressed_axes="
                f"{self.compressed_axes}; cannot set it to {val!r}."
            )

    def transpose(self, axes: None = None, copy: bool = False) -> CSR:
        """Return the transpose as a ``CSR`` with the shape reversed.

        Parameters
        ----------
        axes : None
            Must be ``None``; any other value is rejected.
        copy : bool
            If true, ``self.copy()`` is taken first and the result is built
            from the copy's ``data``, ``indices`` and ``indptr``.

        Raises
        ------
        ValueError
            If ``axes`` is not ``None``.
        """
        if axes is not None:
            raise ValueError(
                f"{type(self).__name__}.transpose does not support `axes`; "
                f"got {axes!r}."
            )
        source = self.copy() if copy else self
        return CSR((source.data, source.indices, source.indptr), source.shape[::-1])
124 changes: 124 additions & 0 deletions sparse/tests/test_compressed_2d.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,124 @@
import numpy as np
from numpy.core.numeric import indices
import pytest
import scipy.sparse
from scipy.sparse import data
from scipy.sparse.construct import random
import scipy.stats

import sparse
from sparse import COO
from sparse._compressed.compressed import GCXS, CSR, CSC
from sparse._utils import assert_eq


@pytest.fixture(scope="module", params=[CSR, CSC])
def cls(request):
    """Yield each 2-d compressed class under test (CSR, then CSC)."""
    array_class = request.param
    return array_class


@pytest.fixture(scope="module", params=["f8", "f4", "i8", "i4"])
def dtype(request):
    """Yield each dtype string the tests are parametrized over."""
    dtype_code = request.param
    return dtype_code


@pytest.fixture(scope="module")
def random_sparse(cls, dtype):
    """Random (20, 30) array of density 0.25, cast to ``dtype`` and wrapped in ``cls``.

    For integer dtypes the values are drawn with
    ``np.random.randint(-1000, 1000, n)``; otherwise ``data_rvs`` is left as
    ``None``.
    """

    def draw_integers(n):
        return np.random.randint(-1000, 1000, n)

    sampler = draw_integers if np.issubdtype(dtype, np.integer) else None
    source = sparse.random((20, 30), density=0.25, data_rvs=sampler)
    return cls(source.astype(dtype))


@pytest.fixture(scope="module")
def random_sparse_small(cls, dtype):
    """Random (20, 30) array with small-magnitude values, wrapped in ``cls``.

    For integer dtypes the values are drawn with
    ``np.random.randint(-10, 10, n)``; otherwise ``data_rvs`` is left as
    ``None``.

    The source array is 2-d because ``cls`` is CSR or CSC, and their shared
    constructor raises ``ValueError`` for any shape that is not 2-d; a 3-d
    source would make this fixture impossible to build.
    """
    if np.issubdtype(dtype, np.integer):

        def data_rvs(n):
            return np.random.randint(-10, 10, n)

    else:
        data_rvs = None
    return cls(sparse.random((20, 30), density=0.25, data_rvs=data_rvs).astype(dtype))


def test_repr(random_sparse):
    """The repr of an array mentions the name of its concrete class."""
    class_name = type(random_sparse).__name__
    assert class_name in repr(random_sparse)


def test_bad_constructor_input(cls):
    """An argument with no shape, and no explicit shape, is rejected."""
    shape_error = pytest.raises(ValueError, match=r".*shape.*")
    with shape_error:
        cls(arg="hello world")


@pytest.mark.parametrize("n", [0, 1, 3])
def test_bad_nd_input(cls, n):
    """Dense input that is not 2-d raises an error naming its dimensionality."""
    dense = np.ones(shape=(5,) * n)
    with pytest.raises(ValueError, match=f"{n}-d"):
        cls(dense)


@pytest.mark.parametrize("source_type", ["gcxs", "coo"])
def test_from_sparse(cls, source_type):
    """Constructing from a GCXS or COO array yields an equal array."""
    source = sparse.random((20, 30), density=0.25, format=source_type)
    assert_eq(cls(source), source)


@pytest.mark.parametrize("scipy_type", ["coo", "csr", "csc", "lil"])
@pytest.mark.parametrize("CLS", [CSR, CSC, GCXS])
def test_from_scipy_sparse(scipy_type, CLS, dtype):
    """``from_scipy_sparse`` and the plain constructor both match the COO conversion."""
    scipy_matrix = scipy.sparse.random(
        20, 30, density=0.2, format=scipy_type, dtype=dtype
    )
    expected = COO.from_scipy_sparse(scipy_matrix)

    # Exercise the explicit classmethod first, then the constructor path.
    for build in (CLS.from_scipy_sparse, CLS):
        assert_eq(expected, build(scipy_matrix))


@pytest.mark.parametrize("cls_str", ["coo", "dok", "csr", "csc", "gcxs"])
def test_to_sparse(cls_str, random_sparse):
    """``asformat`` to each supported format yields an equal array."""
    converted = random_sparse.asformat(cls_str)
    assert_eq(random_sparse, converted)


@pytest.mark.parametrize("copy", [True, False])
def test_transpose(random_sparse, copy):
    """Transposing reverses the shape, honours ``copy``, and round-trips."""
    from operator import is_, is_not

    transposed = random_sparse.transpose(copy=copy)
    round_trip = transposed.transpose(copy=copy)

    # With copy=True the buffers must be new objects; otherwise they are shared.
    check = is_not if copy else is_
    for attr in ("data", "indices", "indptr"):
        assert check(getattr(random_sparse, attr), getattr(transposed, attr))

    assert random_sparse.shape == transposed.shape[::-1]

    assert_eq(random_sparse, round_trip)
    assert type(random_sparse) == type(round_trip)


def test_transpose_error(random_sparse):
    """Passing ``axes`` to ``transpose`` raises ``ValueError``."""
    expect_value_error = pytest.raises(ValueError)
    with expect_value_error:
        random_sparse.transpose(axes=1)

0 comments on commit 85d5824

Please sign in to comment.