Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Initial CSC/ CSR classes #442

Merged
merged 11 commits into from
Mar 17, 2021
2 changes: 1 addition & 1 deletion sparse/_compressed/__init__.py
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
from .compressed import GCXS
from .compressed import GCXS, CSC, CSR
from .common import stack, concatenate
102 changes: 101 additions & 1 deletion sparse/_compressed/compressed.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@
from functools import reduce
from collections.abc import Iterable
import scipy.sparse as ss
from scipy.sparse import compressed
from typing import Tuple

from .._sparse_array import SparseArray, _reduce_super_ufunc
from .._coo.common import linear_loc
Expand Down Expand Up @@ -136,6 +138,9 @@ def __init__(
idx_dtype=None,
):

if isinstance(arg, ss.spmatrix):
arg = self.from_scipy_sparse(arg)

ivirshup marked this conversation as resolved.
Show resolved Hide resolved
if isinstance(arg, np.ndarray):
(arg, shape, compressed_axes, fill_value) = _from_coo(
COO(arg), compressed_axes
Expand All @@ -146,6 +151,16 @@ def __init__(
arg, compressed_axes, idx_dtype
)

elif isinstance(arg, GCXS):
if compressed_axes is not None and arg.compressed_axes != compressed_axes:
arg = arg.change_compressed_axes(self.compressed_axes)
(arg, shape, compressed_axes, fill_value) = (
(arg.data, arg.indices, arg.indptr),
arg.shape,
arg.compressed_axes,
arg.fill_value,
)

if shape is None:
raise ValueError("missing `shape` argument")

Expand All @@ -160,6 +175,7 @@ def __init__(
raise ValueError("data must be a scalar or 1-dimensional.")

self.shape = shape

self.compressed_axes = (
tuple(compressed_axes) if isinstance(compressed_axes, Iterable) else None
)
Expand Down Expand Up @@ -440,7 +456,7 @@ def todense(self):

def todok(self):

from ..dok import DOK
from .. import DOK

return DOK.from_coo(self.tocoo()) # probably a temporary solution

Expand Down Expand Up @@ -496,6 +512,10 @@ def asformat(self, format, compressed_axes=None):
return self.tocoo()
elif format == "dok":
return self.todok()
elif format == "csr":
return CSR(self)
elif format == "csc":
return CSC(self)
elif format == "gcxs":
if compressed_axes is None:
compressed_axes = self.compressed_axes
Expand Down Expand Up @@ -817,3 +837,83 @@ def _prune(self):
self.indptr = indptr
else:
self.indices = self.indices[mask]


class Compressed2d(GCXS):
    """Common base for the strictly two-dimensional compressed formats.

    The constructor forwards ``self.compressed_axes`` to ``GCXS.__init__``;
    this class does not define that attribute itself, so it is expected to
    be provided by a subclass (``CSR`` and ``CSC`` in this module do so).
    """

    def __init__(self, arg, shape=None, prune=False, fill_value=0):
        """Validate that the input is 2-d, then defer to ``GCXS.__init__``.

        Parameters
        ----------
        arg
            Source object. If it exposes a ``shape`` attribute that shape
            is used; otherwise ``shape`` must be given explicitly.
        shape
            Shape of the array; only accepted when ``arg`` has no ``shape``.
        prune, fill_value
            Passed through unchanged to ``GCXS.__init__``.

        Raises
        ------
        ValueError
            If no shape can be determined, or the shape is not 2-d.
        NotImplementedError
            If ``shape`` is given for an ``arg`` that already has a shape.
        """
        arg_has_shape = hasattr(arg, "shape")
        if shape is None:
            if not arg_has_shape:
                raise ValueError("missing `shape` argument")
            effective_shape = arg.shape
        else:
            if arg_has_shape:
                raise NotImplementedError("Cannot change shape in constructor")
            effective_shape = shape

        n_dims = len(effective_shape)
        if n_dims != 2:
            raise ValueError(
                f"{type(self).__name__} must be 2-d, passed {n_dims}-d shape."
            )

        super().__init__(
            arg,
            shape=shape,
            compressed_axes=self.compressed_axes,
            prune=prune,
            fill_value=fill_value,
        )

    def __str__(self):
        """Return a one-line summary: class name, shape, dtype, nnz, fill value."""
        return (
            f"<{type(self).__name__}: shape={self.shape}, dtype={self.dtype}, "
            f"nnz={self.nnz}, fill_value={self.fill_value}>"
        )

    # The summary string doubles as the repr.
    __repr__ = __str__

    @property
    def ndim(self) -> int:
        """Number of dimensions; always 2 for this family of classes."""
        return 2


class CSR(Compressed2d):
    """Two-dimensional compressed array whose compressed axes are fixed to ``(0,)``."""

    @classmethod
    def from_scipy_sparse(cls, x):
        """Build an instance from a SciPy sparse matrix.

        ``x`` is converted with ``x.asformat("csr", copy=False)`` and the
        resulting ``data``, ``indices`` and ``indptr`` arrays are handed to
        the constructor together with ``x.shape``.
        """
        x = x.asformat("csr", copy=False)
        return cls((x.data, x.indices, x.indptr), shape=x.shape)

    @property
    def compressed_axes(self) -> Tuple[int]:
        """The compressed axes of this format: always the tuple ``(0,)``."""
        return (0,)

    @compressed_axes.setter
    def compressed_axes(self, val):
        """Accept only the fixed value; anything else raises ``ValueError``.

        The setter stores nothing. It exists so that code assigning
        ``self.compressed_axes`` (as the base-class constructor does) keeps
        working as long as the assigned value matches the fixed one.
        """
        if val != self.compressed_axes:
            raise ValueError(
                f"{type(self).__name__} requires compressed_axes="
                f"{self.compressed_axes}, got {val!r}."
            )

    def transpose(self, axes: None = None, copy: bool = False) -> "CSC":
        """Return the transpose as a ``CSC`` array.

        The ``data``, ``indices`` and ``indptr`` arrays are passed to ``CSC``
        unchanged, together with the reversed shape.

        Parameters
        ----------
        axes
            Must be ``None``; any other value is rejected.
        copy
            If true, the arrays are taken from ``self.copy()`` rather than
            from ``self``.

        Raises
        ------
        ValueError
            If ``axes`` is not ``None``.
        """
        if axes is not None:
            raise ValueError(
                f"{type(self).__name__}.transpose only supports axes=None, "
                f"got {axes!r}."
            )
        # Avoid rebinding ``self``; pick the object whose buffers are reused.
        source = self.copy() if copy else self
        return CSC((source.data, source.indices, source.indptr), self.shape[::-1])


class CSC(Compressed2d):
    """Two-dimensional compressed array whose compressed axes are fixed to ``(1,)``."""

    @classmethod
    def from_scipy_sparse(cls, x):
        """Build an instance from a SciPy sparse matrix.

        ``x`` is converted with ``x.asformat("csc", copy=False)`` and the
        resulting ``data``, ``indices`` and ``indptr`` arrays are handed to
        the constructor together with ``x.shape``.
        """
        x = x.asformat("csc", copy=False)
        return cls((x.data, x.indices, x.indptr), shape=x.shape)

    @property
    def compressed_axes(self) -> Tuple[int]:
        """The compressed axes of this format: always the tuple ``(1,)``."""
        return (1,)

    @compressed_axes.setter
    def compressed_axes(self, val):
        """Accept only the fixed value; anything else raises ``ValueError``.

        The setter stores nothing. It exists so that code assigning
        ``self.compressed_axes`` (as the base-class constructor does) keeps
        working as long as the assigned value matches the fixed one.
        """
        if val != self.compressed_axes:
            raise ValueError(
                f"{type(self).__name__} requires compressed_axes="
                f"{self.compressed_axes}, got {val!r}."
            )

    def transpose(self, axes: None = None, copy: bool = False) -> CSR:
        """Return the transpose as a ``CSR`` array.

        The ``data``, ``indices`` and ``indptr`` arrays are passed to ``CSR``
        unchanged, together with the reversed shape.

        Parameters
        ----------
        axes
            Must be ``None``; any other value is rejected.
        copy
            If true, the arrays are taken from ``self.copy()`` rather than
            from ``self``.

        Raises
        ------
        ValueError
            If ``axes`` is not ``None``.
        """
        if axes is not None:
            raise ValueError(
                f"{type(self).__name__}.transpose only supports axes=None, "
                f"got {axes!r}."
            )
        # Avoid rebinding ``self``; pick the object whose buffers are reused.
        source = self.copy() if copy else self
        return CSR((source.data, source.indices, source.indptr), self.shape[::-1])
124 changes: 124 additions & 0 deletions sparse/tests/test_compressed_2d.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,124 @@
import numpy as np
from numpy.core.numeric import indices
import pytest
import scipy.sparse
from scipy.sparse import data
from scipy.sparse.construct import random
import scipy.stats

import sparse
from sparse import COO
from sparse._compressed.compressed import GCXS, CSR, CSC
from sparse._utils import assert_eq


@pytest.fixture(scope="module", params=[CSR, CSC])
def cls(request):
    """Parametrized fixture yielding each 2-d compressed class (CSR, then CSC)."""
    return request.param


@pytest.fixture(scope="module", params=["f8", "f4", "i8", "i4"])
def dtype(request):
    """Parametrized fixture yielding each dtype string under test."""
    return request.param


@pytest.fixture(scope="module")
def random_sparse(cls, dtype):
    """Module-scoped fixture: a random 20x30 ``cls`` array cast to ``dtype``.

    For integer dtypes the values are drawn with
    ``np.random.randint(-1000, 1000, n)``; otherwise ``data_rvs=None`` is
    passed through to ``sparse.random``.
    """
    data_rvs = None
    if np.issubdtype(dtype, np.integer):

        def data_rvs(n):
            return np.random.randint(-1000, 1000, n)

    source = sparse.random((20, 30), density=0.25, data_rvs=data_rvs)
    return cls(source.astype(dtype))


@pytest.fixture(scope="module")
def random_sparse_small(cls, dtype):
    """Module-scoped fixture: a random 20x30 ``cls`` array with small values.

    For integer dtypes the values are drawn with
    ``np.random.randint(-10, 10, n)``; otherwise ``data_rvs=None`` is passed
    through to ``sparse.random``.
    """
    if np.issubdtype(dtype, np.integer):

        def data_rvs(n):
            return np.random.randint(-10, 10, n)

    else:
        data_rvs = None
    # The shape has to be 2-d: CSR/CSC raise ValueError for any other
    # dimensionality, so the former (20, 30, 40) shape made this fixture
    # fail during setup for every parametrization.
    return cls(sparse.random((20, 30), density=0.25, data_rvs=data_rvs).astype(dtype))


def test_repr(random_sparse):
    """The repr of an array must mention the name of its concrete class."""
    class_name = type(random_sparse).__name__
    assert class_name in repr(random_sparse)


def test_bad_constructor_input(cls):
    """An argument without a shape (and no explicit shape) is rejected."""
    shape_complaint = r".*shape.*"
    with pytest.raises(ValueError, match=shape_complaint):
        cls(arg="hello world")


@pytest.mark.parametrize("n", [0, 1, 3])
def test_bad_nd_input(cls, n):
    """Dense inputs that are not 2-d raise a ValueError naming the rank."""
    # (5,) * n yields an n-tuple of fives, i.e. an n-dimensional shape.
    dense = np.ones(shape=(5,) * n)
    expected_message = f"{n}-d"
    with pytest.raises(ValueError, match=expected_message):
        cls(dense)


@pytest.mark.parametrize("source_type", ["gcxs", "coo"])
def test_from_sparse(cls, source_type):
    """Constructing ``cls`` from a GCXS or COO array preserves its contents."""
    source = sparse.random((20, 30), density=0.25, format=source_type)

    converted = cls(source)

    assert_eq(converted, source)


@pytest.mark.parametrize("scipy_type", ["coo", "csr", "csc", "lil"])
@pytest.mark.parametrize("CLS", [CSR, CSC, GCXS])
def test_from_scipy_sparse(scipy_type, CLS, dtype):
    """SciPy matrices convert identically via the classmethod and the constructor."""
    scipy_matrix = scipy.sparse.random(
        20, 30, density=0.2, format=scipy_type, dtype=dtype
    )
    expected = COO.from_scipy_sparse(scipy_matrix)

    # Route 1: the explicit alternate constructor.
    assert_eq(expected, CLS.from_scipy_sparse(scipy_matrix))

    # Route 2: passing the SciPy matrix straight to ``__init__``.
    assert_eq(expected, CLS(scipy_matrix))


@pytest.mark.parametrize("cls_str", ["coo", "dok", "csr", "csc", "gcxs"])
def test_to_sparse(cls_str, random_sparse):
    """``asformat`` to every supported format keeps the array contents."""
    converted = random_sparse.asformat(cls_str)
    assert_eq(random_sparse, converted)


@pytest.mark.parametrize("copy", [True, False])
def test_transpose(random_sparse, copy):
    """Transposing twice round-trips; ``copy`` controls buffer sharing."""
    from operator import is_, is_not

    transposed = random_sparse.transpose(copy=copy)
    round_trip = transposed.transpose(copy=copy)

    # With copy=True the buffers must be distinct objects; otherwise shared.
    identity_check = is_not if copy else is_
    for attr in ("data", "indices", "indptr"):
        assert identity_check(
            getattr(random_sparse, attr), getattr(transposed, attr)
        )

    assert random_sparse.shape == transposed.shape[::-1]

    assert_eq(random_sparse, round_trip)
    assert type(random_sparse) == type(round_trip)


def test_transpose_error(random_sparse):
    """Passing an explicit ``axes`` argument to ``transpose`` is rejected."""
    unsupported_axes = 1
    with pytest.raises(ValueError):
        random_sparse.transpose(axes=unsupported_axes)