Skip to content

Commit

Permalink
fix: load SparseDatasets into memory (#898)
Browse files Browse the repository at this point in the history
  • Loading branch information
Bento007 authored May 14, 2024
1 parent 05e9741 commit 717c9a9
Show file tree
Hide file tree
Showing 2 changed files with 17 additions and 1 deletion.
7 changes: 6 additions & 1 deletion cellxgene_schema_cli/cellxgene_schema/validate.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
import numpy as np
import pandas as pd
import scipy
from anndata._core.sparse_dataset import SparseDataset
from cellxgene_ontology_guide.ontology_parser import OntologyParser
from pandas.core.computation.ops import UndefinedVariableError
from scipy import sparse
Expand Down Expand Up @@ -1222,7 +1223,9 @@ def _validate_raw_data(self, x: Union[np.ndarray, sparse.spmatrix], is_sparse_ma
self._raw_layer_exists = False
self.errors.append("All non-zero values in raw matrix must be positive integers of type numpy.float32.")

def _validate_raw_data_with_in_tissue_0(self, x: Union[np.ndarray, sparse.spmatrix], is_sparse_matrix: bool):
def _validate_raw_data_with_in_tissue_0(
self, x: Union[np.ndarray, sparse.spmatrix, SparseDataset], is_sparse_matrix: bool
):
"""
Special case validation checks for Visium data with is_single = True and in_tissue column in obs where in_tissue
has at least one value 0. Static matrix size of 4992 rows, so chunking is not required.
Expand All @@ -1232,6 +1235,8 @@ def _validate_raw_data_with_in_tissue_0(self, x: Union[np.ndarray, sparse.spmatr
"""
has_tissue_0_non_zero_row = False
has_tissue_1_zero_row = False
if isinstance(x, SparseDataset):
x = x.to_memory()
if is_sparse_matrix:
nonzero_row_indices, _ = x.nonzero()
else: # must be dense matrix
Expand Down
11 changes: 11 additions & 0 deletions cellxgene_schema_cli/tests/test_validate.py
Original file line number Diff line number Diff line change
Expand Up @@ -325,6 +325,17 @@ def test__validate_spatial_visium_ok(self):
validator.validate_adata()
assert not validator.errors

def test__validate_from_file(self):
"""Testing compatibility with SparseDataset types in AnnData.

Writes the `adata_slide_seqv2` fixture to a temporary .h5ad file and passes the
file path to `validate_adata`, then expects no validation errors.
"""
validator: Validator = Validator()
validator._set_schema_def()
with tempfile.TemporaryDirectory() as temp_dir:
file_path = os.path.join(temp_dir, "slide_seqv2.h5ad")
adata_slide_seqv2.write_h5ad(file_path)
# Validate from the on-disk file path; no errors are expected.
# NOTE(review): presumably loading from a path is what yields a SparseDataset-backed matrix — confirm.
validator.validate_adata(file_path)
assert not validator.errors

def test__validate_spatial_visium_dense_matrix_ok(self):
validator: Validator = Validator()
validator._set_schema_def()
Expand Down

0 comments on commit 717c9a9

Please sign in to comment.