Skip to content

Commit

Permalink
chore: bump dependencies (#982)
Browse files Browse the repository at this point in the history
Co-authored-by: Bento007 <[email protected]>
Co-authored-by: Trent Smith <[email protected]>
  • Loading branch information
3 people authored Aug 15, 2024
1 parent 6a04066 commit 95d1cdf
Show file tree
Hide file tree
Showing 8 changed files with 46 additions and 64 deletions.
8 changes: 5 additions & 3 deletions cellxgene_schema_cli/cellxgene_schema/validate.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
import scipy
from anndata._core.sparse_dataset import SparseDataset
from cellxgene_ontology_guide.ontology_parser import OntologyParser
from pandas.core.computation.ops import UndefinedVariableError
from pandas.errors import UndefinedVariableError
from scipy import sparse

from . import gencode, schema
Expand Down Expand Up @@ -1236,9 +1236,11 @@ def _validate_raw_data_with_in_tissue_0(
else: # must be dense matrix
nonzero_row_indices = np.where(np.any(x != 0, axis=1))[0]
for i in range(x.shape[0]):
if not has_tissue_0_non_zero_row and i in nonzero_row_indices and self.adata.obs["in_tissue"][i] == 0:
if not has_tissue_0_non_zero_row and i in nonzero_row_indices and self.adata.obs["in_tissue"].iloc[i] == 0:
has_tissue_0_non_zero_row = True
elif not has_tissue_1_zero_row and i not in nonzero_row_indices and self.adata.obs["in_tissue"][i] == 1:
elif (
not has_tissue_1_zero_row and i not in nonzero_row_indices and self.adata.obs["in_tissue"].iloc[i] == 1
):
has_tissue_1_zero_row = True
if has_tissue_0_non_zero_row and has_tissue_1_zero_row:
# exit early and report
Expand Down
4 changes: 2 additions & 2 deletions cellxgene_schema_cli/cellxgene_schema/write_labels.py
Original file line number Diff line number Diff line change
Expand Up @@ -234,7 +234,7 @@ def _get_labels(
column: str,
column_definition: dict,
label_type: dict,
) -> pd.Categorical:
) -> pd.Series:
"""
Retrieves a new column (a pandas Series of categorical dtype) with labels based on the IDs in 'column' and the
logic in the 'column_definition'
Expand Down Expand Up @@ -287,7 +287,7 @@ def _get_labels(
else:
raise TypeError(f"'{label_type}' is not supported in 'add-labels' functionality")

new_column = original_column.copy().replace(mapping_dict).astype("category")
new_column = original_column.copy().map(mapping_dict).astype("category")

return new_column

Expand Down
23 changes: 11 additions & 12 deletions cellxgene_schema_cli/requirements.txt
Original file line number Diff line number Diff line change
@@ -1,12 +1,11 @@
anndata==0.8.0
cellxgene-ontology-guide==1.0.0
click==8.1.7
Cython==3.0.10
numpy==1.26.4
pandas==1.4.4
PyYaml==6.0.1
wheel==0.43.0
scipy<1.13.0 # scipy 1.13.0 is not compatible with anndata <=0.10.6, revisit before next release
semver==3.0.2
xxhash==3.4.1
matplotlib==3.9.0
anndata>=0.8,<0.12
cellxgene-ontology-guide==1.0.0 # update before a schema migration
click<9
Cython<4
numpy<2
pandas>2,<3
PyYAML<7
scipy<2
semver<4
xxhash<4
matplotlib<4
26 changes: 14 additions & 12 deletions cellxgene_schema_cli/tests/fixtures/examples_validate.py
Original file line number Diff line number Diff line change
Expand Up @@ -81,9 +81,9 @@
],
)

good_obs.loc[:, ["donor_id"]] = good_obs.astype("category")
good_obs.loc[:, ["suspension_type"]] = good_obs.astype("category")
good_obs.loc[:, ["tissue_type"]] = good_obs.astype("category")
good_obs["donor_id"] = good_obs["donor_id"].astype("category")
good_obs["suspension_type"] = good_obs["suspension_type"].astype("category")
good_obs["tissue_type"] = good_obs["tissue_type"].astype("category")

# Expected obs: this is what the obs above should look like after adding the necessary columns with the validator.
# These columns are defined in the schema.
Expand Down Expand Up @@ -183,9 +183,9 @@
],
)

good_obs_visium.loc[:, ["donor_id"]] = good_obs_visium.astype("category")
good_obs_visium.loc[:, ["suspension_type"]] = good_obs_visium.astype("category")
good_obs_visium.loc[:, ["tissue_type"]] = good_obs_visium.astype("category")
good_obs_visium["donor_id"] = good_obs_visium["donor_id"].astype("category")
good_obs_visium["suspension_type"] = good_obs_visium["suspension_type"].astype("category")
good_obs_visium["tissue_type"] = good_obs_visium["tissue_type"].astype("category")

# Valid spatial obs per schema
good_obs_slide_seqv2 = pd.DataFrame(
Expand Down Expand Up @@ -236,9 +236,9 @@
],
)

good_obs_slide_seqv2.loc[:, ["donor_id"]] = good_obs_slide_seqv2.astype("category")
good_obs_slide_seqv2.loc[:, ["suspension_type"]] = good_obs_slide_seqv2.astype("category")
good_obs_slide_seqv2.loc[:, ["tissue_type"]] = good_obs.astype("category")
good_obs_slide_seqv2["donor_id"] = good_obs_slide_seqv2["donor_id"].astype("category")
good_obs_slide_seqv2["suspension_type"] = good_obs_slide_seqv2["suspension_type"].astype("category")
good_obs_slide_seqv2["tissue_type"] = good_obs_slide_seqv2["tissue_type"].astype("category")

good_obs_visium_is_single_false = pd.DataFrame(
[
Expand Down Expand Up @@ -288,9 +288,11 @@
],
)

good_obs_visium_is_single_false.loc[:, ["donor_id"]] = good_obs_visium_is_single_false.astype("category")
good_obs_visium_is_single_false.loc[:, ["suspension_type"]] = good_obs_visium_is_single_false.astype("category")
good_obs_visium_is_single_false.loc[:, ["tissue_type"]] = good_obs_visium_is_single_false.astype("category")
good_obs_visium_is_single_false["donor_id"] = good_obs_visium_is_single_false["donor_id"].astype("category")
good_obs_visium_is_single_false["suspension_type"] = good_obs_visium_is_single_false["suspension_type"].astype(
"category"
)
good_obs_visium_is_single_false["tissue_type"] = good_obs_visium_is_single_false["tissue_type"].astype("category")

# ---
# 2. Creating individual var components: valid object and valid object with labels
Expand Down
14 changes: 8 additions & 6 deletions cellxgene_schema_cli/tests/test_schema_compliance.py
Original file line number Diff line number Diff line change
Expand Up @@ -104,7 +104,7 @@ def save_and_read_adata(adata: anndata.AnnData) -> anndata.AnnData:
"""
with tempfile.NamedTemporaryFile(suffix=".h5ad") as f:
adata.write_h5ad(f.name)
return anndata.read(f.name)
return anndata.read_h5ad(f.name)


class TestValidAnndata:
Expand Down Expand Up @@ -367,7 +367,7 @@ def test_raw_values__non_rna(self, validator_with_adata):
validator.errors = []
obs["assay_ontology_term_id"] = "EFO:0010891"
obs["suspension_type"] = "nucleus"
obs.loc[:, ["suspension_type"]] = obs.astype("category")
obs["suspension_type"] = obs["suspension_type"].astype("category")
validator.validate_adata()
assert validator.errors == []

Expand Down Expand Up @@ -2433,19 +2433,21 @@ def test_obs_added_tissue_type_label__unknown(self, validator_with_adata):
obs.at["Y", "tissue_type"] = "cell culture" # Already set in example data, just setting explicitly here
obs.at["Y", "tissue_ontology_term_id"] = "unknown" # Testing this term case
validator_with_adata.validate_adata() # Validate
AnnDataLabelAppender(validator_with_adata)._add_labels() # Annotate
labeler = AnnDataLabelAppender(validator_with_adata)
labeler._add_labels() # Annotate

assert obs.at["Y", "tissue"] == "unknown"
assert labeler.adata.obs.at["Y", "tissue"] == "unknown"

def test_obs_added_cell_type_label__unknown(self, validator_with_adata):
obs = validator_with_adata.adata.obs

# Arrange
obs.at["Y", "cell_type_ontology_term_id"] = "unknown" # Testing this term case
validator_with_adata.validate_adata() # Validate
AnnDataLabelAppender(validator_with_adata)._add_labels() # Annotate
labeler = AnnDataLabelAppender(validator_with_adata)
labeler._add_labels() # Annotate

assert obs.at["Y", "cell_type"] == "unknown"
assert labeler.adata.obs.at["Y", "cell_type"] == "unknown"

def test_remove_unused_categories(self, label_writer, adata_with_labels):
modified_donor_id = label_writer.adata.obs["donor_id"].cat.add_categories("donor_3")
Expand Down
4 changes: 2 additions & 2 deletions cellxgene_schema_cli/tests/test_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -118,9 +118,9 @@ def test_remap_deprecated_features__without_raw(adata_without_raw, remapped_feat
def test_replace_ontology_term__with_replacement(adata_with_raw, deprecated_term_map_with_replacement_match):
replace_ontology_term(adata_with_raw.obs, "assay", deprecated_term_map_with_replacement_match)

expected = ["EFO:0009918", "EFO:0000001"]
expected = ["EFO:0000001", "EFO:0009918"]
actual = adata_with_raw.obs["assay_ontology_term_id"].dtype.categories
assert all(a == b for a, b in zip(actual, expected))
assert sorted(actual) == expected


def test_replace_ontology_term__no_replacement(adata_with_raw, deprecated_term_map_no_replacement_match):
Expand Down
29 changes: 3 additions & 26 deletions cellxgene_schema_cli/tests/test_validate.py
Original file line number Diff line number Diff line change
Expand Up @@ -1020,16 +1020,6 @@ def test_determine_seurat_convertibility(self):
assert len(self.validator.warnings) == 0
assert self.validator.is_seurat_convertible

# h5ad where raw matrix variable count != length of raw var variables array is not Seurat-convertible
matrix = sparse.csr_matrix(np.zeros([good_obs.shape[0], good_var.shape[0]], dtype=np.float32))
raw = anndata.AnnData(X=matrix, var=good_var)
raw.var.drop("ENSSASG00005000004", axis=0, inplace=True)
self.validation_helper(matrix, raw)
self.validator._validate_seurat_convertibility()
assert len(self.validator.errors) == 1
assert not self.validator.is_seurat_convertible
assert not self.validator.is_valid

# Visium datasets are not Seurat-convertible
self.validation_helper(sparse_matrix_with_zero)
self.validator.adata.obs = adata_visium.obs.copy()
Expand Down Expand Up @@ -1066,19 +1056,6 @@ def test_fail_categorical_mixed_types(self, tmp_path, valid_adata):
assert "in dataframe 'obs' contains 2 categorical types. Only one type is allowed." in validator.errors[0]
self._fail_write_h5ad(tmp_path, valid_adata)

def test_fail_categorical_bool(self, tmp_path, valid_adata):
# Arrange
categories = [True, False]
self._add_catagorical_obs(valid_adata, categories)
validator = self._create_validator(valid_adata)

# Act
validator._validate_dataframe("obs")

# Assert
assert "in dataframe 'obs' contains illegal_categorical_types={<class 'bool'>}." in validator.errors[0]
self._fail_write_h5ad(tmp_path, valid_adata)

def _add_catagorical_obs(self, adata, categories):
t = pd.CategoricalDtype(categories=categories)
adata.obs["test_cat"] = pd.Series(data=categories, index=["X", "Y"], dtype=t)
Expand Down Expand Up @@ -1131,11 +1108,11 @@ def create_validator(data: Union[ndarray, spmatrix], matrix_format: str) -> Vali
"data, matrix_format, expected_result",
[
# Test case with whole-number values (stored as float32) in a dense matrix
(np.array([[1, 2, 3], [4, 5, 6]], dtype=int), "dense", True),
(np.array([[1, 2, 3], [4, 5, 6]], dtype=np.float32), "dense", True),
# Test case with float values in a dense matrix
(np.array([[1.1, 2.2, 3.3], [4.4, 5.5, 6.6]]), "dense", False),
# Test case with whole-number values (stored as float32) in a sparse matrix (CSR format)
(sparse.csr_matrix([[1, 0, 3], [0, 5, 0]], dtype=int), "csr", True),
(sparse.csr_matrix([[1, 0, 3], [0, 5, 0]], dtype=np.float32), "csr", True),
# Test case with float values in a sparse matrix (CSC format)
(sparse.csc_matrix([[1.1, 0, 3.3], [0, 5.5, 0]]), "csc", False),
# Test case with mixed integer and float values in a dense matrix
Expand All @@ -1148,7 +1125,7 @@ def test_has_valid_raw(self, data, matrix_format, expected_result):

@mock.patch("cellxgene_schema.validate.get_matrix_format", return_value="unknown")
def test_has_valid_raw_with_unknown_format(self, mock_get_matrix_format):
data = np.array([[1, 2, 3], [4, 5, 6]], dtype=int)
data = np.array([[1, 2, 3], [4, 5, 6]], dtype=np.float32)
validator = self.create_validator(data, "unknown")
with pytest.raises(AssertionError):
validator._has_valid_raw()
2 changes: 1 addition & 1 deletion scripts/schema_bump_dry_run_genes/requirements.txt
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
requests==2.32.0
tiledb==0.21.4 # Should match version pinned in single-cell-data-portal
tiledb==0.30.2 # Should match version pinned in single-cell-data-portal
pandas==2.2.2
pyarrow>=1.0.0
jinja2<4

0 comments on commit 95d1cdf

Please sign in to comment.