Skip to content

Commit

Permalink
feat(schema 5.1.0): validate obs[cell_type_ontology_term_id] (#864)
Browse files Browse the repository at this point in the history
  • Loading branch information
MillenniumFalconMechanic authored May 1, 2024
1 parent 5b61078 commit f387825
Show file tree
Hide file tree
Showing 3 changed files with 78 additions and 12 deletions.
36 changes: 31 additions & 5 deletions cellxgene_schema_cli/cellxgene_schema/validate.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,9 +25,10 @@
ASSAY_VISIUM = "EFO:0010961"
ASSAY_SLIDE_SEQV2 = "EFO:0030062"

ERROR_SUFFIX_VISIUM_AND_IS_SINGLE_TRUE = "obs['assay_ontology_term_id'] 'EFO:0010961' (Visium Spatial Gene Expression) and uns['spatial']['is_single'] is True."
ERROR_SUFFIX_VISIUM_AND_IS_SINGLE_TRUE = "obs['assay_ontology_term_id'] 'EFO:0010961' (Visium Spatial Gene Expression) and uns['spatial']['is_single'] is True"
ERROR_SUFFIX_VISIUM_AND_IS_SINGLE_TRUE_FORBIDDEN = f"is only allowed for {ERROR_SUFFIX_VISIUM_AND_IS_SINGLE_TRUE}"
ERROR_SUFFIX_VISIUM_AND_IS_SINGLE_TRUE_REQUIRED = f"is required for {ERROR_SUFFIX_VISIUM_AND_IS_SINGLE_TRUE}"
ERROR_SUFFIX_VISIUM_AND_IS_SINGLE_TRUE_IN_TISSUE_0 = f"{ERROR_SUFFIX_VISIUM_AND_IS_SINGLE_TRUE} and in_tissue is 0"


class Validator:
Expand Down Expand Up @@ -1382,6 +1383,31 @@ def _check_spatial_obs(self):
self._validate_spatial_tissue_position("array_row", 0, 77)
self._validate_spatial_tissue_position("in_tissue", 0, 1)

# Validate cell type.
self._validate_spatial_cell_type_ontology_term_id()

def _validate_spatial_cell_type_ontology_term_id(self):
"""
Validate cell type ontology term id is "unknown" if Visium, is_single is True and in_tissue is 0.
:rtype none
"""
# Exit if:
# - not Visium and is_single is True as no further checks are necessary
# - in_tissue is not specified as checks are dependent on this value
if not self._is_visium_and_is_single_true() or "in_tissue" not in self.adata.obs:
return

# Validate cell type: must be "unknown" if Visium and is_single is True and in_tissue is 0.
if (
(self.adata.obs["assay_ontology_term_id"] == ASSAY_VISIUM)
& (self.adata.obs["in_tissue"] == 0)
& (self.adata.obs["cell_type_ontology_term_id"] != "unknown")
).any():
self.errors.append(
f"obs['cell_type_ontology_term_id'] must be 'unknown' when {ERROR_SUFFIX_VISIUM_AND_IS_SINGLE_TRUE_IN_TISSUE_0}."
)

def _validate_spatial_tissue_position(self, tissue_position_name: str, min: int, max: int):
"""
Validate tissue position is allowed and required, and are integers within the given range. Validation is not defined in
Expand All @@ -1397,7 +1423,7 @@ def _validate_spatial_tissue_position(self, tissue_position_name: str, min: int,
& (self.adata.obs[tissue_position_name].notnull())
).any()
):
self.errors.append(f"obs['{tissue_position_name}'] {ERROR_SUFFIX_VISIUM_AND_IS_SINGLE_TRUE_FORBIDDEN}")
self.errors.append(f"obs['{tissue_position_name}'] {ERROR_SUFFIX_VISIUM_AND_IS_SINGLE_TRUE_FORBIDDEN}.")
return

# Exit if we're not dealing with Visium and _is_single True as no further checks are necessary.
Expand All @@ -1414,7 +1440,7 @@ def _validate_spatial_tissue_position(self, tissue_position_name: str, min: int,
& (self.adata.obs[tissue_position_name].isnull())
).any()
):
self.errors.append(f"obs['{tissue_position_name}'] {ERROR_SUFFIX_VISIUM_AND_IS_SINGLE_TRUE_REQUIRED}")
self.errors.append(f"obs['{tissue_position_name}'] {ERROR_SUFFIX_VISIUM_AND_IS_SINGLE_TRUE_REQUIRED}.")
return

# Tissue position must be an int.
Expand Down Expand Up @@ -1502,7 +1528,7 @@ def _check_spatial_uns(self):
# library_id is forbidden if assay is not Visium or is_single is false.
is_visium_and_uns_is_single = self._is_visium_and_is_single_true()
if len(library_ids) > 0 and not is_visium_and_uns_is_single:
self.errors.append(f"uns['spatial'][library_id] {ERROR_SUFFIX_VISIUM_AND_IS_SINGLE_TRUE_FORBIDDEN}")
self.errors.append(f"uns['spatial'][library_id] {ERROR_SUFFIX_VISIUM_AND_IS_SINGLE_TRUE_FORBIDDEN}.")
# Exit as library_id is not allowed.
return

Expand All @@ -1513,7 +1539,7 @@ def _check_spatial_uns(self):
# library_id is required if assay is Visium and is_single is True.
if len(library_ids) == 0:
self.errors.append(
f"uns['spatial'] must contain at least one key representing the library_id when {ERROR_SUFFIX_VISIUM_AND_IS_SINGLE_TRUE}"
f"uns['spatial'] must contain at least one key representing the library_id when {ERROR_SUFFIX_VISIUM_AND_IS_SINGLE_TRUE}."
)
# Exit as library_id is missing.
return
Expand Down
2 changes: 1 addition & 1 deletion cellxgene_schema_cli/tests/fixtures/examples_validate.py
Original file line number Diff line number Diff line change
Expand Up @@ -131,7 +131,7 @@
[
1,
1,
"CL:0000066",
"unknown",
"EFO:0010961",
"MONDO:0100096",
"NCBITaxon:9606",
Expand Down
52 changes: 46 additions & 6 deletions cellxgene_schema_cli/tests/test_validate.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
from cellxgene_schema.validate import (
ERROR_SUFFIX_VISIUM_AND_IS_SINGLE_TRUE,
ERROR_SUFFIX_VISIUM_AND_IS_SINGLE_TRUE_FORBIDDEN,
ERROR_SUFFIX_VISIUM_AND_IS_SINGLE_TRUE_IN_TISSUE_0,
ERROR_SUFFIX_VISIUM_AND_IS_SINGLE_TRUE_REQUIRED,
Validator,
validate,
Expand Down Expand Up @@ -439,9 +440,9 @@ def test__validate_library_id_forbidden_if_slide_seqV2(self):
# Confirm library_id is not allowed for Slide-seqV2.
validator._check_spatial_uns()
assert len(validator.errors) == 1
assert f"uns['spatial'][library_id] {ERROR_SUFFIX_VISIUM_AND_IS_SINGLE_TRUE_FORBIDDEN}" in validator.errors[0]
assert f"uns['spatial'][library_id] {ERROR_SUFFIX_VISIUM_AND_IS_SINGLE_TRUE_FORBIDDEN}." in validator.errors[0]

def test__validate_library_id_forbidden_if_visium_and_is_single_false(self):
def test__validate_library_id_forbidden_if_visium_or_is_single_false(self):
validator: Validator = Validator()
validator._set_schema_def()

Expand All @@ -452,7 +453,7 @@ def test__validate_library_id_forbidden_if_visium_and_is_single_false(self):
# Confirm library_id is not allowed for Visium if is_single is False.
validator._check_spatial_uns()
assert len(validator.errors) == 1
assert f"uns['spatial'][library_id] {ERROR_SUFFIX_VISIUM_AND_IS_SINGLE_TRUE_FORBIDDEN}" in validator.errors[0]
assert f"uns['spatial'][library_id] {ERROR_SUFFIX_VISIUM_AND_IS_SINGLE_TRUE_FORBIDDEN}." in validator.errors[0]

def test__validate_library_id_required_if_visium(self):
validator: Validator = Validator()
Expand All @@ -464,7 +465,7 @@ def test__validate_library_id_required_if_visium(self):
validator._check_spatial_uns()
assert validator.errors
assert (
f"uns['spatial'] must contain at least one key representing the library_id when {ERROR_SUFFIX_VISIUM_AND_IS_SINGLE_TRUE}"
f"uns['spatial'] must contain at least one key representing the library_id when {ERROR_SUFFIX_VISIUM_AND_IS_SINGLE_TRUE}."
in validator.errors[0]
)

Expand Down Expand Up @@ -714,7 +715,7 @@ def test__validate_tissue_position_forbidden(self, assay_ontology_term_id, is_si
tissue_position_names = ["array_col", "array_row", "in_tissue"]
for i, tissue_position_name in enumerate(tissue_position_names):
assert (
f"obs['{tissue_position_name}'] {ERROR_SUFFIX_VISIUM_AND_IS_SINGLE_TRUE_FORBIDDEN}"
f"obs['{tissue_position_name}'] {ERROR_SUFFIX_VISIUM_AND_IS_SINGLE_TRUE_FORBIDDEN}."
in validator.errors[i]
)

Expand All @@ -727,7 +728,9 @@ def test__validate_tissue_position_required(self, tissue_position_name):

validator._check_spatial_obs()
assert validator.errors
assert f"obs['{tissue_position_name}'] {ERROR_SUFFIX_VISIUM_AND_IS_SINGLE_TRUE_REQUIRED}" in validator.errors[0]
assert (
f"obs['{tissue_position_name}'] {ERROR_SUFFIX_VISIUM_AND_IS_SINGLE_TRUE_REQUIRED}." in validator.errors[0]
)

def test__validate_tissue_position_not_required(self):
validator: Validator = Validator()
Expand Down Expand Up @@ -789,6 +792,43 @@ def test__validate_tissue_position_int_max_error(self, tissue_position_name, max
assert validator.errors
assert f"obs['{tissue_position_name}'] must be {error_message_token}" in validator.errors[0]

@pytest.mark.parametrize(
"cell_type_ontology_term_id, in_tissue",
[("unknown", 0), (["unknown", "CL:0000066"], [0, 1]), ("CL:0000066", 1)],
)
def test_validate_cell_type_ontology_term_id_ok(self, cell_type_ontology_term_id, in_tissue):
validator: Validator = Validator()
validator._set_schema_def()
validator.adata = adata_visium.copy()
validator.adata.obs.cell_type_ontology_term_id = cell_type_ontology_term_id
validator.adata.obs.in_tissue = in_tissue

# Confirm cell type is valid.
validator._validate_spatial_cell_type_ontology_term_id()
assert not validator.errors

@pytest.mark.parametrize(
"cell_type_ontology_term_id, in_tissue",
[
("CL:0000066", 0),
(["CL:0000066", "unknown"], [0, 1]),
],
)
def test_validate_cell_type_ontology_term_id_error(self, cell_type_ontology_term_id, in_tissue):
validator: Validator = Validator()
validator._set_schema_def()
validator.adata = adata_visium.copy()
validator.adata.obs.cell_type_ontology_term_id = cell_type_ontology_term_id
validator.adata.obs.in_tissue = in_tissue

# Confirm errors.
validator._validate_spatial_cell_type_ontology_term_id()
assert validator.errors
assert (
f"obs['cell_type_ontology_term_id'] must be 'unknown' when {ERROR_SUFFIX_VISIUM_AND_IS_SINGLE_TRUE_IN_TISSUE_0}."
in validator.errors[0]
)


class TestSeuratConvertibility:
def validation_helper(self, matrix, raw=None):
Expand Down

0 comments on commit f387825

Please sign in to comment.