Skip to content

Commit

Permalink
feat: update validation for obs['is_primary_data'] to enforce False …
Browse files Browse the repository at this point in the history
…if uns['spatial']['is_single'] is False (#865)

Co-authored-by: MillenniumFalconMechanic <[email protected]>
  • Loading branch information
1 parent f387825 commit d8e94b5
Show file tree
Hide file tree
Showing 4 changed files with 124 additions and 36 deletions.
14 changes: 14 additions & 0 deletions cellxgene_schema_cli/cellxgene_schema/validate.py
Original file line number Diff line number Diff line change
Expand Up @@ -1386,6 +1386,20 @@ def _check_spatial_obs(self):
# Validate cell type.
self._validate_spatial_cell_type_ontology_term_id()

self._validate_spatial_is_primary_data()

def _validate_spatial_is_primary_data(self):
"""
Validate is_primary_data for spatial datasets.
"""
obs = getattr_anndata(self.adata, "obs")
if obs is None or "is_primary_data" not in obs:
return
if self._is_single() is False and obs["is_primary_data"].any():
self.errors.append(
"When uns['spatial']['is_single'] is False, obs['is_primary_data'] must be False for all rows."
)

def _validate_spatial_cell_type_ontology_term_id(self):
"""
Validate cell type ontology term id is "unknown" if Visium, is_single is True and in_tissue is 0.
Expand Down
68 changes: 68 additions & 0 deletions cellxgene_schema_cli/tests/fixtures/examples_validate.py
Original file line number Diff line number Diff line change
Expand Up @@ -240,6 +240,58 @@
good_obs_slide_seqv2.loc[:, ["suspension_type"]] = good_obs_slide_seqv2.astype("category")
good_obs_slide_seqv2.loc[:, ["tissue_type"]] = good_obs.astype("category")

good_obs_visium_is_single_false = pd.DataFrame(
[
[
"CL:0000066",
"EFO:0010961",
"MONDO:0100096",
"NCBITaxon:9606",
"PATO:0000383",
"UBERON:0002048",
"tissue",
False,
"HANCESTRO:0575",
"HsapDv:0000003",
"donor_1",
"na",
],
[
"CL:0000192",
"EFO:0010961",
"PATO:0000461",
"NCBITaxon:10090",
"unknown",
"CL:0000192",
"cell culture",
False,
"na",
"MmusDv:0000003",
"donor_2",
"na",
],
],
index=["X", "Y"],
columns=[
"cell_type_ontology_term_id",
"assay_ontology_term_id",
"disease_ontology_term_id",
"organism_ontology_term_id",
"sex_ontology_term_id",
"tissue_ontology_term_id",
"tissue_type",
"is_primary_data",
"self_reported_ethnicity_ontology_term_id",
"development_stage_ontology_term_id",
"donor_id",
"suspension_type",
],
)

good_obs_visium_is_single_false.loc[:, ["donor_id"]] = good_obs_visium_is_single_false.astype("category")
good_obs_visium_is_single_false.loc[:, ["suspension_type"]] = good_obs_visium_is_single_false.astype("category")
good_obs_visium_is_single_false.loc[:, ["tissue_type"]] = good_obs_visium_is_single_false.astype("category")

# ---
# 2. Creating individual var components: valid object and valid object and with labels

Expand Down Expand Up @@ -327,6 +379,14 @@
},
}

good_uns_with_is_single_false = {
"title": "A title",
"default_embedding": "X_umap",
"X_approximate_distribution": "normal",
"batch_condition": ["is_primary_data"],
"spatial": {"is_single": False},
}

good_uns_with_slide_seqV2_spatial = {
"title": "A title",
"default_embedding": "X_umap",
Expand Down Expand Up @@ -408,6 +468,14 @@
var=good_var,
)

adata_spatial_is_single_false = anndata.AnnData(
X=sparse.csr_matrix(X),
obs=good_obs_visium_is_single_false,
uns=good_uns_with_is_single_false,
obsm=good_obsm_spatial,
var=good_var,
)

# anndata for testing migration
unmigrated_obs = pd.DataFrame(
[
Expand Down
66 changes: 30 additions & 36 deletions cellxgene_schema_cli/tests/test_schema_compliance.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,12 @@ def validator_with_adata_missing_raw(validator) -> Validator:
return validator


@pytest.fixture
def validator_with_spatial_and_is_single_false(validator) -> Validator:
validator.adata = examples.adata_spatial_is_single_false.copy()
return validator


@pytest.fixture
def validator_with_visium_assay(validator) -> Validator:
validator.adata = examples.adata_visium.copy()
Expand Down Expand Up @@ -1101,6 +1107,17 @@ def test_is_primary_data(self, validator_with_adata):
"ERROR: Column 'is_primary_data' in dataframe 'obs' " "must be boolean, not 'object'."
]

def test_is_primary_data__spatial(self, validator_with_spatial_and_is_single_false):
"""
is_primary_data bool. This MUST be False if dataset has uns['spatial']['is_single'] == False
"""
validator = validator_with_spatial_and_is_single_false
validator.adata.obs["is_primary_data"][0] = True
validator.validate_adata()
assert validator.errors == [
"ERROR: When uns['spatial']['is_single'] is False, obs['is_primary_data'] must be False for all rows."
]

def test_donor_id_must_be_categorical(self, validator_with_adata):
"""
donor_id categorical with str categories. This MUST be free-text that identifies
Expand Down Expand Up @@ -1993,26 +2010,18 @@ def test_obsm_values_no_X_embedding__non_spatial_dataset(self, validator_with_ad
"WARNING: Validation of raw layer was not performed due to current errors, try again after fixing current errors.",
]

@pytest.mark.parametrize(
"assay_ontology_term_id, adata_spatial",
[
("EFO:0010961", examples.adata_visium),
("EFO:0030062", examples.adata_slide_seqv2),
],
)
def test_obsm_values_no_X_embedding__spatial_dataset(
self, validator_with_adata, assay_ontology_term_id, adata_spatial
):
validator = validator_with_adata
validator.adata.obsm["harmony"] = validator.adata.obsm["X_umap"]
validator.adata.uns = adata_spatial.uns
validator.adata.uns["default_embedding"] = "harmony"
validator.adata.obsm["spatial"] = validator.adata.obsm["X_umap"]
def test_obsm_values_no_X_embedding__visium_dataset(self, validator_with_visium_assay):
validator = validator_with_visium_assay
validator.adata.uns["default_embedding"] = "spatial"
del validator.adata.obsm["X_umap"]
validator.validate_adata()
assert validator.errors == []
assert validator.is_spatial is True

def test_obsm_values_no_X_embedding__slide_seq_v2_dataset(self, validator_with_slide_seq_v2_assay):
validator = validator_with_slide_seq_v2_assay
validator.adata.uns["default_embedding"] = "spatial"
del validator.adata.obsm["X_umap"]
validator.adata.obs = adata_spatial.obs
validator.adata.obs["assay_ontology_term_id"] = assay_ontology_term_id
validator.adata.obs["suspension_type"] = "na"
validator.adata.obs.loc[:, ["suspension_type"]] = validator.adata.obs.astype("category")
validator.validate_adata()
assert validator.errors == []
assert validator.is_spatial is True
Expand All @@ -2025,24 +2034,9 @@ def test_obsm_values_spatial_embedding_missing__is_single_true(self, validator_w
"ERROR: 'spatial' embedding is required in 'adata.obsm' if adata.uns['spatial']['is_single'] is True."
]

def test_obsm_values_spatial_embedding_missing__is_single_false(self, validator_with_visium_assay):
validator = validator_with_visium_assay
validator.adata.uns["spatial"] = {"is_single": False}
def test_obsm_values_spatial_embedding_missing__is_single_false(self, validator_with_spatial_and_is_single_false):
validator = validator_with_spatial_and_is_single_false
del validator.adata.obsm["spatial"]
# Format adata.obs into valid shape for Visium and is_single False.
validator.adata.obs.pop("array_col")
validator.adata.obs.pop("array_row")
validator.adata.obs.pop("in_tissue")
validator.validate_adata()
assert validator.errors == []

def test_obsm_values_spatial_embedding_present__is_single_false(self, validator_with_visium_assay):
validator = validator_with_visium_assay
validator.adata.uns["spatial"] = {"is_single": False}
# Format adata.obs into valid shape for Visium and is_single False.
validator.adata.obs.pop("array_col")
validator.adata.obs.pop("array_row")
validator.adata.obs.pop("in_tissue")
validator.validate_adata()
assert validator.errors == []

Expand Down
12 changes: 12 additions & 0 deletions cellxgene_schema_cli/tests/test_validate.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
from fixtures.examples_validate import (
adata_minimal,
adata_slide_seqv2,
adata_spatial_is_single_false,
adata_visium,
adata_with_labels,
good_obs,
Expand Down Expand Up @@ -333,6 +334,15 @@ def test__validate_spatial_slide_seqV2_ok(self):
validator.validate_adata()
assert not validator.errors

def test__validate_spatial_is_single_false_ok(self):
validator: Validator = Validator()
validator._set_schema_def()
validator.adata = adata_spatial_is_single_false.copy()

# Confirm spatial is valid.
validator.validate_adata()
assert not validator.errors

def test__validate_spatial_forbidden_if_not_visium_or_slide_seqv2(self):
validator: Validator = Validator()
validator._set_schema_def()
Expand Down Expand Up @@ -708,6 +718,7 @@ def test__validate_tissue_position_forbidden(self, assay_ontology_term_id, is_si
validator.adata = adata_visium.copy()
validator.adata.obs.assay_ontology_term_id = assay_ontology_term_id
validator.adata.uns["spatial"]["is_single"] = is_single
validator.adata.obs["is_primary_data"] = False

# Confirm tissue positions are not allowed.
validator._check_spatial_obs()
Expand Down Expand Up @@ -738,6 +749,7 @@ def test__validate_tissue_position_not_required(self):
validator.adata = adata_slide_seqv2.copy()
validator.adata.obs["assay_ontology_term_id"] = ["EFO:0010961", "EFO:0030062"]
validator.adata.uns["spatial"]["is_single"] = False
validator.adata.obs["is_primary_data"] = False

validator._check_spatial_obs()
assert not validator.errors
Expand Down

0 comments on commit d8e94b5

Please sign in to comment.