def _validate_spatial_is_primary_data(self):
    """
    Validate is_primary_data for spatial datasets.

    When uns['spatial']['is_single'] is False, obs['is_primary_data'] must be False for all rows;
    if any row is True, one error is appended to self.errors. The check is skipped when obs is
    unavailable or has no 'is_primary_data' column.
    """
    obs = getattr_anndata(self.adata, "obs")
    if obs is None or "is_primary_data" not in obs:
        return

    # NOTE(review): this is an identity comparison, so a numpy.bool_ False would not match --
    # confirm that _is_single() always hands back a Python bool (or None).
    if self._is_single() is not False:
        return

    # Compare against True explicitly instead of relying on truthiness: with a non-boolean column
    # (e.g. the strings "False") a bare .any() treats every non-empty value as True and would add a
    # misleading error on top of the dtype error.
    if obs["is_primary_data"].eq(True).any():
        self.errors.append(
            "When uns['spatial']['is_single'] is False, obs['is_primary_data'] must be False for all rows."
        )
# Cast the string columns to categorical by assigning each converted Series back to its own column.
# Writing the whole-frame astype("category") result through .loc[:, [col]] only swaps the dtype on
# pandas < 2.0; from 2.0 on that form writes into the existing column in place, so it stays `object`.
good_obs_visium_is_single_false["donor_id"] = good_obs_visium_is_single_false["donor_id"].astype("category")
good_obs_visium_is_single_false["suspension_type"] = good_obs_visium_is_single_false["suspension_type"].astype(
    "category"
)
good_obs_visium_is_single_false["tissue_type"] = good_obs_visium_is_single_false["tissue_type"].astype("category")
def test_is_primary_data__spatial(self, validator_with_spatial_and_is_single_false):
    """
    is_primary_data bool. This MUST be False for all rows if the dataset has
    uns['spatial']['is_single'] == False.
    """
    validator = validator_with_spatial_and_is_single_false

    # Flip the first row with one positional .iloc write on the frame itself. The chained form
    # obs["is_primary_data"][0] = True uses an integer key on a string-labelled index (deprecated)
    # and is not guaranteed to write through to obs (it does not under pandas copy-on-write), which
    # would leave the column all-False and the test asserting on nothing.
    obs = validator.adata.obs
    obs.iloc[0, obs.columns.get_loc("is_primary_data")] = True

    validator.validate_adata()
    assert validator.errors == [
        "ERROR: When uns['spatial']['is_single'] is False, obs['is_primary_data'] must be False for all rows."
    ]
def test_obsm_values_spatial_embedding_missing__is_single_false(self, validator_with_spatial_and_is_single_false):
    """
    Removing obsm['spatial'] from the is_single == False example dataset must not produce
    any validation errors.
    """
    spatial_validator = validator_with_spatial_and_is_single_false
    spatial_adata = spatial_validator.adata

    # Drop only the spatial embedding; everything else in the fixture stays as built.
    del spatial_adata.obsm["spatial"]

    spatial_validator.validate_adata()
    assert spatial_validator.errors == []