From ab3c971bb2a13907147df2e831202c0dd0c00ac5 Mon Sep 17 00:00:00 2001 From: MillenniumFalconMechanic Date: Wed, 24 Apr 2024 16:04:00 -0700 Subject: [PATCH 01/38] feat(schema 5.1.0): validate uns[spatial] --- cellxgene_schema_cli/cellxgene_schema/validate.py | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/cellxgene_schema_cli/cellxgene_schema/validate.py b/cellxgene_schema_cli/cellxgene_schema/validate.py index bd57a0ee..7d897d43 100644 --- a/cellxgene_schema_cli/cellxgene_schema/validate.py +++ b/cellxgene_schema_cli/cellxgene_schema/validate.py @@ -29,6 +29,9 @@ class Validator: """Handles validation of AnnData""" + ASSAY_VISIUM = "EFO:0010961" + ASSAY_SLIDE_SEQV2 = "EFO:0030062" + def __init__(self, ignore_labels=False): self.schema_def = dict() self.schema_version: str = None @@ -952,6 +955,13 @@ def _validate_seurat_convertibility(self): ) self.is_seurat_convertible = False + # Seurat conversion is not supported for Visium datasets. + if self._is_visium(): + self.warnings.append( + "Datasets with assay_ontology_term_id 'EFO:0010961' (Visium Spatial Gene Expression) are not compatible with Seurat." + ) + self.is_seurat_convertible = False + def _validate_obsm(self): """ Validates the embedding dictionary -- it checks that all values of adata.obsm are numpy arrays with the correct From b6c93adb4b89a061eaa8a63ce2a707f62c2065ef Mon Sep 17 00:00:00 2001 From: MillenniumFalconMechanic Date: Wed, 24 Apr 2024 16:35:15 -0700 Subject: [PATCH 02/38] Linting --- cellxgene_schema_cli/cellxgene_schema/validate.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/cellxgene_schema_cli/cellxgene_schema/validate.py b/cellxgene_schema_cli/cellxgene_schema/validate.py index 7d897d43..4b0ede81 100644 --- a/cellxgene_schema_cli/cellxgene_schema/validate.py +++ b/cellxgene_schema_cli/cellxgene_schema/validate.py @@ -29,9 +29,6 @@ class Validator: """Handles validation of AnnData""" - ASSAY_VISIUM = "EFO:0010961" - ASSAY_SLIDE_SEQV2 = "EFO:0030062" - def __init__(self, ignore_labels=False): self.schema_def = dict() self.schema_version: str = None From 9010444cf671ec40fbb20f524934b994668b2e06 Mon Sep 17 00:00:00 2001 From: MillenniumFalconMechanic Date: Wed, 24 Apr 2024 22:38:05 -0700 Subject: [PATCH 03/38] Updated error messages --- cellxgene_schema_cli/tests/test_validate.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/cellxgene_schema_cli/tests/test_validate.py b/cellxgene_schema_cli/tests/test_validate.py index 0de17c75..a796a59b 100644 --- a/cellxgene_schema_cli/tests/test_validate.py +++ b/cellxgene_schema_cli/tests/test_validate.py @@ -464,7 +464,11 @@ def test__validate_library_id_required_if_visium(self): validator._check_spatial() assert validator.errors assert ( +<<<<<<< HEAD "uns['spatial'] must contain at least one key representing the library_id when obs['assay_ontology_term_id'] " +======= + "uns['spatial'] must contain the key 'library_id' for obs['assay_ontology_term_id'] " +>>>>>>> 76c80db (Updated error messages) "'EFO:0010961' (Visium Spatial Gene Expression) and uns['spatial']['is_single'] is True." in validator.errors[0] ) From ec8fb63b4bb0d50bcd9b67a61f6fc113d7415a55 Mon Sep 17 00:00:00 2001 From: MillenniumFalconMechanic Date: Thu, 25 Apr 2024 09:58:13 -0700 Subject: [PATCH 04/38] Review updates --- cellxgene_schema_cli/cellxgene_schema/validate.py | 7 ------- cellxgene_schema_cli/tests/test_validate.py | 4 ---- 2 files changed, 11 deletions(-) diff --git a/cellxgene_schema_cli/cellxgene_schema/validate.py b/cellxgene_schema_cli/cellxgene_schema/validate.py index 4b0ede81..bd57a0ee 100644 --- a/cellxgene_schema_cli/cellxgene_schema/validate.py +++ b/cellxgene_schema_cli/cellxgene_schema/validate.py @@ -952,13 +952,6 @@ def _validate_seurat_convertibility(self): ) self.is_seurat_convertible = False - # Seurat conversion is not supported for Visium datasets. - if self._is_visium(): - self.warnings.append( - "Datasets with assay_ontology_term_id 'EFO:0010961' (Visium Spatial Gene Expression) are not compatible with Seurat." - ) - self.is_seurat_convertible = False - def _validate_obsm(self): """ Validates the embedding dictionary -- it checks that all values of adata.obsm are numpy arrays with the correct diff --git a/cellxgene_schema_cli/tests/test_validate.py b/cellxgene_schema_cli/tests/test_validate.py index a796a59b..0de17c75 100644 --- a/cellxgene_schema_cli/tests/test_validate.py +++ b/cellxgene_schema_cli/tests/test_validate.py @@ -464,11 +464,7 @@ def test__validate_library_id_required_if_visium(self): validator._check_spatial() assert validator.errors assert ( -<<<<<<< HEAD "uns['spatial'] must contain at least one key representing the library_id when obs['assay_ontology_term_id'] " -======= - "uns['spatial'] must contain the key 'library_id' for obs['assay_ontology_term_id'] " ->>>>>>> 76c80db (Updated error messages) "'EFO:0010961' (Visium Spatial Gene Expression) and uns['spatial']['is_single'] is True." in validator.errors[0] ) From 832b297e638c1ab5ff294c09aa99c80614ad7968 Mon Sep 17 00:00:00 2001 From: MillenniumFalconMechanic Date: Thu, 25 Apr 2024 13:37:59 -0700 Subject: [PATCH 05/38] Review updates --- cellxgene_schema_cli/cellxgene_schema/validate.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cellxgene_schema_cli/cellxgene_schema/validate.py b/cellxgene_schema_cli/cellxgene_schema/validate.py index bd57a0ee..afa6a0bc 100644 --- a/cellxgene_schema_cli/cellxgene_schema/validate.py +++ b/cellxgene_schema_cli/cellxgene_schema/validate.py @@ -1362,7 +1362,7 @@ def _check_spatial(self): # is_single must be a boolean. uns_is_single = uns_spatial["is_single"] - if not isinstance(uns_is_single, (np.bool_, np.bool)): + if not isinstance(uns_is_single, (np.bool_, np.bool, bool)): self.errors.append(f"uns['spatial']['is_single'] must be of boolean type, it is {type(uns_is_single)}.") # Exit if is_single is not valid as all further checks are dependent on its value. return From faec6004160694e4971e48f456d7d36d43564129 Mon Sep 17 00:00:00 2001 From: MillenniumFalconMechanic Date: Thu, 25 Apr 2024 13:39:15 -0700 Subject: [PATCH 06/38] Reverted update --- cellxgene_schema_cli/cellxgene_schema/validate.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cellxgene_schema_cli/cellxgene_schema/validate.py b/cellxgene_schema_cli/cellxgene_schema/validate.py index afa6a0bc..bd57a0ee 100644 --- a/cellxgene_schema_cli/cellxgene_schema/validate.py +++ b/cellxgene_schema_cli/cellxgene_schema/validate.py @@ -1362,7 +1362,7 @@ def _check_spatial(self): # is_single must be a boolean. uns_is_single = uns_spatial["is_single"] - if not isinstance(uns_is_single, (np.bool_, np.bool, bool)): + if not isinstance(uns_is_single, (np.bool_, np.bool)): self.errors.append(f"uns['spatial']['is_single'] must be of boolean type, it is {type(uns_is_single)}.") # Exit if is_single is not valid as all further checks are dependent on its value. return From 4c8a739a5112b63274556226dd6daa3d7a2ef2db Mon Sep 17 00:00:00 2001 From: MillenniumFalconMechanic Date: Fri, 26 Apr 2024 13:40:43 -0700 Subject: [PATCH 07/38] feat(schema 5.1.0): validate obs[array_col], obs[array_row] and obs[in_tissue] --- .../cellxgene_schema/validate.py | 82 +++++++++- .../tests/fixtures/examples_validate.py | 9 ++ .../tests/test_schema_compliance.py | 11 +- cellxgene_schema_cli/tests/test_validate.py | 147 ++++++++++++++---- 4 files changed, 212 insertions(+), 37 deletions(-) diff --git a/cellxgene_schema_cli/cellxgene_schema/validate.py b/cellxgene_schema_cli/cellxgene_schema/validate.py index bd57a0ee..74c1511a 100644 --- a/cellxgene_schema_cli/cellxgene_schema/validate.py +++ b/cellxgene_schema_cli/cellxgene_schema/validate.py @@ -1315,7 +1315,87 @@ def _check_column_availability(self): def _check_spatial(self): """ - Validate spatial-related values of the AnnData object. Validation is not defined in schema definition yaml. + Sequence validation of spatial-related values of the AnnData object. + + :rtype none + """ + self._check_spatial_uns() + self._check_spatial_obs() + + def _check_spatial_obs(self): + """ + Validate obs spatial-related values of the AnnData object. Validation is not defined in schema definition yaml. + Errors are added to self.errors. + + :rtype none + """ + + # Exit if obs is not specified. Error is reported in core validate functionality. + obs_component = getattr_anndata(self.adata, "obs") + if obs_component is None: + return + + # obs spatial validation is dependent on uns.spatial.is_single; exit if not specified. Errors are + # reported downstream. + uns_component = getattr_anndata(self.adata, "uns") + if uns_component is None or "spatial" not in uns_component or "is_single" not in uns_component["spatial"]: + return + + # Validate tissue positions. + is_visium_and_uns_is_single = self._is_visium() and uns_component["spatial"]["is_single"] + self._validate_spatial_tissue_position("array_col", 0, 127, is_visium_and_uns_is_single) + self._validate_spatial_tissue_position("array_row", 0, 77, is_visium_and_uns_is_single) + self._validate_spatial_tissue_position("in_tissue", 0, 1, is_visium_and_uns_is_single) + + def _validate_spatial_tissue_position( + self, tissue_position_name: str, min: int, max: int, is_visium_and_uns_is_single: bool + ): + """ + Validate tissue position is allowed and required, and are integers within the given range. Validation is not defined in + schema definition yaml. + + :rtype none + """ + # Tissue position is foribidden if assay is not Visium or is_single is False. + obs_tissue_position = self.adata.obs.get(tissue_position_name) + if obs_tissue_position is not None and not is_visium_and_uns_is_single: + self.errors.append( + f"obs['{tissue_position_name}'] is only allowed for obs['assay_ontology_term_id'] " + "'EFO:0010961' (Visium Spatial Gene Expression) and uns['spatial']['is_single'] is True." + ) + return + + # Exit if we're not dealing with Visium and _is_single True as no further checks are necessary. + if not is_visium_and_uns_is_single: + return + + # Tissue position is required. + if obs_tissue_position is None: + self.errors.append( + f"obs['{tissue_position_name}'] is required for obs['assay_ontology_term_id'] " + "'EFO:0010961' (Visium Spatial Gene Expression) and uns['spatial']['is_single'] True." + ) + return + + # Tissue position must be an int. + if not np.issubdtype(obs_tissue_position.dtype, np.integer): + self.errors.append(f"obs['{tissue_position_name}'] must be of int type, it is {obs_tissue_position.dtype}.") + return + + # Tissue position must be within the given range. + if not ((obs_tissue_position >= min) & (obs_tissue_position <= max)).all(): + if tissue_position_name == "in_tissue": + error_message_token = f"{min} or {max}" + else: + error_message_token = f"between {min} and {max}" + self.errors.append( + f"obs['{tissue_position_name}'] must be {error_message_token}, the min and max are {obs_tissue_position.min()} and {obs_tissue_position.max()}. " + f"This must be the value of the column tissue_positions_in_tissue from the tissue_positions_list.csv or tissue_positions.csv." + ) + + def _check_spatial_uns(self): + """ + Validate uns spatial-related values of the AnnData object. Validation is not defined in schema definition yaml. Errors are added to self.errors. :rtype none diff --git a/cellxgene_schema_cli/tests/fixtures/examples_validate.py b/cellxgene_schema_cli/tests/fixtures/examples_validate.py index 93d21078..782f2819 100644 --- a/cellxgene_schema_cli/tests/fixtures/examples_validate.py +++ b/cellxgene_schema_cli/tests/fixtures/examples_validate.py @@ -129,6 +129,8 @@ good_obs_visium = pd.DataFrame( [ [ + 1, + 1, "CL:0000066", "EFO:0010961", "MONDO:0100096", @@ -141,8 +143,11 @@ "HsapDv:0000003", "donor_1", "nucleus", + 0, ], [ + 2, + 2, "CL:0000192", "EFO:0010961", "PATO:0000461", @@ -155,10 +160,13 @@ "MmusDv:0000003", "donor_2", "na", + 1, ], ], index=["X", "Y"], columns=[ + "array_col", + "array_row", "cell_type_ontology_term_id", "assay_ontology_term_id", "disease_ontology_term_id", @@ -171,6 +179,7 @@ "development_stage_ontology_term_id", "donor_id", "suspension_type", + "in_tissue", ], ) diff --git a/cellxgene_schema_cli/tests/test_schema_compliance.py b/cellxgene_schema_cli/tests/test_schema_compliance.py index d68e6191..b8c66975 100644 --- a/cellxgene_schema_cli/tests/test_schema_compliance.py +++ b/cellxgene_schema_cli/tests/test_schema_compliance.py @@ -1977,20 +1977,21 @@ def test_obsm_values_no_X_embedding__non_spatial_dataset(self, validator_with_ad ] @pytest.mark.parametrize( - "assay_ontology_term_id, uns_spatial", + "assay_ontology_term_id, adata_spatial", [ - ("EFO:0010961", examples.good_uns_with_visium_spatial["spatial"]), - ("EFO:0030062", examples.good_uns_with_slide_seqV2_spatial["spatial"]), + ("EFO:0010961", examples.adata_visium), + ("EFO:0030062", examples.adata_slide_seqv2), ], ) def test_obsm_values_no_X_embedding__spatial_dataset( - self, validator_with_adata, assay_ontology_term_id, uns_spatial + self, validator_with_adata, assay_ontology_term_id, adata_spatial ): validator = validator_with_adata validator.adata.obsm["harmony"] = validator.adata.obsm["X_umap"] + validator.adata.uns = adata_spatial.uns validator.adata.uns["default_embedding"] = "harmony" - validator.adata.uns["spatial"] = uns_spatial del validator.adata.obsm["X_umap"] + validator.adata.obs = adata_spatial.obs validator.adata.obs["assay_ontology_term_id"] = assay_ontology_term_id validator.adata.obs["suspension_type"] = "na" validator.adata.obs.loc[:, ["suspension_type"]] = validator.adata.obs.astype("category") diff --git a/cellxgene_schema_cli/tests/test_validate.py b/cellxgene_schema_cli/tests/test_validate.py index 0de17c75..9258994d 100644 --- a/cellxgene_schema_cli/tests/test_validate.py +++ b/cellxgene_schema_cli/tests/test_validate.py @@ -306,13 +306,15 @@ def test__validate_with_h5ad_invalid_and_without_labels(self): assert errors assert is_seurat_convertible + +class TestCheckSpatial: def test__validate_spatial_visium_ok(self): validator: Validator = Validator() validator._set_schema_def() validator.adata = adata_visium.copy() # Confirm spatial is valid. - validator._check_spatial() + validator._check_spatial_uns() assert not validator.errors def test__validate_spatial_slide_seqV2_ok(self): @@ -321,10 +323,10 @@ def test__validate_spatial_slide_seqV2_ok(self): validator.adata = adata_slide_seqv2.copy() # Confirm spatial is valid. - validator._check_spatial() + validator._check_spatial_uns() assert not validator.errors - def test__validate_spatial_forbidden_if_10x(self): + def test__validate_spatial_forbidden_if_not_visium_and_is_single_false(self): validator: Validator = Validator() validator._set_schema_def() @@ -333,7 +335,7 @@ def test__validate_spatial_forbidden_if_10x(self): validator.adata.obs = good_obs.copy() # Confirm spatial is not allowed for 10x 3' v2. - validator._check_spatial() + validator._check_spatial_uns() assert len(validator.errors) == 1 assert ( "uns['spatial'] is only allowed for obs['assay_ontology_term_id'] values " @@ -347,7 +349,7 @@ def test__validate_spatial_required_if_visium(self): validator.adata.uns = good_uns.copy() # Confirm spatial is required for Visium. - validator._check_spatial() + validator._check_spatial_uns() assert len(validator.errors) == 1 assert ( "uns['spatial'] is required for obs['assay_ontology_term_id'] values " @@ -361,7 +363,7 @@ def test__validate_spatial_required_if_slide_seqV2(self): validator.adata.uns = good_uns.copy() # Confirm spatial is required for Slide-seqV2. - validator._check_spatial() + validator._check_spatial_uns() assert len(validator.errors) == 1 assert ( "uns['spatial'] is required for obs['assay_ontology_term_id'] values " @@ -375,7 +377,7 @@ def test__validate_spatial_allowed_keys_error(self): validator.adata.uns["spatial"]["invalid_key"] = True # Confirm additional key is identified as invalid. - validator._check_spatial() + validator._check_spatial_uns() assert validator.errors assert ( "uns['spatial'] must contain only two top-level keys: 'is_single' and a library_id. " @@ -389,7 +391,7 @@ def test__validate_is_single_required_visium_error(self): validator.adata.uns["spatial"].pop("is_single") # Confirm is_single is identified as required. - validator._check_spatial() + validator._check_spatial_uns() assert validator.errors assert "uns['spatial'] must contain the key 'is_single'." in validator.errors[0] @@ -405,7 +407,7 @@ def test__validate_is_single_required_slide_seqV2_error(self): } # Confirm is_single is identified as required. - validator._check_spatial() + validator._check_spatial_uns() assert validator.errors assert "uns['spatial'] must contain the key 'is_single'." in validator.errors[0] @@ -416,7 +418,7 @@ def test__validate_is_single_boolean_error(self): validator.adata.uns["spatial"]["is_single"] = "invalid" # Confirm is_single is identified as invalid. - validator._check_spatial() + validator._check_spatial_uns() assert validator.errors assert "uns['spatial']['is_single'] must be of boolean type" in validator.errors[0] @@ -429,7 +431,7 @@ def test__validate_library_id_forbidden_if_slide_seqV2(self): validator.adata.uns = good_uns_with_visium_spatial # Confirm library_id is not allowed for Slide-seqV2. - validator._check_spatial() + validator._check_spatial_uns() assert len(validator.errors) == 1 assert ( "uns['spatial'][library_id] is only allowed for obs['assay_ontology_term_id'] " @@ -437,7 +439,7 @@ def test__validate_library_id_forbidden_if_slide_seqV2(self): in validator.errors[0] ) - def test__validate_library_id_forbidden_if_visium_and_is_single_false(self): + def test__validate_library_id_forbidden_if_visium_or_is_single_false(self): validator: Validator = Validator() validator._set_schema_def() @@ -446,7 +448,7 @@ def test__validate_library_id_forbidden_if_visium_and_is_single_false(self): validator.adata.uns["spatial"]["is_single"] = np.bool_(False) # Confirm library_id is not allowed for Visium if is_single is False. - validator._check_spatial() + validator._check_spatial_uns() assert len(validator.errors) == 1 assert ( "uns['spatial'][library_id] is only allowed for obs['assay_ontology_term_id'] " @@ -461,7 +463,7 @@ def test__validate_library_id_required_if_visium(self): validator.adata.uns["spatial"].pop(visium_library_id) # Confirm library_id is identified as required. - validator._check_spatial() + validator._check_spatial_uns() assert validator.errors assert ( "uns['spatial'] must contain at least one key representing the library_id when obs['assay_ontology_term_id'] " @@ -476,7 +478,7 @@ def test__validate_library_id_allowed_keys_error(self): validator.adata.uns["spatial"][visium_library_id]["invalid_key"] = True # Confirm additional key is identified as invalid. - validator._check_spatial() + validator._check_spatial_uns() assert validator.errors assert ( "uns['spatial'][library_id] can only contain the keys 'images' and 'scalefactors'." in validator.errors[0] @@ -489,7 +491,7 @@ def test__validate_images_required_error(self): validator.adata.uns["spatial"][visium_library_id].pop("images") # Confirm images is required. - validator._check_spatial() + validator._check_spatial_uns() assert validator.errors assert "uns['spatial'][library_id] must contain the key 'images'." in validator.errors[0] @@ -500,7 +502,7 @@ def test__validate_images_allowed_keys_error(self): validator.adata.uns["spatial"][visium_library_id]["images"]["invalid_key"] = True # Confirm additional key is identified as invalid. - validator._check_spatial() + validator._check_spatial_uns() assert validator.errors assert ( "uns['spatial'][library_id]['images'] can only contain the keys 'fullres' and 'hires'." @@ -514,7 +516,7 @@ def test__validate_images_hires_required_error(self): validator.adata.uns["spatial"][visium_library_id]["images"].pop("hires") # Confirm hires is required. - validator._check_spatial() + validator._check_spatial_uns() assert validator.errors assert "uns['spatial'][library_id]['images'] must contain the key 'hires'." in validator.errors[0] @@ -525,7 +527,7 @@ def test__validate_images_fullres_optional(self): validator.adata.uns["spatial"][visium_library_id]["images"].pop("fullres") # Confirm fullres is optional. - validator._check_spatial() + validator._check_spatial_uns() assert not validator.errors assert validator.warnings assert ( @@ -541,7 +543,7 @@ def test__validate_images_hires_is_ndarray_error(self): validator.adata.uns["spatial"][visium_library_id]["images"]["hires"] = "invalid" # Confirm hires is identified as invalid. - validator._check_spatial() + validator._check_spatial_uns() assert validator.errors assert "uns['spatial'][library_id]['images']['hires'] must be of numpy.ndarray type" in validator.errors[0] @@ -552,7 +554,7 @@ def test__valide_images_hires_is_shape_error(self): validator.adata.uns["spatial"][visium_library_id]["images"]["hires"] = np.zeros((1, 1)) # Confirm hires is identified as invalid. - validator._check_spatial() + validator._check_spatial_uns() assert validator.errors assert "uns['spatial'][library_id]['images']['hires'] must have shape (,,3)" in validator.errors[0] @@ -563,7 +565,7 @@ def test__validate_images_hires_max_dimension_greater_than_error(self): validator.adata.uns["spatial"][visium_library_id]["images"]["hires"] = np.zeros((1, 2001, 3)) # Confirm hires is identified as invalid. - validator._check_spatial() + validator._check_spatial_uns() assert validator.errors assert ( "The largest dimension of uns['spatial'][library_id]['images']['hires'] must be 2000 pixels" @@ -577,7 +579,7 @@ def test__validate_images_hires_max_dimension_less_than_error(self): validator.adata.uns["spatial"][visium_library_id]["images"]["hires"] = np.zeros((1, 1999, 3)) # Confirm hires is identified as invalid. - validator._check_spatial() + validator._check_spatial_uns() assert validator.errors assert ( "The largest dimension of uns['spatial'][library_id]['images']['hires'] must be 2000 pixels" @@ -591,7 +593,7 @@ def test__validate_images_fullres_is_ndarray_error(self): validator.adata.uns["spatial"][visium_library_id]["images"]["fullres"] = "invalid" # Confirm fullres is identified as invalid. - validator._check_spatial() + validator._check_spatial_uns() assert validator.errors assert "uns['spatial'][library_id]['images']['fullres'] must be of numpy.ndarray type" in validator.errors[0] @@ -602,7 +604,7 @@ def test__validate_images_fullres_is_shape_error(self): validator.adata.uns["spatial"][visium_library_id]["images"]["fullres"] = np.zeros((1, 1)) # Confirm fullres is identified as invalid. - validator._check_spatial() + validator._check_spatial_uns() assert validator.errors assert "uns['spatial'][library_id]['images']['fullres'] must have shape (,,3)" in validator.errors[0] @@ -613,7 +615,7 @@ def test__validate_scalefactors_required_error(self): validator.adata.uns["spatial"][visium_library_id].pop("scalefactors") # Confirm scalefactors is required. - validator._check_spatial() + validator._check_spatial_uns() assert validator.errors assert "uns['spatial'][library_id] must contain the key 'scalefactors'." in validator.errors[0] @@ -624,7 +626,7 @@ def test__validate_scalefactors_allowed_keys_error(self): validator.adata.uns["spatial"][visium_library_id]["scalefactors"]["invalid_key"] = True # Confirm additional key is identified as invalid. - validator._check_spatial() + validator._check_spatial_uns() assert validator.errors assert ( "uns['spatial'][library_id]['scalefactors'] can only contain the keys " @@ -638,7 +640,7 @@ def test__validate_scalefactors_spot_diameter_fullres_required_error(self): validator.adata.uns["spatial"][visium_library_id]["scalefactors"].pop("spot_diameter_fullres") # Confirm spot_diameter_fullres is required. - validator._check_spatial() + validator._check_spatial_uns() assert validator.errors assert ( "uns['spatial'][library_id]['scalefactors'] must contain the key 'spot_diameter_fullres'." @@ -652,7 +654,7 @@ def test__validate_scalefactors_tissue_hires_scalef_required_error(self): validator.adata.uns["spatial"][visium_library_id]["scalefactors"].pop("tissue_hires_scalef") # Confirm tissue_hires_scalef is required. - validator._check_spatial() + validator._check_spatial_uns() assert validator.errors assert ( "uns['spatial'][library_id]['scalefactors'] must contain the key 'tissue_hires_scalef'." @@ -666,7 +668,7 @@ def test__validate_scalefactors_spot_diameter_fullres_is_float_error(self): validator.adata.uns["spatial"][visium_library_id]["scalefactors"]["spot_diameter_fullres"] = "invalid" # Confirm spot_diameter_fullres is identified as invalid. - validator._check_spatial() + validator._check_spatial_uns() assert validator.errors assert ( "uns['spatial'][library_id]['scalefactors']['spot_diameter_fullres'] must be of type float" @@ -680,13 +682,96 @@ def test__validate_scalefactors_tissue_hires_scalef_is_float_error(self): validator.adata.uns["spatial"][visium_library_id]["scalefactors"]["tissue_hires_scalef"] = "invalid" # Confirm tissue_hires_scalef is identified as invalid. - validator._check_spatial() + validator._check_spatial_uns() assert validator.errors assert ( "uns['spatial'][library_id]['scalefactors']['tissue_hires_scalef'] must be of type float" in validator.errors[0] ) + def test__validate_tissue_position_forbidden_if_not_visium_or_is_single_false(self): + validator: Validator = Validator() + validator._set_schema_def() + + # Create Visium uns (with spatial) with 10x 3' v2 obs. + validator.adata = adata_visium.copy() + validator.adata.obs.assay_ontology_term_id = "EFO:0009899" + + # Confirm tissue positions are not allowed for not Visium and not is single. + validator._check_spatial_obs() + assert len(validator.errors) == 3 + tissue_position_names = ["array_col", "array_row", "in_tissue"] + for i, tissue_position_name in enumerate(tissue_position_names): + assert ( + f"obs['{tissue_position_name}'] is only allowed for obs['assay_ontology_term_id'] " + "'EFO:0010961' (Visium Spatial Gene Expression) and uns['spatial']['is_single'] is True." + in validator.errors[i] + ) + + @pytest.mark.parametrize("tissue_position_name", ["array_col", "array_row", "in_tissue"]) + def test__validate_tissue_position_required(self, tissue_position_name): + validator: Validator = Validator() + validator._set_schema_def() + validator.adata = adata_visium.copy() + validator.adata.obs.pop(tissue_position_name) + + validator._check_spatial_obs() + assert validator.errors + assert ( + f"obs['{tissue_position_name}'] is required for obs['assay_ontology_term_id'] 'EFO:0010961' " + "(Visium Spatial Gene Expression) and uns['spatial']['is_single'] True." in validator.errors[0] + ) + + @pytest.mark.parametrize("tissue_position_name", ["array_col", "array_row", "in_tissue"]) + def test__validate_tissue_position_int_error(self, tissue_position_name): + validator: Validator = Validator() + validator._set_schema_def() + validator.adata = adata_visium.copy() + validator.adata.obs[tissue_position_name] = 1.0 + + # Confirm tissue_position is identified as invalid. + validator._check_spatial_obs() + assert validator.errors + assert f"obs['{tissue_position_name}'] must be of int type" in validator.errors[0] + + @pytest.mark.parametrize( + "tissue_position_name, min, error_message_token", + [ + ("array_col", 0, "between 0 and 127"), + ("array_row", 0, "between 0 and 77"), + ("in_tissue", 0, "0 or 1"), + ], + ) + def test__validate_tissue_position_int_min_error(self, tissue_position_name, min, error_message_token): + validator: Validator = Validator() + validator._set_schema_def() + validator.adata = adata_visium.copy() + validator.adata.obs[tissue_position_name] = min - 1 + + # Confirm tissue_position is identified as invalid. + validator._check_spatial_obs() + assert validator.errors + assert f"obs['{tissue_position_name}'] must be {error_message_token}" in validator.errors[0] + + @pytest.mark.parametrize( + "tissue_position_name, max, error_message_token", + [ + ("array_col", 127, "between 0 and 127"), + ("array_row", 77, "between 0 and 77"), + ("in_tissue", 1, "0 or 1"), + ], + ) + def test__validate_tissue_position_int_max_error(self, tissue_position_name, max, error_message_token): + validator: Validator = Validator() + validator._set_schema_def() + validator.adata = adata_visium.copy() + validator.adata.obs[tissue_position_name] = max + 1 + + # Confirm tissue_position is identified as invalid. + validator._check_spatial_obs() + assert validator.errors + assert f"obs['{tissue_position_name}'] must be {error_message_token}" in validator.errors[0] + class TestSeuratConvertibility: def validation_helper(self, matrix, raw=None): From 476b8694c0a9189bb831f15b0c8df71012a47d7e Mon Sep 17 00:00:00 2001 From: MillenniumFalconMechanic Date: Fri, 26 Apr 2024 14:00:41 -0700 Subject: [PATCH 08/38] Corrected test name --- cellxgene_schema_cli/tests/test_validate.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cellxgene_schema_cli/tests/test_validate.py b/cellxgene_schema_cli/tests/test_validate.py index 9258994d..41859da1 100644 --- a/cellxgene_schema_cli/tests/test_validate.py +++ b/cellxgene_schema_cli/tests/test_validate.py @@ -439,7 +439,7 @@ def test__validate_library_id_forbidden_if_slide_seqV2(self): in validator.errors[0] ) - def test__validate_library_id_forbidden_if_visium_or_is_single_false(self): + def test__validate_library_id_forbidden_if_visium_and_is_single_false(self): validator: Validator = Validator() validator._set_schema_def() From b6020a8db2f0822995f9b08a34405f3370c1ec86 Mon Sep 17 00:00:00 2001 From: MillenniumFalconMechanic Date: Fri, 26 Apr 2024 14:09:30 -0700 Subject: [PATCH 09/38] Linting --- cellxgene_schema_cli/tests/test_validate.py | 29 +++++++++++++++++---- 1 file changed, 24 insertions(+), 5 deletions(-) diff --git a/cellxgene_schema_cli/tests/test_validate.py b/cellxgene_schema_cli/tests/test_validate.py index 41859da1..29285af9 100644 --- a/cellxgene_schema_cli/tests/test_validate.py +++ b/cellxgene_schema_cli/tests/test_validate.py @@ -326,11 +326,11 @@ def test__validate_spatial_slide_seqV2_ok(self): validator._check_spatial_uns() assert not validator.errors - def test__validate_spatial_forbidden_if_not_visium_and_is_single_false(self): + def test__validate_spatial_forbidden_if_not_visium_or_slide_seqv2(self): validator: Validator = Validator() validator._set_schema_def() - # Create Visium uns (with spatial) with 10x 3' v2 obs. + # Create Visium adata (with spatial) with 10x 3' v2 obs. validator.adata = adata_visium.copy() validator.adata.obs = good_obs.copy() @@ -689,15 +689,34 @@ def test__validate_scalefactors_tissue_hires_scalef_is_float_error(self): in validator.errors[0] ) - def test__validate_tissue_position_forbidden_if_not_visium_or_is_single_false(self): + def test__validate_tissue_position_forbidden_if_not_visium(self): validator: Validator = Validator() validator._set_schema_def() - # Create Visium uns (with spatial) with 10x 3' v2 obs. + # Create Visium adata (with spatial) with 10x 3' v2 obs. validator.adata = adata_visium.copy() validator.adata.obs.assay_ontology_term_id = "EFO:0009899" - # Confirm tissue positions are not allowed for not Visium and not is single. + # Confirm tissue positions are not allowed for 10x 3' v2. + validator._check_spatial_obs() + assert len(validator.errors) == 3 + tissue_position_names = ["array_col", "array_row", "in_tissue"] + for i, tissue_position_name in enumerate(tissue_position_names): + assert ( + f"obs['{tissue_position_name}'] is only allowed for obs['assay_ontology_term_id'] " + "'EFO:0010961' (Visium Spatial Gene Expression) and uns['spatial']['is_single'] is True." + in validator.errors[i] + ) + + def test__validate_tissue_position_forbidden_if_not_visium_and_is_single_false(self): + validator: Validator = Validator() + validator._set_schema_def() + + # Create Visium adata (with spatial) with is_single false. + validator.adata = adata_visium.copy() + validator.adata.uns["spatial"]["is_single"] = False + + # Confirm tissue positions are not allowed. validator._check_spatial_obs() assert len(validator.errors) == 3 tissue_position_names = ["array_col", "array_row", "in_tissue"] From 74496f5a12b8bb64daefd220abce5b36a315303d Mon Sep 17 00:00:00 2001 From: MillenniumFalconMechanic Date: Fri, 26 Apr 2024 14:21:30 -0700 Subject: [PATCH 10/38] Updated forbidden tests --- cellxgene_schema_cli/tests/test_validate.py | 27 ++++----------------- 1 file changed, 5 insertions(+), 22 deletions(-) diff --git a/cellxgene_schema_cli/tests/test_validate.py b/cellxgene_schema_cli/tests/test_validate.py index 29285af9..f91c180d 100644 --- a/cellxgene_schema_cli/tests/test_validate.py +++ b/cellxgene_schema_cli/tests/test_validate.py @@ -689,32 +689,15 @@ def test__validate_scalefactors_tissue_hires_scalef_is_float_error(self): in validator.errors[0] ) - def test__validate_tissue_position_forbidden_if_not_visium(self): + @pytest.mark.parametrize("assay_ontology_term_id, is_single", [("EFO:0030062", False), ("EFO:0010961", False)]) + def test__validate_tissue_position_forbidden(self, assay_ontology_term_id, is_single): validator: Validator = Validator() validator._set_schema_def() - # Create Visium adata (with spatial) with 10x 3' v2 obs. - validator.adata = adata_visium.copy() - validator.adata.obs.assay_ontology_term_id = "EFO:0009899" - - # Confirm tissue positions are not allowed for 10x 3' v2. - validator._check_spatial_obs() - assert len(validator.errors) == 3 - tissue_position_names = ["array_col", "array_row", "in_tissue"] - for i, tissue_position_name in enumerate(tissue_position_names): - assert ( - f"obs['{tissue_position_name}'] is only allowed for obs['assay_ontology_term_id'] " - "'EFO:0010961' (Visium Spatial Gene Expression) and uns['spatial']['is_single'] is True." - in validator.errors[i] - ) - - def test__validate_tissue_position_forbidden_if_not_visium_and_is_single_false(self): - validator: Validator = Validator() - validator._set_schema_def() - - # Create Visium adata (with spatial) with is_single false. + # Create Visium adata with tissue positions, update assay_ontology_term_id and is_single to trigger error. validator.adata = adata_visium.copy() - validator.adata.uns["spatial"]["is_single"] = False + validator.adata.obs.assay_ontology_term_id = assay_ontology_term_id + validator.adata.uns["spatial"]["is_single"] = is_single # Confirm tissue positions are not allowed. validator._check_spatial_obs() From b37c52cc5122a6cc434d043b813e65fb59a01425 Mon Sep 17 00:00:00 2001 From: Mim Hastie Date: Mon, 29 Apr 2024 09:56:34 -0700 Subject: [PATCH 11/38] Update cellxgene_schema_cli/cellxgene_schema/validate.py Co-authored-by: Nayib Gloria <55710092+nayib-jose-gloria@users.noreply.github.com> --- cellxgene_schema_cli/cellxgene_schema/validate.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cellxgene_schema_cli/cellxgene_schema/validate.py b/cellxgene_schema_cli/cellxgene_schema/validate.py index 74c1511a..dd9e8fad 100644 --- a/cellxgene_schema_cli/cellxgene_schema/validate.py +++ b/cellxgene_schema_cli/cellxgene_schema/validate.py @@ -1373,7 +1373,7 @@ def _validate_spatial_tissue_position( if obs_tissue_position is None: self.errors.append( f"obs['{tissue_position_name}'] is required for obs['assay_ontology_term_id'] " - "'EFO:0010961' (Visium Spatial Gene Expression) and uns['spatial']['is_single'] True." + "'EFO:0010961' (Visium Spatial Gene Expression) and uns['spatial']['is_single'] is True." ) return From 126cdb12354169224c0eeca1c89da2828a156a9d Mon Sep 17 00:00:00 2001 From: MillenniumFalconMechanic Date: Mon, 29 Apr 2024 11:02:54 -0700 Subject: [PATCH 12/38] Updated tests --- cellxgene_schema_cli/tests/test_validate.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/cellxgene_schema_cli/tests/test_validate.py b/cellxgene_schema_cli/tests/test_validate.py index f91c180d..fc952467 100644 --- a/cellxgene_schema_cli/tests/test_validate.py +++ b/cellxgene_schema_cli/tests/test_validate.py @@ -689,7 +689,9 @@ def test__validate_scalefactors_tissue_hires_scalef_is_float_error(self): in validator.errors[0] ) - @pytest.mark.parametrize("assay_ontology_term_id, is_single", [("EFO:0030062", False), ("EFO:0010961", False)]) + @pytest.mark.parametrize( + "assay_ontology_term_id, is_single", [("EFO:0030062", False), ("EFO:0010961", False), ("EFO:0030062", True)] + ) def test__validate_tissue_position_forbidden(self, assay_ontology_term_id, is_single): validator: Validator = Validator() validator._set_schema_def() @@ -721,7 +723,7 @@ def test__validate_tissue_position_required(self, tissue_position_name): assert validator.errors assert ( f"obs['{tissue_position_name}'] is required for obs['assay_ontology_term_id'] 'EFO:0010961' " - "(Visium Spatial Gene Expression) and uns['spatial']['is_single'] True." in validator.errors[0] + "(Visium Spatial Gene Expression) and uns['spatial']['is_single'] is True." in validator.errors[0] ) @pytest.mark.parametrize("tissue_position_name", ["array_col", "array_row", "in_tissue"]) From 3b014781797117130b35259fe224a660ed3e47b3 Mon Sep 17 00:00:00 2001 From: MillenniumFalconMechanic Date: Mon, 29 Apr 2024 12:44:12 -0700 Subject: [PATCH 13/38] Review updates --- .../cellxgene_schema/validate.py | 59 +++++++++++-------- cellxgene_schema_cli/tests/test_validate.py | 31 ++++------ 2 files changed, 46 insertions(+), 44 deletions(-) diff --git a/cellxgene_schema_cli/cellxgene_schema/validate.py b/cellxgene_schema_cli/cellxgene_schema/validate.py index dd9e8fad..25674d3b 100644 --- a/cellxgene_schema_cli/cellxgene_schema/validate.py +++ b/cellxgene_schema_cli/cellxgene_schema/validate.py @@ -25,6 +25,10 @@ ASSAY_VISIUM = "EFO:0010961" ASSAY_SLIDE_SEQV2 = "EFO:0030062" +ERROR_SUFFIX_VISIUM_AND_IS_SINGLE_TRUE = "obs['assay_ontology_term_id'] 'EFO:0010961' (Visium Spatial Gene Expression) and uns['spatial']['is_single'] is True." +ERROR_SUFFIX_VISIUM_AND_IS_SINGLE_TRUE_FORBIDDEN = f"is only allowed for {ERROR_SUFFIX_VISIUM_AND_IS_SINGLE_TRUE}" +ERROR_SUFFIX_VISIUM_AND_IS_SINGLE_TRUE_REQUIRED = f"is required for {ERROR_SUFFIX_VISIUM_AND_IS_SINGLE_TRUE}" + class Validator: """Handles validation of AnnData""" @@ -46,6 +50,8 @@ def reset(self): self._raw_layer_exists = None self.is_seurat_convertible: bool = True self.is_spatial = None + self.is_visium = None + self.is_visium_and_is_single_true = None # Matrix (e.g., X, raw.X, ...) number non-zero cache self.number_non_zero = dict() @@ -76,6 +82,20 @@ def _is_supported_spatial_assay(self) -> bool: self.is_spatial = False return self.is_spatial + def _is_visium_and_is_single_true(self) -> bool: + """ + Determine if the assay_ontology_term_id is Visium (EFO:0010961) and uns.spatial.is_single is True. + + :return True if assay_ontology_term_id is Visium and is_single_cell is True, False otherwise. + :rtype bool + """ + if self.is_visium_and_is_single_true is None: + try: + self.is_visium_and_is_single_true = self._is_visium() and self.adata.uns["spatial"]["is_single"] + except AttributeError: + self.is_visium_and_is_single_true = False + return self.is_visium_and_is_single_true + def _validate_encoding_version(self): import h5py @@ -1342,14 +1362,11 @@ def _check_spatial_obs(self): return # Validate tissue positions. - is_visium_and_uns_is_single = self._is_visium() and uns_component["spatial"]["is_single"] - self._validate_spatial_tissue_position("array_col", 0, 127, is_visium_and_uns_is_single) - self._validate_spatial_tissue_position("array_row", 0, 77, is_visium_and_uns_is_single) - self._validate_spatial_tissue_position("in_tissue", 0, 1, is_visium_and_uns_is_single) + self._validate_spatial_tissue_position("array_col", 0, 127) + self._validate_spatial_tissue_position("array_row", 0, 77) + self._validate_spatial_tissue_position("in_tissue", 0, 1) - def _validate_spatial_tissue_position( - self, tissue_position_name: str, min: int, max: int, is_visium_and_uns_is_single: bool - ): + def _validate_spatial_tissue_position(self, tissue_position_name: str, min: int, max: int): """ Validate tissue position is allowed and required, and are integers within the given range. Validation is not defined in schema definition yaml. @@ -1357,12 +1374,10 @@ def _validate_spatial_tissue_position( :rtype none """ # Tissue position is foribidden if assay is not Visium or is_single is False. + is_visium_and_uns_is_single = self._is_visium_and_is_single_true() obs_tissue_position = self.adata.obs.get(tissue_position_name) if obs_tissue_position is not None and not is_visium_and_uns_is_single: - self.errors.append( - f"obs['{tissue_position_name}'] is only allowed for obs['assay_ontology_term_id'] " - "'EFO:0010961' (Visium Spatial Gene Expression) and uns['spatial']['is_single'] is True." - ) + self.errors.append(f"obs['{tissue_position_name}'] {ERROR_SUFFIX_VISIUM_AND_IS_SINGLE_TRUE_FORBIDDEN}") return # Exit if we're not dealing with Visium and _is_single True as no further checks are necessary. @@ -1371,10 +1386,7 @@ def _validate_spatial_tissue_position( # Tissue position is required. if obs_tissue_position is None: - self.errors.append( - f"obs['{tissue_position_name}'] is required for obs['assay_ontology_term_id'] " - "'EFO:0010961' (Visium Spatial Gene Expression) and uns['spatial']['is_single'] is True." - ) + self.errors.append(f"obs['{tissue_position_name}'] {ERROR_SUFFIX_VISIUM_AND_IS_SINGLE_TRUE_REQUIRED}") return # Tissue position must be an int. @@ -1459,13 +1471,9 @@ def _check_spatial_uns(self): return # library_id is forbidden if assay is not Visium or is_single is false. - is_visium = self._is_visium() - is_visium_and_uns_is_single = is_visium and uns_is_single + is_visium_and_uns_is_single = self._is_visium_and_is_single_true() if len(library_ids) > 0 and not is_visium_and_uns_is_single: - self.errors.append( - "uns['spatial'][library_id] is only allowed for obs['assay_ontology_term_id'] " - "'EFO:0010961' (Visium Spatial Gene Expression) and uns['spatial']['is_single'] is True." - ) + self.errors.append(f"uns['spatial'][library_id] {ERROR_SUFFIX_VISIUM_AND_IS_SINGLE_TRUE_FORBIDDEN}") # Exit as library_id is not allowed. return @@ -1476,8 +1484,7 @@ def _check_spatial_uns(self): # library_id is required if assay is Visium and is_single is True. if len(library_ids) == 0: self.errors.append( - "uns['spatial'] must contain at least one key representing the library_id when obs['assay_ontology_term_id'] " - "'EFO:0010961' (Visium Spatial Gene Expression) and uns['spatial']['is_single'] is True." + f"uns['spatial'] must contain at least one key representing the library_id when {ERROR_SUFFIX_VISIUM_AND_IS_SINGLE_TRUE}" ) # Exit as library_id is missing. return @@ -1595,8 +1602,10 @@ def _is_visium(self) -> bool: :return True if assay_ontology_term_id is Visium, False otherwise. :rtype bool """ - assay_ontology_term_id = self.adata.obs.get("assay_ontology_term_id") - return assay_ontology_term_id is not None and (assay_ontology_term_id == ASSAY_VISIUM).any() + if self.is_visium is None: + assay_ontology_term_id = self.adata.obs.get("assay_ontology_term_id") + self.is_visium = assay_ontology_term_id is not None and (assay_ontology_term_id == ASSAY_VISIUM).any() + return self.is_visium def _validate_spatial_image_shape(self, image_name: str, image: np.ndarray, max_dimension: int = None): """ diff --git a/cellxgene_schema_cli/tests/test_validate.py b/cellxgene_schema_cli/tests/test_validate.py index fc952467..6fd336a5 100644 --- a/cellxgene_schema_cli/tests/test_validate.py +++ b/cellxgene_schema_cli/tests/test_validate.py @@ -10,7 +10,13 @@ import pytest from cellxgene_ontology_guide.entities import Ontology from cellxgene_schema.schema import get_schema_definition -from cellxgene_schema.validate import Validator, validate +from cellxgene_schema.validate import ( + ERROR_SUFFIX_VISIUM_AND_IS_SINGLE_TRUE, + ERROR_SUFFIX_VISIUM_AND_IS_SINGLE_TRUE_FORBIDDEN, + ERROR_SUFFIX_VISIUM_AND_IS_SINGLE_TRUE_REQUIRED, + Validator, + validate, +) from cellxgene_schema.write_labels import AnnDataLabelAppender from fixtures.examples_validate import adata as adata_valid from fixtures.examples_validate import ( @@ -433,11 +439,7 @@ def test__validate_library_id_forbidden_if_slide_seqV2(self): # Confirm library_id is not allowed for Slide-seqV2. validator._check_spatial_uns() assert len(validator.errors) == 1 - assert ( - "uns['spatial'][library_id] is only allowed for obs['assay_ontology_term_id'] " - "'EFO:0010961' (Visium Spatial Gene Expression) and uns['spatial']['is_single'] is True." - in validator.errors[0] - ) + assert f"uns['spatial'][library_id] {ERROR_SUFFIX_VISIUM_AND_IS_SINGLE_TRUE_FORBIDDEN}" in validator.errors[0] def test__validate_library_id_forbidden_if_visium_and_is_single_false(self): validator: Validator = Validator() @@ -450,11 +452,7 @@ def test__validate_library_id_forbidden_if_visium_and_is_single_false(self): # Confirm library_id is not allowed for Visium if is_single is False. validator._check_spatial_uns() assert len(validator.errors) == 1 - assert ( - "uns['spatial'][library_id] is only allowed for obs['assay_ontology_term_id'] " - "'EFO:0010961' (Visium Spatial Gene Expression) and uns['spatial']['is_single'] is True." - in validator.errors[0] - ) + assert f"uns['spatial'][library_id] {ERROR_SUFFIX_VISIUM_AND_IS_SINGLE_TRUE_FORBIDDEN}" in validator.errors[0] def test__validate_library_id_required_if_visium(self): validator: Validator = Validator() @@ -466,8 +464,7 @@ def test__validate_library_id_required_if_visium(self): validator._check_spatial_uns() assert validator.errors assert ( - "uns['spatial'] must contain at least one key representing the library_id when obs['assay_ontology_term_id'] " - "'EFO:0010961' (Visium Spatial Gene Expression) and uns['spatial']['is_single'] is True." + f"uns['spatial'] must contain at least one key representing the library_id when {ERROR_SUFFIX_VISIUM_AND_IS_SINGLE_TRUE}" in validator.errors[0] ) @@ -707,8 +704,7 @@ def test__validate_tissue_position_forbidden(self, assay_ontology_term_id, is_si tissue_position_names = ["array_col", "array_row", "in_tissue"] for i, tissue_position_name in enumerate(tissue_position_names): assert ( - f"obs['{tissue_position_name}'] is only allowed for obs['assay_ontology_term_id'] " - "'EFO:0010961' (Visium Spatial Gene Expression) and uns['spatial']['is_single'] is True." + f"obs['{tissue_position_name}'] {ERROR_SUFFIX_VISIUM_AND_IS_SINGLE_TRUE_FORBIDDEN}" in validator.errors[i] ) @@ -721,10 +717,7 @@ def test__validate_tissue_position_required(self, tissue_position_name): validator._check_spatial_obs() assert validator.errors - assert ( - f"obs['{tissue_position_name}'] is required for obs['assay_ontology_term_id'] 'EFO:0010961' " - "(Visium Spatial Gene Expression) and uns['spatial']['is_single'] is True." in validator.errors[0] - ) + assert f"obs['{tissue_position_name}'] {ERROR_SUFFIX_VISIUM_AND_IS_SINGLE_TRUE_REQUIRED}" in validator.errors[0] @pytest.mark.parametrize("tissue_position_name", ["array_col", "array_row", "in_tissue"]) def test__validate_tissue_position_int_error(self, tissue_position_name): From 1fea0cde3df269a6be01bb968b59c21ab91feb13 Mon Sep 17 00:00:00 2001 From: nayib-jose-gloria Date: Tue, 30 Apr 2024 15:46:22 -0400 Subject: [PATCH 14/38] feat: update validation for obs['is_primary_data'] to enforce False if uns['spatial']['is_single'] is False --- .../cellxgene_schema/validate.py | 20 ++++++ .../tests/fixtures/examples_validate.py | 64 +++++++++++++++++++ .../tests/test_schema_compliance.py | 15 +++++ cellxgene_schema_cli/tests/test_validate.py | 11 ++++ 4 files changed, 110 insertions(+) diff --git a/cellxgene_schema_cli/cellxgene_schema/validate.py b/cellxgene_schema_cli/cellxgene_schema/validate.py index 25674d3b..a228b081 100644 --- a/cellxgene_schema_cli/cellxgene_schema/validate.py +++ b/cellxgene_schema_cli/cellxgene_schema/validate.py @@ -1366,6 +1366,26 @@ def _check_spatial_obs(self): self._validate_spatial_tissue_position("array_row", 0, 77) self._validate_spatial_tissue_position("in_tissue", 0, 1) + self._validate_spatial_is_primary_data() + + def _validate_spatial_is_primary_data(self): + """ + Validate is_primary_data for spatial datasets. + """ + is_single = ( + self.adata.uns["spatial"]["is_single"] + if "spatial" in self.adata.uns and "is_single" in self.adata.uns["spatial"] + else None + ) + obs = getattr_anndata(self.adata, "obs") + if obs is None or "is_primary_data" not in obs: + return + if is_single is False and obs["is_primary_data"].any(): + self.errors.append( + "When uns['spatial']['is_single'] is False, " + "obs['is_primary_data'] must be False for all rows." + ) + def _validate_spatial_tissue_position(self, tissue_position_name: str, min: int, max: int): """ Validate tissue position is allowed and required, and are integers within the given range. Validation is not defined in diff --git a/cellxgene_schema_cli/tests/fixtures/examples_validate.py b/cellxgene_schema_cli/tests/fixtures/examples_validate.py index 782f2819..3303efc0 100644 --- a/cellxgene_schema_cli/tests/fixtures/examples_validate.py +++ b/cellxgene_schema_cli/tests/fixtures/examples_validate.py @@ -240,6 +240,58 @@ good_obs_slide_seqv2.loc[:, ["suspension_type"]] = good_obs_slide_seqv2.astype("category") good_obs_slide_seqv2.loc[:, ["tissue_type"]] = good_obs.astype("category") +good_obs_visium_is_single_false = pd.DataFrame( + [ + [ + "CL:0000066", + "EFO:0010961", + "MONDO:0100096", + "NCBITaxon:9606", + "PATO:0000383", + "UBERON:0002048", + "tissue", + False, + "HANCESTRO:0575", + "HsapDv:0000003", + "donor_1", + "na", + ], + [ + "CL:0000192", + "EFO:0010961", + "PATO:0000461", + "NCBITaxon:10090", + "unknown", + "CL:0000192", + "cell culture", + False, + "na", + "MmusDv:0000003", + "donor_2", + "na", + ], + ], + index=["X", "Y"], + columns=[ + "cell_type_ontology_term_id", + "assay_ontology_term_id", + "disease_ontology_term_id", + "organism_ontology_term_id", + "sex_ontology_term_id", + "tissue_ontology_term_id", + "tissue_type", + "is_primary_data", + "self_reported_ethnicity_ontology_term_id", + "development_stage_ontology_term_id", + "donor_id", + "suspension_type", + ], +) + +good_obs_visium_is_single_false.loc[:, ["donor_id"]] = good_obs_visium_is_single_false.astype("category") +good_obs_visium_is_single_false.loc[:, ["suspension_type"]] = good_obs_visium_is_single_false.astype("category") +good_obs_visium_is_single_false.loc[:, ["tissue_type"]] = good_obs_visium_is_single_false.astype("category") + # --- # 2. Creating individual var components: valid object and valid object and with labels @@ -327,6 +379,14 @@ }, } +good_uns_with_is_single_false = { + "title": "A title", + "default_embedding": "X_umap", + "X_approximate_distribution": "normal", + "batch_condition": ["is_primary_data"], + "spatial": {"is_single": False}, +} + good_uns_with_slide_seqV2_spatial = { "title": "A title", "default_embedding": "X_umap", @@ -408,6 +468,10 @@ var=good_var, ) +adata_spatial_is_single_false = anndata.AnnData( + X=sparse.csr_matrix(X), obs=good_obs_visium_is_single_false, uns=good_uns_with_is_single_false, obsm=good_obsm, var=good_var +) + # anndata for testing migration unmigrated_obs = pd.DataFrame( [ diff --git a/cellxgene_schema_cli/tests/test_schema_compliance.py b/cellxgene_schema_cli/tests/test_schema_compliance.py index b8c66975..ccca184f 100644 --- a/cellxgene_schema_cli/tests/test_schema_compliance.py +++ b/cellxgene_schema_cli/tests/test_schema_compliance.py @@ -62,6 +62,10 @@ def validator_with_adata_missing_raw(validator) -> Validator: validator.adata = examples.adata_non_raw.copy() return validator +@pytest.fixture +def validator_with_spatial_and_is_single_false(validator) -> Validator: + validator.adata = examples.adata_spatial_is_single_false.copy() + return validator @pytest.fixture def label_writer(validator_with_validated_adata) -> AnnDataLabelAppender: @@ -1089,6 +1093,17 @@ def test_is_primary_data(self, validator_with_adata): "ERROR: Column 'is_primary_data' in dataframe 'obs' " "must be boolean, not 'object'." ] + def test_is_primary_data__spatial(self, validator_with_spatial_and_is_single_false): + """ + is_primary_data bool. This MUST be False if dataset has uns['spatial']['is_single'] == False + """ + validator = validator_with_spatial_and_is_single_false + validator.adata.obs["is_primary_data"][0] = True + validator.validate_adata() + assert validator.errors == [ + "ERROR: When uns['spatial']['is_single'] is False, obs['is_primary_data'] must be False for all rows." + ] + def test_donor_id_must_be_categorical(self, validator_with_adata): """ donor_id categorical with str categories. This MUST be free-text that identifies diff --git a/cellxgene_schema_cli/tests/test_validate.py b/cellxgene_schema_cli/tests/test_validate.py index 6fd336a5..6fcbdaf9 100644 --- a/cellxgene_schema_cli/tests/test_validate.py +++ b/cellxgene_schema_cli/tests/test_validate.py @@ -22,6 +22,7 @@ from fixtures.examples_validate import ( adata_minimal, adata_slide_seqv2, + adata_spatial_is_single_false, adata_visium, adata_with_labels, good_obs, @@ -332,6 +333,15 @@ def test__validate_spatial_slide_seqV2_ok(self): validator._check_spatial_uns() assert not validator.errors + def test__validate_spatial_is_single_false_ok(self): + validator: Validator = Validator() + validator._set_schema_def() + validator.adata = adata_spatial_is_single_false.copy() + + # Confirm spatial is valid. + validator.validate_adata() + assert not validator.errors + def test__validate_spatial_forbidden_if_not_visium_or_slide_seqv2(self): validator: Validator = Validator() validator._set_schema_def() @@ -697,6 +707,7 @@ def test__validate_tissue_position_forbidden(self, assay_ontology_term_id, is_si validator.adata = adata_visium.copy() validator.adata.obs.assay_ontology_term_id = assay_ontology_term_id validator.adata.uns["spatial"]["is_single"] = is_single + validator.adata.obs["is_primary_data"] = False # Confirm tissue positions are not allowed. validator._check_spatial_obs() From 6f25b113b8e76454b61a53b2039c907f6ba0520b Mon Sep 17 00:00:00 2001 From: nayib-jose-gloria Date: Tue, 30 Apr 2024 16:08:46 -0400 Subject: [PATCH 15/38] unit test refactor --- .../tests/fixtures/examples_validate.py | 2 +- .../tests/test_schema_compliance.py | 42 ++++++------------- 2 files changed, 14 insertions(+), 30 deletions(-) diff --git a/cellxgene_schema_cli/tests/fixtures/examples_validate.py b/cellxgene_schema_cli/tests/fixtures/examples_validate.py index caba5163..3802852e 100644 --- a/cellxgene_schema_cli/tests/fixtures/examples_validate.py +++ b/cellxgene_schema_cli/tests/fixtures/examples_validate.py @@ -469,7 +469,7 @@ ) adata_spatial_is_single_false = anndata.AnnData( - X=sparse.csr_matrix(X), obs=good_obs_visium_is_single_false, uns=good_uns_with_is_single_false, obsm=good_obsm, var=good_var + X=sparse.csr_matrix(X), obs=good_obs_visium_is_single_false, uns=good_uns_with_is_single_false, obsm=good_obsm_spatial, var=good_var ) # anndata for testing migration diff --git a/cellxgene_schema_cli/tests/test_schema_compliance.py b/cellxgene_schema_cli/tests/test_schema_compliance.py index 19f6b58e..72d2799f 100644 --- a/cellxgene_schema_cli/tests/test_schema_compliance.py +++ b/cellxgene_schema_cli/tests/test_schema_compliance.py @@ -2008,27 +2008,18 @@ def test_obsm_values_no_X_embedding__non_spatial_dataset(self, validator_with_ad "WARNING: Validation of raw layer was not performed due to current errors, try again after fixing current errors.", ] - @pytest.mark.parametrize( - "assay_ontology_term_id, adata_spatial", - [ - ("EFO:0010961", examples.adata_visium), - ("EFO:0030062", examples.adata_slide_seqv2), - ], - ) - def test_obsm_values_no_X_embedding__spatial_dataset( - self, validator_with_adata, assay_ontology_term_id, adata_spatial - ): - validator = validator_with_adata - validator.adata.obsm["harmony"] = validator.adata.obsm["X_umap"] - validator.adata.uns = adata_spatial.uns - validator.adata.uns["default_embedding"] = "harmony" - validator.adata.uns["spatial"] = uns_spatial - validator.adata.obsm["spatial"] = validator.adata.obsm["X_umap"] + def test_obsm_values_no_X_embedding__visium_dataset(self, validator_with_visium_assay): + validator = validator_with_visium_assay + validator.adata.uns["default_embedding"] = "spatial" + del validator.adata.obsm["X_umap"] + validator.validate_adata() + assert validator.errors == [] + assert validator.is_spatial is True + + def test_obsm_values_no_X_embedding__slide_seq_v2_dataset(self, validator_with_slide_seq_v2_assay): + validator = validator_with_slide_seq_v2_assay + validator.adata.uns["default_embedding"] = "spatial" del validator.adata.obsm["X_umap"] - validator.adata.obs = adata_spatial.obs - validator.adata.obs["assay_ontology_term_id"] = assay_ontology_term_id - validator.adata.obs["suspension_type"] = "na" - validator.adata.obs.loc[:, ["suspension_type"]] = validator.adata.obs.astype("category") validator.validate_adata() assert validator.errors == [] assert validator.is_spatial is True @@ -2041,19 +2032,12 @@ def test_obsm_values_spatial_embedding_missing__is_single_true(self, validator_w "ERROR: 'spatial' embedding is required in 'adata.obsm' if adata.uns['spatial']['is_single'] is True." ] - def test_obsm_values_spatial_embedding_missing__is_single_false(self, validator_with_visium_assay): - validator = validator_with_visium_assay - validator.adata.uns["spatial"] = {"is_single": False} + def test_obsm_values_spatial_embedding_missing__is_single_false(self, validator_with_spatial_and_is_single_false): + validator = validator_with_spatial_and_is_single_false del validator.adata.obsm["spatial"] validator.validate_adata() assert validator.errors == [] - def test_obsm_values_spatial_embedding_present__is_single_false(self, validator_with_visium_assay): - validator = validator_with_visium_assay - validator.adata.uns["spatial"] = {"is_single": False} - validator.validate_adata() - assert validator.errors == [] - def test_obsm_values_spatial_embedding_present__is_single_none(self, validator_with_adata): validator = validator_with_adata validator.adata.obsm["spatial"] = validator.adata.obsm["X_umap"] From 7fe05b2e78cbe22e7a301f60597ee32d828c4f70 Mon Sep 17 00:00:00 2001 From: MillenniumFalconMechanic Date: Wed, 24 Apr 2024 16:04:00 -0700 Subject: [PATCH 16/38] feat(schema 5.1.0): validate uns[spatial] --- cellxgene_schema_cli/cellxgene_schema/validate.py | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/cellxgene_schema_cli/cellxgene_schema/validate.py b/cellxgene_schema_cli/cellxgene_schema/validate.py index 600afb3f..ac8666d7 100644 --- a/cellxgene_schema_cli/cellxgene_schema/validate.py +++ b/cellxgene_schema_cli/cellxgene_schema/validate.py @@ -29,6 +29,9 @@ class Validator: """Handles validation of AnnData""" + ASSAY_VISIUM = "EFO:0010961" + ASSAY_SLIDE_SEQV2 = "EFO:0030062" + def __init__(self, ignore_labels=False): self.schema_def = dict() self.schema_version: str = None @@ -952,6 +955,13 @@ def _validate_seurat_convertibility(self): ) self.is_seurat_convertible = False + # Seurat conversion is not supported for Visium datasets. + if self._is_visium(): + self.warnings.append( + "Datasets with assay_ontology_term_id 'EFO:0010961' (Visium Spatial Gene Expression) are not compatible with Seurat." + ) + self.is_seurat_convertible = False + def _validate_obsm(self): """ Validates the embedding dictionary -- it checks that all values of adata.obsm are numpy arrays with the correct From dc30afb07d67de66b5aad3e22e6249099b7fb03a Mon Sep 17 00:00:00 2001 From: MillenniumFalconMechanic Date: Wed, 24 Apr 2024 16:35:15 -0700 Subject: [PATCH 17/38] Linting --- cellxgene_schema_cli/cellxgene_schema/validate.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/cellxgene_schema_cli/cellxgene_schema/validate.py b/cellxgene_schema_cli/cellxgene_schema/validate.py index ac8666d7..20d31261 100644 --- a/cellxgene_schema_cli/cellxgene_schema/validate.py +++ b/cellxgene_schema_cli/cellxgene_schema/validate.py @@ -29,9 +29,6 @@ class Validator: """Handles validation of AnnData""" - ASSAY_VISIUM = "EFO:0010961" - ASSAY_SLIDE_SEQV2 = "EFO:0030062" - def __init__(self, ignore_labels=False): self.schema_def = dict() self.schema_version: str = None From 869c29f3e00d08bdb2fda1bc68a893a24bc87cc9 Mon Sep 17 00:00:00 2001 From: MillenniumFalconMechanic Date: Wed, 24 Apr 2024 22:38:05 -0700 Subject: [PATCH 18/38] Updated error messages --- cellxgene_schema_cli/tests/test_validate.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/cellxgene_schema_cli/tests/test_validate.py b/cellxgene_schema_cli/tests/test_validate.py index 3004270e..8115b09c 100644 --- a/cellxgene_schema_cli/tests/test_validate.py +++ b/cellxgene_schema_cli/tests/test_validate.py @@ -464,7 +464,11 @@ def test__validate_library_id_required_if_visium(self): validator._check_spatial() assert validator.errors assert ( +<<<<<<< HEAD "uns['spatial'] must contain at least one key representing the library_id when obs['assay_ontology_term_id'] " +======= + "uns['spatial'] must contain the key 'library_id' for obs['assay_ontology_term_id'] " +>>>>>>> 76c80db (Updated error messages) "'EFO:0010961' (Visium Spatial Gene Expression) and uns['spatial']['is_single'] is True." in validator.errors[0] ) From 60cdf0452ba9ee08c2c36c7fa33cda7b2e0462bd Mon Sep 17 00:00:00 2001 From: MillenniumFalconMechanic Date: Thu, 25 Apr 2024 09:58:13 -0700 Subject: [PATCH 19/38] Review updates --- cellxgene_schema_cli/cellxgene_schema/validate.py | 7 ------- cellxgene_schema_cli/tests/test_validate.py | 4 ---- 2 files changed, 11 deletions(-) diff --git a/cellxgene_schema_cli/cellxgene_schema/validate.py b/cellxgene_schema_cli/cellxgene_schema/validate.py index 20d31261..600afb3f 100644 --- a/cellxgene_schema_cli/cellxgene_schema/validate.py +++ b/cellxgene_schema_cli/cellxgene_schema/validate.py @@ -952,13 +952,6 @@ def _validate_seurat_convertibility(self): ) self.is_seurat_convertible = False - # Seurat conversion is not supported for Visium datasets. - if self._is_visium(): - self.warnings.append( - "Datasets with assay_ontology_term_id 'EFO:0010961' (Visium Spatial Gene Expression) are not compatible with Seurat." - ) - self.is_seurat_convertible = False - def _validate_obsm(self): """ Validates the embedding dictionary -- it checks that all values of adata.obsm are numpy arrays with the correct diff --git a/cellxgene_schema_cli/tests/test_validate.py b/cellxgene_schema_cli/tests/test_validate.py index 8115b09c..3004270e 100644 --- a/cellxgene_schema_cli/tests/test_validate.py +++ b/cellxgene_schema_cli/tests/test_validate.py @@ -464,11 +464,7 @@ def test__validate_library_id_required_if_visium(self): validator._check_spatial() assert validator.errors assert ( -<<<<<<< HEAD "uns['spatial'] must contain at least one key representing the library_id when obs['assay_ontology_term_id'] " -======= - "uns['spatial'] must contain the key 'library_id' for obs['assay_ontology_term_id'] " ->>>>>>> 76c80db (Updated error messages) "'EFO:0010961' (Visium Spatial Gene Expression) and uns['spatial']['is_single'] is True." in validator.errors[0] ) From a2935a517f077a55a439b444a6756dc80eb62b54 Mon Sep 17 00:00:00 2001 From: MillenniumFalconMechanic Date: Thu, 25 Apr 2024 13:37:59 -0700 Subject: [PATCH 20/38] Review updates --- cellxgene_schema_cli/cellxgene_schema/validate.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cellxgene_schema_cli/cellxgene_schema/validate.py b/cellxgene_schema_cli/cellxgene_schema/validate.py index 600afb3f..fc88a52b 100644 --- a/cellxgene_schema_cli/cellxgene_schema/validate.py +++ b/cellxgene_schema_cli/cellxgene_schema/validate.py @@ -1378,7 +1378,7 @@ def _check_spatial(self): # is_single must be a boolean. uns_is_single = uns_spatial["is_single"] - if not isinstance(uns_is_single, (np.bool_, np.bool)): + if not isinstance(uns_is_single, (np.bool_, np.bool, bool)): self.errors.append(f"uns['spatial']['is_single'] must be of boolean type, it is {type(uns_is_single)}.") # Exit if is_single is not valid as all further checks are dependent on its value. return From 76d62c988b534ff2b412e52769fd8d2348e12c4b Mon Sep 17 00:00:00 2001 From: MillenniumFalconMechanic Date: Thu, 25 Apr 2024 13:39:15 -0700 Subject: [PATCH 21/38] Reverted update --- cellxgene_schema_cli/cellxgene_schema/validate.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cellxgene_schema_cli/cellxgene_schema/validate.py b/cellxgene_schema_cli/cellxgene_schema/validate.py index fc88a52b..600afb3f 100644 --- a/cellxgene_schema_cli/cellxgene_schema/validate.py +++ b/cellxgene_schema_cli/cellxgene_schema/validate.py @@ -1378,7 +1378,7 @@ def _check_spatial(self): # is_single must be a boolean. uns_is_single = uns_spatial["is_single"] - if not isinstance(uns_is_single, (np.bool_, np.bool, bool)): + if not isinstance(uns_is_single, (np.bool_, np.bool)): self.errors.append(f"uns['spatial']['is_single'] must be of boolean type, it is {type(uns_is_single)}.") # Exit if is_single is not valid as all further checks are dependent on its value. return From abb1ca40dc9968eea488667b042f683e705937d4 Mon Sep 17 00:00:00 2001 From: MillenniumFalconMechanic Date: Fri, 26 Apr 2024 13:40:43 -0700 Subject: [PATCH 22/38] feat(schema 5.1.0): validate obs[array_col], obs[array_row] and obs[in_tissue] --- .../cellxgene_schema/validate.py | 82 +++++++++- .../tests/fixtures/examples_validate.py | 9 ++ .../tests/test_schema_compliance.py | 11 +- cellxgene_schema_cli/tests/test_validate.py | 143 ++++++++++++++---- 4 files changed, 210 insertions(+), 35 deletions(-) diff --git a/cellxgene_schema_cli/cellxgene_schema/validate.py b/cellxgene_schema_cli/cellxgene_schema/validate.py index 600afb3f..e779e012 100644 --- a/cellxgene_schema_cli/cellxgene_schema/validate.py +++ b/cellxgene_schema_cli/cellxgene_schema/validate.py @@ -1331,7 +1331,87 @@ def _check_column_availability(self): def _check_spatial(self): """ - Validate spatial-related values of the AnnData object. Validation is not defined in schema definition yaml. + Sequence validation of spatial-related values of the AnnData object. + + :rtype none + """ + self._check_spatial_uns() + self._check_spatial_obs() + + def _check_spatial_obs(self): + """ + Validate obs spatial-related values of the AnnData object. Validation is not defined in schema definition yaml. + Errors are added to self.errors. + + :rtype none + """ + + # Exit if obs is not specified. Error is reported in core validate functionality. + obs_component = getattr_anndata(self.adata, "obs") + if obs_component is None: + return + + # obs spatial validation is dependent on uns.spatial.is_single; exit if not specified. Errors are + # reported downstream. + uns_component = getattr_anndata(self.adata, "uns") + if uns_component is None or "spatial" not in uns_component or "is_single" not in uns_component["spatial"]: + return + + # Validate tissue positions. + is_visium_and_uns_is_single = self._is_visium() and uns_component["spatial"]["is_single"] + self._validate_spatial_tissue_position("array_col", 0, 127, is_visium_and_uns_is_single) + self._validate_spatial_tissue_position("array_row", 0, 77, is_visium_and_uns_is_single) + self._validate_spatial_tissue_position("in_tissue", 0, 1, is_visium_and_uns_is_single) + + def _validate_spatial_tissue_position( + self, tissue_position_name: str, min: int, max: int, is_visium_and_uns_is_single: bool + ): + """ + Validate tissue position is allowed and required, and are integers within the given range. Validation is not defined in + schema definition yaml. + + :rtype none + """ + # Tissue position is foribidden if assay is not Visium or is_single is False. + obs_tissue_position = self.adata.obs.get(tissue_position_name) + if obs_tissue_position is not None and not is_visium_and_uns_is_single: + self.errors.append( + f"obs['{tissue_position_name}'] is only allowed for obs['assay_ontology_term_id'] " + "'EFO:0010961' (Visium Spatial Gene Expression) and uns['spatial']['is_single'] is True." + ) + return + + # Exit if we're not dealing with Visium and _is_single True as no further checks are necessary. + if not is_visium_and_uns_is_single: + return + + # Tissue position is required. + if obs_tissue_position is None: + self.errors.append( + f"obs['{tissue_position_name}'] is required for obs['assay_ontology_term_id'] " + "'EFO:0010961' (Visium Spatial Gene Expression) and uns['spatial']['is_single'] True." + ) + return + + # Tissue position must be an int. + if not np.issubdtype(obs_tissue_position.dtype, np.integer): + self.errors.append(f"obs['{tissue_position_name}'] must be of int type, it is {obs_tissue_position.dtype}.") + return + + # Tissue position must be within the given range. + if not ((obs_tissue_position >= min) & (obs_tissue_position <= max)).all(): + if tissue_position_name == "in_tissue": + error_message_token = f"{min} or {max}" + else: + error_message_token = f"between {min} and {max}" + self.errors.append( + f"obs['{tissue_position_name}'] must be {error_message_token}, the min and max are {obs_tissue_position.min()} and {obs_tissue_position.max()}. " + f"This must be the value of the column tissue_positions_in_tissue from the tissue_positions_list.csv or tissue_positions.csv." + ) + + def _check_spatial_uns(self): + """ + Validate uns spatial-related values of the AnnData object. Validation is not defined in schema definition yaml. Errors are added to self.errors. :rtype none diff --git a/cellxgene_schema_cli/tests/fixtures/examples_validate.py b/cellxgene_schema_cli/tests/fixtures/examples_validate.py index 0c8974e8..b024dfa3 100644 --- a/cellxgene_schema_cli/tests/fixtures/examples_validate.py +++ b/cellxgene_schema_cli/tests/fixtures/examples_validate.py @@ -129,6 +129,8 @@ good_obs_visium = pd.DataFrame( [ [ + 1, + 1, "CL:0000066", "EFO:0010961", "MONDO:0100096", @@ -141,8 +143,11 @@ "HsapDv:0000003", "donor_1", "na", + 0, ], [ + 2, + 2, "CL:0000192", "EFO:0010961", "PATO:0000461", @@ -155,10 +160,13 @@ "MmusDv:0000003", "donor_2", "na", + 1, ], ], index=["X", "Y"], columns=[ + "array_col", + "array_row", "cell_type_ontology_term_id", "assay_ontology_term_id", "disease_ontology_term_id", @@ -171,6 +179,7 @@ "development_stage_ontology_term_id", "donor_id", "suspension_type", + "in_tissue", ], ) diff --git a/cellxgene_schema_cli/tests/test_schema_compliance.py b/cellxgene_schema_cli/tests/test_schema_compliance.py index 3ece83b4..4216142f 100644 --- a/cellxgene_schema_cli/tests/test_schema_compliance.py +++ b/cellxgene_schema_cli/tests/test_schema_compliance.py @@ -1994,21 +1994,22 @@ def test_obsm_values_no_X_embedding__non_spatial_dataset(self, validator_with_ad ] @pytest.mark.parametrize( - "assay_ontology_term_id, uns_spatial", + "assay_ontology_term_id, adata_spatial", [ - ("EFO:0010961", examples.good_uns_with_visium_spatial["spatial"]), - ("EFO:0030062", examples.good_uns_with_slide_seqV2_spatial["spatial"]), + ("EFO:0010961", examples.adata_visium), + ("EFO:0030062", examples.adata_slide_seqv2), ], ) def test_obsm_values_no_X_embedding__spatial_dataset( - self, validator_with_adata, assay_ontology_term_id, uns_spatial + self, validator_with_adata, assay_ontology_term_id, adata_spatial ): validator = validator_with_adata validator.adata.obsm["harmony"] = validator.adata.obsm["X_umap"] + validator.adata.uns = adata_spatial.uns validator.adata.uns["default_embedding"] = "harmony" - validator.adata.uns["spatial"] = uns_spatial validator.adata.obsm["spatial"] = validator.adata.obsm["X_umap"] del validator.adata.obsm["X_umap"] + validator.adata.obs = adata_spatial.obs validator.adata.obs["assay_ontology_term_id"] = assay_ontology_term_id validator.adata.obs["suspension_type"] = "na" validator.adata.obs.loc[:, ["suspension_type"]] = validator.adata.obs.astype("category") diff --git a/cellxgene_schema_cli/tests/test_validate.py b/cellxgene_schema_cli/tests/test_validate.py index 3004270e..2088db41 100644 --- a/cellxgene_schema_cli/tests/test_validate.py +++ b/cellxgene_schema_cli/tests/test_validate.py @@ -306,6 +306,8 @@ def test__validate_with_h5ad_invalid_and_without_labels(self): assert errors assert is_seurat_convertible + +class TestCheckSpatial: def test__validate_spatial_visium_ok(self): validator: Validator = Validator() validator._set_schema_def() @@ -324,7 +326,7 @@ def test__validate_spatial_slide_seqV2_ok(self): validator.validate_adata() assert not validator.errors - def test__validate_spatial_forbidden_if_10x(self): + def test__validate_spatial_forbidden_if_not_visium_and_is_single_false(self): validator: Validator = Validator() validator._set_schema_def() @@ -333,7 +335,7 @@ def test__validate_spatial_forbidden_if_10x(self): validator.adata.obs = good_obs.copy() # Confirm spatial is not allowed for 10x 3' v2. - validator._check_spatial() + validator._check_spatial_uns() assert len(validator.errors) == 1 assert ( "uns['spatial'] is only allowed for obs['assay_ontology_term_id'] values " @@ -347,7 +349,7 @@ def test__validate_spatial_required_if_visium(self): validator.adata.uns = good_uns.copy() # Confirm spatial is required for Visium. - validator._check_spatial() + validator._check_spatial_uns() assert len(validator.errors) == 1 assert ( "uns['spatial'] is required for obs['assay_ontology_term_id'] values " @@ -361,7 +363,7 @@ def test__validate_spatial_required_if_slide_seqV2(self): validator.adata.uns = good_uns.copy() # Confirm spatial is required for Slide-seqV2. - validator._check_spatial() + validator._check_spatial_uns() assert len(validator.errors) == 1 assert ( "uns['spatial'] is required for obs['assay_ontology_term_id'] values " @@ -375,7 +377,7 @@ def test__validate_spatial_allowed_keys_error(self): validator.adata.uns["spatial"]["invalid_key"] = True # Confirm additional key is identified as invalid. - validator._check_spatial() + validator._check_spatial_uns() assert validator.errors assert ( "uns['spatial'] must contain only two top-level keys: 'is_single' and a library_id. " @@ -389,7 +391,7 @@ def test__validate_is_single_required_visium_error(self): validator.adata.uns["spatial"].pop("is_single") # Confirm is_single is identified as required. - validator._check_spatial() + validator._check_spatial_uns() assert validator.errors assert "uns['spatial'] must contain the key 'is_single'." in validator.errors[0] @@ -405,7 +407,7 @@ def test__validate_is_single_required_slide_seqV2_error(self): } # Confirm is_single is identified as required. - validator._check_spatial() + validator._check_spatial_uns() assert validator.errors assert "uns['spatial'] must contain the key 'is_single'." in validator.errors[0] @@ -416,7 +418,7 @@ def test__validate_is_single_boolean_error(self): validator.adata.uns["spatial"]["is_single"] = "invalid" # Confirm is_single is identified as invalid. - validator._check_spatial() + validator._check_spatial_uns() assert validator.errors assert "uns['spatial']['is_single'] must be of boolean type" in validator.errors[0] @@ -429,7 +431,7 @@ def test__validate_library_id_forbidden_if_slide_seqV2(self): validator.adata.uns = good_uns_with_visium_spatial # Confirm library_id is not allowed for Slide-seqV2. - validator._check_spatial() + validator._check_spatial_uns() assert len(validator.errors) == 1 assert ( "uns['spatial'][library_id] is only allowed for obs['assay_ontology_term_id'] " @@ -437,7 +439,7 @@ def test__validate_library_id_forbidden_if_slide_seqV2(self): in validator.errors[0] ) - def test__validate_library_id_forbidden_if_visium_and_is_single_false(self): + def test__validate_library_id_forbidden_if_visium_or_is_single_false(self): validator: Validator = Validator() validator._set_schema_def() @@ -446,7 +448,7 @@ def test__validate_library_id_forbidden_if_visium_and_is_single_false(self): validator.adata.uns["spatial"]["is_single"] = np.bool_(False) # Confirm library_id is not allowed for Visium if is_single is False. - validator._check_spatial() + validator._check_spatial_uns() assert len(validator.errors) == 1 assert ( "uns['spatial'][library_id] is only allowed for obs['assay_ontology_term_id'] " @@ -461,7 +463,7 @@ def test__validate_library_id_required_if_visium(self): validator.adata.uns["spatial"].pop(visium_library_id) # Confirm library_id is identified as required. - validator._check_spatial() + validator._check_spatial_uns() assert validator.errors assert ( "uns['spatial'] must contain at least one key representing the library_id when obs['assay_ontology_term_id'] " @@ -476,7 +478,7 @@ def test__validate_library_id_allowed_keys_error(self): validator.adata.uns["spatial"][visium_library_id]["invalid_key"] = True # Confirm additional key is identified as invalid. - validator._check_spatial() + validator._check_spatial_uns() assert validator.errors assert ( "uns['spatial'][library_id] can only contain the keys 'images' and 'scalefactors'." in validator.errors[0] @@ -489,7 +491,7 @@ def test__validate_images_required_error(self): validator.adata.uns["spatial"][visium_library_id].pop("images") # Confirm images is required. - validator._check_spatial() + validator._check_spatial_uns() assert validator.errors assert "uns['spatial'][library_id] must contain the key 'images'." in validator.errors[0] @@ -500,7 +502,7 @@ def test__validate_images_allowed_keys_error(self): validator.adata.uns["spatial"][visium_library_id]["images"]["invalid_key"] = True # Confirm additional key is identified as invalid. - validator._check_spatial() + validator._check_spatial_uns() assert validator.errors assert ( "uns['spatial'][library_id]['images'] can only contain the keys 'fullres' and 'hires'." @@ -514,7 +516,7 @@ def test__validate_images_hires_required_error(self): validator.adata.uns["spatial"][visium_library_id]["images"].pop("hires") # Confirm hires is required. - validator._check_spatial() + validator._check_spatial_uns() assert validator.errors assert "uns['spatial'][library_id]['images'] must contain the key 'hires'." in validator.errors[0] @@ -525,7 +527,7 @@ def test__validate_images_fullres_optional(self): validator.adata.uns["spatial"][visium_library_id]["images"].pop("fullres") # Confirm fullres is optional. - validator._check_spatial() + validator._check_spatial_uns() assert not validator.errors assert validator.warnings assert ( @@ -541,7 +543,7 @@ def test__validate_images_hires_is_ndarray_error(self): validator.adata.uns["spatial"][visium_library_id]["images"]["hires"] = "invalid" # Confirm hires is identified as invalid. - validator._check_spatial() + validator._check_spatial_uns() assert validator.errors assert "uns['spatial'][library_id]['images']['hires'] must be of numpy.ndarray type" in validator.errors[0] @@ -552,7 +554,7 @@ def test__valide_images_hires_is_shape_error(self): validator.adata.uns["spatial"][visium_library_id]["images"]["hires"] = np.zeros((1, 1)) # Confirm hires is identified as invalid. - validator._check_spatial() + validator._check_spatial_uns() assert validator.errors assert "uns['spatial'][library_id]['images']['hires'] must have shape (,,3)" in validator.errors[0] @@ -563,7 +565,7 @@ def test__validate_images_hires_max_dimension_greater_than_error(self): validator.adata.uns["spatial"][visium_library_id]["images"]["hires"] = np.zeros((1, 2001, 3)) # Confirm hires is identified as invalid. - validator._check_spatial() + validator._check_spatial_uns() assert validator.errors assert ( "The largest dimension of uns['spatial'][library_id]['images']['hires'] must be 2000 pixels" @@ -577,7 +579,7 @@ def test__validate_images_hires_max_dimension_less_than_error(self): validator.adata.uns["spatial"][visium_library_id]["images"]["hires"] = np.zeros((1, 1999, 3)) # Confirm hires is identified as invalid. - validator._check_spatial() + validator._check_spatial_uns() assert validator.errors assert ( "The largest dimension of uns['spatial'][library_id]['images']['hires'] must be 2000 pixels" @@ -591,7 +593,7 @@ def test__validate_images_fullres_is_ndarray_error(self): validator.adata.uns["spatial"][visium_library_id]["images"]["fullres"] = "invalid" # Confirm fullres is identified as invalid. - validator._check_spatial() + validator._check_spatial_uns() assert validator.errors assert "uns['spatial'][library_id]['images']['fullres'] must be of numpy.ndarray type" in validator.errors[0] @@ -602,7 +604,7 @@ def test__validate_images_fullres_is_shape_error(self): validator.adata.uns["spatial"][visium_library_id]["images"]["fullres"] = np.zeros((1, 1)) # Confirm fullres is identified as invalid. - validator._check_spatial() + validator._check_spatial_uns() assert validator.errors assert "uns['spatial'][library_id]['images']['fullres'] must have shape (,,3)" in validator.errors[0] @@ -613,7 +615,7 @@ def test__validate_scalefactors_required_error(self): validator.adata.uns["spatial"][visium_library_id].pop("scalefactors") # Confirm scalefactors is required. - validator._check_spatial() + validator._check_spatial_uns() assert validator.errors assert "uns['spatial'][library_id] must contain the key 'scalefactors'." in validator.errors[0] @@ -624,7 +626,7 @@ def test__validate_scalefactors_allowed_keys_error(self): validator.adata.uns["spatial"][visium_library_id]["scalefactors"]["invalid_key"] = True # Confirm additional key is identified as invalid. - validator._check_spatial() + validator._check_spatial_uns() assert validator.errors assert ( "uns['spatial'][library_id]['scalefactors'] can only contain the keys " @@ -638,7 +640,7 @@ def test__validate_scalefactors_spot_diameter_fullres_required_error(self): validator.adata.uns["spatial"][visium_library_id]["scalefactors"].pop("spot_diameter_fullres") # Confirm spot_diameter_fullres is required. - validator._check_spatial() + validator._check_spatial_uns() assert validator.errors assert ( "uns['spatial'][library_id]['scalefactors'] must contain the key 'spot_diameter_fullres'." @@ -652,7 +654,7 @@ def test__validate_scalefactors_tissue_hires_scalef_required_error(self): validator.adata.uns["spatial"][visium_library_id]["scalefactors"].pop("tissue_hires_scalef") # Confirm tissue_hires_scalef is required. - validator._check_spatial() + validator._check_spatial_uns() assert validator.errors assert ( "uns['spatial'][library_id]['scalefactors'] must contain the key 'tissue_hires_scalef'." @@ -666,7 +668,7 @@ def test__validate_scalefactors_spot_diameter_fullres_is_float_error(self): validator.adata.uns["spatial"][visium_library_id]["scalefactors"]["spot_diameter_fullres"] = "invalid" # Confirm spot_diameter_fullres is identified as invalid. - validator._check_spatial() + validator._check_spatial_uns() assert validator.errors assert ( "uns['spatial'][library_id]['scalefactors']['spot_diameter_fullres'] must be of type float" @@ -680,13 +682,96 @@ def test__validate_scalefactors_tissue_hires_scalef_is_float_error(self): validator.adata.uns["spatial"][visium_library_id]["scalefactors"]["tissue_hires_scalef"] = "invalid" # Confirm tissue_hires_scalef is identified as invalid. - validator._check_spatial() + validator._check_spatial_uns() assert validator.errors assert ( "uns['spatial'][library_id]['scalefactors']['tissue_hires_scalef'] must be of type float" in validator.errors[0] ) + def test__validate_tissue_position_forbidden_if_not_visium_or_is_single_false(self): + validator: Validator = Validator() + validator._set_schema_def() + + # Create Visium uns (with spatial) with 10x 3' v2 obs. + validator.adata = adata_visium.copy() + validator.adata.obs.assay_ontology_term_id = "EFO:0009899" + + # Confirm tissue positions are not allowed for not Visium and not is single. + validator._check_spatial_obs() + assert len(validator.errors) == 3 + tissue_position_names = ["array_col", "array_row", "in_tissue"] + for i, tissue_position_name in enumerate(tissue_position_names): + assert ( + f"obs['{tissue_position_name}'] is only allowed for obs['assay_ontology_term_id'] " + "'EFO:0010961' (Visium Spatial Gene Expression) and uns['spatial']['is_single'] is True." + in validator.errors[i] + ) + + @pytest.mark.parametrize("tissue_position_name", ["array_col", "array_row", "in_tissue"]) + def test__validate_tissue_position_required(self, tissue_position_name): + validator: Validator = Validator() + validator._set_schema_def() + validator.adata = adata_visium.copy() + validator.adata.obs.pop(tissue_position_name) + + validator._check_spatial_obs() + assert validator.errors + assert ( + f"obs['{tissue_position_name}'] is required for obs['assay_ontology_term_id'] 'EFO:0010961' " + "(Visium Spatial Gene Expression) and uns['spatial']['is_single'] True." in validator.errors[0] + ) + + @pytest.mark.parametrize("tissue_position_name", ["array_col", "array_row", "in_tissue"]) + def test__validate_tissue_position_int_error(self, tissue_position_name): + validator: Validator = Validator() + validator._set_schema_def() + validator.adata = adata_visium.copy() + validator.adata.obs[tissue_position_name] = 1.0 + + # Confirm tissue_position is identified as invalid. + validator._check_spatial_obs() + assert validator.errors + assert f"obs['{tissue_position_name}'] must be of int type" in validator.errors[0] + + @pytest.mark.parametrize( + "tissue_position_name, min, error_message_token", + [ + ("array_col", 0, "between 0 and 127"), + ("array_row", 0, "between 0 and 77"), + ("in_tissue", 0, "0 or 1"), + ], + ) + def test__validate_tissue_position_int_min_error(self, tissue_position_name, min, error_message_token): + validator: Validator = Validator() + validator._set_schema_def() + validator.adata = adata_visium.copy() + validator.adata.obs[tissue_position_name] = min - 1 + + # Confirm tissue_position is identified as invalid. + validator._check_spatial_obs() + assert validator.errors + assert f"obs['{tissue_position_name}'] must be {error_message_token}" in validator.errors[0] + + @pytest.mark.parametrize( + "tissue_position_name, max, error_message_token", + [ + ("array_col", 127, "between 0 and 127"), + ("array_row", 77, "between 0 and 77"), + ("in_tissue", 1, "0 or 1"), + ], + ) + def test__validate_tissue_position_int_max_error(self, tissue_position_name, max, error_message_token): + validator: Validator = Validator() + validator._set_schema_def() + validator.adata = adata_visium.copy() + validator.adata.obs[tissue_position_name] = max + 1 + + # Confirm tissue_position is identified as invalid. + validator._check_spatial_obs() + assert validator.errors + assert f"obs['{tissue_position_name}'] must be {error_message_token}" in validator.errors[0] + class TestSeuratConvertibility: def validation_helper(self, matrix, raw=None): From e10eafefbb6b01be17e57c8f245f27b2d4a0c59a Mon Sep 17 00:00:00 2001 From: MillenniumFalconMechanic Date: Fri, 26 Apr 2024 14:00:41 -0700 Subject: [PATCH 23/38] Corrected test name --- cellxgene_schema_cli/tests/test_validate.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cellxgene_schema_cli/tests/test_validate.py b/cellxgene_schema_cli/tests/test_validate.py index 2088db41..0ae54c7e 100644 --- a/cellxgene_schema_cli/tests/test_validate.py +++ b/cellxgene_schema_cli/tests/test_validate.py @@ -439,7 +439,7 @@ def test__validate_library_id_forbidden_if_slide_seqV2(self): in validator.errors[0] ) - def test__validate_library_id_forbidden_if_visium_or_is_single_false(self): + def test__validate_library_id_forbidden_if_visium_and_is_single_false(self): validator: Validator = Validator() validator._set_schema_def() From 42083072cf3fa6587589d5554448f3f4e302840b Mon Sep 17 00:00:00 2001 From: MillenniumFalconMechanic Date: Fri, 26 Apr 2024 14:09:30 -0700 Subject: [PATCH 24/38] Linting --- cellxgene_schema_cli/tests/test_validate.py | 29 +++++++++++++++++---- 1 file changed, 24 insertions(+), 5 deletions(-) diff --git a/cellxgene_schema_cli/tests/test_validate.py b/cellxgene_schema_cli/tests/test_validate.py index 0ae54c7e..4fee9d25 100644 --- a/cellxgene_schema_cli/tests/test_validate.py +++ b/cellxgene_schema_cli/tests/test_validate.py @@ -326,11 +326,11 @@ def test__validate_spatial_slide_seqV2_ok(self): validator.validate_adata() assert not validator.errors - def test__validate_spatial_forbidden_if_not_visium_and_is_single_false(self): + def test__validate_spatial_forbidden_if_not_visium_or_slide_seqv2(self): validator: Validator = Validator() validator._set_schema_def() - # Create Visium uns (with spatial) with 10x 3' v2 obs. + # Create Visium adata (with spatial) with 10x 3' v2 obs. validator.adata = adata_visium.copy() validator.adata.obs = good_obs.copy() @@ -689,15 +689,34 @@ def test__validate_scalefactors_tissue_hires_scalef_is_float_error(self): in validator.errors[0] ) - def test__validate_tissue_position_forbidden_if_not_visium_or_is_single_false(self): + def test__validate_tissue_position_forbidden_if_not_visium(self): validator: Validator = Validator() validator._set_schema_def() - # Create Visium uns (with spatial) with 10x 3' v2 obs. + # Create Visium adata (with spatial) with 10x 3' v2 obs. validator.adata = adata_visium.copy() validator.adata.obs.assay_ontology_term_id = "EFO:0009899" - # Confirm tissue positions are not allowed for not Visium and not is single. + # Confirm tissue positions are not allowed for 10x 3' v2. + validator._check_spatial_obs() + assert len(validator.errors) == 3 + tissue_position_names = ["array_col", "array_row", "in_tissue"] + for i, tissue_position_name in enumerate(tissue_position_names): + assert ( + f"obs['{tissue_position_name}'] is only allowed for obs['assay_ontology_term_id'] " + "'EFO:0010961' (Visium Spatial Gene Expression) and uns['spatial']['is_single'] is True." + in validator.errors[i] + ) + + def test__validate_tissue_position_forbidden_if_not_visium_and_is_single_false(self): + validator: Validator = Validator() + validator._set_schema_def() + + # Create Visium adata (with spatial) with is_single false. + validator.adata = adata_visium.copy() + validator.adata.uns["spatial"]["is_single"] = False + + # Confirm tissue positions are not allowed. validator._check_spatial_obs() assert len(validator.errors) == 3 tissue_position_names = ["array_col", "array_row", "in_tissue"] From 1cb1463aa6086da425c3ef3138868e32d81a94c2 Mon Sep 17 00:00:00 2001 From: MillenniumFalconMechanic Date: Fri, 26 Apr 2024 14:21:30 -0700 Subject: [PATCH 25/38] Updated forbidden tests --- cellxgene_schema_cli/tests/test_validate.py | 27 ++++----------------- 1 file changed, 5 insertions(+), 22 deletions(-) diff --git a/cellxgene_schema_cli/tests/test_validate.py b/cellxgene_schema_cli/tests/test_validate.py index 4fee9d25..8bfa321c 100644 --- a/cellxgene_schema_cli/tests/test_validate.py +++ b/cellxgene_schema_cli/tests/test_validate.py @@ -689,32 +689,15 @@ def test__validate_scalefactors_tissue_hires_scalef_is_float_error(self): in validator.errors[0] ) - def test__validate_tissue_position_forbidden_if_not_visium(self): + @pytest.mark.parametrize("assay_ontology_term_id, is_single", [("EFO:0030062", False), ("EFO:0010961", False)]) + def test__validate_tissue_position_forbidden(self, assay_ontology_term_id, is_single): validator: Validator = Validator() validator._set_schema_def() - # Create Visium adata (with spatial) with 10x 3' v2 obs. - validator.adata = adata_visium.copy() - validator.adata.obs.assay_ontology_term_id = "EFO:0009899" - - # Confirm tissue positions are not allowed for 10x 3' v2. - validator._check_spatial_obs() - assert len(validator.errors) == 3 - tissue_position_names = ["array_col", "array_row", "in_tissue"] - for i, tissue_position_name in enumerate(tissue_position_names): - assert ( - f"obs['{tissue_position_name}'] is only allowed for obs['assay_ontology_term_id'] " - "'EFO:0010961' (Visium Spatial Gene Expression) and uns['spatial']['is_single'] is True." - in validator.errors[i] - ) - - def test__validate_tissue_position_forbidden_if_not_visium_and_is_single_false(self): - validator: Validator = Validator() - validator._set_schema_def() - - # Create Visium adata (with spatial) with is_single false. + # Create Visium adata with tissue positions, update assay_ontology_term_id and is_single to trigger error. validator.adata = adata_visium.copy() - validator.adata.uns["spatial"]["is_single"] = False + validator.adata.obs.assay_ontology_term_id = assay_ontology_term_id + validator.adata.uns["spatial"]["is_single"] = is_single # Confirm tissue positions are not allowed. validator._check_spatial_obs() From b2a676c8b70f785f2ef2dc7b623a380bba21fbbb Mon Sep 17 00:00:00 2001 From: Mim Hastie Date: Mon, 29 Apr 2024 09:56:34 -0700 Subject: [PATCH 26/38] Update cellxgene_schema_cli/cellxgene_schema/validate.py Co-authored-by: Nayib Gloria <55710092+nayib-jose-gloria@users.noreply.github.com> --- cellxgene_schema_cli/cellxgene_schema/validate.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cellxgene_schema_cli/cellxgene_schema/validate.py b/cellxgene_schema_cli/cellxgene_schema/validate.py index e779e012..178a4780 100644 --- a/cellxgene_schema_cli/cellxgene_schema/validate.py +++ b/cellxgene_schema_cli/cellxgene_schema/validate.py @@ -1389,7 +1389,7 @@ def _validate_spatial_tissue_position( if obs_tissue_position is None: self.errors.append( f"obs['{tissue_position_name}'] is required for obs['assay_ontology_term_id'] " - "'EFO:0010961' (Visium Spatial Gene Expression) and uns['spatial']['is_single'] True." + "'EFO:0010961' (Visium Spatial Gene Expression) and uns['spatial']['is_single'] is True." ) return From 7d06a3f8af6d5389cd6867ffd8d84d26053d885a Mon Sep 17 00:00:00 2001 From: MillenniumFalconMechanic Date: Mon, 29 Apr 2024 11:02:54 -0700 Subject: [PATCH 27/38] Updated tests --- cellxgene_schema_cli/tests/test_validate.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/cellxgene_schema_cli/tests/test_validate.py b/cellxgene_schema_cli/tests/test_validate.py index 8bfa321c..edfec833 100644 --- a/cellxgene_schema_cli/tests/test_validate.py +++ b/cellxgene_schema_cli/tests/test_validate.py @@ -689,7 +689,9 @@ def test__validate_scalefactors_tissue_hires_scalef_is_float_error(self): in validator.errors[0] ) - @pytest.mark.parametrize("assay_ontology_term_id, is_single", [("EFO:0030062", False), ("EFO:0010961", False)]) + @pytest.mark.parametrize( + "assay_ontology_term_id, is_single", [("EFO:0030062", False), ("EFO:0010961", False), ("EFO:0030062", True)] + ) def test__validate_tissue_position_forbidden(self, assay_ontology_term_id, is_single): validator: Validator = Validator() validator._set_schema_def() @@ -721,7 +723,7 @@ def test__validate_tissue_position_required(self, tissue_position_name): assert validator.errors assert ( f"obs['{tissue_position_name}'] is required for obs['assay_ontology_term_id'] 'EFO:0010961' " - "(Visium Spatial Gene Expression) and uns['spatial']['is_single'] True." in validator.errors[0] + "(Visium Spatial Gene Expression) and uns['spatial']['is_single'] is True." in validator.errors[0] ) @pytest.mark.parametrize("tissue_position_name", ["array_col", "array_row", "in_tissue"]) From 796010bf95e05c51da0dc7ac6b8c8366dc5fcd87 Mon Sep 17 00:00:00 2001 From: MillenniumFalconMechanic Date: Mon, 29 Apr 2024 12:44:12 -0700 Subject: [PATCH 28/38] Review updates --- .../cellxgene_schema/validate.py | 59 +++++++++++-------- cellxgene_schema_cli/tests/test_validate.py | 31 ++++------ 2 files changed, 46 insertions(+), 44 deletions(-) diff --git a/cellxgene_schema_cli/cellxgene_schema/validate.py b/cellxgene_schema_cli/cellxgene_schema/validate.py index 178a4780..4418f5f1 100644 --- a/cellxgene_schema_cli/cellxgene_schema/validate.py +++ b/cellxgene_schema_cli/cellxgene_schema/validate.py @@ -25,6 +25,10 @@ ASSAY_VISIUM = "EFO:0010961" ASSAY_SLIDE_SEQV2 = "EFO:0030062" +ERROR_SUFFIX_VISIUM_AND_IS_SINGLE_TRUE = "obs['assay_ontology_term_id'] 'EFO:0010961' (Visium Spatial Gene Expression) and uns['spatial']['is_single'] is True." +ERROR_SUFFIX_VISIUM_AND_IS_SINGLE_TRUE_FORBIDDEN = f"is only allowed for {ERROR_SUFFIX_VISIUM_AND_IS_SINGLE_TRUE}" +ERROR_SUFFIX_VISIUM_AND_IS_SINGLE_TRUE_REQUIRED = f"is required for {ERROR_SUFFIX_VISIUM_AND_IS_SINGLE_TRUE}" + class Validator: """Handles validation of AnnData""" @@ -46,6 +50,8 @@ def reset(self): self._raw_layer_exists = None self.is_seurat_convertible: bool = True self.is_spatial = None + self.is_visium = None + self.is_visium_and_is_single_true = None # Matrix (e.g., X, raw.X, ...) number non-zero cache self.number_non_zero = dict() @@ -76,6 +82,20 @@ def _is_supported_spatial_assay(self) -> bool: self.is_spatial = False return self.is_spatial + def _is_visium_and_is_single_true(self) -> bool: + """ + Determine if the assay_ontology_term_id is Visium (EFO:0010961) and uns.spatial.is_single is True. + + :return True if assay_ontology_term_id is Visium and is_single_cell is True, False otherwise. + :rtype bool + """ + if self.is_visium_and_is_single_true is None: + try: + self.is_visium_and_is_single_true = self._is_visium() and self.adata.uns["spatial"]["is_single"] + except AttributeError: + self.is_visium_and_is_single_true = False + return self.is_visium_and_is_single_true + def _validate_encoding_version(self): import h5py @@ -1358,14 +1378,11 @@ def _check_spatial_obs(self): return # Validate tissue positions. - is_visium_and_uns_is_single = self._is_visium() and uns_component["spatial"]["is_single"] - self._validate_spatial_tissue_position("array_col", 0, 127, is_visium_and_uns_is_single) - self._validate_spatial_tissue_position("array_row", 0, 77, is_visium_and_uns_is_single) - self._validate_spatial_tissue_position("in_tissue", 0, 1, is_visium_and_uns_is_single) + self._validate_spatial_tissue_position("array_col", 0, 127) + self._validate_spatial_tissue_position("array_row", 0, 77) + self._validate_spatial_tissue_position("in_tissue", 0, 1) - def _validate_spatial_tissue_position( - self, tissue_position_name: str, min: int, max: int, is_visium_and_uns_is_single: bool - ): + def _validate_spatial_tissue_position(self, tissue_position_name: str, min: int, max: int): """ Validate tissue position is allowed and required, and are integers within the given range. Validation is not defined in schema definition yaml. @@ -1373,12 +1390,10 @@ def _validate_spatial_tissue_position( :rtype none """ # Tissue position is foribidden if assay is not Visium or is_single is False. + is_visium_and_uns_is_single = self._is_visium_and_is_single_true() obs_tissue_position = self.adata.obs.get(tissue_position_name) if obs_tissue_position is not None and not is_visium_and_uns_is_single: - self.errors.append( - f"obs['{tissue_position_name}'] is only allowed for obs['assay_ontology_term_id'] " - "'EFO:0010961' (Visium Spatial Gene Expression) and uns['spatial']['is_single'] is True." - ) + self.errors.append(f"obs['{tissue_position_name}'] {ERROR_SUFFIX_VISIUM_AND_IS_SINGLE_TRUE_FORBIDDEN}") return # Exit if we're not dealing with Visium and _is_single True as no further checks are necessary. @@ -1387,10 +1402,7 @@ def _validate_spatial_tissue_position( # Tissue position is required. if obs_tissue_position is None: - self.errors.append( - f"obs['{tissue_position_name}'] is required for obs['assay_ontology_term_id'] " - "'EFO:0010961' (Visium Spatial Gene Expression) and uns['spatial']['is_single'] is True." - ) + self.errors.append(f"obs['{tissue_position_name}'] {ERROR_SUFFIX_VISIUM_AND_IS_SINGLE_TRUE_REQUIRED}") return # Tissue position must be an int. @@ -1475,13 +1487,9 @@ def _check_spatial_uns(self): return # library_id is forbidden if assay is not Visium or is_single is false. - is_visium = self._is_visium() - is_visium_and_uns_is_single = is_visium and uns_is_single + is_visium_and_uns_is_single = self._is_visium_and_is_single_true() if len(library_ids) > 0 and not is_visium_and_uns_is_single: - self.errors.append( - "uns['spatial'][library_id] is only allowed for obs['assay_ontology_term_id'] " - "'EFO:0010961' (Visium Spatial Gene Expression) and uns['spatial']['is_single'] is True." - ) + self.errors.append(f"uns['spatial'][library_id] {ERROR_SUFFIX_VISIUM_AND_IS_SINGLE_TRUE_FORBIDDEN}") # Exit as library_id is not allowed. return @@ -1492,8 +1500,7 @@ def _check_spatial_uns(self): # library_id is required if assay is Visium and is_single is True. if len(library_ids) == 0: self.errors.append( - "uns['spatial'] must contain at least one key representing the library_id when obs['assay_ontology_term_id'] " - "'EFO:0010961' (Visium Spatial Gene Expression) and uns['spatial']['is_single'] is True." + f"uns['spatial'] must contain at least one key representing the library_id when {ERROR_SUFFIX_VISIUM_AND_IS_SINGLE_TRUE}" ) # Exit as library_id is missing. return @@ -1611,8 +1618,10 @@ def _is_visium(self) -> bool: :return True if assay_ontology_term_id is Visium, False otherwise. :rtype bool """ - assay_ontology_term_id = self.adata.obs.get("assay_ontology_term_id") - return assay_ontology_term_id is not None and (assay_ontology_term_id == ASSAY_VISIUM).any() + if self.is_visium is None: + assay_ontology_term_id = self.adata.obs.get("assay_ontology_term_id") + self.is_visium = assay_ontology_term_id is not None and (assay_ontology_term_id == ASSAY_VISIUM).any() + return self.is_visium def _validate_spatial_image_shape(self, image_name: str, image: np.ndarray, max_dimension: int = None): """ diff --git a/cellxgene_schema_cli/tests/test_validate.py b/cellxgene_schema_cli/tests/test_validate.py index edfec833..c9dacc90 100644 --- a/cellxgene_schema_cli/tests/test_validate.py +++ b/cellxgene_schema_cli/tests/test_validate.py @@ -10,7 +10,13 @@ import pytest from cellxgene_ontology_guide.entities import Ontology from cellxgene_schema.schema import get_schema_definition -from cellxgene_schema.validate import Validator, validate +from cellxgene_schema.validate import ( + ERROR_SUFFIX_VISIUM_AND_IS_SINGLE_TRUE, + ERROR_SUFFIX_VISIUM_AND_IS_SINGLE_TRUE_FORBIDDEN, + ERROR_SUFFIX_VISIUM_AND_IS_SINGLE_TRUE_REQUIRED, + Validator, + validate, +) from cellxgene_schema.write_labels import AnnDataLabelAppender from fixtures.examples_validate import adata as adata_valid from fixtures.examples_validate import ( @@ -433,11 +439,7 @@ def test__validate_library_id_forbidden_if_slide_seqV2(self): # Confirm library_id is not allowed for Slide-seqV2. validator._check_spatial_uns() assert len(validator.errors) == 1 - assert ( - "uns['spatial'][library_id] is only allowed for obs['assay_ontology_term_id'] " - "'EFO:0010961' (Visium Spatial Gene Expression) and uns['spatial']['is_single'] is True." - in validator.errors[0] - ) + assert f"uns['spatial'][library_id] {ERROR_SUFFIX_VISIUM_AND_IS_SINGLE_TRUE_FORBIDDEN}" in validator.errors[0] def test__validate_library_id_forbidden_if_visium_and_is_single_false(self): validator: Validator = Validator() @@ -450,11 +452,7 @@ def test__validate_library_id_forbidden_if_visium_and_is_single_false(self): # Confirm library_id is not allowed for Visium if is_single is False. validator._check_spatial_uns() assert len(validator.errors) == 1 - assert ( - "uns['spatial'][library_id] is only allowed for obs['assay_ontology_term_id'] " - "'EFO:0010961' (Visium Spatial Gene Expression) and uns['spatial']['is_single'] is True." - in validator.errors[0] - ) + assert f"uns['spatial'][library_id] {ERROR_SUFFIX_VISIUM_AND_IS_SINGLE_TRUE_FORBIDDEN}" in validator.errors[0] def test__validate_library_id_required_if_visium(self): validator: Validator = Validator() @@ -466,8 +464,7 @@ def test__validate_library_id_required_if_visium(self): validator._check_spatial_uns() assert validator.errors assert ( - "uns['spatial'] must contain at least one key representing the library_id when obs['assay_ontology_term_id'] " - "'EFO:0010961' (Visium Spatial Gene Expression) and uns['spatial']['is_single'] is True." + f"uns['spatial'] must contain at least one key representing the library_id when {ERROR_SUFFIX_VISIUM_AND_IS_SINGLE_TRUE}" in validator.errors[0] ) @@ -707,8 +704,7 @@ def test__validate_tissue_position_forbidden(self, assay_ontology_term_id, is_si tissue_position_names = ["array_col", "array_row", "in_tissue"] for i, tissue_position_name in enumerate(tissue_position_names): assert ( - f"obs['{tissue_position_name}'] is only allowed for obs['assay_ontology_term_id'] " - "'EFO:0010961' (Visium Spatial Gene Expression) and uns['spatial']['is_single'] is True." + f"obs['{tissue_position_name}'] {ERROR_SUFFIX_VISIUM_AND_IS_SINGLE_TRUE_FORBIDDEN}" in validator.errors[i] ) @@ -721,10 +717,7 @@ def test__validate_tissue_position_required(self, tissue_position_name): validator._check_spatial_obs() assert validator.errors - assert ( - f"obs['{tissue_position_name}'] is required for obs['assay_ontology_term_id'] 'EFO:0010961' " - "(Visium Spatial Gene Expression) and uns['spatial']['is_single'] is True." in validator.errors[0] - ) + assert f"obs['{tissue_position_name}'] {ERROR_SUFFIX_VISIUM_AND_IS_SINGLE_TRUE_REQUIRED}" in validator.errors[0] @pytest.mark.parametrize("tissue_position_name", ["array_col", "array_row", "in_tissue"]) def test__validate_tissue_position_int_error(self, tissue_position_name): From 0e5dfd72478f960e8d98d179f9fa7cfbf773476f Mon Sep 17 00:00:00 2001 From: MillenniumFalconMechanic Date: Tue, 30 Apr 2024 13:22:19 -0700 Subject: [PATCH 29/38] Updated tissue position validation to be row-specific --- .../cellxgene_schema/validate.py | 27 ++++++++++++++----- cellxgene_schema_cli/tests/test_validate.py | 19 ++++++++++++- 2 files changed, 38 insertions(+), 8 deletions(-) diff --git a/cellxgene_schema_cli/cellxgene_schema/validate.py b/cellxgene_schema_cli/cellxgene_schema/validate.py index 4418f5f1..3c165827 100644 --- a/cellxgene_schema_cli/cellxgene_schema/validate.py +++ b/cellxgene_schema_cli/cellxgene_schema/validate.py @@ -1389,23 +1389,36 @@ def _validate_spatial_tissue_position(self, tissue_position_name: str, min: int, :rtype none """ - # Tissue position is foribidden if assay is not Visium or is_single is False. - is_visium_and_uns_is_single = self._is_visium_and_is_single_true() - obs_tissue_position = self.adata.obs.get(tissue_position_name) - if obs_tissue_position is not None and not is_visium_and_uns_is_single: + # Tissue position is foribidden if assay is not Visium and is_single is True. + if ( + tissue_position_name in self.adata.obs + and ( + ~((self.adata.obs["assay_ontology_term_id"] == ASSAY_VISIUM) & (self.adata.uns["spatial"]["is_single"])) + & (self.adata.obs[tissue_position_name].notnull()) + ).any() + ): self.errors.append(f"obs['{tissue_position_name}'] {ERROR_SUFFIX_VISIUM_AND_IS_SINGLE_TRUE_FORBIDDEN}") return # Exit if we're not dealing with Visium and _is_single True as no further checks are necessary. - if not is_visium_and_uns_is_single: + if not self._is_visium_and_is_single_true(): return - # Tissue position is required. - if obs_tissue_position is None: + # Check if tissue position is required. At this point, is_single is True and: + # - there's at least one row with Visum, tissue position column is required + # - for any Visium row, tissue position is required. + if ( + tissue_position_name not in self.adata.obs + or ( + (self.adata.obs["assay_ontology_term_id"] == ASSAY_VISIUM) + & (self.adata.obs[tissue_position_name].isnull()) + ).any() + ): self.errors.append(f"obs['{tissue_position_name}'] {ERROR_SUFFIX_VISIUM_AND_IS_SINGLE_TRUE_REQUIRED}") return # Tissue position must be an int. + obs_tissue_position = self.adata.obs.get(tissue_position_name) if not np.issubdtype(obs_tissue_position.dtype, np.integer): self.errors.append(f"obs['{tissue_position_name}'] must be of int type, it is {obs_tissue_position.dtype}.") return diff --git a/cellxgene_schema_cli/tests/test_validate.py b/cellxgene_schema_cli/tests/test_validate.py index c9dacc90..f528c5de 100644 --- a/cellxgene_schema_cli/tests/test_validate.py +++ b/cellxgene_schema_cli/tests/test_validate.py @@ -687,7 +687,14 @@ def test__validate_scalefactors_tissue_hires_scalef_is_float_error(self): ) @pytest.mark.parametrize( - "assay_ontology_term_id, is_single", [("EFO:0030062", False), ("EFO:0010961", False), ("EFO:0030062", True)] + "assay_ontology_term_id, is_single", + [ + (["EFO:0010961", "EFO:0030062"], [True, True]), + (["EFO:0010961", "EFO:0030062"], [False, True]), + ("EFO:0010961", False), + ("EFO:0030062", True), + ("EFO:0030062", False), + ], ) def test__validate_tissue_position_forbidden(self, assay_ontology_term_id, is_single): validator: Validator = Validator() @@ -719,6 +726,16 @@ def test__validate_tissue_position_required(self, tissue_position_name): assert validator.errors assert f"obs['{tissue_position_name}'] {ERROR_SUFFIX_VISIUM_AND_IS_SINGLE_TRUE_REQUIRED}" in validator.errors[0] + def test__validate_tissue_position_not_required(self): + validator: Validator = Validator() + validator._set_schema_def() + validator.adata = adata_slide_seqv2.copy() + validator.adata.obs["assay_ontology_term_id"] = ["EFO:0010961", "EFO:0030062"] + validator.adata.uns["spatial"]["is_single"] = False + + validator._check_spatial_obs() + assert not validator.errors + @pytest.mark.parametrize("tissue_position_name", ["array_col", "array_row", "in_tissue"]) def test__validate_tissue_position_int_error(self, tissue_position_name): validator: Validator = Validator() From d202136c3b24c0cb745c45f2f57fa318d9b23d10 Mon Sep 17 00:00:00 2001 From: MillenniumFalconMechanic Date: Tue, 30 Apr 2024 13:34:20 -0700 Subject: [PATCH 30/38] Linting --- cellxgene_schema_cli/cellxgene_schema/validate.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cellxgene_schema_cli/cellxgene_schema/validate.py b/cellxgene_schema_cli/cellxgene_schema/validate.py index 3c165827..eb0bdc2e 100644 --- a/cellxgene_schema_cli/cellxgene_schema/validate.py +++ b/cellxgene_schema_cli/cellxgene_schema/validate.py @@ -1404,7 +1404,7 @@ def _validate_spatial_tissue_position(self, tissue_position_name: str, min: int, if not self._is_visium_and_is_single_true(): return - # Check if tissue position is required. At this point, is_single is True and: + # At this point, is_single is True and: # - there's at least one row with Visum, tissue position column is required # - for any Visium row, tissue position is required. if ( From 07ae9d9f6b76d59de062c0ce32faacfb33341ad3 Mon Sep 17 00:00:00 2001 From: MillenniumFalconMechanic Date: Tue, 30 Apr 2024 13:45:05 -0700 Subject: [PATCH 31/38] Corrected test --- cellxgene_schema_cli/tests/test_validate.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/cellxgene_schema_cli/tests/test_validate.py b/cellxgene_schema_cli/tests/test_validate.py index f528c5de..d84a62bc 100644 --- a/cellxgene_schema_cli/tests/test_validate.py +++ b/cellxgene_schema_cli/tests/test_validate.py @@ -689,8 +689,8 @@ def test__validate_scalefactors_tissue_hires_scalef_is_float_error(self): @pytest.mark.parametrize( "assay_ontology_term_id, is_single", [ - (["EFO:0010961", "EFO:0030062"], [True, True]), - (["EFO:0010961", "EFO:0030062"], [False, True]), + (["EFO:0010961", "EFO:0030062"], True), + (["EFO:0010961", "EFO:0030062"], False), ("EFO:0010961", False), ("EFO:0030062", True), ("EFO:0030062", False), From 4d4fb2ba94d63a739f4141a8f978ebcb88752b00 Mon Sep 17 00:00:00 2001 From: MillenniumFalconMechanic Date: Tue, 30 Apr 2024 16:10:32 -0700 Subject: [PATCH 32/38] Updated tests post-rebase --- cellxgene_schema_cli/tests/test_schema_compliance.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/cellxgene_schema_cli/tests/test_schema_compliance.py b/cellxgene_schema_cli/tests/test_schema_compliance.py index 4216142f..d752ca52 100644 --- a/cellxgene_schema_cli/tests/test_schema_compliance.py +++ b/cellxgene_schema_cli/tests/test_schema_compliance.py @@ -2029,12 +2029,20 @@ def test_obsm_values_spatial_embedding_missing__is_single_false(self, validator_ validator = validator_with_visium_assay validator.adata.uns["spatial"] = {"is_single": False} del validator.adata.obsm["spatial"] + # Format adata.obs into valid shape for Visium and is_single False. + validator.adata.obs.pop("array_col") + validator.adata.obs.pop("array_row") + validator.adata.obs.pop("in_tissue") validator.validate_adata() assert validator.errors == [] def test_obsm_values_spatial_embedding_present__is_single_false(self, validator_with_visium_assay): validator = validator_with_visium_assay validator.adata.uns["spatial"] = {"is_single": False} + # Format adata.obs into valid shape for Visium and is_single False. + validator.adata.obs.pop("array_col") + validator.adata.obs.pop("array_row") + validator.adata.obs.pop("in_tissue") validator.validate_adata() assert validator.errors == [] From 6ccd0937281e639c0a60b5ed7149abde021b0a65 Mon Sep 17 00:00:00 2001 From: nayib-jose-gloria Date: Wed, 1 May 2024 12:30:09 -0400 Subject: [PATCH 33/38] fix test --- cellxgene_schema_cli/tests/test_schema_compliance.py | 4 ---- 1 file changed, 4 deletions(-) diff --git a/cellxgene_schema_cli/tests/test_schema_compliance.py b/cellxgene_schema_cli/tests/test_schema_compliance.py index 7c262825..72d2799f 100644 --- a/cellxgene_schema_cli/tests/test_schema_compliance.py +++ b/cellxgene_schema_cli/tests/test_schema_compliance.py @@ -2035,10 +2035,6 @@ def test_obsm_values_spatial_embedding_missing__is_single_true(self, validator_w def test_obsm_values_spatial_embedding_missing__is_single_false(self, validator_with_spatial_and_is_single_false): validator = validator_with_spatial_and_is_single_false del validator.adata.obsm["spatial"] - # Format adata.obs into valid shape for Visium and is_single False. - validator.adata.obs.pop("array_col") - validator.adata.obs.pop("array_row") - validator.adata.obs.pop("in_tissue") validator.validate_adata() assert validator.errors == [] From 5a3c660916dea30aa4b4647aeb9d1c1a3c3d0621 Mon Sep 17 00:00:00 2001 From: nayib-jose-gloria Date: Wed, 1 May 2024 13:11:29 -0400 Subject: [PATCH 34/38] lint --- cellxgene_schema_cli/cellxgene_schema/validate.py | 3 +-- cellxgene_schema_cli/tests/fixtures/examples_validate.py | 6 +++++- cellxgene_schema_cli/tests/test_schema_compliance.py | 2 ++ cellxgene_schema_cli/tests/test_validate.py | 1 - 4 files changed, 8 insertions(+), 4 deletions(-) diff --git a/cellxgene_schema_cli/cellxgene_schema/validate.py b/cellxgene_schema_cli/cellxgene_schema/validate.py index fd1e376e..19e08d9f 100644 --- a/cellxgene_schema_cli/cellxgene_schema/validate.py +++ b/cellxgene_schema_cli/cellxgene_schema/validate.py @@ -1398,8 +1398,7 @@ def _validate_spatial_is_primary_data(self): return if is_single is False and obs["is_primary_data"].any(): self.errors.append( - "When uns['spatial']['is_single'] is False, " - "obs['is_primary_data'] must be False for all rows." + "When uns['spatial']['is_single'] is False, " "obs['is_primary_data'] must be False for all rows." ) def _validate_spatial_tissue_position(self, tissue_position_name: str, min: int, max: int): diff --git a/cellxgene_schema_cli/tests/fixtures/examples_validate.py b/cellxgene_schema_cli/tests/fixtures/examples_validate.py index 3802852e..a8b1a285 100644 --- a/cellxgene_schema_cli/tests/fixtures/examples_validate.py +++ b/cellxgene_schema_cli/tests/fixtures/examples_validate.py @@ -469,7 +469,11 @@ ) adata_spatial_is_single_false = anndata.AnnData( - X=sparse.csr_matrix(X), obs=good_obs_visium_is_single_false, uns=good_uns_with_is_single_false, obsm=good_obsm_spatial, var=good_var + X=sparse.csr_matrix(X), + obs=good_obs_visium_is_single_false, + uns=good_uns_with_is_single_false, + obsm=good_obsm_spatial, + var=good_var, ) # anndata for testing migration diff --git a/cellxgene_schema_cli/tests/test_schema_compliance.py b/cellxgene_schema_cli/tests/test_schema_compliance.py index 72d2799f..b93c46cf 100644 --- a/cellxgene_schema_cli/tests/test_schema_compliance.py +++ b/cellxgene_schema_cli/tests/test_schema_compliance.py @@ -62,11 +62,13 @@ def validator_with_adata_missing_raw(validator) -> Validator: validator.adata = examples.adata_non_raw.copy() return validator + @pytest.fixture def validator_with_spatial_and_is_single_false(validator) -> Validator: validator.adata = examples.adata_spatial_is_single_false.copy() return validator + @pytest.fixture def validator_with_visium_assay(validator) -> Validator: validator.adata = examples.adata_visium.copy() diff --git a/cellxgene_schema_cli/tests/test_validate.py b/cellxgene_schema_cli/tests/test_validate.py index e45fff06..46155d89 100644 --- a/cellxgene_schema_cli/tests/test_validate.py +++ b/cellxgene_schema_cli/tests/test_validate.py @@ -737,7 +737,6 @@ def test__validate_tissue_position_required(self, tissue_position_name): assert validator.errors assert f"obs['{tissue_position_name}'] {ERROR_SUFFIX_VISIUM_AND_IS_SINGLE_TRUE_REQUIRED}" in validator.errors[0] - @pytest.mark.parametrize("tissue_position_name", ["array_col", "array_row", "in_tissue"]) def test__validate_tissue_position_int_error(self, tissue_position_name): validator: Validator = Validator() From eb31ce367bd0c047263541554aff266154c0c9e7 Mon Sep 17 00:00:00 2001 From: nayib-jose-gloria Date: Wed, 1 May 2024 17:14:14 -0400 Subject: [PATCH 35/38] tests --- cellxgene_schema_cli/tests/test_schema_compliance.py | 4 ---- cellxgene_schema_cli/tests/test_validate.py | 1 + 2 files changed, 1 insertion(+), 4 deletions(-) diff --git a/cellxgene_schema_cli/tests/test_schema_compliance.py b/cellxgene_schema_cli/tests/test_schema_compliance.py index ebc2b636..68a4434d 100644 --- a/cellxgene_schema_cli/tests/test_schema_compliance.py +++ b/cellxgene_schema_cli/tests/test_schema_compliance.py @@ -2033,10 +2033,6 @@ def test_obsm_values_spatial_embedding_missing__is_single_true(self, validator_w def test_obsm_values_spatial_embedding_missing__is_single_false(self, validator_with_spatial_and_is_single_false): validator = validator_with_spatial_and_is_single_false del validator.adata.obsm["spatial"] - # Format adata.obs into valid shape for Visium and is_single False. - validator.adata.obs.pop("array_col") - validator.adata.obs.pop("array_row") - validator.adata.obs.pop("in_tissue") validator.validate_adata() assert validator.errors == [] diff --git a/cellxgene_schema_cli/tests/test_validate.py b/cellxgene_schema_cli/tests/test_validate.py index dbaf15e2..2aafe91f 100644 --- a/cellxgene_schema_cli/tests/test_validate.py +++ b/cellxgene_schema_cli/tests/test_validate.py @@ -746,6 +746,7 @@ def test__validate_tissue_position_not_required(self): validator.adata = adata_slide_seqv2.copy() validator.adata.obs["assay_ontology_term_id"] = ["EFO:0010961", "EFO:0030062"] validator.adata.uns["spatial"]["is_single"] = False + validator.adata.obs["is_primary_data"] = False validator._check_spatial_obs() assert not validator.errors From 580f8e06f80bfd67693465839ce4ba62af18f4d3 Mon Sep 17 00:00:00 2001 From: nayib-jose-gloria Date: Wed, 1 May 2024 17:29:36 -0400 Subject: [PATCH 36/38] refactor + linting --- cellxgene_schema_cli/cellxgene_schema/validate.py | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/cellxgene_schema_cli/cellxgene_schema/validate.py b/cellxgene_schema_cli/cellxgene_schema/validate.py index 19047919..d9c1a78f 100644 --- a/cellxgene_schema_cli/cellxgene_schema/validate.py +++ b/cellxgene_schema_cli/cellxgene_schema/validate.py @@ -1388,17 +1388,12 @@ def _validate_spatial_is_primary_data(self): """ Validate is_primary_data for spatial datasets. """ - is_single = ( - self.adata.uns["spatial"]["is_single"] - if "spatial" in self.adata.uns and "is_single" in self.adata.uns["spatial"] - else None - ) obs = getattr_anndata(self.adata, "obs") if obs is None or "is_primary_data" not in obs: return - if is_single is False and obs["is_primary_data"].any(): + if self._is_single() is False and obs["is_primary_data"].any(): self.errors.append( - "When uns['spatial']['is_single'] is False, " "obs['is_primary_data'] must be False for all rows." + "When uns['spatial']['is_single'] is False, obs['is_primary_data'] must be False for all rows." ) def _validate_spatial_tissue_position(self, tissue_position_name: str, min: int, max: int): From 8bc61a9073ebff75b5677f2411abbd007d599d73 Mon Sep 17 00:00:00 2001 From: nayib-jose-gloria Date: Thu, 2 May 2024 10:52:08 -0400 Subject: [PATCH 37/38] fix test --- cellxgene_schema_cli/tests/test_schema_compliance.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/cellxgene_schema_cli/tests/test_schema_compliance.py b/cellxgene_schema_cli/tests/test_schema_compliance.py index 68a4434d..b93c46cf 100644 --- a/cellxgene_schema_cli/tests/test_schema_compliance.py +++ b/cellxgene_schema_cli/tests/test_schema_compliance.py @@ -2021,6 +2021,10 @@ def test_obsm_values_no_X_embedding__visium_dataset(self, validator_with_visium_ def test_obsm_values_no_X_embedding__slide_seq_v2_dataset(self, validator_with_slide_seq_v2_assay): validator = validator_with_slide_seq_v2_assay validator.adata.uns["default_embedding"] = "spatial" + del validator.adata.obsm["X_umap"] + validator.validate_adata() + assert validator.errors == [] + assert validator.is_spatial is True def test_obsm_values_spatial_embedding_missing__is_single_true(self, validator_with_visium_assay): validator = validator_with_visium_assay From 680e62fb4cc43e65d603709c39bc4a0ab3603916 Mon Sep 17 00:00:00 2001 From: nayib-jose-gloria Date: Thu, 2 May 2024 11:28:55 -0400 Subject: [PATCH 38/38] lint --- cellxgene_schema_cli/cellxgene_schema/validate.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cellxgene_schema_cli/cellxgene_schema/validate.py b/cellxgene_schema_cli/cellxgene_schema/validate.py index ee111228..ccb4d06d 100644 --- a/cellxgene_schema_cli/cellxgene_schema/validate.py +++ b/cellxgene_schema_cli/cellxgene_schema/validate.py @@ -1385,7 +1385,7 @@ def _check_spatial_obs(self): # Validate cell type. self._validate_spatial_cell_type_ontology_term_id() - + self._validate_spatial_is_primary_data() def _validate_spatial_is_primary_data(self):