-
Notifications
You must be signed in to change notification settings - Fork 24
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
feat: update validation for obs['is_primary_data'] to enforce False if uns['spatial']['is_single'] is False #865
Changes from 16 commits
ab3c971
b6c93ad
9010444
ec8fb63
832b297
faec600
4c8a739
476b869
b6020a8
74496f5
b37c52c
126cdb1
3b01478
1fea0cd
f518647
6f25b11
7fe05b2
dc30afb
869c29f
60cdf04
a2935a5
76d62c9
abb1ca4
e10eafe
4208307
1cb1463
b2a676c
7d06a3f
796010b
0e5dfd7
d202136
07ae9d9
4d4fb2b
7be5f4b
6ccd093
5a3c660
01bea67
eb31ce3
580f8e0
8bc61a9
db7801d
680e62f
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -129,6 +129,8 @@ | |
good_obs_visium = pd.DataFrame( | ||
[ | ||
[ | ||
1, | ||
1, | ||
"CL:0000066", | ||
"EFO:0010961", | ||
"MONDO:0100096", | ||
|
@@ -141,8 +143,11 @@ | |
"HsapDv:0000003", | ||
"donor_1", | ||
"na", | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. diff is from #827 merged into main |
||
0, | ||
], | ||
[ | ||
2, | ||
2, | ||
"CL:0000192", | ||
"EFO:0010961", | ||
"PATO:0000461", | ||
|
@@ -155,10 +160,13 @@ | |
"MmusDv:0000003", | ||
"donor_2", | ||
"na", | ||
1, | ||
], | ||
], | ||
index=["X", "Y"], | ||
columns=[ | ||
"array_col", | ||
"array_row", | ||
"cell_type_ontology_term_id", | ||
"assay_ontology_term_id", | ||
"disease_ontology_term_id", | ||
|
@@ -171,6 +179,7 @@ | |
"development_stage_ontology_term_id", | ||
"donor_id", | ||
"suspension_type", | ||
"in_tissue", | ||
], | ||
) | ||
|
||
|
@@ -231,6 +240,58 @@ | |
good_obs_slide_seqv2.loc[:, ["suspension_type"]] = good_obs_slide_seqv2.astype("category") | ||
good_obs_slide_seqv2.loc[:, ["tissue_type"]] = good_obs.astype("category") | ||
|
||
# obs fixture used by adata_spatial_is_single_false, which pairs it with
# uns["spatial"]["is_single"] == False. Both rows set is_primary_data to False.
good_obs_visium_is_single_false = pd.DataFrame(
    [
        [
            "CL:0000066",
            "EFO:0010961",
            "MONDO:0100096",
            "NCBITaxon:9606",
            "PATO:0000383",
            "UBERON:0002048",
            "tissue",
            False,
            "HANCESTRO:0575",
            "HsapDv:0000003",
            "donor_1",
            "na",
        ],
        [
            "CL:0000192",
            "EFO:0010961",
            "PATO:0000461",
            "NCBITaxon:10090",
            "unknown",
            "CL:0000192",
            "cell culture",
            False,
            "na",
            "MmusDv:0000003",
            "donor_2",
            "na",
        ],
    ],
    index=["X", "Y"],
    columns=[
        "cell_type_ontology_term_id",
        "assay_ontology_term_id",
        "disease_ontology_term_id",
        "organism_ontology_term_id",
        "sex_ontology_term_id",
        "tissue_ontology_term_id",
        "tissue_type",
        "is_primary_data",
        "self_reported_ethnicity_ontology_term_id",
        "development_stage_ontology_term_id",
        "donor_id",
        "suspension_type",
    ],
)

# Cast the categorical columns with an explicit astype() mapping.
# `df.loc[:, [col]] = df.astype("category")` writes values in place on
# pandas >= 2.0 and leaves the column dtype as object, so it cannot be relied
# on to produce categorical columns.
good_obs_visium_is_single_false = good_obs_visium_is_single_false.astype(
    {
        "donor_id": "category",
        "suspension_type": "category",
        "tissue_type": "category",
    }
)
|
||
# --- | ||
# 2. Creating individual var components: valid object and valid object and with labels | ||
|
||
|
@@ -318,6 +379,14 @@ | |
}, | ||
} | ||
|
||
# uns fixture whose "spatial" entry only declares is_single=False.
good_uns_with_is_single_false = dict(
    title="A title",
    default_embedding="X_umap",
    X_approximate_distribution="normal",
    batch_condition=["is_primary_data"],
    spatial=dict(is_single=False),
)
|
||
good_uns_with_slide_seqV2_spatial = { | ||
"title": "A title", | ||
"default_embedding": "X_umap", | ||
|
@@ -399,6 +468,10 @@ | |
var=good_var, | ||
) | ||
|
||
# AnnData fixture assembled from the is_single=False obs/uns fixtures together
# with the shared spatial obsm and var fixtures.
adata_spatial_is_single_false = anndata.AnnData(
    X=sparse.csr_matrix(X),
    obs=good_obs_visium_is_single_false,
    var=good_var,
    obsm=good_obsm_spatial,
    uns=good_uns_with_is_single_false,
)
|
||
# anndata for testing migration | ||
unmigrated_obs = pd.DataFrame( | ||
[ | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -62,6 +62,10 @@ def validator_with_adata_missing_raw(validator) -> Validator: | |
validator.adata = examples.adata_non_raw.copy() | ||
return validator | ||
|
||
@pytest.fixture
def validator_with_spatial_and_is_single_false(validator) -> Validator:
    """Return ``validator`` loaded with a copy of ``examples.adata_spatial_is_single_false``."""
    # Copy so that mutations made by a test do not touch the shared example object.
    spatial_adata = examples.adata_spatial_is_single_false.copy()
    validator.adata = spatial_adata
    return validator
|
||
@pytest.fixture | ||
def validator_with_visium_assay(validator) -> Validator: | ||
|
@@ -1101,6 +1105,17 @@ def test_is_primary_data(self, validator_with_adata): | |
"ERROR: Column 'is_primary_data' in dataframe 'obs' " "must be boolean, not 'object'." | ||
] | ||
|
||
def test_is_primary_data__spatial(self, validator_with_spatial_and_is_single_false):
    """
    is_primary_data bool. This MUST be False if dataset has uns['spatial']['is_single'] == False
    """
    validator = validator_with_spatial_and_is_single_false
    # Flip the first row with one label-based .loc write. The chained form
    # obs["is_primary_data"][0] = True treats an integer key as a position on a
    # string-labelled Series (deprecated) and may modify a temporary copy
    # instead of obs itself.
    validator.adata.obs.loc[validator.adata.obs.index[0], "is_primary_data"] = True
    validator.validate_adata()
    assert validator.errors == [
        "ERROR: When uns['spatial']['is_single'] is False, obs['is_primary_data'] must be False for all rows."
    ]
|
||
def test_donor_id_must_be_categorical(self, validator_with_adata): | ||
""" | ||
donor_id categorical with str categories. This MUST be free-text that identifies | ||
|
@@ -1993,25 +2008,18 @@ def test_obsm_values_no_X_embedding__non_spatial_dataset(self, validator_with_ad | |
"WARNING: Validation of raw layer was not performed due to current errors, try again after fixing current errors.", | ||
] | ||
|
||
@pytest.mark.parametrize( | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. split this test up into two for simplicity in the set-up |
||
"assay_ontology_term_id, uns_spatial", | ||
[ | ||
("EFO:0010961", examples.good_uns_with_visium_spatial["spatial"]), | ||
("EFO:0030062", examples.good_uns_with_slide_seqV2_spatial["spatial"]), | ||
], | ||
) | ||
def test_obsm_values_no_X_embedding__spatial_dataset( | ||
self, validator_with_adata, assay_ontology_term_id, uns_spatial | ||
): | ||
validator = validator_with_adata | ||
validator.adata.obsm["harmony"] = validator.adata.obsm["X_umap"] | ||
validator.adata.uns["default_embedding"] = "harmony" | ||
validator.adata.uns["spatial"] = uns_spatial | ||
validator.adata.obsm["spatial"] = validator.adata.obsm["X_umap"] | ||
def test_obsm_values_no_X_embedding__visium_dataset(self, validator_with_visium_assay): | ||
validator = validator_with_visium_assay | ||
validator.adata.uns["default_embedding"] = "spatial" | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I am not familiar with the complete, required shape for There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. yes, it's expected for non-spatial datasets, not having an X-prefix embedding like However, for spatial datasets, we do not require an X-prefix embedding, just 'spatial' embedding. This test case was originally modeled after |
||
del validator.adata.obsm["X_umap"] | ||
validator.validate_adata() | ||
assert validator.errors == [] | ||
assert validator.is_spatial is True | ||
|
||
def test_obsm_values_no_X_embedding__slide_seq_v2_dataset(self, validator_with_slide_seq_v2_assay): | ||
validator = validator_with_slide_seq_v2_assay | ||
validator.adata.uns["default_embedding"] = "spatial" | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Looks like this test is missing There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. sad, it must've gotten lost along the way during a merge resolution |
||
del validator.adata.obsm["X_umap"] | ||
validator.adata.obs["assay_ontology_term_id"] = assay_ontology_term_id | ||
validator.adata.obs["suspension_type"] = "na" | ||
validator.adata.obs.loc[:, ["suspension_type"]] = validator.adata.obs.astype("category") | ||
validator.validate_adata() | ||
assert validator.errors == [] | ||
assert validator.is_spatial is True | ||
|
@@ -2024,19 +2032,12 @@ def test_obsm_values_spatial_embedding_missing__is_single_true(self, validator_w | |
"ERROR: 'spatial' embedding is required in 'adata.obsm' if adata.uns['spatial']['is_single'] is True." | ||
] | ||
|
||
def test_obsm_values_spatial_embedding_missing__is_single_false(self, validator_with_visium_assay): | ||
validator = validator_with_visium_assay | ||
validator.adata.uns["spatial"] = {"is_single": False} | ||
def test_obsm_values_spatial_embedding_missing__is_single_false(self, validator_with_spatial_and_is_single_false): | ||
validator = validator_with_spatial_and_is_single_false | ||
del validator.adata.obsm["spatial"] | ||
validator.validate_adata() | ||
assert validator.errors == [] | ||
|
||
def test_obsm_values_spatial_embedding_present__is_single_false(self, validator_with_visium_assay): | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. moved this test into |
||
validator = validator_with_visium_assay | ||
validator.adata.uns["spatial"] = {"is_single": False} | ||
validator.validate_adata() | ||
assert validator.errors == [] | ||
|
||
def test_obsm_values_spatial_embedding_present__is_single_none(self, validator_with_adata): | ||
validator = validator_with_adata | ||
validator.adata.obsm["spatial"] = validator.adata.obsm["X_umap"] | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Could we use `_is_single()` here once this is rebased on `main`?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
ah yes good call