-
Notifications
You must be signed in to change notification settings - Fork 24
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
feat: update validation for obs['is_primary_data'] to enforce False if uns['spatial']['is_single'] is False #865
Changes from all commits
ab3c971
b6c93ad
9010444
ec8fb63
832b297
faec600
4c8a739
476b869
b6020a8
74496f5
b37c52c
126cdb1
3b01478
1fea0cd
f518647
6f25b11
7fe05b2
dc30afb
869c29f
60cdf04
a2935a5
76d62c9
abb1ca4
e10eafe
4208307
1cb1463
b2a676c
7d06a3f
796010b
0e5dfd7
d202136
07ae9d9
4d4fb2b
7be5f4b
6ccd093
5a3c660
01bea67
eb31ce3
580f8e0
8bc61a9
db7801d
680e62f
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -63,6 +63,12 @@ def validator_with_adata_missing_raw(validator) -> Validator: | |
return validator | ||
|
||
|
||
@pytest.fixture
def validator_with_spatial_and_is_single_false(validator) -> Validator:
    """Return the ``validator`` fixture loaded with a copy of ``examples.adata_spatial_is_single_false``."""
    # Copy the example so that mutations made by a test do not touch the shared module-level object.
    spatial_adata = examples.adata_spatial_is_single_false.copy()
    validator.adata = spatial_adata
    return validator
|
||
|
||
@pytest.fixture | ||
def validator_with_visium_assay(validator) -> Validator: | ||
validator.adata = examples.adata_visium.copy() | ||
|
@@ -1101,6 +1107,17 @@ def test_is_primary_data(self, validator_with_adata): | |
"ERROR: Column 'is_primary_data' in dataframe 'obs' " "must be boolean, not 'object'." | ||
] | ||
|
||
def test_is_primary_data__spatial(self, validator_with_spatial_and_is_single_false):
    """
    is_primary_data bool. This MUST be False if dataset has uns['spatial']['is_single'] == False
    """
    validator = validator_with_spatial_and_is_single_false
    obs = validator.adata.obs
    # Set the first row with a single positional write on the DataFrame itself.
    # The chained form ``obs["is_primary_data"][0] = True`` may assign into a temporary
    # (and never updates the frame under pandas Copy-on-Write), and integer ``[0]`` on a
    # label-indexed Series depends on the deprecated positional fallback.
    obs.iloc[0, obs.columns.get_loc("is_primary_data")] = True
    validator.validate_adata()
    assert validator.errors == [
        "ERROR: When uns['spatial']['is_single'] is False, obs['is_primary_data'] must be False for all rows."
    ]
|
||
def test_donor_id_must_be_categorical(self, validator_with_adata): | ||
""" | ||
donor_id categorical with str categories. This MUST be free-text that identifies | ||
|
@@ -1993,26 +2010,18 @@ def test_obsm_values_no_X_embedding__non_spatial_dataset(self, validator_with_ad | |
"WARNING: Validation of raw layer was not performed due to current errors, try again after fixing current errors.", | ||
] | ||
|
||
@pytest.mark.parametrize( | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. split this test up into two for simplicity in the set-up |
||
"assay_ontology_term_id, adata_spatial", | ||
[ | ||
("EFO:0010961", examples.adata_visium), | ||
("EFO:0030062", examples.adata_slide_seqv2), | ||
], | ||
) | ||
def test_obsm_values_no_X_embedding__spatial_dataset( | ||
self, validator_with_adata, assay_ontology_term_id, adata_spatial | ||
): | ||
validator = validator_with_adata | ||
validator.adata.obsm["harmony"] = validator.adata.obsm["X_umap"] | ||
validator.adata.uns = adata_spatial.uns | ||
validator.adata.uns["default_embedding"] = "harmony" | ||
validator.adata.obsm["spatial"] = validator.adata.obsm["X_umap"] | ||
def test_obsm_values_no_X_embedding__visium_dataset(self, validator_with_visium_assay): | ||
validator = validator_with_visium_assay | ||
validator.adata.uns["default_embedding"] = "spatial" | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I am not familiar with the complete, required shape for There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. yes, its expected for non-spatial datasets, not having an X-prefix embedding like However, for spatial datasets, we do not require an X-prefix embedding, just 'spatial' embedding. This test case was originally modeled after |
||
del validator.adata.obsm["X_umap"] | ||
validator.validate_adata() | ||
assert validator.errors == [] | ||
assert validator.is_spatial is True | ||
|
||
def test_obsm_values_no_X_embedding__slide_seq_v2_dataset(self, validator_with_slide_seq_v2_assay): | ||
validator = validator_with_slide_seq_v2_assay | ||
validator.adata.uns["default_embedding"] = "spatial" | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Looks like this test is missing There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. sad, it must've gotten lost along the way during a merge resolution |
||
del validator.adata.obsm["X_umap"] | ||
validator.adata.obs = adata_spatial.obs | ||
validator.adata.obs["assay_ontology_term_id"] = assay_ontology_term_id | ||
validator.adata.obs["suspension_type"] = "na" | ||
validator.adata.obs.loc[:, ["suspension_type"]] = validator.adata.obs.astype("category") | ||
validator.validate_adata() | ||
assert validator.errors == [] | ||
assert validator.is_spatial is True | ||
|
@@ -2025,24 +2034,9 @@ def test_obsm_values_spatial_embedding_missing__is_single_true(self, validator_w | |
"ERROR: 'spatial' embedding is required in 'adata.obsm' if adata.uns['spatial']['is_single'] is True." | ||
] | ||
|
||
def test_obsm_values_spatial_embedding_missing__is_single_false(self, validator_with_visium_assay): | ||
validator = validator_with_visium_assay | ||
validator.adata.uns["spatial"] = {"is_single": False} | ||
def test_obsm_values_spatial_embedding_missing__is_single_false(self, validator_with_spatial_and_is_single_false): | ||
validator = validator_with_spatial_and_is_single_false | ||
del validator.adata.obsm["spatial"] | ||
# Format adata.obs into valid shape for Visium and is_single False. | ||
validator.adata.obs.pop("array_col") | ||
validator.adata.obs.pop("array_row") | ||
validator.adata.obs.pop("in_tissue") | ||
validator.validate_adata() | ||
assert validator.errors == [] | ||
|
||
def test_obsm_values_spatial_embedding_present__is_single_false(self, validator_with_visium_assay): | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. moved this test into |
||
validator = validator_with_visium_assay | ||
validator.adata.uns["spatial"] = {"is_single": False} | ||
# Format adata.obs into valid shape for Visium and is_single False. | ||
validator.adata.obs.pop("array_col") | ||
validator.adata.obs.pop("array_row") | ||
validator.adata.obs.pop("in_tissue") | ||
validator.validate_adata() | ||
assert validator.errors == [] | ||
|
||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
How are "secondary errors" typically handled? For example, should this error be reported for a non-spatial dataset that has (incorrectly) specified `uns.spatial.is_single = False`?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
since we accumulate all errors by appending to "self.errors", something like that would be handled wherever the uns check is being done. In this case, we should handle that in check_spatial_uns. I'll double check if we already are and add it in if not.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Oh wait, I see what you mean. Yes, we do report secondary errors like this. That way, if the problem was that they did mean for it to be spatial but accidentally used the wrong assay, we still tell them in one run about all the other issues associated with it. Generally, we try to report every possible error in one go, even if they'd all be fixed with one upstream correction. There are some exceptions: for instance, we don't validate the X matrix if there are issues with dependent columns before then, because it's an expensive operation to run if we know there will be failures. But we do print a warning letting them know that this was the case:
WARNING: "Validation of raw layer was not performed due to current errors, try again after fixing current errors."
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I do think it's fine that we exit early without checking spatial values in `check_spatial_uns` if the assay is not 'EFO:0010961' (Visium Spatial Gene Expression) or 'EFO:0030062' (Slide-seqV2), since curators have not requested otherwise and the errors would end up being the same for each one (i.e., "this key should not exist if it's not Visium", repeated X times).
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Got it, thanks Nayib!