Skip to content

Commit

Permalink
fix(schema 5.1.0): throw error if obsm is not type np.ndarray (#859)
Browse files (browse the repository at this point in the history)
Co-authored-by: nayib-jose-gloria <[email protected]>
Co-authored-by: Nayib Gloria <[email protected]>
  • Loading branch information
3 people authored May 6, 2024
1 parent d8e94b5 commit e06940c
Show file tree
Hide file tree
Showing 2 changed files with 49 additions and 12 deletions.
27 changes: 22 additions & 5 deletions cellxgene_schema_cli/cellxgene_schema/validate.py
Original file line number Diff line number Diff line change
Expand Up @@ -1003,6 +1003,7 @@ def _validate_obsm(self):

regex_pattern = r"^[a-zA-Z][a-zA-Z0-9_.-]*$"

unknown_key = False # an unknown key does not match 'spatial' or 'X_{suffix}'
if key.startswith("X_"):
obsm_with_x_prefix += 1
if key.lower() == "x_spatial":
Expand All @@ -1021,19 +1022,35 @@ def _validate_obsm(self):
f"not be available in Explorer"
)
issue_list = self.warnings
unknown_key = True

if not isinstance(value, np.ndarray):
issue_list.append(
self.errors.append(
f"All embeddings have to be of 'numpy.ndarray' type, " f"'adata.obsm['{key}']' is {type(value)}')."
)
# Skip over the subsequent checks that require the value to be an array
continue

if len(value.shape) < 2 or value.shape[0] != self.adata.n_obs or value.shape[1] < 2:
issue_list.append(
f"All embeddings must have as many rows as cells, and at least two columns."
f" 'adata.obsm['{key}']' has shape of '{value.shape}'."
if len(value.shape) < 2:
self.errors.append(
f"All embeddings must at least two dimensions. 'adata.obsm['{key}']' has a shape length of '{len(value.shape)}'."
)
else:
if value.shape[0] != self.adata.n_obs:
self.errors.append(
f"All embeddings must have as many rows as cells. 'adata.obsm['{key}']' has rows='{value.shape[0]}'."
)

if unknown_key and value.shape[1] < 1:
self.errors.append(
f"All unspecified embeddings must have at least one column. 'adata.obsm['{key}']' has columns='{value.shape[1]}'."
)

if not unknown_key and value.shape[1] < 2:
self.errors.append(
f"All 'X_' and 'spatial' embeddings must have at least two columns. 'adata.obsm['{key}']' has columns='{value.shape[1]}'."
)

if not (np.issubdtype(value.dtype, np.integer) or np.issubdtype(value.dtype, np.floating)):
issue_list.append(
f"adata.obsm['{key}'] has an invalid data type. It should be "
Expand Down
34 changes: 27 additions & 7 deletions cellxgene_schema_cli/tests/test_schema_compliance.py
Original file line number Diff line number Diff line change
Expand Up @@ -2057,7 +2057,10 @@ def test_obsm_values_warn_start_with_X(self, validator_with_adata):
"WARNING: Dataframe 'var' only has 4 rows. Features SHOULD NOT be filtered from expression matrix.",
"WARNING: Embedding key in 'adata.obsm' harmony is not 'spatial' nor does it start with 'X_'. "
"Thus, it will not be available in Explorer",
"WARNING: All embeddings have to be of 'numpy.ndarray' type, 'adata.obsm['harmony']' is <class 'pandas.core.frame.DataFrame'>').",
"WARNING: Validation of raw layer was not performed due to current errors, try again after fixing current errors.",
]
assert validator.errors == [
"ERROR: All embeddings have to be of 'numpy.ndarray' type, 'adata.obsm['harmony']' is <class 'pandas.core.frame.DataFrame'>')."
]

def test_obsm_values_suffix_is_forbidden(self, validator_with_adata):
Expand All @@ -2080,13 +2083,14 @@ def test_obsm_values_key_start_with_number(self, validator_with_adata):
validator.adata.obsm["3D"] = pd.DataFrame(validator.adata.obsm["X_umap"], index=validator.adata.obs_names)
validator.validate_adata()
assert validator.errors == [
"ERROR: Embedding key in 'adata.obsm' 3D does not match the regex pattern ^[a-zA-Z][a-zA-Z0-9_.-]*$."
"ERROR: Embedding key in 'adata.obsm' 3D does not match the regex pattern ^[a-zA-Z][a-zA-Z0-9_.-]*$.",
"ERROR: All embeddings have to be of 'numpy.ndarray' type, 'adata.obsm['3D']' is <class "
"'pandas.core.frame.DataFrame'>').",
]
assert validator.warnings == [
"WARNING: Dataframe 'var' only has 4 rows. Features SHOULD NOT be filtered from expression matrix.",
"WARNING: Embedding key in 'adata.obsm' 3D is not 'spatial' nor does it start with 'X_'. "
"Thus, it will not be available in Explorer",
"WARNING: All embeddings have to be of 'numpy.ndarray' type, 'adata.obsm['3D']' is <class 'pandas.core.frame.DataFrame'>').",
"WARNING: Validation of raw layer was not performed due to current errors, try again after fixing current errors.",
]

Expand Down Expand Up @@ -2136,9 +2140,23 @@ def test_obsm_shape_one_column(self, validator_with_visium_assay, key):
validator.adata.obsm[key] = numpy.delete(validator.adata.obsm[key], 0, 1)
validator.validate_adata()
assert validator.errors == [
"ERROR: All embeddings must have as many rows as cells, and "
f"at least two columns. 'adata.obsm['{key}']' has shape "
"of '(2, 1)'."
"ERROR: All 'X_' and 'spatial' embeddings must have at least two columns. "
f"'adata.obsm['{key}']' has columns='1'."
]

def test_obsm_shape_zero_column_with_unknown_key(self, validator_with_adata):
    """
    An ndarray embedding stored under a key that is neither 'spatial' nor
    'X_'-prefixed is expected to be reported when it has zero columns.
    """
    validator = validator_with_adata
    # Build an embedding with one row per observation but no columns at all.
    empty_embedding = numpy.zeros((validator.adata.n_obs, 0))
    validator.adata.obsm["unknown"] = empty_embedding

    validator.validate_adata()

    # Both the zero-size check and the per-key column check should fire, in this order.
    expected_errors = [
        "ERROR: The size of the ndarray stored for a 'adata.obsm['unknown']' MUST NOT be zero.",
        "ERROR: All unspecified embeddings must have at least one column. "
        "'adata.obsm['unknown']' has columns='0'.",
    ]
    assert validator.errors == expected_errors

def test_obsm_shape_same_rows_and_columns(self, validator_with_adata):
Expand Down Expand Up @@ -2166,7 +2184,9 @@ def test_obsm_size_zero(self, validator_with_adata):
validator.adata = save_and_read_adata(adata)
validator.validate_adata()
assert validator.errors == [
"ERROR: The size of the ndarray stored for a 'adata.obsm['badsize']' MUST NOT be zero.",
"ERROR: The size of the ndarray stored for a 'adata.obsm['badsize']' MUST NOT " "be zero.",
"ERROR: All unspecified embeddings must have at least one column. "
"'adata.obsm['badsize']' has columns='0'.",
]


Expand Down

0 comments on commit e06940c

Please sign in to comment.