Skip to content

Commit

Permalink
Fix tests for similarity metrics
Browse files Browse the repository at this point in the history
  • Loading branch information
LouisK92 committed Sep 22, 2024
1 parent 10f7e50 commit 2998666
Show file tree
Hide file tree
Showing 6 changed files with 118 additions and 64 deletions.
30 changes: 0 additions & 30 deletions src/api/comp_metric.yaml

This file was deleted.

34 changes: 34 additions & 0 deletions src/api/comp_metric_quality.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
namespace: metrics
info:
type: metric
subtype: quality metric
type_info:
label: Quality Metric
summary: A metric for evaluating the quality of the processed iST data
description: |
This metric assesses the quality of the processed iST data.
arguments:
- name: --input
__merge__: file_spatial_corrected_counts.yaml
required: true
direction: input
- name: --input_qc_col
__merge__: file_spatial_qc_col.yaml
direction: input
required: true
- name: --input_transcript_assignments
__merge__: file_transcript_assignments.yaml
direction: input
required: true
- name: "--score"
__merge__: file_score.yaml
direction: output
required: true

test_resources:
- path: /resources_test/task_ist_preprocessing/mouse_brain_combined
dest: resources_test/task_ist_preprocessing/mouse_brain_combined
- type: python_script
path: /common/component_tests/run_and_check_output.py
- type: python_script
path: /common/component_tests/check_config.py
12 changes: 7 additions & 5 deletions src/api/comp_metric_similarity.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -24,11 +24,13 @@ arguments:
required: true
direction: output
__merge__: file_score.yaml
# test_resources:
# - path: /resources_test/common/pancreas
# dest: resources_test/common/pancreas
# - type: python_script
# path: /common/component_tests/run_and_check_output.py

test_resources:
- path: /resources_test/task_ist_preprocessing/mouse_brain_combined
dest: resources_test/task_ist_preprocessing/mouse_brain_combined
- type: python_script
path: /common/component_tests/run_and_check_output.py
- type: python_script
path: /common/component_tests/check_config.py


16 changes: 8 additions & 8 deletions src/api/file_score.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -6,14 +6,6 @@ info:
format:
type: h5ad
uns:
- type: string
name: dataset_id
description: "A unique identifier for the dataset"
required: true
- type: string
name: method_id
description: "A unique identifier for the method"
required: true
- type: string
name: metric_ids
description: "One or more unique metric identifiers"
Expand All @@ -24,3 +16,11 @@ info:
description: "The metric values obtained for the given prediction. Must be of same length as 'metric_ids'."
multiple: true
required: true
# - type: string
# name: dataset_id
# description: "A unique identifier for the dataset"
# required: true
# - type: string
# name: method_id
# description: "A unique identifier for the method"
# required: true
78 changes: 61 additions & 17 deletions src/metrics/similarity/config.vsh.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -14,34 +14,78 @@ name: similarity_metrics
# Metadata for your component
info:
metrics:
# A unique identifier for your metric (required).
# Can contain only lowercase letters or underscores.
- name: negative_marker_purity_reads
# A relatively short label, used when rendering visualisations (required)
label: Negative Marker Purity (Reads)
# A one sentence summary of how this metric works (required). Used when
# rendering summary tables.
summary: "The percentage of negative marker reads assigned to the correct cell types."
# A multi-line description of how this component works (required). Used
# when rendering reference documentation.
description: |
The percentage of negative marker reads assigned to the correct cell types.
# A reference key from the bibtex library at src/common/library.bib (required).
references:
doi: None
# The minimum possible value for this metric (required)
doi: "10.1101/2023.02.13.528102"
min: 0
# The maximum possible value for this metric (required)
max: 1
# Whether a higher value represents a 'better' solution (required)
maximize: true
- name: negative_marker_purity_cells
label: Negative Marker Purity (Cells)
summary: "The percentage of cells that do not contain counts of negative markers of their specific cell type."
description: |
The percentage of cells that do not contain counts of negative markers of their specific cell type.
references:
doi: "10.1101/2023.02.13.528102"
min: 0
max: 1
maximize: true
- name: coexpr_similarity
label: Co-expression Similarity
summary: "The similarity between the co-expression patterns of spatial and scRNA-seq data."
description: |
The similarity is calculated as the absolute difference between the correlation matrices of spatial and
scRNA-seq data. The correlation matrices contain pair-wise correlations between all genes in the dataset.
references:
doi: "10.1101/2023.02.13.528102"
min: 0
max: 1
maximize: true
- name: coexpr_similarity_celltype
label: Co-expression Similarity (Cell Type)
summary: "The similarity between the within cell type co-expression patterns of spatial and scRNA-seq data."
description: |
The similarity is calculated as the absolute difference between the correlation matrices of spatial and
scRNA-seq data for each cell type. The final score is the mean over cell types. The correlation matrices contain
pair-wise correlations between all genes in the dataset.
references:
doi: "10.1101/2023.02.13.528102"
min: 0
max: 1
- name: rel_pairwise_ct_expr_sim
label: Relative Pairwise Cell Type Expression Similarity
summary: "Similarity of the mean expression differences between cell type pairs, compared between spatial and scRNA-seq data."
description: |
todo
references:
doi: "10.1101/2023.02.13.528102"
min: 0
max: 1
- name: rel_pairwise_gene_expr_sim
label: Relative Pairwise Gene Expression Similarity
summary: "Similarity of the mean expression differences between gene pairs, compared between spatial and scRNA-seq data."
description: |
todo
references:
doi: "10.1101/2023.02.13.528102"
min: 0
max: 1
- name: knn_mixing
label: KNN Mixing
summary: "Measure of the modality mixing within the joint knn graph of spatial and scRNA-seq data."
description: |
todo
references:
doi: "10.1101/2023.02.13.528102"
min: 0
max: 1


# Component-specific parameters (optional)
# arguments:
# - name: "--n_neighbors"
# type: "integer"
# default: 5
# description: Number of neighbors to use.

# Resources required to run the component
resources:
Expand Down
12 changes: 8 additions & 4 deletions src/metrics/similarity/script.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,8 @@
# in config.vsh.yaml and then run `viash config inject config.vsh.yaml`.
par = {
'input': "resources_test/task_ist_preprocessing/mouse_brain_combined/corrected_counts.h5ad",
'input_sc': "resources_test/task_ist_preprocessing/mouse_brain_combined/normalised_counts.h5ad",
'input_qc_col': "resources_test/task_ist_preprocessing/mouse_brain_combined/qc_col.h5ad",
'input_sc': "resources_test/task_ist_preprocessing/mouse_brain_combined/scrnaseq_reference.h5ad",
'input_qc_col': "resources_test/task_ist_preprocessing/mouse_brain_combined/spatial_qc_col.h5ad",
'output': "metrics.h5ad",
}
meta = {
Expand All @@ -22,6 +22,8 @@
adata_sp_QC_obs_col = ad.read_h5ad(par['input_qc_col'])
adata_sp.obs['passed_QC'] = adata_sp_QC_obs_col.obs['passed_QC']
adata_sc = ad.read_h5ad(par['input_sc'])
adata_sp.X = adata_sp.layers['normalized'] # TODO: ideally we don't do this, but some txsim functions seem to still expect .X (e.g. coexpression_similarity), fix this within txsim.
adata_sc.X = adata_sc.layers['normalized'] # TODO: same for scRNAseq data

# There should be at least two cell types overlapping between scRNAseq and spatial data
cts_sc = adata_sc.obs['cell_type'].dtype.categories
Expand All @@ -36,8 +38,10 @@


print('Compute metrics', flush=True)
df_filtered = tx.metrics.all_metrics(adata_sp[adata_sp.obs['passed_QC']], adata_sc, key="cell_type")
df = tx.metrics.all_metrics(adata_sp, adata_sc, key="cell_type")
df_filtered = tx.metrics.all_metrics(
adata_sp[adata_sp.obs['passed_QC']], adata_sc, key="cell_type", raw_layer="counts", lognorm_layer="normalized"
)
df = tx.metrics.all_metrics(adata_sp, adata_sc, key="cell_type", raw_layer="counts", lognorm_layer="normalized")

uns_metric_ids = df.index.to_list() + [f"{metric}_qc_filtered" for metric in df_filtered.index]
uns_metric_values = np.concatenate([df.values, df_filtered.values])
Expand Down

0 comments on commit 2998666

Please sign in to comment.