From 2998666052c4c01f241b78a49e7f9dc0793e6cd5 Mon Sep 17 00:00:00 2001 From: LouisK92 <37270609+LouisK92@users.noreply.github.com> Date: Sun, 22 Sep 2024 23:07:49 +0200 Subject: [PATCH] Fix tests for similarity metrics --- src/api/comp_metric.yaml | 30 ---------- src/api/comp_metric_quality.yaml | 34 +++++++++++ src/api/comp_metric_similarity.yaml | 12 ++-- src/api/file_score.yaml | 16 +++--- src/metrics/similarity/config.vsh.yaml | 78 ++++++++++++++++++++------ src/metrics/similarity/script.py | 12 ++-- 6 files changed, 118 insertions(+), 64 deletions(-) delete mode 100644 src/api/comp_metric.yaml create mode 100644 src/api/comp_metric_quality.yaml diff --git a/src/api/comp_metric.yaml b/src/api/comp_metric.yaml deleted file mode 100644 index 5de7d53b..00000000 --- a/src/api/comp_metric.yaml +++ /dev/null @@ -1,30 +0,0 @@ -namespace: metrics -info: - type: metric - type_info: - label: Metric - summary: A metric for evaluating iST preprocessing methods - description: | - This metric evaluates the quality of the iST preprocessing. -arguments: - # todo: update - # - name: "--solution" - # __merge__: file_solution.yaml - # direction: input - # required: true - # - name: "--prediction" - # __merge__: file_prediction.yaml - # direction: input - # required: true - - name: "--score" - __merge__: file_score.yaml - direction: output - required: true - -test_resources: - - path: /resources_test/task_ist_preprocessing/mouse_brain_combined - dest: resources_test/task_ist_preprocessing/mouse_brain_combined - - type: python_script - path: /common/component_tests/run_and_check_output.py - - type: python_script - path: /common/component_tests/check_config.py diff --git a/src/api/comp_metric_quality.yaml b/src/api/comp_metric_quality.yaml new file mode 100644 index 00000000..03284555 --- /dev/null +++ b/src/api/comp_metric_quality.yaml @@ -0,0 +1,34 @@ +namespace: metrics +info: + type: metric + subtype: quality metric + type_info: + label: Quality Metric + summary: A metric for evaluating the quality of the processed iST data + description: | + This metric assesses the quality of the processed iST data. +arguments: + - name: --input + __merge__: file_spatial_corrected_counts.yaml + required: true + direction: input + - name: --input_qc_col + __merge__: file_spatial_qc_col.yaml + direction: input + required: true + - name: --input_transcript_assignments + __merge__: file_transcript_assignments.yaml + direction: input + required: true + - name: "--score" + __merge__: file_score.yaml + direction: output + required: true + +test_resources: + - path: /resources_test/task_ist_preprocessing/mouse_brain_combined + dest: resources_test/task_ist_preprocessing/mouse_brain_combined + - type: python_script + path: /common/component_tests/run_and_check_output.py + - type: python_script + path: /common/component_tests/check_config.py diff --git a/src/api/comp_metric_similarity.yaml b/src/api/comp_metric_similarity.yaml index 2c114a24..068e91b7 100644 --- a/src/api/comp_metric_similarity.yaml +++ b/src/api/comp_metric_similarity.yaml @@ -24,11 +24,13 @@ arguments: required: true direction: output __merge__: file_score.yaml -# test_resources: -# - path: /resources_test/common/pancreas -# dest: resources_test/common/pancreas -# - type: python_script -# path: /common/component_tests/run_and_check_output.py +test_resources: + - path: /resources_test/task_ist_preprocessing/mouse_brain_combined + dest: resources_test/task_ist_preprocessing/mouse_brain_combined + - type: python_script + path: /common/component_tests/run_and_check_output.py + - type: python_script + path: /common/component_tests/check_config.py diff --git a/src/api/file_score.yaml b/src/api/file_score.yaml index 9bb39485..12ca9758 100644 --- a/src/api/file_score.yaml +++ b/src/api/file_score.yaml @@ -6,14 +6,6 @@ info: format: type: h5ad uns: - - type: string - name: dataset_id - description: "A unique identifier for the dataset" - required: true - - type: string - name: method_id - description: "A unique identifier for the method" - required: true - type: string name: metric_ids description: "One or more unique metric identifiers" @@ -24,3 +16,11 @@ info: description: "The metric values obtained for the given prediction. Must be of same length as 'metric_ids'." multiple: true required: true + # - type: string + # name: dataset_id + # description: "A unique identifier for the dataset" + # required: true + # - type: string + # name: method_id + # description: "A unique identifier for the method" + # required: true \ No newline at end of file diff --git a/src/metrics/similarity/config.vsh.yaml b/src/metrics/similarity/config.vsh.yaml index 2cdb7ea8..8e3ca4c6 100644 --- a/src/metrics/similarity/config.vsh.yaml +++ b/src/metrics/similarity/config.vsh.yaml @@ -14,34 +14,78 @@ name: similarity_metrics # Metadata for your component info: metrics: - # A unique identifier for your metric (required). - # Can contain only lowercase letters or underscores. - name: negative_marker_purity_reads - # A relatively short label, used when rendering visualisarions (required) label: Negative Marker Purity (Reads) - # A one sentence summary of how this metric works (required). Used when - # rendering summary tables. summary: "The percentage of negative marker reads assigned to the correct cell types." - # A multi-line description of how this component works (required). Used - # when rendering reference documentation. description: | The percentage of negative marker reads assigned to the correct cell types. # A reference key from the bibtex library at src/common/library.bib (required). references: - doi: None - # The minimum possible value for this metric (required) + doi: "10.1101/2023.02.13.528102" min: 0 - # The maximum possible value for this metric (required) max: 1 - # Whether a higher value represents a 'better' solution (required) maximize: true + - name: negative_marker_purity_cells + label: Negative Marker Purity (Cells) + summary: "The percentage of cells that do not contain counts of negative markers of their specific cell type." + description: | + The percentage of cells that do not contain counts of negative markers of their specific cell type. + references: + doi: "10.1101/2023.02.13.528102" + min: 0 + max: 1 + maximize: true + - name: coexpr_similarity + label: Co-expression Similarity + summary: "The similarity between the co-expression patterns of spatial and scRNA-seq data." + description: | + The similarity is calculated as the absolute difference between the correlation matrices of spatial and + scRNA-seq data. The correlation matrices contain pair-wise correlations between all genes in the dataset. + references: + doi: "10.1101/2023.02.13.528102" + min: 0 + max: 1 + maximize: true + - name: coexpr_similarity_celltype + label: Co-expression Similarity (Cell Type) + summary: "The similarity between the within cell type co-expression patterns of spatial and scRNA-seq data." + description: | + The similarity is calculated as the absolute difference between the correlation matrices of spatial and + scRNA-seq data for each cell type. The final score is the mean over cell types. The correlation matrices contain + pair-wise correlations between all genes in the dataset. + references: + doi: "10.1101/2023.02.13.528102" + min: 0 + max: 1 + - name: rel_pairwise_ct_expr_sim + label: Relative Pairwise Cell Type Expression Similarity + summary: "Similarity of the mean expression difference between cell type pairs between spatial and scRNA-seq data." + description: | + todo + references: + doi: "10.1101/2023.02.13.528102" + min: 0 + max: 1 + - name: rel_pairwise_gene_expr_sim + label: Relative Pairwise Gene Expression Similarity + summary: "Similarity of the mean expression difference between gene pairs between spatial and scRNA-seq data." + description: | + todo + references: + doi: "10.1101/2023.02.13.528102" + min: 0 + max: 1 + - name: knn_mixing + label: KNN Mixing + summary: "Measure of the modality mixing within the joint knn graph of spatial and scRNA-seq data." + description: | + todo + references: + doi: "10.1101/2023.02.13.528102" + min: 0 + max: 1 + -# Component-specific parameters (optional) -# arguments: -# - name: "--n_neighbors" -# type: "integer" -# default: 5 -# description: Number of neighbors to use. # Resources required to run the component resources: diff --git a/src/metrics/similarity/script.py b/src/metrics/similarity/script.py index 400590a0..82469f70 100644 --- a/src/metrics/similarity/script.py +++ b/src/metrics/similarity/script.py @@ -8,8 +8,8 @@ # in config.vsh.yaml and then run `viash config inject config.vsh.yaml`. par = { 'input': "resources_test/task_ist_preprocessing/mouse_brain_combined/corrected_counts.h5ad", - 'input_sc': "resources_test/task_ist_preprocessing/mouse_brain_combined/normalised_counts.h5ad", - 'input_qc_col': "resources_test/task_ist_preprocessing/mouse_brain_combined/qc_col.h5ad", + 'input_sc': "resources_test/task_ist_preprocessing/mouse_brain_combined/scrnaseq_reference.h5ad", + 'input_qc_col': "resources_test/task_ist_preprocessing/mouse_brain_combined/spatial_qc_col.h5ad", 'output': "metrics.h5ad", } meta = { @@ -22,6 +22,8 @@ adata_sp_QC_obs_col = ad.read_h5ad(par['input_qc_col']) adata_sp.obs['passed_QC'] = adata_sp_QC_obs_col.obs['passed_QC'] adata_sc = ad.read_h5ad(par['input_sc']) +adata_sp.X = adata_sp.layers['normalized'] # TODO: ideally we don't do this, but some txsim functions seem to still expect .X (e.g. coexpression_similarity), fix this within txsim. +adata_sc.X = adata_sc.layers['normalized'] # TODO: same for scRNAseq data # There should be at least two cell types overlapping between scRNAseq and spatial data cts_sc = adata_sc.obs['cell_type'].dtype.categories @@ -36,8 +38,10 @@ print('Compute metrics', flush=True) -df_filtered = tx.metrics.all_metrics(adata_sp[adata_sp.obs['passed_QC']], adata_sc, key="cell_type") -df = tx.metrics.all_metrics(adata_sp, adata_sc, key="cell_type") +df_filtered = tx.metrics.all_metrics( + adata_sp[adata_sp.obs['passed_QC']], adata_sc, key="cell_type", raw_layer="counts", lognorm_layer="normalized" +) +df = tx.metrics.all_metrics(adata_sp, adata_sc, key="cell_type", raw_layer="counts", lognorm_layer="normalized") uns_metric_ids = df.index.to_list() + [f"{metric}_qc_filtered" for metric in df_filtered.index] uns_metric_values = np.concatenate([df.values, df_filtered.values])