nf-core · nschcolnicov · Nov 29, 2024 · Nov 26, 2024 · Nov 26, 2024 · Nov 26, 2024
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -22,6 +22,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 - [#82](https://github.com/nf-core/nanostring/pull/82) - Updated to nf-core template 3.0.2
 - [#95](https://github.com/nf-core/nanostring/pull/95) - Add pipeline level nf-tests.
 - [#96](https://github.com/nf-core/nanostring/pull/96) - Remove timestamp suffix from nacho_norm.R and write_out_prepared_gex.R
+- [#114](https://github.com/nf-core/nanostring/pull/114) - Convert `CREATE_GENE_HEATMAP` and `COMPUTE_GENE_SCORES` to nf-core format and create a subworkflow for them.
 
 ### `Dependencies`
 

diff --git a/modules/local/compute_gene_scores/main.nf b/modules/local/compute_gene_scores/main.nf
@@ -7,12 +7,12 @@ process COMPUTE_GENE_SCORES {
         'biocontainers/mulled-v2-e6920e60d80922852a1b19630ebe16754cf5320d:75e2c0a29159bae8a964e43ae16a45c282fdf651-0' }"
 
     input:
-    path counts
-    path geneset_yaml
+    tuple val(meta), path(normalized_counts)
+    path gene_score_yaml
 
     output:
-    path "*.txt"        , emit: scores_for_mqc
-    path "versions.yml" , emit: versions
+    tuple val(meta), path("*.txt"), emit: scores_for_mqc
+    path "versions.yml"            , emit: versions
 
     when:
     task.ext.when == null || task.ext.when
@@ -21,7 +21,7 @@ process COMPUTE_GENE_SCORES {
     def args = task.ext.args ?: ''
 
     """
-    compute_gene_scores.R $geneset_yaml $counts $args
+    compute_gene_scores.R $gene_score_yaml $normalized_counts $args
-    compute_gene_scores.R $gene_score_yaml $normalized_counts $args
+    compute_gene_scores.R \\
+    $gene_score_yaml \\
+    $args \\
+    $normalized_counts 
-    compute_gene_scores.R $gene_score_yaml $normalized_counts $args
+    compute_gene_scores.R \\
+    $gene_score_yaml \\
+    $args \\
+    $normalized_counts 
 
     cat <<-END_VERSIONS > versions.yml
     "${task.process}":
@@ -33,7 +33,24 @@ process COMPUTE_GENE_SCORES {
         r-yaml: \$(Rscript -e "library(yaml); cat(as.character(packageVersion('yaml')))")
         r-FactoMineR: \$(Rscript -e "library(FactoMineR); cat(as.character(packageVersion('FactoMineR')))")
         r-stringr: \$(Rscript -e "library(stringr); cat(as.character(packageVersion('stringr')))")
-        r-matrixstats: \$(Rscript -e "library(matrixstats); cat(as.character(packageVersion('matrixstats')))")
     END_VERSIONS
     """
+
+    stub:
+    """
+    touch scores_for_mqc.txt
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        r-base: \$(echo \$(R --version 2>&1) | sed 's/^.*R version //; s/ .*\$//')
+        r-dplyr: \$(Rscript -e "library(dplyr); cat(as.character(packageVersion('dplyr')))")
+        r-tibble: \$(Rscript -e "library(tibble); cat(as.character(packageVersion('tibble')))")
+        r-singscore: \$(Rscript -e "library(singscore); cat(as.character(packageVersion('singscore')))")
+        r-GSVA: \$(Rscript -e "library(GSVA); cat(as.character(packageVersion('GSVA')))")
+        r-yaml: \$(Rscript -e "library(yaml); cat(as.character(packageVersion('yaml')))")
+        r-FactoMineR: \$(Rscript -e "library(FactoMineR); cat(as.character(packageVersion('FactoMineR')))")
+        r-stringr: \$(Rscript -e "library(stringr); cat(as.character(packageVersion('stringr')))")
+    END_VERSIONS
+    """
+
 }
diff --git a/modules/local/compute_gene_scores/meta.yml b/modules/local/compute_gene_scores/meta.yml
@@ -2,23 +2,39 @@ name: compute_gene_scores
 description: calculate gene set enrichment scores using multiple methods.
 keywords:
   - bioinformatics tools
+  - gene scores
+  - Singscore
+  - GSVA
+  - SSGSEA
+  - PLAGE
+  - Samsscore
 tools:
   - compute_gene_scores:
       description: |
-        Compute gene scores calculates gene set scores using various methods such as Singscore, GSVA, SSGSEA, PLAGE, and Samsscore.
+        Computes gene scores using various methods such as Singscore, GSVA, SSGSEA, PLAGE, and Samsscore.
         It includes multiple gene set transformations and scoring techniques.
 input:
-  - - counts:
+  - - meta:
+        type: map
+        description: |
+          Groovy Map containing sample information
+          e.g. [ id:'test' ]
+    - normalized_counts:
         type: file
         description: |
-          A matrix or data frame containing normalized gene counts data.
+          A tab-separated file containing normalized gene expression counts for all genes, both Endogenous and Housekeeping, depending on the normalization method chosen (genes as rows and samples as columns).
         pattern: "*normalized_counts.tsv"
-  - - geneset_yaml:
+  - - gene_score_yaml:
         type: file
         description: |
-          This sets the YAML to be used for computing the gene scores. Needs both a name for each set of genes and respective genes to be selected.
+          Optional YAML file to be used for computing the gene scores. Needs both a name for each set of genes and the respective genes to be selected.
 output:
   - scores_for_mqc:
+      - meta:
+        type: map
+        description: |
+          Groovy Map containing sample information
+          e.g. [ id:'test' ]
       - "*.txt":
         type: file
         description: |

diff --git a/bin/compute_gene_scores.R → ...s/resources/usr/bin/compute_gene_scores.R b/bin/compute_gene_scores.R → ...s/resources/usr/bin/compute_gene_scores.R
diff --git a/modules/local/compute_gene_scores/tests/main.nf.test b/modules/local/compute_gene_scores/tests/main.nf.test
@@ -3,22 +3,39 @@ nextflow_process {
     name "Test Process COMPUTE_GENE_SCORES"
     script "../main.nf"
     process "COMPUTE_GENE_SCORES"
+    config "./nextflow.config"
 
     tag "modules"
     tag "modules_nfcore"
     tag "compute_gene_scores"
 
     test("test profile") {
-
         when {
             process {
                 """
-                input[0] = file(params.pipelines_testdata_base_path + 'nanostring/modules_test_data/test_profile/normalized_counts.tsv', checkIfExists: true )
+                input[0] = [[id:"test"], file(params.pipelines_testdata_base_path + 'nanostring/modules_test_data/test_profile/normalized_counts.tsv', checkIfExists: true )]
                 input[1] = file(params.pipelines_testdata_base_path + 'nanostring/gene_score_test.yaml', checkIfExists: true )
                 """
             }
         }
+        then {
+            assertAll(
+                { assert process.success },
+                { assert snapshot(process.out).match() },
+            )
+        }
+    }
 
+    test("stub") {
+        options '-stub'
+        when {
+            process {
+                """
+                input[0] = [[id:"test"], file(params.pipelines_testdata_base_path + 'nanostring/modules_test_data/test_profile/normalized_counts.tsv', checkIfExists: true )]
+                input[1] = file(params.pipelines_testdata_base_path + 'nanostring/gene_score_test.yaml', checkIfExists: true )
+                """
+            }
+        }
         then {
             assertAll(
                 { assert process.success },

diff --git a/modules/local/compute_gene_scores/tests/main.nf.test.snap b/modules/local/compute_gene_scores/tests/main.nf.test.snap
@@ -4,28 +4,71 @@
             {
                 "0": [
                     [
-                        "signature_scores_mqc.txt:md5,315dc730c79b02067b542074eaeb07b9",
-                        "signature_scores_qc_mqc.txt:md5,88ec48e1d7488357663557b378615923"
+                        {
+                            "id": "test"
+                        },
+                        [
+                            "signature_scores_mqc.txt:md5,315dc730c79b02067b542074eaeb07b9",
+                            "signature_scores_qc_mqc.txt:md5,88ec48e1d7488357663557b378615923"
+                        ]
                     ]
                 ],
                 "1": [
-                    "versions.yml:md5,61fc461ef40beb8b95aa31c8ee4d6760"
+                    "versions.yml:md5,267e6a81e12563418edabbdaa1a5ef79"
                 ],
                 "scores_for_mqc": [
                     [
-                        "signature_scores_mqc.txt:md5,315dc730c79b02067b542074eaeb07b9",
-                        "signature_scores_qc_mqc.txt:md5,88ec48e1d7488357663557b378615923"
+                        {
+                            "id": "test"
+                        },
+                        [
+                            "signature_scores_mqc.txt:md5,315dc730c79b02067b542074eaeb07b9",
+                            "signature_scores_qc_mqc.txt:md5,88ec48e1d7488357663557b378615923"
+                        ]
                     ]
                 ],
                 "versions": [
-                    "versions.yml:md5,61fc461ef40beb8b95aa31c8ee4d6760"
+                    "versions.yml:md5,267e6a81e12563418edabbdaa1a5ef79"
                 ]
             }
         ],
         "meta": {
             "nf-test": "0.9.0",
             "nextflow": "24.10.1"
         },
-        "timestamp": "2024-11-19T18:06:40.93622974"
+        "timestamp": "2024-11-27T17:21:08.656175464"
+    },
+    "stub": {
+        "content": [
+            {
+                "0": [
+                    [
+                        {
+                            "id": "test"
+                        },
+                        "scores_for_mqc.txt:md5,d41d8cd98f00b204e9800998ecf8427e"
+                    ]
+                ],
+                "1": [
+                    "versions.yml:md5,267e6a81e12563418edabbdaa1a5ef79"
+                ],
+                "scores_for_mqc": [
+                    [
+                        {
+                            "id": "test"
+                        },
+                        "scores_for_mqc.txt:md5,d41d8cd98f00b204e9800998ecf8427e"
+                    ]
+                ],
+                "versions": [
+                    "versions.yml:md5,267e6a81e12563418edabbdaa1a5ef79"
+                ]
+            }
+        ],
+        "meta": {
+            "nf-test": "0.9.0",
+            "nextflow": "24.10.1"
+        },
+        "timestamp": "2024-11-27T17:21:32.074726012"
     }
 }
diff --git a/modules/local/compute_gene_scores/tests/nextflow.config b/modules/local/compute_gene_scores/tests/nextflow.config
@@ -0,0 +1 @@
+nextflow.enable.moduleBinaries = true
diff --git a/modules/local/create_gene_heatmap/main.nf b/modules/local/create_gene_heatmap/main.nf
@@ -5,13 +5,12 @@ process CREATE_GENE_HEATMAP {
     container "community.wave.seqera.io/library/bioconductor-complexheatmap_r-base_r-circlize_r-dplyr_pruned:58d1af3dbaeba617"
 
     input:
-    path annotated_counts
-    path counts
+    tuple val(meta), path(annotated_endo_data), path(normalized_counts)
     path heatmap_genes_to_filter
 
     output:
-    path "*gene_heatmap_mqc.png", emit: gene_heatmap
-    path "versions.yml"         , emit: versions
+    tuple val(meta), path("*gene_heatmap_mqc.png"), emit: gene_heatmap
+    path "versions.yml"                           , emit: versions
 
     when:
     task.ext.when == null || task.ext.when
@@ -20,7 +19,7 @@ process CREATE_GENE_HEATMAP {
     def args = task.ext.args ?: ''
 
     """
-    compute_gene_heatmap.R $annotated_counts $counts $heatmap_genes_to_filter $args
+    compute_gene_heatmap.R $annotated_endo_data $normalized_counts $heatmap_genes_to_filter $args
-    compute_gene_heatmap.R $annotated_endo_data $normalized_counts $heatmap_genes_to_filter $args
+    compute_gene_heatmap.R \\
+      $annotated_endo_data \\
+      $normalized_counts \\
+      $args \\
+      $heatmap_genes_to_filter
-    compute_gene_heatmap.R $annotated_endo_data $normalized_counts $heatmap_genes_to_filter $args
+    compute_gene_heatmap.R \\
+      $annotated_endo_data \\
+      $normalized_counts \\
+      $args \\
+      $heatmap_genes_to_filter
 
     cat <<-END_VERSIONS > versions.yml
     "${task.process}":
@@ -34,5 +33,23 @@ process CREATE_GENE_HEATMAP {
         r-fs: \$(Rscript -e "library(fs); cat(as.character(packageVersion('fs')))")
     END_VERSIONS
     """
+
+    stub:
+    """
+    touch gene_heatmap_mqc.png
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        r-base: \$(echo \$(R --version 2>&1) | sed 's/^.*R version //; s/ .*\$//')
+        r-dplyr: \$(Rscript -e "library(dplyr); cat(as.character(packageVersion('dplyr')))")
+        r-ggplot2: \$(Rscript -e "library(ggplot2); cat(as.character(packageVersion('ggplot2')))")
+        r-rlang: \$(Rscript -e "library(rlang); cat(as.character(packageVersion('rlang')))")
+        bioconductor-ComplexHeatmap: \$(Rscript -e "library(ComplexHeatmap); cat(as.character(packageVersion('ComplexHeatmap')))")
+        r-circlize: \$(Rscript -e "library(circlize); cat(as.character(packageVersion('circlize')))")
+        r-yaml: \$(Rscript -e "library(yaml); cat(as.character(packageVersion('yaml')))")
+        r-fs: \$(Rscript -e "library(fs); cat(as.character(packageVersion('fs')))")
+    END_VERSIONS
+    """
+
 }
 
diff --git a/modules/local/create_gene_heatmap/meta.yml b/modules/local/create_gene_heatmap/meta.yml
@@ -7,25 +7,35 @@ keywords:
 tools:
   - create_gene_heatmap:
       description: |
-        The create_gene_heatmap module generates a heatmap of gene counts.
+        The create_gene_heatmap module generates a heatmap of gene counts using the ComplexHeatmap R package.
         It supports gene selection via additional files and provides a log-transformed visualization of the data.
 input:
-  - - annotated_counts:
+  - - meta:
+        type: map
+        description: |
+          Groovy Map containing sample information
+          e.g. [ id:'test' ]
+    - annotated_endo_data:
         type: file
         description: |
-          A tab-separated file containing annotated counts.
-        pattern: "*ENDO.tsv"
-  - - counts:
+          A tab-separated file containing normalized expression counts for the Endogenous genes (genes as columns and samples as rows).
+        pattern: "*.tsv"
+    - normalized_counts:
         type: file
         description: |
-          A tab-separated file containing counts data for genes, used when no gene set YAML is provided.
-        pattern: "*normalized_counts.tsv"
+          A tab-separated file containing normalized gene expression counts for all genes, both Endogenous and Housekeeping, depending on the normalization method chosen (genes as rows and samples as columns).
+        pattern: "*.tsv"
   - - heatmap_genes_to_filter:
         type: file
         description: |
-          Path to yml file (list, one item per line) to specify which genes should be used for the gene-count heatmap.
+          Optional path to a YAML file (a list, one item per line) specifying which genes should be used for the gene-count heatmap.
 output:
   - gene_heatmap:
+      - meta:
+          type: map
+          description: |
+            Groovy Map containing sample information
+            e.g. [ id:'test' ]
       - "*gene_heatmap_mqc.png":
           type: file
           description: |

diff --git a/bin/compute_gene_heatmap.R → .../resources/usr/bin/compute_gene_heatmap.R b/bin/compute_gene_heatmap.R → .../resources/usr/bin/compute_gene_heatmap.R
@@ -10,7 +10,7 @@ library(ragg)
 ###Command line argument parsing###
 args = commandArgs(trailingOnly=TRUE)
 if (length(args) < 1) {
-    stop("Usage: compute_gene_heatmap.R <annotated_counts.tsv> <counts.tsv> <sample_id_col> or compute_gene_heatmap.R <annotated_counts.tsv> <counts.tsv> <genes.yaml> <sample_id_col>", call.=FALSE)
+    stop("Usage: compute_gene_heatmap.R <annotated_endo_data.tsv> <normalized_counts.tsv> <sample_id_col> or compute_gene_heatmap.R <annotated_counts.tsv> <counts.tsv> <genes.yaml> <sample_id_col>", call.=FALSE)
 }
 input_counts_annotated <- args[1]
 input_counts <- args[2]