From 06093bb4fd6d910b6167bc51abc40329b3b2c4f8 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Alan=20M=C3=B6bbs?= <64787947+alanmmobbs93@users.noreply.github.com>
Date: Fri, 29 Nov 2024 08:41:28 -0300
Subject: [PATCH] New Module: NACHO_QC (#7108)

* initialize module

* update test

* Remove TO-DO from main.nf.test

* Remove comment from main.nf.test

* update test after meta component in output channels

* split channel into png and txt outputs

* update meta file

* move moduleBinaries to test nextflow config
---
 modules/nf-core/nacho/qc/environment.yml      |  12 +
 modules/nf-core/nacho/qc/main.nf              |  82 +++++
 modules/nf-core/nacho/qc/meta.yml             |  89 ++++++
 .../nacho/qc/resources/usr/bin/nacho_qc.R     | 274 ++++++++++++++++++
 modules/nf-core/nacho/qc/tests/main.nf.test   |  97 +++++++
 .../nf-core/nacho/qc/tests/main.nf.test.snap  |  78 ++++++
 .../nf-core/nacho/qc/tests/nextflow.config    |   1 +
 7 files changed, 633 insertions(+)
 create mode 100644 modules/nf-core/nacho/qc/environment.yml
 create mode 100644 modules/nf-core/nacho/qc/main.nf
 create mode 100644 modules/nf-core/nacho/qc/meta.yml
 create mode 100755 modules/nf-core/nacho/qc/resources/usr/bin/nacho_qc.R
 create mode 100644 modules/nf-core/nacho/qc/tests/main.nf.test
 create mode 100644 modules/nf-core/nacho/qc/tests/main.nf.test.snap
 create mode 100644 modules/nf-core/nacho/qc/tests/nextflow.config

diff --git a/modules/nf-core/nacho/qc/environment.yml b/modules/nf-core/nacho/qc/environment.yml
new file mode 100644
index 00000000000..9cf652c88fe
--- /dev/null
+++ b/modules/nf-core/nacho/qc/environment.yml
@@ -0,0 +1,12 @@
+channels:
+  - conda-forge
+  - bioconda
+
+dependencies:
+  - conda-forge::r-dplyr=1.1.4
+  - conda-forge::r-fs=1.6.4
+  - conda-forge::r-ggplot2=3.4.4
+  - conda-forge::r-nacho=2.0.6
+  - conda-forge::r-optparse=1.7.5
+  - conda-forge::r-readr=2.1.5
+  - conda-forge::r-tidyr=1.3.0
diff --git a/modules/nf-core/nacho/qc/main.nf b/modules/nf-core/nacho/qc/main.nf
new file mode 100644
index 00000000000..54bf2ae368e
--- /dev/null
+++ b/modules/nf-core/nacho/qc/main.nf
@@ -0,0 +1,82 @@
+process NACHO_QC {
+    // Runs the bundled nacho_qc.R script on a set of NanoString RCC files and emits
+    // the NACHO HTML reports plus the *_mqc.png plots and *_mqc.txt tables it writes.
+    tag "${meta.id}"
+    label 'process_single'
+
+    conda "${moduleDir}/environment.yml"
+    container 'community.wave.seqera.io/library/r-dplyr_r-fs_r-ggplot2_r-nacho_pruned:033bc017f5f36b6d'
+
+    input:
+    // RCC files are staged under input/ so the script can be pointed at a single directory
+    tuple val(meta) , path(rcc_files, stageAs: "input/*")
+    tuple val(meta2), path(sample_sheet)
+
+    output:
+    tuple val(meta), path("*.html")   , emit: nacho_qc_reports
+    tuple val(meta), path("*_mqc.png"), emit: nacho_qc_png
+    tuple val(meta), path("*_mqc.txt"), emit: nacho_qc_txt
+    path "versions.yml"               , emit: versions
+
+    when:
+    task.ext.when == null || task.ext.when
+
+    script:
+    def args = task.ext.args ?: ''
+
+    """
+    nacho_qc.R \\
+        --input_rcc_path input \\
+        --input_samplesheet ${sample_sheet} \\
+        ${args}
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        r-base: \$(echo \$(R --version 2>&1) | sed 's/^.*R version //; s/ .*\$//')
+        r-nacho: \$(Rscript -e "library(NACHO); cat(as.character(packageVersion('NACHO')))")
+        r-dplyr: \$(Rscript -e "library(dplyr); cat(as.character(packageVersion('dplyr')))")
+        r-ggplot2: \$(Rscript -e "library(ggplot2); cat(as.character(packageVersion('ggplot2')))")
+        r-tidyr: \$(Rscript -e "library(tidyr); cat(as.character(packageVersion('tidyr')))")
+        r-readr: \$(Rscript -e "library(readr); cat(as.character(packageVersion('readr')))")
+        r-fs: \$(Rscript -e "library(fs); cat(as.character(packageVersion('fs')))")
+        r-optparse: \$(Rscript -e "library(optparse); cat(as.character(packageVersion('optparse')))")
+    END_VERSIONS
+    """
+
+    stub:
+    // File names mirror the ones nacho_qc.R writes, so stub and real runs expose the same outputs
+    """
+    touch NanoQC.html
+    touch NanoQC_with_outliers.html
+    touch AVG_vs_BD_mqc.png
+    touch AVG_vs_MED_mqc.png
+    touch BD_mqc.png
+    touch FOV_mqc.png
+    touch HKF_mqc.png
+    touch HK_mqc.png
+    touch LOD_mqc.png
+    touch Neg_mqc.png
+    touch PCA1_vs_PCA2_mqc.png
+    touch PCAi_mqc.png
+    touch PCA_mqc.png
+    touch plot_normf_mqc.png
+    touch Posctrl_linearity_mqc.png
+    touch POSF_vs_NEGF_mqc.png
+    touch Pos_mqc.png
+    touch Pos_vs_neg_mqc.png
+    touch normalized_qc_mqc.txt
+    touch hk_detected_mqc.txt
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        r-base: \$(echo \$(R --version 2>&1) | sed 's/^.*R version //; s/ .*\$//')
+        r-nacho: \$(Rscript -e "library(NACHO); cat(as.character(packageVersion('NACHO')))")
+        r-dplyr: \$(Rscript -e "library(dplyr); cat(as.character(packageVersion('dplyr')))")
+        r-ggplot2: \$(Rscript -e "library(ggplot2); cat(as.character(packageVersion('ggplot2')))")
+        r-tidyr: \$(Rscript -e "library(tidyr); cat(as.character(packageVersion('tidyr')))")
+        r-readr: \$(Rscript -e "library(readr); cat(as.character(packageVersion('readr')))")
+        r-fs: \$(Rscript -e "library(fs); cat(as.character(packageVersion('fs')))")
+        r-optparse: \$(Rscript -e "library(optparse); cat(as.character(packageVersion('optparse')))")
+    END_VERSIONS
+    """
+}
diff --git a/modules/nf-core/nacho/qc/meta.yml b/modules/nf-core/nacho/qc/meta.yml
new file mode 100644
index 00000000000..f3c14934bd4
--- /dev/null
+++ b/modules/nf-core/nacho/qc/meta.yml
@@ -0,0 +1,89 @@
+name: nacho_qc
+description: |
+  NACHO (NAnostring quality Control dasHbOard) is developed for NanoString nCounter data.
+  NanoString nCounter data is a messenger-RNA/micro-RNA (mRNA/miRNA) expression assay and works with fluorescent barcodes.
+  Each barcode is assigned a mRNA/miRNA, which can be counted after bonding with its target.
+  As a result each count of a specific barcode represents the presence of its target mRNA/miRNA.
+keywords:
+  - nacho
+  - nanostring
+  - mRNA
+  - miRNA
+  - qc
+tools:
+  - NACHO:
+      description: |
+        R package that uses two main functions to summarize and visualize NanoString RCC files,
+        namely: `load_rcc()` and `visualise()`. It also includes a function `normalise()`, which (re)calculates
+        sample specific size factors and normalises the data.
+        For more information see `vignette("NACHO")` and `vignette("NACHO-analysis")`
+      homepage: https://github.com/mcanouil/NACHO
+      documentation: https://cran.r-project.org/web/packages/NACHO/vignettes/NACHO.html
+      doi: "10.1093/bioinformatics/btz647"
+      licence: [ "GPL-3.0" ]
+      identifier: ""
+input:
+  - - meta:
+        type: map
+        description: |
+          Groovy Map containing sample information
+          e.g. [ id:'test' ]
+    - rcc_files:
+        type: file
+        description: |
+          List of RCC files for all samples, which are direct outputs from NanoString runs
+        pattern: "*.RCC"
+  - - meta2:
+        type: map
+        description: |
+          Groovy Map containing file information
+          e.g. [ id:'test_samplesheet' ]
+    - sample_sheet:
+        type: file
+        pattern: "*.csv"
+        description: |
+          Comma-separated file with 3 columns: RCC_FILE, RCC_FILE_NAME, and SAMPLE_ID
+output:
+  - nacho_qc_reports:
+      - meta:
+          type: map
+          description: |
+            Groovy Map containing sample information
+            e.g. [ id:'test' ]
+      - "*.html":
+          type: file
+          description: |
+            HTML report
+          pattern: "*.html"
+  - nacho_qc_png:
+      - meta:
+          type: map
+          description: |
+            Groovy Map containing sample information
+            e.g. [ id:'test' ]
+      - "*_mqc.png":
+          type: file
+          description: |
+            Output PNG files
+          pattern: "*_mqc.png"
+  - nacho_qc_txt:
+      - meta:
+          type: map
+          description: |
+            Groovy Map containing sample information
+            e.g. [ id:'test' ]
+      - "*_mqc.txt":
+          type: file
+          description: |
+            Plain text reports
+          pattern: "*_mqc.txt"
+  - versions:
+      - "versions.yml":
+          type: file
+          description: |
+            File containing software versions
+          pattern: "versions.yml"
+authors:
+  - "@alanmmobbs93"
+maintainers:
+  - "@alanmmobbs93"
diff --git a/modules/nf-core/nacho/qc/resources/usr/bin/nacho_qc.R b/modules/nf-core/nacho/qc/resources/usr/bin/nacho_qc.R
new file mode 100755
index 00000000000..21d20b317b0
--- /dev/null
+++ b/modules/nf-core/nacho/qc/resources/usr/bin/nacho_qc.R
@@ -0,0 +1,274 @@
+#!/usr/bin/env Rscript
+# NACHO-based QC for NanoString RCC files.
+# Loads the RCC data with load_rcc() from the input directory and sample sheet, then
+# writes MultiQC custom content (*_mqc.txt), one PNG per autoplot() metric (*_mqc.png)
+# and two rendered HTML reports into the current working directory.
+library(optparse)
+library(dplyr)
+library(ggplot2)
+library(fs)
+library(NACHO)
+library(readr)
+library(tidyr)
+
+# Commandline Argument parsing
+option_list <- list(
+    make_option(
+        c("--input_rcc_path"),
+        type = "character",
+        default = "./" ,
+        help = "Path to the folder that contains the RCC input file(s)",
+        metavar = "character"),
+    make_option(
+        c("--input_samplesheet"),
+        type = "character",
+        default = NULL ,
+        help = "Path to the sample sheet file",
+        metavar = "character")
+)
+
+opt <- parse_args(OptionParser(option_list = option_list))
+
+# Validate mandatory arguments
+if (is.null(opt$input_rcc_path)) {
+    stop("Error: The --input_rcc_path parameter is mandatory and must be specified.")
+}
+
+if (is.null(opt$input_samplesheet)) {
+    stop("Error: The --input_samplesheet parameter is mandatory and must be specified.")
+}
+
+input_rcc_path <- opt$input_rcc_path
+input_samplesheet <- opt$input_samplesheet
+
+# Fail early with a clear message if either input location is missing
+if (!dir.exists(input_rcc_path)) {
+    stop("Error: RCC input directory not found: ", input_rcc_path)
+}
+
+if (!file.exists(input_samplesheet)) {
+    stop("Error: sample sheet not found: ", input_samplesheet)
+}
+
+# Core Code
+nacho_data <- load_rcc(data_directory = input_rcc_path,
+                       ssheet_csv = input_samplesheet,
+                       id_colname = "RCC_FILE_NAME")
+
+output_base <- "./"
+
+# Write out HK genes detected and add to MultiQC report as custom content
+line="#id: nf-core-nanostring-hk-genes
+#section_name: 'Housekeeping Genes'
+#description: 'The following Housekeeping Genes have been detected in the input RCC Files:'
+#plot_type: 'html'
+#section_href: 'https://github.com/nf-core/nanostring'
+#data:
+    "
+
+# The header write truncates the file so a re-run in the same directory does not duplicate it
+write(line,file=paste0(output_base, "hk_detected_mqc.txt"),append=FALSE)
+write(nacho_data$housekeeping_genes ,paste0(output_base,"hk_detected_mqc.txt"),append=TRUE)
+
+# Add in all plots as MQC output for MultiQC
+plot_bd <- autoplot(
+    object = nacho_data,
+    x = "BD",
+    colour = "CartridgeID",
+    size = 0.5,
+    show_legend = TRUE
+)
+ggsave(filename="BD_mqc.png", plot_bd)
+
+## Field of View (FoV) Imaging
+
+plot_fov <- autoplot(
+    object = nacho_data,
+    x = "FoV",
+    colour = "CartridgeID",
+    size = 0.5,
+    show_legend = TRUE
+)
+ggsave(filename="FOV_mqc.png", plot_fov)
+
+
+## Positive Control Linearity
+
+plot_posctrl_lin <- autoplot(
+    object = nacho_data,
+    x = "PCL",
+    colour = "CartridgeID",
+    size = 0.5,
+    show_legend = TRUE
+)
+
+ggsave(filename="Posctrl_linearity_mqc.png", plot_posctrl_lin)
+
+## Limit of Detection
+
+plot_lod <- autoplot(
+    object = nacho_data,
+    x = "LoD",
+    colour = "CartridgeID",
+    size = 0.5,
+    show_legend = TRUE
+)
+
+ggsave(filename="LOD_mqc.png", plot_lod)
+
+## Positive Controls
+
+plot_pos <- autoplot(
+    object = nacho_data,
+    x = "Positive",
+    colour = "CartridgeID",
+    size = 0.5,
+    show_legend = TRUE
+)
+ggsave(filename="Pos_mqc.png", plot_pos)
+
+
+## Negative Controls
+
+plot_neg <- autoplot(
+    object = nacho_data,
+    x = "Negative",
+    colour = "CartridgeID",
+    size = 0.5,
+    show_legend = TRUE
+)
+ggsave(filename="Neg_mqc.png", plot_neg)
+
+## Housekeeping Genes
+
+plot_hk <- autoplot(
+    object = nacho_data,
+    x = "Housekeeping",
+    colour = "CartridgeID",
+    size = 0.5,
+    show_legend = TRUE
+)
+ggsave(filename="HK_mqc.png", plot_hk)
+
+## Positive Controls vs Negative Controls
+
+plot_pos_vs_neg <- autoplot(
+    object = nacho_data,
+    x = "PN",
+    colour = "CartridgeID",
+    size = 0.5,
+    show_legend = TRUE
+)
+ggsave(filename="Pos_vs_neg_mqc.png", plot_pos_vs_neg)
+
+## Average Counts vs. Binding Density
+
+plot_avg_vs_bd <- autoplot(
+    object = nacho_data,
+    x = "ACBD",
+    colour = "CartridgeID",
+    size = 0.5,
+    show_legend = TRUE
+)
+ggsave(filename="AVG_vs_BD_mqc.png", plot_avg_vs_bd)
+
+## Average Counts vs. Median Counts
+
+plot_avg_vs_med <- autoplot(
+    object = nacho_data,
+    x = "ACMC",
+    colour = "CartridgeID",
+    size = 0.5,
+    show_legend = TRUE
+)
+ggsave(filename="AVG_vs_MED_mqc.png", plot_avg_vs_med)
+
+## Principal Component 1 vs. 2
+
+plot_pc12 <- autoplot(
+    object = nacho_data,
+    x = "PCA12",
+    colour = "CartridgeID",
+    size = 0.5,
+    show_legend = TRUE
+)
+ggsave(filename="PCA1_vs_PCA2_mqc.png", plot_pc12)
+
+## Principal Component i
+
+plot_pcai <- autoplot(
+    object = nacho_data,
+    x = "PCAi",
+    colour = "CartridgeID",
+    size = 0.5,
+    show_legend = TRUE
+)
+ggsave(filename="PCAi_mqc.png", plot_pcai)
+
+## Principal Component planes
+plot_pcap <- autoplot(
+    object = nacho_data,
+    x = "PCA",
+    colour = "CartridgeID",
+    size = 0.5,
+    show_legend = TRUE
+)
+ggsave(filename="PCA_mqc.png", plot_pcap)
+
+## Positive Factor vs. Negative Factor
+plot_posf_vs_negf <- autoplot(
+    object = nacho_data,
+    x = "PFNF",
+    colour = "CartridgeID",
+    size = 0.5,
+    show_legend = TRUE
+)
+ggsave(filename="POSF_vs_NEGF_mqc.png", plot_posf_vs_negf)
+
+## Housekeeping Factor
+
+plot_hkf <- autoplot(
+    object = nacho_data,
+    x = "HF",
+    colour = "CartridgeID",
+    size = 0.5,
+    show_legend = TRUE
+)
+ggsave(filename="HKF_mqc.png", plot_hkf)
+
+## Normalization Factors
+
+plot_normf <- autoplot(
+    object = nacho_data,
+    x = "NORM",
+    colour = "CartridgeID",
+    size = 0.5,
+    show_legend = TRUE
+)
+ggsave(filename="plot_normf_mqc.png", plot_normf)
+
+# Create QC table for MultiQC Report
+outliers_thresholds <- nacho_data[["outliers_thresholds"]]
+
+qc_table <- nacho_data[["nacho"]] %>%
+    select(c(RCC_FILE_NAME,BD,FoV,PCL,LoD,MC,MedC,Positive_factor,Negative_factor,House_factor)) %>%
+    unique() %>%
+    mutate("BD QC" = if_else(BD < outliers_thresholds[["BD"]][1] | BD > outliers_thresholds[["BD"]][2], "FAIL", "PASS"), .after = BD) %>%
+    mutate("FoV QC" = if_else(FoV < outliers_thresholds[["FoV"]], "FAIL", "PASS"), .after = FoV) %>%
+    mutate("PCL QC" = if_else(PCL < outliers_thresholds[["PCL"]], "FAIL", "PASS"), .after = PCL) %>%
+    mutate("LoD QC" = if_else(LoD < outliers_thresholds[["LoD"]], "FAIL", "PASS"), .after = LoD) %>%
+    mutate("PNF QC" = if_else(Positive_factor < outliers_thresholds[["Positive_factor"]][1] | Positive_factor > outliers_thresholds[["Positive_factor"]][2], "FAIL", "PASS"), .after = Positive_factor) %>%
+    mutate("HKNF QC" = if_else(House_factor < outliers_thresholds[["House_factor"]][1] | House_factor > outliers_thresholds[["House_factor"]][2], "FAIL", "PASS"), .after = House_factor) %>%
+    relocate(Negative_factor, .after = last_col()) %>%
+    rename("Negative Factor" = Negative_factor) %>%
+    rename("House Factor" = House_factor) %>%
+    rename("Positive Factor" = Positive_factor) %>%
+    rename("RCC_FILE" = RCC_FILE_NAME)
+
+write_tsv(qc_table ,file=paste0(output_base,"normalized_qc_mqc.txt"))
+
+# Render Standard Report for investigation in main MultiQC Report
+render(nacho_data, output_dir = output_base, output_file = "NanoQC.html", show_outliers = FALSE)
+
+# Render the same Report for standard investigation, but not for MultiQC Report
+render(nacho_data, output_dir = output_base, output_file = "NanoQC_with_outliers.html", show_outliers = TRUE)
diff --git a/modules/nf-core/nacho/qc/tests/main.nf.test b/modules/nf-core/nacho/qc/tests/main.nf.test
new file mode 100644
index 00000000000..fe4176bdaf1
--- /dev/null
+++ b/modules/nf-core/nacho/qc/tests/main.nf.test
@@ -0,0 +1,97 @@
+nextflow_process {
+
+    name "Test Process NACHO_QC"
+    script "../main.nf"
+    process "NACHO_QC"
+    config "./nextflow.config"
+
+    tag "modules"
+    tag "modules_nfcore"
+    tag "nacho"
+    tag "nacho/qc"
+
+    test("Salmon - RCC files") {
+
+        when {
+            process {
+                """
+                // RCC Files: Collect from sample sheet
+                input[0] =
+                    Channel.fromPath('https://raw.githubusercontent.com/nf-core/test-datasets/nanostring/samplesheets/samplesheet_test.csv', checkIfExists: true)
+                        .splitCsv( header: true )
+                        .map { row -> return file(row.RCC_FILE, checkIfExists: true) } // Select first column: path to file
+                        .collect()
+                        .map{ files ->
+                            return tuple( [id: 'test1'], files ) // Add meta component
+                        }
+
+                // Sample sheet
+                input[1] = Channel.of( [
+                    [ id: 'test_samplesheet'],
+                    [ file('https://raw.githubusercontent.com/nf-core/test-datasets/nanostring/samplesheets/samplesheet_test.csv', checkIfExists: true) ]
+                ] )
+                """
+            }
+        }
+
+        then {
+            assertAll(
+                { assert process.success },
+                { with(process.out) {
+                    assert nacho_qc_reports.get(0).get(1).size() == 2
+                    assert nacho_qc_png.get(0).get(1).size() == 16
+                    assert nacho_qc_txt.get(0).get(1).size() == 2
+                    assert snapshot(
+                        nacho_qc_reports.get(0).get(1).collect { file(it).name }, // non-deterministic .html reports: snapshot file names only
+                        nacho_qc_png.get(0).get(1).collect { file(it).name },     // non-deterministic .png plots: snapshot file names only
+                        nacho_qc_txt.get(0).get(1),                               // stable .txt mqc files: snapshot content
+                        versions
+                    ).match() }
+                }
+            )
+        }
+    }
+
+    test("Salmon - RCC files - stub") {
+
+        options "-stub"
+        when {
+            process {
+                """
+                // RCC Files: Collect from sample sheet
+                input[0] =
+                    Channel.fromPath('https://raw.githubusercontent.com/nf-core/test-datasets/nanostring/samplesheets/samplesheet_test.csv', checkIfExists: true)
+                        .splitCsv( header: true )
+                        .map{ row -> return file(row.RCC_FILE, checkIfExists: true) } // Select first column: path to file
+                        .collect()
+                        .map{ files ->
+                            tuple( [id: 'test_stub'], files ) // Add meta component
+                        }
+
+                // Sample sheet
+                input[1] = Channel.of( [
+                    [ id: 'test_samplesheet'],
+                    [ file('https://raw.githubusercontent.com/nf-core/test-datasets/nanostring/samplesheets/samplesheet_test.csv', checkIfExists: true) ]
+                ] )
+                """
+            }
+        }
+
+        then {
+            assertAll(
+                { assert process.success },
+                { with(process.out) {
+                    assert nacho_qc_reports.get(0).get(1).size() == 2
+                    assert nacho_qc_png.get(0).get(1).size() == 16
+                    assert nacho_qc_txt.get(0).get(1).size() == 2
+                    assert snapshot(
+                        nacho_qc_reports.get(0).get(1).collect { file(it).name }, // non-deterministic .html reports: snapshot file names only
+                        nacho_qc_png.get(0).get(1).collect { file(it).name },     // non-deterministic .png plots: snapshot file names only
+                        nacho_qc_txt.get(0).get(1),                               // stable .txt mqc files: snapshot content
+                        versions
+                    ).match() }
+                }
+            )
+        }
+    }
+}
diff --git a/modules/nf-core/nacho/qc/tests/main.nf.test.snap b/modules/nf-core/nacho/qc/tests/main.nf.test.snap
new file mode 100644
index 00000000000..296b6a7562b
--- /dev/null
+++ b/modules/nf-core/nacho/qc/tests/main.nf.test.snap
@@ -0,0 +1,78 @@
+{
+    "Salmon - RCC files": {
+        "content": [
+            [
+                "NanoQC.html",
+                "NanoQC_with_outliers.html"
+            ],
+            [
+                "AVG_vs_BD_mqc.png",
+                "AVG_vs_MED_mqc.png",
+                "BD_mqc.png",
+                "FOV_mqc.png",
+                "HKF_mqc.png",
+                "HK_mqc.png",
+                "LOD_mqc.png",
+                "Neg_mqc.png",
+                "PCA1_vs_PCA2_mqc.png",
+                "PCA_mqc.png",
+                "PCAi_mqc.png",
+                "POSF_vs_NEGF_mqc.png",
+                "Pos_mqc.png",
+                "Pos_vs_neg_mqc.png",
+                "Posctrl_linearity_mqc.png",
+                "plot_normf_mqc.png"
+            ],
+            [
+                "hk_detected_mqc.txt:md5,61209383acc2abf6fc3ea309b5a5e094",
+                "normalized_qc_mqc.txt:md5,9a0b015a28094a17331b12b08898da8e"
+            ],
+            [
+                "versions.yml:md5,771de828b0a5e1f2e715fd3f62d9a9c9"
+            ]
+        ],
+        "meta": {
+            "nf-test": "0.9.2",
+            "nextflow": "24.10.1"
+        },
+        "timestamp": "2024-11-28T14:10:45.10759212"
+    },
+    "Salmon - RCC files - stub": {
+        "content": [
+            [
+                "NanoQC.html",
+                "NanoQC_with_outliers.html"
+            ],
+            [
+                "AVG_vs_BD_mqc.png",
+                "AVG_vs_MED_mqc.png",
+                "BD_mqc.png",
+                "FOV_mqc.png",
+                "HKF_mqc.png",
+                "HK_mqc.png",
+                "LOD_mqc.png",
+                "Neg_mqc.png",
+                "PCA1_vs_PCA2_mqc.png",
+                "PCA_mqc.png",
+                "PCAi_mqc.png",
+                "POSF_vs_NEGF_mqc.png",
+                "Pos_mqc.png",
+                "Pos_vs_neg_mqc.png",
+                "Posctrl_linearity_mqc.png",
+                "plot_normf_mqc.png"
+            ],
+            [
+                "hk_detected_mqc.txt:md5,d41d8cd98f00b204e9800998ecf8427e",
+                "normalized_qc_mqc.txt:md5,d41d8cd98f00b204e9800998ecf8427e"
+            ],
+            [
+                "versions.yml:md5,771de828b0a5e1f2e715fd3f62d9a9c9"
+            ]
+        ],
+        "meta": {
+            "nf-test": "0.9.2",
+            "nextflow": "24.10.1"
+        },
+        "timestamp": "2024-11-28T14:07:24.754188513"
+    }
+}
\ No newline at end of file
diff --git a/modules/nf-core/nacho/qc/tests/nextflow.config b/modules/nf-core/nacho/qc/tests/nextflow.config
new file mode 100644
index 00000000000..651f0b86a59
--- /dev/null
+++ b/modules/nf-core/nacho/qc/tests/nextflow.config
@@ -0,0 +1 @@
+nextflow.enable.moduleBinaries = true