From 3426550ee6dabe52c002d701dba4bdd407e19187 Mon Sep 17 00:00:00 2001
From: "Huska, Matthew" <HuskaM@rki.de>
Date: Wed, 15 May 2024 17:20:26 +0200
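Subject: [PATCH] Add --skip_qc to allow skipping fastqc/nanoplot/multiqc

Introduce a boolean `skip_qc` parameter (default: false in
nextflow.config). When it is set, the `qc(...)` call in the `workflow`
block of clean.nf is not executed. The new parameter is added to the
`valid_params` sanity-check list and documented in the help message.

This patch also removes trailing whitespace from a number of otherwise
unchanged lines in clean.nf and nextflow.config; those hunks carry no
functional change.

---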
 clean.nf        | 11 +++++++----
 nextflow.config | 25 +++++++++++++------------
 2 files changed, 20 insertions(+), 16 deletions(-)

diff --git a/clean.nf b/clean.nf
index bf2d4bc..8a38258 100755
--- a/clean.nf
+++ b/clean.nf
@@ -10,7 +10,7 @@ Author: hoelzer.martin@gmail.com
 
 // Parameters sanity checking
 
-Set valid_params = ['max_cores', 'cores', 'max_memory', 'memory', 'profile', 'help', 'input', 'input_type', 'list', 'host', 'own', 'control', 'keep', 'rm_rrna', 'bbduk', 'bbduk_kmer', 'bbduk_qin', 'reads_rna', 'min_clip', 'dcs_strict', 'output', 'multiqc_dir', 'nf_runinfo_dir', 'databases', 'cleanup_work_dir','condaCacheDir', 'singularityCacheDir', 'singularityCacheDir', 'cloudProcess', 'conda-cache-dir', 'singularity-cache-dir', 'cloud-process', 'publish_dir_mode', 'no_intermediate'] // don't ask me why there is also 'conda-cache-dir', 'singularity-cache-dir', 'cloud-process'
+Set valid_params = ['max_cores', 'cores', 'max_memory', 'memory', 'profile', 'help', 'input', 'input_type', 'list', 'host', 'own', 'control', 'keep', 'rm_rrna', 'bbduk', 'bbduk_kmer', 'bbduk_qin', 'reads_rna', 'min_clip', 'dcs_strict', 'output', 'multiqc_dir', 'nf_runinfo_dir', 'databases', 'cleanup_work_dir','condaCacheDir', 'singularityCacheDir', 'singularityCacheDir', 'cloudProcess', 'conda-cache-dir', 'singularity-cache-dir', 'cloud-process', 'publish_dir_mode', 'no_intermediate', 'skip_qc'] // don't ask me why there is also 'conda-cache-dir', 'singularity-cache-dir', 'cloud-process'
 def parameter_diff = params.keySet() - valid_params
 if (parameter_diff.size() != 0){
     exit 1, "ERROR: Parameter(s) $parameter_diff is/are not valid in the pipeline!\n"
@@ -210,7 +210,9 @@ workflow {
 
   }
 
-  qc(input_ch.map{ it -> tuple(it[0], 'input', it[1]) }.mix(clean.out.out_reads), params.input_type, clean.out.bbduk_summary, clean.out.idxstats, clean.out.flagstats, multiqc_config)
+  if (!params.skip_qc) {
+    qc(input_ch.map{ it -> tuple(it[0], 'input', it[1]) }.mix(clean.out.out_reads), params.input_type, clean.out.bbduk_summary, clean.out.idxstats, clean.out.flagstats, multiqc_config)
+  }
 }
 
 /**************************
@@ -265,8 +267,8 @@ def helpMSG() {
                                         - eno [ONT RNA-Seq: a positive control (yeast ENO2 Enolase II of strain S288C, YHR174W)]${c_reset}
     ${c_green}--own ${c_reset}          Use your own FASTA sequences (comma separated list of files) for decontamination, e.g. host.fasta.gz,spike.fasta [default: $params.own]
     ${c_green}--keep ${c_reset}         Use your own FASTA sequences (comma separated list of files) to explicitly keep mapped reads, e.g. target.fasta.gz,important.fasta [default: $params.keep]
-                                        Reads are assigned to a combined index for decontamination and keeping. The use of this parameter can prevent 
-                                        false positive hits and the accidental removal of reads due to (poor quality) mappings. 
+                                        Reads are assigned to a combined index for decontamination and keeping. The use of this parameter can prevent
+                                        false positive hits and the accidental removal of reads due to (poor quality) mappings.
     ${c_green}--rm_rrna ${c_reset}      Clean your data from rRNA [default: $params.rm_rrna]
     ${c_green}--bbduk${c_reset}         Add this flag to use bbduk instead of minimap2 for decontamination of short reads [default: $params.bbduk]
     ${c_green}--bbduk_kmer${c_reset}    Set kmer for bbduk [default: $params.bbduk_kmer]
@@ -275,6 +277,7 @@ def helpMSG() {
 
     ${c_green}--min_clip${c_reset}      Filter mapped reads by soft-clipped length (left + right). If >= 1 total number; if < 1 relative to read length
     ${c_green}--dcs_strict${c_reset}    Filter out alignments that cover artificial ends of the ONT DCS to discriminate between Lambda Phage and DCS
+    ${c_green}--skip_qc${c_reset}       Skip quality control steps (fastqc, nanoplot, multiqc, etc.) [default: $params.skip_qc]
 
     ${c_yellow}Compute options:${c_reset}
     --cores             Max cores per process for local use [default $params.cores]
diff --git a/nextflow.config b/nextflow.config
index 8c7f83d..d410891 100644
--- a/nextflow.config
+++ b/nextflow.config
@@ -29,12 +29,13 @@ params {
   reads_rna = false
   min_clip = ''
   dcs_strict = false
+  skip_qc = false
 
   // folder structure
   output = 'results'
   multiqc_dir = 'qc'
   nf_runinfo_dir = 'logs'
-  
+
   // location for storing the conda or singularity environments
   condaCacheDir = 'conda'
   singularityCacheDir = 'singularity'
@@ -81,7 +82,7 @@ profiles {
         executor {
             name = "lsf"
             queueSize = 200
-        }        
+        }
         params.cloudProcess = true
         process.cache = "lenient"
         includeConfig 'configs/node.config'
@@ -91,7 +92,7 @@ profiles {
         executor {
             name = "slurm"
             queueSize = 200
-        }        
+        }
         params.cloudProcess = true
         process.cache = "lenient"
         includeConfig 'configs/node.config'
@@ -99,13 +100,13 @@ profiles {
 
 
     // engines
-    docker { 
+    docker {
         docker { enabled = true }
         includeConfig 'configs/container.config'
     }
 
     singularity {
-        singularity { 
+        singularity {
                 enabled = true
                 autoMounts = true
                 cacheDir = params.singularityCacheDir
@@ -114,7 +115,7 @@ profiles {
         includeConfig 'configs/container.config'
     }
 
-    mamba { 
+    mamba {
         conda {
             enabled = true
             cacheDir = params.condaCacheDir
@@ -123,7 +124,7 @@ profiles {
         includeConfig 'configs/conda.config'
     }
 
-    conda { 
+    conda {
         conda {
             enabled = true
             cacheDir = params.condaCacheDir
@@ -147,17 +148,17 @@ profiles {
     }
 
     // CONFIGURE YOUR PRIVATE CLOUD
-    gcloud {             
+    gcloud {
         params.databases = 'gs://databases-matrice/databases/'
         bucketDir = 'gs://matrice/nextflow-tmp/clean'
         //workDir = "/tmp/nextflow-work-$USER"
 
         executor { name = 'google-lifesciences' }
- 
+
         google {
             project = 'nextflow-auto-255816'
-            zone = 'europe-west1-b' 
-        }  
+            zone = 'europe-west1-b'
+        }
 
         params.cloudProcess = true
         includeConfig 'configs/node.config'
@@ -169,7 +170,7 @@ profiles {
 
         // we need a docker also for basic functionalities in the cloud
         process {
-            withLabel: noDocker { cpus = 1; memory = '4.0 GB'; container = 'nanozoo/template:3.8--ccd0653' } 
+            withLabel: noDocker { cpus = 1; memory = '4.0 GB'; container = 'nanozoo/template:3.8--ccd0653' }
         }
     }
 }