From 3426550ee6dabe52c002d701dba4bdd407e19187 Mon Sep 17 00:00:00 2001 From: "Huska, Matthew" Date: Wed, 15 May 2024 17:20:26 +0200 Subject: [PATCH] Add --skip_qc to allow skipping fastqc/nanoplot/multiqc --- clean.nf | 11 +++++++---- nextflow.config | 25 +++++++++++++------------ 2 files changed, 20 insertions(+), 16 deletions(-) diff --git a/clean.nf b/clean.nf index bf2d4bc..8a38258 100755 --- a/clean.nf +++ b/clean.nf @@ -10,7 +10,7 @@ Author: hoelzer.martin@gmail.com // Parameters sanity checking -Set valid_params = ['max_cores', 'cores', 'max_memory', 'memory', 'profile', 'help', 'input', 'input_type', 'list', 'host', 'own', 'control', 'keep', 'rm_rrna', 'bbduk', 'bbduk_kmer', 'bbduk_qin', 'reads_rna', 'min_clip', 'dcs_strict', 'output', 'multiqc_dir', 'nf_runinfo_dir', 'databases', 'cleanup_work_dir','condaCacheDir', 'singularityCacheDir', 'singularityCacheDir', 'cloudProcess', 'conda-cache-dir', 'singularity-cache-dir', 'cloud-process', 'publish_dir_mode', 'no_intermediate'] // don't ask me why there is also 'conda-cache-dir', 'singularity-cache-dir', 'cloud-process' +Set valid_params = ['max_cores', 'cores', 'max_memory', 'memory', 'profile', 'help', 'input', 'input_type', 'list', 'host', 'own', 'control', 'keep', 'rm_rrna', 'bbduk', 'bbduk_kmer', 'bbduk_qin', 'reads_rna', 'min_clip', 'dcs_strict', 'output', 'multiqc_dir', 'nf_runinfo_dir', 'databases', 'cleanup_work_dir','condaCacheDir', 'singularityCacheDir', 'singularityCacheDir', 'cloudProcess', 'conda-cache-dir', 'singularity-cache-dir', 'cloud-process', 'publish_dir_mode', 'no_intermediate', 'skip_qc'] // don't ask me why there is also 'conda-cache-dir', 'singularity-cache-dir', 'cloud-process' def parameter_diff = params.keySet() - valid_params if (parameter_diff.size() != 0){ exit 1, "ERROR: Parameter(s) $parameter_diff is/are not valid in the pipeline!\n" @@ -210,7 +210,9 @@ workflow { } - qc(input_ch.map{ it -> tuple(it[0], 'input', it[1]) }.mix(clean.out.out_reads), params.input_type, clean.out.bbduk_summary, clean.out.idxstats, clean.out.flagstats, multiqc_config) + if (!params.skip_qc) { + qc(input_ch.map{ it -> tuple(it[0], 'input', it[1]) }.mix(clean.out.out_reads), params.input_type, clean.out.bbduk_summary, clean.out.idxstats, clean.out.flagstats, multiqc_config) + } } /************************** @@ -265,8 +267,8 @@ def helpMSG() { - eno [ONT RNA-Seq: a positive control (yeast ENO2 Enolase II of strain S288C, YHR174W)]${c_reset} ${c_green}--own ${c_reset} Use your own FASTA sequences (comma separated list of files) for decontamination, e.g. host.fasta.gz,spike.fasta [default: $params.own] ${c_green}--keep ${c_reset} Use your own FASTA sequences (comma separated list of files) to explicitly keep mapped reads, e.g. target.fasta.gz,important.fasta [default: $params.keep] - Reads are assigned to a combined index for decontamination and keeping. The use of this parameter can prevent - false positive hits and the accidental removal of reads due to (poor quality) mappings. + Reads are assigned to a combined index for decontamination and keeping. The use of this parameter can prevent + false positive hits and the accidental removal of reads due to (poor quality) mappings. ${c_green}--rm_rrna ${c_reset} Clean your data from rRNA [default: $params.rm_rrna] ${c_green}--bbduk${c_reset} Add this flag to use bbduk instead of minimap2 for decontamination of short reads [default: $params.bbduk] ${c_green}--bbduk_kmer${c_reset} Set kmer for bbduk [default: $params.bbduk_kmer] @@ -275,6 +277,7 @@ def helpMSG() { ${c_green}--min_clip${c_reset} Filter mapped reads by soft-clipped length (left + right). If >= 1 total number; if < 1 relative to read length ${c_green}--dcs_strict${c_reset} Filter out alignments that cover artificial ends of the ONT DCS to discriminate between Lambda Phage and DCS + ${c_green}--skip_qc${c_reset} Skip quality control steps (fastqc, nanoplot, multiqc, etc.) [default: $params.skip_qc] ${c_yellow}Compute options:${c_reset} --cores Max cores per process for local use [default $params.cores] diff --git a/nextflow.config b/nextflow.config index 8c7f83d..d410891 100644 --- a/nextflow.config +++ b/nextflow.config @@ -29,12 +29,13 @@ params { reads_rna = false min_clip = '' dcs_strict = false + skip_qc = false // folder structure output = 'results' multiqc_dir = 'qc' nf_runinfo_dir = 'logs' - + // location for storing the conda or singularity environments condaCacheDir = 'conda' singularityCacheDir = 'singularity' @@ -81,7 +82,7 @@ profiles { executor { name = "lsf" queueSize = 200 - } + } params.cloudProcess = true process.cache = "lenient" includeConfig 'configs/node.config' @@ -91,7 +92,7 @@ profiles { executor { name = "slurm" queueSize = 200 - } + } params.cloudProcess = true process.cache = "lenient" includeConfig 'configs/node.config' @@ -99,13 +100,13 @@ profiles { // engines - docker { + docker { docker { enabled = true } includeConfig 'configs/container.config' } singularity { - singularity { + singularity { enabled = true autoMounts = true cacheDir = params.singularityCacheDir @@ -114,7 +115,7 @@ profiles { includeConfig 'configs/container.config' } - mamba { + mamba { conda { enabled = true cacheDir = params.condaCacheDir @@ -123,7 +124,7 @@ profiles { includeConfig 'configs/conda.config' } - conda { + conda { conda { enabled = true cacheDir = params.condaCacheDir @@ -147,17 +148,17 @@ profiles { } // CONFIGURE YOUR PRIVATE CLOUD - gcloud { + gcloud { params.databases = 'gs://databases-matrice/databases/' bucketDir = 'gs://matrice/nextflow-tmp/clean' //workDir = "/tmp/nextflow-work-$USER" executor { name = 'google-lifesciences' } - + google { project = 'nextflow-auto-255816' - zone = 'europe-west1-b' - } + zone = 'europe-west1-b' + } params.cloudProcess = true includeConfig 'configs/node.config' @@ -169,7 +170,7 @@ profiles { // we need a docker also for basic functionalities in the cloud process { - withLabel: noDocker { cpus = 1; memory = '4.0 GB'; container = 'nanozoo/template:3.8--ccd0653' } + withLabel: noDocker { cpus = 1; memory = '4.0 GB'; container = 'nanozoo/template:3.8--ccd0653' } } } }