From 91d05cbba0cd860eee240a24dba99380997f528e Mon Sep 17 00:00:00 2001 From: maxibor Date: Mon, 11 Feb 2019 16:48:58 +0100 Subject: [PATCH 01/29] add noCollase option --- .travis.yml | 6 +++++ main.nf | 74 +++++++++++++++++++++++++++++++++++++++++++---------- 2 files changed, 67 insertions(+), 13 deletions(-) diff --git a/.travis.yml b/.travis.yml index 459e925cb..ecb69d6d6 100644 --- a/.travis.yml +++ b/.travis.yml @@ -40,14 +40,20 @@ script: - nextflow run ${TRAVIS_BUILD_DIR} -profile test,docker --pairedEnd --saveReference # Run the basic pipeline with single end data (pretending its single end actually) - nextflow run ${TRAVIS_BUILD_DIR} -profile test,docker --singleEnd --bwa_index results/reference_genome/bwa_index/bwa_index/ + # Run the basic pipeline with paired end data + - nextflow run ${TRAVIS_BUILD_DIR} -profile test,docker --pairedEnd --noCollapse --saveReference # Run the same pipeline testing optional step: fastp, complexity - nextflow run ${TRAVIS_BUILD_DIR} -profile test,docker --pairedEnd --complexity_filter --bwa_index results/reference_genome/bwa_index/bwa_index/ # Test BAM Trimming - nextflow run ${TRAVIS_BUILD_DIR} -profile test,docker --pairedEnd --trim_bam --bwa_index results/reference_genome/bwa_index/bwa_index/ # Test running with CircularMapper - nextflow run ${TRAVIS_BUILD_DIR} -profile test,docker --pairedEnd --circularmapper --circulartarget 'NC_007596.2' + # Test running with CircularMapper on paired end without collapsing + - nextflow run ${TRAVIS_BUILD_DIR} -profile test,docker --pairedEnd --noCollapse --circularmapper --circulartarget 'NC_007596.2' # Test running with BWA Mem - nextflow run ${TRAVIS_BUILD_DIR} -profile test,docker --pairedEnd --bwamem --bwa_index results/reference_genome/bwa_index/bwa_index/ + # Test running with BWA Mem on paired end without collapsing + - nextflow run ${TRAVIS_BUILD_DIR} -profile test,docker --pairedEnd --noCollapse --bwamem --bwa_index results/reference_genome/bwa_index/bwa_index/ # Test with 
zipped reference input - nextflow run ${TRAVIS_BUILD_DIR} -profile test,docker --pairedEnd --fasta 'https://raw.githubusercontent.com/nf-core/test-datasets/eager2/reference/Test.fasta.gz' # Run the basic pipeline with the bam input profile diff --git a/main.nf b/main.nf index f25ad3e79..12878e70c 100644 --- a/main.nf +++ b/main.nf @@ -28,7 +28,8 @@ def helpMessage() { --reads Path to input data (must be surrounded with quotes) -profile Hardware config to use (e.g. standard, docker, singularity, conda, aws). Ask your system admin if unsure, or check documentatoin. --singleEnd Specifies that the input is single end reads (required if not pairedEnd) - --pairedEnd Specifies that the input is paired end reads (required if not singleend) + --pairedEnd Specifies that the input is paired end reads (required if not singleEnd) + --noCollapse Specifies to avoid merging Forward and Reverse reads together. (Only for pairedEnd samples) --bam Specifies that the input is in BAM format --fasta Path to Fasta reference (required if not iGenome reference) --genome Name of iGenomes reference (required if not fasta reference) @@ -131,6 +132,7 @@ if (params.help){ params.name = false params.singleEnd = false params.pairedEnd = false +params.noCollapse = false params.genome = "Custom" params.snpcapture = false params.bedfile = '' @@ -265,6 +267,11 @@ if( params.singleEnd || params.pairedEnd || params.bam){ exit 1, "Please specify either --singleEnd, --pairedEnd to execute the pipeline on FastQ files and --bam for previously processed BAM files!" } +//Validate that noCollase is only set to True for pairedEnd reads! +if (params.noCollapse && params.singleEnd){ + exit 1, "--noCollapse can only be set for pairedEnd samples!" +} + //AWSBatch sanity checking if(workflow.profile == 'awsbatch'){ @@ -349,6 +356,7 @@ summary['Reads'] = params.reads summary['Fasta Ref'] = params.fasta if(params.bwa_index) summary['BWA Index'] = params.bwa_index summary['Data Type'] = params.singleEnd ? 
'Single-End' : 'Paired-End' +summary['Collapse'] = !params.noCollapse ? 'Yes' : 'No' summary['Max Memory'] = params.max_memory summary['Max CPUs'] = params.max_cpus summary['Max Time'] = params.max_time @@ -508,7 +516,7 @@ process convertBam { output: set val("${base}"), file("*.fastq.gz") into (ch_read_files_converted_fastqc, ch_read_files_converted_fastp) - file("*.fastq.gz") into (ch_read_files_converted_mapping_bwa, ch_read_files_converted_mapping_cm, ch_read_files_converted_mapping_bwamem) + set val("${base}"), file("*.fastq.gz") into (ch_read_files_converted_mapping_bwa, ch_read_files_converted_mapping_cm, ch_read_files_converted_mapping_bwamem) script: base = "${bam.baseName}" @@ -594,19 +602,25 @@ process adapter_removal { //Readprefixing only required for PE data with merging fixprefix = (params.singleEnd) ? "" : "AdapterRemovalFixPrefix ${prefix}.combined.fq.gz ${prefix}.combined.prefixed.fq.gz" - if( !params.singleEnd ){ + if( !params.singleEnd && !params.noCollapse){ """ - AdapterRemoval --file1 ${reads[0]} --file2 ${reads[1]} --basename ${prefix} --gzip --threads ${task.cpus} --trimns --trimqualities --adapter1 ${params.clip_forward_adaptor} --adapter2 ${params.clip_reverse_adaptor} --minlength ${params.clip_readlength} --minquality ${params.clip_min_read_quality} --minadapteroverlap ${params.min_adap_overlap} --collapse + AdapterRemoval --file1 ${reads[0]} --file2 ${reads[1]} --basename ${name} --gzip --threads ${task.cpus} --trimns --trimqualities --adapter1 ${params.clip_forward_adaptor} --adapter2 ${params.clip_reverse_adaptor} --minlength ${params.clip_readlength} --minquality ${params.clip_min_read_quality} --minadapteroverlap ${params.min_adap_overlap} --collapse #Combine files - zcat *.collapsed.gz *.collapsed.truncated.gz *.singleton.truncated.gz *.pair1.truncated.gz *.pair2.truncated.gz | gzip > ${prefix}.combined.fq.gz - ${fixprefix} - rm ${prefix}.combined.fq.gz + zcat *.collapsed.gz *.collapsed.truncated.gz *.singleton.truncated.gz 
*.pair1.truncated.gz *.pair2.truncated.gz | gzip > ${name}.combined.fq.gz """ - } else { + } else if (!params.singleEnd && params.noCollapse) { + """ + AdapterRemoval --file1 ${reads[0]} --file2 ${reads[1]} --basename ${name} --gzip --threads ${task.cpus} --trimns --trimqualities --adapter1 ${params.clip_forward_adaptor} --adapter2 ${params.clip_reverse_adaptor} --minlength ${params.clip_readlength} --minquality ${params.clip_min_read_quality} --minadapteroverlap ${params.min_adap_overlap} + #Rename files + mv ${name}.pair1.truncated.gz ${name}.pair1.combined.fq.gz + mv ${name}.pair2.truncated.gz ${name}.pair2.combined.fq.gz """ - AdapterRemoval --file1 ${reads[0]} --basename ${prefix} --gzip --threads ${task.cpus} --trimns --trimqualities --adapter1 ${params.clip_forward_adaptor} --minlength ${params.clip_readlength} --minquality ${params.clip_min_read_quality} + } + else { + """ + AdapterRemoval --file1 ${reads[0]} --basename ${name} --gzip --threads ${task.cpus} --trimns --trimqualities --adapter1 ${params.clip_forward_adaptor} --minlength ${params.clip_readlength} --minquality ${params.clip_min_read_quality} # Pseudo-Combine - mv *.truncated.gz ${prefix}.combined.fq.gz + mv *.truncated.gz ${name}.combined.fq.gz """ } } @@ -615,7 +629,7 @@ process adapter_removal { * STEP 2.1 - FastQC after clipping/merging (if applied!) */ process fastqc_after_clipping { - tag "${reads[0].baseName}" + tag "${prefix}" publishDir "${params.outdir}/FastQC/after_clipping", mode: 'copy', saveAs: {filename -> filename.indexOf(".zip") > 0 ? 
"zips/$filename" : "$filename"} @@ -628,6 +642,7 @@ process fastqc_after_clipping { file "*_fastqc.{zip,html}" optional true into ch_fastqc_after_clipping script: + prefix = reads[0].toString().tokenize('.')[0] """ fastqc -q $reads """ @@ -654,13 +669,24 @@ process bwa { script: + fasta = "${index}/*.fasta" + if (!params.singleEnd && params.noCollapse){ + prefix = reads[0].toString().tokenize('.')[0] + """ + bwa aln -t ${task.cpus} $fasta ${reads[0]} -n ${params.bwaalnn} -l ${params.bwaalnl} -k ${params.bwaalnk} -f ${prefix}.r1.sai + bwa aln -t ${task.cpus} $fasta ${reads[1]} -n ${params.bwaalnn} -l ${params.bwaalnl} -k ${params.bwaalnk} -f ${prefix}.r2.sai + bwa sampe -r "@RG\\tID:ILLUMINA-${prefix}\\tSM:${prefix}\\tPL:illumina" $fasta ${prefix}.r1.sai ${prefix}.r2.sai ${reads[0]} ${reads[1]} | samtools sort -@ ${task.cpus} -O bam - > ${prefix}.sorted.bam + samtools index ${prefix}.sorted.bam + """ + } else { prefix = reads[0].toString() - ~/(_R1)?(\.combined\.)?(prefixed)?(_trimmed)?(_val_1)?(\.fq)?(\.fastq)?(\.gz)?$/ - fasta = "${index}/*.fasta" """ bwa aln -t ${task.cpus} $fasta $reads -n ${params.bwaalnn} -l ${params.bwaalnl} -k ${params.bwaalnk} -f "${reads.baseName}.sai" bwa samse -r "@RG\\tID:ILLUMINA-${prefix}\\tSM:${prefix}\\tPL:illumina" $fasta "${reads.baseName}".sai $reads | samtools sort -@ ${task.cpus} -O bam - > "${prefix}".sorted.bam samtools index "${prefix}".sorted.bam """ + } + } process circulargenerator{ @@ -708,9 +734,21 @@ process circularmapper{ script: filter = "${params.circularfilter}" ? 
'' : '-f true -x false' - prefix = reads[0].toString() - ~/(_R1)?(\.combined\.)?(prefixed)?(_trimmed)?(_val_1)?(\.fq)?(\.fastq)?(\.gz)?$/ + fasta = "${index}/*_*.fasta" + if (!params.singleEnd && params.noCollapse){ + prefix = reads[0].toString().tokenize('.')[0] + """ + bwa aln -t ${task.cpus} $fasta ${reads[0]} -n ${params.bwaalnn} -l ${params.bwaalnl} -k ${params.bwaalnk} -f ${prefix}.r1.sai + bwa aln -t ${task.cpus} $fasta ${reads[0]} -n ${params.bwaalnn} -l ${params.bwaalnl} -k ${params.bwaalnk} -f ${prefix}.r2.sai + bwa sampe -r "@RG\\tID:ILLUMINA-${prefix}\\tSM:${prefix}\\tPL:illumina" $fasta ${prefix}.r1.sai ${prefix}.r1.sai ${reads[0]} ${reads[0]} > tmp.out + realignsamfile -e ${params.circularextension} -i tmp.out -r $fasta $filter + samtools sort -@ ${task.cpus} -O bam tmp_realigned.bam > ${prefix}.sorted.bam + samtools index ${prefix}.sorted.bam + """ + } else { + prefix = reads[0].toString() - ~/(_R1)?(\.combined\.)?(prefixed)?(_trimmed)?(_val_1)?(\.fq)?(\.fastq)?(\.gz)?$/ """ bwa aln -t ${task.cpus} $fasta $reads -n ${params.bwaalnn} -l ${params.bwaalnl} -k ${params.bwaalnk} -f "${reads.baseName}.sai" bwa samse -r "@RG\\tID:ILLUMINA-${prefix}\\tSM:${prefix}\\tPL:illumina" $fasta "${reads.baseName}".sai $reads > tmp.out @@ -718,6 +756,8 @@ process circularmapper{ samtools sort -@ ${task.cpus} -O bam tmp_realigned.bam > "${prefix}".sorted.bam samtools index "${prefix}".sorted.bam """ + } + } process bwamem { @@ -738,10 +778,18 @@ process bwamem { script: prefix = reads[0].toString() - ~/(_R1)?(\.combined\.)?(prefixed)?(_trimmed)?(_val_1)?(\.fq)?(\.fastq)?(\.gz)?$/ fasta = "${index}/*.fasta" + if (!params.singleEnd && params.noCollapse){ + """ + bwa mem -t ${task.cpus} $fasta ${reads[0]} ${reads[1]} -R "@RG\\tID:ILLUMINA-${prefix}\\tSM:${prefix}\\tPL:illumina" | samtools sort -@ ${task.cpus} -O bam - > "${prefix}".sorted.bam + samtools index -@ ${task.cpus} "${prefix}".sorted.bam + """ + } else { """ bwa mem -t ${task.cpus} $fasta $reads -R 
"@RG\\tID:ILLUMINA-${prefix}\\tSM:${prefix}\\tPL:illumina" | samtools sort -@ ${task.cpus} -O bam - > "${prefix}".sorted.bam samtools index -@ ${task.cpus} "${prefix}".sorted.bam """ + } + } /* From 758fb7fa53b7711e4563ed6bb059cea372c21087 Mon Sep 17 00:00:00 2001 From: maxibor Date: Mon, 11 Feb 2019 16:53:37 +0100 Subject: [PATCH 02/29] fix nf-PE read index --- main.nf | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/main.nf b/main.nf index 12878e70c..b6d9a32bf 100644 --- a/main.nf +++ b/main.nf @@ -741,8 +741,8 @@ process circularmapper{ prefix = reads[0].toString().tokenize('.')[0] """ bwa aln -t ${task.cpus} $fasta ${reads[0]} -n ${params.bwaalnn} -l ${params.bwaalnl} -k ${params.bwaalnk} -f ${prefix}.r1.sai - bwa aln -t ${task.cpus} $fasta ${reads[0]} -n ${params.bwaalnn} -l ${params.bwaalnl} -k ${params.bwaalnk} -f ${prefix}.r2.sai - bwa sampe -r "@RG\\tID:ILLUMINA-${prefix}\\tSM:${prefix}\\tPL:illumina" $fasta ${prefix}.r1.sai ${prefix}.r1.sai ${reads[0]} ${reads[0]} > tmp.out + bwa aln -t ${task.cpus} $fasta ${reads[1]} -n ${params.bwaalnn} -l ${params.bwaalnl} -k ${params.bwaalnk} -f ${prefix}.r2.sai + bwa sampe -r "@RG\\tID:ILLUMINA-${prefix}\\tSM:${prefix}\\tPL:illumina" $fasta ${prefix}.r1.sai ${prefix}.r2.sai ${reads[0]} ${reads[1]} > tmp.out realignsamfile -e ${params.circularextension} -i tmp.out -r $fasta $filter samtools sort -@ ${task.cpus} -O bam tmp_realigned.bam > ${prefix}.sorted.bam samtools index ${prefix}.sorted.bam From e2aaba071e8595d2c3df0f403cb8fc443ad13d74 Mon Sep 17 00:00:00 2001 From: maxibor Date: Mon, 11 Feb 2019 16:58:24 +0100 Subject: [PATCH 03/29] update doc with noCollapse --- docs/usage.md | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/docs/usage.md b/docs/usage.md index 6f0838d7d..02000d1b5 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -162,6 +162,14 @@ A normal glob pattern, enclosed in quotation marks, can then be used for `--read --pairedEnd --reads '*.fastq' ``` +### `--noCollapse` +If 
you have paired-end data, but you don't want to merge them, add the commind line argument `--noCollapse`. + +For example +```bash +--pairedEnd --noCollapse '*.fastq' +``` + ### `--fasta` If you prefer, you can specify the full path to your reference genome when you run the pipeline: From 55743dd25b85562549d290cbdca93b11b4153858 Mon Sep 17 00:00:00 2001 From: maxibor Date: Mon, 11 Feb 2019 17:31:39 +0100 Subject: [PATCH 04/29] main.nf to dev --- main.nf | 3 --- 1 file changed, 3 deletions(-) diff --git a/main.nf b/main.nf index 090d86e00..b71ca88b4 100644 --- a/main.nf +++ b/main.nf @@ -29,10 +29,7 @@ def helpMessage() { -profile Institution or personal hardware config to use (e.g. standard, docker, singularity, conda, aws). Ask your system admin if unsure, or check documentation. --singleEnd Specifies that the input is single end reads (required if not pairedEnd) --pairedEnd Specifies that the input is paired end reads (required if not singleEnd) -<<<<<<< HEAD -======= --noCollapse Specifies to avoid merging Forward and Reverse reads together. 
(Only for pairedEnd samples) ->>>>>>> master --bam Specifies that the input is in BAM format --fasta Path to Fasta reference (required if not iGenome reference) --genome Name of iGenomes reference (required if not fasta reference) From 8ec9d674a94016369190ba04ab21a09da9018ce7 Mon Sep 17 00:00:00 2001 From: maxibor Date: Mon, 11 Feb 2019 18:14:19 +0100 Subject: [PATCH 05/29] fix funky prefix --- main.nf | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/main.nf b/main.nf index b71ca88b4..6a179c0dd 100644 --- a/main.nf +++ b/main.nf @@ -678,11 +678,11 @@ process bwa { samtools index ${prefix}.sorted.bam """ } else { - prefix = reads[0].toString() - ~/(_R1)?(\.combined\.)?(prefixed)?(_trimmed)?(_val_1)?(\.fq)?(\.fastq)?(\.gz)?$/ + prefix = reads[0].toString().tokenize('.')[0] """ - bwa aln -t ${task.cpus} $fasta $reads -n ${params.bwaalnn} -l ${params.bwaalnl} -k ${params.bwaalnk} -f "${reads.baseName}.sai" - bwa samse -r "@RG\\tID:ILLUMINA-${prefix}\\tSM:${prefix}\\tPL:illumina" $fasta "${reads.baseName}".sai $reads | samtools sort -@ ${task.cpus} -O bam - > "${prefix}".sorted.bam - samtools index "${prefix}".sorted.bam + bwa aln -t ${task.cpus} $fasta $reads -n ${params.bwaalnn} -l ${params.bwaalnl} -k ${params.bwaalnk} -f ${prefix}.sai + bwa samse -r "@RG\\tID:ILLUMINA-${prefix}\\tSM:${prefix}\\tPL:illumina" $fasta ${prefix}.sai $reads | samtools sort -@ ${task.cpus} -O bam - > "${prefix}".sorted.bam + samtools index ${prefix}.sorted.bam """ } @@ -747,13 +747,13 @@ process circularmapper{ samtools index ${prefix}.sorted.bam """ } else { - prefix = reads[0].toString() - ~/(_R1)?(\.combined\.)?(prefixed)?(_trimmed)?(_val_1)?(\.fq)?(\.fastq)?(\.gz)?$/ + prefix = reads[0].toString().tokenize('.')[0] """ - bwa aln -t ${task.cpus} $fasta $reads -n ${params.bwaalnn} -l ${params.bwaalnl} -k ${params.bwaalnk} -f "${reads.baseName}.sai" - bwa samse -r "@RG\\tID:ILLUMINA-${prefix}\\tSM:${prefix}\\tPL:illumina" $fasta "${reads.baseName}".sai 
$reads > tmp.out + bwa aln -t ${task.cpus} $fasta $reads -n ${params.bwaalnn} -l ${params.bwaalnl} -k ${params.bwaalnk} -f ${prefix}.sai + bwa samse -r "@RG\\tID:ILLUMINA-${prefix}\\tSM:${prefix}\\tPL:illumina" $fasta ${prefix}.sai $reads > tmp.out realignsamfile -e ${params.circularextension} -i tmp.out -r $fasta $filter - samtools sort -@ ${task.cpus} -O bam tmp_realigned.bam > "${prefix}".sorted.bam - samtools index "${prefix}".sorted.bam + samtools sort -@ ${task.cpus} -O bam tmp_realigned.bam > ${prefix}.sorted.bam + samtools index ${prefix}.sorted.bam """ } From 6d864e10dfef753edd9a8678b63e2fc906388611 Mon Sep 17 00:00:00 2001 From: Alexander Peltzer Date: Mon, 11 Feb 2019 23:49:00 +0100 Subject: [PATCH 06/29] Update docs/usage.md Co-Authored-By: maxibor --- docs/usage.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/usage.md b/docs/usage.md index 8a17ecee5..34dd9f9bb 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -163,7 +163,7 @@ A normal glob pattern, enclosed in quotation marks, can then be used for `--read ``` ### `--noCollapse` -If you have paired-end data, but you don't want to merge them, add the commind line argument `--noCollapse`. +If you have paired-end data, but you don't want to merge them, add the command line argument `--noCollapse`. 
For example ```bash From f3a6ff7adf80ed9820a092f6b15fe21b1549d3f2 Mon Sep 17 00:00:00 2001 From: Alexander Peltzer Date: Tue, 12 Feb 2019 14:52:16 +0100 Subject: [PATCH 07/29] Update docs/usage.md Co-Authored-By: maxibor --- docs/usage.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/usage.md b/docs/usage.md index 34dd9f9bb..fecaa9c13 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -167,7 +167,7 @@ If you have paired-end data, but you don't want to merge them, add the command l For example ```bash ---pairedEnd --noCollapse '*.fastq' +--pairedEnd --noCollapse --reads '*.fastq' ``` ### `--fasta` From 0e03a000d6e3fe11dc91657215074a30645d096e Mon Sep 17 00:00:00 2001 From: maxibor Date: Tue, 12 Feb 2019 17:32:34 +0100 Subject: [PATCH 08/29] skip trimming and collapsing --- docs/usage.md | 28 ++++++++++++++------- main.nf | 69 +++++++++++++++++++++++++++++++++------------------ 2 files changed, 64 insertions(+), 33 deletions(-) diff --git a/docs/usage.md b/docs/usage.md index fecaa9c13..bee24f9ba 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -162,14 +162,6 @@ A normal glob pattern, enclosed in quotation marks, can then be used for `--read --pairedEnd --reads '*.fastq' ``` -### `--noCollapse` -If you have paired-end data, but you don't want to merge them, add the command line argument `--noCollapse`. - -For example -```bash ---pairedEnd --noCollapse --reads '*.fastq' -``` - ### `--fasta` If you prefer, you can specify the full path to your reference genome when you run the pipeline: @@ -287,12 +279,30 @@ This part of the documentation contains a list of user-adjustable parameters in ## Step skipping parameters -Some of the steps in the pipeline can be executed optionally. If you specify specific steps to be skipped, there won't be any output related to these modules. +Some of the steps in the pipeline can be executed optionally. If you specify specific steps to be skipped, there won't be any output related to these modules. 
### `--skip_preseq` Turns off the computation of library complexity estimation. +### `--skip_collapse` +If you have paired-end data, but you don't want to merge them, add the command line argument `--noCollapse`. + +For example +```bash +--pairedEnd --skip_collapse --reads '*.fastq' +``` + +### `--skip_trimming` + +Turns off the adaptor and quality trimming + +For example +```bash +--pairedEnd --skip_trimming --reads '*.fastq' +``` + + ### `--skip_damage_calculation` Turns off the DamageProfiler module to compute DNA damage profiles. diff --git a/main.nf b/main.nf index 6a179c0dd..c98aadee6 100644 --- a/main.nf +++ b/main.nf @@ -29,7 +29,6 @@ def helpMessage() { -profile Institution or personal hardware config to use (e.g. standard, docker, singularity, conda, aws). Ask your system admin if unsure, or check documentation. --singleEnd Specifies that the input is single end reads (required if not pairedEnd) --pairedEnd Specifies that the input is paired end reads (required if not singleEnd) - --noCollapse Specifies to avoid merging Forward and Reverse reads together. (Only for pairedEnd samples) --bam Specifies that the input is in BAM format --fasta Path to Fasta reference (required if not iGenome reference) --genome Name of iGenomes reference (required if not fasta reference) @@ -45,6 +44,8 @@ def helpMessage() { --saveReference Saves reference genome indices for later reusage Skipping Skip any of the mentioned steps + --skip_collapse Skip merging Forward and Reverse reads together. 
(Only for pairedEnd samples) + --skip_trim Skip adaptor and quality trimming --skip_preseq --skip_damage_calculation --skip_qualimap @@ -131,7 +132,6 @@ if (params.help){ params.name = false params.singleEnd = false params.pairedEnd = false -params.noCollapse = false params.genome = "Custom" params.snpcapture = false params.bedfile = '' @@ -147,6 +147,8 @@ params.email = false params.plaintext_email = false // Skipping parts of the pipeline for impatient users +params.skip_collapse = false +params.skip_trim = false params.skip_preseq = false params.skip_damage_calculation = false params.skip_qualimap = false @@ -266,12 +268,11 @@ if( params.singleEnd || params.pairedEnd || params.bam){ exit 1, "Please specify either --singleEnd, --pairedEnd to execute the pipeline on FastQ files and --bam for previously processed BAM files!" } -//Validate that noCollase is only set to True for pairedEnd reads! -if (params.noCollapse && params.singleEnd){ +//Validate that skip_collapse is only set to True for pairedEnd reads! +if (params.skip_collapse && params.singleEnd){ exit 1, "--noCollapse can only be set for pairedEnd samples!" } - //AWSBatch sanity checking if(workflow.profile == 'awsbatch'){ if (!params.awsqueue || !params.awsregion) exit 1, "Specify correct --awsqueue and --awsregion parameters on AWSBatch!" @@ -355,7 +356,8 @@ summary['Reads'] = params.reads summary['Fasta Ref'] = params.fasta if(params.bwa_index) summary['BWA Index'] = params.bwa_index summary['Data Type'] = params.singleEnd ? 'Single-End' : 'Paired-End' -summary['Collapse'] = !params.noCollapse ? 'Yes' : 'No' +summary['Skip Collapsing'] = params.skip_collapse ? 'Yes' : 'No' +summary['Skip Trimming'] = params.skip_trim ? 
'Yes' : 'No' summary['Max Memory'] = params.max_memory summary['Max CPUs'] = params.max_cpus summary['Max Time'] = params.max_time @@ -375,7 +377,7 @@ if(workflow.profile == 'awsbatch'){ summary['AWS Queue'] = params.awsqueue } if(params.email) summary['E-mail Address'] = params.email -log.info summary.collect { k,v -> "${k.padRight(15)}: $v" }.join("\n") +log.info summary.collect { k,v -> "${k.padRight(35)}: $v" }.join("\n") log.info "=========================================" @@ -587,6 +589,8 @@ process adapter_removal { tag "$name" publishDir "${params.outdir}/read_merging", mode: 'copy' + echo true + when: !params.bam input: @@ -594,32 +598,49 @@ process adapter_removal { output: file "*.combined*.gz" into (ch_clipped_reads, ch_clipped_reads_for_fastqc,ch_clipped_reads_circularmapper,ch_clipped_reads_bwamem) - file "*.settings" into ch_adapterremoval_logs + file("*.settings") optional true into ch_adapterremoval_logs script: - prefix = reads[0].toString() - ~/(_R1)?(_trimmed)?(_val_1)?(\.fq)?(\.fastq)?(\.gz)?$/ - //Readprefixing only required for PE data with merging - fixprefix = (params.singleEnd) ? 
"" : "AdapterRemovalFixPrefix ${prefix}.combined.fq.gz ${prefix}.combined.prefixed.fq.gz" + base = reads[0].baseName - if( !params.singleEnd && !params.noCollapse){ + if( !params.singleEnd && !params.skip_collapse && !params.skip_trim){ """ - AdapterRemoval --file1 ${reads[0]} --file2 ${reads[1]} --basename ${name} --gzip --threads ${task.cpus} --trimns --trimqualities --adapter1 ${params.clip_forward_adaptor} --adapter2 ${params.clip_reverse_adaptor} --minlength ${params.clip_readlength} --minquality ${params.clip_min_read_quality} --minadapteroverlap ${params.min_adap_overlap} --collapse + AdapterRemoval --file1 ${reads[0]} --file2 ${reads[1]} --basename ${base} --gzip --threads ${task.cpus} --trimns --trimqualities --adapter1 ${params.clip_forward_adaptor} --adapter2 ${params.clip_reverse_adaptor} --minlength ${params.clip_readlength} --minquality ${params.clip_min_read_quality} --minadapteroverlap ${params.min_adap_overlap} --collapse #Combine files - zcat *.collapsed.gz *.collapsed.truncated.gz *.singleton.truncated.gz *.pair1.truncated.gz *.pair2.truncated.gz | gzip > ${name}.combined.fq.gz + zcat *.collapsed.gz *.collapsed.truncated.gz *.singleton.truncated.gz *.pair1.truncated.gz *.pair2.truncated.gz | gzip > ${base}.combined.fq.gz + """ + } else if (!params.singleEnd && params.skip_collapse && !params.skip_trim) { + """ + AdapterRemoval --file1 ${reads[0]} --file2 ${reads[1]} --basename ${base} --gzip --threads ${task.cpus} --trimns --trimqualities --adapter1 ${params.clip_forward_adaptor} --adapter2 ${params.clip_reverse_adaptor} --minlength ${params.clip_readlength} --minquality ${params.clip_min_read_quality} --minadapteroverlap ${params.min_adap_overlap} + #Rename files + mv ${base}.pair1.truncated.gz ${base}.pair1.combined.fq.gz + mv ${base}.pair2.truncated.gz ${base}.pair2.combined.fq.gz """ - } else if (!params.singleEnd && params.noCollapse) { + } else if (!params.singleEnd && !params.skip_collapse && params.skip_trim) { + bogus_adaptor = 
"NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN" """ - AdapterRemoval --file1 ${reads[0]} --file2 ${reads[1]} --basename ${name} --gzip --threads ${task.cpus} --trimns --trimqualities --adapter1 ${params.clip_forward_adaptor} --adapter2 ${params.clip_reverse_adaptor} --minlength ${params.clip_readlength} --minquality ${params.clip_min_read_quality} --minadapteroverlap ${params.min_adap_overlap} + AdapterRemoval --file1 ${reads[0]} --file2 ${reads[1]} --basename ${base} --gzip --threads ${task.cpus} --basename ${base} --collapse --adapter1 $bogus_adaptor --adapter2 $bogus_adaptor #Rename files - mv ${name}.pair1.truncated.gz ${name}.pair1.combined.fq.gz - mv ${name}.pair2.truncated.gz ${name}.pair2.combined.fq.gz + mv ${base}.pair1.truncated.gz ${base}.pair1.combined.fq.gz + mv ${base}.pair2.truncated.gz ${base}.pair2.combined.fq.gz """ - } + } else if (params.singleEnd && params.skip_collapse && params.skip_trim){ + """ + mv ${reads[0]} ${base}.combined.fq.gz + echo "Skipped trimming and merging by AdapterRemoval" + """ + } else if (params.pairedEnd && params.skip_collapse && params.skip_trim){ + """ + mv ${reads[0]} ${base}.pair1.combined.fq.gz + mv ${reads[1]} ${base}.pair2.combined.fq.gz + echo "Skipped trimming and merging by AdapterRemoval" + """ + } else { """ - AdapterRemoval --file1 ${reads[0]} --basename ${name} --gzip --threads ${task.cpus} --trimns --trimqualities --adapter1 ${params.clip_forward_adaptor} --minlength ${params.clip_readlength} --minquality ${params.clip_min_read_quality} + AdapterRemoval --file1 ${reads[0]} --basename ${base} --gzip --threads ${task.cpus} --trimns --trimqualities --adapter1 ${params.clip_forward_adaptor} --minlength ${params.clip_readlength} --minquality ${params.clip_min_read_quality} # Pseudo-Combine - mv *.truncated.gz ${name}.combined.fq.gz + mv *.truncated.gz ${base}.combined.fq.gz """ } } @@ -669,7 +690,7 @@ process bwa { script: fasta = "${index}/*.fasta" - if (!params.singleEnd && params.noCollapse){ + if (!params.singleEnd && 
params.skip_collapse ){ prefix = reads[0].toString().tokenize('.')[0] """ bwa aln -t ${task.cpus} $fasta ${reads[0]} -n ${params.bwaalnn} -l ${params.bwaalnl} -k ${params.bwaalnk} -f ${prefix}.r1.sai @@ -736,7 +757,7 @@ process circularmapper{ fasta = "${index}/*_*.fasta" - if (!params.singleEnd && params.noCollapse){ + if (!params.singleEnd && params.skip_collapse ){ prefix = reads[0].toString().tokenize('.')[0] """ bwa aln -t ${task.cpus} $fasta ${reads[0]} -n ${params.bwaalnn} -l ${params.bwaalnl} -k ${params.bwaalnk} -f ${prefix}.r1.sai @@ -777,7 +798,7 @@ process bwamem { script: prefix = reads[0].toString() - ~/(_R1)?(\.combined\.)?(prefixed)?(_trimmed)?(_val_1)?(\.fq)?(\.fastq)?(\.gz)?$/ fasta = "${index}/*.fasta" - if (!params.singleEnd && params.noCollapse){ + if (!params.singleEnd && params.skip_collapse ){ """ bwa mem -t ${task.cpus} $fasta ${reads[0]} ${reads[1]} -R "@RG\\tID:ILLUMINA-${prefix}\\tSM:${prefix}\\tPL:illumina" | samtools sort -@ ${task.cpus} -O bam - > "${prefix}".sorted.bam samtools index -@ ${task.cpus} "${prefix}".sorted.bam From 757e930003034026af27cc851b849edadc59f585 Mon Sep 17 00:00:00 2001 From: maxibor Date: Tue, 12 Feb 2019 17:45:07 +0100 Subject: [PATCH 09/29] update test --- .travis.yml | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/.travis.yml b/.travis.yml index ecb69d6d6..7fe187219 100644 --- a/.travis.yml +++ b/.travis.yml @@ -40,20 +40,18 @@ script: - nextflow run ${TRAVIS_BUILD_DIR} -profile test,docker --pairedEnd --saveReference # Run the basic pipeline with single end data (pretending its single end actually) - nextflow run ${TRAVIS_BUILD_DIR} -profile test,docker --singleEnd --bwa_index results/reference_genome/bwa_index/bwa_index/ - # Run the basic pipeline with paired end data - - nextflow run ${TRAVIS_BUILD_DIR} -profile test,docker --pairedEnd --noCollapse --saveReference + # Run the basic pipeline with paired end data without collapsing + - nextflow run ${TRAVIS_BUILD_DIR} -profile 
test,docker --pairedEnd --skip_collapse --saveReference + # Run the basic pipeline with paired end data without collapsing nor trimming + - nextflow run ${TRAVIS_BUILD_DIR} -profile test,docker --pairedEnd --skip_collapse --skip_trim --saveReference # Run the same pipeline testing optional step: fastp, complexity - nextflow run ${TRAVIS_BUILD_DIR} -profile test,docker --pairedEnd --complexity_filter --bwa_index results/reference_genome/bwa_index/bwa_index/ # Test BAM Trimming - nextflow run ${TRAVIS_BUILD_DIR} -profile test,docker --pairedEnd --trim_bam --bwa_index results/reference_genome/bwa_index/bwa_index/ # Test running with CircularMapper - nextflow run ${TRAVIS_BUILD_DIR} -profile test,docker --pairedEnd --circularmapper --circulartarget 'NC_007596.2' - # Test running with CircularMapper on paired end without collapsing - - nextflow run ${TRAVIS_BUILD_DIR} -profile test,docker --pairedEnd --noCollapse --circularmapper --circulartarget 'NC_007596.2' # Test running with BWA Mem - nextflow run ${TRAVIS_BUILD_DIR} -profile test,docker --pairedEnd --bwamem --bwa_index results/reference_genome/bwa_index/bwa_index/ - # Test running with BWA Mem on paired end without collapsing - - nextflow run ${TRAVIS_BUILD_DIR} -profile test,docker --pairedEnd --noCollapse --bwamem --bwa_index results/reference_genome/bwa_index/bwa_index/ # Test with zipped reference input - nextflow run ${TRAVIS_BUILD_DIR} -profile test,docker --pairedEnd --fasta 'https://raw.githubusercontent.com/nf-core/test-datasets/eager2/reference/Test.fasta.gz' # Run the basic pipeline with the bam input profile From 3320464a5586077ceb714d8bee5b95c74953093e Mon Sep 17 00:00:00 2001 From: Alexander Peltzer Date: Mon, 4 Mar 2019 11:19:14 +0100 Subject: [PATCH 10/29] move size out of the if clause --- main.nf | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/main.nf b/main.nf index 5b323b662..c8781788b 100644 --- a/main.nf +++ b/main.nf @@ -685,9 +685,10 @@ process bwa { script: fasta = 
"${index}/*.fasta" + size = "${params.large_ref}" ? '-c' : '' + if (!params.singleEnd && params.skip_collapse ){ prefix = reads[0].toString().tokenize('.')[0] - size = "${params.large_ref}" ? '-c' : '' """ bwa aln -t ${task.cpus} $fasta ${reads[0]} -n ${params.bwaalnn} -l ${params.bwaalnl} -k ${params.bwaalnk} -f ${prefix}.r1.sai bwa aln -t ${task.cpus} $fasta ${reads[1]} -n ${params.bwaalnn} -l ${params.bwaalnl} -k ${params.bwaalnk} -f ${prefix}.r2.sai From def1b350c4836c123aca29dded1a0207cfe0faaf Mon Sep 17 00:00:00 2001 From: Maxime Date: Mon, 4 Mar 2019 17:21:08 +0100 Subject: [PATCH 11/29] match skip_* pattern --- docs/usage.md | 37 ++++++++++++++++++-------------- main.nf | 59 ++++++++++++++++++++++++++++++++++----------------- 2 files changed, 61 insertions(+), 35 deletions(-) diff --git a/docs/usage.md b/docs/usage.md index 1c8894865..2d78d15d0 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -289,23 +289,10 @@ Some of the steps in the pipeline can be executed optionally. If you specify spe Turns off the computation of library complexity estimation. -### `--skip_collapse` -If you have paired-end data, but you don't want to merge them, add the command line argument `--noCollapse`. - -For example -```bash ---pairedEnd --skip_collapse --reads '*.fastq' -``` - -### `--skip_trimming` - -Turns off the adaptor and quality trimming - -For example -```bash ---pairedEnd --skip_trimming --reads '*.fastq' -``` +### `--skip_adapterremoval` +Turns off adaptor trimming and paired-end read merging. +Equivalent to setting both `--skip_collapse` and `--skip_trim` ### `--skip_damage_calculation` @@ -351,6 +338,24 @@ Defines the minimum read quality per base that is required for a base to be kept ### `--clip_min_adap_overlap` 1 Sets the minimum overlap between two reads when read merging is performed. Default is set to `1` base overlap. +### `--skip_collapse` + +Turns off the paired-end read merging. 
+ +For example +```bash +--pairedEnd --skip_collapse --reads '*.fastq' +``` + +### `--skip_trim` + +Turns off the adaptor and quality trimming. + +For example +```bash +--pairedEnd --skip_trim --reads '*.fastq' +``` + ## Read Mapping Parameters ## BWA (default) diff --git a/main.nf b/main.nf index c8781788b..81540b346 100644 --- a/main.nf +++ b/main.nf @@ -44,8 +44,7 @@ def helpMessage() { --saveReference Saves reference genome indices for later reusage Skipping Skip any of the mentioned steps - --skip_collapse Skip merging Forward and Reverse reads together. (Only for pairedEnd samples) - --skip_trim Skip adaptor and quality trimming + --skip_adapterremoval --skip_preseq --skip_damage_calculation --skip_qualimap @@ -61,6 +60,8 @@ def helpMessage() { --clip_readlength Specify read minimum length to be kept for downstream analysis --clip_min_read_quality Specify minimum base quality for not trimming off bases --min_adap_overlap Specify minimum adapter overlap + --skip_collapse Skip merging Forward and Reverse reads together. (Only for pairedEnd samples) + --skip_trim Skip adaptor and quality trimming BWA Mapping --bwaalnn Specify the -n parameter for BWA aln. @@ -147,8 +148,7 @@ params.email = false params.plaintext_email = false // Skipping parts of the pipeline for impatient users -params.skip_collapse = false -params.skip_trim = false +params.skip_adapterremoval = false params.skip_preseq = false params.skip_damage_calculation = false params.skip_qualimap = false @@ -164,6 +164,8 @@ params.clip_reverse_adaptor = "AGATCGGAAGAGCGTCGTGTAGGGAAAGAGTGTA" params.clip_readlength = 30 params.clip_min_read_quality = 20 params.min_adap_overlap = 1 +params.skip_collapse = false +params.skip_trim = false //Read mapping parameters (default = BWA aln default) params.bwaalnn = 0.04 @@ -267,6 +269,25 @@ if (params.skip_collapse && params.singleEnd){ exit 1, "--noCollapse can only be set for pairedEnd samples!" 
} + + +//Skip adapterremoval compatibility with skip_trim and skip_collapse + +skip_collapse = params.skip_collapse +skip_trim = params.skip_trim +skip_adapterremoval = params.skip_adapterremoval + +if (params.skip_collapse && params.skip_trim){ + skip_adapterremoval = true +} + +if (params.skip_adapterremoval){ + skip_adapterremoval = true + skip_collapse = true + skip_trim = true +} + + //AWSBatch sanity checking if(workflow.profile == 'awsbatch'){ if (!params.awsqueue || !params.awsregion) exit 1, "Specify correct --awsqueue and --awsregion parameters on AWSBatch!" @@ -351,8 +372,8 @@ summary['Fasta Ref'] = params.fasta summary['BAM Index Type'] = (params.large_ref == "") ? 'BAI' : 'CSI' if(params.bwa_index) summary['BWA Index'] = params.bwa_index summary['Data Type'] = params.singleEnd ? 'Single-End' : 'Paired-End' -summary['Skip Collapsing'] = params.skip_collapse ? 'Yes' : 'No' -summary['Skip Trimming'] = params.skip_trim ? 'Yes' : 'No' +summary['Skip Collapsing'] = skip_collapse ? 'Yes' : 'No' +summary['Skip Trimming'] = skip_trim ? 
'Yes' : 'No' summary['Max Memory'] = params.max_memory summary['Max CPUs'] = params.max_cpus summary['Max Time'] = params.max_time @@ -579,7 +600,6 @@ process fastp { * STEP 2 - Adapter Clipping / Read Merging */ - process adapter_removal { tag "$name" publishDir "${params.outdir}/read_merging", mode: 'copy' @@ -598,20 +618,20 @@ process adapter_removal { script: base = reads[0].baseName - if( !params.singleEnd && !params.skip_collapse && !params.skip_trim){ + if( !params.singleEnd && !skip_collapse && !params.skip_trim && !skip_adapterremoval){ """ AdapterRemoval --file1 ${reads[0]} --file2 ${reads[1]} --basename ${base} --gzip --threads ${task.cpus} --trimns --trimqualities --adapter1 ${params.clip_forward_adaptor} --adapter2 ${params.clip_reverse_adaptor} --minlength ${params.clip_readlength} --minquality ${params.clip_min_read_quality} --minadapteroverlap ${params.min_adap_overlap} --collapse #Combine files zcat *.collapsed.gz *.collapsed.truncated.gz *.singleton.truncated.gz *.pair1.truncated.gz *.pair2.truncated.gz | gzip > ${base}.combined.fq.gz """ - } else if (!params.singleEnd && params.skip_collapse && !params.skip_trim) { + } else if (!params.singleEnd && skip_collapse && !params.skip_trim && !skip_adapterremoval) { """ AdapterRemoval --file1 ${reads[0]} --file2 ${reads[1]} --basename ${base} --gzip --threads ${task.cpus} --trimns --trimqualities --adapter1 ${params.clip_forward_adaptor} --adapter2 ${params.clip_reverse_adaptor} --minlength ${params.clip_readlength} --minquality ${params.clip_min_read_quality} --minadapteroverlap ${params.min_adap_overlap} #Rename files mv ${base}.pair1.truncated.gz ${base}.pair1.combined.fq.gz mv ${base}.pair2.truncated.gz ${base}.pair2.combined.fq.gz """ - } else if (!params.singleEnd && !params.skip_collapse && params.skip_trim) { + } else if (!params.singleEnd && !skip_collapse && params.skip_trim && !skip_adapterremoval) { bogus_adaptor = "NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN" """ AdapterRemoval --file1 ${reads[0]} 
--file2 ${reads[1]} --basename ${base} --gzip --threads ${task.cpus} --basename ${base} --collapse --adapter1 $bogus_adaptor --adapter2 $bogus_adaptor @@ -619,19 +639,18 @@ process adapter_removal { mv ${base}.pair1.truncated.gz ${base}.pair1.combined.fq.gz mv ${base}.pair2.truncated.gz ${base}.pair2.combined.fq.gz """ - } else if (params.singleEnd && params.skip_collapse && params.skip_trim){ + } else if (params.singleEnd && skip_adapterremoval){ """ mv ${reads[0]} ${base}.combined.fq.gz echo "Skipped trimming and merging by AdapterRemoval" """ - } else if (params.pairedEnd && params.skip_collapse && params.skip_trim){ + } else if (params.pairedEnd && skip_adapterremoval){ """ mv ${reads[0]} ${base}.pair1.combined.fq.gz mv ${reads[1]} ${base}.pair2.combined.fq.gz echo "Skipped trimming and merging by AdapterRemoval" """ - } - else { + } else { """ AdapterRemoval --file1 ${reads[0]} --basename ${base} --gzip --threads ${task.cpus} --trimns --trimqualities --adapter1 ${params.clip_forward_adaptor} --minlength ${params.clip_readlength} --minquality ${params.clip_min_read_quality} # Pseudo-Combine @@ -640,9 +659,11 @@ process adapter_removal { } } + + /* - * STEP 2.1 - FastQC after clipping/merging (if applied!) - */ +* STEP 2.1 - FastQC after clipping/merging (if applied!) +*/ process fastqc_after_clipping { tag "${prefix}" publishDir "${params.outdir}/FastQC/after_clipping", mode: 'copy', @@ -687,7 +708,7 @@ process bwa { fasta = "${index}/*.fasta" size = "${params.large_ref}" ? '-c' : '' - if (!params.singleEnd && params.skip_collapse ){ + if (!params.singleEnd && skip_collapse ){ prefix = reads[0].toString().tokenize('.')[0] """ bwa aln -t ${task.cpus} $fasta ${reads[0]} -n ${params.bwaalnn} -l ${params.bwaalnl} -k ${params.bwaalnk} -f ${prefix}.r1.sai @@ -755,7 +776,7 @@ process circularmapper{ fasta = "${index}/*_*.fasta" size = "${params.large_ref}" ? 
'-c' : '' - if (!params.singleEnd && params.skip_collapse ){ + if (!params.singleEnd && skip_collapse ){ prefix = reads[0].toString().tokenize('.')[0] """ bwa aln -t ${task.cpus} $fasta ${reads[0]} -n ${params.bwaalnn} -l ${params.bwaalnl} -k ${params.bwaalnk} -f ${prefix}.r1.sai @@ -798,7 +819,7 @@ process bwamem { fasta = "${index}/*.fasta" size = "${params.large_ref}" ? '-c' : '' - if (!params.singleEnd && params.skip_collapse ){ + if (!params.singleEnd && skip_collapse ){ """ bwa mem -t ${task.cpus} $fasta ${reads[0]} ${reads[1]} -R "@RG\\tID:ILLUMINA-${prefix}\\tSM:${prefix}\\tPL:illumina" | samtools sort -@ ${task.cpus} -O bam - > "${prefix}".sorted.bam samtools index "${size}" -@ ${task.cpus} "${prefix}".sorted.bam From 2bef839be75de13538d21d7d70a7196770a741fc Mon Sep 17 00:00:00 2001 From: Maxime Date: Mon, 4 Mar 2019 17:21:24 +0100 Subject: [PATCH 12/29] update travis test --- .travis.yml | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/.travis.yml b/.travis.yml index 7fe187219..dab87ae31 100644 --- a/.travis.yml +++ b/.travis.yml @@ -42,8 +42,10 @@ script: - nextflow run ${TRAVIS_BUILD_DIR} -profile test,docker --singleEnd --bwa_index results/reference_genome/bwa_index/bwa_index/ # Run the basic pipeline with paired end data without collapsing - nextflow run ${TRAVIS_BUILD_DIR} -profile test,docker --pairedEnd --skip_collapse --saveReference - # Run the basic pipeline with paired end data without collapsing nor trimming - - nextflow run ${TRAVIS_BUILD_DIR} -profile test,docker --pairedEnd --skip_collapse --skip_trim --saveReference + # Run the basic pipeline with paired end data without trimming + - nextflow run ${TRAVIS_BUILD_DIR} -profile test,docker --pairedEnd --skip_trim --saveReference + # Run the basic pipeline with paired end data without adapterRemoval + - nextflow run ${TRAVIS_BUILD_DIR} -profile test,docker --pairedEnd --skip_adapterremoval --saveReference # Run the same pipeline testing optional step: fastp, complexity - 
nextflow run ${TRAVIS_BUILD_DIR} -profile test,docker --pairedEnd --complexity_filter --bwa_index results/reference_genome/bwa_index/bwa_index/ # Test BAM Trimming From e62b5032be1d65ad3b8354dfebadf00ba0ab669b Mon Sep 17 00:00:00 2001 From: Maxime Date: Mon, 4 Mar 2019 17:21:52 +0100 Subject: [PATCH 13/29] local executor for skipping AR process --- nextflow.config | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/nextflow.config b/nextflow.config index 150d9bc25..5a701555c 100644 --- a/nextflow.config +++ b/nextflow.config @@ -38,6 +38,14 @@ params { custom_config_version = 'master' } +if(params.skip_adapterremoval){ + process { + withName: adapter_removal{ + executor = 'local' + } + } +} + // Load base.config by default for all pipelines includeConfig 'conf/base.config' From e807c5859de9edbe6e9c6d3e62f2be4ad1145b98 Mon Sep 17 00:00:00 2001 From: Maxime Date: Mon, 4 Mar 2019 17:33:29 +0100 Subject: [PATCH 14/29] initialize skip_adapterremoval --- nextflow.config | 1 + 1 file changed, 1 insertion(+) diff --git a/nextflow.config b/nextflow.config index 5a701555c..584710829 100644 --- a/nextflow.config +++ b/nextflow.config @@ -14,6 +14,7 @@ params { //Pipeline options aligner = 'bwa' + skip_adapterremoval = false saveReference = false saveTrimmed = true saveAlignedIntermediates = false From 01d056c613b616bbf0d474990696fad727b1309d Mon Sep 17 00:00:00 2001 From: Maxime Date: Mon, 4 Mar 2019 18:26:02 +0100 Subject: [PATCH 15/29] fix bam test --- main.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/main.nf b/main.nf index 81540b346..fd6d1f663 100644 --- a/main.nf +++ b/main.nf @@ -533,7 +533,7 @@ process convertBam { output: set val("${base}"), file("*.fastq.gz") into (ch_read_files_converted_fastqc, ch_read_files_converted_fastp) - set val("${base}"), file("*.fastq.gz") into (ch_read_files_converted_mapping_bwa, ch_read_files_converted_mapping_cm, ch_read_files_converted_mapping_bwamem) + file("*.fastq.gz") into 
(ch_read_files_converted_mapping_bwa, ch_read_files_converted_mapping_cm, ch_read_files_converted_mapping_bwamem) script: base = "${bam.baseName}" From a38c5ff8c3895db8a7131a82ae88ee8de7ccbb86 Mon Sep 17 00:00:00 2001 From: Alexander Peltzer Date: Mon, 4 Mar 2019 21:22:08 +0100 Subject: [PATCH 16/29] Define defaults in nextflow.config --- nextflow.config | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/nextflow.config b/nextflow.config index 584710829..86736297f 100644 --- a/nextflow.config +++ b/nextflow.config @@ -14,7 +14,7 @@ params { //Pipeline options aligner = 'bwa' - skip_adapterremoval = false + saveReference = false saveTrimmed = true saveAlignedIntermediates = false @@ -31,6 +31,11 @@ params { complexity_filter_poly_g_min = 10 trim_bam = false + //Skipping adapterremoval, trimming or collapsing defaults + skip_adapterremoval = false + skip_trim = false + skip_adapterremoval = false + // AWS Batch awsqueue = false awsregion = 'eu-west-1' From 20177fa6d421b1ef966b2eaec8a54129591ab04c Mon Sep 17 00:00:00 2001 From: Alexander Peltzer Date: Mon, 4 Mar 2019 21:22:31 +0100 Subject: [PATCH 17/29] Starting to polish PR --- main.nf | 64 ++++++++++++++++++++++----------------------------------- 1 file changed, 24 insertions(+), 40 deletions(-) diff --git a/main.nf b/main.nf index fd6d1f663..b38a4ba51 100644 --- a/main.nf +++ b/main.nf @@ -60,7 +60,7 @@ def helpMessage() { --clip_readlength Specify read minimum length to be kept for downstream analysis --clip_min_read_quality Specify minimum base quality for not trimming off bases --min_adap_overlap Specify minimum adapter overlap - --skip_collapse Skip merging Forward and Reverse reads together. (Only for pairedEnd samples) + --skip_collapse Skip merging Forward and Reverse reads together. 
(Only for PE samples) --skip_trim Skip adaptor and quality trimming BWA Mapping @@ -266,17 +266,9 @@ if( params.singleEnd || params.pairedEnd || params.bam){ //Validate that skip_collapse is only set to True for pairedEnd reads! if (params.skip_collapse && params.singleEnd){ - exit 1, "--noCollapse can only be set for pairedEnd samples!" + exit 1, "--skip_collapse can only be set for pairedEnd samples!" } - - -//Skip adapterremoval compatibility with skip_trim and skip_collapse - -skip_collapse = params.skip_collapse -skip_trim = params.skip_trim -skip_adapterremoval = params.skip_adapterremoval - if (params.skip_collapse && params.skip_trim){ skip_adapterremoval = true } @@ -372,8 +364,8 @@ summary['Fasta Ref'] = params.fasta summary['BAM Index Type'] = (params.large_ref == "") ? 'BAI' : 'CSI' if(params.bwa_index) summary['BWA Index'] = params.bwa_index summary['Data Type'] = params.singleEnd ? 'Single-End' : 'Paired-End' -summary['Skip Collapsing'] = skip_collapse ? 'Yes' : 'No' -summary['Skip Trimming'] = skip_trim ? 'Yes' : 'No' +summary['Skip Collapsing'] = params.skip_collapse ? 'Yes' : 'No' +summary['Skip Trimming'] = params.skip_trim ? 'Yes' : 'No' summary['Max Memory'] = params.max_memory summary['Max CPUs'] = params.max_cpus summary['Max Time'] = params.max_time @@ -604,57 +596,49 @@ process adapter_removal { tag "$name" publishDir "${params.outdir}/read_merging", mode: 'copy' - echo true - - when: !params.bam + when: !params.bam && !params.skip_adapterremoval input: set val(name), file(reads) from ( params.complexity_filter_poly_g ? 
ch_clipped_reads_complexity_filtered_poly_g : ch_read_files_clip ) output: - file "*.combined*.gz" into (ch_clipped_reads, ch_clipped_reads_for_fastqc,ch_clipped_reads_circularmapper,ch_clipped_reads_bwamem) - file("*.settings") optional true into ch_adapterremoval_logs + file "output/*.fq.gz" into (ch_clipped_reads, ch_clipped_reads_for_fastqc,ch_clipped_reads_circularmapper,ch_clipped_reads_bwamem) + file("*.settings") into ch_adapterremoval_logs script: base = reads[0].baseName - if( !params.singleEnd && !skip_collapse && !params.skip_trim && !skip_adapterremoval){ + //PE, collapse & trim reads + if( !params.singleEnd && !params.skip_collapse && !params.skip_trim && !params.skip_adapterremoval){ """ + mkdir -p output AdapterRemoval --file1 ${reads[0]} --file2 ${reads[1]} --basename ${base} --gzip --threads ${task.cpus} --trimns --trimqualities --adapter1 ${params.clip_forward_adaptor} --adapter2 ${params.clip_reverse_adaptor} --minlength ${params.clip_readlength} --minquality ${params.clip_min_read_quality} --minadapteroverlap ${params.min_adap_overlap} --collapse #Combine files - zcat *.collapsed.gz *.collapsed.truncated.gz *.singleton.truncated.gz *.pair1.truncated.gz *.pair2.truncated.gz | gzip > ${base}.combined.fq.gz + zcat *.collapsed.gz *.collapsed.truncated.gz *.singleton.truncated.gz *.pair1.truncated.gz *.pair2.truncated.gz | gzip > output/${base}.combined.fq.gz """ - } else if (!params.singleEnd && skip_collapse && !params.skip_trim && !skip_adapterremoval) { + //PE, don't collapse, but trim reads + } else if (!params.singleEnd && params.skip_collapse && !params.skip_trim && !params.skip_adapterremoval) { """ + mkdir -p output AdapterRemoval --file1 ${reads[0]} --file2 ${reads[1]} --basename ${base} --gzip --threads ${task.cpus} --trimns --trimqualities --adapter1 ${params.clip_forward_adaptor} --adapter2 ${params.clip_reverse_adaptor} --minlength ${params.clip_readlength} --minquality ${params.clip_min_read_quality} --minadapteroverlap 
${params.min_adap_overlap} - #Rename files - mv ${base}.pair1.truncated.gz ${base}.pair1.combined.fq.gz - mv ${base}.pair2.truncated.gz ${base}.pair2.combined.fq.gz + mv ${base}.pair*.truncated.gz output/ """ - } else if (!params.singleEnd && !skip_collapse && params.skip_trim && !skip_adapterremoval) { + //PE, collapse, but don't trim reads + } else if (!params.singleEnd && !params.skip_collapse && params.skip_trim && !params.skip_adapterremoval) { bogus_adaptor = "NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN" """ + mkdir -p output AdapterRemoval --file1 ${reads[0]} --file2 ${reads[1]} --basename ${base} --gzip --threads ${task.cpus} --basename ${base} --collapse --adapter1 $bogus_adaptor --adapter2 $bogus_adaptor - #Rename files - mv ${base}.pair1.truncated.gz ${base}.pair1.combined.fq.gz - mv ${base}.pair2.truncated.gz ${base}.pair2.combined.fq.gz - """ - } else if (params.singleEnd && skip_adapterremoval){ - """ - mv ${reads[0]} ${base}.combined.fq.gz - echo "Skipped trimming and merging by AdapterRemoval" - """ - } else if (params.pairedEnd && skip_adapterremoval){ - """ - mv ${reads[0]} ${base}.pair1.combined.fq.gz - mv ${reads[1]} ${base}.pair2.combined.fq.gz - echo "Skipped trimming and merging by AdapterRemoval" + + mv ${base}.pair*.truncated.gz output/ """ } else { + //SE, collapse not possible, trim reads """ + mkdir -p output AdapterRemoval --file1 ${reads[0]} --basename ${base} --gzip --threads ${task.cpus} --trimns --trimqualities --adapter1 ${params.clip_forward_adaptor} --minlength ${params.clip_readlength} --minquality ${params.clip_min_read_quality} - # Pseudo-Combine - mv *.truncated.gz ${base}.combined.fq.gz + + mv *.truncated.gz output/ """ } } From f9b79c2e47a70ab4cf459231effcf3944771bdc9 Mon Sep 17 00:00:00 2001 From: Alexander Peltzer Date: Mon, 4 Mar 2019 21:49:43 +0100 Subject: [PATCH 18/29] Some more fixes for channel handling --- main.nf | 28 +++++++++++++++++----------- 1 file changed, 17 insertions(+), 11 deletions(-) diff --git a/main.nf 
b/main.nf index b38a4ba51..8bfa5049a 100644 --- a/main.nf +++ b/main.nf @@ -270,13 +270,13 @@ if (params.skip_collapse && params.singleEnd){ } if (params.skip_collapse && params.skip_trim){ - skip_adapterremoval = true + params.skip_adapterremoval = true } if (params.skip_adapterremoval){ - skip_adapterremoval = true - skip_collapse = true - skip_trim = true + params.skip_adapterremoval = true + params.skip_collapse = true + params.skip_trim = true } @@ -591,7 +591,10 @@ process fastp { /* * STEP 2 - Adapter Clipping / Read Merging */ - +//Initialize empty channel if we skip adapterremoval entirely +if(params.skip_adapterremoval) { + ch_clipped_reads = Channel.empty() +} process adapter_removal { tag "$name" publishDir "${params.outdir}/read_merging", mode: 'copy' @@ -602,7 +605,7 @@ process adapter_removal { set val(name), file(reads) from ( params.complexity_filter_poly_g ? ch_clipped_reads_complexity_filtered_poly_g : ch_read_files_clip ) output: - file "output/*.fq.gz" into (ch_clipped_reads, ch_clipped_reads_for_fastqc,ch_clipped_reads_circularmapper,ch_clipped_reads_bwamem) + set val(base), file "output/*.gz" into (ch_clipped_reads,ch_clipped_reads_for_fastqc,ch_clipped_reads_circularmapper,ch_clipped_reads_bwamem) file("*.settings") into ch_adapterremoval_logs script: @@ -653,7 +656,7 @@ process fastqc_after_clipping { publishDir "${params.outdir}/FastQC/after_clipping", mode: 'copy', saveAs: {filename -> filename.indexOf(".zip") > 0 ? "zips/$filename" : "$filename"} - when: !params.bam + when: !params.bam && !params.skip_adapterremoval input: file(reads) from ch_clipped_reads_for_fastqc @@ -679,7 +682,8 @@ process bwa { when: !params.circularmapper && !params.bwamem input: - file(reads) from ch_clipped_reads.mix(ch_read_files_converted_mapping_bwa) + set val(name), file(reads) from ( params.skip_adapterremoval ? 
ch_read_files_clip : ch_clipped_reads.mix(ch_read_files_converted_mapping_bwa) ) + file index from ch_bwa_index.first() @@ -692,7 +696,8 @@ process bwa { fasta = "${index}/*.fasta" size = "${params.large_ref}" ? '-c' : '' - if (!params.singleEnd && skip_collapse ){ + //PE data without merging, PE data without any AR applied + if (!params.singleEnd && (params.skip_collapse || params.skip_adapterremoval)){ prefix = reads[0].toString().tokenize('.')[0] """ bwa aln -t ${task.cpus} $fasta ${reads[0]} -n ${params.bwaalnn} -l ${params.bwaalnl} -k ${params.bwaalnk} -f ${prefix}.r1.sai @@ -701,6 +706,7 @@ process bwa { samtools index "${size}" "${prefix}".sorted.bam """ } else { + //PE collapsed, or SE data prefix = reads[0].toString().tokenize('.')[0] """ bwa aln -t ${task.cpus} $fasta $reads -n ${params.bwaalnn} -l ${params.bwaalnl} -k ${params.bwaalnk} -f ${prefix}.sai @@ -747,7 +753,7 @@ process circularmapper{ when: params.circularmapper input: - file reads from ch_clipped_reads_circularmapper.mix(ch_read_files_converted_mapping_cm) + set val(name), file reads from (params.skip_adapterremoval ? ch_clipped_reads : ch_clipped_reads_circularmapper.mix(ch_read_files_converted_mapping_cm) ) file index from ch_circularmapper_indices.first() output: @@ -790,7 +796,7 @@ process bwamem { when: params.bwamem && !params.circularmapper input: - file(reads) from ch_clipped_reads_bwamem.mix(ch_read_files_converted_mapping_bwamem) + set val(name), file(reads) from (params.skip_adapterremoval ? 
ch_clipped_reads : ch_clipped_reads_bwamem.mix(ch_read_files_converted_mapping_bwamem) ) file index from ch_bwa_index_bwamem.first() output: From 5caefbfe758ad483bc8fd4bc36b6ffe7e2803ecd Mon Sep 17 00:00:00 2001 From: Alexander Peltzer Date: Mon, 4 Mar 2019 22:23:16 +0100 Subject: [PATCH 19/29] Fix being able to handle modern data too --- main.nf | 29 +++++++++++------------------ 1 file changed, 11 insertions(+), 18 deletions(-) diff --git a/main.nf b/main.nf index 8bfa5049a..7c755d9f1 100644 --- a/main.nf +++ b/main.nf @@ -269,17 +269,6 @@ if (params.skip_collapse && params.singleEnd){ exit 1, "--skip_collapse can only be set for pairedEnd samples!" } -if (params.skip_collapse && params.skip_trim){ - params.skip_adapterremoval = true -} - -if (params.skip_adapterremoval){ - params.skip_adapterremoval = true - params.skip_collapse = true - params.skip_trim = true -} - - //AWSBatch sanity checking if(workflow.profile == 'awsbatch'){ if (!params.awsqueue || !params.awsregion) exit 1, "Specify correct --awsqueue and --awsregion parameters on AWSBatch!" @@ -593,8 +582,11 @@ process fastp { */ //Initialize empty channel if we skip adapterremoval entirely if(params.skip_adapterremoval) { - ch_clipped_reads = Channel.empty() -} + //No logs if no AR is run + ch_adapterremoval_logs = Channel.empty() + //Either coming from complexity filtering, or directly use reads normally directed to clipping first and push them through to the other channels downstream! + ch_clipped_reads_complexity_filtered_poly_g.mix(ch_read_files_clip).into { ch_clipped_reads;ch_clipped_reads_for_fastqc;ch_clipped_reads_circularmapper;ch_clipped_reads_bwamem } +} else { process adapter_removal { tag "$name" publishDir "${params.outdir}/read_merging", mode: 'copy' @@ -605,7 +597,7 @@ process adapter_removal { set val(name), file(reads) from ( params.complexity_filter_poly_g ? 
ch_clipped_reads_complexity_filtered_poly_g : ch_read_files_clip ) output: - set val(base), file "output/*.gz" into (ch_clipped_reads,ch_clipped_reads_for_fastqc,ch_clipped_reads_circularmapper,ch_clipped_reads_bwamem) + set val(base), file("output/*.gz") into (ch_clipped_reads,ch_clipped_reads_for_fastqc,ch_clipped_reads_circularmapper,ch_clipped_reads_bwamem) file("*.settings") into ch_adapterremoval_logs script: @@ -645,6 +637,7 @@ process adapter_removal { """ } } +} @@ -659,7 +652,7 @@ process fastqc_after_clipping { when: !params.bam && !params.skip_adapterremoval input: - file(reads) from ch_clipped_reads_for_fastqc + set val(name), file(reads) from ch_clipped_reads_for_fastqc output: file "*_fastqc.{zip,html}" optional true into ch_fastqc_after_clipping @@ -682,7 +675,7 @@ process bwa { when: !params.circularmapper && !params.bwamem input: - set val(name), file(reads) from ( params.skip_adapterremoval ? ch_read_files_clip : ch_clipped_reads.mix(ch_read_files_converted_mapping_bwa) ) + set val(name), file(reads) from ch_clipped_reads.mix(ch_read_files_converted_mapping_bwa) file index from ch_bwa_index.first() @@ -753,7 +746,7 @@ process circularmapper{ when: params.circularmapper input: - set val(name), file reads from (params.skip_adapterremoval ? ch_clipped_reads : ch_clipped_reads_circularmapper.mix(ch_read_files_converted_mapping_cm) ) + set val(name), file(reads) from ch_clipped_reads_circularmapper.mix(ch_read_files_converted_mapping_cm) file index from ch_circularmapper_indices.first() output: @@ -796,7 +789,7 @@ process bwamem { when: params.bwamem && !params.circularmapper input: - set val(name), file(reads) from (params.skip_adapterremoval ? 
ch_clipped_reads : ch_clipped_reads_bwamem.mix(ch_read_files_converted_mapping_bwamem) ) + set val(name), file(reads) from ch_clipped_reads_bwamem.mix(ch_read_files_converted_mapping_bwamem) file index from ch_bwa_index_bwamem.first() output: From 48075bfd94c346c06897938032e03a533a2f65cd Mon Sep 17 00:00:00 2001 From: Alexander Peltzer Date: Mon, 4 Mar 2019 22:36:11 +0100 Subject: [PATCH 20/29] Remove if clause here --- nextflow.config | 7 ------- 1 file changed, 7 deletions(-) diff --git a/nextflow.config b/nextflow.config index 86736297f..9fcbb2404 100644 --- a/nextflow.config +++ b/nextflow.config @@ -44,13 +44,6 @@ params { custom_config_version = 'master' } -if(params.skip_adapterremoval){ - process { - withName: adapter_removal{ - executor = 'local' - } - } -} // Load base.config by default for all pipelines includeConfig 'conf/base.config' From 990140c14316fd8e47dfc961040211e01bebb32d Mon Sep 17 00:00:00 2001 From: phue Date: Tue, 5 Mar 2019 08:45:41 +0100 Subject: [PATCH 21/29] Update main.nf Co-Authored-By: apeltzer --- main.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/main.nf b/main.nf index 7c755d9f1..05d69596f 100644 --- a/main.nf +++ b/main.nf @@ -60,7 +60,7 @@ def helpMessage() { --clip_readlength Specify read minimum length to be kept for downstream analysis --clip_min_read_quality Specify minimum base quality for not trimming off bases --min_adap_overlap Specify minimum adapter overlap - --skip_collapse Skip merging Forward and Reverse reads together. (Only for PE samples) + --skip_collapse Skip merging forward and reverse reads together. 
(Only for PE samples) --skip_trim Skip adaptor and quality trimming BWA Mapping From 9f45104f29d28e66a4f9ee4fb4de614c6d4b03dc Mon Sep 17 00:00:00 2001 From: phue Date: Tue, 5 Mar 2019 08:45:48 +0100 Subject: [PATCH 22/29] Update main.nf Co-Authored-By: apeltzer --- main.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/main.nf b/main.nf index 05d69596f..b833a1cd7 100644 --- a/main.nf +++ b/main.nf @@ -604,7 +604,7 @@ process adapter_removal { base = reads[0].baseName //PE, collapse & trim reads - if( !params.singleEnd && !params.skip_collapse && !params.skip_trim && !params.skip_adapterremoval){ + if (!params.singleEnd && !params.skip_collapse && !params.skip_trim && !params.skip_adapterremoval){ """ mkdir -p output AdapterRemoval --file1 ${reads[0]} --file2 ${reads[1]} --basename ${base} --gzip --threads ${task.cpus} --trimns --trimqualities --adapter1 ${params.clip_forward_adaptor} --adapter2 ${params.clip_reverse_adaptor} --minlength ${params.clip_readlength} --minquality ${params.clip_min_read_quality} --minadapteroverlap ${params.min_adap_overlap} --collapse From 68fa27df622c67b1483de2e058a967fb9c47ac8d Mon Sep 17 00:00:00 2001 From: phue Date: Tue, 5 Mar 2019 08:45:58 +0100 Subject: [PATCH 23/29] Update main.nf Co-Authored-By: apeltzer --- main.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/main.nf b/main.nf index b833a1cd7..2193d3f10 100644 --- a/main.nf +++ b/main.nf @@ -802,7 +802,7 @@ process bwamem { fasta = "${index}/*.fasta" size = "${params.large_ref}" ? 
'-c' : '' - if (!params.singleEnd && skip_collapse ){ + if (!params.singleEnd && params.skip_collapse){ """ bwa mem -t ${task.cpus} $fasta ${reads[0]} ${reads[1]} -R "@RG\\tID:ILLUMINA-${prefix}\\tSM:${prefix}\\tPL:illumina" | samtools sort -@ ${task.cpus} -O bam - > "${prefix}".sorted.bam samtools index "${size}" -@ ${task.cpus} "${prefix}".sorted.bam From b2b9189de3dc30d15ce67c691363c47a8b1e4783 Mon Sep 17 00:00:00 2001 From: Alexander Peltzer Date: Tue, 5 Mar 2019 08:47:16 +0100 Subject: [PATCH 24/29] Fix remaining issue with circularmapper --- main.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/main.nf b/main.nf index 2193d3f10..ac5ca98ad 100644 --- a/main.nf +++ b/main.nf @@ -759,7 +759,7 @@ process circularmapper{ fasta = "${index}/*_*.fasta" size = "${params.large_ref}" ? '-c' : '' - if (!params.singleEnd && skip_collapse ){ + if (!params.singleEnd && params.skip_collapse ){ prefix = reads[0].toString().tokenize('.')[0] """ bwa aln -t ${task.cpus} $fasta ${reads[0]} -n ${params.bwaalnn} -l ${params.bwaalnl} -k ${params.bwaalnk} -f ${prefix}.r1.sai From 5b924db9fd7fe4d98a5826dc2fa87ddd74b5e9b9 Mon Sep 17 00:00:00 2001 From: Alexander Peltzer Date: Tue, 5 Mar 2019 10:42:09 +0100 Subject: [PATCH 25/29] Fix cardinality issues --- main.nf | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/main.nf b/main.nf index ac5ca98ad..59dfabd46 100644 --- a/main.nf +++ b/main.nf @@ -513,8 +513,7 @@ process convertBam { file bam from ch_bam_to_fastq_convert output: - set val("${base}"), file("*.fastq.gz") into (ch_read_files_converted_fastqc, ch_read_files_converted_fastp) - file("*.fastq.gz") into (ch_read_files_converted_mapping_bwa, ch_read_files_converted_mapping_cm, ch_read_files_converted_mapping_bwamem) + set val("${base}"), file("*.fastq.gz") into (ch_read_files_converted_fastqc, ch_read_files_converted_fastp, ch_read_files_converted_mapping_bwa, ch_read_files_converted_mapping_cm, ch_read_files_converted_mapping_bwamem) script: 
base = "${bam.baseName}" From 712535fa18b8c4d391a6baeb34be315f19f0f6c5 Mon Sep 17 00:00:00 2001 From: Alexander Peltzer Date: Tue, 5 Mar 2019 12:38:34 +0100 Subject: [PATCH 26/29] No need to check for AR skipping here --- main.nf | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/main.nf b/main.nf index 59dfabd46..203889f92 100644 --- a/main.nf +++ b/main.nf @@ -601,9 +601,11 @@ process adapter_removal { script: base = reads[0].baseName + //This checks whether we skip trimming and defines a variable respectively + trim_me = params.skip_trim ? '' : '' //PE, collapse & trim reads - if (!params.singleEnd && !params.skip_collapse && !params.skip_trim && !params.skip_adapterremoval){ + if (!params.singleEnd && !params.skip_collapse && !params.skip_trim){ """ mkdir -p output AdapterRemoval --file1 ${reads[0]} --file2 ${reads[1]} --basename ${base} --gzip --threads ${task.cpus} --trimns --trimqualities --adapter1 ${params.clip_forward_adaptor} --adapter2 ${params.clip_reverse_adaptor} --minlength ${params.clip_readlength} --minquality ${params.clip_min_read_quality} --minadapteroverlap ${params.min_adap_overlap} --collapse @@ -611,14 +613,14 @@ process adapter_removal { zcat *.collapsed.gz *.collapsed.truncated.gz *.singleton.truncated.gz *.pair1.truncated.gz *.pair2.truncated.gz | gzip > output/${base}.combined.fq.gz """ //PE, don't collapse, but trim reads - } else if (!params.singleEnd && params.skip_collapse && !params.skip_trim && !params.skip_adapterremoval) { + } else if (!params.singleEnd && params.skip_collapse && !params.skip_trim) { """ mkdir -p output AdapterRemoval --file1 ${reads[0]} --file2 ${reads[1]} --basename ${base} --gzip --threads ${task.cpus} --trimns --trimqualities --adapter1 ${params.clip_forward_adaptor} --adapter2 ${params.clip_reverse_adaptor} --minlength ${params.clip_readlength} --minquality ${params.clip_min_read_quality} --minadapteroverlap ${params.min_adap_overlap} mv ${base}.pair*.truncated.gz output/ """ //PE, 
collapse, but don't trim reads - } else if (!params.singleEnd && !params.skip_collapse && params.skip_trim && !params.skip_adapterremoval) { + } else if (!params.singleEnd && !params.skip_collapse && params.skip_trim) { bogus_adaptor = "NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN" """ mkdir -p output From f6b7313c14bd90fd8d327e35eb3a6f2764085e93 Mon Sep 17 00:00:00 2001 From: Alexander Peltzer Date: Tue, 5 Mar 2019 12:51:40 +0100 Subject: [PATCH 27/29] Polish PR even further --- main.nf | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/main.nf b/main.nf index 203889f92..5b807c8ef 100644 --- a/main.nf +++ b/main.nf @@ -602,13 +602,14 @@ process adapter_removal { script: base = reads[0].baseName //This checks whether we skip trimming and defines a variable respectively - trim_me = params.skip_trim ? '' : '' + trim_me = params.skip_trim ? '' : "--trimns --trimqualities --adapter1 ${params.clip_forward_adaptor} --adapter2 ${params.clip_reverse_adaptor} --minlength ${params.clip_readlength} --minquality ${params.clip_min_read_quality} --minadapteroverlap ${params.min_adap_overlap}" + collapse_me = params.skip_collapse ? 
'' : '--collapse' - //PE, collapse & trim reads + //PE mode, dependent on trim_me and collapse_me the respective procedure is run or not :-) if (!params.singleEnd && !params.skip_collapse && !params.skip_trim){ """ mkdir -p output - AdapterRemoval --file1 ${reads[0]} --file2 ${reads[1]} --basename ${base} --gzip --threads ${task.cpus} --trimns --trimqualities --adapter1 ${params.clip_forward_adaptor} --adapter2 ${params.clip_reverse_adaptor} --minlength ${params.clip_readlength} --minquality ${params.clip_min_read_quality} --minadapteroverlap ${params.min_adap_overlap} --collapse + AdapterRemoval --file1 ${reads[0]} --file2 ${reads[1]} --basename ${base} ${trim_me} --gzip --threads ${task.cpus} ${collapse_me} #Combine files zcat *.collapsed.gz *.collapsed.truncated.gz *.singleton.truncated.gz *.pair1.truncated.gz *.pair2.truncated.gz | gzip > output/${base}.combined.fq.gz """ @@ -616,15 +617,14 @@ process adapter_removal { } else if (!params.singleEnd && params.skip_collapse && !params.skip_trim) { """ mkdir -p output - AdapterRemoval --file1 ${reads[0]} --file2 ${reads[1]} --basename ${base} --gzip --threads ${task.cpus} --trimns --trimqualities --adapter1 ${params.clip_forward_adaptor} --adapter2 ${params.clip_reverse_adaptor} --minlength ${params.clip_readlength} --minquality ${params.clip_min_read_quality} --minadapteroverlap ${params.min_adap_overlap} + AdapterRemoval --file1 ${reads[0]} --file2 ${reads[1]} --basename ${base} --gzip --threads ${task.cpus} ${trim_me} ${collapse_me} mv ${base}.pair*.truncated.gz output/ """ //PE, collapse, but don't trim reads } else if (!params.singleEnd && !params.skip_collapse && params.skip_trim) { - bogus_adaptor = "NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN" """ mkdir -p output - AdapterRemoval --file1 ${reads[0]} --file2 ${reads[1]} --basename ${base} --gzip --threads ${task.cpus} --basename ${base} --collapse --adapter1 $bogus_adaptor --adapter2 $bogus_adaptor + AdapterRemoval --file1 ${reads[0]} --file2 ${reads[1]} --basename 
${base} --gzip --threads ${task.cpus} --basename ${base} ${collapse_me} ${trim_me} mv ${base}.pair*.truncated.gz output/ """ @@ -632,7 +632,7 @@ process adapter_removal { //SE, collapse not possible, trim reads """ mkdir -p output - AdapterRemoval --file1 ${reads[0]} --basename ${base} --gzip --threads ${task.cpus} --trimns --trimqualities --adapter1 ${params.clip_forward_adaptor} --minlength ${params.clip_readlength} --minquality ${params.clip_min_read_quality} + AdapterRemoval --file1 ${reads[0]} --basename ${base} --gzip --threads ${task.cpus} ${trim_me} mv *.truncated.gz output/ """ From cb72002746969c0ca348dd14a9470108b55b044d Mon Sep 17 00:00:00 2001 From: Alexander Peltzer Date: Tue, 5 Mar 2019 12:55:47 +0100 Subject: [PATCH 28/29] Typos fixed in usage --- docs/usage.md | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/docs/usage.md b/docs/usage.md index 2d78d15d0..5a848760b 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -291,8 +291,7 @@ Turns off the computation of library complexity estimation. ### `--skip_adapterremoval` -Turns off adaptor trimming and paired-end read merging. -Equivalent to setting both `--skip_collapse` and `--skip_trim` +Turns off adaptor trimming and paired-end read merging. Equivalent to setting both `--skip_collapse` and `--skip_trim`. ### `--skip_damage_calculation` @@ -340,7 +339,7 @@ Sets the minimum overlap between two reads when read merging is performed. Defau ### `--skip_collapse` -Turns off the paired-end read merging. +Turns off the paired-end read merging. For example ```bash @@ -349,9 +348,9 @@ For example ### `--skip_trim` -Turns off the adaptor and quality trimming. +Turns off adaptor and quality trimming. 
-For example +For example: ```bash --pairedEnd --skip_trim --reads '*.fastq' ``` From 2099a36573407a6acc115ca90b3488371990130a Mon Sep 17 00:00:00 2001 From: Alexander Peltzer Date: Tue, 5 Mar 2019 12:57:20 +0100 Subject: [PATCH 29/29] Add proper changelog --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 661f73ea6..e281225a3 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,6 +10,7 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0. * [#152](https://github.com/nf-core/eager/pull/152) - Clarified `--complexity_filter` flag to be specifically for poly G trimming. * [#155](https://github.com/nf-core/eager/pull/155) - Added [Dedup log to output folders](https://github.com/nf-core/eager/issues/154) +* [#159](https://github.com/nf-core/eager/pull/159) - Added possibility to skip AdapterRemoval, skip merging, or skip trimming, fixing [#64](https://github.com/nf-core/eager/issues/64), [#137](https://github.com/nf-core/eager/issues/137) - thanks to @maxibor, @jfy133 ### `Fixed`