diff --git a/CHANGELOG.md b/CHANGELOG.md index eb85bd4ba..3b3f1b640 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -20,6 +20,7 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0. - [#750](https://github.com/nf-core/eager/issues/750) - Fixed piped commands requesting the same number of CPUs at each command step - [#757](https://github.com/nf-core/eager/issues/757) - Removed confusing 'Data Type' variable from MultiQC workflow summary (not consistent with TSV input) - [#759](https://github.com/nf-core/eager/pull/759) - Fixed malformed software scraping regex that resulted in N/A in MultiQC report +- [#761](https://github.com/nf-core/eager/pull/761) - Fixed issues related to instability of samtools filtering related CI tests ### `Dependencies` diff --git a/main.nf b/main.nf index b0c3e0d3b..9dcac94ee 100644 --- a/main.nf +++ b/main.nf @@ -1540,87 +1540,87 @@ process samtools_filter { tuple samplename, libraryid, lane, seqtype, organism, strandedness, udg, file("*.unmapped.bam") optional true // Using shell block rather than script because we are playing with awk - shell: + script: - size = !{params.large_ref} ? '-c' : '' + def size = params.large_ref ? 
'-c' : '' // Unmapped/MAPQ Filtering WITHOUT min-length filtering if ( "${params.bam_unmapped_type}" == "keep" && params.bam_filter_minreadlength == 0 ) { - ''' - samtools view -h -b !{bam} -@ !{task.cpus} -q !{params.bam_mapping_quality_threshold} -o !{libraryid}.filtered.bam - samtools index !{libraryid}.filtered.bam !{size} - ''' + """ + samtools view -h ${bam} -@ ${task.cpus} -q ${params.bam_mapping_quality_threshold} -b > ${libraryid}.filtered.bam + samtools index ${libraryid}.filtered.bam ${size} + """ } else if ( "${params.bam_unmapped_type}" == "discard" && params.bam_filter_minreadlength == 0 ){ - ''' - samtools view -h -b !{bam} -@ !{task.cpus} -F4 -q !{params.bam_mapping_quality_threshold} -o !{libraryid}.filtered.bam - samtools index !{libraryid}.filtered.bam !{size} - ''' + """ + samtools view -h ${bam} -@ ${task.cpus} -F4 -q ${params.bam_mapping_quality_threshold} -b > ${libraryid}.filtered.bam + samtools index ${libraryid}.filtered.bam ${size} + """ } else if ( "${params.bam_unmapped_type}" == "bam" && params.bam_filter_minreadlength == 0 ){ - ''' - samtools view -h !{bam} -@ !{task.cpus} -f4 -o !{libraryid}.unmapped.bam - samtools view -h !{bam} -@ !{task.cpus} -F4 -q !{params.bam_mapping_quality_threshold} -o !{libraryid}.filtered.bam - samtools index !{libraryid}.filtered.bam !{size} - ''' + """ + samtools view -h ${bam} -@ ${task.cpus} -f4 -b > ${libraryid}.unmapped.bam + samtools view -h ${bam} -@ ${task.cpus} -F4 -q ${params.bam_mapping_quality_threshold} -b > ${libraryid}.filtered.bam + samtools index ${libraryid}.filtered.bam ${size} + """ } else if ( "${params.bam_unmapped_type}" == "fastq" && params.bam_filter_minreadlength == 0 ){ - ''' - samtools view -h !{bam} -@ !{task.cpus} -f4 -o !{libraryid}.unmapped.bam - samtools view -h !{bam} -@ !{task.cpus} -F4 -q !{params.bam_mapping_quality_threshold} -o !{libraryid}.filtered.bam - samtools index !{libraryid}.filtered.bam !{size} + """ + samtools view -h ${bam} -@ ${task.cpus} -f4 -b > 
${libraryid}.unmapped.bam + samtools view -h ${bam} -@ ${task.cpus} -F4 -q ${params.bam_mapping_quality_threshold} -b > ${libraryid}.filtered.bam + samtools index ${libraryid}.filtered.bam ${size} ## FASTQ - samtools fastq -tn !{libraryid}.unmapped.bam | pigz -p !{task.cpus - 1} > !{libraryid}.unmapped.fastq.gz - rm !{libraryid}.unmapped.bam - ''' + samtools fastq -tn ${libraryid}.unmapped.bam | pigz -p ${task.cpus - 1} > ${libraryid}.unmapped.fastq.gz + rm ${libraryid}.unmapped.bam + """ } else if ( "${params.bam_unmapped_type}" == "both" && params.bam_filter_minreadlength == 0 ){ - ''' - samtools view -h !{bam} -@ !{task.cpus} -f4 -o !{libraryid}.unmapped.bam - samtools view -h !{bam} -@ !{task.cpus} -F4 -q !{params.bam_mapping_quality_threshold} -o !{libraryid}.filtered.bam - samtools index !{libraryid}.filtered.bam !{size} + """ + samtools view -h ${bam} -@ ${task.cpus} -f4 -b > ${libraryid}.unmapped.bam + samtools view -h ${bam} -@ ${task.cpus} -F4 -q ${params.bam_mapping_quality_threshold} -b > ${libraryid}.filtered.bam + samtools index ${libraryid}.filtered.bam ${size} ## FASTQ - samtools fastq -tn !{libraryid}.unmapped.bam | pigz -p !{task.cpus -1} > !{libraryid}.unmapped.fastq.gz - ''' + samtools fastq -tn ${libraryid}.unmapped.bam | pigz -p ${task.cpus -1} > ${libraryid}.unmapped.fastq.gz + """ // Unmapped/MAPQ Filtering WITH min-length filtering } else if ( "${params.bam_unmapped_type}" == "keep" && params.bam_filter_minreadlength != 0 ) { - ''' - samtools view -h -b !{bam} -@ !{task.cpus} -q !{params.bam_mapping_quality_threshold} -o tmp_mapped.bam - filter_bam_fragment_length.py -a -l !{params.bam_filter_minreadlength} -o !{libraryid} tmp_mapped.bam - samtools index !{libraryid}.filtered.bam !{size} - ''' + """ + samtools view -h ${bam} -@ ${task.cpus} -q ${params.bam_mapping_quality_threshold} -b > tmp_mapped.bam + filter_bam_fragment_length.py -a -l ${params.bam_filter_minreadlength} -o ${libraryid} tmp_mapped.bam + samtools index 
${libraryid}.filtered.bam ${size} + """ } else if ( "${params.bam_unmapped_type}" == "discard" && params.bam_filter_minreadlength != 0 ){ - ''' - samtools view -h -b !{bam} -@ !{task.cpus} -F4 -q !{params.bam_mapping_quality_threshold} -o tmp_mapped.bam - filter_bam_fragment_length.py -a -l !{params.bam_filter_minreadlength} -o !{libraryid} tmp_mapped.bam - samtools index !{libraryid}.filtered.bam !{size} - ''' + """ + samtools view -h ${bam} -@ ${task.cpus} -F4 -q ${params.bam_mapping_quality_threshold} -b > tmp_mapped.bam + filter_bam_fragment_length.py -a -l ${params.bam_filter_minreadlength} -o ${libraryid} tmp_mapped.bam + samtools index ${libraryid}.filtered.bam ${size} + """ } else if ( "${params.bam_unmapped_type}" == "bam" && params.bam_filter_minreadlength != 0 ){ - ''' - samtools view -h !{bam} -@ !{task.cpus} -f4 -o !{libraryid}.unmapped.bam - samtools view -h !{bam} -@ !{task.cpus} -F4 -q !{params.bam_mapping_quality_threshold} -o tmp_mapped.bam - filter_bam_fragment_length.py -a -l !{params.bam_filter_minreadlength} -o !{libraryid} tmp_mapped.bam - samtools index !{libraryid}.filtered.bam !{size} - ''' + """ + samtools view -h ${bam} -@ ${task.cpus} -f4 -b > ${libraryid}.unmapped.bam + samtools view -h ${bam} -@ ${task.cpus} -F4 -q ${params.bam_mapping_quality_threshold} -b > tmp_mapped.bam + filter_bam_fragment_length.py -a -l ${params.bam_filter_minreadlength} -o ${libraryid} tmp_mapped.bam + samtools index ${libraryid}.filtered.bam ${size} + """ } else if ( "${params.bam_unmapped_type}" == "fastq" && params.bam_filter_minreadlength != 0 ){ - ''' - samtools view -h !{bam} -@ !{task.cpus} -f4 -o !{libraryid}.unmapped.bam - samtools view -h !{bam} -@ !{task.cpus} -F4 -q !{params.bam_mapping_quality_threshold} -o tmp_mapped.bam - filter_bam_fragment_length.py -a -l !{params.bam_filter_minreadlength} -o !{libraryid} tmp_mapped.bam - samtools index !{libraryid}.filtered.bam !{size} + """ + samtools view -h ${bam} -@ ${task.cpus} -f4 -b > 
${libraryid}.unmapped.bam + samtools view -h ${bam} -@ ${task.cpus} -F4 -q ${params.bam_mapping_quality_threshold} -b > tmp_mapped.bam + filter_bam_fragment_length.py -a -l ${params.bam_filter_minreadlength} -o ${libraryid} tmp_mapped.bam + samtools index ${libraryid}.filtered.bam ${size} ## FASTQ - samtools fastq -tn !{libraryid}.unmapped.bam | pigz -p !{task.cpus - 1} > !{libraryid}.unmapped.fastq.gz - rm !{libraryid}.unmapped.bam - ''' + samtools fastq -tn ${libraryid}.unmapped.bam | pigz -p ${task.cpus - 1} > ${libraryid}.unmapped.fastq.gz + rm ${libraryid}.unmapped.bam + """ } else if ( "${params.bam_unmapped_type}" == "both" && params.bam_filter_minreadlength != 0 ){ - ''' - samtools view -h !{bam} -@ !{task.cpus} -f4 -o !{libraryid}.unmapped.bam - samtools view -h !{bam} -@ !{task.cpus} -F4 -q !{params.bam_mapping_quality_threshold} -o tmp_mapped.bam - filter_bam_fragment_length.py -a -l !{params.bam_filter_minreadlength} -o !{libraryid} tmp_mapped.bam - samtools index !{libraryid}.filtered.bam !{size} + """ + samtools view -h ${bam} -@ ${task.cpus} -f4 -b > ${libraryid}.unmapped.bam + samtools view -h ${bam} -@ ${task.cpus} -F4 -q ${params.bam_mapping_quality_threshold} -b > tmp_mapped.bam + filter_bam_fragment_length.py -a -l ${params.bam_filter_minreadlength} -o ${libraryid} tmp_mapped.bam + samtools index ${libraryid}.filtered.bam ${size} ## FASTQ - samtools fastq -tn !{libraryid}.unmapped.bam | pigz -p !{task.cpus} > !{libraryid}.unmapped.fastq.gz - ''' + samtools fastq -tn ${libraryid}.unmapped.bam | pigz -p ${task.cpus} > ${libraryid}.unmapped.fastq.gz + """ } }