Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix specified CPUs in piped commands #753

Merged
merged 7 commits into from
May 27, 2021
Merged
Show file tree
Hide file tree
Changes from 5 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,8 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.
- Updated template to nf-core/tools 1.14
- [#688](https://github.com/nf-core/eager/issues/688) - Clarified the pipeline is not just for humans and microbes, but also plants and animals, and also for modern DNA
- [#751](https://github.com/nf-core/eager/pull/751) - Added missing label to mtnucratio
- General code cleanup and standarisation of parameters with no default setting
- General code cleanup and standardisation of parameters with no default setting
- [#750](https://github.com/nf-core/eager/issues/750) - Fixed piped commands requesting the same number of CPUs at each command step

### `Dependencies`

Expand Down
71 changes: 37 additions & 34 deletions main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -791,7 +791,7 @@ process adapter_removal {
mv *.settings output/

## Add R_ and L_ for unmerged reads for DeDup compatibility
AdapterRemovalFixPrefix -Xmx${task.memory.toGiga()}g output/${base}.pe.combined.tmp.fq.gz | pigz -p ${task.cpus} > output/${base}.pe.combined.fq.gz
AdapterRemovalFixPrefix -Xmx${task.memory.toGiga()}g output/${base}.pe.combined.tmp.fq.gz | pigz -p ${task.cpus - 1} > output/${base}.pe.combined.fq.gz
"""
//PE mode, collapse and trim, outputting all reads, preserving 5p
} else if (seqtype == 'PE' && !params.skip_collapse && !params.skip_trim && !params.mergedonly && params.preserve5p) {
Expand All @@ -805,7 +805,7 @@ process adapter_removal {
mv *.settings output/

## Add R_ and L_ for unmerged reads for DeDup compatibility
AdapterRemovalFixPrefix -Xmx${task.memory.toGiga()}g output/${base}.pe.combined.tmp.fq.gz | pigz -p ${task.cpus} > output/${base}.pe.combined.fq.gz
AdapterRemovalFixPrefix -Xmx${task.memory.toGiga()}g output/${base}.pe.combined.tmp.fq.gz | pigz -p ${task.cpus - 1} > output/${base}.pe.combined.fq.gz
"""
// PE mode, collapse and trim but only output collapsed reads
} else if ( seqtype == 'PE' && !params.skip_collapse && !params.skip_trim && params.mergedonly && !params.preserve5p ) {
Expand All @@ -816,7 +816,7 @@ process adapter_removal {
cat *.collapsed.gz *.collapsed.truncated.gz > output/${base}.pe.combined.tmp.fq.gz

## Add R_ and L_ for unmerged reads for DeDup compatibility
AdapterRemovalFixPrefix -Xmx${task.memory.toGiga()}g output/${base}.pe.combined.tmp.fq.gz | pigz -p ${task.cpus} > output/${base}.pe.combined.fq.gz
AdapterRemovalFixPrefix -Xmx${task.memory.toGiga()}g output/${base}.pe.combined.tmp.fq.gz | pigz -p ${task.cpus - 1} > output/${base}.pe.combined.fq.gz

mv *.settings output/
"""
Expand All @@ -829,7 +829,7 @@ process adapter_removal {
cat *.collapsed.gz > output/${base}.pe.combined.tmp.fq.gz

## Add R_ and L_ for unmerged reads for DeDup compatibility
AdapterRemovalFixPrefix -Xmx${task.memory.toGiga()}g output/${base}.pe.combined.tmp.fq.gz | pigz -p ${task.cpus} > output/${base}.pe.combined.fq.gz
AdapterRemovalFixPrefix -Xmx${task.memory.toGiga()}g output/${base}.pe.combined.tmp.fq.gz | pigz -p ${task.cpus - 1} > output/${base}.pe.combined.fq.gz

mv *.settings output/
"""
Expand All @@ -843,7 +843,7 @@ process adapter_removal {
cat *.collapsed.gz *.pair1.truncated.gz *.pair2.truncated.gz > output/${base}.pe.combined.tmp.fq.gz

## Add R_ and L_ for unmerged reads for DeDup compatibility
AdapterRemovalFixPrefix -Xmx${task.memory.toGiga()}g output/${base}.pe.combined.tmp.fq.gz | pigz -p ${task.cpus} > output/${base}.pe.combined.fq.gz
AdapterRemovalFixPrefix -Xmx${task.memory.toGiga()}g output/${base}.pe.combined.tmp.fq.gz | pigz -p ${task.cpus - 1} > output/${base}.pe.combined.fq.gz

mv *.settings output/
"""
Expand All @@ -857,7 +857,7 @@ process adapter_removal {
cat *.collapsed.gz > output/${base}.pe.combined.tmp.fq.gz

## Add R_ and L_ for unmerged reads for DeDup compatibility
AdapterRemovalFixPrefix -Xmx${task.memory.toGiga()}g output/${base}.pe.combined.tmp.fq.gz | pigz -p ${task.cpus} > output/${base}.pe.combined.fq.gz
AdapterRemovalFixPrefix -Xmx${task.memory.toGiga()}g output/${base}.pe.combined.tmp.fq.gz | pigz -p ${task.cpus - 1} > output/${base}.pe.combined.fq.gz

mv *.settings output/
"""
Expand Down Expand Up @@ -1162,14 +1162,14 @@ process bwa {
"""
bwa aln -t ${task.cpus} $fasta ${r1} -n ${params.bwaalnn} -l ${params.bwaalnl} -k ${params.bwaalnk} -o ${params.bwaalno} -f ${libraryid}.r1.sai
bwa aln -t ${task.cpus} $fasta ${r2} -n ${params.bwaalnn} -l ${params.bwaalnl} -k ${params.bwaalnk} -o ${params.bwaalno} -f ${libraryid}.r2.sai
bwa sampe -r "@RG\\tID:ILLUMINA-${libraryid}\\tSM:${libraryid}\\tPL:illumina\\tPU:ILLUMINA-${libraryid}-${seqtype}" $fasta ${libraryid}.r1.sai ${libraryid}.r2.sai ${r1} ${r2} | samtools sort -@ ${task.cpus} -O bam - > ${libraryid}_"${seqtype}".mapped.bam
bwa sampe -r "@RG\\tID:ILLUMINA-${libraryid}\\tSM:${libraryid}\\tPL:illumina\\tPU:ILLUMINA-${libraryid}-${seqtype}" $fasta ${libraryid}.r1.sai ${libraryid}.r2.sai ${r1} ${r2} | samtools sort -@ ${task.cpus - 1} -O bam - > ${libraryid}_"${seqtype}".mapped.bam
samtools index "${libraryid}"_"${seqtype}".mapped.bam ${size}
"""
} else {
//PE collapsed, or SE data
"""
bwa aln -t ${task.cpus} ${fasta} ${r1} -n ${params.bwaalnn} -l ${params.bwaalnl} -k ${params.bwaalnk} -o ${params.bwaalno} -f ${libraryid}.sai
bwa samse -r "@RG\\tID:ILLUMINA-${libraryid}\\tSM:${libraryid}\\tPL:illumina\\tPU:ILLUMINA-${libraryid}-${seqtype}" $fasta ${libraryid}.sai $r1 | samtools sort -@ ${task.cpus} -O bam - > "${libraryid}"_"${seqtype}".mapped.bam
bwa samse -r "@RG\\tID:ILLUMINA-${libraryid}\\tSM:${libraryid}\\tPL:illumina\\tPU:ILLUMINA-${libraryid}-${seqtype}" $fasta ${libraryid}.sai $r1 | samtools sort -@ ${task.cpus - 1} -O bam - > "${libraryid}"_"${seqtype}".mapped.bam
samtools index "${libraryid}"_"${seqtype}".mapped.bam ${size}
"""
}
Expand All @@ -1194,17 +1194,18 @@ process bwamem {
params.mapper == 'bwamem'

script:
def split_cpus = Math.floor(task.cpus/2)
def fasta = "${index}/${fasta_base}"
def size = params.large_ref ? '-c' : ''

if (!params.single_end && params.skip_collapse){
"""
bwa mem -t ${task.cpus} $fasta $r1 $r2 -R "@RG\\tID:ILLUMINA-${libraryid}\\tSM:${libraryid}\\tPL:illumina\\tPU:ILLUMINA-${libraryid}-${seqtype}" | samtools sort -@ ${task.cpus} -O bam - > "${libraryid}"_"${seqtype}".mapped.bam
bwa mem -t ${split_cpus} $fasta $r1 $r2 -R "@RG\\tID:ILLUMINA-${libraryid}\\tSM:${libraryid}\\tPL:illumina\\tPU:ILLUMINA-${libraryid}-${seqtype}" | samtools sort -@ ${split_cpus} -O bam - > "${libraryid}"_"${seqtype}".mapped.bam
samtools index ${size} -@ ${task.cpus} "${libraryid}".mapped.bam
"""
} else {
"""
bwa mem -t ${task.cpus} $fasta $r1 -R "@RG\\tID:ILLUMINA-${libraryid}\\tSM:${libraryid}\\tPL:illumina\\tPU:ILLUMINA-${libraryid}-${seqtype}" | samtools sort -@ ${task.cpus} -O bam - > "${libraryid}"_"${seqtype}".mapped.bam
bwa mem -t ${split_cpus} $fasta $r1 -R "@RG\\tID:ILLUMINA-${libraryid}\\tSM:${libraryid}\\tPL:illumina\\tPU:ILLUMINA-${libraryid}-${seqtype}" | samtools sort -@ ${split_cpus} -O bam - > "${libraryid}"_"${seqtype}".mapped.bam
samtools index -@ ${task.cpus} "${libraryid}"_"${seqtype}".mapped.bam ${size}
"""
}
Expand Down Expand Up @@ -1302,6 +1303,7 @@ process bowtie2 {
params.mapper == 'bowtie2'

script:
def split_cpus = Math.floor(task.cpus/2)
def size = params.large_ref ? '-c' : ''
def fasta = "${index}/${fasta_base}"
def trim5 = params.bt2_trim5 != 0 ? "--trim5 ${params.bt2_trim5}" : ""
Expand Down Expand Up @@ -1345,13 +1347,13 @@ process bowtie2 {
//PE data without merging, PE data without any AR applied
if ( seqtype == 'PE' && ( params.skip_collapse || params.skip_adapterremoval ) ){
"""
bowtie2 -x ${fasta} -1 ${r1} -2 ${r2} -p ${task.cpus} ${sensitivity} ${bt2n} ${bt2l} ${trim5} ${trim3} --maxins ${params.bt2_maxins} --rg-id ILLUMINA-${libraryid} --rg SM:${libraryid} --rg PL:illumina --rg PU:ILLUMINA-${libraryid}-${seqtype} 2> "${libraryid}"_bt2.log | samtools sort -@ ${task.cpus} -O bam > "${libraryid}"_"${seqtype}".mapped.bam
bowtie2 -x ${fasta} -1 ${r1} -2 ${r2} -p ${split_cpus} ${sensitivity} ${bt2n} ${bt2l} ${trim5} ${trim3} --maxins ${params.bt2_maxins} --rg-id ILLUMINA-${libraryid} --rg SM:${libraryid} --rg PL:illumina --rg PU:ILLUMINA-${libraryid}-${seqtype} 2> "${libraryid}"_bt2.log | samtools sort -@ ${split_cpus} -O bam > "${libraryid}"_"${seqtype}".mapped.bam
samtools index "${libraryid}"_"${seqtype}".mapped.bam ${size}
jfy133 marked this conversation as resolved.
Show resolved Hide resolved
"""
} else {
//PE collapsed, or SE data
"""
bowtie2 -x ${fasta} -U ${r1} -p ${task.cpus} ${sensitivity} ${bt2n} ${bt2l} ${trim5} ${trim3} --rg-id ILLUMINA-${libraryid} --rg SM:${libraryid} --rg PL:illumina --rg PU:ILLUMINA-${libraryid}-${seqtype} 2> "${libraryid}"_bt2.log | samtools sort -@ ${task.cpus} -O bam > "${libraryid}"_"${seqtype}".mapped.bam
bowtie2 -x ${fasta} -U ${r1} -p ${split_cpus} ${sensitivity} ${bt2n} ${bt2l} ${trim5} ${trim3} --rg-id ILLUMINA-${libraryid} --rg SM:${libraryid} --rg PL:illumina --rg PU:ILLUMINA-${libraryid}-${seqtype} 2> "${libraryid}"_bt2.log | samtools sort -@ ${split_cpus} -O bam > "${libraryid}"_"${seqtype}".mapped.bam
samtools index "${libraryid}"_"${seqtype}".mapped.bam ${size}
jfy133 marked this conversation as resolved.
Show resolved Hide resolved
"""
}
Expand Down Expand Up @@ -1540,6 +1542,7 @@ process samtools_filter {

// Using shell block rather than script because we are playing with awk
shell:

size = !{params.large_ref} ? '-c' : ''

// Unmapped/MAPQ Filtering WITHOUT min-length filtering
Expand All @@ -1555,28 +1558,28 @@ process samtools_filter {
'''
} else if ( "${params.bam_unmapped_type}" == "bam" && params.bam_filter_minreadlength == 0 ){
'''
samtools view -h !{bam} | samtools view - -@ !{task.cpus} -f4 -o !{libraryid}.unmapped.bam
samtools view -h !{bam} | samtools view - -@ !{task.cpus} -F4 -q !{params.bam_mapping_quality_threshold} -o !{libraryid}.filtered.bam
samtools view -h !{bam} -@ !{task.cpus} -f4 -o !{libraryid}.unmapped.bam
samtools view -h !{bam} -@ !{task.cpus} -F4 -q !{params.bam_mapping_quality_threshold} -o !{libraryid}.filtered.bam
samtools index !{libraryid}.filtered.bam !{size}
'''
} else if ( "${params.bam_unmapped_type}" == "fastq" && params.bam_filter_minreadlength == 0 ){
'''
samtools view -h !{bam} | samtools view - -@ !{task.cpus} -f4 -o !{libraryid}.unmapped.bam
samtools view -h !{bam} | samtools view - -@ !{task.cpus} -F4 -q !{params.bam_mapping_quality_threshold} -o !{libraryid}.filtered.bam
samtools view -h !{bam} -@ !{task.cpus} -f4 -o !{libraryid}.unmapped.bam
samtools view -h !{bam} -@ !{task.cpus} -F4 -q !{params.bam_mapping_quality_threshold} -o !{libraryid}.filtered.bam
samtools index !{libraryid}.filtered.bam !{size}

## FASTQ
samtools fastq -tn !{libraryid}.unmapped.bam | pigz -p !{task.cpus} > !{libraryid}.unmapped.fastq.gz
samtools fastq -tn !{libraryid}.unmapped.bam | pigz -p !{task.cpus - 1} > !{libraryid}.unmapped.fastq.gz
rm !{libraryid}.unmapped.bam
'''
} else if ( "${params.bam_unmapped_type}" == "both" && params.bam_filter_minreadlength == 0 ){
'''
samtools view -h !{bam} | samtools view - -@ !{task.cpus} -f4 -o !{libraryid}.unmapped.bam
samtools view -h !{bam} | samtools view - -@ !{task.cpus} -F4 -q !{params.bam_mapping_quality_threshold} -o !{libraryid}.filtered.bam
samtools view -h !{bam} -@ !{task.cpus} -f4 -o !{libraryid}.unmapped.bam
samtools view -h !{bam} -@ !{task.cpus} -F4 -q !{params.bam_mapping_quality_threshold} -o !{libraryid}.filtered.bam
samtools index !{libraryid}.filtered.bam !{size}

## FASTQ
samtools fastq -tn !{libraryid}.unmapped.bam | pigz -p !{task.cpus} > !{libraryid}.unmapped.fastq.gz
samtools fastq -tn !{libraryid}.unmapped.bam | pigz -p !{task.cpus -1} > !{libraryid}.unmapped.fastq.gz
'''
// Unmapped/MAPQ Filtering WITH min-length filtering
} else if ( "${params.bam_unmapped_type}" == "keep" && params.bam_filter_minreadlength != 0 ) {
Expand All @@ -1593,26 +1596,26 @@ process samtools_filter {
'''
} else if ( "${params.bam_unmapped_type}" == "bam" && params.bam_filter_minreadlength != 0 ){
'''
samtools view -h !{bam} | samtools view - -@ !{task.cpus} -f4 -o !{libraryid}.unmapped.bam
samtools view -h !{bam} | samtools view - -@ !{task.cpus} -F4 -q !{params.bam_mapping_quality_threshold} -o tmp_mapped.bam
samtools view -h !{bam} -@ !{task.cpus} -f4 -o !{libraryid}.unmapped.bam
samtools view -h !{bam} -@ !{task.cpus} -F4 -q !{params.bam_mapping_quality_threshold} -o tmp_mapped.bam
filter_bam_fragment_length.py -a -l !{params.bam_filter_minreadlength} -o !{libraryid} tmp_mapped.bam
samtools index !{libraryid}.filtered.bam !{size}
'''
} else if ( "${params.bam_unmapped_type}" == "fastq" && params.bam_filter_minreadlength != 0 ){
'''
samtools view -h !{bam} | samtools view - -@ !{task.cpus} -f4 -o !{libraryid}.unmapped.bam
samtools view -h !{bam} | samtools view - -@ !{task.cpus} -F4 -q !{params.bam_mapping_quality_threshold} -o tmp_mapped.bam
samtools view -h !{bam} -@ !{task.cpus} -f4 -o !{libraryid}.unmapped.bam
samtools view -h !{bam} -@ !{task.cpus} -F4 -q !{params.bam_mapping_quality_threshold} -o tmp_mapped.bam
filter_bam_fragment_length.py -a -l !{params.bam_filter_minreadlength} -o !{libraryid} tmp_mapped.bam
samtools index !{libraryid}.filtered.bam !{size}

## FASTQ
samtools fastq -tn !{libraryid}.unmapped.bam | pigz -p !{task.cpus} > !{libraryid}.unmapped.fastq.gz
samtools fastq -tn !{libraryid}.unmapped.bam | pigz -p !{task.cpus - 1} > !{libraryid}.unmapped.fastq.gz
rm !{libraryid}.unmapped.bam
'''
} else if ( "${params.bam_unmapped_type}" == "both" && params.bam_filter_minreadlength != 0 ){
'''
samtools view -h !{bam} | samtools view - -@ !{task.cpus} -f4 -o !{libraryid}.unmapped.bam
samtools view -h !{bam} | samtools view - -@ !{task.cpus} -F4 -q !{params.bam_mapping_quality_threshold} -o tmp_mapped.bam
samtools view -h !{bam} -@ !{task.cpus} -f4 -o !{libraryid}.unmapped.bam
samtools view -h !{bam} -@ !{task.cpus} -F4 -q !{params.bam_mapping_quality_threshold} -o tmp_mapped.bam
filter_bam_fragment_length.py -a -l !{params.bam_filter_minreadlength} -o !{libraryid} tmp_mapped.bam
samtools index !{libraryid}.filtered.bam !{size}

Expand Down Expand Up @@ -1936,8 +1939,8 @@ process bedtools {

script:
"""
bedtools coverage -nonamecheck -a ${anno_file} -b $bam | pigz -p ${task.cpus} > "${bam.baseName}".breadth.gz
bedtools coverage -nonamecheck -a ${anno_file} -b $bam -mean | pigz -p ${task.cpus} > "${bam.baseName}".depth.gz
bedtools coverage -nonamecheck -a ${anno_file} -b $bam | pigz -p ${task.cpus - 1} > "${bam.baseName}".breadth.gz
bedtools coverage -nonamecheck -a ${anno_file} -b $bam -mean | pigz -p ${task.cpus - 1} > "${bam.baseName}".depth.gz
"""
}

Expand Down Expand Up @@ -2006,7 +2009,7 @@ process mapdamage_rescaling {
// Optionally perform further aDNA evaluation or filtering for just reads with damage etc.

process pmdtools {
label 'mc_small'
label 'mc_medium'
tag "${libraryid}"
publishDir "${params.outdir}/pmdtools", mode: params.publish_dir_mode

Expand All @@ -2033,12 +2036,12 @@ process pmdtools {
def platypus = params.pmdtools_platypus ? '--platypus' : ''
"""
#Run Filtering step
samtools calmd -b ${bam} ${fasta} | samtools view -h - | pmdtools --threshold ${params.pmdtools_threshold} ${treatment} ${snpcap} --header | samtools view -@ ${task.cpus} -Sb - > "${libraryid}".pmd.bam
samtools calmd -b ${bam} ${fasta} | pmdtools --threshold ${params.pmdtools_threshold} ${treatment} ${snpcap} --header | samtools view -@ ${task.cpus - 2} -Sb - > "${libraryid}".pmd.bam

#Run Calc Range step
## To allow early shut off of pipe: https://github.com/nextflow-io/nextflow/issues/1564
trap 'if [[ \$? == 141 ]]; then echo "Shutting samtools early due to -n parameter" && samtools index ${libraryid}.pmd.bam ${size}; exit 0; fi' EXIT
samtools calmd -b ${bam} ${fasta} | samtools view -h - | pmdtools --deamination ${platypus} --range ${params.pmdtools_range} ${treatment} ${snpcap} -n ${params.pmdtools_max_reads} > "${libraryid}".cpg.range."${params.pmdtools_range}".txt
samtools calmd -b ${bam} ${fasta} | pmdtools --deamination ${platypus} --range ${params.pmdtools_range} ${treatment} ${snpcap} -n ${params.pmdtools_max_reads} > "${libraryid}".cpg.range."${params.pmdtools_range}".txt

echo "Running indexing"
samtools index ${libraryid}.pmd.bam ${size}
Expand Down Expand Up @@ -2573,7 +2576,7 @@ process sexdeterrmine_prep {

// As we collect all files for a single sex_deterrmine run, we DO NOT use the normal input/output tuple
process sexdeterrmine {
label 'sc_small'
label 'mc_small'
publishDir "${params.outdir}/sex_determination", mode: params.publish_dir_mode

input:
Expand Down Expand Up @@ -2909,7 +2912,7 @@ process output_documentation {
*/

process get_software_versions {
label 'sc_tiny'
label 'mc_small'
publishDir "${params.outdir}/pipeline_info", mode: params.publish_dir_mode,
saveAs: { filename ->
if (filename.indexOf(".csv") > 0) filename
Expand Down