Skip to content

Commit

Permalink
Merge pull request #111 from rki-mf1/dev
Browse files Browse the repository at this point in the history
Dev
  • Loading branch information
hoelzer authored Nov 8, 2024
2 parents 6385b01 + 61c9f31 commit 3bde915
Show file tree
Hide file tree
Showing 12 changed files with 158 additions and 32 deletions.
6 changes: 6 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,11 @@
# Changelog

## unreleased

### Added

- `bwa mem` as short-read mapper alternative, parameter: `--bwa`

## [v1.0.2] - 2024-05-17

### Added
Expand Down
4 changes: 4 additions & 0 deletions CITATIONS.md
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,10 @@

> Quinlan AR, Hall IM. BEDTools: a flexible suite of utilities for comparing genomic features. Bioinformatics. 2010 Mar 15;26(6):841-2. doi: 10.1093/bioinformatics/btq033. Epub 2010 Jan 28. PubMed PMID: 20110278; PubMed Central PMCID: PMC2832824.
- [BWA](https://arxiv.org/abs/1303.3997)

> Li H. (2013) Aligning sequence reads, clone sequences and assembly contigs with BWA-MEM. arXiv:1303.3997v2 [q-bio.GN]
- [FastQC](https://www.bioinformatics.babraham.ac.uk/projects/fastqc/)

- [Minimap2](https://pubmed.ncbi.nlm.nih.gov/29750242/)
Expand Down
3 changes: 2 additions & 1 deletion clean.nf
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ Author: [email protected]

// Parameters sanity checking

Set valid_params = ['max_cores', 'cores', 'max_memory', 'memory', 'profile', 'help', 'input', 'input_type', 'list', 'host', 'own', 'control', 'keep', 'rm_rrna', 'bbduk', 'bbduk_kmer', 'bbduk_qin', 'reads_rna', 'min_clip', 'dcs_strict', 'output', 'multiqc_dir', 'nf_runinfo_dir', 'databases', 'cleanup_work_dir','condaCacheDir', 'singularityCacheDir', 'singularityCacheDir', 'cloudProcess', 'conda-cache-dir', 'singularity-cache-dir', 'cloud-process', 'publish_dir_mode', 'no_intermediate', 'skip_qc'] // don't ask me why there is also 'conda-cache-dir', 'singularity-cache-dir', 'cloud-process'
Set valid_params = ['max_cores', 'cores', 'max_memory', 'memory', 'profile', 'help', 'input', 'input_type', 'list', 'host', 'own', 'control', 'keep', 'rm_rrna', 'bwa', 'bbduk', 'bbduk_kmer', 'bbduk_qin', 'reads_rna', 'min_clip', 'dcs_strict', 'output', 'multiqc_dir', 'nf_runinfo_dir', 'databases', 'cleanup_work_dir','condaCacheDir', 'singularityCacheDir', 'singularityCacheDir', 'cloudProcess', 'conda-cache-dir', 'singularity-cache-dir', 'cloud-process', 'publish_dir_mode', 'no_intermediate', 'skip_qc'] // don't ask me why there is also 'conda-cache-dir', 'singularity-cache-dir', 'cloud-process'
def parameter_diff = params.keySet() - valid_params
if (parameter_diff.size() != 0){
exit 1, "ERROR: Parameter(s) $parameter_diff is/are not valid in the pipeline!\n"
Expand Down Expand Up @@ -275,6 +275,7 @@ def helpMSG() {
Reads are assigned to a combined index for decontamination and keeping. The use of this parameter can prevent
false positive hits and the accidental removal of reads due to (poor quality) mappings.
${c_green}--rm_rrna ${c_reset} Clean your data from rRNA [default: $params.rm_rrna]
${c_green}--bwa${c_reset} Add this flag to use BAW MEM instead of minimap2 for decontamination of short reads [default: $params.bwa]
${c_green}--bbduk${c_reset} Add this flag to use bbduk instead of minimap2 for decontamination of short reads [default: $params.bbduk]
${c_green}--bbduk_kmer${c_reset} Set kmer for bbduk [default: $params.bbduk_kmer]
${c_green}--bbduk_qin${c_reset} Set quality ASCII encoding for bbduk [default: $params.bbduk_qin; options are: 64, 33, auto]
Expand Down
19 changes: 10 additions & 9 deletions configs/container.config
Original file line number Diff line number Diff line change
@@ -1,11 +1,12 @@
process {
withLabel: smallTask { container = 'nanozoo/samtools:1.14--d8fb865' }
withLabel: minimap2 { container = 'nanozoo/minimap2:2.26--d9ef6b6' }
withLabel: bbmap { container = 'nanozoo/bbmap:38.79--8e915d7' }
withLabel: multiqc { container = 'nanozoo/multiqc:1.9--aba729b' }
withLabel: fastqc { container = 'nanozoo/fastqc:0.11.9--f61b8b4' }
withLabel: nanoplot { container = 'nanozoo/nanoplot:1.32.0--1ae6f5d' }
withLabel: quast { container = 'nanozoo/quast:5.0.2--e7f0cfe' }
withLabel: bed_samtools { container = 'nanozoo/bed_samtools:2.30.0--cc7d1b9' }
withLabel: seqkit { container = 'nanozoo/seqkit:2.6.1--022e008' }
withLabel: smallTask { container = 'nanozoo/samtools:1.14--d8fb865' }
withLabel: minimap2 { container = 'nanozoo/minimap2:2.26--d9ef6b6' }
withLabel: bwa { container = 'nanozoo/bwa:0.7.18--0eff897' }
withLabel: bbmap { container = 'nanozoo/bbmap:38.79--8e915d7' }
withLabel: multiqc { container = 'nanozoo/multiqc:1.9--aba729b' }
withLabel: fastqc { container = 'nanozoo/fastqc:0.11.9--f61b8b4' }
withLabel: nanoplot { container = 'nanozoo/nanoplot:1.32.0--1ae6f5d' }
withLabel: quast { container = 'nanozoo/quast:5.0.2--e7f0cfe' }
withLabel: bed_samtools { container = 'nanozoo/bed_samtools:2.30.0--cc7d1b9' }
withLabel: seqkit { container = 'nanozoo/seqkit:2.6.1--022e008' }
}
17 changes: 9 additions & 8 deletions configs/node.config
Original file line number Diff line number Diff line change
@@ -1,11 +1,12 @@
process {
withLabel: minimap2 { cpus = 24; memory = 24.GB }
withLabel: bbmap { cpus = 24; memory = 24.GB }
withLabel: smallTask { cpus = 1; memory = 2.GB }
withLabel: pysam { cpus = 2; memory = 4.GB }
withLabel: fastqc { cpus = {2 * task.attempt}; memory = {4.GB * task.attempt } ; maxRetries = 3 ; errorStrategy = { task.exitStatus in 130..140 ? 'retry' : 'terminate' } }
withLabel: multiqc { cpus = 4; memory = 4.GB }
withLabel: nanoplot{ cpus = 8; memory = 8.GB }
withLabel: quast{ cpus = 8; memory = 8.GB }
withLabel: minimap2 { cpus = 24; memory = {24.GB * task.attempt}; maxRetries = 4 ; errorStrategy = { task.exitStatus in 1 || 130..140 ? 'retry' : 'terminate' }; }
withLabel: bwa { cpus = 24; memory = {24.GB * task.attempt}; maxRetries = 3 ; errorStrategy = { task.exitStatus in 130..140 ? 'retry' : 'terminate' } }
withLabel: bbmap { cpus = 24; memory = {24.GB * task.attempt}; maxRetries = 6 ; errorStrategy = { task.exitStatus in 1 || 130..140 ? 'retry' : 'terminate' }; }
withLabel: smallTask { cpus = 1; memory = 2.GB }
withLabel: pysam { cpus = 2; memory = 4.GB }
withLabel: fastqc { cpus = 2; memory = {4.GB * task.attempt } ; maxRetries = 3 ; errorStrategy = { task.exitStatus in 130..140 ? 'retry' : 'terminate' }; }
withLabel: multiqc { cpus = 4; memory = 4.GB }
withLabel: nanoplot { cpus = 8; memory = 8.GB }
withLabel: quast { cpus = 8; memory = 8.GB }
}

8 changes: 8 additions & 0 deletions envs/bwa.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
name: bwa
channels:
- bioconda
- conda-forge
dependencies:
- bwa=0.7.18
- samtools=1.20
- htslib=1.20
52 changes: 52 additions & 0 deletions modules/bwa.nf
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
process bwa_index {
label 'bwa'

input:
path(fasta)

output:
path(bwa) , emit: index

script:
"""
mkdir bwa
bwa \\
index \\
-p bwa/db \\
$fasta
"""

stub:
"""
mkdir bwa
touch bwa/db.{amb,ann,bwt,pac,sa}
"""
}

process bwa {
label 'bwa'

input:
tuple val(name), path(input)
path(db_index)
path(db)


output:
tuple val(name), val('raw'), path("${name}.bam"), emit: bam // input just for naming

script:
"""
INDEX=`find -L ./ -name "*.amb" | sed 's/\\.amb\$//'`
bwa mem \\
-t $task.cpus \\
\$INDEX \\
$input \\
| samtools view -bhS -@ $task.cpus > ${name}.bam
"""
stub:
"""
touch ${name}.bam
"""
}
1 change: 1 addition & 0 deletions nextflow.config
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ params {
control = false
keep = false
rm_rrna = false
bwa = false
bbduk = false
bbduk_kmer = 27
bbduk_qin = 'auto'
Expand Down
1 change: 1 addition & 0 deletions tests/fasta/main.nf.test
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ nextflow_pipeline {
script "../../clean.nf"

test("Stub") {
tag "minimap2"
options "-stub-run"

when {
Expand Down
60 changes: 52 additions & 8 deletions tests/illumina/main.nf.test
Original file line number Diff line number Diff line change
Expand Up @@ -4,12 +4,13 @@ nextflow_pipeline {
script "../../clean.nf"

test("Stub paired-end") {
tag "minimap2"
options "-stub-run"

when {
params {
input_type = "illumina"
input = "$projectDir/assets/EMPTY_FILE_{R1,R2}.fastq"
input_type = "illumina"
input = "$projectDir/assets/EMPTY_FILE_{R1,R2}.fastq"
own = "$projectDir/assets/EMPTY_FILE"
}
}
Expand All @@ -22,12 +23,13 @@ nextflow_pipeline {
}

test("Stub single-end") {
tag "minimap2"
options "-stub-run"

when {
params {
input_type = "illumina_single_end"
input = "$projectDir/assets/EMPTY_FILE"
input_type = "illumina_single_end"
input = "$projectDir/assets/EMPTY_FILE"
own = "$projectDir/assets/EMPTY_FILE"
}
}
Expand All @@ -39,13 +41,54 @@ nextflow_pipeline {
}
}

test("Stub paired-end bwa") {
tag "bwa"
options "-stub-run"

when {
params {
input_type = "illumina"
input = "$projectDir/assets/EMPTY_FILE_{R1,R2}.fastq"
own = "$projectDir/assets/EMPTY_FILE"
bwa = true
}
}

then {
assertAll(
{ assert workflow.success }
)
}
}

test("Stub single-end bwa") {
tag "bwa"
options "-stub-run"

when {
params {
input_type = "illumina_single_end"
input = "$projectDir/assets/EMPTY_FILE"
own = "$projectDir/assets/EMPTY_FILE"
bwa = true
}
}

then {
assertAll(
{ assert workflow.success }
)
}
}

test("Stub paired-end bbduk") {
tag "bbduk"
options "-stub-run"

when {
params {
input_type = "illumina"
input = "$projectDir/assets/EMPTY_FILE_{R1,R2}.fastq"
input_type = "illumina"
input = "$projectDir/assets/EMPTY_FILE_{R1,R2}.fastq"
own = "$projectDir/assets/EMPTY_FILE"
bbduk = true
}
Expand All @@ -59,12 +102,13 @@ nextflow_pipeline {
}

test("Stub single-end bbduk") {
tag "bbduk"
options "-stub-run"

when {
params {
input_type = "illumina_single_end"
input = "$projectDir/assets/EMPTY_FILE"
input_type = "illumina_single_end"
input = "$projectDir/assets/EMPTY_FILE"
own = "$projectDir/assets/EMPTY_FILE"
bbduk = true
}
Expand Down
1 change: 1 addition & 0 deletions tests/nanopore/main.nf.test
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ nextflow_pipeline {
script "../../clean.nf"

test("Stub") {
tag "minimap2"
options "-stub-run"

when {
Expand Down
18 changes: 12 additions & 6 deletions workflows/clean_wf.nf
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
include { minimap2 } from '../modules/minimap2'
include { bwa_index; bwa } from '../modules/bwa'
include { bbduk } from '../modules/bbmap'
include { bbduk_stats } from '../modules/utils'
include { split_bam; fastq_from_bam ; idxstats_from_bam ; flagstats_from_bam ; index_bam as index_bam; index_bam as index_bam2; sort_bam ; filter_true_dcs_alignments ; merge_bam as merge_bam1 ; merge_bam as merge_bam2 ; merge_bam as merge_bam3 ; merge_bam as merge_bam4 ; filter_soft_clipped_alignments } from '../modules/alignment_processing'
Expand All @@ -22,11 +23,16 @@ workflow clean {
out_reads = bbduk.out.cleaned_reads.concat(bbduk.out.contaminated_reads)
bams_bai = Channel.empty()
sort_bam_ch = Channel.empty()
}
}
else {
minimap2(input, contamination) | sort_bam | index_bam | ( idxstats_from_bam & flagstats_from_bam )

split_bam(minimap2.out.bam)
if ( params.bwa ) {
bwa_index(contamination)
bwa(input, bwa_index.out, contamination) | sort_bam | index_bam | ( idxstats_from_bam & flagstats_from_bam )
split_bam(bwa.out.bam)
} else {
minimap2(input, contamination) | sort_bam | index_bam | ( idxstats_from_bam & flagstats_from_bam )
split_bam(minimap2.out.bam)
}
contamination_bam = split_bam.out.mapped
cleaned_bam = split_bam.out.unmapped
if ( params.control && 'dcs' in params.control.split(',') && params.dcs_strict ) {
Expand All @@ -51,7 +57,7 @@ workflow clean {
idxstats = idxstats_from_bam.out
flagstats = flagstats_from_bam.out
out_reads = fastq_from_bam.out
sort_bam_ch = sort_bam.out
sort_bam_ch = sort_bam.out
}

emit:
Expand All @@ -60,5 +66,5 @@ workflow clean {
flagstats
out_reads
bams_bai
sort_bam_ch
sort_bam_ch
}

0 comments on commit 3bde915

Please sign in to comment.