Merge pull request #159 from nf-core/ar_optional
Flexible AdapterRemoval
apeltzer authored Mar 5, 2019
2 parents 5a255fd + 5a637c9 commit 85b0406
Showing 5 changed files with 148 additions and 36 deletions.
6 changes: 6 additions & 0 deletions .travis.yml
@@ -40,6 +40,12 @@ script:
- nextflow run ${TRAVIS_BUILD_DIR} -profile test,docker --pairedEnd --saveReference
# Run the basic pipeline with single end data (pretending it's single end actually)
- nextflow run ${TRAVIS_BUILD_DIR} -profile test,docker --singleEnd --bwa_index results/reference_genome/bwa_index/bwa_index/
# Run the basic pipeline with paired end data without collapsing
- nextflow run ${TRAVIS_BUILD_DIR} -profile test,docker --pairedEnd --skip_collapse --saveReference
# Run the basic pipeline with paired end data without trimming
- nextflow run ${TRAVIS_BUILD_DIR} -profile test,docker --pairedEnd --skip_trim --saveReference
# Run the basic pipeline with paired end data without AdapterRemoval
- nextflow run ${TRAVIS_BUILD_DIR} -profile test,docker --pairedEnd --skip_adapterremoval --saveReference
# Run the same pipeline testing optional step: fastp, complexity
- nextflow run ${TRAVIS_BUILD_DIR} -profile test,docker --pairedEnd --complexity_filter --bwa_index results/reference_genome/bwa_index/bwa_index/
# Test BAM Trimming
1 change: 1 addition & 0 deletions CHANGELOG.md
@@ -10,6 +10,7 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.

* [#152](https://github.com/nf-core/eager/pull/152) - Clarified `--complexity_filter` flag to be specifically for poly G trimming.
* [#155](https://github.com/nf-core/eager/pull/155) - Added [Dedup log to output folders](https://github.com/nf-core/eager/issues/154)
* [#159](https://github.com/nf-core/eager/pull/159) - Added possibility to skip AdapterRemoval, read merging, or trimming, fixing [#64](https://github.com/nf-core/eager/issues/64) and [#137](https://github.com/nf-core/eager/issues/137) - thanks to @maxibor, @jfy133

### `Fixed`

24 changes: 23 additions & 1 deletion docs/usage.md
@@ -283,12 +283,16 @@ This part of the documentation contains a list of user-adjustable parameters in

## Step skipping parameters

Some of the steps in the pipeline can be executed optionally. If you specify specific steps to be skipped, there won't be any output related to these modules.

### `--skip_preseq`

Turns off the computation of library complexity estimation.

### `--skip_adapterremoval`

Turns off adaptor trimming and paired-end read merging. Equivalent to setting both `--skip_collapse` and `--skip_trim`.
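
For example (a usage sketch, analogous to the `--skip_collapse` and `--skip_trim` examples below):

```bash
--pairedEnd --skip_adapterremoval --reads '*.fastq'
```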

### `--skip_damage_calculation`

Turns off the DamageProfiler module to compute DNA damage profiles.
@@ -333,6 +337,24 @@ Defines the minimum read quality per base that is required for a base to be kept
### `--clip_min_adap_overlap` 1
Sets the minimum overlap between two reads when read merging is performed. Default is set to `1` base overlap.

### `--skip_collapse`

Turns off paired-end read merging.

For example:
```bash
--pairedEnd --skip_collapse --reads '*.fastq'
```

### `--skip_trim`

Turns off adaptor and quality trimming.

For example:
```bash
--pairedEnd --skip_trim --reads '*.fastq'
```

## Read Mapping Parameters

## BWA (default)
146 changes: 111 additions & 35 deletions main.nf
@@ -44,6 +44,7 @@ def helpMessage() {
--saveReference Saves reference genome indices for later reusage
Skipping Skip any of the mentioned steps
--skip_adapterremoval
--skip_preseq
--skip_damage_calculation
--skip_qualimap
@@ -59,6 +60,8 @@ def helpMessage() {
--clip_readlength Specify read minimum length to be kept for downstream analysis
--clip_min_read_quality Specify minimum base quality for not trimming off bases
--min_adap_overlap Specify minimum adapter overlap
--skip_collapse Skip merging forward and reverse reads together. (Only for PE samples)
--skip_trim Skip adaptor and quality trimming
BWA Mapping
--bwaalnn Specify the -n parameter for BWA aln.
@@ -145,6 +148,7 @@ params.email = false
params.plaintext_email = false

// Skipping parts of the pipeline for impatient users
params.skip_adapterremoval = false
params.skip_preseq = false
params.skip_damage_calculation = false
params.skip_qualimap = false
@@ -160,6 +164,8 @@ params.clip_reverse_adaptor = "AGATCGGAAGAGCGTCGTGTAGGGAAAGAGTGTA"
params.clip_readlength = 30
params.clip_min_read_quality = 20
params.min_adap_overlap = 1
params.skip_collapse = false
params.skip_trim = false

//Read mapping parameters (default = BWA aln default)
params.bwaalnn = 0.04
@@ -258,6 +264,10 @@ if( params.singleEnd || params.pairedEnd || params.bam){
exit 1, "Please specify either --singleEnd, --pairedEnd to execute the pipeline on FastQ files and --bam for previously processed BAM files!"
}

//Validate that skip_collapse is only set to True for pairedEnd reads!
if (params.skip_collapse && params.singleEnd){
exit 1, "--skip_collapse can only be set for pairedEnd samples!"
}

//AWSBatch sanity checking
if(workflow.profile == 'awsbatch'){
@@ -343,6 +353,8 @@ summary['Fasta Ref'] = params.fasta
summary['BAM Index Type'] = (params.large_ref == "") ? 'BAI' : 'CSI'
if(params.bwa_index) summary['BWA Index'] = params.bwa_index
summary['Data Type'] = params.singleEnd ? 'Single-End' : 'Paired-End'
summary['Skip Collapsing'] = params.skip_collapse ? 'Yes' : 'No'
summary['Skip Trimming'] = params.skip_trim ? 'Yes' : 'No'
summary['Max Memory'] = params.max_memory
summary['Max CPUs'] = params.max_cpus
summary['Max Time'] = params.max_time
@@ -362,7 +374,7 @@ if(workflow.profile == 'awsbatch'){
summary['AWS Queue'] = params.awsqueue
}
if(params.email) summary['E-mail Address'] = params.email
log.info summary.collect { k,v -> "${k.padRight(15)}: $v" }.join("\n")
log.info summary.collect { k,v -> "${k.padRight(35)}: $v" }.join("\n")
log.info "========================================="


@@ -501,8 +513,7 @@ process convertBam {
file bam from ch_bam_to_fastq_convert

output:
set val("${base}"), file("*.fastq.gz") into (ch_read_files_converted_fastqc, ch_read_files_converted_fastp)
file("*.fastq.gz") into (ch_read_files_converted_mapping_bwa, ch_read_files_converted_mapping_cm, ch_read_files_converted_mapping_bwamem)
set val("${base}"), file("*.fastq.gz") into (ch_read_files_converted_fastqc, ch_read_files_converted_fastp, ch_read_files_converted_mapping_bwa, ch_read_files_converted_mapping_cm, ch_read_files_converted_mapping_bwamem)

script:
base = "${bam.baseName}"
@@ -568,60 +579,87 @@ process fastp {
/*
* STEP 2 - Adapter Clipping / Read Merging
*/


//Initialize empty channel if we skip adapterremoval entirely
if(params.skip_adapterremoval) {
//No logs if no AR is run
ch_adapterremoval_logs = Channel.empty()
//Either coming from complexity filtering, or directly use reads normally directed to clipping first and push them through to the other channels downstream!
ch_clipped_reads_complexity_filtered_poly_g.mix(ch_read_files_clip).into { ch_clipped_reads;ch_clipped_reads_for_fastqc;ch_clipped_reads_circularmapper;ch_clipped_reads_bwamem }
} else {
process adapter_removal {
tag "$name"
publishDir "${params.outdir}/read_merging", mode: 'copy'

when: !params.bam
when: !params.bam && !params.skip_adapterremoval

input:
set val(name), file(reads) from ( params.complexity_filter_poly_g ? ch_clipped_reads_complexity_filtered_poly_g : ch_read_files_clip )

output:
file "*.combined*.gz" into (ch_clipped_reads, ch_clipped_reads_for_fastqc,ch_clipped_reads_circularmapper,ch_clipped_reads_bwamem)
file "*.settings" into ch_adapterremoval_logs
set val(base), file("output/*.gz") into (ch_clipped_reads,ch_clipped_reads_for_fastqc,ch_clipped_reads_circularmapper,ch_clipped_reads_bwamem)
file("*.settings") into ch_adapterremoval_logs

script:
prefix = reads[0].toString() - ~/(_R1)?(_trimmed)?(_val_1)?(\.fq)?(\.fastq)?(\.gz)?$/
//Readprefixing only required for PE data with merging
fixprefix = (params.singleEnd) ? "" : "AdapterRemovalFixPrefix ${prefix}.combined.fq.gz ${prefix}.combined.prefixed.fq.gz"
base = reads[0].baseName
//This checks whether we skip trimming and defines a variable respectively
trim_me = params.skip_trim ? '' : "--trimns --trimqualities --adapter1 ${params.clip_forward_adaptor} --adapter2 ${params.clip_reverse_adaptor} --minlength ${params.clip_readlength} --minquality ${params.clip_min_read_quality} --minadapteroverlap ${params.min_adap_overlap}"
collapse_me = params.skip_collapse ? '' : '--collapse'

if( !params.singleEnd ){
//PE mode, dependent on trim_me and collapse_me the respective procedure is run or not :-)
if (!params.singleEnd && !params.skip_collapse && !params.skip_trim){
"""
AdapterRemoval --file1 ${reads[0]} --file2 ${reads[1]} --basename ${prefix} --gzip --threads ${task.cpus} --trimns --trimqualities --adapter1 ${params.clip_forward_adaptor} --adapter2 ${params.clip_reverse_adaptor} --minlength ${params.clip_readlength} --minquality ${params.clip_min_read_quality} --minadapteroverlap ${params.min_adap_overlap} --collapse
mkdir -p output
AdapterRemoval --file1 ${reads[0]} --file2 ${reads[1]} --basename ${base} ${trim_me} --gzip --threads ${task.cpus} ${collapse_me}
#Combine files
zcat *.collapsed.gz *.collapsed.truncated.gz *.singleton.truncated.gz *.pair1.truncated.gz *.pair2.truncated.gz | gzip > ${prefix}.combined.fq.gz
${fixprefix}
rm ${prefix}.combined.fq.gz
zcat *.collapsed.gz *.collapsed.truncated.gz *.singleton.truncated.gz *.pair1.truncated.gz *.pair2.truncated.gz | gzip > output/${base}.combined.fq.gz
"""
//PE, don't collapse, but trim reads
} else if (!params.singleEnd && params.skip_collapse && !params.skip_trim) {
"""
mkdir -p output
AdapterRemoval --file1 ${reads[0]} --file2 ${reads[1]} --basename ${base} --gzip --threads ${task.cpus} ${trim_me} ${collapse_me}
mv ${base}.pair*.truncated.gz output/
"""
//PE, collapse, but don't trim reads
} else if (!params.singleEnd && !params.skip_collapse && params.skip_trim) {
"""
mkdir -p output
AdapterRemoval --file1 ${reads[0]} --file2 ${reads[1]} --basename ${base} --gzip --threads ${task.cpus} ${collapse_me} ${trim_me}
mv ${base}.pair*.truncated.gz output/
"""
} else {
//SE, collapse not possible, trim reads
"""
AdapterRemoval --file1 ${reads[0]} --basename ${prefix} --gzip --threads ${task.cpus} --trimns --trimqualities --adapter1 ${params.clip_forward_adaptor} --minlength ${params.clip_readlength} --minquality ${params.clip_min_read_quality}
# Pseudo-Combine
mv *.truncated.gz ${prefix}.combined.fq.gz
mkdir -p output
AdapterRemoval --file1 ${reads[0]} --basename ${base} --gzip --threads ${task.cpus} ${trim_me}
mv *.truncated.gz output/
"""
}
}
}



/*
* STEP 2.1 - FastQC after clipping/merging (if applied!)
*/
process fastqc_after_clipping {
tag "${reads[0].baseName}"
tag "${prefix}"
publishDir "${params.outdir}/FastQC/after_clipping", mode: 'copy',
saveAs: {filename -> filename.indexOf(".zip") > 0 ? "zips/$filename" : "$filename"}

when: !params.bam
when: !params.bam && !params.skip_adapterremoval

input:
file(reads) from ch_clipped_reads_for_fastqc
set val(name), file(reads) from ch_clipped_reads_for_fastqc

output:
file "*_fastqc.{zip,html}" optional true into ch_fastqc_after_clipping

script:
prefix = reads[0].toString().tokenize('.')[0]
"""
fastqc -q $reads
"""
@@ -638,7 +676,8 @@ process bwa {
when: !params.circularmapper && !params.bwamem

input:
file(reads) from ch_clipped_reads.mix(ch_read_files_converted_mapping_bwa)
set val(name), file(reads) from ch_clipped_reads.mix(ch_read_files_converted_mapping_bwa)

file index from ch_bwa_index.first()


@@ -648,14 +687,28 @@


script:
prefix = reads[0].toString() - ~/(_R1)?(\.combined\.)?(prefixed)?(_trimmed)?(_val_1)?(\.fq)?(\.fastq)?(\.gz)?$/
fasta = "${index}/*.fasta"
size = "${params.large_ref}" ? '-c' : ''

//PE data without merging, PE data without any AR applied
if (!params.singleEnd && (params.skip_collapse || params.skip_adapterremoval)){
prefix = reads[0].toString().tokenize('.')[0]
"""
bwa aln -t ${task.cpus} $fasta $reads -n ${params.bwaalnn} -l ${params.bwaalnl} -k ${params.bwaalnk} -f "${reads.baseName}.sai"
bwa samse -r "@RG\\tID:ILLUMINA-${prefix}\\tSM:${prefix}\\tPL:illumina" $fasta "${reads.baseName}".sai $reads | samtools sort -@ ${task.cpus} -O bam - > "${prefix}".sorted.bam
bwa aln -t ${task.cpus} $fasta ${reads[0]} -n ${params.bwaalnn} -l ${params.bwaalnl} -k ${params.bwaalnk} -f ${prefix}.r1.sai
bwa aln -t ${task.cpus} $fasta ${reads[1]} -n ${params.bwaalnn} -l ${params.bwaalnl} -k ${params.bwaalnk} -f ${prefix}.r2.sai
bwa sampe -r "@RG\\tID:ILLUMINA-${prefix}\\tSM:${prefix}\\tPL:illumina" $fasta ${prefix}.r1.sai ${prefix}.r2.sai ${reads[0]} ${reads[1]} | samtools sort -@ ${task.cpus} -O bam - > ${prefix}.sorted.bam
samtools index "${size}" "${prefix}".sorted.bam
"""
} else {
//PE collapsed, or SE data
prefix = reads[0].toString().tokenize('.')[0]
"""
bwa aln -t ${task.cpus} $fasta $reads -n ${params.bwaalnn} -l ${params.bwaalnl} -k ${params.bwaalnk} -f ${prefix}.sai
bwa samse -r "@RG\\tID:ILLUMINA-${prefix}\\tSM:${prefix}\\tPL:illumina" $fasta ${prefix}.sai $reads | samtools sort -@ ${task.cpus} -O bam - > "${prefix}".sorted.bam
samtools index "${size}" "${prefix}".sorted.bam
"""
}

}

process circulargenerator{
@@ -694,7 +747,7 @@ process circularmapper{
when: params.circularmapper

input:
file reads from ch_clipped_reads_circularmapper.mix(ch_read_files_converted_mapping_cm)
set val(name), file(reads) from ch_clipped_reads_circularmapper.mix(ch_read_files_converted_mapping_cm)
file index from ch_circularmapper_indices.first()

output:
@@ -703,17 +756,31 @@

script:
filter = "${params.circularfilter}" ? '' : '-f true -x false'
prefix = reads[0].toString() - ~/(_R1)?(\.combined\.)?(prefixed)?(_trimmed)?(_val_1)?(\.fq)?(\.fastq)?(\.gz)?$/

fasta = "${index}/*_*.fasta"
size = "${params.large_ref}" ? '-c' : ''

if (!params.singleEnd && params.skip_collapse ){
prefix = reads[0].toString().tokenize('.')[0]
"""
bwa aln -t ${task.cpus} $fasta $reads -n ${params.bwaalnn} -l ${params.bwaalnl} -k ${params.bwaalnk} -f "${reads.baseName}.sai"
bwa samse -r "@RG\\tID:ILLUMINA-${prefix}\\tSM:${prefix}\\tPL:illumina" $fasta "${reads.baseName}".sai $reads > tmp.out
bwa aln -t ${task.cpus} $fasta ${reads[0]} -n ${params.bwaalnn} -l ${params.bwaalnl} -k ${params.bwaalnk} -f ${prefix}.r1.sai
bwa aln -t ${task.cpus} $fasta ${reads[1]} -n ${params.bwaalnn} -l ${params.bwaalnl} -k ${params.bwaalnk} -f ${prefix}.r2.sai
bwa sampe -r "@RG\\tID:ILLUMINA-${prefix}\\tSM:${prefix}\\tPL:illumina" $fasta ${prefix}.r1.sai ${prefix}.r2.sai ${reads[0]} ${reads[1]} > tmp.out
realignsamfile -e ${params.circularextension} -i tmp.out -r $fasta $filter
samtools sort -@ ${task.cpus} -O bam tmp_realigned.bam > ${prefix}.sorted.bam
samtools index "${size}" ${prefix}.sorted.bam
"""
} else {
prefix = reads[0].toString().tokenize('.')[0]
"""
bwa aln -t ${task.cpus} $fasta $reads -n ${params.bwaalnn} -l ${params.bwaalnl} -k ${params.bwaalnk} -f ${prefix}.sai
bwa samse -r "@RG\\tID:ILLUMINA-${prefix}\\tSM:${prefix}\\tPL:illumina" $fasta ${prefix}.sai $reads > tmp.out
realignsamfile -e ${params.circularextension} -i tmp.out -r $fasta $filter
samtools sort -@ ${task.cpus} -O bam tmp_realigned.bam > "${prefix}".sorted.bam
samtools index "${size}" "${prefix}".sorted.bam
"""
}

}

process bwamem {
@@ -723,7 +790,7 @@
when: params.bwamem && !params.circularmapper

input:
file(reads) from ch_clipped_reads_bwamem.mix(ch_read_files_converted_mapping_bwamem)
set val(name), file(reads) from ch_clipped_reads_bwamem.mix(ch_read_files_converted_mapping_bwamem)
file index from ch_bwa_index_bwamem.first()

output:
@@ -735,10 +802,19 @@
prefix = reads[0].toString() - ~/(_R1)?(\.combined\.)?(prefixed)?(_trimmed)?(_val_1)?(\.fq)?(\.fastq)?(\.gz)?$/
fasta = "${index}/*.fasta"
size = "${params.large_ref}" ? '-c' : ''

if (!params.singleEnd && params.skip_collapse){
"""
bwa mem -t ${task.cpus} $fasta ${reads[0]} ${reads[1]} -R "@RG\\tID:ILLUMINA-${prefix}\\tSM:${prefix}\\tPL:illumina" | samtools sort -@ ${task.cpus} -O bam - > "${prefix}".sorted.bam
samtools index "${size}" -@ ${task.cpus} "${prefix}".sorted.bam
"""
} else {
"""
bwa mem -t ${task.cpus} $fasta $reads -R "@RG\\tID:ILLUMINA-${prefix}\\tSM:${prefix}\\tPL:illumina" | samtools sort -@ ${task.cpus} -O bam - > "${prefix}".sorted.bam
samtools index "${size}" -@ ${task.cpus} "${prefix}".sorted.bam
"""
}

}

/*
7 changes: 7 additions & 0 deletions nextflow.config
@@ -14,6 +14,7 @@ params {

//Pipeline options
aligner = 'bwa'

saveReference = false
saveTrimmed = true
saveAlignedIntermediates = false
@@ -30,6 +31,11 @@ params {
complexity_filter_poly_g_min = 10
trim_bam = false

//Skipping adapterremoval, trimming or collapsing defaults
skip_adapterremoval = false
skip_trim = false
skip_collapse = false

// AWS Batch
awsqueue = false
awsregion = 'eu-west-1'
@@ -38,6 +44,7 @@
custom_config_version = 'master'
}


// Load base.config by default for all pipelines
includeConfig 'conf/base.config'

