trimming.nf
#!/usr/bin/env nextflow
nextflow.enable.dsl = 2
include { set_key_for_group_tuple } from "./aligning"
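
// Keep only the leading run of unambiguous bases (A/C/G/T) of an adapter
// sequence; warns and truncates when other characters are present.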
def remove_ambiguous_bases(adapter) {
    def x = adapter.takeWhile { it in ['A', 'C', 'T', 'G'] }
    if (x != adapter) {
        println("WARN: Adapter '${adapter}' contains ambiguous bases, using '${x}' instead")
    }
    return x
}
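
// Run fastp adapter trimming on one sequencing run (single- or paired-end).
// Trimmed FASTQ files are emitted for downstream alignment; the fastp
// JSON/HTML reports are published under ${params.outdir}/<sample_id>/stats/<align_id>/.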
process fastp_adapter_trim {
    cpus params.threads
    scratch true
    container "${params.container}"
    tag "${align_id}"
    publishDir "${params.outdir}/${sample_id}/stats/${align_id}", pattern: "fastp*"

    input:
    tuple val(sample_id), val(align_id), path(r1), path(r2), val(adapterP7), val(adapterP5), val(is_paired)

    output:
    tuple val(sample_id), path(name1), path(name2), val(is_paired), emit: fastq
    tuple val(sample_id), path('fastp.json'), emit: json
    tuple val(sample_id), path('fastp.html'), emit: html

    script:
    name1 = "${align_id}.1.trimmed.fastq.gz"
    if (adapterP7 == "") {
        command = is_paired ? "--detect_adapter_for_pe" : ""
    } else {
        command = "--adapter_sequence ${adapterP7}" + (is_paired ? " --adapter_sequence_r2 ${adapterP5}" : "")
    }
    if (is_paired) {
        name2 = "${align_id}.2.trimmed.fastq.gz"
        """
        fastp --in1 "${r1}" \
            --in2 "${r2}" \
            ${command} \
            --out1 "${name1}" \
            --out2 "${name2}" \
            --disable_quality_filtering \
            --disable_length_filtering \
            --thread ${task.cpus}
        """
    } else {
        name2 = './'
        """
        fastp -i "${r1}" \
            ${command} \
            -o "${name1}" \
            --disable_quality_filtering \
            --disable_length_filtering \
            --thread ${task.cpus}
        """
    }
}
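
// Trim a ready-made channel of read tuples and emit the trimmed FASTQ tuples.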
workflow trimReads {
    take: // [sample_id, align_id, r1, r2, adapterP7, adapterP5, is_paired]
    data

    main:
    trimmed = fastp_adapter_trim(data).fastq

    emit:
    trimmed
}
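
// Build the input channel from the tab-separated params.samples_file and trim it.
// Expected columns (as read by the .map() below): sample_id, align_id, reads1,
// reads2, type ('paired' for paired-end), adapterP7, adapterP5.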
workflow trimReadsFromFile {
    main:
    reads_ch = Channel.fromPath(params.samples_file)
        .splitCsv(header: true, sep: '\t')
        .map(row -> tuple(row.sample_id, row.align_id, row.reads1,
                          row.type == 'paired' ? row.reads2 : file('./'),
                          remove_ambiguous_bases(row.adapterP7),
                          row.type == 'paired' ? remove_ambiguous_bases(row.adapterP5) : "",
                          row.type == 'paired'))
    trimReads(set_key_for_group_tuple(reads_ch))

    emit:
    trimReads.out
}
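
// Default entry workflow. A typical invocation (parameter names as used above;
// the container image and file paths are placeholders) might look like:
//   nextflow run trimming.nf --samples_file samples.tsv --threads 4 \
//       --container docker://example/fastp:latest --outdir results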
workflow {
    trimReadsFromFile()
}