forked from nf-core/sarek
-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathmain.nf
75 lines (67 loc) · 2.36 KB
/
main.nf
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
// CREATE_INTERVALS_BED
//
// Splits a single intervals file into one or more BED files using awk.
// The input format is chosen from the (lower-cased) file name:
//   * name ends with "bed"           -> rows are grouped into chunks, one BED file per chunk
//   * name ends with "interval_list" -> header lines starting with '@' are dropped,
//                                       one BED file per remaining row
//   * anything else                  -> each line is split on ':' and '-'
//                                       (i.e. contig:start-end), one BED file per line
//
// Inputs:
//   intervals              - path to the intervals file
//   nucleotides_per_second - divisor used to derive a runtime estimate from the
//                            interval length when a BED row has no fifth column
//                            (presumably a processing speed; confirm units with the caller)
// Outputs:
//   bed      - every "*.bed" file written by the script
//   versions - versions.yml recording the gawk version
process CREATE_INTERVALS_BED {
    tag "$intervals"
    label 'process_single'

    conda "${moduleDir}/environment.yml"
    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
        'https://depot.galaxyproject.org/singularity/gawk:5.1.0' :
        'biocontainers/gawk:5.1.0' }"

    input:
    path(intervals)
    val(nucleotides_per_second)

    output:
    path("*.bed")       , emit: bed
    path "versions.yml" , emit: versions

    when:
    task.ext.when == null || task.ext.when

    script:
    // Escaping notes for the awk programs below:
    //   - "\t" is turned into a literal tab by Groovy before the shell sees it
    //   - "\\t" / "\\n" reach awk as the escape sequences \t / \n
    //   - "\$" keeps awk field references and shell substitutions out of Groovy interpolation
    //
    // In the versions.yml heredocs the "gawk:" line must stay indented (with spaces)
    // deeper than the process-name key: "<<-" only strips leading tabs, and the
    // indentation is what nests the tool version under the process name.

    // If intervals file is in BED format,
    // Fifth column is interpreted to contain runtime estimates
    // Which is then used to combine short-running jobs
    if (intervals.toString().toLowerCase().endsWith("bed")) {
        // A new output file is started for the first row, and afterwards whenever the
        // accumulated estimate of the current chunk exceeds 600 and adding the row
        // would exceed 1.05 x the longest single-row estimate seen in that chunk.
        // The file is named after the first row of the chunk (start shifted to 1-based).
        """
        awk -vFS="\t" '{
            t = \$5  # runtime estimate
            if (t == "") {
                # no runtime estimate in this row, assume default value
                t = (\$3 - \$2) / ${nucleotides_per_second}
            }
            if (name == "" || (chunk > 600 && (chunk + t) > longest * 1.05)) {
                # start a new chunk
                name = sprintf("%s_%d-%d.bed", \$1, \$2+1, \$3)
                chunk = 0
                longest = 0
            }
            if (t > longest)
                longest = t
            chunk += t
            print \$0 > name
        }' ${intervals}

        cat <<-END_VERSIONS > versions.yml
        "${task.process}":
            gawk: \$(awk -Wversion | sed '1!d; s/.*Awk //; s/,.*//')
        END_VERSIONS
        """
    } else if (intervals.toString().toLowerCase().endsWith("interval_list")) {
        // Header lines (starting with '@') are removed; every remaining row becomes its
        // own three-column BED file, with the start coordinate decremented by one.
        """
        grep -v '^@' ${intervals} | awk -vFS="\t" '{
            name = sprintf("%s_%d-%d", \$1, \$2, \$3);
            printf("%s\\t%d\\t%d\\n", \$1, \$2-1, \$3) > name ".bed"
        }'

        cat <<-END_VERSIONS > versions.yml
        "${task.process}":
            gawk: \$(awk -Wversion | sed '1!d; s/.*Awk //; s/,.*//')
        END_VERSIONS
        """
    } else {
        // Any other file: fields are separated by ':' or '-', so each line is read as
        // contig:start-end and written to its own BED file (start decremented by one).
        // NOTE(review): a contig name that itself contains ':' or '-' would be split
        // in the wrong place by this field separator - confirm inputs never do.
        """
        awk -vFS="[:-]" '{
            name = sprintf("%s_%d-%d", \$1, \$2, \$3);
            printf("%s\\t%d\\t%d\\n", \$1, \$2-1, \$3) > name ".bed"
        }' ${intervals}

        cat <<-END_VERSIONS > versions.yml
        "${task.process}":
            gawk: \$(awk -Wversion | sed '1!d; s/.*Awk //; s/,.*//')
        END_VERSIONS
        """
    }
}