-
Notifications
You must be signed in to change notification settings - Fork 0
/
02_discovery.nf
103 lines (74 loc) · 2.09 KB
/
02_discovery.nf
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
#!/usr/bin/env nextflow
// Source channels built from file globs under INTOGEN_DATASETS and OUTPUT.

// Every file matching *.dndscv_annotmuts.tsv.gz in the dndscv step folder.
DNDS_ANNOTMUTS_FILES = Channel.fromPath("${INTOGEN_DATASETS}/steps/dndscv/*.dndscv_annotmuts.tsv.gz")

// Every *.eval.pickle.gz file located exactly one directory below ${OUTPUT}/evaluation.
OUT_EVAL_PATH = Channel.fromPath("${OUTPUT}/evaluation/*/*.eval.pickle.gz")

// Turn each evaluation file into a triple:
//   [base name of its parent directory, file-name text before the first '.', the file itself]
OUT_EVAL = OUT_EVAL_PATH.map { eval_file ->
    def folder_name = eval_file.getParent().baseName
    def file_prefix = eval_file.baseName.split('\\.')[0]
    [folder_name, file_prefix, eval_file]
}
// Runs discovery_index/muts.py (through runner.sh) once over all files emitted by
// DNDS_ANNOTMUTS_FILES and publishes the resulting mutations.tsv.gz to
// ${OUTPUT}/discovery as a copy.
process Mutations4Discovery {
    label "boostdm"
    tag 'Mutations for discovery'
    publishDir "${OUTPUT}/discovery", mode: 'copy'

    input:
        // collect() gathers every annotmuts file into a single emission,
        // so the process is executed once with the full list.
        path annotmuts_files from DNDS_ANNOTMUTS_FILES.collect()

    output:
        // The file name is assigned in the script section below.
        path(mutations_file) into DISCOVERY_MUTS

    script:
        mutations_file = "mutations.tsv.gz"
        // The staged input files are passed as trailing positional arguments.
        """
        runner.sh discovery_index/muts.py \
            --output ${mutations_file} \
            ${annotmuts_files}
        """
}
// The variants step folder itself (type: 'dir'), emitted as a single directory path.
VARIANTS_STATS_JSON_FOLDER = Channel.fromPath("${INTOGEN_DATASETS}/steps/variants", type: 'dir')

// Runs discovery_index/preprocess_variants.py (through runner.sh) on the variants
// folder and publishes the resulting variants.json to ${OUTPUT}/discovery as a copy.
process CollectVariants {
    label "boostdm"
    tag 'Create variants.json'
    publishDir "${OUTPUT}/discovery", mode: 'copy'

    input:
        path variants_folder from VARIANTS_STATS_JSON_FOLDER

    output:
        // The file name is assigned in the script section below.
        path(variants_file) into VARIANTS_JSON

    script:
        variants_file = "variants.json"
        """
        runner.sh discovery_index/preprocess_variants.py \
            --inputfolder ${variants_folder} \
            --output ${variants_file}
        """
}
// Runs discovery_index/samples.py (through runner.sh) on the variants.json produced
// by CollectVariants and publishes the resulting samples.json to ${OUTPUT}/discovery
// as a copy.
process Samples4Discovery {
    label "boostdm"
    tag 'Samples for discovery'
    publishDir "${OUTPUT}/discovery", mode: 'copy'

    input:
        path variants_json from VARIANTS_JSON

    output:
        // The file name is assigned in the script section below.
        path(samples_file) into DISCOVERY_SAMPLES

    script:
        samples_file = "samples.json"
        """
        runner.sh discovery_index/samples.py \
            --input ${variants_json} \
            --output ${samples_file}
        """
}
// Runs discovery_index/discovery.py (through runner.sh) with the samples and mutations
// files produced upstream and publishes the resulting discovery.tsv.gz to
// ${OUTPUT}/discovery as a copy.
process DiscoveryIndex {
    label "boostdm"
    tag 'Discovery index'
    publishDir "${OUTPUT}/discovery", mode: 'copy'

    input:
        // Not referenced in the script: the evaluation results are read straight from
        // ${OUTPUT}/evaluation instead.
        // NOTE(review): presumably this input only gates execution on evaluation
        // files being present -- confirm.
        val (evaluations) from OUT_EVAL.collect()
        path samples from DISCOVERY_SAMPLES
        path mutations from DISCOVERY_MUTS

    output:
        // The file name is assigned in the script section below.
        path(index_file) into DISCOVERY_INDEX

    script:
        index_file = "discovery.tsv.gz"
        """
        runner.sh discovery_index/discovery.py \
            --output ${index_file} \
            --mutations ${mutations} \
            --samples ${samples} \
            --evaluation-path ${OUTPUT}/evaluation
        """
}