-
Notifications
You must be signed in to change notification settings - Fork 8
/
small_germline_benchmark.nf
159 lines (130 loc) · 5.68 KB
/
small_germline_benchmark.nf
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
//
// SMALL_GERMLINE_BENCHMARK: SUBWORKFLOW FOR SMALL GERMLINE VARIANTS
//
include { RTGTOOLS_FORMAT } from '../../modules/nf-core/rtgtools/format/main'
include { RTGTOOLS_VCFEVAL } from '../../modules/nf-core/rtgtools/vcfeval/main'
include { HAPPY_HAPPY } from '../../modules/nf-core/happy/happy/main'
include { HAPPY_PREPY } from '../../modules/nf-core/happy/prepy/main'
include { VCF_REHEADER_SAMPLENAME as VCF_REHEADER_SAMPLENAME_1 } from '../local/vcf_reheader_samplename'
include { VCF_REHEADER_SAMPLENAME as VCF_REHEADER_SAMPLENAME_2 } from '../local/vcf_reheader_samplename'
include { VCF_REHEADER_SAMPLENAME as VCF_REHEADER_SAMPLENAME_3 } from '../local/vcf_reheader_samplename'
include { VCF_REHEADER_SAMPLENAME as VCF_REHEADER_SAMPLENAME_4 } from '../local/vcf_reheader_samplename'
workflow SMALL_GERMLINE_BENCHMARK {

    // Benchmarks small germline variant calls against a truth set with the
    // tools selected in params.method ('rtgtools' and/or 'happy').
    //
    // Emits:
    //   summary_reports - per-tool summary files grouped under a meta map of
    //                     [vartype, benchmark_tool]
    //   tagged_variants - re-headered rtgtools FN/FP/TP VCFs with a meta map of
    //                     [vartype, tag, id]; stays empty when only happy runs
    //   versions        - versions.yml files from every process invoked here

    take:
    input_ch    // channel: [val(meta), test_vcf, test_index, truth_vcf, truth_index, bed]
    fasta       // reference channel: [val(meta), ref.fa]
    fai         // reference channel: [val(meta), ref.fa.fai]
    sdf         // reference channel: [val(meta), sdf]

    main:

    versions        = Channel.empty()
    summary_reports = Channel.empty()
    tagged_variants = Channel.empty()

    //
    // RTG Tools vcfeval
    //
    if (params.method.contains('rtgtools')) {

        // No SDF supplied by the user: build one from the reference fasta
        if (!params.sdf) {
            RTGTOOLS_FORMAT(
                fasta.map { ref_meta, ref_fasta -> [ ref_meta, ref_fasta, [], [] ] }
            )
            versions = versions.mix(RTGTOOLS_FORMAT.out.versions)
            sdf      = RTGTOOLS_FORMAT.out.sdf
        }

        // vcfeval takes one extra trailing slot per sample, left empty here
        vcfeval_input = input_ch.map { meta, test_vcf, test_tbi, truth_vcf, truth_tbi, regions_bed ->
            [ meta, test_vcf, test_tbi, truth_vcf, truth_tbi, regions_bed, [] ]
        }

        RTGTOOLS_VCFEVAL(
            vcfeval_input,
            sdf
        )
        versions = versions.mix(RTGTOOLS_VCFEVAL.out.versions.first())

        // Replace the sample meta with a tool-level key so that all summaries
        // end up in a single group
        rtgtools_report = RTGTOOLS_VCFEVAL.out.summary
            .map { _meta, summary_file ->
                tuple([vartype: params.variant_type, benchmark_tool: "rtgtools"], summary_file)
            }
            .groupTuple()
        summary_reports = summary_reports.mix(rtgtools_report)

        // Re-header each vcfeval output VCF and label it with its category

        // false negatives
        VCF_REHEADER_SAMPLENAME_1(
            RTGTOOLS_VCFEVAL.out.fn_vcf,
            fai
        )
        versions = versions.mix(VCF_REHEADER_SAMPLENAME_1.out.versions)
        fn_tagged = VCF_REHEADER_SAMPLENAME_1.out.ch_vcf
            .map { _meta, vcf_file, vcf_index ->
                tuple([vartype: params.variant_type, tag: "FN", id: "rtgtools"], vcf_file, vcf_index)
            }

        // false positives
        VCF_REHEADER_SAMPLENAME_2(
            RTGTOOLS_VCFEVAL.out.fp_vcf,
            fai
        )
        versions = versions.mix(VCF_REHEADER_SAMPLENAME_2.out.versions)
        fp_tagged = VCF_REHEADER_SAMPLENAME_2.out.ch_vcf
            .map { _meta, vcf_file, vcf_index ->
                tuple([vartype: params.variant_type, tag: "FP", id: "rtgtools"], vcf_file, vcf_index)
            }

        // true positives, baseline (truth) representation
        VCF_REHEADER_SAMPLENAME_3(
            RTGTOOLS_VCFEVAL.out.baseline_vcf,
            fai
        )
        versions = versions.mix(VCF_REHEADER_SAMPLENAME_3.out.versions)
        tp_base_tagged = VCF_REHEADER_SAMPLENAME_3.out.ch_vcf
            .map { _meta, vcf_file, vcf_index ->
                tuple([vartype: params.variant_type, tag: "TP_base", id: "rtgtools"], vcf_file, vcf_index)
            }

        // true positives, comparison (test) representation
        VCF_REHEADER_SAMPLENAME_4(
            RTGTOOLS_VCFEVAL.out.tp_vcf,
            fai
        )
        versions = versions.mix(VCF_REHEADER_SAMPLENAME_4.out.versions)
        tp_comp_tagged = VCF_REHEADER_SAMPLENAME_4.out.ch_vcf
            .map { _meta, vcf_file, vcf_index ->
                tuple([vartype: params.variant_type, tag: "TP_comp", id: "rtgtools"], vcf_file, vcf_index)
            }

        tagged_variants = tagged_variants.mix(
            fn_tagged,
            fp_tagged,
            tp_base_tagged,
            tp_comp_tagged
        )
    }

    //
    // hap.py
    //
    if (params.method.contains('happy')) {

        // Query side: [meta, test_vcf]
        test_ch = input_ch.map { meta, test_vcf, _tbi, _truth_vcf, _truth_tbi, _bed ->
            [ meta, test_vcf ]
        }

        // Truth side: [meta, truth_vcf, bed, []] - the last slot is left empty
        truth_ch = input_ch.map { meta, _vcf, _tbi, truth_vcf, _truth_tbi, regions_bed ->
            [ meta, truth_vcf, regions_bed, [] ]
        }

        // Optional pre.py preprocessing; its output replaces the raw test VCFs
        if (params.preprocess.contains("prepy")) {
            prepy_input = input_ch.map { meta, test_vcf, _tbi, _truth_vcf, _truth_tbi, regions_bed ->
                [ meta, test_vcf, regions_bed ]
            }

            HAPPY_PREPY(
                prepy_input,
                fasta,
                fai
            )
            versions = versions.mix(HAPPY_PREPY.out.versions.first())

            // TODO: Check norm settings https://github.com/Illumina/hap.py/blob/master/doc/normalisation.md
            test_ch = HAPPY_PREPY.out.preprocessed_vcf
        }

        // Pair query and truth by meta; duplicated or unmatched keys abort the run
        happy_input = test_ch.join(truth_ch, failOnDuplicate:true, failOnMismatch:true)

        HAPPY_HAPPY(
            happy_input,
            fasta,
            fai,
            [[],[]],
            [[],[]],
            [[],[]]
        )
        versions = versions.mix(HAPPY_HAPPY.out.versions.first())

        // Replace the sample meta with a tool-level key so that all summaries
        // end up in a single group
        happy_report = HAPPY_HAPPY.out.summary_csv
            .map { _meta, summary_file ->
                tuple([vartype: params.variant_type, benchmark_tool: "happy"], summary_file)
            }
            .groupTuple()
        summary_reports = summary_reports.mix(happy_report)
    }

    emit:
    summary_reports // channel: [val(meta), reports]
    tagged_variants // channel: [val(meta), vcfs]
    versions        // channel: [versions.yml]
}