Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[WIP] Benchmarking per pipeline / POC #1448

Closed
wants to merge 4 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
- [#1340](https://github.com/nf-core/sarek/pull/1340) - Adds Azure test profiles and megatests.
- [#1372](https://github.com/nf-core/sarek/pull/1372) - Add NCBench test profile for Agilent datasets
- [#1409](https://github.com/nf-core/sarek/pull/1409) - Add params `modules_testdata_base_path` to test profile
- [#1448](https://github.com/nf-core/sarek/pull/1448) - Internal benchmarking of germline small variants

### Changed

Expand Down
427 changes: 326 additions & 101 deletions modules.json

Large diffs are not rendered by default.

49 changes: 49 additions & 0 deletions modules/nf-core/rtgtools/rocplot/main.nf

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

47 changes: 47 additions & 0 deletions modules/nf-core/rtgtools/rocplot/meta.yml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

66 changes: 66 additions & 0 deletions modules/nf-core/rtgtools/vcfeval/main.nf

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

114 changes: 114 additions & 0 deletions modules/nf-core/rtgtools/vcfeval/meta.yml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

3 changes: 3 additions & 0 deletions nextflow.config
Original file line number Diff line number Diff line change
Expand Up @@ -98,6 +98,9 @@ params {
vep_spliceregion = null // spliceregion plugin disabled within VEP
vep_version = "110.0-0" // Should be updated when we update VEP, needs this to get full path to some plugins

// Special
benchmark = true

// MultiQC options
multiqc_config = null
multiqc_title = null
Expand Down
40 changes: 40 additions & 0 deletions subworkflows/local/vcf_benchmark_small_variants/main.nf
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
//
// VCF_BENCHMARK_SMALL_VARIANTS: SUBWORKFLOW FOR SMALL GERMLINE VARIANTS
//
// Benchmarks pipeline-produced VCFs against a truth set with rtgtools vcfeval
// and collects the per-sample summary reports grouped by variant type / tool.
//

// NOTE: from subworkflows/local/<name>/main.nf the modules directory is three
// levels up, not two ('../../' would resolve to subworkflows/modules/...)
include { RTGTOOLS_VCFEVAL } from '../../../modules/nf-core/rtgtools/vcfeval/main'


workflow VCF_BENCHMARK_SMALL_VARIANTS {
    take:
    ch_test  // channel: test vcf coming from pipeline [val(meta), test.vcf.gz, test.vcf.gz.tbi]
    ch_truth // channel: truth vcf [val(meta), truth.vcf.gz, truth.vcf.gz.tbi]
    ch_bed   // channel: bed file [val(meta), target.bed] — optional for vcfeval, empty list passed when absent

    main:
    versions        = Channel.empty()
    summary_reports = Channel.empty()

    // Assemble the 7-element vcfeval input tuple by joining the three
    // take channels on meta; a missing bed becomes [] (vcfeval treats
    // the evaluation regions as optional)
    ch_vcfeval_input = ch_test
        .join(ch_truth, failOnMismatch: true)
        .join(ch_bed, remainder: true)
        .map { meta, vcf, tbi, truth_vcf, truth_tbi, bed ->
            [ meta, vcf, tbi, truth_vcf, truth_tbi, bed ?: [], [] ]
        }

    // apply rtgtools eval method; no SDF reference supplied here
    // NOTE(review): an SDF is normally required by vcfeval — TODO confirm
    // whether it should be wired in as a parameter of this subworkflow
    RTGTOOLS_VCFEVAL(
        ch_vcfeval_input,
        [ [], [] ]
    )
    versions = versions.mix(RTGTOOLS_VCFEVAL.out.versions.first())

    // collect summary reports, grouped by variant type and benchmarking tool
    RTGTOOLS_VCFEVAL.out.summary
        .map { meta, file -> tuple([vartype: meta.vartype] + [benchmark_tool: "rtgtools"], file) }
        .groupTuple()
        .set{ report }

    summary_reports = summary_reports.mix(report)

    emit:
    versions        // channel: [ versions.yml ]
    summary_reports // channel: [ val(meta), summary ]
}
28 changes: 28 additions & 0 deletions subworkflows/local/vcf_validate_small_variants/main.nf
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
//
// Validation against truth.
//
// WIP: thin wrapper around rtgtools/vcfeval; hap.py is a candidate
// alternative backend under consideration.
//

// NOTE: from subworkflows/local/<name>/main.nf the modules directory is three
// levels up, not two
include { RTGTOOLS_VCFEVAL } from '../../../modules/nf-core/rtgtools/vcfeval/main'

workflow VCF_VALIDATE_SMALL_VARIANTS {

    take:
    ch_vcfeval_in // channel: [ val(meta), query_vcf, query_vcf_tbi, truth_vcf, truth_vcf_tbi, truth_bed, evaluation_bed ]
    ch_sdf        // channel: [ val(meta2), sdf ]

    main:
    versions = Channel.empty()

    // query_vcf is generated by the sarek run; truth_vcf must be supplied
    // by the caller (e.g. via params / test data)
    // TODO: normalize the truth VCF before comparison
    // TODO: if params.benchmark, run "normal" sarek and compare here;
    //       may need a param identifying which truth sample was used
    RTGTOOLS_VCFEVAL ( ch_vcfeval_in, ch_sdf )

    versions = versions.mix(RTGTOOLS_VCFEVAL.out.versions.first())

    emit:
    versions // channel: [ path(versions.yml) ]
}
10 changes: 9 additions & 1 deletion workflows/sarek.nf
Original file line number Diff line number Diff line change
Expand Up @@ -217,11 +217,14 @@ include { POST_VARIANTCALLING } from '../subworkflows/lo
include { VCF_QC_BCFTOOLS_VCFTOOLS } from '../subworkflows/local/vcf_qc_bcftools_vcftools/main'

// Sample QC on CRAM files
include { CRAM_SAMPLEQC } from '../subworkflows/local/cram_sampleqc/main'
include { CRAM_SAMPLEQC } from '../subworkflows/local/cram_sampleqc/main'

// Annotation
include { VCF_ANNOTATE_ALL } from '../subworkflows/local/vcf_annotate_all/main'

// Validation (experimental)
include { VCF_VALIDATE_SMALL_VARIANTS } from '../subworkflows/nf-core/vcf_eval/main'

// MULTIQC
include { MULTIQC } from '../modules/nf-core/multiqc/main'

Expand Down Expand Up @@ -1077,6 +1080,11 @@ workflow SAREK {
vcf_to_annotate = vcf_to_annotate.mix(BAM_VARIANT_CALLING_TUMOR_ONLY_ALL.out.vcf_all)
vcf_to_annotate = vcf_to_annotate.mix(BAM_VARIANT_CALLING_SOMATIC_ALL.out.vcf_all)

if(params.benchmark) {
VCF_VALIDATE_SMALL_VARIANTS(BAM_VARIANT_CALLING_GERMLINE_ALL.out.vcf_all)
VCF_BENCHMARK_SMALL_VARIANTS(BAM_VARIANT_CALLING_GERMLINE_ALL.out.vcf_all)
}

// QC
VCF_QC_BCFTOOLS_VCFTOOLS(vcf_to_annotate, intervals_bed_combined)

Expand Down