Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[WIP] Benchmarking per pipeline / POC #1448

Closed
wants to merge 4 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
- [#1340](https://github.com/nf-core/sarek/pull/1340) - Adds Azure test profiles and megatests.
- [#1372](https://github.com/nf-core/sarek/pull/1372) - Add NCBench test profile for Agilent datasets
- [#1409](https://github.com/nf-core/sarek/pull/1409) - Add params `modules_testdata_base_path` to test profile
- [#1448](https://github.com/nf-core/sarek/pull/1448) - Internal benchmarking of germline small variants

### Changed

Expand Down
427 changes: 326 additions & 101 deletions modules.json

Large diffs are not rendered by default.

49 changes: 49 additions & 0 deletions modules/nf-core/rtgtools/rocplot/main.nf

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

47 changes: 47 additions & 0 deletions modules/nf-core/rtgtools/rocplot/meta.yml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

66 changes: 66 additions & 0 deletions modules/nf-core/rtgtools/vcfeval/main.nf

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

114 changes: 114 additions & 0 deletions modules/nf-core/rtgtools/vcfeval/meta.yml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

3 changes: 3 additions & 0 deletions nextflow.config
Original file line number Diff line number Diff line change
Expand Up @@ -98,6 +98,9 @@ params {
vep_spliceregion = null // spliceregion plugin disabled within VEP
vep_version = "110.0-0" // Should be updated when we update VEP, needs this to get full path to some plugins

// Special
benchmark = true

// MultiQC options
multiqc_config = null
multiqc_title = null
Expand Down
40 changes: 40 additions & 0 deletions subworkflows/local/vcf_benchmark_small_variants/main.nf
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
//
// VCF_BENCHMARK_SMALL_VARIANTS: SUBWORKFLOW FOR SMALL GERMLINE VARIANTS
//
// Benchmarks pipeline-produced VCFs against a truth set with rtgtools vcfeval
// and collects the per-sample summary reports grouped by variant type / tool.
//

// NOTE: from subworkflows/local/<name>/main.nf the modules directory is three
// levels up, not two ('../../' would resolve to subworkflows/modules/...)
include { RTGTOOLS_VCFEVAL } from '../../../modules/nf-core/rtgtools/vcfeval/main'


workflow VCF_BENCHMARK_SMALL_VARIANTS {
    take:
    ch_test  // channel: test vcf coming from pipeline [val(meta), test.vcf.gz, test.vcf.gz.tbi]
    ch_truth // channel: truth vcf [val(meta), truth.vcf.gz, truth.vcf.gz.tbi]
    ch_bed   // channel: bed file [val(meta), target.bed] — optional for vcfeval, empty list passed when absent

    main:
    versions        = Channel.empty()
    summary_reports = Channel.empty()

    // Assemble the 7-element vcfeval input tuple by joining the three
    // take channels on meta; a missing bed becomes [] (vcfeval treats
    // the evaluation regions as optional)
    ch_vcfeval_input = ch_test
        .join(ch_truth, failOnMismatch: true)
        .join(ch_bed, remainder: true)
        .map { meta, vcf, tbi, truth_vcf, truth_tbi, bed ->
            [ meta, vcf, tbi, truth_vcf, truth_tbi, bed ?: [], [] ]
        }

    // apply rtgtools eval method; no SDF reference supplied here
    // NOTE(review): an SDF is normally required by vcfeval — TODO confirm
    // whether it should be wired in as a parameter of this subworkflow
    RTGTOOLS_VCFEVAL(
        ch_vcfeval_input,
        [ [], [] ]
    )
    versions = versions.mix(RTGTOOLS_VCFEVAL.out.versions.first())

    // collect summary reports, grouped by variant type and benchmarking tool
    RTGTOOLS_VCFEVAL.out.summary
        .map { meta, file -> tuple([vartype: meta.vartype] + [benchmark_tool: "rtgtools"], file) }
        .groupTuple()
        .set{ report }

    summary_reports = summary_reports.mix(report)

    emit:
    versions        // channel: [ versions.yml ]
    summary_reports // channel: [ val(meta), summary ]
}
28 changes: 28 additions & 0 deletions subworkflows/local/vcf_validate_small_variants/main.nf
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
//
// Validation against truth.
//
// WIP: thin wrapper around rtgtools/vcfeval; hap.py is a candidate
// alternative backend under consideration.
//

// NOTE: from subworkflows/local/<name>/main.nf the modules directory is three
// levels up, not two
include { RTGTOOLS_VCFEVAL } from '../../../modules/nf-core/rtgtools/vcfeval/main'

workflow VCF_VALIDATE_SMALL_VARIANTS {

    take:
    ch_vcfeval_in // channel: [ val(meta), query_vcf, query_vcf_tbi, truth_vcf, truth_vcf_tbi, truth_bed, evaluation_bed ]
    ch_sdf        // channel: [ val(meta2), sdf ]

    main:
    versions = Channel.empty()

    // query_vcf is generated by the sarek run; truth_vcf must be supplied
    // by the caller (e.g. via params / test data)
    // TODO: normalize the truth VCF before comparison
    // TODO: if params.benchmark, run "normal" sarek and compare here;
    //       may need a param identifying which truth sample was used
    RTGTOOLS_VCFEVAL ( ch_vcfeval_in, ch_sdf )

    versions = versions.mix(RTGTOOLS_VCFEVAL.out.versions.first())

    emit:
    versions // channel: [ path(versions.yml) ]
}
10 changes: 9 additions & 1 deletion workflows/sarek.nf
Original file line number Diff line number Diff line change
Expand Up @@ -217,11 +217,14 @@ include { POST_VARIANTCALLING } from '../subworkflows/lo
include { VCF_QC_BCFTOOLS_VCFTOOLS } from '../subworkflows/local/vcf_qc_bcftools_vcftools/main'

// Sample QC on CRAM files
include { CRAM_SAMPLEQC } from '../subworkflows/local/cram_sampleqc/main'
include { CRAM_SAMPLEQC } from '../subworkflows/local/cram_sampleqc/main'

// Annotation
include { VCF_ANNOTATE_ALL } from '../subworkflows/local/vcf_annotate_all/main'

// Validation (experimental)
include { VCF_VALIDATE_SMALL_VARIANTS } from '../subworkflows/nf-core/vcf_eval/main'

// MULTIQC
include { MULTIQC } from '../modules/nf-core/multiqc/main'

Expand Down Expand Up @@ -1077,6 +1080,11 @@ workflow SAREK {
vcf_to_annotate = vcf_to_annotate.mix(BAM_VARIANT_CALLING_TUMOR_ONLY_ALL.out.vcf_all)
vcf_to_annotate = vcf_to_annotate.mix(BAM_VARIANT_CALLING_SOMATIC_ALL.out.vcf_all)

if(params.benchmark) {
VCF_VALIDATE_SMALL_VARIANTS(BAM_VARIANT_CALLING_GERMLINE_ALL.out.vcf_all)
VCF_BENCHMARK_SMALL_VARIANTS(BAM_VARIANT_CALLING_GERMLINE_ALL.out.vcf_all)
}

// QC
VCF_QC_BCFTOOLS_VCFTOOLS(vcf_to_annotate, intervals_bed_combined)

Expand Down