Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix join mismatch error for joint tumor only Mutect2 #1181

Merged
merged 12 commits into from
Aug 31, 2023
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,7 @@ Rapaselet is a delta formed by the Rapaätno river between the Bielloriehppe mas
- [#1173](https://github.com/nf-core/sarek/pull/1173) - Fixed duplicated entries in joint germline recalibrated VCF ([#966](https://github.com/nf-core/sarek/pull/966), [#1102](https://github.com/nf-core/sarek/pull/1102)),
fixed grouping joint germline recalibrated VCF ([#1137](https://github.com/nf-core/sarek/pull/1137))
- [#1177](https://github.com/nf-core/sarek/pull/1177) - Fix status inference when using nf-validation plugin
- [#1181](https://github.com/nf-core/sarek/pull/1181) - Fix join mismatch error in Mutect2 tumor only subworkflow
- [#1183](https://github.com/nf-core/sarek/pull/1183) - Add docs for concatenated germline variants
- [#1184](https://github.com/nf-core/sarek/pull/1184) - Fix issue with duplicated variants in VCF from Sentieon-based joint-germline variant-calling with VQSR. (Corresponding to [#966](https://github.com/nf-core/sarek/issues/966) for GATK.)
- [#1192](https://github.com/nf-core/sarek/pull/1192) - Add `ASCATprofile.png` to ASCAT output docs
Expand Down
12 changes: 9 additions & 3 deletions subworkflows/local/bam_variant_calling_somatic_all/main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -70,8 +70,8 @@ workflow BAM_VARIANT_CALLING_SOMATIC_ALL {
// CONTROLFREEC
if (tools.split(',').contains('controlfreec')) {
// Remap channels to match module/subworkflow
cram_normal = cram.map { meta, normal_cram, normal_crai, tumor_cram, tumor_crai -> [ meta, normal_cram, normal_crai ] }
cram_tumor = cram.map { meta, normal_cram, normal_crai, tumor_cram, tumor_crai -> [ meta, tumor_cram, tumor_crai ] }
cram_normal = cram.map{ meta, normal_cram, normal_crai, tumor_cram, tumor_crai -> [ meta, normal_cram, normal_crai ] }
cram_tumor = cram.map{ meta, normal_cram, normal_crai, tumor_cram, tumor_crai -> [ meta, tumor_cram, tumor_crai ] }

MPILEUP_NORMAL(
cram_normal,
Expand Down Expand Up @@ -183,7 +183,13 @@ workflow BAM_VARIANT_CALLING_SOMATIC_ALL {
if (tools.split(',').contains('mutect2')) {
BAM_VARIANT_CALLING_SOMATIC_MUTECT2(
// Remap channel to match module/subworkflow
cram.map { meta, normal_cram, normal_crai, tumor_cram, tumor_crai -> [ meta, [ normal_cram, tumor_cram ], [ normal_crai, tumor_crai ] ] },
// Adjust the meta map to simplify joining channels
// joint_mutect2 mode needs a different meta map than regular mode
cram.map{ meta, normal_cram, normal_crai, tumor_cram, tumor_crai ->
joint_mutect2 ?
[ meta + [ id:meta.patient ] - meta.subMap('patient', 'tumor_id'), [ normal_cram, tumor_cram ], [ normal_crai, tumor_crai ] ] :
[ meta, [ normal_cram, tumor_cram ], [ normal_crai, tumor_crai ] ]
},
maxulysse marked this conversation as resolved.
Show resolved Hide resolved
// Remap channel to match module/subworkflow
fasta.map{ it -> [ [ id:'fasta' ], it ] },
// Remap channel to match module/subworkflow
Expand Down
30 changes: 15 additions & 15 deletions subworkflows/local/bam_variant_calling_somatic_mutect2/main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -41,9 +41,9 @@ workflow BAM_VARIANT_CALLING_SOMATIC_MUTECT2 {

if (joint_mutect2) {
// Separate normal cram files and remove duplicates
ch_normal_cram = input.map{ meta, cram, crai -> [ meta - meta.subMap('tumor_id') + [id:meta.patient], cram[0], crai[0] ] }.unique()
ch_normal_cram = input.map{ meta, cram, crai -> [ meta, cram[0], crai[0] ] }.unique()
// Extract tumor cram files
ch_tumor_cram = input.map{ meta, cram, crai -> [ meta - meta.subMap('tumor_id') + [id:meta.patient], cram[1], crai[1] ] }
ch_tumor_cram = input.map{ meta, cram, crai -> [ meta, cram[1], crai[1] ] }
// Merge normal and tumor crams by patient
ch_tn_cram = ch_normal_cram.mix(ch_tumor_cram).groupTuple()
// Combine input and intervals for scatter and gather strategy
Expand Down Expand Up @@ -153,27 +153,27 @@ workflow BAM_VARIANT_CALLING_SOMATIC_MUTECT2 {
CALCULATECONTAMINATION(ch_calculatecontamination_in_tables)

// Initialize empty channels: contamination is calculated from the pileup tables, and pileup is not run if no germline resource is provided
ch_seg_to_filtermutectcalls = Channel.empty()
ch_cont_to_filtermutectcalls = Channel.empty()
calculatecontamination_out_seg = Channel.empty()
calculatecontamination_out_cont = Channel.empty()

if (joint_mutect2) {
// Reduce the meta to only patient name
ch_seg_to_filtermutectcalls = CALCULATECONTAMINATION.out.segmentation.map{ meta, seg -> [ meta - meta.subMap('tumor_id') + [id: meta.patient], seg]}.groupTuple()
ch_cont_to_filtermutectcalls = CALCULATECONTAMINATION.out.contamination.map{ meta, cont -> [ meta - meta.subMap('tumor_id') + [id: meta.patient], cont]}.groupTuple()
calculatecontamination_out_seg = CALCULATECONTAMINATION.out.segmentation.map{ meta, seg -> [ meta - meta.subMap('tumor_id') + [id: meta.patient], seg]}.groupTuple()
calculatecontamination_out_cont = CALCULATECONTAMINATION.out.contamination.map{ meta, cont -> [ meta - meta.subMap('tumor_id') + [id: meta.patient], cont]}.groupTuple()
}
else {
// Keep tumor_vs_normal ID
ch_seg_to_filtermutectcalls = CALCULATECONTAMINATION.out.segmentation
ch_cont_to_filtermutectcalls = CALCULATECONTAMINATION.out.contamination
calculatecontamination_out_seg = CALCULATECONTAMINATION.out.segmentation
calculatecontamination_out_cont = CALCULATECONTAMINATION.out.contamination
}

// Mutect2 calls filtered by filtermutectcalls using the artifactpriors, contamination and segmentation tables
vcf_to_filter = vcf.join(tbi, failOnDuplicate: true, failOnMismatch: true)
.join(stats, failOnDuplicate: true, failOnMismatch: true)
.join(LEARNREADORIENTATIONMODEL.out.artifactprior, failOnDuplicate: true, failOnMismatch: true)
.join(ch_seg_to_filtermutectcalls)
.join(ch_cont_to_filtermutectcalls)
.map{ meta, vcf, tbi, stats, orientation, seg, cont -> [ meta, vcf, tbi, stats, orientation, seg, cont, [] ] }
.join(stats, failOnDuplicate: true, failOnMismatch: true)
.join(LEARNREADORIENTATIONMODEL.out.artifactprior, failOnDuplicate: true, failOnMismatch: true)
.join(calculatecontamination_out_seg)
.join(calculatecontamination_out_cont)
.map{ meta, vcf, tbi, stats, orientation, seg, cont -> [ meta, vcf, tbi, stats, orientation, seg, cont, [] ] }

FILTERMUTECTCALLS(vcf_to_filter, fasta, fai, dict)

Expand Down Expand Up @@ -205,8 +205,8 @@ workflow BAM_VARIANT_CALLING_SOMATIC_MUTECT2 {
pileup_table_normal // channel: [ meta, table_normal ]
pileup_table_tumor // channel: [ meta, table_tumor ]

contamination_table = ch_cont_to_filtermutectcalls // channel: [ meta, contamination ]
segmentation_table = ch_seg_to_filtermutectcalls // channel: [ meta, segmentation ]
contamination_table = calculatecontamination_out_cont // channel: [ meta, contamination ]
segmentation_table = calculatecontamination_out_seg // channel: [ meta, segmentation ]

versions // channel: [ versions.yml ]
}
Original file line number Diff line number Diff line change
Expand Up @@ -110,7 +110,13 @@ workflow BAM_VARIANT_CALLING_TUMOR_ONLY_ALL {
// MUTECT2
if (tools.split(',').contains('mutect2')) {
BAM_VARIANT_CALLING_TUMOR_ONLY_MUTECT2(
cram,
// Adjust the meta map to simplify joining channels
// joint_mutect2 mode needs a different meta map than regular mode
cram.map{ meta, cram, crai ->
joint_mutect2 ?
[ meta + [ id:meta.patient ] - meta.subMap('sample', 'status', 'num_intervals', 'data_type', 'patient') , cram, crai ] :
[ meta - meta.subMap('sample', 'status', 'num_intervals', 'data_type'), cram, crai ]
},
maxulysse marked this conversation as resolved.
Show resolved Hide resolved
// Remap channel to match module/subworkflow
fasta.map{ it -> [ [ id:'fasta' ], it ] },
// Remap channel to match module/subworkflow
Expand Down
26 changes: 13 additions & 13 deletions subworkflows/local/bam_variant_calling_tumor_only_mutect2/main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ workflow BAM_VARIANT_CALLING_TUMOR_ONLY_MUTECT2 {
if (joint_mutect2) {
// Perform variant calling using mutect2 module in tumor single mode
// Group cram files by patient
patient_crams = input.map{ meta, t_cram, t_crai -> [ meta - meta.subMap('sample') + [id:meta.patient], t_cram, t_crai ] }.groupTuple()
patient_crams = input.groupTuple()
// Add intervals for scatter-gather scaling
patient_cram_intervals = patient_crams.combine(intervals)
// Move num_intervals to meta map and reorganize channel for MUTECT2 module
Expand Down Expand Up @@ -90,10 +90,11 @@ workflow BAM_VARIANT_CALLING_TUMOR_ONLY_MUTECT2 {
MERGEMUTECTSTATS(stats_to_merge)

// Mix intervals and no_intervals channels together
vcf = Channel.empty().mix(MERGE_MUTECT2.out.vcf, vcf_branch.no_intervals)
tbi = Channel.empty().mix(MERGE_MUTECT2.out.tbi, tbi_branch.no_intervals)
stats = Channel.empty().mix(MERGEMUTECTSTATS.out.stats, stats_branch.no_intervals)
f1r2 = Channel.empty().mix(f1r2_to_merge, f1r2_branch.no_intervals)
// Remove unnecessary metadata
vcf = Channel.empty().mix(MERGE_MUTECT2.out.vcf, vcf_branch.no_intervals).map{ meta, vcf -> [ meta - meta.subMap('num_intervals'), vcf ] }
tbi = Channel.empty().mix(MERGE_MUTECT2.out.tbi, tbi_branch.no_intervals).map{ meta, tbi -> [ meta - meta.subMap('num_intervals'), tbi ] }
stats = Channel.empty().mix(MERGEMUTECTSTATS.out.stats, stats_branch.no_intervals).map{ meta, stats -> [ meta - meta.subMap('num_intervals'), stats ] }
f1r2 = Channel.empty().mix(f1r2_to_merge, f1r2_branch.no_intervals).map{ meta, f1r2 -> [ meta - meta.subMap('num_intervals'), f1r2 ] }
maxulysse marked this conversation as resolved.
Show resolved Hide resolved

// Generate artifactpriors using learnreadorientationmodel on the f1r2 output of mutect2
LEARNREADORIENTATIONMODEL(f1r2)
Expand Down Expand Up @@ -124,14 +125,13 @@ workflow BAM_VARIANT_CALLING_TUMOR_ONLY_MUTECT2 {
calculatecontamination_out_cont = Channel.empty()

if (joint_mutect2) {
// Remove sample names and retain patient name as the main identifier
calculatecontamination_out_seg = CALCULATECONTAMINATION.out.segmentation.map{ meta, seg -> [ meta - meta.subMap('sample') + [id:meta.patient], seg ] }.groupTuple()
calculatecontamination_out_cont = CALCULATECONTAMINATION.out.contamination.map{ meta, cont -> [ meta - meta.subMap('sample') + [id:meta.patient], cont ] }.groupTuple()
}
else {
// Group tables by samples
calculatecontamination_out_seg = CALCULATECONTAMINATION.out.segmentation.map{ meta, seg -> [ meta - meta.subMap('num_intervals'), seg ] }.groupTuple()
calculatecontamination_out_cont = CALCULATECONTAMINATION.out.contamination.map{ meta, cont -> [ meta - meta.subMap('num_intervals'), cont ] }.groupTuple()
} else {
// Regular single sample mode
calculatecontamination_out_seg = CALCULATECONTAMINATION.out.segmentation
calculatecontamination_out_cont = CALCULATECONTAMINATION.out.contamination
calculatecontamination_out_seg = CALCULATECONTAMINATION.out.segmentation.map{ meta, seg -> [ meta - meta.subMap('num_intervals'), seg ] }
calculatecontamination_out_cont = CALCULATECONTAMINATION.out.contamination.map{ meta, cont -> [ meta - meta.subMap('num_intervals'), cont ] }
}

// Mutect2 calls filtered by filtermutectcalls using the contamination and segmentation tables
Expand All @@ -146,7 +146,7 @@ workflow BAM_VARIANT_CALLING_TUMOR_ONLY_MUTECT2 {

vcf_filtered = FILTERMUTECTCALLS.out.vcf
// add variantcaller to meta map (num_intervals is already removed from the vcf meta before FILTERMUTECTCALLS)
.map{ meta, vcf -> [ meta - meta.subMap('num_intervals') + [ variantcaller:'mutect2' ], vcf ] }
.map{ meta, vcf -> [ meta + [ variantcaller:'mutect2' ], vcf ] }

versions = versions.mix(MERGE_MUTECT2.out.versions)
versions = versions.mix(CALCULATECONTAMINATION.out.versions)
Expand Down