From 2c10752e787921ceed146b032886c71dbed3ba05 Mon Sep 17 00:00:00 2001 From: asp8200 Date: Sat, 28 May 2022 08:49:53 +0000 Subject: [PATCH 01/24] WIP: Replace CONCAT_VCFs with GATKs MergeVcfs. With this first commit I just did the replacement for STRELKA. --- conf/modules.config | 6 ++-- .../variantcalling/strelka/single/main.nf | 29 ++++++------------- 2 files changed, 12 insertions(+), 23 deletions(-) diff --git a/conf/modules.config b/conf/modules.config index 5e447ce785..aab02a2c3a 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -606,17 +606,17 @@ process{ } // STRELKA - withName: 'CONCAT_STRELKA.*' { + withName: 'MERGE_STRELKA.*' { publishDir = [ mode: params.publish_dir_mode, path: { "${params.outdir}/variant_calling/${meta.id}/strelka" }, pattern: "*{vcf.gz,vcf.gz.tbi}" ] } - withName: 'CONCAT_STRELKA' { + withName: 'MERGE_STRELKA' { ext.prefix = {"${meta.id}.variants"} } - withName: 'CONCAT_STRELKA_GENOME' { + withName: 'MERGE_STRELKA_GENOME' { ext.prefix = {"${meta.id}.genome"} } withName: 'STRELKA_.*' { diff --git a/subworkflows/nf-core/variantcalling/strelka/single/main.nf b/subworkflows/nf-core/variantcalling/strelka/single/main.nf index 4d06d8a3a8..cf892011cb 100644 --- a/subworkflows/nf-core/variantcalling/strelka/single/main.nf +++ b/subworkflows/nf-core/variantcalling/strelka/single/main.nf @@ -1,7 +1,6 @@ -include { TABIX_BGZIP as BGZIP_VC_STRELKA } from '../../../../../modules/nf-core/modules/tabix/bgzip/main' -include { TABIX_BGZIP as BGZIP_VC_STRELKA_GENOME } from '../../../../../modules/nf-core/modules/tabix/bgzip/main' include { CONCAT_VCF as CONCAT_STRELKA } from '../../../../../modules/local/concat_vcf/main' -include { CONCAT_VCF as CONCAT_STRELKA_GENOME } from '../../../../../modules/local/concat_vcf/main' +include { MERGE_VCFS as MERGE_STRELKA } from '../../../../../modules/local/merge_vcfs/main' +include { MERGE_VCFS as MERGE_STRELKA_GENOME } from '../../../../../modules/local/merge_vcfs/main' include { STRELKA_GERMLINE as 
STRELKA_SINGLE } from '../../../../../modules/nf-core/modules/strelka/germline/main' workflow RUN_STRELKA_SINGLE { @@ -28,13 +27,9 @@ workflow RUN_STRELKA_SINGLE { no_intervals: it[0].num_intervals <= 1 }.set{strelka_genome_vcf} - // Only when using intervals - BGZIP_VC_STRELKA(strelka_vcf.intervals) - - CONCAT_STRELKA( - BGZIP_VC_STRELKA.out.output + MERGE_STRELKA( + strelka_vcf.intervals .map{ meta, vcf -> - new_meta = [patient:meta.patient, sample:meta.sample, status:meta.status, gender:meta.gender, id:meta.sample, num_intervals:meta.num_intervals] [groupKey(new_meta, meta.num_intervals), vcf] @@ -42,10 +37,8 @@ workflow RUN_STRELKA_SINGLE { fasta_fai, intervals_bed_gz) - BGZIP_VC_STRELKA_GENOME(strelka_genome_vcf.intervals) - - CONCAT_STRELKA_GENOME( - BGZIP_VC_STRELKA_GENOME.out.output + MERGE_STRELKA_GENOME( + strelka_genome_vcf.intervals .map{ meta, vcf -> new_meta = [patient:meta.patient, sample:meta.sample, status:meta.status, gender:meta.gender, id:meta.sample, num_intervals:meta.num_intervals] @@ -57,18 +50,14 @@ workflow RUN_STRELKA_SINGLE { // Mix output channels for "no intervals" and "with intervals" results strelka_vcf = Channel.empty().mix( - CONCAT_STRELKA.out.vcf, - //CONCAT_STRELKA_GENOME.out.vcf, - //strelka_genome_vcf.no_intervals, + MERGE_STRELKA.out.vcf, strelka_vcf.no_intervals) .map{ meta, vcf -> [[patient:meta.patient, sample:meta.sample, status:meta.status, gender:meta.gender, id:meta.sample, num_intervals:meta.num_intervals, variantcaller:"Strelka"], vcf] } - ch_versions = ch_versions.mix(BGZIP_VC_STRELKA.out.versions) - ch_versions = ch_versions.mix(BGZIP_VC_STRELKA_GENOME.out.versions) - ch_versions = ch_versions.mix(CONCAT_STRELKA.out.versions) - ch_versions = ch_versions.mix(CONCAT_STRELKA_GENOME.out.versions) + ch_versions = ch_versions.mix(MERGE_STRELKA.out.versions) + ch_versions = ch_versions.mix(MERGE_STRELKA_GENOME.out.versions) ch_versions = ch_versions.mix(STRELKA_SINGLE.out.versions) emit: From 
2a8e8709cf0e35ddf50da2e424efe7d5191f4344 Mon Sep 17 00:00:00 2001 From: asp8200 Date: Sat, 28 May 2022 08:51:54 +0000 Subject: [PATCH 02/24] Nextflow module/process for GATKs MergeVcfs. --- modules/local/merge_vcfs/main.nf | 43 ++++++++++++++++++++++++++++++++ 1 file changed, 43 insertions(+) create mode 100644 modules/local/merge_vcfs/main.nf diff --git a/modules/local/merge_vcfs/main.nf b/modules/local/merge_vcfs/main.nf new file mode 100644 index 0000000000..5e78f3a3c1 --- /dev/null +++ b/modules/local/merge_vcfs/main.nf @@ -0,0 +1,43 @@ +process MERGE_VCFS { + tag "$meta.id" + label 'process_medium' + + conda (params.enable_conda ? "bioconda::gatk4=4.2.5.0" : null) + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/gatk4:4.2.5.0--hdfd78af_0' : + 'quay.io/biocontainers/gatk4:4.2.5.0--hdfd78af_0' }" + + input: + tuple val(meta), path(vcf) + path fasta_fai + path target_bed + + output: + tuple val(meta), path("${prefix}.vcf.gz") , emit: vcf + tuple val(meta), path("${prefix}.vcf.gz.tbi"), emit: tbi + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + prefix = task.ext.prefix ?: "${meta.id}" + def target_options = target_bed ? "-t ${target_bed}" : "" + + def avail_mem = 3 + if (!task.memory) { + log.info '[GATK ApplyBQSRSpark] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.' 
+ } else { + avail_mem = task.memory.giga + } + """ + ls *.vcf.gz > ${prefix}.vcf.list + gatk --java-options "-Xmx${avail_mem}g" MergeVcfs -I ${prefix}.vcf.list -O ${prefix}.vcf.gz + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + gatk4: \$(echo \$(gatk --version 2>&1) | sed 's/^.*(GATK) v//; s/ .*\$//') + END_VERSIONS + """ +} From 56de5c7656eb924d27577739b9b8cf3014ab221e Mon Sep 17 00:00:00 2001 From: asp8200 Date: Sat, 28 May 2022 14:10:55 +0000 Subject: [PATCH 03/24] Using the nf-core module with GATK's mergeVcfs. --- modules.json | 5 +- .../nf-core/modules/gatk4/mergevcfs/main.nf | 46 +++++++++++++++++++ .../nf-core/modules/gatk4/mergevcfs/meta.yml | 43 +++++++++++++++++ .../local/germline_variant_calling.nf | 1 + subworkflows/local/tumor_variant_calling.nf | 9 ++-- .../variantcalling/strelka/single/main.nf | 13 +++--- 6 files changed, 106 insertions(+), 11 deletions(-) create mode 100644 modules/nf-core/modules/gatk4/mergevcfs/main.nf create mode 100644 modules/nf-core/modules/gatk4/mergevcfs/meta.yml diff --git a/modules.json b/modules.json index 02497f14d8..f697c71d31 100644 --- a/modules.json +++ b/modules.json @@ -141,6 +141,9 @@ "gatk4/mergemutectstats": { "git_sha": "169b2b96c1167f89ab07127b7057c1d90a6996c7" }, + "gatk4/mergevcfs": { + "git_sha": "169b2b96c1167f89ab07127b7057c1d90a6996c7" + }, "gatk4/mutect2": { "git_sha": "169b2b96c1167f89ab07127b7057c1d90a6996c7" }, @@ -236,4 +239,4 @@ } } } -} +} \ No newline at end of file diff --git a/modules/nf-core/modules/gatk4/mergevcfs/main.nf b/modules/nf-core/modules/gatk4/mergevcfs/main.nf new file mode 100644 index 0000000000..964c1a3bbb --- /dev/null +++ b/modules/nf-core/modules/gatk4/mergevcfs/main.nf @@ -0,0 +1,46 @@ +process GATK4_MERGEVCFS { + tag "$meta.id" + label 'process_medium' + + conda (params.enable_conda ? "bioconda::gatk4=4.2.6.1" : null) + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/gatk4:4.2.6.1--hdfd78af_0': + 'quay.io/biocontainers/gatk4:4.2.6.1--hdfd78af_0' }" + + input: + tuple val(meta), path(vcf) + path dict + + output: + tuple val(meta), path('*.vcf.gz'), emit: vcf + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def input_list = vcf.collect{ "--INPUT $it"}.join(' ') + def reference_command = dict ? "--SEQUENCE_DICTIONARY $dict" : "" + + def avail_mem = 3 + if (!task.memory) { + log.info '[GATK MergeVcfs] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.' + } else { + avail_mem = task.memory.giga + } + """ + gatk --java-options "-Xmx${avail_mem}g" MergeVcfs \\ + $input_list \\ + --OUTPUT ${prefix}.vcf.gz \\ + $reference_command \\ + --TMP_DIR . \\ + $args + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + gatk4: \$(echo \$(gatk --version 2>&1) | sed 's/^.*(GATK) v//; s/ .*\$//') + END_VERSIONS + """ +} diff --git a/modules/nf-core/modules/gatk4/mergevcfs/meta.yml b/modules/nf-core/modules/gatk4/mergevcfs/meta.yml new file mode 100644 index 0000000000..8d4123d9e5 --- /dev/null +++ b/modules/nf-core/modules/gatk4/mergevcfs/meta.yml @@ -0,0 +1,43 @@ +name: gatk4_mergevcfs +description: Merges several vcf files +keywords: + - vcf + - merge +tools: + - gatk4: + description: | + Developed in the Data Sciences Platform at the Broad Institute, the toolkit offers a wide variety of tools + with a primary focus on variant discovery and genotyping. Its powerful processing engine + and high-performance computing features make it capable of taking on projects of any size. 
+ homepage: https://gatk.broadinstitute.org/hc/en-us + documentation: https://gatk.broadinstitute.org/hc/en-us/categories/360002369672s + doi: 10.1158/1538-7445.AM2017-3590 + licence: ["Apache-2.0"] +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test'] + - vcf: + type: list + description: Two or more VCF files + pattern: "*.{vcf,vcf.gz}" + - ref_dict: + type: file + description: Optional Sequence Dictionary as input + pattern: "*.dict" + - use_ref_dict: + type: boolean + description: Specify whether or not to use a given reference dictionary +output: + - vcf: + type: file + description: merged vcf file + pattern: "*.vcf.gz" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@kevinmenden" diff --git a/subworkflows/local/germline_variant_calling.nf b/subworkflows/local/germline_variant_calling.nf index 9b1859ae45..f77ebc9d65 100644 --- a/subworkflows/local/germline_variant_calling.nf +++ b/subworkflows/local/germline_variant_calling.nf @@ -113,6 +113,7 @@ workflow GERMLINE_VARIANT_CALLING { // STRELKA if (params.tools.contains('strelka')){ RUN_STRELKA_SINGLE(cram_recalibrated_intervals_gz_tbi, + dict, fasta, fasta_fai, intervals_bed_combine_gz) diff --git a/subworkflows/local/tumor_variant_calling.nf b/subworkflows/local/tumor_variant_calling.nf index 99aa57430d..1ea8063958 100644 --- a/subworkflows/local/tumor_variant_calling.nf +++ b/subworkflows/local/tumor_variant_calling.nf @@ -122,10 +122,11 @@ workflow TUMOR_ONLY_VARIANT_CALLING { } if (tools.contains('strelka')) { - RUN_STRELKA_SINGLE( cram_recalibrated_intervals_gz_tbi, - fasta, - fasta_fai, - intervals_bed_combine_gz) + RUN_STRELKA_SINGLE(cram_recalibrated_intervals_gz_tbi, + dict, + fasta, + fasta_fai, + intervals_bed_combine_gz) strelka_vcf = RUN_STRELKA_SINGLE.out.strelka_vcf ch_versions = ch_versions.mix(RUN_STRELKA_SINGLE.out.versions) diff --git 
a/subworkflows/nf-core/variantcalling/strelka/single/main.nf b/subworkflows/nf-core/variantcalling/strelka/single/main.nf index cf892011cb..d97051e722 100644 --- a/subworkflows/nf-core/variantcalling/strelka/single/main.nf +++ b/subworkflows/nf-core/variantcalling/strelka/single/main.nf @@ -1,11 +1,12 @@ include { CONCAT_VCF as CONCAT_STRELKA } from '../../../../../modules/local/concat_vcf/main' -include { MERGE_VCFS as MERGE_STRELKA } from '../../../../../modules/local/merge_vcfs/main' -include { MERGE_VCFS as MERGE_STRELKA_GENOME } from '../../../../../modules/local/merge_vcfs/main' +include { GATK4_MERGEVCFS as MERGE_STRELKA } from '../../../../../modules/nf-core/modules/gatk4/mergevcfs/main' +include { GATK4_MERGEVCFS as MERGE_STRELKA_GENOME } from '../../../../../modules/nf-core/modules/gatk4/mergevcfs/main' include { STRELKA_GERMLINE as STRELKA_SINGLE } from '../../../../../modules/nf-core/modules/strelka/germline/main' workflow RUN_STRELKA_SINGLE { take: cram // channel: [mandatory] [meta, cram, crai, interval.bed.gz, interval.bed.gz.tbi] + dict // channel: [mandatory] fasta // channel: [mandatory] fasta_fai // channel: [mandatory] intervals_bed_gz // channel: [optional] Contains a bed.gz file of all intervals combined provided with the cram input(s). Mandatory if interval files are used. @@ -34,8 +35,8 @@ workflow RUN_STRELKA_SINGLE { [groupKey(new_meta, meta.num_intervals), vcf] }.groupTuple(), - fasta_fai, - intervals_bed_gz) + dict + ) MERGE_STRELKA_GENOME( strelka_genome_vcf.intervals @@ -45,8 +46,8 @@ workflow RUN_STRELKA_SINGLE { [groupKey(new_meta, meta.num_intervals), vcf] }.groupTuple(), - fasta_fai, - intervals_bed_gz) + dict + ) // Mix output channels for "no intervals" and "with intervals" results strelka_vcf = Channel.empty().mix( From a7716a36cff0097ebe5122d2b8581df0665e78a3 Mon Sep 17 00:00:00 2001 From: asp8200 Date: Sat, 28 May 2022 14:13:48 +0000 Subject: [PATCH 04/24] Removing redundant home-brewed nextflow-module with GATK's mergeVcfs. 
--- modules/local/merge_vcfs/main.nf | 43 -------------------------------- 1 file changed, 43 deletions(-) delete mode 100644 modules/local/merge_vcfs/main.nf diff --git a/modules/local/merge_vcfs/main.nf b/modules/local/merge_vcfs/main.nf deleted file mode 100644 index 5e78f3a3c1..0000000000 --- a/modules/local/merge_vcfs/main.nf +++ /dev/null @@ -1,43 +0,0 @@ -process MERGE_VCFS { - tag "$meta.id" - label 'process_medium' - - conda (params.enable_conda ? "bioconda::gatk4=4.2.5.0" : null) - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/gatk4:4.2.5.0--hdfd78af_0' : - 'quay.io/biocontainers/gatk4:4.2.5.0--hdfd78af_0' }" - - input: - tuple val(meta), path(vcf) - path fasta_fai - path target_bed - - output: - tuple val(meta), path("${prefix}.vcf.gz") , emit: vcf - tuple val(meta), path("${prefix}.vcf.gz.tbi"), emit: tbi - path "versions.yml" , emit: versions - - when: - task.ext.when == null || task.ext.when - - script: - def args = task.ext.args ?: '' - prefix = task.ext.prefix ?: "${meta.id}" - def target_options = target_bed ? "-t ${target_bed}" : "" - - def avail_mem = 3 - if (!task.memory) { - log.info '[GATK ApplyBQSRSpark] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.' - } else { - avail_mem = task.memory.giga - } - """ - ls *.vcf.gz > ${prefix}.vcf.list - gatk --java-options "-Xmx${avail_mem}g" MergeVcfs -I ${prefix}.vcf.list -O ${prefix}.vcf.gz - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - gatk4: \$(echo \$(gatk --version 2>&1) | sed 's/^.*(GATK) v//; s/ .*\$//') - END_VERSIONS - """ -} From 767d99ceff4a66fd862ebce37edad78efcf3c8f7 Mon Sep 17 00:00:00 2001 From: asp8200 Date: Sat, 28 May 2022 15:25:25 +0000 Subject: [PATCH 05/24] Using GATK's MergeVcfs for Mutect2 in somatic variant calling.
--- .../main.nf | 17 ++++++----------- 1 file changed, 6 insertions(+), 11 deletions(-) diff --git a/subworkflows/nf-core/gatk4/tumor_normal_somatic_variant_calling/main.nf b/subworkflows/nf-core/gatk4/tumor_normal_somatic_variant_calling/main.nf index d95488dfc4..f15c702021 100644 --- a/subworkflows/nf-core/gatk4/tumor_normal_somatic_variant_calling/main.nf +++ b/subworkflows/nf-core/gatk4/tumor_normal_somatic_variant_calling/main.nf @@ -2,8 +2,7 @@ // Run GATK mutect2 in tumor normal mode, getepileupsummaries, calculatecontamination, learnreadorientationmodel and filtermutectcalls // -include { TABIX_BGZIP as BGZIP_VC_MUTECT2 } from '../../../../modules/nf-core/modules/tabix/bgzip/main' -include { CONCAT_VCF as CONCAT_MUTECT2 } from '../../../../modules/local/concat_vcf/main' +include { GATK4_MERGEVCFS as MERGE_MUTECT2 } from '../../../../modules/nf-core/modules/gatk4/mergevcfs/main' include { GATK4_CALCULATECONTAMINATION as CALCULATECONTAMINATION } from '../../../../modules/nf-core/modules/gatk4/calculatecontamination/main' include { GATK4_FILTERMUTECTCALLS as FILTERMUTECTCALLS } from '../../../../modules/nf-core/modules/gatk4/filtermutectcalls/main' include { GATK4_GATHERPILEUPSUMMARIES as GATHERPILEUPSUMMARIES_NORMAL} from '../../../../modules/nf-core/modules/gatk4/gatherpileupsummaries/main' @@ -63,19 +62,16 @@ workflow GATK_TUMOR_NORMAL_SOMATIC_VARIANT_CALLING { }.set{ mutect2_f1r2_branch } //Only when using intervals - //Merge Mutect2 VCF - BGZIP_VC_MUTECT2(mutect2_vcf_branch.intervals) - - CONCAT_MUTECT2( - BGZIP_VC_MUTECT2.out.output + MERGE_MUTECT2( + mutect2_vcf_branch.intervals .map{ meta, vcf -> new_meta = [patient:meta.patient, normal_id:meta.normal_id, tumor_id:meta.tumor_id, gender:meta.gender, id:meta.tumor_id + "_vs_" + meta.normal_id, num_intervals:meta.num_intervals] [groupKey(new_meta, meta.num_intervals), vcf] }.groupTuple(), - fai, - intervals_bed_combine_gz) + dict + ) mutect2_vcf = Channel.empty().mix( CONCAT_MUTECT2.out.vcf, @@ -202,8 
+198,7 @@ workflow GATK_TUMOR_NORMAL_SOMATIC_VARIANT_CALLING { FILTERMUTECTCALLS ( ch_filtermutect_in, fasta, fai, dict ) - ch_versions = ch_versions.mix(BGZIP_VC_MUTECT2.out.versions) - ch_versions = ch_versions.mix(CONCAT_MUTECT2.out.versions) + ch_versions = ch_versions.mix(MERGE_MUTECT2.out.versions) ch_versions = ch_versions.mix(CALCULATECONTAMINATION.out.versions) ch_versions = ch_versions.mix(FILTERMUTECTCALLS.out.versions) ch_versions = ch_versions.mix(GETPILEUPSUMMARIES_NORMAL.out.versions) From f8d857ce68da71c588946a242348761b82d215b2 Mon Sep 17 00:00:00 2001 From: asp8200 Date: Sat, 28 May 2022 15:36:39 +0000 Subject: [PATCH 06/24] Using GATK's MergeVcfs for FreeBayes VCFs. --- .../local/germline_variant_calling.nf | 2 +- subworkflows/local/pair_variant_calling.nf | 2 +- subworkflows/local/tumor_variant_calling.nf | 2 +- .../nf-core/variantcalling/freebayes/main.nf | 26 +++++++++---------- 4 files changed, 15 insertions(+), 17 deletions(-) diff --git a/subworkflows/local/germline_variant_calling.nf b/subworkflows/local/germline_variant_calling.nf index f77ebc9d65..26a0a93408 100644 --- a/subworkflows/local/germline_variant_calling.nf +++ b/subworkflows/local/germline_variant_calling.nf @@ -77,7 +77,7 @@ workflow GERMLINE_VARIANT_CALLING { .map{ meta, cram, crai, intervals -> [meta, cram, crai, [], [], intervals] } - RUN_FREEBAYES(cram_recalibrated_intervals_freebayes, fasta, fasta_fai, intervals_bed_combine_gz) + RUN_FREEBAYES(cram_recalibrated_intervals_freebayes, dict, fasta, fasta_fai, intervals_bed_combine_gz) freebayes_vcf = RUN_FREEBAYES.out.freebayes_vcf ch_versions = ch_versions.mix(RUN_FREEBAYES.out.versions) diff --git a/subworkflows/local/pair_variant_calling.nf b/subworkflows/local/pair_variant_calling.nf index 6d0f1b59b0..e172066dee 100644 --- a/subworkflows/local/pair_variant_calling.nf +++ b/subworkflows/local/pair_variant_calling.nf @@ -92,7 +92,7 @@ workflow PAIR_VARIANT_CALLING { } if (tools.contains('freebayes')){ -
RUN_FREEBAYES_SOMATIC(cram_pair_intervals, fasta, fasta_fai, intervals_bed_combine_gz) + RUN_FREEBAYES_SOMATIC(cram_pair_intervals, dict, fasta, fasta_fai, intervals_bed_combine_gz) freebayes_vcf = RUN_FREEBAYES_SOMATIC.out.freebayes_vcf ch_versions = ch_versions.mix(RUN_FREEBAYES_SOMATIC.out.versions) diff --git a/subworkflows/local/tumor_variant_calling.nf b/subworkflows/local/tumor_variant_calling.nf index 1ea8063958..5ca8cf9560 100644 --- a/subworkflows/local/tumor_variant_calling.nf +++ b/subworkflows/local/tumor_variant_calling.nf @@ -90,7 +90,7 @@ workflow TUMOR_ONLY_VARIANT_CALLING { [meta, cram, crai, [], [], intervals] } - RUN_FREEBAYES(cram_recalibrated_intervals_freebayes, fasta, fasta_fai, intervals_bed_combine_gz) + RUN_FREEBAYES(cram_recalibrated_intervals_freebayes, dict, fasta, fasta_fai, intervals_bed_combine_gz) freebayes_vcf = RUN_FREEBAYES.out.freebayes_vcf ch_versions = ch_versions.mix(RUN_FREEBAYES.out.versions) diff --git a/subworkflows/nf-core/variantcalling/freebayes/main.nf b/subworkflows/nf-core/variantcalling/freebayes/main.nf index 2e2eb1f96c..d56ea77822 100644 --- a/subworkflows/nf-core/variantcalling/freebayes/main.nf +++ b/subworkflows/nf-core/variantcalling/freebayes/main.nf @@ -1,12 +1,13 @@ -include { BCFTOOLS_SORT } from '../../../../modules/nf-core/modules/bcftools/sort/main' -include { TABIX_BGZIP as BGZIP_VC_FREEBAYES } from '../../../../modules/nf-core/modules/tabix/bgzip/main' -include { CONCAT_VCF as CONCAT_FREEBAYES } from '../../../../modules/local/concat_vcf/main' -include { FREEBAYES } from '../../../../modules/nf-core/modules/freebayes/main' -include { TABIX_TABIX as TABIX_VC_FREEBAYES } from '../../../../modules/nf-core/modules/tabix/tabix/main' +include { BCFTOOLS_SORT } from '../../../../modules/nf-core/modules/bcftools/sort/main' +include { TABIX_BGZIP as BGZIP_VC_FREEBAYES } from '../../../../modules/nf-core/modules/tabix/bgzip/main' +include { GATK4_MERGEVCFS as MERGE_FREEBAYES } from 
'../../../../modules/nf-core/modules/gatk4/mergevcfs/main' +include { FREEBAYES } from '../../../../modules/nf-core/modules/freebayes/main' +include { TABIX_TABIX as TABIX_VC_FREEBAYES } from '../../../../modules/nf-core/modules/tabix/tabix/main' workflow RUN_FREEBAYES { take: cram // channel: [mandatory] [meta, cram, crai, [], [], interval] + dict fasta // channel: [mandatory] fasta_fai // channel: [mandatory] intervals_bed_gz // channel: [optional] Contains a bed.gz file of all intervals combined provided with the cram input(s). Mandatory if interval files are used. @@ -31,10 +32,8 @@ workflow RUN_FREEBAYES { TABIX_VC_FREEBAYES(BCFTOOLS_SORT.out.vcf) // Only when using intervals - BGZIP_VC_FREEBAYES(freebayes_vcf_out.intervals) - - CONCAT_FREEBAYES( - BGZIP_VC_FREEBAYES.out.output + MERGE_FREEBAYES( + freebayes_vcf_out.intervals .map{ meta, vcf -> new_id = meta.tumor_id ? meta.tumor_id + "_vs_" + meta.normal_id : meta.sample @@ -43,12 +42,12 @@ workflow RUN_FREEBAYES { : [patient:meta.patient, sample:meta.sample, status:meta.status, gender:meta.gender, id:new_id, num_intervals:meta.num_intervals] [groupKey(new_meta, meta.num_intervals), vcf] }.groupTuple(), - fasta_fai, - intervals_bed_gz) + dict + ) // Mix output channels for "no intervals" and "with intervals" results freebayes_vcf = Channel.empty().mix( - CONCAT_FREEBAYES.out.vcf, + MERGE_FREEBAYES.out.vcf, freebayes_vcf_out.no_intervals) .map{ meta, vcf -> @@ -60,8 +59,7 @@ workflow RUN_FREEBAYES { } ch_versions = ch_versions.mix(BCFTOOLS_SORT.out.versions) - ch_versions = ch_versions.mix(BGZIP_VC_FREEBAYES.out.versions) - ch_versions = ch_versions.mix(CONCAT_FREEBAYES.out.versions) + ch_versions = ch_versions.mix(MERGE_FREEBAYES.out.versions) ch_versions = ch_versions.mix(FREEBAYES.out.versions) ch_versions = ch_versions.mix(TABIX_VC_FREEBAYES.out.versions) From 58fa5703953dce0ec92e67cd5f561db067fb4a9d Mon Sep 17 00:00:00 2001 From: asp8200 Date: Sat, 28 May 2022 15:53:57 +0000 Subject: [PATCH 07/24] Using 
GATKs mergeVcfs for deepvariant. --- conf/modules.config | 6 +-- .../local/germline_variant_calling.nf | 2 +- .../variantcalling/deepvariant/main.nf | 37 ++++++++----------- .../variantcalling/strelka/single/main.nf | 9 ++--- 4 files changed, 24 insertions(+), 30 deletions(-) diff --git a/conf/modules.config b/conf/modules.config index aab02a2c3a..6910961183 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -484,14 +484,14 @@ process{ } // DEEPVARIANT - withName: 'CONCAT_DEEPVARIANT_.*' { + withName: 'MERGE_DEEPVARIANT_.*' { publishDir = [ mode: params.publish_dir_mode, path: { "${params.outdir}/variant_calling/${meta.id}/deepvariant" }, pattern: "*{vcf.gz,vcf.gz.tbi}" ] } - withName: 'CONCAT_DEEPVARIANT_GVCF' { + withName: 'MERGE_DEEPVARIANT_GVCF' { ext.prefix = {"${meta.id}.g"} } withName: 'DEEPVARIANT' { @@ -513,7 +513,7 @@ process{ } // FREEBAYES - withName: 'CONCAT_FREEBAYES' { + withName: 'MERGE_FREEBAYES' { publishDir = [ mode: params.publish_dir_mode, path: { "${params.outdir}/variant_calling/${meta.id}/freebayes" }, diff --git a/subworkflows/local/germline_variant_calling.nf b/subworkflows/local/germline_variant_calling.nf index 26a0a93408..cc13d91cd2 100644 --- a/subworkflows/local/germline_variant_calling.nf +++ b/subworkflows/local/germline_variant_calling.nf @@ -64,7 +64,7 @@ workflow GERMLINE_VARIANT_CALLING { // DEEPVARIANT if(params.tools.contains('deepvariant')){ - RUN_DEEPVARIANT(cram_recalibrated_intervals, fasta, fasta_fai, intervals_bed_combine_gz) + RUN_DEEPVARIANT(cram_recalibrated_intervals, dict, fasta, fasta_fai, intervals_bed_combine_gz) deepvariant_vcf = RUN_DEEPVARIANT.out.deepvariant_vcf ch_versions = ch_versions.mix(RUN_DEEPVARIANT.out.versions) diff --git a/subworkflows/nf-core/variantcalling/deepvariant/main.nf b/subworkflows/nf-core/variantcalling/deepvariant/main.nf index 0929ea7eca..e44ea5e7ba 100644 --- a/subworkflows/nf-core/variantcalling/deepvariant/main.nf +++ 
b/subworkflows/nf-core/variantcalling/deepvariant/main.nf @@ -1,16 +1,15 @@ -include { TABIX_BGZIP as BGZIP_VC_DEEPVARIANT_GVCF } from '../../../../modules/nf-core/modules/tabix/bgzip/main' -include { TABIX_BGZIP as BGZIP_VC_DEEPVARIANT_VCF } from '../../../../modules/nf-core/modules/tabix/bgzip/main' -include { CONCAT_VCF as CONCAT_DEEPVARIANT_GVCF } from '../../../../modules/local/concat_vcf/main' -include { CONCAT_VCF as CONCAT_DEEPVARIANT_VCF } from '../../../../modules/local/concat_vcf/main' -include { DEEPVARIANT } from '../../../../modules/nf-core/modules/deepvariant/main' -include { TABIX_TABIX as TABIX_VC_DEEPVARIANT_GVCF } from '../../../../modules/nf-core/modules/tabix/tabix/main' -include { TABIX_TABIX as TABIX_VC_DEEPVARIANT_VCF } from '../../../../modules/nf-core/modules/tabix/tabix/main' +include { GATK4_MERGEVCFS as MERGE_DEEPVARIANT_GVCF } from '../../../../modules/nf-core/modules/gatk4/mergevcfs/main' +include { GATK4_MERGEVCFS as MERGE_DEEPVARIANT_VCF } from '../../../../modules/nf-core/modules/gatk4/mergevcfs/main' +include { DEEPVARIANT } from '../../../../modules/nf-core/modules/deepvariant/main' +include { TABIX_TABIX as TABIX_VC_DEEPVARIANT_GVCF } from '../../../../modules/nf-core/modules/tabix/tabix/main' +include { TABIX_TABIX as TABIX_VC_DEEPVARIANT_VCF } from '../../../../modules/nf-core/modules/tabix/tabix/main' //TODO: benchmark if it is better to provide multiple bed files & run on multiple machines + mergeing afterwards || one containing all intervals and run on one larger machine // Deepvariant: https://github.com/google/deepvariant/issues/510 workflow RUN_DEEPVARIANT { take: cram // channel: [mandatory] [meta, cram, crai, interval] + dict // channel: [optional] fasta // channel: [mandatory] fasta_fai // channel: [mandatory] intervals_bed_gz // channel: [optional] Contains a bed.gz file of all intervals combined provided with the cram input(s). Mandatory if interval files are used. 
@@ -39,42 +38,38 @@ workflow RUN_DEEPVARIANT { BGZIP_VC_DEEPVARIANT_VCF(deepvariant_vcf_out.intervals) BGZIP_VC_DEEPVARIANT_GVCF(deepvariant_gvcf_out.intervals) - CONCAT_DEEPVARIANT_VCF( - BGZIP_VC_DEEPVARIANT_VCF.out.output + MERGE_DEEPVARIANT_VCF( + deepvariant_vcf_out.intervals .map{ meta, vcf -> new_meta = [patient:meta.patient, sample:meta.sample, status:meta.status, gender:meta.gender, id:meta.sample, num_intervals:meta.num_intervals] [groupKey(new_meta, meta.num_intervals), vcf] }.groupTuple(), - fasta_fai, - intervals_bed_gz) + dict) - CONCAT_DEEPVARIANT_GVCF( - BGZIP_VC_DEEPVARIANT_GVCF.out.output + MERGE_DEEPVARIANT_GVCF( + deepvariant_gvcf_out.intervals .map{ meta, vcf -> new_meta = [patient:meta.patient, sample:meta.sample, status:meta.status, gender:meta.gender, id:meta.sample, num_intervals:meta.num_intervals] [groupKey(new_meta, meta.num_intervals), vcf] }.groupTuple(), - fasta_fai, - intervals_bed_gz) + dict) // Mix output channels for "no intervals" and "with intervals" results deepvariant_vcf = Channel.empty().mix( - CONCAT_DEEPVARIANT_GVCF.out.vcf, - CONCAT_DEEPVARIANT_VCF.out.vcf, + MERGE_DEEPVARIANT_GVCF.out.vcf, + MERGE_DEEPVARIANT_VCF.out.vcf, deepvariant_gvcf_out.no_intervals, deepvariant_vcf_out.no_intervals) .map{ meta, vcf -> [[patient:meta.patient, sample:meta.sample, status:meta.status, gender:meta.gender, id:meta.sample, num_intervals:meta.num_intervals, variantcaller:"Deepvariant"], vcf] } - ch_versions = ch_versions.mix(BGZIP_VC_DEEPVARIANT_GVCF.out.versions) - ch_versions = ch_versions.mix(BGZIP_VC_DEEPVARIANT_VCF.out.versions) - ch_versions = ch_versions.mix(CONCAT_DEEPVARIANT_GVCF.out.versions) - ch_versions = ch_versions.mix(CONCAT_DEEPVARIANT_VCF.out.versions) + ch_versions = ch_versions.mix(MERGE_DEEPVARIANT_GVCF.out.versions) + ch_versions = ch_versions.mix(MERGE_DEEPVARIANT_VCF.out.versions) ch_versions = ch_versions.mix(DEEPVARIANT.out.versions) ch_versions = ch_versions.mix(TABIX_VC_DEEPVARIANT_GVCF.out.versions) 
ch_versions = ch_versions.mix(TABIX_VC_DEEPVARIANT_VCF.out.versions) diff --git a/subworkflows/nf-core/variantcalling/strelka/single/main.nf b/subworkflows/nf-core/variantcalling/strelka/single/main.nf index d97051e722..0536905ade 100644 --- a/subworkflows/nf-core/variantcalling/strelka/single/main.nf +++ b/subworkflows/nf-core/variantcalling/strelka/single/main.nf @@ -1,12 +1,11 @@ -include { CONCAT_VCF as CONCAT_STRELKA } from '../../../../../modules/local/concat_vcf/main' -include { GATK4_MERGEVCFS as MERGE_STRELKA } from '../../../../../modules/nf-core/modules/gatk4/mergevcfs/main' -include { GATK4_MERGEVCFS as MERGE_STRELKA_GENOME } from '../../../../../modules/nf-core/modules/gatk4/mergevcfs/main' -include { STRELKA_GERMLINE as STRELKA_SINGLE } from '../../../../../modules/nf-core/modules/strelka/germline/main' +include { GATK4_MERGEVCFS as MERGE_STRELKA } from '../../../../../modules/nf-core/modules/gatk4/mergevcfs/main' +include { GATK4_MERGEVCFS as MERGE_STRELKA_GENOME } from '../../../../../modules/nf-core/modules/gatk4/mergevcfs/main' +include { STRELKA_GERMLINE as STRELKA_SINGLE } from '../../../../../modules/nf-core/modules/strelka/germline/main' workflow RUN_STRELKA_SINGLE { take: cram // channel: [mandatory] [meta, cram, crai, interval.bed.gz, interval.bed.gz.tbi] - dict // channel: [mandatory] + dict // channel: [optional] fasta // channel: [mandatory] fasta_fai // channel: [mandatory] intervals_bed_gz // channel: [optional] Contains a bed.gz file of all intervals combined provided with the cram input(s). Mandatory if interval files are used. From 4c7bb2f745cb1f0c19e383b751edb1c3136d27b7 Mon Sep 17 00:00:00 2001 From: asp8200 Date: Sat, 28 May 2022 17:44:22 +0000 Subject: [PATCH 08/24] Removing redundant calls to BGZIP_VC in deepvariant module. 
--- subworkflows/nf-core/variantcalling/deepvariant/main.nf | 2 -- 1 file changed, 2 deletions(-) diff --git a/subworkflows/nf-core/variantcalling/deepvariant/main.nf b/subworkflows/nf-core/variantcalling/deepvariant/main.nf index e44ea5e7ba..9e799c2a40 100644 --- a/subworkflows/nf-core/variantcalling/deepvariant/main.nf +++ b/subworkflows/nf-core/variantcalling/deepvariant/main.nf @@ -35,8 +35,6 @@ workflow RUN_DEEPVARIANT { TABIX_VC_DEEPVARIANT_GVCF(deepvariant_gvcf_out.no_intervals) // Only when using intervals - BGZIP_VC_DEEPVARIANT_VCF(deepvariant_vcf_out.intervals) - BGZIP_VC_DEEPVARIANT_GVCF(deepvariant_gvcf_out.intervals) MERGE_DEEPVARIANT_VCF( deepvariant_vcf_out.intervals From fe5d8156d06fd22e06e157c65cf2f9d63d7691d1 Mon Sep 17 00:00:00 2001 From: asp8200 Date: Sun, 29 May 2022 06:35:57 +0000 Subject: [PATCH 09/24] Fixing legacy CONCAT_MUTECT2 references in tumor_normal_somatic_variant_calling --- .../gatk4/tumor_normal_somatic_variant_calling/main.nf | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/subworkflows/nf-core/gatk4/tumor_normal_somatic_variant_calling/main.nf b/subworkflows/nf-core/gatk4/tumor_normal_somatic_variant_calling/main.nf index f15c702021..b0f30883bd 100644 --- a/subworkflows/nf-core/gatk4/tumor_normal_somatic_variant_calling/main.nf +++ b/subworkflows/nf-core/gatk4/tumor_normal_somatic_variant_calling/main.nf @@ -74,11 +74,11 @@ workflow GATK_TUMOR_NORMAL_SOMATIC_VARIANT_CALLING { ) mutect2_vcf = Channel.empty().mix( - CONCAT_MUTECT2.out.vcf, + MERGE_MUTECT2.out.vcf, mutect2_vcf_branch.no_intervals) mutect2_tbi = Channel.empty().mix( - CONCAT_MUTECT2.out.tbi, + MERGE_MUTECT2.out.tbi, mutect2_tbi_branch.no_intervals) //Merge Muteect2 Stats From 9f6b54155a9c006a32ec1efd47586a9b9dcbb182 Mon Sep 17 00:00:00 2001 From: asp8200 Date: Mon, 30 May 2022 10:53:41 +0000 Subject: [PATCH 10/24] Using mergeVcfs for Haplotypecaller --- conf/modules.config | 4 ++-- .../variantcalling/haplotypecaller/main.nf | 19 +++++++------------ 2 files changed, 9
insertions(+), 14 deletions(-) diff --git a/conf/modules.config b/conf/modules.config index 6910961183..c04d4b9fc8 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -547,7 +547,7 @@ process{ } // HAPLOTYPECALLER - withName: 'CONCAT_HAPLOTYPECALLER' { + withName: 'MERGE_HAPLOTYPECALLER' { ext.prefix = {"${meta.id}.g"} publishDir = [ enabled: !params.no_intervals, @@ -750,7 +750,7 @@ process{ ] } - withName: 'CONCAT_MUTECT2.*' { + withName: 'MERGE_MUTECT2.*' { publishDir = [ mode: params.publish_dir_mode, path: { "${params.outdir}/variant_calling/${meta.id}/mutect2" }, diff --git a/subworkflows/nf-core/variantcalling/haplotypecaller/main.nf b/subworkflows/nf-core/variantcalling/haplotypecaller/main.nf index fe7e24fbbf..69fcbdb3a3 100644 --- a/subworkflows/nf-core/variantcalling/haplotypecaller/main.nf +++ b/subworkflows/nf-core/variantcalling/haplotypecaller/main.nf @@ -1,5 +1,4 @@ -include { TABIX_BGZIP as BGZIP_VC_HAPLOTYPECALLER } from '../../../../modules/nf-core/modules/tabix/bgzip/main' -include { CONCAT_VCF as CONCAT_HAPLOTYPECALLER } from '../../../../modules/local/concat_vcf/main' +include { GATK4_MERGEVCFS as MERGE_HAPLOTYPECALLER } from '../../../../modules/nf-core/modules/gatk4/mergevcfs/main' include { GATK4_GENOTYPEGVCFS as GENOTYPEGVCFS } from '../../../../modules/nf-core/modules/gatk4/genotypegvcfs/main' include { GATK4_HAPLOTYPECALLER as HAPLOTYPECALLER } from '../../../../modules/nf-core/modules/gatk4/haplotypecaller/main' include { GATK_JOINT_GERMLINE_VARIANT_CALLING } from '../../../../subworkflows/nf-core/gatk4/joint_germline_variant_calling/main' @@ -39,25 +38,22 @@ workflow RUN_HAPLOTYPECALLER { }.set{haplotypecaller_tbi_branch} // Only when using intervals - BGZIP_VC_HAPLOTYPECALLER(haplotypecaller_vcf_branch.intervals) - - CONCAT_HAPLOTYPECALLER( - BGZIP_VC_HAPLOTYPECALLER.out.output + MERGE_HAPLOTYPECALLER( + haplotypecaller_vcf_branch.intervals .map{ meta, vcf -> new_meta = [patient:meta.patient, sample:meta.sample, 
status:meta.status, gender:meta.gender, id:meta.sample, num_intervals:meta.num_intervals] [groupKey(new_meta, new_meta.num_intervals), vcf] }.groupTuple(), - fasta_fai, - intervals_bed_gz) + dict) haplotypecaller_vcf = Channel.empty().mix( - CONCAT_HAPLOTYPECALLER.out.vcf, + MERGE_HAPLOTYPECALLER.out.vcf, haplotypecaller_vcf_branch.no_intervals) haplotypecaller_tbi = Channel.empty().mix( - CONCAT_HAPLOTYPECALLER.out.tbi, + MERGE_HAPLOTYPECALLER.out.tbi, haplotypecaller_vcf_branch.no_intervals) // genotype_gvcf_to_call = haplotypecaller_gvcf.join(haplotypecaller_gvcf_tbi) @@ -112,8 +108,7 @@ workflow RUN_HAPLOTYPECALLER { } - ch_versions = ch_versions.mix(BGZIP_VC_HAPLOTYPECALLER.out.versions) - ch_versions = ch_versions.mix(CONCAT_HAPLOTYPECALLER.out.versions) + ch_versions = ch_versions.mix(MERGE_HAPLOTYPECALLER.out.versions) //ch_versions = ch_versions.mix(GENOTYPEGVCFS.out.versions) //ch_versions = ch_versions.mix(GATK_JOINT_GERMLINE_VARIANT_CALLING.out.versions) ch_versions = ch_versions.mix(HAPLOTYPECALLER.out.versions) From 8e54a724d29c5fb76f854bf31dc9543eef055966 Mon Sep 17 00:00:00 2001 From: asp8200 Date: Mon, 30 May 2022 11:05:39 +0000 Subject: [PATCH 11/24] Adding tbi-file to the output from GATK4_MERGEVCFS. 
This change should also be done in nf-core/modules/modules/gatk4/mergevcfs/main --- modules/nf-core/modules/gatk4/mergevcfs/main.nf | 1 + 1 file changed, 1 insertion(+) diff --git a/modules/nf-core/modules/gatk4/mergevcfs/main.nf b/modules/nf-core/modules/gatk4/mergevcfs/main.nf index 964c1a3bbb..35930a6e51 100644 --- a/modules/nf-core/modules/gatk4/mergevcfs/main.nf +++ b/modules/nf-core/modules/gatk4/mergevcfs/main.nf @@ -13,6 +13,7 @@ process GATK4_MERGEVCFS { output: tuple val(meta), path('*.vcf.gz'), emit: vcf + tuple val(meta), path("*.tbi") , emit: tbi path "versions.yml" , emit: versions when: From 76d7abf8de629581436b12cbe55f1e26b051e9a6 Mon Sep 17 00:00:00 2001 From: asp8200 Date: Mon, 30 May 2022 20:46:03 +0000 Subject: [PATCH 12/24] Using GATKs MergeVcfs for VCF-files from Manta --- conf/modules.config | 12 +-- .../local/germline_variant_calling.nf | 1 + subworkflows/local/pair_variant_calling.nf | 1 + subworkflows/local/tumor_variant_calling.nf | 1 + .../variantcalling/manta/germline/main.nf | 52 +++++-------- .../variantcalling/manta/somatic/main.nf | 73 +++++++------------ .../variantcalling/manta/tumoronly/main.nf | 52 +++++-------- 7 files changed, 74 insertions(+), 118 deletions(-) diff --git a/conf/modules.config b/conf/modules.config index bf9afa7a57..5a887975aa 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -578,20 +578,20 @@ process{ } // MANTA - withName: 'CONCAT_MANTA.*' { + withName: 'MERGE_MANTA.*' { publishDir = [ mode: params.publish_dir_mode, path: { "${params.outdir}/variant_calling/${meta.id}/manta" }, pattern: "*{vcf.gz,vcf.gz.tbi}" ] } - withName: 'CONCAT_MANTA_DIPLOID' { + withName: 'MERGE_MANTA_DIPLOID' { ext.prefix = {"${meta.id}.diploid_sv"} } - withName: 'CONCAT_MANTA_SMALL_INDELS' { + withName: 'MERGE_MANTA_SMALL_INDELS' { ext.prefix = {"${meta.id}.candidate_small_indels"} } - withName: 'CONCAT_MANTA_SV' { + withName: 'MERGE_MANTA_SV' { ext.prefix = {"${meta.id}.candidate_sv"} } withName: 'MANTA.*' { @@ -736,7 
+736,7 @@ process{ } //MANTA - withName: 'CONCAT_MANTA_TUMOR' { + withName: 'MERGE_MANTA_TUMOR' { ext.prefix = {"${meta.id}.tumor_sv"} } @@ -949,7 +949,7 @@ process{ } //MANTA - withName: 'CONCAT_MANTA_SOMATIC' { + withName: 'MERGE_MANTA_SOMATIC' { ext.prefix = {"${meta.id}.somatic_sv"} } diff --git a/subworkflows/local/germline_variant_calling.nf b/subworkflows/local/germline_variant_calling.nf index cc13d91cd2..0314705a04 100644 --- a/subworkflows/local/germline_variant_calling.nf +++ b/subworkflows/local/germline_variant_calling.nf @@ -102,6 +102,7 @@ workflow GERMLINE_VARIANT_CALLING { // MANTA if (params.tools.contains('manta')){ RUN_MANTA_GERMLINE (cram_recalibrated_intervals_gz_tbi, + dict, fasta, fasta_fai, intervals_bed_combine_gz) diff --git a/subworkflows/local/pair_variant_calling.nf b/subworkflows/local/pair_variant_calling.nf index e172066dee..d8fb131a9c 100644 --- a/subworkflows/local/pair_variant_calling.nf +++ b/subworkflows/local/pair_variant_calling.nf @@ -100,6 +100,7 @@ workflow PAIR_VARIANT_CALLING { if (tools.contains('manta')) { RUN_MANTA_SOMATIC( cram_pair_intervals_gz_tbi, + dict, fasta, fasta_fai, intervals_bed_combine_gz) diff --git a/subworkflows/local/tumor_variant_calling.nf b/subworkflows/local/tumor_variant_calling.nf index d89232fbe2..7e49b905bd 100644 --- a/subworkflows/local/tumor_variant_calling.nf +++ b/subworkflows/local/tumor_variant_calling.nf @@ -128,6 +128,7 @@ workflow TUMOR_ONLY_VARIANT_CALLING { if (tools.contains('manta')){ RUN_MANTA_TUMORONLY(cram_recalibrated_intervals_gz_tbi, + dict, fasta, fasta_fai, intervals_bed_combine_gz) diff --git a/subworkflows/nf-core/variantcalling/manta/germline/main.nf b/subworkflows/nf-core/variantcalling/manta/germline/main.nf index 4d71dc1250..979306225a 100644 --- a/subworkflows/nf-core/variantcalling/manta/germline/main.nf +++ b/subworkflows/nf-core/variantcalling/manta/germline/main.nf @@ -1,9 +1,6 @@ -include { TABIX_BGZIP as BGZIP_VC_MANTA_DIPLOID } from 
'../../../../../modules/nf-core/modules/tabix/bgzip/main' -include { TABIX_BGZIP as BGZIP_VC_MANTA_SMALL_INDELS } from '../../../../../modules/nf-core/modules/tabix/bgzip/main' -include { TABIX_BGZIP as BGZIP_VC_MANTA_SV } from '../../../../../modules/nf-core/modules/tabix/bgzip/main' -include { CONCAT_VCF as CONCAT_MANTA_DIPLOID } from '../../../../../modules/local/concat_vcf/main' -include { CONCAT_VCF as CONCAT_MANTA_SMALL_INDELS } from '../../../../../modules/local/concat_vcf/main' -include { CONCAT_VCF as CONCAT_MANTA_SV } from '../../../../../modules/local/concat_vcf/main' +include { GATK4_MERGEVCFS as MERGE_MANTA_DIPLOID } from '../../../../../modules/nf-core/modules/gatk4/mergevcfs/main' +include { GATK4_MERGEVCFS as MERGE_MANTA_SMALL_INDELS } from '../../../../../modules/nf-core/modules/gatk4/mergevcfs/main' +include { GATK4_MERGEVCFS as MERGE_MANTA_SV } from '../../../../../modules/nf-core/modules/gatk4/mergevcfs/main' include { MANTA_GERMLINE } from '../../../../../modules/nf-core/modules/manta/germline/main' // TODO: Research if splitting by intervals is ok, we pretend for now it is fine. @@ -11,6 +8,7 @@ include { MANTA_GERMLINE } from '../../../../../modu workflow RUN_MANTA_GERMLINE { take: cram // channel: [mandatory] [meta, cram, crai, interval.bed.gz, interval.bed.gz.tbi] + dict // channel: [optional] fasta // channel: [mandatory] fasta_fai // channel: [mandatory] intervals_bed_gz // channel: [optional] Contains a bed.gz file of all intervals combined provided with the cram input(s). Mandatory if interval files are used. 
@@ -38,50 +36,41 @@ workflow RUN_MANTA_GERMLINE { }.set{manta_diploid_sv_vcf} // Only when using intervals - BGZIP_VC_MANTA_SMALL_INDELS(manta_small_indels_vcf.intervals) - - CONCAT_MANTA_SMALL_INDELS( - BGZIP_VC_MANTA_SMALL_INDELS.out.output + MERGE_MANTA_SMALL_INDELS( + manta_small_indels_vcf.intervals .map{ meta, vcf -> new_meta = [patient:meta.patient, sample:meta.sample, status:meta.status, gender:meta.gender, id:meta.sample, num_intervals:meta.num_intervals] [groupKey(new_meta, meta.num_intervals), vcf] }.groupTuple(), - fasta_fai, - intervals_bed_gz) - - BGZIP_VC_MANTA_SV(manta_sv_vcf.intervals) + dict) - CONCAT_MANTA_SV( - BGZIP_VC_MANTA_SV.out.output + MERGE_MANTA_SV( + manta_sv_vcf.intervals .map{ meta, vcf -> new_meta = [patient:meta.patient, sample:meta.sample, status:meta.status, gender:meta.gender, id:meta.sample, num_intervals:meta.num_intervals] [ groupKey(new_meta, meta.num_intervals), vcf] }.groupTuple(), - fasta_fai, - intervals_bed_gz) - - BGZIP_VC_MANTA_DIPLOID(manta_diploid_sv_vcf.intervals) + dict) - CONCAT_MANTA_DIPLOID( - BGZIP_VC_MANTA_DIPLOID.out.output + MERGE_MANTA_DIPLOID( + manta_diploid_sv_vcf.intervals .map{ meta, vcf -> new_meta = [patient:meta.patient, sample:meta.sample, status:meta.status, gender:meta.gender, id:meta.sample, num_intervals:meta.num_intervals] [groupKey(new_meta, meta.num_intervals), vcf] }.groupTuple(), - fasta_fai, - intervals_bed_gz) + dict) // Mix output channels for "no intervals" and "with intervals" results manta_vcf = Channel.empty().mix( - CONCAT_MANTA_DIPLOID.out.vcf, - //CONCAT_MANTA_SMALL_INDELS.out.vcf, - CONCAT_MANTA_SV.out.vcf, + MERGE_MANTA_DIPLOID.out.vcf, + //MERGE_MANTA_SMALL_INDELS.out.vcf, + MERGE_MANTA_SV.out.vcf, manta_diploid_sv_vcf.no_intervals, //manta_small_indels_vcf.no_intervals, manta_sv_vcf.no_intervals) @@ -89,12 +78,9 @@ workflow RUN_MANTA_GERMLINE { [ [patient:meta.patient, sample:meta.sample, status:meta.status, gender:meta.gender, id:meta.sample, 
num_intervals:meta.num_intervals, variantcaller:"Manta"], vcf] } - ch_versions = ch_versions.mix(BGZIP_VC_MANTA_DIPLOID.out.versions) - ch_versions = ch_versions.mix(BGZIP_VC_MANTA_SMALL_INDELS.out.versions) - ch_versions = ch_versions.mix(BGZIP_VC_MANTA_SV.out.versions) - ch_versions = ch_versions.mix(CONCAT_MANTA_DIPLOID.out.versions) - ch_versions = ch_versions.mix(CONCAT_MANTA_SMALL_INDELS.out.versions) - ch_versions = ch_versions.mix(CONCAT_MANTA_SV.out.versions) + ch_versions = ch_versions.mix(MERGE_MANTA_DIPLOID.out.versions) + ch_versions = ch_versions.mix(MERGE_MANTA_SMALL_INDELS.out.versions) + ch_versions = ch_versions.mix(MERGE_MANTA_SV.out.versions) ch_versions = ch_versions.mix(MANTA_GERMLINE.out.versions) emit: diff --git a/subworkflows/nf-core/variantcalling/manta/somatic/main.nf b/subworkflows/nf-core/variantcalling/manta/somatic/main.nf index c3d108fc7a..db6c247750 100644 --- a/subworkflows/nf-core/variantcalling/manta/somatic/main.nf +++ b/subworkflows/nf-core/variantcalling/manta/somatic/main.nf @@ -1,16 +1,13 @@ -include { TABIX_BGZIP as BGZIP_VC_MANTA_DIPLOID } from '../../../../../modules/nf-core/modules/tabix/bgzip/main' -include { TABIX_BGZIP as BGZIP_VC_MANTA_SMALL_INDELS } from '../../../../../modules/nf-core/modules/tabix/bgzip/main' -include { TABIX_BGZIP as BGZIP_VC_MANTA_SOMATIC } from '../../../../../modules/nf-core/modules/tabix/bgzip/main' -include { TABIX_BGZIP as BGZIP_VC_MANTA_SV } from '../../../../../modules/nf-core/modules/tabix/bgzip/main' -include { CONCAT_VCF as CONCAT_MANTA_DIPLOID } from '../../../../../modules/local/concat_vcf/main' -include { CONCAT_VCF as CONCAT_MANTA_SMALL_INDELS } from '../../../../../modules/local/concat_vcf/main' -include { CONCAT_VCF as CONCAT_MANTA_SOMATIC } from '../../../../../modules/local/concat_vcf/main' -include { CONCAT_VCF as CONCAT_MANTA_SV } from '../../../../../modules/local/concat_vcf/main' +include { GATK4_MERGEVCFS as MERGE_MANTA_DIPLOID } from 
'../../../../../modules/nf-core/modules/gatk4/mergevcfs/main' +include { GATK4_MERGEVCFS as MERGE_MANTA_SMALL_INDELS } from '../../../../../modules/nf-core/modules/gatk4/mergevcfs/main' +include { GATK4_MERGEVCFS as MERGE_MANTA_SOMATIC } from '../../../../../modules/nf-core/modules/gatk4/mergevcfs/main' +include { GATK4_MERGEVCFS as MERGE_MANTA_SV } from '../../../../../modules/nf-core/modules/gatk4/mergevcfs/main' include { MANTA_SOMATIC } from '../../../../../modules/nf-core/modules/manta/somatic/main' workflow RUN_MANTA_SOMATIC { take: cram // channel: [mandatory] [meta, normal_cram, normal_crai, tumor_cram, tumor_crai, interval.bed.gz, interval.bed.gz.tbi] + dict // channel: [optional] fasta // channel: [mandatory] fasta_fai // channel: [mandatory] intervals_bed_gz // channel: [optional] Contains a bed.gz file of all intervals combined provided with the cram input(s). Mandatory if interval files are used. @@ -48,57 +45,45 @@ workflow RUN_MANTA_SOMATIC { }.set{manta_somatic_sv_vcf} //Only when using intervals - BGZIP_VC_MANTA_SV(manta_candidate_small_indels_vcf.intervals) - - CONCAT_MANTA_SV( - BGZIP_VC_MANTA_SV.out.output.map{ meta, vcf -> + MERGE_MANTA_SV( + manta_candidate_small_indels_vcf.intervals.map{ meta, vcf -> new_meta = [patient:meta.patient, normal_id:meta.normal_id, tumor_id:meta.tumor_id, gender:meta.gender, id:meta.tumor_id + "_vs_" + meta.normal_id, num_intervals:meta.num_intervals] [ groupKey(new_meta, meta.num_intervals), vcf] }.groupTuple(), - fasta_fai, - intervals_bed_gz) - - BGZIP_VC_MANTA_SMALL_INDELS(manta_candidate_sv_vcf.intervals) + dict) - CONCAT_MANTA_SMALL_INDELS( - BGZIP_VC_MANTA_SMALL_INDELS.out.output.map{ meta, vcf -> + MERGE_MANTA_SMALL_INDELS( + manta_candidate_sv_vcf.intervals.map{ meta, vcf -> new_meta = [patient:meta.patient, normal_id:meta.normal_id, tumor_id:meta.tumor_id, gender:meta.gender, id:meta.tumor_id + "_vs_" + meta.normal_id, num_intervals:meta.num_intervals] [groupKey(new_meta, meta.num_intervals), vcf] 
}.groupTuple(), - fasta_fai, - intervals_bed_gz) + dict) - BGZIP_VC_MANTA_DIPLOID(manta_diploid_sv_vcf.intervals) - - CONCAT_MANTA_DIPLOID( - BGZIP_VC_MANTA_DIPLOID.out.output.map{ meta, vcf -> + MERGE_MANTA_DIPLOID( + manta_diploid_sv_vcf.intervals.map{ meta, vcf -> new_meta = [patient:meta.patient, normal_id:meta.normal_id, tumor_id:meta.tumor_id, gender:meta.gender, id:meta.tumor_id + "_vs_" + meta.normal_id, num_intervals:meta.num_intervals] [groupKey(new_meta, meta.num_intervals), vcf] }.groupTuple(), - fasta_fai, - intervals_bed_gz) - - BGZIP_VC_MANTA_SOMATIC(manta_somatic_sv_vcf.intervals) + dict) - CONCAT_MANTA_SOMATIC( - BGZIP_VC_MANTA_SOMATIC.out.output.map{ meta, vcf -> + MERGE_MANTA_SOMATIC( + manta_somatic_sv_vcf.intervals.map{ meta, vcf -> new_meta = [patient:meta.patient, normal_id:meta.normal_id, tumor_id:meta.tumor_id, gender:meta.gender, id:meta.tumor_id + "_vs_" + meta.normal_id, num_intervals:meta.num_intervals] [groupKey(new_meta, meta.num_intervals), vcf] }.groupTuple(), - fasta_fai, - intervals_bed_gz) + dict) // Mix output channels for "no intervals" and "with intervals" results manta_vcf = Channel.empty().mix( - //CONCAT_MANTA_SV.out.vcf, - //CONCAT_MANTA_SMALL_INDELS.out.vcf, - CONCAT_MANTA_DIPLOID.out.vcf, - CONCAT_MANTA_SOMATIC.out.vcf, + //MERGE_MANTA_SV.out.vcf, + //MERGE_MANTA_SMALL_INDELS.out.vcf, + MERGE_MANTA_DIPLOID.out.vcf, + MERGE_MANTA_SOMATIC.out.vcf, //manta_candidate_sv_vcf.no_intervals, //manta_candidate_small_indels_vcf.no_intervals, manta_diploid_sv_vcf.no_intervals, @@ -109,7 +94,7 @@ workflow RUN_MANTA_SOMATIC { } manta_candidate_small_indels_vcf = Channel.empty().mix( - CONCAT_MANTA_SMALL_INDELS.out.vcf, + MERGE_MANTA_SMALL_INDELS.out.vcf, manta_candidate_small_indels_vcf.no_intervals ).map{ meta, vcf -> [[patient:meta.patient, normal_id:meta.normal_id, tumor_id:meta.tumor_id, gender:meta.gender, id:meta.tumor_id + "_vs_" + meta.normal_id, num_intervals:meta.num_intervals, variantcaller:"Manta"], @@ -117,21 +102,17 @@ 
workflow RUN_MANTA_SOMATIC { } manta_candidate_small_indels_vcf_tbi = Channel.empty().mix( - CONCAT_MANTA_SMALL_INDELS.out.tbi, + MERGE_MANTA_SMALL_INDELS.out.tbi, manta_candidate_small_indels_vcf_tbi.no_intervals ).map{ meta, vcf -> [[patient:meta.patient, normal_id:meta.normal_id, tumor_id:meta.tumor_id, gender:meta.gender, id:meta.tumor_id + "_vs_" + meta.normal_id, num_intervals:meta.num_intervals, variantcaller:"Manta"], vcf] } - ch_versions = ch_versions.mix(BGZIP_VC_MANTA_SV.out.versions) - ch_versions = ch_versions.mix(BGZIP_VC_MANTA_SMALL_INDELS.out.versions) - ch_versions = ch_versions.mix(BGZIP_VC_MANTA_DIPLOID.out.versions) - ch_versions = ch_versions.mix(BGZIP_VC_MANTA_SOMATIC.out.versions) - ch_versions = ch_versions.mix(CONCAT_MANTA_SV.out.versions) - ch_versions = ch_versions.mix(CONCAT_MANTA_SMALL_INDELS.out.versions) - ch_versions = ch_versions.mix(CONCAT_MANTA_DIPLOID.out.versions) - ch_versions = ch_versions.mix(CONCAT_MANTA_SOMATIC.out.versions) + ch_versions = ch_versions.mix(MERGE_MANTA_SV.out.versions) + ch_versions = ch_versions.mix(MERGE_MANTA_SMALL_INDELS.out.versions) + ch_versions = ch_versions.mix(MERGE_MANTA_DIPLOID.out.versions) + ch_versions = ch_versions.mix(MERGE_MANTA_SOMATIC.out.versions) ch_versions = ch_versions.mix(MANTA_SOMATIC.out.versions) emit: diff --git a/subworkflows/nf-core/variantcalling/manta/tumoronly/main.nf b/subworkflows/nf-core/variantcalling/manta/tumoronly/main.nf index b0c1bfc807..31aeb7eb10 100644 --- a/subworkflows/nf-core/variantcalling/manta/tumoronly/main.nf +++ b/subworkflows/nf-core/variantcalling/manta/tumoronly/main.nf @@ -1,9 +1,6 @@ -include { TABIX_BGZIP as BGZIP_VC_MANTA_SMALL_INDELS } from '../../../../../modules/nf-core/modules/tabix/bgzip/main' -include { TABIX_BGZIP as BGZIP_VC_MANTA_SV } from '../../../../../modules/nf-core/modules/tabix/bgzip/main' -include { TABIX_BGZIP as BGZIP_VC_MANTA_TUMOR } from '../../../../../modules/nf-core/modules/tabix/bgzip/main' -include { CONCAT_VCF as 
CONCAT_MANTA_SMALL_INDELS } from '../../../../../modules/local/concat_vcf/main' -include { CONCAT_VCF as CONCAT_MANTA_SV } from '../../../../../modules/local/concat_vcf/main' -include { CONCAT_VCF as CONCAT_MANTA_TUMOR } from '../../../../../modules/local/concat_vcf/main' +include { GATK4_MERGEVCFS as MERGE_MANTA_SMALL_INDELS } from '../../../../../modules/nf-core/modules/gatk4/mergevcfs/main' +include { GATK4_MERGEVCFS as MERGE_MANTA_SV } from '../../../../../modules/nf-core/modules/gatk4/mergevcfs/main' +include { GATK4_MERGEVCFS as MERGE_MANTA_TUMOR } from '../../../../../modules/nf-core/modules/gatk4/mergevcfs/main' include { MANTA_TUMORONLY } from '../../../../../modules/nf-core/modules/manta/tumoronly/main' // TODO: Research if splitting by intervals is ok, we pretend for now it is fine. @@ -11,6 +8,7 @@ include { MANTA_TUMORONLY } from '../../../../../modu workflow RUN_MANTA_TUMORONLY { take: cram // channel: [mandatory] [meta, cram, crai, interval.bed.gz, interval.bed.gz.tbi] + dict // channel: [optional] fasta // channel: [mandatory] fasta_fai // channel: [mandatory] intervals_bed_gz // channel: [optional] Contains a bed.gz file of all intervals combined provided with the cram input(s). Mandatory if interval files are used. 
@@ -38,47 +36,38 @@ workflow RUN_MANTA_TUMORONLY { }.set{manta_tumor_sv_vcf} //Only when using intervals - BGZIP_VC_MANTA_SMALL_INDELS(manta_small_indels_vcf.intervals) - - CONCAT_MANTA_SMALL_INDELS( - BGZIP_VC_MANTA_SMALL_INDELS.out.output.map{ meta, vcf -> + MERGE_MANTA_SMALL_INDELS( + manta_small_indels_vcf.intervals.map{ meta, vcf -> new_meta = [patient:meta.patient, sample:meta.sample, status:meta.status, gender:meta.gender, id:meta.sample, num_intervals:meta.num_intervals] [groupKey(new_meta, meta.num_intervals), vcf] }.groupTuple(), - fasta_fai, - intervals_bed_gz) - - BGZIP_VC_MANTA_SV(manta_candidate_sv_vcf.intervals) + dict) - CONCAT_MANTA_SV( - BGZIP_VC_MANTA_SV.out.output.map{ meta, vcf -> + MERGE_MANTA_SV( + manta_candidate_sv_vcf.intervals.map{ meta, vcf -> new_meta = [patient:meta.patient, sample:meta.sample, status:meta.status, gender:meta.gender, id:meta.sample, num_intervals:meta.num_intervals] [groupKey(new_meta, meta.num_intervals), vcf] }.groupTuple(), - fasta_fai, - intervals_bed_gz) - - BGZIP_VC_MANTA_TUMOR(manta_tumor_sv_vcf.intervals) + dict) - CONCAT_MANTA_TUMOR( - BGZIP_VC_MANTA_TUMOR.out.output.map{ meta, vcf -> + MERGE_MANTA_TUMOR( + manta_tumor_sv_vcf.intervals.map{ meta, vcf -> new_meta = [patient:meta.patient, sample:meta.sample, status:meta.status, gender:meta.gender, id:meta.sample, num_intervals:meta.num_intervals] [groupKey(new_meta, meta.num_intervals), vcf] }.groupTuple(), - fasta_fai, - intervals_bed_gz) + dict) // Mix output channels for "no intervals" and "with intervals" results manta_vcf = Channel.empty().mix( - CONCAT_MANTA_SMALL_INDELS.out.vcf, - CONCAT_MANTA_SV.out.vcf, - CONCAT_MANTA_TUMOR.out.vcf, + MERGE_MANTA_SMALL_INDELS.out.vcf, + MERGE_MANTA_SV.out.vcf, + MERGE_MANTA_TUMOR.out.vcf, manta_small_indels_vcf.no_intervals, manta_candidate_sv_vcf.no_intervals, manta_tumor_sv_vcf.no_intervals @@ -87,12 +76,9 @@ workflow RUN_MANTA_TUMORONLY { vcf] } - ch_versions = ch_versions.mix(BGZIP_VC_MANTA_SV.out.versions) - 
ch_versions = ch_versions.mix(BGZIP_VC_MANTA_SMALL_INDELS.out.versions) - ch_versions = ch_versions.mix(BGZIP_VC_MANTA_TUMOR.out.versions) - ch_versions = ch_versions.mix(CONCAT_MANTA_SV.out.versions) - ch_versions = ch_versions.mix(CONCAT_MANTA_SMALL_INDELS.out.versions) - ch_versions = ch_versions.mix(CONCAT_MANTA_TUMOR.out.versions) + ch_versions = ch_versions.mix(MERGE_MANTA_SV.out.versions) + ch_versions = ch_versions.mix(MERGE_MANTA_SMALL_INDELS.out.versions) + ch_versions = ch_versions.mix(MERGE_MANTA_TUMOR.out.versions) ch_versions = ch_versions.mix(MANTA_TUMORONLY.out.versions) emit: From 8afe98064790133cb379b5954297a48a448b8ecb Mon Sep 17 00:00:00 2001 From: asp8200 Date: Tue, 31 May 2022 06:40:35 +0000 Subject: [PATCH 13/24] Using GATK4_MERGEVCFS for STRELKA_SNVS and STRELKA_INDELS. Clean up. --- conf/modules.config | 9 ++--- subworkflows/local/pair_variant_calling.nf | 1 + .../main.nf | 18 ++++------ .../nf-core/variantcalling/freebayes/main.nf | 1 - .../variantcalling/strelka/somatic/main.nf | 33 +++++++------------ 5 files changed, 22 insertions(+), 40 deletions(-) diff --git a/conf/modules.config b/conf/modules.config index 5a887975aa..f7be5e86f9 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -477,11 +477,6 @@ process{ errorStrategy = {task.exitStatus == 141 ? 'retry' : 'terminate'} ext.args = { params.no_intervals ? "-n" : "" } //Why ConcatVCF is never run when no_intervals is set.. 
} - withName : 'BGZIP_VC_.*' { - publishDir = [ - enabled: false - ] - } // DEEPVARIANT withName: 'MERGE_DEEPVARIANT_.*' { @@ -971,10 +966,10 @@ process{ } //STRELKA - withName: 'CONCAT_STRELKA_INDELS' { + withName: 'MERGE_STRELKA_INDELS' { ext.prefix = {"${meta.id}.somatic_indels"} } - withName: 'CONCAT_STRELKA_SNVS' { + withName: 'MERGE_STRELKA_SNVS' { ext.prefix = {"${meta.id}.somatic_snvs"} } diff --git a/subworkflows/local/pair_variant_calling.nf b/subworkflows/local/pair_variant_calling.nf index d8fb131a9c..e7e029e854 100644 --- a/subworkflows/local/pair_variant_calling.nf +++ b/subworkflows/local/pair_variant_calling.nf @@ -137,6 +137,7 @@ workflow PAIR_VARIANT_CALLING { } RUN_STRELKA_SOMATIC(cram_pair_strelka, + dict, fasta, fasta_fai, intervals_bed_combine_gz) diff --git a/subworkflows/nf-core/gatk4/tumor_only_somatic_variant_calling/main.nf b/subworkflows/nf-core/gatk4/tumor_only_somatic_variant_calling/main.nf index 5f07d2e147..be311a4094 100644 --- a/subworkflows/nf-core/gatk4/tumor_only_somatic_variant_calling/main.nf +++ b/subworkflows/nf-core/gatk4/tumor_only_somatic_variant_calling/main.nf @@ -2,8 +2,7 @@ // Run GATK mutect2 in tumor only mode, getepileupsummaries, calculatecontamination and filtermutectcalls // -include { TABIX_BGZIP as BGZIP_VC_MUTECT2 } from '../../../../modules/nf-core/modules/tabix/bgzip/main' -include { CONCAT_VCF as CONCAT_MUTECT2 } from '../../../../modules/local/concat_vcf/main' +include { GATK4_MERGEVCFS as MERGE_MUTECT2 } from '../../../../modules/nf-core/modules/gatk4/mergevcfs/main' include { GATK4_CALCULATECONTAMINATION as CALCULATECONTAMINATION } from '../../../../modules/nf-core/modules/gatk4/calculatecontamination/main' include { GATK4_FILTERMUTECTCALLS as FILTERMUTECTCALLS } from '../../../../modules/nf-core/modules/gatk4/filtermutectcalls/main' include { GATK4_GETPILEUPSUMMARIES as GETPILEUPSUMMARIES } from '../../../../modules/nf-core/modules/gatk4/getpileupsummaries/main' @@ -62,24 +61,22 @@ workflow 
GATK_TUMOR_ONLY_SOMATIC_VARIANT_CALLING { //Only when using intervals //Merge Mutect2 VCF - BGZIP_VC_MUTECT2(mutect2_vcf_branch.intervals) - CONCAT_MUTECT2( - BGZIP_VC_MUTECT2.out.output + MERGE_MUTECT2( + mutect2_vcf_branch.intervals .map{ meta, vcf -> new_meta = [patient:meta.patient, sample:meta.sample, status:meta.status, gender:meta.gender, id:meta.sample, num_intervals:meta.num_intervals] [groupKey(new_meta, meta.num_intervals), vcf] }.groupTuple(), - fai, - intervals_bed_combine_gz) + dict) mutect2_vcf = Channel.empty().mix( - CONCAT_MUTECT2.out.vcf, + MERGE_MUTECT2.out.vcf, mutect2_vcf_branch.no_intervals) mutect2_tbi = Channel.empty().mix( - CONCAT_MUTECT2.out.tbi, + MERGE_MUTECT2.out.tbi, mutect2_tbi_branch.no_intervals) //Merge Mutect2 Stats @@ -150,8 +147,7 @@ workflow GATK_TUMOR_ONLY_SOMATIC_VARIANT_CALLING { FILTERMUTECTCALLS ( ch_filtermutect_in, fasta, fai, dict ) - ch_versions = ch_versions.mix(BGZIP_VC_MUTECT2.out.versions) - ch_versions = ch_versions.mix(CONCAT_MUTECT2.out.versions) + ch_versions = ch_versions.mix(MERGE_MUTECT2.out.versions) ch_versions = ch_versions.mix(CALCULATECONTAMINATION.out.versions) ch_versions = ch_versions.mix(FILTERMUTECTCALLS.out.versions) ch_versions = ch_versions.mix(GETPILEUPSUMMARIES.out.versions) diff --git a/subworkflows/nf-core/variantcalling/freebayes/main.nf b/subworkflows/nf-core/variantcalling/freebayes/main.nf index d56ea77822..ed63dbe260 100644 --- a/subworkflows/nf-core/variantcalling/freebayes/main.nf +++ b/subworkflows/nf-core/variantcalling/freebayes/main.nf @@ -1,5 +1,4 @@ include { BCFTOOLS_SORT } from '../../../../modules/nf-core/modules/bcftools/sort/main' -include { TABIX_BGZIP as BGZIP_VC_FREEBAYES } from '../../../../modules/nf-core/modules/tabix/bgzip/main' include { GATK4_MERGEVCFS as MERGE_FREEBAYES } from '../../../../modules/nf-core/modules/gatk4/mergevcfs/main' include { FREEBAYES } from '../../../../modules/nf-core/modules/freebayes/main' include { TABIX_TABIX as TABIX_VC_FREEBAYES } 
from '../../../../modules/nf-core/modules/tabix/tabix/main' diff --git a/subworkflows/nf-core/variantcalling/strelka/somatic/main.nf b/subworkflows/nf-core/variantcalling/strelka/somatic/main.nf index e34117bb16..ff01cdb584 100644 --- a/subworkflows/nf-core/variantcalling/strelka/somatic/main.nf +++ b/subworkflows/nf-core/variantcalling/strelka/somatic/main.nf @@ -1,12 +1,11 @@ -include { TABIX_BGZIP as BGZIP_VC_STRELKA_INDELS } from '../../../../../modules/nf-core/modules/tabix/bgzip/main' -include { TABIX_BGZIP as BGZIP_VC_STRELKA_SNVS } from '../../../../../modules/nf-core/modules/tabix/bgzip/main' -include { CONCAT_VCF as CONCAT_STRELKA_INDELS } from '../../../../../modules/local/concat_vcf/main' -include { CONCAT_VCF as CONCAT_STRELKA_SNVS } from '../../../../../modules/local/concat_vcf/main' -include { STRELKA_SOMATIC } from '../../../../../modules/nf-core/modules/strelka/somatic/main' +include { GATK4_MERGEVCFS as MERGE_STRELKA_INDELS } from '../../../../../modules/nf-core/modules/gatk4/mergevcfs/main' +include { GATK4_MERGEVCFS as MERGE_STRELKA_SNVS } from '../../../../../modules/nf-core/modules/gatk4/mergevcfs/main' +include { STRELKA_SOMATIC } from '../../../../../modules/nf-core/modules/strelka/somatic/main' workflow RUN_STRELKA_SOMATIC { take: cram // channel: [mandatory] [meta, normal_cram, normal_crai, tumor_cram, tumor_crai, manta_vcf, manta_tbi, interval.bed.gz, interval.bed.gz.tbi] manta* are optional + dict // channel: [optional] fasta // channel: [mandatory] fasta_fai // channel: [mandatory] intervals_bed_gz // channel: [optional] Contains a bed.gz file of all intervals combined provided with the cram input(s). Mandatory if interval files are used. 
@@ -29,30 +28,24 @@ workflow RUN_STRELKA_SOMATIC { }.set{strelka_vcf_indels} // Only when using intervals - BGZIP_VC_STRELKA_SNVS(strelka_vcf_snvs.intervals) - - CONCAT_STRELKA_SNVS(BGZIP_VC_STRELKA_SNVS.out.output.map{ meta, vcf -> + MERGE_STRELKA_SNVS(strelka_vcf_snvs.intervals.map{ meta, vcf -> new_meta = [patient:meta.patient, normal_id:meta.normal_id, tumor_id:meta.tumor_id, gender:meta.gender, id:meta.tumor_id + "_vs_" + meta.normal_id, num_intervals:meta.num_intervals] [groupKey(new_meta, new_meta.num_intervals), vcf] }.groupTuple(), - fasta_fai, - intervals_bed_gz) - - BGZIP_VC_STRELKA_INDELS(strelka_vcf_indels.intervals) + dict) - CONCAT_STRELKA_INDELS(BGZIP_VC_STRELKA_INDELS.out.output.map{ meta, vcf -> + MERGE_STRELKA_INDELS(strelka_vcf_indels.intervals.map{ meta, vcf -> new_meta = [patient:meta.patient, normal_id:meta.normal_id, tumor_id:meta.tumor_id, gender:meta.gender, id:meta.tumor_id + "_vs_" + meta.normal_id, num_intervals:meta.num_intervals] [groupKey(new_meta, new_meta.num_intervals), vcf] }.groupTuple(), - fasta_fai, - intervals_bed_gz) + dict) // Mix output channels for "no intervals" and "with intervals" results strelka_vcf = Channel.empty().mix( - CONCAT_STRELKA_SNVS.out.vcf, - CONCAT_STRELKA_INDELS.out.vcf, + MERGE_STRELKA_SNVS.out.vcf, + MERGE_STRELKA_INDELS.out.vcf, strelka_vcf_snvs.no_intervals, strelka_vcf_indels.no_intervals) .map{ meta, vcf -> @@ -60,10 +53,8 @@ workflow RUN_STRELKA_SOMATIC { , vcf] } - ch_versions = ch_versions.mix(BGZIP_VC_STRELKA_SNVS.out.versions) - ch_versions = ch_versions.mix(BGZIP_VC_STRELKA_INDELS.out.versions) - ch_versions = ch_versions.mix(CONCAT_STRELKA_SNVS.out.versions) - ch_versions = ch_versions.mix(CONCAT_STRELKA_INDELS.out.versions) + ch_versions = ch_versions.mix(MERGE_STRELKA_SNVS.out.versions) + ch_versions = ch_versions.mix(MERGE_STRELKA_INDELS.out.versions) ch_versions = ch_versions.mix(STRELKA_SOMATIC.out.versions) emit: From f36bf8ed3e706376406127597848613a5ef7c356 Mon Sep 17 00:00:00 2001 
From: asp8200 Date: Tue, 31 May 2022 15:59:40 +0000 Subject: [PATCH 14/24] Removing legacy configuration of CONCAT_ --- conf/modules.config | 8 -------- 1 file changed, 8 deletions(-) diff --git a/conf/modules.config b/conf/modules.config index f7be5e86f9..40c4061ad1 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -470,14 +470,6 @@ process { // VARIANT CALLING process{ - // ALL - withName: 'CONCAT_.*' { - // For unknown reasons, CONCAT_VCF sometimes fails with SIGPIPE - // (exit code 141). Rerunning the process will usually work. - errorStrategy = {task.exitStatus == 141 ? 'retry' : 'terminate'} - ext.args = { params.no_intervals ? "-n" : "" } //Why ConcatVCF is never run when no_intervals is set.. - } - // DEEPVARIANT withName: 'MERGE_DEEPVARIANT_.*' { publishDir = [ From 85a1f07a64ab2e5916a98ef47c230ab546f0fa79 Mon Sep 17 00:00:00 2001 From: asp8200 Date: Tue, 31 May 2022 16:05:05 +0000 Subject: [PATCH 15/24] Removing the module CONCAT_VCF and the corresponding bash-script --- bin/concatenateVCFs.sh | 107 ------------------------------- modules/local/concat_vcf/main.nf | 35 ---------- 2 files changed, 142 deletions(-) delete mode 100755 bin/concatenateVCFs.sh delete mode 100644 modules/local/concat_vcf/main.nf diff --git a/bin/concatenateVCFs.sh b/bin/concatenateVCFs.sh deleted file mode 100755 index d4a9bff1d1..0000000000 --- a/bin/concatenateVCFs.sh +++ /dev/null @@ -1,107 +0,0 @@ -#!/usr/bin/env bash -set -euo pipefail - -# This script concatenates all VCF files that are in the local directory, -# that were created from different intervals to make a single final VCF - -usage() { echo "Usage: $0 [-i genome_index_file] [-o output.file.no.gz.extension] <-t target.bed> <-c cpus> <-n>" 1>&2; exit 1; } - -while [[ $# -gt 0 ]] -do - key=$1 - case $key in - -i) - genomeIndex=$2 - shift # past argument - shift # past value - ;; - -c) - cpus=$2 - shift # past argument - shift # past value - ;; - -o) - outputFile=$2 - shift # past argument - shift # past 
value - ;; - -t) - targetBED=$2 - shift # past argument - shift # past value - ;; - -n) - noInt=1 - shift # past argument - ;; - *) - usage - shift # past argument - ;; - esac -done - -if [ -z ${genomeIndex} ]; then echo "Missing index file "; usage; fi -if [ -z ${cpus} ]; then echo "No CPUs defined: setting to 1"; cpus=1; fi -if [ -z ${outputFile} ]; then echo "Missing output file name"; usage; fi - -if [ -z ${noInt+x} ] -then - # First make a header from one of the VCF - # Remove interval information from the GATK command-line, but leave the rest - FIRSTVCF=$(set +o pipefail; ls *.vcf | head -n 1) - sed -n '/^[^#]/q;p' $FIRSTVCF | \ - awk '!/GATKCommandLine/{print}/GATKCommandLine/{for(i=1;i<=NF;i++){if($i!~/intervals=/ && $i !~ /out=/){printf("%s ",$i)}}printf("\n")}' \ - > header - - # Get list of contigs from the FASTA index (.fai) - # ##contig header in the VCF cannot be used as it is optional (FreeBayes does not save it, for example) - - CONTIGS=($(cut -f1 ${genomeIndex})) - - #Concatenate VCFs in the correct order - ( - cat header - - for chr in "${CONTIGS[@]}"; do - # Skip if globbing would not match any file to avoid errors such as - # "ls: cannot access chr3_*.vcf: No such file or directory" when chr3 - # was not processed. - pattern="*_${chr}_*.vcf" - if ! compgen -G "${pattern}" > /dev/null ; then continue; fi - - # ls -v sorts by numeric value ("version"), which means that chr1_100_ - # is sorted *after* chr1_99_. - for vcf in $(ls -v ${pattern}); do - # Determine length of header. - # The 'q' command makes sed exit when it sees the first non-header - # line, which avoids reading in the entire file. - L=$(sed -n '/^[^#]/q;p' ${vcf} | wc -l) - - # Then print all non-header lines. Since tail is very fast (nearly as - # fast as cat), this is way more efficient than using a single sed, - # awk or grep command. 
- tail -n +$((L+1)) ${vcf} - done - done - ) | bgzip -@${cpus} > rawcalls.unsorted.vcf.gz -else - VCF=$(ls no_intervals*.vcf) - cp $VCF rawcalls.unsorted.vcf - bgzip -@${cpus} rawcalls.unsorted.vcf -fi - -bcftools sort -T . rawcalls.unsorted.vcf.gz | bgzip > rawcalls.vcf.gz -tabix -p vcf rawcalls.vcf.gz - -set +u - -# Now we have the concatenated VCF file, check for WES/panel targets, and generate a subset if there is a BED provided -if [ ! -z ${targetBED+x} ]; then - echo "Target is $targetBED - Selecting subset..." - bcftools isec --targets-file ${targetBED} rawcalls.vcf.gz | bgzip -@${cpus} > ${outputFile}.gz - tabix ${outputFile}.gz -else - # Rename the raw calls as WGS results - for f in rawcalls.vcf*; do mv -v $f ${outputFile}${f#rawcalls.vcf}; done -fi diff --git a/modules/local/concat_vcf/main.nf b/modules/local/concat_vcf/main.nf deleted file mode 100644 index 97db0f2a7b..0000000000 --- a/modules/local/concat_vcf/main.nf +++ /dev/null @@ -1,35 +0,0 @@ -process CONCAT_VCF { - tag "$meta.id" - label 'process_medium' - - conda (params.enable_conda ? "bioconda::bcftools=1.14" : null) - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/bcftools:1.14--hde04aa1_1' : - 'quay.io/biocontainers/bcftools:1.14--hde04aa1_1' }" - - input: - tuple val(meta), path(vcf) - path fasta_fai - path target_bed - - output: - tuple val(meta), path("${prefix}.vcf.gz") , emit: vcf - tuple val(meta), path("${prefix}.vcf.gz.tbi"), emit: tbi - path "versions.yml" , emit: versions - - when: - task.ext.when == null || task.ext.when - - script: - def args = task.ext.args ?: '' - prefix = task.ext.prefix ?: "${meta.id}" - def target_options = target_bed ? 
"-t ${target_bed}" : "" - """ - concatenateVCFs.sh -i ${fasta_fai} -c ${task.cpus} -o ${prefix}.vcf ${target_options} $args - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - bcftools: \$(bcftools --version 2>&1 | head -n1 | sed 's/^.*bcftools //; s/ .*\$//') - END_VERSIONS - """ -} From ef324948d89e41743962c0b3fd7dde032152d24e Mon Sep 17 00:00:00 2001 From: asp8200 Date: Tue, 31 May 2022 16:09:22 +0000 Subject: [PATCH 16/24] prettier --- modules.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules.json b/modules.json index 4fdbddb575..1003ecd3f9 100644 --- a/modules.json +++ b/modules.json @@ -245,4 +245,4 @@ } } } -} \ No newline at end of file +} From 95234616584f3d457492af37747c6ef78ba4463f Mon Sep 17 00:00:00 2001 From: asp8200 Date: Tue, 31 May 2022 17:55:30 +0000 Subject: [PATCH 17/24] Removing tabix/bgzip from modules.json --- modules.json | 3 --- 1 file changed, 3 deletions(-) diff --git a/modules.json b/modules.json index 1003ecd3f9..045aa5b2f5 100644 --- a/modules.json +++ b/modules.json @@ -222,9 +222,6 @@ "strelka/somatic": { "git_sha": "e745e167c1020928ef20ea1397b6b4d230681b4d" }, - "tabix/bgzip": { - "git_sha": "37bf3936f3665483d070a5e0e0b314311032af7c" - }, "tabix/bgziptabix": { "git_sha": "49b18b1639f4f7104187058866a8fab33332bdfe" }, From a125ead856e5ed7d815d8e5f8dd8bda2b5a164d0 Mon Sep 17 00:00:00 2001 From: asp8200 Date: Wed, 1 Jun 2022 20:57:40 +0000 Subject: [PATCH 18/24] Sorting vcf-files from freebayes. (Needed for running mergeVcfs.) 
--- .../nf-core/variantcalling/freebayes/main.nf | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/subworkflows/nf-core/variantcalling/freebayes/main.nf b/subworkflows/nf-core/variantcalling/freebayes/main.nf index ed63dbe260..3f7171e0b2 100644 --- a/subworkflows/nf-core/variantcalling/freebayes/main.nf +++ b/subworkflows/nf-core/variantcalling/freebayes/main.nf @@ -1,7 +1,8 @@ -include { BCFTOOLS_SORT } from '../../../../modules/nf-core/modules/bcftools/sort/main' -include { GATK4_MERGEVCFS as MERGE_FREEBAYES } from '../../../../modules/nf-core/modules/gatk4/mergevcfs/main' -include { FREEBAYES } from '../../../../modules/nf-core/modules/freebayes/main' -include { TABIX_TABIX as TABIX_VC_FREEBAYES } from '../../../../modules/nf-core/modules/tabix/tabix/main' +include { BCFTOOLS_SORT as BCFTOOLS_SORT_INTERVAL_VCFS } from '../../../../modules/nf-core/modules/bcftools/sort/main' +include { BCFTOOLS_SORT } from '../../../../modules/nf-core/modules/bcftools/sort/main' +include { GATK4_MERGEVCFS as MERGE_FREEBAYES } from '../../../../modules/nf-core/modules/gatk4/mergevcfs/main' +include { FREEBAYES } from '../../../../modules/nf-core/modules/freebayes/main' +include { TABIX_TABIX as TABIX_VC_FREEBAYES } from '../../../../modules/nf-core/modules/tabix/tabix/main' workflow RUN_FREEBAYES { take: @@ -31,8 +32,9 @@ workflow RUN_FREEBAYES { TABIX_VC_FREEBAYES(BCFTOOLS_SORT.out.vcf) // Only when using intervals + BCFTOOLS_SORT_INTERVAL_VCFS(freebayes_vcf_out.intervals) MERGE_FREEBAYES( - freebayes_vcf_out.intervals + BCFTOOLS_SORT_INTERVAL_VCFS.out.vcf .map{ meta, vcf -> new_id = meta.tumor_id ? 
meta.tumor_id + "_vs_" + meta.normal_id : meta.sample From f6ce15bbecdd0307613877e94049b9dc294b8297 Mon Sep 17 00:00:00 2001 From: asp8200 Date: Thu, 2 Jun 2022 11:08:04 +0000 Subject: [PATCH 19/24] Simplifying code to only use one vcf-sort-operation --- .../nf-core/variantcalling/freebayes/main.nf | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/subworkflows/nf-core/variantcalling/freebayes/main.nf b/subworkflows/nf-core/variantcalling/freebayes/main.nf index 3f7171e0b2..d4d36da41f 100644 --- a/subworkflows/nf-core/variantcalling/freebayes/main.nf +++ b/subworkflows/nf-core/variantcalling/freebayes/main.nf @@ -1,4 +1,3 @@ -include { BCFTOOLS_SORT as BCFTOOLS_SORT_INTERVAL_VCFS } from '../../../../modules/nf-core/modules/bcftools/sort/main' include { BCFTOOLS_SORT } from '../../../../modules/nf-core/modules/bcftools/sort/main' include { GATK4_MERGEVCFS as MERGE_FREEBAYES } from '../../../../modules/nf-core/modules/gatk4/mergevcfs/main' include { FREEBAYES } from '../../../../modules/nf-core/modules/freebayes/main' @@ -22,19 +21,18 @@ workflow RUN_FREEBAYES { fasta_fai, [], [], []) - FREEBAYES.out.vcf.branch{ + BCFTOOLS_SORT(FREEBAYES.out.vcf) + BCFTOOLS_SORT.out.vcf.branch{ intervals: it[0].num_intervals > 1 no_intervals: it[0].num_intervals <= 1 - }.set{freebayes_vcf_out} + }.set{bcftools_vcf_out} // Only when no intervals - BCFTOOLS_SORT(freebayes_vcf_out.no_intervals) - TABIX_VC_FREEBAYES(BCFTOOLS_SORT.out.vcf) + TABIX_VC_FREEBAYES(bcftools_vcf_out.no_intervals) // Only when using intervals - BCFTOOLS_SORT_INTERVAL_VCFS(freebayes_vcf_out.intervals) MERGE_FREEBAYES( - BCFTOOLS_SORT_INTERVAL_VCFS.out.vcf + bcftools_vcf_out.intervals .map{ meta, vcf -> new_id = meta.tumor_id ? 
meta.tumor_id + "_vs_" + meta.normal_id : meta.sample @@ -49,7 +47,7 @@ workflow RUN_FREEBAYES { // Mix output channels for "no intervals" and "with intervals" results freebayes_vcf = Channel.empty().mix( MERGE_FREEBAYES.out.vcf, - freebayes_vcf_out.no_intervals) + bcftools_vcf_out.no_intervals) .map{ meta, vcf -> new_id = meta.tumor_id ? meta.tumor_id + "_vs_" + meta.normal_id : meta.sample From ceaa36ee5f52432a742c4eeb4c98ebe4a157077a Mon Sep 17 00:00:00 2001 From: asp8200 Date: Thu, 2 Jun 2022 12:01:35 +0000 Subject: [PATCH 20/24] Updating git-sha for gatk4/mergevcfs --- modules.json | 2 +- modules/nf-core/modules/gatk4/mergevcfs/meta.yml | 5 +++++ 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/modules.json b/modules.json index 045aa5b2f5..9c660fffdd 100644 --- a/modules.json +++ b/modules.json @@ -148,7 +148,7 @@ "git_sha": "169b2b96c1167f89ab07127b7057c1d90a6996c7" }, "gatk4/mergevcfs": { - "git_sha": "169b2b96c1167f89ab07127b7057c1d90a6996c7" + "git_sha": "4199a05aeb0ec277d40cb112949bb85893310873" }, "gatk4/mutect2": { "git_sha": "169b2b96c1167f89ab07127b7057c1d90a6996c7" diff --git a/modules/nf-core/modules/gatk4/mergevcfs/meta.yml b/modules/nf-core/modules/gatk4/mergevcfs/meta.yml index 8d4123d9e5..3ebce0b9e1 100644 --- a/modules/nf-core/modules/gatk4/mergevcfs/meta.yml +++ b/modules/nf-core/modules/gatk4/mergevcfs/meta.yml @@ -35,6 +35,11 @@ output: type: file description: merged vcf file pattern: "*.vcf.gz" + - tbi: + type: file + description: index files for the merged vcf files + pattern: "*.tbi" + - versions: type: file description: File containing software versions From 658c21bccaf81a2e2a2385fb112d0fb30266547d Mon Sep 17 00:00:00 2001 From: asp8200 Date: Thu, 2 Jun 2022 12:13:04 +0000 Subject: [PATCH 21/24] Removing redundant tabix/bgzip-module --- modules/nf-core/modules/tabix/bgzip/main.nf | 34 ---------------- modules/nf-core/modules/tabix/bgzip/meta.yml | 42 -------------------- 2 files changed, 76 deletions(-) delete mode 100644 
modules/nf-core/modules/tabix/bgzip/main.nf delete mode 100644 modules/nf-core/modules/tabix/bgzip/meta.yml diff --git a/modules/nf-core/modules/tabix/bgzip/main.nf b/modules/nf-core/modules/tabix/bgzip/main.nf deleted file mode 100644 index 18e83c84d8..0000000000 --- a/modules/nf-core/modules/tabix/bgzip/main.nf +++ /dev/null @@ -1,34 +0,0 @@ -process TABIX_BGZIP { - tag "$meta.id" - label 'process_low' - - conda (params.enable_conda ? 'bioconda::tabix=1.11' : null) - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/tabix:1.11--hdfd78af_0' : - 'quay.io/biocontainers/tabix:1.11--hdfd78af_0' }" - - input: - tuple val(meta), path(input) - - output: - tuple val(meta), path("${prefix}*"), emit: output - path "versions.yml" , emit: versions - - when: - task.ext.when == null || task.ext.when - - script: - def args = task.ext.args ?: '' - prefix = task.ext.prefix ?: "${meta.id}" - in_bgzip = input.toString().endsWith(".gz") - command1 = in_bgzip ? '-d' : '-c' - command2 = in_bgzip ? '' : " > ${prefix}.${input.getExtension()}.gz" - """ - bgzip $command1 $args -@${task.cpus} $input $command2 - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - tabix: \$(echo \$(tabix -h 2>&1) | sed 's/^.*Version: //; s/ .*\$//') - END_VERSIONS - """ -} diff --git a/modules/nf-core/modules/tabix/bgzip/meta.yml b/modules/nf-core/modules/tabix/bgzip/meta.yml deleted file mode 100644 index 5007017510..0000000000 --- a/modules/nf-core/modules/tabix/bgzip/meta.yml +++ /dev/null @@ -1,42 +0,0 @@ -name: tabix_bgzip -description: Compresses/decompresses files -keywords: - - compress - - decompress - - bgzip - - tabix -tools: - - bgzip: - description: | - Bgzip compresses or decompresses files in a similar manner to, and compatible with, gzip. 
- homepage: https://www.htslib.org/doc/tabix.html - documentation: http://www.htslib.org/doc/bgzip.html - doi: 10.1093/bioinformatics/btp352 - licence: ["MIT"] -input: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test', single_end:false ] - - input: - type: file - description: file to compress or to decompress -output: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test', single_end:false ] - - output: - type: file - description: Output compressed/decompressed file - pattern: "*." - - versions: - type: file - description: File containing software versions - pattern: "versions.yml" -authors: - - "@joseespinosa" - - "@drpatelh" - - "@maxulysse" From 52ed9640bf3432ed17305f4f5f3770945a21c54d Mon Sep 17 00:00:00 2001 From: asp8200 Date: Thu, 2 Jun 2022 13:28:02 +0000 Subject: [PATCH 22/24] Fixig some indentation --- .../nf-core/variantcalling/manta/germline/main.nf | 8 ++++---- subworkflows/nf-core/variantcalling/manta/somatic/main.nf | 2 +- .../nf-core/variantcalling/manta/tumoronly/main.nf | 2 +- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/subworkflows/nf-core/variantcalling/manta/germline/main.nf b/subworkflows/nf-core/variantcalling/manta/germline/main.nf index 979306225a..0c8bd8962b 100644 --- a/subworkflows/nf-core/variantcalling/manta/germline/main.nf +++ b/subworkflows/nf-core/variantcalling/manta/germline/main.nf @@ -1,7 +1,7 @@ -include { GATK4_MERGEVCFS as MERGE_MANTA_DIPLOID } from '../../../../../modules/nf-core/modules/gatk4/mergevcfs/main' -include { GATK4_MERGEVCFS as MERGE_MANTA_SMALL_INDELS } from '../../../../../modules/nf-core/modules/gatk4/mergevcfs/main' -include { GATK4_MERGEVCFS as MERGE_MANTA_SV } from '../../../../../modules/nf-core/modules/gatk4/mergevcfs/main' -include { MANTA_GERMLINE } from '../../../../../modules/nf-core/modules/manta/germline/main' +include { GATK4_MERGEVCFS as MERGE_MANTA_DIPLOID } from 
'../../../../../modules/nf-core/modules/gatk4/mergevcfs/main' +include { GATK4_MERGEVCFS as MERGE_MANTA_SMALL_INDELS } from '../../../../../modules/nf-core/modules/gatk4/mergevcfs/main' +include { GATK4_MERGEVCFS as MERGE_MANTA_SV } from '../../../../../modules/nf-core/modules/gatk4/mergevcfs/main' +include { MANTA_GERMLINE } from '../../../../../modules/nf-core/modules/manta/germline/main' // TODO: Research if splitting by intervals is ok, we pretend for now it is fine. // Seems to be the consensus on upstream modules implementation too diff --git a/subworkflows/nf-core/variantcalling/manta/somatic/main.nf b/subworkflows/nf-core/variantcalling/manta/somatic/main.nf index db6c247750..036cab3d48 100644 --- a/subworkflows/nf-core/variantcalling/manta/somatic/main.nf +++ b/subworkflows/nf-core/variantcalling/manta/somatic/main.nf @@ -2,7 +2,7 @@ include { GATK4_MERGEVCFS as MERGE_MANTA_DIPLOID } from '../../../../. include { GATK4_MERGEVCFS as MERGE_MANTA_SMALL_INDELS } from '../../../../../modules/nf-core/modules/gatk4/mergevcfs/main' include { GATK4_MERGEVCFS as MERGE_MANTA_SOMATIC } from '../../../../../modules/nf-core/modules/gatk4/mergevcfs/main' include { GATK4_MERGEVCFS as MERGE_MANTA_SV } from '../../../../../modules/nf-core/modules/gatk4/mergevcfs/main' -include { MANTA_SOMATIC } from '../../../../../modules/nf-core/modules/manta/somatic/main' +include { MANTA_SOMATIC } from '../../../../../modules/nf-core/modules/manta/somatic/main' workflow RUN_MANTA_SOMATIC { take: diff --git a/subworkflows/nf-core/variantcalling/manta/tumoronly/main.nf b/subworkflows/nf-core/variantcalling/manta/tumoronly/main.nf index 31aeb7eb10..658615a21a 100644 --- a/subworkflows/nf-core/variantcalling/manta/tumoronly/main.nf +++ b/subworkflows/nf-core/variantcalling/manta/tumoronly/main.nf @@ -1,7 +1,7 @@ include { GATK4_MERGEVCFS as MERGE_MANTA_SMALL_INDELS } from '../../../../../modules/nf-core/modules/gatk4/mergevcfs/main' include { GATK4_MERGEVCFS as MERGE_MANTA_SV } from 
'../../../../../modules/nf-core/modules/gatk4/mergevcfs/main' include { GATK4_MERGEVCFS as MERGE_MANTA_TUMOR } from '../../../../../modules/nf-core/modules/gatk4/mergevcfs/main' -include { MANTA_TUMORONLY } from '../../../../../modules/nf-core/modules/manta/tumoronly/main' +include { MANTA_TUMORONLY } from '../../../../../modules/nf-core/modules/manta/tumoronly/main' // TODO: Research if splitting by intervals is ok, we pretend for now it is fine. // Seems to be the consensus on upstream modules implementation too From 5dfef56d6070fa3151f8cac2a8f24ab16c10553d Mon Sep 17 00:00:00 2001 From: asp8200 Date: Thu, 2 Jun 2022 19:57:20 +0000 Subject: [PATCH 23/24] Updating CHANGELOG and README --- CHANGELOG.md | 1 + README.md | 1 + 2 files changed, 2 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index e34199acd6..825ec46e36 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -29,6 +29,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Changed +- [#571](https://github.com/nf-core/sarek/pull/571) - Including and using GATK4's mergeVcfs. Removing the local module `concat_vcf`. 
- [#383](https://github.com/nf-core/sarek/pull/383), [#528](https://github.com/nf-core/sarek/pull/528) - Update `CHANGELOG` - [#390](https://github.com/nf-core/sarek/pull/390) - Update `nextflow_schema.json` - [#408](https://github.com/nf-core/sarek/pull/408) - Sync `TEMPLATE` with `tools` `2.0.1` diff --git a/README.md b/README.md index be6f34a6bf..d93929f39d 100644 --- a/README.md +++ b/README.md @@ -91,6 +91,7 @@ We thank the following people for their extensive assistance in the development - [Abhinav Sharma](https://github.com/abhi18av) - [Adrian Lärkeryd](https://github.com/adrlar) - [Alexander Peltzer](https://github.com/apeltzer) +- [Anders Sune Pedersen](https://github.com/asp8200) - [Chela James](https://github.com/chelauk) - [David Mas-Ponte](https://github.com/davidmasp) - [Francesco L](https://github.com/nibscles) From 11d42af84af4b6cffb60c394009765dcbc0d67f7 Mon Sep 17 00:00:00 2001 From: Anders Sune Pedersen Date: Fri, 3 Jun 2022 08:21:47 +0200 Subject: [PATCH 24/24] moving changelog-entry --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 825ec46e36..dfdfe7531c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -29,7 +29,6 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Changed -- [#571](https://github.com/nf-core/sarek/pull/571) - Including and using GATK4's mergeVcfs. Removing the local module `concat_vcf`. 
- [#383](https://github.com/nf-core/sarek/pull/383), [#528](https://github.com/nf-core/sarek/pull/528) - Update `CHANGELOG` - [#390](https://github.com/nf-core/sarek/pull/390) - Update `nextflow_schema.json` - [#408](https://github.com/nf-core/sarek/pull/408) - Sync `TEMPLATE` with `tools` `2.0.1` @@ -53,6 +52,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - [#544](https://github.com/nf-core/sarek/pull/544) - `Mutect2` is no longer compatible with `--no_intervals` - [#551](https://github.com/nf-core/sarek/pull/551) - Sync `TEMPLATE` with `tools` `2.4` - [#563](https://github.com/nf-core/sarek/pull/563) - Updated subway map +- [#571](https://github.com/nf-core/sarek/pull/571) - Including and using GATK4's mergeVcfs. Removing the local module `concat_vcf`. ### Fixed