Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add dsl2 module for deepvariant #394

Merged
merged 8 commits into from
Jul 15, 2021
Merged
Show file tree
Hide file tree
Changes from 7 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -89,6 +89,7 @@ Main authors:

Helpful contributors:

* [Abhinav Sharma](https://github.com/abhi18av)
* [Adrian Lärkeryd](https://github.com/adrlar)
* [Alexander Peltzer](https://github.com/apeltzer)
* [Chela James](https://github.com/chelauk)
Expand Down
1 change: 1 addition & 0 deletions bin/scrape_software_versions.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
"FastQC": ["v_fastqc.txt", r"FastQC v(\S+)"],
"FreeBayes": ["v_freebayes.txt", r"version: v(\d\.\d\.\d+)"],
"GATK": ["v_gatk.txt", r"Version:(\S+)"],
"DeepVariant": ["v_deepvariant.txt", r"Version:(\S+)"],
"htslib": ["v_samtools.txt", r"htslib (\S+)"],
"Manta": ["v_manta.txt", r"([0-9.]+)"],
"MultiQC": ["v_multiqc.txt", r"multiqc, version (\S+)"],
Expand Down
7 changes: 7 additions & 0 deletions conf/modules.config
Original file line number Diff line number Diff line change
Expand Up @@ -178,6 +178,13 @@ params {
publish_dir = 'variant_calling'
publish_files = ['vcf.gz':'strelka', 'vcf.gz.tbi':'strelka']
}
// Module options for the DEEPVARIANT process.
// NOTE(review): presumably handed to the module as `params.deepvariant_options` by the calling workflow — confirm the wiring.
'deepvariant' {
// Extra command-line flags appended verbatim to the run_deepvariant call.
// NOTE(review): the WGS model is hard-coded here; exome/targeted data would presumably need a different --model_type — confirm.
args = '--model_type=WGS'
// Truthy but not a List, so saveFiles() falls back to the meta keys supplied by the process itself.
publish_by_meta = true
publish_dir = 'variant_calling'
// Neither a Map nor null: saveFiles() returns no path for any file, so nothing from this module is published.
publish_files = false
}

// TUMOR_VARIANT_CALLING

// PAIR_VARIANT_CALLING
Expand Down
2 changes: 1 addition & 1 deletion conf/test.config
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,7 @@ profiles {
tool_pair {
params.input = 'https://raw.githubusercontent.com/nf-core/test-datasets/sarek/testdata/csv/tiny-recal-pair-https.csv'
params.step = 'variant_calling'
}
}
trimming {
params.clip_r1 = 1
params.clip_r2 = 1
Expand Down
70 changes: 70 additions & 0 deletions modules/local/deepvariant/functions.nf
Original file line number Diff line number Diff line change
@@ -0,0 +1,70 @@
/*
maxulysse marked this conversation as resolved.
Show resolved Hide resolved
* -----------------------------------------------------
* Utility functions used in nf-core DSL2 module files
* -----------------------------------------------------
*/

/*
* Extract name of software tool from process name using $task.process
*/
def getSoftwareName(task_process) {
    // The fully-qualified process name is ':'-separated; the last component is the process itself
    def processName = task_process.tokenize(':')[-1]
    // By convention the tool name is everything before the first underscore
    def toolName = processName.tokenize('_')[0]
    return toolName.toLowerCase()
}

/*
* Function to initialise default values and to generate a Groovy Map of available options for nf-core modules
*/
def initOptions(Map args) {
    // Build the options map in one go; every key except publish_files gets a
    // fallback when the caller's value is missing or falsy.
    return [
        args           : (args.args ?: ''),
        args2          : (args.args2 ?: ''),
        args3          : (args.args3 ?: ''),
        publish_by_meta: (args.publish_by_meta ?: []),
        publish_dir    : (args.publish_dir ?: ''),
        // Passed through untouched: saveFiles() distinguishes null, a Map and anything else
        publish_files  : args.publish_files,
        suffix         : (args.suffix ?: '')
    ]
}

/*
* Tidy up and join elements of a list to return a path string
*/
def getPathFromList(path_list) {
    // Normalise every entry first: null becomes '', surrounding whitespace is trimmed,
    // and both leading and trailing slashes are stripped. Doing this before filtering
    // means null, blank and slash-only entries all collapse to '' instead of causing a
    // NullPointerException or leaving an empty segment ("a//b") in the result.
    def cleaned = path_list.collect { item ->
        item == null ? '' : item.toString().trim().replaceAll("^[/]+|[/]+\$", "")
    }
    // Drop the empty segments and join what is left into a relative path string
    return cleaned.findAll { it }.join('/')
}

/*
* Function to save/publish module results
*/
def saveFiles(Map args) {
    // Version files are never published
    if (args.filename.endsWith('.version.txt')) {
        return null
    }

    def opts = initOptions(args.options)

    // First path segment: publish_dir from the options, else the caller-supplied default
    def segments = [ opts.publish_dir ?: args.publish_dir ]

    // Optionally append one segment per meta key
    if (opts.publish_by_meta) {
        // An explicit List of keys in the options wins; any other truthy value
        // means "use the keys provided by the caller"
        def metaKeys = opts.publish_by_meta instanceof List ? opts.publish_by_meta : args.publish_by_meta
        for (metaKey in metaKeys) {
            if (!(args.meta && metaKey instanceof String)) {
                continue
            }
            // Default to the key itself; replace it with the meta value when the key is present
            def segment = metaKey
            if (args.meta.containsKey(metaKey)) {
                def metaValue = args.meta[metaKey]
                // Booleans are rendered as "<key>_<value>" so the directory name stays meaningful
                segment = metaValue instanceof Boolean ? "${metaKey}_${metaValue}".toString() : metaValue
            }
            // Anything that is not a plain String contributes an empty segment
            segments.add(segment instanceof String ? segment : '')
        }
    }

    // A Map of suffix -> sub-directory publishes only files matching one of the suffixes
    if (opts.publish_files instanceof Map) {
        for (rule in opts.publish_files) {
            if (args.filename.endsWith(rule.key)) {
                return "${getPathFromList(segments + [rule.value])}/$args.filename"
            }
        }
        return null
    }

    // null means "publish everything"; any other value (e.g. false) publishes nothing
    if (opts.publish_files == null) {
        return "${getPathFromList(segments)}/$args.filename"
    }
    return null
}
61 changes: 61 additions & 0 deletions modules/local/deepvariant/main.nf
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
// TODO Remove the module declaration
nextflow.enable.dsl=2

// Import generic module functions
include { initOptions; saveFiles; getSoftwareName } from './functions'

// TODO nf-core: A module file SHOULD only define input and output files as command-line parameters.
// All other parameters MUST be provided as a string i.e. "options.args"
// where "params.options" is a Groovy Map that MUST be provided via the addParams section of the including workflow.
// Any parameters that need to be evaluated in the context of a particular sample
// e.g. single-end/paired-end data MUST also be defined and evaluated appropriately.

// Default to an empty option map; an including workflow can override it via addParams(options: ...).
params.options = [:]
// Resolve the supplied options against the module defaults (args, suffix, publish_* keys).
options = initOptions(params.options)

/*
 * Run DeepVariant on one aligned sample.
 *
 * Inputs:
 *   - tuple of the sample meta map, its BAM and the matching BAM index
 *   - reference FASTA and its .fai index
 * Outputs:
 *   - vcf:     tuple of meta and <prefix>.vcf.gz
 *   - gvcf:    tuple of meta and <prefix>.g.vcf.gz
 *   - version: <software>.version.txt holding the output of `run_deepvariant --version`
 *
 * Extra command-line flags are taken from options.args.
 */
process DEEPVARIANT {
    tag "$meta.id"
    label 'process_high'
    publishDir "${params.outdir}",
        mode: params.publish_dir_mode,
        saveAs: { filename -> saveFiles(filename:filename, options:params.options, publish_dir:getSoftwareName(task.process), meta:meta, publish_by_meta:['id']) }

    conda (params.enable_conda ? "bioconda::deepvariant=1.1.0" : null)
    if (workflow.containerEngine == 'singularity' && !params.singularity_pull_docker_container) {
        // NOTE(review): the script below calls /opt/deepvariant/bin/run_deepvariant; confirm that
        // path exists in this image (the TODO in the else-branch suggests the bioconda build differs).
        container "https://depot.galaxyproject.org/singularity/deepvariant:1.1.0--py36hf3e76ba_2"
    } else {
        // TODO update the bioconda container to work with run_deepvariant.sh script
        // container "quay.io/biocontainers/deepvariant:1.1.0--py36hf3e76ba_2"
        container "google/deepvariant:1.1.0"
    }

    input:
    // bai and fai are not referenced on the command line; they are declared as inputs
    // so that they are staged next to the BAM and FASTA in the task directory.
    tuple val(meta), path(bam), path(bai)
    path fasta
    path fai

    output:
    // Exact file names rather than globs: "*.vcf.gz" would also capture the
    // gVCF (<prefix>.g.vcf.gz) and emit two files on the vcf channel.
    tuple val(meta), path("${prefix}.vcf.gz")  , emit: vcf
    tuple val(meta), path("${prefix}.g.vcf.gz"), emit: gvcf
    path "*.version.txt"                       , emit: version

    script:
    def software = getSoftwareName(task.process)
    // Deliberately declared without `def` so that the output block above can resolve it
    prefix       = options.suffix ? "${meta.id}${options.suffix}" : "${meta.id}"
    """
    /opt/deepvariant/bin/run_deepvariant \\
        --ref=${fasta} \\
        --reads=${bam} \\
        --output_vcf=${prefix}.vcf.gz \\
        --output_gvcf=${prefix}.g.vcf.gz \\
        --num_shards=${task.cpus} \\
        ${options.args}

    echo \$(/opt/deepvariant/bin/run_deepvariant --version) > ${software}.version.txt
    """

}
13 changes: 13 additions & 0 deletions subworkflows/local/germline_variant_calling.nf
Original file line number Diff line number Diff line change
Expand Up @@ -5,12 +5,14 @@
*/

params.haplotypecaller_options = [:]
params.deepvariant_options = [:]
params.genotypegvcf_options = [:]
params.concat_gvcf_options = [:]
params.concat_haplotypecaller_options = [:]
params.strelka_options = [:]

include { GATK4_HAPLOTYPECALLER as HAPLOTYPECALLER } from '../../modules/nf-core/software/gatk4/haplotypecaller/main' addParams(options: params.haplotypecaller_options)
include { DEEPVARIANT } from '../../modules/local/deepvariant/main' addParams(options: params.deepvariant_options)
include { GATK4_GENOTYPEGVCF as GENOTYPEGVCF } from '../../modules/nf-core/software/gatk4/genotypegvcf/main' addParams(options: params.genotypegvcf_options)
include { CONCAT_VCF as CONCAT_GVCF } from '../../modules/local/concat_vcf/main' addParams(options: params.concat_gvcf_options)
include { CONCAT_VCF as CONCAT_HAPLOTYPECALLER } from '../../modules/local/concat_vcf/main' addParams(options: params.concat_haplotypecaller_options)
Expand Down Expand Up @@ -94,6 +96,17 @@ workflow GERMLINE_VARIANT_CALLING {
haplotypecaller_vcf = CONCAT_HAPLOTYPECALLER.out.vcf
}

if ('deepvariant' in params.tools.toLowerCase()) {
maxulysse marked this conversation as resolved.
Show resolved Hide resolved

DEEPVARIANT(
bam,
maxulysse marked this conversation as resolved.
Show resolved Hide resolved
fasta,
fai)

// NOTE(review): these reuse the haplotypecaller_* variables that the HaplotypeCaller branch above
// also assigns, so when both tools are requested the DeepVariant channels replace the
// HaplotypeCaller ones. Dedicated deepvariant_vcf / deepvariant_gvcf variables (also added to the
// workflow's emit section) would avoid that — confirm against the rest of the workflow.
haplotypecaller_vcf = DEEPVARIANT.out.vcf
haplotypecaller_gvcf = DEEPVARIANT.out.gvcf
maxulysse marked this conversation as resolved.
Show resolved Hide resolved
}

if ('strelka' in params.tools.toLowerCase()) {
STRELKA(
bam,
Expand Down
2 changes: 1 addition & 1 deletion workflows/sarek.nf
Original file line number Diff line number Diff line change
Expand Up @@ -259,7 +259,7 @@ workflow SAREK {
msisensorpro_scan = BUILD_INDICES.out.msisensorpro_scan
target_bed_gz_tbi = BUILD_INDICES.out.target_bed_gz_tbi

// PREPREOCESSING
// PREPROCESSING

bam_mapped = Channel.empty()
bam_mapped_qc = Channel.empty()
Expand Down