Skip to content

Commit

Permalink
deepvariant calling
Browse files Browse the repository at this point in the history
  • Loading branch information
kdm9 committed Jan 14, 2024
1 parent 8250857 commit c2fe6c7
Show file tree
Hide file tree
Showing 14 changed files with 161 additions and 11 deletions.
7 changes: 6 additions & 1 deletion acanthophis/template/config.yml
Original file line number Diff line number Diff line change
Expand Up @@ -114,6 +114,7 @@ samplesets:
callers:
- mpileup
- freebayes
- deepvariant
# Which short read aligners to use for variant calling? (can be
# more/less/different to the align section above)
aligners:
Expand All @@ -124,7 +125,9 @@ samplesets:
refs:
- lambda

# Which set of filter expressions to use? (see tool_settings section below)
# Which set of filter expressions to use? (see tool_settings section
# below). CRITICAL NOTE: deepvariant calls will not be subject to these
# filters.
filters:
- default

Expand Down Expand Up @@ -152,6 +155,8 @@ samplesets:
# had better luck with bcftools csq, which will be supported here soon.
snpeff: false

# Calling model to use with deepvariant. One of: WGS, WES, ONT_R104,
# HYBRID_PACBIO_ILLUMINA, PACBIO.
deepvariant_model: WGS

tool_settings:
# Compression level. This sets a trade-off between compression time and disk
Expand Down
5 changes: 2 additions & 3 deletions acanthophis/template/environment.yml
Original file line number Diff line number Diff line change
@@ -1,9 +1,8 @@
name: acanthophis
channels:
- defaults
- conda-forge
- bioconda
- kdm801
dependencies:
- snakemake
- snakemake=8
- singularity
- natsort
2 changes: 2 additions & 0 deletions acanthophis/template/workflow/Snakefile
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ include: "rules/reads.rules"
include: "rules/align.rules"
include: "rules/denovo.rules"
# deepvariant.rules must be included before varcall.rules: the all_varcall
# rule in varcall.rules reads rules.all_deepvariant.input, and a rule can only
# be referenced through `rules.<name>` once it has been defined.
include: "rules/deepvariant.rules"
include: "rules/varcall.rules"
include: "rules/taxonid.rules"
include: "rules/metagenome.rules"
include: "rules/sampleset.rules"
Expand All @@ -18,6 +19,7 @@ rule all:
rules.all_reads.input,
rules.all_align.input,
rules.all_varcall.input,
rules.all_deepvariant.input,
rules.all_denovo.input,
rules.all_taxonid.input,
rules.all_megahit.input,
Expand Down
9 changes: 9 additions & 0 deletions acanthophis/template/workflow/config.schema.yml
Original file line number Diff line number Diff line change
Expand Up @@ -160,6 +160,7 @@ properties:
enum:
- mpileup
- freebayes
- deepvariant
aligners:
type: array
items:
Expand All @@ -178,6 +179,14 @@ properties:
snpeff:
type: boolean
default: false
deepvariant_model:
type: string
enum:
- WGS
- WES
- ONT_R104
- HYBRID_PACBIO_ILLUMINA
- PACBIO
tool_settings:
type: object
properties:
Expand Down
3 changes: 3 additions & 0 deletions acanthophis/template/workflow/rules/base.rules
Original file line number Diff line number Diff line change
Expand Up @@ -324,8 +324,11 @@ wildcard_constraints:
lib="[^/~]+",
aligner="[^/~]+",
sample="[^/~]+",
sampleset="[^/~]+",
ref="[^/~]+",
type="[^/~]+",

resource_scopes:
runtime="local",

container: "docker://quay.io/condaforge/miniforge3"
110 changes: 110 additions & 0 deletions acanthophis/template/workflow/rules/deepvariant.rules
Original file line number Diff line number Diff line change
@@ -0,0 +1,110 @@

# Run DeepVariant on one sample's alignment, writing both a per-sample VCF and
# a gVCF under deepvariant/{aligner}~{ref}~{sampleset}/.
rule deepvariant_gvcf:
    input:
        ref=lambda wc: R(config["data_paths"]["references"][wc.ref]["fasta"], keep_local=True),
        bam=T("alignments/samples/{aligner}~{ref}~{sample}.bam"),
        bai=T("alignments/samples/{aligner}~{ref}~{sample}.bam.bai"),
    output:
        gvcf=T("deepvariant/{aligner}~{ref}~{sampleset}/{sample}.g.vcf.gz"),
        vcf=T("deepvariant/{aligner}~{ref}~{sampleset}/{sample}.vcf.gz"),
    log:
        T("deepvariant/{aligner}~{ref}~{sampleset}/{sample}_examples.log"),
    params:
        # Per-sampleset model type from the config; falls back to "WGS" when
        # the sampleset does not set deepvariant_model.
        model=lambda wc: config["samplesets"][wc.sampleset]["varcall"].get("deepvariant_model", "WGS"),
        # Not referenced by the active shell command below; only the disabled
        # legacy command uses it.
        extra="",
        # Directory handed to run_deepvariant for its intermediate results.
        tmp_dir=lambda wc: T(f"deepvariant/{wc.aligner}~{wc.ref}~{wc.sampleset}/{wc.sample}.workdir"),
    # The job runs inside the upstream DeepVariant image rather than a conda
    # environment; the conda alternative is kept here, disabled.
    #conda:
    #    "envs/deepvariant.yml",
    container:
        "docker://google/deepvariant:1.6.0"
    shadow: "shallow"
    threads: 32
    resources: **rule_resources(config, "deepvariant_gvcf", runtime=180, mem_gb=64, cores=32)
    shell:
        # One shard per thread; stdout and stderr both go to the log file.
        "( mkdir -p {params.tmp_dir} && /opt/deepvariant/bin/run_deepvariant"
        " --model_type={params.model} --ref={input.ref}"
        " --make_examples_extra_args 'normalize_reads=true'"
        " --reads={input.bam}"
        " --output_vcf={output.vcf} --output_gvcf={output.gvcf}"
        " --intermediate_results_dir={params.tmp_dir} --num_shards={threads}"
        ") &> {log}"
        # Disabled legacy command: the three-step dv_*.py wrappers.
        # NOTE(review): it refers to {wc.sample}; the rest of this command
        # uses {wildcards.sample}, so confirm before re-enabling.
        #"( mkdir -p {params.tmp_dir}"
        #" && dv_make_examples.py"
        #" --cores {threads}"
        #" --ref {input.ref}"
        #" --reads {input.bam}"
        #" --sample {wildcards.sample}"
        #" --examples {params.tmp_dir}"
        #" --logdir {params.tmp_dir}"
        #" --gvcf {params.tmp_dir}"
        #" {params.extra}"
        #" && dv_call_variants.py"
        #" --cores {threads}"
        #" --outfile {params.tmp_dir}/{wc.sample}.calls"
        #" --sample {wildcards.sample} "
        #" --examples {params.tmp_dir}"
        #" --model {params.model}"
        #"&& dv_postprocess_variants.py "
        #" --ref {input.ref} "
        #" --gvcf_infile {params.tmp_dir}/{wc.sample}.gvcf.tfrecord@{threads}.gz"
        #" --gvcf_outfile {output.gvcf} "
        #" --infile {params.tmp_dir}/{wc.sample}.calls"
        #" --outfile {output.vcf}"
        #") &> {log}"


localrules: glnexus_fofn


# Write a "file of filenames": one line per per-sample gVCF of the sampleset,
# in the order the inputs are listed. glnexus_call passes this file to
# `glnexus_cli --list`.
rule glnexus_fofn:
    input:
        gvcf=lambda wc: T(expand(
            "deepvariant/{aligner}~{ref}~{sampleset}/{sample}.g.vcf.gz",
            aligner=wc.aligner,
            ref=wc.ref,
            sampleset=wc.sampleset,
            sample=config["SAMPLESETS"][wc.sampleset],
        )),
    output:
        T("deepvariant/{aligner}~{ref}~{sampleset}.gvcf_fofn.txt"),
    run:
        # Build the whole listing first, then write it in one go; every path
        # is newline-terminated.
        listing = "".join(f"{path}\n" for path in input)
        with open(output[0], "w") as fofn:
            fofn.write(listing)

# Merge all per-sample DeepVariant gVCFs of a sampleset with GLnexus (using
# its "DeepVariant" configuration preset) and write one bgzip-compressed VCF.
rule glnexus_call:
    input:
        # The gVCFs are declared as inputs so they exist before the job runs;
        # the command itself reads their paths from the fofn file.
        gvcf=lambda wc: T(expand("deepvariant/{aligner}~{ref}~{sampleset}/{sample}.g.vcf.gz",
                                 aligner=wc.aligner, ref=wc.ref, sampleset=wc.sampleset,
                                 sample=config["SAMPLESETS"][wc.sampleset])),
        fofn=T("deepvariant/{aligner}~{ref}~{sampleset}.gvcf_fofn.txt"),
    output:
        vcf=T("deepvariant/{aligner}~{ref}~{sampleset}.vcf.gz"),
    log:
        T("deepvariant/{aligner}~{ref}~{sampleset}.vcf.gz.log"),
    conda:
        "envs/glnexus.yml",
    #container:
    #    "docker://ghcr.io/dnanexus-rnd/glnexus:v1.4.1"
    # Without a threads directive {threads} resolves to 1, so the compression
    # step would be single-threaded despite cores=128 below. Declare the
    # thread count to match the requested cores, as deepvariant_gvcf does.
    threads: 128
    shadow: "shallow"
    resources: **rule_resources(config, "glnexus_call", runtime=180, mem_gb=128, cores=128)
    shell:
        # glnexus_cli is given an explicit thread budget so that it stays
        # within what the scheduler granted this job. Its stdout is piped
        # into bcftools, which recompresses it to bgzipped VCF (level 8).
        "( glnexus_cli"
        " --config DeepVariant"
        " --threads {threads}"
        " --list"
        " {input.fofn}"
        " | bcftools view -Oz8 --threads {threads} -o {output.vcf}"
        ") &> {log}"



#######################################################################
# Target Rules #
#######################################################################
# Aggregate target: the joint-called VCF for every aligner x reference
# combination of each sampleset that lists "deepvariant" among its varcall
# callers. Samplesets without a varcall section, or without a callers list,
# contribute nothing.
rule all_deepvariant:
    input:
        [
            T(expand(
                "deepvariant/{aligner}~{ref}~{sampleset}.vcf.gz",
                aligner=settings["varcall"]["aligners"],
                ref=settings["varcall"]["refs"],
                sampleset=name,
            ))
            for name, settings in config["samplesets"].items()
            if "deepvariant" in settings.get("varcall", {}).get("callers", [])
        ],
6 changes: 6 additions & 0 deletions acanthophis/template/workflow/rules/envs/deepvariant.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
---
# Conda environment providing DeepVariant.
# NOTE(review): the only reference to this file in deepvariant.rules is a
# commented-out conda directive; the active rule runs the
# google/deepvariant:1.6.0 container instead, so the version pinned here
# differs from the one actually used.
channels:
  - defaults
  - conda-forge
  - bioconda
dependencies:
  - deepvariant=1.5
7 changes: 7 additions & 0 deletions acanthophis/template/workflow/rules/envs/glnexus.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
---
# Conda environment for the glnexus_call rule: GLnexus for joint genotyping,
# and bcftools to compress its output.
channels:
  - conda-forge
  - bioconda
dependencies:
  - glnexus=1.4
  - bcftools

3 changes: 2 additions & 1 deletion acanthophis/template/workflow/rules/varcall.rules
Original file line number Diff line number Diff line change
Expand Up @@ -393,7 +393,7 @@ rule all_filtered_variants:
[P(expand("variants/final/{caller}~{aligner}~{ref}~{sampleset}~filtered-{filter}.{ext}",
ext=["bcf", "bcf.csi", "vcf.gz", "vcf.gz.csi", "vcf.gz.stats"] if config["tool_settings"].get("varcall", {}).get("make_bcfs", False)
else ["vcf.gz", "vcf.gz.csi", "vcf.gz.stats"],
caller=config["samplesets"][sampleset]["varcall"]["callers"],
caller=filter(lambda x: x in ["mpileup", "freebayes"], config["samplesets"][sampleset]["varcall"]["callers"]),
aligner=config["samplesets"][sampleset]["varcall"]["aligners"],
ref=config["samplesets"][sampleset]["varcall"]["refs"],
filter=config["samplesets"][sampleset]["varcall"]["filters"],
Expand All @@ -406,3 +406,4 @@ rule all_filtered_variants:
rule all_varcall:
input:
rules.all_filtered_variants.input,
rules.all_deepvariant.input,
4 changes: 4 additions & 0 deletions tests/.gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,11 @@ rawdata
!rawdata/lambda/
.snakemake
tmp/
!tmp/nobackup
output/
!output/nobackup
workflow
config.yml
environment.yml
.*
!.gitignore
Empty file removed tests/output/nobackup
Empty file.
2 changes: 1 addition & 1 deletion tests/setup.sh
Original file line number Diff line number Diff line change
Expand Up @@ -6,5 +6,5 @@ mkdir -p output/ tmp/ rawdata/
touch output/nobackup tmp/nobackup rawdata/nobackup
pip install -e ../
set -xeuo pipefail
snakemake --snakefile Snakefile.generate-rawdata -j 4 --use-conda --conda-frontend mamba
# Snakemake 8 replaced --use-conda with --software-deployment-method (--sdm).
snakemake --snakefile Snakefile.generate-rawdata -j 4 --software-deployment-method conda
tree rawdata
14 changes: 9 additions & 5 deletions tests/test.sh
Original file line number Diff line number Diff line change
@@ -1,9 +1,13 @@
#!/bin/bash
conda activate acanthophis-tests
set -xeuo pipefail
python3 -m pip install ..
set -euo pipefail
mamba activate acanthophis-tests
python3 -m pip uninstall --yes acanthophis
python3 -m pip install -e ..
acanthophis-init --yes
mamba env update -f environment.yml
conda activate acanthophis
mamba activate acanthophis
rm -fr output tmp
snakemake -j 2 --use-conda --ri "${@}"
set -x
which snakemake
snakemake --version
snakemake -j $(nproc 2>/dev/null || echo 2) --software-deployment-method conda apptainer --ri "${@}"
Empty file removed tests/tmp/nobackup
Empty file.

0 comments on commit c2fe6c7

Please sign in to comment.