feat: make germline variants pon optional for mutect2 (#375)
ericblanc20 authored Feb 20, 2023
1 parent fad1ad6 commit 30bc591
Showing 21 changed files with 1,381 additions and 918 deletions.
103 changes: 51 additions & 52 deletions snappy_pipeline/workflows/somatic_variant_calling/Snakefile
@@ -72,58 +72,57 @@ rule somatic_variant_calling_mutect_run:

# Run MuTect 2 ----------------------------------------------------------------


rule somatic_variant_calling_mutect2_pileup_normal:
input:
unpack(wf.get_input_files("mutect2", "pileup_normal")),
output:
**wf.get_output_files("mutect2", "pileup_normal"),
threads: wf.get_resource("mutect2", "pileup_normal", "threads")
resources:
time=wf.get_resource("mutect2", "pileup_normal", "time"),
memory=wf.get_resource("mutect2", "pileup_normal", "memory"),
partition=wf.get_resource("mutect2", "pileup_normal", "partition"),
tmpdir=wf.get_resource("mutect2", "pileup_normal", "tmpdir"),
log:
**wf.get_log_file("mutect2", "pileup_normal"),
params:
normal_lib_name=wf.substep_getattr("mutect2", "get_normal_lib_name"),
wrapper:
wf.wrapper_path("mutect2/pileup")


rule somatic_variant_calling_mutect2_pileup_tumor:
input:
unpack(wf.get_input_files("mutect2", "pileup_tumor")),
output:
**wf.get_output_files("mutect2", "pileup_tumor"),
threads: wf.get_resource("mutect2", "pileup_tumor", "threads")
resources:
time=wf.get_resource("mutect2", "pileup_tumor", "time"),
memory=wf.get_resource("mutect2", "pileup_tumor", "memory"),
partition=wf.get_resource("mutect2", "pileup_tumor", "partition"),
tmpdir=wf.get_resource("mutect2", "pileup_tumor", "tmpdir"),
log:
**wf.get_log_file("mutect2", "pileup_tumor"),
wrapper:
wf.wrapper_path("mutect2/pileup")


rule somatic_variant_calling_mutect2_contamination:
input:
unpack(wf.get_input_files("mutect2", "contamination")),
output:
**wf.get_output_files("mutect2", "contamination"),
threads: wf.get_resource("mutect2", "contamination", "threads")
resources:
time=wf.get_resource("mutect2", "contamination", "time"),
memory=wf.get_resource("mutect2", "contamination", "memory"),
partition=wf.get_resource("mutect2", "contamination", "partition"),
tmpdir=wf.get_resource("mutect2", "contamination", "tmpdir"),
log:
**wf.get_log_file("mutect2", "contamination"),
wrapper:
wf.wrapper_path("mutect2/contamination")
if config["step_config"]["somatic_variant_calling"]["mutect2"]["common_variants"]:

rule somatic_variant_calling_mutect2_pileup_normal:
input:
unpack(wf.get_input_files("mutect2", "pileup_normal")),
output:
**wf.get_output_files("mutect2", "pileup_normal"),
threads: wf.get_resource("mutect2", "pileup_normal", "threads")
resources:
time=wf.get_resource("mutect2", "pileup_normal", "time"),
memory=wf.get_resource("mutect2", "pileup_normal", "memory"),
partition=wf.get_resource("mutect2", "pileup_normal", "partition"),
tmpdir=wf.get_resource("mutect2", "pileup_normal", "tmpdir"),
log:
**wf.get_log_file("mutect2", "pileup_normal"),
params:
normal_lib_name=wf.substep_getattr("mutect2", "get_normal_lib_name"),
wrapper:
wf.wrapper_path("mutect2/pileup")

rule somatic_variant_calling_mutect2_pileup_tumor:
input:
unpack(wf.get_input_files("mutect2", "pileup_tumor")),
output:
**wf.get_output_files("mutect2", "pileup_tumor"),
threads: wf.get_resource("mutect2", "pileup_tumor", "threads")
resources:
time=wf.get_resource("mutect2", "pileup_tumor", "time"),
memory=wf.get_resource("mutect2", "pileup_tumor", "memory"),
partition=wf.get_resource("mutect2", "pileup_tumor", "partition"),
tmpdir=wf.get_resource("mutect2", "pileup_tumor", "tmpdir"),
log:
**wf.get_log_file("mutect2", "pileup_tumor"),
wrapper:
wf.wrapper_path("mutect2/pileup")

rule somatic_variant_calling_mutect2_contamination:
input:
unpack(wf.get_input_files("mutect2", "contamination")),
output:
**wf.get_output_files("mutect2", "contamination"),
threads: wf.get_resource("mutect2", "contamination", "threads")
resources:
time=wf.get_resource("mutect2", "contamination", "time"),
memory=wf.get_resource("mutect2", "contamination", "memory"),
partition=wf.get_resource("mutect2", "contamination", "partition"),
tmpdir=wf.get_resource("mutect2", "contamination", "tmpdir"),
log:
**wf.get_log_file("mutect2", "contamination"),
wrapper:
wf.wrapper_path("mutect2/contamination")


rule somatic_variant_calling_mutect2_run:
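With this change, the `pileup_normal`, `pileup_tumor`, and `contamination` rules are only defined when `common_variants` is set in the step configuration, so contamination estimation becomes optional. Below is a minimal sketch of that guard, using a plain dict in place of the parsed pipeline configuration; the values are illustrative only.

# Minimal sketch of the guard used in the Snakefile above.
# `config` stands in for the parsed pipeline configuration; the value below is illustrative.
config = {
    "step_config": {
        "somatic_variant_calling": {
            "mutect2": {
                "common_variants": "",  # leave empty to skip contamination estimation
            }
        }
    }
}

mutect2_config = config["step_config"]["somatic_variant_calling"]["mutect2"]
if mutect2_config["common_variants"]:
    print("contamination estimation rules are registered")
else:
    print("contamination estimation is skipped")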
75 changes: 42 additions & 33 deletions snappy_pipeline/workflows/somatic_variant_calling/__init__.py
@@ -102,7 +102,7 @@
#: Names of the files to create for the extension
EXT_NAMES = ("vcf", "vcf_tbi", "vcf_md5", "vcf_tbi_md5")

EXTS_MATCHED = {
EXT_MATCHED = {
"mutect": {
"vcf": ".vcf.gz",
"vcf_md5": ".vcf.gz.md5",
@@ -127,6 +127,7 @@
"full_vcf_tbi": ".full.vcf.gz.tbi",
"full_vcf_tbi_md5": ".full.vcf.gz.tbi.md5",
"tar": ".tar.gz",
"tar_md5": ".tar.gz.md5",
},
"mutect2": {
"vcf": ".vcf.gz",
@@ -137,10 +138,6 @@
"full_md5": ".full.vcf.gz.md5",
"full_vcf_tbi": ".full.vcf.gz.tbi",
"full_vcf_tbi_md5": ".full.vcf.gz.tbi.md5",
"stats": ".vcf.stats",
"stats_md5": ".vcf.stats.md5",
"f1r2": ".f1r2_tar.tar.gz",
"f1r2_md5": ".f1r2_tar.tar.gz.md5",
},
}

@@ -220,8 +217,15 @@
# Configuration for MuTect 2
mutect2:
panel_of_normals: '' # Set path to panel of normals vcf if required
germline_resource: REQUIRED # Germline variants resource (same as panel of normals)
common_variants: REQUIRED # Common germline variants for contamination estimation
germline_resource: '' # Germline variants resource (same as panel of normals)
common_variants: '' # Common germline variants for contamination estimation
extra_arguments: [] # List additional Mutect2 arguments
# Each additional argument must be in the form:
# "--<argument name> <argument value>"
# For example, to filter reads prior to calling & to
# add annotations to the output vcf:
# - "--read-filter CigarContainsNoNOperator"
# - "--annotation AssemblyComplexity BaseQuality"
# Parallelization configuration
num_cores: 2 # number of cores to use locally
window_length: 50000000 # split input into windows of this size, each triggers a job
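The `extra_arguments` comment above gives the expected format for additional Mutect2 options. As an illustration only (the wrapper may assemble the command differently), such a list could be appended to a GATK invocation like this; the base command is a simplified placeholder, not the pipeline's actual call.

# Illustration of appending a list of extra arguments to a Mutect2 command line.
# The base command below is a simplified placeholder.
extra_arguments = [
    "--read-filter CigarContainsNoNOperator",
    "--annotation AssemblyComplexity BaseQuality",
]
cmd = "gatk Mutect2 -R reference.fasta -I tumor.bam -O tumor.raw.vcf.gz"
cmd = " ".join([cmd] + extra_arguments)
print(cmd)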
@@ -440,7 +444,7 @@ def check_config(self):

def get_output_files(self, action):
output_files = {}
for k, v in EXTS_MATCHED[self.name].items():
for k, v in EXT_MATCHED[self.name].items():
output_files[k] = self.base_path_out.format(var_caller=self.name, ext=v)
return output_files

@@ -480,7 +484,7 @@ class Mutect2StepPart(MutectBaseStepPart):
name = "mutect2"

#: Class available actions
actions = ("run", "filter", "contamination", "pileup_normal", "pileup_tumor")
actions = ["run", "filter"] # "contamination", "pileup_normal", "pileup_tumor")

#: Class resource usage dictionary. Key: action (string); Value: resource (ResourceUsage).
resource_usage_dict = {
@@ -521,6 +525,8 @@ def check_config(self):
("static_data_config", "reference", "path"),
"Path to reference FASTA not configured but required for %s" % (self.name,),
)
if self.config[self.name]["common_variants"]:
self.actions.extend(["contamination", "pileup_normal", "pileup_tumor"])

def get_input_files(self, action):
"""Return input function for Mutect2 rules.
@@ -563,8 +569,7 @@ def _get_input_files_run(self, wildcards):
"tumor_bai": ngs_mapping(tumor_base_path + ".bam.bai"),
}

@staticmethod
def _get_input_files_filter(wildcards):
def _get_input_files_filter(self, wildcards):
"""Get input files for rule ``filter``.
:param wildcards: Snakemake wildcards associated with rule, namely: 'mapper' (e.g., 'bwa')
@@ -578,13 +583,15 @@ def _get_input_files_filter(wildcards):
**wildcards
)
)
return {
input_files = {
"raw": base_path + ".raw.vcf.gz",
"stats": base_path + ".raw.vcf.stats",
"f1r2": base_path + ".raw.f1r2_tar.tar.gz",
"table": base_path + ".contamination.tbl",
"segments": base_path + ".segments.tbl",
}
if "contamination" in self.actions:
input_files["table"] = base_path + ".contamination.tbl"
input_files["segments"] = base_path + ".segments.tbl"
return input_files

def _get_input_files_pileup_normal(self, wildcards):
"""Get input files for rule ``pileup_normal``.
Expand Down Expand Up @@ -766,6 +773,7 @@ def get_output_files(self, action):
somatic_ext_names = expand("full_{name}", name=EXT_NAMES)
somatic_ext_values = expand(".full{ext}", ext=EXT_VALUES)
result["tar"] = self.base_path_out.format(var_caller="scalpel", ext=".tar.gz")
result["tar_md5"] = self.base_path_out.format(var_caller="scalpel", ext=".tar.gz.md5")
result.update(
dict(
zip(
@@ -1130,25 +1138,26 @@ def get_result_files(self):
We will process all NGS libraries of all bio samples in all sample sheets.
"""
name_pattern = "{mapper}.{caller}.{tumor_library.name}"
yield from self._yield_result_files_matched(
os.path.join("output", name_pattern, "out", name_pattern + "{ext}"),
mapper=self.w_config["step_config"]["ngs_mapping"]["tools"]["dna"],
caller=set(self.config["tools"]) & set(SOMATIC_VARIANT_CALLERS_MATCHED),
ext=EXT_VALUES,
)
yield from self._yield_result_files_matched(
os.path.join("output", name_pattern, "log", name_pattern + "{ext}"),
mapper=self.w_config["step_config"]["ngs_mapping"]["tools"]["dna"],
caller=set(self.config["tools"]) & set(SOMATIC_VARIANT_CALLERS_MATCHED),
ext=(
".log",
".log.md5",
".conda_info.txt",
".conda_info.txt.md5",
".conda_list.txt",
".conda_list.txt.md5",
),
)
for caller in set(self.config["tools"]) & set(SOMATIC_VARIANT_CALLERS_MATCHED):
yield from self._yield_result_files_matched(
os.path.join("output", name_pattern, "out", name_pattern + "{ext}"),
mapper=self.w_config["step_config"]["ngs_mapping"]["tools"]["dna"],
caller=caller,
ext=EXT_MATCHED[caller].values() if caller in EXT_MATCHED else EXT_VALUES,
)
yield from self._yield_result_files_matched(
os.path.join("output", name_pattern, "log", name_pattern + "{ext}"),
mapper=self.w_config["step_config"]["ngs_mapping"]["tools"]["dna"],
caller=caller,
ext=(
".log",
".log.md5",
".conda_info.txt",
".conda_info.txt.md5",
".conda_list.txt",
".conda_list.txt.md5",
),
)
# Panel of normals
# joint calling
name_pattern = "{mapper}.{caller}.{donor.name}"
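The rewritten `get_result_files` loop above iterates over the configured callers and uses each caller's entry in `EXT_MATCHED` (falling back to `EXT_VALUES`) to derive the expected output paths. A simplified, hedged sketch of that pattern follows; the extension map is abbreviated and the mapper/caller/library name is hypothetical.

# Simplified sketch of the per-caller output-path expansion.
# The extension map is abbreviated; the name pattern is hypothetical.
import os

EXT_VALUES = (".vcf.gz", ".vcf.gz.tbi", ".vcf.gz.md5", ".vcf.gz.tbi.md5")
EXT_MATCHED = {
    "mutect2": {
        "vcf": ".vcf.gz",
        "vcf_tbi": ".vcf.gz.tbi",
        "full_vcf": ".full.vcf.gz",
    },
}

name_pattern = "bwa.mutect2.P001-T1-DNA1-WGS1"  # hypothetical mapper.caller.library
caller = "mutect2"
exts = EXT_MATCHED[caller].values() if caller in EXT_MATCHED else EXT_VALUES
for ext in exts:
    print(os.path.join("output", name_pattern, "out", name_pattern + ext))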