From 9690f12e6725447e035903d3b4eecf31d01b1576 Mon Sep 17 00:00:00 2001
From: Thomas Battenfeld <46334240+thomasbtf@users.noreply.github.com>
Date: Sun, 19 Dec 2021 18:32:11 +0100
Subject: [PATCH] fix: pangolin input for overview table, bamclipper samtools error,
 include flag for high quality genome report (#418)

* fixes from production

* fmt
---
 workflow/envs/bamclipper.yaml      |  1 +
 workflow/rules/common.smk          | 36 +++++++++++++++++++++++++++++++++++-
 workflow/rules/generate_output.smk |  4 +---
 3 files changed, 37 insertions(+), 4 deletions(-)

diff --git a/workflow/envs/bamclipper.yaml b/workflow/envs/bamclipper.yaml
index f88e6e2e9..c0cc02571 100644
--- a/workflow/envs/bamclipper.yaml
+++ b/workflow/envs/bamclipper.yaml
@@ -3,3 +3,4 @@ channels:
   - conda-forge
 dependencies:
   - bamclipper =1.0
+  - samtools =1.9 # see https://github.com/merenlab/anvio/issues/1479
diff --git a/workflow/rules/common.smk b/workflow/rules/common.smk
index 8c08b4520..77e8f233c 100644
--- a/workflow/rules/common.smk
+++ b/workflow/rules/common.smk
@@ -1273,7 +1273,10 @@ def get_include_flag(sample):
 
 
 def get_include_flag_for_date(wildcards):
-    return [get_include_flag(sample) for sample in get_samples_for_date(wildcards.date)]
+    return [
+        get_include_flag(sample)
+        for sample in get_assemblies_for_submission(wildcards, "accepted samples")
+    ]
 
 
 def get_artic_primer(wildcards):
@@ -1541,6 +1544,37 @@ def get_aggregated_pangolin_calls(wildcards, return_list="paths"):
     return expanded_patterns
 
 
+def get_pangolin_for_report(wildcards):
+    """Return the pangolin call table path for every sample of the report.
+
+    Each entry of get_assemblies_for_submission(wildcards, "all samples") is
+    split at "," into a sample name and an assembly label. The label selects
+    the stage placeholder of the path; an unknown label raises ValueError.
+    """
+    # assembly label -> stage used in the pangolin table file name
+    stage_by_assembly = {
+        "normal": "polished",
+        "pseudo": "pseudo",
+        "consensus": "consensus",
+        "not-accepted": "polished",
+    }
+    path = "results/{date}/tables/strain-calls/{sample}.{stage}.strains.pangolin.csv"
+
+    paths = []
+    for entry in get_assemblies_for_submission(wildcards, "all samples"):
+        sample, assembly = entry.split(",")
+        if assembly not in stage_by_assembly:
+            # Fail loudly rather than build a path with a wrong or undefined
+            # stage for a label this mapping does not know.
+            raise ValueError(
+                f"Unknown assembly type '{assembly}' for sample '{sample}'."
+            )
+        stage = stage_by_assembly[assembly]
+        paths.append(path.format(sample=sample, date=wildcards.date, stage=stage))
+
+    return paths
+
+
 wildcard_constraints:
     sample="[^/.]+",
     vartype="|".join(VARTYPES),
diff --git a/workflow/rules/generate_output.smk b/workflow/rules/generate_output.smk
index e6fb8232d..26099a51e 100644
--- a/workflow/rules/generate_output.smk
+++ b/workflow/rules/generate_output.smk
@@ -134,9 +134,7 @@ rule overview_table_csv:
         pseudo_contigs=get_fallbacks_for_report("pseudo"),
         consensus_contigs=get_fallbacks_for_report("consensus"),
         kraken=get_kraken_output,
-        pangolin=expand_samples_for_date(
-            "results/{{date}}/tables/strain-calls/{sample}.polished.strains.pangolin.csv",
-        ),
+        pangolin=get_pangolin_for_report,
         bcf=expand_samples_for_date(
             "results/{{date}}/filtered-calls/ref~main/{sample}.subclonal.high+moderate-impact.bcf",
         ),