From bd5048af11d3f1f28a1853a9ec1070fe85ff977b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ann-Kathrin=20Br=C3=BCggemann?= <90249112+AKBrueggemann@users.noreply.github.com> Date: Wed, 15 Dec 2021 17:28:36 +0100 Subject: [PATCH] feat: added variants-over-time-plot to report again (#405) * Added variant-plots-over-time to report * fmt * Correction of code in snakemake_reports * fmt Co-authored-by: Thomas Battenfeld <46334240+thomasbtf@users.noreply.github.com> --- workflow/rules/benchmarking.smk | 4 +++- workflow/rules/generate_output.smk | 37 ++++++++++++++++++++++++++++++ workflow/rules/long_read.smk | 4 +++- workflow/rules/qc.smk | 12 +++++++--- workflow/rules/read_clipping.smk | 4 +++- workflow/rules/variant_report.smk | 4 +++- 6 files changed, 58 insertions(+), 7 deletions(-) diff --git a/workflow/rules/benchmarking.smk b/workflow/rules/benchmarking.smk index 9d30be6d6..b3d8e41d8 100644 --- a/workflow/rules/benchmarking.smk +++ b/workflow/rules/benchmarking.smk @@ -264,7 +264,9 @@ rule assembly_comparison_velvet: rule order_contigs_assembly_comparison: input: - contigs="results/{date}/assembly/{sample}/{assembler}-pe/{sample}.contigs.fasta", + contigs=( + "results/{date}/assembly/{sample}/{assembler}-pe/{sample}.contigs.fasta" + ), reference="resources/genomes/main.fasta", output: temp( diff --git a/workflow/rules/generate_output.smk b/workflow/rules/generate_output.smk index 339f3df7c..448da98c7 100644 --- a/workflow/rules/generate_output.smk +++ b/workflow/rules/generate_output.smk @@ -251,6 +251,39 @@ rule plot_lineages_over_time: "../scripts/plot-lineages-over-time.py" +rule plot_variants_over_time: + input: + bcf=lambda wildcards: expand( + "results/{date}/filtered-calls/ref~main/{sample}.subclonal.high+moderate-impact.bcf", + zip, + date=get_dates_before_date(wildcards), + sample=get_samples_before_date(wildcards), + ), + csi=lambda wildcards: expand( + "results/{date}/filtered-calls/ref~main/{sample}.subclonal.high+moderate-impact.bcf.csi", + zip, + date=get_dates_before_date(wildcards), + sample=get_samples_before_date(wildcards), + ), + output: + report( + "results/{date}/plots/variants-{ORFNAME}-over-time.svg", + caption="../report/variants-over-time.rst", + category="1. Overview", + subcategory="3. Variants Development", + ), + "results/{date}/tables/variants-{ORFNAME}-over-time.csv", + params: + dates=get_dates_before_date, + samples=get_samples_before_date, + log: + "logs/{date}/{ORFNAME}-over-time.log", + conda: + "../envs/python.yaml" + script: + "../scripts/plot-variants-over-time.py" + + rule pangolin_call_overview_csv: input: get_aggregated_pangolin_calls, @@ -297,6 +330,10 @@ rule snakemake_reports: # 1. Overview "results/{date}/overview/", "results/{date}/plots/lineages-over-time.svg", + expand( + "results/{{date}}/tables/variants-{ORFNAME}-over-time.csv", + ORFNAME=config["orf_names"], + ), "results/{date}/plots/all.strains.pangolin.svg", "results/{date}/plots/all.major-strain.strains.kallisto.svg", expand_samples_for_date( diff --git a/workflow/rules/long_read.smk b/workflow/rules/long_read.smk index 06245c44f..11a401868 100644 --- a/workflow/rules/long_read.smk +++ b/workflow/rules/long_read.smk @@ -63,7 +63,9 @@ rule customize_primer_porechop: rule porechop_primer_trimming: input: - fastq_in="results/{date}/trimmed/porechop/adapter_barcode_trimming/{sample}.fastq", + fastq_in=( + "results/{date}/trimmed/porechop/adapter_barcode_trimming/{sample}.fastq" + ), repl_flag="results/.indicators/replacement_notice.txt", output: "results/{date}/trimmed/porechop/primer_clipped/{sample}.fastq", diff --git a/workflow/rules/qc.smk b/workflow/rules/qc.smk index dbd59b735..681f5b1bc 100644 --- a/workflow/rules/qc.smk +++ b/workflow/rules/qc.smk @@ -41,7 +41,9 @@ rule multiqc: input_dirs=lambda w, input: set(path.dirname(fp) for fp in input), output_dir=lambda w, output: path.dirname(output[0]), output_name=lambda w, output: path.basename(output[0]), - params="--config config/multiqc_config.yaml --title 'Results for data from {date}'", + params=( + "--config config/multiqc_config.yaml --title 'Results for data from {date}'" + ), log: "logs/{date}/multiqc.log", conda: @@ -134,7 +136,9 @@ rule species_diversity_before_pe: kraken_output=temp( "results/{date}/species-diversity/pe/{sample}/{sample}.kraken" ), - report="results/{date}/species-diversity/pe/{sample}/{sample}.uncleaned.kreport2", + report=( + "results/{date}/species-diversity/pe/{sample}/{sample}.uncleaned.kreport2" + ), log: "logs/{date}/kraken/pe/{sample}.log", params: @@ -161,7 +165,9 @@ rule species_diversity_before_se: kraken_output=temp( "results/{date}/species-diversity/se/{sample}/{sample}.kraken" ), - report="results/{date}/species-diversity/se/{sample}/{sample}.uncleaned.kreport2", + report=( + "results/{date}/species-diversity/se/{sample}/{sample}.uncleaned.kreport2" + ), log: "logs/{date}/kraken/se/{sample}.log", threads: 8 diff --git a/workflow/rules/read_clipping.smk b/workflow/rules/read_clipping.smk index 77ff07cb2..7a0896b1a 100644 --- a/workflow/rules/read_clipping.smk +++ b/workflow/rules/read_clipping.smk @@ -10,7 +10,9 @@ rule samtools_sort: output: "results/{date}/read-sorted/{read_type}~{sorted_by}/{sample}.{stage}.bam", params: - extra=lambda wildcards: "-n -m 4G" if wildcards.sorted_by == "name" else "-m 4G", + extra=( + lambda wildcards: "-n -m 4G" if wildcards.sorted_by == "name" else "-m 4G" + ), tmp_dir="/tmp/", log: "logs/{date}/sort-bam/{read_type}~{sorted_by}/{sample}.{stage}.log", diff --git a/workflow/rules/variant_report.smk b/workflow/rules/variant_report.smk index 3960600d8..89c5154c5 100644 --- a/workflow/rules/variant_report.smk +++ b/workflow/rules/variant_report.smk @@ -43,7 +43,9 @@ rule ucsc_vcf: bcfs=get_report_input( "results/{date}/filtered-calls/ref~main/{sample}.subclonal.{filter}.bcf" ), - strain_call="results/{date}/tables/strain-calls/{target}.polished.strains.pangolin.csv", + strain_call=( + "results/{date}/tables/strain-calls/{target}.polished.strains.pangolin.csv" + ), output: report( "results/{date}/ucsc-vcfs/{target}.{filter}.vcf",