From bf8d957c74dbeae2f530d067e37ba05ddabd921a Mon Sep 17 00:00:00 2001
From: "Lataretu, Marie"
Date: Tue, 9 Jul 2024 15:53:43 +0200
Subject: [PATCH 1/8] added script & credits

---
 README.md                      |   2 +
 bin/convert_VCF_info_fields.py | 192 +++++++++++++++++++++++++++++++++
 2 files changed, 194 insertions(+)
 create mode 100755 bin/convert_VCF_info_fields.py

diff --git a/README.md b/README.md
index a08fd86..0abb30a 100644
--- a/README.md
+++ b/README.md
@@ -262,3 +262,5 @@ Hardware|Default settings
 # 8. Credits
 
 The key steps of poreCov are carried out using the [ARTIC Network field bioinformatics pipeline](https://github.com/artic-network/fieldbioinformatics). Kudos to all amazing developers for your incredible efforts during this pandemic! Many thanks to all others who have helped out and contributed to poreCov as well.
+
+The script [convert_VCF_info_fields.py](bin/convert_VCF_info_fields.py) was originally developed by the Intergalactic Utilities Commission in the [Tool Shed repository](https://github.com/galaxyproject/tools-iuc) under an MIT license.
\ No newline at end of file
diff --git a/bin/convert_VCF_info_fields.py b/bin/convert_VCF_info_fields.py
new file mode 100755
index 0000000..19cef64
--- /dev/null
+++ b/bin/convert_VCF_info_fields.py
@@ -0,0 +1,192 @@
+#!/usr/bin/env python3
+
+# -----------------------------------------------------------------------------
+# The MIT License (MIT)
+
+# Copyright (c) 2014 galaxy-iuc
+
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+
+# The above copyright notice and this permission notice shall be included in all
+# copies or substantial portions of the Software.
+
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+# SOFTWARE.
+# -----------------------------------------------------------------------------
+
+# Takes in VCF file annotated with medaka tools annotate and converts
+#
+# Usage statement:
+# python convert_VCF_info_fields.py in_vcf.vcf out_vcf.vcf
+
+# 10/21/2020 - Nathan P. Roach, natproach@gmail.com
+
+# adapted June 2024 - Marie Lataretu, lataretum@rki.de
+# source for this script:
+# https://github.com/galaxyproject/tools-iuc/blob/fd148a124034b44d0d61db3eec32ff991d8c152c/tools/medaka/convert_VCF_info_fields.py
+
+import re
+import sys
+from collections import OrderedDict
+from math import log10
+
+import scipy.stats
+
+
+def pval_to_phredqual(pval):
+    try:
+        ret = round(-10 * log10(pval))
+    except ValueError:
+        ret = 2147483647  # transform pval of 0.0 to max signed 32 bit int
+    return ret
+
+
+def parseInfoField(info):
+    info_fields = info.split(";")
+    info_dict = OrderedDict()
+    for info_field in info_fields:
+        code, val = info_field.split("=")
+        info_dict[code] = val
+    return info_dict
+
+
+def annotateVCF(in_vcf_filepath, out_vcf_filepath):
+    """Postprocess output of medaka tools annotate.
+
+    Splits multiallelic sites into separate records.
+    Replaces medaka INFO fields that might represent information of the ref
+    and multiple alternate alleles with simple ref, alt allele counterparts.
+    """
+
+    in_vcf = open(in_vcf_filepath, "r")
+    # medaka INFO fields that do not make sense after splitting of
+    # multi-allelic records
+    # DP will be overwritten with the value of DPSP because medaka tools
+    # annotate currently only calculates the latter correctly
+    # (https://github.com/nanoporetech/medaka/issues/192).
+    # DPS, which is as unreliable as DP, gets skipped and the code
+    # calculates the spanning reads equivalent DPSPS instead.
+    to_skip = {"SC", "SR", "AR", "DP", "DPSP", "DPS"}
+    struct_meta_pat = re.compile("##(.+)=<ID=([^,>]+)(,.+)?>")
+    header_lines = []
+    contig_ids = set()
+    contig_ids_simple = set()
+    # parse the metadata lines of the input VCF and drop:
+    # - duplicate lines
+    # - INFO lines declaring keys we are not going to write
+    # - redundant contig information
+    while True:
+        line = in_vcf.readline()
+        if line[:2] != "##":
+            assert line.startswith("#CHROM")
+            break
+        if line in header_lines:
+            # the annotate tool may generate lines already written by
+            # medaka variant again (example: medaka version line)
+            continue
+        match = struct_meta_pat.match(line)
+        if match:
+            match_type, match_id, match_misc = match.groups()
+            if match_type == "INFO":
+                if match_id == "DPSP":
+                    line = line.replace("DPSP", "DP")
+                elif match_id in to_skip:
+                    continue
+            elif match_type == "contig":
+                contig_ids.add(match_id)
+                if not match_misc:
+                    # the annotate tool writes its own contig info,
+                    # which is redundant with contig info generated by
+                    # medaka variant, but lacks a length value.
+                    # We don't need the incomplete line.
+                    contig_ids_simple.add(match_id)
+                    continue
+        header_lines.append(line)
+    # Let's check the above assumption about each ID-only contig line
+    # having a more complete counterpart.
+    assert not (contig_ids_simple - contig_ids)
+    header_lines.insert(1, "##convert_VCF_info_fields=0.2\n")
+    header_lines += [
+        '##INFO=<ID=DPSPS,Number=2,Type=Integer,Description="Depth of spanning reads by strand (fwd, rev)">\n',
+        '##INFO=<ID=AF,Number=1,Type=Float,Description="Spanning reads allele frequency">\n',
+        '##INFO=<ID=FAF,Number=1,Type=Float,Description="Forward spanning reads allele frequency">\n',
+        '##INFO=<ID=RAF,Number=1,Type=Float,Description="Reverse spanning reads allele frequency">\n',
+        '##INFO=<ID=SB,Number=1,Type=Integer,Description="Phred-scaled strand bias of spanning reads at this position">\n',
+        '##INFO=<ID=DP4,Number=4,Type=Integer,Description="Spanning reads supporting the ref and alt alleles (ref fwd, ref rev, alt fwd, alt rev)">\n',
+        '##INFO=<ID=AS,Number=4,Type=Integer,Description="Total alignment score of spanning reads to the ref and alt alleles by strand (ref fwd, ref rev, alt fwd, alt rev)">\n',
+        line,
+    ]
+
+    with open(out_vcf_filepath, "w") as out_vcf:
+        out_vcf.writelines(header_lines)
+        for line in in_vcf:
+            fields = line.split("\t")
+            info_dict = parseInfoField(fields[7])
+            sr_list = [int(x) for x in info_dict["SR"].split(",")]
+            sc_list = [int(x) for x in info_dict["SC"].split(",")]
+            if len(sr_list) != len(sc_list):
+                print("WARNING - SR and SC are different lengths, skipping variant")
+                print(line.strip())  # Print the line for debugging purposes
+                continue
+            variant_list = fields[4].split(",")
+            dpsp = int(info_dict["DPSP"])
+            ref_fwd, ref_rev = 0, 1
+            dpspf, dpspr = (int(x) for x in info_dict["AR"].split(","))
+            for i in range(0, len(sr_list), 2):
+                dpspf += sr_list[i]
+                dpspr += sr_list[i + 1]
+            for j, i in enumerate(range(2, len(sr_list), 2)):
+                dp4 = (sr_list[ref_fwd], sr_list[ref_rev], sr_list[i], sr_list[i + 1])
+                dp2x2 = [[dp4[0], dp4[1]], [dp4[2], dp4[3]]]
+                _, p_val = scipy.stats.fisher_exact(dp2x2)
+                sb = pval_to_phredqual(p_val)
+
+                as_ = (sc_list[ref_fwd], sc_list[ref_rev], sc_list[i], sc_list[i + 1])
+
+                info = []
+                for code in info_dict:
+                    if code in to_skip:
+                        continue
+                    val = info_dict[code]
+                    info.append("%s=%s" % (code, val))
+
+                info.append("DP=%d" % dpsp)
+                info.append("DPSPS=%d,%d" % (dpspf, dpspr))
+
+                if dpsp == 0:
+                    info.append("AF=.")
+                else:
+                    af = (dp4[2] + dp4[3]) / dpsp
+                    info.append("AF=%.6f" % af)
+                if dpspf == 0:
+                    info.append("FAF=.")
+                else:
+                    faf = dp4[2] / dpspf
+                    info.append("FAF=%.6f" % faf)
+                if dpspr == 0:
+                    info.append("RAF=.")
+                else:
+                    raf = dp4[3] / dpspr
+                    info.append("RAF=%.6f" % raf)
+                info.append("SB=%d" % sb)
+                info.append("DP4=%d,%d,%d,%d" % dp4)
+                info.append("AS=%d,%d,%d,%d" % as_)
+                new_info = ";".join(info)
+                fields[4] = variant_list[j]
+                fields[7] = new_info
+                out_vcf.write("\t".join(fields))
+    in_vcf.close()
+
+
+if __name__ == "__main__":
+    annotateVCF(sys.argv[1], sys.argv[2])
\ No newline at end of file
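
As a rough, illustrative sketch (not part of the patch series): the per-allele numbers that convert_VCF_info_fields.py writes can be reproduced on a toy example. The counts below are hypothetical, and medaka's ambiguous-read counts (AR), which the script also folds into the per-strand depths, are left out for simplicity.

```python
# Hypothetical spanning-read counts in medaka's SR layout: ref fwd, ref rev, alt fwd, alt rev.
from math import log10

import scipy.stats

sr = [18, 14, 3, 9]
dp4 = tuple(sr)

dpsp = sum(sr)         # total spanning reads; the script takes this from medaka's DPSP field
dpspf = sr[0] + sr[2]  # spanning reads on the forward strand (AR omitted here)
dpspr = sr[1] + sr[3]  # spanning reads on the reverse strand (AR omitted here)

af = (dp4[2] + dp4[3]) / dpsp  # AF: alt fraction of all spanning reads
faf = dp4[2] / dpspf           # FAF: alt fraction on the forward strand
raf = dp4[3] / dpspr           # RAF: alt fraction on the reverse strand

# SB: Fisher's exact test on the ref/alt vs fwd/rev 2x2 table, phred-scaled
# the same way as the script's pval_to_phredqual()
_, p_val = scipy.stats.fisher_exact([[dp4[0], dp4[1]], [dp4[2], dp4[3]]])
sb = 2147483647 if p_val == 0 else round(-10 * log10(p_val))

print(f"DP={dpsp} DP4={dp4} AF={af:.6f} FAF={faf:.6f} RAF={raf:.6f} SB={sb}")
```
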
"--normalise ${normalise_threshold}" : '--normalise 0' """ From 168a3616a6aabdef80ea6cb07c6d33f1443ec640 Mon Sep 17 00:00:00 2001 From: "Lataretu, Marie" Date: Tue, 9 Jul 2024 15:57:43 +0200 Subject: [PATCH 3/8] emit BAM, VCFs and primer dir from the medaka ARTIC workflow --- workflows/artic_nanopore_nCov19.nf | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/workflows/artic_nanopore_nCov19.nf b/workflows/artic_nanopore_nCov19.nf index 8c5feba..f7cc3f3 100755 --- a/workflows/artic_nanopore_nCov19.nf +++ b/workflows/artic_nanopore_nCov19.nf @@ -15,6 +15,10 @@ workflow artic_ncov_wf { artic_medaka_custom_bed(fastq.combine(external_primer_schemes).combine(primerBed), normalise_threshold) assembly = artic_medaka_custom_bed.out.fasta binary_alignment = artic_medaka_custom_bed.out.fullbam + trimmed_bam = artic_medaka.out.reference_bam + vcf = artic_medaka.out.vcf + failed_vcf = artic_medaka.out.vcf_fail + primer_dir = artic_medaka_custom_bed.out.primer_dir // plot amplicon coverage covarplot_custom_bed(artic_medaka_custom_bed.out.covarplot.combine(primerBed)) @@ -27,6 +31,10 @@ workflow artic_ncov_wf { artic_medaka(fastq.combine(external_primer_schemes), normalise_threshold) assembly = artic_medaka.out.fasta binary_alignment = artic_medaka.out.fullbam + trimmed_bam = artic_medaka.out.reference_bam + vcf = artic_medaka.out.vcf + failed_vcf = artic_medaka.out.vcf_fail + primer_dir = Channel.empty() // plot amplicon coverage covarplot(artic_medaka.out.covarplot.combine(external_primer_schemes)) @@ -40,6 +48,10 @@ workflow artic_ncov_wf { emit: assembly binary_alignment + trimmed_bam + vcf + primer_dir + failed_vcf } workflow artic_ncov_np_wf { From 1be18712600720b1a048108ec63a0a4777491961 Mon Sep 17 00:00:00 2001 From: "Lataretu, Marie" Date: Tue, 9 Jul 2024 16:06:27 +0200 Subject: [PATCH 4/8] fixed channel name --- workflows/artic_nanopore_nCov19.nf | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/workflows/artic_nanopore_nCov19.nf b/workflows/artic_nanopore_nCov19.nf index f7cc3f3..b2f5860 100755 --- a/workflows/artic_nanopore_nCov19.nf +++ b/workflows/artic_nanopore_nCov19.nf @@ -15,9 +15,9 @@ workflow artic_ncov_wf { artic_medaka_custom_bed(fastq.combine(external_primer_schemes).combine(primerBed), normalise_threshold) assembly = artic_medaka_custom_bed.out.fasta binary_alignment = artic_medaka_custom_bed.out.fullbam - trimmed_bam = artic_medaka.out.reference_bam - vcf = artic_medaka.out.vcf - failed_vcf = artic_medaka.out.vcf_fail + trimmed_bam = artic_medaka_custom_bed.out.reference_bam + vcf = artic_medaka_custom_bed.out.vcf + failed_vcf = artic_medaka_custom_bed.out.vcf_fail primer_dir = artic_medaka_custom_bed.out.primer_dir // plot amplicon coverage From 53ff11445c3da3fb23590163d55ac8e60d4bd0e6 Mon Sep 17 00:00:00 2001 From: "Lataretu, Marie" Date: Tue, 9 Jul 2024 17:04:37 +0200 Subject: [PATCH 5/8] added process --- modules/add_alt_allele_ration_vcf.nf | 46 ++++++++++++++++++++++++++++ 1 file changed, 46 insertions(+) create mode 100644 modules/add_alt_allele_ration_vcf.nf diff --git a/modules/add_alt_allele_ration_vcf.nf b/modules/add_alt_allele_ration_vcf.nf new file mode 100644 index 0000000..dd8f7c7 --- /dev/null +++ b/modules/add_alt_allele_ration_vcf.nf @@ -0,0 +1,46 @@ +process add_alt_allele_ration_vcf { + label 'artic' + publishDir "${params.output}/${params.lineagedir}/${name}/", mode: 'copy' + input: + tuple val(name), path(bam), path(bai), path(vcf), path(failed_vcf) + path(external_scheme) // primer scheme dir as input + output: + tuple 
+        tuple val(name), path("${name}_all-vars-with-aar.vcf"), emit: vcf
+        tuple val(name), path("mixed_sites_stats.csv"), emit: stats
+    script:
+        primer_version_tag = params.primerV.toString().contains(".bed") ? 'V_custom' : "${params.primerV}"
+        primer_dir = params.primerV.toString().contains(".bed") ? "${external_scheme}" : "${external_scheme}/nCoV-2019"
+        """
+        # reheader failed VCF and change FILTER
+        echo '##FILTER=<ID=ARTICFAIL,Description="Failed ARTIC pipeline filters">' > add-to-hdr.txt
+        # -c and --rename-annots add a header with a default/wrong description
+        bcftools annotate -h add-to-hdr.txt -c "FILTER/ARTICFAIL:=FILTER/PASS" ${failed_vcf} | sed '/##FILTER=<ID=ARTICFAIL,Description="All filters passed">/d' > tmp_failed_updated-filter.vcf
+
+        # concat failed and passed VCF
+        bcftools concat ${vcf} tmp_failed_updated-filter.vcf | bcftools sort -o tmp_merged.vcf
+
+        # call medaka tools annotate for each pool and add the alternate allele ratio
+        for pool in `cut -f5 ${primer_dir}/${primer_version_tag}/nCoV-2019.scheme.bed | sort | uniq`; do
+            bcftools view -i 'INFO/Pool="'\$pool'"' tmp_merged.vcf -o tmp_\$pool.vcf
+            medaka tools annotate --dpsp --pad 25 --RG \$pool tmp_\$pool.vcf ${primer_dir}/${primer_version_tag}/nCoV-2019.reference.fasta ${bam} tmp_annotated_\$pool.vcf
+            convert_VCF_info_fields.py tmp_annotated_\$pool.vcf tmp_aar_\$pool.vcf
+        done
+
+        # merge pool VCF and sort VCF
+        bcftools concat tmp_aar_*.vcf | bcftools sort -o ${name}_all-vars-with-aar.vcf
+
+        # count mixed sites
+        # thresholds for human genotyping: 0.35 <= x <= 0.65
+        NUM_MIXED_SITES=\$(bcftools view -H -i 'INFO/DP>${params.min_depth} & INFO/AF>=0.3 & INFO/AF<=0.8' ${name}_all-vars-with-aar.vcf | wc -l)
+        echo sample,num_mixed_sites > mixed_sites_stats.csv
+        echo ${name},\$NUM_MIXED_SITES >> mixed_sites_stats.csv
+
+        # remove intermediate files
+        rm tmp_*
+        """
+    stub:
+        """
+        touch ${name}_all-vars-with-aar.vcf mixed_sites_stats.csv
+        """
+
+}

From 69b21840dd5cc7b2a67d2d349e6c70173dcca773 Mon Sep 17 00:00:00 2001
From: "Lataretu, Marie"
Date: Tue, 9 Jul 2024 17:05:25 +0200
Subject: [PATCH 6/8] add the new process to the workflow and the datatable

---
 bin/summary_report.py               |  6 ++++++
 poreCov.nf                          | 12 +++++++++++-
 workflows/create_summary_report.nf  |  6 ++++--
 workflows/process/summary_report.nf |  4 ++++
 4 files changed, 25 insertions(+), 3 deletions(-)

diff --git a/bin/summary_report.py b/bin/summary_report.py
index c0884f4..ab4a247 100755
--- a/bin/summary_report.py
+++ b/bin/summary_report.py
@@ -640,6 +640,9 @@ def unclass_markup(value):
 
         self.add_col_description(f'Read classification was determined against a database containing only SARS-CoV-2 and human with Kraken2 (v{self.tool_versions["kraken2"]}).')
 
+    def add_mixed_sites_results(self, mixed_site_results):
+        res_data = pd.read_csv(mixed_site_results, index_col='sample', dtype={'sample': str})
+        self.add_column_raw('num_mixed_sites', res_data['num_mixed_sites'])
 
     def add_coverage_plots(self, coverage_plots):
         for coverage_plot in sorted(coverage_plots.split(',')):
@@ -826,6 +829,7 @@ def get_lineage_status(self, lineage):
     parser.add_argument("-n", "--nextclade_results", help="nextclade results")
     parser.add_argument("-q", "--president_results", help="president results")
     parser.add_argument("-k", "--kraken2_results", help="kraken2 results")
+    parser.add_argument("-m", "--mixed_sites_results", help="mixed sites statistics")
     parser.add_argument("-c", "--coverage_plots", help="coverage plots (comma separated)")
     parser.add_argument("-s", "--samples", help="sample ids (comma separated)")
     args = parser.parse_args()
@@ -852,6 +856,8 @@
         report.add_pangolin_results(args.pangolin_results)
     if args.nextclade_results:
         report.add_nextclade_results(args.nextclade_results)
+    if args.mixed_sites_results:
+        report.add_mixed_sites_results(args.mixed_sites_results)
 
     if args.coverage_plots:
         report.add_coverage_plots(args.coverage_plots)
diff --git a/poreCov.nf b/poreCov.nf
index 0cf7d5e..5f8a8ac 100755
--- a/poreCov.nf
+++ b/poreCov.nf
@@ -309,6 +309,7 @@ include { get_fasta } from './modules/get_fasta_test_data.nf'
 include { align_to_reference } from './modules/align_to_reference.nf'
 include { split_fasta } from './modules/split_fasta.nf'
 include { filter_fastq_by_length } from './modules/filter_fastq_by_length.nf'
+include { add_alt_allele_ration_vcf } from './modules/add_alt_allele_ration_vcf.nf'
 
 /**************************
 * Workflows
@@ -405,6 +406,15 @@ workflow {
 
     determine_mutations_wf(fasta_input_ch)
     genome_quality_wf(fasta_input_ch, reference_for_qc_input_ch)
+    // add alternative allele ratio to the VCF
+    if (params.primerV.toString().contains(".bed")) {
+        external_primer_schemes = artic_ncov_wf.out.primer_dir
+    }
+    else {
+        external_primer_schemes = file(workflow.projectDir + "/data/external_primer_schemes", checkIfExists: true, type: 'dir' )
+    }
+    add_alt_allele_ration_vcf(artic_ncov_wf.out.trimmed_bam.join(artic_ncov_wf.out.vcf).join(artic_ncov_wf.out.failed_vcf), external_primer_schemes)
+
     // 3. Specialised outputs (rki, json)
     rki_report_wf(genome_quality_wf.out[0], genome_quality_wf.out[1], extended_input_ch)
 
@@ -436,7 +446,7 @@ workflow {
     else { samples_table_ch = Channel.from( ['deactivated'] ) }
     */
     create_summary_report_wf(determine_lineage_wf.out, genome_quality_wf.out[0], determine_mutations_wf.out,
-                        taxonomic_read_classification_ch, alignments_ch, samples_file_ch)
+                        taxonomic_read_classification_ch, add_alt_allele_ration_vcf.out.stats, alignments_ch, samples_file_ch)
 }
 
diff --git a/workflows/create_summary_report.nf b/workflows/create_summary_report.nf
index 806f0ba..9b606c0 100644
--- a/workflows/create_summary_report.nf
+++ b/workflows/create_summary_report.nf
@@ -8,6 +8,7 @@ workflow create_summary_report_wf {
         president
         nextclade
         kraken2
+        mixed_sites
         alignments
         samples_table
 
@@ -18,6 +19,7 @@ workflow create_summary_report_wf {
         pangolin_results = pangolin.map {it -> it[1]}.collectFile(name: 'pangolin_results.csv', skip: 1, keepHeader: true)
         president_results = president.map {it -> it[1]}.collectFile(name: 'president_results.tsv', skip: 1, keepHeader: true)
         nextclade_results = nextclade.map {it -> it[1]}.collectFile(name: 'nextclade_results.tsv', skip: 1, keepHeader: true)
+        mixed_site_results = mixed_sites.map {it -> it[1]}.collectFile(name: 'mixed_sites_results.tsv', skip: 1, keepHeader: true)
         alignment_files = alignments.map {it -> it[0]}.collect()
 
         if (params.fasta || workflow.profile.contains('test_fasta')) {
@@ -30,7 +32,7 @@ workflow create_summary_report_wf {
         coverage_plots = plot_coverages(alignments.map{it -> it[0]}.toSortedList({ a, b -> a.simpleName <=> b.simpleName }).flatten().collate(6), \
                                         alignments.map{it -> it[1]}.toSortedList({ a, b -> a.simpleName <=> b.simpleName }).flatten().collate(6)).collect()
 
-        if (params.samples) { summary_report(version_ch, variants_table_ch, pangolin_results, president_results, nextclade_results, kraken2_results, coverage_plots, samples_table) }
-        else { summary_report_default(version_ch, variants_table_ch, pangolin_results, president_results, nextclade_results, kraken2_results, coverage_plots) }
+        if (params.samples) { summary_report(version_ch, variants_table_ch, pangolin_results, president_results, nextclade_results, kraken2_results, mixed_site_results, coverage_plots, samples_table) }
+        else { summary_report_default(version_ch, variants_table_ch, pangolin_results, president_results, nextclade_results, kraken2_results, mixed_site_results, coverage_plots) }
     }
 }
diff --git a/workflows/process/summary_report.nf b/workflows/process/summary_report.nf
index f601060..d0406d7 100644
--- a/workflows/process/summary_report.nf
+++ b/workflows/process/summary_report.nf
@@ -10,6 +10,7 @@ process summary_report {
         path(president_results)
         path(nextclade_results)
         file(kraken2_results)
+        file(mixed_sites_results)
        file(coverage_plots)
        file(samples_table)
    output:
@@ -43,6 +44,7 @@ process summary_report {
         -q !{president_results} \
         -n !{nextclade_results} \
         -k kraken2_results.csv \
+        -m !{mixed_sites_results} \
         -c $(echo !{coverage_plots} | tr ' ' ',') \
         -s !{samples_table}
     '''
@@ -64,6 +66,7 @@ process summary_report_default {
         path(president_results)
         path(nextclade_results)
         path(kraken2_results)
+        path(mixed_sites_results)
         path(coverage_plots)
     output:
         path("poreCov_summary_report_*.html")
@@ -96,6 +99,7 @@ process summary_report_default {
         -q !{president_results} \
         -n !{nextclade_results} \
         -k kraken2_results.csv \
+        -m !{mixed_sites_results} \
         -c $(echo !{coverage_plots} | tr ' ' ',')
     '''
     stub:

From d7636f12d28d7f9d361e416aceb5dc8132ed628c Mon Sep 17 00:00:00 2001
From: "Lataretu, Marie"
Date: Wed, 10 Jul 2024 13:35:19 +0200
Subject: [PATCH 7/8] fixed typo

---
 ...add_alt_allele_ration_vcf.nf => add_alt_allele_ratio_vcf.nf} | 2 +-
 poreCov.nf                                                      | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)
 rename modules/{add_alt_allele_ration_vcf.nf => add_alt_allele_ratio_vcf.nf} (98%)

diff --git a/modules/add_alt_allele_ration_vcf.nf b/modules/add_alt_allele_ratio_vcf.nf
similarity index 98%
rename from modules/add_alt_allele_ration_vcf.nf
rename to modules/add_alt_allele_ratio_vcf.nf
index dd8f7c7..396f301 100644
--- a/modules/add_alt_allele_ration_vcf.nf
+++ b/modules/add_alt_allele_ratio_vcf.nf
@@ -1,4 +1,4 @@
-process add_alt_allele_ration_vcf {
+process add_alt_allele_ratio_vcf {
     label 'artic'
     publishDir "${params.output}/${params.lineagedir}/${name}/", mode: 'copy'
     input:
diff --git a/poreCov.nf b/poreCov.nf
index 5f8a8ac..5954d03 100755
--- a/poreCov.nf
+++ b/poreCov.nf
@@ -309,7 +309,7 @@ include { get_fasta } from './modules/get_fasta_test_data.nf'
 include { align_to_reference } from './modules/align_to_reference.nf'
 include { split_fasta } from './modules/split_fasta.nf'
 include { filter_fastq_by_length } from './modules/filter_fastq_by_length.nf'
-include { add_alt_allele_ration_vcf } from './modules/add_alt_allele_ration_vcf.nf'
+include { add_alt_allele_ratio_vcf } from './modules/add_alt_allele_ratio_vcf.nf'
 
 /**************************
 * Workflows

From 752e20b0bf965acd1429679b0f689d21aeff0a97 Mon Sep 17 00:00:00 2001
From: "Lataretu, Marie"
Date: Wed, 10 Jul 2024 14:03:40 +0200
Subject: [PATCH 8/8] restructured the workflow

The new process is only run for FASTQ and FAST5 input and when nanopolish is
not used; otherwise it is deactivated.
---
 poreCov.nf | 35 +++++++++++++++++++++++++----------
 1 file changed, 25 insertions(+), 10 deletions(-)

diff --git a/poreCov.nf b/poreCov.nf
index 5954d03..1b91e30 100755
--- a/poreCov.nf
+++ b/poreCov.nf
@@ -361,7 +361,16 @@ workflow {
         else if (!params.nanopolish) {
             artic_ncov_wf(filtered_reads_ch, params.artic_normalize)
             fasta_input_ch = artic_ncov_wf.out.assembly
+
+            // add alternative allele ratio to the VCF
+            if (params.primerV.toString().contains(".bed")) {
+                external_primer_schemes = artic_ncov_wf.out.primer_dir
+            }
+            else {
+                external_primer_schemes = file(workflow.projectDir + "/data/external_primer_schemes", checkIfExists: true, type: 'dir' )
+            }
+            add_alt_allele_ratio_vcf(artic_ncov_wf.out.trimmed_bam.join(artic_ncov_wf.out.vcf).join(artic_ncov_wf.out.failed_vcf), external_primer_schemes)
             }
     }
     // fastq input via dir and or files
     if ( (params.fastq || params.fastq_pass) || workflow.profile.contains('test_fastq')) {
@@ -393,7 +402,16 @@ workflow {
         else if (!params.nanopolish) {
             artic_ncov_wf(filtered_reads_ch, params.artic_normalize)
             fasta_input_ch = artic_ncov_wf.out.assembly
+
+            // add alternative allele ratio to the VCF
+            if (params.primerV.toString().contains(".bed")) {
+                external_primer_schemes = artic_ncov_wf.out.primer_dir
+            }
+            else {
+                external_primer_schemes = file(workflow.projectDir + "/data/external_primer_schemes", checkIfExists: true, type: 'dir' )
+            }
+            add_alt_allele_ratio_vcf(artic_ncov_wf.out.trimmed_bam.join(artic_ncov_wf.out.vcf).join(artic_ncov_wf.out.failed_vcf), external_primer_schemes)
             }
     }
 
     // 2. Genome quality, lineages, clades and mutations
@@ -406,15 +424,6 @@ workflow {
 
     determine_mutations_wf(fasta_input_ch)
     genome_quality_wf(fasta_input_ch, reference_for_qc_input_ch)
-    // add alternative allele ratio to the VCF
-    if (params.primerV.toString().contains(".bed")) {
-        external_primer_schemes = artic_ncov_wf.out.primer_dir
-    }
-    else {
-        external_primer_schemes = file(workflow.projectDir + "/data/external_primer_schemes", checkIfExists: true, type: 'dir' )
-    }
-    add_alt_allele_ration_vcf(artic_ncov_wf.out.trimmed_bam.join(artic_ncov_wf.out.vcf).join(artic_ncov_wf.out.failed_vcf), external_primer_schemes)
-
     // 3. Specialised outputs (rki, json)
     rki_report_wf(genome_quality_wf.out[0], genome_quality_wf.out[1], extended_input_ch)
 
@@ -426,6 +435,7 @@ workflow {
     if (params.fasta || workflow.profile.contains('test_fasta')) {
         taxonomic_read_classification_ch = Channel.from( ['deactivated', 'deactivated', 'deactivated'] ).collect()
         alignments_ch = Channel.from( ['deactivated'] )
+
     } else {
         taxonomic_read_classification_ch = read_classification_wf.out.kraken
         if (params.screen_reads) {
@@ -438,6 +448,11 @@ workflow {
         }
         alignments_ch = align_to_reference(filtered_reads_ch.combine(reference_for_qc_input_ch))
     }
+    if (params.fasta || workflow.profile.contains('test_fasta') || params.nanopolish ) {
+        alt_allele_ratio_ch = Channel.from( ['deactivated'] )
+    } else {
+        alt_allele_ratio_ch = add_alt_allele_ratio_vcf.out.stats
+    }
 
     /*
     if (params.samples) {
@@ -446,7 +461,7 @@ workflow {
     else { samples_table_ch = Channel.from( ['deactivated'] ) }
     */
     create_summary_report_wf(determine_lineage_wf.out, genome_quality_wf.out[0], determine_mutations_wf.out,
-                        taxonomic_read_classification_ch, add_alt_allele_ration_vcf.out.stats, alignments_ch, samples_file_ch)
+                        taxonomic_read_classification_ch, alt_allele_ratio_ch, alignments_ch, samples_file_ch)
 }
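
To make the reporting path above easier to follow, here is a minimal sketch (not part of the patches) of the mixed-site rule that add_alt_allele_ratio_vcf applies via bcftools: a record counts as mixed when its spanning-read depth exceeds the minimum depth and the alternate allele frequency lies between 0.3 and 0.8. The VCF records and the depth threshold below are hypothetical stand-ins.

```python
MIN_DEPTH = 20  # stand-in for params.min_depth

# Hypothetical records carrying the DP and AF fields written by convert_VCF_info_fields.py
records = [
    "MN908947.3\t3037\t.\tC\tT\t45\tPASS\tDP=512;AF=0.996094",       # fixed variant
    "MN908947.3\t11083\t.\tG\tT\t32\tPASS\tDP=310;AF=0.480645",      # mixed site
    "MN908947.3\t21846\t.\tC\tT\t28\tARTICFAIL\tDP=12;AF=0.583333",  # too shallow
]


def info_to_dict(info):
    # split an INFO column like "DP=310;AF=0.480645" into a dict
    return dict(kv.split("=") for kv in info.split(";"))


num_mixed_sites = 0
for rec in records:
    info = info_to_dict(rec.split("\t")[7])
    if int(info["DP"]) > MIN_DEPTH and 0.3 <= float(info["AF"]) <= 0.8:
        num_mixed_sites += 1

print(f"num_mixed_sites={num_mixed_sites}")  # 1 for the example records above
```
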