
Custom bed #211

Merged
merged 11 commits into from
Feb 2, 2022
22 changes: 12 additions & 10 deletions README.md
@@ -72,6 +72,8 @@ Table of Contents
### Singularity
* Singularity installation [here](https://singularity.lbl.gov/install-linux)
* if you can't use Docker

Note that with Singularity, the following environment variables are automatically passed to the container to ensure execution on HPCs: `HTTPS_PROXY`, `HTTP_PROXY`, `http_proxy`, `https_proxy`, `FTP_PROXY` and `ftp_proxy`.
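On an HPC behind a proxy this means it is enough to export the variables on the host before launching the workflow — a minimal sketch (the proxy URL is a placeholder, not a real endpoint):

```shell
# Export proxy settings on the host; the singularity profile passes them
# into the container via its environment-variable whitelist.
export HTTPS_PROXY="http://proxy.example.org:3128"   # placeholder URL
export HTTP_PROXY="$HTTPS_PROXY"
export https_proxy="$HTTPS_PROXY"
export http_proxy="$HTTPS_PROXY"
export FTP_PROXY="$HTTPS_PROXY"
export ftp_proxy="$HTTPS_PROXY"

# then run poreCov as usual, e.g.:
# nextflow run replikation/poreCov -profile local,singularity ...
echo "$http_proxy"
# → http://proxy.example.org:3128
```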
### Conda (not recommended)
* Conda installation [here](https://docs.conda.io/projects/conda/en/latest/user-guide/install/)
* install Nextflow and Singularity via conda (not cluster compatible) - and use the `singularity` profile
@@ -88,41 +90,41 @@ Table of Contents

```bash
# for a Docker installation
nextflow run replikation/poreCov -profile test_fastq,local,docker -r 0.11.0
nextflow run replikation/poreCov -profile test_fastq,local,docker -r 1.1.0 --update

# or for Singularity or conda installation
nextflow run replikation/poreCov -profile test_fastq,local,singularity -r 0.11.0
nextflow run replikation/poreCov -profile test_fastq,local,singularity -r 1.1.0 --update
```

## 2.2 Quick run examples

* poreCov with basecalling and Docker
* `--update` tries to force the most recent lineage release version (optional)
* `-r 0.11.0` specifies the workflow release from [here](https://github.com/replikation/poreCov/releases)
* `--update` tries to force the most recent pangolin lineage and nextclade release version (optional)
* `-r 1.1.0` specifies the workflow release from [here](https://github.com/replikation/poreCov/releases)
```bash
nextflow run replikation/poreCov --fast5 fast5/ -r 0.11.0 \
nextflow run replikation/poreCov --fast5 fast5/ -r 1.1.0 \
--cores 6 -profile local,docker --update
```

* poreCov with a basecalled fastq directory

```bash
nextflow run replikation/poreCov --fastq_pass 'fastq_pass/' -r 0.11.0 \
--cores 32 -profile local,docker
nextflow run replikation/poreCov --fastq_pass 'fastq_pass/' -r 1.1.0 \
--cores 32 -profile local,docker --update
```

* poreCov with basecalling and renaming of barcodes based on `sample_names.csv`

```bash
# rename barcodes automatically by providing an input file, also using another primer scheme
nextflow run replikation/poreCov --fast5 fast5_dir/ --samples sample_names.csv \
--primerV V1200 --output results -profile local,docker
--primerV V1200 --output results -profile local,docker --update
```
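For orientation, a `sample_names.csv` might look like the sketch below — the header and column order are assumptions for illustration only, not taken from the poreCov documentation; check `nextflow run replikation/poreCov --help` for the authoritative format:

```shell
# Hypothetical sample sheet mapping barcodes to sample names
# (column layout is an assumption, not the documented poreCov format).
cat > sample_names.csv <<'EOF'
_id,Status
sample_2021_A,barcode01
sample_2021_B,barcode02
EOF
grep -c barcode sample_names.csv
# → 2
```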

## 2.3 Extended Usage
* see also `nextflow run replikation/poreCov --help -r 0.11.0`
* see also `nextflow run replikation/poreCov --help -r 1.1.0`
### Version control
* poreCov supports version control via `-r`; this way, you can run everything reproducibly (e.g. `-r 0.11.0`)
* poreCov supports version control via `-r`; this way, you can run everything reproducibly (e.g. `-r 1.1.0`)
* poreCov releases are listed [here](https://github.com/replikation/poreCov/releases)
* add `-r <version>` to a poreCov run to activate this
* run `nextflow pull replikation/poreCov` to install updates
19 changes: 17 additions & 2 deletions data/external_primer_schemes/Readme.md
@@ -1,3 +1,18 @@
# source
* V1200 bp scheme from:
https://zenodo.org/record/3897530#.X4QxOpqxUay
* V1200 bp scheme from: https://zenodo.org/record/3897530#.X4QxOpqxUay
* Primers V to V4* -> https://github.com/artic-network/artic-ncov2019

They are just copied here for workflow stability and ease of use.

# IMPORTANT
+ PRIMER DIRS NEED TO START WITH A "V"
+ BED file structure
```
MN908947.3 22 46 varskip-0317-1_01_LEFT nCoV-2019_1 +
```
* first column: MN908947.3 (the fasta header of the reference)
* 4th column: primer names; sort the file by this column (sort -k4)
* primers with an "_alt" suffix might be problematic; consider removing them
* 5th column needs to be nCoV-2019_1 or nCoV-2019_2, e.g. via
`awk '{if (NR%4==0 || NR%4==3){print $1"\t"$2"\t"$3"\t"$4"\tnCoV-2019_2\t"$6} else {print $1"\t"$2"\t"$3"\t"$4"\tnCoV-2019_1\t"$6}}'`
* 6th column is + or - but can be skipped
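The clean-up rules above can be sketched as a small shell pipeline — the cut/sed/sort steps mirror the ones used in the `artic_medaka_custom_bed` process, while the input file name, coordinates and primer names are invented for illustration:

```shell
# Build a small example BED: wrong reference name in column 1,
# a blank line, and primer names out of order.
printf 'NC_045512.2\t320\t342\tnCoV-2019_2_LEFT\tpool\t+\n' >  custom_primers.bed
printf '\n'                                                 >> custom_primers.bed
printf 'NC_045512.2\t22\t46\tnCoV-2019_1_LEFT\tpool\t+\n'   >> custom_primers.bed

# Drop column 1, drop blank lines, force the expected reference header
# (MN908947.3) and sort by primer name (4th column).
cut -f2- custom_primers.bed \
  | sed '/^[[:space:]]*$/d' \
  | sed -e $'s/^/MN908947.3\t/' \
  | sort -k4 > nCoV-2019.scheme.bed

# Assign the pool names required in column 5 (same awk as in the note above).
awk '{if (NR%4==0 || NR%4==3){print $1"\t"$2"\t"$3"\t"$4"\tnCoV-2019_2\t"$6} else {print $1"\t"$2"\t"$3"\t"$4"\tnCoV-2019_1\t"$6}}' \
  nCoV-2019.scheme.bed > nCoV-2019.pooled.bed

head -1 nCoV-2019.pooled.bed
# → MN908947.3  22  46  nCoV-2019_1_LEFT  nCoV-2019_1  +   (tab-separated)
```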
3 changes: 0 additions & 3 deletions data/external_primer_schemes/nCoV-2019/README.md

This file was deleted.

1 change: 1 addition & 0 deletions data/external_primer_schemes/nCoV-2019/V_custom/Readme.md
@@ -0,0 +1 @@
Data from: https://github.com/nebiolabs/VarSkip

Large diffs are not rendered by default.

@@ -0,0 +1 @@
MN908947.3 29903 12 60 61
296 changes: 148 additions & 148 deletions data/external_primer_schemes/nCoV-2019/VarSkipV2/nCoV-2019.scheme.bed

Large diffs are not rendered by default.

3 changes: 2 additions & 1 deletion nextflow.config
@@ -102,7 +102,7 @@ profiles {
memory = params.memory
}

process.errorStrategy = 'ignore'
//process.errorStrategy = 'ignore'
}

slurm {
@@ -129,6 +129,7 @@ profiles {
enabled = true
autoMounts = true
cacheDir = params.cachedir
envWhitelist = "HTTPS_PROXY,HTTP_PROXY,http_proxy,https_proxy,FTP_PROXY,ftp_proxy"
//runOptions = "-B /tmp/nextflow-nCov-$USER"
}
includeConfig 'configs/container.config'
4 changes: 4 additions & 0 deletions poreCov.nf
@@ -469,6 +469,7 @@ ${c_yellow}Parameters - SARS-CoV-2 genome reconstruction (optional)${c_reset}
${c_dim}ARTIC:${c_reset} V1, V2, V3, V4, V4.1
${c_dim}NEB:${c_reset} VarSkipV1a, VarSkipV2
${c_dim}Other:${c_reset} V1200 ${c_dim}(also known as midnight)${c_reset}
${c_dim}Primer bed file:${c_reset} e.g. primers.bed
--rapid rapid-barcoding-kit was used [default: ${params.rapid}]
--minLength min length filter raw reads [default: 100]
--maxLength max length filter raw reads [default: 700 (primer-scheme: V1-4, rapid); 1500 (primer-scheme: V1200)]
@@ -506,6 +507,9 @@ ${c_yellow}Execution/Engine profiles (choose executer and engine${c_reset}
test_fasta
test_fastq
test_fast5

Note: The singularity profile automatically passes the following environment variables
to the container to ensure execution on HPCs: HTTPS_PROXY, HTTP_PROXY, http_proxy, https_proxy, FTP_PROXY, ftp_proxy
""".stripIndent()
}

71 changes: 52 additions & 19 deletions workflows/artic_nanopore_nCov19.nf
@@ -1,25 +1,37 @@
include { artic_medaka ; artic_nanopolish } from './process/artic.nf'
include { covarplot } from './process/covarplot.nf'
include { artic_medaka ; artic_nanopolish; artic_medaka_custom_bed; artic_nanopolish_custom_bed } from './process/artic.nf'
include { covarplot; covarplot_custom_bed } from './process/covarplot.nf'

workflow artic_ncov_wf {
take:
fastq
main:

// assembly
external_primer_schemes = Channel.fromPath(workflow.projectDir + "/data/external_primer_schemes", checkIfExists: true, type: 'dir' )
// assembly with a primer bed file
if (params.primerV.toString().contains(".bed")) {
primerBed = Channel.fromPath(params.primerV, checkIfExists: true )
external_primer_schemes = Channel.fromPath(workflow.projectDir + "/data/external_primer_schemes", checkIfExists: true, type: 'dir' )

artic_medaka_custom_bed(fastq.combine(external_primer_schemes).combine(primerBed))
assembly = artic_medaka_custom_bed.out.fasta

// plot amplicon coverage
covarplot_custom_bed(artic_medaka_custom_bed.out.covarplot.combine(primerBed))
}

artic_medaka(fastq.combine(external_primer_schemes))
// assembly via pre-installed primers
else {
external_primer_schemes = Channel.fromPath(workflow.projectDir + "/data/external_primer_schemes", checkIfExists: true, type: 'dir' )

artic_medaka(fastq.combine(external_primer_schemes))
assembly = artic_medaka.out.fasta

assembly = artic_medaka.out.fasta
// plot amplicon coverage
covarplot(artic_medaka.out.covarplot.combine(external_primer_schemes))
}

// plot amplicon coverage
covarplot(
artic_medaka.out.covarplot.combine(external_primer_schemes)
)

// error logging
nogenomesatall = artic_medaka.out.fasta.ifEmpty{ log.info "\033[0;33mCould not generate any genomes, please check your reads $params.output/$params.readqcdir\033[0m" }
no_genomes_at_all = assembly.ifEmpty{ log.info "\033[0;33mCould not generate any genomes, please check your reads $params.output/$params.readqcdir\033[0m" }

emit:
assembly
@@ -33,21 +45,42 @@ workflow artic_ncov_np_wf {
main:

// assembly
external_primer_schemes = Channel.fromPath(workflow.projectDir + "/data/external_primer_schemes", checkIfExists: true, type: 'dir' )
artic_nanopolish(
if (params.primerV.toString().contains(".bed")) {
primerBed = Channel.fromPath(params.primerV, checkIfExists: true )
external_primer_schemes = Channel.fromPath(workflow.projectDir + "/data/external_primer_schemes", checkIfExists: true, type: 'dir' )

artic_nanopolish_custom_bed(
fastq
.combine(external_primer_schemes)
.combine(fast5.map{it -> it[1]})
.combine(sequence_summaries)
.map{it -> tuple(it[0],it[1],it[2],it[3],it[5])}
.combine(primerBed)
.map{it -> tuple(it[0],it[1],it[2],it[3],it[5],it[6])}
)

assembly = artic_nanopolish.out.fasta
assembly = artic_nanopolish_custom_bed.out.fasta

// plot amplicon coverage
covarplot(
artic_nanopolish.out.covarplot.combine(external_primer_schemes)
)
// plot amplicon coverage
covarplot_custom_bed(artic_nanopolish_custom_bed.out.covarplot.combine(primerBed))
}


// assembly via pre-installed primers
else {
external_primer_schemes = Channel.fromPath(workflow.projectDir + "/data/external_primer_schemes", checkIfExists: true, type: 'dir' )
artic_nanopolish(
fastq
.combine(external_primer_schemes)
.combine(fast5.map{it -> it[1]})
.combine(sequence_summaries)
.map{it -> tuple(it[0],it[1],it[2],it[3],it[5])}
)

assembly = artic_nanopolish.out.fasta

// plot amplicon coverage
covarplot(artic_nanopolish.out.covarplot.combine(external_primer_schemes))
}

emit:
assembly
130 changes: 130 additions & 0 deletions workflows/process/artic.nf
@@ -52,6 +52,72 @@ process artic_medaka {
"""
}

process artic_medaka_custom_bed {
label 'artic'
publishDir "${params.output}/${params.genomedir}/${name}/", mode: 'copy', pattern: "*.consensus.fasta"
publishDir "${params.output}/${params.genomedir}/${name}/", mode: 'copy', pattern: "${name}_mapped_*.primertrimmed.sorted.bam*"
publishDir "${params.output}/${params.genomedir}/${name}/", mode: 'copy', pattern: "${name}.trimmed.rg.sorted.bam"
publishDir "${params.output}/${params.genomedir}/all_consensus_sequences/", mode: 'copy', pattern: "*.consensus.fasta"

input:
tuple val(name), path(reads), path(external_scheme), path(primerBed)
output:
tuple val(name), path("*.consensus.fasta"), emit: fasta
tuple val(name), path("${name}_mapped_*.primertrimmed.sorted.bam"), path("${name}_mapped_*.primertrimmed.sorted.bam.bai"), emit: reference_bam
tuple val(name), path("SNP_${name}.pass.vcf"), emit: vcf
tuple val(name), path("${name}.pass.vcf.gz"), path("${name}.coverage_mask.txt.*1.depths"), path("${name}.coverage_mask.txt.*2.depths"), emit: covarplot
tuple val(name), path("${name}.trimmed.rg.sorted.bam"), emit: fullbam
script:
"""
# create a new primer dir as input for artic
mkdir -p primer_scheme/nCoV-2019
cp -r ${external_scheme}/nCoV-2019/V_custom primer_scheme/nCoV-2019/

# clean up bed file: replace first column with MN908947.3, remove empty lines and sort by 4th column (primer names)
cut -f2- ${primerBed} |\
sed '/^[[:space:]]*\$/d' |\
sed -e \$'s/^/MN908947.3\\t/' |\
sort -k4 > primer_scheme/nCoV-2019/V_custom/nCoV-2019.scheme.bed

# start artic
artic minion --medaka \
--medaka-model ${params.medaka_model} \
--min-depth ${params.min_depth} \
--normalise 500 \
--threads ${task.cpus} \
--scheme-directory primer_scheme \
--read-file ${reads} \
nCoV-2019/V_custom ${name}

# generate depth files
artic_make_depth_mask --depth ${params.min_depth} \
--store-rg-depths primer_scheme/nCoV-2019/V_custom/nCoV-2019.reference.fasta \
${name}.primertrimmed.rg.sorted.bam \
${name}.coverage_mask.txt

zcat ${name}.pass.vcf.gz > SNP_${name}.pass.vcf

sed -i "1s/.*/>${name}/" *.consensus.fasta

# get reference FASTA ID to rename BAM
REF=\$(samtools view -H ${name}.primertrimmed.rg.sorted.bam | awk 'BEGIN{FS="\\t"};{if(\$1=="@SQ"){print \$2}}' | sed 's/SN://g')
mv ${name}.primertrimmed.rg.sorted.bam ${name}_mapped_\${REF}.primertrimmed.sorted.bam
samtools index ${name}_mapped_\${REF}.primertrimmed.sorted.bam
"""
stub:
"""
touch genome.consensus.fasta \
${name}_mapped_1.primertrimmed.sorted.bam \
${name}_mapped_1.primertrimmed.sorted.bam.bai \
SNP_${name}.pass.vcf \
${name}.pass.vcf.gz \
${name}.coverage_mask.txt.nCoV-2019_1.depths \
${name}.coverage_mask.txt.nCoV-2019_2.depths \
${name}.trimmed.rg.sorted.bam
"""
}


process artic_nanopolish {
label 'artic'
publishDir "${params.output}/${params.genomedir}/${name}/", mode: 'copy', pattern: "*.consensus.fasta"
@@ -106,5 +172,69 @@ process artic_nanopolish {
}


process artic_nanopolish_custom_bed {
label 'artic'
publishDir "${params.output}/${params.genomedir}/${name}/", mode: 'copy', pattern: "*.consensus.fasta"
publishDir "${params.output}/${params.genomedir}/${name}/", mode: 'copy', pattern: "${name}_mapped_*.primertrimmed.sorted.bam*"
publishDir "${params.output}/${params.genomedir}/${name}/", mode: 'copy', pattern: "${name}.trimmed.rg.sorted.bam"
publishDir "${params.output}/${params.genomedir}/all_consensus_sequences/", mode: 'copy', pattern: "*.consensus.fasta"

input:
tuple val(name), path(reads), path(external_scheme), path(fast5_dir), path(txt_files), path(primerBed)
output:
tuple val(name), path("*.consensus.fasta"), emit: fasta
tuple val(name), path("${name}_mapped_*.primertrimmed.sorted.bam"), path("${name}_mapped_*.primertrimmed.sorted.bam.bai"), emit: reference_bam
tuple val(name), path("SNP_${name}.pass.vcf"), emit: vcf
tuple val(name), path("${name}.pass.vcf.gz"), path("${name}.coverage_mask.txt.*1.depths"), path("${name}.coverage_mask.txt.*2.depths"), emit: covarplot
tuple val(name), path("${name}.trimmed.rg.sorted.bam"), emit: fullbam
script:
"""
# create a new primer dir as input for artic
mkdir -p primer_scheme/nCoV-2019
cp -r ${external_scheme}/nCoV-2019/V_custom primer_scheme/nCoV-2019/

# clean up bed file: replace first column with MN908947.3, remove empty lines and sort by 4th column (primer names)
cut -f2- ${primerBed} |\
sed '/^[[:space:]]*\$/d' |\
sed -e \$'s/^/MN908947.3\\t/' |\
sort -k4 > primer_scheme/nCoV-2019/V_custom/nCoV-2019.scheme.bed

# start artic
artic minion --minimap2 --normalise 500 \
--threads ${task.cpus} \
--scheme-directory primer_scheme \
--read-file ${reads} \
--fast5-directory ${fast5_dir} \
--sequencing-summary sequencing_summary*.txt \
nCoV-2019/V_custom ${name}

# generate depth files
artic_make_depth_mask --depth ${params.min_depth} \
--store-rg-depths primer_scheme/nCoV-2019/V_custom/nCoV-2019.reference.fasta \
${name}.primertrimmed.rg.sorted.bam \
${name}.coverage_mask.txt

zcat ${name}.pass.vcf.gz > SNP_${name}.pass.vcf

sed -i "1s/.*/>${name}/" *.consensus.fasta

# get reference FASTA ID to rename BAM
REF=\$(samtools view -H ${name}.primertrimmed.rg.sorted.bam | awk 'BEGIN{FS="\\t"};{if(\$1=="@SQ"){print \$2}}' | sed 's/SN://g')
mv ${name}.primertrimmed.rg.sorted.bam ${name}_mapped_\${REF}.primertrimmed.sorted.bam
samtools index ${name}_mapped_\${REF}.primertrimmed.sorted.bam
"""
stub:
"""
touch genome.consensus.fasta \
${name}_mapped_1.primertrimmed.sorted.bam \
${name}_mapped_1.primertrimmed.sorted.bam.bai \
SNP_${name}.pass.vcf \
${name}.pass.vcf.gz \
${name}.coverage_mask.txt.nCoV-2019_1.depths \
${name}.coverage_mask.txt.nCoV-2019_2.depths \
${name}.trimmed.rg.sorted.bam
"""
}


// --min-depth