diff --git a/bin/lcs_bar_plot.R b/bin/lcs_bar_plot.R
index a38cb25..8655489 100755
--- a/bin/lcs_bar_plot.R
+++ b/bin/lcs_bar_plot.R
@@ -6,13 +6,15 @@ library(ggplot2)
library(dplyr)
args <- commandArgs(TRUE)
-proportion_cutoff <- eval( parse(text=args[1]) )[1]
+tsv <- args[1]  # path to the tab-separated LCS results table
+name <- args[2]  # plain label used in the output file name; taken verbatim, not parsed as R code
+proportion_cutoff <- as.numeric(args[3])  # minimum proportion a row needs to be plotted
-input <- read.csv("lcs_results.tsv", sep='\t')
+input <- read.csv(tsv, sep='\t')
filter(input, proportion>=proportion_cutoff) %>% ggplot(aes(x=variant_group, y=proportion)) + geom_col(position='dodge') +
geom_errorbar(width=0.4,aes(ymin=proportion-std_error, ymax=proportion+std_error)) + facet_wrap(~sample) +
-theme(axis.text.x=element_text(angle=90,vjust=.5)) + coord_flip() + ggsave("lcs_bar_plot.png")
+theme(axis.text.x=element_text(angle=90,vjust=.5)) + coord_flip() + ggsave(paste0('lcs_barplot_', name, '.png'))
# for ggplot2 versions > 3.3.0:
# ggplot aes flip x and y
diff --git a/bin/summary_report.py b/bin/summary_report.py
index 3362472..58fa93f 100755
--- a/bin/summary_report.py
+++ b/bin/summary_report.py
@@ -38,7 +38,7 @@ class SummaryReport():
scorpio_constellations_version = None
variants_table = None
pangolin_version = None
- pangolearn_version = None
+ pangolindata_version = None
nextclade_version = None
nextcladedata_version = None
tabledata = None
@@ -110,20 +110,17 @@ def add_poreCov_version_param(self, porecov_version):
def add_pangolin_version_param(self):
- if self.pangolearn_version is None:
- error('add_pangolin_version_param() called before parse_pangolin_version()')
- warning_msg = f' - Warning: A rather old version of PangoLEARN was used ({self.pangolearn_version}). Use parameter \'--update\' to force the use of the most recent Pangolin container!'
+ if self.pangolindata_version is None:
+ error('add_pangolin_version_param() called before pangolin version was set')
+ warning_msg = f' - Warning: A rather old version of pangolin-data was used ({self.pangolindata_version}). Use parameter \'--update\' to force the use of the most recent Pangolin container!'
- # pa_param = f'Pangolin version'
- # pa_val = f'{self.pangolin_version}'
- pl_param = f'PangoLEARN version'
- pl_val = f'{self.pangolearn_version}'
+ pl_param = f'pangolin-data version'
+ pl_val = f'{self.pangolindata_version}'
- year, month, day = self.pangolearn_version.split('-')
- if int(year) <= 2021 and int(month) <= 10:
+ pdata_version = tuple(int(part) for part in self.pangolindata_version.split('.'))  # e.g. '1.2.133' -> (1, 2, 133)
+ if pdata_version <= (1, 2, 132):  # compare the version as a whole, so 1.1.500 or 0.9.999 also count as old
pl_val += warning_msg
- # self.add_param(pa_param, pa_val)
self.add_param(pl_param, pl_val)
@@ -162,22 +159,6 @@ def parse_version_config(self, version_config_file):
log('Parsed version config file.')
- def parse_scorpio_versions(self, scorpio_version, sc_constell_version):
- # e.g. 'scorpio 0.3.14'
- name, vers = scorpio_version.split(' ')
- assert name == 'scorpio'
- self.scorpio_version = vers.lstrip('v')
-
- # e.g. 'constellations v0.0.24'
- name, vers = sc_constell_version.split(' ')
- assert name == 'constellations'
- self.scorpio_constellations_version = vers.lstrip('v')
-
-
- def parse_pangolin_version(self, pangolin_docker):
- # e.g. nanozoo/pangolin:2.3.8--2021-04-21
- self.pangolin_version, self.pangolearn_version = pangolin_docker.split(':',1)[-1].split('--')
-
def parse_nextclade_version(self, nextclade_docker):
# e.g. nanozoo/nextclade:1.3.0--2021-06-25
self.nextclade_version, self.nextcladedata_version = nextclade_docker.split(':',1)[-1].split('--')
@@ -377,10 +358,20 @@ def add_pangolin_results(self, pangolin_results):
self.force_index_dtype_string(res_data)
self.check_and_init_tabledata(res_data.index)
+ # pangolin and scorpio versions
+ # column names used:
+ # version,pangolin_version,scorpio_version,constellation_version
+ assert res_data.shape[0] > 0
+ self.pangolin_version = res_data.iloc[0]['pangolin_version']
+ self.pangolindata_version = res_data.iloc[0]['version'].split('-',1)[-1].split('v',1)[-1]
+ self.scorpio_version = res_data.iloc[0]['scorpio_version']
+ self.scorpio_constellations_version = res_data.iloc[0]['constellation_version']
+
+
+ # get data
self.add_column_raw('pangolin_lineage', res_data['lineage'])
self.add_column_raw('pangolin_conflict', res_data['conflict'])
-
res_data['lineage_conflict'] = [f'{l}
({p if pd.notnull(p) else "-"})' for l,p in zip(res_data['lineage'], res_data['conflict'])]
colname = 'Lineage
(conflict)'
@@ -399,13 +390,16 @@ def add_pangolin_results(self, pangolin_results):
res_data.at[row, 'lineage_conflict'] += f'
{var_status}'
self.add_column_raw('variant_status', res_data['variant_status'])
- self.add_col_description(f'Variant type (VOC, VOI, etc.) was determined from the variants table of SARS-CoV-2-Variants.')
+ self.add_col_description(f'Variant type (VOC, VOI, etc.) was determined from the variants table ' + \
+ 'of SARS-CoV-2-Variants.')
self.add_column(colname, res_data['lineage_conflict'])
- if self.pangolin_version is None or self.pangolearn_version is None:
- error('No pangolin/pangoLEARN versions were added before adding pangolin results.')
- self.add_col_description(f'Lineage and the corresponding tree resolution conflict measure were determined with Pangolin (v{self.pangolin_version} using PangoLEARN data release {self.pangolearn_version}).')
+ if self.pangolin_version is None or self.pangolindata_version is None:
+ error('No pangolin/pangolin-data versions were added before adding pangolin results.')
+ self.add_col_description(f'Lineage and the corresponding tree resolution conflict measure were determined with ' + \
+ f'Pangolin (v{self.pangolin_version} using ' + \
+ f'pangolin-data v{self.pangolindata_version}).')
# Add scorpio info if any is present
if res_data['scorpio_call'].notna().any():
@@ -419,7 +413,10 @@ def add_pangolin_results(self, pangolin_results):
self.add_column('Constellation
(conflict)', res_data['scorpio_conflict'])
if self.scorpio_version is None or self.scorpio_constellations_version is None:
error('No Scorpio/constellations versions were added before adding Pangolin results.')
- self.add_col_description(f'Constellation and the corresponding tree resolution conflict measure were determined with Scorpio (v{self.scorpio_version} using Constellations version {self.scorpio_constellations_version}).')
+ self.add_col_description(f'Constellation and the corresponding tree resolution conflict measure were determined with ' + \
+ f'Scorpio (v{self.scorpio_version} using ' + \
+ f'Constellations version {self.scorpio_constellations_version}).')
+
def add_president_results(self, president_results):
@@ -505,6 +502,13 @@ def add_nextclade_results(self, nextclade_results):
self.add_column_raw('nextclade_insertions', res_data['aaInsertions'])
self.add_column_raw('nextclade_frameshifts', res_data['frameShifts'])
+ # private mutation information
+ self.add_column_raw('nextclade_privateNucMutations_reversion', res_data["privateNucMutations.reversionSubstitutions"])
+ self.add_column_raw('nextclade_privateNucMutations_labeled', res_data["privateNucMutations.labeledSubstitutions"])
+ self.add_column_raw('nextclade_privateNucMutations_unlabeled', res_data["privateNucMutations.unlabeledSubstitutions"])
+ self.add_column_raw('nextclade_privateNucMutations_qc_status', res_data["qc.privateMutations.status"])
+
+
res_data['mutations_formatted'] = [m.replace(',', ', ') if type(m) == str else '-' for m in res_data['aaSubstitutions']]
res_data['deletions_formatted'] = [m.replace(',', ', ') if type(m) == str else '-' for m in res_data['aaDeletions']]
res_data['insertions_formatted'] = [m.replace(',', ', ') if type(m) == str else '-' for m in res_data['aaInsertions']]
@@ -560,7 +564,8 @@ def get_markup_with_toggle_and_tag(tag):
if self.nextclade_version is None or self.nextcladedata_version is None:
error('No nextclade/nextcladedata versions were added before adding nextclade results.')
- self.add_col_description(f'Clade, mutations, deletions, insertions and frameshifts were determined with Nextclade (v{self.nextclade_version} using nextclade data release {self.nextcladedata_version}).')
+ self.add_col_description(f'Clade, mutations, deletions, insertions and frameshifts were determined with ' + \
+ f'Nextclade (v{self.nextclade_version} using nextclade data release {self.nextcladedata_version}).')
@@ -782,16 +787,13 @@ def get_lineage_status(self, lineage):
log('Started summary_report.py ...')
parser = argparse.ArgumentParser(description='Generate a summary report for multiple samples run with poreCov')
- parser.add_argument("-v", "--version_config", help="version config", required=True)
- parser.add_argument("--scorpio_version", help="scorpio version", required=True)
- parser.add_argument("--scorpio_constellations_version", help="scorpio constellations version", required=True)
+ parser.add_argument("-v", "--version_config", help="version config", required=True)
parser.add_argument("--variants_table", help="variants table with VOCs, VOIs etc.", required=True)
parser.add_argument("--porecov_version", help="porecov version", required=True)
parser.add_argument("--guppy_used", help="guppy used")
parser.add_argument("--guppy_model", help="guppy model")
parser.add_argument("--medaka_model", help="medaka model")
parser.add_argument("--nf_commandline", help="full nextflow command call", required=True)
- parser.add_argument("--pangolin_docker", help="pangolin/pangoLEARN version", required=True)
parser.add_argument("--nextclade_docker", help="nextclade/nextcladedata version", required=True)
parser.add_argument("--primer", help="primer version")
parser.add_argument("-p", "--pangolin_results", help="pangolin results")
@@ -806,9 +808,7 @@ def get_lineage_status(self, lineage):
### build report
report = SummaryReport()
report.parse_version_config(args.version_config)
- report.parse_scorpio_versions(args.scorpio_version, args.scorpio_constellations_version)
report.parse_variants_table(args.variants_table)
- report.parse_pangolin_version(args.pangolin_docker)
report.parse_nextclade_version(args.nextclade_docker)
diff --git a/modules/get_scorpio_version.nf b/modules/get_scorpio_version.nf
deleted file mode 100644
index 128aef5..0000000
--- a/modules/get_scorpio_version.nf
+++ /dev/null
@@ -1,17 +0,0 @@
-process get_scorpio_version {
- label 'pangolin'
- cpus = 1
- container = params.pangolindocker
- output:
- tuple env(SCORPIO_VER), env(SCORPIO_CONSTELLATIONS_VER)
- shell:
- '''
- SCORPIO_VER=$(scorpio --version)
- SCORPIO_CONSTELLATIONS_VER=$(scorpio -cv)
- '''
- stub:
- """
- SCORPIO_VER='scorpio 0.3.14'
- SCORPIO_CONSTELLATIONS_VER='constellations v0.0.24'
- """
- }
diff --git a/nextflow.config b/nextflow.config
index 1ae8621..2614199 100644
--- a/nextflow.config
+++ b/nextflow.config
@@ -41,8 +41,8 @@ params {
update = false
screen_reads = false
screen_reads_plot_cutoff = 0.03
- defaultpangolin = 'nanozoo/pangolin:3.1.20--2022-02-28'
- defaultnextclade = 'nanozoo/nextclade:1.10.3--2022-02-07'
+ defaultpangolin = 'nanozoo/pangolin-v4:4.0.4--1.2.133'
+ defaultnextclade = 'nanozoo/nextclade:1.11.0--2022-03-31'
// parameters
primerV = 'V3'
diff --git a/poreCov.nf b/poreCov.nf
index ce19d83..350b6b9 100755
--- a/poreCov.nf
+++ b/poreCov.nf
@@ -235,7 +235,7 @@ if (params.samples) {
static boolean DockernetIsAvailable() {
try {
- final URL url = new URL("https://registry.hub.docker.com/v2/repositories/nanozoo/pangolin/tags/");
+ final URL url = new URL("https://registry.hub.docker.com/v2/repositories/nanozoo/pangolin-v4/tags/");
final URLConnection conn = url.openConnection();
conn.connect();
conn.getInputStream().close();
@@ -252,8 +252,8 @@ def internetcheck = DockernetIsAvailable()
if (params.update) {
println "\033[0;33mWarning: Running --update might not be poreCov compatible!\033[0m"
if ( internetcheck.toString() == "true" ) {
- tagname = 'https://registry.hub.docker.com/v2/repositories/nanozoo/pangolin/tags/'.toURL().text.split(',"name":"')[1].split('","')[0]
- params.pangolindocker = "nanozoo/pangolin:" + tagname
+ tagname = 'https://registry.hub.docker.com/v2/repositories/nanozoo/pangolin-v4/tags/'.toURL().text.split(',"name":"')[1].split('","')[0]
+ params.pangolindocker = "nanozoo/pangolin-v4:" + tagname
println "\033[0;32mFound latest pangolin container, using: " + params.pangolindocker + " \033[0m"
tagname = 'https://registry.hub.docker.com/v2/repositories/nanozoo/nextclade/tags/'.toURL().text.split(',"name":"')[1].split('","')[0]
diff --git a/workflows/create_summary_report.nf b/workflows/create_summary_report.nf
index dc08d1d..8e2f03c 100644
--- a/workflows/create_summary_report.nf
+++ b/workflows/create_summary_report.nf
@@ -1,8 +1,7 @@
include { summary_report; summary_report_fasta; summary_report_default } from './process/summary_report'
include { plot_coverages } from '../modules/plot_coverages.nf'
-include { get_scorpio_version } from '../modules/get_scorpio_version.nf'
include { get_variants_classification } from '../modules/get_variants_classification.nf'
-include { lcs_plot } from './process/lcs_sc2'
+include { lcs_plot as lcs_plot; lcs_plot as lcs_plot_control } from './process/lcs_sc2'
workflow create_summary_report_wf {
take:
@@ -16,7 +15,6 @@ workflow create_summary_report_wf {
main:
version_ch = Channel.fromPath(workflow.projectDir + "/configs/container.config")
- scorpio_ver_ch = get_scorpio_version()
variants_table_ch = get_variants_classification()
pangolin_results = pangolin.map {it -> it[1]}.collectFile(name: 'pangolin_results.csv', skip: 1, keepHeader: true)
@@ -27,7 +25,7 @@ workflow create_summary_report_wf {
alignment_files = alignments.map {it -> it[0]}.collect()
if (params.fasta || workflow.profile.contains('test_fasta')) {
- summary_report_fasta(version_ch, scorpio_ver_ch, variants_table_ch, pangolin_results, president_results, nextclade_results)
+ summary_report_fasta(version_ch, variants_table_ch, pangolin_results, president_results, nextclade_results)
} else {
kraken2_results = kraken2.map {it -> it[2]}.collect()
@@ -35,13 +33,15 @@ workflow create_summary_report_wf {
coverage_plots = plot_coverages(alignments.map{it -> it[0]}.toSortedList({ a, b -> a.simpleName <=> b.simpleName }).flatten().collate(6), \
alignments.map{it -> it[1]}.toSortedList({ a, b -> a.simpleName <=> b.simpleName }).flatten().collate(6)).collect()
- if (params.samples) { summary_report(version_ch, scorpio_ver_ch, variants_table_ch, pangolin_results, president_results, nextclade_results, kraken2_results, coverage_plots, samples_table) }
- else { summary_report_default(version_ch, scorpio_ver_ch, variants_table_ch, pangolin_results, president_results, nextclade_results, kraken2_results, coverage_plots) }
+ if (params.samples) { summary_report(version_ch, variants_table_ch, pangolin_results, president_results, nextclade_results, kraken2_results, coverage_plots, samples_table) }
+ else { summary_report_default(version_ch, variants_table_ch, pangolin_results, president_results, nextclade_results, kraken2_results, coverage_plots) }
}
if (params.screen_reads){
- lcs_plot(lcs_results, params.screen_reads_plot_cutoff)
+ lcs_plot(lcs.filter({ !it[0].contains("negativ") }).map {it -> it[1]}.collectFile(name: 'lcs_results.tsv', skip: 1, keepHeader: true).map{ it -> ['samples', it] }, params.screen_reads_plot_cutoff)
+
+ lcs_plot_control(lcs.filter({ it[0].contains("negativ") }), params.screen_reads_plot_cutoff)
}
}
diff --git a/workflows/process/lcs_sc2.nf b/workflows/process/lcs_sc2.nf
index ec13d28..ef34121 100644
--- a/workflows/process/lcs_sc2.nf
+++ b/workflows/process/lcs_sc2.nf
@@ -36,14 +36,14 @@ process lcs_plot {
publishDir "${params.output}/${params.lineagedir}/", mode: 'copy'
input:
- path(tsv)
+ tuple val(name), path(tsv)
val(cutoff)
output:
- path("lcs_bar_plot.png")
+ path("*.png")
script:
"""
- lcs_bar_plot.R ${cutoff}
+ lcs_bar_plot.R '${tsv}' '${name}' ${cutoff}
"""
}
\ No newline at end of file
diff --git a/workflows/process/summary_report.nf b/workflows/process/summary_report.nf
index 0c35e15..f601060 100644
--- a/workflows/process/summary_report.nf
+++ b/workflows/process/summary_report.nf
@@ -5,7 +5,6 @@ process summary_report {
input:
path(version_config)
- tuple val(scorpio_ver), val(scorpio_constellations_ver)
path(variants_table)
path(pangolin_results)
path(president_results)
@@ -32,8 +31,6 @@ process summary_report {
summary_report.py \
-v !{version_config} \
- --scorpio_version "!{scorpio_ver}" \
- --scorpio_constellations_version "!{scorpio_constellations_ver}" \
--variants_table !{variants_table} \
--porecov_version !{workflow.revision}:!{workflow.commitId}:!{workflow.scriptId} \
--nextclade_docker !{params.nextcladedocker} \
@@ -41,7 +38,6 @@ process summary_report {
--guppy_model !{params.guppy_model} \
--medaka_model !{params.medaka_model} \
--nf_commandline '!{workflow.commandLine}' \
- --pangolin_docker !{params.pangolindocker} \
--primer !{params.primerV} \
-p !{pangolin_results} \
-q !{president_results} \
@@ -63,7 +59,6 @@ process summary_report_default {
input:
path(version_config)
- tuple val(scorpio_ver), val(scorpio_constellations_ver)
path(variants_table)
path(pangolin_results)
path(president_results)
@@ -89,15 +84,12 @@ process summary_report_default {
summary_report.py \
-v !{version_config} \
- --scorpio_version "!{scorpio_ver}" \
- --scorpio_constellations_version "!{scorpio_constellations_ver}" \
--variants_table !{variants_table} \
--porecov_version !{workflow.revision}:!{workflow.commitId}:!{workflow.scriptId} \
--guppy_used !{guppyused} \
--guppy_model !{params.guppy_model} \
--medaka_model !{params.medaka_model} \
--nf_commandline '!{workflow.commandLine}' \
- --pangolin_docker !{params.pangolindocker} \
--nextclade_docker !{params.nextcladedocker} \
--primer !{params.primerV} \
-p !{pangolin_results} \
@@ -117,7 +109,6 @@ process summary_report_fasta {
label 'fastcov'
input:
path(version_config)
- tuple val(scorpio_ver), val(scorpio_constellations_ver)
path(variants_table)
path(pangolin_results)
path(president_results)
@@ -131,12 +122,9 @@ process summary_report_fasta {
"""
summary_report.py \
-v ${version_config} \
- --scorpio_version "${scorpio_ver}" \
- --scorpio_constellations_version "${scorpio_constellations_ver}" \
--variants_table ${variants_table} \
--porecov_version ${workflow.revision}:${workflow.commitId}:${workflow.scriptId} \
--nf_commandline '${workflow.commandLine}' \
- --pangolin_docker ${params.pangolindocker} \
--nextclade_docker ${params.nextcladedocker} \
-p ${pangolin_results} \
-q ${president_results} \