Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Release1.5 #228

Merged
18 commits were merged on
Apr 13, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 5 additions & 3 deletions bin/lcs_bar_plot.R
Original file line number Diff line number Diff line change
Expand Up @@ -6,13 +6,15 @@ library(ggplot2)
library(dplyr)

args <- commandArgs(TRUE)
proportion_cutoff <- eval( parse(text=args[1]) )[1]
tsv <- args[1]
name <- parse(text=args[2])
proportion_cutoff <- eval( parse(text=args[3]) )

input <- read.csv("lcs_results.tsv", sep='\t')
input <- read.csv(tsv, sep='\t')

filter(input, proportion>=proportion_cutoff) %>% ggplot(aes(x=variant_group, y=proportion)) + geom_col(position='dodge') +
geom_errorbar(width=0.4,aes(ymin=proportion-std_error, ymax=proportion+std_error)) + facet_wrap(~sample) +
theme(axis.text.x=element_text(angle=90,vjust=.5)) + coord_flip() + ggsave("lcs_bar_plot.png")
theme(axis.text.x=element_text(angle=90,vjust=.5)) + coord_flip() + ggsave(paste0('lcs_barplot_', name, '.png'))

# for ggplot2 versions > 3.3.0:
# ggplot aes flip x and y
Expand Down
80 changes: 40 additions & 40 deletions bin/summary_report.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ class SummaryReport():
scorpio_constellations_version = None
variants_table = None
pangolin_version = None
pangolearn_version = None
pangolindata_version = None
nextclade_version = None
nextcladedata_version = None
tabledata = None
Expand Down Expand Up @@ -110,20 +110,17 @@ def add_poreCov_version_param(self, porecov_version):


def add_pangolin_version_param(self):
if self.pangolearn_version is None:
error('add_pangolin_version_param() called before parse_pangolin_version()')
warning_msg = f' - <font color="{self.color_error_red}"><b>Warning</b>: A rather old version of PangoLEARN was used ({self.pangolearn_version}). Use parameter \'--update\' to force the use of the most recent Pangolin container!</font>'
if self.pangolindata_version is None:
error('add_pangolin_version_param() called before pangolin version was set')
warning_msg = f' - <font color="{self.color_error_red}"><b>Warning</b>: A rather old version of pangolin-data was used ({self.pangolindata_version}). Use parameter \'--update\' to force the use of the most recent Pangolin container!</font>'

# pa_param = f'<a href="https://cov-lineages.org/pangolin.html"><b>Pangolin</b></a> version'
# pa_val = f'{self.pangolin_version}'
pl_param = f'<a href="https://cov-lineages.org/resources/pangolin/pangolearn.html"><b>PangoLEARN</b></a> version'
pl_val = f'{self.pangolearn_version}'
pl_param = f'<a href="https://cov-lineages.org/resources/pangolin/requirements.html"><b>pangolin-data</b></a> version'
pl_val = f'{self.pangolindata_version}'

year, month, day = self.pangolearn_version.split('-')
if int(year) <= 2021 and int(month) <= 10:
v1, v2, v3 = self.pangolindata_version.split('.')
if int(v1) <= 1 and int(v2) <= 2 and int(v3) <= 132:
pl_val += warning_msg

# self.add_param(pa_param, pa_val)
self.add_param(pl_param, pl_val)


Expand Down Expand Up @@ -162,22 +159,6 @@ def parse_version_config(self, version_config_file):
log('Parsed version config file.')


def parse_scorpio_versions(self, scorpio_version, sc_constell_version):
# e.g. 'scorpio 0.3.14'
name, vers = scorpio_version.split(' ')
assert name == 'scorpio'
self.scorpio_version = vers.lstrip('v')

# e.g. 'constellations v0.0.24'
name, vers = sc_constell_version.split(' ')
assert name == 'constellations'
self.scorpio_constellations_version = vers.lstrip('v')


def parse_pangolin_version(self, pangolin_docker):
# e.g. nanozoo/pangolin:2.3.8--2021-04-21
self.pangolin_version, self.pangolearn_version = pangolin_docker.split(':',1)[-1].split('--')

def parse_nextclade_version(self, nextclade_docker):
# e.g. nanozoo/nextclade:1.3.0--2021-06-25
self.nextclade_version, self.nextcladedata_version = nextclade_docker.split(':',1)[-1].split('--')
Expand Down Expand Up @@ -377,10 +358,20 @@ def add_pangolin_results(self, pangolin_results):
self.force_index_dtype_string(res_data)
self.check_and_init_tabledata(res_data.index)

# pangolin and scorpio versions
# column names used:
# version,pangolin_version,scorpio_version,constellation_version
assert res_data.shape[0] > 0
self.pangolin_version = res_data.iloc[0]['pangolin_version']
self.pangolindata_version = res_data.iloc[0]['version'].split('-',1)[-1].split('v',1)[-1]
self.scorpio_version = res_data.iloc[0]['scorpio_version']
self.scorpio_constellations_version = res_data.iloc[0]['constellation_version']


# get data
self.add_column_raw('pangolin_lineage', res_data['lineage'])
self.add_column_raw('pangolin_conflict', res_data['conflict'])


res_data['lineage_conflict'] = [f'<b>{l}</b><br>({p if pd.notnull(p) else "-"})' for l,p in zip(res_data['lineage'], res_data['conflict'])]
colname = 'Lineage<br>(conflict)'

Expand All @@ -399,13 +390,16 @@ def add_pangolin_results(self, pangolin_results):
res_data.at[row, 'lineage_conflict'] += f'<br><font color="{color}"><b>{var_status}</b></font>'

self.add_column_raw('variant_status', res_data['variant_status'])
self.add_col_description(f'Variant type (VOC, VOI, etc.) was determined from the <a href="{args.variants_table}">variants table</a> of <a href="https://github.com/3dgiordano/SARS-CoV-2-Variants">SARS-CoV-2-Variants</a>.')
self.add_col_description(f'Variant type (VOC, VOI, etc.) was determined from the <a href="{args.variants_table}">variants table</a> ' + \
'of <a href="https://github.com/3dgiordano/SARS-CoV-2-Variants">SARS-CoV-2-Variants</a>.')


self.add_column(colname, res_data['lineage_conflict'])
if self.pangolin_version is None or self.pangolearn_version is None:
error('No pangolin/pangoLEARN versions were added before adding pangolin results.')
self.add_col_description(f'Lineage and the corresponding tree resolution conflict measure were determined with <a href="https://cov-lineages.org/pangolin.html">Pangolin</a> (v{self.pangolin_version} using <a href="https://cov-lineages.org/resources/pangolin/pangolearn.html">PangoLEARN</a> data release {self.pangolearn_version}).')
if self.pangolin_version is None or self.pangolindata_version is None:
error('No pangolin/pangolin-data versions were added before adding pangolin results.')
self.add_col_description(f'Lineage and the corresponding tree resolution conflict measure were determined with ' + \
f'<a href="https://cov-lineages.org/pangolin.html">Pangolin</a> (v{self.pangolin_version} using ' + \
f'<a href="https://cov-lineages.org/resources/pangolin/requirements.html">pangolin-data</a> v{self.pangolindata_version}).')

# Add scorpio info if any is present
if res_data['scorpio_call'].notna().any():
Expand All @@ -419,7 +413,10 @@ def add_pangolin_results(self, pangolin_results):
self.add_column('Constellation<br>(conflict)', res_data['scorpio_conflict'])
if self.scorpio_version is None or self.scorpio_constellations_version is None:
error('No Scorpio/constellations versions were added before adding Pangolin results.')
self.add_col_description(f'Constellation and the corresponding tree resolution conflict measure were determined with <a href="https://github.com/cov-lineages/scorpio">Scorpio</a> (v{self.scorpio_version} using <a href="https://cov-lineages.org/constellations.html">Constellations</a> version {self.scorpio_constellations_version}).')
self.add_col_description(f'Constellation and the corresponding tree resolution conflict measure were determined with ' + \
f'<a href="https://github.com/cov-lineages/scorpio">Scorpio</a> (v{self.scorpio_version} using ' + \
f'<a href="https://cov-lineages.org/constellations.html">Constellations</a> version {self.scorpio_constellations_version}).')



def add_president_results(self, president_results):
Expand Down Expand Up @@ -505,6 +502,13 @@ def add_nextclade_results(self, nextclade_results):
self.add_column_raw('nextclade_insertions', res_data['aaInsertions'])
self.add_column_raw('nextclade_frameshifts', res_data['frameShifts'])

# private mutation information
self.add_column_raw('nextclade_privateNucMutations_reversion', res_data["privateNucMutations.reversionSubstitutions"])
self.add_column_raw('nextclade_privateNucMutations_labeled', res_data["privateNucMutations.labeledSubstitutions"])
self.add_column_raw('nextclade_privateNucMutations_unlabeled', res_data["privateNucMutations.unlabeledSubstitutions"])
self.add_column_raw('nextclade_privateNucMutations_qc_status', res_data["qc.privateMutations.status"])


res_data['mutations_formatted'] = [m.replace(',', ', ') if type(m) == str else '-' for m in res_data['aaSubstitutions']]
res_data['deletions_formatted'] = [m.replace(',', ', ') if type(m) == str else '-' for m in res_data['aaDeletions']]
res_data['insertions_formatted'] = [m.replace(',', ', ') if type(m) == str else '-' for m in res_data['aaInsertions']]
Expand Down Expand Up @@ -560,7 +564,8 @@ def get_markup_with_toggle_and_tag(tag):

if self.nextclade_version is None or self.nextcladedata_version is None:
error('No nextclade/nextcladedata versions were added before adding nextclade results.')
self.add_col_description(f'Clade, mutations, deletions, insertions and frameshifts were determined with <a href="https://clades.nextstrain.org/">Nextclade</a> (v{self.nextclade_version} using nextclade data release {self.nextcladedata_version}).')
self.add_col_description(f'Clade, mutations, deletions, insertions and frameshifts were determined with ' + \
f'<a href="https://clades.nextstrain.org/">Nextclade</a> (v{self.nextclade_version} using nextclade data release {self.nextcladedata_version}).')



Expand Down Expand Up @@ -782,16 +787,13 @@ def get_lineage_status(self, lineage):
log('Started summary_report.py ...')

parser = argparse.ArgumentParser(description='Generate a summary report for multiple samples run with poreCov')
parser.add_argument("-v", "--version_config", help="version config", required=True)
parser.add_argument("--scorpio_version", help="scorpio version", required=True)
parser.add_argument("--scorpio_constellations_version", help="scorpio constellations version", required=True)
parser.add_argument("-v", "--version_config", help="version config", required=True)
parser.add_argument("--variants_table", help="variants table with VOCs, VOIs etc.", required=True)
parser.add_argument("--porecov_version", help="porecov version", required=True)
parser.add_argument("--guppy_used", help="guppy used")
parser.add_argument("--guppy_model", help="guppy model")
parser.add_argument("--medaka_model", help="medaka model")
parser.add_argument("--nf_commandline", help="full nextflow command call", required=True)
parser.add_argument("--pangolin_docker", help="pangolin/pangoLEARN version", required=True)
parser.add_argument("--nextclade_docker", help="nextclade/nextcladedata version", required=True)
parser.add_argument("--primer", help="primer version")
parser.add_argument("-p", "--pangolin_results", help="pangolin results")
Expand All @@ -806,9 +808,7 @@ def get_lineage_status(self, lineage):
### build report
report = SummaryReport()
report.parse_version_config(args.version_config)
report.parse_scorpio_versions(args.scorpio_version, args.scorpio_constellations_version)
report.parse_variants_table(args.variants_table)
report.parse_pangolin_version(args.pangolin_docker)
report.parse_nextclade_version(args.nextclade_docker)


Expand Down
17 changes: 0 additions & 17 deletions modules/get_scorpio_version.nf

This file was deleted.

4 changes: 2 additions & 2 deletions nextflow.config
Original file line number Diff line number Diff line change
Expand Up @@ -41,8 +41,8 @@ params {
update = false
screen_reads = false
screen_reads_plot_cutoff = 0.03
defaultpangolin = 'nanozoo/pangolin:3.1.20--2022-02-28'
defaultnextclade = 'nanozoo/nextclade:1.10.3--2022-02-07'
defaultpangolin = 'nanozoo/pangolin-v4:4.0.4--1.2.133'
defaultnextclade = 'nanozoo/nextclade:1.11.0--2022-03-31'

// parameters
primerV = 'V3'
Expand Down
6 changes: 3 additions & 3 deletions poreCov.nf
Original file line number Diff line number Diff line change
Expand Up @@ -235,7 +235,7 @@ if (params.samples) {

static boolean DockernetIsAvailable() {
try {
final URL url = new URL("https://registry.hub.docker.com/v2/repositories/nanozoo/pangolin/tags/");
final URL url = new URL("https://registry.hub.docker.com/v2/repositories/nanozoo/pangolin-v4/tags/");
final URLConnection conn = url.openConnection();
conn.connect();
conn.getInputStream().close();
Expand All @@ -252,8 +252,8 @@ def internetcheck = DockernetIsAvailable()
if (params.update) {
println "\033[0;33mWarning: Running --update might not be poreCov compatible!\033[0m"
if ( internetcheck.toString() == "true" ) {
tagname = 'https://registry.hub.docker.com/v2/repositories/nanozoo/pangolin/tags/'.toURL().text.split(',"name":"')[1].split('","')[0]
params.pangolindocker = "nanozoo/pangolin:" + tagname
tagname = 'https://registry.hub.docker.com/v2/repositories/nanozoo/pangolin-v4/tags/'.toURL().text.split(',"name":"')[1].split('","')[0]
params.pangolindocker = "nanozoo/pangolin-v4:" + tagname
println "\033[0;32mFound latest pangolin container, using: " + params.pangolindocker + " \033[0m"

tagname = 'https://registry.hub.docker.com/v2/repositories/nanozoo/nextclade/tags/'.toURL().text.split(',"name":"')[1].split('","')[0]
Expand Down
14 changes: 7 additions & 7 deletions workflows/create_summary_report.nf
Original file line number Diff line number Diff line change
@@ -1,8 +1,7 @@
include { summary_report; summary_report_fasta; summary_report_default } from './process/summary_report'
include { plot_coverages } from '../modules/plot_coverages.nf'
include { get_scorpio_version } from '../modules/get_scorpio_version.nf'
include { get_variants_classification } from '../modules/get_variants_classification.nf'
include { lcs_plot } from './process/lcs_sc2'
include { lcs_plot as lcs_plot; lcs_plot as lcs_plot_control } from './process/lcs_sc2'

workflow create_summary_report_wf {
take:
Expand All @@ -16,7 +15,6 @@ workflow create_summary_report_wf {

main:
version_ch = Channel.fromPath(workflow.projectDir + "/configs/container.config")
scorpio_ver_ch = get_scorpio_version()
variants_table_ch = get_variants_classification()

pangolin_results = pangolin.map {it -> it[1]}.collectFile(name: 'pangolin_results.csv', skip: 1, keepHeader: true)
Expand All @@ -27,21 +25,23 @@ workflow create_summary_report_wf {
alignment_files = alignments.map {it -> it[0]}.collect()
if (params.fasta || workflow.profile.contains('test_fasta')) {

summary_report_fasta(version_ch, scorpio_ver_ch, variants_table_ch, pangolin_results, president_results, nextclade_results)
summary_report_fasta(version_ch, variants_table_ch, pangolin_results, president_results, nextclade_results)

} else {
kraken2_results = kraken2.map {it -> it[2]}.collect()
// sort by sample name, group in lists of 6, collect the grouped plots
coverage_plots = plot_coverages(alignments.map{it -> it[0]}.toSortedList({ a, b -> a.simpleName <=> b.simpleName }).flatten().collate(6), \
alignments.map{it -> it[1]}.toSortedList({ a, b -> a.simpleName <=> b.simpleName }).flatten().collate(6)).collect()

if (params.samples) { summary_report(version_ch, scorpio_ver_ch, variants_table_ch, pangolin_results, president_results, nextclade_results, kraken2_results, coverage_plots, samples_table) }
else { summary_report_default(version_ch, scorpio_ver_ch, variants_table_ch, pangolin_results, president_results, nextclade_results, kraken2_results, coverage_plots) }
if (params.samples) { summary_report(version_ch, variants_table_ch, pangolin_results, president_results, nextclade_results, kraken2_results, coverage_plots, samples_table) }
else { summary_report_default(version_ch, variants_table_ch, pangolin_results, president_results, nextclade_results, kraken2_results, coverage_plots) }

}

if (params.screen_reads){
lcs_plot(lcs_results, params.screen_reads_plot_cutoff)
lcs_plot(lcs.filter({ !it[0].contains("negativ") }).map {it -> it[1]}.collectFile(name: 'lcs_results.tsv', skip: 1, keepHeader: true).map{ it -> ['samples', it] }, params.screen_reads_plot_cutoff)

lcs_plot_control(lcs.filter({ it[0].contains("negativ") }), params.screen_reads_plot_cutoff)
}

}
6 changes: 3 additions & 3 deletions workflows/process/lcs_sc2.nf
Original file line number Diff line number Diff line change
Expand Up @@ -36,14 +36,14 @@ process lcs_plot {
publishDir "${params.output}/${params.lineagedir}/", mode: 'copy'

input:
path(tsv)
tuple val(name), path(tsv)
val(cutoff)

output:
path("lcs_bar_plot.png")
path("*.png")

script:
"""
lcs_bar_plot.R ${cutoff}
lcs_bar_plot.R '${tsv}' '${name}' ${cutoff}
"""
}
Loading