diff --git a/bin/lcs_bar_plot.R b/bin/lcs_bar_plot.R index a38cb25..8655489 100755 --- a/bin/lcs_bar_plot.R +++ b/bin/lcs_bar_plot.R @@ -6,13 +6,15 @@ library(ggplot2) library(dplyr) args <- commandArgs(TRUE) -proportion_cutoff <- eval( parse(text=args[1]) )[1] +tsv <- args[1] +name <- parse(text=args[2]) +proportion_cutoff <- eval( parse(text=args[3]) ) -input <- read.csv("lcs_results.tsv", sep='\t') +input <- read.csv(tsv, sep='\t') filter(input, proportion>=proportion_cutoff) %>% ggplot(aes(x=variant_group, y=proportion)) + geom_col(position='dodge') + geom_errorbar(width=0.4,aes(ymin=proportion-std_error, ymax=proportion+std_error)) + facet_wrap(~sample) + -theme(axis.text.x=element_text(angle=90,vjust=.5)) + coord_flip() + ggsave("lcs_bar_plot.png") +theme(axis.text.x=element_text(angle=90,vjust=.5)) + coord_flip() + ggsave(paste0('lcs_barplot_', name, '.png')) # for ggplot2 versions > 3.3.0: # ggplot aes flip x and y diff --git a/bin/summary_report.py b/bin/summary_report.py index 3362472..58fa93f 100755 --- a/bin/summary_report.py +++ b/bin/summary_report.py @@ -38,7 +38,7 @@ class SummaryReport(): scorpio_constellations_version = None variants_table = None pangolin_version = None - pangolearn_version = None + pangolindata_version = None nextclade_version = None nextcladedata_version = None tabledata = None @@ -110,20 +110,17 @@ def add_poreCov_version_param(self, porecov_version): def add_pangolin_version_param(self): - if self.pangolearn_version is None: - error('add_pangolin_version_param() called before parse_pangolin_version()') - warning_msg = f' - Warning: A rather old version of PangoLEARN was used ({self.pangolearn_version}). Use parameter \'--update\' to force the use of the most recent Pangolin container!' + if self.pangolindata_version is None: + error('add_pangolin_version_param() called before pangolin version was set') + warning_msg = f' - Warning: A rather old version of pangolin-data was used ({self.pangolindata_version}). Use parameter \'--update\' to force the use of the most recent Pangolin container!' - # pa_param = f'Pangolin version' - # pa_val = f'{self.pangolin_version}' - pl_param = f'PangoLEARN version' - pl_val = f'{self.pangolearn_version}' + pl_param = f'pangolin-data version' + pl_val = f'{self.pangolindata_version}' - year, month, day = self.pangolearn_version.split('-') - if int(year) <= 2021 and int(month) <= 10: + v1, v2, v3 = self.pangolindata_version.split('.') + if int(v1) <= 1 and int(v2) <= 2 and int(v3) <= 132: pl_val += warning_msg - # self.add_param(pa_param, pa_val) self.add_param(pl_param, pl_val) @@ -162,22 +159,6 @@ def parse_version_config(self, version_config_file): log('Parsed version config file.') - def parse_scorpio_versions(self, scorpio_version, sc_constell_version): - # e.g. 'scorpio 0.3.14' - name, vers = scorpio_version.split(' ') - assert name == 'scorpio' - self.scorpio_version = vers.lstrip('v') - - # e.g. 'constellations v0.0.24' - name, vers = sc_constell_version.split(' ') - assert name == 'constellations' - self.scorpio_constellations_version = vers.lstrip('v') - - - def parse_pangolin_version(self, pangolin_docker): - # e.g. nanozoo/pangolin:2.3.8--2021-04-21 - self.pangolin_version, self.pangolearn_version = pangolin_docker.split(':',1)[-1].split('--') - def parse_nextclade_version(self, nextclade_docker): # e.g. nanozoo/nextclade:1.3.0--2021-06-25 self.nextclade_version, self.nextcladedata_version = nextclade_docker.split(':',1)[-1].split('--') @@ -377,10 +358,20 @@ def add_pangolin_results(self, pangolin_results): self.force_index_dtype_string(res_data) self.check_and_init_tabledata(res_data.index) + # pangolin and scorpio versions + # column names used: + # version,pangolin_version,scorpio_version,constellation_version + assert res_data.shape[0] > 0 + self.pangolin_version = res_data.iloc[0]['pangolin_version'] + self.pangolindata_version = res_data.iloc[0]['version'].split('-',1)[-1].split('v',1)[-1] + self.scorpio_version = res_data.iloc[0]['scorpio_version'] + self.scorpio_constellations_version = res_data.iloc[0]['constellation_version'] + + + # get data self.add_column_raw('pangolin_lineage', res_data['lineage']) self.add_column_raw('pangolin_conflict', res_data['conflict']) - res_data['lineage_conflict'] = [f'{l}
({p if pd.notnull(p) else "-"})' for l,p in zip(res_data['lineage'], res_data['conflict'])] colname = 'Lineage
(conflict)' @@ -399,13 +390,16 @@ def add_pangolin_results(self, pangolin_results): res_data.at[row, 'lineage_conflict'] += f'
{var_status}' self.add_column_raw('variant_status', res_data['variant_status']) - self.add_col_description(f'Variant type (VOC, VOI, etc.) was determined from the variants table of SARS-CoV-2-Variants.') + self.add_col_description(f'Variant type (VOC, VOI, etc.) was determined from the variants table ' + \ + 'of SARS-CoV-2-Variants.') self.add_column(colname, res_data['lineage_conflict']) - if self.pangolin_version is None or self.pangolearn_version is None: - error('No pangolin/pangoLEARN versions were added before adding pangolin results.') - self.add_col_description(f'Lineage and the corresponding tree resolution conflict measure were determined with Pangolin (v{self.pangolin_version} using PangoLEARN data release {self.pangolearn_version}).') + if self.pangolin_version is None or self.pangolindata_version is None: + error('No pangolin/pangolin-data versions were added before adding pangolin results.') + self.add_col_description(f'Lineage and the corresponding tree resolution conflict measure were determined with ' + \ + f'Pangolin (v{self.pangolin_version} using ' + \ + f'pangolin-data v{self.pangolindata_version}).') # Add scorpio info if any is present if res_data['scorpio_call'].notna().any(): @@ -419,7 +413,10 @@ def add_pangolin_results(self, pangolin_results): self.add_column('Constellation
(conflict)', res_data['scorpio_conflict']) if self.scorpio_version is None or self.scorpio_constellations_version is None: error('No Scorpio/constellations versions were added before adding Pangolin results.') - self.add_col_description(f'Constellation and the corresponding tree resolution conflict measure were determined with Scorpio (v{self.scorpio_version} using Constellations version {self.scorpio_constellations_version}).') + self.add_col_description(f'Constellation and the corresponding tree resolution conflict measure were determined with ' + \ + f'Scorpio (v{self.scorpio_version} using ' + \ + f'Constellations version {self.scorpio_constellations_version}).') + def add_president_results(self, president_results): @@ -505,6 +502,13 @@ def add_nextclade_results(self, nextclade_results): self.add_column_raw('nextclade_insertions', res_data['aaInsertions']) self.add_column_raw('nextclade_frameshifts', res_data['frameShifts']) + # private mutation information + self.add_column_raw('nextclade_privateNucMutations_reversion', res_data["privateNucMutations.reversionSubstitutions"]) + self.add_column_raw('nextclade_privateNucMutations_labeled', res_data["privateNucMutations.labeledSubstitutions"]) + self.add_column_raw('nextclade_privateNucMutations_unlabeled', res_data["privateNucMutations.unlabeledSubstitutions"]) + self.add_column_raw('nextclade_privateNucMutations_qc_status', res_data["qc.privateMutations.status"]) + + res_data['mutations_formatted'] = [m.replace(',', ', ') if type(m) == str else '-' for m in res_data['aaSubstitutions']] res_data['deletions_formatted'] = [m.replace(',', ', ') if type(m) == str else '-' for m in res_data['aaDeletions']] res_data['insertions_formatted'] = [m.replace(',', ', ') if type(m) == str else '-' for m in res_data['aaInsertions']] @@ -560,7 +564,8 @@ def get_markup_with_toggle_and_tag(tag): if self.nextclade_version is None or self.nextcladedata_version is None: error('No nextclade/nextcladedata versions were added before adding nextclade results.') - self.add_col_description(f'Clade, mutations, deletions, insertions and frameshifts were determined with Nextclade (v{self.nextclade_version} using nextclade data release {self.nextcladedata_version}).') + self.add_col_description(f'Clade, mutations, deletions, insertions and frameshifts were determined with ' + \ + f'Nextclade (v{self.nextclade_version} using nextclade data release {self.nextcladedata_version}).') @@ -782,16 +787,13 @@ def get_lineage_status(self, lineage): log('Started summary_report.py ...') parser = argparse.ArgumentParser(description='Generate a summary report for multiple samples run with poreCov') - parser.add_argument("-v", "--version_config", help="version config", required=True) - parser.add_argument("--scorpio_version", help="scorpio version", required=True) - parser.add_argument("--scorpio_constellations_version", help="scorpio constellations version", required=True) + parser.add_argument("-v", "--version_config", help="version config", required=True) parser.add_argument("--variants_table", help="variants table with VOCs, VOIs etc.", required=True) parser.add_argument("--porecov_version", help="porecov version", required=True) parser.add_argument("--guppy_used", help="guppy used") parser.add_argument("--guppy_model", help="guppy model") parser.add_argument("--medaka_model", help="medaka model") parser.add_argument("--nf_commandline", help="full nextflow command call", required=True) - parser.add_argument("--pangolin_docker", help="pangolin/pangoLEARN version", required=True) parser.add_argument("--nextclade_docker", help="nextclade/nextcladedata version", required=True) parser.add_argument("--primer", help="primer version") parser.add_argument("-p", "--pangolin_results", help="pangolin results") @@ -806,9 +808,7 @@ def get_lineage_status(self, lineage): ### build report report = SummaryReport() report.parse_version_config(args.version_config) - report.parse_scorpio_versions(args.scorpio_version, args.scorpio_constellations_version) report.parse_variants_table(args.variants_table) - report.parse_pangolin_version(args.pangolin_docker) report.parse_nextclade_version(args.nextclade_docker) diff --git a/modules/get_scorpio_version.nf b/modules/get_scorpio_version.nf deleted file mode 100644 index 128aef5..0000000 --- a/modules/get_scorpio_version.nf +++ /dev/null @@ -1,17 +0,0 @@ -process get_scorpio_version { - label 'pangolin' - cpus = 1 - container = params.pangolindocker - output: - tuple env(SCORPIO_VER), env(SCORPIO_CONSTELLATIONS_VER) - shell: - ''' - SCORPIO_VER=$(scorpio --version) - SCORPIO_CONSTELLATIONS_VER=$(scorpio -cv) - ''' - stub: - """ - SCORPIO_VER='scorpio 0.3.14' - SCORPIO_CONSTELLATIONS_VER='constellations v0.0.24' - """ - } diff --git a/nextflow.config b/nextflow.config index 1ae8621..2614199 100644 --- a/nextflow.config +++ b/nextflow.config @@ -41,8 +41,8 @@ params { update = false screen_reads = false screen_reads_plot_cutoff = 0.03 - defaultpangolin = 'nanozoo/pangolin:3.1.20--2022-02-28' - defaultnextclade = 'nanozoo/nextclade:1.10.3--2022-02-07' + defaultpangolin = 'nanozoo/pangolin-v4:4.0.4--1.2.133' + defaultnextclade = 'nanozoo/nextclade:1.11.0--2022-03-31' // parameters primerV = 'V3' diff --git a/poreCov.nf b/poreCov.nf index ce19d83..350b6b9 100755 --- a/poreCov.nf +++ b/poreCov.nf @@ -235,7 +235,7 @@ if (params.samples) { static boolean DockernetIsAvailable() { try { - final URL url = new URL("https://registry.hub.docker.com/v2/repositories/nanozoo/pangolin/tags/"); + final URL url = new URL("https://registry.hub.docker.com/v2/repositories/nanozoo/pangolin-v4/tags/"); final URLConnection conn = url.openConnection(); conn.connect(); conn.getInputStream().close(); @@ -252,8 +252,8 @@ def internetcheck = DockernetIsAvailable() if (params.update) { println "\033[0;33mWarning: Running --update might not be poreCov compatible!\033[0m" if ( internetcheck.toString() == "true" ) { - tagname = 'https://registry.hub.docker.com/v2/repositories/nanozoo/pangolin/tags/'.toURL().text.split(',"name":"')[1].split('","')[0] - params.pangolindocker = "nanozoo/pangolin:" + tagname + tagname = 'https://registry.hub.docker.com/v2/repositories/nanozoo/pangolin-v4/tags/'.toURL().text.split(',"name":"')[1].split('","')[0] + params.pangolindocker = "nanozoo/pangolin-v4:" + tagname println "\033[0;32mFound latest pangolin container, using: " + params.pangolindocker + " \033[0m" tagname = 'https://registry.hub.docker.com/v2/repositories/nanozoo/nextclade/tags/'.toURL().text.split(',"name":"')[1].split('","')[0] diff --git a/workflows/create_summary_report.nf b/workflows/create_summary_report.nf index dc08d1d..8e2f03c 100644 --- a/workflows/create_summary_report.nf +++ b/workflows/create_summary_report.nf @@ -1,8 +1,7 @@ include { summary_report; summary_report_fasta; summary_report_default } from './process/summary_report' include { plot_coverages } from '../modules/plot_coverages.nf' -include { get_scorpio_version } from '../modules/get_scorpio_version.nf' include { get_variants_classification } from '../modules/get_variants_classification.nf' -include { lcs_plot } from './process/lcs_sc2' +include { lcs_plot as lcs_plot; lcs_plot as lcs_plot_control } from './process/lcs_sc2' workflow create_summary_report_wf { take: @@ -16,7 +15,6 @@ workflow create_summary_report_wf { main: version_ch = Channel.fromPath(workflow.projectDir + "/configs/container.config") - scorpio_ver_ch = get_scorpio_version() variants_table_ch = get_variants_classification() pangolin_results = pangolin.map {it -> it[1]}.collectFile(name: 'pangolin_results.csv', skip: 1, keepHeader: true) @@ -27,7 +25,7 @@ workflow create_summary_report_wf { alignment_files = alignments.map {it -> it[0]}.collect() if (params.fasta || workflow.profile.contains('test_fasta')) { - summary_report_fasta(version_ch, scorpio_ver_ch, variants_table_ch, pangolin_results, president_results, nextclade_results) + summary_report_fasta(version_ch, variants_table_ch, pangolin_results, president_results, nextclade_results) } else { kraken2_results = kraken2.map {it -> it[2]}.collect() @@ -35,13 +33,15 @@ workflow create_summary_report_wf { coverage_plots = plot_coverages(alignments.map{it -> it[0]}.toSortedList({ a, b -> a.simpleName <=> b.simpleName }).flatten().collate(6), \ alignments.map{it -> it[1]}.toSortedList({ a, b -> a.simpleName <=> b.simpleName }).flatten().collate(6)).collect() - if (params.samples) { summary_report(version_ch, scorpio_ver_ch, variants_table_ch, pangolin_results, president_results, nextclade_results, kraken2_results, coverage_plots, samples_table) } - else { summary_report_default(version_ch, scorpio_ver_ch, variants_table_ch, pangolin_results, president_results, nextclade_results, kraken2_results, coverage_plots) } + if (params.samples) { summary_report(version_ch, variants_table_ch, pangolin_results, president_results, nextclade_results, kraken2_results, coverage_plots, samples_table) } + else { summary_report_default(version_ch, variants_table_ch, pangolin_results, president_results, nextclade_results, kraken2_results, coverage_plots) } } if (params.screen_reads){ - lcs_plot(lcs_results, params.screen_reads_plot_cutoff) + lcs_plot(lcs.filter({ !it[0].contains("negativ") }).map {it -> it[1]}.collectFile(name: 'lcs_results.tsv', skip: 1, keepHeader: true).map{ it -> ['samples', it] }, params.screen_reads_plot_cutoff) + + lcs_plot_control(lcs.filter({ it[0].contains("negativ") }), params.screen_reads_plot_cutoff) } } diff --git a/workflows/process/lcs_sc2.nf b/workflows/process/lcs_sc2.nf index ec13d28..ef34121 100644 --- a/workflows/process/lcs_sc2.nf +++ b/workflows/process/lcs_sc2.nf @@ -36,14 +36,14 @@ process lcs_plot { publishDir "${params.output}/${params.lineagedir}/", mode: 'copy' input: - path(tsv) + tuple val(name), path(tsv) val(cutoff) output: - path("lcs_bar_plot.png") + path("*.png") script: """ - lcs_bar_plot.R ${cutoff} + lcs_bar_plot.R '${tsv}' '${name}' ${cutoff} """ } \ No newline at end of file diff --git a/workflows/process/summary_report.nf b/workflows/process/summary_report.nf index 0c35e15..f601060 100644 --- a/workflows/process/summary_report.nf +++ b/workflows/process/summary_report.nf @@ -5,7 +5,6 @@ process summary_report { input: path(version_config) - tuple val(scorpio_ver), val(scorpio_constellations_ver) path(variants_table) path(pangolin_results) path(president_results) @@ -32,8 +31,6 @@ process summary_report { summary_report.py \ -v !{version_config} \ - --scorpio_version "!{scorpio_ver}" \ - --scorpio_constellations_version "!{scorpio_constellations_ver}" \ --variants_table !{variants_table} \ --porecov_version !{workflow.revision}:!{workflow.commitId}:!{workflow.scriptId} \ --nextclade_docker !{params.nextcladedocker} \ @@ -41,7 +38,6 @@ process summary_report { --guppy_model !{params.guppy_model} \ --medaka_model !{params.medaka_model} \ --nf_commandline '!{workflow.commandLine}' \ - --pangolin_docker !{params.pangolindocker} \ --primer !{params.primerV} \ -p !{pangolin_results} \ -q !{president_results} \ @@ -63,7 +59,6 @@ process summary_report_default { input: path(version_config) - tuple val(scorpio_ver), val(scorpio_constellations_ver) path(variants_table) path(pangolin_results) path(president_results) @@ -89,15 +84,12 @@ process summary_report_default { summary_report.py \ -v !{version_config} \ - --scorpio_version "!{scorpio_ver}" \ - --scorpio_constellations_version "!{scorpio_constellations_ver}" \ --variants_table !{variants_table} \ --porecov_version !{workflow.revision}:!{workflow.commitId}:!{workflow.scriptId} \ --guppy_used !{guppyused} \ --guppy_model !{params.guppy_model} \ --medaka_model !{params.medaka_model} \ --nf_commandline '!{workflow.commandLine}' \ - --pangolin_docker !{params.pangolindocker} \ --nextclade_docker !{params.nextcladedocker} \ --primer !{params.primerV} \ -p !{pangolin_results} \ @@ -117,7 +109,6 @@ process summary_report_fasta { label 'fastcov' input: path(version_config) - tuple val(scorpio_ver), val(scorpio_constellations_ver) path(variants_table) path(pangolin_results) path(president_results) @@ -131,12 +122,9 @@ process summary_report_fasta { """ summary_report.py \ -v ${version_config} \ - --scorpio_version "${scorpio_ver}" \ - --scorpio_constellations_version "${scorpio_constellations_ver}" \ --variants_table ${variants_table} \ --porecov_version ${workflow.revision}:${workflow.commitId}:${workflow.scriptId} \ --nf_commandline '${workflow.commandLine}' \ - --pangolin_docker ${params.pangolindocker} \ --nextclade_docker ${params.nextcladedocker} \ -p ${pangolin_results} \ -q ${president_results} \