From 69b21840dd5cc7b2a67d2d349e6c70173dcca773 Mon Sep 17 00:00:00 2001 From: "Lataretu, Marie" Date: Tue, 9 Jul 2024 17:05:25 +0200 Subject: [PATCH] add the new process to the workflow and the datatable --- bin/summary_report.py | 6 ++++++ poreCov.nf | 12 +++++++++++- workflows/create_summary_report.nf | 6 ++++-- workflows/process/summary_report.nf | 4 ++++ 4 files changed, 25 insertions(+), 3 deletions(-) diff --git a/bin/summary_report.py b/bin/summary_report.py index c0884f4..ab4a247 100755 --- a/bin/summary_report.py +++ b/bin/summary_report.py @@ -640,6 +640,9 @@ def unclass_markup(value): self.add_col_description(f'Read classification was determined against a database containing only SARS-CoV-2 and human with Kraken2 (v{self.tool_versions["kraken2"]}).') + def add_mixed_sites_results(self, mixed_site_results): + res_data = pd.read_csv(mixed_site_results, index_col='sample', dtype={'sample': str}) + self.add_column_raw('num_mixed_sites', res_data['num_mixed_sites']) def add_coverage_plots(self, coverage_plots): for coverage_plot in sorted(coverage_plots.split(',')): @@ -826,6 +829,7 @@ def get_lineage_status(self, lineage): parser.add_argument("-n", "--nextclade_results", help="nextclade results") parser.add_argument("-q", "--president_results", help="president results") parser.add_argument("-k", "--kraken2_results", help="kraken2 results") + parser.add_argument("-m", "--mixed_sites_results", help="mixed sites statisics") parser.add_argument("-c", "--coverage_plots", help="coverage plots (comma separated)") parser.add_argument("-s", "--samples", help="sample ids (comma separated)") args = parser.parse_args() @@ -852,6 +856,8 @@ def get_lineage_status(self, lineage): report.add_pangolin_results(args.pangolin_results) if args.nextclade_results: report.add_nextclade_results(args.nextclade_results) + if args.mixed_sites_results: + report.add_mixed_sites_results(args.mixed_sites_results) if args.coverage_plots: report.add_coverage_plots(args.coverage_plots) diff --git a/poreCov.nf b/poreCov.nf index 0cf7d5e..5f8a8ac 100755 --- a/poreCov.nf +++ b/poreCov.nf @@ -309,6 +309,7 @@ include { get_fasta } from './modules/get_fasta_test_data.nf' include { align_to_reference } from './modules/align_to_reference.nf' include { split_fasta } from './modules/split_fasta.nf' include { filter_fastq_by_length } from './modules/filter_fastq_by_length.nf' +include { add_alt_allele_ration_vcf } from './modules/add_alt_allele_ration_vcf.nf' /************************** * Workflows @@ -405,6 +406,15 @@ workflow { determine_mutations_wf(fasta_input_ch) genome_quality_wf(fasta_input_ch, reference_for_qc_input_ch) + // add alternative allele ratio to the VCF + if (params.primerV.toString().contains(".bed")) { + external_primer_schemes = artic_ncov_wf.out.primer_dir + } + else { + external_primer_schemes = file(workflow.projectDir + "/data/external_primer_schemes", checkIfExists: true, type: 'dir' ) + } + add_alt_allele_ration_vcf(artic_ncov_wf.out.trimmed_bam.join(artic_ncov_wf.out.vcf).join(artic_ncov_wf.out.failed_vcf), external_primer_schemes) + // 3. Specialised outputs (rki, json) rki_report_wf(genome_quality_wf.out[0], genome_quality_wf.out[1], extended_input_ch) @@ -436,7 +446,7 @@ workflow { else { samples_table_ch = Channel.from( ['deactivated'] ) } */ create_summary_report_wf(determine_lineage_wf.out, genome_quality_wf.out[0], determine_mutations_wf.out, - taxonomic_read_classification_ch, alignments_ch, samples_file_ch) + taxonomic_read_classification_ch, add_alt_allele_ration_vcf.out.stats, alignments_ch, samples_file_ch) } diff --git a/workflows/create_summary_report.nf b/workflows/create_summary_report.nf index 806f0ba..9b606c0 100644 --- a/workflows/create_summary_report.nf +++ b/workflows/create_summary_report.nf @@ -8,6 +8,7 @@ workflow create_summary_report_wf { president nextclade kraken2 + mixed_sites alignments samples_table @@ -18,6 +19,7 @@ workflow create_summary_report_wf { pangolin_results = pangolin.map {it -> it[1]}.collectFile(name: 'pangolin_results.csv', skip: 1, keepHeader: true) president_results = president.map {it -> it[1]}.collectFile(name: 'president_results.tsv', skip: 1, keepHeader: true) nextclade_results = nextclade.map {it -> it[1]}.collectFile(name: 'nextclade_results.tsv', skip: 1, keepHeader: true) + mixed_site_results = mixed_sites.map {it -> it[1]}.collectFile(name: 'mixed_sites_results.tsv', skip: 1, keepHeader: true) alignment_files = alignments.map {it -> it[0]}.collect() if (params.fasta || workflow.profile.contains('test_fasta')) { @@ -30,7 +32,7 @@ workflow create_summary_report_wf { coverage_plots = plot_coverages(alignments.map{it -> it[0]}.toSortedList({ a, b -> a.simpleName <=> b.simpleName }).flatten().collate(6), \ alignments.map{it -> it[1]}.toSortedList({ a, b -> a.simpleName <=> b.simpleName }).flatten().collate(6)).collect() - if (params.samples) { summary_report(version_ch, variants_table_ch, pangolin_results, president_results, nextclade_results, kraken2_results, coverage_plots, samples_table) } - else { summary_report_default(version_ch, variants_table_ch, pangolin_results, president_results, nextclade_results, kraken2_results, coverage_plots) } + if (params.samples) { summary_report(version_ch, variants_table_ch, pangolin_results, president_results, nextclade_results, kraken2_results, mixed_site_results, coverage_plots, samples_table) } + else { summary_report_default(version_ch, variants_table_ch, pangolin_results, president_results, nextclade_results, kraken2_results, mixed_site_results, coverage_plots) } } } diff --git a/workflows/process/summary_report.nf b/workflows/process/summary_report.nf index f601060..d0406d7 100644 --- a/workflows/process/summary_report.nf +++ b/workflows/process/summary_report.nf @@ -10,6 +10,7 @@ process summary_report { path(president_results) path(nextclade_results) file(kraken2_results) + file(mixed_sites_results) file(coverage_plots) file(samples_table) output: @@ -43,6 +44,7 @@ process summary_report { -q !{president_results} \ -n !{nextclade_results} \ -k kraken2_results.csv \ + -m !{mixed_sites_results} \ -c $(echo !{coverage_plots} | tr ' ' ',') \ -s !{samples_table} ''' @@ -64,6 +66,7 @@ process summary_report_default { path(president_results) path(nextclade_results) path(kraken2_results) + path(mixed_sites_results) path(coverage_plots) output: path("poreCov_summary_report_*.html") @@ -96,6 +99,7 @@ process summary_report_default { -q !{president_results} \ -n !{nextclade_results} \ -k kraken2_results.csv \ + -m !{mixed_sites_results} \ -c $(echo !{coverage_plots} | tr ' ' ',') ''' stub: