From 0034a4d9cb171e7c6d1fa37cece24420ce890bcf Mon Sep 17 00:00:00 2001 From: mirpedrol Date: Mon, 13 Nov 2023 13:03:48 +0100 Subject: [PATCH 1/5] add versions to template scripts --- templates/alignment_summary.py | 4 ++++ templates/clustering_summary.py | 5 +++-- templates/preprocessing_summary.py | 17 +++++++---------- 3 files changed, 14 insertions(+), 12 deletions(-) diff --git a/templates/alignment_summary.py b/templates/alignment_summary.py index 12dd6109..a6b12c06 100644 --- a/templates/alignment_summary.py +++ b/templates/alignment_summary.py @@ -22,3 +22,7 @@ add_line = False if add_line: output_file.write(f"aligned-reads, {mapped_reads_count} ({round(mapped_reads_percentage, 1)}%)\\n") + +with open("versions.yml", "w") as f: + f.write('"${task.process}":\\n') + f.write(f' pysam: "{pysam.__version__}"\\n') diff --git a/templates/clustering_summary.py b/templates/clustering_summary.py index 33feb1b1..4981351a 100644 --- a/templates/clustering_summary.py +++ b/templates/clustering_summary.py @@ -3,10 +3,10 @@ import gzip import sys -from Bio import SeqIO +import Bio with gzip.open("$reads", "rt") as handle: - clusters_count = len(list(SeqIO.parse(handle, "fastq"))) + clusters_count = len(list(Bio.SeqIO.parse(handle, "fastq"))) with open("$summary", "r") as summary: summary_lines = summary.readlines() @@ -25,3 +25,4 @@ with open("versions.yml", "w") as f: f.write('"${task.process}":\\n') + f.write(f' biopython: "{Bio.__version__}"\\n') diff --git a/templates/preprocessing_summary.py b/templates/preprocessing_summary.py index c2a30578..2089eb7c 100755 --- a/templates/preprocessing_summary.py +++ b/templates/preprocessing_summary.py @@ -2,19 +2,19 @@ import gzip -from Bio import SeqIO +import Bio with gzip.open("${raw_reads[0]}", "rt") as handle: - raw_reads_count = len(list(SeqIO.parse(handle, "fastq"))) + raw_reads_count = len(list(Bio.SeqIO.parse(handle, "fastq"))) if "$assembled_reads" == "": assembled_reads_count = 0 else: with gzip.open("$assembled_reads", "rt") as handle: - assembled_reads_count = len(list(SeqIO.parse(handle, "fastq"))) # Merged reads R1+R2 + assembled_reads_count = len(list(Bio.SeqIO.parse(handle, "fastq"))) # Merged reads R1+R2 with gzip.open("$trimmed_reads", "rt") as handle: - trimmed_reads_count = len(list(SeqIO.parse(handle, "fastq"))) # Filtered reads + trimmed_reads_count = len(list(Bio.SeqIO.parse(handle, "fastq"))) # Filtered reads if "$trimmed_adapters" == "": adapters_count = 0 @@ -50,10 +50,7 @@ f"quality-filtered-reads, {trimmed_reads_count} ({round(trimmed_reads_count * 100 / assembled_reads_count,1)}%)\\n" ) - # Output version information - #version = pd.__version__ - #matplotlib_version = plt.matplotlib.__version__ - # alas, no `pyyaml` pre-installed in the cellranger container - with open("versions.yml", "w") as f: - f.write('"${task.process}":\\n') +with open("versions.yml", "w") as f: + f.write('"${task.process}":\\n') + f.write(f' biopython: "{Bio.__version__}"\\n') From 2e58467bbf21f56556d1048f65c6107bd90047e8 Mon Sep 17 00:00:00 2001 From: mirpedrol Date: Mon, 13 Nov 2023 13:05:59 +0100 Subject: [PATCH 2/5] collect versions --- workflows/crisprseq_targeted.nf | 3 +++ 1 file changed, 3 insertions(+) diff --git a/workflows/crisprseq_targeted.nf b/workflows/crisprseq_targeted.nf index 87c4cb89..abf67bb2 100644 --- a/workflows/crisprseq_targeted.nf +++ b/workflows/crisprseq_targeted.nf @@ -370,6 +370,7 @@ workflow CRISPRSEQ_TARGETED { PREPROCESSING_SUMMARY { ch_preprocessing_summary_data } + ch_versions = ch_versions.mix(PREPROCESSING_SUMMARY.out.versions) if (params.umi_clustering) { @@ -663,6 +664,7 @@ workflow CRISPRSEQ_TARGETED { ch_mapped_bam .join(CLUSTERING_SUMMARY.out.summary) ) + ch_versions = ch_versions.mix(ALIGNMENT_SUMMARY.out.versions) // @@ -705,6 +707,7 @@ workflow CRISPRSEQ_TARGETED { } } .set { ch_template_bam } + ch_versions = ch_versions.mix(MINIMAP2_ALIGN_TEMPLATE.out.versions) ch_mapped_bam .join(SAMTOOLS_INDEX.out.bai) From 69d20c1b74dbc6e567da64ae03870f41ea7e2aab Mon Sep 17 00:00:00 2001 From: mirpedrol Date: Mon, 13 Nov 2023 13:21:16 +0100 Subject: [PATCH 3/5] add versions to R scripts --- modules/local/cigar_parser.nf | 8 +++++++- modules/local/crisprseq_plotter.nf | 14 +++++++++++++- 2 files changed, 20 insertions(+), 2 deletions(-) diff --git a/modules/local/cigar_parser.nf b/modules/local/cigar_parser.nf index 5b87bba4..45dfd7d5 100644 --- a/modules/local/cigar_parser.nf +++ b/modules/local/cigar_parser.nf @@ -41,7 +41,13 @@ process CIGAR_PARSER { cat <<-END_VERSIONS > versions.yml "${task.process}": - Rscript: \$(Rscript --version) + seqinr: \$(Rscript -e "cat(paste(packageVersion('seqinr'), collapse='.'))") + Rsamtools: \$(Rscript -e "cat(paste(packageVersion('Rsamtools'), collapse='.'))") + dplyr: \$(Rscript -e "cat(paste(packageVersion('dplyr'), collapse='.'))") + ShortRead: \$(Rscript -e "cat(paste(packageVersion('ShortRead'), collapse='.'))") + jsonlite: \$(Rscript -e "cat(paste(packageVersion('jsonlite'), collapse='.'))") + stringr: \$(Rscript -e "cat(paste(packageVersion('stringr'), collapse='.'))") + plotly: \$(Rscript -e "cat(paste(packageVersion('plotly'), collapse='.'))") END_VERSIONS """ } diff --git a/modules/local/crisprseq_plotter.nf b/modules/local/crisprseq_plotter.nf index 5612260e..c3b0ed62 100644 --- a/modules/local/crisprseq_plotter.nf +++ b/modules/local/crisprseq_plotter.nf @@ -35,7 +35,19 @@ process CRISPRSEQ_PLOTTER { cat <<-END_VERSIONS > versions.yml "${task.process}": - Rscript: \$(Rscript --version) + ggplot2: \$(Rscript -e "cat(paste(packageVersion('ggplot2'), collapse='.'))") + ShortRead: \$(Rscript -e "cat(paste(packageVersion('ShortRead'), collapse='.'))") + plyr: \$(Rscript -e "cat(paste(packageVersion('plyr'), collapse='.'))") + dplyr: \$(Rscript -e "cat(paste(packageVersion('dplyr'), collapse='.'))") + seqinr: \$(Rscript -e "cat(paste(packageVersion('seqinr'), collapse='.'))") + ggpubr: \$(Rscript -e "cat(paste(packageVersion('ggpubr'), collapse='.'))") + ggmsa: \$(Rscript -e "cat(paste(packageVersion('ggmsa'), collapse='.'))") + seqmagick: \$(Rscript -e "cat(paste(packageVersion('seqmagick'), collapse='.'))") + stringr: \$(Rscript -e "cat(paste(packageVersion('stringr'), collapse='.'))") + tidyr: \$(Rscript -e "cat(paste(packageVersion('tidyr'), collapse='.'))") + ggseqlogo: \$(Rscript -e "cat(paste(packageVersion('ggseqlogo'), collapse='.'))") + plotly: \$(Rscript -e "cat(paste(packageVersion('plotly'), collapse='.'))") + cowplot: \$(Rscript -e "cat(paste(packageVersion('cowplot'), collapse='.'))") END_VERSIONS """ } From b134f12d61b0928565d68d90bbd868200723765a Mon Sep 17 00:00:00 2001 From: mirpedrol Date: Mon, 13 Nov 2023 13:35:30 +0100 Subject: [PATCH 4/5] add versions channel to alignment_summary --- modules/local/alignment_summary.nf | 1 + 1 file changed, 1 insertion(+) diff --git a/modules/local/alignment_summary.nf b/modules/local/alignment_summary.nf index 3520689f..1947812c 100644 --- a/modules/local/alignment_summary.nf +++ b/modules/local/alignment_summary.nf @@ -12,6 +12,7 @@ process ALIGNMENT_SUMMARY { output: tuple val(meta), path("*_alignment_summary.csv"), emit: summary + path "versions.yml", emit: versions when: task.ext.when == null || task.ext.when From ea081e0c557e019e63ded3ed2efc2a310d40533c Mon Sep 17 00:00:00 2001 From: mirpedrol Date: Mon, 13 Nov 2023 13:48:55 +0100 Subject: [PATCH 5/5] fix import SeqIO --- templates/clustering_summary.py | 3 ++- templates/preprocessing_summary.py | 7 ++++--- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/templates/clustering_summary.py b/templates/clustering_summary.py index 4981351a..1f2d7eb8 100644 --- a/templates/clustering_summary.py +++ b/templates/clustering_summary.py @@ -4,9 +4,10 @@ import sys import Bio +from Bio import SeqIO with gzip.open("$reads", "rt") as handle: - clusters_count = len(list(Bio.SeqIO.parse(handle, "fastq"))) + clusters_count = len(list(SeqIO.parse(handle, "fastq"))) with open("$summary", "r") as summary: summary_lines = summary.readlines() diff --git a/templates/preprocessing_summary.py b/templates/preprocessing_summary.py index 2089eb7c..16f8c74a 100755 --- a/templates/preprocessing_summary.py +++ b/templates/preprocessing_summary.py @@ -3,18 +3,19 @@ import gzip import Bio +from Bio import SeqIO with gzip.open("${raw_reads[0]}", "rt") as handle: - raw_reads_count = len(list(Bio.SeqIO.parse(handle, "fastq"))) + raw_reads_count = len(list(SeqIO.parse(handle, "fastq"))) if "$assembled_reads" == "": assembled_reads_count = 0 else: with gzip.open("$assembled_reads", "rt") as handle: - assembled_reads_count = len(list(Bio.SeqIO.parse(handle, "fastq"))) # Merged reads R1+R2 + assembled_reads_count = len(list(SeqIO.parse(handle, "fastq"))) # Merged reads R1+R2 with gzip.open("$trimmed_reads", "rt") as handle: - trimmed_reads_count = len(list(Bio.SeqIO.parse(handle, "fastq"))) # Filtered reads + trimmed_reads_count = len(list(SeqIO.parse(handle, "fastq"))) # Filtered reads if "$trimmed_adapters" == "": adapters_count = 0