From 0034a4d9cb171e7c6d1fa37cece24420ce890bcf Mon Sep 17 00:00:00 2001
From: mirpedrol <mirp.julia@gmail.com>
Date: Mon, 13 Nov 2023 13:03:48 +0100
Subject: [PATCH 1/5] add versions to template scripts

---
 templates/alignment_summary.py     |  4 ++++
 templates/clustering_summary.py    |  5 +++--
 templates/preprocessing_summary.py | 17 +++++++----------
 3 files changed, 14 insertions(+), 12 deletions(-)

diff --git a/templates/alignment_summary.py b/templates/alignment_summary.py
index 12dd6109..a6b12c06 100644
--- a/templates/alignment_summary.py
+++ b/templates/alignment_summary.py
@@ -22,3 +22,7 @@
             add_line = False
     if add_line:
         output_file.write(f"aligned-reads, {mapped_reads_count} ({round(mapped_reads_percentage, 1)}%)\\n")
+
+with open("versions.yml", "w") as f:
+    f.write('"${task.process}":\\n')
+    f.write(f'  pysam: "{pysam.__version__}"\\n')
diff --git a/templates/clustering_summary.py b/templates/clustering_summary.py
index 33feb1b1..4981351a 100644
--- a/templates/clustering_summary.py
+++ b/templates/clustering_summary.py
@@ -3,10 +3,10 @@
 import gzip
 import sys
 
-from Bio import SeqIO
+import Bio
 
 with gzip.open("$reads", "rt") as handle:
-    clusters_count = len(list(SeqIO.parse(handle, "fastq")))
+    clusters_count = len(list(Bio.SeqIO.parse(handle, "fastq")))
 
 with open("$summary", "r") as summary:
     summary_lines = summary.readlines()
@@ -25,3 +25,4 @@
 
 with open("versions.yml", "w") as f:
     f.write('"${task.process}":\\n')
+    f.write(f'  biopython: "{Bio.__version__}"\\n')
diff --git a/templates/preprocessing_summary.py b/templates/preprocessing_summary.py
index c2a30578..2089eb7c 100755
--- a/templates/preprocessing_summary.py
+++ b/templates/preprocessing_summary.py
@@ -2,19 +2,19 @@
 
 import gzip
 
-from Bio import SeqIO
+import Bio
 
 with gzip.open("${raw_reads[0]}", "rt") as handle:
-    raw_reads_count = len(list(SeqIO.parse(handle, "fastq")))
+    raw_reads_count = len(list(Bio.SeqIO.parse(handle, "fastq")))
 
 if "$assembled_reads" == "":
     assembled_reads_count = 0
 else:
     with gzip.open("$assembled_reads", "rt") as handle:
-        assembled_reads_count = len(list(SeqIO.parse(handle, "fastq")))  # Merged reads R1+R2
+        assembled_reads_count = len(list(Bio.SeqIO.parse(handle, "fastq")))  # Merged reads R1+R2
 
 with gzip.open("$trimmed_reads", "rt") as handle:
-    trimmed_reads_count = len(list(SeqIO.parse(handle, "fastq")))  # Filtered reads
+    trimmed_reads_count = len(list(Bio.SeqIO.parse(handle, "fastq")))  # Filtered reads
 
 if "$trimmed_adapters" == "":
     adapters_count = 0
@@ -50,10 +50,7 @@
             f"quality-filtered-reads, {trimmed_reads_count} ({round(trimmed_reads_count * 100 / assembled_reads_count,1)}%)\\n"
         )
 
-    # Output version information
-    #version = pd.__version__
-    #matplotlib_version = plt.matplotlib.__version__
-    # alas, no `pyyaml` pre-installed in the cellranger container
-    with open("versions.yml", "w") as f:
-        f.write('"${task.process}":\\n')
 
+with open("versions.yml", "w") as f:
+    f.write('"${task.process}":\\n')
+    f.write(f'  biopython: "{Bio.__version__}"\\n')

From 2e58467bbf21f56556d1048f65c6107bd90047e8 Mon Sep 17 00:00:00 2001
From: mirpedrol <mirp.julia@gmail.com>
Date: Mon, 13 Nov 2023 13:05:59 +0100
Subject: [PATCH 2/5] collect versions from summary and template alignment processes

---
 workflows/crisprseq_targeted.nf | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/workflows/crisprseq_targeted.nf b/workflows/crisprseq_targeted.nf
index 87c4cb89..abf67bb2 100644
--- a/workflows/crisprseq_targeted.nf
+++ b/workflows/crisprseq_targeted.nf
@@ -370,6 +370,7 @@ workflow CRISPRSEQ_TARGETED {
     PREPROCESSING_SUMMARY {
         ch_preprocessing_summary_data
     }
+    ch_versions = ch_versions.mix(PREPROCESSING_SUMMARY.out.versions)
 
 
     if (params.umi_clustering) {
@@ -663,6 +664,7 @@ workflow CRISPRSEQ_TARGETED {
         ch_mapped_bam
             .join(CLUSTERING_SUMMARY.out.summary)
     )
+    ch_versions = ch_versions.mix(ALIGNMENT_SUMMARY.out.versions)
 
 
     //
@@ -705,6 +707,7 @@ workflow CRISPRSEQ_TARGETED {
             }
     }
     .set { ch_template_bam }
+    ch_versions = ch_versions.mix(MINIMAP2_ALIGN_TEMPLATE.out.versions)
 
     ch_mapped_bam
         .join(SAMTOOLS_INDEX.out.bai)

From 69d20c1b74dbc6e567da64ae03870f41ea7e2aab Mon Sep 17 00:00:00 2001
From: mirpedrol <mirp.julia@gmail.com>
Date: Mon, 13 Nov 2023 13:21:16 +0100
Subject: [PATCH 3/5] report R package versions instead of Rscript version in local modules

---
 modules/local/cigar_parser.nf      |  8 +++++++-
 modules/local/crisprseq_plotter.nf | 14 +++++++++++++-
 2 files changed, 20 insertions(+), 2 deletions(-)

diff --git a/modules/local/cigar_parser.nf b/modules/local/cigar_parser.nf
index 5b87bba4..45dfd7d5 100644
--- a/modules/local/cigar_parser.nf
+++ b/modules/local/cigar_parser.nf
@@ -41,7 +41,13 @@ process CIGAR_PARSER {
 
     cat <<-END_VERSIONS > versions.yml
     "${task.process}":
-        Rscript: \$(Rscript --version)
+        seqinr: \$(Rscript -e "cat(paste(packageVersion('seqinr'), collapse='.'))")
+        Rsamtools: \$(Rscript -e "cat(paste(packageVersion('Rsamtools'), collapse='.'))")
+        dplyr: \$(Rscript -e "cat(paste(packageVersion('dplyr'), collapse='.'))")
+        ShortRead: \$(Rscript -e "cat(paste(packageVersion('ShortRead'), collapse='.'))")
+        jsonlite: \$(Rscript -e "cat(paste(packageVersion('jsonlite'), collapse='.'))")
+        stringr: \$(Rscript -e "cat(paste(packageVersion('stringr'), collapse='.'))")
+        plotly: \$(Rscript -e "cat(paste(packageVersion('plotly'), collapse='.'))")
     END_VERSIONS
     """
 }
diff --git a/modules/local/crisprseq_plotter.nf b/modules/local/crisprseq_plotter.nf
index 5612260e..c3b0ed62 100644
--- a/modules/local/crisprseq_plotter.nf
+++ b/modules/local/crisprseq_plotter.nf
@@ -35,7 +35,19 @@ process CRISPRSEQ_PLOTTER {
 
     cat <<-END_VERSIONS > versions.yml
     "${task.process}":
-        Rscript: \$(Rscript --version)
+        ggplot2: \$(Rscript -e "cat(paste(packageVersion('ggplot2'), collapse='.'))")
+        ShortRead: \$(Rscript -e "cat(paste(packageVersion('ShortRead'), collapse='.'))")
+        plyr: \$(Rscript -e "cat(paste(packageVersion('plyr'), collapse='.'))")
+        dplyr: \$(Rscript -e "cat(paste(packageVersion('dplyr'), collapse='.'))")
+        seqinr: \$(Rscript -e "cat(paste(packageVersion('seqinr'), collapse='.'))")
+        ggpubr: \$(Rscript -e "cat(paste(packageVersion('ggpubr'), collapse='.'))")
+        ggmsa: \$(Rscript -e "cat(paste(packageVersion('ggmsa'), collapse='.'))")
+        seqmagick: \$(Rscript -e "cat(paste(packageVersion('seqmagick'), collapse='.'))")
+        stringr: \$(Rscript -e "cat(paste(packageVersion('stringr'), collapse='.'))")
+        tidyr: \$(Rscript -e "cat(paste(packageVersion('tidyr'), collapse='.'))")
+        ggseqlogo: \$(Rscript -e "cat(paste(packageVersion('ggseqlogo'), collapse='.'))")
+        plotly: \$(Rscript -e "cat(paste(packageVersion('plotly'), collapse='.'))")
+        cowplot: \$(Rscript -e "cat(paste(packageVersion('cowplot'), collapse='.'))")
     END_VERSIONS
     """
 }

From b134f12d61b0928565d68d90bbd868200723765a Mon Sep 17 00:00:00 2001
From: mirpedrol <mirp.julia@gmail.com>
Date: Mon, 13 Nov 2023 13:35:30 +0100
Subject: [PATCH 4/5] add versions channel to alignment_summary

---
 modules/local/alignment_summary.nf | 1 +
 1 file changed, 1 insertion(+)

diff --git a/modules/local/alignment_summary.nf b/modules/local/alignment_summary.nf
index 3520689f..1947812c 100644
--- a/modules/local/alignment_summary.nf
+++ b/modules/local/alignment_summary.nf
@@ -12,6 +12,7 @@ process ALIGNMENT_SUMMARY {
 
     output:
     tuple val(meta), path("*_alignment_summary.csv"), emit: summary
+    path "versions.yml",                              emit: versions
 
     when:
     task.ext.when == null || task.ext.when

From ea081e0c557e019e63ded3ed2efc2a310d40533c Mon Sep 17 00:00:00 2001
From: mirpedrol <mirp.julia@gmail.com>
Date: Mon, 13 Nov 2023 13:48:55 +0100
Subject: [PATCH 5/5] fix SeqIO import: restore `from Bio import SeqIO`

---
 templates/clustering_summary.py    | 3 ++-
 templates/preprocessing_summary.py | 7 ++++---
 2 files changed, 6 insertions(+), 4 deletions(-)

diff --git a/templates/clustering_summary.py b/templates/clustering_summary.py
index 4981351a..1f2d7eb8 100644
--- a/templates/clustering_summary.py
+++ b/templates/clustering_summary.py
@@ -4,9 +4,10 @@
 import sys
 
 import Bio
+from Bio import SeqIO
 
 with gzip.open("$reads", "rt") as handle:
-    clusters_count = len(list(Bio.SeqIO.parse(handle, "fastq")))
+    clusters_count = len(list(SeqIO.parse(handle, "fastq")))
 
 with open("$summary", "r") as summary:
     summary_lines = summary.readlines()
diff --git a/templates/preprocessing_summary.py b/templates/preprocessing_summary.py
index 2089eb7c..16f8c74a 100755
--- a/templates/preprocessing_summary.py
+++ b/templates/preprocessing_summary.py
@@ -3,18 +3,19 @@
 import gzip
 
 import Bio
+from Bio import SeqIO
 
 with gzip.open("${raw_reads[0]}", "rt") as handle:
-    raw_reads_count = len(list(Bio.SeqIO.parse(handle, "fastq")))
+    raw_reads_count = len(list(SeqIO.parse(handle, "fastq")))
 
 if "$assembled_reads" == "":
     assembled_reads_count = 0
 else:
     with gzip.open("$assembled_reads", "rt") as handle:
-        assembled_reads_count = len(list(Bio.SeqIO.parse(handle, "fastq")))  # Merged reads R1+R2
+        assembled_reads_count = len(list(SeqIO.parse(handle, "fastq")))  # Merged reads R1+R2
 
 with gzip.open("$trimmed_reads", "rt") as handle:
-    trimmed_reads_count = len(list(Bio.SeqIO.parse(handle, "fastq")))  # Filtered reads
+    trimmed_reads_count = len(list(SeqIO.parse(handle, "fastq")))  # Filtered reads
 
 if "$trimmed_adapters" == "":
     adapters_count = 0