From d0b398c7cd325a939cb32337cb2f7036229fa8f6 Mon Sep 17 00:00:00 2001
From: Gisela Gabernet <gisela.gabernet@gmail.com>
Date: Sat, 29 Apr 2023 17:01:20 -0400
Subject: [PATCH 01/16] add missing citations

---
 CITATIONS.md | 25 +++++++++++++++++++++----
 1 file changed, 21 insertions(+), 4 deletions(-)

diff --git a/CITATIONS.md b/CITATIONS.md
index 10d23253..f0871b97 100644
--- a/CITATIONS.md
+++ b/CITATIONS.md
@@ -16,10 +16,6 @@
 
   > Shifu Chen, Yanqing Zhou, Yaru Chen, Jia Gu, fastp: an ultra-fast all-in-one FASTQ preprocessor, Bioinformatics. 2018 Sept 1; 34(17):i884–i890. doi: 10.1093/bioinformatics/bty560.
 
-- [MultiQC](https://pubmed.ncbi.nlm.nih.gov/27312411/)
-
-  > Ewels P, Magnusson M, Lundin S, Käller M. MultiQC: summarize analysis results for multiple tools and samples in a single report. Bioinformatics. 2016 Oct 1;32(19):3047-8. doi: 10.1093/bioinformatics/btw354. Epub 2016 Jun 16. PubMed PMID: 27312411; PubMed Central PMCID: PMC5039924.
-
 - [pRESTO](https://doi.org/10.1093/bioinformatics/btu138)
 
   > Vander Heiden, J. A., Yaari, G., Uduman, M., Stern, J. N. H., O’Connor, K. C., Hafler, D. A., … Kleinstein, S. H. (2014). pRESTO: a toolkit for processing high-throughput sequencing raw reads of lymphocyte receptor repertoires. Bioinformatics, 30(13), 1930–1932.
@@ -32,9 +28,30 @@
 
   > Stern, J. N. H., Yaari, G., Vander Heiden, J. A., Church, G., Donahue, W. F., Hintzen, R. Q., … O’Connor, K. C. (2014). B cells populating the multiple sclerosis brain mature in the draining cervical lymph nodes. Science Translational Medicine, 6(248).
 
+- [SCOPer](https://doi.org/10.1093/bioinformatics/bty235)
+
+  > Nouri N, Kleinstein S (2018). “A spectral clustering-based method for identifying clones from high-throughput B cell repertoire sequencing data.” Bioinformatics, i341-i349.
+
+  > Nouri N, Kleinstein S (2020). “Somatic hypermutation analysis for improved identification of B cell clonal families from next-generation sequencing data.” PLOS Computational Biology, 16(6), e1007977.
+
+  > Gupta N, Adams K, Briggs A, Timberlake S, Vigneault F, Kleinstein S (2017). “Hierarchical clustering can identify B cell clones with high confidence in Ig repertoire sequencing data.” The Journal of Immunology, 2489-2499.
+
+- [Dowser](https://doi.org/10.1371/journal.pcbi.1009885)
+
+  > Hoehn K, Pybus O, Kleinstein S (2022). “Phylogenetic analysis of migration, differentiation, and class switching in B cells.” PLoS Computational Biology.
+
+- [IgPhyML](https://www.pnas.org/doi/10.1073/pnas.1906020116)
+
+  > Hoehn K, Vander Heiden J, Zhou J, Lunter G, Pybus O, Kleinstein S (2019). “Repertoire-wide phylogenetic models of B cell molecular evolution reveal evolutionary signatures of aging and vaccination.” PNAS.
+
 - [TIgGER](https://doi.org/10.1073/pnas.1417683112)
   > Gadala-maria, D., Yaari, G., Uduman, M., & Kleinstein, S. H. (2015). Automated analysis of high-throughput B-cell sequencing data reveals a high frequency of novel immunoglobulin V gene segment alleles. Proceedings of the National Academy of Sciences, 112(8), 1–9.
 
+- [MultiQC](https://pubmed.ncbi.nlm.nih.gov/27312411/)
+
+  > Ewels P, Magnusson M, Lundin S, Käller M. MultiQC: summarize analysis results for multiple tools and samples in a single report. Bioinformatics. 2016 Oct 1;32(19):3047-8. doi: 10.1093/bioinformatics/btw354. Epub 2016 Jun 16. PubMed PMID: 27312411; PubMed Central PMCID: PMC5039924.
+
+
 ## Software packaging/containerisation tools
 
 - [Anaconda](https://anaconda.com)

From 1798bcba9d8616cb1a709597904e62da78df670a Mon Sep 17 00:00:00 2001
From: Gisela Gabernet <gisela.gabernet@gmail.com>
Date: Sat, 29 Apr 2023 17:01:47 -0400
Subject: [PATCH 02/16] update report rm plotly because of memory error

---
 assets/repertoire_comparison.Rmd | 60 ++++++++++++++++++++++----------
 bin/execute_report.R             |  6 ++++
 2 files changed, 48 insertions(+), 18 deletions(-)

diff --git a/assets/repertoire_comparison.Rmd b/assets/repertoire_comparison.Rmd
index 542beba5..907777bc 100644
--- a/assets/repertoire_comparison.Rmd
+++ b/assets/repertoire_comparison.Rmd
@@ -10,10 +10,6 @@ output:
     css: ./nf-core_style.css
     highlight: pygments
     pdf_document: true
-    pandoc_args: [
-      "+RTS", "-K4000m",
-      "-RTS"
-    ]
   html_notebook:
     toc: yes
 ---
@@ -199,13 +195,22 @@ p_ca <- ggplot(abund@abundance, aes(x = rank, y = p)) +
                     ymax = upper, fill = sample_id), alpha = 0.4) +
     geom_line(aes(color = sample_id)) +
     ggtitle(abund_main) +
-    xlab("log(Rank)") + ylab("Abundance") +
+    xlab('log(Rank)') + ylab('Abundance') +
     scale_x_log10(limits = NULL,
                 breaks = scales::trans_breaks("log10", function(x) 10^x),
                 labels = scales::trans_format("log10", scales::math_format(10^.x))) +
     scale_y_continuous(labels = scales::percent) #+
     #facet_grid(cols = vars(locus), rows = vars(subject_id), scales="free", drop = T)
-ggplotly(p_ca)
+ggplot(p_ca)
+
+# Try with plotly native
+# abund@abundance %>% plot_ly( x=~rank, y=~p,
+#                             type='scatter', color=~sample_id, mode='lines') %>%
+#                     add_ribbons(ymin = ~lower,
+#                                 ymax = ~upper,
+#                                 line = list(color=~sample_id),
+#                                 fillcolor = ~sample_id)
+
 ```
 
 ```{r plot_abundance, include = FALSE}
@@ -214,6 +219,7 @@ ggsave(plot=p_ca, filename = paste0(abundance_dir,"/Clonal_abundance_subject.png
 write.table(abund@abundance, file = paste0(abundance_dir, "/Clonal_abundance_data_subject.tsv"), sep="\t", quote = F, row.names = F)
 ```
 
+
 # Clonal diversity
 
 The clonal diversity $D$ of the repertoire was calculated according to the general formula of Hill Diversity
@@ -270,7 +276,7 @@ div_p <- ggplot(sample_div@diversity, aes(x = q, y = d, group=sample_id)) +
     xlab("q") + ylab("Diversity(q)") +
     ggtitle(sample_main) #+
     #facet_grid(cols=vars(locus), rows=vars(subject_id))
-ggplotly(div_p)
+ggplot(div_p)
 ```
 ```{r plot_diversity, include = FALSE}
 ggsave(plot=div_p, filename=paste0(diversity_dir,"/Diversity_patient_grid.png"), device="png", width = 25, height = 10, units="cm")
@@ -366,16 +372,6 @@ write.table(family, file = paste0(vfamily_dir, "/V_gene_distribution_by_sequence
 
 If you use nf-core/airrflow for your analysis, please cite it using the following DOI: [10.5281/zenodo.3607408](https://doi.org/10.5281/zenodo.3607408)
 
-Please also cite the `nf-core` and `Nextflow` publications:
-
-- [nf-core](https://pubmed.ncbi.nlm.nih.gov/32055031/)
-
-  > Ewels PA, Peltzer A, Fillinger S, Patel H, Alneberg J, Wilm A, Garcia MU, Di Tommaso P, Nahnsen S. The nf-core framework for community-curated bioinformatics pipelines. Nat Biotechnol. 2020 Mar;38(3):276-278. doi: 10.1038/s41587-020-0439-x. PubMed PMID: 32055031.
-
-- [Nextflow](https://pubmed.ncbi.nlm.nih.gov/28398311/)
-
-  > Di Tommaso P, Chatzou M, Floden EW, Barja PP, Palumbo E, Notredame C. Nextflow enables reproducible computational workflows. Nat Biotechnol. 2017 Apr 11;35(4):316-319. doi: 10.1038/nbt.3820. PubMed PMID: 28398311.
-
 In addition, citations for the tools and data used in this pipeline are as follows:
 
 - [pRESTO](https://doi.org/10.1093/bioinformatics/btu138)
@@ -386,12 +382,40 @@ In addition, citations for the tools and data used in this pipeline are as follo
 
   > Gupta, N. T., Vander Heiden, J. A., Uduman, M., Gadala-Maria, D., Yaari, G., & Kleinstein, S. H. (2015). Change-O: a toolkit for analyzing large-scale B cell immunoglobulin repertoire sequencing data: Table 1. Bioinformatics, 31(20), 3356–3358.
 
+- [IgBLAST](https://doi.org/10.1093/nar/gkt382)
+
+  > Ye, J., Ma, N., Madden, T. L., & Ostell, J. M. (2013). IgBLAST: An immunoglobulin variable domain sequence analysis tool. Nucleic Acids Research, 41(Web Server issue), W34.
+
 - [Alakazam](https://doi.org/10.1126/scitranslmed.3008879)
 
-  > Stern, J. N. H., Yaari, G., Vander Heiden, J. A., Church, G., Donahue, W. F., Hintzen, R. Q., … O’Connor, K. C. (2014). B cells populating the multiple sclerosis brain mature in the draining cervical lymph nodes. Science Translational Medicine, 6(248).
+  > Stern, J. N. H., Yaari, G., Vander Heiden, J. A., Church, G., Donahue, W. F., Hintzen, R. Q., … O’Connor, K. C. (2014). B cells populating the multiple sclerosis brain mature in the draining cervical lymph nodes. Science Translational Medicine, 6(248), 248ra107.
+
+- [SCOPer](https://doi.org/10.1093/bioinformatics/bty235)
+
+  > Nouri N, Kleinstein S (2018). “A spectral clustering-based method for identifying clones from high-throughput B cell repertoire sequencing data.” Bioinformatics, i341-i349.
+
+  > Nouri N, Kleinstein S (2020). “Somatic hypermutation analysis for improved identification of B cell clonal families from next-generation sequencing data.” PLOS Computational Biology, 16(6), e1007977.
+
+  > Gupta N, Adams K, Briggs A, Timberlake S, Vigneault F, Kleinstein S (2017). “Hierarchical clustering can identify B cell clones with high confidence in Ig repertoire sequencing data.” The Journal of Immunology, 2489-2499.
+
+- [Dowser](https://doi.org/10.1371/journal.pcbi.1009885)
+
+  > Hoehn K, Pybus O, Kleinstein S (2022). “Phylogenetic analysis of migration, differentiation, and class switching in B cells.” PLoS Computational Biology.
+
+- [IgPhyML](https://www.pnas.org/doi/10.1073/pnas.1906020116)
+
+  > Hoehn K, Vander Heiden J, Zhou J, Lunter G, Pybus O, Kleinstein S (2019). “Repertoire-wide phylogenetic models of B cell molecular evolution reveal evolutionary signatures of aging and vaccination.” PNAS.
+
+- [TIgGER](https://doi.org/10.1073/pnas.1417683112)
+
+  > Gadala-Maria, D., Yaari, G., Uduman, M., & Kleinstein, S. H. (2015). Automated analysis of high-throughput B-cell sequencing data reveals a high frequency of novel immunoglobulin V gene segment alleles. Proceedings of the National Academy of Sciences, 112(8), 1–9.
 
 - [FastQC](https://www.bioinformatics.babraham.ac.uk/projects/fastqc/)
 
+- [Fastp](https://doi.org/10.1093/bioinformatics/bty560)
+
+  > Shifu Chen, Yanqing Zhou, Yaru Chen, Jia Gu, fastp: an ultra-fast all-in-one FASTQ preprocessor, Bioinformatics. 2018 Sept 1; 34(17):i884–i890.
+
 - [MultiQC](https://pubmed.ncbi.nlm.nih.gov/27312411/)
 
   > Ewels P, Magnusson M, Lundin S, Käller M. MultiQC: summarize analysis results for multiple tools and samples in a single report. Bioinformatics. 2016 Oct 1;32(19):3047-8. doi: 10.1093/bioinformatics/btw354. Epub 2016 Jun 16. PubMed PMID: 27312411; PubMed Central PMCID: PMC5039924.
diff --git a/bin/execute_report.R b/bin/execute_report.R
index c1e090f8..c0d0b448 100755
--- a/bin/execute_report.R
+++ b/bin/execute_report.R
@@ -4,13 +4,19 @@
 library(rmarkdown)
 library(optparse)
 
+
 option_list = list(
     make_option(c("-r", "--report_file"), type="character", default=NULL, help="report rmarkdown file", metavar="character")
 )
 
+
 opt_parser = OptionParser(option_list=option_list)
 opt = parse_args(opt_parser)
 
+
+# Set the pandoc stack size (replaces the "+RTS -K4000m -RTS" pandoc_args removed from the report's YAML header)
+options(pandoc.stack.size="4000m")
+
 wd=getwd()
 
 rmarkdown::render(opt$report_file, output_file = "Airrflow_report.html", knit_root_dir = wd, output_dir = wd)

From eecb3a4bba595b9324d2e08d8dc0025c212adb27 Mon Sep 17 00:00:00 2001
From: Gisela Gabernet <gisela.gabernet@gmail.com>
Date: Mon, 1 May 2023 14:56:17 -0400
Subject: [PATCH 03/16] revert abundance and diversity without plotly

---
 assets/repertoire_comparison.Rmd | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/assets/repertoire_comparison.Rmd b/assets/repertoire_comparison.Rmd
index 907777bc..33c73e43 100644
--- a/assets/repertoire_comparison.Rmd
+++ b/assets/repertoire_comparison.Rmd
@@ -201,7 +201,7 @@ p_ca <- ggplot(abund@abundance, aes(x = rank, y = p)) +
                 labels = scales::trans_format("log10", scales::math_format(10^.x))) +
     scale_y_continuous(labels = scales::percent) #+
     #facet_grid(cols = vars(locus), rows = vars(subject_id), scales="free", drop = T)
-ggplot(p_ca)
+p_ca
 
 # Try with plotly native
 # abund@abundance %>% plot_ly( x=~rank, y=~p,
@@ -276,7 +276,7 @@ div_p <- ggplot(sample_div@diversity, aes(x = q, y = d, group=sample_id)) +
     xlab("q") + ylab("Diversity(q)") +
     ggtitle(sample_main) #+
     #facet_grid(cols=vars(locus), rows=vars(subject_id))
-ggplot(div_p)
+div_p
 ```
 ```{r plot_diversity, include = FALSE}
 ggsave(plot=div_p, filename=paste0(diversity_dir,"/Diversity_patient_grid.png"), device="png", width = 25, height = 10, units="cm")

From 9e5ead6cb42cb12be84d8d3b108b5ac7c7dcbf03 Mon Sep 17 00:00:00 2001
From: Gisela Gabernet <gisela.gabernet@gmail.com>
Date: Wed, 3 May 2023 13:32:49 -0400
Subject: [PATCH 04/16] do not ignore dowser errors

---
 modules/local/enchantr/dowser_lineages.nf | 1 -
 1 file changed, 1 deletion(-)

diff --git a/modules/local/enchantr/dowser_lineages.nf b/modules/local/enchantr/dowser_lineages.nf
index 61aca2ec..37eaad05 100644
--- a/modules/local/enchantr/dowser_lineages.nf
+++ b/modules/local/enchantr/dowser_lineages.nf
@@ -19,7 +19,6 @@ process DOWSER_LINEAGES {
     tag "${meta.id}"
 
     label 'process_long_parallelized'
-    label 'error_ignore'
     label 'immcantation'
 
     conda "bioconda::r-enchantr=0.1.1"

From fd2d04148bf86ce0dadbf66d748c965b8b63d903 Mon Sep 17 00:00:00 2001
From: Gisela Gabernet <gisela.gabernet@gmail.com>
Date: Thu, 11 May 2023 10:44:19 -0400
Subject: [PATCH 05/16] parametrize find threshold

---
 conf/modules.config                      |  5 +++++
 modules/local/enchantr/find_threshold.nf | 21 +++++++++++++++++++--
 2 files changed, 24 insertions(+), 2 deletions(-)

diff --git a/conf/modules.config b/conf/modules.config
index adf238b0..24385e45 100644
--- a/conf/modules.config
+++ b/conf/modules.config
@@ -407,6 +407,11 @@ process {
             mode: params.publish_dir_mode,
             saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
         ]
+        ext.args = ['findthreshold_method':'gmm',
+                    'findthreshold_model':'gamma-norm',
+                    'findthreshold_edge':0.9,
+                    'findthreshold_cutoff':'user',
+                    'findthreshold_spc':0.995]
     }
 
     withName: REPORT_THRESHOLD {
diff --git a/modules/local/enchantr/find_threshold.nf b/modules/local/enchantr/find_threshold.nf
index 62de6b82..01f44e3d 100644
--- a/modules/local/enchantr/find_threshold.nf
+++ b/modules/local/enchantr/find_threshold.nf
@@ -1,3 +1,20 @@
+def asString (args) { // Serialise a map of options into a ",'key'=value" string
+    def s = ""
+    def value = ""
+    if (args.size()>0) {
+        if (args[0] != 'none') { // NOTE(review): on a Map this looks up key 0 — presumably a 'none' sentinel guard; confirm intent
+            for (param in args.keySet().sort()){ // iterate keys in sorted order
+                value = args[param].toString()
+                if (!value.isNumber()) { // non-numeric values are wrapped in single quotes
+                    value = "'"+value+"'"
+                }
+                s = s + ",'"+param+"'="+value // each pair is prefixed with a comma
+            }
+        }
+    }
+    return s
+}
+
 process FIND_THRESHOLD {
     tag "all_reps"
 
@@ -25,7 +42,7 @@ process FIND_THRESHOLD {
     script:
     """
     Rscript -e "enchantr::enchantr_report('find_threshold', \\
-        report_params=list('input'='${tab}',\\
+        report_params=list('input'='${tab.join(',')}',\\
             'cloneby'='${params.cloneby}',\\
             'crossby'='${params.crossby}',\\
             'singlecell'='${params.singlecell}',\\
@@ -33,7 +50,7 @@ process FIND_THRESHOLD {
             'nproc'=${task.cpus},\\
             'outname'='all_reps',\\
             'log'='all_reps_threshold_command_log',\\
-            'logo'='${logo}'))"
+            'logo'='${logo}' ${args}))"
 
     echo "${task.process}": > versions.yml
     Rscript -e "cat(paste0('  enchantr: ',packageVersion('enchantr'),'\n'))" >> versions.yml

From e549b8ae18b60ac079492bead028a71f8ec3c25c Mon Sep 17 00:00:00 2001
From: Gisela Gabernet <gisela.gabernet@gmail.com>
Date: Thu, 11 May 2023 10:44:31 -0400
Subject: [PATCH 06/16] revert fixes for slurm size limit

---
 modules/local/enchantr/report_file_size.nf      |  3 ++-
 subworkflows/local/clonal_analysis.nf           | 16 +---------------
 .../local/repertoire_analysis_reporting.nf      | 17 +----------------
 3 files changed, 4 insertions(+), 32 deletions(-)

diff --git a/modules/local/enchantr/report_file_size.nf b/modules/local/enchantr/report_file_size.nf
index 51775767..fcf7f670 100644
--- a/modules/local/enchantr/report_file_size.nf
+++ b/modules/local/enchantr/report_file_size.nf
@@ -22,8 +22,9 @@ process REPORT_FILE_SIZE {
 
     script:
     """
+    echo "${logs.join('\n')}" > logs.txt
     Rscript -e "enchantr::enchantr_report('file_size', \\
-        report_params=list('input'='${logs}', 'metadata'='${metadata}',\\
+        report_params=list('input'='logs.txt', 'metadata'='${metadata}',\\
         'outdir'=getwd()))"
 
     echo "\"${task.process}\":" > versions.yml
diff --git a/subworkflows/local/clonal_analysis.nf b/subworkflows/local/clonal_analysis.nf
index ddf31550..91921756 100644
--- a/subworkflows/local/clonal_analysis.nf
+++ b/subworkflows/local/clonal_analysis.nf
@@ -19,9 +19,6 @@ workflow CLONAL_ANALYSIS {
 
         ch_find_threshold = ch_repertoire.map{ it -> it[1] }
                                         .collect()
-                                        .flatten()
-                                        .map{ it -> it.toString() }
-                                        .collectFile(name: 'find_threshold_tabs.txt', newLine: true)
 
         FIND_CLONAL_THRESHOLD (
             ch_find_threshold,
@@ -42,9 +39,6 @@ workflow CLONAL_ANALYSIS {
 
         ch_find_threshold = ch_repertoire.map{ it -> it[1] }
                                         .collect()
-                                        .flatten()
-                                        .map{ it -> it.toString() }
-                                        .collectFile(name: 'report_threshold_tabs.txt', newLine: true)
 
         REPORT_THRESHOLD (
             ch_find_threshold,
@@ -82,16 +76,8 @@ workflow CLONAL_ANALYSIS {
 
     if (!params.skip_all_clones_report){
 
-        ch_all_repertoires_cloned_tabs = ch_all_repertoires_cloned.map{ it -> it[1] }
-                                            .collect()
-                                            .flatten()
-                                            .map{ it -> it.toString() }
-                                            .dump(tag: 'ch_all_repertoires_cloned_tabs')
-                                            .collectFile(name: 'all_repertoires_cloned_tabs.txt', newLine: true)
-                                            .map { it -> [ [id:'all_reps'], it ] }
-
         DEFINE_CLONES_REPORT(
-            ch_all_repertoires_cloned_tabs,
+            ch_all_repertoires_cloned,
             clone_threshold.collect(),
             ch_imgt.collect()
         )
diff --git a/subworkflows/local/repertoire_analysis_reporting.nf b/subworkflows/local/repertoire_analysis_reporting.nf
index dd3a7aec..877494b6 100644
--- a/subworkflows/local/repertoire_analysis_reporting.nf
+++ b/subworkflows/local/repertoire_analysis_reporting.nf
@@ -56,27 +56,12 @@ workflow REPERTOIRE_ANALYSIS_REPORTING {
                                         ch_sc_qc_and_filter_logs,
                                         ch_clonal_analysis_logs)
 
-    ch_logs_tabs =  ch_logs.collect()
-                        .flatten()
-                        .map{ it -> it.toString() }
-                        .dump(tag: 'ch_logs_tabs')
-                        .collectFile(name: 'all_logs_tabs.txt', newLine: true)
-
     REPORT_FILE_SIZE(
-        ch_logs_tabs.ifEmpty([]),
+        ch_logs.collect().ifEmpty([]),
         ch_metadata
     )
     ch_versions = ch_versions.mix(REPORT_FILE_SIZE.out.versions)
 
-    ch_repertoires_report = ch_repertoires
-        .map{ it -> it[1] }
-        .collect()
-        .flatten()
-        .map{ it -> it.toString() }
-        .dump(tag: 'ch_repertoires_report')
-        .collectFile(name: 'all_repertoires_report_tabs.txt', newLine: true)
-        .map { it -> [ [id:'all_reps'], it ] }
-
     AIRRFLOW_REPORT(
         ch_repertoires_report,
         ch_parsed_logs.collect().ifEmpty([]),

From 3900b52b961c9bbc64dfaa3a40b1be07965d012f Mon Sep 17 00:00:00 2001
From: Gisela Gabernet <gisela.gabernet@gmail.com>
Date: Mon, 15 May 2023 16:31:11 -0400
Subject: [PATCH 07/16] fix find threshold report analysis

---
 modules/local/enchantr/find_threshold.nf            | 1 +
 subworkflows/local/repertoire_analysis_reporting.nf | 2 +-
 2 files changed, 2 insertions(+), 1 deletion(-)

diff --git a/modules/local/enchantr/find_threshold.nf b/modules/local/enchantr/find_threshold.nf
index 01f44e3d..ed10b1c1 100644
--- a/modules/local/enchantr/find_threshold.nf
+++ b/modules/local/enchantr/find_threshold.nf
@@ -40,6 +40,7 @@ process FIND_THRESHOLD {
     path "versions.yml", emit: versions
 
     script:
+    def args = asString(task.ext.args) ?: ''
     """
     Rscript -e "enchantr::enchantr_report('find_threshold', \\
         report_params=list('input'='${tab.join(',')}',\\
diff --git a/subworkflows/local/repertoire_analysis_reporting.nf b/subworkflows/local/repertoire_analysis_reporting.nf
index 877494b6..bab6b21d 100644
--- a/subworkflows/local/repertoire_analysis_reporting.nf
+++ b/subworkflows/local/repertoire_analysis_reporting.nf
@@ -63,7 +63,7 @@ workflow REPERTOIRE_ANALYSIS_REPORTING {
     ch_versions = ch_versions.mix(REPORT_FILE_SIZE.out.versions)
 
     AIRRFLOW_REPORT(
-        ch_repertoires_report,
+        ch_repertoires,
         ch_parsed_logs.collect().ifEmpty([]),
         REPORT_FILE_SIZE.out.table.ifEmpty([]),
         ch_report_rmd,

From de54d75d69afed48ac1f0036dc6cfe250e1ba010 Mon Sep 17 00:00:00 2001
From: Gisela Gabernet <gisela.gabernet@gmail.com>
Date: Mon, 15 May 2023 16:34:03 -0400
Subject: [PATCH 08/16] revert parametrize findthreshold

---
 conf/modules.config | 5 -----
 1 file changed, 5 deletions(-)

diff --git a/conf/modules.config b/conf/modules.config
index 24385e45..adf238b0 100644
--- a/conf/modules.config
+++ b/conf/modules.config
@@ -407,11 +407,6 @@ process {
             mode: params.publish_dir_mode,
             saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
         ]
-        ext.args = ['findthreshold_method':'gmm',
-                    'findthreshold_model':'gamma-norm',
-                    'findthreshold_edge':0.9,
-                    'findthreshold_cutoff':'user',
-                    'findthreshold_spc':0.995]
     }
 
     withName: REPORT_THRESHOLD {

From 38da0546b5d1baa9afe318d14b8a1895bdc26e0d Mon Sep 17 00:00:00 2001
From: Gisela Gabernet <gisela.gabernet@gmail.com>
Date: Mon, 15 May 2023 16:43:01 -0400
Subject: [PATCH 09/16] fix bug asString function call when no args

---
 modules/local/enchantr/define_clones.nf   | 2 +-
 modules/local/enchantr/dowser_lineages.nf | 2 +-
 modules/local/enchantr/find_threshold.nf  | 2 +-
 modules/local/enchantr/single_cell_qc.nf  | 2 +-
 4 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/modules/local/enchantr/define_clones.nf b/modules/local/enchantr/define_clones.nf
index afec5b63..a4081e84 100644
--- a/modules/local/enchantr/define_clones.nf
+++ b/modules/local/enchantr/define_clones.nf
@@ -39,7 +39,7 @@ process DEFINE_CLONES {
 
 
     script:
-    def args = asString(task.ext.args) ?: ''
+    def args = task.ext.args ? asString(task.ext.args) : ''
     def thr = threshold.join("")
     """
     Rscript -e "enchantr::enchantr_report('define_clones', \\
diff --git a/modules/local/enchantr/dowser_lineages.nf b/modules/local/enchantr/dowser_lineages.nf
index 37eaad05..e408a9df 100644
--- a/modules/local/enchantr/dowser_lineages.nf
+++ b/modules/local/enchantr/dowser_lineages.nf
@@ -35,7 +35,7 @@ process DOWSER_LINEAGES {
     path "versions.yml", emit: versions
 
     script:
-    def args = asString(task.ext.args) ?: ''
+    def args = task.ext.args ? asString(task.ext.args) : ''
     def id_name = "$tabs".replaceFirst('__.*','')
     // TODO use nice outname, not tabs
     """
diff --git a/modules/local/enchantr/find_threshold.nf b/modules/local/enchantr/find_threshold.nf
index ed10b1c1..22f9ad69 100644
--- a/modules/local/enchantr/find_threshold.nf
+++ b/modules/local/enchantr/find_threshold.nf
@@ -40,7 +40,7 @@ process FIND_THRESHOLD {
     path "versions.yml", emit: versions
 
     script:
-    def args = asString(task.ext.args) ?: ''
+    def args = task.ext.args ? asString(task.ext.args) : ''
     """
     Rscript -e "enchantr::enchantr_report('find_threshold', \\
         report_params=list('input'='${tab.join(',')}',\\
diff --git a/modules/local/enchantr/single_cell_qc.nf b/modules/local/enchantr/single_cell_qc.nf
index 813ff127..673a7886 100644
--- a/modules/local/enchantr/single_cell_qc.nf
+++ b/modules/local/enchantr/single_cell_qc.nf
@@ -35,7 +35,7 @@ process SINGLE_CELL_QC {
     path("versions.yml"), emit: versions
 
     script:
-    def args = asString(task.ext.args) ?: ''
+    def args = task.ext.args ? asString(task.ext.args) : ''
     """
     echo "${tabs.join('\n')}" > tabs.txt
     Rscript -e "enchantr::enchantr_report('single_cell_qc', \\

From 62455844e2c5c1bf10db4e206e421216b340dfb8 Mon Sep 17 00:00:00 2001
From: Gisela Gabernet <gisela.gabernet@gmail.com>
Date: Thu, 18 May 2023 09:04:14 -0400
Subject: [PATCH 10/16] fix report reading tab

---
 assets/repertoire_comparison.Rmd | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/assets/repertoire_comparison.Rmd b/assets/repertoire_comparison.Rmd
index 33c73e43..8a181523 100644
--- a/assets/repertoire_comparison.Rmd
+++ b/assets/repertoire_comparison.Rmd
@@ -42,6 +42,9 @@ dir.create(outdir)
 seq_dir <- paste(outdir, "Sequence_numbers_summary", sep="/")
 dir.create(seq_dir)
 
+# Read data from the work directory
+datadir <- "."
+
 ```
 
 # Number of sequences
@@ -139,7 +142,7 @@ ggplotly(seqs_plot_assembled)
 ```{r read_data, include=FALSE}
 # paths to the files are found in the first column of all_repertoires_report_tabs.txt,
 # in the current folder
-all_files <- read.delim("all_repertoires_report_tabs.txt", header=F, sep="\t")[[1]]
+all_files <- system(paste0("find '",datadir,"' -name '*clone-pass.tsv'"), intern=T)
 
 diversity_dir <- paste(outdir, "Diversity", sep="/")
 abundance_dir <- paste(outdir, "Abundance", sep="/")

From 62f93a7a41e9de66040ef8f224d0b1b4aafc39e0 Mon Sep 17 00:00:00 2001
From: Gisela Gabernet <gisela.gabernet@gmail.com>
Date: Wed, 24 May 2023 13:44:24 -0400
Subject: [PATCH 11/16] fix actions checkout

---
 .github/workflows/ci.yml              | 2 +-
 .github/workflows/ci_immcantation.yml | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index c4955ad9..29fa81bd 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -52,7 +52,7 @@ jobs:
       fail-fast: false
     steps:
       - name: Check out pipeline code
-        uses: actions/checkout@v2
+        uses: actions/checkout@v3
 
       - name: Install Nextflow
         uses: nf-core/setup-nextflow@v1
diff --git a/.github/workflows/ci_immcantation.yml b/.github/workflows/ci_immcantation.yml
index 13d1d4d7..73995668 100644
--- a/.github/workflows/ci_immcantation.yml
+++ b/.github/workflows/ci_immcantation.yml
@@ -29,7 +29,7 @@ jobs:
       fail-fast: false
     steps:
       - name: Check out pipeline code
-        uses: actions/checkout@v2
+        uses: actions/checkout@v3
 
       - name: Install Nextflow
         uses: nf-core/setup-nextflow@v1

From ac468d3a96bd0e2f740fce417c346ef99d253a84 Mon Sep 17 00:00:00 2001
From: Gisela Gabernet <gisela.gabernet@gmail.com>
Date: Wed, 24 May 2023 14:38:51 -0400
Subject: [PATCH 12/16] clean report comments

---
 assets/repertoire_comparison.Rmd | 25 ++++++-------------------
 1 file changed, 6 insertions(+), 19 deletions(-)

diff --git a/assets/repertoire_comparison.Rmd b/assets/repertoire_comparison.Rmd
index 8a181523..4ff18aae 100644
--- a/assets/repertoire_comparison.Rmd
+++ b/assets/repertoire_comparison.Rmd
@@ -202,17 +202,9 @@ p_ca <- ggplot(abund@abundance, aes(x = rank, y = p)) +
     scale_x_log10(limits = NULL,
                 breaks = scales::trans_breaks("log10", function(x) 10^x),
                 labels = scales::trans_format("log10", scales::math_format(10^.x))) +
-    scale_y_continuous(labels = scales::percent) #+
-    #facet_grid(cols = vars(locus), rows = vars(subject_id), scales="free", drop = T)
-p_ca
+    scale_y_continuous(labels = scales::percent)
 
-# Try with plotly native
-# abund@abundance %>% plot_ly( x=~rank, y=~p,
-#                             type='scatter', color=~sample_id, mode='lines') %>%
-#                     add_ribbons(ymin = ~lower,
-#                                 ymax = ~upper,
-#                                 line = list(color=~sample_id),
-#                                 fillcolor = ~sample_id)
+p_ca
 
 ```
 
@@ -277,8 +269,8 @@ div_p <- ggplot(sample_div@diversity, aes(x = q, y = d, group=sample_id)) +
                     fill = sample_id), alpha = 0.4) +
     geom_line(aes(color = sample_id)) +
     xlab("q") + ylab("Diversity(q)") +
-    ggtitle(sample_main) #+
-    #facet_grid(cols=vars(locus), rows=vars(subject_id))
+    ggtitle(sample_main)
+
 div_p
 ```
 ```{r plot_diversity, include = FALSE}
@@ -308,7 +300,6 @@ g2 <- ggplot(family, aes(x=gene, y=clone_freq, fill=sample_id, group=sample_id))
     theme(axis.text.x=element_text(angle=45, hjust=1, vjust=1)) +
     ylab("Frequency") +
     xlab("") +
-    #facet_grid(cols=vars(locus), rows=vars(subject_id)) +
     theme(legend.position = "right")
 ggplotly(g2)
 
@@ -337,9 +328,7 @@ g2 <- ggplot(family, aes(x=gene, y=clone_freq, fill=sample_id, group=sample_id))
     ggtitle("V Gene Family Usage") +
     theme(axis.text.x=element_text(angle=45, hjust=1, vjust=1)) +
     ylab("Frequency") +
-    xlab("") #+
-    #facet_wrap(vars(subject_id,locus), scales="free_x", ncol = 1) +
-    #theme(legend.position = "none")
+    xlab("")
 ggplotly(g2)
 ggsave(filename = paste0(vfamily_dir, "/V_gene_distribution_by_clone_patient.pdf"), plot = g2, width = 20, height = 40, units = "cm")
 ggsave(filename = paste0(vfamily_dir, "/V_gene_distribution_by_clone_patient.png"), plot = g2, width = 20, height = 40, units = "cm")
@@ -361,9 +350,7 @@ g2 <- ggplot(family, aes(x=gene, y=seq_freq, fill=sample_id, group=sample_id)) +
     ggtitle("V Gene Usage") +
     theme(axis.text.x=element_text(angle=45, hjust=1, vjust=1)) +
     ylab("Frequency") +
-    xlab("") #+
-    #facet_wrap(vars(subject_id,locus), scales="free_x", ncol = 1) +
-    #theme(legend.position = "none")
+    xlab("")
 ggplotly(g2)
 ggsave(filename = paste0(vfamily_dir, "/V_gene_distribution_by_sequence_patient.pdf"), plot = g2, width = 20, height = 40, units = "cm")
 ggsave(filename = paste0(vfamily_dir, "/V_gene_distribution_by_sequence_patient.png"), plot = g2, width = 20, height = 40, units = "cm")

From 395952557e19ebdd68f750784ea78adbd59cacaf Mon Sep 17 00:00:00 2001
From: Gisela Gabernet <gisela.gabernet@gmail.com>
Date: Wed, 24 May 2023 14:58:59 -0400
Subject: [PATCH 13/16] rm directcall igblast nasty workaround

---
 .github/workflows/ci.yml             |  2 +-
 CHANGELOG.md                         |  7 ++++-
 conf/modules.config                  | 19 ------------
 conf/test_igblast.config             | 46 ----------------------------
 nextflow.config                      |  2 --
 nextflow_schema.json                 |  5 ---
 subworkflows/local/vdj_annotation.nf | 43 +++++++++-----------------
 7 files changed, 22 insertions(+), 102 deletions(-)
 delete mode 100644 conf/test_igblast.config

diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index 29fa81bd..ddf7f1da 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -48,7 +48,7 @@ jobs:
         NXF_VER:
           - "22.10.1"
           - "latest-everything"
-        profile: ["test_tcr", "test_no_umi", "test_nocluster", "test_fetchimgt", "test_assembled", "test_igblast"]
+        profile: ["test_tcr", "test_no_umi", "test_nocluster", "test_fetchimgt", "test_assembled"]
       fail-fast: false
     steps:
       - name: Check out pipeline code
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 4414dce7..e5d15661 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -3,15 +3,20 @@
 The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/)
 and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.html).
 
-## [3.0.1dev] - "Portus" Hotfix
+## [3.1] - "Portus" Hotfix
 
 ### `Added`
 
 - [#250](https://github.com/nf-core/airrflow/pull/250) Back to `dev`.
+- [#256](https://github.com/nf-core/airrflow/pull/256) Template update to nf-core/tools v2.8
+
+
 
 ### `Fixed`
 
 - [#250](https://github.com/nf-core/airrflow/pull/250) Fixed log parsing with `removeprefix` instead of `lstrip`.
+- [#258](https://github.com/nf-core/airrflow/pull/258) Fixes to plotly plots in report sometimes not rendering.
+- [#258](https://github.com/nf-core/airrflow/pull/258) Remove direct call to Igblast in favor of a fix in ChangeoO.
 
 ## [3.0] - 2023-03-20 "Portus"
 
diff --git a/conf/modules.config b/conf/modules.config
index adf238b0..d16975a4 100644
--- a/conf/modules.config
+++ b/conf/modules.config
@@ -299,15 +299,6 @@ process {
         ext.args = '--format blast'
     }
 
-    withName: IGBLAST_ASSIGNGENES {
-        publishDir = [
-            path: { "${params.outdir}/vdj_annotation/01-assign-genes/${meta.id}" },
-            mode: params.publish_dir_mode,
-            saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
-        ]
-        ext.args = "-domain_system imgt -ig_seqtype Ig -outfmt 19 -show_translation"
-    }
-
     withName: CHANGEO_MAKEDB {
         publishDir = [
             path: { "${params.outdir}/vdj_annotation/02-make-db/${meta.id}" },
@@ -450,16 +441,6 @@ process {
         ext.args = ['build':'igphyml']
     }
 
-//    withName: CHANGEO_PARSEDB_SELECT {
-//        publishDir = [
-//            path: { "${params.outdir}/changeo/04-parsedb-select/${meta.id}" },
-//            mode: params.publish_dir_mode,
-//            saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
-//        ]
-//        ext.args = '-f v_call j_call -u "IG[HLK]" --regex --logic all'
-//        ext.args2 = '-f v_call j_call -u "TR" --regex --logic all'
-//    }
-
     // -------------------------------
     // Reports
     // -------------------------------
diff --git a/conf/test_igblast.config b/conf/test_igblast.config
deleted file mode 100644
index e1eb2082..00000000
--- a/conf/test_igblast.config
+++ /dev/null
@@ -1,46 +0,0 @@
-/*
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-    Nextflow config file for running minimal tests
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-    Defines input files and everything required to run a fast and simple pipeline test.
-
-    Use as follows:
-        nextflow run nf-core/airrflow -profile test,<docker/singularity> --outdir <OUTDIR>
-
-----------------------------------------------------------------------------------------
-*/
-
-params {
-    config_profile_name        = 'Test igblast profile'
-    config_profile_description = 'Test pipeline with direct call to igblast.'
-
-    // Limit resources so that this can run on GitHub Actions
-    max_cpus   = 2
-    max_memory = '6.GB'
-    max_time   = '6.h'
-
-    // Input data
-    input = 'https://raw.githubusercontent.com/nf-core/test-datasets/airrflow/testdata-bcr/Metadata_test_airr.tsv'
-    cprimers = 'https://raw.githubusercontent.com/nf-core/test-datasets/airrflow/testdata-bcr/C_primers.fasta'
-    vprimers = 'https://raw.githubusercontent.com/nf-core/test-datasets/airrflow/testdata-bcr/V_primers.fasta'
-    imgtdb_base = 'https://raw.githubusercontent.com/nf-core/test-datasets/airrflow/database-cache/imgtdb_base.zip'
-    igblast_base = 'https://raw.githubusercontent.com/nf-core/test-datasets/airrflow/database-cache/igblast_base.zip'
-
-    mode = 'fastq'
-
-    library_generation_method = 'specific_pcr_umi'
-    cprimer_position = 'R1'
-    umi_length = 8
-    umi_start = 6
-    umi_position = 'R1'
-    index_file = true
-    directcall_igblast = true
-}
-
-process{
-    withName:"DEFINE_CLONES*"{
-        ext.args = ['outname':'', 'model':'hierarchical',
-                    'method':'nt', 'linkage':'single',
-                    'outputby':'sample_id', 'min_n':10]
-    }
-}
diff --git a/nextflow.config b/nextflow.config
index 8c7744f7..ca67f548 100644
--- a/nextflow.config
+++ b/nextflow.config
@@ -63,8 +63,6 @@ params {
     igblast_base = null
     imgtdb_base = null
     save_databases = true
-    directcall_igblast = false
-
 
     // -----------------------
     // bulk filtering options
diff --git a/nextflow_schema.json b/nextflow_schema.json
index 49949544..8b6194e9 100644
--- a/nextflow_schema.json
+++ b/nextflow_schema.json
@@ -290,11 +290,6 @@
                     "description": "Path to the cached igblast database.",
                     "help_text": "If it is not provided, the database will be newly downloaded.",
                     "fa_icon": "fas fa-database"
-                },
-                "directcall_igblast": {
-                    "type": "boolean",
-                    "fa_icon": "fas fa-align-center",
-                    "description": "Use this flag for directly calling IgBlast for reference alignment, instead of using the `changeo assigngenes` and `changeo makedb` options that call IgBlast in the background.  Allows  for additional configuration of the IgBlast call."
                 }
             },
             "fa_icon": "fas fa-edit"
diff --git a/subworkflows/local/vdj_annotation.nf b/subworkflows/local/vdj_annotation.nf
index 8a68fb60..31ed0fd6 100644
--- a/subworkflows/local/vdj_annotation.nf
+++ b/subworkflows/local/vdj_annotation.nf
@@ -61,38 +61,25 @@ workflow VDJ_ANNOTATION {
         ch_imgt = FETCH_DATABASES.out.imgt
         ch_versions = ch_versions.mix(FETCH_DATABASES.out.versions.ifEmpty(null))
     }
-    if (!params.directcall_igblast){
-        CHANGEO_ASSIGNGENES (
-            ch_fasta,
-            ch_igblast.collect()
-        )
-
-        ch_logs = ch_logs.mix(CHANGEO_ASSIGNGENES.out.logs)
-        ch_versions = ch_versions.mix(CHANGEO_ASSIGNGENES.out.versions.ifEmpty(null))
 
-        CHANGEO_MAKEDB (
-            CHANGEO_ASSIGNGENES.out.fasta,
-            CHANGEO_ASSIGNGENES.out.blast,
-            ch_imgt.collect()
-        )
-        ch_logs = ch_logs.mix(CHANGEO_MAKEDB.out.logs)
-        ch_versions = ch_versions.mix(CHANGEO_MAKEDB.out.versions.ifEmpty(null))
+    CHANGEO_ASSIGNGENES (
+        ch_fasta,
+        ch_igblast.collect()
+    )
 
-        ch_assigned_tab = CHANGEO_MAKEDB.out.tab
-        ch_assignment_logs = CHANGEO_MAKEDB.out.logs
+    ch_logs = ch_logs.mix(CHANGEO_ASSIGNGENES.out.logs)
+    ch_versions = ch_versions.mix(CHANGEO_ASSIGNGENES.out.versions.ifEmpty(null))
 
-    } else {
-        IGBLAST_ASSIGNGENES(
-            ch_fasta,
-            ch_igblast.collect()
-        )
-
-        ch_assigned_tab = IGBLAST_ASSIGNGENES.out.tab
+    CHANGEO_MAKEDB (
+        CHANGEO_ASSIGNGENES.out.fasta,
+        CHANGEO_ASSIGNGENES.out.blast,
+        ch_imgt.collect()
+    )
+    ch_logs = ch_logs.mix(CHANGEO_MAKEDB.out.logs)
+    ch_versions = ch_versions.mix(CHANGEO_MAKEDB.out.versions.ifEmpty(null))
 
-        ch_logs = ch_logs.mix(IGBLAST_ASSIGNGENES.out.logs)
-        ch_versions = ch_versions.mix(IGBLAST_ASSIGNGENES.out.versions.ifEmpty(null))
-        ch_assignment_logs = IGBLAST_ASSIGNGENES.out.makedb_log
-    }
+    ch_assigned_tab = CHANGEO_MAKEDB.out.tab
+    ch_assignment_logs = CHANGEO_MAKEDB.out.logs
 
     // Apply quality filters:
     // - locus should match v_call chain

From 0deb76c182594b826b9bf4cd23efb7d4010354b9 Mon Sep 17 00:00:00 2001
From: Gisela Gabernet <gisela.gabernet@gmail.com>
Date: Wed, 24 May 2023 15:04:53 -0400
Subject: [PATCH 14/16] fix prettier

---
 CITATIONS.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/CITATIONS.md b/CITATIONS.md
index f0871b97..9f71f033 100644
--- a/CITATIONS.md
+++ b/CITATIONS.md
@@ -45,13 +45,13 @@
   > Hoehn K, Van der Heiden J, Zhou J, Lunter G, Pybus O, Kleinstein S (2019). “Repertoire-wide phylogenetic models of B cell molecular evolution reveal evolutionary signatures of aging and vaccination.” PNAS.
 
 - [TIgGER](https://doi.org/10.1073/pnas.1417683112)
+
   > Gadala-maria, D., Yaari, G., Uduman, M., & Kleinstein, S. H. (2015). Automated analysis of high-throughput B-cell sequencing data reveals a high frequency of novel immunoglobulin V gene segment alleles. Proceedings of the National Academy of Sciences, 112(8), 1–9.
 
 - [MultiQC](https://pubmed.ncbi.nlm.nih.gov/27312411/)
 
   > Ewels P, Magnusson M, Lundin S, Käller M. MultiQC: summarize analysis results for multiple tools and samples in a single report. Bioinformatics. 2016 Oct 1;32(19):3047-8. doi: 10.1093/bioinformatics/btw354. Epub 2016 Jun 16. PubMed PMID: 27312411; PubMed Central PMCID: PMC5039924.
 
-
 ## Software packaging/containerisation tools
 
 - [Anaconda](https://anaconda.com)

From db1afb020b122db595ce037552c78322923f742a Mon Sep 17 00:00:00 2001
From: Gisela Gabernet <gisela.gabernet@gmail.com>
Date: Wed, 24 May 2023 17:23:13 -0400
Subject: [PATCH 15/16] add check for whitespaces in columns

---
 bin/check_samplesheet.py | 30 ++++++++++++++++++++++++------
 1 file changed, 24 insertions(+), 6 deletions(-)

diff --git a/bin/check_samplesheet.py b/bin/check_samplesheet.py
index 1c439211..41a0c1c9 100755
--- a/bin/check_samplesheet.py
+++ b/bin/check_samplesheet.py
@@ -7,6 +7,7 @@
 import errno
 import argparse
 import pandas as pd
+import re
 
 
 def parse_args(args=None):
@@ -51,8 +52,8 @@ def check_samplesheet(file_in):
     sample_run_dict = {}
     with open(file_in, "r") as fin:
         ## Check that required columns are present
-        MIN_COLS = 7
-        REQUIRED_COLUMNS = [
+        min_cols = 7
+        required_columns = [
             "sample_id",
             "filename_R1",
             "filename_R2",
@@ -65,12 +66,21 @@ def check_samplesheet(file_in):
             "biomaterial_provider",
             "age",
         ]
+        no_whitespaces = [
+            "sample_id",
+            "filename_R1",
+            "filename_R2",
+            "subject_id",
+            "species",
+            "pcr_target_locus",
+            "tissue",
+        ]
         header = [x.strip('"') for x in fin.readline().strip().split("\t")]
-        for col in REQUIRED_COLUMNS:
+        for col in required_columns:
             if col not in header:
                 print("ERROR: Please check samplesheet header: {} ".format(",".join(header)))
                 print("Header is missing column {}".format(col))
-                print("Header must contain columns {}".format("\t".join(REQUIRED_COLUMNS)))
+                print("Header must contain columns {}".format("\t".join(required_columns)))
                 sys.exit(1)
 
         ## Check that rows have the same fields as header, and at least the compulsory ones are provided
@@ -85,9 +95,9 @@ def check_samplesheet(file_in):
                     line,
                 )
             num_cols = len([x for x in lspl if x])
-            if num_cols < MIN_COLS:
+            if num_cols < min_cols:
                 print_error(
-                    "Invalid number of populated columns (should be {})!".format(MIN_COLS),
+                    "Invalid number of populated columns (should be {})!".format(min_cols),
                     "Line",
                     line,
                 )
@@ -119,6 +129,14 @@ def check_samplesheet(file_in):
                     "The same subject_id cannot belong to different species! Check input file columns 'subject_id' and 'species'."
                 )
 
+        ## Check that values do not contain spaces in the no whitespaces columns
+        for col in no_whitespaces:
+            values = tab[col].tolist()
+            if any([re.search(r'\s+',s) for s in values]):
+                print_error(
+                    "The column {} contains values with whitespaces. Please ensure that there are no tabs, spaces or any other whitespaces in these columns as well: {}".format(col,no_whitespaces)
+                )
+
 
 def main(args=None):
     args = parse_args(args)

From 7ee7461e1b35f16c3ab7584d2bf706bef98da4ad Mon Sep 17 00:00:00 2001
From: Gisela Gabernet <gisela.gabernet@gmail.com>
Date: Wed, 24 May 2023 17:31:12 -0400
Subject: [PATCH 16/16] fix linting and update changelog

---
 CHANGELOG.md             | 4 +++-
 bin/check_samplesheet.py | 6 ++++--
 2 files changed, 7 insertions(+), 3 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 7e13b2ef..a0d8779e 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -14,7 +14,9 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.
 
 - [#250](https://github.com/nf-core/airrflow/pull/250) Fixed log parsing with `removeprefix` instead of `lstrip`.
 - [#258](https://github.com/nf-core/airrflow/pull/258) Fixes to plotly plots in report sometimes not rendering.
-- [#258](https://github.com/nf-core/airrflow/pull/258) Remove direct call to Igblast in favor of a fix in ChangeoO.
+- [#258](https://github.com/nf-core/airrflow/pull/258) Remove direct call to Igblast in favor of a fix in ChangeO.
+- [#258](https://github.com/nf-core/airrflow/pull/258) Added check for whitespaces in certain columns in samplesheet.
+- [#258](https://github.com/nf-core/airrflow/pull/258) Added missing Immcantation references in Airrflow report.
 
 ## [3.0] - 2023-03-20 "Portus"
 
diff --git a/bin/check_samplesheet.py b/bin/check_samplesheet.py
index 41a0c1c9..84e1087f 100755
--- a/bin/check_samplesheet.py
+++ b/bin/check_samplesheet.py
@@ -132,9 +132,11 @@ def check_samplesheet(file_in):
         ## Check that values do not contain spaces in the no whitespaces columns
         for col in no_whitespaces:
             values = tab[col].tolist()
-            if any([re.search(r'\s+',s) for s in values]):
+            if any([re.search(r"\s+", s) for s in values]):
                 print_error(
-                    "The column {} contains values with whitespaces. Please ensure that there are no tabs, spaces or any other whitespaces in these columns as well: {}".format(col,no_whitespaces)
+                    "The column {} contains values with whitespaces. Please ensure that there are no tabs, spaces or any other whitespaces in these columns as well: {}".format(
+                        col, no_whitespaces
+                    )
                 )