From d0b398c7cd325a939cb32337cb2f7036229fa8f6 Mon Sep 17 00:00:00 2001 From: Gisela Gabernet Date: Sat, 29 Apr 2023 17:01:20 -0400 Subject: [PATCH 01/16] add missing citations --- CITATIONS.md | 25 +++++++++++++++++++++---- 1 file changed, 21 insertions(+), 4 deletions(-) diff --git a/CITATIONS.md b/CITATIONS.md index 10d23253..f0871b97 100644 --- a/CITATIONS.md +++ b/CITATIONS.md @@ -16,10 +16,6 @@ > Shifu Chen, Yanqing Zhou, Yaru Chen, Jia Gu, fastp: an ultra-fast all-in-one FASTQ preprocessor, Bioinformatics. 2018 Sept 1; 34(17):i884–i890. doi: 10.1093/bioinformatics/bty560. -- [MultiQC](https://pubmed.ncbi.nlm.nih.gov/27312411/) - - > Ewels P, Magnusson M, Lundin S, Käller M. MultiQC: summarize analysis results for multiple tools and samples in a single report. Bioinformatics. 2016 Oct 1;32(19):3047-8. doi: 10.1093/bioinformatics/btw354. Epub 2016 Jun 16. PubMed PMID: 27312411; PubMed Central PMCID: PMC5039924. - - [pRESTO](https://doi.org/10.1093/bioinformatics/btu138) > Vander Heiden, J. A., Yaari, G., Uduman, M., Stern, J. N. H., O’Connor, K. C., Hafler, D. A., … Kleinstein, S. H. (2014). pRESTO: a toolkit for processing high-throughput sequencing raw reads of lymphocyte receptor repertoires. Bioinformatics, 30(13), 1930–1932. @@ -32,9 +28,30 @@ > Stern, J. N. H., Yaari, G., Vander Heiden, J. A., Church, G., Donahue, W. F., Hintzen, R. Q., … O’Connor, K. C. (2014). B cells populating the multiple sclerosis brain mature in the draining cervical lymph nodes. Science Translational Medicine, 6(248). +- [SCOPer](https://doi.org/10.1093/bioinformatics/bty235) + + > Nouri N, Kleinstein S (2018). “A spectral clustering-based method for identifying clones from high-throughput B cell repertoire sequencing data.” Bioinformatics, i341-i349. + + > Nouri N, Kleinstein S (2020). “Somatic hypermutation analysis for improved identification of B cell clonal families from next-generation sequencing data.” PLOS Computational Biology, 16(6), e1007977. + + > Gupta N, Adams K, Briggs A, Timberlake S, Vigneault F, Kleinstein S (2017). “Hierarchical clustering can identify B cell clones with high confidence in Ig repertoire sequencing data.” The Journal of Immunology, 2489-2499. + +- [Dowser](https://doi.org/10.1371/journal.pcbi.1009885) + + > Hoehn K, Pybus O, Kleinstein S (2022). “Phylogenetic analysis of migration, differentiation, and class switching in B cells.” PLoS Computational Biology. + +- [IgPhyML](https://www.pnas.org/doi/10.1073/pnas.1906020116) + + > Hoehn K, Van der Heiden J, Zhou J, Lunter G, Pybus O, Kleinstein S (2019). “Repertoire-wide phylogenetic models of B cell molecular evolution reveal evolutionary signatures of aging and vaccination.” PNAS. + - [TIgGER](https://doi.org/10.1073/pnas.1417683112) > Gadala-maria, D., Yaari, G., Uduman, M., & Kleinstein, S. H. (2015). Automated analysis of high-throughput B-cell sequencing data reveals a high frequency of novel immunoglobulin V gene segment alleles. Proceedings of the National Academy of Sciences, 112(8), 1–9. +- [MultiQC](https://pubmed.ncbi.nlm.nih.gov/27312411/) + + > Ewels P, Magnusson M, Lundin S, Käller M. MultiQC: summarize analysis results for multiple tools and samples in a single report. Bioinformatics. 2016 Oct 1;32(19):3047-8. doi: 10.1093/bioinformatics/btw354. Epub 2016 Jun 16. PubMed PMID: 27312411; PubMed Central PMCID: PMC5039924. + + ## Software packaging/containerisation tools - [Anaconda](https://anaconda.com) From 1798bcba9d8616cb1a709597904e62da78df670a Mon Sep 17 00:00:00 2001 From: Gisela Gabernet Date: Sat, 29 Apr 2023 17:01:47 -0400 Subject: [PATCH 02/16] update report rm plotly because of memory error --- assets/repertoire_comparison.Rmd | 60 ++++++++++++++++++++++---------- bin/execute_report.R | 6 ++++ 2 files changed, 48 insertions(+), 18 deletions(-) diff --git a/assets/repertoire_comparison.Rmd b/assets/repertoire_comparison.Rmd index 542beba5..907777bc 100644 --- a/assets/repertoire_comparison.Rmd +++ b/assets/repertoire_comparison.Rmd @@ -10,10 +10,6 @@ output: css: ./nf-core_style.css highlight: pygments pdf_document: true - pandoc_args: [ - "+RTS", "-K4000m", - "-RTS" - ] html_notebook: toc: yes --- @@ -199,13 +195,22 @@ p_ca <- ggplot(abund@abundance, aes(x = rank, y = p)) + ymax = upper, fill = sample_id), alpha = 0.4) + geom_line(aes(color = sample_id)) + ggtitle(abund_main) + - xlab("log(Rank)") + ylab("Abundance") + + xlab('log(Rank)') + ylab('Abundance') + scale_x_log10(limits = NULL, breaks = scales::trans_breaks("log10", function(x) 10^x), labels = scales::trans_format("log10", scales::math_format(10^.x))) + scale_y_continuous(labels = scales::percent) #+ #facet_grid(cols = vars(locus), rows = vars(subject_id), scales="free", drop = T) -ggplotly(p_ca) +ggplot(p_ca) + +# Try with plotly native +# abund@abundance %>% plot_ly( x=~rank, y=~p, +# type='scatter', color=~sample_id, mode='lines') %>% +# add_ribbons(ymin = ~lower, +# ymax = ~upper, +# line = list(color=~sample_id), +# fillcolor = ~sample_id) + ``` ```{r plot_abundance, include = FALSE} @@ -214,6 +219,7 @@ ggsave(plot=p_ca, filename = paste0(abundance_dir,"/Clonal_abundance_subject.png write.table(abund@abundance, file = paste0(abundance_dir, "/Clonal_abundance_data_subject.tsv"), sep="\t", quote = F, row.names = F) ``` + # Clonal diversity The clonal diversity $D$ of the repertoire was calculated according to the general formula of Hill Diversity @@ -270,7 +276,7 @@ div_p <- ggplot(sample_div@diversity, aes(x = q, y = d, group=sample_id)) + xlab("q") + ylab("Diversity(q)") + ggtitle(sample_main) #+ #facet_grid(cols=vars(locus), rows=vars(subject_id)) -ggplotly(div_p) +ggplot(div_p) ``` ```{r plot_diversity, include = FALSE} ggsave(plot=div_p, filename=paste0(diversity_dir,"/Diversity_patient_grid.png"), device="png", width = 25, height = 10, units="cm") @@ -366,16 +372,6 @@ write.table(family, file = paste0(vfamily_dir, "/V_gene_distribution_by_sequence If you use nf-core/airrflow for your analysis, please cite it using the following DOI: [10.5281/zenodo.3607408](https://doi.org/10.5281/zenodo.3607408) -Please also cite the `nf-core` and `Nextflow` publications: - -- [nf-core](https://pubmed.ncbi.nlm.nih.gov/32055031/) - - > Ewels PA, Peltzer A, Fillinger S, Patel H, Alneberg J, Wilm A, Garcia MU, Di Tommaso P, Nahnsen S. The nf-core framework for community-curated bioinformatics pipelines. Nat Biotechnol. 2020 Mar;38(3):276-278. doi: 10.1038/s41587-020-0439-x. PubMed PMID: 32055031. - -- [Nextflow](https://pubmed.ncbi.nlm.nih.gov/28398311/) - - > Di Tommaso P, Chatzou M, Floden EW, Barja PP, Palumbo E, Notredame C. Nextflow enables reproducible computational workflows. Nat Biotechnol. 2017 Apr 11;35(4):316-319. doi: 10.1038/nbt.3820. PubMed PMID: 28398311. - In addition, citations for the tools and data used in this pipeline are as follows: - [pRESTO](https://doi.org/10.1093/bioinformatics/btu138) @@ -386,12 +382,40 @@ In addition, citations for the tools and data used in this pipeline are as follo > Gupta, N. T., Vander Heiden, J. A., Uduman, M., Gadala-Maria, D., Yaari, G., & Kleinstein, S. H. (2015). Change-O: a toolkit for analyzing large-scale B cell immunoglobulin repertoire sequencing data: Table 1. Bioinformatics, 31(20), 3356–3358. +- [IgBLAST](https://doi.org/10.1093/nar/gkt382) + + > Ye, J., Ma, N., Madden, T. L., & Ostell, J. M. (2013). IgBLAST: An immunoglobulin variable domain sequence analysis tool. Nucleic Acids Research, 41(Web Server issue), W34. + - [Alakazam](https://doi.org/10.1126/scitranslmed.3008879) - > Stern, J. N. H., Yaari, G., Vander Heiden, J. A., Church, G., Donahue, W. F., Hintzen, R. Q., … O’Connor, K. C. (2014). B cells populating the multiple sclerosis brain mature in the draining cervical lymph nodes. Science Translational Medicine, 6(248). + > Stern, J. N. H., Yaari, G., Vander Heiden, J. A., Church, G., Donahue, W. F., Hintzen, R. Q., … O’Connor, K. C. (2014). B cells populating the multiple sclerosis brain mature in the draining cervical lymph nodes. Science Translational Medicine, 6(248), 248ra107. + +- [SCOPer](https://doi.org/10.1093/bioinformatics/bty235) + + > Nouri N, Kleinstein S (2018). “A spectral clustering-based method for identifying clones from high-throughput B cell repertoire sequencing data.” Bioinformatics, i341-i349. + + > Nouri N, Kleinstein S (2020). “Somatic hypermutation analysis for improved identification of B cell clonal families from next-generation sequencing data.” PLOS Computational Biology, 16(6), e1007977. + + > Gupta N, Adams K, Briggs A, Timberlake S, Vigneault F, Kleinstein S (2017). “Hierarchical clustering can identify B cell clones with high confidence in Ig repertoire sequencing data.” The Journal of Immunology, 2489-2499. + +- [Dowser](https://doi.org/10.1371/journal.pcbi.1009885) + + > Hoehn K, Pybus O, Kleinstein S (2022). “Phylogenetic analysis of migration, differentiation, and class switching in B cells.” PLoS Computational Biology. + +- [IgPhyML](https://www.pnas.org/doi/10.1073/pnas.1906020116) + + > Hoehn K, Van der Heiden J, Zhou J, Lunter G, Pybus O, Kleinstein S (2019). “Repertoire-wide phylogenetic models of B cell molecular evolution reveal evolutionary signatures of aging and vaccination.” PNAS. + +- [TIgGER](https://doi.org/10.1073/pnas.1417683112) + + > Gadala-maria, D., Yaari, G., Uduman, M., & Kleinstein, S. H. (2015). Automated analysis of high-throughput B-cell sequencing data reveals a high frequency of novel immunoglobulin V gene segment alleles. Proceedings of the National Academy of Sciences, 112(8), 1–9. - [FastQC](https://www.bioinformatics.babraham.ac.uk/projects/fastqc/) +- [Fastp](https://doi.org/10.1093/bioinformatics/bty560) + + > Shifu Chen, Yanqing Zhou, Yaru Chen, Jia Gu, fastp: an ultra-fast all-in-one FASTQ preprocessor, Bioinformatics. 2018 Sept 1; 34(17):i884–i890. + - [MultiQC](https://pubmed.ncbi.nlm.nih.gov/27312411/) > Ewels P, Magnusson M, Lundin S, Käller M. MultiQC: summarize analysis results for multiple tools and samples in a single report. Bioinformatics. 2016 Oct 1;32(19):3047-8. doi: 10.1093/bioinformatics/btw354. Epub 2016 Jun 16. PubMed PMID: 27312411; PubMed Central PMCID: PMC5039924. diff --git a/bin/execute_report.R b/bin/execute_report.R index c1e090f8..c0d0b448 100755 --- a/bin/execute_report.R +++ b/bin/execute_report.R @@ -4,13 +4,19 @@ library(rmarkdown) library(optparse) + option_list = list( make_option(c("-r", "--report_file"), type="character", default=NULL, help="report rmarkdown file", metavar="character") ) + opt_parser = OptionParser(option_list=option_list) opt = parse_args(opt_parser) + +#Set pandoc stack size memory +options(pandoc.stack.size="4000m") + wd=getwd() rmarkdown::render(opt$report_file, output_file = "Airrflow_report.html", knit_root_dir = wd, output_dir = wd) From eecb3a4bba595b9324d2e08d8dc0025c212adb27 Mon Sep 17 00:00:00 2001 From: Gisela Gabernet Date: Mon, 1 May 2023 14:56:17 -0400 Subject: [PATCH 03/16] revert abundance and diversity without plotly --- assets/repertoire_comparison.Rmd | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/assets/repertoire_comparison.Rmd b/assets/repertoire_comparison.Rmd index 907777bc..33c73e43 100644 --- a/assets/repertoire_comparison.Rmd +++ b/assets/repertoire_comparison.Rmd @@ -201,7 +201,7 @@ p_ca <- ggplot(abund@abundance, aes(x = rank, y = p)) + labels = scales::trans_format("log10", scales::math_format(10^.x))) + scale_y_continuous(labels = scales::percent) #+ #facet_grid(cols = vars(locus), rows = vars(subject_id), scales="free", drop = T) -ggplot(p_ca) +p_ca # Try with plotly native # abund@abundance %>% plot_ly( x=~rank, y=~p, @@ -276,7 +276,7 @@ div_p <- ggplot(sample_div@diversity, aes(x = q, y = d, group=sample_id)) + xlab("q") + ylab("Diversity(q)") + ggtitle(sample_main) #+ #facet_grid(cols=vars(locus), rows=vars(subject_id)) -ggplot(div_p) +div_p ``` ```{r plot_diversity, include = FALSE} ggsave(plot=div_p, filename=paste0(diversity_dir,"/Diversity_patient_grid.png"), device="png", width = 25, height = 10, units="cm") From 9e5ead6cb42cb12be84d8d3b108b5ac7c7dcbf03 Mon Sep 17 00:00:00 2001 From: Gisela Gabernet Date: Wed, 3 May 2023 13:32:49 -0400 Subject: [PATCH 04/16] do not ignore dowser errors --- modules/local/enchantr/dowser_lineages.nf | 1 - 1 file changed, 1 deletion(-) diff --git a/modules/local/enchantr/dowser_lineages.nf b/modules/local/enchantr/dowser_lineages.nf index 61aca2ec..37eaad05 100644 --- a/modules/local/enchantr/dowser_lineages.nf +++ b/modules/local/enchantr/dowser_lineages.nf @@ -19,7 +19,6 @@ process DOWSER_LINEAGES { tag "${meta.id}" label 'process_long_parallelized' - label 'error_ignore' label 'immcantation' conda "bioconda::r-enchantr=0.1.1" From fd2d04148bf86ce0dadbf66d748c965b8b63d903 Mon Sep 17 00:00:00 2001 From: Gisela Gabernet Date: Thu, 11 May 2023 10:44:19 -0400 Subject: [PATCH 05/16] parametrize find threshold --- conf/modules.config | 5 +++++ modules/local/enchantr/find_threshold.nf | 21 +++++++++++++++++++-- 2 files changed, 24 insertions(+), 2 deletions(-) diff --git a/conf/modules.config b/conf/modules.config index adf238b0..24385e45 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -407,6 +407,11 @@ process { mode: params.publish_dir_mode, saveAs: { filename -> filename.equals('versions.yml') ? null : filename } ] + ext.args = ['findthreshold_method':'gmm', + 'findthreshold_model':'gamma-norm', + 'findthreshold_edge':0.9, + 'findthreshold_cutoff':'user', + 'findthreshold_spc':0.995] } withName: REPORT_THRESHOLD { diff --git a/modules/local/enchantr/find_threshold.nf b/modules/local/enchantr/find_threshold.nf index 62de6b82..01f44e3d 100644 --- a/modules/local/enchantr/find_threshold.nf +++ b/modules/local/enchantr/find_threshold.nf @@ -1,3 +1,20 @@ +def asString (args) { + def s = "" + def value = "" + if (args.size()>0) { + if (args[0] != 'none') { + for (param in args.keySet().sort()){ + value = args[param].toString() + if (!value.isNumber()) { + value = "'"+value+"'" + } + s = s + ",'"+param+"'="+value + } + } + } + return s +} + process FIND_THRESHOLD { tag "all_reps" @@ -25,7 +42,7 @@ process FIND_THRESHOLD { script: """ Rscript -e "enchantr::enchantr_report('find_threshold', \\ - report_params=list('input'='${tab}',\\ + report_params=list('input'='${tab.join(',')}',\\ 'cloneby'='${params.cloneby}',\\ 'crossby'='${params.crossby}',\\ 'singlecell'='${params.singlecell}',\\ @@ -33,7 +50,7 @@ process FIND_THRESHOLD { 'nproc'=${task.cpus},\\ 'outname'='all_reps',\\ 'log'='all_reps_threshold_command_log',\\ - 'logo'='${logo}'))" + 'logo'='${logo}' ${args}))" echo "${task.process}": > versions.yml Rscript -e "cat(paste0(' enchantr: ',packageVersion('enchantr'),'\n'))" >> versions.yml From e549b8ae18b60ac079492bead028a71f8ec3c25c Mon Sep 17 00:00:00 2001 From: Gisela Gabernet Date: Thu, 11 May 2023 10:44:31 -0400 Subject: [PATCH 06/16] revert fixes for slurm size limit --- modules/local/enchantr/report_file_size.nf | 3 ++- subworkflows/local/clonal_analysis.nf | 16 +--------------- .../local/repertoire_analysis_reporting.nf | 17 +---------------- 3 files changed, 4 insertions(+), 32 deletions(-) diff --git a/modules/local/enchantr/report_file_size.nf b/modules/local/enchantr/report_file_size.nf index 51775767..fcf7f670 100644 --- a/modules/local/enchantr/report_file_size.nf +++ b/modules/local/enchantr/report_file_size.nf @@ -22,8 +22,9 @@ process REPORT_FILE_SIZE { script: """ + echo "${logs.join('\n')}" > logs.txt Rscript -e "enchantr::enchantr_report('file_size', \\ - report_params=list('input'='${logs}', 'metadata'='${metadata}',\\ + report_params=list('input'='logs.txt', 'metadata'='${metadata}',\\ 'outdir'=getwd()))" echo "\"${task.process}\":" > versions.yml diff --git a/subworkflows/local/clonal_analysis.nf b/subworkflows/local/clonal_analysis.nf index ddf31550..91921756 100644 --- a/subworkflows/local/clonal_analysis.nf +++ b/subworkflows/local/clonal_analysis.nf @@ -19,9 +19,6 @@ workflow CLONAL_ANALYSIS { ch_find_threshold = ch_repertoire.map{ it -> it[1] } .collect() - .flatten() - .map{ it -> it.toString() } - .collectFile(name: 'find_threshold_tabs.txt', newLine: true) FIND_CLONAL_THRESHOLD ( ch_find_threshold, @@ -42,9 +39,6 @@ workflow CLONAL_ANALYSIS { ch_find_threshold = ch_repertoire.map{ it -> it[1] } .collect() - .flatten() - .map{ it -> it.toString() } - .collectFile(name: 'report_threshold_tabs.txt', newLine: true) REPORT_THRESHOLD ( ch_find_threshold, @@ -82,16 +76,8 @@ workflow CLONAL_ANALYSIS { if (!params.skip_all_clones_report){ - ch_all_repertoires_cloned_tabs = ch_all_repertoires_cloned.map{ it -> it[1] } - .collect() - .flatten() - .map{ it -> it.toString() } - .dump(tag: 'ch_all_repertoires_cloned_tabs') - .collectFile(name: 'all_repertoires_cloned_tabs.txt', newLine: true) - .map { it -> [ [id:'all_reps'], it ] } - DEFINE_CLONES_REPORT( - ch_all_repertoires_cloned_tabs, + ch_all_repertoires_cloned, clone_threshold.collect(), ch_imgt.collect() ) diff --git a/subworkflows/local/repertoire_analysis_reporting.nf b/subworkflows/local/repertoire_analysis_reporting.nf index dd3a7aec..877494b6 100644 --- a/subworkflows/local/repertoire_analysis_reporting.nf +++ b/subworkflows/local/repertoire_analysis_reporting.nf @@ -56,27 +56,12 @@ workflow REPERTOIRE_ANALYSIS_REPORTING { ch_sc_qc_and_filter_logs, ch_clonal_analysis_logs) - ch_logs_tabs = ch_logs.collect() - .flatten() - .map{ it -> it.toString() } - .dump(tag: 'ch_logs_tabs') - .collectFile(name: 'all_logs_tabs.txt', newLine: true) - REPORT_FILE_SIZE( - ch_logs_tabs.ifEmpty([]), + ch_logs.collect().ifEmpty([]), ch_metadata ) ch_versions = ch_versions.mix(REPORT_FILE_SIZE.out.versions) - ch_repertoires_report = ch_repertoires - .map{ it -> it[1] } - .collect() - .flatten() - .map{ it -> it.toString() } - .dump(tag: 'ch_repertoires_report') - .collectFile(name: 'all_repertoires_report_tabs.txt', newLine: true) - .map { it -> [ [id:'all_reps'], it ] } - AIRRFLOW_REPORT( ch_repertoires_report, ch_parsed_logs.collect().ifEmpty([]), From 3900b52b961c9bbc64dfaa3a40b1be07965d012f Mon Sep 17 00:00:00 2001 From: Gisela Gabernet Date: Mon, 15 May 2023 16:31:11 -0400 Subject: [PATCH 07/16] fix find threshold report analysis --- modules/local/enchantr/find_threshold.nf | 1 + subworkflows/local/repertoire_analysis_reporting.nf | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/modules/local/enchantr/find_threshold.nf b/modules/local/enchantr/find_threshold.nf index 01f44e3d..ed10b1c1 100644 --- a/modules/local/enchantr/find_threshold.nf +++ b/modules/local/enchantr/find_threshold.nf @@ -40,6 +40,7 @@ process FIND_THRESHOLD { path "versions.yml", emit: versions script: + def args = asString(task.ext.args) ?: '' """ Rscript -e "enchantr::enchantr_report('find_threshold', \\ report_params=list('input'='${tab.join(',')}',\\ diff --git a/subworkflows/local/repertoire_analysis_reporting.nf b/subworkflows/local/repertoire_analysis_reporting.nf index 877494b6..bab6b21d 100644 --- a/subworkflows/local/repertoire_analysis_reporting.nf +++ b/subworkflows/local/repertoire_analysis_reporting.nf @@ -63,7 +63,7 @@ workflow REPERTOIRE_ANALYSIS_REPORTING { ch_versions = ch_versions.mix(REPORT_FILE_SIZE.out.versions) AIRRFLOW_REPORT( - ch_repertoires_report, + ch_repertoires, ch_parsed_logs.collect().ifEmpty([]), REPORT_FILE_SIZE.out.table.ifEmpty([]), ch_report_rmd, From de54d75d69afed48ac1f0036dc6cfe250e1ba010 Mon Sep 17 00:00:00 2001 From: Gisela Gabernet Date: Mon, 15 May 2023 16:34:03 -0400 Subject: [PATCH 08/16] revert parametrize findthreshold --- conf/modules.config | 5 ----- 1 file changed, 5 deletions(-) diff --git a/conf/modules.config b/conf/modules.config index 24385e45..adf238b0 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -407,11 +407,6 @@ process { mode: params.publish_dir_mode, saveAs: { filename -> filename.equals('versions.yml') ? null : filename } ] - ext.args = ['findthreshold_method':'gmm', - 'findthreshold_model':'gamma-norm', - 'findthreshold_edge':0.9, - 'findthreshold_cutoff':'user', - 'findthreshold_spc':0.995] } withName: REPORT_THRESHOLD { From 38da0546b5d1baa9afe318d14b8a1895bdc26e0d Mon Sep 17 00:00:00 2001 From: Gisela Gabernet Date: Mon, 15 May 2023 16:43:01 -0400 Subject: [PATCH 09/16] fix bug asString function call when no args --- modules/local/enchantr/define_clones.nf | 2 +- modules/local/enchantr/dowser_lineages.nf | 2 +- modules/local/enchantr/find_threshold.nf | 2 +- modules/local/enchantr/single_cell_qc.nf | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/modules/local/enchantr/define_clones.nf b/modules/local/enchantr/define_clones.nf index afec5b63..a4081e84 100644 --- a/modules/local/enchantr/define_clones.nf +++ b/modules/local/enchantr/define_clones.nf @@ -39,7 +39,7 @@ process DEFINE_CLONES { script: - def args = asString(task.ext.args) ?: '' + def args = task.ext.args ? asString(task.ext.args) : '' def thr = threshold.join("") """ Rscript -e "enchantr::enchantr_report('define_clones', \\ diff --git a/modules/local/enchantr/dowser_lineages.nf b/modules/local/enchantr/dowser_lineages.nf index 37eaad05..e408a9df 100644 --- a/modules/local/enchantr/dowser_lineages.nf +++ b/modules/local/enchantr/dowser_lineages.nf @@ -35,7 +35,7 @@ process DOWSER_LINEAGES { path "versions.yml", emit: versions script: - def args = asString(task.ext.args) ?: '' + def args = task.ext.args ? asString(task.ext.args) : '' def id_name = "$tabs".replaceFirst('__.*','') // TODO use nice outname, not tabs """ diff --git a/modules/local/enchantr/find_threshold.nf b/modules/local/enchantr/find_threshold.nf index ed10b1c1..22f9ad69 100644 --- a/modules/local/enchantr/find_threshold.nf +++ b/modules/local/enchantr/find_threshold.nf @@ -40,7 +40,7 @@ process FIND_THRESHOLD { path "versions.yml", emit: versions script: - def args = asString(task.ext.args) ?: '' + def args = task.ext.args ? asString(task.ext.args) : '' """ Rscript -e "enchantr::enchantr_report('find_threshold', \\ report_params=list('input'='${tab.join(',')}',\\ diff --git a/modules/local/enchantr/single_cell_qc.nf b/modules/local/enchantr/single_cell_qc.nf index 813ff127..673a7886 100644 --- a/modules/local/enchantr/single_cell_qc.nf +++ b/modules/local/enchantr/single_cell_qc.nf @@ -35,7 +35,7 @@ process SINGLE_CELL_QC { path("versions.yml"), emit: versions script: - def args = asString(task.ext.args) ?: '' + def args = task.ext.args ? asString(task.ext.args) : '' """ echo "${tabs.join('\n')}" > tabs.txt Rscript -e "enchantr::enchantr_report('single_cell_qc', \\ From 62455844e2c5c1bf10db4e206e421216b340dfb8 Mon Sep 17 00:00:00 2001 From: Gisela Gabernet Date: Thu, 18 May 2023 09:04:14 -0400 Subject: [PATCH 10/16] fix report reading tab --- assets/repertoire_comparison.Rmd | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/assets/repertoire_comparison.Rmd b/assets/repertoire_comparison.Rmd index 33c73e43..8a181523 100644 --- a/assets/repertoire_comparison.Rmd +++ b/assets/repertoire_comparison.Rmd @@ -42,6 +42,9 @@ dir.create(outdir) seq_dir <- paste(outdir, "Sequence_numbers_summary", sep="/") dir.create(seq_dir) +# Read data from the work directory +datadir <- "." + ``` # Number of sequences @@ -139,7 +142,7 @@ ggplotly(seqs_plot_assembled) ```{r read_data, include=FALSE} # paths to the files are found in the first column of all_repertoires_report_tabs.txt, # in the current folder -all_files <- read.delim("all_repertoires_report_tabs.txt", header=F, sep="\t")[[1]] +all_files <- system(paste0("find '",datadir,"' -name '*clone-pass.tsv'"), intern=T) diversity_dir <- paste(outdir, "Diversity", sep="/") abundance_dir <- paste(outdir, "Abundance", sep="/") From 62f93a7a41e9de66040ef8f224d0b1b4aafc39e0 Mon Sep 17 00:00:00 2001 From: Gisela Gabernet Date: Wed, 24 May 2023 13:44:24 -0400 Subject: [PATCH 11/16] fix actions checkout --- .github/workflows/ci.yml | 2 +- .github/workflows/ci_immcantation.yml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index c4955ad9..29fa81bd 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -52,7 +52,7 @@ jobs: fail-fast: false steps: - name: Check out pipeline code - uses: actions/checkout@v2 + uses: actions/checkout@v3 - name: Install Nextflow uses: nf-core/setup-nextflow@v1 diff --git a/.github/workflows/ci_immcantation.yml b/.github/workflows/ci_immcantation.yml index 13d1d4d7..73995668 100644 --- a/.github/workflows/ci_immcantation.yml +++ b/.github/workflows/ci_immcantation.yml @@ -29,7 +29,7 @@ jobs: fail-fast: false steps: - name: Check out pipeline code - uses: actions/checkout@v2 + uses: actions/checkout@v3 - name: Install Nextflow uses: nf-core/setup-nextflow@v1 From ac468d3a96bd0e2f740fce417c346ef99d253a84 Mon Sep 17 00:00:00 2001 From: Gisela Gabernet Date: Wed, 24 May 2023 14:38:51 -0400 Subject: [PATCH 12/16] clean report comments --- assets/repertoire_comparison.Rmd | 25 ++++++------------------- 1 file changed, 6 insertions(+), 19 deletions(-) diff --git a/assets/repertoire_comparison.Rmd b/assets/repertoire_comparison.Rmd index 8a181523..4ff18aae 100644 --- a/assets/repertoire_comparison.Rmd +++ b/assets/repertoire_comparison.Rmd @@ -202,17 +202,9 @@ p_ca <- ggplot(abund@abundance, aes(x = rank, y = p)) + scale_x_log10(limits = NULL, breaks = scales::trans_breaks("log10", function(x) 10^x), labels = scales::trans_format("log10", scales::math_format(10^.x))) + - scale_y_continuous(labels = scales::percent) #+ - #facet_grid(cols = vars(locus), rows = vars(subject_id), scales="free", drop = T) -p_ca + scale_y_continuous(labels = scales::percent) -# Try with plotly native -# abund@abundance %>% plot_ly( x=~rank, y=~p, -# type='scatter', color=~sample_id, mode='lines') %>% -# add_ribbons(ymin = ~lower, -# ymax = ~upper, -# line = list(color=~sample_id), -# fillcolor = ~sample_id) +p_ca ``` @@ -277,8 +269,8 @@ div_p <- ggplot(sample_div@diversity, aes(x = q, y = d, group=sample_id)) + fill = sample_id), alpha = 0.4) + geom_line(aes(color = sample_id)) + xlab("q") + ylab("Diversity(q)") + - ggtitle(sample_main) #+ - #facet_grid(cols=vars(locus), rows=vars(subject_id)) + ggtitle(sample_main) + div_p ``` ```{r plot_diversity, include = FALSE} @@ -308,7 +300,6 @@ g2 <- ggplot(family, aes(x=gene, y=clone_freq, fill=sample_id, group=sample_id)) theme(axis.text.x=element_text(angle=45, hjust=1, vjust=1)) + ylab("Frequency") + xlab("") + - #facet_grid(cols=vars(locus), rows=vars(subject_id)) + theme(legend.position = "right") ggplotly(g2) @@ -337,9 +328,7 @@ g2 <- ggplot(family, aes(x=gene, y=clone_freq, fill=sample_id, group=sample_id)) ggtitle("V Gene Family Usage") + theme(axis.text.x=element_text(angle=45, hjust=1, vjust=1)) + ylab("Frequency") + - xlab("") #+ - #facet_wrap(vars(subject_id,locus), scales="free_x", ncol = 1) + - #theme(legend.position = "none") + xlab("") ggplotly(g2) ggsave(filename = paste0(vfamily_dir, "/V_gene_distribution_by_clone_patient.pdf"), plot = g2, width = 20, height = 40, units = "cm") ggsave(filename = paste0(vfamily_dir, "/V_gene_distribution_by_clone_patient.png"), plot = g2, width = 20, height = 40, units = "cm") @@ -361,9 +350,7 @@ g2 <- ggplot(family, aes(x=gene, y=seq_freq, fill=sample_id, group=sample_id)) + ggtitle("V Gene Usage") + theme(axis.text.x=element_text(angle=45, hjust=1, vjust=1)) + ylab("Frequency") + - xlab("") #+ - #facet_wrap(vars(subject_id,locus), scales="free_x", ncol = 1) + - #theme(legend.position = "none") + xlab("") ggplotly(g2) ggsave(filename = paste0(vfamily_dir, "/V_gene_distribution_by_sequence_patient.pdf"), plot = g2, width = 20, height = 40, units = "cm") ggsave(filename = paste0(vfamily_dir, "/V_gene_distribution_by_sequence_patient.png"), plot = g2, width = 20, height = 40, units = "cm") From 395952557e19ebdd68f750784ea78adbd59cacaf Mon Sep 17 00:00:00 2001 From: Gisela Gabernet Date: Wed, 24 May 2023 14:58:59 -0400 Subject: [PATCH 13/16] rm directcall igblast nasty workaround --- .github/workflows/ci.yml | 2 +- CHANGELOG.md | 7 ++++- conf/modules.config | 19 ------------ conf/test_igblast.config | 46 ---------------------------- nextflow.config | 2 -- nextflow_schema.json | 5 --- subworkflows/local/vdj_annotation.nf | 43 +++++++++----------------- 7 files changed, 22 insertions(+), 102 deletions(-) delete mode 100644 conf/test_igblast.config diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 29fa81bd..ddf7f1da 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -48,7 +48,7 @@ jobs: NXF_VER: - "22.10.1" - "latest-everything" - profile: ["test_tcr", "test_no_umi", "test_nocluster", "test_fetchimgt", "test_assembled", "test_igblast"] + profile: ["test_tcr", "test_no_umi", "test_nocluster", "test_fetchimgt", "test_assembled"] fail-fast: false steps: - name: Check out pipeline code diff --git a/CHANGELOG.md b/CHANGELOG.md index 4414dce7..e5d15661 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,15 +3,20 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/) and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.html). -## [3.0.1dev] - "Portus" Hotfix +## [3.1] - "Portus" Hotfix ### `Added` - [#250](https://github.com/nf-core/airrflow/pull/250) Back to `dev`. +- [#256](https://github.com/nf-core/airrflow/pull/256) Template update to nf-core/tools v2.8 + + ### `Fixed` - [#250](https://github.com/nf-core/airrflow/pull/250) Fixed log parsing with `removeprefix` instead of `lstrip`. +- [#258](https://github.com/nf-core/airrflow/pull/258) Fixes to plotly plots in report sometimes not rendering. +- [#258](https://github.com/nf-core/airrflow/pull/258) Remove direct call to Igblast in favor of a fix in ChangeoO. ## [3.0] - 2023-03-20 "Portus" diff --git a/conf/modules.config b/conf/modules.config index adf238b0..d16975a4 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -299,15 +299,6 @@ process { ext.args = '--format blast' } - withName: IGBLAST_ASSIGNGENES { - publishDir = [ - path: { "${params.outdir}/vdj_annotation/01-assign-genes/${meta.id}" }, - mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename } - ] - ext.args = "-domain_system imgt -ig_seqtype Ig -outfmt 19 -show_translation" - } - withName: CHANGEO_MAKEDB { publishDir = [ path: { "${params.outdir}/vdj_annotation/02-make-db/${meta.id}" }, @@ -450,16 +441,6 @@ process { ext.args = ['build':'igphyml'] } -// withName: CHANGEO_PARSEDB_SELECT { -// publishDir = [ -// path: { "${params.outdir}/changeo/04-parsedb-select/${meta.id}" }, -// mode: params.publish_dir_mode, -// saveAs: { filename -> filename.equals('versions.yml') ? null : filename } -// ] -// ext.args = '-f v_call j_call -u "IG[HLK]" --regex --logic all' -// ext.args2 = '-f v_call j_call -u "TR" --regex --logic all' -// } - // ------------------------------- // Reports // ------------------------------- diff --git a/conf/test_igblast.config b/conf/test_igblast.config deleted file mode 100644 index e1eb2082..00000000 --- a/conf/test_igblast.config +++ /dev/null @@ -1,46 +0,0 @@ -/* -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - Nextflow config file for running minimal tests -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - Defines input files and everything required to run a fast and simple pipeline test. - - Use as follows: - nextflow run nf-core/airrflow -profile test, --outdir - ----------------------------------------------------------------------------------------- -*/ - -params { - config_profile_name = 'Test igblast profile' - config_profile_description = 'Test pipeline with direct call to igblast.' - - // Limit resources so that this can run on GitHub Actions - max_cpus = 2 - max_memory = '6.GB' - max_time = '6.h' - - // Input data - input = 'https://raw.githubusercontent.com/nf-core/test-datasets/airrflow/testdata-bcr/Metadata_test_airr.tsv' - cprimers = 'https://raw.githubusercontent.com/nf-core/test-datasets/airrflow/testdata-bcr/C_primers.fasta' - vprimers = 'https://raw.githubusercontent.com/nf-core/test-datasets/airrflow/testdata-bcr/V_primers.fasta' - imgtdb_base = 'https://raw.githubusercontent.com/nf-core/test-datasets/airrflow/database-cache/imgtdb_base.zip' - igblast_base = 'https://raw.githubusercontent.com/nf-core/test-datasets/airrflow/database-cache/igblast_base.zip' - - mode = 'fastq' - - library_generation_method = 'specific_pcr_umi' - cprimer_position = 'R1' - umi_length = 8 - umi_start = 6 - umi_position = 'R1' - index_file = true - directcall_igblast = true -} - -process{ - withName:"DEFINE_CLONES*"{ - ext.args = ['outname':'', 'model':'hierarchical', - 'method':'nt', 'linkage':'single', - 'outputby':'sample_id', 'min_n':10] - } -} diff --git a/nextflow.config b/nextflow.config index 8c7744f7..ca67f548 100644 --- a/nextflow.config +++ b/nextflow.config @@ -63,8 +63,6 @@ params { igblast_base = null imgtdb_base = null save_databases = true - directcall_igblast = false - // ----------------------- // bulk filtering options diff --git a/nextflow_schema.json b/nextflow_schema.json index 49949544..8b6194e9 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -290,11 +290,6 @@ "description": "Path to the cached igblast database.", "help_text": "If it is not provided, the database will be newly downloaded.", "fa_icon": "fas fa-database" - }, - "directcall_igblast": { - "type": "boolean", - "fa_icon": "fas fa-align-center", - "description": "Use this flag for directly calling IgBlast for reference alignment, instead of using the `changeo assigngenes` and `changeo makedb` options that call IgBlast in the background. Allows for additional configuration of the IgBlast call." } }, "fa_icon": "fas fa-edit" diff --git a/subworkflows/local/vdj_annotation.nf b/subworkflows/local/vdj_annotation.nf index 8a68fb60..31ed0fd6 100644 --- a/subworkflows/local/vdj_annotation.nf +++ b/subworkflows/local/vdj_annotation.nf @@ -61,38 +61,25 @@ workflow VDJ_ANNOTATION { ch_imgt = FETCH_DATABASES.out.imgt ch_versions = ch_versions.mix(FETCH_DATABASES.out.versions.ifEmpty(null)) } - if (!params.directcall_igblast){ - CHANGEO_ASSIGNGENES ( - ch_fasta, - ch_igblast.collect() - ) - - ch_logs = ch_logs.mix(CHANGEO_ASSIGNGENES.out.logs) - ch_versions = ch_versions.mix(CHANGEO_ASSIGNGENES.out.versions.ifEmpty(null)) - CHANGEO_MAKEDB ( - CHANGEO_ASSIGNGENES.out.fasta, - CHANGEO_ASSIGNGENES.out.blast, - ch_imgt.collect() - ) - ch_logs = ch_logs.mix(CHANGEO_MAKEDB.out.logs) - ch_versions = ch_versions.mix(CHANGEO_MAKEDB.out.versions.ifEmpty(null)) + CHANGEO_ASSIGNGENES ( + ch_fasta, + ch_igblast.collect() + ) - ch_assigned_tab = CHANGEO_MAKEDB.out.tab - ch_assignment_logs = CHANGEO_MAKEDB.out.logs + ch_logs = ch_logs.mix(CHANGEO_ASSIGNGENES.out.logs) + ch_versions = ch_versions.mix(CHANGEO_ASSIGNGENES.out.versions.ifEmpty(null)) - } else { - IGBLAST_ASSIGNGENES( - ch_fasta, - ch_igblast.collect() - ) - - ch_assigned_tab = IGBLAST_ASSIGNGENES.out.tab + CHANGEO_MAKEDB ( + CHANGEO_ASSIGNGENES.out.fasta, + CHANGEO_ASSIGNGENES.out.blast, + ch_imgt.collect() + ) + ch_logs = ch_logs.mix(CHANGEO_MAKEDB.out.logs) + ch_versions = ch_versions.mix(CHANGEO_MAKEDB.out.versions.ifEmpty(null)) - ch_logs = ch_logs.mix(IGBLAST_ASSIGNGENES.out.logs) - ch_versions = ch_versions.mix(IGBLAST_ASSIGNGENES.out.versions.ifEmpty(null)) - ch_assignment_logs = IGBLAST_ASSIGNGENES.out.makedb_log - } + ch_assigned_tab = CHANGEO_MAKEDB.out.tab + ch_assignment_logs = CHANGEO_MAKEDB.out.logs // Apply quality filters: // - locus should match v_call chain From 0deb76c182594b826b9bf4cd23efb7d4010354b9 Mon Sep 17 00:00:00 2001 From: Gisela Gabernet Date: Wed, 24 May 2023 15:04:53 -0400 Subject: [PATCH 14/16] fix prettier --- CITATIONS.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CITATIONS.md b/CITATIONS.md index f0871b97..9f71f033 100644 --- a/CITATIONS.md +++ b/CITATIONS.md @@ -45,13 +45,13 @@ > Hoehn K, Van der Heiden J, Zhou J, Lunter G, Pybus O, Kleinstein S (2019). “Repertoire-wide phylogenetic models of B cell molecular evolution reveal evolutionary signatures of aging and vaccination.” PNAS. - [TIgGER](https://doi.org/10.1073/pnas.1417683112) + > Gadala-maria, D., Yaari, G., Uduman, M., & Kleinstein, S. H. (2015). Automated analysis of high-throughput B-cell sequencing data reveals a high frequency of novel immunoglobulin V gene segment alleles. Proceedings of the National Academy of Sciences, 112(8), 1–9. - [MultiQC](https://pubmed.ncbi.nlm.nih.gov/27312411/) > Ewels P, Magnusson M, Lundin S, Käller M. MultiQC: summarize analysis results for multiple tools and samples in a single report. Bioinformatics. 2016 Oct 1;32(19):3047-8. doi: 10.1093/bioinformatics/btw354. Epub 2016 Jun 16. PubMed PMID: 27312411; PubMed Central PMCID: PMC5039924. - ## Software packaging/containerisation tools - [Anaconda](https://anaconda.com) From db1afb020b122db595ce037552c78322923f742a Mon Sep 17 00:00:00 2001 From: Gisela Gabernet Date: Wed, 24 May 2023 17:23:13 -0400 Subject: [PATCH 15/16] add check for whitespaces in columns --- bin/check_samplesheet.py | 30 ++++++++++++++++++++++++------ 1 file changed, 24 insertions(+), 6 deletions(-) diff --git a/bin/check_samplesheet.py b/bin/check_samplesheet.py index 1c439211..41a0c1c9 100755 --- a/bin/check_samplesheet.py +++ b/bin/check_samplesheet.py @@ -7,6 +7,7 @@ import errno import argparse import pandas as pd +import re def parse_args(args=None): @@ -51,8 +52,8 @@ def check_samplesheet(file_in): sample_run_dict = {} with open(file_in, "r") as fin: ## Check that required columns are present - MIN_COLS = 7 - REQUIRED_COLUMNS = [ + min_cols = 7 + required_columns = [ "sample_id", "filename_R1", "filename_R2", @@ -65,12 +66,21 @@ def check_samplesheet(file_in): "biomaterial_provider", "age", ] + no_whitespaces = [ + "sample_id", + "filename_R1", + "filename_R2", + "subject_id", + "species", + "pcr_target_locus", + "tissue", + ] header = [x.strip('"') for x in fin.readline().strip().split("\t")] - for col in REQUIRED_COLUMNS: + for col in required_columns: if col not in header: print("ERROR: Please check samplesheet header: {} ".format(",".join(header))) print("Header is missing column {}".format(col)) - print("Header must contain columns {}".format("\t".join(REQUIRED_COLUMNS))) + print("Header must contain columns {}".format("\t".join(required_columns))) sys.exit(1) ## Check that rows have the same fields as header, and at least the compulsory ones are provided @@ -85,9 +95,9 @@ def check_samplesheet(file_in): line, ) num_cols = len([x for x in lspl if x]) - if num_cols < MIN_COLS: + if num_cols < min_cols: print_error( - "Invalid number of populated columns (should be {})!".format(MIN_COLS), + "Invalid number of populated columns (should be {})!".format(min_cols), "Line", line, ) @@ -119,6 +129,14 @@ def check_samplesheet(file_in): "The same subject_id cannot belong to different species! Check input file columns 'subject_id' and 'species'." ) + ## Check that values do not contain spaces in the no whitespaces columns + for col in no_whitespaces: + values = tab[col].tolist() + if any([re.search(r'\s+',s) for s in values]): + print_error( + "The column {} contains values with whitespaces. Please ensure that there are no tabs, spaces or any other whitespaces in these columns as well: {}".format(col,no_whitespaces) + ) + def main(args=None): args = parse_args(args) From 7ee7461e1b35f16c3ab7584d2bf706bef98da4ad Mon Sep 17 00:00:00 2001 From: Gisela Gabernet Date: Wed, 24 May 2023 17:31:12 -0400 Subject: [PATCH 16/16] fix linting update changelog --- CHANGELOG.md | 4 +++- bin/check_samplesheet.py | 6 ++++-- 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 7e13b2ef..a0d8779e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -14,7 +14,9 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0. - [#250](https://github.com/nf-core/airrflow/pull/250) Fixed log parsing with `removeprefix` instead of `lstrip`. - [#258](https://github.com/nf-core/airrflow/pull/258) Fixes to plotly plots in report sometimes not rendering. -- [#258](https://github.com/nf-core/airrflow/pull/258) Remove direct call to Igblast in favor of a fix in ChangeoO. +- [#258](https://github.com/nf-core/airrflow/pull/258) Remove direct call to Igblast in favor of a fix in ChangeO. +- [#258](https://github.com/nf-core/airrflow/pull/258) Added check for whitespaces in certain columns in samplesheet. +- [#258](https://github.com/nf-core/airrflow/pull/258) Added missing Immcantation references in Airrflow report. ## [3.0] - 2023-03-20 "Portus" diff --git a/bin/check_samplesheet.py b/bin/check_samplesheet.py index 41a0c1c9..84e1087f 100755 --- a/bin/check_samplesheet.py +++ b/bin/check_samplesheet.py @@ -132,9 +132,11 @@ def check_samplesheet(file_in): ## Check that values do not contain spaces in the no whitespaces columns for col in no_whitespaces: values = tab[col].tolist() - if any([re.search(r'\s+',s) for s in values]): + if any([re.search(r"\s+", s) for s in values]): print_error( - "The column {} contains values with whitespaces. Please ensure that there are no tabs, spaces or any other whitespaces in these columns as well: {}".format(col,no_whitespaces) + "The column {} contains values with whitespaces. Please ensure that there are no tabs, spaces or any other whitespaces in these columns as well: {}".format( + col, no_whitespaces + ) )