diff --git a/.github/CONTRIBUTING.md b/.github/CONTRIBUTING.md
index c58b4779..219d4e29 100644
--- a/.github/CONTRIBUTING.md
+++ b/.github/CONTRIBUTING.md
@@ -116,4 +116,3 @@ To get started:
Devcontainer specs:
- [DevContainer config](.devcontainer/devcontainer.json)
-- [Dockerfile](.devcontainer/Dockerfile)
diff --git a/.github/ISSUE_TEMPLATE/bug_report.yml b/.github/ISSUE_TEMPLATE/bug_report.yml
index 79bc7c1a..3bd77263 100644
--- a/.github/ISSUE_TEMPLATE/bug_report.yml
+++ b/.github/ISSUE_TEMPLATE/bug_report.yml
@@ -42,7 +42,7 @@ body:
attributes:
label: System information
description: |
- * Nextflow version _(eg. 22.10.1)_
+ * Nextflow version _(eg. 23.04.0)_
* Hardware _(eg. HPC, Desktop, Cloud)_
* Executor _(eg. slurm, local, awsbatch)_
* Container engine: _(e.g. Docker, Singularity, Conda, Podman, Shifter, Charliecloud, or Apptainer)_
diff --git a/.github/workflows/awsfulltest.yml b/.github/workflows/awsfulltest.yml
index 503a5925..eafcf567 100644
--- a/.github/workflows/awsfulltest.yml
+++ b/.github/workflows/awsfulltest.yml
@@ -14,18 +14,23 @@ jobs:
runs-on: ubuntu-latest
steps:
- name: Launch workflow via tower
- uses: seqeralabs/action-tower-launch@v1
+ uses: seqeralabs/action-tower-launch@v2
with:
workspace_id: ${{ secrets.TOWER_WORKSPACE_ID }}
access_token: ${{ secrets.TOWER_ACCESS_TOKEN }}
compute_env: ${{ secrets.TOWER_COMPUTE_ENV }}
+ revision: ${{ github.sha }}
workdir: s3://${{ secrets.AWS_S3_BUCKET }}/work/ampliseq/work-${{ github.sha }}
parameters: |
{
+ "hook_url": "${{ secrets.MEGATESTS_ALERTS_SLACK_HOOK_URL }}",
"outdir": "s3://${{ secrets.AWS_S3_BUCKET }}/ampliseq/results-${{ github.sha }}"
}
- profiles: test_full,aws_tower
+ profiles: test_full
+
- uses: actions/upload-artifact@v3
with:
name: Tower debug log file
- path: tower_action_*.log
+ path: |
+ tower_action_*.log
+ tower_action_*.json
diff --git a/.github/workflows/awstest.yml b/.github/workflows/awstest.yml
index d42ecdfd..7a4f39de 100644
--- a/.github/workflows/awstest.yml
+++ b/.github/workflows/awstest.yml
@@ -12,18 +12,22 @@ jobs:
steps:
# Launch workflow using Tower CLI tool action
- name: Launch workflow via tower
- uses: seqeralabs/action-tower-launch@v1
+ uses: seqeralabs/action-tower-launch@v2
with:
workspace_id: ${{ secrets.TOWER_WORKSPACE_ID }}
access_token: ${{ secrets.TOWER_ACCESS_TOKEN }}
compute_env: ${{ secrets.TOWER_COMPUTE_ENV }}
+ revision: ${{ github.sha }}
workdir: s3://${{ secrets.AWS_S3_BUCKET }}/work/ampliseq/work-${{ github.sha }}
parameters: |
{
"outdir": "s3://${{ secrets.AWS_S3_BUCKET }}/ampliseq/results-test-${{ github.sha }}"
}
- profiles: test,aws_tower
+ profiles: test
+
- uses: actions/upload-artifact@v3
with:
name: Tower debug log file
- path: tower_action_*.log
+ path: |
+ tower_action_*.log
+ tower_action_*.json
diff --git a/.gitpod.yml b/.gitpod.yml
index 85d95ecc..25488dcc 100644
--- a/.gitpod.yml
+++ b/.gitpod.yml
@@ -1,4 +1,9 @@
image: nfcore/gitpod:latest
+tasks:
+ - name: Update Nextflow and setup pre-commit
+ command: |
+ pre-commit install --install-hooks
+ nextflow self-update
vscode:
extensions: # based on nf-core.nf-core-extensionpack
diff --git a/CHANGELOG.md b/CHANGELOG.md
index c52dae40..cd2ca8b0 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -7,11 +7,14 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
### `Added`
+- [#558](https://github.com/nf-core/ampliseq/pull/558) - Pipeline summary report
+
### `Changed`
### `Fixed`
- [#605](https://github.com/nf-core/ampliseq/pull/605) - Make `--sbdiexport` compatible with PR2 version 5.0.0
+- [#614](https://github.com/nf-core/ampliseq/pull/614) - Template update for nf-core/tools version 2.9
### `Dependencies`
diff --git a/CITATIONS.md b/CITATIONS.md
index e488e7bd..873bc5d8 100644
--- a/CITATIONS.md
+++ b/CITATIONS.md
@@ -18,6 +18,8 @@
- [FastQC](https://www.bioinformatics.babraham.ac.uk/projects/fastqc/)
+ > Andrews, S. (2010). FastQC: A Quality Control Tool for High Throughput Sequence Data [Online]. Available online at: https://www.bioinformatics.babraham.ac.uk/projects/fastqc/.
+
- [Cutadapt](https://journal.embnet.org/index.php/embnetjournal/article/view/200/479)
> Martin, M. Cutadapt removes adapter sequences from high-throughput sequencing reads. EMBnet. journal 17.1 (2011): pp-10. doi: 10.14806/ej.17.1.200.
@@ -109,6 +111,10 @@
> Jari Oksanen, F. Guillaume Blanchet, Michael Friendly, Roeland Kindt, Pierre Legendre, Dan McGlinn, Peter R. Minchin, R. B. O’Hara, Gavin L. Simpson, Peter Solymos, M. Henry H. Stevens, Eduard Szoecs, and Helene Wagner. vegan: Community Ecology Package. 2018. R package version 2.5-3.
+- [Phyloseq](https://doi.org/10.1371/journal.pone.0061217)
+
+ > McMurdie PJ, Holmes S (2013). “phyloseq: An R package for reproducible interactive analysis and graphics of microbiome census data.” PLoS ONE, 8(4), e61217.
+
### Non-default tools
- [ITSx](https://besjournals.onlinelibrary.wiley.com/doi/10.1111/2041-210X.12073)
@@ -142,6 +148,7 @@
### Summarizing software
- [MultiQC](https://pubmed.ncbi.nlm.nih.gov/27312411/)
+
> Ewels P, Magnusson M, Lundin S, Käller M. MultiQC: summarize analysis results for multiple tools and samples in a single report. Bioinformatics. 2016 Oct 1;32(19):3047-8. doi: 10.1093/bioinformatics/btw354. Epub 2016 Jun 16. PubMed PMID: 27312411; PubMed Central PMCID: PMC5039924.
## Data
@@ -165,5 +172,8 @@
- [Docker](https://dl.acm.org/doi/10.5555/2600239.2600241)
+ > Merkel, D. (2014). Docker: lightweight linux containers for consistent development and deployment. Linux Journal, 2014(239), 2. doi: 10.5555/2600239.2600241.
+
- [Singularity](https://pubmed.ncbi.nlm.nih.gov/28494014/)
+
> Kurtzer GM, Sochat V, Bauer MW. Singularity: Scientific containers for mobility of compute. PLoS One. 2017 May 11;12(5):e0177459. doi: 10.1371/journal.pone.0177459. eCollection 2017. PubMed PMID: 28494014; PubMed Central PMCID: PMC5426675.
diff --git a/README.md b/README.md
index 56e499a3..e6b84050 100644
--- a/README.md
+++ b/README.md
@@ -40,7 +40,9 @@ By default, the pipeline currently performs the following:
- Taxonomical classification using DADA2, [SINTAX](https://doi.org/10.1101/074161) or [QIIME2](https://www.nature.com/articles/s41587-019-0209-9)
- Excludes unwanted taxa, produces absolute and relative feature/taxa count tables and plots, plots alpha rarefaction curves, computes alpha and beta diversity indices and plots thereof ([QIIME2](https://www.nature.com/articles/s41587-019-0209-9))
- Calls differentially abundant taxa ([ANCOM](https://www.ncbi.nlm.nih.gov/pubmed/26028277))
-- Overall pipeline run summaries ([MultiQC](https://multiqc.info/))
+- Creates phyloseq R objects ([Phyloseq](https://www.bioconductor.org/packages/release/bioc/html/phyloseq.html))
+- Pipeline QC summaries ([MultiQC](https://multiqc.info/))
+- Pipeline summary report ([R Markdown](https://github.com/rstudio/rmarkdown))
## Usage
@@ -72,11 +74,11 @@ nextflow run nf-core/ampliseq \
> provided by the `-c` Nextflow option can be used to provide any configuration _**except for parameters**_;
> see [docs](https://nf-co.re/usage/configuration#custom-configuration-files).
-For more details, please refer to the [usage documentation](https://nf-co.re/ampliseq/usage) and the [parameter documentation](https://nf-co.re/ampliseq/parameters).
+For more details and further functionality, please refer to the [usage documentation](https://nf-co.re/ampliseq/usage) and the [parameter documentation](https://nf-co.re/ampliseq/parameters).
## Pipeline output
-To see the the results of a test run with a full size dataset refer to the [results](https://nf-co.re/ampliseq/results) tab on the nf-core website pipeline page.
+To see the results of an example test run with a full size dataset refer to the [results](https://nf-co.re/ampliseq/results) tab on the nf-core website pipeline page.
For more details about the output files and reports, please refer to the
[output documentation](https://nf-co.re/ampliseq/output).
diff --git a/assets/methods_description_template.yml b/assets/methods_description_template.yml
index cef829de..625b2446 100644
--- a/assets/methods_description_template.yml
+++ b/assets/methods_description_template.yml
@@ -3,17 +3,21 @@ description: "Suggested text and references to use when describing pipeline usag
section_name: "nf-core/ampliseq Methods Description"
section_href: "https://github.com/nf-core/ampliseq"
plot_type: "html"
+## TODO nf-core: Update the HTML below to your preferred methods description, e.g. add publication citation for this pipeline
## You inject any metadata in the Nextflow '${workflow}' object
data: |
- Data was processed using nf-core/ampliseq v${workflow.manifest.version} ${doi_text} of the nf-core collection of workflows (Ewels et al., 2020).
+ Data was processed using nf-core/ampliseq v${workflow.manifest.version} ${doi_text} of the nf-core collection of workflows (Ewels et al., 2020), utilising reproducible software environments from the Bioconda (Grüning et al., 2018) and Biocontainers (da Veiga Leprevost et al., 2017) projects.
Notes:
diff --git a/assets/multiqc_config.yml b/assets/multiqc_config.yml
index 3452ef88..1502d618 100644
--- a/assets/multiqc_config.yml
+++ b/assets/multiqc_config.yml
@@ -1,7 +1,7 @@
report_comment: >
-    This report has been generated by the nf-core/ampliseq
+    This report has been generated by the nf-core/ampliseq
    analysis pipeline. For information about how to interpret these results, please see the
-    documentation.
+    documentation.
report_section_order:
"nf-core-ampliseq-methods-description":
order: -1000
diff --git a/assets/nf-core-ampliseq_logo_light.png b/assets/nf-core-ampliseq_logo_light.png
index dea08d56..58f01531 100644
Binary files a/assets/nf-core-ampliseq_logo_light.png and b/assets/nf-core-ampliseq_logo_light.png differ
diff --git a/assets/nf-core-ampliseq_logo_light_long.png b/assets/nf-core-ampliseq_logo_light_long.png
new file mode 100644
index 00000000..8aac12e2
Binary files /dev/null and b/assets/nf-core-ampliseq_logo_light_long.png differ
diff --git a/assets/nf-core_style.css b/assets/nf-core_style.css
new file mode 100644
index 00000000..0195a723
--- /dev/null
+++ b/assets/nf-core_style.css
@@ -0,0 +1,70 @@
+body {
+ font-family: Calibri, helvetica, sans-serif;
+}
+
+h1 {
+ color: rgb(36, 176, 100);
+ font-size: 200%;
+}
+
+h2 {
+ color: rgb(36, 176, 100);
+ font-size: 150%;
+}
+
+h3 {
+ font-size: 100%;
+ font-weight: bold;
+}
+
+h3.subtitle {
+ font-size: 120%;
+ color: rgb(0, 0, 0);
+ font-weight: bold;
+}
+
+h4 {
+ font-size: 100%;
+ font-weight: bold;
+ font-style: italic;
+}
+
+.watermark {
+ opacity: 0.1;
+ position: fixed;
+ top: 50%;
+ left: 50%;
+ font-size: 500%;
+ color: #24b064;
+}
+
+.list-group-item.active {
+ z-index: 2;
+ color: #fff;
+ background-color: #24b064;
+ border-color: #24b064;
+}
+.list-group-item.active:hover {
+ z-index: 2;
+ color: #fff;
+ background-color: #24b064;
+ border-color: #24b064;
+}
+
+#TOC {
+ background-size: contain;
+ padding-top: 60px !important;
+ background-repeat: no-repeat;
+}
+
+.nav-pills > li.active > a,
+.nav-pills > li.active > a:hover,
+.nav-pills > li.active > a:focus {
+ color: #fff;
+ background-color: #24b064;
+}
+
+a {
+ color: #24b064;
+ text-decoration: none;
+}
diff --git a/assets/report_template.Rmd b/assets/report_template.Rmd
new file mode 100644
index 00000000..a14cdc6c
--- /dev/null
+++ b/assets/report_template.Rmd
@@ -0,0 +1,1451 @@
+---
+output:
+ html_document:
+ toc: true # table of contents
+ toc_float: true # float the table of contents to the left of the main document content
+ toc_depth: 3 # header levels 1,2,3
+ theme: default
+ number_sections: true # add section numbering to headers
+ df_print: paged # tables are printed as an html table with support for pagination over rows and columns
+ highlight: pygments
+ pdf_document: true
+#bibliography: ./references.bibtex
+params:
+ # any parameter that is by default "FALSE" is used to evaluate the inclusion of a codeblock with e.g. "eval=!isFALSE(params$mqc_plot)"
+
+ # report style
+ css: NULL
+ report_logo: NULL
+ report_title: "Summary of analysis results"
+ report_abstract: FALSE
+
+ # pipeline versions
+ workflow_manifest_version: NULL
+ workflow_scriptid: NULL
+
+ # flags and arguments
+ flag_retain_untrimmed: FALSE
+ flag_ref_tax_user: FALSE
+ flag_single_end: FALSE
+ barplot: FALSE
+ abundance_tables: FALSE
+ alpha_rarefaction: FALSE
+ ancom: FALSE
+ trunclenf: ""
+ trunclenr: ""
+ max_ee: ""
+ trunc_qmin: FALSE
+ trunc_rmin: ""
+ dada_sample_inference: ""
+ filter_ssu: FALSE
+ min_len_asv: ""
+ max_len_asv: ""
+ cut_its: FALSE
+ dada2_ref_tax_title: FALSE
+ qiime2_ref_tax_title: FALSE
+ sintax_ref_tax_title: FALSE
+ dada2_ref_tax_file: ""
+ qiime2_ref_tax_file: ""
+ sintax_ref_tax_file: ""
+ dada2_ref_tax_citation: ""
+ qiime2_ref_tax_citation: ""
+ sintax_ref_tax_citation: ""
+ exclude_taxa: ""
+ min_frequency: ""
+ min_samples: ""
+ qiime2_filtertaxa: ""
+ val_used_taxonomy: FALSE
+ metadata_category_barplot: FALSE
+ qiime_adonis_formula: FALSE
+
+ # file paths
+ metadata: FALSE
+ samplesheet: FALSE
+ fasta: FALSE
+ input: FALSE
+ mqc_plot: FALSE
+ cutadapt_summary: FALSE
+ dada_filtntrim_args: FALSE
+ dada_qc_f_path: FALSE
+ dada_qc_r_path: ""
+ dada_pp_qc_f_path: ""
+ dada_pp_qc_r_path: ""
+ dada_err_path: FALSE
+ dada_err_run: ""
+ asv_table_path: FALSE
+ path_asv_fa: FALSE
+ path_dada2_tab: FALSE
+ dada_stats_path: FALSE
+ path_barrnap_sum: FALSE
+ filter_ssu_stats: ""
+ filter_ssu_asv: ""
+ filter_len_asv: FALSE
+ filter_len_asv_len_orig: FALSE
+ filter_codons: FALSE
+ stop_codons: ""
+ itsx_cutasv_summary: ""
+ cut_dada_ref_taxonomy: FALSE
+ dada2_taxonomy: FALSE
+ sintax_taxonomy: FALSE
+ pplace_taxonomy: FALSE
+ pplace_heattree: ""
+ qiime2_taxonomy: FALSE
+ filter_stats_tsv: FALSE
+ diversity_indices_depth: ""
+ diversity_indices_beta: FALSE
+ diversity_indices_adonis: ""
+ picrust_pathways: FALSE
+---
+
+
+
+```{r libraries, include=FALSE}
+library("dplyr")
+library("ggplot2")
+library("knitr")
+library("DT")
+library("formattable")
+library("purrr")
+```
+
+
+
+```{r setup, include=FALSE}
+knitr::opts_chunk$set(echo = FALSE) # echo is set in differentialabundance v1.2.0 to TRUE
+```
+
+
+
+```{r, echo=FALSE}
+htmltools::includeCSS(params$css)
+```
+
+```{r results="asis", echo=FALSE}
+cat(paste0("
+
+"))
+```
+
+
+
+```{r}
+if ( endsWith( params$workflow_manifest_version, "dev") ) {
+ ampliseq_version = paste0("version ", params$workflow_manifest_version, ", revision ", params$workflow_scriptid)
+} else {
+ ampliseq_version = paste0("version ",params$workflow_manifest_version)
+}
+report_title <- params$report_title
+report_subtitle <- paste0('nf-core/ampliseq workflow ', ampliseq_version)
+```
+
+---
+title: "
`r report_title`"
+subtitle: `r report_subtitle`
+date: '`r format(Sys.Date(), "%B %d, %Y")`'
+---
+
+---
+
+
+
+```{r, results='asis'}
+if ( !isFALSE(params$report_abstract) ) {
+ report_abstract <- paste(readLines(params$report_abstract), collapse="\n")
+ cat(report_abstract)
+} else {
+ # with tab indentation, the following will be a code block!
+ cat(paste0("
+# Abstract
+
+The bioinformatics analysis pipeline [nf-core/ampliseq](https://nf-co.re/ampliseq) is used for amplicon sequencing,
+supporting denoising of any amplicon and a variety of taxonomic databases for taxonomic assignment of 16S, ITS, CO1 and 18S amplicons.
+ "))
+}
+```
+
+
+
+```{r, results='asis'}
+if ( !isFALSE(params$metadata) ) {
+ cat(paste0("
+# Data input and Metadata
+
+Pipeline input was saved to the [input](../input) directory.
+ "))
+} else {
+ cat(paste0("
+# Data input
+
+Pipeline input was saved in folder [input](../input).
+ "))
+}
+
+if ( !isFALSE(params$samplesheet) ) {
+ # samplesheet input
+ cat("\nSequencing data was provided in the samplesheet file `", params$samplesheet, "` that is displayed below:", sep="")
+
+ samplesheet <- read.table(file = params$samplesheet, header = TRUE, sep = "\t")
+ # Display table
+ datatable(samplesheet, options = list(
+ scrollX = TRUE,
+ scrollY = "300px",
+ paging = FALSE))
+} else if ( !isFALSE(params$fasta) ) {
+ # fasta input
+ cat("\nASV/OTU sequences were provided in the fasta file `", params$fasta, "`. ", sep="")
+} else if ( !isFALSE(params$input) ) {
+ # folder input
+ cat("\nSequencing data was retrieved from folder `", params$fasta, "`. ", sep="")
+}
+if ( !isFALSE(params$metadata) ) {
+ cat("\nMetadata associated with the sequencing data was provided in `", params$metadata, "` and is displayed below:", sep="")
+
+ metadata <- read.table(file = params$metadata, header = TRUE, sep = "\t")
+ # Display table
+ datatable(metadata, options = list(
+ scrollX = TRUE,
+ scrollY = "300px",
+ paging = FALSE))
+}
+```
+
+
+
+```{r, eval = !isFALSE(params$mqc_plot) || !isFALSE(params$dada_filtntrim_args), results='asis'}
+cat("# Preprocessing\n")
+```
+
+
+
+```{r, eval = !isFALSE(params$mqc_plot), results='asis'}
+cat(paste0("
+## FastQC
+
+[FastQC](https://www.bioinformatics.babraham.ac.uk/projects/fastqc/) gives general quality metrics about your sequenced reads.
+It provides information about the quality score distribution across your reads, per base sequence content (%A/T/G/C),
+adapter contamination and overrepresented sequences. The sequence quality was checked using FastQC and the resulting data was
+aggregated using the FastQC module of [MultiQC](https://multiqc.info/). For further quality control metrics and per-sample quality checks, see the full
+MultiQC report in [multiqc/multiqc_report.html](../multiqc/multiqc_report.html).
+"))
+```
+
+```{r, eval = !isFALSE(params$mqc_plot), out.width='100%', dpi=1200, fig.align='center'}
+knitr::include_graphics(params$mqc_plot)
+```
+
+
+
+```{r, eval = !isFALSE(params$cutadapt_summary), results='asis'}
+cat(paste0("
+## Primer removal with Cutadapt
+
+[Cutadapt](https://journal.embnet.org/index.php/embnetjournal/article/view/200) removes primer sequences from sequencing reads.
+Primer sequences are non-biological sequences that often introduce point mutations that do not reflect sample sequences. This is especially
+true for degenerate PCR primers. If primer trimming were omitted, artifactual amplicon sequence variants might be computed by
+the denoising tool, or sequences might be lost because they are labelled as PCR chimeras.
+"))
+
+# import tsv
+cutadapt_summary <- read.table(file = params$cutadapt_summary, header = TRUE, sep = "\t")
+
+cutadapt_passed_col <- as.numeric(substr(
+ cutadapt_summary$cutadapt_passing_filters_percent, 1, 4))
+
+cutadapt_max_discarded <- round( 100 - min(cutadapt_passed_col), 1 )
+cutadapt_avg_passed <- round(mean(cutadapt_passed_col),1)
+
+cutadapt_text_unch <- "Primers were trimmed using cutadapt"
+cutadapt_text_ch <- paste0(" and all untrimmed sequences were discarded. ",
+ "Sequences that did not contain primer sequences were considered artifacts. Less than ",
+ cutadapt_max_discarded, "% of the sequences were discarded per sample and a mean of ",
+ cutadapt_avg_passed, "% of the sequences per sample passed the filtering. ")
+
+if ( isFALSE(params$flag_retain_untrimmed) ) cutadapt_text <- paste0(
+ cutadapt_text_unch, cutadapt_text_ch
+ ) else cutadapt_text <- paste0(cutadapt_text_unch, ". ")
+
+cat(cutadapt_text)
+cat("Cutadapt results can be found in folder [cutadapt](../cutadapt).")
+
+# shorten header by "cutadapt_" to optimize visualisation
+colnames(cutadapt_summary) <- gsub("cutadapt_","",colnames(cutadapt_summary))
+
+datatable(cutadapt_summary, options = list(
+ scrollX = TRUE,
+ scrollY = "300px",
+ paging = FALSE))
+```
+
+
+
+```{r, eval = !isFALSE(params$dada_filtntrim_args), results='asis'}
+cat(paste0("
+## Quality filtering using DADA2
+
+Additional quality filtering can improve sequence recovery.
+It is often advisable to trim the last few nucleotides to avoid less well-controlled errors that can arise there.
+"))
+
+if (params$trunc_qmin) {
+ f_and_tr_args <- readLines(params$dada_filtntrim_args)
+ trunc_len <- strsplit(gsub(".*truncLen = c\\((.+)\\),maxN.*", "\\1",
+ f_and_tr_args), ", ")
+ tr_len_f <- trunc_len[[1]][1]
+ tr_len_r <- trunc_len[[1]][2]
+ cat("Reads were trimmed to a specific length and the length cutoff was ",
+ "automatically determined by the median quality of all input reads. ",
+ "Reads were trimmed before median quality drops ",
+ "below ", params$trunc_qmin, " and at least ",params$trunc_rmin*100,
+ "% of reads are retained, resulting in a trim of ",
+ "forward reads at ", tr_len_f, " bp and reverse ",
+ "reads at ", tr_len_r, " bp, reads shorter than this were discarded. ", sep = "")
+} else if (params$trunclenf == "null" && params$trunclenr == "null") {
+ cat("Reads were not trimmed. ")
+} else if (params$trunclenf != 0 && params$trunclenr != 0) {
+ cat("Forward reads were trimmed at ", params$trunclenf,
+ " bp and reverse reads were trimmed at ", params$trunclenr,
+ " bp, reads shorter than this were discarded. ", sep = "")
+} else if (params$trunclenf != 0) {
+ cat("Forward reads were trimmed at ", params$trunclenf," bp, reads shorter than this were discarded. ", sep = "")
+} else if (params$trunclenr != 0) {
+ cat("Reverse reads were trimmed at ", params$trunclenr," bp, reads shorter than this were discarded. ", sep = "")
+}
+cat("Reads with more than", params$max_ee,"expected errors were discarded.",
+ "Read counts passing the filter are shown in section ['Read counts per sample'](#read-counts-per-sample)",
+ "column 'filtered'.", sep = " ")
+```
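+
+A minimal, non-executed sketch of the underlying DADA2 call (file names and values are placeholders, not pipeline defaults; `truncLen` and `maxEE` correspond to the `--trunclenf`/`--trunclenr` and `--max_ee` parameters):
+
+```r
+# Illustrative sketch only, not the pipeline's actual code.
+library(dada2)
+filterAndTrim(
+  fwd = "sample_R1.fastq.gz", filt = "filtered/sample_R1.fastq.gz",
+  rev = "sample_R2.fastq.gz", filt.rev = "filtered/sample_R2.fastq.gz",
+  truncLen = c(230, 180), # truncate forward/reverse reads at these positions
+  maxEE = c(2, 2),        # discard reads with more than this many expected errors
+  truncQ = 2, maxN = 0, compress = TRUE
+)
+```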
+
+
+
+```{r, eval = !isFALSE(params$dada_qc_f_path), results='asis'}
+cat ("**Quality profiles:**\n\n")
+
+if (params$flag_single_end) {
+ cat("Read quality stats for incoming data:")
+} else {
+ cat("Forward (left) and reverse (right) read quality stats for incoming data:")
+}
+```
+
+```{r, eval = !isFALSE(params$dada_qc_f_path), out.width="49%", fig.show='hold', fig.align='default'}
+if (params$flag_single_end) {
+ knitr::include_graphics(params$dada_qc_f_path)
+} else {
+ knitr::include_graphics(c(params$dada_qc_f_path, params$dada_qc_r_path))
+}
+```
+
+```{r, eval = !isFALSE(params$dada_qc_f_path), results='asis'}
+if (params$flag_single_end) {
+ cat("Read quality stats for preprocessed data:")
+} else {
+ cat("Forward (left) and reverse (right) read quality stats for preprocessed data:")
+}
+```
+
+```{r, eval = !isFALSE(params$dada_qc_f_path), out.width="49%", fig.show='hold', fig.align='default'}
+if (params$flag_single_end) {
+ knitr::include_graphics(params$dada_pp_qc_f_path)
+} else {
+ knitr::include_graphics(c(params$dada_pp_qc_f_path, params$dada_pp_qc_r_path))
+}
+```
+
+```{r, eval = !isFALSE(params$dada_qc_f_path), results='asis'}
+cat(paste0("
+Overall read quality profiles are displayed as a heat map of the frequency of each quality score at each base position.
+The mean quality score at each position is shown by the green line, and the quartiles of the quality score
+distribution by the orange lines. The red line shows the scaled proportion of reads that extend to at least
+that position. The original plots can be found in [folder dada2/QC/](../dada2/QC/) with names that end in `_qual_stats.pdf`.
+"))
+```
+
+
+
+```{r, eval = !isFALSE(params$dada_err_path) || !isFALSE(params$dada_stats_path) || !isFALSE(params$asv_table_path), results='asis'}
+cat(paste0("
+# ASV inference using DADA2
+
+[DADA2](https://doi.org/10.1038/nmeth.3869) performs fast and accurate sample inference from amplicon data with single-nucleotide
+resolution. It infers exact amplicon sequence variants (ASVs) from amplicon data with fewer false positives than many other
+methods while maintaining high sensitivity.
+
+DADA2 reduces sequence errors and dereplicates sequences by quality filtering, denoising,
+read pair merging (for paired end Illumina reads only) and PCR chimera removal.
+"))
+```
+
+
+
+```{r, eval = !isFALSE(params$dada_err_path), results='asis'}
+cat(paste0("
+## Error correction
+
+Read error correction was performed using estimated error rates, visualized below.
+"))
+
+# check if single run or multirun
+flag_multirun = length ( unlist( strsplit( params$dada_err_run,"," ) ) ) != 1
+
+if ( flag_multirun && params$flag_single_end ) {
+ # single end multi run
+ cat("Error rates were estimated for each sequencing run separately. ",
+ "Each 4x4 figure represents one run, in the sequence ", params$dada_err_run,".")
+} else if ( flag_multirun && !params$flag_single_end ) {
+ # paired end multi run
+ cat("Error rates were estimated for each sequencing run separately. ",
+ "Each row represents one run, in the sequence ", params$dada_err_run,".",
+ "For each row, the error rates for forward reads are at the left side and reverse reads are at the right side.")
+} else if ( !flag_multirun && !params$flag_single_end ) {
+ # paired end single run
+ cat("Error rates for forward reads are at the left side and reverse reads are at the right side.")
+}
+```
+
+```{r, eval = !isFALSE(params$dada_err_path), out.width="49%", fig.show='hold', fig.align='default'}
+dada_err_path <- unlist( strsplit( params$dada_err_path,"," ) )
+knitr::include_graphics(dada_err_path)
+```
+
+```{r, eval = !isFALSE(params$dada_err_path), results='asis'}
+cat(paste0("
+Estimated error rates are displayed for each possible transition. The black line shows the estimated error rates after
+convergence of the machine-learning algorithm. The red line shows the error rates expected under the nominal
+definition of the Q-score. The estimated error rates (black line) should be a good fit to the observed rates
+(points), and the error rates should drop with increased quality. Original plots can be found in
+[folder dada2/QC/](../dada2/QC/) with names that end in `.err.pdf`.
+"))
+```
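+
+A minimal, non-executed sketch of how such an error model and diagnostic plot can be produced with DADA2 (the input file is a placeholder):
+
+```r
+# Illustrative sketch only, not the pipeline's actual code.
+library(dada2)
+err <- learnErrors("filtered/sample_R1.fastq.gz", multithread = TRUE)
+plotErrors(err, nominalQ = TRUE) # black = estimated rates, red = nominal Q-score expectation
+```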
+
+
+
+```{r, eval = !isFALSE(params$dada_stats_path), results='asis'}
+cat(paste0("
+## Read counts per sample
+
+Tracking read numbers through DADA2 processing steps for each sample. The following table shows the read numbers after each processing stage.
+"))
+
+if ( params$flag_single_end ) {
+ cat("Processing stages are: input - reads into DADA2, filtered - reads passed quality filtering, ",
+ "denoised - reads after denoising, nonchim - reads in non-chimeric sequences (final ASVs).")
+} else {
+ cat("Processing stages are: input - read pairs into DADA2, filtered - read pairs passed quality filtering, ",
+ "denoisedF - forward reads after denoising, denoisedR - reverse reads after denoising, ",
+ "merged - successfully merged read pairs, nonchim - read pairs in non-chimeric sequences (final ASVs).")
+}
+
+# import stats tsv
+dada_stats <- read.table(file = params$dada_stats_path, header = TRUE, sep = "\t")
+
+# Display table
+datatable(dada_stats, options = list(
+ scrollX = TRUE,
+ scrollY = "300px",
+ paging = FALSE))
+
+cat(paste0("
+Samples with unusually low read numbers relative to the number of expected ASVs
+should be treated cautiously, because the abundance estimates will be very granular
+and might vary strongly between (theoretical) replicates due to the high impact of stochasticity.
+
+Below, the numbers of the table above are shown in stacked barcharts as percentages of DADA2 input reads.
+"))
+
+# Stacked barchart to num of reads
+
+# Calculate excluded ASVs and transform all columns to percent
+
+if ( params$flag_single_end ) {
+ # single end
+ cat("Stacked barcharts of read numbers per sample and processing stage")
+
+ dada_stats_ex <- data.frame(sample = dada_stats$sample,
+ input = dada_stats$DADA2_input,
+ filtered = dada_stats$DADA2_input-dada_stats$filtered,
+ denoised = dada_stats$filtered-dada_stats$denoised,
+ nonchim = dada_stats$denoised-dada_stats$nonchim,
+ analysis = dada_stats$nonchim)
+ dada_stats_p <- data.frame(sample = dada_stats_ex$sample, round(dada_stats_ex[2:6]/dada_stats_ex$input*100, 2))
+ dada_stats_p_analysis_average <- round(sum(dada_stats_p$analysis)/length(dada_stats_p$analysis), 1)
+ # If 20 or more samples, only display a subset!
+ if ( nrow(dada_stats_p)>=20 ) {
+ cat(" (displaying the 10 samples with the lowest and the 10 with the highest percentage of reads analysed, of",nrow(dada_stats_p),"samples)")
+ dada_stats_p <- dada_stats_p[order(-dada_stats_p$analysis),]
+ dada_stats_p <- rbind(head(dada_stats_p,10),tail(dada_stats_p,10))
+ }
+ # Stack columns for both stacked barcharts
+ n_samples <- length(dada_stats_p$sample)
+ samples_t <- c(rep(dada_stats_p$sample, 4))
+ steps_t <- c(rep("excluded by filtering", n_samples), rep("excluded by denoised", n_samples),
+ rep("excluded by nonchim", n_samples), rep("reads in final ASVs", n_samples))
+ # stack the column for percentage of asvs
+ asvs_p_t <- as.array(flatten_dbl(dada_stats_p[3:6]))
+ dada_stats_p_t <- data.frame(samples_t, steps_t, asvs_p_t)
+} else {
+ # paired end
+ cat("Stacked barchart of read pair numbers (denoisedF & denoisedR halfed, because each pair is split) per sample and processing stage")
+
+ dada_stats_ex <- data.frame(sample = dada_stats$sample,
+ DADA2_input = dada_stats$DADA2_input,
+ filtered = dada_stats$DADA2_input-dada_stats$filtered,
+ denoisedF = (dada_stats$filtered-dada_stats$denoisedF)/2,
+ denoisedR = (dada_stats$filtered-dada_stats$denoisedR)/2,
+ merged = (dada_stats$denoisedF+dada_stats$denoisedR)/2-dada_stats$merged,
+ nonchim = dada_stats$merged-dada_stats$nonchim,
+ analysis = dada_stats$nonchim)
+ dada_stats_p <- data.frame(sample = dada_stats_ex$sample, round(dada_stats_ex[2:8]/dada_stats_ex$DADA2_input*100, 2))
+ dada_stats_p_analysis_average <- round(sum(dada_stats_p$analysis)/length(dada_stats_p$analysis), 1)
+ # If 20 or more samples, only display a subset!
+ if ( nrow(dada_stats_p)>=20 ) {
+ cat(" (displaying the 10 samples with the lowest and the 10 with the highest percentage of reads analysed, of",nrow(dada_stats_p),"samples)")
+ dada_stats_p <- dada_stats_p[order(-dada_stats_p$analysis),]
+ dada_stats_p <- rbind(head(dada_stats_p,10),tail(dada_stats_p,10))
+ }
+ # Stack columns for both stacked barcharts
+ n_samples <- length(dada_stats_p$sample)
+ samples_t <- c(rep(dada_stats_p$sample, 6))
+ steps_t <- c(rep("excluded by filtering", n_samples), rep("excluded by denoisedF", n_samples),
+ rep("excluded by denoisedR", n_samples), rep("excluded by merged", n_samples),
+ rep("excluded by nonchim", n_samples), rep("reads in final ASVs", n_samples))
+ # stack the column for percentage of asvs
+ asvs_p_t <- as.array(flatten_dbl(dada_stats_p[3:8]))
+ dada_stats_p_t <- data.frame(samples_t, steps_t, asvs_p_t)
+}
+cat(":\n\n")
+
+# Plot
+dada_stats_p_t$steps_t <- factor(dada_stats_p_t$steps_t, levels=unique(dada_stats_p_t$steps_t))
+dada_stats_p_t$samples_t <- factor(dada_stats_p_t$samples_t, levels=dada_stats_p_t[order(dada_stats_p$analysis),"samples_t"])
+
+plot_dada_stats_p_t <- ggplot(dada_stats_p_t, aes(fill = steps_t, y = asvs_p_t, x = samples_t)) +
+ geom_bar(position = "fill", stat = "identity") +
+ xlab("Samples") +
+ ylab("Fraction of total reads") +
+ coord_flip() +
+ scale_fill_brewer("Filtering Steps", palette = "Spectral")
+plot_dada_stats_p_t
+
+svg("stacked_barchart_of_reads.svg")
+plot_dada_stats_p_t
+invisible(dev.off())
+
+cat(paste0("
+
+Between ",min(dada_stats_p$analysis),"% and ",max(dada_stats_p$analysis),"% reads per sample (average ",dada_stats_p_analysis_average,"%)
+were retained for analysis within DADA2 steps.
+
+The proportion of lost reads per processing stage and sample should not be too high, totalling typically <50%.
+Samples that are very different in lost reads (per stage) to the majority of samples must be compared with caution, because an unusual problem
+(e.g. during nucleotide extraction, library preparation, or sequencing) could have occurred that might add bias to the analysis.
+"))
+```
+
+
+
+```{r, eval = !isFALSE(params$asv_table_path), results='asis'}
+cat("## Inferred ASVs\n\n")
+
+#import asv table
+asv_table <- read.table(file = params$asv_table_path, header = TRUE, sep = "\t")
+n_asv <- length(asv_table$ASV_ID)
+
+# Output text
+cat("Finally,", n_asv,
+ "amplicon sequence variants (ASVs) were obtained across all samples. ")
+cat("The ASVs can be found in [`dada2/ASV_seqs.fasta`](../dada2/). And the corresponding",
+ " quantification of the ASVs across samples is in",
+ "[`dada2/ASV_table.tsv`](../dada2/). An extensive table containing both was ",
+ "saved as [`dada2/DADA2_table.tsv`](../dada2/). ")
+if ( params$dada_sample_inference == "independent" ) {
+ cat("ASVs were inferred for each sample independently.")
+} else if ( params$dada_sample_inference == "pooled" ) {
+ cat("ASVs were inferred from pooled sample information.")
+} else {
+ cat("ASVs were initally inferred for each sample independently, but re-examined with all samples (pseudo-pooled).")
+}
+```
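+
+A minimal, non-executed sketch of how the sample inference setting maps onto DADA2's `pool` argument (`filtered_files` and `err` are placeholders):
+
+```r
+# Illustrative sketch only, not the pipeline's actual code.
+# independent   -> pool = FALSE    (each sample denoised on its own)
+# pooled        -> pool = TRUE     (all samples denoised together)
+# pseudo-pooled -> pool = "pseudo" (independent first, then re-examined with pooled information)
+library(dada2)
+dada_result <- dada(filtered_files, err = err, pool = "pseudo", multithread = TRUE)
+```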
+
+```{r, results='asis'}
+flag_any_filtering <- !isFALSE(params$path_barrnap_sum) || !isFALSE(params$filter_len_asv) || !isFALSE(params$filter_codons)
+```
+
+
+
+```{r, eval = flag_any_filtering, results='asis'}
+cat("# Filtering of ASVs\n")
+```
+
+
+
+```{r, eval = !isFALSE(params$path_barrnap_sum), results='asis'}
+cat("## rRNA detection\n")
+cat("[Barrnap](https://github.com/tseemann/barrnap) classifies the ASVs into the origin domain (including mitochondrial origin).\n\n", sep = "")
+
+# Read the barrnap files and count the lines
+barrnap_sum = read.table( params$path_barrnap_sum, header = TRUE, sep = "\t", stringsAsFactors = FALSE)
+# keep only ASV_ID & eval columns & sort
+barrnap_sum <- subset(barrnap_sum, select = c(ASV_ID,mito_eval,euk_eval,arc_eval,bac_eval))
+# choose kingdom (column) with lowest evalue
+barrnap_sum[is.na(barrnap_sum)] <- 1
+barrnap_sum$result = colnames(barrnap_sum[,2:5])[apply(barrnap_sum[,2:5],1,which.min)]
+barrnap_sum$result = gsub("_eval", "", barrnap_sum$result)
+
+#import asv table
+asv_table <- readLines(params$path_asv_fa)
+n_asv <- sum(grepl("^>", asv_table))
+
+# calculate numbers
+n_classified <- length(barrnap_sum$result)
+n_bac <- sum(grepl("bac", barrnap_sum$result))
+n_arc <- sum(grepl("arc", barrnap_sum$result))
+n_mito <- sum(grepl("mito", barrnap_sum$result))
+n_euk <- sum(grepl("euk", barrnap_sum$result))
+
+barrnap_df_sum <- data.frame(label=c('Bacteria','Archaea','Mitochondria','Eukaryotes','Unclassified'),
+ count=c(n_bac,n_arc,n_mito,n_euk,n_asv - n_classified),
+ percent=c(round( (n_bac/n_asv)*100, 2), round( (n_arc/n_asv)*100, 2), round( (n_mito/n_asv)*100, 2), round( (n_euk/n_asv)*100, 2), round( ( (n_asv - n_classified) /n_asv)*100, 2) ) )
+
+# Build outputtext
+cat( "Barrnap classified ")
+cat( barrnap_df_sum$count[1], "(", barrnap_df_sum$percent[1],"%) ASVs as most similar to Bacteria, " )
+cat( barrnap_df_sum$count[2], "(", barrnap_df_sum$percent[2],"%) ASVs to Archaea, " )
+cat( barrnap_df_sum$count[3], "(", barrnap_df_sum$percent[3],"%) ASVs to Mitochondria, " )
+cat( barrnap_df_sum$count[4], "(", barrnap_df_sum$percent[4],"%) ASVs to Eukaryotes, and " )
+cat( barrnap_df_sum$count[5], "(", barrnap_df_sum$percent[5],"%) were below the similarity threshold to any kingdom." )
+
+# Barplot
+plot_barrnap_df_sum <- ggplot(barrnap_df_sum,
+ aes(x = reorder(label, desc(label)), y = percent)) +
+ geom_bar(stat = "identity", fill = rgb(0.1, 0.4, 0.75), width = 0.5) +
+ ylab("% Classification") +
+ xlab("rRNA origins") +
+ coord_flip() +
+ theme_bw() +
+ ylim(0, 100)
+plot_barrnap_df_sum
+
+svg("rrna_detection_with_barrnap.svg")
+plot_barrnap_df_sum
+invisible(dev.off())
+
+cat("\n\nrRNA classification results can be found in folder [barrnap](../barrnap).")
+```
+
+
+
+```{r, eval = !isFALSE(params$path_barrnap_sum) && !isFALSE(params$filter_ssu), results='asis'}
+cat("\n\nASVs were filtered for `",params$filter_ssu,"` (`bac`: Bacteria, `arc`: Archaea, `mito`: Mitochondria, `euk`: Eukaryotes)
+ using the above classification. The following table shows read counts for each sample before and after filtering:\n\n", sep = "")
+
+# Read the barrnap stats file
+filter_ssu_stats = read.table( params$filter_ssu_stats, header = TRUE, sep = "\t", stringsAsFactors = FALSE)
+# shorten header by "ssufilter_" to optimize visualisation
+colnames(filter_ssu_stats) <- gsub("ssufilter_","",colnames(filter_ssu_stats))
+filter_ssu_stats <- subset(filter_ssu_stats, select = c(sample,input,output))
+filter_ssu_stats$'retained%' <- round( filter_ssu_stats$output / filter_ssu_stats$input *100, 2)
+filter_ssu_stats_avg_removed <- 100-sum(filter_ssu_stats$'retained%')/length(filter_ssu_stats$'retained%')
+filter_ssu_stats_max_removed <- 100-min(filter_ssu_stats$'retained%')
+
+# Display table
+datatable(filter_ssu_stats, options = list(
+ scrollX = TRUE,
+ scrollY = "300px",
+ paging = FALSE))
+
+# Read the barrnap asv file
+filter_ssu_asv <- read.table( params$filter_ssu_asv, header = TRUE, sep = "\t", stringsAsFactors = FALSE)
+filter_ssu_asv_filtered <- nrow(filter_ssu_asv)
+
+cat("In average", round(filter_ssu_stats_avg_removed,2), "% reads were removed, but at most",filter_ssu_stats_max_removed,"% reads per sample. ")
+# "n_asv" is taken from the barrnap block above
+cat("The number of ASVs was reduced by",n_asv-filter_ssu_asv_filtered,"(",100-round( filter_ssu_asv_filtered/n_asv*100 ,2),"%), from",n_asv,"to",filter_ssu_asv_filtered," ASVs.")
+```
+
+
+
+```{r, eval = !isFALSE(params$filter_len_asv_len_orig), results='asis'}
+cat(paste0("
+## Sequence length
+
+A length filter was used to reduce potential contamination.
+Before filtering, ASVs had the following length profile (counts of 1 were transformed to 1.5 to allow plotting on a log10 scale):
+
+"))
+
+# ASV length profile
+
+# import length profile tsv
+filter_len_profile <- read.table(file = params$filter_len_asv_len_orig, header = TRUE, sep = "\t")
+
+# find number of ASVs filtered
+filter_len_asv_filtered <- filter_len_profile
+if ( params$min_len_asv != 0 ) {
+ filter_len_asv_filtered <- subset(filter_len_asv_filtered, Length >= params$min_len_asv)
+}
+if ( params$max_len_asv != 0 ) {
+ filter_len_asv_filtered <- subset(filter_len_asv_filtered, Length <= params$max_len_asv)
+}
+
+# replace 1 with 1.5 to display on log scale
+filter_len_profile$Counts[filter_len_profile$Counts == 1] <- 1.5
+
+plot_filter_len_profile <- ggplot(filter_len_profile,
+ aes(x = Length, y = Counts)) +
+ geom_bar(stat = "identity", fill = rgb(0.1, 0.4, 0.75), width = 0.5) +
+ ylab("Number of ASVs") +
+ xlab("Length") +
+ scale_y_continuous(trans = "log10") +
+ theme_bw()
+plot_filter_len_profile
+
+svg("asv_length_profile_before_length_filter.svg")
+plot_filter_len_profile
+invisible(dev.off())
+
+cat("\n\n")
+if ( params$min_len_asv != 0 && params$max_len_asv != 0 ) {
+ cat("Filtering omitted all ASVs with length lower than",params$min_len_asv,"or above",params$max_len_asv,"bp. ")
+} else if ( params$min_len_asv != 0 ) {
+ cat("Filtering omitted all ASVs with length lower than",params$min_len_asv,"bp. ")
+} else if ( params$max_len_asv != 0 ) {
+ cat("Filtering omitted all ASVs with length above",params$max_len_asv,"bp. ")
+}
+```
+
+```{r, eval = !isFALSE(params$filter_len_asv), results='asis'}
+# import stats tsv
+filter_len_stats <- read.table(file = params$filter_len_asv, header = TRUE, sep = "\t")
+# only if file not empty continue with reporting below
+flag_filter_len_stats <- nrow(filter_len_stats) > 0
+```
+
+```{r, eval = !isFALSE(params$filter_len_asv) && flag_filter_len_stats, results='asis'}
+# Reads removed
+
+# re-name & re-order columns
+colnames(filter_len_stats) <- gsub("lenfilter_","",colnames(filter_len_stats))
+filter_len_stats <- filter_len_stats[, c("sample", "input", "output")]
+filter_len_stats$'retained%' <- round( filter_len_stats$output / filter_len_stats$input * 100 , 2)
+filter_len_stats_avg_removed <- 100-sum(filter_len_stats$'retained%')/length(filter_len_stats$'retained%')
+filter_len_stats_max_removed <- 100-min(filter_len_stats$'retained%')
+
+cat("The following table shows read counts for each sample before and after filtering:")
+
+# Display table
+datatable(filter_len_stats, options = list(
+ scrollX = TRUE,
+ scrollY = "300px",
+ paging = FALSE))
+
+cat("In average", filter_len_stats_avg_removed, "% reads were removed, but at most",filter_len_stats_max_removed,"% reads per sample.")
+```
+
+```{r, eval = !isFALSE(params$filter_len_asv_len_orig), results='asis'}
+cat("The number of ASVs was reduced by",sum(filter_len_profile$Counts)-sum(filter_len_asv_filtered$Counts),"(",100-round( sum(filter_len_asv_filtered$Counts)/sum(filter_len_profile$Counts)*100 ,2),"%), from",sum(filter_len_profile$Counts),"to",sum(filter_len_asv_filtered$Counts)," ASVs.")
+cat("\n\nLength filter results can be found in folder [asv_length_filter](../asv_length_filter).")
+```
+
+
+
+```{r, eval = !isFALSE(params$filter_codons), results='asis'}
+cat(paste0("
+## Codon usage
+
+Amplicons of coding regions are expected to be free of stop codons and to consist of codon triplets.
+ASVs were filtered for the presence of stop codons (",params$stop_codons,") in the specified open reading frame of the ASV.
+Additionally, ASVs whose length is not a multiple of 3 were omitted.
+
+"))
+
+# import stats tsv
+filter_codons_stats <- read.table(file = params$filter_codons, header = TRUE, sep = "\t")
+
+cat("The following table shows read counts for each sample after filtering:")
+
+# Display table
+datatable(filter_codons_stats, options = list(
+ scrollX = TRUE,
+ scrollY = "300px",
+ paging = FALSE))
+
+#TODO: add ASV count after filtering
+
+cat("\n\nCodon usage filter results can be found in folder [codon_filter](../codon_filter).")
+```
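+
+A minimal, non-executed sketch of this kind of check for a single ASV sequence (the helper `has_stop_codon` and the stop codon set are illustrative only, not the pipeline's implementation):
+
+```r
+# Illustrative sketch only, not the pipeline's actual code.
+has_stop_codon <- function(asv_seq, stop_codons = c("TAA", "TAG")) {
+  if (nchar(asv_seq) %% 3 != 0) return(TRUE) # length not a multiple of 3 -> discard
+  codons <- substring(asv_seq, seq(1, nchar(asv_seq), 3), seq(3, nchar(asv_seq), 3))
+  any(codons %in% stop_codons)               # in-frame stop codon -> discard
+}
+has_stop_codon("ATGAAATTTGGG") # FALSE: this ASV would be kept
+```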
+
+
+
+```{r, results='asis'}
+# Check if any taxonomic classification is available
+any_taxonomy <- !isFALSE(params$dada2_taxonomy) || !isFALSE(params$qiime2_taxonomy) || !isFALSE(params$sintax_taxonomy) || !isFALSE(params$pplace_taxonomy)
+```
+
+```{r, eval = any_taxonomy, results='asis'}
+# Header if any taxonomic classification is available
+cat("# Taxonomic Classification\n")
+```
+
+
+
+```{r, eval = !isFALSE(params$cut_its), results='asis'}
+cat(paste0("
+## ITS regions
+
+The ",params$cut_its," region was extracted from each ASV sequence using [ITSx](https://besjournals.onlinelibrary.wiley.com/doi/10.1111/2041-210X.12073).
+Taxonomic classification should have improved performance based on the extracted ITS sequence. ITSx results can be found in folder [itsx](../itsx).
+
+Taxonomies per extracted region were then transferred back to the full ASV sequence. No filtering was done based on whether the region was found or not.
+Those taxonomic classifications per ASV can be found in files `ASV_tax.tsv` and `ASV_tax_species.tsv` in folder [dada2/](../dada2/).
+
+However, the files `ASV_ITS_tax.tsv` and `ASV_ITS_tax_species.tsv` in folder [dada2/](../dada2/) contain only the chosen ITS part of just the ASVs where the region was found.
+Of course, different ASVs may contain identical ",params$cut_its," regions, leading to identical taxonomy assignments,
+but the full ASVs were recorded as separate entries anyway to retain maximum resolution at this stage.
+"))
+
+# Read ITSX summary
+itsx_summary <- readLines(params$itsx_cutasv_summary)
+
+origins = FALSE
+itsx_origins <- data.frame(origin=character(), count=numeric(), stringsAsFactors=FALSE)
+for (line in itsx_summary){
+ # get basic statistic
+ if (grepl("Number of sequences in input file:", line)) {
+ itsx_summary_nasv <- as.numeric( sub("Number of sequences in input file: *\t*", "", line) )
+ }
+ if (grepl("Sequences detected as ITS by ITSx:", line)) {
+ itsx_summary_its <- as.numeric( sub("Sequences detected as ITS by ITSx: *\t*", "", line) )
+ }
+ # get preliminary origins
+ if (grepl("----------------------------", line)) {
+ origins = FALSE
+ }
+ if (isTRUE(origins)) {
+ add <- data.frame(origin=sub(":.*", "", line), count=as.numeric( sub(".*: *\t*", "", line) ) )
+ itsx_origins <- rbind(itsx_origins, add)
+ }
+ if (grepl("ITS sequences by preliminary origin:", line)) {
+ origins = TRUE
+ }
+}
+itsx_origins$percent <- round( itsx_origins$count / itsx_summary_nasv * 100, 2)
+
+cat(itsx_summary_its, "of",itsx_summary_nasv,"(",round( itsx_summary_its/itsx_summary_nasv*100 ,2),"%) ASVs were identified as ITS.",
+ "The following plot shows ITS sequences by preliminary origin:")
+
+plot_itsx_origins <- ggplot(itsx_origins,
+ aes(x = origin, y = percent)) +
+ geom_bar(stat = "identity", fill = rgb(0.1, 0.4, 0.75), width = 0.5) +
+ ylab("%") +
+ xlab("ITS sequences by preliminary origin") +
+ coord_flip() +
+ theme_bw()
+plot_itsx_origins
+
+svg("itsx_preliminary_origin.svg")
+plot_itsx_origins
+invisible(dev.off())
+```
+
+
+
+```{r, eval = !isFALSE(params$dada2_taxonomy), results='asis'}
+cat("## DADA2\n")
+
+# indicate reference taxonomy
+if (!params$flag_ref_tax_user) {
+ cat("The taxonomic classification was performed by [DADA2](https://pubmed.ncbi.nlm.nih.gov/27214047/)
+ using the database: `", params$dada2_ref_tax_title, "`.
+ More details about the reference taxonomy database can be found in the ['Methods section'](#methods).\n\n", sep = "")
+} else {
+ cat("The taxonomic classification was performed by DADA2 using a custom database ",
+ "provided by the user.\n\n", sep = "")
+}
+
+# mention if taxonomy was cut by cutadapt
+if ( !isFALSE(params$cut_dada_ref_taxonomy) ) {
+ cut_dada_ref_taxonomy <- readLines(params$cut_dada_ref_taxonomy)
+ for (line in cut_dada_ref_taxonomy){
+ if (grepl("Total reads processed:", line)) {
+ cut_dada_ref_taxonomy_orig <- sub("Total reads processed: *\t*", "", line)
+ }
+ if (grepl("Reads written \\(passing filters\\):", line)) {
+ cut_dada_ref_taxonomy_filt <- sub("Reads written .passing filters.: *\t*", "", line)
+ }
+ if (grepl("Total basepairs processed:", line)) {
+ cut_dada_ref_taxonomy_orig_bp <- sub("Total basepairs processed: *\t*", "", line)
+ }
+ if (grepl("Total written \\(filtered\\):", line)) {
+ cut_dada_ref_taxonomy_filt_bp <- sub("Total written \\(filtered\\): *\t*", "", line)
+ }
+ }
+
+ cat("The taxonomic reference database was cut by primer sequences to improve matching.
+ The original database had ",cut_dada_ref_taxonomy_orig," sequences with ",cut_dada_ref_taxonomy_orig_bp,
+ ", retained were ",cut_dada_ref_taxonomy_filt," sequences that represented ",cut_dada_ref_taxonomy_filt_bp,".\n\n",
+ sep = "")
+}
+
+# make statistics of taxonomic classification
+asv_tax <- read.table(params$dada2_taxonomy, header = TRUE, sep = "\t")
+
+# Calculate the classified numbers/percent of asv
+level <- subset(asv_tax, select = -c(ASV_ID,confidence,sequence))
+level <- colnames(level)
+
+# Catch 100% highest taxa (e.g. Kingdom) assignment
+if (count(asv_tax, level[1])$n[1] == nrow(asv_tax)){
+ n_1 = 0
+} else {
+ n_1 = count(asv_tax, level[1])$n[1]
+}
+n_asv_tax = nrow(asv_tax)
+n_asv_unclassified <- c(n_1)
+for (x in level[2:length(level)]) {
+ asv_tax_subset <- subset(asv_tax, select = x)
+ colnames(asv_tax_subset)[1] <- "count_this"
+ n_asv_unclassified <- c(n_asv_unclassified, count(asv_tax_subset, count_this)$n[1])
+}
+
+n_asv_classified <- n_asv_tax - n_asv_unclassified
+p_asv_classified <- round(n_asv_classified / n_asv_tax * 100, 2)
+
+asv_classi_df <- data.frame(level, n_asv_classified, p_asv_classified)
+
+# Build output string
+outputstr <- "DADA2 classified "
+for (row in seq_len(nrow(asv_classi_df))) {
+ outputstr <- paste0(outputstr, asv_classi_df[row, ]$p_asv_classified,
+ " % ASVs at ", asv_classi_df[row, ]$level, " level, ")
+}
+outputstr <- substr(outputstr, 1, nchar(outputstr)-2)
+outputstr <- paste0(outputstr, ".\n\n")
+
+# Output Text Classifications
+cat(outputstr)
+
+# Barplot
+# Plot
+asv_classi_df$level <- factor(asv_classi_df$level, levels = asv_classi_df$level)
+plot_asv_classi_df <- ggplot(asv_classi_df,
+ aes(x = reorder(level, desc(level)), y = p_asv_classified)) +
+ geom_bar(stat = "identity", fill = rgb(0.1, 0.4, 0.75), width = 0.5) +
+ ylab("% Classification") +
+ xlab("Levels") +
+ coord_flip() +
+ theme_bw()
+plot_asv_classi_df
+
+svg("dada2_taxonomic_classification_per_taxonomy_level.svg")
+plot_asv_classi_df
+invisible(dev.off())
+
+cat("\n\nDADA2 taxonomy assignments can be found in folder [dada2](../dada2) in files `ASV_tax_*.tsv`.")
+```
+
+
+
+```{r, eval = !isFALSE(params$qiime2_taxonomy), results='asis'}
+# Header
+cat("## QIIME2\n")
+
+cat("The taxonomic classification was performed by [QIIME2](https://www.nature.com/articles/s41587-019-0209-9)
+ using the database: `", params$qiime2_ref_tax_title, "`.
+ More details about the reference taxonomy database can be found in the ['Methods section'](#methods).\n\n", sep = "")
+
+# Read file and prepare table
+asv_tax <- read.table(params$qiime2_taxonomy, header = TRUE, sep = "\t")
+#asv_tax <- data.frame(do.call('rbind', strsplit(as.character(asv_tax$Taxon),'; ',fixed=TRUE)))
+asv_tax <- subset(asv_tax, select = Taxon)
+
+# Remove greengenes85 ".__" placeholders
+df = as.data.frame(lapply(asv_tax, function(x) gsub(".__", "", x)))
+# remove all last, empty ;
+df = as.data.frame(lapply(df, function(x) gsub(" ;","",x)))
+# remove last remaining, empty ;
+df = as.data.frame(lapply(df, function(x) gsub("; $","",x)))
+
+# get maximum amount of taxa levels per ASV
+max_taxa <- lengths(regmatches(df$Taxon, gregexpr("; ", df$Taxon)))+1
+
+# Currently, all QIIME2 databases seem to have the same levels!
+level <- c("Kingdom","Phylum","Class","Order","Family","Genus","Species")
+
+# Calculate the classified numbers/percent of asv
+n_asv_tax = nrow(asv_tax)
+
+n_asv_classified <- length(which(max_taxa>=1))
+for (x in 2:length(level)) {
+ n_asv_classified <- c(n_asv_classified, length(which(max_taxa>=x)) )
+}
+p_asv_classified <- round(n_asv_classified / n_asv_tax * 100, 2)
+
+asv_classi_df <- data.frame(level, n_asv_classified, p_asv_classified)
+
+# Build output string
+outputstr <- "QIIME2 classified "
+for (row in seq_len(nrow(asv_classi_df))) {
+ outputstr <- paste0(outputstr, asv_classi_df[row, ]$p_asv_classified,
+ " % ASVs at ", asv_classi_df[row, ]$level, " level, ")
+}
+outputstr <- substr(outputstr, 1, nchar(outputstr)-2)
+outputstr <- paste0(outputstr, ".\n\n")
+
+# Output Text Classifications
+cat(outputstr)
+
+# Barplot
+# Plot
+asv_classi_df$level <- factor(asv_classi_df$level, levels = asv_classi_df$level)
+plot_asv_classi_df <- ggplot(asv_classi_df,
+ aes(x = reorder(level, desc(level)), y = p_asv_classified)) +
+ geom_bar(stat = "identity", fill = rgb(0.1, 0.4, 0.75), width = 0.5) +
+ ylab("% Classification") +
+ xlab("Levels") +
+ coord_flip() +
+ theme_bw()
+plot_asv_classi_df
+
+svg("qiime2_taxonomic_classification_per_taxonomy_level.svg")
+plot_asv_classi_df
+invisible(dev.off())
+
+cat("\n\nQIIME2 taxonomy assignments can be found in folder [qiime2/taxonomy](../qiime2/taxonomy).")
+```
+
+
+
+```{r, eval = !isFALSE(params$sintax_taxonomy), results='asis'}
+# Header
+cat("## SINTAX\n")
+
+cat("The taxonomic classification was performed by [SINTAX](https://doi.org/10.1101/074161)
+ using the database: `", params$sintax_ref_tax_title, "`.
+ More details about the reference taxonomy database can be found in the ['Methods section'](#methods).\n\n", sep = "")
+
+asv_tax <- read.table(params$sintax_taxonomy, header = TRUE, sep = "\t")
+
+# Calculate the classified numbers/percent of asv
+level <- subset(asv_tax, select = -c(ASV_ID,confidence,sequence))
+level <- colnames(level)
+
+# Catch 100% highest taxa (e.g. Kingdom) assignment
+if (count(asv_tax, level[1])$n[1] == nrow(asv_tax)){
+ n_1 = nrow(asv_tax)
+} else {
+ n_1 = count(asv_tax, level[1])$n[1]
+}
+n_asv_tax = nrow(asv_tax)
+n_asv_unclassified <- c(n_1)
+for (x in level[2:length(level)]) {
+ asv_tax_subset <- subset(asv_tax, select = x)
+ colnames(asv_tax_subset)[1] <- "count_this"
+ n_asv_unclassified <- c(n_asv_unclassified, count(asv_tax_subset, count_this)$n[1])
+}
+
+n_asv_classified <- n_asv_tax - n_asv_unclassified
+p_asv_classified <- round(n_asv_classified / n_asv_tax * 100, 2)
+
+asv_classi_df <- data.frame(level, n_asv_classified, p_asv_classified)
+
+# Build output string
+outputstr <- "SINTAX classified "
+for (row in seq_len(nrow(asv_classi_df))) {
+ outputstr <- paste0(outputstr, asv_classi_df[row, ]$p_asv_classified,
+ " % ASVs at ", asv_classi_df[row, ]$level, " level, ")
+}
+outputstr <- substr(outputstr, 1, nchar(outputstr)-2)
+outputstr <- paste0(outputstr, ".\n\n")
+
+# Output Text Classifications
+cat(outputstr)
+
+# Barplot
+# Plot
+asv_classi_df$level <- factor(asv_classi_df$level, levels = asv_classi_df$level)
+plot_asv_classi_df <- ggplot(asv_classi_df,
+ aes(x = reorder(level, desc(level)), y = p_asv_classified)) +
+ geom_bar(stat = "identity", fill = rgb(0.1, 0.4, 0.75), width = 0.5) +
+ ylab("% Classification") +
+ xlab("Levels") +
+ coord_flip() +
+ theme_bw()
+plot_asv_classi_df
+
+svg("sintax_taxonomic_classification_per_taxonomy_level.svg")
+plot_asv_classi_df
+invisible(dev.off())
+
+cat("\n\nSINTAX taxonomy assignments can be found in folder [sintax](../sintax).")
+```
+
+
+
+```{r, eval = !isFALSE(params$pplace_taxonomy), results='asis'}
+cat(paste0("
+## Phylogenetic Placement
+
+Phylogenetic placement grafts sequences onto a phylogenetic reference tree and optionally outputs taxonomic annotations.
+The reference tree is ideally made from full-length high-quality sequences containing better evolutionary signal than short amplicons.
+It is hence superior to estimating de-novo phylogenetic trees from short amplicon sequences.
+Extraction of taxonomic classification was performed with [EPA-NG](https://github.com/Pbdas/epa-ng) and [Gappa](https://pubmed.ncbi.nlm.nih.gov/32016344/).
+"))
+
+# Read file and prepare table
+asv_tax <- read.table(params$pplace_taxonomy, header = TRUE, sep = "\t")
+
+# get maximum amount of taxa levels per ASV
+max_taxa <- lengths(regmatches(asv_tax$taxonomy, gregexpr(";", asv_tax$taxonomy)))+1
+
+# labels for levels
+level <- rep(1:max(max_taxa))
+
+# Calculate the classified numbers/percent of asv
+n_asv_tax = nrow(asv_tax)
+
+n_asv_classified <- length(which(max_taxa>=1))
+for (x in 2:length(level)) {
+ n_asv_classified <- c(n_asv_classified, length(which(max_taxa>=x)) )
+}
+p_asv_classified <- round(n_asv_classified / n_asv_tax * 100, 2)
+
+asv_classi_df <- data.frame(level, n_asv_classified, p_asv_classified)
+
+# Build output string
+outputstr <- "Phylogenetic Placement classified "
+for (row in seq_len(nrow(asv_classi_df))) {
+ outputstr <- paste0(outputstr, asv_classi_df[row, ]$p_asv_classified,
+ " % ASVs at taxonomic level ", asv_classi_df[row, ]$level, ", ")
+}
+outputstr <- substr(outputstr, 1, nchar(outputstr)-2)
+outputstr <- paste0(outputstr, ".\n\n")
+
+# Output Text Classifications
+cat(outputstr)
+
+# Barplot
+# Plot
+asv_classi_df$level <- factor(asv_classi_df$level, levels = asv_classi_df$level)
+plot_asv_classi_df <- ggplot(asv_classi_df,
+ aes(x = reorder(level, desc(level)), y = p_asv_classified)) +
+ geom_bar(stat = "identity", fill = rgb(0.1, 0.4, 0.75), width = 0.5) +
+ ylab("% Classification") +
+ xlab("Taxonomic levels") +
+ coord_flip() +
+ theme_bw()
+plot_asv_classi_df
+
+svg("phylogenetic_placement_taxonomic_classification_per_taxonomy_level.svg")
+plot_asv_classi_df
+invisible(dev.off())
+
+cat("\n\nHeattree of the phylogenetic placement:")
+```
+
+```{r, eval = !isFALSE(params$pplace_taxonomy), out.width="100%", fig.show='hold', fig.align='default'}
+knitr::include_graphics(c(params$pplace_heattree))
+```
+
+```{r, eval = !isFALSE(params$pplace_taxonomy), results='asis'}
+cat("\n\nPhylogenetic placement taxonomy assignments can be found in folder [pplace](../pplace) in file `*.taxonomy.per_query_unique.tsv`.")
+```
+
+
+
+```{r, eval = !isFALSE(params$val_used_taxonomy), results='asis'}
+# Header
+cat("# Downstream analysis with QIIME2\n",
+ "Files that were input to [QIIME2](https://www.nature.com/articles/s41587-019-0209-9) can be found in folder [qiime2/input/](../qiime2/input/).",
+ "Results of taxonomic classification of",params$val_used_taxonomy,"was used in all following analysis, see in the above sections.")
+```
+
+
+
+```{r, eval = !isFALSE(params$filter_stats_tsv), results='asis'}
+cat(paste0("
+## ASV filtering
+
+Unwanted taxa are often off-targets generated in PCR with primers that are not perfectly specific for the target DNA.
+For 16S rRNA sequencing, mitochondria and chloroplast sequences are typically removed because these are frequent unwanted non-bacterial PCR products.
+"))
+
+if ( params$exclude_taxa != "none" ) {
+ cat("ASVs were removed when the taxonomic string contained any of `", params$exclude_taxa, "` (comma separated)", sep="")
+}
+if ( params$min_frequency != 1 ) {
+ cat(", had fewer than", params$min_frequency ,"total read counts over all sample")
+}
+if ( params$min_samples != 1 ) {
+ cat(", and that were present in fewer than", params$min_samples ,"samples")
+}
+cat(". ")
+
+qiime2_filtertaxa <- unlist( strsplit( params$qiime2_filtertaxa, "," ) )
+qiime2_filtertaxa_orig <- as.numeric( qiime2_filtertaxa[1] ) -1
+qiime2_filtertaxa_filt <- as.numeric( qiime2_filtertaxa[2] ) -2
+qiime2_filtertaxa_rm <- qiime2_filtertaxa_orig-qiime2_filtertaxa_filt
+qiime2_filtertaxa_rm_percent <- round( qiime2_filtertaxa_rm/qiime2_filtertaxa_orig*100 ,2)
+
+cat("Consequently,",qiime2_filtertaxa_orig,"ASVs were reduced by",qiime2_filtertaxa_rm,"(",qiime2_filtertaxa_rm_percent,"%) to",qiime2_filtertaxa_filt,".",
+ "The following table shows read counts for each sample before and after filtering:")
+
+# import stats tsv
+filter_stats_tsv <- read.table(file = params$filter_stats_tsv, header = TRUE, sep = "\t")
+colnames(filter_stats_tsv) <- gsub("_tax_filter","",colnames(filter_stats_tsv))
+filter_stats_tsv$retained_percent <- round( filter_stats_tsv$retained_percent, 2)
+filter_stats_tsv$lost_percent <- round( filter_stats_tsv$lost_percent, 2)
+colnames(filter_stats_tsv) <- gsub("_percent","%",colnames(filter_stats_tsv))
+
+# Display table
+datatable(filter_stats_tsv, options = list(
+ scrollX = TRUE,
+ scrollY = "300px",
+ paging = FALSE))
+
+cat("\n\nTables with read count numbers and filtered abundance tables are in folder [qiime2/abundance_tables](../qiime2/abundance_tables).")
+```
+
+
+
+```{r, eval = !isFALSE(params$abundance_tables), results='asis'}
+cat(paste0("
+## Abundance tables
+
+The abundance tables are the final data for further downstream analysis and visualisations.
+The tables are based on the computed ASVs and taxonomic classification, but after removal of unwanted taxa.
+Folder [qiime2/abundance_tables](../qiime2/abundance_tables) contains tab-separated files (.tsv)
+that can be opened by any spreadsheet software.
+
+## Relative abundance tables
+
+Absolute abundance tables produced by the previous steps contain count data, but the compositional
+nature of 16S rRNA amplicon sequencing requires sequencing depth normalisation. This step computes
+relative abundance tables using TSS (Total Sum Scaling) normalisation for various taxonomic levels
+and detailed tables for all ASVs with taxonomic classification, sequence and relative abundance for
+each sample. These tables are typically used for in-depth investigation of taxa abundances.
+Folder [qiime2/rel_abundance_tables](../qiime2/rel_abundance_tables) contains tab-separated files (.tsv)
+that can be opened by any spreadsheet software.
+"))
+```
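+
+For orientation, TSS simply divides each count by its sample (column) total. A minimal sketch of how relative
+abundances could be recomputed in R from any exported absolute abundance table (the file name and the table
+layout, ASVs in rows and samples in columns, are illustrative):
+
+```r
+# Minimal sketch (not run by this report): recompute TSS relative abundances
+# from a tab-separated count table with ASVs in rows and samples in columns.
+counts <- read.table("abundance_table.tsv", header = TRUE, sep = "\t", row.names = 1)
+rel_abundance <- sweep(counts, 2, colSums(counts), "/") * 100   # percent per sample
+head(rel_abundance)
+```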
+
+
+
+```{r, eval = !isFALSE(params$barplot), results='asis'}
+cat(paste0("
+## Barplot
+
+Interactive abundance plot that aids exploratory browsing of the discovered taxa and their abundance
+in samples and allows sorting by associated metadata. Folder [qiime2/barplot](../qiime2/barplot)
+contains barplots, click [qiime2/barplot/index.html](../qiime2/barplot/index.html) to open it in
+your web browser.
+"))
+```
+
+```{r, eval = !isFALSE(params$metadata_category_barplot), results='asis'}
+cat(paste0("
+Additionally, barplots with average relative abundance values were produced
+for `",params$metadata_category_barplot,"` (comma separated if several) in [qiime2/barplot_average](../qiime2/barplot_average)
+in separate folders following the scheme `barplot_{treatment}`:
+"))
+
+metadata_category_barplot <- sort( unlist( strsplit( params$metadata_category_barplot,"," ) ) )
+for (category in metadata_category_barplot) {
+ barplot_folder_path <- paste0("qiime2/barplot_average/barplot_",category)
+ cat("\n- [",barplot_folder_path,"/index.html](../",barplot_folder_path,"/index.html)\n", sep="")
+}
+```
+
+
+
+```{r, eval = !isFALSE(params$alpha_rarefaction), results='asis'}
+cat(paste0("
+## Alpha diversity rarefaction curves
+
+This step produces rarefaction plots for several alpha diversity indices and is primarily used to determine whether the
+richness of the samples has been fully observed or sequenced. If the slope of the curves does not level
+out and the lines do not become horizontal, this might be because the sequencing depth was too low to observe
+all diversity or because sequencing errors artificially increase sequence diversity and cause false discoveries.
+
+Folder [qiime2/alpha-rarefaction](../qiime2/alpha-rarefaction) contains the data, click
+[qiime2/alpha-rarefaction/index.html](../qiime2/alpha-rarefaction/index.html) to open it in your web browser.
+"))
+```
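+
+Rarefaction curves can also be redrawn outside of QIIME2, for example with the vegan R package (a minimal
+sketch, assuming vegan is installed; the file name is illustrative and samples are expected in rows):
+
+```r
+# Minimal sketch (not run by this report): rarefaction curves with vegan
+library(vegan)
+counts <- t(read.table("abundance_table.tsv", header = TRUE, sep = "\t", row.names = 1))
+rarecurve(counts, step = 100, xlab = "Sequencing depth", ylab = "Observed ASVs")
+```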
+
+
+
+```{r, eval = !isFALSE(params$diversity_indices_beta), results='asis'}
+diversity_indices_depth <- readLines(params$diversity_indices_depth)
+
+cat(paste0("
+## Diversity analysis
+
+Diversity measures summarize important sample features (alpha diversity) or differences between samples (beta diversity).
+Diversity calculations are based on sub-sampled data rarefied to ",diversity_indices_depth, " counts.
+
+### Alpha diversity indices
+
+Alpha diversity measures the species diversity within samples.
+"))
+
+if ( params$dada_sample_inference == "independent") {
+    cat("Please note that ASVs were inferred for each sample independently, which can make alpha diversity indices a poor estimate of true diversity. ")
+}
+
+cat(paste0("
+This step calculates alpha diversity using various methods and performs pairwise comparisons of groups of samples. It is based on a phylogenetic tree of all ASV sequences.
+Folder [qiime2/diversity/alpha_diversity](../qiime2/diversity/alpha_diversity) contains the alpha-diversity data:
+
+- Shannon’s diversity index (quantitative): [qiime2/diversity/alpha_diversity/shannon_vector/index.html](../qiime2/diversity/alpha_diversity/shannon_vector/index.html)
+- Pielou’s Evenness: [qiime2/diversity/alpha_diversity/evenness_vector/index.html](../qiime2/diversity/alpha_diversity/evenness_vector/index.html)
+- Faith’s Phylogenetic Diversity (qualitative, phylogenetic): [qiime2/diversity/alpha_diversity/faith_pd_vector/index.html](../qiime2/diversity/alpha_diversity/faith_pd_vector/index.html)
+- Observed OTUs (qualitative): [qiime2/diversity/alpha_diversity/observed_otus_vector/index.html](../qiime2/diversity/alpha_diversity/observed_otus_vector/index.html)
+
+### Beta diversity indices
+
+Beta diversity measures the species community differences between samples. This step calculates beta diversity distances using
+various methods and performs pairwise comparisons of groups of samples. Additionally, principal coordinates analysis (PCoA)
+plots are produced that can be visualized with Emperor in your default browser without the need for installation.
+These calculations are based on a phylogenetic tree of all ASV sequences.
+Folder [qiime2/diversity/beta_diversity](../qiime2/diversity/beta_diversity) contains the beta-diversity data:
+
+#### PCoA plots for four different beta diversity distances are accessible via:
+
+- Bray-Curtis distance (quantitative): [qiime2/diversity/beta_diversity/bray_curtis_pcoa_results-PCoA/index.html](../qiime2/diversity/beta_diversity/bray_curtis_pcoa_results-PCoA/index.html)
+- Jaccard distance (qualitative): [qiime2/diversity/beta_diversity/jaccard_pcoa_results-PCoA/index.html](../qiime2/diversity/beta_diversity/jaccard_pcoa_results-PCoA/index.html)
+- unweighted UniFrac distance (qualitative, phylogenetic): [qiime2/diversity/beta_diversity/unweighted_unifrac_pcoa_results-PCoA/index.html](../qiime2/diversity/beta_diversity/unweighted_unifrac_pcoa_results-PCoA/index.html)
+- weighted UniFrac distance (quantitative, phylogenetic): [qiime2/diversity/beta_diversity/weighted_unifrac_pcoa_results-PCoA/index.html](../qiime2/diversity/beta_diversity/weighted_unifrac_pcoa_results-PCoA/index.html)
+
+#### Pairwise comparisons between groups of samples
+
+Statistics on differences between specific metadata groups can be found in folder
+[qiime2/diversity/beta_diversity/](../qiime2/diversity/beta_diversity/). Each significance test
+result is in a separate folder following the scheme `{method}_distance_matrix-{treatment}`:
+"))
+
+diversity_indices_beta <- sort( unlist( strsplit( params$diversity_indices_beta,"," ) ) )
+for (folder in diversity_indices_beta) {
+ beta_folder_path <- paste0("qiime2/diversity/",folder) #"beta_diversity/" is defined in input section with "stageAs: 'beta_diversity/*'"
+ cat("\n- [",beta_folder_path,"/index.html](../",beta_folder_path,"/index.html)\n", sep="")
+}
+```
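+
+Comparable (non-phylogenetic) indices can be recomputed outside of QIIME2 for custom analyses, for example with
+the vegan R package (a minimal sketch, assuming vegan is installed; the file name, rarefaction depth and plot
+are illustrative, and this will not reproduce Faith's PD or UniFrac, which require the phylogenetic tree):
+
+```r
+# Minimal sketch (not run by this report): alpha and beta diversity with vegan
+library(vegan)
+counts  <- t(read.table("abundance_table.tsv", header = TRUE, sep = "\t", row.names = 1))
+counts  <- rrarefy(counts, sample = min(rowSums(counts)))  # sub-sample to a common depth
+shannon <- diversity(counts, index = "shannon")            # alpha diversity per sample
+bray    <- vegdist(counts, method = "bray")                # Bray-Curtis distances between samples
+pcoa    <- cmdscale(bray, k = 2)                           # principal coordinates analysis
+plot(pcoa, xlab = "PCo1", ylab = "PCo2")
+```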
+
+```{r, eval = !isFALSE(params$qiime_adonis_formula), results='asis'}
+cat(paste0("
+#### ADONIS test
+
+Permutational multivariate analysis of variance using distance matrices
+[adonis](https://doi.org/10.1111/j.1442-9993.2001.01070.pp.x) (in [VEGAN](https://CRAN.R-project.org/package=vegan))
+determines whether groups of samples are significantly different from one another.
+The formula was `",params$qiime_adonis_formula,"` (multiple formulas are comma separated).
+adonis computes an R2 value (effect size) which shows the percentage of variation explained
+by a condition, as well as a p-value to determine the statistical significance.
+The sequence of conditions in the formula matters: the variance of factors is removed
+(statistically controlled for) from the beginning to the end of the formula.
+
+Test results are in separate folders following the scheme `{method}_distance_matrix-{adonis formula}`:
+"))
+
+diversity_indices_adonis <- sort( unlist( strsplit( params$diversity_indices_adonis,"," ) ) )
+for (folder in diversity_indices_adonis) {
+ adonis_index_path <- paste0("qiime2/diversity/",folder) #"beta_diversity/" is defined in input section with "stageAs: 'beta_diversity/adonis/*'"
+ cat("\n- [",adonis_index_path,"/index.html](../",adonis_index_path,"/index.html)\n", sep="")
+}
+```
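+
+The ADONIS test can be approximately reproduced directly in R with vegan's `adonis2()` (a minimal sketch; the
+count and metadata file names and the `treatment` term are illustrative and need to be adapted to the formula
+actually used):
+
+```r
+# Minimal sketch (not run by this report): PERMANOVA with vegan::adonis2()
+library(vegan)
+counts   <- t(read.table("abundance_table.tsv", header = TRUE, sep = "\t", row.names = 1))
+metadata <- read.table("Metadata.tsv", header = TRUE, sep = "\t", row.names = 1)
+bray     <- vegdist(counts[rownames(metadata), ], method = "bray")
+adonis2(bray ~ treatment, data = metadata, permutations = 999)
+```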
+
+
+
+```{r, eval = !isFALSE(params$ancom), results='asis'}
+cat(paste0("
+## ANCOM
+
+[Analysis of Composition of Microbiomes (ANCOM)](https://www.ncbi.nlm.nih.gov/pubmed/26028277)
+is applied to identify features that are differentially
+abundant across sample groups. A key assumption made by ANCOM is that few taxa (less than about 25%)
+will be differentially abundant between groups; otherwise the method will be inaccurate.
+Comparisons between groups of samples are performed for specific metadata and can be found in folder
+[qiime2/ancom/](../qiime2/ancom/).
+
+Test results are in separate folders following the scheme `Category-{treatment}-{taxonomic level}`:
+"))
+
+ancom <- sort( unlist( strsplit( params$ancom,"," ) ) )
+for (folder in ancom) {
+ ancom_path <- paste0("qiime2/ancom/",folder)
+ cat("\n- [",ancom_path,"/index.html](../",ancom_path,"/index.html)\n", sep="")
+}
+```
+
+
+
+```{r, eval = !isFALSE(params$picrust_pathways), results='asis'}
+cat(paste0("
+## PICRUSt2
+
+[PICRUSt2](https://pubmed.ncbi.nlm.nih.gov/32483366/) (Phylogenetic Investigation of Communities by Reconstruction of Unobserved States)
+is a software tool for predicting functional abundances based only on marker gene sequences.
+Predictions of Enzyme Classification numbers (EC), KEGG orthologs (KO) and MetaCyc ontology were made for each sample.
+Folder [PICRUSt2/](../PICRUSt2/) contains the predicted quantifications: Enzyme Classification numbers (EC) in
+`EC_pred_metagenome_unstrat_descrip.tsv`, KEGG orthologs (KO) in `KO_pred_metagenome_unstrat_descrip.tsv`, and MetaCyc ontology in
+`METACYC_path_abun_unstrat_descrip.tsv`. Quantifications are not normalized yet; they can be normalized, e.g. by the total sum per sample.
+"))
+```
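+
+A minimal sketch of such a total-sum normalisation in R (assuming the first two columns of the prediction
+table hold the function identifier and its description, as in the files named above):
+
+```r
+# Minimal sketch (not run by this report): total-sum scaling of PICRUSt2 predictions
+pred <- read.table("KO_pred_metagenome_unstrat_descrip.tsv", header = TRUE, sep = "\t",
+                   quote = "", check.names = FALSE)
+abund <- pred[, -(1:2)]                                    # sample columns only
+pred_norm <- cbind(pred[, 1:2], sweep(abund, 2, colSums(abund), "/"))
+head(pred_norm)
+```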
+
+
+
+# Methods
+
+```{r, results='asis'}
+if ( !isFALSE(params$dada2_ref_tax_title) ) {
+ cat("Taxonomic classification by DADA2:\n\n",
+ "- database: `", params$dada2_ref_tax_title, "`\n\n",
+ "- files: `", params$dada2_ref_tax_file, "`\n\n",
+ "- citation: `", params$dada2_ref_tax_citation, "`\n\n", sep = "")
+}
+
+if ( !isFALSE(params$qiime2_ref_tax_title) ) {
+ cat("Taxonomic classification by QIIME2:\n\n",
+ "- database: `", params$qiime2_ref_tax_title, "`\n\n",
+ "- files: `", params$qiime2_ref_tax_file, "`\n\n",
+ "- citation: `", params$qiime2_ref_tax_citation, "`\n\n", sep = "")
+}
+
+if ( !isFALSE(params$sintax_ref_tax_title) ) {
+ cat("Taxonomic classification by SINTAX:\n\n",
+ "- database: `", params$sintax_ref_tax_title, "`\n\n",
+ "- files: `", params$sintax_ref_tax_file, "`\n\n",
+ "- citation: `", params$sintax_ref_tax_citation, "`\n\n", sep = "")
+}
+
+if ( !isFALSE(params$mqc_plot) ) {
+ # with MultiQC
+ cat("[MultiQC](https://multiqc.info/) summarized computational methods in [multiqc/multiqc_report.html](../multiqc/multiqc_report.html).
+ The proposed short methods description can be found in [MultiQC's Methods Description](../multiqc/multiqc_report.html#nf-core-ampliseq-methods-description),
+ versions of software collected at runtime in [MultiQC's Software Versions](../multiqc/multiqc_report.html#software_versions),
+    and a summary of non-default parameters in [MultiQC's Workflow Summary](../multiqc/multiqc_report.html#nf-core-ampliseq-summary).\n\n")
+}
+# with & without MultiQC
+cat(paste0("
+Technical information about the pipeline run is collected in folder [pipeline_info](../pipeline_info),
+including software versions collected at runtime in file `software_versions.yml` (can be viewed with a text editor),
+execution report in file `execution_report_{date}_{time}.html`,
+execution trace in file `execution_trace_{date}_{time}.txt`,
+execution timeline in file `execution_timeline_{date}_{time}.html`, and
+pipeline directed acyclic graph (DAG) in file `pipeline_dag_{date}_{time}.html`.
+"))
+```
+
+
+
+# Final notes
+
+This report (file `summary_report.html`) is located in folder [summary_report](.) of the original pipeline results folder.
+In this file, all links to files and folders are relative; therefore, hyperlinks will only work when the report is in its original place in the pipeline results folder.
+Plots specifically produced for this report (if any) can also be found in folder [summary_report](.).
+
+A comprehensive read count report throughout the pipeline can be found in the [base results folder](../) in file `overall_summary.tsv`.
+
+Please cite the [pipeline publication](https://doi.org/10.3389/fmicb.2020.550420) and any software tools used by the pipeline (see [citations](https://nf-co.re/ampliseq#citations)) when you use any of the pipeline results in your study.
diff --git a/assets/slackreport.json b/assets/slackreport.json
index 043d02f2..b170caab 100644
--- a/assets/slackreport.json
+++ b/assets/slackreport.json
@@ -3,7 +3,7 @@
{
"fallback": "Plain-text summary of the attachment.",
"color": "<% if (success) { %>good<% } else { %>danger<%} %>",
- "author_name": "sanger-tol/readmapping v${version} - ${runName}",
+ "author_name": "nf-core/ampliseq v${version} - ${runName}",
"author_icon": "https://www.nextflow.io/docs/latest/_static/favicon.ico",
"text": "<% if (success) { %>Pipeline completed successfully!<% } else { %>Pipeline completed with errors<% } %>",
"fields": [
diff --git a/bin/reformat_tax_for_phyloseq.py b/bin/reformat_tax_for_phyloseq.py
new file mode 100755
index 00000000..f35aaf03
--- /dev/null
+++ b/bin/reformat_tax_for_phyloseq.py
@@ -0,0 +1,32 @@
+#!/usr/bin/env python3
+
+import pandas as pd
+import sys
+
+tax_file = sys.argv[1]
+out_file = sys.argv[2]
+
+# Import tsv file
+tax_df = pd.read_csv(tax_file, sep="\t")
+
+# The second column should hold the taxonomy information
+tax_col = tax_df.columns[1]
+
+# Split the values in the tax column
+split_tax = tax_df[tax_col].str.split(";", expand=True)
+
+# Assign names to the new columns with an auto incrementing integer
+new_col_names = [f"{tax_col}_{i+1}" for i in range(split_tax.shape[1])]
+split_tax.columns = new_col_names
+
+# Strip whitespace from the tax names
+split_tax = split_tax.applymap(lambda x: x.strip() if isinstance(x, str) else x)
+
+# Drop the original tax column
+tax_df = tax_df.drop(columns=[tax_col])
+
+# Add the new tax columns to the df
+result = pd.concat([tax_df, split_tax], axis=1)
+
+# Create new tsv file
+result.to_csv(out_file, sep="\t", index=False)
diff --git a/conf/modules.config b/conf/modules.config
index 95d8569a..bc91b125 100644
--- a/conf/modules.config
+++ b/conf/modules.config
@@ -785,6 +785,14 @@ process {
]
}
+ withName: 'PHYLOSEQ' {
+ publishDir = [
+ path: { "${params.outdir}/phyloseq" },
+ mode: params.publish_dir_mode,
+ pattern: "*.rds"
+ ]
+ }
+
withName: CUSTOM_DUMPSOFTWAREVERSIONS {
publishDir = [
path: { "${params.outdir}/pipeline_info" },
@@ -801,4 +809,11 @@ process {
saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
]
}
+
+ withName: SUMMARY_REPORT {
+ publishDir = [
+ path: { "${params.outdir}/summary_report" },
+ mode: params.publish_dir_mode
+ ]
+ }
}
diff --git a/conf/test_doubleprimers.config b/conf/test_doubleprimers.config
index 6b275dc8..75c4afab 100644
--- a/conf/test_doubleprimers.config
+++ b/conf/test_doubleprimers.config
@@ -23,7 +23,7 @@ params {
FW_primer = "NNNNCCTAHGGGRBGCAGCAG"
RV_primer = "GACTACHVGGGTATCTAATCC"
double_primer = true
- dada_ref_taxonomy = false
+ skip_dada_taxonomy = true
input = "https://raw.githubusercontent.com/nf-core/test-datasets/ampliseq/samplesheets/Samplesheet_double_primer.tsv"
trunc_qmin = 30
skip_fastqc = true
diff --git a/conf/test_pplace.config b/conf/test_pplace.config
index b6eaff1d..ecd5424d 100644
--- a/conf/test_pplace.config
+++ b/conf/test_pplace.config
@@ -24,7 +24,7 @@ params {
RV_primer = "GGACTACNVGGGTWTCTAAT"
input = "https://raw.githubusercontent.com/nf-core/test-datasets/ampliseq/samplesheets/Samplesheet.tsv"
metadata = "https://raw.githubusercontent.com/nf-core/test-datasets/ampliseq/samplesheets/Metadata.tsv"
- dada_ref_taxonomy = false
+ skip_dada_taxonomy = true
qiime_ref_taxonomy = "greengenes85"
filter_ssu = "bac"
diff --git a/docs/output.md b/docs/output.md
index d3d37beb..9e9eb75a 100644
--- a/docs/output.md
+++ b/docs/output.md
@@ -17,6 +17,7 @@ The directories listed below will be created in the results directory after the
The pipeline is built using [Nextflow](https://www.nextflow.io/) and processes data using the following steps:
- [Input](#input) - Input files
+- [Pipeline summary report](#pipeline-summary-report) - Overview of pipeline output
- [Preprocessing](#preprocessing)
- [FastQC](#fastqc) - Read quality control
- [Cutadapt](#cutadapt) - Primer trimming
@@ -41,6 +42,8 @@ The pipeline is built using [Nextflow](https://www.nextflow.io/) and processes d
- [Diversity analysis](#diversity-analysis) - High level overview with different diversity indices
- [ANCOM](#ancom) - Differential abundance analysis
- [PICRUSt2](#picrust2) - Predict the functional potential of a bacterial community
+- [SBDI export](#sbdi-export) - Swedish Biodiversity Infrastructure (SBDI) submission file
+- [Phyloseq](#phyloseq) - Phyloseq R objects
- [Read count report](#read-count-report) - Report of read counts during various steps of the pipeline
- [Pipeline information](#pipeline-information) - Report metrics generated during the workflow execution
@@ -58,6 +61,20 @@ Samplesheet, ASV fasta, and metadata file are copied into the results folder.
+### Pipeline summary report
+
+A summary report for most pipeline results in html format produced by [R Markdown](https://rmarkdown.rstudio.com/). The report gives a general overview of the analysis, includes many tables and visualizations, and links to interactive downstream analysis results, if available.
+
+
+Output files
+
+- `summary_report/`
+ - `summary_report.html`: pipeline summary report as standalone HTML file that can be viewed in your web browser.
+ - `*.svg*`: plots that were produced for (and are included in) the report.
+ - `versions.yml`: software versions used to produce this report.
+
+
+
### Preprocessing
#### FastQC
@@ -518,6 +535,18 @@ Most of the fields in the template will not be populated by the export process,
+### Phyloseq
+
+This directory will hold phyloseq objects for each taxonomy table produced by this pipeline. The objects will contain an ASV abundance table and a taxonomy table. If the pipeline is provided with metadata, that metadata will also be included in the phyloseq object. A phylogenetic tree will also be included if the pipeline produces a tree.
+
+
+Output files
+
+- `phyloseq/`
+ - `_phyloseq.rds`: Phyloseq R object.
+
+
+
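+A minimal sketch of loading such an object in R (assuming the phyloseq package is installed; the file name is
+an example, use the actual file in `phyloseq/`):
+
+```r
+library(phyloseq)
+ps <- readRDS("phyloseq/dada2_phyloseq.rds")   # example file name
+ntaxa(ps)      # number of ASVs
+nsamples(ps)   # number of samples
+```
+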
## Read count report
This report includes information on how many reads per sample passed each pipeline step in which a loss can occur. Specifically, how many read pairs entered cutadapt, were reverse complemented, passed trimming; how many read pairs entered DADA2, were denoised, merged and non-chimeric; and how many counts were lost during excluding unwanted taxa and removing low abundance/prevalence sequences in QIIME2.
diff --git a/docs/usage.md b/docs/usage.md
index 73930402..59fe9f64 100644
--- a/docs/usage.md
+++ b/docs/usage.md
@@ -44,7 +44,7 @@ nextflow run nf-core/ampliseq \
--outdir "./results"
```
-In this example, `--input` is the [Direct FASTQ input](#direct-fastq-input), other options are [Samplesheet input](#samplesheet-input) and [ASV/OTU fasta input](#asvotu-fasta-input). For more details on metadata, see [Metadata](#metadata). For [Reproducibility](#reproducibility), specify the version to run using `-r` (= release, here: 2.3.2). See the [nf-core/ampliseq website documentation](https://nf-co.re/ampliseq/parameters) for more information about pipeline specific parameters.
+In this example, `--input` is the [Direct FASTQ input](#direct-fastq-input), other options are [Samplesheet input](#samplesheet-input) and [ASV/OTU fasta input](#asvotu-fasta-input). For more details on metadata, see [Metadata](#metadata). For [Reproducibility](#reproducibility), specify the version to run using `-r` (= release, e.g. 2.6.1). See the [nf-core/ampliseq website documentation](https://nf-co.re/ampliseq/parameters) for more information about pipeline specific parameters.
It is possible to not provide primer sequences (`--FW_primer` & `--RV_primer`) and skip primer trimming using `--skip_cutadapt`, but this is only for data that indeed does not contain any PCR primers in their sequences. Also, metadata (`--metadata`) isnt required, but aids downstream analysis.
@@ -72,7 +72,8 @@ If you wish to repeatedly use the same parameters for multiple runs, rather than
Pipeline settings can be provided in a `yaml` or `json` file via `-params-file <file>`.
> ⚠️ Do not use `-c <file>` to specify parameters as this will result in errors. Custom config files specified with `-c` must only be used for [tuning process resource specifications](https://nf-co.re/docs/usage/configuration#tuning-workflow-resources), other infrastructural tweaks (such as output directories), or module arguments (args).
-> The above pipeline run specified with a params file in yaml format:
+
+The above pipeline run specified with a params file in yaml format:
```bash
nextflow run nf-core/ampliseq -profile docker -params-file params.yaml
@@ -86,6 +87,7 @@ FW_primer: "GTGYCAGCMGCCGCGGTAA"
RV_primer: "GGACTACNVGGGTWTCTAAT"
metadata: "data/Metadata.tsv"
outdir: "./results"
+<...>
```
You can also generate such `YAML`/`JSON` files via [nf-core/launch](https://nf-co.re/launch).
diff --git a/lib/NfcoreSchema.groovy b/lib/NfcoreSchema.groovy
deleted file mode 100755
index 9b34804d..00000000
--- a/lib/NfcoreSchema.groovy
+++ /dev/null
@@ -1,530 +0,0 @@
-//
-// This file holds several functions used to perform JSON parameter validation, help and summary rendering for the nf-core pipeline template.
-//
-
-import nextflow.Nextflow
-import org.everit.json.schema.Schema
-import org.everit.json.schema.loader.SchemaLoader
-import org.everit.json.schema.ValidationException
-import org.json.JSONObject
-import org.json.JSONTokener
-import org.json.JSONArray
-import groovy.json.JsonSlurper
-import groovy.json.JsonBuilder
-
-class NfcoreSchema {
-
- //
- // Resolve Schema path relative to main workflow directory
- //
- public static String getSchemaPath(workflow, schema_filename='nextflow_schema.json') {
- return "${workflow.projectDir}/${schema_filename}"
- }
-
- //
- // Function to loop over all parameters defined in schema and check
- // whether the given parameters adhere to the specifications
- //
- /* groovylint-disable-next-line UnusedPrivateMethodParameter */
- public static void validateParameters(workflow, params, log, schema_filename='nextflow_schema.json') {
- def has_error = false
- //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~//
- // Check for nextflow core params and unexpected params
- def json = new File(getSchemaPath(workflow, schema_filename=schema_filename)).text
- def Map schemaParams = (Map) new JsonSlurper().parseText(json).get('definitions')
- def nf_params = [
- // Options for base `nextflow` command
- 'bg',
- 'c',
- 'C',
- 'config',
- 'd',
- 'D',
- 'dockerize',
- 'h',
- 'log',
- 'q',
- 'quiet',
- 'syslog',
- 'v',
-
- // Options for `nextflow run` command
- 'ansi',
- 'ansi-log',
- 'bg',
- 'bucket-dir',
- 'c',
- 'cache',
- 'config',
- 'dsl2',
- 'dump-channels',
- 'dump-hashes',
- 'E',
- 'entry',
- 'latest',
- 'lib',
- 'main-script',
- 'N',
- 'name',
- 'offline',
- 'params-file',
- 'pi',
- 'plugins',
- 'poll-interval',
- 'pool-size',
- 'profile',
- 'ps',
- 'qs',
- 'queue-size',
- 'r',
- 'resume',
- 'revision',
- 'stdin',
- 'stub',
- 'stub-run',
- 'test',
- 'w',
- 'with-apptainer',
- 'with-charliecloud',
- 'with-conda',
- 'with-dag',
- 'with-docker',
- 'with-mpi',
- 'with-notification',
- 'with-podman',
- 'with-report',
- 'with-singularity',
- 'with-timeline',
- 'with-tower',
- 'with-trace',
- 'with-weblog',
- 'without-docker',
- 'without-podman',
- 'work-dir'
- ]
- def unexpectedParams = []
-
- // Collect expected parameters from the schema
- def expectedParams = []
- def enums = [:]
- for (group in schemaParams) {
- for (p in group.value['properties']) {
- expectedParams.push(p.key)
- if (group.value['properties'][p.key].containsKey('enum')) {
- enums[p.key] = group.value['properties'][p.key]['enum']
- }
- }
- }
-
- for (specifiedParam in params.keySet()) {
- // nextflow params
- if (nf_params.contains(specifiedParam)) {
- log.error "ERROR: You used a core Nextflow option with two hyphens: '--${specifiedParam}'. Please resubmit with '-${specifiedParam}'"
- has_error = true
- }
- // unexpected params
- def params_ignore = params.schema_ignore_params.split(',') + 'schema_ignore_params'
- def expectedParamsLowerCase = expectedParams.collect{ it.replace("-", "").toLowerCase() }
- def specifiedParamLowerCase = specifiedParam.replace("-", "").toLowerCase()
- def isCamelCaseBug = (specifiedParam.contains("-") && !expectedParams.contains(specifiedParam) && expectedParamsLowerCase.contains(specifiedParamLowerCase))
- if (!expectedParams.contains(specifiedParam) && !params_ignore.contains(specifiedParam) && !isCamelCaseBug) {
- // Temporarily remove camelCase/camel-case params #1035
- def unexpectedParamsLowerCase = unexpectedParams.collect{ it.replace("-", "").toLowerCase()}
- if (!unexpectedParamsLowerCase.contains(specifiedParamLowerCase)){
- unexpectedParams.push(specifiedParam)
- }
- }
- }
-
- //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~//
- // Validate parameters against the schema
- InputStream input_stream = new File(getSchemaPath(workflow, schema_filename=schema_filename)).newInputStream()
- JSONObject raw_schema = new JSONObject(new JSONTokener(input_stream))
-
- // Remove anything that's in params.schema_ignore_params
- raw_schema = removeIgnoredParams(raw_schema, params)
-
- Schema schema = SchemaLoader.load(raw_schema)
-
- // Clean the parameters
- def cleanedParams = cleanParameters(params)
-
- // Convert to JSONObject
- def jsonParams = new JsonBuilder(cleanedParams)
- JSONObject params_json = new JSONObject(jsonParams.toString())
-
- // Validate
- try {
- schema.validate(params_json)
- } catch (ValidationException e) {
- println ''
- log.error 'ERROR: Validation of pipeline parameters failed!'
- JSONObject exceptionJSON = e.toJSON()
- printExceptions(exceptionJSON, params_json, log, enums)
- println ''
- has_error = true
- }
-
- // Check for unexpected parameters
- if (unexpectedParams.size() > 0) {
- Map colors = NfcoreTemplate.logColours(params.monochrome_logs)
- println ''
- def warn_msg = 'Found unexpected parameters:'
- for (unexpectedParam in unexpectedParams) {
- warn_msg = warn_msg + "\n* --${unexpectedParam}: ${params[unexpectedParam].toString()}"
- }
- log.warn warn_msg
- log.info "- ${colors.dim}Ignore this warning: params.schema_ignore_params = \"${unexpectedParams.join(',')}\" ${colors.reset}"
- println ''
- }
-
- if (has_error) {
- Nextflow.error('Exiting!')
- }
- }
-
- //
- // Beautify parameters for --help
- //
- public static String paramsHelp(workflow, params, command, schema_filename='nextflow_schema.json') {
- Map colors = NfcoreTemplate.logColours(params.monochrome_logs)
- Integer num_hidden = 0
- String output = ''
- output += 'Typical pipeline command:\n\n'
- output += " ${colors.cyan}${command}${colors.reset}\n\n"
- Map params_map = paramsLoad(getSchemaPath(workflow, schema_filename=schema_filename))
- Integer max_chars = paramsMaxChars(params_map) + 1
- Integer desc_indent = max_chars + 14
- Integer dec_linewidth = 160 - desc_indent
- for (group in params_map.keySet()) {
- Integer num_params = 0
- String group_output = colors.underlined + colors.bold + group + colors.reset + '\n'
- def group_params = params_map.get(group) // This gets the parameters of that particular group
- for (param in group_params.keySet()) {
- if (group_params.get(param).hidden && !params.show_hidden_params) {
- num_hidden += 1
- continue;
- }
- def type = '[' + group_params.get(param).type + ']'
- def description = group_params.get(param).description
- def defaultValue = group_params.get(param).default != null ? " [default: " + group_params.get(param).default.toString() + "]" : ''
- def description_default = description + colors.dim + defaultValue + colors.reset
- // Wrap long description texts
- // Loosely based on https://dzone.com/articles/groovy-plain-text-word-wrap
- if (description_default.length() > dec_linewidth){
- List olines = []
- String oline = "" // " " * indent
- description_default.split(" ").each() { wrd ->
- if ((oline.size() + wrd.size()) <= dec_linewidth) {
- oline += wrd + " "
- } else {
- olines += oline
- oline = wrd + " "
- }
- }
- olines += oline
- description_default = olines.join("\n" + " " * desc_indent)
- }
- group_output += " --" + param.padRight(max_chars) + colors.dim + type.padRight(10) + colors.reset + description_default + '\n'
- num_params += 1
- }
- group_output += '\n'
- if (num_params > 0){
- output += group_output
- }
- }
- if (num_hidden > 0){
- output += colors.dim + "!! Hiding $num_hidden params, use --show_hidden_params to show them !!\n" + colors.reset
- }
- output += NfcoreTemplate.dashedLine(params.monochrome_logs)
- return output
- }
-
- //
- // Groovy Map summarising parameters/workflow options used by the pipeline
- //
- public static LinkedHashMap paramsSummaryMap(workflow, params, schema_filename='nextflow_schema.json') {
- // Get a selection of core Nextflow workflow options
- def Map workflow_summary = [:]
- if (workflow.revision) {
- workflow_summary['revision'] = workflow.revision
- }
- workflow_summary['runName'] = workflow.runName
- if (workflow.containerEngine) {
- workflow_summary['containerEngine'] = workflow.containerEngine
- }
- if (workflow.container) {
- workflow_summary['container'] = workflow.container
- }
- workflow_summary['launchDir'] = workflow.launchDir
- workflow_summary['workDir'] = workflow.workDir
- workflow_summary['projectDir'] = workflow.projectDir
- workflow_summary['userName'] = workflow.userName
- workflow_summary['profile'] = workflow.profile
- workflow_summary['configFiles'] = workflow.configFiles.join(', ')
-
- // Get pipeline parameters defined in JSON Schema
- def Map params_summary = [:]
- def params_map = paramsLoad(getSchemaPath(workflow, schema_filename=schema_filename))
- for (group in params_map.keySet()) {
- def sub_params = new LinkedHashMap()
- def group_params = params_map.get(group) // This gets the parameters of that particular group
- for (param in group_params.keySet()) {
- if (params.containsKey(param)) {
- def params_value = params.get(param)
- def schema_value = group_params.get(param).default
- def param_type = group_params.get(param).type
- if (schema_value != null) {
- if (param_type == 'string') {
- if (schema_value.contains('$projectDir') || schema_value.contains('${projectDir}')) {
- def sub_string = schema_value.replace('\$projectDir', '')
- sub_string = sub_string.replace('\${projectDir}', '')
- if (params_value.contains(sub_string)) {
- schema_value = params_value
- }
- }
- if (schema_value.contains('$params.outdir') || schema_value.contains('${params.outdir}')) {
- def sub_string = schema_value.replace('\$params.outdir', '')
- sub_string = sub_string.replace('\${params.outdir}', '')
- if ("${params.outdir}${sub_string}" == params_value) {
- schema_value = params_value
- }
- }
- }
- }
-
- // We have a default in the schema, and this isn't it
- if (schema_value != null && params_value != schema_value) {
- sub_params.put(param, params_value)
- }
- // No default in the schema, and this isn't empty
- else if (schema_value == null && params_value != "" && params_value != null && params_value != false) {
- sub_params.put(param, params_value)
- }
- }
- }
- params_summary.put(group, sub_params)
- }
- return [ 'Core Nextflow options' : workflow_summary ] << params_summary
- }
-
- //
- // Beautify parameters for summary and return as string
- //
- public static String paramsSummaryLog(workflow, params) {
- Map colors = NfcoreTemplate.logColours(params.monochrome_logs)
- String output = ''
- def params_map = paramsSummaryMap(workflow, params)
- def max_chars = paramsMaxChars(params_map)
- for (group in params_map.keySet()) {
- def group_params = params_map.get(group) // This gets the parameters of that particular group
- if (group_params) {
- output += colors.bold + group + colors.reset + '\n'
- for (param in group_params.keySet()) {
- output += " " + colors.blue + param.padRight(max_chars) + ": " + colors.green + group_params.get(param) + colors.reset + '\n'
- }
- output += '\n'
- }
- }
- output += "!! Only displaying parameters that differ from the pipeline defaults !!\n"
- output += NfcoreTemplate.dashedLine(params.monochrome_logs)
- return output
- }
-
- //
- // Loop over nested exceptions and print the causingException
- //
- private static void printExceptions(ex_json, params_json, log, enums, limit=5) {
- def causingExceptions = ex_json['causingExceptions']
- if (causingExceptions.length() == 0) {
- def m = ex_json['message'] =~ /required key \[([^\]]+)\] not found/
- // Missing required param
- if (m.matches()) {
- log.error "* Missing required parameter: --${m[0][1]}"
- }
- // Other base-level error
- else if (ex_json['pointerToViolation'] == '#') {
- log.error "* ${ex_json['message']}"
- }
- // Error with specific param
- else {
- def param = ex_json['pointerToViolation'] - ~/^#\//
- def param_val = params_json[param].toString()
- if (enums.containsKey(param)) {
- def error_msg = "* --${param}: '${param_val}' is not a valid choice (Available choices"
- if (enums[param].size() > limit) {
- log.error "${error_msg} (${limit} of ${enums[param].size()}): ${enums[param][0..limit-1].join(', ')}, ... )"
- } else {
- log.error "${error_msg}: ${enums[param].join(', ')})"
- }
- } else {
- log.error "* --${param}: ${ex_json['message']} (${param_val})"
- }
- }
- }
- for (ex in causingExceptions) {
- printExceptions(ex, params_json, log, enums)
- }
- }
-
- //
- // Remove an element from a JSONArray
- //
- private static JSONArray removeElement(json_array, element) {
- def list = []
- int len = json_array.length()
-        for (int i=0;i<len;i++){
-            list.add(json_array.get(i).toString())
-        }
-        list.remove(element)
-        JSONArray jsArray = new JSONArray(list)
-        return jsArray
-    }
-
-    //
-    // Remove ignored parameters
-    //
-    private static JSONObject removeIgnoredParams(raw_schema, params) {
-        // Remove anything that's in params.schema_ignore_params
-        params.schema_ignore_params.split(',').each{ ignore_param ->
-            if(raw_schema.keySet().contains('definitions')){
- raw_schema.definitions.each { definition ->
- for (key in definition.keySet()){
- if (definition[key].get("properties").keySet().contains(ignore_param)){
- // Remove the param to ignore
- definition[key].get("properties").remove(ignore_param)
- // If the param was required, change this
- if (definition[key].has("required")) {
- def cleaned_required = removeElement(definition[key].required, ignore_param)
- definition[key].put("required", cleaned_required)
- }
- }
- }
- }
- }
- if(raw_schema.keySet().contains('properties') && raw_schema.get('properties').keySet().contains(ignore_param)) {
- raw_schema.get("properties").remove(ignore_param)
- }
- if(raw_schema.keySet().contains('required') && raw_schema.required.contains(ignore_param)) {
- def cleaned_required = removeElement(raw_schema.required, ignore_param)
- raw_schema.put("required", cleaned_required)
- }
- }
- return raw_schema
- }
-
- //
- // Clean and check parameters relative to Nextflow native classes
- //
- private static Map cleanParameters(params) {
- def new_params = params.getClass().newInstance(params)
- for (p in params) {
- // remove anything evaluating to false
- if (!p['value']) {
- new_params.remove(p.key)
- }
- // Cast MemoryUnit to String
- if (p['value'].getClass() == nextflow.util.MemoryUnit) {
- new_params.replace(p.key, p['value'].toString())
- }
- // Cast Duration to String
- if (p['value'].getClass() == nextflow.util.Duration) {
- new_params.replace(p.key, p['value'].toString().replaceFirst(/d(?!\S)/, "day"))
- }
- // Cast LinkedHashMap to String
- if (p['value'].getClass() == LinkedHashMap) {
- new_params.replace(p.key, p['value'].toString())
- }
- }
- return new_params
- }
-
- //
- // This function tries to read a JSON params file
- //
- private static LinkedHashMap paramsLoad(String json_schema) {
- def params_map = new LinkedHashMap()
- try {
- params_map = paramsRead(json_schema)
- } catch (Exception e) {
- println "Could not read parameters settings from JSON. $e"
- params_map = new LinkedHashMap()
- }
- return params_map
- }
-
- //
- // Method to actually read in JSON file using Groovy.
- // Group (as Key), values are all parameters
- // - Parameter1 as Key, Description as Value
- // - Parameter2 as Key, Description as Value
- // ....
- // Group
- // -
- private static LinkedHashMap paramsRead(String json_schema) throws Exception {
- def json = new File(json_schema).text
- def Map schema_definitions = (Map) new JsonSlurper().parseText(json).get('definitions')
- def Map schema_properties = (Map) new JsonSlurper().parseText(json).get('properties')
- /* Tree looks like this in nf-core schema
- * definitions <- this is what the first get('definitions') gets us
- group 1
- title
- description
- properties
- parameter 1
- type
- description
- parameter 2
- type
- description
- group 2
- title
- description
- properties
- parameter 1
- type
- description
- * properties <- parameters can also be ungrouped, outside of definitions
- parameter 1
- type
- description
- */
-
- // Grouped params
- def params_map = new LinkedHashMap()
- schema_definitions.each { key, val ->
- def Map group = schema_definitions."$key".properties // Gets the property object of the group
- def title = schema_definitions."$key".title
- def sub_params = new LinkedHashMap()
- group.each { innerkey, value ->
- sub_params.put(innerkey, value)
- }
- params_map.put(title, sub_params)
- }
-
- // Ungrouped params
- def ungrouped_params = new LinkedHashMap()
- schema_properties.each { innerkey, value ->
- ungrouped_params.put(innerkey, value)
- }
- params_map.put("Other parameters", ungrouped_params)
-
- return params_map
- }
-
- //
- // Get maximum number of characters across all parameter names
- //
- private static Integer paramsMaxChars(params_map) {
- Integer max_chars = 0
- for (group in params_map.keySet()) {
- def group_params = params_map.get(group) // This gets the parameters of that particular group
- for (param in group_params.keySet()) {
- if (param.size() > max_chars) {
- max_chars = param.size()
- }
- }
- }
- return max_chars
- }
-}
diff --git a/lib/NfcoreTemplate.groovy b/lib/NfcoreTemplate.groovy
index 25a0a74a..408951ae 100755
--- a/lib/NfcoreTemplate.groovy
+++ b/lib/NfcoreTemplate.groovy
@@ -128,7 +128,7 @@ class NfcoreTemplate {
def email_html = html_template.toString()
// Render the sendmail template
- def max_multiqc_email_size = params.max_multiqc_email_size as nextflow.util.MemoryUnit
+ def max_multiqc_email_size = (params.containsKey('max_multiqc_email_size') ? params.max_multiqc_email_size : 0) as nextflow.util.MemoryUnit
def smail_fields = [ email: email_address, subject: subject, email_txt: email_txt, email_html: email_html, projectDir: "$projectDir", mqcFile: mqc_report, mqcMaxSize: max_multiqc_email_size.toBytes() ]
def sf = new File("$projectDir/assets/sendmail_template.txt")
def sendmail_template = engine.createTemplate(sf).make(smail_fields)
diff --git a/lib/WorkflowAmpliseq.groovy b/lib/WorkflowAmpliseq.groovy
index 9f24a34d..fb90b385 100755
--- a/lib/WorkflowAmpliseq.groovy
+++ b/lib/WorkflowAmpliseq.groovy
@@ -155,15 +155,57 @@ class WorkflowAmpliseq {
return yaml_file_text
}
- public static String methodsDescriptionText(run_workflow, mqc_methods_yaml) {
+ //
+ // Generate methods description for MultiQC
+ //
+
+ public static String toolCitationText(params) {
+
+ // TODO Optionally add in-text citation tools to this list.
+        // Can use ternary operators to dynamically construct based on conditions, e.g. params["run_xyz"] ? "Tool (Foo et al. 2023)" : "",
+ // Uncomment function in methodsDescriptionText to render in MultiQC report
+ def citation_text = [
+ "Tools used in the workflow included:",
+ "FastQC (Andrews 2010),",
+ "MultiQC (Ewels et al. 2016)",
+ "."
+ ].join(' ').trim()
+
+ return citation_text
+ }
+
+ public static String toolBibliographyText(params) {
+
+ // TODO Optionally add bibliographic entries to this list.
+        // Can use ternary operators to dynamically construct based on conditions, e.g. params["run_xyz"] ? "Author (2023) Pub name, Journal, DOI" : "",
+ // Uncomment function in methodsDescriptionText to render in MultiQC report
+ def reference_text = [
+            "Andrews S, (2010) FastQC, URL: https://www.bioinformatics.babraham.ac.uk/projects/fastqc/.",
+            "Ewels, P., Magnusson, M., Lundin, S., & Käller, M. (2016). MultiQC: summarize analysis results for multiple tools and samples in a single report. Bioinformatics, 32(19), 3047–3048. doi: 10.1093/bioinformatics/btw354"
+ ].join(' ').trim()
+
+ return reference_text
+ }
+
+ public static String methodsDescriptionText(run_workflow, mqc_methods_yaml, params) {
// Convert to a named map so can be used as with familar NXF ${workflow} variable syntax in the MultiQC YML file
def meta = [:]
meta.workflow = run_workflow.toMap()
meta["manifest_map"] = run_workflow.manifest.toMap()
+ // Pipeline DOI
meta["doi_text"] = meta.manifest_map.doi ? "(doi: ${meta.manifest_map.doi})" : ""
meta["nodoi_text"] = meta.manifest_map.doi ? "": "If available, make sure to update the text to include the Zenodo DOI of version of the pipeline used. "
+ // Tool references
+ meta["tool_citations"] = ""
+ meta["tool_bibliography"] = ""
+
+ // TODO Only uncomment below if logic in toolCitationText/toolBibliographyText has been filled!
+ //meta["tool_citations"] = toolCitationText(params).replaceAll(", \\.", ".").replaceAll("\\. \\.", ".").replaceAll(", \\.", ".")
+ //meta["tool_bibliography"] = toolBibliographyText(params)
+
+
def methods_text = mqc_methods_yaml.text
def engine = new SimpleTemplateEngine()
diff --git a/lib/WorkflowMain.groovy b/lib/WorkflowMain.groovy
index ce4333e5..54128f4a 100755
--- a/lib/WorkflowMain.groovy
+++ b/lib/WorkflowMain.groovy
@@ -21,40 +21,11 @@ class WorkflowMain {
" https://github.com/${workflow.manifest.name}/blob/master/CITATIONS.md"
}
- //
- // Generate help string
- //
- public static String help(workflow, params) {
- def command = "nextflow run ${workflow.manifest.name} --input samplesheet.csv -profile docker"
- def help_string = ''
- help_string += NfcoreTemplate.logo(workflow, params.monochrome_logs)
- help_string += NfcoreSchema.paramsHelp(workflow, params, command)
- help_string += '\n' + citation(workflow) + '\n'
- help_string += NfcoreTemplate.dashedLine(params.monochrome_logs)
- return help_string
- }
-
- //
- // Generate parameter summary log string
- //
- public static String paramsSummaryLog(workflow, params) {
- def summary_log = ''
- summary_log += NfcoreTemplate.logo(workflow, params.monochrome_logs)
- summary_log += NfcoreSchema.paramsSummaryLog(workflow, params)
- summary_log += '\n' + citation(workflow) + '\n'
- summary_log += NfcoreTemplate.dashedLine(params.monochrome_logs)
- return summary_log
- }
//
// Validate parameters and print summary to screen
//
public static void initialise(workflow, params, log) {
- // Print help to screen if required
- if (params.help) {
- log.info help(workflow, params)
- System.exit(0)
- }
// Check that keys for reference databases are valid
if (params.dada_ref_taxonomy && !params.skip_taxonomy && !params.skip_dada_taxonomy) {
@@ -66,6 +37,9 @@ class WorkflowMain {
if (params.qiime_ref_taxonomy && !params.skip_taxonomy && !params.classifier) {
qiimereftaxonomyExistsError(params, log)
}
+ if (params.sintax_ref_taxonomy && !params.skip_taxonomy) {
+ sintaxreftaxonomyExistsError(params, log)
+ }
// Print workflow version and exit on --version
if (params.version) {
@@ -74,14 +48,6 @@ class WorkflowMain {
System.exit(0)
}
- // Print parameter summary log to screen
- log.info paramsSummaryLog(workflow, params)
-
- // Validate workflow parameters via the JSON schema
- if (params.validate_params) {
- NfcoreSchema.validateParameters(workflow, params, log)
- }
-
// Check that a -profile or Nextflow config has been provided to run the pipeline
NfcoreTemplate.checkConfigProvided(workflow, log)
@@ -133,4 +99,17 @@ class WorkflowMain {
Nextflow.error(error_string)
}
}
+ //
+    // Exit pipeline if incorrect --sintax_ref_taxonomy key provided
+ //
+ private static void sintaxreftaxonomyExistsError(params, log) {
+ if (params.sintax_ref_databases && params.sintax_ref_taxonomy && !params.sintax_ref_databases.containsKey(params.sintax_ref_taxonomy)) {
+ def error_string = "=============================================================================\n" +
+ " SINTAX reference database '${params.sintax_ref_taxonomy}' not found in any config files provided to the pipeline.\n" +
+ " Currently, the available reference taxonomy keys for `--sintax_ref_taxonomy` are:\n" +
+ " ${params.sintax_ref_databases.keySet().join(", ")}\n" +
+ "==================================================================================="
+ Nextflow.error(error_string)
+ }
+ }
}
diff --git a/main.nf b/main.nf
index b47bfe7f..aa809dba 100644
--- a/main.nf
+++ b/main.nf
@@ -17,6 +17,22 @@ nextflow.enable.dsl = 2
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
*/
+include { validateParameters; paramsHelp } from 'plugin/nf-validation'
+
+// Print help message if needed
+if (params.help) {
+ def logo = NfcoreTemplate.logo(workflow, params.monochrome_logs)
+ def citation = '\n' + WorkflowMain.citation(workflow) + '\n'
+ def String command = "nextflow run ${workflow.manifest.name} --input samplesheet.csv --genome GRCh37 -profile docker"
+ log.info logo + paramsHelp(command) + citation + NfcoreTemplate.dashedLine(params.monochrome_logs)
+ System.exit(0)
+}
+
+// Validate input parameters
+if (params.validate_params) {
+ validateParameters()
+}
+
WorkflowMain.initialise(workflow, params, log)
/*
diff --git a/modules/local/phyloseq.nf b/modules/local/phyloseq.nf
new file mode 100644
index 00000000..54537213
--- /dev/null
+++ b/modules/local/phyloseq.nf
@@ -0,0 +1,63 @@
+process PHYLOSEQ {
+ tag "$prefix"
+ label 'process_low'
+
+ conda "bioconda::bioconductor-phyloseq=1.44.0"
+ container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+ 'https://depot.galaxyproject.org/singularity/bioconductor-phyloseq:1.44.0--r43hdfd78af_0' :
+ 'quay.io/biocontainers/bioconductor-phyloseq:1.44.0--r43hdfd78af_0' }"
+
+ input:
+ tuple val(prefix), path(tax_tsv)
+ path otu_tsv
+ path sam_tsv
+ path tree
+
+ output:
+ tuple val(prefix), path("*phyloseq.rds"), emit: rds
+ path "versions.yml" , emit: versions
+
+ when:
+ task.ext.when == null || task.ext.when
+
+ script:
+ def sam_tsv = "\"${sam_tsv}\""
+ def otu_tsv = "\"${otu_tsv}\""
+ def tax_tsv = "\"${tax_tsv}\""
+ def tree = "\"${tree}\""
+ def prefix = "\"${prefix}\""
+ """
+ #!/usr/bin/env Rscript
+
+ suppressPackageStartupMessages(library(phyloseq))
+
+ otu_df <- read.table($otu_tsv, sep="\\t", header=TRUE, row.names=1)
+ tax_df <- read.table($tax_tsv, sep="\\t", header=TRUE, row.names=1)
+ otu_mat <- as.matrix(otu_df)
+ tax_mat <- as.matrix(tax_df)
+
+ OTU <- otu_table(otu_mat, taxa_are_rows=TRUE)
+ TAX <- tax_table(tax_mat)
+ phy_obj <- phyloseq(OTU, TAX)
+
+ if (file.exists($sam_tsv)) {
+ sam_df <- read.table($sam_tsv, sep="\\t", header=TRUE, row.names=1)
+ SAM <- sample_data(sam_df)
+ phy_obj <- merge_phyloseq(phy_obj, SAM)
+ }
+
+ if (file.exists($tree)) {
+ TREE <- read_tree($tree)
+ phy_obj <- merge_phyloseq(phy_obj, TREE)
+ }
+
+ saveRDS(phy_obj, file = paste0($prefix, "_phyloseq.rds"))
+
+ # Version information
+ writeLines(c("\\"${task.process}\\":",
+ paste0(" R: ", paste0(R.Version()[c("major","minor")], collapse = ".")),
+ paste0(" phyloseq: ", packageVersion("phyloseq"))),
+ "versions.yml"
+ )
+ """
+}
diff --git a/modules/local/phyloseq_inasv.nf b/modules/local/phyloseq_inasv.nf
new file mode 100644
index 00000000..f66d1669
--- /dev/null
+++ b/modules/local/phyloseq_inasv.nf
@@ -0,0 +1,28 @@
+process PHYLOSEQ_INASV {
+ label 'process_low'
+
+ conda "conda-forge::sed=4.7"
+ container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+ 'https://depot.galaxyproject.org/singularity/ubuntu:20.04' :
+ 'nf-core/ubuntu:20.04' }"
+
+ input:
+ path(biom_file)
+
+ output:
+ path( "*.tsv" ) , emit: tsv
+ path "versions.yml" , emit: versions
+
+ when:
+ task.ext.when == null || task.ext.when
+
+ script:
+ """
+ tail $biom_file -n +2 | sed '1s/#OTU ID/ASV_ID/' > reformat_$biom_file
+
+ cat <<-END_VERSIONS > versions.yml
+ "${task.process}":
+ bash: \$(bash --version | sed -n 1p | sed 's/GNU bash, version //g')
+ END_VERSIONS
+ """
+}
diff --git a/modules/local/phyloseq_intax.nf b/modules/local/phyloseq_intax.nf
new file mode 100644
index 00000000..6dbd8487
--- /dev/null
+++ b/modules/local/phyloseq_intax.nf
@@ -0,0 +1,29 @@
+process PHYLOSEQ_INTAX {
+ label 'process_low'
+
+ conda "conda-forge::pandas=1.1.5"
+ container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+ 'https://depot.galaxyproject.org/singularity/pandas:1.1.5':
+ 'biocontainers/pandas:1.1.5' }"
+
+ input:
+ path(tax_tsv)
+
+ output:
+ path( "*.tsv" ) , emit: tsv
+ path "versions.yml" , emit: versions
+
+ when:
+ task.ext.when == null || task.ext.when
+
+ script:
+ """
+ reformat_tax_for_phyloseq.py $tax_tsv reformat_$tax_tsv
+
+ cat <<-END_VERSIONS > versions.yml
+ "${task.process}":
+ python: \$(python --version 2>&1 | sed 's/Python //g')
+ pandas: \$(python -c "import pkg_resources; print(pkg_resources.get_distribution('pandas').version)")
+ END_VERSIONS
+ """
+}
diff --git a/modules/local/qiime2_alphararefaction.nf b/modules/local/qiime2_alphararefaction.nf
index 9d656840..9ff9c782 100644
--- a/modules/local/qiime2_alphararefaction.nf
+++ b/modules/local/qiime2_alphararefaction.nf
@@ -1,7 +1,7 @@
process QIIME2_ALPHARAREFACTION {
label 'process_low'
- container "quay.io/qiime2/core:2022.11"
+ container "qiime2/core:2022.11"
// Exit if running this module with -profile conda / -profile mamba
if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) {
diff --git a/modules/local/qiime2_ancom_asv.nf b/modules/local/qiime2_ancom_asv.nf
index 322b414e..165ca45f 100644
--- a/modules/local/qiime2_ancom_asv.nf
+++ b/modules/local/qiime2_ancom_asv.nf
@@ -5,7 +5,7 @@ process QIIME2_ANCOM_ASV {
label 'process_long'
label 'error_ignore'
- container "quay.io/qiime2/core:2022.11"
+ container "qiime2/core:2022.11"
// Exit if running this module with -profile conda / -profile mamba
if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) {
diff --git a/modules/local/qiime2_ancom_tax.nf b/modules/local/qiime2_ancom_tax.nf
index 9f5392ef..717e7286 100644
--- a/modules/local/qiime2_ancom_tax.nf
+++ b/modules/local/qiime2_ancom_tax.nf
@@ -3,7 +3,7 @@ process QIIME2_ANCOM_TAX {
label 'process_medium'
label 'single_cpu'
- container "quay.io/qiime2/core:2022.11"
+ container "qiime2/core:2022.11"
// Exit if running this module with -profile conda / -profile mamba
if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) {
diff --git a/modules/local/qiime2_barplot.nf b/modules/local/qiime2_barplot.nf
index 3e83ab02..bb0c8aeb 100644
--- a/modules/local/qiime2_barplot.nf
+++ b/modules/local/qiime2_barplot.nf
@@ -1,7 +1,7 @@
process QIIME2_BARPLOT {
label 'process_low'
- container "quay.io/qiime2/core:2022.11"
+ container "qiime2/core:2022.11"
// Exit if running this module with -profile conda / -profile mamba
if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) {
diff --git a/modules/local/qiime2_classify.nf b/modules/local/qiime2_classify.nf
index f5a4824d..c32fff03 100644
--- a/modules/local/qiime2_classify.nf
+++ b/modules/local/qiime2_classify.nf
@@ -2,7 +2,7 @@ process QIIME2_CLASSIFY {
tag "${repseq},${trained_classifier}"
label 'process_high'
- container "quay.io/qiime2/core:2022.11"
+ container "qiime2/core:2022.11"
// Exit if running this module with -profile conda / -profile mamba
if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) {
diff --git a/modules/local/qiime2_diversity_adonis.nf b/modules/local/qiime2_diversity_adonis.nf
index 25bc95f8..78b15dd3 100644
--- a/modules/local/qiime2_diversity_adonis.nf
+++ b/modules/local/qiime2_diversity_adonis.nf
@@ -2,7 +2,7 @@ process QIIME2_DIVERSITY_ADONIS {
tag "${core.baseName} - ${formula}"
label 'process_low'
- container "quay.io/qiime2/core:2022.11"
+ container "qiime2/core:2022.11"
// Exit if running this module with -profile conda / -profile mamba
if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) {
diff --git a/modules/local/qiime2_diversity_alpha.nf b/modules/local/qiime2_diversity_alpha.nf
index dff59e3e..ae1db546 100644
--- a/modules/local/qiime2_diversity_alpha.nf
+++ b/modules/local/qiime2_diversity_alpha.nf
@@ -2,7 +2,7 @@ process QIIME2_DIVERSITY_ALPHA {
tag "${core.baseName}"
label 'process_low'
- container "quay.io/qiime2/core:2022.11"
+ container "qiime2/core:2022.11"
// Exit if running this module with -profile conda / -profile mamba
if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) {
diff --git a/modules/local/qiime2_diversity_beta.nf b/modules/local/qiime2_diversity_beta.nf
index f6fc5ee7..8f73ff2c 100644
--- a/modules/local/qiime2_diversity_beta.nf
+++ b/modules/local/qiime2_diversity_beta.nf
@@ -2,7 +2,7 @@ process QIIME2_DIVERSITY_BETA {
tag "${core.baseName} - ${category}"
label 'process_low'
- container "quay.io/qiime2/core:2022.11"
+ container "qiime2/core:2022.11"
// Exit if running this module with -profile conda / -profile mamba
if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) {
diff --git a/modules/local/qiime2_diversity_betaord.nf b/modules/local/qiime2_diversity_betaord.nf
index 7b2699a4..aba4afa8 100644
--- a/modules/local/qiime2_diversity_betaord.nf
+++ b/modules/local/qiime2_diversity_betaord.nf
@@ -2,7 +2,7 @@ process QIIME2_DIVERSITY_BETAORD {
tag "${core.baseName}"
label 'process_low'
- container "quay.io/qiime2/core:2022.11"
+ container "qiime2/core:2022.11"
// Exit if running this module with -profile conda / -profile mamba
if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) {
diff --git a/modules/local/qiime2_diversity_core.nf b/modules/local/qiime2_diversity_core.nf
index 99fe9280..52cb1e6f 100644
--- a/modules/local/qiime2_diversity_core.nf
+++ b/modules/local/qiime2_diversity_core.nf
@@ -1,7 +1,7 @@
process QIIME2_DIVERSITY_CORE {
label 'process_low'
- container "quay.io/qiime2/core:2022.11"
+ container "qiime2/core:2022.11"
// Exit if running this module with -profile conda / -profile mamba
if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) {
diff --git a/modules/local/qiime2_export_absolute.nf b/modules/local/qiime2_export_absolute.nf
index 624547d5..9bfe0d0a 100644
--- a/modules/local/qiime2_export_absolute.nf
+++ b/modules/local/qiime2_export_absolute.nf
@@ -1,7 +1,7 @@
process QIIME2_EXPORT_ABSOLUTE {
label 'process_low'
- container "quay.io/qiime2/core:2022.11"
+ container "qiime2/core:2022.11"
// Exit if running this module with -profile conda / -profile mamba
if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) {
diff --git a/modules/local/qiime2_export_relasv.nf b/modules/local/qiime2_export_relasv.nf
index a5b81388..9ed1b322 100644
--- a/modules/local/qiime2_export_relasv.nf
+++ b/modules/local/qiime2_export_relasv.nf
@@ -1,7 +1,7 @@
process QIIME2_EXPORT_RELASV {
label 'process_low'
- container "quay.io/qiime2/core:2022.11"
+ container "qiime2/core:2022.11"
// Exit if running this module with -profile conda / -profile mamba
if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) {
diff --git a/modules/local/qiime2_export_reltax.nf b/modules/local/qiime2_export_reltax.nf
index 8f090b07..ea2cf21a 100644
--- a/modules/local/qiime2_export_reltax.nf
+++ b/modules/local/qiime2_export_reltax.nf
@@ -1,7 +1,7 @@
process QIIME2_EXPORT_RELTAX {
label 'process_low'
- container "quay.io/qiime2/core:2022.11"
+ container "qiime2/core:2022.11"
// Exit if running this module with -profile conda / -profile mamba
if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) {
diff --git a/modules/local/qiime2_extract.nf b/modules/local/qiime2_extract.nf
index 6f686906..3a10c107 100644
--- a/modules/local/qiime2_extract.nf
+++ b/modules/local/qiime2_extract.nf
@@ -3,7 +3,7 @@ process QIIME2_EXTRACT {
label 'process_low'
label 'single_cpu'
- container "quay.io/qiime2/core:2022.11"
+ container "qiime2/core:2022.11"
// Exit if running this module with -profile conda / -profile mamba
if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) {
diff --git a/modules/local/qiime2_featuretable_group.nf b/modules/local/qiime2_featuretable_group.nf
index 71e9a9b2..44bcfaae 100644
--- a/modules/local/qiime2_featuretable_group.nf
+++ b/modules/local/qiime2_featuretable_group.nf
@@ -2,7 +2,7 @@ process QIIME2_FEATURETABLE_GROUP {
tag "${category}"
label 'process_low'
- container "quay.io/qiime2/core:2022.11"
+ container "qiime2/core:2022.11"
// Exit if running this module with -profile conda / -profile mamba
if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) {
diff --git a/modules/local/qiime2_filtersamples.nf b/modules/local/qiime2_filtersamples.nf
index 6a4a7310..4bdd7e39 100644
--- a/modules/local/qiime2_filtersamples.nf
+++ b/modules/local/qiime2_filtersamples.nf
@@ -2,7 +2,7 @@ process QIIME2_FILTERSAMPLES {
tag "${filter}"
label 'process_low'
- container "quay.io/qiime2/core:2022.11"
+ container "qiime2/core:2022.11"
// Exit if running this module with -profile conda / -profile mamba
if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) {
diff --git a/modules/local/qiime2_filtertaxa.nf b/modules/local/qiime2_filtertaxa.nf
index 0a25803e..1f26ab10 100644
--- a/modules/local/qiime2_filtertaxa.nf
+++ b/modules/local/qiime2_filtertaxa.nf
@@ -2,7 +2,7 @@ process QIIME2_FILTERTAXA {
tag "taxa:${exclude_taxa};min-freq:${min_frequency};min-samples:${min_samples}"
label 'process_low'
- container "quay.io/qiime2/core:2022.11"
+ container "qiime2/core:2022.11"
// Exit if running this module with -profile conda / -profile mamba
if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) {
diff --git a/modules/local/qiime2_inasv.nf b/modules/local/qiime2_inasv.nf
index 348aea87..aea70bb7 100644
--- a/modules/local/qiime2_inasv.nf
+++ b/modules/local/qiime2_inasv.nf
@@ -2,7 +2,7 @@ process QIIME2_INASV {
tag "${asv}"
label 'process_low'
- container "quay.io/qiime2/core:2022.11"
+ container "qiime2/core:2022.11"
// Exit if running this module with -profile conda / -profile mamba
if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) {
diff --git a/modules/local/qiime2_inseq.nf b/modules/local/qiime2_inseq.nf
index a0504053..0cc3aca8 100644
--- a/modules/local/qiime2_inseq.nf
+++ b/modules/local/qiime2_inseq.nf
@@ -2,7 +2,7 @@ process QIIME2_INSEQ {
tag "${seq}"
label 'process_low'
- container "quay.io/qiime2/core:2022.11"
+ container "qiime2/core:2022.11"
// Exit if running this module with -profile conda / -profile mamba
if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) {
diff --git a/modules/local/qiime2_intax.nf b/modules/local/qiime2_intax.nf
index 0e6c69e1..4f35daed 100644
--- a/modules/local/qiime2_intax.nf
+++ b/modules/local/qiime2_intax.nf
@@ -2,7 +2,7 @@ process QIIME2_INTAX {
tag "${tax}"
label 'process_low'
- container "quay.io/qiime2/core:2022.11"
+ container "qiime2/core:2022.11"
// Exit if running this module with -profile conda / -profile mamba
if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) {
diff --git a/modules/local/qiime2_intree.nf b/modules/local/qiime2_intree.nf
index f9f35b97..620e74c0 100644
--- a/modules/local/qiime2_intree.nf
+++ b/modules/local/qiime2_intree.nf
@@ -2,7 +2,7 @@ process QIIME2_INTREE {
tag "${meta.id}:${meta.model}"
label 'process_low'
- container "quay.io/qiime2/core:2022.11"
+ container "qiime2/core:2022.11"
// Exit if running this module with -profile conda / -profile mamba
if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) {
diff --git a/modules/local/qiime2_train.nf b/modules/local/qiime2_train.nf
index 254118f8..289fd6a6 100644
--- a/modules/local/qiime2_train.nf
+++ b/modules/local/qiime2_train.nf
@@ -3,7 +3,7 @@ process QIIME2_TRAIN {
label 'process_high'
label 'single_cpu'
- container "quay.io/qiime2/core:2022.11"
+ container "qiime2/core:2022.11"
// Exit if running this module with -profile conda / -profile mamba
if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) {
diff --git a/modules/local/qiime2_tree.nf b/modules/local/qiime2_tree.nf
index 5fc32fed..c870842f 100644
--- a/modules/local/qiime2_tree.nf
+++ b/modules/local/qiime2_tree.nf
@@ -1,7 +1,7 @@
process QIIME2_TREE {
label 'process_medium'
- container "quay.io/qiime2/core:2022.11"
+ container "qiime2/core:2022.11"
// Exit if running this module with -profile conda / -profile mamba
if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) {
diff --git a/modules/local/summary_report.nf b/modules/local/summary_report.nf
new file mode 100644
index 00000000..8af605c6
--- /dev/null
+++ b/modules/local/summary_report.nf
@@ -0,0 +1,142 @@
+process SUMMARY_REPORT {
+ label 'process_low'
+
+ conda "conda-forge::r-base=4.2.3 conda-forge::r-rmarkdown=2.22 conda-forge::r-tidyverse=2.0.0 conda-forge::r-knitr=1.43 conda-forge::r-dt=0.28 conda-forge::r-dtplyr=1.3.1 conda-forge::r-formattable=0.2.1 conda-forge::r-purrr=1.0.1 conda-forge::r-vegan=2.6_4 conda-forge::r-optparse=1.7.3 conda-forge::r-ggplot2=3.4.2 conda-forge::r-dplyr=1.1.2 conda-forge::r-data.table=1.14.8 conda-forge::r-patchwork=1.1.2"
+ container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+ 'https://depot.galaxyproject.org/singularity/mulled-v2-b2ec1fea5791d428eebb8c8ea7409c350d31dada:a447f6b7a6afde38352b24c30ae9cd6e39df95c4-1' :
+ 'biocontainers/mulled-v2-b2ec1fea5791d428eebb8c8ea7409c350d31dada:a447f6b7a6afde38352b24c30ae9cd6e39df95c4-1' }"
+
+ input:
+ path(report_template)
+ path(report_styles)
+ path(report_logo)
+ path(report_abstract)
+ path(metadata)
+ path(samplesheet)
+ path(fasta)
+ path(mqc_plots)
+ path(cutadapt_summary)
+ val(find_truncation_values)
+ path(dada_filtntrim_args)
+ path(dada_qual_stats)
+ path(dada_pp_qual_stats)
+ tuple val(meta), path(dada_err_svgs)
+ path(dada_asv_table)
+ path(dada_asv_fa)
+ path(dada_tab)
+ path(dada_stats)
+ path(barrnap_summary)
+ path(filter_ssu_stats)
+ path(filter_ssu_asv)
+ path(filter_len_asv_stats)
+ path(filter_len_asv_len_orig)
+ path(filter_codons_stats)
+ path(itsx_cutasv_summary)
+ path(dada2_tax)
+ tuple val(meta_ref), path(cut_dada_ref_taxonomy) // cutadapt log when params.cut_dada_ref_taxonomy
+ path(sintax_tax)
+ path(pplace_tax)
+ tuple val(meta_pplace), path(pplace_heattree)
+ path(qiime2_tax)
+ val(run_qiime2)
+ val(val_used_taxonomy)
+ val(qiime2_filtertaxa) // ,
+ path(filter_stats_tsv)
+ path(barplot)
+ val(abundance_tables)
+ val(alpha_rarefaction)
+ path(diversity_indices)
+    path(diversity_indices_beta, stageAs: 'beta_diversity/*') // prevent folder name collisions
+    path(diversity_indices_adonis, stageAs: 'beta_diversity/adonis/*') // prevent folder name collisions
+ path(ancom)
+ path(picrust_pathways)
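+    // NB: most of these inputs are optional; the calling workflow passes empty lists ([]) when a
+    // step was skipped, and the named R parameter list below only adds entries for inputs that are present.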
+
+
+ output:
+ path "*.svg" , emit: svg, optional: true
+ path "summary_report.html" , emit: report
+ path "versions.yml" , emit: versions
+
+ when:
+ task.ext.when == null || task.ext.when
+
+ script:
+ // make named R list (comma separated)
+    // all non-boolean and non-numeric values must be enclosed in single quotes (')!
+ // all elements must have a value, i.e. booleans also need to be set to TRUE
+ def params_list_named = [
+ "css='$report_styles'",
+ "report_logo='$report_logo'",
+ "workflow_manifest_version='${workflow.manifest.version}'",
+ "workflow_scriptid='${workflow.scriptId.substring(0,10)}'",
+ params.report_title ? "report_title='$params.report_title'" : "",
+ report_abstract ? "report_abstract='$params.report_abstract'" : "",
+ meta.single_end ? "flag_single_end=TRUE" : "",
+ metadata ? "metadata='$metadata'" : "",
+ samplesheet ? "samplesheet='$samplesheet'" : "",
+ fasta ? "fasta='$fasta'" : "",
+ !fasta && !samplesheet ? "input='$params.input'" : "",
+ mqc_plots ? "mqc_plot='${mqc_plots}/svg/mqc_fastqc_per_sequence_quality_scores_plot_1.svg'" : "",
+ cutadapt_summary ?
+ params.retain_untrimmed ? "flag_retain_untrimmed=TRUE,cutadapt_summary='$cutadapt_summary'" :
+ "cutadapt_summary='$cutadapt_summary'" : "",
+ find_truncation_values ? "trunc_qmin=$params.trunc_qmin,trunc_rmin=$params.trunc_rmin" : "",
+ "trunclenf='$params.trunclenf'",
+ "trunclenr='$params.trunclenr'",
+ "max_ee=$params.max_ee",
+ dada_qual_stats && meta.single_end ? "dada_qc_f_path='$dada_qual_stats',dada_pp_qc_f_path='$dada_pp_qual_stats'" :
+ dada_qual_stats ? "dada_qc_f_path='FW_qual_stats.svg',dada_qc_r_path='RV_qual_stats.svg',dada_pp_qc_f_path='FW_preprocessed_qual_stats.svg',dada_pp_qc_r_path='RV_preprocessed_qual_stats.svg'" : "",
+ dada_filtntrim_args ? "dada_filtntrim_args='$dada_filtntrim_args'" : "",
+ "dada_sample_inference='$params.sample_inference'",
+ dada_err_svgs && meta.run.size() == 1 && meta.single_end ?
+ "dada_err_path='$dada_err_svgs',dada_err_run='"+meta.run+"'" :
+ dada_err_svgs ? "dada_err_path='"+dada_err_svgs.join(',')+"',dada_err_run='"+meta.run.join(',')+"'" : "",
+ dada_asv_table ? "asv_table_path='$dada_asv_table'" : "",
+ dada_asv_fa ? "path_asv_fa='$dada_asv_fa'": "",
+ dada_tab ? "path_dada2_tab='$dada_tab'" : "",
+ dada_stats ? "dada_stats_path='$dada_stats'" : "",
+ params.skip_barrnap ? "" : "path_barrnap_sum='$barrnap_summary'",
+ filter_ssu_stats ? "filter_ssu_stats='$filter_ssu_stats',filter_ssu_asv='$filter_ssu_asv',filter_ssu='$params.filter_ssu'" : "",
+ filter_len_asv_stats ? "filter_len_asv='$filter_len_asv_stats'" : "",
+ filter_len_asv_len_orig ? "filter_len_asv_len_orig='$filter_len_asv_len_orig'" : "",
+ params.min_len_asv ? "min_len_asv=$params.min_len_asv" : "min_len_asv=0",
+ params.max_len_asv ? "max_len_asv=$params.max_len_asv" : "max_len_asv=0",
+ filter_codons_stats ? "filter_codons='$filter_codons_stats',stop_codons='$params.stop_codons'" : "",
+ itsx_cutasv_summary ? "itsx_cutasv_summary='$itsx_cutasv_summary',cut_its='$params.cut_its'" : "",
+ !dada2_tax ? "" :
+ params.dada_ref_tax_custom ? "dada2_taxonomy='$dada2_tax',flag_ref_tax_user=TRUE" :
+ "dada2_taxonomy='$dada2_tax',dada2_ref_tax_title='${params.dada_ref_databases[params.dada_ref_taxonomy]["title"]}',dada2_ref_tax_file='${params.dada_ref_databases[params.dada_ref_taxonomy]["file"]}',dada2_ref_tax_citation='${params.dada_ref_databases[params.dada_ref_taxonomy]["citation"]}'",
+ cut_dada_ref_taxonomy ? "cut_dada_ref_taxonomy='$cut_dada_ref_taxonomy'" : "",
+ sintax_tax ? "sintax_taxonomy='$sintax_tax',sintax_ref_tax_title='${params.sintax_ref_databases[params.sintax_ref_taxonomy]["title"]}',sintax_ref_tax_file='${params.sintax_ref_databases[params.sintax_ref_taxonomy]["file"]}',sintax_ref_tax_citation='${params.sintax_ref_databases[params.sintax_ref_taxonomy]["citation"]}'" : "",
+ pplace_tax ? "pplace_taxonomy='$pplace_tax',pplace_heattree='$pplace_heattree'" : "",
+ qiime2_tax ? "qiime2_taxonomy='$qiime2_tax',qiime2_ref_tax_title='${params.qiime_ref_databases[params.qiime_ref_taxonomy]["title"]}',qiime2_ref_tax_file='${params.qiime_ref_databases[params.qiime_ref_taxonomy]["file"]}',qiime2_ref_tax_citation='${params.qiime_ref_databases[params.qiime_ref_taxonomy]["citation"]}'" : "",
+ run_qiime2 ? "val_used_taxonomy='$val_used_taxonomy'" : "",
+ filter_stats_tsv ? "filter_stats_tsv='$filter_stats_tsv',qiime2_filtertaxa='$qiime2_filtertaxa',exclude_taxa='$params.exclude_taxa',min_frequency='$params.min_frequency',min_samples='$params.min_samples'" : "",
+ barplot ? "barplot=TRUE" : "",
+ barplot && params.metadata_category_barplot ? "metadata_category_barplot='$params.metadata_category_barplot'" : "",
+ abundance_tables ? "abundance_tables=TRUE" : "",
+ alpha_rarefaction ? "alpha_rarefaction=TRUE" : "",
+ diversity_indices ? "diversity_indices_depth='$diversity_indices',diversity_indices_beta='"+ diversity_indices_beta.join(",") +"'" : "",
+ diversity_indices_adonis ? "diversity_indices_adonis='"+ diversity_indices_adonis.join(",") +"',qiime_adonis_formula='$params.qiime_adonis_formula'" : "",
+ ancom ? "ancom='"+ ancom.join(",") +"'" : "",
+ ]
+ // groovy list to R named list string; findAll removes empty entries
+ params_list_named_string = params_list_named.findAll().join(',').trim()
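+    // Illustrative only: this yields a flat comma-separated string of key='value' pairs, e.g.
+    //   css='nf-core_style.css',report_logo='nf-core-ampliseq_logo_light_long.png',...
+    // which is spliced verbatim into the params = list(...) call passed to rmarkdown::render() below.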
+ """
+ #!/usr/bin/env Rscript
+ library(rmarkdown)
+
+ # Work around https://github.com/rstudio/rmarkdown/issues/1508
+    # If the symbolic link is not replaced by a physical file,
+    # output and temporary files will be written to the original directory.
+ file.copy("./${report_template}", "./template.Rmd", overwrite = TRUE)
+
+ rmarkdown::render("template.Rmd", output_file = "summary_report.html", params = list($params_list_named_string), envir = new.env())
+
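+    # Emit versions.yml (R, rmarkdown, knitr) so this process can be picked up by the
+    # pipeline's software-version reporting.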
+ writeLines(c("\\"${task.process}\\":",
+ paste0(" R: ", paste0(R.Version()[c("major","minor")], collapse = ".")),
+ paste0(" rmarkdown: ", packageVersion("rmarkdown")),
+ paste0(" knitr: ", packageVersion("knitr")) ),
+ "versions.yml")
+ """
+}
diff --git a/nextflow.config b/nextflow.config
index 9f28feed..ed052347 100644
--- a/nextflow.config
+++ b/nextflow.config
@@ -70,6 +70,13 @@ params {
diversity_rarefaction_depth = 500
ancom_sample_min_count = 1
+ // Report options
+ report_template = "${projectDir}/assets/report_template.Rmd"
+ report_css = "${projectDir}/assets/nf-core_style.css"
+ report_logo = "${projectDir}/assets/nf-core-ampliseq_logo_light_long.png"
+ report_title = "Summary of analysis results"
+ report_abstract = null
+
// Skipping options
skip_cutadapt = false
skip_dada_quality = false
@@ -86,6 +93,7 @@ params {
skip_diversity_indices = false
skip_ancom = false
skip_multiqc = false
+ skip_report = false
// Database options
dada_ref_taxonomy = "silva=138"
@@ -105,7 +113,6 @@ params {
// Boilerplate options
outdir = null
- tracedir = "${params.outdir}/pipeline_info"
publish_dir_mode = 'copy'
email = null
email_on_fail = null
@@ -114,18 +121,14 @@ params {
hook_url = null
help = false
version = false
- validate_params = true
- show_hidden_params = false
- schema_ignore_params = 'dada_ref_databases,qiime_ref_databases,sintax_ref_databases,igenomes_base'
-
// Config options
+ config_profile_name = null
+ config_profile_description = null
custom_config_version = 'master'
custom_config_base = "https://raw.githubusercontent.com/nf-core/configs/${params.custom_config_version}"
- config_profile_description = null
config_profile_contact = null
config_profile_url = null
- config_profile_name = null
// Max resource options
@@ -134,6 +137,13 @@ params {
max_cpus = 16
max_time = '240.h'
+ // Schema validation default options
+ validationFailUnrecognisedParams = false
+ validationLenientMode = false
+ validationSchemaIgnoreParams = 'dada_ref_databases,qiime_ref_databases,sintax_ref_databases,igenomes_base'
+ validationShowHiddenParams = false
+ validate_params = true
+
}
// Load base.config by default for all pipelines
@@ -153,13 +163,11 @@ try {
// } catch (Exception e) {
// System.err.println("WARNING: Could not load nf-core/config/ampliseq profiles: ${params.custom_config_base}/pipeline/ampliseq.config")
// }
-
-
profiles {
debug {
dumpHashes = true
process.beforeScript = 'echo $HOSTNAME'
- cleanup = false
+ cleanup = false
}
conda {
conda.enabled = true
@@ -258,6 +266,18 @@ profiles {
test_sintax { includeConfig 'conf/test_sintax.config' }
}
+// Set default registry for Apptainer, Docker, Podman and Singularity independent of -profile
+// Will not be used unless Apptainer / Docker / Podman / Singularity are enabled
+// Set to your registry if you have a mirror of containers
+apptainer.registry = 'quay.io'
+docker.registry = 'quay.io'
+podman.registry = 'quay.io'
+singularity.registry = 'quay.io'
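+// e.g. (illustrative) override in a custom config: docker.registry = 'my-registry.example.com'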
+
+// Nextflow plugins
+plugins {
+ id 'nf-validation' // Validation of pipeline parameters and creation of an input channel from a sample sheet
+}
// Export these variables to prevent local Python/R libraries from conflicting with those in the container
// The JULIA depot path has been adjusted to a fixed path `/usr/local/share/julia` that needs to be used for packages in the container.
@@ -273,29 +293,22 @@ env {
// Capture exit codes from upstream processes when piping
process.shell = ['/bin/bash', '-euo', 'pipefail']
-// Set default registry for Docker, Singularity and Podman independent of -profile
-// Will not be used unless Docker, Singularity and Podman are enabled
-// Set to your registry if you have a mirror of containers
-docker.registry = 'quay.io'
-podman.registry = 'quay.io'
-singularity.registry = 'quay.io'
-
def trace_timestamp = new java.util.Date().format( 'yyyy-MM-dd_HH-mm-ss')
timeline {
enabled = true
- file = "${params.tracedir}/execution_timeline_${trace_timestamp}.html"
+ file = "${params.outdir}/pipeline_info/execution_timeline_${trace_timestamp}.html"
}
report {
enabled = true
- file = "${params.tracedir}/execution_report_${trace_timestamp}.html"
+ file = "${params.outdir}/pipeline_info/execution_report_${trace_timestamp}.html"
}
trace {
enabled = true
- file = "${params.tracedir}/execution_trace_${trace_timestamp}.txt"
+ file = "${params.outdir}/pipeline_info/execution_trace_${trace_timestamp}.txt"
}
dag {
enabled = true
- file = "${params.tracedir}/pipeline_dag_${trace_timestamp}.html"
+ file = "${params.outdir}/pipeline_info/pipeline_dag_${trace_timestamp}.html"
}
manifest {
diff --git a/nextflow_schema.json b/nextflow_schema.json
index 7d733cf8..e0055c05 100644
--- a/nextflow_schema.json
+++ b/nextflow_schema.json
@@ -41,6 +41,13 @@
"format": "directory-path",
"description": "The output directory where the results will be saved. You have to use absolute paths to storage on Cloud infrastructure.",
"fa_icon": "fas fa-folder-open"
+ },
+ "email": {
+ "type": "string",
+ "description": "Email address for completion summary.",
+ "fa_icon": "fas fa-envelope",
+ "help_text": "Set this parameter to your e-mail address to get a summary e-mail with details of the run sent to you when the workflow exits. If set in your user config file (`~/.nextflow/config`) then you don't need to specify this on the command line for every run.",
+ "pattern": "^([a-zA-Z0-9_\\-\\.]+)@([a-zA-Z0-9_\\-\\.]+)\\.([a-zA-Z]{2,5})$"
}
},
"required": ["input", "outdir"],
@@ -496,6 +503,39 @@
}
}
},
+ "pipeline_report": {
+ "title": "Pipeline summary report",
+ "type": "object",
+ "description": "",
+ "default": "",
+ "properties": {
+ "report_template": {
+ "type": "string",
+ "default": "${projectDir}/assets/report_template.Rmd",
+                "description": "Path to R Markdown file (Rmd)"
+ },
+ "report_css": {
+ "type": "string",
+ "default": "${projectDir}/assets/nf-core_style.css",
+ "description": "Path to style file (css)"
+ },
+ "report_logo": {
+ "type": "string",
+ "default": "${projectDir}/assets/nf-core-ampliseq_logo_light_long.png",
+ "description": "Path to logo file (png)"
+ },
+ "report_title": {
+ "type": "string",
+ "default": "Summary of analysis results",
+ "description": "String used as report title"
+ },
+ "report_abstract": {
+ "type": "string",
+ "default": null,
+ "description": "Path to Markdown file (md) that replaces the 'Abstract' section"
+ }
+ }
+ },
"skipping_specific_steps": {
"title": "Skipping specific steps",
"type": "object",
@@ -557,6 +597,10 @@
"skip_multiqc": {
"type": "boolean",
"description": "Skip MultiQC reporting"
+ },
+ "skip_report": {
+ "type": "boolean",
+ "description": "Skip Markdown summary report"
}
}
},
@@ -572,29 +616,18 @@
"default": 100,
"description": "Specifies the random seed."
},
- "email": {
- "type": "string",
- "description": "Email address for completion summary.",
- "fa_icon": "fas fa-envelope",
- "help_text": "Set this parameter to your e-mail address to get a summary e-mail with details of the run sent to you when the workflow exits. If set in your user config file (`~/.nextflow/config`) then you don't need to specify this on the command line for every run.",
- "pattern": "^([a-zA-Z0-9_\\-\\.]+)@([a-zA-Z0-9_\\-\\.]+)\\.([a-zA-Z]{2,5})$"
- },
- "show_hidden_params": {
- "type": "boolean",
- "fa_icon": "far fa-eye-slash",
- "description": "Show all params when using `--help`",
- "help_text": "By default, parameters set as _hidden_ in the schema are not shown on the command line when a user runs with `--help`. Specifying this option will tell the pipeline to show all parameters."
- },
"help": {
"type": "boolean",
"description": "Display help text.",
"fa_icon": "fas fa-question-circle",
+ "default": false,
"hidden": true
},
"version": {
"type": "boolean",
"description": "Display version and exit.",
"fa_icon": "fas fa-question-circle",
+ "default": false,
"hidden": true
},
"publish_dir_mode": {
@@ -618,6 +651,7 @@
"type": "boolean",
"description": "Send plain-text email instead of HTML.",
"fa_icon": "fas fa-remove-format",
+ "default": false,
"hidden": true
},
"max_multiqc_email_size": {
@@ -632,6 +666,7 @@
"type": "boolean",
"description": "Do not use coloured log outputs.",
"fa_icon": "fas fa-palette",
+ "default": false,
"hidden": true
},
"hook_url": {
@@ -643,6 +678,7 @@
},
"multiqc_config": {
"type": "string",
+ "format": "file-path",
"description": "Custom config file to supply to MultiQC.",
"fa_icon": "fas fa-cog",
"hidden": true
@@ -658,19 +694,36 @@
"description": "Custom MultiQC yaml file containing HTML including a methods description.",
"fa_icon": "fas fa-cog"
},
- "tracedir": {
- "type": "string",
- "description": "Directory to keep pipeline Nextflow logs and reports.",
- "default": "${params.outdir}/pipeline_info",
- "fa_icon": "fas fa-cogs",
- "hidden": true
- },
"validate_params": {
"type": "boolean",
"description": "Boolean whether to validate parameters against the schema at runtime",
"default": true,
"fa_icon": "fas fa-check-square",
"hidden": true
+ },
+ "validationShowHiddenParams": {
+ "type": "boolean",
+ "fa_icon": "far fa-eye-slash",
+ "description": "Show all params when using `--help`",
+ "default": false,
+ "hidden": true,
+ "help_text": "By default, parameters set as _hidden_ in the schema are not shown on the command line when a user runs with `--help`. Specifying this option will tell the pipeline to show all parameters."
+ },
+ "validationFailUnrecognisedParams": {
+ "type": "boolean",
+ "fa_icon": "far fa-check-circle",
+ "description": "Validation of parameters fails when an unrecognised parameter is found.",
+ "default": false,
+ "hidden": true,
+ "help_text": "By default, when an unrecognised parameter is found, it returns a warning."
+ },
+ "validationLenientMode": {
+ "type": "boolean",
+ "fa_icon": "far fa-check-circle",
+            "description": "Validation of parameters in lenient mode.",
+ "default": false,
+ "hidden": true,
+ "help_text": "Allows string values that are parseable as numbers or booleans. For further information see [JSONSchema docs](https://github.com/everit-org/json-schema#lenient-mode)."
}
}
},
@@ -683,7 +736,7 @@
"properties": {
"max_cpus": {
"type": "integer",
- "description": "Maximum number of CPUs that can be requested for any single job.",
+ "description": "Maximum number of CPUs that can be requested for any single job.",
"default": 16,
"fa_icon": "fas fa-microchip",
"help_text": "Use to set an upper-limit for the CPU requirement for each process. Should be an integer e.g. `--max_cpus 1`"
@@ -786,6 +839,9 @@
{
"$ref": "#/definitions/downstream_analysis"
},
+ {
+ "$ref": "#/definitions/pipeline_report"
+ },
{
"$ref": "#/definitions/skipping_specific_steps"
},
diff --git a/subworkflows/local/dada2_preprocessing.nf b/subworkflows/local/dada2_preprocessing.nf
index fb2b44f3..12412c4a 100644
--- a/subworkflows/local/dada2_preprocessing.nf
+++ b/subworkflows/local/dada2_preprocessing.nf
@@ -41,10 +41,12 @@ workflow DADA2_PREPROCESSING {
.set { ch_all_trimmed_reads }
}
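+    // initialise as empty so the `qc_svg` emit below resolves even when quality plots are skipped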
+ ch_DADA2_QUALITY1_SVG = Channel.empty()
if ( !params.skip_dada_quality ) {
DADA2_QUALITY1 ( ch_all_trimmed_reads.dump(tag: 'into_dada2_quality') )
ch_versions_dada2_preprocessing = ch_versions_dada2_preprocessing.mix(DADA2_QUALITY1.out.versions)
DADA2_QUALITY1.out.warning.subscribe { if ( it.baseName.toString().startsWith("WARNING") ) log.warn it.baseName.toString().replace("WARNING ","DADA2_QUALITY1: ") }
+ ch_DADA2_QUALITY1_SVG = DADA2_QUALITY1.out.svg
}
//find truncation values in case they are not supplied
@@ -94,9 +96,12 @@ workflow DADA2_PREPROCESSING {
.mix ( ch_all_preprocessed_rv )
.set { ch_all_preprocessed_reads }
}
+
+ ch_DADA2_QUALITY2_SVG = Channel.empty()
if ( !params.skip_dada_quality ) {
DADA2_QUALITY2 ( ch_all_preprocessed_reads.dump(tag: 'into_dada2_quality2') )
DADA2_QUALITY2.out.warning.subscribe { if ( it.baseName.toString().startsWith("WARNING") ) log.warn it.baseName.toString().replace("WARNING ","DADA2_QUALITY2: ") }
+ ch_DADA2_QUALITY2_SVG = DADA2_QUALITY2.out.svg
}
//group by sequencing run
@@ -118,7 +123,10 @@ workflow DADA2_PREPROCESSING {
.set { ch_filt_reads }
emit:
- reads = ch_filt_reads
- logs = DADA2_FILTNTRIM.out.log
- versions = ch_versions_dada2_preprocessing
+ reads = ch_filt_reads
+ logs = DADA2_FILTNTRIM.out.log
+ args = DADA2_FILTNTRIM.out.args
+ qc_svg = ch_DADA2_QUALITY1_SVG.collect()
+ qc_svg_preprocessed = ch_DADA2_QUALITY2_SVG.collect()
+ versions = ch_versions_dada2_preprocessing
}
diff --git a/subworkflows/local/dada2_taxonomy_wf.nf b/subworkflows/local/dada2_taxonomy_wf.nf
index c5259e6c..9673b45e 100644
--- a/subworkflows/local/dada2_taxonomy_wf.nf
+++ b/subworkflows/local/dada2_taxonomy_wf.nf
@@ -104,6 +104,7 @@ workflow DADA2_TAXONOMY_WF {
}
emit:
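+    // cutadapt log from trimming the DADA2 reference taxonomy (consumed by the summary report), or empty placeholders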
+ cut_tax = params.cut_dada_ref_taxonomy ? CUTADAPT_TAXONOMY.out.log : [[],[]]
tax = ch_dada2_tax
versions = ch_versions_dada_taxonomy
}
diff --git a/subworkflows/local/phyloseq_workflow.nf b/subworkflows/local/phyloseq_workflow.nf
new file mode 100644
index 00000000..adf208b7
--- /dev/null
+++ b/subworkflows/local/phyloseq_workflow.nf
@@ -0,0 +1,44 @@
+/*
+ * Create phyloseq objects
+ */
+
+include { PHYLOSEQ } from '../../modules/local/phyloseq'
+include { PHYLOSEQ_INASV } from '../../modules/local/phyloseq_inasv'
+
+workflow PHYLOSEQ_WORKFLOW {
+ take:
+ ch_tax
+ ch_tsv
+ ch_meta
+ ch_tree
+ run_qiime2
+
+ main:
+ if ( params.metadata ) {
+ ch_phyloseq_inmeta = ch_meta.first() // The .first() is to make sure it's a value channel
+ } else {
+ ch_phyloseq_inmeta = []
+ }
+
+ if ( params.pplace_tree ) {
+        ch_phyloseq_intree = ch_tree.map { it[1] }.first()
+ } else {
+ ch_phyloseq_intree = []
+ }
+
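+    // When QIIME2 filtering may have modified the ASV table (non-default exclude_taxa, min_frequency
+    // or min_samples), route the table through PHYLOSEQ_INASV first; otherwise use it as provided.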
+ if ( run_qiime2 ) {
+ if ( params.exclude_taxa != "none" || params.min_frequency != 1 || params.min_samples != 1 ) {
+ ch_phyloseq_inasv = PHYLOSEQ_INASV ( ch_tsv ).tsv
+ } else {
+ ch_phyloseq_inasv = ch_tsv
+ }
+ } else {
+ ch_phyloseq_inasv = ch_tsv
+ }
+
+ PHYLOSEQ ( ch_tax, ch_phyloseq_inasv, ch_phyloseq_inmeta, ch_phyloseq_intree )
+
+ emit:
+    rds      = PHYLOSEQ.out.rds
+    versions = PHYLOSEQ.out.versions
+}
diff --git a/subworkflows/local/qiime2_ancom.nf b/subworkflows/local/qiime2_ancom.nf
index af83733d..ce308d78 100644
--- a/subworkflows/local/qiime2_ancom.nf
+++ b/subworkflows/local/qiime2_ancom.nf
@@ -34,4 +34,7 @@ workflow QIIME2_ANCOM {
QIIME2_ANCOM_TAX.out.ancom.subscribe { if ( it.baseName[0].toString().startsWith("WARNING") ) log.warn it.baseName[0].toString().replace("WARNING ","QIIME2_ANCOM_TAX: ") }
QIIME2_ANCOM_ASV ( ch_metadata.combine( QIIME2_FILTERSAMPLES_ANCOM.out.qza.flatten() ) )
+
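+    // combined ANCOM results (ASV- and taxon-level) for downstream use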
+ emit:
+ ancom = QIIME2_ANCOM_ASV.out.ancom.mix(QIIME2_ANCOM_TAX.out.ancom)
}
diff --git a/subworkflows/local/qiime2_diversity.nf b/subworkflows/local/qiime2_diversity.nf
index b3d7f64b..02f0d91e 100644
--- a/subworkflows/local/qiime2_diversity.nf
+++ b/subworkflows/local/qiime2_diversity.nf
@@ -71,4 +71,11 @@ workflow QIIME2_DIVERSITY {
.set{ ch_to_diversity_betaord }
QIIME2_DIVERSITY_BETAORD ( ch_to_diversity_betaord )
}
+
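+    // expose diversity results to the calling workflow; empty lists when diversity indices are skipped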
+ emit:
+ depth = !skip_diversity_indices ? QIIME2_DIVERSITY_CORE.out.depth : []
+ alpha = !skip_diversity_indices ? QIIME2_DIVERSITY_ALPHA.out.alpha : []
+ beta = !skip_diversity_indices ? QIIME2_DIVERSITY_BETA.out.beta : []
+ betaord = !skip_diversity_indices ? QIIME2_DIVERSITY_BETAORD.out.beta : []
+ adonis = !skip_diversity_indices && params.qiime_adonis_formula ? QIIME2_DIVERSITY_ADONIS.out.html : []
}
diff --git a/tests/pipeline/doubleprimers.nf.test b/tests/pipeline/doubleprimers.nf.test
index cd810025..5d641077 100644
--- a/tests/pipeline/doubleprimers.nf.test
+++ b/tests/pipeline/doubleprimers.nf.test
@@ -29,11 +29,10 @@ nextflow_pipeline {
path("$outputDir/dada2/DADA2_stats.tsv"),
path("$outputDir/dada2/DADA2_table.rds"),
path("$outputDir/dada2/DADA2_table.tsv")).match("dada2") },
- { assert new File("$outputDir/qiime2/input/rep-seqs.qza").exists() },
- { assert new File("$outputDir/qiime2/input/table.qza").exists() },
{ assert snapshot(path("$outputDir/input/Samplesheet_double_primer.tsv")).match("input") },
{ assert snapshot(path("$outputDir/multiqc/multiqc_data/multiqc_general_stats.txt"),
- path("$outputDir/multiqc/multiqc_data/multiqc_cutadapt.txt")).match("multiqc") }
+ path("$outputDir/multiqc/multiqc_data/multiqc_cutadapt.txt")).match("multiqc") },
+ { assert new File("$outputDir/summary_report/summary_report.html").exists() }
)
}
}
diff --git a/tests/pipeline/doubleprimers.nf.test.snap b/tests/pipeline/doubleprimers.nf.test.snap
index 64ddaa21..cefcf1b9 100644
--- a/tests/pipeline/doubleprimers.nf.test.snap
+++ b/tests/pipeline/doubleprimers.nf.test.snap
@@ -13,9 +13,9 @@
},
"software_versions": {
"content": [
- "{BARRNAP={barrnap=0.9}, CUSTOM_DUMPSOFTWAREVERSIONS={python=3.11.0, yaml=6.0}, CUTADAPT_BASIC={cutadapt=3.4}, DADA2_DENOISING={R=4.1.1, dada2=1.22.0}, DADA2_FILTNTRIM={R=4.1.1, dada2=1.22.0}, DADA2_QUALITY1={R=4.1.1, ShortRead=1.52.0, dada2=1.22.0}, QIIME2_INSEQ={qiime2=2022.11.1}, RENAME_RAW_DATA_FILES={sed=4.7}, TRUNCLEN={pandas=1.1.5, python=3.9.1}, Workflow={nf-core/ampliseq=2.7.0dev}}"
+ "{BARRNAP={barrnap=0.9}, CUSTOM_DUMPSOFTWAREVERSIONS={python=3.11.0, yaml=6.0}, CUTADAPT_BASIC={cutadapt=3.4}, DADA2_DENOISING={R=4.1.1, dada2=1.22.0}, DADA2_FILTNTRIM={R=4.1.1, dada2=1.22.0}, DADA2_QUALITY1={R=4.1.1, ShortRead=1.52.0, dada2=1.22.0}, RENAME_RAW_DATA_FILES={sed=4.7}, TRUNCLEN={pandas=1.1.5, python=3.9.1}, Workflow={nf-core/ampliseq=2.7.0dev}}"
],
- "timestamp": "2023-05-28T21:08:54+0000"
+ "timestamp": "2023-07-27T13:49:03+0000"
},
"overall_summary_tsv": {
"content": [
diff --git a/tests/pipeline/fasta.nf.test b/tests/pipeline/fasta.nf.test
index 9daca857..8db0826b 100644
--- a/tests/pipeline/fasta.nf.test
+++ b/tests/pipeline/fasta.nf.test
@@ -25,7 +25,8 @@ nextflow_pipeline {
{ assert snapshot(path("$outputDir/dada2/ref_taxonomy.rdp_18.txt")).match("dada2") },
{ assert new File("$outputDir/dada2/ASV_tax_species.rdp_18.tsv").exists() },
{ assert new File("$outputDir/dada2/ASV_tax.rdp_18.tsv").exists() },
- { assert snapshot(path("$outputDir/input/ASV_seqs.fasta")).match("input") }
+ { assert snapshot(path("$outputDir/input/ASV_seqs.fasta")).match("input") },
+ { assert new File("$outputDir/summary_report/summary_report.html").exists() }
)
}
}
diff --git a/tests/pipeline/iontorrent.nf.test b/tests/pipeline/iontorrent.nf.test
index 9b73af86..200a9825 100644
--- a/tests/pipeline/iontorrent.nf.test
+++ b/tests/pipeline/iontorrent.nf.test
@@ -38,7 +38,9 @@ nextflow_pipeline {
{ assert snapshot(path("$outputDir/input/Samplesheet_it_SE_ITS.tsv")).match("input") },
{ assert snapshot(path("$outputDir/multiqc/multiqc_data/multiqc_fastqc.txt"),
path("$outputDir/multiqc/multiqc_data/multiqc_general_stats.txt"),
- path("$outputDir/multiqc/multiqc_data/multiqc_cutadapt.txt")).match("multiqc") }
+ path("$outputDir/multiqc/multiqc_data/multiqc_cutadapt.txt")).match("multiqc") },
+ { assert new File("$outputDir/summary_report/summary_report.html").exists() },
+ { assert new File("$outputDir/phyloseq/dada2_phyloseq.rds").exists() }
)
}
}
diff --git a/tests/pipeline/iontorrent.nf.test.snap b/tests/pipeline/iontorrent.nf.test.snap
index c9c8f4bb..c7fbfb89 100644
--- a/tests/pipeline/iontorrent.nf.test.snap
+++ b/tests/pipeline/iontorrent.nf.test.snap
@@ -13,7 +13,7 @@
},
"software_versions": {
"content": [
- "{BARRNAP={barrnap=0.9}, CUSTOM_DUMPSOFTWAREVERSIONS={python=3.11.0, yaml=6.0}, CUTADAPT_BASIC={cutadapt=3.4}, DADA2_DENOISING={R=4.1.1, dada2=1.22.0}, DADA2_FILTNTRIM={R=4.1.1, dada2=1.22.0}, DADA2_QUALITY1={R=4.1.1, ShortRead=1.52.0, dada2=1.22.0}, DADA2_TAXONOMY={R=4.1.1, dada2=1.22.0}, FASTQC={fastqc=0.11.9}, RENAME_RAW_DATA_FILES={sed=4.7}, Workflow={nf-core/ampliseq=2.7.0dev}}"
+ "{BARRNAP={barrnap=0.9}, CUSTOM_DUMPSOFTWAREVERSIONS={python=3.11.0, yaml=6.0}, CUTADAPT_BASIC={cutadapt=3.4}, DADA2_DENOISING={R=4.1.1, dada2=1.22.0}, DADA2_FILTNTRIM={R=4.1.1, dada2=1.22.0}, DADA2_QUALITY1={R=4.1.1, ShortRead=1.52.0, dada2=1.22.0}, DADA2_TAXONOMY={R=4.1.1, dada2=1.22.0}, FASTQC={fastqc=0.11.9}, PHYLOSEQ={R=4.3.0, phyloseq=1.44.0}, RENAME_RAW_DATA_FILES={sed=4.7}, Workflow={nf-core/ampliseq=2.7.0dev}}"
],
"timestamp": "2023-06-20T01:42:35+0000"
},
diff --git a/tests/pipeline/multi.nf.test b/tests/pipeline/multi.nf.test
index e4fe28a0..3e01ff20 100644
--- a/tests/pipeline/multi.nf.test
+++ b/tests/pipeline/multi.nf.test
@@ -63,7 +63,9 @@ nextflow_pipeline {
{ assert new File("$outputDir/qiime2/representative_sequences/filtered-sequences.qza").exists() },
{ assert new File("$outputDir/qiime2/representative_sequences/rep-seq.fasta").exists() },
{ assert snapshot(path("$outputDir/qiime2/representative_sequences/descriptive_stats.tsv"),
- path("$outputDir/qiime2/representative_sequences/seven_number_summary.tsv")).match("qiime2") }
+ path("$outputDir/qiime2/representative_sequences/seven_number_summary.tsv")).match("qiime2") },
+ { assert new File("$outputDir/summary_report/summary_report.html").exists() },
+ { assert new File("$outputDir/phyloseq/dada2_phyloseq.rds").exists() }
)
}
}
diff --git a/tests/pipeline/multi.nf.test.snap b/tests/pipeline/multi.nf.test.snap
index 2f0095ac..25b1437c 100644
--- a/tests/pipeline/multi.nf.test.snap
+++ b/tests/pipeline/multi.nf.test.snap
@@ -14,7 +14,7 @@
},
"software_versions": {
"content": [
- "{BARRNAP={barrnap=0.9}, CUSTOM_DUMPSOFTWAREVERSIONS={python=3.11.0, yaml=6.0}, DADA2_DENOISING={R=4.1.1, dada2=1.22.0}, DADA2_FILTNTRIM={R=4.1.1, dada2=1.22.0}, DADA2_TAXONOMY={R=4.1.1, dada2=1.22.0}, FASTQC={fastqc=0.11.9}, FILTER_STATS={pandas=1.1.5, python=3.9.1}, QIIME2_INSEQ={qiime2=2022.11.1}, RENAME_RAW_DATA_FILES={sed=4.7}, Workflow={nf-core/ampliseq=2.7.0dev}}"
+ "{BARRNAP={barrnap=0.9}, CUSTOM_DUMPSOFTWAREVERSIONS={python=3.11.0, yaml=6.0}, DADA2_DENOISING={R=4.1.1, dada2=1.22.0}, DADA2_FILTNTRIM={R=4.1.1, dada2=1.22.0}, DADA2_TAXONOMY={R=4.1.1, dada2=1.22.0}, FASTQC={fastqc=0.11.9}, FILTER_STATS={pandas=1.1.5, python=3.9.1}, PHYLOSEQ={R=4.3.0, phyloseq=1.44.0}, QIIME2_INSEQ={qiime2=2022.11.1}, RENAME_RAW_DATA_FILES={sed=4.7}, Workflow={nf-core/ampliseq=2.7.0dev}}"
],
"timestamp": "2023-05-28T21:15:03+0000"
},
diff --git a/tests/pipeline/novaseq.nf.test b/tests/pipeline/novaseq.nf.test
index a2101d3d..a346898d 100644
--- a/tests/pipeline/novaseq.nf.test
+++ b/tests/pipeline/novaseq.nf.test
@@ -28,7 +28,8 @@ nextflow_pipeline {
{ assert new File("$outputDir/fastqc/S2_2_fastqc.html").exists() },
{ assert snapshot(path("$outputDir/input/Samplesheet_novaseq.tsv")).match("input") },
{ assert snapshot(path("$outputDir/multiqc/multiqc_data/multiqc_fastqc.txt"),
- path("$outputDir/multiqc/multiqc_data/multiqc_general_stats.txt")).match("multiqc") }
+ path("$outputDir/multiqc/multiqc_data/multiqc_general_stats.txt")).match("multiqc") },
+ { assert new File("$outputDir/summary_report/summary_report.html").exists() }
)
}
}
diff --git a/tests/pipeline/pacbio_its.nf.test b/tests/pipeline/pacbio_its.nf.test
index 39e1d2a2..ffe4b31c 100644
--- a/tests/pipeline/pacbio_its.nf.test
+++ b/tests/pipeline/pacbio_its.nf.test
@@ -52,7 +52,9 @@ nextflow_pipeline {
path("$outputDir/SBDI/emof.tsv"),
path("$outputDir/SBDI/event.tsv")).match("SBDI") },
{ assert new File("$outputDir/SBDI/annotation.tsv").exists() },
- { assert new File("$outputDir/SBDI/asv-table.tsv").exists() }
+ { assert new File("$outputDir/SBDI/asv-table.tsv").exists() },
+ { assert new File("$outputDir/summary_report/summary_report.html").exists() },
+ { assert new File("$outputDir/phyloseq/dada2_phyloseq.rds").exists() }
)
}
}
diff --git a/tests/pipeline/pacbio_its.nf.test.snap b/tests/pipeline/pacbio_its.nf.test.snap
index 3c860a89..775e5195 100644
--- a/tests/pipeline/pacbio_its.nf.test.snap
+++ b/tests/pipeline/pacbio_its.nf.test.snap
@@ -35,7 +35,7 @@
},
"software_versions": {
"content": [
- "{ASSIGNSH={pandas=1.1.5, python=3.9.1}, BARRNAP={barrnap=0.9}, CUSTOM_DUMPSOFTWAREVERSIONS={python=3.11.0, yaml=6.0}, CUTADAPT_BASIC={cutadapt=3.4}, DADA2_DENOISING={R=4.1.1, dada2=1.22.0}, DADA2_FILTNTRIM={R=4.1.1, dada2=1.22.0}, DADA2_QUALITY1={R=4.1.1, ShortRead=1.52.0, dada2=1.22.0}, DADA2_TAXONOMY={R=4.1.1, dada2=1.22.0}, FASTQC={fastqc=0.11.9}, FORMAT_TAXRESULTS_STD={pandas=1.1.5, python=3.9.1}, ITSX_CUTASV={ITSx=1.1.3}, RENAME_RAW_DATA_FILES={sed=4.7}, SBDIEXPORT={R=3.6.3}, VSEARCH_USEARCHGLOBAL={vsearch=2.21.1}, Workflow={nf-core/ampliseq=2.7.0dev}}"
+ "{ASSIGNSH={pandas=1.1.5, python=3.9.1}, BARRNAP={barrnap=0.9}, CUSTOM_DUMPSOFTWAREVERSIONS={python=3.11.0, yaml=6.0}, CUTADAPT_BASIC={cutadapt=3.4}, DADA2_DENOISING={R=4.1.1, dada2=1.22.0}, DADA2_FILTNTRIM={R=4.1.1, dada2=1.22.0}, DADA2_QUALITY1={R=4.1.1, ShortRead=1.52.0, dada2=1.22.0}, DADA2_TAXONOMY={R=4.1.1, dada2=1.22.0}, FASTQC={fastqc=0.11.9}, FORMAT_TAXRESULTS_STD={pandas=1.1.5, python=3.9.1}, ITSX_CUTASV={ITSx=1.1.3}, PHYLOSEQ={R=4.3.0, phyloseq=1.44.0}, RENAME_RAW_DATA_FILES={sed=4.7}, SBDIEXPORT={R=3.6.3}, VSEARCH_USEARCHGLOBAL={vsearch=2.21.1}, Workflow={nf-core/ampliseq=2.7.0dev}}"
],
"timestamp": "2023-06-20T02:07:02+0000"
},
diff --git a/tests/pipeline/pplace.nf.test b/tests/pipeline/pplace.nf.test
index b78c479b..564cf2b9 100644
--- a/tests/pipeline/pplace.nf.test
+++ b/tests/pipeline/pplace.nf.test
@@ -55,7 +55,9 @@ nextflow_pipeline {
{ assert new File("$outputDir/pplace/test_pplace.taxonomy.per_query.tsv").exists() },
{ assert new File("$outputDir/pplace/test_pplace.graft.test_pplace.epa_result.newick").exists() },
{ assert snapshot(path("$outputDir/multiqc/multiqc_data/multiqc_general_stats.txt"),
- path("$outputDir/multiqc/multiqc_data/multiqc_cutadapt.txt")).match("multiqc") }
+ path("$outputDir/multiqc/multiqc_data/multiqc_cutadapt.txt")).match("multiqc") },
+ { assert new File("$outputDir/summary_report/summary_report.html").exists() },
+ { assert new File("$outputDir/phyloseq/qiime2_phyloseq.rds").exists() }
)
}
}
diff --git a/tests/pipeline/pplace.nf.test.snap b/tests/pipeline/pplace.nf.test.snap
index d0aa5f26..9ee79d29 100644
--- a/tests/pipeline/pplace.nf.test.snap
+++ b/tests/pipeline/pplace.nf.test.snap
@@ -8,7 +8,7 @@
},
"software_versions": {
"content": [
- "{BARRNAP={barrnap=0.9}, CUSTOM_DUMPSOFTWAREVERSIONS={python=3.11.0, yaml=6.0}, CUTADAPT_BASIC={cutadapt=3.4}, DADA2_DENOISING={R=4.1.1, dada2=1.22.0}, DADA2_FILTNTRIM={R=4.1.1, dada2=1.22.0}, DADA2_QUALITY1={R=4.1.1, ShortRead=1.52.0, dada2=1.22.0}, EPANG_PLACE={epang=0.3.8}, FILTER_STATS={pandas=1.1.5, python=3.9.1}, GAPPA_ASSIGN={gappa=0.8.0}, GAPPA_GRAFT={gappa=0.8.0}, GAPPA_HEATTREE={gappa=0.8.0}, HMMER_AFAFORMATQUERY={hmmer/easel=0.48}, HMMER_AFAFORMATREF={hmmer/easel=0.48}, HMMER_HMMALIGNQUERY={hmmer=3.3.2}, HMMER_HMMALIGNREF={hmmer=3.3.2}, HMMER_HMMBUILD={hmmer=3.3.2}, HMMER_MASKQUERY={hmmer/easel=0.48}, HMMER_MASKREF={hmmer/easel=0.48}, HMMER_UNALIGNREF={hmmer/easel=0.48}, QIIME2_INSEQ={qiime2=2022.11.1}, RENAME_RAW_DATA_FILES={sed=4.7}, TRUNCLEN={pandas=1.1.5, python=3.9.1}, Workflow={nf-core/ampliseq=2.7.0dev}}"
+ "{BARRNAP={barrnap=0.9}, CUSTOM_DUMPSOFTWAREVERSIONS={python=3.11.0, yaml=6.0}, CUTADAPT_BASIC={cutadapt=3.4}, DADA2_DENOISING={R=4.1.1, dada2=1.22.0}, DADA2_FILTNTRIM={R=4.1.1, dada2=1.22.0}, DADA2_QUALITY1={R=4.1.1, ShortRead=1.52.0, dada2=1.22.0}, EPANG_PLACE={epang=0.3.8}, FILTER_STATS={pandas=1.1.5, python=3.9.1}, GAPPA_ASSIGN={gappa=0.8.0}, GAPPA_GRAFT={gappa=0.8.0}, GAPPA_HEATTREE={gappa=0.8.0}, HMMER_AFAFORMATQUERY={hmmer/easel=0.48}, HMMER_AFAFORMATREF={hmmer/easel=0.48}, HMMER_HMMALIGNQUERY={hmmer=3.3.2}, HMMER_HMMALIGNREF={hmmer=3.3.2}, HMMER_HMMBUILD={hmmer=3.3.2}, HMMER_MASKQUERY={hmmer/easel=0.48}, HMMER_MASKREF={hmmer/easel=0.48}, HMMER_UNALIGNREF={hmmer/easel=0.48}, PHYLOSEQ={R=4.3.0, phyloseq=1.44.0}, QIIME2_INSEQ={qiime2=2022.11.1}, RENAME_RAW_DATA_FILES={sed=4.7}, TRUNCLEN={pandas=1.1.5, python=3.9.1}, Workflow={nf-core/ampliseq=2.7.0dev}}"
],
"timestamp": "2023-06-20T17:24:03+0000"
},
diff --git a/tests/pipeline/reftaxcustom.nf.test b/tests/pipeline/reftaxcustom.nf.test
index 42e0d104..9183b126 100644
--- a/tests/pipeline/reftaxcustom.nf.test
+++ b/tests/pipeline/reftaxcustom.nf.test
@@ -43,7 +43,9 @@ nextflow_pipeline {
{ assert snapshot(path("$outputDir/input/Samplesheet.tsv")).match("input") },
{ assert snapshot(path("$outputDir/multiqc/multiqc_data/multiqc_fastqc.txt"),
path("$outputDir/multiqc/multiqc_data/multiqc_general_stats.txt"),
- path("$outputDir/multiqc/multiqc_data/multiqc_cutadapt.txt")).match("multiqc") }
+ path("$outputDir/multiqc/multiqc_data/multiqc_cutadapt.txt")).match("multiqc") },
+ { assert new File("$outputDir/summary_report/summary_report.html").exists() },
+ { assert new File("$outputDir/phyloseq/dada2_phyloseq.rds").exists() }
)
}
}
diff --git a/tests/pipeline/reftaxcustom.nf.test.snap b/tests/pipeline/reftaxcustom.nf.test.snap
index 6407a3bf..7b33f261 100644
--- a/tests/pipeline/reftaxcustom.nf.test.snap
+++ b/tests/pipeline/reftaxcustom.nf.test.snap
@@ -13,7 +13,7 @@
},
"software_versions": {
"content": [
- "{BARRNAP={barrnap=0.9}, CUSTOM_DUMPSOFTWAREVERSIONS={python=3.11.0, yaml=6.0}, CUTADAPT_BASIC={cutadapt=3.4}, DADA2_DENOISING={R=4.1.1, dada2=1.22.0}, DADA2_FILTNTRIM={R=4.1.1, dada2=1.22.0}, DADA2_QUALITY1={R=4.1.1, ShortRead=1.52.0, dada2=1.22.0}, DADA2_TAXONOMY={R=4.1.1, dada2=1.22.0}, FASTQC={fastqc=0.11.9}, RENAME_RAW_DATA_FILES={sed=4.7}, TRUNCLEN={pandas=1.1.5, python=3.9.1}, Workflow={nf-core/ampliseq=2.7.0dev}}"
+ "{BARRNAP={barrnap=0.9}, CUSTOM_DUMPSOFTWAREVERSIONS={python=3.11.0, yaml=6.0}, CUTADAPT_BASIC={cutadapt=3.4}, DADA2_DENOISING={R=4.1.1, dada2=1.22.0}, DADA2_FILTNTRIM={R=4.1.1, dada2=1.22.0}, DADA2_QUALITY1={R=4.1.1, ShortRead=1.52.0, dada2=1.22.0}, DADA2_TAXONOMY={R=4.1.1, dada2=1.22.0}, FASTQC={fastqc=0.11.9}, PHYLOSEQ={R=4.3.0, phyloseq=1.44.0}, RENAME_RAW_DATA_FILES={sed=4.7}, TRUNCLEN={pandas=1.1.5, python=3.9.1}, Workflow={nf-core/ampliseq=2.7.0dev}}"
],
"timestamp": "2023-05-28T21:18:54+0000"
},
diff --git a/tests/pipeline/single.nf.test b/tests/pipeline/single.nf.test
index be236c9a..02d54e9e 100644
--- a/tests/pipeline/single.nf.test
+++ b/tests/pipeline/single.nf.test
@@ -44,7 +44,9 @@ nextflow_pipeline {
{ assert snapshot(path("$outputDir/input/Samplesheet_single_end.tsv")).match("input") },
{ assert snapshot(path("$outputDir/multiqc/multiqc_data/multiqc_fastqc.txt"),
path("$outputDir/multiqc/multiqc_data/multiqc_general_stats.txt"),
- path("$outputDir/multiqc/multiqc_data/multiqc_cutadapt.txt")).match("multiqc") }
+ path("$outputDir/multiqc/multiqc_data/multiqc_cutadapt.txt")).match("multiqc") },
+ { assert new File("$outputDir/summary_report/summary_report.html").exists() },
+ { assert new File("$outputDir/phyloseq/dada2_phyloseq.rds").exists() }
)
}
}
diff --git a/tests/pipeline/single.nf.test.snap b/tests/pipeline/single.nf.test.snap
index 49d65106..bd9096d0 100644
--- a/tests/pipeline/single.nf.test.snap
+++ b/tests/pipeline/single.nf.test.snap
@@ -13,7 +13,7 @@
},
"software_versions": {
"content": [
- "{BARRNAP={barrnap=0.9}, CUSTOM_DUMPSOFTWAREVERSIONS={python=3.11.0, yaml=6.0}, CUTADAPT_BASIC={cutadapt=3.4}, DADA2_DENOISING={R=4.1.1, dada2=1.22.0}, DADA2_FILTNTRIM={R=4.1.1, dada2=1.22.0}, DADA2_QUALITY1={R=4.1.1, ShortRead=1.52.0, dada2=1.22.0}, DADA2_TAXONOMY={R=4.1.1, dada2=1.22.0}, FASTQC={fastqc=0.11.9}, RENAME_RAW_DATA_FILES={sed=4.7}, Workflow={nf-core/ampliseq=2.7.0dev}}"
+ "{BARRNAP={barrnap=0.9}, CUSTOM_DUMPSOFTWAREVERSIONS={python=3.11.0, yaml=6.0}, CUTADAPT_BASIC={cutadapt=3.4}, DADA2_DENOISING={R=4.1.1, dada2=1.22.0}, DADA2_FILTNTRIM={R=4.1.1, dada2=1.22.0}, DADA2_QUALITY1={R=4.1.1, ShortRead=1.52.0, dada2=1.22.0}, DADA2_TAXONOMY={R=4.1.1, dada2=1.22.0}, FASTQC={fastqc=0.11.9}, PHYLOSEQ={R=4.3.0, phyloseq=1.44.0}, RENAME_RAW_DATA_FILES={sed=4.7}, Workflow={nf-core/ampliseq=2.7.0dev}}"
],
"timestamp": "2023-05-28T20:35:33+0000"
},
diff --git a/tests/pipeline/sintax.nf.test b/tests/pipeline/sintax.nf.test
index f6de2995..f4ff3a4f 100644
--- a/tests/pipeline/sintax.nf.test
+++ b/tests/pipeline/sintax.nf.test
@@ -65,7 +65,9 @@ nextflow_pipeline {
{ assert new File("$outputDir/sintax/ASV_tax_sintax.unite-fungi.tsv").exists() },
{ assert new File("$outputDir/sintax/ref_taxonomy_sintax.txt").exists() },
{ assert snapshot(path("$outputDir/multiqc/multiqc_data/multiqc_general_stats.txt"),
- path("$outputDir/multiqc/multiqc_data/multiqc_cutadapt.txt")).match("multiqc") }
+ path("$outputDir/multiqc/multiqc_data/multiqc_cutadapt.txt")).match("multiqc") },
+ { assert new File("$outputDir/summary_report/summary_report.html").exists() },
+ { assert new File("$outputDir/phyloseq/sintax_phyloseq.rds").exists() }
)
}
}
diff --git a/tests/pipeline/sintax.nf.test.snap b/tests/pipeline/sintax.nf.test.snap
index c9745541..5f360a4b 100644
--- a/tests/pipeline/sintax.nf.test.snap
+++ b/tests/pipeline/sintax.nf.test.snap
@@ -16,7 +16,7 @@
},
"software_versions": {
"content": [
- "{BARRNAP={barrnap=0.9}, CUSTOM_DUMPSOFTWAREVERSIONS={python=3.11.0, yaml=6.0}, CUTADAPT_BASIC={cutadapt=3.4}, DADA2_DENOISING={R=4.1.1, dada2=1.22.0}, DADA2_FILTNTRIM={R=4.1.1, dada2=1.22.0}, DADA2_QUALITY1={R=4.1.1, ShortRead=1.52.0, dada2=1.22.0}, FASTQC={fastqc=0.11.9}, FILTER_STATS={pandas=1.1.5, python=3.9.1}, ITSX_CUTASV={ITSx=1.1.3}, QIIME2_INSEQ={qiime2=2022.11.1}, RENAME_RAW_DATA_FILES={sed=4.7}, SBDIEXPORT={R=3.6.3}, VSEARCH_SINTAX={vsearch=2.21.1}, Workflow={nf-core/ampliseq=2.7.0dev}}"
+ "{BARRNAP={barrnap=0.9}, CUSTOM_DUMPSOFTWAREVERSIONS={python=3.11.0, yaml=6.0}, CUTADAPT_BASIC={cutadapt=3.4}, DADA2_DENOISING={R=4.1.1, dada2=1.22.0}, DADA2_FILTNTRIM={R=4.1.1, dada2=1.22.0}, DADA2_QUALITY1={R=4.1.1, ShortRead=1.52.0, dada2=1.22.0}, FASTQC={fastqc=0.11.9}, FILTER_STATS={pandas=1.1.5, python=3.9.1}, ITSX_CUTASV={ITSx=1.1.3}, PHYLOSEQ={R=4.3.0, phyloseq=1.44.0}, QIIME2_INSEQ={qiime2=2022.11.1}, RENAME_RAW_DATA_FILES={sed=4.7}, SBDIEXPORT={R=3.6.3}, VSEARCH_SINTAX={vsearch=2.21.1}, Workflow={nf-core/ampliseq=2.7.0dev}}"
],
"timestamp": "2023-06-20T16:40:18+0000"
},
diff --git a/tests/pipeline/test.nf.test b/tests/pipeline/test.nf.test
index 7b295941..0e0e571a 100644
--- a/tests/pipeline/test.nf.test
+++ b/tests/pipeline/test.nf.test
@@ -93,7 +93,10 @@ nextflow_pipeline {
path("$outputDir/SBDI/emof.tsv"),
path("$outputDir/SBDI/event.tsv")).match("SBDI") },
{ assert new File("$outputDir/SBDI/annotation.tsv").exists() },
- { assert new File("$outputDir/SBDI/asv-table.tsv").exists() }
+ { assert new File("$outputDir/SBDI/asv-table.tsv").exists() },
+ { assert new File("$outputDir/summary_report/summary_report.html").exists() },
+ { assert new File("$outputDir/phyloseq/dada2_phyloseq.rds").exists() },
+ { assert new File("$outputDir/phyloseq/qiime2_phyloseq.rds").exists() }
)
}
}
diff --git a/tests/pipeline/test.nf.test.snap b/tests/pipeline/test.nf.test.snap
index fdf84093..b345de55 100644
--- a/tests/pipeline/test.nf.test.snap
+++ b/tests/pipeline/test.nf.test.snap
@@ -22,7 +22,7 @@
},
"software_versions": {
"content": [
- "{BARRNAP={barrnap=0.9}, CUSTOM_DUMPSOFTWAREVERSIONS={python=3.11.0, yaml=6.0}, CUTADAPT_BASIC={cutadapt=3.4}, DADA2_DENOISING={R=4.1.1, dada2=1.22.0}, DADA2_FILTNTRIM={R=4.1.1, dada2=1.22.0}, DADA2_QUALITY1={R=4.1.1, ShortRead=1.52.0, dada2=1.22.0}, DADA2_TAXONOMY={R=4.1.1, dada2=1.22.0}, FASTQC={fastqc=0.11.9}, FILTER_LEN_ASV={Biostrings=2.58.0, R=4.0.3}, FILTER_STATS={pandas=1.1.5, python=3.9.1}, QIIME2_INSEQ={qiime2=2022.11.1}, RENAME_RAW_DATA_FILES={sed=4.7}, SBDIEXPORT={R=3.6.3}, TRUNCLEN={pandas=1.1.5, python=3.9.1}, Workflow={nf-core/ampliseq=2.7.0dev}}"
+ "{BARRNAP={barrnap=0.9}, CUSTOM_DUMPSOFTWAREVERSIONS={python=3.11.0, yaml=6.0}, CUTADAPT_BASIC={cutadapt=3.4}, DADA2_DENOISING={R=4.1.1, dada2=1.22.0}, DADA2_FILTNTRIM={R=4.1.1, dada2=1.22.0}, DADA2_QUALITY1={R=4.1.1, ShortRead=1.52.0, dada2=1.22.0}, DADA2_TAXONOMY={R=4.1.1, dada2=1.22.0}, FASTQC={fastqc=0.11.9}, FILTER_LEN_ASV={Biostrings=2.58.0, R=4.0.3}, FILTER_STATS={pandas=1.1.5, python=3.9.1}, PHYLOSEQ={R=4.3.0, phyloseq=1.44.0}, QIIME2_INSEQ={qiime2=2022.11.1}, RENAME_RAW_DATA_FILES={sed=4.7}, SBDIEXPORT={R=3.6.3}, TRUNCLEN={pandas=1.1.5, python=3.9.1}, Workflow={nf-core/ampliseq=2.7.0dev}}"
],
"timestamp": "2023-05-28T20:55:32+0000"
},
diff --git a/workflows/ampliseq.nf b/workflows/ampliseq.nf
index 03e5bf55..5d7cdb3d 100644
--- a/workflows/ampliseq.nf
+++ b/workflows/ampliseq.nf
@@ -1,21 +1,19 @@
/*
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
- VALIDATE INPUTS
+ PRINT PARAMS SUMMARY
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
*/
-def summary_params = NfcoreSchema.paramsSummaryMap(workflow, params)
+include { paramsSummaryLog; paramsSummaryMap } from 'plugin/nf-validation'
-// Validate input parameters
-WorkflowAmpliseq.initialise(params, log)
+def logo = NfcoreTemplate.logo(workflow, params.monochrome_logs)
+def citation = '\n' + WorkflowMain.citation(workflow) + '\n'
+def summary_params = paramsSummaryMap(workflow)
-// Check input path parameters to see if they exist
-// params.input may be: folder, samplesheet, fasta file, and therefore should not appear here (because tests only for "file")
-def checkPathParamList = [ params.multiqc_config, params.metadata, params.classifier ]
-for (param in checkPathParamList) { if (param) { file(param, checkIfExists: true) } }
+// Print parameter summary log to screen
+log.info logo + paramsSummaryLog(workflow) + citation
-// Check mandatory parameters
-if (params.input) { ch_input = file(params.input) } else { error('Input samplesheet not specified!') }
+WorkflowAmpliseq.initialise(params, log)
/*
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
@@ -73,6 +71,11 @@ if (params.sintax_ref_taxonomy && !params.skip_taxonomy) {
val_sintax_ref_taxonomy = "none"
}
+// report sources
+ch_report_template = Channel.fromPath("${params.report_template}", checkIfExists: true)
+ch_report_css = Channel.fromPath("${params.report_css}", checkIfExists: true)
+ch_report_logo = Channel.fromPath("${params.report_logo}", checkIfExists: true)
+ch_report_abstract = params.report_abstract ? Channel.fromPath(params.report_abstract, checkIfExists: true) : []
// Set non-params Variables
@@ -125,6 +128,9 @@ if ( !(workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1)
if ( workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1 ) { log.warn "Conda or mamba is enabled, any steps involving QIIME2 are not available. Use a container engine instead of conda to enable all software." }
}
+// Collect taxonomy tables produced during the pipeline; each one is later used to build a phyloseq object
+ch_tax_for_phyloseq = Channel.empty()
+
/*
========================================================================================
@@ -165,6 +171,9 @@ include { QIIME2_INTAX } from '../modules/local/qiime2_intax'
include { PICRUST } from '../modules/local/picrust'
include { SBDIEXPORT } from '../modules/local/sbdiexport'
include { SBDIEXPORTREANNOTATE } from '../modules/local/sbdiexportreannotate'
+include { SUMMARY_REPORT } from '../modules/local/summary_report'
+include { PHYLOSEQ_INTAX as PHYLOSEQ_INTAX_PPLACE } from '../modules/local/phyloseq_intax'
+include { PHYLOSEQ_INTAX as PHYLOSEQ_INTAX_QIIME2 } from '../modules/local/phyloseq_intax'
//
// SUBWORKFLOW: Consisting of a mix of local and nf-core/modules
@@ -181,6 +190,7 @@ include { QIIME2_EXPORT } from '../subworkflows/local/qiime2_exp
include { QIIME2_BARPLOTAVG } from '../subworkflows/local/qiime2_barplotavg'
include { QIIME2_DIVERSITY } from '../subworkflows/local/qiime2_diversity'
include { QIIME2_ANCOM } from '../subworkflows/local/qiime2_ancom'
+include { PHYLOSEQ_WORKFLOW } from '../subworkflows/local/phyloseq_workflow'
/*
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
@@ -216,6 +226,9 @@ workflow AMPLISEQ {
//
PARSE_INPUT ( params.input, is_fasta_input, single_end, params.multiple_sequencing_runs, params.extension )
ch_reads = PARSE_INPUT.out.reads
+ // TODO: OPTIONAL, you can use nf-validation plugin to create an input channel from the samplesheet with Channel.fromSamplesheet("input")
+ // See the documentation https://nextflow-io.github.io/nf-validation/samplesheets/fromSamplesheet/
+ // ! There is currently no tooling to help you write a sample sheet schema
//
// MODULE: Rename files
@@ -422,6 +435,7 @@ workflow AMPLISEQ {
taxlevels
).tax.set { ch_dada2_tax }
ch_versions = ch_versions.mix(DADA2_TAXONOMY_WF.out.versions)
+ ch_tax_for_phyloseq = ch_tax_for_phyloseq.mix ( ch_dada2_tax.map { it = [ "dada2", file(it) ] } )
} else {
ch_dada2_tax = Channel.empty()
}
@@ -436,6 +450,7 @@ workflow AMPLISEQ {
sintax_taxlevels
).tax.set { ch_sintax_tax }
ch_versions = ch_versions.mix(SINTAX_TAXONOMY_WF.out.versions)
+ ch_tax_for_phyloseq = ch_tax_for_phyloseq.mix ( ch_sintax_tax.map { it = [ "sintax", file(it) ] } )
} else {
ch_sintax_tax = Channel.empty()
}
@@ -456,8 +471,8 @@ workflow AMPLISEQ {
}
FASTA_NEWICK_EPANG_GAPPA ( ch_pp_data )
ch_versions = ch_versions.mix( FASTA_NEWICK_EPANG_GAPPA.out.versions )
-
ch_pplace_tax = FORMAT_PPLACETAX ( FASTA_NEWICK_EPANG_GAPPA.out.taxonomy_per_query ).tsv
+ ch_tax_for_phyloseq = ch_tax_for_phyloseq.mix ( PHYLOSEQ_INTAX_PPLACE ( ch_pplace_tax ).tsv.map { it = [ "pplace", file(it) ] } )
} else {
ch_pplace_tax = Channel.empty()
}
@@ -477,6 +492,10 @@ workflow AMPLISEQ {
ch_qiime_classifier
)
ch_versions = ch_versions.mix( QIIME2_TAXONOMY.out.versions.ifEmpty(null) ) //usually a .first() is here, dont know why this leads here to a warning
+ ch_qiime2_tax = QIIME2_TAXONOMY.out.tsv
+ ch_tax_for_phyloseq = ch_tax_for_phyloseq.mix ( PHYLOSEQ_INTAX_QIIME2 ( ch_qiime2_tax ).tsv.map { it = [ "qiime2", file(it) ] } )
+ } else {
+ ch_qiime2_tax = Channel.empty()
}
//
@@ -495,23 +514,29 @@ workflow AMPLISEQ {
// Import taxonomic classification into QIIME2, if available
if ( params.skip_taxonomy ) {
log.info "Skip taxonomy classification"
+ val_used_taxonomy = "skipped"
ch_tax = Channel.empty()
tax_agglom_min = 1
tax_agglom_max = 2
} else if ( params.sintax_ref_taxonomy ) {
log.info "Use SINTAX taxonomy classification"
+ val_used_taxonomy = "SINTAX"
ch_tax = QIIME2_INTAX ( ch_sintax_tax ).qza
} else if ( params.pplace_tree && params.pplace_taxonomy) {
log.info "Use EPA-NG / GAPPA taxonomy classification"
+ val_used_taxonomy = "phylogenetic placement"
ch_tax = QIIME2_INTAX ( ch_pplace_tax ).qza
} else if ( params.dada_ref_taxonomy && !params.skip_dada_taxonomy ) {
log.info "Use DADA2 taxonomy classification"
+ val_used_taxonomy = "DADA2"
ch_tax = QIIME2_INTAX ( ch_dada2_tax ).qza
} else if ( params.qiime_ref_taxonomy || params.classifier ) {
log.info "Use QIIME2 taxonomy classification"
+ val_used_taxonomy = "QIIME2"
ch_tax = QIIME2_TAXONOMY.out.qza
} else {
log.info "Use no taxonomy classification"
+ val_used_taxonomy = "none"
ch_tax = Channel.empty()
tax_agglom_min = 1
tax_agglom_max = 2
@@ -540,7 +565,7 @@ workflow AMPLISEQ {
}
//Export various ASV tables
if (!params.skip_abundance_tables) {
- QIIME2_EXPORT ( ch_asv, ch_seq, ch_tax, QIIME2_TAXONOMY.out.tsv, ch_dada2_tax, ch_pplace_tax, ch_sintax_tax, tax_agglom_min, tax_agglom_max )
+ QIIME2_EXPORT ( ch_asv, ch_seq, ch_tax, ch_qiime2_tax, ch_dada2_tax, ch_pplace_tax, ch_sintax_tax, tax_agglom_min, tax_agglom_max )
}
if (!params.skip_barplot) {
@@ -597,6 +622,8 @@ workflow AMPLISEQ {
tax_agglom_max
)
}
+ } else {
+ ch_tsv = ch_dada2_asv
}
//
@@ -627,6 +654,26 @@ workflow AMPLISEQ {
ch_versions = ch_versions.mix(SBDIEXPORT.out.versions.first())
}
+ //
+ // SUBWORKFLOW: Create phyloseq objects
+ //
+ if ( !params.skip_taxonomy ) {
+ if ( params.pplace_tree ) {
+ ch_tree_for_phyloseq = FASTA_NEWICK_EPANG_GAPPA.out.grafted_phylogeny
+ } else {
+ ch_tree_for_phyloseq = []
+ }
+
+ PHYLOSEQ_WORKFLOW (
+ ch_tax_for_phyloseq,
+ ch_tsv,
+ ch_metadata.ifEmpty([]),
+ ch_tree_for_phyloseq,
+ run_qiime2
+ )
+ ch_versions = ch_versions.mix(PHYLOSEQ_WORKFLOW.out.versions.first())
+ }
+
CUSTOM_DUMPSOFTWAREVERSIONS (
ch_versions.unique().collectFile(name: 'collated_versions.yml')
)
@@ -638,7 +685,7 @@ workflow AMPLISEQ {
workflow_summary = WorkflowAmpliseq.paramsSummaryMultiqc(workflow, summary_params)
ch_workflow_summary = Channel.value(workflow_summary)
- methods_description = WorkflowAmpliseq.methodsDescriptionText(workflow, ch_multiqc_custom_methods_description)
+ methods_description = WorkflowAmpliseq.methodsDescriptionText(workflow, ch_multiqc_custom_methods_description, params)
ch_methods_description = Channel.value(methods_description)
ch_multiqc_files = Channel.empty()
@@ -661,6 +708,71 @@ workflow AMPLISEQ {
multiqc_report = MULTIQC.out.report.toList()
}
+ //
+ // MODULE: Summary Report
+ //
+ if (!params.skip_report) {
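+ // inputs belonging to skipped steps are passed as empty lists ([]) or empty strings so the report can be rendered regardless of which steps ran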
+ SUMMARY_REPORT (
+ ch_report_template,
+ ch_report_css,
+ ch_report_logo,
+ ch_report_abstract,
+ ch_metadata.ifEmpty( [] ),
+ params.input.toString().toLowerCase().endsWith("tsv") ? file(params.input) : [], // samplesheet input
+ is_fasta_input ? PARSE_INPUT.out.fasta.ifEmpty( [] ) : [], // fasta input
+ !is_fasta_input && !params.skip_fastqc && !params.skip_multiqc ? MULTIQC.out.plots : [], //.collect().flatten().collectFile(name: "mqc_fastqc_per_sequence_quality_scores_plot_1.svg")
+ !params.skip_cutadapt ? CUTADAPT_WORKFLOW.out.summary.collect().ifEmpty( [] ) : [],
+ find_truncation_values,
+ DADA2_PREPROCESSING.out.args.first().ifEmpty( [] ),
+ !params.skip_dada_quality ? DADA2_PREPROCESSING.out.qc_svg.ifEmpty( [] ) : [],
+ !params.skip_dada_quality ? DADA2_PREPROCESSING.out.qc_svg_preprocessed.ifEmpty( [] ) : [],
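+ // regroup DADA2 error-profile plots: key only on single_end so plots from all runs are merged, keeping the run names in meta.run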
+ DADA2_ERR.out.svg
+ .map {
+ meta_old, svgs ->
+ def meta = [:]
+ meta.single_end = meta_old.single_end
+ [ meta, svgs, meta_old.run ] }
+ .groupTuple(by: 0 )
+ .map {
+ meta_old, svgs, runs ->
+ def meta = [:]
+ meta.single_end = meta_old.single_end
+ meta.run = runs.flatten()
+ [ meta, svgs.flatten() ]
+ }.ifEmpty( [[],[]] ),
+ DADA2_MERGE.out.asv.ifEmpty( [] ),
+ ch_unfiltered_fasta.ifEmpty( [] ), // this is identical to DADA2_MERGE.out.fasta if !is_fasta_input
+ DADA2_MERGE.out.dada2asv.ifEmpty( [] ),
+ DADA2_MERGE.out.dada2stats.ifEmpty( [] ),
+ !params.skip_barrnap ? BARRNAPSUMMARY.out.summary.ifEmpty( [] ) : [],
+ params.filter_ssu ? FILTER_SSU.out.stats.ifEmpty( [] ) : [],
+ params.filter_ssu ? FILTER_SSU.out.asv.ifEmpty( [] ) : [],
+ params.min_len_asv || params.max_len_asv ? FILTER_LEN_ASV.out.stats.ifEmpty( [] ) : [],
+ params.min_len_asv || params.max_len_asv ? FILTER_LEN_ASV.out.len_orig.ifEmpty( [] ) : [],
+ params.filter_codons ? FILTER_CODONS.out.stats.ifEmpty( [] ) : [],
+ params.cut_its != "none" ? ITSX_CUTASV.out.summary.ifEmpty( [] ) : [],
+ !params.skip_taxonomy && params.dada_ref_taxonomy && !params.skip_dada_taxonomy ? ch_dada2_tax.ifEmpty( [] ) : [],
+ !params.skip_taxonomy && params.dada_ref_taxonomy && !params.skip_dada_taxonomy ? DADA2_TAXONOMY_WF.out.cut_tax.ifEmpty( [[],[]] ) : [[],[]],
+ !params.skip_taxonomy && params.sintax_ref_taxonomy ? ch_sintax_tax.ifEmpty( [] ) : [],
+ !params.skip_taxonomy && params.pplace_tree ? ch_pplace_tax.ifEmpty( [] ) : [],
+ !params.skip_taxonomy && params.pplace_tree ? FASTA_NEWICK_EPANG_GAPPA.out.heattree.ifEmpty( [[],[]] ) : [[],[]],
+ !params.skip_taxonomy && ( params.qiime_ref_taxonomy || params.classifier ) && run_qiime2 ? QIIME2_TAXONOMY.out.tsv.ifEmpty( [] ) : [],
+ run_qiime2,
+ run_qiime2 ? val_used_taxonomy : "",
+ run_qiime2 && ( params.exclude_taxa != "none" || params.min_frequency != 1 || params.min_samples != 1 ) ? ch_dada2_asv.countLines()+","+QIIME2_FILTERTAXA.out.tsv.countLines() : "",
+ run_qiime2 && ( params.exclude_taxa != "none" || params.min_frequency != 1 || params.min_samples != 1 ) ? FILTER_STATS.out.tsv.ifEmpty( [] ) : [],
+ run_qiime2 && !params.skip_barplot ? QIIME2_BARPLOT.out.folder.ifEmpty( [] ) : [],
+ run_qiime2 && !params.skip_abundance_tables ? "done" : "",
+ run_qiime2 && !params.skip_alpha_rarefaction && params.metadata ? "done" : "",
+ run_qiime2 && !params.skip_diversity_indices && params.metadata ? QIIME2_DIVERSITY.out.depth.ifEmpty( [] ) : [],
+ run_qiime2 && !params.skip_diversity_indices && params.metadata ? QIIME2_DIVERSITY.out.beta.collect().ifEmpty( [] ) : [],
+ run_qiime2 && !params.skip_diversity_indices && params.metadata ? QIIME2_DIVERSITY.out.adonis.collect().ifEmpty( [] ) : [],
+ run_qiime2 && !params.skip_ancom && params.metadata ? QIIME2_ANCOM.out.ancom.collect().ifEmpty( [] ) : [],
+ params.picrust ? PICRUST.out.pathways.ifEmpty( [] ) : []
+ )
+ ch_versions = ch_versions.mix(SUMMARY_REPORT.out.versions)
+ }
+
//Save input in results folder
input = file(params.input)
if ( is_fasta_input || input.toString().toLowerCase().endsWith("tsv") ) {