Added CI tests for SpeciesAbundance workflow pipeline #11

Merged (12 commits, Apr 3, 2024)
Changes from 5 commits
26 changes: 21 additions & 5 deletions .github/workflows/ci.yml
@@ -1,4 +1,4 @@
name: nf-core CI
name: Pipeline CI
# This workflow runs the pipeline with the minimal test dataset to check that it completes without any syntax errors
on:
push:
@@ -35,12 +35,28 @@ jobs:
with:
version: "${{ matrix.NXF_VER }}"

- name: Cache nf-test installation
id: cache-software
uses: actions/cache@v3
with:
path: |
/usr/local/bin/nf-test
/home/runner/.nf-test/nf-test.jar
key: ${{ runner.os }}-${{ env.NFTEST_VER }}-nftest

- name: Install nf-test
if: steps.cache-software.outputs.cache-hit != 'true'
run: |
wget -qO- https://code.askimed.com/install/nf-test | bash
sudo mv nf-test /usr/local/bin/

- name: Disk space cleanup
uses: jlumbroso/free-disk-space@54081f138730dfa15788a46383842cd2f914a1be # v1.3.1

- name: Run pipeline with test data
# TODO nf-core: You can customise CI pipeline run tests as required
# For example: adding multiple test runs with different parameters
# Remember that you can parallelise this by using strategy.matrix
- name: Run nf-test
run: |
nf-test test

- name: Nextflow run with test profile
run: |
nextflow run ${GITHUB_WORKSPACE} -profile test,docker --outdir ./results
44 changes: 25 additions & 19 deletions README.md
@@ -16,35 +16,49 @@ The structure of this file is defined in [assets/schema_input.json](assets/schem

# Parameters

The main parameters are `--input` as defined above and `--output` for specifying the output results directory.
## Mandatory

Additionally, you may wish to provide:
The mandatory parameters are as follows:

`-profile singularity` to specify the use of singularity containers
- `--input` : a URI to the samplesheet as specified in the [Input](#input) section.
- `--outdir` : to specify the output results directory.
- `--kraken2_db /path/to/kraken2database` : to specify the directory to the Kraken2 database
- `--bracken_db /path/to/brackendatabase` : to specify the directory to the Bracken database

`-r [branch]` to specify which GitHub branch you would like to run
## Optional

`--kraken2_db /path/to/kraken2database`
Additionally, you may wish to provide:

`--bracken_db /path/to/brackendatabase`
- `-profile` : to specify which profile to use (ex: `-profile singularity`)
- `-r [branch]` : to specify which GitHub branch you would like to use
- `--taxonomic_level` : to specify the taxonomic level of the Bracken abundance estimation.
- Must be one of 'S' (species, default), 'G' (genus), 'O' (order), 'F' (family), 'P' (phylum), or 'K' (kingdom)

Other parameters (defaults from nf-core) are defined in [nextflow_schema.json](nextflow_schema.json).

# Running

To run the pipeline, please do:
## Test Data

To run the pipeline, please run:

```bash
nextflow run phac-nml/speciesabundance -profile singularity -r dev -latest --input /path/to/samplesheet.csv --outdir results
nextflow run phac-nml/speciesabundance -profile singularity -r dev -latest --input /path/to/samplesheet.csv --kraken2_db /path/to/kraken2_db --bracken_db /path/to/bracken_db --outdir results
```

Where the `samplesheet.csv` is structured as specified in the [Input](#input) section.
The pipeline output will be written to a directory named `results`. A JSON file for integrating with IRIDA Next will be written to `results/iridanext.output.json.gz` (as detailed in the [Output](#output) section).

To run the pipeline using the test profile, please run:

```bash
nextflow run phac-nml/speciesabundance -profile docker,test -r dev -latest --outdir results
```

# Output (in development)
# Output

A JSON file for loading metadata into IRIDA Next is output by this pipeline. The format of this JSON file is specified in our [Pipeline Standards for the IRIDA Next JSON](https://github.com/phac-nml/pipeline-standards#32-irida-next-json). This JSON file is written directly within the `--outdir` provided to the pipeline with the name `iridanext.output.json.gz` (ex: `[outdir]/iridanext.output.json.gz`).

An example of the what the contents of the IRIDA Next JSON file looks like for this particular pipeline is as follows:
(In development) An example of the contents of the IRIDA Next JSON file for this particular pipeline is as follows:

```
{
@@ -82,14 +96,6 @@ An example of the what the contents of the IRIDA Next JSON file looks like for t

Within the `files` section of this JSON file, all of the output paths are relative to the `outdir`. Therefore, `"path": "adjust/SAMPLE1_S_bracken_abundances.csv"` refers to a file located within `outdir/adjust/SAMPLE1_S_bracken_abundances.csv`.
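As a minimal sketch of this path layout (the JSON contents below are a hypothetical stand-in for real pipeline output, not generated by the pipeline), a gzipped JSON with a relative `path` entry can be recreated and inspected like so:

```bash
# Hypothetical stand-in for pipeline output: write a tiny IRIDA Next-style
# JSON into a scratch "outdir", then read it back. The recorded relative
# path resolves against that directory.
outdir=$(mktemp -d)
printf '{"files": {"samples": {"SAMPLE1": [{"path": "adjust/SAMPLE1_S_bracken_abundances.csv"}]}}}' \
  | gzip > "$outdir/iridanext.output.json.gz"
gzip -dc "$outdir/iridanext.output.json.gz"
# the entry above refers to "$outdir/adjust/SAMPLE1_S_bracken_abundances.csv"
```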

## Test profile

To run with the test profile, please do:

```bash
nextflow run phac-nml/speciesabundance -profile docker,test -r dev -latest --outdir results
```

# Legal

Copyright 2023 Government of Canada
2 changes: 2 additions & 0 deletions conf/test.config
@@ -21,4 +21,6 @@ params {

// Input data
input = 'https://raw.githubusercontent.com/phac-nml/speciesabundance/dev/assets/samplesheet.csv'
kraken2_db = "${projectDir}/tests/data/kraken2database"
bracken_db = "${projectDir}/tests/data/brackendatabase"
}
4 changes: 2 additions & 2 deletions nextflow.config
@@ -13,8 +13,8 @@ params {
input = null

//Pipeline parameters
kraken2_db = "${projectDir}/tests/data/minidb/"
bracken_db = "${projectDir}/tests/data/minidb/"
kraken2_db = null
bracken_db = null
taxonomic_level = 'S'

// Boilerplate options
40 changes: 32 additions & 8 deletions nextflow_schema.json
@@ -29,6 +29,22 @@
"description": "The output directory where the results will be saved. You have to use absolute paths to storage on Cloud infrastructure.",
"fa_icon": "fas fa-folder-open"
},
"email": {
"type": "string",
"description": "Email address for completion summary.",
"fa_icon": "fas fa-envelope",
"help_text": "Set this parameter to your e-mail address to get a summary e-mail with details of the run sent to you when the workflow exits. If set in your user config file (`~/.nextflow/config`) then you don't need to specify this on the command line for every run.",
"pattern": "^([a-zA-Z0-9_\\-\\.]+)@([a-zA-Z0-9_\\-\\.]+)\\.([a-zA-Z]{2,5})$"
}
}
},
"databases": {
"title": "Databases",
"type": "object",
"description": "The Kraken2 and Bracken databases required for analysis.",
"fa_icon": "fas fa-terminal",
"default": "",
"properties": {
"kraken2_db": {
"type": "string",
"pattern": "^\\S+$",
@@ -40,20 +56,22 @@
"pattern": "^\\S+$",
"format": "directory-path",
"description": "Path to Bracken database (do not use symlinks)"
},
}
}
},
"speciesabundance_pipeline_parameters": {
"title": "SpeciesAbundance Pipeline Parameters",
"type": "object",
"description": "Options to configure specific SpeciesAbundance pipeline behaviour",
"fa_icon": "fas fa-terminal",
"default": "",
"properties": {
"taxonomic_level": {
"type": "string",
"errorMessage": "Taxonomic_level must be provided and be one of 'S'(species), 'G'(genus), 'O'(order), 'F'(family), 'P'(phylum), or 'K'(kingdom)",
"enum": ["S", "G", "O", "F", "P", "K"],
"description": "Requested taxonomic level for BRACKEN",
"default": "S"
},
"email": {
"type": "string",
"description": "Email address for completion summary.",
"fa_icon": "fas fa-envelope",
"help_text": "Set this parameter to your e-mail address to get a summary e-mail with details of the run sent to you when the workflow exits. If set in your user config file (`~/.nextflow/config`) then you don't need to specify this on the command line for every run.",
"pattern": "^([a-zA-Z0-9_\\-\\.]+)@([a-zA-Z0-9_\\-\\.]+)\\.([a-zA-Z]{2,5})$"
}
}
},
@@ -215,6 +233,12 @@
{
"$ref": "#/definitions/input_output_options"
},
{
"$ref": "#/definitions/databases"
},
{
"$ref": "#/definitions/speciesabundance_pipeline_parameters"
},
{
"$ref": "#/definitions/institutional_config_options"
},
6 changes: 5 additions & 1 deletion nf-test.config
@@ -3,6 +3,10 @@ config {
testsDir "tests"
workDir ".nf-test"
configFile "tests/nextflow.config"
profile ""
profile "docker"

stage {
symlink "nextflow_schema.json"
}

}
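With `profile "docker"` set in this config, the test suite can be exercised locally much as in CI. A sketch, assuming `nf-test` and Docker are installed (the `--tag` value comes from the tags declared in `tests/main.nf.test`):

```bash
# Guarded so the sketch exits cleanly where nf-test is not installed.
command -v nf-test >/dev/null 2>&1 || { echo "nf-test not found; skipping"; exit 0; }

nf-test test                         # run the whole suite
nf-test test --tag pipeline_failure  # run only the failure-path tests
```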
4 changes: 4 additions & 0 deletions tests/data/brackendatabase/database100mers.kmer_distrib
@@ -0,0 +1,4 @@
mapped_taxid genome_taxids:kmers_mapped:total_genome_kmers
0 511145:2766:4641553 295405:10574:5277175
511145 511145:4638787:4641553
295405 295405:5266601:5277175
Binary file added tests/data/brackendatabase/database100mers.kraken
Binary file added tests/data/kraken2database/hash.k2d
Binary file added tests/data/kraken2database/opts.k2d
Binary file added tests/data/kraken2database/taxo.k2d
114 changes: 114 additions & 0 deletions tests/main.nf.test
@@ -0,0 +1,114 @@
nextflow_workflow {

name "Full Integration Tests for PHACNML_SpeciesAbundance"
script "main.nf"
workflow "SpAnce"

test("Should run without failures: Selecting input/output and Kraken2/Bracken databases") {
tag "pipeline_success"

when {
params {
input = "https://raw.githubusercontent.com/phac-nml/speciesabundance/dev/assets/samplesheet.csv"
kraken2_db = "$projectDir/tests/data/kraken2database"
bracken_db = "$projectDir/tests/data/brackendatabase"
outdir = "results"
}
workflow {}
}

then {
assert workflow.success
assert path("$launchDir/results").exists()

// check FASTP_TRIM JSON outputs from paired and single reads
def fastp_JSON_paired = path("$launchDir/results/fastp/SAMPLE1.json").json

assert fastp_JSON_paired.summary.sequencing == "paired end (126 cycles + 126 cycles)"
assert fastp_JSON_paired.filtering_result.passed_filter_reads == 99594

def fastp_JSON_single = path("$launchDir/results/fastp/SAMPLE3.json").json

assert fastp_JSON_single.summary.sequencing == "single end (126 cycles)"
assert fastp_JSON_single.filtering_result.passed_filter_reads == 49986

// check ADJUST_BRACKEN outputs
def lines = []

lines = path("$launchDir/results/adjust/SAMPLE2_S_bracken_abundances.csv").readLines()

assert lines.contains("SAMPLE2,unclassified,0,U,24334,0,24334,97.38664")
assert lines.contains("SAMPLE2,Escherichia coli,562,S,631,0,631,2.52531")
assert lines.contains("SAMPLE2,Bacteroides fragilis,817,S,22,0,22,0.08805")

// check that KRONA html files exist:
assert path("$launchDir/results/krona/SAMPLE1.html").exists()
assert path("$launchDir/results/krona/SAMPLE2.html").exists()
assert path("$launchDir/results/krona/SAMPLE3.html").exists()

// check IRIDA Next JSON files
lines = path("$launchDir/results/iridanext.output.json.gz").linesGzip.join("\n")

assert lines.contains("\"path\": \"adjust/SAMPLE2_S_bracken_abundances.csv\"")
assert lines.contains("\"path\": \"krona/SAMPLE2.html\"")
assert lines.contains("\"path\": \"fastp/SAMPLE2.html\"")
assert lines.contains("\"path\": \"fastp/SAMPLE2_R1_trimmed.fastq.gz\"")
assert lines.contains("\"path\": \"fastp/SAMPLE2_R2_trimmed.fastq.gz\"")
}
}

test("Selecting no databases (failure)") {
tag "pipeline_failure"

when {
params {
input = "https://raw.githubusercontent.com/phac-nml/speciesabundance/dev/assets/samplesheet.csv"
outdir = "results"
}
workflow {}
}

then {
assert workflow.failed
assert workflow.stdout.contains("Unable to select a kraken2 database: '--kraken2_db' was not provided")
}
}

test("Selecting no BRACKEN database (failure)") {
tag "pipeline_failure"

when {
params {
input = "https://raw.githubusercontent.com/phac-nml/speciesabundance/dev/assets/samplesheet.csv"
kraken2_db = "${projectDir}/tests/data/kraken2database"
outdir = "results"
}
workflow {}
}

then {
assert workflow.failed
assert workflow.stdout.contains("Unable to select a bracken database: '--bracken_db' was not provided")
}
}

test("Missing the BRACKEN kmer distribution database files (failure)") {
tag "pipeline_failure"

when {
params {
input = "https://raw.githubusercontent.com/phac-nml/speciesabundance/dev/assets/samplesheet.csv"
kraken2_db = "${projectDir}/tests/data/kraken2database"
bracken_db = "${projectDir}/tests/data/kraken2database"
outdir = "results"
}
workflow {}
}

then {
assert workflow.failed
assert workflow.stdout.contains("Missing required BRACKEN database files: run bracken-build to generate the kmer distribution files")
}
}

}