diff --git a/.gitignore b/.gitignore index 5124c9a..7227765 100644 --- a/.gitignore +++ b/.gitignore @@ -6,3 +6,5 @@ results/ testing/ testing* *.pyc +.nf-test +.nf-test.log diff --git a/CHANGELOG.md b/CHANGELOG.md index 58a53ef..6430add 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,6 +9,8 @@ Initial release of nf-core/seqinspector, created with the [nf-core](https://nf-c ### `Added` +- [#13](https://github.com/nf-core/seqinspector/pull/13) Generate reports per run, per project and per lane. + ### `Fixed` ### `Dependencies` diff --git a/README.md b/README.md index bfb53fe..7efdd3e 100644 --- a/README.md +++ b/README.md @@ -39,26 +39,19 @@ > [!NOTE] > If you are new to Nextflow and nf-core, please refer to [this page](https://nf-co.re/docs/usage/installation) on how to set-up Nextflow. Make sure to [test your setup](https://nf-co.re/docs/usage/introduction#how-to-run-a-pipeline) with `-profile test` before running the workflow on actual data. - - Now, you can run the pipeline using: - - ```bash nextflow run nf-core/seqinspector \ -profile \ @@ -80,11 +73,11 @@ For more details about the output files and reports, please refer to the ## Credits -nf-core/seqinspector was originally written by Adrien Coulier. +nf-core/seqinspector was originally written by the Swedish [@NationalGenomicsInfrastructure](https://github.com/NationalGenomicsInfrastructure/). 
We thank the following people for their extensive assistance in the development of this pipeline: - +- [@mahesh-panchal](https://github.com/mahesh-panchal) ## Contributions and Support diff --git a/assets/samplesheet.csv b/assets/samplesheet.csv index fbe5de2..fef3b4e 100644 --- a/assets/samplesheet.csv +++ b/assets/samplesheet.csv @@ -1,3 +1,3 @@ -sample,lane,project,fastq_1,fastq_2,rundir +sample,lane,group,fastq_1,fastq_2,rundir SAMPLE_PAIRED_END,1,P001,/path/to/fastq/files/AEG588A1_S1_L002_R1_001.fastq.gz,/path/to/fastq/files/AEG588A1_S1_L002_R2_001.fastq.gz,/path/to/rundir SAMPLE_SINGLE_END,2,P002,/path/to/fastq/files/AEG588A4_S4_L003_R1_001.fastq.gz,,/path/to/rundir diff --git a/assets/schema_input.json b/assets/schema_input.json index 9fb321b..c9800d5 100644 --- a/assets/schema_input.json +++ b/assets/schema_input.json @@ -19,11 +19,11 @@ "errorMessage": "Lane ID must be a number", "meta": ["lane"] }, - "project": { + "group": { "type": "string", "pattern": "^\\S+$", - "errorMessage": "Project ID cannot contain spaces", - "meta": ["project"] + "errorMessage": "Group ID cannot contain spaces", + "meta": ["group"] }, "fastq_1": { "type": "string", @@ -47,7 +47,7 @@ "meta": ["rundir"] } }, - "required": ["sample", "lane", "fastq_1"], + "required": ["sample", "fastq_1"], "dependentRequired": { "fastq_2": ["fastq_1"] } diff --git a/conf/modules.config b/conf/modules.config index d203d2b..f7e7845 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -22,7 +22,7 @@ process { ext.args = '--quiet' } - withName: 'MULTIQC' { + withName: 'MULTIQC_GLOBAL' { ext.args = { params.multiqc_title ? "--title \"$params.multiqc_title\"" : '' } publishDir = [ path: { "${params.outdir}/multiqc" }, @@ -31,4 +31,78 @@ process { ] } + withName: 'MULTIQC_PER_LANE' { + ext.args = { params.multiqc_title ? 
"--title \"$params.multiqc_title\"" : '' } + publishDir = [ + path: { "${params.outdir}/multiqc/lanes" }, + mode: params.publish_dir_mode, + saveAs: { + filename -> + switch (filename) { + case 'versions.yml': + null + break + case ~/\[LANE:\d+\]_multiqc_(report\.html|plots|data)/: + def lane = (filename =~ /\[LANE:(\d+)\]_multiqc_(report\.html|plots|data)/)[0][1] + def new_filename = filename.replaceFirst( + "(?<prefix>.*)\\[LANE:${lane}\\]_(?<suffix>multiqc_(report\\.html|plots|data).*)", + '${prefix}${suffix}') + "L${lane}/${new_filename}" + break + default: + filename + } + } + ] + } + + withName: 'MULTIQC_PER_GROUP' { + ext.args = { params.multiqc_title ? "--title \"$params.multiqc_title\"" : '' } + publishDir = [ + path: { "${params.outdir}/multiqc/groups" }, + mode: params.publish_dir_mode, + saveAs: { + filename -> + switch (filename) { + case 'versions.yml': + null + break + case ~/\[GROUP:.+\]_multiqc_(report\.html|plots|data)/: + def group = (filename =~ /\[GROUP:(.+)\]_multiqc_(report\.html|plots|data)/)[0][1] + def new_filename = filename.replaceFirst( + "(?<prefix>.*)\\[GROUP:${group}\\]_(?<suffix>multiqc_(report\\.html|plots|data).*)", + '${prefix}${suffix}') + "${group}/${new_filename}" + break + default: + filename + } + } + ] + } + + withName: 'MULTIQC_PER_RUNDIR' { + ext.args = { params.multiqc_title ? 
"--title \"$params.multiqc_title\"" : '' } + publishDir = [ + path: { "${params.outdir}/multiqc/rundir" }, + mode: params.publish_dir_mode, + saveAs: { + filename -> + switch (filename) { + case 'versions.yml': + null + break + case ~/\[RUNDIR:.+\]_multiqc_(report\.html|plots|data)/: + def rundir = (filename =~ /\[RUNDIR:(.+)\]_multiqc_(report\.html|plots|data)/)[0][1] + def new_filename = filename.replaceFirst( + "(?<prefix>.*)\\[RUNDIR:${rundir}\\]_(?<suffix>multiqc_(report\\.html|plots|data).*)", + '${prefix}${suffix}') + "${rundir}/${new_filename}" + break + default: + filename + } + } + ] + } } diff --git a/docs/output.md b/docs/output.md index 77e4cc1..7af7806 100644 --- a/docs/output.md +++ b/docs/output.md @@ -6,8 +6,6 @@ This document describes the output produced by the pipeline. Most of the plots a The directories listed below will be created in the results directory after the pipeline has finished. All paths are relative to the top-level results directory. - - ## Pipeline overview The pipeline is built using [Nextflow](https://www.nextflow.io/) and processes data using the following steps: @@ -48,6 +46,29 @@ The FastQC plots displayed in the MultiQC report shows _untrimmed_ reads. They m - `multiqc_report.html`: a standalone HTML file that can be viewed in your web browser. - `multiqc_data/`: directory containing parsed statistics from the different tools used in the pipeline. - `multiqc_plots/`: directory containing static images from the report in various formats. + - `lanes/` [1] + - `L1/` + - `multiqc_report.html`: a standalone HTML file that can be viewed in your web browser. + - `multiqc_data/`: directory containing parsed statistics from the different tools used in the pipeline. + - `multiqc_plots/`: directory containing static images from the report in various formats. + - `L2/` + - ... + - `groups/` [1] + - `GROUPNAME1/` + - `multiqc_report.html`: a standalone HTML file that can be viewed in your web browser. 
+ - `multiqc_data/`: directory containing parsed statistics from the different tools used in the pipeline. + - `multiqc_plots/`: directory containing static images from the report in various formats. + - `GROUPNAME2/` + - ... + - `rundir/` [1] + - `RUNDIR1/` + - `multiqc_report.html`: a standalone HTML file that can be viewed in your web browser. + - `multiqc_data/`: directory containing parsed statistics from the different tools used in the pipeline. + - `multiqc_plots/`: directory containing static images from the report in various formats. + - `RUNDIR2/` + - ... + +[1] These files will only be generated if `lane`, `group` or `rundir` were specified for some samples. diff --git a/docs/usage.md b/docs/usage.md index ec180e4..7a4ec73 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -10,47 +10,45 @@ ## Samplesheet input -You will need to create a samplesheet with information about the samples you would like to analyse before running the pipeline. Use this parameter to specify its location. It has to be a comma-separated file with 3 columns, and a header row as shown in the examples below. +You will need to create a samplesheet with information about the samples you would like to analyse before running the pipeline. Use this parameter to specify its location. ```bash --input '[path to samplesheet file]' ``` -### Multiple runs of the same sample +### Full samplesheet -The `sample` identifiers have to be the same when you have re-sequenced the same sample more than once e.g. to increase sequencing depth. The pipeline will concatenate the raw reads before performing any downstream analysis. Below is an example for the same sample sequenced across 3 lanes: +The following simple run dir structure... 
-```csv title="samplesheet.csv" -sample,fastq_1,fastq_2 -CONTROL_REP1,AEG588A1_S1_L002_R1_001.fastq.gz,AEG588A1_S1_L002_R2_001.fastq.gz -CONTROL_REP1,AEG588A1_S1_L003_R1_001.fastq.gz,AEG588A1_S1_L003_R2_001.fastq.gz -CONTROL_REP1,AEG588A1_S1_L004_R1_001.fastq.gz,AEG588A1_S1_L004_R2_001.fastq.gz +``` +run_dir +├── sample1_lane1_group1_r1.fq.gz +├── sample2_lane1_group1_r1.fq.gz +├── sample3_lane2_group2_r1.fq.gz +└── sample4_lane2_group3_r1.fq.gz ``` -### Full samplesheet - -The pipeline will auto-detect whether a sample is single- or paired-end using the information provided in the samplesheet. The samplesheet can have as many columns as you desire, however, there is a strict requirement for the first 3 columns to match those defined in the table below. - -A final samplesheet file consisting of both single- and paired-end data may look something like the one below. This is for 6 samples, where `TREATMENT_REP3` has been sequenced twice. +...would be represented in the following samplesheet (shown as .tsv for readability) ```csv title="samplesheet.csv" -sample,fastq_1,fastq_2 -CONTROL_REP1,AEG588A1_S1_L002_R1_001.fastq.gz,AEG588A1_S1_L002_R2_001.fastq.gz -CONTROL_REP2,AEG588A2_S2_L002_R1_001.fastq.gz,AEG588A2_S2_L002_R2_001.fastq.gz -CONTROL_REP3,AEG588A3_S3_L002_R1_001.fastq.gz,AEG588A3_S3_L002_R2_001.fastq.gz -TREATMENT_REP1,AEG588A4_S4_L003_R1_001.fastq.gz, -TREATMENT_REP2,AEG588A5_S5_L003_R1_001.fastq.gz, -TREATMENT_REP3,AEG588A6_S6_L003_R1_001.fastq.gz, -TREATMENT_REP3,AEG588A6_S6_L004_R1_001.fastq.gz, +sample lane group fastq_1 fastq_2 rundir +sample1 1 group1 path/to/run_dir/sample1_lane1_group1_r1.fq.gz path/to/run_dir +sample2 1 group1 path/to/run_dir/sample2_lane1_group1_r1.fq.gz path/to/run_dir +sample3 2 group2 path/to/run_dir/sample3_lane2_group2_r1.fq.gz path/to/run_dir +sample4 2 group3 path/to/run_dir/sample4_lane2_group3_r1.fq.gz path/to/run_dir + ``` | Column | Description | | --------- | 
-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | | `sample` | Custom sample name. This entry will be identical for multiple sequencing libraries/runs from the same sample. Spaces in sample names are automatically converted to underscores (`_`). | +| `lane` | Lane where the sample was processed on an Illumina instrument (optional). | +| `group` | Group the sample belongs to, useful when several groups are pooled together (optional). | | `fastq_1` | Full path to FastQ file for Illumina short reads 1. File has to be gzipped and have the extension ".fastq.gz" or ".fq.gz". | -| `fastq_2` | Full path to FastQ file for Illumina short reads 2. File has to be gzipped and have the extension ".fastq.gz" or ".fq.gz". | +| `fastq_2` | Full path to FastQ file for Illumina short reads 2. File has to be gzipped and have the extension ".fastq.gz" or ".fq.gz" (optional). | +| `rundir` | Path to the runfolder containing extra information about the sequencing run (optional). | -An [example samplesheet](../assets/samplesheet.csv) has been provided with the pipeline. +Another [example samplesheet](../assets/samplesheet.csv) has been provided with the pipeline. 
## Running the pipeline diff --git a/main.nf b/main.nf index 1e9c2ed..fb00bd5 100644 --- a/main.nf +++ b/main.nf @@ -58,7 +58,10 @@ workflow NFCORE_SEQINSPECTOR { ) emit: - multiqc_report = SEQINSPECTOR.out.multiqc_report // channel: /path/to/multiqc_report.html + global_report = SEQINSPECTOR.out.global_report // channel: /path/to/multiqc_report.html + lane_reports = SEQINSPECTOR.out.lane_reports // channel: /path/to/multiqc_report.html + group_reports = SEQINSPECTOR.out.group_reports // channel: /path/to/multiqc_report.html + rundir_reports = SEQINSPECTOR.out.rundir_reports // channel: /path/to/multiqc_report.html } /* @@ -101,7 +104,7 @@ workflow { params.outdir, params.monochrome_logs, params.hook_url, - NFCORE_SEQINSPECTOR.out.multiqc_report + NFCORE_SEQINSPECTOR.out.global_report, ) } diff --git a/nf-test.config b/nf-test.config new file mode 100644 index 0000000..6969c08 --- /dev/null +++ b/nf-test.config @@ -0,0 +1,8 @@ +config { + + testsDir "tests" + workDir ".nf-test" + configFile "tests/nextflow.config" + profile "test,docker" + +} diff --git a/subworkflows/local/utils_nfcore_seqinspector_pipeline/main.nf b/subworkflows/local/utils_nfcore_seqinspector_pipeline/main.nf index 947c6dd..ea9c9b8 100644 --- a/subworkflows/local/utils_nfcore_seqinspector_pipeline/main.nf +++ b/subworkflows/local/utils_nfcore_seqinspector_pipeline/main.nf @@ -84,7 +84,7 @@ workflow PIPELINE_INITIALISATION { .fromSamplesheet("input") // Validates samplesheet against $projectDir/assets/schema_input.json. 
Path to validation schema is defined by $projectDir/nextflow_schema.json .map { meta, fastq_1, fastq_2 -> - def id_string = "${meta.sample}_${meta.project ?: "ungrouped"}_${meta.lane}" + def id_string = "${meta.sample}_${meta.group ?: "ungrouped"}_${meta.lane}" def updated_meta = meta + [ id: id_string ] if (!fastq_2) { return [ updated_meta.id, updated_meta + [ single_end:true ], [ fastq_1 ] ] @@ -101,7 +101,6 @@ workflow PIPELINE_INITIALISATION { // meta, fastqs -> // return [ meta, fastqs.flatten() ] // } - .view() .set { ch_samplesheet } emit: diff --git a/tests/MiSeq.main.nf.test b/tests/MiSeq.main.nf.test new file mode 100644 index 0000000..bfabd95 --- /dev/null +++ b/tests/MiSeq.main.nf.test @@ -0,0 +1,40 @@ +nextflow_pipeline { + + name "Test Workflow main.nf on MiSeq data" + script "../main.nf" + tag "seqinspector" + tag "PIPELINE" + + test("MiSeq data test") { + + when { + config "./MiSeq.main.nf.test.config" + params { + outdir = "$outputDir" + } + } + + then { + assertAll( + { assert workflow.success }, + { assert snapshot( + path("$outputDir/multiqc/lanes/L1/multiqc_data/multiqc_citations.txt"), + path("$outputDir/multiqc/lanes/L1/multiqc_data/multiqc_fastqc.txt"), + path("$outputDir/multiqc/lanes/L1/multiqc_data/multiqc_general_stats.txt"), + path("$outputDir/multiqc/lanes/L1/multiqc_data/multiqc_software_versions.txt"), + + path("$outputDir/multiqc/groups/P001/multiqc_data/multiqc_citations.txt"), + path("$outputDir/multiqc/groups/P001/multiqc_data/multiqc_fastqc.txt"), + path("$outputDir/multiqc/groups/P001/multiqc_data/multiqc_general_stats.txt"), + path("$outputDir/multiqc/groups/P001/multiqc_data/multiqc_software_versions.txt"), + + path("$outputDir/multiqc/multiqc_data/multiqc_citations.txt"), + path("$outputDir/multiqc/multiqc_data/multiqc_fastqc.txt"), + path("$outputDir/multiqc/multiqc_data/multiqc_general_stats.txt"), + path("$outputDir/multiqc/multiqc_data/multiqc_software_versions.txt"), + ).match() + } + ) + } + } +} diff --git 
a/tests/MiSeq.main.nf.test.config b/tests/MiSeq.main.nf.test.config new file mode 100644 index 0000000..073a977 --- /dev/null +++ b/tests/MiSeq.main.nf.test.config @@ -0,0 +1,7 @@ +// Load the basic test config +includeConfig 'nextflow.config' + +// Load the correct samplesheet for that test +params { + input = params.pipelines_testdata_base_path + 'seqinspector/testdata/MiSeq/samplesheet.csv' +} diff --git a/tests/MiSeq.main.nf.test.snap b/tests/MiSeq.main.nf.test.snap new file mode 100644 index 0000000..87c1a56 --- /dev/null +++ b/tests/MiSeq.main.nf.test.snap @@ -0,0 +1,19 @@ +{ + "MiSeq data test": { + "content": [ + "multiqc_citations.txt:md5,4c806e63a283ec1b7e78cdae3a923d4f", + "multiqc_fastqc.txt:md5,692b8aed0614ed1655f2c1cbea1ba312", + "multiqc_general_stats.txt:md5,630167d67d3f92408cd1a04422c7196f", + "multiqc_software_versions.txt:md5,7452f1f7aae2a8a4066c2ef6cd5ceb95", + "multiqc_citations.txt:md5,4c806e63a283ec1b7e78cdae3a923d4f", + "multiqc_fastqc.txt:md5,692b8aed0614ed1655f2c1cbea1ba312", + "multiqc_general_stats.txt:md5,630167d67d3f92408cd1a04422c7196f", + "multiqc_software_versions.txt:md5,7452f1f7aae2a8a4066c2ef6cd5ceb95", + "multiqc_citations.txt:md5,4c806e63a283ec1b7e78cdae3a923d4f", + "multiqc_fastqc.txt:md5,692b8aed0614ed1655f2c1cbea1ba312", + "multiqc_general_stats.txt:md5,630167d67d3f92408cd1a04422c7196f", + "multiqc_software_versions.txt:md5,7452f1f7aae2a8a4066c2ef6cd5ceb95" + ], + "timestamp": "2024-05-30T13:14:20.263485" + } +} \ No newline at end of file diff --git a/tests/NovaSeq6000.main.nf.test b/tests/NovaSeq6000.main.nf.test new file mode 100644 index 0000000..174e215 --- /dev/null +++ b/tests/NovaSeq6000.main.nf.test @@ -0,0 +1,55 @@ +nextflow_pipeline { + + name "Test Workflow main.nf on NovaSeq6000 data" + script "../main.nf" + tag "seqinspector" + tag "PIPELINE" + + test("NovaSeq6000 data test") { + + when { + config "./NovaSeq6000.main.nf.test.config" + params { + outdir = "$outputDir" + } + } + + then { + assertAll( + { assert 
workflow.success }, + { assert snapshot( + path("$outputDir/multiqc/lanes/L1/multiqc_data/multiqc_citations.txt"), + path("$outputDir/multiqc/lanes/L1/multiqc_data/multiqc_fastqc.txt"), + path("$outputDir/multiqc/lanes/L1/multiqc_data/multiqc_general_stats.txt"), + path("$outputDir/multiqc/lanes/L1/multiqc_data/multiqc_software_versions.txt"), + + path("$outputDir/multiqc/groups/S1/multiqc_data/multiqc_citations.txt"), + path("$outputDir/multiqc/groups/S1/multiqc_data/multiqc_fastqc.txt"), + path("$outputDir/multiqc/groups/S1/multiqc_data/multiqc_general_stats.txt"), + path("$outputDir/multiqc/groups/S1/multiqc_data/multiqc_software_versions.txt"), + + path("$outputDir/multiqc/groups/S2/multiqc_data/multiqc_citations.txt"), + path("$outputDir/multiqc/groups/S2/multiqc_data/multiqc_fastqc.txt"), + path("$outputDir/multiqc/groups/S2/multiqc_data/multiqc_general_stats.txt"), + path("$outputDir/multiqc/groups/S2/multiqc_data/multiqc_software_versions.txt"), + + path("$outputDir/multiqc/groups/S3/multiqc_data/multiqc_citations.txt"), + path("$outputDir/multiqc/groups/S3/multiqc_data/multiqc_fastqc.txt"), + path("$outputDir/multiqc/groups/S3/multiqc_data/multiqc_general_stats.txt"), + path("$outputDir/multiqc/groups/S3/multiqc_data/multiqc_software_versions.txt"), + + path("$outputDir/multiqc/groups/S4/multiqc_data/multiqc_citations.txt"), + path("$outputDir/multiqc/groups/S4/multiqc_data/multiqc_fastqc.txt"), + path("$outputDir/multiqc/groups/S4/multiqc_data/multiqc_general_stats.txt"), + path("$outputDir/multiqc/groups/S4/multiqc_data/multiqc_software_versions.txt"), + + path("$outputDir/multiqc/multiqc_data/multiqc_citations.txt"), + path("$outputDir/multiqc/multiqc_data/multiqc_fastqc.txt"), + path("$outputDir/multiqc/multiqc_data/multiqc_general_stats.txt"), + path("$outputDir/multiqc/multiqc_data/multiqc_software_versions.txt"), + ).match() + }, + ) + } + } +} diff --git a/tests/NovaSeq6000.main.nf.test.config b/tests/NovaSeq6000.main.nf.test.config new file mode 
100644 index 0000000..cad5edd --- /dev/null +++ b/tests/NovaSeq6000.main.nf.test.config @@ -0,0 +1,7 @@ +// Load the basic test config +includeConfig 'nextflow.config' + +// Load the correct samplesheet for that test +params { + input = params.pipelines_testdata_base_path + 'seqinspector/testdata/NovaSeq6000/samplesheet.csv' +} diff --git a/tests/NovaSeq6000.main.nf.test.snap b/tests/NovaSeq6000.main.nf.test.snap new file mode 100644 index 0000000..2260025 --- /dev/null +++ b/tests/NovaSeq6000.main.nf.test.snap @@ -0,0 +1,31 @@ +{ + "NovaSeq6000 data test": { + "content": [ + "multiqc_citations.txt:md5,4c806e63a283ec1b7e78cdae3a923d4f", + "multiqc_fastqc.txt:md5,5ba3f4f4ec6026a5f5d55418384dcd3e", + "multiqc_general_stats.txt:md5,123cd6b64c9c15607405bcdd45a843d4", + "multiqc_software_versions.txt:md5,7452f1f7aae2a8a4066c2ef6cd5ceb95", + "multiqc_citations.txt:md5,4c806e63a283ec1b7e78cdae3a923d4f", + "multiqc_fastqc.txt:md5,9b4fd8a6d6e8a9acabecd592f633472e", + "multiqc_general_stats.txt:md5,8237b88ceb018d3cb1edcea62d10f4a2", + "multiqc_software_versions.txt:md5,7452f1f7aae2a8a4066c2ef6cd5ceb95", + "multiqc_citations.txt:md5,4c806e63a283ec1b7e78cdae3a923d4f", + "multiqc_fastqc.txt:md5,9246a5b6b7b0410c79049fc3dbd08e92", + "multiqc_general_stats.txt:md5,44328403f423c6f5ac9ee0a8a01e6725", + "multiqc_software_versions.txt:md5,7452f1f7aae2a8a4066c2ef6cd5ceb95", + "multiqc_citations.txt:md5,4c806e63a283ec1b7e78cdae3a923d4f", + "multiqc_fastqc.txt:md5,84820276fae52d4d492831280ae6207c", + "multiqc_general_stats.txt:md5,dd07799e5e4b9d389f9de49a852c3363", + "multiqc_software_versions.txt:md5,7452f1f7aae2a8a4066c2ef6cd5ceb95", + "multiqc_citations.txt:md5,4c806e63a283ec1b7e78cdae3a923d4f", + "multiqc_fastqc.txt:md5,59ae05d89453da6f57010ffb6466f902", + "multiqc_general_stats.txt:md5,e4629691992bfe639c01a84b90563334", + "multiqc_software_versions.txt:md5,7452f1f7aae2a8a4066c2ef6cd5ceb95", + "multiqc_citations.txt:md5,4c806e63a283ec1b7e78cdae3a923d4f", + 
"multiqc_fastqc.txt:md5,5ba3f4f4ec6026a5f5d55418384dcd3e", + "multiqc_general_stats.txt:md5,123cd6b64c9c15607405bcdd45a843d4", + "multiqc_software_versions.txt:md5,7452f1f7aae2a8a4066c2ef6cd5ceb95" + ], + "timestamp": "2024-05-30T13:13:49.062282" + } +} \ No newline at end of file diff --git a/tests/PromethION.main.nf.test b/tests/PromethION.main.nf.test new file mode 100644 index 0000000..3928478 --- /dev/null +++ b/tests/PromethION.main.nf.test @@ -0,0 +1,35 @@ +nextflow_pipeline { + + name "Test Workflow main.nf on PromethION data" + script "../main.nf" + tag "seqinspector" + tag "PIPELINE" + + test("PromethION data test") { + + when { + config "./PromethION.main.nf.test.config" + params { + outdir = "$outputDir" + } + } + + then { + assertAll( + { assert workflow.success }, + { assert snapshot( + path("$outputDir/multiqc/groups/r10p41_e8p2_human_runs_jkw/multiqc_data/multiqc_citations.txt"), + path("$outputDir/multiqc/groups/r10p41_e8p2_human_runs_jkw/multiqc_data/multiqc_fastqc.txt"), + path("$outputDir/multiqc/groups/r10p41_e8p2_human_runs_jkw/multiqc_data/multiqc_general_stats.txt"), + path("$outputDir/multiqc/groups/r10p41_e8p2_human_runs_jkw/multiqc_data/multiqc_software_versions.txt"), + + path("$outputDir/multiqc/multiqc_data/multiqc_citations.txt"), + path("$outputDir/multiqc/multiqc_data/multiqc_fastqc.txt"), + path("$outputDir/multiqc/multiqc_data/multiqc_general_stats.txt"), + path("$outputDir/multiqc/multiqc_data/multiqc_software_versions.txt"), + ).match() + }, + ) + } + } +} diff --git a/tests/PromethION.main.nf.test.config b/tests/PromethION.main.nf.test.config new file mode 100644 index 0000000..e1498a4 --- /dev/null +++ b/tests/PromethION.main.nf.test.config @@ -0,0 +1,7 @@ +// Load the basic test config +includeConfig 'nextflow.config' + +// Load the correct samplesheet for that test +params { + input = params.pipelines_testdata_base_path + 'seqinspector/testdata/PromethION/samplesheet.csv' +} diff --git a/tests/PromethION.main.nf.test.snap 
b/tests/PromethION.main.nf.test.snap new file mode 100644 index 0000000..0ac213c --- /dev/null +++ b/tests/PromethION.main.nf.test.snap @@ -0,0 +1,15 @@ +{ + "PromethION data test": { + "content": [ + "multiqc_citations.txt:md5,4c806e63a283ec1b7e78cdae3a923d4f", + "multiqc_fastqc.txt:md5,333360ff12007d64f2bf7673b0658bed", + "multiqc_general_stats.txt:md5,b1999255f9a502618d59be2f2e93bad2", + "multiqc_software_versions.txt:md5,7452f1f7aae2a8a4066c2ef6cd5ceb95", + "multiqc_citations.txt:md5,4c806e63a283ec1b7e78cdae3a923d4f", + "multiqc_fastqc.txt:md5,333360ff12007d64f2bf7673b0658bed", + "multiqc_general_stats.txt:md5,b1999255f9a502618d59be2f2e93bad2", + "multiqc_software_versions.txt:md5,7452f1f7aae2a8a4066c2ef6cd5ceb95" + ], + "timestamp": "2024-05-30T13:14:40.99246" + } +} \ No newline at end of file diff --git a/tests/nextflow.config b/tests/nextflow.config new file mode 100644 index 0000000..8d9ef46 --- /dev/null +++ b/tests/nextflow.config @@ -0,0 +1,21 @@ +/* +======================================================================================== + Nextflow config file for running tests +======================================================================================== +*/ + +params { + config_profile_name = 'nf-test profile' + config_profile_description = 'Configuration profile to use for nf-test.' 
+ + // Limit resources so that this can run on GitHub Actions + max_cpus = 2 + max_memory = '3.GB' + max_time = '2.h' + + pipelines_testdata_base_path = 'https://raw.githubusercontent.com/nf-core/test-datasets/' + + validationSchemaIgnoreParams = 'genomes,igenomes_base,pipelines_testdata_base_path' + + +} diff --git a/workflows/seqinspector.nf b/workflows/seqinspector.nf index cf7a85c..50fa481 100644 --- a/workflows/seqinspector.nf +++ b/workflows/seqinspector.nf @@ -4,12 +4,17 @@ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ -include { FASTQC } from '../modules/nf-core/fastqc/main' -include { MULTIQC } from '../modules/nf-core/multiqc/main' -include { paramsSummaryMap } from 'plugin/nf-validation' -include { paramsSummaryMultiqc } from '../subworkflows/nf-core/utils_nfcore_pipeline' -include { softwareVersionsToYAML } from '../subworkflows/nf-core/utils_nfcore_pipeline' -include { methodsDescriptionText } from '../subworkflows/local/utils_nfcore_seqinspector_pipeline' +include { FASTQC } from '../modules/nf-core/fastqc/main' + +include { MULTIQC as MULTIQC_GLOBAL } from '../modules/nf-core/multiqc/main' +include { MULTIQC as MULTIQC_PER_LANE } from '../modules/nf-core/multiqc/main' +include { MULTIQC as MULTIQC_PER_GROUP } from '../modules/nf-core/multiqc/main' +include { MULTIQC as MULTIQC_PER_RUNDIR } from '../modules/nf-core/multiqc/main' + +include { paramsSummaryMap } from 'plugin/nf-validation' +include { paramsSummaryMultiqc } from '../subworkflows/nf-core/utils_nfcore_pipeline' +include { softwareVersionsToYAML } from '../subworkflows/nf-core/utils_nfcore_pipeline' +include { methodsDescriptionText } from '../subworkflows/local/utils_nfcore_seqinspector_pipeline' /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -24,8 +29,10 @@ workflow SEQINSPECTOR { main: - ch_versions = Channel.empty() - ch_multiqc_files = Channel.empty() + ch_versions = Channel.empty() + 
ch_multiqc_files = Channel.empty() + ch_multiqc_extra_files = Channel.empty() + ch_multiqc_reports = Channel.empty() // // MODULE: Run FastQC @@ -33,7 +40,7 @@ workflow SEQINSPECTOR { FASTQC ( ch_samplesheet ) - ch_multiqc_files = ch_multiqc_files.mix(FASTQC.out.zip.collect{it[1]}) + ch_multiqc_files = ch_multiqc_files.mix(FASTQC.out.zip) ch_versions = ch_versions.mix(FASTQC.out.versions.first()) // @@ -50,45 +57,162 @@ workflow SEQINSPECTOR { // // MODULE: MultiQC // - ch_multiqc_config = Channel.fromPath( - "$projectDir/assets/multiqc_config.yml", checkIfExists: true) - ch_multiqc_custom_config = params.multiqc_config ? + ch_multiqc_config = params.multiqc_config ? Channel.fromPath(params.multiqc_config, checkIfExists: true) : - Channel.empty() - ch_multiqc_logo = params.multiqc_logo ? + Channel.fromPath("$projectDir/assets/multiqc_config.yml", checkIfExists: true) + ch_multiqc_logo = params.multiqc_logo ? Channel.fromPath(params.multiqc_logo, checkIfExists: true) : Channel.empty() - summary_params = paramsSummaryMap( + summary_params = paramsSummaryMap( workflow, parameters_schema: "nextflow_schema.json") - ch_workflow_summary = Channel.value(paramsSummaryMultiqc(summary_params)) - + ch_workflow_summary = Channel.value( + paramsSummaryMultiqc(summary_params)) ch_multiqc_custom_methods_description = params.multiqc_methods_description ? 
file(params.multiqc_methods_description, checkIfExists: true) : file("$projectDir/assets/methods_description_template.yml", checkIfExists: true) ch_methods_description = Channel.value( methodsDescriptionText(ch_multiqc_custom_methods_description)) - ch_multiqc_files = ch_multiqc_files.mix( + ch_multiqc_extra_files = ch_multiqc_extra_files.mix( ch_workflow_summary.collectFile(name: 'workflow_summary_mqc.yaml')) - ch_multiqc_files = ch_multiqc_files.mix(ch_collated_versions) - ch_multiqc_files = ch_multiqc_files.mix( + ch_multiqc_extra_files = ch_multiqc_extra_files.mix(ch_collated_versions) + ch_multiqc_extra_files = ch_multiqc_extra_files.mix( ch_methods_description.collectFile( name: 'methods_description_mqc.yaml', sort: true ) ) - MULTIQC ( - ch_multiqc_files.collect(), + MULTIQC_GLOBAL ( + ch_multiqc_files + .map { meta, file -> file } + .mix(ch_multiqc_extra_files) + .collect(), + ch_multiqc_config.toList(), + Channel.empty().toList(), + ch_multiqc_logo.toList() + ) + + // Generate reports by lane + multiqc_extra_files_per_lane = ch_multiqc_files + .filter { meta, sample -> meta.lane } + .map { meta, sample -> [lane: meta.lane] } + .unique() + .combine(ch_multiqc_extra_files) + + lane_mqc_files = ch_multiqc_files + .filter { meta, sample -> meta.lane } + .mix(multiqc_extra_files_per_lane) + .map { meta, sample -> [ "[LANE:${meta.lane}]", meta, sample ] } + .groupTuple() + .tap { mqc_by_lane } + .collectFile{ + lane, meta, samples -> [ + "${lane}_multiqc_extra_config.yml", + """ + |output_fn_name: \"${lane}_multiqc_report.html\" + |data_dir_name: \"${lane}_multiqc_data\" + |plots_dir_name: \"${lane}_multiqc_plots\" + """.stripMargin() + ] + } + .map { file -> [ (file =~ /(\[LANE:.+\])/)[0][1], file ] } + .join(mqc_by_lane) + .multiMap { lane, config, meta , samples_per_lane -> + samples_per_lane: samples_per_lane + config: config + } + + MULTIQC_PER_LANE( + lane_mqc_files.samples_per_lane, + ch_multiqc_config.toList(), + lane_mqc_files.config, + 
ch_multiqc_logo.toList() + ) + + // Generate reports by group + multiqc_extra_files_per_group = ch_multiqc_files + .filter { meta, sample -> meta.group } + .map { meta, sample -> meta.group } + .unique() + .map { group -> [group:group] } + .combine(ch_multiqc_extra_files) + + group_mqc_files = ch_multiqc_files + .filter { meta, sample -> meta.group } + .mix(multiqc_extra_files_per_group) + .map { meta, sample -> [ "[GROUP:${meta.group}]", meta, sample ] } + .groupTuple() + .tap { mqc_by_group } + .collectFile{ + group, meta, samples -> [ + "${group}_multiqc_extra_config.yml", + """ + |output_fn_name: \"${group}_multiqc_report.html\" + |data_dir_name: \"${group}_multiqc_data\" + |plots_dir_name: \"${group}_multiqc_plots\" + """.stripMargin() + ] + } + .map { file -> [ (file =~ /(\[GROUP:.+\])/)[0][1], file ] } + .join(mqc_by_group) + .multiMap { group, config, meta , samples_per_group -> + samples_per_group: samples_per_group + config: config + } + + MULTIQC_PER_GROUP( + group_mqc_files.samples_per_group, + ch_multiqc_config.toList(), + group_mqc_files.config, + ch_multiqc_logo.toList() + ) + + // Generate reports by rundir + multiqc_extra_files_per_rundir = ch_multiqc_files + .filter { meta, sample -> meta.rundir } + .map { meta, sample -> meta.rundir } + .unique() + .map { rundir -> [rundir:rundir] } + .combine(ch_multiqc_extra_files) + + rundir_mqc_files = ch_multiqc_files + .filter { meta, sample -> meta.rundir } + .mix(multiqc_extra_files_per_rundir) + .map { meta, sample -> [ "[RUNDIR:${meta.rundir.name}]", meta, sample ] } + .groupTuple() + .tap { mqc_by_rundir } + .collectFile{ + rundir, meta, samples -> [ + "${rundir}_multiqc_extra_config.yml", + """ + |output_fn_name: \"${rundir}_multiqc_report.html\" + |data_dir_name: \"${rundir}_multiqc_data\" + |plots_dir_name: \"${rundir}_multiqc_plots\" + """.stripMargin() + ] + } + .map { file -> [ (file =~ /(\[RUNDIR:.+\])/)[0][1], file ] } + .join(mqc_by_rundir) + .multiMap { rundir, config, meta , 
samples_per_rundir -> + samples_per_rundir: samples_per_rundir + config: config + } + + MULTIQC_PER_RUNDIR( + rundir_mqc_files.samples_per_rundir, ch_multiqc_config.toList(), - ch_multiqc_custom_config.toList(), + rundir_mqc_files.config, ch_multiqc_logo.toList() ) emit: - multiqc_report = MULTIQC.out.report.toList() // channel: /path/to/multiqc_report.html - versions = ch_versions // channel: [ path(versions.yml) ] + global_report = MULTIQC_GLOBAL.out.report.toList() // channel: /path/to/multiqc_report.html + lane_reports = MULTIQC_PER_LANE.out.report.toList() // channel: [ /path/to/multiqc_report.html ] + group_reports = MULTIQC_PER_GROUP.out.report.toList() // channel: [ /path/to/multiqc_report.html ] + rundir_reports = MULTIQC_PER_RUNDIR.out.report.toList() // channel: [ /path/to/multiqc_report.html ] + versions = ch_versions // channel: [ path(versions.yml) ] } /*