From a31040e055e38f8a530a99fcc865b2bf9f2e14b9 Mon Sep 17 00:00:00 2001 From: Adrien Coulier Date: Mon, 18 Mar 2024 15:16:00 +0100 Subject: [PATCH 01/34] Generate reports per lane, group and rundir --- conf/modules.config | 20 ++++++- main.nf | 5 +- workflows/seqinspector.nf | 120 ++++++++++++++++++++++++++++++++++---- 3 files changed, 133 insertions(+), 12 deletions(-) diff --git a/conf/modules.config b/conf/modules.config index e3ea8fa..296ca78 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -35,7 +35,25 @@ process { publishDir = [ path: { "${params.outdir}/multiqc" }, mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + saveAs: { + filename -> + switch (filename) { + case 'versions.yml': + null + break + case ~/L\d+_multiqc_(report\.html|plots|data)/: + "lanes/L${(filename =~ /L(\d+)_multiqc_(report\.html|plots|data)/)[0][1]}/${filename}" + break + case ~/G-.+_multiqc_(report\.html|plots|data)/: + "groups/G-${(filename =~ /G-(.+)_multiqc_(report\.html|plots|data)/)[0][1]}/${filename}" + break + case ~/D-.+_multiqc_(report\.html|plots|data)/: + "rundirs/D-${(filename =~ /D-(.+)_multiqc_(report\.html|plots|data)/)[0][1]}/${filename}" + break + default: + filename + } + } ] } diff --git a/main.nf b/main.nf index 1e9c2ed..58afd1f 100644 --- a/main.nf +++ b/main.nf @@ -58,7 +58,10 @@ workflow NFCORE_SEQINSPECTOR { ) emit: - multiqc_report = SEQINSPECTOR.out.multiqc_report // channel: /path/to/multiqc_report.html + global_report = SEQINSPECTOR.out.global_report // channel: /path/to/multiqc_report.html + lane_reports = SEQINSPECTOR.out.lane_reports // channel: /path/to/multiqc_report.html + group_report = SEQINSPECTOR.out.group_reports // channel: /path/to/multiqc_report.html + rundir_report = SEQINSPECTOR.out.rundir_reports // channel: /path/to/multiqc_report.html } /* diff --git a/workflows/seqinspector.nf b/workflows/seqinspector.nf index 9ae3384..b6fd3bc 100644 --- a/workflows/seqinspector.nf +++ 
b/workflows/seqinspector.nf @@ -5,7 +5,10 @@ */ include { FASTQC } from '../modules/nf-core/fastqc/main' -include { MULTIQC } from '../modules/nf-core/multiqc/main' +include { MULTIQC } from '../modules/nf-core/multiqc/main' +include { MULTIQC as MULTIQC_PER_LANE } from '../modules/nf-core/multiqc/main' +include { MULTIQC as MULTIQC_PER_GROUP } from '../modules/nf-core/multiqc/main' +include { MULTIQC as MULTIQC_PER_RUNDIR } from '../modules/nf-core/multiqc/main' include { paramsSummaryMap } from 'plugin/nf-validation' include { paramsSummaryMultiqc } from '../subworkflows/nf-core/utils_nfcore_pipeline' include { softwareVersionsToYAML } from '../subworkflows/nf-core/utils_nfcore_pipeline' @@ -26,6 +29,8 @@ workflow SEQINSPECTOR { ch_versions = Channel.empty() ch_multiqc_files = Channel.empty() + ch_multiqc_extra_files = Channel.empty() + ch_multiqc_reports = Channel.empty() // // MODULE: Run FastQC @@ -33,7 +38,7 @@ workflow SEQINSPECTOR { FASTQC ( ch_samplesheet ) - ch_multiqc_files = ch_multiqc_files.mix(FASTQC.out.zip.collect{it[1]}) + ch_multiqc_files = ch_multiqc_files.mix(FASTQC.out.zip) ch_versions = ch_versions.mix(FASTQC.out.versions.first()) // @@ -46,26 +51,121 @@ workflow SEQINSPECTOR { // // MODULE: MultiQC // - ch_multiqc_config = Channel.fromPath("$projectDir/assets/multiqc_config.yml", checkIfExists: true) - ch_multiqc_custom_config = params.multiqc_config ? Channel.fromPath(params.multiqc_config, checkIfExists: true) : Channel.empty() + ch_multiqc_config = params.multiqc_config ? + Channel.fromPath(params.multiqc_config, checkIfExists: true) : + Channel.fromPath("$projectDir/assets/multiqc_config.yml", checkIfExists: true) ch_multiqc_logo = params.multiqc_logo ? 
Channel.fromPath(params.multiqc_logo, checkIfExists: true) : Channel.empty() summary_params = paramsSummaryMap(workflow, parameters_schema: "nextflow_schema.json") ch_workflow_summary = Channel.value(paramsSummaryMultiqc(summary_params)) ch_multiqc_custom_methods_description = params.multiqc_methods_description ? file(params.multiqc_methods_description, checkIfExists: true) : file("$projectDir/assets/methods_description_template.yml", checkIfExists: true) ch_methods_description = Channel.value(methodsDescriptionText(ch_multiqc_custom_methods_description)) - ch_multiqc_files = ch_multiqc_files.mix(ch_workflow_summary.collectFile(name: 'workflow_summary_mqc.yaml')) - ch_multiqc_files = ch_multiqc_files.mix(ch_collated_versions) - ch_multiqc_files = ch_multiqc_files.mix(ch_methods_description.collectFile(name: 'methods_description_mqc.yaml', sort: false)) + ch_multiqc_extra_files = ch_multiqc_extra_files.mix(ch_workflow_summary.collectFile(name: 'workflow_summary_mqc.yaml')) + ch_multiqc_extra_files = ch_multiqc_extra_files.mix(ch_collated_versions) + ch_multiqc_extra_files = ch_multiqc_extra_files.mix(ch_methods_description.collectFile(name: 'methods_description_mqc.yaml', sort: false)) MULTIQC ( - ch_multiqc_files.collect(), + ch_multiqc_files + .map { meta, file -> file } + .mix(ch_multiqc_extra_files) + .collect(), ch_multiqc_config.toList(), - ch_multiqc_custom_config.toList(), + Channel.empty().toList(), + ch_multiqc_logo.toList() + ) + + multiqc_extra_files = ch_multiqc_extra_files.toList() + + // Generate reports by lane + lane_mqc_files = ch_multiqc_files + .map { meta, sample -> [ "L${meta.lane}", meta, sample ] } + .groupTuple() + .tap { mqc_by_lane } + .collectFile{ + lane, meta, samples -> [ + "${lane}_multiqc_extra_config.yml", + "output_fn_name: \"${lane}_multiqc_report.html\"\ndata_dir_name: \"${lane}_multiqc_data\"\nplots_dir_name: \"${lane}_multiqc_plots\"" + ] + } + .map { file -> def fileparts = file.name.split("_") + [ fileparts[0], file ] + } + 
.join(mqc_by_lane) + .multiMap { lane, config, meta , samples_per_lane -> + samples_per_lane: samples_per_lane + config: config + } + + MULTIQC_PER_LANE( + lane_mqc_files.samples_per_lane + .map { samples -> samples + multiqc_extra_files.value }, + ch_multiqc_config.toList(), + lane_mqc_files.config, + ch_multiqc_logo.toList() + ) + + // Generate reports by group + group_mqc_files = ch_multiqc_files + .filter { meta, sample -> meta.group } + .map { meta, sample -> [ "G-${meta.group}", meta, sample ] } + .groupTuple() + .tap { mqc_by_group } + .collectFile{ + group, meta, samples -> [ + "${group}_multiqc_extra_config.yml", + "output_fn_name: \"${group}_multiqc_report.html\"\ndata_dir_name: \"${group}_multiqc_data\"\nplots_dir_name: \"${group}_multiqc_plots\"" + ] + } + .map { file -> def fileparts = file.name.split("_") + [ fileparts[0], file ] + } + .join(mqc_by_group) + .multiMap { group, config, meta , samples_per_group -> + samples_per_group: samples_per_group + config: config + } + + MULTIQC_PER_GROUP( + group_mqc_files.samples_per_group + .map { samples -> samples + multiqc_extra_files.value }, + ch_multiqc_config.toList(), + group_mqc_files.config, + ch_multiqc_logo.toList() + ) + + // Generate reports by rundir + rundir_mqc_files = ch_multiqc_files + .filter { meta, sample -> meta.rundir } + .map { meta, sample -> [ "D-${meta.rundir.name}", meta, sample ] } + .groupTuple() + .tap { mqc_by_rundir } + .collectFile{ + rundir, meta, samples -> [ + "${rundir}_multiqc_extra_config.yml", + "output_fn_name: \"${rundir}_multiqc_report.html\"\ndata_dir_name: \"${rundir}_multiqc_data\"\nplots_dir_name: \"${rundir}_multiqc_plots\"" + ] + } + .map { file -> def fileparts = file.name.split("_") + [ fileparts[0], file ] + } + .join(mqc_by_rundir) + .multiMap { rundir, config, meta , samples_per_rundir -> + samples_per_rundir: samples_per_rundir + config: config + } + + MULTIQC_PER_RUNDIR( + rundir_mqc_files.samples_per_rundir + .map { samples -> samples + 
multiqc_extra_files.value }, + ch_multiqc_config.toList(), + rundir_mqc_files.config, ch_multiqc_logo.toList() ) emit: - multiqc_report = MULTIQC.out.report.toList() // channel: /path/to/multiqc_report.html + global_report = MULTIQC.out.report.toList() // channel: /path/to/multiqc_report.html + lane_reports = MULTIQC_PER_LANE.out.report.toList() // channel: [ /path/to/multiqc_report.html ] + group_reports = MULTIQC_PER_GROUP.out.report.toList() // channel: [ /path/to/multiqc_report.html ] + rundir_reports = MULTIQC_PER_RUNDIR.out.report.toList() // channel: [ /path/to/multiqc_report.html ] versions = ch_versions // channel: [ path(versions.yml) ] } From 0da58701e706f23001df33bf997bf671ea37a787 Mon Sep 17 00:00:00 2001 From: Adrien Coulier Date: Thu, 28 Mar 2024 16:21:45 +0100 Subject: [PATCH 02/34] Improve formatting --- conf/modules.config | 2 +- main.nf | 6 ++-- workflows/seqinspector.nf | 71 +++++++++++++++++++++++++-------------- 3 files changed, 50 insertions(+), 29 deletions(-) diff --git a/conf/modules.config b/conf/modules.config index 296ca78..8b7a9c6 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -52,7 +52,7 @@ process { break default: filename - } + } } ] } diff --git a/main.nf b/main.nf index 58afd1f..1ee043e 100644 --- a/main.nf +++ b/main.nf @@ -58,9 +58,9 @@ workflow NFCORE_SEQINSPECTOR { ) emit: - global_report = SEQINSPECTOR.out.global_report // channel: /path/to/multiqc_report.html - lane_reports = SEQINSPECTOR.out.lane_reports // channel: /path/to/multiqc_report.html - group_report = SEQINSPECTOR.out.group_reports // channel: /path/to/multiqc_report.html + global_report = SEQINSPECTOR.out.global_report // channel: /path/to/multiqc_report.html + lane_reports = SEQINSPECTOR.out.lane_reports // channel: /path/to/multiqc_report.html + group_report = SEQINSPECTOR.out.group_reports // channel: /path/to/multiqc_report.html rundir_report = SEQINSPECTOR.out.rundir_reports // channel: /path/to/multiqc_report.html } diff --git 
a/workflows/seqinspector.nf b/workflows/seqinspector.nf index b6fd3bc..f0a269a 100644 --- a/workflows/seqinspector.nf +++ b/workflows/seqinspector.nf @@ -4,15 +4,17 @@ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ -include { FASTQC } from '../modules/nf-core/fastqc/main' -include { MULTIQC } from '../modules/nf-core/multiqc/main' -include { MULTIQC as MULTIQC_PER_LANE } from '../modules/nf-core/multiqc/main' -include { MULTIQC as MULTIQC_PER_GROUP } from '../modules/nf-core/multiqc/main' +include { FASTQC } from '../modules/nf-core/fastqc/main' + +include { MULTIQC } from '../modules/nf-core/multiqc/main' +include { MULTIQC as MULTIQC_PER_LANE } from '../modules/nf-core/multiqc/main' +include { MULTIQC as MULTIQC_PER_GROUP } from '../modules/nf-core/multiqc/main' include { MULTIQC as MULTIQC_PER_RUNDIR } from '../modules/nf-core/multiqc/main' -include { paramsSummaryMap } from 'plugin/nf-validation' -include { paramsSummaryMultiqc } from '../subworkflows/nf-core/utils_nfcore_pipeline' -include { softwareVersionsToYAML } from '../subworkflows/nf-core/utils_nfcore_pipeline' -include { methodsDescriptionText } from '../subworkflows/local/utils_nfcore_seqinspector_pipeline' + +include { paramsSummaryMap } from 'plugin/nf-validation' +include { paramsSummaryMultiqc } from '../subworkflows/nf-core/utils_nfcore_pipeline' +include { softwareVersionsToYAML } from '../subworkflows/nf-core/utils_nfcore_pipeline' +include { methodsDescriptionText } from '../subworkflows/local/utils_nfcore_seqinspector_pipeline' /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -27,10 +29,10 @@ workflow SEQINSPECTOR { main: - ch_versions = Channel.empty() - ch_multiqc_files = Channel.empty() + ch_versions = Channel.empty() + ch_multiqc_files = Channel.empty() ch_multiqc_extra_files = Channel.empty() - ch_multiqc_reports = Channel.empty() + ch_multiqc_reports = Channel.empty() // // MODULE: Run FastQC @@ 
-45,23 +47,42 @@ workflow SEQINSPECTOR { // Collate and save software versions // softwareVersionsToYAML(ch_versions) - .collectFile(storeDir: "${params.outdir}/pipeline_info", name: 'nf_core_pipeline_software_mqc_versions.yml', sort: true, newLine: true) - .set { ch_collated_versions } + .collectFile( + storeDir: "${params.outdir}/pipeline_info", + name: 'nf_core_pipeline_software_mqc_versions.yml', + sort: true, + newLine: true + ).set { ch_collated_versions } // // MODULE: MultiQC // - ch_multiqc_config = params.multiqc_config ? + ch_multiqc_config = params.multiqc_config ? Channel.fromPath(params.multiqc_config, checkIfExists: true) : Channel.fromPath("$projectDir/assets/multiqc_config.yml", checkIfExists: true) - ch_multiqc_logo = params.multiqc_logo ? Channel.fromPath(params.multiqc_logo, checkIfExists: true) : Channel.empty() - summary_params = paramsSummaryMap(workflow, parameters_schema: "nextflow_schema.json") - ch_workflow_summary = Channel.value(paramsSummaryMultiqc(summary_params)) - ch_multiqc_custom_methods_description = params.multiqc_methods_description ? file(params.multiqc_methods_description, checkIfExists: true) : file("$projectDir/assets/methods_description_template.yml", checkIfExists: true) - ch_methods_description = Channel.value(methodsDescriptionText(ch_multiqc_custom_methods_description)) - ch_multiqc_extra_files = ch_multiqc_extra_files.mix(ch_workflow_summary.collectFile(name: 'workflow_summary_mqc.yaml')) - ch_multiqc_extra_files = ch_multiqc_extra_files.mix(ch_collated_versions) - ch_multiqc_extra_files = ch_multiqc_extra_files.mix(ch_methods_description.collectFile(name: 'methods_description_mqc.yaml', sort: false)) + ch_multiqc_logo = params.multiqc_logo ? 
+ Channel.fromPath(params.multiqc_logo, checkIfExists: true) : + Channel.empty() + + summary_params = paramsSummaryMap( + workflow, parameters_schema: "nextflow_schema.json") + ch_workflow_summary = Channel.value( + paramsSummaryMultiqc(summary_params)) + ch_multiqc_custom_methods_description = params.multiqc_methods_description ? + file(params.multiqc_methods_description, checkIfExists: true) : + file("$projectDir/assets/methods_description_template.yml", checkIfExists: true) + ch_methods_description = Channel.value( + methodsDescriptionText(ch_multiqc_custom_methods_description)) + + ch_multiqc_extra_files = ch_multiqc_extra_files.mix( + ch_workflow_summary.collectFile(name: 'workflow_summary_mqc.yaml')) + ch_multiqc_extra_files = ch_multiqc_extra_files.mix(ch_collated_versions) + ch_multiqc_extra_files = ch_multiqc_extra_files.mix( + ch_methods_description.collectFile( + name: 'methods_description_mqc.yaml', + sort: false + ) + ) MULTIQC ( ch_multiqc_files @@ -163,10 +184,10 @@ workflow SEQINSPECTOR { emit: global_report = MULTIQC.out.report.toList() // channel: /path/to/multiqc_report.html - lane_reports = MULTIQC_PER_LANE.out.report.toList() // channel: [ /path/to/multiqc_report.html ] - group_reports = MULTIQC_PER_GROUP.out.report.toList() // channel: [ /path/to/multiqc_report.html ] + lane_reports = MULTIQC_PER_LANE.out.report.toList() // channel: [ /path/to/multiqc_report.html ] + group_reports = MULTIQC_PER_GROUP.out.report.toList() // channel: [ /path/to/multiqc_report.html ] rundir_reports = MULTIQC_PER_RUNDIR.out.report.toList() // channel: [ /path/to/multiqc_report.html ] - versions = ch_versions // channel: [ path(versions.yml) ] + versions = ch_versions // channel: [ path(versions.yml) ] } /* From d233d8f4052208b4e572ddb1e8d561387df1cce5 Mon Sep 17 00:00:00 2001 From: Adrien Coulier Date: Thu, 28 Mar 2024 16:56:51 +0100 Subject: [PATCH 03/34] Improve output sorting --- conf/modules.config | 18 +++++++++++++++--- 1 file changed, 15 insertions(+), 3 
deletions(-) diff --git a/conf/modules.config b/conf/modules.config index 8b7a9c6..3b2fc02 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -42,13 +42,25 @@ process { null break case ~/L\d+_multiqc_(report\.html|plots|data)/: - "lanes/L${(filename =~ /L(\d+)_multiqc_(report\.html|plots|data)/)[0][1]}/${filename}" + def lane = (filename =~ /L(\d+)_multiqc_(report\.html|plots|data)/)[0][1] + def new_filename = filename.replaceFirst( + "(?<prefix>.*)L${lane}_(?<suffix>multiqc_(report\\.html|plots|data).*)", + '${prefix}${suffix}') + "lanes/L${lane}/${new_filename}" break case ~/G-.+_multiqc_(report\.html|plots|data)/: - "groups/G-${(filename =~ /G-(.+)_multiqc_(report\.html|plots|data)/)[0][1]}/${filename}" + def group = (filename =~ /G-(.+)_multiqc_(report\.html|plots|data)/)[0][1] + def new_filename = filename.replaceFirst( + "(?<prefix>.*)G-${group}_(?<suffix>multiqc_(report\\.html|plots|data).*)", + '${prefix}${suffix}') + "groups/${group}/${new_filename}" break case ~/D-.+_multiqc_(report\.html|plots|data)/: - "rundirs/D-${(filename =~ /D-(.+)_multiqc_(report\.html|plots|data)/)[0][1]}/${filename}" + def rundir = (filename =~ /D-(.+)_multiqc_(report\.html|plots|data)/)[0][1] + def new_filename = filename.replaceFirst( + "(?<prefix>.*)D-${rundir}_(?<suffix>multiqc_(report\\.html|plots|data).*)", + '${prefix}${suffix}') + "rundirs/${rundir}/${new_filename}" break default: filename From 307e43c6a21c77ad30c8f02997c3220568d35fb2 Mon Sep 17 00:00:00 2001 From: Adrien Coulier Date: Mon, 8 Apr 2024 14:11:47 +0200 Subject: [PATCH 04/34] Use `group` instead of `project` --- assets/schema_input.json | 4 ++-- subworkflows/local/utils_nfcore_seqinspector_pipeline/main.nf | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/assets/schema_input.json b/assets/schema_input.json index 9fb321b..1648944 100644 --- a/assets/schema_input.json +++ b/assets/schema_input.json @@ -19,11 +19,11 @@ "errorMessage": "Lane ID must be a number", "meta": ["lane"] }, - "project": { + "group": { "type": "string", 
"pattern": "^\\S+$", "errorMessage": "Project ID cannot contain spaces", - "meta": ["project"] + "meta": ["group"] }, "fastq_1": { "type": "string", diff --git a/subworkflows/local/utils_nfcore_seqinspector_pipeline/main.nf b/subworkflows/local/utils_nfcore_seqinspector_pipeline/main.nf index cdbb640..e1f8e4d 100644 --- a/subworkflows/local/utils_nfcore_seqinspector_pipeline/main.nf +++ b/subworkflows/local/utils_nfcore_seqinspector_pipeline/main.nf @@ -84,7 +84,7 @@ workflow PIPELINE_INITIALISATION { .fromSamplesheet("input") // Validates samplesheet against $projectDir/assets/schema_input.json. Path to validation schema is defined by $projectDir/nextflow_schema.json .map { meta, fastq_1, fastq_2 -> - def id_string = "${meta.sample}_${meta.project ?: "ungrouped"}_${meta.lane}" + def id_string = "${meta.sample}_${meta.group ?: "ungrouped"}_${meta.lane}" def updated_meta = meta + [ id: id_string ] if (!fastq_2) { return [ updated_meta.id, updated_meta + [ single_end:true ], [ fastq_1 ] ] From 627cf940a2a6b5f7cca7cbb692af1cf74293a289 Mon Sep 17 00:00:00 2001 From: Adrien Coulier Date: Mon, 8 Apr 2024 14:21:59 +0200 Subject: [PATCH 05/34] Fix output channel --- main.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/main.nf b/main.nf index 1ee043e..9de4f94 100644 --- a/main.nf +++ b/main.nf @@ -104,7 +104,7 @@ workflow { params.outdir, params.monochrome_logs, params.hook_url, - NFCORE_SEQINSPECTOR.out.multiqc_report + NFCORE_SEQINSPECTOR.out.global_report, ) } From c9ba02866dc35a1b345476645ece7f9a6e48cc8b Mon Sep 17 00:00:00 2001 From: Adrien Coulier Date: Mon, 8 Apr 2024 14:22:17 +0200 Subject: [PATCH 06/34] Fix linting --- conf/modules.config | 2 +- .../local/utils_nfcore_seqinspector_pipeline/main.nf | 1 - workflows/seqinspector.nf | 6 +++--- 3 files changed, 4 insertions(+), 5 deletions(-) diff --git a/conf/modules.config b/conf/modules.config index 3b2fc02..44a3513 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -36,7 +36,7 @@ 
process { path: { "${params.outdir}/multiqc" }, mode: params.publish_dir_mode, saveAs: { - filename -> + filename -> switch (filename) { case 'versions.yml': null diff --git a/subworkflows/local/utils_nfcore_seqinspector_pipeline/main.nf b/subworkflows/local/utils_nfcore_seqinspector_pipeline/main.nf index e1f8e4d..9001400 100644 --- a/subworkflows/local/utils_nfcore_seqinspector_pipeline/main.nf +++ b/subworkflows/local/utils_nfcore_seqinspector_pipeline/main.nf @@ -101,7 +101,6 @@ workflow PIPELINE_INITIALISATION { // meta, fastqs -> // return [ meta, fastqs.flatten() ] // } - .view() .set { ch_samplesheet } emit: diff --git a/workflows/seqinspector.nf b/workflows/seqinspector.nf index f0a269a..815bc40 100644 --- a/workflows/seqinspector.nf +++ b/workflows/seqinspector.nf @@ -108,7 +108,7 @@ workflow SEQINSPECTOR { ] } .map { file -> def fileparts = file.name.split("_") - [ fileparts[0], file ] + [ fileparts[0], file ] } .join(mqc_by_lane) .multiMap { lane, config, meta , samples_per_lane -> @@ -137,7 +137,7 @@ workflow SEQINSPECTOR { ] } .map { file -> def fileparts = file.name.split("_") - [ fileparts[0], file ] + [ fileparts[0], file ] } .join(mqc_by_group) .multiMap { group, config, meta , samples_per_group -> @@ -166,7 +166,7 @@ workflow SEQINSPECTOR { ] } .map { file -> def fileparts = file.name.split("_") - [ fileparts[0], file ] + [ fileparts[0], file ] } .join(mqc_by_rundir) .multiMap { rundir, config, meta , samples_per_rundir -> From 2cfc91d449bac7c1a6d22d125fd57e3d8592a51b Mon Sep 17 00:00:00 2001 From: Adrien Coulier Date: Mon, 8 Apr 2024 15:48:41 +0200 Subject: [PATCH 07/34] Give credits back to NGI --- README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 5e2bc4c..abe0d4e 100644 --- a/README.md +++ b/README.md @@ -80,11 +80,11 @@ For more details about the output files and reports, please refer to the ## Credits -nf-core/seqinspector was originally written by Adrien Coulier. 
+nf-core/seqinspector was originally written by the Swedish [@NationalGenomicsInfrastructure](https://github.com/NationalGenomicsInfrastructure/). We thank the following people for their extensive assistance in the development of this pipeline: - +- [@mahesh-panchal](https://github.com/mahesh-panchal) ## Contributions and Support From fbfb02dbba5945fcfcf8d431cccd7a230eea5ff5 Mon Sep 17 00:00:00 2001 From: Adrien Coulier Date: Tue, 9 Apr 2024 09:26:32 +0200 Subject: [PATCH 08/34] Fix file names --- conf/modules.config | 18 +++++++++--------- workflows/seqinspector.nf | 24 ++++++++++++++++++------ 2 files changed, 27 insertions(+), 15 deletions(-) diff --git a/conf/modules.config b/conf/modules.config index 44a3513..b2e48f3 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -41,24 +41,24 @@ process { case 'versions.yml': null break - case ~/L\d+_multiqc_(report\.html|plots|data)/: - def lane = (filename =~ /L(\d+)_multiqc_(report\.html|plots|data)/)[0][1] + case ~/\[LANE:\d+\]_multiqc_(report\.html|plots|data)/: + def lane = (filename =~ /\[LANE:(\d+)\]_multiqc_(report\.html|plots|data)/)[0][1] def new_filename = filename.replaceFirst( - "(?<prefix>.*)L${lane}_(?<suffix>multiqc_(report\\.html|plots|data).*)", + "(?<prefix>.*)\\[LANE:${lane}\\]_(?<suffix>multiqc_(report\\.html|plots|data).*)", '${prefix}${suffix}') "lanes/L${lane}/${new_filename}" break - case ~/G-.+_multiqc_(report\.html|plots|data)/: - def group = (filename =~ /G-(.+)_multiqc_(report\.html|plots|data)/)[0][1] + case ~/\[GROUP:.+\]_multiqc_(report\.html|plots|data)/: + def group = (filename =~ /\[GROUP:(.+)\]_multiqc_(report\.html|plots|data)/)[0][1] def new_filename = filename.replaceFirst( - "(?<prefix>.*)G-${group}_(?<suffix>multiqc_(report\\.html|plots|data).*)", + "(?<prefix>.*)\\[GROUP:${group}\\]_(?<suffix>multiqc_(report\\.html|plots|data).*)", '${prefix}${suffix}') "groups/${group}/${new_filename}" break - case ~/D-.+_multiqc_(report\.html|plots|data)/: - def rundir = (filename =~ /D-(.+)_multiqc_(report\.html|plots|data)/)[0][1] + case 
~/\[RUNDIR:.+\]_multiqc_(report\.html|plots|data)/: + def rundir = (filename =~ /\[RUNDIR:(.+)\]_multiqc_(report\.html|plots|data)/)[0][1] def new_filename = filename.replaceFirst( - "(?<prefix>.*)D-${rundir}_(?<suffix>multiqc_(report\\.html|plots|data).*)", + "(?<prefix>.*)\\[RUNDIR:${rundir}\\]_(?<suffix>multiqc_(report\\.html|plots|data).*)", '${prefix}${suffix}') "rundirs/${rundir}/${new_filename}" break diff --git a/workflows/seqinspector.nf b/workflows/seqinspector.nf index 815bc40..2acafc3 100644 --- a/workflows/seqinspector.nf +++ b/workflows/seqinspector.nf @@ -98,13 +98,17 @@ workflow SEQINSPECTOR { // Generate reports by lane lane_mqc_files = ch_multiqc_files - .map { meta, sample -> [ "L${meta.lane}", meta, sample ] } + .map { meta, sample -> [ "[LANE:${meta.lane}]", meta, sample ] } .groupTuple() .tap { mqc_by_lane } .collectFile{ lane, meta, samples -> [ "${lane}_multiqc_extra_config.yml", - "output_fn_name: \"${lane}_multiqc_report.html\"\ndata_dir_name: \"${lane}_multiqc_data\"\nplots_dir_name: \"${lane}_multiqc_plots\"" + """ + |output_fn_name: \"${lane}_multiqc_report.html\" + |data_dir_name: \"${lane}_multiqc_data\" + |plots_dir_name: \"${lane}_multiqc_plots\" + """.stripMargin() ] } .map { file -> def fileparts = file.name.split("_") @@ -127,13 +131,17 @@ workflow SEQINSPECTOR { // Generate reports by group group_mqc_files = ch_multiqc_files .filter { meta, sample -> meta.group } - .map { meta, sample -> [ "G-${meta.group}", meta, sample ] } + .map { meta, sample -> [ "[GROUP:${meta.group}]", meta, sample ] } .groupTuple() .tap { mqc_by_group } .collectFile{ group, meta, samples -> [ "${group}_multiqc_extra_config.yml", - "output_fn_name: \"${group}_multiqc_report.html\"\ndata_dir_name: \"${group}_multiqc_data\"\nplots_dir_name: \"${group}_multiqc_plots\"" + """ + |output_fn_name: \"${group}_multiqc_report.html\" + |data_dir_name: \"${group}_multiqc_data\" + |plots_dir_name: \"${group}_multiqc_plots\" + """.stripMargin() ] } .map { file -> def fileparts = file.name.split("_") 
@@ -156,13 +164,17 @@ workflow SEQINSPECTOR { // Generate reports by rundir rundir_mqc_files = ch_multiqc_files .filter { meta, sample -> meta.rundir } - .map { meta, sample -> [ "D-${meta.rundir.name}", meta, sample ] } + .map { meta, sample -> [ "[RUNDIR:${meta.rundir.name}]", meta, sample ] } .groupTuple() .tap { mqc_by_rundir } .collectFile{ rundir, meta, samples -> [ "${rundir}_multiqc_extra_config.yml", - "output_fn_name: \"${rundir}_multiqc_report.html\"\ndata_dir_name: \"${rundir}_multiqc_data\"\nplots_dir_name: \"${rundir}_multiqc_plots\"" + """ + |output_fn_name: \"${rundir}_multiqc_report.html\" + |data_dir_name: \"${rundir}_multiqc_data\" + |plots_dir_name: \"${rundir}_multiqc_plots\" + """.stripMargin() ] } .map { file -> def fileparts = file.name.split("_") From 8a19929d03832ecaf95a50962898882de04e6884 Mon Sep 17 00:00:00 2001 From: Adrien Coulier Date: Fri, 3 May 2024 09:50:42 +0200 Subject: [PATCH 09/34] Set up tests --- .gitignore | 1 + nf-test.config | 8 ++++++++ tests/main.nf.test | 20 +++++++++++++++++++ tests/nextflow.config | 5 +++++ tests/workflows/seqinspector.nf.test | 30 ++++++++++++++++++++++++++++ 5 files changed, 64 insertions(+) create mode 100644 nf-test.config create mode 100644 tests/main.nf.test create mode 100644 tests/nextflow.config create mode 100644 tests/workflows/seqinspector.nf.test diff --git a/.gitignore b/.gitignore index 5124c9a..089a407 100644 --- a/.gitignore +++ b/.gitignore @@ -6,3 +6,4 @@ results/ testing/ testing* *.pyc +.nf-test diff --git a/nf-test.config b/nf-test.config new file mode 100644 index 0000000..6969c08 --- /dev/null +++ b/nf-test.config @@ -0,0 +1,8 @@ +config { + + testsDir "tests" + workDir ".nf-test" + configFile "tests/nextflow.config" + profile "test,docker" + +} diff --git a/tests/main.nf.test b/tests/main.nf.test new file mode 100644 index 0000000..72498b5 --- /dev/null +++ b/tests/main.nf.test @@ -0,0 +1,20 @@ +nextflow_pipeline { + + name "Test Workflow main.nf" + script "main.nf" + + 
test("Should run without failures") { + + when { + params { + outdir = "tests/results" + } + } + + then { + assert workflow.success + } + + } + +} diff --git a/tests/nextflow.config b/tests/nextflow.config new file mode 100644 index 0000000..c19b1ad --- /dev/null +++ b/tests/nextflow.config @@ -0,0 +1,5 @@ +/* +======================================================================================== + Nextflow config file for running tests +======================================================================================== +*/ diff --git a/tests/workflows/seqinspector.nf.test b/tests/workflows/seqinspector.nf.test new file mode 100644 index 0000000..bbb529d --- /dev/null +++ b/tests/workflows/seqinspector.nf.test @@ -0,0 +1,30 @@ +nextflow_workflow { + + name "Test Workflow SEQINSPECTOR" + script "workflows/seqinspector.nf" + workflow "SEQINSPECTOR" + + test("Should run without failures pipeline") { + + when { + params { + // define parameters here. Example: + // outdir = "tests/results" + } + workflow { + """ + // define inputs of the workflow here. 
Example: + // input[0] = file("https://raw.githubusercontent.com/nf-core/test-datasets/e47966b63444ec0fcdef23bfc410eeca22535ac7/testdata/MiSeq/samplesheet.csv") + input[0] = file("assets/samplesheet.csv") + """ + } + } + + then { + assert workflow.success + assert snapshot(workflow.out).match() + } + + } + +} From eba628ef9389a13214e6c9663965d6ea56d7e994 Mon Sep 17 00:00:00 2001 From: kedhammar Date: Fri, 3 May 2024 12:18:42 +0200 Subject: [PATCH 10/34] point test conf upstream --- conf/test.config | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/conf/test.config b/conf/test.config index 38e9ee3..2c26a9c 100644 --- a/conf/test.config +++ b/conf/test.config @@ -22,7 +22,7 @@ params { // Input data // TODO nf-core: Specify the paths to your test data on nf-core/test-datasets // TODO nf-core: Give any required params for the test so that command line flags are not needed - input = 'https://raw.githubusercontent.com/KarNair/test-datasets/seqinspector/testdata/MiSeq/samplesheet.csv' + input = 'https://raw.githubusercontent.com/nf-core/test-datasets/seqinspector/testdata/MiSeq/samplesheet.csv' // Genome references genome = 'R64-1-1' From 23f69d1302457fc32295c3c77bd41d443bb96d9e Mon Sep 17 00:00:00 2001 From: kedhammar Date: Fri, 3 May 2024 13:05:47 +0200 Subject: [PATCH 11/34] project -> group --- assets/samplesheet.csv | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/assets/samplesheet.csv b/assets/samplesheet.csv index fbe5de2..fef3b4e 100644 --- a/assets/samplesheet.csv +++ b/assets/samplesheet.csv @@ -1,3 +1,3 @@ -sample,lane,project,fastq_1,fastq_2,rundir +sample,lane,group,fastq_1,fastq_2,rundir SAMPLE_PAIRED_END,1,P001,/path/to/fastq/files/AEG588A1_S1_L002_R1_001.fastq.gz,/path/to/fastq/files/AEG588A1_S1_L002_R2_001.fastq.gz,/path/to/rundir SAMPLE_SINGLE_END,2,P002,/path/to/fastq/files/AEG588A4_S4_L003_R1_001.fastq.gz,,/path/to/rundir From 1ebf3f1adc86f3c50020a56f95430b06f3a5000a Mon Sep 17 00:00:00 2001 From: kedhammar Date: Fri, 
3 May 2024 13:06:48 +0200 Subject: [PATCH 12/34] project -> group --- assets/schema_input.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/assets/schema_input.json b/assets/schema_input.json index 1648944..7115bfa 100644 --- a/assets/schema_input.json +++ b/assets/schema_input.json @@ -22,7 +22,7 @@ "group": { "type": "string", "pattern": "^\\S+$", - "errorMessage": "Project ID cannot contain spaces", + "errorMessage": "Group ID cannot contain spaces", "meta": ["group"] }, "fastq_1": { From b0bf471667c0547654afd14d668e8f70aab55185 Mon Sep 17 00:00:00 2001 From: kedhammar Date: Fri, 3 May 2024 13:53:31 +0200 Subject: [PATCH 13/34] make lane non-compulsory --- assets/schema_input.json | 2 +- workflows/seqinspector.nf | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/assets/schema_input.json b/assets/schema_input.json index 7115bfa..c9800d5 100644 --- a/assets/schema_input.json +++ b/assets/schema_input.json @@ -47,7 +47,7 @@ "meta": ["rundir"] } }, - "required": ["sample", "lane", "fastq_1"], + "required": ["sample", "fastq_1"], "dependentRequired": { "fastq_2": ["fastq_1"] } diff --git a/workflows/seqinspector.nf b/workflows/seqinspector.nf index 2acafc3..018e079 100644 --- a/workflows/seqinspector.nf +++ b/workflows/seqinspector.nf @@ -98,6 +98,7 @@ workflow SEQINSPECTOR { // Generate reports by lane lane_mqc_files = ch_multiqc_files + .filter { meta, sample -> meta.lane } .map { meta, sample -> [ "[LANE:${meta.lane}]", meta, sample ] } .groupTuple() .tap { mqc_by_lane } From 9e0eca39baf5b58760ec4117b4faefde4a512f09 Mon Sep 17 00:00:00 2001 From: kedhammar Date: Fri, 3 May 2024 15:59:10 +0200 Subject: [PATCH 14/34] remove unused file --- assets/samplesheet.csv | 3 --- 1 file changed, 3 deletions(-) delete mode 100644 assets/samplesheet.csv diff --git a/assets/samplesheet.csv b/assets/samplesheet.csv deleted file mode 100644 index fef3b4e..0000000 --- a/assets/samplesheet.csv +++ /dev/null @@ -1,3 +0,0 @@ 
-sample,lane,group,fastq_1,fastq_2,rundir -SAMPLE_PAIRED_END,1,P001,/path/to/fastq/files/AEG588A1_S1_L002_R1_001.fastq.gz,/path/to/fastq/files/AEG588A1_S1_L002_R2_001.fastq.gz,/path/to/rundir -SAMPLE_SINGLE_END,2,P002,/path/to/fastq/files/AEG588A4_S4_L003_R1_001.fastq.gz,,/path/to/rundir From 98e60bf160cb6a4476d55b38a44095e67f79eeea Mon Sep 17 00:00:00 2001 From: kedhammar Date: Fri, 3 May 2024 16:38:52 +0200 Subject: [PATCH 15/34] revamp nf-test, run once for each sequencing platform --- tests/MiSeq.main.nf.test | 23 +++++++++++++++++++++ tests/NovaSeq6000.main.nf.test | 23 +++++++++++++++++++++ tests/PromethION.main.nf.test | 23 +++++++++++++++++++++ tests/main.nf.test | 20 ------------------- tests/nextflow.config | 11 ++++++++++ tests/workflows/seqinspector.nf.test | 30 ---------------------------- 6 files changed, 80 insertions(+), 50 deletions(-) create mode 100644 tests/MiSeq.main.nf.test create mode 100644 tests/NovaSeq6000.main.nf.test create mode 100644 tests/PromethION.main.nf.test delete mode 100644 tests/main.nf.test delete mode 100644 tests/workflows/seqinspector.nf.test diff --git a/tests/MiSeq.main.nf.test b/tests/MiSeq.main.nf.test new file mode 100644 index 0000000..0246a75 --- /dev/null +++ b/tests/MiSeq.main.nf.test @@ -0,0 +1,23 @@ +nextflow_pipeline { + + name "Test Workflow main.nf on MiSeq data" + script "../main.nf" + tag "seqinspector" + tag "PIPELINE" + + test("MiSeq data test") { + + when { + params { + outdir = "tests/results/MiSeq" + input = "https://raw.githubusercontent.com/nf-core/test-datasets/seqinspector/testdata/MiSeq/samplesheet.csv" + } + } + + then { + assert workflow.success + } + + } + +} diff --git a/tests/NovaSeq6000.main.nf.test b/tests/NovaSeq6000.main.nf.test new file mode 100644 index 0000000..7c410e5 --- /dev/null +++ b/tests/NovaSeq6000.main.nf.test @@ -0,0 +1,23 @@ +nextflow_pipeline { + + name "Test Workflow main.nf on NovaSeq6000 data" + script "../main.nf" + tag "seqinspector" + tag "PIPELINE" + + 
test("NovaSeq6000 data test") { + + when { + params { + outdir = "tests/results/NovaSeq6000" + input = "https://raw.githubusercontent.com/nf-core/test-datasets/seqinspector/testdata/NovaSeq6000/samplesheet.csv" + } + } + + then { + assert workflow.success + } + + } + +} diff --git a/tests/PromethION.main.nf.test b/tests/PromethION.main.nf.test new file mode 100644 index 0000000..9987c95 --- /dev/null +++ b/tests/PromethION.main.nf.test @@ -0,0 +1,23 @@ +nextflow_pipeline { + + name "Test Workflow main.nf on PromethION data" + script "../main.nf" + tag "seqinspector" + tag "PIPELINE" + + test("PromethION data test") { + + when { + params { + outdir = "tests/results/PromethION" + input = "https://raw.githubusercontent.com/nf-core/test-datasets/seqinspector/testdata/PromethION/samplesheet.csv" + } + } + + then { + assert workflow.success + } + + } + +} diff --git a/tests/main.nf.test b/tests/main.nf.test deleted file mode 100644 index 72498b5..0000000 --- a/tests/main.nf.test +++ /dev/null @@ -1,20 +0,0 @@ -nextflow_pipeline { - - name "Test Workflow main.nf" - script "main.nf" - - test("Should run without failures") { - - when { - params { - outdir = "tests/results" - } - } - - then { - assert workflow.success - } - - } - -} diff --git a/tests/nextflow.config b/tests/nextflow.config index c19b1ad..422545b 100644 --- a/tests/nextflow.config +++ b/tests/nextflow.config @@ -3,3 +3,14 @@ Nextflow config file for running tests ======================================================================================== */ + +params { + config_profile_name = 'nf-test profile' + config_profile_description = 'Configuration profile to use for nf-test.' 
+ + // Limit resources so that this can run on GitHub Actions + max_cpus = 2 + max_memory = '3.GB' + max_time = '2.h' + +} diff --git a/tests/workflows/seqinspector.nf.test b/tests/workflows/seqinspector.nf.test deleted file mode 100644 index bbb529d..0000000 --- a/tests/workflows/seqinspector.nf.test +++ /dev/null @@ -1,30 +0,0 @@ -nextflow_workflow { - - name "Test Workflow SEQINSPECTOR" - script "workflows/seqinspector.nf" - workflow "SEQINSPECTOR" - - test("Should run without failures pipeline") { - - when { - params { - // define parameters here. Example: - // outdir = "tests/results" - } - workflow { - """ - // define inputs of the workflow here. Example: - // input[0] = file("https://raw.githubusercontent.com/nf-core/test-datasets/e47966b63444ec0fcdef23bfc410eeca22535ac7/testdata/MiSeq/samplesheet.csv") - input[0] = file("assets/samplesheet.csv") - """ - } - } - - then { - assert workflow.success - assert snapshot(workflow.out).match() - } - - } - -} From d95c660d8fbdba413920d45a7f7025e9261989c0 Mon Sep 17 00:00:00 2001 From: Adrien Coulier Date: Mon, 6 May 2024 15:54:05 +0200 Subject: [PATCH 16/34] Update modules and subworkflows --- modules.json | 4 ++-- modules/nf-core/fastqc/main.nf | 6 ++++++ subworkflows/nf-core/utils_nfcore_pipeline/main.nf | 8 +++++++- 3 files changed, 15 insertions(+), 3 deletions(-) diff --git a/modules.json b/modules.json index ebbc5dc..87fe816 100644 --- a/modules.json +++ b/modules.json @@ -7,7 +7,7 @@ "nf-core": { "fastqc": { "branch": "master", - "git_sha": "f4ae1d942bd50c5c0b9bd2de1393ce38315ba57c", + "git_sha": "285a50500f9e02578d90b3ce6382ea3c30216acd", "installed_by": ["modules"] }, "multiqc": { @@ -26,7 +26,7 @@ }, "utils_nfcore_pipeline": { "branch": "master", - "git_sha": "5caf7640a9ef1d18d765d55339be751bb0969dfa", + "git_sha": "92de218a329bfc9a9033116eb5f65fd270e72ba3", "installed_by": ["subworkflows"] }, "utils_nfvalidation_plugin": { diff --git a/modules/nf-core/fastqc/main.nf b/modules/nf-core/fastqc/main.nf index 
9e19a74..d79f1c8 100644 --- a/modules/nf-core/fastqc/main.nf +++ b/modules/nf-core/fastqc/main.nf @@ -25,6 +25,11 @@ process FASTQC { def old_new_pairs = reads instanceof Path || reads.size() == 1 ? [[ reads, "${prefix}.${reads.extension}" ]] : reads.withIndex().collect { entry, index -> [ entry, "${prefix}_${index + 1}.${entry.extension}" ] } def rename_to = old_new_pairs*.join(' ').join(' ') def renamed_files = old_new_pairs.collect{ old_name, new_name -> new_name }.join(' ') + + def memory_in_mb = MemoryUnit.of("${task.memory}").toUnit('MB') + // FastQC memory value allowed range (100 - 10000) + def fastqc_memory = memory_in_mb > 10000 ? 10000 : (memory_in_mb < 100 ? 100 : memory_in_mb) + """ printf "%s %s\\n" $rename_to | while read old_name new_name; do [ -f "\${new_name}" ] || ln -s \$old_name \$new_name @@ -33,6 +38,7 @@ process FASTQC { fastqc \\ $args \\ --threads $task.cpus \\ + --memory $fastqc_memory \\ $renamed_files cat <<-END_VERSIONS > versions.yml diff --git a/subworkflows/nf-core/utils_nfcore_pipeline/main.nf b/subworkflows/nf-core/utils_nfcore_pipeline/main.nf index a8b55d6..14558c3 100644 --- a/subworkflows/nf-core/utils_nfcore_pipeline/main.nf +++ b/subworkflows/nf-core/utils_nfcore_pipeline/main.nf @@ -65,9 +65,15 @@ def checkProfileProvided(nextflow_cli_args) { // Citation string for pipeline // def workflowCitation() { + def temp_doi_ref = "" + String[] manifest_doi = workflow.manifest.doi.tokenize(",") + // Using a loop to handle multiple DOIs + // Removing `https://doi.org/` to handle pipelines using DOIs vs DOI resolvers + // Removing ` ` since the manifest.doi is a string and not a proper list + for (String doi_ref: manifest_doi) temp_doi_ref += " https://doi.org/${doi_ref.replace('https://doi.org/', '').replace(' ', '')}\n" return "If you use ${workflow.manifest.name} for your analysis please cite:\n\n" + "* The pipeline\n" + - " ${workflow.manifest.doi}\n\n" + + temp_doi_ref + "\n" + "* The nf-core framework\n" + " 
https://doi.org/10.1038/s41587-020-0439-x\n\n" + "* Software dependencies\n" + From e0527ccb6c235fcb01f5fa26e1b192e705b3f873 Mon Sep 17 00:00:00 2001 From: Adrien Coulier Date: Tue, 7 May 2024 16:45:09 +0200 Subject: [PATCH 17/34] Fix multiqc extra files Issue with the previous implementation was that sometimes MULTIQC_PER_LANE would execute before the extra files were collected into `ch_multiqc_extra_files`, causing `null` to be added to the list of files passed to multiqc. --- workflows/seqinspector.nf | 35 +++++++++++++++++++++++++++-------- 1 file changed, 27 insertions(+), 8 deletions(-) diff --git a/workflows/seqinspector.nf b/workflows/seqinspector.nf index 018e079..65343a8 100644 --- a/workflows/seqinspector.nf +++ b/workflows/seqinspector.nf @@ -94,11 +94,17 @@ workflow SEQINSPECTOR { ch_multiqc_logo.toList() ) - multiqc_extra_files = ch_multiqc_extra_files.toList() - // Generate reports by lane + multiqc_extra_files_per_lane = ch_multiqc_files + .filter { meta, sample -> meta.lane } + .map { meta, sample -> meta.lane } + .unique() + .map { lane -> [lane:lane] } + .cross(ch_multiqc_extra_files) + lane_mqc_files = ch_multiqc_files .filter { meta, sample -> meta.lane } + .mix(multiqc_extra_files_per_lane) .map { meta, sample -> [ "[LANE:${meta.lane}]", meta, sample ] } .groupTuple() .tap { mqc_by_lane } @@ -122,16 +128,23 @@ workflow SEQINSPECTOR { } MULTIQC_PER_LANE( - lane_mqc_files.samples_per_lane - .map { samples -> samples + multiqc_extra_files.value }, + lane_mqc_files.samples_per_lane, ch_multiqc_config.toList(), lane_mqc_files.config, ch_multiqc_logo.toList() ) // Generate reports by group + multiqc_extra_files_per_group = ch_multiqc_files + .filter { meta, sample -> meta.group } + .map { meta, sample -> meta.group } + .unique() + .map { group -> [group:group] } + .cross(ch_multiqc_extra_files) + group_mqc_files = ch_multiqc_files .filter { meta, sample -> meta.group } + .mix(multiqc_extra_files_per_group) .map { meta, sample -> [ 
"[GROUP:${meta.group}]", meta, sample ] } .groupTuple() .tap { mqc_by_group } @@ -155,16 +168,23 @@ workflow SEQINSPECTOR { } MULTIQC_PER_GROUP( - group_mqc_files.samples_per_group - .map { samples -> samples + multiqc_extra_files.value }, + group_mqc_files.samples_per_group, ch_multiqc_config.toList(), group_mqc_files.config, ch_multiqc_logo.toList() ) // Generate reports by rundir + multiqc_extra_files_per_rundir = ch_multiqc_files + .filter { meta, sample -> meta.rundir } + .map { meta, sample -> meta.rundir } + .unique() + .map { rundir -> [rundir:rundir] } + .cross(ch_multiqc_extra_files) + rundir_mqc_files = ch_multiqc_files .filter { meta, sample -> meta.rundir } + .mix(multiqc_extra_files_per_rundir) .map { meta, sample -> [ "[RUNDIR:${meta.rundir.name}]", meta, sample ] } .groupTuple() .tap { mqc_by_rundir } @@ -188,8 +208,7 @@ workflow SEQINSPECTOR { } MULTIQC_PER_RUNDIR( - rundir_mqc_files.samples_per_rundir - .map { samples -> samples + multiqc_extra_files.value }, + rundir_mqc_files.samples_per_rundir, ch_multiqc_config.toList(), rundir_mqc_files.config, ch_multiqc_logo.toList() From 42159a132e7f73f2594fcb0e66ca26702311cefb Mon Sep 17 00:00:00 2001 From: Adrien Coulier Date: Tue, 14 May 2024 10:29:17 +0200 Subject: [PATCH 18/34] Add test snapshots --- tests/MiSeq.main.nf.test | 25 +++++++++++++++--- tests/MiSeq.main.nf.test.snap | 19 ++++++++++++++ tests/NovaSeq6000.main.nf.test | 40 ++++++++++++++++++++++++++--- tests/NovaSeq6000.main.nf.test.snap | 31 ++++++++++++++++++++++ tests/PromethION.main.nf.test | 20 ++++++++++++--- tests/PromethION.main.nf.test.snap | 15 +++++++++++ workflows/seqinspector.nf | 18 +++++-------- 7 files changed, 144 insertions(+), 24 deletions(-) create mode 100644 tests/MiSeq.main.nf.test.snap create mode 100644 tests/NovaSeq6000.main.nf.test.snap create mode 100644 tests/PromethION.main.nf.test.snap diff --git a/tests/MiSeq.main.nf.test b/tests/MiSeq.main.nf.test index 0246a75..1e72de9 100644 --- a/tests/MiSeq.main.nf.test 
+++ b/tests/MiSeq.main.nf.test @@ -9,15 +9,32 @@ nextflow_pipeline { when { params { - outdir = "tests/results/MiSeq" + outdir = "$outputDir" input = "https://raw.githubusercontent.com/nf-core/test-datasets/seqinspector/testdata/MiSeq/samplesheet.csv" } } then { - assert workflow.success - } + assertAll( + { assert workflow.success }, + { assert snapshot( + path("$outputDir/multiqc/lanes/L1/multiqc_data/multiqc_citations.txt"), + path("$outputDir/multiqc/lanes/L1/multiqc_data/multiqc_fastqc.txt"), + path("$outputDir/multiqc/lanes/L1/multiqc_data/multiqc_general_stats.txt"), + path("$outputDir/multiqc/lanes/L1/multiqc_data/multiqc_software_versions.txt"), - } + path("$outputDir/multiqc/groups/P001/multiqc_data/multiqc_citations.txt"), + path("$outputDir/multiqc/groups/P001/multiqc_data/multiqc_fastqc.txt"), + path("$outputDir/multiqc/groups/P001/multiqc_data/multiqc_general_stats.txt"), + path("$outputDir/multiqc/groups/P001/multiqc_data/multiqc_software_versions.txt"), + path("$outputDir/multiqc/multiqc_data/multiqc_citations.txt"), + path("$outputDir/multiqc/multiqc_data/multiqc_fastqc.txt"), + path("$outputDir/multiqc/multiqc_data/multiqc_general_stats.txt"), + path("$outputDir/multiqc/multiqc_data/multiqc_software_versions.txt"), + ).match() + } + ) + } + } } diff --git a/tests/MiSeq.main.nf.test.snap b/tests/MiSeq.main.nf.test.snap new file mode 100644 index 0000000..e222b51 --- /dev/null +++ b/tests/MiSeq.main.nf.test.snap @@ -0,0 +1,19 @@ +{ + "MiSeq data test": { + "content": [ + "multiqc_citations.txt:md5,4c806e63a283ec1b7e78cdae3a923d4f", + "multiqc_fastqc.txt:md5,692b8aed0614ed1655f2c1cbea1ba312", + "multiqc_general_stats.txt:md5,630167d67d3f92408cd1a04422c7196f", + "multiqc_software_versions.txt:md5,7452f1f7aae2a8a4066c2ef6cd5ceb95", + "multiqc_citations.txt:md5,4c806e63a283ec1b7e78cdae3a923d4f", + "multiqc_fastqc.txt:md5,692b8aed0614ed1655f2c1cbea1ba312", + "multiqc_general_stats.txt:md5,630167d67d3f92408cd1a04422c7196f", + 
"multiqc_software_versions.txt:md5,7452f1f7aae2a8a4066c2ef6cd5ceb95", + "multiqc_citations.txt:md5,4c806e63a283ec1b7e78cdae3a923d4f", + "multiqc_fastqc.txt:md5,692b8aed0614ed1655f2c1cbea1ba312", + "multiqc_general_stats.txt:md5,630167d67d3f92408cd1a04422c7196f", + "multiqc_software_versions.txt:md5,7452f1f7aae2a8a4066c2ef6cd5ceb95" + ], + "timestamp": "2024-05-08T16:29:33.284003" + } +} \ No newline at end of file diff --git a/tests/NovaSeq6000.main.nf.test b/tests/NovaSeq6000.main.nf.test index 7c410e5..f5de9aa 100644 --- a/tests/NovaSeq6000.main.nf.test +++ b/tests/NovaSeq6000.main.nf.test @@ -9,15 +9,47 @@ nextflow_pipeline { when { params { - outdir = "tests/results/NovaSeq6000" + outdir = "$outputDir" input = "https://raw.githubusercontent.com/nf-core/test-datasets/seqinspector/testdata/NovaSeq6000/samplesheet.csv" } } then { - assert workflow.success - } + assertAll( + { assert workflow.success }, + { assert snapshot( + path("$outputDir/multiqc/lanes/L1/multiqc_data/multiqc_citations.txt"), + path("$outputDir/multiqc/lanes/L1/multiqc_data/multiqc_fastqc.txt"), + path("$outputDir/multiqc/lanes/L1/multiqc_data/multiqc_general_stats.txt"), + path("$outputDir/multiqc/lanes/L1/multiqc_data/multiqc_software_versions.txt"), - } + path("$outputDir/multiqc/groups/S1/multiqc_data/multiqc_citations.txt"), + path("$outputDir/multiqc/groups/S1/multiqc_data/multiqc_fastqc.txt"), + path("$outputDir/multiqc/groups/S1/multiqc_data/multiqc_general_stats.txt"), + path("$outputDir/multiqc/groups/S1/multiqc_data/multiqc_software_versions.txt"), + + path("$outputDir/multiqc/groups/S2/multiqc_data/multiqc_citations.txt"), + path("$outputDir/multiqc/groups/S2/multiqc_data/multiqc_fastqc.txt"), + path("$outputDir/multiqc/groups/S2/multiqc_data/multiqc_general_stats.txt"), + path("$outputDir/multiqc/groups/S2/multiqc_data/multiqc_software_versions.txt"), + + path("$outputDir/multiqc/groups/S3/multiqc_data/multiqc_citations.txt"), + 
path("$outputDir/multiqc/groups/S3/multiqc_data/multiqc_fastqc.txt"), + path("$outputDir/multiqc/groups/S3/multiqc_data/multiqc_general_stats.txt"), + path("$outputDir/multiqc/groups/S3/multiqc_data/multiqc_software_versions.txt"), + path("$outputDir/multiqc/groups/S4/multiqc_data/multiqc_citations.txt"), + path("$outputDir/multiqc/groups/S4/multiqc_data/multiqc_fastqc.txt"), + path("$outputDir/multiqc/groups/S4/multiqc_data/multiqc_general_stats.txt"), + path("$outputDir/multiqc/groups/S4/multiqc_data/multiqc_software_versions.txt"), + + path("$outputDir/multiqc/multiqc_data/multiqc_citations.txt"), + path("$outputDir/multiqc/multiqc_data/multiqc_fastqc.txt"), + path("$outputDir/multiqc/multiqc_data/multiqc_general_stats.txt"), + path("$outputDir/multiqc/multiqc_data/multiqc_software_versions.txt"), + ).match() + }, + ) + } + } } diff --git a/tests/NovaSeq6000.main.nf.test.snap b/tests/NovaSeq6000.main.nf.test.snap new file mode 100644 index 0000000..a406b89 --- /dev/null +++ b/tests/NovaSeq6000.main.nf.test.snap @@ -0,0 +1,31 @@ +{ + "NovaSeq6000 data test": { + "content": [ + "multiqc_citations.txt:md5,4c806e63a283ec1b7e78cdae3a923d4f", + "multiqc_fastqc.txt:md5,5ba3f4f4ec6026a5f5d55418384dcd3e", + "multiqc_general_stats.txt:md5,123cd6b64c9c15607405bcdd45a843d4", + "multiqc_software_versions.txt:md5,7452f1f7aae2a8a4066c2ef6cd5ceb95", + "multiqc_citations.txt:md5,4c806e63a283ec1b7e78cdae3a923d4f", + "multiqc_fastqc.txt:md5,9b4fd8a6d6e8a9acabecd592f633472e", + "multiqc_general_stats.txt:md5,8237b88ceb018d3cb1edcea62d10f4a2", + "multiqc_software_versions.txt:md5,7452f1f7aae2a8a4066c2ef6cd5ceb95", + "multiqc_citations.txt:md5,4c806e63a283ec1b7e78cdae3a923d4f", + "multiqc_fastqc.txt:md5,9246a5b6b7b0410c79049fc3dbd08e92", + "multiqc_general_stats.txt:md5,44328403f423c6f5ac9ee0a8a01e6725", + "multiqc_software_versions.txt:md5,7452f1f7aae2a8a4066c2ef6cd5ceb95", + "multiqc_citations.txt:md5,4c806e63a283ec1b7e78cdae3a923d4f", + 
"multiqc_fastqc.txt:md5,84820276fae52d4d492831280ae6207c", + "multiqc_general_stats.txt:md5,dd07799e5e4b9d389f9de49a852c3363", + "multiqc_software_versions.txt:md5,7452f1f7aae2a8a4066c2ef6cd5ceb95", + "multiqc_citations.txt:md5,4c806e63a283ec1b7e78cdae3a923d4f", + "multiqc_fastqc.txt:md5,59ae05d89453da6f57010ffb6466f902", + "multiqc_general_stats.txt:md5,e4629691992bfe639c01a84b90563334", + "multiqc_software_versions.txt:md5,7452f1f7aae2a8a4066c2ef6cd5ceb95", + "multiqc_citations.txt:md5,4c806e63a283ec1b7e78cdae3a923d4f", + "multiqc_fastqc.txt:md5,5ba3f4f4ec6026a5f5d55418384dcd3e", + "multiqc_general_stats.txt:md5,123cd6b64c9c15607405bcdd45a843d4", + "multiqc_software_versions.txt:md5,7452f1f7aae2a8a4066c2ef6cd5ceb95" + ], + "timestamp": "2024-05-14T10:17:35.440827" + } +} \ No newline at end of file diff --git a/tests/PromethION.main.nf.test b/tests/PromethION.main.nf.test index 9987c95..de2beb3 100644 --- a/tests/PromethION.main.nf.test +++ b/tests/PromethION.main.nf.test @@ -9,15 +9,27 @@ nextflow_pipeline { when { params { - outdir = "tests/results/PromethION" + outdir = "$outputDir" input = "https://raw.githubusercontent.com/nf-core/test-datasets/seqinspector/testdata/PromethION/samplesheet.csv" } } then { - assert workflow.success - } + assertAll( + { assert workflow.success }, + { assert snapshot( + path("$outputDir/multiqc/groups/r10p41_e8p2_human_runs_jkw/multiqc_data/multiqc_citations.txt"), + path("$outputDir/multiqc/groups/r10p41_e8p2_human_runs_jkw/multiqc_data/multiqc_fastqc.txt"), + path("$outputDir/multiqc/groups/r10p41_e8p2_human_runs_jkw/multiqc_data/multiqc_general_stats.txt"), + path("$outputDir/multiqc/groups/r10p41_e8p2_human_runs_jkw/multiqc_data/multiqc_software_versions.txt"), + path("$outputDir/multiqc/multiqc_data/multiqc_citations.txt"), + path("$outputDir/multiqc/multiqc_data/multiqc_fastqc.txt"), + path("$outputDir/multiqc/multiqc_data/multiqc_general_stats.txt"), + path("$outputDir/multiqc/multiqc_data/multiqc_software_versions.txt"), 
+ ).match() + }, + ) + } } - } diff --git a/tests/PromethION.main.nf.test.snap b/tests/PromethION.main.nf.test.snap new file mode 100644 index 0000000..24e52bd --- /dev/null +++ b/tests/PromethION.main.nf.test.snap @@ -0,0 +1,15 @@ +{ + "PromethION data test": { + "content": [ + "multiqc_citations.txt:md5,4c806e63a283ec1b7e78cdae3a923d4f", + "multiqc_fastqc.txt:md5,333360ff12007d64f2bf7673b0658bed", + "multiqc_general_stats.txt:md5,b1999255f9a502618d59be2f2e93bad2", + "multiqc_software_versions.txt:md5,7452f1f7aae2a8a4066c2ef6cd5ceb95", + "multiqc_citations.txt:md5,4c806e63a283ec1b7e78cdae3a923d4f", + "multiqc_fastqc.txt:md5,333360ff12007d64f2bf7673b0658bed", + "multiqc_general_stats.txt:md5,b1999255f9a502618d59be2f2e93bad2", + "multiqc_software_versions.txt:md5,7452f1f7aae2a8a4066c2ef6cd5ceb95" + ], + "timestamp": "2024-05-08T17:17:23.151259" + } +} \ No newline at end of file diff --git a/workflows/seqinspector.nf b/workflows/seqinspector.nf index 65343a8..3e2a08d 100644 --- a/workflows/seqinspector.nf +++ b/workflows/seqinspector.nf @@ -100,7 +100,7 @@ workflow SEQINSPECTOR { .map { meta, sample -> meta.lane } .unique() .map { lane -> [lane:lane] } - .cross(ch_multiqc_extra_files) + .combine(ch_multiqc_extra_files) lane_mqc_files = ch_multiqc_files .filter { meta, sample -> meta.lane } @@ -118,9 +118,7 @@ workflow SEQINSPECTOR { """.stripMargin() ] } - .map { file -> def fileparts = file.name.split("_") - [ fileparts[0], file ] - } + .map { file -> [ (file =~ /(\[LANE:.+\])/)[0][1], file ] } .join(mqc_by_lane) .multiMap { lane, config, meta , samples_per_lane -> samples_per_lane: samples_per_lane @@ -140,7 +138,7 @@ workflow SEQINSPECTOR { .map { meta, sample -> meta.group } .unique() .map { group -> [group:group] } - .cross(ch_multiqc_extra_files) + .combine(ch_multiqc_extra_files) group_mqc_files = ch_multiqc_files .filter { meta, sample -> meta.group } @@ -158,9 +156,7 @@ workflow SEQINSPECTOR { """.stripMargin() ] } - .map { file -> def fileparts = 
file.name.split("_") - [ fileparts[0], file ] - } + .map { file -> [ (file =~ /(\[GROUP:.+\])/)[0][1], file ] } .join(mqc_by_group) .multiMap { group, config, meta , samples_per_group -> samples_per_group: samples_per_group @@ -180,7 +176,7 @@ workflow SEQINSPECTOR { .map { meta, sample -> meta.rundir } .unique() .map { rundir -> [rundir:rundir] } - .cross(ch_multiqc_extra_files) + .combine(ch_multiqc_extra_files) rundir_mqc_files = ch_multiqc_files .filter { meta, sample -> meta.rundir } @@ -198,9 +194,7 @@ workflow SEQINSPECTOR { """.stripMargin() ] } - .map { file -> def fileparts = file.name.split("_") - [ fileparts[0], file ] - } + .map { file -> [ (file =~ /(\[RUNDIR:.+\])/)[0][1], file ] } .join(mqc_by_rundir) .multiMap { rundir, config, meta , samples_per_rundir -> samples_per_rundir: samples_per_rundir From ef61f9f7e32f63235f0bf574a2fd89c1c119ffcf Mon Sep 17 00:00:00 2001 From: Adrien Coulier Date: Tue, 14 May 2024 11:08:13 +0200 Subject: [PATCH 19/34] Remove unused module configuration --- conf/base.config | 3 --- conf/modules.config | 8 -------- 2 files changed, 11 deletions(-) diff --git a/conf/base.config b/conf/base.config index 8d7dffc..aab50f9 100644 --- a/conf/base.config +++ b/conf/base.config @@ -59,7 +59,4 @@ process { errorStrategy = 'retry' maxRetries = 2 } - withName:CUSTOM_DUMPSOFTWAREVERSIONS { - cache = false - } } diff --git a/conf/modules.config b/conf/modules.config index b2e48f3..da3d2ca 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -22,14 +22,6 @@ process { ext.args = '--quiet' } - withName: CUSTOM_DUMPSOFTWAREVERSIONS { - publishDir = [ - path: { "${params.outdir}/pipeline_info" }, - mode: params.publish_dir_mode, - pattern: '*_versions.yml' - ] - } - withName: 'MULTIQC' { ext.args = { params.multiqc_title ? 
"--title \"$params.multiqc_title\"" : '' } publishDir = [ From 51b01e98ca9560154634b08d541475abeffbc35c Mon Sep 17 00:00:00 2001 From: Adrien Coulier Date: Tue, 14 May 2024 11:16:41 +0200 Subject: [PATCH 20/34] Update usage docs and restore example samplesheet --- assets/samplesheet.csv | 3 +++ docs/usage.md | 36 ++++++++++++------------------------ 2 files changed, 15 insertions(+), 24 deletions(-) create mode 100644 assets/samplesheet.csv diff --git a/assets/samplesheet.csv b/assets/samplesheet.csv new file mode 100644 index 0000000..fef3b4e --- /dev/null +++ b/assets/samplesheet.csv @@ -0,0 +1,3 @@ +sample,lane,group,fastq_1,fastq_2,rundir +SAMPLE_PAIRED_END,1,P001,/path/to/fastq/files/AEG588A1_S1_L002_R1_001.fastq.gz,/path/to/fastq/files/AEG588A1_S1_L002_R2_001.fastq.gz,/path/to/rundir +SAMPLE_SINGLE_END,2,P002,/path/to/fastq/files/AEG588A4_S4_L003_R1_001.fastq.gz,,/path/to/rundir diff --git a/docs/usage.md b/docs/usage.md index f926de7..df2be20 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -16,39 +16,27 @@ You will need to create a samplesheet with information about the samples you wou --input '[path to samplesheet file]' ``` -### Multiple runs of the same sample - -The `sample` identifiers have to be the same when you have re-sequenced the same sample more than once e.g. to increase sequencing depth. The pipeline will concatenate the raw reads before performing any downstream analysis. Below is an example for the same sample sequenced across 3 lanes: - -```csv title="samplesheet.csv" -sample,fastq_1,fastq_2 -CONTROL_REP1,AEG588A1_S1_L002_R1_001.fastq.gz,AEG588A1_S1_L002_R2_001.fastq.gz -CONTROL_REP1,AEG588A1_S1_L003_R1_001.fastq.gz,AEG588A1_S1_L003_R2_001.fastq.gz -CONTROL_REP1,AEG588A1_S1_L004_R1_001.fastq.gz,AEG588A1_S1_L004_R2_001.fastq.gz -``` - ### Full samplesheet -The pipeline will auto-detect whether a sample is single- or paired-end using the information provided in the samplesheet. 
The samplesheet can have as many columns as you desire, however, there is a strict requirement for the first 3 columns to match those defined in the table below. - -A final samplesheet file consisting of both single- and paired-end data may look something like the one below. This is for 6 samples, where `TREATMENT_REP3` has been sequenced twice. - ```csv title="samplesheet.csv" -sample,fastq_1,fastq_2 -CONTROL_REP1,AEG588A1_S1_L002_R1_001.fastq.gz,AEG588A1_S1_L002_R2_001.fastq.gz -CONTROL_REP2,AEG588A2_S2_L002_R1_001.fastq.gz,AEG588A2_S2_L002_R2_001.fastq.gz -CONTROL_REP3,AEG588A3_S3_L002_R1_001.fastq.gz,AEG588A3_S3_L002_R2_001.fastq.gz -TREATMENT_REP1,AEG588A4_S4_L003_R1_001.fastq.gz, -TREATMENT_REP2,AEG588A5_S5_L003_R1_001.fastq.gz, -TREATMENT_REP3,AEG588A6_S6_L003_R1_001.fastq.gz, -TREATMENT_REP3,AEG588A6_S6_L004_R1_001.fastq.gz, +sample,lane,group,fastq_1,fastq_2,rundir +CONTROL_REP1,1,,AEG588A1_S1_L002_R1_001.fastq.gz,AEG588A1_S1_L002_R2_001.fastq.gz,200624_A00834_0183_BHMTFYDRXX +CONTROL_REP2,1,,AEG588A2_S2_L002_R1_001.fastq.gz,AEG588A2_S2_L002_R2_001.fastq.gz,200624_A00834_0183_BHMTFYDRXX +CONTROL_REP3,1,,AEG588A3_S3_L002_R1_001.fastq.gz,AEG588A3_S3_L002_R2_001.fastq.gz,200624_A00834_0183_BHMTFYDRXX +TREATMENT_REP1,2,GROUP1,AEG588A4_S4_L003_R1_001.fastq.gz,,200624_A00834_0183_BHMTFYDRXX +TREATMENT_REP2,2,GROUP1,AEG588A5_S5_L003_R1_001.fastq.gz,,200624_A00834_0183_BHMTFYDRXX +TREATMENT_REP3,2,GROUP2,AEG588A6_S6_L003_R1_001.fastq.gz,,200624_A00834_0183_BHMTFYDRXX +TREATMENT_REP3,2,GROUP2,AEG588A6_S6_L004_R1_001.fastq.gz,,200624_A00834_0183_BHMTFYDRXX ``` | Column | Description | | --------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | | `sample` | Custom sample name. This entry will be identical for multiple sequencing libraries/runs from the same sample. 
Spaces in sample names are automatically converted to underscores (`_`). | +| `lane` | Lane where the sample was processed on an Illumina instrument (optional). | +| `group` | Group the sample belongs to, useful when several groups are pooled together (optional). | +| `rundir` | Path to the runfolder containing extra information about the sequencing run (optional). | | `fastq_1` | Full path to FastQ file for Illumina short reads 1. File has to be gzipped and have the extension ".fastq.gz" or ".fq.gz". | -| `fastq_2` | Full path to FastQ file for Illumina short reads 2. File has to be gzipped and have the extension ".fastq.gz" or ".fq.gz". | +| `fastq_2` | Full path to FastQ file for Illumina short reads 2. File has to be gzipped and have the extension ".fastq.gz" or ".fq.gz" (optional). | An [example samplesheet](../assets/samplesheet.csv) has been provided with the pipeline. From 7c7f31f715a40c1f5b061099c9cd945214875ab0 Mon Sep 17 00:00:00 2001 From: Adrien Coulier Date: Tue, 14 May 2024 12:09:51 +0200 Subject: [PATCH 21/34] Update output docs --- docs/output.md | 27 +++++++++++++++++++++++++-- 1 file changed, 25 insertions(+), 2 deletions(-) diff --git a/docs/output.md b/docs/output.md index 77e4cc1..f83e1fa 100644 --- a/docs/output.md +++ b/docs/output.md @@ -6,8 +6,6 @@ This document describes the output produced by the pipeline. Most of the plots a The directories listed below will be created in the results directory after the pipeline has finished. All paths are relative to the top-level results directory. - - ## Pipeline overview The pipeline is built using [Nextflow](https://www.nextflow.io/) and processes data using the following steps: @@ -48,6 +46,31 @@ The FastQC plots displayed in the MultiQC report shows _untrimmed_ reads. They m - `multiqc_report.html`: a standalone HTML file that can be viewed in your web browser. - `multiqc_data/`: directory containing parsed statistics from the different tools used in the pipeline. 
- `multiqc_plots/`: directory containing static images from the report in various formats. + - `lanes/` [1] + - `L1/` + - `multiqc_report.html`: a standalone HTML file that can be viewed in your web browser. + - `multiqc_data/`: directory containing parsed statistics from the different tools used in the pipeline. + - `multiqc_plots/`: directory containing static images from the report in various formats. + - `L2/` + - ... + - `groups/` [1] + - `GROUPNAME1/` + - `multiqc_report.html`: a standalone HTML file that can be viewed in your web browser. + - `multiqc_data/`: directory containing parsed statistics from the different tools used in the pipeline. + - `multiqc_plots/`: directory containing static images from the report in various formats. + - `GROUPNAME2/` + - ... + - `rundir/` [1] + - `RUNDIR1/` + - `multiqc_report.html`: a standalone HTML file that can be viewed in your web browser. + - `multiqc_data/`: directory containing parsed statistics from the different tools used in the pipeline. + - `multiqc_plots/`: directory containing static images from the report in various formats. + - `RUNDIR2/` + - ... + + +[1] These files will only be generated if `lane`, `group` or `rundir` were specified for some samples. + From 1c4f6e07019fad57bce6d5d3473155b2f3b364b3 Mon Sep 17 00:00:00 2001 From: Adrien Coulier Date: Tue, 14 May 2024 12:16:38 +0200 Subject: [PATCH 22/34] Update changelog --- CHANGELOG.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 58a53ef..6430add 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,6 +9,8 @@ Initial release of nf-core/seqinspector, created with the [nf-core](https://nf-c ### `Added` +- [#13](https://github.com/nf-core/seqinspector/pull/13) Generate reports per run, per project and per lane. 
+ ### `Fixed` ### `Dependencies` From 211bfafb21f6f8c6b208d33875ddc9e45552973a Mon Sep 17 00:00:00 2001 From: Adrien Coulier Date: Tue, 14 May 2024 12:20:34 +0200 Subject: [PATCH 23/34] Update samplesheet in readme file --- README.md | 11 ++--------- 1 file changed, 2 insertions(+), 9 deletions(-) diff --git a/README.md b/README.md index abe0d4e..edc54ae 100644 --- a/README.md +++ b/README.md @@ -39,26 +39,19 @@ > [!NOTE] > If you are new to Nextflow and nf-core, please refer to [this page](https://nf-co.re/docs/usage/installation) on how to set-up Nextflow. Make sure to [test your setup](https://nf-co.re/docs/usage/introduction#how-to-run-a-pipeline) with `-profile test` before running the workflow on actual data. - - Now, you can run the pipeline using: - - ```bash nextflow run nf-core/seqinspector \ -profile \ From df4e9cbcbd1f62990f44dd90d9eb0a2990d0c657 Mon Sep 17 00:00:00 2001 From: Adrien Coulier Date: Tue, 14 May 2024 12:43:27 +0200 Subject: [PATCH 24/34] Run prettier --- docs/output.md | 2 -- 1 file changed, 2 deletions(-) diff --git a/docs/output.md b/docs/output.md index f83e1fa..7af7806 100644 --- a/docs/output.md +++ b/docs/output.md @@ -68,10 +68,8 @@ The FastQC plots displayed in the MultiQC report shows _untrimmed_ reads. They m - `RUNDIR2/` - ... - [1] These files will only be generated if `lane`, `group` or `rundir` were specified for some samples. - [MultiQC](http://multiqc.info) is a visualization tool that generates a single HTML report summarising all samples in your project. Most of the pipeline QC results are visualised in the report and further statistics are available in the report data directory. 
From 2303db9a4c28fe0b5a907ab73bc3946f3e3daa3e Mon Sep 17 00:00:00 2001 From: Alfred Kedhammar <89784800+kedhammar@users.noreply.github.com> Date: Thu, 16 May 2024 16:00:32 +0200 Subject: [PATCH 25/34] Use testdata base path param in tests/MiSeq.main.nf.test Co-authored-by: Matthias Zepper <6963520+MatthiasZepper@users.noreply.github.com> --- tests/MiSeq.main.nf.test | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/MiSeq.main.nf.test b/tests/MiSeq.main.nf.test index 1e72de9..a3dc0a5 100644 --- a/tests/MiSeq.main.nf.test +++ b/tests/MiSeq.main.nf.test @@ -10,7 +10,7 @@ nextflow_pipeline { when { params { outdir = "$outputDir" - input = "https://raw.githubusercontent.com/nf-core/test-datasets/seqinspector/testdata/MiSeq/samplesheet.csv" + input = params.pipelines_testdata_base_path + "seqinspector/testdata/MiSeq/samplesheet.csv" } } From 1ea8ac0dac6c0cb11e098beb3c516ba0bb6cdc49 Mon Sep 17 00:00:00 2001 From: Alfred Kedhammar <89784800+kedhammar@users.noreply.github.com> Date: Thu, 16 May 2024 16:01:00 +0200 Subject: [PATCH 26/34] Use testdata base path param in tests/NovaSeq6000.main.nf.test Co-authored-by: Matthias Zepper <6963520+MatthiasZepper@users.noreply.github.com> --- tests/NovaSeq6000.main.nf.test | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/NovaSeq6000.main.nf.test b/tests/NovaSeq6000.main.nf.test index f5de9aa..bc687ab 100644 --- a/tests/NovaSeq6000.main.nf.test +++ b/tests/NovaSeq6000.main.nf.test @@ -10,7 +10,7 @@ nextflow_pipeline { when { params { outdir = "$outputDir" - input = "https://raw.githubusercontent.com/nf-core/test-datasets/seqinspector/testdata/NovaSeq6000/samplesheet.csv" + input = params.pipelines_testdata_base_path + "seqinspector/testdata/NovaSeq6000/samplesheet.csv" } } From 048765fd18409769442915dcd403da682dfd852f Mon Sep 17 00:00:00 2001 From: Alfred Kedhammar <89784800+kedhammar@users.noreply.github.com> Date: Thu, 16 May 2024 16:01:20 +0200 Subject: [PATCH 27/34] Use testdata base 
path param in tests/PromethION.main.nf.test Co-authored-by: Matthias Zepper <6963520+MatthiasZepper@users.noreply.github.com> --- tests/PromethION.main.nf.test | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/PromethION.main.nf.test b/tests/PromethION.main.nf.test index de2beb3..d1969f3 100644 --- a/tests/PromethION.main.nf.test +++ b/tests/PromethION.main.nf.test @@ -10,7 +10,7 @@ nextflow_pipeline { when { params { outdir = "$outputDir" - input = "https://raw.githubusercontent.com/nf-core/test-datasets/seqinspector/testdata/PromethION/samplesheet.csv" + input = params.pipelines_testdata_base_path + "seqinspector/testdata/PromethION/samplesheet.csv" } } From 4329bb97c229bcb4d869e3c2d88af08cab730b37 Mon Sep 17 00:00:00 2001 From: Matthias Zepper Date: Fri, 17 May 2024 11:38:29 +0200 Subject: [PATCH 28/34] Make the tests work with 'pipelines_testdata_base_path' parameter. --- tests/MiSeq.main.nf.test | 2 +- tests/MiSeq.main.nf.test.config | 7 +++++++ tests/NovaSeq6000.main.nf.test | 2 +- tests/NovaSeq6000.main.nf.test.config | 7 +++++++ tests/PromethION.main.nf.test | 2 +- tests/PromethION.main.nf.test.config | 7 +++++++ tests/nextflow.config | 5 +++++ 7 files changed, 29 insertions(+), 3 deletions(-) create mode 100644 tests/MiSeq.main.nf.test.config create mode 100644 tests/NovaSeq6000.main.nf.test.config create mode 100644 tests/PromethION.main.nf.test.config diff --git a/tests/MiSeq.main.nf.test b/tests/MiSeq.main.nf.test index a3dc0a5..bfabd95 100644 --- a/tests/MiSeq.main.nf.test +++ b/tests/MiSeq.main.nf.test @@ -8,9 +8,9 @@ nextflow_pipeline { test("MiSeq data test") { when { + config "./MiSeq.main.nf.test.config" params { outdir = "$outputDir" - input = params.pipelines_testdata_base_path + "seqinspector/testdata/MiSeq/samplesheet.csv" } } diff --git a/tests/MiSeq.main.nf.test.config b/tests/MiSeq.main.nf.test.config new file mode 100644 index 0000000..073a977 --- /dev/null +++ b/tests/MiSeq.main.nf.test.config @@ -0,0 +1,7 @@ +// 
Load the basic test config +includeConfig 'nextflow.config' + +// Load the correct samplesheet for that test +params { + input = params.pipelines_testdata_base_path + 'seqinspector/testdata/MiSeq/samplesheet.csv' +} diff --git a/tests/NovaSeq6000.main.nf.test b/tests/NovaSeq6000.main.nf.test index bc687ab..174e215 100644 --- a/tests/NovaSeq6000.main.nf.test +++ b/tests/NovaSeq6000.main.nf.test @@ -8,9 +8,9 @@ nextflow_pipeline { test("NovaSeq6000 data test") { when { + config "./NovaSeq6000.main.nf.test.config" params { outdir = "$outputDir" - input = params.pipelines_testdata_base_path + "seqinspector/testdata/NovaSeq6000/samplesheet.csv" } } diff --git a/tests/NovaSeq6000.main.nf.test.config b/tests/NovaSeq6000.main.nf.test.config new file mode 100644 index 0000000..cad5edd --- /dev/null +++ b/tests/NovaSeq6000.main.nf.test.config @@ -0,0 +1,7 @@ +// Load the basic test config +includeConfig 'nextflow.config' + +// Load the correct samplesheet for that test +params { + input = params.pipelines_testdata_base_path + 'seqinspector/testdata/NovaSeq6000/samplesheet.csv' +} diff --git a/tests/PromethION.main.nf.test b/tests/PromethION.main.nf.test index d1969f3..3928478 100644 --- a/tests/PromethION.main.nf.test +++ b/tests/PromethION.main.nf.test @@ -8,9 +8,9 @@ nextflow_pipeline { test("PromethION data test") { when { + config "./PromethION.main.nf.test.config" params { outdir = "$outputDir" - input = params.pipelines_testdata_base_path + "seqinspector/testdata/PromethION/samplesheet.csv" } } diff --git a/tests/PromethION.main.nf.test.config b/tests/PromethION.main.nf.test.config new file mode 100644 index 0000000..e1498a4 --- /dev/null +++ b/tests/PromethION.main.nf.test.config @@ -0,0 +1,7 @@ +// Load the basic test config +includeConfig 'nextflow.config' + +// Load the correct samplesheet for that test +params { + input = params.pipelines_testdata_base_path + 'seqinspector/testdata/PromethION/samplesheet.csv' +} diff --git a/tests/nextflow.config 
b/tests/nextflow.config index 422545b..8d9ef46 100644 --- a/tests/nextflow.config +++ b/tests/nextflow.config @@ -13,4 +13,9 @@ params { max_memory = '3.GB' max_time = '2.h' + pipelines_testdata_base_path = 'https://raw.githubusercontent.com/nf-core/test-datasets/' + + validationSchemaIgnoreParams = 'genomes,igenomes_base,pipelines_testdata_base_path' + + } From 3ff850395b52b9ee509901f203c301fd40aa9844 Mon Sep 17 00:00:00 2001 From: kedhammar Date: Fri, 17 May 2024 13:28:56 +0200 Subject: [PATCH 29/34] update snapshots, add nf-test.log to gitignore --- .gitignore | 1 + tests/MiSeq.main.nf.test.snap | 12 ++++++++---- tests/NovaSeq6000.main.nf.test.snap | 18 +++++++++++------- tests/PromethION.main.nf.test.snap | 10 +++++++--- 4 files changed, 27 insertions(+), 14 deletions(-) diff --git a/.gitignore b/.gitignore index 089a407..7227765 100644 --- a/.gitignore +++ b/.gitignore @@ -7,3 +7,4 @@ testing/ testing* *.pyc .nf-test +.nf-test.log diff --git a/tests/MiSeq.main.nf.test.snap b/tests/MiSeq.main.nf.test.snap index e222b51..9689638 100644 --- a/tests/MiSeq.main.nf.test.snap +++ b/tests/MiSeq.main.nf.test.snap @@ -4,16 +4,20 @@ "multiqc_citations.txt:md5,4c806e63a283ec1b7e78cdae3a923d4f", "multiqc_fastqc.txt:md5,692b8aed0614ed1655f2c1cbea1ba312", "multiqc_general_stats.txt:md5,630167d67d3f92408cd1a04422c7196f", - "multiqc_software_versions.txt:md5,7452f1f7aae2a8a4066c2ef6cd5ceb95", + "multiqc_software_versions.txt:md5,b1e01403f9bdaa81ebabd388d5f9a921", "multiqc_citations.txt:md5,4c806e63a283ec1b7e78cdae3a923d4f", "multiqc_fastqc.txt:md5,692b8aed0614ed1655f2c1cbea1ba312", "multiqc_general_stats.txt:md5,630167d67d3f92408cd1a04422c7196f", - "multiqc_software_versions.txt:md5,7452f1f7aae2a8a4066c2ef6cd5ceb95", + "multiqc_software_versions.txt:md5,b1e01403f9bdaa81ebabd388d5f9a921", "multiqc_citations.txt:md5,4c806e63a283ec1b7e78cdae3a923d4f", "multiqc_fastqc.txt:md5,692b8aed0614ed1655f2c1cbea1ba312", "multiqc_general_stats.txt:md5,630167d67d3f92408cd1a04422c7196f", - 
"multiqc_software_versions.txt:md5,7452f1f7aae2a8a4066c2ef6cd5ceb95" + "multiqc_software_versions.txt:md5,b1e01403f9bdaa81ebabd388d5f9a921" ], - "timestamp": "2024-05-08T16:29:33.284003" + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-05-17T12:59:21.531493" } } \ No newline at end of file diff --git a/tests/NovaSeq6000.main.nf.test.snap b/tests/NovaSeq6000.main.nf.test.snap index a406b89..cbb7538 100644 --- a/tests/NovaSeq6000.main.nf.test.snap +++ b/tests/NovaSeq6000.main.nf.test.snap @@ -4,28 +4,32 @@ "multiqc_citations.txt:md5,4c806e63a283ec1b7e78cdae3a923d4f", "multiqc_fastqc.txt:md5,5ba3f4f4ec6026a5f5d55418384dcd3e", "multiqc_general_stats.txt:md5,123cd6b64c9c15607405bcdd45a843d4", - "multiqc_software_versions.txt:md5,7452f1f7aae2a8a4066c2ef6cd5ceb95", + "multiqc_software_versions.txt:md5,b1e01403f9bdaa81ebabd388d5f9a921", "multiqc_citations.txt:md5,4c806e63a283ec1b7e78cdae3a923d4f", "multiqc_fastqc.txt:md5,9b4fd8a6d6e8a9acabecd592f633472e", "multiqc_general_stats.txt:md5,8237b88ceb018d3cb1edcea62d10f4a2", - "multiqc_software_versions.txt:md5,7452f1f7aae2a8a4066c2ef6cd5ceb95", + "multiqc_software_versions.txt:md5,b1e01403f9bdaa81ebabd388d5f9a921", "multiqc_citations.txt:md5,4c806e63a283ec1b7e78cdae3a923d4f", "multiqc_fastqc.txt:md5,9246a5b6b7b0410c79049fc3dbd08e92", "multiqc_general_stats.txt:md5,44328403f423c6f5ac9ee0a8a01e6725", - "multiqc_software_versions.txt:md5,7452f1f7aae2a8a4066c2ef6cd5ceb95", + "multiqc_software_versions.txt:md5,b1e01403f9bdaa81ebabd388d5f9a921", "multiqc_citations.txt:md5,4c806e63a283ec1b7e78cdae3a923d4f", "multiqc_fastqc.txt:md5,84820276fae52d4d492831280ae6207c", "multiqc_general_stats.txt:md5,dd07799e5e4b9d389f9de49a852c3363", - "multiqc_software_versions.txt:md5,7452f1f7aae2a8a4066c2ef6cd5ceb95", + "multiqc_software_versions.txt:md5,b1e01403f9bdaa81ebabd388d5f9a921", "multiqc_citations.txt:md5,4c806e63a283ec1b7e78cdae3a923d4f", "multiqc_fastqc.txt:md5,59ae05d89453da6f57010ffb6466f902", 
"multiqc_general_stats.txt:md5,e4629691992bfe639c01a84b90563334", - "multiqc_software_versions.txt:md5,7452f1f7aae2a8a4066c2ef6cd5ceb95", + "multiqc_software_versions.txt:md5,b1e01403f9bdaa81ebabd388d5f9a921", "multiqc_citations.txt:md5,4c806e63a283ec1b7e78cdae3a923d4f", "multiqc_fastqc.txt:md5,5ba3f4f4ec6026a5f5d55418384dcd3e", "multiqc_general_stats.txt:md5,123cd6b64c9c15607405bcdd45a843d4", - "multiqc_software_versions.txt:md5,7452f1f7aae2a8a4066c2ef6cd5ceb95" + "multiqc_software_versions.txt:md5,b1e01403f9bdaa81ebabd388d5f9a921" ], - "timestamp": "2024-05-14T10:17:35.440827" + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-05-17T13:02:20.874181" } } \ No newline at end of file diff --git a/tests/PromethION.main.nf.test.snap b/tests/PromethION.main.nf.test.snap index 24e52bd..951c555 100644 --- a/tests/PromethION.main.nf.test.snap +++ b/tests/PromethION.main.nf.test.snap @@ -4,12 +4,16 @@ "multiqc_citations.txt:md5,4c806e63a283ec1b7e78cdae3a923d4f", "multiqc_fastqc.txt:md5,333360ff12007d64f2bf7673b0658bed", "multiqc_general_stats.txt:md5,b1999255f9a502618d59be2f2e93bad2", - "multiqc_software_versions.txt:md5,7452f1f7aae2a8a4066c2ef6cd5ceb95", + "multiqc_software_versions.txt:md5,b1e01403f9bdaa81ebabd388d5f9a921", "multiqc_citations.txt:md5,4c806e63a283ec1b7e78cdae3a923d4f", "multiqc_fastqc.txt:md5,333360ff12007d64f2bf7673b0658bed", "multiqc_general_stats.txt:md5,b1999255f9a502618d59be2f2e93bad2", - "multiqc_software_versions.txt:md5,7452f1f7aae2a8a4066c2ef6cd5ceb95" + "multiqc_software_versions.txt:md5,b1e01403f9bdaa81ebabd388d5f9a921" ], - "timestamp": "2024-05-08T17:17:23.151259" + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-05-17T12:58:02.572837" } } \ No newline at end of file From 8ac4d76770515159a23f746d4d3fa8c4d3e06ce1 Mon Sep 17 00:00:00 2001 From: kedhammar Date: Fri, 17 May 2024 15:49:01 +0200 Subject: [PATCH 30/34] visualize example run dir corresponsing to samplesheet --- 
docs/usage.md | 32 +++++++++++++++++++++----------- 1 file changed, 21 insertions(+), 11 deletions(-) diff --git a/docs/usage.md b/docs/usage.md index 5731497..7a4ec73 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -10,7 +10,7 @@ ## Samplesheet input -You will need to create a samplesheet with information about the samples you would like to analyse before running the pipeline. Use this parameter to specify its location. It has to be a comma-separated file with 3 columns, and a header row as shown in the examples below. +You will need to create a samplesheet with information about the samples you would like to analyse before running the pipeline. Use this parameter to specify its location. ```bash --input '[path to samplesheet file]' @@ -18,15 +18,25 @@ You will need to create a samplesheet with information about the samples you wou ### Full samplesheet +The following simple run dir structure... + +``` +run_dir +├── sample1_lane1_group1_r1.fq.gz +├── sample2_lane1_group1_r1.fq.gz +├── sample3_lane2_group2_r1.fq.gz +└── sample4_lane2_group3_r1.fq.gz +``` + +...would be represented in the following samplesheet (shown as .tsv for readability) + ```csv title="samplesheet.csv" -sample,lane,group,fastq_1,fastq_2,rundir -CONTROL_REP1,1,,AEG588A1_S1_L002_R1_001.fastq.gz,AEG588A1_S1_L002_R2_001.fastq.gz,200624_A00834_0183_BHMTFYDRXX -CONTROL_REP2,1,,AEG588A2_S2_L002_R1_001.fastq.gz,AEG588A2_S2_L002_R2_001.fastq.gz,200624_A00834_0183_BHMTFYDRXX -CONTROL_REP3,1,,AEG588A3_S3_L002_R1_001.fastq.gz,AEG588A3_S3_L002_R2_001.fastq.gz,200624_A00834_0183_BHMTFYDRXX -TREATMENT_REP1,2,GROUP1,AEG588A4_S4_L003_R1_001.fastq.gz,,200624_A00834_0183_BHMTFYDRXX -TREATMENT_REP2,2,GROUP1,AEG588A5_S5_L003_R1_001.fastq.gz,,200624_A00834_0183_BHMTFYDRXX -TREATMENT_REP3,2,GROUP2,AEG588A6_S6_L003_R1_001.fastq.gz,,200624_A00834_0183_BHMTFYDRXX -TREATMENT_REP3,2,GROUP2,AEG588A6_S6_L004_R1_001.fastq.gz,,200624_A00834_0183_BHMTFYDRXX +sample lane group fastq_1 fastq_2 rundir +sample1 1 group1 
path/to/run_dir/sample1_lane1_group1_r1.fq.gz path/to/run_dir +sample2 1 group1 path/to/run_dir/sample2_lane1_group1_r1.fq.gz path/to/run_dir +sample3 2 group2 path/to/run_dir/sample3_lane2_group2_r1.fq.gz path/to/run_dir +sample4 2 group3 path/to/run_dir/sample4_lane2_group3_r1.fq.gz path/to/run_dir + ``` | Column | Description | @@ -34,11 +44,11 @@ TREATMENT_REP3,2,GROUP2,AEG588A6_S6_L004_R1_001.fastq.gz,,200624_A00834_0183_BHM | `sample` | Custom sample name. This entry will be identical for multiple sequencing libraries/runs from the same sample. Spaces in sample names are automatically converted to underscores (`_`). | | `lane` | Lane where the sample was processed on an Illumina instrument (optional). | | `group` | Group the sample belongs too, useful when several groups are pooled together (optional). | -| `rundir` | Path to the runfolder containing extra information about the sequencing run (optional) . | | `fastq_1` | Full path to FastQ file for Illumina short reads 1. File has to be gzipped and have the extension ".fastq.gz" or ".fq.gz". | | `fastq_2` | Full path to FastQ file for Illumina short reads 2. File has to be gzipped and have the extension ".fastq.gz" or ".fq.gz" (optional). | +| `rundir` | Path to the runfolder containing extra information about the sequencing run (optional). | -An [example samplesheet](../assets/samplesheet.csv) has been provided with the pipeline. +Another [example samplesheet](../assets/samplesheet.csv) has been provided with the pipeline. 
## Running the pipeline From 84c9b3d95c693b722a3e61752ba498b05fc08e57 Mon Sep 17 00:00:00 2001 From: kedhammar Date: Mon, 20 May 2024 12:41:50 +0200 Subject: [PATCH 31/34] naming fixes --- main.nf | 4 ++-- workflows/seqinspector.nf | 9 ++++----- 2 files changed, 6 insertions(+), 7 deletions(-) diff --git a/main.nf b/main.nf index 9de4f94..fb00bd5 100644 --- a/main.nf +++ b/main.nf @@ -59,8 +59,8 @@ workflow NFCORE_SEQINSPECTOR { emit: global_report = SEQINSPECTOR.out.global_report // channel: /path/to/multiqc_report.html - lane_reports = SEQINSPECTOR.out.lane_reports // channel: /path/to/multiqc_report.html - group_report = SEQINSPECTOR.out.group_reports // channel: /path/to/multiqc_report.html + lane_reports = SEQINSPECTOR.out.lane_reports // channel: /path/to/multiqc_report.html + group_reports = SEQINSPECTOR.out.group_reports // channel: /path/to/multiqc_report.html rundir_report = SEQINSPECTOR.out.rundir_reports // channel: /path/to/multiqc_report.html } diff --git a/workflows/seqinspector.nf b/workflows/seqinspector.nf index 0eb375d..50fa481 100644 --- a/workflows/seqinspector.nf +++ b/workflows/seqinspector.nf @@ -6,7 +6,7 @@ include { FASTQC } from '../modules/nf-core/fastqc/main' -include { MULTIQC } from '../modules/nf-core/multiqc/main' +include { MULTIQC as MULTIQC_GLOBAL } from '../modules/nf-core/multiqc/main' include { MULTIQC as MULTIQC_PER_LANE } from '../modules/nf-core/multiqc/main' include { MULTIQC as MULTIQC_PER_GROUP } from '../modules/nf-core/multiqc/main' include { MULTIQC as MULTIQC_PER_RUNDIR } from '../modules/nf-core/multiqc/main' @@ -84,7 +84,7 @@ workflow SEQINSPECTOR { ) ) - MULTIQC ( + MULTIQC_GLOBAL ( ch_multiqc_files .map { meta, file -> file } .mix(ch_multiqc_extra_files) @@ -97,9 +97,8 @@ workflow SEQINSPECTOR { // Generate reports by lane multiqc_extra_files_per_lane = ch_multiqc_files .filter { meta, sample -> meta.lane } - .map { meta, sample -> meta.lane } + .map { meta, sample -> [lane: meta.lane] } .unique() - .map { lane 
-> [lane:lane] } .combine(ch_multiqc_extra_files) lane_mqc_files = ch_multiqc_files @@ -209,7 +208,7 @@ workflow SEQINSPECTOR { ) emit: - global_report = MULTIQC.out.report.toList() // channel: /path/to/multiqc_report.html + global_report = MULTIQC_GLOBAL.out.report.toList() // channel: /path/to/multiqc_report.html lane_reports = MULTIQC_PER_LANE.out.report.toList() // channel: [ /path/to/multiqc_report.html ] group_reports = MULTIQC_PER_GROUP.out.report.toList() // channel: [ /path/to/multiqc_report.html ] rundir_reports = MULTIQC_PER_RUNDIR.out.report.toList() // channel: [ /path/to/multiqc_report.html ] From aaf17b68f90dcaa123a9bc0c7807c7bba0dc2fbd Mon Sep 17 00:00:00 2001 From: kedhammar Date: Mon, 20 May 2024 12:44:01 +0200 Subject: [PATCH 32/34] nf-core sync --- .github/PULL_REQUEST_TEMPLATE.md | 2 +- .github/workflows/linting.yml | 19 ++++++++++--------- .github/workflows/linting_comment.yml | 2 +- 3 files changed, 12 insertions(+), 11 deletions(-) diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md index 6de151f..5d27c53 100644 --- a/.github/PULL_REQUEST_TEMPLATE.md +++ b/.github/PULL_REQUEST_TEMPLATE.md @@ -18,7 +18,7 @@ Learn more about contributing: [CONTRIBUTING.md](https://github.com/nf-core/seqi - [ ] If you've added a new tool - have you followed the pipeline conventions in the [contribution docs](https://github.com/nf-core/seqinspector/tree/master/.github/CONTRIBUTING.md) - [ ] If necessary, also make a PR on the nf-core/seqinspector _branch_ on the [nf-core/test-datasets](https://github.com/nf-core/test-datasets) repository. - [ ] Make sure your code lints (`nf-core lint`). -- [ ] Ensure the test suite passes (`nextflow run . -profile test,docker --outdir `). +- [ ] Ensure the test suite passes (`nf-test test main.nf.test -profile test,docker`). - [ ] Check for unexpected warnings in debug mode (`nextflow run . -profile debug,test,docker --outdir `). - [ ] Usage Documentation in `docs/usage.md` is updated. 
- [ ] Output Documentation in `docs/output.md` is updated. diff --git a/.github/workflows/linting.yml b/.github/workflows/linting.yml index 1fcafe8..073e187 100644 --- a/.github/workflows/linting.yml +++ b/.github/workflows/linting.yml @@ -14,12 +14,13 @@ jobs: pre-commit: runs-on: ubuntu-latest steps: - - uses: actions/checkout@0ad4b8fadaa221de15dcec353f45205ec38ea70b # v4 + - uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4 - - name: Set up Python 3.12 - uses: actions/setup-python@82c7e631bb3cdc910f68e0081d67478d79c6982d # v5 + - name: Set up Python 3.11 + uses: actions/setup-python@0a5c61591373683505ea898e09a3ea4f39ef2b9c # v5 with: - python-version: "3.12" + python-version: 3.11 + cache: "pip" - name: Install pre-commit run: pip install pre-commit @@ -31,14 +32,14 @@ jobs: runs-on: ubuntu-latest steps: - name: Check out pipeline code - uses: actions/checkout@0ad4b8fadaa221de15dcec353f45205ec38ea70b # v4 + uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4 - name: Install Nextflow - uses: nf-core/setup-nextflow@v2 + uses: nf-core/setup-nextflow@v1 - - uses: actions/setup-python@82c7e631bb3cdc910f68e0081d67478d79c6982d # v5 + - uses: actions/setup-python@0a5c61591373683505ea898e09a3ea4f39ef2b9c # v5 with: - python-version: "3.12" + python-version: "3.11" architecture: "x64" - name: Install dependencies @@ -59,7 +60,7 @@ jobs: - name: Upload linting log file artifact if: ${{ always() }} - uses: actions/upload-artifact@65462800fd760344b1a7b4382951275a0abb4808 # v4 + uses: actions/upload-artifact@5d5d22a31266ced268874388b861e4b58bb5c2f3 # v4 with: name: linting-logs path: | diff --git a/.github/workflows/linting_comment.yml b/.github/workflows/linting_comment.yml index 40acc23..b706875 100644 --- a/.github/workflows/linting_comment.yml +++ b/.github/workflows/linting_comment.yml @@ -11,7 +11,7 @@ jobs: runs-on: ubuntu-latest steps: - name: Download lint results - uses: 
dawidd6/action-download-artifact@09f2f74827fd3a8607589e5ad7f9398816f540fe # v3 + uses: dawidd6/action-download-artifact@f6b0bace624032e30a85a8fd9c1a7f8f611f5737 # v3 with: workflow: linting.yml workflow_conclusion: completed From e6dfea9ca1fbbb0019e438c6394d62daf1f2cda8 Mon Sep 17 00:00:00 2001 From: kedhammar Date: Mon, 20 May 2024 12:44:54 +0200 Subject: [PATCH 33/34] nf-core fixes --- .github/PULL_REQUEST_TEMPLATE.md | 2 +- .github/workflows/linting.yml | 19 +++++++++---------- .github/workflows/linting_comment.yml | 2 +- 3 files changed, 11 insertions(+), 12 deletions(-) diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md index 5d27c53..6de151f 100644 --- a/.github/PULL_REQUEST_TEMPLATE.md +++ b/.github/PULL_REQUEST_TEMPLATE.md @@ -18,7 +18,7 @@ Learn more about contributing: [CONTRIBUTING.md](https://github.com/nf-core/seqi - [ ] If you've added a new tool - have you followed the pipeline conventions in the [contribution docs](https://github.com/nf-core/seqinspector/tree/master/.github/CONTRIBUTING.md) - [ ] If necessary, also make a PR on the nf-core/seqinspector _branch_ on the [nf-core/test-datasets](https://github.com/nf-core/test-datasets) repository. - [ ] Make sure your code lints (`nf-core lint`). -- [ ] Ensure the test suite passes (`nf-test test main.nf.test -profile test,docker`). +- [ ] Ensure the test suite passes (`nextflow run . -profile test,docker --outdir `). - [ ] Check for unexpected warnings in debug mode (`nextflow run . -profile debug,test,docker --outdir `). - [ ] Usage Documentation in `docs/usage.md` is updated. - [ ] Output Documentation in `docs/output.md` is updated. 
diff --git a/.github/workflows/linting.yml b/.github/workflows/linting.yml index 073e187..1fcafe8 100644 --- a/.github/workflows/linting.yml +++ b/.github/workflows/linting.yml @@ -14,13 +14,12 @@ jobs: pre-commit: runs-on: ubuntu-latest steps: - - uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4 + - uses: actions/checkout@0ad4b8fadaa221de15dcec353f45205ec38ea70b # v4 - - name: Set up Python 3.11 - uses: actions/setup-python@0a5c61591373683505ea898e09a3ea4f39ef2b9c # v5 + - name: Set up Python 3.12 + uses: actions/setup-python@82c7e631bb3cdc910f68e0081d67478d79c6982d # v5 with: - python-version: 3.11 - cache: "pip" + python-version: "3.12" - name: Install pre-commit run: pip install pre-commit @@ -32,14 +31,14 @@ jobs: runs-on: ubuntu-latest steps: - name: Check out pipeline code - uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4 + uses: actions/checkout@0ad4b8fadaa221de15dcec353f45205ec38ea70b # v4 - name: Install Nextflow - uses: nf-core/setup-nextflow@v1 + uses: nf-core/setup-nextflow@v2 - - uses: actions/setup-python@0a5c61591373683505ea898e09a3ea4f39ef2b9c # v5 + - uses: actions/setup-python@82c7e631bb3cdc910f68e0081d67478d79c6982d # v5 with: - python-version: "3.11" + python-version: "3.12" architecture: "x64" - name: Install dependencies @@ -60,7 +59,7 @@ jobs: - name: Upload linting log file artifact if: ${{ always() }} - uses: actions/upload-artifact@5d5d22a31266ced268874388b861e4b58bb5c2f3 # v4 + uses: actions/upload-artifact@65462800fd760344b1a7b4382951275a0abb4808 # v4 with: name: linting-logs path: | diff --git a/.github/workflows/linting_comment.yml b/.github/workflows/linting_comment.yml index b706875..40acc23 100644 --- a/.github/workflows/linting_comment.yml +++ b/.github/workflows/linting_comment.yml @@ -11,7 +11,7 @@ jobs: runs-on: ubuntu-latest steps: - name: Download lint results - uses: dawidd6/action-download-artifact@f6b0bace624032e30a85a8fd9c1a7f8f611f5737 # v3 + uses: 
dawidd6/action-download-artifact@09f2f74827fd3a8607589e5ad7f9398816f540fe # v3 with: workflow: linting.yml workflow_conclusion: completed From 02affeb602655023888b330b0402bcbe2a0e613a Mon Sep 17 00:00:00 2001 From: Adrien Coulier Date: Thu, 30 May 2024 13:18:36 +0200 Subject: [PATCH 34/34] Improve publishDir logic --- conf/modules.config | 56 +++++++++++++++++++++++++---- tests/MiSeq.main.nf.test.snap | 12 +++---- tests/NovaSeq6000.main.nf.test.snap | 18 ++++------ tests/PromethION.main.nf.test.snap | 10 ++---- 4 files changed, 64 insertions(+), 32 deletions(-) diff --git a/conf/modules.config b/conf/modules.config index da3d2ca..f7e7845 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -22,11 +22,20 @@ process { ext.args = '--quiet' } - withName: 'MULTIQC' { + withName: 'MULTIQC_GLOBAL' { ext.args = { params.multiqc_title ? "--title \"$params.multiqc_title\"" : '' } publishDir = [ path: { "${params.outdir}/multiqc" }, mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } + + withName: 'MULTIQC_PER_LANE' { + ext.args = { params.multiqc_title ? "--title \"$params.multiqc_title\"" : '' } + publishDir = [ + path: { "${params.outdir}/multiqc/lanes" }, + mode: params.publish_dir_mode, saveAs: { filename -> switch (filename) { @@ -38,27 +47,62 @@ process { def new_filename = filename.replaceFirst( "(?<prefix>.*)\\[LANE:${lane}\\]_(?<suffix>multiqc_(report\\.html|plots|data).*)", '${prefix}${suffix}') - "lanes/L${lane}/${new_filename}" + "L${lane}/${new_filename}" + break + default: + filename + } + } + ] + } + + withName: 'MULTIQC_PER_GROUP' { + ext.args = { params.multiqc_title ? 
"--title \"$params.multiqc_title\"" : '' } + publishDir = [ + path: { "${params.outdir}/multiqc/groups" }, + mode: params.publish_dir_mode, + saveAs: { + filename -> + switch (filename) { + case 'versions.yml': + null break case ~/\[GROUP:.+\]_multiqc_(report\.html|plots|data)/: def group = (filename =~ /\[GROUP:(.+)\]_multiqc_(report\.html|plots|data)/)[0][1] def new_filename = filename.replaceFirst( "(?<prefix>.*)\\[GROUP:${group}\\]_(?<suffix>multiqc_(report\\.html|plots|data).*)", '${prefix}${suffix}') - "groups/${group}/${new_filename}" + "${group}/${new_filename}" + break + default: + filename + } + } + ] + } + + withName: 'MULTIQC_PER_RUNDIR' { + ext.args = { params.multiqc_title ? "--title \"$params.multiqc_title\"" : '' } + publishDir = [ + path: { "${params.outdir}/multiqc/rundirs" }, + mode: params.publish_dir_mode, + saveAs: { + filename -> + switch (filename) { + case 'versions.yml': + null break case ~/\[RUNDIR:.+\]_multiqc_(report\.html|plots|data)/: def rundir = (filename =~ /\[RUNDIR:(.+)\]_multiqc_(report\.html|plots|data)/)[0][1] def new_filename = filename.replaceFirst( "(?<prefix>.*)\\[RUNDIR:${rundir}\\]_(?<suffix>multiqc_(report\\.html|plots|data).*)", '${prefix}${suffix}') - "rundirs/${rundir}/${new_filename}" + "${rundir}/${new_filename}" break default: filename - } + } } ] } - } diff --git a/tests/MiSeq.main.nf.test.snap b/tests/MiSeq.main.nf.test.snap index 9689638..87c1a56 100644 --- a/tests/MiSeq.main.nf.test.snap +++ b/tests/MiSeq.main.nf.test.snap @@ -4,20 +4,16 @@ "multiqc_citations.txt:md5,4c806e63a283ec1b7e78cdae3a923d4f", "multiqc_fastqc.txt:md5,692b8aed0614ed1655f2c1cbea1ba312", "multiqc_general_stats.txt:md5,630167d67d3f92408cd1a04422c7196f", - "multiqc_software_versions.txt:md5,b1e01403f9bdaa81ebabd388d5f9a921", + "multiqc_software_versions.txt:md5,7452f1f7aae2a8a4066c2ef6cd5ceb95", "multiqc_citations.txt:md5,4c806e63a283ec1b7e78cdae3a923d4f", "multiqc_fastqc.txt:md5,692b8aed0614ed1655f2c1cbea1ba312", 
"multiqc_general_stats.txt:md5,630167d67d3f92408cd1a04422c7196f", - "multiqc_software_versions.txt:md5,b1e01403f9bdaa81ebabd388d5f9a921", + "multiqc_software_versions.txt:md5,7452f1f7aae2a8a4066c2ef6cd5ceb95", "multiqc_citations.txt:md5,4c806e63a283ec1b7e78cdae3a923d4f", "multiqc_fastqc.txt:md5,692b8aed0614ed1655f2c1cbea1ba312", "multiqc_general_stats.txt:md5,630167d67d3f92408cd1a04422c7196f", - "multiqc_software_versions.txt:md5,b1e01403f9bdaa81ebabd388d5f9a921" + "multiqc_software_versions.txt:md5,7452f1f7aae2a8a4066c2ef6cd5ceb95" ], - "meta": { - "nf-test": "0.8.4", - "nextflow": "23.10.1" - }, - "timestamp": "2024-05-17T12:59:21.531493" + "timestamp": "2024-05-30T13:14:20.263485" } } \ No newline at end of file diff --git a/tests/NovaSeq6000.main.nf.test.snap b/tests/NovaSeq6000.main.nf.test.snap index cbb7538..2260025 100644 --- a/tests/NovaSeq6000.main.nf.test.snap +++ b/tests/NovaSeq6000.main.nf.test.snap @@ -4,32 +4,28 @@ "multiqc_citations.txt:md5,4c806e63a283ec1b7e78cdae3a923d4f", "multiqc_fastqc.txt:md5,5ba3f4f4ec6026a5f5d55418384dcd3e", "multiqc_general_stats.txt:md5,123cd6b64c9c15607405bcdd45a843d4", - "multiqc_software_versions.txt:md5,b1e01403f9bdaa81ebabd388d5f9a921", + "multiqc_software_versions.txt:md5,7452f1f7aae2a8a4066c2ef6cd5ceb95", "multiqc_citations.txt:md5,4c806e63a283ec1b7e78cdae3a923d4f", "multiqc_fastqc.txt:md5,9b4fd8a6d6e8a9acabecd592f633472e", "multiqc_general_stats.txt:md5,8237b88ceb018d3cb1edcea62d10f4a2", - "multiqc_software_versions.txt:md5,b1e01403f9bdaa81ebabd388d5f9a921", + "multiqc_software_versions.txt:md5,7452f1f7aae2a8a4066c2ef6cd5ceb95", "multiqc_citations.txt:md5,4c806e63a283ec1b7e78cdae3a923d4f", "multiqc_fastqc.txt:md5,9246a5b6b7b0410c79049fc3dbd08e92", "multiqc_general_stats.txt:md5,44328403f423c6f5ac9ee0a8a01e6725", - "multiqc_software_versions.txt:md5,b1e01403f9bdaa81ebabd388d5f9a921", + "multiqc_software_versions.txt:md5,7452f1f7aae2a8a4066c2ef6cd5ceb95", "multiqc_citations.txt:md5,4c806e63a283ec1b7e78cdae3a923d4f", 
"multiqc_fastqc.txt:md5,84820276fae52d4d492831280ae6207c", "multiqc_general_stats.txt:md5,dd07799e5e4b9d389f9de49a852c3363", - "multiqc_software_versions.txt:md5,b1e01403f9bdaa81ebabd388d5f9a921", + "multiqc_software_versions.txt:md5,7452f1f7aae2a8a4066c2ef6cd5ceb95", "multiqc_citations.txt:md5,4c806e63a283ec1b7e78cdae3a923d4f", "multiqc_fastqc.txt:md5,59ae05d89453da6f57010ffb6466f902", "multiqc_general_stats.txt:md5,e4629691992bfe639c01a84b90563334", - "multiqc_software_versions.txt:md5,b1e01403f9bdaa81ebabd388d5f9a921", + "multiqc_software_versions.txt:md5,7452f1f7aae2a8a4066c2ef6cd5ceb95", "multiqc_citations.txt:md5,4c806e63a283ec1b7e78cdae3a923d4f", "multiqc_fastqc.txt:md5,5ba3f4f4ec6026a5f5d55418384dcd3e", "multiqc_general_stats.txt:md5,123cd6b64c9c15607405bcdd45a843d4", - "multiqc_software_versions.txt:md5,b1e01403f9bdaa81ebabd388d5f9a921" + "multiqc_software_versions.txt:md5,7452f1f7aae2a8a4066c2ef6cd5ceb95" ], - "meta": { - "nf-test": "0.8.4", - "nextflow": "23.10.1" - }, - "timestamp": "2024-05-17T13:02:20.874181" + "timestamp": "2024-05-30T13:13:49.062282" } } \ No newline at end of file diff --git a/tests/PromethION.main.nf.test.snap b/tests/PromethION.main.nf.test.snap index 951c555..0ac213c 100644 --- a/tests/PromethION.main.nf.test.snap +++ b/tests/PromethION.main.nf.test.snap @@ -4,16 +4,12 @@ "multiqc_citations.txt:md5,4c806e63a283ec1b7e78cdae3a923d4f", "multiqc_fastqc.txt:md5,333360ff12007d64f2bf7673b0658bed", "multiqc_general_stats.txt:md5,b1999255f9a502618d59be2f2e93bad2", - "multiqc_software_versions.txt:md5,b1e01403f9bdaa81ebabd388d5f9a921", + "multiqc_software_versions.txt:md5,7452f1f7aae2a8a4066c2ef6cd5ceb95", "multiqc_citations.txt:md5,4c806e63a283ec1b7e78cdae3a923d4f", "multiqc_fastqc.txt:md5,333360ff12007d64f2bf7673b0658bed", "multiqc_general_stats.txt:md5,b1999255f9a502618d59be2f2e93bad2", - "multiqc_software_versions.txt:md5,b1e01403f9bdaa81ebabd388d5f9a921" + "multiqc_software_versions.txt:md5,7452f1f7aae2a8a4066c2ef6cd5ceb95" ], - 
"meta": { - "nf-test": "0.8.4", - "nextflow": "23.10.1" - }, - "timestamp": "2024-05-17T12:58:02.572837" + "timestamp": "2024-05-30T13:14:40.99246" } } \ No newline at end of file