diff --git a/CHANGELOG.md b/CHANGELOG.md index 3aa0a7b3..fe58ff28 100755 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,6 +10,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### `Added` - [#206](https://github.com/nf-core/demultiplex/pull/206) Add test with uncompressed data. +- [#208](https://github.com/nf-core/demultiplex/pull/208) Added parameter for removing adapter information from samplesheets. ## 1.4.1 - 2024-02-27 diff --git a/nextflow.config b/nextflow.config index d320b6ba..736fd1f9 100755 --- a/nextflow.config +++ b/nextflow.config @@ -15,6 +15,7 @@ params { // Options: trimming trim_fastq = true // [true, false] + remove_adapter = true // [true, false] // Options: tooling skip_tools = [] // list [fastp, fastqc] diff --git a/nextflow_schema.json b/nextflow_schema.json index a276c2e0..06b8890c 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -240,6 +240,11 @@ "description": "Custom MultiQC yaml file containing HTML including a methods description.", "fa_icon": "fas fa-cog" }, + "remove_adapter": { + "type": "boolean", + "description": "Boolean whether to remove adapter information from Illumina samplesheet. 
If adapter information is present, the various bcl conversion tools will perform adapter trimming already at the demultiplexing step.", + "default": true + }, "validate_params": { "type": "boolean", "description": "Boolean whether to validate parameters against the schema at runtime", diff --git a/tests/pipeline/bclconvert.nf.test.snap b/tests/pipeline/bclconvert.nf.test.snap index f9178e50..20d5df66 100644 --- a/tests/pipeline/bclconvert.nf.test.snap +++ b/tests/pipeline/bclconvert.nf.test.snap @@ -15,17 +15,25 @@ "Quality_Metrics.csv:md5,6614accb1bb414fe312b17b81f5521f7", "Quality_Tile_Metrics.csv:md5,cdc89fd2962bdd4a24f71e186112118a", "RunInfo.xml:md5,03038959f4dd181c86bc97ae71fe270a", - "SampleSheet.csv:md5,2df2e405991814571c021dc8749c2a89", + "SampleSheet.csv:md5,ee5db2e12754e069998b0a96e535238c", "Top_Unknown_Barcodes.csv:md5,2e2faba761137f228e56bd3428453ccc", "fastq_list.csv:md5,05bc84f51840f5754cfb8381b36f2cb0" ] ], - "timestamp": "2023-12-11T16:38:54.135338" + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.0" + }, + "timestamp": "2024-07-23T22:32:32.517671192" }, "software_versions": { "content": [ "{BCLCONVERT={bclconvert=00.000.000.4.2.7}, FALCO={falco=1.2.1}, FASTP={fastp=0.23.4}, MD5SUM={md5sum=8.3}, Workflow={nf-core/demultiplex=v1.5.0dev}}" ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.0" + }, "timestamp": "2024-05-16T08:36:09.489747" }, "multiqc": { @@ -34,6 +42,10 @@ "multiqc_fastp.txt:md5,d65cc0ccd033e949132c2e46b1f81717", "multiqc_bclconvert_bysample.txt:md5,07048c9b73ed85bdac03c476b192c4e3" ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.0" + }, "timestamp": "2024-05-15T01:34:10.988849" } } \ No newline at end of file diff --git a/tests/pipeline/skip_tools.nf.test.snap b/tests/pipeline/skip_tools.nf.test.snap index 4adb35d8..f2fe11b1 100644 --- a/tests/pipeline/skip_tools.nf.test.snap +++ b/tests/pipeline/skip_tools.nf.test.snap @@ -3,6 +3,10 @@ "content": [ "{BCLCONVERT={bclconvert=00.000.000.4.2.7}, 
FALCO={falco=1.2.1}, FASTP={fastp=0.23.4}, MD5SUM={md5sum=8.3}, Workflow={nf-core/demultiplex=v1.5.0dev}}" ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.0" + }, "timestamp": "2024-05-16T08:39:34.361632" }, "skip_fastqc": { @@ -20,12 +24,16 @@ "Quality_Metrics.csv:md5,6614accb1bb414fe312b17b81f5521f7", "Quality_Tile_Metrics.csv:md5,cdc89fd2962bdd4a24f71e186112118a", "RunInfo.xml:md5,03038959f4dd181c86bc97ae71fe270a", - "SampleSheet.csv:md5,2df2e405991814571c021dc8749c2a89", + "SampleSheet.csv:md5,ee5db2e12754e069998b0a96e535238c", "Top_Unknown_Barcodes.csv:md5,2e2faba761137f228e56bd3428453ccc", "fastq_list.csv:md5,05bc84f51840f5754cfb8381b36f2cb0" ] ], - "timestamp": "2023-12-11T16:45:17.736105" + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.0" + }, + "timestamp": "2024-07-23T22:27:37.881306832" }, "skip_fastp": { "content": [ @@ -42,12 +50,16 @@ "Quality_Metrics.csv:md5,6614accb1bb414fe312b17b81f5521f7", "Quality_Tile_Metrics.csv:md5,cdc89fd2962bdd4a24f71e186112118a", "RunInfo.xml:md5,03038959f4dd181c86bc97ae71fe270a", - "SampleSheet.csv:md5,2df2e405991814571c021dc8749c2a89", + "SampleSheet.csv:md5,ee5db2e12754e069998b0a96e535238c", "Top_Unknown_Barcodes.csv:md5,2e2faba761137f228e56bd3428453ccc", "fastq_list.csv:md5,05bc84f51840f5754cfb8381b36f2cb0" ] ], - "timestamp": "2023-12-11T16:43:54.005346" + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.0" + }, + "timestamp": "2024-07-23T22:25:35.140271903" }, "skip_multiqc": { "content": [ @@ -64,12 +76,16 @@ "Quality_Metrics.csv:md5,6614accb1bb414fe312b17b81f5521f7", "Quality_Tile_Metrics.csv:md5,cdc89fd2962bdd4a24f71e186112118a", "RunInfo.xml:md5,03038959f4dd181c86bc97ae71fe270a", - "SampleSheet.csv:md5,2df2e405991814571c021dc8749c2a89", + "SampleSheet.csv:md5,ee5db2e12754e069998b0a96e535238c", "Top_Unknown_Barcodes.csv:md5,2e2faba761137f228e56bd3428453ccc", "fastq_list.csv:md5,05bc84f51840f5754cfb8381b36f2cb0" ] ], - "timestamp": "2023-12-11T16:47:17.03681" + "meta": { + "nf-test": "0.8.4", + 
"nextflow": "23.10.0" + }, + "timestamp": "2024-07-23T22:30:34.614021228" }, "skip_fastp_fastqc": { "content": [ @@ -86,35 +102,55 @@ "Quality_Metrics.csv:md5,6614accb1bb414fe312b17b81f5521f7", "Quality_Tile_Metrics.csv:md5,cdc89fd2962bdd4a24f71e186112118a", "RunInfo.xml:md5,03038959f4dd181c86bc97ae71fe270a", - "SampleSheet.csv:md5,2df2e405991814571c021dc8749c2a89", + "SampleSheet.csv:md5,ee5db2e12754e069998b0a96e535238c", "Top_Unknown_Barcodes.csv:md5,2e2faba761137f228e56bd3428453ccc", "fastq_list.csv:md5,05bc84f51840f5754cfb8381b36f2cb0" ] ], - "timestamp": "2023-12-11T16:46:00.460287" + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.0" + }, + "timestamp": "2024-07-23T22:28:46.845866763" }, "software_versions_skip_fastqc": { "content": [ "{BCLCONVERT={bclconvert=00.000.000.4.2.7}, FALCO={falco=1.2.1}, FASTP={fastp=0.23.4}, MD5SUM={md5sum=8.3}, Workflow={nf-core/demultiplex=v1.5.0dev}}" ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.0" + }, "timestamp": "2024-05-16T08:41:12.480795" }, "software_versions_skip_fastp_fastqc": { "content": [ "{BCLCONVERT={bclconvert=00.000.000.4.2.7}, FALCO={falco=1.2.1}, MD5SUM={md5sum=8.3}, Workflow={nf-core/demultiplex=v1.5.0dev}}" ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.0" + }, "timestamp": "2024-05-16T08:41:37.617916" }, "software_versions_skip_multiqc": { "content": [ "{BCLCONVERT={bclconvert=00.000.000.4.2.7}, FALCO={falco=1.2.1}, FASTP={fastp=0.23.4}, MD5SUM={md5sum=8.3}, Workflow={nf-core/demultiplex=v1.5.0dev}}" ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.0" + }, "timestamp": "2024-05-16T08:42:46.504885" }, "software_versions_skip_fastp": { "content": [ "{BCLCONVERT={bclconvert=00.000.000.4.2.7}, FALCO={falco=1.2.1}, MD5SUM={md5sum=8.3}, Workflow={nf-core/demultiplex=v1.5.0dev}}" ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.0" + }, "timestamp": "2024-05-16T08:40:02.626844" }, "skip_trimming": { @@ -132,11 +168,15 @@ 
"Quality_Metrics.csv:md5,6614accb1bb414fe312b17b81f5521f7", "Quality_Tile_Metrics.csv:md5,cdc89fd2962bdd4a24f71e186112118a", "RunInfo.xml:md5,03038959f4dd181c86bc97ae71fe270a", - "SampleSheet.csv:md5,2df2e405991814571c021dc8749c2a89", + "SampleSheet.csv:md5,ee5db2e12754e069998b0a96e535238c", "Top_Unknown_Barcodes.csv:md5,2e2faba761137f228e56bd3428453ccc", "fastq_list.csv:md5,05bc84f51840f5754cfb8381b36f2cb0" ] ], - "timestamp": "2023-12-11T16:43:15.513271" + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.0" + }, + "timestamp": "2024-07-23T22:23:53.234471432" } } \ No newline at end of file
diff --git a/workflows/demultiplex.nf b/workflows/demultiplex.nf
index 2148af79..21fe2ffe 100644
--- a/workflows/demultiplex.nf
+++ b/workflows/demultiplex.nf
@@ -51,6 +51,38 @@ workflow DEMULTIPLEX {
     ch_multiqc_files = Channel.empty()
     ch_multiqc_reports = Channel.empty()
 
+    // Remove adapter sequences from the Illumina samplesheet to avoid adapter trimming in the demultiplexer tools
+    if (params.remove_adapter && (params.demultiplexer in ["bcl2fastq", "bclconvert", "mkfastq"])) {
+        ch_samplesheet_no_adapter = ch_samplesheet
+            .map{ meta, samplesheet, flowcell, lane ->
+                // Adapter setting key followed by its sequence. The key is matched unanchored, and the
+                // lookahead accepts a trailing comma or end of line, so rows without padded trailing
+                // commas (and AdapterRead1 rows) are blanked as well.
+                def adapter_pattern = /(Adapter(?:Read[12])?),[ACGT]+(?=,|$)/
+                def removal_checker = false
+
+                // Blank the sequence on every matching row; all other rows pass through unchanged.
+                def lines_out = samplesheet
+                    .readLines()
+                    .collect { line ->
+                        def new_line = line.replaceAll(adapter_pattern, '$1,')
+                        if (new_line != line) { removal_checker = true }
+                        new_line + '\n'
+                    }
+                    .join('')
+                if (!removal_checker) {log.warn("Parameter 'remove_adapter' was set to true but no adapters were found in samplesheet")}
+
+                // NOTE(review): the output path is relative to the JVM working directory and is derived only
+                // from the samplesheet's simple name, so two inputs both called e.g. SampleSheet.csv would
+                // write to the same file -- confirm whether a per-flowcell name is needed.
+                def samplesheet_out = new File("${samplesheet.getSimpleName()}_no_adapters.csv")
+                // Assigning .text creates the file if missing and replaces any previous content.
+                samplesheet_out.text = lines_out
+                [meta, file(samplesheet_out), flowcell, lane]
+            }
+        ch_samplesheet = ch_samplesheet_no_adapter
+    }
+
     // Convenience
     ch_samplesheet.dump(tag: 'DEMULTIPLEX::inputs', {FormattingService.prettyFormat(it)})
 