diff --git a/modules/nf-core/gatk4/estimatelibrarycomplexity/main.nf b/modules/nf-core/gatk4/estimatelibrarycomplexity/main.nf index 56a3a310503..e0744e16f7d 100644 --- a/modules/nf-core/gatk4/estimatelibrarycomplexity/main.nf +++ b/modules/nf-core/gatk4/estimatelibrarycomplexity/main.nf @@ -24,6 +24,7 @@ process GATK4_ESTIMATELIBRARYCOMPLEXITY { def args = task.ext.args ?: '' def prefix = task.ext.prefix ?: "${meta.id}" def input_list = input.collect(){"--INPUT $it"}.join(" ") + def reference = fasta ? "--REFERENCE_SEQUENCE ${fasta}" : "" def avail_mem = 3072 if (!task.memory) { @@ -36,7 +37,7 @@ process GATK4_ESTIMATELIBRARYCOMPLEXITY { EstimateLibraryComplexity \\ $input_list \\ --OUTPUT ${prefix}.metrics \\ - --REFERENCE_SEQUENCE ${fasta} \\ + $reference \\ --TMP_DIR . \\ $args diff --git a/modules/nf-core/gatk4/estimatelibrarycomplexity/tests/main.nf.test b/modules/nf-core/gatk4/estimatelibrarycomplexity/tests/main.nf.test new file mode 100644 index 00000000000..e23b65573c9 --- /dev/null +++ b/modules/nf-core/gatk4/estimatelibrarycomplexity/tests/main.nf.test @@ -0,0 +1,113 @@ +nextflow_process { + + name "Test Process GATK4_ESTIMATELIBRARYCOMPLEXITY" + script "../main.nf" + process "GATK4_ESTIMATELIBRARYCOMPLEXITY" + + tag "modules" + tag "modules_nfcore" + tag "gatk4" + tag "gatk4/estimatelibrarycomplexity" + + test("homo_sapiens - bam") { + + when { + process { + """ + // [ meta, input ] + input[0] = [ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/bam/test.paired_end.recalibrated.sorted.bam', checkIfExists: true) + ] + // fasta + input[1] = [] + // fai + input[2] = [] + // dict + input[3] = file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.dict' , checkIfExists: true) + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + file(process.out.metrics[0][1]).name, + process.out.versions + ).match() + } + ) + } + + } + + test("homo_sapiens - cram") { + + when { + process { + """ + // [ meta, input ] + input[0] = [ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/cram/test.paired_end.recalibrated.sorted.cram', checkIfExists: true) + ] + // fasta + input[1] = file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/chr21/sequence/genome.fasta' , checkIfExists: true) + // fai + input[2] = file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/chr21/sequence/genome.fasta.fai' , checkIfExists: true) + // dict + input[3] = file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/chr21/sequence/genome.dict' , checkIfExists: true) + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + file(process.out.metrics[0][1]).name, + process.out.versions + ).match() + } + ) + } + + } + + test("sarscov2 - bam - stub") { + + options "-stub" + + when { + process { + """ + // [ meta, input ] + input[0] = [ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/bam/test.paired_end.recalibrated.sorted.bam', checkIfExists: true) + ] + // fasta + input[1] = [] + // fai + input[2] = [] + // dict + input[3] = file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.dict' , checkIfExists: true) + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + file(process.out.metrics[0][1]).name, + process.out.versions + ).match() + } + ) + } + + } + +} diff --git a/modules/nf-core/gatk4/estimatelibrarycomplexity/tests/main.nf.test.snap b/modules/nf-core/gatk4/estimatelibrarycomplexity/tests/main.nf.test.snap new file mode 100644 index 00000000000..bbcef1471e0 --- /dev/null +++ b/modules/nf-core/gatk4/estimatelibrarycomplexity/tests/main.nf.test.snap @@ -0,0 +1,41 @@ +{ + "homo_sapiens - cram": { + "content": [ + "test.metrics", + [ + "versions.yml:md5,1d9c175d88b6c50c7dc999c1f70261a8" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-03-20T14:41:39.857566" + }, + "sarscov2 - bam - stub": { + "content": [ + "test.metrics", + [ + "versions.yml:md5,1d9c175d88b6c50c7dc999c1f70261a8" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-03-20T14:27:55.337569" + }, + "homo_sapiens - bam": { + "content": [ + "test.metrics", + [ + "versions.yml:md5,1d9c175d88b6c50c7dc999c1f70261a8" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-03-20T14:24:41.460191" + } +} \ No newline at end of file diff --git a/modules/nf-core/gatk4/estimatelibrarycomplexity/tests/tags.yml b/modules/nf-core/gatk4/estimatelibrarycomplexity/tests/tags.yml new file mode 100644 index 00000000000..0949d08563b --- /dev/null +++ b/modules/nf-core/gatk4/estimatelibrarycomplexity/tests/tags.yml @@ -0,0 +1,2 @@ +gatk4/estimatelibrarycomplexity: + - "modules/nf-core/gatk4/estimatelibrarycomplexity/**" diff --git a/modules/nf-core/picard/addorreplacereadgroups/main.nf b/modules/nf-core/picard/addorreplacereadgroups/main.nf index 4ea018d56bd..4300ba7f8cf 100644 --- a/modules/nf-core/picard/addorreplacereadgroups/main.nf +++ b/modules/nf-core/picard/addorreplacereadgroups/main.nf @@ -8,7 +8,7 @@ process PICARD_ADDORREPLACEREADGROUPS { 'biocontainers/picard:3.1.1--hdfd78af_0' }" input: - tuple val(meta), path(bam) + tuple val(meta), path(reads) tuple val(meta2), path(fasta) tuple val(meta3), path(fasta_index) @@ -24,9 +24,8 @@ process PICARD_ADDORREPLACEREADGROUPS { script: def args = task.ext.args ?: '' def prefix = task.ext.prefix ?: "${meta.id}" - def suffix = task.ext.suffix ?: "${bam.getExtension()}" + def suffix = task.ext.suffix ?: "${reads.getExtension()}" def reference = fasta ? "--REFERENCE_SEQUENCE ${fasta}" : "" - def create_index = ( suffix == "bam" )? "--CREATE_INDEX" : "" def avail_mem = 3072 if (!task.memory) { log.info '[Picard AddOrReplaceReadGroups] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.' @@ -34,7 +33,7 @@ process PICARD_ADDORREPLACEREADGROUPS { avail_mem = (task.memory.mega*0.8).intValue() } - if ("$bam" == "${prefix}.${suffix}") error "Input and output names are the same, use \"task.ext.prefix\" to disambiguate!" + if ("$reads" == "${prefix}.${suffix}") error "Input and output names are the same, use \"task.ext.prefix\" to disambiguate!" """ picard \\ @@ -42,8 +41,7 @@ process PICARD_ADDORREPLACEREADGROUPS { AddOrReplaceReadGroups \\ $args \\ $reference \\ - $create_index \\ - --INPUT ${bam} \\ + --INPUT ${reads} \\ --OUTPUT ${prefix}.${suffix} cat <<-END_VERSIONS > versions.yml @@ -54,15 +52,10 @@ process PICARD_ADDORREPLACEREADGROUPS { stub: def prefix = task.ext.prefix ?: "${meta.id}" - def suffix = task.ext.suffix ?: "${bam.getExtension()}" - if ("$bam" == "${prefix}.${suffix}") error "Input and output names are the same, use \"task.ext.prefix\" to disambiguate!" - def create_index = "" - if (suffix == "bam") { - create_index = "touch ${prefix}.${suffix}.bai" - } + def suffix = task.ext.suffix ?: "${reads.getExtension()}" + if ("$reads" == "${prefix}.${suffix}") error "Input and output names are the same, use \"task.ext.prefix\" to disambiguate!" """ touch ${prefix}.${suffix} - ${create_index} cat <<-END_VERSIONS > versions.yml "${task.process}": diff --git a/modules/nf-core/picard/addorreplacereadgroups/meta.yml b/modules/nf-core/picard/addorreplacereadgroups/meta.yml index 17110b7875f..efd5b86dc05 100644 --- a/modules/nf-core/picard/addorreplacereadgroups/meta.yml +++ b/modules/nf-core/picard/addorreplacereadgroups/meta.yml @@ -30,10 +30,10 @@ input: description: | Groovy Map containing sample information e.g. [ id:'test', single_end:false ] - - bam: + - reads: type: file - description: Input BAM file - pattern: "*.{bam}" + description: Sequence reads file, can be SAM/BAM/CRAM format + pattern: "*.{bam,cram,sam}" - fasta: type: file description: Reference genome file diff --git a/modules/nf-core/picard/addorreplacereadgroups/tests/bam.config b/modules/nf-core/picard/addorreplacereadgroups/tests/bam.config index 8ce837e53f8..3f37c2fd9e1 100644 --- a/modules/nf-core/picard/addorreplacereadgroups/tests/bam.config +++ b/modules/nf-core/picard/addorreplacereadgroups/tests/bam.config @@ -2,6 +2,7 @@ process { withName: 'PICARD_ADDORREPLACEREADGROUPS'{ ext.prefix = { "${meta.id}.replaced"} ext.args = {[ + "--CREATE_INDEX", "-LB ${meta.id}", "-PL ILLUMINA", "-PU bc1", diff --git a/modules/nf-core/picard/addorreplacereadgroups/tests/main.nf.test b/modules/nf-core/picard/addorreplacereadgroups/tests/main.nf.test index 6d50cfe2689..9c029354f15 100644 --- a/modules/nf-core/picard/addorreplacereadgroups/tests/main.nf.test +++ b/modules/nf-core/picard/addorreplacereadgroups/tests/main.nf.test @@ -26,8 +26,12 @@ nextflow_process { then { assertAll( { assert process.success }, - { assert snapshot(file(process.out.bam[0][1]).name).match("bam_name") }, - { assert snapshot(file(process.out.bai[0][1]).name).match("bai_name") }, + { assert snapshot( + file(process.out.bam[0][1]).name, + file(process.out.bai[0][1]).name, + process.out.versions + ).match() + }, ) } @@ -53,8 +57,11 @@ nextflow_process { then { assertAll( { assert process.success }, - { assert snapshot(file(process.out.cram[0][1]).name).match("cram_name") }, - { assert snapshot(process.out.versions).match("versions") }, + { assert snapshot( + file(process.out.cram[0][1]).name, + process.out.versions + ).match() + }, ) } diff --git a/modules/nf-core/picard/addorreplacereadgroups/tests/main.nf.test.snap b/modules/nf-core/picard/addorreplacereadgroups/tests/main.nf.test.snap index f80eaba047d..63a55843cee 100644 --- a/modules/nf-core/picard/addorreplacereadgroups/tests/main.nf.test.snap +++ b/modules/nf-core/picard/addorreplacereadgroups/tests/main.nf.test.snap @@ -1,13 +1,16 @@ { - "bam_name": { + "homo_sapiens - cram": { "content": [ - "null.replaced.bam" + "test.replaced.cram", + [ + "versions.yml:md5,0a6f049f94501dcf23aabfbc0e272891" + ] ], "meta": { "nf-test": "0.8.4", "nextflow": "23.10.1" }, - "timestamp": "2024-03-20T12:05:21.587215" + "timestamp": "2024-03-21T12:05:47.668865" }, "sarscov2 - bam - stub": { "content": [ @@ -21,12 +24,7 @@ ] ], "1": [ - [ - { - - }, - "null.replaced.bam.bai:md5,d41d8cd98f00b204e9800998ecf8427e" - ] + ], "2": [ @@ -35,12 +33,7 @@ "versions.yml:md5,0a6f049f94501dcf23aabfbc0e272891" ], "bai": [ - [ - { - - }, - "null.replaced.bam.bai:md5,d41d8cd98f00b204e9800998ecf8427e" - ] + ], "bam": [ [ @@ -62,10 +55,12 @@ "nf-test": "0.8.4", "nextflow": "23.10.1" }, - "timestamp": "2024-03-20T12:54:03.639457" + "timestamp": "2024-03-21T10:51:24.530553" }, - "versions": { + "sarscov2 - bam": { "content": [ + "null.replaced.bam", + "null.replaced.bai", [ "versions.yml:md5,0a6f049f94501dcf23aabfbc0e272891" ] @@ -74,26 +69,6 @@ "nf-test": "0.8.4", "nextflow": "23.10.1" }, - "timestamp": "2024-03-19T17:32:24.906639" - }, - "cram_name": { - "content": [ - "test.replaced.cram" - ], - "meta": { - "nf-test": "0.8.4", - "nextflow": "23.10.1" - }, - "timestamp": "2024-03-20T12:06:05.10743" - }, - "bai_name": { - "content": [ - "null.replaced.bai" - ], - "meta": { - "nf-test": "0.8.4", - "nextflow": "23.10.1" - }, - "timestamp": "2024-03-20T12:05:21.604866" + "timestamp": "2024-03-21T12:05:29.696728" } } \ No newline at end of file diff --git a/modules/nf-core/picard/markduplicates/main.nf b/modules/nf-core/picard/markduplicates/main.nf index 80930cc41b6..ad0b296361b 100644 --- a/modules/nf-core/picard/markduplicates/main.nf +++ b/modules/nf-core/picard/markduplicates/main.nf @@ -8,13 +8,14 @@ process PICARD_MARKDUPLICATES { 'biocontainers/picard:3.1.1--hdfd78af_0' }" input: - tuple val(meta), path(bam) + tuple val(meta), path(reads) tuple val(meta2), path(fasta) tuple val(meta3), path(fai) output: - tuple val(meta), path("*.bam") , emit: bam - tuple val(meta), path("*.bai") , optional:true, emit: bai + tuple val(meta), path("*.bam") , emit: bam, optional: true + tuple val(meta), path("*.bai") , emit: bai, optional: true + tuple val(meta), path("*.cram"), emit: cram, optional: true tuple val(meta), path("*.metrics.txt"), emit: metrics path "versions.yml" , emit: versions @@ -24,6 +25,8 @@ process PICARD_MARKDUPLICATES { script: def args = task.ext.args ?: '' def prefix = task.ext.prefix ?: "${meta.id}" + def suffix = task.ext.suffix ?: "${reads.getExtension()}" + def reference = fasta ? "--REFERENCE_SEQUENCE ${fasta}" : "" def avail_mem = 3072 if (!task.memory) { log.info '[Picard MarkDuplicates] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.' @@ -31,16 +34,16 @@ process PICARD_MARKDUPLICATES { avail_mem = (task.memory.mega*0.8).intValue() } - if ("$bam" == "${prefix}.bam") error "Input and output names are the same, use \"task.ext.prefix\" to disambiguate!" + if ("$reads" == "${prefix}.${suffix}") error "Input and output names are the same, use \"task.ext.prefix\" to disambiguate!" """ picard \\ -Xmx${avail_mem}M \\ MarkDuplicates \\ $args \\ - --INPUT $bam \\ - --OUTPUT ${prefix}.bam \\ - --REFERENCE_SEQUENCE $fasta \\ + --INPUT $reads \\ + --OUTPUT ${prefix}.${suffix} \\ + $reference \\ --METRICS_FILE ${prefix}.MarkDuplicates.metrics.txt cat <<-END_VERSIONS > versions.yml @@ -51,10 +54,10 @@ process PICARD_MARKDUPLICATES { stub: def prefix = task.ext.prefix ?: "${meta.id}" - if ("$bam" == "${prefix}.bam") error "Input and output names are the same, use \"task.ext.prefix\" to disambiguate!" + def suffix = task.ext.suffix ?: "${reads.getExtension()}" + if ("$reads" == "${prefix}.${suffix}") error "Input and output names are the same, use \"task.ext.prefix\" to disambiguate!" """ - touch ${prefix}.bam - touch ${prefix}.bam.bai + touch ${prefix}.${suffix} touch ${prefix}.MarkDuplicates.metrics.txt cat <<-END_VERSIONS > versions.yml diff --git a/modules/nf-core/picard/markduplicates/meta.yml b/modules/nf-core/picard/markduplicates/meta.yml index 1ab90c07542..1f0ffe16c2f 100644 --- a/modules/nf-core/picard/markduplicates/meta.yml +++ b/modules/nf-core/picard/markduplicates/meta.yml @@ -21,9 +21,9 @@ input: description: | Groovy Map containing sample information e.g. [ id:'test', single_end:false ] - - bam: + - reads: type: file - description: BAM file + description: Sequence reads file, can be SAM/BAM/CRAM format pattern: "*.{bam,cram,sam}" - meta2: type: map @@ -32,7 +32,7 @@ input: e.g. [ id:'genome' ] - fasta: type: file - description: Reference genome fasta file + description: Reference genome fasta file, required for CRAM input pattern: "*.{fasta,fa}" - meta3: type: map @@ -57,6 +57,10 @@ output: type: file description: An optional BAM index file. If desired, --CREATE_INDEX must be passed as a flag pattern: "*.{bai}" + - cram: + type: file + description: Output CRAM file + pattern: "*.{cram}" - metrics: type: file description: Duplicate metrics file generated by picard diff --git a/modules/nf-core/picard/markduplicates/tests/main.nf.test b/modules/nf-core/picard/markduplicates/tests/main.nf.test index c5a29b4bdd3..e3e97f6cc3a 100644 --- a/modules/nf-core/picard/markduplicates/tests/main.nf.test +++ b/modules/nf-core/picard/markduplicates/tests/main.nf.test @@ -18,14 +18,8 @@ nextflow_process { [ id:'test', single_end:false ], // meta map file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.bam', checkIfExists: true) ]) - input[1] = Channel.of([ - [ id:'genome' ], - file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) - ]) - input[2] = Channel.of([ - [ id:'genome' ], - file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta.fai', checkIfExists: true) - ]) + input[1] = [ [:], [] ] + input[2] = [ [:], [] ] """ } } @@ -49,14 +43,8 @@ nextflow_process { [ id:'test', single_end:false ], // meta map file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true) ]) - input[1] = Channel.of([ - [ id:'genome' ], - file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) - ]) - input[2] = Channel.of([ - [ id:'genome' ], - file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta.fai', checkIfExists: true) - ]) + input[1] = [ [:], [] ] + input[2] = [ [:], [] ] """ } } @@ -95,7 +83,7 @@ nextflow_process { then { assertAll( { assert process.success }, - { assert snapshot(file(process.out.bam[0][1]).name).match("cram_name") }, + { assert snapshot(file(process.out.cram[0][1]).name).match("cram_name") }, { assert snapshot(path(process.out.metrics.get(0).get(1)).readLines()[0..2]).match("cram_metrics") }, { assert snapshot(process.out.versions).match("cram_versions") } ) diff --git a/modules/nf-core/picard/markduplicates/tests/main.nf.test.snap b/modules/nf-core/picard/markduplicates/tests/main.nf.test.snap index 31c9130dc3d..eb17111e4c5 100644 --- a/modules/nf-core/picard/markduplicates/tests/main.nf.test.snap +++ b/modules/nf-core/picard/markduplicates/tests/main.nf.test.snap @@ -5,39 +5,59 @@ "versions.yml:md5,b699af51b1956f3810f8a7c066e0ab17" ] ], - "timestamp": "2024-01-19T10:26:45.092349" + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-03-20T15:31:50.928021" }, "unsorted_bam_name": { "content": [ "test.marked.bam" ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, "timestamp": "2024-01-19T10:26:28.100755" }, "cram_metrics": { "content": [ [ "## htsjdk.samtools.metrics.StringHeader", - "# MarkDuplicates --INPUT test.paired_end.sorted.cram --OUTPUT test.marked.bam --METRICS_FILE test.marked.MarkDuplicates.metrics.txt --ASSUME_SORT_ORDER queryname --REFERENCE_SEQUENCE genome.fasta --MAX_SEQUENCES_FOR_DISK_READ_ENDS_MAP 50000 --MAX_FILE_HANDLES_FOR_READ_ENDS_MAP 8000 --SORTING_COLLECTION_SIZE_RATIO 0.25 --TAG_DUPLICATE_SET_MEMBERS false --REMOVE_SEQUENCING_DUPLICATES false --TAGGING_POLICY DontTag --CLEAR_DT true --DUPLEX_UMI false --FLOW_MODE false --FLOW_QUALITY_SUM_STRATEGY false --USE_END_IN_UNPAIRED_READS false --USE_UNPAIRED_CLIPPED_END false --UNPAIRED_END_UNCERTAINTY 0 --FLOW_SKIP_FIRST_N_FLOWS 0 --FLOW_Q_IS_KNOWN_END false --FLOW_EFFECTIVE_QUALITY_THRESHOLD 15 --ADD_PG_TAG_TO_READS true --REMOVE_DUPLICATES false --ASSUME_SORTED false --DUPLICATE_SCORING_STRATEGY SUM_OF_BASE_QUALITIES --PROGRAM_RECORD_ID MarkDuplicates --PROGRAM_GROUP_NAME MarkDuplicates --READ_NAME_REGEX --OPTICAL_DUPLICATE_PIXEL_DISTANCE 100 --MAX_OPTICAL_DUPLICATE_SET_SIZE 300000 --VERBOSITY INFO --QUIET false --VALIDATION_STRINGENCY STRICT --COMPRESSION_LEVEL 5 --MAX_RECORDS_IN_RAM 500000 --CREATE_INDEX false --CREATE_MD5_FILE false --help false --version false --showHidden false --USE_JDK_DEFLATER false --USE_JDK_INFLATER false", + "# MarkDuplicates --INPUT test.paired_end.sorted.cram --OUTPUT test.marked.cram --METRICS_FILE test.marked.MarkDuplicates.metrics.txt --ASSUME_SORT_ORDER queryname --REFERENCE_SEQUENCE genome.fasta --MAX_SEQUENCES_FOR_DISK_READ_ENDS_MAP 50000 --MAX_FILE_HANDLES_FOR_READ_ENDS_MAP 8000 --SORTING_COLLECTION_SIZE_RATIO 0.25 --TAG_DUPLICATE_SET_MEMBERS false --REMOVE_SEQUENCING_DUPLICATES false --TAGGING_POLICY DontTag --CLEAR_DT true --DUPLEX_UMI false --FLOW_MODE false --FLOW_QUALITY_SUM_STRATEGY false --USE_END_IN_UNPAIRED_READS false --USE_UNPAIRED_CLIPPED_END false --UNPAIRED_END_UNCERTAINTY 0 --FLOW_SKIP_FIRST_N_FLOWS 0 --FLOW_Q_IS_KNOWN_END false --FLOW_EFFECTIVE_QUALITY_THRESHOLD 15 --ADD_PG_TAG_TO_READS true --REMOVE_DUPLICATES false --ASSUME_SORTED false --DUPLICATE_SCORING_STRATEGY SUM_OF_BASE_QUALITIES --PROGRAM_RECORD_ID MarkDuplicates --PROGRAM_GROUP_NAME MarkDuplicates --READ_NAME_REGEX --OPTICAL_DUPLICATE_PIXEL_DISTANCE 100 --MAX_OPTICAL_DUPLICATE_SET_SIZE 300000 --VERBOSITY INFO --QUIET false --VALIDATION_STRINGENCY STRICT --COMPRESSION_LEVEL 5 --MAX_RECORDS_IN_RAM 500000 --CREATE_INDEX false --CREATE_MD5_FILE false --help false --version false --showHidden false --USE_JDK_DEFLATER false --USE_JDK_INFLATER false", "## htsjdk.samtools.metrics.StringHeader" ] ], - "timestamp": "2024-01-19T10:27:03.253071" + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-03-20T15:25:47.518152" }, "sorted_bam_metrics": { "content": [ [ "## htsjdk.samtools.metrics.StringHeader", - "# MarkDuplicates --INPUT test.paired_end.sorted.bam --OUTPUT test.marked.bam --METRICS_FILE test.marked.MarkDuplicates.metrics.txt --ASSUME_SORT_ORDER queryname --REFERENCE_SEQUENCE genome.fasta --MAX_SEQUENCES_FOR_DISK_READ_ENDS_MAP 50000 --MAX_FILE_HANDLES_FOR_READ_ENDS_MAP 8000 --SORTING_COLLECTION_SIZE_RATIO 0.25 --TAG_DUPLICATE_SET_MEMBERS false --REMOVE_SEQUENCING_DUPLICATES false --TAGGING_POLICY DontTag --CLEAR_DT true --DUPLEX_UMI false --FLOW_MODE false --FLOW_QUALITY_SUM_STRATEGY false --USE_END_IN_UNPAIRED_READS false --USE_UNPAIRED_CLIPPED_END false --UNPAIRED_END_UNCERTAINTY 0 --FLOW_SKIP_FIRST_N_FLOWS 0 --FLOW_Q_IS_KNOWN_END false --FLOW_EFFECTIVE_QUALITY_THRESHOLD 15 --ADD_PG_TAG_TO_READS true --REMOVE_DUPLICATES false --ASSUME_SORTED false --DUPLICATE_SCORING_STRATEGY SUM_OF_BASE_QUALITIES --PROGRAM_RECORD_ID MarkDuplicates --PROGRAM_GROUP_NAME MarkDuplicates --READ_NAME_REGEX --OPTICAL_DUPLICATE_PIXEL_DISTANCE 100 --MAX_OPTICAL_DUPLICATE_SET_SIZE 300000 --VERBOSITY INFO --QUIET false --VALIDATION_STRINGENCY STRICT --COMPRESSION_LEVEL 5 --MAX_RECORDS_IN_RAM 500000 --CREATE_INDEX false --CREATE_MD5_FILE false --help false --version false --showHidden false --USE_JDK_DEFLATER false --USE_JDK_INFLATER false", + "# MarkDuplicates --INPUT test.paired_end.sorted.bam --OUTPUT test.marked.bam --METRICS_FILE test.marked.MarkDuplicates.metrics.txt --ASSUME_SORT_ORDER queryname --MAX_SEQUENCES_FOR_DISK_READ_ENDS_MAP 50000 --MAX_FILE_HANDLES_FOR_READ_ENDS_MAP 8000 --SORTING_COLLECTION_SIZE_RATIO 0.25 --TAG_DUPLICATE_SET_MEMBERS false --REMOVE_SEQUENCING_DUPLICATES false --TAGGING_POLICY DontTag --CLEAR_DT true --DUPLEX_UMI false --FLOW_MODE false --FLOW_QUALITY_SUM_STRATEGY false --USE_END_IN_UNPAIRED_READS false --USE_UNPAIRED_CLIPPED_END false --UNPAIRED_END_UNCERTAINTY 0 --FLOW_SKIP_FIRST_N_FLOWS 0 --FLOW_Q_IS_KNOWN_END false --FLOW_EFFECTIVE_QUALITY_THRESHOLD 15 --ADD_PG_TAG_TO_READS true --REMOVE_DUPLICATES false --ASSUME_SORTED false --DUPLICATE_SCORING_STRATEGY SUM_OF_BASE_QUALITIES --PROGRAM_RECORD_ID MarkDuplicates --PROGRAM_GROUP_NAME MarkDuplicates --READ_NAME_REGEX --OPTICAL_DUPLICATE_PIXEL_DISTANCE 100 --MAX_OPTICAL_DUPLICATE_SET_SIZE 300000 --VERBOSITY INFO --QUIET false --VALIDATION_STRINGENCY STRICT --COMPRESSION_LEVEL 5 --MAX_RECORDS_IN_RAM 500000 --CREATE_INDEX false --CREATE_MD5_FILE false --help false --version false --showHidden false --USE_JDK_DEFLATER false --USE_JDK_INFLATER false", "## htsjdk.samtools.metrics.StringHeader" ] ], - "timestamp": "2024-01-19T10:26:45.086503" + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-03-21T11:39:10.318331" }, "cram_name": { "content": [ - "test.marked.bam" + "test.marked.cram" ], - "timestamp": "2024-01-19T10:27:03.241617" + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-03-20T15:25:47.459663" }, "cram_versions": { "content": [ @@ -45,6 +65,10 @@ "versions.yml:md5,b699af51b1956f3810f8a7c066e0ab17" ] ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, "timestamp": "2024-01-19T10:27:03.26989" }, "unsorted_bam_versions": { @@ -53,22 +77,34 @@ "versions.yml:md5,b699af51b1956f3810f8a7c066e0ab17" ] ], - "timestamp": "2024-01-19T10:26:28.159071" + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-03-20T15:31:24.040403" }, "unsorted_bam_metrics": { "content": [ [ "## htsjdk.samtools.metrics.StringHeader", - "# MarkDuplicates --INPUT test.paired_end.bam --OUTPUT test.marked.bam --METRICS_FILE test.marked.MarkDuplicates.metrics.txt --ASSUME_SORT_ORDER queryname --REFERENCE_SEQUENCE genome.fasta --MAX_SEQUENCES_FOR_DISK_READ_ENDS_MAP 50000 --MAX_FILE_HANDLES_FOR_READ_ENDS_MAP 8000 --SORTING_COLLECTION_SIZE_RATIO 0.25 --TAG_DUPLICATE_SET_MEMBERS false --REMOVE_SEQUENCING_DUPLICATES false --TAGGING_POLICY DontTag --CLEAR_DT true --DUPLEX_UMI false --FLOW_MODE false --FLOW_QUALITY_SUM_STRATEGY false --USE_END_IN_UNPAIRED_READS false --USE_UNPAIRED_CLIPPED_END false --UNPAIRED_END_UNCERTAINTY 0 --FLOW_SKIP_FIRST_N_FLOWS 0 --FLOW_Q_IS_KNOWN_END false --FLOW_EFFECTIVE_QUALITY_THRESHOLD 15 --ADD_PG_TAG_TO_READS true --REMOVE_DUPLICATES false --ASSUME_SORTED false --DUPLICATE_SCORING_STRATEGY SUM_OF_BASE_QUALITIES --PROGRAM_RECORD_ID MarkDuplicates --PROGRAM_GROUP_NAME MarkDuplicates --READ_NAME_REGEX --OPTICAL_DUPLICATE_PIXEL_DISTANCE 100 --MAX_OPTICAL_DUPLICATE_SET_SIZE 300000 --VERBOSITY INFO --QUIET false --VALIDATION_STRINGENCY STRICT --COMPRESSION_LEVEL 5 --MAX_RECORDS_IN_RAM 500000 --CREATE_INDEX false --CREATE_MD5_FILE false --help false --version false --showHidden false --USE_JDK_DEFLATER false --USE_JDK_INFLATER false", + "# MarkDuplicates --INPUT test.paired_end.bam --OUTPUT test.marked.bam --METRICS_FILE test.marked.MarkDuplicates.metrics.txt --ASSUME_SORT_ORDER queryname --MAX_SEQUENCES_FOR_DISK_READ_ENDS_MAP 50000 --MAX_FILE_HANDLES_FOR_READ_ENDS_MAP 8000 --SORTING_COLLECTION_SIZE_RATIO 0.25 --TAG_DUPLICATE_SET_MEMBERS false --REMOVE_SEQUENCING_DUPLICATES false --TAGGING_POLICY DontTag --CLEAR_DT true --DUPLEX_UMI false --FLOW_MODE false --FLOW_QUALITY_SUM_STRATEGY false --USE_END_IN_UNPAIRED_READS false --USE_UNPAIRED_CLIPPED_END false --UNPAIRED_END_UNCERTAINTY 0 --FLOW_SKIP_FIRST_N_FLOWS 0 --FLOW_Q_IS_KNOWN_END false --FLOW_EFFECTIVE_QUALITY_THRESHOLD 15 --ADD_PG_TAG_TO_READS true --REMOVE_DUPLICATES false --ASSUME_SORTED false --DUPLICATE_SCORING_STRATEGY SUM_OF_BASE_QUALITIES --PROGRAM_RECORD_ID MarkDuplicates --PROGRAM_GROUP_NAME MarkDuplicates --READ_NAME_REGEX --OPTICAL_DUPLICATE_PIXEL_DISTANCE 100 --MAX_OPTICAL_DUPLICATE_SET_SIZE 300000 --VERBOSITY INFO --QUIET false --VALIDATION_STRINGENCY STRICT --COMPRESSION_LEVEL 5 --MAX_RECORDS_IN_RAM 500000 --CREATE_INDEX false --CREATE_MD5_FILE false --help false --version false --showHidden false --USE_JDK_DEFLATER false --USE_JDK_INFLATER false", "## htsjdk.samtools.metrics.StringHeader" ] ], - "timestamp": "2024-01-19T10:26:28.143979" + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-03-21T10:51:12.831787" }, "sorted_bam_name": { "content": [ "test.marked.bam" ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, "timestamp": "2024-01-19T10:26:45.080116" } } \ No newline at end of file diff --git a/subworkflows/nf-core/bam_markduplicates_picard/main.nf b/subworkflows/nf-core/bam_markduplicates_picard/main.nf index de8130fb512..2de059b84aa 100644 --- a/subworkflows/nf-core/bam_markduplicates_picard/main.nf +++ b/subworkflows/nf-core/bam_markduplicates_picard/main.nf @@ -9,7 +9,7 @@ include { BAM_STATS_SAMTOOLS } from '../bam_stats_samtools/main' workflow BAM_MARKDUPLICATES_PICARD { take: - ch_bam // channel: [ val(meta), path(bam) ] + ch_reads // channel: [ val(meta), path(reads) ] ch_fasta // channel: [ path(fasta) ] ch_fai // channel: [ path(fai) ] @@ -17,27 +17,33 @@ workflow BAM_MARKDUPLICATES_PICARD { ch_versions = Channel.empty() - PICARD_MARKDUPLICATES ( ch_bam, ch_fasta, ch_fai ) + PICARD_MARKDUPLICATES ( ch_reads, ch_fasta, ch_fai ) ch_versions = ch_versions.mix(PICARD_MARKDUPLICATES.out.versions.first()) - SAMTOOLS_INDEX ( PICARD_MARKDUPLICATES.out.bam ) + ch_markdup = PICARD_MARKDUPLICATES.out.bam.mix(PICARD_MARKDUPLICATES.out.cram) + + SAMTOOLS_INDEX ( ch_markdup ) ch_versions = ch_versions.mix(SAMTOOLS_INDEX.out.versions.first()) - ch_bam_bai = PICARD_MARKDUPLICATES.out.bam - .join(SAMTOOLS_INDEX.out.bai, by: [0], remainder: true) - .join(SAMTOOLS_INDEX.out.csi, by: [0], remainder: true) - .map{meta, bam, bai, csi -> - if (bai) [ meta, bam, bai ] - else [ meta, bam, csi ] + ch_reads_index = ch_markdup + .join(SAMTOOLS_INDEX.out.bai, by: [0], remainder: true) + .join(SAMTOOLS_INDEX.out.crai, by: [0], remainder: true) + .join(SAMTOOLS_INDEX.out.csi, by: [0], remainder: true) + .map{meta, reads, bai, crai, csi -> + if (bai) [ meta, reads, bai ] + else if (crai) [ meta, reads, crai ] + else [ meta, reads, csi ] } - BAM_STATS_SAMTOOLS ( ch_bam_bai, ch_fasta ) + BAM_STATS_SAMTOOLS ( ch_reads_index, ch_fasta ) ch_versions = ch_versions.mix(BAM_STATS_SAMTOOLS.out.versions) emit: bam = PICARD_MARKDUPLICATES.out.bam // channel: [ val(meta), path(bam) ] - metrics = PICARD_MARKDUPLICATES.out.metrics // channel: [ val(meta), path(bam) ] + cram = PICARD_MARKDUPLICATES.out.cram // channel: [ val(meta), path(cram) ] + metrics = PICARD_MARKDUPLICATES.out.metrics // channel: [ val(meta), path(metrics) ] bai = SAMTOOLS_INDEX.out.bai // channel: [ val(meta), path(bai) ] + crai = SAMTOOLS_INDEX.out.crai // channel: [ val(meta), path(crai) ] csi = SAMTOOLS_INDEX.out.csi // channel: [ val(meta), path(csi) ] stats = BAM_STATS_SAMTOOLS.out.stats // channel: [ val(meta), path(stats) ] diff --git a/subworkflows/nf-core/bam_markduplicates_picard/meta.yml b/subworkflows/nf-core/bam_markduplicates_picard/meta.yml index fe63068e693..433d35b2b89 100644 --- a/subworkflows/nf-core/bam_markduplicates_picard/meta.yml +++ b/subworkflows/nf-core/bam_markduplicates_picard/meta.yml @@ -14,13 +14,13 @@ components: - samtools/flagstat - bam_stats_samtools input: - - ch_bam: + - ch_reads: description: | - BAM/CRAM/SAM file - Structure: [ val(meta), path(bam) ] + Sequence reads in BAM/CRAM/SAM format + Structure: [ val(meta), path(reads) ] - ch_fasta: description: | - Reference genome fasta file + Reference genome fasta file required for CRAM input Structure: [ path(fasta) ] - ch_fasta: description: | @@ -29,12 +29,20 @@ input: output: - bam: description: | - processed BAM/CRAM/SAM file + processed BAM/SAM file Structure: [ val(meta), path(bam) ] - bai: description: | - BAM/CRAM/SAM samtools index + BAM/SAM samtools index Structure: [ val(meta), path(bai) ] + - cram: + description: | + processed CRAM file + Structure: [ val(meta), path(cram) ] + - crai: + description: | + CRAM samtools index + Structure: [ val(meta), path(crai) ] - csi: description: | CSI samtools index diff --git a/subworkflows/nf-core/bam_markduplicates_picard/tests/main.nf.test b/subworkflows/nf-core/bam_markduplicates_picard/tests/main.nf.test index d8d24290842..5ef337dc51a 100644 --- a/subworkflows/nf-core/bam_markduplicates_picard/tests/main.nf.test +++ b/subworkflows/nf-core/bam_markduplicates_picard/tests/main.nf.test @@ -79,8 +79,8 @@ nextflow_workflow { assertAll( { assert workflow.success}, { assert snapshot( - path(workflow.out.bam[0][1]), - path(workflow.out.bai[0][1]), + file(workflow.out.cram[0][1]).name, + path(workflow.out.crai[0][1]), path(workflow.out.flagstat[0][1]), path(workflow.out.idxstats[0][1]), path(workflow.out.stats[0][1]), diff --git a/subworkflows/nf-core/bam_markduplicates_picard/tests/main.nf.test.snap b/subworkflows/nf-core/bam_markduplicates_picard/tests/main.nf.test.snap index 2f02f747a07..caf4ac8ad18 100644 --- a/subworkflows/nf-core/bam_markduplicates_picard/tests/main.nf.test.snap +++ b/subworkflows/nf-core/bam_markduplicates_picard/tests/main.nf.test.snap @@ -1,17 +1,17 @@ { "homo_sapiens - cram": { "content": [ - "test.bam:md5,6641dc05efa8384a061f378d86d922cd", - "test.bam.bai:md5,c41c60d8a94adebe53b6df80b6e90d38", + "test.cram", + "test.cram.crai:md5,78d47ba01ac4e05f3ae1e353902a989e", "test.flagstat:md5,93b0ef463df947ede1f42ff60396c34d", "test.idxstats:md5,e179601fa7b8ebce81ac3765206f6c15", - "test.stats:md5,9ac28e327a7797d7bb6a5922fde59ed1" + "test.stats:md5,c2f74a4d9b2377bcf4f4f184da3801af" ], "meta": { "nf-test": "0.8.4", - "nextflow": "24.01.0" + "nextflow": "23.10.1" }, - "timestamp": "2024-02-13T16:46:16.302755774" + "timestamp": "2024-03-20T20:45:38.364189" }, "sarscov2 - bam": { "content": [ @@ -23,8 +23,8 @@ ], "meta": { "nf-test": "0.8.4", - "nextflow": "24.01.0" + "nextflow": "23.10.1" }, - "timestamp": "2024-02-13T16:46:02.942115679" + "timestamp": "2024-03-21T11:38:08.434529" } } \ No newline at end of file diff --git a/tests/config/pytest_modules.yml b/tests/config/pytest_modules.yml index 49ba2b0102b..42c8e0b0c4e 100644 --- a/tests/config/pytest_modules.yml +++ b/tests/config/pytest_modules.yml @@ -724,9 +724,6 @@ gatk4/createsomaticpanelofnormals: gatk4/determinegermlinecontigploidy: - modules/nf-core/gatk4/determinegermlinecontigploidy/** - tests/modules/nf-core/gatk4/determinegermlinecontigploidy/** -gatk4/estimatelibrarycomplexity: - - modules/nf-core/gatk4/estimatelibrarycomplexity/** - - tests/modules/nf-core/gatk4/estimatelibrarycomplexity/** gatk4/fastqtosam: - modules/nf-core/gatk4/fastqtosam/** - tests/modules/nf-core/gatk4/fastqtosam/** diff --git a/tests/modules/nf-core/gatk4/estimatelibrarycomplexity/main.nf b/tests/modules/nf-core/gatk4/estimatelibrarycomplexity/main.nf deleted file mode 100644 index decc9114132..00000000000 --- a/tests/modules/nf-core/gatk4/estimatelibrarycomplexity/main.nf +++ /dev/null @@ -1,18 +0,0 @@ -#!/usr/bin/env nextflow - -nextflow.enable.dsl = 2 - -include { GATK4_ESTIMATELIBRARYCOMPLEXITY } from '../../../../../modules/nf-core/gatk4/estimatelibrarycomplexity/main.nf' - -workflow test_gatk4_estimatelibrarycomplexity { - - input = [ [ id:'test', single_end:false ], // meta map - file(params.test_data['homo_sapiens']['illumina']['test_paired_end_recalibrated_sorted_bam'], checkIfExists: true) - ] - - fasta = file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true) - fai = file(params.test_data['homo_sapiens']['genome']['genome_fasta_fai'], checkIfExists: true) - dict = file(params.test_data['homo_sapiens']['genome']['genome_dict'], checkIfExists: true) - - GATK4_ESTIMATELIBRARYCOMPLEXITY ( input, fasta, fai, dict ) -} diff --git a/tests/modules/nf-core/gatk4/estimatelibrarycomplexity/nextflow.config b/tests/modules/nf-core/gatk4/estimatelibrarycomplexity/nextflow.config deleted file mode 100644 index 8730f1c4b93..00000000000 --- a/tests/modules/nf-core/gatk4/estimatelibrarycomplexity/nextflow.config +++ /dev/null @@ -1,5 +0,0 @@ -process { - - publishDir = { "${params.outdir}/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}" } - -} diff --git a/tests/modules/nf-core/gatk4/estimatelibrarycomplexity/test.yml b/tests/modules/nf-core/gatk4/estimatelibrarycomplexity/test.yml deleted file mode 100644 index b2288a6f67c..00000000000 --- a/tests/modules/nf-core/gatk4/estimatelibrarycomplexity/test.yml +++ /dev/null @@ -1,8 +0,0 @@ -- name: gatk4 estimatelibrarycomplexity test_gatk4_estimatelibrarycomplexity - command: nextflow run ./tests/modules/nf-core/gatk4/estimatelibrarycomplexity -entry test_gatk4_estimatelibrarycomplexity -c ./tests/config/nextflow.config -c ./tests/modules/nf-core/gatk4/estimatelibrarycomplexity/nextflow.config - tags: - - gatk4/estimatelibrarycomplexity - - gatk4 - files: - - path: output/gatk4/test.metrics - - path: output/gatk4/versions.yml