From b4ad530dc270036271205f42110f116c22d03279 Mon Sep 17 00:00:00 2001 From: JoseEspinosa Date: Mon, 17 Apr 2023 22:07:19 +0200 Subject: [PATCH 01/24] Add back skip_spp parameter --- CHANGELOG.md | 1 + workflows/chipseq.nf | 44 +++++++++++++++++++++++++++----------------- 2 files changed, 28 insertions(+), 17 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 8c56805d..173b68d1 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,6 +10,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - Updated pipeline template to [nf-core/tools 2.7.2](https://github.com/nf-core/tools/releases/tag/2.7.2) - [[#317](https://github.com/nf-core/chipseq/issues/317)] Added metro map - [[#288](https://github.com/nf-core/chipseq/issues/291)] Bump `chromap` version 2 and enable all the steps below chromap again when paired-end data is processed. +- [[#311](https://github.com/nf-core/chipseq/issues/311)] Add back `--skip_spp` parameter which was unintentionally removed from the code. 
## [[2.0.0](https://github.com/nf-core/chipseq/releases/tag/2.0.0)] - 2022-10-03 diff --git a/workflows/chipseq.nf b/workflows/chipseq.nf index 08069eb6..caaa4c0c 100644 --- a/workflows/chipseq.nf +++ b/workflows/chipseq.nf @@ -310,20 +310,30 @@ workflow CHIPSEQ { // // MODULE: Phantompeaktools strand cross-correlation and QC metrics // - PHANTOMPEAKQUALTOOLS ( - FILTER_BAM_BAMTOOLS.out.bam - ) - ch_versions = ch_versions.mix(PHANTOMPEAKQUALTOOLS.out.versions.first()) + ch_phantompeakqualtools_spp_multiqc = Channel.empty() + ch_multiqc_phantompeakqualtools_nsc_multiqc = Channel.empty() + ch_multiqc_phantompeakqualtools_rsc_multiqc = Channel.empty() + ch_multiqc_phantompeakqualtools_correlation_multiqc = Channel.empty() + if (!params.skip_spp) { + PHANTOMPEAKQUALTOOLS ( + FILTER_BAM_BAMTOOLS.out.bam + ) + ch_phantompeakqualtools_spp_multiqc = PHANTOMPEAKQUALTOOLS.out.spp + ch_versions = ch_versions.mix(PHANTOMPEAKQUALTOOLS.out.versions.first()) - // - // MODULE: MultiQC custom content for Phantompeaktools - // - MULTIQC_CUSTOM_PHANTOMPEAKQUALTOOLS ( - PHANTOMPEAKQUALTOOLS.out.spp.join(PHANTOMPEAKQUALTOOLS.out.rdata, by: [0]), - ch_spp_nsc_header, - ch_spp_rsc_header, - ch_spp_correlation_header - ) + // + // MODULE: MultiQC custom content for Phantompeaktools + // + MULTIQC_CUSTOM_PHANTOMPEAKQUALTOOLS ( + PHANTOMPEAKQUALTOOLS.out.spp.join(PHANTOMPEAKQUALTOOLS.out.rdata, by: [0]), + ch_spp_nsc_header, + ch_spp_rsc_header, + ch_spp_correlation_header + ) + ch_multiqc_phantompeakqualtools_nsc_multiqc = MULTIQC_CUSTOM_PHANTOMPEAKQUALTOOLS.out.nsc + ch_multiqc_phantompeakqualtools_rsc_multiqc = MULTIQC_CUSTOM_PHANTOMPEAKQUALTOOLS.out.rsc + ch_multiqc_phantompeakqualtools_correlation_multiqc = MULTIQC_CUSTOM_PHANTOMPEAKQUALTOOLS.out.correlation + } // // MODULE: BedGraph coverage tracks @@ -688,10 +698,10 @@ workflow CHIPSEQ { ch_deeptoolsplotprofile_multiqc.collect{it[1]}.ifEmpty([]), ch_deeptoolsplotfingerprint_multiqc.collect{it[1]}.ifEmpty([]), - 
PHANTOMPEAKQUALTOOLS.out.spp.collect{it[1]}.ifEmpty([]), - MULTIQC_CUSTOM_PHANTOMPEAKQUALTOOLS.out.nsc.collect{it[1]}.ifEmpty([]), - MULTIQC_CUSTOM_PHANTOMPEAKQUALTOOLS.out.rsc.collect{it[1]}.ifEmpty([]), - MULTIQC_CUSTOM_PHANTOMPEAKQUALTOOLS.out.correlation.collect{it[1]}.ifEmpty([]), + ch_phantompeakqualtools_spp_multiqc.collect{it[1]}.ifEmpty([]), + ch_multiqc_phantompeakqualtools_nsc_multiqc.collect{it[1]}.ifEmpty([]), + ch_multiqc_phantompeakqualtools_rsc_multiqc.collect{it[1]}.ifEmpty([]), + ch_multiqc_phantompeakqualtools_correlation_multiqc.collect{it[1]}.ifEmpty([]), ch_custompeaks_frip_multiqc.collect{it[1]}.ifEmpty([]), ch_custompeaks_count_multiqc.collect{it[1]}.ifEmpty([]), From 77907aa5ddf9f60cecc7d795bffb02d583708d6c Mon Sep 17 00:00:00 2001 From: JoseEspinosa Date: Mon, 17 Apr 2023 22:44:59 +0200 Subject: [PATCH 02/24] Install fastq_fastqc_umitools_trimgalore subworkflow --- modules.json | 154 ++++++++++++++---- modules/nf-core/umitools/extract/main.nf | 56 +++++++ modules/nf-core/umitools/extract/meta.yml | 47 ++++++ .../fastq_fastqc_umitools_trimgalore/main.nf | 123 ++++++++++++++ .../fastq_fastqc_umitools_trimgalore/meta.yml | 96 +++++++++++ 5 files changed, 442 insertions(+), 34 deletions(-) create mode 100644 modules/nf-core/umitools/extract/main.nf create mode 100644 modules/nf-core/umitools/extract/meta.yml create mode 100644 subworkflows/nf-core/fastq_fastqc_umitools_trimgalore/main.nf create mode 100644 subworkflows/nf-core/fastq_fastqc_umitools_trimgalore/meta.yml diff --git a/modules.json b/modules.json index 7af6cca7..8f10866d 100644 --- a/modules.json +++ b/modules.json @@ -8,171 +8,257 @@ "bowtie2/align": { "branch": "master", "git_sha": "5e34754d42cd2d5d248ca8673c0a53cdf5624905", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "bowtie2/build": { "branch": "master", "git_sha": "5e34754d42cd2d5d248ca8673c0a53cdf5624905", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "bwa/index": { "branch": 
"master", "git_sha": "5e34754d42cd2d5d248ca8673c0a53cdf5624905", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "bwa/mem": { "branch": "master", "git_sha": "5e34754d42cd2d5d248ca8673c0a53cdf5624905", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "chromap/chromap": { "branch": "master", "git_sha": "d6b7b1f108dab88b0269a4331767c36a1a8da960", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "chromap/index": { "branch": "master", "git_sha": "d6b7b1f108dab88b0269a4331767c36a1a8da960", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "custom/dumpsoftwareversions": { "branch": "master", "git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "custom/getchromsizes": { "branch": "master", "git_sha": "5e34754d42cd2d5d248ca8673c0a53cdf5624905", - "installed_by": ["modules"], + "installed_by": [ + "modules" + ], "patch": "modules/nf-core/custom/getchromsizes/custom-getchromsizes.diff" }, "deeptools/computematrix": { "branch": "master", "git_sha": "5e34754d42cd2d5d248ca8673c0a53cdf5624905", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "deeptools/plotfingerprint": { "branch": "master", "git_sha": "5e34754d42cd2d5d248ca8673c0a53cdf5624905", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "deeptools/plotheatmap": { "branch": "master", "git_sha": "5e34754d42cd2d5d248ca8673c0a53cdf5624905", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "deeptools/plotprofile": { "branch": "master", "git_sha": "5e34754d42cd2d5d248ca8673c0a53cdf5624905", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "fastqc": { "branch": "master", "git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c", - "installed_by": ["modules"] + "installed_by": [ + "modules", + "fastq_fastqc_umitools_trimgalore" + ] }, "gffread": { "branch": "master", "git_sha": 
"5e34754d42cd2d5d248ca8673c0a53cdf5624905", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "gunzip": { "branch": "master", "git_sha": "5e34754d42cd2d5d248ca8673c0a53cdf5624905", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "homer/annotatepeaks": { "branch": "master", "git_sha": "5e34754d42cd2d5d248ca8673c0a53cdf5624905", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "khmer/uniquekmers": { "branch": "master", "git_sha": "5e34754d42cd2d5d248ca8673c0a53cdf5624905", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "macs2/callpeak": { "branch": "master", "git_sha": "5e34754d42cd2d5d248ca8673c0a53cdf5624905", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "multiqc": { "branch": "master", "git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "phantompeakqualtools": { "branch": "master", "git_sha": "5e34754d42cd2d5d248ca8673c0a53cdf5624905", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "picard/collectmultiplemetrics": { "branch": "master", "git_sha": "5e34754d42cd2d5d248ca8673c0a53cdf5624905", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "picard/markduplicates": { "branch": "master", "git_sha": "5e34754d42cd2d5d248ca8673c0a53cdf5624905", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "picard/mergesamfiles": { "branch": "master", "git_sha": "5e34754d42cd2d5d248ca8673c0a53cdf5624905", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "preseq/lcextrap": { "branch": "master", "git_sha": "5e34754d42cd2d5d248ca8673c0a53cdf5624905", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "samtools/flagstat": { "branch": "master", "git_sha": "5e34754d42cd2d5d248ca8673c0a53cdf5624905", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "samtools/idxstats": { "branch": "master", 
"git_sha": "5e34754d42cd2d5d248ca8673c0a53cdf5624905", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "samtools/index": { "branch": "master", "git_sha": "5e34754d42cd2d5d248ca8673c0a53cdf5624905", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "samtools/sort": { "branch": "master", "git_sha": "5e34754d42cd2d5d248ca8673c0a53cdf5624905", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "samtools/stats": { "branch": "master", "git_sha": "5e34754d42cd2d5d248ca8673c0a53cdf5624905", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "subread/featurecounts": { "branch": "master", "git_sha": "5e34754d42cd2d5d248ca8673c0a53cdf5624905", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "trimgalore": { "branch": "master", "git_sha": "5e34754d42cd2d5d248ca8673c0a53cdf5624905", - "installed_by": ["modules"] + "installed_by": [ + "modules", + "fastq_fastqc_umitools_trimgalore" + ] }, "ucsc/bedgraphtobigwig": { "branch": "master", "git_sha": "5e34754d42cd2d5d248ca8673c0a53cdf5624905", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] + }, + "umitools/extract": { + "branch": "master", + "git_sha": "ffe2f5865f608848e440a52b73c304ea79aaf818", + "installed_by": [ + "fastq_fastqc_umitools_trimgalore" + ] }, "untar": { "branch": "master", "git_sha": "5e34754d42cd2d5d248ca8673c0a53cdf5624905", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] + } + } + }, + "subworkflows": { + "nf-core": { + "fastq_fastqc_umitools_trimgalore": { + "branch": "master", + "git_sha": "100caf3506850ffcc04953f724a97f422940c377", + "installed_by": [ + "subworkflows" + ] } } } } } -} +} \ No newline at end of file diff --git a/modules/nf-core/umitools/extract/main.nf b/modules/nf-core/umitools/extract/main.nf new file mode 100644 index 00000000..64fdd367 --- /dev/null +++ b/modules/nf-core/umitools/extract/main.nf @@ -0,0 +1,56 @@ +process UMITOOLS_EXTRACT { + tag "$meta.id" + 
label "process_single" + label "process_long" + + conda "bioconda::umi_tools=1.1.4" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/umi_tools:1.1.4--py38hbff2b2d_1' : + 'quay.io/biocontainers/umi_tools:1.1.4--py38hbff2b2d_1' }" + + input: + tuple val(meta), path(reads) + + output: + tuple val(meta), path("*.fastq.gz"), emit: reads + tuple val(meta), path("*.log") , emit: log + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + if (meta.single_end) { + """ + umi_tools \\ + extract \\ + -I $reads \\ + -S ${prefix}.umi_extract.fastq.gz \\ + $args \\ + > ${prefix}.umi_extract.log + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + umitools: \$(umi_tools --version 2>&1 | sed 's/^.*UMI-tools version://; s/ *\$//') + END_VERSIONS + """ + } else { + """ + umi_tools \\ + extract \\ + -I ${reads[0]} \\ + --read2-in=${reads[1]} \\ + -S ${prefix}.umi_extract_1.fastq.gz \\ + --read2-out=${prefix}.umi_extract_2.fastq.gz \\ + $args \\ + > ${prefix}.umi_extract.log + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + umitools: \$(umi_tools --version 2>&1 | sed 's/^.*UMI-tools version://; s/ *\$//') + END_VERSIONS + """ + } +} diff --git a/modules/nf-core/umitools/extract/meta.yml b/modules/nf-core/umitools/extract/meta.yml new file mode 100644 index 00000000..db64a0f8 --- /dev/null +++ b/modules/nf-core/umitools/extract/meta.yml @@ -0,0 +1,47 @@ +name: umitools_extract +description: Extracts UMI barcode from a read and add it to the read name, leaving any sample barcode in place +keywords: + - umitools + - extract +tools: + - umi_tools: + description: > + UMI-tools contains tools for dealing with Unique Molecular Identifiers (UMIs)/Random Molecular Tags (RMTs) + and single cell RNA-Seq cell barcodes + documentation: 
https://umi-tools.readthedocs.io/en/latest/ + license: ["MIT"] +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - reads: + type: list + description: | + List of input FASTQ files whose UMIs will be extracted. +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - reads: + type: file + description: > + Extracted FASTQ files. | + For single-end reads, pattern is \${prefix}.umi_extract.fastq.gz. | + For paired-end reads, pattern is \${prefix}.umi_extract_{1,2}.fastq.gz. + pattern: "*.{fastq.gz}" + - log: + type: file + description: Logfile for umi_tools + pattern: "*.{log}" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" + +authors: + - "@drpatelh" + - "@grst" diff --git a/subworkflows/nf-core/fastq_fastqc_umitools_trimgalore/main.nf b/subworkflows/nf-core/fastq_fastqc_umitools_trimgalore/main.nf new file mode 100644 index 00000000..db2e5b32 --- /dev/null +++ b/subworkflows/nf-core/fastq_fastqc_umitools_trimgalore/main.nf @@ -0,0 +1,123 @@ +// +// Read QC, UMI extraction and trimming +// + +include { FASTQC } from '../../../modules/nf-core/fastqc/main' +include { UMITOOLS_EXTRACT } from '../../../modules/nf-core/umitools/extract/main' +include { TRIMGALORE } from '../../../modules/nf-core/trimgalore/main' + +// +// Function that parses TrimGalore log output file to get total number of reads after trimming +// +def getTrimGaloreReadsAfterFiltering(log_file) { + def total_reads = 0 + def filtered_reads = 0 + log_file.eachLine { line -> + def total_reads_matcher = line =~ /([\d\.]+)\ssequences processed in total/ + def filtered_reads_matcher = line =~ /shorter than the length cutoff[^:]+:\s([\d\.]+)/ + if (total_reads_matcher) total_reads = total_reads_matcher[0][1].toFloat() + if (filtered_reads_matcher) filtered_reads = filtered_reads_matcher[0][1].toFloat() 
+ } + return total_reads - filtered_reads +} + +workflow FASTQ_FASTQC_UMITOOLS_TRIMGALORE { + take: + reads // channel: [ val(meta), [ reads ] ] + skip_fastqc // boolean: true/false + with_umi // boolean: true/false + skip_umi_extract // boolean: true/false + skip_trimming // boolean: true/false + umi_discard_read // integer: 0, 1 or 2 + min_trimmed_reads // integer: > 0 + + main: + ch_versions = Channel.empty() + fastqc_html = Channel.empty() + fastqc_zip = Channel.empty() + if (!skip_fastqc) { + FASTQC (reads) + fastqc_html = FASTQC.out.html + fastqc_zip = FASTQC.out.zip + ch_versions = ch_versions.mix(FASTQC.out.versions.first()) + } + + umi_reads = reads + umi_log = Channel.empty() + if (with_umi && !skip_umi_extract) { + UMITOOLS_EXTRACT (reads) + umi_reads = UMITOOLS_EXTRACT.out.reads + umi_log = UMITOOLS_EXTRACT.out.log + ch_versions = ch_versions.mix(UMITOOLS_EXTRACT.out.versions.first()) + + // Discard R1 / R2 if required + if (umi_discard_read in [1,2]) { + UMITOOLS_EXTRACT + .out + .reads + .map { + meta, reads -> + meta.single_end ? [ meta, reads ] : [ meta + ['single_end': true], reads[umi_discard_read % 2] ] + } + .set { umi_reads } + } + } + + trim_reads = umi_reads + trim_unpaired = Channel.empty() + trim_html = Channel.empty() + trim_zip = Channel.empty() + trim_log = Channel.empty() + trim_read_count = Channel.empty() + if (!skip_trimming) { + TRIMGALORE (umi_reads) + trim_unpaired = TRIMGALORE.out.unpaired + trim_html = TRIMGALORE.out.html + trim_zip = TRIMGALORE.out.zip + trim_log = TRIMGALORE.out.log + ch_versions = ch_versions.mix(TRIMGALORE.out.versions.first()) + + // + // Filter FastQ files based on minimum trimmed read count after adapter trimming + // + TRIMGALORE + .out + .reads + .join(trim_log, remainder: true) + .map { + meta, reads, trim_log -> + if (trim_log) { + num_reads = getTrimGaloreReadsAfterFiltering(meta.single_end ? 
trim_log : trim_log[-1]) + [ meta, reads, num_reads ] + } else { + [ meta, reads, min_trimmed_reads.toFloat() + 1 ] + } + } + .set { ch_num_trimmed_reads } + + ch_num_trimmed_reads + .filter { meta, reads, num_reads -> num_reads >= min_trimmed_reads.toFloat() } + .map { meta, reads, num_reads -> [ meta, reads ] } + .set { trim_reads } + + ch_num_trimmed_reads + .map { meta, reads, num_reads -> [ meta, num_reads ] } + .set { trim_read_count } + } + + emit: + reads = trim_reads // channel: [ val(meta), [ reads ] ] + + fastqc_html // channel: [ val(meta), [ html ] ] + fastqc_zip // channel: [ val(meta), [ zip ] ] + + umi_log // channel: [ val(meta), [ log ] ] + + trim_unpaired // channel: [ val(meta), [ reads ] ] + trim_html // channel: [ val(meta), [ html ] ] + trim_zip // channel: [ val(meta), [ zip ] ] + trim_log // channel: [ val(meta), [ txt ] ] + trim_read_count // channel: [ val(meta), val(count) ] + + versions = ch_versions.ifEmpty(null) // channel: [ versions.yml ] +} diff --git a/subworkflows/nf-core/fastq_fastqc_umitools_trimgalore/meta.yml b/subworkflows/nf-core/fastq_fastqc_umitools_trimgalore/meta.yml new file mode 100644 index 00000000..b05004d4 --- /dev/null +++ b/subworkflows/nf-core/fastq_fastqc_umitools_trimgalore/meta.yml @@ -0,0 +1,96 @@ +name: "fastq_fastqc_umitools_trimgalore" +description: Read QC, UMI extraction and trimming +keywords: + - fastq + - fastqc + - qc + - UMI + - trimming + - trimgalore +modules: + - fastqc + - umitools/extract + - trimgalore +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test' ] + - reads: + type: file + description: | + List of input FastQ files of size 1 and 2 for single-end and paired-end data, + respectively. 
+ - skip_fastqc: + type: boolean + description: | + Skip fastqc process + - with_umi: + type: boolean + description: | + With or without umi detection + - skip_umi_extract: + type: boolean + description: | + With or without umi extraction + - skip_trimming: + type: boolean + description: | + Allows to skip trimgalore execution + - umi_discard_read: + type: integer + description: | + Discard R1 / R2 if required + - min_trimmed_reads: + type: integer + description: | + Inputs with fewer reads than this will be filtered out of the "reads" output channel + +output: + - reads: + type: file + description: > + Extracted FASTQ files. | + For single-end reads, pattern is \${prefix}.umi_extract.fastq.gz. | + For paired-end reads, pattern is \${prefix}.umi_extract_{1,2}.fastq.gz. + pattern: "*.{fastq.gz}" + - fastqc_html: + type: file + description: FastQC report + pattern: "*_{fastqc.html}" + - fastqc_zip: + type: file + description: FastQC report archive + pattern: "*_{fastqc.zip}" + - log: + type: file + description: Logfile for umi_tools + pattern: "*.{log}" + - trim_unpaired: + type: file + description: | + FastQ files containing unpaired reads from read 1 or read 2 + pattern: "*unpaired*.fq.gz" + - trim_html: + type: file + description: FastQC report (optional) + pattern: "*_{fastqc.html}" + - trim_zip: + type: file + description: FastQC report archive (optional) + pattern: "*_{fastqc.zip}" + - trim_log: + type: file + description: Trim Galore! 
trimming report + pattern: "*_{report.txt}" + - trim_read_count: + type: integer + description: Number of reads remaining after trimming for all input samples + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@drpatelh" + - "@KamilMaliszArdigen" From d67a0fda5cfadc2520e832ec3105c2bb3dc8e998 Mon Sep 17 00:00:00 2001 From: JoseEspinosa Date: Mon, 17 Apr 2023 22:46:42 +0200 Subject: [PATCH 03/24] Install fastq_align_bwa subworkflow --- modules.json | 27 ++++++++ .../nf-core/bam_sort_stats_samtools/main.nf | 50 ++++++++++++++ .../nf-core/bam_sort_stats_samtools/meta.yml | 65 +++++++++++++++++ .../nf-core/bam_stats_samtools/main.nf | 32 +++++++++ .../nf-core/bam_stats_samtools/meta.yml | 40 +++++++++++ subworkflows/nf-core/fastq_align_bwa/main.nf | 43 ++++++++++++ subworkflows/nf-core/fastq_align_bwa/meta.yml | 69 +++++++++++++++++++ 7 files changed, 326 insertions(+) create mode 100644 subworkflows/nf-core/bam_sort_stats_samtools/main.nf create mode 100644 subworkflows/nf-core/bam_sort_stats_samtools/meta.yml create mode 100644 subworkflows/nf-core/bam_stats_samtools/main.nf create mode 100644 subworkflows/nf-core/bam_stats_samtools/meta.yml create mode 100644 subworkflows/nf-core/fastq_align_bwa/main.nf create mode 100644 subworkflows/nf-core/fastq_align_bwa/meta.yml diff --git a/modules.json b/modules.json index 8f10866d..6c75bb41 100644 --- a/modules.json +++ b/modules.json @@ -30,6 +30,7 @@ "branch": "master", "git_sha": "5e34754d42cd2d5d248ca8673c0a53cdf5624905", "installed_by": [ + "fastq_align_bwa", "modules" ] }, @@ -179,6 +180,7 @@ "branch": "master", "git_sha": "5e34754d42cd2d5d248ca8673c0a53cdf5624905", "installed_by": [ + "bam_stats_samtools", "modules" ] }, @@ -186,6 +188,7 @@ "branch": "master", "git_sha": "5e34754d42cd2d5d248ca8673c0a53cdf5624905", "installed_by": [ + "bam_stats_samtools", "modules" ] }, @@ -193,6 +196,7 @@ "branch": "master", "git_sha": 
"5e34754d42cd2d5d248ca8673c0a53cdf5624905", "installed_by": [ + "bam_sort_stats_samtools", "modules" ] }, @@ -200,6 +204,7 @@ "branch": "master", "git_sha": "5e34754d42cd2d5d248ca8673c0a53cdf5624905", "installed_by": [ + "bam_sort_stats_samtools", "modules" ] }, @@ -207,6 +212,7 @@ "branch": "master", "git_sha": "5e34754d42cd2d5d248ca8673c0a53cdf5624905", "installed_by": [ + "bam_stats_samtools", "modules" ] }, @@ -250,6 +256,27 @@ }, "subworkflows": { "nf-core": { + "bam_sort_stats_samtools": { + "branch": "master", + "git_sha": "3911652a6b24249358f79e8b8466338d63efb2a2", + "installed_by": [ + "fastq_align_bwa" + ] + }, + "bam_stats_samtools": { + "branch": "master", + "git_sha": "b4b7f89e7fd6d2293f0c176213f710e0bcdaf19e", + "installed_by": [ + "bam_sort_stats_samtools" + ] + }, + "fastq_align_bwa": { + "branch": "master", + "git_sha": "901fab507683647b43e7032f3ae9e4c234eb68eb", + "installed_by": [ + "subworkflows" + ] + }, "fastq_fastqc_umitools_trimgalore": { "branch": "master", "git_sha": "100caf3506850ffcc04953f724a97f422940c377", diff --git a/subworkflows/nf-core/bam_sort_stats_samtools/main.nf b/subworkflows/nf-core/bam_sort_stats_samtools/main.nf new file mode 100644 index 00000000..617871fe --- /dev/null +++ b/subworkflows/nf-core/bam_sort_stats_samtools/main.nf @@ -0,0 +1,50 @@ +// +// Sort, index BAM file and run samtools stats, flagstat and idxstats +// + +include { SAMTOOLS_SORT } from '../../../modules/nf-core/samtools/sort/main' +include { SAMTOOLS_INDEX } from '../../../modules/nf-core/samtools/index/main' +include { BAM_STATS_SAMTOOLS } from '../bam_stats_samtools/main' + +workflow BAM_SORT_STATS_SAMTOOLS { + take: + ch_bam // channel: [ val(meta), [ bam ] ] + ch_fasta // channel: [ fasta ] + + main: + + ch_versions = Channel.empty() + + SAMTOOLS_SORT ( ch_bam ) + ch_versions = ch_versions.mix(SAMTOOLS_SORT.out.versions.first()) + + SAMTOOLS_INDEX ( SAMTOOLS_SORT.out.bam ) + ch_versions = ch_versions.mix(SAMTOOLS_INDEX.out.versions.first()) + + 
SAMTOOLS_SORT.out.bam + .join(SAMTOOLS_INDEX.out.bai, by: [0], remainder: true) + .join(SAMTOOLS_INDEX.out.csi, by: [0], remainder: true) + .map { + meta, bam, bai, csi -> + if (bai) { + [ meta, bam, bai ] + } else { + [ meta, bam, csi ] + } + } + .set { ch_bam_bai } + + BAM_STATS_SAMTOOLS ( ch_bam_bai, ch_fasta ) + ch_versions = ch_versions.mix(BAM_STATS_SAMTOOLS.out.versions) + + emit: + bam = SAMTOOLS_SORT.out.bam // channel: [ val(meta), [ bam ] ] + bai = SAMTOOLS_INDEX.out.bai // channel: [ val(meta), [ bai ] ] + csi = SAMTOOLS_INDEX.out.csi // channel: [ val(meta), [ csi ] ] + + stats = BAM_STATS_SAMTOOLS.out.stats // channel: [ val(meta), [ stats ] ] + flagstat = BAM_STATS_SAMTOOLS.out.flagstat // channel: [ val(meta), [ flagstat ] ] + idxstats = BAM_STATS_SAMTOOLS.out.idxstats // channel: [ val(meta), [ idxstats ] ] + + versions = ch_versions // channel: [ versions.yml ] +} diff --git a/subworkflows/nf-core/bam_sort_stats_samtools/meta.yml b/subworkflows/nf-core/bam_sort_stats_samtools/meta.yml new file mode 100644 index 00000000..131065be --- /dev/null +++ b/subworkflows/nf-core/bam_sort_stats_samtools/meta.yml @@ -0,0 +1,65 @@ +name: bam_sort_stats_samtools +description: Sort SAM/BAM/CRAM file +keywords: + - sort + - bam + - sam + - cram +modules: + - samtools/sort + - samtools/index + - samtools/stats + - samtools/idxstats + - samtools/flagstat +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - bam: + type: file + description: BAM/CRAM/SAM file + pattern: "*.{bam,cram,sam}" + - fasta: + type: file + description: Reference genome fasta file + pattern: "*.{fasta,fa}" +# TODO Update when we decide on a standard for subworkflow docs +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - bam: + type: file + description: Sorted BAM/CRAM/SAM file + pattern: "*.{bam,cram,sam}" + - bai: + type: file + description: BAM/CRAM/SAM index file + pattern: "*.{bai,crai,sai}" + - crai: + type: file + description: BAM/CRAM/SAM index file + pattern: "*.{bai,crai,sai}" + - stats: + type: file + description: File containing samtools stats output + pattern: "*.{stats}" + - flagstat: + type: file + description: File containing samtools flagstat output + pattern: "*.{flagstat}" + - idxstats: + type: file + description: File containing samtools idxstats output + pattern: "*.{idxstats}" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@drpatelh" + - "@ewels" diff --git a/subworkflows/nf-core/bam_stats_samtools/main.nf b/subworkflows/nf-core/bam_stats_samtools/main.nf new file mode 100644 index 00000000..c9d7c8b7 --- /dev/null +++ b/subworkflows/nf-core/bam_stats_samtools/main.nf @@ -0,0 +1,32 @@ +// +// Run SAMtools stats, flagstat and idxstats +// + +include { SAMTOOLS_STATS } from '../../../modules/nf-core/samtools/stats/main' +include { SAMTOOLS_IDXSTATS } from '../../../modules/nf-core/samtools/idxstats/main' +include { SAMTOOLS_FLAGSTAT } from '../../../modules/nf-core/samtools/flagstat/main' + +workflow BAM_STATS_SAMTOOLS { + take: + ch_bam_bai // channel: [ val(meta), path(bam), path(bai) ] + ch_fasta // channel: [ path(fasta) ] + + main: + ch_versions = Channel.empty() + + SAMTOOLS_STATS ( ch_bam_bai, ch_fasta ) + ch_versions = ch_versions.mix(SAMTOOLS_STATS.out.versions) + + SAMTOOLS_FLAGSTAT ( ch_bam_bai ) + ch_versions = ch_versions.mix(SAMTOOLS_FLAGSTAT.out.versions) + + SAMTOOLS_IDXSTATS ( ch_bam_bai ) + ch_versions = ch_versions.mix(SAMTOOLS_IDXSTATS.out.versions) + + emit: + stats = SAMTOOLS_STATS.out.stats // channel: [ val(meta), path(stats) ] + flagstat = SAMTOOLS_FLAGSTAT.out.flagstat // channel: [ val(meta), path(flagstat) ] + idxstats = 
SAMTOOLS_IDXSTATS.out.idxstats // channel: [ val(meta), path(idxstats) ] + + versions = ch_versions // channel: [ path(versions.yml) ] +} diff --git a/subworkflows/nf-core/bam_stats_samtools/meta.yml b/subworkflows/nf-core/bam_stats_samtools/meta.yml new file mode 100644 index 00000000..b6072686 --- /dev/null +++ b/subworkflows/nf-core/bam_stats_samtools/meta.yml @@ -0,0 +1,40 @@ +name: bam_stats_samtools +description: Produces comprehensive statistics from SAM/BAM/CRAM file +keywords: + - statistics + - counts + - bam + - sam + - cram +modules: + - samtools/stats + - samtools/idxstats + - samtools/flagstat +input: + - ch_bam_bai: + description: | + The input channel containing the BAM/CRAM and its index + Structure: [ val(meta), path(bam), path(bai) ] + - ch_fasta: + description: | + Reference genome fasta file + Structure: [ path(fasta) ] +output: + - stats: + description: | + File containing samtools stats output + Structure: [ val(meta), path(stats) ] + - flagstat: + description: | + File containing samtools flagstat output + Structure: [ val(meta), path(flagstat) ] + - idxstats: + description: | + File containing samtools idxstats output + Structure: [ val(meta), path(idxstats)] + - versions: + description: | + Files containing software versions + Structure: [ path(versions.yml) ] +authors: + - "@drpatelh" diff --git a/subworkflows/nf-core/fastq_align_bwa/main.nf b/subworkflows/nf-core/fastq_align_bwa/main.nf new file mode 100644 index 00000000..4ce4f886 --- /dev/null +++ b/subworkflows/nf-core/fastq_align_bwa/main.nf @@ -0,0 +1,43 @@ +// +// Alignment with BWA +// + +include { BWA_MEM } from '../../../modules/nf-core/bwa/mem/main' +include { BAM_SORT_STATS_SAMTOOLS } from '../bam_sort_stats_samtools/main' + +workflow FASTQ_ALIGN_BWA { + take: + ch_reads // channel (mandatory): [ val(meta), [ path(reads) ] ] + ch_index // channel (mandatory): [ val(meta2), path(index) ] + val_sort_bam // boolean (mandatory): true or false + ch_fasta // channel (optional) : [ 
path(fasta) ] + + main: + ch_versions = Channel.empty() + + // + // Map reads with BWA + // + + BWA_MEM ( ch_reads, ch_index, val_sort_bam ) + ch_versions = ch_versions.mix(BWA_MEM.out.versions.first()) + + // + // Sort, index BAM file and run samtools stats, flagstat and idxstats + // + + BAM_SORT_STATS_SAMTOOLS ( BWA_MEM.out.bam, ch_fasta ) + ch_versions = ch_versions.mix(BAM_SORT_STATS_SAMTOOLS.out.versions) + + emit: + bam_orig = BWA_MEM.out.bam // channel: [ val(meta), path(bam) ] + + bam = BAM_SORT_STATS_SAMTOOLS.out.bam // channel: [ val(meta), path(bam) ] + bai = BAM_SORT_STATS_SAMTOOLS.out.bai // channel: [ val(meta), path(bai) ] + csi = BAM_SORT_STATS_SAMTOOLS.out.csi // channel: [ val(meta), path(csi) ] + stats = BAM_SORT_STATS_SAMTOOLS.out.stats // channel: [ val(meta), path(stats) ] + flagstat = BAM_SORT_STATS_SAMTOOLS.out.flagstat // channel: [ val(meta), path(flagstat) ] + idxstats = BAM_SORT_STATS_SAMTOOLS.out.idxstats // channel: [ val(meta), path(idxstats) ] + + versions = ch_versions // channel: [ path(versions.yml) ] +} diff --git a/subworkflows/nf-core/fastq_align_bwa/meta.yml b/subworkflows/nf-core/fastq_align_bwa/meta.yml new file mode 100644 index 00000000..03f84860 --- /dev/null +++ b/subworkflows/nf-core/fastq_align_bwa/meta.yml @@ -0,0 +1,69 @@ +name: fastq_align_bwa +description: Align reads to a reference genome using bwa then sort with samtools +keywords: + - align + - fasta + - genome + - reference +modules: + - bwa/mem + - samtools/sort + - samtools/index + - samtools/stats + - samtools/idxstats + - samtools/flagstat +input: + - ch_reads: + description: | + List of input FastQ files of size 1 and 2 for single-end and paired-end data, + respectively. 
+ Structure: [ val(meta), [ path(reads) ] ] + - ch_index: + description: | + BWA genome index files + Structure: [ val(meta2), path(index) ] + - val_sort_bam: + type: boolean + description: If true bwa modules sort resulting bam files + pattern: "true|false" + - ch_fasta: + type: file + description: | + Optional reference fasta file. This only needs to be given if val_sort_bam = true + Structure: [ path(fasta) ] + +output: + - bam_orig: + description: | + BAM file produced by bwa + Structure: [ val(meta), path(bam) ] + - bam: + description: | + BAM file ordered by samtools + Structure: [ val(meta), path(bam) ] + - bai: + description: | + BAI index of the ordered BAM file + Structure: [ val(meta), path(bai) ] + - csi: + description: | + CSI index of the ordered BAM file + Structure: [ val(meta), path(csi) ] + - stats: + description: | + File containing samtools stats output + Structure: [ val(meta), path(stats) ] + - flagstat: + description: | + File containing samtools flagstat output + Structure: [ val(meta), path(flagstat) ] + - idxstats: + description: | + File containing samtools idxstats output + Structure: [ val(meta), path(idxstats) ] + - versions: + description: | + Files containing software versions + Structure: [ path(versions.yml) ] +authors: + - "@JoseEspinosa" From 8b93ec222b5cf706ea3e682ce6e90e22b664ec26 Mon Sep 17 00:00:00 2001 From: JoseEspinosa Date: Mon, 17 Apr 2023 22:47:29 +0200 Subject: [PATCH 04/24] Install fastq_align_bowtie2 subworkflow --- modules.json | 23 ++++--- .../nf-core/fastq_align_bowtie2/main.nf | 45 +++++++++++++ .../nf-core/fastq_align_bowtie2/meta.yml | 63 +++++++++++++++++++ 3 files changed, 124 insertions(+), 7 deletions(-) create mode 100644 subworkflows/nf-core/fastq_align_bowtie2/main.nf create mode 100644 subworkflows/nf-core/fastq_align_bowtie2/meta.yml diff --git a/modules.json b/modules.json index 6c75bb41..43a96a34 100644 --- a/modules.json +++ b/modules.json @@ -9,6 +9,7 @@ "branch": "master", "git_sha": 
"5e34754d42cd2d5d248ca8673c0a53cdf5624905", "installed_by": [ + "fastq_align_bowtie2", "modules" ] }, @@ -180,16 +181,16 @@ "branch": "master", "git_sha": "5e34754d42cd2d5d248ca8673c0a53cdf5624905", "installed_by": [ - "bam_stats_samtools", - "modules" + "modules", + "bam_stats_samtools" ] }, "samtools/idxstats": { "branch": "master", "git_sha": "5e34754d42cd2d5d248ca8673c0a53cdf5624905", "installed_by": [ - "bam_stats_samtools", - "modules" + "modules", + "bam_stats_samtools" ] }, "samtools/index": { @@ -212,8 +213,8 @@ "branch": "master", "git_sha": "5e34754d42cd2d5d248ca8673c0a53cdf5624905", "installed_by": [ - "bam_stats_samtools", - "modules" + "modules", + "bam_stats_samtools" ] }, "subread/featurecounts": { @@ -260,7 +261,8 @@ "branch": "master", "git_sha": "3911652a6b24249358f79e8b8466338d63efb2a2", "installed_by": [ - "fastq_align_bwa" + "fastq_align_bwa", + "fastq_align_bowtie2" ] }, "bam_stats_samtools": { @@ -270,6 +272,13 @@ "bam_sort_stats_samtools" ] }, + "fastq_align_bowtie2": { + "branch": "master", + "git_sha": "ac75f79157ecc64283a2b3a559f1ba90bc0f2259", + "installed_by": [ + "subworkflows" + ] + }, "fastq_align_bwa": { "branch": "master", "git_sha": "901fab507683647b43e7032f3ae9e4c234eb68eb", diff --git a/subworkflows/nf-core/fastq_align_bowtie2/main.nf b/subworkflows/nf-core/fastq_align_bowtie2/main.nf new file mode 100644 index 00000000..eaf4ac5e --- /dev/null +++ b/subworkflows/nf-core/fastq_align_bowtie2/main.nf @@ -0,0 +1,45 @@ +// +// Alignment with Bowtie2 +// + +include { BOWTIE2_ALIGN } from '../../../modules/nf-core/bowtie2/align/main' +include { BAM_SORT_STATS_SAMTOOLS } from '../bam_sort_stats_samtools/main' + +workflow FASTQ_ALIGN_BOWTIE2 { + take: + ch_reads // channel: [ val(meta), [ reads ] ] + ch_index // channel: /path/to/bowtie2/index/ + save_unaligned // val + sort_bam // val + ch_fasta // channel: /path/to/reference.fasta + + main: + + ch_versions = Channel.empty() + + // + // Map reads with Bowtie2 + // + BOWTIE2_ALIGN ( 
ch_reads, ch_index, save_unaligned, sort_bam ) + ch_versions = ch_versions.mix(BOWTIE2_ALIGN.out.versions.first()) + + // + // Sort, index BAM file and run samtools stats, flagstat and idxstats + // + BAM_SORT_STATS_SAMTOOLS ( BOWTIE2_ALIGN.out.bam, ch_fasta ) + ch_versions = ch_versions.mix(BAM_SORT_STATS_SAMTOOLS.out.versions) + + emit: + bam_orig = BOWTIE2_ALIGN.out.bam // channel: [ val(meta), bam ] + log_out = BOWTIE2_ALIGN.out.log // channel: [ val(meta), log ] + fastq = BOWTIE2_ALIGN.out.fastq // channel: [ val(meta), fastq ] + + bam = BAM_SORT_STATS_SAMTOOLS.out.bam // channel: [ val(meta), [ bam ] ] + bai = BAM_SORT_STATS_SAMTOOLS.out.bai // channel: [ val(meta), [ bai ] ] + csi = BAM_SORT_STATS_SAMTOOLS.out.csi // channel: [ val(meta), [ csi ] ] + stats = BAM_SORT_STATS_SAMTOOLS.out.stats // channel: [ val(meta), [ stats ] ] + flagstat = BAM_SORT_STATS_SAMTOOLS.out.flagstat // channel: [ val(meta), [ flagstat ] ] + idxstats = BAM_SORT_STATS_SAMTOOLS.out.idxstats // channel: [ val(meta), [ idxstats ] ] + + versions = ch_versions // channel: [ versions.yml ] +} diff --git a/subworkflows/nf-core/fastq_align_bowtie2/meta.yml b/subworkflows/nf-core/fastq_align_bowtie2/meta.yml new file mode 100644 index 00000000..ad378077 --- /dev/null +++ b/subworkflows/nf-core/fastq_align_bowtie2/meta.yml @@ -0,0 +1,63 @@ +name: fastq_align_bowtie2 +description: Align reads to a reference genome using bowtie2 then sort with samtools +keywords: + - align + - fasta + - genome + - reference +modules: + - bowtie2/align + - samtools/sort + - samtools/index + - samtools/stats + - samtools/idxstats + - samtools/flagstat +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - ch_reads: + type: file + description: | + List of input FastQ files of size 1 and 2 for single-end and paired-end data, + respectively. 
+ - ch_index: + type: file + description: Bowtie2 genome index files + pattern: "*.ebwt" + - save_unaligned: + type: boolean + description: | + Save reads that do not map to the reference (true) or discard them (false) + (default: false) + - sort_bam: + description: | + Have the bowtie2 module sort the resulting BAM file (true) or leave it unsorted (false) + (default: false) + - ch_fasta: + type: file + description: Reference fasta file + pattern: "*.{fasta,fa}" +# TODO Update when we decide on a standard for subworkflow docs +output: + - bam: + type: file + description: Output BAM file containing read alignments + pattern: "*.{bam}" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" + - fastq: + type: file + description: Unaligned FastQ files + pattern: "*.fastq.gz" + - log: + type: file + description: Alignment log + pattern: "*.log" +# TODO Add samtools outputs +authors: + - "@drpatelh" From 356ccd2e248860dbf14b6d986f0e603611f5a691 Mon Sep 17 00:00:00 2001 From: JoseEspinosa Date: Mon, 17 Apr 2023 22:48:55 +0200 Subject: [PATCH 05/24] Install fastq_align_chromap subworkflow --- modules.json | 173 +++++------------- .../nf-core/fastq_align_chromap/main.nf | 41 +++++ .../nf-core/fastq_align_chromap/meta.yml | 99 ++++++++++ 3 files changed, 185 insertions(+), 128 deletions(-) create mode 100644 subworkflows/nf-core/fastq_align_chromap/main.nf create mode 100644 subworkflows/nf-core/fastq_align_chromap/meta.yml diff --git a/modules.json b/modules.json index 43a96a34..d7abe7b4 100644 --- a/modules.json +++ b/modules.json @@ -8,250 +8,173 @@ "bowtie2/align": { "branch": "master", "git_sha": "5e34754d42cd2d5d248ca8673c0a53cdf5624905", - "installed_by": [ - "fastq_align_bowtie2", - "modules" - ] + "installed_by": ["fastq_align_bowtie2", "modules"] }, "bowtie2/build": { "branch": "master", "git_sha": "5e34754d42cd2d5d248ca8673c0a53cdf5624905", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "bwa/index": {
"branch": "master", "git_sha": "5e34754d42cd2d5d248ca8673c0a53cdf5624905", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "bwa/mem": { "branch": "master", "git_sha": "5e34754d42cd2d5d248ca8673c0a53cdf5624905", - "installed_by": [ - "fastq_align_bwa", - "modules" - ] + "installed_by": ["fastq_align_bwa", "modules"] }, "chromap/chromap": { "branch": "master", "git_sha": "d6b7b1f108dab88b0269a4331767c36a1a8da960", - "installed_by": [ - "modules" - ] + "installed_by": ["fastq_align_chromap", "modules"] }, "chromap/index": { "branch": "master", "git_sha": "d6b7b1f108dab88b0269a4331767c36a1a8da960", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "custom/dumpsoftwareversions": { "branch": "master", "git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "custom/getchromsizes": { "branch": "master", "git_sha": "5e34754d42cd2d5d248ca8673c0a53cdf5624905", - "installed_by": [ - "modules" - ], + "installed_by": ["modules"], "patch": "modules/nf-core/custom/getchromsizes/custom-getchromsizes.diff" }, "deeptools/computematrix": { "branch": "master", "git_sha": "5e34754d42cd2d5d248ca8673c0a53cdf5624905", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "deeptools/plotfingerprint": { "branch": "master", "git_sha": "5e34754d42cd2d5d248ca8673c0a53cdf5624905", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "deeptools/plotheatmap": { "branch": "master", "git_sha": "5e34754d42cd2d5d248ca8673c0a53cdf5624905", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "deeptools/plotprofile": { "branch": "master", "git_sha": "5e34754d42cd2d5d248ca8673c0a53cdf5624905", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "fastqc": { "branch": "master", "git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c", - "installed_by": [ - "modules", - "fastq_fastqc_umitools_trimgalore" - ] + "installed_by": ["modules", 
"fastq_fastqc_umitools_trimgalore"] }, "gffread": { "branch": "master", "git_sha": "5e34754d42cd2d5d248ca8673c0a53cdf5624905", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "gunzip": { "branch": "master", "git_sha": "5e34754d42cd2d5d248ca8673c0a53cdf5624905", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "homer/annotatepeaks": { "branch": "master", "git_sha": "5e34754d42cd2d5d248ca8673c0a53cdf5624905", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "khmer/uniquekmers": { "branch": "master", "git_sha": "5e34754d42cd2d5d248ca8673c0a53cdf5624905", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "macs2/callpeak": { "branch": "master", "git_sha": "5e34754d42cd2d5d248ca8673c0a53cdf5624905", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "multiqc": { "branch": "master", "git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "phantompeakqualtools": { "branch": "master", "git_sha": "5e34754d42cd2d5d248ca8673c0a53cdf5624905", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "picard/collectmultiplemetrics": { "branch": "master", "git_sha": "5e34754d42cd2d5d248ca8673c0a53cdf5624905", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "picard/markduplicates": { "branch": "master", "git_sha": "5e34754d42cd2d5d248ca8673c0a53cdf5624905", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "picard/mergesamfiles": { "branch": "master", "git_sha": "5e34754d42cd2d5d248ca8673c0a53cdf5624905", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "preseq/lcextrap": { "branch": "master", "git_sha": "5e34754d42cd2d5d248ca8673c0a53cdf5624905", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "samtools/flagstat": { "branch": "master", "git_sha": "5e34754d42cd2d5d248ca8673c0a53cdf5624905", - "installed_by": [ - "modules", - 
"bam_stats_samtools" - ] + "installed_by": ["bam_stats_samtools", "modules"] }, "samtools/idxstats": { "branch": "master", "git_sha": "5e34754d42cd2d5d248ca8673c0a53cdf5624905", - "installed_by": [ - "modules", - "bam_stats_samtools" - ] + "installed_by": ["bam_stats_samtools", "modules"] }, "samtools/index": { "branch": "master", "git_sha": "5e34754d42cd2d5d248ca8673c0a53cdf5624905", - "installed_by": [ - "bam_sort_stats_samtools", - "modules" - ] + "installed_by": ["bam_sort_stats_samtools", "modules"] }, "samtools/sort": { "branch": "master", "git_sha": "5e34754d42cd2d5d248ca8673c0a53cdf5624905", - "installed_by": [ - "bam_sort_stats_samtools", - "modules" - ] + "installed_by": ["bam_sort_stats_samtools", "modules"] }, "samtools/stats": { "branch": "master", "git_sha": "5e34754d42cd2d5d248ca8673c0a53cdf5624905", - "installed_by": [ - "modules", - "bam_stats_samtools" - ] + "installed_by": ["bam_stats_samtools", "modules"] }, "subread/featurecounts": { "branch": "master", "git_sha": "5e34754d42cd2d5d248ca8673c0a53cdf5624905", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "trimgalore": { "branch": "master", "git_sha": "5e34754d42cd2d5d248ca8673c0a53cdf5624905", - "installed_by": [ - "modules", - "fastq_fastqc_umitools_trimgalore" - ] + "installed_by": ["modules", "fastq_fastqc_umitools_trimgalore"] }, "ucsc/bedgraphtobigwig": { "branch": "master", "git_sha": "5e34754d42cd2d5d248ca8673c0a53cdf5624905", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "umitools/extract": { "branch": "master", "git_sha": "ffe2f5865f608848e440a52b73c304ea79aaf818", - "installed_by": [ - "fastq_fastqc_umitools_trimgalore" - ] + "installed_by": ["fastq_fastqc_umitools_trimgalore"] }, "untar": { "branch": "master", "git_sha": "5e34754d42cd2d5d248ca8673c0a53cdf5624905", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] } } }, @@ -260,41 +183,35 @@ "bam_sort_stats_samtools": { "branch": "master", "git_sha": 
"3911652a6b24249358f79e8b8466338d63efb2a2", - "installed_by": [ - "fastq_align_bwa", - "fastq_align_bowtie2" - ] + "installed_by": ["fastq_align_bowtie2", "fastq_align_chromap", "fastq_align_bwa"] }, "bam_stats_samtools": { "branch": "master", "git_sha": "b4b7f89e7fd6d2293f0c176213f710e0bcdaf19e", - "installed_by": [ - "bam_sort_stats_samtools" - ] + "installed_by": ["bam_sort_stats_samtools"] }, "fastq_align_bowtie2": { "branch": "master", "git_sha": "ac75f79157ecc64283a2b3a559f1ba90bc0f2259", - "installed_by": [ - "subworkflows" - ] + "installed_by": ["subworkflows"] }, "fastq_align_bwa": { "branch": "master", "git_sha": "901fab507683647b43e7032f3ae9e4c234eb68eb", - "installed_by": [ - "subworkflows" - ] + "installed_by": ["subworkflows"] + }, + "fastq_align_chromap": { + "branch": "master", + "git_sha": "ac75f79157ecc64283a2b3a559f1ba90bc0f2259", + "installed_by": ["subworkflows"] }, "fastq_fastqc_umitools_trimgalore": { "branch": "master", "git_sha": "100caf3506850ffcc04953f724a97f422940c377", - "installed_by": [ - "subworkflows" - ] + "installed_by": ["subworkflows"] } } } } } -} \ No newline at end of file +} diff --git a/subworkflows/nf-core/fastq_align_chromap/main.nf b/subworkflows/nf-core/fastq_align_chromap/main.nf new file mode 100644 index 00000000..9d706c98 --- /dev/null +++ b/subworkflows/nf-core/fastq_align_chromap/main.nf @@ -0,0 +1,41 @@ +/* + * Map reads, sort, index BAM file and run samtools stats, flagstat and idxstats + */ + +include { CHROMAP_CHROMAP } from '../../../modules/nf-core/chromap/chromap/main' +include { BAM_SORT_STATS_SAMTOOLS } from '../bam_sort_stats_samtools/main' + +workflow FASTQ_ALIGN_CHROMAP { + take: + ch_reads // channel (mandatory): [ val(meta), [ reads ] ] + ch_index // channel (mandatory): [ val(meta2, [ index ] ] + ch_fasta // channel (mandatory): [ val(meta2, [ fasta ] ] + ch_barcodes // channel (optional): [ barcodes ] + ch_whitelist // channel (optional): [ whitelist ] + ch_chr_order // channel (optional): [ 
chr_order ] + ch_pairs_chr_order // channel (optional): [ pairs_chr_order ] + + main: + ch_versions = Channel.empty() + + // + // Map reads with CHROMAP + // + CHROMAP_CHROMAP(ch_reads, ch_fasta, ch_index, ch_barcodes, ch_whitelist, ch_chr_order, ch_pairs_chr_order) + ch_versions = ch_versions.mix(CHROMAP_CHROMAP.out.versions) + + // + // Sort, index BAM file and run samtools stats, flagstat and idxstats + // + BAM_SORT_STATS_SAMTOOLS(CHROMAP_CHROMAP.out.bam, ch_fasta.map { it[1] }) + ch_versions = ch_versions.mix(BAM_SORT_STATS_SAMTOOLS.out.versions) + + emit: + bam = BAM_SORT_STATS_SAMTOOLS.out.bam // channel: [ val(meta), [ bam ] ] + bai = BAM_SORT_STATS_SAMTOOLS.out.bai // channel: [ val(meta), [ bai ] ] + stats = BAM_SORT_STATS_SAMTOOLS.out.stats // channel: [ val(meta), [ stats ] ] + flagstat = BAM_SORT_STATS_SAMTOOLS.out.flagstat // channel: [ val(meta), [ flagstat ] ] + idxstats = BAM_SORT_STATS_SAMTOOLS.out.idxstats // channel: [ val(meta), [ idxstats ] ] + + versions = ch_versions // path: versions.yml +} diff --git a/subworkflows/nf-core/fastq_align_chromap/meta.yml b/subworkflows/nf-core/fastq_align_chromap/meta.yml new file mode 100644 index 00000000..6d701bce --- /dev/null +++ b/subworkflows/nf-core/fastq_align_chromap/meta.yml @@ -0,0 +1,99 @@ +name: "fastq_align_chromap" +description: Align high throughput chromatin profiles using Chromap then sort with samtools +keywords: + - align + - fasta + - genome + - reference + - chromatin profiles + - chip-seq + - atac-seq + - hic +modules: + - chromap/chromap + - samtools/sort + - samtools/index + - samtools/stats + - samtools/idxstats + - samtools/flagstat +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - ch_reads: + type: file + description: | + Structure: [val(meta), path(reads)] + List of input FastQ files of size 1 and 2 for single-end and paired-end data, + respectively. 
+ - meta2: + type: map + description: | + Groovy Map containing reference information + e.g. [ id:'test' ] + - ch_index: + type: file + description: | + Structure: [val(meta2), path(index)] + Chromap genome index files + pattern: "*.index" + - ch_fasta: + type: file + description: | + Structure: [val(meta2), path(fasta)] + Reference fasta file + pattern: "*.{fasta,fa}" + - ch_barcodes: + type: file + description: | + Structure: [path(barcodes)] + Cell barcode files + - ch_whitelist: + type: file + description: | + Structure: [path(whitelist)] + Cell barcode whitelist file + - ch_chr_order: + type: file + description: | + Structure: [path(chr_order)] + Custom chromosome order + - ch_pairs_chr_order: + type: file + description: | + Structure: [path(pairs_chr_order)] + Natural chromosome order for pairs flipping +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test' ] + - bam: + type: file + description: BAM file + pattern: "*.bam" + - bai: + type: file + description: BAM index of the sorted BAM file + pattern: "*.bai" + - stats: + type: file + description: File containing samtools stats output + pattern: "*.{stats}" + - flagstat: + type: file + description: File containing samtools flagstat output + pattern: "*.{flagstat}" + - idxstats: + type: file + description: File containing samtools idxstats output + pattern: "*.{idxstats}" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@JoseEspinosa" From c8739edbebf1c3cdca696a50653f712f1817da6d Mon Sep 17 00:00:00 2001 From: JoseEspinosa Date: Mon, 17 Apr 2023 23:08:50 +0200 Subject: [PATCH 06/24] use the FASTQ_FASTQC_UMITOOLS_TRIMGALORE subworkflow instead of fastqc_trimgalore --- conf/modules.config | 18 ++++++--- subworkflows/nf-core/fastqc_trimgalore.nf | 48 ----------------------- workflows/chipseq.nf | 26 ++++++------ 3 files changed, 27 insertions(+), 65 deletions(-) delete mode 100644
subworkflows/nf-core/fastqc_trimgalore.nf diff --git a/conf/modules.config b/conf/modules.config index b46fec83..f4c55854 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -122,7 +122,7 @@ process { if (!(params.skip_fastqc || params.skip_qc)) { process { - withName: '.*:FASTQC_TRIMGALORE:FASTQC' { + withName: 'FASTQC' { ext.args = '--quiet' publishDir = [ [ @@ -142,11 +142,17 @@ if (!(params.skip_fastqc || params.skip_qc)) { if (!params.skip_trimming) { process { - withName: '.*:FASTQC_TRIMGALORE:TRIMGALORE' { - ext.args = [ - '--fastqc', - params.trim_nextseq > 0 ? "--nextseq ${params.trim_nextseq}" : '' - ].join(' ').trim() + withName: 'TRIMGALORE' { + ext.args = { + [ + '--fastqc', + params.trim_nextseq > 0 ? "--nextseq ${params.trim_nextseq}" : '', + params.clip_r1 > 0 ? "--clip_r1 ${params.clip_r1}" : '', + params.clip_r2 > 0 ? "--clip_r2 ${params.clip_r2}" : '', + params.three_prime_clip_r1 > 0 ? "--three_prime_clip_r1 ${params.three_prime_clip_r1}" : '', + params.three_prime_clip_r2 > 0 ? 
"--three_prime_clip_r2 ${params.three_prime_clip_r2}" : '' + ].join(' ').trim() + } publishDir = [ [ path: { "${params.outdir}/trimgalore/fastqc" }, diff --git a/subworkflows/nf-core/fastqc_trimgalore.nf b/subworkflows/nf-core/fastqc_trimgalore.nf deleted file mode 100644 index 8a18098a..00000000 --- a/subworkflows/nf-core/fastqc_trimgalore.nf +++ /dev/null @@ -1,48 +0,0 @@ -// -// Read QC and trimming -// - -include { FASTQC } from '../../modules/nf-core/fastqc/main' -include { TRIMGALORE } from '../../modules/nf-core/trimgalore/main' - -workflow FASTQC_TRIMGALORE { - take: - reads // channel: [ val(meta), [ reads ] ] - skip_fastqc // boolean: true/false - skip_trimming // boolean: true/false - - main: - - ch_versions = Channel.empty() - fastqc_html = Channel.empty() - fastqc_zip = Channel.empty() - if (!skip_fastqc) { - FASTQC ( reads ).html.set { fastqc_html } - fastqc_zip = FASTQC.out.zip - ch_versions = ch_versions.mix(FASTQC.out.versions.first()) - } - - trim_reads = reads - trim_html = Channel.empty() - trim_zip = Channel.empty() - trim_log = Channel.empty() - if (!skip_trimming) { - TRIMGALORE ( reads ).reads.set { trim_reads } - trim_html = TRIMGALORE.out.html - trim_zip = TRIMGALORE.out.zip - trim_log = TRIMGALORE.out.log - ch_versions = ch_versions.mix(TRIMGALORE.out.versions.first()) - } - - emit: - reads = trim_reads // channel: [ val(meta), [ reads ] ] - - fastqc_html // channel: [ val(meta), [ html ] ] - fastqc_zip // channel: [ val(meta), [ zip ] ] - - trim_html // channel: [ val(meta), [ html ] ] - trim_zip // channel: [ val(meta), [ zip ] ] - trim_log // channel: [ val(meta), [ txt ] ] - - versions = ch_versions.ifEmpty(null) // channel: [ versions.yml ] -} diff --git a/workflows/chipseq.nf b/workflows/chipseq.nf index caaa4c0c..d2279680 100644 --- a/workflows/chipseq.nf +++ b/workflows/chipseq.nf @@ -115,7 +115,7 @@ include { HOMER_ANNOTATEPEAKS as HOMER_ANNOTATEPEAKS_CONSENSUS } from '../module // SUBWORKFLOW: Consisting entirely of 
nf-core/modules // -include { FASTQC_TRIMGALORE } from '../subworkflows/nf-core/fastqc_trimgalore' +include { FASTQ_FASTQC_UMITOOLS_TRIMGALORE } from '../subworkflows/nf-core/fastq_fastqc_umitools_trimgalore/main' include { ALIGN_BWA_MEM } from '../subworkflows/nf-core/align_bwa_mem' include { ALIGN_BOWTIE2 } from '../subworkflows/nf-core/align_bowtie2' include { ALIGN_CHROMAP } from '../subworkflows/nf-core/align_chromap' @@ -155,12 +155,16 @@ workflow CHIPSEQ { // // SUBWORKFLOW: Read QC and trim adapters // - FASTQC_TRIMGALORE ( + FASTQ_FASTQC_UMITOOLS_TRIMGALORE ( INPUT_CHECK.out.reads, params.skip_fastqc || params.skip_qc, - params.skip_trimming + false, + false, + params.skip_trimming, + 0, + 10000 ) - ch_versions = ch_versions.mix(FASTQC_TRIMGALORE.out.versions) + ch_versions = ch_versions.mix(FASTQ_FASTQC_UMITOOLS_TRIMGALORE.out.versions) // // SUBWORKFLOW: Alignment with BWA & BAM QC @@ -172,7 +176,7 @@ workflow CHIPSEQ { ch_samtools_idxstats = Channel.empty() if (params.aligner == 'bwa') { ALIGN_BWA_MEM ( - FASTQC_TRIMGALORE.out.reads, + FASTQ_FASTQC_UMITOOLS_TRIMGALORE.out.reads, PREPARE_GENOME.out.bwa_index ) ch_genome_bam = ALIGN_BWA_MEM.out.bam @@ -188,7 +192,7 @@ workflow CHIPSEQ { // if (params.aligner == 'bowtie2') { ALIGN_BOWTIE2 ( - FASTQC_TRIMGALORE.out.reads, + FASTQ_FASTQC_UMITOOLS_TRIMGALORE.out.reads, PREPARE_GENOME.out.bowtie2_index, params.save_unaligned ) @@ -205,7 +209,7 @@ workflow CHIPSEQ { // if (params.aligner == 'chromap') { ALIGN_CHROMAP ( - FASTQC_TRIMGALORE.out.reads, + FASTQ_FASTQC_UMITOOLS_TRIMGALORE.out.reads, PREPARE_GENOME.out.chromap_index, PREPARE_GENOME.out.fasta .map { @@ -225,7 +229,7 @@ workflow CHIPSEQ { // if (params.aligner == 'star') { ALIGN_STAR ( - FASTQC_TRIMGALORE.out.reads, + FASTQ_FASTQC_UMITOOLS_TRIMGALORE.out.reads, PREPARE_GENOME.out.star_index ) ch_genome_bam = ALIGN_STAR.out.bam @@ -675,9 +679,9 @@ workflow CHIPSEQ { ch_methods_description.collectFile(name: 'methods_description_mqc.yaml'), 
ch_multiqc_logo.collect().ifEmpty([]), - FASTQC_TRIMGALORE.out.fastqc_zip.collect{it[1]}.ifEmpty([]), - FASTQC_TRIMGALORE.out.trim_zip.collect{it[1]}.ifEmpty([]), - FASTQC_TRIMGALORE.out.trim_log.collect{it[1]}.ifEmpty([]), + FASTQ_FASTQC_UMITOOLS_TRIMGALORE.out.fastqc_zip.collect{it[1]}.ifEmpty([]), + FASTQ_FASTQC_UMITOOLS_TRIMGALORE.out.trim_zip.collect{it[1]}.ifEmpty([]), + FASTQ_FASTQC_UMITOOLS_TRIMGALORE.out.trim_log.collect{it[1]}.ifEmpty([]), ch_samtools_stats.collect{it[1]}.ifEmpty([]), ch_samtools_flagstat.collect{it[1]}.ifEmpty([]), From 8af9cb2991f30890ba1d1bd7cbb3cce9e690b717 Mon Sep 17 00:00:00 2001 From: JoseEspinosa Date: Mon, 17 Apr 2023 23:16:23 +0200 Subject: [PATCH 07/24] Use FASTQ_ALIGN_BWA subworkflow --- conf/modules.config | 12 +++++---- subworkflows/nf-core/align_bwa_mem.nf | 37 --------------------------- workflows/chipseq.nf | 21 ++++++++------- 3 files changed, 19 insertions(+), 51 deletions(-) delete mode 100644 subworkflows/nf-core/align_bwa_mem.nf diff --git a/conf/modules.config b/conf/modules.config index f4c55854..01316ea6 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -214,11 +214,13 @@ process { if (params.aligner == 'bwa') { process { withName: 'BWA_MEM' { - ext.args = { [ - '-M', - params.bwa_min_score ? " -T ${params.bwa_min_score}" : '', - meta.read_group ? "-R ${meta.read_group}": '' - ].join(' ').trim() } + ext.args = { + [ + '-M', + params.bwa_min_score ? " -T ${params.bwa_min_score}" : '', + meta.read_group ? 
"-R ${meta.read_group}": '' + ].join(' ').trim() + } ext.args2 = '-bhS -F 0x0100 -O BAM' ext.prefix = { "${meta.id}.Lb" } publishDir = [ diff --git a/subworkflows/nf-core/align_bwa_mem.nf b/subworkflows/nf-core/align_bwa_mem.nf deleted file mode 100644 index eac98188..00000000 --- a/subworkflows/nf-core/align_bwa_mem.nf +++ /dev/null @@ -1,37 +0,0 @@ -/* - * Map reads, sort, index BAM file and run samtools stats, flagstat and idxstats - */ - -include { BWA_MEM } from '../../modules/nf-core/bwa/mem/main' -include { BAM_SORT_SAMTOOLS } from './bam_sort_samtools' - -workflow ALIGN_BWA_MEM { - take: - reads // channel: [ val(meta), [ reads ] ] - index // path: /path/to/index - - main: - - ch_versions = Channel.empty() - - // - // Map reads with BWA - // - BWA_MEM(reads, index, false) - ch_versions = ch_versions.mix(BWA_MEM.out.versions.first()) - - // - // Sort, index BAM file and run samtools stats, flagstat and idxstats - // - BAM_SORT_SAMTOOLS(BWA_MEM.out.bam) - ch_versions = ch_versions.mix(BAM_SORT_SAMTOOLS.out.versions.first()) - - emit: - bam = BAM_SORT_SAMTOOLS.out.bam // channel: [ val(meta), [ bam ] ] - bai = BAM_SORT_SAMTOOLS.out.bai // channel: [ val(meta), [ bai ] ] - stats = BAM_SORT_SAMTOOLS.out.stats // channel: [ val(meta), [ stats ] ] - flagstat = BAM_SORT_SAMTOOLS.out.flagstat // channel: [ val(meta), [ flagstat ] ] - idxstats = BAM_SORT_SAMTOOLS.out.idxstats // channel: [ val(meta), [ idxstats ] ] - - versions = ch_versions // path: versions.yml -} diff --git a/workflows/chipseq.nf b/workflows/chipseq.nf index d2279680..dfd2df76 100644 --- a/workflows/chipseq.nf +++ b/workflows/chipseq.nf @@ -116,7 +116,8 @@ include { HOMER_ANNOTATEPEAKS as HOMER_ANNOTATEPEAKS_CONSENSUS } from '../module // include { FASTQ_FASTQC_UMITOOLS_TRIMGALORE } from '../subworkflows/nf-core/fastq_fastqc_umitools_trimgalore/main' -include { ALIGN_BWA_MEM } from '../subworkflows/nf-core/align_bwa_mem' +include { FASTQ_ALIGN_BWA } from 
'../subworkflows/nf-core/fastq_align_bwa/main' + include { ALIGN_BOWTIE2 } from '../subworkflows/nf-core/align_bowtie2' include { ALIGN_CHROMAP } from '../subworkflows/nf-core/align_chromap' include { ALIGN_STAR } from '../subworkflows/nf-core/align_star' @@ -175,16 +176,18 @@ workflow CHIPSEQ { ch_samtools_flagstat = Channel.empty() ch_samtools_idxstats = Channel.empty() if (params.aligner == 'bwa') { - ALIGN_BWA_MEM ( + FASTQ_ALIGN_BWA ( FASTQ_FASTQC_UMITOOLS_TRIMGALORE.out.reads, - PREPARE_GENOME.out.bwa_index + PREPARE_GENOME.out.bwa_index, + false, + PREPARE_GENOME.out.fasta ) - ch_genome_bam = ALIGN_BWA_MEM.out.bam - ch_genome_bam_index = ALIGN_BWA_MEM.out.bai - ch_samtools_stats = ALIGN_BWA_MEM.out.stats - ch_samtools_flagstat = ALIGN_BWA_MEM.out.flagstat - ch_samtools_idxstats = ALIGN_BWA_MEM.out.idxstats - ch_versions = ch_versions.mix(ALIGN_BWA_MEM.out.versions.first()) + ch_genome_bam = FASTQ_ALIGN_BWA.out.bam + ch_genome_bam_index = FASTQ_ALIGN_BWA.out.bai + ch_samtools_stats = FASTQ_ALIGN_BWA.out.stats + ch_samtools_flagstat = FASTQ_ALIGN_BWA.out.flagstat + ch_samtools_idxstats = FASTQ_ALIGN_BWA.out.idxstats + ch_versions = ch_versions.mix(FASTQ_ALIGN_BWA.out.versions) } // From d7f074a63793bcb3ad9b51af0ee2bc7511a3159c Mon Sep 17 00:00:00 2001 From: JoseEspinosa Date: Tue, 18 Apr 2023 10:25:46 +0200 Subject: [PATCH 08/24] Use FASTQ_ALIGN_BOWTIE2 subworkflow --- subworkflows/nf-core/align_bowtie2.nf | 38 --------------------------- workflows/chipseq.nf | 20 +++++++------- 2 files changed, 11 insertions(+), 47 deletions(-) delete mode 100644 subworkflows/nf-core/align_bowtie2.nf diff --git a/subworkflows/nf-core/align_bowtie2.nf b/subworkflows/nf-core/align_bowtie2.nf deleted file mode 100644 index 4661c720..00000000 --- a/subworkflows/nf-core/align_bowtie2.nf +++ /dev/null @@ -1,38 +0,0 @@ -/* - * Map reads, sort, index BAM file and run samtools stats, flagstat and idxstats - */ - -include { BOWTIE2_ALIGN } from 
'../../modules/nf-core/bowtie2/align/main' -include { BAM_SORT_SAMTOOLS } from './bam_sort_samtools' - -workflow ALIGN_BOWTIE2 { - take: - reads // channel: [ val(meta), [ reads ] ] - index // path: /path/to/index - save_unaligned // boolean: true/false - - main: - - ch_versions = Channel.empty() - - // - // Map reads with BWA - // - BOWTIE2_ALIGN(reads, index, save_unaligned, false) - ch_versions = ch_versions.mix(BOWTIE2_ALIGN.out.versions.first()) - - // - // Sort, index BAM file and run samtools stats, flagstat and idxstats - // - BAM_SORT_SAMTOOLS(BOWTIE2_ALIGN.out.bam) - ch_versions = ch_versions.mix(BAM_SORT_SAMTOOLS.out.versions.first()) - - emit: - bam = BAM_SORT_SAMTOOLS.out.bam // channel: [ val(meta), [ bam ] ] - bai = BAM_SORT_SAMTOOLS.out.bai // channel: [ val(meta), [ bai ] ] - stats = BAM_SORT_SAMTOOLS.out.stats // channel: [ val(meta), [ stats ] ] - flagstat = BAM_SORT_SAMTOOLS.out.flagstat // channel: [ val(meta), [ flagstat ] ] - idxstats = BAM_SORT_SAMTOOLS.out.idxstats // channel: [ val(meta), [ idxstats ] ] - - versions = ch_versions // path: versions.yml -} diff --git a/workflows/chipseq.nf b/workflows/chipseq.nf index dfd2df76..45de3811 100644 --- a/workflows/chipseq.nf +++ b/workflows/chipseq.nf @@ -117,8 +117,8 @@ include { HOMER_ANNOTATEPEAKS as HOMER_ANNOTATEPEAKS_CONSENSUS } from '../module include { FASTQ_FASTQC_UMITOOLS_TRIMGALORE } from '../subworkflows/nf-core/fastq_fastqc_umitools_trimgalore/main' include { FASTQ_ALIGN_BWA } from '../subworkflows/nf-core/fastq_align_bwa/main' +include { FASTQ_ALIGN_BOWTIE2 } from '../subworkflows/nf-core/fastq_align_bowtie2/main' -include { ALIGN_BOWTIE2 } from '../subworkflows/nf-core/align_bowtie2' include { ALIGN_CHROMAP } from '../subworkflows/nf-core/align_chromap' include { ALIGN_STAR } from '../subworkflows/nf-core/align_star' include { MARK_DUPLICATES_PICARD } from '../subworkflows/nf-core/mark_duplicates_picard' @@ -194,17 +194,19 @@ workflow CHIPSEQ { // SUBWORKFLOW: Alignment with 
Bowtie2 & BAM QC // if (params.aligner == 'bowtie2') { - ALIGN_BOWTIE2 ( + FASTQ_ALIGN_BOWTIE2 ( FASTQ_FASTQC_UMITOOLS_TRIMGALORE.out.reads, PREPARE_GENOME.out.bowtie2_index, - params.save_unaligned + params.save_unaligned, + false, + PREPARE_GENOME.out.fasta ) - ch_genome_bam = ALIGN_BOWTIE2.out.bam - ch_genome_bam_index = ALIGN_BOWTIE2.out.bai - ch_samtools_stats = ALIGN_BOWTIE2.out.stats - ch_samtools_flagstat = ALIGN_BOWTIE2.out.flagstat - ch_samtools_idxstats = ALIGN_BOWTIE2.out.idxstats - ch_versions = ch_versions.mix(ALIGN_BOWTIE2.out.versions.first()) + ch_genome_bam = FASTQ_ALIGN_BOWTIE2.out.bam + ch_genome_bam_index = FASTQ_ALIGN_BOWTIE2.out.bai + ch_samtools_stats = FASTQ_ALIGN_BOWTIE2.out.stats + ch_samtools_flagstat = FASTQ_ALIGN_BOWTIE2.out.flagstat + ch_samtools_idxstats = FASTQ_ALIGN_BOWTIE2.out.idxstats + ch_versions = ch_versions.mix(FASTQ_ALIGN_BOWTIE2.out.versions.first()) } // From 28de291ad88fc1c6c8fac99932f9558ae22174b9 Mon Sep 17 00:00:00 2001 From: JoseEspinosa Date: Tue, 18 Apr 2023 16:01:06 +0200 Subject: [PATCH 09/24] Install custom/getchromsizes and picard/collectmultiplemetrics --- modules.json | 7 +++--- .../getchromsizes/custom-getchromsizes.diff | 23 ------------------- modules/nf-core/custom/getchromsizes/main.nf | 15 ++++++------ .../picard/collectmultiplemetrics/main.nf | 20 ++++++++-------- .../picard/collectmultiplemetrics/meta.yml | 18 +++++++++++++-- 5 files changed, 37 insertions(+), 46 deletions(-) delete mode 100644 modules/nf-core/custom/getchromsizes/custom-getchromsizes.diff diff --git a/modules.json b/modules.json index d7abe7b4..22fd3128 100644 --- a/modules.json +++ b/modules.json @@ -42,9 +42,8 @@ }, "custom/getchromsizes": { "branch": "master", - "git_sha": "5e34754d42cd2d5d248ca8673c0a53cdf5624905", - "installed_by": ["modules"], - "patch": "modules/nf-core/custom/getchromsizes/custom-getchromsizes.diff" + "git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c", + "installed_by": ["modules"] }, 
"deeptools/computematrix": { "branch": "master", @@ -108,7 +107,7 @@ }, "picard/collectmultiplemetrics": { "branch": "master", - "git_sha": "5e34754d42cd2d5d248ca8673c0a53cdf5624905", + "git_sha": "75027bf77472b1f4fd2cdd7e46f83119dfb0f2c6", "installed_by": ["modules"] }, "picard/markduplicates": { diff --git a/modules/nf-core/custom/getchromsizes/custom-getchromsizes.diff b/modules/nf-core/custom/getchromsizes/custom-getchromsizes.diff deleted file mode 100644 index 18fda554..00000000 --- a/modules/nf-core/custom/getchromsizes/custom-getchromsizes.diff +++ /dev/null @@ -1,23 +0,0 @@ -Changes in module 'nf-core/custom/getchromsizes' ---- modules/nf-core/custom/getchromsizes/main.nf -+++ modules/nf-core/custom/getchromsizes/main.nf -@@ -8,13 +8,12 @@ - 'quay.io/biocontainers/samtools:1.15.1--h1170115_0' }" - - input: -- tuple val(meta), path(fasta) -+ path(fasta) - - output: -- tuple val(meta), path ("*.sizes"), emit: sizes -- tuple val(meta), path ("*.fai") , emit: fai -- tuple val(meta), path ("*.gzi") , emit: gzi, optional: true -- path "versions.yml" , emit: versions -+ path '*.sizes' , emit: sizes -+ path '*.fai' , emit: fai -+ path "versions.yml", emit: versions - - when: - task.ext.when == null || task.ext.when - -************************************************************ diff --git a/modules/nf-core/custom/getchromsizes/main.nf b/modules/nf-core/custom/getchromsizes/main.nf index 562c693e..580f87fe 100644 --- a/modules/nf-core/custom/getchromsizes/main.nf +++ b/modules/nf-core/custom/getchromsizes/main.nf @@ -2,18 +2,19 @@ process CUSTOM_GETCHROMSIZES { tag "$fasta" label 'process_single' - conda (params.enable_conda ? "bioconda::samtools=1.15.1" : null) + conda "bioconda::samtools=1.16.1" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/samtools:1.15.1--h1170115_0' : - 'quay.io/biocontainers/samtools:1.15.1--h1170115_0' }" + 'https://depot.galaxyproject.org/singularity/samtools:1.16.1--h6899075_1' : + 'quay.io/biocontainers/samtools:1.16.1--h6899075_1' }" input: - path(fasta) + tuple val(meta), path(fasta) output: - path '*.sizes' , emit: sizes - path '*.fai' , emit: fai - path "versions.yml", emit: versions + tuple val(meta), path ("*.sizes"), emit: sizes + tuple val(meta), path ("*.fai") , emit: fai + tuple val(meta), path ("*.gzi") , emit: gzi, optional: true + path "versions.yml" , emit: versions when: task.ext.when == null || task.ext.when diff --git a/modules/nf-core/picard/collectmultiplemetrics/main.nf b/modules/nf-core/picard/collectmultiplemetrics/main.nf index 63f4e872..ed88dbe7 100644 --- a/modules/nf-core/picard/collectmultiplemetrics/main.nf +++ b/modules/nf-core/picard/collectmultiplemetrics/main.nf @@ -1,16 +1,16 @@ process PICARD_COLLECTMULTIPLEMETRICS { tag "$meta.id" - label 'process_medium' + label 'process_single' - conda (params.enable_conda ? "bioconda::picard=2.27.4" : null) + conda "bioconda::picard=3.0.0" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/picard:2.27.4--hdfd78af_0' : - 'quay.io/biocontainers/picard:2.27.4--hdfd78af_0' }" + 'https://depot.galaxyproject.org/singularity/picard:3.0.0--hdfd78af_1' : + 'quay.io/biocontainers/picard:3.0.0--hdfd78af_1' }" input: - tuple val(meta), path(bam) - path fasta - path fai + tuple val(meta) , path(bam), path(bai) + tuple val(meta2), path(fasta) + tuple val(meta3), path(fai) output: tuple val(meta), path("*_metrics"), emit: metrics @@ -24,15 +24,15 @@ process PICARD_COLLECTMULTIPLEMETRICS { def args = task.ext.args ?: '' def prefix = task.ext.prefix ?: "${meta.id}" def reference = fasta ? 
"--REFERENCE_SEQUENCE ${fasta}" : "" - def avail_mem = 3 + def avail_mem = 3072 if (!task.memory) { log.info '[Picard CollectMultipleMetrics] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.' } else { - avail_mem = task.memory.giga + avail_mem = (task.memory.mega*0.8).intValue() } """ picard \\ - -Xmx${avail_mem}g \\ + -Xmx${avail_mem}M \\ CollectMultipleMetrics \\ $args \\ --INPUT $bam \\ diff --git a/modules/nf-core/picard/collectmultiplemetrics/meta.yml b/modules/nf-core/picard/collectmultiplemetrics/meta.yml index c11b02cf..22656080 100644 --- a/modules/nf-core/picard/collectmultiplemetrics/meta.yml +++ b/modules/nf-core/picard/collectmultiplemetrics/meta.yml @@ -23,11 +23,25 @@ input: e.g. [ id:'test', single_end:false ] - bam: type: file - description: BAM file - pattern: "*.{bam}" + description: SAM/BAM/CRAM file + pattern: "*.{sam,bam,cram}" + - bai: + type: file + description: Optional SAM/BAM/CRAM file index + pattern: "*.{sai,bai,crai}" + - meta2: + type: map + description: | + Groovy Map containing reference information + e.g. [ id:'genome'] - fasta: type: file description: Genome fasta file + - meta3: + type: map + description: | + Groovy Map containing reference information + e.g. [ id:'genome'] - fai: type: file description: Index of FASTA file. Only needed when fasta is supplied. 
From 4c462b3134dfb423e983be69a94180218edea8c7 Mon Sep 17 00:00:00 2001 From: JoseEspinosa Date: Tue, 18 Apr 2023 16:07:24 +0200 Subject: [PATCH 10/24] Remove substituted modules and subworkflows --- modules/local/bam_filter.nf | 47 ----------------------- subworkflows/local/filter_bam_bamtools.nf | 36 ----------------- subworkflows/nf-core/align_chromap.nf | 38 ------------------ subworkflows/nf-core/align_star.nf | 46 ---------------------- subworkflows/nf-core/bam_sort_samtools.nf | 34 ---------------- 5 files changed, 201 deletions(-) delete mode 100644 modules/local/bam_filter.nf delete mode 100644 subworkflows/local/filter_bam_bamtools.nf delete mode 100644 subworkflows/nf-core/align_chromap.nf delete mode 100644 subworkflows/nf-core/align_star.nf delete mode 100644 subworkflows/nf-core/bam_sort_samtools.nf diff --git a/modules/local/bam_filter.nf b/modules/local/bam_filter.nf deleted file mode 100644 index ff07c084..00000000 --- a/modules/local/bam_filter.nf +++ /dev/null @@ -1,47 +0,0 @@ -/* - * Filter BAM file - */ -process BAM_FILTER { - tag "$meta.id" - label 'process_medium' - - conda (params.enable_conda ? "bioconda::bamtools=2.5.2 bioconda::samtools=1.15.1" : null) - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/mulled-v2-0560a8046fc82aa4338588eca29ff18edab2c5aa:5687a7da26983502d0a8a9a6b05ed727c740ddc4-0' : - 'quay.io/biocontainers/mulled-v2-0560a8046fc82aa4338588eca29ff18edab2c5aa:5687a7da26983502d0a8a9a6b05ed727c740ddc4-0' }" - - input: - tuple val(meta), path(bam), path(bai) - path bed - path bamtools_filter_se_config - path bamtools_filter_pe_config - - output: - tuple val(meta), path("*.bam"), emit: bam - path "versions.yml" , emit: versions - - script: - def prefix = task.ext.prefix ?: "${meta.id}" - def filter_params = meta.single_end ? '-F 0x004' : '-F 0x004 -F 0x0008 -f 0x001' - def dup_params = params.keep_dups ? 
'' : '-F 0x0400' - def multimap_params = params.keep_multi_map ? '' : '-q 1' - def blacklist_params = params.blacklist ? "-L $bed" : '' - def config = meta.single_end ? bamtools_filter_se_config : bamtools_filter_pe_config - """ - samtools view \\ - $filter_params \\ - $dup_params \\ - $multimap_params \\ - $blacklist_params \\ - -b $bam \\ - | bamtools filter \\ - -out ${prefix}.bam \\ - -script $config - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') - bamtools: \$(echo \$(bamtools --version 2>&1) | sed 's/^.*bamtools //; s/Part .*\$//') - END_VERSIONS - """ -} diff --git a/subworkflows/local/filter_bam_bamtools.nf b/subworkflows/local/filter_bam_bamtools.nf deleted file mode 100644 index 40e9b1be..00000000 --- a/subworkflows/local/filter_bam_bamtools.nf +++ /dev/null @@ -1,36 +0,0 @@ -/* - * Filter BAM file - */ - -include { BAM_FILTER } from '../../modules/local/bam_filter' -include { BAM_REMOVE_ORPHANS } from '../../modules/local/bam_remove_orphans' -include { BAM_SORT_SAMTOOLS } from '../nf-core/bam_sort_samtools' - -workflow FILTER_BAM_BAMTOOLS { - take: - ch_bam_bai // channel: [ val(meta), [ bam ], [bai] ] - ch_bed // channel: [ bed ] - bamtools_filter_se_config // file: BAMtools filter JSON config file for SE data - bamtools_filter_pe_config // file: BAMtools filter JSON config file for PE data - - main: - ch_versions = Channel.empty() - - BAM_FILTER(ch_bam_bai, ch_bed, bamtools_filter_se_config, bamtools_filter_pe_config) - BAM_REMOVE_ORPHANS(BAM_FILTER.out.bam) - BAM_SORT_SAMTOOLS(BAM_REMOVE_ORPHANS.out.bam) - - ch_versions = ch_versions.mix(BAM_FILTER.out.versions, - BAM_REMOVE_ORPHANS.out.versions, - BAM_SORT_SAMTOOLS.out.versions) - - emit: - name_bam = BAM_REMOVE_ORPHANS.out.bam // channel: [ val(meta), [ bam ] ] - bam = BAM_SORT_SAMTOOLS.out.bam // channel: [ val(meta), [ bam ] ] - bai = BAM_SORT_SAMTOOLS.out.bai // channel: [ val(meta), [ bai ] ] - 
stats = BAM_SORT_SAMTOOLS.out.stats // channel: [ val(meta), [ stats ] ] - flagstat = BAM_SORT_SAMTOOLS.out.flagstat // channel: [ val(meta), [ flagstat ] ] - idxstats = BAM_SORT_SAMTOOLS.out.idxstats // channel: [ val(meta), [ idxstats ] ] - - versions = ch_versions // channel: [ versions.yml ] -} diff --git a/subworkflows/nf-core/align_chromap.nf b/subworkflows/nf-core/align_chromap.nf deleted file mode 100644 index 29b77442..00000000 --- a/subworkflows/nf-core/align_chromap.nf +++ /dev/null @@ -1,38 +0,0 @@ -/* - * Map reads, sort, index BAM file and run samtools stats, flagstat and idxstats - */ - -include { CHROMAP_CHROMAP } from '../../modules/nf-core/chromap/chromap/main' -include { BAM_SORT_SAMTOOLS } from './bam_sort_samtools' - -workflow ALIGN_CHROMAP { - take: - reads // channel: [ val(meta), [ reads ] ] - index // path: /path/to/index - fasta // path: /path/to/fasta - - main: - - ch_versions = Channel.empty() - - // - // Map reads with CHROMAP - // - CHROMAP_CHROMAP(reads, fasta, index, [], [], [], []) - ch_versions = ch_versions.mix(CHROMAP_CHROMAP.out.versions.first()) - - // - // Sort, index BAM file and run samtools stats, flagstat and idxstats - // - BAM_SORT_SAMTOOLS(CHROMAP_CHROMAP.out.bam) - ch_versions = ch_versions.mix(BAM_SORT_SAMTOOLS.out.versions.first()) - - emit: - bam = BAM_SORT_SAMTOOLS.out.bam // channel: [ val(meta), [ bam ] ] - bai = BAM_SORT_SAMTOOLS.out.bai // channel: [ val(meta), [ bai ] ] - stats = BAM_SORT_SAMTOOLS.out.stats // channel: [ val(meta), [ stats ] ] - flagstat = BAM_SORT_SAMTOOLS.out.flagstat // channel: [ val(meta), [ flagstat ] ] - idxstats = BAM_SORT_SAMTOOLS.out.idxstats // channel: [ val(meta), [ idxstats ] ] - - versions = ch_versions // path: versions.yml -} diff --git a/subworkflows/nf-core/align_star.nf b/subworkflows/nf-core/align_star.nf deleted file mode 100644 index 47462182..00000000 --- a/subworkflows/nf-core/align_star.nf +++ /dev/null @@ -1,46 +0,0 @@ -/* - * Map reads, sort, index BAM file and run 
samtools stats, flagstat and idxstats - */ - -include { STAR_ALIGN } from '../../modules/local/star_align' -include { BAM_SORT_SAMTOOLS } from './bam_sort_samtools' - -workflow ALIGN_STAR { - take: - reads // channel: [ val(meta), [ reads ] ] - index // channel: /path/to/star/index/ - - main: - - ch_versions = Channel.empty() - - // - // Map reads with STAR - // - STAR_ALIGN ( reads, index ) - ch_versions = ch_versions.mix(STAR_ALIGN.out.versions.first()) - - // - // Sort, index BAM file and run samtools stats, flagstat and idxstats - // - BAM_SORT_SAMTOOLS ( STAR_ALIGN.out.bam ) - ch_versions = ch_versions.mix(BAM_SORT_SAMTOOLS.out.versions) - - emit: - orig_bam = STAR_ALIGN.out.bam // channel: [ val(meta), bam ] - log_final = STAR_ALIGN.out.log_final // channel: [ val(meta), log_final ] - log_out = STAR_ALIGN.out.log_out // channel: [ val(meta), log_out ] - log_progress = STAR_ALIGN.out.log_progress // channel: [ val(meta), log_progress ] - bam_sorted = STAR_ALIGN.out.bam_sorted // channel: [ val(meta), bam_sorted ] - bam_transcript = STAR_ALIGN.out.bam_transcript // channel: [ val(meta), bam_transcript ] - fastq = STAR_ALIGN.out.fastq // channel: [ val(meta), fastq ] - tab = STAR_ALIGN.out.tab // channel: [ val(meta), tab ] - - bam = BAM_SORT_SAMTOOLS.out.bam // channel: [ val(meta), [ bam ] ] - bai = BAM_SORT_SAMTOOLS.out.bai // channel: [ val(meta), [ bai ] ] - stats = BAM_SORT_SAMTOOLS.out.stats // channel: [ val(meta), [ stats ] ] - flagstat = BAM_SORT_SAMTOOLS.out.flagstat // channel: [ val(meta), [ flagstat ] ] - idxstats = BAM_SORT_SAMTOOLS.out.idxstats // channel: [ val(meta), [ idxstats ] ] - - versions = ch_versions // channel: [ versions.yml ] -} diff --git a/subworkflows/nf-core/bam_sort_samtools.nf b/subworkflows/nf-core/bam_sort_samtools.nf deleted file mode 100644 index deb06c4d..00000000 --- a/subworkflows/nf-core/bam_sort_samtools.nf +++ /dev/null @@ -1,34 +0,0 @@ -/* - * Sort, index BAM file and run samtools stats, flagstat and idxstats - */ - 
-include { SAMTOOLS_SORT } from '../../modules/nf-core/samtools/sort/main' -include { SAMTOOLS_INDEX } from '../../modules/nf-core/samtools/index/main' -include { BAM_STATS_SAMTOOLS } from './bam_stats_samtools' - -workflow BAM_SORT_SAMTOOLS { - take: - ch_bam // channel: [ val(meta), [ bam ] ] - - main: - - ch_versions = Channel.empty() - - SAMTOOLS_SORT(ch_bam) - ch_versions = ch_versions.mix(SAMTOOLS_SORT.out.versions.first()) - - SAMTOOLS_INDEX(SAMTOOLS_SORT.out.bam) - ch_versions = ch_versions.mix(SAMTOOLS_INDEX.out.versions.first()) - - BAM_STATS_SAMTOOLS(SAMTOOLS_SORT.out.bam.join(SAMTOOLS_INDEX.out.bai, by: [0])) - ch_versions = ch_versions.mix(BAM_STATS_SAMTOOLS.out.versions) - - emit: - bam = SAMTOOLS_SORT.out.bam // channel: [ val(meta), [ bam ] ] - bai = SAMTOOLS_INDEX.out.bai // channel: [ val(meta), [ bai ] ] - stats = BAM_STATS_SAMTOOLS.out.stats // channel: [ val(meta), [ stats ] ] - flagstat = BAM_STATS_SAMTOOLS.out.flagstat // channel: [ val(meta), [ flagstat ] ] - idxstats = BAM_STATS_SAMTOOLS.out.idxstats // channel: [ val(meta), [ idxstats ] ] - - versions = ch_versions // channel: [ versions.yml ] -} From 3a33ed9119b89cfa4eceb0020413656eeb8e5f31 Mon Sep 17 00:00:00 2001 From: JoseEspinosa Date: Tue, 18 Apr 2023 16:08:31 +0200 Subject: [PATCH 11/24] Rename bamtools/filter --- modules/local/bamtools_filter.nf | 43 ++++++++++++++++++++++++++++++++ 1 file changed, 43 insertions(+) create mode 100644 modules/local/bamtools_filter.nf diff --git a/modules/local/bamtools_filter.nf b/modules/local/bamtools_filter.nf new file mode 100644 index 00000000..ee469ffd --- /dev/null +++ b/modules/local/bamtools_filter.nf @@ -0,0 +1,43 @@ +process BAMTOOLS_FILTER { + tag "$meta.id" + label 'process_medium' + + conda (params.enable_conda ? "bioconda::bamtools=2.5.2 bioconda::samtools=1.15.1" : null) + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/mulled-v2-0560a8046fc82aa4338588eca29ff18edab2c5aa:5687a7da26983502d0a8a9a6b05ed727c740ddc4-0' : + 'quay.io/biocontainers/mulled-v2-0560a8046fc82aa4338588eca29ff18edab2c5aa:5687a7da26983502d0a8a9a6b05ed727c740ddc4-0' }" + + input: + tuple val(meta), path(bam), path(bai) + path bed + path bamtools_filter_se_config + path bamtools_filter_pe_config + + output: + tuple val(meta), path("*.bam"), emit: bam + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def blacklist = bed ? "-L $bed" : '' + def config = meta.single_end ? bamtools_filter_se_config : bamtools_filter_pe_config + """ + samtools view \\ + $args \\ + $blacklist \\ + -b $bam \\ + | bamtools filter \\ + -out ${prefix}.bam \\ + -script $config + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') + bamtools: \$(echo \$(bamtools --version 2>&1) | sed 's/^.*bamtools //; s/Part .*\$//') + END_VERSIONS + """ +} From 0f70b6d380d9801eeae999c9f0d2bbc03f6b6dec Mon Sep 17 00:00:00 2001 From: JoseEspinosa Date: Tue, 18 Apr 2023 16:09:37 +0200 Subject: [PATCH 12/24] add align_star local subworkflow --- subworkflows/local/align_star.nf | 48 ++++++++++++++++++++++++++++++++ 1 file changed, 48 insertions(+) create mode 100644 subworkflows/local/align_star.nf diff --git a/subworkflows/local/align_star.nf b/subworkflows/local/align_star.nf new file mode 100644 index 00000000..843f303e --- /dev/null +++ b/subworkflows/local/align_star.nf @@ -0,0 +1,48 @@ +/* + * Map reads, sort, index BAM file and run samtools stats, flagstat and idxstats + */ + +include { STAR_ALIGN } from '../../modules/local/star_align' +include { BAM_SORT_STATS_SAMTOOLS } from '../nf-core/bam_sort_stats_samtools/main' + +workflow ALIGN_STAR { + take: + ch_reads // channel: [ val(meta), 
[ reads ] ] + ch_index // channel: /path/to/star/index/ + ch_fasta // channel: /path/to/fasta + seq_center // string: sequencing center + + main: + + ch_versions = Channel.empty() + + // + // Map reads with STAR + // + STAR_ALIGN ( ch_reads, ch_index, seq_center ) + ch_versions = ch_versions.mix(STAR_ALIGN.out.versions.first()) + + // + // Sort, index BAM file and run samtools stats, flagstat and idxstats + // + BAM_SORT_STATS_SAMTOOLS ( STAR_ALIGN.out.bam, ch_fasta ) + ch_versions = ch_versions.mix(BAM_SORT_STATS_SAMTOOLS.out.versions) + + emit: + orig_bam = STAR_ALIGN.out.bam // channel: [ val(meta), bam ] + log_final = STAR_ALIGN.out.log_final // channel: [ val(meta), log_final ] + log_out = STAR_ALIGN.out.log_out // channel: [ val(meta), log_out ] + log_progress = STAR_ALIGN.out.log_progress // channel: [ val(meta), log_progress ] + bam_sorted = STAR_ALIGN.out.bam_sorted // channel: [ val(meta), bam_sorted ] + bam_transcript = STAR_ALIGN.out.bam_transcript // channel: [ val(meta), bam_transcript ] + fastq = STAR_ALIGN.out.fastq // channel: [ val(meta), fastq ] + tab = STAR_ALIGN.out.tab // channel: [ val(meta), tab ] + + bam = BAM_SORT_STATS_SAMTOOLS.out.bam // channel: [ val(meta), [ bam ] ] + bai = BAM_SORT_STATS_SAMTOOLS.out.bai // channel: [ val(meta), [ bai ] ] + stats = BAM_SORT_STATS_SAMTOOLS.out.stats // channel: [ val(meta), [ stats ] ] + flagstat = BAM_SORT_STATS_SAMTOOLS.out.flagstat // channel: [ val(meta), [ flagstat ] ] + idxstats = BAM_SORT_STATS_SAMTOOLS.out.idxstats // channel: [ val(meta), [ idxstats ] ] + + versions = ch_versions // channel: [ versions.yml ] +} From ccbc71ccd399ab477a16b6171356091d18debbf7 Mon Sep 17 00:00:00 2001 From: JoseEspinosa Date: Tue, 18 Apr 2023 16:10:16 +0200 Subject: [PATCH 13/24] Refactor and rename bam_filter_bamtools --- subworkflows/local/bam_filter_bamtools.nf | 94 +++++++++++++++++++++++ 1 file changed, 94 insertions(+) create mode 100644 subworkflows/local/bam_filter_bamtools.nf diff --git 
a/subworkflows/local/bam_filter_bamtools.nf b/subworkflows/local/bam_filter_bamtools.nf new file mode 100644 index 00000000..a09876b2 --- /dev/null +++ b/subworkflows/local/bam_filter_bamtools.nf @@ -0,0 +1,94 @@ +include { SAMTOOLS_SORT } from '../../modules/nf-core/samtools/sort/main' +include { SAMTOOLS_INDEX } from '../../modules/nf-core/samtools/index/main' +include { BAM_SORT_STATS_SAMTOOLS } from '../nf-core/bam_sort_stats_samtools/main' +include { BAM_STATS_SAMTOOLS } from '../nf-core/bam_stats_samtools/main' + +include { BAMTOOLS_FILTER } from '../../modules/local/bamtools_filter' +include { BAM_REMOVE_ORPHANS } from '../../modules/local/bam_remove_orphans' + +workflow BAM_FILTER_BAMTOOLS { + take: + ch_bam_bai // channel: [ val(meta), [ bam ], [bai] ] + ch_bed // channel: [ bed ] + ch_fasta // channel: [ fasta ] + ch_bamtools_filter_se_config // channel: [ config_file ] + ch_bamtools_filter_pe_config // channel: [ config_file ] + + main: + + ch_versions = Channel.empty() + + // + // Filter BAM file with BAMTools + // + BAMTOOLS_FILTER ( + ch_bam_bai, + ch_bed, + ch_bamtools_filter_se_config, + ch_bamtools_filter_pe_config + ) + ch_versions = ch_versions.mix(BAMTOOLS_FILTER.out.versions.first()) + + BAMTOOLS_FILTER + .out + .bam + .branch { + meta, bam -> + single_end: meta.single_end + return [ meta, bam ] + paired_end: !meta.single_end + return [ meta, bam ] + } + .set { ch_bam } + + // + // Index SE BAM file + // + SAMTOOLS_INDEX { + ch_bam.single_end + } + ch_versions = ch_versions.mix(SAMTOOLS_INDEX.out.versions.first()) + + // + // Run samtools stats, flagstat and idxstats on SE BAM + // + BAM_STATS_SAMTOOLS ( + ch_bam.single_end.join(SAMTOOLS_INDEX.out.bai), + ch_fasta + ) + ch_versions = ch_versions.mix(BAM_STATS_SAMTOOLS.out.versions.first()) + + // + // Name sort PE BAM before filtering with pysam + // + SAMTOOLS_SORT ( + ch_bam.paired_end + ) + ch_versions = ch_versions.mix(SAMTOOLS_SORT.out.versions.first()) + + // + // Remove orphan reads from 
PE BAM file + // + BAM_REMOVE_ORPHANS ( + SAMTOOLS_SORT.out.bam + ) + ch_versions = ch_versions.mix(BAM_REMOVE_ORPHANS.out.versions.first()) + + // + // Sort, index PE BAM file and run samtools stats, flagstat and idxstats + // + BAM_SORT_STATS_SAMTOOLS ( + BAM_REMOVE_ORPHANS.out.bam, + ch_fasta + ) + ch_versions = ch_versions.mix(BAM_SORT_STATS_SAMTOOLS.out.versions.first()) + + emit: + name_bam = SAMTOOLS_SORT.out.bam // channel: [ val(meta), [ bam ] ] + bam = BAM_SORT_STATS_SAMTOOLS.out.bam.mix(ch_bam.single_end) // channel: [ val(meta), [ bam ] ] + bai = BAM_SORT_STATS_SAMTOOLS.out.bai.mix(SAMTOOLS_INDEX.out.bai) // channel: [ val(meta), [ bai ] ] + stats = BAM_SORT_STATS_SAMTOOLS.out.stats.mix(BAM_STATS_SAMTOOLS.out.stats) // channel: [ val(meta), [ stats ] ] + flagstat = BAM_SORT_STATS_SAMTOOLS.out.flagstat.mix(BAM_STATS_SAMTOOLS.out.flagstat) // channel: [ val(meta), [ flagstat ] ] + idxstats = BAM_SORT_STATS_SAMTOOLS.out.idxstats.mix(BAM_STATS_SAMTOOLS.out.idxstats) // channel: [ val(meta), [ idxstats ] ] + versions = ch_versions // channel: [ versions.yml ] +} From 0230090cc51443955e07ac2e21602cdb3bfe2e16 Mon Sep 17 00:00:00 2001 From: JoseEspinosa Date: Tue, 18 Apr 2023 16:11:27 +0200 Subject: [PATCH 14/24] Refactor code to use nf-core official subworkflows --- conf/modules.config | 14 +++--- subworkflows/local/prepare_genome.nf | 6 ++- workflows/chipseq.nf | 75 ++++++++++++++++++---------- 3 files changed, 61 insertions(+), 34 deletions(-) diff --git a/conf/modules.config b/conf/modules.config index 01316ea6..809e6ec1 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -181,7 +181,7 @@ if (!params.skip_trimming) { } process { - withName: 'NFCORE_CHIPSEQ:CHIPSEQ:ALIGN_.*:BAM_SORT_SAMTOOLS:SAMTOOLS_SORT' { + withName: 'NFCORE_CHIPSEQ:CHIPSEQ:.*:BAM_SORT_STATS_SAMTOOLS:SAMTOOLS_SORT' { ext.prefix = { "${meta.id}.Lb.sorted" } publishDir = [ path: { "${params.outdir}/${params.aligner}/library" }, @@ -191,7 +191,7 @@ process { ] } - withName: 
'NFCORE_CHIPSEQ:CHIPSEQ:ALIGN_.*:BAM_SORT_SAMTOOLS:SAMTOOLS_INDEX' { + withName: 'NFCORE_CHIPSEQ:CHIPSEQ:.*:BAM_SORT_STATS_SAMTOOLS:SAMTOOLS_INDEX' { publishDir = [ path: { "${params.outdir}/${params.aligner}/library" }, mode: params.publish_dir_mode, @@ -200,7 +200,7 @@ process { ] } - withName: 'NFCORE_CHIPSEQ:CHIPSEQ:ALIGN_.*:BAM_SORT_SAMTOOLS:BAM_STATS_SAMTOOLS:SAMTOOLS_.*' { + withName: 'NFCORE_CHIPSEQ:CHIPSEQ:.*:BAM_SORT_STATS_SAMTOOLS:BAM_STATS_SAMTOOLS:SAMTOOLS_.*' { ext.prefix = { "${meta.id}.Lb.sorted.bam" } publishDir = [ path: { "${params.outdir}/${params.aligner}/library/samtools_stats/" }, @@ -359,7 +359,7 @@ process { } // Should only be published when paired end data is used and save_align_intermeds is true - withName: 'BAM_FILTER' { + withName: 'BAMTOOLS_FILTER' { ext.prefix = { meta.single_end ? "${meta.id}.mLb.noPublish" : "${meta.id}.mLb.flT.sorted" } publishDir = [ path: { "${params.outdir}/${params.aligner}/mergedLibrary" }, @@ -375,7 +375,7 @@ process { publishDir = [ enabled: false ] } - withName: 'NFCORE_CHIPSEQ:CHIPSEQ:FILTER_BAM_BAMTOOLS:BAM_SORT_SAMTOOLS:SAMTOOLS_SORT' { + withName: 'NFCORE_CHIPSEQ:CHIPSEQ:BAM_FILTER_BAMTOOLS:SAMTOOLS_SORT' { ext.prefix = { "${meta.id}.mLb.clN.sorted" } publishDir = [ path: { "${params.outdir}/${params.aligner}/mergedLibrary" }, @@ -384,7 +384,7 @@ process { ] } - withName: 'NFCORE_CHIPSEQ:CHIPSEQ:FILTER_BAM_BAMTOOLS:BAM_SORT_SAMTOOLS:SAMTOOLS_INDEX' { + withName: 'NFCORE_CHIPSEQ:CHIPSEQ:BAM_FILTER_BAMTOOLS:SAMTOOLS_INDEX' { ext.prefix = { "${meta.id}.mLb.clN.sorted" } publishDir = [ path: { "${params.outdir}/${params.aligner}/mergedLibrary" }, @@ -393,7 +393,7 @@ process { ] } - withName: 'NFCORE_CHIPSEQ:CHIPSEQ:FILTER_BAM_BAMTOOLS:BAM_SORT_SAMTOOLS:BAM_STATS_SAMTOOLS:.*' { + withName: 'NFCORE_CHIPSEQ:CHIPSEQ:BAM_FILTER_BAMTOOLS:BAM_STATS_SAMTOOLS:.*' { ext.prefix = { "${meta.id}.mLb.clN.sorted.bam" } publishDir = [ path: { "${params.outdir}/${params.aligner}/mergedLibrary/samtools_stats" }, diff --git 
a/subworkflows/local/prepare_genome.nf b/subworkflows/local/prepare_genome.nf index ed5377bf..456c20d8 100644 --- a/subworkflows/local/prepare_genome.nf +++ b/subworkflows/local/prepare_genome.nf @@ -114,7 +114,8 @@ workflow PREPARE_GENOME { // // Create chromosome sizes file // - ch_chrom_sizes = CUSTOM_GETCHROMSIZES ( ch_fasta ).sizes + ch_chrom_sizes = CUSTOM_GETCHROMSIZES ( [ [:], ch_fasta ] ).sizes.map{ it[1] } + ch_fai = CUSTOM_GETCHROMSIZES.out.fai.map{ it[1] } ch_versions = ch_versions.mix(CUSTOM_GETCHROMSIZES.out.versions) // @@ -123,7 +124,7 @@ workflow PREPARE_GENOME { ch_genome_filtered_bed = Channel.empty() GENOME_BLACKLIST_REGIONS ( - CUSTOM_GETCHROMSIZES.out.sizes, + CUSTOM_GETCHROMSIZES.out.sizes.map{ it[1] }, ch_blacklist.ifEmpty([]) ) ch_genome_filtered_bed = GENOME_BLACKLIST_REGIONS.out.bed @@ -204,6 +205,7 @@ workflow PREPARE_GENOME { emit: fasta = ch_fasta // path: genome.fasta + fai = ch_fai // path: genome.fai gtf = ch_gtf // path: genome.gtf gene_bed = ch_gene_bed // path: gene.bed chrom_sizes = ch_chrom_sizes // path: genome.sizes diff --git a/workflows/chipseq.nf b/workflows/chipseq.nf index 45de3811..f60c8ab3 100644 --- a/workflows/chipseq.nf +++ b/workflows/chipseq.nf @@ -82,7 +82,8 @@ include { MULTIQC_CUSTOM_PEAKS } from '../modules/local/multiqc_c // include { INPUT_CHECK } from '../subworkflows/local/input_check' include { PREPARE_GENOME } from '../subworkflows/local/prepare_genome' -include { FILTER_BAM_BAMTOOLS } from '../subworkflows/local/filter_bam_bamtools' +include { ALIGN_STAR } from '../subworkflows/local/align_star' +include { BAM_FILTER_BAMTOOLS } from '../subworkflows/local/bam_filter_bamtools' /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -118,10 +119,8 @@ include { HOMER_ANNOTATEPEAKS as HOMER_ANNOTATEPEAKS_CONSENSUS } from '../module include { FASTQ_FASTQC_UMITOOLS_TRIMGALORE } from '../subworkflows/nf-core/fastq_fastqc_umitools_trimgalore/main' include { FASTQ_ALIGN_BWA 
} from '../subworkflows/nf-core/fastq_align_bwa/main' include { FASTQ_ALIGN_BOWTIE2 } from '../subworkflows/nf-core/fastq_align_bowtie2/main' - -include { ALIGN_CHROMAP } from '../subworkflows/nf-core/align_chromap' -include { ALIGN_STAR } from '../subworkflows/nf-core/align_star' -include { MARK_DUPLICATES_PICARD } from '../subworkflows/nf-core/mark_duplicates_picard' +include { FASTQ_ALIGN_CHROMAP } from '../subworkflows/nf-core/fastq_align_chromap/main' +include { MARK_DUPLICATES_PICARD } from '../subworkflows/nf-core/mark_duplicates_picard' /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -213,20 +212,24 @@ workflow CHIPSEQ { // SUBWORKFLOW: Alignment with Chromap & BAM QC // if (params.aligner == 'chromap') { - ALIGN_CHROMAP ( + FASTQ_ALIGN_CHROMAP ( FASTQ_FASTQC_UMITOOLS_TRIMGALORE.out.reads, PREPARE_GENOME.out.chromap_index, PREPARE_GENOME.out.fasta .map { [ [:], it ] - } + }, + [], + [], + [], + [] ) - ch_genome_bam = ALIGN_CHROMAP.out.bam - ch_genome_bam_index = ALIGN_CHROMAP.out.bai - ch_samtools_stats = ALIGN_CHROMAP.out.stats - ch_samtools_flagstat = ALIGN_CHROMAP.out.flagstat - ch_samtools_idxstats = ALIGN_CHROMAP.out.idxstats - ch_versions = ch_versions.mix(ALIGN_CHROMAP.out.versions.first()) + ch_genome_bam = FASTQ_ALIGN_CHROMAP.out.bam + ch_genome_bam_index = FASTQ_ALIGN_CHROMAP.out.bai + ch_samtools_stats = FASTQ_ALIGN_CHROMAP.out.stats + ch_samtools_flagstat = FASTQ_ALIGN_CHROMAP.out.flagstat + ch_samtools_idxstats = FASTQ_ALIGN_CHROMAP.out.idxstats + ch_versions = ch_versions.mix(FASTQ_ALIGN_CHROMAP.out.versions.first()) } // @@ -282,13 +285,14 @@ workflow CHIPSEQ { // // SUBWORKFLOW: Filter BAM file with BamTools // - FILTER_BAM_BAMTOOLS ( + BAM_FILTER_BAMTOOLS ( MARK_DUPLICATES_PICARD.out.bam.join(MARK_DUPLICATES_PICARD.out.bai, by: [0]), PREPARE_GENOME.out.filtered_bed.first(), + PREPARE_GENOME.out.fasta, ch_bamtools_filter_se_config, ch_bamtools_filter_pe_config ) - ch_versions = 
ch_versions.mix(FILTER_BAM_BAMTOOLS.out.versions.first().ifEmpty(null)) + ch_versions = ch_versions.mix(BAM_FILTER_BAMTOOLS.out.versions.first().ifEmpty(null)) // // MODULE: Preseq coverage analysis @@ -305,12 +309,33 @@ workflow CHIPSEQ { // // MODULE: Picard post alignment QC // + BAM_FILTER_BAMTOOLS + .out + .bam + .map { + [ it[0], it[1], [] ] + } ch_picardcollectmultiplemetrics_multiqc = Channel.empty() if (!params.skip_picard_metrics) { PICARD_COLLECTMULTIPLEMETRICS ( - FILTER_BAM_BAMTOOLS.out.bam, - PREPARE_GENOME.out.fasta, - [] + BAM_FILTER_BAMTOOLS + .out + .bam + .map { + [ it[0], it[1], [] ] + }, + PREPARE_GENOME + .out + .fasta + .map { + [ [:], it ] + }, + PREPARE_GENOME + .out + .fai + .map { + [ [:], it ] + } ) ch_picardcollectmultiplemetrics_multiqc = PICARD_COLLECTMULTIPLEMETRICS.out.metrics ch_versions = ch_versions.mix(PICARD_COLLECTMULTIPLEMETRICS.out.versions.first()) @@ -325,7 +350,7 @@ workflow CHIPSEQ { ch_multiqc_phantompeakqualtools_correlation_multiqc = Channel.empty() if (!params.skip_spp) { PHANTOMPEAKQUALTOOLS ( - FILTER_BAM_BAMTOOLS.out.bam + BAM_FILTER_BAMTOOLS.out.bam ) ch_phantompeakqualtools_spp_multiqc = PHANTOMPEAKQUALTOOLS.out.spp ch_versions = ch_versions.mix(PHANTOMPEAKQUALTOOLS.out.versions.first()) @@ -348,7 +373,7 @@ workflow CHIPSEQ { // MODULE: BedGraph coverage tracks // BEDTOOLS_GENOMECOV ( - FILTER_BAM_BAMTOOLS.out.bam.join(FILTER_BAM_BAMTOOLS.out.flagstat, by: [0]) + BAM_FILTER_BAMTOOLS.out.bam.join(BAM_FILTER_BAMTOOLS.out.flagstat, by: [0]) ) ch_versions = ch_versions.mix(BEDTOOLS_GENOMECOV.out.versions.first()) @@ -393,10 +418,10 @@ workflow CHIPSEQ { // // Create channels: [ meta, [ ip_bam, control_bam ] [ ip_bai, control_bai ] ] // - FILTER_BAM_BAMTOOLS + BAM_FILTER_BAMTOOLS .out .bam - .join(FILTER_BAM_BAMTOOLS.out.bai, by: [0]) + .join(BAM_FILTER_BAMTOOLS.out.bai, by: [0]) .set { ch_genome_bam_bai } ch_genome_bam_bai @@ -697,9 +722,9 @@ workflow CHIPSEQ { 
MARK_DUPLICATES_PICARD.out.idxstats.collect{it[1]}.ifEmpty([]), MARK_DUPLICATES_PICARD.out.metrics.collect{it[1]}.ifEmpty([]), - FILTER_BAM_BAMTOOLS.out.stats.collect{it[1]}.ifEmpty([]), - FILTER_BAM_BAMTOOLS.out.flagstat.collect{it[1]}.ifEmpty([]), - FILTER_BAM_BAMTOOLS.out.idxstats.collect{it[1]}.ifEmpty([]), + BAM_FILTER_BAMTOOLS.out.stats.collect{it[1]}.ifEmpty([]), + BAM_FILTER_BAMTOOLS.out.flagstat.collect{it[1]}.ifEmpty([]), + BAM_FILTER_BAMTOOLS.out.idxstats.collect{it[1]}.ifEmpty([]), ch_picardcollectmultiplemetrics_multiqc.collect{it[1]}.ifEmpty([]), ch_preseq_multiqc.collect{it[1]}.ifEmpty([]), From 3165a73b7eaacdbb78ea676bb2a1de53a9b65089 Mon Sep 17 00:00:00 2001 From: JoseEspinosa Date: Tue, 18 Apr 2023 17:17:02 +0200 Subject: [PATCH 15/24] Simplify code --- workflows/chipseq.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/workflows/chipseq.nf b/workflows/chipseq.nf index f60c8ab3..7a01c748 100644 --- a/workflows/chipseq.nf +++ b/workflows/chipseq.nf @@ -292,7 +292,7 @@ workflow CHIPSEQ { ch_bamtools_filter_se_config, ch_bamtools_filter_pe_config ) - ch_versions = ch_versions.mix(BAM_FILTER_BAMTOOLS.out.versions.first().ifEmpty(null)) + ch_versions = ch_versions.mix(BAM_FILTER_BAMTOOLS.out.versions) // // MODULE: Preseq coverage analysis From 30312c3c5962c72757fc7ecfd3e27e84e6ea0c3a Mon Sep 17 00:00:00 2001 From: JoseEspinosa Date: Tue, 18 Apr 2023 17:18:45 +0200 Subject: [PATCH 16/24] changelog --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 173b68d1..531befa7 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,6 +11,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - [[#317](https://github.com/nf-core/chipseq/issues/317)] Added metro map - [[#288](https://github.com/nf-core/chipseq/issues/291)] Bump `chromap` version 2 and enable all the steps below chromap again when paired-end data is processed. 
- [[#311](https://github.com/nf-core/chipseq/issues/311)] Add back `--skip_spp` parameter which was unintentionally removed from the code. +- Install available nf-core subworkflows and refactor code accordingly ## [[2.0.0](https://github.com/nf-core/chipseq/releases/tag/2.0.0)] - 2022-10-03 From 603db6303f178d655da36c9bf3986cf710353b76 Mon Sep 17 00:00:00 2001 From: JoseEspinosa Date: Tue, 18 Apr 2023 18:57:40 +0200 Subject: [PATCH 17/24] Resolved arguments for bamtools_filter --- conf/modules.config | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/conf/modules.config b/conf/modules.config index 809e6ec1..dfc0c5ec 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -360,7 +360,14 @@ process { // Should only be published when paired end data is used and save_align_intermeds is true withName: 'BAMTOOLS_FILTER' { - ext.prefix = { meta.single_end ? "${meta.id}.mLb.noPublish" : "${meta.id}.mLb.flT.sorted" } + ext.args = { + [ + meta.single_end ? '-F 0x004' : '-F 0x004 -F 0x0008 -f 0x001', + params.keep_dups ? '' : '-F 0x0400', + params.keep_multi_map ? '' : '-q 1' + ].join(' ').trim() + } + ext.prefix = { meta.single_end ?
"${meta.id}.mLb.noPublish" : "${meta.id}.mLb.flT.sorted" } //TODO check with atacseq publishDir = [ path: { "${params.outdir}/${params.aligner}/mergedLibrary" }, mode: params.publish_dir_mode, From 2637d88d331199b10a5c102ab5c2fc3f70c18eca Mon Sep 17 00:00:00 2001 From: JoseEspinosa Date: Tue, 18 Apr 2023 19:01:08 +0200 Subject: [PATCH 18/24] Update bowtie2/align to fix the container download --- modules.json | 2 +- modules/nf-core/bowtie2/align/main.nf | 16 ++++++++-------- modules/nf-core/bowtie2/align/meta.yml | 5 +++++ 3 files changed, 14 insertions(+), 9 deletions(-) diff --git a/modules.json b/modules.json index 22fd3128..630219e6 100644 --- a/modules.json +++ b/modules.json @@ -7,7 +7,7 @@ "nf-core": { "bowtie2/align": { "branch": "master", - "git_sha": "5e34754d42cd2d5d248ca8673c0a53cdf5624905", + "git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c", "installed_by": ["fastq_align_bowtie2", "modules"] }, "bowtie2/build": { diff --git a/modules/nf-core/bowtie2/align/main.nf b/modules/nf-core/bowtie2/align/main.nf index c74e376f..3d851866 100644 --- a/modules/nf-core/bowtie2/align/main.nf +++ b/modules/nf-core/bowtie2/align/main.nf @@ -2,14 +2,14 @@ process BOWTIE2_ALIGN { tag "$meta.id" label "process_high" - conda (params.enable_conda ? "bioconda::bowtie2=2.4.4 bioconda::samtools=1.15.1 conda-forge::pigz=2.6" : null) - container "${ workflow.containerEngine == "singularity" && !task.ext.singularity_pull_docker_container ? - "https://depot.galaxyproject.org/singularity/mulled-v2-ac74a7f02cebcfcc07d8e8d1d750af9c83b4d45a:1744f68fe955578c63054b55309e05b41c37a80d-0" : - "quay.io/biocontainers/mulled-v2-ac74a7f02cebcfcc07d8e8d1d750af9c83b4d45a:1744f68fe955578c63054b55309e05b41c37a80d-0" }" + conda "bioconda::bowtie2=2.4.4 bioconda::samtools=1.16.1 conda-forge::pigz=2.6" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/mulled-v2-ac74a7f02cebcfcc07d8e8d1d750af9c83b4d45a:a0ffedb52808e102887f6ce600d092675bf3528a-0' : + 'quay.io/biocontainers/mulled-v2-ac74a7f02cebcfcc07d8e8d1d750af9c83b4d45a:a0ffedb52808e102887f6ce600d092675bf3528a-0' }" input: - tuple val(meta), path(reads) - path index + tuple val(meta) , path(reads) + tuple val(meta2), path(index) val save_unaligned val sort_bam @@ -40,8 +40,8 @@ process BOWTIE2_ALIGN { def samtools_command = sort_bam ? 'sort' : 'view' """ - INDEX=`find -L ./ -name "*.rev.1.bt2" | sed "s/.rev.1.bt2//"` - [ -z "\$INDEX" ] && INDEX=`find -L ./ -name "*.rev.1.bt2l" | sed "s/.rev.1.bt2l//"` + INDEX=`find -L ./ -name "*.rev.1.bt2" | sed "s/\\.rev.1.bt2\$//"` + [ -z "\$INDEX" ] && INDEX=`find -L ./ -name "*.rev.1.bt2l" | sed "s/\\.rev.1.bt2l\$//"` [ -z "\$INDEX" ] && echo "Bowtie2 index files not found" 1>&2 && exit 1 bowtie2 \\ diff --git a/modules/nf-core/bowtie2/align/meta.yml b/modules/nf-core/bowtie2/align/meta.yml index 42ba0f96..c8e9a001 100644 --- a/modules/nf-core/bowtie2/align/meta.yml +++ b/modules/nf-core/bowtie2/align/meta.yml @@ -27,6 +27,11 @@ input: description: | List of input FastQ files of size 1 and 2 for single-end and paired-end data, respectively. + - meta2: + type: map + description: | + Groovy Map containing reference information + e.g. 
[ id:'test', single_end:false ] - index: type: file description: Bowtie2 genome index files From 6a75945dc4bacacb93f11941a09eeec8539f2d39 Mon Sep 17 00:00:00 2001 From: JoseEspinosa Date: Tue, 18 Apr 2023 19:08:38 +0200 Subject: [PATCH 19/24] Remove code used for view --- workflows/chipseq.nf | 6 ------ 1 file changed, 6 deletions(-) diff --git a/workflows/chipseq.nf b/workflows/chipseq.nf index 7a01c748..e448bda1 100644 --- a/workflows/chipseq.nf +++ b/workflows/chipseq.nf @@ -309,12 +309,6 @@ workflow CHIPSEQ { // // MODULE: Picard post alignment QC // - BAM_FILTER_BAMTOOLS - .out - .bam - .map { - [ it[0], it[1], [] ] - } ch_picardcollectmultiplemetrics_multiqc = Channel.empty() if (!params.skip_picard_metrics) { PICARD_COLLECTMULTIPLEMETRICS ( From b00897d31317c96cfb797c23c8e0e452f8f47e02 Mon Sep 17 00:00:00 2001 From: JoseEspinosa Date: Tue, 18 Apr 2023 22:04:04 +0200 Subject: [PATCH 20/24] Feed missing arguments to star subworkflow --- workflows/chipseq.nf | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/workflows/chipseq.nf b/workflows/chipseq.nf index e448bda1..71d7dfe8 100644 --- a/workflows/chipseq.nf +++ b/workflows/chipseq.nf @@ -238,7 +238,9 @@ workflow CHIPSEQ { if (params.aligner == 'star') { ALIGN_STAR ( FASTQ_FASTQC_UMITOOLS_TRIMGALORE.out.reads, - PREPARE_GENOME.out.star_index + PREPARE_GENOME.out.star_index, + PREPARE_GENOME.out.fasta, + params.seq_center ?: '' ) ch_genome_bam = ALIGN_STAR.out.bam ch_genome_bam_index = ALIGN_STAR.out.bai From be423bf39a58d36e1a68cadea59f62ae9ec362a4 Mon Sep 17 00:00:00 2001 From: JoseEspinosa Date: Tue, 18 Apr 2023 22:12:05 +0200 Subject: [PATCH 21/24] Update chromap/chromap --- modules.json | 2 +- modules/nf-core/chromap/chromap/main.nf | 2 +- modules/nf-core/chromap/chromap/meta.yml | 7 ++++++- 3 files changed, 8 insertions(+), 3 deletions(-) diff --git a/modules.json b/modules.json index 630219e6..55237ed3 100644 --- a/modules.json +++ b/modules.json @@ -27,7 +27,7 @@ }, "chromap/chromap": 
{ "branch": "master", - "git_sha": "d6b7b1f108dab88b0269a4331767c36a1a8da960", + "git_sha": "b7f0dbc07af00c0ccdc7c2df06efee8786e976ac", "installed_by": ["fastq_align_chromap", "modules"] }, "chromap/index": { diff --git a/modules/nf-core/chromap/chromap/main.nf b/modules/nf-core/chromap/chromap/main.nf index 5b7631cb..69a4bc1b 100644 --- a/modules/nf-core/chromap/chromap/main.nf +++ b/modules/nf-core/chromap/chromap/main.nf @@ -10,7 +10,7 @@ process CHROMAP_CHROMAP { input: tuple val(meta), path(reads) tuple val(meta2), path(fasta) - tuple val(meta2), path(index) + tuple val(meta3), path(index) path barcodes path whitelist path chr_order diff --git a/modules/nf-core/chromap/chromap/meta.yml b/modules/nf-core/chromap/chromap/meta.yml index 10ce8f58..05f70cf0 100644 --- a/modules/nf-core/chromap/chromap/meta.yml +++ b/modules/nf-core/chromap/chromap/meta.yml @@ -36,12 +36,17 @@ input: - meta2: type: map description: | - Groovy Map containing sample information + Groovy Map containing information for the fasta e.g. [ id:'test' ] - fasta: type: file description: | The fasta reference file. + - meta3: + type: map + description: | + Groovy Map containing information for the index + e.g. 
[ id:'test' ] - index: type: file description: | From 7cc3041ad21667f326ae9e7703b9dd810f305186 Mon Sep 17 00:00:00 2001 From: JoseEspinosa Date: Tue, 18 Apr 2023 22:18:58 +0200 Subject: [PATCH 22/24] Update star_align --- modules/local/star_align.nf | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/modules/local/star_align.nf b/modules/local/star_align.nf index f960045f..f1988bea 100644 --- a/modules/local/star_align.nf +++ b/modules/local/star_align.nf @@ -11,6 +11,7 @@ process STAR_ALIGN { input: tuple val(meta), path(reads) path index + val seq_center output: tuple val(meta), path('*d.out.bam') , emit: bam @@ -25,10 +26,13 @@ process STAR_ALIGN { tuple val(meta), path('*fastq.gz') , optional:true, emit: fastq tuple val(meta), path('*.tab') , optional:true, emit: tab + when: + task.ext.when == null || task.ext.when + script: def args = task.ext.args ?: '' def prefix = task.ext.prefix ?: "${meta.id}" - def seq_center = params.seq_center ? "--outSAMattrRGline ID:$prefix 'CN:$params.seq_center' 'SM:$prefix'" : "--outSAMattrRGline ID:$prefix 'SM:$prefix'" + def seq_center_tag = seq_center ? "--outSAMattrRGline ID:$prefix 'CN:$seq_center' 'SM:$prefix'" : "--outSAMattrRGline ID:$prefix 'SM:$prefix'" def out_sam_type = (args.contains('--outSAMtype')) ? '' : '--outSAMtype BAM Unsorted' def mv_unsorted_bam = (args.contains('--outSAMtype BAM Unsorted SortedByCoordinate')) ? "mv ${prefix}.Aligned.out.bam ${prefix}.Aligned.unsort.out.bam" : '' """ @@ -38,7 +42,7 @@ process STAR_ALIGN { --runThreadN $task.cpus \\ --outFileNamePrefix $prefix. 
\\ $out_sam_type \\ - $seq_center \\ + $seq_center_tag \\ $args $mv_unsorted_bam if [ -f ${prefix}.Unmapped.out.mate1 ]; then From 403a8e59cfe66b785833fd4d4b6e40a40e3ab112 Mon Sep 17 00:00:00 2001 From: JoseEspinosa Date: Tue, 18 Apr 2023 22:21:32 +0200 Subject: [PATCH 23/24] Update changelog --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 531befa7..b6d20d60 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -12,6 +12,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - [[#288](https://github.com/nf-core/chipseq/issues/291)] Bump `chromap` version 2 and enable all the steps below chromap again when paired-end data is processed. - [[#311](https://github.com/nf-core/chipseq/issues/311)] Add back `--skip_spp` parameter which was unintentionally removed from the code. - Install available nf-core subworkflows and refactor code accordingly +- [[#318](https://github.com/nf-core/chipseq/issues/318)] Update `bowtie2/align` module to fix issue when downloading its singularity image. 
## [[2.0.0](https://github.com/nf-core/chipseq/releases/tag/2.0.0)] - 2022-10-03 From aece0fb6c96c259e83b357100c66348e9f2aad1a Mon Sep 17 00:00:00 2001 From: JoseEspinosa Date: Tue, 18 Apr 2023 22:48:01 +0200 Subject: [PATCH 24/24] Update bowtie2/build --- modules.json | 2 +- modules/nf-core/bowtie2/build/main.nf | 8 ++++---- modules/nf-core/bowtie2/build/meta.yml | 10 ++++++++++ subworkflows/local/prepare_genome.nf | 6 +++--- 4 files changed, 18 insertions(+), 8 deletions(-) diff --git a/modules.json b/modules.json index 55237ed3..28fb02ca 100644 --- a/modules.json +++ b/modules.json @@ -12,7 +12,7 @@ }, "bowtie2/build": { "branch": "master", - "git_sha": "5e34754d42cd2d5d248ca8673c0a53cdf5624905", + "git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c", "installed_by": ["modules"] }, "bwa/index": { diff --git a/modules/nf-core/bowtie2/build/main.nf b/modules/nf-core/bowtie2/build/main.nf index a4da62d0..551893af 100644 --- a/modules/nf-core/bowtie2/build/main.nf +++ b/modules/nf-core/bowtie2/build/main.nf @@ -2,17 +2,17 @@ process BOWTIE2_BUILD { tag "$fasta" label 'process_high' - conda (params.enable_conda ? 'bioconda::bowtie2=2.4.4' : null) + conda "bioconda::bowtie2=2.4.4" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
'https://depot.galaxyproject.org/singularity/bowtie2:2.4.4--py39hbb4e92a_0' : 'quay.io/biocontainers/bowtie2:2.4.4--py39hbb4e92a_0' }" input: - path fasta + tuple val(meta), path(fasta) output: - path 'bowtie2' , emit: index - path "versions.yml" , emit: versions + tuple val(meta), path('bowtie2') , emit: index + path "versions.yml" , emit: versions when: task.ext.when == null || task.ext.when diff --git a/modules/nf-core/bowtie2/build/meta.yml b/modules/nf-core/bowtie2/build/meta.yml index 2da9a217..0240224d 100644 --- a/modules/nf-core/bowtie2/build/meta.yml +++ b/modules/nf-core/bowtie2/build/meta.yml @@ -16,10 +16,20 @@ tools: doi: 10.1038/nmeth.1923 licence: ["GPL-3.0-or-later"] input: + - meta: + type: map + description: | + Groovy Map containing reference information + e.g. [ id:'test', single_end:false ] - fasta: type: file description: Input genome fasta file output: + - meta: + type: map + description: | + Groovy Map containing reference information + e.g. [ id:'test', single_end:false ] - index: type: file description: Bowtie2 genome index files diff --git a/subworkflows/local/prepare_genome.nf b/subworkflows/local/prepare_genome.nf index 456c20d8..4cb2d164 100644 --- a/subworkflows/local/prepare_genome.nf +++ b/subworkflows/local/prepare_genome.nf @@ -156,13 +156,13 @@ workflow PREPARE_GENOME { if (prepare_tool_index == 'bowtie2') { if (params.bowtie2_index) { if (params.bowtie2_index.endsWith('.tar.gz')) { - ch_bowtie2_index = UNTAR_BOWTIE2_INDEX ( [ [:], params.bowtie2_index ] ).untar.map{ it[1] } + ch_bowtie2_index = UNTAR_BOWTIE2_INDEX ( [ [:], params.bowtie2_index ] ).untar ch_versions = ch_versions.mix(UNTAR_BOWTIE2_INDEX.out.versions) } else { - ch_bowtie2_index = file(params.bowtie2_index) + ch_bowtie2_index = [ [:], file(params.bowtie2_index) ] } } else { - ch_bowtie2_index = BOWTIE2_BUILD ( ch_fasta ).index + ch_bowtie2_index = BOWTIE2_BUILD ( [ [:], ch_fasta ] ).index ch_versions = ch_versions.mix(BOWTIE2_BUILD.out.versions) } }