From 0ac039c4dba0c24f7e04f49f8af2706add87690d Mon Sep 17 00:00:00 2001 From: Jonathan Manning Date: Mon, 18 Mar 2024 10:03:00 +0000 Subject: [PATCH 01/22] Add a full test profile --- conf/test_full.config | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/conf/test_full.config b/conf/test_full.config index 731f7c3..55d8135 100644 --- a/conf/test_full.config +++ b/conf/test_full.config @@ -1,23 +1,23 @@ /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - Nextflow config file for running full-size tests + Nextflow config file for running minimal tests ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Defines input files and everything required to run a fast and simple pipeline test. Use as follows: - nextflow run nf-core/riboseq -profile test_full, --outdir + nextflow run nf-core/riboseq -profile test, --outdir ---------------------------------------------------------------------------------------- */ params { - config_profile_name = 'Full test profile' - config_profile_description = 'Full test dataset to check pipeline function' + config_profile_name = 'Test profile' + config_profile_description = 'Minimal test dataset to check pipeline function' // Input data - input = 'https://raw.githubusercontent.com/nf-core/test-datasets/riboseq/testdata/samplesheet.csv' - - fasta = 'http://ftp.ensembl.org/pub/release-110/fasta/mus_musculus/dna/Mus_musculus.GRCm39.dna.primary_assembly.fa.gz' - gtf = 'http://ftp.ensembl.org/pub/release-110/gtf/mus_musculus/Mus_musculus.GRCm39.110.gtf.gz' + input = 'https://raw.githubusercontent.com/nf-core/test-datasets/riboseq/samplesheet/samplesheet_full.csv' + ribo_database_manifest = 'https://raw.githubusercontent.com/nf-core/test-datasets/riboseq/testdata/rrna-db.txt' + fasta = 
'https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/riboseq_expression/Homo_sapiens.GRCh38.dna.chromosome.20.fa.gz' + gtf = 'https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/riboseq_expression/Homo_sapiens.GRCh38.111_chr20.gtf' min_trimmed_reads = 1000 } From 998240d44317df692f1aa3be3a9b06f19bf08f3a Mon Sep 17 00:00:00 2001 From: Jonathan Manning Date: Mon, 18 Mar 2024 10:04:09 +0000 Subject: [PATCH 02/22] Install ribotricer --- conf/modules.config | 16 + modules.json | 10 + .../ribotricer/detectorfs/environment.yml | 9 + modules/nf-core/ribotricer/detectorfs/main.nf | 86 ++ .../nf-core/ribotricer/detectorfs/meta.yml | 99 ++ .../ribotricer/detectorfs/tests/main.nf.test | 334 +++++ .../detectorfs/tests/main.nf.test.snap | 1324 +++++++++++++++++ .../detectorfs/tests/nextflow.all.config | 11 + .../detectorfs/tests/nextflow.filtered.config | 11 + .../ribotricer/detectorfs/tests/tags.yml | 2 + .../ribotricer/prepareorfs/environment.yml | 9 + .../nf-core/ribotricer/prepareorfs/main.nf | 48 + .../nf-core/ribotricer/prepareorfs/meta.yml | 53 + .../ribotricer/prepareorfs/tests/main.nf.test | 77 + .../prepareorfs/tests/main.nf.test.snap | 68 + .../ribotricer/prepareorfs/tests/tags.yml | 2 + nextflow.config | 9 +- workflows/riboseq/main.nf | 73 +- 18 files changed, 2210 insertions(+), 31 deletions(-) create mode 100644 modules/nf-core/ribotricer/detectorfs/environment.yml create mode 100644 modules/nf-core/ribotricer/detectorfs/main.nf create mode 100644 modules/nf-core/ribotricer/detectorfs/meta.yml create mode 100644 modules/nf-core/ribotricer/detectorfs/tests/main.nf.test create mode 100644 modules/nf-core/ribotricer/detectorfs/tests/main.nf.test.snap create mode 100644 modules/nf-core/ribotricer/detectorfs/tests/nextflow.all.config create mode 100644 modules/nf-core/ribotricer/detectorfs/tests/nextflow.filtered.config create mode 100644 modules/nf-core/ribotricer/detectorfs/tests/tags.yml 
create mode 100644 modules/nf-core/ribotricer/prepareorfs/environment.yml create mode 100644 modules/nf-core/ribotricer/prepareorfs/main.nf create mode 100644 modules/nf-core/ribotricer/prepareorfs/meta.yml create mode 100644 modules/nf-core/ribotricer/prepareorfs/tests/main.nf.test create mode 100644 modules/nf-core/ribotricer/prepareorfs/tests/main.nf.test.snap create mode 100644 modules/nf-core/ribotricer/prepareorfs/tests/tags.yml diff --git a/conf/modules.config b/conf/modules.config index 01219f7..60eaa38 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -1240,3 +1240,19 @@ if (!params.skip_ribotish) { } } } + +if (!params.skip_ribotricer) { + process { + withName: 'RIBOTRICER_PREPAREORFS' { + ext.args = { params.extra_ribotricer_prepareorfs_args ?: '' } + } + withName: 'RIBOTRICER_DETECTORFS' { + ext.args = { params.extra_ribotricer_detectorfs_args ?: '' } + publishDir = [ + path: { "${params.outdir}/orf_predictions/ribotricer" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename } + ] + } + } +} diff --git a/modules.json b/modules.json index 44724ed..19e7f2a 100644 --- a/modules.json +++ b/modules.json @@ -85,6 +85,16 @@ "git_sha": "fac281689037749ea014215a61d431522513077a", "installed_by": ["modules"] }, + "ribotricer/detectorfs": { + "branch": "master", + "git_sha": "eef39ece87292a7e11d7f52eec165dbb6e5b696a", + "installed_by": ["modules"] + }, + "ribotricer/prepareorfs": { + "branch": "master", + "git_sha": "db0c1159f2d90a0e0ee6753f7299b4ec68439c39", + "installed_by": ["modules"] + }, "rsem/preparereference": { "branch": "master", "git_sha": "301b088c7e9e00c4c80686411383f07173b54d69", diff --git a/modules/nf-core/ribotricer/detectorfs/environment.yml b/modules/nf-core/ribotricer/detectorfs/environment.yml new file mode 100644 index 0000000..74d5104 --- /dev/null +++ b/modules/nf-core/ribotricer/detectorfs/environment.yml @@ -0,0 +1,9 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json +name: "ribotricer_detectorfs" +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - "bioconda::ribotricer=1.3.3" diff --git a/modules/nf-core/ribotricer/detectorfs/main.nf b/modules/nf-core/ribotricer/detectorfs/main.nf new file mode 100644 index 0000000..d86a64e --- /dev/null +++ b/modules/nf-core/ribotricer/detectorfs/main.nf @@ -0,0 +1,86 @@ +process RIBOTRICER_DETECTORFS { + tag "$meta.id" + label 'process_single' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/ribotricer:1.3.3--pyhdfd78af_0': + 'biocontainers/ribotricer:1.3.3--pyhdfd78af_0' }" + + input: + tuple val(meta), path(bam), path(bai) + tuple val(meta2), path(candidate_orfs) + + output: + tuple val(meta), path('*_protocol.txt') , emit: protocol, optional: true + tuple val(meta), path('*_bam_summary.txt') , emit: bam_summary + tuple val(meta), path('*_read_length_dist.pdf') , emit: read_length_dist + tuple val(meta), path('*_metagene_profiles_5p.tsv') , emit: metagene_profile_5p + tuple val(meta), path('*_metagene_profiles_3p.tsv') , emit: metagene_profile_3p + tuple val(meta), path('*_metagene_plots.pdf') , emit: metagene_plots + tuple val(meta), path('*_psite_offsets.txt') , emit: psite_offsets, optional: true + tuple val(meta), path('*_pos.wig') , emit: pos_wig + tuple val(meta), path('*_neg.wig') , emit: neg_wig + tuple val(meta), path('*_translating_ORFs.tsv') , emit: orfs + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def strandedness_cmd = '' + + switch(meta.strandedness) { + case "forward": + strandedness_cmd = "--stranded yes" + break + case "reverse": + strandedness_cmd = "--stranded reverse" + break + // + // Specifying unstranded seems broken - see + // https://github.com/smithlabcode/ribotricer/issues/153. Leaving it + // undefined works, though ribotricer may incorrectly infer + // strandedness? 
+ // + //case "unstranded": + // strandedness_cmd = "--stranded no" + // break + } + """ + ribotricer detect-orfs \\ + --bam $bam \\ + --ribotricer_index $candidate_orfs \\ + --prefix $prefix \\ + $strandedness_cmd \\ + $args + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + ribotricer: \$(ribotricer --version 2>&1 | grep ribotricer | sed '1!d ; s/ribotricer, version //') + END_VERSIONS + """ + + stub: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + """ + touch ${prefix}_protocol.txt + touch ${prefix}_bam_summary.txt + touch ${prefix}_read_length_dist.pdf + touch ${prefix}_metagene_profiles_5p.tsv + touch ${prefix}_metagene_profiles_3p.tsv + touch ${prefix}_metagene_plots.pdf + touch ${prefix}_psite_offsets.txt + touch ${prefix}_pos.wig + touch ${prefix}_neg.wig + touch ${prefix}_translating_ORFs.tsv + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + ribotricer: \$(ribotricer --version 2>&1 | grep ribotricer | sed '1!d ; s/ribotricer, version //') + END_VERSIONS + """ +} diff --git a/modules/nf-core/ribotricer/detectorfs/meta.yml b/modules/nf-core/ribotricer/detectorfs/meta.yml new file mode 100644 index 0000000..430f232 --- /dev/null +++ b/modules/nf-core/ribotricer/detectorfs/meta.yml @@ -0,0 +1,99 @@ +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/meta-schema.json +name: "ribotricer_detectorfs" +description: "Accurate detection of short and long active ORFs using Ribo-seq data" +keywords: + - riboseq + - orf + - genomics +tools: + - "ribotricer": + description: "Python package to detect translating ORF from Ribo-seq data" + homepage: "https://github.com/smithlabcode/ribotricer" + documentation: "https://github.com/smithlabcode/ribotricer" + tool_dev_url: "https://github.com/smithlabcode/ribotricer" + doi: "10.1093/bioinformatics/btz878" + licence: ["GNU General Public v3 (GPL v3)"] + +input: + - meta: + type: map + description: | + Map containing riboseq 
sample information + e.g. `[ id:'sample1', single_end:false, strandedness: 'forward' ]` + - bam_ribo: + type: file + description: Sorted riboseq BAM file(s) + pattern: "*.{bam}" + - bai_ribo: + type: file + description: Index for sorted riboseq BAM file(s) + pattern: "*.{bai}" + - meta2: + type: map + description: | + Map containing reference information for the candidate ORFs + e.g. `[ id:'Ensembl human v.111' ]` + - candidate_orfs: + type: file + description: "TSV file with candidate ORFs from 'ribotricer prepareorfs'" + pattern: "*.tsv" + +output: + - meta: + type: map + description: | + Groovy Map containing riboseq sample information + e.g. `[ id:'sample1', single_end:false ]` + - protocol: + type: file + description: "txt file containing inferred protocol if it was inferred (not supplied as input)" + pattern: "*_protocol.txt" + - bam_summary: + type: file + description: "Text summary of reads found in the BAM" + pattern: "*_bam_summary.txt" + - read_length_dist: + type: file + description: "PDF-format read length distribution as quality control" + pattern: "*_read_length_dist.pdf" + - metagene_profile_5p: + type: file + description: "Metagene profile aligning with the start codon" + pattern: "*_metagene_profiles_5p.tsv" + - metagene_profile_3p: + type: file + description: "Metagene profile aligning with the stop codon" + pattern: "*_metagene_profiles_3p.tsv" + - metagene_plots: + type: file + description: "Metagene plots for quality control" + pattern: "*_metagene_plots.pdf" + - psite_offsets: + type: file + description: | + "If the P-site offsets are not provided, txt file containing the + derived relative offsets" + pattern: "*_psite_offsets.txt" + - pos_wig: + type: file + description: "Positive strand WIG file for visualization in Genome Browser" + pattern: "*_pos.wig" + - neg_wig: + type: file + description: "Negative strand WIG file for visualization in Genome Browser" + pattern: "*_neg.wig" + - orfs: + type: file + description: | + "TSV with ORFs assessed 
as translating in this BAM file. You can output + all ORFs regardless of the translation status with option --report_all" + pattern: "*_translating_ORFs.tsv" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" + +authors: + - "@pinin4fjords" +maintainers: + - "@pinin4fjords" diff --git a/modules/nf-core/ribotricer/detectorfs/tests/main.nf.test b/modules/nf-core/ribotricer/detectorfs/tests/main.nf.test new file mode 100644 index 0000000..540a32e --- /dev/null +++ b/modules/nf-core/ribotricer/detectorfs/tests/main.nf.test @@ -0,0 +1,334 @@ +nextflow_process { + + name "Test Process RIBOTRICER_DETECTORFS" + script "../main.nf" + process "RIBOTRICER_DETECTORFS" + + tag "modules" + tag "modules_nfcore" + tag "ribotricer" + tag "ribotricer/prepareorfs" + tag "ribotricer/detectorfs" + tag "gunzip" + + setup { + run("GUNZIP") { + script "modules/nf-core/gunzip/main.nf" + process { + """ + input[0] = [ + [ ], + file(params.modules_testdata_base_path + "genomics/homo_sapiens/riboseq_expression/Homo_sapiens.GRCh38.dna.chromosome.20.fa.gz", checkIfExists: true) + ] + """ + } + } + run("RIBOTRICER_PREPAREORFS") { + script "modules/nf-core/ribotricer/prepareorfs/main.nf" + process { + """ + input[0] = GUNZIP.out.gunzip.map{[ + [id:'homo_sapiens_chr20'], + it[1], + file(params.modules_testdata_base_path + "genomics/homo_sapiens/riboseq_expression/Homo_sapiens.GRCh38.111_chr20.gtf", checkIfExists: true) + ]} + """ + } + } + + } + + test("human chr20 - bam - filtered - forward") { + + config './nextflow.filtered.config' + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:true, strandedness:'forward' ], // meta map + file(params.modules_testdata_base_path + "genomics/homo_sapiens/riboseq_expression/aligned_reads/SRX11780888_chr20.bam", checkIfExists: true), + file(params.modules_testdata_base_path + "genomics/homo_sapiens/riboseq_expression/aligned_reads/SRX11780888_chr20.bam.bai", checkIfExists: true) + ] + 
input[1] = RIBOTRICER_PREPAREORFS.out.candidate_orfs + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out.protocol).match('protocol') }, + { assert snapshot(process.out.bam_summary).match('bam_summary') }, + { assert snapshot(file(process.out.read_length_dist[0][1]).name).match("read_length_dist") }, + { assert snapshot(process.out.metagene_profile_5p).match('metagene_profile_5p') }, + { assert path(process.out.metagene_profile_3p[0][1]).getText().contains("26\t0\t[0.0, 0.0, 0.0, 0.0, 2.6132404181184667") }, + { assert snapshot(file(process.out.metagene_plots[0][1]).name).match("metagene_plots") }, + { assert snapshot(process.out.psite_offsets).match('psite_offsets') }, + { assert snapshot(process.out.pos_wig).match('pos_wig') }, + { assert snapshot(process.out.neg_wig).match('neg_wig') }, + { assert path(process.out.orfs[0][1]).getText().contains("ENST00000370861_62136860_62140830_534\tannotated\ttranslating\t0.515078753637712") }, + { assert snapshot(process.out.versions).match('versions') } + ) + } + } + + test("human chr20 - bam - filtered - reverse") { + + config './nextflow.filtered.config' + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:true, strandedness:'reverse' ], // meta map + file(params.modules_testdata_base_path + "genomics/homo_sapiens/riboseq_expression/aligned_reads/SRX11780888_chr20.bam", checkIfExists: true), + file(params.modules_testdata_base_path + "genomics/homo_sapiens/riboseq_expression/aligned_reads/SRX11780888_chr20.bam.bai", checkIfExists: true) + ] + input[1] = RIBOTRICER_PREPAREORFS.out.candidate_orfs + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out.protocol).match('protocol_reverse') }, + { assert snapshot(process.out.bam_summary).match('bam_summary_reverse') }, + { assert snapshot(file(process.out.read_length_dist[0][1]).name).match("read_length_dist_reverse") }, + { assert 
snapshot(process.out.metagene_profile_5p).match('metagene_profile_5p_reverse') }, + { assert snapshot(process.out.metagene_profile_3p).match('metagene_profile_3p_reverse') }, + { assert snapshot(file(process.out.metagene_plots[0][1]).name).match("metagene_plots_reverse") }, + { assert snapshot(process.out.psite_offsets).match('psite_offsets_reverse') }, + { assert snapshot(process.out.pos_wig).match('pos_wig_reverse') }, + { assert snapshot(process.out.neg_wig).match('neg_wig_reverse') }, + { assert path(process.out.orfs[0][1]).getText().contains("ENST00000463943_50840572_50840805_234\tdORF\ttranslating\t0.1428571428571428") }, + { assert snapshot(process.out.versions).match('versions_reverse') } + ) + } + } + + test("human chr20 - bam - filtered - unspecified strand") { + + config './nextflow.filtered.config' + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:true ], // meta map + file(params.modules_testdata_base_path + "genomics/homo_sapiens/riboseq_expression/aligned_reads/SRX11780888_chr20.bam", checkIfExists: true), + file(params.modules_testdata_base_path + "genomics/homo_sapiens/riboseq_expression/aligned_reads/SRX11780888_chr20.bam.bai", checkIfExists: true) + ] + input[1] = RIBOTRICER_PREPAREORFS.out.candidate_orfs + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out.protocol).match('protocol_unspecified') }, + { assert snapshot(process.out.bam_summary).match('bam_summary_unspecified') }, + { assert snapshot(file(process.out.read_length_dist[0][1]).name).match("read_length_dist_unspecified") }, + { assert snapshot(process.out.metagene_profile_5p).match('metagene_profile_5p_unspecified') }, + { assert snapshot(process.out.metagene_profile_3p).match('metagene_profile_3p_unspecified') }, + { assert snapshot(file(process.out.metagene_plots[0][1]).name).match("metagene_plots_unspecified") }, + { assert snapshot(process.out.psite_offsets).match('psite_offsets_unspecified') }, + { assert 
snapshot(process.out.pos_wig).match('pos_wig_unspecified') }, + { assert snapshot(process.out.neg_wig).match('neg_wig_unspecified') }, + { assert path(process.out.orfs[0][1]).getText().contains("ENST00000463943_50840572_50840805_234\tdORF\ttranslating\t0.1428571428571428") }, + { assert snapshot(process.out.versions).match('versions_unspecified') } + ) + } + } + + test("human chr20 - bam - all - forward") { + + config './nextflow.all.config' + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:true, strandedness:'forward' ], // meta map + file(params.modules_testdata_base_path + "genomics/homo_sapiens/riboseq_expression/aligned_reads/SRX11780888_chr20.bam", checkIfExists: true), + file(params.modules_testdata_base_path + "genomics/homo_sapiens/riboseq_expression/aligned_reads/SRX11780888_chr20.bam.bai", checkIfExists: true) + ] + input[1] = RIBOTRICER_PREPAREORFS.out.candidate_orfs + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out.protocol).match('protocol_all') }, + { assert snapshot(process.out.bam_summary).match('bam_summary_all') }, + { assert snapshot(file(process.out.read_length_dist[0][1]).name).match("read_length_dist_all") }, + { assert snapshot(process.out.metagene_profile_5p).match('metagene_profile_5p_all') }, + { assert path(process.out.metagene_profile_3p[0][1]).getText().contains("26\t0\t[0.0, 0.0, 0.0, 0.0, 2.6132404181184667") }, + { assert snapshot(file(process.out.metagene_plots[0][1]).name).match("metagene_plots_all") }, + { assert snapshot(process.out.psite_offsets).match('psite_offsets_all') }, + { assert snapshot(process.out.pos_wig).match('pos_wig_all') }, + { assert snapshot(process.out.neg_wig).match('neg_wig_all') }, + { assert path(process.out.orfs[0][1]).getText().contains("ENST00000370861_62136860_62140830_534\tannotated\ttranslating\t0.515078753637712") }, + { assert snapshot(process.out.versions).match('versions_all') } + ) + } + + } + + test("human chr20 - bam - 
filtered - forward - stub") { + + options '-stub' + + config './nextflow.filtered.config' + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:true, strandedness:'forward' ], // meta map + file(params.modules_testdata_base_path + "genomics/homo_sapiens/riboseq_expression/aligned_reads/SRX11780888_chr20.bam", checkIfExists: true), + file(params.modules_testdata_base_path + "genomics/homo_sapiens/riboseq_expression/aligned_reads/SRX11780888_chr20.bam.bai", checkIfExists: true) + ] + input[1] = RIBOTRICER_PREPAREORFS.out.candidate_orfs + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out.protocol).match('protocol_stub') }, + { assert snapshot(process.out.bam_summary).match('bam_summary_stub') }, + { assert snapshot(file(process.out.read_length_dist[0][1]).name).match("read_length_dist_stub") }, + { assert snapshot(process.out.metagene_profile_5p).match('metagene_profile_5p_stub') }, + { assert snapshot(process.out.metagene_profile_3p).match('metagene_profile_3p_stub') }, + { assert snapshot(file(process.out.metagene_plots[0][1]).name).match("metagene_plots_stub") }, + { assert snapshot(process.out.psite_offsets).match('psite_offsets_stub') }, + { assert snapshot(process.out.pos_wig).match('pos_wig_stub') }, + { assert snapshot(process.out.neg_wig).match('neg_wig_stub') }, + { assert snapshot(process.out.orfs).match('orfs_stub') }, + { assert snapshot(process.out.versions).match('versions_stub') } + ) + } + } + + test("human chr20 - bam - filtered - reverse - stub") { + + options '-stub' + + config './nextflow.filtered.config' + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:true, strandedness:'reverse' ], // meta map + file(params.modules_testdata_base_path + "genomics/homo_sapiens/riboseq_expression/aligned_reads/SRX11780888_chr20.bam", checkIfExists: true), + file(params.modules_testdata_base_path + "genomics/homo_sapiens/riboseq_expression/aligned_reads/SRX11780888_chr20.bam.bai", 
checkIfExists: true) + ] + input[1] = RIBOTRICER_PREPAREORFS.out.candidate_orfs + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out.protocol).match('protocol_reverse_stub') }, + { assert snapshot(process.out.bam_summary).match('bam_summary_reverse_stub') }, + { assert snapshot(file(process.out.read_length_dist[0][1]).name).match("read_length_dist_reverse_stub") }, + { assert snapshot(process.out.metagene_profile_5p).match('metagene_profile_5p_reverse_stub') }, + { assert snapshot(process.out.metagene_profile_3p).match('metagene_profile_3p_reverse_stub') }, + { assert snapshot(file(process.out.metagene_plots[0][1]).name).match("metagene_plots_reverse_stub") }, + { assert snapshot(process.out.psite_offsets).match('psite_offsets_reverse_stub') }, + { assert snapshot(process.out.pos_wig).match('pos_wig_reverse_stub') }, + { assert snapshot(process.out.neg_wig).match('neg_wig_reverse_stub') }, + { assert snapshot(process.out.orfs).match('orfs_reverse_stub') }, + { assert snapshot(process.out.versions).match('versions_reverse_stub') } + ) + } + } + + test("human chr20 - bam - filtered - unspecified strand - stub") { + + options '-stub' + + config './nextflow.filtered.config' + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:true ], // meta map + file(params.modules_testdata_base_path + "genomics/homo_sapiens/riboseq_expression/aligned_reads/SRX11780888_chr20.bam", checkIfExists: true), + file(params.modules_testdata_base_path + "genomics/homo_sapiens/riboseq_expression/aligned_reads/SRX11780888_chr20.bam.bai", checkIfExists: true) + ] + input[1] = RIBOTRICER_PREPAREORFS.out.candidate_orfs + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out.protocol).match('protocol_unspecified_stub') }, + { assert snapshot(process.out.bam_summary).match('bam_summary_unspecified_stub') }, + { assert 
snapshot(file(process.out.read_length_dist[0][1]).name).match("read_length_dist_unspecified_stub") }, + { assert snapshot(process.out.metagene_profile_5p).match('metagene_profile_5p_unspecified_stub') }, + { assert snapshot(process.out.metagene_profile_3p).match('metagene_profile_3p_unspecified_stub') }, + { assert snapshot(file(process.out.metagene_plots[0][1]).name).match("metagene_plots_unspecified_stub") }, + { assert snapshot(process.out.psite_offsets).match('psite_offsets_unspecified_stub') }, + { assert snapshot(process.out.pos_wig).match('pos_wig_unspecified_stub') }, + { assert snapshot(process.out.neg_wig).match('neg_wig_unspecified_stub') }, + { assert snapshot(process.out.orfs).match('orfs_unspecified_stub') }, + { assert snapshot(process.out.versions).match('versions_unspecified_stub') } + ) + } + } + + test("human chr20 - bam - all - forward - stub") { + + options '-stub' + + config './nextflow.all.config' + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:true, strandedness:'forward' ], // meta map + file(params.modules_testdata_base_path + "genomics/homo_sapiens/riboseq_expression/aligned_reads/SRX11780888_chr20.bam", checkIfExists: true), + file(params.modules_testdata_base_path + "genomics/homo_sapiens/riboseq_expression/aligned_reads/SRX11780888_chr20.bam.bai", checkIfExists: true) + ] + input[1] = RIBOTRICER_PREPAREORFS.out.candidate_orfs + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out.protocol).match('protocol_all_stub') }, + { assert snapshot(process.out.bam_summary).match('bam_summary_all_stub') }, + { assert snapshot(file(process.out.read_length_dist[0][1]).name).match("read_length_dist_all_stub") }, + { assert snapshot(process.out.metagene_profile_5p).match('metagene_profile_5p_all_stub') }, + { assert snapshot(process.out.metagene_profile_3p).match('metagene_profile_3p_all_stub') }, + { assert 
snapshot(file(process.out.metagene_plots[0][1]).name).match("metagene_plots_all_stub") }, + { assert snapshot(process.out.psite_offsets).match('psite_offsets_all_stub') }, + { assert snapshot(process.out.pos_wig).match('pos_wig_all_stub') }, + { assert snapshot(process.out.neg_wig).match('neg_wig_all_stub') }, + { assert snapshot(process.out.orfs).match('orfs_all_stub') }, + { assert snapshot(process.out.versions).match('versions_all_stub') } + ) + } + + } +} + + + + diff --git a/modules/nf-core/ribotricer/detectorfs/tests/main.nf.test.snap b/modules/nf-core/ribotricer/detectorfs/tests/main.nf.test.snap new file mode 100644 index 0000000..659eaac --- /dev/null +++ b/modules/nf-core/ribotricer/detectorfs/tests/main.nf.test.snap @@ -0,0 +1,1324 @@ +{ + "versions_unspecified_stub": { + "content": [ + [ + "versions.yml:md5,e692bbf6e4fcebbfd4b96fe0861f4fa7" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-03-14T11:18:35.629937" + }, + "versions_all": { + "content": [ + [ + "versions.yml:md5,e692bbf6e4fcebbfd4b96fe0861f4fa7" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-03-13T14:56:24.5335" + }, + "pos_wig_unspecified": { + "content": [ + [ + [ + { + "id": "test", + "single_end": true + }, + "test_pos.wig:md5,1322405703845f8bec05c169eb36e2c0" + ] + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-03-14T11:12:11.639155" + }, + "pos_wig_reverse": { + "content": [ + [ + [ + { + "id": "test", + "single_end": true, + "strandedness": "reverse" + }, + "test_pos.wig:md5,1322405703845f8bec05c169eb36e2c0" + ] + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-03-14T10:23:53.26671" + }, + "psite_offsets_all": { + "content": [ + [ + [ + { + "id": "test", + "single_end": true, + "strandedness": "forward" + }, + "test_psite_offsets.txt:md5,32c34c6201b3efc8e7f84db370405e30" + ] + ] + ], + "meta": { + "nf-test": 
"0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-03-13T14:56:24.217844" + }, + "protocol": { + "content": [ + [ + + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-03-13T14:55:40.799057" + }, + "metagene_profile_5p_unspecified_stub": { + "content": [ + [ + [ + { + "id": "test", + "single_end": true + }, + "test_metagene_profiles_5p.tsv:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-03-14T11:18:35.591335" + }, + "neg_wig_unspecified_stub": { + "content": [ + [ + [ + { + "id": "test", + "single_end": true + }, + "test_neg.wig:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-03-14T11:18:35.621865" + }, + "read_length_dist_reverse": { + "content": [ + "test_read_length_dist.pdf" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-03-14T10:23:53.236982" + }, + "metagene_profile_3p_reverse": { + "content": [ + [ + [ + { + "id": "test", + "single_end": true, + "strandedness": "reverse" + }, + "test_metagene_profiles_3p.tsv:md5,fa00b17c8093d7a9a2bb4f83c24f7315" + ] + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-03-14T10:43:24.607592" + }, + "neg_wig_all_stub": { + "content": [ + [ + [ + { + "id": "test", + "single_end": true, + "strandedness": "forward" + }, + "test_neg.wig:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-03-13T14:57:09.064731" + }, + "pos_wig_unspecified_stub": { + "content": [ + [ + [ + { + "id": "test", + "single_end": true + }, + "test_pos.wig:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-03-14T11:18:35.614509" + }, + "protocol_unspecified_stub": { + "content": [ + [ + [ + { + 
"id": "test", + "single_end": true + }, + "test_protocol.txt:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-03-14T11:18:35.567122" + }, + "psite_offsets_all_stub": { + "content": [ + [ + [ + { + "id": "test", + "single_end": true, + "strandedness": "forward" + }, + "test_psite_offsets.txt:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-03-13T14:57:08.622733" + }, + "metagene_plots_all_stub": { + "content": [ + "test_metagene_plots.pdf" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-03-13T14:57:08.401264" + }, + "metagene_profile_3p_unspecified_stub": { + "content": [ + [ + [ + { + "id": "test", + "single_end": true + }, + "test_metagene_profiles_3p.tsv:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-03-14T11:32:24.661133" + }, + "versions_unspecified": { + "content": [ + [ + "versions.yml:md5,e692bbf6e4fcebbfd4b96fe0861f4fa7" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-03-14T11:12:11.653884" + }, + "bam_summary_unspecified_stub": { + "content": [ + [ + [ + { + "id": "test", + "single_end": true + }, + "test_bam_summary.txt:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-03-14T11:18:35.576923" + }, + "bam_summary": { + "content": [ + [ + [ + { + "id": "test", + "single_end": true, + "strandedness": "forward" + }, + "test_bam_summary.txt:md5,3d4a99d779b42755bf3ff6dc9802f9a9" + ] + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-03-14T10:23:17.882713" + }, + "orfs_reverse_stub": { + "content": [ + [ + [ + { + "id": "test", + "single_end": true, + "strandedness": "reverse" + }, + 
"test_translating_ORFs.tsv:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-03-14T11:38:30.43685" + }, + "metagene_plots": { + "content": [ + "test_metagene_plots.pdf" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-03-13T14:55:40.819694" + }, + "read_length_dist_stub": { + "content": [ + "test_read_length_dist.pdf" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-03-13T14:56:46.17198" + }, + "metagene_plots_unspecified_stub": { + "content": [ + "test_metagene_plots.pdf" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-03-14T11:18:35.599678" + }, + "bam_summary_all_stub": { + "content": [ + [ + [ + { + "id": "test", + "single_end": true, + "strandedness": "forward" + }, + "test_bam_summary.txt:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-03-13T14:57:07.499662" + }, + "versions": { + "content": [ + [ + "versions.yml:md5,e692bbf6e4fcebbfd4b96fe0861f4fa7" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-03-14T10:23:17.984684" + }, + "pos_wig_reverse_stub": { + "content": [ + [ + [ + { + "id": "test", + "single_end": true, + "strandedness": "reverse" + }, + "test_pos.wig:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-03-14T11:18:15.258994" + }, + "metagene_profile_5p_stub": { + "content": [ + [ + [ + { + "id": "test", + "single_end": true, + "strandedness": "forward" + }, + "test_metagene_profiles_5p.tsv:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-03-13T14:56:46.394691" + }, + "neg_wig_reverse_stub": { + "content": [ + [ + [ + { + "id": "test", + 
"single_end": true, + "strandedness": "reverse" + }, + "test_neg.wig:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-03-14T11:18:15.265052" + }, + "neg_wig": { + "content": [ + [ + [ + { + "id": "test", + "single_end": true, + "strandedness": "forward" + }, + "test_neg.wig:md5,bc036bbb6b2518f85a60c70829e48229" + ] + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-03-14T10:23:17.965289" + }, + "metagene_profile_3p_reverse_stub": { + "content": [ + [ + [ + { + "id": "test", + "single_end": true, + "strandedness": "reverse" + }, + "test_metagene_profiles_3p.tsv:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-03-14T11:32:05.192565" + }, + "bam_summary_reverse": { + "content": [ + [ + [ + { + "id": "test", + "single_end": true, + "strandedness": "reverse" + }, + "test_bam_summary.txt:md5,3d4a99d779b42755bf3ff6dc9802f9a9" + ] + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-03-14T10:23:53.230879" + }, + "protocol_reverse_stub": { + "content": [ + [ + [ + { + "id": "test", + "single_end": true, + "strandedness": "reverse" + }, + "test_protocol.txt:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-03-14T11:18:15.199816" + }, + "bam_summary_unspecified": { + "content": [ + [ + [ + { + "id": "test", + "single_end": true + }, + "test_bam_summary.txt:md5,3d4a99d779b42755bf3ff6dc9802f9a9" + ] + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-03-14T11:12:11.605103" + }, + "metagene_plots_unspecified": { + "content": [ + "test_metagene_plots.pdf" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-03-14T11:12:11.625406" + }, + "bam_summary_stub": { + "content": 
[ + [ + [ + { + "id": "test", + "single_end": true, + "strandedness": "forward" + }, + "test_bam_summary.txt:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-03-13T14:56:45.946051" + }, + "metagene_profile_3p_all_stub": { + "content": [ + [ + [ + { + "id": "test", + "single_end": true, + "strandedness": "forward" + }, + "test_metagene_profiles_3p.tsv:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-03-13T14:57:08.176629" + }, + "metagene_profile_5p_reverse": { + "content": [ + [ + [ + { + "id": "test", + "single_end": true, + "strandedness": "reverse" + }, + "test_metagene_profiles_5p.tsv:md5,0767434b9ed0d95f07dd5e87612f1014" + ] + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-03-14T10:23:53.242312" + }, + "bam_summary_reverse_stub": { + "content": [ + [ + [ + { + "id": "test", + "single_end": true, + "strandedness": "reverse" + }, + "test_bam_summary.txt:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-03-14T11:18:15.222899" + }, + "psite_offsets": { + "content": [ + [ + [ + { + "id": "test", + "single_end": true, + "strandedness": "forward" + }, + "test_psite_offsets.txt:md5,32c34c6201b3efc8e7f84db370405e30" + ] + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-03-14T10:23:17.932169" + }, + "metagene_profile_3p_stub": { + "content": [ + [ + [ + { + "id": "test", + "single_end": true, + "strandedness": "forward" + }, + "test_metagene_profiles_3p.tsv:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-03-13T14:56:46.615498" + }, + "orfs_stub": { + "content": [ + [ + [ + { + "id": "test", + "single_end": true, + "strandedness": "forward" + }, + 
"test_translating_ORFs.tsv:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-03-13T14:56:47.718532" + }, + "metagene_profile_5p_unspecified": { + "content": [ + [ + [ + { + "id": "test", + "single_end": true + }, + "test_metagene_profiles_5p.tsv:md5,0767434b9ed0d95f07dd5e87612f1014" + ] + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-03-14T11:16:52.638629" + }, + "neg_wig_reverse": { + "content": [ + [ + [ + { + "id": "test", + "single_end": true, + "strandedness": "reverse" + }, + "test_neg.wig:md5,3c1c2f9e2c32652f00325bcc643d92ba" + ] + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-03-14T10:23:53.297502" + }, + "metagene_plots_reverse_stub": { + "content": [ + "test_metagene_plots.pdf" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-03-14T11:18:15.247322" + }, + "pos_wig": { + "content": [ + [ + [ + { + "id": "test", + "single_end": true, + "strandedness": "forward" + }, + "test_pos.wig:md5,29330801b0d9a1ca7fe1abfc7aea8bf6" + ] + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-03-14T10:23:17.949136" + }, + "psite_offsets_stub": { + "content": [ + [ + [ + { + "id": "test", + "single_end": true, + "strandedness": "forward" + }, + "test_psite_offsets.txt:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-03-13T14:56:47.055786" + }, + "orfs_unspecified_stub": { + "content": [ + [ + [ + { + "id": "test", + "single_end": true + }, + "test_translating_ORFs.tsv:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-03-14T11:38:48.579416" + }, + "protocol_stub": { + "content": [ + [ + [ + { + "id": "test", + "single_end": true, + "strandedness": "forward" + 
}, + "test_protocol.txt:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-03-13T14:56:45.721392" + }, + "read_length_dist_unspecified_stub": { + "content": [ + "test_read_length_dist.pdf" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-03-14T11:18:35.584352" + }, + "psite_offsets_reverse_stub": { + "content": [ + [ + [ + { + "id": "test", + "single_end": true, + "strandedness": "reverse" + }, + "test_psite_offsets.txt:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-03-14T11:18:15.253256" + }, + "metagene_profile_3p_unspecified": { + "content": [ + [ + [ + { + "id": "test", + "single_end": true + }, + "test_metagene_profiles_3p.tsv:md5,fa00b17c8093d7a9a2bb4f83c24f7315" + ] + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-03-14T11:16:52.657937" + }, + "read_length_dist_reverse_stub": { + "content": [ + "test_read_length_dist.pdf" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-03-14T11:18:15.229571" + }, + "metagene_profile_5p": { + "content": [ + [ + [ + { + "id": "test", + "single_end": true, + "strandedness": "forward" + }, + "test_metagene_profiles_5p.tsv:md5,c7c36a2748f89ae3b2f1e7ae448d1237" + ] + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-03-14T10:23:17.912506" + }, + "metagene_profile_5p_all_stub": { + "content": [ + [ + [ + { + "id": "test", + "single_end": true, + "strandedness": "forward" + }, + "test_metagene_profiles_5p.tsv:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-03-13T14:57:07.945858" + }, + "protocol_unspecified": { + "content": [ + [ + [ + { + "id": "test", + "single_end": true + }, + 
"test_protocol.txt:md5,78ca4b6682ea8b5187eb512127544a0f" + ] + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-03-14T11:12:11.589918" + }, + "read_length_dist": { + "content": [ + "test_read_length_dist.pdf" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-03-13T14:55:40.81119" + }, + "versions_reverse": { + "content": [ + [ + "versions.yml:md5,e692bbf6e4fcebbfd4b96fe0861f4fa7" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-03-14T10:23:53.312637" + }, + "protocol_all": { + "content": [ + [ + + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-03-13T14:56:24.158017" + }, + "metagene_plots_stub": { + "content": [ + "test_metagene_plots.pdf" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-03-13T14:56:46.835308" + }, + "orfs_all_stub": { + "content": [ + [ + [ + { + "id": "test", + "single_end": true, + "strandedness": "forward" + }, + "test_translating_ORFs.tsv:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-03-13T14:57:09.284761" + }, + "protocol_reverse": { + "content": [ + [ + + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-03-14T10:23:53.224089" + }, + "neg_wig_stub": { + "content": [ + [ + [ + { + "id": "test", + "single_end": true, + "strandedness": "forward" + }, + "test_neg.wig:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-03-13T14:56:47.498598" + }, + "read_length_dist_all": { + "content": [ + "test_read_length_dist.pdf" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-03-13T14:56:24.177835" + }, + "neg_wig_all": { + "content": [ + [ + [ + { + "id": "test", + "single_end": true, + "strandedness": 
"forward" + }, + "test_neg.wig:md5,bc036bbb6b2518f85a60c70829e48229" + ] + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-03-13T14:56:24.238246" + }, + "read_length_dist_all_stub": { + "content": [ + "test_read_length_dist.pdf" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-03-13T14:57:07.71913" + }, + "psite_offsets_unspecified_stub": { + "content": [ + [ + [ + { + "id": "test", + "single_end": true + }, + "test_psite_offsets.txt:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-03-14T11:18:35.606622" + }, + "versions_reverse_stub": { + "content": [ + [ + "versions.yml:md5,e692bbf6e4fcebbfd4b96fe0861f4fa7" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-03-14T11:18:15.272599" + }, + "pos_wig_stub": { + "content": [ + [ + [ + { + "id": "test", + "single_end": true, + "strandedness": "forward" + }, + "test_pos.wig:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-03-13T14:56:47.275649" + }, + "metagene_plots_reverse": { + "content": [ + "test_metagene_plots.pdf" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-03-14T10:23:53.251928" + }, + "pos_wig_all": { + "content": [ + [ + [ + { + "id": "test", + "single_end": true, + "strandedness": "forward" + }, + "test_pos.wig:md5,29330801b0d9a1ca7fe1abfc7aea8bf6" + ] + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-03-13T14:56:24.228154" + }, + "versions_stub": { + "content": [ + [ + "versions.yml:md5,e692bbf6e4fcebbfd4b96fe0861f4fa7" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-03-13T14:56:47.939129" + }, + "neg_wig_unspecified": { + "content": [ + [ + [ + { + "id": "test", + "single_end": true + 
}, + "test_neg.wig:md5,3c1c2f9e2c32652f00325bcc643d92ba" + ] + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-03-14T11:12:11.646179" + }, + "read_length_dist_unspecified": { + "content": [ + "test_read_length_dist.pdf" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-03-14T11:12:11.614371" + }, + "pos_wig_all_stub": { + "content": [ + [ + [ + { + "id": "test", + "single_end": true, + "strandedness": "forward" + }, + "test_pos.wig:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-03-13T14:57:08.844313" + }, + "metagene_profile_5p_reverse_stub": { + "content": [ + [ + [ + { + "id": "test", + "single_end": true, + "strandedness": "reverse" + }, + "test_metagene_profiles_5p.tsv:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-03-14T11:18:15.235454" + }, + "protocol_all_stub": { + "content": [ + [ + [ + { + "id": "test", + "single_end": true, + "strandedness": "forward" + }, + "test_protocol.txt:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-03-13T14:57:07.281054" + }, + "psite_offsets_reverse": { + "content": [ + [ + [ + { + "id": "test", + "single_end": true, + "strandedness": "reverse" + }, + "test_psite_offsets.txt:md5,32c34c6201b3efc8e7f84db370405e30" + ] + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-03-14T10:23:53.259415" + }, + "versions_all_stub": { + "content": [ + [ + "versions.yml:md5,e692bbf6e4fcebbfd4b96fe0861f4fa7" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-03-13T14:57:09.506799" + }, + "metagene_plots_all": { + "content": [ + "test_metagene_plots.pdf" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + 
"timestamp": "2024-03-13T14:56:24.208296" + }, + "psite_offsets_unspecified": { + "content": [ + [ + [ + { + "id": "test", + "single_end": true + }, + "test_psite_offsets.txt:md5,32c34c6201b3efc8e7f84db370405e30" + ] + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-03-14T11:12:11.632172" + }, + "bam_summary_all": { + "content": [ + [ + [ + { + "id": "test", + "single_end": true, + "strandedness": "forward" + }, + "test_bam_summary.txt:md5,3d4a99d779b42755bf3ff6dc9802f9a9" + ] + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-03-13T14:56:24.168332" + }, + "metagene_profile_5p_all": { + "content": [ + [ + [ + { + "id": "test", + "single_end": true, + "strandedness": "forward" + }, + "test_metagene_profiles_5p.tsv:md5,c7c36a2748f89ae3b2f1e7ae448d1237" + ] + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-03-13T14:56:24.188253" + } +} \ No newline at end of file diff --git a/modules/nf-core/ribotricer/detectorfs/tests/nextflow.all.config b/modules/nf-core/ribotricer/detectorfs/tests/nextflow.all.config new file mode 100644 index 0000000..4e8344e --- /dev/null +++ b/modules/nf-core/ribotricer/detectorfs/tests/nextflow.all.config @@ -0,0 +1,11 @@ +process { + + withName: RIBOTRICER_DETECTORFS { + + // NOTE: this is not a sensible value, but makes the module run for + // testing purposes + + ext.args = '--phase_score_cutoff 0.05 --report_all' + } + +} diff --git a/modules/nf-core/ribotricer/detectorfs/tests/nextflow.filtered.config b/modules/nf-core/ribotricer/detectorfs/tests/nextflow.filtered.config new file mode 100644 index 0000000..0f7d7a7 --- /dev/null +++ b/modules/nf-core/ribotricer/detectorfs/tests/nextflow.filtered.config @@ -0,0 +1,11 @@ +process { + + withName: RIBOTRICER_DETECTORFS { + + // NOTE: this is not a sensible value, but makes the module run for + // testing purposes + + ext.args = '--phase_score_cutoff 0.05' + } + +} diff 
--git a/modules/nf-core/ribotricer/detectorfs/tests/tags.yml b/modules/nf-core/ribotricer/detectorfs/tests/tags.yml new file mode 100644 index 0000000..4326480 --- /dev/null +++ b/modules/nf-core/ribotricer/detectorfs/tests/tags.yml @@ -0,0 +1,2 @@ +ribotricer/detectorfs: + - "modules/nf-core/ribotricer/detectorfs/**" diff --git a/modules/nf-core/ribotricer/prepareorfs/environment.yml b/modules/nf-core/ribotricer/prepareorfs/environment.yml new file mode 100644 index 0000000..95bb38a --- /dev/null +++ b/modules/nf-core/ribotricer/prepareorfs/environment.yml @@ -0,0 +1,9 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json +name: "ribotricer_prepareorfs" +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - "bioconda::ribotricer=1.3.3" diff --git a/modules/nf-core/ribotricer/prepareorfs/main.nf b/modules/nf-core/ribotricer/prepareorfs/main.nf new file mode 100644 index 0000000..1b0edd1 --- /dev/null +++ b/modules/nf-core/ribotricer/prepareorfs/main.nf @@ -0,0 +1,48 @@ +process RIBOTRICER_PREPAREORFS { + tag "$meta.id" + label 'process_single' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/ribotricer:1.3.3--pyhdfd78af_0': + 'biocontainers/ribotricer:1.3.3--pyhdfd78af_0' }" + + input: + tuple val(meta), path(fasta), path(gtf) + + output: + tuple val(meta), path("*_candidate_orfs.tsv"), emit: candidate_orfs + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + + """ + ribotricer prepare-orfs \\ + --gtf $gtf \\ + --fasta $fasta \\ + --prefix $prefix \\ + $args + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + ribotricer: \$(ribotricer --version | grep ribotricer |& sed '1!d ; s/ribotricer, version //') + END_VERSIONS + """ + + stub: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + """ + touch ${prefix}_candidate_orfs.tsv + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + ribotricer: \$(ribotricer --version | grep ribotricer |& sed '1!d ; s/ribotricer, version //') + END_VERSIONS + """ +} diff --git a/modules/nf-core/ribotricer/prepareorfs/meta.yml b/modules/nf-core/ribotricer/prepareorfs/meta.yml new file mode 100644 index 0000000..005edd5 --- /dev/null +++ b/modules/nf-core/ribotricer/prepareorfs/meta.yml @@ -0,0 +1,53 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/meta-schema.json +name: "ribotricer_prepareorfs" +description: "Accurate detection of short and long active ORFs using Ribo-seq data" +keywords: + - riboseq + - orf + - genomics +tools: + - "ribotricer": + description: "Python package to detect translating ORF from Ribo-seq data" + homepage: "https://github.com/smithlabcode/ribotricer" + documentation: "https://github.com/smithlabcode/ribotricer" + tool_dev_url: "https://github.com/smithlabcode/ribotricer" + doi: "10.1093/bioinformatics/btz878" + licence: ["GNU General Public v3 (GPL v3)"] + +input: + - meta: + type: map + description: | + Groovy Map 
containing reference information + e.g. `[ id:'Ensembl human v.111' ]` + - fasta: + type: file + description: | + Fasta-format sequence file for reference sequences used in the bam file + pattern: "*.{fa,fasta}" + - gtf: + type: file + description: | + GTF-format annotation file for reference sequences used in the bam file + pattern: "*.gtf" + +output: + - meta: + type: map + description: | + Groovy Map containing reference information + e.g. `[ id:'Ensembl human v.111' ]` + - candidate_orfs: + type: file + description: "TSV file with candidate ORFs" + pattern: "*.tsv" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" + +authors: + - "@pinin4fjords" +maintainers: + - "@pinin4fjords" diff --git a/modules/nf-core/ribotricer/prepareorfs/tests/main.nf.test b/modules/nf-core/ribotricer/prepareorfs/tests/main.nf.test new file mode 100644 index 0000000..752fc6e --- /dev/null +++ b/modules/nf-core/ribotricer/prepareorfs/tests/main.nf.test @@ -0,0 +1,77 @@ +nextflow_process { + + name "Test Process RIBOTRICER_PREPAREORFS" + script "../main.nf" + process "RIBOTRICER_PREPAREORFS" + + tag "modules" + tag "modules_nfcore" + tag "ribotricer" + tag "ribotricer/prepareorfs" + tag "gunzip" + + setup { + run("GUNZIP") { + script "modules/nf-core/gunzip/main.nf" + process { + """ + input[0] = [ + [ ], + file(params.modules_testdata_base_path + "genomics/homo_sapiens/riboseq_expression/Homo_sapiens.GRCh38.dna.chromosome.20.fa.gz", checkIfExists: true) + ] + """ + } + } + } + + test("human chr20 - fasta - gtf") { + + when { + process { + """ + input[0] = GUNZIP.out.gunzip.map{[ + [id:'homo_sapiens_chr20'], + it[1], + file(params.modules_testdata_base_path + "genomics/homo_sapiens/riboseq_expression/Homo_sapiens.GRCh38.111_chr20.gtf", checkIfExists: true) + ]} + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + test("human chr20 - fasta - gtf - stub") { + + options
'-stub' + + when { + process { + """ + input[0] = GUNZIP.out.gunzip.map{[ + [id:'homo_sapiens_chr20'], + it[1], + file(params.modules_testdata_base_path + "genomics/homo_sapiens/riboseq_expression/Homo_sapiens.GRCh38.111_chr20.gtf", checkIfExists: true) + ]} + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } +} + + + + diff --git a/modules/nf-core/ribotricer/prepareorfs/tests/main.nf.test.snap b/modules/nf-core/ribotricer/prepareorfs/tests/main.nf.test.snap new file mode 100644 index 0000000..365f734 --- /dev/null +++ b/modules/nf-core/ribotricer/prepareorfs/tests/main.nf.test.snap @@ -0,0 +1,68 @@ +{ + "human chr20 - fasta - gtf": { + "content": [ + { + "0": [ + [ + { + "id": "homo_sapiens_chr20" + }, + "homo_sapiens_chr20_candidate_orfs.tsv:md5,dc5fc34a067311693ea2b3b3cb227df9" + ] + ], + "1": [ + "versions.yml:md5,60b6b36cf45aed75a0b4cb969854ac82" + ], + "candidate_orfs": [ + [ + { + "id": "homo_sapiens_chr20" + }, + "homo_sapiens_chr20_candidate_orfs.tsv:md5,dc5fc34a067311693ea2b3b3cb227df9" + ] + ], + "versions": [ + "versions.yml:md5,60b6b36cf45aed75a0b4cb969854ac82" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-03-08T13:40:02.573424" + }, + "human chr20 - fasta - gtf - stub": { + "content": [ + { + "0": [ + [ + { + "id": "homo_sapiens_chr20" + }, + "homo_sapiens_chr20_candidate_orfs.tsv:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + "versions.yml:md5,60b6b36cf45aed75a0b4cb969854ac82" + ], + "candidate_orfs": [ + [ + { + "id": "homo_sapiens_chr20" + }, + "homo_sapiens_chr20_candidate_orfs.tsv:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions": [ + "versions.yml:md5,60b6b36cf45aed75a0b4cb969854ac82" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-03-08T13:40:19.28114" + } +} \ No newline at end of file diff --git a/modules/nf-core/ribotricer/prepareorfs/tests/tags.yml 
b/modules/nf-core/ribotricer/prepareorfs/tests/tags.yml new file mode 100644 index 0000000..a3a402b --- /dev/null +++ b/modules/nf-core/ribotricer/prepareorfs/tests/tags.yml @@ -0,0 +1,2 @@ +ribotricer/prepareorfs: + - "modules/nf-core/ribotricer/prepareorfs/**" diff --git a/nextflow.config b/nextflow.config index ff3827f..579266f 100644 --- a/nextflow.config +++ b/nextflow.config @@ -95,9 +95,12 @@ params { rseqc_modules = 'bam_stat,inner_distance,infer_experiment,junction_annotation,junction_saturation,read_distribution,read_duplication' // Riboseq module-specific options - skip_ribotish = false - extra_ribotish_quality_args = null - extra_ribotish_predict_args = null + skip_ribotish = false + extra_ribotish_quality_args = null + extra_ribotish_predict_args = null + skip_ribotricer = false + extra_ribotricer_prepareorfs_args = null + extra_ribotricer_detectorfs_args = null // MultiQC options multiqc_config = null diff --git a/workflows/riboseq/main.nf b/workflows/riboseq/main.nf index 3cfbf99..7eda565 100644 --- a/workflows/riboseq/main.nf +++ b/workflows/riboseq/main.nf @@ -73,6 +73,8 @@ include { RIBOTISH_QUALITY as RIBOTISH_QUALITY_RIBOSEQ } from '../../mod include { RIBOTISH_QUALITY as RIBOTISH_QUALITY_TISEQ } from '../../modules/nf-core/ribotish/quality' include { RIBOTISH_PREDICT as RIBOTISH_PREDICT_INDIVIDUAL } from '../../modules/nf-core/ribotish/predict' include { RIBOTISH_PREDICT as RIBOTISH_PREDICT_ALL } from '../../modules/nf-core/ribotish/predict' +include { RIBOTRICER_PREPAREORFS } from '../../modules/nf-core/ribotricer/prepareorfs' +include { RIBOTRICER_DETECTORFS } from '../../modules/nf-core/ribotricer/detectorfs' /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -273,38 +275,53 @@ workflow RIBOSEQ { ch_genome_bam_by_type } - ch_bams_for_ribotish = ch_genome_bam_by_type.riboseq.join(ch_genome_bam_index) + ch_bams_for_analysis = ch_genome_bam_by_type.riboseq.join(ch_genome_bam_index) - 
RIBOTISH_QUALITY_RIBOSEQ( - ch_bams_for_ribotish, - ch_gtf.map { [ [:], it ] }.first() - ) - ch_versions = ch_versions.mix(RIBOTISH_QUALITY_RIBOSEQ.out.versions) + if (!params.skip_ribotish){ + RIBOTISH_QUALITY_RIBOSEQ( + ch_bams_for_analysis, + ch_gtf.map { [ [:], it ] }.first() + ) + ch_versions = ch_versions.mix(RIBOTISH_QUALITY_RIBOSEQ.out.versions) - ribotish_predict_inputs = ch_bams_for_ribotish - .join(RIBOTISH_QUALITY_RIBOSEQ.out.offset) - .multiMap{ meta, bam, bai, offset -> - bam: [ meta, bam, bai ] - offset: [ meta, offset ] - } + ribotish_predict_inputs = ch_bams_for_analysis + .join(RIBOTISH_QUALITY_RIBOSEQ.out.offset) + .multiMap{ meta, bam, bai, offset -> + bam: [ meta, bam, bai ] + offset: [ meta, offset ] + } - RIBOTISH_PREDICT_INDIVIDUAL( - ribotish_predict_inputs.bam, - [[:],[],[]], - ch_fasta.combine(ch_gtf).map{ fasta, gtf -> [ [:], fasta, gtf ] }.first(), - [[:],[]], - ribotish_predict_inputs.offset, - [[:],[]] - ) + ch_fasta_gtf = ch_fasta.combine(ch_gtf).map{ fasta, gtf -> [ [:], fasta, gtf ] }.first() - RIBOTISH_PREDICT_ALL( - ribotish_predict_inputs.bam.map{meta, bam, bai -> [[id:'allsamples'], bam, bai]}.groupTuple(), - [[:],[],[]], - ch_fasta.combine(ch_gtf).map{ fasta, gtf -> [ [:], fasta, gtf ] }, - [[:],[]], - ribotish_predict_inputs.offset.map{meta, offset -> [[id:'allsamples'], offset]}.groupTuple(), - [[:],[]] - ) + RIBOTISH_PREDICT_INDIVIDUAL( + ribotish_predict_inputs.bam, + [[:],[],[]], + ch_fasta_gtf, + [[:],[]], + ribotish_predict_inputs.offset, + [[:],[]] + ) + + RIBOTISH_PREDICT_ALL( + ribotish_predict_inputs.bam.map{meta, bam, bai -> [[id:'allsamples'], bam, bai]}.groupTuple(), + [[:],[],[]], + ch_fasta_gtf, + [[:],[]], + ribotish_predict_inputs.offset.map{meta, offset -> [[id:'allsamples'], offset]}.groupTuple(), + [[:],[]] + ) + } + + if (!params.skip_ribotricer){ + RIBOTRICER_PREPAREORFS( + ch_fasta_gtf + ) + + RIBOTRICER_DETECTORFS( + ch_bams_for_analysis, + RIBOTRICER_PREPAREORFS.out.candidate_orfs + ) + } // // 
Collate and save software versions From 36b2b4400e82a94b189e99609dca85696798d592 Mon Sep 17 00:00:00 2001 From: Jonathan Manning Date: Mon, 18 Mar 2024 10:15:50 +0000 Subject: [PATCH 03/22] skip ribotricer in test profile --- conf/test.config | 2 ++ 1 file changed, 2 insertions(+) diff --git a/conf/test.config b/conf/test.config index 45bbe19..21ad940 100644 --- a/conf/test.config +++ b/conf/test.config @@ -25,4 +25,6 @@ params { fasta = 'https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/riboseq_expression/Homo_sapiens.GRCh38.dna.chromosome.20.fa.gz' gtf = 'https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/riboseq_expression/Homo_sapiens.GRCh38.111_chr20.gtf' min_trimmed_reads = 1000 + + skip_ribotricer = true } From 6ea7c8bb178af29fff546d0b6281d368fe40d2a5 Mon Sep 17 00:00:00 2001 From: Jonathan Manning Date: Mon, 18 Mar 2024 10:20:11 +0000 Subject: [PATCH 04/22] Appease eclint --- workflows/riboseq/main.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/workflows/riboseq/main.nf b/workflows/riboseq/main.nf index 7eda565..7f4e62f 100644 --- a/workflows/riboseq/main.nf +++ b/workflows/riboseq/main.nf @@ -316,7 +316,7 @@ workflow RIBOSEQ { RIBOTRICER_PREPAREORFS( ch_fasta_gtf ) - + RIBOTRICER_DETECTORFS( ch_bams_for_analysis, RIBOTRICER_PREPAREORFS.out.candidate_orfs From b69906fa7a2df1cc2cd90d011814da96633d84f1 Mon Sep 17 00:00:00 2001 From: Jonathan Manning Date: Mon, 18 Mar 2024 10:30:18 +0000 Subject: [PATCH 05/22] Update schema --- nextflow_schema.json | 17 ++++++++++++++--- 1 file changed, 14 insertions(+), 3 deletions(-) diff --git a/nextflow_schema.json b/nextflow_schema.json index 6752d16..8bc2809 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -471,6 +471,14 @@ "extra_ribotish_predict_args": { "type": "string", "description": "Extra arguments to pass to the ribotish predict command in addition to defaults defined by the pipeline." 
+ }, + "extra_ribotricer_prepareorfs_args": { + "type": "string", + "description": "Extra arguments to pass to the ribotricer prepare-orfs command in addition to defaults defined by the pipeline." + }, + "extra_ribotricer_detectorfs_args": { + "type": "string", + "description": "Extra arguments to pass to the ribotricer detect-orfs command in addition to defaults defined by the pipeline." } } }, @@ -657,6 +665,9 @@ "type": "boolean", "description": "Skip Ribo-TISH.", "fa_icon": "fas fa-fast-forward" + }, + "skip_ribotricer": { + "type": "boolean" } } }, @@ -870,9 +881,6 @@ } }, "allOf": [ - { - "$ref": "#/definitions/riboseq_specific_options" - }, { "$ref": "#/definitions/input_output_options" }, @@ -891,6 +899,9 @@ { "$ref": "#/definitions/alignment_options" }, + { + "$ref": "#/definitions/riboseq_specific_options" + }, { "$ref": "#/definitions/optional_outputs" }, From 20ba16a8f631d93387cd7bcaddae275a446aa005 Mon Sep 17 00:00:00 2001 From: Jonathan Manning Date: Mon, 18 Mar 2024 10:41:42 +0000 Subject: [PATCH 06/22] Fix linting --- workflows/riboseq/main.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/workflows/riboseq/main.nf b/workflows/riboseq/main.nf index 7f4e62f..310f9c9 100644 --- a/workflows/riboseq/main.nf +++ b/workflows/riboseq/main.nf @@ -316,7 +316,7 @@ workflow RIBOSEQ { RIBOTRICER_PREPAREORFS( ch_fasta_gtf ) - + RIBOTRICER_DETECTORFS( ch_bams_for_analysis, RIBOTRICER_PREPAREORFS.out.candidate_orfs From 0562c8514dce7dc60556e3666200f59542a6d29f Mon Sep 17 00:00:00 2001 From: Jonathan Manning Date: Mon, 18 Mar 2024 11:52:59 +0000 Subject: [PATCH 07/22] Temporary fix to test data --- tests/nextflow.config | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tests/nextflow.config b/tests/nextflow.config index 36706b2..786df68 100644 --- a/tests/nextflow.config +++ b/tests/nextflow.config @@ -26,7 +26,9 @@ process { params { test_data_base = 'https://raw.githubusercontent.com/nf-core/test-datasets/modules' - 
modules_testdata_base_path = 's3://ngi-igenomes/testdata/nf-core/modules/' + // Test data on S3 needs update + //modules_testdata_base_path = 's3://ngi-igenomes/testdata/nf-core/modules/' + modules_testdata_base_path = 'https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/' hisat2_build_memory = '3.GB' } From b3f4d1a63e81d0cbc1d7e839f139e1bd5c2db911 Mon Sep 17 00:00:00 2001 From: Jonathan Manning Date: Mon, 18 Mar 2024 11:55:57 +0000 Subject: [PATCH 08/22] Update CHANGELOG --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 824ac00..5e6a4ac 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -17,6 +17,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - [[#35](https://github.com/nf-core/riboseq/pull/35) - Sortmerna: index once ([@pinin4fjords](https://github.com/pinin4fjords), review by [@maxulysse](https://github.com/maxulysse)) - [[#36](https://github.com/nf-core/riboseq/pull/36) - Bump bbsplit module to prevent index overwrites ([@pinin4fjords](https://github.com/pinin4fjords), review by [@maxulysse](https://github.com/maxulysse)) - [#38](https://github.com/nf-core/riboseq/pull/38) - Important! Template update for nf-core/tools v2.13.1 ([@nf-core-bot](https://github.com/nf-core-bot), [@pinin4fjords](https://github.com/pinin4fjords)) +- [#40](https://github.com/nf-core/riboseq/pull/40) - Ribotricer orf prediction ([@pinin4fjords](https://github.com/pinin4fjords), review by ) Initial release of nf-core/riboseq, created with the [nf-core](https://nf-co.re/) template. 
From 8a39dbb60be8a398bec04973f2b12f63b5c29ec7 Mon Sep 17 00:00:00 2001 From: Jonathan Manning Date: Mon, 18 Mar 2024 13:23:08 +0000 Subject: [PATCH 09/22] Update ribotish modules --- modules.json | 4 +- .../ribotish/predict/tests/main.nf.test | 8 +- .../ribotish/predict/tests/main.nf.test.snap | 102 +++++++++--------- .../ribotish/quality/tests/main.nf.test.snap | 4 +- 4 files changed, 59 insertions(+), 59 deletions(-) diff --git a/modules.json b/modules.json index 19e7f2a..9abc972 100644 --- a/modules.json +++ b/modules.json @@ -77,12 +77,12 @@ }, "ribotish/predict": { "branch": "master", - "git_sha": "21fbfeef55f6988127868eee1d1737217478f0ca", + "git_sha": "d3039382e6943ee8e80016a1e8d7c0f79e21e31e", "installed_by": ["modules"] }, "ribotish/quality": { "branch": "master", - "git_sha": "fac281689037749ea014215a61d431522513077a", + "git_sha": "d3039382e6943ee8e80016a1e8d7c0f79e21e31e", "installed_by": ["modules"] }, "ribotricer/detectorfs": { diff --git a/modules/nf-core/ribotish/predict/tests/main.nf.test b/modules/nf-core/ribotish/predict/tests/main.nf.test index 2566ad1..4116661 100644 --- a/modules/nf-core/ribotish/predict/tests/main.nf.test +++ b/modules/nf-core/ribotish/predict/tests/main.nf.test @@ -50,8 +50,8 @@ nextflow_process { then { assertAll( { assert process.success }, - { assert path(process.out.predictions[0][1]).getText().contains("ENSG00000132640\tENST00000254977\tBTBD3\tprotein_coding\t20:11890767-11923666:+\tATG\t45\t1743\tExtended\t0\t0\tNone\t0.004656093095383119\tT\tNone\tNone\t0.00893789006386") }, - { assert path(process.out.all[0][1]).getText().contains("ENSG00000132640\tENST00000254977\tBTBD3\tprotein_coding\t20:11890767-11923666:+\tATG\t45\t1743\tExtended\t0\t0\tNone\t0.004656093095383119\tT\tNone\tNone\t0.00893789006386") }, + { assert 
path(process.out.predictions[0][1]).getText().contains("ENSG00000132640\tENST00000254977\tBTBD3\tprotein_coding\t20:11890767-11923666:+\tATG\t45\t1743\tExtended\t0\t0\tNone\t0.013377070461772932\tT\tNone\tNone\t0.02118962148347") }, + { assert path(process.out.all[0][1]).getText().contains("ENSG00000132640\tENST00000254977\tBTBD3\tprotein_coding\t20:11890767-11923666:+\tATG\t45\t1743\tExtended\t0\t0\tNone\t0.013377070461772932\tT\tNone\tNone\t0.02118962148347") }, { assert snapshot(process.out.transprofile).match("transprofile_single_end_single_ribo_bam") }, { assert snapshot(process.out.versions).match("versions_single_end_single_ribo_bam") } ) @@ -126,8 +126,8 @@ nextflow_process { then { assertAll( { assert process.success }, - { assert path(process.out.predictions[0][1]).getText().contains("ENSG00000088876\tENST00000465019\tZNF343\tprotein_coding\t20:2483160-2492732:-\tATG\t298\t1828\tNovel:Known\t0\t0\tNone\t0.016618916757") }, - { assert path(process.out.all[0][1]).getText().contains("ENSG00000088876\tENST00000465019\tZNF343\tprotein_coding\t20:2483160-2492732:-\tATG\t298\t1828\tNovel:Known\t0\t0\tNone\t0.016618916757") }, + { assert path(process.out.predictions[0][1]).getText().contains("ENSG00000284776\tENST00000618693\t\tprotein_coding\t20:18567478-18744216:+\tATG\t26\t695\tAnnotated\t0\t0\tNone\t0.0006123183014212") }, + { assert path(process.out.all[0][1]).getText().contains("ENSG00000284776\tENST00000618693\t\tprotein_coding\t20:18567478-18744216:+\tATG\t26\t695\tAnnotated\t0\t0\tNone\t0.0006123183014212") }, { assert snapshot(process.out.transprofile).match("transprofile_single_end_multi_ribo_bam") }, { assert snapshot(process.out.versions).match("versions_single_end_multi_ribo_bam") } ) diff --git a/modules/nf-core/ribotish/predict/tests/main.nf.test.snap b/modules/nf-core/ribotish/predict/tests/main.nf.test.snap index 5fab05a..7716ecd 100644 --- a/modules/nf-core/ribotish/predict/tests/main.nf.test.snap +++ 
b/modules/nf-core/ribotish/predict/tests/main.nf.test.snap @@ -1,4 +1,16 @@ { + "versions_single_end_multi_ribo_bam_stub": { + "content": [ + [ + "versions.yml:md5,48e727a11954fb4c3de5a0eb2576951c" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-03-04T11:28:24.897805" + }, "transprofile_single_end_single_ribo_bam_stub": { "content": [ [ @@ -18,19 +30,26 @@ }, "timestamp": "2024-03-04T11:27:04.019058" }, - "versions_single_end_single_ribo_bam_stub": { + "transprofile_single_end_multi_ribo_bam_stub": { "content": [ [ - "versions.yml:md5,48e727a11954fb4c3de5a0eb2576951c" + [ + { + "id": "test", + "single_end": true, + "strandedness": "forward" + }, + "test_transprofile.py:md5,d41d8cd98f00b204e9800998ecf8427e" + ] ] ], "meta": { "nf-test": "0.8.4", "nextflow": "23.10.1" }, - "timestamp": "2024-03-04T11:27:04.031448" + "timestamp": "2024-03-04T11:28:24.874123" }, - "all_single_end_multi_ribo_bam_stub": { + "all_single_end_single_ribo_bam_stub": { "content": [ [ [ @@ -47,9 +66,9 @@ "nf-test": "0.8.4", "nextflow": "23.10.1" }, - "timestamp": "2024-03-04T11:28:24.849804" + "timestamp": "2024-03-04T11:27:04.007006" }, - "versions_single_end_single_ribo_bam": { + "versions_single_end_single_ribo_bam_stub": { "content": [ [ "versions.yml:md5,48e727a11954fb4c3de5a0eb2576951c" @@ -59,28 +78,21 @@ "nf-test": "0.8.4", "nextflow": "23.10.1" }, - "timestamp": "2024-03-04T09:46:34.409406" + "timestamp": "2024-03-04T11:27:04.031448" }, - "transprofile_single_end_multi_ribo_bam": { + "versions_single_end_multi_ribo_bam": { "content": [ [ - [ - { - "id": "test", - "single_end": true, - "strandedness": "forward" - }, - "test_transprofile.py:md5,8ae3e0a2e9ee68a789216ebc65d288d4" - ] + "versions.yml:md5,48e727a11954fb4c3de5a0eb2576951c" ] ], "meta": { "nf-test": "0.8.4", "nextflow": "23.10.1" }, - "timestamp": "2024-03-04T11:28:06.593742" + "timestamp": "2024-03-04T09:47:34.863391" }, - "predictions_single_end_multi_ribo_bam_stub": { + 
"transprofile_single_end_single_ribo_bam": { "content": [ [ [ @@ -89,7 +101,7 @@ "single_end": true, "strandedness": "forward" }, - "test_pred.txt:md5,d41d8cd98f00b204e9800998ecf8427e" + "test_transprofile.py:md5,3e6aaa9ec9f3346ae8c838da2d415924" ] ] ], @@ -97,21 +109,9 @@ "nf-test": "0.8.4", "nextflow": "23.10.1" }, - "timestamp": "2024-03-04T11:28:24.825927" - }, - "versions_single_end_multi_ribo_bam_stub": { - "content": [ - [ - "versions.yml:md5,48e727a11954fb4c3de5a0eb2576951c" - ] - ], - "meta": { - "nf-test": "0.8.4", - "nextflow": "23.10.1" - }, - "timestamp": "2024-03-04T11:28:24.897805" + "timestamp": "2024-03-14T12:20:15.166081" }, - "transprofile_single_end_multi_ribo_bam_stub": { + "all_single_end_multi_ribo_bam_stub": { "content": [ [ [ @@ -120,7 +120,7 @@ "single_end": true, "strandedness": "forward" }, - "test_transprofile.py:md5,d41d8cd98f00b204e9800998ecf8427e" + "test_all.txt:md5,d41d8cd98f00b204e9800998ecf8427e" ] ] ], @@ -128,40 +128,40 @@ "nf-test": "0.8.4", "nextflow": "23.10.1" }, - "timestamp": "2024-03-04T11:28:24.874123" + "timestamp": "2024-03-04T11:28:24.849804" }, - "all_single_end_single_ribo_bam_stub": { + "versions_single_end_single_ribo_bam": { "content": [ [ - [ - { - "id": "test", - "single_end": true, - "strandedness": "forward" - }, - "test_all.txt:md5,d41d8cd98f00b204e9800998ecf8427e" - ] + "versions.yml:md5,48e727a11954fb4c3de5a0eb2576951c" ] ], "meta": { "nf-test": "0.8.4", "nextflow": "23.10.1" }, - "timestamp": "2024-03-04T11:27:04.007006" + "timestamp": "2024-03-04T09:46:34.409406" }, - "versions_single_end_multi_ribo_bam": { + "transprofile_single_end_multi_ribo_bam": { "content": [ [ - "versions.yml:md5,48e727a11954fb4c3de5a0eb2576951c" + [ + { + "id": "test", + "single_end": true, + "strandedness": "forward" + }, + "test_transprofile.py:md5,202a5b5983806e8d9f242c6157736357" + ] ] ], "meta": { "nf-test": "0.8.4", "nextflow": "23.10.1" }, - "timestamp": "2024-03-04T09:47:34.863391" + "timestamp": 
"2024-03-14T12:26:32.447969" }, - "transprofile_single_end_single_ribo_bam": { + "predictions_single_end_multi_ribo_bam_stub": { "content": [ [ [ @@ -170,7 +170,7 @@ "single_end": true, "strandedness": "forward" }, - "test_transprofile.py:md5,4b94277acd84195077986ae5918359a6" + "test_pred.txt:md5,d41d8cd98f00b204e9800998ecf8427e" ] ] ], @@ -178,7 +178,7 @@ "nf-test": "0.8.4", "nextflow": "23.10.1" }, - "timestamp": "2024-03-04T11:26:47.038191" + "timestamp": "2024-03-04T11:28:24.825927" }, "predictions_single_end_single_ribo_bam_stub": { "content": [ diff --git a/modules/nf-core/ribotish/quality/tests/main.nf.test.snap b/modules/nf-core/ribotish/quality/tests/main.nf.test.snap index 1061069..4bb486c 100644 --- a/modules/nf-core/ribotish/quality/tests/main.nf.test.snap +++ b/modules/nf-core/ribotish/quality/tests/main.nf.test.snap @@ -27,7 +27,7 @@ "single_end": true, "strandedness": "forward" }, - "test_qual.txt:md5,bf141602163a965009f17c5c82969aeb" + "test_qual.txt:md5,da6083558b20405f79d90a025c9849f6" ] ] ], @@ -35,7 +35,7 @@ "nf-test": "0.8.4", "nextflow": "23.10.1" }, - "timestamp": "2024-02-29T19:19:04.425034" + "timestamp": "2024-03-14T12:18:54.863133" }, "pdf_single_end_stub": { "content": [ From 4f8015ab1ff3a7a3e9f27ab13cadb60d03b4fe21 Mon Sep 17 00:00:00 2001 From: Jonathan Manning Date: Mon, 18 Mar 2024 13:28:09 +0000 Subject: [PATCH 10/22] skip ribotricer in nf-test --- tests/main.nf.test | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/main.nf.test b/tests/main.nf.test index 6a695c0..e24c467 100644 --- a/tests/main.nf.test +++ b/tests/main.nf.test @@ -16,6 +16,7 @@ nextflow_pipeline { fasta = 'https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/riboseq_expression/Homo_sapiens.GRCh38.dna.chromosome.20.fa.gz' gtf = 'https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/riboseq_expression/Homo_sapiens.GRCh38.111_chr20.gtf' min_trimmed_reads = 1000 + skip_ribotricer = true } 
} From c9ef8d2d68696a3d1a7ebdf5b55f00e3a2762ab3 Mon Sep 17 00:00:00 2001 From: Jonathan Manning Date: Mon, 18 Mar 2024 14:27:56 +0000 Subject: [PATCH 11/22] Update README, CITATIONS --- CITATIONS.md | 33 +++++++++++++++++++++++++++++++++ README.md | 52 ++++++++++++++++++++++++---------------------------- 2 files changed, 57 insertions(+), 28 deletions(-) diff --git a/CITATIONS.md b/CITATIONS.md index b178e03..c7a338c 100644 --- a/CITATIONS.md +++ b/CITATIONS.md @@ -10,6 +10,16 @@ ## Pipeline tools +- [BBMap](https://sourceforge.net/projects/bbmap/) + +- [BEDTools](https://pubmed.ncbi.nlm.nih.gov/20110278/) + + > Quinlan AR, Hall IM. BEDTools: a flexible suite of utilities for comparing genomic features. Bioinformatics. 2010 Mar 15;26(6):841-2. doi: 10.1093/bioinformatics/btq033. Epub 2010 Jan 28. PubMed PMID: 20110278; PubMed Central PMCID: PMC2832824. + +- [fastp](https://www.ncbi.nlm.nih.gov/pubmed/30423086/) + + > Chen S, Zhou Y, Chen Y, Gu J. fastp: an ultra-fast all-in-one FASTQ preprocessor. Bioinformatics. 2018 Sep 1;34(17):i884-i890. doi: 10.1093/bioinformatics/bty560. PubMed PMID: 30423086; PubMed Central PMCID: PMC6129281. + - [FastQC](https://www.bioinformatics.babraham.ac.uk/projects/fastqc/) > Andrews, S. (2010). FastQC: A Quality Control Tool for High Throughput Sequence Data [Online]. @@ -18,6 +28,29 @@ > Ewels P, Magnusson M, Lundin S, Käller M. MultiQC: summarize analysis results for multiple tools and samples in a single report. Bioinformatics. 2016 Oct 1;32(19):3047-8. doi: 10.1093/bioinformatics/btw354. Epub 2016 Jun 16. PubMed PMID: 27312411; PubMed Central PMCID: PMC5039924. +- [Ribo-TISH](https://pubmed.ncbi.nlm.nih.gov/29170441/) + + > Zhang P, He D, Xu Y, Hou J, Pan BF, Wang Y, Liu T, Davis CM, Ehli EA, Tan L, Zhou F, Hu J, Yu Y, Chen X, Nguyen TM, Rosen JM, Hawke DH, Ji Z, Chen Y. Genome-wide identification and differential analysis of translational initiation. Nat Commun. 2017 Nov 23;8(1):1749. doi: 10.1038/s41467-017-01981-8. 
PMID: 29170441; PMCID: PMC5701008. + +- [Ribotricer](https://pubmed.ncbi.nlm.nih.gov/31750902/) + + > Choudhary S, Li W, D Smith A. Accurate detection of short and long active ORFs using Ribo-seq data. Bioinformatics. 2020 Apr 1;36(7):2053-2059. doi: 10.1093/bioinformatics/btz878. PMID: 31750902; PMCID: PMC7141849. + +- [SortMeRNA](https://pubmed.ncbi.nlm.nih.gov/23071270/) + + > Kopylova E, Noé L, Touzet H. SortMeRNA: fast and accurate filtering of ribosomal RNAs in metatranscriptomic data Bioinformatics. 2012 Dec 15;28(24):3211-7. doi: 10.1093/bioinformatics/bts611. Epub 2012 Oct 15. PubMed PMID: 23071270. + +- [STAR](https://pubmed.ncbi.nlm.nih.gov/23104886/) + + > Dobin A, Davis CA, Schlesinger F, Drenkow J, Zaleski C, Jha S, Batut P, Chaisson M, Gingeras TR. STAR: ultrafast universal RNA-seq aligner Bioinformatics. 2013 Jan 1;29(1):15-21. doi: 10.1093/bioinformatics/bts635. Epub 2012 Oct 25. PubMed PMID: 23104886; PubMed Central PMCID: PMC3530905. + +- [Trim Galore!](https://www.bioinformatics.babraham.ac.uk/projects/trim_galore/) + +- [UMI-tools](https://pubmed.ncbi.nlm.nih.gov/28100584/) + + > Smith T, Heger A, Sudbery I. UMI-tools: modeling sequencing errors in Unique Molecular Identifiers to improve quantification accuracy Genome Res. 2017 Mar;27(3):491-499. doi: 10.1101/gr.209601.116. Epub 2017 Jan 18. PubMed PMID: 28100584; PubMed Central PMCID: PMC5340976. + + ## Software packaging/containerisation tools - [Anaconda](https://anaconda.com) diff --git a/README.md b/README.md index ea5641b..9c8b338 100644 --- a/README.md +++ b/README.md @@ -19,46 +19,43 @@ ## Introduction -**nf-core/riboseq** is a bioinformatics pipeline that ... +**nf-core/riboseq** is a bioinformatics pipeline for analysis of ribo-seq data. It borrows heavily from nf-core/rnaseq in the preprocessing stages: - +1. Merge re-sequenced FastQ files ([`cat`](http://www.linfo.org/cat.html)) +2. 
Sub-sample FastQ files and auto-infer strandedness ([`fq`](https://github.com/stjude-rust-labs/fq), [`Salmon`](https://combine-lab.github.io/salmon/)) +3. Read QC ([`FastQC`](https://www.bioinformatics.babraham.ac.uk/projects/fastqc/)) +4. UMI extraction ([`UMI-tools`](https://github.com/CGATOxford/UMI-tools)) +5. Adapter and quality trimming ([`Trim Galore!`](https://www.bioinformatics.babraham.ac.uk/projects/trim_galore/)) +6. Removal of genome contaminants ([`BBSplit`](http://seqanswers.com/forums/showthread.php?t=41288)) +7. Removal of ribosomal RNA ([`SortMeRNA`](https://github.com/biocore/sortmerna)) +8. Multiple alignment to both genome and transcriptome using [`STAR`](https://github.com/alexdobin/STAR) +9. Sort and index alignments ([`SAMtools`](https://sourceforge.net/projects/samtools/files/samtools/)) +10. UMI-based deduplication ([`UMI-tools`](https://github.com/CGATOxford/UMI-tools)) - - +Differences occur in the downstream analysis steps. Currently these specialist steps are: -1. Read QC ([`FastQC`](https://www.bioinformatics.babraham.ac.uk/projects/fastqc/)) -2. Present QC for raw reads ([`MultiQC`](http://multiqc.info/)) +1. Check reads distribution around annotated protein coding regions on user-provided transcripts, show frame bias and estimate P-site offset for different groups of reads ([`Ribo-TISH`](https://github.com/zhpn1024/ribotish)) +2. Predict translating open reading frames and/or translation initiation sites _de novo_ from alignment data ([`Ribo-TISH`](https://github.com/zhpn1024/ribotish)) +3. Derive candidate ORFs from reference data and detect translating ORFs from that list ([`Ribotricer`](https://github.com/smithlabcode/ribotricer)) ## Usage > [!NOTE] > If you are new to Nextflow and nf-core, please refer to [this page](https://nf-co.re/docs/usage/installation) on how to set-up Nextflow.
Make sure to [test your setup](https://nf-co.re/docs/usage/introduction#how-to-run-a-pipeline) with `-profile test` before running the workflow on actual data. - +Each row represents a fastq file (single-end) or a pair of fastq files (paired end). Each row should have a 'type' value of `riboseq`, `tiseq` or `rnaseq`. Future iterations of the workflow will conduct paired analysis of matched riboseq and rnaseq samples to accomplish analysis types such as 'translational efficiency, but in the current version you should set this to `riboseq` or `tiseq` for reglar ribo-seq or TI-seq data respectively. Now, you can run the pipeline using: - - ```bash nextflow run nf-core/riboseq \ -profile \ @@ -80,12 +77,13 @@ For more details about the output files and reports, please refer to the ## Credits -nf-core/riboseq was originally written by Maxime U Garcia. - -We thank the following people for their extensive assistance in the development of this pipeline: - - +nf-core/riboseq was originally written by [Jonathan Manning](https://github.com/pinin4fjords) (Bioinformatics Engineer as Seqera) with funding from [Altos labs](https://www.altoslabs.com/) and in discussion with [Felix Krueger](https://github.com/FelixKrueger) and Christel Krueger. We thank the following people for their input: + - Anne Bresciani (ZS) + - Mikhail Osipovitch (ZS) + - Edward Wallace (University of Edinburgh) + - Jack Tierney (University College Cork) + ## Contributions and Support If you would like to contribute to this pipeline, please see the [contributing guidelines](.github/CONTRIBUTING.md). @@ -97,8 +95,6 @@ For further information or help, don't hesitate to get in touch on the [Slack `# - - An extensive list of references for the tools used by the pipeline can be found in the [`CITATIONS.md`](CITATIONS.md) file. 
You can cite the `nf-core` publication as follows: From bab78ac31d681b33e4fac6fe785f10e4c0067b6f Mon Sep 17 00:00:00 2001 From: Jonathan Manning Date: Mon, 18 Mar 2024 15:32:01 +0000 Subject: [PATCH 12/22] Refine publishing locations --- conf/modules.config | 21 ++++++++++++++++++--- 1 file changed, 18 insertions(+), 3 deletions(-) diff --git a/conf/modules.config b/conf/modules.config index 60eaa38..d16a95b 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -1249,9 +1249,24 @@ if (!params.skip_ribotricer) { withName: 'RIBOTRICER_DETECTORFS' { ext.args = { params.extra_ribotricer_detectorfs_args ?: '' } publishDir = [ - path: { "${params.outdir}/orf_predictions/ribotricer" }, - mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + [ + path: { "${params.outdir}/riboseq_qc/ribotricer" }, + mode: params.publish_dir_mode, + pattern: "*_{read_length_dist.pdf,metagene_plots.pdf,bam_summary.txt,protocol.txt,metagene_profiles_5p.tsv,metagene_profiles_3p.tsv}" + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ], + [ + path: { "${params.outdir}/orf_predictions/ribotricer" }, + mode: params.publish_dir_mode, + pattern: "*_{translating_ORFs.tsv,psite_offsets.txt}", + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ], + [ + path: { "${params.outdir}/other/ribotricer" }, + mode: params.publish_dir_mode, + pattern: "*_{pos.wig,neg.wig}", + saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename } + ] ] } } From 3f4b7398abaf2b7bde119ea2a421854d751b17b2 Mon Sep 17 00:00:00 2001 From: Jonathan Manning Date: Mon, 18 Mar 2024 15:32:16 +0000 Subject: [PATCH 13/22] Update output.md --- docs/output.md | 36 ++++++++++++++++++++++++++++++++---- 1 file changed, 32 insertions(+), 4 deletions(-) diff --git a/docs/output.md b/docs/output.md index 7622261..0cee2cc 100644 --- a/docs/output.md +++ b/docs/output.md @@ -22,15 +22,16 @@ The pipeline is built using [Nextflow](https://www.nextflow.io/) and processes d - [fastp](#fastp) - Adapter and quality trimming - [BBSplit](#bbsplit) - Removal of genome contaminants - [SortMeRNA](#sortmerna) - Removal of ribosomal RNA - - [Ribo-TISH quality](#ribotish-quality) - Riboseq QC plots generated with the Ribo-TISH 'quality' command + - [Ribo-TISH quality](#ribo-tish-quality) - Riboseq QC plots generated with the Ribo-TISH 'quality' command - [Alignment and quantification](#alignment-and-quantification) - [STAR](#star) - Fast spliced aware genome alignment - [Alignment post-processing](#alignment-post-processing) - [SAMtools](#samtools) - Sort and index alignments - [UMI-tools dedup](#umi-tools-dedup) - UMI-based deduplication - [picard MarkDuplicates](#picard-markduplicates) - Duplicate read marking -- [ORF prediction](#orf-prediction) - Open reading frame (ORF prediction) - - [Ribo-TISH](#ribotish-predict) - Riboseq ORF predictions by Ribo-TISH +- [ORF prediction](#orf-predictions) - Open reading frame (ORF) prediction + - [Ribo-TISH](#ribo-tish-predict) - Riboseq ORF predictions by Ribo-TISH + - [Ribotricer](#ribotricer-detect-orfs) - Riboseq QC and ORF predictions by Ribotricer - [Workflow reporting and genomes](#workflow-reporting-and-genomes) - [Reference genome files](#reference-genome-files) - Saving reference genome indices/files - [Pipeline information](#pipeline-information) - Report metrics generated during the workflow execution @@ -226,7 +227,9 @@ After extracting the UMI information from the read
sequence (see [UMI-tools extr ## Riboseq-specific QC -reads distribution around annotated protein coding regions on user provided transcripts +Read distribution metrics around annotated protein coding regions or based on alignments alone, plus related metrics. + +### Ribo-TISH quality
Output files @@ -237,8 +240,23 @@ reads distribution around annotated protein coding regions on user provided tran - `*.para.py`: P-site offsets for different reads lengths in python code dict format
+### Ribotricer detect-orfs QC outputs + +
+Output files + +- `riboseq_qc/ribotricer/` + - `*_read_length_dist.pdf`: PDF-format read length distribution as quality control + - `*_metagene_plots.pdf`: Metagene plots for quality control + - `*_protocol.txt`: txt file containing inferred protocol if it was inferred (not supplied as input) + - `*_metagene_profiles_5p.tsv`: Metagene profile aligning with the start codon + - `*_metagene_profiles_3p.tsv`: Metagene profile aligning with the stop codon +
+ ## ORF predictions +### Ribo-TISH predict +
Output files @@ -252,6 +270,16 @@ reads distribution around annotated protein coding regions on user provided tran - `allsamples_transprofile.py` RPF P-site profile for each transcript from Ribo-TISH ran over all samples at once
+### Ribotricer detect-orfs + +
+Output files + +- `orf_predictions/ribotricer/` + - `*_translating_ORFs.tsv`: TSV with ORFs assessed as translating in the associated BAM file + - `*_psite_offsets.txt`: If the P-site offsets are not provided, txt file containing the derived relative offsets. +
+ ### MultiQC
From 8ad4081a23f45e07b4ae136e69e6bf9ae604b2ba Mon Sep 17 00:00:00 2001 From: Jonathan Manning Date: Mon, 18 Mar 2024 16:01:14 +0000 Subject: [PATCH 14/22] syntax fix --- conf/modules.config | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/conf/modules.config b/conf/modules.config index d16a95b..6780862 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -1252,7 +1252,7 @@ if (!params.skip_ribotricer) { [ path: { "${params.outdir}/riboseq_qc/ribotricer" }, mode: params.publish_dir_mode, - pattern: "*_{read_length_dist.pdf,metagene_plots.pdf,bam_summary.txt,protocol.txt,metagene_profiles_5p.tsv,metagene_profiles_3p.tsv}" + pattern: "*_{read_length_dist.pdf,metagene_plots.pdf,bam_summary.txt,protocol.txt,metagene_profiles_5p.tsv,metagene_profiles_3p.tsv}", saveAs: { filename -> filename.equals('versions.yml') ? null : filename } ], [ From 78dbd4765fc282a43590d214f71150d41e74d1d4 Mon Sep 17 00:00:00 2001 From: Jonathan Manning Date: Mon, 18 Mar 2024 16:42:36 +0000 Subject: [PATCH 15/22] fix profile details --- conf/test_full.config | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/conf/test_full.config b/conf/test_full.config index 55d8135..5de21c8 100644 --- a/conf/test_full.config +++ b/conf/test_full.config @@ -1,18 +1,18 @@ /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - Nextflow config file for running minimal tests + Nextflow config file for running full-size tests ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Defines input files and everything required to run a fast and simple pipeline test. 
Use as follows: - nextflow run nf-core/riboseq -profile test, --outdir + nextflow run nf-core/riboseq -profile test_full, --outdir ---------------------------------------------------------------------------------------- */ params { - config_profile_name = 'Test profile' - config_profile_description = 'Minimal test dataset to check pipeline function' + config_profile_name = 'Full test profile' + config_profile_description = 'Full test dataset to check pipeline function' // Input data input = 'https://raw.githubusercontent.com/nf-core/test-datasets/riboseq/samplesheet/samplesheet_full.csv' From aa4f09e6be0943eb3735bdd0aeb5e507af42c768 Mon Sep 17 00:00:00 2001 From: Jonathan Manning Date: Mon, 18 Mar 2024 16:44:52 +0000 Subject: [PATCH 16/22] Reinstate test data path --- tests/nextflow.config | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/tests/nextflow.config b/tests/nextflow.config index 786df68..36706b2 100644 --- a/tests/nextflow.config +++ b/tests/nextflow.config @@ -26,9 +26,7 @@ process { params { test_data_base = 'https://raw.githubusercontent.com/nf-core/test-datasets/modules' - // Test data on S3 needs update - //modules_testdata_base_path = 's3://ngi-igenomes/testdata/nf-core/modules/' - modules_testdata_base_path = 'https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/' + modules_testdata_base_path = 's3://ngi-igenomes/testdata/nf-core/modules/' hisat2_build_memory = '3.GB' } From ffc8ee8a794c736b1ac46c8ecaf5594089d62749 Mon Sep 17 00:00:00 2001 From: Jonathan Manning Date: Tue, 19 Mar 2024 09:26:06 +0000 Subject: [PATCH 17/22] [skip ci] add link to credits --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 9c8b338..fad153b 100644 --- a/README.md +++ b/README.md @@ -77,7 +77,7 @@ For more details about the output files and reports, please refer to the ## Credits -nf-core/riboseq was originally written by [Jonathan Manning](https://github.com/pinin4fjords) 
(Bioinformatics Engineer as Seqera) with funding from [Altos labs](https://www.altoslabs.com/) and in discussion with [Felix Krueger](https://github.com/FelixKrueger) and Christel Krueger. We thank the following people for their input: +nf-core/riboseq was originally written by [Jonathan Manning](https://github.com/pinin4fjords) (Bioinformatics Engineer as Seqera) with funding from [Altos labs](https://www.altoslabs.com/) and in discussion with [Felix Krueger](https://github.com/FelixKrueger) and [Christel Krueger](https://github.com/ChristelKrueger). We thank the following people for their input: - Anne Bresciani (ZS) - Mikhail Osipovitch (ZS) From 003b24883877a20346b20f492e9dc6afb9a0f34d Mon Sep 17 00:00:00 2001 From: Jonathan Manning Date: Thu, 21 Mar 2024 09:53:03 +0000 Subject: [PATCH 18/22] Tweak full test profile --- conf/test_full.config | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/conf/test_full.config b/conf/test_full.config index 5de21c8..2ffa8e0 100644 --- a/conf/test_full.config +++ b/conf/test_full.config @@ -16,8 +16,9 @@ params { // Input data input = 'https://raw.githubusercontent.com/nf-core/test-datasets/riboseq/samplesheet/samplesheet_full.csv' - ribo_database_manifest = 'https://raw.githubusercontent.com/nf-core/test-datasets/riboseq/testdata/rrna-db.txt' - fasta = 'https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/riboseq_expression/Homo_sapiens.GRCh38.dna.chromosome.20.fa.gz' - gtf = 'https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/riboseq_expression/Homo_sapiens.GRCh38.111_chr20.gtf' + ribo_database_manifest = 'https://raw.githubusercontent.com/nf-core/test-datasets/riboseq/testdata/rrna-db-full.txt' + + fasta = 'https://ftp.ensembl.org/pub/release-111/fasta/homo_sapiens/dna/Homo_sapiens.GRCh38.dna.primary_assembly.fa.gz' + gtf = 'https://ftp.ensembl.org/pub/release-111/gtf/homo_sapiens/Homo_sapiens.GRCh38.111.gtf.gz' 
min_trimmed_reads = 1000 } From a1cc1d8b52f9d105fcdf34e6167b7d8897f07f7f Mon Sep 17 00:00:00 2001 From: Jonathan Manning Date: Thu, 21 Mar 2024 09:53:24 +0000 Subject: [PATCH 19/22] increase timeout for star --- conf/base.config | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/conf/base.config b/conf/base.config index e4331b0..dd4100a 100644 --- a/conf/base.config +++ b/conf/base.config @@ -44,7 +44,7 @@ process { withLabel:process_high { cpus = { check_max( 12 * task.attempt, 'cpus' ) } memory = { check_max( 72.GB * task.attempt, 'memory' ) } - time = { check_max( 16.h * task.attempt, 'time' ) } + time = { check_max( 24.h * task.attempt, 'time' ) } } withLabel:process_long { time = { check_max( 20.h * task.attempt, 'time' ) } From 7fbfdce22d9f049d55b795cecbeb38b5fabc9e3a Mon Sep 17 00:00:00 2001 From: Jonathan Manning Date: Wed, 3 Apr 2024 09:37:54 +0100 Subject: [PATCH 20/22] Remove requirement for fasta in schema --- nextflow_schema.json | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/nextflow_schema.json b/nextflow_schema.json index 8bc2809..47c1624 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -211,8 +211,7 @@ "hidden": true, "help_text": "Do not load `igenomes.config` when running the pipeline. You may choose this option if you observe clashes between custom parameters and those supplied in `igenomes.config`." 
} - }, - "required": ["fasta"] + } }, "read_trimming_options": { "title": "Read trimming options", From 4e18a8a504fb4bc090f84a9bb4a59ea5ac6f3f52 Mon Sep 17 00:00:00 2001 From: Jonathan Manning Date: Wed, 3 Apr 2024 09:38:57 +0100 Subject: [PATCH 21/22] Doc fix --- docs/usage.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/usage.md b/docs/usage.md index e1140ae..e3fec08 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -34,7 +34,7 @@ The pipeline will auto-detect whether a sample is single- or paired-end using th A final samplesheet file consisting of both single- and paired-end data may look something like the one below. This is for 6 samples, where `TREATMENT_REP3` has been sequenced twice. ```csv title="samplesheet.csv" -sample,fastq_1,fastq_2,strandedness +sample,fastq_1,fastq_2,strandedness,type CONTROL_REP1,AEG588A1_S1_L002_R1_001.fastq.gz,AEG588A1_S1_L002_R2_001.fastq.gz,forward,rnaseq CONTROL_REP2,AEG588A2_S2_L002_R1_001.fastq.gz,AEG588A2_S2_L002_R2_001.fastq.gz,forward,rnaseq CONTROL_REP3,AEG588A3_S3_L002_R1_001.fastq.gz,AEG588A3_S3_L002_R2_001.fastq.gz,forward,rnaseq From 05cdad2960abf1c1bc946a39def32fd1a1c5f599 Mon Sep 17 00:00:00 2001 From: Jonathan Manning Date: Wed, 3 Apr 2024 10:43:16 +0100 Subject: [PATCH 22/22] Prettier --- CITATIONS.md | 1 - README.md | 12 ++++++------ 2 files changed, 6 insertions(+), 7 deletions(-) diff --git a/CITATIONS.md b/CITATIONS.md index c7a338c..b15522e 100644 --- a/CITATIONS.md +++ b/CITATIONS.md @@ -50,7 +50,6 @@ > Smith T, Heger A, Sudbery I. UMI-tools: modeling sequencing errors in Unique Molecular Identifiers to improve quantification accuracy Genome Res. 2017 Mar;27(3):491-499. doi: 10.1101/gr.209601.116. Epub 2017 Jan 18. PubMed PMID: 28100584; PubMed Central PMCID: PMC5340976. 
- ## Software packaging/containerisation tools - [Anaconda](https://anaconda.com) diff --git a/README.md b/README.md index fad153b..f477516 100644 --- a/README.md +++ b/README.md @@ -52,7 +52,7 @@ sample,fastq_1,fastq_2,strandedness,type CONTROL_REP1,AEG588A1_S1_L002_R1_001.fastq.gz,AEG588A1_S1_L002_R2_001.fastq.gz,forward,riboseq ``` -Each row represents a fastq file (single-end) or a pair of fastq files (paired end). Each row should have a 'type' value of `riboseq`, `tiseq` or `rnaseq`. Future iterations of the workflow will conduct paired analysis of matched riboseq and rnaseq samples to accomplish analysis types such as 'translational efficiency, but in the current version you should set this to `riboseq` or `tiseq` for reglar ribo-seq or TI-seq data respectively. +Each row represents a fastq file (single-end) or a pair of fastq files (paired end). Each row should have a 'type' value of `riboseq`, `tiseq` or `rnaseq`. Future iterations of the workflow will conduct paired analysis of matched riboseq and rnaseq samples to accomplish analysis types such as 'translational efficiency', but in the current version you should set this to `riboseq` or `tiseq` for regular ribo-seq or TI-seq data respectively. Now, you can run the pipeline using: @@ -79,11 +79,11 @@ For more details about the output files and reports, please refer to the nf-core/riboseq was originally written by [Jonathan Manning](https://github.com/pinin4fjords) (Bioinformatics Engineer as Seqera) with funding from [Altos labs](https://www.altoslabs.com/) and in discussion with [Felix Krueger](https://github.com/FelixKrueger) and [Christel Krueger](https://github.com/ChristelKrueger).
We thank the following people for their input: - - Anne Bresciani (ZS) - - Mikhail Osipovitch (ZS) - - Edward Wallace (University of Edinburgh) - - Jack Tierney (University College Cork) - +- Anne Bresciani (ZS) +- Mikhail Osipovitch (ZS) +- Edward Wallace (University of Edinburgh) +- Jack Tierney (University College Cork) + ## Contributions and Support If you would like to contribute to this pipeline, please see the [contributing guidelines](.github/CONTRIBUTING.md).