diff --git a/.gitignore b/.gitignore index ccae37a..ef809d7 100644 --- a/.gitignore +++ b/.gitignore @@ -9,4 +9,4 @@ results/ test.xml testing* testing/ -work/ \ No newline at end of file +work/ diff --git a/bin/busco_2_table.py b/bin/busco_2_table.py old mode 100644 new mode 100755 index 6b9f811..56901b3 --- a/bin/busco_2_table.py +++ b/bin/busco_2_table.py @@ -1,3 +1,4 @@ +#!/usr/bin/python3 import pandas as pd import argparse diff --git a/bin/plot_tree_summary.R b/bin/plot_tree_summary.R index 44de8a6..2839009 100755 --- a/bin/plot_tree_summary.R +++ b/bin/plot_tree_summary.R @@ -1,3 +1,4 @@ +#!/usr/bin/Rscript # Load necessary libraries if (!requireNamespace("argparse", quietly = TRUE)) { install.packages("argparse") diff --git a/bin/plot_tree_summary2.R b/bin/plot_tree_summary2.R index 2142c8b..d048b4d 100755 --- a/bin/plot_tree_summary2.R +++ b/bin/plot_tree_summary2.R @@ -1,3 +1,4 @@ +#!/usr/bin/Rscript # Load necessary libraries if (!requireNamespace("argparse", quietly = TRUE)) { install.packages("argparse") diff --git a/bin/quast_2_table.py b/bin/quast_2_table.py old mode 100644 new mode 100755 index 135adef..4e6199b --- a/bin/quast_2_table.py +++ b/bin/quast_2_table.py @@ -1,3 +1,4 @@ +#!/usr/bin/python3 import pandas as pd import argparse import os diff --git a/conf/modules.config b/conf/modules.config index 5db51ff..fed48f5 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -95,4 +95,11 @@ process { ] } + withName: 'LONGEST' { + publishDir = [ + path: { "$params.outdir/output_data/longest" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename } + ] + } } diff --git a/conf/test_full.config b/conf/test_full.config index 3d290d6..3cd63b0 100644 --- a/conf/test_full.config +++ b/conf/test_full.config @@ -12,7 +12,7 @@ params { config_profile_name = 'Full test profile' - config_profile_description = 'Full test dataset of hymenpotera genomes' + config_profile_description = 'Full test dataset of Hymenoptera genomes' input = 'assets/samplesheet.csv' diff --git a/modules.json b/modules.json index 8e5e137..e45adfb 100644 --- a/modules.json +++ b/modules.json @@ -5,6 +5,11 @@ "https://github.com/nf-core/modules.git": { "modules": { "nf-core": { + "agat/convertspgxf2gxf": { + "branch": "master", + "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", + "installed_by": ["modules"] + }, "agat/spstatistics": { "branch": "master", "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", diff --git a/modules/local/gffread.nf b/modules/local/gffread.nf index 5f6f1a2..df79b81 100644 --- a/modules/local/gffread.nf +++ b/modules/local/gffread.nf @@ -63,9 +63,9 @@ process GFFREAD { fi - ${projectDir}/bin/gff_to_genetranshash.2.pl - ${projectDir}/bin/prot_fasta_to_longest.pl ${prefix}.prot.fa ${prefix}_longestisoform.txt - ${projectDir}/bin/fasta_topIsoform.pl ${prefix}.splicedcds.fa ${prefix}_longestisoform.txt + gff_to_genetranshash.2.pl + prot_fasta_to_longest.pl ${prefix}.prot.fa ${prefix}_longestisoform.txt + fasta_topIsoform.pl ${prefix}.splicedcds.fa ${prefix}_longestisoform.txt #This part checks if longest isoform worked, if not we will continue with all proteins into Orthofinder. Warning sent to screen. 
diff --git a/modules/local/longest.nf b/modules/local/longest.nf index cc16589..1636112 100644 --- a/modules/local/longest.nf +++ b/modules/local/longest.nf @@ -4,7 +4,6 @@ process LONGEST { label 'process_med_memory' container = 'biocontainers/agat:1.3.0--pl5321hdfd78af_0' - publishDir "$params.outdir/output_data/longest" , mode: "${params.publish_dir_mode}", pattern:"*.txt" input: tuple val (meta), path(gff) diff --git a/modules/local/tree_summary.nf b/modules/local/tree_summary.nf index c7bde84..eca2247 100644 --- a/modules/local/tree_summary.nf +++ b/modules/local/tree_summary.nf @@ -27,18 +27,18 @@ process TREE_SUMMARY { cut -f 1,3,4,5,6,7 Busco_combined >> Busco_combined_cut sed -i \'s/\\.fasta//g\' Busco_combined_cut - python3 ${projectDir}/bin/busco_2_table.py Busco_combined_cut Busco_to_plot.tsv + busco_2_table.py Busco_combined_cut Busco_to_plot.tsv # Combine QUAST ouput - python3 ${projectDir}/bin/quast_2_table.py *quast.tsv -o Quast_to_plot.tsv -col N50,N90 -plot_types bar,bar + quast_2_table.py *quast.tsv -o Quast_to_plot.tsv -col N50,N90 -plot_types bar,bar #Remove unwanted extensions from Busco tables sed \'s/.prot.fa.largestIsoform.fa//g\' Busco_to_plot.tsv > Busco_to_plot_final.tsv sed \'s/.prot.fa.largestIsoform.fa//g\' Quast_to_plot.tsv > Quast_to_plot_final.tsv # Run summary plot - /usr/bin/Rscript ${projectDir}/bin/plot_tree_summary2.R tree.nw Busco_to_plot_final.tsv --tree_size 0.6 - /usr/bin/Rscript ${projectDir}/bin/plot_tree_summary.R tree.nw Quast_to_plot_final.tsv --tree_size 0.6 + plot_tree_summary2.R tree.nw Busco_to_plot_final.tsv --tree_size 0.6 + plot_tree_summary.R tree.nw Quast_to_plot_final.tsv --tree_size 0.6 cat <<-END_VERSIONS > versions.yml "${task.process}": diff --git a/modules/nf-core/agat/convertspgxf2gxf/environment.yml b/modules/nf-core/agat/convertspgxf2gxf/environment.yml new file mode 100644 index 0000000..0410ee7 --- /dev/null +++ b/modules/nf-core/agat/convertspgxf2gxf/environment.yml @@ -0,0 +1,5 @@ +channels: + - 
conda-forge + - bioconda +dependencies: + - bioconda::agat=1.4.0 diff --git a/modules/nf-core/agat/convertspgxf2gxf/main.nf b/modules/nf-core/agat/convertspgxf2gxf/main.nf new file mode 100644 index 0000000..b9a7668 --- /dev/null +++ b/modules/nf-core/agat/convertspgxf2gxf/main.nf @@ -0,0 +1,48 @@ +process AGAT_CONVERTSPGXF2GXF { + tag "$meta.id" + label 'process_single' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/agat:1.4.0--pl5321hdfd78af_0' : + 'biocontainers/agat:1.4.0--pl5321hdfd78af_0' }" + + input: + tuple val(meta), path(gxf) + + output: + tuple val(meta), path("*.agat.gff") , emit: output_gff + tuple val(meta), path("*.log") , emit: log + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + """ + agat_convert_sp_gxf2gxf.pl \\ + --gxf $gxf \\ + --output ${prefix}.agat.gff \\ + $args + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + agat: \$(agat_convert_sp_gxf2gxf.pl --help | sed -n 's/.*(AGAT) - Version: \\(.*\\) .*/\\1/p') + END_VERSIONS + """ + + stub: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + """ + touch ${prefix}.agat.gff + touch ${gxf}.agat.log + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + agat: \$(agat_convert_sp_gxf2gxf.pl --help | sed -n 's/.*(AGAT) - Version: \\(.*\\) .*/\\1/p') + END_VERSIONS + """ +} diff --git a/modules/nf-core/agat/convertspgxf2gxf/meta.yml b/modules/nf-core/agat/convertspgxf2gxf/meta.yml new file mode 100644 index 0000000..d9468ec --- /dev/null +++ b/modules/nf-core/agat/convertspgxf2gxf/meta.yml @@ -0,0 +1,56 @@ +name: agat_convertspgxf2gxf +description: | + Fixes and standardizes GFF/GTF files and outputs a cleaned GFF/GTF file +keywords: + - genome + - gff + - gtf + - conversion 
+tools: + - agat: + description: "AGAT is a toolkit for manipulation and getting information from + GFF/GTF files" + homepage: "https://github.com/NBISweden/AGAT" + documentation: "https://agat.readthedocs.io/" + tool_dev_url: "https://github.com/NBISweden/AGAT" + doi: "10.5281/zenodo.3552717" + licence: ["GPL v3"] + identifier: biotools:AGAT +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - gxf: + type: file + description: Annotation file in GFF3/GTF format + pattern: "*.{gff, gtf}" +output: + - output_gff: + - meta: + type: file + description: Cleaned annotation file in GFF3 format + pattern: "*.{gff}" + - "*.agat.gff": + type: file + description: Cleaned annotation file in GFF3 format + pattern: "*.{gff}" + - log: + - meta: + type: file + description: Log file of the conversion process + pattern: "*.{log}" + - "*.log": + type: file + description: Log file of the conversion process + pattern: "*.{log}" + - versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@toniher" +maintainers: + - "@toniher" diff --git a/modules/nf-core/agat/convertspgxf2gxf/tests/main.nf.test b/modules/nf-core/agat/convertspgxf2gxf/tests/main.nf.test new file mode 100644 index 0000000..d8d7bc2 --- /dev/null +++ b/modules/nf-core/agat/convertspgxf2gxf/tests/main.nf.test @@ -0,0 +1,60 @@ +nextflow_process { + + name "Test Process AGAT_CONVERTSPGXF2GXF" + script "../main.nf" + process "AGAT_CONVERTSPGXF2GXF" + + tag "modules" + tag "modules_nfcore" + tag "agat" + tag "agat/convertspgxf2gxf" + + test("sarscov2 genome [gtf]") { + + when { + process { + """ + input[0] = [ + [ id: 'test' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.gtf', checkIfExists: true) + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out.output_gff, + 
process.out.versions).match() }, + { assert path(process.out.log[0][1]).exists() } + ) + } + + } + + test("sarscov2 genome [gtf] - stub") { + + options "-stub" + + when { + process { + """ + input[0] = [ + [ id: 'test' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.gtf', checkIfExists: true) + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + +} diff --git a/modules/nf-core/agat/convertspgxf2gxf/tests/main.nf.test.snap b/modules/nf-core/agat/convertspgxf2gxf/tests/main.nf.test.snap new file mode 100644 index 0000000..e89073f --- /dev/null +++ b/modules/nf-core/agat/convertspgxf2gxf/tests/main.nf.test.snap @@ -0,0 +1,71 @@ +{ + "sarscov2 genome [gtf] - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test.agat.gff:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + [ + { + "id": "test" + }, + "genome.gtf.agat.log:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "2": [ + "versions.yml:md5,5ec6166c5c080ec4bc08a8fe55ada486" + ], + "log": [ + [ + { + "id": "test" + }, + "genome.gtf.agat.log:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "output_gff": [ + [ + { + "id": "test" + }, + "test.agat.gff:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions": [ + "versions.yml:md5,5ec6166c5c080ec4bc08a8fe55ada486" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-04-12T12:25:34.583294" + }, + "sarscov2 genome [gtf]": { + "content": [ + [ + [ + { + "id": "test" + }, + "test.agat.gff:md5,7d7e9bcd82a2f0bb7d8a38f85e82f0bc" + ] + ], + [ + "versions.yml:md5,5ec6166c5c080ec4bc08a8fe55ada486" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-04-12T12:21:21.310464" + } +} \ No newline at end of file diff --git a/modules/nf-core/agat/convertspgxf2gxf/tests/tags.yml b/modules/nf-core/agat/convertspgxf2gxf/tests/tags.yml new file mode 100644 index 
0000000..85c7000 --- /dev/null +++ b/modules/nf-core/agat/convertspgxf2gxf/tests/tags.yml @@ -0,0 +1,2 @@ +agat/convertspgxf2gxf: + - "modules/nf-core/agat/convertspgxf2gxf/**" diff --git a/subworkflows/local/genome_and_annotation.nf b/subworkflows/local/genome_and_annotation.nf index e1af4cb..c07f551 100644 --- a/subworkflows/local/genome_and_annotation.nf +++ b/subworkflows/local/genome_and_annotation.nf @@ -1,4 +1,5 @@ +include { AGAT_CONVERTSPGXF2GXF } from '../../modules/nf-core/agat/convertspgxf2gxf' include { LONGEST } from '../../modules/local/longest' include { BUSCO_BUSCO } from '../../modules/nf-core/busco/busco/main' include { QUAST } from '../../modules/nf-core/quast/main' @@ -21,6 +22,9 @@ workflow GENOME_AND_ANNOTATION { // TODO nf-core: substitute modules here for the modules of your subworkflow + // Check GFF integrity + ch_agat_gff = AGAT_CONVERTSPGXF2GXF(ch_gff).output_gff + // // Run Quast // @@ -28,7 +32,7 @@ workflow GENOME_AND_ANNOTATION { QUAST ( ch_fasta, [[],[]], - ch_gff + ch_agat_gff ) ch_versions = ch_versions.mix(QUAST.out.versions.first()) @@ -41,7 +45,7 @@ workflow GENOME_AND_ANNOTATION { // AGAT_SPSTATISTICS ( - ch_gff + ch_agat_gff ) ch_versions = ch_versions.mix(AGAT_SPSTATISTICS.out.versions.first()) @@ -50,7 +54,7 @@ workflow GENOME_AND_ANNOTATION { // // LONGEST ( -// ch_gff +// ch_agat_gff // ) // ch_versions = ch_versions.mix(LONGEST.out.versions.first()) // @@ -60,7 +64,7 @@ workflow GENOME_AND_ANNOTATION { // // ch_long_gff = LONGEST.out.longest_proteins // - inputChannel = ch_gff.combine(ch_fasta, by: 0) + inputChannel = ch_agat_gff.combine(ch_fasta, by: 0) // Split the input channel into two channels gffChannel = inputChannel.map { tuple ->