From 6336ad5d9f07606f74adfa30a9e381407c3362d2 Mon Sep 17 00:00:00 2001 From: fperezcobos <154519851+fperezcobos@users.noreply.github.com> Date: Mon, 28 Oct 2024 12:30:12 +0100 Subject: [PATCH 01/11] Add files via upload --- test_athaliana.csv | 2 ++ test_prunus_dulcis.csv | 2 ++ 2 files changed, 4 insertions(+) create mode 100644 test_athaliana.csv create mode 100644 test_prunus_dulcis.csv diff --git a/test_athaliana.csv b/test_athaliana.csv new file mode 100644 index 0000000..93956e5 --- /dev/null +++ b/test_athaliana.csv @@ -0,0 +1,2 @@ +species,refseq,fasta,gff +Arabidopsis_thaliana,GCF_000001735.4,, diff --git a/test_prunus_dulcis.csv b/test_prunus_dulcis.csv new file mode 100644 index 0000000..e2fa8fe --- /dev/null +++ b/test_prunus_dulcis.csv @@ -0,0 +1,2 @@ +species,refseq,fasta,gff +Prunus_dulcis,GCF_902201215.1,, From 374dc824418cb676beae07b67dcc016659b0b2bb Mon Sep 17 00:00:00 2001 From: felipe perezdeloscobos Date: Tue, 29 Oct 2024 09:50:50 +0000 Subject: [PATCH 02/11] update --- .gitignore | 4 +- modules.json | 247 ++++++++---------- .../agat/convertspgxf2gxf/environment.yml | 5 + modules/nf-core/agat/convertspgxf2gxf/main.nf | 48 ++++ .../nf-core/agat/convertspgxf2gxf/meta.yml | 56 ++++ workflows/genomeqc.nf | 6 +- 6 files changed, 225 insertions(+), 141 deletions(-) create mode 100644 modules/nf-core/agat/convertspgxf2gxf/environment.yml create mode 100644 modules/nf-core/agat/convertspgxf2gxf/main.nf create mode 100644 modules/nf-core/agat/convertspgxf2gxf/meta.yml diff --git a/.gitignore b/.gitignore index ccae37a..f7cade8 100644 --- a/.gitignore +++ b/.gitignore @@ -9,4 +9,6 @@ results/ test.xml testing* testing/ -work/ \ No newline at end of file +work/ +felipe_testing* +test* \ No newline at end of file diff --git a/modules.json b/modules.json index 4243637..72b5d07 100644 --- a/modules.json +++ b/modules.json @@ -1,142 +1,111 @@ { - "name": "ecoflow/genomeqc", - "homePage": "https://github.com/ecoflow/genomeqc", - "repos": { - "https://github.com/nf-core/modules.git": { - "modules": { - "nf-core": { - "agat/spstatistics": { - "branch": "master", - "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", - "installed_by": [ - "modules" - ] - }, - "busco/busco": { - "branch": "master", - "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", - "installed_by": [ - "modules" - ] - }, - "fastqc": { - "branch": "master", - "git_sha": "285a50500f9e02578d90b3ce6382ea3c30216acd", - "installed_by": [ - "modules" - ] - }, - "gffread": { - "branch": "master", - "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", - "installed_by": [ - "modules" - ] - }, - "multiqc": { - "branch": "master", - "git_sha": "b7ebe95761cd389603f9cc0e0dc384c0f663815a", - "installed_by": [ - "modules" - ] - }, - "ncbigenomedownload": { - "branch": "master", - "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", - "installed_by": [ - "modules" - ] - }, - "orthofinder": { - "branch": "master", - "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", - "installed_by": [ - "modules" - ] - }, - "pigz/uncompress": { - "branch": "master", - "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", - "installed_by": [ - "modules" - ] - }, - "quast": { - "branch": "master", - "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", - "installed_by": [ - "modules" - ] - }, - "seqkit/seq": { - "branch": "master", - "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", - "installed_by": [ - "fasta_explore_search_plot_tidk" - ] - }, - "seqkit/sort": { - "branch": "master", - "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", - "installed_by": [ - "fasta_explore_search_plot_tidk" - ] - }, - "tidk/explore": { - "branch": "master", - "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", - "installed_by": [ - "fasta_explore_search_plot_tidk" - ] - }, - "tidk/plot": { - "branch": "master", - "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", - "installed_by": [ - "fasta_explore_search_plot_tidk" - ] - }, - "tidk/search": { - "branch": "master", - "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", - "installed_by": [ - "fasta_explore_search_plot_tidk" - ] - } - } - }, - "subworkflows": { - "nf-core": { - "fasta_explore_search_plot_tidk": { - "branch": "master", - "git_sha": "ab80a04707104a4baf39341581dfbced5da05479", - "installed_by": [ - "subworkflows" - ] - }, - "utils_nextflow_pipeline": { - "branch": "master", - "git_sha": "5caf7640a9ef1d18d765d55339be751bb0969dfa", - "installed_by": [ - "subworkflows" - ] - }, - "utils_nfcore_pipeline": { - "branch": "master", - "git_sha": "92de218a329bfc9a9033116eb5f65fd270e72ba3", - "installed_by": [ - "subworkflows" - ] - }, - "utils_nfvalidation_plugin": { - "branch": "master", - "git_sha": "5caf7640a9ef1d18d765d55339be751bb0969dfa", - "installed_by": [ - "subworkflows" - ] - } - } - } + "name": "ecoflow/genomeqc", + "homePage": "https://github.com/ecoflow/genomeqc", + "repos": { + "https://github.com/nf-core/modules.git": { + "modules": { + "nf-core": { + "agat/convertspgxf2gxf": { + "branch": "master", + "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", + "installed_by": ["modules"] + }, + "agat/spstatistics": { + "branch": "master", + "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", + "installed_by": ["modules"] + }, + "busco/busco": { + "branch": "master", + "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", + "installed_by": ["modules"] + }, + "fastqc": { + "branch": "master", + "git_sha": "285a50500f9e02578d90b3ce6382ea3c30216acd", + "installed_by": ["modules"] + }, + "gffread": { + "branch": "master", + "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", + "installed_by": ["modules"] + }, + "multiqc": { + "branch": "master", + "git_sha": "b7ebe95761cd389603f9cc0e0dc384c0f663815a", + "installed_by": ["modules"] + }, + "ncbigenomedownload": { + "branch": "master", + "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", + "installed_by": ["modules"] + }, + "orthofinder": { + "branch": "master", + "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", + "installed_by": ["modules"] + }, + "pigz/uncompress": { + "branch": "master", + "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", + "installed_by": ["modules"] + }, + "quast": { + "branch": "master", + "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", + "installed_by": ["modules"] + }, + "seqkit/seq": { + "branch": "master", + "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", + "installed_by": ["fasta_explore_search_plot_tidk"] + }, + "seqkit/sort": { + "branch": "master", + "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", + "installed_by": ["fasta_explore_search_plot_tidk"] + }, + "tidk/explore": { + "branch": "master", + "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", + "installed_by": ["fasta_explore_search_plot_tidk"] + }, + "tidk/plot": { + "branch": "master", + "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", + "installed_by": ["fasta_explore_search_plot_tidk"] + }, + "tidk/search": { + "branch": "master", + "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", + "installed_by": ["fasta_explore_search_plot_tidk"] + } } + }, + "subworkflows": { + "nf-core": { + "fasta_explore_search_plot_tidk": { + "branch": "master", + "git_sha": "ab80a04707104a4baf39341581dfbced5da05479", + "installed_by": ["subworkflows"] + }, + "utils_nextflow_pipeline": { + "branch": "master", + "git_sha": "5caf7640a9ef1d18d765d55339be751bb0969dfa", + "installed_by": ["subworkflows"] + }, + "utils_nfcore_pipeline": { + "branch": "master", + "git_sha": "92de218a329bfc9a9033116eb5f65fd270e72ba3", + "installed_by": ["subworkflows"] + }, + "utils_nfvalidation_plugin": { + "branch": "master", + "git_sha": "5caf7640a9ef1d18d765d55339be751bb0969dfa", + "installed_by": ["subworkflows"] + } + } + } } -} \ No newline at end of file + } +} diff --git a/modules/nf-core/agat/convertspgxf2gxf/environment.yml b/modules/nf-core/agat/convertspgxf2gxf/environment.yml new file mode 100644 index 0000000..0410ee7 --- /dev/null +++ b/modules/nf-core/agat/convertspgxf2gxf/environment.yml @@ -0,0 +1,5 @@ +channels: + - conda-forge + - bioconda +dependencies: + - bioconda::agat=1.4.0 diff --git a/modules/nf-core/agat/convertspgxf2gxf/main.nf b/modules/nf-core/agat/convertspgxf2gxf/main.nf new file mode 100644 index 0000000..b9a7668 --- /dev/null +++ b/modules/nf-core/agat/convertspgxf2gxf/main.nf @@ -0,0 +1,48 @@ +process AGAT_CONVERTSPGXF2GXF { + tag "$meta.id" + label 'process_single' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/agat:1.4.0--pl5321hdfd78af_0' : + 'biocontainers/agat:1.4.0--pl5321hdfd78af_0' }" + + input: + tuple val(meta), path(gxf) + + output: + tuple val(meta), path("*.agat.gff") , emit: output_gff + tuple val(meta), path("*.log") , emit: log + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + """ + agat_convert_sp_gxf2gxf.pl \\ + --gxf $gxf \\ + --output ${prefix}.agat.gff \\ + $args + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + agat: \$(agat_convert_sp_gxf2gxf.pl --help | sed -n 's/.*(AGAT) - Version: \\(.*\\) .*/\\1/p') + END_VERSIONS + """ + + stub: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + """ + touch ${prefix}.agat.gff + touch ${gxf}.agat.log + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + agat: \$(agat_convert_sp_gxf2gxf.pl --help | sed -n 's/.*(AGAT) - Version: \\(.*\\) .*/\\1/p') + END_VERSIONS + """ +} diff --git a/modules/nf-core/agat/convertspgxf2gxf/meta.yml b/modules/nf-core/agat/convertspgxf2gxf/meta.yml new file mode 100644 index 0000000..d9468ec --- /dev/null +++ b/modules/nf-core/agat/convertspgxf2gxf/meta.yml @@ -0,0 +1,56 @@ +name: agat_convertspgxf2gxf +description: | + Fixes and standardizes GFF/GTF files and outputs a cleaned GFF/GTF file +keywords: + - genome + - gff + - gtf + - conversion +tools: + - agat: + description: "AGAT is a toolkit for manipulation and getting information from + GFF/GTF files" + homepage: "https://github.com/NBISweden/AGAT" + documentation: "https://agat.readthedocs.io/" + tool_dev_url: "https://github.com/NBISweden/AGAT" + doi: "10.5281/zenodo.3552717" + licence: ["GPL v3"] + identifier: biotools:AGAT +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - gxf: + type: file + description: Annotation file in GFF3/GTF format + pattern: "*.{gff, gtf}" +output: + - output_gff: + - meta: + type: file + description: Cleaned annotation file in GFF3 format + pattern: "*.{gff}" + - "*.agat.gff": + type: file + description: Cleaned annotation file in GFF3 format + pattern: "*.{gff}" + - log: + - meta: + type: file + description: Log file of the conversion process + pattern: "*.{log}" + - "*.log": + type: file + description: Log file of the conversion process + pattern: "*.{log}" + - versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@toniher" +maintainers: + - "@toniher" diff --git a/workflows/genomeqc.nf b/workflows/genomeqc.nf index ce515c2..33114de 100644 --- a/workflows/genomeqc.nf +++ b/workflows/genomeqc.nf @@ -8,6 +8,7 @@ include { CREATE_PATH } from '../modules/local/create_pa include { NCBIGENOMEDOWNLOAD } from '../modules/nf-core/ncbigenomedownload/main' include { PIGZ_UNCOMPRESS as UNCOMPRESS_FASTA } from '../modules/nf-core/pigz/uncompress/main' include { PIGZ_UNCOMPRESS as UNCOMPRESS_GFF } from '../modules/nf-core/pigz/uncompress/main' +include { AGAT_CONVERTSPGXF2GXF } from '../modules/nf-core/agat/convertspgxf2gxf' include { GENOME } from '../subworkflows/local/genome' include { GENOME_AND_ANNOTATION } from '../subworkflows/local/genome_and_annotation' include { MULTIQC } from '../modules/nf-core/multiqc/main' @@ -89,6 +90,9 @@ workflow GENOMEQC { ch_gff = gff } + // Check gff integrity + ch_agat_gff = AGAT_CONVERTSPGXF2GXF(ch_gff) + // // Run TIDK // @@ -108,7 +112,7 @@ workflow GENOMEQC { } else { GENOME_AND_ANNOTATION ( ch_fasta, - ch_gff + ch_agat_gff.output_gff ) } From 8c4757c98abd53364f7d79c6236dad66c73390ad Mon Sep 17 00:00:00 2001 From: felipe perezdeloscobos Date: Tue, 29 Oct 2024 09:53:13 +0000 Subject: [PATCH 03/11] push --- .gitignore | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.gitignore b/.gitignore index f7cade8..f8a2386 100644 --- a/.gitignore +++ b/.gitignore @@ -11,4 +11,5 @@ testing* testing/ work/ felipe_testing* -test* \ No newline at end of file +test* +felipe_test/ \ No newline at end of file From 813e85f8bd7f1e60ea0e75612638a978e7399bbb Mon Sep 17 00:00:00 2001 From: felipe perezdeloscobos Date: Tue, 29 Oct 2024 09:58:50 +0000 Subject: [PATCH 04/11] added agat module --- felipe_testing.config | 3 + .../agat/convertspgxf2gxf/tests/main.nf.test | 60 ++++++++++++++++ .../convertspgxf2gxf/tests/main.nf.test.snap | 71 +++++++++++++++++++ .../agat/convertspgxf2gxf/tests/tags.yml | 2 + 4 files changed, 136 insertions(+) create mode 100644 felipe_testing.config create mode 100644 modules/nf-core/agat/convertspgxf2gxf/tests/main.nf.test create mode 100644 modules/nf-core/agat/convertspgxf2gxf/tests/main.nf.test.snap create mode 100644 modules/nf-core/agat/convertspgxf2gxf/tests/tags.yml diff --git a/felipe_testing.config b/felipe_testing.config new file mode 100644 index 0000000..882466f --- /dev/null +++ b/felipe_testing.config @@ -0,0 +1,3 @@ +procress{ + withName: ORTHO +} diff --git a/modules/nf-core/agat/convertspgxf2gxf/tests/main.nf.test b/modules/nf-core/agat/convertspgxf2gxf/tests/main.nf.test new file mode 100644 index 0000000..d8d7bc2 --- /dev/null +++ b/modules/nf-core/agat/convertspgxf2gxf/tests/main.nf.test @@ -0,0 +1,60 @@ +nextflow_process { + + name "Test Process AGAT_CONVERTSPGXF2GXF" + script "../main.nf" + process "AGAT_CONVERTSPGXF2GXF" + + tag "modules" + tag "modules_nfcore" + tag "agat" + tag "agat/convertspgxf2gxf" + + test("sarscov2 genome [gtf]") { + + when { + process { + """ + input[0] = [ + [ id: 'test' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.gtf', checkIfExists: true) + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out.output_gff, + process.out.versions).match() }, + { assert path(process.out.log[0][1]).exists() } + ) + } + + } + + test("sarscov2 genome [gtf] - stub") { + + options "-stub" + + when { + process { + """ + input[0] = [ + [ id: 'test' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.gtf', checkIfExists: true) + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + +} diff --git a/modules/nf-core/agat/convertspgxf2gxf/tests/main.nf.test.snap b/modules/nf-core/agat/convertspgxf2gxf/tests/main.nf.test.snap new file mode 100644 index 0000000..e89073f --- /dev/null +++ b/modules/nf-core/agat/convertspgxf2gxf/tests/main.nf.test.snap @@ -0,0 +1,71 @@ +{ + "sarscov2 genome [gtf] - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test.agat.gff:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + [ + { + "id": "test" + }, + "genome.gtf.agat.log:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "2": [ + "versions.yml:md5,5ec6166c5c080ec4bc08a8fe55ada486" + ], + "log": [ + [ + { + "id": "test" + }, + "genome.gtf.agat.log:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "output_gff": [ + [ + { + "id": "test" + }, + "test.agat.gff:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions": [ + "versions.yml:md5,5ec6166c5c080ec4bc08a8fe55ada486" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-04-12T12:25:34.583294" + }, + "sarscov2 genome [gtf]": { + "content": [ + [ + [ + { + "id": "test" + }, + "test.agat.gff:md5,7d7e9bcd82a2f0bb7d8a38f85e82f0bc" + ] + ], + [ + "versions.yml:md5,5ec6166c5c080ec4bc08a8fe55ada486" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-04-12T12:21:21.310464" + } +} \ No newline at end of file diff --git a/modules/nf-core/agat/convertspgxf2gxf/tests/tags.yml b/modules/nf-core/agat/convertspgxf2gxf/tests/tags.yml new file mode 100644 index 0000000..85c7000 --- /dev/null +++ b/modules/nf-core/agat/convertspgxf2gxf/tests/tags.yml @@ -0,0 +1,2 @@ +agat/convertspgxf2gxf: + - "modules/nf-core/agat/convertspgxf2gxf/**" From aa135417d1b365f1ec19c89dabd70e7b1620d56b Mon Sep 17 00:00:00 2001 From: fperezcobos <154519851+fperezcobos@users.noreply.github.com> Date: Tue, 29 Oct 2024 11:12:43 +0100 Subject: [PATCH 05/11] Delete test_prunus_dulcis.csv --- test_prunus_dulcis.csv | 2 -- 1 file changed, 2 deletions(-) delete mode 100644 test_prunus_dulcis.csv diff --git a/test_prunus_dulcis.csv b/test_prunus_dulcis.csv deleted file mode 100644 index e2fa8fe..0000000 --- a/test_prunus_dulcis.csv +++ /dev/null @@ -1,2 +0,0 @@ -species,refseq,fasta,gff -Prunus_dulcis,GCF_902201215.1,, From 9e708703c017c3c28cf231f0b65ff5cdab428623 Mon Sep 17 00:00:00 2001 From: fperezcobos <154519851+fperezcobos@users.noreply.github.com> Date: Tue, 29 Oct 2024 11:13:17 +0100 Subject: [PATCH 06/11] Delete test_athaliana.csv --- test_athaliana.csv | 2 -- 1 file changed, 2 deletions(-) delete mode 100644 test_athaliana.csv diff --git a/test_athaliana.csv b/test_athaliana.csv deleted file mode 100644 index 93956e5..0000000 --- a/test_athaliana.csv +++ /dev/null @@ -1,2 +0,0 @@ -species,refseq,fasta,gff -Arabidopsis_thaliana,GCF_000001735.4,, From a084ff463fec0429e5156f8b7e7f724aefbdee93 Mon Sep 17 00:00:00 2001 From: fperezcobos <154519851+fperezcobos@users.noreply.github.com> Date: Tue, 29 Oct 2024 11:13:57 +0100 Subject: [PATCH 07/11] Delete felipe_testing.config --- felipe_testing.config | 3 --- 1 file changed, 3 deletions(-) delete mode 100644 felipe_testing.config diff --git a/felipe_testing.config b/felipe_testing.config deleted file mode 100644 index 882466f..0000000 --- a/felipe_testing.config +++ /dev/null @@ -1,3 +0,0 @@ -procress{ - withName: ORTHO -} From f5247e1202ce4e07892ada911a35279e825b42f1 Mon Sep 17 00:00:00 2001 From: Chris Wyatt <9978862+chriswyatt1@users.noreply.github.com> Date: Tue, 29 Oct 2024 11:40:32 +0100 Subject: [PATCH 08/11] Update conf/test_full.config Co-authored-by: Mahesh Binzer-Panchal --- conf/test_full.config | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/conf/test_full.config b/conf/test_full.config index 3d290d6..3cd63b0 100644 --- a/conf/test_full.config +++ b/conf/test_full.config @@ -12,7 +12,7 @@ params { config_profile_name = 'Full test profile' - config_profile_description = 'Full test dataset of hymenpotera genomes' + config_profile_description = 'Full test dataset of Hymenoptera genomes' input = 'assets/samplesheet.csv' From 2ef206ab7dec07ba294769c956725111e811a4e5 Mon Sep 17 00:00:00 2001 From: felipe perezdeloscobos Date: Tue, 29 Oct 2024 10:44:00 +0000 Subject: [PATCH 09/11] Added AGAT gff checking to genome_and_annotation --- subworkflows/local/genome_and_annotation.nf | 12 ++++++++---- workflows/genomeqc.nf | 6 +----- 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/subworkflows/local/genome_and_annotation.nf b/subworkflows/local/genome_and_annotation.nf index e1af4cb..c07f551 100644 --- a/subworkflows/local/genome_and_annotation.nf +++ b/subworkflows/local/genome_and_annotation.nf @@ -1,4 +1,5 @@ +include { AGAT_CONVERTSPGXF2GXF } from '../../modules/nf-core/agat/convertspgxf2gxf' include { LONGEST } from '../../modules/local/longest' include { BUSCO_BUSCO } from '../../modules/nf-core/busco/busco/main' include { QUAST } from '../../modules/nf-core/quast/main' @@ -21,6 +22,9 @@ workflow GENOME_AND_ANNOTATION { // TODO nf-core: substitute modules here for the modules of your subworkflow + // Check GFF integrity + ch_agat_gff = AGAT_CONVERTSPGXF2GXF(ch_gff).output_gff + // // Run Quast // @@ -28,7 +32,7 @@ workflow GENOME_AND_ANNOTATION { QUAST ( ch_fasta, [[],[]], - ch_gff + ch_agat_gff ) ch_versions = ch_versions.mix(QUAST.out.versions.first()) @@ -41,7 +45,7 @@ workflow GENOME_AND_ANNOTATION { // AGAT_SPSTATISTICS ( - ch_gff + ch_agat_gff ) ch_versions = ch_versions.mix(AGAT_SPSTATISTICS.out.versions.first()) @@ -50,7 +54,7 @@ workflow GENOME_AND_ANNOTATION { // // LONGEST ( -// ch_gff +// ch_ch_agat_gff // ) // ch_versions = ch_versions.mix(LONGEST.out.versions.first()) // @@ -60,7 +64,7 @@ workflow GENOME_AND_ANNOTATION { // // ch_long_gff = LONGEST.out.longest_proteins // - inputChannel = ch_gff.combine(ch_fasta, by: 0) + inputChannel = ch_agat_gff.combine(ch_fasta, by: 0) // Split the input channel into two channels gffChannel = inputChannel.map { tuple -> diff --git a/workflows/genomeqc.nf b/workflows/genomeqc.nf index 33114de..ce515c2 100644 --- a/workflows/genomeqc.nf +++ b/workflows/genomeqc.nf @@ -8,7 +8,6 @@ include { CREATE_PATH } from '../modules/local/create_pa include { NCBIGENOMEDOWNLOAD } from '../modules/nf-core/ncbigenomedownload/main' include { PIGZ_UNCOMPRESS as UNCOMPRESS_FASTA } from '../modules/nf-core/pigz/uncompress/main' include { PIGZ_UNCOMPRESS as UNCOMPRESS_GFF } from '../modules/nf-core/pigz/uncompress/main' -include { AGAT_CONVERTSPGXF2GXF } from '../modules/nf-core/agat/convertspgxf2gxf' include { GENOME } from '../subworkflows/local/genome' include { GENOME_AND_ANNOTATION } from '../subworkflows/local/genome_and_annotation' include { MULTIQC } from '../modules/nf-core/multiqc/main' @@ -90,9 +89,6 @@ workflow GENOMEQC { ch_gff = gff } - // Check gff integrity - ch_agat_gff = AGAT_CONVERTSPGXF2GXF(ch_gff) - // // Run TIDK // @@ -112,7 +108,7 @@ workflow GENOMEQC { } else { GENOME_AND_ANNOTATION ( ch_fasta, - ch_agat_gff.output_gff + ch_gff ) } From 05b062c6b291ce153732e6d5a939347824c80232 Mon Sep 17 00:00:00 2001 From: fperezcobos <154519851+fperezcobos@users.noreply.github.com> Date: Tue, 29 Oct 2024 11:52:21 +0100 Subject: [PATCH 10/11] Update .gitignore --- .gitignore | 3 --- 1 file changed, 3 deletions(-) diff --git a/.gitignore b/.gitignore index f8a2386..ef809d7 100644 --- a/.gitignore +++ b/.gitignore @@ -10,6 +10,3 @@ test.xml testing* testing/ work/ -felipe_testing* -test* -felipe_test/ \ No newline at end of file From 1611259ca7ca591c3b5247be151426e683be0c55 Mon Sep 17 00:00:00 2001 From: chriswyatt1 Date: Tue, 29 Oct 2024 11:21:39 +0000 Subject: [PATCH 11/11] Fix Script path issue --- bin/busco_2_table.py | 1 + bin/plot_tree_summary.R | 1 + bin/plot_tree_summary2.R | 1 + bin/quast_2_table.py | 1 + conf/modules.config | 7 +++++++ modules/local/gffread.nf | 6 +++--- modules/local/longest.nf | 1 - modules/local/tree_summary.nf | 8 ++++---- 8 files changed, 18 insertions(+), 8 deletions(-) mode change 100644 => 100755 bin/busco_2_table.py mode change 100644 => 100755 bin/quast_2_table.py diff --git a/bin/busco_2_table.py b/bin/busco_2_table.py old mode 100644 new mode 100755 index 6b9f811..56901b3 --- a/bin/busco_2_table.py +++ b/bin/busco_2_table.py @@ -1,3 +1,4 @@ +#!/usr/bin/python3 import pandas as pd import argparse diff --git a/bin/plot_tree_summary.R b/bin/plot_tree_summary.R index 44de8a6..2839009 100755 --- a/bin/plot_tree_summary.R +++ b/bin/plot_tree_summary.R @@ -1,3 +1,4 @@ +#!/usr/bin/Rscript # Load necessary libraries if (!requireNamespace("argparse", quietly = TRUE)) { install.packages("argparse") diff --git a/bin/plot_tree_summary2.R b/bin/plot_tree_summary2.R index 2142c8b..d048b4d 100755 --- a/bin/plot_tree_summary2.R +++ b/bin/plot_tree_summary2.R @@ -1,3 +1,4 @@ +#!/usr/bin/Rscript # Load necessary libraries if (!requireNamespace("argparse", quietly = TRUE)) { install.packages("argparse") diff --git a/bin/quast_2_table.py b/bin/quast_2_table.py old mode 100644 new mode 100755 index 135adef..4e6199b --- a/bin/quast_2_table.py +++ b/bin/quast_2_table.py @@ -1,3 +1,4 @@ +#!/usr/bin/python3 import pandas as pd import argparse import os diff --git a/conf/modules.config b/conf/modules.config index 971fa5b..c8e4cd4 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -73,4 +73,11 @@ process { ] } + withName: 'LONGEST' { + publishDir = [ + path: { "$params.outdir/output_data/longest" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } } diff --git a/modules/local/gffread.nf b/modules/local/gffread.nf index 5f6f1a2..df79b81 100644 --- a/modules/local/gffread.nf +++ b/modules/local/gffread.nf @@ -63,9 +63,9 @@ process GFFREAD { fi - ${projectDir}/bin/gff_to_genetranshash.2.pl - ${projectDir}/bin/prot_fasta_to_longest.pl ${prefix}.prot.fa ${prefix}_longestisoform.txt - ${projectDir}/bin/fasta_topIsoform.pl ${prefix}.splicedcds.fa ${prefix}_longestisoform.txt + gff_to_genetranshash.2.pl + prot_fasta_to_longest.pl ${prefix}.prot.fa ${prefix}_longestisoform.txt + fasta_topIsoform.pl ${prefix}.splicedcds.fa ${prefix}_longestisoform.txt #This part checks if longest isoform worked, if not we will continue with all proteins into Orthofinder. Warning sent to screen. diff --git a/modules/local/longest.nf b/modules/local/longest.nf index cc16589..1636112 100644 --- a/modules/local/longest.nf +++ b/modules/local/longest.nf @@ -4,7 +4,6 @@ process LONGEST { label 'process_med_memory' container = 'biocontainers/agat:1.3.0--pl5321hdfd78af_0' - publishDir "$params.outdir/output_data/longest" , mode: "${params.publish_dir_mode}", pattern:"*.txt" input: tuple val (meta), path(gff) diff --git a/modules/local/tree_summary.nf b/modules/local/tree_summary.nf index c7bde84..eca2247 100644 --- a/modules/local/tree_summary.nf +++ b/modules/local/tree_summary.nf @@ -27,18 +27,18 @@ process TREE_SUMMARY { cut -f 1,3,4,5,6,7 Busco_combined >> Busco_combined_cut sed -i \'s/\\.fasta//g\' Busco_combined_cut - python3 ${projectDir}/bin/busco_2_table.py Busco_combined_cut Busco_to_plot.tsv + busco_2_table.py Busco_combined_cut Busco_to_plot.tsv # Combine QUAST ouput - python3 ${projectDir}/bin/quast_2_table.py *quast.tsv -o Quast_to_plot.tsv -col N50,N90 -plot_types bar,bar + quast_2_table.py *quast.tsv -o Quast_to_plot.tsv -col N50,N90 -plot_types bar,bar #Remove unwanted extensions from Busco tables sed \'s/.prot.fa.largestIsoform.fa//g\' Busco_to_plot.tsv > Busco_to_plot_final.tsv sed \'s/.prot.fa.largestIsoform.fa//g\' Quast_to_plot.tsv > Quast_to_plot_final.tsv # Run summary plot - /usr/bin/Rscript ${projectDir}/bin/plot_tree_summary2.R tree.nw Busco_to_plot_final.tsv --tree_size 0.6 - /usr/bin/Rscript ${projectDir}/bin/plot_tree_summary.R tree.nw Quast_to_plot_final.tsv --tree_size 0.6 + plot_tree_summary2.R tree.nw Busco_to_plot_final.tsv --tree_size 0.6 + plot_tree_summary.R tree.nw Quast_to_plot_final.tsv --tree_size 0.6 cat <<-END_VERSIONS > versions.yml "${task.process}":