diff --git a/conf/modules.config b/conf/modules.config index fbb5f52..dd15d6c 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -240,8 +240,8 @@ process { // SUBWORKFLOW: GFF_MERGE_CLEANUP ext.prefix = { "${meta.id}.liftoff.braker" } } - withName: '.*:GFF_MERGE_CLEANUP:AGAT_SPFILTERBYORFSIZE' { - ext.args = params.filter_genes_by_aa_length ? "-s ${params.filter_genes_by_aa_length}" : '' + withName: '.*:GFF_MERGE_CLEANUP:FILTER_BY_ORF_SIZE' { + ext.args = params.filter_genes_by_aa_length ? "--no-pseudo --keep-genes -C -l ${params.filter_genes_by_aa_length * 3}" : '' } withName: '.*:GFF_MERGE_CLEANUP:GT_GFF3' { diff --git a/modules.json b/modules.json index 6b9d74a..da05f16 100644 --- a/modules.json +++ b/modules.json @@ -15,11 +15,6 @@ "git_sha": "a8939d36280e7d9037c7cf164eeede19e46546a4", "installed_by": ["gxf_fasta_agat_spaddintrons_spextractsequences"] }, - "agat/spfilterbyorfsize": { - "branch": "main", - "git_sha": "a0054cdffbd84f002fb6582b28575b699e01098e", - "installed_by": ["modules"] - }, "agat/spflagshortintrons": { "branch": "main", "git_sha": "d8f08700c82a3bd14811a3dfe7e7d63838130693", diff --git a/modules/gallvp/agat/spfilterbyorfsize/environment.yml b/modules/gallvp/agat/spfilterbyorfsize/environment.yml deleted file mode 100644 index 2c3daab..0000000 --- a/modules/gallvp/agat/spfilterbyorfsize/environment.yml +++ /dev/null @@ -1,7 +0,0 @@ ---- -# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json -channels: - - conda-forge - - bioconda -dependencies: - - "bioconda::agat=1.4.2" diff --git a/modules/gallvp/agat/spfilterbyorfsize/main.nf b/modules/gallvp/agat/spfilterbyorfsize/main.nf deleted file mode 100644 index 502a9cd..0000000 --- a/modules/gallvp/agat/spfilterbyorfsize/main.nf +++ /dev/null @@ -1,60 +0,0 @@ -process AGAT_SPFILTERBYORFSIZE { - tag "$meta.id" - label 'process_single' - - conda "${moduleDir}/environment.yml" - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/agat:1.4.2--pl5321hdfd78af_0': - 'biocontainers/agat:1.4.2--pl5321hdfd78af_0' }" - - input: - tuple val(meta), path(gxf) - path config - - output: - tuple val(meta), path("*.passed.gff") , emit: passed_gff - tuple val(meta), path("*.failed.gff") , emit: failed_gff - path "versions.yml" , emit: versions - - when: - task.ext.when == null || task.ext.when - - script: - def args = task.ext.args ?: '' - def prefix = task.ext.prefix ?: "${meta.id}" - def config_arg = config ? "-c $config" : '' - if( "$gxf" in [ "${prefix}.passed.gff", "${prefix}.failed.gff" ] ) error "Input and output names are the same, use \"task.ext.prefix\" to disambiguate!" - """ - agat_sp_filter_by_ORF_size.pl \\ - -g $gxf \\ - $args \\ - $config_arg \\ - -o $prefix - - mv \\ - ${prefix}_NOT* \\ - "${prefix}.failed.gff" - - mv \\ - ${prefix}_* \\ - "${prefix}.passed.gff" - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - agat: \$(agat_sp_flag_short_introns.pl -h | sed -n 's/.*(AGAT) - Version: \\(.*\\) .*/\\1/p') - END_VERSIONS - """ - - stub: - def prefix = task.ext.prefix ?: "${meta.id}" - if( "$gxf" in [ "${prefix}.passed.gff", "${prefix}.failed.gff" ] ) error "Input and output names are the same, use \"task.ext.prefix\" to disambiguate!" - """ - touch ${prefix}.passed.gff - touch ${prefix}.failed.gff - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - agat: \$(agat_sp_flag_short_introns.pl -h | sed -n 's/.*(AGAT) - Version: \\(.*\\) .*/\\1/p') - END_VERSIONS - """ -} diff --git a/modules/gallvp/agat/spfilterbyorfsize/meta.yml b/modules/gallvp/agat/spfilterbyorfsize/meta.yml deleted file mode 100644 index cf399da..0000000 --- a/modules/gallvp/agat/spfilterbyorfsize/meta.yml +++ /dev/null @@ -1,67 +0,0 @@ -# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/meta-schema.json -name: "agat_spfilterbyorfsize" -description: The script reads a gff annotation file, and create two output files, - one contains the gene models with ORF passing the test, the other contains the rest. - By default the test is "> 100" that means all gene models that have ORF longer than - 100 Amino acids, will pass the test. -keywords: - - genomics - - GFF/GTF - - filter - - annotation -tools: - - "agat": - description: "Another Gff Analysis Toolkit (AGAT). Suite of tools to handle gene - annotations in any GTF/GFF format." - homepage: "https://agat.readthedocs.io/en/latest/" - documentation: "https://agat.readthedocs.io/en/latest/" - tool_dev_url: "https://github.com/NBISweden/AGAT" - doi: "10.5281/zenodo.3552717" - licence: ["GPL v3"] - identifier: biotools:AGAT - -input: - - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. `[ id:'sample1' ]` - - gxf: - type: file - description: Input GFF3/GTF file - pattern: "*.{gff,gff3,gtf}" - - - config: - type: file - description: | - Input agat config file. By default AGAT takes as input agat_config.yaml file from the working directory if any, - otherwise it takes the orignal agat_config.yaml shipped with AGAT. To get the agat_config.yaml locally type: "agat config --expose". - The --config option gives you the possibility to use your own AGAT config file (located elsewhere or named differently). - pattern: "*.yaml" -output: - - passed_gff: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. `[ id:'sample1' ] - - "*.passed.gff": - type: file - description: GFF file with gene models which pass the filter test - - failed_gff: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. `[ id:'sample1' ] - - "*.failed.gff": - type: file - description: GFF file with remaining gene models - - versions: - - versions.yml: - type: file - description: File containing software versions - pattern: "versions.yml" -authors: - - "@GallVp" -maintainers: - - "@GallVp" diff --git a/modules/gallvp/agat/spfilterbyorfsize/tests/main.nf.test b/modules/gallvp/agat/spfilterbyorfsize/tests/main.nf.test deleted file mode 100644 index 4a6e1fc..0000000 --- a/modules/gallvp/agat/spfilterbyorfsize/tests/main.nf.test +++ /dev/null @@ -1,62 +0,0 @@ -nextflow_process { - - name "Test Process AGAT_SPFILTERBYORFSIZE" - script "../main.nf" - process "AGAT_SPFILTERBYORFSIZE" - - tag "modules" - tag "modules_gallvp" - tag "agat" - tag "agat/spfilterbyorfsize" - - test("actinidia_chinensis - genome - gtf") { - - - when { - process { - """ - input[0] = [ - [ id:'test' ], // meta map - file(params.modules_testdata_base_path + 'genomics/eukaryotes/actinidia_chinensis/genome/chr1/genome.gtf.gz', checkIfExists: true) - ] - input[1] = [] - """ - } - } - - then { - assertAll( - { assert process.success }, - { assert snapshot(process.out).match() } - ) - } - - } - - test("homo_sapiens - genome - gtf - stub") { - - options '-stub' - - when { - process { - """ - input[0] = [ - [ id:'test' ], // meta map - file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/chr1/genome.gtf', checkIfExists: true) - ] - input[1] = [] - """ - } - } - - then { - assertAll( - { assert process.success }, - { assert snapshot(process.out).match() } - ) - } - - } - - -} diff --git a/modules/gallvp/agat/spfilterbyorfsize/tests/main.nf.test.snap b/modules/gallvp/agat/spfilterbyorfsize/tests/main.nf.test.snap deleted file mode 100644 index 22b26fe..0000000 --- a/modules/gallvp/agat/spfilterbyorfsize/tests/main.nf.test.snap +++ /dev/null @@ -1,100 +0,0 @@ -{ - "homo_sapiens - genome - gtf - stub": { - "content": [ - { - "0": [ - [ - { - "id": "test" - }, - "test.passed.gff:md5,d41d8cd98f00b204e9800998ecf8427e" - ] - ], - "1": [ - [ - { - "id": "test" - }, - "test.failed.gff:md5,d41d8cd98f00b204e9800998ecf8427e" - ] - ], - "2": [ - "versions.yml:md5,bc298e3688f3f90f287f56ee6929bd29" - ], - "failed_gff": [ - [ - { - "id": "test" - }, - "test.failed.gff:md5,d41d8cd98f00b204e9800998ecf8427e" - ] - ], - "passed_gff": [ - [ - { - "id": "test" - }, - "test.passed.gff:md5,d41d8cd98f00b204e9800998ecf8427e" - ] - ], - "versions": [ - "versions.yml:md5,bc298e3688f3f90f287f56ee6929bd29" - ] - } - ], - "meta": { - "nf-test": "0.9.2", - "nextflow": "24.04.4" - }, - "timestamp": "2024-12-10T17:07:11.619928" - }, - "actinidia_chinensis - genome - gtf": { - "content": [ - { - "0": [ - [ - { - "id": "test" - }, - "test.passed.gff:md5,e2558c89e50df32d654f19f9a69e46a3" - ] - ], - "1": [ - [ - { - "id": "test" - }, - "test.failed.gff:md5,d7eb6ae1c3dc30675138029b513073eb" - ] - ], - "2": [ - "versions.yml:md5,bc298e3688f3f90f287f56ee6929bd29" - ], - "failed_gff": [ - [ - { - "id": "test" - }, - "test.failed.gff:md5,d7eb6ae1c3dc30675138029b513073eb" - ] - ], - "passed_gff": [ - [ - { - "id": "test" - }, - "test.passed.gff:md5,e2558c89e50df32d654f19f9a69e46a3" - ] - ], - "versions": [ - "versions.yml:md5,bc298e3688f3f90f287f56ee6929bd29" - ] - } - ], - "meta": { - "nf-test": "0.9.2", - "nextflow": "24.04.4" - }, - "timestamp": "2024-12-10T17:07:06.829402" - } -} \ No newline at end of file diff --git a/subworkflows/local/gff_merge_cleanup.nf b/subworkflows/local/gff_merge_cleanup.nf index fbdea37..8a77eda 100644 --- a/subworkflows/local/gff_merge_cleanup.nf +++ b/subworkflows/local/gff_merge_cleanup.nf @@ -1,6 +1,6 @@ include { AGAT_SPMERGEANNOTATIONS } from '../../modules/nf-core/agat/spmergeannotations/main' include { GT_GFF3 } from '../../modules/nf-core/gt/gff3/main' -include { AGAT_SPFILTERBYORFSIZE } from '../../modules/gallvp/agat/spfilterbyorfsize/main' +include { GFFREAD as FILTER_BY_ORF_SIZE } from '../../modules/nf-core/gffread/main' include { AGAT_CONVERTSPGXF2GXF } from '../../modules/nf-core/agat/convertspgxf2gxf/main' workflow GFF_MERGE_CLEANUP { @@ -31,18 +31,18 @@ workflow GFF_MERGE_CLEANUP { | mix ( ch_gff_branch.braker_only.map { meta, braker_gff, _liftoff_gff -> [ meta, braker_gff ] } ) ch_versions = ch_versions.mix(AGAT_SPMERGEANNOTATIONS.out.versions.first()) - // MODULE: AGAT_SPFILTERBYORFSIZE + // MODULE: GFFREAD as FILTER_BY_ORF_SIZE ch_filter_input = ch_merged_gff | branch { filter: val_filter_by_aa_length != null pass: val_filter_by_aa_length == null } - AGAT_SPFILTERBYORFSIZE ( ch_filter_input.filter, [] ) + FILTER_BY_ORF_SIZE ( ch_filter_input.filter, [] ) - ch_filtered_gff = AGAT_SPFILTERBYORFSIZE.out.passed_gff + ch_filtered_gff = FILTER_BY_ORF_SIZE.out.gffread_gff | mix ( ch_filter_input.pass ) - ch_versions = ch_versions.mix(AGAT_SPFILTERBYORFSIZE.out.versions.first()) + ch_versions = ch_versions.mix(FILTER_BY_ORF_SIZE.out.versions.first()) // MODULE: GT_GFF3 GT_GFF3 ( ch_filtered_gff ) diff --git a/tests/minimal/main.nf.test.snap b/tests/minimal/main.nf.test.snap index f4b2aba..e0f2ce3 100644 --- a/tests/minimal/main.nf.test.snap +++ b/tests/minimal/main.nf.test.snap @@ -16,9 +16,6 @@ "AGAT_SPEXTRACTSEQUENCES": { "agat": "v1.4.0" }, - "AGAT_SPFILTERBYORFSIZE": { - "agat": "v1.4.1" - }, "BRAKER3": { "braker3": "3.0.8", "augustus": "3.5.0", @@ -40,6 +37,9 @@ "FASTAVALIDATOR": { "py_fasta_validator": 0.6 }, + "FILTER_BY_ORF_SIZE": { + "gffread": "0.12.7" + }, "FINAL_GFF_CHECK": { "genometools": "1.6.5" }, @@ -70,9 +70,9 @@ "stable paths": [ "a_thaliana.cdna.fasta:md5,12b9bef973e488640aec8c04ba3882fe", "a_thaliana.cds.fasta:md5,b81060419355a590560f92aec8536281", - "a_thaliana.gt.gff3:md5,8ab16549095f605ff8715ac4a3de58ed", + "a_thaliana.gt.gff3:md5,528459cf9596523bf66de99d24c37e20", "a_thaliana.pep.fasta:md5,4994c0393ca0245a1c57966d846d101e", - "a_thaliana.gff3:md5,d23d16cd86499d48a30ffb981ed27891", + "a_thaliana.gff3:md5,30adac1b21d7aaed6ca7fb71ab33f32d", "summary_stats.json:md5,007ba5cf2b7a2fd395a27d9458ca2d2e" ], "stable names": [ @@ -97,6 +97,6 @@ "nf-test": "0.9.2", "nextflow": "24.04.4" }, - "timestamp": "2024-12-11T06:36:01.956188" + "timestamp": "2024-12-11T21:49:09.751422" } } \ No newline at end of file diff --git a/tests/stub/main.nf.test.snap b/tests/stub/main.nf.test.snap index 1548c96..7ed6f6e 100644 --- a/tests/stub/main.nf.test.snap +++ b/tests/stub/main.nf.test.snap @@ -16,9 +16,6 @@ "AGAT_SPEXTRACTSEQUENCES": { "agat": "v1.4.0" }, - "AGAT_SPFILTERBYORFSIZE": { - "agat": "v1.4.1" - }, "AGAT_SPFILTERFEATUREFROMKILLLIST": { "agat": "v1.4.0" }, @@ -73,6 +70,9 @@ "FASTP": { "fastp": "0.23.4" }, + "FILTER_BY_ORF_SIZE": { + "gffread": "0.12.7" + }, "FINAL_GFF_CHECK": { "genometools": "1.6.5" }, @@ -208,6 +208,6 @@ "nf-test": "0.9.2", "nextflow": "24.04.4" }, - "timestamp": "2024-12-10T21:52:10.308719" + "timestamp": "2024-12-11T21:51:12.841395" } } \ No newline at end of file