From 5587b06b4a8e0902865b2bc46ba76ddcff338255 Mon Sep 17 00:00:00 2001 From: richard-burhans Date: Mon, 14 Oct 2024 14:55:24 -0400 Subject: [PATCH 1/4] Update egapx (#37) * output gff is now a separate history item * changed tests back to expect failure * bumping version number * making form interface the default, adding proteins * making form interface the default, adding proteins, minor reformatting * bumping version --------- Co-authored-by: Richard C. Burhans --- tools/ncbi_egapx/macros.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/ncbi_egapx/macros.xml b/tools/ncbi_egapx/macros.xml index 1878963..ef2f935 100644 --- a/tools/ncbi_egapx/macros.xml +++ b/tools/ncbi_egapx/macros.xml @@ -5,7 +5,7 @@ 0.2-alpha - 2 + 4 22.05 From 544ef29cd524d03b10fdc60769d5d0f2a93d3a67 Mon Sep 17 00:00:00 2001 From: richard-burhans Date: Thu, 14 Nov 2024 10:17:56 -0500 Subject: [PATCH 2/4] minor updates, upgrade to 0.3.0-alpha (#38) --- tools/ncbi_egapx/macros.xml | 23 ++++- tools/ncbi_egapx/ncbi_egapx.xml | 117 +++++++++++++------------- tools/ncbi_egapx/test-data/input.yaml | 4 +- 3 files changed, 80 insertions(+), 64 deletions(-) diff --git a/tools/ncbi_egapx/macros.xml b/tools/ncbi_egapx/macros.xml index ef2f935..5549eba 100644 --- a/tools/ncbi_egapx/macros.xml +++ b/tools/ncbi_egapx/macros.xml @@ -1,11 +1,11 @@ - quay.io/richard-burhans/egapx:@TOOL_VERSION@ + quay.io/galaxy/egapx:@TOOL_VERSION@ - 0.2-alpha - 4 + 0.3.0-alpha + 0 22.05 @@ -14,7 +14,22 @@ - 10.1093/bioinformatics/bts573 + + @misc{githubegapx, + author = {NCBI}, + year = "2024", + title = {Eukaryotic Genome Annotation Pipeline - External (EGAPx)}, + publisher = {GitHub}, + journal = {GitHub repository}, + url = {https://github.com/ncbi/egapx} + } + + + + + + + diff --git a/tools/ncbi_egapx/ncbi_egapx.xml b/tools/ncbi_egapx/ncbi_egapx.xml index fc09d9c..ba5e132 100644 --- a/tools/ncbi_egapx/ncbi_egapx.xml +++ b/tools/ncbi_egapx/ncbi_egapx.xml @@ -6,49 +6,51 @@ '$yamlconfig' && - #if 
str($reference_genome.genome_type_select) == "history" - echo 'genome: $reference_genome.genome' >> '$yamlconfig' && - #elif str($reference_genome.genome_type_select) == "indexed": - echo 'genome: $reference_genome.genome.fields.path' >> '$yamlconfig' && - #else: - echo 'genome: $reference_genome.uri' >> '$yamlconfig' && - #end if - echo 'taxid: $taxid' >> '$yamlconfig' && - echo 'reads:' >> '$yamlconfig' && - #if str($condrnaseq.rna_type_select) == "list": - #set rs = $rnaseq.split() - #set rsplit = [x.strip() for x in $rs] - #for $r in $rsplit: - echo ' - $r' >> '$yamlconfig' && - #end for - #else: - #for $r in $rnaseq: - echo ' - $r' >> '$yamlconfig' && - #end for - #end if - #if $proteins: - echo 'proteins: $proteins' >> '$yamlconfig' && - #end if - #if len($xtra.strip()) > 0: - #set lxtra = $xtra.split("\n") - #for row in $lxtra: - echo '$row' >> '$yamlconfig' && - #end for - #end if - echo '' >> '$yamlconfig' && - echo "Calculated contents of egapx yaml" && - cat '$yamlconfig' && - #else: + #if str($cond_input_style.input_style) == "fillform" + #set yamlconfig = $egapx_config + #else #set yamlconfig = $yamlin #end if + ## activate the following + ## - nextflow conda environment + ## - EGAPx python virtual environment source /galaxy/env.bash && - echo \${PATH} && + ## use the augmented container EGAPx config ln -s /galaxy/egapx/egapx_config && + ## run EGAPx python3 /galaxy/egapx/ui/egapx.py '$yamlconfig' -e galaxy -o 'egapx_out' ]]> + + + - + help="Select a built in, history or remote URI for the reference genome FASTA"> + - + - + - + - + - + + help="Either a working URI for a RNA-seq FASTA, or a bare SRA identifier will work - can be mixed"> @@ -97,7 +99,7 @@ help="All selected rna-seq fastqsanger will be added to the yaml for egapx configuration"/> - + @@ -106,7 +108,7 @@ - + @@ -122,8 +124,13 @@ - - + + + + + + + @@ -134,13 +141,8 @@ - - - - - - - + + @@ -242,7 +244,7 @@ The current version is an alpha release with limited features and organism scope EGAPx 
is the publicly accessible version of the updated NCBI [Eukaryotic Genome Annotation Pipeline](https://www.ncbi.nlm.nih.gov/genome/annotation_euk/process/). -EGAPx takes an assembly fasta file, a taxid of the organism, and RNA-seq data. Based on the taxid, EGAPx will pick protein sets and HMM models. The pipeline runs `miniprot` to align protein sequences, and `STAR` to align RNA-seq to the assembly. Protein alignments and RNA-seq read alignments are then passed to `Gnomon` for gene prediction. In the first step of `Gnomon`, the short alignments are chained together into putative gene models. +EGAPx takes an assembly FASTA file, a taxid of the organism, and RNA-seq data. Based on the taxid, EGAPx will pick protein sets and HMM models. The pipeline runs `miniprot` to align protein sequences, and `STAR` to align RNA-seq to the assembly. Protein alignments and RNA-seq read alignments are then passed to `Gnomon` for gene prediction. In the first step of `Gnomon`, the short alignments are chained together into putative gene models. In the second step, these predictions are further supplemented by *ab-initio* predictions based on HMM models. The final annotation for the input assembly is produced as a `gff` file. **Security Notice:** @@ -310,4 +312,5 @@ In the log, each line denotes the process that completed in the workflow. 
The fi 2024-03-27 11:20:24 17127134 aligns.paf ]]> + diff --git a/tools/ncbi_egapx/test-data/input.yaml b/tools/ncbi_egapx/test-data/input.yaml index 84cb561..585fd76 100644 --- a/tools/ncbi_egapx/test-data/input.yaml +++ b/tools/ncbi_egapx/test-data/input.yaml @@ -3,15 +3,13 @@ # To limit the requirements you also need to use -e docker_minimal genome: https://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/020/809/275/GCF_020809275.1_ASM2080927v1/GCF_020809275.1_ASM2080927v1_genomic.fna.gz +taxid: 6954 reads: - https://ftp.ncbi.nlm.nih.gov/genomes/TOOLS/EGAP/sample_data/Dermatophagoides_farinae_small/SRR8506572.1 - https://ftp.ncbi.nlm.nih.gov/genomes/TOOLS/EGAP/sample_data/Dermatophagoides_farinae_small/SRR8506572.2 - https://ftp.ncbi.nlm.nih.gov/genomes/TOOLS/EGAP/sample_data/Dermatophagoides_farinae_small/SRR9005248.1 - https://ftp.ncbi.nlm.nih.gov/genomes/TOOLS/EGAP/sample_data/Dermatophagoides_farinae_small/SRR9005248.2 -taxid: 6954 -proteins: [] hmm: https://ftp.ncbi.nlm.nih.gov/genomes/TOOLS/EGAP/gnomon/hmm_parameters/6956.params tasks: star_wnode: star_wnode: -cpus-per-worker 4 - From cc28ee834f74de99d89dd30baeb7ad54e699649e Mon Sep 17 00:00:00 2001 From: richard-burhans Date: Thu, 14 Nov 2024 11:25:58 -0500 Subject: [PATCH 3/4] Update ncbi egapx (#39) * minor updates, upgrade to 0.3.0-alpha * adding default taxid=0 --- tools/ncbi_egapx/ncbi_egapx.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/ncbi_egapx/ncbi_egapx.xml b/tools/ncbi_egapx/ncbi_egapx.xml index ba5e132..c5fce66 100644 --- a/tools/ncbi_egapx/ncbi_egapx.xml +++ b/tools/ncbi_egapx/ncbi_egapx.xml @@ -81,7 +81,7 @@ $row - + From a297ebaef5be4b422f2ad029ed5ef7fd2643bd04 Mon Sep 17 00:00:00 2001 From: richard-burhans Date: Thu, 14 Nov 2024 16:59:29 -0500 Subject: [PATCH 4/4] Update ncbi egapx (#40) * minor updates, upgrade to 0.3.0-alpha * adding default taxid=0 * bump version --- tools/ncbi_egapx/macros.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git 
a/tools/ncbi_egapx/macros.xml b/tools/ncbi_egapx/macros.xml index 5549eba..9678c39 100644 --- a/tools/ncbi_egapx/macros.xml +++ b/tools/ncbi_egapx/macros.xml @@ -5,7 +5,7 @@ 0.3.0-alpha - 0 + 1 22.05