From aafd1a4869da7b5d4defa25ba82101d31ec184f2 Mon Sep 17 00:00:00 2001 From: rlibouban Date: Tue, 8 Oct 2024 15:50:45 +0200 Subject: [PATCH 01/20] add_helixer_workflow --- .../annotation_helixer/.dockstore.yml | 12 + .../annotation_helixer/CHANGELOG.md | 5 + ...laxy-Workflow-annotation_helixer-tests.yml | 73 +++ .../Galaxy-Workflow-annotation_helixer.ga | 502 ++++++++++++++++++ .../annotation_helixer/README.md | 64 +++ .../plnmotmptestjobu509bkbh.json | 1 + 6 files changed, 657 insertions(+) create mode 100644 workflows/genome_annotation/annotation_helixer/.dockstore.yml create mode 100644 workflows/genome_annotation/annotation_helixer/CHANGELOG.md create mode 100644 workflows/genome_annotation/annotation_helixer/Galaxy-Workflow-annotation_helixer-tests.yml create mode 100644 workflows/genome_annotation/annotation_helixer/Galaxy-Workflow-annotation_helixer.ga create mode 100644 workflows/genome_annotation/annotation_helixer/README.md create mode 100644 workflows/genome_annotation/annotation_helixer/plnmotmptestjobu509bkbh.json diff --git a/workflows/genome_annotation/annotation_helixer/.dockstore.yml b/workflows/genome_annotation/annotation_helixer/.dockstore.yml new file mode 100644 index 000000000..18db55db3 --- /dev/null +++ b/workflows/genome_annotation/annotation_helixer/.dockstore.yml @@ -0,0 +1,12 @@ +version: 1 +workflows: +- name: main + subclass: Galaxy + publish: true + primaryDescriptorPath: /Galaxy-Workflow-annotation_helixer.ga + testParameterFiles: + - /Galaxy-Workflow-annotation_helixer-tests.yml + authors: + - name: Romane Libouban + email: romane.libouban@irisa.fr + orcid: 0009-0001-4920-9951 diff --git a/workflows/genome_annotation/annotation_helixer/CHANGELOG.md b/workflows/genome_annotation/annotation_helixer/CHANGELOG.md new file mode 100644 index 000000000..b1d9be4de --- /dev/null +++ b/workflows/genome_annotation/annotation_helixer/CHANGELOG.md @@ -0,0 +1,5 @@ +# Changelog + +## [0.1] + +Initial version of the Helixer workflow for genome 
annotation \ No newline at end of file diff --git a/workflows/genome_annotation/annotation_helixer/Galaxy-Workflow-annotation_helixer-tests.yml b/workflows/genome_annotation/annotation_helixer/Galaxy-Workflow-annotation_helixer-tests.yml new file mode 100644 index 000000000..618f95dde --- /dev/null +++ b/workflows/genome_annotation/annotation_helixer/Galaxy-Workflow-annotation_helixer-tests.yml @@ -0,0 +1,73 @@ +- doc: Test outline for Helixer Workflow + job: + Input: + class: File + location: https://zenodo.org/records/13890774/files/genome_masked.fa?download=1 + filetype: fasta + + outputs: + helixer_output: + location: https://zenodo.org/records/13890774/files/Helixer.gff3?download=1 + compare: sim_size + delta: 300000 + + busco_sum_geno: + location: https://zenodo.org/records/13890774/files/Busco_short_summary_genome.txt?download=1 + compare: sim_size + delta: 30000 + busco_gff_geno: + location: https://zenodo.org/records/13890774/files/Busco_GFF_genome.gff3?download=1 + compare: sim_size + delta: 30000 + summary_image_geno: + location: https://zenodo.org/records/13890774/files/Busco_summary_image_genome.png?download=1 + compare: sim_size + delta: 30000 + busco_missing_geno: + location: https://zenodo.org/records/13890774/files/Busco_missing_buscos_genome.tabular?download=1 + compare: sim_size + delta: 30000 + busco_table_geno: + location: https://zenodo.org/records/13890774/files/Busco_full_table_genome.tabular?download=1 + compare: sim_size + delta: 30000 + + gffread_pep: + location: https://zenodo.org/records/13890774/files/gffread_pep.fasta?download=1 + compare: sim_size + delta: 30000 + + summary: + location: https://zenodo.org/records/13890774/files/genome_annotation_statistics_summary.txt?download=1 + compare: sim_size + delta: 30000 + graphs: + location: https://zenodo.org/records/13890774/files/genome_annotation_statistics_graphs.pdf?download=1 + compare: sim_size + delta: 30000 + + summary_image_pep: + location: 
https://zenodo.org/records/13902305/files/Busco_pep_summary_image.png?download=1 + compare: sim_size + delta: 30000 + busco_table_pep: + location: https://zenodo.org/records/13890774/files/Busco_full_table_pep.tabular?download=1 + compare: sim_size + delta: 30000 + busco_sum_pep: + location: https://zenodo.org/records/13890774/files/Busco_short_summary_pep.txt?download=1 + compare: sim_size + delta: 30000 + busco_gff_pep: + location: https://zenodo.org/records/13890774/files/Busco_GFF_pep.gff3?download=1 + compare: sim_size + delta: 30000 + busco_missing_pep: + location: https://zenodo.org/records/13890774/files/Busco_missing_buscos_pep.tabular?download=1 + compare: sim_size + delta: 30000 + + omark_detail_sum: + location: https://zenodo.org/records/13890774/files/OMArk_Detailed_summary.txt?download=1 + compare: sim_size + delta: 30000 \ No newline at end of file diff --git a/workflows/genome_annotation/annotation_helixer/Galaxy-Workflow-annotation_helixer.ga b/workflows/genome_annotation/annotation_helixer/Galaxy-Workflow-annotation_helixer.ga new file mode 100644 index 000000000..5448566e9 --- /dev/null +++ b/workflows/genome_annotation/annotation_helixer/Galaxy-Workflow-annotation_helixer.ga @@ -0,0 +1,502 @@ +{ + "a_galaxy_workflow": "true", + "annotation": "This workflow allows you to annotate a genome with Helixer and evaluate the quality of the annotation using BUSCO and Genome Annotation statistics. GFFRead is also used to predict protein sequences derived from this annotation, and BUSCO and OMArk are used to assess proteome quality. 
", + "comments": [], + "creator": [ + { + "class": "Person", + "email": "mailto:romane.libouban@irisa.fr", + "identifier": "0009-0001-4920-9951", + "name": "Romane Libouban" + } + ], + "format-version": "0.1", + "license": "CC-BY-4.0", + "name": "annotation_helixer", + "report": { + "markdown": "\n# Workflow Execution Report\n\n## Workflow Inputs\n```galaxy\ninvocation_inputs()\n```\n\n## Workflow Outputs\n```galaxy\ninvocation_outputs()\n```\n\n## Workflow\n```galaxy\nworkflow_display()\n```\n" + }, + "steps": { + "0": { + "annotation": "", + "content_id": null, + "errors": null, + "id": 0, + "input_connections": {}, + "inputs": [ + { + "description": "", + "name": "Input" + } + ], + "label": "Input", + "name": "Input dataset", + "outputs": [], + "position": { + "left": 0, + "top": 512.1362146249206 + }, + "tool_id": null, + "tool_state": "{\"optional\": false, \"format\": [\"fasta\"], \"tag\": \"\"}", + "tool_version": null, + "type": "data_input", + "uuid": "e267e1df-03ae-4b70-98ce-65ea177a172e", + "when": null, + "workflow_outputs": [] + }, + "1": { + "annotation": "", + "content_id": "toolshed.g2.bx.psu.edu/repos/genouest/helixer/helixer/0.3.3+galaxy1", + "errors": null, + "id": 1, + "input_connections": { + "input": { + "id": 0, + "output_name": "output" + } + }, + "inputs": [ + { + "description": "runtime parameter for tool Helixer", + "name": "input_model" + } + ], + "label": "Helixer", + "name": "Helixer", + "outputs": [ + { + "name": "output", + "type": "gff3" + } + ], + "position": { + "left": 258.5333251953125, + "top": 198.43398071289062 + }, + "post_job_actions": {}, + "tool_id": "toolshed.g2.bx.psu.edu/repos/genouest/helixer/helixer/0.3.3+galaxy1", + "tool_shed_repository": { + "changeset_revision": "c2fc4ac35199", + "name": "helixer", + "owner": "genouest", + "tool_shed": "toolshed.g2.bx.psu.edu" + }, + "tool_state": "{\"input\": {\"__class__\": \"ConnectedValue\"}, \"input_model\": {\"__class__\": \"RuntimeValue\"}, \"lineages\": \"land_plant\", 
\"option_overlap\": {\"use_overlap\": \"true\", \"__current_case__\": 0, \"overlap_offset\": null, \"overlap_core_length\": null}, \"post_processing\": {\"window_size\": \"100\", \"edge_threshold\": \"0.1\", \"peak_threshold\": \"0.8\", \"min_coding_length\": \"100\"}, \"size\": \"8\", \"species\": null, \"subsequence_length\": null, \"__page__\": null, \"__rerun_remap_job_id__\": null}", + "tool_version": "0.3.3+galaxy1", + "type": "tool", + "uuid": "f60cf54d-31f2-4395-bb55-4916828cd211", + "when": null, + "workflow_outputs": [ + { + "label": "helixer_output", + "output_name": "output", + "uuid": "fe43bcd6-5f99-4fd3-b184-2d6bfb340030" + } + ] + }, + "2": { + "annotation": "", + "content_id": "toolshed.g2.bx.psu.edu/repos/iuc/busco/busco/5.7.1+galaxy0", + "errors": null, + "id": 2, + "input_connections": { + "input": { + "id": 0, + "output_name": "output" + } + }, + "inputs": [ + { + "description": "runtime parameter for tool Busco", + "name": "input" + } + ], + "label": null, + "name": "Busco", + "outputs": [ + { + "name": "busco_sum", + "type": "txt" + }, + { + "name": "busco_table", + "type": "tabular" + }, + { + "name": "busco_missing", + "type": "tabular" + }, + { + "name": "summary_image", + "type": "png" + }, + { + "name": "busco_gff", + "type": "gff3" + } + ], + "position": { + "left": 774.3333740234375, + "top": 617.4839685058594 + }, + "post_job_actions": {}, + "tool_id": "toolshed.g2.bx.psu.edu/repos/iuc/busco/busco/5.7.1+galaxy0", + "tool_shed_repository": { + "changeset_revision": "2babe6d5c561", + "name": "busco", + "owner": "iuc", + "tool_shed": "toolshed.g2.bx.psu.edu" + }, + "tool_state": "{\"adv\": {\"evalue\": \"0.001\", \"limit\": \"3\", \"contig_break\": \"10\"}, \"busco_mode\": {\"mode\": \"geno\", \"__current_case__\": 0, \"use_augustus\": {\"use_augustus_selector\": \"augustus\", \"__current_case__\": 2, \"aug_prediction\": {\"augustus_mode\": \"no\", \"__current_case__\": 0}, \"long\": false}}, \"input\": {\"__class__\": \"RuntimeValue\"}, 
\"lineage\": {\"lineage_mode\": \"auto_detect\", \"__current_case__\": 0, \"auto_lineage\": \"--auto-lineage\"}, \"lineage_conditional\": {\"selector\": \"cached\", \"__current_case__\": 0, \"cached_db\": \"all+2024-03-21-114020\"}, \"outputs\": [\"short_summary\", \"image\", \"gff\", \"missing\"], \"__page__\": null, \"__rerun_remap_job_id__\": null}", + "tool_version": "5.7.1+galaxy0", + "type": "tool", + "uuid": "c0e4cca7-0bc3-4ef2-81b2-c990b1b77d87", + "when": null, + "workflow_outputs": [ + { + "label": "summary_image_geno", + "output_name": "summary_image", + "uuid": "3232c386-3c31-4989-ac76-02722ea2d79b" + }, + { + "label": "busco_missing_geno", + "output_name": "busco_missing", + "uuid": "d039ef78-640f-4f7d-b449-69fac1a25130" + }, + { + "label": "busco_table_geno", + "output_name": "busco_table", + "uuid": "5cbbd77a-f521-4ee6-b990-a494b7671534" + }, + { + "label": "busco_sum_geno", + "output_name": "busco_sum", + "uuid": "bf09f09a-b403-4517-9a1a-acece8f36735" + }, + { + "label": "busco_gff_geno", + "output_name": "busco_gff", + "uuid": "961890cc-7a33-422a-ab09-b787e3592dd1" + } + ] + }, + "3": { + "annotation": "", + "content_id": "toolshed.g2.bx.psu.edu/repos/devteam/gffread/gffread/2.2.1.4+galaxy0", + "errors": null, + "id": 3, + "input_connections": { + "input": { + "id": 1, + "output_name": "output" + }, + "reference_genome|genome_fasta": { + "id": 0, + "output_name": "output" + } + }, + "inputs": [ + { + "description": "runtime parameter for tool gffread", + "name": "chr_replace" + }, + { + "description": "runtime parameter for tool gffread", + "name": "reference_genome" + } + ], + "label": null, + "name": "gffread", + "outputs": [ + { + "name": "output_pep", + "type": "fasta" + } + ], + "position": { + "left": 637, + "top": 179.65065551757812 + }, + "post_job_actions": {}, + "tool_id": "toolshed.g2.bx.psu.edu/repos/devteam/gffread/gffread/2.2.1.4+galaxy0", + "tool_shed_repository": { + "changeset_revision": "3e436657dcd0", + "name": "gffread", + 
"owner": "devteam", + "tool_shed": "toolshed.g2.bx.psu.edu" + }, + "tool_state": "{\"chr_replace\": {\"__class__\": \"RuntimeValue\"}, \"decode_url\": false, \"expose\": false, \"filtering\": null, \"full_gff_attribute_preservation\": false, \"gffs\": {\"gff_fmt\": \"none\", \"__current_case__\": 0}, \"input\": {\"__class__\": \"ConnectedValue\"}, \"maxintron\": null, \"merging\": {\"merge_sel\": \"none\", \"__current_case__\": 0}, \"reference_genome\": {\"source\": \"history\", \"__current_case__\": 2, \"genome_fasta\": {\"__class__\": \"ConnectedValue\"}, \"ref_filtering\": null, \"fa_outputs\": [\"-y pep.fa\"]}, \"region\": {\"region_filter\": \"none\", \"__current_case__\": 0}, \"__page__\": null, \"__rerun_remap_job_id__\": null}", + "tool_version": "2.2.1.4+galaxy0", + "type": "tool", + "uuid": "00d60c74-1ed5-4529-aa82-8745b50205b7", + "when": null, + "workflow_outputs": [ + { + "label": "gffread_pep", + "output_name": "output_pep", + "uuid": "aa178118-cd37-495b-9e81-e2e53ebf27fd" + } + ] + }, + "4": { + "annotation": "", + "content_id": "toolshed.g2.bx.psu.edu/repos/iuc/jcvi_gff_stats/jcvi_gff_stats/0.8.4", + "errors": null, + "id": 4, + "input_connections": { + "gff": { + "id": 1, + "output_name": "output" + }, + "ref_genome|genome": { + "id": 0, + "output_name": "output" + } + }, + "inputs": [ + { + "description": "runtime parameter for tool Genome annotation statistics", + "name": "ref_genome" + } + ], + "label": null, + "name": "Genome annotation statistics", + "outputs": [ + { + "name": "summary", + "type": "txt" + }, + { + "name": "graphs", + "type": "pdf" + } + ], + "position": { + "left": 483.26666259765625, + "top": 680.9339807128906 + }, + "post_job_actions": {}, + "tool_id": "toolshed.g2.bx.psu.edu/repos/iuc/jcvi_gff_stats/jcvi_gff_stats/0.8.4", + "tool_shed_repository": { + "changeset_revision": "8cffbd184762", + "name": "jcvi_gff_stats", + "owner": "iuc", + "tool_shed": "toolshed.g2.bx.psu.edu" + }, + "tool_state": "{\"gff\": {\"__class__\": 
\"ConnectedValue\"}, \"ref_genome\": {\"genome_type_select\": \"history\", \"__current_case__\": 1, \"genome\": {\"__class__\": \"ConnectedValue\"}}, \"__page__\": null, \"__rerun_remap_job_id__\": null}", + "tool_version": "0.8.4", + "type": "tool", + "uuid": "f47f89eb-23f4-4a16-b0a8-49d8e62c9f3d", + "when": null, + "workflow_outputs": [ + { + "label": "graphs", + "output_name": "graphs", + "uuid": "4638cc23-fdb6-4e82-9cdf-c9fe38e76bd7" + }, + { + "label": "summary", + "output_name": "summary", + "uuid": "fb8ed4c9-4b55-4547-880d-1916a91f8a6e" + } + ] + }, + "5": { + "annotation": "", + "content_id": "toolshed.g2.bx.psu.edu/repos/iuc/jbrowse/jbrowse/1.16.11+galaxy1", + "errors": null, + "id": 5, + "input_connections": { + "reference_genome|genome": { + "id": 0, + "output_name": "output" + }, + "track_groups_0|data_tracks_0|data_format|annotation": { + "id": 1, + "output_name": "output" + } + }, + "inputs": [ + { + "description": "runtime parameter for tool JBrowse", + "name": "reference_genome" + } + ], + "label": null, + "name": "JBrowse", + "outputs": [ + { + "name": "output", + "type": "html" + } + ], + "position": { + "left": 1287.3333740234375, + "top": 548.4173181152344 + }, + "post_job_actions": {}, + "tool_id": "toolshed.g2.bx.psu.edu/repos/iuc/jbrowse/jbrowse/1.16.11+galaxy1", + "tool_shed_repository": { + "changeset_revision": "a6e57ff585c0", + "name": "jbrowse", + "owner": "iuc", + "tool_shed": "toolshed.g2.bx.psu.edu" + }, + "tool_state": "{\"action\": {\"action_select\": \"create\", \"__current_case__\": 0}, \"gencode\": \"1\", \"jbgen\": {\"defaultLocation\": \"\", \"trackPadding\": \"20\", \"shareLink\": true, \"aboutDescription\": \"\", \"show_tracklist\": true, \"show_nav\": true, \"show_overview\": true, \"show_menu\": true, \"hideGenomeOptions\": false}, \"plugins\": {\"BlastView\": true, \"ComboTrackSelector\": false, \"GCContent\": false}, \"reference_genome\": {\"genome_type_select\": \"history\", \"__current_case__\": 1, \"genome\": 
{\"__class__\": \"ConnectedValue\"}}, \"standalone\": \"minimal\", \"track_groups\": [{\"__index__\": 0, \"category\": \"Annotation\", \"data_tracks\": [{\"__index__\": 0, \"data_format\": {\"data_format_select\": \"gene_calls\", \"__current_case__\": 2, \"annotation\": {\"__class__\": \"ConnectedValue\"}, \"match_part\": {\"match_part_select\": false, \"__current_case__\": 1}, \"index\": false, \"track_config\": {\"track_class\": \"NeatHTMLFeatures/View/Track/NeatFeatures\", \"__current_case__\": 3, \"html_options\": {\"topLevelFeatures\": null}}, \"jbstyle\": {\"style_classname\": \"feature\", \"style_label\": \"product,name,id\", \"style_description\": \"note,description\", \"style_height\": \"10px\", \"max_height\": \"600\"}, \"jbcolor_scale\": {\"color_score\": {\"color_score_select\": \"none\", \"__current_case__\": 0, \"color\": {\"color_select\": \"automatic\", \"__current_case__\": 0}}}, \"jb_custom_config\": {\"option\": []}, \"jbmenu\": {\"track_menu\": []}, \"track_visibility\": \"default_off\", \"override_apollo_plugins\": \"False\", \"override_apollo_drag\": \"False\"}}]}], \"uglyTestingHack\": \"\", \"__page__\": null, \"__rerun_remap_job_id__\": null}", + "tool_version": "1.16.11+galaxy1", + "type": "tool", + "uuid": "04807fae-95f6-49c1-893e-76932a79cdf9", + "when": null, + "workflow_outputs": [ + { + "label": "output", + "output_name": "output", + "uuid": "19976896-9df1-45e4-9c96-89e24ae6e596" + } + ] + }, + "6": { + "annotation": "", + "content_id": "toolshed.g2.bx.psu.edu/repos/iuc/busco/busco/5.7.1+galaxy0", + "errors": null, + "id": 6, + "input_connections": { + "input": { + "id": 3, + "output_name": "output_pep" + } + }, + "inputs": [], + "label": null, + "name": "Busco", + "outputs": [ + { + "name": "busco_sum", + "type": "txt" + }, + { + "name": "busco_table", + "type": "tabular" + }, + { + "name": "busco_missing", + "type": "tabular" + }, + { + "name": "summary_image", + "type": "png" + }, + { + "name": "busco_gff", + "type": "gff3" + } + 
], + "position": { + "left": 970, + "top": 0 + }, + "post_job_actions": {}, + "tool_id": "toolshed.g2.bx.psu.edu/repos/iuc/busco/busco/5.7.1+galaxy0", + "tool_shed_repository": { + "changeset_revision": "2babe6d5c561", + "name": "busco", + "owner": "iuc", + "tool_shed": "toolshed.g2.bx.psu.edu" + }, + "tool_state": "{\"adv\": {\"evalue\": \"0.001\", \"limit\": \"3\", \"contig_break\": \"10\"}, \"busco_mode\": {\"mode\": \"prot\", \"__current_case__\": 2}, \"input\": {\"__class__\": \"ConnectedValue\"}, \"lineage\": {\"lineage_mode\": \"auto_detect\", \"__current_case__\": 0, \"auto_lineage\": \"--auto-lineage\"}, \"lineage_conditional\": {\"selector\": \"cached\", \"__current_case__\": 0, \"cached_db\": \"all+2024-03-21-114020\"}, \"outputs\": [\"short_summary\", \"image\", \"gff\", \"missing\"], \"__page__\": null, \"__rerun_remap_job_id__\": null}", + "tool_version": "5.7.1+galaxy0", + "type": "tool", + "uuid": "51dcc6a4-ff87-4a98-98fa-de00ce54325f", + "when": null, + "workflow_outputs": [ + { + "label": "busco_gff_pep", + "output_name": "busco_gff", + "uuid": "1db166fb-10c2-4823-a80c-9f22c7c15576" + }, + { + "label": "busco_table_pep", + "output_name": "busco_table", + "uuid": "1a113d6c-a167-432b-8200-dfb3aedc4ba1" + }, + { + "label": "busco_missing_pep", + "output_name": "busco_missing", + "uuid": "dc2d4533-d9c2-4cb0-a144-184e90fd4e01" + }, + { + "label": "summary_image_pep", + "output_name": "summary_image", + "uuid": "13c6bee4-824c-4533-bc78-c99ddf0b190d" + }, + { + "label": "busco_sum_pep", + "output_name": "busco_sum", + "uuid": "f44047d9-e713-41d9-a9f9-5543f0371d9d" + } + ] + }, + "7": { + "annotation": "", + "content_id": "toolshed.g2.bx.psu.edu/repos/iuc/omark/omark/0.3.0+galaxy2", + "errors": null, + "id": 7, + "input_connections": { + "input": { + "id": 3, + "output_name": "output_pep" + } + }, + "inputs": [ + { + "description": "runtime parameter for tool OMArk", + "name": "input_iso" + } + ], + "label": null, + "name": "OMArk", + "outputs": [ + { + 
"name": "omark_detail_sum", + "type": "txt" + }, + { + "name": "omark_sum", + "type": "sum" + } + ], + "position": { + "left": 1008.3333740234375, + "top": 387.25066162109374 + }, + "post_job_actions": {}, + "tool_id": "toolshed.g2.bx.psu.edu/repos/iuc/omark/omark/0.3.0+galaxy2", + "tool_shed_repository": { + "changeset_revision": "6f570ba54b41", + "name": "omark", + "owner": "iuc", + "tool_shed": "toolshed.g2.bx.psu.edu" + }, + "tool_state": "{\"database\": \"Primates-v2.0.0.h5\", \"input\": {\"__class__\": \"ConnectedValue\"}, \"input_iso\": {\"__class__\": \"RuntimeValue\"}, \"omark_mode\": false, \"outputs\": \"detail_sum\", \"r\": null, \"t\": null, \"__page__\": null, \"__rerun_remap_job_id__\": null}", + "tool_version": "0.3.0+galaxy2", + "type": "tool", + "uuid": "75e1dde7-5d60-4092-af57-cd7b065145e2", + "when": null, + "workflow_outputs": [ + { + "label": "omark_sum", + "output_name": "omark_sum", + "uuid": "6ed870c9-d86a-46f8-89fd-127f24703afa" + }, + { + "label": "omark_detail_sum", + "output_name": "omark_detail_sum", + "uuid": "de489b9c-8808-47d4-9384-7617c33a9d34" + } + ] + } + }, + "tags": [], + "uuid": "6a6553e2-9b31-414d-83b0-db4e819ff0c2", + "version": 0 +} \ No newline at end of file diff --git a/workflows/genome_annotation/annotation_helixer/README.md b/workflows/genome_annotation/annotation_helixer/README.md new file mode 100644 index 000000000..5f6480ded --- /dev/null +++ b/workflows/genome_annotation/annotation_helixer/README.md @@ -0,0 +1,64 @@ +# Helixer Workflow + +This workflow allows you to annotate a genome with Helixer and evaluate the quality of the annotation using BUSCO and Genome Annotation statistics. GFFRead is also used to predict protein sequences derived from this annotation, and BUSCO and OMArk are used to assess proteome quality. 
+ + +Helixer is an annotation software with a new and different approach: it performs evidence-free predictions (no need for RNASeq data or sequence alignments), using a Graphics Processing Unit (GPU), with a much faster execution time. The annotation is based on the development and use of a cross-species deep learning model. The software is used to configure and train models for ab initio prediction of gene structure. In other words, it identifies the base pairs in a genome that belong to the UTR/CDS/Intron genes. + +To assess the quality of the proteome, we will use the GFFRead tool to extract the predicted protein sequences from the annotation (i.e. the Helixer annotation). + +To assess the quality of the annotation, we will use different tools: +- Genome Annotation Statistics: is a program designed to analyze and provide statistics on genomic annotations. This software performs its analyses from a GFF3 file. +- BUSCO (Benchmarking Universal Single-Copy Orthologs): is a tool for evaluating the quality of a genome assembly or of a genome annotation. By comparing genomes from various more or less related species, the authors determined sets of ortholog genes that are present in single copy in (almost) all the species of a clade (Bacteria, Fungi, Plants, Insects, Mammals, …). Most of these genes are essential for the organism to live, and are expected to be found in any newly sequenced and annotated genome from the corresponding clade. Using this data, BUSCO is able to evaluate the proportion of these essential genes (also named BUSCOs) found in a set of (predicted) transcript or protein sequences. This is a good evaluation of the “completeness” of the annotation. +- OMArk: is proteome quality assessment software. It provides measures of proteome completeness, characterises the consistency of all protein-coding genes with their homologues and identifies the presence of contamination by other species. 
OMArk is based on the OMA orthology database, from which it exploits orthology relationships, and on the OMAmer software for rapid placement of all proteins in gene families. + +The final step is to view the generated annotation using a genome browser such as JBrowse. This browser allows you to navigate along the chromosomes of the genome and view the structure of each predicted gene. + +## Input dataset for Helixer +Helixer requires the genome sequence to be annotated, in fasta format. + +## Output dataset for Helixer +Helixer produces a single output dataset: a GFF3 file. The GFF3 format is a standard bioinformatics format for storing genome annotations. Each row describes a genomic entity, with columns detailing its identifier, location, score and other attributes. + +## Input dataset for Genome Annotation Statistics +This software requires a GFF3 file. In this workflow, it is the GFF3 file generated by Helixer. + +## Output dataset for Genome Annotation Statistics +Two output files are generated: +- a file containing graphs in pdf format +- a summary in txt format + +## Input dataset for GFFRead +In this workflow, GFFRead requires two inputs: +- an annotation file in GFF3 format (the Helixer output) +- the genome sequence in fasta format + +## Output dataset for GFFRead +In this workflow, a single output will be generated. This file, in fasta format, contains the protein sequences predicted from the annotation. + + +## Input dataset for BUSCO +BUSCO requires a fasta file. +BUSCO will be used twice for this workflow. Firstly on the predicted protein sequences and secondly on the genome sequence. + +## Output dataset for BUSCO +With BUSCO, we can obtain different output files: +- short summary : statistical summary of the quality of genomic assembly or annotation, including total number of genes evaluated, percentage of complete genes, percentage of partial genes, etc. 
+- full table : list of universal orthologs found in the assembled or annotated genome, with information on their completeness, location in the genome, quality score, etc. +- missing BUSCOs : list of orthologs not found in the genome, which may indicate gaps in assembly or annotation. +- summary image : graphics and visualizations to visually represent the results of the evaluation, such as bar charts showing the proportion of complete, partial and missing genes. +- GFF : contains information on gene locations, exons, introns, etc. + +## Input dataset for OMArk +OMArk requires the fasta file produced by GFFRead, containing the predicted protein sequences. + +## Output dataset for OMArk +In this workflow, a single output file will be generated: a file detailing the assessment of completeness, consistency and species composition. + +## Input dataset for JBrowse +JBrowse requires two inputs: +- the genome sequence in fasta format +- the annotation file in gff3 format, generated by Helixer + +## Output dataset for JBrowse +An html file is generated for browsing the genome. 
\ No newline at end of file diff --git a/workflows/genome_annotation/annotation_helixer/plnmotmptestjobu509bkbh.json b/workflows/genome_annotation/annotation_helixer/plnmotmptestjobu509bkbh.json new file mode 100644 index 000000000..7a5d9f19c --- /dev/null +++ b/workflows/genome_annotation/annotation_helixer/plnmotmptestjobu509bkbh.json @@ -0,0 +1 @@ +{"Input": {"class": "File", "location": "https://zenodo.org/records/13890774/files/genome_masked.fa?download=1", "filetype": "fasta"}} \ No newline at end of file From 78f38cf4d29580f0df97c904060d954e2d8f8224 Mon Sep 17 00:00:00 2001 From: rlibouban Date: Tue, 8 Oct 2024 16:22:58 +0200 Subject: [PATCH 02/20] fix dockstore error --- workflows/genome_annotation/annotation_helixer/.dockstore.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/workflows/genome_annotation/annotation_helixer/.dockstore.yml b/workflows/genome_annotation/annotation_helixer/.dockstore.yml index 18db55db3..477497d68 100644 --- a/workflows/genome_annotation/annotation_helixer/.dockstore.yml +++ b/workflows/genome_annotation/annotation_helixer/.dockstore.yml @@ -1,4 +1,4 @@ -version: 1 +version: 1.2 workflows: - name: main subclass: Galaxy From 261b7f3cf0dcf3a804c1fb2407e5dd217fe3f78d Mon Sep 17 00:00:00 2001 From: rlibouban Date: Tue, 8 Oct 2024 16:23:38 +0200 Subject: [PATCH 03/20] fix dockstore error --- workflows/genome_annotation/annotation_helixer/.dockstore.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/workflows/genome_annotation/annotation_helixer/.dockstore.yml b/workflows/genome_annotation/annotation_helixer/.dockstore.yml index 477497d68..a4efd394a 100644 --- a/workflows/genome_annotation/annotation_helixer/.dockstore.yml +++ b/workflows/genome_annotation/annotation_helixer/.dockstore.yml @@ -9,4 +9,4 @@ workflows: authors: - name: Romane Libouban email: romane.libouban@irisa.fr - orcid: 0009-0001-4920-9951 + orcid: https://orcid.org/0000-0002-1825-0097 From 
081f2c506bef027d78f85c86e9d808e95c23be71 Mon Sep 17 00:00:00 2001 From: rlibouban Date: Tue, 8 Oct 2024 17:42:01 +0200 Subject: [PATCH 04/20] modify Galaxy-Workflow-annotation_helixer.ga --- .../Galaxy-Workflow-annotation_helixer.ga | 31 +++++++++---------- 1 file changed, 15 insertions(+), 16 deletions(-) diff --git a/workflows/genome_annotation/annotation_helixer/Galaxy-Workflow-annotation_helixer.ga b/workflows/genome_annotation/annotation_helixer/Galaxy-Workflow-annotation_helixer.ga index 5448566e9..6c86129c8 100644 --- a/workflows/genome_annotation/annotation_helixer/Galaxy-Workflow-annotation_helixer.ga +++ b/workflows/genome_annotation/annotation_helixer/Galaxy-Workflow-annotation_helixer.ga @@ -6,7 +6,6 @@ { "class": "Person", "email": "mailto:romane.libouban@irisa.fr", - "identifier": "0009-0001-4920-9951", "name": "Romane Libouban" } ], @@ -18,14 +17,14 @@ }, "steps": { "0": { - "annotation": "", + "annotation": "Input dataset containing genomic sequences in FASTA format", "content_id": null, "errors": null, "id": 0, "input_connections": {}, "inputs": [ { - "description": "", + "description": "Genome sequences", "name": "Input" } ], @@ -45,7 +44,7 @@ "workflow_outputs": [] }, "1": { - "annotation": "", + "annotation": "Helixer tool for genomic annotation", "content_id": "toolshed.g2.bx.psu.edu/repos/genouest/helixer/helixer/0.3.3+galaxy1", "errors": null, "id": 1, @@ -95,7 +94,7 @@ ] }, "2": { - "annotation": "", + "annotation": "Completeness assessment of the genome using the Busco tool", "content_id": "toolshed.g2.bx.psu.edu/repos/iuc/busco/busco/5.7.1+galaxy0", "errors": null, "id": 2, @@ -111,7 +110,7 @@ "name": "input" } ], - "label": null, + "label": Busco, "name": "Busco", "outputs": [ { @@ -181,7 +180,7 @@ ] }, "3": { - "annotation": "", + "annotation": "Converts GFF files to other formats, such as FASTA", "content_id": "toolshed.g2.bx.psu.edu/repos/devteam/gffread/gffread/2.2.1.4+galaxy0", "errors": null, "id": 3, @@ -205,7 +204,7 @@ "name": 
"reference_genome" } ], - "label": null, + "label": Gffread, "name": "gffread", "outputs": [ { @@ -239,7 +238,7 @@ ] }, "4": { - "annotation": "", + "annotation": "Generates statistics and graphs for genome annotation", "content_id": "toolshed.g2.bx.psu.edu/repos/iuc/jcvi_gff_stats/jcvi_gff_stats/0.8.4", "errors": null, "id": 4, @@ -259,7 +258,7 @@ "name": "ref_genome" } ], - "label": null, + "label": Genome annotation statistics, "name": "Genome annotation statistics", "outputs": [ { @@ -302,7 +301,7 @@ ] }, "5": { - "annotation": "", + "annotation": "JBrowse", "content_id": "toolshed.g2.bx.psu.edu/repos/iuc/jbrowse/jbrowse/1.16.11+galaxy1", "errors": null, "id": 5, @@ -322,7 +321,7 @@ "name": "reference_genome" } ], - "label": null, + "label": JBrowse, "name": "JBrowse", "outputs": [ { @@ -356,7 +355,7 @@ ] }, "6": { - "annotation": "", + "annotation": "Completeness assessment of the genome using the Busco tool", "content_id": "toolshed.g2.bx.psu.edu/repos/iuc/busco/busco/5.7.1+galaxy0", "errors": null, "id": 6, @@ -367,7 +366,7 @@ } }, "inputs": [], - "label": null, + "label": Busco, "name": "Busco", "outputs": [ { @@ -437,7 +436,7 @@ ] }, "7": { - "annotation": "", + "annotation": "OMArk", "content_id": "toolshed.g2.bx.psu.edu/repos/iuc/omark/omark/0.3.0+galaxy2", "errors": null, "id": 7, @@ -453,7 +452,7 @@ "name": "input_iso" } ], - "label": null, + "label": OMArk, "name": "OMArk", "outputs": [ { From 9abf6e159eab8d36eda2c3d507a272c3b61c622b Mon Sep 17 00:00:00 2001 From: rlibouban Date: Wed, 9 Oct 2024 09:59:21 +0200 Subject: [PATCH 05/20] fix lint --- .../annotation_helixer/Galaxy-Workflow-annotation_helixer.ga | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/workflows/genome_annotation/annotation_helixer/Galaxy-Workflow-annotation_helixer.ga b/workflows/genome_annotation/annotation_helixer/Galaxy-Workflow-annotation_helixer.ga index 6c86129c8..d41275104 100644 --- 
a/workflows/genome_annotation/annotation_helixer/Galaxy-Workflow-annotation_helixer.ga +++ b/workflows/genome_annotation/annotation_helixer/Galaxy-Workflow-annotation_helixer.ga @@ -10,7 +10,8 @@ } ], "format-version": "0.1", - "license": "CC-BY-4.0", + "license": "MIT", + "release": "0.1", "name": "annotation_helixer", "report": { "markdown": "\n# Workflow Execution Report\n\n## Workflow Inputs\n```galaxy\ninvocation_inputs()\n```\n\n## Workflow Outputs\n```galaxy\ninvocation_outputs()\n```\n\n## Workflow\n```galaxy\nworkflow_display()\n```\n" From 450bfcf37665409b1d050717eaee29ec0da5f3d9 Mon Sep 17 00:00:00 2001 From: rlibouban Date: Wed, 9 Oct 2024 11:15:10 +0200 Subject: [PATCH 06/20] typo error --- .../annotation_helixer/.dockstore.yml | 3 +-- .../Galaxy-Workflow-annotation_helixer.ga | 12 ++++++------ 2 files changed, 7 insertions(+), 8 deletions(-) diff --git a/workflows/genome_annotation/annotation_helixer/.dockstore.yml b/workflows/genome_annotation/annotation_helixer/.dockstore.yml index a4efd394a..1b44312f8 100644 --- a/workflows/genome_annotation/annotation_helixer/.dockstore.yml +++ b/workflows/genome_annotation/annotation_helixer/.dockstore.yml @@ -8,5 +8,4 @@ workflows: - /Galaxy-Workflow-annotation_helixer-tests.yml authors: - name: Romane Libouban - email: romane.libouban@irisa.fr - orcid: https://orcid.org/0000-0002-1825-0097 + email: romane.libouban@irisa.fr \ No newline at end of file diff --git a/workflows/genome_annotation/annotation_helixer/Galaxy-Workflow-annotation_helixer.ga b/workflows/genome_annotation/annotation_helixer/Galaxy-Workflow-annotation_helixer.ga index d41275104..e235ca793 100644 --- a/workflows/genome_annotation/annotation_helixer/Galaxy-Workflow-annotation_helixer.ga +++ b/workflows/genome_annotation/annotation_helixer/Galaxy-Workflow-annotation_helixer.ga @@ -111,7 +111,7 @@ "name": "input" } ], - "label": Busco, + "label": "Busco", "name": "Busco", "outputs": [ { @@ -205,7 +205,7 @@ "name": "reference_genome" } ], - 
"label": Gffread, + "label": "Gffread", "name": "gffread", "outputs": [ { @@ -259,7 +259,7 @@ "name": "ref_genome" } ], - "label": Genome annotation statistics, + "label": "Genome annotation statistics", "name": "Genome annotation statistics", "outputs": [ { @@ -322,7 +322,7 @@ "name": "reference_genome" } ], - "label": JBrowse, + "label": "JBrowse", "name": "JBrowse", "outputs": [ { @@ -367,7 +367,7 @@ } }, "inputs": [], - "label": Busco, + "label": "Busco", "name": "Busco", "outputs": [ { @@ -453,7 +453,7 @@ "name": "input_iso" } ], - "label": OMArk, + "label": "OMArk", "name": "OMArk", "outputs": [ { From 351aebdecbd902013acfa4ac07821bce678f1b18 Mon Sep 17 00:00:00 2001 From: rlibouban Date: Thu, 10 Oct 2024 11:09:33 +0200 Subject: [PATCH 07/20] error --- .../annotation_helixer/Galaxy-Workflow-annotation_helixer.ga | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/workflows/genome_annotation/annotation_helixer/Galaxy-Workflow-annotation_helixer.ga b/workflows/genome_annotation/annotation_helixer/Galaxy-Workflow-annotation_helixer.ga index e235ca793..1c6236794 100644 --- a/workflows/genome_annotation/annotation_helixer/Galaxy-Workflow-annotation_helixer.ga +++ b/workflows/genome_annotation/annotation_helixer/Galaxy-Workflow-annotation_helixer.ga @@ -111,7 +111,7 @@ "name": "input" } ], - "label": "Busco", + "label": "Busco on genome", "name": "Busco", "outputs": [ { @@ -367,7 +367,7 @@ } }, "inputs": [], - "label": "Busco", + "label": "Busco on protein", "name": "Busco", "outputs": [ { From eaa461ee740498bed64657989bd9cb1fd00854a1 Mon Sep 17 00:00:00 2001 From: rlibouban Date: Thu, 10 Oct 2024 17:53:00 +0200 Subject: [PATCH 08/20] modify omark --- .../annotation_helixer/Galaxy-Workflow-annotation_helixer.ga | 5 ----- 1 file changed, 5 deletions(-) diff --git a/workflows/genome_annotation/annotation_helixer/Galaxy-Workflow-annotation_helixer.ga b/workflows/genome_annotation/annotation_helixer/Galaxy-Workflow-annotation_helixer.ga index 
1c6236794..3efe37c6c 100644 --- a/workflows/genome_annotation/annotation_helixer/Galaxy-Workflow-annotation_helixer.ga +++ b/workflows/genome_annotation/annotation_helixer/Galaxy-Workflow-annotation_helixer.ga @@ -483,11 +483,6 @@ "uuid": "75e1dde7-5d60-4092-af57-cd7b065145e2", "when": null, "workflow_outputs": [ - { - "label": "omark_sum", - "output_name": "omark_sum", - "uuid": "6ed870c9-d86a-46f8-89fd-127f24703afa" - }, { "label": "omark_detail_sum", "output_name": "omark_detail_sum", From a9c5bf0fd77cbbbbed90b128a739737e08fb061b Mon Sep 17 00:00:00 2001 From: rlibouban Date: Tue, 15 Oct 2024 17:23:18 +0200 Subject: [PATCH 09/20] retry --- .../annotation_helixer/Galaxy-Workflow-annotation_helixer.ga | 4 ---- 1 file changed, 4 deletions(-) diff --git a/workflows/genome_annotation/annotation_helixer/Galaxy-Workflow-annotation_helixer.ga b/workflows/genome_annotation/annotation_helixer/Galaxy-Workflow-annotation_helixer.ga index 3efe37c6c..657fd69cc 100644 --- a/workflows/genome_annotation/annotation_helixer/Galaxy-Workflow-annotation_helixer.ga +++ b/workflows/genome_annotation/annotation_helixer/Galaxy-Workflow-annotation_helixer.ga @@ -459,10 +459,6 @@ { "name": "omark_detail_sum", "type": "txt" - }, - { - "name": "omark_sum", - "type": "sum" } ], "position": { From 77546e6dd621508833a3792a34c7b310315b0104 Mon Sep 17 00:00:00 2001 From: rlibouban Date: Mon, 21 Oct 2024 09:56:25 +0200 Subject: [PATCH 10/20] rename --- workflows/genome_annotation/annotation_helixer/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/workflows/genome_annotation/annotation_helixer/README.md b/workflows/genome_annotation/annotation_helixer/README.md index 5f6480ded..c2477838b 100644 --- a/workflows/genome_annotation/annotation_helixer/README.md +++ b/workflows/genome_annotation/annotation_helixer/README.md @@ -1,4 +1,4 @@ -# Helixer Workflow +# Genome annotation workflow with Helixer This workflow allows you to annotate a genome with Helixer and evaluate the 
quality of the annotation using BUSCO and Genome Annotation statistics. GFFRead is also used to predict protein sequences derived from this annotation, and BUSCO and OMArk are used to assess proteome quality. From 1244008afa4dd33b79f11c1dd94c36839675a918 Mon Sep 17 00:00:00 2001 From: rlibouban Date: Tue, 12 Nov 2024 10:36:27 +0100 Subject: [PATCH 11/20] addition of coloured borders and titles --- .../Galaxy-Workflow-annotation_helixer.ga | 203 +++++++++++++----- 1 file changed, 150 insertions(+), 53 deletions(-) diff --git a/workflows/genome_annotation/annotation_helixer/Galaxy-Workflow-annotation_helixer.ga b/workflows/genome_annotation/annotation_helixer/Galaxy-Workflow-annotation_helixer.ga index 657fd69cc..c9b85fa0a 100644 --- a/workflows/genome_annotation/annotation_helixer/Galaxy-Workflow-annotation_helixer.ga +++ b/workflows/genome_annotation/annotation_helixer/Galaxy-Workflow-annotation_helixer.ga @@ -1,7 +1,105 @@ { "a_galaxy_workflow": "true", "annotation": "This workflow allows you to annotate a genome with Helixer and evaluate the quality of the annotation using BUSCO and Genome Annotation statistics. GFFRead is also used to predict protein sequences derived from this annotation, and BUSCO and OMArk are used to assess proteome quality. 
", - "comments": [], + "comments": [ + { + "child_steps": [ + 4, + 2 + ], + "color": "lime", + "data": { + "title": "Evaluation - Genome annotation" + }, + "id": 2, + "position": [ + 468.3, + 902.5 + ], + "size": [ + 496.5, + 356.1 + ], + "type": "frame" + }, + { + "child_steps": [ + 3 + ], + "color": "orange", + "data": { + "title": "Protein prediction with Helixer annotation" + }, + "id": 1, + "position": [ + 628.9, + 255.2 + ], + "size": [ + 258, + 275 + ], + "type": "frame" + }, + { + "child_steps": [ + 1 + ], + "color": "blue", + "data": { + "title": "Annotation step" + }, + "id": 0, + "position": [ + 238.5, + 458.79999999999995 + ], + "size": [ + 240, + 183 + ], + "type": "frame" + }, + { + "child_steps": [ + 5 + ], + "color": "pink", + "data": { + "title": "Visualization" + }, + "id": 4, + "position": [ + 1045.3, + 680.0 + ], + "size": [ + 240, + 244.5 + ], + "type": "frame" + }, + { + "child_steps": [ + 6, + 7 + ], + "color": "turquoise", + "data": { + "title": "Evaluation - Predicted protein from annotation" + }, + "id": 3, + "position": [ + 1104.4, + 0.0 + ], + "size": [ + 312, + 563 + ], + "type": "frame" + } + ], "creator": [ { "class": "Person", @@ -11,7 +109,7 @@ ], "format-version": "0.1", "license": "MIT", - "release": "0.1", + "release": "0.1", "name": "annotation_helixer", "report": { "markdown": "\n# Workflow Execution Report\n\n## Workflow Inputs\n```galaxy\ninvocation_inputs()\n```\n\n## Workflow Outputs\n```galaxy\ninvocation_outputs()\n```\n\n## Workflow\n```galaxy\nworkflow_display()\n```\n" @@ -25,7 +123,7 @@ "input_connections": {}, "inputs": [ { - "description": "Genome sequences", + "description": "Input dataset containing genomic sequences in FASTA format", "name": "Input" } ], @@ -34,7 +132,7 @@ "outputs": [], "position": { "left": 0, - "top": 512.1362146249206 + "top": 812.5362146249206 }, "tool_id": null, "tool_state": "{\"optional\": false, \"format\": [\"fasta\"], \"tag\": \"\"}", @@ -71,7 +169,7 @@ ], "position": { "left": 
258.5333251953125, - "top": 198.43398071289062 + "top": 498.8339807128906 }, "post_job_actions": {}, "tool_id": "toolshed.g2.bx.psu.edu/repos/genouest/helixer/helixer/0.3.3+galaxy1", @@ -105,12 +203,7 @@ "output_name": "output" } }, - "inputs": [ - { - "description": "runtime parameter for tool Busco", - "name": "input" - } - ], + "inputs": [], "label": "Busco on genome", "name": "Busco", "outputs": [ @@ -136,8 +229,8 @@ } ], "position": { - "left": 774.3333740234375, - "top": 617.4839685058594 + "left": 744.7633406324078, + "top": 942.4706486763349 }, "post_job_actions": {}, "tool_id": "toolshed.g2.bx.psu.edu/repos/iuc/busco/busco/5.7.1+galaxy0", @@ -147,26 +240,21 @@ "owner": "iuc", "tool_shed": "toolshed.g2.bx.psu.edu" }, - "tool_state": "{\"adv\": {\"evalue\": \"0.001\", \"limit\": \"3\", \"contig_break\": \"10\"}, \"busco_mode\": {\"mode\": \"geno\", \"__current_case__\": 0, \"use_augustus\": {\"use_augustus_selector\": \"augustus\", \"__current_case__\": 2, \"aug_prediction\": {\"augustus_mode\": \"no\", \"__current_case__\": 0}, \"long\": false}}, \"input\": {\"__class__\": \"RuntimeValue\"}, \"lineage\": {\"lineage_mode\": \"auto_detect\", \"__current_case__\": 0, \"auto_lineage\": \"--auto-lineage\"}, \"lineage_conditional\": {\"selector\": \"cached\", \"__current_case__\": 0, \"cached_db\": \"all+2024-03-21-114020\"}, \"outputs\": [\"short_summary\", \"image\", \"gff\", \"missing\"], \"__page__\": null, \"__rerun_remap_job_id__\": null}", + "tool_state": "{\"adv\": {\"evalue\": \"0.001\", \"limit\": \"3\", \"contig_break\": \"10\"}, \"busco_mode\": {\"mode\": \"geno\", \"__current_case__\": 0, \"use_augustus\": {\"use_augustus_selector\": \"augustus\", \"__current_case__\": 2, \"aug_prediction\": {\"augustus_mode\": \"no\", \"__current_case__\": 0}, \"long\": false}}, \"input\": {\"__class__\": \"ConnectedValue\"}, \"lineage\": {\"lineage_mode\": \"auto_detect\", \"__current_case__\": 0, \"auto_lineage\": \"--auto-lineage\"}, \"lineage_conditional\": 
{\"selector\": \"cached\", \"__current_case__\": 0, \"cached_db\": \"v5\"}, \"outputs\": [\"short_summary\", \"image\", \"gff\", \"missing\"], \"__page__\": null, \"__rerun_remap_job_id__\": null}", "tool_version": "5.7.1+galaxy0", "type": "tool", "uuid": "c0e4cca7-0bc3-4ef2-81b2-c990b1b77d87", "when": null, "workflow_outputs": [ - { - "label": "summary_image_geno", - "output_name": "summary_image", - "uuid": "3232c386-3c31-4989-ac76-02722ea2d79b" - }, { "label": "busco_missing_geno", "output_name": "busco_missing", "uuid": "d039ef78-640f-4f7d-b449-69fac1a25130" }, { - "label": "busco_table_geno", - "output_name": "busco_table", - "uuid": "5cbbd77a-f521-4ee6-b990-a494b7671534" + "label": "busco_gff_geno", + "output_name": "busco_gff", + "uuid": "961890cc-7a33-422a-ab09-b787e3592dd1" }, { "label": "busco_sum_geno", @@ -174,9 +262,14 @@ "uuid": "bf09f09a-b403-4517-9a1a-acece8f36735" }, { - "label": "busco_gff_geno", - "output_name": "busco_gff", - "uuid": "961890cc-7a33-422a-ab09-b787e3592dd1" + "label": "summary_image_geno", + "output_name": "summary_image", + "uuid": "3232c386-3c31-4989-ac76-02722ea2d79b" + }, + { + "label": "busco_table_geno", + "output_name": "busco_table", + "uuid": "5cbbd77a-f521-4ee6-b990-a494b7671534" } ] }, @@ -214,8 +307,8 @@ } ], "position": { - "left": 637, - "top": 179.65065551757812 + "left": 658.9081573207637, + "top": 316.7812237670679 }, "post_job_actions": {}, "tool_id": "toolshed.g2.bx.psu.edu/repos/devteam/gffread/gffread/2.2.1.4+galaxy0", @@ -272,8 +365,8 @@ } ], "position": { - "left": 483.26666259765625, - "top": 680.9339807128906 + "left": 488.25061259116643, + "top": 991.5198240353345 }, "post_job_actions": {}, "tool_id": "toolshed.g2.bx.psu.edu/repos/iuc/jcvi_gff_stats/jcvi_gff_stats/0.8.4", @@ -289,15 +382,15 @@ "uuid": "f47f89eb-23f4-4a16-b0a8-49d8e62c9f3d", "when": null, "workflow_outputs": [ - { - "label": "graphs", - "output_name": "graphs", - "uuid": "4638cc23-fdb6-4e82-9cdf-c9fe38e76bd7" - }, { "label": "summary", 
"output_name": "summary", "uuid": "fb8ed4c9-4b55-4547-880d-1916a91f8a6e" + }, + { + "label": "graphs", + "output_name": "graphs", + "uuid": "4638cc23-fdb6-4e82-9cdf-c9fe38e76bd7" } ] }, @@ -331,8 +424,8 @@ } ], "position": { - "left": 1287.3333740234375, - "top": 548.4173181152344 + "left": 1065.313344724818, + "top": 719.9967480789329 }, "post_job_actions": {}, "tool_id": "toolshed.g2.bx.psu.edu/repos/iuc/jbrowse/jbrowse/1.16.11+galaxy1", @@ -392,8 +485,8 @@ } ], "position": { - "left": 970, - "top": 0 + "left": 1166.6977253236494, + "top": 58.61198039869754 }, "post_job_actions": {}, "tool_id": "toolshed.g2.bx.psu.edu/repos/iuc/busco/busco/5.7.1+galaxy0", @@ -403,7 +496,7 @@ "owner": "iuc", "tool_shed": "toolshed.g2.bx.psu.edu" }, - "tool_state": "{\"adv\": {\"evalue\": \"0.001\", \"limit\": \"3\", \"contig_break\": \"10\"}, \"busco_mode\": {\"mode\": \"prot\", \"__current_case__\": 2}, \"input\": {\"__class__\": \"ConnectedValue\"}, \"lineage\": {\"lineage_mode\": \"auto_detect\", \"__current_case__\": 0, \"auto_lineage\": \"--auto-lineage\"}, \"lineage_conditional\": {\"selector\": \"cached\", \"__current_case__\": 0, \"cached_db\": \"all+2024-03-21-114020\"}, \"outputs\": [\"short_summary\", \"image\", \"gff\", \"missing\"], \"__page__\": null, \"__rerun_remap_job_id__\": null}", + "tool_state": "{\"adv\": {\"evalue\": \"0.001\", \"limit\": \"3\", \"contig_break\": \"10\"}, \"busco_mode\": {\"mode\": \"prot\", \"__current_case__\": 2}, \"input\": {\"__class__\": \"ConnectedValue\"}, \"lineage\": {\"lineage_mode\": \"auto_detect\", \"__current_case__\": 0, \"auto_lineage\": \"--auto-lineage\"}, \"lineage_conditional\": {\"selector\": \"cached\", \"__current_case__\": 0, \"cached_db\": \"v5\"}, \"outputs\": [\"short_summary\", \"image\", \"gff\", \"missing\"], \"__page__\": null, \"__rerun_remap_job_id__\": null}", "tool_version": "5.7.1+galaxy0", "type": "tool", "uuid": "51dcc6a4-ff87-4a98-98fa-de00ce54325f", @@ -414,16 +507,6 @@ "output_name": "busco_gff", 
"uuid": "1db166fb-10c2-4823-a80c-9f22c7c15576" }, - { - "label": "busco_table_pep", - "output_name": "busco_table", - "uuid": "1a113d6c-a167-432b-8200-dfb3aedc4ba1" - }, - { - "label": "busco_missing_pep", - "output_name": "busco_missing", - "uuid": "dc2d4533-d9c2-4cb0-a144-184e90fd4e01" - }, { "label": "summary_image_pep", "output_name": "summary_image", @@ -433,6 +516,16 @@ "label": "busco_sum_pep", "output_name": "busco_sum", "uuid": "f44047d9-e713-41d9-a9f9-5543f0371d9d" + }, + { + "label": "busco_table_pep", + "output_name": "busco_table", + "uuid": "1a113d6c-a167-432b-8200-dfb3aedc4ba1" + }, + { + "label": "busco_missing_pep", + "output_name": "busco_missing", + "uuid": "dc2d4533-d9c2-4cb0-a144-184e90fd4e01" } ] }, @@ -459,11 +552,15 @@ { "name": "omark_detail_sum", "type": "txt" + }, + { + "name": "omark_sum", + "type": "sum" } ], "position": { - "left": 1008.3333740234375, - "top": 387.25066162109374 + "left": 1167.994173976809, + "top": 375.00649693590475 }, "post_job_actions": {}, "tool_id": "toolshed.g2.bx.psu.edu/repos/iuc/omark/omark/0.3.0+galaxy2", @@ -488,6 +585,6 @@ } }, "tags": [], - "uuid": "6a6553e2-9b31-414d-83b0-db4e819ff0c2", - "version": 0 + "uuid": "7a0c9f35-37a9-404e-a307-aed30a578b0c", + "version": 1 } \ No newline at end of file From 12c45378d7f15e1363b97ad82b7af8b2e3451676 Mon Sep 17 00:00:00 2001 From: rlibouban Date: Tue, 12 Nov 2024 10:50:54 +0100 Subject: [PATCH 12/20] fix lint --- .../annotation_helixer/Galaxy-Workflow-annotation_helixer.ga | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/workflows/genome_annotation/annotation_helixer/Galaxy-Workflow-annotation_helixer.ga b/workflows/genome_annotation/annotation_helixer/Galaxy-Workflow-annotation_helixer.ga index c9b85fa0a..952db9c02 100644 --- a/workflows/genome_annotation/annotation_helixer/Galaxy-Workflow-annotation_helixer.ga +++ b/workflows/genome_annotation/annotation_helixer/Galaxy-Workflow-annotation_helixer.ga @@ -109,7 +109,7 @@ ], "format-version": "0.1", 
"license": "MIT", - "release": "0.1", + "release": "0.1", "name": "annotation_helixer", "report": { "markdown": "\n# Workflow Execution Report\n\n## Workflow Inputs\n```galaxy\ninvocation_inputs()\n```\n\n## Workflow Outputs\n```galaxy\ninvocation_outputs()\n```\n\n## Workflow\n```galaxy\nworkflow_display()\n```\n" From fa5a9ba3fe21ba615efe050965ede3b4788df9af Mon Sep 17 00:00:00 2001 From: rlibouban Date: Mon, 25 Nov 2024 14:45:36 +0100 Subject: [PATCH 13/20] update helixer workflow --- ...laxy-Workflow-annotation_helixer-tests.yml | 28 ++++++++-------- .../Galaxy-Workflow-annotation_helixer.ga | 33 ++++++++++--------- .../annotation_helixer/README.md | 4 +-- .../plnmotmptestjobu509bkbh.json | 1 - 4 files changed, 33 insertions(+), 33 deletions(-) delete mode 100644 workflows/genome_annotation/annotation_helixer/plnmotmptestjobu509bkbh.json diff --git a/workflows/genome_annotation/annotation_helixer/Galaxy-Workflow-annotation_helixer-tests.yml b/workflows/genome_annotation/annotation_helixer/Galaxy-Workflow-annotation_helixer-tests.yml index 618f95dde..7b8b5f913 100644 --- a/workflows/genome_annotation/annotation_helixer/Galaxy-Workflow-annotation_helixer-tests.yml +++ b/workflows/genome_annotation/annotation_helixer/Galaxy-Workflow-annotation_helixer-tests.yml @@ -1,38 +1,38 @@ - doc: Test outline for Helixer Workflow job: - Input: + genome: class: File location: https://zenodo.org/records/13890774/files/genome_masked.fa?download=1 filetype: fasta outputs: - helixer_output: + helixer output: location: https://zenodo.org/records/13890774/files/Helixer.gff3?download=1 compare: sim_size delta: 300000 - busco_sum_geno: + busco sum genome: location: https://zenodo.org/records/13890774/files/Busco_short_summary_genome.txt?download=1 compare: sim_size delta: 30000 - busco_gff_geno: + busco gff genome: location: https://zenodo.org/records/13890774/files/Busco_GFF_genome.gff3?download=1 compare: sim_size delta: 30000 - summary_image_geno: + summary image genome: location: 
https://zenodo.org/records/13890774/files/Busco_summary_image_genome.png?download=1 compare: sim_size delta: 30000 - busco_missing_geno: + busco missing genome: location: https://zenodo.org/records/13890774/files/Busco_missing_buscos_genome.tabular?download=1 compare: sim_size delta: 30000 - busco_table_geno: + busco table genome: location: https://zenodo.org/records/13890774/files/Busco_full_table_genome.tabular?download=1 compare: sim_size delta: 30000 - gffread_pep: + gffread peptides: location: https://zenodo.org/records/13890774/files/gffread_pep.fasta?download=1 compare: sim_size delta: 30000 @@ -46,28 +46,28 @@ compare: sim_size delta: 30000 - summary_image_pep: + summary image peptides: location: https://zenodo.org/records/13902305/files/Busco_pep_summary_image.png?download=1 compare: sim_size delta: 30000 - busco_table_pep: + busco table peptides: location: https://zenodo.org/records/13890774/files/Busco_full_table_pep.tabular?download=1 compare: sim_size delta: 30000 - busco_sum_pep: + busco sum peptides: location: https://zenodo.org/records/13890774/files/Busco_short_summary_pep.txt?download=1 compare: sim_size delta: 30000 - busco_gff_pep: + busco gff peptides: location: https://zenodo.org/records/13890774/files/Busco_GFF_pep.gff3?download=1 compare: sim_size delta: 30000 - busco_missing_pep: + busco missing peptides: location: https://zenodo.org/records/13890774/files/Busco_missing_buscos_pep.tabular?download=1 compare: sim_size delta: 30000 - omark_detail_sum: + omark detail sum: location: https://zenodo.org/records/13890774/files/OMArk_Detailed_summary.txt?download=1 compare: sim_size delta: 30000 \ No newline at end of file diff --git a/workflows/genome_annotation/annotation_helixer/Galaxy-Workflow-annotation_helixer.ga b/workflows/genome_annotation/annotation_helixer/Galaxy-Workflow-annotation_helixer.ga index 952db9c02..17e974dd9 100644 --- a/workflows/genome_annotation/annotation_helixer/Galaxy-Workflow-annotation_helixer.ga +++ 
b/workflows/genome_annotation/annotation_helixer/Galaxy-Workflow-annotation_helixer.ga @@ -104,13 +104,14 @@ { "class": "Person", "email": "mailto:romane.libouban@irisa.fr", + "identifier": "https://orcid.org/0009-0001-4920-9951", "name": "Romane Libouban" } ], "format-version": "0.1", "license": "MIT", "release": "0.1", - "name": "annotation_helixer", + "name": "Genome annotation with Helixer", "report": { "markdown": "\n# Workflow Execution Report\n\n## Workflow Inputs\n```galaxy\ninvocation_inputs()\n```\n\n## Workflow Outputs\n```galaxy\ninvocation_outputs()\n```\n\n## Workflow\n```galaxy\nworkflow_display()\n```\n" }, @@ -127,7 +128,7 @@ "name": "Input" } ], - "label": "Input", + "label": "Genome", "name": "Input dataset", "outputs": [], "position": { @@ -186,7 +187,7 @@ "when": null, "workflow_outputs": [ { - "label": "helixer_output", + "label": "helixer output", "output_name": "output", "uuid": "fe43bcd6-5f99-4fd3-b184-2d6bfb340030" } @@ -247,27 +248,27 @@ "when": null, "workflow_outputs": [ { - "label": "busco_missing_geno", + "label": "busco missing genome", "output_name": "busco_missing", "uuid": "d039ef78-640f-4f7d-b449-69fac1a25130" }, { - "label": "busco_gff_geno", + "label": "busco gff genome", "output_name": "busco_gff", "uuid": "961890cc-7a33-422a-ab09-b787e3592dd1" }, { - "label": "busco_sum_geno", + "label": "busco sum genome", "output_name": "busco_sum", "uuid": "bf09f09a-b403-4517-9a1a-acece8f36735" }, { - "label": "summary_image_geno", + "label": "summary image genome", "output_name": "summary_image", "uuid": "3232c386-3c31-4989-ac76-02722ea2d79b" }, { - "label": "busco_table_geno", + "label": "busco table genome", "output_name": "busco_table", "uuid": "5cbbd77a-f521-4ee6-b990-a494b7671534" } @@ -325,7 +326,7 @@ "when": null, "workflow_outputs": [ { - "label": "gffread_pep", + "label": "gffread peptides", "output_name": "output_pep", "uuid": "aa178118-cd37-495b-9e81-e2e53ebf27fd" } @@ -503,27 +504,27 @@ "when": null, "workflow_outputs": [ { - 
"label": "busco_gff_pep", + "label": "busco gff peptides", "output_name": "busco_gff", "uuid": "1db166fb-10c2-4823-a80c-9f22c7c15576" }, { - "label": "summary_image_pep", + "label": "summary image peptides", "output_name": "summary_image", "uuid": "13c6bee4-824c-4533-bc78-c99ddf0b190d" }, { - "label": "busco_sum_pep", + "label": "busco sum peptides", "output_name": "busco_sum", "uuid": "f44047d9-e713-41d9-a9f9-5543f0371d9d" }, { - "label": "busco_table_pep", + "label": "busco table peptides", "output_name": "busco_table", "uuid": "1a113d6c-a167-432b-8200-dfb3aedc4ba1" }, { - "label": "busco_missing_pep", + "label": "busco missing peptides", "output_name": "busco_missing", "uuid": "dc2d4533-d9c2-4cb0-a144-184e90fd4e01" } @@ -550,11 +551,11 @@ "name": "OMArk", "outputs": [ { - "name": "omark_detail_sum", + "name": "omark detail sum", "type": "txt" }, { - "name": "omark_sum", + "name": "omark sum", "type": "sum" } ], diff --git a/workflows/genome_annotation/annotation_helixer/README.md b/workflows/genome_annotation/annotation_helixer/README.md index c2477838b..c6a58357e 100644 --- a/workflows/genome_annotation/annotation_helixer/README.md +++ b/workflows/genome_annotation/annotation_helixer/README.md @@ -18,7 +18,8 @@ The final step is to view the generated annotation using a genome browser such a Helixer requires the genome sequence to be annotated, in fasta format. ## Output dataset for Helixer -Helixer produces a single output dataset: a GFF3 file. The GFF3 format is a standard bioinformatics format for storing genome annotations. Each row describes a genomic entity, with columns detailing its identifier, location, score and other attributes. +Helixer produces a single output dataset: a GFF3 file. + ## Input dataset for Genome Annotation Statistics This software requires a GFF3 file. In this workflow, the output generated is Helixer. 
@@ -36,7 +37,6 @@ In this workflow, GFFRead requires two inputs: ## Output dataset for GFFRead In this workflow, a unique output will be generated. This file, in fasta format, contains the protein sequences predicted from the annotation. - ## Input dataset for BUSCO BUSCO requires a fasta file. BUSCO will be used twice for this workflow. Firstly on the predicted protein sequences and secondly on the genome sequence. diff --git a/workflows/genome_annotation/annotation_helixer/plnmotmptestjobu509bkbh.json b/workflows/genome_annotation/annotation_helixer/plnmotmptestjobu509bkbh.json deleted file mode 100644 index 7a5d9f19c..000000000 --- a/workflows/genome_annotation/annotation_helixer/plnmotmptestjobu509bkbh.json +++ /dev/null @@ -1 +0,0 @@ -{"Input": {"class": "File", "location": "https://zenodo.org/records/13890774/files/genome_masked.fa?download=1", "filetype": "fasta"}} \ No newline at end of file From f021c32098f629447848e82598289b33be9efd46 Mon Sep 17 00:00:00 2001 From: rlibouban Date: Mon, 25 Nov 2024 14:47:37 +0100 Subject: [PATCH 14/20] error typo --- .../Galaxy-Workflow-annotation_helixer-tests.yml | 2 +- .../annotation_helixer/Galaxy-Workflow-annotation_helixer.ga | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/workflows/genome_annotation/annotation_helixer/Galaxy-Workflow-annotation_helixer-tests.yml b/workflows/genome_annotation/annotation_helixer/Galaxy-Workflow-annotation_helixer-tests.yml index 7b8b5f913..049f4ed65 100644 --- a/workflows/genome_annotation/annotation_helixer/Galaxy-Workflow-annotation_helixer-tests.yml +++ b/workflows/genome_annotation/annotation_helixer/Galaxy-Workflow-annotation_helixer-tests.yml @@ -1,6 +1,6 @@ - doc: Test outline for Helixer Workflow job: - genome: + Genome sequences: class: File location: https://zenodo.org/records/13890774/files/genome_masked.fa?download=1 filetype: fasta diff --git a/workflows/genome_annotation/annotation_helixer/Galaxy-Workflow-annotation_helixer.ga 
b/workflows/genome_annotation/annotation_helixer/Galaxy-Workflow-annotation_helixer.ga index 17e974dd9..e7a208f58 100644 --- a/workflows/genome_annotation/annotation_helixer/Galaxy-Workflow-annotation_helixer.ga +++ b/workflows/genome_annotation/annotation_helixer/Galaxy-Workflow-annotation_helixer.ga @@ -128,7 +128,7 @@ "name": "Input" } ], - "label": "Genome", + "label": "Genome sequence", "name": "Input dataset", "outputs": [], "position": { From 4409347ba13f5a76d3390933df2865d9a8c6ea34 Mon Sep 17 00:00:00 2001 From: rlibouban Date: Mon, 25 Nov 2024 14:50:40 +0100 Subject: [PATCH 15/20] fix lint --- .../Galaxy-Workflow-annotation_helixer-tests.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/workflows/genome_annotation/annotation_helixer/Galaxy-Workflow-annotation_helixer-tests.yml b/workflows/genome_annotation/annotation_helixer/Galaxy-Workflow-annotation_helixer-tests.yml index 049f4ed65..562bc8a67 100644 --- a/workflows/genome_annotation/annotation_helixer/Galaxy-Workflow-annotation_helixer-tests.yml +++ b/workflows/genome_annotation/annotation_helixer/Galaxy-Workflow-annotation_helixer-tests.yml @@ -1,6 +1,6 @@ - doc: Test outline for Helixer Workflow job: - Genome sequences: + Genome sequence: class: File location: https://zenodo.org/records/13890774/files/genome_masked.fa?download=1 filetype: fasta From 3afd41e6e202b56cc2304a7bebccba8107cfc511 Mon Sep 17 00:00:00 2001 From: rlibouban Date: Mon, 25 Nov 2024 15:12:01 +0100 Subject: [PATCH 16/20] fix lint --- .../annotation_helixer/Galaxy-Workflow-annotation_helixer.ga | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/workflows/genome_annotation/annotation_helixer/Galaxy-Workflow-annotation_helixer.ga b/workflows/genome_annotation/annotation_helixer/Galaxy-Workflow-annotation_helixer.ga index e7a208f58..2ae6994f3 100644 --- a/workflows/genome_annotation/annotation_helixer/Galaxy-Workflow-annotation_helixer.ga +++ 
b/workflows/genome_annotation/annotation_helixer/Galaxy-Workflow-annotation_helixer.ga @@ -578,8 +578,8 @@ "when": null, "workflow_outputs": [ { - "label": "omark_detail_sum", - "output_name": "omark_detail_sum", + "label": "omark detail sum", + "output_name": "omark detail sum", "uuid": "de489b9c-8808-47d4-9384-7617c33a9d34" } ] From d52f56118c087fd9751337631769a65536543c4d Mon Sep 17 00:00:00 2001 From: rlibouban Date: Mon, 2 Dec 2024 15:22:07 +0100 Subject: [PATCH 17/20] change database for OMArk --- .../Galaxy-Workflow-annotation_helixer-tests.yml | 2 +- .../annotation_helixer/Galaxy-Workflow-annotation_helixer.ga | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/workflows/genome_annotation/annotation_helixer/Galaxy-Workflow-annotation_helixer-tests.yml b/workflows/genome_annotation/annotation_helixer/Galaxy-Workflow-annotation_helixer-tests.yml index 562bc8a67..5ba8850ec 100644 --- a/workflows/genome_annotation/annotation_helixer/Galaxy-Workflow-annotation_helixer-tests.yml +++ b/workflows/genome_annotation/annotation_helixer/Galaxy-Workflow-annotation_helixer-tests.yml @@ -68,6 +68,6 @@ delta: 30000 omark detail sum: - location: https://zenodo.org/records/13890774/files/OMArk_Detailed_summary.txt?download=1 + location: https://zenodo.org/records/14260288/files/OMArk_Detailed_summary_LUCA.txt?download=1 compare: sim_size delta: 30000 \ No newline at end of file diff --git a/workflows/genome_annotation/annotation_helixer/Galaxy-Workflow-annotation_helixer.ga b/workflows/genome_annotation/annotation_helixer/Galaxy-Workflow-annotation_helixer.ga index 2ae6994f3..64d779160 100644 --- a/workflows/genome_annotation/annotation_helixer/Galaxy-Workflow-annotation_helixer.ga +++ b/workflows/genome_annotation/annotation_helixer/Galaxy-Workflow-annotation_helixer.ga @@ -571,7 +571,7 @@ "owner": "iuc", "tool_shed": "toolshed.g2.bx.psu.edu" }, - "tool_state": "{\"database\": \"Primates-v2.0.0.h5\", \"input\": {\"__class__\": \"ConnectedValue\"}, 
\"input_iso\": {\"__class__\": \"RuntimeValue\"}, \"omark_mode\": false, \"outputs\": \"detail_sum\", \"r\": null, \"t\": null, \"__page__\": null, \"__rerun_remap_job_id__\": null}", + "tool_state": "{\"database\": \"LUCA-v2.0.0.h5\", \"input\": {\"__class__\": \"ConnectedValue\"}, \"input_iso\": {\"__class__\": \"RuntimeValue\"}, \"omark_mode\": false, \"outputs\": \"detail_sum\", \"r\": null, \"t\": null, \"__page__\": null, \"__rerun_remap_job_id__\": null}", "tool_version": "0.3.0+galaxy2", "type": "tool", "uuid": "75e1dde7-5d60-4092-af57-cd7b065145e2", From 2b04d7779f910f9d0806ae5a853c93e95e2b8d48 Mon Sep 17 00:00:00 2001 From: rlibouban Date: Tue, 3 Dec 2024 17:09:28 +0100 Subject: [PATCH 18/20] small modification --- .../Galaxy-Workflow-annotation_helixer.ga | 73 ++++++++++++------- 1 file changed, 46 insertions(+), 27 deletions(-) diff --git a/workflows/genome_annotation/annotation_helixer/Galaxy-Workflow-annotation_helixer.ga b/workflows/genome_annotation/annotation_helixer/Galaxy-Workflow-annotation_helixer.ga index 64d779160..5fc774a40 100644 --- a/workflows/genome_annotation/annotation_helixer/Galaxy-Workflow-annotation_helixer.ga +++ b/workflows/genome_annotation/annotation_helixer/Galaxy-Workflow-annotation_helixer.ga @@ -204,7 +204,12 @@ "output_name": "output" } }, - "inputs": [], + "inputs": [ + { + "description": "runtime parameter for tool Busco", + "name": "input" + } + ], "label": "Busco on genome", "name": "Busco", "outputs": [ @@ -241,17 +246,12 @@ "owner": "iuc", "tool_shed": "toolshed.g2.bx.psu.edu" }, - "tool_state": "{\"adv\": {\"evalue\": \"0.001\", \"limit\": \"3\", \"contig_break\": \"10\"}, \"busco_mode\": {\"mode\": \"geno\", \"__current_case__\": 0, \"use_augustus\": {\"use_augustus_selector\": \"augustus\", \"__current_case__\": 2, \"aug_prediction\": {\"augustus_mode\": \"no\", \"__current_case__\": 0}, \"long\": false}}, \"input\": {\"__class__\": \"ConnectedValue\"}, \"lineage\": {\"lineage_mode\": \"auto_detect\", 
\"__current_case__\": 0, \"auto_lineage\": \"--auto-lineage\"}, \"lineage_conditional\": {\"selector\": \"cached\", \"__current_case__\": 0, \"cached_db\": \"v5\"}, \"outputs\": [\"short_summary\", \"image\", \"gff\", \"missing\"], \"__page__\": null, \"__rerun_remap_job_id__\": null}", + "tool_state": "{\"adv\": {\"evalue\": \"0.001\", \"limit\": \"3\", \"contig_break\": \"10\"}, \"busco_mode\": {\"mode\": \"geno\", \"__current_case__\": 0, \"use_augustus\": {\"use_augustus_selector\": \"augustus\", \"__current_case__\": 2, \"aug_prediction\": {\"augustus_mode\": \"no\", \"__current_case__\": 0}, \"long\": false}}, \"input\": {\"__class__\": \"RuntimeValue\"}, \"lineage\": {\"lineage_mode\": \"select_lineage\", \"__current_case__\": 1, \"lineage_dataset\": \"mucorales_odb10\"}, \"lineage_conditional\": {\"selector\": \"cached\", \"__current_case__\": 0, \"cached_db\": \"all+2024-03-21-114020\"}, \"outputs\": [\"short_summary\", \"image\", \"gff\", \"missing\"], \"__page__\": null, \"__rerun_remap_job_id__\": null}", "tool_version": "5.7.1+galaxy0", "type": "tool", "uuid": "c0e4cca7-0bc3-4ef2-81b2-c990b1b77d87", "when": null, "workflow_outputs": [ - { - "label": "busco missing genome", - "output_name": "busco_missing", - "uuid": "d039ef78-640f-4f7d-b449-69fac1a25130" - }, { "label": "busco gff genome", "output_name": "busco_gff", @@ -271,6 +271,11 @@ "label": "busco table genome", "output_name": "busco_table", "uuid": "5cbbd77a-f521-4ee6-b990-a494b7671534" + }, + { + "label": "busco missing genome", + "output_name": "busco_missing", + "uuid": "d039ef78-640f-4f7d-b449-69fac1a25130" } ] }, @@ -460,7 +465,12 @@ "output_name": "output_pep" } }, - "inputs": [], + "inputs": [ + { + "description": "runtime parameter for tool Busco", + "name": "input" + } + ], "label": "Busco on protein", "name": "Busco", "outputs": [ @@ -497,12 +507,22 @@ "owner": "iuc", "tool_shed": "toolshed.g2.bx.psu.edu" }, - "tool_state": "{\"adv\": {\"evalue\": \"0.001\", \"limit\": \"3\", 
\"contig_break\": \"10\"}, \"busco_mode\": {\"mode\": \"prot\", \"__current_case__\": 2}, \"input\": {\"__class__\": \"ConnectedValue\"}, \"lineage\": {\"lineage_mode\": \"auto_detect\", \"__current_case__\": 0, \"auto_lineage\": \"--auto-lineage\"}, \"lineage_conditional\": {\"selector\": \"cached\", \"__current_case__\": 0, \"cached_db\": \"v5\"}, \"outputs\": [\"short_summary\", \"image\", \"gff\", \"missing\"], \"__page__\": null, \"__rerun_remap_job_id__\": null}", + "tool_state": "{\"adv\": {\"evalue\": \"0.001\", \"limit\": \"3\", \"contig_break\": \"10\"}, \"busco_mode\": {\"mode\": \"prot\", \"__current_case__\": 2}, \"input\": {\"__class__\": \"RuntimeValue\"}, \"lineage\": {\"lineage_mode\": \"select_lineage\", \"__current_case__\": 1, \"lineage_dataset\": \"mucorales_odb10\"}, \"lineage_conditional\": {\"selector\": \"cached\", \"__current_case__\": 0, \"cached_db\": \"all+2024-03-21-114020\"}, \"outputs\": [\"short_summary\", \"image\", \"gff\", \"missing\"], \"__page__\": null, \"__rerun_remap_job_id__\": null}", "tool_version": "5.7.1+galaxy0", "type": "tool", "uuid": "51dcc6a4-ff87-4a98-98fa-de00ce54325f", "when": null, "workflow_outputs": [ + { + "label": "busco table peptides", + "output_name": "busco_table", + "uuid": "1a113d6c-a167-432b-8200-dfb3aedc4ba1" + }, + { + "label": "busco missing peptides", + "output_name": "busco_missing", + "uuid": "dc2d4533-d9c2-4cb0-a144-184e90fd4e01" + }, { "label": "busco gff peptides", "output_name": "busco_gff", @@ -517,16 +537,6 @@ "label": "busco sum peptides", "output_name": "busco_sum", "uuid": "f44047d9-e713-41d9-a9f9-5543f0371d9d" - }, - { - "label": "busco table peptides", - "output_name": "busco_table", - "uuid": "1a113d6c-a167-432b-8200-dfb3aedc4ba1" - }, - { - "label": "busco missing peptides", - "output_name": "busco_missing", - "uuid": "dc2d4533-d9c2-4cb0-a144-184e90fd4e01" } ] }, @@ -542,6 +552,10 @@ } }, "inputs": [ + { + "description": "runtime parameter for tool OMArk", + "name": "input" + }, { 
"description": "runtime parameter for tool OMArk", "name": "input_iso" @@ -551,17 +565,17 @@ "name": "OMArk", "outputs": [ { - "name": "omark detail sum", + "name": "omark_detail_sum", "type": "txt" }, { - "name": "omark sum", + "name": "omark_sum", "type": "sum" } ], "position": { - "left": 1167.994173976809, - "top": 375.00649693590475 + "left": 1339.8464912941502, + "top": 460.49653123113796 }, "post_job_actions": {}, "tool_id": "toolshed.g2.bx.psu.edu/repos/iuc/omark/omark/0.3.0+galaxy2", @@ -571,21 +585,26 @@ "owner": "iuc", "tool_shed": "toolshed.g2.bx.psu.edu" }, - "tool_state": "{\"database\": \"LUCA-v2.0.0.h5\", \"input\": {\"__class__\": \"ConnectedValue\"}, \"input_iso\": {\"__class__\": \"RuntimeValue\"}, \"omark_mode\": false, \"outputs\": \"detail_sum\", \"r\": null, \"t\": null, \"__page__\": null, \"__rerun_remap_job_id__\": null}", + "tool_state": "{\"database\": \"LUCA-v2.0.0.h5\", \"input\": {\"__class__\": \"RuntimeValue\"}, \"input_iso\": {\"__class__\": \"RuntimeValue\"}, \"omark_mode\": false, \"outputs\": \"detail_sum\", \"r\": null, \"t\": null, \"__page__\": null, \"__rerun_remap_job_id__\": null}", "tool_version": "0.3.0+galaxy2", "type": "tool", "uuid": "75e1dde7-5d60-4092-af57-cd7b065145e2", "when": null, "workflow_outputs": [ + { + "label": "omark sum", + "output_name": "omark_sum", + "uuid": "d1eb0367-c708-4794-a256-81dac775776a" + }, { "label": "omark detail sum", - "output_name": "omark detail sum", - "uuid": "de489b9c-8808-47d4-9384-7617c33a9d34" + "output_name": "omark_detail_sum", + "uuid": "90f0b094-b667-4e05-a1e4-e6388770a107" } ] } }, "tags": [], - "uuid": "7a0c9f35-37a9-404e-a307-aed30a578b0c", + "uuid": "24ab5ff5-6b5b-4caf-8cf0-c2f2ccc139e8", "version": 1 } \ No newline at end of file From 58acaec9fcb3f6ad714c3009de512683ad8eea3a Mon Sep 17 00:00:00 2001 From: mvdbeek Date: Tue, 3 Dec 2024 17:26:52 +0100 Subject: [PATCH 19/20] Rename workflow directory and add orcid to .dockstore.yml --- .../{annotation_helixer => 
annotation-helixer}/.dockstore.yml | 3 ++- .../{annotation_helixer => annotation-helixer}/CHANGELOG.md | 0 .../Galaxy-Workflow-annotation_helixer-tests.yml | 0 .../Galaxy-Workflow-annotation_helixer.ga | 0 .../{annotation_helixer => annotation-helixer}/README.md | 0 5 files changed, 2 insertions(+), 1 deletion(-) rename workflows/genome_annotation/{annotation_helixer => annotation-helixer}/.dockstore.yml (78%) rename workflows/genome_annotation/{annotation_helixer => annotation-helixer}/CHANGELOG.md (100%) rename workflows/genome_annotation/{annotation_helixer => annotation-helixer}/Galaxy-Workflow-annotation_helixer-tests.yml (100%) rename workflows/genome_annotation/{annotation_helixer => annotation-helixer}/Galaxy-Workflow-annotation_helixer.ga (100%) rename workflows/genome_annotation/{annotation_helixer => annotation-helixer}/README.md (100%) diff --git a/workflows/genome_annotation/annotation_helixer/.dockstore.yml b/workflows/genome_annotation/annotation-helixer/.dockstore.yml similarity index 78% rename from workflows/genome_annotation/annotation_helixer/.dockstore.yml rename to workflows/genome_annotation/annotation-helixer/.dockstore.yml index 1b44312f8..477497d68 100644 --- a/workflows/genome_annotation/annotation_helixer/.dockstore.yml +++ b/workflows/genome_annotation/annotation-helixer/.dockstore.yml @@ -8,4 +8,5 @@ workflows: - /Galaxy-Workflow-annotation_helixer-tests.yml authors: - name: Romane Libouban - email: romane.libouban@irisa.fr \ No newline at end of file + email: romane.libouban@irisa.fr + orcid: 0009-0001-4920-9951 diff --git a/workflows/genome_annotation/annotation_helixer/CHANGELOG.md b/workflows/genome_annotation/annotation-helixer/CHANGELOG.md similarity index 100% rename from workflows/genome_annotation/annotation_helixer/CHANGELOG.md rename to workflows/genome_annotation/annotation-helixer/CHANGELOG.md diff --git a/workflows/genome_annotation/annotation_helixer/Galaxy-Workflow-annotation_helixer-tests.yml 
b/workflows/genome_annotation/annotation-helixer/Galaxy-Workflow-annotation_helixer-tests.yml similarity index 100% rename from workflows/genome_annotation/annotation_helixer/Galaxy-Workflow-annotation_helixer-tests.yml rename to workflows/genome_annotation/annotation-helixer/Galaxy-Workflow-annotation_helixer-tests.yml diff --git a/workflows/genome_annotation/annotation_helixer/Galaxy-Workflow-annotation_helixer.ga b/workflows/genome_annotation/annotation-helixer/Galaxy-Workflow-annotation_helixer.ga similarity index 100% rename from workflows/genome_annotation/annotation_helixer/Galaxy-Workflow-annotation_helixer.ga rename to workflows/genome_annotation/annotation-helixer/Galaxy-Workflow-annotation_helixer.ga diff --git a/workflows/genome_annotation/annotation_helixer/README.md b/workflows/genome_annotation/annotation-helixer/README.md similarity index 100% rename from workflows/genome_annotation/annotation_helixer/README.md rename to workflows/genome_annotation/annotation-helixer/README.md From 10a453ad1ea142bae5c86f1de21bd5784aba9fa2 Mon Sep 17 00:00:00 2001 From: rlibouban Date: Thu, 12 Dec 2024 10:30:03 +0100 Subject: [PATCH 20/20] fix tests --- .../Galaxy-Workflow-annotation_helixer-tests.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/workflows/genome_annotation/annotation-helixer/Galaxy-Workflow-annotation_helixer-tests.yml b/workflows/genome_annotation/annotation-helixer/Galaxy-Workflow-annotation_helixer-tests.yml index 5ba8850ec..55091166c 100644 --- a/workflows/genome_annotation/annotation-helixer/Galaxy-Workflow-annotation_helixer-tests.yml +++ b/workflows/genome_annotation/annotation-helixer/Galaxy-Workflow-annotation_helixer-tests.yml @@ -30,7 +30,7 @@ busco table genome: location: https://zenodo.org/records/13890774/files/Busco_full_table_genome.tabular?download=1 compare: sim_size - delta: 30000 + delta: 500000 gffread peptides: location: https://zenodo.org/records/13890774/files/gffread_pep.fasta?download=1 @@ -53,7 
+53,7 @@ busco table peptides: location: https://zenodo.org/records/13890774/files/Busco_full_table_pep.tabular?download=1 compare: sim_size - delta: 30000 + delta: 500000 busco sum peptides: location: https://zenodo.org/records/13890774/files/Busco_short_summary_pep.txt?download=1 compare: sim_size