From 94c67d80f2823f81e6627d0a46cf323ddb7ff838 Mon Sep 17 00:00:00 2001 From: Michal-Babins Date: Mon, 3 Jul 2023 11:51:46 -0600 Subject: [PATCH 01/17] removed v before version bump --- .github/workflows/create_release.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/create_release.yml b/.github/workflows/create_release.yml index e3c7380..b91003f 100644 --- a/.github/workflows/create_release.yml +++ b/.github/workflows/create_release.yml @@ -36,6 +36,6 @@ jobs: run: zip -r bundle.zip *.wdl - name: Create Release - run: gh release create v${{ steps.bump_version.outputs.NEW_VERSION }} annotation_full.wdl bundle.zip + run: gh release create ${{ steps.bump_version.outputs.NEW_VERSION }} annotation_full.wdl bundle.zip env: GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} \ No newline at end of file From 7c904fd31946b1dc29db360a20f207c6681de1ee Mon Sep 17 00:00:00 2001 From: Michal-Babins Date: Mon, 3 Jul 2023 15:09:12 -0600 Subject: [PATCH 02/17] changed source to scaffold-lineage for img-annotation-pipeline --- Dockerfile | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/Dockerfile b/Dockerfile index ffd1704..4d748a0 100644 --- a/Dockerfile +++ b/Dockerfile @@ -89,9 +89,7 @@ FROM buildbase as img RUN \ cd /opt && \ - git clone https://code.jgi.doe.gov/img/img-pipelines/img-annotation-pipeline && \ - cd img-annotation-pipeline && \ - git reset --hard e6fe2f19f691180be0165cfef453d76e17d1f57c + git clone -b scaffold-lineage https://code.jgi.doe.gov/img/img-pipelines/img-annotation-pipeline RUN \ cd /opt && \ From 7a26f85abce2588cfe7d60eb96b29dc0701b67ed Mon Sep 17 00:00:00 2001 From: Michal-Babins Date: Mon, 3 Jul 2023 15:21:46 -0600 Subject: [PATCH 03/17] added create_scaffold_lineage to ko_ec and updated DB source --- functional-annotation.wdl | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/functional-annotation.wdl b/functional-annotation.wdl index c4c95ec..6daceac 100755 --- a/functional-annotation.wdl +++ b/functional-annotation.wdl @@ -3,12 +3,12 @@ workflow f_annotate { String imgap_project_type Int additional_threads File? input_contigs_fasta - File input_fasta + File input_fasta 20230629 String database_location Boolean ko_ec_execute=true - String ko_ec_img_nr_db="${database_location}"+"/IMG-NR/20211118/img_nr" - String ko_ec_md5_mapping="${database_location}"+"/IMG-NR/20211118/md5Hash2Data.txt" - String ko_ec_taxon_to_phylo_mapping="${database_location}"+"/IMG-NR/20211118/taxonOid2Taxonomy.txt" + String ko_ec_img_nr_db="${database_location}"+"/IMG-NR/20230629/img_nr" + String ko_ec_md5_mapping="${database_location}"+"/IMG-NR/20230629/md5Hash2Data.txt" + String ko_ec_taxon_to_phylo_mapping="${database_location}"+"/IMG-NR/20230629/taxonOid2Taxonomy.txt" String lastal_bin="/opt/omics/bin/lastal" String selector_bin="/opt/omics/bin/functional_annotation/lastal_img_nr_ko_ec_gene_phylo_hit_selector.py" Boolean smart_execute=true @@ -171,6 +171,7 @@ workflow f_annotate { File? phylo_tsv = ko_ec.phylo_tsv File? ko_ec_gff = ko_ec.gff File? last_blasttab = ko_ec.last_blasttab + File? lineage_sdb = ko_ec.lineage_sdb File? cog_gff = cog.gff File? pfam_gff = pfam.gff File? tigrfam_gff = tigrfam.gff @@ -226,7 +227,8 @@ task ko_ec { ${project_type} ${md5} ${phylo} \ ${project_id}_ko.tsv ${project_id}_ec.tsv \ ${project_id}_gene_phylogeny.tsv ${project_id}_ko_ec.gff \ - ${project_id}_proteins.img_nr.last.blasttab + ${project_id}_proteins.img_nr.last.blasttab && \ + python /opt/omics/bin/functional_annotation/create_scaffold_lineage.py ${project_id}_gene_phylogeny.tsv ${project_id}_lineage_sdb.tsv #get version information lastal_version="`${lastal} -V`" @@ -247,6 +249,7 @@ task ko_ec { File ec_tsv = "${project_id}_ec.tsv" File phylo_tsv = "${project_id}_gene_phylogeny.tsv" File gff = "${project_id}_ko_ec.gff" + File lineage_sdb = "${project_id}_lineage_sdb.tsv" String lastal_ver = read_string(lastal_version_file) String img_nr_db_ver = read_string(img_nr_db_version_file) } @@ -300,7 +303,6 @@ task smart { } task cog { - String project_id File input_fasta String cog_db @@ -628,3 +630,4 @@ task product_name { File tsv = "${project_id}_product_names.tsv" } } + From 340d80a0fcf2c1bb161d9377fb036856129a5f38 Mon Sep 17 00:00:00 2001 From: Michal-Babins Date: Mon, 3 Jul 2023 15:22:01 -0600 Subject: [PATCH 04/17] added lineage sdb --- annotation_full.wdl | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/annotation_full.wdl b/annotation_full.wdl index 368d60f..73b54f3 100644 --- a/annotation_full.wdl +++ b/annotation_full.wdl @@ -72,6 +72,7 @@ workflow annotation { ec_tsvs = f_annotate.ec_tsv, phylo_tsvs = f_annotate.phylo_tsv, last_blasttabs = f_annotate.last_blasttab, + lineage_sdb = f_annotate.lineage_sdb, proteins = s_annotate.proteins, genes = s_annotate.genes, ko_ec_gffs = f_annotate.ko_ec_gff, @@ -291,6 +292,7 @@ task merge_outputs { Array[File?] ec_tsvs Array[File?] phylo_tsvs Array[File?] last_blasttabs + Array[File?] lineage_sdb Array[File?] proteins Array[File?] genes Array[File?] ko_ec_gffs @@ -336,6 +338,7 @@ task merge_outputs { cat ${sep=" " ec_tsvs} > "${project_id}_ec.tsv" cat ${sep=" " phylo_tsvs} > "${project_id}_gene_phylogeny.tsv" cat ${sep=" " last_blasttabs} > "${project_id}_proteins.img_nr.last.blasttab" + cat ${sep=" " lineage_sdb} > "${project_id}.contigLin.assembled.sdb" cat ${sep=" " proteins} > "${project_id}_proteins.faa" cat ${sep=" " genes} > "${project_id}_genes.fna" cat ${sep=" " ko_ec_gffs} > "${project_id}_ko_ec.gff" @@ -378,6 +381,7 @@ task merge_outputs { File ec_tsv = "${project_id}_ec.tsv" File gene_phylogeny_tsv = "${project_id}_gene_phylogeny.tsv" File last_blasttab = "${project_id}_proteins.img_nr.last.blasttab" + File lineage_sdb = "${project_id}.contigLin.assembled.sdb" File proteins_faa = "${project_id}_proteins.faa" File genes_fna = "${project_id}_genes.fna" File ko_ec_gff = "${project_id}_ko_ec.gff" @@ -569,6 +573,7 @@ task finish_ano { File smart_gff File supfam_gff File gene_phylogeny_tsv + File lineage_sdb File cath_funfam_gff File crt_gff File genemark_gff @@ -607,6 +612,7 @@ task finish_ano { cat ${rfam_gff} | sed ${sed} > ${prefix}_rfam.gff cat ${crt_crisprs} | sed ${sed} > ${prefix}_crt.crisprs cat ${gene_phylogeny_tsv} | sed ${sed} > ${prefix}_gene_phylogeny.tsv + cat ${lineage_sdb} | sed ${sed} > ${prefix}.contigLin.assembled.sdb cat ${product_names_tsv} | sed ${sed} > ${prefix}_product_names.tsv cat ${ko_ec_gff} | sed ${sed} > ${prefix}_ko_ec.gff cat ${stats_tsv} | sed ${sed} > ${prefix}_stats.tsv @@ -647,7 +653,8 @@ task finish_ano { ${prefix}_product_names.tsv "Product names file" "Product Names" "Product names for ${proj}" \ ${prefix}_gene_phylogeny.tsv "Gene Phylogeny file" "Gene Phylogeny" "Gene Phylogeny for ${proj}"\ ${prefix}_crt.crisprs "Crispr Terms" "Crispr Terms" "Crispr Terms for ${proj}" \ - ${prefix}_stats.tsv "Annotation statistics report" "Annotation Statistics" "Annotation Stats for ${proj}" + ${prefix}_stats.tsv "Annotation statistics report" "Annotation Statistics" "Annotation Stats for ${proj}" \ + ${prefix}.contigLin.assembled.sdb "Phylogeny at the contig level" "Lineage sdb" "Lineage sdb for ${proj}" } @@ -678,6 +685,7 @@ task finish_ano { # File final_proteins_supfam_domtblout = "${prefix}_proteins.supfam.domtblout" # File final_proteins_cath_funfam_domtblout = "${prefix}_proteins.cath_funfam.domtblout" File final_product_names_tsv = "${prefix}_product_names.tsv" + File final_lineage_sdb = "${prefix}.contigLin.assembled.sdb" File final_crt_crisprs = "${prefix}_crt.crisprs" File final_tsv = "${prefix}_stats.tsv" From 7405a95f048bf158d86c241fcc8f0ca885ec6002 Mon Sep 17 00:00:00 2001 From: Michal-Babins Date: Wed, 5 Jul 2023 09:09:37 -0600 Subject: [PATCH 05/17] updated last version to 1458 --- Dockerfile | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/Dockerfile b/Dockerfile index 4d748a0..fbc63d3 100644 --- a/Dockerfile +++ b/Dockerfile @@ -55,16 +55,14 @@ RUN \ gcc -std=gnu99 -O3 -fomit-frame-pointer -fstrict-aliasing -march=core2 -fopenmp -fPIC -msse2 -DHAVE_CONFIG_H -L../easel -L./impl_sse -L../libdivsufsort -L. -o hpc_hmmsearch hpc_hmmsearch.o -lhmmer -leasel -ldivsufsort -lm && \ cp hpc_hmmsearch /opt/omics/programs/hmmer/bin/ && \ /opt/omics/programs/hmmer/bin/hpc_hmmsearch -h -# Build last 1256 +# Build last 1458 # FROM buildbase as last RUN apt-get -y install g++ RUN \ - git clone --depth 1 --branch 1256 https://gitlab.com/mcfrith/last - -RUN \ + git clone --depth 1 --branch 1458 https://gitlab.com/mcfrith/last && \ cd last && \ make && \ make prefix=/opt/omics/programs/last install From d433a442bf80ff34fd7809168cce187e01192b4e Mon Sep 17 00:00:00 2001 From: Michal-Babins Date: Thu, 6 Jul 2023 07:41:42 -0600 Subject: [PATCH 06/17] use version 1456 of lastal --- Dockerfile | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/Dockerfile b/Dockerfile index fbc63d3..433ee63 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,4 +1,4 @@ -FROM debian as buildbase +FROM debian:bullseye as buildbase RUN apt-get -y update && apt-get -y install git gcc make wget time autoconf unzip curl @@ -55,14 +55,14 @@ RUN \ gcc -std=gnu99 -O3 -fomit-frame-pointer -fstrict-aliasing -march=core2 -fopenmp -fPIC -msse2 -DHAVE_CONFIG_H -L../easel -L./impl_sse -L../libdivsufsort -L. -o hpc_hmmsearch hpc_hmmsearch.o -lhmmer -leasel -ldivsufsort -lm && \ cp hpc_hmmsearch /opt/omics/programs/hmmer/bin/ && \ /opt/omics/programs/hmmer/bin/hpc_hmmsearch -h -# Build last 1458 +# Build last 1456 # FROM buildbase as last RUN apt-get -y install g++ RUN \ - git clone --depth 1 --branch 1458 https://gitlab.com/mcfrith/last && \ + git clone --depth 1 --branch 1456 https://gitlab.com/mcfrith/last && \ cd last && \ make && \ make prefix=/opt/omics/programs/last install @@ -104,7 +104,7 @@ RUN \ #chmod -R 755 omics && \ rm gms2_linux_64.v1.14_1.25_lic.tar.gz -RUN apt-get install -y openjdk-11-jdk +RUN apt-get update && apt-get install -y openjdk-11-jdk # get CRT version 1.8.4 RUN \ wget https://code.jgi.doe.gov/img/img-pipelines/crt-cli-imgap-version/-/archive/main/crt-cli-imgap-version-main.zip && \ From cbd4af17110c29aae32993981498bde6ee746551 Mon Sep 17 00:00:00 2001 From: Michal-Babins Date: Thu, 6 Jul 2023 07:42:25 -0600 Subject: [PATCH 07/17] changed container to use 5.1.14.2 version of the image --- annotation_full.wdl | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/annotation_full.wdl b/annotation_full.wdl index 73b54f3..7ed9f31 100644 --- a/annotation_full.wdl +++ b/annotation_full.wdl @@ -13,8 +13,7 @@ workflow annotation { String imgap_project_type="metagenome" String? gm_license="/refdata/licenses/.gmhmmp2_key" Int additional_threads=16 - # 5.1.14.1` -> sha256:e3e3fff75aeb3a6e321054d4bc9d8c8c925dcfb9245d60247ab29c3b24c4bc75 - String container="microbiomedata/img-omics@sha256:5c7f95bbffb53e6b7ba6899705fd83ad3c8bb88046c476952a7b9ca53a93888f" + String container="microbiomedata/img-omics@sha256:d5f4306bf36a97d55a3710280b940b89d7d4aca76a343e75b0e250734bc82b71" # structural annotation Boolean sa_execute=true From da16846e1d01aa7bee93cc863e47e69e112fbb4d Mon Sep 17 00:00:00 2001 From: Michal-Babins Date: Thu, 6 Jul 2023 09:08:31 -0600 Subject: [PATCH 08/17] fixed variable reference for input_fasta --- functional-annotation.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/functional-annotation.wdl b/functional-annotation.wdl index 6daceac..434e82a 100755 --- a/functional-annotation.wdl +++ b/functional-annotation.wdl @@ -3,7 +3,7 @@ workflow f_annotate { String imgap_project_type Int additional_threads File? input_contigs_fasta - File input_fasta 20230629 + File input_fasta String database_location Boolean ko_ec_execute=true String ko_ec_img_nr_db="${database_location}"+"/IMG-NR/20230629/img_nr" From 0671fe1d78977ef2f2c74088326d3712ecca404b Mon Sep 17 00:00:00 2001 From: Michal-Babins Date: Thu, 6 Jul 2023 09:09:15 -0600 Subject: [PATCH 09/17] removed unused variables, fixed lineage_sdb reference --- annotation_full.wdl | 59 +++------------------------------------------ 1 file changed, 4 insertions(+), 55 deletions(-) diff --git a/annotation_full.wdl b/annotation_full.wdl index 7ed9f31..4568206 100644 --- a/annotation_full.wdl +++ b/annotation_full.wdl @@ -3,10 +3,6 @@ import "./functional-annotation.wdl" as fa workflow annotation { String proj - String resource - String informed_by - String? git_url="https://github.com/microbiomedata/mg_annotation/releases/tag/0.1" - String? url_root="https://data.microbiomedata.org/data/" String input_file String imgap_project_id String database_location="/refdata/img/" @@ -71,7 +67,7 @@ workflow annotation { ec_tsvs = f_annotate.ec_tsv, phylo_tsvs = f_annotate.phylo_tsv, last_blasttabs = f_annotate.last_blasttab, - lineage_sdb = f_annotate.lineage_sdb, + lineage_sdbs = f_annotate.lineage_sdb, proteins = s_annotate.proteins, genes = s_annotate.genes, ko_ec_gffs = f_annotate.ko_ec_gff, @@ -144,15 +140,12 @@ workflow annotation { input_file=stage.imgap_input_fasta, proj=proj, start=stage.start, - resource=resource, - url_root=url_root, - git_url=git_url, - informed_by=informed_by, proteins_faa = merge_outputs.proteins_faa, structural_gff = merge_outputs.structural_gff, ko_ec_gff = merge_outputs.ko_ec_gff, gene_phylogeny_tsv = merge_outputs.gene_phylogeny_tsv, functional_gff = merge_outputs.functional_gff, + lineage_sdb = merge_outputs.lineage_sdb, ko_tsv = merge_outputs.ko_tsv, ec_tsv = merge_outputs.ec_tsv, stats_tsv = final_stats.tsv, @@ -201,7 +194,6 @@ workflow annotation { # File? proteins_cath_funfam_domtblout = finish_ano.final_proteins_cath_funfam_domtblout File? product_names_tsv = finish_ano.final_product_names_tsv File? crt_crisprs = finish_ano.final_crt_crisprs - File? ano_objects = finish_ano.objects File imgap_version = make_info_file.imgap_info } @@ -291,7 +283,7 @@ task merge_outputs { Array[File?] ec_tsvs Array[File?] phylo_tsvs Array[File?] last_blasttabs - Array[File?] lineage_sdb + Array[File?] lineage_sdbs Array[File?] proteins Array[File?] genes Array[File?] ko_ec_gffs @@ -337,7 +329,7 @@ task merge_outputs { cat ${sep=" " ec_tsvs} > "${project_id}_ec.tsv" cat ${sep=" " phylo_tsvs} > "${project_id}_gene_phylogeny.tsv" cat ${sep=" " last_blasttabs} > "${project_id}_proteins.img_nr.last.blasttab" - cat ${sep=" " lineage_sdb} > "${project_id}.contigLin.assembled.sdb" + cat ${sep=" " lineage_sdbs} > "${project_id}.contigLin.assembled.sdb" cat ${sep=" " proteins} > "${project_id}_proteins.faa" cat ${sep=" " genes} > "${project_id}_genes.fna" cat ${sep=" " ko_ec_gffs} > "${project_id}_ko_ec.gff" @@ -556,10 +548,6 @@ task finish_ano { String proj String prefix=sub(proj, ":", "_") String start - String informed_by - String resource - String url_root - String git_url File input_file File proteins_faa File structural_gff @@ -616,49 +604,10 @@ task finish_ano { cat ${ko_ec_gff} | sed ${sed} > ${prefix}_ko_ec.gff cat ${stats_tsv} | sed ${sed} > ${prefix}_stats.tsv cat ${stats_json} | sed ${sed} > ${prefix}_stats.json - nmdc gff2json ${prefix}_functional_annotation.gff -of features.json -oa annotations.json -ai ${informed_by} - - /scripts/generate_object_json.py \ - --type "nmdc:MetagenomeAnnotationActivity" \ - --set metagenome_annotation_activity_set \ - --part ${proj} \ - -p "name=Annotation Activity for ${proj}" \ - was_informed_by=${informed_by} \ - started_at_time=${start} \ - ended_at_time=$end \ - execution_resource="${resource}" \ - git_url=${git_url} \ - version="v1.0.1-beta" \ - --url ${url_root}${proj}/annotation/ \ - --inputs ${input_file} \ - --outputs \ - ${prefix}_proteins.faa "FASTA amino acid file for annotated proteins" "Annotation Amino Acid FASTA" "FASTA Amino Acid File for ${proj}" \ - ${prefix}_structural_annotation.gff "GFF3 format file with structural annotations" "Structural Annotation GFF" "Structural Annotation for ${proj}" \ - ${prefix}_functional_annotation.gff "GFF3 format file with functional annotations" "Functional Annotation GFF" "Functional Annotation for ${proj}" \ - ${prefix}_ko.tsv "Tab delimited file for KO annotation" "Annotation KEGG Orthology" "KEGG Orthology for ${proj}" \ - ${prefix}_ec.tsv "Tab delimited file for EC annotation" "Annotation Enzyme Commission" "EC Annotations for ${proj}" \ - ${prefix}_cog.gff "GFF3 format file with COGs" "Clusters of Orthologous Groups (COG) Annotation GFF" "COGs for ${proj}" \ - ${prefix}_pfam.gff "GFF3 format file with Pfam" "Pfam Annotation GFF" "Pfam Annotation for ${proj}" \ - ${prefix}_tigrfam.gff "GFF3 format file with TIGRfam" "TIGRFam Annotation GFF" "TIGRFam for ${proj}" \ - ${prefix}_smart.gff "GFF3 format file with SMART" "SMART Annotation GFF" "SMART Annotations for ${proj}" \ - ${prefix}_supfam.gff "GFF3 format file with SUPERFam" "SUPERFam Annotation GFF" "SUPERFam Annotations for ${proj}" \ - ${prefix}_cath_funfam.gff "GFF3 format file with CATH FunFams" "CATH FunFams (Functional Families) Annotation GFF" "CATH FunFams for ${proj}" \ - ${prefix}_crt.gff "GFF3 format file with CRT" "CRT Annotation GFF" "CRT Annotations for ${proj}" \ - ${prefix}_genemark.gff "GFF3 format file with Genemark" "Genemark Annotation GFF" "Genemark Annotations for ${proj}" \ - ${prefix}_prodigal.gff "GFF3 format file with Prodigal" "Prodigal Annotation GFF" "Prodigal Annotations ${proj}" \ - ${prefix}_trna.gff "GFF3 format file with TRNA" "TRNA Annotation GFF" "TRNA Annotations ${proj}" \ - ${prefix}_rfam.gff "GFF3 format file with RFAM" "RFAM Annotation GFF" "RFAM Annotations for ${proj}" \ - ${prefix}_ko_ec.gff "GFF3 format file with KO_EC" "KO_EC Annotation GFF" "KO_EC Annotations for ${proj}" \ - ${prefix}_product_names.tsv "Product names file" "Product Names" "Product names for ${proj}" \ - ${prefix}_gene_phylogeny.tsv "Gene Phylogeny file" "Gene Phylogeny" "Gene Phylogeny for ${proj}"\ - ${prefix}_crt.crisprs "Crispr Terms" "Crispr Terms" "Crispr Terms for ${proj}" \ - ${prefix}_stats.tsv "Annotation statistics report" "Annotation Statistics" "Annotation Stats for ${proj}" \ - ${prefix}.contigLin.assembled.sdb "Phylogeny at the contig level" "Lineage sdb" "Lineage sdb for ${proj}" } output { - File objects = "objects.json" File final_functional_gff = "${prefix}_functional_annotation.gff" File final_structural_gff = "${prefix}_structural_annotation.gff" File final_ko_tsv = "${prefix}_ko.tsv" From 06453e993a6d1bde0f58a5ba2ff509d5e8945891 Mon Sep 17 00:00:00 2001 From: Michal-Babins Date: Thu, 6 Jul 2023 11:58:16 -0600 Subject: [PATCH 10/17] update docker image to 5.2.0 --- functional-annotation.wdl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/functional-annotation.wdl b/functional-annotation.wdl index 434e82a..72f7237 100755 --- a/functional-annotation.wdl +++ b/functional-annotation.wdl @@ -41,8 +41,8 @@ workflow f_annotate { String product_assign_bin="/opt/omics/bin/functional_annotation/assign_product_names_and_create_fa_gff.py" String product_names_mapping_dir="${database_location}"+"/Product_Name_Mappings/latest" String container - String hmm_container="microbiomedata/img-omics@sha256:e3e3fff75aeb3a6e321054d4bc9d8c8c925dcfb9245d60247ab29c3b24c4bc75" - String last_container="microbiomedata/img-omics@sha256:e3e3fff75aeb3a6e321054d4bc9d8c8c925dcfb9245d60247ab29c3b24c4bc75" + String hmm_container="microbiomedata/img-omics@sha256:d5f4306bf36a97d55a3710280b940b89d7d4aca76a343e75b0e250734bc82b71" + String last_container="microbiomedata/img-omics@sha256:d5f4306bf36a97d55a3710280b940b89d7d4aca76a343e75b0e250734bc82b71" if(ko_ec_execute) { call ko_ec { From 469dc80593d72b1d144cef16334b0a480c53c4af Mon Sep 17 00:00:00 2001 From: aclum Date: Fri, 7 Jul 2023 10:17:16 -0700 Subject: [PATCH 11/17] Update trnascan.wdl Update transcan.wdl to microbiomedata/img-omics@sha256:d5f4306bf36a97d55a3710280b940b89d7d4aca76a343e75b0e250734bc82b71 --- trnascan.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/trnascan.wdl b/trnascan.wdl index c4d86ad..1f18915 100755 --- a/trnascan.wdl +++ b/trnascan.wdl @@ -3,7 +3,7 @@ workflow trnascan { String imgap_project_id String imgap_project_type Int additional_threads - String container = "microbiomedata/img-omics@sha256:e3e3fff75aeb3a6e321054d4bc9d8c8c925dcfb9245d60247ab29c3b24c4bc75" + String container = "microbiomedata/img-omics@sha256:d5f4306bf36a97d55a3710280b940b89d7d4aca76a343e75b0e250734bc82b71" call trnascan_ba { input: From 8f2f6f15f42ee98bc76e3acf49688f96f3bf94de Mon Sep 17 00:00:00 2001 From: aclum Date: Fri, 7 Jul 2023 10:18:01 -0700 Subject: [PATCH 12/17] Update annotation_mt_full.wdl update annotation_mt_full.wdl container to microbiomedata/img-omics@sha256:d5f4306bf36a97d55a3710280b940b89d7d4aca76a343e75b0e250734bc82b71 --- annotation_mt_full.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/annotation_mt_full.wdl b/annotation_mt_full.wdl index 785d58c..6e66297 100755 --- a/annotation_mt_full.wdl +++ b/annotation_mt_full.wdl @@ -7,7 +7,7 @@ workflow annotation { String database_location="/refdata/img/" String imgap_project_type="metagenome" Int additional_threads=16 - String container="microbiomedata/img-omics@sha256:e3e3fff75aeb3a6e321054d4bc9d8c8c925dcfb9245d60247ab29c3b24c4bc75" + String container="microbiomedata/img-omics@sha256:d5f4306bf36a97d55a3710280b940b89d7d4aca76a343e75b0e250734bc82b71" String bc_bin="/miniconda3/bin/bc" # structural annotation Boolean sa_execute=true From 044a46751df3f24754d3191f26f8a4fa618955f1 Mon Sep 17 00:00:00 2001 From: aclum Date: Fri, 7 Jul 2023 10:18:47 -0700 Subject: [PATCH 13/17] Update test-small.wdl Update container for test-small to "microbiomedata/img-omics@sha256:d5f4306bf36a97d55a3710280b940b89d7d4aca76a343e75b0e250734bc82b71" --- test-small.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test-small.wdl b/test-small.wdl index 3fb2364..3e22107 100755 --- a/test-small.wdl +++ b/test-small.wdl @@ -1,7 +1,7 @@ import "annotation_full.wdl" as awf workflow test_small { - String container="microbiomedata/img-omics@sha256:e3e3fff75aeb3a6e321054d4bc9d8c8c925dcfb9245d60247ab29c3b24c4bc75" + String container="microbiomedata/img-omics@sha256:d5f4306bf36a97d55a3710280b940b89d7d4aca76a343e75b0e250734bc82b71" String proj="Testsmall" String database="/refdata/img/" String url="https://portal.nersc.gov/project/m3408/test_data" From 8b577c9f0eba7797aa28d73298199d5c73f01205 Mon Sep 17 00:00:00 2001 From: Michal-Babins Date: Mon, 10 Jul 2023 12:21:38 -0600 Subject: [PATCH 14/17] updated to track version file --- .github/workflows/create_release.yml | 27 ++++++++------------------- 1 file changed, 8 insertions(+), 19 deletions(-) diff --git a/.github/workflows/create_release.yml b/.github/workflows/create_release.yml index b91003f..9da096e 100644 --- a/.github/workflows/create_release.yml +++ b/.github/workflows/create_release.yml @@ -4,6 +4,8 @@ on: push: branches: - master + paths: + - 'version.txt' jobs: release: @@ -12,30 +14,17 @@ jobs: steps: - name: Checkout code uses: actions/checkout@v2 - - - name: Get latest release - id: latest_release - run: | - LATEST_RELEASE=$(curl --silent "https://api.github.com/repos/$GITHUB_REPOSITORY/releases/latest" | jq -r .tag_name) - echo "Latest release: $LATEST_RELEASE" - echo "LATEST_RELEASE=$LATEST_RELEASE" >> $GITHUB_OUTPUT - env: - GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - - name: Bump patch version - id: bump_version + - name: Read version + id: get_version run: | - LATEST_VERSION=${{ steps.latest_release.outputs.LATEST_RELEASE }} - IFS='.' read -ra VERSION_PARTS <<< "$LATEST_VERSION" - PATCH_BUMP=$(( ${VERSION_PARTS[2]} + 1 )) - NEW_VERSION="${VERSION_PARTS[0]}.${VERSION_PARTS[1]}.$PATCH_BUMP" - echo "New version: $NEW_VERSION" - echo "NEW_VERSION=$NEW_VERSION" >> $GITHUB_OUTPUT - + VERSION=$(cat version.txt) + echo "VERSION=${VERSION}" >> $GITHUB_ENV + - name: Create bundle zip run: zip -r bundle.zip *.wdl - name: Create Release - run: gh release create ${{ steps.bump_version.outputs.NEW_VERSION }} annotation_full.wdl bundle.zip + run: gh release create ${{ env.VERSION }} annotation_full.wdl bundle.zip env: GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} \ No newline at end of file From 0be5db25308b1bf143c48a552adf3ebd16142b01 Mon Sep 17 00:00:00 2001 From: Michal-Babins Date: Mon, 10 Jul 2023 12:21:49 -0600 Subject: [PATCH 15/17] version.txt file --- version.txt | 1 + 1 file changed, 1 insertion(+) create mode 100644 version.txt diff --git a/version.txt b/version.txt new file mode 100644 index 0000000..e946d6b --- /dev/null +++ b/version.txt @@ -0,0 +1 @@ +v1.0.3 From c498912aedb18eeb170b51fab2b5a82602806baf Mon Sep 17 00:00:00 2001 From: Michal-Babins Date: Tue, 11 Jul 2023 13:32:18 -0600 Subject: [PATCH 16/17] changed lineage_sdb to lineage_tsv --- annotation_full.wdl | 16 ++++++++-------- functional-annotation.wdl | 4 ++-- 2 files changed, 10 insertions(+), 10 deletions(-) diff --git a/annotation_full.wdl b/annotation_full.wdl index 4568206..d7a0e39 100644 --- a/annotation_full.wdl +++ b/annotation_full.wdl @@ -67,7 +67,7 @@ workflow annotation { ec_tsvs = f_annotate.ec_tsv, phylo_tsvs = f_annotate.phylo_tsv, last_blasttabs = f_annotate.last_blasttab, - lineage_sdbs = f_annotate.lineage_sdb, + lineage_tsvs = f_annotate.lineage_tsv, proteins = s_annotate.proteins, genes = s_annotate.genes, ko_ec_gffs = f_annotate.ko_ec_gff, @@ -145,7 +145,7 @@ workflow annotation { ko_ec_gff = merge_outputs.ko_ec_gff, gene_phylogeny_tsv = merge_outputs.gene_phylogeny_tsv, functional_gff = merge_outputs.functional_gff, - lineage_sdb = merge_outputs.lineage_sdb, + lineage_tsv = merge_outputs.lineage_tsv, ko_tsv = merge_outputs.ko_tsv, ec_tsv = merge_outputs.ec_tsv, stats_tsv = final_stats.tsv, @@ -283,7 +283,7 @@ task merge_outputs { Array[File?] ec_tsvs Array[File?] phylo_tsvs Array[File?] last_blasttabs - Array[File?] lineage_sdbs + Array[File?] lineage_tsvs Array[File?] proteins Array[File?] genes Array[File?] ko_ec_gffs @@ -329,7 +329,7 @@ task merge_outputs { cat ${sep=" " ec_tsvs} > "${project_id}_ec.tsv" cat ${sep=" " phylo_tsvs} > "${project_id}_gene_phylogeny.tsv" cat ${sep=" " last_blasttabs} > "${project_id}_proteins.img_nr.last.blasttab" - cat ${sep=" " lineage_sdbs} > "${project_id}.contigLin.assembled.sdb" + cat ${sep=" " lineage_tsvs} > "${project_id}.contigLin.assembled.tsv" cat ${sep=" " proteins} > "${project_id}_proteins.faa" cat ${sep=" " genes} > "${project_id}_genes.fna" cat ${sep=" " ko_ec_gffs} > "${project_id}_ko_ec.gff" @@ -372,7 +372,7 @@ task merge_outputs { File ec_tsv = "${project_id}_ec.tsv" File gene_phylogeny_tsv = "${project_id}_gene_phylogeny.tsv" File last_blasttab = "${project_id}_proteins.img_nr.last.blasttab" - File lineage_sdb = "${project_id}.contigLin.assembled.sdb" + File lineage_tsv = "${project_id}.contigLin.assembled.tsv" File proteins_faa = "${project_id}_proteins.faa" File genes_fna = "${project_id}_genes.fna" File ko_ec_gff = "${project_id}_ko_ec.gff" @@ -560,7 +560,7 @@ task finish_ano { File smart_gff File supfam_gff File gene_phylogeny_tsv - File lineage_sdb + File lineage_tsv File cath_funfam_gff File crt_gff File genemark_gff @@ -599,7 +599,7 @@ task finish_ano { cat ${rfam_gff} | sed ${sed} > ${prefix}_rfam.gff cat ${crt_crisprs} | sed ${sed} > ${prefix}_crt.crisprs cat ${gene_phylogeny_tsv} | sed ${sed} > ${prefix}_gene_phylogeny.tsv - cat ${lineage_sdb} | sed ${sed} > ${prefix}.contigLin.assembled.sdb + cat ${lineage_tsv} | sed ${sed} > ${prefix}.contigLin.assembled.tsv cat ${product_names_tsv} | sed ${sed} > ${prefix}_product_names.tsv cat ${ko_ec_gff} | sed ${sed} > ${prefix}_ko_ec.gff cat ${stats_tsv} | sed ${sed} > ${prefix}_stats.tsv @@ -633,7 +633,7 @@ task finish_ano { # File final_proteins_supfam_domtblout = "${prefix}_proteins.supfam.domtblout" # File final_proteins_cath_funfam_domtblout = "${prefix}_proteins.cath_funfam.domtblout" File final_product_names_tsv = "${prefix}_product_names.tsv" - File final_lineage_sdb = "${prefix}.contigLin.assembled.sdb" + File final_lineage_tsv = "${prefix}.contigLin.assembled.tsv" File final_crt_crisprs = "${prefix}_crt.crisprs" File final_tsv = "${prefix}_stats.tsv" diff --git a/functional-annotation.wdl b/functional-annotation.wdl index 72f7237..67fd734 100755 --- a/functional-annotation.wdl +++ b/functional-annotation.wdl @@ -171,7 +171,7 @@ workflow f_annotate { File? phylo_tsv = ko_ec.phylo_tsv File? ko_ec_gff = ko_ec.gff File? last_blasttab = ko_ec.last_blasttab - File? lineage_sdb = ko_ec.lineage_sdb + File? lineage_tsv = ko_ec.lineage_tsv File? cog_gff = cog.gff File? pfam_gff = pfam.gff File? tigrfam_gff = tigrfam.gff @@ -249,7 +249,7 @@ task ko_ec { File ec_tsv = "${project_id}_ec.tsv" File phylo_tsv = "${project_id}_gene_phylogeny.tsv" File gff = "${project_id}_ko_ec.gff" - File lineage_sdb = "${project_id}_lineage_sdb.tsv" + File lineage_tsv = "${project_id}_lineage_sdb.tsv" String lastal_ver = read_string(lastal_version_file) String img_nr_db_ver = read_string(img_nr_db_version_file) } From 694d22101e3b0fbe09dcecafcd42420cabedd15b Mon Sep 17 00:00:00 2001 From: aclum Date: Tue, 11 Jul 2023 15:15:43 -0700 Subject: [PATCH 17/17] Update functional-annotation.wdl Update name of scaffold_lineage file to *scaffold_lineage.tsv --- functional-annotation.wdl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/functional-annotation.wdl b/functional-annotation.wdl index 67fd734..2f191b0 100755 --- a/functional-annotation.wdl +++ b/functional-annotation.wdl @@ -228,7 +228,7 @@ task ko_ec { ${project_id}_ko.tsv ${project_id}_ec.tsv \ ${project_id}_gene_phylogeny.tsv ${project_id}_ko_ec.gff \ ${project_id}_proteins.img_nr.last.blasttab && \ - python /opt/omics/bin/functional_annotation/create_scaffold_lineage.py ${project_id}_gene_phylogeny.tsv ${project_id}_lineage_sdb.tsv + python /opt/omics/bin/functional_annotation/create_scaffold_lineage.py ${project_id}_gene_phylogeny.tsv ${project_id}_scaffold_lineage.tsv #get version information lastal_version="`${lastal} -V`" @@ -249,7 +249,7 @@ task ko_ec { File ec_tsv = "${project_id}_ec.tsv" File phylo_tsv = "${project_id}_gene_phylogeny.tsv" File gff = "${project_id}_ko_ec.gff" - File lineage_tsv = "${project_id}_lineage_sdb.tsv" + File lineage_tsv = "${project_id}_scaffold_lineage.tsv" String lastal_ver = read_string(lastal_version_file) String img_nr_db_ver = read_string(img_nr_db_version_file) }