From aee52bf0fd1b4587b1de277b092fc58eefcbaab5 Mon Sep 17 00:00:00 2001
From: Kaitlyn Jiayi Li <kli@lanl.gov>
Date: Tue, 25 Jun 2024 07:36:23 -0700
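Subject: [PATCH] Remove most legacy conditionals

Drop the map_execute flag and the per-tool *_execute Boolean inputs,
make the contig mapping file and renamed FASTA required (non-optional)
outputs, always pass every GFF input to the merger in gff_merge, and
delete prodigal.wdl. Also enable strict shell mode in the test-small
tasks.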

---
 annotation_full.wdl       |  52 ++++------
 functional-annotation.wdl |   9 +-
 prodigal.wdl              | 213 --------------------------------------
 structural-annotation.wdl |  59 ++---------
 test-small.wdl            |   3 +-
 5 files changed, 29 insertions(+), 307 deletions(-)
 delete mode 100755 prodigal.wdl

diff --git a/annotation_full.wdl b/annotation_full.wdl
index d596215..9a94410 100644
--- a/annotation_full.wdl
+++ b/annotation_full.wdl
@@ -10,7 +10,7 @@ input {
   String  imgap_project_id
   String  database_location="/refdata/img/"
   String  imgap_project_type="metagenome"
-  String  gm_license="/refdata/licenses/.gmhmmp2_key"
+  String?  gm_license="/refdata/licenses/.gmhmmp2_key"
   Int     additional_threads=16
   String  container="microbiomedata/img-omics@sha256:d5f4306bf36a97d55a3710280b940b89d7d4aca76a343e75b0e250734bc82b71"
 
@@ -117,7 +117,6 @@ input {
        container=container,
        sa_execute = sa_execute,
        fa_execute = fa_execute,
-       map_execute = make_map_file.map_execute,
        map_info = make_map_file.out_log,
        structural_gff  = merge_outputs.structural_gff,
        imgap_version = split.imgap_version,
@@ -176,7 +175,6 @@ input {
       rfam_gff = merge_outputs.rfam_gff,
       product_names_tsv = merge_outputs.product_names_tsv,
       crt_crisprs = merge_outputs.crt_crisprs,
-      map_execute = make_map_file.map_execute,
       map_file = make_map_file.map_file,
       renamed_fasta = make_map_file.out_fasta
   }
@@ -212,8 +210,8 @@ input {
     File product_names_tsv = finish_ano.final_product_names_tsv
     File crt_crisprs = finish_ano.final_crt_crisprs
     File imgap_version = finish_ano.final_version
-    File? renamed_fasta = finish_ano.final_renamed_fasta
-    File? map_file = finish_ano.final_map_file
+    File renamed_fasta = finish_ano.final_renamed_fasta
+    File map_file = finish_ano.final_map_file
   }
 
   parameter_meta {
@@ -275,29 +273,21 @@ task make_map_file {
   }
 
   command <<<
-  find_prefix=`grep ~{proj_id} ~{input_file} | head -1`
-
-  set -euo pipefail
-  if [[ $find_prefix ]]
-  then
-    echo "false" > run_map.txt
-    ln ~{input_file} ~{prefix}_map.fasta || ln -s ~{input_file} ~{prefix}_map.fasta
-  else
-    echo "true" > run_map.txt
+    set -euo pipefail
+
     fasta_sanity.py -v
     fasta_sanity.py \
     -p ~{proj_id} \
     -l ~{min_seq_length} \
     -u ~{unknown_gap_length} \
     ~{input_file} ~{prefix}_map.fasta
-  fi
+
   >>>
 
   output{
-    File? map_file = "~{prefix}_contig_names_mapping.tsv"
+    File  map_file = "~{prefix}_contig_names_mapping.tsv"
     File  out_fasta = "~{prefix}_map.fasta"
     File  out_log = stdout()
-    Boolean map_execute = read_boolean("run_map.txt")
  }
   runtime {
     memory: "120G"
@@ -489,7 +479,6 @@ task make_info_file {
     input {
         String container
         String imgap_version
-        Boolean map_execute
         File map_info
         Boolean fa_execute
         Boolean sa_execute
@@ -522,12 +511,11 @@ task make_info_file {
     set -euo pipefail
      echo "IMGAP Version: ~{imgap_version}" > ~{prefix}_imgap.info
      #get map script version
-     if [[ "~{map_execute}" = true ]]
-       then
-       map_version=`grep "fasta_sanity.py" ~{map_info}`
-       map_version="Mapping Programs Used: $map_version"
-       echo $map_version >> ~{prefix}_imgap.info
-     fi
+
+     map_version=`grep "fasta_sanity.py" ~{map_info}`
+     map_version="Mapping Programs Used: $map_version"
+     echo $map_version >> ~{prefix}_imgap.info
+
      #get structual annotation versions
      if [[ "~{sa_execute}" = true ]]
        then
@@ -656,9 +644,8 @@ task finish_ano {
        File stats_json
        File product_names_tsv
        File crt_crisprs
-       Boolean map_execute
-       File? map_file
-       File? renamed_fasta
+       File map_file
+       File renamed_fasta
        String orig_prefix="scaffold"
        String sed="s/~{orig_prefix}_/~{proj}_/g"
     }
@@ -695,12 +682,9 @@ task finish_ano {
        cat ~{stats_json} | sed ~{sed} > ~{prefix}_stats.json
 
        ln ~{ano_info_file} ~{prefix}_imgap.info || ln -s ~{ano_info_file} ~{prefix}_imgap.info 
+       ln ~{map_file} ~{prefix}_contig_names_mapping.tsv || ln -s ~{map_file} ~{prefix}_contig_names_mapping.tsv
+       ln ~{renamed_fasta} ~{prefix}_contigs.fna || ln -s ~{renamed_fasta} ~{prefix}_contigs.fna
 
-       if [[ "~{map_execute}" = true ]]
-        then
-        ln ~{map_file} ~{prefix}_contig_names_mapping.tsv || ln -s ~{map_file} ~{prefix}_contig_names_mapping.tsv
-        ln ~{renamed_fasta} ~{prefix}_contigs.fna || ln -s ~{renamed_fasta} ~{prefix}_contigs.fna
-       fi
   >>>
 
    output {
@@ -731,8 +715,8 @@ task finish_ano {
         File final_product_names_tsv = "~{prefix}_product_names.tsv"
         File final_lineage_tsv = "~{prefix}_scaffold_lineage.tsv"
         File final_crt_crisprs = "~{prefix}_crt.crisprs"
-        File? final_renamed_fasta = "~{prefix}_contigs.fna"
-        File? final_map_file = "~{prefix}_contig_names_mapping.tsv"
+        File final_renamed_fasta = "~{prefix}_contigs.fna"
+        File final_map_file = "~{prefix}_contig_names_mapping.tsv"
         File final_tsv = "~{prefix}_stats.tsv"
         File final_version = "~{prefix}_imgap.info"
  
diff --git a/functional-annotation.wdl b/functional-annotation.wdl
index d7ddba5..9edcfa5 100755
--- a/functional-annotation.wdl
+++ b/functional-annotation.wdl
@@ -6,31 +6,24 @@ workflow f_annotate {
         Int     additional_threads
        # File    input_contigs_fasta
         File    input_fasta
-        String  database_location
-        Boolean ko_ec_execute=true
+        String  database_location="/refdata/img/"
         String  ko_ec_img_nr_db="~{database_location}"+"/IMG-NR/20230629/img_nr"
         String  ko_ec_md5_mapping="~{database_location}"+"/IMG-NR/20230629/md5Hash2Data.tsv"
         String  ko_ec_taxon_to_phylo_mapping="~{database_location}"+"/IMG-NR/20230629/taxonOId2Taxonomy.tsv"
         String  lastal_bin="/opt/omics/bin/lastal"
         String  selector_bin="/opt/omics/bin/functional_annotation/lastal_img_nr_ko_ec_gene_phylo_hit_selector.py"
-        Boolean smart_execute=true
         Int?    par_hmm_inst
         Int?    approx_num_proteins
         String  smart_db="~{database_location}"+"/SMART/01_06_2016/SMART.hmm"
         String  hmmsearch_bin="/opt/omics/bin/hmmsearch"
         String  frag_hits_filter_bin="/opt/omics/bin/functional_annotation/hmmsearch_fragmented_hits_filter.py"
-        Boolean cog_execute=true
         String  cog_db="~{database_location}"+"/COG/HMMs/2003/COG.hmm"
-        Boolean tigrfam_execute=true
         String  tigrfam_db="~{database_location}"+"/TIGRFAM/v15.0/TIGRFAM.hmm"
         String  hit_selector_bin="/opt/omics/bin/functional_annotation/hmmsearch_hit_selector.py"
-        Boolean superfam_execute=true
         String  superfam_db="~{database_location}"+"/SuperFamily/v1.75/supfam.hmm"
-        Boolean pfam_execute=true
         String  pfam_db="~{database_location}"+"/Pfam/Pfam-A/v34.0/Pfam-A.v34.0.hmm"
         String  pfam_claninfo_tsv="~{database_location}"+"/Pfam/Pfam-A/v34.0/Pfam-A.clans.tsv"
         String  pfam_clan_filter="/opt/omics/bin/functional_annotation/pfam_clan_filter.py"
-        Boolean cath_funfam_execute=true
         String  cath_funfam_db="~{database_location}"+"/Cath-FunFam/v4.2.0/funfam.hmm"
         #  Boolean signalp_execute=true
         #  String  signalp_gram_stain="GRAM_STAIN"
diff --git a/prodigal.wdl b/prodigal.wdl
deleted file mode 100755
index 0ea51f3..0000000
--- a/prodigal.wdl
+++ /dev/null
@@ -1,213 +0,0 @@
-version 1.0
-workflow prodigal {
-    input {
-        String imgap_input_fasta
-        String imgap_project_id
-        String imgap_project_type
-        String container
-    }
-  if(imgap_project_type == "isolate") {
-    call fasta_len {
-      input:
-        input_fasta = imgap_input_fasta
-    }
-  }
-  if(imgap_project_type == "isolate" && fasta_len.wc >= 20000) {
-    call iso_big {
-      input:
-        input_fasta = imgap_input_fasta,
-        project_id = imgap_project_id,
-        container=container
-    }
-  }
-  if(imgap_project_type == "isolate" && fasta_len.wc < 20000) {
-    call iso_small {
-      input:
-        input_fasta = imgap_input_fasta,
-        project_id = imgap_project_id,
-        container=container
-    }
-  }
-  if(imgap_project_type == "metagenome") {
-    call metag {
-      input:
-        input_fasta = imgap_input_fasta,
-        project_id = imgap_project_id,
-        container=container
-    }
-  }
-
-  call clean_and_unify {
-    input:
-      iso_big_proteins_fasta = iso_big.proteins,
-      iso_small_proteins_fasta = iso_small.proteins,
-      meta_proteins_fasta = metag.proteins,
-      iso_big_genes_fasta = iso_big.genes,
-      iso_small_genes_fasta = iso_small.genes,
-      meta_genes_fasta = metag.genes,
-      iso_big_gff = iso_big.gff,
-      iso_small_gff = iso_small.gff,
-      meta_gff = metag.gff,
-      project_id = imgap_project_id,
-      container=container
-  }
-
-  output {
-    File gff = clean_and_unify.gff
-    File genes = clean_and_unify.genes
-    File proteins = clean_and_unify.proteins
-  }
-}
-
-task fasta_len {
-    input {
-        File input_fasta
-    }
-  command {
-    grep -v '^>' ~{input_fasta} | wc -m
-  }
-
-  runtime {
-    time: "1:00:00"
-    memory: "86G"
-  }
-
-  output {
-    Int wc = select_first([read_int(stdout()),0])
-  }
-}
-
-task iso_big {
-    input {
-        String bin="/opt/omics/bin/prodigal"
-        File   input_fasta
-        Int?   translation_table = 11
-        String project_id
-        String prefix=sub(project_id, ":", "_")
-        File   train = "~{prefix}_prodigal.trn"
-        String container
-    }
-  command {
-    set -euo pipefail
-    ~{bin} -i ~{input_fasta} -t ~{train} -g ~{translation_table} -q
-    ~{bin} -f gff -g ~{translation_table} -p single -m -i ~{input_fasta} \
-    -t ~{train} -o ~{prefix}_prodigal.gff \
-    -d ~{prefix}_prodigal_genes.fna -a ~{prefix}_prodigal_proteins.faa
-  }
-
-  runtime {
-    time: "1:00:00"
-    memory: "86G"
-    docker: container
-  }
-
-  output {
-    File gff = "~{prefix}_prodigal.gff"
-    File genes = "~{prefix}_prodigal_genes.fna"
-    File proteins = "~{prefix}_prodigal_proteins.faa"
-  }
-}
-
-task iso_small {
-    input {
-        String bin="/opt/omics/bin/prodigal"
-        File   input_fasta
-        String project_id
-        String prefix=sub(project_id, ":", "_")
-        String container
-    }
-  command {
-    ~{bin} -f gff -p meta -m -i ~{input_fasta} \
-    -o ~{prefix}_prodigal.gff -d ~{prefix}_prodigal_genes.fna \
-    -a ~{prefix}_prodigal_proteins.faa
-  }
-
-  runtime {
-    time: "1:00:00"
-    memory: "86G"
-    docker: container
-  }
-
-  output {
-    File gff = "~{prefix}_prodigal.gff"
-    File genes = "~{prefix}_prodigal_genes.fna"
-    File proteins = "~{prefix}_prodigal_proteins.faa"
-  }
-}
-
-task metag {
-    input {
-        String bin="/opt/omics/bin/prodigal"
-        File   input_fasta
-        String project_id
-        String prefix=sub(project_id, ":", "_")
-        String container
-    }
-
-  command <<<
-    set -eou pipefail
-    ~{bin} -f gff -p meta -m -i ~{input_fasta} \
-    -o ~{prefix}_prodigal.gff -d ~{prefix}_prodigal_genes.fna \
-    -a ~{prefix}_prodigal_proteins.faa
-  >>>
-
-  runtime {
-    time: "1:00:00"
-    memory: "86G"
-    docker: container
-  }
-
-  output {
-    File gff = "~{prefix}_prodigal.gff"
-    File genes = "~{prefix}_prodigal_genes.fna"
-    File proteins = "~{prefix}_prodigal_proteins.faa"
-  }
-}
-
-task clean_and_unify {
-    input {
-        File?  iso_big_proteins_fasta
-        File?  iso_small_proteins_fasta
-        File?  meta_proteins_fasta
-        File?  iso_big_genes_fasta
-        File?  iso_small_genes_fasta
-        File?  meta_genes_fasta
-        File?  iso_big_gff
-        File?  iso_small_gff
-        File?  meta_gff
-        String unify_bin="/opt/omics/bin/structural_annotation/unify_gene_ids.py"
-        String project_id
-        String prefix=sub(project_id, ":", "_")
-        String container
-    }
-  command <<<
-    set -eou pipefail
-    sed -i 's/\*$//g' ~{iso_big_proteins_fasta} ~{iso_small_proteins_fasta} ~{meta_proteins_fasta}
-    sed -i 's/\*/X/g' ~{iso_big_proteins_fasta} ~{iso_small_proteins_fasta} ~{meta_proteins_fasta}
-    ~{unify_bin} ~{iso_big_gff} ~{iso_small_gff} ~{meta_gff} \
-                 ~{iso_big_genes_fasta} ~{iso_small_genes_fasta} ~{meta_genes_fasta} \
-                 ~{iso_big_proteins_fasta} ~{iso_small_proteins_fasta} ~{meta_proteins_fasta}
-    mv ~{iso_big_proteins_fasta} . 2> /dev/null
-    mv ~{iso_small_proteins_fasta} . 2> /dev/null
-    mv ~{meta_proteins_fasta} . 2> /dev/null
-    mv ~{iso_big_genes_fasta} . 2> /dev/null
-    mv ~{iso_small_genes_fasta} . 2> /dev/null
-    mv ~{meta_genes_fasta} . 2> /dev/null
-    mv ~{iso_big_gff} . 2> /dev/null
-    mv ~{iso_small_gff} . 2> /dev/null
-    mv ~{meta_gff} . 2> /dev/null
-  >>>
-
-  runtime {
-    time: "1:00:00"
-    memory: "86G"
-    docker: container
-  }
-
-  output {
-    File gff = "~{prefix}_prodigal.gff"
-    File genes = "~{prefix}_prodigal_genes.fna"
-    File proteins = "~{prefix}_prodigal_proteins.faa"
-  }
-}
-
diff --git a/structural-annotation.wdl b/structural-annotation.wdl
index 2d113b0..207ccc2 100755
--- a/structural-annotation.wdl
+++ b/structural-annotation.wdl
@@ -13,14 +13,6 @@ workflow s_annotate {
       String  imgap_project_type
       Int     additional_threads
       Int? imgap_structural_annotation_translation_table
-      Boolean pre_qc_execute=false
-      Boolean trnascan_se_execute=true
-      Boolean rfam_execute=true
-      Boolean crt_execute=true
-      Boolean cds_prediction_execute=true
-      Boolean prodigal_execute=true
-      Boolean genemark_execute=true
-      Boolean gff_and_fasta_stats_execute=true
       String  database_location
       String  container
       String gm_license="/refdata/licenses/.gmhmmp2_key"
@@ -69,8 +61,6 @@ workflow s_annotate {
          imgap_input_fasta = imgap_input_fasta,
          imgap_project_id = imgap_project_id,
          imgap_project_type = imgap_project_type,
-         prodigal_execute = prodigal_execute,
-         genemark_execute = genemark_execute,
          imgap_structural_annotation_translation_table = imgap_structural_annotation_translation_table,
          container = container,
          gm_license = gm_license
@@ -86,11 +76,6 @@ workflow s_annotate {
         trna_gff = trnascan.gff,
         crt_gff = crt.gff,
         cds_gff = cds_prediction.gff,
-        prodigal_execute = prodigal_execute,
-        genemark_execute = genemark_execute,
-        crt_execute = crt_execute,
-        rfam_execute = rfam_execute,
-        trnascan_se_execute = trnascan_se_execute,
         container = container
     }
 
@@ -216,47 +201,19 @@ task gff_merge {
         File  rfam_gff
         File  crt_gff
         File  cds_gff
-        Boolean prodigal_execute
-        Boolean genemark_execute
-        Boolean crt_execute
-        Boolean rfam_execute
-        Boolean trnascan_se_execute
         String container
   }
   command <<<
     set -euo pipefail
-    # set cromwell booleans as bash variables
-    prodigal_execute=~{prodigal_execute}
-    genemark_execute=~{genemark_execute}
-    crt_execute=~{crt_execute}
-    rfam_execute=~{rfam_execute}
-    trnascan_se_execute=~{trnascan_se_execute}
-
-    #construct arguments for gff_files_merger.py
-    merger_args="--contigs_fasta ~{input_fasta}"
-
-    if [[ "$prodigal_execute" = true ]] || [[ "$genemark_execute" = true ]] ; then
-       merger_args="$merger_args --cds_gff ~{cds_gff}"
-    fi
-
-    if [[ "$crt_execute" = true ]] ; then
-       merger_args="$merger_args --crt_gff ~{crt_gff}"
-    fi
-
-    if [[ ("$prodigal_execute" = true || "$genemark_execute" = true) ]] && [[ "$crt_execute" = true ]] ; then
-       merger_args="$merger_args --log_file ~{prefix}_gff_merge.log"
-    fi
-
-    if [[ "$rfam_execute" = true ]] ; then
-       merger_args="$merger_args ~{rfam_gff}"
-    fi
-
-    if [[ "$trnascan_se_execute" = true ]] ; then
-       merger_args="$merger_args ~{trna_gff}"
-    fi
 
-    #excute gff_files_merger.py
-    ~{bin} $merger_args 1> ~{prefix}_structural_annotation.gff
+    ~{bin} \
+      --contigs_fasta ~{input_fasta} \
+      --cds_gff ~{cds_gff} \
+      --crt_gff ~{crt_gff} \
+      --log_file ~{prefix}_gff_merge.log \
+      ~{rfam_gff} \
+      ~{trna_gff} \
+     1> ~{prefix}_structural_annotation.gff
 
 
   >>>
diff --git a/test-small.wdl b/test-small.wdl
index fec4f7b..1ad0b1a 100755
--- a/test-small.wdl
+++ b/test-small.wdl
@@ -42,6 +42,7 @@ task prepare {
    String url
   }
    command <<<
+       set -euo pipefail  # abort on any error, unset variable, or failed pipeline stage
        wget ~{url}/~{prefix}_contigs.fna
    >>>
 
@@ -68,7 +69,7 @@ task validate {
   }
 
    command <<<
-       set -e
+       set -euo pipefail  # abort on any error, unset variable, or failed pipeline stage
        wget ~{url}/~{prefix}_functional_annotation.gff
        wget ~{url}/~{prefix}_structural_annotation.gff
        validate.sh ~{func_gff}