From fcd86bd13fbd5087a269de56877592a47a3a789f Mon Sep 17 00:00:00 2001
From: Lee Katz <lskatz@gmail.com>
Date: Tue, 26 Apr 2022 21:39:02 -0400
Subject: [PATCH 01/24] Create Dockerfile

---
 EToKi/1.2/Dockerfile | 109 +++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 109 insertions(+)
 create mode 100644 EToKi/1.2/Dockerfile

diff --git a/EToKi/1.2/Dockerfile b/EToKi/1.2/Dockerfile
new file mode 100644
index 000000000..b61e165e6
--- /dev/null
+++ b/EToKi/1.2/Dockerfile
@@ -0,0 +1,109 @@
+# FROM defines the base docker image. This instruction has to come first in the file.
+# The 'as' keyword names the following stage. We use `app` for the production image.
+#FROM ubuntu:focal as app
+# Base image choice borrowed from the Freyja container: micromamba instead of plain Ubuntu
+FROM mambaorg/micromamba:0.22.0 as app
+
+# ARG defines a build-time variable; it is reused below for the version LABEL and the git clone
+ARG SOFTWARENAME_VER="1.2"
+
+# build and run as root since the micromamba image has 'mambauser' set as the $USER
+USER root
+# set workdir to default for building; set to /data at the end
+WORKDIR /
+
+# LABEL instructions tag the image with metadata that might be important to the user
+# base.image must name the image used in the FROM line so the metadata matches the build
+LABEL base.image="mambaorg/micromamba:0.22.0"
+LABEL dockerfile.version="1"
+LABEL software="EToKi"
+LABEL software.version="${SOFTWARENAME_VER}"
+LABEL description="All methods related to Enterobase data analysis pipelines"
+LABEL website="https://github.com/zheminzhou/EToKi"
+LABEL license="https://github.com/zheminzhou/EToKi/blob/master/LICENSE"
+LABEL maintainer="Lee Katz"
+LABEL maintainer.email="gzu2@cdc.gov"
+
+# https://askubuntu.com/a/1013396
+# avoid asking about timezone during apt-get
+ARG DEBIAN_FRONTEND=noninteractive
+
+# RUN executes code during the build
+# Install OS-level dependencies (sorted, no duplicates) and drop the apt package lists in the same layer
+RUN apt-get update && apt-get install -y --no-install-recommends \
+ ant \
+ build-essential \
+ curl \
+ git \
+ libcurl4 \
+ libcurl4-openssl-dev \
+ libgconf-2-4 \
+ libssl-dev \
+ pigz \
+ python3-dev \
+ python3-pip \
+ python3-venv \
+ unzip \
+ wget && \
+ apt-get autoclean && \
+ rm -rf /var/lib/apt/lists/*
+
+ #openjdk-8-jdk \
+
+# Register /usr/bin/python3 as the `python` alternative so that /usr/bin/python points at Python 3
+RUN update-alternatives --install /usr/bin/python python /usr/bin/python3 1
+
+# Print the python version into the build log as a sanity check
+RUN python --version
+
+# set the environment, put new conda env in PATH by default
+ENV PATH="/opt/conda/envs/etoki/bin:/opt/conda/envs/env/bin:${PATH}" \
+ LC_ALL=C.UTF-8
+
+# Create the `etoki` env without prompting (-y) and clear the package cache in the same layer
+RUN micromamba create -y -n etoki python=3.8 -c conda-forge -c bioconda -c defaults ete3 numba numpy pandas scikit-learn psutil click scipy && \
+ micromamba clean -a -y
+# scikit-learn in the list above is the package that provides the `sklearn` module
+
+# Now get us into that yummy yummy EToKi env
+ENV ENV_NAME="etoki"
+ARG MAMBA_DOCKERFILE_ACTIVATE=1
+
+## EToKi itself ##
+
+# ENV instructions set environment variables that persist from the build into the resulting image
+# NOTE: LC_ALL=C here replaces the LC_ALL=C.UTF-8 value set by the earlier ENV instruction
+ENV PATH="/usr/local/bin/EToKi:$PATH" \
+ LC_ALL=C
+# Shallow-clone only the requested release; the full git history is not needed inside the image
+RUN cd /usr/local/bin && git clone --depth 1 -b "${SOFTWARENAME_VER}" https://github.com/zheminzhou/EToKi.git
+
+# Installs all 3rd party software except the kraken database and usearch
+RUN cd /usr/local/bin/EToKi && python EToKi.py configure --install
+# Prepend a python shebang to every .py file, editing in place without leaving .bak copies behind
+RUN find /usr/local/bin/EToKi -name '*.py' -exec sed -i -e '1 i #!/usr/bin/env python\n# ^^^ inserted corrected shebang for this container' {} \;
+
+# WORKDIR sets the working directory for the instructions that follow
+WORKDIR /data
+
+# A second FROM instruction would create a new stage (`test`), but it is commented out here,
+# so the checks below run as part of the `app` stage
+#FROM app as test
+
+# Demonstrate that the program is successfully installed
+
+# Option 1: run the program itself: print its help, then run example.bash from the EToKi directory
+RUN cd /usr/local/bin/EToKi && EToKi.py --help
+#RUN cd /usr/local/bin/EToKi && $(which python) $(which EToKi.py) --help
+RUN cd /usr/local/bin/EToKi && bash example.bash
+
+# Option 2: write your own tests in a bash script in the same directory as your Dockerfile:
+#COPY my_tests.sh .
+#RUN bash my_tests.sh
+
+# Option 3: write python unit tests in a tests/ directory in the same directory as your Dockerfile:
+#RUN apt-get install -y python3
+#RUN mkdir tests/
+#COPY tests/ tests/
+#RUN python3 -m unittest discover -s tests
+#

From fdfd2dcbecc3d04ed37df5e21bdcbd37b5f8962d Mon Sep 17 00:00:00 2001
From: Lee Katz <lskatz@gmail.com>
Date: Tue, 26 Apr 2022 21:42:07 -0400
Subject: [PATCH 02/24] Rename EToKi/1.2/Dockerfile to etoki/1.2/Dockerfile

---
 {EToKi => etoki}/1.2/Dockerfile | 0
 1 file changed, 0 insertions(+), 0 deletions(-)
 rename {EToKi => etoki}/1.2/Dockerfile (100%)

diff --git a/EToKi/1.2/Dockerfile b/etoki/1.2/Dockerfile
similarity index 100%
rename from EToKi/1.2/Dockerfile
rename to etoki/1.2/Dockerfile

From b928781964b43fb058819d9403e87a645ded595a Mon Sep 17 00:00:00 2001
From: Lee Katz <lskatz@gmail.com>
Date: Wed, 27 Apr 2022 12:58:01 -0400
Subject: [PATCH 03/24] README.md, EToKi

---
 README.md | 1 +
 1 file changed, 1 insertion(+)

diff --git a/README.md b/README.md
index 42d0ad721..2fb2155bc 100644
--- a/README.md
+++ b/README.md
@@ -56,6 +56,7 @@ To learn more about the docker pull rate limits and the open source software pro
 | [datasets-sars-cov-2](https://github.com/CDCgov/datasets-sars-cov-2) <br/> [![docker pulls](https://badgen.net/docker/pulls/staphb/datasets-sars-cov-2)](https://hub.docker.com/r/staphb/datasets-sars-cov-2) | <ul><li>0.6.2</li><li>0.6.3</li></ul> | https://github.com/CDCgov/datasets-sars-cov-2 |
 | [DSK](https://hub.docker.com/r/staphb/dsk) <br/> [![docker pulls](https://badgen.net/docker/pulls/staphb/dsk)](https://hub.docker.com/r/staphb/dsk) | <ul><li>0.0.100</li></ul> | https://gatb.inria.fr/software/dsk/ |
 | [emm-typing-tool](https://hub.docker.com/r/staphb/emm-typing-tool) <br/> [![docker pulls](https://badgen.net/docker/pulls/staphb/emm-typing-tool)](https://hub.docker.com/r/staphb/emm-typing-tool) | <ul><li>0.0.1 (no version)</li></ul> | https://github.com/phe-bioinformatics/emm-typing-tool |
+| [EToKi](https://hub.docker.com/r/staphb/etoki) <br/> [![docker pulls](https://badgen.net/docker/pulls/staphb/etoki)](https://hub.docker.com/r/staphb/etoki) | <ul><li>1.2</li></ul> | https://github.com/zheminzhou/EToKi |
 | [FastANI](https://hub.docker.com/r/staphb/fastani) <br/> [![docker pulls](https://badgen.net/docker/pulls/staphb/fastani)](https://hub.docker.com/r/staphb/fastani) | <ul><li>1.1</li><li>1.32</li><li>1.33</li></ul> | https://github.com/ParBLiSS/FastANI |
 | [FastTree](https://hub.docker.com/r/staphb/fasttree) <br/> [![docker pulls](https://badgen.net/docker/pulls/staphb/fasttree)](https://hub.docker.com/r/staphb/fasttree) | <ul><li>2.1.11</li></ul> | http://www.microbesonline.org/fasttree/ |
 | [FastQC](https://hub.docker.com/r/staphb/fastqc) <br/> [![docker pulls](https://badgen.net/docker/pulls/staphb/fastqc)](https://hub.docker.com/r/staphb/fastqc) | <ul><li>0.11.8</li><li>0.11.9</li></ul> | https://www.bioinformatics.babraham.ac.uk/projects/fastqc/ <br/> https://github.com/s-andrews/FastQC |

From 8dc4adf0bd42a515477e5c0b46d43adbc235f946 Mon Sep 17 00:00:00 2001
From: Lee Katz <lskatz@gmail.com>
Date: Wed, 27 Apr 2022 12:59:41 -0400
Subject: [PATCH 04/24] EToKi program license

---
 Program_Licenses.md | 1 +
 1 file changed, 1 insertion(+)

diff --git a/Program_Licenses.md b/Program_Licenses.md
index 5e3f119dc..62137efec 100644
--- a/Program_Licenses.md
+++ b/Program_Licenses.md
@@ -25,6 +25,7 @@ The licenses of the open-source software that is contained in these Docker image
 | colorid | MIT | https://github.com/hcdenbakker/colorid/blob/master/LICENSE |
 | DSK | GNU Affero GPLv3 | https://github.com/GATB/dsk/blob/master/LICENSE |
 | emm-typing-tool | GNU GPLv3 | https://github.com/phe-bioinformatics/emm-typing-tool/blob/master/LICENCE |
+| EToKi | GNU GPLv3 | https://github.com/zheminzhou/EToKi/blob/master/LICENSE |
 | FastANI | Apache v2.0 | https://github.com/ParBLiSS/FastANI/blob/master/LICENSE |
 | FastTree | GNU GPLv2 | http://www.microbesonline.org/fasttree/ |
 | FastQC | GNU GPLv3 | https://github.com/s-andrews/FastQC/blob/master/LICENSE.txt |

From 76dad2e037b1b687b3205487439b78ac27fac42c Mon Sep 17 00:00:00 2001
From: Lee Katz <lskatz@gmail.com>
Date: Wed, 27 Apr 2022 13:03:15 -0400
Subject: [PATCH 05/24] Create README.md

---
 etoki/1.2/README.md | 518 ++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 518 insertions(+)
 create mode 100644 etoki/1.2/README.md

diff --git a/etoki/1.2/README.md b/etoki/1.2/README.md
new file mode 100644
index 000000000..4deb8327a
--- /dev/null
+++ b/etoki/1.2/README.md
@@ -0,0 +1,518 @@
+# Quick Start (with examples)
+
+_Note:_ this text was copied from the README of the original EToKi repository (https://github.com/zheminzhou/EToKi).
+
+### Trim genomic reads
+~~~~~~~~~~~
+python EToKi.py prepare --pe examples/S_R1.fastq.gz,examples/S_R2.fastq.gz -p examples/prep_out
+~~~~~~~~~~~
+### Merge and trim metagenomic reads
+~~~~~~~~~~~
+python EToKi.py prepare --pe examples/S_R1.fastq.gz,examples/S_R2.fastq.gz -p examples/meta_out --noRename --merge
+~~~~~~~~~~~
+### Assemble genomic reads using SPAdes
+~~~~~~~~~~~
+python EToKi.py assemble --pe examples/prep_out_L1_R1.fastq.gz,examples/prep_out_L1_R2.fastq.gz --se examples/prep_out_L1_SE.fastq.gz -p examples/asm_out
+~~~~~~~~~~~
+### Assemble genomic reads using MEGAHIT
+~~~~~~~~~~~
+python EToKi.py assemble --se examples/meta_out_L1_MP.fastq.gz \
+--pe examples/meta_out_L1_R1.fastq.gz,examples/meta_out_L1_R2.fastq.gz --se examples/meta_out_L1_SE.fastq.gz \
+-p examples/asm_out2 --assembler megahit
+~~~~~~~~~~~
+### Map reads onto reference, with pre-filtering with ingroups and outgroups
+~~~~~~~~~~~
+python EToKi.py assemble --se examples/meta_out_L1_MP.fastq.gz --metagenome \
+--pe examples/meta_out_L1_R1.fastq.gz,examples/meta_out_L1_R2.fastq.gz --se examples/meta_out_L1_SE.fastq.gz \
+-p examples/map_out -r examples/GCF_000010485.1_ASM1048v1_genomic.fna.gz \
+-i examples/GCF_000214765.2_ASM21476v3_genomic.fna.gz -o examples/GCF_000005845.2_ASM584v2_genomic.fna.gz
+~~~~~~~~~~~
+### Prepare reference alleles and a local database for 7 Gene MLST scheme
+~~~~~~~~~~~
+python EToKi.py MLSTdb -i examples/Escherichia.Achtman.alleles.fasta -r examples/Escherichia.Achtman.references.fasta -d examples/Escherichia.Achtman.convert.tab
+~~~~~~~~~~~
+### Calculate 7 Gene MLST genotype for a queried genome
+~~~~~~~~~~~
+gzip -cd examples/GCF_001566635.1_ASM156663v1_genomic.fna.gz > examples/GCF_001566635.1_ASM156663v1_genomic.fna && \
+python EToKi.py MLSType -i examples/GCF_001566635.1_ASM156663v1_genomic.fna -r examples/Escherichia.Achtman.references.fasta -k G749 -o stdout -d examples/Escherichia.Achtman.convert.tab
+~~~~~~~~~~~
+### Run EBEis (EnteroBase Escherichia in silico serotyping)
+~~~~~~~~~~~
+python EToKi.py EBEis -t Escherichia -q examples/GCF_000010485.1_ASM1048v1_genomic.fna -p SE15
+~~~~~~~~~~~
+### Cluster sequences into similarity-based groups 
+~~~~~~~~~~~
+python EToKi.py clust -p examples/Escherichia.Achtman.alleles_clust -i examples/Escherichia.Achtman.alleles.fasta -d 0.95 -c 0.95
+~~~~~~~~~~~
+### Do a joint BLASTn-like search using BLASTn, uSearch (uBLASTp), Minimap and mmseqs
+~~~~~~~~~~~
+python EToKi.py uberBlast -q examples/Escherichia.Achtman.alleles.fasta -r examples/GCF_001566635.1_ASM156663v1_genomic.fna -o examples/G749_7Gene.bsn --blastn --ublast --minimap --mmseq -s 2 -f
+~~~~~~~~~~~
+### Align multiple genomes onto one reference
+~~~~~~~~~~~
+python EToKi.py align -r GCF_000010485:examples/GCF_000010485.1_ASM1048v1_genomic.fna.gz -p examples/phylo_out \
+GCF_000005845:examples/GCF_000005845.2_ASM584v2_genomic.fna.gz \
+GCF_000214765:examples/GCF_000214765.2_ASM21476v3_genomic.fna.gz \
+GCF_001566635:examples/GCF_001566635.1_ASM156663v1_genomic.fna.gz
+~~~~~~~~~~~
+### Build ML tree using RAxML and place all SNPs onto branches in the tree
+~~~~~~~~~~~
+cd examples && python ../EToKi.py phylo -t snp2mut -p phylo_out -s phylo_out.matrix.gz --ng && cd ..
+~~~~~~~~~~~
+
+# USAGE:
+The first argument passed into EToKi specifies the command to be called and the rest are the parameters for that command. To see all the commands available in EToKi, use
+> python EToKi.py -h
+
+And to see the parameters for an individual command, use:
+> EToKi.py \<command\> -h
+
+## configure - install and/or configure 3rd party programs
+See the INSTALL section or the help page below.
+~~~~~~~~~~~~~~
+usage: EToKi.py configure [-h] [--install] [--usearch USEARCH]
+                          [--download_krakenDB]
+                          [--link_krakenDB KRAKEN_DATABASE] [--path PATH]
+
+Install or modify the 3rd party programs.
+
+optional arguments:
+  -h, --help            show this help message and exit
+  --install             install 3rd party programs
+  --usearch USEARCH     usearch is required for ortho and MLSType. A 32-bit
+                        version of usearch can be downloaded from
+                        https://www.drive5.com/usearch/
+  --download_krakenDB   When specified, miniKraken2 (8GB) will be downloaded
+                        into the EToKi folder. You can also use
+                        --link_krakenDB to use a pre-installed kraken2
+                        database.
+  --link_krakenDB KRAKEN_DATABASE
+                        Kraken is optional in the assemble module. You can
+                        specify your own database here
+  --path PATH, -p PATH  Specify path to the 3rd party programs manually.
+                        Format: <program>=<path>. This parameter can be
+                        specified multiple times
+~~~~~~~~~~~~~~~~~
+
+## prepare - trim, collapse, downsize and rename the short reads
+~~~~~~~~~~~~~
+usage: EToKi.py prepare [-h] [--pe PE] [--se SE] [-p PREFIX] [-q READ_QUAL]
+                        [-b MAX_BASE] [-m MEMORY] [--noTrim] [--merge]
+                        [--noRename]
+
+EToKi.py prepare
+(1) Concatenates reads of the same library together.
+(2) Merge pair-end sequences for metagenomic reads (bbmap).
+(3) Trims sequences based on base-qualities (bbduk).
+(4) Removes potential adapters and barcodes (bbduk).
+(5) Limits total amount of reads to be used.
+(6) Renames reads using sequential numbers.
+
+optional arguments:
+  -h, --help            show this help message and exit
+  --pe PE               comma delimited files of PE reads from the same library.
+                        e.g. --pe a_R1.fq.gz,a_R2.fq.gz,b_R1.fq.gz,b_R2.fq.gz
+                        This can be specified multiple times for different libraries.
+  --se SE               comma delimited files of SE reads from the same library.
+                        e.g. --se c_SE.fq.gz,d_SE.fq.gz
+                        This can be specified multiple times for different libraries.
+  -p PREFIX, --prefix PREFIX
+                        prefix for the outputs. Default: EToKi_prepare
+  -q READ_QUAL, --read_qual READ_QUAL
+                        Minimum quality to be kept in bbduk. Default: 6
+  -b MAX_BASE, --max_base MAX_BASE
+                        Total amount of bases (in BPs) to be kept.
+                        Default as -1 for no restriction.
+                        Suggest to use ~100X coverage for de novo assembly.
+  -m MEMORY, --memory MEMORY
+                        maximum amount of memory to be used in bbduk. Default: 30g
+  --noTrim              Do not do quality trim using bbduk
+  --merge               Try to merge PE reads by their overlaps using bbmap
+  --noRename            Do not rename reads
+~~~~~~~~~~~~~~~~
+
+## assemble - *de novo* or reference-guided assembly for genomic or metagenomic reads
+**EToKi assemble** is a joint method for both *de novo* assembly and reference-guided assembly. 
+* *de novo* assembly approach calls either SPAdes (default) or MEGAHIT (default for metagenomic data) on short reads that have been cleaned up using **EToKi prepare**, and uses Pilon to polish the assembled scaffolds and evaluate the reliability of consensus bases of the scaffolds. 
+
+* Reference-guided assembly is also called "reference mapping". Short reads are aligned to a user-specified reference genome using minimap2. Nucleotide bases of the reference genome are updated using Pilon, according to the consensus base calls of the covered reads. Non-specific metagenomic reads of closely related species can sometimes also align to the reference genome and confuse consensus calling. Two arguments, **--outgroup** and **--ingroup**, are given to pre-filter these non-specific reads and obtain clean SNP calls. 
+~~~~~~~~~~~~~~~~~
+usage: EToKi.py assemble [-h] [--pe PE] [--se SE] [--pacbio PACBIO] [--ont ONT] [-p PREFIX] [-a ASSEMBLER] [-r REFERENCE] [-k KMERS] [-m MAPPER] [-d MAX_DIFF] [-i INGROUP] [-o OUTGROUP] [-S SNP] [-c CONT_DEPTH]
+                         [--excluded EXCLUDED] [--metagenome] [--numPolish NUMPOLISH] [--reassemble] [--onlySNP] [--noQuality] [--onlyEval] [--kraken]
+
+EToKi.py assemble
+(1.1) Assembles short reads into assemblies, or
+(1.2) Maps them onto a reference.
+And
+(2) Polishes consensus using polish,
+(3) Removes low level contaminations.
+(4) Estimates the base quality of the consensus.
+(5) Predicts taxonomy using Kraken.
+
+optional arguments:
+  -h, --help            show this help message and exit
+  --pe PE               comma delimited two files of PE reads.
+  --se SE               one file of SE read.
+  --pacbio PACBIO       one file of pacbio read.
+  --ont ONT             one file of nanopore read.
+  -p PREFIX, --prefix PREFIX
+                        prefix for the outputs. Default: EToKi_assemble
+  -a ASSEMBLER, --assembler ASSEMBLER
+                        Assembler used for de novo assembly.
+                        Disabled if you specify a reference.
+                        Default: spades for single colony isolates, megahit for metagenome.
+                         Long reads will always be assembled with Flye
+  -r REFERENCE, --reference REFERENCE
+                        Reference for read mapping. Specify this for reference mapping module.
+  -k KMERS, --kmers KMERS
+                        relative lengths of kmers used in SPAdes. Default: 30,50,70,90
+  -m MAPPER, --mapper MAPPER
+                        aligner used for read mapping.
+                        options are: miminap (default), bwa or bowtie2
+  -d MAX_DIFF, --max_diff MAX_DIFF
+                        Maximum proportion of variations allowed for a aligned reads.
+                        Default: 0.1 for single isolates, 0.05 for metagenome
+  -i INGROUP, --ingroup INGROUP
+                        Additional references presenting intra-population genetic diversities.
+  -o OUTGROUP, --outgroup OUTGROUP
+                        Additional references presenting genetic diversities outside of the studied population.
+                        Reads that are more similar to outgroups will be excluded from analysis.
+  -S SNP, --SNP SNP     Exclusive set of SNPs. This will overwrite the polish process.
+                        Required format:
+                        <cont_name> <site> <base_type>
+                        ...
+  -c CONT_DEPTH, --cont_depth CONT_DEPTH
+                        Allowed range of read depth variations relative to average value.
+                        Default: 0.2,2.5
+                        Contigs with read depths outside of this range will be removed from the final assembly.
+  --excluded EXCLUDED   A name of the file that contains reads to be excluded from the analysis.
+  --metagenome          Reads are from metagenomic samples
+  --numPolish NUMPOLISH
+                        Number of Pilon polish iterations. Default: 1
+  --reassemble          Do local re-assembly in PILON. Suggest to use this flag with long reads.
+  --onlySNP             Only modify substitutions during the PILON polish.
+  --noQuality           Do not estimate base qualities.
+  --onlyEval            Do not run assembly/mapping. Only evaluate assembly status.
+  --kraken              Run kmer based species predicton on contigs.
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+## ortho  - pan-genome (and wgMLST scheme) prediction
+**EToKi ortho** has now been migrated to a [separate repository](https://github.com/zheminzhou/PEPPA) and renamed as **PEPPA**. 
+
+## MLSTdb - Set up exemplar alleles and database for MLST schemes
+**EToKi MLSTdb** converts existing allelic sequences into two files: (1) a multi-fasta file of exemplar allelic sequences and (2) a lookup table for the **EToKi MLSType** method. 
+* The exemplar alleles are defined as: 
+   1. Over 40% identity to the allelic sequences of a reference genome specified by **--refstrain**
+   2. Less than 90% identity between different exemplar sequences of the same locus
+   3. Identity to sequences of any different locus that is at least 10% less than the similarity to sequences of the same locus.
+~~~~~~~~~~~
+usage: EToKi.py MLSTdb [-h] -i ALLELEFASTA [-r REFSET] [-d DATABASE]
+                       [-s REFSTRAIN] [-x MAX_IDEN] [-m MIN_IDEN] [-p PARALOG]
+                       [-c COVERAGE] [-e]
+
+MLSTdb. Create reference sets of alleles for nomenclature.
+
+optional arguments:
+  -h, --help            show this help message and exit
+  -i ALLELEFASTA, --input ALLELEFASTA
+                        [REQUIRED] A single file contains all known alleles in
+                        a MLST scheme.
+  -r REFSET, --refset REFSET
+                        [DEFAULT: No ref allele] Output - Reference alleles
+                        used for MLSType.
+  -d DATABASE, --database DATABASE
+                        [DEFAULT: No allele DB] Output - A lookup table of all
+                        alleles.
+  -s REFSTRAIN, --refstrain REFSTRAIN
+                        [DEFAULT: None] A single file contains alleles from
+                        the reference genome.
+  -x MAX_IDEN, --max_iden MAX_IDEN
+                        [DEFAULT: 0.9 ] Maximum identities between resulting
+                        refAlleles.
+  -m MIN_IDEN, --min_iden MIN_IDEN
+                        [DEFAULT: 0.4 ] Minimum identities between refstrain
+                        and resulting refAlleles.
+  -p PARALOG, --paralog PARALOG
+                        [DEFAULT: 0.1 ] Minimum differences between difference
+                        loci.
+  -c COVERAGE, --coverage COVERAGE
+                        [DEFAULT: 0.7 ] Proportion of aligned regions between
+                        alleles.
+  -e, --relaxEnd        [DEFAULT: False ] Allow changed ends (for pubmlst).
+~~~~~~~~~~~
+
+## MLSType - MLST nomenclature using a local set of references
+**EToKi MLSType** identifies allelic sequences in a queried genome, by comparing it with the exemplar alleles generated by **MLSTdb**.
+ ~~~~~~~~~~
+usage: EToKi.py MLSType [-h] -i GENOME -r REFALLELE -k UNIQUE_KEY
+                        [-d DATABASE] [-o OUTPUT] [-q] [-f] [-m MIN_IDEN]
+                        [-p MIN_FRAG_PROP] [-l MIN_FRAG_LEN] [-x INTERGENIC]
+                        [--overlap_prop OVERLAP_PROP]
+                        [--overlap_iden OVERLAP_IDEN] [--max_dist MAX_DIST]
+                        [--diag_diff DIAG_DIFF] [--max_diff MAX_DIFF]
+
+MLSType. Find and designate MLST alleles from a queried assembly.
+
+optional arguments:
+  -h, --help            show this help message and exit
+  -i GENOME, --genome GENOME
+                        [REQUIRED] Input - filename for genomic assembly.
+  -r REFALLELE, --refAllele REFALLELE
+                        [REQUIRED] Input - fasta file for reference alleles.
+  -k UNIQUE_KEY, --unique_key UNIQUE_KEY
+                        [REQUIRED] An unique identifier for the assembly.
+  -d DATABASE, --database DATABASE
+                        [OPTIONAL] Input - lookup table of existing alleles.
+  -o OUTPUT, --output OUTPUT
+                        [DEFAULT: No output] Output - filename for the
+                        generated alleles. Specify to STDOUT for screen
+                        output.
+  -q, --query_only      [DEFAULT: False] Do not submit new allele, only query.
+  -f, --force           [DEFAULT: False] Force to accept low quality alleles.
+  -m MIN_IDEN, --min_iden MIN_IDEN
+                        [DEFAULT: 0.65 ] Minimum identities between refAllele
+                        and genome.
+  -p MIN_FRAG_PROP, --min_frag_prop MIN_FRAG_PROP
+                        [DEFAULT: 0.6 ] Minimum covereage of a fragment.
+  -l MIN_FRAG_LEN, --min_frag_len MIN_FRAG_LEN
+                        [DEFAULT: 50 ] Minimum length of a fragment.
+  -x INTERGENIC, --intergenic INTERGENIC
+                        [DEFAULT: -1,-1 ] Call alleles in intergenic region if
+                        the distance between two closely located loci fall
+                        within the range defined by the two numbers. Suggest
+                        to use 50,500. This is diabled by default with minus
+                        numbers.
+  --overlap_prop OVERLAP_PROP
+                        [DEFAULT: 0.5 ] Given two hits, if <overlap_prop> of
+                        their regions overlap, and the sequence identities of
+                        one hits is <overlap_iden> lower than the other. The
+                        hit with lower identities will be removed.
+  --overlap_iden OVERLAP_IDEN
+                        [DEFAULT: 0.05 ] Given two hits, if <overlap_prop> of
+                        their regions overlap, and the sequence identities of
+                        one hits is <overlap_iden> lower than the other. The
+                        hit with lower identities will be removed.
+  --max_dist MAX_DIST   [DEFAULT: 300 ] Consider two closely located hits as a
+                        synteny block if their coordinates in both queried
+                        genomes and reference gene are seperated by no more
+                        than <max_dist> bps.
+  --diag_diff DIAG_DIFF
+                        [DEFAULT: 1.2 ] Consider two closely located hits as a
+                        synteny block if, after merged, its covered region in
+                        the queried genome is no more than <diag_diff> folds
+                        of the region in the reference gene.
+  --max_diff MAX_DIFF   [DEFAULT: 200 ] Consider two closely located hits as a
+                        synteny block if, after merged, the lengths of its
+                        covered regions in the queried genome and the
+                        reference gene are differed by no more than <max_diff>
+                        bps.
+ ~~~~~~~~~~
+
+## align - align multiple queried genomes to a single reference
+~~~~~~~~~~~
+usage: EToKi.py align [-h] -r REFERENCE [-p PREFIX] [-a] [-m] [-l] [-c CORE]
+                      [-n N_PROC]
+                      queries [queries ...]
+
+Align multiple genomes onto a single reference.
+
+positional arguments:
+  queries               queried genomes. Use <Tag>:<Filename> format to feed
+                        in a tag for each genome. Otherwise filenames will be
+                        used as tags for genomes.
+
+optional arguments:
+  -h, --help            show this help message and exit
+  -r REFERENCE, --reference REFERENCE
+                        [REQUIRED; INPUT] reference genomes to be aligned
+                        against. Use <Tag>:<Filename> format to assign a tag
+                        to the reference.
+  -p PREFIX, --prefix PREFIX
+                        [OUTPUT] prefix for all outputs.
+  -a, --alignment       [OUTPUT] Generate core genomic alignments in FASTA
+                        format
+  -m, --matrix          [OUTPUT] Do not generate core SNP matrix
+  -l, --last            Activate to use LAST as aligner. [DEFAULT: minimap2]
+  -c CORE, --core CORE  [PARAM] percentage of presences for core genome.
+                        [DEFAULT: 0.95]
+  -n N_PROC, --n_proc N_PROC
+                        [PARAM] number of processes to use. [DEFAULT: 5]
+~~~~~~~~~~~
+
+## phylo - infer phylogeny and ancestral states from genomic alignments 
+~~~~~~~~~~~
+usage: EToKi.py phylo [-h] [--tasks TASKS] --prefix PREFIX
+                      [--alignment ALIGNMENT] [--snp SNP] [--tree TREE]
+                      [--ancestral ANCESTRAL] [--core CORE] [--n_proc N_PROC]
+
+EToKi phylo runs to:
+(1) Generate SNP matrix from alignment (-t matrix)
+(2) Calculate ML phylogeny from SNP matrix using RAxML (-t phylogeny)
+(3) Workout the nucleotide sequences of internal nodes in the tree using ML estimation (-t ancestral or -t ancestral_proportion for ratio frequencies)
+(4) Place mutations onto branches of the tree (-t mutation)
+
+optional arguments:
+  -h, --help            show this help message and exit
+  --tasks TASKS, -t TASKS
+                        Tasks to call. Allowed tasks are:
+                        matrix: generate SNP matrix from alignment.
+                        phylogeny: generate phylogeny from SNP matrix.
+                        ancestral: generate AS (ancestral state) matrix from SNP matrix and phylogeny
+                        ancestral_proportion: generate possibilities of AS for each site
+                        mutation: assign SNPs into branches from AS matrix
+
+                        You can run multiple tasks by sending a comma delimited task list.
+                        There are also some pre-defined task combo:
+                        all: matrix,phylogeny,ancestral,mutation
+                        aln2phy: matrix,phylogeny [default]
+                        snp2anc: phylogeny,ancestral
+                        mat2mut: ancestral,mutation
+  --prefix PREFIX, -p PREFIX
+                        prefix for all outputs.
+  --alignment ALIGNMENT, -m ALIGNMENT
+                        aligned sequences in either fasta format or Xmfa format. Required for "matrix" task.
+  --snp SNP, -s SNP     SNP matrix in specified format. Required for "phylogeny" and "ancestral" if alignment is not given
+  --tree TREE, -z TREE  phylogenetic tree. Required for "ancestral" task
+  --ancestral ANCESTRAL, -a ANCESTRAL
+                        Inferred ancestral states in a specified format. Required for "mutation" task
+  --core CORE, -c CORE  Core genome proportion. Default: 0.95
+  --n_proc N_PROC, -n N_PROC
+                        Number of processes. Default: 7.
+~~~~~~~~~~~
+
+
+## EB*Eis* - *in silico* serotype prediction for *Escherichia coli* & *Shigella spp.*
+**EB*Eis*** is a BLASTn based prediction tool for the O and H antigens of *Escherichia coli* and *Shigella*. It uses essential genes (*wzx, wzy, wzt & wzm* for O; *fliC* for H) as markers. **EB*Eis*** uses a database built from two sources:
+1. [SeroTypeFinder ](https://bitbucket.org/genomicepidemiology/serotypefinder_db/src)
+2. O-antigen gene sequences reported in [DebRoy et al., PLoS ONE, 2016](https://journals.plos.org/plosone/article?id=10.1371/journal.pone.0147434#pone.0147434.ref011)
+~~~~~~~~~~~
+usage: EToKi.py EBEis [-h] -q QUERY [-t TAXON] [-p PREFIX]
+
+EnteroBase Escherichia in silico serotyping
+
+optional arguments:
+  -h, --help            show this help message and exit
+  -q QUERY, --query QUERY
+                        file name for the queried assembly in multi-FASTA format.
+  -t TAXON, --taxon TAXON
+                        Taxon database to compare with. 
+                        Only support Escherichia (default) for the moment.
+  -p PREFIX, --prefix PREFIX
+                        prefix for intermediate files. Default: EBEis
+~~~~~~~~~~~
+
+## isCRISPOL - *in silico* prediction of CRISPOL array for *Salmonella enterica* serovar Typhimurium
+CRISPOL is an oligo-based Typhimurium sub-typing method described in ([Fabre et al., PLoS ONE, 2012](https://journals.plos.org/plosone/article?id=10.1371/journal.pone.0036995)). We use the direct repeats (DRs) and spacers in the Typhimurium CRISPR array to predict CRISPOL types from genomic assemblies.
+~~~~~~~~~~~
+usage: EToKi.py isCRISPOL [-h] [N [N ...]]
+
+in silico Typhimurium subtyping using CRISPOL scheme (Fabre et al., PLoS ONE, 2012)
+
+positional arguments:
+  N           FASTA files containing assemblies of S. enterica Typhimurium.
+
+optional arguments:
+  -h, --help  show this help message and exit
+~~~~~~~~~~~
+
+## uberBlast - Use BLASTn, uBLASTp, minimap2 and/or mmseqs to identify similar sequences
+**EToKi uberBlast** is also internally called by **EToKi ortho** to align exemplar genes to queried genomes, using both BLASTn and uSearch-uBLASTp. Amino acid alignments are converted back to nucleotide sequences, meaning that genome coordinates remain consistent across different methods. 
+
+* minimap2 --- Fastest alignment at the nucleotide level. High accuracy for identities >= 90%, but loses sensitivity quickly at lower identities.
+* blastn --- Fast alignment at the nucleotide level. Loses sensitivity for identities < 80%.
+* mmseqs --- Amino acid based alignment for identities >= 70% (open source)
+* uBLASTp --- Amino acid based alignment for identities < 50% (commercial software)
+~~~~~~~~~~~
+usage: EToKi.py uberBlast [-h] -r REFERENCE -q QUERY [-o OUTPUT] [--blastn]
+                          [--ublast] [--ublastSELF] [--minimap] [--minimapASM]
+                          [--mmseq] [--min_id MIN_ID] [--min_cov MIN_COV]
+                          [--min_ratio MIN_RATIO] [-s RE_SCORE] [-f]
+                          [--filter_cov FILTER_COV]
+                          [--filter_score FILTER_SCORE] [-m]
+                          [--merge_gap MERGE_GAP] [--merge_diff MERGE_DIFF]
+                          [-O] [--overlap_length OVERLAP_LENGTH]
+                          [--overlap_proportion OVERLAP_PROPORTION]
+                          [-e FIX_END] [-t N_THREAD] [-p]
+
+Five different alignment methods.
+
+optional arguments:
+  -h, --help            show this help message and exit
+  -r REFERENCE, --reference REFERENCE
+                        [INPUT; REQUIRED] filename for the reference. This is
+                        normally a genomic assembly.
+  -q QUERY, --query QUERY
+                        [INPUT; REQUIRED] filename for the query. This can be
+                        short-reads or genes or genomic assemblies.
+  -o OUTPUT, --output OUTPUT
+                        [OUTPUT; Default: None] save result to a file or to
+                        screen (stdout). Default do nothing.
+  --blastn              Run BLASTn. Slowest. Good for identities between [80,
+                        100]
+  --ublast              Run uBLAST in tBLASTn mode. Fast. Good for identities
+                        between [30-100]
+  --ublastSELF          Run uBLAST in tBLASTn mode. Fast. Good for identities
+                        between [30-100]
+  --minimap             Run minimap. Fast. Good for identities between
+                        [90-100]
+  --minimapASM          Run minimap on assemblies. Fast. Good for identities
+                        between [90-100]
+  --mmseq               Run mmseq2 in tBLASTn mode. Fast. Good for identities
+                        between [70-100]
+  --min_id MIN_ID       [DEFAULT: 0.3] Minimum identity before reScore for an
+                        alignment to be kept
+  --min_cov MIN_COV     [DEFAULT: 40] Minimum length for an alignment to be
+                        kept
+  --min_ratio MIN_RATIO
+                        [DEFAULT: 0.05] Minimum length for an alignment to be
+                        kept, proportional to the length of the query
+  -s RE_SCORE, --re_score RE_SCORE
+                        [DEFAULT: 0] Re-interpret alignment scores and
+                        identities. 0: No rescore; 1: Rescore with
+                        nucleotides; 2: Rescore with amino acid; 3: Rescore
+                        with codons
+  -f, --filter          [DEFAULT: False] Remove secondary alignments if they
+                        overlap with any other regions
+  --filter_cov FILTER_COV
+                        [DEFAULT: 0.9]
+  --filter_score FILTER_SCORE
+                        [DEFAULT: 0]
+  -m, --linear_merge    [DEFAULT: False] Merge consective alignments
+  --merge_gap MERGE_GAP
+                        [DEFAULT: 300]
+  --merge_diff MERGE_DIFF
+                        [DEFAULT: 1.2]
+  -O, --return_overlap  [DEFAULT: False] Report overlapped alignments
+  --overlap_length OVERLAP_LENGTH
+                        [DEFAULT: 300] Minimum overlap to report
+  --overlap_proportion OVERLAP_PROPORTION
+                        [DEFAULT: 0.6] Minimum overlap proportion to report
+  -e FIX_END, --fix_end FIX_END
+                        [FORMAT: L,R; DEFAULT: 0,0] Extend alignment to the
+                        edges if the un-aligned regions are <= [L,R]
+                        basepairs.
+  -t N_THREAD, --n_thread N_THREAD
+                        [DEFAULT: 8] Number of threads to use.
+  -p, --process         [DEFAULT: False] Use processes instead of threads.
+~~~~~~~~~~~
+
+## clust - linear-time clustering of short sequences using mmseqs linclust
+**EToKi clust** is called internally by **EToKi ortho** to cluster seed genes into gene clusters. Given its linear-time complexity, it can cluster millions of gene sequences in minutes. 
+~~~~~~~~~~~
+usage: EToKi.py clust [-h] -i INPUT -p PREFIX [-d IDENTITY] [-c COVERAGE]
+                      [-t N_THREAD]
+
+Get clusters and exemplars of clusters from gene sequences using mmseqs linclust.
+
+optional arguments:
+  -h, --help            show this help message and exit
+  -i INPUT, --input INPUT
+                        [INPUT; REQUIRED] name of the file containing gene sequneces in FASTA format.
+  -p PREFIX, --prefix PREFIX
+                        [OUTPUT; REQUIRED] prefix of the outputs.
+  -d IDENTITY, --identity IDENTITY
+                        [PARAM; DEFAULT: 0.9] minimum intra-cluster identity.
+  -c COVERAGE, --coverage COVERAGE
+                        [PARAM; DEFAULT: 0.9] minimum intra-cluster coverage.
+  -t N_THREAD, --n_thread N_THREAD
+                        [PARAM; DEFAULT: 8] number of threads to use.
+~~~~~~~~~~~

From 89dd197ace80de527f2f994af263e4d46a873105 Mon Sep 17 00:00:00 2001
From: Lee Katz <lskatz@gmail.com>
Date: Wed, 27 Apr 2022 13:05:55 -0400
Subject: [PATCH 06/24] Create test-etoki.yml

---
 .github/workflows/test-etoki.yml | 23 +++++++++++++++++++++++
 1 file changed, 23 insertions(+)
 create mode 100644 .github/workflows/test-etoki.yml

diff --git a/.github/workflows/test-etoki.yml b/.github/workflows/test-etoki.yml
new file mode 100644
index 000000000..32738591a
--- /dev/null
+++ b/.github/workflows/test-etoki.yml
@@ -0,0 +1,23 @@
+# Caller workflow: builds the etoki image up to its "test" stage.
+# The build itself is delegated to the reusable workflow ./.github/workflows/build-to-test.yml, which the job below calls.
+# Some explanations come from: https://github.com/actions/starter-workflows/blob/main/automation/manual.yml
+
+name: Test etoki image
+
+# Runs when triggered manually from the Actions UI, or on a pull request that touches the path listed below
+on:
+  workflow_dispatch:
+  pull_request:
+    paths:
+      - "etoki/1.2/Dockerfile"  # path filter: only pull requests changing this Dockerfile trigger the workflow
+
+# A workflow run is made up of one or more jobs that can run sequentially or in parallel
+jobs:
+
+  # This job calls a workflow to build the image to the 'test' stage
+  build-to-test:
+    uses: ./.github/workflows/build-to-test.yml
+    with:
+      path_to_context: "./etoki/1.2"  # directory holding the Dockerfile and its build context
+      dockerfile_name: "Dockerfile"
+      cache: "etoki"  # nickname for the GitHub cache of this image's layers; the cache speeds up re-running the workflow

From e5b18949afdd32548f143849222d0cddbc662fbc Mon Sep 17 00:00:00 2001
From: Lee Katz <lskatz@gmail.com>
Date: Wed, 27 Apr 2022 13:15:53 -0400
Subject: [PATCH 07/24] added back in FROM app as test

---
 etoki/1.2/Dockerfile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/etoki/1.2/Dockerfile b/etoki/1.2/Dockerfile
index b61e165e6..59824378b 100644
--- a/etoki/1.2/Dockerfile
+++ b/etoki/1.2/Dockerfile
@@ -88,7 +88,7 @@ WORKDIR /data
 
 # A second FROM insruction creates a new stage
 # We use `test` for the test image
-#FROM app as test
+FROM app as test
 
 # Demonstrate that the program is successfully installed
 

From 74dd0de3996c7d81decd2199c476bb888bc8dbcd Mon Sep 17 00:00:00 2001
From: Lee Katz <lskatz@gmail.com>
Date: Thu, 28 Apr 2022 11:13:08 -0400
Subject: [PATCH 08/24] moving to multistage build

---
 etoki/1.2/Dockerfile | 108 ++++++++++++++++++++++++++++++-------------
 1 file changed, 76 insertions(+), 32 deletions(-)

diff --git a/etoki/1.2/Dockerfile b/etoki/1.2/Dockerfile
index 59824378b..886297c41 100644
--- a/etoki/1.2/Dockerfile
+++ b/etoki/1.2/Dockerfile
@@ -1,14 +1,12 @@
 # FROM defines the base docker image. This command has to come first in the file
 # The 'as' keyword lets you name the folowing stage. We use `app` for the production image
-#FROM ubuntu:focal as app
-# Copying the Freyja container a bit
-FROM mambaorg/micromamba:0.22.0 as app
+FROM ubuntu:focal as app
 
 # ARG sets environment variables during the build stage
-ARG SOFTWARENAME_VER="1.2"
+ARG ETOKI_VER="1.2"
+# Persistence with an env
+ENV ETOKI_VER_ENV=$ETOKI_VER
 
-# build and run as root users since micromamba image has 'mambauser' set as the $USER
-USER root
 # set workdir to default for building; set to /data at the end
 WORKDIR /
 
@@ -17,13 +15,23 @@ WORKDIR /
 LABEL base.image="ubuntu:focal"
 LABEL dockerfile.version="1"
 LABEL software="EToKi"
-LABEL software.version=$SOFTWARENAME_VER
+LABEL software.version=$ETOKI_VER
 LABEL description="All methods related to Enterobase data analysis pipelines"
 LABEL website="https://github.com/zheminzhou/EToKi"
 LABEL license="https://github.com/zheminzhou/EToKi/blob/master/LICENSE"
 LABEL maintainer="Lee Katz"
 LABEL maintainer.email="gzu2@cdc.gov"
 
+# Multistage build
+FROM staphb/shovill:1.1.0 AS shovill
+FROM staphb/kraken2:2.1.2-no-db AS kraken2
+FROM staphb/bowtie2:2.4.4 AS bowtie2
+FROM staphb/lyveset:1.1.4f AS lyveset
+FROM torognes/vsearch:2.21.1 as vsearch
+
+# Back to the base app so that we have things like ENV variables
+FROM app
+
 # https://askubuntu.com/a/1013396
 # avoid asking about timezone during apt-get
 ARG DEBIAN_FRONTEND=noninteractive
@@ -31,43 +39,68 @@ ARG DEBIAN_FRONTEND=noninteractive
 # RUN executes code during the build
 # Install dependencies via apt-get or yum if using a centos or fedora base
 RUN apt-get update && apt-get install -y --no-install-recommends \
+ libncurses5-dev \
+ libbz2-dev \
+ liblzma-dev \
+ perl \
+ libcurl4-gnutls-dev \
+ gcc \
+ g++ \
+ python-setuptools \
+ zlib1g-dev \
  python3-pip \
  python3-dev \
  libgconf-2-4 \
  curl \
  unzip \
- libcurl4-openssl-dev \
  build-essential \
  git \
  pigz \
- libcurl4-openssl-dev \
  libcurl4 \
  ant \
  libssl-dev \
  python3-venv \
  wget && \
- apt-get autoclean
+ apt-get autoclean && \
+ rm -rf /var/lib/apt/lists/*
 
  #openjdk-8-jdk \
+ #libcurl4-openssl-dev \
 
 # Gimme python3 instead of python2
 RUN update-alternatives --install /usr/bin/python python /usr/bin/python3 1
 
-# Double check the python version
-RUN python --version
-
-# set the environment, put new conda env in PATH by default
-ENV PATH="/opt/conda/envs/etoki/bin:/opt/conda/envs/env/bin:${PATH}" \
- LC_ALL=C.UTF-8
-
-
-RUN micromamba create -n etoki python=3.8 -c conda-forge -c bioconda -c defaults ete3 numba numpy pandas scikit-learn psutil click scipy && \
- micromamba clean -a -y
-# might also have to install sklearn??
-
-# Now get us into that yummy yummy EToKi env
-ENV ENV_NAME="etoki"
-ARG MAMBA_DOCKERFILE_ACTIVATE=1
+COPY --from=shovill   /skesa/skesa         /usr/local/bin/
+COPY --from=shovill   /megahit             /megahit
+COPY --from=shovill   /pilon               /pilon
+COPY --from=shovill   /SPAdes-*-Linux      /spades
+COPY --from=kraken2   /kraken2-2.1.2       /kraken2
+COPY --from=kraken2   /kraken2-db          /kraken2-db
+COPY --from=bowtie2   /opt/bowtie2-*       /opt/bowtie2
+
+# Copied from flye container
+ENV FLYE_VER="2.9"
+RUN wget https://github.com/fenderglass/Flye/archive/${FLYE_VER}.tar.gz && \
+ tar -xvf ${FLYE_VER}.tar.gz && \
+ rm ${FLYE_VER}.tar.gz && \
+ cd Flye-${FLYE_VER} && \
+ python setup.py build && \
+ python setup.py install
+
+# Copied from samtools container
+ENV SAMTOOLSVER="1.15"
+RUN wget https://github.com/samtools/samtools/releases/download/${SAMTOOLSVER}/samtools-${SAMTOOLSVER}.tar.bz2 && \
+ tar -xjf samtools-${SAMTOOLSVER}.tar.bz2 && \
+ rm samtools-${SAMTOOLSVER}.tar.bz2 && \
+ cd samtools-${SAMTOOLSVER} && \
+ ./configure && \
+ make && \
+ make install
+
+# vsearch aims to be a drop in replacement for usearch and so let's see if that's true
+# ie, set `usearch` as path to vsearch
+COPY --from=vsearch  /usr/local/bin/vsearch /usr/local/bin/vsearch
+COPY --from=vsearch  /usr/local/bin/vsearch /usr/local/bin/usearch
 
 ## EToKi itself ##
 
@@ -76,11 +109,21 @@ ARG MAMBA_DOCKERFILE_ACTIVATE=1
 ENV PATH="/usr/local/bin/EToKi:$PATH" \
  LC_ALL=C
 
-RUN cd /usr/local/bin && git clone https://github.com/zheminzhou/EToKi.git -b ${SOFTWARENAME_VER}
+WORKDIR /usr/local/bin
+RUN git clone https://github.com/zheminzhou/EToKi.git -b ${ETOKI_VER_ENV}
+
+RUN pip3 install ete3 numba numpy==1.21 pandas scikit-learn psutil click scipy
 
-# Installs all 3rd party software except the kraken database and usearch
-RUN cd /usr/local/bin/EToKi && python EToKi.py configure --install
+# Install all 3rd party software except the kraken database
+WORKDIR /usr/local/bin/EToKi
+# Install 3rd party tools
+# Samtools has an issue for some reason and so explicitly give that path
+# Also give the path to usearch since it is normally proprietary (although we have vsearch)
+RUN python EToKi.py configure --install --usearch $(which usearch)
+RUN python EToKi.py configure --path samtools=$(which samtools)
+RUN python EToKi.py configure --path blast=$(which blastn)
 
+# Fix the shebang line for all EToKi scripts to /usr/bin/env python
 RUN find /usr/local/bin/EToKi -name '*.py' -exec sed -i.bak -e '1 i #!/usr/bin/env python\n# ^^^ inserted corrected shebang for this container' {} \;
 
 # WORKDIR sets working directory
@@ -88,14 +131,15 @@ WORKDIR /data
 
 # A second FROM insruction creates a new stage
 # We use `test` for the test image
-FROM app as test
+#FROM app as test
 
 # Demonstrate that the program is successfully installed
 
 # Option 1: run the program's internal tests, for example with SPAdes:
-RUN cd /usr/local/bin/EToKi && EToKi.py --help
-#RUN cd /usr/local/bin/EToKi && $(which python) $(which EToKi.py) --help
-RUN cd /usr/local/bin/EToKi && bash example.bash
+WORKDIR /usr/local/bin/EToKi
+RUN usearch && which usearch
+RUN EToKi.py --help
+RUN bash -e example.bash
 
 # Option 2: write your own tests in a bash script in the same directory as your Dockerfile:
 #COPY my_tests.sh .

From 0f5fc247d1cdb679f6936f78a93dfe792ade63f3 Mon Sep 17 00:00:00 2001
From: Lee Katz <lskatz@gmail.com>
Date: Thu, 28 Apr 2022 16:00:20 -0400
Subject: [PATCH 09/24] Update Dockerfile

---
 etoki/1.2/Dockerfile | 68 +++++++++++++++++++++++++++++++++++---------
 1 file changed, 55 insertions(+), 13 deletions(-)

diff --git a/etoki/1.2/Dockerfile b/etoki/1.2/Dockerfile
index 886297c41..eecb1a788 100644
--- a/etoki/1.2/Dockerfile
+++ b/etoki/1.2/Dockerfile
@@ -27,7 +27,11 @@ FROM staphb/shovill:1.1.0 AS shovill
 FROM staphb/kraken2:2.1.2-no-db AS kraken2
 FROM staphb/bowtie2:2.4.4 AS bowtie2
 FROM staphb/lyveset:1.1.4f AS lyveset
-FROM torognes/vsearch:2.21.1 as vsearch
+FROM torognes/vsearch:2.21.1 AS vsearch
+FROM staphb/bbtools:38.96 AS bbtools
+FROM staphb/mlst:2.19.0 AS mlst
+FROM staphb/orthofinder:2.17 AS orthofinder
+FROM staphb/cfsan-snp-pipeline:2.0.2 AS cfsan
 
 # Back to the base app so that we have things like ENV variables
 FROM app
@@ -74,9 +78,26 @@ COPY --from=shovill   /skesa/skesa         /usr/local/bin/
 COPY --from=shovill   /megahit             /megahit
 COPY --from=shovill   /pilon               /pilon
 COPY --from=shovill   /SPAdes-*-Linux      /spades
+COPY --from=shovill   /megahit             /megahit
 COPY --from=kraken2   /kraken2-2.1.2       /kraken2
 COPY --from=kraken2   /kraken2-db          /kraken2-db
 COPY --from=bowtie2   /opt/bowtie2-*       /opt/bowtie2
+COPY --from=bbtools   /opt/bbmap           /opt/bbmap
+COPY --from=mlst      /ncbi-blast-2.9.0+   /ncbi-blast-2.9.0+
+COPY --from=lyveset   /lyve-SET            /lyve-SET
+COPY --from=orthofinder /mmseqs            /mmseqs
+COPY --from=cfsan     /gatk                /gatk
+# vsearch aims to be a drop in replacement for usearch and so let's see if that's true
+# ie, set `usearch` as path to vsearch
+#COPY --from=vsearch  /usr/local/bin/vsearch /usr/local/bin/vsearch
+#COPY --from=vsearch  /usr/local/bin/vsearch /usr/local/bin/usearch
+
+WORKDIR /
+RUN wget https://github.com/torognes/vsearch/releases/download/v2.21.1/vsearch-2.21.1-linux-x86_64.tar.gz && \
+ tar zxvf vsearch-2.21.1*.tar.gz && \
+ cp -v vsearch-2.21.1-linux-x86_64/bin/vsearch /usr/local/bin/ && \
+ ln -sv /usr/local/bin/vsearch /usr/local/bin/usearch && \
+ rm -rfv ./vsearch
 
 # Copied from flye container
 ENV FLYE_VER="2.9"
@@ -97,16 +118,17 @@ RUN wget https://github.com/samtools/samtools/releases/download/${SAMTOOLSVER}/s
  make && \
  make install
 
-# vsearch aims to be a drop in replacement for usearch and so let's see if that's true
-# ie, set `usearch` as path to vsearch
-COPY --from=vsearch  /usr/local/bin/vsearch /usr/local/bin/vsearch
-COPY --from=vsearch  /usr/local/bin/vsearch /usr/local/bin/usearch
+# Diamond
+ENV DIAMOND_VER="v2.0.15"
+RUN wget https://github.com/bbuchfink/diamond/releases/download/${DIAMOND_VER}/diamond-linux64.tar.gz && \
+ tar zxvf diamond-linux64.tar.gz && \
+ mv -v diamond /usr/local/bin/diamond && \
+ rm diamond-linux64.tar.gz
 
 ## EToKi itself ##
-
 # ENV instructions set environment variables that persist from the build into the resulting image
 # Use for e.g. $PATH and locale settings for compatibility with Singularity
-ENV PATH="/usr/local/bin/EToKi:$PATH" \
+ENV PATH="/usr/local/bin:/usr/local/bin/EToKi:/megahit:/pilon:/spades/bin:/kraken2:/opt/bowtie2:/opt/bbmap:/ncbi-blast-2.9.0+/bin:/megahit/megahit_v1.1.4_LINUX_CPUONLY_x86_64-bin:/lyve-SET:/lyve-SET/scripts:/mmseqs/bin:/mmseqs/util:$PATH" \
  LC_ALL=C
 
 WORKDIR /usr/local/bin
@@ -119,13 +141,34 @@ WORKDIR /usr/local/bin/EToKi
 # Install 3rd party tools
 # Samtools has an issue for some reason and so explicitly give that path
 # Also give the path to usearch since it is normally proprietary (although we have vsearch)
-RUN python EToKi.py configure --install --usearch $(which usearch)
-RUN python EToKi.py configure --path samtools=$(which samtools)
-RUN python EToKi.py configure --path blast=$(which blastn)
+RUN python EToKi.py configure --path bbduk=$(which bbduk.sh) || true
+RUN python EToKi.py configure --path bbmerge=$(which bbmerge.sh) || true
+RUN python EToKi.py configure --path repair=$(which repair.sh) || true
+RUN python EToKi.py configure --path pilon=$(find /pilon -name 'pilon-*.jar' |  head -n 1) || true
+RUN python EToKi.py configure --path flye=$(which flye) || true
+RUN python EToKi.py configure --path kraken2=$(find /kraken2 -type f -name kraken2 | head -n 1) || true
+RUN python EToKi.py configure --path bowtie2=$(which bowtie2) || true
+RUN python EToKi.py configure --path bowtie2build=$(which bowtie2-build) || true
+RUN python EToKi.py configure --path raxml=$(which raxml) || true
+RUN python EToKi.py configure --path raxml_ng=$(which raxml_ng) || true
+RUN python EToKi.py configure --path samtools=$(which samtools) || true
+RUN python EToKi.py configure --path blastn=$(which blastn) || true
+RUN python EToKi.py configure --path makeblastdb=$(which makeblastdb) || true
+RUN python EToKi.py configure --path diamond=$(which diamond) || true
+RUN python EToKi.py configure --path gatk=/gatk/GenomeAnalysisTK-3.8-1-0-gf15c1c3ef/GenomeAnalysisTK.jar || true
+#RUN python EToKi.py configure --path lastal=
+#RUN python EToKi.py configure --path lastdb=
+RUN python EToKi.py configure --path mmseqs=$(which mmseqs) || true
+RUN python EToKi.py configure --path megahit=$(which megahit) || true
+RUN python EToKi.py configure --path spades=$(which spades.py) || true
+#RUN python EToKi.py configure --path kraken_db=
+RUN python EToKi.py configure --usearch /usr/local/bin/usearch
+RUN python EToKi.py configure
 
 # Fix the shebang line for all EToKi scripts to /usr/bin/env python
 RUN find /usr/local/bin/EToKi -name '*.py' -exec sed -i.bak -e '1 i #!/usr/bin/env python\n# ^^^ inserted corrected shebang for this container' {} \;
 
+
 # WORKDIR sets working directory
 WORKDIR /data
 
@@ -137,9 +180,9 @@ WORKDIR /data
 
 # Option 1: run the program's internal tests, for example with SPAdes:
 WORKDIR /usr/local/bin/EToKi
-RUN usearch && which usearch
 RUN EToKi.py --help
-RUN bash -e example.bash
+#RUN bash -e example.bash
+RUN python EToKi.py MLSTdb -i examples/Escherichia.Achtman.alleles.fasta -r examples/Escherichia.Achtman.references.fasta -d examples/Escherichia.Achtman.convert.tab
 
 # Option 2: write your own tests in a bash script in the same directory as your Dockerfile:
 #COPY my_tests.sh .
@@ -150,4 +193,3 @@ RUN bash -e example.bash
 #RUN mkdir tests/
 #COPY tests/ tests/
 #RUN python3 -m unittest discover -s tests
-#

From 5b31182b66ce5fac11d5281b36dd2711cc3ff476 Mon Sep 17 00:00:00 2001
From: Lee Katz <lskatz@gmail.com>
Date: Thu, 28 Apr 2022 16:05:39 -0400
Subject: [PATCH 10/24] added back in test

---
 etoki/1.2/Dockerfile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/etoki/1.2/Dockerfile b/etoki/1.2/Dockerfile
index eecb1a788..2f69498a6 100644
--- a/etoki/1.2/Dockerfile
+++ b/etoki/1.2/Dockerfile
@@ -174,7 +174,7 @@ WORKDIR /data
 
 # A second FROM insruction creates a new stage
 # We use `test` for the test image
-#FROM app as test
+FROM app as test
 
 # Demonstrate that the program is successfully installed
 

From b5184737f9f34540d9303a48104f5021f3c97847 Mon Sep 17 00:00:00 2001
From: Lee Katz - Aspen <gzu2@cdc.gov>
Date: Mon, 2 May 2022 16:28:41 -0400
Subject: [PATCH 11/24] updated multistage etoki

---
 etoki/1.2/Dockerfile | 232 ++++++++++++++++++-------------------------
 1 file changed, 95 insertions(+), 137 deletions(-)

diff --git a/etoki/1.2/Dockerfile b/etoki/1.2/Dockerfile
index 2f69498a6..5ff34146f 100644
--- a/etoki/1.2/Dockerfile
+++ b/etoki/1.2/Dockerfile
@@ -1,17 +1,37 @@
-# FROM defines the base docker image. This command has to come first in the file
-# The 'as' keyword lets you name the folowing stage. We use `app` for the production image
-FROM ubuntu:focal as app
+# FYI this is a multistage build, which can get very complicated
 
-# ARG sets environment variables during the build stage
 ARG ETOKI_VER="1.2"
-# Persistence with an env
-ENV ETOKI_VER_ENV=$ETOKI_VER
+ARG SKESA_VER=2.4.0
+ARG SPADES_VER=3.15.4
+ARG SHOVILL_VER=1.1.0
+ARG KRAKEN2_VER=2.1.2-no-db
+ARG BOWTIE_VER=2.4.4
+ARG LYVESET_VER=1.1.4f
+ARG VSEARCH_VER=2.21.1
+ARG BBTOOLS_VER=38.96
+ARG MLST_VER=2.19.0
+ARG ORTHOFINDER_VER=2.17
+ARG CFSAN_VER=2.0.2
+ARG FLYE_VER=2.9
+ARG SAMTOOLS_VER=1.15
+ARG DIAMOND_VER="v2.0.15"
+ARG RAXML_VER="8.2.12"
+
+FROM staphb/shovill:${SHOVILL_VER}          AS shovill
+FROM staphb/kraken2:${KRAKEN2_VER}          AS kraken2
+FROM staphb/bowtie2:${BOWTIE_VER}           AS bowtie2
+FROM staphb/lyveset:${LYVESET_VER}          AS lyveset
+FROM torognes/vsearch:${VSEARCH_VER}        AS vsearch
+FROM staphb/bbtools:${BBTOOLS_VER}          AS bbtools
+FROM staphb/mlst:${MLST_VER}                AS mlst
+FROM staphb/orthofinder:${ORTHOFINDER_VER}  AS orthofinder
+FROM staphb/cfsan-snp-pipeline:${CFSAN_VER} AS cfsan
+FROM staphb/flye:${FLYE_VER}                AS flye
+FROM staphb/samtools:${SAMTOOLS_VER}        AS samtools
+FROM staphb/raxml:${RAXML_VER}              AS raxml
+
+FROM ubuntu:jammy as app
 
-# set workdir to default for building; set to /data at the end
-WORKDIR /
-
-# LABEL instructions tag the image with metadata that might be important to the user
-# Optional, but highly recommended
 LABEL base.image="ubuntu:focal"
 LABEL dockerfile.version="1"
 LABEL software="EToKi"
@@ -22,26 +42,8 @@ LABEL license="https://github.com/zheminzhou/EToKi/blob/master/LICENSE"
 LABEL maintainer="Lee Katz"
 LABEL maintainer.email="gzu2@cdc.gov"
 
-# Multistage build
-FROM staphb/shovill:1.1.0 AS shovill
-FROM staphb/kraken2:2.1.2-no-db AS kraken2
-FROM staphb/bowtie2:2.4.4 AS bowtie2
-FROM staphb/lyveset:1.1.4f AS lyveset
-FROM torognes/vsearch:2.21.1 AS vsearch
-FROM staphb/bbtools:38.96 AS bbtools
-FROM staphb/mlst:2.19.0 AS mlst
-FROM staphb/orthofinder:2.17 AS orthofinder
-FROM staphb/cfsan-snp-pipeline:2.0.2 AS cfsan
-
-# Back to the base app so that we have things like ENV variables
-FROM app
-
-# https://askubuntu.com/a/1013396
-# avoid asking about timezone during apt-get
 ARG DEBIAN_FRONTEND=noninteractive
 
-# RUN executes code during the build
-# Install dependencies via apt-get or yum if using a centos or fedora base
 RUN apt-get update && apt-get install -y --no-install-recommends \
  libncurses5-dev \
  libbz2-dev \
@@ -52,6 +54,7 @@ RUN apt-get update && apt-get install -y --no-install-recommends \
  g++ \
  python-setuptools \
  zlib1g-dev \
+ python-is-python3 \
  python3-pip \
  python3-dev \
  libgconf-2-4 \
@@ -68,128 +71,83 @@ RUN apt-get update && apt-get install -y --no-install-recommends \
  apt-get autoclean && \
  rm -rf /var/lib/apt/lists/*
 
- #openjdk-8-jdk \
- #libcurl4-openssl-dev \
-
-# Gimme python3 instead of python2
-RUN update-alternatives --install /usr/bin/python python /usr/bin/python3 1
-
-COPY --from=shovill   /skesa/skesa         /usr/local/bin/
-COPY --from=shovill   /megahit             /megahit
-COPY --from=shovill   /pilon               /pilon
-COPY --from=shovill   /SPAdes-*-Linux      /spades
-COPY --from=shovill   /megahit             /megahit
-COPY --from=kraken2   /kraken2-2.1.2       /kraken2
-COPY --from=kraken2   /kraken2-db          /kraken2-db
-COPY --from=bowtie2   /opt/bowtie2-*       /opt/bowtie2
-COPY --from=bbtools   /opt/bbmap           /opt/bbmap
-COPY --from=mlst      /ncbi-blast-2.9.0+   /ncbi-blast-2.9.0+
-COPY --from=lyveset   /lyve-SET            /lyve-SET
-COPY --from=orthofinder /mmseqs            /mmseqs
-COPY --from=cfsan     /gatk                /gatk
-# vsearch aims to be a drop in replacement for usearch and so let's see if that's true
-# ie, set `usearch` as path to vsearch
-#COPY --from=vsearch  /usr/local/bin/vsearch /usr/local/bin/vsearch
-#COPY --from=vsearch  /usr/local/bin/vsearch /usr/local/bin/usearch
-
-WORKDIR /
-RUN wget https://github.com/torognes/vsearch/releases/download/v2.21.1/vsearch-2.21.1-linux-x86_64.tar.gz && \
- tar zxvf vsearch-2.21.1*.tar.gz && \
- cp -v vsearch-2.21.1-linux-x86_64/bin/vsearch /usr/local/bin/ && \
- ln -sv /usr/local/bin/vsearch /usr/local/bin/usearch && \
- rm -rfv ./vsearch
-
-# Copied from flye container
-ENV FLYE_VER="2.9"
-RUN wget https://github.com/fenderglass/Flye/archive/${FLYE_VER}.tar.gz && \
- tar -xvf ${FLYE_VER}.tar.gz && \
- rm ${FLYE_VER}.tar.gz && \
- cd Flye-${FLYE_VER} && \
- python setup.py build && \
- python setup.py install
-
-# Copied from samtools container
-ENV SAMTOOLSVER="1.15"
-RUN wget https://github.com/samtools/samtools/releases/download/${SAMTOOLSVER}/samtools-${SAMTOOLSVER}.tar.bz2 && \
- tar -xjf samtools-${SAMTOOLSVER}.tar.bz2 && \
- rm samtools-${SAMTOOLSVER}.tar.bz2 && \
- cd samtools-${SAMTOOLSVER} && \
- ./configure && \
- make && \
- make install
+RUN pip3 install ete3 numba pandas scikit-learn psutil click scipy
+RUN pip3 install numpy==1.21.6
+
+COPY --from=shovill     /skesa/skesa            /usr/local/bin/
+COPY --from=shovill     /megahit                /megahit
+COPY --from=shovill     /pilon                  /pilon
+COPY --from=shovill     /SPAdes-*-Linux         /spades
+COPY --from=kraken2     /kraken2-2*             /kraken2
+COPY --from=kraken2     /kraken2-db             /kraken2-db
+COPY --from=bowtie2     /opt/bowtie2-*          /opt/bowtie2
+COPY --from=bbtools     /opt/bbmap              /opt/bbmap
+COPY --from=mlst        /ncbi-blast-2.9.0+      /ncbi-blast-2.9.0+
+COPY --from=lyveset     /lyve-SET               /lyve-SET
+COPY --from=orthofinder /mmseqs                 /mmseqs
+COPY --from=cfsan       /gatk                   /gatk
+COPY --from=flye        /Flye-*                 /flye
+COPY --from=samtools    /samtools-*             /samtools
+COPY --from=vsearch     /usr/local/bin/vsearch  /usr/local/bin/vsearch
+COPY --from=vsearch     /usr/local/bin/vsearch  /usr/local/bin/usearch
+COPY --from=raxml       /standard-RAxML-8.2.12  /standard-RAxML
+COPY --from=raxml       /raxml_ng               /raxml_ng
+
+ARG DIAMOND_VER
+ARG ETOKI_VER
 
 # Diamond
-ENV DIAMOND_VER="v2.0.15"
 RUN wget https://github.com/bbuchfink/diamond/releases/download/${DIAMOND_VER}/diamond-linux64.tar.gz && \
  tar zxvf diamond-linux64.tar.gz && \
  mv -v diamond /usr/local/bin/diamond && \
+ chmod +x /usr/local/bin/diamond && \
  rm diamond-linux64.tar.gz
 
 ## EToKi itself ##
-# ENV instructions set environment variables that persist from the build into the resulting image
-# Use for e.g. $PATH and locale settings for compatibility with Singularity
-ENV PATH="/usr/local/bin:/usr/local/bin/EToKi:/megahit:/pilon:/spades/bin:/kraken2:/opt/bowtie2:/opt/bbmap:/ncbi-blast-2.9.0+/bin:/megahit/megahit_v1.1.4_LINUX_CPUONLY_x86_64-bin:/lyve-SET:/lyve-SET/scripts:/mmseqs/bin:/mmseqs/util:$PATH" \
- LC_ALL=C
-
-WORKDIR /usr/local/bin
-RUN git clone https://github.com/zheminzhou/EToKi.git -b ${ETOKI_VER_ENV}
-
-RUN pip3 install ete3 numba numpy==1.21 pandas scikit-learn psutil click scipy
+RUN wget https://github.com/zheminzhou/EToKi/archive/refs/tags/${ETOKI_VER}.tar.gz && \
+  ls / && \
+  tar zxvf ${ETOKI_VER}.tar.gz && \
+  rm ${ETOKI_VER}.tar.gz
+#RUN git clone https://github.com/zheminzhou/EToKi.git -b ${ETOKI_VER} /EToKi-${ETOKI_VER}
+
+WORKDIR /EToKi-${ETOKI_VER}
+
+ENV PATH="/usr/local/bin:/EToKi-${ETOKI_VER}:/flye/bin:/megahit:/pilon:/spades/bin:/kraken2:/opt/bowtie2:/opt/bbmap:/ncbi-blast-2.9.0+/bin:/megahit/megahit_v1.1.4_LINUX_CPUONLY_x86_64-bin:/raxml_ng:/standard-RAxML:/lyve-SET:/lyve-SET/scripts:/mmseqs/bin:/mmseqs/util:$PATH" \
+   LC_ALL=C
+
+RUN EToKi.py configure --help
+
+# Point EToKi at the third-party tools installed above, one `configure --path` call per tool.
+# Each tool is configured exactly once; every RUN here adds an image layer, so repeats are pure overhead.
+RUN EToKi.py configure --path bbduk=/opt/bbmap/bbduk.sh
+RUN EToKi.py configure --path bbmerge=$(which bbmerge.sh)
+RUN EToKi.py configure --path repair=$(which repair.sh)
+RUN EToKi.py configure --path pilon=$(find /pilon -name 'pilon-*.jar' |  head -n 1)
+RUN EToKi.py configure --path flye=$(which flye)
+RUN EToKi.py configure --path kraken2=$(find /kraken2 -type f -name kraken2 | head -n 1)
+RUN EToKi.py configure --path bowtie2=$(which bowtie2)
+RUN EToKi.py configure --path bowtie2build=$(which bowtie2-build)
+RUN EToKi.py configure --path raxml=$(which raxmlHPC)
+RUN EToKi.py configure --path raxml_ng=$(which raxml-ng)
+RUN EToKi.py configure --path samtools=$(which samtools)
+RUN EToKi.py configure --path blastn=$(which blastn)
+RUN EToKi.py configure --path makeblastdb=$(which makeblastdb)
+RUN EToKi.py configure --path diamond=$(which diamond)
+RUN EToKi.py configure --path gatk=/gatk/GenomeAnalysisTK-3.8-1-0-gf15c1c3ef/GenomeAnalysisTK.jar
+RUN EToKi.py configure --path mmseqs=$(which mmseqs)
+RUN EToKi.py configure --path megahit=$(which megahit)
+RUN EToKi.py configure --path spades=$(which spades.py)
+RUN EToKi.py configure --usearch /usr/local/bin/usearch
 
-# Install all 3rd party software except the kraken database
-WORKDIR /usr/local/bin/EToKi
-# Install 3rd party tools
-# Samtools has an issue for some reason and so explicitly give that path
-# Also give the path to usearch since it is normally proprietary (although we have vsearch)
-RUN python EToKi.py configure --path bbduk=$(which bbduk.sh) || true
-RUN python EToKi.py configure --path bbmerge=$(which bbmerge.sh) || true
-RUN python EToKi.py configure --path repair=$(which repair.sh) || true
-RUN python EToKi.py configure --path pilon=$(find /pilon -name 'pilon-*.jar' |  head -n 1) || true
-RUN python EToKi.py configure --path flye=$(which flye) || true
-RUN python EToKi.py configure --path kraken2=$(find /kraken2 -type f -name kraken2 | head -n 1) || true
-RUN python EToKi.py configure --path bowtie2=$(which bowtie2) || true
-RUN python EToKi.py configure --path bowtie2build=$(which bowtie2-build) || true
-RUN python EToKi.py configure --path raxml=$(which raxml) || true
-RUN python EToKi.py configure --path raxml_ng=$(which raxml_ng) || true
-RUN python EToKi.py configure --path samtools=$(which samtools) || true
-RUN python EToKi.py configure --path blastn=$(which blastn) || true
-RUN python EToKi.py configure --path makeblastdb=$(which makeblastdb) || true
-RUN python EToKi.py configure --path diamond=$(which diamond) || true
-RUN python EToKi.py configure --path gatk=/gatk/GenomeAnalysisTK-3.8-1-0-gf15c1c3ef/GenomeAnalysisTK.jar || true
-#RUN python EToKi.py configure --path lastal=
-#RUN python EToKi.py configure --path lastdb=
-RUN python EToKi.py configure --path mmseqs=$(which mmseqs) || true
-RUN python EToKi.py configure --path megahit=$(which megahit) || true
-RUN python EToKi.py configure --path spades=$(which spades.py) || true
-#RUN python EToKi.py configure --path kraken_db=
-RUN python EToKi.py configure --usearch /usr/local/bin/usearch
-RUN python EToKi.py configure
-
-# Fix the shebang line for all EToKi scripts to /usr/bin/env python
-RUN find /usr/local/bin/EToKi -name '*.py' -exec sed -i.bak -e '1 i #!/usr/bin/env python\n# ^^^ inserted corrected shebang for this container' {} \;
-
-
-# WORKDIR sets working directory
 WORKDIR /data
 
-# A second FROM insruction creates a new stage
-# We use `test` for the test image
 FROM app as test
 
-# Demonstrate that the program is successfully installed
-
-# Option 1: run the program's internal tests, for example with SPAdes:
 WORKDIR /usr/local/bin/EToKi
+# Get the help menu up
 RUN EToKi.py --help
+# Show the configuration works
+RUN EToKi.py configure  
 #RUN bash -e example.bash
-RUN python EToKi.py MLSTdb -i examples/Escherichia.Achtman.alleles.fasta -r examples/Escherichia.Achtman.references.fasta -d examples/Escherichia.Achtman.convert.tab
-
-# Option 2: write your own tests in a bash script in the same directory as your Dockerfile:
-#COPY my_tests.sh .
-#RUN bash my_tests.sh
+RUN EToKi.py MLSTdb -i examples/Escherichia.Achtman.alleles.fasta -r examples/Escherichia.Achtman.references.fasta -d examples/Escherichia.Achtman.convert.tab
 
-# Option 3: write python unit tests in a tests/ directory in the same directory as your Dockerfile:
-#RUN apt-get install -y python3
-#RUN mkdir tests/
-#COPY tests/ tests/
-#RUN python3 -m unittest discover -s tests

From f582fe83faecc569f452784d4af9396708dd116b Mon Sep 17 00:00:00 2001
From: Lee Katz <lskatz@gmail.com>
Date: Mon, 2 May 2022 16:31:00 -0400
Subject: [PATCH 12/24] with ideas from Erin

---
 etoki/1.2/Dockerfile | 232 ++++++++++++++++++-------------------------
 1 file changed, 95 insertions(+), 137 deletions(-)

diff --git a/etoki/1.2/Dockerfile b/etoki/1.2/Dockerfile
index 2f69498a6..5ff34146f 100644
--- a/etoki/1.2/Dockerfile
+++ b/etoki/1.2/Dockerfile
@@ -1,17 +1,37 @@
-# FROM defines the base docker image. This command has to come first in the file
-# The 'as' keyword lets you name the folowing stage. We use `app` for the production image
-FROM ubuntu:focal as app
+# FYI this is a multistage build, which can get very complicated
 
-# ARG sets environment variables during the build stage
 ARG ETOKI_VER="1.2"
-# Persistence with an env
-ENV ETOKI_VER_ENV=$ETOKI_VER
+ARG SKESA_VER=2.4.0
+ARG SPADES_VER=3.15.4
+ARG SHOVILL_VER=1.1.0
+ARG KRAKEN2_VER=2.1.2-no-db
+ARG BOWTIE_VER=2.4.4
+ARG LYVESET_VER=1.1.4f
+ARG VSEARCH_VER=2.21.1
+ARG BBTOOLS_VER=38.96
+ARG MLST_VER=2.19.0
+ARG ORTHOFINDER_VER=2.17
+ARG CFSAN_VER=2.0.2
+ARG FLYE_VER=2.9
+ARG SAMTOOLS_VER=1.15
+ARG DIAMOND_VER="v2.0.15"
+ARG RAXML_VER="8.2.12"
+
+FROM staphb/shovill:${SHOVILL_VER}          AS shovill
+FROM staphb/kraken2:${KRAKEN2_VER}          AS kraken2
+FROM staphb/bowtie2:${BOWTIE_VER}           AS bowtie2
+FROM staphb/lyveset:${LYVESET_VER}          AS lyveset
+FROM torognes/vsearch:${VSEARCH_VER}        AS vsearch
+FROM staphb/bbtools:${BBTOOLS_VER}          AS bbtools
+FROM staphb/mlst:${MLST_VER}                AS mlst
+FROM staphb/orthofinder:${ORTHOFINDER_VER}  AS orthofinder
+FROM staphb/cfsan-snp-pipeline:${CFSAN_VER} AS cfsan
+FROM staphb/flye:${FLYE_VER}                AS flye
+FROM staphb/samtools:${SAMTOOLS_VER}        AS samtools
+FROM staphb/raxml:${RAXML_VER}              AS raxml
+
+FROM ubuntu:jammy as app
 
-# set workdir to default for building; set to /data at the end
-WORKDIR /
-
-# LABEL instructions tag the image with metadata that might be important to the user
-# Optional, but highly recommended
 LABEL base.image="ubuntu:focal"
 LABEL dockerfile.version="1"
 LABEL software="EToKi"
@@ -22,26 +42,8 @@ LABEL license="https://github.com/zheminzhou/EToKi/blob/master/LICENSE"
 LABEL maintainer="Lee Katz"
 LABEL maintainer.email="gzu2@cdc.gov"
 
-# Multistage build
-FROM staphb/shovill:1.1.0 AS shovill
-FROM staphb/kraken2:2.1.2-no-db AS kraken2
-FROM staphb/bowtie2:2.4.4 AS bowtie2
-FROM staphb/lyveset:1.1.4f AS lyveset
-FROM torognes/vsearch:2.21.1 AS vsearch
-FROM staphb/bbtools:38.96 AS bbtools
-FROM staphb/mlst:2.19.0 AS mlst
-FROM staphb/orthofinder:2.17 AS orthofinder
-FROM staphb/cfsan-snp-pipeline:2.0.2 AS cfsan
-
-# Back to the base app so that we have things like ENV variables
-FROM app
-
-# https://askubuntu.com/a/1013396
-# avoid asking about timezone during apt-get
 ARG DEBIAN_FRONTEND=noninteractive
 
-# RUN executes code during the build
-# Install dependencies via apt-get or yum if using a centos or fedora base
 RUN apt-get update && apt-get install -y --no-install-recommends \
  libncurses5-dev \
  libbz2-dev \
@@ -52,6 +54,7 @@ RUN apt-get update && apt-get install -y --no-install-recommends \
  g++ \
  python-setuptools \
  zlib1g-dev \
+ python-is-python3 \
  python3-pip \
  python3-dev \
  libgconf-2-4 \
@@ -68,128 +71,83 @@ RUN apt-get update && apt-get install -y --no-install-recommends \
  apt-get autoclean && \
  rm -rf /var/lib/apt/lists/*
 
- #openjdk-8-jdk \
- #libcurl4-openssl-dev \
-
-# Gimme python3 instead of python2
-RUN update-alternatives --install /usr/bin/python python /usr/bin/python3 1
-
-COPY --from=shovill   /skesa/skesa         /usr/local/bin/
-COPY --from=shovill   /megahit             /megahit
-COPY --from=shovill   /pilon               /pilon
-COPY --from=shovill   /SPAdes-*-Linux      /spades
-COPY --from=shovill   /megahit             /megahit
-COPY --from=kraken2   /kraken2-2.1.2       /kraken2
-COPY --from=kraken2   /kraken2-db          /kraken2-db
-COPY --from=bowtie2   /opt/bowtie2-*       /opt/bowtie2
-COPY --from=bbtools   /opt/bbmap           /opt/bbmap
-COPY --from=mlst      /ncbi-blast-2.9.0+   /ncbi-blast-2.9.0+
-COPY --from=lyveset   /lyve-SET            /lyve-SET
-COPY --from=orthofinder /mmseqs            /mmseqs
-COPY --from=cfsan     /gatk                /gatk
-# vsearch aims to be a drop in replacement for usearch and so let's see if that's true
-# ie, set `usearch` as path to vsearch
-#COPY --from=vsearch  /usr/local/bin/vsearch /usr/local/bin/vsearch
-#COPY --from=vsearch  /usr/local/bin/vsearch /usr/local/bin/usearch
-
-WORKDIR /
-RUN wget https://github.com/torognes/vsearch/releases/download/v2.21.1/vsearch-2.21.1-linux-x86_64.tar.gz && \
- tar zxvf vsearch-2.21.1*.tar.gz && \
- cp -v vsearch-2.21.1-linux-x86_64/bin/vsearch /usr/local/bin/ && \
- ln -sv /usr/local/bin/vsearch /usr/local/bin/usearch && \
- rm -rfv ./vsearch
-
-# Copied from flye container
-ENV FLYE_VER="2.9"
-RUN wget https://github.com/fenderglass/Flye/archive/${FLYE_VER}.tar.gz && \
- tar -xvf ${FLYE_VER}.tar.gz && \
- rm ${FLYE_VER}.tar.gz && \
- cd Flye-${FLYE_VER} && \
- python setup.py build && \
- python setup.py install
-
-# Copied from samtools container
-ENV SAMTOOLSVER="1.15"
-RUN wget https://github.com/samtools/samtools/releases/download/${SAMTOOLSVER}/samtools-${SAMTOOLSVER}.tar.bz2 && \
- tar -xjf samtools-${SAMTOOLSVER}.tar.bz2 && \
- rm samtools-${SAMTOOLSVER}.tar.bz2 && \
- cd samtools-${SAMTOOLSVER} && \
- ./configure && \
- make && \
- make install
+RUN pip3 install ete3 numba pandas scikit-learn psutil click scipy
+RUN pip3 install numpy==1.21.6
+
+COPY --from=shovill     /skesa/skesa            /usr/local/bin/
+COPY --from=shovill     /megahit                /megahit
+COPY --from=shovill     /pilon                  /pilon
+COPY --from=shovill     /SPAdes-*-Linux         /spades
+COPY --from=kraken2     /kraken2-2*             /kraken2
+COPY --from=kraken2     /kraken2-db             /kraken2-db
+COPY --from=bowtie2     /opt/bowtie2-*          /opt/bowtie2
+COPY --from=bbtools     /opt/bbmap              /opt/bbmap
+COPY --from=mlst        /ncbi-blast-2.9.0+      /ncbi-blast-2.9.0+
+COPY --from=lyveset     /lyve-SET               /lyve-SET
+COPY --from=orthofinder /mmseqs                 /mmseqs
+COPY --from=cfsan       /gatk                   /gatk
+COPY --from=flye        /Flye-*                 /flye
+COPY --from=samtools    /samtools-*             /samtools
+COPY --from=vsearch     /usr/local/bin/vsearch  /usr/local/bin/vsearch
+COPY --from=vsearch     /usr/local/bin/vsearch  /usr/local/bin/usearch
+COPY --from=raxml       /standard-RAxML-8.2.12  /standard-RAxML
+COPY --from=raxml       /raxml_ng               /raxml_ng
+
+ARG DIAMOND_VER
+ARG ETOKI_VER
 
 # Diamond
-ENV DIAMOND_VER="v2.0.15"
 RUN wget https://github.com/bbuchfink/diamond/releases/download/${DIAMOND_VER}/diamond-linux64.tar.gz && \
  tar zxvf diamond-linux64.tar.gz && \
  mv -v diamond /usr/local/bin/diamond && \
+ chmod +x /usr/local/bin/diamond && \
  rm diamond-linux64.tar.gz
 
 ## EToKi itself ##
-# ENV instructions set environment variables that persist from the build into the resulting image
-# Use for e.g. $PATH and locale settings for compatibility with Singularity
-ENV PATH="/usr/local/bin:/usr/local/bin/EToKi:/megahit:/pilon:/spades/bin:/kraken2:/opt/bowtie2:/opt/bbmap:/ncbi-blast-2.9.0+/bin:/megahit/megahit_v1.1.4_LINUX_CPUONLY_x86_64-bin:/lyve-SET:/lyve-SET/scripts:/mmseqs/bin:/mmseqs/util:$PATH" \
- LC_ALL=C
-
-WORKDIR /usr/local/bin
-RUN git clone https://github.com/zheminzhou/EToKi.git -b ${ETOKI_VER_ENV}
-
-RUN pip3 install ete3 numba numpy==1.21 pandas scikit-learn psutil click scipy
+RUN wget https://github.com/zheminzhou/EToKi/archive/refs/tags/${ETOKI_VER}.tar.gz && \
+  ls / && \
+  tar zxvf ${ETOKI_VER}.tar.gz && \
+  rm ${ETOKI_VER}.tar.gz
+#RUN git clone https://github.com/zheminzhou/EToKi.git -b ${ETOKI_VER} /EToKi-${ETOKI_VER}
+
+WORKDIR /EToKi-${ETOKI_VER}
+
+ENV PATH="/usr/local/bin:/EToKi-${ETOKI_VER}:/flye/bin:/megahit:/pilon:/spades/bin:/kraken2:/opt/bowtie2:/opt/bbmap:/ncbi-blast-2.9.0+/bin:/megahit/megahit_v1.1.4_LINUX_CPUONLY_x86_64-bin:/raxml_ng:/standard-RAxML:/lyve-SET:/lyve-SET/scripts:/mmseqs/bin:/mmseqs/util:$PATH" \
+   LC_ALL=C
+
+RUN EToKi.py configure --help
+
+RUN EToKi.py configure --path bbduk=/opt/bbmap/bbduk.sh 
+RUN EToKi.py configure --path bbmerge=$(which bbmerge.sh) 
+RUN EToKi.py configure --path bbduk=/opt/bbmap/bbduk.sh 
+RUN EToKi.py configure --path bbmerge=$(which bbmerge.sh) 
+RUN EToKi.py configure --path repair=$(which repair.sh) 
+RUN EToKi.py configure --path pilon=$(find /pilon -name 'pilon-*.jar' |  head -n 1) 
+RUN EToKi.py configure --path flye=$(which flye) 
+RUN EToKi.py configure --path kraken2=$(find /kraken2 -type f -name kraken2 | head -n 1) 
+RUN EToKi.py configure --path bowtie2=$(which bowtie2) 
+RUN EToKi.py configure --path bowtie2build=$(which bowtie2-build) 
+RUN EToKi.py configure --path raxml=$(which raxmlHPC) 
+RUN EToKi.py configure --path raxml_ng=$(which raxml-ng) 
+RUN EToKi.py configure --path samtools=$(which samtools) 
+RUN EToKi.py configure --path blastn=$(which blastn) 
+RUN EToKi.py configure --path makeblastdb=$(which makeblastdb) 
+RUN EToKi.py configure --path diamond=$(which diamond) 
+RUN EToKi.py configure --path gatk=/gatk/GenomeAnalysisTK-3.8-1-0-gf15c1c3ef/GenomeAnalysisTK.jar 
+RUN EToKi.py configure --path mmseqs=$(which mmseqs) 
+RUN EToKi.py configure --path megahit=$(which megahit) 
+RUN EToKi.py configure --path spades=$(which spades.py) 
+RUN EToKi.py configure --usearch /usr/local/bin/usearch
 
-# Install all 3rd party software except the kraken database
-WORKDIR /usr/local/bin/EToKi
-# Install 3rd party tools
-# Samtools has an issue for some reason and so explicitly give that path
-# Also give the path to usearch since it is normally proprietary (although we have vsearch)
-RUN python EToKi.py configure --path bbduk=$(which bbduk.sh) || true
-RUN python EToKi.py configure --path bbmerge=$(which bbmerge.sh) || true
-RUN python EToKi.py configure --path repair=$(which repair.sh) || true
-RUN python EToKi.py configure --path pilon=$(find /pilon -name 'pilon-*.jar' |  head -n 1) || true
-RUN python EToKi.py configure --path flye=$(which flye) || true
-RUN python EToKi.py configure --path kraken2=$(find /kraken2 -type f -name kraken2 | head -n 1) || true
-RUN python EToKi.py configure --path bowtie2=$(which bowtie2) || true
-RUN python EToKi.py configure --path bowtie2build=$(which bowtie2-build) || true
-RUN python EToKi.py configure --path raxml=$(which raxml) || true
-RUN python EToKi.py configure --path raxml_ng=$(which raxml_ng) || true
-RUN python EToKi.py configure --path samtools=$(which samtools) || true
-RUN python EToKi.py configure --path blastn=$(which blastn) || true
-RUN python EToKi.py configure --path makeblastdb=$(which makeblastdb) || true
-RUN python EToKi.py configure --path diamond=$(which diamond) || true
-RUN python EToKi.py configure --path gatk=/gatk/GenomeAnalysisTK-3.8-1-0-gf15c1c3ef/GenomeAnalysisTK.jar || true
-#RUN python EToKi.py configure --path lastal=
-#RUN python EToKi.py configure --path lastdb=
-RUN python EToKi.py configure --path mmseqs=$(which mmseqs) || true
-RUN python EToKi.py configure --path megahit=$(which megahit) || true
-RUN python EToKi.py configure --path spades=$(which spades.py) || true
-#RUN python EToKi.py configure --path kraken_db=
-RUN python EToKi.py configure --usearch /usr/local/bin/usearch
-RUN python EToKi.py configure
-
-# Fix the shebang line for all EToKi scripts to /usr/bin/env python
-RUN find /usr/local/bin/EToKi -name '*.py' -exec sed -i.bak -e '1 i #!/usr/bin/env python\n# ^^^ inserted corrected shebang for this container' {} \;
-
-
-# WORKDIR sets working directory
 WORKDIR /data
 
-# A second FROM insruction creates a new stage
-# We use `test` for the test image
 FROM app as test
 
-# Demonstrate that the program is successfully installed
-
-# Option 1: run the program's internal tests, for example with SPAdes:
 WORKDIR /usr/local/bin/EToKi
+# Get the help menu up
 RUN EToKi.py --help
+# Show the configuration works
+RUN EToKi.py configure  
 #RUN bash -e example.bash
-RUN python EToKi.py MLSTdb -i examples/Escherichia.Achtman.alleles.fasta -r examples/Escherichia.Achtman.references.fasta -d examples/Escherichia.Achtman.convert.tab
-
-# Option 2: write your own tests in a bash script in the same directory as your Dockerfile:
-#COPY my_tests.sh .
-#RUN bash my_tests.sh
+RUN EToKi.py MLSTdb -i examples/Escherichia.Achtman.alleles.fasta -r examples/Escherichia.Achtman.references.fasta -d examples/Escherichia.Achtman.convert.tab
 
-# Option 3: write python unit tests in a tests/ directory in the same directory as your Dockerfile:
-#RUN apt-get install -y python3
-#RUN mkdir tests/
-#COPY tests/ tests/
-#RUN python3 -m unittest discover -s tests

From 0f18dc29e712920caec3b249319ee479b802c5ce Mon Sep 17 00:00:00 2001
From: Lee Katz <katzle@lab.local@lvslinhp118.lab.local>
Date: Tue, 3 May 2022 16:16:01 -0400
Subject: [PATCH 13/24] etoki finally works

---
 etoki/1.2/Dockerfile | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/etoki/1.2/Dockerfile b/etoki/1.2/Dockerfile
index 5ff34146f..b5d8f9ed6 100644
--- a/etoki/1.2/Dockerfile
+++ b/etoki/1.2/Dockerfile
@@ -143,11 +143,13 @@ WORKDIR /data
 
 FROM app as test
 
-WORKDIR /usr/local/bin/EToKi
+ARG ETOKI_VER
+
+WORKDIR /EToKi-${ETOKI_VER}
 # Get the help menu up
 RUN EToKi.py --help
 # Show the configuration works
 RUN EToKi.py configure  
-#RUN bash -e example.bash
-RUN EToKi.py MLSTdb -i examples/Escherichia.Achtman.alleles.fasta -r examples/Escherichia.Achtman.references.fasta -d examples/Escherichia.Achtman.convert.tab
+RUN bash example.bash
+#RUN EToKi.py MLSTdb -i examples/Escherichia.Achtman.alleles.fasta -r examples/Escherichia.Achtman.references.fasta -d examples/Escherichia.Achtman.convert.tab
 

From ac94572a4855957a51f403e9c71284eb41de6b2f Mon Sep 17 00:00:00 2001
From: Lee Katz <katzle@lab.local@lvslinhp118.lab.local>
Date: Wed, 4 May 2022 13:19:43 -0400
Subject: [PATCH 14/24] etoki test works

---
 etoki/1.2/Dockerfile | 59 +++++++++++++++++++++++++++++++++++++++-----
 1 file changed, 53 insertions(+), 6 deletions(-)

diff --git a/etoki/1.2/Dockerfile b/etoki/1.2/Dockerfile
index b5d8f9ed6..6b10fd1e8 100644
--- a/etoki/1.2/Dockerfile
+++ b/etoki/1.2/Dockerfile
@@ -21,7 +21,6 @@ FROM staphb/shovill:${SHOVILL_VER}          AS shovill
 FROM staphb/kraken2:${KRAKEN2_VER}          AS kraken2
 FROM staphb/bowtie2:${BOWTIE_VER}           AS bowtie2
 FROM staphb/lyveset:${LYVESET_VER}          AS lyveset
-FROM torognes/vsearch:${VSEARCH_VER}        AS vsearch
 FROM staphb/bbtools:${BBTOOLS_VER}          AS bbtools
 FROM staphb/mlst:${MLST_VER}                AS mlst
 FROM staphb/orthofinder:${ORTHOFINDER_VER}  AS orthofinder
@@ -54,6 +53,7 @@ RUN apt-get update && apt-get install -y --no-install-recommends \
  g++ \
  python-setuptools \
  zlib1g-dev \
+ libbz2-dev \
  python-is-python3 \
  python3-pip \
  python3-dev \
@@ -67,12 +67,14 @@ RUN apt-get update && apt-get install -y --no-install-recommends \
  ant \
  libssl-dev \
  python3-venv \
+ autoconf \
+ automake \
+ make \
  wget && \
  apt-get autoclean && \
  rm -rf /var/lib/apt/lists/*
 
-RUN pip3 install ete3 numba pandas scikit-learn psutil click scipy
-RUN pip3 install numpy==1.21.6
+RUN pip3 install ete3 numba pandas scikit-learn psutil click scipy numpy==1.21.6
 
 COPY --from=shovill     /skesa/skesa            /usr/local/bin/
 COPY --from=shovill     /megahit                /megahit
@@ -88,13 +90,12 @@ COPY --from=orthofinder /mmseqs                 /mmseqs
 COPY --from=cfsan       /gatk                   /gatk
 COPY --from=flye        /Flye-*                 /flye
 COPY --from=samtools    /samtools-*             /samtools
-COPY --from=vsearch     /usr/local/bin/vsearch  /usr/local/bin/vsearch
-COPY --from=vsearch     /usr/local/bin/vsearch  /usr/local/bin/usearch
 COPY --from=raxml       /standard-RAxML-8.2.12  /standard-RAxML
 COPY --from=raxml       /raxml_ng               /raxml_ng
 
 ARG DIAMOND_VER
 ARG ETOKI_VER
+ARG VSEARCH_VER
 
 # Diamond
 RUN wget https://github.com/bbuchfink/diamond/releases/download/${DIAMOND_VER}/diamond-linux64.tar.gz && \
@@ -103,6 +104,21 @@ RUN wget https://github.com/bbuchfink/diamond/releases/download/${DIAMOND_VER}/d
  chmod +x /usr/local/bin/diamond && \
  rm diamond-linux64.tar.gz
 
+# Vsearch sort of copied from https://github.com/torognes/vsearch/blob/v2.21.1/Dockerfile
+WORKDIR /opt
+RUN git clone https://github.com/torognes/vsearch.git && cd vsearch && git checkout v${VSEARCH_VER}
+WORKDIR /opt/vsearch
+RUN ./autogen.sh && \
+    ./configure CFLAGS="-O3" CXXFLAGS="-O3" && \
+    make clean && \
+    make && \
+    make install && \
+    make clean && \
+    cd .. && \
+    rm -rf /opt/vsearch
+
+WORKDIR /
+
 ## EToKi itself ##
 RUN wget https://github.com/zheminzhou/EToKi/archive/refs/tags/${ETOKI_VER}.tar.gz && \
   ls / && \
@@ -137,7 +153,14 @@ RUN EToKi.py configure --path gatk=/gatk/GenomeAnalysisTK-3.8-1-0-gf15c1c3ef/Gen
 RUN EToKi.py configure --path mmseqs=$(which mmseqs) 
 RUN EToKi.py configure --path megahit=$(which megahit) 
 RUN EToKi.py configure --path spades=$(which spades.py) 
-RUN EToKi.py configure --usearch /usr/local/bin/usearch
+RUN EToKi.py configure --usearch /usr/local/bin/vsearch
+
+# Swap out usearch for vsearch in the MLSType code
+RUN sed -i "s/ublast_cmd = .*/ublast_cmd = 'vsearch  --usearch_global {refAA}   --db {qryAA}  --threads {n_thread}  --id 0.8   --maxaccepts 6 --blast6out {aaMatch} --userfields query+target+id+alnlen+mism+opens+qlo+qhi+tlo+thi+evalue+raw+ql+tl+qrow+trow+qstrand'.format(/" /EToKi-${ETOKI_VER}/modules/MLSType.py
+RUN sed -i -e "s/qryAA=qryAA/qryAA=qry/" -e "s/refAA=refAA/refAA=ref/" /EToKi-${ETOKI_VER}/modules/MLSType.py
+RUN sed -i "s/pp.communicate\(\)/ublast_res = pp.communicate(); print(ublast_res[1]);/" /EToKi-${ETOKI_VER}/modules/MLSType.py
+# Show the results of this swap
+RUN grep -n -C 5 [uv]search /EToKi-${ETOKI_VER}/modules/MLSType.py
 
 WORKDIR /data
 
@@ -150,6 +173,30 @@ WORKDIR /EToKi-${ETOKI_VER}
 RUN EToKi.py --help
 # Show the configuration works
 RUN EToKi.py configure  
+# Example files for the MLST tests below: MLSTalleles is passed as -i, MLSTdb as -r (reference set; must not be the -i file) and MLSTtab as -d
+ENV MLSTdb="/EToKi-${ETOKI_VER}/examples/Escherichia.Achtman.references.fasta"
+ENV MLSTalleles="/EToKi-${ETOKI_VER}/examples/Escherichia.Achtman.alleles.fasta"
+ENV MLSTtab="/EToKi-${ETOKI_VER}/examples/Escherichia.Achtman.convert.tab"
+ENV ECOLI_assembly="/EToKi-${ETOKI_VER}/examples/GCF_000005845.2_ASM584v2_genomic.fna"
+ENV ECOLI_assembly2="/EToKi-${ETOKI_VER}/examples/GCF_000214765.2_ASM21476v3_genomic.fna"
+ENV ECOLI_assembly3="/EToKi-${ETOKI_VER}/examples/GCF_001566635.1_ASM156663v1_genomic.fna"
+RUN gunzip -vf ${ECOLI_assembly} ${ECOLI_assembly2} ${ECOLI_assembly3}
+
+# Try out MLST
+RUN EToKi.py MLSTdb -i ${MLSTalleles} -r ${MLSTdb} -d ${MLSTtab}
+# Show some results of the database creation
+RUN ls -lh $MLSTdb $MLSTalleles $MLSTtab && head ${MLSTalleles} ${MLSTdb} ${MLSTtab}
+# Run typing
+RUN EToKi.py MLSType -i ${ECOLI_assembly3} -r ${MLSTdb} -k G749 -o stdout -d ${MLSTtab}
+
+# Return the fasta files back to gzip status
+RUN gzip -vf ${ECOLI_assembly} ${ECOLI_assembly2} ${ECOLI_assembly3}
+# Some of this command will appear like it failed because we are lacking lastdb and kraken
 RUN bash example.bash
+
+# Run EToKi uberBlast, the same way that it is called in EToKi mlstDb
+#RUN EToKi.py uberBlast -q ${ECOLI_assembly} -r ${ECOLI_assembly2} -f --blastn --diamondSELF --min_id 0.1 --min_ratio 0.1 -t 2 -p -s 0 -e 0,3 -o /dev/stdout
+# RUN EToKi.py uberBlast -q ${ECOLI_assembly} -r ${ECOLI_assembly2} -f --blastn --diamondSELF --min_id 0.6 --min_ratio 0.7 -t 2 -p -s 1 -e 0,3 -o /dev/stdout
+#RUN bash example.bash
 #RUN EToKi.py MLSTdb -i examples/Escherichia.Achtman.alleles.fasta -r examples/Escherichia.Achtman.references.fasta -d examples/Escherichia.Achtman.convert.tab
 

From b9600f9b5bbcb270bcf8148a79e3123d0ac7c68c Mon Sep 17 00:00:00 2001
From: Lee Katz - Aspen <gzu2@cdc.gov>
Date: Mon, 23 May 2022 15:45:59 -0400
Subject: [PATCH 15/24] incorporating a suggestion from @erinyoung

---
 etoki/1.2/Dockerfile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/etoki/1.2/Dockerfile b/etoki/1.2/Dockerfile
index 5ff34146f..858f4221a 100644
--- a/etoki/1.2/Dockerfile
+++ b/etoki/1.2/Dockerfile
@@ -32,7 +32,7 @@ FROM staphb/raxml:${RAXML_VER}              AS raxml
 
 FROM ubuntu:jammy as app
 
-LABEL base.image="ubuntu:focal"
+LABEL base.image="ubuntu:jammy"
 LABEL dockerfile.version="1"
 LABEL software="EToKi"
 LABEL software.version=$ETOKI_VER

From 5a43e8fbbfdd0e8820543a5a636f1326c782cb75 Mon Sep 17 00:00:00 2001
From: Lee Katz - Aspen <gzu2@cdc.gov>
Date: Mon, 23 May 2022 15:52:05 -0400
Subject: [PATCH 16/24] added note that usearch replaced by blast

---
 etoki/1.2/README.md | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/etoki/1.2/README.md b/etoki/1.2/README.md
index 4deb8327a..3bcb25aa2 100644
--- a/etoki/1.2/README.md
+++ b/etoki/1.2/README.md
@@ -2,6 +2,8 @@
 
 _note_ this text was lifted from the original repo README
 
+_note_ this is a modified version of EToKi mainly to remove `usearch` and replace it with blast
+
 ### Trim genomic reads
 ~~~~~~~~~~~
 python EToKi.py prepare --pe examples/S_R1.fastq.gz,examples/S_R2.fastq.gz -p examples/prep_out

From dcaf8d033df8fde4712b0e663e1ab339cb6a0baa Mon Sep 17 00:00:00 2001
From: Lee Katz - Aspen <gzu2@cdc.gov>
Date: Wed, 25 May 2022 21:20:34 -0400
Subject: [PATCH 17/24] updated to EToKi lskatz fork

---
 etoki/1.2/Dockerfile | 91 ++++++++++++++++++++++++--------------------
 1 file changed, 50 insertions(+), 41 deletions(-)

diff --git a/etoki/1.2/Dockerfile b/etoki/1.2/Dockerfile
index e9e4b4cc4..a60273864 100644
--- a/etoki/1.2/Dockerfile
+++ b/etoki/1.2/Dockerfile
@@ -7,9 +7,9 @@ ARG SHOVILL_VER=1.1.0
 ARG KRAKEN2_VER=2.1.2-no-db
 ARG BOWTIE_VER=2.4.4
 ARG LYVESET_VER=1.1.4f
-ARG VSEARCH_VER=2.21.1
+#ARG VSEARCH_VER=2.21.1
 ARG BBTOOLS_VER=38.96
-ARG MLST_VER=2.19.0
+#ARG MLST_VER=2.19.0
 ARG ORTHOFINDER_VER=2.17
 ARG CFSAN_VER=2.0.2
 ARG FLYE_VER=2.9
@@ -22,8 +22,8 @@ FROM staphb/kraken2:${KRAKEN2_VER}          AS kraken2
 FROM staphb/bowtie2:${BOWTIE_VER}           AS bowtie2
 FROM staphb/lyveset:${LYVESET_VER}          AS lyveset
 FROM staphb/bbtools:${BBTOOLS_VER}          AS bbtools
-FROM staphb/mlst:${MLST_VER}                AS mlst
-FROM staphb/orthofinder:${ORTHOFINDER_VER}  AS orthofinder
+#FROM staphb/mlst:${MLST_VER}                AS mlst
+#FROM staphb/orthofinder:${ORTHOFINDER_VER}  AS orthofinder
 FROM staphb/cfsan-snp-pipeline:${CFSAN_VER} AS cfsan
 FROM staphb/flye:${FLYE_VER}                AS flye
 FROM staphb/samtools:${SAMTOOLS_VER}        AS samtools
@@ -47,7 +47,7 @@ RUN apt-get update && apt-get install -y --no-install-recommends \
  libncurses5-dev \
  libbz2-dev \
  liblzma-dev \
- perl \
+ perl-base \
  libcurl4-gnutls-dev \
  gcc \
  g++ \
@@ -70,7 +70,9 @@ RUN apt-get update && apt-get install -y --no-install-recommends \
  autoconf \
  automake \
  make \
- wget && \
+ wget \
+ mmseqs2 \
+ ncbi-blast+ && \
  apt-get autoclean && \
  rm -rf /var/lib/apt/lists/*
 
@@ -84,9 +86,9 @@ COPY --from=kraken2     /kraken2-2*             /kraken2
 COPY --from=kraken2     /kraken2-db             /kraken2-db
 COPY --from=bowtie2     /opt/bowtie2-*          /opt/bowtie2
 COPY --from=bbtools     /opt/bbmap              /opt/bbmap
-COPY --from=mlst        /ncbi-blast-2.9.0+      /ncbi-blast-2.9.0+
+#COPY --from=mlst        /ncbi-blast-2.9.0+      /ncbi-blast-2.9.0+
 COPY --from=lyveset     /lyve-SET               /lyve-SET
-COPY --from=orthofinder /mmseqs                 /mmseqs
+#COPY --from=orthofinder /mmseqs                 /mmseqs
 COPY --from=cfsan       /gatk                   /gatk
 COPY --from=flye        /Flye-*                 /flye
 COPY --from=samtools    /samtools-*             /samtools
@@ -95,7 +97,7 @@ COPY --from=raxml       /raxml_ng               /raxml_ng
 
 ARG DIAMOND_VER
 ARG ETOKI_VER
-ARG VSEARCH_VER
+#ARG VSEARCH_VER
 
 # Diamond
 RUN wget https://github.com/bbuchfink/diamond/releases/download/${DIAMOND_VER}/diamond-linux64.tar.gz && \
@@ -104,33 +106,50 @@ RUN wget https://github.com/bbuchfink/diamond/releases/download/${DIAMOND_VER}/d
  chmod +x /usr/local/bin/diamond && \
  rm diamond-linux64.tar.gz
 
+# mmseqs
+#RUN wget https://mmseqs.com/latest/mmseqs-linux-avx2.tar.gz && \
+#RUN wget --no-check-certificate --secure-protocol=TLSv1_2 --debug -v --auth-no-challenge http://mmseqs.com/latest/mmseqs-linux-avx2.tar.gz && \
+#  tar -xvf mmseqs-linux-avx2.tar.gz && \
+#  rm -rf mmseqs-linux-avx2.tar.gz && \
+#  /bin/bash -c "source /mmseqs/util/bash-completion.sh"
+
+
+ENV PATH="/usr/local/bin:/EToKi-${ETOKI_VER}:/flye/bin:/megahit:/pilon:/spades/bin:/kraken2:/opt/bowtie2:/opt/bbmap:/ncbi-blast-2.9.0+/bin:/megahit/megahit_v1.1.4_LINUX_CPUONLY_x86_64-bin:/raxml_ng:/standard-RAxML:/lyve-SET:/lyve-SET/scripts:/mmseqs/bin:/mmseqs/util:$PATH" \
+   LC_ALL=C
+
+# Test many executables
+#RUN mmseqs --version
+#RUN flye --version
+#RUN samtools --help
+#RUN diamond --help && diamond --version
+
 # Vsearch sort of copied from https://github.com/torognes/vsearch/blob/v2.21.1/Dockerfile
-WORKDIR /opt
-RUN git clone https://github.com/torognes/vsearch.git && cd vsearch && git checkout v${VSEARCH_VER}
-WORKDIR /opt/vsearch
-RUN ./autogen.sh && \
-    ./configure CFLAGS="-O3" CXXFLAGS="-O3" && \
-    make clean && \
-    make && \
-    make install && \
-    make clean && \
-    cd .. && \
-    rm -rf /opt/vsearch
+#WORKDIR /opt
+#RUN git clone https://github.com/torognes/vsearch.git && cd vsearch && git checkout v${VSEARCH_VER}
+#WORKDIR /opt/vsearch
+#RUN ./autogen.sh && \
+    #./configure CFLAGS="-O3" CXXFLAGS="-O3" && \
+    #make clean && \
+    #make && \
+    #make install && \
+    #make clean && \
+    #cd .. && \
+    #rm -rf /opt/vsearch
 
 WORKDIR /
 
 ## EToKi itself ##
-RUN wget https://github.com/zheminzhou/EToKi/archive/refs/tags/${ETOKI_VER}.tar.gz && \
-  ls / && \
-  tar zxvf ${ETOKI_VER}.tar.gz && \
-  rm ${ETOKI_VER}.tar.gz
-#RUN git clone https://github.com/zheminzhou/EToKi.git -b ${ETOKI_VER} /EToKi-${ETOKI_VER}
+#RUN wget https://github.com/zheminzhou/EToKi/archive/refs/tags/${ETOKI_VER}.tar.gz && \
+#  ls / && \
+#  tar zxvf ${ETOKI_VER}.tar.gz && \
+#  rm ${ETOKI_VER}.tar.gz
+# TODO checkout a specific tag or hashtag
+RUN git clone https://github.com/lskatz/EToKi.git /EToKi-${ETOKI_VER} && \
+ cd /EToKi-${ETOKI_VER} && \
+ cd -
 
 WORKDIR /EToKi-${ETOKI_VER}
 
-ENV PATH="/usr/local/bin:/EToKi-${ETOKI_VER}:/flye/bin:/megahit:/pilon:/spades/bin:/kraken2:/opt/bowtie2:/opt/bbmap:/ncbi-blast-2.9.0+/bin:/megahit/megahit_v1.1.4_LINUX_CPUONLY_x86_64-bin:/raxml_ng:/standard-RAxML:/lyve-SET:/lyve-SET/scripts:/mmseqs/bin:/mmseqs/util:$PATH" \
-   LC_ALL=C
-
 RUN EToKi.py configure --help
 
 RUN EToKi.py configure --path bbduk=/opt/bbmap/bbduk.sh 
@@ -147,20 +166,15 @@ RUN EToKi.py configure --path raxml=$(which raxmlHPC)
 RUN EToKi.py configure --path raxml_ng=$(which raxml-ng) 
 RUN EToKi.py configure --path samtools=$(which samtools) 
 RUN EToKi.py configure --path blastn=$(which blastn) 
+RUN EToKi.py configure --path blastp=$(which blastp) 
 RUN EToKi.py configure --path makeblastdb=$(which makeblastdb) 
 RUN EToKi.py configure --path diamond=$(which diamond) 
 RUN EToKi.py configure --path gatk=/gatk/GenomeAnalysisTK-3.8-1-0-gf15c1c3ef/GenomeAnalysisTK.jar 
 RUN EToKi.py configure --path mmseqs=$(which mmseqs) 
 RUN EToKi.py configure --path megahit=$(which megahit) 
 RUN EToKi.py configure --path spades=$(which spades.py) 
-RUN EToKi.py configure --usearch /usr/local/bin/vsearch
-
-# Swap out usearch for vsearch in the MLSType code
-RUN sed -i "s/ublast_cmd = .*/ublast_cmd = 'vsearch  --usearch_global {refAA}   --db {qryAA}  --threads {n_thread}  --id 0.8   --maxaccepts 6 --blast6out {aaMatch} --userfields query+target+id+alnlen+mism+opens+qlo+qhi+tlo+thi+evalue+raw+ql+tl+qrow+trow+qstrand'.format(/" /EToKi-${ETOKI_VER}/modules/MLSType.py
-RUN sed -i -e "s/qryAA=qryAA/qryAA=qry/" -e "s/refAA=refAA/refAA=ref/" /EToKi-${ETOKI_VER}/modules/MLSType.py
-RUN sed -i "s/pp.communicate\(\)/ublast_res = pp.communicate(); print(ublast_res[1]);/" /EToKi-${ETOKI_VER}/modules/MLSType.py
-# Show the results of this swap
-RUN grep -n -C 5 [uv]search /EToKi-${ETOKI_VER}/modules/MLSType.py
+# In the LK version, we are emulating usearch with blastp
+RUN EToKi.py configure --usearch $(which blastp)
 
 WORKDIR /data
 
@@ -194,8 +208,3 @@ RUN gzip -vf ${ECOLI_assembly} ${ECOLI_assembly2} ${ECOLI_assembly3}
 # Some of this command will appear like it failed because we are lacking lastdb and kraken
 RUN bash example.bash
 
-# Run EToKi uberBlast, the same way that it is called in EToKi mlstDb
-#RUN EToKi.py uberBlast -q ${ECOLI_assembly} -r ${ECOLI_assembly2} -f --blastn --diamondSELF --min_id 0.1 --min_ratio 0.1 -t 2 -p -s 0 -e 0,3 -o /dev/stdout
-# RUN EToKi.py uberBlast -q ${ECOLI_assembly} -r ${ECOLI_assembly2} -f --blastn --diamondSELF --min_id 0.6 --min_ratio 0.7 -t 2 -p -s 1 -e 0,3 -o /dev/stdout
-#RUN bash example.bash
-#RUN EToKi.py MLSTdb -i examples/Escherichia.Achtman.alleles.fasta -r examples/Escherichia.Achtman.references.fasta -d examples/Escherichia.Achtman.convert.tab

From 6dde5550e5693d134db1b113295928fe62069543 Mon Sep 17 00:00:00 2001
From: Lee Katz - Aspen <gzu2@cdc.gov>
Date: Thu, 26 May 2022 10:43:52 -0400
Subject: [PATCH 18/24] updated EToKi to v1.2.1 and made a note in the EToKi
 readme that only MLST is guaranteed

---
 etoki/{1.2 => 1.2.1}/Dockerfile | 10 ++++++++--
 etoki/{1.2 => 1.2.1}/README.md  |  4 +++-
 2 files changed, 11 insertions(+), 3 deletions(-)
 rename etoki/{1.2 => 1.2.1}/Dockerfile (94%)
 rename etoki/{1.2 => 1.2.1}/README.md (99%)

diff --git a/etoki/1.2/Dockerfile b/etoki/1.2.1/Dockerfile
similarity index 94%
rename from etoki/1.2/Dockerfile
rename to etoki/1.2.1/Dockerfile
index a60273864..cbe6b6dc8 100644
--- a/etoki/1.2/Dockerfile
+++ b/etoki/1.2.1/Dockerfile
@@ -146,6 +146,7 @@ WORKDIR /
 # TODO checkout a specific tag or hashtag
 RUN git clone https://github.com/lskatz/EToKi.git /EToKi-${ETOKI_VER} && \
  cd /EToKi-${ETOKI_VER} && \
+ git checkout 1.2.1 && \
  cd -
 
 WORKDIR /EToKi-${ETOKI_VER}
@@ -205,6 +206,11 @@ RUN EToKi.py MLSType -i ${ECOLI_assembly3} -r ${MLSTdb} -k G749 -o stdout -d ${M
 
 # Return the fasta files back to gzip status
 RUN gzip -vf ${ECOLI_assembly} ${ECOLI_assembly2} ${ECOLI_assembly3}
-# Some of this command will appear like it failed because we are lacking lastdb and kraken
-RUN bash example.bash
+
+# Just take the examples of unit tests that are most relevant to MLST
+# from example.bash in the repo
+#RUN python EToKi.py MLSTdb -i examples/Escherichia.Achtman.alleles.fasta -r examples/Escherichia.Achtman.references.fasta -d examples/Escherichia.Achtman.convert.tab
+#RUN gzip -cd examples/GCF_001566635.1_ASM156663v1_genomic.fna.gz > examples/GCF_001566635.1_ASM156663v1_genomic.fna && \
+
+
 
diff --git a/etoki/1.2/README.md b/etoki/1.2.1/README.md
similarity index 99%
rename from etoki/1.2/README.md
rename to etoki/1.2.1/README.md
index 3bcb25aa2..fa822506c 100644
--- a/etoki/1.2/README.md
+++ b/etoki/1.2.1/README.md
@@ -2,7 +2,9 @@
 
 _note_ this text was lifted from the original repo README
 
-_note_ this is a modified version of EToKi mainly to remove `usearch` and replace it with blast
+_note_ this is a modified version of EToKi; the main change is that `usearch` has been removed and replaced with blast.
+The modified version comes from the @lskatz fork of EToKi and is labeled `1.2.1` to mark it as a custom version.
+Additionally, only the MLST methods are tested; other EToKi modules, such as assembly and metagenomics, are not guaranteed to work.
 
 ### Trim genomic reads
 ~~~~~~~~~~~

From a66fd67817acd5d1a56a25c4ae00860ca8fa4d7d Mon Sep 17 00:00:00 2001
From: Lee Katz <lskatz@gmail.com>
Date: Wed, 8 Jun 2022 21:59:58 -0400
Subject: [PATCH 19/24] fixed version in test-etoki.yml to v1.2.1

---
 .github/workflows/test-etoki.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/test-etoki.yml b/.github/workflows/test-etoki.yml
index 32738591a..0ce825cf9 100644
--- a/.github/workflows/test-etoki.yml
+++ b/.github/workflows/test-etoki.yml
@@ -9,7 +9,7 @@ on:
   workflow_dispatch:
   pull_request:
     paths:
-      - "etoki/1.2/Dockerfile"  # Dockerfile path, e.g. 'htslib/1.14/Dockerfile' so that only your image is tested
+      - "etoki/1.2.1/Dockerfile"  # Dockerfile path, e.g. 'htslib/1.14/Dockerfile' so that only your image is tested
 
 # A workflow run is made up of one or more jobs that can run sequentially or in parallel
 jobs:

From a9dd585c370db189b9c1d896d7e5cf2e3e7fc9b6 Mon Sep 17 00:00:00 2001
From: Lee Katz <lskatz@gmail.com>
Date: Wed, 8 Jun 2022 22:01:47 -0400
Subject: [PATCH 20/24] etoki v1.2.1

---
 README.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/README.md b/README.md
index c3b94c5a3..b828dd998 100644
--- a/README.md
+++ b/README.md
@@ -56,7 +56,7 @@ To learn more about the docker pull rate limits and the open source software pro
 | [datasets-sars-cov-2](https://github.com/CDCgov/datasets-sars-cov-2) <br/> [![docker pulls](https://badgen.net/docker/pulls/staphb/datasets-sars-cov-2)](https://hub.docker.com/r/staphb/datasets-sars-cov-2) | <ul><li>0.6.2</li><li>0.6.3</li></ul> | https://github.com/CDCgov/datasets-sars-cov-2 |
 | [DSK](https://hub.docker.com/r/staphb/dsk) <br/> [![docker pulls](https://badgen.net/docker/pulls/staphb/dsk)](https://hub.docker.com/r/staphb/dsk) | <ul><li>0.0.100</li></ul> | https://gatb.inria.fr/software/dsk/ |
 | [emm-typing-tool](https://hub.docker.com/r/staphb/emm-typing-tool) <br/> [![docker pulls](https://badgen.net/docker/pulls/staphb/emm-typing-tool)](https://hub.docker.com/r/staphb/emm-typing-tool) | <ul><li>0.0.1 (no version)</li></ul> | https://github.com/phe-bioinformatics/emm-typing-tool |
-| [EToKi](https://hub.docker.com/r/staphb/etoki) <br/> [![docker pulls](https://badgen.net/docker/pulls/staphb/etoki)](https://hub.docker.com/r/staphb/etoki) | <ul><li>1.2</li></ul> | https://github.com/zheminzhou/EToKi |
+| [EToKi](https://hub.docker.com/r/staphb/etoki) <br/> [![docker pulls](https://badgen.net/docker/pulls/staphb/etoki)](https://hub.docker.com/r/staphb/etoki) | <ul><li>1.2.1</li></ul> | https://github.com/zheminzhou/EToKi |
 | [FastANI](https://hub.docker.com/r/staphb/fastani) <br/> [![docker pulls](https://badgen.net/docker/pulls/staphb/fastani)](https://hub.docker.com/r/staphb/fastani) | <ul><li>1.1</li><li>1.32</li><li>1.33</li></ul> | https://github.com/ParBLiSS/FastANI |
 | [FastTree](https://hub.docker.com/r/staphb/fasttree) <br/> [![docker pulls](https://badgen.net/docker/pulls/staphb/fasttree)](https://hub.docker.com/r/staphb/fasttree) | <ul><li>2.1.11</li></ul> | http://www.microbesonline.org/fasttree/ |
 | [FastQC](https://hub.docker.com/r/staphb/fastqc) <br/> [![docker pulls](https://badgen.net/docker/pulls/staphb/fastqc)](https://hub.docker.com/r/staphb/fastqc) | <ul><li>0.11.8</li><li>0.11.9</li></ul> | https://www.bioinformatics.babraham.ac.uk/projects/fastqc/ <br/> https://github.com/s-andrews/FastQC |

From 85320705478646fb06a6d47b4c46bd50ee5eae43 Mon Sep 17 00:00:00 2001
From: Lee Katz <lskatz@gmail.com>
Date: Wed, 8 Jun 2022 22:02:36 -0400
Subject: [PATCH 21/24] v1.2.1

---
 .github/workflows/test-etoki.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/test-etoki.yml b/.github/workflows/test-etoki.yml
index 0ce825cf9..6a5955222 100644
--- a/.github/workflows/test-etoki.yml
+++ b/.github/workflows/test-etoki.yml
@@ -18,6 +18,6 @@ jobs:
   build-to-test:
     uses: ./.github/workflows/build-to-test.yml
     with:
-      path_to_context: "./etoki/1.2"  # Path to directory with Dockerfile and context, e.g. "./spades/3.12.0"
+      path_to_context: "./etoki/1.2.1"  # Path to directory with Dockerfile and context, e.g. "./spades/3.12.0"
       dockerfile_name: "Dockerfile"
       cache: "etoki"  # Use the program name as a nickname for a GitHub cache of your image's layers, e.g. "spades". The cache will speed up re-running the workflow.

From 781b52ca2f5290be01ac45d3421302198e53db7d Mon Sep 17 00:00:00 2001
From: Lee Katz - Aspen <gzu2@cdc.gov>
Date: Wed, 8 Jun 2022 22:10:44 -0400
Subject: [PATCH 22/24] added wget command instead of git checkout; v1.2.1

---
 etoki/1.2.1/Dockerfile | 13 ++++++++-----
 1 file changed, 8 insertions(+), 5 deletions(-)

diff --git a/etoki/1.2.1/Dockerfile b/etoki/1.2.1/Dockerfile
index cbe6b6dc8..18574e1fa 100644
--- a/etoki/1.2.1/Dockerfile
+++ b/etoki/1.2.1/Dockerfile
@@ -1,6 +1,6 @@
 # FYI this is a multistage build, which can get very complicated
 
-ARG ETOKI_VER="1.2"
+ARG ETOKI_VER="1.2.1"
 ARG SKESA_VER=2.4.0
 ARG SPADES_VER=3.15.4
 ARG SHOVILL_VER=1.1.0
@@ -144,10 +144,13 @@ WORKDIR /
 #  tar zxvf ${ETOKI_VER}.tar.gz && \
 #  rm ${ETOKI_VER}.tar.gz
 # TODO checkout a specific tag or hashtag
-RUN git clone https://github.com/lskatz/EToKi.git /EToKi-${ETOKI_VER} && \
- cd /EToKi-${ETOKI_VER} && \
- git checkout 1.2.1 && \
- cd -
+RUN wget https://github.com/lskatz/EToKi/archive/refs/tags/${ETOKI_VER}.tar.gz && \
+ tar zxvf 1.2.1.tar.gz
+
+#RUN git clone https://github.com/lskatz/EToKi.git /EToKi-${ETOKI_VER} && \
+# cd /EToKi-${ETOKI_VER} && \
+# git checkout 1.2.1 && \
+# cd -
 
 WORKDIR /EToKi-${ETOKI_VER}
 

From f6cf4c3ea469d0658bc4e7f6a7627e6700dc7c8d Mon Sep 17 00:00:00 2001
From: Lee Katz <lskatz@gmail.com>
Date: Fri, 17 Jun 2022 20:23:57 -0400
Subject: [PATCH 23/24] added `tar zxvf ${ETOKI_VER}.tar.gz`

---
 etoki/1.2.1/Dockerfile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/etoki/1.2.1/Dockerfile b/etoki/1.2.1/Dockerfile
index 18574e1fa..6cde5bad3 100644
--- a/etoki/1.2.1/Dockerfile
+++ b/etoki/1.2.1/Dockerfile
@@ -145,7 +145,7 @@ WORKDIR /
 #  rm ${ETOKI_VER}.tar.gz
 # TODO checkout a specific tag or hashtag
 RUN wget https://github.com/lskatz/EToKi/archive/refs/tags/${ETOKI_VER}.tar.gz && \
- tar zxvf 1.2.1.tar.gz
+ tar zxvf ${ETOKI_VER}.tar.gz
 
 #RUN git clone https://github.com/lskatz/EToKi.git /EToKi-${ETOKI_VER} && \
 # cd /EToKi-${ETOKI_VER} && \

From ae897fd739eb607d6c35e691ac1bf8bd90fc123a Mon Sep 17 00:00:00 2001
From: Lee Katz <lskatz@gmail.com>
Date: Fri, 17 Jun 2022 20:25:29 -0400
Subject: [PATCH 24/24] Delete test-etoki.yml

---
 .github/workflows/test-etoki.yml | 23 -----------------------
 1 file changed, 23 deletions(-)
 delete mode 100644 .github/workflows/test-etoki.yml

diff --git a/.github/workflows/test-etoki.yml b/.github/workflows/test-etoki.yml
deleted file mode 100644
index 6a5955222..000000000
--- a/.github/workflows/test-etoki.yml
+++ /dev/null
@@ -1,23 +0,0 @@
-# This caller workflow builds an image to the "test" stage.
-# Instructions: replace all the <placeholder> stubs in this template with values for your image.
-# Some explanations come from: https://github.com/actions/starter-workflows/blob/main/automation/manual.yml
-
-name: Test etoki image
-
-# Controls when the action will run. Workflow runs when manually triggered using the UI or when you submit your pull request
-on:
-  workflow_dispatch:
-  pull_request:
-    paths:
-      - "etoki/1.2.1/Dockerfile"  # Dockerfile path, e.g. 'htslib/1.14/Dockerfile' so that only your image is tested
-
-# A workflow run is made up of one or more jobs that can run sequentially or in parallel
-jobs:
-
-  # This job calls a workflow to build the image to the 'test' stage
-  build-to-test:
-    uses: ./.github/workflows/build-to-test.yml
-    with:
-      path_to_context: "./etoki/1.2.1"  # Path to directory with Dockerfile and context, e.g. "./spades/3.12.0"
-      dockerfile_name: "Dockerfile"
-      cache: "etoki"  # Use the program name as a nickname for a GitHub cache of your image's layers, e.g. "spades". The cache will speed up re-running the workflow.