From 8cfa2e543245e00f4a40e4bc6cc75070dfe1c8be Mon Sep 17 00:00:00 2001 From: Lee Katz Date: Wed, 5 Jun 2024 15:59:28 -0400 Subject: [PATCH] Esearch input flag (#48) * Add genomes (#45) (#46) * Corynebacterium diphtheriae * added Bifidobacterium adolescentis * replaced S. enterica IIIa; Added hops (Humulus lupulus) * added a Citrobacter species * m * replaced repressed genome accession for B. faecium * remove random single quotes * bump version * helpful log messages * v5.6.3 * make symlink to avoid naming mistakes * check whether taxonkit is loaded * use efetch -input * fix tr bug * get latest edirect * update installation instructions * update installation instructions: fix PATH * bring in other tests * update installation method for search with unit-testing * update installation method for search with kraken2 * debug the ls statement * debug the ls statement * debug the ls statement * debug building taxonomy * exclusive unit testing for taxonomy for right now * install taxonkit --- .../workflows/unit-testing.Listeria.Kraken1.yml | 16 ++++------------ .../workflows/unit-testing.Yersinia.Kraken2.yml | 10 +++++++++- .github/workflows/unit-testing.yml | 17 ++++++----------- .github/workflows/validateTaxonomy.yml | 13 ++++++++++--- bin/buildKraken1.sh | 1 + bin/filterTaxonomy.sh | 2 ++ 6 files changed, 32 insertions(+), 27 deletions(-) diff --git a/.github/workflows/unit-testing.Listeria.Kraken1.yml b/.github/workflows/unit-testing.Listeria.Kraken1.yml index e06f425..9d5c442 100644 --- a/.github/workflows/unit-testing.Listeria.Kraken1.yml +++ b/.github/workflows/unit-testing.Listeria.Kraken1.yml @@ -1,7 +1,7 @@ # This is a subsampling unit test to get early results on: push: - branches: [master, dev, validate-taxonomy] + branches: [master, dev] name: Listeria-with-Kraken1 env: @@ -41,18 +41,10 @@ jobs: tree $(realpath .) 
- name: install-edirect run: | - sudo apt-get install ncbi-entrez-direct - echo "installed edirect the apt way" - exit - cd $HOME - perl -MNet::FTP -e '$ftp = new Net::FTP("ftp.ncbi.nlm.nih.gov", Passive => 1); $ftp->login; $ftp->binary; $ftp->get("/entrez/entrezdirect/edirect.tar.gz");' - gunzip -cv edirect.tar.gz | tar xf - - rm -v edirect.tar.gz - echo $GITHUB_WORKSPACE/edirect >> $GITHUB_PATH + sh -c "$(curl -fsSL https://ftp.ncbi.nlm.nih.gov/entrez/entrezdirect/install-edirect.sh)" + echo $HOME/edirect >> $GITHUB_PATH echo $GITHUB_WORKSPACE/Kalamari/bin >> $GITHUB_PATH - #export PATH=${PATH}:$HOME/edirect >& /dev/null || setenv PATH "${PATH}:$HOME/edirect" - yes Y | ./edirect/setup.sh - tree edirect + tree $HOME/edirect - name: check-env run: echo "$PATH" - name: select for only Listeria diff --git a/.github/workflows/unit-testing.Yersinia.Kraken2.yml b/.github/workflows/unit-testing.Yersinia.Kraken2.yml index 0f7efde..4b34a7a 100644 --- a/.github/workflows/unit-testing.Yersinia.Kraken2.yml +++ b/.github/workflows/unit-testing.Yersinia.Kraken2.yml @@ -1,7 +1,7 @@ # This is a subsampling unit test to get early results on: push: - branches: [master, dev, validate-taxonomy] + branches: [master, dev] name: Genera-with-Kraken2 env: @@ -34,6 +34,14 @@ jobs: - name: env check run: | echo $PATH | tr ':' '\n' | sort + + - name: install-edirect + run: | + sh -c "$(curl -fsSL https://ftp.ncbi.nlm.nih.gov/entrez/entrezdirect/install-edirect.sh)" + echo $HOME/edirect >> $GITHUB_PATH + echo $GITHUB_WORKSPACE/Kalamari/bin >> $GITHUB_PATH + tree $HOME/edirect + - name: apt-get install run: sudo apt-get install ca-certificates tree jellyfish ncbi-entrez-direct - name: select for only for this genus diff --git a/.github/workflows/unit-testing.yml b/.github/workflows/unit-testing.yml index 2cb5dd3..b29d7bb 100644 --- a/.github/workflows/unit-testing.yml +++ b/.github/workflows/unit-testing.yml @@ -1,6 +1,6 @@ on: push: - branches: [master, dev, validate-taxonomy] + branches: 
[master, dev] name: Pull-down-all-accessions jobs: @@ -25,18 +25,13 @@ jobs: - name: apt-get install run: sudo apt-get install ca-certificates tree + - name: install-edirect run: | - sudo apt-get install ncbi-entrez-direct - echo "installed edirect the apt way" - exit - cd $HOME - perl -MNet::FTP -e '$ftp = new Net::FTP("ftp.ncbi.nlm.nih.gov", Passive => 1); $ftp->login; $ftp->binary; $ftp->get("/entrez/entrezdirect/edirect.tar.gz");' - gunzip -cv edirect.tar.gz | tar xf - - rm -v edirect.tar.gz - export PATH=${PATH}:$HOME/edirect >& /dev/null || setenv PATH "${PATH}:$HOME/edirect" - yes Y | ./edirect/setup.sh - tree edirect + sh -c "$(curl -fsSL https://ftp.ncbi.nlm.nih.gov/entrez/entrezdirect/install-edirect.sh)" + echo $HOME/edirect >> $GITHUB_PATH + echo $GITHUB_WORKSPACE/Kalamari/bin >> $GITHUB_PATH + tree $HOME/edirect - name: check-env run: echo "$PATH" - name: download diff --git a/.github/workflows/validateTaxonomy.yml b/.github/workflows/validateTaxonomy.yml index 5147f40..1ab159c 100644 --- a/.github/workflows/validateTaxonomy.yml +++ b/.github/workflows/validateTaxonomy.yml @@ -1,6 +1,6 @@ on: push: - branches: [master, dev, validate-taxonomy] + branches: [master, dev, esearch-input] name: Validate taxonomy jobs: @@ -27,11 +27,18 @@ jobs: echo $PATH echo "" cat $GITHUB_PATH + - name: install taxonkit + run: | + wget https://github.com/shenwei356/taxonkit/releases/download/v0.16.0/taxonkit_linux_amd64.tar.gz + tar -xvf taxonkit_linux_amd64.tar.gz + rm -v taxonkit_linux_amd64.tar.gz + chmod +x taxonkit + echo $(realpath .) 
>> $GITHUB_PATH - name: build taxonomy run: | echo $PATH - bash Kalamari/bin/buildTaxonomy.sh - bash Kalamari/bin/filterTaxonomy.sh + bash -x Kalamari/bin/buildTaxonomy.sh + bash -x Kalamari/bin/filterTaxonomy.sh ls -lhR Kalamari/share/kalamari-*/taxonomy - name: validate taxonomy run: | diff --git a/bin/buildKraken1.sh b/bin/buildKraken1.sh index 851ff75..f1816b0 100755 --- a/bin/buildKraken1.sh +++ b/bin/buildKraken1.sh @@ -30,6 +30,7 @@ find $SRC -name '*.fasta.gz' | \ done > $tmpfile echo -ne "ADDING to library:\n " zgrep "^>" $tmpfile | sed "s/^>//" | tr "$nl" " " + echo echo "^^ contents of $tmpfile ^^" kraken-build --db $DB --add-to-library $tmpfile ' diff --git a/bin/filterTaxonomy.sh b/bin/filterTaxonomy.sh index 1af4912..38d1bb8 100755 --- a/bin/filterTaxonomy.sh +++ b/bin/filterTaxonomy.sh @@ -3,7 +3,9 @@ set -eu # Check for dependencies +echo "Check for dependencies" which taxonkit +echo thisdir=$(dirname $0) thisfile=$(basename $0)