From 8ca01fe34d876d5b4cdfaf0c844d3fa169d7b697 Mon Sep 17 00:00:00 2001
From: Lee Katz
Date: Fri, 23 Jul 2021 11:50:03 -0400
Subject: [PATCH] Listeria unit testing (#18)

* Listeria unit testing draft

* m

* debug

* debug

* debug

* update kalamari script; add --and flags

* kraken1 db

* m

* m

* m

* m

* editing PATH

* editing PATH

* fixing src path

* m

* fixing installation dir

* jellyfish1

* jellyfish1

* m

* just two genomes

* tree kraken

* added threads 2

* added threads 2

* build kraken -x

* work on disk in kraken

* debug

* trying out kraken2

* m

* removed rebuild and work-on-disk

* kraken report

* kraken report

* more inspection of kraken output

* more inspection of kraken output

* done with unit testing for now

Co-authored-by: Lee Katz - Aspen
---
Reviewer notes (free text placed after the "---" separator, outside the
commit message):

  * Line breaks and YAML indentation were reconstructed from a copy of this
    patch whose whitespace had been collapsed. The added-line counts match
    the hunk headers (96 and 73), but the exact indentation widths are
    assumed from the usual GitHub Actions layout -- TODO confirm against the
    upstream commit.
  * In unit-testing.Listeria.Kraken2.yml the step that runs kraken2-build
    was labelled "Kraken1 database" (copy-paste from the Kraken1 workflow);
    it is now "Kraken2 database". Because of this one-word change the blob
    id on that file's "index" line no longer matches the payload.

 .../unit-testing.Listeria.Kraken1.yml.bak     | 96 +++++++++++++++++++
 .../unit-testing.Listeria.Kraken2.yml         | 73 ++++++++++++++
 2 files changed, 169 insertions(+)
 create mode 100644 .github/workflows/unit-testing.Listeria.Kraken1.yml.bak
 create mode 100644 .github/workflows/unit-testing.Listeria.Kraken2.yml

diff --git a/.github/workflows/unit-testing.Listeria.Kraken1.yml.bak b/.github/workflows/unit-testing.Listeria.Kraken1.yml.bak
new file mode 100644
index 0000000..9c5d6fc
--- /dev/null
+++ b/.github/workflows/unit-testing.Listeria.Kraken1.yml.bak
@@ -0,0 +1,96 @@
+# This is a subsampling unit test to get early results
+on: [push]
+name: Listeria-with-Kraken1
+
+env:
+  TSV: "Kalamari/src/Listeria.tsv"
+  OUTDIR: "Listeria.out"
+
+jobs:
+  build:
+    runs-on: ${{ matrix.os }}
+    strategy:
+      matrix:
+        os: ['ubuntu-18.04' ]
+        perl: [ '5.32' ]
+    name: Perl ${{ matrix.perl }} on ${{ matrix.os }}
+    steps:
+      - name: Set up perl
+        uses: shogo82148/actions-setup-perl@v1
+        with:
+          perl-version: ${{ matrix.perl }}
+          multi-thread: "true"
+      - name: checkout my repo
+        uses: actions/checkout@v2
+        with:
+          path: Kalamari
+
+      - name: env check
+        run: |
+          echo $PATH | tr ':' '\n' | sort
+      - name: apt-get install
+        run: sudo apt-get install ca-certificates tree
+      - name: install jellyfish
+        run: |
+          wget https://github.com/gmarcais/Jellyfish/releases/download/v1.1.12/jellyfish-linux -O jellyfish1
+          chmod -v +x jellyfish1
+      - name: install-edirect
+        run: |
+          sudo apt-get install ncbi-entrez-direct
+          echo "installed edirect the apt way"
+          exit
+          cd $HOME
+          perl -MNet::FTP -e '$ftp = new Net::FTP("ftp.ncbi.nlm.nih.gov", Passive => 1); $ftp->login; $ftp->binary; $ftp->get("/entrez/entrezdirect/edirect.tar.gz");'
+          gunzip -cv edirect.tar.gz | tar xf -
+          rm -v edirect.tar.gz
+          export PATH=${PATH}:$HOME/edirect >& /dev/null || setenv PATH "${PATH}:$HOME/edirect"
+          yes Y | ./edirect/setup.sh
+          tree edirect
+      - name: check-env
+        run: echo "$PATH"
+      - name: select for only Listeria
+        run: |
+          head -n 1 Kalamari/src/chromosomes.tsv > ${{ env.TSV }}
+          grep -m 2 Listeria Kalamari/src/chromosomes.tsv >> ${{ env.TSV }}
+          echo "These are the Listeria genomes for downstream tests"
+          column -ts $'\t' ${{ env.TSV }}
+          hexdump -c ${{ env.TSV }}
+      - name: download
+        run: perl Kalamari/bin/downloadKalamari.pl --outdir ${{ env.OUTDIR }} ${{ env.TSV }}
+      - name: check-results
+        run: tree ${{ env.OUTDIR }}
+      #- name: download-more
+      #  run: perl Kalamari/bin/downloadKalamari.pl --outdir ${{ env.OUTDIR }} ${{ env.TSV }} --and protein --and nucleotide
+      #- name: check-results
+      #  run: tree ${{ env.OUTDIR }}
+      - name: install kraken
+        run: |
+          wget https://github.com/DerrickWood/kraken/archive/refs/tags/v1.1.1.tar.gz -O kraken-v1.1.1.tar.gz
+          tar zxvf kraken-v1.1.1.tar.gz
+          a_bin=$(echo "$PATH" | tr ':' "\n" | head -n 1)
+          cd kraken-1.1.1 && bash install_kraken.sh kraken-src && cd -
+          cp -v kraken-1.1.1/kraken-src/* $a_bin/
+      - name: Kraken1 database
+        run: |
+          a_bin=$(echo "$PATH" | tr ':' "\n" | head -n 1)
+          ls -lh $a_bin
+          chmod -v +x $a_bin/*
+          export PATH=$PATH:$HOME/bin/kraken/bin:$a_bin
+          echo $PATH
+          #ln -svf jellyfish1 $a_bin/jellyfish
+          cp -vf jellyfish1 $a_bin/jellyfish
+          which kraken-build
+          mkdir -pv kraken
+          cp -rv Kalamari/src/taxonomy_v3.9 kraken/taxonomy
+          find ${{ env.OUTDIR }} -name '*.fasta' -exec kraken-build --db kraken --add-to-library {} \;
+          tree kraken
+          # Some super debugging here with -x
+          sed -i.bak 's/set -e/set -e; set -x/' $a_bin/build_kraken_db.sh
+          grep -v '^#' -m 20 -C 2 $a_bin/build_kraken_db.sh
+          echo ".....Building the database....."
+          kraken-build --rebuild --db kraken --threads 2 --work-on-disk
+      - name: Kraken1 view results
+        run: |
+          tree kraken
+          ls -lhSR kraken
+
diff --git a/.github/workflows/unit-testing.Listeria.Kraken2.yml b/.github/workflows/unit-testing.Listeria.Kraken2.yml
new file mode 100644
index 0000000..0a85e48
--- /dev/null
+++ b/.github/workflows/unit-testing.Listeria.Kraken2.yml
@@ -0,0 +1,73 @@
+# This is a subsampling unit test to get early results
+on: [push]
+name: Listeria-with-Kraken2
+
+env:
+  TSV: "Kalamari/src/Listeria.tsv"
+  OUTDIR: "Listeria.out"
+  DB: "kraken2"
+
+jobs:
+  build:
+    runs-on: ${{ matrix.os }}
+    strategy:
+      matrix:
+        os: ['ubuntu-18.04' ]
+        perl: [ '5.32' ]
+    name: Perl ${{ matrix.perl }} on ${{ matrix.os }}
+    steps:
+      - name: Set up perl
+        uses: shogo82148/actions-setup-perl@v1
+        with:
+          perl-version: ${{ matrix.perl }}
+          multi-thread: "true"
+      - name: checkout my repo
+        uses: actions/checkout@v2
+        with:
+          path: Kalamari
+
+      - name: env check
+        run: |
+          echo $PATH | tr ':' '\n' | sort
+      - name: apt-get install
+        run: sudo apt-get install ca-certificates tree jellyfish ncbi-entrez-direct
+      - name: select for only Listeria
+        run: |
+          head -n 1 Kalamari/src/chromosomes.tsv > ${{ env.TSV }}
+          grep -m 2 Listeria Kalamari/src/chromosomes.tsv >> ${{ env.TSV }}
+          echo "These are the Listeria genomes for downstream tests"
+          column -ts $'\t' ${{ env.TSV }}
+          hexdump -c ${{ env.TSV }}
+      - name: download
+        run: perl Kalamari/bin/downloadKalamari.pl --outdir ${{ env.OUTDIR }} ${{ env.TSV }}
+      - name: check-results
+        run: tree ${{ env.OUTDIR }}
+      - name: install kraken
+        run: |
+          wget https://github.com/DerrickWood/kraken2/archive/refs/tags/v2.1.2.tar.gz -O kraken-v2.1.2.tar.gz
+          tar zxvf kraken-v2.1.2.tar.gz
+          cd kraken2-2.1.2 && bash install_kraken2.sh target && cd -
+          ls -lhS kraken2-2.1.2/target
+          chmod +x kraken2-2.1.2/target/*
+      - name: Kraken2 database
+        run: |
+          export PATH=$PATH:kraken2-2.1.2/target
+          which kraken2-build
+          mkdir -pv ${{ env.DB }}
+          cp -rv Kalamari/src/taxonomy_v3.9 ${{ env.DB }}/taxonomy
+          find ${{ env.OUTDIR }} -name '*.fasta' -exec kraken2-build --db ${{ env.DB }} --add-to-library {} \;
+          tree ${{ env.DB }}
+          echo ".....Building the database....."
+          kraken2-build --build --db ${{ env.DB }} --threads 2
+      - name: Kraken2 view results
+        run: |
+          export PATH=$PATH:kraken2-2.1.2/target
+          tree ${{ env.DB }}
+          ls -lhSR ${{ env.DB }}
+          QUERY=$(find ${{ env.OUTDIR }} -name '*.fasta' | head -n 1)
+          echo "QUERY is $QUERY"
+          head -n 2 $QUERY
+          kraken2 --db ${{ env.DB }} --report kraken2.report --use-mpa-style --output kraken2.raw $QUERY
+          head kraken2.report kraken2.raw
+
+