Skip to content

Commit

Permalink
Listeria unit testing (#18)
Browse files Browse the repository at this point in the history
* Listeria unit testing draft

* m

* debug

* debug

* debug

* update kalamari script; add --and flags

* kraken1 db

* m

* m

* m

* m

* editing PATH

* editing PATH

* fixing src path

* m

* fixing installation dir

* jellyfish1

* jellyfish1

* m

* just two genomes

* tree kraken

* added threads 2

* added threads 2

* build kraken -x

* work on disk in kraken

* debug

* trying out kraken2

* m

* removed rebuild and work-on-disk

* kraken report

* kraken report

* more inspection of kraken output

* more inspection of kraken output

* done with unit testing for now

Co-authored-by: Lee Katz - Aspen <[email protected]>
  • Loading branch information
lskatz and lskatz authored Jul 23, 2021
1 parent 3637434 commit 8ca01fe
Show file tree
Hide file tree
Showing 2 changed files with 169 additions and 0 deletions.
96 changes: 96 additions & 0 deletions .github/workflows/unit-testing.Listeria.Kraken1.yml.bak
Original file line number Diff line number Diff line change
@@ -0,0 +1,96 @@
# This is a subsampling unit test to get early results
#
# On every push: select the first two Listeria rows of chromosomes.tsv,
# download them with downloadKalamari.pl, install Kraken 1.1.1 and build a
# Kraken1 database from the downloaded FASTA files.
on: [push]
name: Listeria-with-Kraken1

env:
  # Subset of chromosomes.tsv written by the "select for only Listeria" step
  TSV: "Kalamari/src/Listeria.tsv"
  # Output directory handed to downloadKalamari.pl
  OUTDIR: "Listeria.out"

jobs:
  build:
    runs-on: ${{ matrix.os }}
    strategy:
      matrix:
        # NOTE(review): GitHub has retired the hosted ubuntu-18.04 image;
        # confirm and bump this label before re-enabling the workflow.
        os: ['ubuntu-18.04']
        perl: ['5.32']
    name: Perl ${{ matrix.perl }} on ${{ matrix.os }}
    steps:
      - name: Set up perl
        uses: shogo82148/actions-setup-perl@v1
        with:
          perl-version: ${{ matrix.perl }}
          multi-thread: "true"
      - name: checkout my repo
        uses: actions/checkout@v2
        with:
          # The repository is checked out into ./Kalamari, which is why every
          # later path starts with Kalamari/
          path: Kalamari

      - name: env check
        run: |
          echo "$PATH" | tr ':' '\n' | sort
      - name: apt-get install
        # -y and a fresh package index so the step does not depend on the
        # runner image's apt defaults
        run: |
          sudo apt-get update
          sudo apt-get install -y ca-certificates tree
      - name: install jellyfish
        # Jellyfish v1.1.12 release binary, saved in the workspace as
        # "jellyfish1"; the "Kraken1 database" step copies it onto PATH.
        run: |
          wget https://github.com/gmarcais/Jellyfish/releases/download/v1.1.12/jellyfish-linux -O jellyfish1
          chmod -v +x jellyfish1
      - name: install-edirect
        run: |
          sudo apt-get install -y ncbi-entrez-direct
          echo "installed edirect the apt way"
          exit
          # Everything below is unreachable because of the exit above.
          # It is the manual FTP-based edirect install, kept for reference.
          cd $HOME
          perl -MNet::FTP -e '$ftp = new Net::FTP("ftp.ncbi.nlm.nih.gov", Passive => 1); $ftp->login; $ftp->binary; $ftp->get("/entrez/entrezdirect/edirect.tar.gz");'
          gunzip -cv edirect.tar.gz | tar xf -
          rm -v edirect.tar.gz
          export PATH=${PATH}:$HOME/edirect >& /dev/null || setenv PATH "${PATH}:$HOME/edirect"
          yes Y | ./edirect/setup.sh
          tree edirect
      - name: check-env
        run: echo "$PATH"
      - name: select for only Listeria
        # Header line plus the first two rows matching "Listeria"
        run: |
          head -n 1 Kalamari/src/chromosomes.tsv > ${{ env.TSV }}
          grep -m 2 Listeria Kalamari/src/chromosomes.tsv >> ${{ env.TSV }}
          echo "These are the Listeria genomes for downstream tests"
          column -ts $'\t' ${{ env.TSV }}
          hexdump -c ${{ env.TSV }}
      - name: download
        run: perl Kalamari/bin/downloadKalamari.pl --outdir ${{ env.OUTDIR }} ${{ env.TSV }}
      - name: check-results
        run: tree ${{ env.OUTDIR }}
      # - name: download-more
      #   run: perl Kalamari/bin/downloadKalamari.pl --outdir ${{ env.OUTDIR }} ${{ env.TSV }} --and protein --and nucleotide
      # - name: check-results
      #   run: tree ${{ env.OUTDIR }}
      - name: install kraken
        # Build Kraken 1.1.1 into kraken-src, then copy the result into the
        # first directory on PATH so later steps find kraken-build.
        run: |
          wget https://github.com/DerrickWood/kraken/archive/refs/tags/v1.1.1.tar.gz -O kraken-v1.1.1.tar.gz
          tar zxvf kraken-v1.1.1.tar.gz
          a_bin=$(echo "$PATH" | tr ':' "\n" | head -n 1)
          cd kraken-1.1.1 && bash install_kraken.sh kraken-src && cd -
          cp -v kraken-1.1.1/kraken-src/* "$a_bin"/
      - name: Kraken1 database
        run: |
          # Same "first directory on PATH" that the install step copied into
          a_bin=$(echo "$PATH" | tr ':' "\n" | head -n 1)
          ls -lh "$a_bin"
          chmod -v +x "$a_bin"/*
          export PATH=$PATH:$HOME/bin/kraken/bin:$a_bin
          echo "$PATH"
          #ln -svf jellyfish1 $a_bin/jellyfish
          cp -vf jellyfish1 "$a_bin"/jellyfish
          which kraken-build
          mkdir -pv kraken
          cp -rv Kalamari/src/taxonomy_v3.9 kraken/taxonomy
          find ${{ env.OUTDIR }} -name '*.fasta' -exec kraken-build --db kraken --add-to-library {} \;
          tree kraken
          # Some super debugging here with -x
          sed -i.bak 's/set -e/set -e; set -x/' "$a_bin"/build_kraken_db.sh
          grep -v '^#' -m 20 -C 2 "$a_bin"/build_kraken_db.sh
          echo ".....Building the database....."
          kraken-build --rebuild --db kraken --threads 2 --work-on-disk
      - name: Kraken1 view results
        run: |
          tree kraken
          ls -lhSR kraken

73 changes: 73 additions & 0 deletions .github/workflows/unit-testing.Listeria.Kraken2.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,73 @@
# This is a subsampling unit test to get early results
#
# On every push: select the first two Listeria rows of chromosomes.tsv,
# download them with downloadKalamari.pl, install Kraken 2.1.2, build a
# Kraken2 database from the downloaded FASTA files and classify one of them.
on: [push]
name: Listeria-with-Kraken2

env:
  # Subset of chromosomes.tsv written by the "select for only Listeria" step
  TSV: "Kalamari/src/Listeria.tsv"
  # Output directory handed to downloadKalamari.pl
  OUTDIR: "Listeria.out"
  # Directory that holds the Kraken2 database
  DB: "kraken2"

jobs:
  build:
    runs-on: ${{ matrix.os }}
    strategy:
      matrix:
        # NOTE(review): GitHub has retired the hosted ubuntu-18.04 image;
        # confirm and bump this label if the job no longer gets a runner.
        os: ['ubuntu-18.04']
        perl: ['5.32']
    name: Perl ${{ matrix.perl }} on ${{ matrix.os }}
    steps:
      - name: Set up perl
        uses: shogo82148/actions-setup-perl@v1
        with:
          perl-version: ${{ matrix.perl }}
          multi-thread: "true"
      - name: checkout my repo
        uses: actions/checkout@v2
        with:
          # The repository is checked out into ./Kalamari, which is why every
          # later path starts with Kalamari/
          path: Kalamari

      - name: env check
        run: |
          echo "$PATH" | tr ':' '\n' | sort
      - name: apt-get install
        # -y and a fresh package index so the step does not depend on the
        # runner image's apt defaults
        run: |
          sudo apt-get update
          sudo apt-get install -y ca-certificates tree jellyfish ncbi-entrez-direct
      - name: select for only Listeria
        # Header line plus the first two rows matching "Listeria"
        run: |
          head -n 1 Kalamari/src/chromosomes.tsv > ${{ env.TSV }}
          grep -m 2 Listeria Kalamari/src/chromosomes.tsv >> ${{ env.TSV }}
          echo "These are the Listeria genomes for downstream tests"
          column -ts $'\t' ${{ env.TSV }}
          hexdump -c ${{ env.TSV }}
      - name: download
        run: perl Kalamari/bin/downloadKalamari.pl --outdir ${{ env.OUTDIR }} ${{ env.TSV }}
      - name: check-results
        run: tree ${{ env.OUTDIR }}
      - name: install kraken2
        # Installs into kraken2-2.1.2/target; later steps append that
        # directory to PATH themselves because PATH changes do not carry
        # over between steps.
        run: |
          wget https://github.com/DerrickWood/kraken2/archive/refs/tags/v2.1.2.tar.gz -O kraken-v2.1.2.tar.gz
          tar zxvf kraken-v2.1.2.tar.gz
          cd kraken2-2.1.2 && bash install_kraken2.sh target && cd -
          ls -lhS kraken2-2.1.2/target
          chmod +x kraken2-2.1.2/target/*
      - name: Kraken2 database
        run: |
          export PATH=$PATH:kraken2-2.1.2/target
          which kraken2-build
          mkdir -pv ${{ env.DB }}
          cp -rv Kalamari/src/taxonomy_v3.9 ${{ env.DB }}/taxonomy
          find ${{ env.OUTDIR }} -name '*.fasta' -exec kraken2-build --db ${{ env.DB }} --add-to-library {} \;
          tree ${{ env.DB }}
          echo ".....Building the database....."
          kraken2-build --build --db ${{ env.DB }} --threads 2
      - name: Kraken2 view results
        run: |
          export PATH=$PATH:kraken2-2.1.2/target
          tree ${{ env.DB }}
          ls -lhSR ${{ env.DB }}
          # Classify the first downloaded FASTA against the new database
          QUERY=$(find ${{ env.OUTDIR }} -name '*.fasta' | head -n 1)
          echo "QUERY is $QUERY"
          # Fail with a clear message instead of letting head read stdin
          if [ -z "$QUERY" ]; then
            echo "No FASTA file found under ${{ env.OUTDIR }}" >&2
            exit 1
          fi
          head -n 2 "$QUERY"
          kraken2 --db ${{ env.DB }} --report kraken2.report --use-mpa-style --output kraken2.raw "$QUERY"
          head kraken2.report kraken2.raw

0 comments on commit 8ca01fe

Please sign in to comment.