# Patch commentary (patch tools ignore text that precedes the first "diff --git" header).
#
# Changes carried by this patch:
#   * unit-testing.Yersinia.Kraken2.yml
#       - check-results becomes a multi-line step: after `tree`, it prints the first
#         two lines of every *.fasta file found, each line cut to 60 characters.
#       - the classification step drops the `head -n 2 $QUERY` preview and wraps the
#         kraken2 call in `set -x` / `set +x`.
#   * validateTaxonomy.yml
#       - the "matching taxids" step exports taxdir (the directory matched by
#         Kalamari/share/kalamari-*/taxonomy) and the two Perl checks read
#         nodes.dmp / names.dmp from $ENV{taxdir} instead of Kalamari/src/taxonomy.
#
# NOTE(review): this patch was recovered from a copy whose newlines had been collapsed
# into spaces. Line boundaries follow the "@@" hunk counts; the two blank context lines
# closing the second hunk are implied by its -77,8 +80,7 header. Leading indentation
# inside the hunks is reconstructed from conventional workflow layout -- confirm it
# against the target files (or apply with whitespace-insensitive matching).
diff --git a/.github/workflows/unit-testing.Yersinia.Kraken2.yml b/.github/workflows/unit-testing.Yersinia.Kraken2.yml
index 12432b8..9a4f39d 100644
--- a/.github/workflows/unit-testing.Yersinia.Kraken2.yml
+++ b/.github/workflows/unit-testing.Yersinia.Kraken2.yml
@@ -47,7 +47,10 @@ jobs:
       - name: download
         run: perl Kalamari/bin/downloadKalamari.pl --outdir ${{ matrix.GENUS }} ${{ env.TSV }}
       - name: check-results
-        run: tree ${{ matrix.GENUS }}
+        run: |
+          tree ${{ matrix.GENUS }}
+          echo "First two lines of each fasta file:"
+          find ${{ matrix.GENUS }} -name '*.fasta' | xargs head -n 2 | cut -c 1-60
       - name: install kraken
         run: |
           wget https://github.com/DerrickWood/kraken2/archive/refs/tags/v2.1.2.tar.gz -O kraken-v2.1.2.tar.gz
@@ -77,8 +80,7 @@ jobs:
           ls -lhSR ${{ env.DB }}
           QUERY=$(find ${{ matrix.GENUS }} -name '*.fasta' | head -n 1)
           echo "QUERY is $QUERY"
-          head -n 2 $QUERY
-          kraken2 --db ${{ env.DB }} --report kraken2.report --use-mpa-style --output kraken2.raw $QUERY
+          set -x; kraken2 --db ${{ env.DB }} --report kraken2.report --use-mpa-style --output kraken2.raw $QUERY; set +x;
           head kraken2.report kraken2.raw
 
 
diff --git a/.github/workflows/validateTaxonomy.yml b/.github/workflows/validateTaxonomy.yml
index 0fc47c5..849dabb 100644
--- a/.github/workflows/validateTaxonomy.yml
+++ b/.github/workflows/validateTaxonomy.yml
@@ -37,6 +37,7 @@ jobs:
           # perl Kalamari/bin/validateTaxonomy.pl Kalamari/share/kalamari-*/taxonomy/nodes.dmp Kalamari/share/kalamari-*/taxonomy/names.dmp
       - name: matching taxids
         run: |
+          export taxdir=$(\ls -d Kalamari/share/kalamari-*/taxonomy)
           echo "Making sure that all taxids in chromosomes.tsv and plasmids.tsv are present in nodes.tsv and names.tsv"
-          tail -n +2 Kalamari/src/chromosomes.tsv Kalamari/src/plasmids.tsv -q | perl -F'\t' -lane 'BEGIN{@node=`cat Kalamari/src/taxonomy/nodes.dmp`; for $n(@node){($taxid)=split(/\t/, $n); $taxid{$taxid}++; } } for my $t($F[2], $F[3]){ if(!$taxid{$t}){ print "Could not find $t taxid";} }'
-          tail -n +2 Kalamari/src/chromosomes.tsv Kalamari/src/plasmids.tsv -q | perl -F'\t' -lane 'BEGIN{@name=`cat Kalamari/src/taxonomy/names.dmp`; for $n(@name){($taxid)=split(/\t/, $n); $taxid{$taxid}++; } } for my $t($F[2], $F[3]){ if(!$taxid{$t}){ print "Could not find $t taxid";} }'
+          tail -n +2 Kalamari/src/chromosomes.tsv Kalamari/src/plasmids.tsv -q | perl -F'\t' -lane 'BEGIN{@node=`cat $ENV{taxdir}/nodes.dmp`; for $n(@node){($taxid)=split(/\t/, $n); $taxid{$taxid}++; } } for my $t($F[2], $F[3]){ if(!$taxid{$t}){ print "Could not find $t taxid";} }'
+          tail -n +2 Kalamari/src/chromosomes.tsv Kalamari/src/plasmids.tsv -q | perl -F'\t' -lane 'BEGIN{@name=`cat $ENV{taxdir}/names.dmp`; for $n(@name){($taxid)=split(/\t/, $n); $taxid{$taxid}++; } } for my $t($F[2], $F[3]){ if(!$taxid{$t}){ print "Could not find $t taxid";} }'