Skip to content

Commit

Permalink
numcpus option added; new bash script to download and format
Browse files Browse the repository at this point in the history
  • Loading branch information
lskatz committed Apr 29, 2024
1 parent 7256445 commit fe458c8
Show file tree
Hide file tree
Showing 2 changed files with 63 additions and 3 deletions.
55 changes: 52 additions & 3 deletions bin/downloadKalamari.pl
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,9 @@
use File::Path qw/make_path/;
use File::Copy qw/mv/;
use Data::Dumper qw/Dumper/;
use POSIX qw/ceil/;

use threads;

local $0 = basename $0;
# Write a diagnostic line to STDERR, prefixed with the program name ($0).
# All arguments are interpolated into a single line.
sub logmsg{
  my $text = "@_";
  print STDERR "$0: $text\n";
}
Expand Down Expand Up @@ -46,6 +49,8 @@ sub downloadKalamari{
my $header = <$fh>;
chomp($header);
my @header = split /\t/, $header;

my @queue;
while(<$fh>){
chomp;
my %F;
Expand All @@ -56,14 +61,39 @@ sub downloadKalamari{
next;
}

my $fasta = downloadEntry(\%F, $settings);
$download_counter++;
push(@queue, \%F);
}
close $fh;

my @thr;
my $numPerThread = ceil(scalar(@queue)/$$settings{numcpus});
for(my $i=0;$i<$$settings{numcpus};$i++){
my @subQueue = splice(@queue, 0, $numPerThread);
logmsg "Sending ".scalar(@subQueue)." entries to thread $i";
$thr[$i] = threads->new(\&downloadEntryWorker, \@subQueue, $settings);
}

# Close out the threads
for(my $i=0;$i<@thr;$i++){
logmsg "Joining thread $i";
my $fastas = $thr[$i]->join;
}

return $download_counter;
}

# Thread worker: run downloadEntry() on every element of a sub-queue, in order.
#
# Arguments:
#   $queue     array reference; each element is handed to downloadEntry() as-is
#   $settings  passed through unchanged to every downloadEntry() call
# Returns:
#   array reference holding one downloadEntry() return value per queue
#   element, in queue order (an empty array for an empty queue)
sub downloadEntryWorker{
  my($queue, $settings) = @_;

  my @downloaded;
  for my $i (0 .. $#$queue){
    # Assigning to a single element keeps downloadEntry() in scalar context
    $downloaded[$i] = downloadEntry($$queue[$i], $settings);
  }

  return \@downloaded;
}

sub downloadEntry{
my($fields,$settings) = @_;
logmsg "Downloading $$fields{scientificName}:$$fields{nuccoreAcc}";
Expand All @@ -79,7 +109,7 @@ sub downloadEntry{
# Get the esearch xml in place for at least one downstream query
my $esearchXml = "$dir/$acc.esearch.xml";
if(! -e $esearchXml){
system("esearch -db nuccore -query '$acc' > $esearchXml.tmp");
command("esearch -db nuccore -query '$acc' > $esearchXml.tmp");
if($?){
die "ERROR running esearch: $!";
}
Expand Down Expand Up @@ -279,6 +309,25 @@ sub which{
return "";
}
# Run a shell command, retrying when it fails.
#
# Arguments:
#   $command  command line string handed to system()
# Returns:
#   the wait status ($?) of the last attempt; 0 means success.
#   $? is also left set to that value so callers can keep testing `if($?)`.
#
# The command is attempted at most $maxTries times, with a one second pause
# between failed attempts. It is never run again once it has succeeded.
sub command{
  my($command) = @_;
  my $maxTries = 3;
  my $status   = 0;

  # A counted loop is used on purpose: `last` inside do{{ ... }}while(...)
  # only leaves the inner bare block, so the retries would keep going even
  # after a successful run.
  for my $numTries (0 .. $maxTries - 1){
    system($command);

    # Test the whole wait status, not just the exit code in the high byte,
    # so that death by signal or failure to launch also counts as failure.
    $status = $?;
    last if($status == 0);

    logmsg("ERROR on command (numTries: $numTries):\n $command");

    # No point in waiting once there are no attempts left
    sleep 1 if($numTries < $maxTries - 1);
  }

  # Make sure the caller sees the status of the command itself
  $? = $status;
  return $status;
}
sub usage{
print
"Usage: $0 [options] spreadsheet.tsv
Expand Down
11 changes: 11 additions & 0 deletions bin/downloadKalamari.sh
Original file line number Diff line number Diff line change
@@ -1,6 +1,14 @@
#!/bin/bash

set -e

# Print usage and exit when help is requested as the first argument.
# The argument is compared against whole option names: an unanchored regex
# match on "-h" would also fire on any argument that merely contains "-h"
# (for example a path such as "my-host/file.tsv").
# ${1:-} expands to an empty string when no argument was given.
case "${1:-}" in
  -h|-help|--help)
    echo "Usage: $0 "
    echo " Downloads the standard chromosomes and plasmids for Kalamari"
    echo " from source and formats the kraken1 and kraken2 databases"
    exit 0
    ;;
esac

set -u

thisdir=$(dirname $0)
Expand Down Expand Up @@ -35,6 +43,8 @@ function build_kraken1(){
kraken-build --db $DB --build --threads 1
kraken-build --db $DB --clean
du -shc $DB

echo "DONE. Set KRAKEN_DEFAULT_DB=$(realpath $DB)"
}

function build_kraken2(){
Expand All @@ -49,6 +59,7 @@ function build_kraken2(){
kraken2-build --db $DB --build --threads 1
kraken2-build --db $DB --clean
du -shc $DB
echo "DONE. Set KRAKEN2_DEFAULT_DB=$(realpath $DB)"
}

perl $thisdir/downloadKalamari.pl $TSV \
Expand Down

0 comments on commit fe458c8

Please sign in to comment.