Skip to content

Commit

Permalink
new flag to include optional files with --and
Browse files (browse the repository at this point in the history)
  • Loading branch information
lskatz committed Jun 28, 2021
1 parent a54947f commit 522c9be
Showing 1 changed file with 31 additions and 13 deletions.
44 changes: 31 additions & 13 deletions bin/downloadKalamari.pl
Original file line number | Diff line number | Diff line change
Expand Up @@ -14,10 +14,11 @@

sub main{
my $settings={};
GetOptions($settings,qw(numcpus=i outdir=s help)) or die $!;
GetOptions($settings,qw(numcpus=i and=s@ outdir=s help)) or die $!;
die usage() if($$settings{help} || !@ARGV);
$$settings{outdir} //= "Kalamari";
$$settings{numcpus}||= 1;
$$settings{and} //= [];
logmsg "Outdir will be $$settings{outdir}";

# Check for prerequisites
Expand Down Expand Up @@ -75,24 +76,35 @@ sub downloadEntry{
make_path($dir);

my $acc = "$$fields{nuccoreAcc}";
my $outfile = "$dir/$acc.fasta";
# If it exists, then skip the download
if(-e $outfile && -s $outfile > 0){
logmsg " SKIP: found $outfile already";
return $outfile;
}

# Get the esearch xml in place for at least one downstream query
my $esearchXml = "$dir/$acc.esearch.xml";
system("esearch -db nuccore -query '$acc' > $esearchXml");
if($?){
die "ERROR running esearch: $!";
if(! -e $esearchXml){
system("esearch -db nuccore -query '$acc' > $esearchXml.tmp");
if($?){
die "ERROR running esearch: $!";
}
mv("$esearchXml.tmp", $esearchXml);
}

# Download the accessory files
downloadCds("$dir/$acc", "protein", $settings);
downloadCds("$dir/$acc", "nucleotide", $settings);
geneCoordinatesFile("$dir/$acc", $settings);
for my $and (@{ $$settings{and} }){
logmsg "Downloading $and for $acc";
downloadCds("$dir/$acc", $and, $settings);
}
# If the genes nucleotide file exists, define gene coordinates too
if(-e "$dir/$acc.ffn"){
logmsg "Creating genes coordinate file for $acc";
geneCoordinatesFile("$dir/$acc", $settings);
}

# Get started on the assembly file
my $outfile = "$dir/$acc.fasta";
# If it exists, then skip the download
if(-e $outfile && -s $outfile > 0){
logmsg " SKIP: found $outfile already";
return $outfile;
}

# Main query: efetch
my $command = "cat $esearchXml | efetch -format fasta > $outfile.tmp";
Expand Down Expand Up @@ -265,5 +277,11 @@ sub usage{
--outdir '' Output directory of Kalamari database
--numcpus 1
--and Download additional files. Multiple --and
flags are allowed.
Possible values: protein, nucleotide
where either protein or nucleotide will
return files with CDS entries.
E.g., $0 --and protein --and nucleotide
";
}

0 comments on commit 522c9be

Please sign in to comment.