# v5.6.3 #161
# Workflow file for this run

# Trigger: run on every push to one of the listed branches.
on:
  push:
    branches: [master, dev, validate-taxonomy]

# Name of this workflow.
name: Pull-down-all-accessions
jobs:
  # Downloads the accessions listed in Kalamari/src/plasmids.tsv and
  # Kalamari/src/chromosomes.tsv. The combined list is split into chunks and
  # each matrix job (one per `runner_id`) downloads exactly one chunk.
  build:
    runs-on: ${{ matrix.os }}
    strategy:
      matrix:
        # NOTE(review): confirm this hosted runner image is still offered by
        # GitHub and bump it if it has been retired.
        os: ['ubuntu-20.04']
        perl: ['5.32']
        # One job per chunk. Keep the number of ids here in sync with
        # `num_chunks` in the "download" step.
        runner_id: [0, 1]
    name: chunk ${{ matrix.runner_id }} Perl ${{ matrix.perl }} on ${{ matrix.os }}
    steps:
      - name: Set up perl
        uses: shogo82148/actions-setup-perl@v1
        with:
          perl-version: ${{ matrix.perl }}
          multi-thread: "true"

      # The repository is checked out into ./Kalamari; later steps use
      # paths relative to that directory.
      - name: checkout my repo
        uses: actions/checkout@v4
        with:
          path: Kalamari

      - name: apt-get install
        run: |
          # Refresh the package index first so the installs below do not
          # fail on stale package lists; -y keeps apt non-interactive.
          sudo apt-get update
          sudo apt-get install -y ca-certificates tree

      - name: install-edirect
        run: |
          # edirect comes from the distribution package. A manual FTP-based
          # install used to follow here, but it sat behind an unconditional
          # `exit` and never ran, so it has been removed.
          sudo apt-get install -y ncbi-entrez-direct
          echo "installed edirect the apt way"

      - name: check-env
        run: echo "$PATH"

      - name: download
        run: |
          runner_id=${{ matrix.runner_id }}
          # Total number of chunks; must equal the number of runner_id
          # values in the job matrix.
          num_chunks=2
          echo "Runner_id is $runner_id"
          # Build the combined input: the header row is kept aside, then the
          # data rows of plasmids.tsv are followed by those of chromosomes.tsv.
          head -n 1 Kalamari/src/chromosomes.tsv > header.tsv
          tail -n +2 Kalamari/src/plasmids.tsv > in.tsv
          tail -n +2 Kalamari/src/chromosomes.tsv >> in.tsv
          total_lines=$(wc -l < in.tsv)
          # Inclusive 1-based row range for this chunk. Chunk k ends on row
          # (k+1)*total/num_chunks and chunk k+1 starts on the row after it,
          # so the chunks are disjoint and together cover every row.
          start_line=$(( runner_id * total_lines / num_chunks + 1 ))
          end_line=$(( (runner_id + 1) * total_lines / num_chunks ))
          echo "($runner_id) start_line: $start_line"
          echo "($runner_id) end_line: $end_line"
          cat header.tsv > chunk_${runner_id}.tsv
          # awk prints nothing when end_line < start_line (an empty chunk),
          # whereas `sed -n "a,bp"` would still print row a.
          awk -v first="$start_line" -v last="$end_line" \
            'NR >= first && NR <= last' in.tsv >> chunk_${runner_id}.tsv
          perl Kalamari/bin/downloadKalamari.pl --numcpus 1 --outdir kalamari.out --buffersize 100 chunk_${runner_id}.tsv

      - name: check-results
        run: tree kalamari.out

      - name: check-file-sizes
        run: |
          # Records the paths of the downloaded FASTA archives in fasta.sizes.
          # The size listing itself is currently disabled (commented out).
          find kalamari.out -name '*.fasta.gz' > fasta.sizes
          #cat fasta.sizes | xargs -n 100 ls -lhS