# v5.6.3 #161
# This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
# Learn more about bidirectional Unicode characters
# Run on every push to the listed branches.
on:
  push:
    branches: [master, dev, validate-taxonomy]

# Workflow name as displayed in the Actions tab.
name: Pull-down-all-accessions
jobs:
  # Downloads all accessions listed in Kalamari/src/plasmids.tsv and
  # Kalamari/src/chromosomes.tsv, split into chunks that run as parallel
  # matrix jobs (one chunk per runner_id).
  build:
    runs-on: ${{ matrix.os }}
    strategy:
      matrix:
        # NOTE(review): GitHub has announced retirement of the hosted
        # ubuntu-20.04 image -- confirm and bump to a supported image if so.
        os: ['ubuntu-20.04']
        perl: ['5.32']
        # One job per chunk. Keep the number of entries in sync with
        # num_chunks in the "download" step below.
        runner_id: [0, 1]
    name: chunk ${{ matrix.runner_id }} Perl ${{ matrix.perl }} on ${{ matrix.os }}
    steps:
      - name: Set up perl
        uses: shogo82148/actions-setup-perl@v1
        with:
          perl-version: ${{ matrix.perl }}
          multi-thread: "true"

      # The repository is checked out into ./Kalamari so later steps can
      # reference Kalamari/src and Kalamari/bin.
      - name: checkout my repo
        uses: actions/checkout@v4
        with:
          path: Kalamari

      - name: apt-get install
        run: |
          # Refresh the package index first so the install does not fail on
          # stale mirrors; -y keeps apt non-interactive.
          sudo apt-get update
          sudo apt-get install -y ca-certificates tree

      # Entrez Direct comes from the distribution package. A manual install
      # from the NCBI FTP site used to follow an unconditional `exit` here and
      # therefore never ran; it has been removed. If it is ever reinstated,
      # add the install directory to $GITHUB_PATH (an `export PATH=...` inside
      # a step does not carry over to later steps).
      - name: install-edirect
        run: |
          sudo apt-get install -y ncbi-entrez-direct
          echo "installed edirect the apt way"

      - name: check-env
        run: echo "$PATH"

      - name: download
        run: |
          runner_id=${{ matrix.runner_id }}
          # Number of chunks the accession list is divided into; must equal
          # the number of entries in matrix.runner_id.
          num_chunks=2
          echo "Runner_id is $runner_id"

          # Build one combined, header-less accession list: plasmid rows
          # followed by chromosome rows. The header row of chromosomes.tsv
          # is saved separately and prepended to the chunk file.
          head -n 1 Kalamari/src/chromosomes.tsv > header.tsv
          tail -n +2 Kalamari/src/plasmids.tsv > in.tsv
          tail -n +2 Kalamari/src/chromosomes.tsv >> in.tsv

          # Contiguous, non-overlapping 1-based line ranges that together
          # cover every row. end_line is inclusive (sed "a,bp"), so it must
          # NOT be decremented: doing so silently skipped the last row of
          # every chunk.
          total_lines=$(wc -l < in.tsv)
          start_line=$(( runner_id * total_lines / num_chunks + 1 ))
          end_line=$(( (runner_id + 1) * total_lines / num_chunks ))
          echo "($runner_id) start_line: $start_line"
          echo "($runner_id) end_line: $end_line"

          cat header.tsv > chunk_${runner_id}.tsv
          sed -n "${start_line},${end_line}p" in.tsv >> chunk_${runner_id}.tsv
          perl Kalamari/bin/downloadKalamari.pl --numcpus 1 --outdir kalamari.out --buffersize 100 chunk_${runner_id}.tsv

      - name: check-results
        run: tree kalamari.out

      # Currently this only records the paths of the downloaded FASTA files
      # in fasta.sizes; the size listing itself is commented out.
      - name: check-file-sizes
        run: |
          find kalamari.out -name '*.fasta.gz' > fasta.sizes
          #cat fasta.sizes | xargs -n 100 ls -lhS