Skip to content

Merge branch 'dev' of github.com:lskatz/Kalamari into dev #169

Merge branch 'dev' of github.com:lskatz/Kalamari into dev

Merge branch 'dev' of github.com:lskatz/Kalamari into dev #169

Workflow file for this run

# Downloads every accession listed in the Kalamari plasmid and chromosome
# spreadsheets on each push to master or dev. The combined accession list is
# split into contiguous chunks, one per `runner_id` matrix entry, so that the
# chunks are downloaded by separate jobs.
on:
  push:
    branches: [master, dev]
name: Pull-down-all-accessions
jobs:
  build:
    runs-on: ${{ matrix.os }}
    strategy:
      matrix:
        # NOTE(review): the ubuntu-20.04 hosted runner image appears to have
        # been retired by GitHub -- confirm and bump to a supported image.
        os: ['ubuntu-20.04']
        # Quoted so the version stays a string rather than a float.
        perl: ['5.32']
        # One job per chunk; the number of entries here must match
        # `num_chunks` in the download step.
        runner_id: [0, 1]
    name: chunk ${{ matrix.runner_id }} Perl ${{ matrix.perl }} on ${{ matrix.os }}
    steps:
      - name: Set up perl
        uses: shogo82148/actions-setup-perl@v1
        with:
          perl-version: ${{ matrix.perl }}
          multi-thread: "true"
      - name: checkout my repo
        uses: actions/checkout@v4
        with:
          # Checked out into a subdirectory; later steps refer to Kalamari/...
          path: Kalamari
      - name: apt-get install
        run: |
          # Refresh the package index first and install non-interactively so
          # the step cannot stall or abort on a confirmation prompt.
          sudo apt-get update
          sudo apt-get install -y ca-certificates tree
      - name: install-edirect
        run: |
          sh -c "$(curl -fsSL https://ftp.ncbi.nlm.nih.gov/entrez/entrezdirect/install-edirect.sh)"
          # Lines appended to $GITHUB_PATH are added to PATH for later steps.
          echo "$HOME/edirect" >> "$GITHUB_PATH"
          echo "$GITHUB_WORKSPACE/Kalamari/bin" >> "$GITHUB_PATH"
          tree "$HOME/edirect"
      - name: check-env
        run: echo "$PATH"
      - name: download
        run: |
          runner_id=${{ matrix.runner_id }}
          # Must equal the number of entries in matrix.runner_id.
          num_chunks=2
          echo "Runner_id is $runner_id"
          # Build one header-less table holding the plasmid rows followed by
          # the chromosome rows. The header row is kept separately so it can
          # be prepended to this runner's chunk.
          head -n 1 Kalamari/src/chromosomes.tsv > header.tsv
          tail -n +2 Kalamari/src/plasmids.tsv > in.tsv
          tail -n +2 Kalamari/src/chromosomes.tsv >> in.tsv
          total_lines=$(wc -l < in.tsv)
          # The sed range below is inclusive on both ends, so chunk k covers
          # rows (k*total/n + 1) through ((k+1)*total/n). Consecutive chunks
          # then tile the table exactly, with no row skipped or repeated.
          start_line=$(( runner_id * total_lines / num_chunks + 1 ))
          end_line=$(( (runner_id + 1) * total_lines / num_chunks ))
          echo "($runner_id) start_line: $start_line"
          echo "($runner_id) end_line: $end_line"
          cat header.tsv > chunk_${runner_id}.tsv
          sed -n "${start_line},${end_line}p" in.tsv >> chunk_${runner_id}.tsv
          perl Kalamari/bin/downloadKalamari.pl --numcpus 1 --outdir kalamari.out --buffersize 100 chunk_${runner_id}.tsv
      - name: check-results
        run: tree kalamari.out
      - name: check-file-sizes
        run: |
          # Record the paths of the downloaded fasta files. The size listing
          # below is currently disabled.
          find kalamari.out -name '*.fasta.gz' > fasta.sizes
          #cat fasta.sizes | xargs -n 100 ls -lhS