-
Notifications
You must be signed in to change notification settings - Fork 3
73 lines (69 loc) · 2.61 KB
/
unit-testing.yml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
# Workflow: on every push to master, download the Kalamari accessions,
# split across the parallel matrix jobs defined under `jobs`.
name: Pull-down-all-accessions

on:
  push:
    branches: [master]

env:
  # What percent of the combined spreadsheet to send to each job for
  # parallelization. Must agree with the number of `runner_id` entries
  # in the build matrix (5 percent each <=> 20 jobs).
  CHUNK_PERCENT: 5
jobs:
  # One job per matrix combination. Each runner_id downloads its own
  # contiguous slice of the plasmid + chromosome spreadsheets.
  build:
    runs-on: ${{ matrix.os }}
    strategy:
      matrix:
        # NOTE(review): confirm this runner image is still offered by GitHub.
        os: ['ubuntu-20.04']
        perl: ['5.32']
        # One entry per chunk; the number of entries must equal
        # 100 / CHUNK_PERCENT (20 entries for 5 percent each).
        runner_id: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9,
                    10, 11, 12, 13, 14, 15, 16, 17, 18, 19]
    name: chunk ${{ matrix.runner_id }} Perl ${{ matrix.perl }} on ${{ matrix.os }}
    steps:
      - name: Set up perl
        uses: shogo82148/actions-setup-perl@v1
        with:
          perl-version: ${{ matrix.perl }}
          multi-thread: "true"

      # The repository is checked out into ./Kalamari, which the later
      # steps rely on for the spreadsheets and the download script.
      - name: checkout my repo
        uses: actions/checkout@v4
        with:
          path: Kalamari

      - name: apt-get install
        run: |
          # Refresh the package index first and answer prompts
          # automatically so the install cannot stall or abort in CI.
          sudo apt-get update
          sudo apt-get install -y ca-certificates tree

      - name: install-edirect
        run: |
          sudo apt-get install -y ncbi-entrez-direct
          echo "installed edirect the apt way"
          exit
          # Everything below is the manual FTP install of edirect. It is
          # unreachable because of the `exit` above and is kept only for
          # reference.
          cd $HOME
          perl -MNet::FTP -e '$ftp = new Net::FTP("ftp.ncbi.nlm.nih.gov", Passive => 1); $ftp->login; $ftp->binary; $ftp->get("/entrez/entrezdirect/edirect.tar.gz");'
          gunzip -cv edirect.tar.gz | tar xf -
          rm -v edirect.tar.gz
          export PATH=${PATH}:$HOME/edirect >& /dev/null || setenv PATH "${PATH}:$HOME/edirect"
          yes Y | ./edirect/setup.sh
          tree edirect

      - name: check-env
        run: echo "$PATH"

      - name: download
        run: |
          runner_id=${{ matrix.runner_id }}
          echo "Runner_id is $runner_id"

          # Build one header-less table holding every accession
          # (plasmids first, then chromosomes); the header row is kept
          # separately so it can be prepended to this job's chunk.
          head -n 1 Kalamari/src/chromosomes.tsv > header.tsv
          tail -n +2 Kalamari/src/plasmids.tsv > in.tsv
          tail -n +2 Kalamari/src/chromosomes.tsv >> in.tsv

          total_lines=$(wc -l < in.tsv)
          # Number of chunks follows from the workflow-level CHUNK_PERCENT
          # (falls back to 5 percent, i.e. 20 chunks, if it is unset).
          num_chunks=$(( 100 / ${CHUNK_PERCENT:-5} ))

          # sed line addresses are 1-based, so chunk k covers lines
          # k*T/n + 1 .. (k+1)*T/n. Together the chunks cover lines 1..T
          # exactly once, with no gap and no overlap.
          start_line=$(( runner_id * total_lines / num_chunks + 1 ))
          end_line=$(( (runner_id + 1) * total_lines / num_chunks ))
          echo "($runner_id) start_line: $start_line"
          echo "($runner_id) end_line: $end_line"

          cat header.tsv > chunk_${runner_id}.tsv
          # With fewer rows than chunks a slice can be empty; skip sed
          # then, because an inverted range would still print one line.
          if [ "$start_line" -le "$end_line" ]; then
            sed -n "${start_line},${end_line}p" in.tsv >> chunk_${runner_id}.tsv
          fi

          perl Kalamari/bin/downloadKalamari.pl --numcpus 2 --outdir kalamari.out chunk_${runner_id}.tsv

      - name: check-results
        run: tree kalamari.out

      - name: check-file-sizes
        run: |
          # fasta.sizes holds one fasta path per line. -d '\n' keeps
          # paths containing spaces intact, and -r skips ls entirely when
          # no fasta file was produced (instead of listing the cwd).
          find kalamari.out -name '*.fasta' > fasta.sizes
          echo "by size"
          xargs -r -d '\n' ls -lhS < fasta.sizes
          echo "by name"
          xargs -r -d '\n' ls -lh < fasta.sizes