-
Notifications
You must be signed in to change notification settings - Fork 7
/
Copy pathexample.sh
39 lines (34 loc) · 1.81 KB
/
example.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
# Create (or reuse) the working directory for the example genomes and enter it.
# -p keeps a re-run from failing when the directory already exists.
mkdir -p three_algae
# Abort if the directory cannot be entered; otherwise the download, gunzip and
# mv steps that follow would operate on whatever directory we happen to be in.
cd three_algae || exit 1
# download all the data
# One genome (fasta) and one annotation (gff3) archive per species, all taken
# from the Ensembl Plants release-47 FTP tree.
for pth in \
  ftp://ftp.ensemblgenomes.org/pub/plants/release-47/fasta/chlamydomonas_reinhardtii/dna/Chlamydomonas_reinhardtii.Chlamydomonas_reinhardtii_v5.5.dna_sm.toplevel.fa.gz \
  ftp://ftp.ensemblgenomes.org/pub/plants/release-47/gff3/chlamydomonas_reinhardtii/Chlamydomonas_reinhardtii.Chlamydomonas_reinhardtii_v5.5.47.gff3.gz \
  ftp://ftp.ensemblgenomes.org/pub/plants/release-47/fasta/ostreococcus_lucimarinus/dna/Ostreococcus_lucimarinus.ASM9206v1.dna.toplevel.fa.gz \
  ftp://ftp.ensemblgenomes.org/pub/plants/release-47/gff3/ostreococcus_lucimarinus/Ostreococcus_lucimarinus.ASM9206v1.47.gff3.gz \
  ftp://ftp.ensemblgenomes.org/pub/plants/release-47/fasta/cyanidioschyzon_merolae/dna/Cyanidioschyzon_merolae.ASM9120v1.dna.toplevel.fa.gz \
  ftp://ftp.ensemblgenomes.org/pub/plants/release-47/gff3/cyanidioschyzon_merolae/Cyanidioschyzon_merolae.ASM9120v1.47.gff3.gz
do
  # Stop at the first failed transfer instead of carrying on with an
  # incomplete data set that would only fail later in a less obvious way.
  if ! wget "$pth"; then
    printf 'error: failed to download %s\n' "$pth" >&2
    exit 1
  fi
  # short pause between consecutive requests (presumably to go easy on the server)
  sleep 0.4s
done
# uncompress the data
# The ./ prefix ensures an archive whose name starts with '-' is treated as a
# file rather than being parsed as a gunzip option.
gunzip ./*.gz
# put the data in the layout expected by the --basedir parameter:
# --basedir needs a folder <your_species>
# with a subfolder <your_species>/input containing a (compressed) gff3 annotation and fasta genome file.
# The results will then be located in <your_species>/output
# If desired, you can alternatively specify all file parameters individually
# --gff3 <your.gff3> --fasta <your.fa> --db-path <your_output_genuff.sqlite3> --log-file <your_output.log>
#
# species is kept as a single space-separated string; it is left unquoted in
# the `for` lists on purpose so that it splits into one name per iteration.
species="Chlamydomonas_reinhardtii Ostreococcus_lucimarinus Cyanidioschyzon_merolae"
for sp in $species
do
  spdir="$sp/input"
  mkdir -p "$spdir"
  # the downloaded files are all named <species>.<something>, so this glob
  # collects both the genome and the annotation of the current species
  mv "$sp".* "$spdir/"
done
# import into databases (the main output will land in <basedir>/output/<species>.sqlite3)
# Iterates over the space-separated species list assigned earlier in this
# script; the list is deliberately unquoted so it splits into separate names.
# Each species directory serves as its own --basedir.
for sp in $species
do
  import2geenuff.py --basedir "$sp" --species "$sp"
done
# return to the parent directory
cd ..