forked from pinellolab/CRISPRme
-
Notifications
You must be signed in to change notification settings - Fork 0
/
crisprme_auto_test_conda.sh
executable file
·76 lines (71 loc) · 3.64 KB
/
crisprme_auto_test_conda.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
# ------------------------------------------------------------------------------
# TEST CRISPRme conda package
#
# Step 1: download the hg38 chromosome FASTA archive from UCSC, compare its MD5
# with the published checksum, and unpack it into Genomes/hg38.
# Any failure is reported on stderr and aborts the script with exit status 1.
echo "Download and extract fundamental data..."
# download hg38 genome assembly
GENOMEDIR="Genomes"
HG38="hg38"
echo "Downloading hg38 genome assembly..."
mkdir -p "$GENOMEDIR" # create Genomes folder
# Stop if the folder cannot be entered: otherwise the archive would be saved in
# the current directory and the closing `cd ..` would leave the working tree.
cd "$GENOMEDIR" || {
  echo "ERROR: unable to enter directory ${GENOMEDIR}" >&2
  exit 1
}
# download chromosomes FASTA files
original_md5sum="a5aa5da14ccf3d259c4308f7b2c18cb0" # see https://hgdownload.soe.ucsc.edu/goldenPath/hg38/bigZips/md5sum.txt
chromsfasta="hg38.chromFa.tar.gz"
chromsfasta_url="https://hgdownload.soe.ucsc.edu/goldenPath/hg38/bigZips/${chromsfasta}"
wget -c "$chromsfasta_url" # -c resumes a partially downloaded archive
# A failed download leaves no (or a truncated) file, so the checksum comparison
# below also covers wget errors.
local_md5sum="$(md5sum "$chromsfasta" | cut -d ' ' -f 1)"
if [ "$original_md5sum" != "$local_md5sum" ]; then
  echo "ERROR: unexpected failure while downloading ${chromsfasta_url}" >&2
  exit 1
fi
echo "Extracting ${chromsfasta}..."
if ! tar -xzf "$chromsfasta"; then
  echo "ERROR: unable to extract ${chromsfasta}" >&2
  exit 1
fi
# create hg38 directory
# The extracted files are moved INTO hg38 rather than renaming chroms: when
# hg38 already exists (e.g. the script is run again), `mv chroms hg38` would
# nest the new files under hg38/chroms instead of refreshing hg38 itself.
if ! { mkdir -p "$HG38" && mv -f -- chroms/* "$HG38"/ && rmdir chroms; }; then
  echo "ERROR: unable to move the extracted chromosomes into ${GENOMEDIR}/${HG38}" >&2
  exit 1
fi
cd ..
# download 1000G VCFs
# Step 2: fetch the phased 1000 Genomes VCFs for chromosomes 1-22 and X into
# VCFs/hg38_1000G. Any failure is reported on stderr and aborts the script with
# exit status 1.
echo "Downloading 1000 Genomes VCFs..."
VCF="VCFs"
VCF1000G="hg38_1000G"
mkdir -p "$VCF/$VCF1000G" # create VCF folder and, inside it, the 1000 genomes folder
# Stop if the folder cannot be entered: otherwise the VCFs would be saved in the
# wrong place and the closing `cd ../..` would leave the working tree.
cd "$VCF/$VCF1000G" || {
  echo "ERROR: unable to enter directory ${VCF}/${VCF1000G}" >&2
  exit 1
}
# download 1000 genomes VCF files
vcf_baseurl="ftp://ftp.1000genomes.ebi.ac.uk/vol1/ftp/data_collections/1000_genomes_project/release/20190312_biallelic_SNV_and_INDEL"
for i in $(seq 1 22; echo "X"); do
  vcf="ALL.chr${i}.shapeit2_integrated_snvindels_v2a_27022019.GRCh38.phased.vcf.gz"
  # The reference checksum is obtained by streaming the remote file through
  # md5sum, so every VCF is transferred twice (once here, once by wget).
  original_md5sum="$(curl -sL "${vcf_baseurl}/${vcf}" | md5sum | cut -d ' ' -f 1)" # compute original md5sum
  wget -c "${vcf_baseurl}/${vcf}" # -c resumes a partially downloaded file
  local_md5sum="$(md5sum "$vcf" | cut -d ' ' -f 1)"
  # check download consistency; an empty local checksum (md5sum unavailable or
  # file missing) must count as a failure, not as "both sides are equal"
  if [ -z "$local_md5sum" ] || [ "$original_md5sum" != "$local_md5sum" ]; then
    echo "ERROR: unexpected failure while downloading ${vcf}" >&2
    exit 1
  fi
done
cd ../..
# download annotation data
# Step 3: fetch the two annotation archives into Annotations/ and unpack them
# there. Any failure is reported on stderr and aborts the script with exit
# status 1.
ANNOTATIONDIR="Annotations"
mkdir -p "$ANNOTATIONDIR" # create annotation folder
# Stop if the folder cannot be entered: otherwise the archives would be saved in
# the current directory and the closing `cd ..` would leave the working tree.
cd "$ANNOTATIONDIR" || {
  echo "ERROR: unable to enter directory ${ANNOTATIONDIR}" >&2
  exit 1
}

# Download one zip archive, check it, and extract it in the current directory.
# Arguments: $1 - URL of the archive
#            $2 - name of the local file to write
# Globals:   original_md5sum, local_md5sum (written)
# Exits the script with status 1 on a checksum mismatch or a failed extraction.
download_and_unzip_annotation() {
  # The URL is always passed quoted: it contains `?`, which the shell would
  # otherwise treat as a glob character.
  # The reference checksum is obtained by streaming the remote file through
  # md5sum, so the archive is transferred twice (once here, once by wget).
  original_md5sum="$(curl -sL "$1" | md5sum | cut -d ' ' -f 1)"
  wget -c -O "$2" "$1"
  local_md5sum="$(md5sum "$2" | cut -d ' ' -f 1)"
  # an empty local checksum (md5sum unavailable or file missing) must count as
  # a failure, not as "both sides are equal"
  if [ -z "$local_md5sum" ] || [ "$original_md5sum" != "$local_md5sum" ]; then
    echo "ERROR: unexpected failure while downloading $2" >&2
    exit 1
  fi
  echo "Extracting $2..."
  # -o overwrites files left by a previous run instead of prompting, so the
  # script never blocks waiting for input.
  if ! unzip -o "$2"; then
    echo "ERROR: unable to extract $2" >&2
    exit 1
  fi
}

echo "Downloading ENCODE+GENCODE annotation data..."
encodegencode="encode+gencode.hg38.bed.zip"
download_and_unzip_annotation \
  "https://www.dropbox.com/s/1n2f0qxdba7u3gb/encode%2Bgencode.hg38.bed.zip?dl=0" \
  "$encodegencode"
echo "Downloading GENCODE encoding sequences..."
gencode="gencode.protein_coding.bed.zip"
download_and_unzip_annotation \
  "https://www.dropbox.com/s/isqpkg113cr1xea/gencode.protein_coding.bed.zip?dl=0" \
  "$gencode"
cd ..
# Step 4: run a CRISPRme complete-search using the genome and annotation files
# placed under Genomes/ and Annotations/.
# NOTE(review): list_vcf.txt, sg1617.txt, list_samplesID.txt and the PAMs/
# folder are not created by the visible part of this script; presumably they
# must already be in the working directory - confirm.
# NOTE(review): the trailing slash in `list_vcf.txt/` is kept exactly as in the
# original invocation - confirm it is intended.
echo "Start CRISPRme test..."
crisprme.py complete-search \
  --genome Genomes/hg38/ \
  --vcf list_vcf.txt/ \
  --guide sg1617.txt \
  --pam PAMs/20bp-NGG-spCas9.txt \
  --annotation Annotations/encode+gencode.hg38.bed \
  --samplesID list_samplesID.txt \
  --gene_annotation Annotations/gencode.protein_coding.bed \
  --bMax 2 \
  --mm 6 \
  --bDNA 2 \
  --bRNA 2 \
  --merge 3 \
  --output sg1617.6.2.2 \
  --thread 4