forked from ISUgenomics/common_scripts
-
Notifications
You must be signed in to change notification settings - Fork 0
/
gsnap_pe_clip_final.sh
executable file
·52 lines (46 loc) · 1.14 KB
/
gsnap_pe_clip_final.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
#!/bin/bash
# this is optimized to run on 32 procs: splitting input into 4 pieces, 8 procs (threads) per piece
## MODULES
# Add the site-local module tree, then load the tools used further down:
# GNU parallel and gmap (presumably the module that provides gsnap).
module use /data004/software/GIF/modules
module load parallel
module load gmap
## PATHS
# GMAPDB is exported for the gsnap processes; presumably it is the directory
# in which the database named by DB_NAME is looked up -- confirm against the
# installed GMAP/GSNAP version.
export GMAPDB=/home/arnstrm/arnstrm/GMAPDB
DB_NAME="GRCm38.78_musmus"

# Print the mate-2 path for a mate-1 path: every "_R1_" becomes "_R2_".
#   $1 - path of the first-mate reads file
_mate2_path() {
  printf '%s\n' "${1//_R1_/_R2_}"
}

# Print the output prefix for a reads file: the file name without its
# directory and without anything from the first "." onwards.
#   $1 - path of the first-mate reads file
_outfile_prefix() {
  local name="${1##*/}"
  printf '%s\n' "${name%%.*}"
}

# $1 is the first-mate reads file; the second mate is located by name.
FILE1="${1:-}"
FILE2="$(_mate2_path "${FILE1}")"
# The directory must be removed BEFORE the extension: stripping from the first
# "." of the full path turns "./data/x.fq" into "" and "/runs/v1.2/x.fq" into
# "v1", which silently produces wrong (or colliding) output file names.
OUTFILE="$(_outfile_prefix "${FILE1}")"
## COMMAND
# important options to consider
#==============================
# if using RNA-seq, use: --novelsplicing=1
#
# if mate pairs use: --orientation=RF
# if paired end use: --orientation=FR
# if not sure, don't include --orientation option
#
# for allowing soft-clipping of alignments, exclude all 3 options below:
# --terminal-threshold=100
# --indel-penalty=1
# --trim-mismatch-score=0
#
# if fastq is gzipped use:
# --gunzip
# Refuse to start on unusable input instead of letting gsnap fail (or, worse,
# align a file against itself) after the modules and database are set up.
if [[ -z "${FILE1}" ]]; then
  printf 'usage: %s <reads_R1_file>\n' "${0##*/}" >&2
  exit 1
fi
if [[ "${FILE1}" == "${FILE2}" ]]; then
  # No "_R1_" in the name, so the mate-2 substitution changed nothing.
  printf 'error: "%s" does not contain "_R1_"; cannot derive the mate-2 file\n' \
    "${FILE1}" >&2
  exit 1
fi
for reads_file in "${FILE1}" "${FILE2}"; do
  if [[ ! -r "${reads_file}" ]]; then
    printf 'error: cannot read "%s"\n' "${reads_file}" >&2
    exit 1
  fi
done

# The input is processed in n_parts slices, one gsnap process per slice, all
# running at once: 4 processes x 8 threads = 32 threads in total.
# n_parts feeds --jobs, --part and the ::: argument list, which must agree.
n_parts=4
n_threads=8
part_ids=()
for ((i = 0; i < n_parts; i++)); do
  part_ids+=("${i}")
done

# parallel hands the command string to a shell, so anything taken from the
# command line is shell-quoted first; otherwise a path containing whitespace
# or shell metacharacters would be split or interpreted.
printf -v reads1_q '%q' "${FILE1}"
printf -v reads2_q '%q' "${FILE2}"
printf -v out_prefix_q '%q' "${DB_NAME}_AP_${OUTFILE}"

# {} is replaced by parallel with the slice number (0 .. n_parts-1); it selects
# the slice via --part and keeps each job's output files apart.
parallel --jobs "${n_parts}" \
"gsnap \
--db=${DB_NAME} \
--part={}/${n_parts} \
--batch=4 \
--nthreads=${n_threads} \
--novelsplicing=1 \
--gunzip \
--expand-offsets=1 \
--max-mismatches=5.0 \
--input-buffer-size=1000000 \
--output-buffer-size=1000000 \
--format=sam \
--split-output=${out_prefix_q}.{} \
--failed-input=${out_prefix_q}.not_mapped.{} \
${reads1_q} \
${reads2_q}" \
::: "${part_ids[@]}"