-
Notifications
You must be signed in to change notification settings - Fork 2
/
metagwastoolkit.conf
executable file
·154 lines (154 loc) · 6.61 KB
/
metagwastoolkit.conf
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
### CONFIGURATION FILE FOR METAGWASTOOLKIT ###
# Precede your comments with a #-sign.
#
# Set the directory variables below.
# NOTE(review): the original guidance states that the order doesn't matter, but several
# values reference other variables via ${...} (e.g. PLINK uses ${SOFTWARE}, PROJECTDIR
# uses ${METAGWASTOOLKITDIR}). If this file is sourced by a shell, a variable presumably
# has to be defined above the lines that reference it -- confirm before reordering.
# Don't end the directory variables with '/' (forward-slash)!
#
# REQUIRED: Path to where the software resides on the server.
SOFTWARE="/hpc/local/Rocky8/dhl_ec/software"
# Locations of PLINK and LocusZoom, both built on top of SOFTWARE.
# Alternative PLINK location (disabled):
# PLINK="${SOFTWARE}/plink_v1.9"
PLINK="${SOFTWARE}/plink_v1.90_beta7_20230116"
LOCUSZOOM="${SOFTWARE}/locuszoom_v14"
#
# REQUIRED: Path to where MetaGWASToolKit resides on the server.
METAGWASTOOLKITDIR="${SOFTWARE}/MetaGWASToolKit"
#
# REQUIRED: Path to where the main meta-analysis directory resides.
PROJECTDIR="${METAGWASTOOLKITDIR}/EXAMPLE"
#
# REQUIRED: Name of the project.
PROJECTNAME="EXAMPLEPHENOTYPE"
#
# REQUIRED: Name of the (sub)project -- this will be used to create subfolders within the
# OUTPUTDIR. This could be the name of a specific (sub-)analysis of the GWAS, for instance
# sex-stratification, or correction for cov1+cov2 (model1) vs. cov1+cov2+cov3 (model2).
SUBPROJECTDIRNAME="MODEL1"
#
# REQUIRED: Path to where the main meta-analysis output directory resides.
# The OUTPUTDIRNAME assignment is disabled; OUTPUTDIR is set to PROJECTDIR itself.
# OUTPUTDIRNAME="EXAMPLEPHENOTYPE_MODEL1"
OUTPUTDIR="${PROJECTDIR}"
#
# REQUIRED: Path to where the original GWAS data resides.
DATA_UPLOAD_FREEZE="${PROJECTDIR}/RAWDATA"
#
# REQUIRED: SLURM settings -- ONLY change what is indicated.
# Run time limits; the values below are written as HH:MM:SS.
# QRUNTIME is the unsuffixed setting; each suffixed variant (RUNNER, HARMONIZE, WRAPPER,
# CLEANER, PLOTTER, METAPREP, ANALYZER, CLUMPER, LDSCORE, MRBASE) presumably applies to
# the pipeline step of that name -- TODO confirm against the scripts that read them.
QRUNTIME="03:00:00"
QRUNTIMERUNNER="24:00:00"
QRUNTIMEHARMONIZE="06:00:00"
QRUNTIMEWRAPPER="04:00:00"
QRUNTIMECLEANER="03:00:00"
QRUNTIMEPLOTTER="05:00:00"
QRUNTIMEMETAPREP="12:00:00"
QRUNTIMEANALYZER="24:00:00"
QRUNTIMECLUMPER="01:00:00"
QRUNTIMELDSCORE="01:00:00"
QRUNTIMEMRBASE="01:00:00"
# Run memory; same suffix scheme as the run times above, values carry a "G" suffix.
QMEM="10G"
QMEMRUNNER="1G"
QMEMHARMONIZE="32G"
QMEMWRAPPER="15G"
QMEMCLEANER="15G"
QMEMPLOTTER="15G"
QMEMMETAPREP="48G"
QMEMANALYZER="8G"
# QMEMCLUMPER: use 8G for HM2/1Gp1; use 32G for 1Gp3/1Gp3GoNL5
QMEMCLUMPER="32G"
QMEMLDSCORE="32G"
QMEMMRBASE="4G"
# CHANGE THIS TO YOUR EMAIL
# Must be a single, valid address without spaces or brackets. The value below is only a
# placeholder on the reserved example.com domain and will not deliver mail.
QMAIL="your.email@example.com"
# CHANGE THIS TO YOUR PREFERENCE
# Valid type values are NONE, BEGIN, END, FAIL, REQUEUE or ALL (equivalent to BEGIN, END, FAIL, INVALID_DEPEND, REQUEUE, and STAGE_OUT)
# Multiple type values may be specified in a comma separated list.
QMAILOPTIONS="FAIL"
#
# REQUIRED: chunk size of the data for parallelization; the minimum should be 125000.
# NOTE(review): presumably counted in rows (variants) per chunk -- TODO confirm.
CHUNKSIZE="400000"
#
# REQUIRED: GWAS dataset quality control settings.
# The "e.g." values are illustrative; the active values below differ from them for
# HWE, INFO, BETA and SE.
# MAF  -- minimum minor allele frequency to keep variants, e.g. "0.005"
# MAC  -- minimum minor allele count to keep variants, e.g. "30"
# HWE  -- Hardy-Weinberg equilibrium p-value at which to drop variants, e.g. "1E-6"
# INFO -- minimum imputation quality score to keep variants, e.g. "0.3"
# BETA -- maximum effect size to allow for any variant, e.g. "10"
# SE   -- maximum standard error to allow for any variant, e.g. "10"
MAF="0.005"
MAC="30"
HWE="1E-3"
INFO="0.4"
BETA="5"
SE="5"
#
# REQUIRED: GWAS dataset plot settings.
# - RANDOMSAMPLE: sample size for the random sampling in the P-Z plotter; if the dataset
#   is smaller than this sample the script will automatically set it to the size of the
#   dataset
RANDOMSAMPLE="500000"
# - STATTYPE: the stat-type in the QQ-plotters [options: PVAL, Z, CHISQ]
STATTYPE="PVAL"
# - IMAGEFORMATQC / IMAGEFORMATMETA: the image format in the Manhattan/QQ-plotters
#   [options: PDF, PNG, TIFF, EPS]
#   NOTE(review): presumably QC applies to the per-dataset QC plots and META to the
#   meta-analysis plots -- TODO confirm.
IMAGEFORMATQC="PNG"
IMAGEFORMATMETA="PNG"
# - TITLEPLOT: left empty here.
#   NOTE(review): presumably an optional plot title -- TODO confirm.
TITLEPLOT=""
#
# REQUIRED: settings for meta-analysis
# GENESDISTANCE -- distance in kb to nearest gene for given variant in meta-analysis
# FREQFLIP      -- the threshold of allele frequency where alleles are still flipped, default: "0.30"
# FREQWARNING   -- the threshold of allele frequency where alleles are flipped but a
#                  warning is given in 'CAVEAT', default: "0.45"
# REFERENCE     -- the reference chosen [HM2/1Gp1/1Gp3/GoNL4/GoNL5/1Gp3GONL5]
# POPULATION    -- dependent on the REFERENCE chosen:
#                  [ HM2       -- EUR/AFR/JPT/CHB -- legacy; not implemented ]
#                    1Gp1      -- PAN/EUR/AFR/AMR/EAS/SAS
#                    1Gp3      -- PAN/EUR/AFR/AMR/EAS/SAS
#                  [ GoNL4     -- PAN -- legacy; not implemented ]
#                  [ GoNL5     -- PAN -- legacy; not implemented ]
#                    1Gp3GONL5 -- PAN
# METAMODEL     -- the fixed and z-score based models are always used, the random model is
#                  optional (and additional) [DEFAULT/RANDOM].
# VERBOSE       -- add individual cohort results or not; default is non-verbose [DEFAULT/VERBOSE].
# RESOURCES     -- directory used below to build the DBSNPFILE, REFFREQFILE, VINFOFILE and
#                  GENESFILE paths.
# DBSNPFILE     -- a dbSNP file containing information per variant.
# REFFREQFILE   -- a file containing frequencies per variant.
# VINFOFILE     -- a file containing per variant information.
# GENESFILE     -- a file containing chromosomal basepair positions per gene.
# REFERENCEPLINK-- path to the folder containing PLINK-format reference files
# PARAMSFILE    -- a file containing 5 fields: study name, lambda, sample size,
#                  beta-correction factor, file locations.
# ### FUTURE VERSIONS WILL HAVE AN AUTOMATIC SCRIPTER FOR THE PARAMSFILE ###
GENESDISTANCE="250"
FREQFLIP="0.30"
FREQWARNING="0.45"
POPULATION="EUR"
REFERENCE="1Gp3"
METAMODEL="RANDOM"
VERBOSE="VERBOSE"
RESOURCES="${METAGWASTOOLKITDIR}/RESOURCES"
# NOTE(review): the three file names below contain "1000Gp3v5" and "EUR"; presumably they
# have to be changed together with REFERENCE and POPULATION -- TODO confirm.
DBSNPFILE="${RESOURCES}/1000Gp3v5_20130502_mvncall_integrated_v5c.EUR.FUNC.txt.gz"
REFFREQFILE="${RESOURCES}/1000Gp3v5_20130502_mvncall_integrated_v5c.EUR.FREQ.txt.gz"
VINFOFILE="${RESOURCES}/1000Gp3v5_20130502_mvncall_integrated_v5c.EUR.INFO.txt.gz"
GENESFILE="${RESOURCES}/gencode_v19_GRCh37_hg19_Feb2009.txt.gz"
# Absolute, server-specific path (not derived from the directory variables); adjust it
# for your own server.
REFERENCEPLINK="/hpc/dhl_ec/data/references/1000G/Phase3/PLINK_format"
# Built from OUTPUTDIR: the only OUTPUTDIRNAME assignment in this file is commented out,
# so a path built from it would contain an empty component ("//").
PARAMSFILE="${OUTPUTDIR}/metagwastoolkit.${SUBPROJECTDIRNAME}.params"
#
# REQUIRED: CLUMP settings.
# CLUMP_P1           -- significance threshold for index SNPs, e.g. 5.0e-06
# CLUMP_P2           -- secondary significance threshold for clumped SNPs, e.g. 0.05
# CLUMP_R2           -- LD threshold for clumping
# CLUMP_KB           -- physical distance threshold for clumping
# CLUMP_FIELD        -- column name of the p-value, e.g. P_FIXED, P, P_RANDOM
# CLUMP_SNP_FIELD    -- column name of the variantIDs, e.g. VARIANTID, RSID
# LDMAP              -- specify the LD map used by LocusZoom
# LOCUSZOOM_SETTINGS -- specify additional settings of LocusZoom.
# LZRANGE            -- range to plot around index-variants -- should usually be the same
#                       as the CLUMP_KB parameter
CLUMP_P1="5.0e-8"
CLUMP_P2="0.05"
CLUMP_R2="0.05"
CLUMP_KB="1000"
CLUMP_FIELD="P_FIXED"
CLUMP_SNP_FIELD="RSID"
LDMAP="--pop EUR --build hg19 --source 1000G_March2012"
# NOTE(review): refsnpLineWidth appears twice in the string below (2 and 1.25); which of
# the two LocusZoom applies is not established here -- confirm and drop the other one.
LOCUSZOOM_SETTINGS="ldColors=\"#595A5C,#4C81BF,#1396D8,#C5D220,#F59D10,red,#9A3480\" showRecomb=TRUE drawMarkerNames=FALSE refsnpTextSize=1.0 showRug=FALSE showAnnot=TRUE showRefsnpAnnot=TRUE showGenes=TRUE clean=TRUE bigDiamond=TRUE refsnpLineWidth=2 axisSize=1.25 axisTextSize=1.25 refsnpLineWidth=1.25 geneFontSize=1.25"
# NOTE(review): LZRANGE (250) differs from CLUMP_KB (1000) although the guidance above
# says they should usually match -- confirm this is intended.
LZRANGE="250"
#
# REQUIRED: MRBASE settings.
# MRBASEPVAL -- NOTE(review): presumably the p-value threshold used to select variants
#               for the MR-Base step -- TODO confirm against the script that reads it.
MRBASEPVAL="5e-8"