-
Notifications
You must be signed in to change notification settings - Fork 0
/
Run_SwapTest.py
158 lines (125 loc) · 9.8 KB
/
Run_SwapTest.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
from IGCexpansion.PSJSGeneconv import *
from IGCexpansion.JSGeneconv import *
import argparse
from collections import namedtuple
import numpy as np
def main(args):
paralog = [args.paralog1, args.paralog2]
gene_to_orlg_file = './' + '_'.join(paralog) +'_GeneToOrlg.txt'
alignment_file = './' + '_'.join(paralog) +'_Cleaned_input.fasta'
tree_newick = './input_tree.newick'
DupLosList = './EDN_ECP_DupLost.txt'
terminal_node_list = ['Tamarin', 'Macaque', 'Orangutan', 'Gorilla', 'Chimpanzee']
node_to_pos = {'D1':0}
pm_model = 'HKY'
IGC_pm = 'One rate'
seq_index_file = './' + '_'.join(paralog) +'_seq_index.txt'
if args.rate_variation:
save_file = './save/JS_HKY_'+ '_'.join(paralog) + '_' + IGC_pm.replace(' ', '_') + '_rv_nonclock_save.txt'
log_file = './log/JS_HKY_'+ '_'.join(paralog) + '_' + IGC_pm.replace(' ', '_') + '_rv_nonclock_log.txt'
summary_file = './summary/JS_HKY_'+ '_'.join(paralog) + '_' + IGC_pm.replace(' ', '_') + '_rv_nonclock_summary.txt'
x_js = np.log([ 0.5, 0.5, 0.5, 4.35588244, 0.5, 5.0, 0.3])
else:
save_file = './save/JS_HKY_'+ '_'.join(paralog) + '_' + IGC_pm.replace(' ', '_') + '_nonclock_save.txt'
log_file = './log/JS_HKY_'+ '_'.join(paralog) + '_' + IGC_pm.replace(' ', '_') + '_nonclock_log.txt'
summary_file = './summary/JS_HKY_'+ '_'.join(paralog) + '_' + IGC_pm.replace(' ', '_') + '_nonclock_summary.txt'
x_js = np.log([ 0.5, 0.5, 0.5, 4.35588244, 0.3])
test_JS = JSGeneconv(alignment_file, gene_to_orlg_file, args.cdna, tree_newick, DupLosList, x_js, pm_model, IGC_pm,
args.rate_variation, node_to_pos, terminal_node_list, save_file, log_file = log_file)
test_JS.get_mle()
#test_JS.get_expectedNumGeneconv()
test_JS.get_individual_summary(summary_file)
if __name__ == '__main__':
## parser = argparse.ArgumentParser()
## parser.add_argument('--paralog1', required = True, help = 'Name of the 1st paralog')
## parser.add_argument('--paralog2', required = True, help = 'Name of the 2nd paralog')
## parser.add_argument('--L', type = float, dest = 'tract_length', default = 30.0, help = 'Initial guess tract length')
## parser.add_argument('--D', type = int, dest = 'dim', default = 0, help = 'Dimension used in search with default value 0')
## parser.add_argument('--heterogeneity', dest = 'rate_variation', action = 'store_true', help = 'rate heterogeneity control')
## parser.add_argument('--homogeneity', dest = 'rate_variation', action = 'store_false', help = 'rate heterogeneity control')
## parser.add_argument('--coding', dest = 'cdna', action = 'store_true', help = 'coding sequence control')
## parser.add_argument('--noncoding', dest = 'cdna', action = 'store_false', help = 'coding sequence control')
## parser.add_argument('--samecodon', dest = 'allow_same_codon', action = 'store_true', help = 'whether allow pair sites from same codon')
## parser.add_argument('--no-samecodon', dest = 'allow_same_codon', action = 'store_false', help = 'whether allow pair sites from same codon')
##
## main(parser.parse_args())
MyStruct = namedtuple('MyStruct', 'paralog1 paralog2 tract_length dim cdna rate_variation allow_same_codon')
args = MyStruct(paralog1 = 'EDN', paralog2 = 'ECP', tract_length = 30.0, dim = 1, cdna = True, rate_variation = True, allow_same_codon = True)
paralog = [args.paralog1, args.paralog2]
gene_to_orlg_file = './' + '_'.join(paralog) +'_GeneToOrlg_SwapTest.txt'
alignment_A_file = './' + '_'.join(paralog) +'_Cleaned_SwapTest_A_input.fasta'
alignment_B_file = './' + '_'.join(paralog) +'_Cleaned_SwapTest_B_input.fasta'
tree_newick = './input_tree_SwapTest.newick' # Chimp's sequence is swapped
DupLosList = './EDN_ECP_DupLost.txt'
terminal_node_list = ['Tamarin', 'Macaque', 'Chimpanzee']
node_to_pos = {'D1':0}
pm_model = 'HKY'
IGC_pm = 'One rate'
seq_index_file = './' + '_'.join(paralog) +'_seq_index.txt'
force = None
if args.rate_variation:
save_file_A = './save/JS_HKY_'+ '_'.join(paralog) + '_' + IGC_pm.replace(' ', '_') + '_rv_nonclock_SwapTest_A_save.txt'
log_file_A = './log/JS_HKY_'+ '_'.join(paralog) + '_' + IGC_pm.replace(' ', '_') + '_rv_nonclock_SwapTest_A_log.txt'
summary_file_A = './summary/JS_HKY_'+ '_'.join(paralog) + '_' + IGC_pm.replace(' ', '_') + '_rv_nonclock_SwapTest_A_summary.txt'
gradient_file_A = './summary/JS_HKY_'+ '_'.join(paralog) + '_' + IGC_pm.replace(' ', '_') + '_rv_nonclock_SwapTest_A_gradient.txt'
hessian_file_A = './summary/JS_HKY_'+ '_'.join(paralog) + '_' + IGC_pm.replace(' ', '_') + '_rv_nonclock_SwapTest_A_hessian.txt'
godambe_file_A = './summary/JS_HKY_'+ '_'.join(paralog) + '_' + IGC_pm.replace(' ', '_') + '_rv_nonclock_SwapTest_A_godambe.txt'
x_js = np.log([ 0.5, 0.5, 0.5, 4.35588244, 0.5, 5.0, 0.3])
else:
save_file_A = './save/JS_HKY_'+ '_'.join(paralog) + '_' + IGC_pm.replace(' ', '_') + '_nonclock_SwapTest_A_save.txt'
log_file_A = './log/JS_HKY_'+ '_'.join(paralog) + '_' + IGC_pm.replace(' ', '_') + '_nonclock_SwapTest_A_log.txt'
summary_file_A = './summary/JS_HKY_'+ '_'.join(paralog) + '_' + IGC_pm.replace(' ', '_') + '_nonclock_SwapTest_A_summary.txt'
gradient_file_A = './summary/JS_HKY_'+ '_'.join(paralog) + '_' + IGC_pm.replace(' ', '_') + '_nonclock_SwapTest_A_gradient.txt'
hessian_file_A = './summary/JS_HKY_'+ '_'.join(paralog) + '_' + IGC_pm.replace(' ', '_') + '_nonclock_SwapTest_A_hessian.txt'
godambe_file_A = './summary/JS_HKY_'+ '_'.join(paralog) + '_' + IGC_pm.replace(' ', '_') + '_nonclock_SwapTest_A_godambe.txt'
x_js = np.log([ 0.5, 0.5, 0.5, 4.35588244, 0.3])
test_Swap_A = JSGeneconv(alignment_A_file, gene_to_orlg_file, args.cdna, tree_newick, DupLosList, x_js, pm_model, IGC_pm,
args.rate_variation, node_to_pos, terminal_node_list, save_file_A)
test_Swap_A.get_mle()
test_Swap_A.get_expectedNumGeneconv()
test_Swap_A.get_expectedMutationNum()
test_Swap_A.get_individual_summary(summary_file_A)
godambe = test_Swap_A.get_Godambe_matrix(test_Swap_A.x, gradient_file_A, hessian_file_A, 1e-6)
np.savetxt(open(godambe_file_A, 'w+'), np.array(godambe))
print(test_Swap_A)
if args.rate_variation:
save_file_B = './save/JS_HKY_'+ '_'.join(paralog) + '_' + IGC_pm.replace(' ', '_') + '_rv_nonclock_SwapTest_B_save.txt'
log_file_B = './log/JS_HKY_'+ '_'.join(paralog) + '_' + IGC_pm.replace(' ', '_') + '_rv_nonclock_SwapTest_B_log.txt'
summary_file_B = './summary/JS_HKY_'+ '_'.join(paralog) + '_' + IGC_pm.replace(' ', '_') + '_rv_nonclock_SwapTest_B_summary.txt'
gradient_file_B = './summary/JS_HKY_'+ '_'.join(paralog) + '_' + IGC_pm.replace(' ', '_') + '_rv_nonclock_SwapTest_B_gradient.txt'
hessian_file_B = './summary/JS_HKY_'+ '_'.join(paralog) + '_' + IGC_pm.replace(' ', '_') + '_rv_nonclock_SwapTest_B_hessian.txt'
godambe_file_B = './summary/JS_HKY_'+ '_'.join(paralog) + '_' + IGC_pm.replace(' ', '_') + '_rv_nonclock_SwapTest_B_godambe.txt'
x_js = np.log([ 0.5, 0.5, 0.5, 4.35588244, 0.5, 5.0, 0.3])
else:
save_file_B = './save/JS_HKY_'+ '_'.join(paralog) + '_' + IGC_pm.replace(' ', '_') + '_nonclock_SwapTest_B_save.txt'
log_file_B = './log/JS_HKY_'+ '_'.join(paralog) + '_' + IGC_pm.replace(' ', '_') + '_nonclock_SwapTest_B_log.txt'
summary_file_B = './summary/JS_HKY_'+ '_'.join(paralog) + '_' + IGC_pm.replace(' ', '_') + '_nonclock_SwapTest_B_summary.txt'
gradient_file_B = './summary/JS_HKY_'+ '_'.join(paralog) + '_' + IGC_pm.replace(' ', '_') + '_nonclock_SwapTest_B_gradient.txt'
hessian_file_B = './summary/JS_HKY_'+ '_'.join(paralog) + '_' + IGC_pm.replace(' ', '_') + '_nonclock_SwapTest_B_hessian.txt'
godambe_file_B = './summary/JS_HKY_'+ '_'.join(paralog) + '_' + IGC_pm.replace(' ', '_') + '_nonclock_SwapTest_B_godambe.txt'
x_js = np.log([ 0.5, 0.5, 0.5, 4.35588244, 0.3])
test_Swap_B = JSGeneconv(alignment_B_file, gene_to_orlg_file, args.cdna, tree_newick, DupLosList, x_js, pm_model, IGC_pm,
args.rate_variation, node_to_pos, terminal_node_list, save_file_B)
test_Swap_B.get_mle()
test_Swap_B.get_expectedNumGeneconv()
test_Swap_B.get_expectedMutationNum()
test_Swap_B.get_individual_summary(summary_file_B)
godambe = test_Swap_B.get_Godambe_matrix(test_Swap_B.x, gradient_file_B, hessian_file_B, 1e-6)
np.savetxt(open(godambe_file_B, 'w+'), np.array(godambe))
print(test_Swap_B)
# force = {6:0.0}
# if args.rate_variation:
# save_file = './save/Force_JS_HKY_'+ '_'.join(paralog) + '_' + IGC_pm.replace(' ', '_') + '_rv_nonclock_save.txt'
# log_file = './log/Force_JS_HKY_'+ '_'.join(paralog) + '_' + IGC_pm.replace(' ', '_') + '_rv_nonclock_log.txt'
# summary_file = './summary/Force_JS_HKY_'+ '_'.join(paralog) + '_' + IGC_pm.replace(' ', '_') + '_rv_nonclock_summary.txt'
# x_js = np.log([ 0.5, 0.5, 0.5, 4.35588244, 0.5, 5.0, 0.3])
# else:
# save_file = './save/Force_JS_HKY_'+ '_'.join(paralog) + '_' + IGC_pm.replace(' ', '_') + '_nonclock_save.txt'
# log_file = './log/Force_JS_HKY_'+ '_'.join(paralog) + '_' + IGC_pm.replace(' ', '_') + '_nonclock_log.txt'
# summary_file = './summary/Force_JS_HKY_'+ '_'.join(paralog) + '_' + IGC_pm.replace(' ', '_') + '_nonclock_summary.txt'
# x_js = np.log([ 0.5, 0.5, 0.5, 4.35588244, 0.3])
# test_JS_Force = JSGeneconv(alignment_file, gene_to_orlg_file, args.cdna, tree_newick, DupLosList, x_js, pm_model, IGC_pm,
# args.rate_variation, node_to_pos, terminal_node_list, save_file, force = force)
# test_JS_Force.get_mle()
# #test_JS.get_expectedNumGeneconv()
# test_JS_Force.get_individual_summary(summary_file)