# regStat.py
import numpy as np
import math
import gpdPerm
def olsTTestPermute(regressors, response, nperm=1000):
    """Calculate p (significance) values for the regressors in an
    ordinary least squares fit; the null assumption is that the
    regressor coefficient is zero.  Calculates a t statistic and
    performs a permutation test; `gpdPerm` applies a generalized
    Pareto distribution to approximate the t-stat distribution tail
    when appropriate.

    regressors - matrix of regression variables
        (col - regressors, row - observations)
    response - vector of the response variable (col - observations)
    nperm - scalar number of permutations

    returns
    p - p values corresponding to the columns of regressors
    tStat - the test statistic per regressor
    tStatPerm - t stats for random permutations
        (rows - regressors, cols - permutations)
    coefs - the coefficients from the linear fit

    raises ValueError when n! < nperm (too few observations to
    produce nperm distinct permutations).
    """
    n, m = regressors.shape
    # check that enough distinct permutations of the response exist
    if math.factorial(n) < nperm:
        raise ValueError("Not enough observations "
                         "for {} permutations".format(nperm))
    # get the OLS coefficient estimates
    # rcond=None uses the documented machine-precision cutoff and
    # silences the FutureWarning raised by modern NumPy
    coefs, _, _, _ = np.linalg.lstsq(regressors, response, rcond=None)
    yHat = np.dot(regressors, coefs)
    # sum of squared residuals
    srs = np.sum((response.T - yHat) ** 2)
    # inverse of the cross-product matrix (X^T X)^-1
    cInv = np.linalg.inv(np.dot(regressors.T, regressors))
    # coefficient error estimates from its diagonal
    d = np.diag(cInv)
    # NOTE(review): classic OLS standard errors use n - m degrees of
    # freedom; n - 1 kept here to preserve existing behavior — confirm
    s = np.sqrt(np.abs((1.0 / (n - 1)) * srs * d))
    # t-statistic
    tStat = np.abs(coefs) / s
    tStatPerm = np.ones((m, nperm))
    for i in range(nperm):
        # permute the response
        # *** probably should keep track to avoid repeats, future???
        responsePerm = np.random.permutation(response)
        # repeat the t-stat calculation on the permuted response
        coefsPerm, _, _, _ = np.linalg.lstsq(regressors, responsePerm,
                                             rcond=None)
        yHat = np.dot(regressors, coefsPerm)
        srs = np.sum((responsePerm.T - yHat) ** 2)
        # no need to redo the operations on the regressor matrix
        s = np.sqrt(np.abs((1.0 / (n - 1)) * srs * d))
        tStatPerm[:, i] = np.abs(coefsPerm) / s
    # estimate tail p values from the permutation distribution
    p = np.ones(m) * 2
    for i in range(m):
        p[i] = gpdPerm.est(tStat[i], tStatPerm[i, :])
    return p, tStat, tStatPerm, coefs
def netTTestPermute(regressors, response, lam, alpha, nperm=1000):
    """Calculate p (significance) values for the regressors in an
    elastic net linear fit; the null assumption is that the
    regressor coefficient is zero.  Calculates a t statistic and
    performs a permutation test; `gpdPerm` applies a generalized
    Pareto distribution to approximate the t-stat distribution tail
    when appropriate.

    regressors - matrix of regression variables
        (col - regressors, row - observations)
    response - vector of the response variable (col - observations)
    lam - scalar float; elastic net lambda (penalty) parameter
    alpha - scalar float; elastic net alpha (balance) parameter
    nperm - scalar number of permutations

    returns
    p - p values corresponding to the columns of regressors
    tStat - the test statistic per regressor
    tStatPerm - t stats for random permutations
        (rows - regressors, cols - permutations)
    coefs - the coefficients from the elastic net fit

    raises ValueError when n! < nperm (too few observations to
    produce nperm distinct permutations).
    """
    import elasticNetLinReg as enet
    n, m = regressors.shape
    # check that enough distinct permutations of the response exist
    if math.factorial(n) < nperm:
        raise ValueError("Not enough observations "
                         "for {} permutations".format(nperm))
    # get the elastic net coefficient estimates; the fit reports only
    # the non-zero entries, so scatter them into a dense vector
    coefs = np.zeros(m)
    enm = enet.fit(regressors, response, alpha, lambdas=[lam])
    coefs[enm.indices] = enm.coef
    # *********
    yHat = enm.predict(regressors)
    # sum of squared residuals
    srs = np.sum((response.T - yHat) ** 2)
    # inverse of the cross-product matrix (X^T X)^-1
    cInv = np.linalg.inv(np.dot(regressors.T, regressors))
    # coefficient error estimates from its diagonal
    d = np.diag(cInv)
    # NOTE(review): uses OLS-style standard errors for a penalized
    # fit, with n - 1 degrees of freedom — kept to preserve behavior
    s = np.sqrt(np.abs((1.0 / (n - 1)) * srs * d))
    # *********
    # t-statistic
    tStat = np.abs(coefs) / s
    tStatPerm = np.ones((m, nperm))
    for i in range(nperm):
        # permute the response
        # *** probably should keep track to avoid repeats, future???
        responsePerm = np.random.permutation(response)
        # repeat the t-stat calculation on the permuted response
        coefsPerm = np.zeros(m)
        enmPerm = enet.fit(regressors, responsePerm, alpha, lambdas=[lam])
        coefsPerm[enmPerm.indices] = enmPerm.coef
        yHat = enmPerm.predict(regressors)
        srs = np.sum((responsePerm.T - yHat) ** 2)
        # no need to redo the operations on the regressor matrix
        sPerm = np.sqrt(np.abs((1.0 / (n - 1)) * srs * d))
        tStatPerm[:, i] = np.abs(coefsPerm) / sPerm
    # estimate tail p values from the permutation distribution
    p = np.ones(m) * 2
    for i in range(m):
        p[i] = gpdPerm.est(tStat[i], tStatPerm[i, :])
    # BUG FIX: previously returned s (standard errors) as the fourth
    # value, contradicting the docstring and the parallel return of
    # olsTTestPermute; return the fitted coefficients instead
    return p, tStat, tStatPerm, coefs