-
Notifications
You must be signed in to change notification settings - Fork 3
/
LogisticRegression.py
119 lines (89 loc) · 3.21 KB
/
LogisticRegression.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
import numpy as np
from scipy.special import expit
import load_test_data
import pre_process
# Module-level data loading (runs at import time).
# NOTE(review): preprocessData appears to return train/dev feature matrices and
# label vectors split from 'train.csv' — confirm shapes against pre_process.py.
X_train, Y_train, X_dev, Y_dev = pre_process.preprocessData('train.csv')
# Held-out test split; labels are compared against np.sign(...) below, so they
# are presumably in {-1, +1} — verify in load_test_data.py.
X_test, Y_test = load_test_data.loadTestData('test.csv')
def prediction(x, W):
    """Return the logistic sigmoid of the linear score x . W.

    `expit` is scipy's numerically stable sigmoid, so very large positive or
    negative scores map cleanly to 1.0 / 0.0 without overflow.
    """
    score = x.dot(W)
    return expit(score)
def updateWeights(gradient, learningRate, W):
    """Return the weights after one gradient-descent step.

    Computes W - learningRate * gradient; the input W is not mutated
    (a new array is returned).
    """
    step = learningRate * gradient
    return W - step
def logisticRegression(X, Y, maxIter, learningRate, lmda, tol=0.001):
    """Train a linear classifier by per-example SGD on the regularized
    exponential loss.

    Parameters
    ----------
    X : 2-D array of training features, one example per row.
    Y : 1-D array of labels; the loss uses -y * (x . W), so labels are
        expected in {-1, +1}.
    maxIter : maximum number of full passes (epochs) over the data.
    learningRate : SGD step size.
    lmda : L2 regularization strength (passed through to ExpLoss /
        ExpLossGradient).
    tol : convergence threshold on the epoch-to-epoch change in total loss
        (default 0.001, the value previously hard-coded here).

    Returns
    -------
    The learned weight vector W, shape (X.shape[1],).
    """
    W = np.zeros(X.shape[1])
    newLoss = ExpLoss(X, Y, W, lmda)
    prevLoss = 0.0
    count = 0
    while True:
        count += 1
        # One epoch: a single SGD step per training example, in order.
        for i in range(len(Y)):
            gradient = ExpLossGradient(X[i,], Y[i], W, lmda)
            W = updateWeights(gradient, learningRate, W)
        prevLoss = newLoss
        # Loss is recomputed over the full training set once per epoch.
        newLoss = ExpLoss(X, Y, W, lmda)
        print("Iteration # : ", count, " Loss Value : ", newLoss)
        # Converged: change in loss between consecutive epochs is below tol.
        if abs(newLoss - prevLoss) < tol:
            break
        if count == maxIter:
            break
    return W
def ExpLoss(X, Y, W, lmda):
    """Regularized exponential loss over the whole dataset.

    Returns lmda * ||W||^2 + sum_i exp(-y_i * (x_i . W)).
    Labels in Y are expected in {-1, +1} so that -y * (x . W) is the
    negated margin.
    """
    penalty = lmda * W.dot(W)
    margins = Y * X.dot(W)
    return penalty + np.sum(np.exp(-margins))
def ExpLossGradient(x, y, W, lmda):
    """Gradient of the regularized exponential loss at one example (x, y).

    Returns 2 * lmda * W  +  (-y * exp(-y * (x . W))) * x, i.e. the L2
    penalty term plus the data term for this single example.
    """
    margin = y * x.dot(W)
    dataTerm = -y * x * np.exp(-margin)
    regTerm = 2 * lmda * W
    return regTerm + dataTerm
def runExperiments(X, Y, X_dev, Y_dev, X_test, Y_test, lmda, learningRate, maxIter=10):
    """Train on (X, Y) and report sign-prediction accuracy on three splits.

    Parameters
    ----------
    X, Y : training features and labels (labels expected in {-1, +1}).
    X_dev, Y_dev : development split.
    X_test, Y_test : test split.
    lmda : L2 regularization strength.
    learningRate : SGD step size.
    maxIter : maximum training epochs (default 10).

    Returns
    -------
    (accuracy_dev, accuracy_test, accuracy_train) — note the order: dev
    first, then test, then train, matching the original callers.
    """
    W = logisticRegression(X, Y, maxIter, learningRate, lmda)
    accuracy_t = _accuracy(X_test, Y_test, W)
    accuracy_d = _accuracy(X_dev, Y_dev, W)
    accuracy_tr = _accuracy(X, Y, W)
    return accuracy_d, accuracy_t, accuracy_tr

def _accuracy(X, Y, W):
    """Fraction of rows of X whose sign(x . W) equals the label in Y."""
    nCorrect = 0
    for i in range(len(Y)):
        # np.sign returns 0 for a zero score, which never matches a
        # {-1, +1} label — same tie behavior as the original code.
        if np.sign(X[i,].dot(W)) == Y[i]:
            nCorrect += 1
    return nCorrect * 1.0 / len(Y)
if __name__ == "__main__":
    # Train on the data loaded at module import time and report accuracy on
    # all three splits. Hyperparameters were chosen by the original author.
    accuracy_d, accuracy_t, accuracy_tr = runExperiments(X_train, Y_train, X_dev, Y_dev, X_test, Y_test,
                                                         lmda=.0001,
                                                         learningRate=.001,
                                                         maxIter=100)
    print('Accuracy for Logistic Regression on Dev set : ', accuracy_d)
    print('Accuracy for Logistic Regression on Train set : ', accuracy_tr)
    print('Accuracy for Logistic Regression on Test set : ', accuracy_t)