grad_descent.py
from __future__ import division
import numpy as np
import time
import pdb
data = np.load('train_data_sanitized.npy')
train_N = int(data.shape[0] * .8)
test_N = data.shape[0] - train_N
dim = data.shape[1] - 1 # the -1 is b/c the last element is the label (ie loss)
numEpochs = 500
batch_size = 50
mu = .9 # momentum coefficient: fraction of the previous velocity kept each step (1 - mu acts like friction)
learning_rate = 2e-10
X_train = data[:train_N, 0:-1]
y_train = data[:train_N, -1] * 100 # the *100 is b/c data_sanitizer normalized the loss
X_test = data[train_N:, 0:-1]
y_test = data[train_N:, -1] * 100
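# X_*: (N, dim) feature matrices; y_*: length-N label vectors (rescaled back from the normalized loss).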
#pdb.set_trace()
weights = np.random.normal(size=dim)
gradient = np.zeros(dim)
v = np.zeros(dim)
loss = 0
start = time.time()
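# Baseline: MAE obtained by predicting 0 for every training example.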
print("all zeros MAE: %.4f" % (np.sum(np.abs(y_train)) / train_N))
for t in range(numEpochs):
    idxs = np.random.choice(train_N, size=batch_size, replace=False)
    X_train_batch = X_train[idxs]
    y_train_batch = y_train[idxs]
    # Subgradient of the batch MAE loss w.r.t. the weights:
    # d/dw sum_i |x_i . w - y_i| = sum_i sign(x_i . w - y_i) * x_i  (a length-dim vector)
    gradient = np.sum(np.sign(np.matmul(X_train_batch, weights) - y_train_batch)[:, np.newaxis] * X_train_batch, axis=0)
    # Classical momentum update: the velocity v accumulates a decaying history of past gradients.
    v = mu * v - learning_rate * gradient
    weights = weights + v
    if t % 50 == 0:
        # Report mean absolute error on the current batch and on the held-out test set.
        prediction = np.matmul(X_train_batch, weights)
        train_MAE = np.sum(np.abs(y_train_batch - prediction.astype(int))) / batch_size
        test_MAE = np.sum(np.abs(y_test - np.matmul(X_test, weights).astype(int))) / test_N
        print("Epoch: %d, train MAE: %.4f, test MAE: %.4f" % (t, train_MAE, test_MAE))
        #import pdb; pdb.set_trace()
end = time.time()
print("Running time was %.2f seconds." % (end - start))