control.py
'''
Ankit Kumar
Control scripts/utils for the RNTN.
'''
import cPickle as pkl
import random
import time

from data import *
from stepper import *
from rntn import *


class control(object):
    def __init__(self, mbsize=27, d=25, V=16582, epochs=70, unsupervised=False):
        self.mbsize = mbsize
        self.d = d
        self.V = V
        self.epochs = epochs
        self.unsupervised = unsupervised
        train = pkl.load(open('formatted/train.pkl', 'rb'))
        self.dev = pkl.load(open('formatted/dev.pkl', 'rb'))  # held out; used by validate()
        both = train.trees  # + self.dev.trees
        self.minibatches = [both[i:i + mbsize] for i in range(0, len(both), mbsize)]
    def run(self, lr, reg, learning_algo, regularize_bias, activation, transform_eye=False,
            reset_adagrad=None, name='', costfunction=softmax_crossentropy_cost,
            wordActivations=True, gpu=False):
        s = stepper(lr=lr, reg=reg, learning_algo=learning_algo)
        if self.unsupervised:
            # unsupervised: the output layer predicts over the vocabulary itself
            model = initRNTN(self.d, self.V, self.V, regularize_bias=regularize_bias,
                             activation=activation, transform_eye=transform_eye,
                             wordActivations=wordActivations, gpu=gpu)
        else:
            # supervised: 5-class sentiment output
            model = initRNTN(self.d, self.V, 5, regularize_bias=regularize_bias,
                             activation=activation, transform_eye=transform_eye, gpu=gpu)
        costs_full = []
        for ep in range(self.epochs):
            if reset_adagrad is not None and ep > 0 and ep % reset_adagrad == 0:
                print 'resetting adagrad'
                s.historical_gparams = None
            tick = time.time()
            costs = []
            # shuffle so we don't see the same order every epoch
            # (matters because of adagrad resetting)
            random.shuffle(self.minibatches)
            for minibatch in self.minibatches:
                cost = s.step(model, minibatch, costfunction, epoch=ep)
                costs.append(cost)
                costs_full.append(cost)
            tock = time.time()
            if ep == 1:
                # sanity-check gradients once, on the last minibatch of the epoch
                s.gradcheck(model, minibatch, costfunction)
            print "epoch %s took %s" % (ep, str(tock - tick))
            print "average cost %s" % str(sum(costs) / float(len(costs)))
            if ep > 0 and ep % 10 == 0:
                pkl.dump(model, open('checkpoints/%s_epoch_%s.pkl' % (name, str(ep)), 'wb'))
        pkl.dump(model, open('checkpoints/%s_epochs_final.pkl' % name, 'wb'))
        return costs_full, model
    def validate(self, num_epochs=None):
        '''
        Grid-search over hyperparameter settings and report dev-set accuracy
        for each combination.
        '''
        accuracies = {}
        lrs = [1e-2, 1e-1, 1e-3]
        regs = [1e-6, 1e-4, 1e-5]
        learning_algos = ['sgd']  # , 'adagrad']
        regularize_biases = [True]  # , False]
        initLrs = [0.01, 0.0001]
        activations = ['tanh']
        if num_epochs is None:
            num_epochs = self.epochs
        for lr_base in lrs:
            for reg in regs:
                for learning_algo in learning_algos:
                    for regularize_bias in regularize_biases:
                        for initLr in initLrs:
                            for activation in activations:
                                if learning_algo == 'sgd':
                                    # sgd learning rates need to be significantly lower
                                    lr = lr_base * 1e-2
                                else:
                                    lr = lr_base
                                try:
                                    print "now doing: %s, %s, %s, %s, %s, %s" % (
                                        str(lr), str(reg), str(learning_algo),
                                        str(regularize_bias), str(initLr), str(activation))
                                    s = stepper(lr=lr, reg=reg, learning_algo=learning_algo)
                                    model = initRNTN(self.d, self.V, 5,
                                                     regularize_bias=regularize_bias,
                                                     initLr=initLr, activation=activation)
                                    costs_full = []
                                    for ep in range(num_epochs):
                                        tick = time.time()
                                        costs = []
                                        for minibatch in self.minibatches:
                                            cost = s.step(model, minibatch, epoch=ep)
                                            costs.append(cost)
                                            costs_full.append(cost)
                                        tock = time.time()
                                        print "epoch %s took %s" % (ep, str(tock - tick))
                                        print "average cost %s" % str(sum(costs) / float(len(costs)))
                                    # key includes activation so distinct settings don't collide
                                    accuracies[(lr, reg, learning_algo, regularize_bias,
                                                initLr, activation)] = accuracy(model, self.dev.trees)
                                    print accuracies
                                except Exception as e:
                                    print e
                                    continue
        return accuracies
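
# A minimal helper sketch, not part of the original script: given the dict
# returned by validate(), which maps
# (lr, reg, learning_algo, regularize_bias, initLr, activation) tuples to
# dev-set accuracies, pick the best-scoring setting.
def best_setting(accuracies):
    '''return the (settings, accuracy) pair with the highest dev accuracy'''
    return max(accuracies.items(), key=lambda kv: kv[1])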
if __name__ == '__main__':
    c = control(epochs=100, unsupervised=False)
    c1, m1 = c.run(lr=1e-2,
                   reg={'Ws': 1e-4, 'L': 1e-4, 'W': 1e-3, 'V': 1e-3, 'bs': 1e-4, 'b': 1e-3},
                   learning_algo='adagrad', regularize_bias=True, activation='tanh',
                   transform_eye=True, reset_adagrad=2, name="reset_2",
                   costfunction=softmax_crossentropy_cost, wordActivations=True, gpu=False)
    # c2, m2 = c.run(lr=1e-2, reg={'Ws': 1e-4, 'L': 1e-4, 'W': 1e-3, 'V': 1e-3, 'bs': 1e-4, 'b': 1e-3}, learning_algo='adagrad', regularize_bias=True, activation='tanh', transform_eye=True, reset_adagrad=3, name="reset_3", costfunction=softmax_crossentropy_cost, wordActivations=True)
    # c3, m3 = c.run(lr=1e-2, reg={'Ws': 1e-4, 'L': 1e-4, 'W': 1e-3, 'V': 1e-3, 'bs': 1e-4, 'b': 1e-3}, learning_algo='adagrad', regularize_bias=True, activation='tanh', transform_eye=True, reset_adagrad=3, name="unsup", costfunction=unsupervised_softmax_cost, wordActivations=True)
    # acc = c.validate(num_epochs=5)
    # pkl.dump(acc, open('acc.pkl', 'wb'))
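
    # Hypothetical follow-up (commented out like the examples above): reload a
    # checkpoint written by run() and score it on the dev trees; assumes
    # accuracy() from rntn is in scope and that epoch 10 was checkpointed.
    # model = pkl.load(open('checkpoints/reset_2_epoch_10.pkl', 'rb'))
    # dev = pkl.load(open('formatted/dev.pkl', 'rb'))
    # print accuracy(model, dev.trees)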