# trainer.py
import multiprocessing
import tensorflow as tf
from tensorflow.keras.layers import InputLayer, Conv2D, BatchNormalization, LeakyReLU, Add, Flatten, Dense, Concatenate, Dot, Reshape, Dropout
import tensorflow.keras.backend as K
print(tf.__version__)
import numpy as np
import gym
import battleship_envs
import builtins
# import timeit # DEBUG Only
import time
from random import random, shuffle, randrange, choice
CHANNEL_TYPE = 'channels_last'
tf.keras.backend.set_image_data_format(CHANNEL_TYPE)
from customs import customAccuracy, buildModel
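# customs.py in this repo is assumed to provide buildModel(), which constructs the Keras network,
# and customAccuracy, the custom metric passed to model.compile() below.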
from copy import copy
# MODEL TWEAKS
NUM_GAMES = 1000
EPSILON = 0.8
LEARNING_RATE = 0.001
TOLERANCE = 100 # how many tries to permit
AXIS = 1 if CHANNEL_TYPE == "channels_first" else -1
# print(NUM_GAMES)
# CONFIGURING THE MODEL
model = buildModel()
# model = oldBuildModel()
# model = tf.keras.models.load_model('saved_model/my_model9.h5',compile=False,custom_objects={'customAccuracy':customAccuracy})
# for layer in model.layers:
# layer.trainable = False
# model.layers[-2].trainable = True # Switch between these two
# model.load_weights('saved_model/checkpoints/cp')  # sometimes buggy when initializing the optimizer; needs a permanent fix
optim = tf.keras.optimizers.Adam(learning_rate=LEARNING_RATE)
error = tf.keras.metrics.MeanAbsoluteError()
lossAvg = tf.keras.metrics.Mean()
accuracy = tf.keras.metrics.Mean()
gameLength = tf.keras.metrics.Mean()
model.compile(optimizer=optim,loss='binary_crossentropy',metrics=[error,customAccuracy])
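# The network is trained as a classifier over the 100 board cells: the targets built in the game
# loop below are one-hot vectors of length 100 (the move that was taken), scored with binary cross-entropy.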
summary_writer = tf.summary.create_file_writer('logs')
# tensorboard_callback = tf.keras.callbacks.TensorBoard(log_dir='logs', histogram_freq=1)
model.summary()
# GLOBALS
ct = time.time()
env = gym.make('battleship1-v1') # watch safety here
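# battleship1-v1 comes from the battleship_envs package imported above; as used below it is assumed
# to expose env.counter and env.seed and to return (obs, reward, done, out) from env.step(move).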
blankBoard = np.zeros((100,), np.float32)
# Returns the model's f-th best move for the given observation
@tf.function()  # tf.function decoration is ~10x faster here
def makeMove(obs, f):
    preds = model(obs, training=False)
    if f == 1:
        return tf.argmax(preds, 1, output_type=tf.int32)[0]
    else:
        return tf.math.top_k(preds, k=f)[1][0][f-1]
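# Example use (see the epsilon-greedy branch in the game loop below): makeMove(prevObs, 1) returns the
# model's top-ranked cell, while makeMove(prevObs, f) returns its f-th choice so already-fired cells can be skipped.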
# Converts regular spaces to what would be seen in a game
def singleShipSight(e, match):
    if '2' in match.description:
        return 1 if '2' in e.description else 0
    if 'S' in match.description:
        return 1 if 'S' in e.description else 0
    if 'C' in match.description:
        return 1 if 'C' in e.description else 0
    if '4' in match.description:
        return 1 if '4' in e.description else 0
    if '5' in match.description:
        return 1 if '5' in e.description else 0
    return 0
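# singleShipSight is wrapped with np.vectorize below (vfuncSingleShip), presumably so it can be mapped
# over a whole board of cell objects; each cell's description string is assumed to encode its ship type.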
# Recursion Variables and Stats
lengthLimit = 98
hits = 0
iterations = 0
observations = []
expecteds = []
vfunc = np.vectorize(lambda e: e.old_values)
vfuncSingleShip = np.vectorize(singleShipSight)
possMoves = list(range(100))
seed = 0
failed = 0
if EPSILON >= 1.0:  # Time saver: with a fully random policy, precompute one random move order per game
    fullyRandom = np.random.rand(NUM_GAMES, 100).argsort(1)[:, :100]
epoch = 0
# Loop through for each epoch
while epoch < NUM_GAMES:
    if failed != 0:  # retry the same seed if the previous attempt at this game failed
        prevObs, prevOut = env.reset(seed=seed)
    else:
        prevObs, prevOut = env.reset()
    seed = env.seed
    # Get the first observation
    prevObs = np.moveaxis(prevObs, 0, -1)  # CPU ONLY
    prevObs = vfunc(prevObs)  # redo timeit with numpy
    prevObs = tf.convert_to_tensor([prevObs])
    # Set up variables for this epoch
    done = False
    slotsLeft = copy(possMoves)
    prevReward = False
    gameObs = []
    gameExp = []
    # Loop through until the game is over
    while not done:  # Could accelerate this, but there are few tf methods and a couple of outside calls
        # Decide what move to make
        if EPSILON >= 1.0:
            move = fullyRandom[epoch, env.counter]
        elif random() < EPSILON:
            move = choice(slotsLeft)
        else:
            for f in range(TOLERANCE):
                move = makeMove(prevObs, f+1).numpy()  # Could convert f to a tensor for a speed-up
                if move in slotsLeft:
                    break
        obs, reward, done, out = env.step(move)
        obs = vfunc(obs)
        if reward:
            hits += 1
        # prevOut = vfunc(prevOut)  # out = vfunc(out)
        # Invariants: a non-zero cell in prevObs should never have a 1 in the corresponding expected,
        # and a non-zero cell in prevObs should never be greater than one.
        # prevReward = reward
        # print(out[move])  # fullyRandom[epoch, abs(env.counter - 2)]
        # The training target is the move that was just taken, one-hot encoded over the 100 cells
        expect = np.zeros((100,), np.float32)
        expect[move] = 1.0
        gameObs.append(prevObs[0])
        gameExp.append(expect)
        if done:
            # Only games that actually finished contribute training pairs
            observations.extend(gameObs)
            expecteds.extend(gameExp)
            failed = 0
        elif env.counter >= lengthLimit:  # the game has run past the current length target
            failed += 1
            if failed > 100:  # BUG: if we are failing more than 100 times, something is wrong
                # don't want to get stuck in a loop and waste time, so give up on this seed
                print("SKIPPING SEED: " + str(seed) + " after failing " + str(failed) + " times")
                failed = 0
                # epoch += 1
            elif failed == 20:
                print("failed " + str(seed) + " for the " + str(failed) + " time, this is epoch " + str(epoch))  # report failures
            epoch -= 1  # retry this epoch (with the same seed) on the next pass
            break
        # zer = tf.math.count_nonzero(prevObs, axis=1, keepdims=True, dtype=tf.float32)
        # sums = tf.add_n([tf.reshape(zer, [100]), tf.convert_to_tensor(prevOut)])  # tf.reshape(expBatch, [32,1,10,10])
        # if 2 in sums.numpy():
        #     raise
        # prevOut = np.copy(out)
        iterations += 1
        if done:
            gameLength.update_state(env.counter)
        else:
            obs = np.moveaxis(obs, 0, -1)  # CPU ONLY
            prevObs = tf.convert_to_tensor([obs])
            slotsLeft.remove(move)
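    # Training below is plain supervised imitation of the buffered games: the (observation, target)
    # pairs are shuffled, split into mini-batches of 32, and fed to train_on_batch with the compiled loss.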
    # TRAINING THE MODEL
    if len(observations) > 1024 and failed == 0:  # only train once enough pairs are buffered and the last game succeeded
        batch_size = 32
        # Shuffle the (observation, target) pairs together, then split them into mini-batches
        pairing = list(zip(observations, expecteds))
        shuffle(pairing)
        observations, expecteds = list(zip(*pairing))
        observations = [observations[i:i + batch_size] for i in range(0, len(observations), batch_size)]
        expecteds = [expecteds[i:i + batch_size] for i in range(0, len(expecteds), batch_size)]
        for b in range(len(observations)):
            obsBatch = tf.stack(observations[b])
            expBatch = tf.stack(expecteds[b])
            ret = model.train_on_batch(x=obsBatch, y=expBatch, reset_metrics=False, return_dict=True)
            lossAvg.update_state(ret['loss'])
            accuracy.update_state(ret['customAccuracy'])
        # Clear the buffer once it has been trained on
        observations = []
        expecteds = []
    # Occasionally update stats and save the model
    if (epoch+1) % (NUM_GAMES // 30) == 0:
        # with summary_writer.as_default():
        #     tf.summary.scalar('Loss', lossAvg.result(), step=epoch+1)
        #     tf.summary.scalar('Error', error.result(), step=epoch+1)
        #     tf.summary.scalar('Accuracy', accuracy.result()*100, step=epoch+1)
        #     tf.summary.scalar('Hits', 100*hits / iterations, step=epoch+1)
        #     tf.summary.scalar('Game Length', gameLength.result(), step=epoch+1)
        iterations = max(iterations, 1)  # guard against division by zero without wiping the real count
        print(f"Completed {epoch+1} epochs at {round(EPSILON,7)} in {round(time.time() - ct, 3)}s. L={round(float(lossAvg.result().numpy()),6)} E={round(float(error.result().numpy()),6)} A={round(float(accuracy.result().numpy()),6)} H={round(hits / iterations,6)} I={round(float(gameLength.result().numpy()),3)}")
        if float(gameLength.result()) > 0:  # we need to finish at least 1 game before changing the length limit
            lengthLimit = int(round(float(gameLength.result().numpy()), 0)) - 1  # new game target length
        if lossAvg.result() != 0.0:  # checkpoint only if training actually ran; must happen before the reset below
            model.save_weights('saved_model/checkpoints/cp')
        error.reset_states()
        accuracy.reset_states()
        # gameLength.reset_states()  # roll the avg
        lossAvg.reset_states()
        hits = 0
        iterations = 0
        # if EPSILON > 0.06:
        #     EPSILON -= 0.02
        # else:
        #     EPSILON /= 1.75
        ct = time.time()
    # Next Iteration
    epoch += 1
# Finally save
model.save('saved_model/foresithe.h5')
print("Model Saved")