train.py
import Gammon
import net

network = net.Net()

count = 0
wins = 0
while count < 1000:
    count += 1
    print("Game #: {}".format(count))
    g = Gammon.Gammon()

    # Roll for the opening turn; re-roll until the totals differ.
    p1Roll = (0, 0)
    p2Roll = (0, 0)
    while sum(p1Roll) == sum(p2Roll):
        p1Roll = g.roll_dice()
        p2Roll = g.roll_dice()
    if sum(p1Roll) > sum(p2Roll):
        print("White gets the first turn")
        g.turn = g.players[0]
    else:
        print("Black gets the first turn")
        g.turn = g.players[1]

    start = 1
    moves = 0
    states = []
    while not g.game_over():
        # The opening roll is reused for the first move; later moves get a fresh roll.
        if start == 1:
            actions = g.find_moves(p1Roll, g.turn)
            start = 0
        else:
            actions = g.find_moves(g.roll_dice(), g.turn)

        if len(actions) > 0:
            # Score each legal action: apply it, evaluate the resulting position
            # with the network, then undo it.
            values = []
            for action in actions:
                g.take_action(g.turn, action)
                representation = g.get_representation(
                    g.board, g.players, g.on_bar, g.off_board, g.turn
                )
                values.append(network.getValue(representation))
                g.undo_action(g.turn, action)

            # Greedy selection: maximise the first output for White,
            # minimise the second output for Black.
            max_value = 0
            max_index = 0
            min_value = 1
            min_index = 0
            for i in range(len(values)):
                if g.turn == "white":
                    if max_value < values[i][0]:
                        max_value = values[i][0]
                        max_index = i
                elif g.turn == "black":
                    if min_value > values[i][1]:
                        min_value = values[i][1]
                        min_index = i
            if g.turn == "white":
                best_action = actions[max_index]
            else:
                best_action = actions[min_index]

            g.take_action(g.turn, best_action)
            expected_board = g.get_representation(
                g.board, g.players, g.on_bar, g.off_board, g.turn
            )
            # Only White's positions are stored for the TD updates below.
            if g.turn == "white":
                states.append(expected_board)
                print("state size", len(states))

        moves += 1
        g.turn = g.get_opponent(g.turn)

    reward = 0
    if g.game_over():
        print("Game over in {} moves".format(moves))
        print("Num states: ", len(states))
        print("{} won".format(g.find_winner()))
        if g.find_winner() == "white":
            reward = 1
            wins += 1
        for i in range(len(g.board)):
            g.print_point(i)

    # TD update over consecutive stored states: the error is the difference
    # between the network's value for the next state and for the current state.
    for i in range(0, len(states) - 2):
        current_state = states[i]
        predicted_state = states[i + 1]
        error = network.getValue(predicted_state)[0] - network.getValue(current_state)[0]
        network.feedforward(current_state)
        network.do_td(current_state, network.getValue(current_state), error)

    print("Win percentage: {}".format(wins / count))