-
Notifications
You must be signed in to change notification settings - Fork 0
/
policy_gradient.py
executable file
·30 lines (24 loc) · 1.33 KB
/
policy_gradient.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
"""
Builds and trains a neural network that uses policy gradients to learn to play Tic-Tac-Toe.
Returns a probabilitity over the action space expressing the confidence that that action is the best
"""
import functools
from common.network_helpers import create_network
from games.tic_tac_toe_x import TicTacToeXGameSpec
from games.tic_tac_toe import TicTacToeGameSpec
from techniques.train_policy_gradient import train_policy_gradients
HIDDEN_NODES = (10, 10, 10)
BATCH_SIZE = 100 # every how many games to do a parameter update?
LEARN_RATE = 0.001
PRINT_RESULTS_EVERY_X = 1000 # every how many games to print the results
NETWORK_FILE_PATH = 'current_network' #'current_network.p' # path to save the network to
NUMBER_OF_GAMES_TO_RUN = 1000000
# to play a different game change this to another spec, e.g TicTacToeXGameSpec or ConnectXGameSpec, to get these to run
# well may require tuning the hyper parameters a bit
game_spec = TicTacToeXGameSpec(3, 3)
create_network_func = functools.partial(create_network, game_spec.board_squares(), (300, 200, 100, 100))
train_policy_gradients(game_spec, create_network_func, NETWORK_FILE_PATH,
number_of_games=NUMBER_OF_GAMES_TO_RUN,
batch_size=BATCH_SIZE,
learn_rate=LEARN_RATE,
print_results_every=PRINT_RESULTS_EVERY_X)