# Config.py
# Forked from NVlabs/GA3C.
# Copyright (c) 2016, NVIDIA CORPORATION. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
# * Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# * Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
# * Neither the name of NVIDIA CORPORATION nor the names of its
# contributors may be used to endorse or promote products derived
# from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
class Config:
    """Central collection of settings for the GA3C-derived agent.

    All settings are plain class attributes, so they can be read as
    ``Config.NAME`` without creating an instance.  The sections below group
    them into network layout, game setup, system settings, algorithm
    hyper-parameters, logging/checkpointing and experimental switches.
    """

    #########################################################################
    # Number of stacked LSTM layers
    NUM_LSTMS = 2

    #########################################################################
    # Game configuration

    # Name of the map to load.  Alternative map names (swap in to change it):
    #   'seekavoid_arena_01'
    #   'nav_maze_static_01'
    #   'nav_maze_static_02'
    MAP = 'stairway_to_melon'

    # Enable to see the trained agent in action
    PLAY_MODE = False
    # Enable to train
    TRAIN_MODELS = True
    # Load old models. Throws if the model doesn't exist
    LOAD_CHECKPOINT = False
    # If 0, the latest checkpoint is loaded
    LOAD_EPISODE = 0

    #########################################################################
    # Number of agents, predictors, trainers and other system settings

    # If the dynamic configuration is on, these are the initial values.
    # Number of Agents
    AGENTS = 8
    # Number of Predictors
    PREDICTORS = 2
    # Number of Trainers
    TRAINERS = 2

    # Device
    DEVICE = 'gpu:0'

    # Play mode display size
    DISPLAY_SIZE = (440, 400)

    # Movie recording
    RECORD = False
    VIDEO_DURATION = 60  # seconds

    # Enable the dynamic adjustment (+ waiting time to start it)
    DYNAMIC_SETTINGS = False
    DYNAMIC_SETTINGS_STEP_WAIT = 20
    DYNAMIC_SETTINGS_INITIAL_WAIT = 10

    #########################################################################
    # Algorithm parameters

    # Discount factor
    DISCOUNT = 0.99

    # Tmax (interval over which gradients are computed)
    TIME_MAX = 50

    # Maximum steps taken by agent in environment
    MAX_STEPS = 10 * 10**7

    # Reward clipping
    REWARD_CLIPPING = False
    REWARD_MIN = -1
    REWARD_MAX = 1

    # Max size of the queue
    MAX_QUEUE_SIZE = 100
    PREDICTION_BATCH_SIZE = 128

    # Input of the DNN
    STACKED_FRAMES = 1
    IMAGE_WIDTH = 84
    IMAGE_HEIGHT = 84
    IMAGE_DEPTH = 3  # 3 for RGB, 4 for RGBD
    # Includes auxiliary inputs to the NN.
    # TODO: can be calculated inside the program using other params
    COMBINED_STATE_SIZE = 21240
    VEL_DIM = 6  # velocity dimension
    DEPTH_PIXELS = 64  # number of depth pixels for auxiliary supervision
    DEPTH_QUANTIZATION = 8  # number of bins for depth

    # Scaling factors for depth loss
    BETA1 = 1
    BETA2 = 1

    # Lab setting (frames per second)
    FPS = 60

    # Rotation for look-left, look-right actions [-512, 512]
    ROTATION = 20

    # Total number of episodes and annealing frequency
    EPISODES = 400000
    ANNEALING_EPISODE_COUNT = 400000

    # Entropy regularization hyper-parameter
    BETA_START = 0.001
    BETA_END = 0.001

    # Learning rate
    LEARNING_RATE_START = 0.0005
    LEARNING_RATE_END = 0.0005

    # RMSProp parameters
    RMSPROP_DECAY = 0.99
    RMSPROP_MOMENTUM = 0.0
    RMSPROP_EPSILON = 0.1

    # Dual RMSProp - we found that using a single RMSProp for the two cost
    # functions works better and faster
    DUAL_RMSPROP = False

    # Gradient clipping
    USE_GRAD_CLIP = False
    GRAD_CLIP_NORM = 40.0

    # Epsilon (regularize policy lag in GA3C)
    LOG_EPSILON = 1e-6

    # Training min batch size - increasing the batch size increases the
    # stability of the algorithm, but makes learning slower
    TRAINING_MIN_BATCH_SIZE = 0

    #########################################################################
    # Log and save

    # Enable TensorBoard
    TENSORBOARD = False
    # Update TensorBoard every X training steps
    TENSORBOARD_UPDATE_FREQUENCY = 1000

    # Enable to save models every SAVE_FREQUENCY episodes
    SAVE_MODELS = True
    # Save every SAVE_FREQUENCY episodes
    SAVE_FREQUENCY = 1000

    # Print stats every PRINT_STATS_FREQUENCY episodes
    PRINT_STATS_FREQUENCY = 1
    # The window to average stats
    STAT_ROLLING_MEAN_WINDOW = 1000

    # Results filename
    RESULTS_FILENAME = 'results.txt'
    # Network checkpoint name
    NETWORK_NAME = 'network'

    #########################################################################
    # More experimental parameters here

    # Minimum policy
    MIN_POLICY = 0.0
    # Use log_softmax() instead of log(softmax())
    USE_LOG_SOFTMAX = False