forked from NVlabs/GA3C
-
Notifications
You must be signed in to change notification settings - Fork 25
/
ThreadTrainer.py
89 lines (75 loc) · 3.65 KB
/
ThreadTrainer.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
# Copyright (c) 2016, NVIDIA CORPORATION. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
# * Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# * Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
# * Neither the name of NVIDIA CORPORATION nor the names of its
# contributors may be used to endorse or promote products derived
# from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
from threading import Thread
import numpy as np
from Config import Config
class ThreadTrainer(Thread):
def __init__(self, server, id):
super(ThreadTrainer, self).__init__()
self.setDaemon(True)
self.id = id
self.server = server
self.exit_flag = False
@staticmethod
def dynamic_pad(x, r, a):
size = int(Config.TIME_MAX) # required size
z = np.zeros((size-len(x),) + x.shape[1:])
x = np.append(x, z, axis=0)
z = np.zeros((size-len(r),) + r.shape[1:])
r = np.append(r, z, axis=0)
z = np.zeros((size-len(a),) + a.shape[1:])
a = np.append(a, z, axis=0)
assert len(x) == size
return x, r, a
def run(self):
while not self.exit_flag:
batch_size = 0
c__ = []; h__ = [] # lstm hidden states
while batch_size <= Config.TRAINING_MIN_BATCH_SIZE:
x_, r_, a_, lstm_ = self.server.training_q.get()
# when using LSTMs, the recurrence is over the TIME_MAX length
# trajectory from each agent. Use padding for trajectories of
# length < TIME_MAX
if Config.NUM_LSTMS and x_.shape[0] != int(Config.TIME_MAX):
x_, r_, a_ = ThreadTrainer.dynamic_pad(x_, r_, a_)
if batch_size == 0:
x__ = x_; r__ = r_; a__ = a_
if len(lstm_):
c__ = []; h__ = []
for i in range(Config.NUM_LSTMS):
c__.append(lstm_[i]['c'])
h__.append(lstm_[i]['h'])
else:
x__ = np.concatenate((x__, x_))
r__ = np.concatenate((r__, r_))
a__ = np.concatenate((a__, a_))
if len(lstm_):
for i in range(Config.NUM_LSTMS):
c__[i] = np.concatenate((c__[i], lstm_[i]['c']))
h__[i] = np.concatenate((h__[i], lstm_[i]['h']))
batch_size += x_.shape[0]
if Config.TRAIN_MODELS:
self.server.train_model(x__, r__, a__, c__, h__, self.id)