# ddpg_continue_dynamics.py
# NOTE: this script is currently set up for display/demo runs.
from environment import OnePlayerEnv
from policy import TestPolicy
from world import *
import random  # used only by the disabled random-exploration block in MyDDPG.forward
import numpy as np
import gym  # used only by the disabled CartPole sanity-check line below
from keras.models import Sequential, Model
from keras.layers import Dense, Activation, Flatten, Input, Concatenate, Conv2D
from keras.layers import MaxPool2D, AvgPool2D, BatchNormalization, Dropout
import keras.regularizers as regularizers
from keras.optimizers import Adam
from rl.agents import DDPGAgent
from rl.memory import SequentialMemory
from rl.core import Processor
from rl.random import OrnsteinUhlenbeckProcess
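# Overview: train a DDPG agent (keras-rl) on a custom continuous-dynamics
# one-player world. The actor maps a 6-dim observation to a 2-dim action
# (named F and T in the warm-up code below), squashed to [-1, 1] by a tanh
# output; the critic scores (observation, action) pairs with a scalar Q-value.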
class NpaProcessor(Processor):
    """Collapse the replay-memory window axis before batches reach the networks.

    SequentialMemory(window_length=1) yields state batches of shape
    (batch, 1, obs_dim); the dense networks below expect (batch, obs_dim).
    """

    def process_state_batch(self, batch):
        batch_size = batch.shape[0]
        # unit_size = batch.shape[-3:]  # (channels, H, W) for the image variant
        unit_size = batch.shape[-1:]
        new_size = (batch_size,) + unit_size
        return np.reshape(batch, new_size)
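# Quick shape sanity check with illustrative values: a batch of four
# window-1, 6-dim observations, shape (4, 1, 6), must collapse to (4, 6).
assert NpaProcessor().process_state_batch(np.zeros((4, 1, 6))).shape == (4, 6)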
EPISODES = 100000  # currently unused; training length is set via agent.fit(nb_steps=...) below
if __name__ == "__main__":
    # env = gym.make('CartPole-v1')
    w = ContinuousDynamicWorld(TestPolicy, obsticle_moving=False)
    env = OnePlayerEnv(w)
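    # OnePlayerEnv is assumed (from the custom `environment` module) to expose
    # the gym-style reset/step interface that keras-rl's fit/test loops expect.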
    # Actor: maps the 6-dim observation to a 2-dim action in [-1, 1]
    # (tanh output layer).
    actor = Sequential()
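    # The commented block below is an alternate convolutional front-end for
    # raw (5, 100, 100) image observations, kept for reference but disabled.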
    # actor.add(Conv2D(filters=64, kernel_size=(3, 3), activation="relu", input_shape=(5, 100, 100),
    #                  data_format="channels_first"))
    # actor.add(Conv2D(filters=64, kernel_size=(3, 3),
    #                  activation="relu", data_format="channels_first"))
    # actor.add(MaxPool2D(2, 2, data_format="channels_first"))
    # actor.add(Conv2D(filters=64, kernel_size=(3, 3),
    #                  activation="relu", data_format="channels_first"))
    # actor.add(Conv2D(filters=64, kernel_size=(3, 3),
    #                  activation="relu", data_format="channels_first"))
    # actor.add(MaxPool2D(2, 2, data_format="channels_first"))
    # actor.add(Conv2D(filters=64, kernel_size=(3, 3),
    #                  activation="relu", data_format="channels_first"))
    # actor.add(Conv2D(filters=8, kernel_size=(3, 3),
    #                  activation="relu", data_format="channels_first"))
    # actor.add(MaxPool2D(2, 2, data_format="channels_first"))
    # actor.add(Flatten())
    actor.add(Dense(1024, input_shape=(6,)))
    actor.add(Activation('relu'))
    actor.add(Dense(1024, kernel_regularizer=regularizers.l2(0.01)))
    actor.add(Activation('relu'))
    actor.add(Dropout(0.5))
    actor.add(Dense(1024, kernel_regularizer=regularizers.l2(0.01)))
    actor.add(Activation('relu'))
    actor.add(Dense(1024, kernel_regularizer=regularizers.l2(0.01)))
    actor.add(Activation('relu'))
    actor.add(Dropout(0.5))
    actor.add(Dense(1024, kernel_regularizer=regularizers.l2(0.01)))
    actor.add(Activation('relu'))
    actor.add(Dense(32, kernel_regularizer=regularizers.l2(0.01)))
    actor.add(Activation('sigmoid'))
    actor.add(Dense(2, kernel_regularizer=regularizers.l2(0.01)))
    actor.add(Activation('tanh'))
    actor.summary()
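    # Critic: estimates Q(observation, action). The observation is embedded by
    # a dense stack first, the action is concatenated in partway through, and
    # the network ends in a single linear Q-value output.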
    action_input = Input(shape=(2,), name='action_input')
    observation_input = Input(
        shape=(6,), name='observation_input')
    # shape=(5, 100, 100), name='observation_input')  # image-observation variant
    # Disabled convolutional front-end matching the actor's image variant:
    # x = Conv2D(filters=64, kernel_size=(3, 3), activation="relu",
    #            data_format="channels_first")(observation_input)
    # x = Conv2D(filters=64, kernel_size=(3, 3), activation="relu",
    #            data_format="channels_first")(x)
    # x = MaxPool2D(2, 2, data_format="channels_first")(x)
    # x = Conv2D(filters=64, kernel_size=(3, 3), activation="relu",
    #            data_format="channels_first")(x)
    # x = Conv2D(filters=64, kernel_size=(3, 3), activation="relu",
    #            data_format="channels_first")(x)
    # x = MaxPool2D(2, 2, data_format="channels_first")(x)
    # x = Conv2D(filters=64, kernel_size=(3, 3), activation="relu",
    #            data_format="channels_first")(x)
    # x = Conv2D(filters=8, kernel_size=(3, 3), activation="relu",
    #            data_format="channels_first")(x)
    # x = MaxPool2D(2, 2, data_format="channels_first")(x)
    # x = Flatten()(x)
    x = Dense(1024)(observation_input)
    x = Activation('relu')(x)
    x = Dense(1024, kernel_regularizer=regularizers.l2(0.01))(x)
    x = Activation('relu')(x)
    x = Dropout(0.5)(x)
    x = Dense(1024, kernel_regularizer=regularizers.l2(0.01))(x)
    x = Activation('relu')(x)
    x = Dense(1024, kernel_regularizer=regularizers.l2(0.01))(x)
    x = Activation('relu')(x)
    x = Dropout(0.5)(x)
    x = Dense(254)(x)  # note: 254 units here, possibly intended as 256
    x = Activation('relu')(x)
    x = Concatenate()([x, action_input])  # inject the action partway through
    x = Dense(256, kernel_regularizer=regularizers.l2(0.01))(x)
    x = Activation('relu')(x)
    x = Dense(256, kernel_regularizer=regularizers.l2(0.01))(x)
    x = Activation('relu')(x)
    x = Dense(32, kernel_regularizer=regularizers.l2(0.01))(x)
    x = Activation('sigmoid')(x)
    x = Dense(1)(x)
    x = Activation('linear')(x)
    critic = Model(inputs=[action_input, observation_input], outputs=x)
    critic.summary()
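    # MyDDPG overrides forward() so that, while the actor is still warming up
    # (step < nb_steps_warmup_actor), a fixed scripted action is executed in
    # place of the untrained actor's output, seeding the replay memory with a
    # sensible trajectory.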
    class MyDDPG(DDPGAgent):
        def forward(self, observation):
            state = self.memory.get_recent_state(observation)
            action = self.select_action(state)
            self.recent_observation = observation
            self.recent_action = action
            # While the actor is warming up, execute a fixed scripted action
            # instead of the untrained actor's output.
            if self.step < self.nb_steps_warmup_actor:
                # F, T = w.policy_agents[0].pathGuide33()  # scripted path guide (disabled)
                F, T = 2.9 / 5, -0.1 / 0.3
                action = np.reshape(np.array([F, T]), self.recent_action.shape)
                self.recent_action = action
                return action
            # # After warm-up: occasionally take a uniform random action
            # # (currently disabled).
            # if self.training and random.random() < 0.15:
            #     F = (random.random() - 0.5) * 2
            #     T = (random.random() - 0.5) * 2
            #     action = np.array([F, T])
            #     action = np.reshape(action, self.recent_action.shape)
            #     self.recent_action = action
            #     return action
            # Otherwise return the actor's prediction.
            return action
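    # Note: MyDDPG is defined but never instantiated below, so the scripted
    # warm-up does not run; the stock DDPGAgent is used instead. To enable the
    # warm-up behaviour, construct the agent with MyDDPG(...) in place of
    # DDPGAgent(...).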
    # Exploration: a 2-dim Ornstein-Uhlenbeck process adds temporally
    # correlated noise to the actor's actions during training.
    random_process = OrnsteinUhlenbeckProcess(
        size=2, theta=.1, mu=0., sigma=.5)
    memory = SequentialMemory(limit=100000, window_length=1)
    agent = DDPGAgent(nb_actions=2, actor=actor, critic=critic, critic_action_input=action_input,
                      memory=memory, nb_steps_warmup_critic=50000, nb_steps_warmup_actor=50000,
                      gamma=.9, target_model_update=1e-3, processor=NpaProcessor(),
                      random_process=random_process)
    # keras-rl accepts a pair of optimizers here: [actor_optimizer, critic_optimizer].
    agent.compile([Adam(lr=1e-3), Adam(lr=1e-3)], metrics=['mae'])
    # agent.load_weights('ddpg_{}_weights.h5f'.format("continous_dynamic"))
    agent.fit(env, nb_steps=100000, visualize=False, verbose=1)
    # After training is done, save the final weights.
    agent.save_weights('ddpg_{}_weights.h5f'.format(
        "continous_dynamic"), overwrite=True)
    # Finally, evaluate the trained agent for 200 episodes.
    agent.test(env, nb_episodes=200, visualize=False, nb_max_episode_steps=2000)