Commit

1. modify structure.
2. add prioritized dqn (not finished).
wenlisong committed Oct 29, 2018
1 parent 67c53c7 commit 2b17869
Showing 11 changed files with 265 additions and 223 deletions.
4 changes: 2 additions & 2 deletions reinforcement_learning.py → Agent.py
@@ -11,7 +11,7 @@ def store_transition(self, *args):
pass


class ReinforcementLearning:
class Agent:
def __init__(self, action_cnt=4, learning_rate=0.1, reward_decay=0.9, e_greedy=0.9, replace_target_iter=300,
batch_size=32, observe_step=200., explore_step=1000., memory=None):
self.action_cnt = action_cnt
@@ -54,7 +54,7 @@ def choose_action(self, observation):
if np.random.uniform() > self.epsilon:
action_val = self.sess.run(self.q_eval, feed_dict={self.s: observation})
action_idx = np.argmax(action_val, axis=1)
print('Q_Max_val {0}'.format(action_val))
# print('Q_Max_val {0}'.format(action_val))
else:
action_idx = np.random.randint(0, self.action_cnt)
action[action_idx] = 1.
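
For reference, here is a minimal standalone sketch of the ε-greedy selection (with the linear annealing typically used in this kind of DQN training loop) that choose_action implements above; the final epsilon value and annealing schedule are illustrative assumptions, not values taken from this repository:

```python
import numpy as np

def choose_action_eps_greedy(q_values, epsilon, action_cnt=2, rng=np.random):
    """Return a one-hot action: greedy with prob. 1-epsilon, random with prob. epsilon."""
    action = np.zeros(action_cnt)
    if rng.uniform() > epsilon:
        action_idx = int(np.argmax(q_values))    # exploit: pick the highest Q-value
    else:
        action_idx = rng.randint(0, action_cnt)  # explore: pick a random action
    action[action_idx] = 1.0
    return action

def anneal_epsilon(step, observe_step=10000., explore_step=3000000.,
                   initial_eps=0.1, final_eps=0.0001):
    """Keep epsilon fixed while observing, then decay it linearly over the explore phase.
    final_eps is an illustrative assumption."""
    if step <= observe_step:
        return initial_eps
    frac = min(1.0, (step - observe_step) / explore_step)
    return initial_eps + frac * (final_eps - initial_eps)
```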
2 changes: 1 addition & 1 deletion README.md
@@ -63,7 +63,7 @@ Markov Reward Process is a tuple <***S, P, R, γ***>, ***S*** is a finite set of

##### Return
The return Gt is the total discounted reward from time-step t.
Gt = Rt+1 + γRt+2 + γ²Rt+3 + ... = Σ(k=0→∞) γ^k Rt+k+1
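
As a quick numerical check of this definition (a worked example added here for clarity, not part of the original README): with γ = 0.9 and rewards 1, 0, 2 over the next three steps, Gt = 1 + 0.9·0 + 0.81·2 = 2.62.

```python
def discounted_return(rewards, gamma=0.9):
    """Gt = sum over k of gamma^k * R_{t+k+1}, for a finite reward sequence."""
    return sum((gamma ** k) * r for k, r in enumerate(rewards))

assert abs(discounted_return([1, 0, 2], gamma=0.9) - 2.62) < 1e-9
```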

## Reproduction
First, I'll reproduce what [yenchenlin/DeepLearningFlappyBird](https://github.com/yenchenlin/DeepLearningFlappyBird)
6 changes: 3 additions & 3 deletions double_dqn.py
@@ -1,11 +1,11 @@
from dqn import DeepQNetwork
from dqn import DQN_Agent
import numpy as np
import random


class DoubleDQN(DeepQNetwork):
class DoubleDQN_Agent(DQN_Agent):
def __init__(self, use_pre_weights=False, save_path='./saved_double_dqn_model/'):
super(DoubleDQN, self).__init__(use_pre_weights=use_pre_weights, save_path=save_path)
super(DoubleDQN_Agent, self).__init__(use_pre_weights=use_pre_weights, save_path=save_path)

def learn(self):
if self.learn_step_counter % self.replace_target_iter == 0:
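
The body of learn() is collapsed in this view. For context, here is a minimal NumPy sketch of the target computation that characterizes Double DQN, with illustrative variable names (not identifiers from this file): the online network chooses the argmax action at s', while the target network supplies its value.

```python
import numpy as np

def double_dqn_targets(q_eval_next, q_target_next, rewards, terminals, gamma=0.99):
    """q_eval_next / q_target_next: Q(s', .) from the online and target nets, shape (batch, actions);
    terminals: 1.0 for terminal transitions, else 0.0."""
    best_actions = np.argmax(q_eval_next, axis=1)                          # online net selects the action
    chosen_q = q_target_next[np.arange(len(best_actions)), best_actions]   # target net evaluates it
    return rewards + gamma * chosen_q * (1.0 - terminals)                  # no bootstrapping past terminals
```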
8 changes: 4 additions & 4 deletions dqn.py
@@ -1,4 +1,4 @@
from reinforcement_learning import ReinforcementLearning, Memory
from Agent import Agent, Memory
import tensorflow as tf
import numpy as np
import random
@@ -15,13 +15,13 @@ def store_transition(self, s, a, r, s_, terminal):
self.pool.popleft()


class DeepQNetwork(ReinforcementLearning):
class DQN_Agent(Agent):
def __init__(self, action_cnt=2, learning_rate=1e-6, reward_decay=0.99, e_greedy=0.1, replace_target_iter=300,
batch_size=32, observe_step=10000., explore_step=3000000., memory=Deque(), use_pre_weights=False,
save_path='./saved_dqn_model/'):

super(DeepQNetwork, self).__init__(action_cnt, learning_rate, reward_decay, e_greedy, replace_target_iter,
batch_size, observe_step, explore_step, memory)
super(DQN_Agent, self).__init__(action_cnt, learning_rate, reward_decay, e_greedy, replace_target_iter,
batch_size, observe_step, explore_step, memory)
# record average score per episode
self.score_per_episode = 0
self.score = tf.placeholder(tf.float16, [], name='score')
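
The rest of this hunk is truncated. As a rough sketch of how such a score placeholder is usually wired into TensorBoard with the TensorFlow 1.x API this repository uses (the summary tag and log directory below are assumptions, not taken from the file):

```python
import tensorflow as tf  # TensorFlow 1.x API

score = tf.placeholder(tf.float32, [], name='score')
score_summary = tf.summary.scalar('score_per_episode', score)

with tf.Session() as sess:
    writer = tf.summary.FileWriter('./logs', sess.graph)
    # after each episode in the training loop:
    summary = sess.run(score_summary, feed_dict={score: 42.0})
    writer.add_summary(summary, global_step=0)
```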
167 changes: 0 additions & 167 deletions dqn_prio_memo.py

This file was deleted.
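
The deleted file's contents are not shown, and the commit message notes the prioritized DQN is not finished. For orientation only, here is a generic proportional prioritized-replay buffer sketch under the standard PER formulation (P(i) ∝ p_i^α); it is an illustration, not a reconstruction of the deleted dqn_prio_memo.py:

```python
import numpy as np

class PrioritizedMemory:
    """Proportional prioritized replay on a flat list (a sum-tree is the usual optimization)."""
    def __init__(self, capacity=10000, alpha=0.6, eps=1e-6):
        self.capacity, self.alpha, self.eps = capacity, alpha, eps
        self.data, self.priorities = [], []

    def store(self, transition):
        max_p = max(self.priorities) if self.priorities else 1.0
        if len(self.data) >= self.capacity:       # drop the oldest transition when full
            self.data.pop(0)
            self.priorities.pop(0)
        self.data.append(transition)
        self.priorities.append(max_p)             # new samples get the current max priority

    def sample(self, batch_size=32):
        scaled = np.array(self.priorities) ** self.alpha
        probs = scaled / scaled.sum()
        idx = np.random.choice(len(self.data), batch_size, p=probs)
        return idx, [self.data[i] for i in idx]

    def update_priorities(self, idx, td_errors):
        for i, err in zip(idx, td_errors):
            self.priorities[i] = abs(float(err)) + self.eps  # priority from the TD error magnitude
```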

18 changes: 6 additions & 12 deletions main.py
@@ -30,22 +30,16 @@ def main(argv=None):
elif opt in ("-g", "--game"):
if arg == 'fb':
from train_fb import train
elif arg == 'fb2':
from train_fb2 import train
else:
print("No this game, now we have flappy bird(fb)")
return 1
elif opt in ("-n", "--network"):
train(arg)
# if arg == 'dqn':
# train(arg)
# elif arg == 'doubledqn':
# train(arg)
# elif arg == 'mydqn':
# train(arg)
# else:
# print("You could choose 'dqn', 'doubledqn' as network's parameter")
# return 1
# train(arg)
if arg in ['dqn', 'doubledqn', 'mydqn', 'mydqn2', 'priodqn']:
train(arg)
else:
print("You could choose 'dqn', 'doubledqn', 'mydqn', 'mydqn2' or 'priodqn' as the network parameter")
return 1
return 0


8 changes: 4 additions & 4 deletions mydqn.py
@@ -1,4 +1,4 @@
from reinforcement_learning import ReinforcementLearning, Memory
from Agent import Agent, Memory
import tensorflow as tf
import numpy as np
import random
@@ -15,13 +15,13 @@ def store_transition(self, s, a, r, s_, terminal):
self.pool.popleft()


class DeepQNetwork(ReinforcementLearning):
class DQN_Agent(Agent):
def __init__(self, action_cnt=2, learning_rate=1e-6, reward_decay=0.99, e_greedy=0.1, replace_target_iter=200,
batch_size=32, observe_step=10000., explore_step=3000000., memory=Deque(), use_pre_weights=False,
save_path='./saved_dqn_model/'):

super(DeepQNetwork, self).__init__(action_cnt, learning_rate, reward_decay, e_greedy, replace_target_iter,
batch_size, observe_step, explore_step, memory)
super(DQN_Agent, self).__init__(action_cnt, learning_rate, reward_decay, e_greedy, replace_target_iter,
batch_size, observe_step, explore_step, memory)
# record average score per episode
self.score_per_episode = 0
self.score = tf.placeholder(tf.float16, [], name='score')
12 changes: 6 additions & 6 deletions mydqn2.py
@@ -1,4 +1,4 @@
from reinforcement_learning import ReinforcementLearning, Memory
from Agent import Agent, Memory
import tensorflow as tf
import numpy as np
import random
@@ -16,13 +16,13 @@ def store_transition(self, s, a, r, s_, terminal):
self.pool.popleft()


class DeepQNetwork(ReinforcementLearning):
def __init__(self, action_cnt=2, learning_rate=1e-6, reward_decay=0.99, e_greedy=0.1, replace_target_iter=200,
batch_size=32, observe_step=10000., explore_step=3000000., memory=Deque(), use_pre_weights=False,
class DQN_Agent(Agent):
def __init__(self, action_cnt=2, learning_rate=1e-6, reward_decay=0.99, e_greedy=0., replace_target_iter=200,
batch_size=32, observe_step=1000000., explore_step=3000000., memory=Deque(), use_pre_weights=False,
save_path='./saved_dqn_model/'):

super(DeepQNetwork, self).__init__(action_cnt, learning_rate, reward_decay, e_greedy, replace_target_iter,
batch_size, observe_step, explore_step, memory)
super(DQN_Agent, self).__init__(action_cnt, learning_rate, reward_decay, e_greedy, replace_target_iter,
batch_size, observe_step, explore_step, memory)
# record average score per episode
self.score_per_episode = 0
self.score = tf.placeholder(tf.float16, [], name='score')