-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathdueling_network.py
44 lines (38 loc) · 1.41 KB
/
dueling_network.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
import torch
import torch.nn as nn
import torch.nn.functional as F
class Network(nn.Module):
    """Dueling Q-network: maps a state to one Q-value per action.

    A shared feature layer feeds two heads, a scalar state-value stream and
    a per-action advantage stream. ``forward`` combines them as
    ``Q = V + (A - mean(A))``, where the mean is taken over the action axis.
    """

    def __init__(self, state_size, action_size, seed,
                 feature_size=64, value_size=64, advantage_size=64):
        """Initialize parameters and build model.

        Params
        ======
            state_size (int): Dimension of each state
            action_size (int): Number of outputs of the advantage head
                (one Q-value per action)
            seed (int): Random seed
            feature_size (int): Width of the shared feature layer
            value_size (int): Hidden width of the value head
            advantage_size (int): Hidden width of the advantage head
        """
        super().__init__()
        # Seed before any layer is created so the initial weights are
        # reproducible; torch.manual_seed seeds the global RNG and the
        # generator it returns is kept on the instance.
        self.seed = torch.manual_seed(seed)

        # Attribute names and construction order are kept stable so existing
        # checkpoints (state_dict keys) and seeded initialisation still match.
        self.feature_layer = nn.Sequential(
            nn.Linear(state_size, feature_size),
            nn.ReLU(),
        )
        # Value head: one scalar per state.
        self.value_layer = nn.Sequential(
            nn.Linear(feature_size, value_size),
            nn.ReLU(),
            nn.Linear(value_size, 1),
        )
        # Advantage head: one output per action.
        self.advantage_layer = nn.Sequential(
            nn.Linear(feature_size, advantage_size),
            nn.ReLU(),
            nn.Linear(advantage_size, action_size),
        )

    def forward(self, state):
        """Build a network that maps state -> action values.

        Params
        ======
            state (torch.Tensor): Tensor whose last dimension is
                ``state_size``; it may be a single state or carry any
                number of leading batch dimensions

        Returns
        =======
            torch.Tensor: Q-values with the same leading dimensions as
                ``state`` and a last dimension of ``action_size``
        """
        features = self.feature_layer(state)
        value = self.value_layer(features)
        advantage = self.advantage_layer(features)
        # Centre the advantages over the action axis. Using the last axis
        # (rather than a hard-coded dim=1) also works for an unbatched 1-D
        # state and for inputs with extra leading dimensions.
        centered_advantage = advantage - advantage.mean(dim=-1, keepdim=True)
        return value + centered_advantage