-
Notifications
You must be signed in to change notification settings - Fork 0
/
model.py
executable file
·70 lines (63 loc) · 1.93 KB
/
model.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
# Q-network architectures: the standard DQN and the dueling DQN variant
import numpy as np
import torch
import torch.nn as nn
class Flatten(nn.Module):
    """Collapse every dimension after the first into a single dimension."""

    def forward(self, x):
        """
        :param x: tensor whose first dimension is kept as-is (the batch axis
            in the networks of this file)
        :return: tensor of shape (x.shape[0], -1)
        """
        # reshape() returns a view whenever view() would have succeeded, but
        # it also handles non-contiguous inputs (e.g. permuted or
        # channels_last tensors), where view() raises a RuntimeError.
        return x.reshape(x.shape[0], -1)
class DQN(nn.Module):
    """Convolutional Q-network that maps stacked frames to one value per action."""

    def __init__(self, out_size):
        """
        :param out_size: number of actions in the game
        :input (N, 4, 84, 84) -- the layer sizes below are fixed for this shape
        :output (N, out_size)
        """
        super().__init__()

        # Spatial sizes in the comments assume an 84x84 input.
        conv_stack = [
            nn.Conv2d(4, 32, kernel_size=8, stride=4),   # -> (N, 32, 20, 20)
            nn.ReLU(),
            nn.Conv2d(32, 64, kernel_size=4, stride=2),  # -> (N, 64, 9, 9)
            nn.ReLU(),
            nn.Conv2d(64, 64, kernel_size=3, stride=1),  # -> (N, 64, 7, 7)
            nn.ReLU(),
        ]
        # 64 * 7 * 7 = 3136 features once the conv output is flattened.
        head = [
            Flatten(),
            nn.Linear(64 * 7 * 7, 512),
            nn.ReLU(),
            nn.Linear(512, out_size),
        ]
        # Modules are registered in the same order (indices 0..9) so that
        # parameter names in the state dict stay "layers.<index>.*".
        self.layers = nn.Sequential(*conv_stack, *head)

    def forward(self, x):
        """Run ``x`` through the whole stack and return the result."""
        q_values = self.layers(x)
        return q_values
class DuelingNet(nn.Module):
    """Dueling Q-network: a shared conv trunk feeding separate value and
    advantage heads whose outputs are combined into per-action Q-values."""

    def __init__(self, out_size):
        """
        :param out_size: number of actions in the game
        :input (N, 4, 84, 84) -- the layer sizes below are fixed for this shape
        :output (N, out_size)
        """
        super().__init__()
        # Input (N, 4, 84, 84)
        self.conv_layers = nn.Sequential(
            nn.Conv2d(4, 32, kernel_size=8, stride=4),   # (N, 32, 20, 20)
            nn.ReLU(),
            nn.Conv2d(32, 64, kernel_size=4, stride=2),  # (N, 64, 9, 9)
            nn.ReLU(),
            nn.Conv2d(64, 64, kernel_size=3, stride=1),  # (N, 64, 7, 7)
            nn.ReLU(),
            Flatten(),                                   # (N, 3136)
        )
        # Advantage head: one output per action.
        self.advantage = nn.Sequential(
            nn.Linear(3136, 512),
            nn.ReLU(),
            nn.Linear(512, out_size),
        )
        # Value head: a single scalar per sample.
        self.value = nn.Sequential(
            nn.Linear(3136, 512),
            nn.ReLU(),
            nn.Linear(512, 1),
        )

    def forward(self, x):
        """
        :param x: input batch, (N, 4, 84, 84)
        :return: Q-values of shape (N, out_size), computed as
            value + advantage - mean over actions of advantage
        """
        features = self.conv_layers(x)
        advantage = self.advantage(features)  # (N, out_size)
        value = self.value(features)          # (N, 1), broadcast over actions
        # Centre the advantages per sample, i.e. average over the action
        # dimension only. A plain .mean() would also average over the batch,
        # making each sample's Q-values depend on the other samples in the
        # batch (results only coincide when N == 1).
        return value + advantage - advantage.mean(dim=-1, keepdim=True)