model.py

import torch
from namedtensor import ntorch, NamedTensor


class S2SNet(ntorch.nn.Module):
    """Plain sequence-to-sequence model: an LSTM encoder feeding an LSTM decoder."""

    def __init__(self, hidden_dim=512, num_layers=2, dropout=0.5):
        super(S2SNet, self).__init__()
        self.encoder = EncoderS2S(hidden_dim=hidden_dim, num_layers=num_layers, dropout=dropout)
        self.decoder = DecoderS2S(hidden_dim=hidden_dim, num_layers=num_layers, dropout=dropout)

    def forward(self, src, trg):
        context, hidden = self.encoder(src)
        preds, _ = self.decoder(trg, hidden, context)
        return preds[{"trgSeqlen": slice(0, preds.size("trgSeqlen") - 1)}]
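
# Note on the slice in S2SNet.forward (and AttnNet.forward below): with teacher
# forcing, position t of `preds` is the model's guess for target token t + 1, so
# the prediction made after the final target token has no gold label and is
# dropped. For example, with trg = <s> a b </s>, the kept positions predict
# a, b, </s> and are scored against trg[1:].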


class AttnNet(ntorch.nn.Module):
    """Sequence-to-sequence model with a Luong-style attention decoder."""

    def __init__(self, hidden_dim=512, num_layers=2, dropout=0.5, n=2):
        super(AttnNet, self).__init__()
        self.encoder = EncoderS2S(hidden_dim=hidden_dim, num_layers=num_layers, dropout=dropout)
        self.decoder = DecoderAttn(hidden_dim=hidden_dim, num_layers=num_layers, dropout=dropout, n=n)

    def forward(self, src, trg):
        context, hidden = self.encoder(src)
        preds, _ = self.decoder(trg, hidden, context)
        return preds[{"trgSeqlen": slice(0, preds.size("trgSeqlen") - 1)}]


class EncoderS2S(ntorch.nn.Module):
    def __init__(self, hidden_dim=512, num_layers=2, dropout=0.5, src_vocab_len=13353):
        super(EncoderS2S, self).__init__()
        self.embedding = ntorch.nn.Embedding(src_vocab_len, hidden_dim).spec("srcSeqlen", "embedding")
        self.dropout = ntorch.nn.Dropout(dropout)
        self.LSTM = ntorch.nn.LSTM(hidden_dim, hidden_dim, num_layers, dropout=dropout).spec("embedding", "srcSeqlen", "hidden")

    def forward(self, input, hidden=None):
        # NOTE: `input` must have its named dimensions ordered ("srcSeqlen", "batch").
        # `hidden` is unused; the LSTM always starts from a zero state.
        # Reversing the source sequence improves translation (Sutskever et al., 2014).
        input = ntorch.tensor(torch.flip(input.values, (0,)), ("srcSeqlen", "batch"))
        x = self.embedding(input)
        x = self.dropout(x)
        x, hidden = self.LSTM(x)
        x = self.dropout(x)
        return x, hidden


class DecoderS2S(ntorch.nn.Module):
    def __init__(self, hidden_dim=512, num_layers=2, dropout=0.5, trg_vocab_len=11560):
        super(DecoderS2S, self).__init__()
        self.embedding = ntorch.nn.Embedding(trg_vocab_len, hidden_dim).spec("trgSeqlen", "embedding")
        self.dropout = ntorch.nn.Dropout(dropout)
        self.LSTM = ntorch.nn.LSTM(hidden_dim, hidden_dim, num_layers, dropout=dropout).spec("embedding", "trgSeqlen", "hidden")
        self.out = ntorch.nn.Linear(hidden_dim, trg_vocab_len).spec("hidden", "vocab")

    def forward(self, input, hidden, context=None):
        # `context` (the encoder output) is unused here; it is accepted only so
        # the plain decoder shares its call signature with DecoderAttn.
        x = self.embedding(input)
        x = self.dropout(x)
        x, hidden = self.LSTM(x, hidden)
        x = self.dropout(x)
        y = self.out(x)
        # No softmax: the loss is expected to be cross-entropy over logits.
        return y, hidden


class DecoderAttn(ntorch.nn.Module):
    """Attention decoder based on the implementation in Yuntian's slides and Luong."""

    def __init__(self, hidden_dim=512, num_layers=2, dropout=0.5, trg_vocab_len=11560, n=2):
        super(DecoderAttn, self).__init__()
        self.embedding = ntorch.nn.Embedding(trg_vocab_len, hidden_dim).spec("trgSeqlen", "embedding")
        self.dropout = ntorch.nn.Dropout(dropout)
        self.LSTM = ntorch.nn.LSTM(hidden_dim, hidden_dim, num_layers, dropout=dropout).spec("embedding", "trgSeqlen", "hidden")
        # Mix [decoder state; attention context] (2 * hidden) into an n * hidden
        # feature for the output layer.
        self.h2h = ntorch.nn.Linear(2 * hidden_dim, n * hidden_dim).spec("hidden", "hidden")
        self.out = ntorch.nn.Linear(n * hidden_dim, trg_vocab_len).spec("hidden", "vocab")

    def get_context(self, hidden, decoder_context):
        """Dot-product attention.

        (batch, trgSeqlen, hidden) x (batch, srcSeqlen, hidden)
            -> weights (batch, trgSeqlen, srcSeqlen), context (batch, trgSeqlen, hidden)

        `hidden` is the decoder LSTM output; `decoder_context` is, despite the
        name, the encoder's output sequence.
        """
        attn_weights = hidden.dot("hidden", decoder_context).softmax("srcSeqlen")
        context = attn_weights.dot("srcSeqlen", decoder_context)
        return context, attn_weights
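
    # In plain PyTorch terms, the two `dot` calls above amount to the following
    # sketch (assuming a batch-first layout; namedtensor contracts the named
    # dims directly, so no transposes are needed):
    #   scores  = torch.einsum("bth,bsh->bts", dec_out, enc_out)
    #   weights = scores.softmax(dim=-1)                  # over srcSeqlen
    #   context = torch.einsum("bts,bsh->bth", weights, enc_out)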

    def forward(self, input, hidden, decoder_context, return_attn=False):
        """Forward pass.

        Parameters (all NamedTensors)
        ----------
        input : (trgSeqlen, batch)
        hidden : tuple of ((batch, layers, hidden), (batch, layers, hidden))
        decoder_context : (batch, srcSeqlen, hidden), the encoder output

        Todo
        ----
        * Where should dropout be applied?
        * Read over the Slack for description
        """
        x = self.embedding(input)
        x = self.dropout(x)
        x, hidden = self.LSTM(x, hidden)
        x = self.dropout(x)
        # Attend over the encoder states, then mix the attention context back
        # into the decoder state before predicting the next token.
        context, attn_weights = self.get_context(x, decoder_context)
        x = self.h2h(ntorch.cat([x, context], "hidden")).relu()
        y = self.out(x)
        # No softmax: the loss is expected to be cross-entropy over logits.
        if return_attn:
            return y, hidden, attn_weights
        return y, hidden
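

# A minimal smoke-test sketch, not part of the original module. It assumes the
# harvardnlp `namedtensor` API for wrapping plain tensors; the vocab sizes
# match the defaults above, and the sequence lengths, batch size, and reduced
# hidden_dim are arbitrary illustration values.
if __name__ == "__main__":
    src = NamedTensor(torch.randint(0, 13353, (10, 4)), ("srcSeqlen", "batch"))
    trg = NamedTensor(torch.randint(0, 11560, (12, 4)), ("trgSeqlen", "batch"))
    model = AttnNet(hidden_dim=64, num_layers=2, dropout=0.5)
    preds = model(src, trg)  # logits over "vocab"; trgSeqlen shortened by one
    print(preds.shape)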