# model.py
# Define the RNN model: a bidirectional LSTM encoder followed by a multi-hop
# self-attention block and an MLP classifier.
from torch.nn import functional
import torch.nn as nn
import torch
from torch.autograd import Variable


class SelfAttentive(nn.Module):

    def __init__(self, ntoken, ninp, nhid, nlayers, da, r, mlp_nhid, nclass, emb_matrix, cuda):
        super(SelfAttentive, self).__init__()
        # Embedding layer
        self.encoder = nn.Embedding(ntoken, ninp)
        # RNN type: bidirectional LSTM
        self.rnn = nn.LSTM(ninp, nhid, nlayers, bias=False, batch_first=True, bidirectional=True)
        # Self-attention layers
        self.S1 = nn.Linear(nhid * 2, da, bias=False)
        self.S2 = nn.Linear(da, r, bias=False)
        # Final MLP layers
        self.MLP = nn.Linear(r * nhid * 2, mlp_nhid)
        self.decoder = nn.Linear(mlp_nhid, nclass)
        self.init_wordembedding(emb_matrix)
        self.init_weights()
        self.r = r
        self.nhid = nhid
        self.nlayers = nlayers
        # Keep the device flag: `self.cuda` is a bound method and is always
        # truthy, so forward() checks this attribute instead.
        self.use_cuda = cuda
        if cuda:
            self.cuda()
    def init_weights(self):
        # Uniform initialization in [-0.1, 0.1] for weights; zero for biases.
        initrange = 0.1
        self.S1.weight.data.uniform_(-initrange, initrange)
        self.S2.weight.data.uniform_(-initrange, initrange)
        self.MLP.weight.data.uniform_(-initrange, initrange)
        self.MLP.bias.data.fill_(0)
        self.decoder.weight.data.uniform_(-initrange, initrange)
        self.decoder.bias.data.fill_(0)

    def init_wordembedding(self, embedding_matrix):
        # Load pretrained word vectors into the embedding layer.
        self.encoder.weight.data = embedding_matrix
    def forward(self, input, hidden, len_li):
        emb = self.encoder(input)
        # Pack the padded batch so the LSTM skips padding timesteps.
        rnn_input = torch.nn.utils.rnn.pack_padded_sequence(emb, list(len_li.data), batch_first=True)
        output, hidden = self.rnn(rnn_input, hidden)
        depacked_output, lens = torch.nn.utils.rnn.pad_packed_sequence(output, batch_first=True)

        if self.use_cuda:
            BM = Variable(torch.zeros(input.size(0), self.r * self.nhid * 2).cuda())
            penal = Variable(torch.zeros(1).cuda())
            I = Variable(torch.eye(self.r).cuda())
        else:
            BM = Variable(torch.zeros(input.size(0), self.r * self.nhid * 2))
            penal = Variable(torch.zeros(1))
            I = Variable(torch.eye(self.r))
        weights = {}

        # Attention block: compute an r-hop attention matrix A per sentence and
        # the flattened sentence embedding M = A * H.
        for i in range(input.size(0)):
            H = depacked_output[i, :lens[i], :]
            s1 = self.S1(H)
            s2 = self.S2(torch.tanh(s1))
            # Attention weights (softmax over timesteps) and sentence embedding
            A = functional.softmax(s2.t(), dim=1)
            M = torch.mm(A, H)
            BM[i, :] = M.view(-1)
            # Penalization term ||A A^T - I||_F^2 encourages the r hops to differ
            AAT = torch.mm(A, A.t())
            P = torch.norm(AAT - I, 2)
            penal += P * P
            weights[i] = A

        # Average the penalization term over the batch
        penal /= input.size(0)

        # MLP block for the classifier features
        MLPhidden = self.MLP(BM)
        decoded = self.decoder(functional.relu(MLPhidden))

        return decoded, hidden, penal, weights
    def init_hidden(self, bsz):
        # Zero (h_0, c_0) states of shape (nlayers * 2, bsz, nhid) for the
        # bidirectional LSTM.
        weight = next(self.parameters()).data
        return (Variable(weight.new(self.nlayers * 2, bsz, self.nhid).zero_()),
                Variable(weight.new(self.nlayers * 2, bsz, self.nhid).zero_()))
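

# A minimal usage sketch (not part of the original file): it builds the model
# with small, hypothetical hyperparameters and a random embedding matrix, then
# runs a single forward pass on a random padded batch. Every size below is an
# illustrative assumption, not a value taken from the repository.
if __name__ == '__main__':
    ntoken, ninp, nhid, nlayers = 1000, 50, 64, 1
    da, r, mlp_nhid, nclass = 32, 4, 128, 2
    emb_matrix = torch.randn(ntoken, ninp)  # stand-in for pretrained vectors
    model = SelfAttentive(ntoken, ninp, nhid, nlayers, da, r,
                          mlp_nhid, nclass, emb_matrix, cuda=False)

    bsz, max_len = 3, 10
    # Token ids in [1, ntoken); lengths must be sorted in decreasing order
    # because forward() uses pack_padded_sequence.
    tokens = Variable(torch.LongTensor(bsz, max_len).random_(1, ntoken))
    lengths = Variable(torch.LongTensor([10, 7, 5]))
    hidden = model.init_hidden(bsz)

    decoded, hidden, penal, weights = model(tokens, hidden, lengths)
    print(decoded.size())     # (bsz, nclass) class scores
    print(penal.size())       # (1,) averaged penalization term
    print(weights[0].size())  # (r, sentence length) attention matrix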