# layers.py
# coding: utf-8
import tensorflow as tf

conv1d = tf.layers.conv1d


def fcn_layer(inputs,
              input_dim,
              output_dim,
              activation=None):
    """Fully connected layer: Y = XW + b, with an optional activation."""
    W = tf.Variable(tf.truncated_normal([input_dim, output_dim], stddev=0.1))
    b = tf.Variable(tf.zeros([output_dim]))
    XWb = tf.matmul(inputs, W) + b  # Y = XW + b
    if activation is None:
        outputs = XWb
    else:
        outputs = activation(XWb)
    return outputs
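

# Usage sketch (illustrative, not part of the original file): a single hidden layer
# over a hypothetical batch of 128-dimensional feature vectors.
def _example_fcn_layer():
    x = tf.placeholder(tf.float32, shape=(None, 128))  # hypothetical input batch
    h = fcn_layer(x, input_dim=128, output_dim=64, activation=tf.nn.relu)
    return h  # shape: (None, 64)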


def relu(x, alpha=0., max_value=None):
    '''
    ReLU with an optional negative slope and optional saturation threshold.
    alpha: slope of the negative section.
    max_value: if given, clip the positive part at this value.
    '''
    negative_part = tf.nn.relu(-x)
    x = tf.nn.relu(x)
    if max_value is not None:
        x = tf.clip_by_value(x, tf.cast(0., dtype=tf.float32),
                             tf.cast(max_value, dtype=tf.float32))
    x -= tf.constant(alpha, dtype=tf.float32) * negative_part
    return x
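

# Usage sketch (illustrative, not part of the original file): a ReLU6-style
# activation with a small negative slope, on a toy constant tensor.
def _example_relu():
    z = tf.constant([-2.0, 0.5, 8.0])
    return relu(z, alpha=0.1, max_value=6.0)  # evaluates to about [-0.2, 0.5, 6.0]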


def attn_head(seq, out_sz, bias_mat, activation, in_drop=0.0, coef_drop=0.5, residual=False):
    """Dense graph-attention head. Note: in this variant `bias_mat` is accepted for
    interface compatibility but is not added to the attention logits."""
    with tf.name_scope('my_attn'):
        if in_drop != 0.0:
            seq = tf.nn.dropout(seq, 1.0 - in_drop)

        seq_fts = tf.layers.conv1d(seq, out_sz, 1, use_bias=False)

        # simplest self-attention possible
        f_1 = tf.layers.conv1d(seq_fts, 1, 1)
        f_2 = tf.layers.conv1d(seq_fts, 1, 1)
        logits = f_1 + tf.transpose(f_2, [0, 2, 1])
        # logits = tf.matmul(f_1, tf.transpose(f_2, [0, 2, 1]))
        coefs = tf.nn.softmax(tf.nn.leaky_relu(logits))
        # coefs = tf.matmul(tf.matrix_diag(1 / (tf.reduce_sum(logits, axis=-1) + 0.01)),
        #                   logits)

        if coef_drop != 0.0:
            coefs = tf.nn.dropout(coefs, 1.0 - coef_drop)
        if in_drop != 0.0:
            seq_fts = tf.nn.dropout(seq_fts, 1.0 - in_drop)

        vals = tf.matmul(coefs, seq_fts)
        ret = vals
        # ret = tf.contrib.layers.bias_add(vals)

        # residual connection
        if residual:
            if seq.shape[-1] != ret.shape[-1]:
                ret = ret + conv1d(seq, ret.shape[-1], 1)  # activation
            else:
                ret = ret + seq

        return activation(ret)  # activation
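

# Usage sketch (illustrative, not part of the original file): one dense attention
# head over a single graph. The shapes are assumptions: a batch of 1 graph with
# `nb_nodes` nodes, `ft_size` input features, and 8 hidden features per head.
def _example_attn_head(nb_nodes=100, ft_size=50):
    seq_in = tf.placeholder(tf.float32, shape=(1, nb_nodes, ft_size))
    bias_in = tf.placeholder(tf.float32, shape=(1, nb_nodes, nb_nodes))
    # output shape: (1, nb_nodes, 8)
    return attn_head(seq_in, out_sz=8, bias_mat=bias_in,
                     activation=tf.nn.elu, in_drop=0.0, coef_drop=0.5)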


def sp_attn_head(seq, out_sz, adj_mat, activation, nb_nodes, in_drop=0.0, coef_drop=0.0, residual=False):
    """Sparse graph-attention head; `adj_mat` is a tf.SparseTensor adjacency mask."""
    with tf.name_scope('sp_attn'):
        if in_drop != 0.0:
            seq = tf.nn.dropout(seq, 1.0 - in_drop)

        seq_fts = tf.layers.conv1d(seq, out_sz, 1, use_bias=False)

        # simplest self-attention possible
        f_1 = tf.layers.conv1d(seq_fts, 1, 1)
        f_2 = tf.layers.conv1d(seq_fts, 1, 1)
        f_1 = tf.reshape(f_1, (nb_nodes, 1))
        f_2 = tf.reshape(f_2, (nb_nodes, 1))
        f_1 = adj_mat * f_1
        f_2 = adj_mat * tf.transpose(f_2, [1, 0])

        logits = tf.sparse_add(f_1, f_2)
        lrelu = tf.SparseTensor(indices=logits.indices,
                                values=tf.nn.leaky_relu(logits.values),
                                dense_shape=logits.dense_shape)
        coefs = tf.sparse_softmax(lrelu)

        if coef_drop != 0.0:
            coefs = tf.SparseTensor(indices=coefs.indices,
                                    values=tf.nn.dropout(
                                        coefs.values, 1.0 - coef_drop),
                                    dense_shape=coefs.dense_shape)
        if in_drop != 0.0:
            seq_fts = tf.nn.dropout(seq_fts, 1.0 - in_drop)

        # As tf.sparse_tensor_dense_matmul expects its arguments to have rank 2,
        # we assume here that the input has batch size 1 and reshape accordingly.
        # The method will fail in all other cases!
        coefs = tf.sparse_reshape(coefs, [nb_nodes, nb_nodes])
        seq_fts = tf.squeeze(seq_fts)
        vals = tf.sparse_tensor_dense_matmul(coefs, seq_fts)
        vals = tf.expand_dims(vals, axis=0)
        vals.set_shape([1, nb_nodes, out_sz])
        ret = tf.contrib.layers.bias_add(vals)

        # residual connection
        if residual:
            if seq.shape[-1] != ret.shape[-1]:
                ret = ret + conv1d(seq, ret.shape[-1], 1)  # activation
            else:
                ret = ret + seq

        return activation(ret)  # activation
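

# Usage sketch (illustrative, not part of the original file): the sparse head takes
# the adjacency mask as a tf.SparseTensor with one entry per edge. The tiny 3-node
# path graph below is an assumption for demonstration only.
def _example_sp_attn_head():
    nb_nodes, ft_size = 3, 5
    seq_in = tf.placeholder(tf.float32, shape=(1, nb_nodes, ft_size))
    adj = tf.SparseTensor(indices=[[0, 1], [1, 0], [1, 2], [2, 1]],
                          values=[1.0, 1.0, 1.0, 1.0],
                          dense_shape=[nb_nodes, nb_nodes])
    return sp_attn_head(seq_in, out_sz=8, adj_mat=adj,
                        activation=tf.nn.elu, nb_nodes=nb_nodes)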


class BaseGAttN:
    """Loss/training helpers. The methods are written without `self` and are meant
    to be called on the class itself, e.g. BaseGAttN.loss(...)."""

    def loss(logits, labels, nb_classes, class_weights):
        sample_wts = tf.reduce_sum(tf.multiply(
            tf.one_hot(labels, nb_classes), class_weights), axis=-1)
        xentropy = tf.multiply(tf.nn.sparse_softmax_cross_entropy_with_logits(
            labels=labels, logits=logits), sample_wts)
        return tf.reduce_mean(xentropy, name='xentropy_mean')

    def training(loss, lr, l2_coef):
        # weight decay
        vars = tf.trainable_variables()
        lossL2 = tf.add_n([tf.nn.l2_loss(v) for v in vars if v.name not
                           in ['bias', 'gamma', 'b', 'g', 'beta']]) * l2_coef
        opt = tf.train.AdamOptimizer(learning_rate=lr)
        train_op = opt.minimize(loss + lossL2)
        return train_op

    def preshape(logits, labels, nb_classes):
        new_sh_lab = [-1]
        new_sh_log = [-1, nb_classes]
        log_resh = tf.reshape(logits, new_sh_log)
        lab_resh = tf.reshape(labels, new_sh_lab)
        return log_resh, lab_resh

    def confmat(logits, labels):
        preds = tf.argmax(logits, axis=1)
        return tf.confusion_matrix(labels, preds)
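

# Usage sketch (illustrative, not part of the original file): wiring the class-level
# helpers together. `logits_op` and `labels_ph` are hypothetical tensors of shape
# (batch, nb_nodes, nb_classes) and (batch, nb_nodes); lr / l2_coef values are
# illustrative assumptions.
def _example_training_setup(logits_op, labels_ph, nb_classes, class_weights):
    log_resh, lab_resh = BaseGAttN.preshape(logits_op, labels_ph, nb_classes)
    loss = BaseGAttN.loss(log_resh, lab_resh, nb_classes, class_weights)
    train_op = BaseGAttN.training(loss, lr=0.005, l2_coef=0.0005)
    return loss, train_op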


class GAT(BaseGAttN):
    def inference(self, inputs, nb_classes, bias_mat, hid_units,
                  n_heads, activation=tf.nn.elu, residual=False, k=0.5):
        """GAT inference with top-k node selection: nodes are scored by a learned
        projection `p`, the top `k` fraction of nodes is kept via a 0/1 mask, and
        the masked features are passed through multi-head attention layers."""
        select_num = tf.cast(inputs.shape[1].value * k, dtype=tf.int32)
        # mean_sum = tf.reduce_sum(tf.square(inputs), -1)
        p = tf.Variable(tf.truncated_normal([int(inputs.shape[-1]), 1], stddev=0.1))
        mean_sum = tf.reshape(tf.matmul(inputs, p) / tf.reduce_sum(tf.square(p)),
                              [-1, int(inputs.shape[1])])
        a_top, a_top_idx = tf.nn.top_k(mean_sum, select_num)
        a_top_1, a_top_idx_1 = tf.nn.top_k(mean_sum, inputs.shape[1])
        a_shape = tf.shape(mean_sum)
        a_top_sm = a_top * 0 + 1  # ones at the selected positions
        a_row_idx = tf.tile(tf.range(a_shape[0])[:, tf.newaxis], (1, select_num))
        """
        a_row_idx = [array([[ 0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0],
                            [ 1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1],
                            ...
                            [15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15]],
                           dtype=int32)]
        """
        scatter_idx = tf.stack([a_row_idx, a_top_idx], axis=-1)
        result = tf.scatter_nd(scatter_idx, a_top_sm, a_shape)  # 0/1 node mask
        a_index = tf.tile(tf.expand_dims(result, -1), (1, 1, inputs.shape[-1]))
        c_index = a_index
        inputs = a_index * inputs

        attns = []
        for _ in range(n_heads[0]):
            attns.append(attn_head(inputs, bias_mat=bias_mat,
                                   out_sz=hid_units[0], activation=activation, residual=False))
        h_1 = tf.concat(attns, axis=-1)

        for i in range(1, len(hid_units)):
            attns = []
            for _ in range(n_heads[i]):
                attns.append(attn_head(h_1, bias_mat=bias_mat,
                                       out_sz=hid_units[i], activation=activation, residual=residual))
            h_1 = tf.concat(attns, axis=-1)

        a_index = tf.tile(tf.expand_dims(result, -1), (1, 1, h_1.shape[-1]))
        h_1 = a_index * h_1
        logits = tf.layers.dense(
            inputs=h_1, units=nb_classes, activation=tf.nn.leaky_relu)
        a_index = tf.tile(tf.expand_dims(result, -1), (1, 1, logits.shape[-1]))
        logits = a_index * logits
        return a_index, h_1, logits, inputs, select_num, a_top_idx_1
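

# Usage sketch (illustrative, not part of the original file): building the full model
# for a single graph. The sizes below are assumptions; `hid_units` / `n_heads` follow
# the usual GAT convention of one entry per attention layer.
if __name__ == "__main__":
    nb_nodes, ft_size, nb_classes = 16, 32, 7
    ftr_in = tf.placeholder(tf.float32, shape=(1, nb_nodes, ft_size))
    bias_in = tf.placeholder(tf.float32, shape=(1, nb_nodes, nb_nodes))
    model = GAT()
    a_index, h_1, logits, masked_in, select_num, top_idx = model.inference(
        ftr_in, nb_classes, bias_in, hid_units=[8], n_heads=[4, 1], k=0.5)
    print(logits.shape)  # expected: (1, 16, 7)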