pytorch_utils.py (forked from WEIRDLabUW/CSE-579-HW1)
import torch
import numpy as np
from torch import nn
from collections import OrderedDict
from numbers import Number


def create_stats_ordered_dict(
        name,
        data,
        stat_prefix=None,
        always_show_all_stats=True,
        exclude_max_min=False,
):
    if stat_prefix is not None:
        name = "{}{}".format(stat_prefix, name)
    if isinstance(data, Number):
        return OrderedDict({name: data})
    if len(data) == 0:
        return OrderedDict()
    if isinstance(data, tuple):
        ordered_dict = OrderedDict()
        for number, d in enumerate(data):
            sub_dict = create_stats_ordered_dict(
                "{0}_{1}".format(name, number),
                d,
            )
            ordered_dict.update(sub_dict)
        return ordered_dict
    if isinstance(data, list):
        try:
            iter(data[0])
        except TypeError:
            pass
        else:
            data = np.concatenate(data)
    if (isinstance(data, np.ndarray) and data.size == 1
            and not always_show_all_stats):
        return OrderedDict({name: float(data)})
    stats = OrderedDict([
        (name + ' Mean', np.mean(data)),
        (name + ' Std', np.std(data)),
    ])
    if not exclude_max_min:
        stats[name + ' Max'] = np.max(data)
        stats[name + ' Min'] = np.min(data)
    return stats
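
# Illustrative usage (not part of the original file): for a 1-D numpy array the
# function returns an OrderedDict of summary statistics keyed by the given name.
#
#     stats = create_stats_ordered_dict('Returns', np.array([1.0, 2.0, 3.0]))
#     # keys: 'Returns Mean', 'Returns Std', 'Returns Max', 'Returns Min'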


def identity(x):
    return x


_str_to_activation = {
    'identity': identity,
    'relu': nn.ReLU(),
    'tanh': nn.Tanh(),
    'leaky_relu': nn.LeakyReLU(),
    'sigmoid': nn.Sigmoid(),
    'selu': nn.SELU(),
    'softplus': nn.Softplus(),
}


def activation_from_string(string):
    return _str_to_activation[string]
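
# Illustrative usage (not part of the original file): look up an activation by
# name; 'identity' maps to a plain function, the rest to nn.Module instances.
#
#     act = activation_from_string('relu')   # -> an nn.ReLU() instance
#     y = act(torch.randn(4))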


def soft_update_from_to(source, target, tau):
    for target_param, param in zip(target.parameters(), source.parameters()):
        target_param.data.copy_(
            target_param.data * (1.0 - tau) + param.data * tau
        )


def copy_model_params_from_to(source, target):
    for target_param, param in zip(target.parameters(), source.parameters()):
        target_param.data.copy_(param.data)
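
# Illustrative usage (not part of the original file): Polyak-averaged target
# network updates, as commonly used in actor-critic algorithms. `qf` and
# `target_qf` are hypothetical networks with identical architectures.
#
#     copy_model_params_from_to(qf, target_qf)       # hard sync once
#     soft_update_from_to(qf, target_qf, tau=0.005)  # soft update every step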


def maximum_2d(t1, t2):
    # noinspection PyArgumentList
    # torch.max over dim=2 already drops that dimension (keepdim defaults to
    # False), so the result is 2-D and no extra squeeze is needed.
    return torch.max(
        torch.cat((t1.unsqueeze(2), t2.unsqueeze(2)), dim=2),
        dim=2,
    )[0]
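
# Illustrative usage (not part of the original file): element-wise maximum of
# two 2-D tensors with the same shape.
#
#     maximum_2d(torch.zeros(3, 2), torch.ones(3, 2))   # -> all-ones (3, 2)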


def kronecker_product(t1, t2):
    """
    Computes the Kronecker product between two tensors.
    See https://en.wikipedia.org/wiki/Kronecker_product
    """
    t1_height, t1_width = t1.size()
    t2_height, t2_width = t2.size()
    out_height = t1_height * t2_height
    out_width = t1_width * t2_width
    # TODO(vitchyr): see if you can use expand instead of repeat
    tiled_t2 = t2.repeat(t1_height, t1_width)
    expanded_t1 = (
        t1.unsqueeze(2)
        .unsqueeze(3)
        .repeat(1, t2_height, t2_width, 1)
        .view(out_height, out_width)
    )
    return expanded_t1 * tiled_t2
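
# Illustrative usage (not part of the original file): the Kronecker product of
# an (m x n) matrix with a (p x q) matrix has shape (m*p, n*q).
#
#     a = torch.eye(2)        # (2, 2)
#     b = torch.ones(3, 4)    # (3, 4)
#     kronecker_product(a, b).shape   # -> torch.Size([6, 8])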


def alpha_dropout(
        x,
        p=0.05,
        alpha=-1.7580993408473766,
        fixedPointMean=0,
        fixedPointVar=1,
        training=False,
):
    keep_prob = 1 - p
    if keep_prob == 1 or not training:
        return x
    a = np.sqrt(fixedPointVar / (keep_prob * (
        (1 - keep_prob) * pow(alpha - fixedPointMean, 2) + fixedPointVar)))
    b = fixedPointMean - a * (
        keep_prob * fixedPointMean + (1 - keep_prob) * alpha)
    random_tensor = keep_prob + torch.rand(x.size())
    binary_tensor = torch.floor(random_tensor)
    x = x.mul(binary_tensor)
    ret = x + alpha * (1 - binary_tensor)
    ret.mul_(a).add_(b)
    return ret


def alpha_selu(x, training=False):
    # nn.SELU is a module class; apply the functional form to the input.
    return alpha_dropout(nn.functional.selu(x), training=training)
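
# Illustrative usage (not part of the original file): alpha_dropout is a no-op
# unless training=True (or p == 0); with training=True it drops units to the
# saturation value `alpha` and rescales to preserve mean and variance.
#
#     y = alpha_dropout(torch.randn(8, 16), p=0.05, training=True)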


def double_moments(x, y):
    """
    Returns the first two moments between x and y.

    Specifically, for each vector x_i and y_i in x and y, compute their
    outer product. Flatten the resulting matrix and return it.

    The first moments (i.e. x_i and y_i) are included by appending a `1` to
    x_i and y_i before taking the outer product.
    :param x: Shape [batch_size, feature_x_dim]
    :param y: Shape [batch_size, feature_y_dim]
    :return: Shape [batch_size, (feature_x_dim + 1) * (feature_y_dim + 1)]
    """
    batch_size, x_dim = x.size()
    _, y_dim = y.size()
    x = torch.cat((x, torch.ones(batch_size, 1)), dim=1)
    y = torch.cat((y, torch.ones(batch_size, 1)), dim=1)
    x_dim += 1
    y_dim += 1
    x = x.unsqueeze(2)
    y = y.unsqueeze(1)
    outer_prod = (
        x.expand(batch_size, x_dim, y_dim)
        * y.expand(batch_size, x_dim, y_dim)
    )
    return outer_prod.view(batch_size, -1)
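
# Illustrative usage (not part of the original file): with x of shape (5, 3)
# and y of shape (5, 2), the output has shape (5, (3 + 1) * (2 + 1)) = (5, 12).
#
#     double_moments(torch.randn(5, 3), torch.randn(5, 2)).shape
#     # -> torch.Size([5, 12])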


def batch_diag(diag_values, diag_mask=None):
    batch_size, dim = diag_values.size()
    if diag_mask is None:
        diag_mask = torch.diag(torch.ones(dim))
    batch_diag_mask = diag_mask.unsqueeze(0).expand(batch_size, dim, dim)
    batch_diag_values = diag_values.unsqueeze(1).expand(batch_size, dim, dim)
    return batch_diag_values * batch_diag_mask
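
# Illustrative usage (not part of the original file): turn a batch of diagonal
# entries into a batch of diagonal matrices.
#
#     vals = torch.tensor([[1., 2.], [3., 4.]])   # (batch=2, dim=2)
#     batch_diag(vals).shape                      # -> torch.Size([2, 2, 2])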


def batch_square_vector(vector, M):
    """
    Compute x^T M x for a batch of vectors x and matrices M.
    """
    vector = vector.unsqueeze(2)
    return torch.bmm(torch.bmm(vector.transpose(2, 1), M), vector).squeeze(2)
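
# Illustrative usage (not part of the original file): batched quadratic form.
#
#     x = torch.randn(10, 4)      # (batch, dim)
#     M = torch.randn(10, 4, 4)   # (batch, dim, dim)
#     batch_square_vector(x, M).shape   # -> torch.Size([10, 1])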


def fanin_init(tensor):
    size = tensor.size()
    if len(size) == 2:
        fan_in = size[0]
    elif len(size) > 2:
        fan_in = np.prod(size[1:])
    else:
        raise Exception("Shape must have at least 2 dimensions.")
    bound = 1. / np.sqrt(fan_in)
    return tensor.data.uniform_(-bound, bound)


def fanin_init_weights_like(tensor):
    size = tensor.size()
    if len(size) == 2:
        fan_in = size[0]
    elif len(size) > 2:
        fan_in = np.prod(size[1:])
    else:
        raise Exception("Shape must have at least 2 dimensions.")
    bound = 1. / np.sqrt(fan_in)
    new_tensor = FloatTensor(tensor.size())
    new_tensor.uniform_(-bound, bound)
    return new_tensor
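
# Illustrative usage (not part of the original file): initialize a linear
# layer's weight in place; filling the bias with a small constant afterwards is
# a common companion step, shown here only as an example.
#
#     layer = nn.Linear(256, 64)
#     fanin_init(layer.weight)
#     layer.bias.data.fill_(0.1)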


def almost_identity_weights_like(tensor):
    """
    Set W = I + lambda * noise, with lambda = 0.01.
    :param tensor:
    :return:
    """
    shape = tensor.size()
    init_value = np.eye(*shape)
    # NOTE: np.random.rand draws uniform noise in [0, 1), not Gaussian noise.
    init_value += 0.01 * np.random.rand(*shape)
    return FloatTensor(init_value)


def clip1(x):
    return torch.clamp(x, -1, 1)


def compute_conv_output_size(h_in, w_in, kernel_size, stride, padding=0):
    h_out = (h_in + 2 * padding - (kernel_size - 1) - 1) / stride + 1
    w_out = (w_in + 2 * padding - (kernel_size - 1) - 1) / stride + 1
    return int(np.floor(h_out)), int(np.floor(w_out))


def compute_deconv_output_size(h_in, w_in, kernel_size, stride, padding=0):
    h_out = (h_in - 1) * stride - 2 * padding + kernel_size
    w_out = (w_in - 1) * stride - 2 * padding + kernel_size
    return int(np.floor(h_out)), int(np.floor(w_out))
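
# Illustrative check (not part of the original file): these implement the
# standard formulas H_out = floor((H + 2*padding - kernel_size) / stride) + 1
# for convolution and H_out = (H - 1)*stride - 2*padding + kernel_size for
# transposed convolution. For an 84x84 input with kernel_size=8, stride=4:
#
#     compute_conv_output_size(84, 84, kernel_size=8, stride=4)   # -> (20, 20)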


def compute_conv_layer_sizes(h_in, w_in, kernel_sizes, strides, paddings=None):
    if paddings is None:
        for kernel, stride in zip(kernel_sizes, strides):
            h_in, w_in = compute_conv_output_size(h_in, w_in, kernel, stride)
            print('Output Size:', (h_in, w_in))
    else:
        for kernel, stride, padding in zip(kernel_sizes, strides, paddings):
            h_in, w_in = compute_conv_output_size(h_in, w_in, kernel, stride,
                                                  padding=padding)
            print('Output Size:', (h_in, w_in))


def compute_deconv_layer_sizes(h_in, w_in, kernel_sizes, strides,
                               paddings=None):
    if paddings is None:
        for kernel, stride in zip(kernel_sizes, strides):
            h_in, w_in = compute_deconv_output_size(h_in, w_in, kernel, stride)
            print('Output Size:', (h_in, w_in))
    else:
        for kernel, stride, padding in zip(kernel_sizes, strides, paddings):
            h_in, w_in = compute_deconv_output_size(h_in, w_in, kernel, stride,
                                                    padding=padding)
            print('Output Size:', (h_in, w_in))
"""
GPU wrappers
"""
_use_gpu = False
device = None
def set_gpu_mode(mode, gpu_id=0):
global _use_gpu
global device
global _gpu_id
_gpu_id = gpu_id
_use_gpu = mode
device = torch.device("cuda:" + str(gpu_id) if _use_gpu else "cpu")
def gpu_enabled():
return _use_gpu
def set_device(gpu_id):
torch.cuda.set_device(gpu_id)


# noinspection PyPep8Naming
def FloatTensor(*args, torch_device=None, **kwargs):
    if torch_device is None:
        torch_device = device
    return torch.FloatTensor(*args, **kwargs, device=torch_device)


def from_numpy(*args, **kwargs):
    return torch.from_numpy(*args, **kwargs).float().to(device)


def get_numpy(tensor):
    return tensor.to('cpu').detach().numpy()


def randint(*sizes, torch_device=None, **kwargs):
    if torch_device is None:
        torch_device = device
    return torch.randint(*sizes, **kwargs, device=torch_device)


def zeros(*sizes, torch_device=None, **kwargs):
    if torch_device is None:
        torch_device = device
    return torch.zeros(*sizes, **kwargs, device=torch_device)


def ones(*sizes, torch_device=None, **kwargs):
    if torch_device is None:
        torch_device = device
    return torch.ones(*sizes, **kwargs, device=torch_device)


def ones_like(*args, torch_device=None, **kwargs):
    if torch_device is None:
        torch_device = device
    return torch.ones_like(*args, **kwargs, device=torch_device)


def randn(*args, torch_device=None, **kwargs):
    if torch_device is None:
        torch_device = device
    return torch.randn(*args, **kwargs, device=torch_device)


def zeros_like(*args, torch_device=None, **kwargs):
    if torch_device is None:
        torch_device = device
    return torch.zeros_like(*args, **kwargs, device=torch_device)


def tensor(*args, torch_device=None, **kwargs):
    if torch_device is None:
        torch_device = device
    return torch.tensor(*args, **kwargs, device=torch_device)


def normal(*args, **kwargs):
    return torch.normal(*args, **kwargs).to(device)
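
# Illustrative usage (not part of the original file): pick the device once,
# then create tensors through the wrappers so they land on that device.
#
#     set_gpu_mode(torch.cuda.is_available())
#     x = from_numpy(np.zeros((4, 3)))   # float32 tensor on `device`
#     z = zeros(4, 3)                    # created directly on `device`
#     x_np = get_numpy(x)                # back to a numpy array on the CPU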