nn_layers.py
import numpy as np


class Layer:
    """Base class: every layer implements a forward pass and a backward pass."""

    def forward(self, input):
        raise NotImplementedError

    def backward(self, output_gradient, learning_rate):
        raise NotImplementedError
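
# A minimal sketch (not part of the original file) that makes the Layer
# contract explicit: an identity layer passes inputs through on forward and
# passes the upstream gradient through unchanged on backward.
class IdentityLayer(Layer):
    def forward(self, input):
        return input

    def backward(self, output_gradient, learning_rate):
        # No parameters to update; the gradient flows through as-is.
        return output_gradient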
class DenseLayer(Layer):
    def __init__(self, input_size, output_size):
        # He initialization: scaling by sqrt(2 / input_size) keeps activation
        # variance stable in ReLU networks.
        self.weights = np.random.randn(input_size, output_size) * np.sqrt(2. / input_size)
        self.biases = np.zeros((1, output_size))
        self.verbose_forward = False
        self.verbose_backward = False

    def forward(self, input):
        self.stored_input = input
        # input is shaped (batch_size, input_size)
        # weights is shaped (input_size, output_size)
        # biases is shaped (1, output_size)
        # ---
        # > Compute: the output passed to the next layer by multiplying inputs with weights
        # > Dot product: input (batch_size, input_size) x weights (input_size, output_size) = (batch_size, output_size)
        self.output = np.dot(input, self.weights) + self.biases
        if self.verbose_forward:
            print(f'forward input shape {self.stored_input.shape} weights shape {self.weights.shape} biases shape {self.biases.shape} output shape {self.output.shape}')
        return self.output

    def backward(self, output_gradient, learning_rate):
        # ---
        # > Compute: the gradient of the loss with respect to the input
        # > Dot product: output_gradient (batch_size, output_size) x weights transposed (output_size, input_size) = (batch_size, input_size)
        input_gradient = np.dot(output_gradient, self.weights.T)
        if self.verbose_backward:
            print(f'backward output_gradient {output_gradient.shape} weights transposed {self.weights.T.shape}')
        # ---
        # > Compute: the gradient of the loss with respect to the weights
        # > Dot product: input transposed (input_size, batch_size) x output_gradient (batch_size, output_size) = (input_size, output_size)
        weights_gradient = np.dot(self.stored_input.T, output_gradient)
        # > Compute: the gradient of the loss with respect to the biases
        # > Sum over all examples in the batch
        biases_gradient = np.sum(output_gradient, axis=0, keepdims=True)
        self.weights -= learning_rate * weights_gradient
        self.biases -= learning_rate * biases_gradient
        return input_gradient
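
# A minimal usage sketch for DenseLayer, assuming a batch of 4 samples with
# 3 features and a learning rate of 0.01 (illustrative values, not from the
# original file).
def _dense_demo():
    layer = DenseLayer(input_size=3, output_size=2)
    x = np.random.randn(4, 3)                        # (batch_size, input_size)
    out = layer.forward(x)                           # (4, 2)
    grad_in = layer.backward(np.ones_like(out), learning_rate=0.01)
    assert grad_in.shape == x.shape                  # gradient matches the input shape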
class ActivationLayer(Layer):
    def __init__(self, activation, activation_derivative):
        self.activation = activation
        self.activation_derivative = activation_derivative

    def forward(self, input):
        self.input = input
        self.output = self.activation(input)
        return self.output

    def backward(self, output_gradient, learning_rate):
        # Chain rule: scale the upstream gradient by the activation's
        # derivative evaluated at the stored forward input.
        return output_gradient * self.activation_derivative(self.input)
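
# A sketch of one activation pair that fits this interface. ReLU is a common
# choice given the He initialization above, but these helpers are
# illustrative and not part of the original file.
def relu(x):
    return np.maximum(0, x)

def relu_derivative(x):
    # The derivative is 1 where the forward input was positive, 0 elsewhere.
    return (x > 0).astype(x.dtype)

# Usage: relu_layer = ActivationLayer(relu, relu_derivative)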
class ConvLayer(Layer):
    def __init__(self, input_shape, num_filters, filter_size, stride=1, padding=0):
        # input_shape is (channels, height, width) for a single sample.
        self.input_shape = input_shape
        self.num_filters = num_filters
        self.filter_size = filter_size
        self.stride = stride
        self.padding = padding
        self.filters = np.random.randn(num_filters, input_shape[0], filter_size, filter_size) * 0.1

    def forward(self, input):
        self.input = input
        self.input_padded = np.pad(input, ((0, 0), (0, 0), (self.padding, self.padding), (self.padding, self.padding)), mode='constant')
        self.output_shape = (
            (self.input_padded.shape[2] - self.filter_size) // self.stride + 1,
            (self.input_padded.shape[3] - self.filter_size) // self.stride + 1
        )
        self.output = np.zeros((input.shape[0], self.num_filters, *self.output_shape))
        # Walk every output position; the matching input window starts at
        # (i * stride, j * stride) in the padded input.
        for i in range(self.output.shape[2]):
            for j in range(self.output.shape[3]):
                hs, ws = i * self.stride, j * self.stride
                region = self.input_padded[:, :, hs:hs + self.filter_size, ws:ws + self.filter_size]
                # Contract (batch, channels, fh, fw) with (filters, channels, fh, fw)
                # over the last three axes, giving a (batch, filters) slice.
                self.output[:, :, i, j] = np.tensordot(region, self.filters, axes=([1, 2, 3], [1, 2, 3]))
        return self.output

    def backward(self, output_gradient, learning_rate):
        filter_gradient = np.zeros(self.filters.shape)
        input_gradient_padded = np.zeros(self.input_padded.shape)
        for i in range(self.output.shape[2]):
            for j in range(self.output.shape[3]):
                hs, ws = i * self.stride, j * self.stride
                region = self.input_padded[:, :, hs:hs + self.filter_size, ws:ws + self.filter_size]
                for k in range(self.num_filters):
                    # dL/dW for filter k: each input window weighted by its upstream gradient.
                    filter_gradient[k] += np.sum(region * output_gradient[:, k, i, j][:, None, None, None], axis=0)
                for n in range(output_gradient.shape[0]):
                    # dL/dX for sample n: scatter each filter back onto its input window.
                    input_gradient_padded[n, :, hs:hs + self.filter_size, ws:ws + self.filter_size] += np.sum(
                        self.filters * output_gradient[n, :, i, j][:, None, None, None], axis=0)
        self.filters -= learning_rate * filter_gradient
        if self.padding != 0:
            input_gradient = input_gradient_padded[:, :, self.padding:-self.padding, self.padding:-self.padding]
        else:
            input_gradient = input_gradient_padded
        return input_gradient
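
# A minimal shape check for ConvLayer, assuming a 1-channel 8x8 input, 4
# filters of size 3, stride 1, and padding 1 (illustrative values, not from
# the original file). The output spatial size is (8 + 2*1 - 3) // 1 + 1 = 8,
# so "same" padding preserves the spatial dimensions here.
def _conv_demo():
    conv = ConvLayer(input_shape=(1, 8, 8), num_filters=4, filter_size=3, stride=1, padding=1)
    x = np.random.randn(2, 1, 8, 8)                  # (batch, channels, height, width)
    out = conv.forward(x)                            # (2, 4, 8, 8)
    grad_in = conv.backward(np.ones_like(out), learning_rate=0.01)
    assert grad_in.shape == x.shape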
class MaxPoolingLayer(Layer):
    def __init__(self, pool_size, stride):
        self.pool_size = pool_size
        self.stride = stride

    def forward(self, input):
        self.input = input
        self.output_shape = (
            (input.shape[2] - self.pool_size) // self.stride + 1,
            (input.shape[3] - self.pool_size) // self.stride + 1
        )
        self.output = np.zeros((input.shape[0], input.shape[1], *self.output_shape))
        # Walk every output position; the pooling window starts at
        # (i * stride, j * stride) in the input.
        for i in range(self.output.shape[2]):
            for j in range(self.output.shape[3]):
                hs, ws = i * self.stride, j * self.stride
                region = input[:, :, hs:hs + self.pool_size, ws:ws + self.pool_size]
                self.output[:, :, i, j] = np.max(region, axis=(2, 3))
        return self.output

    def backward(self, output_gradient, learning_rate):
        input_gradient = np.zeros(self.input.shape)
        for i in range(self.output.shape[2]):
            for j in range(self.output.shape[3]):
                hs, ws = i * self.stride, j * self.stride
                region = self.input[:, :, hs:hs + self.pool_size, ws:ws + self.pool_size]
                max_region = np.max(region, axis=(2, 3), keepdims=True)
                # Route the gradient only to the positions that held the max.
                region_mask = (region == max_region)
                input_gradient[:, :, hs:hs + self.pool_size, ws:ws + self.pool_size] += region_mask * output_gradient[:, :, i, j][:, :, None, None]
        return input_gradient
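
# A minimal end-to-end smoke test wiring the layers together: conv -> relu ->
# pool -> flatten -> dense, then the backward pass in reverse. This is a
# sketch assuming 1-channel 8x8 inputs and illustrative hyperparameters (none
# of these values come from the original file); it reuses the relu helpers
# sketched above.
if __name__ == '__main__':
    np.random.seed(0)
    x = np.random.randn(2, 1, 8, 8)
    conv = ConvLayer(input_shape=(1, 8, 8), num_filters=4, filter_size=3, stride=1, padding=1)
    act = ActivationLayer(relu, relu_derivative)
    pool = MaxPoolingLayer(pool_size=2, stride=2)
    dense = DenseLayer(input_size=4 * 4 * 4, output_size=10)

    # Forward pass.
    out = pool.forward(act.forward(conv.forward(x)))  # (2, 4, 4, 4)
    flat = out.reshape(out.shape[0], -1)              # (2, 64)
    logits = dense.forward(flat)                      # (2, 10)

    # Backward pass with a dummy upstream gradient, just to exercise shapes.
    grad = dense.backward(np.ones_like(logits), learning_rate=0.01)
    grad = pool.backward(grad.reshape(out.shape), learning_rate=0.01)
    grad = act.backward(grad, learning_rate=0.01)
    grad = conv.backward(grad, learning_rate=0.01)
    assert grad.shape == x.shape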