"""
File name: simple_network.py
Author: Benjamin Planche
Date created: 10.12.2018
Date last modified: 11:27 03.04.2019
Python Version: 3.6
Copyright = "Copyright (C) 2018-2019 of Packt"
Credits = ["Eliot Andres, Benjamin Planche"]
License = "MIT"
Version = "1.0.0"
Maintainer = "non"
Status = "Prototype" # "Prototype", "Development", or "Production"
"""
#==============================================================================
# Imported Modules
#==============================================================================
import numpy as np
from fully_connected_layer import FullyConnectedLayer
#==============================================================================
# Function Definitions
#==============================================================================
def sigmoid(x):                             # sigmoid function
    return 1 / (1 + np.exp(-x))             # y


def derivated_sigmoid(y):                   # sigmoid derivative function
    return y * (1 - y)


def loss_L2(pred, target):                  # L2 loss function
    return np.sum(np.square(pred - target)) / pred.shape[0]    # opt. we divide by the batch size


def derivated_loss_L2(pred, target):        # L2 derivative function
    return 2 * (pred - target)


def cross_entropy(pred, target):            # cross-entropy loss function
    return -np.mean(np.multiply(np.log(pred), target) + np.multiply(np.log(1 - pred), (1 - target)))


def derivated_cross_entropy(pred, target):  # cross-entropy derivative function
    return (pred - target) / (pred * (1 - pred))
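

# Quick sanity check of the functions above (illustrative addition, values assumed).
# Note that `derivated_sigmoid` expects the sigmoid *output* y, not the input x,
# which is why it is fed sigmoid(x):
#
#   x = np.array([[0.0, 2.0]])
#   y = sigmoid(x)                          # ~[[0.5, 0.881]]
#   dy = derivated_sigmoid(y)               # ~[[0.25, 0.105]]
#   loss_L2(y, np.array([[0., 1.]]))        # ~0.264 = (0.5^2 + 0.119^2) / 1
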
#==============================================================================
# Class Definition
#==============================================================================
class SimpleNetwork(object):
    """A simple fully-connected NN.

    Args:
        num_inputs (int): The input vector size / number of input values.
        num_outputs (int): The output vector size.
        hidden_layers_sizes (list): A list of sizes for each hidden layer to add to the network.
        activation_function (callable): The activation function for all the layers.
        derivated_activation_function (callable): The derivated activation function.
        loss_function (callable): The loss function to train this network.
        derivated_loss_function (callable): The derivative of the loss function, for back-propagation.

    Attributes:
        layers (list): The list of layers forming this simple network.
        loss_function (callable): The loss function to train this network.
        derivated_loss_function (callable): The derivative of the loss function, for back-propagation.
    """

    def __init__(self, num_inputs, num_outputs, hidden_layers_sizes=(64, 32),
                 activation_function=sigmoid, derivated_activation_function=derivated_sigmoid,
                 loss_function=loss_L2, derivated_loss_function=derivated_loss_L2):
        super().__init__()
        # We build the list of layers composing the network, according to the provided arguments:
        layer_sizes = [num_inputs, *hidden_layers_sizes, num_outputs]
        self.layers = [
            FullyConnectedLayer(layer_sizes[i], layer_sizes[i + 1],
                                activation_function, derivated_activation_function)
            for i in range(len(layer_sizes) - 1)]
        self.loss_function = loss_function
        self.derivated_loss_function = derivated_loss_function
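        # Example (illustrative addition, values assumed): SimpleNetwork(num_inputs=784, num_outputs=10)
        # with the default hidden_layers_sizes=(64, 32) gives layer_sizes = [784, 64, 32, 10], i.e.
        # three FullyConnectedLayer instances: 784->64, 64->32, 32->10.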

    def forward(self, x):
        """
        Forward the input vector through the layers, returning the output vector.

        Args:
            x (ndarray): The input vector, of shape `(batch_size, num_inputs)`.
        Returns:
            activation (ndarray): The output activation values, of shape `(batch_size, num_outputs)`.
        """
        for layer in self.layers:               # from the input layer to the output one
            x = layer.forward(x)
        return x

    def predict(self, x):
        """
        Compute the output corresponding to input `x`, and return the index of the largest output value.

        Args:
            x (ndarray): The input vector, of shape `(1, num_inputs)`.
        Returns:
            best_class (int): The predicted class ID.
        """
        estimations = self.forward(x)
        best_class = np.argmax(estimations)
        return best_class
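        # Example (illustrative addition, shapes assumed): for a 28x28 grayscale image flattened
        # into a (1, 784) row vector, `predict` returns the index of the highest-scoring output
        # neuron, e.g. a digit class between 0 and 9.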

    def backward(self, dL_dy):
        """
        Back-propagate the loss through the layers (requires `forward()` to have been called beforehand).

        Args:
            dL_dy (ndarray): The loss derivative w.r.t. the network's output (dL/dy).
        Returns:
            dL_dx (ndarray): The loss derivative w.r.t. the network's input (dL/dx).
        """
        for layer in reversed(self.layers):     # from the output layer to the input one
            dL_dy = layer.backward(dL_dy)
        return dL_dy

    def optimize(self, epsilon):
        """
        Optimize the network parameters according to the stored gradients (requires `backward()` to have been called beforehand).

        Args:
            epsilon (float): The learning rate.
        """
        for layer in self.layers:               # the order doesn't matter here
            layer.optimize(epsilon)

    def evaluate_accuracy(self, X_val, y_val):
        """
        Given a dataset and its ground-truth labels, evaluate the current accuracy of the network.

        Args:
            X_val (ndarray): The input validation dataset.
            y_val (ndarray): The corresponding ground-truth validation dataset.
        Returns:
            accuracy (float): The accuracy of the network (= number of correct predictions / dataset size).
        """
        num_corrects = 0
        for i in range(len(X_val)):
            pred_class = self.predict(X_val[i])
            if pred_class == y_val[i]:
                num_corrects += 1
        return num_corrects / len(X_val)

    def train(self, X_train, y_train, X_val=None, y_val=None, batch_size=32, num_epochs=5, learning_rate=1e-3):
        """
        Given a dataset and its ground-truth labels, train the network, optionally evaluating it on the validation data after each epoch.

        Args:
            X_train (ndarray): The input training dataset.
            y_train (ndarray): The corresponding ground-truth training dataset.
            X_val (ndarray): The input validation dataset.
            y_val (ndarray): The corresponding ground-truth validation dataset.
            batch_size (int): The mini-batch size.
            num_epochs (int): The number of training epochs, i.e. iterations over the whole dataset.
            learning_rate (float): The learning rate to scale the derivatives.
        Returns:
            losses (list): The list of training losses for each epoch.
            accuracies (list): The list of validation accuracy values for each epoch.
        """
        num_batches_per_epoch = len(X_train) // batch_size
        do_validation = X_val is not None and y_val is not None
        losses, accuracies = [], []

        for i in range(num_epochs):                     # for each training epoch
            epoch_loss = 0
            for b in range(num_batches_per_epoch):      # for each batch composing the dataset
                # Get batch:
                batch_index_begin = b * batch_size
                batch_index_end = batch_index_begin + batch_size
                x = X_train[batch_index_begin: batch_index_end]
                targets = y_train[batch_index_begin: batch_index_end]
                # Optimize on batch:
                predictions = y = self.forward(x)       # forward pass
                L = self.loss_function(predictions, targets)                    # loss computation
                dL_dy = self.derivated_loss_function(predictions, targets)      # loss derivation
                self.backward(dL_dy)                    # back-propagation pass
                self.optimize(learning_rate)            # optimization of the NN
                epoch_loss += L
            # Logging training loss and validation accuracy, to follow the training:
            epoch_loss /= num_batches_per_epoch
            losses.append(epoch_loss)
            if do_validation:
                accuracy = self.evaluate_accuracy(X_val, y_val)
                accuracies.append(accuracy)
            else:
                accuracy = np.nan
            print("Epoch {:4d}: training loss = {:.6f} | val accuracy = {:.2f}%".format(i, epoch_loss, accuracy * 100))
        return losses, accuracies
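

# ------------------------------------------------------------------------------
# Minimal usage sketch (illustrative addition, not part of the original file).
# It assumes `FullyConnectedLayer` exposes the forward/backward/optimize interface
# used above, and trains on random stand-in data only to demonstrate the expected
# call sequence and data shapes; real experiments would load an actual dataset.
# ------------------------------------------------------------------------------
if __name__ == "__main__":
    np.random.seed(42)
    num_inputs, num_classes = 784, 10       # e.g. flattened 28x28 images, 10 classes (assumed)

    # Random stand-in data:
    X_train = np.random.rand(256, num_inputs)
    y_train_labels = np.random.randint(0, num_classes, size=256)
    y_train = np.eye(num_classes)[y_train_labels]       # one-hot targets, as expected by the L2 loss
    X_val = np.random.rand(64, num_inputs)
    y_val = np.random.randint(0, num_classes, size=64)  # class IDs, as expected by evaluate_accuracy()

    classifier = SimpleNetwork(num_inputs=num_inputs, num_outputs=num_classes,
                               hidden_layers_sizes=(64, 32))
    losses, accuracies = classifier.train(X_train, y_train, X_val, y_val,
                                          batch_size=32, num_epochs=2, learning_rate=1e-3)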