'''
Learning rate annealing recommends starting with a relatively high learning
rate and then gradually lowering it during training. The intuition behind this
approach is that we'd like to traverse quickly from the initial parameters to a
range of "good" parameter values, but then use a learning rate small enough to
explore the "deeper, but narrower parts of the loss function".

Ref: https://cs231n.github.io/neural-networks-3/#annealing-the-learning-rate
Ref: https://www.jeremyjordan.me/nn-learning-rate/
'''
from keras.callbacks import Callback
from keras import backend as K
import matplotlib.pyplot as plt
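

# A minimal sketch, not part of the original module: one example of the kind of
# annealing schedule described in the module docstring above. It assumes a
# hypothetical step-decay policy (halve the learning rate every 10 epochs); a
# function like this could be passed to keras.callbacks.LearningRateScheduler
# to anneal the rate during model.fit.
def step_decay(epoch, initial_lr=1e-2, drop=0.5, epochs_per_drop=10):
    '''Step decay: multiply the initial rate by `drop` every `epochs_per_drop` epochs.'''
    return initial_lr * (drop ** (epoch // epochs_per_drop))

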
class LRFinder(Callback):
    '''
    A simple callback for finding the optimal learning rate range for your model + dataset.

    # Usage
        ```python
        lr_finder = LRFinder(min_lr=1e-5,
                             max_lr=1e-2,
                             steps_per_epoch=np.ceil(epoch_size/batch_size),
                             epochs=3)
        model.fit(X_train, Y_train, callbacks=[lr_finder])

        lr_finder.plot_loss()
        ```

    # Arguments
        min_lr: The lower bound of the learning rate range for the experiment.
        max_lr: The upper bound of the learning rate range for the experiment.
        steps_per_epoch: Number of mini-batches in the dataset. Calculated as `np.ceil(epoch_size/batch_size)`.
        epochs: Number of epochs to run the experiment. Usually between 2 and 4 epochs is sufficient.

    # References
        Blog post: jeremyjordan.me/nn-learning-rate
        Original paper: https://arxiv.org/abs/1506.01186
    '''

    def __init__(self, min_lr=1e-5, max_lr=1e-2, steps_per_epoch=None, epochs=None):
        super().__init__()
        self.min_lr = min_lr
        self.max_lr = max_lr
        self.total_iterations = steps_per_epoch * epochs
        self.iteration = 0
        self.history = {}

    def clr(self):
        '''Calculate the current learning rate (linear ramp from min_lr to max_lr).'''
        x = self.iteration / self.total_iterations
        return self.min_lr + (self.max_lr - self.min_lr) * x

    def on_train_begin(self, logs=None):
        '''Initialize the learning rate to the minimum value at the start of training.'''
        logs = logs or {}
        K.set_value(self.model.optimizer.lr, self.min_lr)

    def on_batch_end(self, batch, logs=None):
        '''Record previous batch statistics and update the learning rate.'''
        logs = logs or {}
        self.iteration += 1

        self.history.setdefault('lr', []).append(K.get_value(self.model.optimizer.lr))
        self.history.setdefault('iterations', []).append(self.iteration)

        for k, v in logs.items():
            self.history.setdefault(k, []).append(v)

        K.set_value(self.model.optimizer.lr, self.clr())

    def plot_lr(self):
        '''Helper function to quickly inspect the learning rate schedule.'''
        plt.plot(self.history['iterations'], self.history['lr'])
        plt.yscale('log')
        plt.xlabel('Iteration')
        plt.ylabel('Learning rate')

    def plot_loss(self):
        '''Helper function to quickly observe the learning rate experiment results.'''
        plt.plot(self.history['lr'], self.history['loss'])
        plt.xscale('log')
        plt.xlabel('Learning rate')
        plt.ylabel('Loss')
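

# A self-contained usage sketch (an assumption, not part of the original
# module): run the LR range test on a tiny random regression problem and plot
# loss against learning rate. The toy model, data, and hyperparameters below
# are illustrative only.
if __name__ == '__main__':
    import numpy as np
    from keras.models import Sequential
    from keras.layers import Dense

    # Toy data: 1000 samples, 16 features, scalar regression target.
    X_train = np.random.rand(1000, 16)
    Y_train = np.random.rand(1000, 1)

    model = Sequential([Dense(32, activation='relu', input_shape=(16,)),
                        Dense(1)])
    model.compile(optimizer='sgd', loss='mse')

    batch_size = 32
    epochs = 3
    lr_finder = LRFinder(min_lr=1e-5,
                         max_lr=1e-2,
                         steps_per_epoch=int(np.ceil(len(X_train) / batch_size)),
                         epochs=epochs)
    model.fit(X_train, Y_train, batch_size=batch_size, epochs=epochs,
              callbacks=[lr_finder])

    lr_finder.plot_loss()
    plt.show()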