-
Notifications
You must be signed in to change notification settings - Fork 0
/
cnn.py
162 lines (120 loc) · 5.8 KB
/
cnn.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
# -*- coding: utf-8 -*-
import os
import numpy as np
import glob
import shutil
import matplotlib.pyplot as plt
# %matplotlib inline
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Conv2D, Flatten, Dropout, MaxPooling2D
from tensorflow.keras.preprocessing.image import ImageDataGenerator
"""## Downloading the dataset"""
_URL = "https://storage.googleapis.com/download.tensorflow.org/example_images/flower_photos.tgz"
zip_file = tf.keras.utils.get_file(origin=_URL,
fname="flower_photos.tgz",
extract=True)
base_dir = os.path.join(os.path.dirname(zip_file), 'flower_photos')
"""## Exploring our dataset"""
classes = ['roses', 'daisy', 'dandelion', 'sunflowers', 'tulips']
for cl in classes:
img_path = os.path.join(base_dir, cl)
images = glob.glob(img_path + '/*.jpg')
print("{}: {} Images".format(cl, len(images)))
num_train = int(round(len(images)*0.8))
train, val = images[:num_train], images[num_train:]
for t in train:
if not os.path.exists(os.path.join(base_dir, 'train', cl)):
os.makedirs(os.path.join(base_dir, 'train', cl))
shutil.move(t, os.path.join(base_dir, 'train', cl))
for v in val:
if not os.path.exists(os.path.join(base_dir, 'val', cl)):
os.makedirs(os.path.join(base_dir, 'val', cl))
shutil.move(v, os.path.join(base_dir, 'val', cl))
round(len(images)*0.8)
train_dir = os.path.join(base_dir, 'train')
val_dir = os.path.join(base_dir, 'val')
"""# Data Augmentation
Overfitting generally occurs when we have small number of training examples. One way to fix this problem is to augment our dataset so that it has sufficient number of training examples. Data augmentation takes the approach of generating more training data from existing training samples, by augmenting the samples via a number of random transformations that yield believable-looking images. The goal is that at training time, your model will never see the exact same picture twice. This helps expose the model to more aspects of the data and generalize better.
"""
batch_size = 200
IMG_SHAPE = 256
image_gen_train = ImageDataGenerator(
rescale=1./255,
rotation_range=45,
width_shift_range=.25,
height_shift_range=.25,
horizontal_flip=True,
vertical_flip=True,
zoom_range=0.3,
)
train_data_gen = image_gen_train.flow_from_directory(
batch_size=batch_size,
directory=train_dir,
shuffle=True,
target_size=(IMG_SHAPE,IMG_SHAPE),
class_mode='sparse'
)
"""Let's preview the changes that we made in a random picture from our dataset."""
# This function will plot images in the form of a grid with 1 row and 5 columns where images are placed in each column.
def plotImages(images_arr):
fig, axes = plt.subplots(1, 10, figsize=(50,50))
axes = axes.flatten()
for img, ax in zip( images_arr, axes):
ax.imshow(img)
plt.tight_layout()
plt.show()
augmented_images = [train_data_gen[0][0][0] for i in range(10)]
plotImages(augmented_images)
"""Now let's create our validation data set. Note that in the validation data set we will not use data augmentantion as we want to test the accuracy of our model in real data."""
image_gen_val = ImageDataGenerator(rescale=1./255)
val_data_gen = image_gen_val.flow_from_directory(batch_size=batch_size,
directory=val_dir,
target_size=(IMG_SHAPE, IMG_SHAPE),
class_mode='sparse')
"""## Setting up the network's architecture"""
model = Sequential()
model.add(Conv2D(16, 3, padding='same', activation='relu', input_shape=(IMG_SHAPE,IMG_SHAPE, 3)))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Conv2D(32, 3, padding='same', activation='relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Conv2D(64, 3, padding='same', activation='relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Flatten())
model.add(Dropout(0.2))
model.add(Dense(512, activation='relu'))
model.add(Dropout(0.2))
model.add(Dense(5, activation='softmax'))
"""And compiling our model."""
model.compile(optimizer='adam',
loss='sparse_categorical_crossentropy',
metrics=['accuracy'])
"""## Training the CNN.
We will randomly choose to train our model in 80 epochs. This will give us a clear view in order to know if we need more epochs or less depedning on the loss function that we will compare after the completion of training of our model.
"""
epochs = 80
history = model.fit_generator(
train_data_gen,
steps_per_epoch=int(np.ceil(train_data_gen.n / float(batch_size))),
epochs=epochs,
validation_data=val_data_gen,
validation_steps=int(np.ceil(val_data_gen.n / float(batch_size))),
verbose=1
)
acc = history.history['acc']
val_acc = history.history['val_acc']
loss = history.history['loss']
val_loss = history.history['val_loss']
epochs_range = range(epochs)
plt.figure(figsize=(8, 8))
plt.subplot(1, 2, 1)
plt.plot(epochs_range, acc, label='Training Accuracy')
plt.plot(epochs_range, val_acc, label='Validation Accuracy')
plt.legend(loc='lower right')
plt.title('Training and Validation Accuracy')
plt.subplot(1, 2, 2)
plt.plot(epochs_range, loss, label='Training Loss')
plt.plot(epochs_range, val_loss, label='Validation Loss')
plt.legend(loc='upper right')
plt.title('Training and Validation Loss')
plt.show()