# face_recognition.py
import numpy as np
import pandas as pd
import cv2
from sklearn.externals import joblib
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix
import matplotlib.pyplot as plt
import os
from os import listdir
from os.path import isfile, join, exists
class EigenFace:
    """
    The PCA based algorithm for face recognition

    Images are flattened to a single row of ``image_x * image_y`` pixels and
    divided by 255. Training subtracts the average face, derives the leading
    eigenvectors ("eigenfaces") and stores one weight vector per training
    image. Prediction projects a new image the same way and compares its
    weight vector with the stored ones using the sum of squared errors.
    """

    def __init__(self, image_x=200, image_y=200, root_dir=""):
        """
        Creates an instance of EigenFace class

        Parameters:
        -------------
        image_x : integer
            The size/length of image row.
        image_y : integer
            The size/length of image column.
        root_dir : string, optional
            Location of directory where the program will save all the
            necessary model parameters (inside an ``etc`` sub-directory).
            Uses the current directory by default.
        """
        # Create the directory the paths below actually point to; creating a
        # bare 'etc' in the working directory would ignore root_dir.
        model_dir = os.path.join(root_dir, 'etc')
        os.makedirs(model_dir, exist_ok=True)

        self.image_x = image_x
        self.image_y = image_y

        self.label_csv_path = os.path.join(model_dir, 'data.csv')
        self.trained_weight_path = os.path.join(model_dir, 'weight.txt')
        self.eigen_vector_path = os.path.join(model_dir, 'eigen_vector.txt')
        self.average_face_path = os.path.join(model_dir, 'average_face.txt')
        self.train_model_output_path = os.path.join(model_dir, 'y_train.txt')
        self.x_train_path = os.path.join(model_dir, 'x_train.txt')

        self.eigen_vector = None
        self.y_train = None
        self.avg_face = None
        self.weights = None
        self.x_train = None  # for analysis purpose only

        self.label_df = None
        self.label_map = {}

    def generateLabels(self, dataset_path):
        """
        Creates a pandas DataFrame with 2 columns: the location of the image
        and the corresponding name of the associated face in the image

        Parameters
        --------
        dataset_path : string
            Path to the folder consisting of the images

        Returns
        --------
        pandas DataFrame with 2 columns (named 0 and 1): path to the image
        and the name of the user associated with the image

        Notes
        --------
        The dataset_path must be a directory of directories with each
        directory consisting the images of a single user with the name
        of that directory set to the name of that particular user.
        Entries of dataset_path that are not directories are skipped.
        Directories and files are visited in sorted order so the result is
        deterministic.
        """
        rows = []
        for user_name in sorted(listdir(dataset_path)):
            user_dir = join(dataset_path, user_name)
            if not os.path.isdir(user_dir):
                # A stray file next to the user folders is not a user.
                continue
            for image_name in sorted(listdir(user_dir)):
                image_path = join(user_dir, image_name)
                if isfile(image_path):
                    rows.append([image_path, user_name])

        self.label_map = {}
        # Build the frame once; row-by-row DataFrame.append is quadratic and
        # no longer exists in pandas 2.x.
        self.label_df = pd.DataFrame(rows, columns=[0, 1])
        return self.label_df

    def readLabels(self, label_df=None):
        """
        Reads the pandas DataFrame consisting of image path and the output(the
        name of the user associated with the image) and separates them into 2
        numpy arrays with one consisting of image paths and the other
        consisting of the output

        Parameters
        --------
        label_df : pandas.DataFrame, optional
            Frame whose first column is the image path and whose second
            column is the user name. When omitted, the CSV written by
            ``saveModel`` is read from ``self.label_csv_path``.

        Returns
        --------
        X : numpy n-d array, string
            Image paths
        y : numpy n-d array, string
            Output associated with the image path
        """
        if label_df is None:
            label_df = pd.read_csv(self.label_csv_path)
            # CSV files written with the DataFrame index carry it as an extra
            # leading column; the path/name pair is always the last two.
            label_df = label_df.iloc[:, -2:]
        X = label_df.iloc[:, 0].values
        y = label_df.iloc[:, 1].values
        return X, y

    def _prepare_image(self, image):
        """
        Returns `image` as a new (1, image_x*image_y) array divided by 255.

        The input is never modified. An image whose pixel count differs from
        image_x*image_y is rescaled with cv2.resize; a grayscale (2-D) image
        is expected in that case.
        """
        n_pixels = self.image_x * self.image_y
        pixels = np.asarray(image)
        if pixels.size != n_pixels:
            # cv2.resize takes the target size as (width, height).
            pixels = cv2.resize(pixels, (self.image_x, self.image_y))
        return pixels.reshape(1, n_pixels) / 255

    def _read_image(self, image_path):
        """
        Loads the image at `image_path` in grayscale and prepares it.
        Raises IOError when the file cannot be read as an image.
        """
        image = cv2.imread(str(image_path), cv2.IMREAD_GRAYSCALE)
        if image is None:
            # cv2.imread signals failure by returning None instead of raising.
            raise IOError("Could not read image: {}".format(image_path))
        return self._prepare_image(image)

    def _fit_faces(self, faces, num_of_eigen):
        """
        Computes average face, eigenfaces and training weights from `faces`,
        an (n_images, image_x*image_y) array of prepared images.
        """
        n_images = faces.shape[0]
        n_selected = min(int(num_of_eigen), n_images)
        if n_selected < 1:
            raise ValueError("num_of_eigen must be at least 1")

        # Calculate average face
        self.avg_face = faces.mean(axis=0).reshape(1, -1)
        # adjusted face dataset
        adjusted_face = faces - self.avg_face

        # The (n_images x n_images) matrix A.A^T shares its non-zero
        # eigenvalues with the much larger pixel covariance A^T.A.
        cov_matrix = np.dot(adjusted_face, adjusted_face.T)
        # The matrix is symmetric: eigh returns real values, and its
        # eigenvectors are the COLUMNS of the second result.
        eigen_values, eigen_vectors = np.linalg.eigh(cov_matrix)
        largest_first = np.argsort(eigen_values)[::-1][:n_selected]
        selected_eigen = eigen_vectors[:, largest_first]

        # Compute eigenvector for actual covariance matrix of the image.
        # The eigenfaces are not rescaled to unit length.
        self.eigen_vector = np.dot(adjusted_face.T, selected_eigen)
        # weight calculation: one column of weights per training image
        self.weights = np.dot(self.eigen_vector.T, adjusted_face.T)

    def trainModel(self, X, y, num_of_eigen):
        """
        Trains the model using EigenFace

        Parameters
        --------
        X : list, string
            Array of strings consisting of image path.
        y : list, string
            Output associated with image corresponding to X.
        num_of_eigen : int
            Number of eigen vectors to use when building model. Values larger
            than the number of training images are reduced to that number.

        Raises
        --------
        ValueError
            If X is empty, X and y differ in length or num_of_eigen < 1.
        IOError
            If one of the images cannot be read.
        """
        image_paths = np.asarray(X)
        labels = np.asarray(y)
        if image_paths.shape[0] == 0:
            raise ValueError("Cannot train on an empty set of images")
        if image_paths.shape[0] != labels.shape[0]:
            raise ValueError("X and y must have the same length")

        all_image = np.zeros(
            (image_paths.shape[0], self.image_x * self.image_y))
        for i, image_path in enumerate(image_paths):
            all_image[i, :] = self._read_image(image_path)

        self.x_train = image_paths
        self.y_train = labels
        self._fit_faces(all_image, num_of_eigen)

    def saveModel(self, label_df=None):
        """
        Saves the current model in the 'root_dir'.

        Parameters
        --------
        label_df : pandas.DataFrame, optional
            If provided, saves the pandas DataFrame consisting
            of the image paths and corresponding output, alongside
            the model.
        """
        joblib.dump(self.eigen_vector, self.eigen_vector_path)
        joblib.dump(self.avg_face, self.average_face_path)
        joblib.dump(self.weights, self.trained_weight_path)
        joblib.dump(self.y_train, self.train_model_output_path)
        joblib.dump(self.x_train, self.x_train_path)
        if label_df is not None:
            # Leave the index out so the file holds exactly path and name.
            label_df.to_csv(self.label_csv_path, index=False)

    def loadModel(self):
        """
        Loads the saved model

        NOTE: joblib files are pickle based; only load model files that come
        from a trusted source.
        """
        self.eigen_vector = joblib.load(self.eigen_vector_path)
        self.avg_face = joblib.load(self.average_face_path)
        self.weights = joblib.load(self.trained_weight_path)
        self.y_train = joblib.load(self.train_model_output_path)
        self.x_train = joblib.load(self.x_train_path)

    def fit(self, X, y, mode='train', num_of_eigen=20):
        """
        Trains a new model or loads a saved one.

        Parameters
        --------
        X : list, string
            Image paths; only used when mode is 'train'.
        y : list, string
            Names associated with X; only used when mode is 'train'.
        mode : string, default='train'
            'train' to build the model from X and y, 'load' to read the model
            previously written by saveModel.
        num_of_eigen : int, default=20
            Number of eigen vectors to use when mode is 'train'.

        Raises
        --------
        ValueError
            If mode is neither 'train' nor 'load'.
        """
        if mode == 'train':
            self.trainModel(X, y, num_of_eigen)
        elif mode == 'load':
            self.loadModel()
        else:
            raise ValueError(
                "mode must be 'train' or 'load', got {!r}".format(mode))

    def _squared_errors(self, face):
        """
        Returns the sum of squared weight differences between the prepared
        `face` and every training image (one value per training image).
        """
        if (self.eigen_vector is None or self.avg_face is None
                or self.weights is None):
            raise RuntimeError(
                "Model is not available; call fit() before predicting")
        adjusted_face = face - self.avg_face
        test_weight = np.dot(self.eigen_vector.T, adjusted_face.T)
        diff_weight = self.weights - test_weight
        return np.sum(diff_weight * diff_weight, axis=0)

    def _best_match_index(self, sum_of_squared_errors, n_neighbors):
        """
        Returns the index of the training image chosen for the prediction.

        With one neighbor this is the closest training image. Otherwise the
        n_neighbors closest images vote with their names and the closest
        image carrying the most frequent name is returned, so ties are
        decided in favour of the smaller distance.
        """
        if n_neighbors <= 1:
            return int(np.argmin(sum_of_squared_errors))

        names = np.asarray(self.y_train)
        nearest = np.argsort(sum_of_squared_errors, kind='stable')
        nearest = nearest[:n_neighbors]
        votes = {}
        for index in nearest:
            votes[names[index]] = votes.get(names[index], 0) + 1
        most_votes = max(votes.values())
        # `nearest` is ordered by distance, so the first hit is the closest.
        for index in nearest:
            if votes[names[index]] == most_votes:
                return int(index)

    def image_predict(self, test_img, threshold=3e14, n_neighbors=1):
        """
        Makes prediction for single image.

        Parameters
        --------
        test_img : list, ndarray
            test image array (grayscale). It is not modified.
        threshold : float, default=3e14
            Defines the threshold beyond which the prediction will be discarded
        n_neighbors : integer, default=1
            Defines the number of neighbors to consider during prediction.
            Uses KNN algorithm for n>1.

        Returns
        --------
        name, path : tuple
            Name of the predicted user and path of the training image that
            produced the match, or (None, None) if the smallest error is not
            below the threshold.

        Note
        --------
        Use n_neighbors only when there are a lot of images of a single user
        in the training set.
        """
        face = self._prepare_image(test_img)
        sum_of_squared_errors = self._squared_errors(face)
        smallest_error = sum_of_squared_errors.min()
        # return name only if its sse is less than threshold
        if not smallest_error < threshold:
            return None, None

        best = self._best_match_index(sum_of_squared_errors, n_neighbors)
        y_pred = self.y_train[best]
        comp = self.x_train[best]
        print(y_pred, ":\t", smallest_error)
        return y_pred, comp

    def predict(self, X, y=None, threshold=3e14, n_neighbors=1):
        """
        Makes prediction for provided test images.

        Parameters
        --------
        X : list, string
            List of string that contain path to the image.
        y : list, string, optional
            List of string that contains name of user associated
            with the image represented by X. Not used for the prediction;
            accepted so existing callers keep working.
        threshold : float, default=3e14
            Defines the threshold beyond which the prediction will be discarded
        n_neighbors : integer, default=1
            Defines the number of neighbors to consider during prediction.
            Uses KNN algorithm for n>1.

        Returns
        --------
        list
            One entry per image in X: the predicted name, or the string
            'nan' if the smallest error is not below the threshold.

        Raises
        --------
        IOError
            If one of the images cannot be read.

        Note
        --------
        Use n_neighbors only when there are a lot of images of a single user
        in the training set.
        """
        y_pred = []
        for img_path in X:
            # Same preparation as in training (including the division by
            # 255), otherwise the weights are not comparable.
            face = self._read_image(img_path)
            sum_of_squared_errors = self._squared_errors(face)
            if sum_of_squared_errors.min() < threshold:
                best = self._best_match_index(
                    sum_of_squared_errors, n_neighbors)
                y_pred.append(self.y_train[best])
            else:
                y_pred.append('nan')
        return y_pred