forked from yashre-bh/project-iago
-
Notifications
You must be signed in to change notification settings - Fork 0
/
recognize_gesture.py
169 lines (142 loc) · 5.05 KB
/
recognize_gesture.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
import cv2, pickle
import numpy as np
import tensorflow as tf
from PIL import Image as im
import os
import sqlite3
from cnn_tf import cnn_model_fn
from keras.models import load_model
from keras import backend
from keras.backend import set_session
from azure.cognitiveservices.speech import SpeechConfig, SpeechSynthesizer
from azure.cognitiveservices.speech.audio import AudioOutputConfig
from dotenv import load_dotenv
# Load variables from a local .env file into the process environment, then
# read the Azure Speech credentials (each is None when the variable is unset).
load_dotenv()
azure_key = os.getenv('AZURE_KEY')
azure_region = os.getenv('AZURE_REGION')
# Run TF1-style global variable initialisation in the session the Keras
# backend currently holds.
# NOTE(review): `session` is rebound to a brand-new Session further down,
# before the model is loaded; confirm this first initialisation is still needed.
session = backend.get_session()
init = tf.global_variables_initializer()
session.run(init)
# Azure text-to-speech synthesizer: en-GB language, "en-GB-SoniaNeural" voice,
# audio sent to the default speaker.
speech_config = SpeechConfig(subscription=azure_key, region=azure_region)
speech_config.speech_synthesis_language = "en-GB"
speech_config.speech_synthesis_voice_name ="en-GB-SoniaNeural"
audio_config = AudioOutputConfig(use_default_speaker=True)
synthesizer = SpeechSynthesizer(speech_config=speech_config, audio_config=audio_config)
# Reduce TensorFlow log output.
# NOTE(review): TF_CPP_MIN_LOG_LEVEL is set after `import tensorflow` has
# already executed; it may have to be set before the import to take effect
# -- TODO confirm.
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.ERROR)
# Estimator built from cnn_model_fn with its model directory at tmp/cnn_model2.
classifier = tf.estimator.Estimator(model_dir="tmp/cnn_model2", model_fn=cnn_model_fn)
# Module-level slot that tf_predict() overwrites with its latest result.
prediction = None
# NOTE(review): `config` is not passed to the Session created below and is not
# referenced anywhere else in this section.
config=tf.compat.v1.ConfigProto()
# # tf_config = config
# Fresh session registered with Keras, plus the current default graph;
# keras_predict() re-enters both of them on every call.
session = tf.compat.v1.Session()
set_session(session)
graph = tf.get_default_graph()
# Keras model loaded from the HDF5 file in the working directory.
model = tf.python.keras.models.load_model('cnn_model_keras2.h5')
# Private Keras API; presumably builds the predict function up front so that
# later model.predict() calls can reuse it -- TODO confirm.
model._make_predict_function()
# tf.reset_default_graph()
# graph = tf.get_default_graph()
# NOTE(review): this ConfigProto is constructed and immediately discarded, so
# it does not configure any session.
tf.ConfigProto(allow_soft_placement=True,log_device_placement=True)
def get_image_size():
    '''
    Return the shape of the reference gesture image.

    Reads 'gestures/0/100.jpg' with imread flag 0 and returns the ``shape``
    tuple of the loaded array.

    Raises:
        FileNotFoundError: if the reference image cannot be read.
    '''
    sample_path = 'gestures/0/100.jpg'
    img = cv2.imread(sample_path, 0)
    if img is None:
        # cv2.imread reports a missing or unreadable file by returning None
        # rather than raising, which would otherwise surface as an opaque
        # AttributeError on `.shape`.
        raise FileNotFoundError(
            "Could not read reference gesture image: " + sample_path
        )
    return img.shape
# Target image size used by the *_process_image helpers, read once at import
# time from the reference gesture image.
# NOTE(review): an ndarray `shape` is (rows, cols), so image_x receives the row
# count; this only matters if the gesture images are not square -- confirm.
image_x, image_y = get_image_size()
def tf_process_image(img):
    '''
    Resize an image to the reference size and return it as a float32 array.

    Args:
        img: image array accepted by cv2.resize.

    Returns:
        A new numpy float32 array holding the image resized with
        ``(image_x, image_y)`` as the cv2.resize target size.
    '''
    resized = cv2.resize(img, (image_x, image_y))
    # A single conversion is enough; the original copied the array a second
    # time without changing its contents or dtype.
    return np.array(resized, dtype=np.float32)
def tf_predict(classifier, image):
    '''
    Run the estimator on one image and record the first prediction.

    The image is preprocessed with tf_process_image(), fed to
    ``classifier.predict`` through a numpy input function, and the first
    yielded prediction is stored in the module-level ``prediction`` and
    printed. Nothing is returned.
    '''
    global prediction
    features = {"x": tf_process_image(image)}
    input_fn = tf.estimator.inputs.numpy_input_fn(x=features, shuffle=False)
    # classifier.predict yields results lazily; only the first one is taken.
    prediction = next(classifier.predict(input_fn=input_fn))
    print(prediction)
def keras_process_image(img):
    '''
    Prepare an image as a single-item batch for the Keras model.

    The image is resized with ``(image_x, image_y)`` as the cv2.resize target,
    converted to float32, and reshaped to ``(1, image_x, image_y, 1)``.
    '''
    resized = cv2.resize(img, (image_x, image_y))
    as_float = np.array(resized, dtype=np.float32)
    return as_float.reshape((1, image_x, image_y, 1))
def keras_predict(model, image):
    '''
    Classify an image with the Keras model.

    Returns:
        A ``(score, index)`` pair: the highest value in the model's output
        for the image and the position of its first occurrence.
    '''
    batch = keras_process_image(image)
    # Predict inside the module-level graph and session set up at import time.
    with graph.as_default():
        set_session(session)
        scores = model.predict(batch)[0]
    best_score = max(scores)
    best_index = list(scores).index(best_score)
    return best_score, best_index
def get_pred_text_from_db(pred_class):
    '''
    Look up the gesture name stored for a predicted class id.

    Args:
        pred_class: class id matched against ``gesture.g_id``; it is
            converted with ``int()`` before being bound to the query.

    Returns:
        The ``g_name`` of the first matching row in gesture_db.db, or None
        when no row matches.
    '''
    conn = sqlite3.connect("gesture_db.db")
    try:
        # Bind the id as a parameter instead of concatenating it into the SQL.
        row = conn.execute(
            "SELECT g_name FROM gesture WHERE g_id=?", (int(pred_class),)
        ).fetchone()
    finally:
        # The connection was previously left open on every call.
        conn.close()
    return None if row is None else row[0]
def split_sentence(text, num_of_words):
    '''
    Splits a text into groups of at most num_of_words words.

    The text is split on single spaces. Each returned string contains the
    words of one group, every word preceded by one space (so each group
    starts with a leading space).

    Args:
        text: the sentence to split.
        num_of_words: maximum number of words per group; must be >= 1.

    Returns:
        List of group strings, in order.

    Raises:
        ValueError: if num_of_words is smaller than 1 (this previously
            caused an endless loop).
    '''
    if num_of_words < 1:
        raise ValueError("num_of_words must be at least 1")
    words = text.split(" ")
    return [
        "".join(" " + word for word in words[start:start + num_of_words])
        for start in range(0, len(words), num_of_words)
    ]
def put_splitted_text_in_blackboard(blackboard, splitted_text):
    '''
    Draw each string of splitted_text onto blackboard, one line per string.

    Lines are drawn at x=4, starting at y=200 and stepping 50 further down
    for each following string, using FONT_HERSHEY_TRIPLEX at scale 2 with
    colour (255, 255, 255).
    '''
    first_baseline, line_gap = 200, 50
    for row, line in enumerate(splitted_text):
        origin = (4, first_baseline + row * line_gap)
        cv2.putText(blackboard, line, origin, cv2.FONT_HERSHEY_TRIPLEX, 2, (255, 255, 255))
def get_hand_hist():
    '''
    Load and return the object pickled in the file "hist".
    '''
    # NOTE(review): unpickling can execute arbitrary code; "hist" must come
    # from a trusted source.
    with open("hist", "rb") as hist_file:
        return pickle.load(hist_file)
def recognize(frame):
    '''
    Detect a hand gesture in a frame, look up its name and print it.

    The frame is mirrored and resized to 640x480, the histogram returned by
    get_hand_hist() is back-projected onto its HSV version, and the result is
    smoothed and Otsu-thresholded. Inside the fixed 300x300 window at
    (300, 100) the largest contour is taken; if its area exceeds 10000 its
    bounding box is cut out, padded to a square and classified with
    keras_predict(). When the score is above 80% the gesture name is fetched
    with get_pred_text_from_db(), the speech synthesizer is triggered and
    the name is printed.

    Args:
        frame: image array; converted with COLOR_BGR2HSV, so presumably a
            BGR frame -- TODO confirm against the caller.

    Returns:
        None.
    '''
    hist = get_hand_hist()
    # Fixed window of the 640x480 frame that is analysed.
    x, y, w, h = 300, 100, 300, 300
    img = cv2.flip(frame, 1)
    img = cv2.resize(img, (640, 480))
    imgHSV = cv2.cvtColor(img, cv2.COLOR_BGR2HSV)
    dst = cv2.calcBackProject([imgHSV], [0, 1], hist, [0, 180, 0, 256], 1)
    disc = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (10, 10))
    # In-place filtering: dst is both source and destination.
    cv2.filter2D(dst, -1, disc, dst)
    blur = cv2.GaussianBlur(dst, (11, 11), 0)
    blur = cv2.medianBlur(blur, 15)
    thresh = cv2.threshold(blur, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)[1]
    thresh = cv2.merge((thresh, thresh, thresh))
    thresh = cv2.cvtColor(thresh, cv2.COLOR_BGR2GRAY)
    thresh = thresh[y:y + h, x:x + w]
    # findContours returns (image, contours, hierarchy) in OpenCV 3 and
    # (contours, hierarchy) in OpenCV 2/4; the contours are always the
    # second-to-last item. This replaces a version check that left
    # `contours` unbound for any other major version.
    contours = cv2.findContours(thresh.copy(), cv2.RETR_TREE, cv2.CHAIN_APPROX_NONE)[-2]
    if len(contours) == 0:
        return
    contour = max(contours, key=cv2.contourArea)
    if cv2.contourArea(contour) <= 10000:
        return
    x1, y1, w1, h1 = cv2.boundingRect(contour)
    save_img = thresh[y1:y1 + h1, x1:x1 + w1]
    # Pad the shorter side equally on both ends so the crop becomes (nearly)
    # square before it is resized for the model.
    # NOTE(review): the trailing (0, 0, 0) is passed positionally; in the
    # Python signature the 7th positional argument is `dst`, not `value`
    # -- confirm this is intended.
    if w1 > h1:
        pad = int((w1 - h1) / 2)
        save_img = cv2.copyMakeBorder(save_img, pad, pad, 0, 0, cv2.BORDER_CONSTANT, (0, 0, 0))
    elif h1 > w1:
        pad = int((h1 - w1) / 2)
        save_img = cv2.copyMakeBorder(save_img, 0, 0, pad, pad, cv2.BORDER_CONSTANT, (0, 0, 0))
    pred_probab, pred_class = keras_predict(model, save_img)
    if pred_probab * 100 > 80:
        text = get_pred_text_from_db(pred_class)
        # NOTE(review): the nesting of the two statements below was
        # reconstructed; confirm they belong under the confidence check.
        # NOTE(review): the synthesizer is given the literal "2", not the
        # recognised text -- looks like a debugging leftover; confirm.
        synthesizer.speak_text_async("2")
        print(text)