'''
This is the main module responsible for gesture recognition.
'''
import tensorflow as tf
import numpy as np
import cv2
from model import Conv3DModel
import mediapipe as mp
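
# Dependencies implied by the imports above: tensorflow, numpy, opencv-python
# (cv2), mediapipe, and a local model.py defining the Conv3DModel architecture.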
# Available Gestures
classes = [
    "Forward",
    "Back",
    "Turn Right",
    "Turn Left",
    "No Command"
]
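# The index order must match the labels used at training time, since the
# prediction loop below maps the model output back via np.argmax into this list.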
def normalize_data(np_data):
    # Reshape raw frames into the 5D tensor the neural network expects:
    # (video count, number of frames, height, width, number of channels)
    scaled_images = np_data.reshape(-1, 30, 64, 64, 1)
    return scaled_images
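
# Shape sketch: 60 buffered 64x64 grayscale frames reshape into two 30-frame
# clips, i.e. normalize_data(np.zeros((60, 64, 64))).shape == (2, 30, 64, 64, 1)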
# Creating instance of a 3D-CNN model
new_model = Conv3DModel()
# Initialization of model
new_model.compile(loss='sparse_categorical_crossentropy',
                  optimizer=tf.keras.optimizers.Adam())
# Loading weights of a previously trained model
new_model.load_weights('model_weights/variables/variables')
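# The path appears to be the variables prefix inside a TF SavedModel directory;
# load_weights() treats it as a TensorFlow checkpoint prefix, so files like
# variables.index / variables.data-* are expected under model_weights/variables/.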
# Gesture Recognition variables
to_predict = []
cap = cv2.VideoCapture(0)
image_class = ''
# Pose settings for MediaPipe
mp_drawing = mp.solutions.drawing_utils
mp_drawing_styles = mp.solutions.drawing_styles
mp_pose = mp.solutions.pose
pose = mp_pose.Pose(min_detection_confidence=0.5,
                    min_tracking_confidence=0.5)
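# Both 0.5 thresholds are the MediaPipe defaults: detection confidence gates
# when a person counts as found, tracking confidence gates when landmarks are
# re-detected rather than tracked between frames.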
while True:
    # Capture one camera frame
    ret, frame = cap.read()
    if not ret:
        break  # Camera read failed; stop the loop
    result = pose.process(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))

    # Comment out the two lines below to hide the raw, unprocessed camera feed
    cv2.namedWindow('Human', cv2.WINDOW_NORMAL)
    cv2.imshow('Human', frame)

    # Draw the detected pose skeleton on a black canvas
    # (draw_landmarks is a no-op when no person was detected)
    black_image = np.zeros(frame.shape, dtype=np.uint8)
    mp_drawing.draw_landmarks(black_image, result.pose_landmarks, mp_pose.POSE_CONNECTIONS)
    gray = cv2.cvtColor(black_image, cv2.COLOR_BGR2GRAY)
    to_predict.append(cv2.resize(gray, (64, 64)))

    # Buffer 30 frames of the gesture, then run a prediction
    if len(to_predict) == 30:
        frame_to_predict = np.array(to_predict, dtype=np.float32)  # Stack frames into one array
        model_input = normalize_data(frame_to_predict)  # Reshape to the 5D format the model expects
        predict = new_model.predict(model_input)  # Run the prediction
        image_class = classes[np.argmax(predict)]  # Pick the class with the highest score
        print('Class =', image_class, 'Confidence =', np.amax(predict) * 100, '%')  # Console log
        to_predict = []  # Reset the buffer to collect the next batch of frames

    # Overlay the latest predicted class on the skeleton frame
    cv2.putText(black_image, image_class, (30, 60), cv2.FONT_HERSHEY_SIMPLEX, 0.8, (255, 255, 255), 1, cv2.LINE_AA)
    # Display the resulting frame (WINDOW_NORMAL makes the window resizable)
    cv2.namedWindow('Hand Gesture Recognition', cv2.WINDOW_NORMAL)
    cv2.imshow('Hand Gesture Recognition', black_image)

    # Press 'q' to quit the program
    if cv2.waitKey(1) & 0xFF == ord('q'):
        break
# Release the camera, the pose estimator, and all windows
pose.close()
cap.release()
cv2.destroyAllWindows()
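
# Note: assumes a webcam at device index 0 (cv2.VideoCapture(0) above);
# run as, e.g.:  python test-app.py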