Skip to content

Commit

Permalink
facial expression recognition demo update (FPS added) (#405)
Browse files Browse the repository at this point in the history
* added FPS to webcam demo

* Update projects/python/perception/facial_expression_recognition/image_based_facial_emotion_estimation/inference_demo.py

Co-authored-by: Kostas Tsampazis <[email protected]>

* added FPS to webcam demo

* Fix demo

* PEP8 fixes

---------

Co-authored-by: Kostas Tsampazis <[email protected]>
Co-authored-by: ad-daniel <[email protected]>
Co-authored-by: ad-daniel <[email protected]>
  • Loading branch information
4 people authored Feb 15, 2023
1 parent 561a66c commit 572ae75
Showing 1 changed file with 40 additions and 21 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
from torchvision import transforms
import PIL
import cv2
import time

# OpenDR Modules
from opendr.perception.facial_expression_recognition import FacialEmotionLearner, image_processing
Expand All @@ -39,15 +40,15 @@ def is_none(x):
return False


def detect_face(image):
def detect_face(img):
"""
Detects faces in an image.
:param image: (ndarray) Raw input image.
:param img: (ndarray) Raw input image.
:return: (list) Tuples with coordinates of a detected face.
"""

# Converts to greyscale
greyscale_image = image_processing.convert_bgr_to_grey(image)
greyscale_image = image_processing.convert_bgr_to_grey(img)

# Runs haar cascade classifiers
_FACE_DETECTOR_HAAR_CASCADE = cv2.CascadeClassifier("./face_detector/frontal_face.xml")
Expand All @@ -60,27 +61,25 @@ def detect_face(image):
return face_coordinates[0] if (len(face_coordinates) > 0 and (np.sum(face_coordinates[0]) > 0)) else None


def _pre_process_input_image(img):
    """
    Pre-processes an image for ESR-9.

    The image is resized to INPUT_IMAGE_SIZE, converted to a PIL image, turned into a tensor,
    normalized with INPUT_IMAGE_NORMALIZATION_MEAN / INPUT_IMAGE_NORMALIZATION_STD and finally
    given an extra leading axis via ``unsqueeze(0)``.

    :param img: (ndarray) Input image.
    :return: (ndarray) Pre-processed image, with one extra leading axis.
    """

    img = image_processing.resize(img, INPUT_IMAGE_SIZE)
    img = PIL.Image.fromarray(img)  # NOQA

    # The tensor must be built from the local ``img``. The parameter used to be called ``image``;
    # after the rename that name no longer refers to the input picture inside this function.
    tensor = transforms.ToTensor()(img)
    normalize = transforms.Normalize(mean=INPUT_IMAGE_NORMALIZATION_MEAN,
                                     std=INPUT_IMAGE_NORMALIZATION_STD)
    img = normalize(tensor).unsqueeze(0)
    return img.numpy()


def _predict(learner, input_face):
"""
Facial emotion/expression estimation. Classifies the pre-processed input image with FacialEmotionLearner.
:param input_face: (ndarray) input image.
:param device: runs the classification on CPU or GPU
:param ensemble_size: number of branches in the network
:return: Lists of emotions and affect values including the ensemble predictions based on plurality.
"""

Expand All @@ -100,34 +99,44 @@ def recognize_facial_expression(learner, image, display):
If more than one face is detected, the biggest one is used.
The detected face is fed to the _predict function which runs FacialEmotionLearner for facial emotion/expression
estimation.
:param image: (ndarray) input image.
"""

start_time = time.perf_counter()
# Detect face
face_coordinates = detect_face(image)
end_time = time.perf_counter()
detect_fps = 1.0 / (end_time - start_time)
img = cv2.putText(image, "Detection FPS: %.2f" % (detect_fps,), (10, image.shape[1] - 280), cv2.FONT_HERSHEY_SIMPLEX, 1,
(0, 255, 255), 2, cv2.LINE_AA)

if face_coordinates is None:
print("No face detected.")
else:
start_time = time.perf_counter()
face = image[face_coordinates[0][1]:face_coordinates[1][1], face_coordinates[0][0]:face_coordinates[1][0], :]
# Pre_process detected face
input_face = _pre_process_input_image(face)
# Recognize facial expression
emotion, affect = _predict(learner, input_face=input_face)
end_time = time.perf_counter()
model_fps = 1.0 / (end_time - start_time)
img = cv2.putText(img, "Model FPS: %.2f" % (model_fps,), (10, image.shape[1] - 240), cv2.FONT_HERSHEY_SIMPLEX, 1,
(0, 255, 255), 2, cv2.LINE_AA)

# display
if display:
image = cv2.putText(image, "Valence: %.2f" % affect[0], (10, 40 + 0 * 30), cv2.FONT_HERSHEY_SIMPLEX,
1, (0, 255, 255), 2, )
image = cv2.putText(image, "Arousal: %.2f" % affect[1], (10, 40 + 1 * 30), cv2.FONT_HERSHEY_SIMPLEX,
1, (0, 255, 255), 2, )
image = cv2.putText(image, emotion.description, (10, 40 + 2 * 30), cv2.FONT_HERSHEY_SIMPLEX,
1, (0, 255, 255), 2, )
img = cv2.putText(img, "Valence: %.2f" % affect[0], (10, 40 + 0 * 30), cv2.FONT_HERSHEY_SIMPLEX,
1, (0, 255, 0), 2, )
img = cv2.putText(img, "Arousal: %.2f" % affect[1], (10, 40 + 1 * 30), cv2.FONT_HERSHEY_SIMPLEX,
1, (0, 255, 0), 2, )
img = cv2.putText(img, "Expression: " + emotion.description, (10, 40 + 2 * 30), cv2.FONT_HERSHEY_SIMPLEX,
1, (255, 0, 0), 2, )
else:
print('emotion:', emotion)
print('valence, arousal:', affect)

return image
return img


def webcam(learner, camera_id, display, frames):
Expand All @@ -147,7 +156,16 @@ def webcam(learner, camera_id, display, frames):
while image_processing.is_video_capture_open():
# Get a frame
img, _ = image_processing.get_frame()

start_time = time.perf_counter()

img = None if (img is None) else recognize_facial_expression(learner, img, display)

end_time = time.perf_counter()
total_fps = 1.0 / (end_time - start_time)
img = cv2.putText(img, "Total FPS: %.2f" % (total_fps,), (10, img.shape[1] - 200), cv2.FONT_HERSHEY_SIMPLEX, 1,
(0, 255, 255), 2, cv2.LINE_AA)

if display and img is not None:
cv2.imshow('Result', img)
cv2.waitKey(1)
Expand Down Expand Up @@ -249,6 +267,7 @@ def main():
raise RuntimeError("Error: 'input' is not valid. The argument 'input' is a mandatory "
"field when image or video mode is chosen.")
image(learner, args.input, args.display)

except RuntimeError as e:
print(e)
elif args.mode == "video":
Expand Down

0 comments on commit 572ae75

Please sign in to comment.