ROS nodes FPS performance measurements (#419)
* Added performance node

* Added time measurement for tool inference and publishing

* Moved performance start time before image preprocessing

* audiovisual_emotion_recognition_node.py performance and some formatting

* binary_high_resolution_node.py performance

* continual_skeleton_based_action_recognition_node.py performance and some rearrangement

* face_detection_retinaface_node.py performance

* face_recognition_node.py performance

* facial_emotion_estimation_node.py performance

* fall_detection_node.py performance

* heart_anomaly_detection_node.py performance

* hr_pose_estimation_node.py performance

* landmark_based_facial_expression_recognition_node.py performance

* object_detection_2d_centernet_node.py performance

* object_detection_2d_detr_node.py performance

* object_detection_2d_gem_node.py performance

* object_detection_2d_nanodet_node.py performance

* object_detection_2d_ssd_node.py performance

* object_detection_2d_yolov3_node.py performance

* object_detection_2d_yolov5_node.py performance

* object_detection_3d_voxel_node.py performance

* object_tracking_2d_deep_sort_node.py performance

* object_tracking_2d_fair_mot_node.py performance

* object_tracking_2d_siamrpn_node.py performance

* object_tracking_3d_ab3dmot_node.py performance

* panoptic_segmentation_efficient_lps_node.py performance

* panoptic_segmentation_efficient_ps_node.py performance

* pose_estimation_node.py minor fixes

* rgbd_hand_gesture_recognition_node.py performance

* semantic_segmentation_bisenet_node.py performance

* skeleton_based_action_recognition_node.py performance

* speech_command_recognition_node.py performance

* video_activity_recognition_node.py performance

* Added section for utility nodes and entry for the performance node

* Added entry in notes for logging performance

* Added entries for the new performance topic in all nodes

* audiovisual_emotion_recognition_node.py ROS2 performance

* binary_high_resolution_node.py ROS2 performance

* continual_skeleton_based_action_recognition_node.py ROS2 performance

* face_detection_retinaface_node.py ROS2 performance

* face_recognition_node.py ROS2 performance

* facial_emotion_estimation_node.py ROS2 performance

* fall_detection_node.py ROS2 performance

* heart_anomaly_detection_node.py ROS2 performance

* hr_pose_estimation_node.py ROS1 renamed class

* hr_pose_estimation_node.py ROS2 performance

* landmark_based_facial_expression_recognition_node.py ROS2 performance

* object_detection_2d_centernet_node.py ROS2 performance

* object_detection_2d_detr_node.py ROS2 performance

* object_detection_2d_gem_node.py ROS2 performance

* object_detection_2d_nanodet_node.py ROS1 minor fix

* object_detection_2d_nanodet_node.py ROS2 performance

* object_detection_2d_ssd_node.py ROS2 performance

* object_detection_2d_yolov3_node.py ROS1 class name fix

* object_detection_2d_yolov3_node.py ROS2 performance

* object_detection_2d_yolov5_node.py ROS1 class name fix

* object_detection_2d_yolov5_node.py ROS2 performance

* object_detection_3d_voxel_node.py ROS2 performance

* object_tracking_2d_deep_sort_node.py ROS2 performance

* object_tracking_2d_fair_mot_node.py ROS2 performance

* object_tracking_2d_siamrpn_node.py ROS2 performance

* object_tracking_3d_ab3dmot_node.py ROS2 performance

* panoptic_segmentation_efficient_lps_node.py ROS1 some imports rearrangement

* panoptic_segmentation_efficient_lps_node.py ROS2 performance

* panoptic_segmentation_efficient_ps_node.py ROS1 some imports rearrangement

* panoptic_segmentation_efficient_ps_node.py ROS2 performance

* pose_estimation_node.py ROS2 performance

* rgbd_hand_gesture_recognition_node.py ROS2 performance

* semantic_segmentation_bisenet_node.py ROS2 performance

* skeleton_based_action_recognition_node.py ROS2 performance and some refactoring

* speech_command_recognition_node.py ROS2 performance

* video_activity_recognition_node.py ROS2 performance

* Added ROS2 performance_node.py

* ROS2 readme updates for performance topics/node

* Apply suggestions from code review

Co-authored-by: Stefania Pedrazzi <[email protected]>

* Disable running tracking infer if no tracking is needed for ab3dmot ROS2

Co-authored-by: Stefania Pedrazzi <[email protected]>

* Disable running tracking infer if no tracking is needed for ab3dmot ROS1

* Apply suggestions from code review

Co-authored-by: Nikolaos Passalis <[email protected]>

* Removed additional spaces after suggested change

* Applied review suggestion to ROS2 node as well

---------

Co-authored-by: Stefania Pedrazzi <[email protected]>
Co-authored-by: Nikolaos Passalis <[email protected]>
3 people authored Apr 11, 2023
1 parent e9d3801 commit 0c2edb0
Showing 66 changed files with 1,920 additions and 348 deletions.
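Every node in this commit gets the same three-part change, shown in full in the file diffs below: a new performance_topic constructor parameter backed by an optional std_msgs/Float32 publisher, a perf_counter() timestamp taken before image preprocessing, and an FPS value published immediately after inference. A condensed sketch of that pattern, with an illustrative ExampleNode standing in for the real node classes:

from time import perf_counter

import rospy
from std_msgs.msg import Float32


class ExampleNode:  # illustrative name; see the real node diffs below
    def __init__(self, performance_topic=None):
        # The FPS publisher is optional: the node publishes nothing unless a topic is given
        if performance_topic is not None:
            self.performance_publisher = rospy.Publisher(performance_topic, Float32, queue_size=1)
        else:
            self.performance_publisher = None

    def callback(self, data):
        if self.performance_publisher:
            start_time = perf_counter()

        # ... preprocessing and learner inference run here ...

        if self.performance_publisher:
            end_time = perf_counter()
            fps_msg = Float32()
            # FPS over preprocessing + inference; output publishing is excluded from the span
            fps_msg.data = 1.0 / (end_time - start_time)
            self.performance_publisher.publish(fps_msg)

        # ... output messages are published after the measured span ...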
1 change: 1 addition & 0 deletions projects/opendr_ws/src/opendr_perception/CMakeLists.txt
@@ -28,6 +28,7 @@ include_directories(
#############

catkin_install_python(PROGRAMS
scripts/performance_node.py
scripts/pose_estimation_node.py
scripts/hr_pose_estimation_node.py
scripts/fall_detection_node.py
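The newly registered scripts/performance_node.py itself is not rendered on this page, so the following is only a guess at its shape: a minimal subscriber that averages the Float32 FPS messages the perception nodes publish. The /opendr/performance topic default, the window size, and the windowed-mean behaviour are assumptions, not the actual implementation.

from collections import deque

import rospy
from std_msgs.msg import Float32


class PerformanceNode:
    def __init__(self, input_performance_topic="/opendr/performance", window=20):
        # Rolling window of the most recent FPS samples (size is an assumption)
        self.fps_window = deque(maxlen=window)
        rospy.Subscriber(input_performance_topic, Float32, self.callback, queue_size=1)

    def callback(self, msg):
        self.fps_window.append(msg.data)
        mean_fps = sum(self.fps_window) / len(self.fps_window)
        rospy.loginfo("Average FPS: %.2f", mean_fps)


if __name__ == '__main__':
    rospy.init_node('opendr_performance_node', anonymous=True)
    PerformanceNode()
    rospy.spin()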
55 changes: 54 additions & 1 deletion projects/opendr_ws/src/opendr_perception/README.md

Large diffs are not rendered by default.

projects/opendr_ws/src/opendr_perception/scripts/audiovisual_emotion_recognition_node.py
@@ -20,9 +20,11 @@
import torch
import librosa
import cv2
from time import perf_counter

import rospy
import message_filters
from std_msgs.msg import Float32
from sensor_msgs.msg import Image as ROS_Image
from audio_common_msgs.msg import AudioData
from vision_msgs.msg import Classification2D
@@ -36,7 +38,8 @@
class AudiovisualEmotionNode:

def __init__(self, input_video_topic="/usb_cam/image_raw", input_audio_topic="/audio/audio",
                 output_emotions_topic="/opendr/audiovisual_emotion", performance_topic=None,
                 buffer_size=3.6, device="cuda"):
"""
Creates a ROS Node for audiovisual emotion recognition
:param input_video_topic: Topic from which we are reading the input video. Expects detected face of size 224x224
@@ -45,6 +48,9 @@ def __init__(self, input_video_topic="/usb_cam/image_raw", input_audio_topic="/a
:type input_audio_topic: str
:param output_emotions_topic: Topic to which we are publishing the predicted class
:type output_emotions_topic: str
:param performance_topic: Topic to which we are publishing performance information (if None, no performance
message is published)
:type performance_topic: str
:param buffer_size: length of audio and video in sec
:type buffer_size: float
:param device: device on which we are running inference ('cpu' or 'cuda')
@@ -56,6 +62,11 @@ def __init__(self, input_video_topic="/usb_cam/image_raw", input_audio_topic="/a
self.input_video_topic = input_video_topic
self.input_audio_topic = input_audio_topic

if performance_topic is not None:
self.performance_publisher = rospy.Publisher(performance_topic, Float32, queue_size=1)
else:
self.performance_publisher = None

self.bridge = ROSBridge()

# Initialize the gesture recognition
@@ -69,7 +80,7 @@ def __init__(self, input_video_topic="/usb_cam/image_raw", input_audio_topic="/a
self.video_buffer = np.zeros((1, 224, 224, 3))

self.video_transform = transforms.Compose([
            transforms.ToTensor(255)])

def listen(self):
"""
@@ -94,26 +105,35 @@ def callback(self, image_data, audio_data):
:param audio_data: input audio message, speech
:type audio_data: audio_common_msgs.msg.AudioData
"""
        if self.performance_publisher:
            start_time = perf_counter()
        audio_data = np.reshape(np.frombuffer(audio_data.data, dtype=np.int16) / 32768.0, (1, -1))
self.data_buffer = np.append(self.data_buffer, audio_data)

image_data = self.bridge.from_ros_image(image_data, encoding='bgr8').convert(format='channels_last')
image_data = cv2.resize(image_data, (224, 224))

self.video_buffer = np.append(self.video_buffer, np.expand_dims(image_data.data, 0), axis=0)

        if self.data_buffer.shape[0] > 16000 * self.buffer_size:
audio = librosa.feature.mfcc(self.data_buffer[1:], sr=16000, n_mfcc=10)
audio = Timeseries(audio)

            to_select = select_distributed(15, len(self.video_buffer) - 1)
video = self.video_buffer[1:][to_select]

video = [self.video_transform(img) for img in video]
video = Video(torch.stack(video, 0).permute(1, 0, 2, 3))

class_pred = self.avlearner.infer(audio, video)

if self.performance_publisher:
end_time = perf_counter()
fps = 1.0 / (end_time - start_time) # NOQA
fps_msg = Float32()
fps_msg.data = fps
self.performance_publisher.publish(fps_msg)

# Publish output
ros_class = self.bridge.from_category_to_rosclass(class_pred)
self.publisher.publish(ros_class)
Expand All @@ -122,7 +142,7 @@ def callback(self, image_data, audio_data):
self.video_buffer = np.zeros((1, 224, 224, 3))


def select_distributed(m, n): return [i * n // m + n // (2 * m) for i in range(m)]


if __name__ == '__main__':
@@ -133,6 +153,8 @@ def select_distributed(m, n): return [i*n//m + n//(2*m) for i in range(m)]
help="Listen to audio input data on this topic")
parser.add_argument("-o", "--output_emotions_topic", type=str, default="/opendr/audiovisual_emotion",
help="Topic name for output emotions recognition")
parser.add_argument("--performance_topic", help="Topic name for performance messages, disabled (None) by default",
type=str, default=None)
parser.add_argument("--device", type=str, default="cuda",
help="Device to use (cpu, cuda)", choices=["cuda", "cpu"])
parser.add_argument("--buffer_size", type=float, default=3.6,
@@ -154,5 +176,6 @@ def select_distributed(m, n): return [i*n//m + n//(2*m) for i in range(m)]

avnode = AudiovisualEmotionNode(input_video_topic=args.input_video_topic, input_audio_topic=args.input_audio_topic,
output_emotions_topic=args.output_emotions_topic,
performance_topic=args.performance_topic,
buffer_size=args.buffer_size, device=device)
avnode.listen()
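With the changes above, measurement is opt-in per node from the command line, e.g. (topic name illustrative): rosrun opendr_perception audiovisual_emotion_recognition_node.py --performance_topic /opendr/performance, then rostopic echo /opendr/performance to watch the published FPS values.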
projects/opendr_ws/src/opendr_perception/scripts/binary_high_resolution_node.py
@@ -17,8 +17,10 @@
import numpy as np
import torch
import cv2
from time import perf_counter

import rospy
from std_msgs.msg import Float32
from sensor_msgs.msg import Image as ROS_Image
from opendr_bridge import ROSBridge

@@ -29,7 +31,7 @@
class BinaryHighResolutionNode:

def __init__(self, input_rgb_image_topic="/usb_cam/image_raw", output_heatmap_topic="/opendr/binary_hr_heatmap",
                 output_rgb_image_topic="/opendr/binary_hr_heatmap_visualization", performance_topic=None,
model_path=None, architecture="VGG_720p", device="cuda"):
"""
Create a ROS Node for binary high resolution classification with Binary High Resolution.
@@ -40,6 +42,9 @@ def __init__(self, input_rgb_image_topic="/usb_cam/image_raw", output_heatmap_to
:param output_rgb_image_topic: Topic to which we are publishing the heatmap image blended with the
input image for visualization purposes
:type output_rgb_image_topic: str
:param performance_topic: Topic to which we are publishing performance information (if None, no performance
message is published)
:type performance_topic: str
:param model_path: The path to the directory of a trained model
:type model_path: str
:param architecture: Architecture used on trained model (`VGG_720p` or `VGG_1080p`)
@@ -59,6 +64,11 @@ def __init__(self, input_rgb_image_topic="/usb_cam/image_raw", output_heatmap_to
else:
self.visualization_publisher = None

if performance_topic is not None:
self.performance_publisher = rospy.Publisher(performance_topic, Float32, queue_size=1)
else:
self.performance_publisher = None

self.bridge = ROSBridge()

# Initialize the binary high resolution model
@@ -94,12 +104,21 @@ def callback(self, data):
:param data: Input image message
:type data: sensor_msgs.msg.Image
"""
if self.performance_publisher:
start_time = perf_counter()
# Convert sensor_msgs.msg.Image into OpenDR Image
image = self.bridge.from_ros_image(data, encoding='bgr8')
image = image.convert("channels_last")
# Run learner to retrieve the OpenDR heatmap
heatmap = self.learner.infer(image)

if self.performance_publisher:
end_time = perf_counter()
fps = 1.0 / (end_time - start_time) # NOQA
fps_msg = Float32()
fps_msg.data = fps
self.performance_publisher.publish(fps_msg)

# Publish heatmap in the form of an image
if self.heatmap_publisher is not None:
self.heatmap_publisher.publish(self.bridge.to_ros_image(heatmap))
@@ -126,6 +145,8 @@ def main():
"blended with the input image for visualization purposes",
type=lambda value: value if value.lower() != "none" else None,
default="/opendr/binary_hr_heatmap_visualization")
parser.add_argument("--performance_topic", help="Topic name for performance messages, disabled (None) by default",
type=str, default=None)
parser.add_argument("-m", "--model_path", help="Path to the directory of the trained model",
type=str, default="test_model")
parser.add_argument("-a", "--architecture", help="Architecture used for the trained model, either \"VGG_720p\" or "
@@ -152,6 +173,7 @@
input_rgb_image_topic=args.input_rgb_image_topic,
output_heatmap_topic=args.output_heatmap_topic,
output_rgb_image_topic=args.output_rgb_image_topic,
performance_topic=args.performance_topic,
model_path=args.model_path,
architecture=args.architecture)
binary_hr_node.listen()
projects/opendr_ws/src/opendr_perception/scripts/continual_skeleton_based_action_recognition_node.py
@@ -16,9 +16,10 @@
import argparse
import torch
import numpy as np
from time import perf_counter

import rospy
from std_msgs.msg import String, Float32
from vision_msgs.msg import ObjectHypothesis
from sensor_msgs.msg import Image as ROS_Image
from opendr_bridge.msg import OpenDRPose2D
@@ -37,6 +38,7 @@ def __init__(self, input_rgb_image_topic="/usb_cam/image_raw",
pose_annotations_topic="/opendr/poses",
output_category_topic="/opendr/continual_skeleton_recognized_action",
output_category_description_topic="/opendr/continual_skeleton_recognized_action_description",
performance_topic=None,
device="cuda", model='costgcn'):
"""
Creates a ROS Node for continual skeleton-based action recognition.
@@ -54,6 +56,9 @@ def __init__(self, input_rgb_image_topic="/usb_cam/image_raw",
:param output_category_description_topic: Topic to which we are publishing the description of the recognized
action (if None, we are not publishing the description)
:type output_category_description_topic: str
:param performance_topic: Topic to which we are publishing performance information (if None, no performance
message is published)
:type performance_topic: str
:param device: device on which we are running inference ('cpu' or 'cuda')
:type device: str
:param model: model to use for continual skeleton-based action recognition.
@@ -85,6 +90,11 @@
else:
self.string_publisher = None

if performance_topic is not None:
self.performance_publisher = rospy.Publisher(performance_topic, Float32, queue_size=1)
else:
self.performance_publisher = None

# Initialize the pose estimation
self.pose_estimator = LightweightOpenPoseLearner(device=device, num_refinement_stages=2,
mobilenet_use_stride=False,
@@ -117,6 +127,8 @@ def callback(self, data):
:param data: input message
:type data: sensor_msgs.msg.Image
"""
if self.performance_publisher:
start_time = perf_counter()

# Convert sensor_msgs.msg.Image into OpenDR Image
image = self.bridge.from_ros_image(data, encoding='bgr8')
Expand All @@ -127,6 +139,22 @@ def callback(self, data):
# select two poses with the highest energy
poses = _select_2_poses(poses)

num_frames = 1
poses_list = [poses]
skeleton_seq = _pose2numpy(num_frames, poses_list)

# Run action recognition
result = self.action_classifier.infer(skeleton_seq) # input_size: BxCxTxVxS
category = result[0]
category.confidence = float(category.confidence.max())

if self.performance_publisher:
end_time = perf_counter()
fps = 1.0 / (end_time - start_time) # NOQA
fps_msg = Float32()
fps_msg.data = fps
self.performance_publisher.publish(fps_msg)

# Publish detections in ROS message
if self.pose_publisher is not None:
for pose in poses:
@@ -142,15 +170,6 @@ def callback(self, data):
# Convert the annotated OpenDR image to ROS image message using bridge and publish it
self.image_publisher.publish(self.bridge.to_ros_image(Image(image), encoding='bgr8'))

if self.hypothesis_publisher is not None:
self.hypothesis_publisher.publish(self.bridge.to_ros_category(category))

@@ -199,6 +218,8 @@ def main():
"recognized action category",
type=lambda value: value if value.lower() != "none" else None,
default="/opendr/continual_skeleton_recognized_action_description")
parser.add_argument("--performance_topic", help="Topic name for performance messages, disabled (None) by default",
type=str, default=None)
parser.add_argument("--device", help="Device to use, either \"cpu\" or \"cuda\"",
type=str, default="cuda", choices=["cuda", "cpu"])
parser.add_argument("--model", help="Model to use, either \"costgcn\"",
Expand All @@ -225,6 +246,7 @@ def main():
pose_annotations_topic=args.pose_annotations_topic,
output_category_topic=args.output_category_topic,
output_category_description_topic=args.output_category_description_topic,
performance_topic=args.performance_topic,
device=device,
model=args.model)
continual_skeleton_action_recognition_node.listen()
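A note on the "rearrangement" mentioned in the commit history, visible in the callback above: the skeleton-sequence construction and action-classifier inference were moved ahead of the publishing calls, so the measured span (perf_counter start at the top of the callback, end right after infer) covers the full pose-estimation plus action-recognition pipeline while message publishing stays outside it.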
