diff --git a/.gitmodules b/.gitmodules index 6462c0754b..059c5d777d 100644 --- a/.gitmodules +++ b/.gitmodules @@ -5,3 +5,6 @@ path = src/opendr/perception/panoptic_segmentation/efficient_lps/algorithm/EfficientLPS url = https://github.com/robot-learning-freiburg/EfficientLPS.git branch = opendr +[submodule "src/opendr/perception/continual_slam/algorithm/g2o/g2opy"] + path = src/opendr/perception/continual_slam/algorithm/g2o/g2opy + url = https://github.com/uoip/g2opy diff --git a/projects/opendr_ws/README.md b/projects/opendr_ws/README.md index d38e39eca6..bc4011b91c 100644 --- a/projects/opendr_ws/README.md +++ b/projects/opendr_ws/README.md @@ -90,6 +90,9 @@ Currently, apart from tools, opendr_ws contains the following ROS nodes (categor 1. [RGBD Hand Gesture Recognition](src/opendr_perception/README.md#rgbd-hand-gesture-recognition-ros-node) ## RGB + Audio input 1. [Audiovisual Emotion Recognition](src/opendr_perception/README.md#audiovisual-emotion-recognition-ros-node) + +## RGB + IMU input +1. [Continual SLAM](src/opendr_perception//README.md#continual-slam-ros-nodes) ## Audio input 1. [Speech Command Recognition](src/opendr_perception/README.md#speech-command-recognition-ros-node) 2. [Speech Transcription](src/opendr_perception/README.md#speech-transcription-ros-node) diff --git a/projects/opendr_ws/src/opendr_bridge/src/opendr_bridge/bridge.py b/projects/opendr_ws/src/opendr_bridge/src/opendr_bridge/bridge.py index 76fd5d30ea..f4335e2a36 100755 --- a/projects/opendr_ws/src/opendr_bridge/src/opendr_bridge/bridge.py +++ b/projects/opendr_ws/src/opendr_bridge/src/opendr_bridge/bridge.py @@ -21,6 +21,9 @@ import numpy as np import struct + +import rospy +from rospy.rostime import Time from cv_bridge import CvBridge from vision_msgs.msg import Detection2DArray, Detection2D, BoundingBox2D, ObjectHypothesisWithPose,\ Detection3DArray, Detection3D, BoundingBox3D as BoundingBox3DMsg, ObjectHypothesis, Classification2D @@ -29,11 +32,17 @@ from std_msgs.msg import ColorRGBA, String, Header from sensor_msgs.msg import Image as ImageMsg, PointCloud as PointCloudMsg, PointCloud2 as PointCloud2Msg,\ ChannelFloat32 as ChannelFloat32Msg, PointField as PointFieldMsg -import rospy -from geometry_msgs.msg import Point32 as Point32Msg, Quaternion as QuaternionMsg +from geometry_msgs.msg import ( + Point32 as Point32Msg, + Quaternion as QuaternionMsg, + Vector3Stamped as Vector3StampedMsg, + TransformStamped as TransformStampedMsg +) +from visualization_msgs.msg import Marker as MarkerMsg, MarkerArray as MarkerArrayMsg from sensor_msgs import point_cloud2 as pc2 from hri_msgs.msg import LiveSpeech from opendr_bridge.msg import OpenDRPose2D, OpenDRPose2DKeypoint +import tf.transformations as tr class ROSBridge: @@ -61,18 +70,31 @@ def from_ros_image(self, message: ImageMsg, encoding: str='passthrough') -> Imag image = Image(np.asarray(cv_image, dtype=np.uint8)) return image - def to_ros_image(self, image: Image, encoding: str='passthrough') -> ImageMsg: + def to_ros_image(self, + image: Image, + encoding: str='passthrough', + frame_id: str = None, + time: Time = None) -> ImageMsg: """ Converts an OpenDR image into a ROS image message :param image: OpenDR image to be converted :type image: engine.data.Image :param encoding: encoding to be used for the conversion (inherited from CvBridge) :type encoding: str + :param frame_id: frame id of the image + :type frame_id: str + :param time: time of the image + :type time: rospy.rostime.Time :return: ROS image :rtype: sensor_msgs.msg.Image """ # Convert from the 
OpenDR standard (CHW/RGB) to OpenCV standard (HWC/BGR) - message = self._cv_bridge.cv2_to_imgmsg(image.opencv(), encoding=encoding) + header = Header() + if frame_id is not None: + header.frame_id = frame_id + if time is not None: + header.stamp = time + message = self._cv_bridge.cv2_to_imgmsg(image.opencv(), encoding=encoding, header=header) return message def to_ros_pose(self, pose: Pose): @@ -641,7 +663,7 @@ def from_ros_point_cloud2(self, point_cloud: PointCloud2Msg): return result - def to_ros_point_cloud2(self, point_cloud: PointCloud, channels: str = None): + def to_ros_point_cloud2(self, point_cloud: PointCloud, channels: str = None, frame_id="base_link"): """ Converts an OpenDR PointCloud message into a ROS PointCloud2 @@ -655,7 +677,7 @@ def to_ros_point_cloud2(self, point_cloud: PointCloud, channels: str = None): header = Header() header.stamp = rospy.Time.now() - header.frame_id = "base_link" + header.frame_id = frame_id channel_count = point_cloud.data.shape[-1] - 3 @@ -760,6 +782,164 @@ def to_ros_boxes_3d(self, boxes_3d: BoundingBox3DList, classes): ros_boxes_3d.detections.append(box) return ros_boxes_3d + def to_ros_marker(self, + frame_id: str, + position: list, + id: int, + rgba: tuple = None) -> MarkerMsg: + """ + Creates ROS Marker message given positions x,y,z and frame_id. + :param frame_id: The frame_id of the marker. + :type frame_id: str + :param position: The position of the marker. + :type position: list + :param id: The id of the marker. + :type id: int + :param rgba: The color of the marker. + :type rgba: tuple + :return: ROS Marker message. + :rtype: visualization_msgs.msg.Marker + """ + marker = MarkerMsg() + marker.header.frame_id = frame_id + marker.header.stamp = rospy.Time.now() + marker.type = marker.SPHERE + marker.id = id + marker.action = marker.ADD + marker.pose.position.x = position[0] + marker.pose.position.y = position[1] + marker.pose.position.z = position[2] + marker.pose.orientation.x = 0.0 + marker.pose.orientation.y = 0.0 + marker.pose.orientation.z = 0.0 + marker.pose.orientation.w = 1.0 + marker.scale.x = 1.0 + marker.scale.y = 1.0 + marker.scale.z = 1.0 + if rgba is not None: + marker.color.a = rgba[3] + marker.color.r = rgba[0] / 255.0 + marker.color.g = rgba[1] / 255.0 + marker.color.b = rgba[2] / 255.0 + else: + marker.color.a = 1.0 + marker.color.r = 1.0 + marker.color.g = 0.0 + marker.color.b = 0.0 + return marker + + def to_ros_marker_array(self, + position_list: list, + frame_id_list: list, + rgba: tuple = None) -> MarkerArrayMsg: + """ + Creates ROS MarkerArray message given positions x,y,z and frame_id. + :param position_list: The list of positions of the markers. + :type position_list: list + :param frame_id_list: The list of frame_ids of the markers. + :type frame_id_list: list + :param rgba: The color of the marker. + :type rgba: tuple + """ + marker_array = MarkerArrayMsg() + for i in range(len(position_list)): + marker_array.markers.append(self.to_ros_marker(frame_id_list[i], + position_list[i], + i, + rgba)) + return marker_array + + def to_ros_vector3_stamped(self, + x: float, + y: float, + z: float, + frame_id: str, + time: Time) -> Vector3StampedMsg: + """ + Creates a Vector3Stamped message given x,y,z coordinates and frame_id and time + :param x: The x coordinate of the vector. + :type x: float + :param y: The y coordinate of the vector. + :type y: float + :param z: The z coordinate of the vector. + :type z: float + :param frame_id: The frame_id of the vector. + :type frame_id: str + :param time: The time of the vector. 
+ :type time: rospy.rostime.Time + :return: ROS message with the vector. + :rtype: geometry_msgs.msg.Vector3Stamped + """ + + message = Vector3StampedMsg() + message.header.frame_id = frame_id + message.header.stamp = time + message.vector.x = x + message.vector.y = y + message.vector.z = z + + return message + + def from_ros_vector3_stamped(self, message: Vector3StampedMsg): + """ + Creates a Vector3Stamped message given x,y,z coordinates and frame_id and time + :param message: The ROS message to be converted. + :type message: geometry_msgs.msg.Vector3Stamped + :return: The frame_id and the vector. + :rtype: tuple + """ + + x = message.vector.x + y = message.vector.y + z = message.vector.z + + frame_id = message.header.frame_id + + return frame_id, [x, y, z] + + def to_ros_string(self, data: str) -> String: + """ + Creates a String message given data. + :param data: The data to be converted. + :type data: str + :return: ROS message with the data. + :rtype: std_msgs.msg.String + """ + + message = String() + message.data = data + + return message + + def from_ros_string(self, message: String): + """ + Creates a String message given data. + :param message: The ROS message to be converted. + :type message: std_msgs.msg.String + :return: The data. + :rtype: str + """ + + return message.data + + def to_ros_transformstamped(self, stamp, frame_id, child_frame_id, odometry): + """ + Creates a TransformStamped message given frame_id, child_frame_id and odometry. + """ + t = TransformStampedMsg() + t.header.stamp = stamp + t.header.frame_id = frame_id + t.child_frame_id = child_frame_id + t.transform.translation.x = -odometry[0, 3] + t.transform.translation.y = 0.0 + t.transform.translation.z = -odometry[2, 3] + q = tr.quaternion_from_matrix(odometry) + t.transform.rotation.x = q[0] + t.transform.rotation.y = q[1] + t.transform.rotation.z = q[2] + t.transform.rotation.w = q[3] + return t + def from_ros_transcription(self, ros_transcripton: LiveSpeech) -> VoskTranscription: """ Converts an LiveSpeech object to a VoskTranscription object. diff --git a/projects/opendr_ws/src/opendr_perception/CMakeLists.txt b/projects/opendr_ws/src/opendr_perception/CMakeLists.txt index 18835b5d4b..821543cbee 100644 --- a/projects/opendr_ws/src/opendr_perception/CMakeLists.txt +++ b/projects/opendr_ws/src/opendr_perception/CMakeLists.txt @@ -39,7 +39,9 @@ catkin_install_python(PROGRAMS scripts/object_detection_2d_gem_node.py scripts/semantic_segmentation_bisenet_node.py scripts/binary_high_resolution_node.py - scripts/object_tracking_2d_siamrpn_node.py + scripts/continual_slam_dataset_node.py + scripts/continual_slam_predictor_node.py + scripts/continual_slam_learner_node.py scripts/facial_emotion_estimation_node.py scripts/continual_skeleton_based_action_recognition_node.py scripts/point_cloud_2_publisher_node.py diff --git a/projects/opendr_ws/src/opendr_perception/README.md b/projects/opendr_ws/src/opendr_perception/README.md index 3837a63bbb..9ced51e439 100644 --- a/projects/opendr_ws/src/opendr_perception/README.md +++ b/projects/opendr_ws/src/opendr_perception/README.md @@ -879,6 +879,61 @@ whose documentation can be found [here](../../../../docs/reference/audiovisual-e For viewing the output, refer to the [notes above.](#notes) ---- +## RGB + IMU input + +### Continual SLAM ROS Nodes +A ROS node for performing depth+position output mapping based on visual + imu input. Continual SLAM involves the use of two distinct ROS nodes, one dedicated to performing inference and the other exclusively focused on training. 
Both nodes are based on the learner class defined in [ContinualSLAMLearner](../../../../src/opendr/perception/continual_slam/continual_slam_learner.py). + +The Continual SLAM ROS node Python scripts can be found here: [learner](./scripts/continual_slam_learner_node.py), [predictor](./scripts/continual_slam_predictor_node.py). The RGB image + IMU publisher node can be found [here](./scripts/continual_slam_dataset_node.py). + +#### Instructions for basic usage: + +1. Download the KITTI Visual Odometry dataset as described [here](../../../../src/opendr/perception/continual_slam/datasets/README.md). + +2. Decide on the frame rate (FPS), then start the dataset publisher node with the following command: + ```shell + rosrun opendr_perception continual_slam_dataset_node.py + ``` + + The following optional arguments are available: + - `-h or --help`: show a help message and exit + - `--dataset_path`: path to the dataset (default=`./kitti`) + - `--config_file_path`: path to the config file for the learner class (default=`src/opendr/perception/continual_slam/configs/singlegpu_kitti.yaml`) + - `--output_image_topic OUTPUT_IMAGE_TOPIC`: topic to which the RGB images are published (default=`/cl_slam/image`) + - `--output_distance_topic OUTPUT_DISTANCE_TOPIC`: topic to which the distances are published (default=`/cl_slam/distance`) + - `--dataset_fps FPS`: frame rate at which the dataset will be published (default=`3`) + +3. Start the Predictor Node + ```shell + rosrun opendr_perception continual_slam_predictor_node.py + ``` + The following optional arguments are available: + - `-h or --help`: show a help message and exit + - `-c or --config_path`: path to the config file for the learner class (default=`src/opendr/perception/continual_slam/configs/singlegpu_kitti.yaml`) + - `-it or --input_image_topic`: input image topic, listened from Continual SLAM Dataset Node (default=`/cl_slam/image`) + - `-dt or --input_distance_topic`: input distance topic, listened from Continual SLAM Dataset Node (default=`/cl_slam/distance`) + - `-odt or --output_depth_topic`: output depth topic, published to visual output tools (default=`/opendr/predicted/image`) + - `-opt or --output_pose_topic`: output pose topic, published to visual output tools (default=`/opendr/predicted/pose`) + - `-ppcl or --publish_pointcloud`: whether to publish the point cloud output (default=`false`) + - `-opct or --output_pointcloud_topic`: output pointcloud topic, used only if `--publish_pointcloud` is set, published to visual output tools (default=`/opendr/predicted/pointcloud`) + - `-ut or --update_topic`: update topic, listened from Continual SLAM Dataset Node (default=`/cl_slam/update`) + +4. 
Start the Learner Node (Optional) + ```shell + rosrun opendr_perception continual_slam_learner_node.py + ``` + The following optional arguments are available: + - `-h or --help`: show a help message and exit + - `-c or --config_path`: path to the config file for the learner class (default=`src/opendr/perception/continual_slam/configs/singlegpu_kitti.yaml`) + - `-it or --input_image_topic`: input image topic, listened from Continual SLAM Dataset Node (default=`/cl_slam/image`) + - `-dt or --input_distance_topic`: input distance topic, listened from Continual SLAM Dataset Node (default=`/cl_slam/distance`) + - `-ot or --output_weights_topic`: output weights topic to be published to Continual SLAM Predictor Node (default=`/cl_slam/update`) + - `-pr or --publish_rate`: publish rate of the weights (default=`20`) + - `-bs or --buffer_size`: size of the replay buffer (default=`10`) + - `-ss or --sample_size`: sample size of the replay buffer. If 0 is given, only online data is used (default=`3`) + - `-sm or --save_memory`: whether to save memory or not. Add it to the command if you want to write to disk (default=`True`) +---- + ## Audio input ### Speech Command Recognition ROS Node diff --git a/projects/opendr_ws/src/opendr_perception/scripts/continual_slam_dataset_node.py b/projects/opendr_ws/src/opendr_perception/scripts/continual_slam_dataset_node.py new file mode 100644 index 0000000000..e1b97e0759 --- /dev/null +++ b/projects/opendr_ws/src/opendr_perception/scripts/continual_slam_dataset_node.py @@ -0,0 +1,150 @@ +#!/usr/bin/env python +# Copyright 2020-2023 OpenDR European Project +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
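+# Continual SLAM dataset publisher node: replays a KITTI Visual Odometry sequence and publishes
+# each RGB frame together with its distance measurement on the configured ROS topics at a
+# configurable frame rate, so the predictor and learner nodes can be run without a live sensor.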
+ +import os +import argparse +import time +from pathlib import Path + +import rospy +from opendr_bridge import ROSBridge +from sensor_msgs.msg import Image as ROS_Image +from geometry_msgs.msg import Vector3Stamped as ROS_Vector3Stamped + +from opendr.perception.continual_slam.datasets.kitti import KittiDataset + + +class ContinualSlamDatasetNode: + + def __init__(self, + dataset_path: str, + config_file_path: str, + output_image_topic: str = "/opendr/dataset/image", + output_distance_topic: str = "/opendr/imu/distance", + dataset_fps: float = 0.1): + """ + Creates a ROS Node for publishing dataset images + :param dataset_path: Path to the dataset + :type dataset_path: str + :param config_file_path: Path to the config file + :type config_file_path: str + :param output_image_topic: Topic to publish images to + :type output_image_topic: str + :param output_distance_topic: Topic to publish distance to + :type output_distance_topic: str + :param dataset_fps: Dataset FPS + :type dataset_fps: float + """ + self.dataset_path = dataset_path + self.config_file_path = config_file_path + self.bridge = ROSBridge() + self.delay = 1.0 / dataset_fps + + self.output_image_topic = output_image_topic + self.output_distance_topic = output_distance_topic + + def start(self): + """ + Runs the node + """ + rospy.init_node("continual_slam_dataset_node", anonymous=True) + self._init_publisher() + if self._init_dataset(): + self._publish() + + # Auxiliary functions + def _init_publisher(self): + """ + Initializes publishers + """ + self.output_image_publisher = rospy.Publisher(self.output_image_topic, + ROS_Image, + queue_size=10) + self.output_distance_publisher = rospy.Publisher(self.output_distance_topic, + ROS_Vector3Stamped, + queue_size=10) + + def _init_dataset(self): + """ + Initializes dataset + """ + env = os.getenv("OPENDR_HOME") + config_file_path = os.path.join(env, self.config_file_path) + if not Path(config_file_path).exists(): + raise FileNotFoundError("Config file not found") + try: + self.dataset = KittiDataset(self.dataset_path, config_file_path) + return True + except FileNotFoundError: + rospy.logerr("Dataset path is incorrect. 
Please check the path") + return False + + def _publish(self): + """ + Publishes images and distances + """ + rospy.loginfo("Start publishing dataset images") + i = 0 + while not rospy.is_shutdown(): + if i == len(self.dataset)-2: + break + data = self.dataset[i][0] + # Data is in format of {"image_id" : (image, velocity, distance)} for 3 past frames + image_ids = list(data.keys()) + if len(data) < 3: + i += 1 + continue + image_t0, distance_t0 = data[image_ids[0]] + + stamp = rospy.Time.now() + # Convert image to ROS Image + image = self.bridge.to_ros_image(image=image_t0, frame_id=image_ids[0], time=stamp) + # Convert velocity to ROS Vector3Stamped + distance = self.bridge.to_ros_vector3_stamped(distance_t0, 0, 0, image_ids[0], stamp) + # Publish the image and distance + self.output_image_publisher.publish(image) + self.output_distance_publisher.publish(distance) + + rospy.loginfo("Published image {}".format(image_ids[0])) + rospy.loginfo("Published distance {}".format([distance_t0])) + i += 1 + time.sleep(self.delay) + + +def main(): + parser = argparse.ArgumentParser() + parser.add_argument("--dataset_path", type=str, default="/home/canakcia/Desktop/kitti_dset/", + help="Path to the dataset") + parser.add_argument("--config_file_path", type=str, + default="src/opendr/perception/continual_slam/configs/singlegpu_kitti.yaml", + help="Path to the config file") + parser.add_argument("--output_image_topic", type=str, default="/cl_slam/image", + help="ROS topic to publish images") + parser.add_argument("--output_distance_topic", type=str, default="/cl_slam/distance", + help="ROS topic to publish distances") + parser.add_argument("--dataset_fps", type=float, default=3, + help="Dataset frame rate") + args = parser.parse_args() + + node = ContinualSlamDatasetNode(args.dataset_path, + args.config_file_path, + args.output_image_topic, + args.output_distance_topic, + args.dataset_fps) + node.start() + + +if __name__ == "__main__": + main() diff --git a/projects/opendr_ws/src/opendr_perception/scripts/continual_slam_learner_node.py b/projects/opendr_ws/src/opendr_perception/scripts/continual_slam_learner_node.py new file mode 100644 index 0000000000..371cf759dd --- /dev/null +++ b/projects/opendr_ws/src/opendr_perception/scripts/continual_slam_learner_node.py @@ -0,0 +1,286 @@ +#!/usr/bin/env python3 +# Copyright 2020-2023 OpenDR European Project +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
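+# Continual SLAM learner node: subscribes to synchronized image and distance messages, keeps the
+# incoming triplets in a replay buffer, fine-tunes the ContinualSLAMLearner online, and periodically
+# publishes the folder of the saved weights so the predictor node can reload them.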
+ +import argparse +import message_filters +import rospy +import os + +from opendr_bridge import ROSBridge +from opendr.perception.continual_slam.continual_slam_learner import ContinualSLAMLearner +from opendr.perception.continual_slam.algorithm.depth_pose_module.replay_buffer import ReplayBuffer + +from sensor_msgs.msg import Image as ROS_Image +from geometry_msgs.msg import Vector3Stamped as ROS_Vector3Stamped +from std_msgs.msg import String as ROS_String + + +class ContinualSlamLearner: + def __init__(self, + path: str, + input_image_topic: str, + input_distance_topic: str, + output_weights_topic: str, + publish_rate: int = 20, + buffer_size: int = 1000, + save_memory: bool = True, + sample_size: int = 3): + """ + Continual SLAM learner node. This node is responsible for training and updating the predictor. + :param path: Path to the folder where the model will be saved. + :type path: str + :param input_image_topic: ROS topic where the input images are published. + :type input_image_topic: str + :param input_distance_topic: ROS topic where the input distances are published. + :type input_distance_topic: str + :param output_weights_topic: ROS topic where the output weights are published. + :type output_weights_topic: str + :param publish_rate: Rate at which the output weights are published. + :type publish_rate: int + :param buffer_size: Size of the replay buffer. + :type buffer_size: int + :param save_memory: If True, the replay buffer will be saved to memory. + :type save_memory: bool + :param sample_size: Number of samples (triplets from replay buffer) that will be used for training. + :type sample_size: int + """ + self.bridge = ROSBridge() + self.publish_rate = publish_rate + self.buffer_size = buffer_size + self.save_memory = save_memory + self.sample_size = sample_size + + self.input_image_topic = input_image_topic + self.input_distance_topic = input_distance_topic + self.output_weights_topic = output_weights_topic + + self.path = path + self.learner = None + self.sequence = None + + self.do_publish = 0 + + # Create caches + self.cache = {"image": [], + "distance": [], + "id": []} + + def callback(self, image: ROS_Image, distance: ROS_Vector3Stamped): + """ + Callback method of predictor node. 
+ :param image: Input image as a ROS message + :type ROS_Image + :param distance: Distance to the object as a ROS message + :type ROS_Vector3Stamped + """ + image = self.bridge.from_ros_image(image) + frame_id, distance = self.bridge.from_ros_vector3_stamped(distance) + incoming_sequence = frame_id.split("_")[0] + distance = distance[0] + + # Clear cache if new sequence is detected + if self.sequence is None: + self.sequence = incoming_sequence + if self.sequence != incoming_sequence: + self._clean_cache() + self.sequence = incoming_sequence + + self._cache_arriving_data(image, distance, frame_id) + + # If cache is not full, return + if len(self.cache['image']) < 3: + return + + # Add triplet to replay buffer and sample + item = self._convert_cache_into_triplet() + if self.sample_size > 0: + self.replay_buffer.add(item) + item = ContinualSLAMLearner._input_formatter(item) + if len(self.replay_buffer) < self.sample_size: + return + batch = self.replay_buffer.sample() + # Concatenate the current triplet with the sampled batch + batch.insert(0, item) + else: + item = ContinualSLAMLearner._input_formatter(item) + batch = [item] + + # Train learner + self.learner.fit(batch, learner=True) + + # Publish new weights + if self.do_publish % self.publish_rate == 0: + message = self.learner.save() + rospy.loginfo(f"CL-SLAM learner publishing new weights, currently in the frame {frame_id}") + ros_message = self.bridge.to_ros_string(message) + self.output_weights_publisher.publish(ros_message) + self.do_publish += 1 + + def listen(self): + """ + Start the node and begin processing input data. The order of the function calls ensures that the node does not + try to process input images without being in a trained state. + """ + rospy.init_node('opendr_continual_slam_node', anonymous=True) + if self._init_learner() and self._init_replay_buffer(): + rospy.loginfo("Continual SLAM node started.") + self._init_publisher() + self._init_subscribers() + rospy.spin() + + def _init_subscribers(self): + """ + Initializing subscribers. Here we also do synchronization between two ROS topics. + """ + self.input_image_subscriber = message_filters.Subscriber( + self.input_image_topic, ROS_Image, queue_size=1, buff_size=10000000) + self.input_distance_subscriber = message_filters.Subscriber( + self.input_distance_topic, ROS_Vector3Stamped, queue_size=1, buff_size=10000000) + self.ts = message_filters.TimeSynchronizer([self.input_image_subscriber, + self.input_distance_subscriber], 1) + self.ts.registerCallback(self.callback) + + def _init_publisher(self): + """ + Initializing publishers. 
+ """ + self.output_weights_publisher = rospy.Publisher(self.output_weights_topic, ROS_String, queue_size=10) + + def _init_learner(self): + """ + Creating a ContinualSLAMLearner instance with predictor and ros mode + """ + env = os.getenv('OPENDR_HOME') + path = os.path.join(env, self.path) + print(path) + try: + self.learner = ContinualSLAMLearner(path, mode="learner", ros=False) + return True + except Exception as e: + rospy.logerr("Continual SLAM node failed to initialize, due to learner initialization error.") + rospy.logerr(e) + return False + + def _init_replay_buffer(self): + """ + Creating a replay buffer instance + """ + env = os.getenv('OPENDR_HOME') + path = os.path.join(env, self.path) + try: + self.replay_buffer = ReplayBuffer(buffer_size=self.buffer_size, + save_memory=self.save_memory, + dataset_config_path=path, + sample_size=self.sample_size) + return True + except Exception: + rospy.logerr("Continual SLAM node failed to initialize, due to replay buffer initialization error.") + if self.sample_size > 0: + return False + else: + return True + + def _clean_cache(self): + """ + Clearing the cache. + """ + for key in self.cache.keys(): + self.cache[key].clear() + + def _cache_arriving_data(self, image, distance, frame_id): + """ + Caching the arriving data. + :param image: Input image as a ROS message + :type ROS_Image + :param distance: Distance to the object as a ROS message + :type ROS_Vector3Stamped + :param frame_id: Frame id of the arriving data + :type int + """ + self.cache["image"].append(image) + self.cache["distance"].append(distance) + self.cache["id"].append(frame_id) + if len(self.cache['image']) > 3: + self.cache["image"].pop(0) + self.cache["distance"].pop(0) + self.cache["id"].pop(0) + + def _convert_cache_into_triplet(self) -> dict: + """ + Converting the cache into a triplet. + """ + triplet = {} + for i in range(len(self.cache["image"])): + triplet[self.cache['id'][i]] = (self.cache["image"][i], self.cache["distance"][i]) + return triplet + + +def main(): + parser = argparse.ArgumentParser() + parser.add_argument('-c', + '--config_path', + type=str, + default='src/opendr/perception/continual_slam/configs/singlegpu_kitti.yaml', + help='Path to the config file') + parser.add_argument('-it', + '--input_image_topic', + type=str, + default='/cl_slam/image', + help='Input image topic, listened from Continual SLAM Dataset Node') + parser.add_argument('-dt', + '--input_distance_topic', + type=str, + default='/cl_slam/distance', + help='Input distance topic, listened from Continual SLAM Dataset Node') + parser.add_argument('-ot', + '--output_weights_topic', + type=str, + default='/cl_slam/update', + help='Output weights topic, published to Continual SLAM Predictor Node') + parser.add_argument('-pr', + '--publish_rate', + type=int, + default=20, + help='Publish rate of the weights') + parser.add_argument('-bs', + '--buffer_size', + type=int, + default=10, + help='Size of the replay buffer') + parser.add_argument('-ss', + '--sample_size', + type=int, + default=3, + help='Sample size of the replay buffer. If 0 is given, only online data is used') + parser.add_argument('-sm', + '--save_memory', + action='store_false', + default=True, + help='Whether to save memory or not. 
Add it to the command if you want to write to disk') + args = parser.parse_args() + + node = ContinualSlamLearner(args.config_path, + args.input_image_topic, + args.input_distance_topic, + args.output_weights_topic, + args.publish_rate, + args.buffer_size, + args.save_memory, + args.sample_size) + node.listen() + + +if __name__ == '__main__': + main() diff --git a/projects/opendr_ws/src/opendr_perception/scripts/continual_slam_predictor_node.py b/projects/opendr_ws/src/opendr_perception/scripts/continual_slam_predictor_node.py new file mode 100644 index 0000000000..ee6dc73776 --- /dev/null +++ b/projects/opendr_ws/src/opendr_perception/scripts/continual_slam_predictor_node.py @@ -0,0 +1,345 @@ +#!/usr/bin/env python3 +# Copyright 2020-2023 OpenDR European Project +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import argparse +import numpy as np +from pathlib import Path +import os + +from opendr.perception.continual_slam.continual_slam_learner import ContinualSLAMLearner + +import message_filters +import rospy +from sensor_msgs.msg import Image as ROS_Image +from geometry_msgs.msg import Vector3Stamped as ROS_Vector3Stamped +from visualization_msgs.msg import MarkerArray as ROS_MarkerArray +from std_msgs.msg import String as ROS_String +from opendr_bridge import ROSBridge +import tf2_ros +from sensor_msgs.msg import PointCloud2 as ROS_PointCloud2 + + +class ContinualSlamPredictor: + def __init__(self, + path: Path, + input_image_topic: str, + input_distance_topic: str, + output_depth_topic: str, + output_pose_topic: str, + output_pointcloud_topic: str, + publish_pointcloud: bool, + update_topic: str): + """ + Continual SLAM predictor node. This node is responsible for publishing predicted pose and depth outputs. + :param path: Path to the folder where the model will be saved. + :type path: str + :param input_image_topic: ROS topic where the input image is published. + :type input_image_topic: str + :param input_distance_topic: ROS topic where the input distance is published. + :type input_distance_topic: str + :param output_depth_topic: ROS topic where the output depth will be published. + :type output_depth_topic: str + :param output_pose_topic: ROS topic where the output pose will be published. + :type output_pose_topic: str + :param output_pointcloud_topic: ROS topic where the output pointcloud will be published. + :type output_pointcloud_topic: str + :param publish_pointcloud: Whether to publish the pointcloud or not. + :type publish_pointcloud: bool + :param update_topic: ROS topic where the update signal is published. 
+ :type update_topic: str + """ + self.bridge = ROSBridge() + + self.input_image_topic = input_image_topic + self.input_distance_topic = input_distance_topic + self.output_depth_topic = output_depth_topic + self.output_pose_topic = output_pose_topic + if publish_pointcloud: + self.output_pointcloud_topic = output_pointcloud_topic + else: + self.output_pointcloud_topic = None + self.update_topic = update_topic + + self.path = path + self.predictor = None + self.sequence = None + self.color = None + self.frame_id = None + + # Create caches + self.cache = { + "image": [], + "distance": [], + "id": [], + "marker_position": [], + "marker_frame_id": []} + + def callback(self, image: ROS_Image, distance: ROS_Vector3Stamped): + """ + Callback method of predictor node. + :param image: Input image as a ROS message + :type ROS_Image + :param distance: Distance to the object as a ROS message + :type ROS_Vector3Stamped + """ + # Data-preprocessing + image = self.bridge.from_ros_image(image) + frame_id, distance = self.bridge.from_ros_vector3_stamped(distance) + self.frame_id = frame_id + incoming_sequence = frame_id.split("_")[0] + distance = distance[0] + + self._check_sequence(incoming_sequence) + self._cache_arriving_data(image, distance, frame_id) + triplet = self._convert_cache_into_triplet() + # If triplet is not ready, return + if len(triplet) < 3: + return + # Infer depth and pose + if self.output_pointcloud_topic: + depth, raw_depth, pose_graph = self._infer(triplet, return_raw_depth=True, pointcloud=True) + odometry = pose_graph.return_last_poses(n=1)[0] + pointcloud = self.predictor.visualize_3d(image, raw_depth) + pointcloud = self.bridge.to_ros_point_cloud2(pointcloud, 'rgb', 'car') + tf = self.bridge.to_ros_transformstamped(stamp=rospy.Time.now(), + frame_id="world", + child_frame_id="car", + odometry=odometry) + else: + depth, _ = self._infer(triplet) + if depth is None: + return + rgba = (self.color[0], self.color[1], self.color[2], 1.0) + marker_list = self.bridge.to_ros_marker_array(self.cache['marker_position'], + self.cache['marker_frame_id'], + rgba) + + depth = self.bridge.to_ros_image(depth) + self.output_depth_publisher.publish(depth) + self.output_pose_publisher.publish(marker_list) + if self.output_pointcloud_topic: + self.tf_broadcaster.sendTransform(tf) + self.pointcloud_publisher.publish(pointcloud) + + def update(self, message: ROS_String): + """ + Update the predictor with the new data + :param message: ROS message + :type ROS_Byte + """ + rospy.loginfo("CL-SLAM predictor is currently updating its weights.") + message = self.bridge.from_ros_string(message) + self.predictor.load(weights_folder=message) + + def listen(self): + """ + Start the node and begin processing input data. The order of the function calls ensures that the node does not + try to process input images without being in a trained state. + """ + rospy.init_node('opendr_continual_slam_node', anonymous=True) + if self._init_predictor(): + rospy.loginfo("Continual SLAM node started.") + self._init_publisher() + self._init_subscribers() + rospy.spin() + + # Auxiliary functions + def _init_subscribers(self): + """ + Initializing subscribers. Here we also do synchronization between two ROS topics. 
+ """ + self.input_image_subscriber = message_filters.Subscriber( + self.input_image_topic, ROS_Image, queue_size=1, buff_size=10000000) + self.input_distance_subscriber = message_filters.Subscriber( + self.input_distance_topic, ROS_Vector3Stamped, queue_size=1, buff_size=10000000) + self.ts = message_filters.TimeSynchronizer([self.input_image_subscriber, self.input_distance_subscriber], 1) + self.ts.registerCallback(self.callback) + + self.update_subscriber = rospy.Subscriber(self.update_topic, ROS_String, self.update, queue_size=1, buff_size=10000000) + + def _init_publisher(self): + """ + Initializing publishers. + """ + self.output_depth_publisher = rospy.Publisher( + self.output_depth_topic, ROS_Image, queue_size=10) + self.output_pose_publisher = rospy.Publisher( + self.output_pose_topic, ROS_MarkerArray, queue_size=10) + if self.output_pointcloud_topic: + self.tf_broadcaster = tf2_ros.TransformBroadcaster() + self.pointcloud_publisher = rospy.Publisher( + self.output_pointcloud_topic, ROS_PointCloud2, queue_size=10) + + def _init_predictor(self): + """ + Creating a ContinualSLAMLearner instance with predictor and ros mode + """ + env = os.getenv('OPENDR_HOME') + path = os.path.join(env, self.path) + try: + self.predictor = ContinualSLAMLearner(path, mode="predictor", ros=False, do_loop_closure=True) + return True + except Exception: + rospy.logerr("Continual SLAM node failed to initialize, due to predictor initialization error.") + return False + + def _clean_cache(self): + """ + Cleaning the cache + """ + for key in self.cache.keys(): + self.cache[key].clear() + + def _cache_arriving_data(self, image, distance, frame_id): + """ + Caching arriving data + :param image: Input image + :type image: np.ndarray + :param distance: Distance to the object + :type distance: float + :param frame_id: Frame id + :type frame_id: str + """ + # Cache the arriving last 3 data + self.cache['image'].append(image) + self.cache['distance'].append(distance) + self.cache['id'].append(frame_id) + if len(self.cache['image']) > 3: + self.cache['image'].pop(0) + self.cache['distance'].pop(0) + self.cache['id'].pop(0) + + def _convert_cache_into_triplet(self) -> dict: + """ + Converting the cached data into a triplet dictionary + return: Triplet dictionary + rtype: dict + """ + triplet = {} + for i in range(len(self.cache['image'])): + triplet[self.cache['id'][i]] = (self.cache['image'][i], self.cache['distance'][i]) + return triplet + + def _check_sequence(self, incoming_sequence): + """ + Checking if the new sequence is detected + :param incoming_sequence: Incoming sequence + :type incoming_sequence: str + """ + # If new sequence is detected, clean the cache + if self.sequence is None: + self.sequence = incoming_sequence + self.color = list(np.random.choice(range(256), size=3)) + if self.sequence != incoming_sequence: + self._clean_cache() + self.predictor._reset() + self.sequence = incoming_sequence + self.color = list(np.random.choice(range(256), size=3)) + + def _infer(self, triplet: dict, return_raw_depth: bool = False, pointcloud: bool = False): + """ + Infer the triplet + :param triplet: Triplet + :type triplet: dict + :param return_raw_depth: Return raw depth + :type return_raw_depth: bool + :param pointcloud: Return pointcloud + :type pointcloud: bool + :return: Depth iimage, odometry, optional raw depth image + :rtype: np.ndarray, np.ndarray, np.ndarray + """ + depth, odometry, _, lc, pose_graph = self.predictor.infer(triplet, pointcloud=pointcloud) + if not lc: + points = 
pose_graph.return_last_positions(n=5) + if not len(points): + return None + for point in points: + position = [-point[0], 0.0, -point[2]] + + self.cache["marker_position"].append(position) + self.cache["marker_frame_id"].append("world") + if self.color is None: + self.color = [255, 0, 0] + else: + self.cache["marker_position"].clear() + self.cache["marker_frame_id"].clear() + points = pose_graph.return_all_positions() + for point in points: + position = [-point[0], 0.0, -point[2]] + self.cache["marker_position"].append(position) + self.cache["marker_frame_id"].append("world") + if return_raw_depth: + depth, raw_depth = depth + return depth, raw_depth, pose_graph + return depth, odometry + + +def main(): + parser = argparse.ArgumentParser() + + parser.add_argument('-c', + '--config_path', + type=str, + default='src/opendr/perception/continual_slam/configs/singlegpu_kitti.yaml', + help='Path to the config file') + parser.add_argument('-it', + '--input_image_topic', + type=str, + default='/cl_slam/image', + help='Input image topic, listened from Continual SLAM Dataset Node') + parser.add_argument('-dt', + '--input_distance_topic', + type=str, + default='/cl_slam/distance', + help='Input distance topic, listened from Continual SLAM Dataset Node') + parser.add_argument('-odt', + '--output_depth_topic', + type=str, + default='/opendr/predicted/image', + help='Output depth topic, published to Continual SLAM Dataset Node') + parser.add_argument('-opt', + '--output_pose_topic', + type=str, + default='/opendr/predicted/pose', + help='Output pose topic, published to Continual SLAM Dataset Node') + parser.add_argument('-ppcl', + '--publish_pointcloud', + action='store_true', + help='Publish pointcloud to ROS') + parser.add_argument('-opct', + '--output_pointcloud_topic', + type=str, + default='/opendr/predicted/pointcloud', + help='Output pointcloud topic, published to Continual SLAM Dataset Node') + parser.add_argument('-ut', + '--update_topic', + type=str, + default='/cl_slam/update', + help='Update topic, listened from Continual SLAM Dataset Node') + args = parser.parse_args() + + node = ContinualSlamPredictor(args.config_path, + args.input_image_topic, + args.input_distance_topic, + args.output_depth_topic, + args.output_pose_topic, + args.output_pointcloud_topic, + args.publish_pointcloud, + args.update_topic) + node.listen() + + +if __name__ == '__main__': + main() diff --git a/projects/opendr_ws_2/README.md b/projects/opendr_ws_2/README.md index a7d55f3001..db19d17a40 100644 --- a/projects/opendr_ws_2/README.md +++ b/projects/opendr_ws_2/README.md @@ -83,6 +83,10 @@ Currently, apart from tools, opendr_ws_2 contains the following ROS2 nodes (cate 1. [RGBD Hand Gesture Recognition](src/opendr_perception/README.md#rgbd-hand-gesture-recognition-ros2-node) ## RGB + Audio input 1. [Audiovisual Emotion Recognition](src/opendr_perception/README.md#audiovisual-emotion-recognition-ros2-node) + +## RGB + IMU input +1. [Continual SLAM](src/opendr_perception//README.md#continual-slam-ros2-nodes) + ## Audio input 1. [Speech Command Recognition](src/opendr_perception/README.md#speech-command-recognition-ros2-node) 2. 
[Speech Transcription](src/opendr_perception/README.md#speech-transcription-ros2-node) diff --git a/projects/opendr_ws_2/src/opendr_bridge/opendr_bridge/bridge.py b/projects/opendr_ws_2/src/opendr_bridge/opendr_bridge/bridge.py index e598762d81..31ac7600c0 100644 --- a/projects/opendr_ws_2/src/opendr_bridge/opendr_bridge/bridge.py +++ b/projects/opendr_ws_2/src/opendr_bridge/opendr_bridge/bridge.py @@ -35,7 +35,13 @@ from geometry_msgs.msg import ( Pose2D, Point32 as Point32Msg, Quaternion as QuaternionMsg, Pose as Pose3D, - Point + Vector3Stamped as Vector3StampedMsg, + Point, + TransformStamped as TransformStampedMsg, +) +from visualization_msgs.msg import ( + Marker as MarkerMsg, + MarkerArray as MarkerArrayMsg, ) from opendr_interface.msg import OpenDRPose2D, OpenDRPose2DKeypoint, OpenDRPose3D, OpenDRPose3DKeypoint, OpenDRTranscription from sensor_msgs_py import point_cloud2 as pc2 @@ -53,18 +59,31 @@ class ROS2Bridge: def __init__(self): self._cv_bridge = CvBridge() - def to_ros_image(self, image: Image, encoding: str='passthrough') -> ImageMsg: + def to_ros_image(self, + image: Image, + encoding: str='passthrough', + frame_id: str = None, + time: str = None) -> ImageMsg: """ Converts an OpenDR image into a ROS2 image message :param image: OpenDR image to be converted :type image: engine.data.Image :param encoding: encoding to be used for the conversion (inherited from CvBridge) :type encoding: str + :param frame_id: frame id of the image + :type frame_id: str + :param time: time of the image + :type time: str :return: ROS2 image :rtype: sensor_msgs.msg.Image """ + header = Header() + if frame_id is not None: + header.frame_id = frame_id + if time is not None: + header.stamp = time # Convert from the OpenDR standard (CHW/RGB) to OpenCV standard (HWC/BGR) - message = self._cv_bridge.cv2_to_imgmsg(image.opencv(), encoding=encoding) + message = self._cv_bridge.cv2_to_imgmsg(image.opencv(), encoding=encoding, header=header) return message def from_ros_image(self, message: ImageMsg, encoding: str='passthrough') -> Image: @@ -410,7 +429,7 @@ def from_ros_point_cloud2(self, point_cloud: PointCloud2Msg): return result - def to_ros_point_cloud2(self, point_cloud: PointCloud, timestamp: str, channels: str=None): + def to_ros_point_cloud2(self, point_cloud: PointCloud, timestamp: str, channels: str=None, frame_id="base_link"): """ Converts an OpenDR PointCloud message into a ROS PointCloud2 @@ -424,7 +443,7 @@ def to_ros_point_cloud2(self, point_cloud: PointCloud, timestamp: str, channels: header = Header() header.stamp = timestamp - header.frame_id = "base_link" + header.frame_id = frame_id channel_count = point_cloud.data.shape[-1] - 3 @@ -733,6 +752,195 @@ def from_category_to_rosclass(self, prediction, timestamp, source_data=None): classification.source_img = source_data return classification + def to_ros_marker(self, + frame_id: str, + position: list, + id: int, + timestamp: str, + rgba: tuple = None) -> MarkerMsg: + """ + Creates ROS Marker message given positions x,y,z and frame_id. + :param frame_id: The frame_id of the marker. + :type frame_id: str + :param position: The position of the marker. + :type position: list + :param id: The id of the marker. + :type id: int + :param timestamp: The timestamp of the marker. + :type timestamp: str + :param rgba: The color of the marker. + :type rgba: tuple + :return: ROS Marker message. 
+ :rtype: visualization_msgs.msg.Marker + """ + marker = MarkerMsg() + marker.header.frame_id = frame_id + marker.header.stamp = timestamp + marker.type = marker.SPHERE + marker.id = id + marker.action = marker.ADD + marker.pose.position.x = float(position[0]) + marker.pose.position.y = float(position[1]) + marker.pose.position.z = float(position[2]) + marker.pose.orientation.x = 0.0 + marker.pose.orientation.y = 0.0 + marker.pose.orientation.z = 0.0 + marker.pose.orientation.w = 1.0 + marker.scale.x = 1.0 + marker.scale.y = 1.0 + marker.scale.z = 1.0 + if rgba is not None: + marker.color.a = 1.0 + marker.color.r = float(rgba[0]/255) + marker.color.g = float(rgba[1]/255) + marker.color.b = float(rgba[2]/255) + else: + marker.color.a = 1.0 + marker.color.r = 1.0 + marker.color.g = 0.0 + marker.color.b = 0.0 + return marker + + def to_ros_marker_array(self, + position_list: list, + frame_id_list: list, + stamp: str, + rgba: tuple = None) -> MarkerArrayMsg: + """ + Creates ROS MarkerArray message given positions x,y,z and frame_id. + :param position_list: The list of positions of the markers. + :type position_list: list + :param frame_id_list: The list of frame_ids of the markers. + :type frame_id_list: list + :param stamp: The timestamp of the marker. + :type stamp: str + :param rgba: The color of the marker. + :type rgba: tuple + """ + marker_array = MarkerArrayMsg() + for i in range(len(position_list)): + marker_array.markers.append(self.to_ros_marker(frame_id_list[i], + position_list[i], + i, + stamp, + rgba)) + return marker_array + + def to_ros_vector3_stamped(self, + x: float, + y: float, + z: float, + frame_id: str, + time: str) -> Vector3StampedMsg: + """ + Creates a Vector3Stamped message given x,y,z coordinates and frame_id and time + :param x: The x coordinate of the vector. + :type x: float + :param y: The y coordinate of the vector. + :type y: float + :param z: The z coordinate of the vector. + :type z: float + :param frame_id: The frame_id of the vector. + :type frame_id: str + :param time: The time of the vector. + :type time: str + :return: ROS message with the vector. + :rtype: geometry_msgs.msg.Vector3Stamped + """ + + message = Vector3StampedMsg() + message.header.frame_id = frame_id + message.header.stamp = time + message.vector.x = x + message.vector.y = y + message.vector.z = z + + return message + + def from_ros_vector3_stamped(self, message: Vector3StampedMsg): + """ + Creates a Vector3Stamped message given x,y,z coordinates and frame_id and time + :param message: The ROS message to be converted. + :type message: geometry_msgs.msg.Vector3Stamped + :return: The frame_id and the vector. + :rtype: tuple + """ + + x = message.vector.x + y = message.vector.y + z = message.vector.z + + frame_id = message.header.frame_id + + return frame_id, [x, y, z] + + def to_ros_string(self, data: str) -> String: + """ + Creates a String message given data. + :param data: The data to be converted. + :type data: str + :return: ROS message with the data. + :rtype: std_msgs.msg.String + """ + + message = String() + message.data = data + + return message + + def from_ros_string(self, message: String): + """ + Creates a String message given data. + :param message: The ROS message to be converted. + :type message: std_msgs.msg.String + :return: The data. + :rtype: str + """ + + return message.data + + def to_ros_transformstamped(self, stamp, frame_id, child_frame_id, odometry): + """ + Creates a TransformStamped message given frame_id, child_frame_id and odometry. 
+ """ + t = TransformStampedMsg() + t.header.stamp = stamp + t.header.frame_id = frame_id + t.child_frame_id = child_frame_id + t.transform.translation.x = -odometry[0, 3] + t.transform.translation.y = 0.0 + t.transform.translation.z = -odometry[2, 3] + + def quaternion_from_matrix(matrix): + q = np.empty((4, ), dtype=np.float64) + M = np.array(matrix, dtype=np.float64, copy=False)[:4, :4] + t = np.trace(M) + if t > M[3, 3]: + q[3] = t + q[2] = M[1, 0] - M[0, 1] + q[1] = M[0, 2] - M[2, 0] + q[0] = M[2, 1] - M[1, 2] + else: + i, j, k = 0, 1, 2 + if M[1, 1] > M[0, 0]: + i, j, k = 1, 2, 0 + if M[2, 2] > M[i, i]: + i, j, k = 2, 0, 1 + t = M[i, i] - (M[j, j] + M[k, k]) + M[3, 3] + q[i] = t + q[j] = M[i, j] + M[j, i] + q[k] = M[k, i] + M[i, k] + q[3] = M[k, j] - M[j, k] + q *= 0.5 / np.sqrt(t * M[3, 3]) + return q + + q = quaternion_from_matrix(odometry) + t.transform.rotation.x = q[0] + t.transform.rotation.y = q[1] + t.transform.rotation.z = q[2] + t.transform.rotation.w = q[3] + return t + def from_ros_transcription(self, ros_transcripton: OpenDRTranscription) -> VoskTranscription: """ Converts an OpenDRTranscription object to a VoskTranscription object. @@ -753,7 +961,7 @@ def to_ros_transcription(self, transcription: VoskTranscription) -> OpenDRTransc :type transcription: VoskTranscription. :return: An OpenDRTranscription object containing the same text as the input Transcription object. :rtype: OpenDRTranscription. - """ + """ ros_transcripton = OpenDRTranscription() if transcription.accept_waveform: ros_transcripton.final = transcription.text diff --git a/projects/opendr_ws_2/src/opendr_perception/README.md b/projects/opendr_ws_2/src/opendr_perception/README.md index 42176623c0..587051cda8 100755 --- a/projects/opendr_ws_2/src/opendr_perception/README.md +++ b/projects/opendr_ws_2/src/opendr_perception/README.md @@ -890,6 +890,61 @@ whose documentation can be found [here](../../../../docs/reference/audiovisual-e For viewing the output, refer to the [notes above.](#notes) +---- +## RGB + IMU input + +### Continual SLAM ROS2 Nodes +A ROS node for performing depth+position output mapping based on visual + imu input. Continual SLAM involves the use of two distinct ROS nodes, one dedicated to performing inference and the other exclusively focused on training. Both of the nodes are based on the learner class defined in [ContinualSLAMLearner](../../../../src/opendr/perception/continual_slam/continual_slam_learner.py). + +You can find the continual slam ROS node python scripts here [learner](./opendr_perception/continual_slam_learner_node.py), [predictor](./opendr_perception/continual_slam_predictor_node.py). You can further also find the RGB image + IMU publisher node [here](./opendr_perception/continual_slam_dataset_node.py). + +#### Instructions for basic usage: + +1. Download the KITTI Visual Odometry datased as it is described [here](../../../../src/opendr/perception/continual_slam/datasets/README.md). + +2. 
Decide on the frame rate (FPS), then start the dataset publisher node with the following command: + ```shell + ros2 run opendr_perception continual_slam_dataset + ``` + + The following optional arguments are available: + - `-h or --help`: show a help message and exit + - `--dataset_path`: path to the dataset (default=`./kitti`) + - `--config_file_path`: path to the config file for the learner class (default=`src/opendr/perception/continual_slam/configs/singlegpu_kitti.yaml`) + - `--output_image_topic OUTPUT_IMAGE_TOPIC`: topic to which the RGB images are published (default=`/cl_slam/image`) + - `--output_distance_topic OUTPUT_DISTANCE_TOPIC`: topic to which the distances are published (default=`/cl_slam/distance`) + - `--dataset_fps FPS`: frame rate at which the dataset will be published (default=`3`) + +3. Start the Predictor Node + ```shell + ros2 run opendr_perception continual_slam_predictor + ``` + The following optional arguments are available: + - `-h or --help`: show a help message and exit + - `-c or --config_path`: path to the config file for the learner class (default=`src/opendr/perception/continual_slam/configs/singlegpu_kitti.yaml`) + - `-it or --input_image_topic`: input image topic, listened from Continual SLAM Dataset Node (default=`/cl_slam/image`) + - `-dt or --input_distance_topic`: input distance topic, listened from Continual SLAM Dataset Node (default=`/cl_slam/distance`) + - `-odt or --output_depth_topic`: output depth topic, published to visual output tools (default=`/opendr/predicted/image`) + - `-opt or --output_pose_topic`: output pose topic, published to visual output tools (default=`/opendr/predicted/pose`) + - `-ppcl or --publish_pointcloud`: whether to publish the point cloud output (default=`false`) + - `-opct or --output_pointcloud_topic`: output pointcloud topic, used only if `--publish_pointcloud` is set, published to visual output tools (default=`/opendr/predicted/pointcloud`) + - `-ut or --update_topic`: update topic, listened from Continual SLAM Dataset Node (default=`/cl_slam/update`) + +4. Start the Learner Node (Optional) + ```shell + ros2 run opendr_perception continual_slam_learner + ``` + The following optional arguments are available: + - `-h or --help`: show a help message and exit + - `-c or --config_path`: path to the config file for the learner class (default=`src/opendr/perception/continual_slam/configs/singlegpu_kitti.yaml`) + - `-it or --input_image_topic`: input image topic, listened from Continual SLAM Dataset Node (default=`/cl_slam/image`) + - `-dt or --input_distance_topic`: input distance topic, listened from Continual SLAM Dataset Node (default=`/cl_slam/distance`) + - `-ot or --output_weights_topic`: output weights topic to be published to Continual SLAM Predictor Node (default=`/cl_slam/update`) + - `-pr or --publish_rate`: publish rate of the weights (default=`20`) + - `-bs or --buffer_size`: size of the replay buffer (default=`10`) + - `-ss or --sample_size`: sample size of the replay buffer. If 0 is given, only online data is used (default=`3`) + - `-sm or --save_memory`: whether to save memory or not. 
Add it to the command if you want to write to disk (default=`True`) + ---- ## Audio input diff --git a/projects/opendr_ws_2/src/opendr_perception/opendr_perception/continual_slam_dataset_node.py b/projects/opendr_ws_2/src/opendr_perception/opendr_perception/continual_slam_dataset_node.py new file mode 100644 index 0000000000..2142520e92 --- /dev/null +++ b/projects/opendr_ws_2/src/opendr_perception/opendr_perception/continual_slam_dataset_node.py @@ -0,0 +1,153 @@ +#!/usr/bin/env python +# Copyright 2020-2023 OpenDR European Project +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import os +import argparse +import time +from pathlib import Path + +import rclpy +from rclpy.node import Node +from opendr_bridge import ROS2Bridge +from sensor_msgs.msg import Image as ROS_Image +from geometry_msgs.msg import Vector3Stamped as ROS_Vector3Stamped + +from opendr.perception.continual_slam.datasets.kitti import KittiDataset + + +class ContinualSlamDatasetNode(Node): + def __init__(self, + dataset_path: str, + config_file_path: str, + output_image_topic: str = "/opendr/dataset/image", + output_distance_topic: str = "/opendr/imu/distance", + dataset_fps: float = 0.1): + super().__init__("opendr_continual_slam_dataset_node") + """ + Creates a ROS Node for publishing dataset images + :param dataset_path: Path to the dataset + :type dataset_path: str + :param config_file_path: Path to the config file + :type config_file_path: str + :param output_image_topic: Topic to publish images to + :type output_image_topic: str + :param output_distance_topic: Topic to publish distance to + :type output_distance_topic: str + :param dataset_fps: Dataset FPS + :type dataset_fps: float + """ + + self.dataset_path = dataset_path + self.config_file_path = config_file_path + self.bridge = ROS2Bridge() + self.delay = 1.0 / dataset_fps + + self.output_image_topic = output_image_topic + self.output_distance_topic = output_distance_topic + + def start(self): + """ + Runs the node + """ + self._init_publisher() + if self._init_dataset(): + self._publish() + + # Auxiliary functions + def _init_publisher(self): + """ + Initializes the publishers + """ + self.output_image_publisher = self.create_publisher(ROS_Image, + self.output_image_topic, + 10) + self.output_distance_publisher = self.create_publisher(ROS_Vector3Stamped, + self.output_distance_topic, + 10) + + def _init_dataset(self): + """ + Initializes the dataset + """ + env = os.getenv("OPENDR_HOME") + config_file_path = os.path.join(env, self.config_file_path) + if not Path(config_file_path).exists(): + raise FileNotFoundError("Config file not found") + try: + self.dataset = KittiDataset(self.dataset_path, config_file_path) + return True + except FileNotFoundError: + self.get_logger().error("Dataset path is incorrect. 
Please check the path") + return False + + def _publish(self): + """ + Publishes the dataset images and distances + """ + self.get_logger().info("Start publishing dataset images") + i = 0 + while rclpy.ok(): + if i == len(self.dataset)-2: + break + data = self.dataset[i][0] + # Data is in format of {"image_id" : (image, velocity, distance)} for 3 past frames + image_ids = list(data.keys()) + if len(data) < 3: + i += 1 + continue + image_t0, distance_t0 = data[image_ids[0]] + + stamp = self.get_clock().now().to_msg() + # Convert image to ROS Image + image = self.bridge.to_ros_image(image=image_t0, frame_id=image_ids[0], time=stamp) + # Convert velocity to ROS Vector3Stamped + distance = self.bridge.to_ros_vector3_stamped(distance_t0, 0.0, 0.0, image_ids[0], time=stamp) + # Publish the image and distance + self.output_image_publisher.publish(image) + self.output_distance_publisher.publish(distance) + + self.get_logger().info("Published image {}".format(image_ids[0])) + self.get_logger().info("Published distance {}".format([distance_t0])) + i += 1 + time.sleep(self.delay) + + +def main(args=None): + rclpy.init(args=args) + + parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter) + parser.add_argument("--dataset_path", type=str, default="/home/canakcia/Desktop/kitti_dset/", + help="Path to the dataset") + parser.add_argument("--config_file_path", type=str, + default="src/opendr/perception/continual_slam/configs/singlegpu_kitti.yaml", + help="Path to the config file") + parser.add_argument("--output_image_topic", type=str, default="/cl_slam/image", + help="ROS topic to publish images") + parser.add_argument("--output_distance_topic", type=str, default="/cl_slam/distance", + help="ROS topic to publish distances") + parser.add_argument("--dataset_fps", type=float, default=3, + help="Dataset frame rate") + args = parser.parse_args() + + node = ContinualSlamDatasetNode(args.dataset_path, + args.config_file_path, + args.output_image_topic, + args.output_distance_topic, + args.dataset_fps) + node.start() + + +if __name__ == "__main__": + main() diff --git a/projects/opendr_ws_2/src/opendr_perception/opendr_perception/continual_slam_learner_node.py b/projects/opendr_ws_2/src/opendr_perception/opendr_perception/continual_slam_learner_node.py new file mode 100644 index 0000000000..79f59461b7 --- /dev/null +++ b/projects/opendr_ws_2/src/opendr_perception/opendr_perception/continual_slam_learner_node.py @@ -0,0 +1,296 @@ +#!/usr/bin/env python3 +# Copyright 2020-2023 OpenDR European Project +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import argparse +import message_filters +import rclpy +from rclpy.node import Node +import os + +from opendr_bridge import ROS2Bridge +from opendr.perception.continual_slam.continual_slam_learner import ContinualSLAMLearner +from opendr.perception.continual_slam.algorithm.depth_pose_module.replay_buffer import ReplayBuffer + +from sensor_msgs.msg import Image as ROS_Image +from geometry_msgs.msg import Vector3Stamped as ROS_Vector3Stamped +from std_msgs.msg import String as ROS_String + + +class ContinualSlamLearner(Node): + def __init__(self, + path: str, + input_image_topic: str, + input_distance_topic: str, + output_weights_topic: str, + publish_rate: int = 20, + buffer_size: int = 1000, + save_memory: bool = True, + sample_size: int = 3): + super().__init__("opendr_continual_slam_learner_node") + """ + Continual SLAM learner node. This node is responsible for training and updating the predictor. + :param path: Path to the folder where the model will be saved. + :type path: str + :param input_image_topic: ROS topic where the input images are published. + :type input_image_topic: str + :param input_distance_topic: ROS topic where the input distances are published. + :type input_distance_topic: str + :param output_weights_topic: ROS topic where the output weights are published. + :type output_weights_topic: str + :param publish_rate: Rate at which the output weights are published. + :type publish_rate: int + :param buffer_size: Size of the replay buffer. + :type buffer_size: int + :param save_memory: If True, the replay buffer will be saved to memory. + :type save_memory: bool + :param sample_size: Number of samples (triplets from replay buffer) that will be used for training. + :type sample_size: int + """ + self.bridge = ROS2Bridge() + self.publish_rate = publish_rate + self.buffer_size = buffer_size + self.save_memory = save_memory + self.sample_size = sample_size + + self.input_image_topic = input_image_topic + self.input_distance_topic = input_distance_topic + self.output_weights_topic = output_weights_topic + + self.path = path + self.learner = None + self.sequence = None + + self.do_publish = 0 + + # Create caches + self.cache = {"image": [], + "distance": [], + "id": []} + + def callback(self, image: ROS_Image, distance: ROS_Vector3Stamped): + """ + Callback method of predictor node. 
+ :param image: Input image as a ROS message + :type ROS_Image + :param distance: Distance to the object as a ROS message + :type ROS_Vector3Stamped + """ + # Data preprocessing + image = self.bridge.from_ros_image(image) + frame_id, distance = self.bridge.from_ros_vector3_stamped(distance) + incoming_sequence = frame_id.split("_")[0] + distance = distance[0] + + # Clear cache if new sequence is detected + if self.sequence is None: + self.sequence = incoming_sequence + if self.sequence != incoming_sequence: + self._clean_cache() + self.sequence = incoming_sequence + + self._cache_arriving_data(image, distance, frame_id) + + # If cache is not full, return + if len(self.cache['image']) < 3: + return + + # Add triplet to replay buffer and sample + item = self._convert_cache_into_triplet() + if self.sample_size > 0: + self.replay_buffer.add(item) + item = ContinualSLAMLearner._input_formatter(item) + if len(self.replay_buffer) < self.sample_size: + return + batch = self.replay_buffer.sample() + # Concatenate the current triplet with the sampled batch + batch.insert(0, item) + else: + item = ContinualSLAMLearner._input_formatter(item) + batch = [item] + + # Train learner + self.learner.fit(batch, learner=True) + + # Publish new weights + if self.do_publish % self.publish_rate == 0: + message = self.learner.save() + self.get_logger().info(f"CL-SLAM learner publishing new weights, currently in the frame {frame_id}\n") + ros_message = self.bridge.to_ros_string(message) + self.output_weights_publisher.publish(ros_message) + self.do_publish += 1 + + def listen(self): + """ + Start the node and begin processing input data. The order of the function calls ensures that the node does not + try to process input images without being in a trained state. + """ + if self._init_learner() and self._init_replay_buffer(): + self._init_publisher() + self._init_subscribers() + self.get_logger().info("Continual SLAM node started.") + rclpy.spin(self) + + # Destroy the node explicitly + # (optional - otherwise it will be done automatically + # when the garbage collector destroys the node object) + self.destroy_node() + rclpy.shutdown() + + # Auxiliary functions + def _init_subscribers(self): + """ + Initializing subscribers. Here we also do synchronization between two ROS topics. + """ + self.input_image_subscriber = message_filters.Subscriber( + self, ROS_Image, self.input_image_topic) + self.input_distance_subscriber = message_filters.Subscriber( + self, ROS_Vector3Stamped, self.input_distance_topic) + self.ts = message_filters.TimeSynchronizer([self.input_image_subscriber, + self.input_distance_subscriber], 10) + self.ts.registerCallback(self.callback) + + def _init_publisher(self): + """ + Initializing publishers. 
+ """ + self.output_weights_publisher = self.create_publisher(ROS_String, + self.output_weights_topic, + 10) + + def _init_learner(self): + """ + Creating a ContinualSLAMLearner instance with predictor and ros mode + """ + env = os.getenv('OPENDR_HOME') + path = os.path.join(env, self.path) + try: + self.learner = ContinualSLAMLearner(path, mode="learner", ros=False) + return True + except Exception: + self.get_logger().error("Continual SLAM node failed to initialize, due to learner initialization error.") + return False + + def _init_replay_buffer(self): + """ + Creating a replay buffer instance + """ + env = os.getenv('OPENDR_HOME') + path = os.path.join(env, self.path) + try: + self.replay_buffer = ReplayBuffer(buffer_size=self.buffer_size, + save_memory=self.save_memory, + dataset_config_path=path, + sample_size=self.sample_size) + return True + except Exception: + self.get_logger().error("Continual SLAM node failed to initialize, due to replay buffer initialization error.") + if self.sample_size > 0: + return False + else: + return True + + def _clean_cache(self): + """ + Clearing the cache. + """ + for key in self.cache.keys(): + self.cache[key].clear() + + def _cache_arriving_data(self, image, distance, frame_id): + """ + Caching the arriving data. + :param image: Input image as a ROS message + :type ROS_Image + :param distance: Distance to the object as a ROS message + :type ROS_Vector3Stamped + :param frame_id: Frame id of the arriving data + :type int + """ + self.cache["image"].append(image) + self.cache["distance"].append(distance) + self.cache["id"].append(frame_id) + if len(self.cache['image']) > 3: + self.cache["image"].pop(0) + self.cache["distance"].pop(0) + self.cache["id"].pop(0) + + def _convert_cache_into_triplet(self) -> dict: + """ + Converting the cache into a triplet. + """ + triplet = {} + for i in range(len(self.cache["image"])): + triplet[self.cache['id'][i]] = (self.cache["image"][i], self.cache["distance"][i]) + return triplet + + +def main(args=None): + rclpy.init(args=args) + parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter) + parser.add_argument('-c', + '--config_path', + type=str, + default='src/opendr/perception/continual_slam/configs/singlegpu_kitti.yaml', + help='Path to the config file') + parser.add_argument('-it', + '--input_image_topic', + type=str, + default='/cl_slam/image', + help='Input image topic, listened from Continual SLAM Dataset Node') + parser.add_argument('-dt', + '--input_distance_topic', + type=str, + default='/cl_slam/distance', + help='Input distance topic, listened from Continual SLAM Dataset Node') + parser.add_argument('-ot', + '--output_weights_topic', + type=str, + default='/cl_slam/update', + help='Output weights topic, published to Continual SLAM Predictor Node') + parser.add_argument('-pr', + '--publish_rate', + type=int, + default=20, + help='Publish rate of the weights') + parser.add_argument('-bs', + '--buffer_size', + type=int, + default=10, + help='Size of the replay buffer') + parser.add_argument('-ss', + '--sample_size', + type=int, + default=3, + help='Sample size of the replay buffer. If 0 is given, only online data is used') + parser.add_argument('-sm', + '--save_memory', + action='store_false', + default=True, + help='Whether to save memory or not. 
Add it to the command if you want to write to disk') + args = parser.parse_args() + + node = ContinualSlamLearner(args.config_path, + args.input_image_topic, + args.input_distance_topic, + args.output_weights_topic, + args.publish_rate, + args.buffer_size, + args.save_memory, + args.sample_size) + node.listen() + + +if __name__ == '__main__': + main() diff --git a/projects/opendr_ws_2/src/opendr_perception/opendr_perception/continual_slam_predictor_node.py b/projects/opendr_ws_2/src/opendr_perception/opendr_perception/continual_slam_predictor_node.py new file mode 100644 index 0000000000..90bbefd2c7 --- /dev/null +++ b/projects/opendr_ws_2/src/opendr_perception/opendr_perception/continual_slam_predictor_node.py @@ -0,0 +1,356 @@ +#!/usr/bin/env python3 +# Copyright 2020-2023 OpenDR European Project +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import argparse +import numpy as np +from pathlib import Path +import os + +from opendr.perception.continual_slam.continual_slam_learner import ContinualSLAMLearner + +import message_filters +import rclpy +from rclpy.node import Node +from sensor_msgs.msg import Image as ROS_Image +from geometry_msgs.msg import Vector3Stamped as ROS_Vector3Stamped +from visualization_msgs.msg import MarkerArray as ROS_MarkerArray +from std_msgs.msg import String as ROS_String +from opendr_bridge import ROS2Bridge +import tf2_ros +from sensor_msgs.msg import PointCloud2 as ROS_PointCloud2 + + +class ContinualSlamPredictor(Node): + def __init__(self, + path: Path, + input_image_topic: str, + input_distance_topic: str, + output_depth_topic: str, + output_pose_topic: str, + output_pointcloud_topic: str, + publish_pointcloud: bool, + update_topic: str): + super().__init__("opendr_continual_slam_predictor_node") + """ + Continual SLAM predictor node. This node is responsible for publishing predicted pose and depth outputs. + :param path: Path to the folder where the model will be saved. + :type path: str + :param input_image_topic: ROS topic where the input image is published. + :type input_image_topic: str + :param input_distance_topic: ROS topic where the input distance is published. + :type input_distance_topic: str + :param output_depth_topic: ROS topic where the output depth will be published. + :type output_depth_topic: str + :param output_pose_topic: ROS topic where the output pose will be published. + :type output_pose_topic: str + :param output_pointcloud_topic: ROS topic where the output pointcloud will be published. + :type output_pointcloud_topic: str + :param publish_pointcloud: Whether to publish the pointcloud or not. + :type publish_pointcloud: bool + :param update_topic: ROS topic where the update signal is published. 
+ :type update_topic: str + """ + self.bridge = ROS2Bridge() + + self.input_image_topic = input_image_topic + self.input_distance_topic = input_distance_topic + self.output_depth_topic = output_depth_topic + self.output_pose_topic = output_pose_topic + if publish_pointcloud: + self.output_pointcloud_topic = output_pointcloud_topic + else: + self.output_pointcloud_topic = None + self.update_topic = update_topic + + self.path = path + self.predictor = None + self.sequence = None + self.color = None + self.frame_id = None + + # Create caches + self.cache = { + "image": [], + "distance": [], + "id": [], + "marker_position": [], + "marker_frame_id": []} + + def callback(self, image: ROS_Image, distance: ROS_Vector3Stamped): + """ + Callback method of predictor node. + :param image: Input image as a ROS message + :type ROS_Image + :param distance: Distance to the object as a ROS message + :type ROS_Vector3Stamped + """ + # Data-preprocessing + stamp = self.get_clock().now().to_msg() + image = self.bridge.from_ros_image(image) + frame_id, distance = self.bridge.from_ros_vector3_stamped(distance) + self.frame_id = frame_id + incoming_sequence = frame_id.split("_")[0] + distance = distance[0] + + self._check_sequence(incoming_sequence) + self._cache_arriving_data(image, distance, frame_id) + triplet = self._convert_cache_into_triplet() + + # If triplet is not ready, return + if len(triplet) < 3: + return + # Infer depth and pose + if self.output_pointcloud_topic: + depth, raw_depth, pose_graph = self._infer(triplet, return_raw_depth=True, pointcloud=True) + odometry = pose_graph.return_last_poses(n=1)[0] + pointcloud = self.predictor.visualize_3d(image, raw_depth) + pointcloud = self.bridge.to_ros_point_cloud2(pointcloud, stamp, 'rgb', 'car') + tf = self.bridge.to_ros_transformstamped(stamp=stamp, + frame_id="world", + child_frame_id="car", + odometry=odometry) + else: + depth, _ = self._infer(triplet) + if depth is None: + return + rgba = (self.color[0], self.color[1], self.color[2], 1.0) + marker_list = self.bridge.to_ros_marker_array(self.cache['marker_position'], + self.cache['marker_frame_id'], + stamp, + rgba) + + depth = self.bridge.to_ros_image(depth) + self.output_depth_publisher.publish(depth) + self.output_pose_publisher.publish(marker_list) + if self.output_pointcloud_topic: + self.tf_broadcaster.sendTransform(tf) + self.pointcloud_publisher.publish(pointcloud) + + def update(self, message: ROS_String): + """ + Update the predictor with the new data + :param message: ROS message + :type ROS_Byte + """ + message = self.bridge.from_ros_string(message) + self.get_logger().info(f"CL-SLAM predictor is currently updating its weights from {message}.\n" + f"Last predicted frame id is {self.frame_id}\n") + self.predictor.load(weights_folder=message) + + def listen(self): + """ + Start the node and begin processing input data. The order of the function calls ensures that the node does not + try to process input images without being in a trained state. + """ + if self._init_predictor(): + self._init_publisher() + self._init_subscribers() + self.get_logger().info("Continual SLAM node started.") + rclpy.spin(self) + + # Destroy the node explicitly + # (optional - otherwise it will be done automatically + # when the garbage collector destroys the node object) + self.destroy_node() + rclpy.shutdown() + + # Auxiliary functions + def _init_subscribers(self): + """ + Initializing subscribers. Here we also do synchronization between two ROS topics. 
+ """ + self.input_image_subscriber = message_filters.Subscriber( + self, ROS_Image, self.input_image_topic) + self.input_distance_subscriber = message_filters.Subscriber( + self, ROS_Vector3Stamped, self.input_distance_topic) + self.ts = message_filters.TimeSynchronizer([self.input_image_subscriber, + self.input_distance_subscriber], 10) + self.ts.registerCallback(self.callback) + + self.update_subscriber = self.create_subscription(ROS_String, self.update_topic, self.update, 1) + + def _init_publisher(self): + """ + Initializing publishers. + """ + self.output_depth_publisher = self.create_publisher( + ROS_Image, self.output_depth_topic, 10) + self.output_pose_publisher = self.create_publisher( + ROS_MarkerArray, self.output_pose_topic, 10) + if self.output_pointcloud_topic: + self.tf_broadcaster = tf2_ros.TransformBroadcaster(self) + self.pointcloud_publisher = self.create_publisher( + ROS_PointCloud2, self.output_pointcloud_topic, 10) + + def _init_predictor(self): + """ + Creating a ContinualSLAMLearner instance with predictor and ros mode + """ + env = os.getenv('OPENDR_HOME') + path = os.path.join(env, self.path) + try: + self.predictor = ContinualSLAMLearner(path, mode="predictor", ros=False, do_loop_closure=True) + return True + except Exception: + self.get_logger().error("Continual SLAM node failed to initialize, due to predictor initialization error.") + return False + + def _clean_cache(self): + """ + Cleaning the cache + """ + for key in self.cache.keys(): + self.cache[key].clear() + + def _cache_arriving_data(self, image, distance, frame_id): + """ + Caching arriving data + :param image: Input image + :type image: np.ndarray + :param distance: Distance to the object + :type distance: float + :param frame_id: Frame id + :type frame_id: str + """ + # Cache the arriving last 3 data + self.cache['image'].append(image) + self.cache['distance'].append(distance) + self.cache['id'].append(frame_id) + if len(self.cache['image']) > 3: + self.cache['image'].pop(0) + self.cache['distance'].pop(0) + self.cache['id'].pop(0) + + def _convert_cache_into_triplet(self) -> dict: + """ + Converting the cache into a triplet + """ + triplet = {} + for i in range(len(self.cache['image'])): + triplet[self.cache['id'][i]] = (self.cache['image'][i], self.cache['distance'][i]) + return triplet + + def _check_sequence(self, incoming_sequence): + """ + Checking if the new sequence is detected + :param incoming_sequence: Incoming sequence + :type incoming_sequence: str + """ + # If new sequence is detected, clean the cache + if self.sequence is None: + self.sequence = incoming_sequence + self.color = list(np.random.choice(range(256), size=3)) + if self.sequence != incoming_sequence: + self._clean_cache() + self.predictor._reset() + self.sequence = incoming_sequence + self.color = list(np.random.choice(range(256), size=3)) + + def _infer(self, triplet: dict, return_raw_depth=False, pointcloud=False): + """ + Infer the triplet + :param triplet: Triplet + :type triplet: dict + :param return_raw_depth: Return raw depth + :type return_raw_depth: bool + :param pointcloud: Return pointcloud + :type pointcloud: bool + :return: Depth iimage, odometry, optional raw depth image + :rtype: np.ndarray, np.ndarray, np.ndarray + """ + depth, odometry, _, lc, pose_graph = self.predictor.infer(triplet, pointcloud=pointcloud) + if not lc: + points = pose_graph.return_last_positions(n=5) + if not len(points): + return None + for point in points: + position = [-point[0], 0.0, -point[2]] + + 
self.cache["marker_position"].append(position) + self.cache["marker_frame_id"].append("world") + if self.color is None: + self.color = [255, 0, 0] + else: + self.cache["marker_position"].clear() + self.cache["marker_frame_id"].clear() + points = pose_graph.return_all_positions() + for point in points: + position = [-point[0], 0.0, -point[2]] + self.cache["marker_position"].append(position) + self.cache["marker_frame_id"].append("world") + if return_raw_depth: + depth, raw_depth = depth + return depth, raw_depth, pose_graph + return depth, odometry + + +def main(args=None): + rclpy.init(args=args) + parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter) + + parser.add_argument('-c', + '--config_path', + type=str, + default='src/opendr/perception/continual_slam/configs/singlegpu_kitti.yaml', + help='Path to the config file') + parser.add_argument('-it', + '--input_image_topic', + type=str, + default='/cl_slam/image', + help='Input image topic, listened from Continual SLAM Dataset Node') + parser.add_argument('-dt', + '--input_distance_topic', + type=str, + default='/cl_slam/distance', + help='Input distance topic, listened from Continual SLAM Dataset Node') + parser.add_argument('-odt', + '--output_depth_topic', + type=str, + default='/opendr/predicted/image', + help='Output depth topic, published to Continual SLAM Dataset Node') + parser.add_argument('-opt', + '--output_pose_topic', + type=str, + default='/opendr/predicted/pose', + help='Output pose topic, published to Continual SLAM Dataset Node') + parser.add_argument('-ppcl', + '--publish_pointcloud', + action='store_true', + help='Publish pointcloud to ROS') + parser.add_argument('-opct', + '--output_pointcloud_topic', + type=str, + default='/opendr/predicted/pointcloud', + help='Output pointcloud topic, published to Continual SLAM Dataset Node') + parser.add_argument('-ut', + '--update_topic', + type=str, + default='/cl_slam/update', + help='Update topic, listened from Continual SLAM Dataset Node') + args = parser.parse_args() + + node = ContinualSlamPredictor(args.config_path, + args.input_image_topic, + args.input_distance_topic, + args.output_depth_topic, + args.output_pose_topic, + args.output_pointcloud_topic, + args.publish_pointcloud, + args.update_topic) + node.listen() + + +if __name__ == '__main__': + main() diff --git a/projects/opendr_ws_2/src/opendr_perception/setup.py b/projects/opendr_ws_2/src/opendr_perception/setup.py index fb51f4fad9..a7b3943aaf 100644 --- a/projects/opendr_ws_2/src/opendr_perception/setup.py +++ b/projects/opendr_ws_2/src/opendr_perception/setup.py @@ -56,6 +56,9 @@ 'binary_high_resolution = opendr_perception.binary_high_resolution_node:main', 'continual_skeleton_based_action_recognition = \ opendr_perception.continual_skeleton_based_action_recognition_node:main', + 'continual_slam_dataset = opendr_perception.continual_slam_dataset_node:main', + 'continual_slam_predictor = opendr_perception.continual_slam_predictor_node:main', + 'continual_slam_learner = opendr_perception.continual_slam_learner_node:main', 'gesture_recognition = opendr_perception.gesture_recognition_node:main', 'performance = opendr_perception.performance_node:main', 'speech_transcription = opendr_perception.speech_transcription_node:main', diff --git a/projects/python/perception/continual_slam/README.md b/projects/python/perception/continual_slam/README.md new file mode 100644 index 0000000000..9e89e805d8 --- /dev/null +++ b/projects/python/perception/continual_slam/README.md @@ -0,0 +1,13 @@ +# 
Continual SLAM Demo + +The file [example_usage](./example_usage.py) contains code snippets explaining the following use cases: + +1. Download the provided pre-trained model weights. + +2. Prepare the supported datasets (SemanticKITTI) after having downloaded the raw files. See the [datasets' readme](../../../../../src/opendr/perception/continual_slam/datasets/README.md) for further details. + +3. Train the model. + +4. Run inference. + +To increase usability, we have added a download method for both the pre-trained model weights and the test data, which we provide on our server. For reference, depth maps are written for the respective frames; however, CL-SLAM is mainly intended for ROS usage (two cooperating nodes, a predictor and a learner), so please also have a look at our [ROS](/projects/opendr_ws/README.md) and [ROS2](/projects/opendr_ws_2/README.md) nodes, where you can find more information about CL-SLAM usage. \ No newline at end of file diff --git a/projects/python/perception/continual_slam/example_usage.py b/projects/python/perception/continual_slam/example_usage.py new file mode 100644 index 0000000000..ca80320e33 --- /dev/null +++ b/projects/python/perception/continual_slam/example_usage.py @@ -0,0 +1,104 @@ +# Copyright 2020-2023 OpenDR European Project +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License.
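+ +# Script overview (summary of the code below): downloads the pre-trained model weights and the test data, trains the +# learner online with a small replay buffer, saves the adapted model, and finally runs inference on the test sequence, +# writing the predicted depth maps and a pose-graph plot to ./cl_slam/predictions.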
+ +import argparse +import os +import cv2 +import matplotlib.pyplot as plt +import numpy as np + +from opendr.perception.continual_slam.continual_slam_learner import ContinualSLAMLearner +from opendr.perception.continual_slam.datasets.kitti import KittiDataset +from opendr.perception.continual_slam.algorithm.depth_pose_module.replay_buffer import ReplayBuffer + + +def download_models(root_path): + if os.path.exists(root_path): + return os.path.join(root_path, 'models') + model_path = ContinualSLAMLearner.download(path=root_path, trained_on="cityscapes") + return model_path + + +def download_test_data(root_path): + if os.path.exists(root_path): + return os.path.join(root_path, 'test_data', 'infer_data') + dataset_path = ContinualSLAMLearner.download(path=root_path, mode="test_data") + return dataset_path + + +def load_model(model_path, model: ContinualSLAMLearner): + model.load(model_path) + return model + + +def train(dataset_path, config_file, model_path): + dataset = KittiDataset(str(dataset_path), config_file) + learner = ContinualSLAMLearner(config_file, mode="learner") + learner = load_model(model_path, learner) + replay_buffer = ReplayBuffer(buffer_size=5, + save_memory=True, + dataset_config_path=config_file, + sample_size=3) + for item in dataset: + replay_buffer.add(item) + if replay_buffer.count < 3: + continue + item = ContinualSLAMLearner._input_formatter(item) + sample = replay_buffer.sample() + sample.insert(0, item) + learner.fit(sample, learner=True) + return (learner.save("./cl_slam/trained_model/")) + + +def inference(dataset_path, config_file, model_path=None): + dataset = KittiDataset(str(dataset_path), config_file) + predictor = ContinualSLAMLearner(config_file, mode="predictor") + load_model(model_path, predictor) + predictor.do_loop_closure = True + os.makedirs('./cl_slam/predictions', exist_ok=True) + for i, item in enumerate(dataset): + depth, _, _, _, pose_graph = predictor.infer(item, return_losses=True) + cv2.imwrite(f'./cl_slam/predictions/depth_{i}.png', depth.opencv()) + poses = pose_graph.return_all_positions() + poses = np.delete(np.array(poses), 1, 1) + hfont = {'fontname': 'Helvetica Neue'} + plt.scatter(-poses[:, 0], -poses[:, 1], c='r', marker='o') + plt.xlabel('x [m]', **hfont) + plt.ylabel('z [m]', **hfont) + plt.title('Pose Graph for the Inference on test data') + plt.savefig('./cl_slam/predictions/pose_graph.png') + + +def main(): + env = os.getenv('OPENDR_HOME') + config_file = os.path.join(env, 'src/opendr/perception/continual_slam/configs/singlegpu_kitti.yaml') + + parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter) + parser.add_argument('-d', '--data_root', type=str, default='./cl_slam/data', + help='To specify the path to the data root directory, where models are stored') + parser.add_argument('-m', '--model_root', type=str, default='./cl_slam/model', + help='To specify the path where models are going to be downloaded stored') + + args = parser.parse_args() + + model_path = download_models(args.model_root) + data_path = download_test_data(args.data_root) + trained_model = train(data_path, config_file, model_path) + print('-' * 40 + '\n===> Training succeeded\n' + '-' * 40) + inference(data_path, config_file, trained_model) + print('-' * 40 + '\n===> Inference succeeded\n' + '-' * 40) + + +if __name__ == "__main__": + main() diff --git a/src/opendr/perception/continual_slam/README.md b/src/opendr/perception/continual_slam/README.md new file mode 100644 index 0000000000..dbbbd9cc27 --- /dev/null +++ 
b/src/opendr/perception/continual_slam/README.md @@ -0,0 +1,91 @@ +# Continual Learning + +Continual Learning is a machine learning paradigm enabling a model to learn tasks consecutively without forgetting previously learned knowledge or skills. +In particular, online continual learning operates on a continuous stream of data, i.e., without requiring access to all the data at once. +The goal is to maintain and improve the model's performance over time as it encounters new tasks or domains. + +## Modules + +### Continual SLAM: Beyond Lifelong Simultaneous Localization and Mapping Through Continual Learning + +For the task of simultaneous localization and mapping (SLAM), Continual SLAM has been included in the OpenDR toolkit. +CL-SLAM leverages a dual-network architecture to both adapt to new environments and retain knowledge of previously visited environments. +Two separate network heads create predictions for depth estimation and odometry, respectively. +The final combined model creates a 3D pointcloud map of the environment based solely on visual odometry input. + +Website: http://continual-slam.cs.uni-freiburg.de
+arXiv: https://arxiv.org/abs/2203.01578
+GitHub repository: https://github.com/robot-learning-freiburg/CL-SLAM + +**BibTeX**: +```bibtex +@InProceedings{voedisch2023clslam, + author="V{\"o}disch, Niclas and Cattaneo, Daniele and Burgard, Wolfram and Valada, Abhinav", + editor="Billard, Aude and Asfour, Tamim and Khatib, Oussama", + title="Continual SLAM: Beyond Lifelong Simultaneous Localization and Mapping Through Continual Learning", + booktitle="Robotics Research", + year="2023", + publisher="Springer Nature Switzerland", + address="Cham", + pages="19--35", +} +``` + +**Base repositories** + +The OpenDR implementation extends the [Continual SLAM repository](https://github.com/robot-learning-freiburg/CL-SLAM) from [Niclas Vödisch](http://www.informatik.uni-freiburg.de/~voedisch) with the OpenDR interface. + +Please note that the original repository is heavily based on +- [monodepth2](https://github.com/nianticlabs/monodepth2) by [Niantic Labs](https://www.nianticlabs.com/), authored by [Clément Godard](http://www0.cs.ucl.ac.uk/staff/C.Godard/) + +#### Example Usage + +More code snippets can be found in [example_usage.py](../../../../projects/python/perception/continual_slam/example_usage.py) with the corresponding [readme](../../../../projects/python/perception/continual_slam/README.md). + +Below, you can see an example of the point clouds published by the ROS nodes, where the visual input is rendered in 3D using the estimated depth. +![](continual_slam_pcl.png) + +**Prepare the downloaded SemanticKITTI dataset** (see the [datasets' readme](./datasets/README.md) as well) +```python +from opendr.perception.continual_slam.datasets import KittiDataset +DOWNLOAD_PATH = '~/data/kitti' +DATA_ROOT = '~/data/kitti' +KittiDataset.prepare_data(DOWNLOAD_PATH, DATA_ROOT) +``` + +**Run inference** +```python +from opendr.perception.continual_slam.datasets import KittiDataset +from opendr.perception.continual_slam import ContinualSLAMLearner +DATA_ROOT = '~/data/kitti' +config_file = 'configs/singlegpu_kitti.yaml' # stored in continual_slam/configs +predictor = ContinualSLAMLearner(config_file, mode="predictor") # Adapt the path in config folder to load the pretrained model +dataset = KittiDataset(DATA_ROOT, config_file) +for batch in dataset: + predictions = predictor.infer(batch) +``` + +**Run training** +```python +from opendr.perception.continual_slam.datasets import KittiDataset +from opendr.perception.continual_slam import ContinualSLAMLearner +from opendr.perception.continual_slam.algorithm.depth_pose_module.replay_buffer import ReplayBuffer + +DATA_ROOT = '~/data/kitti' +config_file = 'configs/singlegpu_kitti.yaml' # stored in continual_slam/configs +dataset = KittiDataset(DATA_ROOT, config_file) +learner = ContinualSLAMLearner(config_file, mode="learner") # Adapt the path in config folder to load the pretrained model + +replay_buffer = ReplayBuffer(buffer_size=5, + save_memory=True, + dataset_config_path=config_file, + sample_size=3) +for triplet in dataset: + replay_buffer.add(triplet) + if len(replay_buffer) < 3: + continue + triplet = ContinualSLAMLearner._input_formatter(triplet) + batch = replay_buffer.sample() + batch.insert(0, triplet) + learner.fit(batch, learner=True) +``` diff --git a/src/opendr/perception/continual_slam/__init__.py b/src/opendr/perception/continual_slam/__init__.py new file mode 100644 index 0000000000..7650249a0a --- /dev/null +++ b/src/opendr/perception/continual_slam/__init__.py @@ -0,0 +1,3 @@ +from opendr.perception.continual_slam.continual_slam_learner import ContinualSLAMLearner + +__all__ = 
['ContinualSLAMLearner'] diff --git a/src/opendr/perception/continual_slam/algorithm/depth_pose_module/__init__.py b/src/opendr/perception/continual_slam/algorithm/depth_pose_module/__init__.py new file mode 100644 index 0000000000..707e87db9f --- /dev/null +++ b/src/opendr/perception/continual_slam/algorithm/depth_pose_module/__init__.py @@ -0,0 +1,3 @@ +from opendr.perception.continual_slam.algorithm.depth_pose_module.depth_pose_module import DepthPoseModule + +__all__ = ['DepthPoseModule'] diff --git a/src/opendr/perception/continual_slam/algorithm/depth_pose_module/depth_pose_module.py b/src/opendr/perception/continual_slam/algorithm/depth_pose_module/depth_pose_module.py new file mode 100644 index 0000000000..d79f2e1417 --- /dev/null +++ b/src/opendr/perception/continual_slam/algorithm/depth_pose_module/depth_pose_module.py @@ -0,0 +1,684 @@ +from typing import Any, Dict, Optional, Tuple, Union + +import math +import torch +import numpy as np +from torch import nn, optim, Tensor +import torch.nn.functional as F + +from opendr.perception.continual_slam.algorithm.depth_pose_module.networks import ( + ResnetEncoder, + DepthDecoder, + PoseDecoder, + ) +from opendr.perception.continual_slam.algorithm.depth_pose_module.networks.layers import ( + SSIM, + BackProjectDepth, + Project3D, + ) +from opendr.perception.continual_slam.algorithm.depth_pose_module.losses import ( + compute_reprojection_loss, + compute_smooth_loss, + compute_velocity_loss, +) +from opendr.perception.continual_slam.algorithm.parsing.config import Config +from opendr.perception.continual_slam.algorithm.parsing.dataset_config import DatasetConfig +from opendr.perception.continual_slam.algorithm.depth_pose_module.utils import ( + transformation_from_parameters, + disp_to_depth, +) +from opendr.perception.continual_slam.datasets import KittiDataset + + +class DepthPoseModule: + """ + This class implements the DepthPosePredictor algorithm. + """ + def __init__(self, config: Config, dataset_config: DatasetConfig, use_online: bool = False, mode: str = 'predictor'): + + # Parse the configuration parameters + self._parse_configs(config, dataset_config) + + # Set the configuration parameters from input + self.use_online = use_online + self.mode = mode + + # Set static parameters + self.num_pose_frames = 2 + self.camera_matrix = np.array( + [[0.58, 0, 0.5, 0], [0, 1.92, 0.5, 0], [0, 0, 1, 0], [0, 0, 0, 1]], dtype=np.float32) + self.frame_ids = (0, -1, 1) + + # Set the device + self.device_type = 'cuda' if torch.cuda.is_available() else 'cpu' + self.device = torch.device(self.device_type) + + # Create the model + self._create_model() + + # Set the flag to indicate if the model is trained + self.is_trained = False + + def adapt(self, + inputs: Dict[Any, Tensor], + online_index: int = 0, + steps: int = 1, + online_loss_weight: Optional[float] = None, + do_adapt: bool = True, + return_loss: bool = False, + batch_size: int = None, + ) -> Tuple[Dict[Tensor, Any], Optional[Dict[Tensor, Any]]]: + """ + Adapt the model to a new task. + :param inputs: A dictionary containing the inputs to the model. + :type: Dict[Any, Tensor] + :param online_index: The index of the online model in the batch. Defaults to 0. + :type: int + :param steps: The number of gradient steps to take. Defaults to 1. + :type: int + :param online_loss_weight: The weight to give to the online loss. Defaults to None. + :type: float + :param use_expert: Whether to use the expert model to compute the online loss. Defaults to True. 
+ :type: bool + :param do_adapt: Whether to adapt the model. Defaults to True. + :type: bool + :param return_loss: Whether to return the loss. Defaults to False. + :type: bool + :return: A tuple containing the outputs and the loss. + :rtype: Tuple[Dict[Tensor, Any], Optional[Dict[Tensor, Any]]] + """ + if batch_size is not None: + self.batch_size = batch_size + if online_loss_weight is None: + loss_weights = None + elif self.batch_size == 1: + loss_weights = torch.ones(1, device=self.device) + else: + loss_weights = torch.empty(self.batch_size, device=self.device) + buffer_loss_weight = (1 - online_loss_weight) / (self.batch_size - 1) + loss_weights[online_index] = online_loss_weight + loss_weights[np.arange(self.batch_size) != online_index] = buffer_loss_weight + + if do_adapt: + self._set_adapt(freeze_encoder=True) + else: + self._set_eval() + steps = 1 + + for _ in range(steps): + self.optimizer.zero_grad() + outputs, losses = self._process_batch(inputs, loss_weights) + if do_adapt: + losses['loss'].backward() + self.optimizer.step() + + if return_loss: + return outputs, losses + return outputs + + def predict(self, + batch: Dict[Any, Tensor], + return_loss: bool = False + ) -> Tuple[Dict[Tensor, Any], Optional[Dict[Tensor, Any]]]: + """ + Predict the output of a batch of inputs + :param batch: A dictionary containing the inputs to the model. + :type: Dict[Any, Tensor] + :param return_loss: Whether to return the loss. Defaults to False. + :type: bool + :return: A dictionary containing the outputs of the model. + :rtype: Dict[Tensor, Any] + """ + self._set_eval() + with torch.no_grad(): + outputs, losses = self._process_batch(batch) + if return_loss: + return outputs, losses + return outputs, None + + def predict_pose(self, + image_0: Tensor, + image_1: Tensor, + as_numpy: bool = True, + use_online: bool = False, + ) -> Tuple[Union[Tensor, np.ndarray], Union[Tensor, np.ndarray]]: + if len(image_0.shape) == 3: + image_0 = image_0.unsqueeze(dim=0) + if len(image_1.shape) == 3: + image_1 = image_1.unsqueeze(dim=0) + + self._set_eval() + with torch.no_grad(): + image_0 = image_0.to(self.device) + image_1 = image_1.to(self.device) + + # Pose network + pose_inputs = torch.cat([image_0, image_1], 1) + if use_online: + pose_features = [self.online_models['pose_encoder'](pose_inputs)] + axis_angle, translation = self.online_models['pose_decoder'](pose_features) + else: + pose_features = [self.models['pose_encoder'](pose_inputs)] + axis_angle, translation = self.models['pose_decoder'](pose_features) + axis_angle, translation = axis_angle[:, 0], translation[:, 0] + transformation = transformation_from_parameters(axis_angle, translation, invert=False) + + cov_matrix = torch.eye(6, device=self.device) + + if as_numpy: + transformation = transformation.squeeze().cpu().detach().numpy() + cov_matrix = cov_matrix.cpu().detach().numpy() + return transformation, cov_matrix + + def load_model(self, weights_folder: str = None, load_optimizer: bool = True) -> None: + """ + Load the model from the checkpoint. + :param load_optimizer: Whether to load the optimizer. Defaults to True. + :type: bool + """ + self._load_model(weights_folder, load_optimizer) + + def load_online_model(self, load_optimizer: bool = True) -> None: + """ + Load the online model from the checkpoint. + :param load_optimizer: Whether to load the optimizer. Defaults to True. 
+ :type: bool + """ + self._load_online_model(load_optimizer) + + def create_dataset_loaders(self, + training: bool = False, + validation: bool = True, + ) -> None: + """ + Create the dataset loaders. + :param training: Whether to create the training dataset loader. Defaults to False. + :type: bool + :param validation: Whether to create the validation dataset loader. Defaults to True. + :type: bool + """ + self._create_dataset_loaders(training, validation) + + # Private methods -------------------------------------------------------------------------------------------- + def _parse_configs(self, + config: Config, + dataset_config: DatasetConfig + ) -> None: + + # Set the configuration parameters from dataset config + self.dataset_type = dataset_config.dataset + self.dataset_path = dataset_config.dataset_path + self.height = dataset_config.height + self.width = dataset_config.width + + # Set the configuration parameters from config + self.resnet = config.resnet + self.resnet_pretrained = config.resnet_pretrained + self.scales = config.scales + self.gpu_ids = config.gpu_ids + self.multiple_gpus = config.multiple_gpus + self.learning_rate = config.learning_rate + self.scheduler_step_size = config.scheduler_step_size + self.load_weights_folder = config.load_weights_folder + self.min_depth = config.min_depth + self.max_depth = config.max_depth + self.batch_size = config.batch_size + self.disparity_smoothness = config.disparity_smoothness + self.velocity_loss_scaling = config.velocity_loss_scaling + + def _create_model(self) -> None: + + # Create a dictionary to store the models ============================================================== + self.models = {} + self.models['depth_encoder'] = ResnetEncoder(self.resnet, self.resnet_pretrained) + self.models['depth_decoder'] = DepthDecoder(self.models['depth_encoder'].num_ch_encoder, + self.scales) + self.models['pose_encoder'] = ResnetEncoder(self.resnet, self.resnet_pretrained, + self.num_pose_frames) + self.models['pose_decoder'] = PoseDecoder(self.models['pose_encoder'].num_ch_encoder, + num_input_features=1, + num_frames_to_predict_for=2) + + self.online_models = {} + if self.use_online: + self.online_models['depth_encoder'] = ResnetEncoder(self.resnet, + self.resnet_pretrained) + self.online_models['depth_decoder'] = DepthDecoder(self.models['depth_encoder'].num_ch_encoder, + self.scales) + self.online_models['pose_encoder'] = ResnetEncoder(self.resnet, + self.resnet_pretrained, + self.num_pose_frames) + self.online_models['pose_decoder'] = PoseDecoder(self.models['pose_encoder'].num_ch_encoder, + num_input_features=1, + num_frames_to_predict_for=2) + self.backproject_depth = {} + self.project_3d = {} + self.backproject_depth_single = {} + self.project_3d_single = {} + for scale in self.scales: + h = self.height // (2**scale) + w = self.width // (2**scale) + self.backproject_depth[scale] = BackProjectDepth(self.batch_size, h, w) + self.project_3d[scale] = Project3D(self.batch_size, h, w) + + self.backproject_depth_single[scale] = BackProjectDepth(1, h, w) + self.project_3d_single[scale] = Project3D(1, h, w) + # ====================================================================================================== + + # Structural similarity =========================== + self.ssim = SSIM() + self.ssim.to(self.device) + # ================================================= + + # Send everything to the GPU(s) =================== + if 'cuda' in self.device.type: + print(f'Selected GPUs: {list(self.gpu_ids)}') + if self.multiple_gpus: + for 
model_name, m in self.models.items(): + if m is not None: + self.models[model_name] = nn.DataParallel(m, device_ids=self.gpu_ids) + + self.parameters_to_train = [] + for model_name, m in self.models.items(): + if m is not None: + m.to(self.device) + self.parameters_to_train += list(m.parameters()) + + if self.use_online: + self.online_parameters_to_train = [] + for m in self.online_models.values(): + m.to(self.device) + self.online_parameters_to_train += list(m.parameters()) + for m in self.backproject_depth.values(): + m.to(self.device) + for m in self.project_3d.values(): + m.to(self.device) + for m in self.backproject_depth_single.values(): + m.to(self.device) + for m in self.project_3d_single.values(): + m.to(self.device) + # ================================================= + + # Set up optimizer ================================ + self.optimizer = optim.Adam(self.parameters_to_train, self.learning_rate) + self.lr_scheduler = optim.lr_scheduler.StepLR(self.optimizer, self.scheduler_step_size, 0.1) + self.epoch = 0 + if self.use_online: + self.online_optimizer = optim.Adam(self.online_parameters_to_train, self.learning_rate) + else: + self.online_optimizer = None + # ================================================= + + def _process_batch(self, + batch: Dict[Any, Tensor], + loss_sample_weights: Optional[Tensor] = None, + use_online: bool = False + ) -> Dict[Tensor, Any]: + + if len(batch) != 6: + raise ValueError(f'Expected 3 images, but got {len(batch)//2}') + + distances = {} + inputs = {} + for key, data in batch.items(): + if "distance" in key: + distances[key[0]] = data.to(self.device) + continue + + if len(data.shape) == 3: + data = data.unsqueeze(0) + + if type(data) != torch.Tensor: + inputs[key] = torch.from_numpy(data).to(self.device) + else: + data = data/255.0 + inputs[key[0]] = data.to(self.device) + + outputs = {} + outputs.update(self._predict_disparity(inputs, use_online=use_online)) + outputs.update(self._predict_poses(inputs, use_online=use_online)) + outputs.update(self._reconstruct_images(inputs, outputs)) + + if self.mode == 'learner': + losses = self._compute_loss(inputs, outputs, distances, sample_weights=loss_sample_weights) + elif self.mode == 'predictor': + losses = None + else: + raise ValueError(f'Invalid mode: {self.mode}') + return outputs, losses + + def _compute_loss(self, + inputs: Dict[Any, Tensor], + outputs: Dict[Any, Tensor], + distances: Dict[Any, Tensor], + scales: Optional[Tuple[int, ...]]=None, + sample_weights: Optional[Tensor]=None + ) -> Dict[str, Tensor]: + + scales = self.scales if scales is None else scales + if sample_weights is None: + sample_weights = torch.ones(self.batch_size, device=self.device)/self.batch_size + losses = {} + total_loss = torch.zeros(1, device=self.device) + scaled_inputs = self._create_scaled_inputs(inputs) + + for scale in self.scales: + target = inputs[0] + reprojection_losses = [] + for frame_id in self.frame_ids[1:]: + pred = outputs[('rgb', frame_id, scale)] + reprojection_losses.append(compute_reprojection_loss(self.ssim, pred, target)) + reprojection_losses = torch.cat(reprojection_losses, 1) + + identity_reprojection_losses = [] + for frame_id in self.frame_ids[1:]: + pred = inputs[frame_id] + identity_reprojection_losses.append(compute_reprojection_loss(self.ssim, pred, target)) + identity_reprojection_losses = torch.cat(identity_reprojection_losses, 1) + # Add random numbers to break ties + identity_reprojection_losses += torch.randn(identity_reprojection_losses.shape, + device=self.device) * 0.00001 + 
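+ # Taking the per-pixel minimum over the identity and warped reprojection losses implements +# monodepth2-style auto-masking: pixels that the un-warped source frame already explains better +# (e.g. a static camera or objects moving with the camera) do not drive the depth/pose gradients.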
combined = torch.cat((identity_reprojection_losses, reprojection_losses), dim=1) + to_optimize, _ = torch.min(combined, dim=1) + reprojection_loss = (to_optimize.mean(2).mean(1) * sample_weights).sum() + losses[f'reprojection_loss/scale_{scale}'] = reprojection_loss + mask = torch.ones_like(outputs['disp', 0, scale], dtype=torch.bool, device=self.device) + color = scaled_inputs['rgb', 0, scale] + disp = outputs['disp', 0, scale] + mean_disp = disp.mean(2, True).mean(3, True) + norm_disp = disp / (mean_disp + 1e-7) + smooth_loss = compute_smooth_loss(norm_disp, color, mask) + smooth_loss = (smooth_loss * sample_weights).sum() + losses[f'smooth_loss/scale_{scale}'] = smooth_loss + # ================================================== + + regularization_loss = self.disparity_smoothness / (2**scale) * smooth_loss + losses[f'reg_loss/scale_{scale}'] = regularization_loss + # ================================================== + + loss = reprojection_loss + regularization_loss + losses[f'depth_loss/scale_{scale}'] = loss + total_loss += loss + + total_loss /= len(self.scales) + losses['depth_loss'] = total_loss + + # Velocity supervision loss (scale independent) ==== + if self.velocity_loss_scaling is not None and self.velocity_loss_scaling > 0: + velocity_loss = self.velocity_loss_scaling * \ + compute_velocity_loss(inputs, outputs, distances, device=self.device) + velocity_loss = (velocity_loss * sample_weights).sum() + losses['velocity_loss'] = velocity_loss + total_loss += velocity_loss + # ================================================== + + losses['loss'] = total_loss + + if np.isnan(losses['loss'].item()): + for k, v in losses.items(): + print(k, v.item()) + raise RuntimeError('NaN loss') + + return losses + + def _reconstruct_images(self, + inputs: Dict[Any, Tensor], + outputs: Dict[Any, Tensor],) -> Dict[Any, Tensor]: + """Generate the warped (reprojected) color images for a minibatch. + Generated images are added to the 'outputs' dictionary. + """ + batch_size = outputs['disp', 0, self.scales[0]].shape[0] + + for scale in self.scales: + # Upsample the disparity from scale to the target height x width + disp = outputs[('disp', 0, scale)] + disp = F.interpolate(disp, [self.height, self.width], + mode='bilinear', + align_corners=False) + source_scale = 0 + + depth = disp_to_depth(disp, self.min_depth, self.max_depth) + outputs[('depth', 0, scale)] = depth + if self.mode == 'learner': + camera_matrix, inv_camera_matrix = self._scale_camera_matrix( + self.camera_matrix, source_scale) + camera_matrix = torch.Tensor(camera_matrix).to(self.device).unsqueeze(0) + inv_camera_matrix = torch.Tensor(inv_camera_matrix).to(self.device).unsqueeze(0) + + for _, frame_id in enumerate(self.frame_ids[1:]): + T = outputs[('cam_T_cam', 0, frame_id)] + + if batch_size == 1: + cam_points = self.backproject_depth_single[source_scale]( + depth, inv_camera_matrix) + pixel_coordinates = self.project_3d_single[source_scale]( + cam_points, camera_matrix, T) + else: + cam_points = self.backproject_depth[source_scale](depth, + inv_camera_matrix) + pixel_coordinates = self.project_3d[source_scale](cam_points, + camera_matrix, + T) + # Save the warped image + outputs[('rgb', frame_id, scale)] = F.grid_sample(inputs[frame_id], + pixel_coordinates, + padding_mode='border', + align_corners=True) + return outputs + + def _predict_disparity(self, + inputs: Dict[Any, Tensor], + frame_id: int = 0, + use_online: bool = False) -> Dict[Any, Tensor]: + """Predict disparity for the current frame. 
+ :param inputs: dictionary containing the input images. The format is: {0: img_t1, -1: img_t2, 1: img_t0} + :param frame_id: the frame id for which the disparity is predicted. Default is 0. + :return: dictionary containing the disparity maps. The format is: {('disp', frame_id, scale): disp0, ...} + """ + if not use_online: + features = self.models['depth_encoder'](inputs[frame_id]) + outputs = self.models['depth_decoder'](features) + else: + features = self.online_models['depth_encoder'](inputs[frame_id]) + outputs = self.online_models['depth_decoder'](features) + + output = {} + for scale in self.scales: + output['disp', frame_id, scale] = outputs[("disp", scale)] + return output + + def _predict_poses(self, + inputs: Dict[Any, Tensor], + use_online: bool = False) -> Dict[Any, Tensor]: + + """Predict poses for the current frame and the previous one. 0 -> - 1 and 0 -> 1 + :param inputs: dictionary containing the input images. The format is: {0: img_t1, -1: img_t2, 1: img_t0} + """ + assert self.num_pose_frames == 2, self.num_pose_frames + assert self.frame_ids == (0, -1, 1) + + outputs = {} + pose_inputs_dict = inputs + + for frame_id in self.frame_ids[1:]: + if frame_id == -1: + pose_inputs = [pose_inputs_dict[frame_id], pose_inputs_dict[0]] + else: + pose_inputs = [pose_inputs_dict[0], pose_inputs_dict[frame_id]] + pose_inputs = torch.cat(pose_inputs, 1) + + if use_online: + pose_features = [self.online_models['pose_encoder'](pose_inputs)] + axis_angle, translation = self.online_models['pose_decoder'](pose_features) + else: + pose_features = [self.models['pose_encoder'](pose_inputs)] + axis_angle, translation = self.models['pose_decoder'](pose_features) + + axis_angle, translation = axis_angle[:, 0], translation[:, 0] + outputs[('axis_angle', 0, frame_id)] = axis_angle + outputs[('translation', 0, frame_id)] = translation + + outputs[('cam_T_cam', 0, frame_id)] = transformation_from_parameters(axis_angle, + translation, + invert=(frame_id == -1)) + return outputs + + def _scale_camera_matrix(self, camera_matrix: np.ndarray, + scale: int) -> Tuple[np.ndarray, np.ndarray]: + scaled_camera_matrix = camera_matrix.copy() + scaled_camera_matrix[0, :] *= self.width // (2**scale) + scaled_camera_matrix[1, :] *= self.height // (2**scale) + inv_scaled_camera_matrix = np.linalg.pinv(scaled_camera_matrix) + return scaled_camera_matrix, inv_scaled_camera_matrix + + def _create_dataset_loaders(self, training: bool = True, validation: bool = True) -> None: + if self.dataset_type.lower() != 'kitti': + raise ValueError(f'Unknown dataset type: {self.dataset_type}') + if training: + # self.train_loader = KittiDataset(self.dataset_path) + raise NotImplementedError + if validation: + print('Loading validation dataset...') + self.val_loader = KittiDataset(str(self.dataset_path)) + + def _load_model(self, weights_folder: str = None, load_optimizer: bool = True) -> None: + """Load model(s) from disk + """ + if weights_folder is not None: + from pathlib import Path + self.load_weights_folder = Path(weights_folder) + if self.load_weights_folder is None: + print('Weights folder required to load the model is not specified.') + if not self.load_weights_folder.exists(): + print(f'Cannot find folder: {self.load_weights_folder}') + print(f'Load model from: {self.load_weights_folder}') + + # Load the network weights + for model_name, model in self.models.items(): + if model is None: + continue + path = self.load_weights_folder / f'{model_name}.pth' + pretrained_dict = torch.load(path, map_location=self.device) + if 
isinstance(model, nn.DataParallel): + model_dict = model.module.state_dict() + else: + model_dict = model.state_dict() + pretrained_dict = {k: v for k, v in pretrained_dict.items() if k in model_dict} + if len(pretrained_dict.keys()) == 0: + raise RuntimeError(f'No fitting weights found in: {path}') + model_dict.update(pretrained_dict) + if isinstance(model, nn.DataParallel): + model.module.load_state_dict(model_dict) + else: + model.load_state_dict(model_dict) + self.is_trained = True + + if load_optimizer: + # Load the optimizer and LR scheduler + optimizer_load_path = self.load_weights_folder / 'optimizer.pth' + try: + optimizer_dict = torch.load(optimizer_load_path, map_location=self.device) + if 'optimizer' in optimizer_dict: + self.optimizer.load_state_dict(optimizer_dict['optimizer']['param_groups']) + self.lr_scheduler.load_state_dict(optimizer_dict['scheduler']) + self.epoch = self.lr_scheduler.last_epoch + print(f'Restored optimizer and LR scheduler (resume from epoch {self.epoch}).') + else: + self.optimizer.load_state_dict(optimizer_dict) + print('Restored optimizer (legacy mode).') + except: # pylint: disable=bare-except + print('Cannot find matching optimizer weights, so the optimizer is randomly ' + 'initialized.') + + def _load_online_model(self, load_optimizer: bool = True) -> None: + """Load model(s) from disk + """ + if self.load_weights_folder is None: + print('Weights folder required to load the model is not specified.') + if not self.load_weights_folder.exists(): + print(f'Cannot find folder: {self.load_weights_folder}') + print(f'Load online model from: {self.load_weights_folder}') + + # Load the network weights + for model_name, model in self.online_models.items(): + if model is None: + continue + path = self.load_weights_folder / f'{model_name}.pth' + pretrained_dict = torch.load(path, map_location=self.device) + if isinstance(model, nn.DataParallel): + model_dict = model.module.state_dict() + else: + model_dict = model.state_dict() + pretrained_dict = {k: v for k, v in pretrained_dict.items() if k in model_dict} + if len(pretrained_dict.keys()) == 0: + raise RuntimeError(f'No fitting weights found in: {path}') + model_dict.update(pretrained_dict) + if isinstance(model, nn.DataParallel): + model.module.load_state_dict(model_dict) + else: + model.load_state_dict(model_dict) + + if load_optimizer: + # Load the optimizer and LR scheduler + optimizer_load_path = self.load_weights_folder / 'optimizer.pth' + try: + optimizer_dict = torch.load(optimizer_load_path, map_location=self.device) + if 'optimizer' in optimizer_dict: + self.online_optimizer.load_state_dict(optimizer_dict['optimizer']) + print('Restored online optimizer') + else: + self.online_optimizer.load_state_dict(optimizer_dict) + print('Restored online optimizer (legacy mode).') + except: # pylint: disable=bare-except + print('Cannot find matching optimizer weights, so the optimizer is randomly ' + 'initialized.') + + # Setting the model to adapt, train or eval mode ================================================== + def _set_adapt(self, freeze_encoder: bool = True) -> None: + """ + Set all to train except for batch normalization (freeze parameters) + Convert all models to adaptation mode: batch norm is in eval mode + frozen params + Adapted from: + https://github.com/Yevkuzn/CoMoDA/blob/main/code/CoMoDA.py + """ + for model_name, model in self.models.items(): + model.eval() # To set the batch norm to eval mode + for name, param in model.named_parameters(): + if name.find('bn') != -1: + param.requires_grad 
= False # Freeze batch norm + if freeze_encoder and 'encoder' in model_name: + param.requires_grad = False + + for model_name, model in self.online_models.items(): + model.eval() # To set the batch norm to eval mode + for name, param in model.named_parameters(): + if name.find('bn') != -1: + param.requires_grad = False # Freeze batch norm + if freeze_encoder and 'encoder' in model_name: + param.requires_grad = False + + def _set_train(self) -> None: + for m in self.models.values(): + if m is not None: + m.train() + + def _set_eval(self) -> None: + """Set the model to evaluation mode + """ + for model in self.models.values(): + if model is not None: + model.eval() + + # ================================================================================================== + + def _create_scaled_inputs(self, inputs): + scaled_inputs = {} + for scale in self.scales: + exp_scale = 2 ** scale + height = math.ceil(self.height / exp_scale) + width = math.ceil(self.width / exp_scale) + scaled_inputs[('rgb', 0, scale)] = F.interpolate(inputs[0], + [height, width], + mode='bilinear', + align_corners=False) + return scaled_inputs diff --git a/src/opendr/perception/continual_slam/algorithm/depth_pose_module/losses.py b/src/opendr/perception/continual_slam/algorithm/depth_pose_module/losses.py new file mode 100644 index 0000000000..3bbc9c73f7 --- /dev/null +++ b/src/opendr/perception/continual_slam/algorithm/depth_pose_module/losses.py @@ -0,0 +1,77 @@ +import torch +from torch import Tensor, nn +import torch.nn.functional as F + +from typing import Dict, Any + + +def compute_reprojection_loss(ssim: nn.Module, + pred: Tensor, + target: Tensor, + ) -> Tensor: + """ + Computes reprojection loss between a batch of predicted and target images + This is the photometric error + """ + abs_diff = torch.abs(target - pred) + l1_loss = abs_diff.mean(1, True) + + ssim_loss = ssim(pred, target).mean(1, True) + reprojection_loss = 0.85 * ssim_loss + 0.15 * l1_loss + + return reprojection_loss + + +def compute_smooth_loss(disp: Tensor, + img: Tensor, + mask: Tensor, + ) -> Tensor: + """ + Computes the smoothness loss for a disparity image + The color image is used for edge-aware smoothness + """ + grad_disp_x = torch.abs(disp[:, :, :, :-1] - disp[:, :, :, 1:]) + grad_disp_y = torch.abs(disp[:, :, :-1, :] - disp[:, :, 1:, :]) + + grad_img_x = torch.mean(torch.abs(img[:, :, :, :-1] - img[:, :, :, 1:]), 1, keepdim=True) + grad_img_y = torch.mean(torch.abs(img[:, :, :-1, :] - img[:, :, 1:, :]), 1, keepdim=True) + + grad_disp_x *= torch.exp(-grad_img_x) + grad_disp_y *= torch.exp(-grad_img_y) + + grad_disp_x = torch.masked_select(grad_disp_x, mask[..., :-1]) + grad_disp_y = torch.masked_select(grad_disp_y, mask[..., :-1, :]) + + batch_size = disp.shape[0] + smooth_loss = torch.empty(batch_size, device=disp.device) + for i in range(batch_size): + _grad_disp_x = torch.masked_select(grad_disp_x[i, ...], mask[i, :, :, :-1]) + _grad_disp_y = torch.masked_select(grad_disp_y[i, ...], mask[i, :, :-1, :]) + smooth_loss[i] = _grad_disp_x.mean() + _grad_disp_y.mean() + + return smooth_loss + + +def compute_velocity_loss(inputs: Dict[Any, Tensor], + outputs: Dict[Any, Tensor], + distances: Dict[float, float], + device: torch.device, + ) -> Tensor: + batch_size = inputs[0].shape[0] # might be different from self.batch_size + velocity_loss = torch.zeros(batch_size, device=device).squeeze() + num_frames = 0 + for frame in [0, -1, 1]: + if frame == -1: + continue + if frame == 0: + pred_translation = outputs[('translation', 0, -1)] + else: + 
pred_translation = outputs[('translation', 0, 1)] + gt_distance = torch.abs(distances[frame]).squeeze() + pred_distance = torch.linalg.norm(pred_translation, dim=-1).squeeze() + velocity_loss += F.l1_loss(pred_distance, + gt_distance, + reduction='none') # separated by sample in batch + num_frames += 1 + velocity_loss /= num_frames + return velocity_loss diff --git a/src/opendr/perception/continual_slam/algorithm/depth_pose_module/networks/__init__.py b/src/opendr/perception/continual_slam/algorithm/depth_pose_module/networks/__init__.py new file mode 100644 index 0000000000..7a1c742908 --- /dev/null +++ b/src/opendr/perception/continual_slam/algorithm/depth_pose_module/networks/__init__.py @@ -0,0 +1,5 @@ +from opendr.perception.continual_slam.algorithm.depth_pose_module.networks.depth_decoder import DepthDecoder +from opendr.perception.continual_slam.algorithm.depth_pose_module.networks.pose_decoder import PoseDecoder +from opendr.perception.continual_slam.algorithm.depth_pose_module.networks.resnet_encoder import ResnetEncoder + +__all__ = ['DepthDecoder', 'PoseDecoder', 'ResnetEncoder'] diff --git a/src/opendr/perception/continual_slam/algorithm/depth_pose_module/networks/depth_decoder.py b/src/opendr/perception/continual_slam/algorithm/depth_pose_module/networks/depth_decoder.py new file mode 100644 index 0000000000..0b46636ba1 --- /dev/null +++ b/src/opendr/perception/continual_slam/algorithm/depth_pose_module/networks/depth_decoder.py @@ -0,0 +1,67 @@ +from typing import Dict, Tuple + +import numpy as np +import torch +import torch.nn.functional as F +from torch import Tensor, nn + +from .layers.ConvBlock import ConvBlock, Conv3x3 + + +class DepthDecoder(nn.Module): + """ + The DepthNet Decoder module. + """ + + def __init__(self, + num_ch_encoder: np.ndarray, + scales: Tuple[int, ...]=(0, 1, 2, 3), + use_skips: bool = True) -> None: + + super().__init__() + + self.scales = scales + self.use_skips = use_skips + self.num_output_channels = 1 + + self.num_ch_encoder = num_ch_encoder + self.num_ch_decoder = np.array([16, 32, 64, 128, 256]) + + self.convs = {} + for i in range(4, -1, -1): + # upconv_0 + num_ch_in = self.num_ch_encoder[-1] if i == 4 else self.num_ch_decoder[i + 1] + num_ch_out = self.num_ch_decoder[i] + setattr(self, f'upconv_{i}_0', ConvBlock(num_ch_in, num_ch_out)) + + # upconv_1 + num_ch_in = self.num_ch_decoder[i] + if self.use_skips and i > 0: + num_ch_in += self.num_ch_encoder[i - 1] + num_ch_out = self.num_ch_decoder[i] + setattr(self, f'upconv_{i}_1', ConvBlock(num_ch_in, num_ch_out)) + + for s in self.scales: + setattr(self, f'dispconv_{s}', Conv3x3(self.num_ch_decoder[s], + self.num_output_channels)) + + self.sigmoid = nn.Sigmoid() + self.output = {} + + def forward(self, input_features: Tensor) -> Dict[Tuple[str, int], Tensor]: + self.output = {} + + x = input_features[-1] + for i in range(4, -1, -1): + x = getattr(self, f'upconv_{i}_0')(x) + if self.use_skips and i > 0: + x = [F.interpolate(x, size=input_features[i - 1].shape[2:], mode='nearest')] + x += [input_features[i - 1]] + else: + x = [F.interpolate(x, scale_factor=2, mode='nearest')] + x = torch.cat(x, 1) + x = getattr(self, f'upconv_{i}_1')(x) + if i in self.scales: + self.output[('disp', i)] = self.sigmoid(getattr(self, f'dispconv_{i}')(x)) + + return self.output diff --git a/src/opendr/perception/continual_slam/algorithm/depth_pose_module/networks/layers/BackProjectDepth.py b/src/opendr/perception/continual_slam/algorithm/depth_pose_module/networks/layers/BackProjectDepth.py new file mode 100644 
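A minimal shape-check sketch for the DepthDecoder above (an illustration, not part of the patch): the ResNet-18 channel layout and the 192x640 input resolution are assumptions matching the ResnetEncoder defined further below in this diff.

import numpy as np
import torch

decoder = DepthDecoder(num_ch_encoder=np.array([64, 64, 128, 256, 512]), scales=(0, 1, 2, 3))
features = [
    torch.randn(1, 64, 96, 320),    # conv1 + relu, stride 2
    torch.randn(1, 64, 48, 160),    # layer1, stride 4
    torch.randn(1, 128, 24, 80),    # layer2, stride 8
    torch.randn(1, 256, 12, 40),    # layer3, stride 16
    torch.randn(1, 512, 6, 20),     # layer4, stride 32
]
outputs = decoder(features)
assert outputs[('disp', 0)].shape == (1, 1, 192, 640)  # full-resolution sigmoid disparity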
index 0000000000..2d98caddae --- /dev/null +++ b/src/opendr/perception/continual_slam/algorithm/depth_pose_module/networks/layers/BackProjectDepth.py @@ -0,0 +1,35 @@ +import numpy as np +import torch +from torch import nn + + +class BackProjectDepth(nn.Module): + """ + Layer to transform a depth image into a point cloud + """ + def __init__(self, batch_size: int, height: int, width: int) -> None: + super().__init__() + + self.batch_size = batch_size + self.height = height + self.width = width + + meshgrid = np.meshgrid(range(self.width), range(self.height), indexing='xy') + self.id_coords = np.stack(meshgrid, axis=0).astype(np.float32) + self.id_coords = nn.Parameter(torch.from_numpy(self.id_coords), requires_grad=False) + + self.ones = nn.Parameter(torch.ones(self.batch_size, 1, self.height * self.width), + requires_grad=False) + + self.pix_coords = torch.unsqueeze( + torch.stack([self.id_coords[0].view(-1), self.id_coords[1].view(-1)], 0), 0) + self.pix_coords = self.pix_coords.repeat(batch_size, 1, 1) + self.pix_coords = nn.Parameter(torch.cat([self.pix_coords, self.ones], 1), + requires_grad=False) + + def forward(self, depth, inv_K): + cam_points = torch.matmul(inv_K[:, :3, :3], self.pix_coords) + cam_points = depth.view(self.batch_size, 1, -1) * cam_points + cam_points = torch.cat([cam_points, self.ones], 1) + + return cam_points diff --git a/src/opendr/perception/continual_slam/algorithm/depth_pose_module/networks/layers/ConvBlock.py b/src/opendr/perception/continual_slam/algorithm/depth_pose_module/networks/layers/ConvBlock.py new file mode 100644 index 0000000000..a7798f6f7b --- /dev/null +++ b/src/opendr/perception/continual_slam/algorithm/depth_pose_module/networks/layers/ConvBlock.py @@ -0,0 +1,45 @@ +from torch import Tensor, nn + + +class ConvBlock(nn.Module): + """ + Layer to perform a convolution followed by ELU. + """ + def __init__( + self, + in_channels: int, + out_channels: int, + ) -> None: + super().__init__() + + self.conv = Conv3x3(in_channels, out_channels) + self.nonlin = nn.ELU(inplace=True) + + def forward(self, x: Tensor) -> Tensor: + out = self.conv(x) + out = self.nonlin(out) + return out + + +class Conv3x3(nn.Module): + """ + Layer to pad and convolve input. 
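Reflection padding is used by default, so the 3x3 convolution preserves the spatial resolution of its input.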
+ """ + def __init__( + self, + in_channels: int, + out_channels: int, + use_reflection: bool = True, + ) -> None: + super().__init__() + + if use_reflection: + self.pad = nn.ReflectionPad2d(1) + else: + self.pad = nn.ZeroPad2d(1) + self.conv = nn.Conv2d(int(in_channels), int(out_channels), 3) + + def forward(self, x: Tensor) -> Tensor: + out = self.pad(x) + out = self.conv(out) + return out diff --git a/src/opendr/perception/continual_slam/algorithm/depth_pose_module/networks/layers/Project3D.py b/src/opendr/perception/continual_slam/algorithm/depth_pose_module/networks/layers/Project3D.py new file mode 100644 index 0000000000..4546d08aa1 --- /dev/null +++ b/src/opendr/perception/continual_slam/algorithm/depth_pose_module/networks/layers/Project3D.py @@ -0,0 +1,28 @@ +import torch +from torch import nn + + +class Project3D(nn.Module): + """ + Layer which projects 3D points into a camera with intrinsics K and at position T + """ + def __init__(self, batch_size: int, height: int, width: int, eps: float = 1e-7) -> None: + super().__init__() + + self.batch_size = batch_size + self.height = height + self.width = width + self.eps = eps + + def forward(self, points, K, T): + P = torch.matmul(K, T)[:, :3, :] + + cam_points = torch.matmul(P, points) + + pixel_coordinates = cam_points[:, :2, :] / (cam_points[:, 2, :].unsqueeze(1) + self.eps) + pixel_coordinates = pixel_coordinates.view(self.batch_size, 2, self.height, self.width) + pixel_coordinates = pixel_coordinates.permute(0, 2, 3, 1) + pixel_coordinates[..., 0] /= self.width - 1 + pixel_coordinates[..., 1] /= self.height - 1 + pixel_coordinates = (pixel_coordinates - 0.5) * 2 + return pixel_coordinates diff --git a/src/opendr/perception/continual_slam/algorithm/depth_pose_module/networks/layers/SSIM.py b/src/opendr/perception/continual_slam/algorithm/depth_pose_module/networks/layers/SSIM.py new file mode 100644 index 0000000000..c6352f1486 --- /dev/null +++ b/src/opendr/perception/continual_slam/algorithm/depth_pose_module/networks/layers/SSIM.py @@ -0,0 +1,36 @@ +import torch +from torch import Tensor, nn + + +class SSIM(nn.Module): + """ + Layer to compute the SSIM loss between a pair of images + """ + def __init__(self) -> None: + super().__init__() + self.mu_x_pool = nn.AvgPool2d(3, 1) + self.mu_y_pool = nn.AvgPool2d(3, 1) + self.sig_x_pool = nn.AvgPool2d(3, 1) + self.sig_y_pool = nn.AvgPool2d(3, 1) + self.sig_xy_pool = nn.AvgPool2d(3, 1) + + self.refl = nn.ReflectionPad2d(1) + + self.C1 = 0.01**2 + self.C2 = 0.03**2 + + def forward(self, x: Tensor, y: Tensor) -> Tensor: + x = self.refl(x) + y = self.refl(y) + + mu_x = self.mu_x_pool(x) + mu_y = self.mu_y_pool(y) + + sigma_x = self.sig_x_pool(x**2) - mu_x**2 + sigma_y = self.sig_y_pool(y**2) - mu_y**2 + sigma_xy = self.sig_xy_pool(x * y) - mu_x * mu_y + + SSIM_n = (2 * mu_x * mu_y + self.C1) * (2 * sigma_xy + self.C2) + SSIM_d = (mu_x**2 + mu_y**2 + self.C1) * (sigma_x + sigma_y + self.C2) + + return torch.clamp((1 - SSIM_n / SSIM_d) / 2, 0, 1) diff --git a/src/opendr/perception/continual_slam/algorithm/depth_pose_module/networks/layers/__init__.py b/src/opendr/perception/continual_slam/algorithm/depth_pose_module/networks/layers/__init__.py new file mode 100644 index 0000000000..5f650532bd --- /dev/null +++ b/src/opendr/perception/continual_slam/algorithm/depth_pose_module/networks/layers/__init__.py @@ -0,0 +1,5 @@ +from opendr.perception.continual_slam.algorithm.depth_pose_module.networks.layers.BackProjectDepth import BackProjectDepth +from 
opendr.perception.continual_slam.algorithm.depth_pose_module.networks.layers.SSIM import SSIM +from opendr.perception.continual_slam.algorithm.depth_pose_module.networks.layers.Project3D import Project3D + +__all__ = ['BackProjectDepth', 'SSIM', 'Project3D'] diff --git a/src/opendr/perception/continual_slam/algorithm/depth_pose_module/networks/pose_decoder.py b/src/opendr/perception/continual_slam/algorithm/depth_pose_module/networks/pose_decoder.py new file mode 100644 index 0000000000..7dacd05cf2 --- /dev/null +++ b/src/opendr/perception/continual_slam/algorithm/depth_pose_module/networks/pose_decoder.py @@ -0,0 +1,54 @@ +from typing import Optional, Tuple + +import numpy as np +import torch +from torch import Tensor, nn + + +class PoseDecoder(nn.Module): + """ + Decoder of the PoseNet module. + """ + def __init__( + self, + num_ch_encoder: np.ndarray, + num_input_features: int, + num_frames_to_predict_for: Optional[int] = None, + ) -> None: + super().__init__() + + self.num_ch_encoder = num_ch_encoder + self.num_input_features = num_input_features + + if num_frames_to_predict_for is None: + num_frames_to_predict_for = num_input_features - 1 + self.num_frames_to_predict_for = num_frames_to_predict_for + + setattr(self, 'squeeze', nn.Conv2d(self.num_ch_encoder[-1], 256, 1)) + setattr(self, 'pose_0', nn.Conv2d(num_input_features * 256, 256, 3, 1, 1)) + setattr(self, 'pose_1', nn.Conv2d(256, 256, 3, 1, 1)) + setattr(self, 'pose_2', nn.Conv2d(256, 6 * num_frames_to_predict_for, 1)) + + self.relu = nn.ReLU() + self.tanh = nn.Tanh() + self.softplus = nn.Softplus() + self.sigmoid = nn.Sigmoid() + + def forward(self, input_features: Tensor) -> Tuple[Tensor, Tensor]: + last_features = [f[-1] for f in input_features] + + cat_features = [self.relu(getattr(self, 'squeeze')(f)) for f in last_features] + cat_features = torch.cat(cat_features, 1) + + out = cat_features + for i in range(3): + out = getattr(self, f'pose_{i}')(out) + if i != 2: + out = self.relu(out) + + out = out.mean(3).mean(2) + out = 0.01 * out.view(-1, self.num_frames_to_predict_for, 1, 6) + + axis_angle = out[..., :3] + translation = out[..., 3:] + return axis_angle, translation diff --git a/src/opendr/perception/continual_slam/algorithm/depth_pose_module/networks/resnet_encoder.py b/src/opendr/perception/continual_slam/algorithm/depth_pose_module/networks/resnet_encoder.py new file mode 100644 index 0000000000..bf64ca6d8c --- /dev/null +++ b/src/opendr/perception/continual_slam/algorithm/depth_pose_module/networks/resnet_encoder.py @@ -0,0 +1,127 @@ +# Adapted from: +# https://github.com/nianticlabs/monodepth2/blob/master/networks/resnet_encoder.py + +from typing import List, Type, Union + +import numpy as np +import torch +from torch import Tensor, nn +from torch.utils import model_zoo +from torchvision import models + + +class ResNetMultiImageInput(models.ResNet): + """ + Constructs a resnet model with varying number of input images. + Adapted from https://github.com/pytorch/vision/blob/master/torchvision/models/resnet.py + Encoders for depth and pose prediction. 
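For example, with num_input_images=2 the first convolution expects a 6-channel input built by concatenating two RGB frames along the channel dimension.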
+ """ + def __init__( + self, + block: Type[Union[models.resnet.BasicBlock, models.resnet.Bottleneck]], + layers: List[int], + num_input_images: int = 1, + ) -> None: + super().__init__(block, layers) + self.inplanes = 64 + self.conv1 = nn.Conv2d(num_input_images * 3, + 64, + kernel_size=7, + stride=2, + padding=3, + bias=False) + self.bn1 = nn.BatchNorm2d(64) + self.relu = nn.ReLU(inplace=True) + self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1) + self.layer1 = self._make_layer(block, 64, layers[0]) + self.layer2 = self._make_layer(block, 128, layers[1], stride=2) + self.layer3 = self._make_layer(block, 256, layers[2], stride=2) + self.layer4 = self._make_layer(block, 512, layers[3], stride=2) + + for m in self.modules(): + if isinstance(m, nn.Conv2d): + nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu') + elif isinstance(m, nn.BatchNorm2d): + nn.init.constant_(m.weight, 1) + nn.init.constant_(m.bias, 0) + + +def resnet_multiimage_input( + num_layers: int, + pretrained: bool = False, + num_input_images: int = 1, +) -> models.ResNet: + """Constructs a resnet model with varying number of input images. + + :param num_layers: Specifies the ResNet model to be used + :param pretrained: If True, returns a model pre-trained on ImageNet + :param num_input_images: Specifies the number of input images. For PoseNet, >1. + """ + if num_layers not in [18, 50]: + raise ValueError('Resnet multi-image can only run with 18 or 50 layers.') + + block_type = { + 18: models.resnet.BasicBlock, + 50: models.resnet.Bottleneck, + }[num_layers] + blocks = { + 18: [2, 2, 2, 2], + 50: [3, 4, 6, 3], + }[num_layers] + model = ResNetMultiImageInput(block_type, blocks, num_input_images=num_input_images) + + if pretrained: + loaded = model_zoo.load_url(models.resnet.model_urls[f'resnet{num_layers}']) + loaded['conv1.weight'] = torch.cat([loaded['conv1.weight']] * num_input_images, + 1) / num_input_images + model.load_state_dict(loaded) + return model + + +class ResnetEncoder(nn.Module): + AVAILABLE_RESNETS = { + 18: models.resnet18, + 34: models.resnet34, + # 50: models.resnet50, + } + + def __init__( + self, + num_layers: int, + pretrained: bool, + num_input_images: int = 1, + ) -> None: + """Constructs a ResNet model. + + :param num_layers: Specifies the ResNet model to be used + :param pretrained: If True, returns a model pre-trained on ImageNet + :param num_input_images: Specifies the number of input images. For PoseNet, >1. 
+ """ + super().__init__() + self.num_ch_encoder = np.array([64, 64, 128, 256, 512]) + + if num_layers not in self.AVAILABLE_RESNETS.keys(): + raise ValueError(f'Could not find a ResNet model with {num_layers} layers.') + + if num_input_images < 1: + raise ValueError(f'Invalid value ({num_input_images}) for num_input_images.') + if num_input_images == 1: + self.resnet = self.AVAILABLE_RESNETS[num_layers](pretrained) + else: + self.resnet = resnet_multiimage_input(num_layers, pretrained, num_input_images) + + # From paper + if num_layers > 34: + self.num_ch_encoder[1:] *= 4 + + def forward(self, x: Tensor) -> List[Tensor]: + features = [] + x = (x - 0.45) / 0.225 + x = self.resnet.conv1(x) + x = self.resnet.bn1(x) + features.append(self.resnet.relu(x)) + features.append(self.resnet.layer1(self.resnet.maxpool(features[-1]))) + features.append(self.resnet.layer2(features[-1])) + features.append(self.resnet.layer3(features[-1])) + features.append(self.resnet.layer4(features[-1])) + return features diff --git a/src/opendr/perception/continual_slam/algorithm/depth_pose_module/pytorch3d.py b/src/opendr/perception/continual_slam/algorithm/depth_pose_module/pytorch3d.py new file mode 100644 index 0000000000..3810b4b0ef --- /dev/null +++ b/src/opendr/perception/continual_slam/algorithm/depth_pose_module/pytorch3d.py @@ -0,0 +1,93 @@ +# Copied from: +# https://pytorch3d.readthedocs.io/en/latest/_modules/pytorch3d/transforms/rotation_conversions.html + +import torch +import torch.nn.functional as F + + +def quaternion_to_axis_angle(quaternions: torch.Tensor) -> torch.Tensor: + """ + Convert rotations given as quaternions to axis/angle. + Args: + quaternions: quaternions with real part first, + as tensor of shape (..., 4). + Returns: + Rotations given as a vector in axis angle form, as a tensor + of shape (..., 3), where the magnitude is the angle + turned anticlockwise in radians around the vector's + direction. + """ + norms = torch.norm(quaternions[..., 1:], p=2, dim=-1, keepdim=True) + half_angles = torch.atan2(norms, quaternions[..., :1]) + angles = 2 * half_angles + eps = 1e-6 + small_angles = angles.abs() < eps + sin_half_angles_over_angles = torch.empty_like(angles) + sin_half_angles_over_angles[~small_angles] = (torch.sin(half_angles[~small_angles]) / + angles[~small_angles]) + # for x small, sin(x/2) is about x/2 - (x/2)^3/6 + # so sin(x/2)/x is about 1/2 - (x*x)/48 + sin_half_angles_over_angles[small_angles] = (0.5 - + (angles[small_angles] * angles[small_angles]) / 48) + return quaternions[..., 1:] / sin_half_angles_over_angles + + +def matrix_to_quaternion(matrix: torch.Tensor) -> torch.Tensor: + """ + Convert rotations given as rotation matrices to quaternions. + Args: + matrix: Rotation matrices as tensor of shape (..., 3, 3). + Returns: + quaternions with real part first, as tensor of shape (..., 4). 
+ """ + if matrix.size(-1) != 3 or matrix.size(-2) != 3: + raise ValueError(f"Invalid rotation matrix shape {matrix.shape}.") + + batch_dim = matrix.shape[:-2] + m00, m01, m02, m10, m11, m12, m20, m21, m22 = torch.unbind(matrix.reshape(batch_dim + (9, )), + dim=-1) + + q_abs = _sqrt_positive_part( + torch.stack( + [ + 1.0 + m00 + m11 + m22, + 1.0 + m00 - m11 - m22, + 1.0 - m00 + m11 - m22, + 1.0 - m00 - m11 + m22, + ], + dim=-1, + )) + + # we produce the desired quaternion multiplied by each of r, i, j, k + quat_by_rijk = torch.stack( + [ + torch.stack([q_abs[..., 0]**2, m21 - m12, m02 - m20, m10 - m01], dim=-1), + torch.stack([m21 - m12, q_abs[..., 1]**2, m10 + m01, m02 + m20], dim=-1), + torch.stack([m02 - m20, m10 + m01, q_abs[..., 2]**2, m12 + m21], dim=-1), + torch.stack([m10 - m01, m20 + m02, m21 + m12, q_abs[..., 3]**2], dim=-1), + ], + dim=-2, + ) + + # We floor here at 0.1 but the exact level is not important; if q_abs is small, + # the candidate won't be picked. + flr = torch.tensor(0.1).to(dtype=q_abs.dtype, device=q_abs.device) + quat_candidates = quat_by_rijk / (2.0 * q_abs[..., None].max(flr)) + + # if not for numerical problems, quat_candidates[i] should be same (up to a sign), + # forall i; we pick the best-conditioned one (with the largest denominator) + + return quat_candidates[F.one_hot(q_abs.argmax( + dim=-1), num_classes=4) > 0.5, : # pyre-ignore[16] + ].reshape(batch_dim + (4, )) + + +def _sqrt_positive_part(x: torch.Tensor) -> torch.Tensor: + """ + Returns torch.sqrt(torch.max(0, x)) + but with a zero subgradient where x is 0. + """ + ret = torch.zeros_like(x) + positive_mask = x > 0 + ret[positive_mask] = torch.sqrt(x[positive_mask]) + return ret diff --git a/src/opendr/perception/continual_slam/algorithm/depth_pose_module/replay_buffer/__init__.py b/src/opendr/perception/continual_slam/algorithm/depth_pose_module/replay_buffer/__init__.py new file mode 100644 index 0000000000..98a8d38829 --- /dev/null +++ b/src/opendr/perception/continual_slam/algorithm/depth_pose_module/replay_buffer/__init__.py @@ -0,0 +1,4 @@ +from .encoder import FeatureEncoder +from .replay_buffer import ReplayBuffer + +__all__ = ['FeatureEncoder', 'ReplayBuffer'] diff --git a/src/opendr/perception/continual_slam/algorithm/depth_pose_module/replay_buffer/encoder.py b/src/opendr/perception/continual_slam/algorithm/depth_pose_module/replay_buffer/encoder.py new file mode 100644 index 0000000000..49465ca9e9 --- /dev/null +++ b/src/opendr/perception/continual_slam/algorithm/depth_pose_module/replay_buffer/encoder.py @@ -0,0 +1,33 @@ +import torch +import torch.nn as nn +from torch import Tensor +from torchvision import models, transforms + + +class Identity(nn.Module): + def __init__(self): + super(Identity, self).__init__() + + def forward(self, x): + return x + + +class FeatureEncoder: + def __init__(self, device: torch.device, num_features: int = 576): + super().__init__() + self.device = device + + self.model = models.mobilenet_v3_small(pretrained=True) + self.normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]) + + self.num_features = num_features + self.model.classifier = Identity() + self.model.to(self.device) + self.model.eval() + + def __call__(self, image: Tensor) -> Tensor: + image = self.normalize(image) + image = image.to(self.device).unsqueeze(0) + with torch.no_grad(): + features = self.model(image) + return features diff --git a/src/opendr/perception/continual_slam/algorithm/depth_pose_module/replay_buffer/replay_buffer.py 
b/src/opendr/perception/continual_slam/algorithm/depth_pose_module/replay_buffer/replay_buffer.py new file mode 100644 index 0000000000..fb5237de13 --- /dev/null +++ b/src/opendr/perception/continual_slam/algorithm/depth_pose_module/replay_buffer/replay_buffer.py @@ -0,0 +1,274 @@ +import os +import pickle +import random +import numpy as np +from pathlib import Path +from typing import Any, Dict, List, Optional + +import torch +from PIL import Image +from torch import Tensor +from torch.utils.data import Dataset as TorchDataset +import torch.nn.functional as F + +from opendr.perception.continual_slam.algorithm.depth_pose_module.replay_buffer.encoder import FeatureEncoder +from opendr.perception.continual_slam.continual_slam_learner import ContinualSLAMLearner +from opendr.perception.continual_slam.configs.config_parser import ConfigParser + + +class ReplayBuffer(TorchDataset): + def __init__(self, + buffer_size: int, + save_memory: bool, + device: Optional[torch.device] = None, + dataset_config_path: Optional[str] = None, + height: Optional[int] = None, + width: Optional[int] = None, + save_state_path: Optional[Path] = None, + load_state_path: Optional[Path] = None, + local_save_path: Optional[Path] = None, + num_workers: int = 1, + cosine_similarity_threshold: float = 0.9, + num_features: int = 576, + sample_size: int = 3, + ): + """ + This class implements a replay buffer for the depth pose module. + """ + + super().__init__() + self.buffer_size = buffer_size + self.save_memory = save_memory + if device is None: + self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu") + else: + self.device = device + self.save_state_path = save_state_path + self.load_state_path = load_state_path + self.local_save_path = local_save_path + if dataset_config_path: + self.config = ConfigParser(dataset_config_path) + self.dataset_config = self.config.dataset + if not (dataset_config_path or (height and width)): + raise ValueError("Either dataset_config or height and width must be specified.") + if dataset_config_path: + self.height = self.dataset_config.height + self.width = self.dataset_config.width + else: + self.height = height + self.width = width + if not self.save_memory and not self.local_save_path: + self.local_save_path = self.config.depth_pose.log_path + + self.num_workers = num_workers + self.cosine_similarity_threshold = cosine_similarity_threshold + self.num_features = num_features + self.sample_size = sample_size + self.frame_ids = [-1, 0, 1] + # Count how many images are in the buffer + self.count = 0 + + # Create encoder + self.encoder = FeatureEncoder(device=self.device, num_features=num_features) + + if self.save_memory: + # Create image buffer tensor with shape (buffer_size, 3, 3, height, width) + self.image_buffer = torch.zeros((buffer_size, 3, 3, self.height, self.width), + dtype=torch.float32, device=self.device) + self.feature_vector_buffer = torch.zeros((buffer_size, num_features), + dtype=torch.float32, device=self.device) + # Create distance buffer tensor with shape (buffer_size, 3) + self.distance_buffer = torch.zeros((buffer_size, 3), dtype=torch.float32, device=self.device) + + else: + if self.local_save_path is None: + raise ValueError("local_save_path must be specified if save_memory is False.") + # Read from disk + # Create image buffer that holds file paths to the images + self.image_buffer = [] + # Create distance buffer that holds file paths to the distance maps + self.distance_buffer = [] + + # Have to hold the feature vectors in memory + 
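# Once the buffer is full, compute_cosine_similarity() compares every incoming
# frame's feature vector against this tensor, so it is kept resident rather than on disk.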
self.feature_vector_buffer = torch.zeros((buffer_size, num_features), dtype=torch.float32, device=self.device) + + # Create a dictionary that holds the buffer + os.makedirs(self.local_save_path, exist_ok=True) + # Create a dictionary that holds the image buffer + os.makedirs(os.path.join(self.local_save_path, "images"), exist_ok=True) + # Create a dictionary that holds the distance buffer + os.makedirs(os.path.join(self.local_save_path, "distances"), exist_ok=True) + + self.buffer = {"image_buffer": self.image_buffer, + "distance_buffer": self.distance_buffer, + "feature_vector_buffer": self.feature_vector_buffer} + + def add(self, data: Dict[Any, Tensor]): + """ + This method adds a new image to the replay buffer. + """ + + # Format the data + data = ContinualSLAMLearner._input_formatter(data) + if self.count < self.buffer_size: + if self.save_memory: + for i in range(len(self.frame_ids)): + self.image_buffer[self.count, i] = data[(self.frame_ids[i], 'image')] + self.distance_buffer[self.count, i] = data[(self.frame_ids[i], 'distance')] + + # Get the feature vector of the new image (central image from the image triplet) + new_image_feature = self.encoder(data[(0, 'image')]) + self.feature_vector_buffer[self.count] = new_image_feature + else: + # Read from disk + # Save into the disk + for i in range(len(self.frame_ids)): + # Save the image to disk in the following format: image_{count}_{frame_id}.png + image_path = os.path.join(self.local_save_path, + "images", + f"image_{self.count}_{self.frame_ids[i]}.png") + # Send image from torch to PIL + image = data[(self.frame_ids[i], 'image')].cpu().numpy() + image = Image.fromarray(image.transpose(1, 2, 0).astype(np.uint8)) + image.save(image_path) + self.image_buffer.append(image_path) + + # Save the distance map to disk in the following format: distance_{count}_{frame_id}.npy + distance_path = os.path.join(self.local_save_path, + "distances", + f"distance_{self.count}_{self.frame_ids[i]}.npy") + distance = data[(self.frame_ids[i], 'distance')].cpu().numpy() + np.save(distance_path, distance) + self.distance_buffer.append(distance_path) + + # Get the feature vector of the new image (central image from the image triplet) + new_image_feature = self.encoder(data[(0, 'image')]) + self.feature_vector_buffer[self.count] = new_image_feature + + self.count += 1 + else: + # Now we compare the new image with the images in the buffer using the encoder + # and we add the new image to the buffer if it is sufficiently different from the + # images in the buffer using cosine similarity. 
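# Concretely: compute_cosine_similarity() returns True only if the highest cosine
# similarity to any buffered feature vector is below cosine_similarity_threshold;
# in that case the new triplet overwrites a uniformly random slot, otherwise it is discarded.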
+ + # Get the feature vector of the new image (central image from the image triplet) + new_image_feature = self.encoder(data[(0, 'image')]) + if self.compute_cosine_similarity(new_image_feature): + # Throw away a random image from the buffer + random_index = random.randint(0, self.buffer_size - 1) + if self.save_memory: + for i in range(len(self.frame_ids)): + self.image_buffer[random_index, i] = data[(self.frame_ids[i], 'image')] + self.distance_buffer[random_index, i] = data[(self.frame_ids[i], 'distance')] + + self.feature_vector_buffer[random_index] = new_image_feature + else: + # Change the image and distance map on disk + for i in range(len(self.frame_ids)): + # Save the image to disk in the following format: image_{random_index}_{frame_id}.png + image_path = os.path.join(self.local_save_path, + "images", + f"image_{random_index}_{self.frame_ids[i]}.png") + image = data[(self.frame_ids[i], 'image')].cpu().numpy() + image = Image.fromarray(image.transpose(1, 2, 0).astype(np.uint8)) + image.save(image_path) + self.image_buffer[random_index] = image_path + + # Save the distance map to disk in the following format: distance_{count}_{frame_id}.npy + distance_path = os.path.join(self.local_save_path, + "distances", + f"distance_{random_index}_{self.frame_ids[i]}.npy") + distance = data[(self.frame_ids[i], 'distance')].cpu().numpy() + np.save(distance_path, distance) + self.distance_buffer[random_index] = distance_path + + self.feature_vector_buffer[random_index] = new_image_feature + if self.count % 20 == 0 and self.count != self.buffer_size: + print(f"Replay buffer size: {self.count}") + + def sample(self) -> List: + """ + This method returns a random sample of images from the replay buffer. + """ + if self.count < self.sample_size: + raise ValueError("The replay buffer does not have enough images to sample from.") + else: + # Sample random indices from the buffer + random_indices = random.sample(range(self.count), self.sample_size) + if self.save_memory: + # Get the images and distances from the buffer + image_sample = self.image_buffer[random_indices] + distance_sample = self.distance_buffer[random_indices] + batch = [] + for i in range(self.sample_size): + single_sample = {} + for j in range(len(self.frame_ids)): + single_sample[(self.frame_ids[j], 'image')] = image_sample[i, j] + single_sample[(self.frame_ids[j], 'distance')] = distance_sample[i, j] + batch.append(single_sample) + else: + # Read from disk + batch = [] + for i in range(self.sample_size): + single_sample = {} + for j in range(len(self.frame_ids)): + image_path = self.image_buffer[random_indices[i]] + image = Image.open(image_path) + image = torch.Tensor(np.array(image)).permute(2, 0, 1).to(self.device) + single_sample[(self.frame_ids[j], 'image')] = image + + distance_path = self.distance_buffer[random_indices[i]] + distance = torch.Tensor(np.load(distance_path)).to(self.device) + single_sample[(self.frame_ids[j], 'distance')] = distance + + batch.append(single_sample) + return batch + + def save_state(self): + state = { + 'buffer': self.buffer, + 'buffer_size': self.buffer_size, + 'count': self.count, + 'save_memory': self.save_memory, + 'device': self.device, + 'save_state_path': self.save_state_path, + 'load_state_path': self.load_state_path, + 'local_save_path': self.local_save_path, + } + with open(self.save_state_path, 'wb') as f: + pickle.dump(state, f) + + def load_state(self): + with open(self.load_state_path, 'rb') as f: + state = pickle.load(f) + self.buffer = state['buffer'] + self.image_buffer = 
self.buffer['image'] + self.distance_buffer = self.buffer['distance'] + self.feature_vector_buffer = self.buffer['feature_vector'] + self.buffer_size = state['buffer_size'] + self.save_memory = state['save_memory'] + self.device = state['device'] + self.save_state_path = state['save_state_path'] + self.load_state_path = state['load_state_path'] + self.local_save_path = state['local_save_path'] + + def compute_cosine_similarity(self, feature_vector: Tensor) -> bool: + """ + This method computes the cosine similarity between a feature vector and the feature vectors + in the replay buffer. + """ + # Compute the cosine similarity between the feature vector and the feature vectors in the buffer + similarity = F.cosine_similarity(feature_vector, self.feature_vector_buffer) + return similarity.max() < self.cosine_similarity_threshold + + def _recover_state(self): + if self.load_state_path is not None: + self.load_state() + else: + print('No state to load. Initializing new replay buffer.') + + def __getitem__(self) -> Dict[Any, Tensor]: + return self.sample() + + def __len__(self): + return self.count diff --git a/src/opendr/perception/continual_slam/algorithm/depth_pose_module/utils.py b/src/opendr/perception/continual_slam/algorithm/depth_pose_module/utils.py new file mode 100644 index 0000000000..01a6054ff1 --- /dev/null +++ b/src/opendr/perception/continual_slam/algorithm/depth_pose_module/utils.py @@ -0,0 +1,152 @@ +from typing import Optional, Tuple, Union + +import numpy as np +import torch +from PIL import Image +from torch import Tensor + +from opendr.perception.continual_slam.algorithm.depth_pose_module.pytorch3d import ( + matrix_to_quaternion, + quaternion_to_axis_angle, +) + + +def parameters_from_transformation( + transformation: Tensor, + as_numpy: bool = False, +) -> Tuple[Tensor, Tensor]: + """Convert a 4x4 transformation matrix to a translation vector and the axis angles + """ + translation_vector = transformation[:, :, :3, 3] + axis_angle = quaternion_to_axis_angle(matrix_to_quaternion(transformation[:, :, :3, :3])) + if as_numpy: + translation_vector = translation_vector.squeeze().cpu().numpy() + axis_angle = axis_angle.squeeze().cpu().numpy() + return translation_vector, axis_angle + + +# ----------------------------------------------------------------------------- + +# The code below is adapted from: +# https://github.com/nianticlabs/monodepth2/blob/master/layers.py + + +def transformation_from_parameters( + axis_angle: Tensor, + translation: Tensor, + invert: bool = False, +) -> Tensor: + """Convert the network's (axis angle, translation) output into a 4x4 matrix + """ + R = rot_from_axisangle(axis_angle) + t = translation.clone() + + if invert: + R = R.transpose(1, 2) + t *= -1 + + T = get_translation_matrix(t) + + if invert: + M = torch.matmul(R, T) + else: + M = torch.matmul(T, R) + + return M + + +def get_translation_matrix(translation_vector: Tensor) -> Tensor: + """Convert a translation vector into a 4x4 transformation matrix + """ + T = torch.zeros(translation_vector.shape[0], 4, 4).to(device=translation_vector.device) + + t = translation_vector.contiguous().view(-1, 3, 1) + + T[:, 0, 0] = 1 + T[:, 1, 1] = 1 + T[:, 2, 2] = 1 + T[:, 3, 3] = 1 + T[:, :3, 3, None] = t + + return T + + +def rot_from_axisangle(axis_angle: Tensor) -> Tensor: + """Convert an axisangle rotation into a 4x4 transformation matrix + (adapted from https://github.com/Wallacoloo/printipi) + Input 'axis_angle' has to be Bx1x3 + + This is similar to the code below but has less rounding: + from 
scipy.spatial.transform import Rotation + Rotation.from_rotvec(axis_angle).as_matrix() + """ + angle = torch.norm(axis_angle, 2, 2, True) + axis = axis_angle / (angle + 1e-7) + + ca = torch.cos(angle) + sa = torch.sin(angle) + C = 1 - ca + + x = axis[..., 0].unsqueeze(1) + y = axis[..., 1].unsqueeze(1) + z = axis[..., 2].unsqueeze(1) + + xs = x * sa + ys = y * sa + zs = z * sa + xC = x * C + yC = y * C + zC = z * C + xyC = x * yC + yzC = y * zC + zxC = z * xC + + rot = torch.zeros((axis_angle.shape[0], 4, 4)).to(device=axis_angle.device) + + rot[:, 0, 0] = torch.squeeze(x * xC + ca) + rot[:, 0, 1] = torch.squeeze(xyC - zs) + rot[:, 0, 2] = torch.squeeze(zxC + ys) + rot[:, 1, 0] = torch.squeeze(xyC + zs) + rot[:, 1, 1] = torch.squeeze(y * yC + ca) + rot[:, 1, 2] = torch.squeeze(yzC - xs) + rot[:, 2, 0] = torch.squeeze(zxC - ys) + rot[:, 2, 1] = torch.squeeze(yzC + xs) + rot[:, 2, 2] = torch.squeeze(z * zC + ca) + rot[:, 3, 3] = 1 + + return rot + + +def disp_to_depth( + disp: Union[Tensor, np.ndarray], + min_depth: Optional[float] = None, + max_depth: Optional[float] = None, +) -> Union[Tensor, np.ndarray]: + """Convert network's sigmoid output into depth prediction + The formula for this conversion is given in the 'additional considerations' section of the + monodepth2 paper. + """ + # if sum(x is None for x in [min_depth, max_depth]) not in [0, 2]: + # raise ValueError('Either none or both of min_depth and max_depth must be None.') + if min_depth is None and max_depth is None: + depth = 1 / disp + elif max_depth is None: + depth = min_depth / disp + elif min_depth is None: + raise ValueError('min_depth is None') + else: + min_disp = 1 / max_depth + max_disp = 1 / min_depth + scaled_disp = min_disp + (max_disp - min_disp) * disp + depth = 1 / scaled_disp + return depth + + +# ----------------------------------------------------------------------------- + + +def h_concat_images(im1: Image, im2: Image) -> Image: + dst = Image.new('RGB', (im1.width + im2.width, im1.height)) + dst.paste(im1, (0, 0)) + dst.paste(im2, (im1.width, 0)) + return dst diff --git a/src/opendr/perception/continual_slam/algorithm/g2o/__init__.py b/src/opendr/perception/continual_slam/algorithm/g2o/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/src/opendr/perception/continual_slam/algorithm/g2o/fix_g2o.py b/src/opendr/perception/continual_slam/algorithm/g2o/fix_g2o.py new file mode 100644 index 0000000000..4f92d0d798 --- /dev/null +++ b/src/opendr/perception/continual_slam/algorithm/g2o/fix_g2o.py @@ -0,0 +1,29 @@ +#!/usr/bin/env python3 +# pylint: disable = line-too-long + +import os +from pathlib import Path + +def main(): + INPUT_FILE = Path('g2opy/python/core/eigen_types.h') + assert INPUT_FILE.exists() + + tmp_file = Path(str(INPUT_FILE) + '_tmp') + + with open(INPUT_FILE, 'r', encoding='utf-8') as input_file: + with open(tmp_file, 'w', encoding='utf-8') as output_file: + for i, line in enumerate(input_file.readlines()): + if line == ' .def("x", (double (Eigen::Quaterniond::*) () const) &Eigen::Quaterniond::x)\n': + line = ' .def("x", (double &(Eigen::Quaterniond::*)()) & Eigen::Quaterniond::x)\n' + elif line == ' .def("y", (double (Eigen::Quaterniond::*) () const) &Eigen::Quaterniond::y)\n': + line = ' .def("y", (double &(Eigen::Quaterniond::*)()) & Eigen::Quaterniond::y)\n' + elif line == ' .def("z", (double (Eigen::Quaterniond::*) () const) &Eigen::Quaterniond::z)\n': + line = ' .def("z", (double &(Eigen::Quaterniond::*)()) & Eigen::Quaterniond::z)\n' + elif line == ' .def("w", (double 
(Eigen::Quaterniond::*) () const) &Eigen::Quaterniond::w)\n': + line = ' .def("w", (double &(Eigen::Quaterniond::*)()) & Eigen::Quaterniond::w)\n' + output_file.write(line) + + os.rename(tmp_file, INPUT_FILE) + +if __name__ == '__main__': + main() \ No newline at end of file diff --git a/src/opendr/perception/continual_slam/algorithm/g2o/g2opy b/src/opendr/perception/continual_slam/algorithm/g2o/g2opy new file mode 160000 index 0000000000..5587024b17 --- /dev/null +++ b/src/opendr/perception/continual_slam/algorithm/g2o/g2opy @@ -0,0 +1 @@ +Subproject commit 5587024b17fd812c66d91740716fbf0bf5824fbc diff --git a/src/opendr/perception/continual_slam/algorithm/g2o/setup.py b/src/opendr/perception/continual_slam/algorithm/g2o/setup.py new file mode 100644 index 0000000000..de6e7ff2db --- /dev/null +++ b/src/opendr/perception/continual_slam/algorithm/g2o/setup.py @@ -0,0 +1,63 @@ +from setuptools import setup +from setuptools.command.install import install +from opendr.perception.continual_slam.algorithm.g2o.fix_g2o import main +from distutils.sysconfig import get_python_lib +import shutil +import glob +import subprocess + +__library_file__ = 'g2opy/lib/g2o*.so' +__version__ = '0.0.1' + +def build_g2o(): + subprocess.run( + ["cd g2opy && mkdir build"], + shell=True + ) + run = subprocess.run( + ["cd g2opy/build && cmake -DPYBIND11_PYTHON_VERSION=3.8 .."], + shell=True, + capture_output=True + ) + print(run.stdout.decode('utf-8')) + run = subprocess.run( + ["cd g2opy/build && make -j4"], + shell=True, + capture_output=True + ) + print(run.stdout.decode('utf-8')) + +def copy_g2o(): + install_dir = get_python_lib() + lib_file = glob.glob(__library_file__) + assert len(lib_file) == 1 + + print('copying {} -> {}'.format(lib_file[0], install_dir)) + shutil.copy(lib_file[0], install_dir) + +class InstallLocalPackage(install): + def run(self): + install.run(self) + main() + build_g2o() + copy_g2o() + +setup( + name='g2opy', + version=__version__, + description='Python binding of C++ graph optimization framework g2o.', + url='https://github.com/uoip/g2opy', + license='BSD', + cmdclass=dict( + install=InstallLocalPackage + ), + keywords='g2o, SLAM, BA, ICP, optimization, python, binding', + long_description="""This is a Python binding for c++ library g2o + (https://github.com/RainerKuemmerle/g2o). + + g2o is an open-source C++ framework for optimizing graph-based nonlinear + error functions. g2o has been designed to be easily extensible to a wide + range of problems and a new problem typically can be specified in a few + lines of code. 
The current implementation provides solutions to several + variants of SLAM and BA.""" +) diff --git a/src/opendr/perception/continual_slam/algorithm/loop_closure/__init__.py b/src/opendr/perception/continual_slam/algorithm/loop_closure/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/src/opendr/perception/continual_slam/algorithm/loop_closure/buffer.py b/src/opendr/perception/continual_slam/algorithm/loop_closure/buffer.py new file mode 100644 index 0000000000..5d614bb1c4 --- /dev/null +++ b/src/opendr/perception/continual_slam/algorithm/loop_closure/buffer.py @@ -0,0 +1,7 @@ +class Buffer(): + def __init__(self) -> None: + self.images = {} + def add(self, step, image): + self.images[step] = image + def get(self, index): + return self.images[index] diff --git a/src/opendr/perception/continual_slam/algorithm/loop_closure/config.py b/src/opendr/perception/continual_slam/algorithm/loop_closure/config.py new file mode 100644 index 0000000000..0d5eed22c4 --- /dev/null +++ b/src/opendr/perception/continual_slam/algorithm/loop_closure/config.py @@ -0,0 +1,10 @@ +import dataclasses +from pathlib import Path + + +@dataclasses.dataclass +class LoopClosureDetection: + config_file: Path + detection_threshold: float + id_threshold: int + num_matches: int diff --git a/src/opendr/perception/continual_slam/algorithm/loop_closure/encoder.py b/src/opendr/perception/continual_slam/algorithm/loop_closure/encoder.py new file mode 100644 index 0000000000..98a20d7ab0 --- /dev/null +++ b/src/opendr/perception/continual_slam/algorithm/loop_closure/encoder.py @@ -0,0 +1,33 @@ +import torch +import torch.nn as nn +from torch import Tensor +from torchvision import models, transforms + + +class Identity(nn.Module): + def __init__(self): + super(Identity, self).__init__() + + def forward(self, x): + return x + + +class LCFeatureEncoder: + def __init__(self, device: torch.device): + super().__init__() + self.device = device + self.num_features = 576 + + self.model = models.mobilenet_v3_small(pretrained=True) + self.normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]) + + self.model.classifier = Identity() + self.model.to(self.device) + self.model.eval() + + def __call__(self, image: Tensor) -> Tensor: + image = self.normalize(image) + image = image.to(self.device) + with torch.no_grad(): + features = self.model(image) + return features diff --git a/src/opendr/perception/continual_slam/algorithm/loop_closure/loop_closure_detection.py b/src/opendr/perception/continual_slam/algorithm/loop_closure/loop_closure_detection.py new file mode 100644 index 0000000000..1319de5a2c --- /dev/null +++ b/src/opendr/perception/continual_slam/algorithm/loop_closure/loop_closure_detection.py @@ -0,0 +1,112 @@ +from pathlib import Path +from typing import List, Tuple + +import faiss +import matplotlib.pyplot as plt +import numpy as np +from numpy.typing import ArrayLike +import torch +from scipy.spatial.distance import cosine +from torch import Tensor + +from opendr.perception.continual_slam.algorithm.loop_closure.config import LoopClosureDetection as Config +from opendr.perception.continual_slam.algorithm.loop_closure.encoder import LCFeatureEncoder as FeatureEncoder + + +class LoopClosureDetection: + def __init__( + self, + config: Config, + ): + # Initialize parameters =========================== + self.threshold = config.detection_threshold + self.id_threshold = config.id_threshold + self.num_matches = config.num_matches + + # Fixed parameters ================================ 
+ self.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') + # ================================================= + + # Construct network =============================== + self.model = FeatureEncoder(self.device) + # ================================================= + + # Feature cache =================================== + # Cosine similarity + self.faiss_index = faiss.IndexIDMap(faiss.index_factory(self.model.num_features, + "Flat", + faiss.METRIC_INNER_PRODUCT + ) + ) + # ================================================= + + def add(self, image_id: int, image: Tensor) -> None: + # Add batch dimension + if len(image.shape) == 3: + image = image.unsqueeze(dim=0) + features = self.model(image).squeeze().cpu().detach().numpy() + features = np.expand_dims(features, 0) + faiss.normalize_L2(features) # Then the inner product becomes cosine similarity + self.faiss_index.add_with_ids(features, np.array([image_id])) + return features + # print(f'Is Faiss index trained: {self.faiss_index.is_trained}') + + def search(self, image_id: int, features: ArrayLike) -> Tuple[List[int], List[float]]: + index_id = image_id + distances, indices = self.faiss_index.search(features, 100) + distances = distances.squeeze() + indices = indices.squeeze() + # Remove placeholder entries without a match + distances = distances[indices != -1] + indices = indices[indices != -1] + # Remove self detection + distances = distances[indices != index_id] + indices = indices[indices != index_id] + # Filter by the threshold + indices = indices[distances > self.threshold] + distances = distances[distances > self.threshold] + # Do not return neighbors (trivial matches) + distances = distances[np.abs(indices - index_id) > self.id_threshold] + indices = indices[np.abs(indices - index_id) > self.id_threshold] + # Return best N matches + distances = distances[:self.num_matches] + indices = indices[:self.num_matches] + # Convert back to image IDs + image_ids = sorted(indices) + if not len(indices) == 0: + print(image_ids) + return image_ids, distances + + def predict(self, image_0: Tensor, image_1: Tensor) -> float: + features_0 = self.model(image_0) + features_1 = self.model(image_1) + cos_sim = 1 - cosine(features_0.squeeze().cpu().detach().numpy(), + features_1.squeeze().cpu().detach().numpy()) + return cos_sim + + @staticmethod + def display_matches(image_0, image_1, image_id_0, image_id_1, transformation, + cosine_similarity): + # Prevent circular import + from opendr.perception.continual_slam.algorithm.loop_closure.transform import \ + string_tmat # pylint: disable=import-outside-toplevel + if isinstance(image_0, Tensor): + image_0 = image_0.squeeze().cpu().detach().permute(1, 2, 0) + if isinstance(image_1, Tensor): + image_1 = image_1.squeeze().cpu().detach().permute(1, 2, 0) + + filename = Path(f'./figures/sequence_00/matches/{image_id_0:04}_{image_id_1:04}.png') + filename.parent.mkdir(parents=True, exist_ok=True) + + fig = plt.figure() + plt.subplot(211) + plt.imshow(image_0) + plt.axis('off') + plt.title(image_id_0) + plt.subplot(212) + plt.imshow(image_1) + plt.axis('off') + plt.title(image_id_1) + plt.suptitle(f'cos_sim = {cosine_similarity:.4f} \n {string_tmat(transformation)}') + plt.savefig(filename) + plt.close(fig) \ No newline at end of file diff --git a/src/opendr/perception/continual_slam/algorithm/loop_closure/pose_graph_optimization.py b/src/opendr/perception/continual_slam/algorithm/loop_closure/pose_graph_optimization.py new file mode 100644 index 0000000000..6435622df1 --- /dev/null +++ 
b/src/opendr/perception/continual_slam/algorithm/loop_closure/pose_graph_optimization.py @@ -0,0 +1,159 @@ +import g2o +import numpy as np + + +class PoseGraphOptimization(g2o.SparseOptimizer): + def __init__(self): + self.edge_vertices = set() + self.num_loop_closures = 0 + + super().__init__() + solver = g2o.BlockSolverSE3(g2o.LinearSolverCholmodSE3()) + solver = g2o.OptimizationAlgorithmLevenberg(solver) + super().set_algorithm(solver) + + # See https://github.com/RainerKuemmerle/g2o/issues/34 + self.se3_offset_id = 0 + se3_offset = g2o.ParameterSE3Offset() + se3_offset.set_id(self.se3_offset_id) + super().add_parameter(se3_offset) + + def __str__(self): + string = f'Vertices: {len(self.vertex_ids)}\n' + string += f'Edges: {len(self.edge_vertices)}\n' + string += f'Loops: {self.num_loop_closures}' + return string + + @property + def vertex_ids(self): + return sorted(list(self.vertices().keys())) + + def optimize(self, max_iterations=1000, verbose=False): + super().initialize_optimization() + super().set_verbose(verbose) + super().optimize(max_iterations) + + def add_vertex(self, vertex_id, pose, fixed=False): + v_se3 = g2o.VertexSE3() + v_se3.set_id(vertex_id) + v_se3.set_estimate(g2o.Isometry3d(pose)) + v_se3.set_fixed(fixed) + super().add_vertex(v_se3) + + def add_vertex_point(self, vertex_id, point, fixed=False): + v_point = g2o.VertexPointXYZ() + v_point.set_id(vertex_id) + v_point.set_estimate(point) + v_point.set_fixed(fixed) + super().add_vertex(v_point) + + def add_edge(self, + vertices, + measurement, + information=np.eye(6), + robust_kernel=None, + is_loop_closure=False): + self.edge_vertices.add(vertices) + if is_loop_closure: + self.num_loop_closures += 1 + + edge = g2o.EdgeSE3() + for i, v in enumerate(vertices): + if isinstance(v, int): + v = self.vertex(v) + edge.set_vertex(i, v) + + edge.set_measurement(g2o.Isometry3d(measurement)) # relative pose + edge.set_information(information) + # robust_kernel = g2o.RobustKernelHuber() + if robust_kernel is not None: + edge.set_robust_kernel(robust_kernel) + super().add_edge(edge) + + def add_edge_pose_point(self, + vertex_pose, + vertex_point, + measurement, + information=np.eye(3), + robust_kernel=None): + edge = g2o.EdgeSE3PointXYZ() + edge.set_vertex(0, self.vertex(vertex_pose)) + edge.set_vertex(1, self.vertex(vertex_point)) + edge.set_measurement(measurement) + edge.set_information(information) + if robust_kernel is not None: + edge.set_robust_kernel(robust_kernel) + edge.set_parameter_id(0, self.se3_offset_id) + super().add_edge(edge) + + def get_pose(self, vertex_id): + return self.vertex(vertex_id).estimate().matrix() + + def get_all_poses(self): + return [self.get_pose(i) for i in self.vertex_ids] + + def get_transform(self, vertex_id_src, vertex_id_dst): + pose_src = self.get_pose(vertex_id_src) + pose_dst = self.get_pose(vertex_id_dst) + transform = np.linalg.inv(pose_src) @ pose_dst + return transform + + def does_edge_exists(self, vertex_id_a, vertex_id_b): + return (vertex_id_a, + vertex_id_b) in self.edge_vertices or (vertex_id_b, + vertex_id_a) in self.edge_vertices + + def is_vertex_in_any_edge(self, vertex_id): + vertices = set() + for edge in self.edge_vertices: + vertices.add(edge[0]) + vertices.add(edge[1]) + return vertex_id in vertices + + def does_vertex_have_only_global_edges(self, vertex_id): + assert self.is_vertex_in_any_edge(vertex_id) + for edge in self.edge_vertices: + if vertex_id not in edge: + continue + if np.abs(edge[0] - edge[1]) == 1: + return False + return True + + def 
return_last_positions(self, n=20, reversed=False): + positions = [] + keys = list(self.vertices().keys()) + length = len(keys) + i = 0 + while i < n and i < length: + if reversed: + key = keys[length-i-1] + else: + key = keys[i] + vertex = self.vertices()[key] + if isinstance(vertex, g2o.VertexSE3): + positions.append(vertex.estimate().matrix()[:3, 3]) + i += 1 + return positions + + def return_all_positions(self): + positions = [] + for _, vertex in self.vertices().items(): + if isinstance(vertex, g2o.VertexSE3): + positions.append(vertex.estimate().matrix()[:3, 3]) + return list(reversed(positions)) + + def return_last_poses(self, n=20, reversed=False): + poses = [] + keys = list(self.vertices().keys()) + length = len(keys) + i = 0 + while i < n and i < length: + if reversed: + key = keys[length-i-1] + else: + key = keys[i] + vertex = self.vertices()[key] + if isinstance(vertex, g2o.VertexSE3): + poses.append(vertex.estimate().matrix()) + i += 1 + return poses \ No newline at end of file diff --git a/src/opendr/perception/continual_slam/algorithm/loop_closure/transform.py b/src/opendr/perception/continual_slam/algorithm/loop_closure/transform.py new file mode 100644 index 0000000000..52884a34dc --- /dev/null +++ b/src/opendr/perception/continual_slam/algorithm/loop_closure/transform.py @@ -0,0 +1,107 @@ +import numpy as np +from scipy.spatial.transform import Rotation + + +def print_tmat(tmat, note=''): + return print_sixdof(tmat2sixdof(tmat), note) + + +def print_array(array, note=''): + return print_sixdof(array2sixdof(array), note) + + +def print_sixdof(sixdof, note=''): + print(string_sixdof(sixdof, note)) + + +def string_tmat(tmat, note=''): + return string_sixdof(tmat2sixdof(tmat), note) + + +def string_sixdof(sixdof, note=''): + string = f'R({np.rad2deg(sixdof["rx"]):>7.3f}, {np.rad2deg(sixdof["ry"]):>7.3f}, ' \ + f'{np.rad2deg(sixdof["rz"]):>7.3f}), T({sixdof["tx"]:>7.3f}, ' \ + f'{sixdof["ty"]:>7.3f}, {sixdof["tz"]:>7.3f}) {note}' + return string + + +def create_empty_sixdof(): + sixdof = {'tx': 0, 'ty': 0, 'tz': 0, 'rx': 0, 'ry': 0, 'rz': 0} + return sixdof + + +def tmat2sixdof(tmat): + r = Rotation.from_matrix(tmat[:3, :3]).as_rotvec() + sixdof = { + 'tx': tmat[0, 3], + 'ty': tmat[1, 3], + 'tz': tmat[2, 3], + 'rx': r[0], + 'ry': r[1], + 'rz': r[2] + } + return sixdof + + +def sixdof2tmat(sixdof): + tmat = np.eye(4) + tmat[:3, :3] = Rotation.from_rotvec([sixdof['rx'], sixdof['ry'], sixdof['rz']]).as_matrix() + tmat[0, 3] = sixdof['tx'] + tmat[1, 3] = sixdof['ty'] + tmat[2, 3] = sixdof['tz'] + return tmat + + +def tmat2array(tmat): + sixdof = tmat2sixdof(tmat) + array = np.zeros((6, 1)) + array[0] = sixdof['rx'] + array[1] = sixdof['ry'] + array[2] = sixdof['rz'] + array[3] = sixdof['tx'] + array[4] = sixdof['ty'] + array[5] = sixdof['tz'] + return array.T.ravel() + + +def array2tmat(array): + return sixdof2tmat(array2sixdof(array)) + + +def array2sixdof(array): + array_ = array.T + sixdof = { + 'rx': array_[0], + 'ry': array_[1], + 'rz': array_[2], + 'tx': array_[3], + 'ty': array_[4], + 'tz': array_[5] + } + return sixdof + + +def apply_transformation(transformation: np.ndarray, input_data: np.ndarray) -> np.ndarray: + if len(input_data.shape) != 2 and len(input_data.shape) != 3: + raise RuntimeError('data should be a 2D or 3D array') + if len(transformation.shape) != 2: + raise RuntimeError('transformation should be a 2D array') + if transformation.shape[0] != transformation.shape[1]: + raise RuntimeError('transformation should be square matrix') + + if len(input_data.shape) 
== 2: + d = input_data.shape[1] + input_data_ = input_data + else: + d = input_data.shape[2] + input_data_ = input_data.reshape(-1, 3) + + if transformation.shape[0] != d + 1: + raise RuntimeError('transformation dimension mismatch') + if np.max(np.abs(transformation[-1, :] - np.r_[np.zeros(d), 1])) > np.finfo(float).eps * 1e3: + raise RuntimeError('bad transformation') + + x_t = np.c_[input_data_, np.ones((input_data_.shape[0], 1))] @ transformation.T + x_t = x_t[:, :d].reshape(input_data.shape) + + return x_t \ No newline at end of file diff --git a/src/opendr/perception/continual_slam/algorithm/loop_closure/utils.py b/src/opendr/perception/continual_slam/algorithm/loop_closure/utils.py new file mode 100644 index 0000000000..5fb7869ad3 --- /dev/null +++ b/src/opendr/perception/continual_slam/algorithm/loop_closure/utils.py @@ -0,0 +1,32 @@ +from typing import Optional + +import matplotlib.pyplot as plt + + +def plot_image_matches( + image_0, + image_1, + image_id_0: Optional[int] = None, + image_id_1: Optional[int] = None, + cosine_similarity: Optional[float] = None, + save_figure: bool = True, +) -> None: + fig = plt.figure() + plt.subplot(211) + plt.imshow(image_0) + plt.axis('off') + if image_id_0 is not None: + plt.title(image_id_0) + plt.subplot(212) + plt.imshow(image_1) + plt.axis('off') + if image_id_1 is not None: + plt.title(image_id_1) + if cosine_similarity is not None: + plt.suptitle(f'cos_sim = {cosine_similarity}') + if save_figure: + assert image_id_0 is not None and image_id_1 is not None + plt.savefig(f'./figures/sequence_08/matches/{image_id_0:04}_{image_id_1:04}.png') + else: + plt.show() + plt.close(fig) diff --git a/src/opendr/perception/continual_slam/algorithm/parsing/config.py b/src/opendr/perception/continual_slam/algorithm/parsing/config.py new file mode 100644 index 0000000000..13a4c42cec --- /dev/null +++ b/src/opendr/perception/continual_slam/algorithm/parsing/config.py @@ -0,0 +1,31 @@ +import dataclasses +from pathlib import Path +from typing import Optional, Tuple, Union + + +@dataclasses.dataclass +class Config: + config_file: Path + train_set: Optional[Union[Tuple[int, ...], int, str]] + val_set: Optional[Union[Tuple[int, ...], Tuple[str, ...], int, str]] + resnet: int + resnet_pretrained: bool + scales: Tuple[int, ...] 
+ learning_rate: float + scheduler_step_size: int + batch_size: int + num_workers: int + num_epochs: int + min_depth: Optional[float] + max_depth: Optional[float] + disparity_smoothness: float + velocity_loss_scaling: Optional[float] + mask_dynamic: bool + log_path: Path + save_frequency: int + save_val_depth: bool + save_val_depth_batches: int + multiple_gpus: bool + gpu_ids: Optional[Tuple[int, ...]] + load_weights_folder: Optional[Path] + use_wandb: Optional[bool] diff --git a/src/opendr/perception/continual_slam/algorithm/parsing/dataset_config.py b/src/opendr/perception/continual_slam/algorithm/parsing/dataset_config.py new file mode 100644 index 0000000000..22d069484d --- /dev/null +++ b/src/opendr/perception/continual_slam/algorithm/parsing/dataset_config.py @@ -0,0 +1,10 @@ +import dataclasses +from pathlib import Path + + +@dataclasses.dataclass +class DatasetConfig: + dataset: str + dataset_path: Path + height: int + width: int diff --git a/src/opendr/perception/continual_slam/configs/config_parser.py b/src/opendr/perception/continual_slam/configs/config_parser.py new file mode 100644 index 0000000000..1a5a29fd7c --- /dev/null +++ b/src/opendr/perception/continual_slam/configs/config_parser.py @@ -0,0 +1,116 @@ +# Copyright 2020-2023 OpenDR European Project +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
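+# Typical usage (editor's sketch, values taken from the bundled configs/singlegpu_kitti.yaml):
+#     config = ConfigParser('src/opendr/perception/continual_slam/configs/singlegpu_kitti.yaml')
+#     config.dataset.height             # 192
+#     config.depth_pose.batch_size      # 4
+#     config.loop_closure.id_threshold  # 100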
+ +import dataclasses +import sys +from os import PathLike +from pathlib import Path +from typing import List, Union, get_args, get_origin + +import yaml + +from opendr.perception.continual_slam.datasets import Config as Dataset +from opendr.perception.continual_slam.algorithm.parsing.config import Config as DepthPosePrediction +from opendr.perception.continual_slam.algorithm.loop_closure.config import LoopClosureDetection + + +class ConfigParser(): + def __init__(self, config_file: Union[str, PathLike, Path]) -> None: + self.filename = Path(config_file) + + self.config_dict = {} + self.dataset = None + self.depth_pose = None + self.loop_closure = None + + self.parse() + + def parse(self): + with open(self.filename, 'r', encoding='utf-8') as file: + self.config_dict = yaml.safe_load(file) + + # Read lists as tuples + for config_type in self.config_dict.values(): + for key, value in config_type.items(): + if isinstance(value, List): + config_type[key] = tuple(value) + + # Correct wrongly parsed data types + for config_type_key, config_type in self.config_dict.items(): + config_type_class = getattr(sys.modules[__name__], config_type_key) + for field in dataclasses.fields(config_type_class): + if field.name == 'config_file': + continue + value = config_type[field.name] + if value is not None: + expected_type = field.type + if get_origin(field.type) is not None: + expected_type = get_origin(field.type) + if expected_type is Union: + expected_type = [] + for tp in get_args(field.type): + if get_origin(tp) is not None: + expected_type.append(get_origin(tp)) + else: + expected_type.append(tp) + else: + expected_type = [expected_type] + + # Remove the NoneType before attempting conversions + expected_type = [tp for tp in expected_type if tp is not type(None)] # noqa: E721 + + if not any(isinstance(value, tp) for tp in expected_type): + if len(expected_type) == 1: + print(f'[CONFIG] Converting {field.name} from {type(value).__name__} ' + f'to {expected_type[0].__name__}.') + config_type[field.name] = expected_type[0](value) + else: + assert False, 'Found an unknown issue!' + elif get_origin(field.type) is Union and type(None) in get_args(field.type): + # Is optional + print(f'[CONFIG] Setting {field.name} to None.') + elif get_origin(field.type) is Union: + # Is required + raise ValueError(f'[CONFIG] Required parameter missing: {field.name}') + else: + assert False, 'Found an unknown issue!' 
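+        # Illustration (values from the bundled singlegpu_kitti.yaml): a YAML list such as
+        # 'scales: [0, 1, 2, 3]' is stored as the tuple (0, 1, 2, 3), a string value such as
+        # 'log_path: ./log/kitti' is converted to pathlib.Path because the field is typed Path,
+        # and an empty entry such as 'max_depth:' stays None because the field is Optional.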
+ + # Add the path to the config file to all Configurations + for config_type in self.config_dict.values(): + config_type['config_file'] = self.filename + + # Convert paths to absolute paths + for config_type in self.config_dict.values(): + for key, value in config_type.items(): + if isinstance(value, Path): + config_type[key] = value.absolute() + + # Read the sections + if 'Dataset' in self.config_dict: + self.dataset = Dataset(**self.config_dict['Dataset']) + if 'DepthPosePrediction' in self.config_dict: + self.depth_pose = DepthPosePrediction(**self.config_dict['DepthPosePrediction']) + if 'LoopClosureDetection' in self.config_dict: + self.loop_closure = LoopClosureDetection(**self.config_dict['LoopClosureDetection']) + + def __str__(self): + string = '' + for config_type_name, config_type in self.config_dict.items(): + string += f'----- {config_type_name} --- START -----\n' + for name, value in config_type.items(): + if name != 'config_file': + string += f'{name:25} : {value}\n' + string += f'----- {config_type_name} --- END -------\n' + string = string[:-1] + return string diff --git a/src/opendr/perception/continual_slam/configs/singlegpu_kitti.yaml b/src/opendr/perception/continual_slam/configs/singlegpu_kitti.yaml new file mode 100644 index 0000000000..cbfd64fc15 --- /dev/null +++ b/src/opendr/perception/continual_slam/configs/singlegpu_kitti.yaml @@ -0,0 +1,51 @@ +# Copyright 2020-2023 OpenDR European Project +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +Dataset: + dataset: Kitti + dataset_path: /home/canakcia/Desktop/kitti_dset + scales: [ 0, 1, 2, 3 ] # Provided by dataloader + height: 192 + width: 640 + frame_ids: [ 0, -1, 1 ] + +DepthPosePrediction: + train_set: [ train ] + val_set: val + resnet: 18 + resnet_pretrained: true + scales: [ 0, 1, 2, 3 ] # Network size + learning_rate: 1e-4 + scheduler_step_size: 15 + batch_size: 4 + num_workers: 12 + num_epochs: 25 + min_depth: .1 + max_depth: + disparity_smoothness: .001 + velocity_loss_scaling: .05 + mask_dynamic: False + log_path: ./log/kitti + save_frequency: 5 + save_val_depth: true + save_val_depth_batches: 1 + multiple_gpus: true + gpu_ids: [ 0 ] + load_weights_folder: /home/canakcia/Downloads/weights_025 + use_wandb: false + +LoopClosureDetection: + detection_threshold: .999 + id_threshold: 100 + num_matches: 1 \ No newline at end of file diff --git a/src/opendr/perception/continual_slam/continual_slam_learner.py b/src/opendr/perception/continual_slam/continual_slam_learner.py new file mode 100644 index 0000000000..f0c0edc273 --- /dev/null +++ b/src/opendr/perception/continual_slam/continual_slam_learner.py @@ -0,0 +1,479 @@ +# Copyright 2020-2023 OpenDR European Project +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import os +from pathlib import Path +from typing import Dict, Any, Tuple, Optional, List, Union + +import torch +import cv2 +import numpy as np +import matplotlib as mpl +from matplotlib import pyplot as plt +from torch import Tensor +from numpy.typing import ArrayLike +from tqdm import tqdm +import urllib +from zipfile import ZipFile + +from opendr.engine.data import Image +from opendr.engine.data import PointCloud +from opendr.engine.learners import Learner +from opendr.engine.constants import OPENDR_SERVER_URL + +from opendr.perception.continual_slam.algorithm.depth_pose_module import DepthPoseModule +from opendr.perception.continual_slam.configs.config_parser import ConfigParser +try: + from opendr.perception.continual_slam.algorithm.loop_closure.pose_graph_optimization import PoseGraphOptimization + from opendr.perception.continual_slam.algorithm.loop_closure.loop_closure_detection import LoopClosureDetection +except ImportError: + print("Loop closure not available. Please install the optional dependencies.") +from opendr.perception.continual_slam.algorithm.loop_closure.buffer import Buffer +from opendr.perception.continual_slam.algorithm.depth_pose_module.networks.layers import BackProjectDepth + + +class ContinualSLAMLearner(Learner): + """ + This class implements the CL-SLAM algorithm, which is a continual learning approach for SLAM. + """ + def __init__(self, + config_file: Union[str, Path], + mode: str = 'predictor', + ros: bool = False, + do_loop_closure: bool = False, + key_frame_freq: int = 5, + lc_distance_poses: int = 150, + ): + """ + :param config_file: path to the config file + :type config_file: Path + :param mode: mode of the learner, either predictor or learner. 
+        :type mode: str
+        :param ros: whether the learner is used in ROS environment
+        :type ros: bool
+        """
+        self.config_file = ConfigParser(config_file)
+        self.dataset_config = self.config_file.dataset
+        self.height = self.dataset_config.height
+        self.width = self.dataset_config.width
+        self.model_config = self.config_file.depth_pose
+        self.is_ros = ros
+        self.step = 0
+        self.key_frame_freq = key_frame_freq
+        self.do_loop_closure = do_loop_closure
+        self.lc_distance_poses = lc_distance_poses
+        self.since_last_lc = lc_distance_poses
+        self.online_dataset = Buffer()
+        self.pose_graph = PoseGraphOptimization()
+        self.loop_closure_detection = LoopClosureDetection(self.config_file.loop_closure)
+        self.project_depth = BackProjectDepth(1, self.height, self.width)
+        super(ContinualSLAMLearner, self).__init__(lr=self.model_config.learning_rate)
+
+        self.mode = mode
+
+        if self.mode == 'predictor':
+            self.predictor = DepthPoseModule(self.model_config, self.dataset_config, use_online=False, mode=mode)
+            self.predictor.load_model(load_optimizer=True)
+        elif self.mode == 'learner':
+            self.learner = DepthPoseModule(self.model_config, self.dataset_config, use_online=False, mode=mode)
+            self.learner.load_model(load_optimizer=True)
+            depth_pose_config = self.config_file.depth_pose
+            for g in self.learner.optimizer.param_groups:
+                g['lr'] = depth_pose_config.learning_rate
+        else:
+            raise ValueError('Mode should be either predictor or learner')
+
+    def fit(self,
+            batch: List,
+            return_losses: bool = False,
+            replay_buffer: bool = False,
+            learner: bool = False):
+        """
+        In the context of CL-SLAM, the fit method implements the adaptation step: it updates the weights of the
+        learner based on the incoming input. Only works in learner mode.
+        :param batch: list of tuples of (input, target)
+        :type batch: List[Tuple[Dict, None]]
+        :param return_losses: whether to return the losses
+        :type return_losses: bool
+        :param replay_buffer: whether to use replay buffer
+        :type replay_buffer: bool
+        :param learner: whether to use the learner input formatting
+        :type learner: bool
+        :return: whether the adaptation step was successful
+        :rtype: bool
+        """
+        if self.mode == 'learner':
+            return self._fit(batch, return_losses, replay_buffer, learner=learner)
+        else:
+            raise ValueError('Fit is only available in learner mode')
+
+    def infer(self,
+              batch: Tuple[Dict, None],
+              return_losses: bool = False,
+              *args,
+              **kwargs) -> Tuple[Dict[Tensor, Any], Optional[Dict[Tensor, Any]]]:
+        """
+        The infer method implements the prediction step: it predicts depth and odometry for the incoming input.
+        Only works in predictor mode.
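+        When the learner is constructed with do_loop_closure=True, the returned tuple additionally contains the
+        loop closure flag and the pose graph (see _predict).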
+ :param batch: tuple of (input, target) + :type batch: Tuple[Dict, None] + :param return_losses: whether to return the losses + :type return_losses: bool + :return: tuple of (prediction, loss) + :rtype: Tuple[Dict[Tensor, Any], Optional[Dict[Tensor, Any]]] + """ + if self.mode == 'predictor': + return self._predict(batch, return_losses, *args, **kwargs) + else: + raise ValueError('Inference is only available in predictor mode') + + def save(self, location: str = None) -> str: + """ + Save the model weights as an binary-encoded string for ros message + """ + save_dict = {} + if location is None: + location = Path.cwd() / 'temp_weights' + else: + location = Path(location) + if not location.exists(): + location.mkdir(parents=True, exist_ok=True) + for model_name, model in self.learner.models.items(): + if model is None: + continue + if isinstance(model, torch.nn.DataParallel): + model = model.module + to_save = model.state_dict() + if 'encoder' in model_name and not self.is_ros: + to_save['height'] = Tensor(self.height) + to_save['width'] = Tensor(self.width) + if not self.is_ros: + save_path = location / f'{model_name}.pth' + torch.save(to_save, save_path) + if self.is_ros: + save_dict[model_name] = to_save + + return str(location) + + def load(self, weights_folder: str = None, load_optimizer: bool = False) -> bool: + """ + Load the model weights from an binary-encoded string for ros message + """ + try: + self.predictor.load_model(weights_folder=weights_folder, load_optimizer=load_optimizer) + return True + except Exception as e: + print(str(e)) + return False + + def visualize_3d(self, image: Image, depth_map: ArrayLike, threshold=40): + """ + Visualize the 3D point cloud + :param image: RGB image + :type image: Image + :param depth_map: depth map + :type depth_map: ArrayLike + :param threshold: threshold for the depth map + :type threshold: float + """ + # Convert to numpy + image = image.numpy() + + # Get camera matrix + camera_matrix = self.predictor.camera_matrix + _, inv_camera_matrix = self.predictor._scale_camera_matrix(camera_matrix, 0) + inv_camera_matrix = torch.Tensor(inv_camera_matrix).unsqueeze(0) + depth_map = torch.Tensor(depth_map).unsqueeze(0).unsqueeze(0) + pcl = self.project_depth(depth_map, inv_camera_matrix).squeeze(0).numpy() + + x = pcl[0, :] + y = pcl[1, :] + z = pcl[2, :] + + colors = image.reshape(3, -1).T + + # Filter out points with depth greater than threshold + length = np.linalg.norm(np.c_[x, y, z], axis=1) + mask = length < threshold + x = x[mask] + y = y[mask] + z = z[mask] + colors = colors[mask] + + points = np.c_[x, y, z, colors] + return PointCloud(points) + + @staticmethod + def download(path: str, mode: str = 'model', trained_on: str = 'cityscapes', prepare_data: bool = False): + """ + Download data from the OpenDR server. + Valid modes include pre-trained model weights and data used in the unit tests. + + Currently, the following pre-trained models are available: + - SemanticKITTI visual odometry + + :param path: path to the directory where the data should be downloaded + :type path: str + :param mode: mode to use. Valid options are ['model', 'test_data'] + :type mode: str + :param trained_on: dataset on which the model was trained. 
Valid options are ['cityscapes'] + :type trained_on: str + :param prepare_data: whether to prepare the data for the unit tests + :type prepare_data: bool + + :return: path to the downloaded data + :rtype: str + """ + if mode == "model": + models = { + "cityscapes": + f"{OPENDR_SERVER_URL}perception/continual_slam/models/cityscapes_masks_pretrain.zip" + } + if trained_on not in models.keys(): + raise ValueError(f"Could not find model weights pre-trained on {trained_on}. " + f"Valid options are {list(models.keys())}") + url = models[trained_on] + elif mode == "test_data": + url = f"{OPENDR_SERVER_URL}perception/continual_slam/test_data.zip" + else: + raise ValueError("Invalid mode. Valid options are ['model', 'test_data']") + + if not isinstance(path, Path): + path = Path(path) + filename = path + path.mkdir(parents=True, exist_ok=True) + + def pbar_hook(prog_bar: tqdm): + prev_b = [0] + + def update_to(b=1, bsize=1, total=None): + if total is not None: + prog_bar.total = total + prog_bar.update((b - prev_b[0]) * bsize) + prev_b[0] = b + + return update_to + + if os.path.exists(filename) and os.path.isfile(filename): + print(f'File already downloaded: {filename}') + else: + filename = path / url.split('/')[-1] + with tqdm(unit="B", unit_scale=True, unit_divisor=1024, miniters=1, + desc=f"Downloading {filename}") as pbar: + urllib.request.urlretrieve(url, filename, pbar_hook(pbar)) + print(f"Extracting {filename}") + if mode == "model": + path = path / "models" + try: + with ZipFile(filename, 'r') as zipObj: + zipObj.extractall(path) + os.remove(filename) + except: + print(f"Could not extract {filename} to {path}. Please extract it manually.") + print("The data might have been already extracted an is available in the test_data folder.") + if mode == "test_data": + path = path / "test_data" / "infer_data" + return path + + def eval(self, dataset, *args, **kwargs): + raise NotImplementedError + + def optimize(self, target_device): + raise NotImplementedError + + def reset(self): + raise NotImplementedError + + # Private methods ================================================================================ + def _fit(self, + batch: List, + return_losses: bool = False, + replay_buffer: bool = False, + learner: bool = False): + """ + :param batch: list of tuples of (input, target) + :type batch: List[Tuple[Dict, None]] + :param return_losses: whether to return the losses + :type return_losses: bool + :param replay_buffer: whether to use replay buffer + :type replay_buffer: bool + :param learner: whether to use learner + :type learner: bool + """ + batch_size = len(batch) + input_dict = self._input_formatter(batch, replay_buffer, learner=learner, height=self.height, width=self.width) + # Adapt + try: + if return_losses: + self.learner.adapt(input_dict, steps=5, return_loss=return_losses, batch_size=batch_size) + else: + self.learner.adapt(input_dict, steps=5, return_loss=return_losses, batch_size=batch_size) + return True + except: + return False + + def _predict(self, + batch: Tuple[Dict, None], + return_losses: bool = False, + pointcloud: bool = False + ) -> Tuple[Dict[Tensor, Any], Optional[Dict[Tensor, Any]]]: + """ + :param batch: tuple of (input, target) + :type batch: Tuple[Dict, None] + :param return_losses: whether to return the losses + :type return_losses: bool + :param pointcloud: whether to return the depthmap for pointcloud format + :type pointcloud: bool + :return: tuple of (prediction, loss) + :rtype: Tuple[Dict[Tensor, Any], Optional[Dict[Tensor, Any]]] + """ + input_dict = 
self._input_formatter(batch) + self.step += 1 + # Get the prediction + prediction, losses = self.predictor.predict(input_dict, return_loss=return_losses) + # Convert the prediction to opendr format + depth, odometry = self._output_formatter(prediction, pointcloud=pointcloud) + if not self.do_loop_closure: + return (depth, odometry), losses + else: + if self.step == 1 or len(self.pose_graph.vertex_ids) == 0: + self.pose_graph.add_vertex(0, np.eye(4), fixed=True) + elif self.step > 1: + odom_pose = self.pose_graph.get_pose(self.pose_graph.vertex_ids[-1]) @ odometry + self.pose_graph.add_vertex(self.step, odom_pose) + cov_matrix = np.eye(6) + cov_matrix[2, 2] = .1 + cov_matrix[5, 5] = .1 + self.pose_graph.add_edge((self.pose_graph.vertex_ids[-2], self.step), + odometry, + information=np.linalg.inv(cov_matrix)) + optimized = False + lc = False + image = input_dict[(0, 'image')].squeeze()/255.0 + if self.step % self.key_frame_freq == 0: + features = self.loop_closure_detection.add(self.step, image) + self.online_dataset.add(self.step, image) + if self.since_last_lc > self.lc_distance_poses: + lc_step_ids, distances = self.loop_closure_detection.search(self.step, features) + if len(lc_step_ids) > 0: + print(f'Loop closure detected at step {self.step}, candidates: {lc_step_ids}') + lc = True + for i, d in zip(lc_step_ids, distances): + lc_image = self.online_dataset.get(i) + lc_transformation, cov_matrix = self.predictor.predict_pose(image, + lc_image, + as_numpy=True) + cov_matrix = np.eye(6) + cov_matrix[2, 2] = .1 + cov_matrix[5, 5] = .1 + self.pose_graph.add_edge((self.step, int(i)), + lc_transformation, + information=.5 * np.linalg.inv(cov_matrix), + is_loop_closure=True) + if len(lc_step_ids) > 0: + self.pose_graph.optimize(max_iterations=100000, verbose=False) + optimized = True + if optimized: + self.since_last_lc = 0 + else: + self.since_last_lc += 1 + return depth, np.expand_dims(odometry, axis=0), losses, lc, self.pose_graph + + @staticmethod + def _input_formatter(batch: Tuple[Dict, None], + replay_buffer: bool = False, + learner: bool = False, + height: int = None, + width: int = None,)-> Union[List, Dict[Any, Tensor]]: + """ + Format the input for the prediction + :param batch: tuple of (input, target) + :type batch: Tuple[Dict, None] + :return: Either a list of input dicts or a single dictionary of input tensors + :rtype: Union[List, Dict[Any, Tensor]] + """ + if learner: + device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') + if not (height and width): + raise ValueError('Height and width must be provided for learner') + + input_dict = {(-1, 'image'): torch.zeros(size=(len(batch), 3, height, width), device=device), + (-1, 'distance'): torch.zeros(size=(len(batch), 1), device=device), + (0, 'image'): torch.zeros(size=(len(batch), 3, height, width), device=device), + (0, 'distance'): torch.zeros(size=(len(batch), 1), device=device), + (1, 'image'): torch.zeros(size=(len(batch), 3, height, width), device=device), + (1, 'distance'): torch.zeros(size=(len(batch), 1), device=device), + } + for i, input in enumerate(batch): + for frame_id in ([-1, 0, 1]): + image = input[(frame_id, 'image')] + distance = input[(frame_id, 'distance')] + if image.is_cuda and not torch.cuda.is_available(): + image = image.cpu() + distance = distance.cpu() + if not image.is_cuda and torch.cuda.is_available(): + image = image.cuda() + distance = distance.cuda() + input_dict[(frame_id, 'image')][i] = image + input_dict[(frame_id, 'distance')][i] = distance + return input_dict + + if 
isinstance(batch, dict): + inputs = batch + else: + inputs = batch[0] + # Create a dictionary with frame ids as [-1, 0, 1] + input_dict = {} + if replay_buffer: + return inputs + for frame_id, id in zip([-1, 0, 1], inputs.keys()): + input_dict[(frame_id, 'image')] = torch.Tensor(inputs[id][0].data) + input_dict[(frame_id, 'distance')] = torch.Tensor([inputs[id][1]]) + return input_dict + + def _output_formatter(self, prediction: Dict, pointcloud: bool = False) -> Tuple[ArrayLike, ArrayLike]: + """ + Format the output of the prediction + :param prediction: dictionary of predictions which has items of: + frame_id -> (depth, pose) + depth -> Tensors of shape (1, 1, H, W). The number of tensors is equal to scales + pose -> 6 Tensors, which we will only use cam_T_cam for 0->1 since it is the odometry + """ + # Convert the prediction to opendr format + for item in prediction: + if item[0] == 'disp': + if item[2] == 0: + disp = self._colorize_depth(prediction[item].squeeze().cpu().detach().numpy()) + if item[0] == 'cam_T_cam': + if item[2] == 1: + odometry = prediction[item][0, :].cpu().detach().numpy() + if item[0] == 'depth' and pointcloud: + if item[2] == 0: + depth = prediction[item].squeeze().cpu().detach().numpy() + if pointcloud: + return (disp, depth), odometry + return disp, odometry + + def _colorize_depth(self, depth): + """ + Colorize the depth map + """ + vmax = np.percentile(depth, 95) + normalizer = mpl.colors.Normalize(vmin=depth.min(), vmax=vmax) + mapper = plt.cm.ScalarMappable(norm=normalizer, cmap="magma") + colormapped_img = (mapper.to_rgba(depth.squeeze())[:, :, :3]*255).astype(np.uint8) + colormapped_img = cv2.cvtColor(colormapped_img, cv2.COLOR_RGB2BGR) + return Image(colormapped_img) + + def _reset(self): + self.step = 0 + self.online_dataset = Buffer() + self.pose_graph = PoseGraphOptimization() + self.loop_closure_detection = LoopClosureDetection(self.config_file.loop_closure) diff --git a/src/opendr/perception/continual_slam/continual_slam_pcl.png b/src/opendr/perception/continual_slam/continual_slam_pcl.png new file mode 100644 index 0000000000..eadc14d5c3 Binary files /dev/null and b/src/opendr/perception/continual_slam/continual_slam_pcl.png differ diff --git a/src/opendr/perception/continual_slam/datasets/README.md b/src/opendr/perception/continual_slam/datasets/README.md new file mode 100644 index 0000000000..85d78aecb8 --- /dev/null +++ b/src/opendr/perception/continual_slam/datasets/README.md @@ -0,0 +1,13 @@ +# KITTI Odometry Data +1. Download the KITTI Odometry data from http://www.cvlibs.net/datasets/kitti/eval_odometry.php: +- `odometry data set (color, 65 GB)` +- `odometry ground truth poses (4 MB)` + +2. Download the KITTI raw data from http://www.cvlibs.net/datasets/kitti/raw_data.php for the runs specified in [`datasets/kitti.py`](datasets/kitti.py) (search for `KITTI_RAW_SEQ_MAPPING`). - `[synced+rectified data]` + +3. 
Extract it into a `RAW_PATH` folder and run the following command, where `ODOM_PATH` is the path to the odometry data from step 1:
+
+```
+python src/opendr/perception/continual_slam/datasets/kitti.py RAW_PATH ODOM_PATH --oxts
+```
+
diff --git a/src/opendr/perception/continual_slam/datasets/__init__.py b/src/opendr/perception/continual_slam/datasets/__init__.py
new file mode 100644
index 0000000000..1ea400a947
--- /dev/null
+++ b/src/opendr/perception/continual_slam/datasets/__init__.py
@@ -0,0 +1,4 @@
+from opendr.perception.continual_slam.datasets.config import Dataset as Config
+from opendr.perception.continual_slam.datasets.kitti import KittiDataset
+
+__all__ = ['Config', 'KittiDataset']
diff --git a/src/opendr/perception/continual_slam/datasets/config.py b/src/opendr/perception/continual_slam/datasets/config.py
new file mode 100644
index 0000000000..51194feff9
--- /dev/null
+++ b/src/opendr/perception/continual_slam/datasets/config.py
@@ -0,0 +1,29 @@
+# flake8: noqa
+# Copyright 2020-2023 OpenDR European Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import dataclasses
+from pathlib import Path
+from typing import Optional, Tuple
+
+
+@dataclasses.dataclass
+class Dataset:
+    dataset: str
+    config_file: Path
+    dataset_path: Optional[Path]
+    scales: Optional[Tuple[int, ...]]
+    height: Optional[int]
+    width: Optional[int]
+    frame_ids: Tuple[int, ...]
diff --git a/src/opendr/perception/continual_slam/datasets/kitti.py b/src/opendr/perception/continual_slam/datasets/kitti.py
new file mode 100644
index 0000000000..8b9ef5e398
--- /dev/null
+++ b/src/opendr/perception/continual_slam/datasets/kitti.py
@@ -0,0 +1,281 @@
+# Copyright 2020-2023 OpenDR European Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import numpy as np
+import os
+import re
+from datetime import datetime
+from tqdm import tqdm
+from shutil import copyfile
+import argparse
+from os import PathLike
+from pathlib import Path
+from typing import Tuple, Union, Any, List
+
+from opendr.engine.data import Image
+from opendr.engine.datasets import ExternalDataset, DatasetIterator
+from opendr.perception.continual_slam.configs.config_parser import ConfigParser
+
+from torchvision.transforms import Resize, InterpolationMode
+from PIL import Image as PILImage
+
+
+class KittiDataset(ExternalDataset, DatasetIterator):
+
+    def __init__(self, path: Union[str, Path, PathLike], config_file_path: str):
+        super().__init__(path, dataset_type='kitti')
+        config = ConfigParser(config_file_path).dataset
+        if not Path(path).exists():
+            print(f'Given path {path} does not exist. 
Using path from config file...') + path = config.dataset_path + self._path = Path(os.path.join(path, 'sequences')) + + # ======================================================== + self.frame_ids = config.frame_ids + self.height = config.height + self.width = config.width + self.scales = config.scales + self.valid_sequences = ['00', '01', '02', '03', '04', '05', '06', '07', '08', '09', '10'] + self.sequences = os.listdir(self._path) + if self.sequences is None: + raise FileNotFoundError(f'No sequences found in {self._path}') + # ======================================================== + + self._images = {} + self._velocities = {} + self._timestamps = {} + for sequence in self.sequences: + # Check if the sequence is valid + if sequence not in self.valid_sequences: + print(f'Sequence {sequence} is not valid. Skipping...') + continue + self._image_folder = self._path / sequence / 'image_2' + self._velocity_folder = self._path / sequence / 'oxts/data' + self._timestamps_file = self._path / sequence / 'oxts/timestamps.txt' + + self._images[sequence] = sorted(self._image_folder.glob(f'*.png')) + self._velocities[sequence] = sorted(self._velocity_folder.glob(f'*.txt')) + self._timestamps[sequence] = self._create_timestamps(timestamps=self._timestamps_file) + + self.camera_matrix = np.array( + [[0.58, 0, 0.5, 0], [0, 1.92, 0.5, 0], [0, 0, 1, 0], [0, 0, 0, 1]], dtype=np.float32) + + # Now we simply put all sequences available on into a single list + self._create_lists_from_sequences() + + self.sequence_check = None + + @staticmethod + def prepare_dataset(raw_dataset_path: str = None, + odometry_dataset_path: str = None, + oxts: bool = True, + gt_depth: bool = False) -> None: + """ + This method is used for preparing the dataset that is downloaded from the KITTI website. + :param raw_dataset_path: path to the downloaded dataset + :type raw_dataset_path: str + :param odometry_dataset_path: path to the extracted dataset + :type odometry_dataset_path: str + :param oxts: whether to copy the oxts files + :type oxts: bool + :param gt_depth: whether to copy the ground truth depth files + :type gt_depth: bool + """ + + # yapf: disable + # Mapping between KITTI Raw Drives and KITTI Odometry Sequences + KITTI_RAW_SEQ_MAPPING = { + 0: {'date': '2011_10_03', 'drive': 27, 'start_frame': 0, 'end_frame': 4540}, + 1: {'date': '2011_10_03', 'drive': 42, 'start_frame': 0, 'end_frame': 1100}, + 2: {'date': '2011_10_03', 'drive': 34, 'start_frame': 0, 'end_frame': 4660}, + # 3: {'date': '2011_09_26', 'drive': 67, 'start_frame': 0, 'end_frame': 800}, # No IMU + 4: {'date': '2011_09_30', 'drive': 16, 'start_frame': 0, 'end_frame': 270}, + 5: {'date': '2011_09_30', 'drive': 18, 'start_frame': 0, 'end_frame': 2760}, + 6: {'date': '2011_09_30', 'drive': 20, 'start_frame': 0, 'end_frame': 1100}, + 7: {'date': '2011_09_30', 'drive': 27, 'start_frame': 0, 'end_frame': 1100}, + 8: {'date': '2011_09_30', 'drive': 28, 'start_frame': 1100, 'end_frame': 5170}, + 9: {'date': '2011_09_30', 'drive': 33, 'start_frame': 0, 'end_frame': 1590}, + 10: {'date': '2011_09_30', 'drive': 34, 'start_frame': 0, 'end_frame': 1200}, + } + # yapf: enable + + total_frames = 0 + for mapping in KITTI_RAW_SEQ_MAPPING.values(): + total_frames += (mapping['end_frame'] - mapping['start_frame'] + 1) + + if gt_depth: + with tqdm(desc='Copying depth files', total=total_frames * 2, unit='files') as pbar: + for sequence, mapping in KITTI_RAW_SEQ_MAPPING.items(): + # This is the improved gt depth using 5 consecutive frames from + # "Sparsity invariant CNNs", J. 
Uhrig et al., 3DV, 2017. + odometry_sequence_path = odometry_dataset_path / f'{sequence:02}' / 'gt_depth' + split = 'val' if sequence == 4 else 'train' + raw_sequence_path = raw_dataset_path / split / \ + f'{mapping["date"]}_drive_{mapping["drive"]:04}_sync' / \ + 'proj_depth' / 'groundtruth' + if not raw_sequence_path.exists(): + continue + for image in ['image_02', 'image_03']: + image_raw_sequence_path = raw_sequence_path / image + (odometry_sequence_path / image).mkdir(exist_ok=True, parents=True) + raw_filenames = sorted(image_raw_sequence_path.glob('*')) + for raw_filename in raw_filenames: + odometry_filename = odometry_sequence_path / image / raw_filename.name + frame = int(raw_filename.stem) + if mapping['start_frame'] <= frame <= mapping['end_frame']: + copyfile(raw_filename, odometry_filename) + pbar.update(1) + pbar.set_postfix({'sequence': sequence}) + + if oxts: + with tqdm(desc='Copying OXTS files', total=total_frames, unit='files') as pbar: + for sequence, mapping in KITTI_RAW_SEQ_MAPPING.items(): + odometry_sequence_path = odometry_dataset_path / f'{sequence:02}' / 'oxts' + raw_sequence_path = raw_dataset_path / \ + f'{mapping["date"]}' / \ + f'{mapping["date"]}_drive_{mapping["drive"]:04}_sync' / \ + 'oxts' + if not raw_sequence_path.exists(): + continue + odometry_sequence_path.mkdir(exist_ok=True, parents=True) + copyfile(raw_sequence_path / 'dataformat.txt', + odometry_sequence_path / 'dataformat.txt') + with open(raw_sequence_path / 'timestamps.txt', 'r', encoding='utf-8') as f: + timestamps = f.readlines()[mapping['start_frame']:mapping['end_frame'] + 1] + with open(odometry_sequence_path / 'timestamps.txt', 'w', encoding='utf-8') as f: + f.writelines(timestamps) + + for image in ['data']: + image_raw_sequence_path = raw_sequence_path / image + (odometry_sequence_path / image).mkdir(exist_ok=True, parents=True) + raw_filenames = sorted(image_raw_sequence_path.glob('*')) + for raw_filename in raw_filenames: + odometry_filename = odometry_sequence_path / image / raw_filename.name + frame = int(raw_filename.stem) + if mapping['start_frame'] <= frame <= mapping['end_frame']: + copyfile(raw_filename, odometry_filename) + pbar.update(1) + pbar.set_postfix({'sequence': sequence}) + + def _create_timestamps(self, timestamps) -> List[int]: + """ + This method is used for creating a list of timestamps from the timestamps file. + + :return: a list of timestamps + :rtype: List of float + """ + timestamps = [] + with open(self._timestamps_file, 'r', encoding='utf-8') as f: + string_timestamps = f.read().splitlines() + # Convert relative to timestamp of initial frame and output in seconds + # Discard nanoseconds as they are not supported by datetime + for timestamp in string_timestamps: + timestamps.append( + (datetime.strptime(timestamp[:-3], '%Y-%m-%d %H:%M:%S.%f') - + datetime.strptime(string_timestamps[0][:-3], '%Y-%m-%d %H:%M:%S.%f')).total_seconds()) + return timestamps + + def _load_relative_distance(self, index: int) -> float: + """ + Distance in meters and with respect to the previous frame. 
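+        Computed as the average of the previous and current OXTS speeds multiplied by the timestamp
+        difference between the two frames.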
+ """ + previous_timestamp = self.timestamps[index - 1] + current_timestamp = self.timestamps[index] + delta_timestamp = current_timestamp - previous_timestamp # s + # Compute speed as norm of forward, leftward, and upward elements + previous_speed = np.linalg.norm(np.loadtxt(str(self.velocities[index - 1]))[8:11]) + current_speed = np.linalg.norm(np.loadtxt(str(self.velocities[index]))[8:11]) + speed = (previous_speed + current_speed) / 2 # m/s + distance = speed * delta_timestamp + return distance + + def _create_lists_from_sequences(self): + """ + This method is used for creating a single list of images, velocities and timestamps from the sequences. + """ + + self.images = [] + self.velocities = [] + self.timestamps = [] + + for sequence in self.sequences: + # Check if the sequence is valid + if sequence not in self.valid_sequences: + # print(f'Sequence {sequence} is not valid. Skipping...') + continue + self.images.extend(self._images[sequence]) + self.velocities.extend(self._velocities[sequence]) + self.timestamps.extend(self._timestamps[sequence]) + + def __getitem__(self, idx: int) -> Tuple[Any, None]: + """ + This method is used for getting the item at the given index. + + :param idx: index of the item + :type idx: int + :return: the item at the given index + :rtype: Tuple[Any, None] + """ + data = {} + flag = False + counter = 0 + for i in range(idx, idx+3): + image = PILImage.open(str(self.images[i])) + image = Resize((self.height, self.width), interpolation=InterpolationMode.LANCZOS)(image) + image = Image(image).opencv().transpose(2, 0, 1) + image = Image(image) + distance = self._load_relative_distance(i) + image_id = self.images[i].name.split('.')[0] + sequence_id = re.findall("sequences/\d\d", str(self.images[i]))[0].split('/')[1] + if self.sequence_check is None: + self.sequence_check = sequence_id + else: + if self.sequence_check != sequence_id: + # This means that now we have a new sequence and we need to wait two iterations + # to get the first image of the new sequence + flag = True + if flag: + counter += 1 + if counter < 2: + continue + else: + self.sequence_check = sequence_id + flag = False + counter = 0 + image_id = sequence_id + '_' + image_id + data[image_id] = (image, distance) + return data, None + + def __len__(self): + """ + This method is used for getting the length of the dataset. 
+ + :return: the length of the dataset + :rtype: int + """ + return len(self.images) + +if __name__ == '__main__': + parser = argparse.ArgumentParser() + parser.add_argument('raw_path', type=str) + parser.add_argument('odom_path', type=str) + group = parser.add_mutually_exclusive_group() + group.add_argument('--oxts', action='store_true') + group.add_argument('--depth', action='store_true') + args = parser.parse_args() + + KittiDataset.prepare_dataset(Path(args.raw_path), + Path(args.odom_path), + oxts=args.oxts, + gt_depth=args.depth) diff --git a/src/opendr/perception/continual_slam/dependencies.ini b/src/opendr/perception/continual_slam/dependencies.ini new file mode 100644 index 0000000000..7dbbbfeb78 --- /dev/null +++ b/src/opendr/perception/continual_slam/dependencies.ini @@ -0,0 +1,25 @@ +[runtime] +python= + torch>=1.9.0 + torchvision>=0.10.0 + setuptools>=58.2.0 + numpy + Pillow + tqdm + matplotlib + pyyaml + opencv-python + colour_demosaicing + scipy + sklearn + scikit-image + opencv-python + ${OPENDR_HOME}/src/opendr/perception/continual_slam/algorithm/g2o +opendr=opendr-toolkit-engine +linux=libsuitesparse-dev + cmake + libeigen3-dev + +[device] +# These dependencies require a GPU. If no GPU is available, the package will not be installed. +opendr_device=gpu diff --git a/tests/sources/tools/perception/continual_slam/__init__.py b/tests/sources/tools/perception/continual_slam/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/tests/sources/tools/perception/continual_slam/test_continual_slam.py b/tests/sources/tools/perception/continual_slam/test_continual_slam.py new file mode 100644 index 0000000000..9afc9e61b9 --- /dev/null +++ b/tests/sources/tools/perception/continual_slam/test_continual_slam.py @@ -0,0 +1,153 @@ +# Copyright 2020-2023 OpenDR European Project +# +# Licensed under the Apache License, Version 2.0 (the 'License'); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an 'AS IS' BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import os +import sys +import shutil +import unittest +import warnings +import yaml +from pathlib import Path +import numpy as np + +from opendr.perception.continual_slam.continual_slam_learner import ContinualSLAMLearner +from opendr.perception.continual_slam.datasets.kitti import KittiDataset +from opendr.perception.continual_slam.algorithm.depth_pose_module.replay_buffer import ReplayBuffer +from opendr.engine.data import Image + + +def rmfile(path): + try: + os.remove(path) + except OSError as e: + print(f'Error: {e.filename} - {e.strerror}.') + + +def rmdir(_dir): + try: + shutil.rmtree(_dir) + except OSError as e: + print(f'Error: {e.filename} - {e.strerror}.') + + +class TestContinualSlamLearner(unittest.TestCase): + @classmethod + def setUpClass(cls): + print("\n\n**********************\nTEST Continaul SLAM Predictor/Learner\n******************************") + cls.temp_dir = os.path.join('tests', 'sources', 'tools', 'perception', 'continual_slam', + 'continual_slam_temp') + if os.path.exists(cls.temp_dir): + rmdir(cls.temp_dir) + os.makedirs(cls.temp_dir) + + # Download all required files for testing + cls.model_weights = ContinualSLAMLearner.download(path=cls.temp_dir, trained_on="cityscapes") + cls.test_data = ContinualSLAMLearner.download(path=cls.temp_dir, mode="test_data") + + # Configuration for the weights pre-trained on SemanticKITTI + base_config_file = str(Path(sys.modules[ + ContinualSLAMLearner.__module__].__file__).parent / 'configs' / 'singlegpu_kitti.yaml') + + with open(base_config_file) as f: + try: + databaseConfig = yaml.safe_load(f) + except yaml.YAMLError as exc: + print(exc) + databaseConfig['Dataset']['dataset_path'] = str(cls.test_data) + databaseConfig['DepthPosePrediction']['load_weights_folder'] = str(cls.model_weights) + cls.config_file = os.path.join(cls.temp_dir, 'singlegpu_kitti.yaml') + with open(cls.config_file, 'w') as f: + yaml.dump(databaseConfig, f) + + @classmethod + def tearDownClass(cls): + # Clean up downloaded files + rmdir(cls.temp_dir) + + def test_init(self): + predictor = ContinualSLAMLearner(self.config_file, mode="predictor") + learner = ContinualSLAMLearner(self.config_file, mode="learner") + assert predictor.step == 0 + assert learner.step == 0 + + def test_fit(self): + warnings.simplefilter("ignore", UserWarning) + warnings.simplefilter("ignore", DeprecationWarning) + + dataset = KittiDataset(str(self.test_data), self.config_file) + learner = ContinualSLAMLearner(self.config_file, mode="learner") + predictor = ContinualSLAMLearner(self.config_file, mode="predictor") + replay_buffer = ReplayBuffer(buffer_size=5, + save_memory=True, + dataset_config_path=self.config_file, + sample_size=3) + # Test without replay buffer + for item in dataset: + replay_buffer.add(item) + item = ContinualSLAMLearner._input_formatter(item) + sample = [item] + assert learner.fit(sample, learner=True) + + # Test with replay buffer + for item in dataset: + replay_buffer.add(item) + if replay_buffer.count < 3: + continue + item = ContinualSLAMLearner._input_formatter(item) + sample = replay_buffer.sample() + sample.insert(0, item) + assert learner.fit(sample, learner=True) + try: + predictor.fit(sample, learner=True) + assert False, "Should raise NotImplementedError" + except ValueError: + pass + + def test_infer(self): + warnings.simplefilter("ignore", UserWarning) + warnings.simplefilter("ignore", DeprecationWarning) + + dataset = KittiDataset(str(self.test_data), self.config_file) + predictor = ContinualSLAMLearner(self.config_file, mode="predictor") + 
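+        # infer() is only supported in 'predictor' mode; the 'learner' instance below is used to
+        # check that calling infer() on it raises ValueError.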
learner = ContinualSLAMLearner(self.config_file, mode="learner") + + # Test without loop closure + for item in dataset: + (depth, odometry), losses = predictor.infer(item, return_losses=True) + assert losses is None, "Losses should be None" + self.assertIsInstance(depth, Image) + self.assertIsInstance(odometry, np.ndarray) + try: + learner.infer(item, return_losses=True) + assert False, "Should raise NotImplementedError" + except ValueError: + pass + + def test_save(self): + learner = ContinualSLAMLearner(self.config_file, mode="learner") + temp_model_path = self.temp_dir + '/test_save_weights' + location = learner.save(temp_model_path) + self.assertTrue(temp_model_path == location) + self.assertTrue(os.path.exists(os.path.join(temp_model_path, 'depth_decoder.pth'))) + self.assertTrue(os.path.exists(os.path.join(temp_model_path, 'pose_encoder.pth'))) + self.assertTrue(os.path.exists(os.path.join(temp_model_path, 'depth_encoder.pth'))) + self.assertTrue(os.path.exists(os.path.join(temp_model_path, 'pose_decoder.pth'))) + + def test_load(self): + predictor = ContinualSLAMLearner(self.config_file, mode="predictor") + successful = predictor.load(self.model_weights) + self.assertTrue(successful) + +if __name__ == "__main__": + unittest.main() diff --git a/tests/test_clang_format.py b/tests/test_clang_format.py index 76424c2b19..4cfbe6cf03 100755 --- a/tests/test_clang_format.py +++ b/tests/test_clang_format.py @@ -60,6 +60,7 @@ def test_sources_are_clang_format_compliant(self): skippedPaths = [ 'src/opendr/perception/panoptic_segmentation/efficient_ps/algorithm/EfficientPS', 'src/opendr/perception/panoptic_segmentation/efficient_lps/algorithm/EfficientLPS', + 'src/opendr/perception/continual_slam/algorithm/g2o', 'src/opendr/planning/end_to_end_planning/ardupilot', ] skippedFiles = [ diff --git a/tests/test_license.py b/tests/test_license.py index 19c0632b46..72e1330ccf 100644 --- a/tests/test_license.py +++ b/tests/test_license.py @@ -108,6 +108,7 @@ def setUp(self): 'src/opendr/perception/object_detection_2d/nanodet/algorithm', 'src/opendr/perception/panoptic_segmentation/efficient_ps/algorithm/EfficientPS', 'src/opendr/perception/panoptic_segmentation/efficient_lps/algorithm/EfficientLPS', + 'src/opendr/perception/continual_slam/algorithm', 'src/opendr/perception/facial_expression_recognition/landmark_based_facial_expression_recognition/algorithm', 'src/opendr/perception/facial_expression_recognition/image_based_facial_emotion_estimation/algorithm', 'src/opendr/perception/gesture_recognition/algorithm', diff --git a/tests/test_pep8.py b/tests/test_pep8.py index 6ef65c59ce..a62dd37ced 100755 --- a/tests/test_pep8.py +++ b/tests/test_pep8.py @@ -33,6 +33,8 @@ 'lib', 'src/opendr/perception/panoptic_segmentation/efficient_ps/algorithm/EfficientPS', 'src/opendr/perception/panoptic_segmentation/efficient_lps/algorithm/EfficientLPS', + 'src/opendr/perception/continual_slam/algorithm', + 'src/opendr/perception/continual_slam/datasets', 'projects/python/control/eagerx', 'projects/opendr_ws_2/src/vision_opencv', 'projects/opendr_ws/devel',