add IMU-PARSAC

openxrlab · Jan 4, 2024 · 031d812 · 031d812
1 parent ed21fef
commit 031d812
Show file tree

Hide file tree

Showing 39 changed files with 2,018 additions and 175 deletions.
diff --git a/README.md b/README.md
@@ -57,7 +57,15 @@ If you use this toolbox or benchmark in your research, please cite this project.
     year={2022}
 }
 ```
-
+If you use the Robust Visual-Inertial Odometry in your research, please cite:
+```bibtex
+@article{li2023rd,
+  title={RD-VIO: Robust Visual-Inertial Odometry for Mobile Augmented Reality in Dynamic Environments},
+  author={Li, Jinyu and Pan, Xiaokun and Huang, Gan and Zhang, Ziyang and Wang, Nan and Bao, Hujun and Zhang, Guofeng},
+  journal={arXiv preprint arXiv:2310.15072},
+  year={2023}
+}
+```
 ## Contributing
 
 We appreciate all contributions to improve XRSLAM.

diff --git a/configs/euroc_slam.yaml b/configs/euroc_slam.yaml
@@ -5,14 +5,18 @@ output:
 
 sliding_window:
   size: 10
+  subframe_size: 3
   force_keyframe_landmarks: 35
 
 feature_tracker:
   min_keypoint_distance: 10.0 # [px]
-  max_keypoint_detection: 1000
+  max_keypoint_detection: 200
   max_init_frames: 60
   max_frames: 20
   predict_keypoints: true
+  clahe_clip_limit: 6.0
+  clahe_width: 8
+  clahe_height: 8
 
 initializer:
   keyframe_num: 8
@@ -26,3 +30,14 @@ initializer:
 solver:
   iteration_limit: 30
   time_limit: 1.0e6 # [s]
+
+rotation:
+  misalignment_threshold: 0.02
+  ransac_threshold: 10 # degree
+
+parsac:
+  use_flag: false
+  dynamic_probability: 0.15
+  threshold: 1.0
+  norm_scale: 1.0
+  keyframe_check_size: 1
diff --git a/docs/en/benchmark.md b/docs/en/benchmark.md
@@ -1,73 +1,53 @@
 # Benchmark
 
-We run our algorithm on EuRoC dataset on Ubuntu18.04 and macOS 10.14. And make comparisons with  VINS-Mono, which is one of the state-of-the-art VIO systems. We analyze the accuracy of the algorithm by comparing the root mean squared error (RMSE) of the absolute trajectory error (ATE). ATE is given by the simple difference between the estimated trajectory and ground truth after it has been aligned so that it has a minimal error, and RMSE is the standard deviation of the residuals (prediction errors). The lower the RMSE, the better a given trajectory is able to fit its ground truth. We use "evo" tool to evaluate and compare the trajectory output of odometry and SLAM algorithms, and for more information please refer to [euroc evaluation](./tutorials/euroc_evaluation.md). As shown in the following table, XRSLAM has higher accuracy than VINS-Mono (without loop). The average results for visual-inertial algorithms are bolded, and the following figures show the trajectory of XRSLAM on Vicon Room 1 01 sequence.
-
-<table >
-	<tr>
-	    <th rowspan="2">Sequence</th>
-        <th colspan="2">APE(m)</th>
-        <th colspan="2">ARE(deg)</th>
-	</tr >
-	<tr>
-	    <th>XRSLAM</th>
-	    <th>VINS-Mono</th>
-	    <th>XRSLAM</th>
-	    <th>VINS-Mono</th>
-	</tr >
-	<tr >
-	    <td align="center">MH_01</td>
-        <td  align="center">0.147</td> <td align="center">0.154</td> <td align="center">2.516</td> <td align="center">1.516</td>
-	</tr>
-	<tr >
-	    <td align="center">MH_02</td>
-        <td align="center">0.077</td> <td align="center">0.178</td> <td align="center">1.707</td> <td align="center">2.309</td>
-	</tr>  
-	<tr >
-	    <td align="center">MH_03</td>
-        <td align="center">0.154</td> <td align="center">0.195</td> <td align="center">1.554</td> <td align="center">1.646</td>
-	</tr>  
-	<tr >
-	    <td align="center">MH_04</td>
-        <td align="center">0.269</td> <td align="center">0.376</td> <td align="center">1.344</td> <td align="center">1.431</td>
-	</tr>
-	<tr >
-	    <td align="center">MH_05</td>
-        <td align="center">0.252</td> <td align="center">0.300</td> <td align="center">0.740</td> <td align="center">0.782</td>
-	</tr>
-	<tr >
-	    <td align="center">V1_01</td>
-        <td align="center">0.063</td> <td align="center">0.088</td> <td align="center">5.646</td> <td align="center">6.338</td>
-	</tr>
-	<tr >
-	    <td align="center">V1_02</td>
-        <td align="center">0.097</td> <td align="center">0.111</td> <td align="center">1.877</td> <td align="center">3.278</td>
-	</tr>
-	<tr >
-	    <td align="center">V1_03</td>
-        <td align="center">0.102</td> <td align="center">0.187</td> <td align="center">2.190</td> <td align="center">6.211</td>
-	</tr>
-	<tr >
-	    <td align="center">V2_01</td>
-        <td align="center">0.066</td> <td align="center">0.082</td> <td align="center">1.301</td> <td align="center">2.137</td>
-	</tr>
-	<tr >
-	    <td align="center">V2_02</td>
-        <td align="center">0.092</td> <td align="center">0.149</td> <td align="center">1.521</td> <td align="center">3.976</td>
-	</tr>
-	<tr >
-	    <td align="center">V2_03</td>
-        <td align="center">0.193</td> <td align="center">0.287</td> <td align="center">1.592</td> <td align="center">3.331</td>
-	</tr>
-	<tr >
-	    <td align="center">Average</td>
-        <th>0.137</th> <th>0.192</th> <th>1.998</th> <th>2.995</th>
-	</tr>
-
-
-
----
-
-
+We run our algorithm on EuRoC dataset on Ubuntu18.04 and macOS 10.14. And make comparisons with the state-of-the-art VIO systems. We analyze the accuracy of the algorithm by comparing the root mean squared error (RMSE) of the absolute trajectory error (ATE). ATE is given by the simple difference between the estimated trajectory and ground truth after it has been aligned so that it has a minimal error, and RMSE is the standard deviation of the residuals (prediction errors). The lower the RMSE, the better a given trajectory is able to fit its ground truth. We use "evo" tool to evaluate and compare the trajectory output of odometry and SLAM algorithms, and for more information please refer to [euroc evaluation](./tutorials/euroc_evaluation.md). We have achieved obvious better results on EuRoc and ADVIO datasets, which proves the effectiveness of our system.
+
+**SF-VIO** completely disables the dynamic object removal strategy in XRSLAM, which can be enabled in the configuration *parsac_flag*. When the flag activated, it is referred to as **RD-VIO**. As shown in the following tables, the best results for visual-inertial algorithms are bolded. Comparing with other systems, SF-VIO showed significant improvements on many sequences on EuRoC dataset. Thanks to the additional stabilization effect, the significant drifts are canceled when using  the subframe strategy in our system.
+
+As a challenging dataset in real-world settings, ADVIO offers 23 diverse scenarios, encompassing indoor and outdoor environments, varying lighting conditions, and dynamic elements such as pedestrians and vehicles. Compared to SF-VIO without dynamic object removal strategies, RD-VIO showed significantly better RMSEs on ADVIO dataset.
+
+**Tracking Accuracy (RMSE in meters) on the EuRoC Dataset.**
+| Algorithm   | MH-01 | MH-02 | MH-03 | MH-04 | MH-05 | V1-01 | V1-02 | V1-03 | V2-01 | V2-02 | V2-03 | AVG   |
+|-------------|-------|-------|-------|-------|-------|-------|-------|-------|-------|-------|-------|-------|
+| **SF-VIO**  | 0.109 | 0.147 | **0.131** | 0.189 | 0.240 | **0.056** | 0.101 | 0.134 | 0.066 | 0.089 | **0.122** | **0.125** |
+| **RD-VIO**  | **0.109** | 0.115 | 0.141 | 0.247 | 0.267 | 0.060 | 0.091 | 0.168 | 0.058 | 0.100 | 0.147 | 0.136 |
+| **LARVIO**  | 0.132 | 0.137 | 0.168 | 0.237 | 0.314 | 0.083 | **0.064** | 0.086 | 0.148 | 0.077 | 0.168 | 0.147 |
+| **Open-VINS**| 0.111| 0.287 | 0.181 | **0.182** | 0.365 | 0.059 | 0.084 | **0.075** | 0.086 | **0.074** | 0.145 | 0.150 |
+| **VI-DSO**  | 0.125 | **0.072** | 0.285 | 0.343 | **0.202** | 0.197 | 0.135 | 4.073 | 0.242 | 0.202 | 0.212 | 0.553 |
+| **OKVIS***  | 0.342 | 0.361 | 0.319 | 0.318 | 0.448 | 0.139 | 0.232 | 0.262 | 0.163 | 0.211 | 0.291 | 0.281 |
+| **MSCKF**   | 0.734 | 0.909 | 0.376 | 1.676 | 0.995 | 0.520 | 0.567 |   -   | 0.236 |   -   |   -   | 0.752 |
+| **PVIO**    | 0.129 | 0.210 | 0.162 | 0.286 | 0.341 | 0.079 | 0.093 | 0.155 | **0.054** | 0.202 | 0.290 | 0.182 |
+| **DynaVINS**| 0.308 | 0.152 | 1.789 | 2.264 |   -   |   -   | 0.365 |   -   |   -   |   -   |   -   | 0.976 |
+| **VINS-Fusion(VIO)** | 0.149 | 0.110 | 0.168 | 0.221 | 0.310 | 0.071 | 0.282 | 0.170 | 0.166 | 0.386 | 0.190 | 0.202 |
+| **ORB-SLAM3(VIO)**| 0.543 | 0.700 | 1.874 | 0.999 | 0.964 | 0.709 | 0.545 | 2.649 | 0.514 | 0.451 | 1.655 | 1.055 |
+<br>
+
+**Accuracy on the ADVIO Dataset**
+| Sequence | **SF-VIO** | **RD-VIO** | **VINS-Fusion(VIO)** | **LARVIO** |
+| :------: | :--------: | :--------: | :------------------: | :--------: |
+|    01    |   2.177    |   **1.788**    |        2.339        |   5.049    |
+|    02    | **1.679**  |   1.695    |        1.914        |   4.242    |
+|    03    |   2.913    |   2.690    |      **2.290**      |   4.295    |
+|    04    |     -      |   **2.860**    |        3.350        |     -      |
+|    05    |   1.385    |   1.263    |      **0.938**      |   2.034    |
+|    06    |   2.837    |   **2.497**    |       11.005        |   8.201    |
+|    07    |   0.559    |   **0.548**    |        0.912        |   2.369    |
+|    08    |   2.075    |   2.151    |      **1.136**      |   2.078    |
+|    09    | **0.332**  |   2.281    |        1.063        |   3.168    |
+|    10    |   1.997    |   2.128    |      **1.847**      |   4.742    |
+|    11    |   4.103    | **3.986**  |       18.760        |   5.298    |
+|    12    |   2.084    |   1.951    |          -           | **1.191**  |
+|    13    |   3.227    |   2.899    |          -           | **1.324**  |
+|    14    | **1.524**  |   1.532    |          -           |     -      |
+|    15    | **0.779**  |   0.780    |        0.944        |   0.851    |
+|    16    | **0.986**  |   0.991    |        1.289        |   2.346    |
+|    17    |   1.657    | **1.235**  |        1.569        |   1.734    |
+|    18    |   1.164    | **1.057**  |        3.436        |   1.171    |
+|    19    |   3.154    |   2.740    |      **2.010**      |   3.256    |
+|    20    |   7.013    | **6.960**  |       10.433        |     -      |
+|    21    |   8.534    | **8.432**  |       11.004        |   8.962    |
+|    22    |   4.548    | **4.498**  |          -           |   4.686    |
+|    23    |   6.486    |   5.085    |      **4.668**      |   9.389    |
+|   AVG    |   2.873    | **2.671**  |        3.272        |   3.699    |
+<br>
 <div align='center'><img src="../images/PC-Player.png" width="60%" height="100%"><img src="../images/trajectory.png" width="39.8%" height="20%"></div>
-
----
diff --git a/docs/en/changelog.md b/docs/en/changelog.md
@@ -1,10 +1,16 @@
 # Changelog
 
+## XRSLAM Release v0.6.0
+
+**Highlights:**
+* [ADD] XRSLAM example for ROS
+* [ADD] RD-VIO version of SLAM with IMU-PARSAC algorithm
+
 ## XRSLAM Release v0.5.0
 
 **Highlights:**
 * [CHANGE] Redefine the function call interface of XRSLAM
-* [CHANGE] Use the newly defined interface to call XRSLAM in the example of ios and PC.
+* [CHANGE] Use the newly defined interface to call XRSLAM in the example of iOS and PC.
 * [CHANGE] Refactoring the PC example.
 * [CHANGE] Unified the configuration file format
 * [ADD] Add API interface documentation

diff --git a/docs/en/config_parse.md b/docs/en/config_parse.md
@@ -11,25 +11,27 @@ The following is the parameter description of the PC configuration, include [sla
 
 ### camera
 
-* `noise`: the convariance matrix for keypoint measurement noise, (pixel^2)
-* `distortion`: the distortion coefficient of camera, (k1 k2 p1 p2)
 * `intrinsic`: the intrinsics of camera (fx fy cx cy)
+* `camera_distortion_flag`: use distortion model or not
+* `distortion`: the distortion coefficient of camera, (k1 k2 p1 p2)
+* `time_offset`: camera time delay wrt. IMU [s]
 * `q_bc`: the rotation part of extrinsics from camera to body
 * `p_bc`: the translation part of extrinsics from camera to body
-* **note**: if you need to modify the `distortion` and `intrinsic`, remember the content at `xrslam-pc/player/src/euroc_dataset_reader.cpp` L55,56 to be modified at the same time.
+* `noise`: the convariance matrix for keypoint measurement noise, (pixel^2)
 
 ### imu
 
+* `q_bi`: the rotation part of extrinsics from IMU to body
+* `p_bi`: the translation part of extrinsics from IMU to body
 * `cov_g`: the convariance matrix for gyroscope white noise, (rad/s/sqrt(hz))^2)
 * `cov_a`: the convariance matrix for accelerometer  white noise, ((m/s^2/sqrt(hz))^2)
 * `cov_bg`: the convariance matrix for gyroscope bias diffusion, ((rad/s^2/sqrt(hz))^2)
 * `cov_ba`: the convariance matrix for accelerometer bias diffusion, ((m/s^3/sqrt(hz))^2)
-* `q_bi`: the rotation part of extrinsics from IMU to body
-* `p_bi`: the translation part of extrinsics from IMU to body
 
 ### sliding window
 
 * `size`: the size of keyframes in the sliding window
+* `subframe_size`: the size of subframes in the subframe window
 * `force_keyframe_landmark`: decide keyframe if the number of landmarks observed at this frame is below a certain threshold
 
 ### feature_tracker
@@ -55,6 +57,13 @@ The following is the parameter description of the PC configuration, include [sla
 * `iteration_limit`: the maxmium number of iteration for each optimization
 * `time_limit`: the maxmium time which optimizer cost for each optimization
 
+### parsac
+* `parsac_flag`: use imu-parsac or not
+* `dynamic_probability`: dynamic coefficient to measure the dynamic degree of the scene
+* `threshold`: the threshold of the stage one
+* `norm_scale`: the scale normalized to the image plane
+* `keyframe_check_size`: the size of keyframes involved in cross checking in stage two
+
 ## iOS
 
 iOS configuration also includes [slam parameters](../../xrslam-ios/visualizer/configs/slam_params.yaml) and [iPhone Parameters](../../xrslam-ios/visualizer/configs/iPhone&#32;X.yaml).
@@ -71,11 +80,10 @@ iOS configuration also includes [slam parameters](../../xrslam-ios/visualizer/co
 * `intrinsic`: the intrinsics of camera (fx fy cx cy)
 * `q_bc`: the rotation part of extrinsics from camera to body
 * `p_bc`: the translation part of extrinsics from camera to body
-* **note**: if you need to modify the `distortion` and `intrinsic`, remember the content at `xrslam-pc/player/src/euroc_dataset_reader.cpp` L55,56 to be modified at the same time.
 
 ### imu
 
-* `cov_g`: the convariance matrix for gyroscope white noise, (rad/s/sqrt(hz))^2)
+* `cov_g`: the convariance matrix for gyroscope white noise, ((rad/s/sqrt(hz))^2)
 * `cov_a`: the convariance matrix for accelerometer  white noise, ((m/s^2/sqrt(hz))^2)
 * `cov_bg`: the convariance matrix for gyroscope bias diffusion, ((rad/s^2/sqrt(hz))^2)
 * `cov_ba`: the convariance matrix for accelerometer bias diffusion, ((m/s^3/sqrt(hz))^2)
@@ -102,5 +110,4 @@ iOS configuration also includes [slam parameters](../../xrslam-ios/visualizer/co
 
 * `enable`:  enable sfm-based localization or not
 * `ip`:  server IP
-
 * `port`: server port
diff --git a/docs/en/tutorials/euroc_evaluation.md b/docs/en/tutorials/euroc_evaluation.md
@@ -4,16 +4,10 @@
 
 Download EuRoC dataset to YOUR_DATASET_FOLDER. Take [MH_01_easy ](http://robotics.ethz.ch/~asl-datasets/ijrr_euroc_mav_dataset/machine_hall/MH_01_easy/MH_01_easy.zip)for example.
 
-Start the XRSLAM pc player using the following command line, and it will not show visualization.
+Start the XRSLAM pc player using the following command line
 
 ```bash
-./build/xrslam-pc/player/xrslam-pc-player -H -c configs/euroc.yaml --tum trajectory.tum euroc:///data/EuRoC/MH_01_easy/mav0
-```
-
-We also provide a GUI version and could start by using the following command line
-
-```bash
-./build/xrslam-pc/player/xrslam-pc-player -c configs/euroc.yaml --tum trajectory.tum euroc:///data/EuRoC/MH_01_easy/mav0
+./build/xrslam-pc/player/xrslam-pc-player -sc configs/euroc_slam.yaml -dc configs/euroc_sensor.yaml --tum trajectory.tum euroc:///data/EuRoC/MH_01_easy/mav0
 ```
 
 + Click the first button "Stopped" of the player to automatically execute the program on the whole sequence. (recommend)
@@ -38,11 +32,10 @@ cd data/EuRoC/MH_01_easy/mav0/state_groundtruth_estimate0
 evo_traj euroc data.csv --save_as_tum
 ```
 
-After converting the ground truth trajectory to the "tum" format,  you can evaluate the accuracy ( APE and ARE ) by
+After converting the ground truth trajectory to the "tum" format,  you can evaluate the accuracy by
 
 ```bash
 evo_ape tum data.tum $PROJECT/trajectory.tum -a
-evo_ape tum data.tum $PROJECT/trajectory.tum -r angle_deg
 ```
 
-You will get the RMSE of XRSLAM on MH_01 sequence, in which APE is 0.147 and ARE is 2.56.
+You will get the RMSE of XRSLAM on MH_01 sequence, in which APE is 0.109.
diff --git a/xrslam-extra/include/xrslam/extra/opencv_image.h b/xrslam-extra/include/xrslam/extra/opencv_image.h
@@ -31,7 +31,7 @@ class OpenCvImage : public Image {
                          std::vector<vector<2>> &next_keypoints,
                          std::vector<char> &result_status) const override;
 
-    void preprocess() override;
+    void preprocess(double clipLimit, int width, int height) override;
     void correct_distortion(const matrix<3> &intrinsics,
                             const vector<4> &coeffs);
     void release_image_buffer() override;
@@ -50,7 +50,7 @@ class OpenCvImage : public Image {
     typedef ceres::BiCubicInterpolator<Grid> Interpolator;
     std::vector<Interpolator> interpolator_levels;
 
-    static cv::CLAHE *clahe();
+    static cv::CLAHE *clahe(double clipLimit, int width, int height);
     static cv::GFTTDetector *gftt(size_t max_points);
     static cv::FastFeatureDetector *fast();
     static cv::ORB *orb();

diff --git a/xrslam-extra/include/xrslam/extra/yaml_config.h b/xrslam-extra/include/xrslam/extra/yaml_config.h
@@ -33,6 +33,9 @@ class YamlConfig : public Config {
 
     vector<2> camera_resolution() const override;
     matrix<3> camera_intrinsic() const override;
+    vector<4> camera_distortion() const override;
+    size_t camera_distortion_flag() const override;
+    double camera_time_offset() const override;
     quaternion camera_to_body_rotation() const override;
     vector<3> camera_to_body_translation() const override;
     quaternion imu_to_body_rotation() const override;
@@ -48,13 +51,17 @@ class YamlConfig : public Config {
     vector<3> output_to_body_translation() const override;
 
     size_t sliding_window_size() const override;
+    size_t sliding_window_subframe_size() const override;
     size_t sliding_window_tracker_frequent() const override;
     size_t sliding_window_force_keyframe_landmarks() const override;
 
     double feature_tracker_min_keypoint_distance() const override;
     size_t feature_tracker_max_keypoint_detection() const override;
     size_t feature_tracker_max_init_frames() const override;
     size_t feature_tracker_max_frames() const override;
+    double feature_tracker_clahe_clip_limit() const override;
+    size_t feature_tracker_clahe_width() const override;
+    size_t feature_tracker_clahe_height() const override;
     bool feature_tracker_predict_keypoints() const override;
 
     size_t initializer_keyframe_num() const override;
@@ -72,9 +79,21 @@ class YamlConfig : public Config {
     size_t solver_iteration_limit() const override;
     double solver_time_limit() const override;
 
+    bool parsac_flag() const override;
+    double parsac_dynamic_probability() const override;
+    double parsac_threshold() const override;
+    double parsac_norm_scale() const override;
+    size_t parsac_keyframe_check_size() const override;
+
+    double rotation_misalignment_threshold() const override;
+    double rotation_ransac_threshold() const override;
+
   private:
     vector<2> m_camera_resolution;
     matrix<3> m_camera_intrinsic;
+    vector<4> m_camera_distortion;
+    size_t m_camera_distortion_flag;
+    double m_camera_time_offset;
     quaternion m_camera_to_body_rotation;
     vector<3> m_camera_to_body_translation;
     quaternion m_imu_to_body_rotation;
@@ -89,13 +108,17 @@ class YamlConfig : public Config {
     vector<3> m_output_to_body_translation;
 
     size_t m_sliding_window_size;
+    size_t m_sliding_window_subframe_size;
     size_t m_sliding_window_tracker_frequent;
     size_t m_sliding_window_force_keyframe_landmarks;
 
     double m_feature_tracker_min_keypoint_distance;
     size_t m_feature_tracker_max_keypoint_detection;
     size_t m_feature_tracker_max_init_frames;
     size_t m_feature_tracker_max_frames;
+    double m_feature_tracker_clahe_clip_limit;
+    size_t m_feature_tracker_clahe_width;
+    size_t m_feature_tracker_clahe_height;
     bool m_feature_tracker_predict_keypoints;
 
     size_t m_initializer_keyframe_num;
@@ -112,6 +135,15 @@ class YamlConfig : public Config {
 
     size_t m_solver_iteration_limit;
     double m_solver_time_limit;
+
+    bool m_parsac_flag;
+    double m_parsac_dynamic_probability;
+    double m_parsac_threshold;
+    double m_parsac_norm_scale;
+    size_t m_parsac_keyframe_check_size;
+
+    double m_rotation_misalignment_threshold;
+    double m_rotation_ransac_threshold;
 };
 
 } // namespace xrslam::extra