From 1de98608d7b587441cc179c4033f27351bbacf29 Mon Sep 17 00:00:00 2001 From: Hanzhe Teng Date: Sun, 29 Oct 2023 22:28:24 -0700 Subject: [PATCH] update download script and dataset info --- docs/about.md | 2 +- docs/calibration.md | 14 ++++--- docs/download.md | 56 ++++++++++++++++--------- scripts/download_citrusfarm.py | 74 +++++++++++++++++++++++++++------- 4 files changed, 107 insertions(+), 39 deletions(-) diff --git a/docs/about.md b/docs/about.md index 7eb6070..f047d57 100644 --- a/docs/about.md +++ b/docs/about.md @@ -6,7 +6,7 @@ title: About The authors of this work are: Hanzhe Teng, Yipeng Wang, Xiaoao Song and Konstantinos Karydis from the [ARCS Lab](https://sites.google.com/view/arcs-lab/people) at the [University of California, Riverside](https://www.ucr.edu/). -You may reach out to us via emails or open a new issue in the [github repository](https://github.com/UCR-Robotics/Citrus-Farm-Dataset). +You may reach out to us via email or open a new issue in the [GitHub repository](https://github.com/UCR-Robotics/Citrus-Farm-Dataset). - Hanzhe Teng (hteng007 AT ucr.edu) - Konstantinos Karydis (kkarydis AT ece.ucr.edu) diff --git a/docs/calibration.md b/docs/calibration.md index 2bf81c3..70ec568 100644 --- a/docs/calibration.md +++ b/docs/calibration.md @@ -19,20 +19,22 @@ Most frame names are self-explanatory; you can find its correspondence to each s It is worth noting that `base_link` is the frame used by wheel odometry, and is located at the center bottom of the robot base. Other notes regarding GPS frame: -- With one GPS receiver on the robot, we can access only the 3D position (rather than full 6-DoF pose) of the robot. Therefore, the orientation (quaternion) component of LiDAR-GPS is not very meaningful. 
-- More precisely, the GPS-RTK data is with respect to the center of the GPS receiver; if your algorithm output is expressed in IMU or LiDAR frame, it is better to convert it to GPS frame first, before computing the error (e.g., Absolute Trajectory Error (ATE)). We provide such evaluation scripts in the [tools](tools.html) as well. +- With one GPS receiver on the robot, we can access only the 3D position (rather than full 6-DoF pose) of the robot. Therefore, the orientation (quaternion) component of LiDAR-GPS extrinsic parameters is not very meaningful. +- More precisely, the GPS-RTK data is with respect to the center of the GPS receiver; if your algorithm output is expressed in IMU or LiDAR frame, it is better to convert it to the GPS frame first and align orientation using SVD, before computing the actual error (e.g., Absolute Trajectory Error (ATE)). We provide such evaluation scripts in the [tools](tools.html) as well. ### Calibration Process ![Calibration images](./figures/calibration_image.png) In summary, these extrinsic parameters are obtained by four steps: - Multi-camera calibration for Monochrome, RGB, NIR (RGN), Thermal cameras using [Kalibr toolbox](https://github.com/ethz-asl/kalibr). -- Camera-IMU calibration for Monochrome camera and IMU using [Kalibr toolbox](https://github.com/ethz-asl/kalibr). -- LiDAR-camera calibration using [ACFR toolbox](https://github.com/acfr/cam_lidar_calibration). +- IMU-Camera calibration for IMU and Monochrome camera using [Kalibr toolbox](https://github.com/ethz-asl/kalibr). +- LiDAR-Camera calibration using [ACFR toolbox](https://github.com/acfr/cam_lidar_calibration). - LiDAR-GPS and LiDAR-baselink are measured and computed directly from CAD models. For details regarding how we performed these calibration steps, please refer to our paper or the README file in the calibration data folder. +The original calibration results (and reports) are also included in the release of this dataset. 
If you are interested, here is a brief list of the related files in the `Calibration/results` folder: [01-multi-cam-result.yaml](https://ucr-robotics.s3.us-west-2.amazonaws.com/citrus-farm-dataset/Calibration/results/01-multi-cam-result.yaml), [02-imu-cam-result.yaml](https://ucr-robotics.s3.us-west-2.amazonaws.com/citrus-farm-dataset/Calibration/results/02-imu-cam-result.yaml), [03-lidar-cam-result.txt](https://ucr-robotics.s3.us-west-2.amazonaws.com/citrus-farm-dataset/Calibration/results/03-lidar-cam-result.txt), [04-lidar-gps-result.txt](https://ucr-robotics.s3.us-west-2.amazonaws.com/citrus-farm-dataset/Calibration/results/04-lidar-gps-result.txt), [05-baselink-lidar-result.txt](https://ucr-robotics.s3.us-west-2.amazonaws.com/citrus-farm-dataset/Calibration/results/05-baselink-lidar-result.txt). + ## Intrinsic Parameter Calibration ### Camera Specifications @@ -50,10 +52,10 @@ them at 10 Hz to match LiDAR’s operating rate (and also save storage space). ### Camera Intrinsics In Kalibr toolbox, the calibration of intrinsic parameters and extrinsic parameters are performed jointly in the nonlinear optimization process. Therefore, the intrinsic parameters of all four cameras are obtained together with their extrinsic parameters in the multi-camera calibration step. -This file contains all the results: [[01] multi-cam-camchain.yaml](https://ucr-robotics.s3.us-west-2.amazonaws.com/citrus-farm-dataset/Calibration/results/[01]%20multi-cam-camchain.yaml) (included in the Calibration folder of this dataset) +The original calibration result that contains camera intrinsics: [01-multi-cam-result.yaml](https://ucr-robotics.s3.us-west-2.amazonaws.com/citrus-farm-dataset/Calibration/results/01-multi-cam-result.yaml) (included in the `Calibration/results` folder of this dataset) ### IMU Intrinsics The intrinsic parameter calibration of IMU is performed by using this Github repo: [allan_variance_ros](https://github.com/ori-drs/allan_variance_ros). 
-The calibration result is [microstrain_gx5.yaml](https://ucr-robotics.s3.us-west-2.amazonaws.com/citrus-farm-dataset/Calibration/config/microstrain_gx5.yaml) (included in the Calibration folder of this dataset), which has been used as input to the following camera-IMU calibration step. +The calibration result is [microstrain_gx5.yaml](https://ucr-robotics.s3.us-west-2.amazonaws.com/citrus-farm-dataset/Calibration/config/microstrain_gx5.yaml) (included in the `Calibration/config` folder of this dataset), which has been used as the input to the following IMU-Camera calibration step. Feel free to [reach out to us](about.html) or open a new issue on the [Github repo](https://github.com/UCR-Robotics/Citrus-Farm-Dataset) if you have any further questions. diff --git a/docs/download.md b/docs/download.md index a4ae5d1..11e760a 100644 --- a/docs/download.md +++ b/docs/download.md @@ -3,13 +3,6 @@ layout: article title: Download --- -## Data Format and Usage -The primary data format we used in data collection is [ROS bags](http://wiki.ros.org/rosbag). To simplify data storage and transfer, we split recorded data into blocks of 4GB, and categorized them based on their respective modalities. - -You may download only those ROS bags that are of your interest. After download, simply place these ROS bags in the same folder and run `rosbag play *.bag`. ROS will automatically arrange the data across all bags and sequence the playback according to their timestamps. - -To accommodate users from diverse application domains, we also provide a Python script that can extract data from rosbags and save them as individual files (images, pcd, csv files). See [tools](tools.html) for more information. 
- ## Folder Structure ``` citrus-farm-dataset/ @@ -26,12 +19,22 @@ citrus-farm-dataset/ ├── 05_13D_Jackal/ ├── 06_14B_Jackal/ ├── 07_14B_Jackal/ -└── Calibration/ - ├── README.docx - ├── config/ - ├── data/ - ├── results/ - └── scripts/ +├── Calibration/ +│ ├── README.pdf +│ ├── config/ +│ ├── data/ +│ ├── results/ +│ └── scripts/ +└── ground_truth/ + ├── 01_13B_Jackal/ + │ ├── gt.bag + │ └── gt.csv + ├── 02_13B_Jackal/ + ├── 03_13B_Jackal/ + ├── 04_13D_Jackal/ + ├── 05_13D_Jackal/ + ├── 06_14B_Jackal/ + └── 07_14B_Jackal/ ``` For a complete file list, please see [dataset_file_list.yaml](https://raw.githubusercontent.com/UCR-Robotics/Citrus-Farm-Dataset/main/dataset_file_list.yaml). @@ -44,8 +47,7 @@ You may use this Python script ([download_citrusfarm.py](https://raw.githubuserc - Change `folder_list` in the script to download only sequences of your interest. - Change `modality_list` in the script to download only modalities of your interest. -If you are a user of AWS Command Line Interface, you can also download all data directly from the S3 bucket: -(You do not need to register any account to use this tool, but just `sudo apt install awscli`.) +If you are a user of AWS, you can also download all data directly from the S3 bucket using the AWS CLI tool: ``` aws s3 sync s3://ucr-robotics/citrus-farm-dataset/ /path/to/local/directory ``` @@ -54,6 +56,24 @@ Alternatively, you may download the dataset from two other backup sources: - [Google Drive](https://drive.google.com/drive/folders/12h5CAagVVtz1Od9bK_O6hDMyG8Xh_DLG?usp=sharing) - Baidu Pan (TODO) +## Data Format and Usage +The primary data format we used in data collection is [ROS bags](http://wiki.ros.org/rosbag). +To simplify data storage and transfer, we split recorded data into blocks of 4GB, and categorized them based on their respective modalities. +You may download only those ROS bags that are of interest to you. 
+ +After downloading, simply place these ROS bags in the same folder and play the rosbags of interest to you all at once. +**ROS will automatically read the data across all bags and play them in sequence according to their timestamps.** +There is no need to merge multiple rosbags before playing. +``` +# play back only lidar, IMU and thermal data +rosbag play base_*.bag adk_*.bag + +# play back all data +rosbag play *.bag +``` + +To accommodate users from diverse application domains, we also provide a Python script that can extract data from rosbags and save them as individual files (images, pcd, csv files). See [tools](tools.html) for more information. + ## ROSbag Info | ROS Bag | ROS Topic | Msg Type | Sensor | @@ -69,8 +89,7 @@ Alternatively, you may download the dataset from two other backup sources: | | /flir/blackfly/cam0/time_reference | sensor_msgs/TimeReference | Mono Camera | | mapir_*.bag | /mapir_cam/image_raw | sensor_msgs/Image | R-G-NIR Camera | | | /mapir_cam/time_reference | sensor_msgs/TimeReference | R-G-NIR Camera | -| odom_*.bag | /gps/fix/odometry | nav_msgs/Odometry | GPS-RTK | -| | /jackal_velocity_controller/odom | nav_msgs/Odometry | Wheel Odometry | +| odom_*.bag | /jackal_velocity_controller/odom | nav_msgs/Odometry | Wheel Odometry | | zed_*.bag | /zed2i/zed_node/confidence/confidence_map | sensor_msgs/Image | Zed camera | | | /zed2i/zed_node/depth/camera_info | sensor_msgs/CameraInfo | Zed camera | | | /zed2i/zed_node/depth/depth_registered | sensor_msgs/Image | Zed camera | @@ -81,11 +100,12 @@ Alternatively, you may download the dataset from two other backup sources: | | /zed2i/zed_node/pose | geometry_msgs/PoseStamped | Zed camera | | | /zed2i/zed_node/right/camera_info | sensor_msgs/CameraInfo | Zed camera | | | /zed2i/zed_node/right/image_rect_color | sensor_msgs/Image | Zed camera | +| gt.bag | /gps/fix/odometry | nav_msgs/Odometry | GPS-RTK | Notes about the three GPS-RTK data types: - `/piksi/navsatfix_best_fix` is the raw RTK data recorded in 
the fixed mode; - `/piksi/debug/receiver_state` is the debugging data to show info such as number of satellites; -- `/gps/fix/odometry` is the post-processed data (via WGS84) that can serve as the **ground-truth trajectories**. +- `/gps/fix/odometry` is the post-processed data (via WGS84) that can serve as the **ground-truth trajectories**. (We also provide equivalent CSV files of the ground-truth trajectories, in case users prefer them.) - Lastly, we ensured that the GPS-RTK system was always operating in the fixed mode (more accurate than the floating mode) in all experiments, thanks to the high-gain antennas. Notes about time synchronization: diff --git a/scripts/download_citrusfarm.py b/scripts/download_citrusfarm.py index 3b0c1db..477c082 100644 --- a/scripts/download_citrusfarm.py +++ b/scripts/download_citrusfarm.py @@ -2,28 +2,70 @@ import os import yaml import requests +import hashlib + +def ComputeMD5(file_path): + hash_md5 = hashlib.md5() + with open(file_path, "rb") as f: + for chunk in iter(lambda: f.read(4096), b""): + hash_md5.update(chunk) + return hash_md5.hexdigest() def DownloadFiles(base_url, folder_dict, folder_list, modality_list): - exempt_folders = ['Calibration', 'Calibration/config', 'Calibration/data', - 'Calibration/results', 'Calibration/scripts'] + data_folders = ["01_13B_Jackal", "02_13B_Jackal", "03_13B_Jackal", "04_13D_Jackal", + "05_13D_Jackal", "06_14B_Jackal", "07_14B_Jackal"] + files_to_verify = [] + # Download Phase for folder in folder_list: - filenames = folder_dict.get(folder, []) + filenames = folder_dict.get(folder, {}) + # Create folder locally if not exists if not os.path.exists(folder): os.makedirs(folder) - for filename in filenames: - # Apply modality filter, with exemptions for certain folders - if folder not in exempt_folders and not any(filename.startswith(modality) for modality in modality_list): + for filename in filenames.keys(): + # Apply modality filter on data folders + if folder in 
data_folders and not any(filename.startswith(modality) for modality in modality_list): + continue + + local_file_path = f"{folder}/{filename}" + + # Skip download if file already exists + if os.path.exists(local_file_path): + print(f"File {local_file_path} already exists, skipping download.") + files_to_verify.append((folder, filename)) continue # Generate the download URL download_url = f"{base_url}/{folder}/{filename}" - + # Download the file into the specified folder - print(f"Downloading {filename} from {folder}") - wget.download(download_url, f"{folder}/{filename}") + print(f"Downloading {local_file_path}") + wget.download(download_url, local_file_path) + print() + + # Add to list of files to verify + files_to_verify.append((folder, filename)) + + # MD5 Verification Phase + print(f"Verifying MD5 for downloaded files.") + for folder, filename in files_to_verify: + local_file_path = f"{folder}/{filename}" + expected_md5 = folder_dict[folder][filename]['md5'] + computed_md5 = ComputeMD5(local_file_path) + + while expected_md5 != computed_md5: + print(f"MD5 mismatch for {local_file_path}. Removing current file and Redownloading.") + os.remove(local_file_path) + download_url = f"{base_url}/{folder}/{filename}" + wget.download(download_url, local_file_path) + print() + print(f"Redownloaded {local_file_path}. 
Verifying again.") + computed_md5 = ComputeMD5(local_file_path) + + print(f"MD5 verified for {local_file_path}.") + print(f"MD5 verified for all downloaded files.") if __name__ == "__main__": # Base URL for the S3 bucket and YAML config @@ -34,13 +76,17 @@ def DownloadFiles(base_url, folder_dict, folder_list, modality_list): response = requests.get(yaml_url) config_data = yaml.safe_load(response.text) - folder_dict = config_data.get("folders", {}) + folder_dict = config_data.get("citrus-farm-dataset", {}) # List of folders you want to download - folder_list = ["01_13B_Jackal", "02_13B_Jackal", "03_13B_Jackal", - "04_13D_Jackal", "05_13D_Jackal", "06_14B_Jackal", - "07_14B_Jackal", "Calibration", "Calibration/config", - "Calibration/data", "Calibration/results", "Calibration/scripts"] + folder_list = ["01_13B_Jackal", "02_13B_Jackal", "03_13B_Jackal", "04_13D_Jackal", + "05_13D_Jackal", "06_14B_Jackal", "07_14B_Jackal", + "Calibration", "Calibration/config", "Calibration/data", + "Calibration/results", "Calibration/scripts", + "ground_truth/01_13B_Jackal", "ground_truth/02_13B_Jackal", + "ground_truth/03_13B_Jackal", "ground_truth/04_13D_Jackal", + "ground_truth/05_13D_Jackal", "ground_truth/06_14B_Jackal", + "ground_truth/07_14B_Jackal"] # List of modalities you want to download modality_list = ["adk", # thermal