Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Convert Cuboid2D to/from KITTI 3D data #1639

Merged
merged 4 commits into from
Oct 16, 2024
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
(<https://github.com/openvinotoolkit/datumaro/pull/1619>)
- Add PseudoLabeling transform for unlabeled dataset
(<https://github.com/openvinotoolkit/datumaro/pull/1594>)
- Convert Cuboid2D annotation to/from 3D data
(<https://github.com/openvinotoolkit/datumaro/pull/1639>)

### Enhancements
- Raise an appropriate error when exporting a datumaro dataset if its subset name contains path separators.
Expand Down
170 changes: 163 additions & 7 deletions src/datumaro/components/annotation.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
)

import attr
import cv2
import numpy as np
import shapely.geometry as sg
from attr import asdict, attrs, field
Expand Down Expand Up @@ -1372,19 +1373,19 @@ class Cuboid2D(Annotation):
[(x1, y1), (x2, y2), (x3, y3), (x4, y4), (x5, y5), (x6, y6), (x7, y7), (x8, y8)].


6---7
2---3
/| /|
5-+-8 |
| 2 + 3
1-+-4 |
| 5 + 6
|/ |/
1---4
8---7
Comment on lines +1376 to +1381
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I understood the overall structure, is there any reason to change the bottom and top face?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I aligned the order of points with Kitti format which describes top face first.


Attributes:
_type (AnnotationType): The type of annotation, set to `AnnotationType.bbox`.
_type (AnnotationType): The type of annotation, set to `AnnotationType.cuboid_2d`.

Methods:
__init__: Initializes the Cuboid2D with its coordinates.
wrap: Creates a new Bbox instance with updated attributes.
wrap: Creates a new Cuboid2D instance with updated attributes.
"""

_type = AnnotationType.cuboid_2d
Expand All @@ -1393,11 +1394,166 @@ class Cuboid2D(Annotation):
converter=attr.converters.optional(int), default=None, kw_only=True
)
z_order: int = field(default=0, validator=default_if_none(int), kw_only=True)
y_3d: float = field(default=None, validator=default_if_none(float), kw_only=True)

def __init__(self, _points: Iterable[Tuple[float, float]], *args, **kwargs):
def __init__(
self,
_points: Iterable[Tuple[float, float]],
*args,
**kwargs,
):
kwargs.pop("points", None) # comes from wrap()
self.__attrs_init__(points=_points, *args, **kwargs)

@staticmethod
def _get_plane_equation(points):
x1, y1, z1 = points[0, 0], points[0, 1], points[0, 2]
x2, y2, z2 = points[1, 0], points[1, 1], points[1, 2]
x3, y3, z3 = points[2, 0], points[2, 1], points[2, 2]
a1 = x2 - x1
b1 = y2 - y1
c1 = z2 - z1
a2 = x3 - x1
b2 = y3 - y1
c2 = z3 - z1
a = b1 * c2 - b2 * c1
b = a2 * c1 - a1 * c2
c = a1 * b2 - b1 * a2
d = -a * x1 - b * y1 - c * z1
return np.array([a, b, c, d])

@staticmethod
def _get_denorm(Tr_velo_to_cam_homo):
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Just a question, what is the meaning of Tr_velo_to_cam_homo?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Calibration matrix Tr_velo_to_cam has a shape of 3 x 4. To project 3D points to the 2D plane, we need to have homogeneous coordinates where each point is represented as a vector with 1 additional dimension.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is the projection matrix between Velodyne LiDAR to Camera, where LiDAR contains 4 dimensions ([X, Y, Z, 1]) and Camera contains 3 dimensions ([u, v, 1]). velo stands for Velodyne :)

ground_points_lidar = np.array([[0.0, 0.0, 0.0], [0.0, 1.0, 0.0], [1.0, 1.0, 0.0]])
ground_points_lidar = np.concatenate(
(ground_points_lidar, np.ones((ground_points_lidar.shape[0], 1))), axis=1
)
ground_points_cam = np.matmul(Tr_velo_to_cam_homo, ground_points_lidar.T).T
denorm = -1 * Cuboid2D._get_plane_equation(ground_points_cam)
return denorm

@staticmethod
def _get_3d_points(dim, location, rotation_y, denorm):
"""Get corner points according to the 3D bounding box parameters.

Args:
dim (List[float]): The dimensions of the 3D bounding box as [l, w, h].
location (List[float]): The location of the 3D bounding box as [x, y, z].
rotation_y (float): The rotation angle around the y-axis.

Returns:
np.ndarray: The corner points of the 3D bounding box.
"""

c, s = np.cos(rotation_y), np.sin(rotation_y)
R = np.array([[c, 0, s], [0, 1, 0], [-s, 0, c]], dtype=np.float32)
l, w, h = dim[2], dim[1], dim[0]
x_corners = [l / 2, l / 2, -l / 2, -l / 2, l / 2, l / 2, -l / 2, -l / 2]
y_corners = [0, 0, 0, 0, -h, -h, -h, -h]
z_corners = [w / 2, -w / 2, -w / 2, w / 2, w / 2, -w / 2, -w / 2, w / 2]

corners = np.array([x_corners, y_corners, z_corners], dtype=np.float32)
corners_3d = np.dot(R, corners)

denorm = denorm[:3]
denorm_norm = denorm / np.sqrt(denorm[0] ** 2 + denorm[1] ** 2 + denorm[2] ** 2)
ori_denorm = np.array([0.0, -1.0, 0.0])
theta = -1 * math.acos(np.dot(denorm_norm, ori_denorm))
n_vector = np.cross(denorm, ori_denorm)
n_vector_norm = n_vector / np.sqrt(n_vector[0] ** 2 + n_vector[1] ** 2 + n_vector[2] ** 2)
rotation_matrix, j = cv2.Rodrigues(theta * n_vector_norm)
corners_3d = np.dot(rotation_matrix, corners_3d)
corners_3d = corners_3d + np.array(location, dtype=np.float32).reshape(3, 1)
return corners_3d.transpose(1, 0)

@staticmethod
def _project_to_2d(pts_3d, P):
"""Project 3D points to 2D image plane.

Args:
pts_3d (np.ndarray): The 3D points to project.
P (np.ndarray): The projection matrix.

Returns:
np.ndarray: The 2D points projected to the image
"""
# Convert to homogeneous coordinates
pts_3d = pts_3d.T
pts_3d_homo = np.vstack((pts_3d, np.ones(pts_3d.shape[1])))
pts_2d = P @ pts_3d_homo
pts_2d[0, :] = np.divide(pts_2d[0, :], pts_2d[2, :])
pts_2d[1, :] = np.divide(pts_2d[1, :], pts_2d[2, :])
pts_2d = pts_2d[:2, :].T

return pts_2d

@classmethod
def from_3d(cls, dim, location, rotation_y, P, Tr_velo_to_cam):
"""Creates an instance of Cuboid2D class from 3D bounding box parameters.

Args:
dim (np.)
"""
Tr_velo_to_cam_homo = np.eye(4)
Tr_velo_to_cam_homo[:3, :4] = Tr_velo_to_cam
denorm = Cuboid2D._get_denorm(Tr_velo_to_cam_homo)
pts_3d = cls._get_3d_points(dim, location, rotation_y, denorm)
y_3d = np.mean(pts_3d[:4, 1])
pts_2d = cls._project_to_2d(pts_3d, P)

return cls(list(map(tuple, pts_2d)), y_3d=y_3d)

def to_3d(self, P_inv):
recon_3d = []
for idx, coord_2d in enumerate(self.points):
coord_2d = np.append(coord_2d, 1)
coord_3d = P_inv @ coord_2d
if idx < 4:
coord_3d = coord_3d * self.y_3d / coord_3d[1]
else:
coord_3d = coord_3d * recon_3d[idx - 4][0] / coord_3d[0]
recon_3d.append(coord_3d[:3])
recon_3d = np.array(recon_3d)

x = np.mean(recon_3d[:, 0])
z = np.mean(recon_3d[:, 2])

yaws = []
pairs = [(0, 1), (3, 2), (4, 5), (7, 6)]
for p in pairs:
delta_x = recon_3d[p[0]][0] - recon_3d[p[1]][0]
delta_z = recon_3d[p[0]][2] - recon_3d[p[1]][2]
yaws.append(np.arctan2(delta_x, delta_z))
yaw = np.mean(yaws)

widths = []
pairs = [(0, 1), (2, 3), (4, 5), (6, 7)]
for p in pairs:
delta_x = np.sqrt(
(recon_3d[p[0]][0] - recon_3d[p[1]][0]) ** 2
+ (recon_3d[p[0]][2] - recon_3d[p[1]][2]) ** 2
)
widths.append(delta_x)
w = np.mean(widths)

lengths = []
pairs = [(1, 2), (0, 3), (5, 6), (4, 7)]
for p in pairs:
delta_z = np.sqrt(
(recon_3d[p[0]][0] - recon_3d[p[1]][0]) ** 2
+ (recon_3d[p[0]][2] - recon_3d[p[1]][2]) ** 2
)
lengths.append(delta_z)
l = np.mean(lengths)

heights = []
pairs = [(0, 4), (1, 5), (2, 6), (3, 7)]
for p in pairs:
delta_y = np.abs(recon_3d[p[0]][1] - recon_3d[p[1]][1])
heights.append(delta_y)
h = np.mean(heights)
return np.array([h, w, l]), np.array([x, self.y_3d, z]), yaw


@attrs(slots=True, order=False)
class PointsCategories(Categories):
Expand Down
4 changes: 2 additions & 2 deletions src/datumaro/components/visualizer.py
Original file line number Diff line number Diff line change
Expand Up @@ -679,8 +679,8 @@ def _draw_cuboid_2d(
# Define the faces based on vertex indices

faces = [
[points[i] for i in [0, 1, 2, 3]], # Bottom face
[points[i] for i in [4, 5, 6, 7]], # Top face
[points[i] for i in [0, 1, 2, 3]], # Top face
[points[i] for i in [4, 5, 6, 7]], # Bottom face
[points[i] for i in [0, 1, 5, 4]], # Front face
[points[i] for i in [1, 2, 6, 5]], # Right face
[points[i] for i in [2, 3, 7, 6]], # Back face
Expand Down
64 changes: 64 additions & 0 deletions tests/unit/test_annotation.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@

from datumaro.components.annotation import (
Annotations,
Cuboid2D,
Ellipse,
ExtractedMask,
HashKey,
Expand Down Expand Up @@ -142,3 +143,66 @@ def test_get_semantic_seg_mask_binary_mask(self, fxt_index_mask, dtype):
semantic_seg_mask = annotations.get_semantic_seg_mask(ignore_index=255, dtype=dtype)

assert np.allclose(semantic_seg_mask, fxt_index_mask)


class Cuboid2DTest:
@pytest.fixture
def fxt_cuboid_2d(self):
return Cuboid2D(
[
(684.172, 237.810),
(750.486, 237.673),
(803.791, 256.313),
(714.712, 256.542),
(684.035, 174.227),
(750.263, 174.203),
(803.399, 170.990),
(714.476, 171.015),
],
y_3d=1.49,
)

@pytest.fixture
def fxt_kitti_data(self):
dimensions = np.array([1.49, 1.56, 4.34])
location = np.array([2.51, 1.49, 14.75])
rot_y = -1.59

return dimensions, location, rot_y

@pytest.fixture
def fxt_P2(self):
return np.array(
[
[7.215377000000e02, 0.000000000000e00, 6.095593000000e02, 4.485728000000e01],
[0.000000000000e00, 7.215377000000e02, 1.728540000000e02, 2.163791000000e-01],
[0.000000000000e00, 0.000000000000e00, 1.000000000000e00, 2.745884000000e-03],
]
)

@pytest.fixture
def fxt_velo_to_cam(self):
return np.array(
[
[7.533745000000e-03, -9.999714000000e-01, -6.166020000000e-04, -4.069766000000e-03],
[1.480249000000e-02, 7.280733000000e-04, -9.998902000000e-01, -7.631618000000e-02],
[9.998621000000e-01, 7.523790000000e-03, 1.480755000000e-02, -2.717806000000e-01],
]
)

def test_create_from_3d(self, fxt_kitti_data, fxt_cuboid_2d, fxt_P2, fxt_velo_to_cam):
actual = Cuboid2D.from_3d(
dim=fxt_kitti_data[0],
location=fxt_kitti_data[1],
rotation_y=fxt_kitti_data[2],
P=fxt_P2,
Tr_velo_to_cam=fxt_velo_to_cam,
)

assert np.allclose(actual.points, fxt_cuboid_2d.points, atol=1e-3)

def test_to_3d(self, fxt_kitti_data, fxt_cuboid_2d, fxt_P2):
P_inv = np.linalg.pinv(fxt_P2)
actual = fxt_cuboid_2d.to_3d(P_inv)
for act, exp in zip(actual, fxt_kitti_data):
assert np.allclose(act, exp, atol=2)
Loading