Sensor Dataset Visualization (#39)

* Add evaluation. * Cleanup history. * Fix docstrings. * Run autoflake. Co-authored-by: Benjamin Wilson <[email protected]>
argoverse · Apr 27, 2022 · bfb19a4 · bfb19a4
1 parent 6bd5d05
commit bfb19a4
Show file tree

Hide file tree

Showing 8 changed files with 564 additions and 222 deletions.
diff --git a/src/av2/datasets/sensor/sensor_dataloader.py b/src/av2/datasets/sensor/sensor_dataloader.py
diff --git a/src/av2/datasets/sensor/utils.py b/src/av2/datasets/sensor/utils.py
@@ -18,12 +18,16 @@ def convert_path_to_named_record(path: Path) -> Dict[str, Union[str, int]]:
     Returns:
         Mapping of name to record field.
     """
-    sensor_name = path.parent.stem
+    sensor_path = path.parent
+    sensor_name = sensor_path.stem
+    log_path = sensor_path.parent.parent if sensor_name == "lidar" else sensor_path.parent.parent.parent
 
     # log_id is 2 directories up for the lidar filepaths, but 3 levels up for images
     # {log_id}/sensors/cameras/ring_*/*.jpg vs.
     # {log_id}/sensors/lidar/*.feather
-    parent_idx = 2 if sensor_name == "lidar" else 3
-    log_id = path.parents[parent_idx].stem
-    sensor_name, timestamp_ns = path.parent.stem, int(path.stem)
-    return {"log_id": log_id, "sensor_name": sensor_name, "timestamp_ns": timestamp_ns}
+    return {
+        "split": log_path.parent.stem,
+        "log_id": log_path.stem,
+        "sensor_name": sensor_name,
+        "timestamp_ns": int(path.stem),
+    }
diff --git a/src/av2/geometry/camera/pinhole_camera.py b/src/av2/geometry/camera/pinhole_camera.py
@@ -181,9 +181,10 @@ def project_cam_to_img(
             is_valid_points: boolean indicator of valid cheirality and within image boundary, as
                 boolean Numpy array of shape (N,).
         """
-        uv = self.intrinsics.K @ points_cam[:3, :]
-        uv = uv.T
-        points_cam = points_cam.T
+        points_cam = points_cam.transpose()
+        uv: NDArrayFloat = self.intrinsics.K @ points_cam
+        uv = uv.transpose()
+        points_cam = points_cam.transpose()
 
         if remove_nan:
             uv, points_cam = remove_nan_values(uv, points_cam)
@@ -241,7 +242,7 @@ def project_ego_to_img_motion_compensated(
                 boolean Numpy array of shape (N,).
 
         Raises:
-            ValueError: If `city_SE3_ego_cam_t` or `city_SE3_ego_lidar_t` is `None`.
+            ValueError: If `city_SE3_ego_cam_t` or `city_SE3_ego_lidar_t` is `None`.
         """
         if city_SE3_ego_cam_t is None:
             raise ValueError("city_SE3_ego_cam_t cannot be `None`!")
@@ -406,6 +407,25 @@ def compute_pixel_ray_directions(self, uv: Union[NDArrayFloat, NDArrayInt]) -> N
             raise RuntimeError("Ray directions must be (N,3)")
         return ray_dirs
 
+    def scale(self, scale: float) -> PinholeCamera:
+        """Build a resized copy of this camera model.
+
+        Each of `fx_px`, `fy_px`, `cx_px` and `cy_px` is multiplied by `scale`. The image
+        width and height are multiplied by `scale` and then passed through `round`.
+
+        Args:
+            scale: Scaling factor.
+
+        Returns:
+            The scaled pinhole camera model.
+        """
+        src = self.intrinsics
+        fx_px, fy_px = src.fx_px * scale, src.fy_px * scale
+        cx_px, cy_px = src.cx_px * scale, src.cy_px * scale
+        width_px = round(src.width_px * scale)
+        height_px = round(src.height_px * scale)
+        scaled_intrinsics = Intrinsics(fx_px, fy_px, cx_px, cy_px, width_px, height_px)
+        # The extrinsics and camera name are carried over unchanged.
+        return PinholeCamera(
+            ego_SE3_cam=self.ego_SE3_cam,
+            intrinsics=scaled_intrinsics,
+            cam_name=self.cam_name,
+        )
+
 
 def remove_nan_values(uv: NDArrayFloat, points_cam: NDArrayFloat) -> Tuple[NDArrayFloat, NDArrayFloat]:
     """Remove NaN values from camera coordinates and image plane coordinates (accepts corrupt array).

diff --git a/src/av2/geometry/utm.py b/src/av2/geometry/utm.py
@@ -83,7 +83,6 @@ def convert_city_coords_to_utm(points_city: Union[NDArrayFloat, NDArrayInt], cit
     latitude, longitude = CITY_ORIGIN_LATLONG_DICT[city_name]
     # get (easting, northing) of origin
     origin_utm = convert_gps_to_utm(latitude=latitude, longitude=longitude, city_name=city_name)
-
     points_utm: NDArrayFloat = points_city.astype(float) + np.array(origin_utm, dtype=float)
     return points_utm
 

diff --git a/src/av2/rendering/color.py b/src/av2/rendering/color.py
@@ -2,12 +2,14 @@
 
 """Colormap related constants and functions."""
 
+from enum import Enum, unique
 from typing import Final, Sequence, Tuple
 
+import matplotlib.pyplot as plt
 import numpy as np
 from matplotlib.colors import LinearSegmentedColormap
 
-from av2.utils.typing import NDArrayFloat
+from av2.utils.typing import NDArrayByte, NDArrayFloat
 
 RED_HEX: Final[str] = "#df0101"
 GREEN_HEX: Final[str] = "#31b404"
@@ -31,6 +33,31 @@
 TRAFFIC_YELLOW1_BGR: Final[Tuple[int, int, int]] = TRAFFIC_YELLOW1_RGB[::-1]
 
 
+# `@unique` makes the enum definition fail if two members share a value.
+@unique
+class ColorFormats(str, Enum):
+    """Color channel formats.
+
+    Members also subclass `str`, so each member compares equal to its string value.
+    """
+
+    BGR = "BGR"
+    RGB = "RGB"
+
+
+def create_range_map(points_xyz: NDArrayFloat) -> NDArrayByte:
+    """Generate a colormap as a function of the lidar range.
+
+    The range value is the third coordinate (index 2) of each point, rounded to the
+    nearest integer and used directly as an index into matplotlib's "turbo" lookup table.
+    Indices below zero or past the end of the table are mapped by matplotlib to the
+    colormap's under / over colors instead of wrapping around.
+
+    Args:
+        points_xyz: (N,3) Points (x,y,z).
+
+    Returns:
+        (N,4) Colors as unsigned bytes. Matplotlib colormaps return RGBA, so the first
+            three channels are RGB and the fourth is alpha.
+    """
+    range_idx = np.round(points_xyz[..., 2]).astype(int)  # type: ignore
+    # Calling the colormap with integers indexes its lookup table directly. For
+    # non-negative values this yields the same colors as building a [0, max] table and
+    # fancy-indexing it, but negative values no longer wrap around to the far end of the
+    # table and empty input no longer fails on `max()`.
+    # `np.asarray` covers the single-point case, where matplotlib returns a plain tuple.
+    color = np.asarray(plt.get_cmap("turbo")(range_idx))
+    range_cmap: NDArrayByte = (color * 255.0).astype(np.uint8)
+    return range_cmap
+
+
 def create_colormap(color_list: Sequence[str], n_colors: int) -> NDArrayFloat:
     """Create hex colorscale to interpolate between requested colors.
 

diff --git a/src/av2/rendering/video.py b/src/av2/rendering/video.py
@@ -2,20 +2,46 @@
 
 """Rendering tools for video visualizations."""
 
+from __future__ import annotations
+
+from enum import Enum, unique
 from pathlib import Path
-from typing import Dict, Final, Union
+from typing import Dict, Final, Mapping, Optional, Set, Union
 
 import av
-import cv2
 import numpy as np
 import pandas as pd
 
+from av2.rendering.color import ColorFormats
 from av2.utils.typing import NDArrayByte
 
+# Maps each `ColorFormats` member to the format string that `write_video` passes to
+# `av.VideoFrame.from_ndarray(..., format=...)`.
+COLOR_FORMAT_TO_PYAV_COLOR_FORMAT: Final[Dict[ColorFormats, str]] = {
+    ColorFormats.RGB: "rgb24",
+    ColorFormats.BGR: "bgr24",
+}
 FFMPEG_OPTIONS: Final[Dict[str, str]] = {"crf": "27"}
 
 
-def tile_cameras(named_sensors: Dict[str, Union[NDArrayByte, pd.DataFrame]]) -> NDArrayByte:
+@unique
+class VideoCodecs(str, Enum):
+    """Available video codecs for encoding mp4 videos.
+
+    Members also subclass `str`; `write_video` passes the member straight to
+    `output.add_stream` as the codec name.
+
+    NOTE: The codecs available are dependent on the FFmpeg build that
+        you are using. We recommend defaulting to LIBX264.
+    """
+
+    LIBX264 = "libx264"  # https://en.wikipedia.org/wiki/Advanced_Video_Coding
+    LIBX265 = "libx265"  # https://en.wikipedia.org/wiki/High_Efficiency_Video_Coding
+    HEVC_VIDEOTOOLBOX = "hevc_videotoolbox"  # macOS GPU acceleration.
+
+
+# Codecs for which `write_video` sets the "hvc1" codec tag on the output stream.
+HIGH_EFFICIENCY_VIDEO_CODECS: Final[Set[VideoCodecs]] = {
+    VideoCodecs.LIBX265,
+    VideoCodecs.HEVC_VIDEOTOOLBOX,
+}
+
+
+def tile_cameras(
+    named_sensors: Mapping[str, Union[NDArrayByte, pd.DataFrame]],
+    bev_img: Optional[NDArrayByte] = None,
+) -> NDArrayByte:
     """Combine ring cameras into a tiled image.
 
     NOTE: Images are expected in BGR ordering.
@@ -32,43 +58,62 @@ def tile_cameras(named_sensors: Dict[str, Union[NDArrayByte, pd.DataFrame]]) ->
 
     Args:
         named_sensors: Dictionary of camera names to the (width, height, 3) images.
+        bev_img: (H,W,3) Bird's-eye view image.
 
     Returns:
         Tiled image.
     """
-    landscape_width = 2048
-    landscape_height = 1550
+    landscape_height = 2048
+    landscape_width = 1550
+    for _, v in named_sensors.items():
+        landscape_width = max(v.shape[0], v.shape[1])
+        landscape_height = min(v.shape[0], v.shape[1])
+        break
 
     height = landscape_height + landscape_height + landscape_height
     width = landscape_width + landscape_height + landscape_width
     tiled_im_bgr: NDArrayByte = np.zeros((height, width, 3), dtype=np.uint8)
 
-    ring_rear_left = named_sensors["ring_rear_left"]
-    ring_side_left = named_sensors["ring_side_left"]
-    ring_front_center = named_sensors["ring_front_center"]
-    ring_front_left = named_sensors["ring_front_left"]
-    ring_front_right = named_sensors["ring_front_right"]
-    ring_side_right = named_sensors["ring_side_right"]
-    ring_rear_right = named_sensors["ring_rear_right"]
+    if "ring_front_left" in named_sensors:
+        ring_front_left = named_sensors["ring_front_left"]
+        tiled_im_bgr[:landscape_height, :landscape_width] = ring_front_left
+
+    if "ring_front_center" in named_sensors:
+        ring_front_center = named_sensors["ring_front_center"]
+        tiled_im_bgr[:landscape_width, landscape_width : landscape_width + landscape_height] = ring_front_center
+
+    if "ring_front_right" in named_sensors:
+        ring_front_right = named_sensors["ring_front_right"]
+        tiled_im_bgr[:landscape_height, landscape_width + landscape_height :] = ring_front_right
 
-    tiled_im_bgr[:landscape_height, :landscape_width] = ring_front_left
-    tiled_im_bgr[:landscape_width, landscape_width : landscape_width + landscape_height] = ring_front_center
-    tiled_im_bgr[:landscape_height, landscape_width + landscape_height :] = ring_front_right
+    if "ring_side_left" in named_sensors:
+        ring_side_left = named_sensors["ring_side_left"]
+        tiled_im_bgr[landscape_height : 2 * landscape_height, :landscape_width] = ring_side_left
 
-    tiled_im_bgr[landscape_height:3100, :landscape_width] = ring_side_left
-    tiled_im_bgr[landscape_height:3100, landscape_width + landscape_height :] = ring_side_right
+    if "ring_side_right" in named_sensors:
+        ring_side_right = named_sensors["ring_side_right"]
+        tiled_im_bgr[landscape_height : 2 * landscape_height, landscape_width + landscape_height :] = ring_side_right
 
-    start = (width - 4096) // 2
-    tiled_im_bgr[3100:4650, start : start + landscape_width] = np.fliplr(ring_rear_left)  # type: ignore
-    tiled_im_bgr[3100:4650, start + landscape_width : start + 4096] = np.fliplr(ring_rear_right)  # type: ignore
-    tiled_im_rgb: NDArrayByte = cv2.cvtColor(tiled_im_bgr, cv2.COLOR_BGR2RGB)
-    return tiled_im_rgb
+    if bev_img is not None:
+        tiled_im_bgr[
+            landscape_width : 2 * landscape_width, landscape_width : landscape_width + landscape_height
+        ] = bev_img
+
+    if "ring_rear_left" in named_sensors:
+        ring_rear_left = named_sensors["ring_rear_left"]
+        tiled_im_bgr[2 * landscape_height : 3 * landscape_height, :landscape_width] = ring_rear_left
+
+    if "ring_rear_right" in named_sensors:
+        ring_rear_right = named_sensors["ring_rear_right"]
+        tiled_im_bgr[2 * landscape_height : 3 * landscape_height, width - landscape_width :] = ring_rear_right
+    return tiled_im_bgr
 
 
 def write_video(
     video: NDArrayByte,
     dst: Path,
-    codec: str = "libx264",
+    color_format: ColorFormats = ColorFormats.RGB,
+    codec: VideoCodecs = VideoCodecs.LIBX264,
     fps: int = 10,
     crf: int = 27,
     preset: str = "veryfast",
@@ -78,14 +123,15 @@ def write_video(
     Reference: https://github.com/PyAV-Org/PyAV
 
     Args:
-        video: (N,H,W,3) array representing N RGB frames of identical dimensions.
-        dst: path to save folder.
-        codec: the name of a codec.
-        fps: the frame rate for video.
-        crf: constant rate factor (CRF) parameter of video, controlling the quality.
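+        video: (N,H,W,3) Array representing N frames of identical dimensions, with
+            channels ordered according to `color_format`.
+        dst: Path of the output video file. Its parent directory is created if missing.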
+        color_format: Format of the color channels.
+        codec: Name of the codec.
+        fps: Frame rate for video.
+        crf: Constant rate factor (CRF) parameter of video, controlling the quality.
             Lower values would result in better quality, at the expense of higher file sizes.
             For x264, the valid Constant Rate Factor (crf) range is 0-51.
-        preset: file encoding speed. Options range from "ultrafast", ..., "fast", ..., "medium", ..., "slow", ...
+        preset: File encoding speed. Options range from "ultrafast", ..., "fast", ..., "medium", ..., "slow", ...
             Higher compression efficiency often translates to slower video encoding speed, at file write time.
     """
     _, H, W, _ = video.shape
@@ -98,6 +144,8 @@ def write_video(
     dst.parent.mkdir(parents=True, exist_ok=True)
     with av.open(str(dst), "w") as output:
         stream = output.add_stream(codec, fps)
+        if codec in HIGH_EFFICIENCY_VIDEO_CODECS:
+            stream.codec_tag = "hvc1"
         stream.width = W
         stream.height = H
         stream.options = {
@@ -106,10 +154,11 @@ def write_video(
             "movflags": "+faststart",
             "preset": preset,
             "profile:v": "main",
-            "tag": "hvc1",
         }
+
+        format = COLOR_FORMAT_TO_PYAV_COLOR_FORMAT[color_format]
         for _, img in enumerate(video):
-            frame = av.VideoFrame.from_ndarray(img)
+            frame = av.VideoFrame.from_ndarray(img, format=format)
             output.mux(stream.encode(frame))
         output.mux(stream.encode(None))