Skip to content

Commit

Permalink
Sensor Dataset Visualization (#39)
Browse files Browse the repository at this point in the history
* Add evaluation.

* Cleanup history.

* Fix docstrings.

* Run autoflake.

Co-authored-by: Benjamin Wilson <[email protected]>
  • Loading branch information
benjaminrwilson and Benjamin Wilson authored Apr 27, 2022
1 parent 6bd5d05 commit bfb19a4
Show file tree
Hide file tree
Showing 8 changed files with 564 additions and 222 deletions.
307 changes: 170 additions & 137 deletions src/av2/datasets/sensor/sensor_dataloader.py

Large diffs are not rendered by default.

14 changes: 9 additions & 5 deletions src/av2/datasets/sensor/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,12 +18,16 @@ def convert_path_to_named_record(path: Path) -> Dict[str, Union[str, int]]:
Returns:
Mapping of name to record field.
"""
sensor_name = path.parent.stem
sensor_path = path.parent
sensor_name = sensor_path.stem
log_path = sensor_path.parent.parent if sensor_name == "lidar" else sensor_path.parent.parent.parent

# log_id is 2 directories up for the lidar filepaths, but 3 levels up for images
# {log_id}/sensors/cameras/ring_*/*.jpg vs.
# {log_id}/sensors/lidar/*.feather
parent_idx = 2 if sensor_name == "lidar" else 3
log_id = path.parents[parent_idx].stem
sensor_name, timestamp_ns = path.parent.stem, int(path.stem)
return {"log_id": log_id, "sensor_name": sensor_name, "timestamp_ns": timestamp_ns}
return {
"split": log_path.parent.stem,
"log_id": log_path.stem,
"sensor_name": sensor_name,
"timestamp_ns": int(path.stem),
}
28 changes: 24 additions & 4 deletions src/av2/geometry/camera/pinhole_camera.py
Original file line number Diff line number Diff line change
Expand Up @@ -181,9 +181,10 @@ def project_cam_to_img(
is_valid_points: boolean indicator of valid cheirality and within image boundary, as
boolean Numpy array of shape (N,).
"""
uv = self.intrinsics.K @ points_cam[:3, :]
uv = uv.T
points_cam = points_cam.T
points_cam = points_cam.transpose()
uv: NDArrayFloat = self.intrinsics.K @ points_cam
uv = uv.transpose()
points_cam = points_cam.transpose()

if remove_nan:
uv, points_cam = remove_nan_values(uv, points_cam)
Expand Down Expand Up @@ -241,7 +242,7 @@ def project_ego_to_img_motion_compensated(
boolean Numpy array of shape (N,).
Raises:
ValueError: If `city_SE3_ego_cam_t` or `city_SE3_ego_lidar_t` is `None`.
ValueError: If `city_SE3_egovehicle_cam_t` or `city_SE3_egovehicle_lidar_t` is `None`.
"""
if city_SE3_ego_cam_t is None:
raise ValueError("city_SE3_ego_cam_t cannot be `None`!")
Expand Down Expand Up @@ -406,6 +407,25 @@ def compute_pixel_ray_directions(self, uv: Union[NDArrayFloat, NDArrayInt]) -> N
raise RuntimeError("Ray directions must be (N,3)")
return ray_dirs

def scale(self, scale: float) -> PinholeCamera:
    """Build a new camera whose intrinsics and image size are multiplied by `scale`.

    The `fx_px`, `fy_px`, `cx_px` and `cy_px` intrinsics are multiplied by the factor as-is.
    The image width and height are multiplied by the factor and then passed through the
    builtin `round`. The extrinsics (`ego_SE3_cam`) and the camera name are carried over
    unchanged.

    Args:
        scale: Scaling factor.

    Returns:
        The scaled pinhole camera model.
    """
    src = self.intrinsics
    fx_px = src.fx_px * scale
    fy_px = src.fy_px * scale
    cx_px = src.cx_px * scale
    cy_px = src.cy_px * scale
    # Image dimensions are rounded after scaling.
    width_px = round(src.width_px * scale)
    height_px = round(src.height_px * scale)
    scaled_intrinsics = Intrinsics(fx_px, fy_px, cx_px, cy_px, width_px, height_px)
    return PinholeCamera(ego_SE3_cam=self.ego_SE3_cam, intrinsics=scaled_intrinsics, cam_name=self.cam_name)


def remove_nan_values(uv: NDArrayFloat, points_cam: NDArrayFloat) -> Tuple[NDArrayFloat, NDArrayFloat]:
"""Remove NaN values from camera coordinates and image plane coordinates (accepts corrupt array).
Expand Down
1 change: 0 additions & 1 deletion src/av2/geometry/utm.py
Original file line number Diff line number Diff line change
Expand Up @@ -83,7 +83,6 @@ def convert_city_coords_to_utm(points_city: Union[NDArrayFloat, NDArrayInt], cit
latitude, longitude = CITY_ORIGIN_LATLONG_DICT[city_name]
# get (easting, northing) of origin
origin_utm = convert_gps_to_utm(latitude=latitude, longitude=longitude, city_name=city_name)

points_utm: NDArrayFloat = points_city.astype(float) + np.array(origin_utm, dtype=float)
return points_utm

Expand Down
29 changes: 28 additions & 1 deletion src/av2/rendering/color.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,12 +2,14 @@

"""Colormap related constants and functions."""

from enum import Enum, unique
from typing import Final, Sequence, Tuple

import matplotlib.pyplot as plt
import numpy as np
from matplotlib.colors import LinearSegmentedColormap

from av2.utils.typing import NDArrayFloat
from av2.utils.typing import NDArrayByte, NDArrayFloat

RED_HEX: Final[str] = "#df0101"
GREEN_HEX: Final[str] = "#31b404"
Expand All @@ -31,6 +33,31 @@
TRAFFIC_YELLOW1_BGR: Final[Tuple[int, int, int]] = TRAFFIC_YELLOW1_RGB[::-1]


@unique
class ColorFormats(str, Enum):
    """Supported orderings of the color channels in an image.

    Members also subclass `str`, so each one compares equal to its plain string value.
    """

    BGR = "BGR"  # Blue, green, red channel order.
    RGB = "RGB"  # Red, green, blue channel order.


def create_range_map(points_xyz: NDArrayFloat) -> NDArrayByte:
    """Generate a "turbo" colormap as a function of the lidar range.

    The last coordinate of each point is rounded to an integer and used as an index into
    a lookup table built from the "turbo" colormap, so points that round to the same
    value share a color.

    NOTE(review): only the z coordinate is used as the "range" -- presumably the points are
    expected in a frame where z is depth (e.g. the camera frame); confirm against callers.

    Args:
        points_xyz: (N,3) Points (x,y,z).

    Returns:
        One color per point, scaled to [0, 255] and stored as unsigned bytes.
        NOTE(review): previously documented as (N,3) RGB, but the colormap output is
        returned unmodified and matplotlib colormaps yield RGBA -- verify the channel
        count expected downstream.
    """
    depth_bins = np.round(points_xyz[..., 2]).astype(int)  # type: ignore
    # `max()` raises on an empty array; fall back to a single-entry lookup table so that
    # empty input yields an empty color array instead of an exception.
    highest_bin = depth_bins.max() if depth_bins.size > 0 else 0
    lookup_table = plt.get_cmap("turbo")(np.arange(0, highest_bin + 1))
    colors = lookup_table[depth_bins]
    range_cmap: NDArrayByte = (colors * 255.0).astype(np.uint8)
    return range_cmap


def create_colormap(color_list: Sequence[str], n_colors: int) -> NDArrayFloat:
"""Create hex colorscale to interpolate between requested colors.
Expand Down
111 changes: 80 additions & 31 deletions src/av2/rendering/video.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,20 +2,46 @@

"""Rendering tools for video visualizations."""

from __future__ import annotations

from enum import Enum, unique
from pathlib import Path
from typing import Dict, Final, Union
from typing import Dict, Final, Mapping, Optional, Set, Union

import av
import cv2
import numpy as np
import pandas as pd

from av2.rendering.color import ColorFormats
from av2.utils.typing import NDArrayByte

# Maps each supported channel ordering to the pixel-format string handed to
# `av.VideoFrame.from_ndarray` when encoding frames.
COLOR_FORMAT_TO_PYAV_COLOR_FORMAT: Final[Dict[ColorFormats, str]] = {
ColorFormats.RGB: "rgb24",
ColorFormats.BGR: "bgr24",
}
# FFmpeg options keyed by option name; "crf" is the constant rate factor.
FFMPEG_OPTIONS: Final[Dict[str, str]] = {"crf": "27"}


def tile_cameras(named_sensors: Dict[str, Union[NDArrayByte, pd.DataFrame]]) -> NDArrayByte:
@unique
class VideoCodecs(str, Enum):
    """Available video codecs for encoding mp4 videos.

    Members also subclass `str`, so each one compares equal to its codec name.

    NOTE: The codecs available are dependent on the FFmpeg build that
        you are using. We recommend defaulting to LIBX264.
    """

    LIBX264 = "libx264"  # https://en.wikipedia.org/wiki/Advanced_Video_Coding
    LIBX265 = "libx265"  # https://en.wikipedia.org/wiki/High_Efficiency_Video_Coding
    HEVC_VIDEOTOOLBOX = "hevc_videotoolbox"  # macOS GPU acceleration.


# Codecs whose streams `write_video` tags with the "hvc1" codec tag.
HIGH_EFFICIENCY_VIDEO_CODECS: Final[Set[VideoCodecs]] = {VideoCodecs.LIBX265, VideoCodecs.HEVC_VIDEOTOOLBOX}


def tile_cameras(
named_sensors: Mapping[str, Union[NDArrayByte, pd.DataFrame]],
bev_img: Optional[NDArrayByte] = None,
) -> NDArrayByte:
"""Combine ring cameras into a tiled image.
NOTE: Images are expected in BGR ordering.
Expand All @@ -32,43 +58,62 @@ def tile_cameras(named_sensors: Dict[str, Union[NDArrayByte, pd.DataFrame]]) ->
Args:
named_sensors: Dictionary of camera names to the (width, height, 3) images.
bev_img: (H,W,3) Bird's-eye view image.
Returns:
Tiled image.
"""
landscape_width = 2048
landscape_height = 1550
landscape_height = 2048
landscape_width = 1550
for _, v in named_sensors.items():
landscape_width = max(v.shape[0], v.shape[1])
landscape_height = min(v.shape[0], v.shape[1])
break

height = landscape_height + landscape_height + landscape_height
width = landscape_width + landscape_height + landscape_width
tiled_im_bgr: NDArrayByte = np.zeros((height, width, 3), dtype=np.uint8)

ring_rear_left = named_sensors["ring_rear_left"]
ring_side_left = named_sensors["ring_side_left"]
ring_front_center = named_sensors["ring_front_center"]
ring_front_left = named_sensors["ring_front_left"]
ring_front_right = named_sensors["ring_front_right"]
ring_side_right = named_sensors["ring_side_right"]
ring_rear_right = named_sensors["ring_rear_right"]
if "ring_front_left" in named_sensors:
ring_front_left = named_sensors["ring_front_left"]
tiled_im_bgr[:landscape_height, :landscape_width] = ring_front_left

if "ring_front_center" in named_sensors:
ring_front_center = named_sensors["ring_front_center"]
tiled_im_bgr[:landscape_width, landscape_width : landscape_width + landscape_height] = ring_front_center

if "ring_front_right" in named_sensors:
ring_front_right = named_sensors["ring_front_right"]
tiled_im_bgr[:landscape_height, landscape_width + landscape_height :] = ring_front_right

tiled_im_bgr[:landscape_height, :landscape_width] = ring_front_left
tiled_im_bgr[:landscape_width, landscape_width : landscape_width + landscape_height] = ring_front_center
tiled_im_bgr[:landscape_height, landscape_width + landscape_height :] = ring_front_right
if "ring_side_left" in named_sensors:
ring_side_left = named_sensors["ring_side_left"]
tiled_im_bgr[landscape_height : 2 * landscape_height, :landscape_width] = ring_side_left

tiled_im_bgr[landscape_height:3100, :landscape_width] = ring_side_left
tiled_im_bgr[landscape_height:3100, landscape_width + landscape_height :] = ring_side_right
if "ring_side_right" in named_sensors:
ring_side_right = named_sensors["ring_side_right"]
tiled_im_bgr[landscape_height : 2 * landscape_height, landscape_width + landscape_height :] = ring_side_right

start = (width - 4096) // 2
tiled_im_bgr[3100:4650, start : start + landscape_width] = np.fliplr(ring_rear_left) # type: ignore
tiled_im_bgr[3100:4650, start + landscape_width : start + 4096] = np.fliplr(ring_rear_right) # type: ignore
tiled_im_rgb: NDArrayByte = cv2.cvtColor(tiled_im_bgr, cv2.COLOR_BGR2RGB)
return tiled_im_rgb
if bev_img is not None:
tiled_im_bgr[
landscape_width : 2 * landscape_width, landscape_width : landscape_width + landscape_height
] = bev_img

if "ring_rear_left" in named_sensors:
ring_rear_left = named_sensors["ring_rear_left"]
tiled_im_bgr[2 * landscape_height : 3 * landscape_height, :landscape_width] = ring_rear_left

if "ring_rear_right" in named_sensors:
ring_rear_right = named_sensors["ring_rear_right"]
tiled_im_bgr[2 * landscape_height : 3 * landscape_height, width - landscape_width :] = ring_rear_right
return tiled_im_bgr


def write_video(
video: NDArrayByte,
dst: Path,
codec: str = "libx264",
color_format: ColorFormats = ColorFormats.RGB,
codec: VideoCodecs = VideoCodecs.LIBX264,
fps: int = 10,
crf: int = 27,
preset: str = "veryfast",
Expand All @@ -78,14 +123,15 @@ def write_video(
Reference: https://github.com/PyAV-Org/PyAV
Args:
video: (N,H,W,3) array representing N RGB frames of identical dimensions.
dst: path to save folder.
codec: the name of a codec.
fps: the frame rate for video.
crf: constant rate factor (CRF) parameter of video, controlling the quality.
video: (N,H,W,3) Array representing N RGB frames of identical dimensions.
dst: Path to save folder.
color_format: Format of the color channels.
codec: Name of the codec.
fps: Frame rate for video.
crf: Constant rate factor (CRF) parameter of video, controlling the quality.
Lower values would result in better quality, at the expense of higher file sizes.
For x264, the valid Constant Rate Factor (crf) range is 0-51.
preset: file encoding speed. Options range from "ultrafast", ..., "fast", ..., "medium", ..., "slow", ...
preset: File encoding speed. Options range from "ultrafast", ..., "fast", ..., "medium", ..., "slow", ...
Higher compression efficiency often translates to slower video encoding speed, at file write time.
"""
_, H, W, _ = video.shape
Expand All @@ -98,6 +144,8 @@ def write_video(
dst.parent.mkdir(parents=True, exist_ok=True)
with av.open(str(dst), "w") as output:
stream = output.add_stream(codec, fps)
if codec in HIGH_EFFICIENCY_VIDEO_CODECS:
stream.codec_tag = "hvc1"
stream.width = W
stream.height = H
stream.options = {
Expand All @@ -106,10 +154,11 @@ def write_video(
"movflags": "+faststart",
"preset": preset,
"profile:v": "main",
"tag": "hvc1",
}

format = COLOR_FORMAT_TO_PYAV_COLOR_FORMAT[color_format]
for _, img in enumerate(video):
frame = av.VideoFrame.from_ndarray(img)
frame = av.VideoFrame.from_ndarray(img, format=format)
output.mux(stream.encode(frame))
output.mux(stream.encode(None))

Expand Down
Loading

0 comments on commit bfb19a4

Please sign in to comment.