Skip to content

Commit

Permalink
Merge pull request #170 from argoai/add-cv2-VideoWriter-support
Browse files Browse the repository at this point in the history
allow options on rendering centerlines, directly generating mp4, and …
  • Loading branch information
johnwlambert authored May 7, 2021
2 parents fade16d + 98c40ba commit 6c360f3
Show file tree
Hide file tree
Showing 4 changed files with 198 additions and 50 deletions.
57 changes: 57 additions & 0 deletions argoverse/utils/cv2_video_utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
#!/usr/bin/python3

from typing import Optional

import cv2
import numpy as np

"""
Python-based utilities to avoid blowing up the disk with images, as FFMPEG requires.
Inspired by Detectron2 and MSeg:
https://github.com/facebookresearch/detectron2/blob/bab413cdb822af6214f9b7f70a9b7a9505eb86c5/demo/demo.py
https://github.com/mseg-dataset/mseg-semantic/blob/master/mseg_semantic/utils/cv2_video_utils.py
See OpenCV documentation for more details:
https://docs.opencv.org/2.4/modules/highgui/doc/reading_and_writing_images_and_video.html#videowriter-videowriter
"""


class VideoWriter:
    """Write RGB frames to a video file through OpenCV, opening the file lazily.

    Lazy init, so that the user doesn't have to know width/height a priori:
    the underlying ``cv2.VideoWriter`` is only created when the first frame
    is added, and it takes its frame size from that frame.

    Our default codec is "mp4v", though you may prefer "x264", if available
    on your system.

    Instances can be used as context managers; ``complete()`` is then called
    on exit, even if an exception was raised while frames were being added.
    """

    def __init__(self, output_fpath: str, fps: int = 30, codec: str = "mp4v") -> None:
        """Initialize VideoWriter options.

        Args:
            output_fpath: path of the video file to write.
            fps: frames per second of the output video.
            codec: four-character code (FOURCC) selecting the codec.

        Raises:
            ValueError: if `codec` is not exactly four characters long.
        """
        # The codec string is unpacked character by character when the file is
        # opened, so reject malformed codes here rather than on the first frame.
        if len(codec) != 4:
            raise ValueError(f"codec must be a 4-character FOURCC code, received {codec!r}")
        self.output_fpath = output_fpath
        self.fps = fps
        # Stays None until the first frame is added (see `add_frame`).
        self.writer: Optional[cv2.VideoWriter] = None
        self.codec = codec

    def __enter__(self) -> "VideoWriter":
        """Return the writer itself so it can be bound by a `with` statement."""
        return self

    def __exit__(self, *exc_info: object) -> None:
        """Finalize the video on leaving the `with` block; exceptions are not suppressed."""
        self.complete()

    def init_outf(self, height: int, width: int) -> None:
        """Initialize the output video file.

        Args:
            height: frame height, in pixels.
            width: frame width, in pixels.
        """
        self.writer = cv2.VideoWriter(
            filename=self.output_fpath,
            # some installations of OpenCV may not support x264 (due to its license),
            # you can try another format (e.g. MPEG)
            fourcc=cv2.VideoWriter_fourcc(*self.codec),
            fps=float(self.fps),
            frameSize=(width, height),
            isColor=True,
        )

    def add_frame(self, rgb_frame: np.ndarray) -> None:
        """Append a frame of shape (h,w,3) to the end of the video file.

        The first frame added determines the frame size the output file is
        opened with.

        Args:
            rgb_frame: array of shape (h,w,3), in RGB channel order.
        """
        h, w, _ = rgb_frame.shape
        if self.writer is None:
            self.init_outf(height=h, width=w)
        # Reverse the channel axis (RGB -> BGR). The reversed slice is a
        # negatively-strided view, so materialize it as a contiguous array
        # before handing it to OpenCV.
        bgr_frame = np.ascontiguousarray(rgb_frame[:, :, ::-1])
        if self.writer is not None:
            self.writer.write(bgr_frame)

    def complete(self) -> None:
        """Release the underlying writer; a no-op if no frame was ever added."""
        if self.writer is not None:
            self.writer.release()
49 changes: 36 additions & 13 deletions argoverse/utils/ffmpeg_utils.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
# <Copyright 2019, Argo AI, LLC. Released under the MIT license.>

import os
from pathlib import Path

from argoverse.utils.subprocess_utils import run_command

Expand All @@ -24,13 +26,9 @@ def write_video(image_prefix: str, output_prefix: str, fps: int = 10) -> None:
a prefix, e.g. .png/.jpg. Absolute path
output_prefix: absolute path for output video, without .mp4 prefix
fps: integer, frames per second
Returns:
None
"""
cmd = f"ffmpeg -r {fps} -i {image_prefix} -vcodec libx264 -profile:v main"
cmd += " -level 3.1 -preset medium -crf 23 -x264-params ref=4 -acodec"
cmd += f" copy -movflags +faststart -pix_fmt yuv420p -vf scale=920:-2 {output_prefix}_{fps}fps.mp4"
codec_params_string = get_ffmpeg_codec_params_string()
cmd = f"ffmpeg -r {fps} -i {image_prefix} {codec_params_string} {output_prefix}_{fps}fps.mp4"
print(cmd)
run_command(cmd)

Expand All @@ -41,14 +39,39 @@ def write_nonsequential_idx_video(img_wildcard: str, output_fpath: str, fps: int
img_wildcard: string
output_fpath: string
fps: integer, frames per second
"""
codec_params_string = get_ffmpeg_codec_params_string()
cmd = f"ffmpeg -r {fps} -f image2 -i {img_wildcard} {codec_params_string} {output_fpath}"
print(cmd)
run_command(cmd)

Returns:
None

def ffmpeg_compress_video(uncompressed_mp4_path: str, fps: int) -> None:
"""Generate compressed version of video, and delete uncompressed version.
Args:
uncompressed_mp4_path: path to the uncompressed video to compress
fps: integer, frames per second
"""
cmd = f"ffmpeg -r {fps} -f image2 -i {img_wildcard}"
cmd += " -vcodec libx264 -profile:v main"
cmd += " -level 3.1 -preset medium -crf 23 -x264-params ref=4 -acodec"
cmd += " copy -movflags +faststart -pix_fmt yuv420p -vf scale=920:-2"
cmd += f" {output_fpath}"
codec_params_string = get_ffmpeg_codec_params_string()
fname_stem = Path(uncompressed_mp4_path).stem
compressed_mp4_path = f"{Path(uncompressed_mp4_path).parent}/{fname_stem}_compressed.mp4"
cmd = f"ffmpeg -r {fps} -i {uncompressed_mp4_path} {codec_params_string} {compressed_mp4_path}"
print(cmd)
run_command(cmd)
os.remove(uncompressed_mp4_path)


def get_ffmpeg_codec_params_string() -> str:
"""Generate command line params for FFMPEG for a widely compatible codec with good compression"""
codec_params = [
"-vcodec libx264",
"-profile:v main",
"-level 3.1",
"-preset medium",
"-crf 23",
"-x264-params ref=4",
"-acodec copy",
"-movflags +faststart",
"-pix_fmt yuv420p",
"-vf scale=920:-2",
]
return " ".join(codec_params)
140 changes: 104 additions & 36 deletions demo_usage/cuboids_to_bboxes.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,9 @@
import copy
import glob
import logging
import multiprocessing
import os
import sys
from multiprocessing import Pool
from pathlib import Path
from typing import Any, Iterable, List, Mapping, Sequence, Tuple, Union

Expand All @@ -26,7 +26,8 @@
from argoverse.utils.camera_stats import RING_CAMERA_LIST, STEREO_CAMERA_LIST
from argoverse.utils.city_visibility_utils import clip_point_cloud_to_visible_region
from argoverse.utils.cv2_plotting_utils import draw_clipped_line_segment
from argoverse.utils.ffmpeg_utils import write_nonsequential_idx_video
from argoverse.utils.cv2_video_utils import VideoWriter
from argoverse.utils.ffmpeg_utils import ffmpeg_compress_video, write_nonsequential_idx_video
from argoverse.utils.frustum_clipping import generate_frustum_planes
from argoverse.utils.ply_loader import load_ply
from argoverse.utils.se3 import SE3
Expand All @@ -39,6 +40,8 @@

# jigger lane pixel values by [-10,10] range
LANE_COLOR_NOISE = 20
STEREO_FPS = 5
RING_CAM_FPS = 30


def plot_lane_centerlines_in_img(
Expand All @@ -54,7 +57,7 @@ def plot_lane_centerlines_in_img(
) -> np.ndarray:
"""
Args:
city_SE3_egovehicle: SE3 transformation representing egovehicle to city transformation
city_SE3_egovehicle: SE(3) transformation representing egovehicle to city transformation
img: Array of shape (M,N,3) representing updated image
city_name: str, string representing city name, i.e. 'PIT' or 'MIA'
avm: instance of ArgoverseMap
Expand Down Expand Up @@ -105,6 +108,8 @@ def dump_clipped_3d_cuboids_to_images(
data_dir: str,
experiment_prefix: str,
motion_compensate: bool = True,
omit_centerlines: bool = False,
generate_video_only: bool = False,
) -> List[str]:
"""
We bring the 3D points into each camera coordinate system, and do the clipping there in 3D.
Expand All @@ -115,24 +120,40 @@ def dump_clipped_3d_cuboids_to_images(
data_dir: path to dataset with the latest data
experiment_prefix: Output directory
motion_compensate: Whether to motion compensate when projecting
omit_centerlines: whether to omit map vector lane geometry from rendering
generate_video_only: whether to generate mp4s only without dumping individual frames
Returns:
saved_img_fpaths
"""
saved_img_fpaths = []
dl = SimpleArgoverseTrackingDataLoader(data_dir=data_dir, labels_dir=data_dir)
avm = ArgoverseMap()
if not omit_centerlines:
avm = ArgoverseMap()
fps_map = {
cam_name: STEREO_FPS if "stereo" in cam_name else RING_CAM_FPS
for cam_name in RING_CAMERA_LIST + STEREO_CAMERA_LIST
}
category_subdir = "amodal_labels"
if not Path(f"{experiment_prefix}_{category_subdir}").exists():
os.makedirs(f"{experiment_prefix}_{category_subdir}")
video_output_dir = f"{experiment_prefix}_{category_subdir}"

for log_id in log_ids:
save_dir = f"{experiment_prefix}_{log_id}"
if not Path(save_dir).exists():
if not generate_video_only and not Path(save_dir).exists():
# JPG images will be dumped here, if requested by arguments
os.makedirs(save_dir)

city_name = dl.get_city_name(log_id)
log_calib_data = dl.get_log_calibration_data(log_id)

flag_done = False
for cam_idx, camera_name in enumerate(RING_CAMERA_LIST + STEREO_CAMERA_LIST):
fps = fps_map[camera_name]
if generate_video_only:
mp4_path = f"{video_output_dir}/{log_id}_{camera_name}_{fps}fps.mp4"
video_writer = VideoWriter(mp4_path)
cam_im_fpaths = dl.get_ordered_log_cam_fpaths(log_id, camera_name)
for i, im_fpath in enumerate(cam_im_fpaths):
if i % 50 == 0:
Expand Down Expand Up @@ -170,15 +191,17 @@ def dump_clipped_3d_cuboids_to_images(
img = imageio.imread(im_fpath)[:, :, ::-1].copy()
camera_config = get_calibration_config(log_calib_data, camera_name)
planes = generate_frustum_planes(camera_config.intrinsic.copy(), camera_name)
img = plot_lane_centerlines_in_img(
lidar_pts,
city_SE3_egovehicle,
img,
city_name,
avm,
camera_config,
planes,
)

if not omit_centerlines:
img = plot_lane_centerlines_in_img(
lidar_pts,
city_SE3_egovehicle,
img,
city_name,
avm,
camera_config,
planes,
)

for label_idx, label in enumerate(labels):
obj_rec = json_label_dict_to_obj_record(label)
Expand Down Expand Up @@ -216,40 +239,65 @@ def dump_clipped_3d_cuboids_to_images(
copy.deepcopy(camera_config),
)

cv2.imwrite(save_img_fpath, img)
saved_img_fpaths += [save_img_fpath]
if max_num_images_to_render != -1 and len(saved_img_fpaths) > max_num_images_to_render:
if generate_video_only:
video_writer.add_frame(rgb_frame=img[:, :, ::-1])
else:
cv2.imwrite(save_img_fpath, img)
saved_img_fpaths += [save_img_fpath]
if (
not generate_video_only
and max_num_images_to_render != -1
and len(saved_img_fpaths) > max_num_images_to_render
):
flag_done = True
break
if generate_video_only:
video_writer.complete()
ffmpeg_compress_video(mp4_path, fps)
if flag_done:
break
category_subdir = "amodal_labels"

if not Path(f"{experiment_prefix}_{category_subdir}").exists():
os.makedirs(f"{experiment_prefix}_{category_subdir}")

for cam_idx, camera_name in enumerate(RING_CAMERA_LIST + STEREO_CAMERA_LIST):
# Write the cuboid video -- could also write w/ fps=20,30,40
if "stereo" in camera_name:
fps = 5
else:
fps = 30
img_wildcard = f"{save_dir}/{camera_name}_%*.jpg"
output_fpath = f"{experiment_prefix}_{category_subdir}/{log_id}_{camera_name}_{fps}fps.mp4"
write_nonsequential_idx_video(img_wildcard, output_fpath, fps)
if not generate_video_only:
for cam_idx, camera_name in enumerate(RING_CAMERA_LIST + STEREO_CAMERA_LIST):
# Write the cuboid video from individual frames -- could also write w/ fps=20,30,40
fps = fps_map[camera_name]
img_wildcard = f"{save_dir}/{camera_name}_%*.jpg"
output_fpath = f"{video_output_dir}/{log_id}_{camera_name}_{fps}fps.mp4"
write_nonsequential_idx_video(img_wildcard, output_fpath, fps)

return saved_img_fpaths


def main(args: Any):
"""Run the example."""
log_ids = [log_id.strip() for log_id in args.log_ids.split(",")]
dump_clipped_3d_cuboids_to_images(
log_ids,
args.max_num_images_to_render * 9,
args.dataset_dir,
args.experiment_prefix,
)
if args.use_multiprocessing:
single_process_args = [
(
[log_id],
args.max_num_images_to_render * 9,
args.dataset_dir,
args.experiment_prefix,
not args.no_motion_compensation,
args.omit_centerlines,
args.generate_video_only,
)
for log_id in log_ids
]
with Pool(os.cpu_count()) as p:
accum = p.starmap(dump_clipped_3d_cuboids_to_images, single_process_args)

else:
# run in a single process, instead
dump_clipped_3d_cuboids_to_images(
log_ids=log_ids,
max_num_images_to_render=args.max_num_images_to_render * 9,
data_dir=args.dataset_dir,
experiment_prefix=args.experiment_prefix,
motion_compensate=not args.no_motion_compensation,
omit_centerlines=args.omit_centerlines,
generate_video_only=args.generate_video_only,
)


if __name__ == "__main__":
Expand All @@ -262,6 +310,26 @@ def main(args: Any):
help="number of images within which to render 3d cuboids",
)
parser.add_argument("--dataset-dir", type=str, required=True, help="path to the dataset folder")
parser.add_argument(
"--use-multiprocessing",
action="store_true",
help="uses multiprocessing only if arg is specified on command line, otherwise single process",
)
parser.add_argument(
"--no-motion-compensation",
action="store_true",
help="motion compensate by default, unless arg is specified on command line to not do so",
)
parser.add_argument(
"--omit-centerlines",
action="store_true",
help="renders centerlines by default, will omit them if arg is specified on command line",
)
parser.add_argument(
"--generate-video-only",
action="store_true",
help="produces mp4 files only, without dumping any individual frames/images to JPGs",
)
parser.add_argument(
"--log-ids",
type=str,
Expand Down
2 changes: 1 addition & 1 deletion tests/test_sim2.py
Original file line number Diff line number Diff line change
Expand Up @@ -182,7 +182,7 @@ def test_transform_point_cloud() -> None:
pts_a = aTb.transform_point_cloud(copy.deepcopy(pts_b))
pts_a_ = aSb.transform_point_cloud(copy.deepcopy(pts_b))

assert np.allclose(pts_a, pts_a_, atol=1e-7)
assert np.allclose(pts_a, pts_a_, atol=1e-5)


def test_cannot_set_zero_scale() -> None:
Expand Down

0 comments on commit 6c360f3

Please sign in to comment.