From 0e19402fdea33766c18e3f5cfdf0ebca9479667e Mon Sep 17 00:00:00 2001
From: Bartosz Meglicki
Date: Wed, 1 Jan 2020 23:05:20 +0100
Subject: [PATCH] complete depth encoding example relevant to #9

---
 CMakeLists.txt |  8 ++---
 README.md      | 81 ++++++++++++++++++++++++++++++--------------------
 main.cpp       | 57 ++++++++++++++++++-----------------
 3 files changed, 82 insertions(+), 64 deletions(-)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index fc48b03..3490c25 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -1,11 +1,11 @@
 cmake_minimum_required(VERSION 3.0)
 
 project(
-    realsense-ir-to-vaapi-h264
+    realsense-depth-to-vaapi-hevc10
 )
 
 add_library(hve SHARED hardware-video-encoder/hve.c)
 
-add_executable(realsense-ir-to-vaapi-h264 main.cpp)
-target_include_directories(realsense-ir-to-vaapi-h264 PRIVATE hardware-video-encoder)
-target_link_libraries(realsense-ir-to-vaapi-h264 hve avcodec avutil realsense2)
+add_executable(realsense-depth-to-vaapi-hevc10 main.cpp)
+target_include_directories(realsense-depth-to-vaapi-hevc10 PRIVATE hardware-video-encoder)
+target_link_libraries(realsense-depth-to-vaapi-hevc10 hve avcodec avutil realsense2)
diff --git a/README.md b/README.md
index 8a94fac..52cefe7 100644
--- a/README.md
+++ b/README.md
@@ -1,21 +1,22 @@
-# realsense-ir-to-vaapi-h264
+# realsense-depth-to-vaapi-hevc10
 
 This program is example how to use:
 - VAAPI through [HVE](https://github.com/bmegli/hardware-video-encoder) (FFmpeg) to hardware encode
- - Realsense D400 greyscale infrared stream
- - to H.264 raw video
+ - Realsense D400 depth stream
+ - to HEVC Main10 raw "video"
+ - with 10 bit depth encoding
 - stored to disk as example
 
- See [hardware-video-streaming](https://github.com/bmegli/hardware-video-streaming) for other related projects.
+See [hardware-video-streaming](https://github.com/bmegli/hardware-video-streaming) for other related projects.
 
 ## CPU usage
 
 As reported by `htop` (percentage used, 100% would mean core fully utilzed).
 
-| Platform               | CPU       | 640x480  | 1280x720 |
-|------------------------|-----------|----------|----------|
-| Latte Panda Alpha      | M3-7Y30   | 15%      | 25%      |
-| High end laptop (2017) | i7-7820HK | 10%      | 12%      |
+| Platform               | CPU       | 848x480@30    |
+|------------------------|-----------|---------------|
+| Latte Panda Alpha      | M3-7Y30   | to be done    |
+| High end laptop (2017) | i7-7820HK | 10%           |
 
 ## Platforms
 
@@ -25,22 +26,37 @@
 Tested on Ubuntu 18.04.
 
 ## Hardware
 
 - D400 series camera
-- Intel VAAPI compatible hardware encoder ([Quick Sync Video](https://ark.intel.com/Search/FeatureFilter?productType=processors&QuickSyncVideo=true))
+- Intel VAAPI compatible hardware encoder ([Quick Sync Video](https://ark.intel.com/Search/FeatureFilter?productType=processors&QuickSyncVideo=true)), at least Kaby Lake
 
-Tested with D435 camera. There is possibility that it will also work with Amd/Nvidia hardware.
+Tested with D435 camera. There is a possibility that it will also work with AMD hardware.
 
 ## What it does
 
-- process user input (width, height, framerate, time to capture)
-- init file for raw H.264 output
+- process user input (width, height, framerate, depth units, time to capture)
+- init file for raw HEVC output
 - init Realsense D400 device
 - init VAAPI encoder with HVE
-- read greyscale IR data from the camera
-- encode to H.264
-- write to raw H.264 file
+- read depth data from the camera
+- encode to HEVC Main10 profile
+- write to raw HEVC file
 - cleanup
 
-Currently VAAPI NV12 Y is filled with infrared greyscale and color plane is filled with constant value.
+Realsense and VAAPI devices are configured to work together (no software depth processing on the host):
+- VAAPI is configured for HEVC 10 bit per channel P010LE pixel format
+- Realsense is configured to output P016LE (Y plane) compatible depth data
+- P016LE data is binary compatible with P010LE data
+- the data output by Realsense is directly fed to VAAPI hardware encoder
+- the P010LE color data is filled with constant value
+
+We have 10 bits to encode 16 bit Realsense depth data which means a range/precision trade-off:
+- the trade-off is controlled with depth units (0.0001 - 0.01)
+- the best precision/worst range is 6.4 mm/6.5535 m (for depth units 0.0001)
+- the worst precision/best range is 64 cm/655.35 m (for depth units 0.01)
+- all trade-offs in between are possible
+
+Note that this program uses a video codec for depth map encoding. Video codecs are lossy, so the depth data will not be reproduced perfectly.
+
+If you are not concerned with CPU usage and realtime requirements, consider using the [HEVC 3D extension](https://hevc.hhi.fraunhofer.de/3dhevc) reference software encoder instead. As far as I know those extensions are currently not supported by hardware encoders.
 
 ## Dependencies
@@ -72,10 +88,10 @@
 sudo apt-get install libcurl4 cmake
 # get git
 sudo apt-get install git
 # clone the repository (don't forget `--recursive` for submodule!)
-git clone --recursive https://github.com/bmegli/realsense-ir-to-vaapi-h264.git
+git clone --recursive https://github.com/bmegli/realsense-depth-to-vaapi-hevc10.git
 # finally build the program
-cd realsense-ir-to-vaapi-h264
+cd realsense-depth-to-vaapi-hevc10
 mkdir build
 cd build
 cmake ..
 make
@@ -85,14 +101,14 @@
 
 ## Running
 
 ``` bash
-# realsense-ir-to-vaapi-h264 width height framerate nr_of_seconds [device]
+# realsense-depth-to-vaapi-hevc10 <width> <height> <framerate> <depth units> <seconds> [device]
 # e.g
-./realsense-ir-to-vaapi-h264 640 360 30 5
+./realsense-depth-to-vaapi-hevc10 848 480 30 0.0001 5
 ```
 
 Details:
-- width and height have to be supported by D400 camera and H.264
-- framerate has to be supported by D400 camera
+- width and height have to be supported by D400 camera and HEVC
+- framerate and depth units have to be supported by D400 camera
 
 ### Troubleshooting
@@ -108,21 +124,27 @@
 vainfo --display drm --device /dev/dri/renderD128
 ```
 
 Once you identify your Intel device run the program, e.g.
 
 ```bash
-./realsense-ir-to-vaapi-h264 640 360 30 5 /dev/dri/renderD128
+./realsense-depth-to-vaapi-hevc10 848 480 30 0.0001 5 /dev/dri/renderD128
 ```
 
 ## Testing
 
 Play result raw H.264 file with FFmpeg:
 
 ``` bash
 # output goes to output.h264 file
-ffplay output.h264
+ffplay output.hevc
 ```
 
+You will see:
+- dark colors for near depth (near 0 value)
+- light colors for far depth (near 65535 value)
+- black where there is no data (0 value)
+- all of the above is somewhat counterintuitive
+
 ## License
 
-realsense-ir-to-vaapi-h264 and HVE are licensed under Mozilla Public License, v. 2.0
+realsense-depth-to-vaapi-hevc10 and HVE are licensed under Mozilla Public License, v. 2.0
 
 This is similiar to LGPL but more permissive:
 - you can use it as LGPL in prioprietrary software
 
 Like in LGPL, if you modify the code, you have to make your changes available.
 Making a github fork with your changes satisfies those requirements perfectly.
 
 Since you are linking to FFmpeg libraries. Consider also avcodec and avutil licensing.
-
-## Additional information
-
-High H.264 profile supports Monochrome Video Format (4:0:0) so there may be room for improvement (I am not sure VAAPI supports it).
-
-
-
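The precision/range figures quoted in the README hunk above follow from keeping only the 10 most significant bits of the 16 bit Z16 depth values, i.e. a quantization step of 2^6 = 64 raw units. A minimal standalone sketch of that arithmetic (not part of the patch; it only uses the depth-unit values the README quotes):

```cpp
#include <cstdio>

int main()
{
	//Realsense Z16 depth: 16 bit raw value, metric depth = raw value * depth units (metres)
	//P010LE keeps data in the 10 most significant bits, so the encoder effectively
	//sees depth quantized with a step of 2^6 = 64 raw units
	const double depth_units[] = {0.0001, 0.001, 0.01};

	for(double du : depth_units)
	{
		const double precision_m = 64 * du;   //smallest depth step that survives encoding
		const double range_m = 65535 * du;    //largest representable depth
		printf("depth units %.4f -> precision %.4f m, range %.2f m\n", du, precision_m, range_m);
	}
	return 0;
}
```

For depth units 0.0001 this prints a 6.4 mm step and a 6.5535 m range, matching the README's best-precision case; 0.01 gives the 64 cm / 655.35 m case.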
diff --git a/main.cpp b/main.cpp
index 4f7ffbf..5a5e1ab 100644
--- a/main.cpp
+++ b/main.cpp
@@ -9,10 +9,11 @@
  *
  */
 
-/* TO DO This program is example how to use:
+/* This program is example how to use:
  * - VAAPI to hardware encode
- * - Realsense D400 greyscale infrared stream
- * - to H.264 raw video
+ * - Realsense D400 depth stream
+ * - to HEVC Main10 raw "video"
+ * - with 10 bit depth encoding
  * - stored to disk as example
  *
  * See README.md for the details
@@ -29,9 +30,15 @@
 #include <iostream>
 
 using namespace std;
 
+const int WIDTH=0; //to be input through CLI
+const int HEIGHT=0; //to be input through CLI
+const int FRAMERATE=0; //to be input through CLI
+const char *DEVICE=NULL; //to be input through CLI
 const char *ENCODER="hevc_vaapi";//NULL for default (h264_vaapi) or FFmpeg encoder e.g. "hevc_vaapi", ...
 const char *PIXEL_FORMAT="p010le"; //NULL for default (nv12) or pixel format e.g. "rgb0", ...
 const int PROFILE=FF_PROFILE_HEVC_MAIN_10; //or FF_PROFILE_HEVC_MAIN, ...
+const int BFRAMES=2; //max_b_frames, set to 0 to minimize latency, non-zero to minimize size
+const int BITRATE=0; //average bitrate in VBR
 
 //user supplied input
 struct input_args
@@ -51,7 +58,8 @@ int process_user_input(int argc, char* argv[], input_args* input, hve_config *co
 int main(int argc, char* argv[])
 {
 	struct hve *hardware_encoder;
-	struct hve_config hardware_config = {0};
+	//WIDTH, HEIGHT, FRAMERATE, DEVICE is overwritten with user input
+	struct hve_config hardware_config = {WIDTH, HEIGHT, FRAMERATE, DEVICE, ENCODER, PIXEL_FORMAT, PROFILE, BFRAMES, BITRATE};
 	struct input_args user_input = {0};
 
 	ofstream out_file("output.hevc", ofstream::binary);
@@ -60,10 +68,6 @@
 	if(process_user_input(argc, argv, &user_input, &hardware_config) < 0)
 		return 1;
 
-	hardware_config.encoder=ENCODER;
-	hardware_config.pixel_format=PIXEL_FORMAT;
-	hardware_config.profile=PROFILE;
-
 	if(!out_file)
 		return 2;
 
@@ -71,7 +75,7 @@
 	if( (hardware_encoder = hve_init(&hardware_config)) == NULL)
 		return 3;
-
+
 	bool status=main_loop(user_input, realsense, hardware_encoder, out_file);
 
 	hve_close(hardware_encoder);
@@ -93,29 +97,29 @@ bool main_loop(const input_args& input, rs2::pipeline& realsense, hve *he, ofstr
 	const int frames = input.seconds * input.framerate;
 	int f, failed;
 	hve_frame frame = {0};
-	uint16_t *color_data = NULL; //data of dummy color plane for NV12
+	uint16_t *color_data = NULL; //data of dummy color plane for P010LE
 	AVPacket *packet;
-
+
 	for(f = 0; f < frames; ++f)
 	{
 		rs2::frameset frameset = realsense.wait_for_frames();
-		//rs2::video_frame ir_frame = frameset.get_infrared_frame(1);
 		rs2::frame depth = frameset.get_depth_frame();
 
 		const int w = depth.as<rs2::video_frame>().get_width();
 		const int h = depth.as<rs2::video_frame>().get_height();
 		const int stride=depth.as<rs2::video_frame>().get_stride_in_bytes();
 
 		if(!color_data)
 		{	//prepare dummy color plane for P010LE format, half the size of Y
 			//we can't alloc it in advance, this is the first time we know realsense stride
+			//the stride will be at least width * 2 (Realsense Z16, VAAPI P010LE)
 			color_data = new uint16_t[stride/2*h/2];
 			for(int i=0;i<stride/2*h/2;++i)
 				color_data[i] = UINT16_MAX / 2; //constant value for the color plane
 		}
 
 		//supply realsense frame data as encoder frame data
 		frame.linesize[0] = frame.linesize[1] = stride;
 		frame.data[0] = (uint8_t*) depth.get_data();
 		frame.data[1] = (uint8_t*) color_data;
 
 		if(hve_send_frame(he, &frame) != HVE_OK)
 		{
 			cerr << "failed to send frame to hardware" << endl;
 			break;
 		}
 
 		while( (packet=hve_receive_packet(he, &failed)) )
 		{
 			cout << endl << "encoded in: " << packet->size;
 			out_file.write((const char*)packet->data, packet->size);
 		}
-
 		if(failed != HVE_OK)
 		{
 			cerr << "failed to encode frame" << endl;
 			break;
 		}
 	}
-
 	//flush the encoder by sending NULL frame
 	hve_send_frame(he, NULL);
 	//drain the encoder from buffered frames
 	while( (packet=hve_receive_packet(he, &failed)) )
 	{
 		cout << endl << "encoded in: " << packet->size;
 		out_file.write((const char*)packet->data, packet->size);
 	}
 	cout << endl;
-
+
 	delete [] color_data;
 
 	//all the requested frames processed?
@@ -196,7 +200,7 @@
 		cerr << endl << "examples: " << endl;
 		cerr << argv[0] << " 848 480 30 0.0001 5" << endl;
 		cerr << argv[0] << " 848 480 30 0.0001 5 /dev/dri/renderD128" << endl;
-		return -1;
+		return 1;
 	}
 
 	config->width = input->width = atoi(argv[1]);
@@ -206,15 +210,14 @@
 	input->seconds = atoi(argv[5]);
 
 	config->device = argv[6]; //NULL as last argv argument, or device path
-
+
 	cout << "Parsed arguments:" << endl;
 	cout << "width: " << config->width << endl;
 	cout << "height: " << config->height << endl;
 	cout << "framerate: " << config->framerate << endl;
 	cout << "depth units: " << input->depth_units << endl;
 	cout << "seconds: " << input->seconds << endl;
-	cout << "device: " << (config->device ? config->device : "default") << endl;
-
+
 	return 0;
 }
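The patch adds a depth units argument (0.0001 in the examples above), but the diff does not show init_realsense, where that value would be applied to the camera. The following is only a sketch of how such a setting is typically applied with librealsense2, under the same width/height/framerate inputs; the function name and structure are illustrative, not the project's actual code:

```cpp
#include <librealsense2/rs.hpp>

//hypothetical helper: configure a D400 depth stream and apply user supplied depth units
rs2::pipeline init_depth_pipeline(int width, int height, int framerate, float depth_units)
{
	rs2::config cfg;
	//Z16 depth data is what gets fed (as a P010LE-compatible Y plane) to the VAAPI encoder
	cfg.enable_stream(RS2_STREAM_DEPTH, width, height, RS2_FORMAT_Z16, framerate);

	rs2::pipeline pipe;
	rs2::pipeline_profile profile = pipe.start(cfg);

	//depth units control the precision/range trade-off described in the README
	rs2::depth_sensor sensor = profile.get_device().first<rs2::depth_sensor>();
	if(sensor.supports(RS2_OPTION_DEPTH_UNITS))
		sensor.set_option(RS2_OPTION_DEPTH_UNITS, depth_units);

	return pipe;
}
```

The returned pipeline can then be polled with wait_for_frames() exactly as main_loop does in the patch.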