Merge pull request #2165 from vchiluka5:NVIDIA_Optical_Flow

opencv · Jul 7, 2019 · 653bef2 · 653bef2
2 parents 0a2179b + 5e0783e
commit 653bef2
Show file tree

Hide file tree

Showing 7 changed files with 1,337 additions and 0 deletions.
diff --git a/modules/cudaoptflow/CMakeLists.txt b/modules/cudaoptflow/CMakeLists.txt
@@ -7,3 +7,22 @@ set(the_description "CUDA-accelerated Optical Flow")
 ocv_warnings_disable(CMAKE_CXX_FLAGS /wd4127 /wd4324 /wd4512 -Wundef -Wmissing-declarations -Wshadow)
 
 ocv_define_module(cudaoptflow opencv_video opencv_optflow opencv_cudaarithm opencv_cudawarping opencv_cudaimgproc OPTIONAL opencv_cudalegacy WRAP python)
+
+set(NVIDIA_OPTICAL_FLOW_1_0_HEADERS_COMMIT "79c6cee80a2df9a196f20afd6b598a9810964c32") # SDK headers revision; reused below for the archive name and the include path
+set(NVIDIA_OPTICAL_FLOW_1_0_HEADERS_MD5 "ca5acedee6cb45d0ec610a6732de5c15") # passed to ocv_download as HASH
+set(NVIDIA_OPTICAL_FLOW_1_0_HEADERS_PATH "${OpenCV_BINARY_DIR}/3rdparty/NVIDIAOpticalFlowSDK_1_0_Headers") # lives in the build tree, not the source tree
+ocv_download(FILENAME "${NVIDIA_OPTICAL_FLOW_1_0_HEADERS_COMMIT}.zip"
+               HASH ${NVIDIA_OPTICAL_FLOW_1_0_HEADERS_MD5}
+               URL
+                 "https://github.com/NVIDIA/NVIDIAOpticalFlowSDK/archive/"
+               DESTINATION_DIR "${NVIDIA_OPTICAL_FLOW_1_0_HEADERS_PATH}"
+               STATUS NVIDIA_OPTICAL_FLOW_1_0_HEADERS_DOWNLOAD_SUCCESS
+               ID "NVIDIA_OPTICAL_FLOW"
+               UNPACK RELATIVE_URL) # NOTE(review): presumably RELATIVE_URL appends FILENAME to URL - confirm against ocv_download
+
+if(NOT NVIDIA_OPTICAL_FLOW_1_0_HEADERS_DOWNLOAD_SUCCESS) # non-fatal: only a status line is printed and HAVE_NVIDIA_OPTFLOW stays undefined
+  message(STATUS "Failed to download NVIDIA_Optical_Flow_1_0 Headers")
+else()
+  add_definitions(-DHAVE_NVIDIA_OPTFLOW=1) # feature macro is set only when the headers were fetched
+  ocv_include_directories(SYSTEM "${NVIDIA_OPTICAL_FLOW_1_0_HEADERS_PATH}/NVIDIAOpticalFlowSDK-${NVIDIA_OPTICAL_FLOW_1_0_HEADERS_COMMIT}") # directory name is derived from the pinned commit
+endif()
diff --git a/modules/cudaoptflow/include/opencv2/cudaoptflow.hpp b/modules/cudaoptflow/include/opencv2/cudaoptflow.hpp
@@ -102,6 +102,47 @@ class CV_EXPORTS_W SparseOpticalFlow : public Algorithm
                       OutputArray err = cv::noArray(),
                       Stream& stream = Stream::Null()) = 0;
 };
+/** @brief Base interface for optical flow algorithms using the NVIDIA Optical Flow SDK.
+ */
+class CV_EXPORTS_W NvidiaHWOpticalFlow : public Algorithm
+{
+public:
+    /** @brief Calculates Optical Flow using NVIDIA Optical Flow SDK.
+
+    * NVIDIA GPUs starting with Turing contain a dedicated hardware accelerator for computing optical flow vectors between pairs of images.
+    * The optical flow hardware accelerator generates block-based optical flow vectors.
+    * The size of the block depends on the hardware in use, and can be queried using the function getGridSize().
+    * The block-based flow vectors generated by the hardware can be converted to dense representation (i.e. per-pixel flow vectors) using the NvidiaOpticalFlow_1_0::upSampler() helper function, if needed.
+    * The flow vectors are stored in CV_16SC2 format with x and y components of each flow vector in 16-bit signed fixed point representation S10.5.
+
+    @param inputImage Input image.
+    @param referenceImage Reference image of the same size and the same type as input image.
+    @param flow A buffer consisting of inputImage.size() / getGridSize() flow vectors in CV_16SC2 format.
+    @param stream Stream for the asynchronous version.
+    @param hint Hint buffer, used when the client provides external hints. Must have the same size as the flow buffer.
+                The caller can provide flow vectors as hints for optical flow calculation.
+    @param cost Cost buffer containing numbers that indicate the confidence associated with each of the generated flow vectors.
+                The higher the cost, the lower the confidence. The cost buffer is of type CV_32SC1.
+
+    @note
+    - Client must use critical sections around each calc() call if calling it from multiple threads.
+    */
+    CV_WRAP virtual void calc(
+        InputArray inputImage,
+        InputArray referenceImage,
+        InputOutputArray flow,
+        Stream& stream = Stream::Null(),
+        InputArray hint = cv::noArray(),
+        OutputArray cost = cv::noArray()) = 0;
+
+    /** @brief Releases all buffers, contexts and device pointers.
+    */
+    CV_WRAP virtual void collectGarbage() = 0;
+
+    /** @brief Returns grid size of output buffer as per the hardware's capability.
+    */
+    CV_WRAP virtual int getGridSize() const = 0;
+};
 
 //
 // BroxOpticalFlow
@@ -342,6 +383,70 @@ class CV_EXPORTS_W OpticalFlowDual_TVL1 : public DenseOpticalFlow
             bool useInitialFlow = false);
 };
 
+//
+// NvidiaOpticalFlow
+//
+
+/** @brief Class for computing the optical flow vectors between two images using NVIDIA Optical Flow hardware and Optical Flow SDK 1.0.
+@note
+- A sample application demonstrating the use of NVIDIA Optical Flow can be found at
+opencv_source_code/samples/gpu/nvidia_optical_flow.cpp
+- An example application comparing accuracy and performance of NVIDIA Optical Flow with other optical flow algorithms in OpenCV can be found at
+opencv_source_code/samples/gpu/optical_flow.cpp
+*/
+
+class CV_EXPORTS_W NvidiaOpticalFlow_1_0 : public NvidiaHWOpticalFlow
+{
+public:
+    /**
+    * Supported optical flow performance levels.
+    */
+    enum NVIDIA_OF_PERF_LEVEL
+    {
+        NV_OF_PERF_LEVEL_UNDEFINED,
+        NV_OF_PERF_LEVEL_SLOW = 5,                   /**< Slow perf level results in lowest performance and best quality */
+        NV_OF_PERF_LEVEL_MEDIUM = 10,                /**< Medium perf level results in low performance and medium quality */
+        NV_OF_PERF_LEVEL_FAST = 20,                  /**< Fast perf level results in high performance and low quality */
+        NV_OF_PERF_LEVEL_MAX
+    };
+
+    /** @brief The NVIDIA optical flow hardware generates flow vectors at granularity gridSize, which can be queried via function getGridSize().
+    * The upSampler() helper function converts the hardware-generated flow vectors to dense representation (1 flow vector for each pixel)
+    * using the nearest neighbour upsampling method.
+
+    @param flow Buffer of type CV_16SC2 containing flow vectors generated by calc().
+    @param width Width of the input image in pixels for which these flow vectors were generated.
+    @param height Height of the input image in pixels for which these flow vectors were generated.
+    @param gridSize Granularity of the optical flow vectors returned by calc() function. Can be queried using getGridSize().
+    @param upsampledFlow Buffer of type CV_32FC2, containing upsampled flow vectors, each flow vector for 1 pixel, in the pitch-linear layout.
+    */
+    CV_WRAP virtual void upSampler(InputArray flow, int width, int height,
+        int gridSize, InputOutputArray upsampledFlow) = 0;
+
+    /** @brief Instantiate NVIDIA Optical Flow
+
+    @param width Width of input image in pixels.
+    @param height Height of input image in pixels.
+    @param perfPreset Optional parameter. Refer to the [NV OF SDK documentation](https://developer.nvidia.com/opticalflow-sdk) for details about presets.
+                      Defaults to NV_OF_PERF_LEVEL_SLOW.
+    @param enableTemporalHints Optional parameter. Flag to enable temporal hints. When set to true, the hardware uses the flow vectors
+                               generated in the previous call to calc() as internal hints for the current call to calc().
+                               Useful when computing flow vectors between successive video frames. Defaults to false.
+    @param enableExternalHints Optional parameter. Flag to enable passing an external hints buffer to calc(). Defaults to false.
+    @param enableCostBuffer Optional parameter. Flag to enable cost buffer output from calc(). Defaults to false.
+    @param gpuId Optional parameter to select the GPU ID on which the optical flow should be computed. Useful in multi-GPU systems. Defaults to 0.
+    */
+    CV_WRAP static Ptr<NvidiaOpticalFlow_1_0> create(
+        int width,
+        int height,
+        cv::cuda::NvidiaOpticalFlow_1_0::NVIDIA_OF_PERF_LEVEL perfPreset
+        = cv::cuda::NvidiaOpticalFlow_1_0::NVIDIA_OF_PERF_LEVEL::NV_OF_PERF_LEVEL_SLOW,
+        bool enableTemporalHints = false,
+        bool enableExternalHints = false,
+        bool enableCostBuffer = false,
+        int gpuId = 0);
+};
+
 //! @}
 
 }} // namespace cv { namespace cuda {

diff --git a/modules/cudaoptflow/perf/perf_optflow.cpp b/modules/cudaoptflow/perf/perf_optflow.cpp
@@ -326,4 +326,57 @@ PERF_TEST_P(ImagePair, OpticalFlowDual_TVL1,
     }
 }
 
+//////////////////////////////////////////////////////
+// NvidiaOpticalFlow_1_0: times calc() on one grayscale frame pair (FAST preset, hints and cost output disabled)
+
+PERF_TEST_P(ImagePair, NvidiaOpticalFlow_1_0,
+    Values<pair_string>(make_pair("gpu/opticalflow/frame0.png", "gpu/opticalflow/frame1.png")))
+{
+    declare.time(10);
+
+    const cv::Mat frame0 = readImage(GetParam().first, cv::IMREAD_GRAYSCALE);
+    ASSERT_FALSE(frame0.empty());
+
+    const cv::Mat frame1 = readImage(GetParam().second, cv::IMREAD_GRAYSCALE);
+    ASSERT_FALSE(frame1.empty());
+
+    const int width = frame0.size().width; // the engine is created for the first frame's dimensions
+    const int height = frame0.size().height;
+    const bool enableTemporalHints = false;
+    const bool enableExternalHints = false;
+    const bool enableCostBuffer = false;
+    const int gpuid = 0;
+
+    if (PERF_RUN_CUDA()) // NOTE(review): no else branch here, so nothing is measured or checked when this is false
+    {
+        const cv::cuda::GpuMat d_frame0(frame0);
+        const cv::cuda::GpuMat d_frame1(frame1);
+        cv::cuda::GpuMat d_flow;
+        cv::Ptr<cv::cuda::NvidiaOpticalFlow_1_0> d_nvof;
+        try // creation may fail on setups without support; the catch below turns that into a skip
+        {
+            d_nvof = cv::cuda::NvidiaOpticalFlow_1_0::create(width, height,
+                cv::cuda::NvidiaOpticalFlow_1_0::NVIDIA_OF_PERF_LEVEL::NV_OF_PERF_LEVEL_FAST,
+                enableTemporalHints, enableExternalHints, enableCostBuffer, gpuid);
+        }
+        catch (const cv::Exception& e)
+        {
+            if(e.code == Error::StsBadFunc || e.code == Error::StsBadArg || e.code == Error::StsNullPtr) // these codes are treated as "unsupported configuration"
+                throw SkipTestException("Current configuration is not supported");
+            throw; // any other error code is propagated unchanged
+        }
+
+        TEST_CYCLE() d_nvof->calc(d_frame0, d_frame1, d_flow); // the measured operation
+
+        cv::cuda::GpuMat flow[2];
+        cv::cuda::split(d_flow, flow); // separate the two flow channels so each is sanity-checked on its own
+
+        cv::cuda::GpuMat u = flow[0];
+        cv::cuda::GpuMat v = flow[1];
+
+        CUDA_SANITY_CHECK(u, 1e-10);
+        CUDA_SANITY_CHECK(v, 1e-10);
+    }
+}
+
 }} // namespace