Merge pull request PaddlePaddle#13 from LielinJiang/io_add_opencv

Io add opencv
LielinJiang · Jan 28, 2022 · 002915e · 002915e
2 parents 72922e6 + 6527936
commit 002915e
Show file tree

Hide file tree

Showing 5 changed files with 99 additions and 11 deletions.
diff --git a/CMakeLists.txt b/CMakeLists.txt
@@ -229,6 +229,7 @@ option(NEW_RELEASE_CUBIN   "PaddlePaddle next-level release strategy for pypi cu
 option(NEW_RELEASE_JIT   "PaddlePaddle next-level release strategy for backup jit package"             OFF)
 option(WITH_ASCEND_INT64 "Compile with int64 kernel for ascend NPU"    OFF)
 option(WITH_POCKETFFT    "Compile with pocketfft support"      ON)
+option(WITH_OPENCV    "Compile with opencv"      ON)
 
 # PY_VERSION
 if(NOT PY_VERSION)
@@ -336,6 +337,18 @@ include(third_party)  # download, build, install third_party, Contains about 20+
 
 include(flags)              # set paddle compile flags
 
+# Optional OpenCV support: locate the core/imgproc/imgcodecs components,
+# expose their headers and define PADDLE_WITH_OPENCV for the C++ sources.
+if(WITH_OPENCV)
+    # Prefer OpenCV 4.x. The first lookup is QUIET so a missing 4.x is not
+    # reported; the 3.x fallback is REQUIRED, so configuration stops there if
+    # no suitable OpenCV is installed at all.
+    find_package(OpenCV 4.0 QUIET COMPONENTS core imgproc imgcodecs)
+    if(NOT OpenCV_FOUND)
+        find_package(OpenCV 3.0 REQUIRED COMPONENTS core imgproc imgcodecs)
+    endif()
+    message(STATUS "Found OpenCV: ${OpenCV_INCLUDE_DIRS} (found suitable version \"${OpenCV_VERSION}\", minimum required is \"3.0\")")
+    # One SYSTEM include is enough; adding the same directories a second time
+    # without SYSTEM was redundant.
+    include_directories(SYSTEM ${OpenCV_INCLUDE_DIRS})
+    # OpenCV_LIBS is a list of libraries to link, not of directories, so it
+    # must not be handed to link_directories(). Targets that need OpenCV link
+    # against ${OpenCV_LIBS} directly.
+    add_definitions(-DPADDLE_WITH_OPENCV)
+endif()
+
 if(WITH_PROFILER)
     find_package(Gperftools REQUIRED)
     include_directories(${GPERFTOOLS_INCLUDE_DIR})

diff --git a/cmake/generic.cmake b/cmake/generic.cmake
@@ -134,6 +134,8 @@ function(common_link TARGET_NAME)
   if (WITH_PROFILER)
     target_link_libraries(${TARGET_NAME} gperftools::profiler)
   endif()
+
+
 endfunction()
 
 # find all third_party modules is used for paddle static library

diff --git a/paddle/fluid/operators/data/CMakeLists.txt b/paddle/fluid/operators/data/CMakeLists.txt
@@ -4,6 +4,10 @@ if(WITH_UNITY_BUILD)
     include(unity_build_rule.cmake)
 endif()
 
+# find_package(ZLIB)
+# include_directories(${ZLIB_INCLUDE_DIRS})
+# TARGET_LINK_LIBRARIES( ${ZLIB_LIBRARIES})
+
 cc_library(pipeline SRCS pipeline.cc DEPS parallel_executor simple_threadpool scope)
 op_library(dataloader_op SRCS dataloader_op.cc dataloader_op.cu.cc DEPS pipeline ${OP_HEADER_DEPS})
 
@@ -13,7 +17,7 @@ cc_library(map_runner SRCS map_runner.cc DEPS parallel_executor simple_threadpoo
 op_library(map_op SRCS map_op.cc map_op.cu.cc DEPS map_runner ${OP_HEADER_DEPS})
 
 cc_library(random_roi_generator SRCS random_roi_generator.cc DEPS ${OP_HEADER_DEPS})
-cc_library(nvjpeg_decoder SRCS nvjpeg_decoder.cc DEPS random_roi_generator ${OP_HEADER_DEPS})
+cc_library(nvjpeg_decoder SRCS nvjpeg_decoder.cc DEPS random_roi_generator ${OP_HEADER_DEPS} ${OpenCV_LIBS})
 op_library(batch_decode_random_crop_op SRCS batch_decode_random_crop_op.cc batch_decode_random_crop_op.cu DEPS nvjpeg_decoder ${OP_HEADER_DEPS})
 op_library(batch_decode_op SRCS batch_decode_op.cc batch_decode_op.cu DEPS nvjpeg_decoder ${OP_HEADER_DEPS})
 

diff --git a/paddle/fluid/operators/data/nvjpeg_decoder.cc b/paddle/fluid/operators/data/nvjpeg_decoder.cc
@@ -74,7 +74,49 @@ NvjpegDecoder::~NvjpegDecoder() {
   PADDLE_ENFORCE_CUDA_SUCCESS(cudaStreamDestroy(cuda_stream_));
 }
 
-void NvjpegDecoder::ParseDecodeParams(
+// cv::Mat DecodeRandomCropResize(const unsigned char* data, size_t length,
+//                                 RandomROIGenerator* roi_generator,
+//                                 unsigned char* workspace, size_t workspace_size,
+//                                 unsigned char* dst, int target_width,
+//                                 int target_height) {
+#ifdef PADDLE_WITH_OPENCV
+// CPU fallback: decodes an encoded image with OpenCV, crops it to a randomly
+// generated ROI and copies the crop into `out` on `place`.
+//
+//   data, length      encoded image bytes (read only)
+//   roi_generator     source of the random crop rectangle; must not be null
+//   workspace,
+//   workspace_size    currently unused, kept for interface compatibility
+//   temp              CPU staging tensor, resized and filled with the crop
+//   out               destination tensor, resized and filled on `place`
+//   place             place that receives the final copy
+//
+// Raises a Paddle error when the bytes cannot be decoded or when no ROI
+// generator is supplied, instead of returning with `out` left untouched.
+//
+// NOTE(review): OpenCV stores the crop as interleaved 3-channel pixels in BGR
+// order, while the tensors are shaped {3, height, width}. Confirm that the
+// consumers of `out` expect this layout and channel order.
+void NvjpegDecoder::CPUDecodeRandomCropResize(const uint8_t* data, size_t length,
+                                RandomROIGenerator* roi_generator,
+                                unsigned char* workspace, size_t workspace_size,
+                                framework::LoDTensor& temp, framework::LoDTensor* out, platform::Place place) {
+  // cv::Mat only accepts a non-const pointer; imdecode does not modify the
+  // encoded buffer.
+  cv::Mat encoded(1, static_cast<int>(length), CV_8UC1,
+                  const_cast<unsigned char*>(data));
+  cv::Mat image = cv::imdecode(encoded, cv::IMREAD_COLOR);
+  if (image.empty()) {
+    // imdecode returns an empty matrix when the buffer cannot be decoded.
+    PADDLE_THROW(platform::errors::InvalidArgument(
+        "Failed to decode the image with OpenCV, the input data may be "
+        "corrupted or in an unsupported format."));
+  }
+
+  if (roi_generator == nullptr) {
+    PADDLE_THROW(platform::errors::Unimplemented(
+        "OpenCV fallback decoding is only implemented with a random ROI "
+        "generator, but received a null roi_generator."));
+  }
+
+  ROI roi;
+  roi_generator->GenerateRandomROI(image.cols, image.rows, &roi);
+  const cv::Rect cv_roi(roi.x, roi.y, roi.w, roi.h);
+  const int height = roi.h;
+  const int width = roi.w;
+
+  std::vector<int64_t> out_shape = {3, height, width};
+  temp.Resize(framework::make_ddim(out_shape));
+  platform::CPUPlace cpu;
+  auto* crop_buffer = temp.mutable_data<uint8_t>(cpu);
+
+  // Wrap the staging tensor's memory in a Mat header that already has the
+  // crop's size and type. copyTo() then writes straight into the tensor; with
+  // an empty Mat it would allocate its own buffer and leave `temp` unfilled.
+  cv::Mat cropped(height, width, CV_8UC3, crop_buffer);
+  image(cv_roi).copyTo(cropped);
+
+  out->Resize(framework::make_ddim(out_shape));
+  TensorCopySync(temp, place, out);
+}
+#endif
+
+int NvjpegDecoder::ParseDecodeParams(
     const uint8_t* bit_stream, size_t bit_len, framework::LoDTensor* out,
     RandomROIGenerator* roi_generator, nvjpegImage_t* out_image,
     platform::Place place) {
@@ -83,9 +125,20 @@ void NvjpegDecoder::ParseDecodeParams(
   int widths[NVJPEG_MAX_COMPONENT];
   int heights[NVJPEG_MAX_COMPONENT];
 
-  PADDLE_ENFORCE_NVJPEG_SUCCESS(
-      platform::dynload::nvjpegGetImageInfo(handle_, bit_stream, bit_len,
-                         &components, &subsampling, widths, heights));
+
+  nvjpegStatus_t status = platform::dynload::nvjpegGetImageInfo(handle_, bit_stream, bit_len,
+                         &components, &subsampling, widths, heights);
+  // PADDLE_ENFORCE_NVJPEG_SUCCESS(
+  //     platform::dynload::nvjpegGetImageInfo(handle_, bit_stream, bit_len,
+  //                        &components, &subsampling, widths, heights));
+
+  if (status != NVJPEG_STATUS_SUCCESS || (components != 3 && components != 1)) {
+#ifdef PADDLE_WITH_OPENCV
+    framework::LoDTensor temp;
+    CPUDecodeRandomCropResize(bit_stream, bit_len, roi_generator, nullptr, 0, temp, out, place);
+    return 1;
+#endif
+  }
 
   int64_t width = static_cast<int64_t>(widths[0]);
   int64_t height = static_cast<int64_t>(heights[0]);
@@ -102,7 +155,7 @@ void NvjpegDecoder::ParseDecodeParams(
       output_components = 3;
     } else {
       PADDLE_THROW(platform::errors::Fatal(
-          "The provided mode is not supported for JPEG files on GPU"));
+          "The provided mode is not supported for JPEG files on GPU: %s!", mode_));
     }
   } else if (mode_ == "gray") {
     output_format = NVJPEG_OUTPUT_Y;
@@ -111,8 +164,9 @@ void NvjpegDecoder::ParseDecodeParams(
     output_format = NVJPEG_OUTPUT_RGBI;
     output_components = 3;
   } else {
+    // std::cout << mode_ << std::endl;
     PADDLE_THROW(platform::errors::Fatal(
-        "The provided mode is not supported for JPEG files on GPU"));
+        "The provided mode is not supported for JPEG files on GPU: %s!", mode_));
   }
 
   PADDLE_ENFORCE_NVJPEG_SUCCESS(platform::dynload::nvjpegDecodeParamsSetOutputFormat(decode_params_, output_format));
@@ -133,6 +187,7 @@ void NvjpegDecoder::ParseDecodeParams(
   auto* data = out->mutable_data<uint8_t>(place);
   out_image->channel[0] = data;
   out_image->pitch[0] = output_components * width;
+  return 0;
 }
 
 void NvjpegDecoder::Decode(const uint8_t* bit_stream, size_t bit_len, nvjpegImage_t* out_image) {
@@ -143,7 +198,8 @@ void NvjpegDecoder::Decode(const uint8_t* bit_stream, size_t bit_len, nvjpegImag
   // decode jpeg in host to pinned buffer
   PADDLE_ENFORCE_NVJPEG_SUCCESS(platform::dynload::nvjpegStateAttachPinnedBuffer(state_, buffer));
   PADDLE_ENFORCE_NVJPEG_SUCCESS(platform::dynload::nvjpegJpegStreamParse(handle_, bit_stream, bit_len, false, false, stream));
-  PADDLE_ENFORCE_NVJPEG_SUCCESS(platform::dynload::nvjpegDecodeJpegHost(handle_, decoder_, state_, decode_params_, stream));
+
+  (platform::dynload::nvjpegDecodeJpegHost(handle_, decoder_, state_, decode_params_, stream));
 
   // transfer and decode to device buffer
   PADDLE_ENFORCE_NVJPEG_SUCCESS(platform::dynload::nvjpegStateAttachDeviceBuffer(state_, device_buffer_));
@@ -157,7 +213,10 @@ void NvjpegDecoder::Run(
     const uint8_t* bit_stream, size_t bit_len, framework::LoDTensor* out,
     RandomROIGenerator* roi_generator, platform::Place& place) {
   nvjpegImage_t image;
-  ParseDecodeParams(bit_stream, bit_len, out, roi_generator, &image, place);
+  int res = ParseDecodeParams(bit_stream, bit_len, out, roi_generator, &image, place);
+  if (res) {
+    return;
+  }
   Decode(bit_stream, bit_len, &image);
 }
 

diff --git a/paddle/fluid/operators/data/nvjpeg_decoder.h b/paddle/fluid/operators/data/nvjpeg_decoder.h
@@ -15,6 +15,11 @@ limitations under the License. */
 #pragma once
 
 #include <vector>
+
+#ifdef PADDLE_WITH_OPENCV
+  #include <opencv2/opencv.hpp>
+#endif
+
 #include "paddle/fluid/platform/enforce.h"
 #include "paddle/fluid/platform/place.h"
 #include "paddle/fluid/platform/gpu_info.h"
@@ -53,8 +58,13 @@ class NvjpegDecoder {
 
   private:
     DISABLE_COPY_AND_ASSIGN(NvjpegDecoder);
-
-    void ParseDecodeParams(
+#ifdef PADDLE_WITH_OPENCV
+    void CPUDecodeRandomCropResize(const uint8_t* data, size_t length,
+                                RandomROIGenerator* roi_generator,
+                                unsigned char* workspace, size_t workspace_size,
+                                framework::LoDTensor& temp, framework::LoDTensor* out, platform::Place place);
+#endif
+    int ParseDecodeParams(
         const uint8_t* bit_stream, size_t bit_len, framework::LoDTensor* out,
         RandomROIGenerator* roi_generator, nvjpegImage_t* out_image,
         platform::Place place);