From f482564e1ec3b7fd1c97e2406632c37bf71e59d1 Mon Sep 17 00:00:00 2001 From: LielinJiang Date: Mon, 14 Feb 2022 06:31:08 +0000 Subject: [PATCH 1/2] fix decode error and add layout for decode op --- paddle/fluid/operators/data/CMakeLists.txt | 7 +- .../data/batch_decode_random_crop_op.cc | 7 ++ .../data/batch_decode_random_crop_op.cu | 64 ++++++++++++++++--- .../data/batch_decode_random_crop_op.h | 37 ++++++++++- .../fluid/operators/data/batch_resize_op.cu | 17 +++++ .../operators/data/file_label_loader_op.h | 3 + paddle/fluid/operators/data/nvjpeg_decoder.cc | 28 ++++++-- paddle/fluid/operators/math/math_function.cu | 1 + python/paddle/vision/ops.py | 4 +- 9 files changed, 144 insertions(+), 24 deletions(-) diff --git a/paddle/fluid/operators/data/CMakeLists.txt b/paddle/fluid/operators/data/CMakeLists.txt index bf6470bd02df3..f16b73ca17272 100644 --- a/paddle/fluid/operators/data/CMakeLists.txt +++ b/paddle/fluid/operators/data/CMakeLists.txt @@ -4,10 +4,6 @@ if(WITH_UNITY_BUILD) include(unity_build_rule.cmake) endif() -# find_package(ZLIB) -# include_directories(${ZLIB_INCLUDE_DIRS}) -# TARGET_LINK_LIBRARIES( ${ZLIB_LIBRARIES}) - cc_library(pipeline SRCS pipeline.cc DEPS parallel_executor simple_threadpool scope) op_library(dataloader_op SRCS dataloader_op.cc dataloader_op.cu.cc DEPS pipeline ${OP_HEADER_DEPS}) @@ -23,9 +19,10 @@ op_library(batch_decode_op SRCS batch_decode_op.cc batch_decode_op.cu DEPS nvjpe op_library(random_crop_and_resize_op SRCS random_crop_and_resize_op.cc random_crop_and_resize_op.cu DEPS ${OP_HEADER_DEPS}) op_library(batch_resize_op SRCS batch_resize_op.cc batch_resize_op.cu DEPS ${OP_HEADER_DEPS}) + op_library(file_label_loader_op SRCS file_label_loader_op.cc DEPS ${OP_HEADER_DEPS}) # register_operators() # TODO: add test here -# cc_test(xxx SRCS xxx DEPS xxx) +# cc_test(xxx SRCS xxx DEPS xxx \ No newline at end of file diff --git a/paddle/fluid/operators/data/batch_decode_random_crop_op.cc b/paddle/fluid/operators/data/batch_decode_random_crop_op.cc index 2ca56063936d1..7660f7f3ccb5a 100644 --- a/paddle/fluid/operators/data/batch_decode_random_crop_op.cc +++ b/paddle/fluid/operators/data/batch_decode_random_crop_op.cc @@ -132,6 +132,13 @@ and 255. "for optionally converting the image, can be \"unchanged\" " ",\"gray\" , \"rgb\" .") .SetDefault("unchanged"); + AddAttr( + "data_layout", + "(string, default NCHW) Only used in " + "an optional string from: \"NHWC\", \"NCHW\". " + "Specify that the data format of the input and output data is " + "channel_first or channel_last.") + .SetDefault("NCHW"); AddAttr("aspect_ratio_min", "").SetDefault(3./4.); AddAttr("aspect_ratio_max", "").SetDefault(4./3.); AddAttr("area_min", "").SetDefault(0.08); diff --git a/paddle/fluid/operators/data/batch_decode_random_crop_op.cu b/paddle/fluid/operators/data/batch_decode_random_crop_op.cu index eecf5da9bed9c..c15e9d0ae3e47 100644 --- a/paddle/fluid/operators/data/batch_decode_random_crop_op.cu +++ b/paddle/fluid/operators/data/batch_decode_random_crop_op.cu @@ -16,12 +16,15 @@ #include "paddle/fluid/operators/data/batch_decode_random_crop_op.h" #include "paddle/fluid/operators/reader/lod_tensor_blocking_queue.h" +#include "paddle/fluid/operators/math/math_function.h" +// #include "paddle/fluid/operators/transpose_op.h" namespace paddle { namespace operators { namespace data { using LoDTensorBlockingQueueHolder = operators::reader::LoDTensorBlockingQueueHolder; +using DataLayout = framework::DataLayout; NvjpegDecoderThreadPool* decode_pool = nullptr; // std::seed_seq* rand_seq = nullptr; @@ -50,6 +53,15 @@ class GPUBatchDecodeRandomCropKernel : public framework::OpKernel { auto& out_array = *out->GetMutable(); out_array.resize(inputs->size()); + const std::string data_layout_str = ctx.Attr("data_layout"); + const DataLayout data_layout = + framework::StringToDataLayout(data_layout_str); + + framework::LoDTensorArray temp_array; + if (data_layout == DataLayout::kNCHW) { + temp_array.resize(inputs->size()); + } + auto aspect_ratio_min = ctx.Attr("aspect_ratio_min"); auto aspect_ratio_max = ctx.Attr("aspect_ratio_max"); AspectRatioRange aspect_ratio_range{aspect_ratio_min, aspect_ratio_max}; @@ -66,20 +78,52 @@ class GPUBatchDecodeRandomCropKernel : public framework::OpKernel { const framework::LoDTensor x = inputs->at(i); auto* x_data = x.data(); size_t x_numel = static_cast(x.numel()); - - NvjpegDecodeTask task = { - .bit_stream = x_data, - .bit_len = x_numel, - .tensor = &out_array[i], - .roi_generator = new RandomROIGenerator( - aspect_ratio_range, area_range, rands[i]), - .place = dev - }; - decode_pool->AddTask(std::make_shared(task)); + + if (data_layout == DataLayout::kNCHW){ + NvjpegDecodeTask task = { + .bit_stream = x_data, + .bit_len = x_numel, + .tensor = &temp_array[i], + .roi_generator = new RandomROIGenerator( + aspect_ratio_range, area_range, rands[i]), + .place = dev + }; + decode_pool->AddTask(std::make_shared(task)); + } + else{ + NvjpegDecodeTask task = { + .bit_stream = x_data, + .bit_len = x_numel, + .tensor = &out_array[i], + .roi_generator = new RandomROIGenerator( + aspect_ratio_range, area_range, rands[i]), + .place = dev + }; + decode_pool->AddTask(std::make_shared(task)); + } + } decode_pool->RunAll(true); + if (data_layout == DataLayout::kNCHW){ + const auto& dev_ctx = ctx.cuda_device_context(); + paddle::operators::math::Transpose trans; + std::vector axis = {2, 0, 1}; + // LOG(ERROR) << "start transpose 01!!!"; + for (size_t i = 0; i < inputs->size(); i++) { + // Do transpose + const framework::DDim& in_sizes = temp_array[i].dims(); + // const int ndim = in_sizes.size(); + framework::DDim transposed_input_shape = in_sizes.transpose(axis); + std::vector transposed_input_shape_ = + framework::vectorize(transposed_input_shape); + out_array[i].Resize(transposed_input_shape); + out_array[i].mutable_data(dev_ctx.GetPlace()); + trans(dev_ctx, temp_array[i], &out_array[i], axis); + } + } + LOG(ERROR) << "GPUBatchDecodeJpegKernel Compute finish"; } }; diff --git a/paddle/fluid/operators/data/batch_decode_random_crop_op.h b/paddle/fluid/operators/data/batch_decode_random_crop_op.h index fd23be38341dc..f599c74a7dfb2 100644 --- a/paddle/fluid/operators/data/batch_decode_random_crop_op.h +++ b/paddle/fluid/operators/data/batch_decode_random_crop_op.h @@ -24,11 +24,46 @@ #include "paddle/fluid/platform/enforce.h" #include "paddle/fluid/operators/data/nvjpeg_decoder.h" - namespace paddle { namespace operators { namespace data { +// template +// void TransCompute(const int dim, const DeviceContext& dev_ctx, +// const framework::Tensor& in, framework::Tensor* out, +// const std::vector& axis) { +// switch (dim) { +// case 1: +// math::Transpose trans1; +// trans1(dev_ctx, in, out, axis); +// break; +// case 2: +// math::Transpose trans2; +// trans2(dev_ctx, in, out, axis); +// break; +// case 3: +// math::Transpose trans3; +// trans3(dev_ctx, in, out, axis); +// break; +// case 4: +// math::Transpose trans4; +// trans4(dev_ctx, in, out, axis); +// break; +// case 5: +// math::Transpose trans5; +// trans5(dev_ctx, in, out, axis); +// break; +// case 6: +// math::Transpose trans6; +// trans6(dev_ctx, in, out, axis); +// break; +// default: +// // for dim >= 7 situation +// math::TransposeNormal trans_normal; +// trans_normal(dev_ctx, in, out, axis); +// } +// } + template class CPUBatchDecodeRandomCropKernel : public framework::OpKernel { public: diff --git a/paddle/fluid/operators/data/batch_resize_op.cu b/paddle/fluid/operators/data/batch_resize_op.cu index e2c0319fdcf05..f7a7f52a3703e 100644 --- a/paddle/fluid/operators/data/batch_resize_op.cu +++ b/paddle/fluid/operators/data/batch_resize_op.cu @@ -234,15 +234,32 @@ class BatchResizeCUDAKernel : public framework::OpKernel { bool align_corners = ctx.Attr("align_corners"); int align_mode = ctx.Attr("align_mode"); + // int img_h, img_w;//, idx_h, idx_w, crop_h, crop_w; + auto* img = &x->at(0); int64_t img_c = data_layout == DataLayout::kNCHW ? \ img->dims()[0] : img->dims()[2]; + LOG(ERROR) << "img channel: " << img_c << " || " << data_layout_str; + // img_h = + // data_layout == DataLayout::kNCHW ? img->dims()[1] : img->dims()[0]; + // img_w = + // data_layout == DataLayout::kNCHW ? img->dims()[2] : img->dims()[1]; + std::vector out_dim = {static_cast(x->size()), + size[0], size[1], img_c}; + if (data_layout == DataLayout::kNCHW) { + out_dim = {static_cast(x->size()), img_c, size[0], size[1]}; + } out->Resize(framework::make_ddim(out_dim)); out->mutable_data(ctx.GetPlace()); + // for (int i = 0; i < x->size(); i++) { + // img = &x->at(i); + // auto out_tensor = out->Slice(i, i + 1); + // TensorCopySync(*img, ctx.GetPlace(), &out_tensor); + // } int img_h, img_w, idx_h, idx_w, crop_h, crop_w; for (int i = 0; i < x->size(); i++) { img = &x->at(i); diff --git a/paddle/fluid/operators/data/file_label_loader_op.h b/paddle/fluid/operators/data/file_label_loader_op.h index bbc34a7b546bf..ffb08a6439b17 100644 --- a/paddle/fluid/operators/data/file_label_loader_op.h +++ b/paddle/fluid/operators/data/file_label_loader_op.h @@ -147,6 +147,7 @@ static void ParseFilesAndLabels(const std::string data_root, } closedir(dir); } + } std::map>> root_to_samples_; @@ -156,6 +157,8 @@ static std::vector>* GetFilesAndLabelsFromCache(cons if (iter == root_to_samples_.end()) { std::vector> samples; ParseFilesAndLabels(data_root, &samples); + std::cout << "files 0: " << samples[0].first << std::endl; + std::cout << "files 1: " << samples[1].first << std::endl; LOG(ERROR) << "Init samples: " << samples.size(); root_to_samples_[data_root] = samples; } diff --git a/paddle/fluid/operators/data/nvjpeg_decoder.cc b/paddle/fluid/operators/data/nvjpeg_decoder.cc index 784b69b28b05c..56159f81e51f6 100644 --- a/paddle/fluid/operators/data/nvjpeg_decoder.cc +++ b/paddle/fluid/operators/data/nvjpeg_decoder.cc @@ -85,7 +85,9 @@ void NvjpegDecoder::CPUDecodeRandomCropResize(const uint8_t* data, size_t length unsigned char* workspace, size_t workspace_size, framework::LoDTensor& temp, framework::LoDTensor* out, platform::Place place) { cv::Mat image = + // cv::imdecode(const_cast(data), cv::IMREAD_COLOR); cv::imdecode(cv::Mat(1, length, CV_8UC1, const_cast(data)), cv::IMREAD_COLOR); + cv::Mat cropped; int height; int width; @@ -93,24 +95,28 @@ void NvjpegDecoder::CPUDecodeRandomCropResize(const uint8_t* data, size_t length ROI roi; roi_generator->GenerateRandomROI(image.cols, image.rows, &roi); cv::Rect cv_roi; + cv_roi.x = roi.x; cv_roi.y = roi.y; cv_roi.width = roi.w; cv_roi.height = roi.h; height = roi.h; width = roi.w; - std::vector out_shape = {3, height, width}; + // std::vector out_shape = {3, height, width}; + std::vector out_shape = {height, width, 3}; temp.Resize(framework::make_ddim(out_shape)); platform::CPUPlace cpu; // allocate memory and assign to out_image auto* data = temp.mutable_data(cpu); - cropped.data = data; + // todo jianglielin: why not work? + // cropped.data = data; image(cv_roi).copyTo(cropped); - out->Resize(framework::make_ddim(out_shape)); - + + std::memcpy(data, cropped.data, 3 * height * width); + TensorCopySync(temp, place, out); - } else { + LOG(ERROR) << "Not Use Opencv decode!!!"; // throw error } } @@ -139,6 +145,9 @@ int NvjpegDecoder::ParseDecodeParams( return 1; #endif } + else{ + // LOG(ERROR) << "Use nvjpeg decode!!!"; + } int64_t width = static_cast(widths[0]); int64_t height = static_cast(heights[0]); @@ -174,13 +183,16 @@ int NvjpegDecoder::ParseDecodeParams( if (roi_generator) { ROI roi; roi_generator->GenerateRandomROI(width, height, &roi); - + // roi.x = 0; + // roi.y = 0; + // roi.w = 500; + // roi.h = 400; PADDLE_ENFORCE_NVJPEG_SUCCESS(platform::dynload::nvjpegDecodeParamsSetROI(decode_params_, roi.x, roi.y, roi.w, roi.h)); height = roi.h; width = roi.w; } - std::vector out_shape = {output_components, height, width}; + std::vector out_shape = {height, width, output_components}; out->Resize(framework::make_ddim(out_shape)); // allocate memory and assign to out_image @@ -217,7 +229,9 @@ void NvjpegDecoder::Run( if (res) { return; } + // LOG(ERROR) << "ParseDecodeParams finish !!!"; Decode(bit_stream, bit_len, &image); + // LOG(ERROR) << "Decode finish !!!"; } NvjpegDecoderThreadPool::NvjpegDecoderThreadPool(const int num_threads, const std::string mode, const int dev_id) diff --git a/paddle/fluid/operators/math/math_function.cu b/paddle/fluid/operators/math/math_function.cu index cfdfa456e39ea..0ee26752aebc3 100644 --- a/paddle/fluid/operators/math/math_function.cu +++ b/paddle/fluid/operators/math/math_function.cu @@ -50,6 +50,7 @@ template struct SetConstant; \ template struct Transpose; \ template struct Transpose; \ + template struct Transpose; \ template struct Transpose; \ template struct Transpose; \ template struct Transpose Date: Mon, 14 Feb 2022 06:37:21 +0000 Subject: [PATCH 2/2] clean code --- .../data/batch_decode_random_crop_op.h | 36 ------------------- .../fluid/operators/data/batch_resize_op.cu | 13 ------- .../operators/data/file_label_loader_op.h | 4 +-- paddle/fluid/operators/data/nvjpeg_decoder.cc | 9 ++--- 4 files changed, 4 insertions(+), 58 deletions(-) diff --git a/paddle/fluid/operators/data/batch_decode_random_crop_op.h b/paddle/fluid/operators/data/batch_decode_random_crop_op.h index f599c74a7dfb2..de96e38ca95ef 100644 --- a/paddle/fluid/operators/data/batch_decode_random_crop_op.h +++ b/paddle/fluid/operators/data/batch_decode_random_crop_op.h @@ -28,42 +28,6 @@ namespace paddle { namespace operators { namespace data { -// template -// void TransCompute(const int dim, const DeviceContext& dev_ctx, -// const framework::Tensor& in, framework::Tensor* out, -// const std::vector& axis) { -// switch (dim) { -// case 1: -// math::Transpose trans1; -// trans1(dev_ctx, in, out, axis); -// break; -// case 2: -// math::Transpose trans2; -// trans2(dev_ctx, in, out, axis); -// break; -// case 3: -// math::Transpose trans3; -// trans3(dev_ctx, in, out, axis); -// break; -// case 4: -// math::Transpose trans4; -// trans4(dev_ctx, in, out, axis); -// break; -// case 5: -// math::Transpose trans5; -// trans5(dev_ctx, in, out, axis); -// break; -// case 6: -// math::Transpose trans6; -// trans6(dev_ctx, in, out, axis); -// break; -// default: -// // for dim >= 7 situation -// math::TransposeNormal trans_normal; -// trans_normal(dev_ctx, in, out, axis); -// } -// } - template class CPUBatchDecodeRandomCropKernel : public framework::OpKernel { public: diff --git a/paddle/fluid/operators/data/batch_resize_op.cu b/paddle/fluid/operators/data/batch_resize_op.cu index f7a7f52a3703e..7728a6b463163 100644 --- a/paddle/fluid/operators/data/batch_resize_op.cu +++ b/paddle/fluid/operators/data/batch_resize_op.cu @@ -234,18 +234,10 @@ class BatchResizeCUDAKernel : public framework::OpKernel { bool align_corners = ctx.Attr("align_corners"); int align_mode = ctx.Attr("align_mode"); - // int img_h, img_w;//, idx_h, idx_w, crop_h, crop_w; - auto* img = &x->at(0); int64_t img_c = data_layout == DataLayout::kNCHW ? \ img->dims()[0] : img->dims()[2]; - LOG(ERROR) << "img channel: " << img_c << " || " << data_layout_str; - // img_h = - // data_layout == DataLayout::kNCHW ? img->dims()[1] : img->dims()[0]; - // img_w = - // data_layout == DataLayout::kNCHW ? img->dims()[2] : img->dims()[1]; - std::vector out_dim = {static_cast(x->size()), size[0], size[1], img_c}; if (data_layout == DataLayout::kNCHW) { @@ -255,11 +247,6 @@ class BatchResizeCUDAKernel : public framework::OpKernel { out->Resize(framework::make_ddim(out_dim)); out->mutable_data(ctx.GetPlace()); - // for (int i = 0; i < x->size(); i++) { - // img = &x->at(i); - // auto out_tensor = out->Slice(i, i + 1); - // TensorCopySync(*img, ctx.GetPlace(), &out_tensor); - // } int img_h, img_w, idx_h, idx_w, crop_h, crop_w; for (int i = 0; i < x->size(); i++) { img = &x->at(i); diff --git a/paddle/fluid/operators/data/file_label_loader_op.h b/paddle/fluid/operators/data/file_label_loader_op.h index ffb08a6439b17..eef07790372a2 100644 --- a/paddle/fluid/operators/data/file_label_loader_op.h +++ b/paddle/fluid/operators/data/file_label_loader_op.h @@ -157,8 +157,8 @@ static std::vector>* GetFilesAndLabelsFromCache(cons if (iter == root_to_samples_.end()) { std::vector> samples; ParseFilesAndLabels(data_root, &samples); - std::cout << "files 0: " << samples[0].first << std::endl; - std::cout << "files 1: " << samples[1].first << std::endl; + // std::cout << "files 0: " << samples[0].first << std::endl; + // std::cout << "files 1: " << samples[1].first << std::endl; LOG(ERROR) << "Init samples: " << samples.size(); root_to_samples_[data_root] = samples; } diff --git a/paddle/fluid/operators/data/nvjpeg_decoder.cc b/paddle/fluid/operators/data/nvjpeg_decoder.cc index 56159f81e51f6..188723ba306fe 100644 --- a/paddle/fluid/operators/data/nvjpeg_decoder.cc +++ b/paddle/fluid/operators/data/nvjpeg_decoder.cc @@ -85,7 +85,6 @@ void NvjpegDecoder::CPUDecodeRandomCropResize(const uint8_t* data, size_t length unsigned char* workspace, size_t workspace_size, framework::LoDTensor& temp, framework::LoDTensor* out, platform::Place place) { cv::Mat image = - // cv::imdecode(const_cast(data), cv::IMREAD_COLOR); cv::imdecode(cv::Mat(1, length, CV_8UC1, const_cast(data)), cv::IMREAD_COLOR); cv::Mat cropped; @@ -95,14 +94,13 @@ void NvjpegDecoder::CPUDecodeRandomCropResize(const uint8_t* data, size_t length ROI roi; roi_generator->GenerateRandomROI(image.cols, image.rows, &roi); cv::Rect cv_roi; - cv_roi.x = roi.x; cv_roi.y = roi.y; cv_roi.width = roi.w; cv_roi.height = roi.h; height = roi.h; width = roi.w; - // std::vector out_shape = {3, height, width}; + std::vector out_shape = {height, width, 3}; temp.Resize(framework::make_ddim(out_shape)); platform::CPUPlace cpu; @@ -183,10 +181,7 @@ int NvjpegDecoder::ParseDecodeParams( if (roi_generator) { ROI roi; roi_generator->GenerateRandomROI(width, height, &roi); - // roi.x = 0; - // roi.y = 0; - // roi.w = 500; - // roi.h = 400; + PADDLE_ENFORCE_NVJPEG_SUCCESS(platform::dynload::nvjpegDecodeParamsSetROI(decode_params_, roi.x, roi.y, roi.w, roi.h)); height = roi.h; width = roi.w;