diff --git a/paddle/fluid/operators/data/CMakeLists.txt b/paddle/fluid/operators/data/CMakeLists.txt index 915bda52a4de69..35c081e9bbc78f 100644 --- a/paddle/fluid/operators/data/CMakeLists.txt +++ b/paddle/fluid/operators/data/CMakeLists.txt @@ -4,10 +4,6 @@ if(WITH_UNITY_BUILD) include(unity_build_rule.cmake) endif() -# find_package(ZLIB) -# include_directories(${ZLIB_INCLUDE_DIRS}) -# TARGET_LINK_LIBRARIES( ${ZLIB_LIBRARIES}) - cc_library(pipeline SRCS pipeline.cc DEPS parallel_executor simple_threadpool scope) op_library(dataloader_op SRCS dataloader_op.cc dataloader_op.cu.cc DEPS pipeline ${OP_HEADER_DEPS}) @@ -31,4 +27,4 @@ op_library(random_flip_op SRCS random_flip_op.cc DEPS ${OP_HEADER_DEPS}) # register_operators() # TODO: add test here -# cc_test(xxx SRCS xxx DEPS xxx) +# cc_test(xxx SRCS xxx DEPS xxx \ No newline at end of file diff --git a/paddle/fluid/operators/data/batch_decode_random_crop_op.cc b/paddle/fluid/operators/data/batch_decode_random_crop_op.cc index 2ca56063936d14..7660f7f3ccb5a7 100644 --- a/paddle/fluid/operators/data/batch_decode_random_crop_op.cc +++ b/paddle/fluid/operators/data/batch_decode_random_crop_op.cc @@ -132,6 +132,13 @@ and 255. "for optionally converting the image, can be \"unchanged\" " ",\"gray\" , \"rgb\" .") .SetDefault("unchanged"); + AddAttr( + "data_layout", + "(string, default NCHW) Only used in " + "an optional string from: \"NHWC\", \"NCHW\". " + "Specify that the data format of the input and output data is " + "channel_first or channel_last.") + .SetDefault("NCHW"); AddAttr("aspect_ratio_min", "").SetDefault(3./4.); AddAttr("aspect_ratio_max", "").SetDefault(4./3.); AddAttr("area_min", "").SetDefault(0.08); diff --git a/paddle/fluid/operators/data/batch_decode_random_crop_op.cu b/paddle/fluid/operators/data/batch_decode_random_crop_op.cu index eecf5da9bed9ca..c15e9d0ae3e471 100644 --- a/paddle/fluid/operators/data/batch_decode_random_crop_op.cu +++ b/paddle/fluid/operators/data/batch_decode_random_crop_op.cu @@ -16,12 +16,15 @@ #include "paddle/fluid/operators/data/batch_decode_random_crop_op.h" #include "paddle/fluid/operators/reader/lod_tensor_blocking_queue.h" +#include "paddle/fluid/operators/math/math_function.h" +// #include "paddle/fluid/operators/transpose_op.h" namespace paddle { namespace operators { namespace data { using LoDTensorBlockingQueueHolder = operators::reader::LoDTensorBlockingQueueHolder; +using DataLayout = framework::DataLayout; NvjpegDecoderThreadPool* decode_pool = nullptr; // std::seed_seq* rand_seq = nullptr; @@ -50,6 +53,15 @@ class GPUBatchDecodeRandomCropKernel : public framework::OpKernel { auto& out_array = *out->GetMutable(); out_array.resize(inputs->size()); + const std::string data_layout_str = ctx.Attr("data_layout"); + const DataLayout data_layout = + framework::StringToDataLayout(data_layout_str); + + framework::LoDTensorArray temp_array; + if (data_layout == DataLayout::kNCHW) { + temp_array.resize(inputs->size()); + } + auto aspect_ratio_min = ctx.Attr("aspect_ratio_min"); auto aspect_ratio_max = ctx.Attr("aspect_ratio_max"); AspectRatioRange aspect_ratio_range{aspect_ratio_min, aspect_ratio_max}; @@ -66,20 +78,52 @@ class GPUBatchDecodeRandomCropKernel : public framework::OpKernel { const framework::LoDTensor x = inputs->at(i); auto* x_data = x.data(); size_t x_numel = static_cast(x.numel()); - - NvjpegDecodeTask task = { - .bit_stream = x_data, - .bit_len = x_numel, - .tensor = &out_array[i], - .roi_generator = new RandomROIGenerator( - aspect_ratio_range, area_range, rands[i]), - .place = dev - }; - decode_pool->AddTask(std::make_shared(task)); + + if (data_layout == DataLayout::kNCHW){ + NvjpegDecodeTask task = { + .bit_stream = x_data, + .bit_len = x_numel, + .tensor = &temp_array[i], + .roi_generator = new RandomROIGenerator( + aspect_ratio_range, area_range, rands[i]), + .place = dev + }; + decode_pool->AddTask(std::make_shared(task)); + } + else{ + NvjpegDecodeTask task = { + .bit_stream = x_data, + .bit_len = x_numel, + .tensor = &out_array[i], + .roi_generator = new RandomROIGenerator( + aspect_ratio_range, area_range, rands[i]), + .place = dev + }; + decode_pool->AddTask(std::make_shared(task)); + } + } decode_pool->RunAll(true); + if (data_layout == DataLayout::kNCHW){ + const auto& dev_ctx = ctx.cuda_device_context(); + paddle::operators::math::Transpose trans; + std::vector axis = {2, 0, 1}; + // LOG(ERROR) << "start transpose 01!!!"; + for (size_t i = 0; i < inputs->size(); i++) { + // Do transpose + const framework::DDim& in_sizes = temp_array[i].dims(); + // const int ndim = in_sizes.size(); + framework::DDim transposed_input_shape = in_sizes.transpose(axis); + std::vector transposed_input_shape_ = + framework::vectorize(transposed_input_shape); + out_array[i].Resize(transposed_input_shape); + out_array[i].mutable_data(dev_ctx.GetPlace()); + trans(dev_ctx, temp_array[i], &out_array[i], axis); + } + } + LOG(ERROR) << "GPUBatchDecodeJpegKernel Compute finish"; } }; diff --git a/paddle/fluid/operators/data/batch_decode_random_crop_op.h b/paddle/fluid/operators/data/batch_decode_random_crop_op.h index fd23be38341dc9..de96e38ca95ef9 100644 --- a/paddle/fluid/operators/data/batch_decode_random_crop_op.h +++ b/paddle/fluid/operators/data/batch_decode_random_crop_op.h @@ -24,7 +24,6 @@ #include "paddle/fluid/platform/enforce.h" #include "paddle/fluid/operators/data/nvjpeg_decoder.h" - namespace paddle { namespace operators { namespace data { diff --git a/paddle/fluid/operators/data/batch_resize_op.cu b/paddle/fluid/operators/data/batch_resize_op.cu index e2c0319fdcf051..7728a6b4631631 100644 --- a/paddle/fluid/operators/data/batch_resize_op.cu +++ b/paddle/fluid/operators/data/batch_resize_op.cu @@ -239,7 +239,11 @@ class BatchResizeCUDAKernel : public framework::OpKernel { img->dims()[0] : img->dims()[2]; std::vector out_dim = {static_cast(x->size()), + size[0], size[1], img_c}; + if (data_layout == DataLayout::kNCHW) { + out_dim = {static_cast(x->size()), img_c, size[0], size[1]}; + } out->Resize(framework::make_ddim(out_dim)); out->mutable_data(ctx.GetPlace()); diff --git a/paddle/fluid/operators/data/file_label_loader_op.h b/paddle/fluid/operators/data/file_label_loader_op.h index bbc34a7b546bf1..eef07790372a24 100644 --- a/paddle/fluid/operators/data/file_label_loader_op.h +++ b/paddle/fluid/operators/data/file_label_loader_op.h @@ -147,6 +147,7 @@ static void ParseFilesAndLabels(const std::string data_root, } closedir(dir); } + } std::map>> root_to_samples_; @@ -156,6 +157,8 @@ static std::vector>* GetFilesAndLabelsFromCache(cons if (iter == root_to_samples_.end()) { std::vector> samples; ParseFilesAndLabels(data_root, &samples); + // std::cout << "files 0: " << samples[0].first << std::endl; + // std::cout << "files 1: " << samples[1].first << std::endl; LOG(ERROR) << "Init samples: " << samples.size(); root_to_samples_[data_root] = samples; } diff --git a/paddle/fluid/operators/data/nvjpeg_decoder.cc b/paddle/fluid/operators/data/nvjpeg_decoder.cc index 784b69b28b05c6..188723ba306fea 100644 --- a/paddle/fluid/operators/data/nvjpeg_decoder.cc +++ b/paddle/fluid/operators/data/nvjpeg_decoder.cc @@ -86,6 +86,7 @@ void NvjpegDecoder::CPUDecodeRandomCropResize(const uint8_t* data, size_t length framework::LoDTensor& temp, framework::LoDTensor* out, platform::Place place) { cv::Mat image = cv::imdecode(cv::Mat(1, length, CV_8UC1, const_cast(data)), cv::IMREAD_COLOR); + cv::Mat cropped; int height; int width; @@ -99,18 +100,21 @@ void NvjpegDecoder::CPUDecodeRandomCropResize(const uint8_t* data, size_t length cv_roi.height = roi.h; height = roi.h; width = roi.w; - std::vector out_shape = {3, height, width}; + + std::vector out_shape = {height, width, 3}; temp.Resize(framework::make_ddim(out_shape)); platform::CPUPlace cpu; // allocate memory and assign to out_image auto* data = temp.mutable_data(cpu); - cropped.data = data; + // todo jianglielin: why not work? + // cropped.data = data; image(cv_roi).copyTo(cropped); - out->Resize(framework::make_ddim(out_shape)); - + + std::memcpy(data, cropped.data, 3 * height * width); + TensorCopySync(temp, place, out); - } else { + LOG(ERROR) << "Not Use Opencv decode!!!"; // throw error } } @@ -139,6 +143,9 @@ int NvjpegDecoder::ParseDecodeParams( return 1; #endif } + else{ + // LOG(ERROR) << "Use nvjpeg decode!!!"; + } int64_t width = static_cast(widths[0]); int64_t height = static_cast(heights[0]); @@ -180,7 +187,7 @@ int NvjpegDecoder::ParseDecodeParams( width = roi.w; } - std::vector out_shape = {output_components, height, width}; + std::vector out_shape = {height, width, output_components}; out->Resize(framework::make_ddim(out_shape)); // allocate memory and assign to out_image @@ -217,7 +224,9 @@ void NvjpegDecoder::Run( if (res) { return; } + // LOG(ERROR) << "ParseDecodeParams finish !!!"; Decode(bit_stream, bit_len, &image); + // LOG(ERROR) << "Decode finish !!!"; } NvjpegDecoderThreadPool::NvjpegDecoderThreadPool(const int num_threads, const std::string mode, const int dev_id) diff --git a/paddle/fluid/operators/math/math_function.cu b/paddle/fluid/operators/math/math_function.cu index cfdfa456e39eac..0ee26752aebc34 100644 --- a/paddle/fluid/operators/math/math_function.cu +++ b/paddle/fluid/operators/math/math_function.cu @@ -50,6 +50,7 @@ template struct SetConstant; \ template struct Transpose; \ template struct Transpose; \ + template struct Transpose; \ template struct Transpose; \ template struct Transpose; \ template struct Transpose