Skip to content

Commit

Permalink
Merge pull request PaddlePaddle#14 from LielinJiang/fix-decode-channel-error
Browse files Browse the repository at this point in the history

Fix decode channel error and add layout for decode api
  • Loading branch information
heavengate authored Feb 14, 2022
2 parents a30b9fb + a30d38f commit 19b3a08
Show file tree
Hide file tree
Showing 9 changed files with 88 additions and 23 deletions.
6 changes: 1 addition & 5 deletions paddle/fluid/operators/data/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -4,10 +4,6 @@ if(WITH_UNITY_BUILD)
include(unity_build_rule.cmake)
endif()

# find_package(ZLIB)
# include_directories(${ZLIB_INCLUDE_DIRS})
# TARGET_LINK_LIBRARIES( ${ZLIB_LIBRARIES})

cc_library(pipeline SRCS pipeline.cc DEPS parallel_executor simple_threadpool scope)
op_library(dataloader_op SRCS dataloader_op.cc dataloader_op.cu.cc DEPS pipeline ${OP_HEADER_DEPS})

Expand All @@ -31,4 +27,4 @@ op_library(random_flip_op SRCS random_flip_op.cc DEPS ${OP_HEADER_DEPS})
# register_operators()

# TODO: add test here
# cc_test(xxx SRCS xxx DEPS xxx)
# cc_test(xxx SRCS xxx DEPS xxx)
7 changes: 7 additions & 0 deletions paddle/fluid/operators/data/batch_decode_random_crop_op.cc
Original file line number Diff line number Diff line change
Expand Up @@ -132,6 +132,13 @@ and 255.
"for optionally converting the image, can be \"unchanged\" "
",\"gray\" , \"rgb\" .")
.SetDefault("unchanged");
AddAttr<std::string>(
"data_layout",
"(string, default NCHW) Only used in "
"an optional string from: \"NHWC\", \"NCHW\". "
"Specify that the data format of the input and output data is "
"channel_first or channel_last.")
.SetDefault("NCHW");
AddAttr<float>("aspect_ratio_min", "").SetDefault(3./4.);
AddAttr<float>("aspect_ratio_max", "").SetDefault(4./3.);
AddAttr<float>("area_min", "").SetDefault(0.08);
Expand Down
64 changes: 54 additions & 10 deletions paddle/fluid/operators/data/batch_decode_random_crop_op.cu
Original file line number Diff line number Diff line change
Expand Up @@ -16,12 +16,15 @@

#include "paddle/fluid/operators/data/batch_decode_random_crop_op.h"
#include "paddle/fluid/operators/reader/lod_tensor_blocking_queue.h"
#include "paddle/fluid/operators/math/math_function.h"
// #include "paddle/fluid/operators/transpose_op.h"

namespace paddle {
namespace operators {
namespace data {

using LoDTensorBlockingQueueHolder = operators::reader::LoDTensorBlockingQueueHolder;
using DataLayout = framework::DataLayout;

NvjpegDecoderThreadPool* decode_pool = nullptr;
// std::seed_seq* rand_seq = nullptr;
Expand Down Expand Up @@ -50,6 +53,15 @@ class GPUBatchDecodeRandomCropKernel : public framework::OpKernel<T> {
auto& out_array = *out->GetMutable<framework::LoDTensorArray>();
out_array.resize(inputs->size());

const std::string data_layout_str = ctx.Attr<std::string>("data_layout");
const DataLayout data_layout =
framework::StringToDataLayout(data_layout_str);

framework::LoDTensorArray temp_array;
if (data_layout == DataLayout::kNCHW) {
temp_array.resize(inputs->size());
}

auto aspect_ratio_min = ctx.Attr<float>("aspect_ratio_min");
auto aspect_ratio_max = ctx.Attr<float>("aspect_ratio_max");
AspectRatioRange aspect_ratio_range{aspect_ratio_min, aspect_ratio_max};
Expand All @@ -66,20 +78,52 @@ class GPUBatchDecodeRandomCropKernel : public framework::OpKernel<T> {
const framework::LoDTensor x = inputs->at(i);
auto* x_data = x.data<T>();
size_t x_numel = static_cast<size_t>(x.numel());

NvjpegDecodeTask task = {
.bit_stream = x_data,
.bit_len = x_numel,
.tensor = &out_array[i],
.roi_generator = new RandomROIGenerator(
aspect_ratio_range, area_range, rands[i]),
.place = dev
};
decode_pool->AddTask(std::make_shared<NvjpegDecodeTask>(task));

if (data_layout == DataLayout::kNCHW){
NvjpegDecodeTask task = {
.bit_stream = x_data,
.bit_len = x_numel,
.tensor = &temp_array[i],
.roi_generator = new RandomROIGenerator(
aspect_ratio_range, area_range, rands[i]),
.place = dev
};
decode_pool->AddTask(std::make_shared<NvjpegDecodeTask>(task));
}
else{
NvjpegDecodeTask task = {
.bit_stream = x_data,
.bit_len = x_numel,
.tensor = &out_array[i],
.roi_generator = new RandomROIGenerator(
aspect_ratio_range, area_range, rands[i]),
.place = dev
};
decode_pool->AddTask(std::make_shared<NvjpegDecodeTask>(task));
}

}

decode_pool->RunAll(true);

if (data_layout == DataLayout::kNCHW){
const auto& dev_ctx = ctx.cuda_device_context();
paddle::operators::math::Transpose<paddle::platform::CUDADeviceContext, T, 3> trans;
std::vector<int> axis = {2, 0, 1};
// LOG(ERROR) << "start transpose 01!!!";
for (size_t i = 0; i < inputs->size(); i++) {
// Do transpose
const framework::DDim& in_sizes = temp_array[i].dims();
// const int ndim = in_sizes.size();
framework::DDim transposed_input_shape = in_sizes.transpose(axis);
std::vector<int64_t> transposed_input_shape_ =
framework::vectorize(transposed_input_shape);
out_array[i].Resize(transposed_input_shape);
out_array[i].mutable_data<T>(dev_ctx.GetPlace());
trans(dev_ctx, temp_array[i], &out_array[i], axis);
}
}

LOG(ERROR) << "GPUBatchDecodeJpegKernel Compute finish";
}
};
Expand Down
1 change: 0 additions & 1 deletion paddle/fluid/operators/data/batch_decode_random_crop_op.h
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,6 @@
#include "paddle/fluid/platform/enforce.h"
#include "paddle/fluid/operators/data/nvjpeg_decoder.h"


namespace paddle {
namespace operators {
namespace data {
Expand Down
4 changes: 4 additions & 0 deletions paddle/fluid/operators/data/batch_resize_op.cu
Original file line number Diff line number Diff line change
Expand Up @@ -239,7 +239,11 @@ class BatchResizeCUDAKernel : public framework::OpKernel<T> {
img->dims()[0] : img->dims()[2];

std::vector<int64_t> out_dim = {static_cast<int64_t>(x->size()),
size[0], size[1], img_c};
if (data_layout == DataLayout::kNCHW) {
out_dim = {static_cast<int64_t>(x->size()),
img_c, size[0], size[1]};
}
out->Resize(framework::make_ddim(out_dim));
out->mutable_data<T>(ctx.GetPlace());

Expand Down
3 changes: 3 additions & 0 deletions paddle/fluid/operators/data/file_label_loader_op.h
Original file line number Diff line number Diff line change
Expand Up @@ -147,6 +147,7 @@ static void ParseFilesAndLabels(const std::string data_root,
}
closedir(dir);
}

}

std::map<std::string, std::vector<std::pair<std::string, int>>> root_to_samples_;
Expand All @@ -156,6 +157,8 @@ static std::vector<std::pair<std::string, int>>* GetFilesAndLabelsFromCache(cons
if (iter == root_to_samples_.end()) {
std::vector<std::pair<std::string, int>> samples;
ParseFilesAndLabels(data_root, &samples);
// std::cout << "files 0: " << samples[0].first << std::endl;
// std::cout << "files 1: " << samples[1].first << std::endl;
LOG(ERROR) << "Init samples: " << samples.size();
root_to_samples_[data_root] = samples;
}
Expand Down
21 changes: 15 additions & 6 deletions paddle/fluid/operators/data/nvjpeg_decoder.cc
Original file line number Diff line number Diff line change
Expand Up @@ -86,6 +86,7 @@ void NvjpegDecoder::CPUDecodeRandomCropResize(const uint8_t* data, size_t length
framework::LoDTensor& temp, framework::LoDTensor* out, platform::Place place) {
cv::Mat image =
cv::imdecode(cv::Mat(1, length, CV_8UC1, const_cast<unsigned char*>(data)), cv::IMREAD_COLOR);

cv::Mat cropped;
int height;
int width;
Expand All @@ -99,18 +100,21 @@ void NvjpegDecoder::CPUDecodeRandomCropResize(const uint8_t* data, size_t length
cv_roi.height = roi.h;
height = roi.h;
width = roi.w;
std::vector<int64_t> out_shape = {3, height, width};

std::vector<int64_t> out_shape = {height, width, 3};
temp.Resize(framework::make_ddim(out_shape));
platform::CPUPlace cpu;
// allocate memory and assign to out_image
auto* data = temp.mutable_data<uint8_t>(cpu);
cropped.data = data;
// TODO(jianglielin): why does assigning cropped.data directly (below) not work?
// cropped.data = data;
image(cv_roi).copyTo(cropped);
out->Resize(framework::make_ddim(out_shape));


std::memcpy(data, cropped.data, 3 * height * width);

TensorCopySync(temp, place, out);

} else {
LOG(ERROR) << "Not Use Opencv decode!!!";
// throw error
}
}
Expand Down Expand Up @@ -139,6 +143,9 @@ int NvjpegDecoder::ParseDecodeParams(
return 1;
#endif
}
else{
// LOG(ERROR) << "Use nvjpeg decode!!!";
}

int64_t width = static_cast<int64_t>(widths[0]);
int64_t height = static_cast<int64_t>(heights[0]);
Expand Down Expand Up @@ -180,7 +187,7 @@ int NvjpegDecoder::ParseDecodeParams(
width = roi.w;
}

std::vector<int64_t> out_shape = {output_components, height, width};
std::vector<int64_t> out_shape = {height, width, output_components};
out->Resize(framework::make_ddim(out_shape));

// allocate memory and assign to out_image
Expand Down Expand Up @@ -217,7 +224,9 @@ void NvjpegDecoder::Run(
if (res) {
return;
}
// LOG(ERROR) << "ParseDecodeParams finish !!!";
Decode(bit_stream, bit_len, &image);
// LOG(ERROR) << "Decode finish !!!";
}

NvjpegDecoderThreadPool::NvjpegDecoderThreadPool(const int num_threads, const std::string mode, const int dev_id)
Expand Down
1 change: 1 addition & 0 deletions paddle/fluid/operators/math/math_function.cu
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,7 @@ template struct SetConstant<platform::CUDADeviceContext,
template struct Transpose<platform::CUDADeviceContext, float16, RANK>; \
template struct Transpose<platform::CUDADeviceContext, bfloat16, RANK>; \
template struct Transpose<platform::CUDADeviceContext, int8_t, RANK>; \
template struct Transpose<platform::CUDADeviceContext, uint8_t, RANK>; \
template struct Transpose<platform::CUDADeviceContext, int32_t, RANK>; \
template struct Transpose<platform::CUDADeviceContext, int64_t, RANK>; \
template struct Transpose<platform::CUDADeviceContext, \
Expand Down
4 changes: 3 additions & 1 deletion python/paddle/vision/ops.py
Original file line number Diff line number Diff line change
Expand Up @@ -932,6 +932,7 @@ def image_decode(x, mode='unchanged', num_threads=2, name=None):
def image_decode_random_crop(x,
mode='unchanged',
num_threads=2,
data_layout='NCHW',
aspect_ratio_min=3./4.,
aspect_ratio_max=4./3.,
area_min=0.08,
Expand Down Expand Up @@ -982,7 +983,7 @@ def image_decode_random_crop(x,
core.VarDesc.VarType.LOD_TENSOR_ARRAY, False)
program_id = utils._hash_with_id(mode, num_threads, name, local_rank)
return _C_ops.batch_decode_random_crop(
x, out, "mode", mode, "num_threads", num_threads,
x, out, "mode", mode, "num_threads", num_threads, "data_layout", data_layout,
"aspect_ratio_min", aspect_ratio_min,
"aspect_ratio_max", aspect_ratio_max,
"area_min", area_min, "area_max", area_max,
Expand All @@ -992,6 +993,7 @@ def image_decode_random_crop(x,
inputs = {'X': x}
attrs = {"mode": mode,
"num_threads": num_threads,
"data_layout": data_layout,
"aspect_ratio_min": aspect_ratio_min,
"aspect_ratio_max": aspect_ratio_max,
"area_min": area_min,
Expand Down

0 comments on commit 19b3a08

Please sign in to comment.