
[Backend] Add DisablePaddleTrtOPs #788

Merged
merged 2 commits on Dec 5, 2022
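
This PR adds a trt_disabled_ops_ list to PaddleBackendOption and forwards it to Paddle Inference's Exp_DisableTensorRtOPs, so the named operators stay on native Paddle kernels instead of being converted into TensorRT subgraphs. A minimal sketch of how the backend-level option could be used in a build with ENABLE_TRT_BACKEND (the option fields are taken from this diff; the model paths and op names are placeholders, and the user-facing RuntimeOption wrapper implied by the PR title is not part of the files shown below):

#include "fastdeploy/backends/paddle/paddle_backend.h"

int main() {
  fastdeploy::PaddleBackendOption option;
  option.use_gpu = true;
  option.gpu_id = 0;
  option.enable_trt = true;  // run Paddle Inference with its TensorRT engine
  // Keep these ops on Paddle kernels; the op names are illustrative only.
  option.trt_disabled_ops_ = {"pool2d", "elementwise_add"};

  fastdeploy::PaddleBackend backend;
  // Paths are placeholders for an exported Paddle inference model.
  backend.InitFromPaddle("model.pdmodel", "model.pdiparams", option);
  return 0;
}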
183 changes: 108 additions & 75 deletions fastdeploy/backends/paddle/paddle_backend.cc
@@ -22,24 +22,34 @@ void PaddleBackend::BuildOption(const PaddleBackendOption& option) {
option_ = option;
if (option.use_gpu) {
config_.EnableUseGpu(option.gpu_mem_init_size, option.gpu_id);
if(option_.external_stream_) {
if (option_.external_stream_) {
config_.SetExecStream(option_.external_stream_);
}
if (option.enable_trt) {
#ifdef ENABLE_TRT_BACKEND
config_.Exp_DisableTensorRtOPs(option.trt_disabled_ops_);
auto precision = paddle_infer::PrecisionType::kFloat32;
if (option.trt_option.enable_fp16) {
precision = paddle_infer::PrecisionType::kHalf;
}
bool use_static = false;
if (option.trt_option.serialize_file != "") {
FDWARNING << "Detect that tensorrt cache file has been set to " << option.trt_option.serialize_file << ", but while enable paddle2trt, please notice that the cache file will save to the directory where paddle model saved." << std::endl;
FDWARNING
<< "Detect that tensorrt cache file has been set to "
<< option.trt_option.serialize_file
<< ", but while enable paddle2trt, please notice that the cache "
"file will save to the directory where paddle model saved."
<< std::endl;
use_static = true;
}
config_.EnableTensorRtEngine(option.trt_option.max_workspace_size, option.trt_option.max_batch_size, 3, precision, use_static);
config_.EnableTensorRtEngine(option.trt_option.max_workspace_size,
option.trt_option.max_batch_size, 3,
precision, use_static);
SetTRTDynamicShapeToConfig(option);
#else
FDWARNING << "The FastDeploy is not compiled with TensorRT backend, so will fallback to GPU with Paddle Inference Backend." << std::endl;
FDWARNING << "The FastDeploy is not compiled with TensorRT backend, so "
"will fallback to GPU with Paddle Inference Backend."
<< std::endl;
#endif
}
} else if (option.use_ipu) {
@@ -98,47 +108,58 @@ bool PaddleBackend::InitFromPaddle(const std::string& model_file,
if (!ReadBinaryFromFile(model_file, &contents)) {
return false;
}
auto reader =
paddle2onnx::PaddleReader(contents.c_str(), contents.size());
auto reader = paddle2onnx::PaddleReader(contents.c_str(), contents.size());

// If it's a quantized model and CPU inference with MKLDNN is used, automatically switch to int8 mode
if (reader.is_quantize_model) {
if (option.use_gpu) {
FDWARNING << "The loaded model is a quantized model, while inference on GPU, please use TensorRT backend to get better performance." << std::endl;
FDWARNING << "The loaded model is a quantized model, while inference on "
"GPU, please use TensorRT backend to get better performance."
<< std::endl;
if (option.enable_trt) {
#ifdef ENABLE_TRT_BACKEND
bool use_static = false;
if (option.trt_option.serialize_file != "") {
FDWARNING << "Detect that tensorrt cache file has been set to " << option.trt_option.serialize_file << ", but while enable paddle2trt, please notice that the cache file will save to the directory where paddle model saved." << std::endl;
FDWARNING
<< "Detect that tensorrt cache file has been set to "
<< option.trt_option.serialize_file
<< ", but while enable paddle2trt, please notice that the cache "
"file will save to the directory where paddle model saved."
<< std::endl;
use_static = true;
}
config_.EnableTensorRtEngine(option.trt_option.max_workspace_size, option.trt_option.max_batch_size, 3, paddle_infer::PrecisionType::kInt8, use_static, false);
config_.EnableTensorRtEngine(option.trt_option.max_workspace_size,
option.trt_option.max_batch_size, 3,
paddle_infer::PrecisionType::kInt8,
use_static, false);
SetTRTDynamicShapeToConfig(option);

#endif
}
}
if (option.enable_mkldnn) {
config_.EnableMkldnnInt8();
} else {
FDWARNING << "The loaded model is a quantized model, while inference on CPU, please enable MKLDNN to get better performance." << std::endl;
FDWARNING << "The loaded model is a quantized model, while inference on "
"CPU, please enable MKLDNN to get better performance."
<< std::endl;
}
}

inputs_desc_.resize(reader.num_inputs);
for (int i = 0; i < reader.num_inputs; ++i) {
std::string name(reader.inputs[i].name);
std::vector<int64_t> shape(
reader.inputs[i].shape,
reader.inputs[i].shape + reader.inputs[i].rank);
std::vector<int64_t> shape(reader.inputs[i].shape,
reader.inputs[i].shape + reader.inputs[i].rank);
inputs_desc_[i].name = name;
inputs_desc_[i].shape.assign(shape.begin(), shape.end());
inputs_desc_[i].dtype = ReaderDataTypeToFD(reader.inputs[i].dtype);
}
outputs_desc_.resize(reader.num_outputs);
for (int i = 0; i < reader.num_outputs; ++i) {
std::string name(reader.outputs[i].name);
std::vector<int64_t> shape(reader.outputs[i].shape, reader.outputs[i].shape + reader.outputs[i].rank);
std::vector<int64_t> shape(reader.outputs[i].shape,
reader.outputs[i].shape +
reader.outputs[i].rank);
outputs_desc_[i].name = name;
outputs_desc_[i].shape.assign(shape.begin(), shape.end());
outputs_desc_[i].dtype = ReaderDataTypeToFD(reader.outputs[i].dtype);
@@ -147,7 +168,8 @@ bool PaddleBackend::InitFromPaddle(const std::string& model_file,
if (option.collect_shape) {
// Set the shape info file.
auto curr_model_dir = GetDirFromPath(model_file);
std::string shape_range_info = PathJoin(curr_model_dir, "shape_range_info.pbtxt");
std::string shape_range_info =
PathJoin(curr_model_dir, "shape_range_info.pbtxt");
if (!CheckFileExists(shape_range_info)) {
FDINFO << "Start generating shape range info file." << std::endl;
paddle_infer::Config analysis_config;
@@ -164,7 +186,8 @@ bool PaddleBackend::InitFromPaddle(const std::string& model_file,
CollectShapeRun(predictor_tmp.get(), opt_shape);
FDINFO << "Finish generating shape range info file." << std::endl;
}
FDINFO << "Start loading shape range info file "<< shape_range_info << " to set TensorRT dynamic shape." << std::endl;
FDINFO << "Start loading shape range info file " << shape_range_info
<< " to set TensorRT dynamic shape." << std::endl;
config_.EnableTunedTensorRtDynamicShape(shape_range_info, false);
}
#endif
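
When option.collect_shape is enabled, this path builds a temporary predictor, runs it once per shape map (min, max, opt) with dummy inputs, writes shape_range_info.pbtxt next to the model, and then loads that file through EnableTunedTensorRtDynamicShape. A rough sketch of the inputs this path expects, using the option fields visible in this diff (the tensor name and dimensions are made up for illustration):

// Dynamic-shape ranges consumed by the collect_shape path above.
option.collect_shape = true;
option.trt_option.min_shape["x"] = {1, 3, 224, 224};
option.trt_option.opt_shape["x"] = {4, 3, 224, 224};
option.trt_option.max_shape["x"] = {8, 3, 224, 224};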
@@ -194,8 +217,7 @@ std::vector<TensorInfo> PaddleBackend::GetOutputInfos() {
}

bool PaddleBackend::Infer(std::vector<FDTensor>& inputs,
std::vector<FDTensor>* outputs,
bool copy_to_fd) {
std::vector<FDTensor>* outputs, bool copy_to_fd) {
if (inputs.size() != inputs_desc_.size()) {
FDERROR << "[PaddleBackend] Size of inputs(" << inputs.size()
<< ") should keep same with the inputs of this model("
@@ -211,61 +233,61 @@ bool PaddleBackend::Infer(std::vector<FDTensor>& inputs,
predictor_->Run();

// output share backend memory only support CPU or GPU
if(option_.use_ipu) {
if (option_.use_ipu) {
copy_to_fd = true;
}
outputs->resize(outputs_desc_.size());
for (size_t i = 0; i < outputs_desc_.size(); ++i) {
auto handle = predictor_->GetOutputHandle(outputs_desc_[i].name);
if(copy_to_fd) {
if (copy_to_fd) {
(*outputs)[i].is_pinned_memory = option_.enable_pinned_memory;
}
PaddleTensorToFDTensor(handle, &((*outputs)[i]), copy_to_fd);
}
return true;
}

std::unique_ptr<BaseBackend> PaddleBackend::Clone(void *stream, int device_id) {
std::unique_ptr<BaseBackend> new_backend = utils::make_unique<PaddleBackend>();
std::unique_ptr<BaseBackend> PaddleBackend::Clone(void* stream, int device_id) {
std::unique_ptr<BaseBackend> new_backend =
utils::make_unique<PaddleBackend>();
auto casted_backend = dynamic_cast<PaddleBackend*>(new_backend.get());
if(device_id > 0 && option_.use_gpu == true && device_id != option_.gpu_id) {
if (device_id > 0 && option_.use_gpu == true && device_id != option_.gpu_id) {
auto clone_option = option_;
clone_option.gpu_id = device_id;
clone_option.external_stream_ = stream;
casted_backend->InitFromPaddle(clone_option.model_file,
clone_option.params_file,
clone_option);
FDWARNING << "The target device id:"
<< device_id
<< " is different from current device id:"
<< option_.gpu_id
<< ", cannot share memory with current engine."
<< std::endl;
clone_option.params_file, clone_option);
FDWARNING << "The target device id:" << device_id
<< " is different from current device id:" << option_.gpu_id
<< ", cannot share memory with current engine." << std::endl;
return new_backend;
}
casted_backend->inputs_desc_.assign(inputs_desc_.begin(), inputs_desc_.end());
casted_backend->outputs_desc_.assign(outputs_desc_.begin(), outputs_desc_.end());
casted_backend->outputs_desc_.assign(outputs_desc_.begin(),
outputs_desc_.end());
casted_backend->predictor_ = std::move(predictor_->Clone(stream));
return new_backend;
}

#ifdef ENABLE_TRT_BACKEND
void PaddleBackend::SetTRTDynamicShapeToConfig(const PaddleBackendOption& option) {
std::map<std::string, std::vector<int>> max_shape;
std::map<std::string, std::vector<int>> min_shape;
std::map<std::string, std::vector<int>> opt_shape;
GetDynamicShapeFromOption(option, &max_shape, &min_shape, &opt_shape);
void PaddleBackend::SetTRTDynamicShapeToConfig(
const PaddleBackendOption& option) {
std::map<std::string, std::vector<int>> max_shape;
std::map<std::string, std::vector<int>> min_shape;
std::map<std::string, std::vector<int>> opt_shape;
GetDynamicShapeFromOption(option, &max_shape, &min_shape, &opt_shape);
if (min_shape.size() > 0) {
FDINFO << "Start setting trt dynamic shape." << std::endl;
if (min_shape.size() > 0) {
config_.SetTRTDynamicShapeInfo(min_shape, max_shape, opt_shape);
}
config_.SetTRTDynamicShapeInfo(min_shape, max_shape, opt_shape);
FDINFO << "Finish setting trt dynamic shape." << std::endl;
}
}

void PaddleBackend::GetDynamicShapeFromOption(const PaddleBackendOption& option,
std::map<std::string, std::vector<int>>* max_shape,
std::map<std::string, std::vector<int>>* min_shape,
std::map<std::string, std::vector<int>>* opt_shape) const {
void PaddleBackend::GetDynamicShapeFromOption(
const PaddleBackendOption& option,
std::map<std::string, std::vector<int>>* max_shape,
std::map<std::string, std::vector<int>>* min_shape,
std::map<std::string, std::vector<int>>* opt_shape) const {
auto print_shape = [](const std::vector<int>& shape) -> std::string {
std::ostringstream oss;
oss << "[";
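
Clone reuses the existing Paddle predictor when the requested device matches the current one; if a different device_id is asked for, it re-initializes a new backend from the stored model and parameter files and only copies the option. A short call sketch against the signature shown in this diff (the stream and device id values are illustrative):

// Clone onto GPU 1. Because the device differs from option_.gpu_id, the clone
// is rebuilt from the model files rather than sharing memory with this engine.
std::unique_ptr<fastdeploy::BaseBackend> cloned =
    backend.Clone(/*stream=*/nullptr, /*device_id=*/1);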
@@ -281,55 +303,66 @@ void PaddleBackend::GetDynamicShapeFromOption(const PaddleBackendOption& option,
for (const auto& item : option.trt_option.min_shape) {
auto max_iter = option.trt_option.max_shape.find(item.first);
auto opt_iter = option.trt_option.opt_shape.find(item.first);
FDASSERT(max_iter != option.trt_option.max_shape.end(), "Cannot find %s in TrtBackendOption::min_shape.", item.first.c_str());
FDASSERT(opt_iter != option.trt_option.opt_shape.end(), "Cannot find %s in TrtBackendOption::opt_shape.", item.first.c_str());
(*max_shape)[item.first].assign(max_iter->second.begin(), max_iter->second.end());
(*opt_shape)[item.first].assign(opt_iter->second.begin(), opt_iter->second.end());
FDASSERT(max_iter != option.trt_option.max_shape.end(),
"Cannot find %s in TrtBackendOption::min_shape.",
item.first.c_str());
FDASSERT(opt_iter != option.trt_option.opt_shape.end(),
"Cannot find %s in TrtBackendOption::opt_shape.",
item.first.c_str());
(*max_shape)[item.first].assign(max_iter->second.begin(),
max_iter->second.end());
(*opt_shape)[item.first].assign(opt_iter->second.begin(),
opt_iter->second.end());
(*min_shape)[item.first].assign(item.second.begin(), item.second.end());
FDINFO << item.first << ": the max shape = " << print_shape(max_iter->second)
FDINFO << item.first
<< ": the max shape = " << print_shape(max_iter->second)
<< ", the min shape = " << print_shape(item.second)
<< ", the opt shape = " << print_shape(opt_iter->second) << std::endl;
<< ", the opt shape = " << print_shape(opt_iter->second)
<< std::endl;
}
}

void PaddleBackend::CollectShapeRun(paddle_infer::Predictor* predictor,
void PaddleBackend::CollectShapeRun(
paddle_infer::Predictor* predictor,
const std::map<std::string, std::vector<int>>& shape) const {
auto input_names = predictor->GetInputNames();
auto input_type = predictor->GetInputTypes();
for(auto name : input_names) {
FDASSERT(shape.find(name) != shape.end() && input_type.find(name) != input_type.end(),
"Paddle Input name [%s] is not one of the trt dynamic shape.", name.c_str());
for (auto name : input_names) {
FDASSERT(shape.find(name) != shape.end() &&
input_type.find(name) != input_type.end(),
"Paddle Input name [%s] is not one of the trt dynamic shape.",
name.c_str());
auto tensor = predictor->GetInputHandle(name);
auto shape_value = shape.at(name);
int shape_num = std::accumulate(shape_value.begin(), shape_value.end(), 1,
std::multiplies<int>());
tensor->Reshape(shape_value);
auto dtype = input_type[name];
switch (dtype) {
case paddle_infer::DataType::FLOAT32: {
std::vector<float> input_data(shape_num, 1.0);
tensor->CopyFromCpu(input_data.data());
break;
}
case paddle_infer::DataType::INT32: {
std::vector<int> input_data(shape_num, 1);
tensor->CopyFromCpu(input_data.data());
break;
}
case paddle_infer::DataType::INT64: {
std::vector<int64_t> input_data(shape_num, 1);
tensor->CopyFromCpu(input_data.data());
break;
}
default: {
FDASSERT(false, "Input data Paddle backend only supports FP32/INT32/INT64 currently.");
break;
}
case paddle_infer::DataType::FLOAT32: {
std::vector<float> input_data(shape_num, 1.0);
tensor->CopyFromCpu(input_data.data());
break;
}
case paddle_infer::DataType::INT32: {
std::vector<int> input_data(shape_num, 1);
tensor->CopyFromCpu(input_data.data());
break;
}
case paddle_infer::DataType::INT64: {
std::vector<int64_t> input_data(shape_num, 1);
tensor->CopyFromCpu(input_data.data());
break;
}
default: {
FDASSERT(false, "Input data Paddle backend only supports "
"FP32/INT32/INT64 currently.");
break;
}
}
}
predictor->Run();
}
#endif


} // namespace fastdeploy
26 changes: 13 additions & 13 deletions fastdeploy/backends/paddle/paddle_backend.h
100755 → 100644
@@ -23,8 +23,8 @@
#ifdef ENABLE_PADDLE_FRONTEND
#include "paddle2onnx/converter.h"
#endif
#include "paddle_inference_api.h" // NOLINT
#include "fastdeploy/utils/unique_ptr.h"
#include "paddle_inference_api.h" // NOLINT

#ifdef ENABLE_TRT_BACKEND
#include "fastdeploy/backends/tensorrt/trt_backend.h"
@@ -60,6 +60,7 @@ struct PaddleBackendOption {
#ifdef ENABLE_TRT_BACKEND
TrtBackendOption trt_option;
bool collect_shape = false;
std::vector<std::string> trt_disabled_ops_{};
#endif

#ifdef WITH_IPU
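
The only functional change in this header is the new trt_disabled_ops_ member (TRT builds only); the remaining hunks below are formatting. An illustrative helper, not part of this diff, showing how a higher-level setter such as the DisablePaddleTrtOPs named in the PR title could populate it:

// Hypothetical wrapper (not from this diff): appends op names to the backend
// option so BuildOption forwards them to Exp_DisableTensorRtOPs.
void DisablePaddleTrtOps(fastdeploy::PaddleBackendOption* option,
                         const std::vector<std::string>& ops) {
  option->trt_disabled_ops_.insert(option->trt_disabled_ops_.end(),
                                   ops.begin(), ops.end());
}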
@@ -91,8 +92,7 @@ void ShareTensorFromFDTensor(paddle_infer::Tensor* tensor, FDTensor& fd_tensor);
// if copy_to_fd is true, copy memory data to FDTensor
/// else share memory to FDTensor
void PaddleTensorToFDTensor(std::unique_ptr<paddle_infer::Tensor>& tensor,
FDTensor* fd_tensor,
bool copy_to_fd);
FDTensor* fd_tensor, bool copy_to_fd);

// Convert data type from paddle inference to fastdeploy
FDDataType PaddleDataTypeToFD(const paddle_infer::DataType& dtype);
@@ -106,20 +106,18 @@ class PaddleBackend : public BaseBackend {
virtual ~PaddleBackend() = default;
void BuildOption(const PaddleBackendOption& option);

bool InitFromPaddle(
const std::string& model_file, const std::string& params_file,
const PaddleBackendOption& option = PaddleBackendOption());
bool
InitFromPaddle(const std::string& model_file, const std::string& params_file,
const PaddleBackendOption& option = PaddleBackendOption());

bool Infer(std::vector<FDTensor>& inputs,
std::vector<FDTensor>* outputs,
bool Infer(std::vector<FDTensor>& inputs, std::vector<FDTensor>* outputs,
bool copy_to_fd = true) override;


int NumInputs() const override { return inputs_desc_.size(); }

int NumOutputs() const override { return outputs_desc_.size(); }

std::unique_ptr<BaseBackend> Clone(void *stream = nullptr,
std::unique_ptr<BaseBackend> Clone(void* stream = nullptr,
int device_id = -1) override;

TensorInfo GetInputInfo(int index) override;
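
A short usage sketch for the backend-level inference API declared above (tensor preparation is schematic; FDTensor construction details are outside this diff):

// Run inference; with copy_to_fd left at its default (true), the outputs own
// their memory instead of sharing the backend's buffers.
std::vector<fastdeploy::FDTensor> inputs(backend.NumInputs());
std::vector<fastdeploy::FDTensor> outputs;
backend.Infer(inputs, &outputs);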
@@ -129,9 +127,11 @@

private:
#ifdef ENABLE_TRT_BACKEND
void CollectShapeRun(paddle_infer::Predictor* predictor,
const std::map<std::string, std::vector<int>>& shape) const;
void GetDynamicShapeFromOption(const PaddleBackendOption& option,
void
CollectShapeRun(paddle_infer::Predictor* predictor,
const std::map<std::string, std::vector<int>>& shape) const;
void GetDynamicShapeFromOption(
const PaddleBackendOption& option,
std::map<std::string, std::vector<int>>* max_shape,
std::map<std::string, std::vector<int>>* min_shape,
std::map<std::string, std::vector<int>>* opt_shape) const;