
[Backend] Add DisablePaddleTrtOPs #788

Merged
merged 2 commits on Dec 5, 2022
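
This PR adds a trt_disabled_ops_ list to PaddleBackendOption and forwards it to Paddle Inference's Exp_DisableTensorRtOPs, so the named operators stay on native Paddle kernels instead of being converted into TensorRT subgraphs. A minimal sketch of how the backend-level option could be used in a build with ENABLE_TRT_BACKEND (the option fields are taken from this diff; the model paths and op names are placeholders, and the user-facing RuntimeOption wrapper implied by the PR title is not part of the files shown below):

#include "fastdeploy/backends/paddle/paddle_backend.h"

int main() {
  fastdeploy::PaddleBackendOption option;
  option.use_gpu = true;
  option.gpu_id = 0;
  option.enable_trt = true;  // run Paddle Inference with its TensorRT engine
  // Keep these ops on Paddle kernels; the op names are illustrative only.
  option.trt_disabled_ops_ = {"pool2d", "elementwise_add"};

  fastdeploy::PaddleBackend backend;
  // Paths are placeholders for an exported Paddle inference model.
  backend.InitFromPaddle("model.pdmodel", "model.pdiparams", option);
  return 0;
}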
183 changes: 108 additions & 75 deletions fastdeploy/backends/paddle/paddle_backend.cc
@@ -22,24 +22,34 @@ void PaddleBackend::BuildOption(const PaddleBackendOption& option) {
option_ = option;
if (option.use_gpu) {
config_.EnableUseGpu(option.gpu_mem_init_size, option.gpu_id);
if(option_.external_stream_) {
if (option_.external_stream_) {
config_.SetExecStream(option_.external_stream_);
}
if (option.enable_trt) {
#ifdef ENABLE_TRT_BACKEND
config_.Exp_DisableTensorRtOPs(option.trt_disabled_ops_);
auto precision = paddle_infer::PrecisionType::kFloat32;
if (option.trt_option.enable_fp16) {
precision = paddle_infer::PrecisionType::kHalf;
}
bool use_static = false;
if (option.trt_option.serialize_file != "") {
FDWARNING << "Detect that tensorrt cache file has been set to " << option.trt_option.serialize_file << ", but while enable paddle2trt, please notice that the cache file will save to the directory where paddle model saved." << std::endl;
FDWARNING
<< "Detect that tensorrt cache file has been set to "
<< option.trt_option.serialize_file
<< ", but while enable paddle2trt, please notice that the cache "
"file will save to the directory where paddle model saved."
<< std::endl;
use_static = true;
}
config_.EnableTensorRtEngine(option.trt_option.max_workspace_size, option.trt_option.max_batch_size, 3, precision, use_static);
config_.EnableTensorRtEngine(option.trt_option.max_workspace_size,
option.trt_option.max_batch_size, 3,
precision, use_static);
SetTRTDynamicShapeToConfig(option);
#else
FDWARNING << "The FastDeploy is not compiled with TensorRT backend, so will fallback to GPU with Paddle Inference Backend." << std::endl;
FDWARNING << "The FastDeploy is not compiled with TensorRT backend, so "
"will fallback to GPU with Paddle Inference Backend."
<< std::endl;
#endif
}
} else if (option.use_ipu) {
@@ -98,47 +108,58 @@ bool PaddleBackend::InitFromPaddle(const std::string& model_file,
if (!ReadBinaryFromFile(model_file, &contents)) {
return false;
}
auto reader =
paddle2onnx::PaddleReader(contents.c_str(), contents.size());
auto reader = paddle2onnx::PaddleReader(contents.c_str(), contents.size());

// If it's a quantized model and CPU inference with MKLDNN is used, automatically switch to int8 mode
if (reader.is_quantize_model) {
if (option.use_gpu) {
FDWARNING << "The loaded model is a quantized model, while inference on GPU, please use TensorRT backend to get better performance." << std::endl;
FDWARNING << "The loaded model is a quantized model, while inference on "
"GPU, please use TensorRT backend to get better performance."
<< std::endl;
if (option.enable_trt) {
#ifdef ENABLE_TRT_BACKEND
bool use_static = false;
if (option.trt_option.serialize_file != "") {
FDWARNING << "Detect that tensorrt cache file has been set to " << option.trt_option.serialize_file << ", but while enable paddle2trt, please notice that the cache file will save to the directory where paddle model saved." << std::endl;
FDWARNING
<< "Detect that tensorrt cache file has been set to "
<< option.trt_option.serialize_file
<< ", but while enable paddle2trt, please notice that the cache "
"file will save to the directory where paddle model saved."
<< std::endl;
use_static = true;
}
config_.EnableTensorRtEngine(option.trt_option.max_workspace_size, option.trt_option.max_batch_size, 3, paddle_infer::PrecisionType::kInt8, use_static, false);
config_.EnableTensorRtEngine(option.trt_option.max_workspace_size,
option.trt_option.max_batch_size, 3,
paddle_infer::PrecisionType::kInt8,
use_static, false);
SetTRTDynamicShapeToConfig(option);

#endif
}
}
if (option.enable_mkldnn) {
config_.EnableMkldnnInt8();
} else {
FDWARNING << "The loaded model is a quantized model, while inference on CPU, please enable MKLDNN to get better performance." << std::endl;
FDWARNING << "The loaded model is a quantized model, while inference on "
"CPU, please enable MKLDNN to get better performance."
<< std::endl;
}
}

inputs_desc_.resize(reader.num_inputs);
for (int i = 0; i < reader.num_inputs; ++i) {
std::string name(reader.inputs[i].name);
std::vector<int64_t> shape(
reader.inputs[i].shape,
reader.inputs[i].shape + reader.inputs[i].rank);
std::vector<int64_t> shape(reader.inputs[i].shape,
reader.inputs[i].shape + reader.inputs[i].rank);
inputs_desc_[i].name = name;
inputs_desc_[i].shape.assign(shape.begin(), shape.end());
inputs_desc_[i].dtype = ReaderDataTypeToFD(reader.inputs[i].dtype);
}
outputs_desc_.resize(reader.num_outputs);
for (int i = 0; i < reader.num_outputs; ++i) {
std::string name(reader.outputs[i].name);
std::vector<int64_t> shape(reader.outputs[i].shape, reader.outputs[i].shape + reader.outputs[i].rank);
std::vector<int64_t> shape(reader.outputs[i].shape,
reader.outputs[i].shape +
reader.outputs[i].rank);
outputs_desc_[i].name = name;
outputs_desc_[i].shape.assign(shape.begin(), shape.end());
outputs_desc_[i].dtype = ReaderDataTypeToFD(reader.outputs[i].dtype);
@@ -147,7 +168,8 @@ bool PaddleBackend::InitFromPaddle(const std::string& model_file,
if (option.collect_shape) {
// Set the shape info file.
auto curr_model_dir = GetDirFromPath(model_file);
std::string shape_range_info = PathJoin(curr_model_dir, "shape_range_info.pbtxt");
std::string shape_range_info =
PathJoin(curr_model_dir, "shape_range_info.pbtxt");
if (!CheckFileExists(shape_range_info)) {
FDINFO << "Start generating shape range info file." << std::endl;
paddle_infer::Config analysis_config;
@@ -164,7 +186,8 @@ bool PaddleBackend::InitFromPaddle(const std::string& model_file,
CollectShapeRun(predictor_tmp.get(), opt_shape);
FDINFO << "Finish generating shape range info file." << std::endl;
}
FDINFO << "Start loading shape range info file "<< shape_range_info << " to set TensorRT dynamic shape." << std::endl;
FDINFO << "Start loading shape range info file " << shape_range_info
<< " to set TensorRT dynamic shape." << std::endl;
config_.EnableTunedTensorRtDynamicShape(shape_range_info, false);
}
#endif
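
When option.collect_shape is enabled, this path builds a temporary predictor, runs it once per shape map (min, max, opt) with dummy inputs, writes shape_range_info.pbtxt next to the model, and then loads that file through EnableTunedTensorRtDynamicShape. A rough sketch of the inputs this path expects, using the option fields visible in this diff (the tensor name and dimensions are made up for illustration):

// Dynamic-shape ranges consumed by the collect_shape path above.
option.collect_shape = true;
option.trt_option.min_shape["x"] = {1, 3, 224, 224};
option.trt_option.opt_shape["x"] = {4, 3, 224, 224};
option.trt_option.max_shape["x"] = {8, 3, 224, 224};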
@@ -194,8 +217,7 @@ std::vector<TensorInfo> PaddleBackend::GetOutputInfos() {
}

bool PaddleBackend::Infer(std::vector<FDTensor>& inputs,
std::vector<FDTensor>* outputs,
bool copy_to_fd) {
std::vector<FDTensor>* outputs, bool copy_to_fd) {
if (inputs.size() != inputs_desc_.size()) {
FDERROR << "[PaddleBackend] Size of inputs(" << inputs.size()
<< ") should keep same with the inputs of this model("
@@ -211,61 +233,61 @@ bool PaddleBackend::Infer(std::vector<FDTensor>& inputs,
predictor_->Run();

// output share backend memory only support CPU or GPU
if(option_.use_ipu) {
if (option_.use_ipu) {
copy_to_fd = true;
}
outputs->resize(outputs_desc_.size());
for (size_t i = 0; i < outputs_desc_.size(); ++i) {
auto handle = predictor_->GetOutputHandle(outputs_desc_[i].name);
if(copy_to_fd) {
if (copy_to_fd) {
(*outputs)[i].is_pinned_memory = option_.enable_pinned_memory;
}
PaddleTensorToFDTensor(handle, &((*outputs)[i]), copy_to_fd);
}
return true;
}

std::unique_ptr<BaseBackend> PaddleBackend::Clone(void *stream, int device_id) {
std::unique_ptr<BaseBackend> new_backend = utils::make_unique<PaddleBackend>();
std::unique_ptr<BaseBackend> PaddleBackend::Clone(void* stream, int device_id) {
std::unique_ptr<BaseBackend> new_backend =
utils::make_unique<PaddleBackend>();
auto casted_backend = dynamic_cast<PaddleBackend*>(new_backend.get());
if(device_id > 0 && option_.use_gpu == true && device_id != option_.gpu_id) {
if (device_id > 0 && option_.use_gpu == true && device_id != option_.gpu_id) {
auto clone_option = option_;
clone_option.gpu_id = device_id;
clone_option.external_stream_ = stream;
casted_backend->InitFromPaddle(clone_option.model_file,
clone_option.params_file,
clone_option);
FDWARNING << "The target device id:"
<< device_id
<< " is different from current device id:"
<< option_.gpu_id
<< ", cannot share memory with current engine."
<< std::endl;
clone_option.params_file, clone_option);
FDWARNING << "The target device id:" << device_id
<< " is different from current device id:" << option_.gpu_id
<< ", cannot share memory with current engine." << std::endl;
return new_backend;
}
casted_backend->inputs_desc_.assign(inputs_desc_.begin(), inputs_desc_.end());
casted_backend->outputs_desc_.assign(outputs_desc_.begin(), outputs_desc_.end());
casted_backend->outputs_desc_.assign(outputs_desc_.begin(),
outputs_desc_.end());
casted_backend->predictor_ = std::move(predictor_->Clone(stream));
return new_backend;
}

#ifdef ENABLE_TRT_BACKEND
void PaddleBackend::SetTRTDynamicShapeToConfig(const PaddleBackendOption& option) {
std::map<std::string, std::vector<int>> max_shape;
std::map<std::string, std::vector<int>> min_shape;
std::map<std::string, std::vector<int>> opt_shape;
GetDynamicShapeFromOption(option, &max_shape, &min_shape, &opt_shape);
void PaddleBackend::SetTRTDynamicShapeToConfig(
const PaddleBackendOption& option) {
std::map<std::string, std::vector<int>> max_shape;
std::map<std::string, std::vector<int>> min_shape;
std::map<std::string, std::vector<int>> opt_shape;
GetDynamicShapeFromOption(option, &max_shape, &min_shape, &opt_shape);
if (min_shape.size() > 0) {
FDINFO << "Start setting trt dynamic shape." << std::endl;
if (min_shape.size() > 0) {
config_.SetTRTDynamicShapeInfo(min_shape, max_shape, opt_shape);
}
config_.SetTRTDynamicShapeInfo(min_shape, max_shape, opt_shape);
FDINFO << "Finish setting trt dynamic shape." << std::endl;
}
}

void PaddleBackend::GetDynamicShapeFromOption(const PaddleBackendOption& option,
std::map<std::string, std::vector<int>>* max_shape,
std::map<std::string, std::vector<int>>* min_shape,
std::map<std::string, std::vector<int>>* opt_shape) const {
void PaddleBackend::GetDynamicShapeFromOption(
const PaddleBackendOption& option,
std::map<std::string, std::vector<int>>* max_shape,
std::map<std::string, std::vector<int>>* min_shape,
std::map<std::string, std::vector<int>>* opt_shape) const {
auto print_shape = [](const std::vector<int>& shape) -> std::string {
std::ostringstream oss;
oss << "[";
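
Clone reuses the existing Paddle predictor when the requested device matches the current one; if a different device_id is asked for, it re-initializes a new backend from the stored model and parameter files and only copies the option. A short call sketch against the signature shown in this diff (the stream and device id values are illustrative):

// Clone onto GPU 1. Because the device differs from option_.gpu_id, the clone
// is rebuilt from the model files rather than sharing memory with this engine.
std::unique_ptr<fastdeploy::BaseBackend> cloned =
    backend.Clone(/*stream=*/nullptr, /*device_id=*/1);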
@@ -281,55 +303,66 @@ void PaddleBackend::GetDynamicShapeFromOption(const PaddleBackendOption& option,
for (const auto& item : option.trt_option.min_shape) {
auto max_iter = option.trt_option.max_shape.find(item.first);
auto opt_iter = option.trt_option.opt_shape.find(item.first);
FDASSERT(max_iter != option.trt_option.max_shape.end(), "Cannot find %s in TrtBackendOption::min_shape.", item.first.c_str());
FDASSERT(opt_iter != option.trt_option.opt_shape.end(), "Cannot find %s in TrtBackendOption::opt_shape.", item.first.c_str());
(*max_shape)[item.first].assign(max_iter->second.begin(), max_iter->second.end());
(*opt_shape)[item.first].assign(opt_iter->second.begin(), opt_iter->second.end());
FDASSERT(max_iter != option.trt_option.max_shape.end(),
"Cannot find %s in TrtBackendOption::min_shape.",
item.first.c_str());
FDASSERT(opt_iter != option.trt_option.opt_shape.end(),
"Cannot find %s in TrtBackendOption::opt_shape.",
item.first.c_str());
(*max_shape)[item.first].assign(max_iter->second.begin(),
max_iter->second.end());
(*opt_shape)[item.first].assign(opt_iter->second.begin(),
opt_iter->second.end());
(*min_shape)[item.first].assign(item.second.begin(), item.second.end());
FDINFO << item.first << ": the max shape = " << print_shape(max_iter->second)
FDINFO << item.first
<< ": the max shape = " << print_shape(max_iter->second)
<< ", the min shape = " << print_shape(item.second)
<< ", the opt shape = " << print_shape(opt_iter->second) << std::endl;
<< ", the opt shape = " << print_shape(opt_iter->second)
<< std::endl;
}
}

void PaddleBackend::CollectShapeRun(paddle_infer::Predictor* predictor,
void PaddleBackend::CollectShapeRun(
paddle_infer::Predictor* predictor,
const std::map<std::string, std::vector<int>>& shape) const {
auto input_names = predictor->GetInputNames();
auto input_type = predictor->GetInputTypes();
for(auto name : input_names) {
FDASSERT(shape.find(name) != shape.end() && input_type.find(name) != input_type.end(),
"Paddle Input name [%s] is not one of the trt dynamic shape.", name.c_str());
for (auto name : input_names) {
FDASSERT(shape.find(name) != shape.end() &&
input_type.find(name) != input_type.end(),
"Paddle Input name [%s] is not one of the trt dynamic shape.",
name.c_str());
auto tensor = predictor->GetInputHandle(name);
auto shape_value = shape.at(name);
int shape_num = std::accumulate(shape_value.begin(), shape_value.end(), 1,
std::multiplies<int>());
tensor->Reshape(shape_value);
auto dtype = input_type[name];
switch (dtype) {
case paddle_infer::DataType::FLOAT32: {
std::vector<float> input_data(shape_num, 1.0);
tensor->CopyFromCpu(input_data.data());
break;
}
case paddle_infer::DataType::INT32: {
std::vector<int> input_data(shape_num, 1);
tensor->CopyFromCpu(input_data.data());
break;
}
case paddle_infer::DataType::INT64: {
std::vector<int64_t> input_data(shape_num, 1);
tensor->CopyFromCpu(input_data.data());
break;
}
default: {
FDASSERT(false, "Input data Paddle backend only supports FP32/INT32/INT64 currently.");
break;
}
case paddle_infer::DataType::FLOAT32: {
std::vector<float> input_data(shape_num, 1.0);
tensor->CopyFromCpu(input_data.data());
break;
}
case paddle_infer::DataType::INT32: {
std::vector<int> input_data(shape_num, 1);
tensor->CopyFromCpu(input_data.data());
break;
}
case paddle_infer::DataType::INT64: {
std::vector<int64_t> input_data(shape_num, 1);
tensor->CopyFromCpu(input_data.data());
break;
}
default: {
FDASSERT(false, "Input data Paddle backend only supports "
"FP32/INT32/INT64 currently.");
break;
}
}
}
predictor->Run();
}
#endif


} // namespace fastdeploy
26 changes: 13 additions & 13 deletions fastdeploy/backends/paddle/paddle_backend.h
100755 → 100644
@@ -23,8 +23,8 @@
#ifdef ENABLE_PADDLE_FRONTEND
#include "paddle2onnx/converter.h"
#endif
#include "paddle_inference_api.h" // NOLINT
#include "fastdeploy/utils/unique_ptr.h"
#include "paddle_inference_api.h" // NOLINT

#ifdef ENABLE_TRT_BACKEND
#include "fastdeploy/backends/tensorrt/trt_backend.h"
@@ -60,6 +60,7 @@ struct PaddleBackendOption {
#ifdef ENABLE_TRT_BACKEND
TrtBackendOption trt_option;
bool collect_shape = false;
std::vector<std::string> trt_disabled_ops_{};
#endif

#ifdef WITH_IPU
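
The only functional change in this header is the new trt_disabled_ops_ member (TRT builds only); the remaining hunks below are formatting. An illustrative helper, not part of this diff, showing how a higher-level setter such as the DisablePaddleTrtOPs named in the PR title could populate it:

// Hypothetical wrapper (not from this diff): appends op names to the backend
// option so BuildOption forwards them to Exp_DisableTensorRtOPs.
void DisablePaddleTrtOps(fastdeploy::PaddleBackendOption* option,
                         const std::vector<std::string>& ops) {
  option->trt_disabled_ops_.insert(option->trt_disabled_ops_.end(),
                                   ops.begin(), ops.end());
}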
@@ -91,8 +92,7 @@ void ShareTensorFromFDTensor(paddle_infer::Tensor* tensor, FDTensor& fd_tensor);
// if copy_to_fd is true, copy memory data to FDTensor
/// else share memory to FDTensor
void PaddleTensorToFDTensor(std::unique_ptr<paddle_infer::Tensor>& tensor,
FDTensor* fd_tensor,
bool copy_to_fd);
FDTensor* fd_tensor, bool copy_to_fd);

// Convert data type from paddle inference to fastdeploy
FDDataType PaddleDataTypeToFD(const paddle_infer::DataType& dtype);
@@ -106,20 +106,18 @@ class PaddleBackend : public BaseBackend {
virtual ~PaddleBackend() = default;
void BuildOption(const PaddleBackendOption& option);

bool InitFromPaddle(
const std::string& model_file, const std::string& params_file,
const PaddleBackendOption& option = PaddleBackendOption());
bool
InitFromPaddle(const std::string& model_file, const std::string& params_file,
const PaddleBackendOption& option = PaddleBackendOption());

bool Infer(std::vector<FDTensor>& inputs,
std::vector<FDTensor>* outputs,
bool Infer(std::vector<FDTensor>& inputs, std::vector<FDTensor>* outputs,
bool copy_to_fd = true) override;


int NumInputs() const override { return inputs_desc_.size(); }

int NumOutputs() const override { return outputs_desc_.size(); }

std::unique_ptr<BaseBackend> Clone(void *stream = nullptr,
std::unique_ptr<BaseBackend> Clone(void* stream = nullptr,
int device_id = -1) override;

TensorInfo GetInputInfo(int index) override;
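
A short usage sketch for the backend-level inference API declared above (tensor preparation is schematic; FDTensor construction details are outside this diff):

// Run inference; with copy_to_fd left at its default (true), the outputs own
// their memory instead of sharing the backend's buffers.
std::vector<fastdeploy::FDTensor> inputs(backend.NumInputs());
std::vector<fastdeploy::FDTensor> outputs;
backend.Infer(inputs, &outputs);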
@@ -129,9 +127,11 @@

private:
#ifdef ENABLE_TRT_BACKEND
void CollectShapeRun(paddle_infer::Predictor* predictor,
const std::map<std::string, std::vector<int>>& shape) const;
void GetDynamicShapeFromOption(const PaddleBackendOption& option,
void
CollectShapeRun(paddle_infer::Predictor* predictor,
const std::map<std::string, std::vector<int>>& shape) const;
void GetDynamicShapeFromOption(
const PaddleBackendOption& option,
std::map<std::string, std::vector<int>>* max_shape,
std::map<std::string, std::vector<int>>* min_shape,
std::map<std::string, std::vector<int>>* opt_shape) const;