Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Support trt engine auto build in runtime for dynamic shape #52162

Merged
merged 12 commits into from
Apr 17, 2023
Merged
Show file tree
Hide file tree
Changes from 9 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion paddle/fluid/inference/analysis/ir_pass_manager.cc
Original file line number Diff line number Diff line change
Expand Up @@ -199,7 +199,7 @@ void IRPassManager::CreatePasses(Argument *argument,
optim_cache_dir));
}
pass->Set("model_opt_cache_dir", new std::string(optim_cache_dir));
} else if (use_static_engine || enable_int8) {
jiweibo marked this conversation as resolved.
Show resolved Hide resolved
} else if (use_static_engine || enable_int8 || with_dynamic_shape) {
std::string model_opt_cache_dir =
argument->Has("model_dir")
? argument->model_dir()
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
// limitations under the License.

#include "paddle/fluid/inference/analysis/ir_passes/tensorrt_subgraph_pass.h"
#include <fcntl.h>
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

这个头文件有啥用吗?

#include <cstddef>
#include <string>
#include <unordered_set>
Expand Down Expand Up @@ -349,18 +350,38 @@ void TensorRtSubgraphPass::CreateTensorRTOp(
Get<std::map<std::string, std::vector<int>>>("optim_shape_tensor");

auto allow_build_at_runtime = Get<bool>("trt_allow_build_at_runtime");
auto with_dynamic_shape = Get<bool>("with_dynamic_shape");
auto shape_range_info_path = Get<std::string>("trt_shape_range_info_path");
auto trt_tuned_dynamic_shape = Get<bool>("trt_tuned_dynamic_shape");
int max_batch_size = Get<int>("max_batch_size");
if (trt_tuned_dynamic_shape) {
VLOG(1) << "trt dynamic_shape deserialize from " << shape_range_info_path;
inference::DeserializeShapeRangeInfo(shape_range_info_path,
&min_input_shape,
&max_input_shape,
&opt_input_shape,
&min_shape_tensor,
&max_shape_tensor,
&opt_shape_tensor);
if (!shape_range_info_path.empty()) {
VLOG(1) << "trt dynamic_shape deserialize from " << shape_range_info_path;
inference::DeserializeShapeRangeInfo(shape_range_info_path,
&min_input_shape,
&max_input_shape,
&opt_input_shape,
&min_shape_tensor,
&max_shape_tensor,
&opt_shape_tensor);
} else {
shape_range_info_path =
Get<std::string>("model_opt_cache_dir") + "shape_range_info.pbtxt";
if (open(shape_range_info_path.c_str(), O_RDONLY) != -1) {
VLOG(1) << "trt dynamic_shape deserialize from "
<< shape_range_info_path;
inference::DeserializeShapeRangeInfo(shape_range_info_path,
&min_input_shape,
&max_input_shape,
&opt_input_shape,
&min_shape_tensor,
&max_shape_tensor,
&opt_shape_tensor);
} else {
int fd = open(shape_range_info_path.c_str(), O_RDONLY | O_CREAT);
close(fd);
}
}
}

// The following procedure is used to rename all the intermediate
Expand Down Expand Up @@ -447,6 +468,7 @@ void TensorRtSubgraphPass::CreateTensorRTOp(
op_desc->SetAttr("shape_range_info_path", shape_range_info_path);
op_desc->SetAttr("use_inspector", Get<bool>("use_inspector"));
op_desc->SetAttr("model_precision", Get<int>("model_precision"));
op_desc->SetAttr("with_dynamic_shape", with_dynamic_shape);

// we record all inputs' shapes in attr to check if they are consistent
// with the real inputs' shapes retrieved from scope when trt runs.
Expand Down Expand Up @@ -563,6 +585,7 @@ void TensorRtSubgraphPass::CreateTensorRTOp(
precision_mode,
calibrator.get(),
Get<int>("gpu_device_id"),
with_dynamic_shape,
min_input_shape,
max_input_shape,
opt_input_shape,
Expand Down Expand Up @@ -607,6 +630,12 @@ void TensorRtSubgraphPass::CreateTensorRTOp(
}
}

// If with_dynamic_shape is configured, but min_input_shape is empty,
// create trt engine in runtime instead of in pass.
if (with_dynamic_shape && min_input_shape.empty()) {
return;
}

// the following code will NOT run in the following situations:
// 1. calibration mode (generate trt int8 calibration table data)
// 2. already load serialized trt engine info.
Expand Down
5 changes: 3 additions & 2 deletions paddle/fluid/inference/api/paddle_analysis_config.h
Original file line number Diff line number Diff line change
Expand Up @@ -644,8 +644,9 @@ struct PD_INFER_DECL AnalysisConfig {
/// mode.
/// \param allow_build_at_runtime allow build trt engine at runtime.
///
void EnableTunedTensorRtDynamicShape(const std::string& shape_range_info_path,
bool allow_build_at_runtime = true);
void EnableTunedTensorRtDynamicShape(
const std::string& shape_range_info_path = "",
bool allow_build_at_runtime = true);

///
/// \brief A boolean state telling whether to use tuned tensorrt dynamic
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -177,6 +177,7 @@ TEST(CustomPluginCreater, DynamicShapePlugin) {
AnalysisConfig::Precision::kFloat32,
nullptr,
0,
true,
min_input_shape,
max_input_shape,
optim_input_shape));
Expand Down
162 changes: 100 additions & 62 deletions paddle/fluid/inference/tensorrt/engine.h
Original file line number Diff line number Diff line change
Expand Up @@ -224,6 +224,7 @@ class TensorRTEngine {
AnalysisConfig::Precision precision = AnalysisConfig::Precision::kFloat32,
TRTInt8Calibrator* calibrator = nullptr,
int device_id = 0,
bool with_dynamic_shape = false,
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

为什么要加上这一个选项呢?
以前是根据啥来区分是动态shape还是静态shape呢?

const ShapeMapType min_input_shape = {},
const ShapeMapType max_input_shape = {},
const ShapeMapType optim_input_shape = {},
Expand All @@ -238,6 +239,7 @@ class TensorRTEngine {
precision_(precision),
calibrator_(calibrator),
device_id_(device_id),
with_dynamic_shape_(with_dynamic_shape),
min_input_shape_(min_input_shape),
max_input_shape_(max_input_shape),
optim_input_shape_(optim_input_shape),
Expand All @@ -247,31 +249,6 @@ class TensorRTEngine {
disable_trt_plugin_fp16_(disable_trt_plugin_fp16),
model_precision_(model_precision),
logger_(logger) {
if (min_input_shape_.size() != 0 && max_input_shape_.size() != 0 &&
optim_input_shape_.size() != 0) {
PADDLE_ENFORCE_EQ(
min_input_shape_.size(),
max_input_shape_.size(),
platform::errors::InvalidArgument(
"The min_input_shape_'s size(%d) should be equal to the "
"size(%d) of max_input_shape_",
min_input_shape_.size(),
max_input_shape_.size()));
PADDLE_ENFORCE_EQ(
min_input_shape_.size(),
optim_input_shape_.size(),
platform::errors::InvalidArgument(
"The min_input_shape_'s size(%d) should be equal to the "
"size(%d) of optim_input_shape_",
min_input_shape_.size(),
optim_input_shape_.size()));
#if IS_TRT_VERSION_GE(6000)
with_dynamic_shape_ = true;
#else
LOG(WARNING) << "Using dynamic shape of TRT need ensure that the TRT "
"version should be at least 6.";
#endif
}
dy::initLibNvInferPlugins(&logger, "");
}

Expand Down Expand Up @@ -477,55 +454,116 @@ class TensorRTEngine {
ShapeMapType optim_shape_tensor() { return optim_shape_tensor_; }

bool AdjustDynamicShapeRange(const ShapeMapType& runtime_input_shape,
std::vector<std::string>* changed) {
const ShapeMapType& runtime_shape_tensor,
std::vector<std::string>* changed,
std::vector<std::string>* tensor_changed) {
bool ret = false;
changed->clear();
tensor_changed->clear();
for (const auto& it : runtime_input_shape) {
auto name = it.first;
auto input_shape = it.second;
PADDLE_ENFORCE_EQ(
min_input_shape_.count(name),
true,
platform::errors::InvalidArgument(
"TRT dynamic_shape min_input_shape %s not found.", name));
PADDLE_ENFORCE_EQ(min_input_shape_[name].size(),
input_shape.size(),
platform::errors::InvalidArgument(
"TRT dynamic_shape min_input_shape %s size not "
"equal, the min_input_shape[%s].size()=%d"
", but the runtime_input_shape[%s].size()=%d.",
name,
name,
min_input_shape_[name].size(),
name,
input_shape.size()));
auto bak_min_shape = min_input_shape_[name];
auto bak_max_shape = max_input_shape_[name];
bool min_change = false;
bool max_change = false;
for (size_t d = 0; d < input_shape.size(); ++d) {
if (input_shape[d] < min_input_shape_[name][d]) {
ret = true;
min_change = true;
min_input_shape_[name][d] = input_shape[d];
}
if (input_shape[d] > max_input_shape_[name][d]) {
ret = true;
max_change = true;
max_input_shape_[name][d] = input_shape[d];
std::vector<int> bak_min_shape;
std::vector<int> bak_max_shape;
if (!min_input_shape_.count(name)) {
min_input_shape_[name] = input_shape;
max_input_shape_[name] = input_shape;
optim_input_shape_[name] = input_shape;
min_change = true;
max_change = true;
ret = true;
} else {
PADDLE_ENFORCE_EQ(min_input_shape_[name].size(),
input_shape.size(),
platform::errors::InvalidArgument(
"TRT dynamic_shape min_input_shape %s size not "
"equal, the min_input_shape[%s].size()=%d"
", but the runtime_input_shape[%s].size()=%d.",
name,
name,
min_input_shape_[name].size(),
name,
input_shape.size()));

bak_min_shape = min_input_shape_[name];
bak_max_shape = max_input_shape_[name];
for (size_t d = 0; d < input_shape.size(); ++d) {
if (input_shape[d] < min_input_shape_[name][d]) {
ret = true;
min_change = true;
min_input_shape_[name][d] = input_shape[d];
}
if (input_shape[d] > max_input_shape_[name][d]) {
ret = true;
max_change = true;
max_input_shape_[name][d] = input_shape[d];
}
}
}

if (min_change)
LOG(INFO) << "refactor shape range: " << name << ", min_shape from "
<< Vec2Str(bak_min_shape) << " to "
LOG(INFO) << "refactor tensor shape range: " << name
<< ", min_shape from " << Vec2Str(bak_min_shape) << " to "
<< Vec2Str(min_input_shape_[name]);
if (max_change)
LOG(INFO) << "refactor shape range: " << name << ", max_shape from "
<< Vec2Str(bak_max_shape) << " to "
LOG(INFO) << "refactor tensor shape range: " << name
<< ", max_shape from " << Vec2Str(bak_max_shape) << " to "
<< Vec2Str(max_input_shape_[name]);
if (min_change || max_change) changed->push_back(name);
}
for (const auto& it : runtime_shape_tensor) {
auto name = it.first;
auto shape_tensor = it.second;
bool min_change = false;
bool max_change = false;
std::vector<int> bak_min_shape;
std::vector<int> bak_max_shape;
if (!min_shape_tensor_.count(name)) {
min_shape_tensor_[name] = shape_tensor;
max_shape_tensor_[name] = shape_tensor;
optim_shape_tensor_[name] = shape_tensor;
min_change = true;
max_change = true;
ret = true;
} else {
PADDLE_ENFORCE_EQ(min_shape_tensor_[name].size(),
shape_tensor.size(),
platform::errors::InvalidArgument(
"TRT dynamic_shape min_shape_tensor %s size not "
"equal, the min_shape_tensor[%s].size()=%d"
", but the runtime_shape_tensor[%s].size()=%d.",
name,
name,
min_shape_tensor_[name].size(),
name,
shape_tensor.size()));

bak_min_shape = min_shape_tensor_[name];
bak_max_shape = max_shape_tensor_[name];
for (size_t d = 0; d < shape_tensor.size(); ++d) {
if (shape_tensor[d] < min_shape_tensor_[name][d]) {
ret = true;
min_change = true;
min_shape_tensor_[name][d] = shape_tensor[d];
}
if (shape_tensor[d] > max_shape_tensor_[name][d]) {
ret = true;
max_change = true;
max_shape_tensor_[name][d] = shape_tensor[d];
}
}
}
if (min_change)
LOG(INFO) << "refactor shape tensor range: " << name
<< ", min_shape from " << Vec2Str(bak_min_shape) << " to "
<< Vec2Str(min_shape_tensor_[name]);
if (max_change)
LOG(INFO) << "refactor shape tensor range: " << name
<< ", max_shape from " << Vec2Str(bak_max_shape) << " to "
<< Vec2Str(max_shape_tensor_[name]);
if (min_change || max_change) tensor_changed->push_back(name);
}
return ret;
}

Expand Down Expand Up @@ -670,6 +708,7 @@ class TensorRTEngine {
int max_profile_num_{1};
int cur_profile_num_{0};
std::unordered_map<PredictorID, int> profile_index_;
bool with_dynamic_shape_{false};
ShapeMapType min_input_shape_;
ShapeMapType max_input_shape_;
ShapeMapType optim_input_shape_;
Expand Down Expand Up @@ -706,9 +745,6 @@ class TensorRTEngine {

std::unordered_map<std::string, paddle::any> attrs_;
std::unordered_map<std::string, std::function<void(void)>> attr_dels_;

// For dynamic shape
bool with_dynamic_shape_{false};
#if IS_TRT_VERSION_GE(6000)
int binding_num_;
infer_ptr<nvinfer1::IBuilderConfig> infer_builder_config_;
Expand Down Expand Up @@ -772,6 +808,7 @@ class TRTEngineManager {
AnalysisConfig::Precision precision = AnalysisConfig::Precision::kFloat32,
TRTInt8Calibrator* calibrator = nullptr,
int device_id = 0,
bool with_dynamic_shape = false,
const std::map<std::string, std::vector<int>> min_input_shape = {},
const std::map<std::string, std::vector<int>> max_input_shape = {},
const std::map<std::string, std::vector<int>> optim_input_shape = {},
Expand All @@ -786,6 +823,7 @@ class TRTEngineManager {
precision,
calibrator,
device_id,
with_dynamic_shape,
min_input_shape,
max_input_shape,
optim_input_shape,
Expand Down
4 changes: 4 additions & 0 deletions paddle/fluid/inference/tensorrt/helper.h
Original file line number Diff line number Diff line change
Expand Up @@ -190,6 +190,10 @@ inline void PrintITensorShape(nvinfer1::ITensor* X) {
template <typename T>
inline std::string Vec2Str(const std::vector<T>& vec) {
std::ostringstream os;
if (vec.empty()) {
os << "()";
return os.str();
}
os << "(";
for (size_t i = 0; i < vec.size() - 1; ++i) {
os << vec[i] << ",";
Expand Down
5 changes: 5 additions & 0 deletions paddle/fluid/inference/tensorrt/test_dynamic_engine.cc
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,7 @@ class TensorRTDynamicShapeValueEngineTest : public ::testing::Test {
AnalysisConfig::Precision::kFloat32,
nullptr,
0,
true,
min_input_shape,
max_input_shape,
optim_input_shape,
Expand Down Expand Up @@ -196,6 +197,7 @@ class TensorRTDynamicEngineTest : public ::testing::Test {
AnalysisConfig::Precision::kHalf,
nullptr,
0,
true,
min_input_shape,
max_input_shape,
optim_input_shape,
Expand Down Expand Up @@ -373,6 +375,7 @@ class TensorRTDynamicTestFusedTokenPrune : public ::testing::Test {
AnalysisConfig::Precision::kFloat32,
nullptr,
0,
true,
min_input_shape,
max_input_shape,
optim_input_shape,
Expand Down Expand Up @@ -581,6 +584,7 @@ class TensorRTDynamicTestFusedTokenPruneHalf : public ::testing::Test {
AnalysisConfig::Precision::kHalf,
nullptr,
0,
true,
min_input_shape,
max_input_shape,
optim_input_shape,
Expand Down Expand Up @@ -783,6 +787,7 @@ class TensorRTDynamicShapeGNTest : public ::testing::Test {
AnalysisConfig::Precision::kInt8,
nullptr,
0,
true,
min_input_shape,
max_input_shape,
optim_input_shape,
Expand Down
Loading