Support trt engine auto build in runtime for dynamic shape (PaddlePad…
JZZ-NOTE authored and jjyaoao committed Apr 19, 2023
1 parent c533c8f commit e653f37
Showing 13 changed files with 293 additions and 88 deletions.
2 changes: 1 addition & 1 deletion paddle/fluid/inference/analysis/ir_pass_manager.cc
@@ -199,7 +199,7 @@ void IRPassManager::CreatePasses(Argument *argument,
optim_cache_dir));
}
pass->Set("model_opt_cache_dir", new std::string(optim_cache_dir));
-      } else if (use_static_engine || enable_int8) {
+      } else if (use_static_engine || enable_int8 || with_dynamic_shape) {
std::string model_opt_cache_dir =
argument->Has("model_dir")
? argument->model_dir()
paddle/fluid/inference/analysis/ir_passes/tensorrt_subgraph_pass.cc
@@ -14,6 +14,7 @@
// limitations under the License.

#include "paddle/fluid/inference/analysis/ir_passes/tensorrt_subgraph_pass.h"
+#include <fcntl.h>
#include <cstddef>
#include <string>
#include <unordered_set>
@@ -349,18 +350,38 @@ void TensorRtSubgraphPass::CreateTensorRTOp(
Get<std::map<std::string, std::vector<int>>>("optim_shape_tensor");

auto allow_build_at_runtime = Get<bool>("trt_allow_build_at_runtime");
+  auto with_dynamic_shape = Get<bool>("with_dynamic_shape");
auto shape_range_info_path = Get<std::string>("trt_shape_range_info_path");
auto trt_tuned_dynamic_shape = Get<bool>("trt_tuned_dynamic_shape");
int max_batch_size = Get<int>("max_batch_size");
if (trt_tuned_dynamic_shape) {
-    VLOG(1) << "trt dynamic_shape deserialize from " << shape_range_info_path;
-    inference::DeserializeShapeRangeInfo(shape_range_info_path,
-                                         &min_input_shape,
-                                         &max_input_shape,
-                                         &opt_input_shape,
-                                         &min_shape_tensor,
-                                         &max_shape_tensor,
-                                         &opt_shape_tensor);
+    if (!shape_range_info_path.empty()) {
+      VLOG(1) << "trt dynamic_shape deserialize from " << shape_range_info_path;
+      inference::DeserializeShapeRangeInfo(shape_range_info_path,
+                                           &min_input_shape,
+                                           &max_input_shape,
+                                           &opt_input_shape,
+                                           &min_shape_tensor,
+                                           &max_shape_tensor,
+                                           &opt_shape_tensor);
+    } else {
+      shape_range_info_path =
+          Get<std::string>("model_opt_cache_dir") + "shape_range_info.pbtxt";
+      if (open(shape_range_info_path.c_str(), O_RDONLY) != -1) {
+        VLOG(1) << "trt dynamic_shape deserialize from "
+                << shape_range_info_path;
+        inference::DeserializeShapeRangeInfo(shape_range_info_path,
+                                             &min_input_shape,
+                                             &max_input_shape,
+                                             &opt_input_shape,
+                                             &min_shape_tensor,
+                                             &max_shape_tensor,
+                                             &opt_shape_tensor);
+      } else {
+        int fd = open(shape_range_info_path.c_str(), O_RDONLY | O_CREAT);
+        close(fd);
+      }
+    }
}

// The following procedure is used to rename all the intermediate
@@ -447,6 +468,7 @@ void TensorRtSubgraphPass::CreateTensorRTOp(
op_desc->SetAttr("shape_range_info_path", shape_range_info_path);
op_desc->SetAttr("use_inspector", Get<bool>("use_inspector"));
op_desc->SetAttr("model_precision", Get<int>("model_precision"));
op_desc->SetAttr("with_dynamic_shape", with_dynamic_shape);

// we record all inputs' shapes in attr to check if they are consistent
// with the real inputs' shapes retrieved from scope when trt runs.
@@ -563,6 +585,7 @@ void TensorRtSubgraphPass::CreateTensorRTOp(
precision_mode,
calibrator.get(),
Get<int>("gpu_device_id"),
+          with_dynamic_shape,
min_input_shape,
max_input_shape,
opt_input_shape,
@@ -607,6 +630,12 @@ void TensorRtSubgraphPass::CreateTensorRTOp(
}
}

+  // If with_dynamic_shape is configured, but min_input_shape is empty,
+  // create trt engine in runtime instead of in pass.
+  if (with_dynamic_shape && min_input_shape.empty()) {
+    return;
+  }
+
  // the following code will NOT run in the following situations:
  // 1. calibration mode (generate trt int8 calibration table data)
  // 2. serialized trt engine info has already been loaded.
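The shape range file consumed above is produced by an earlier collection run. A minimal sketch of that two-phase workflow, assuming the public paddle_infer C++ API; the model directory, file name, and sample-feeding loop are placeholders, not part of this commit:

#include "paddle_inference_api.h"

void CollectThenDeploy() {
  // Phase 1: run representative samples once so Paddle records each
  // tensor's min/max/opt shapes into shape_range_info.pbtxt.
  paddle_infer::Config collect_cfg("./model_dir");
  collect_cfg.EnableUseGpu(256 /*MB*/, 0 /*gpu_id*/);
  collect_cfg.CollectShapeRangeInfo("shape_range_info.pbtxt");
  auto collector = paddle_infer::CreatePredictor(collect_cfg);
  // ... feed representative inputs through `collector` here ...

  // Phase 2: deploy with TensorRT, reusing the recorded ranges; with
  // allow_build_at_runtime=true, inputs outside the ranges trigger a rebuild.
  paddle_infer::Config deploy_cfg("./model_dir");
  deploy_cfg.EnableUseGpu(256 /*MB*/, 0 /*gpu_id*/);
  deploy_cfg.EnableTensorRtEngine(1 << 30, 1, 3,
                                  paddle_infer::PrecisionType::kFloat32,
                                  false, false);
  deploy_cfg.EnableTunedTensorRtDynamicShape("shape_range_info.pbtxt",
                                             /*allow_build_at_runtime=*/true);
  auto predictor = paddle_infer::CreatePredictor(deploy_cfg);
}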
5 changes: 3 additions & 2 deletions paddle/fluid/inference/api/paddle_analysis_config.h
@@ -644,8 +644,9 @@ struct PD_INFER_DECL AnalysisConfig {
/// mode.
/// \param allow_build_at_runtime allow build trt engine at runtime.
///
-  void EnableTunedTensorRtDynamicShape(const std::string& shape_range_info_path,
-                                       bool allow_build_at_runtime = true);
+  void EnableTunedTensorRtDynamicShape(
+      const std::string& shape_range_info_path = "",
+      bool allow_build_at_runtime = true);

///
/// \brief A boolean state telling whether to use tuned tensorrt dynamic
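With the new default argument, callers can omit the path entirely. A sketch of that call, assuming the same paddle_infer API and a hypothetical model directory; per the pass change above, an empty path falls back to shape_range_info.pbtxt in the model optimization cache directory and, if that file does not exist yet, the engine build is deferred to runtime:

paddle_infer::Config config("./model_dir");
config.EnableUseGpu(256 /*MB*/, 0 /*gpu_id*/);
config.EnableTensorRtEngine(1 << 30, 1, 3,
                            paddle_infer::PrecisionType::kFloat32,
                            false, false);
config.EnableTunedTensorRtDynamicShape();  // shape_range_info_path defaults to ""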
@@ -177,6 +177,7 @@ TEST(CustomPluginCreater, DynamicShapePlugin) {
AnalysisConfig::Precision::kFloat32,
nullptr,
0,
+        true,
min_input_shape,
max_input_shape,
optim_input_shape));
162 changes: 100 additions & 62 deletions paddle/fluid/inference/tensorrt/engine.h
@@ -224,6 +224,7 @@ class TensorRTEngine {
AnalysisConfig::Precision precision = AnalysisConfig::Precision::kFloat32,
TRTInt8Calibrator* calibrator = nullptr,
int device_id = 0,
+      bool with_dynamic_shape = false,
const ShapeMapType min_input_shape = {},
const ShapeMapType max_input_shape = {},
const ShapeMapType optim_input_shape = {},
@@ -238,6 +239,7 @@
precision_(precision),
calibrator_(calibrator),
device_id_(device_id),
+        with_dynamic_shape_(with_dynamic_shape),
min_input_shape_(min_input_shape),
max_input_shape_(max_input_shape),
optim_input_shape_(optim_input_shape),
@@ -247,31 +249,6 @@
disable_trt_plugin_fp16_(disable_trt_plugin_fp16),
model_precision_(model_precision),
logger_(logger) {
-    if (min_input_shape_.size() != 0 && max_input_shape_.size() != 0 &&
-        optim_input_shape_.size() != 0) {
-      PADDLE_ENFORCE_EQ(
-          min_input_shape_.size(),
-          max_input_shape_.size(),
-          platform::errors::InvalidArgument(
-              "The min_input_shape_'s size(%d) should be equal to the "
-              "size(%d) of max_input_shape_",
-              min_input_shape_.size(),
-              max_input_shape_.size()));
-      PADDLE_ENFORCE_EQ(
-          min_input_shape_.size(),
-          optim_input_shape_.size(),
-          platform::errors::InvalidArgument(
-              "The min_input_shape_'s size(%d) should be equal to the "
-              "size(%d) of optim_input_shape_",
-              min_input_shape_.size(),
-              optim_input_shape_.size()));
-#if IS_TRT_VERSION_GE(6000)
-      with_dynamic_shape_ = true;
-#else
-      LOG(WARNING) << "Using dynamic shape of TRT need ensure that the TRT "
-                      "version should be at least 6.";
-#endif
-    }
dy::initLibNvInferPlugins(&logger, "");
}

@@ -477,55 +454,116 @@
ShapeMapType optim_shape_tensor() { return optim_shape_tensor_; }

bool AdjustDynamicShapeRange(const ShapeMapType& runtime_input_shape,
-                               std::vector<std::string>* changed) {
+                               const ShapeMapType& runtime_shape_tensor,
+                               std::vector<std::string>* changed,
+                               std::vector<std::string>* tensor_changed) {
bool ret = false;
changed->clear();
+    tensor_changed->clear();
for (const auto& it : runtime_input_shape) {
auto name = it.first;
auto input_shape = it.second;
-      PADDLE_ENFORCE_EQ(
-          min_input_shape_.count(name),
-          true,
-          platform::errors::InvalidArgument(
-              "TRT dynamic_shape min_input_shape %s not found.", name));
-      PADDLE_ENFORCE_EQ(min_input_shape_[name].size(),
-                        input_shape.size(),
-                        platform::errors::InvalidArgument(
-                            "TRT dynamic_shape min_input_shape %s size not "
-                            "equal, the min_input_shape[%s].size()=%d"
-                            ", but the runtime_input_shape[%s].size()=%d.",
-                            name,
-                            name,
-                            min_input_shape_[name].size(),
-                            name,
-                            input_shape.size()));
-      auto bak_min_shape = min_input_shape_[name];
-      auto bak_max_shape = max_input_shape_[name];
bool min_change = false;
bool max_change = false;
-      for (size_t d = 0; d < input_shape.size(); ++d) {
-        if (input_shape[d] < min_input_shape_[name][d]) {
-          ret = true;
-          min_change = true;
-          min_input_shape_[name][d] = input_shape[d];
-        }
-        if (input_shape[d] > max_input_shape_[name][d]) {
-          ret = true;
-          max_change = true;
-          max_input_shape_[name][d] = input_shape[d];
-        }
-      }
+      std::vector<int> bak_min_shape;
+      std::vector<int> bak_max_shape;
+      if (!min_input_shape_.count(name)) {
+        min_input_shape_[name] = input_shape;
+        max_input_shape_[name] = input_shape;
+        optim_input_shape_[name] = input_shape;
+        min_change = true;
+        max_change = true;
+        ret = true;
+      } else {
+        PADDLE_ENFORCE_EQ(min_input_shape_[name].size(),
+                          input_shape.size(),
+                          platform::errors::InvalidArgument(
+                              "TRT dynamic_shape min_input_shape %s size not "
+                              "equal, the min_input_shape[%s].size()=%d"
+                              ", but the runtime_input_shape[%s].size()=%d.",
+                              name,
+                              name,
+                              min_input_shape_[name].size(),
+                              name,
+                              input_shape.size()));
+
+        bak_min_shape = min_input_shape_[name];
+        bak_max_shape = max_input_shape_[name];
+        for (size_t d = 0; d < input_shape.size(); ++d) {
+          if (input_shape[d] < min_input_shape_[name][d]) {
+            ret = true;
+            min_change = true;
+            min_input_shape_[name][d] = input_shape[d];
+          }
+          if (input_shape[d] > max_input_shape_[name][d]) {
+            ret = true;
+            max_change = true;
+            max_input_shape_[name][d] = input_shape[d];
+          }
+        }
+      }

if (min_change)
LOG(INFO) << "refactor shape range: " << name << ", min_shape from "
<< Vec2Str(bak_min_shape) << " to "
LOG(INFO) << "refactor tensor shape range: " << name
<< ", min_shape from " << Vec2Str(bak_min_shape) << " to "
<< Vec2Str(min_input_shape_[name]);
if (max_change)
LOG(INFO) << "refactor shape range: " << name << ", max_shape from "
<< Vec2Str(bak_max_shape) << " to "
LOG(INFO) << "refactor tensor shape range: " << name
<< ", max_shape from " << Vec2Str(bak_max_shape) << " to "
<< Vec2Str(max_input_shape_[name]);
if (min_change || max_change) changed->push_back(name);
}
+    for (const auto& it : runtime_shape_tensor) {
+      auto name = it.first;
+      auto shape_tensor = it.second;
+      bool min_change = false;
+      bool max_change = false;
+      std::vector<int> bak_min_shape;
+      std::vector<int> bak_max_shape;
+      if (!min_shape_tensor_.count(name)) {
+        min_shape_tensor_[name] = shape_tensor;
+        max_shape_tensor_[name] = shape_tensor;
+        optim_shape_tensor_[name] = shape_tensor;
+        min_change = true;
+        max_change = true;
+        ret = true;
+      } else {
+        PADDLE_ENFORCE_EQ(min_shape_tensor_[name].size(),
+                          shape_tensor.size(),
+                          platform::errors::InvalidArgument(
+                              "TRT dynamic_shape min_shape_tensor %s size not "
+                              "equal, the min_shape_tensor[%s].size()=%d"
+                              ", but the runtime_shape_tensor[%s].size()=%d.",
+                              name,
+                              name,
+                              min_shape_tensor_[name].size(),
+                              name,
+                              shape_tensor.size()));
+
+        bak_min_shape = min_shape_tensor_[name];
+        bak_max_shape = max_shape_tensor_[name];
+        for (size_t d = 0; d < shape_tensor.size(); ++d) {
+          if (shape_tensor[d] < min_shape_tensor_[name][d]) {
+            ret = true;
+            min_change = true;
+            min_shape_tensor_[name][d] = shape_tensor[d];
+          }
+          if (shape_tensor[d] > max_shape_tensor_[name][d]) {
+            ret = true;
+            max_change = true;
+            max_shape_tensor_[name][d] = shape_tensor[d];
+          }
+        }
+      }
+      if (min_change)
+        LOG(INFO) << "refactor shape tensor range: " << name
+                  << ", min_shape from " << Vec2Str(bak_min_shape) << " to "
+                  << Vec2Str(min_shape_tensor_[name]);
+      if (max_change)
+        LOG(INFO) << "refactor shape tensor range: " << name
+                  << ", max_shape from " << Vec2Str(bak_max_shape) << " to "
+                  << Vec2Str(max_shape_tensor_[name]);
+      if (min_change || max_change) tensor_changed->push_back(name);
+    }
return ret;
}

@@ -670,6 +708,7 @@
int max_profile_num_{1};
int cur_profile_num_{0};
std::unordered_map<PredictorID, int> profile_index_;
+  bool with_dynamic_shape_{false};
ShapeMapType min_input_shape_;
ShapeMapType max_input_shape_;
ShapeMapType optim_input_shape_;
@@ -706,9 +745,6 @@

std::unordered_map<std::string, paddle::any> attrs_;
std::unordered_map<std::string, std::function<void(void)>> attr_dels_;

-  // For dynamic shape
-  bool with_dynamic_shape_{false};
#if IS_TRT_VERSION_GE(6000)
int binding_num_;
infer_ptr<nvinfer1::IBuilderConfig> infer_builder_config_;
@@ -772,6 +808,7 @@ class TRTEngineManager {
AnalysisConfig::Precision precision = AnalysisConfig::Precision::kFloat32,
TRTInt8Calibrator* calibrator = nullptr,
int device_id = 0,
+      bool with_dynamic_shape = false,
const std::map<std::string, std::vector<int>> min_input_shape = {},
const std::map<std::string, std::vector<int>> max_input_shape = {},
const std::map<std::string, std::vector<int>> optim_input_shape = {},
@@ -786,6 +823,7 @@
precision,
calibrator,
device_id,
+        with_dynamic_shape,
min_input_shape,
max_input_shape,
optim_input_shape,
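The net effect of the AdjustDynamicShapeRange rewrite above: a tensor (or shape tensor) that was never profiled is now admitted by seeding its min/max/optim entries with the first shape seen at runtime, instead of failing the old min_input_shape_.count(name) enforce; already-known tensors still only widen their ranges. A standalone sketch of that widening rule, with plain std::map standing in for ShapeMapType (an illustration, not Paddle code):

#include <map>
#include <string>
#include <vector>

using ShapeMap = std::map<std::string, std::vector<int>>;

// Returns true if the range for `name` was widened or newly created.
bool WidenRange(const std::string& name, const std::vector<int>& shape,
                ShapeMap* min_s, ShapeMap* max_s, ShapeMap* opt_s) {
  if (min_s->count(name) == 0) {
    // First observation: seed all three ranges with the runtime shape.
    (*min_s)[name] = (*max_s)[name] = (*opt_s)[name] = shape;
    return true;
  }
  bool changed = false;
  std::vector<int>& mn = (*min_s)[name];
  std::vector<int>& mx = (*max_s)[name];
  for (size_t d = 0; d < shape.size(); ++d) {
    if (shape[d] < mn[d]) { mn[d] = shape[d]; changed = true; }
    if (shape[d] > mx[d]) { mx[d] = shape[d]; changed = true; }
  }
  return changed;
}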
4 changes: 4 additions & 0 deletions paddle/fluid/inference/tensorrt/helper.h
@@ -190,6 +190,10 @@ inline void PrintITensorShape(nvinfer1::ITensor* X) {
template <typename T>
inline std::string Vec2Str(const std::vector<T>& vec) {
std::ostringstream os;
+  if (vec.empty()) {
+    os << "()";
+    return os.str();
+  }
os << "(";
for (size_t i = 0; i < vec.size() - 1; ++i) {
os << vec[i] << ",";
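The empty-vector guard matters: the loop below iterates up to vec.size() - 1 on an unsigned size_t, which underflows when vec is empty, and with ranges now created at runtime, AdjustDynamicShapeRange can pass an empty bak_min_shape to Vec2Str when logging a newly admitted tensor. Expected behavior after the fix (a sketch; the closing element and parenthesis of Vec2Str are elided above):

std::vector<int> empty;                    // e.g. bak_min_shape on first admission
std::vector<int> dims = {1, 3, 224, 224};
Vec2Str(empty);  // returns "()" instead of reading out of bounds
Vec2Str(dims);   // presumably returns "(1,3,224,224)"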
5 changes: 5 additions & 0 deletions paddle/fluid/inference/tensorrt/test_dynamic_engine.cc
@@ -70,6 +70,7 @@ class TensorRTDynamicShapeValueEngineTest : public ::testing::Test {
AnalysisConfig::Precision::kFloat32,
nullptr,
0,
+        true,
min_input_shape,
max_input_shape,
optim_input_shape,
@@ -196,6 +197,7 @@ class TensorRTDynamicEngineTest : public ::testing::Test {
AnalysisConfig::Precision::kHalf,
nullptr,
0,
+        true,
min_input_shape,
max_input_shape,
optim_input_shape,
@@ -373,6 +375,7 @@ class TensorRTDynamicTestFusedTokenPrune : public ::testing::Test {
AnalysisConfig::Precision::kFloat32,
nullptr,
0,
+        true,
min_input_shape,
max_input_shape,
optim_input_shape,
@@ -581,6 +584,7 @@ class TensorRTDynamicTestFusedTokenPruneHalf : public ::testing::Test {
AnalysisConfig::Precision::kHalf,
nullptr,
0,
+        true,
min_input_shape,
max_input_shape,
optim_input_shape,
@@ -783,6 +787,7 @@ class TensorRTDynamicShapeGNTest : public ::testing::Test {
AnalysisConfig::Precision::kInt8,
nullptr,
0,
+        true,
min_input_shape,
max_input_shape,
optim_input_shape,