Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Support trt engine auto build in runtime for dynamic shape #52162

Merged
merged 12 commits into from
Apr 17, 2023
Merged
Show file tree
Hide file tree
Changes from 9 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion paddle/fluid/inference/analysis/ir_pass_manager.cc
Original file line number Diff line number Diff line change
Expand Up @@ -199,7 +199,7 @@ void IRPassManager::CreatePasses(Argument *argument,
optim_cache_dir));
}
pass->Set("model_opt_cache_dir", new std::string(optim_cache_dir));
} else if (use_static_engine || enable_int8) {
jiweibo marked this conversation as resolved.
Show resolved Hide resolved
} else if (use_static_engine || enable_int8 || with_dynamic_shape) {
std::string model_opt_cache_dir =
argument->Has("model_dir")
? argument->model_dir()
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
// limitations under the License.

#include "paddle/fluid/inference/analysis/ir_passes/tensorrt_subgraph_pass.h"
#include <fcntl.h>
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

这个头文件有啥用吗?

#include <cstddef>
#include <string>
#include <unordered_set>
Expand Down Expand Up @@ -349,18 +350,38 @@ void TensorRtSubgraphPass::CreateTensorRTOp(
Get<std::map<std::string, std::vector<int>>>("optim_shape_tensor");

auto allow_build_at_runtime = Get<bool>("trt_allow_build_at_runtime");
auto with_dynamic_shape = Get<bool>("with_dynamic_shape");
auto shape_range_info_path = Get<std::string>("trt_shape_range_info_path");
auto trt_tuned_dynamic_shape = Get<bool>("trt_tuned_dynamic_shape");
int max_batch_size = Get<int>("max_batch_size");
if (trt_tuned_dynamic_shape) {
VLOG(1) << "trt dynamic_shape deserialize from " << shape_range_info_path;
inference::DeserializeShapeRangeInfo(shape_range_info_path,
&min_input_shape,
&max_input_shape,
&opt_input_shape,
&min_shape_tensor,
&max_shape_tensor,
&opt_shape_tensor);
if (!shape_range_info_path.empty()) {
VLOG(1) << "trt dynamic_shape deserialize from " << shape_range_info_path;
inference::DeserializeShapeRangeInfo(shape_range_info_path,
&min_input_shape,
&max_input_shape,
&opt_input_shape,
&min_shape_tensor,
&max_shape_tensor,
&opt_shape_tensor);
} else {
shape_range_info_path =
Get<std::string>("model_opt_cache_dir") + "shape_range_info.pbtxt";
if (open(shape_range_info_path.c_str(), O_RDONLY) != -1) {
VLOG(1) << "trt dynamic_shape deserialize from "
<< shape_range_info_path;
inference::DeserializeShapeRangeInfo(shape_range_info_path,
&min_input_shape,
&max_input_shape,
&opt_input_shape,
&min_shape_tensor,
&max_shape_tensor,
&opt_shape_tensor);
} else {
int fd = open(shape_range_info_path.c_str(), O_RDONLY | O_CREAT);
close(fd);
}
}
}

// The following procedure is used to rename all the intermediate
Expand Down Expand Up @@ -447,6 +468,7 @@ void TensorRtSubgraphPass::CreateTensorRTOp(
op_desc->SetAttr("shape_range_info_path", shape_range_info_path);
op_desc->SetAttr("use_inspector", Get<bool>("use_inspector"));
op_desc->SetAttr("model_precision", Get<int>("model_precision"));
op_desc->SetAttr("with_dynamic_shape", with_dynamic_shape);

// we record all inputs' shapes in attr to check if they are consistent
// with the real inputs' shapes retrieved from scope when trt runs.
Expand Down Expand Up @@ -563,6 +585,7 @@ void TensorRtSubgraphPass::CreateTensorRTOp(
precision_mode,
calibrator.get(),
Get<int>("gpu_device_id"),
with_dynamic_shape,
min_input_shape,
max_input_shape,
opt_input_shape,
Expand Down Expand Up @@ -607,6 +630,12 @@ void TensorRtSubgraphPass::CreateTensorRTOp(
}
}

// If with_dynamic_shape is configured, but min_input_shape is empty,
// create trt engine in runtime instead of in pass.
if (with_dynamic_shape && min_input_shape.empty()) {
return;
}

// the following code will NOT run in the following situations:
// 1. calibration mode (generate trt int8 calibration table data)
// 2. already load serialized trt engine info.
Expand Down
5 changes: 3 additions & 2 deletions paddle/fluid/inference/api/paddle_analysis_config.h
Original file line number Diff line number Diff line change
Expand Up @@ -644,8 +644,9 @@ struct PD_INFER_DECL AnalysisConfig {
/// mode.
/// \param allow_build_at_runtime allow build trt engine at runtime.
///
void EnableTunedTensorRtDynamicShape(const std::string& shape_range_info_path,
bool allow_build_at_runtime = true);
void EnableTunedTensorRtDynamicShape(
const std::string& shape_range_info_path = "",
bool allow_build_at_runtime = true);

///
/// \brief A boolean state telling whether to use tuned tensorrt dynamic
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -177,6 +177,7 @@ TEST(CustomPluginCreater, DynamicShapePlugin) {
AnalysisConfig::Precision::kFloat32,
nullptr,
0,
true,
min_input_shape,
max_input_shape,
optim_input_shape));
Expand Down
162 changes: 100 additions & 62 deletions paddle/fluid/inference/tensorrt/engine.h
Original file line number Diff line number Diff line change
Expand Up @@ -224,6 +224,7 @@ class TensorRTEngine {
AnalysisConfig::Precision precision = AnalysisConfig::Precision::kFloat32,
TRTInt8Calibrator* calibrator = nullptr,
int device_id = 0,
bool with_dynamic_shape = false,
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

为什么要加上这一个选项呢?
以前是根据啥来区分是动态shape还是静态shape呢?

const ShapeMapType min_input_shape = {},
const ShapeMapType max_input_shape = {},
const ShapeMapType optim_input_shape = {},
Expand All @@ -238,6 +239,7 @@ class TensorRTEngine {
precision_(precision),
calibrator_(calibrator),
device_id_(device_id),
with_dynamic_shape_(with_dynamic_shape),
min_input_shape_(min_input_shape),
max_input_shape_(max_input_shape),
optim_input_shape_(optim_input_shape),
Expand All @@ -247,31 +249,6 @@ class TensorRTEngine {
disable_trt_plugin_fp16_(disable_trt_plugin_fp16),
model_precision_(model_precision),
logger_(logger) {
if (min_input_shape_.size() != 0 && max_input_shape_.size() != 0 &&
optim_input_shape_.size() != 0) {
PADDLE_ENFORCE_EQ(
min_input_shape_.size(),
max_input_shape_.size(),
platform::errors::InvalidArgument(
"The min_input_shape_'s size(%d) should be equal to the "
"size(%d) of max_input_shape_",
min_input_shape_.size(),
max_input_shape_.size()));
PADDLE_ENFORCE_EQ(
min_input_shape_.size(),
optim_input_shape_.size(),
platform::errors::InvalidArgument(
"The min_input_shape_'s size(%d) should be equal to the "
"size(%d) of optim_input_shape_",
min_input_shape_.size(),
optim_input_shape_.size()));
#if IS_TRT_VERSION_GE(6000)
with_dynamic_shape_ = true;
#else
LOG(WARNING) << "Using dynamic shape of TRT need ensure that the TRT "
"version should be at least 6.";
#endif
}
dy::initLibNvInferPlugins(&logger, "");
}

Expand Down Expand Up @@ -477,55 +454,116 @@ class TensorRTEngine {
ShapeMapType optim_shape_tensor() { return optim_shape_tensor_; }

bool AdjustDynamicShapeRange(const ShapeMapType& runtime_input_shape,
std::vector<std::string>* changed) {
const ShapeMapType& runtime_shape_tensor,
std::vector<std::string>* changed,
std::vector<std::string>* tensor_changed) {
bool ret = false;
changed->clear();
tensor_changed->clear();
for (const auto& it : runtime_input_shape) {
auto name = it.first;
auto input_shape = it.second;
PADDLE_ENFORCE_EQ(
min_input_shape_.count(name),
true,
platform::errors::InvalidArgument(
"TRT dynamic_shape min_input_shape %s not found.", name));
PADDLE_ENFORCE_EQ(min_input_shape_[name].size(),
input_shape.size(),
platform::errors::InvalidArgument(
"TRT dynamic_shape min_input_shape %s size not "
"equal, the min_input_shape[%s].size()=%d"
", but the runtime_input_shape[%s].size()=%d.",
name,
name,
min_input_shape_[name].size(),
name,
input_shape.size()));
auto bak_min_shape = min_input_shape_[name];
auto bak_max_shape = max_input_shape_[name];
bool min_change = false;
bool max_change = false;
for (size_t d = 0; d < input_shape.size(); ++d) {
if (input_shape[d] < min_input_shape_[name][d]) {
ret = true;
min_change = true;
min_input_shape_[name][d] = input_shape[d];
}
if (input_shape[d] > max_input_shape_[name][d]) {
ret = true;
max_change = true;
max_input_shape_[name][d] = input_shape[d];
std::vector<int> bak_min_shape;
std::vector<int> bak_max_shape;
if (!min_input_shape_.count(name)) {
min_input_shape_[name] = input_shape;
max_input_shape_[name] = input_shape;
optim_input_shape_[name] = input_shape;
min_change = true;
max_change = true;
ret = true;
} else {
PADDLE_ENFORCE_EQ(min_input_shape_[name].size(),
input_shape.size(),
platform::errors::InvalidArgument(
"TRT dynamic_shape min_input_shape %s size not "
"equal, the min_input_shape[%s].size()=%d"
", but the runtime_input_shape[%s].size()=%d.",
name,
name,
min_input_shape_[name].size(),
name,
input_shape.size()));

bak_min_shape = min_input_shape_[name];
bak_max_shape = max_input_shape_[name];
for (size_t d = 0; d < input_shape.size(); ++d) {
if (input_shape[d] < min_input_shape_[name][d]) {
ret = true;
min_change = true;
min_input_shape_[name][d] = input_shape[d];
}
if (input_shape[d] > max_input_shape_[name][d]) {
ret = true;
max_change = true;
max_input_shape_[name][d] = input_shape[d];
}
}
}

if (min_change)
LOG(INFO) << "refactor shape range: " << name << ", min_shape from "
<< Vec2Str(bak_min_shape) << " to "
LOG(INFO) << "refactor tensor shape range: " << name
<< ", min_shape from " << Vec2Str(bak_min_shape) << " to "
<< Vec2Str(min_input_shape_[name]);
if (max_change)
LOG(INFO) << "refactor shape range: " << name << ", max_shape from "
<< Vec2Str(bak_max_shape) << " to "
LOG(INFO) << "refactor tensor shape range: " << name
<< ", max_shape from " << Vec2Str(bak_max_shape) << " to "
<< Vec2Str(max_input_shape_[name]);
if (min_change || max_change) changed->push_back(name);
}
for (const auto& it : runtime_shape_tensor) {
auto name = it.first;
auto shape_tensor = it.second;
bool min_change = false;
bool max_change = false;
std::vector<int> bak_min_shape;
std::vector<int> bak_max_shape;
if (!min_shape_tensor_.count(name)) {
min_shape_tensor_[name] = shape_tensor;
max_shape_tensor_[name] = shape_tensor;
optim_shape_tensor_[name] = shape_tensor;
min_change = true;
max_change = true;
ret = true;
} else {
PADDLE_ENFORCE_EQ(min_shape_tensor_[name].size(),
shape_tensor.size(),
platform::errors::InvalidArgument(
"TRT dynamic_shape min_shape_tensor %s size not "
"equal, the min_shape_tensor[%s].size()=%d"
", but the runtime_shape_tensor[%s].size()=%d.",
name,
name,
min_shape_tensor_[name].size(),
name,
shape_tensor.size()));

bak_min_shape = min_shape_tensor_[name];
bak_max_shape = max_shape_tensor_[name];
for (size_t d = 0; d < shape_tensor.size(); ++d) {
if (shape_tensor[d] < min_shape_tensor_[name][d]) {
ret = true;
min_change = true;
min_shape_tensor_[name][d] = shape_tensor[d];
}
if (shape_tensor[d] > max_shape_tensor_[name][d]) {
ret = true;
max_change = true;
max_shape_tensor_[name][d] = shape_tensor[d];
}
}
}
if (min_change)
LOG(INFO) << "refactor shape tensor range: " << name
<< ", min_shape from " << Vec2Str(bak_min_shape) << " to "
<< Vec2Str(min_shape_tensor_[name]);
if (max_change)
LOG(INFO) << "refactor shape tensor range: " << name
<< ", max_shape from " << Vec2Str(bak_max_shape) << " to "
<< Vec2Str(max_shape_tensor_[name]);
if (min_change || max_change) tensor_changed->push_back(name);
}
return ret;
}

Expand Down Expand Up @@ -670,6 +708,7 @@ class TensorRTEngine {
int max_profile_num_{1};
int cur_profile_num_{0};
std::unordered_map<PredictorID, int> profile_index_;
bool with_dynamic_shape_{false};
ShapeMapType min_input_shape_;
ShapeMapType max_input_shape_;
ShapeMapType optim_input_shape_;
Expand Down Expand Up @@ -706,9 +745,6 @@ class TensorRTEngine {

std::unordered_map<std::string, paddle::any> attrs_;
std::unordered_map<std::string, std::function<void(void)>> attr_dels_;

// For dynamic shape
bool with_dynamic_shape_{false};
#if IS_TRT_VERSION_GE(6000)
int binding_num_;
infer_ptr<nvinfer1::IBuilderConfig> infer_builder_config_;
Expand Down Expand Up @@ -772,6 +808,7 @@ class TRTEngineManager {
AnalysisConfig::Precision precision = AnalysisConfig::Precision::kFloat32,
TRTInt8Calibrator* calibrator = nullptr,
int device_id = 0,
bool with_dynamic_shape = false,
const std::map<std::string, std::vector<int>> min_input_shape = {},
const std::map<std::string, std::vector<int>> max_input_shape = {},
const std::map<std::string, std::vector<int>> optim_input_shape = {},
Expand All @@ -786,6 +823,7 @@ class TRTEngineManager {
precision,
calibrator,
device_id,
with_dynamic_shape,
min_input_shape,
max_input_shape,
optim_input_shape,
Expand Down
4 changes: 4 additions & 0 deletions paddle/fluid/inference/tensorrt/helper.h
Original file line number Diff line number Diff line change
Expand Up @@ -190,6 +190,10 @@ inline void PrintITensorShape(nvinfer1::ITensor* X) {
template <typename T>
inline std::string Vec2Str(const std::vector<T>& vec) {
std::ostringstream os;
if (vec.empty()) {
os << "()";
return os.str();
}
os << "(";
for (size_t i = 0; i < vec.size() - 1; ++i) {
os << vec[i] << ",";
Expand Down
5 changes: 5 additions & 0 deletions paddle/fluid/inference/tensorrt/test_dynamic_engine.cc
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,7 @@ class TensorRTDynamicShapeValueEngineTest : public ::testing::Test {
AnalysisConfig::Precision::kFloat32,
nullptr,
0,
true,
min_input_shape,
max_input_shape,
optim_input_shape,
Expand Down Expand Up @@ -196,6 +197,7 @@ class TensorRTDynamicEngineTest : public ::testing::Test {
AnalysisConfig::Precision::kHalf,
nullptr,
0,
true,
min_input_shape,
max_input_shape,
optim_input_shape,
Expand Down Expand Up @@ -373,6 +375,7 @@ class TensorRTDynamicTestFusedTokenPrune : public ::testing::Test {
AnalysisConfig::Precision::kFloat32,
nullptr,
0,
true,
min_input_shape,
max_input_shape,
optim_input_shape,
Expand Down Expand Up @@ -581,6 +584,7 @@ class TensorRTDynamicTestFusedTokenPruneHalf : public ::testing::Test {
AnalysisConfig::Precision::kHalf,
nullptr,
0,
true,
min_input_shape,
max_input_shape,
optim_input_shape,
Expand Down Expand Up @@ -783,6 +787,7 @@ class TensorRTDynamicShapeGNTest : public ::testing::Test {
AnalysisConfig::Precision::kInt8,
nullptr,
0,
true,
min_input_shape,
max_input_shape,
optim_input_shape,
Expand Down
Loading