From 4020a927bb121f10641babd0ce7ae53e86a680b4 Mon Sep 17 00:00:00 2001
From: zhoutianzi666 <17801055074@163.com>
Date: Thu, 16 Feb 2023 03:26:13 +0000
Subject: [PATCH 1/4] allow falling back to fp16 when int8 is enabled

---
 paddle/fluid/inference/tensorrt/engine.cc | 3 +++
 paddle/fluid/inference/tensorrt/engine.h  | 8 ++++----
 2 files changed, 7 insertions(+), 4 deletions(-)

diff --git a/paddle/fluid/inference/tensorrt/engine.cc b/paddle/fluid/inference/tensorrt/engine.cc
index 812e024a1a513..a8a86c9f41627 100644
--- a/paddle/fluid/inference/tensorrt/engine.cc
+++ b/paddle/fluid/inference/tensorrt/engine.cc
@@ -161,6 +161,7 @@ void TensorRTEngine::FreezeNetwork() {
   if (enable_fp16) {
     bool support_fp16 = infer_builder_->platformHasFastFp16();
     infer_builder_config_->setFlag(nvinfer1::BuilderFlag::kFP16);
+    with_fp16_ = true;
     if (!support_fp16) {
       LOG(INFO) << "You specify FP16 mode, but the hardware do not support "
                    "FP16 speed up, use FP32 instead.";
@@ -173,8 +174,10 @@
   if (enable_int8) {
     if (!use_dla_) {
       infer_builder_config_->setFlag(nvinfer1::BuilderFlag::kFP16);
+      with_fp16_ = true;
     }
     infer_builder_config_->setFlag(nvinfer1::BuilderFlag::kINT8);
+    with_int8_ = true;
 
     if (calibrator_) {
       infer_builder_config_->setInt8Calibrator(calibrator_);
diff --git a/paddle/fluid/inference/tensorrt/engine.h b/paddle/fluid/inference/tensorrt/engine.h
index 421842cf563db..64561a25acd50 100644
--- a/paddle/fluid/inference/tensorrt/engine.h
+++ b/paddle/fluid/inference/tensorrt/engine.h
@@ -356,15 +356,13 @@ class TensorRTEngine {
   int GetRuntimeBatch();
 
   bool WithFp16() {
-    bool enable_fp16 = (precision_ == AnalysisConfig::Precision::kHalf);
     bool support_fp16 = infer_builder_->platformHasFastFp16();
-    return enable_fp16 && support_fp16;
+    return with_fp16_ && support_fp16;
   }
 
   bool WithInt8() {
-    bool enable_int8 = (precision_ == AnalysisConfig::Precision::kInt8);
     bool support_int8 = infer_builder_->platformHasFastInt8();
-    return enable_int8 && support_int8;
+    return with_int8_ && support_int8;
   }
 
   int GetDeviceId() { return device_id_; }
@@ -671,6 +669,8 @@
   ShapeMapType max_shape_tensor_;
   ShapeMapType optim_shape_tensor_;
   bool disable_trt_plugin_fp16_{false};
+  bool with_fp16_{false};
+  bool with_int8_{false};
   phi::DataType model_precision_{phi::DataType::FLOAT32};
   bool use_varseqlen_{false};
   bool use_dla_{false};

From 6ead908c1e74be6200cdd530410a90a586c728bf Mon Sep 17 00:00:00 2001
From: zhoutianzi666 <17801055074@163.com>
Date: Thu, 16 Feb 2023 08:24:24 +0000
Subject: [PATCH 2/4] refine code

---
 paddle/fluid/inference/tensorrt/engine.cc | 3 ---
 paddle/fluid/inference/tensorrt/engine.h  | 8 ++++----
 2 files changed, 4 insertions(+), 7 deletions(-)

diff --git a/paddle/fluid/inference/tensorrt/engine.cc b/paddle/fluid/inference/tensorrt/engine.cc
index a8a86c9f41627..812e024a1a513 100644
--- a/paddle/fluid/inference/tensorrt/engine.cc
+++ b/paddle/fluid/inference/tensorrt/engine.cc
@@ -161,7 +161,6 @@ void TensorRTEngine::FreezeNetwork() {
   if (enable_fp16) {
     bool support_fp16 = infer_builder_->platformHasFastFp16();
     infer_builder_config_->setFlag(nvinfer1::BuilderFlag::kFP16);
-    with_fp16_ = true;
     if (!support_fp16) {
       LOG(INFO) << "You specify FP16 mode, but the hardware do not support "
                    "FP16 speed up, use FP32 instead.";
@@ -174,10 +173,8 @@
   if (enable_int8) {
     if (!use_dla_) {
       infer_builder_config_->setFlag(nvinfer1::BuilderFlag::kFP16);
-      with_fp16_ = true;
     }
     infer_builder_config_->setFlag(nvinfer1::BuilderFlag::kINT8);
-    with_int8_ = true;
 
     if (calibrator_) {
       infer_builder_config_->setInt8Calibrator(calibrator_);
diff --git a/paddle/fluid/inference/tensorrt/engine.h b/paddle/fluid/inference/tensorrt/engine.h
index 64561a25acd50..099cc8880ce44 100644
--- a/paddle/fluid/inference/tensorrt/engine.h
+++ b/paddle/fluid/inference/tensorrt/engine.h
@@ -356,13 +356,15 @@ class TensorRTEngine {
   int GetRuntimeBatch();
 
   bool WithFp16() {
+    bool enable_fp16 = (precision_ == AnalysisConfig::Precision::kHalf);
     bool support_fp16 = infer_builder_->platformHasFastFp16();
-    return with_fp16_ && support_fp16;
+    return (enable_fp16 || WithInt8()) && support_fp16;
   }
 
   bool WithInt8() {
+    bool enable_int8 = (precision_ == AnalysisConfig::Precision::kInt8);
     bool support_int8 = infer_builder_->platformHasFastInt8();
-    return with_int8_ && support_int8;
+    return enable_int8 && support_int8;
   }
 
   int GetDeviceId() { return device_id_; }
@@ -669,8 +671,6 @@
   ShapeMapType max_shape_tensor_;
   ShapeMapType optim_shape_tensor_;
   bool disable_trt_plugin_fp16_{false};
-  bool with_fp16_{false};
-  bool with_int8_{false};
   phi::DataType model_precision_{phi::DataType::FLOAT32};
   bool use_varseqlen_{false};
   bool use_dla_{false};

From ea7fda0d093cd1f750c89c3a12d197a14f9b143c Mon Sep 17 00:00:00 2001
From: zhoutianzi666 <17801055074@163.com>
Date: Thu, 16 Feb 2023 08:28:24 +0000
Subject: [PATCH 3/4] refine code

---
 paddle/fluid/inference/tensorrt/engine.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/paddle/fluid/inference/tensorrt/engine.h b/paddle/fluid/inference/tensorrt/engine.h
index 099cc8880ce44..52edd7180d9e8 100644
--- a/paddle/fluid/inference/tensorrt/engine.h
+++ b/paddle/fluid/inference/tensorrt/engine.h
@@ -358,7 +358,8 @@ class TensorRTEngine {
   bool WithFp16() {
     bool enable_fp16 = (precision_ == AnalysisConfig::Precision::kHalf);
     bool support_fp16 = infer_builder_->platformHasFastFp16();
-    return (enable_fp16 || WithInt8()) && support_fp16;
+    bool fall_back_fp16 = WithInt8() && !use_dla_;
+    return (enable_fp16 || fall_back_fp16) && support_fp16;
   }
 
   bool WithInt8() {

From 1032bda5776dd18f49bfabd4d1d0a481d1bfbd18 Mon Sep 17 00:00:00 2001
From: zhoutianzi666 <17801055074@163.com>
Date: Thu, 16 Feb 2023 08:30:04 +0000
Subject: [PATCH 4/4] refine code

---
 paddle/fluid/inference/tensorrt/engine.h | 1 +
 1 file changed, 1 insertion(+)

diff --git a/paddle/fluid/inference/tensorrt/engine.h b/paddle/fluid/inference/tensorrt/engine.h
index 52edd7180d9e8..aa5a7657e28cd 100644
--- a/paddle/fluid/inference/tensorrt/engine.h
+++ b/paddle/fluid/inference/tensorrt/engine.h
@@ -358,6 +358,7 @@ class TensorRTEngine {
   bool WithFp16() {
     bool enable_fp16 = (precision_ == AnalysisConfig::Precision::kHalf);
     bool support_fp16 = infer_builder_->platformHasFastFp16();
+    // below is consistent with setFlag in engine.cc
     bool fall_back_fp16 = WithInt8() && !use_dla_;
     return (enable_fp16 || fall_back_fp16) && support_fp16;
  }
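
For reference, the logic this series converges on can be exercised in isolation. Below is a minimal standalone sketch, not Paddle code: EngineFlags and its platform_has_fast_* fields are hypothetical stand-ins for the engine members and for TensorRT's platformHasFastFp16()/platformHasFastInt8() builder queries. It illustrates the point of patches 3 and 4: because FreezeNetwork() also sets BuilderFlag::kFP16 when INT8 is enabled without DLA, WithFp16() must report true in that case as well.

// precision_demo.cc -- minimal sketch, not part of Paddle.
#include <iostream>

enum class Precision { kFloat32, kHalf, kInt8 };

struct EngineFlags {
  Precision precision{Precision::kFloat32};
  bool use_dla{false};
  // Hypothetical stand-ins for infer_builder_->platformHasFastFp16() and
  // infer_builder_->platformHasFastInt8().
  bool platform_has_fast_fp16{true};
  bool platform_has_fast_int8{true};

  bool WithInt8() const {
    bool enable_int8 = (precision == Precision::kInt8);
    return enable_int8 && platform_has_fast_int8;
  }

  // Same shape as the final WithFp16(): INT8 mode without DLA also sets the
  // FP16 builder flag in FreezeNetwork(), so it is reported as enabled here.
  bool WithFp16() const {
    bool enable_fp16 = (precision == Precision::kHalf);
    bool fall_back_fp16 = WithInt8() && !use_dla;
    return (enable_fp16 || fall_back_fp16) && platform_has_fast_fp16;
  }
};

int main() {
  EngineFlags engine;
  std::cout << std::boolalpha;

  engine.precision = Precision::kInt8;
  std::cout << "INT8, no DLA  -> WithFp16() = " << engine.WithFp16() << "\n";  // true

  engine.use_dla = true;
  std::cout << "INT8 with DLA -> WithFp16() = " << engine.WithFp16() << "\n";  // false

  engine.use_dla = false;
  engine.precision = Precision::kHalf;
  std::cout << "FP16 mode     -> WithFp16() = " << engine.WithFp16() << "\n";  // true
  return 0;
}

With these inputs the sketch prints true, false, true: FP16 is reported whenever the INT8 build path would also have set the FP16 builder flag, and not when DLA suppresses that fallback.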