From 0b5cf1652ce471734c544e458aa6cc456711e9c3 Mon Sep 17 00:00:00 2001
From: yuanlehome
Date: Thu, 8 Sep 2022 09:09:51 +0000
Subject: [PATCH] memory alignment

---
 paddle/fluid/inference/api/analysis_config.cc |  7 ++--
 paddle/fluid/inference/tensorrt/engine.cc     |  5 ++-
 paddle/fluid/inference/tensorrt/engine.h      | 42 +++++++++++--------
 3 files changed, 33 insertions(+), 21 deletions(-)

diff --git a/paddle/fluid/inference/api/analysis_config.cc b/paddle/fluid/inference/api/analysis_config.cc
index 90dd3097833a3..97f6d81e592a8 100644
--- a/paddle/fluid/inference/api/analysis_config.cc
+++ b/paddle/fluid/inference/api/analysis_config.cc
@@ -547,17 +547,18 @@ void AnalysisConfig::EnableTensorRtEngine(
   }
 
   use_tensorrt_ = true;
-
+#if PADDLE_WITH_TENSORRT
   // https://forums.developer.nvidia.com/t/nvinfer1-createexecutioncontextwithoutdevicememory-returns-nullptr/111878/2
   // when trt version less than 7.2,
-  // createExecutionContextWithoutDeviceMemory() has bug
-  // so, we cannot enable engine context memory sharing
+  // createExecutionContextWithoutDeviceMemory() has a bug,
+  // so we cannot enable engine context memory sharing.
 #if IS_TRT_VERSION_GE(7200)
   trt_engine_memory_sharing_ = true;
 #else
   LOG(WARNING)
       << "TensorRT engine context memory sharing needs version 7.2 and after.";
   trt_engine_memory_sharing_ = false;
+#endif
 #endif
   tensorrt_workspace_size_ = workspace_size;
   tensorrt_max_batchsize_ = max_batch_size;
diff --git a/paddle/fluid/inference/tensorrt/engine.cc b/paddle/fluid/inference/tensorrt/engine.cc
index 25b743313243c..4bd29ef66d63b 100644
--- a/paddle/fluid/inference/tensorrt/engine.cc
+++ b/paddle/fluid/inference/tensorrt/engine.cc
@@ -124,7 +124,10 @@ void TensorRTEngine::Execute(int batch_size,
     void *context_memory{nullptr};
     context_memory =
         inference::Singleton<inference::tensorrt::TRTEngineManager>::Global()
-            .getContextMemory(this);
+            .getContextMemory(
+                predictor_id_per_thread,
+                phi::GPUPlace(device_id_),
+                phi::Stream(reinterpret_cast<phi::StreamId>(stream)));
     infer_context->setDeviceMemory(context_memory);
   }
   if (!with_dynamic_shape()) {
diff --git a/paddle/fluid/inference/tensorrt/engine.h b/paddle/fluid/inference/tensorrt/engine.h
index 55c9f9a200286..e7d67d12240a6 100644
--- a/paddle/fluid/inference/tensorrt/engine.h
+++ b/paddle/fluid/inference/tensorrt/engine.h
@@ -16,6 +16,7 @@ limitations under the License. */
 
 #include <NvInfer.h>
 
+#include <cstdint>
 #include <map>
 #include <memory>
 #include <mutex>  // NOLINT
@@ -37,6 +38,8 @@ limitations under the License. */
 #include "paddle/fluid/inference/utils/singleton.h"
 #include "paddle/fluid/platform/enforce.h"
 #include "paddle/phi/common/data_type.h"
+#include "paddle/phi/common/place.h"
+#include "paddle/phi/core/stream.h"
 #include "paddle/utils/any.h"
 
 namespace paddle {
@@ -705,6 +708,7 @@ class TensorRTEngine {
 
 class TRTEngineManager {
   using PredictorID = int;
+  using AllocationPtr = phi::Allocator::AllocationPtr;
 
  public:
   bool Empty() const {
@@ -753,7 +757,7 @@ class TRTEngineManager {
   }
 
   void DeleteAll() {
-    // std::unique_lock<std::mutex> lock(mutex_);
+    std::unique_lock<std::mutex> lock(mutex_);
     for (auto& item : engines_) {
       item.second.reset(nullptr);
     }
@@ -785,37 +789,41 @@ class TRTEngineManager {
     }
   }
 
-  void* getContextMemory(TensorRTEngine* trt_engine) {
+  void* getContextMemory(PredictorID predictor_id,
+                         const phi::GPUPlace& place,
+                         const phi::Stream& stream) {
     std::unique_lock<std::mutex> lock(mutex_);
-    auto predictor_id = trt_engine->predictor_id_per_thread;
+    auto alignment = getAlignmentSize(place);
     if (context_memorys_.count(predictor_id) == 0) {
-      void* context_memory{nullptr};
-      cudaMalloc(&context_memory, max_ctx_mem_size_);
-      if (context_memory == nullptr) {
-        PADDLE_ENFORCE_EQ(
-            max_ctx_mem_size_,
-            0,
-            platform::errors::InvalidArgument(
-                "The context memory size is non-zero, but the "
-                "memory address we applied for is NULL, we failed to set it."));
-      }
-      context_memorys_[predictor_id] = context_memory;
+      auto context_memory =
+          memory::Alloc(place, max_ctx_mem_size_ + alignment, stream);
+      // Keep the allocation in the map so it stays alive until released.
+      context_memorys_[predictor_id] = std::move(context_memory);
     }
-    return context_memorys_[predictor_id];
+    return getAlignedMemory(context_memorys_[predictor_id]->ptr(), alignment);
   }
 
   void releaseContextMemory(PredictorID predictor_id) {
     std::unique_lock<std::mutex> lock(mutex_);
     if (context_memorys_.count(predictor_id)) {
-      cudaFree(context_memorys_[predictor_id]);
+      context_memorys_[predictor_id].reset(nullptr);
       context_memorys_.erase(predictor_id);
     }
   }
 
  private:
+  size_t getAlignmentSize(const phi::GPUPlace& place) {
+    const auto& prop = platform::GetDeviceProperties(place.GetDeviceId());
+    return prop.textureAlignment;
+  }
+
+  void* getAlignedMemory(void* addr, size_t alignment) {
+    return reinterpret_cast<void*>(uintptr_t(addr) & (~(alignment - 1)));
+  }
+
   mutable std::mutex mutex_;
   size_t max_ctx_mem_size_{0};
-  std::unordered_map<PredictorID, void*> context_memorys_;
+  std::unordered_map<PredictorID, AllocationPtr> context_memorys_;
   std::unordered_map<std::string, std::unique_ptr<TensorRTEngine>> engines_;
 };
 
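Note on the alignment scheme: getContextMemory() over-allocates max_ctx_mem_size_ by one alignment unit reported by cudaDeviceProp::textureAlignment, and getAlignedMemory() derives the address handed to setDeviceMemory() from the allocation's base pointer. A minimal host-side sketch of the over-allocate-then-align idea, assuming std::malloc in place of paddle::memory::Alloc, a hard-coded 512 in place of textureAlignment, and rounding the base up to the boundary (AlignUp, kCtxMemSize, and kAlignment are illustrative names, not part of the patch):

#include <cstdint>
#include <cstdio>
#include <cstdlib>

// Round addr up to the next multiple of a power-of-two alignment.
static void* AlignUp(void* addr, std::size_t alignment) {
  std::uintptr_t p = reinterpret_cast<std::uintptr_t>(addr);
  return reinterpret_cast<void*>((p + alignment - 1) & ~(alignment - 1));
}

int main() {
  const std::size_t kCtxMemSize = 1 << 20;  // stands in for max_ctx_mem_size_
  const std::size_t kAlignment = 512;       // stands in for prop.textureAlignment
  // Over-allocate by one alignment unit so an aligned address inside the
  // block still has kCtxMemSize usable bytes behind it.
  void* base = std::malloc(kCtxMemSize + kAlignment);
  void* aligned = AlignUp(base, kAlignment);
  std::printf("base=%p aligned=%p offset=%zu\n", base, aligned,
              static_cast<std::size_t>(reinterpret_cast<std::uintptr_t>(aligned) -
                                       reinterpret_cast<std::uintptr_t>(base)));
  std::free(base);
  return 0;
}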