Merge pull request PaddlePaddle#64 from cryoco/cuda-inference-demo
Add GPU inference demo for Linux, Windows, and Jetson.
Showing 10 changed files with 1,252 additions and 0 deletions.
CMakeLists.txt (new file, 192 lines):
```cmake
cmake_minimum_required(VERSION 3.0)
project(cpp_inference_demo CXX C)
option(WITH_MKL "Compile demo with MKL/OpenBlas support, default use MKL." ON)
option(WITH_GPU "Compile demo with GPU/CPU, default use CPU." ON)
option(WITH_STATIC_LIB "Compile demo with static/shared library, default use static." OFF)
option(USE_TENSORRT "Compile demo with TensorRT." ON)

if(NOT WITH_STATIC_LIB)
  add_definitions("-DPADDLE_WITH_SHARED_LIB")
else()
  # PD_INFER_DECL is mainly used to set the dllimport/dllexport attribute in dynamic library mode.
  # Set it to empty in static library mode to avoid compilation issues.
  add_definitions("/DPD_INFER_DECL=")
endif()

macro(safe_set_static_flag)
  foreach(flag_var
      CMAKE_CXX_FLAGS CMAKE_CXX_FLAGS_DEBUG CMAKE_CXX_FLAGS_RELEASE
      CMAKE_CXX_FLAGS_MINSIZEREL CMAKE_CXX_FLAGS_RELWITHDEBINFO)
    if(${flag_var} MATCHES "/MD")
      string(REGEX REPLACE "/MD" "/MT" ${flag_var} "${${flag_var}}")
    endif(${flag_var} MATCHES "/MD")
  endforeach(flag_var)
endmacro()

if(NOT DEFINED PADDLE_LIB)
  message(FATAL_ERROR "please set PADDLE_LIB with -DPADDLE_LIB=/path/paddle/lib")
endif()
if(NOT DEFINED DEMO_NAME)
  message(FATAL_ERROR "please set DEMO_NAME with -DDEMO_NAME=demo_name")
endif()

include_directories("${PADDLE_LIB}/")
set(PADDLE_LIB_THIRD_PARTY_PATH "${PADDLE_LIB}/third_party/install/")
include_directories("${PADDLE_LIB_THIRD_PARTY_PATH}protobuf/include")
include_directories("${PADDLE_LIB_THIRD_PARTY_PATH}glog/include")
include_directories("${PADDLE_LIB_THIRD_PARTY_PATH}gflags/include")
include_directories("${PADDLE_LIB_THIRD_PARTY_PATH}xxhash/include")

link_directories("${PADDLE_LIB_THIRD_PARTY_PATH}protobuf/lib")
link_directories("${PADDLE_LIB_THIRD_PARTY_PATH}glog/lib")
link_directories("${PADDLE_LIB_THIRD_PARTY_PATH}gflags/lib")
link_directories("${PADDLE_LIB_THIRD_PARTY_PATH}xxhash/lib")
link_directories("${PADDLE_LIB}/paddle/lib")

if (WIN32)
  add_definitions("/DGOOGLE_GLOG_DLL_DECL=")
  option(MSVC_STATIC_CRT "use static C Runtime library by default" ON)
  if (MSVC_STATIC_CRT)
    if (WITH_MKL)
      set(FLAG_OPENMP "/openmp")
    endif()
    set(CMAKE_C_FLAGS_DEBUG "${CMAKE_C_FLAGS_DEBUG} /bigobj /MTd ${FLAG_OPENMP}")
    set(CMAKE_C_FLAGS_RELEASE "${CMAKE_C_FLAGS_RELEASE} /bigobj /MT ${FLAG_OPENMP}")
    set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} /bigobj /MTd ${FLAG_OPENMP}")
    set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} /bigobj /MT ${FLAG_OPENMP}")
    safe_set_static_flag()
    if (WITH_STATIC_LIB)
      add_definitions(-DSTATIC_LIB)
    endif()
  endif()
else()
  if(WITH_MKL)
    set(FLAG_OPENMP "-fopenmp")
  endif()
  set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11 ${FLAG_OPENMP}")
endif()

if(WITH_GPU)
  if(NOT WIN32)
    set(CUDA_LIB "/usr/local/cuda/lib64/" CACHE STRING "CUDA Library")
  else()
    if(CUDA_LIB STREQUAL "")
      set(CUDA_LIB "C:\\Program\ Files\\NVIDIA GPU Computing Toolkit\\CUDA\\v8.0\\lib\\x64")
    endif()
  endif(NOT WIN32)
endif()

if (USE_TENSORRT AND WITH_GPU)
  set(TENSORRT_ROOT "" CACHE STRING "The root directory of TensorRT library")
  if("${TENSORRT_ROOT}" STREQUAL "")
    message(FATAL_ERROR "The TENSORRT_ROOT is empty, you must assign it a value with CMake command. Such as: -DTENSORRT_ROOT=TENSORRT_ROOT_PATH ")
  endif()
  set(TENSORRT_INCLUDE_DIR ${TENSORRT_ROOT}/include)
  set(TENSORRT_LIB_DIR ${TENSORRT_ROOT}/lib)
endif()

if (NOT WIN32)
  if (USE_TENSORRT AND WITH_GPU)
    include_directories("${TENSORRT_INCLUDE_DIR}")
    link_directories("${TENSORRT_LIB_DIR}")
  endif()
endif(NOT WIN32)

if(WITH_MKL)
  set(MATH_LIB_PATH "${PADDLE_LIB_THIRD_PARTY_PATH}mklml")
  include_directories("${MATH_LIB_PATH}/include")
  if(WIN32)
    set(MATH_LIB ${MATH_LIB_PATH}/lib/mklml${CMAKE_STATIC_LIBRARY_SUFFIX}
                 ${MATH_LIB_PATH}/lib/libiomp5md${CMAKE_STATIC_LIBRARY_SUFFIX})
  else()
    set(MATH_LIB ${MATH_LIB_PATH}/lib/libmklml_intel${CMAKE_SHARED_LIBRARY_SUFFIX}
                 ${MATH_LIB_PATH}/lib/libiomp5${CMAKE_SHARED_LIBRARY_SUFFIX})
  endif()
  set(MKLDNN_PATH "${PADDLE_LIB_THIRD_PARTY_PATH}mkldnn")
  if(EXISTS ${MKLDNN_PATH})
    include_directories("${MKLDNN_PATH}/include")
    if(WIN32)
      set(MKLDNN_LIB ${MKLDNN_PATH}/lib/mkldnn.lib)
    else(WIN32)
      set(MKLDNN_LIB ${MKLDNN_PATH}/lib/libmkldnn.so.0)
    endif(WIN32)
  endif()
else()
  set(OPENBLAS_LIB_PATH "${PADDLE_LIB_THIRD_PARTY_PATH}openblas")
  include_directories("${OPENBLAS_LIB_PATH}/include/openblas")
  if(WIN32)
    set(MATH_LIB ${OPENBLAS_LIB_PATH}/lib/openblas${CMAKE_STATIC_LIBRARY_SUFFIX})
  else()
    set(MATH_LIB ${OPENBLAS_LIB_PATH}/lib/libopenblas${CMAKE_STATIC_LIBRARY_SUFFIX})
  endif()
endif()

if(WITH_STATIC_LIB)
  set(DEPS ${PADDLE_LIB}/paddle/lib/libpaddle_fluid${CMAKE_STATIC_LIBRARY_SUFFIX})
else()
  if(WIN32)
    set(DEPS ${PADDLE_LIB}/paddle/lib/paddle_fluid${CMAKE_STATIC_LIBRARY_SUFFIX})
  else()
    set(DEPS ${PADDLE_LIB}/paddle/lib/libpaddle_fluid${CMAKE_SHARED_LIBRARY_SUFFIX})
  endif()
endif()

if (NOT WIN32)
  set(EXTERNAL_LIB "-lrt -ldl -lpthread")
  set(DEPS ${DEPS}
      ${MATH_LIB} ${MKLDNN_LIB}
      glog gflags protobuf xxhash
      ${EXTERNAL_LIB})
else()
  set(DEPS ${DEPS}
      ${MATH_LIB} ${MKLDNN_LIB}
      glog gflags_static libprotobuf xxhash ${EXTERNAL_LIB})
  set(DEPS ${DEPS} shlwapi.lib)
endif(NOT WIN32)

if(WITH_GPU)
  if(NOT WIN32)
    if (USE_TENSORRT)
      set(DEPS ${DEPS} ${TENSORRT_LIB_DIR}/libnvinfer${CMAKE_STATIC_LIBRARY_SUFFIX})
      set(DEPS ${DEPS} ${TENSORRT_LIB_DIR}/libnvinfer_plugin${CMAKE_STATIC_LIBRARY_SUFFIX})
    endif()
    set(DEPS ${DEPS} ${CUDA_LIB}/libcudart${CMAKE_SHARED_LIBRARY_SUFFIX})
  else()
    if(USE_TENSORRT)
      set(DEPS ${DEPS} ${TENSORRT_LIB_DIR}/nvinfer${CMAKE_STATIC_LIBRARY_SUFFIX})
      set(DEPS ${DEPS} ${TENSORRT_LIB_DIR}/nvinfer_plugin${CMAKE_STATIC_LIBRARY_SUFFIX})
    endif()
    set(DEPS ${DEPS} ${CUDA_LIB}/cudart${CMAKE_STATIC_LIBRARY_SUFFIX})
    set(DEPS ${DEPS} ${CUDA_LIB}/cublas${CMAKE_STATIC_LIBRARY_SUFFIX})
    set(DEPS ${DEPS} ${CUDA_LIB}/cudnn${CMAKE_STATIC_LIBRARY_SUFFIX})
  endif()
endif()

add_executable(${DEMO_NAME} ${DEMO_NAME}.cc)
target_link_libraries(${DEMO_NAME} ${DEPS})
if(WIN32)
  if(USE_TENSORRT)
    add_custom_command(TARGET ${DEMO_NAME} POST_BUILD
      COMMAND ${CMAKE_COMMAND} -E copy ${TENSORRT_LIB_DIR}/nvinfer${CMAKE_SHARED_LIBRARY_SUFFIX}
        ${CMAKE_BINARY_DIR}/${CMAKE_BUILD_TYPE}
      COMMAND ${CMAKE_COMMAND} -E copy ${TENSORRT_LIB_DIR}/nvinfer_plugin${CMAKE_SHARED_LIBRARY_SUFFIX}
        ${CMAKE_BINARY_DIR}/${CMAKE_BUILD_TYPE}
    )
  endif()
  if(WITH_MKL)
    add_custom_command(TARGET ${DEMO_NAME} POST_BUILD
      COMMAND ${CMAKE_COMMAND} -E copy ${MATH_LIB_PATH}/lib/mklml.dll ${CMAKE_BINARY_DIR}/Release
      COMMAND ${CMAKE_COMMAND} -E copy ${MATH_LIB_PATH}/lib/libiomp5md.dll ${CMAKE_BINARY_DIR}/Release
      COMMAND ${CMAKE_COMMAND} -E copy ${MKLDNN_PATH}/lib/mkldnn.dll ${CMAKE_BINARY_DIR}/Release
    )
  else()
    add_custom_command(TARGET ${DEMO_NAME} POST_BUILD
      COMMAND ${CMAKE_COMMAND} -E copy ${OPENBLAS_LIB_PATH}/lib/openblas.dll ${CMAKE_BINARY_DIR}/Release
    )
  endif()
  if(NOT WITH_STATIC_LIB)
    add_custom_command(TARGET ${DEMO_NAME} POST_BUILD
      COMMAND ${CMAKE_COMMAND} -E copy "${PADDLE_LIB}/paddle/lib/paddle_fluid.dll" ${CMAKE_BINARY_DIR}/${CMAKE_BUILD_TYPE}
    )
  endif()
endif()
```
Demo README (new file, 105 lines):
# C++ Inference Deployment Demo on GPU

## 1 Workflow walkthrough

1.1 Prepare the inference library

Please refer to the [inference library download documentation](https://www.paddlepaddle.org.cn/documentation/docs/zh/develop/guides/05_inference_deployment/inference/build_and_install_lib_cn.html) to download the Paddle inference library.
1.2 Prepare the inference model

After training with Paddle, you obtain an inference model that can be used for deployment.

This demo uses a mobilenet_v1 inference model, which can be downloaded from this [link](https://paddle-inference-dist.cdn.bcebos.com/PaddleInference/mobilenetv1_fp32.tar.gz) or fetched with wget:

```
wget https://paddle-inference-dist.cdn.bcebos.com/PaddleInference/mobilenetv1_fp32.tar.gz
```
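After downloading, extract the archive before running the demo. A minimal sketch; the directory name `mobilenetv1_fp32_dir` is assumed to match the `--model_dir` argument used in section 2.2, so verify it against the actual contents of the tarball:

```shell
# Unpack the model archive (extracted directory name assumed; check the tarball).
tar -xzf mobilenetv1_fp32.tar.gz
ls mobilenetv1_fp32_dir
```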
1.3 Include the header file

To use the Paddle inference library, you only need to include the `paddle_inference_api.h` header.

```
#include "paddle/include/paddle_inference_api.h"
```
1.4 Set up the Config

Set up a Config according to your actual deployment scenario; it is then used to create the Predictor.

By default the Config runs inference on CPU. To use the GPU, you must enable it explicitly and set the GPU card id and the initial amount of GPU memory to allocate. You can also enable TensorRT acceleration, IR optimization, and memory optimization. For instructions and examples on using Paddle-TensorRT, see the [documentation](https://paddle-inference.readthedocs.io/en/master/optimize/paddle_trt.html).

```
paddle_infer::Config config;
if (FLAGS_model_dir == "") {
  config.SetModel(FLAGS_model_file, FLAGS_params_file); // Load combined model
} else {
  config.SetModel(FLAGS_model_dir); // Load no-combined model
}
config.EnableUseGpu(500, 0);
config.SwitchIrOptim(true);
config.EnableMemoryOptim();
config.EnableTensorRtEngine(1 << 30, FLAGS_batch_size, 10,
                            paddle_infer::PrecisionType::kFloat32, false, false);
```
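If you need to fall back to CPU-only inference instead, a minimal sketch of the corresponding Config setup (this variant is not part of the demo; it assumes an inference library built with MKLDNN support, and the thread count is an arbitrary example value):

```
// CPU-only configuration sketch (not used by this demo).
paddle_infer::Config cpu_config;
cpu_config.SetModel(FLAGS_model_dir);
cpu_config.DisableGpu();                    // run on CPU instead of GPU
cpu_config.EnableMKLDNN();                  // requires a library built with MKLDNN
cpu_config.SetCpuMathLibraryNumThreads(4);  // example CPU math thread count
cpu_config.SwitchIrOptim(true);
cpu_config.EnableMemoryOptim();
```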
1.5 Create the Predictor

```
std::shared_ptr<paddle_infer::Predictor> predictor = paddle_infer::CreatePredictor(config);
```
1.6 Set the input

Get the input names and handles from the Predictor, then fill in the input data.

```
auto input_names = predictor->GetInputNames();
auto input_t = predictor->GetInputHandle(input_names[0]);
std::vector<int> input_shape = {1, 3, 224, 224};
std::vector<float> input_data(1 * 3 * 224 * 224, 1);
input_t->Reshape(input_shape);
input_t->CopyFromCpu(input_data.data());
```
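The demo fills the input with the constant value 1. If you want to feed real data instead, a minimal sketch that reads a raw float32 buffer of the same size from disk (the file name and any preprocessing are assumptions, the demo itself reads no input file, and `<fstream>` must be included):

```
// Sketch: load 1*3*224*224 float32 values from a raw binary file (hypothetical path).
std::ifstream fin("input_1x3x224x224.raw", std::ios::binary);
fin.read(reinterpret_cast<char *>(input_data.data()),
         input_data.size() * sizeof(float));
input_t->Reshape(input_shape);
input_t->CopyFromCpu(input_data.data());
```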
1.7 Run the Predictor

```
predictor->Run();
```
1.8 Get the output

```
auto output_names = predictor->GetOutputNames();
auto output_t = predictor->GetOutputHandle(output_names[0]);
std::vector<int> output_shape = output_t->shape();
int out_num = std::accumulate(output_shape.begin(), output_shape.end(), 1,
                              std::multiplies<int>());
std::vector<float> out_data;
out_data.resize(out_num);
output_t->CopyToCpu(out_data.data());
```
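For a classification model such as mobilenet_v1, the sample program (`model_test.cc` below) interprets the output by taking the arg-max over `out_data`:

```
auto max_iter = std::max_element(out_data.begin(), out_data.end());
LOG(INFO) << "Output max_arg_index:" << max_iter - out_data.begin()
          << ", max_value:" << *max_iter;
```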
## 2 Build and run the demo

2.1 Build the demo

The file `model_test.cc` is the sample inference program (its input is a fixed value; if you want to read data with OpenCV or some other method, you will need to modify the program accordingly).
The file `CMakeLists.txt` is the build script.
The script `run_impl.sh` configures the third-party libraries and the prebuilt inference library.

Open `run_impl.sh` and set LIB_DIR to the path of the inference library you prepared, e.g. `LIB_DIR=/work/Paddle/build/paddle_inference_install_dir`.

Run `sh run_impl.sh`; this creates a `build` directory under the current directory.

2.2 Run the demo

Enter the build directory and run the sample:

```shell
cd build
./model_test --model_dir=mobilenetv1_fp32_dir
```

When the run finishes, the program prints the model output to the screen, which indicates that it ran successfully.
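If your model is stored as a single combined model file plus a params file, pass `--model_file` and `--params_file` instead of `--model_dir` (these flags are defined in `model_test.cc`; the file names below are placeholders, so substitute your own):

```shell
# Placeholder file names; replace with your combined model and params files.
./model_test --model_file=path/to/model_file --params_file=path/to/params_file
```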
model_test.cc (new file, 60 lines):
```cpp
#include <assert.h>
#include <algorithm>
#include <chrono>
#include <iostream>
#include <memory>
#include <numeric>
#include <vector>

#include <gflags/gflags.h>
#include <glog/logging.h>

#include "paddle/include/paddle_inference_api.h"

DEFINE_string(model_dir, "", "Directory of the inference model.");
DEFINE_string(model_file, "", "Path of the inference model file.");
DEFINE_string(params_file, "", "Path of the inference params file.");
DEFINE_int32(batch_size, 1, "Max batch size passed to the TensorRT engine.");

int main(int argc, char *argv[]) {
  google::ParseCommandLineFlags(&argc, &argv, true);

  // Init config
  paddle_infer::Config config;
  if (FLAGS_model_dir == "") {
    config.SetModel(FLAGS_model_file, FLAGS_params_file);  // Load combined model
  } else {
    config.SetModel(FLAGS_model_dir);  // Load no-combined model
  }
  config.EnableUseGpu(500, 0);
  config.SwitchIrOptim(true);
  config.EnableMemoryOptim();
  config.EnableTensorRtEngine(1 << 30, FLAGS_batch_size, 10,
                              paddle_infer::PrecisionType::kFloat32, false, false);

  // Create predictor
  auto predictor = paddle_infer::CreatePredictor(config);

  // Set input
  auto input_names = predictor->GetInputNames();
  auto input_t = predictor->GetInputHandle(input_names[0]);
  std::vector<int> input_shape = {1, 3, 224, 224};
  std::vector<float> input_data(1 * 3 * 224 * 224, 1);
  input_t->Reshape(input_shape);
  input_t->CopyFromCpu(input_data.data());

  // Run
  predictor->Run();

  // Get output
  auto output_names = predictor->GetOutputNames();
  auto output_t = predictor->GetOutputHandle(output_names[0]);
  std::vector<int> output_shape = output_t->shape();
  int out_num = std::accumulate(output_shape.begin(), output_shape.end(), 1,
                                std::multiplies<int>());
  std::vector<float> out_data;
  out_data.resize(out_num);
  output_t->CopyToCpu(out_data.data());

  // Report the top-1 class index and score.
  auto max_iter = std::max_element(out_data.begin(), out_data.end());
  LOG(INFO) << "Output max_arg_index:" << max_iter - out_data.begin()
            << ", max_value:" << *max_iter;
  return 0;
}
```
run_impl.sh (new file, 26 lines):
```shell
mkdir -p build
cd build
rm -rf *

DEMO_NAME=model_test

WITH_MKL=ON
WITH_GPU=ON
USE_TENSORRT=ON

LIB_DIR=/work/Paddle/build/paddle_inference_install_dir
CUDNN_LIB=/path/to/cudnn/lib
CUDA_LIB=/path/to/cuda/lib
TENSORRT_ROOT=/path/to/trt/root/dir

cmake .. -DPADDLE_LIB=${LIB_DIR} \
  -DWITH_MKL=${WITH_MKL} \
  -DDEMO_NAME=${DEMO_NAME} \
  -DWITH_GPU=${WITH_GPU} \
  -DWITH_STATIC_LIB=OFF \
  -DUSE_TENSORRT=${USE_TENSORRT} \
  -DCUDNN_LIB=${CUDNN_LIB} \
  -DCUDA_LIB=${CUDA_LIB} \
  -DTENSORRT_ROOT=${TENSORRT_ROOT}

make -j
```
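Because the demo is built against shared libraries (`WITH_STATIC_LIB=OFF`), the runtime linker must be able to find the Paddle, MKL/MKL-DNN, CUDA/cuDNN, and TensorRT shared objects. A minimal sketch, assuming the same paths as the variables set above (adjust to your environment before running):

```shell
# Sketch: expose the shared libraries at run time (paths assumed from the variables above).
export LD_LIBRARY_PATH=${LIB_DIR}/paddle/lib:${LIB_DIR}/third_party/install/mklml/lib:${LIB_DIR}/third_party/install/mkldnn/lib:${CUDA_LIB}:${CUDNN_LIB}:${TENSORRT_ROOT}/lib:${LD_LIBRARY_PATH}
./build/model_test --model_dir=mobilenetv1_fp32_dir
```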