From c3ea8ecac89b5f7ddce333fe9bdca05147f5da18 Mon Sep 17 00:00:00 2001 From: cryoco Date: Mon, 14 Dec 2020 14:45:57 +0800 Subject: [PATCH] add gpu inference demo for linux windows and jetson --- c++/cuda_linux_demo/CMakeLists.txt | 192 +++++++++++++++++++ c++/cuda_linux_demo/README.md | 105 +++++++++++ c++/cuda_linux_demo/model_test.cc | 60 ++++++ c++/cuda_linux_demo/run_impl.sh | 26 +++ docs/demo_tutorial/cuda_jetson_demo.md | 213 +++++++++++++++++++++ docs/demo_tutorial/cuda_linux_demo.md | 213 +++++++++++++++++++++ docs/demo_tutorial/cuda_windows_demo.md | 229 +++++++++++++++++++++++ python/cuda_linux_demo/README.md | 99 ++++++++++ python/cuda_linux_demo/img_preprocess.py | 41 ++++ python/cuda_linux_demo/model_test.py | 74 ++++++++ 10 files changed, 1252 insertions(+) create mode 100644 c++/cuda_linux_demo/CMakeLists.txt create mode 100644 c++/cuda_linux_demo/README.md create mode 100644 c++/cuda_linux_demo/model_test.cc create mode 100755 c++/cuda_linux_demo/run_impl.sh create mode 100644 docs/demo_tutorial/cuda_jetson_demo.md create mode 100644 docs/demo_tutorial/cuda_linux_demo.md create mode 100644 docs/demo_tutorial/cuda_windows_demo.md create mode 100644 python/cuda_linux_demo/README.md create mode 100644 python/cuda_linux_demo/img_preprocess.py create mode 100644 python/cuda_linux_demo/model_test.py diff --git a/c++/cuda_linux_demo/CMakeLists.txt b/c++/cuda_linux_demo/CMakeLists.txt new file mode 100644 index 0000000000000..41100b892a094 --- /dev/null +++ b/c++/cuda_linux_demo/CMakeLists.txt @@ -0,0 +1,192 @@ +cmake_minimum_required(VERSION 3.0) +project(cpp_inference_demo CXX C) +option(WITH_MKL "Compile demo with MKL/OpenBlas support, default use MKL." ON) +option(WITH_GPU "Compile demo with GPU/CPU, default use CPU." ON) +option(WITH_STATIC_LIB "Compile demo with static/shared library, default use static." OFF) +option(USE_TENSORRT "Compile demo with TensorRT." ON) + +if(NOT WITH_STATIC_LIB) + add_definitions("-DPADDLE_WITH_SHARED_LIB") +else() + # PD_INFER_DECL is mainly used to set the dllimport/dllexport attribute in dynamic library mode. + # Set it to empty in static library mode to avoid compilation issues. 
+ add_definitions("/DPD_INFER_DECL=") +endif() + +macro(safe_set_static_flag) + foreach(flag_var + CMAKE_CXX_FLAGS CMAKE_CXX_FLAGS_DEBUG CMAKE_CXX_FLAGS_RELEASE + CMAKE_CXX_FLAGS_MINSIZEREL CMAKE_CXX_FLAGS_RELWITHDEBINFO) + if(${flag_var} MATCHES "/MD") + string(REGEX REPLACE "/MD" "/MT" ${flag_var} "${${flag_var}}") + endif(${flag_var} MATCHES "/MD") + endforeach(flag_var) +endmacro() + +if(NOT DEFINED PADDLE_LIB) + message(FATAL_ERROR "please set PADDLE_LIB with -DPADDLE_LIB=/path/paddle/lib") +endif() +if(NOT DEFINED DEMO_NAME) + message(FATAL_ERROR "please set DEMO_NAME with -DDEMO_NAME=demo_name") +endif() + +include_directories("${PADDLE_LIB}/") +set(PADDLE_LIB_THIRD_PARTY_PATH "${PADDLE_LIB}/third_party/install/") +include_directories("${PADDLE_LIB_THIRD_PARTY_PATH}protobuf/include") +include_directories("${PADDLE_LIB_THIRD_PARTY_PATH}glog/include") +include_directories("${PADDLE_LIB_THIRD_PARTY_PATH}gflags/include") +include_directories("${PADDLE_LIB_THIRD_PARTY_PATH}xxhash/include") + +link_directories("${PADDLE_LIB_THIRD_PARTY_PATH}protobuf/lib") +link_directories("${PADDLE_LIB_THIRD_PARTY_PATH}glog/lib") +link_directories("${PADDLE_LIB_THIRD_PARTY_PATH}gflags/lib") +link_directories("${PADDLE_LIB_THIRD_PARTY_PATH}xxhash/lib") +link_directories("${PADDLE_LIB}/paddle/lib") + +if (WIN32) + add_definitions("/DGOOGLE_GLOG_DLL_DECL=") + option(MSVC_STATIC_CRT "use static C Runtime library by default" ON) + if (MSVC_STATIC_CRT) + if (WITH_MKL) + set(FLAG_OPENMP "/openmp") + endif() + set(CMAKE_C_FLAGS_DEBUG "${CMAKE_C_FLAGS_DEBUG} /bigobj /MTd ${FLAG_OPENMP}") + set(CMAKE_C_FLAGS_RELEASE "${CMAKE_C_FLAGS_RELEASE} /bigobj /MT ${FLAG_OPENMP}") + set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} /bigobj /MTd ${FLAG_OPENMP}") + set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} /bigobj /MT ${FLAG_OPENMP}") + safe_set_static_flag() + if (WITH_STATIC_LIB) + add_definitions(-DSTATIC_LIB) + endif() + endif() +else() + if(WITH_MKL) + set(FLAG_OPENMP "-fopenmp") + endif() + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11 ${FLAG_OPENMP}") +endif() + +if(WITH_GPU) + if(NOT WIN32) + set(CUDA_LIB "/usr/local/cuda/lib64/" CACHE STRING "CUDA Library") + else() + if(CUDA_LIB STREQUAL "") + set(CUDA_LIB "C:\\Program\ Files\\NVIDIA GPU Computing Toolkit\\CUDA\\v8.0\\lib\\x64") + endif() + endif(NOT WIN32) +endif() + +if (USE_TENSORRT AND WITH_GPU) + set(TENSORRT_ROOT "" CACHE STRING "The root directory of TensorRT library") + if("${TENSORRT_ROOT}" STREQUAL "") + message(FATAL_ERROR "The TENSORRT_ROOT is empty, you must assign it a value with CMake command. 
Such as: -DTENSORRT_ROOT=TENSORRT_ROOT_PATH ") + endif() + set(TENSORRT_INCLUDE_DIR ${TENSORRT_ROOT}/include) + set(TENSORRT_LIB_DIR ${TENSORRT_ROOT}/lib) +endif() + +if (NOT WIN32) + if (USE_TENSORRT AND WITH_GPU) + include_directories("${TENSORRT_INCLUDE_DIR}") + link_directories("${TENSORRT_LIB_DIR}") + endif() +endif(NOT WIN32) + +if(WITH_MKL) + set(MATH_LIB_PATH "${PADDLE_LIB_THIRD_PARTY_PATH}mklml") + include_directories("${MATH_LIB_PATH}/include") + if(WIN32) + set(MATH_LIB ${MATH_LIB_PATH}/lib/mklml${CMAKE_STATIC_LIBRARY_SUFFIX} + ${MATH_LIB_PATH}/lib/libiomp5md${CMAKE_STATIC_LIBRARY_SUFFIX}) + else() + set(MATH_LIB ${MATH_LIB_PATH}/lib/libmklml_intel${CMAKE_SHARED_LIBRARY_SUFFIX} + ${MATH_LIB_PATH}/lib/libiomp5${CMAKE_SHARED_LIBRARY_SUFFIX}) + endif() + set(MKLDNN_PATH "${PADDLE_LIB_THIRD_PARTY_PATH}mkldnn") + if(EXISTS ${MKLDNN_PATH}) + include_directories("${MKLDNN_PATH}/include") + if(WIN32) + set(MKLDNN_LIB ${MKLDNN_PATH}/lib/mkldnn.lib) + else(WIN32) + set(MKLDNN_LIB ${MKLDNN_PATH}/lib/libmkldnn.so.0) + endif(WIN32) + endif() +else() + set(OPENBLAS_LIB_PATH "${PADDLE_LIB_THIRD_PARTY_PATH}openblas") + include_directories("${OPENBLAS_LIB_PATH}/include/openblas") + if(WIN32) + set(MATH_LIB ${OPENBLAS_LIB_PATH}/lib/openblas${CMAKE_STATIC_LIBRARY_SUFFIX}) + else() + set(MATH_LIB ${OPENBLAS_LIB_PATH}/lib/libopenblas${CMAKE_STATIC_LIBRARY_SUFFIX}) + endif() +endif() + +if(WITH_STATIC_LIB) + set(DEPS ${PADDLE_LIB}/paddle/lib/libpaddle_fluid${CMAKE_STATIC_LIBRARY_SUFFIX}) +else() + if(WIN32) + set(DEPS ${PADDLE_LIB}/paddle/lib/paddle_fluid${CMAKE_STATIC_LIBRARY_SUFFIX}) + else() + set(DEPS ${PADDLE_LIB}/paddle/lib/libpaddle_fluid${CMAKE_SHARED_LIBRARY_SUFFIX}) + endif() +endif() + +if (NOT WIN32) + set(EXTERNAL_LIB "-lrt -ldl -lpthread") + set(DEPS ${DEPS} + ${MATH_LIB} ${MKLDNN_LIB} + glog gflags protobuf xxhash + ${EXTERNAL_LIB}) +else() + set(DEPS ${DEPS} + ${MATH_LIB} ${MKLDNN_LIB} + glog gflags_static libprotobuf xxhash ${EXTERNAL_LIB}) + set(DEPS ${DEPS} shlwapi.lib) +endif(NOT WIN32) + +if(WITH_GPU) + if(NOT WIN32) + if (USE_TENSORRT) + set(DEPS ${DEPS} ${TENSORRT_LIB_DIR}/libnvinfer${CMAKE_STATIC_LIBRARY_SUFFIX}) + set(DEPS ${DEPS} ${TENSORRT_LIB_DIR}/libnvinfer_plugin${CMAKE_STATIC_LIBRARY_SUFFIX}) + endif() + set(DEPS ${DEPS} ${CUDA_LIB}/libcudart${CMAKE_SHARED_LIBRARY_SUFFIX}) + else() + if(USE_TENSORRT) + set(DEPS ${DEPS} ${TENSORRT_LIB_DIR}/nvinfer${CMAKE_STATIC_LIBRARY_SUFFIX}) + set(DEPS ${DEPS} ${TENSORRT_LIB_DIR}/nvinfer_plugin${CMAKE_STATIC_LIBRARY_SUFFIX}) + endif() + set(DEPS ${DEPS} ${CUDA_LIB}/cudart${CMAKE_STATIC_LIBRARY_SUFFIX} ) + set(DEPS ${DEPS} ${CUDA_LIB}/cublas${CMAKE_STATIC_LIBRARY_SUFFIX} ) + set(DEPS ${DEPS} ${CUDA_LIB}/cudnn${CMAKE_STATIC_LIBRARY_SUFFIX} ) + endif() +endif() + +add_executable(${DEMO_NAME} ${DEMO_NAME}.cc) +target_link_libraries(${DEMO_NAME} ${DEPS}) +if(WIN32) + if(USE_TENSORRT) + add_custom_command(TARGET ${DEMO_NAME} POST_BUILD + COMMAND ${CMAKE_COMMAND} -E copy ${TENSORRT_LIB_DIR}/nvinfer${CMAKE_SHARED_LIBRARY_SUFFIX} + ${CMAKE_BINARY_DIR}/${CMAKE_BUILD_TYPE} + COMMAND ${CMAKE_COMMAND} -E copy ${TENSORRT_LIB_DIR}/nvinfer_plugin${CMAKE_SHARED_LIBRARY_SUFFIX} + ${CMAKE_BINARY_DIR}/${CMAKE_BUILD_TYPE} + ) + endif() + if(WITH_MKL) + add_custom_command(TARGET ${DEMO_NAME} POST_BUILD + COMMAND ${CMAKE_COMMAND} -E copy ${MATH_LIB_PATH}/lib/mklml.dll ${CMAKE_BINARY_DIR}/Release + COMMAND ${CMAKE_COMMAND} -E copy ${MATH_LIB_PATH}/lib/libiomp5md.dll ${CMAKE_BINARY_DIR}/Release + COMMAND ${CMAKE_COMMAND} -E copy ${MKLDNN_PATH}/lib/mkldnn.dll 
${CMAKE_BINARY_DIR}/Release
+    )
+  else()
+    add_custom_command(TARGET ${DEMO_NAME} POST_BUILD
+        COMMAND ${CMAKE_COMMAND} -E copy ${OPENBLAS_LIB_PATH}/lib/openblas.dll ${CMAKE_BINARY_DIR}/Release
+    )
+  endif()
+  if(NOT WITH_STATIC_LIB)
+    add_custom_command(TARGET ${DEMO_NAME} POST_BUILD
+        COMMAND ${CMAKE_COMMAND} -E copy "${PADDLE_LIB}/paddle/lib/paddle_fluid.dll" ${CMAKE_BINARY_DIR}/${CMAKE_BUILD_TYPE}
+    )
+  endif()
+endif()
diff --git a/c++/cuda_linux_demo/README.md b/c++/cuda_linux_demo/README.md
new file mode 100644
index 0000000000000..5b01f5c0a06e8
--- /dev/null
+++ b/c++/cuda_linux_demo/README.md
@@ -0,0 +1,105 @@
+# C++ Inference Deployment Example on GPU
+
+## 1 Workflow
+
+1.1 Prepare the inference library
+
+Please refer to the [inference library download page](https://www.paddlepaddle.org.cn/documentation/docs/zh/develop/guides/05_inference_deployment/inference/build_and_install_lib_cn.html) to download the Paddle inference library.
+
+1.2 Prepare the inference model
+
+After training with Paddle you obtain an inference model that can be used for deployment.
+
+This example uses a mobilenet_v1 inference model, which can be downloaded from this [link](https://paddle-inference-dist.cdn.bcebos.com/PaddleInference/mobilenetv1_fp32.tar.gz) or fetched with wget.
+
+```
+wget https://paddle-inference-dist.cdn.bcebos.com/PaddleInference/mobilenetv1_fp32.tar.gz
+```
+
+1.3 Include the header
+
+To use the Paddle inference library, only the `paddle_inference_api.h` header needs to be included.
+
+```
+#include "paddle/include/paddle_inference_api.h"
+```
+
+1.4 Set up the Config
+
+Set up the Config according to your deployment scenario; it is used to create the Predictor in the next step.
+
+By default the Config runs inference on CPU. To run on GPU you must enable it explicitly and choose the GPU card id and the initial amount of GPU memory to allocate. You can also enable TensorRT acceleration, IR optimization and memory optimization. For more details and examples of Paddle-TensorRT, see the [documentation](https://paddle-inference.readthedocs.io/en/master/optimize/paddle_trt.html).
+
+```
+paddle_infer::Config config;
+if (FLAGS_model_dir == "") {
+  config.SetModel(FLAGS_model_file, FLAGS_params_file); // Load combined model
+} else {
+  config.SetModel(FLAGS_model_dir); // Load no-combined model
+}
+config.EnableUseGpu(500, 0);
+config.SwitchIrOptim(true);
+config.EnableMemoryOptim();
+config.EnableTensorRtEngine(1 << 30, FLAGS_batch_size, 10,
+                            paddle_infer::PrecisionType::kFloat32, false, false);
+```
+
+1.5 Create the Predictor
+
+```
+std::shared_ptr<paddle_infer::Predictor> predictor = paddle_infer::CreatePredictor(config);
+```
+
+1.6 Set the input
+
+Get the input names and handles from the Predictor, then set the input data.
+
+```
+auto input_names = predictor->GetInputNames();
+auto input_t = predictor->GetInputHandle(input_names[0]);
+std::vector<int> input_shape = {1, 3, 224, 224};
+std::vector<float> input_data(1 * 3 * 224 * 224, 1);
+input_t->Reshape(input_shape);
+input_t->CopyFromCpu(input_data.data());
+```
+
+1.7 Run the Predictor
+
+```
+predictor->Run();
+```
+
+1.8 Get the output
+
+```
+auto output_names = predictor->GetOutputNames();
+auto output_t = predictor->GetOutputHandle(output_names[0]);
+std::vector<int> output_shape = output_t->shape();
+int out_num = std::accumulate(output_shape.begin(), output_shape.end(), 1,
+                              std::multiplies<int>());
+std::vector<float> out_data;
+out_data.resize(out_num);
+output_t->CopyToCpu(out_data.data());
+```
+
+## 2 Build and run the example
+
+2.1 Build the example
+
+`model_test.cc` is the sample inference program (its input is a fixed value; if you want to read real data with OpenCV or another method, modify the program accordingly).
+`CMakeLists.txt` is the build script.
+`run_impl.sh` configures the paths of the third-party and prebuilt libraries.
+
+Open `run_impl.sh` and set LIB_DIR to the inference library you prepared, e.g. `LIB_DIR=/work/Paddle/build/paddle_inference_install_dir`.
+
+Run `sh run_impl.sh`; a build directory will be generated in the current directory.
+
+2.2 Run the example
+
+Enter the build directory and run the sample:
+
+```shell
+cd build
+./model_test --model_dir=mobilenetv1_fp32_dir
+```
+
+When the run finishes, the program prints the model output to the screen, which means it ran successfully.
diff --git a/c++/cuda_linux_demo/model_test.cc b/c++/cuda_linux_demo/model_test.cc
new file mode 100644
index 0000000000000..8cd479b130fbe
--- /dev/null
+++ b/c++/cuda_linux_demo/model_test.cc
@@ -0,0 +1,60 @@
+#include <algorithm>
+#include <chrono>
+#include <functional>
+#include <iostream>
+#include <numeric>
+#include <vector>
+
+#include <gflags/gflags.h>
+#include <glog/logging.h>
+
+#include "paddle/include/paddle_inference_api.h"
+
+DEFINE_string(model_dir, "", "Directory of the inference model.");
+DEFINE_string(model_file, "", "Path of the inference model file.");
+DEFINE_string(params_file, "", "Path of the inference params file.");
+DEFINE_int32(batch_size, 1, "Batch size.");
+
+int main(int argc, char *argv[]) {
+  google::ParseCommandLineFlags(&argc, &argv, true);
+
+  // Init config
+  paddle_infer::Config config;
+  if (FLAGS_model_dir == "") {
+    config.SetModel(FLAGS_model_file, FLAGS_params_file);  // Load combined model
+  } else {
+    config.SetModel(FLAGS_model_dir);  // Load no-combined model
+  }
+  config.EnableUseGpu(500, 0);  // 500 MB initial GPU memory pool, device id 0
+  config.SwitchIrOptim(true);
+  config.EnableMemoryOptim();
+  config.EnableTensorRtEngine(1 << 30, FLAGS_batch_size, 10,
+                              paddle_infer::PrecisionType::kFloat32, false, false);
+
+  // Create predictor
+  auto predictor = paddle_infer::CreatePredictor(config);
+
+  // Set input
+  auto input_names = predictor->GetInputNames();
+  auto input_t = predictor->GetInputHandle(input_names[0]);
+  std::vector<int> input_shape = {1, 3, 224, 224};
+  std::vector<float> input_data(1 * 3 * 224 * 224, 1);
+  input_t->Reshape(input_shape);
+  input_t->CopyFromCpu(input_data.data());
+
+  // Run
+  predictor->Run();
+
+  // Get output
+  auto output_names = predictor->GetOutputNames();
+  auto output_t = predictor->GetOutputHandle(output_names[0]);
+  std::vector<int> output_shape = output_t->shape();
+  int out_num = std::accumulate(output_shape.begin(), output_shape.end(), 1,
+                                std::multiplies<int>());
+  std::vector<float> out_data;
+  out_data.resize(out_num);
+  output_t->CopyToCpu(out_data.data());
+
+  auto max_iter = std::max_element(out_data.begin(), out_data.end());
+  LOG(INFO) << "Output max_arg_index:" << max_iter - out_data.begin()
+            << ", max_value:" << *max_iter;
+  return 0;
+}
diff --git a/c++/cuda_linux_demo/run_impl.sh b/c++/cuda_linux_demo/run_impl.sh
new file mode 100755
index 0000000000000..96112a72e8039
--- /dev/null
+++ b/c++/cuda_linux_demo/run_impl.sh
@@ -0,0 +1,26 @@
+mkdir -p build
+cd build
+rm -rf *
+
+DEMO_NAME=model_test
+
+WITH_MKL=ON
+WITH_GPU=ON
+USE_TENSORRT=ON
+
+LIB_DIR=/work/Paddle/build/paddle_inference_install_dir
+CUDNN_LIB=/path/to/cudnn/lib
+CUDA_LIB=/path/to/cuda/lib
+TENSORRT_ROOT=/path/to/trt/root/dir
+
+cmake .. \
-DPADDLE_LIB=${LIB_DIR} \ + -DWITH_MKL=${WITH_MKL} \ + -DDEMO_NAME=${DEMO_NAME} \ + -DWITH_GPU=${WITH_GPU} \ + -DWITH_STATIC_LIB=OFF \ + -DUSE_TENSORRT=${USE_TENSORRT} \ + -DCUDNN_LIB=${CUDNN_LIB} \ + -DCUDA_LIB=${CUDA_LIB} \ + -DTENSORRT_ROOT=${TENSORRT_ROOT} + +make -j diff --git a/docs/demo_tutorial/cuda_jetson_demo.md b/docs/demo_tutorial/cuda_jetson_demo.md new file mode 100644 index 0000000000000..1dbed701b62be --- /dev/null +++ b/docs/demo_tutorial/cuda_jetson_demo.md @@ -0,0 +1,213 @@ +# NV Jetson上预测部署示例 + +## 1 C++预测部署示例 + +C++示例代码在[链接](https://github.com/PaddlePaddle/Paddle-Inference-Demo/tree/master/c%2B%2B/cuda_linux_demo),下面从`流程解析`和`编译运行示例`两方面介绍。 + +### 1.1 流程解析 + +#### 1.1.1 准备预测库 + +请参考[推理库下载文档](https://www.paddlepaddle.org.cn/documentation/docs/zh/develop/guides/05_inference_deployment/inference/build_and_install_lib_cn.html)下载Paddle C++预测库,名称前缀包含 `nv_jetson` 的为用于NV Jetson平台的预测库。 + +#### 1.1.2 准备预测模型 + +使用Paddle训练结束后,得到预测模型,可以用于预测部署。 + +本示例准备了mobilenet_v1预测模型,可以从[链接](https://paddle-inference-dist.cdn.bcebos.com/PaddleInference/mobilenetv1_fp32.tar.gz)下载,或者wget下载。 + +```shell +wget https://paddle-inference-dist.cdn.bcebos.com/PaddleInference/mobilenetv1_fp32.tar.gz +``` + +#### 1.1.3 包含头文件 + +使用Paddle预测库,只需要包含 `paddle_inference_api.h` 头文件。 + +```cpp +#include "paddle/include/paddle_inference_api.h" +``` + +#### 1.1.4 设置Config + +根据预测部署的实际情况,设置Config,用于后续创建Predictor。 + +Config默认是使用CPU预测,若要使用GPU预测,需要手动开启,设置运行的GPU卡号和分配的初始显存。可以设置开启TensorRT加速、开启IR优化、开启内存优化。使用Paddle-TensorRT相关说明和示例可以参考[文档](https://paddle-inference.readthedocs.io/en/master/optimize/paddle_trt.html)。 + +```cpp +paddle_infer::Config config; +if (FLAGS_model_dir == "") { +config.SetModel(FLAGS_model_file, FLAGS_params_file); // Load combined model +} else { +config.SetModel(FLAGS_model_dir); // Load no-combined model +} +config.EnableUseGpu(500, 0); +config.SwitchIrOptim(true); +config.EnableMemoryOptim(); +config.EnableTensorRtEngine(1 << 30, FLAGS_batch_size, 10, PrecisionType::kFloat32, false, false); +``` + +#### 1.1.5 创建Predictor + +```cpp +std::shared_ptr predictor = paddle_infer::CreatePredictor(config); +``` + +#### 1.1.6 设置输入 + +从Predictor中获取输入的names和handle,然后设置输入数据。 + +```cpp +auto input_names = predictor->GetInputNames(); +auto input_t = predictor->GetInputHandle(input_names[0]); +std::vector input_shape = {1, 3, 224, 224}; +std::vector input_data(1 * 3 * 224 * 224, 1); +input_t->Reshape(input_shape); +input_t->CopyFromCpu(input_data.data()); +``` + +#### 1.1.7 执行Predictor + +```cpp +predictor->Run(); +``` + +#### 1.1.8 获取输出 + +```cpp +auto output_names = predictor->GetOutputNames(); +auto output_t = predictor->GetOutputHandle(output_names[0]); +std::vector output_shape = output_t->shape(); +int out_num = std::accumulate(output_shape.begin(), output_shape.end(), 1, + std::multiplies()); +std::vector out_data; +out_data.resize(out_num); +output_t->CopyToCpu(out_data.data()); +``` + +### 1.2 编译运行示例 + +#### 1.2.1 编译示例 + +文件`model_test.cc` 为预测的样例程序(程序中的输入为固定值,如果您有opencv或其他方式进行数据读取的需求,需要对程序进行一定的修改)。 +文件`CMakeLists.txt` 为编译构建文件。 +脚本`run_impl.sh` 包含了第三方库、预编译库的信息配置。 + +根据前面步骤下载Paddle预测库和mobilenetv1模型。 + +打开 `run_impl.sh` 文件,设置 LIB_DIR 为下载的预测库路径,比如 `LIB_DIR=/work/Paddle/build/paddle_inference_install_dir`。 + +运行 `sh run_impl.sh`, 会在当前目录下编译产生build目录。 + +#### 1.2.2 运行示例 + +进入build目录,运行样例。 + +```shell +cd build +./model_test --model_dir=mobilenetv1_fp32_dir +``` + +运行结束后,程序会将模型结果打印到屏幕,说明运行成功。 + +## 2 Python预测部署示例 + 
+Python预测部署示例代码在[链接](https://github.com/PaddlePaddle/Paddle-Inference-Demo/tree/master/python/cuda_linux_demo),下面从`流程解析`和`编译运行示例`两方面介绍。 + +### 2.1 流程解析 + +#### 2.1.1 准备环境 + +请参考[飞桨官网](https://www.paddlepaddle.org.cn/)安装2.0及以上版本的paddlepaddle-gpu。 + +Python安装opencv:`pip install opencv-python`。 + +#### 2.1.2 准备预测模型 + +使用Paddle训练结束后,得到预测模型,可以用于预测部署。 + +本示例准备了mobilenet_v1预测模型,可以从[链接](https://paddle-inference-dist.cdn.bcebos.com/PaddleInference/mobilenetv1_fp32.tar.gz)下载,或者wget下载。 + +```shell +wget https://paddle-inference-dist.cdn.bcebos.com/PaddleInference/mobilenetv1_fp32.tar.gz +tar zxf mobilenetv1_fp32.tar.gz +``` + +#### 2.1.3 Python导入 + +``` +from paddle.inference import Config +from paddle.inference import create_predictor +``` + +#### 2.1.4 设置Config + +根据预测部署的实际情况,设置Config,用于后续创建Predictor。 + +Config默认是使用CPU预测,若要使用GPU预测,需要手动开启,设置运行的GPU卡号和分配的初始显存。可以设置开启TensorRT加速、开启IR优化、开启内存优化。使用Paddle-TensorRT相关说明和示例可以参考[文档](https://paddle-inference.readthedocs.io/en/master/optimize/paddle_trt.html)。 + +```python +# args 是解析的输入参数 +if args.model_dir == "": + config = Config(args.model_file, args.params_file) +else: + config = Config(args.model_dir) +config.enable_use_gpu(500, 0) +config.switch_ir_optim() +config.enable_memory_optim() +config.enable_tensorrt_engine(workspace_size=1 << 30, precision_mode=AnalysisConfig.Precision.Float32,max_batch_size=1, min_subgraph_size=5, use_static=False, use_calib_mode=False) +``` + +#### 2.1.5 创建Predictor + +```python +predictor = create_predictor(config) +``` + +#### 2.1.6 设置输入 + +从Predictor中获取输入的names和handle,然后设置输入数据。 + +```python +img = cv2.imread(args.img_path) +img = preprocess(img) +input_names = predictor.get_input_names() +input_tensor = predictor.get_input_handle(input_names[0]) +input_tensor.reshape(img.shape) +input_tensor.copy_from_cpu(img.copy()) +``` + +#### 2.1.7 执行Predictor + +```python +predictor.run(); +``` + +#### 2.1.8 获取输出 + +```python +output_names = predictor.get_output_names() +output_tensor = predictor.get_output_handle(output_names[0]) +output_data = output_tensor.copy_to_cpu() +``` + +### 2.2 编译运行示例 + +文件`img_preprocess.py`是对图像进行预处理。 +文件`model_test.py`是示例程序。 + +参考前面步骤准备环境、下载预测模型。 + +下载预测图片。 + +```shell +wget https://paddle-inference-dist.bj.bcebos.com/inference_demo/python/resnet50/ILSVRC2012_val_00000247.jpeg +``` + +执行预测命令。 + +``` +python model_test.py --model_dir mobilenetv1_fp32 --img_path ILSVRC2012_val_00000247.jpeg +``` + +运行结束后,程序会将模型结果打印到屏幕,说明运行成功。 diff --git a/docs/demo_tutorial/cuda_linux_demo.md b/docs/demo_tutorial/cuda_linux_demo.md new file mode 100644 index 0000000000000..e38cb4e67d393 --- /dev/null +++ b/docs/demo_tutorial/cuda_linux_demo.md @@ -0,0 +1,213 @@ +# GPU上预测部署示例 + +## 1 C++预测部署示例 + +C++示例代码在[链接](https://github.com/PaddlePaddle/Paddle-Inference-Demo/tree/master/c%2B%2B/cuda_linux_demo),下面从`流程解析`和`编译运行示例`两方面介绍。 + +### 1.1 流程解析 + +#### 1.1.1 准备预测库 + +请参考[推理库下载文档](https://www.paddlepaddle.org.cn/documentation/docs/zh/develop/guides/05_inference_deployment/inference/build_and_install_lib_cn.html)下载Paddle C++预测库,名称中带有 `cuda` 的为用于GPU的预测库。 + +#### 1.1.2 准备预测模型 + +使用Paddle训练结束后,得到预测模型,可以用于预测部署。 + +本示例准备了mobilenet_v1预测模型,可以从[链接](https://paddle-inference-dist.cdn.bcebos.com/PaddleInference/mobilenetv1_fp32.tar.gz)下载,或者wget下载。 + +```shell +wget https://paddle-inference-dist.cdn.bcebos.com/PaddleInference/mobilenetv1_fp32.tar.gz +``` + +#### 1.1.3 包含头文件 + +使用Paddle预测库,只需要包含 `paddle_inference_api.h` 头文件。 + +```cpp +#include "paddle/include/paddle_inference_api.h" +``` + +#### 1.1.4 设置Config + 
+根据预测部署的实际情况,设置Config,用于后续创建Predictor。 + +Config默认是使用CPU预测,若要使用GPU预测,需要手动开启,设置运行的GPU卡号和分配的初始显存。可以设置开启TensorRT加速、开启IR优化、开启内存优化。使用Paddle-TensorRT相关说明和示例可以参考[文档](https://paddle-inference.readthedocs.io/en/master/optimize/paddle_trt.html)。 + +```cpp +paddle_infer::Config config; +if (FLAGS_model_dir == "") { +config.SetModel(FLAGS_model_file, FLAGS_params_file); // Load combined model +} else { +config.SetModel(FLAGS_model_dir); // Load no-combined model +} +config.EnableUseGpu(500, 0); +config.SwitchIrOptim(true); +config.EnableMemoryOptim(); +config.EnableTensorRtEngine(1 << 30, FLAGS_batch_size, 10, PrecisionType::kFloat32, false, false); +``` + +#### 1.1.5 创建Predictor + +```cpp +std::shared_ptr predictor = paddle_infer::CreatePredictor(config); +``` + +#### 1.1.6 设置输入 + +从Predictor中获取输入的names和handle,然后设置输入数据。 + +```cpp +auto input_names = predictor->GetInputNames(); +auto input_t = predictor->GetInputHandle(input_names[0]); +std::vector input_shape = {1, 3, 224, 224}; +std::vector input_data(1 * 3 * 224 * 224, 1); +input_t->Reshape(input_shape); +input_t->CopyFromCpu(input_data.data()); +``` + +#### 1.1.7 执行Predictor + +```cpp +predictor->Run(); +``` + +#### 1.1.8 获取输出 + +```cpp +auto output_names = predictor->GetOutputNames(); +auto output_t = predictor->GetOutputHandle(output_names[0]); +std::vector output_shape = output_t->shape(); +int out_num = std::accumulate(output_shape.begin(), output_shape.end(), 1, + std::multiplies()); +std::vector out_data; +out_data.resize(out_num); +output_t->CopyToCpu(out_data.data()); +``` + +### 1.2 编译运行示例 + +#### 1.2.1 编译示例 + +文件`model_test.cc` 为预测的样例程序(程序中的输入为固定值,如果您有opencv或其他方式进行数据读取的需求,需要对程序进行一定的修改)。 +文件`CMakeLists.txt` 为编译构建文件。 +脚本`run_impl.sh` 包含了第三方库、预编译库的信息配置。 + +根据前面步骤下载Paddle预测库和mobilenetv1模型。 + +打开 `run_impl.sh` 文件,设置 LIB_DIR 为下载的预测库路径,比如 `LIB_DIR=/work/Paddle/build/paddle_inference_install_dir`。 + +运行 `sh run_impl.sh`, 会在当前目录下编译产生build目录。 + +#### 1.2.2 运行示例 + +进入build目录,运行样例。 + +```shell +cd build +./model_test --model_dir=mobilenetv1_fp32_dir +``` + +运行结束后,程序会将模型结果打印到屏幕,说明运行成功。 + +## 2 Python预测部署示例 + +Python预测部署示例代码在[链接](https://github.com/PaddlePaddle/Paddle-Inference-Demo/tree/master/python/cuda_linux_demo),下面从`流程解析`和`编译运行示例`两方面介绍。 + +### 2.1 流程解析 + +#### 2.1.1 准备环境 + +请参考[飞桨官网](https://www.paddlepaddle.org.cn/)安装2.0及以上版本的paddlepaddle-gpu。 + +Python安装opencv:`pip install opencv-python`。 + +#### 2.1.2 准备预测模型 + +使用Paddle训练结束后,得到预测模型,可以用于预测部署。 + +本示例准备了mobilenet_v1预测模型,可以从[链接](https://paddle-inference-dist.cdn.bcebos.com/PaddleInference/mobilenetv1_fp32.tar.gz)下载,或者wget下载。 + +```shell +wget https://paddle-inference-dist.cdn.bcebos.com/PaddleInference/mobilenetv1_fp32.tar.gz +tar zxf mobilenetv1_fp32.tar.gz +``` + +#### 2.1.3 Python导入 + +``` +from paddle.inference import Config +from paddle.inference import create_predictor +``` + +#### 2.1.4 设置Config + +根据预测部署的实际情况,设置Config,用于后续创建Predictor。 + +Config默认是使用CPU预测,若要使用GPU预测,需要手动开启,设置运行的GPU卡号和分配的初始显存。可以设置开启TensorRT加速、开启IR优化、开启内存优化。使用Paddle-TensorRT相关说明和示例可以参考[文档](https://paddle-inference.readthedocs.io/en/master/optimize/paddle_trt.html)。 + +```python +# args 是解析的输入参数 +if args.model_dir == "": + config = Config(args.model_file, args.params_file) +else: + config = Config(args.model_dir) +config.enable_use_gpu(500, 0) +config.switch_ir_optim() +config.enable_memory_optim() +config.enable_tensorrt_engine(workspace_size=1 << 30, precision_mode=AnalysisConfig.Precision.Float32,max_batch_size=1, min_subgraph_size=5, use_static=False, use_calib_mode=False) +``` + +#### 2.1.5 创建Predictor + +```python 
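+# Note: every Config option must be set before the call below; changing the
+# config afterwards does not affect a predictor that has already been created.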
+predictor = create_predictor(config) +``` + +#### 2.1.6 设置输入 + +从Predictor中获取输入的names和handle,然后设置输入数据。 + +```python +img = cv2.imread(args.img_path) +img = preprocess(img) +input_names = predictor.get_input_names() +input_tensor = predictor.get_input_handle(input_names[0]) +input_tensor.reshape(img.shape) +input_tensor.copy_from_cpu(img.copy()) +``` + +#### 2.1.7 执行Predictor + +```python +predictor.run(); +``` + +#### 2.1.8 获取输出 + +```python +output_names = predictor.get_output_names() +output_tensor = predictor.get_output_handle(output_names[0]) +output_data = output_tensor.copy_to_cpu() +``` + +### 2.2 编译运行示例 + +文件`img_preprocess.py`是对图像进行预处理。 +文件`model_test.py`是示例程序。 + +参考前面步骤准备环境、下载预测模型。 + +下载预测图片。 + +```shell +wget https://paddle-inference-dist.bj.bcebos.com/inference_demo/python/resnet50/ILSVRC2012_val_00000247.jpeg +``` + +执行预测命令。 + +``` +python model_test.py --model_dir mobilenetv1_fp32 --img_path ILSVRC2012_val_00000247.jpeg +``` + +运行结束后,程序会将模型结果打印到屏幕,说明运行成功。 diff --git a/docs/demo_tutorial/cuda_windows_demo.md b/docs/demo_tutorial/cuda_windows_demo.md new file mode 100644 index 0000000000000..db9d0814c07fb --- /dev/null +++ b/docs/demo_tutorial/cuda_windows_demo.md @@ -0,0 +1,229 @@ +# Windows上GPU预测部署示例 + +## 1 C++预测部署示例 + +C++示例代码在[链接](https://github.com/PaddlePaddle/Paddle-Inference-Demo/tree/master/c%2B%2B/cuda_linux_demo),下面从`流程解析`和`编译运行示例`两方面介绍。 + +### 1.1 流程解析 + +#### 1.1.1 准备预测库 + +请参考[推理库下载文档](https://www.paddlepaddle.org.cn/documentation/docs/zh/develop/guides/05_inference_deployment/inference/windows_cpp_inference.html)下载windows平台的Paddle GPU C++预测库。 + +#### 1.1.2 准备预测模型 + +使用Paddle训练结束后,得到预测模型,可以用于预测部署。 + +本示例准备了mobilenet_v1预测模型,可以从[链接](https://paddle-inference-dist.cdn.bcebos.com/PaddleInference/mobilenetv1_fp32.tar.gz)下载,或者wget下载。 + +```shell +wget https://paddle-inference-dist.cdn.bcebos.com/PaddleInference/mobilenetv1_fp32.tar.gz +``` + +#### 1.1.3 包含头文件 + +使用Paddle预测库,只需要包含 `paddle_inference_api.h` 头文件。 + +```cpp +#include "paddle/include/paddle_inference_api.h" +``` + +#### 1.1.4 设置Config + +根据预测部署的实际情况,设置Config,用于后续创建Predictor。 + +Config默认是使用CPU预测,若要使用GPU预测,需要手动开启,设置运行的GPU卡号和分配的初始显存。可以设置开启IR优化、开启内存优化。 + +```cpp +paddle_infer::Config config; +if (FLAGS_model_dir == "") { +config.SetModel(FLAGS_model_file, FLAGS_params_file); // Load combined model +} else { +config.SetModel(FLAGS_model_dir); // Load no-combined model +} +config.EnableUseGpu(500, 0); +config.SwitchIrOptim(true); +config.EnableMemoryOptim(); +``` + +#### 1.1.5 创建Predictor + +```cpp +std::shared_ptr predictor = paddle_infer::CreatePredictor(config); +``` + +#### 1.1.6 设置输入 + +从Predictor中获取输入的names和handle,然后设置输入数据。 + +```cpp +auto input_names = predictor->GetInputNames(); +auto input_t = predictor->GetInputHandle(input_names[0]); +std::vector input_shape = {1, 3, 224, 224}; +std::vector input_data(1 * 3 * 224 * 224, 1); +input_t->Reshape(input_shape); +input_t->CopyFromCpu(input_data.data()); +``` + +#### 1.1.7 执行Predictor + +```cpp +predictor->Run(); +``` + +#### 1.1.8 获取输出 + +```cpp +auto output_names = predictor->GetOutputNames(); +auto output_t = predictor->GetOutputHandle(output_names[0]); +std::vector output_shape = output_t->shape(); +int out_num = std::accumulate(output_shape.begin(), output_shape.end(), 1, + std::multiplies()); +std::vector out_data; +out_data.resize(out_num); +output_t->CopyToCpu(out_data.data()); +``` + +### 1.2 编译运行示例 + +#### 1.2.1 编译示例 + +文件`model_test.cc` 为预测的样例程序(程序中的输入为固定值,如果您有opencv或其他方式进行数据读取的需求,需要对程序进行一定的修改)。 +文件`CMakeLists.txt` 为编译构建文件。 + 
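+
+> Note: as an alternative to the cmake-gui workflow described below, the project can also be configured and built from an "x64 Native Tools Command Prompt for VS". The commands below are only a sketch; the generator name and every path are placeholders that must be adapted to your environment:
+
+```shell
+REM Configure the Visual Studio solution (generator and paths are examples).
+cmake .. -G "Visual Studio 15 2017 Win64" ^
+  -DWITH_MKL=ON -DWITH_GPU=ON -DUSE_TENSORRT=OFF ^
+  -DDEMO_NAME=model_test ^
+  -DPADDLE_LIB=D:\paddle_inference_install_dir ^
+  -DCUDA_LIB="C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v10.2\lib\x64"
+
+REM Build the Release configuration.
+cmake --build . --config Release
+```
+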
+根据前面步骤下载Paddle预测库和mobilenetv1模型。 + +使用cmake-gui程序生成vs工程: + +- 选择源代码路径,及编译产物路径,如图所示 + +![win_x86_cpu_cmake_1](./images/win_x86_cpu_cmake_1.png) + +- 点击Configure,选择Visual Studio且选择x64版本如图所示,点击Finish,由于我们没有加入必要的CMake Options,会导致configure失败,请继续下一步。 + +![win_x86_cpu_cmake_2](./images/win_x86_cpu_cmake_2.png) + +- 设置CMake Options,点击Add Entry,新增PADDLE_LIB,CMAKE_BUILD_TYPE,DEMO_NAME等选项。具体配置项如下图所示,其中PADDLE_LIB为您下载的预测库路径。 + +![win_x86_cpu_cmake_3](./images/win_x86_cpu_cmake_3.png) + +- 点击Configure,log信息显示Configure done代表配置成功,接下来点击Generate生成vs工程,log信息显示Generate done,代表生成成功,最后点击Open Project打开Visual Studio. + +- 设置为Release/x64,编译,编译产物在build/Release目录下。 + +![win_x86_cpu_vs_1](./images/win_x86_cpu_vs_1.png) + +#### 1.2.2 运行示例 + +首先设置model_test工程为启动首选项。 + +![win_x86_cpu_vs_2](./images/win_x86_cpu_vs_2.png) + +配置输入flags,即设置您之前下载的模型路径。点击Debug选项卡的`model_test Properities..` + +![win_x86_cpu_vs_3](./images/win_x86_cpu_vs_3.png) + +点击Debug选项卡下的Start Without Debugging选项开始执行程序。 + +![win_x86_cpu_vs_4](./images/win_x86_cpu_vs_4.png) + +## 2 Python预测部署示例 + +Python预测部署示例代码在[链接](https://github.com/PaddlePaddle/Paddle-Inference-Demo/tree/master/python/x86_linux_demo),下面从`流程解析`和`编译运行示例`两方面介绍。 + +### 2.1 流程解析 + +#### 2.1.1 准备环境 + +请参考[飞桨官网](https://www.paddlepaddle.org.cn/)安装2.0及以上版本的paddlepaddle-gpu。 + +Python安装opencv:`pip install opencv-python`。 + +#### 2.1.2 准备预测模型 + +使用Paddle训练结束后,得到预测模型,可以用于预测部署。 + +本示例准备了mobilenet_v1预测模型,可以从[链接](https://paddle-inference-dist.cdn.bcebos.com/PaddleInference/mobilenetv1_fp32.tar.gz)下载,或者wget下载。 + +```shell +wget https://paddle-inference-dist.cdn.bcebos.com/PaddleInference/mobilenetv1_fp32.tar.gz +tar zxf mobilenetv1_fp32.tar.gz +``` + +#### 2.1.3 Python导入 + +``` +from paddle.inference import Config +from paddle.inference import create_predictor +``` + +#### 2.1.4 设置Config + +根据预测部署的实际情况,设置Config,用于后续创建Predictor。 + +Config默认是使用CPU预测,若要使用GPU预测,需要手动开启,设置运行的GPU卡号和分配的初始显存。可以设置开启IR优化、开启内存优化。 + +```python +# args 是解析的输入参数 +if args.model_dir == "": + config = Config(args.model_file, args.params_file) +else: + config = Config(args.model_dir) +config.enable_use_gpu(500, 0) +config.switch_ir_optim() +config.enable_memory_optim() +``` + +#### 2.1.5 创建Predictor + +```python +predictor = create_predictor(config) +``` + +#### 2.1.6 设置输入 + +从Predictor中获取输入的names和handle,然后设置输入数据。 + +```python +img = cv2.imread(args.img_path) +img = preprocess(img) +input_names = predictor.get_input_names() +input_tensor = predictor.get_input_handle(input_names[0]) +input_tensor.reshape(img.shape) +input_tensor.copy_from_cpu(img.copy()) +``` + +#### 2.1.7 执行Predictor + +```python +predictor.run(); +``` + +#### 2.1.8 获取输出 + +```python +output_names = predictor.get_output_names() +output_tensor = predictor.get_output_handle(output_names[0]) +output_data = output_tensor.copy_to_cpu() +``` + +### 2.2 编译运行示例 + +文件`img_preprocess.py`是对图像进行预处理。 +文件`model_test.py`是示例程序。 + +参考前面步骤准备环境、下载预测模型。 + +下载预测图片。 + +```shell +wget https://paddle-inference-dist.bj.bcebos.com/inference_demo/python/resnet50/ILSVRC2012_val_00000247.jpeg +``` + +执行预测命令。 + +``` +python model_test.py --model_dir mobilenetv1_fp32 --img_path ILSVRC2012_val_00000247.jpeg +``` + +运行结束后,程序会将模型结果打印到屏幕,说明运行成功。 diff --git a/python/cuda_linux_demo/README.md b/python/cuda_linux_demo/README.md new file mode 100644 index 0000000000000..2fdf9a5db03b5 --- /dev/null +++ b/python/cuda_linux_demo/README.md @@ -0,0 +1,99 @@ +# GPU上Python预测部署示例 + +## 1.1 流程解析 + +1) 准备环境 + +请参考[飞桨官网](https://www.paddlepaddle.org.cn/)安装2.0及以上版本的paddlepaddle-gpu。 + +Python安装opencv:`pip install 
opencv-python`。
+
+2)准备预测模型
+
+使用Paddle训练结束后,得到预测模型,可以用于预测部署。
+
+本示例准备了mobilenet_v1预测模型,可以从[链接](https://paddle-inference-dist.cdn.bcebos.com/PaddleInference/mobilenetv1_fp32.tar.gz)下载,或者wget下载。
+
+```shell
+wget https://paddle-inference-dist.cdn.bcebos.com/PaddleInference/mobilenetv1_fp32.tar.gz
+tar zxf mobilenetv1_fp32.tar.gz
+```
+
+3)Python导入
+
+```
+from paddle.inference import Config
+from paddle.inference import PrecisionType
+from paddle.inference import create_predictor
+```
+
+4) 设置Config
+
+根据预测部署的实际情况,设置Config,用于后续创建Predictor。
+
+Config默认是使用CPU预测,若要使用GPU预测,需要手动开启,设置运行的GPU卡号和分配的初始显存。可以设置开启TensorRT加速、开启IR优化、开启内存优化。使用Paddle-TensorRT相关说明和示例可以参考[文档](https://paddle-inference.readthedocs.io/en/master/optimize/paddle_trt.html)。
+
+```python
+# args 是解析的输入参数
+if args.model_dir == "":
+    config = Config(args.model_file, args.params_file)
+else:
+    config = Config(args.model_dir)
+config.enable_use_gpu(500, 0)
+config.switch_ir_optim()
+config.enable_memory_optim()
+config.enable_tensorrt_engine(workspace_size=1 << 30,
+                              precision_mode=PrecisionType.Float32,
+                              max_batch_size=1,
+                              min_subgraph_size=5,
+                              use_static=False,
+                              use_calib_mode=False)
+```
+
+5) 创建Predictor
+
+```python
+predictor = create_predictor(config)
+```
+
+6) 设置输入
+
+从Predictor中获取输入的names和handle,然后设置输入数据。
+
+```python
+img = cv2.imread(args.img_path)
+img = preprocess(img)
+input_names = predictor.get_input_names()
+input_tensor = predictor.get_input_handle(input_names[0])
+input_tensor.reshape(img.shape)
+input_tensor.copy_from_cpu(img.copy())
+```
+
+7) 执行Predictor
+
+```python
+predictor.run()
+```
+
+8) 获取输出
+
+```python
+output_names = predictor.get_output_names()
+output_tensor = predictor.get_output_handle(output_names[0])
+output_data = output_tensor.copy_to_cpu()
+```
+
+## 1.2 编译运行示例
+
+文件`img_preprocess.py`是对图像进行预处理。
+文件`model_test.py`是示例程序。
+
+参考前面步骤准备环境、下载预测模型。
+
+下载预测图片。
+
+```shell
+wget https://paddle-inference-dist.bj.bcebos.com/inference_demo/python/resnet50/ILSVRC2012_val_00000247.jpeg
+```
+
+执行预测命令。
+
+```
+python model_test.py --model_dir mobilenetv1_fp32 --img_path ILSVRC2012_val_00000247.jpeg
+```
+
+运行结束后,程序会将模型结果打印到屏幕,说明运行成功。
diff --git a/python/cuda_linux_demo/img_preprocess.py b/python/cuda_linux_demo/img_preprocess.py
new file mode 100644
index 0000000000000..34321de022caa
--- /dev/null
+++ b/python/cuda_linux_demo/img_preprocess.py
@@ -0,0 +1,41 @@
+import cv2
+import numpy as np
+
+
+def resize_short(img, target_size):
+    """Resize the short side of img to target_size, keeping the aspect ratio."""
+    percent = float(target_size) / min(img.shape[0], img.shape[1])
+    resized_width = int(round(img.shape[1] * percent))
+    resized_height = int(round(img.shape[0] * percent))
+    resized = cv2.resize(img, (resized_width, resized_height))
+    return resized
+
+
+def crop_image(img, target_size, center):
+    """Crop a target_size x target_size patch, centered or at a random position."""
+    height, width = img.shape[:2]
+    size = target_size
+    if center:
+        w_start = (width - size) / 2
+        h_start = (height - size) / 2
+    else:
+        w_start = np.random.randint(0, width - size + 1)
+        h_start = np.random.randint(0, height - size + 1)
+    w_end = w_start + size
+    h_end = h_start + size
+    img = img[int(h_start):int(h_end), int(w_start):int(w_end), :]
+    return img
+
+
+def preprocess(img):
+    """Turn a BGR uint8 image into a normalized NCHW float32 batch of size 1."""
+    mean = [0.485, 0.456, 0.406]
+    std = [0.229, 0.224, 0.225]
+    img = resize_short(img, 224)
+    img = crop_image(img, 224, True)
+    # bgr -> rgb && hwc -> chw
+    img = img[:, :, ::-1].astype('float32').transpose((2, 0, 1)) / 255
+    img_mean = np.array(mean).reshape((3, 1, 1))
+    img_std = np.array(std).reshape((3, 1, 1))
+    img -= img_mean
+    img /= img_std
+    return img[np.newaxis, :]
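+
+
+if __name__ == '__main__':
+    # Optional smoke test, not part of the original demo: read an image given
+    # on the command line and check the shape of the preprocessed tensor,
+    # which should be (1, 3, 224, 224) for the mobilenet_v1 example.
+    import sys
+    data = preprocess(cv2.imread(sys.argv[1]))
+    print('preprocessed tensor:', data.shape, data.dtype)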
diff --git a/python/cuda_linux_demo/model_test.py b/python/cuda_linux_demo/model_test.py
new file mode 100644
index 0000000000000..d2dad951fee62
--- /dev/null
+++ b/python/cuda_linux_demo/model_test.py
@@ -0,0 +1,74 @@
+import numpy as np
+import argparse
+import cv2
+
+from paddle.inference import Config
+from paddle.inference import PrecisionType
+from paddle.inference import create_predictor
+from img_preprocess import preprocess
+
+
+def parse_args():
+    parser = argparse.ArgumentParser()
+    parser.add_argument(
+        "--model_dir",
+        type=str,
+        default="",
+        help="Model dir. If you load a non-combined model, specify the directory of the model."
+    )
+    parser.add_argument(
+        "--model_file",
+        type=str,
+        default="",
+        help="Model filename. Specify this when your model is a combined model."
+    )
+    parser.add_argument(
+        "--params_file",
+        type=str,
+        default="",
+        help="Parameter filename. Specify this when your model is a combined model."
+    )
+    parser.add_argument(
+        "--img_path", type=str, default="", help="Input image path.")
+    parser.add_argument(
+        "--threads", type=int, default=1, help="Number of threads.")
+    return parser.parse_args()
+
+
+if __name__ == '__main__':
+    args = parse_args()
+    assert (args.model_dir != "") or \
+        (args.model_file != "" and args.params_file != ""), \
+        "Set model path error."
+    assert args.img_path != "", "Set img_path error."
+
+    # Init config
+    if args.model_dir == "":
+        config = Config(args.model_file, args.params_file)
+    else:
+        config = Config(args.model_dir)
+    config.enable_use_gpu(500, 0)
+    config.switch_ir_optim()
+    config.enable_memory_optim()
+    config.enable_tensorrt_engine(workspace_size=1 << 30,
+                                  precision_mode=PrecisionType.Float32,
+                                  max_batch_size=1,
+                                  min_subgraph_size=5,
+                                  use_static=False,
+                                  use_calib_mode=False)
+
+    # Create predictor
+    predictor = create_predictor(config)
+
+    # Set input
+    img = cv2.imread(args.img_path)
+    img = preprocess(img)
+    input_names = predictor.get_input_names()
+    input_tensor = predictor.get_input_handle(input_names[0])
+    input_tensor.reshape(img.shape)
+    input_tensor.copy_from_cpu(img.copy())
+
+    # Run
+    predictor.run()
+
+    # Get output
+    output_names = predictor.get_output_names()
+    output_tensor = predictor.get_output_handle(output_names[0])
+    output_data = output_tensor.copy_to_cpu()
+
+    print("Predict class index: ", np.argmax(output_data))
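+
+    # Optional extension, not part of the original demo: convert the raw
+    # scores to softmax probabilities and print the top-5 class indices.
+    # If the exported model already ends with a softmax layer, this does not
+    # change the ranking.
+    probs = np.exp(output_data - np.max(output_data))
+    probs /= np.sum(probs)
+    top5 = np.argsort(probs.flatten())[-5:][::-1]
+    for idx in top5:
+        print("class index: {}, prob: {:.4f}".format(idx, probs.flatten()[idx]))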