diff --git a/deploy/cpp/docs/Jetson_build.md b/deploy/cpp/docs/Jetson_build.md
index 78ad817972528..f5d147f8f3a86 100644
--- a/deploy/cpp/docs/Jetson_build.md
+++ b/deploy/cpp/docs/Jetson_build.md
@@ -158,7 +158,7 @@ CUDNN_LIB=/usr/lib/aarch64-linux-gnu/
 | --camera_id | Option | 用来预测的摄像头ID,默认为-1(表示不使用摄像头预测)|
 | --use_gpu | 是否使用 GPU 预测, 支持值为0或1(默认值为0)|
 | --gpu_id | 指定进行推理的GPU device id(默认值为0)|
-| --run_mode | 使用GPU时,默认为fluid, 可选(fluid/trt_fp32/trt_fp16)|
+| --run_mode | 使用GPU时,默认为fluid, 可选(fluid/trt_fp32/trt_fp16/trt_int8)|
 | --run_benchmark | 是否重复预测来进行benchmark测速 |
 | --output_dir | 输出图片所在的文件夹, 默认为output |
 
diff --git a/deploy/cpp/docs/linux_build.md b/deploy/cpp/docs/linux_build.md
index 059802ada4018..76b961955662d 100644
--- a/deploy/cpp/docs/linux_build.md
+++ b/deploy/cpp/docs/linux_build.md
@@ -102,7 +102,7 @@ make
 | --camera_id | Option | 用来预测的摄像头ID,默认为-1(表示不使用摄像头预测)|
 | --use_gpu | 是否使用 GPU 预测, 支持值为0或1(默认值为0)|
 | --gpu_id | 指定进行推理的GPU device id(默认值为0)|
-| --run_mode | 使用GPU时,默认为fluid, 可选(fluid/trt_fp32/trt_fp16)|
+| --run_mode | 使用GPU时,默认为fluid, 可选(fluid/trt_fp32/trt_fp16/trt_int8)|
 | --run_benchmark | 是否重复预测来进行benchmark测速 |
 | --output_dir | 输出图片所在的文件夹, 默认为output |
 
diff --git a/deploy/cpp/docs/windows_vs2019_build.md b/deploy/cpp/docs/windows_vs2019_build.md
index 9fdd23aef8753..34607b21d1bef 100644
--- a/deploy/cpp/docs/windows_vs2019_build.md
+++ b/deploy/cpp/docs/windows_vs2019_build.md
@@ -97,7 +97,7 @@ cd D:\projects\PaddleDetection\deploy\cpp\out\build\x64-Release
 | --camera_id | Option | 用来预测的摄像头ID,默认为-1(表示不使用摄像头预测)|
 | --use_gpu | 是否使用 GPU 预测, 支持值为0或1(默认值为0)|
 | --gpu_id | 指定进行推理的GPU device id(默认值为0)|
-| --run_mode | 使用GPU时,默认为fluid, 可选(fluid/trt_fp32/trt_fp16)|
+| --run_mode | 使用GPU时,默认为fluid, 可选(fluid/trt_fp32/trt_fp16/trt_int8)|
 | --run_benchmark | 是否重复预测来进行benchmark测速 |
 | --output_dir | 输出图片所在的文件夹, 默认为output |
 
diff --git a/deploy/cpp/src/main.cc b/deploy/cpp/src/main.cc
index ca25a46e1cc13..cd696be0e3f35 100644
--- a/deploy/cpp/src/main.cc
+++ b/deploy/cpp/src/main.cc
@@ -37,7 +37,7 @@ DEFINE_string(image_path, "", "Path of input image");
 DEFINE_string(video_path, "", "Path of input video");
 DEFINE_bool(use_gpu, false, "Infering with GPU or CPU");
 DEFINE_bool(use_camera, false, "Use camera or not");
-DEFINE_string(run_mode, "fluid", "Mode of running(fluid/trt_fp32/trt_fp16)");
+DEFINE_string(run_mode, "fluid", "Mode of running(fluid/trt_fp32/trt_fp16/trt_int8)");
 DEFINE_int32(gpu_id, 0, "Device id of GPU to execute");
 DEFINE_int32(camera_id, -1, "Device id of camera to predict");
 DEFINE_bool(run_benchmark, false, "Whether to predict a image_file repeatedly for benchmark");
diff --git a/deploy/python/README.md b/deploy/python/README.md
index d8874ea6260b7..3abee98cf67b9 100644
--- a/deploy/python/README.md
+++ b/deploy/python/README.md
@@ -43,7 +43,7 @@ python deploy/python/infer.py --model_dir=/path/to/models --image_file=/path/to/
 | --video_file | Option |需要预测的视频 |
 | --camera_id | Option | 用来预测的摄像头ID,默认为-1(表示不使用摄像头预测,可设置为:0 - (摄像头数目-1) ),预测过程中在可视化界面按`q`退出输出预测结果到:output/output.mp4|
 | --use_gpu |No|是否GPU,默认为False|
-| --run_mode |No|使用GPU时,默认为fluid, 可选(fluid/trt_fp32/trt_fp16)|
+| --run_mode |No|使用GPU时,默认为fluid, 可选(fluid/trt_fp32/trt_fp16/trt_int8)|
 | --threshold |No|预测得分的阈值,默认为0.5|
 | --output_dir |No|可视化结果保存的根目录,默认为output/|
 | --run_benchmark |No|是否运行benchmark,同时需指定--image_file|
diff --git a/deploy/python/infer.py b/deploy/python/infer.py
index b10576132b6d3..5bfd545541018 100644
--- a/deploy/python/infer.py
+++ b/deploy/python/infer.py
@@ -321,7 +321,7 @@ def load_predictor(model_dir,
     Args:
         model_dir (str): root path of __model__ and __params__
         use_gpu (bool): whether use gpu
-        run_mode (str): mode of running(fluid/trt_fp32/trt_fp16)
+        run_mode (str): mode of running(fluid/trt_fp32/trt_fp16/trt_int8)
         use_dynamic_shape (bool): use dynamic shape or not
         trt_min_shape (int): min shape for dynamic shape in trt
         trt_max_shape (int): max shape for dynamic shape in trt
@@ -335,11 +335,6 @@ def load_predictor(model_dir,
         raise ValueError(
             "Predict by TensorRT mode: {}, expect use_gpu==True, but use_gpu == {}"
             .format(run_mode, use_gpu))
-    if run_mode == 'trt_int8' and not os.path.exists(
-            os.path.join(model_dir, '_opt_cache')):
-        raise ValueError(
-            "TensorRT int8 must calibration first, and model_dir must has _opt_cache dir"
-        )
     use_calib_mode = True if run_mode == 'trt_int8' else False
     config = Config(
         os.path.join(model_dir, 'model.pdmodel'),
         os.path.join(model_dir, 'model.pdiparams'))
@@ -512,7 +507,7 @@ def main():
         "--run_mode",
         type=str,
         default='fluid',
-        help="mode of running(fluid/trt_fp32/trt_fp16)")
+        help="mode of running(fluid/trt_fp32/trt_fp16/trt_int8)")
     parser.add_argument(
         "--use_gpu",
         type=ast.literal_eval,
diff --git a/deploy/python/trt_int8_calib.py b/deploy/python/trt_int8_calib.py
deleted file mode 100644
index 32f0e0ddea30a..0000000000000
--- a/deploy/python/trt_int8_calib.py
+++ /dev/null
@@ -1,300 +0,0 @@
-# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-import os
-import argparse
-import time
-import yaml
-import ast
-from functools import reduce
-
-from PIL import Image
-import cv2
-import numpy as np
-import glob
-import paddle
-from preprocess import preprocess, Resize, NormalizeImage, Permute, PadStride
-from visualize import visualize_box_mask
-from paddle.inference import Config
-from paddle.inference import create_predictor
-
-# Global dictionary
-SUPPORT_MODELS = {
-    'YOLO',
-    'RCNN',
-    'SSD',
-    'FCOS',
-    'SOLOv2',
-    'TTFNet',
-}
-
-
-class Detector(object):
-    """
-    Args:
-        config (object): config of model, defined by `Config(model_dir)`
-        model_dir (str): root path of model.pdiparams, model.pdmodel and infer_cfg.yml
-        use_gpu (bool): whether use gpu
-    """
-
-    def __init__(self, pred_config, model_dir, use_gpu=False):
-        self.pred_config = pred_config
-        self.predictor = load_predictor(
-            model_dir,
-            min_subgraph_size=self.pred_config.min_subgraph_size,
-            use_gpu=use_gpu)
-
-    def preprocess(self, im):
-        preprocess_ops = []
-        for op_info in self.pred_config.preprocess_infos:
-            new_op_info = op_info.copy()
-            op_type = new_op_info.pop('type')
-            preprocess_ops.append(eval(op_type)(**new_op_info))
-        im, im_info = preprocess(im, preprocess_ops,
-                                 self.pred_config.input_shape)
-        inputs = create_inputs(im, im_info)
-        return inputs
-
-    def postprocess(self, np_boxes, np_masks, inputs, threshold=0.5):
-        # postprocess output of predictor
-        results = {}
-        if self.pred_config.arch in ['Face']:
-            h, w = inputs['im_shape']
-            scale_y, scale_x = inputs['scale_factor']
-            w, h = float(h) / scale_y, float(w) / scale_x
-            np_boxes[:, 2] *= h
-            np_boxes[:, 3] *= w
-            np_boxes[:, 4] *= h
-            np_boxes[:, 5] *= w
-        results['boxes'] = np_boxes
-        if np_masks is not None:
-            results['masks'] = np_masks
-        return results
-
-    def predict(self,
-                image,
-                threshold=0.5,
-                warmup=0,
-                repeats=1,
-                run_benchmark=False):
-        '''
-        Args:
-            image (str/np.ndarray): path of image/ np.ndarray read by cv2
-            threshold (float): threshold of predicted box' score
-        Returns:
-            results (dict): include 'boxes': np.ndarray: shape:[N,6], N: number of box,
-                            matix element:[class, score, x_min, y_min, x_max, y_max]
-                            MaskRCNN's results include 'masks': np.ndarray:
-                            shape: [N, im_h, im_w]
-        '''
-        inputs = self.preprocess(image)
-        np_boxes, np_masks = None, None
-        input_names = self.predictor.get_input_names()
-        for i in range(len(input_names)):
-            input_tensor = self.predictor.get_input_handle(input_names[i])
-            input_tensor.copy_from_cpu(inputs[input_names[i]])
-
-        for i in range(warmup):
-            self.predictor.run()
-            output_names = self.predictor.get_output_names()
-            boxes_tensor = self.predictor.get_output_handle(output_names[0])
-            np_boxes = boxes_tensor.copy_to_cpu()
-            if self.pred_config.mask:
-                masks_tensor = self.predictor.get_output_handle(output_names[2])
-                np_masks = masks_tensor.copy_to_cpu()
-
-        t1 = time.time()
-        for i in range(repeats):
-            self.predictor.run()
-            output_names = self.predictor.get_output_names()
-            boxes_tensor = self.predictor.get_output_handle(output_names[0])
-            np_boxes = boxes_tensor.copy_to_cpu()
-            if self.pred_config.mask:
-                masks_tensor = self.predictor.get_output_handle(output_names[2])
-                np_masks = masks_tensor.copy_to_cpu()
-        t2 = time.time()
-        ms = (t2 - t1) * 1000.0 / repeats
-        print("Inference: {} ms per batch image".format(ms))
-
-        # do not perform postprocess in benchmark mode
-        results = []
-        if not run_benchmark:
-            if reduce(lambda x, y: x * y, np_boxes.shape) < 6:
-                print('[WARNNING] No object detected.')
-                results = {'boxes': np.array([])}
-            else:
-                results = self.postprocess(
-                    np_boxes, np_masks, inputs, threshold=threshold)
-
-        return results
-
-
-def create_inputs(im, im_info):
-    """generate input for different model type
-    Args:
-        im (np.ndarray): image (np.ndarray)
-        im_info (dict): info of image
-        model_arch (str): model type
-    Returns:
-        inputs (dict): input of model
-    """
-    inputs = {}
-    inputs['image'] = np.array((im, )).astype('float32')
-    inputs['im_shape'] = np.array((im_info['im_shape'], )).astype('float32')
-    inputs['scale_factor'] = np.array(
-        (im_info['scale_factor'], )).astype('float32')
-
-    return inputs
-
-
-class PredictConfig():
-    """set config of preprocess, postprocess and visualize
-    Args:
-        model_dir (str): root path of model.yml
-    """
-
-    def __init__(self, model_dir):
-        # parsing Yaml config for Preprocess
-        deploy_file = os.path.join(model_dir, 'infer_cfg.yml')
-        with open(deploy_file) as f:
-            yml_conf = yaml.safe_load(f)
-        self.check_model(yml_conf)
-        self.arch = yml_conf['arch']
-        self.preprocess_infos = yml_conf['Preprocess']
-        self.min_subgraph_size = yml_conf['min_subgraph_size']
-        self.labels = yml_conf['label_list']
-        self.mask = False
-        if 'mask' in yml_conf:
-            self.mask = yml_conf['mask']
-        self.input_shape = yml_conf['image_shape']
-        self.print_config()
-
-    def check_model(self, yml_conf):
-        """
-        Raises:
-            ValueError: loaded model not in supported model type
-        """
-        for support_model in SUPPORT_MODELS:
-            if support_model in yml_conf['arch']:
-                return True
-        raise ValueError("Unsupported arch: {}, expect {}".format(yml_conf[
-            'arch'], SUPPORT_MODELS))
-
-    def print_config(self):
-        print('----------- Model Configuration -----------')
-        print('%s: %s' % ('Model Arch', self.arch))
-        print('%s: ' % ('Transform Order'))
-        for op_info in self.preprocess_infos:
-            print('--%s: %s' % ('transform op', op_info['type']))
-        print('--------------------------------------------')
-
-
-def load_predictor(model_dir, batch_size=1, use_gpu=False, min_subgraph_size=3):
-    """set AnalysisConfig, generate AnalysisPredictor
-    Args:
-        model_dir (str): root path of __model__ and __params__
-        use_gpu (bool): whether use gpu
-    Returns:
-        predictor (PaddlePredictor): AnalysisPredictor
-    Raises:
-        ValueError: predict by TensorRT need use_gpu == True.
-    """
-    run_mode = 'trt_int8'
-    if not use_gpu and not run_mode == 'fluid':
-        raise ValueError(
-            "Predict by TensorRT mode: {}, expect use_gpu==True, but use_gpu == {}"
-            .format(run_mode, use_gpu))
-    config = Config(
-        os.path.join(model_dir, 'model.pdmodel'),
-        os.path.join(model_dir, 'model.pdiparams'))
-    precision_map = {
-        'trt_int8': Config.Precision.Int8,
-        'trt_fp32': Config.Precision.Float32,
-        'trt_fp16': Config.Precision.Half
-    }
-    if use_gpu:
-        # initial GPU memory(M), device ID
-        config.enable_use_gpu(200, 0)
-        # optimize graph and fuse op
-        config.switch_ir_optim(True)
-    else:
-        config.disable_gpu()
-
-    if run_mode in precision_map.keys():
-        config.enable_tensorrt_engine(
-            workspace_size=1 << 10,
-            max_batch_size=batch_size,
-            min_subgraph_size=min_subgraph_size,
-            precision_mode=precision_map[run_mode],
-            use_static=False,
-            use_calib_mode=True)
-
-    # disable print log when predict
-    config.disable_glog_info()
-    # enable shared memory
-    config.enable_memory_optim()
-    # disable feed, fetch OP, needed by zero_copy_run
-    config.switch_use_feed_fetch_ops(False)
-    predictor = create_predictor(config)
-    return predictor
-
-
-def print_arguments(args):
-    print('----------- Running Arguments -----------')
-    for arg, value in sorted(vars(args).items()):
-        print('%s: %s' % (arg, value))
-    print('------------------------------------------')
-
-
-def predict_image_dir(detector):
-    for image_file in glob.glob(FLAGS.image_dir + '/*.jpg'):
-        print('image_file is', image_file)
-        results = detector.predict(image_file, threshold=0.5)
-
-
-def main():
-    pred_config = PredictConfig(FLAGS.model_dir)
-    detector = Detector(pred_config, FLAGS.model_dir, use_gpu=FLAGS.use_gpu)
-    # predict from image
-    if FLAGS.image_dir != '':
-        predict_image_dir(detector)
-
-
-if __name__ == '__main__':
-    paddle.enable_static()
-    parser = argparse.ArgumentParser(description=__doc__)
-    parser.add_argument(
-        "--model_dir",
-        type=str,
-        default=None,
-        help=("Directory include:'model.pdiparams', 'model.pdmodel', "
-              "'infer_cfg.yml', created by tools/export_model.py."),
-        required=True)
-    parser.add_argument(
-        "--image_dir", type=str, default='', help="Directory of image file.")
-    parser.add_argument(
-        "--use_gpu",
-        type=ast.literal_eval,
-        default=False,
-        help="Whether to predict with GPU.")
-    print('err?')
-    parser.add_argument(
-        "--output_dir",
-        type=str,
-        default="output",
-        help="Directory of output visualization files.")
-    FLAGS = parser.parse_args()
-    print_arguments(FLAGS)
-
-    main()
diff --git a/static/deploy/cpp/docs/Jetson_build.md b/static/deploy/cpp/docs/Jetson_build.md
index 14850700e71cb..8bd0c1efc6453 100644
--- a/static/deploy/cpp/docs/Jetson_build.md
+++ b/static/deploy/cpp/docs/Jetson_build.md
@@ -155,7 +155,7 @@ CUDNN_LIB=/usr/lib/aarch64-linux-gnu/
 | --camera_id | Option | 用来预测的摄像头ID,默认为-1(表示不使用摄像头预测)|
 | --use_gpu | 是否使用 GPU 预测, 支持值为0或1(默认值为0)|
 | --gpu_id | 指定进行推理的GPU device id(默认值为0)|
-| --run_mode | 使用GPU时,默认为fluid, 可选(fluid/trt_fp32/trt_fp16)|
+| --run_mode | 使用GPU时,默认为fluid, 可选(fluid/trt_fp32/trt_fp16/trt_int8)|
 | --run_benchmark | 是否重复预测来进行benchmark测速 |
 | --output_dir | 输出图片所在的文件夹, 默认为output |
 
diff --git a/static/deploy/cpp/docs/linux_build.md b/static/deploy/cpp/docs/linux_build.md
index 60c609663002c..14e171191ae56 100644
--- a/static/deploy/cpp/docs/linux_build.md
+++ b/static/deploy/cpp/docs/linux_build.md
@@ -102,7 +102,7 @@ make
 | --camera_id | Option | 用来预测的摄像头ID,默认为-1(表示不使用摄像头预测)|
 | --use_gpu | 是否使用 GPU 预测, 支持值为0或1(默认值为0)|
 | --gpu_id | 指定进行推理的GPU device id(默认值为0)|
-| --run_mode | 使用GPU时,默认为fluid, 可选(fluid/trt_fp32/trt_fp16)|
+| --run_mode | 使用GPU时,默认为fluid, 可选(fluid/trt_fp32/trt_fp16/trt_int8)|
 | --run_benchmark | 是否重复预测来进行benchmark测速 |
 | --output_dir | 输出图片所在的文件夹, 默认为output |
 
diff --git a/static/deploy/cpp/docs/windows_vs2019_build.md b/static/deploy/cpp/docs/windows_vs2019_build.md
index aca5d3ab970cb..efb2d75c89ad0 100644
--- a/static/deploy/cpp/docs/windows_vs2019_build.md
+++ b/static/deploy/cpp/docs/windows_vs2019_build.md
@@ -97,7 +97,7 @@ cd D:\projects\PaddleDetection\deploy\cpp\out\build\x64-Release
 | --camera_id | Option | 用来预测的摄像头ID,默认为-1(表示不使用摄像头预测)|
 | --use_gpu | 是否使用 GPU 预测, 支持值为0或1(默认值为0)|
 | --gpu_id | 指定进行推理的GPU device id(默认值为0)|
-| --run_mode | 使用GPU时,默认为fluid, 可选(fluid/trt_fp32/trt_fp16)|
+| --run_mode | 使用GPU时,默认为fluid, 可选(fluid/trt_fp32/trt_fp16/trt_int8)|
 | --run_benchmark | 是否重复预测来进行benchmark测速 |
 | --output_dir | 输出图片所在的文件夹, 默认为output |
 
diff --git a/static/deploy/cpp/src/main.cc b/static/deploy/cpp/src/main.cc
index 2e3b356c8cc09..eadf64f5e02ed 100644
--- a/static/deploy/cpp/src/main.cc
+++ b/static/deploy/cpp/src/main.cc
@@ -199,8 +199,8 @@ int main(int argc, char** argv) {
     return -1;
   }
   if (!(FLAGS_run_mode == "fluid" || FLAGS_run_mode == "trt_fp32"
-      || FLAGS_run_mode == "trt_fp16")) {
-    std::cout << "run_mode should be 'fluid', 'trt_fp32' or 'trt_fp16'.";
+      || FLAGS_run_mode == "trt_fp16" || FLAGS_run_mode == "trt_int8")) {
+    std::cout << "run_mode should be 'fluid', 'trt_fp32', 'trt_fp16' or 'trt_int8'.";
     return -1;
   }
 
diff --git a/static/deploy/cpp/src/object_detector.cc b/static/deploy/cpp/src/object_detector.cc
index 0e5b814eb0f9c..7b8f0fdd1f8eb 100644
--- a/static/deploy/cpp/src/object_detector.cc
+++ b/static/deploy/cpp/src/object_detector.cc
@@ -32,17 +32,16 @@ void ObjectDetector::LoadModel(const std::string& model_dir,
   config.SetModel(prog_file, params_file);
   if (use_gpu) {
     config.EnableUseGpu(100, gpu_id);
+    config.SwitchIrOptim(true);
     if (run_mode != "fluid") {
       auto precision = paddle::AnalysisConfig::Precision::kFloat32;
       if (run_mode == "trt_fp16") {
        precision = paddle::AnalysisConfig::Precision::kHalf;
       } else if (run_mode == "trt_int8") {
-        printf("TensorRT int8 mode is not supported now, "
-               "please use 'trt_fp32' or 'trt_fp16' instead");
+        precision = paddle::AnalysisConfig::Precision::kInt8;
+        use_calib_mode = true;
       } else {
-        if (run_mode != "trt_fp32") {
-          printf("run_mode should be 'fluid', 'trt_fp32' or 'trt_fp16'");
-        }
+        printf("run_mode should be 'fluid', 'trt_fp32', 'trt_fp16' or 'trt_int8'");
       }
       config.EnableTensorRtEngine(
           1 << 10,
@@ -50,7 +49,7 @@ void ObjectDetector::LoadModel(const std::string& model_dir,
           min_subgraph_size,
           precision,
           false,
-          false);
+          use_calib_mode);
     }
   } else {
     config.DisableGpu();
diff --git a/static/deploy/python/README.md b/static/deploy/python/README.md
index b8b3b87be13de..928910f6bcb9e 100644
--- a/static/deploy/python/README.md
+++ b/static/deploy/python/README.md
@@ -46,7 +46,7 @@ python deploy/python/infer.py --model_dir=/path/to/models --image_file=/path/to/
 | --video_file | Option |需要预测的视频 |
 | --camera_id | Option | 用来预测的摄像头ID,默认为-1(表示不使用摄像头预测,可设置为:0 - (摄像头数目-1) ),预测过程中在可视化界面按`q`退出输出预测结果到:output/output.mp4|
 | --use_gpu |No|是否GPU,默认为False|
-| --run_mode |No|使用GPU时,默认为fluid, 可选(fluid/trt_fp32/trt_fp16)|
+| --run_mode |No|使用GPU时,默认为fluid, 可选(fluid/trt_fp32/trt_fp16/trt_int8)|
 | --threshold |No|预测得分的阈值,默认为0.5|
 | --output_dir |No|可视化结果保存的根目录,默认为output/|
 | --run_benchmark |No|是否运行benchmark,同时需指定--image_file|
diff --git a/static/deploy/python/infer.py b/static/deploy/python/infer.py
index ae0ff80e95a97..59989a6cb801c 100644
--- a/static/deploy/python/infer.py
+++ b/static/deploy/python/infer.py
@@ -393,9 +393,7 @@ def load_predictor(model_dir,
         raise ValueError(
             "Predict by TensorRT mode: {}, expect use_gpu==True, but use_gpu == {}"
             .format(run_mode, use_gpu))
-    if run_mode == 'trt_int8':
-        raise ValueError("TensorRT int8 mode is not supported now, "
-                         "please use trt_fp32 or trt_fp16 instead.")
+    use_calib_mode = True if run_mode == 'trt_int8' else False
     precision_map = {
         'trt_int8': fluid.core.AnalysisConfig.Precision.Int8,
         'trt_fp32': fluid.core.AnalysisConfig.Precision.Float32,
@@ -419,7 +417,7 @@ def load_predictor(model_dir,
             min_subgraph_size=min_subgraph_size,
             precision_mode=precision_map[run_mode],
             use_static=False,
-            use_calib_mode=False)
+            use_calib_mode=use_calib_mode)
 
     # disable print log when predict
     config.disable_glog_info()
@@ -574,7 +572,7 @@ def main():
         "--run_mode",
         type=str,
         default='fluid',
-        help="mode of running(fluid/trt_fp32/trt_fp16)")
+        help="mode of running(fluid/trt_fp32/trt_fp16/trt_int8)")
     parser.add_argument(
         "--use_gpu",
         type=ast.literal_eval,
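
The snippet below is not part of the patch; it is a minimal sketch of the trt_int8 path that this change enables, mirroring the configuration built by `load_predictor` in `deploy/python/infer.py`. The model directory, GPU memory, batch size, and subgraph size are placeholder values.

```python
# Illustrative sketch only (not from the patch): configure Paddle Inference for
# TensorRT INT8 with online calibration, as the patched load_predictor does.
import os
from paddle.inference import Config, create_predictor

model_dir = "./inference_model"           # hypothetical exported model directory
run_mode = "trt_int8"                     # now accepted alongside trt_fp32 / trt_fp16
use_calib_mode = run_mode == "trt_int8"   # calibration table is generated at run time

config = Config(
    os.path.join(model_dir, "model.pdmodel"),
    os.path.join(model_dir, "model.pdiparams"))
config.enable_use_gpu(200, 0)             # initial GPU memory (MB), device id 0
config.switch_ir_optim(True)
config.enable_tensorrt_engine(
    workspace_size=1 << 10,
    max_batch_size=1,
    min_subgraph_size=3,
    precision_mode=Config.Precision.Int8,
    use_static=False,
    use_calib_mode=use_calib_mode)        # replaces the old _opt_cache pre-check
config.disable_glog_info()
config.enable_memory_optim()
config.switch_use_feed_fetch_ops(False)
predictor = create_predictor(config)
```

Because `use_calib_mode=True` lets Paddle collect the INT8 calibration table during ordinary inference runs, the standalone `deploy/python/trt_int8_calib.py` script and the `_opt_cache` existence check become unnecessary and are removed here; after the patch the same path is exercised simply by passing `--run_mode=trt_int8` (with `--use_gpu=True`) to `deploy/python/infer.py`.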