From 6d107592527cdfab8a26ca13d1ba2e7cb899714d Mon Sep 17 00:00:00 2001 From: Zhiqiang Wang Date: Mon, 27 Dec 2021 02:57:59 +0800 Subject: [PATCH] Move `LogitsDecoder` into trt_helper.py and fix docstrings (#256) * Fix example in PredictorTRT * Fix docstrings for PredictorTRT * Move LogitsDecoder into yolort.runtime * Minor fix --- yolort/models/box_head.py | 54 ---------------------------------- yolort/runtime/trt_helper.py | 57 ++++++++++++++++++++++++++++++++++-- yolort/runtime/y_tensorrt.py | 26 +++++++++++----- 3 files changed, 73 insertions(+), 64 deletions(-) diff --git a/yolort/models/box_head.py b/yolort/models/box_head.py index 3db871d4..9ae4dd56 100644 --- a/yolort/models/box_head.py +++ b/yolort/models/box_head.py @@ -353,60 +353,6 @@ def _decode_pred_logits(pred_logits: Tensor): return boxes, scores -class LogitsDecoder(nn.Module): - """ - This is a simplified version of post-processing module, we manually remove - the ``torchvision::ops::nms``, and it will be used later in the procedure of - exporting the ONNX graph for TensorRT. - """ - - def __init__(self, strides: List[int]) -> None: - """ - Args: - strides (List[int]): Strides of the AnchorGenerator. - """ - - super().__init__() - self.strides = strides - - def forward( - self, - head_outputs: List[Tensor], - grids: List[Tensor], - shifts: List[Tensor], - ) -> Tuple[Tensor, Tensor]: - """ - Just concat the predict logits, ignore the original ``torchvision::nms`` module - from original ``yolort.models.box_head.PostProcess``. - - Args: - head_outputs (List[Tensor]): The predicted locations and class/object confidence, - shape of the element is (N, A, H, W, K). - anchors_tuple (Tuple[Tensor, Tensor, Tensor]): - grids (List[Tensor]): Anchor grids. - shifts (List[Tensor]): Anchor shifts. - """ - batch_size = len(head_outputs[0]) - - all_pred_logits = _concat_pred_logits(head_outputs, grids, shifts, self.strides) - - bbox_regression = [] - pred_scores = [] - - for idx in range(batch_size): # image idx, image inference - pred_logits = all_pred_logits[idx] - boxes, scores = _decode_pred_logits(pred_logits) - bbox_regression.append(boxes) - pred_scores.append(scores) - - # The default boxes tensor has shape [batch_size, number_boxes, 4]. - # This will insert a "1" dimension in the second axis, to become - # [batch_size, number_boxes, 1, 4], the shape that plugin/BatchedNMS expects. - boxes = torch.stack(bbox_regression).unsqueeze_(2) - scores = torch.stack(pred_scores) - return boxes, scores - - class PostProcess(nn.Module): """ Performs Non-Maximum Suppression (NMS) on inference results diff --git a/yolort/runtime/trt_helper.py b/yolort/runtime/trt_helper.py index 16bf9c06..25b86e3b 100644 --- a/yolort/runtime/trt_helper.py +++ b/yolort/runtime/trt_helper.py @@ -11,7 +11,7 @@ import logging from pathlib import Path -from typing import Optional, Tuple, Union +from typing import Optional, List, Tuple, Union try: import tensorrt as trt @@ -23,7 +23,7 @@ from yolort.models import YOLO from yolort.models.anchor_utils import AnchorGenerator from yolort.models.backbone_utils import darknet_pan_backbone -from yolort.models.box_head import LogitsDecoder +from yolort.models.box_head import _concat_pred_logits, _decode_pred_logits from yolort.utils import load_from_ultralytics logging.basicConfig(level=logging.INFO) @@ -34,6 +34,59 @@ __all__ = ["YOLOTRTModule", "EngineBuilder"] +class LogitsDecoder(nn.Module): + """ + This is a simplified version of post-processing module, we manually remove + the ``torchvision::ops::nms``, and it will be used later in the procedure of + exporting the ONNX graph for YOLOTRTModule. + """ + + def __init__(self, strides: List[int]) -> None: + """ + Args: + strides (List[int]): Strides of the AnchorGenerator. + """ + + super().__init__() + self.strides = strides + + def forward( + self, + head_outputs: List[Tensor], + grids: List[Tensor], + shifts: List[Tensor], + ) -> Tuple[Tensor, Tensor]: + """ + Just concat the predict logits, ignore the original ``torchvision::nms`` module + from original ``yolort.models.box_head.PostProcess``. + + Args: + head_outputs (List[Tensor]): The predicted locations and class/object confidence, + shape of the element is (N, A, H, W, K). + grids (List[Tensor]): Anchor grids. + shifts (List[Tensor]): Anchor shifts. + """ + batch_size = len(head_outputs[0]) + + all_pred_logits = _concat_pred_logits(head_outputs, grids, shifts, self.strides) + + bbox_regression = [] + pred_scores = [] + + for idx in range(batch_size): # image idx, image inference + pred_logits = all_pred_logits[idx] + boxes, scores = _decode_pred_logits(pred_logits) + bbox_regression.append(boxes) + pred_scores.append(scores) + + # The default boxes tensor has shape [batch_size, number_boxes, 4]. + # This will insert a "1" dimension in the second axis, to become + # [batch_size, number_boxes, 1, 4], the shape that plugin/BatchedNMS expects. + boxes = torch.stack(bbox_regression).unsqueeze_(2) + scores = torch.stack(pred_scores) + return boxes, scores + + class YOLOTRTModule(nn.Module): """ TensorRT deployment friendly wrapper for YOLO. diff --git a/yolort/runtime/y_tensorrt.py b/yolort/runtime/y_tensorrt.py index 32051d0c..dbd3eaca 100644 --- a/yolort/runtime/y_tensorrt.py +++ b/yolort/runtime/y_tensorrt.py @@ -29,17 +29,27 @@ class PredictorTRT: Args: engine_path (str): Path of the ONNX checkpoint. + device (torch.device): The CUDA device to be used for inferencing. + score_thresh (float): Score threshold used for postprocessing the detections. + nms_thresh (float): NMS threshold used for postprocessing the detections. + detections_per_img (int): Number of best detections to keep after NMS. Examples: + >>> import cv2 + >>> import numpy as np >>> import torch >>> from yolort.runtime import PredictorTRT >>> - >>> engine_path = 'yolov5s.engine' - >>> device = torch.device("cuda") - >>> detector = PredictorTRT(engine_path, device) + >>> runtime = PredictorTRT(engine_path, device) >>> >>> img_path = 'bus.jpg' - >>> detections = detector.run_on_image(img_path) + >>> image = cv2.imread(img_path) + >>> image = cv2.resize(image, (320, 320)) + >>> image = image.transpose((2, 0, 1))[::-1] # Convert HWC to CHW, BGR to RGB + >>> image = np.ascontiguousarray(image) + >>> + >>> image = runtime.preprocessing(image) + >>> detections = runtime.run_on_image(image) """ def __init__( @@ -47,7 +57,7 @@ def __init__( engine_path: str, device: torch.device = torch.device("cuda"), score_thresh: float = 0.25, - iou_thresh: float = 0.45, + nms_thresh: float = 0.45, detections_per_img: int = 100, ) -> None: self.engine_path = engine_path @@ -56,7 +66,7 @@ def __init__( self.stride = 32 self.names = [f"class{i}" for i in range(1000)] # assign defaults self.score_thresh = score_thresh - self.iou_thresh = iou_thresh + self.nms_thresh = nms_thresh self.detections_per_img = detections_per_img self.engine = self._build_engine() @@ -98,8 +108,8 @@ def __call__(self, image: Tensor): image (Tensor): an image of shape (C, N, H, W). Returns: - predictions (Tuple[List[float], List[int], List[float, float]]): - stands for scores, labels and boxes respectively. + predictions (Tuple[Tensor, Tensor, Tensor, Tensor]): + stands for boxes, scores, labels and number of boxes respectively. """ assert image.shape == self.bindings["images"].shape, (image.shape, self.bindings["images"].shape) self.binding_addrs["images"] = int(image.data_ptr())