From 765bd00fbba20d82e10f4ca37e8f459a42a50558 Mon Sep 17 00:00:00 2001 From: Zhiqiang Wang Date: Fri, 4 Mar 2022 01:36:13 +0800 Subject: [PATCH] Add `Visualizer` for visualization (#341) * Add Visualizer from detectron2 * Refactor * Move display into cv2_imshow() * Use OpenCV to draw bounding boxes * Cleanup * Refactor the method in Visualizer * Fix docstring * Add metadata attribute in Visualizer * Fix Visualizer._create_text_labels() * Apply pre-commit * Fix Visualizer.overlay_instances() * Apply pre-commit * Cleanup * Add test_visualizer * Apply pre-commit * Add Visualizer.imshow() --- test/test_utils.py | 18 +- yolort/data/builtin_meta.py | 154 ++++++++++ yolort/utils/__init__.py | 2 + yolort/utils/builtin_meta.py | 578 ----------------------------------- yolort/utils/image_utils.py | 24 +- yolort/utils/visualizer.py | 297 ++++++++++++++++++ yolort/utils/yolo2coco.py | 6 +- 7 files changed, 487 insertions(+), 592 deletions(-) create mode 100644 yolort/data/builtin_meta.py delete mode 100644 yolort/utils/builtin_meta.py create mode 100644 yolort/utils/visualizer.py diff --git a/test/test_utils.py b/test/test_utils.py index ba5726d8..bfbacfcf 100644 --- a/test/test_utils.py +++ b/test/test_utils.py @@ -4,13 +4,15 @@ import pytest import torch from torch import Tensor +from torchvision.io import read_image from yolort import models from yolort.models import YOLO from yolort.utils import ( - FeatureExtractor, get_image_from_url, load_from_ultralytics, read_image_to_tensor, + FeatureExtractor, + Visualizer, ) from yolort.utils.image_utils import box_cxcywh_to_xyxy from yolort.v5 import ( @@ -22,6 +24,20 @@ ) +@pytest.mark.parametrize("arch", ["yolov5n"]) +def test_visualizer(arch): + model = models.__dict__[arch](pretrained=True, size=(320, 320), score_thresh=0.45) + model = model.eval() + img_path = "test/assets/zidane.jpg" + preds = model.predict(img_path) + + metalabels_path = "notebooks/assets/coco.names" + image = read_image(img_path) + v = Visualizer(image, metalabels=metalabels_path) + output = v.draw_instance_predictions(preds[0]) + assert isinstance(output, np.ndarray) + + @pytest.mark.parametrize( "arch, version, upstream_version, hash_prefix, use_p6", [ diff --git a/yolort/data/builtin_meta.py b/yolort/data/builtin_meta.py new file mode 100644 index 00000000..be2fc7ab --- /dev/null +++ b/yolort/data/builtin_meta.py @@ -0,0 +1,154 @@ +# -*- coding: utf-8 -*- +# Copyright (c) Facebook, Inc. and its affiliates. + +""" +Note: +For your custom dataset, there is no need to hard-code metadata anywhere in the code. +For example, for COCO-format dataset, metadata will be obtained automatically +when calling `load_coco_json`. For other dataset, metadata may also be obtained in other ways +during loading. + +However, we hard-coded metadata for a few common dataset here. +The only goal is to allow users who don't have these dataset to use pre-trained models. +Users don't have to download a COCO json (which contains metadata), in order to visualize a +COCO model (with correct class names and colors). 
+""" + + +# All coco categories, together with their nice-looking visualization colors +# It's from https://github.com/cocodataset/panopticapi/blob/master/panoptic_coco_categories.json +COCO_CATEGORIES = [ + {"id": 1, "color": [220, 20, 60], "isthing": 1, "name": "person"}, + {"id": 2, "color": [119, 11, 32], "isthing": 1, "name": "bicycle"}, + {"id": 3, "color": [0, 0, 142], "isthing": 1, "name": "car"}, + {"id": 4, "color": [0, 0, 230], "isthing": 1, "name": "motorcycle"}, + {"id": 5, "color": [106, 0, 228], "isthing": 1, "name": "airplane"}, + {"id": 6, "color": [0, 60, 100], "isthing": 1, "name": "bus"}, + {"id": 7, "color": [0, 80, 100], "isthing": 1, "name": "train"}, + {"id": 8, "color": [0, 0, 70], "isthing": 1, "name": "truck"}, + {"id": 9, "color": [0, 0, 192], "isthing": 1, "name": "boat"}, + {"id": 10, "color": [250, 170, 30], "isthing": 1, "name": "traffic light"}, + {"id": 11, "color": [100, 170, 30], "isthing": 1, "name": "fire hydrant"}, + {"id": 13, "color": [220, 220, 0], "isthing": 1, "name": "stop sign"}, + {"id": 14, "color": [175, 116, 175], "isthing": 1, "name": "parking meter"}, + {"id": 15, "color": [250, 0, 30], "isthing": 1, "name": "bench"}, + {"id": 16, "color": [165, 42, 42], "isthing": 1, "name": "bird"}, + {"id": 17, "color": [255, 77, 255], "isthing": 1, "name": "cat"}, + {"id": 18, "color": [0, 226, 252], "isthing": 1, "name": "dog"}, + {"id": 19, "color": [182, 182, 255], "isthing": 1, "name": "horse"}, + {"id": 20, "color": [0, 82, 0], "isthing": 1, "name": "sheep"}, + {"id": 21, "color": [120, 166, 157], "isthing": 1, "name": "cow"}, + {"id": 22, "color": [110, 76, 0], "isthing": 1, "name": "elephant"}, + {"id": 23, "color": [174, 57, 255], "isthing": 1, "name": "bear"}, + {"id": 24, "color": [199, 100, 0], "isthing": 1, "name": "zebra"}, + {"id": 25, "color": [72, 0, 118], "isthing": 1, "name": "giraffe"}, + {"id": 27, "color": [255, 179, 240], "isthing": 1, "name": "backpack"}, + {"id": 28, "color": [0, 125, 92], "isthing": 1, "name": "umbrella"}, + {"id": 31, "color": [209, 0, 151], "isthing": 1, "name": "handbag"}, + {"id": 32, "color": [188, 208, 182], "isthing": 1, "name": "tie"}, + {"id": 33, "color": [0, 220, 176], "isthing": 1, "name": "suitcase"}, + {"id": 34, "color": [255, 99, 164], "isthing": 1, "name": "frisbee"}, + {"id": 35, "color": [92, 0, 73], "isthing": 1, "name": "skis"}, + {"id": 36, "color": [133, 129, 255], "isthing": 1, "name": "snowboard"}, + {"id": 37, "color": [78, 180, 255], "isthing": 1, "name": "sports ball"}, + {"id": 38, "color": [0, 228, 0], "isthing": 1, "name": "kite"}, + {"id": 39, "color": [174, 255, 243], "isthing": 1, "name": "baseball bat"}, + {"id": 40, "color": [45, 89, 255], "isthing": 1, "name": "baseball glove"}, + {"id": 41, "color": [134, 134, 103], "isthing": 1, "name": "skateboard"}, + {"id": 42, "color": [145, 148, 174], "isthing": 1, "name": "surfboard"}, + {"id": 43, "color": [255, 208, 186], "isthing": 1, "name": "tennis racket"}, + {"id": 44, "color": [197, 226, 255], "isthing": 1, "name": "bottle"}, + {"id": 46, "color": [171, 134, 1], "isthing": 1, "name": "wine glass"}, + {"id": 47, "color": [109, 63, 54], "isthing": 1, "name": "cup"}, + {"id": 48, "color": [207, 138, 255], "isthing": 1, "name": "fork"}, + {"id": 49, "color": [151, 0, 95], "isthing": 1, "name": "knife"}, + {"id": 50, "color": [9, 80, 61], "isthing": 1, "name": "spoon"}, + {"id": 51, "color": [84, 105, 51], "isthing": 1, "name": "bowl"}, + {"id": 52, "color": [74, 65, 105], "isthing": 1, "name": "banana"}, + {"id": 53, "color": 
[166, 196, 102], "isthing": 1, "name": "apple"}, + {"id": 54, "color": [208, 195, 210], "isthing": 1, "name": "sandwich"}, + {"id": 55, "color": [255, 109, 65], "isthing": 1, "name": "orange"}, + {"id": 56, "color": [0, 143, 149], "isthing": 1, "name": "broccoli"}, + {"id": 57, "color": [179, 0, 194], "isthing": 1, "name": "carrot"}, + {"id": 58, "color": [209, 99, 106], "isthing": 1, "name": "hot dog"}, + {"id": 59, "color": [5, 121, 0], "isthing": 1, "name": "pizza"}, + {"id": 60, "color": [227, 255, 205], "isthing": 1, "name": "donut"}, + {"id": 61, "color": [147, 186, 208], "isthing": 1, "name": "cake"}, + {"id": 62, "color": [153, 69, 1], "isthing": 1, "name": "chair"}, + {"id": 63, "color": [3, 95, 161], "isthing": 1, "name": "couch"}, + {"id": 64, "color": [163, 255, 0], "isthing": 1, "name": "potted plant"}, + {"id": 65, "color": [119, 0, 170], "isthing": 1, "name": "bed"}, + {"id": 67, "color": [0, 182, 199], "isthing": 1, "name": "dining table"}, + {"id": 70, "color": [0, 165, 120], "isthing": 1, "name": "toilet"}, + {"id": 72, "color": [183, 130, 88], "isthing": 1, "name": "tv"}, + {"id": 73, "color": [95, 32, 0], "isthing": 1, "name": "laptop"}, + {"id": 74, "color": [130, 114, 135], "isthing": 1, "name": "mouse"}, + {"id": 75, "color": [110, 129, 133], "isthing": 1, "name": "remote"}, + {"id": 76, "color": [166, 74, 118], "isthing": 1, "name": "keyboard"}, + {"id": 77, "color": [219, 142, 185], "isthing": 1, "name": "cell phone"}, + {"id": 78, "color": [79, 210, 114], "isthing": 1, "name": "microwave"}, + {"id": 79, "color": [178, 90, 62], "isthing": 1, "name": "oven"}, + {"id": 80, "color": [65, 70, 15], "isthing": 1, "name": "toaster"}, + {"id": 81, "color": [127, 167, 115], "isthing": 1, "name": "sink"}, + {"id": 82, "color": [59, 105, 106], "isthing": 1, "name": "refrigerator"}, + {"id": 84, "color": [142, 108, 45], "isthing": 1, "name": "book"}, + {"id": 85, "color": [196, 172, 0], "isthing": 1, "name": "clock"}, + {"id": 86, "color": [95, 54, 80], "isthing": 1, "name": "vase"}, + {"id": 87, "color": [128, 76, 255], "isthing": 1, "name": "scissors"}, + {"id": 88, "color": [201, 57, 1], "isthing": 1, "name": "teddy bear"}, + {"id": 89, "color": [246, 0, 122], "isthing": 1, "name": "hair drier"}, + {"id": 90, "color": [191, 162, 208], "isthing": 1, "name": "toothbrush"}, + {"id": 92, "color": [255, 255, 128], "isthing": 0, "name": "banner"}, + {"id": 93, "color": [147, 211, 203], "isthing": 0, "name": "blanket"}, + {"id": 95, "color": [150, 100, 100], "isthing": 0, "name": "bridge"}, + {"id": 100, "color": [168, 171, 172], "isthing": 0, "name": "cardboard"}, + {"id": 107, "color": [146, 112, 198], "isthing": 0, "name": "counter"}, + {"id": 109, "color": [210, 170, 100], "isthing": 0, "name": "curtain"}, + {"id": 112, "color": [92, 136, 89], "isthing": 0, "name": "door-stuff"}, + {"id": 118, "color": [218, 88, 184], "isthing": 0, "name": "floor-wood"}, + {"id": 119, "color": [241, 129, 0], "isthing": 0, "name": "flower"}, + {"id": 122, "color": [217, 17, 255], "isthing": 0, "name": "fruit"}, + {"id": 125, "color": [124, 74, 181], "isthing": 0, "name": "gravel"}, + {"id": 128, "color": [70, 70, 70], "isthing": 0, "name": "house"}, + {"id": 130, "color": [255, 228, 255], "isthing": 0, "name": "light"}, + {"id": 133, "color": [154, 208, 0], "isthing": 0, "name": "mirror-stuff"}, + {"id": 138, "color": [193, 0, 92], "isthing": 0, "name": "net"}, + {"id": 141, "color": [76, 91, 113], "isthing": 0, "name": "pillow"}, + {"id": 144, "color": [255, 180, 195], "isthing": 0, "name": 
"platform"}, + {"id": 145, "color": [106, 154, 176], "isthing": 0, "name": "playingfield"}, + {"id": 147, "color": [230, 150, 140], "isthing": 0, "name": "railroad"}, + {"id": 148, "color": [60, 143, 255], "isthing": 0, "name": "river"}, + {"id": 149, "color": [128, 64, 128], "isthing": 0, "name": "road"}, + {"id": 151, "color": [92, 82, 55], "isthing": 0, "name": "roof"}, + {"id": 154, "color": [254, 212, 124], "isthing": 0, "name": "sand"}, + {"id": 155, "color": [73, 77, 174], "isthing": 0, "name": "sea"}, + {"id": 156, "color": [255, 160, 98], "isthing": 0, "name": "shelf"}, + {"id": 159, "color": [255, 255, 255], "isthing": 0, "name": "snow"}, + {"id": 161, "color": [104, 84, 109], "isthing": 0, "name": "stairs"}, + {"id": 166, "color": [169, 164, 131], "isthing": 0, "name": "tent"}, + {"id": 168, "color": [225, 199, 255], "isthing": 0, "name": "towel"}, + {"id": 171, "color": [137, 54, 74], "isthing": 0, "name": "wall-brick"}, + {"id": 175, "color": [135, 158, 223], "isthing": 0, "name": "wall-stone"}, + {"id": 176, "color": [7, 246, 231], "isthing": 0, "name": "wall-tile"}, + {"id": 177, "color": [107, 255, 200], "isthing": 0, "name": "wall-wood"}, + {"id": 178, "color": [58, 41, 149], "isthing": 0, "name": "water-other"}, + {"id": 180, "color": [183, 121, 142], "isthing": 0, "name": "window-blind"}, + {"id": 181, "color": [255, 73, 97], "isthing": 0, "name": "window-other"}, + {"id": 184, "color": [107, 142, 35], "isthing": 0, "name": "tree-merged"}, + {"id": 185, "color": [190, 153, 153], "isthing": 0, "name": "fence-merged"}, + {"id": 186, "color": [146, 139, 141], "isthing": 0, "name": "ceiling-merged"}, + {"id": 187, "color": [70, 130, 180], "isthing": 0, "name": "sky-other-merged"}, + {"id": 188, "color": [134, 199, 156], "isthing": 0, "name": "cabinet-merged"}, + {"id": 189, "color": [209, 226, 140], "isthing": 0, "name": "table-merged"}, + {"id": 190, "color": [96, 36, 108], "isthing": 0, "name": "floor-other-merged"}, + {"id": 191, "color": [96, 96, 96], "isthing": 0, "name": "pavement-merged"}, + {"id": 192, "color": [64, 170, 64], "isthing": 0, "name": "mountain-merged"}, + {"id": 193, "color": [152, 251, 152], "isthing": 0, "name": "grass-merged"}, + {"id": 194, "color": [208, 229, 228], "isthing": 0, "name": "dirt-merged"}, + {"id": 195, "color": [206, 186, 171], "isthing": 0, "name": "paper-merged"}, + {"id": 196, "color": [152, 161, 64], "isthing": 0, "name": "food-other-merged"}, + {"id": 197, "color": [116, 112, 0], "isthing": 0, "name": "building-other-merged"}, + {"id": 198, "color": [0, 114, 143], "isthing": 0, "name": "rock-merged"}, + {"id": 199, "color": [102, 102, 156], "isthing": 0, "name": "wall-other-merged"}, + {"id": 200, "color": [250, 141, 255], "isthing": 0, "name": "rug-merged"}, +] diff --git a/yolort/utils/__init__.py b/yolort/utils/__init__.py index ecdaf098..ea8479ef 100644 --- a/yolort/utils/__init__.py +++ b/yolort/utils/__init__.py @@ -8,6 +8,7 @@ from .hooks import FeatureExtractor from .image_utils import cv2_imshow, get_image_from_url, read_image_to_tensor from .update_module_state import convert_yolov5_to_yolort, load_from_ultralytics +from .visualizer import Visualizer __all__ = [ @@ -19,6 +20,7 @@ "load_from_ultralytics", "load_state_dict_from_url", "read_image_to_tensor", + "Visualizer", ] diff --git a/yolort/utils/builtin_meta.py b/yolort/utils/builtin_meta.py deleted file mode 100644 index a55d8d10..00000000 --- a/yolort/utils/builtin_meta.py +++ /dev/null @@ -1,578 +0,0 @@ -# -*- coding: utf-8 -*- -# Copyright (c) Facebook, Inc. 
and its affiliates. - -""" -Note: -For your custom dataset, there is no need to hard-code metadata anywhere in the code. -For example, for COCO-format dataset, metadata will be obtained automatically -when calling `load_coco_json`. For other dataset, metadata may also be obtained in other ways -during loading. -However, we hard-coded metadata for a few common dataset here. -The only goal is to allow users who don't have these dataset to use pre-trained models. -Users don't have to download a COCO json (which contains metadata), in order to visualize a -COCO model (with correct class names and colors). -""" - - -COCO_CATEGORIES = [ - { - "id": 1, - "color": [220, 20, 60], - "isthing": 1, - "name": "person", - "supercategory": "person", - }, - { - "id": 2, - "color": [119, 11, 32], - "isthing": 1, - "name": "bicycle", - "supercategory": "vehicle", - }, - { - "id": 3, - "color": [0, 0, 142], - "isthing": 1, - "name": "car", - "supercategory": "vehicle", - }, - { - "id": 4, - "color": [0, 0, 230], - "isthing": 1, - "name": "motorcycle", - "supercategory": "vehicle", - }, - { - "id": 5, - "color": [106, 0, 228], - "isthing": 1, - "name": "airplane", - "supercategory": "vehicle", - }, - { - "id": 6, - "color": [0, 60, 100], - "isthing": 1, - "name": "bus", - "supercategory": "vehicle", - }, - { - "id": 7, - "color": [0, 80, 100], - "isthing": 1, - "name": "train", - "supercategory": "vehicle", - }, - { - "id": 8, - "color": [0, 0, 70], - "isthing": 1, - "name": "truck", - "supercategory": "vehicle", - }, - { - "id": 9, - "color": [0, 0, 192], - "isthing": 1, - "name": "boat", - "supercategory": "vehicle", - }, - { - "id": 10, - "color": [250, 170, 30], - "isthing": 1, - "name": "traffic light", - "supercategory": "outdoor", - }, - { - "id": 11, - "color": [100, 170, 30], - "isthing": 1, - "name": "fire hydrant", - "supercategory": "outdoor", - }, - { - "id": 13, - "color": [220, 220, 0], - "isthing": 1, - "name": "stop sign", - "supercategory": "outdoor", - }, - { - "id": 14, - "color": [175, 116, 175], - "isthing": 1, - "name": "parking meter", - "supercategory": "outdoor", - }, - { - "id": 15, - "color": [250, 0, 30], - "isthing": 1, - "name": "bench", - "supercategory": "outdoor", - }, - { - "id": 16, - "color": [165, 42, 42], - "isthing": 1, - "name": "bird", - "supercategory": "animal", - }, - { - "id": 17, - "color": [255, 77, 255], - "isthing": 1, - "name": "cat", - "supercategory": "animal", - }, - { - "id": 18, - "color": [0, 226, 252], - "isthing": 1, - "name": "dog", - "supercategory": "animal", - }, - { - "id": 19, - "color": [182, 182, 255], - "isthing": 1, - "name": "horse", - "supercategory": "animal", - }, - { - "id": 20, - "color": [0, 82, 0], - "isthing": 1, - "name": "sheep", - "supercategory": "animal", - }, - { - "id": 21, - "color": [120, 166, 157], - "isthing": 1, - "name": "cow", - "supercategory": "animal", - }, - { - "id": 22, - "color": [110, 76, 0], - "isthing": 1, - "name": "elephant", - "supercategory": "animal", - }, - { - "id": 23, - "color": [174, 57, 255], - "isthing": 1, - "name": "bear", - "supercategory": "animal", - }, - { - "id": 24, - "color": [199, 100, 0], - "isthing": 1, - "name": "zebra", - "supercategory": "animal", - }, - { - "id": 25, - "color": [72, 0, 118], - "isthing": 1, - "name": "giraffe", - "supercategory": "animal", - }, - { - "id": 27, - "color": [255, 179, 240], - "isthing": 1, - "name": "backpack", - "supercategory": "accessory", - }, - { - "id": 28, - "color": [0, 125, 92], - "isthing": 1, - "name": "umbrella", - "supercategory": 
"accessory", - }, - { - "id": 31, - "color": [209, 0, 151], - "isthing": 1, - "name": "handbag", - "supercategory": "accessory", - }, - { - "id": 32, - "color": [188, 208, 182], - "isthing": 1, - "name": "tie", - "supercategory": "accessory", - }, - { - "id": 33, - "color": [0, 220, 176], - "isthing": 1, - "name": "suitcase", - "supercategory": "accessory", - }, - { - "id": 34, - "color": [255, 99, 164], - "isthing": 1, - "name": "frisbee", - "supercategory": "sports", - }, - { - "id": 35, - "color": [92, 0, 73], - "isthing": 1, - "name": "skis", - "supercategory": "sports", - }, - { - "id": 36, - "color": [133, 129, 255], - "isthing": 1, - "name": "snowboard", - "supercategory": "sports", - }, - { - "id": 37, - "color": [78, 180, 255], - "isthing": 1, - "name": "sports ball", - "supercategory": "sports", - }, - { - "id": 38, - "color": [0, 228, 0], - "isthing": 1, - "name": "kite", - "supercategory": "sports", - }, - { - "id": 39, - "color": [174, 255, 243], - "isthing": 1, - "name": "baseball bat", - "supercategory": "sports", - }, - { - "id": 40, - "color": [45, 89, 255], - "isthing": 1, - "name": "baseball glove", - "supercategory": "sports", - }, - { - "id": 41, - "color": [134, 134, 103], - "isthing": 1, - "name": "skateboard", - "supercategory": "sports", - }, - { - "id": 42, - "color": [145, 148, 174], - "isthing": 1, - "name": "surfboard", - "supercategory": "sports", - }, - { - "id": 43, - "color": [255, 208, 186], - "isthing": 1, - "name": "tennis racket", - "supercategory": "sports", - }, - { - "id": 44, - "color": [197, 226, 255], - "isthing": 1, - "name": "bottle", - "supercategory": "kitchen", - }, - { - "id": 46, - "color": [171, 134, 1], - "isthing": 1, - "name": "wine glass", - "supercategory": "kitchen", - }, - { - "id": 47, - "color": [109, 63, 54], - "isthing": 1, - "name": "cup", - "supercategory": "kitchen", - }, - { - "id": 48, - "color": [207, 138, 255], - "isthing": 1, - "name": "fork", - "supercategory": "kitchen", - }, - { - "id": 49, - "color": [151, 0, 95], - "isthing": 1, - "name": "knife", - "supercategory": "kitchen", - }, - { - "id": 50, - "color": [9, 80, 61], - "isthing": 1, - "name": "spoon", - "supercategory": "kitchen", - }, - { - "id": 51, - "color": [84, 105, 51], - "isthing": 1, - "name": "bowl", - "supercategory": "kitchen", - }, - { - "id": 52, - "color": [74, 65, 105], - "isthing": 1, - "name": "banana", - "supercategory": "food", - }, - { - "id": 53, - "color": [166, 196, 102], - "isthing": 1, - "name": "apple", - "supercategory": "food", - }, - { - "id": 54, - "color": [208, 195, 210], - "isthing": 1, - "name": "sandwich", - "supercategory": "food", - }, - { - "id": 55, - "color": [255, 109, 65], - "isthing": 1, - "name": "orange", - "supercategory": "food", - }, - { - "id": 56, - "color": [0, 143, 149], - "isthing": 1, - "name": "broccoli", - "supercategory": "food", - }, - { - "id": 57, - "color": [179, 0, 194], - "isthing": 1, - "name": "carrot", - "supercategory": "food", - }, - { - "id": 58, - "color": [209, 99, 106], - "isthing": 1, - "name": "hot dog", - "supercategory": "food", - }, - { - "id": 59, - "color": [5, 121, 0], - "isthing": 1, - "name": "pizza", - "supercategory": "food", - }, - { - "id": 60, - "color": [227, 255, 205], - "isthing": 1, - "name": "donut", - "supercategory": "food", - }, - { - "id": 61, - "color": [147, 186, 208], - "isthing": 1, - "name": "cake", - "supercategory": "food", - }, - { - "id": 62, - "color": [153, 69, 1], - "isthing": 1, - "name": "chair", - "supercategory": "furniture", - }, - { - "id": 63, - 
"color": [3, 95, 161], - "isthing": 1, - "name": "couch", - "supercategory": "furniture", - }, - { - "id": 64, - "color": [163, 255, 0], - "isthing": 1, - "name": "potted plant", - "supercategory": "furniture", - }, - { - "id": 65, - "color": [119, 0, 170], - "isthing": 1, - "name": "bed", - "supercategory": "furniture", - }, - { - "id": 67, - "color": [0, 182, 199], - "isthing": 1, - "name": "dining table", - "supercategory": "furniture", - }, - { - "id": 70, - "color": [0, 165, 120], - "isthing": 1, - "name": "toilet", - "supercategory": "furniture", - }, - { - "id": 72, - "color": [183, 130, 88], - "isthing": 1, - "name": "tv", - "supercategory": "electronic", - }, - { - "id": 73, - "color": [95, 32, 0], - "isthing": 1, - "name": "laptop", - "supercategory": "electronic", - }, - { - "id": 74, - "color": [130, 114, 135], - "isthing": 1, - "name": "mouse", - "supercategory": "electronic", - }, - { - "id": 75, - "color": [110, 129, 133], - "isthing": 1, - "name": "remote", - "supercategory": "electronic", - }, - { - "id": 76, - "color": [166, 74, 118], - "isthing": 1, - "name": "keyboard", - "supercategory": "electronic", - }, - { - "id": 77, - "color": [219, 142, 185], - "isthing": 1, - "name": "cell phone", - "supercategory": "electronic", - }, - { - "id": 78, - "color": [79, 210, 114], - "isthing": 1, - "name": "microwave", - "supercategory": "appliance", - }, - { - "id": 79, - "color": [178, 90, 62], - "isthing": 1, - "name": "oven", - "supercategory": "appliance", - }, - { - "id": 80, - "color": [65, 70, 15], - "isthing": 1, - "name": "toaster", - "supercategory": "appliance", - }, - { - "id": 81, - "color": [127, 167, 115], - "isthing": 1, - "name": "sink", - "supercategory": "appliance", - }, - { - "id": 82, - "color": [59, 105, 106], - "isthing": 1, - "name": "refrigerator", - "supercategory": "appliance", - }, - { - "id": 84, - "color": [142, 108, 45], - "isthing": 1, - "name": "book", - "supercategory": "indoor", - }, - { - "id": 85, - "color": [196, 172, 0], - "isthing": 1, - "name": "clock", - "supercategory": "indoor", - }, - { - "id": 86, - "color": [95, 54, 80], - "isthing": 1, - "name": "vase", - "supercategory": "indoor", - }, - { - "id": 87, - "color": [128, 76, 255], - "isthing": 1, - "name": "scissors", - "supercategory": "indoor", - }, - { - "id": 88, - "color": [201, 57, 1], - "isthing": 1, - "name": "teddy bear", - "supercategory": "indoor", - }, - { - "id": 89, - "color": [246, 0, 122], - "isthing": 1, - "name": "hair drier", - "supercategory": "indoor", - }, - { - "id": 90, - "color": [191, 162, 208], - "isthing": 1, - "name": "toothbrush", - "supercategory": "indoor", - }, -] diff --git a/yolort/utils/image_utils.py b/yolort/utils/image_utils.py index 90d7b99f..2f0d5308 100644 --- a/yolort/utils/image_utils.py +++ b/yolort/utils/image_utils.py @@ -1,3 +1,5 @@ +# Copyright (c) 2020, yolort team. All rights reserved. + import logging from io import BytesIO from pathlib import Path @@ -7,7 +9,6 @@ import numpy as np import requests import torch -from IPython.display import display from PIL import Image from torch import Tensor from torchvision.ops.boxes import box_convert @@ -48,7 +49,7 @@ def plot_one_box(box, img, color=None, label=None, line_thickness=None): def cv2_imshow( - image: np.ndarray, + img_bgr: np.ndarray, imshow_scale: Optional[float] = None, convert_bgr_to_rgb: bool = True, ) -> None: @@ -56,23 +57,26 @@ def cv2_imshow( A replacement of cv2.imshow() for using in Jupyter notebooks. Args: - image (np.ndarray):. 
shape (N, M) or (N, M, 1) is an NxM grayscale image. shape (N, M, 3) + img_bgr (np.ndarray):. shape (N, M) or (N, M, 1) is an NxM grayscale image. shape (N, M, 3) is an NxM BGR color image. shape (N, M, 4) is an NxM BGRA color image. imshow_scale (Optional[float]): zoom ratio to show the image convert_bgr_to_rgb (bool): switch to convert BGR to RGB channel. """ - image = image.clip(0, 255).astype("uint8") + + from IPython.display import display + + img_bgr = img_bgr.clip(0, 255).astype("uint8") # cv2 stores colors as BGR; convert to RGB - if convert_bgr_to_rgb and image.ndim == 3: - if image.shape[2] == 4: - image = cv2.cvtColor(image, cv2.COLOR_BGRA2RGBA) + if convert_bgr_to_rgb and img_bgr.ndim == 3: + if img_bgr.shape[2] == 4: + img_bgr = cv2.cvtColor(img_bgr, cv2.COLOR_BGRA2RGBA) else: - image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB) + img_bgr = cv2.cvtColor(img_bgr, cv2.COLOR_BGR2RGB) if imshow_scale is not None: - image = cv2.resize(image, None, fx=imshow_scale, fy=imshow_scale) + img_bgr = cv2.resize(img_bgr, None, fx=imshow_scale, fy=imshow_scale) - display(Image.fromarray(image)) + display(Image.fromarray(img_bgr)) def color_list(): diff --git a/yolort/utils/visualizer.py b/yolort/utils/visualizer.py new file mode 100644 index 00000000..ba4d3f85 --- /dev/null +++ b/yolort/utils/visualizer.py @@ -0,0 +1,297 @@ +# Copyright (c) 2022, yolort team. All rights reserved. + +from typing import Dict, List, Optional, Tuple, Union + +import numpy as np +import torch +from PIL import Image +from torch import Tensor +from yolort.v5.utils.plots import Colors + +try: + import cv2 +except ImportError: + cv2 = None + + +class Visualizer: + """ + Visualizer that draws data about detection on images. + + It contains methods like `draw_{text,box}` that draw primitive objects to images, as well as + high-level wrappers like `draw_{instance_predictions,dataset_dict}` that draw composite data + in some pre-defined style. + + This visualizer focuses on high rendering quality rather than performance. It is not + designed to be used for real-time applications. + + Reference: + We have followed most of the interfaces of detectron2 here, but the implementation will be + a bit different. Check out the following for more details. + https://github.com/facebookresearch/detectron2/blob/9258799/detectron2/utils/visualizer.py + + Args: + image (torch.Tensor or numpy.ndarray): Tensor of shape (C x H x W) or ndarray of + shape (H x W x C) with dtype uint8. + metalabels (string, optional): Concrete label names of different classes. Default: None + instance_mode (int, optional): defines one of the pre-defined style for drawing + instances on an image. 
Default: None + """ + + def __init__(self, image: Union[Tensor, np.ndarray], *, metalabels: Optional[str] = None) -> None: + + if isinstance(image, torch.Tensor): + if image.dtype != torch.uint8: + raise ValueError(f"Tensor uint8 expected, got {image.dtype}") + if image.dim() != 3: + raise ValueError("Pass individual images, not batches") + if image.size(0) not in {1, 3}: + raise ValueError("Only grayscale and RGB images are supported") + # Handle Grayscale images + if image.size(0) == 1: + image = torch.tile(image, (3, 1, 1)) + self.img = image.permute(1, 2, 0).cpu().numpy() + self.is_bgr = False + elif isinstance(image, np.ndarray): + if image.dtype != np.uint8: + raise ValueError(f"Numpy uint8 expected, got {image.dtype}") + if image.ndim != 3: + raise ValueError("Currently only BGR images are supported") + self.img = image + self.is_bgr = True + else: + raise TypeError(f"Tensor or numpy.ndarray expected, got {type(image)}") + + # Set dataset metadata (e.g. class names) + self.metadata = None + if metalabels is not None: + self.metadata = np.loadtxt(metalabels, dtype="str", delimiter="\n") + + self.line_width = max(round(sum(self.img.shape) / 2 * 0.003), 2) + self.assigned_colors = Colors() + self.output = self.img + + def draw_instance_predictions(self, predictions: Dict[str, Tensor]): + """ + Draw instance-level prediction results on an image. + + Args: + predictions (dict): the output of an instance detection model. Following + fields will be used to draw: "boxes", "labels", "scores". + + Returns: + np.ndarray: image object with visualizations. + """ + + if cv2 is None: + raise ImportError("OpenCV is not installed, please install it first.") + + boxes = self._convert_boxes(predictions["boxes"]) + labels = predictions["labels"].tolist() + colors = self._create_colors(labels) + scores = predictions["scores"].tolist() + labels = self._create_text_labels(labels, scores) + + self.overlay_instances(boxes=boxes, labels=labels, colors=colors) + return self.output + + def imshow(self, scale: Optional[float] = None): + """ + A replacement of cv2.imshow() for using in Jupyter notebooks. + + Args: + scale (float, optional): zoom ratio to show the image. Default: None + """ + from IPython.display import display + + img = self.output + + img = img.clip(0, 255).astype("uint8") + # cv2 stores colors as BGR; convert to RGB + if self.is_bgr and img.ndim == 3: + if img.shape[2] == 4: + img = cv2.cvtColor(img, cv2.COLOR_BGRA2RGBA) + else: + img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB) + + if scale is not None: + img = cv2.resize(img, None, fx=scale, fy=scale) + + display(Image.fromarray(img)) + + def overlay_instances( + self, + boxes: np.ndarray, + labels: List[str], + colors: List[Tuple[int, int, int]], + ) -> np.ndarray: + """ + Overlay bounding boxes and labels on input image. + + Args: + boxes (np.ndarray): Numpy array of size (N, 4) containing bounding boxes + in (xmin, ymin, xmax, ymax) format for the N objects in a single image. + Note that the boxes are absolute coordinates with respect to the image. + In other words: `0 <= xmin < xmax < W` and `0 <= ymin < ymax < H`. + labels (List[string]): List containing the text to be displayed for each + instance. + colors (List[Tuple[int, int, int]]): List containing the color of the label + to be painted. + + Returns: + np.ndarray: image object with visualizations. + """ + + num_instances = len(boxes) + assert len(labels) == num_instances + if num_instances == 0: + return self.output + + # Display in largest to smallest order to reduce occlusion. 
+ areas = np.prod(boxes[:, 2:] - boxes[:, :2], axis=1) + + sorted_idxs = np.argsort(-areas).tolist() + # Re-order overlapped instances in descending order. + boxes = boxes[sorted_idxs] if boxes is not None else None + labels = [labels[k] for k in sorted_idxs] if labels is not None else None + colors = [colors[k] for k in sorted_idxs] if colors is not None else None + + for box, label, color in zip(boxes, labels, colors): + pt1, pt2 = (int(box[0]), int(box[1])), (int(box[2]), int(box[3])) + self.draw_box(pt1, pt2, color=color) + + lighter_color = self._change_color_brightness(color, brightness_factor=0.7) + self.draw_text(label, pt1, pt2, color=lighter_color) + + return self.output + + def draw_box( + self, + pt1: Tuple[int, int], + pt2: Tuple[int, int], + color: Tuple[int, int, int] = (229, 160, 21), + ) -> np.ndarray: + """ + Draws bounding boxes on given image. + The values of the input image should be uint8 between 0 and 255. + + Args: + pt1 (Tuple[int, int]): Vertex of the rectangle (top left corner). + pt2 (Tuple[int, int]): Vertex of the rectangle opposite to pt1 (bottom right corner). + color (Tuple[int, int, int]): color of the outline of the box. + + Returns: + np.ndarray: image object with box drawn. + """ + cv2.rectangle(self.output, pt1, pt2, color, thickness=self.line_width, lineType=cv2.LINE_AA) + return self.output + + def draw_text( + self, + text: str, + pt1: Tuple[int, int], + pt2: Tuple[int, int], + *, + font_size: Optional[int] = None, + color: Tuple[int, int, int] = (229, 160, 21), + txt_color: Tuple[int, int, int] = (255, 255, 255), + ): + """ + Draws text on given image. + + Args: + text (string): class label + pt1 (Tuple[int, int]): Vertex of the rectangle (top left corner). + pt2 (Tuple[int, int]): Vertex of the rectangle opposite to pt1 (bottom right corner). + font_size (int, optional): font of the text. If not provided, a font size + proportional to the image width is calculated and used. Default: None + color (Tuple[int, int, int]): color of the filled text. + txt_color (Tuple[int, int, int]): color of the text. + + Returns: + np.ndarray: image object with text drawn. + """ + + if font_size is None: + font_size = max(self.line_width - 1, 1) # font thickness + w, h = cv2.getTextSize(text, 0, fontScale=self.line_width / 3, thickness=font_size)[0] + outside = pt1[1] - h - 3 >= 0 # text fits outside box + pt2 = pt1[0] + w, pt1[1] - h - 3 if outside else pt1[1] + h + 3 + cv2.rectangle(self.output, pt1, pt2, color, -1, cv2.LINE_AA) # filled + cv2.putText( + self.output, + text, + (pt1[0], pt1[1] - 2 if outside else pt1[1] + h + 2), + 0, + self.line_width / 3, + txt_color, + thickness=font_size, + lineType=cv2.LINE_AA, + ) + return self.output + + def _convert_boxes(self, boxes: Union[Tensor, np.ndarray]): + """ + Convert different format of boxes to an Nx4 array. + """ + if isinstance(boxes, Tensor): + return boxes.cpu().detach().numpy() + else: + return boxes + + def _create_text_labels( + self, + classes: Optional[List[int]] = None, + scores: Optional[List[float]] = None, + is_crowd: Optional[List[bool]] = None, + ): + """ + Generate labels that classes and scores can match, and set class back to its original + name if concrete class names are provided. 
+ """ + labels = None + if classes is not None: + if self.metadata is not None and len(self.metadata) > 0: + labels = [self.metadata[i] for i in classes] + else: + labels = [str(i) for i in classes] + if scores is not None: + if labels is None: + labels = [f"{score * 100:.0f}%" for score in scores] + else: + labels = [f"{label} {score * 100:.0f}%" for label, score in zip(labels, scores)] + if labels is not None and is_crowd is not None: + labels = [label + ("|crowd" if crowd else "") for label, crowd in zip(labels, is_crowd)] + return labels + + def _create_colors(self, labels: Optional[List[int]] = None): + """ + Generate colors that match the labels. + """ + colors = None + if labels is not None: + colors = [self.assigned_colors(label, bgr=self.is_bgr) for label in labels] + return colors + + def _change_color_brightness( + self, + color: Tuple[int, int, int], + brightness_factor: float, + ) -> Tuple[int, int, int]: + """ + Depending on the brightness_factor, gives a lighter or darker color i.e. a color with + less or more saturation than the original color. + + Args: + color (Tuple[int, int, int]): color of the polygon. + brightness_factor (float): a value in [-1.0, 1.0] range. A lightness factor of + 0 will correspond to no change, a factor in [-1.0, 0) range will result in + a darker color and a factor in (0, 1.0] range will result in a lighter color. + + Returns: + modified_color (Tuple[int, int, int]): a tuple containing the RGB/BGR values of the + modified color. + """ + assert brightness_factor >= -1.0 and brightness_factor <= 1.0 + # TODO: Implement the details in a follow-up PR + return color diff --git a/yolort/utils/yolo2coco.py b/yolort/utils/yolo2coco.py index c9896e9c..df7ee045 100644 --- a/yolort/utils/yolo2coco.py +++ b/yolort/utils/yolo2coco.py @@ -1,11 +1,11 @@ -# Copyright (c) 2020, Zhiqiang Wang. All Rights Reserved. +# Copyright (c) 2020, yolort team. All Rights Reserved. + import argparse import json from pathlib import Path from PIL import Image - -from .builtin_meta import COCO_CATEGORIES +from yolort.data.builtin_meta import COCO_CATEGORIES class YOLO2COCO:
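
For context, the new `Visualizer` is exercised end to end as follows. This is a minimal sketch assembled from the test and docstrings added in this patch; it assumes a local checkout of the repository (so that `test/assets/zidane.jpg` and `notebooks/assets/coco.names` are present) and network access to download the pretrained `yolov5n` weights.

    import numpy as np
    from torchvision.io import read_image

    from yolort import models
    from yolort.utils import Visualizer

    # Build the detection model with the same settings used in the new test.
    model = models.__dict__["yolov5n"](pretrained=True, size=(320, 320), score_thresh=0.45)
    model = model.eval()

    # Asset paths mirror the test/notebook assets shipped with the repository.
    img_path = "test/assets/zidane.jpg"
    metalabels_path = "notebooks/assets/coco.names"

    # predict() accepts an image path and returns one dict per image
    # with "boxes", "labels" and "scores" keys.
    predictions = model.predict(img_path)

    # Draw the predictions for the first image onto the original frame.
    image = read_image(img_path)  # uint8 tensor of shape (C, H, W), RGB order
    v = Visualizer(image, metalabels=metalabels_path)
    output = v.draw_instance_predictions(predictions[0])
    assert isinstance(output, np.ndarray)

    # Inside a Jupyter notebook the result can be shown directly:
    # v.imshow(scale=0.5)

Note that `Visualizer.imshow()` imports `IPython.display` at call time, like the updated `cv2_imshow()` helper, so it is intended for notebook use only; outside of IPython the returned `np.ndarray` can be handled with any image library.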