[OTE][develop] Add tiling in rotated detection #1420

Merged: 10 commits, Dec 20, 2022
102 changes: 23 additions & 79 deletions external/mmdetection/detection_tasks/apis/detection/openvino_task.py
@@ -76,52 +76,16 @@
from ote_sdk.utils import Tiler
from ote_sdk.utils.detection_utils import detection2array
from ote_sdk.utils.vis_utils import get_actmap
from typing import Any, Dict, Optional, Tuple, Union, List
from typing import Any, Dict, Optional, Tuple, Union
from zipfile import ZipFile

from mmdet.utils.logger import get_root_logger
from .configuration import OTEDetectionConfig
from . import model_wrappers

from mmcv.ops import nms

logger = get_root_logger()


def multiclass_nms(
scores: np.ndarray,
labels: np.ndarray,
boxes: np.ndarray,
iou_threshold=0.45,
max_num=200,
):
""" Multi-class NMS

strategy: in order to perform NMS independently per class,
we add an offset to all the boxes. The offset is dependent
only on the class idx, and is large enough so that boxes
from different classes do not overlap

Args:
scores (np.ndarray): box scores
labels (np.ndarray): box label indices
boxes (np.ndarray): box coordinates
iou_threshold (float, optional): IoU threshold. Defaults to 0.45.
max_num (int, optional): Max number of objects filter. Defaults to 200.

Returns:
tuple: kept detections (dets, boxes with scores) and their indices (keep)
"""
max_coordinate = boxes.max()
offsets = labels.astype(boxes.dtype) * (max_coordinate + 1)
boxes_for_nms = boxes + offsets[:, None]
dets, keep = nms(boxes_for_nms, scores, iou_threshold)
if max_num > 0:
dets = dets[:max_num]
keep = keep[:max_num]
return dets, keep


class BaseInferencerWithConverter(BaseInferencer):
@check_input_parameters_type()
def __init__(
@@ -174,6 +138,28 @@ def predict(
def forward(self, inputs: Dict[str, np.ndarray]) -> Dict[str, np.ndarray]:
return self.model.infer_sync(inputs)

@check_input_parameters_type()
def predict_tile(
self, image: np.ndarray, tile_size: int, overlap: float, max_number: int
) -> Tuple[AnnotationSceneEntity, np.ndarray, np.ndarray]:
""" Run prediction by tiling image to small patches

Args:
image (np.ndarray): input image
tile_size (int): tile crop size
overlap (float): overlap ratio between tiles
max_number (int): max number of predicted objects allowed

Returns:
detections (AnnotationSceneEntity): converted detection annotations
features: list containing the saliency map and feature vector
"""
segm = isinstance(self.converter, (MaskToAnnotationConverter, RotatedRectToAnnotationConverter))
tiler = Tiler(tile_size=tile_size, overlap=overlap, max_number=max_number, model=self.model, segm=segm)
detections, features = tiler.predict(image)
detections = self.converter.convert_to_annotation(detections, metadata={"original_shape": image.shape})
return detections, features


class OpenVINODetectionInferencer(BaseInferencerWithConverter):
@check_input_parameters_type()
@@ -227,27 +213,6 @@ def post_process(
detections = detection2array(detections)
return self.converter.convert_to_annotation(detections, metadata)

@check_input_parameters_type()
def predict_tile(
self, image: np.ndarray, tile_size: int, overlap: float, max_number: int
) -> Tuple[AnnotationSceneEntity, np.ndarray, np.ndarray]:
""" Run prediction by tiling image to small patches

Args:
image (np.ndarray): input image
tile_size (int): tile crop size
overlap (float): overlap ratio between tiles
max_number (int): max number of predicted objects allowed

Returns:
detections: AnnotationSceneEntity
features: list including saliency map and feature vector
"""
tiler = Tiler(tile_size=tile_size, overlap=overlap, max_number=max_number, model=self.model)
detections, features = tiler.predict(image)
detections = self.converter.convert_to_annotation(detections, metadata={"original_shape": image.shape})
return detections, features


class OpenVINOMaskInferencer(BaseInferencerWithConverter):
@check_input_parameters_type()
@@ -284,27 +249,6 @@ def __init__(

super().__init__(configuration, model, converter)

@check_input_parameters_type()
def predict_tile(
self, image: np.ndarray, tile_size: int, overlap: float, max_number: int
) -> Tuple[AnnotationSceneEntity, np.ndarray, np.ndarray]:
""" Run prediction by tiling image to small patches

Args:
image (np.ndarray): input image
tile_size (int): tile crop size
overlap (float): overlap ratio between tiles
max_number (int): max number of predicted objects allowed

Returns:
detections: AnnotationSceneEntity
features: list including saliency map and feature vector
"""
tiler = Tiler(tile_size=tile_size, overlap=overlap, max_number=max_number, model=self.model, segm=True)
detections, features = tiler.predict(image)
detections = self.converter.convert_to_annotation(detections, metadata={"original_shape": image.shape})
return detections, features


class OpenVINORotatedRectInferencer(BaseInferencerWithConverter):
@check_input_parameters_type()
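For reference, a minimal usage sketch of the consolidated predict_tile (an editor's illustration, not part of this diff; the constructor arguments are placeholders):

    inferencer = OpenVINORotatedRectInferencer(configuration, model, converter)  # placeholder setup
    detections, features = inferencer.predict_tile(image, tile_size=400, overlap=0.2, max_number=1500)
    # detections is an AnnotationSceneEntity; features bundles the saliency map and feature vector

Because the segm flag is derived from the converter type, the mask and rotated-rect inferencers get mask-aware tiling from the base class without overriding the method.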
@@ -1,7 +1,7 @@
dataset_type = "CocoDataset"
img_size = (1024, 1024)

img_norm_cfg = dict(mean=(103.53, 116.28, 123.675), std=(1.0, 1.0, 1.0), to_rgb=False)
img_norm_cfg = dict(mean=(103.53, 116.28, 123.675), std=(1.0, 1.0, 1.0), to_rgb=True)

train_pipeline = [
dict(type="LoadImageFromFile"),
@@ -6,7 +6,7 @@
tile_size=400, min_area_ratio=0.9, overlap_ratio=0.2, iou_threshold=0.45, max_per_img=1500, filter_empty_gt=True
)

img_norm_cfg = dict(mean=(103.53, 116.28, 123.675), std=(1.0, 1.0, 1.0), to_rgb=False)
img_norm_cfg = dict(mean=(103.53, 116.28, 123.675), std=(1.0, 1.0, 1.0), to_rgb=True)

train_pipeline = [
dict(type="Resize", img_scale=img_size, keep_ratio=False),
@@ -376,3 +376,95 @@ nncf_optimization:
warning: null
type: PARAMETER_GROUP
visible_in_ui: True
tiling_parameters:
header: Tiling
description: Crop dataset to tiles

enable_tiling:
header: Enable tiling
description: Set to True to detect tiny objects more reliably.
default_value: false
editable: true
affects_outcome_of: TRAINING
type: BOOLEAN
ui_rules:
action: DISABLE_EDITING
operator: AND
rules: []
type: UI_RULES
value: true
visible_in_ui: true
warning: Tiling trades off speed for accuracy as it increases the number of images to be processed.

enable_adaptive_params:
header: Enable adaptive tiling parameters
description: Configure tile size and tile overlap adaptively, based on statistics of the annotated dataset
default_value: true
editable: true
affects_outcome_of: TRAINING
type: BOOLEAN
ui_rules:
action: DISABLE_EDITING
operator: AND
rules: []
type: UI_RULES
value: true
visible_in_ui: true
warning: null

tile_size:
header: Tile Image Size
description: Tile Image Size
affects_outcome_of: TRAINING
default_value: 400
min_value: 100
max_value: 1024
type: INTEGER
editable: true
ui_rules:
action: DISABLE_EDITING
operator: AND
rules: []
type: UI_RULES
value: 400
visible_in_ui: true
warning: null

tile_overlap:
header: Tile Overlap
description: Overlap ratio between neighboring tiles.
affects_outcome_of: TRAINING
default_value: 0.2
min_value: 0.0
max_value: 1.0
type: FLOAT
editable: true
ui_rules:
action: DISABLE_EDITING
operator: AND
rules: []
type: UI_RULES
value: 0.2
visible_in_ui: true
warning: null

tile_max_number:
header: Max objects per image
description: Max objects per image
affects_outcome_of: TRAINING
default_value: 1500
min_value: 1
max_value: 10000
type: INTEGER
editable: true
ui_rules:
action: DISABLE_EDITING
operator: AND
rules: []
type: UI_RULES
value: 1500
visible_in_ui: true
warning: null

type: PARAMETER_GROUP
visible_in_ui: true
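A minimal sketch of how this parameter group could drive inference (assuming the task exposes the group as attributes mirroring the YAML keys; the accessor below is hypothetical):

    tiling = config.tiling_parameters  # hypothetical accessor
    if tiling.enable_tiling:
        detections, features = inferencer.predict_tile(
            image,
            tile_size=tiling.tile_size,         # default 400
            overlap=tiling.tile_overlap,        # default 0.2
            max_number=tiling.tile_max_number,  # default 1500
        )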
@@ -1,7 +1,7 @@
dataset_type = "CocoDataset"
img_size = (1024, 1024)

img_norm_cfg = dict(mean=(103.53, 116.28, 123.675), std=(1.0, 1.0, 1.0), to_rgb=False)
img_norm_cfg = dict(mean=(103.53, 116.28, 123.675), std=(1.0, 1.0, 1.0), to_rgb=True)

train_pipeline = [
dict(type="LoadImageFromFile"),
@@ -0,0 +1,85 @@
dataset_type = "CocoDataset"

img_size = (1024, 1024)

tile_cfg = dict(
tile_size=400, min_area_ratio=0.9, overlap_ratio=0.2, iou_threshold=0.45, max_per_img=1500, filter_empty_gt=True
)
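# Editor's note (a sketch, assuming the common sliding-window stride formula
# stride = tile_size * (1 - overlap_ratio)): here stride = int(400 * 0.8) = 320,
# so a 1024x1024 image needs ceil((1024 - 400) / 320) + 1 = 3 tiles per axis,
# i.e. 9 tiles in total.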

img_norm_cfg = dict(mean=(103.53, 116.28, 123.675), std=(1.0, 1.0, 1.0), to_rgb=True)

train_pipeline = [
dict(type="Resize", img_scale=img_size, keep_ratio=False),
dict(type="RandomFlip", flip_ratio=0.5),
dict(type="Normalize", **img_norm_cfg),
dict(type="Pad", size_divisor=32),
dict(type="DefaultFormatBundle"),
dict(type="Collect", keys=["img", "gt_bboxes", "gt_labels", "gt_masks"]),
]

test_pipeline = [
dict(
type="MultiScaleFlipAug",
img_scale=img_size,
flip=False,
transforms=[
dict(type="Resize", keep_ratio=False),
dict(type="RandomFlip"),
dict(type="Normalize", **img_norm_cfg),
dict(type="Pad", size_divisor=32),
dict(type="ImageToTensor", keys=["img"]),
dict(type="Collect", keys=["img"]),
],
)
]

__dataset_type = "CocoDataset"
__data_root = "data/coco/"

__samples_per_gpu = 4

train_dataset = dict(
type="ImageTilingDataset",
dataset=dict(
type=__dataset_type,
ann_file=__data_root + "annotations/instances_train.json",
img_prefix=__data_root + "images/train",
pipeline=[
dict(type="LoadImageFromFile"),
dict(type="LoadAnnotations", with_bbox=True, with_mask=True),
],
),
pipeline=train_pipeline,
**tile_cfg
)

val_dataset = dict(
type="ImageTilingDataset",
dataset=dict(
type=__dataset_type,
ann_file=__data_root + "annotations/instances_val.json",
img_prefix=__data_root + "images/val",
test_mode=True,
pipeline=[dict(type="LoadImageFromFile")],
),
pipeline=test_pipeline,
**tile_cfg
)

test_dataset = dict(
type="ImageTilingDataset",
dataset=dict(
type=__dataset_type,
ann_file=__data_root + "annotations/instances_test.json",
img_prefix=__data_root + "images/test",
test_mode=True,
pipeline=[dict(type="LoadImageFromFile")],
),
pipeline=test_pipeline,
**tile_cfg
)


data = dict(
samples_per_gpu=__samples_per_gpu, workers_per_gpu=2, train=train_dataset, val=val_dataset, test=test_dataset
)
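Assuming ImageTilingDataset is registered in mmdet's DATASETS registry (which this config relies on), the dataset configs above can be built the standard way; a minimal sketch:

    from mmdet.datasets import build_dataset

    # Each sample yielded by the tiling dataset is a tile cropped from a
    # source image according to tile_cfg above.
    tiled_train = build_dataset(train_dataset)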