From 986d4eb1637afac5bebd0a6d88da82b848869900 Mon Sep 17 00:00:00 2001 From: erenzhou <592267829@qq.com> Date: Tue, 1 Mar 2022 16:02:53 +0800 Subject: [PATCH 1/4] support icdar dataset --- configs/_base_/datasets/icdar.py | 47 ++++ .../s2anet_r50_fpn_1x_icdar2015_le135.py | 132 +++++++++++ mmrotate/datasets/__init__.py | 3 +- mmrotate/datasets/icdar.py | 91 ++++++++ requirements/runtime.txt | 1 + setup.cfg | 2 +- tools/data/dota/split/img_split.py | 3 - tools/data/icdar/README.md | 47 ++++ tools/data/icdar/icdar_converter.py | 220 ++++++++++++++++++ 9 files changed, 541 insertions(+), 5 deletions(-) create mode 100644 configs/_base_/datasets/icdar.py create mode 100644 configs/s2anet/s2anet_r50_fpn_1x_icdar2015_le135.py create mode 100644 mmrotate/datasets/icdar.py create mode 100644 tools/data/icdar/README.md create mode 100644 tools/data/icdar/icdar_converter.py diff --git a/configs/_base_/datasets/icdar.py b/configs/_base_/datasets/icdar.py new file mode 100644 index 000000000..c44d91eb8 --- /dev/null +++ b/configs/_base_/datasets/icdar.py @@ -0,0 +1,47 @@ +# dataset settings +dataset_type = 'IcdarDataset' +data_root = '../datasets/icdar/' +img_norm_cfg = dict( + mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='LoadAnnotations', with_bbox=True), + dict(type='RResize', img_scale=(1024, 1024)), + dict(type='RRandomFlip', flip_ratio=0.5), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='DefaultFormatBundle'), + dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']) +] +test_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='MultiScaleFlipAug', + img_scale=(1024, 1024), + flip=False, + transforms=[ + dict(type='RResize'), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='DefaultFormatBundle'), + dict(type='Collect', keys=['img']) + ]) +] +data = dict( + samples_per_gpu=2, + 
workers_per_gpu=2, + train=dict( + type=dataset_type, + ann_file=f'{data_root}/instances_training.json', + img_prefix=f'{data_root}/imgs', + pipeline=train_pipeline), + val=dict( + type=dataset_type, + ann_file=f'{data_root}/instances_test.json', + img_prefix=f'{data_root}/imgs', + pipeline=test_pipeline), + test=dict( + type=dataset_type, + ann_file=f'{data_root}/instances_test.json', + img_prefix=f'{data_root}/imgs', + pipeline=test_pipeline)) diff --git a/configs/s2anet/s2anet_r50_fpn_1x_icdar2015_le135.py b/configs/s2anet/s2anet_r50_fpn_1x_icdar2015_le135.py new file mode 100644 index 000000000..f6ed55411 --- /dev/null +++ b/configs/s2anet/s2anet_r50_fpn_1x_icdar2015_le135.py @@ -0,0 +1,132 @@ +_base_ = [ + '../_base_/datasets/icdar.py', '../_base_/schedules/schedule_1x.py', + '../_base_/default_runtime.py' +] + +angle_version = 'le135' +model = dict( + type='S2ANet', + backbone=dict( + type='ResNet', + depth=50, + num_stages=4, + out_indices=(0, 1, 2, 3), + frozen_stages=1, + zero_init_residual=False, + norm_cfg=dict(type='BN', requires_grad=True), + norm_eval=True, + style='pytorch', + init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50')), + neck=dict( + type='FPN', + in_channels=[256, 512, 1024, 2048], + out_channels=256, + start_level=1, + add_extra_convs='on_input', + num_outs=5), + fam_head=dict( + type='RotatedRetinaHead', + num_classes=15, + in_channels=256, + stacked_convs=2, + feat_channels=256, + assign_by_circumhbbox=None, + anchor_generator=dict( + type='RotatedAnchorGenerator', + scales=[4], + ratios=[1.0], + strides=[8, 16, 32, 64, 128]), + bbox_coder=dict( + type='DeltaXYWHAOBBoxCoder', + angle_range=angle_version, + norm_factor=1, + edge_swap=False, + proj_xy=True, + target_means=(.0, .0, .0, .0, .0), + target_stds=(1.0, 1.0, 1.0, 1.0, 1.0)), + loss_cls=dict( + type='FocalLoss', + use_sigmoid=True, + gamma=2.0, + alpha=0.25, + loss_weight=1.0), + loss_bbox=dict(type='SmoothL1Loss', beta=0.11, loss_weight=1.0)), + 
align_cfgs=dict( + type='AlignConv', + kernel_size=3, + channels=256, + featmap_strides=[8, 16, 32, 64, 128]), + odm_head=dict( + type='ODMRefineHead', + num_classes=15, + in_channels=256, + stacked_convs=2, + feat_channels=256, + assign_by_circumhbbox=None, + anchor_generator=dict( + type='PseudoAnchorGenerator', strides=[8, 16, 32, 64, 128]), + bbox_coder=dict( + type='DeltaXYWHAOBBoxCoder', + angle_range=angle_version, + norm_factor=1, + edge_swap=False, + proj_xy=True, + target_means=(0.0, 0.0, 0.0, 0.0, 0.0), + target_stds=(1.0, 1.0, 1.0, 1.0, 1.0)), + loss_cls=dict( + type='FocalLoss', + use_sigmoid=True, + gamma=2.0, + alpha=0.25, + loss_weight=1.0), + loss_bbox=dict(type='SmoothL1Loss', beta=0.11, loss_weight=1.0)), + train_cfg=dict( + fam_cfg=dict( + assigner=dict( + type='MaxIoUAssigner', + pos_iou_thr=0.5, + neg_iou_thr=0.4, + min_pos_iou=0, + ignore_iof_thr=-1, + iou_calculator=dict(type='RBboxOverlaps2D')), + allowed_border=-1, + pos_weight=-1, + debug=False), + odm_cfg=dict( + assigner=dict( + type='MaxIoUAssigner', + pos_iou_thr=0.5, + neg_iou_thr=0.4, + min_pos_iou=0, + ignore_iof_thr=-1, + iou_calculator=dict(type='RBboxOverlaps2D')), + allowed_border=-1, + pos_weight=-1, + debug=False)), + test_cfg=dict( + nms_pre=2000, + min_bbox_size=0, + score_thr=0.05, + nms=dict(iou_thr=0.1), + max_per_img=2000)) + +img_norm_cfg = dict( + mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='LoadAnnotations', with_bbox=True), + dict(type='RResize', img_scale=(1024, 1024)), + dict( + type='RRandomFlip', + flip_ratio=[0.25, 0.25, 0.25], + direction=['horizontal', 'vertical', 'diagonal'], + version=angle_version), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='DefaultFormatBundle'), + dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']) +] +data = dict( + train=dict(pipeline=train_pipeline, version=angle_version), + 
val=dict(version=angle_version), + test=dict(version=angle_version)) diff --git a/mmrotate/datasets/__init__.py b/mmrotate/datasets/__init__.py index 559c42c34..4f9add15a 100644 --- a/mmrotate/datasets/__init__.py +++ b/mmrotate/datasets/__init__.py @@ -1,7 +1,8 @@ # Copyright (c) OpenMMLab. All rights reserved. from .builder import build_dataset # noqa: F401, F403 from .dota import DOTADataset # noqa: F401, F403 +from .icdar import IcdarDataset # noqa: F401, F403 from .pipelines import * # noqa: F401, F403 from .sar import SARDataset # noqa: F401, F403 -__all__ = ['SARDataset', 'DOTADataset', 'build_dataset'] +__all__ = ['SARDataset', 'DOTADataset', 'IcdarDataset', 'build_dataset'] diff --git a/mmrotate/datasets/icdar.py b/mmrotate/datasets/icdar.py new file mode 100644 index 000000000..0c1695c69 --- /dev/null +++ b/mmrotate/datasets/icdar.py @@ -0,0 +1,91 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import numpy as np +from mmdet.datasets.api_wrappers import COCO + +from mmrotate.core import poly2obb_np +from .builder import ROTATED_DATASETS +from .dota import DOTADataset + + +@ROTATED_DATASETS.register_module() +class IcdarDataset(DOTADataset): + """ICDAR text dataset for rotated object detection (Support ICDAR2015 and + ICDAR2017).""" + CLASSES = ('text', ) + + def __init__(self, + ann_file, + pipeline, + version='oc', + difficulty=100, + select_first_k=-1, + **kwargs): + self.version = version + self.select_first_k = select_first_k + + super(DOTADataset, self).__init__(ann_file, pipeline, version, + difficulty, **kwargs) + + def load_annotations(self, ann_file): + """Load annotation from COCO style annotation file. + + Args: + ann_file (str): Path of annotation file. + Returns: + list[dict]: Annotation info. 
+ """ + self.coco = COCO(ann_file) + self.cat_ids = self.coco.get_cat_ids(cat_names=self.CLASSES) + self.cat2label = {cat_id: i for i, cat_id in enumerate(self.cat_ids)} + self.img_ids = self.coco.get_img_ids() + data_infos = [] + + count = 0 + for i in self.img_ids: + data_info = {} + info = self.coco.load_imgs([i])[0] + data_info['filename'] = info['file_name'] + data_info['ann'] = {} + img_id = info['id'] + ann_ids = self.coco.get_ann_ids(img_ids=[img_id]) + ann_info = self.coco.load_anns(ann_ids) + gt_bboxes = [] + gt_labels = [] + gt_polygons = [] + for ann in ann_info: + if ann.get('ignore', False): + continue + x1, y1, w, h = ann['bbox'] + if ann['area'] <= 0 or w < 1 or h < 1: + continue + if ann['category_id'] not in self.cat_ids: + continue + try: + x, y, w, h, a = poly2obb_np(ann['segmentation'], + self.version) + except: # noqa: E722 + continue + gt_bboxes.append([x, y, w, h, a]) + gt_labels.append(ann['category_id']) + gt_polygons.append(ann['segmentation']) + + if gt_bboxes: + data_info['ann']['bboxes'] = np.array( + gt_bboxes, dtype=np.float32) + data_info['ann']['labels'] = np.array( + gt_labels, dtype=np.int64) + data_info['ann']['polygons'] = np.array( + gt_polygons, dtype=np.float32) + else: + data_info['ann']['bboxes'] = np.zeros((0, 5), + dtype=np.float32) + data_info['ann']['labels'] = np.array([], dtype=np.int64) + data_info['ann']['polygons'] = np.zeros((0, 8), + dtype=np.float32) + + data_infos.append(data_info) + count = count + 1 + if count > self.select_first_k and self.select_first_k > 0: + break + + return data_infos diff --git a/requirements/runtime.txt b/requirements/runtime.txt index 5f73aec4f..0aa34c0fa 100644 --- a/requirements/runtime.txt +++ b/requirements/runtime.txt @@ -5,6 +5,7 @@ mmdet numpy pycocotools; platform_system == "Linux" pycocotools-windows; platform_system == "Windows" +shapely six terminaltables torch diff --git a/setup.cfg b/setup.cfg index ea404076c..8c655d294 100644 --- a/setup.cfg +++ b/setup.cfg @@ -3,7 +3,7 
@@ line_length = 79 multi_line_output = 0 known_standard_library = setuptools known_first_party = mmrotate -known_third_party = PIL,cv2,e2cnn,matplotlib,mmcv,mmdet,numpy,pytest,pytorch_sphinx_theme,terminaltables,torch +known_third_party = PIL,cv2,e2cnn,matplotlib,mmcv,mmdet,numpy,pytest,pytorch_sphinx_theme,shapely,terminaltables,torch no_lines_before = STDLIB,LOCALFOLDER default_section = THIRDPARTY diff --git a/tools/data/dota/split/img_split.py b/tools/data/dota/split/img_split.py index f535652f0..baf58b798 100644 --- a/tools/data/dota/split/img_split.py +++ b/tools/data/dota/split/img_split.py @@ -235,9 +235,6 @@ def bbox_overlaps_iof(bboxes1, bboxes2, eps=1e-6): l, t, r, b = [bboxes2[..., i] for i in range(4)] polys2 = np.stack([l, t, r, t, r, b, l, b], axis=-1) - if shgeo is None: - raise ImportError('Please run "pip install shapely" ' - 'to install shapely first.') sg_polys1 = [shgeo.Polygon(p) for p in bboxes1.reshape(rows, -1, 2)] sg_polys2 = [shgeo.Polygon(p) for p in polys2.reshape(cols, -1, 2)] overlaps = np.zeros(h_overlaps.shape) diff --git a/tools/data/icdar/README.md b/tools/data/icdar/README.md new file mode 100644 index 000000000..fdae98904 --- /dev/null +++ b/tools/data/icdar/README.md @@ -0,0 +1,47 @@ +# ICDAR Dataset + + + +## Overview + +The data structure is as follows: +```text +├── icdar2015 +│ ├── imgs +│ ├── instances_test.json +│ └── instances_training.json +├── icdar2017 +│ ├── imgs +│ ├── instances_training.json +│ └── instances_val.json +``` + +|Dataset|Images| | Annotation Files | | | +| :-------: | :------------------------------------------------------------: | :----------------------------------------------------------------------------------: | :----------------------------------------------------------------------------------------------------: | :-------------------------------------: | :--------------------------------------------------------------------------------------------: | +| | | training | validation | testing | | +| 
ICDAR2015 | [homepage](https://rrc.cvc.uab.es/?ch=4&com=downloads) | [instances_training.json](https://download.openmmlab.com/mmocr/data/icdar2015/instances_training.json) | - | [instances_test.json](https://download.openmmlab.com/mmocr/data/icdar2015/instances_test.json) | | +| ICDAR2017 | [homepage](https://rrc.cvc.uab.es/?ch=8&com=downloads) | [instances_training.json](https://download.openmmlab.com/mmocr/data/icdar2017/instances_training.json) | [instances_val.json](https://download.openmmlab.com/mmocr/data/icdar2017/instances_val.json) | - | | + + +## Preparation Steps +### ICDAR 2015 +- Step1: Download `ch4_training_images.zip`, `ch4_test_images.zip`, `ch4_training_localization_transcription_gt.zip`, `Challenge4_Test_Task1_GT.zip` from the [homepage](https://rrc.cvc.uab.es/?ch=4&com=downloads) +- Step2: +```bash +mkdir icdar2015 && cd icdar2015 +mkdir imgs && mkdir annotations +# For images, +mv ch4_training_images imgs/training +mv ch4_test_images imgs/test +# For annotations, +mv ch4_training_localization_transcription_gt annotations/training +mv Challenge4_Test_Task1_GT annotations/test +``` +- Step3: Download [instances_training.json](https://download.openmmlab.com/mmocr/data/icdar2015/instances_training.json) and [instances_test.json](https://download.openmmlab.com/mmocr/data/icdar2015/instances_test.json) and move them to `icdar2015` +- Or, generate `instances_training.json` and `instances_test.json` with the following command: +```bash +python tools/data/icdar/icdar_converter.py /path/to/icdar2015 -o /path/to/icdar2015 -d icdar2015 --split-list training test +``` + +### ICDAR 2017 +- Follow the same steps as for [ICDAR 2015](#icdar-2015). diff --git a/tools/data/icdar/icdar_converter.py b/tools/data/icdar/icdar_converter.py new file mode 100644 index 000000000..6cf1fe7f7 --- /dev/null +++ b/tools/data/icdar/icdar_converter.py @@ -0,0 +1,220 @@ +# Copyright (c) OpenMMLab. All rights reserved.
+import argparse +import glob +import os.path as osp +from functools import partial + +import mmcv +import numpy as np +from shapely.geometry import Polygon + + +def convert_annotations(image_infos, out_json_name): + """Convert the annotation into coco style. + + Args: + image_infos(list): The list of image information dicts + out_json_name(str): The output json filename + Returns: + out_json(dict): The coco style dict + """ + assert isinstance(image_infos, list) + assert isinstance(out_json_name, str) + assert out_json_name + + out_json = dict() + img_id = 0 + ann_id = 0 + out_json['images'] = [] + out_json['categories'] = [] + out_json['annotations'] = [] + for image_info in image_infos: + image_info['id'] = img_id + anno_infos = image_info.pop('anno_info') + out_json['images'].append(image_info) + for anno_info in anno_infos: + anno_info['image_id'] = img_id + anno_info['id'] = ann_id + out_json['annotations'].append(anno_info) + ann_id += 1 + img_id += 1 + cat = dict(id=1, name='text') + out_json['categories'].append(cat) + + if len(out_json['annotations']) == 0: + out_json.pop('annotations') + mmcv.dump(out_json, out_json_name) + + return out_json + + +def collect_files(img_dir, gt_dir): + """Collect all images and their corresponding groundtruth files. + + Args: + img_dir(str): The image directory + gt_dir(str): The groundtruth directory + + Returns: + files(list): The list of tuples (img_file, groundtruth_file) + """ + assert isinstance(img_dir, str) + assert img_dir + assert isinstance(gt_dir, str) + assert gt_dir + + # note that we handle png and jpg only. 
Please convert others such as gif to + # jpg or png offline + suffixes = ['.png', '.PNG', '.jpg', '.JPG', '.jpeg', '.JPEG'] + imgs_list = [] + for suffix in suffixes: + imgs_list.extend(glob.glob(osp.join(img_dir, '*' + suffix))) + + files = [] + for img_file in imgs_list: + gt_file = gt_dir + '/gt_' + osp.splitext( + osp.basename(img_file))[0] + '.txt' + files.append((img_file, gt_file)) + assert len(files), f'No images found in {img_dir}' + print(f'Loaded {len(files)} images from {img_dir}') + + return files + + +def collect_annotations(files, dataset, nproc=1): + """Collect the annotation information. + + Args: + files(list): The list of tuples (image_file, groundtruth_file) + dataset(str): The dataset name, icdar2015 or icdar2017 + nproc(int): The number of processes used to collect annotations + + Returns: + images(list): The list of image information dicts + """ + assert isinstance(files, list) + assert isinstance(dataset, str) + assert dataset + + load_img_info_with_dataset = partial(load_img_info, dataset=dataset) + if nproc > 1: + images = mmcv.track_parallel_progress( + load_img_info_with_dataset, files, nproc=nproc) + else: + images = mmcv.track_progress(load_img_info_with_dataset, files) + + return images + + +def load_img_info(files, dataset): + """Load the information of one image.
+ + Args: + files(tuple): The tuple of (img_file, groundtruth_file) + dataset(str): Dataset name, icdar2015 or icdar2017 + + Returns: + img_info(dict): The dict of the img and annotation information + """ + assert isinstance(files, tuple) + assert isinstance(dataset, str) + assert dataset + + img_file, gt_file = files + # read imgs with ignoring orientations + img = mmcv.imread(img_file, 'unchanged') + + if dataset == 'icdar2017': + gt_list = mmcv.list_from_file(gt_file) + elif dataset == 'icdar2015': + gt_list = mmcv.list_from_file(gt_file, encoding='utf-8-sig') + else: + raise NotImplementedError(f'Not support {dataset}') + + anno_info = [] + for line in gt_list: + # each line has one polygon (4 vertices), and others. + # e.g., 695,885,866,888,867,1146,696,1143,Latin,9 + line = line.strip() + strs = line.split(',') + category_id = 1 + xy = [int(x) for x in strs[0:8]] + coordinates = np.array(xy).reshape(-1, 2) + polygon = Polygon(coordinates) + iscrowd = 0 + # set iscrowd to 1 to ignore 1.
+ if (dataset == 'icdar2015' + and strs[8] == '###') or (dataset == 'icdar2017' + and strs[9] == '###'): + iscrowd = 1 + print('ignore text') + + area = polygon.area + # convert to COCO style XYWH format + min_x, min_y, max_x, max_y = polygon.bounds + bbox = [min_x, min_y, max_x - min_x, max_y - min_y] + + anno = dict( + iscrowd=iscrowd, + category_id=category_id, + bbox=bbox, + area=area, + segmentation=[xy]) + anno_info.append(anno) + split_name = osp.basename(osp.dirname(img_file)) + img_info = dict( + # remove img_prefix for filename + file_name=osp.join(split_name, osp.basename(img_file)), + height=img.shape[0], + width=img.shape[1], + anno_info=anno_info, + segm_file=osp.join(split_name, osp.basename(gt_file))) + return img_info + + +def parse_args(): + parser = argparse.ArgumentParser( + description='Convert Icdar2015 or Icdar2017 annotations to COCO format' + ) + parser.add_argument('icdar_path', help='icdar root path') + parser.add_argument('-o', '--out-dir', help='output path') + parser.add_argument( + '-d', '--dataset', required=True, help='icdar2017 or icdar2015') + parser.add_argument( + '--split-list', + nargs='+', + help='a list of splits. 
e.g., "--split-list training test"') + + parser.add_argument( + '--nproc', default=1, type=int, help='number of process') + args = parser.parse_args() + return args + + +def main(): + args = parse_args() + icdar_path = args.icdar_path + out_dir = args.out_dir if args.out_dir else icdar_path + mmcv.mkdir_or_exist(out_dir) + + img_dir = osp.join(icdar_path, 'imgs') + gt_dir = osp.join(icdar_path, 'annotations') + + set_name = {} + for split in args.split_list: + set_name.update({split: 'instances_' + split + '.json'}) + assert osp.exists(osp.join(img_dir, split)) + + for split, json_name in set_name.items(): + print(f'Converting {split} into {json_name}') + with mmcv.Timer(print_tmpl='It takes {}s to convert icdar annotation'): + files = collect_files( + osp.join(img_dir, split), osp.join(gt_dir, split)) + image_infos = collect_annotations( + files, args.dataset, nproc=args.nproc) + convert_annotations(image_infos, osp.join(out_dir, json_name)) + + +if __name__ == '__main__': + main() From 2a4517f2ea8f741c87ed45b901e8cb6b4446015e Mon Sep 17 00:00:00 2001 From: erenzhou <592267829@qq.com> Date: Tue, 1 Mar 2022 16:08:28 +0800 Subject: [PATCH 2/4] Update runtime.txt --- requirements/runtime.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/requirements/runtime.txt b/requirements/runtime.txt index 5dc0a7117..e0a1bee52 100644 --- a/requirements/runtime.txt +++ b/requirements/runtime.txt @@ -4,6 +4,7 @@ mmcv-full mmdet numpy pycocotools +shapely six terminaltables torch From 35f270d42d8dcabf838ef92cc0e83256d76aab7d Mon Sep 17 00:00:00 2001 From: erenzhou <592267829@qq.com> Date: Tue, 1 Mar 2022 23:40:47 +0800 Subject: [PATCH 3/4] Update icdar.py --- mmrotate/datasets/icdar.py | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/mmrotate/datasets/icdar.py b/mmrotate/datasets/icdar.py index 0c1695c69..06c4b30d7 100644 --- a/mmrotate/datasets/icdar.py +++ b/mmrotate/datasets/icdar.py @@ -17,14 +17,12 @@ def __init__(self, ann_file, pipeline, 
version='oc', - difficulty=100, select_first_k=-1, **kwargs): self.version = version self.select_first_k = select_first_k - super(DOTADataset, self).__init__(ann_file, pipeline, version, - difficulty, **kwargs) + super().__init__(ann_file, pipeline, **kwargs) def load_annotations(self, ann_file): """Load annotation from COCO style annotation file. @@ -39,7 +37,6 @@ def load_annotations(self, ann_file): self.cat2label = {cat_id: i for i, cat_id in enumerate(self.cat_ids)} self.img_ids = self.coco.get_img_ids() data_infos = [] - count = 0 for i in self.img_ids: data_info = {} @@ -61,13 +58,13 @@ def load_annotations(self, ann_file): if ann['category_id'] not in self.cat_ids: continue try: - x, y, w, h, a = poly2obb_np(ann['segmentation'], + x, y, w, h, a = poly2obb_np(ann['segmentation'][0], self.version) except: # noqa: E722 continue gt_bboxes.append([x, y, w, h, a]) gt_labels.append(ann['category_id']) - gt_polygons.append(ann['segmentation']) + gt_polygons.append(ann['segmentation'][0]) if gt_bboxes: data_info['ann']['bboxes'] = np.array( From 578a7ea72aeb8d33860a268e6c2eb3b81a9a3840 Mon Sep 17 00:00:00 2001 From: erenzhou <592267829@qq.com> Date: Wed, 2 Mar 2022 09:08:47 +0800 Subject: [PATCH 4/4] fix bug --- configs/_base_/datasets/icdar.py | 6 +-- .../s2anet_r50_fpn_1x_icdar2015_le135.py | 39 +++++++------------ mmrotate/datasets/icdar.py | 6 +++ 3 files changed, 24 insertions(+), 27 deletions(-) diff --git a/configs/_base_/datasets/icdar.py b/configs/_base_/datasets/icdar.py index c44d91eb8..a39fa059c 100644 --- a/configs/_base_/datasets/icdar.py +++ b/configs/_base_/datasets/icdar.py @@ -1,12 +1,12 @@ # dataset settings dataset_type = 'IcdarDataset' -data_root = '../datasets/icdar/' +data_root = 'work_dirs/icdar2015' img_norm_cfg = dict( mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) train_pipeline = [ dict(type='LoadImageFromFile'), dict(type='LoadAnnotations', with_bbox=True), - dict(type='RResize', img_scale=(1024, 1024)), + 
dict(type='RResize', img_scale=(1000, 800)), dict(type='RRandomFlip', flip_ratio=0.5), dict(type='Normalize', **img_norm_cfg), dict(type='Pad', size_divisor=32), @@ -17,7 +17,7 @@ dict(type='LoadImageFromFile'), dict( type='MultiScaleFlipAug', - img_scale=(1024, 1024), + img_scale=(1000, 800), flip=False, transforms=[ dict(type='RResize'), diff --git a/configs/s2anet/s2anet_r50_fpn_1x_icdar2015_le135.py b/configs/s2anet/s2anet_r50_fpn_1x_icdar2015_le135.py index f6ed55411..9664ed341 100644 --- a/configs/s2anet/s2anet_r50_fpn_1x_icdar2015_le135.py +++ b/configs/s2anet/s2anet_r50_fpn_1x_icdar2015_le135.py @@ -26,7 +26,7 @@ num_outs=5), fam_head=dict( type='RotatedRetinaHead', - num_classes=15, + num_classes=1, in_channels=256, stacked_convs=2, feat_channels=256, @@ -58,7 +58,7 @@ featmap_strides=[8, 16, 32, 64, 128]), odm_head=dict( type='ODMRefineHead', - num_classes=15, + num_classes=1, in_channels=256, stacked_convs=2, feat_channels=256, @@ -107,26 +107,17 @@ nms_pre=2000, min_bbox_size=0, score_thr=0.05, - nms=dict(iou_thr=0.1), - max_per_img=2000)) + nms=dict(iou_thr=0.7), + max_per_img=1000)) -img_norm_cfg = dict( - mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) -train_pipeline = [ - dict(type='LoadImageFromFile'), - dict(type='LoadAnnotations', with_bbox=True), - dict(type='RResize', img_scale=(1024, 1024)), - dict( - type='RRandomFlip', - flip_ratio=[0.25, 0.25, 0.25], - direction=['horizontal', 'vertical', 'diagonal'], - version=angle_version), - dict(type='Normalize', **img_norm_cfg), - dict(type='Pad', size_divisor=32), - dict(type='DefaultFormatBundle'), - dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']) -] -data = dict( - train=dict(pipeline=train_pipeline, version=angle_version), - val=dict(version=angle_version), - test=dict(version=angle_version)) +# evaluation +evaluation = dict(interval=1, metric='mAP') +# learning policy +lr_config = dict( + policy='step', + warmup='linear', + warmup_iters=500, + 
warmup_ratio=1.0 / 3, + step=[24, 32, 38]) +runner = dict(type='EpochBasedRunner', max_epochs=40) +checkpoint_config = dict(interval=10) \ No newline at end of file diff --git a/mmrotate/datasets/icdar.py b/mmrotate/datasets/icdar.py index 06c4b30d7..a777930a8 100644 --- a/mmrotate/datasets/icdar.py +++ b/mmrotate/datasets/icdar.py @@ -79,6 +79,12 @@ def load_annotations(self, ann_file): data_info['ann']['labels'] = np.array([], dtype=np.int64) data_info['ann']['polygons'] = np.zeros((0, 8), dtype=np.float32) + data_info['ann']['bboxes_ignore'] = np.zeros( + (0, 5), dtype=np.float32) + data_info['ann']['labels_ignore'] = np.array( + [], dtype=np.int64) + data_info['ann']['polygons_ignore'] = np.zeros( + (0, 8), dtype=np.float32) data_infos.append(data_info) count = count + 1