Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[WIP] Support ICDAR dataset. #47

Closed
wants to merge 5 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
47 changes: 47 additions & 0 deletions configs/_base_/datasets/icdar.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
# dataset settings
dataset_type = 'IcdarDataset'  # registered in mmrotate/datasets/icdar.py
data_root = 'work_dirs/icdar2015'
# Standard ImageNet mean/std; images are converted BGR -> RGB first.
img_norm_cfg = dict(
    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
train_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(type='LoadAnnotations', with_bbox=True),
    # RResize / RRandomFlip are the rotated-box-aware variants from mmrotate.
    dict(type='RResize', img_scale=(1000, 800)),
    dict(type='RRandomFlip', flip_ratio=0.5),
    dict(type='Normalize', **img_norm_cfg),
    dict(type='Pad', size_divisor=32),
    dict(type='DefaultFormatBundle'),
    dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels'])
]
test_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(
        type='MultiScaleFlipAug',
        img_scale=(1000, 800),
        flip=False,
        transforms=[
            dict(type='RResize'),
            dict(type='Normalize', **img_norm_cfg),
            dict(type='Pad', size_divisor=32),
            dict(type='DefaultFormatBundle'),
            dict(type='Collect', keys=['img'])
        ])
]
data = dict(
    samples_per_gpu=2,  # batch size per GPU
    workers_per_gpu=2,  # dataloader worker processes per GPU
    train=dict(
        type=dataset_type,
        ann_file=f'{data_root}/instances_training.json',
        img_prefix=f'{data_root}/imgs',
        pipeline=train_pipeline),
    # NOTE(review): ICDAR2015 ships no separate validation split here, so the
    # test annotations double as the val split.
    val=dict(
        type=dataset_type,
        ann_file=f'{data_root}/instances_test.json',
        img_prefix=f'{data_root}/imgs',
        pipeline=test_pipeline),
    test=dict(
        type=dataset_type,
        ann_file=f'{data_root}/instances_test.json',
        img_prefix=f'{data_root}/imgs',
        pipeline=test_pipeline))
123 changes: 123 additions & 0 deletions configs/s2anet/s2anet_r50_fpn_1x_icdar2015_le135.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,123 @@
_base_ = [
    '../_base_/datasets/icdar.py', '../_base_/schedules/schedule_1x.py',
    '../_base_/default_runtime.py'
]

# Angle representation shared by both bbox coders below; 'le135' is one of
# the angle versions supported by mmrotate.
angle_version = 'le135'
model = dict(
    type='S2ANet',
    backbone=dict(
        # ImageNet-pretrained ResNet-50; first stage frozen, BN in eval mode.
        type='ResNet',
        depth=50,
        num_stages=4,
        out_indices=(0, 1, 2, 3),
        frozen_stages=1,
        zero_init_residual=False,
        norm_cfg=dict(type='BN', requires_grad=True),
        norm_eval=True,
        style='pytorch',
        init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50')),
    neck=dict(
        type='FPN',
        in_channels=[256, 512, 1024, 2048],
        out_channels=256,
        start_level=1,
        add_extra_convs='on_input',
        num_outs=5),
    # First-stage (feature alignment) head producing coarse rotated anchors.
    fam_head=dict(
        type='RotatedRetinaHead',
        num_classes=1,  # single 'text' class
        in_channels=256,
        stacked_convs=2,
        feat_channels=256,
        assign_by_circumhbbox=None,
        anchor_generator=dict(
            type='RotatedAnchorGenerator',
            scales=[4],
            ratios=[1.0],
            strides=[8, 16, 32, 64, 128]),
        bbox_coder=dict(
            type='DeltaXYWHAOBBoxCoder',
            angle_range=angle_version,
            norm_factor=1,
            edge_swap=False,
            proj_xy=True,
            target_means=(.0, .0, .0, .0, .0),
            target_stds=(1.0, 1.0, 1.0, 1.0, 1.0)),
        loss_cls=dict(
            type='FocalLoss',
            use_sigmoid=True,
            gamma=2.0,
            alpha=0.25,
            loss_weight=1.0),
        loss_bbox=dict(type='SmoothL1Loss', beta=0.11, loss_weight=1.0)),
    # Aligns convolution features to the first-stage anchors before refinement.
    align_cfgs=dict(
        type='AlignConv',
        kernel_size=3,
        channels=256,
        featmap_strides=[8, 16, 32, 64, 128]),
    # Second-stage refinement head; reuses first-stage boxes as anchors.
    odm_head=dict(
        type='ODMRefineHead',
        num_classes=1,
        in_channels=256,
        stacked_convs=2,
        feat_channels=256,
        assign_by_circumhbbox=None,
        anchor_generator=dict(
            type='PseudoAnchorGenerator', strides=[8, 16, 32, 64, 128]),
        bbox_coder=dict(
            type='DeltaXYWHAOBBoxCoder',
            angle_range=angle_version,
            norm_factor=1,
            edge_swap=False,
            proj_xy=True,
            target_means=(0.0, 0.0, 0.0, 0.0, 0.0),
            target_stds=(1.0, 1.0, 1.0, 1.0, 1.0)),
        loss_cls=dict(
            type='FocalLoss',
            use_sigmoid=True,
            gamma=2.0,
            alpha=0.25,
            loss_weight=1.0),
        loss_bbox=dict(type='SmoothL1Loss', beta=0.11, loss_weight=1.0)),
    train_cfg=dict(
        # Separate assigner configs for the two stages; both use max-IoU
        # assignment with rotated-box IoU.
        fam_cfg=dict(
            assigner=dict(
                type='MaxIoUAssigner',
                pos_iou_thr=0.5,
                neg_iou_thr=0.4,
                min_pos_iou=0,
                ignore_iof_thr=-1,
                iou_calculator=dict(type='RBboxOverlaps2D')),
            allowed_border=-1,
            pos_weight=-1,
            debug=False),
        odm_cfg=dict(
            assigner=dict(
                type='MaxIoUAssigner',
                pos_iou_thr=0.5,
                neg_iou_thr=0.4,
                min_pos_iou=0,
                ignore_iof_thr=-1,
                iou_calculator=dict(type='RBboxOverlaps2D')),
            allowed_border=-1,
            pos_weight=-1,
            debug=False)),
    test_cfg=dict(
        nms_pre=2000,
        min_bbox_size=0,
        score_thr=0.05,
        nms=dict(iou_thr=0.7),
        max_per_img=1000))

# evaluation
evaluation = dict(interval=1, metric='mAP')
# learning policy: step decay with linear warmup. This overrides the
# schedule_1x base — training runs for 40 epochs here, not 12.
lr_config = dict(
    policy='step',
    warmup='linear',
    warmup_iters=500,
    warmup_ratio=1.0 / 3,
    step=[24, 32, 38])
runner = dict(type='EpochBasedRunner', max_epochs=40)
checkpoint_config = dict(interval=10)
3 changes: 2 additions & 1 deletion mmrotate/datasets/__init__.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,8 @@
# Copyright (c) OpenMMLab. All rights reserved.
from .builder import build_dataset # noqa: F401, F403
from .dota import DOTADataset # noqa: F401, F403
from .icdar import IcdarDataset # noqa: F401, F403
from .pipelines import * # noqa: F401, F403
from .sar import SARDataset # noqa: F401, F403

__all__ = ['SARDataset', 'DOTADataset', 'build_dataset']
__all__ = ['SARDataset', 'DOTADataset', 'IcdarDataset', 'build_dataset']
94 changes: 94 additions & 0 deletions mmrotate/datasets/icdar.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,94 @@
# Copyright (c) OpenMMLab. All rights reserved.
import numpy as np
from mmdet.datasets.api_wrappers import COCO

from mmrotate.core import poly2obb_np
from .builder import ROTATED_DATASETS
from .dota import DOTADataset


@ROTATED_DATASETS.register_module()
class IcdarDataset(DOTADataset):
    """ICDAR text dataset for rotated object detection (supports ICDAR2015
    and ICDAR2017).

    Args:
        ann_file (str): Path of the COCO-style annotation file.
        pipeline (list[dict]): Processing pipeline.
        version (str, optional): Angle representation passed to
            ``poly2obb_np``. Defaults to 'oc'.
        select_first_k (int, optional): If positive, only keep the first k
            images; -1 keeps all images. Defaults to -1.
    """
    CLASSES = ('text', )

    def __init__(self,
                 ann_file,
                 pipeline,
                 version='oc',
                 select_first_k=-1,
                 **kwargs):
        self.version = version
        self.select_first_k = select_first_k

        super().__init__(ann_file, pipeline, **kwargs)

    def load_annotations(self, ann_file):
        """Load annotation from COCO style annotation file.

        Args:
            ann_file (str): Path of annotation file.

        Returns:
            list[dict]: Annotation info, one dict per image with keys
            ``filename``, ``width``, ``height`` and ``ann``.
        """
        self.coco = COCO(ann_file)
        self.cat_ids = self.coco.get_cat_ids(cat_names=self.CLASSES)
        self.cat2label = {cat_id: i for i, cat_id in enumerate(self.cat_ids)}
        self.img_ids = self.coco.get_img_ids()
        data_infos = []
        count = 0
        for i in self.img_ids:
            data_info = {}
            info = self.coco.load_imgs([i])[0]
            data_info['filename'] = info['file_name']
            # Width/height are required downstream by
            # CustomDataset._set_group_flag (aspect-ratio grouping).
            data_info['width'] = info['width']
            data_info['height'] = info['height']
            data_info['ann'] = {}
            img_id = info['id']
            ann_ids = self.coco.get_ann_ids(img_ids=[img_id])
            ann_info = self.coco.load_anns(ann_ids)
            gt_bboxes = []
            gt_labels = []
            gt_polygons = []
            for ann in ann_info:
                if ann.get('ignore', False):
                    continue
                x1, y1, w, h = ann['bbox']
                if ann['area'] <= 0 or w < 1 or h < 1:
                    continue
                if ann['category_id'] not in self.cat_ids:
                    continue
                # poly2obb_np returns None for degenerate polygons, which
                # makes the tuple unpacking raise TypeError; skip those
                # instead of silently swallowing every exception.
                try:
                    x, y, w, h, a = poly2obb_np(ann['segmentation'][0],
                                                self.version)
                except (TypeError, ValueError):
                    continue
                gt_bboxes.append([x, y, w, h, a])
                # Map the raw COCO category id to a 0-based contiguous
                # label via cat2label, as mmdet heads expect (raw COCO ids
                # typically start at 1 and would be out of range for
                # num_classes=1).
                gt_labels.append(self.cat2label[ann['category_id']])
                gt_polygons.append(ann['segmentation'][0])

            if gt_bboxes:
                data_info['ann']['bboxes'] = np.array(
                    gt_bboxes, dtype=np.float32)
                data_info['ann']['labels'] = np.array(
                    gt_labels, dtype=np.int64)
                data_info['ann']['polygons'] = np.array(
                    gt_polygons, dtype=np.float32)
            else:
                data_info['ann']['bboxes'] = np.zeros((0, 5),
                                                      dtype=np.float32)
                data_info['ann']['labels'] = np.array([], dtype=np.int64)
                data_info['ann']['polygons'] = np.zeros((0, 8),
                                                        dtype=np.float32)
            # NOTE(review): the *_ignore fields are always emitted empty —
            # confirm the converted ICDAR jsons carry no ignore regions.
            data_info['ann']['bboxes_ignore'] = np.zeros(
                (0, 5), dtype=np.float32)
            data_info['ann']['labels_ignore'] = np.array(
                [], dtype=np.int64)
            data_info['ann']['polygons_ignore'] = np.zeros(
                (0, 8), dtype=np.float32)

            data_infos.append(data_info)
            count = count + 1
            # Stop once exactly ``select_first_k`` images have been kept
            # (the previous ``count > select_first_k`` check kept k + 1).
            if 0 < self.select_first_k <= count:
                break

        return data_infos
1 change: 1 addition & 0 deletions requirements/runtime.txt
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ mmcv-full
mmdet
numpy
pycocotools
shapely
six
terminaltables
torch
2 changes: 1 addition & 1 deletion setup.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ line_length = 79
multi_line_output = 0
known_standard_library = setuptools
known_first_party = mmrotate
known_third_party = PIL,cv2,e2cnn,matplotlib,mmcv,mmdet,numpy,pytest,pytorch_sphinx_theme,terminaltables,torch
known_third_party = PIL,cv2,e2cnn,matplotlib,mmcv,mmdet,numpy,pytest,pytorch_sphinx_theme,shapely,terminaltables,torch
no_lines_before = STDLIB,LOCALFOLDER
default_section = THIRDPARTY

Expand Down
3 changes: 0 additions & 3 deletions tools/data/dota/split/img_split.py
Original file line number Diff line number Diff line change
Expand Up @@ -235,9 +235,6 @@ def bbox_overlaps_iof(bboxes1, bboxes2, eps=1e-6):

l, t, r, b = [bboxes2[..., i] for i in range(4)]
polys2 = np.stack([l, t, r, t, r, b, l, b], axis=-1)
if shgeo is None:
raise ImportError('Please run "pip install shapely" '
'to install shapely first.')
sg_polys1 = [shgeo.Polygon(p) for p in bboxes1.reshape(rows, -1, 2)]
sg_polys2 = [shgeo.Polygon(p) for p in polys2.reshape(cols, -1, 2)]
overlaps = np.zeros(h_overlaps.shape)
Expand Down
47 changes: 47 additions & 0 deletions tools/data/icdar/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
# ICDAR Dataset

<!-- [DATASET] -->

## Overview

The data structure is as follows:
```text
├── icdar2015
│ ├── imgs
│ ├── instances_test.json
│ └── instances_training.json
├── icdar2017
│ ├── imgs
│ ├── instances_training.json
│ └── instances_val.json
```

| Dataset | Images | Annotation Files | | |
| :-------: | :----: | :--------------: | :--------: | :-----: |
| | | training | validation | testing |
| ICDAR2015 | [homepage](https://rrc.cvc.uab.es/?ch=4&com=downloads) | [instances_training.json](https://download.openmmlab.com/mmocr/data/icdar2015/instances_training.json) | - | [instances_test.json](https://download.openmmlab.com/mmocr/data/icdar2015/instances_test.json) |
| ICDAR2017 | [homepage](https://rrc.cvc.uab.es/?ch=8&com=downloads) | [instances_training.json](https://download.openmmlab.com/mmocr/data/icdar2017/instances_training.json) | [instances_val.json](https://download.openmmlab.com/mmocr/data/icdar2017/instances_val.json) | - |


## Preparation Steps
### ICDAR 2015
- Step1: Download `ch4_training_images.zip`, `ch4_test_images.zip`, `ch4_training_localization_transcription_gt.zip`, `Challenge4_Test_Task1_GT.zip` from [homepage](https://rrc.cvc.uab.es/?ch=4&com=downloads)
- Step2:
```bash
mkdir icdar2015 && cd icdar2015
mkdir imgs && mkdir annotations
# For images,
mv ch4_training_images imgs/training
mv ch4_test_images imgs/test
# For annotations,
mv ch4_training_localization_transcription_gt annotations/training
mv Challenge4_Test_Task1_GT annotations/test
```
- Step3: Download [instances_training.json](https://download.openmmlab.com/mmocr/data/icdar2015/instances_training.json) and [instances_test.json](https://download.openmmlab.com/mmocr/data/icdar2015/instances_test.json) and move them to `icdar2015`
- Or, generate `instances_training.json` and `instances_test.json` with following command:
```bash
python tools/data/icdar/icdar_converter.py /path/to/icdar2015 -o /path/to/icdar2015 -d icdar2015 --split-list training test
```

### ICDAR 2017
- Follow similar steps as [ICDAR 2015](#icdar-2015).
Loading