[Feature] Add TextOCR to Dataset Preparer (#1543)

* add textocr
* cfg gen

Co-authored-by: gaotongxiao <[email protected]>
open-mmlab · Dec 20, 2022 · 24bfb18 · 24bfb18
1 parent fb78c94
commit 24bfb18
Show file tree

Hide file tree

Showing 5 changed files with 146 additions and 0 deletions.
diff --git a/dataset_zoo/textocr/metafile.yml b/dataset_zoo/textocr/metafile.yml
@@ -0,0 +1,27 @@
+Name: 'TextOCR'
+Paper:
+  Title: 'TextOCR: Towards large-scale end-to-end reasoning for arbitrary-shaped scene text'
+  URL: https://openaccess.thecvf.com/content/CVPR2021/papers/Singh_TextOCR_Towards_Large-Scale_End-to-End_Reasoning_for_Arbitrary-Shaped_Scene_Text_CVPR_2021_paper.pdf
+  Venue: CVPR
+  Year: '2021'
+  BibTeX: '@inproceedings{singh2021textocr,
+    title={{TextOCR}: Towards large-scale end-to-end reasoning for arbitrary-shaped scene text},
+    author={Singh, Amanpreet and Pang, Guan and Toh, Mandy and Huang, Jing and Galuba, Wojciech and Hassner, Tal},
+    journal={The Conference on Computer Vision and Pattern Recognition},
+    year={2021}}'
+Data:
+  Website: https://paperswithcode.com/dataset/textocr
+  Language:
+    - English
+  Scene:
+    - Natural Scene
+  Granularity:
+    - Word
+  Tasks:
+    - textdet
+    - textrecog
+    - textspotting
+  License:
+    Type: CC BY 4.0
+    Link: https://creativecommons.org/licenses/by/4.0/
+  Format: .json
diff --git a/dataset_zoo/textocr/sample_anno.md b/dataset_zoo/textocr/sample_anno.md
@@ -0,0 +1,57 @@
+**Text Detection/Recognition/Spotting**
+
+```json
+{
+  "imgs": {
+    "OpenImages_ImageID_1": {
+      "id": "OpenImages_ImageID_1",
+      "width": "INT, Width of the image",
+      "height": "INT, Height of the image",
+      "set": "Split train|val|test",
+      "filename": "train|test/OpenImages_ImageID_1.jpg"
+    },
+    "OpenImages_ImageID_2": {
+      "...": "..."
+    }
+  },
+  "anns": {
+    "OpenImages_ImageID_1_1": {
+      "id": "STR, OpenImages_ImageID_1_1, Specifies the nth annotation for an image",
+      "image_id": "OpenImages_ImageID_1",
+      "bbox": [
+        "FLOAT x1",
+        "FLOAT y1",
+        "FLOAT x2",
+        "FLOAT y2"
+      ],
+      "points": [
+        "FLOAT x1",
+        "FLOAT y1",
+        "FLOAT x2",
+        "FLOAT y2",
+        "...",
+        "FLOAT xN",
+        "FLOAT yN"
+      ],
+      "utf8_string": "text for this annotation",
+      "area": "FLOAT, area of this box"
+    },
+    "OpenImages_ImageID_1_2": {
+      "...": "..."
+    },
+    "OpenImages_ImageID_2_1": {
+      "...": "..."
+    }
+  },
+  "img2Anns": {
+    "OpenImages_ImageID_1": [
+      "OpenImages_ImageID_1_1",
+      "OpenImages_ImageID_1_2",
+      "OpenImages_ImageID_1_3"
+    ],
+    "OpenImages_ImageID_N": [
+      "..."
+    ]
+  }
+}
+```
diff --git a/dataset_zoo/textocr/textdet.py b/dataset_zoo/textocr/textdet.py
@@ -0,0 +1,52 @@
+data_root = 'data/textocr'
+cache_path = 'data/cache'
+
+data_obtainer = dict(
+    type='NaiveDataObtainer',
+    cache_path=cache_path,
+    data_root=data_root,
+    files=[
+        dict(
+            url='https://dl.fbaipublicfiles.com/textvqa/images/'
+            'train_val_images.zip',
+            save_name='textocr_textdet_train_val_img.zip',
+            md5='d12dd8098899044e4ae1af34db7ecfef',
+            split=['train', 'val'],
+            content=['image'],
+            mapping=[[
+                'textocr_textdet_train_val_img/train_images',
+                'textdet_imgs/train'
+            ]]),
+        dict(
+            url='https://dl.fbaipublicfiles.com/textvqa/data/textocr/'
+            'TextOCR_0.1_train.json',
+            save_name='textocr_textdet_train.json',
+            md5='0f8ba1beefd2ca4d08a4f82bcbe6cfb4',
+            split=['train'],
+            content=['annotation'],
+            mapping=[['textocr_textdet_train.json',
+                      'annotations/train.json']]),
+        dict(
+            url='https://dl.fbaipublicfiles.com/textvqa/data/textocr/'
+            'TextOCR_0.1_val.json',
+            save_name='textocr_textdet_val.json',
+            md5='fb151383ea7b3c530cde9ef0d5c08347',
+            split=['val'],
+            content=['annotation'],
+            mapping=[['textocr_textdet_val.json', 'annotations/val.json']]),
+    ])
+
+data_converter = dict(
+    type='TextDetDataConverter',
+    splits=['train', 'val'],
+    data_root=data_root,
+    gatherer=dict(
+        type='mono_gather', train_ann='train.json', val_ann='val.json'),
+    parser=dict(
+        type='COCOTextDetAnnParser',
+        variant='textocr',
+        data_root=data_root + '/textdet_imgs/'),
+    dumper=dict(type='JsonDumper'),
+    delete=['annotations', 'textocr_textdet_train_val_img'])
+
+config_generator = dict(type='TextDetConfigGenerator', data_root=data_root)
diff --git a/dataset_zoo/textocr/textrecog.py b/dataset_zoo/textocr/textrecog.py
@@ -0,0 +1,5 @@
+_base_ = ['textdet.py']
+
+data_converter = dict(type='TextRecogCropConverter')
+
+config_generator = dict(type='TextRecogConfigGenerator')
diff --git a/dataset_zoo/textocr/textspotting.py b/dataset_zoo/textocr/textspotting.py
@@ -0,0 +1,5 @@
+_base_ = ['textdet.py']
+
+data_converter = dict(type='TextSpottingDataConverter')
+
+config_generator = dict(type='TextSpottingConfigGenerator')