Skip to content

Commit

Permalink
Fixes in ICDAR and Market-1501 dataset formats (cvat-ai#114)
Browse files Browse the repository at this point in the history
* ICDAR:
  - id replaced with index attribute
  - converted the color and center attributes to strings
  - added checks

* Market-1501:
  - added saving a file with the names of the images when `save_images = False`
  - added checks

Co-authored-by: Maxim Zhiltsov <[email protected]>
  • Loading branch information
yasakova-anastasia and Maxim Zhiltsov authored Feb 26, 2021
1 parent dad5c05 commit 7e7adf6
Show file tree
Hide file tree
Showing 5 changed files with 162 additions and 91 deletions.
94 changes: 49 additions & 45 deletions datumaro/plugins/icdar_format/converter.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,9 +6,9 @@
import os.path as osp

from datumaro.components.converter import Converter
from datumaro.components.extractor import AnnotationType
from datumaro.components.extractor import AnnotationType, CompiledMask
from datumaro.util.image import save_image
from datumaro.util.mask_tools import paint_mask, merge_masks
from datumaro.util.mask_tools import paint_mask

from .format import IcdarPath, IcdarTask

Expand All @@ -17,7 +17,7 @@ class _WordRecognitionConverter:
def __init__(self):
self.annotations = ''

def save_annotations(self, item):
def save_annotations(self, item, path):
self.annotations += '%s, ' % (item.id + IcdarPath.IMAGE_EXT)
for ann in item.annotations:
if ann.type != AnnotationType.caption:
Expand All @@ -38,7 +38,7 @@ class _TextLocalizationConverter:
def __init__(self):
self.annotations = {}

def save_annotations(self, item):
def save_annotations(self, item, path):
annotation = ''
for ann in item.annotations:
if ann.type == AnnotationType.bbox:
Expand All @@ -65,59 +65,62 @@ def is_empty(self):
class _TextSegmentationConverter:
def __init__(self):
self.annotations = {}
self.masks = {}

def save_annotations(self, item):
masks = []
def save_annotations(self, item, path):
annotation = ''
colormap = [(255, 255, 255)]
anns = [a for a in item.annotations
if a.type == AnnotationType.mask]
anns = sorted(anns, key=lambda a: a.id)
group = anns[0].group
for ann in anns:
if ann.group != group or ann.group == 0:
if anns:
is_not_index = len([p for p in anns if 'index' not in p.attributes])
if is_not_index:
raise Exception("Item %s: a mask must have"
"'index' attribute" % item.id)
anns = sorted(anns, key=lambda a: a.attributes['index'])
group = anns[0].group
for ann in anns:
if ann.group != group or (not ann.group and anns[0].group != 0):
annotation += '\n'
text = ''
if ann.attributes:
if 'text' in ann.attributes:
text = ann.attributes['text']
if text == ' ':
annotation += '#'
if 'color' in ann.attributes and \
len(ann.attributes['color'].split()) == 3:
color = ann.attributes['color'].split()
colormap.append(
(int(color[0]), int(color[1]), int(color[2])))
annotation += ' '.join(p for p in color)
else:
raise Exception("Item %s: a mask must have "
"an RGB color attribute, e. g. '10 7 50'" % item.id)
if 'center' in ann.attributes:
annotation += ' %s' % ann.attributes['center']
else:
annotation += ' - -'
bbox = ann.get_bbox()
annotation += ' %s %s %s %s' % (bbox[0], bbox[1],
bbox[0] + bbox[2], bbox[1] + bbox[3])
annotation += ' \"%s\"' % text
annotation += '\n'
text = ''
if ann.attributes:
if 'text' in ann.attributes:
text = ann.attributes['text']
if text == ' ':
annotation += '#'
if 'color' in ann.attributes:
colormap.append(ann.attributes['color'])
annotation += ' '.join(str(p)
for p in ann.attributes['color'])
else:
annotation += '- - -'
if 'center' in ann.attributes:
annotation += ' '
annotation += ' '.join(str(p)
for p in ann.attributes['center'])
else:
annotation += ' - -'
bbox = ann.get_bbox()
annotation += ' %s %s %s %s' % (bbox[0], bbox[1],
bbox[0] + bbox[2], bbox[1] + bbox[3])
annotation += ' \"%s\"' % text
annotation += '\n'
group = ann.group
masks.append(ann.as_class_mask(ann.id))

mask = merge_masks(masks)
mask = paint_mask(mask,
{ i: colormap[i] for i in range(len(colormap)) })
group = ann.group

mask = CompiledMask.from_instance_masks(anns,
instance_labels=[m.attributes['index'] + 1 for m in anns])
mask = paint_mask(mask.class_mask,
{ i: colormap[i] for i in range(len(colormap)) })
save_image(osp.join(path, item.id + '_GT' + IcdarPath.GT_EXT),
mask, create_dir=True)
self.annotations[item.id] = annotation
self.masks[item.id] = mask

def write(self, path):
os.makedirs(path, exist_ok=True)
for item in self.annotations:
file = osp.join(path, item + '_GT' + '.txt')
with open(file, 'w') as f:
f.write(self.annotations[item])
save_image(osp.join(path, item + '_GT' + IcdarPath.GT_EXT),
self.masks[item], create_dir=True)

def is_empty(self):
return len(self.annotations) == 0
Expand Down Expand Up @@ -161,12 +164,13 @@ def apply(self):
for subset_name, subset in self._extractor.subsets().items():
task_converters = self._make_task_converters()
for item in subset:
for task_conv in task_converters.values():
for task, task_conv in task_converters.items():
if item.has_image and self._save_images:
self._save_image(item, osp.join(
self._save_dir, subset_name, IcdarPath.IMAGES_DIR,
item.id + IcdarPath.IMAGE_EXT))
task_conv.save_annotations(item)
task_conv.save_annotations(item, osp.join(self._save_dir,
IcdarPath.TASK_DIR[task], subset_name))

for task, task_conv in task_converters.items():
if task_conv.is_empty() and not self._tasks:
Expand Down
10 changes: 5 additions & 5 deletions datumaro/plugins/icdar_format/extractor.py
Original file line number Diff line number Diff line change
Expand Up @@ -151,7 +151,7 @@ def _load_segmentation_items(self):
if len(objects) != 10:
continue

centers.append([float(objects[3]), float(objects[4])])
centers.append(objects[3] + ' ' + objects[4])
groups.append(group)
colors.append(tuple(int(o) for o in objects[:3]))
char = objects[9]
Expand All @@ -177,10 +177,10 @@ def _load_segmentation_items(self):
if label_id == 0:
continue
i = int(label_id)
annotations.append(Mask(id=i, group=groups[i],
annotations.append(Mask(group=groups[i],
image=self._lazy_extract_mask(mask, label_id),
attributes={ 'color': colors[i], 'text': chars[i],
'center': centers[i] }
attributes={ 'index': i - 1, 'color': ' '.join(str(p) for p in colors[i]),
'text': chars[i], 'center': centers[i] }
))
return items

Expand Down Expand Up @@ -229,4 +229,4 @@ def find_sources(cls, path):
sources += cls._find_sources_recursive(path, ext,
extractor_type, file_filter=lambda p:
osp.basename(p) != IcdarPath.VOCABULARY_FILE)
return sources
return sources
69 changes: 55 additions & 14 deletions datumaro/plugins/market1501_format.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@

import os.path as osp
import re
from distutils.util import strtobool
from glob import glob

from datumaro.components.converter import Converter
Expand All @@ -16,17 +17,24 @@ class Market1501Path:
BBOX_DIR = 'bounding_box_'
IMAGE_EXT = '.jpg'
PATTERN = re.compile(r'([-\d]+)_c(\d)')
IMAGE_NAMES = 'images_'

class Market1501Extractor(SourceExtractor):
def __init__(self, path):
if not osp.isdir(path):
raise NotADirectoryError("Can't open folder with annotation files '%s'" % path)
raise NotADirectoryError(
"Can't open folder with annotation files '%s'" % path)

subset = ''
for dirname in glob(osp.join(path, '*')):
if osp.basename(dirname).startswith(Market1501Path.BBOX_DIR):
subset = osp.basename(dirname).replace(Market1501Path.BBOX_DIR, '')
if osp.basename(dirname).startswith(Market1501Path.IMAGE_NAMES):
subset = osp.basename(dirname).replace(Market1501Path.IMAGE_NAMES, '')
subset = osp.splitext(subset)[0]
break
super().__init__(subset=subset)

self._path = path
self._items = list(self._load_items(path).values())

Expand All @@ -36,26 +44,38 @@ def _load_items(self, path):
paths = glob(osp.join(path, Market1501Path.QUERY_DIR, '*'))
paths += glob(osp.join(path, Market1501Path.BBOX_DIR + self._subset, '*'))

anno_file = osp.join(path,
Market1501Path.IMAGE_NAMES + self._subset + '.txt')
if len(paths) == 0 and osp.isfile(anno_file):
with open(anno_file, encoding='utf-8') as f:
for line in f:
paths.append(line.strip())

for image_path in paths:
if not osp.isfile(image_path) or \
osp.splitext(image_path)[-1] != Market1501Path.IMAGE_EXT:
if osp.splitext(image_path)[-1] != Market1501Path.IMAGE_EXT:
continue

item_id = osp.splitext(osp.basename(image_path))[0]
attributes = {}
pid, camid = map(int, Market1501Path.PATTERN.search(image_path).groups())
pid, camid = -1, -1
search = Market1501Path.PATTERN.search(image_path)
if search:
pid, camid = map(int, search.groups())
if 19 < len(item_id):
item_id = item_id[19:]
items[item_id] = DatasetItem(id=item_id, subset=self._subset,
image=image_path)

if pid == -1:
continue

attributes = items[item_id].attributes
camid -= 1
attributes['person_id'] = pid
attributes['camera_id'] = camid
if osp.basename(osp.dirname(image_path)) == Market1501Path.QUERY_DIR:
attributes['query'] = True
else:
attributes['query'] = False
items[item_id] = DatasetItem(id=item_id, subset=self._subset,
image=image_path, attributes=attributes)
return items

class Market1501Importer(Importer):
Expand All @@ -70,12 +90,33 @@ class Market1501Converter(Converter):

def apply(self):
for subset_name, subset in self._extractor.subsets().items():
annotation = ''
for item in subset:
image_name = item.id
if Market1501Path.PATTERN.search(image_name) == None:
if 'person_id' in item.attributes and \
'camera_id' in item.attributes:
image_pattern = '{:04d}_c{}s1_000000_00{}'
pid = int(item.attributes.get('person_id'))
camid = int(item.attributes.get('camera_id')) + 1
image_name = image_pattern.format(pid, camid, item.id)

dirname = Market1501Path.BBOX_DIR + subset_name
if 'query' in item.attributes:
query = item.attributes.get('query')
if isinstance(query, str):
query = strtobool(query)
if query:
dirname = Market1501Path.QUERY_DIR
image_path = osp.join(self._save_dir, dirname,
image_name + Market1501Path.IMAGE_EXT)
if item.has_image and self._save_images:
if item.attributes and 'query' in item.attributes:
if item.attributes.get('query'):
dirname = Market1501Path.QUERY_DIR
else:
dirname = Market1501Path.BBOX_DIR + subset_name
self._save_image(item, osp.join(self._save_dir,
dirname, item.id + Market1501Path.IMAGE_EXT))
self._save_image(item, image_path)
else:
annotation += '%s\n' % image_path

if 0 < len(annotation):
annotation_file = osp.join(self._save_dir,
Market1501Path.IMAGE_NAMES + subset_name + '.txt')
with open(annotation_file, 'w') as f:
f.write(annotation)
54 changes: 27 additions & 27 deletions tests/test_icdar_format.py
Original file line number Diff line number Diff line change
Expand Up @@ -69,20 +69,20 @@ def test_can_import_masks(self):
DatasetItem(id='1', subset='train',
image=np.ones((2, 5, 3)),
annotations=[
Mask(id=1, group=0,
Mask(group=0,
image=np.array([[0, 1, 1, 0, 0], [0, 0, 0, 0, 0]]),
attributes={ 'color': (108, 225, 132),
'text': 'F', 'center': [0, 1]
attributes={ 'index': 0, 'color': '108 225 132',
'text': 'F', 'center': '0 1'
}),
Mask(id=2, group=1,
Mask(group=1,
image=np.array([[0, 0, 0, 1, 0], [0, 0, 0, 1, 0]]),
attributes = { 'color': (82, 174, 214),
'text': 'T', 'center': [1, 3]
attributes={ 'index': 1, 'color': '82 174 214',
'text': 'T', 'center': '1 3'
}),
Mask(id=3, group=1,
Mask(group=1,
image=np.array([[0, 0, 0, 0, 0], [0, 0, 0, 0, 1]]),
attributes = { 'color': (241, 73, 144),
'text': 'h', 'center': [1, 4]
attributes={ 'index': 2, 'color': '241 73 144',
'text': 'h', 'center': '1 4'
}),
]
),
Expand Down Expand Up @@ -145,27 +145,27 @@ def test_can_save_and_load_masks(self):
expected_dataset = Dataset.from_iterable([
DatasetItem(id=1, subset='train',
annotations=[
Mask(id=2, image=np.array([[0, 0, 0, 1, 1]]), group=1,
attributes={ 'color': (82, 174, 214), 'text': 'j',
'center': [0, 3] }),
Mask(id=1, image=np.array([[0, 1, 1, 0, 0]]), group=1,
attributes={ 'color': (108, 225, 132), 'text': 'F',
'center': [0, 1] }),
Mask(image=np.array([[0, 0, 0, 1, 1]]), group=1,
attributes={ 'index': 1, 'color': '82 174 214', 'text': 'j',
'center': '0 3' }),
Mask(image=np.array([[0, 1, 1, 0, 0]]), group=1,
attributes={ 'index': 0, 'color': '108 225 132', 'text': 'F',
'center': '0 1' }),
]),
DatasetItem(id=2, subset='train',
annotations=[
Mask(id=4, image=np.array([[0, 0, 0, 0, 0, 1]]), group=0,
attributes={ 'color': (183, 6, 28), 'text': ' ',
'center': [0, 5] }),
Mask(id=1, image=np.array([[1, 0, 0, 0, 0, 0]]), group=1,
attributes={ 'color': (108, 225, 132), 'text': 'L',
'center': [0, 0] }),
Mask(id=2, image=np.array([[0, 0, 0, 1, 1, 0]]), group=1,
attributes={ 'color': (82, 174, 214), 'text': 'o',
'center': [0, 3] }),
Mask(id=3, image=np.array([[0, 1, 1, 0, 0, 0]]), group=0,
attributes={ 'color': (241, 73, 144), 'text': 'P',
'center': [0, 1] }),
Mask(image=np.array([[0, 0, 0, 0, 0, 1]]), group=0,
attributes={ 'index': 3, 'color': '183 6 28', 'text': ' ',
'center': '0 5' }),
Mask(image=np.array([[1, 0, 0, 0, 0, 0]]), group=1,
attributes={ 'index': 0, 'color': '108 225 132', 'text': 'L',
'center': '0 0' }),
Mask(image=np.array([[0, 0, 0, 1, 1, 0]]), group=1,
attributes={ 'index': 1, 'color': '82 174 214', 'text': 'o',
'center': '0 3' }),
Mask(image=np.array([[0, 1, 1, 0, 0, 0]]), group=0,
attributes={ 'index': 2, 'color': '241 73 144', 'text': 'P',
'center': '0 1' }),
]),
])

Expand Down
Loading

0 comments on commit 7e7adf6

Please sign in to comment.