Support arbitrary image extensions in formats (Market1501, VggFace2, WiderFace, YOLO, VOC) (cvat-ai#172)

* support image exts in market1501
* support image exts in vggface2
* support image exts in widerface
* update yolo format
* support arbitrary exts in voc
* update tests
* update voc tests
TOsmanov · Mar 19, 2021 · 22728f2 · 22728f2
1 parent cdd5184
commit 22728f2
Show file tree

Hide file tree

Showing 13 changed files with 305 additions and 170 deletions.
diff --git a/datumaro/plugins/market1501_format.py b/datumaro/plugins/market1501_format.py
@@ -2,80 +2,96 @@
 #
 # SPDX-License-Identifier: MIT
 
+import os
 import os.path as osp
 import re
 from distutils.util import strtobool
-from glob import glob
+from itertools import chain
 
 from datumaro.components.converter import Converter
 from datumaro.components.extractor import (DatasetItem, Importer,
     SourceExtractor)
+from datumaro.util.image import find_images
 
 
 class Market1501Path:
     QUERY_DIR = 'query'
     BBOX_DIR = 'bounding_box_'
     IMAGE_EXT = '.jpg'
-    PATTERN = re.compile(r'([-\d]+)_c(\d)')
-    IMAGE_NAMES = 'images_'
+    PATTERN = re.compile(r'^(-?\d+)_c(\d+)(?:s\d+_\d+_00(.*))?')
+    LIST_PREFIX = 'images_'
+    UNKNOWN_ID = -1
 
 class Market1501Extractor(SourceExtractor):
-    def __init__(self, path):
+    def __init__(self, path, subset=None):
         if not osp.isdir(path):
             raise NotADirectoryError(
                 "Can't open folder with annotation files '%s'" % path)
 
-        subset = ''
-        for dirname in glob(osp.join(path, '*')):
-            if osp.basename(dirname).startswith(Market1501Path.BBOX_DIR):
-                subset = osp.basename(dirname).replace(Market1501Path.BBOX_DIR, '')
-            if osp.basename(dirname).startswith(Market1501Path.IMAGE_NAMES):
-                subset = osp.basename(dirname).replace(Market1501Path.IMAGE_NAMES, '')
-                subset = osp.splitext(subset)[0]
-                break
+        if not subset:
+            subset = ''
+            for p in os.listdir(path):
+                pf = osp.join(path, p)
+
+                if p.startswith(Market1501Path.BBOX_DIR) and osp.isdir(pf):
+                    subset = p.replace(Market1501Path.BBOX_DIR, '')
+                    break
+
+                if p.startswith(Market1501Path.LIST_PREFIX) and osp.isfile(pf):
+                    subset = p.replace(Market1501Path.LIST_PREFIX, '')
+                    subset = osp.splitext(subset)[0]
+                    break
         super().__init__(subset=subset)
 
         self._path = path
         self._items = list(self._load_items(path).values())
 
-    def _load_items(self, path):
+    def _load_items(self, rootdir):
         items = {}
 
-        paths = glob(osp.join(path, Market1501Path.QUERY_DIR, '*'))
-        paths += glob(osp.join(path, Market1501Path.BBOX_DIR + self._subset, '*'))
-
-        anno_file = osp.join(path,
-            Market1501Path.IMAGE_NAMES + self._subset + '.txt')
-        if len(paths) == 0 and osp.isfile(anno_file):
+        paths = []
+        anno_file = osp.join(rootdir,
+            Market1501Path.LIST_PREFIX + self._subset + '.txt')
+        if osp.isfile(anno_file):
             with open(anno_file, encoding='utf-8') as f:
                 for line in f:
-                    paths.append(line.strip())
+                    paths.append(osp.join(rootdir, line.strip()))
+        else:
+            paths = list(chain(
+                find_images(osp.join(rootdir,
+                        Market1501Path.QUERY_DIR),
+                    recursive=True),
+                find_images(osp.join(rootdir,
+                        Market1501Path.BBOX_DIR + self._subset),
+                    recursive=True),
+            ))
 
         for image_path in paths:
-            if osp.splitext(image_path)[-1] != Market1501Path.IMAGE_EXT:
-                continue
-
-            item_id = osp.splitext(osp.basename(image_path))[0]
-            pid, camid = -1, -1
-            search = Market1501Path.PATTERN.search(image_path)
+            item_id = osp.splitext(osp.normpath(image_path))[0]
+            if osp.isabs(image_path):
+                item_id = osp.relpath(item_id, rootdir)
+            subdir, item_id = item_id.split(os.sep, maxsplit=1)
+
+            pid = Market1501Path.UNKNOWN_ID
+            camid = Market1501Path.UNKNOWN_ID
+            search = Market1501Path.PATTERN.search(osp.basename(item_id))
             if search:
-                pid, camid = map(int, search.groups())
-                if 19 < len(item_id):
-                    item_id = item_id[19:]
-            items[item_id] = DatasetItem(id=item_id, subset=self._subset,
-                image=image_path)
-
-            if pid == -1:
-                continue
-
-            attributes = items[item_id].attributes
-            camid -= 1
+                pid, camid = map(int, search.groups()[0:2])
+                camid -= 1 # make ids 0-based
+                custom_name = search.groups()[2]
+                if custom_name:
+                    item_id = osp.join(osp.dirname(item_id), custom_name)
+
+            item = items.get(item_id)
+            if item is None:
+                item = DatasetItem(id=item_id, subset=self._subset,
+                    image=image_path)
+                items[item_id] = item
+
+            attributes = item.attributes
+            attributes['query'] = subdir == Market1501Path.QUERY_DIR
             attributes['person_id'] = pid
             attributes['camera_id'] = camid
-            if osp.basename(osp.dirname(image_path)) == Market1501Path.QUERY_DIR:
-                attributes['query'] = True
-            else:
-                attributes['query'] = False
         return items
 
 class Market1501Importer(Importer):
@@ -86,20 +102,23 @@ def find_sources(cls, path):
         return [{ 'url': path, 'format': 'market1501' }]
 
 class Market1501Converter(Converter):
-    DEFAULT_IMAGE_EXT = '.jpg'
+    DEFAULT_IMAGE_EXT = Market1501Path.IMAGE_EXT
 
     def apply(self):
         for subset_name, subset in self._extractor.subsets().items():
             annotation = ''
+
             for item in subset:
                 image_name = item.id
                 if Market1501Path.PATTERN.search(image_name) == None:
                     if 'person_id' in item.attributes and \
                             'camera_id' in item.attributes:
                         image_pattern = '{:04d}_c{}s1_000000_00{}'
-                        pid = int(item.attributes.get('person_id'))
-                        camid = int(item.attributes.get('camera_id')) + 1
-                        image_name = image_pattern.format(pid, camid, item.id)
+                        pid = int(item.attributes['person_id'])
+                        camid = int(item.attributes['camera_id']) + 1
+                        dirname, basename = osp.split(item.id)
+                        image_name = osp.join(dirname,
+                            image_pattern.format(pid, camid, basename))
 
                 dirname = Market1501Path.BBOX_DIR + subset_name
                 if 'query' in item.attributes:
@@ -108,15 +127,15 @@ def apply(self):
                         query = strtobool(query)
                     if query:
                         dirname = Market1501Path.QUERY_DIR
-                image_path = osp.join(self._save_dir, dirname,
-                    image_name + Market1501Path.IMAGE_EXT)
-                if item.has_image and self._save_images:
-                    self._save_image(item, image_path)
-                else:
-                    annotation += '%s\n' % image_path
-
-            if 0 < len(annotation):
-                annotation_file = osp.join(self._save_dir,
-                    Market1501Path.IMAGE_NAMES + subset_name + '.txt')
-                with open(annotation_file, 'w') as f:
-                    f.write(annotation)
+
+                image_path = self._make_image_filename(item,
+                    name=image_name, subdir=dirname)
+                if self._save_images and item.has_image:
+                    self._save_image(item, osp.join(self._save_dir, image_path))
+
+                annotation += '%s\n' % image_path
+
+            annotation_file = osp.join(self._save_dir,
+                Market1501Path.LIST_PREFIX + subset_name + '.txt')
+            with open(annotation_file, 'w') as f:
+                f.write(annotation)
diff --git a/datumaro/plugins/vgg_face2_format.py b/datumaro/plugins/vgg_face2_format.py
@@ -9,6 +9,7 @@
 from datumaro.components.converter import Converter
 from datumaro.components.extractor import (AnnotationType, Bbox, DatasetItem,
     Importer, Label, LabelCategories, Points, SourceExtractor)
+from datumaro.util.image import find_images
 
 
 class VggFace2Path:
@@ -20,15 +21,16 @@ class VggFace2Path:
     IMAGES_DIR_NO_LABEL = 'no_label'
 
 class VggFace2Extractor(SourceExtractor):
-    def __init__(self, path):
+    def __init__(self, path, subset=None):
         if not osp.isfile(path):
             raise Exception("Can't read .csv annotation file '%s'" % path)
         self._path = path
         self._dataset_dir = osp.dirname(osp.dirname(path))
 
-        subset = osp.splitext(osp.basename(path))[0]
-        if subset.startswith(VggFace2Path.LANDMARKS_FILE):
-            subset = subset.split('_')[2]
+        if not subset:
+            subset = osp.splitext(osp.basename(path))[0]
+            if subset.startswith(VggFace2Path.LANDMARKS_FILE):
+                subset = subset.split('_')[2]
         super().__init__(subset=subset)
 
         self._categories = self._load_categories()
@@ -68,6 +70,13 @@ def _split_item_path(path):
 
         items = {}
 
+        image_dir = osp.join(self._dataset_dir, self._subset)
+        if osp.isdir(image_dir):
+            images = { osp.splitext(osp.relpath(p, image_dir))[0]: p
+                for p in find_images(image_dir, recursive=True) }
+        else:
+            images = {}
+
         with open(path, encoding='utf-8') as content:
             landmarks_table = list(csv.DictReader(content))
         for row in landmarks_table:
@@ -77,10 +86,8 @@ def _split_item_path(path):
                 item_id, label = _split_item_path(item_id)
 
             if item_id not in items:
-                image_path = osp.join(self._dataset_dir, self._subset,
-                    row['NAME_ID'] + VggFace2Path.IMAGE_EXT)
                 items[item_id] = DatasetItem(id=item_id, subset=self._subset,
-                    image=image_path)
+                    image=images.get(row['NAME_ID']))
 
             annotations = items[item_id].annotations
             if [a for a in annotations if a.type == AnnotationType.points]:
@@ -105,10 +112,8 @@ def _split_item_path(path):
                     item_id, label = _split_item_path(item_id)
 
                 if item_id not in items:
-                    image_path = osp.join(self._dataset_dir, self._subset,
-                        row['NAME_ID'] + VggFace2Path.IMAGE_EXT)
                     items[item_id] = DatasetItem(id=item_id, subset=self._subset,
-                        image=image_path)
+                        image=images.get(row['NAME_ID']))
 
                 annotations = items[item_id].annotations
                 if [a for a in annotations if a.type == AnnotationType.bbox]:

diff --git a/datumaro/plugins/voc_format/extractor.py b/datumaro/plugins/voc_format/extractor.py
@@ -12,8 +12,8 @@
 from datumaro.components.extractor import (SourceExtractor, DatasetItem,
     AnnotationType, Label, Mask, Bbox, CompiledMask
 )
-from datumaro.util import dir_items
-from datumaro.util.image import Image
+from datumaro.util.os_util import dir_items
+from datumaro.util.image import Image, find_images
 from datumaro.util.mask_tools import lazy_mask, invert_colormap
 
 from .format import (
@@ -82,13 +82,19 @@ def __init__(self, path):
 
     def __iter__(self):
         raw_anns = self._load_annotations()
+
+        image_dir = osp.join(self._dataset_dir, VocPath.IMAGES_DIR)
+        if osp.isdir(image_dir):
+            images = { osp.splitext(osp.relpath(p, image_dir))[0]: p
+                for p in find_images(image_dir, recursive=True) }
+        else:
+            images = {}
+
         for item_id in self._items:
             log.debug("Reading item '%s'" % item_id)
-            image = osp.join(self._dataset_dir, VocPath.IMAGES_DIR,
-                item_id + VocPath.IMAGE_EXT)
             anns = self._parse_annotations(raw_anns, item_id)
             yield DatasetItem(id=item_id, subset=self._subset,
-                image=image, annotations=anns)
+                image=images.get(item_id), annotations=anns)
 
     def _load_annotations(self):
         annotations = defaultdict(list)
@@ -251,13 +257,18 @@ def __init__(self, path):
         super().__init__(path, task=VocTask.segmentation)
 
     def __iter__(self):
+        image_dir = osp.join(self._dataset_dir, VocPath.IMAGES_DIR)
+        if osp.isdir(image_dir):
+            images = { osp.splitext(osp.relpath(p, image_dir))[0]: p
+                for p in find_images(image_dir, recursive=True) }
+        else:
+            images = {}
+
         for item_id in self._items:
             log.debug("Reading item '%s'" % item_id)
-            image = osp.join(self._dataset_dir, VocPath.IMAGES_DIR,
-                item_id + VocPath.IMAGE_EXT)
             anns = self._load_annotations(item_id)
             yield DatasetItem(id=item_id, subset=self._subset,
-                image=image, annotations=anns)
+                image=images.get(item_id), annotations=anns)
 
     @staticmethod
     def _lazy_extract_mask(mask, c):

diff --git a/datumaro/plugins/widerface_format.py b/datumaro/plugins/widerface_format.py
@@ -23,15 +23,16 @@ class WiderFacePath:
         'occluded', 'pose', 'invalid']
 
 class WiderFaceExtractor(SourceExtractor):
-    def __init__(self, path):
+    def __init__(self, path, subset=None):
         if not osp.isfile(path):
             raise Exception("Can't read annotation file '%s'" % path)
         self._path = path
         self._dataset_dir = osp.dirname(osp.dirname(path))
 
-        subset = osp.splitext(osp.basename(path))[0]
-        if re.fullmatch(r'wider_face_\S+_bbx_gt', subset):
-            subset = subset.split('_')[2]
+        if not subset:
+            subset = osp.splitext(osp.basename(path))[0]
+            if re.fullmatch(r'wider_face_\S+_bbx_gt', subset):
+                subset = subset.split('_')[2]
         super().__init__(subset=subset)
 
         self._categories = self._load_categories()
@@ -65,15 +66,18 @@ def _load_items(self, path):
         with open(path, 'r', encoding='utf-8') as f:
             lines = f.readlines()
 
-        image_ids = [image_id for image_id, line in enumerate(lines)
-            if WiderFacePath.IMAGE_EXT in line]
+        line_ids = [line_idx for line_idx, line in enumerate(lines)
+            if ('/' in line or '\\' in line) and '.' in line] \
+            # a heuristic for paths
+
+        for line_idx in line_ids:
+            image_path = lines[line_idx].strip()
+            item_id = osp.splitext(image_path)[0]
 
-        for image_id in image_ids:
-            image = lines[image_id]
             image_path = osp.join(self._dataset_dir,
                 WiderFacePath.SUBSET_DIR + self._subset,
-                WiderFacePath.IMAGES_DIR, image[:-1])
-            item_id = image[:-(len(WiderFacePath.IMAGE_EXT) + 1)]
+                WiderFacePath.IMAGES_DIR, image_path)
+
             annotations = []
             if '/' in item_id:
                 label_name = item_id.split('/')[0]
@@ -85,8 +89,15 @@ def _load_items(self, path):
                     annotations.append(Label(label=label))
                 item_id = item_id[len(item_id.split('/')[0]) + 1:]
 
-            bbox_count = lines[image_id + 1]
-            bbox_lines = lines[image_id + 2 : image_id + int(bbox_count) + 2]
+            items[item_id] = DatasetItem(id=item_id, subset=self._subset,
+                image=image_path, annotations=annotations)
+
+            try:
+                bbox_count = int(lines[line_idx + 1]) # can be the next image
+            except ValueError:
+                continue
+
+            bbox_lines = lines[line_idx + 2 : line_idx + bbox_count + 2]
             for bbox in bbox_lines:
                 bbox_list = bbox.split()
                 if 4 <= len(bbox_list):
@@ -111,8 +122,6 @@ def _load_items(self, path):
                         attributes=attributes, label=label
                     ))
 
-            items[item_id] = DatasetItem(id=item_id, subset=self._subset,
-                image=image_path, annotations=annotations)
         return items
 
 class WiderFaceImporter(Importer):