[Datumaro] Fix coco import conflict with labels (#1548)

cvat-ai · May 17, 2020 · 4299090 · 4299090
1 parent 0934d77
commit 4299090
Show file tree

Hide file tree

Showing 10 changed files with 47 additions and 35 deletions.
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -40,6 +40,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 - Task/Job buttons has no "Open in new tab" option (<https://github.com/opencv/cvat/pull/1419>)
 - Delete point context menu option has no shortcut hint (<https://github.com/opencv/cvat/pull/1416>)
 - Fixed issue with unnecessary tag activation in cvat-canvas (<https://github.com/opencv/cvat/issues/1540>)
+- Fixed full COCO dataset import error with conflicting labels in keypoints and detection (<https://github.com/opencv/cvat/pull/1548>)
+- Fixed COCO keypoints skeleton parsing and saving (<https://github.com/opencv/cvat/issues/1539>)
 
 ### Security
 -

diff --git a/datumaro/datumaro/components/extractor.py b/datumaro/datumaro/components/extractor.py
@@ -481,7 +481,7 @@ def iou(self, other):
         return compute_iou(self.get_bbox(), other.get_bbox())
 
 class PointsCategories(Categories):
-    Category = namedtuple('Category', ['labels', 'adjacent'])
+    Category = namedtuple('Category', ['labels', 'joints'])
 
     def __init__(self, items=None, attributes=None):
         super().__init__(attributes=attributes)
@@ -490,12 +490,13 @@ def __init__(self, items=None, attributes=None):
             items = {}
         self.items = items
 
-    def add(self, label_id, labels=None, adjacent=None):
+    def add(self, label_id, labels=None, joints=None):
         if labels is None:
             labels = []
-        if adjacent is None:
-            adjacent = []
-        self.items[label_id] = self.Category(labels, set(adjacent))
+        if joints is None:
+            joints = []
+        joints = set(map(tuple, joints))
+        self.items[label_id] = self.Category(labels, joints)
 
     def __eq__(self, other):
         if not super().__eq__(other):

diff --git a/datumaro/datumaro/plugins/coco_format/converter.py b/datumaro/datumaro/plugins/coco_format/converter.py
@@ -337,7 +337,7 @@ def save_categories(self, dataset):
                 if kp_cat is not None:
                     cat.update({
                         'keypoints': [str(l) for l in kp_cat.labels],
-                        'skeleton': [int(i) for i in kp_cat.adjacent],
+                        'skeleton': [list(map(int, j)) for j in kp_cat.joints],
                     })
             self.categories.append(cat)
 
@@ -464,8 +464,8 @@ def __init__(self, extractor, save_dir,
         self._save_images = save_images
 
         assert segmentation_mode is None or \
-            segmentation_mode in SegmentationMode or \
-            isinstance(segmentation_mode, str)
+            isinstance(segmentation_mode, str) or \
+            segmentation_mode in SegmentationMode
         if segmentation_mode is None:
             segmentation_mode = SegmentationMode.guess
         if isinstance(segmentation_mode, str):

diff --git a/datumaro/datumaro/plugins/coco_format/extractor.py b/datumaro/datumaro/plugins/coco_format/extractor.py
@@ -70,7 +70,9 @@ def _load_categories(self, loader):
         self._categories = {}
 
         if self._task in [CocoTask.instances, CocoTask.labels,
-                CocoTask.person_keypoints, CocoTask.stuff, CocoTask.panoptic]:
+                CocoTask.person_keypoints,
+                # TODO: CocoTask.stuff, CocoTask.panoptic
+                ]:
             label_categories, label_map = self._load_label_categories(loader)
             self._categories[AnnotationType.label] = label_categories
             self._label_map = label_map
@@ -101,7 +103,8 @@ def _load_person_kp_categories(self, loader):
         for cat in cats:
             label_id = self._label_map[cat['id']]
             categories.add(label_id=label_id,
-                labels=cat['keypoints'], adjacent=cat['skeleton'])
+                labels=cat['keypoints'], joints=cat['skeleton']
+            )
 
         return categories
 
@@ -246,4 +249,4 @@ def __init__(self, path, **kwargs):
 class CocoLabelsExtractor(_CocoExtractor):
     def __init__(self, path, **kwargs):
         kwargs['task'] = CocoTask.labels
-        super().__init__(path, **kwargs)
+        super().__init__(path, **kwargs)
diff --git a/datumaro/datumaro/plugins/coco_format/format.py b/datumaro/datumaro/plugins/coco_format/format.py
@@ -12,12 +12,12 @@
     'captions',
     'labels', # extension, does not exist in the original COCO format
     'image_info',
-    'panoptic',
-    'stuff',
+    # 'panoptic',
+    # 'stuff',
 ])
 
 class CocoPath:
     IMAGES_DIR = 'images'
     ANNOTATIONS_DIR = 'annotations'
 
-    IMAGE_EXT = '.jpg'
+    IMAGE_EXT = '.jpg'
diff --git a/datumaro/datumaro/plugins/coco_format/importer.py b/datumaro/datumaro/plugins/coco_format/importer.py
@@ -37,8 +37,26 @@ def __call__(self, path, **extra_params):
         if len(subsets) == 0:
             raise Exception("Failed to find 'coco' dataset at '%s'" % path)
 
+        # TODO: should be removed when proper label merging is implemented
+        conflicting_types = {CocoTask.instances,
+            CocoTask.person_keypoints, CocoTask.labels}
+        ann_types = set(t for s in subsets.values() for t in s) \
+            & conflicting_types
+        if 1 <= len(ann_types):
+            selected_ann_type = sorted(ann_types, key=lambda x: x.name)[0]
+        if 1 < len(ann_types):
+            log.warning("Not implemented: "
+                "Found potentially conflicting source types with labels: %s. "
+                "Only one type will be used: %s" \
+                % (", ".join(t.name for t in ann_types), selected_ann_type.name))
+
         for ann_files in subsets.values():
             for ann_type, ann_file in ann_files.items():
+                if ann_type in conflicting_types:
+                    if ann_type is not selected_ann_type:
+                        log.warning("Not implemented: "
+                            "conflicting source '%s' is skipped." % ann_file)
+                        continue
                 log.info("Found a dataset at '%s'" % ann_file)
 
                 source_name = osp.splitext(osp.basename(ann_file))[0]
@@ -71,6 +89,7 @@ def find_subsets(path):
                     "type '%s', the only known are: %s" % \
                     (subset_path, ann_type,
                         ', '.join([e.name for e in CocoTask])))
+                continue
             subset_name = name_parts[1]
             subsets[subset_name][ann_type] = subset_path
-        return dict(subsets)
+        return dict(subsets)
diff --git a/datumaro/datumaro/plugins/datumaro_format/converter.py b/datumaro/datumaro/plugins/datumaro_format/converter.py
@@ -212,7 +212,7 @@ def _convert_points_categories(self, obj):
             converted['items'].append({
                 'label_id': int(label_id),
                 'labels': [cast(label, str) for label in item.labels],
-                'adjacent': [int(v) for v in item.adjacent],
+                'joints': [list(map(int, j)) for j in item.joints],
             })
         return converted
 

diff --git a/datumaro/datumaro/plugins/datumaro_format/extractor.py b/datumaro/datumaro/plugins/datumaro_format/extractor.py
@@ -70,7 +70,7 @@ def _load_categories(parsed):
             point_categories = PointsCategories()
             for item in parsed_points_cat['items']:
                 point_categories.add(int(item['label_id']),
-                    item['labels'], adjacent=item['adjacent'])
+                    item['labels'], joints=item['joints'])
 
             categories[AnnotationType.points] = point_categories
 

diff --git a/datumaro/tests/test_coco_format.py b/datumaro/tests/test_coco_format.py
@@ -535,7 +535,7 @@ def test_can_save_and_load_keypoints(self):
         points_categories = PointsCategories()
         for i in range(10):
             label_categories.add(str(i))
-            points_categories.add(i, [])
+            points_categories.add(i, joints=[[0, 1], [1, 2]])
         categories = {
             AnnotationType.label: label_categories,
             AnnotationType.points: points_categories,
@@ -624,25 +624,12 @@ def test_can_save_dataset_with_no_subsets(self):
         class TestExtractor(Extractor):
             def __iter__(self):
                 return iter([
-                    DatasetItem(id=1, annotations=[
-                        Label(2, id=1, group=1),
-                    ]),
-
-                    DatasetItem(id=2, annotations=[
-                        Label(3, id=2, group=2),
-                    ]),
+                    DatasetItem(id=1),
+                    DatasetItem(id=2),
                 ])
 
             def categories(self):
-                label_cat = LabelCategories()
-                point_cat = PointsCategories()
-                for label in range(10):
-                    label_cat.add('label_' + str(label))
-                    point_cat.add(label)
-                return {
-                    AnnotationType.label: label_cat,
-                    AnnotationType.points: point_cat,
-                }
+                return { AnnotationType.label: LabelCategories() }
 
         with TestDir() as test_dir:
             self._test_save_and_load(TestExtractor(),

diff --git a/datumaro/tests/test_datumaro_format.py b/datumaro/tests/test_datumaro_format.py
@@ -63,7 +63,7 @@ def categories(self):
 
             points_categories = PointsCategories()
             for index, _ in enumerate(label_categories.items):
-                points_categories.add(index, ['cat1', 'cat2'], adjacent=[0, 1])
+                points_categories.add(index, ['cat1', 'cat2'], joints=[[0, 1]])
 
             return {
                 AnnotationType.label: label_categories,