From 42990906b9c47f3c98759e33f2c1134b879c1146 Mon Sep 17 00:00:00 2001 From: zhiltsov-max Date: Sun, 17 May 2020 07:58:29 +0300 Subject: [PATCH] [Datumaro] Fix coco import conflict with labels (#1548) --- CHANGELOG.md | 2 ++ datumaro/datumaro/components/extractor.py | 11 +++++----- .../datumaro/plugins/coco_format/converter.py | 6 +++--- .../datumaro/plugins/coco_format/extractor.py | 9 +++++--- .../datumaro/plugins/coco_format/format.py | 6 +++--- .../datumaro/plugins/coco_format/importer.py | 21 ++++++++++++++++++- .../plugins/datumaro_format/converter.py | 2 +- .../plugins/datumaro_format/extractor.py | 2 +- datumaro/tests/test_coco_format.py | 21 ++++--------------- datumaro/tests/test_datumaro_format.py | 2 +- 10 files changed, 47 insertions(+), 35 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 38c47f48e9b9..3181d404db1c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -40,6 +40,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - Task/Job buttons has no "Open in new tab" option () - Delete point context menu option has no shortcut hint () - Fixed issue with unnecessary tag activation in cvat-canvas () +- Fixed full COCO dataset import error with conflicting labels in keypoints and detection (https://github.com/opencv/cvat/pull/1548) +- Fixed COCO keypoints skeleton parsing and saving (https://github.com/opencv/cvat/issues/1539) ### Security - diff --git a/datumaro/datumaro/components/extractor.py b/datumaro/datumaro/components/extractor.py index 37248d2a3c66..461fdd4b9ec0 100644 --- a/datumaro/datumaro/components/extractor.py +++ b/datumaro/datumaro/components/extractor.py @@ -481,7 +481,7 @@ def iou(self, other): return compute_iou(self.get_bbox(), other.get_bbox()) class PointsCategories(Categories): - Category = namedtuple('Category', ['labels', 'adjacent']) + Category = namedtuple('Category', ['labels', 'joints']) def __init__(self, items=None, attributes=None): super().__init__(attributes=attributes) @@ 
-490,12 +490,13 @@ def __init__(self, items=None, attributes=None): items = {} self.items = items - def add(self, label_id, labels=None, adjacent=None): + def add(self, label_id, labels=None, joints=None): if labels is None: labels = [] - if adjacent is None: - adjacent = [] - self.items[label_id] = self.Category(labels, set(adjacent)) + if joints is None: + joints = [] + joints = set(map(tuple, joints)) + self.items[label_id] = self.Category(labels, joints) def __eq__(self, other): if not super().__eq__(other): diff --git a/datumaro/datumaro/plugins/coco_format/converter.py b/datumaro/datumaro/plugins/coco_format/converter.py index 1ba42e0e919c..9d1d7289ecb4 100644 --- a/datumaro/datumaro/plugins/coco_format/converter.py +++ b/datumaro/datumaro/plugins/coco_format/converter.py @@ -337,7 +337,7 @@ def save_categories(self, dataset): if kp_cat is not None: cat.update({ 'keypoints': [str(l) for l in kp_cat.labels], - 'skeleton': [int(i) for i in kp_cat.adjacent], + 'skeleton': [list(map(int, j)) for j in kp_cat.joints], }) self.categories.append(cat) @@ -464,8 +464,8 @@ def __init__(self, extractor, save_dir, self._save_images = save_images assert segmentation_mode is None or \ - segmentation_mode in SegmentationMode or \ - isinstance(segmentation_mode, str) + isinstance(segmentation_mode, str) or \ + segmentation_mode in SegmentationMode if segmentation_mode is None: segmentation_mode = SegmentationMode.guess if isinstance(segmentation_mode, str): diff --git a/datumaro/datumaro/plugins/coco_format/extractor.py b/datumaro/datumaro/plugins/coco_format/extractor.py index 250404c695ee..a4f52f814048 100644 --- a/datumaro/datumaro/plugins/coco_format/extractor.py +++ b/datumaro/datumaro/plugins/coco_format/extractor.py @@ -70,7 +70,9 @@ def _load_categories(self, loader): self._categories = {} if self._task in [CocoTask.instances, CocoTask.labels, - CocoTask.person_keypoints, CocoTask.stuff, CocoTask.panoptic]: + CocoTask.person_keypoints, + # TODO: CocoTask.stuff, 
CocoTask.panoptic + ]: label_categories, label_map = self._load_label_categories(loader) self._categories[AnnotationType.label] = label_categories self._label_map = label_map @@ -101,7 +103,8 @@ def _load_person_kp_categories(self, loader): for cat in cats: label_id = self._label_map[cat['id']] categories.add(label_id=label_id, - labels=cat['keypoints'], adjacent=cat['skeleton']) + labels=cat['keypoints'], joints=cat['skeleton'] + ) return categories @@ -246,4 +249,4 @@ def __init__(self, path, **kwargs): class CocoLabelsExtractor(_CocoExtractor): def __init__(self, path, **kwargs): kwargs['task'] = CocoTask.labels - super().__init__(path, **kwargs) \ No newline at end of file + super().__init__(path, **kwargs) diff --git a/datumaro/datumaro/plugins/coco_format/format.py b/datumaro/datumaro/plugins/coco_format/format.py index 2a9cddc2c1be..6db04f0c8dcc 100644 --- a/datumaro/datumaro/plugins/coco_format/format.py +++ b/datumaro/datumaro/plugins/coco_format/format.py @@ -12,12 +12,12 @@ 'captions', 'labels', # extension, does not exist in the original COCO format 'image_info', - 'panoptic', - 'stuff', + # 'panoptic', + # 'stuff', ]) class CocoPath: IMAGES_DIR = 'images' ANNOTATIONS_DIR = 'annotations' - IMAGE_EXT = '.jpg' \ No newline at end of file + IMAGE_EXT = '.jpg' diff --git a/datumaro/datumaro/plugins/coco_format/importer.py b/datumaro/datumaro/plugins/coco_format/importer.py index 932944fa6a43..4c32064bf20b 100644 --- a/datumaro/datumaro/plugins/coco_format/importer.py +++ b/datumaro/datumaro/plugins/coco_format/importer.py @@ -37,8 +37,26 @@ def __call__(self, path, **extra_params): if len(subsets) == 0: raise Exception("Failed to find 'coco' dataset at '%s'" % path) + # TODO: should be removed when proper label merging is implemented + conflicting_types = {CocoTask.instances, + CocoTask.person_keypoints, CocoTask.labels} + ann_types = set(t for s in subsets.values() for t in s) \ + & conflicting_types + if 1 <= len(ann_types): + selected_ann_type = 
sorted(ann_types, key=lambda x: x.name)[0] + if 1 < len(ann_types): + log.warning("Not implemented: " + "Found potentially conflicting source types with labels: %s. " + "Only one type will be used: %s" \ + % (", ".join(t.name for t in ann_types), selected_ann_type.name)) + for ann_files in subsets.values(): for ann_type, ann_file in ann_files.items(): + if ann_type in conflicting_types: + if ann_type is not selected_ann_type: + log.warning("Not implemented: " + "conflicting source '%s' is skipped." % ann_file) + continue log.info("Found a dataset at '%s'" % ann_file) source_name = osp.splitext(osp.basename(ann_file))[0] @@ -71,6 +89,7 @@ def find_subsets(path): "type '%s', the only known are: %s" % \ (subset_path, ann_type, ', '.join([e.name for e in CocoTask]))) + continue subset_name = name_parts[1] subsets[subset_name][ann_type] = subset_path - return dict(subsets) \ No newline at end of file + return dict(subsets) diff --git a/datumaro/datumaro/plugins/datumaro_format/converter.py b/datumaro/datumaro/plugins/datumaro_format/converter.py index 4ad786f19f82..abf532a4225b 100644 --- a/datumaro/datumaro/plugins/datumaro_format/converter.py +++ b/datumaro/datumaro/plugins/datumaro_format/converter.py @@ -212,7 +212,7 @@ def _convert_points_categories(self, obj): converted['items'].append({ 'label_id': int(label_id), 'labels': [cast(label, str) for label in item.labels], - 'adjacent': [int(v) for v in item.adjacent], + 'joints': [list(map(int, j)) for j in item.joints], }) return converted diff --git a/datumaro/datumaro/plugins/datumaro_format/extractor.py b/datumaro/datumaro/plugins/datumaro_format/extractor.py index 4a19565ca4f6..954e280716a1 100644 --- a/datumaro/datumaro/plugins/datumaro_format/extractor.py +++ b/datumaro/datumaro/plugins/datumaro_format/extractor.py @@ -70,7 +70,7 @@ def _load_categories(parsed): point_categories = PointsCategories() for item in parsed_points_cat['items']: point_categories.add(int(item['label_id']), - item['labels'], 
adjacent=item['adjacent']) + item['labels'], joints=item['joints']) categories[AnnotationType.points] = point_categories diff --git a/datumaro/tests/test_coco_format.py b/datumaro/tests/test_coco_format.py index d1727128d23a..19ae08053aae 100644 --- a/datumaro/tests/test_coco_format.py +++ b/datumaro/tests/test_coco_format.py @@ -535,7 +535,7 @@ def test_can_save_and_load_keypoints(self): points_categories = PointsCategories() for i in range(10): label_categories.add(str(i)) - points_categories.add(i, []) + points_categories.add(i, joints=[[0, 1], [1, 2]]) categories = { AnnotationType.label: label_categories, AnnotationType.points: points_categories, @@ -624,25 +624,12 @@ def test_can_save_dataset_with_no_subsets(self): class TestExtractor(Extractor): def __iter__(self): return iter([ - DatasetItem(id=1, annotations=[ - Label(2, id=1, group=1), - ]), - - DatasetItem(id=2, annotations=[ - Label(3, id=2, group=2), - ]), + DatasetItem(id=1), + DatasetItem(id=2), ]) def categories(self): - label_cat = LabelCategories() - point_cat = PointsCategories() - for label in range(10): - label_cat.add('label_' + str(label)) - point_cat.add(label) - return { - AnnotationType.label: label_cat, - AnnotationType.points: point_cat, - } + return { AnnotationType.label: LabelCategories() } with TestDir() as test_dir: self._test_save_and_load(TestExtractor(), diff --git a/datumaro/tests/test_datumaro_format.py b/datumaro/tests/test_datumaro_format.py index 4b71ddaed5fc..84146fc09f58 100644 --- a/datumaro/tests/test_datumaro_format.py +++ b/datumaro/tests/test_datumaro_format.py @@ -63,7 +63,7 @@ def categories(self): points_categories = PointsCategories() for index, _ in enumerate(label_categories.items): - points_categories.add(index, ['cat1', 'cat2'], adjacent=[0, 1]) + points_categories.add(index, ['cat1', 'cat2'], joints=[[0, 1]]) return { AnnotationType.label: label_categories,