From 032b0c151083f08e87bade0692bee946fc0a2663 Mon Sep 17 00:00:00 2001 From: Maxim Zhiltsov Date: Wed, 31 Mar 2021 13:04:07 +0300 Subject: [PATCH] Format fixes in COCO and VOC (#195) * Allow splitting and merging of image directories in COCO export * Avoid producing conflicting attributes in VOC segmentation * update changelog --- CHANGELOG.md | 1 + datumaro/plugins/coco_format/converter.py | 26 +++++++++++------ datumaro/plugins/voc_format/extractor.py | 22 ++++++-------- tests/test_coco_format.py | 35 ++++++++++++++++++++--- 4 files changed, 58 insertions(+), 26 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 7f2b5feadd6c..f732be7d7489 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -12,6 +12,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Changed - Added an option to allow undeclared annotation attributes in CVAT format export () +- COCO exports images in separate dirs by subsets. Added an option to control this () ### Deprecated - diff --git a/datumaro/plugins/coco_format/converter.py b/datumaro/plugins/coco_format/converter.py index b1ae77f90b22..0caf89de3b35 100644 --- a/datumaro/plugins/coco_format/converter.py +++ b/datumaro/plugins/coco_format/converter.py @@ -478,9 +478,12 @@ def build_cmdline_parser(cls, **kwargs): parser.add_argument('--allow-attributes', type=str_to_bool, default=True, help="Allow export of attributes (default: %(default)s)") - parser.add_argument('--reindex', action='store_true', - help="Assign new indices to images and annotations " - "(default: %(default)s)") + parser.add_argument('--reindex', type=str_to_bool, default=False, + help="Assign new indices to images and annotations, " + "useful to avoid merge conflicts (default: %(default)s)") + parser.add_argument('--merge-images', type=str_to_bool, default=False, + help="Save all images into a single " + "directory (default: %(default)s)") parser.add_argument('--tasks', type=cls._split_tasks_string, help="COCO task filter, comma-separated list of {%s} " "(default: all)" % ', '.join(t.name for t in CocoTask)) @@ -498,7 +501,8 @@ def build_cmdline_parser(cls, **kwargs): def __init__(self, extractor, save_dir, tasks=None, segmentation_mode=None, crop_covered=False, - allow_attributes=True, reindex=False, **kwargs): + allow_attributes=True, reindex=False, merge_images=False, + **kwargs): super().__init__(extractor, save_dir, **kwargs) assert tasks is None or isinstance(tasks, (CocoTask, list, str)) @@ -526,6 +530,7 @@ def __init__(self, extractor, save_dir, self._crop_covered = crop_covered self._allow_attributes = allow_attributes self._reindex = reindex + self._merge_images = merge_images self._image_ids = {} @@ -556,10 +561,6 @@ def _get_image_id(self, item): self._image_ids[item.id] = image_id return image_id - def _save_image(self, item, path=None): - super()._save_image(item, - osp.join(self._images_dir, self._make_image_filename(item))) - def apply(self): self._make_dirs() @@ -571,7 +572,8 @@ def apply(self): for item in subset: if self._save_images: if item.has_image: - self._save_image(item) + self._save_image(item, subdir=osp.join(self._images_dir, + '' if self._merge_images else subset_name)) else: log.debug("Item '%s' has no image info", item.id) for task_conv in task_converters.values(): @@ -605,6 +607,12 @@ def patch(cls, dataset, patch, save_dir, **kwargs): if osp.isfile(image_path): os.unlink(image_path) + image_path = osp.join(images_dir, subset, + conv._make_image_filename(item)) + if osp.isfile(image_path): + os.unlink(image_path) + + class CocoInstancesConverter(CocoConverter): def __init__(self, *args, **kwargs): kwargs['tasks'] = CocoTask.instances diff --git a/datumaro/plugins/voc_format/extractor.py b/datumaro/plugins/voc_format/extractor.py index 993b825350f9..9df7cc066d16 100644 --- a/datumaro/plugins/voc_format/extractor.py +++ b/datumaro/plugins/voc_format/extractor.py @@ -302,22 +302,18 @@ def _load_annotations(self, item_id): for i in range(compiled_mask.instance_count)} for instance_id, label_id in instance_labels.items(): + if len(label_cat) <= label_id: + raise Exception( + "Item %s: a mask has unexpected class number %s" % + (item_id, label_id)) + image = compiled_mask.lazy_extract(instance_id) - attributes = {} - if label_id is not None: - actions = {a: False - for a in label_cat.items[label_id].attributes - } - attributes.update(actions) - - item_annotations.append(Mask( - image=image, label=label_id, - attributes=attributes, group=instance_id - )) + item_annotations.append(Mask(image=image, label=label_id, + group=instance_id)) elif class_mask is not None: - log.warn("item '%s': has only class segmentation, " - "instance masks will not be available" % item_id) + log.warning("Item %s: only class segmentations available" % item_id) + class_mask = class_mask() classes = np.unique(class_mask) for label_id in classes: diff --git a/tests/test_coco_format.py b/tests/test_coco_format.py index b884009b2266..ae24b4d88e3e 100644 --- a/tests/test_coco_format.py +++ b/tests/test_coco_format.py @@ -630,6 +630,33 @@ def test_reindex(self): partial(CocoConverter.convert, reindex=True), test_dir, target_dataset=target_dataset) + def test_can_save_images_in_single_dir(self): + dataset = Dataset.from_iterable([ + DatasetItem(id=1, subset='train', image=np.ones((2, 4, 3)), + attributes={'id': 1}), + ]) + + with TestDir() as test_dir: + self._test_save_and_load(dataset, + partial(CocoImageInfoConverter.convert, save_images=True, + merge_images=True), + test_dir, require_images=True) + self.assertTrue(osp.isfile(osp.join(test_dir, 'images', '1.jpg'))) + + def test_can_save_images_in_separate_dirs(self): + dataset = Dataset.from_iterable([ + DatasetItem(id=1, subset='train', image=np.ones((2, 4, 3)), + attributes={'id': 1}), + ]) + + with TestDir() as test_dir: + self._test_save_and_load(dataset, + partial(CocoImageInfoConverter.convert, save_images=True, + merge_images=False), + test_dir, require_images=True) + self.assertTrue(osp.isfile(osp.join( + test_dir, 'images', 'train', '1.jpg'))) + def test_inplace_save_writes_only_updated_data(self): with TestDir() as path: # generate initial dataset @@ -642,8 +669,8 @@ def test_inplace_save_writes_only_updated_data(self): os.unlink(osp.join(path, 'annotations', 'image_info_a.json')) os.unlink(osp.join(path, 'annotations', 'image_info_b.json')) os.unlink(osp.join(path, 'annotations', 'image_info_c.json')) - self.assertFalse(osp.isfile(osp.join(path, 'images', '2.jpg'))) - self.assertTrue(osp.isfile(osp.join(path, 'images', '3.jpg'))) + self.assertFalse(osp.isfile(osp.join(path, 'images', 'b', '2.jpg'))) + self.assertTrue(osp.isfile(osp.join(path, 'images', 'c', '3.jpg'))) dataset.put(DatasetItem(2, subset='a', image=np.ones((3, 2, 3)))) dataset.remove(3, 'c') @@ -655,5 +682,5 @@ def test_inplace_save_writes_only_updated_data(self): path, 'annotations', 'image_info_b.json'))) self.assertFalse(osp.isfile(osp.join( path, 'annotations', 'image_info_c.json'))) - self.assertTrue(osp.isfile(osp.join(path, 'images', '2.jpg'))) - self.assertFalse(osp.isfile(osp.join(path, 'images', '3.jpg'))) \ No newline at end of file + self.assertTrue(osp.isfile(osp.join(path, 'images', 'a', '2.jpg'))) + self.assertFalse(osp.isfile(osp.join(path, 'images', 'c', '3.jpg'))) \ No newline at end of file