Skip to content

Commit

Permalink
Format fixes in COCO and VOC (cvat-ai#195)
Browse files Browse the repository at this point in the history
* Allow splitting and merging of image directories in COCO export

* Avoid producing conflicting attributes in VOC segmentation

* Update changelog
  • Loading branch information
Maxim Zhiltsov authored Mar 31, 2021
1 parent 0f18908 commit 032b0c1
Show file tree
Hide file tree
Showing 4 changed files with 58 additions and 26 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0

### Changed
- Added an option to allow undeclared annotation attributes in CVAT format export (<https://github.com/openvinotoolkit/datumaro/pull/192>)
- COCO export now saves images into separate directories by subset. Added the `--merge-images` option to save all images into a single directory instead (<https://github.com/openvinotoolkit/datumaro/pull/195>)

### Deprecated
-
Expand Down
26 changes: 17 additions & 9 deletions datumaro/plugins/coco_format/converter.py
Original file line number Diff line number Diff line change
Expand Up @@ -478,9 +478,12 @@ def build_cmdline_parser(cls, **kwargs):
parser.add_argument('--allow-attributes',
type=str_to_bool, default=True,
help="Allow export of attributes (default: %(default)s)")
parser.add_argument('--reindex', action='store_true',
help="Assign new indices to images and annotations "
"(default: %(default)s)")
parser.add_argument('--reindex', type=str_to_bool, default=False,
help="Assign new indices to images and annotations, "
"useful to avoid merge conflicts (default: %(default)s)")
parser.add_argument('--merge-images', type=str_to_bool, default=False,
help="Save all images into a single "
"directory (default: %(default)s)")
parser.add_argument('--tasks', type=cls._split_tasks_string,
help="COCO task filter, comma-separated list of {%s} "
"(default: all)" % ', '.join(t.name for t in CocoTask))
Expand All @@ -498,7 +501,8 @@ def build_cmdline_parser(cls, **kwargs):

def __init__(self, extractor, save_dir,
tasks=None, segmentation_mode=None, crop_covered=False,
allow_attributes=True, reindex=False, **kwargs):
allow_attributes=True, reindex=False, merge_images=False,
**kwargs):
super().__init__(extractor, save_dir, **kwargs)

assert tasks is None or isinstance(tasks, (CocoTask, list, str))
Expand Down Expand Up @@ -526,6 +530,7 @@ def __init__(self, extractor, save_dir,
self._crop_covered = crop_covered
self._allow_attributes = allow_attributes
self._reindex = reindex
self._merge_images = merge_images

self._image_ids = {}

Expand Down Expand Up @@ -556,10 +561,6 @@ def _get_image_id(self, item):
self._image_ids[item.id] = image_id
return image_id

def _save_image(self, item, path=None):
    """Save the item's image under the converter's images directory.

    The destination is built from ``self._images_dir`` and the file name
    returned by ``self._make_image_filename(item)``.

    NOTE: ``path`` is accepted but never read by this override; the
    computed destination is always the one passed to the parent.
    """
    image_filename = self._make_image_filename(item)
    destination = osp.join(self._images_dir, image_filename)
    super()._save_image(item, destination)

def apply(self):
self._make_dirs()

Expand All @@ -571,7 +572,8 @@ def apply(self):
for item in subset:
if self._save_images:
if item.has_image:
self._save_image(item)
self._save_image(item, subdir=osp.join(self._images_dir,
'' if self._merge_images else subset_name))
else:
log.debug("Item '%s' has no image info", item.id)
for task_conv in task_converters.values():
Expand Down Expand Up @@ -605,6 +607,12 @@ def patch(cls, dataset, patch, save_dir, **kwargs):
if osp.isfile(image_path):
os.unlink(image_path)

image_path = osp.join(images_dir, subset,
conv._make_image_filename(item))
if osp.isfile(image_path):
os.unlink(image_path)


class CocoInstancesConverter(CocoConverter):
def __init__(self, *args, **kwargs):
kwargs['tasks'] = CocoTask.instances
Expand Down
22 changes: 9 additions & 13 deletions datumaro/plugins/voc_format/extractor.py
Original file line number Diff line number Diff line change
Expand Up @@ -302,22 +302,18 @@ def _load_annotations(self, item_id):
for i in range(compiled_mask.instance_count)}

for instance_id, label_id in instance_labels.items():
if len(label_cat) <= label_id:
raise Exception(
"Item %s: a mask has unexpected class number %s" %
(item_id, label_id))

image = compiled_mask.lazy_extract(instance_id)

attributes = {}
if label_id is not None:
actions = {a: False
for a in label_cat.items[label_id].attributes
}
attributes.update(actions)

item_annotations.append(Mask(
image=image, label=label_id,
attributes=attributes, group=instance_id
))
item_annotations.append(Mask(image=image, label=label_id,
group=instance_id))
elif class_mask is not None:
log.warn("item '%s': has only class segmentation, "
"instance masks will not be available" % item_id)
log.warning("Item %s: only class segmentations available" % item_id)

class_mask = class_mask()
classes = np.unique(class_mask)
for label_id in classes:
Expand Down
35 changes: 31 additions & 4 deletions tests/test_coco_format.py
Original file line number Diff line number Diff line change
Expand Up @@ -630,6 +630,33 @@ def test_reindex(self):
partial(CocoConverter.convert, reindex=True),
test_dir, target_dataset=target_dataset)

def test_can_save_images_in_single_dir(self):
    """Export with ``merge_images=True`` and expect the image directly
    under ``images/``, with no subset subdirectory in its path.
    """
    source_dataset = Dataset.from_iterable([
        DatasetItem(id=1, subset='train', image=np.ones((2, 4, 3)),
            attributes={'id': 1}),
    ])
    convert = partial(CocoImageInfoConverter.convert,
        save_images=True, merge_images=True)

    with TestDir() as test_dir:
        self._test_save_and_load(source_dataset, convert,
            test_dir, require_images=True)

        # Checked while still inside the TestDir context.
        merged_image_path = osp.join(test_dir, 'images', '1.jpg')
        self.assertTrue(osp.isfile(merged_image_path))

def test_can_save_images_in_separate_dirs(self):
    """Export with ``merge_images=False`` and expect the image under a
    directory named after its subset (``images/train/``).
    """
    train_item = DatasetItem(id=1, subset='train',
        image=np.ones((2, 4, 3)), attributes={'id': 1})
    source_dataset = Dataset.from_iterable([train_item])

    export_options = dict(save_images=True, merge_images=False)
    convert = partial(CocoImageInfoConverter.convert, **export_options)

    with TestDir() as test_dir:
        self._test_save_and_load(source_dataset, convert,
            test_dir, require_images=True)

        # Checked while still inside the TestDir context.
        per_subset_image_path = osp.join(
            test_dir, 'images', 'train', '1.jpg')
        self.assertTrue(osp.isfile(per_subset_image_path))

def test_inplace_save_writes_only_updated_data(self):
with TestDir() as path:
# generate initial dataset
Expand All @@ -642,8 +669,8 @@ def test_inplace_save_writes_only_updated_data(self):
os.unlink(osp.join(path, 'annotations', 'image_info_a.json'))
os.unlink(osp.join(path, 'annotations', 'image_info_b.json'))
os.unlink(osp.join(path, 'annotations', 'image_info_c.json'))
self.assertFalse(osp.isfile(osp.join(path, 'images', '2.jpg')))
self.assertTrue(osp.isfile(osp.join(path, 'images', '3.jpg')))
self.assertFalse(osp.isfile(osp.join(path, 'images', 'b', '2.jpg')))
self.assertTrue(osp.isfile(osp.join(path, 'images', 'c', '3.jpg')))

dataset.put(DatasetItem(2, subset='a', image=np.ones((3, 2, 3))))
dataset.remove(3, 'c')
Expand All @@ -655,5 +682,5 @@ def test_inplace_save_writes_only_updated_data(self):
path, 'annotations', 'image_info_b.json')))
self.assertFalse(osp.isfile(osp.join(
path, 'annotations', 'image_info_c.json')))
self.assertTrue(osp.isfile(osp.join(path, 'images', '2.jpg')))
self.assertFalse(osp.isfile(osp.join(path, 'images', '3.jpg')))
self.assertTrue(osp.isfile(osp.join(path, 'images', 'a', '2.jpg')))
self.assertFalse(osp.isfile(osp.join(path, 'images', 'c', '3.jpg')))

0 comments on commit 032b0c1

Please sign in to comment.