Skip to content

Commit

Permalink
Dataset meta file for some formats (#575)
Browse files (browse the repository at this point in the history)
* add meta file to VGGFace2 and OpenImages

* add meta file to MOT and MOTS

* add meta file to CamVid and LabelMe

* add meta file to Mapillary Vistas

* add meta file to KITTI Raw

* sort imports

* add meta file to COCO

* remove unused import

* fixes

* update documentation

* fix COCO documentation

* update Changelog

* fix MOT

* fix COCO

* fixes

* similar fixes for other formats

* fix test for MOT
  • Loading branch information
yasakova-anastasia authored Dec 9, 2021
1 parent 253111f commit 8e1203a
Show file tree
Hide file tree
Showing 43 changed files with 550 additions and 60 deletions.
3 changes: 2 additions & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
(<https://github.com/openvinotoolkit/datumaro/pull/571>)
- Support for Accuracy Checker dataset meta files in formats
(<https://github.com/openvinotoolkit/datumaro/pull/553>,
<https://github.com/openvinotoolkit/datumaro/pull/569>)
<https://github.com/openvinotoolkit/datumaro/pull/569>,
<https://github.com/openvinotoolkit/datumaro/pull/575>)
- Import for VoTT dataset format
(<https://github.com/openvinotoolkit/datumaro/pull/573>)

Expand Down
4 changes: 2 additions & 2 deletions datumaro/plugins/ade20k2017_format.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,8 +45,8 @@ def __init__(self, path):
self._categories = {}

if has_meta_file(self._path):
self._categories = { AnnotationType.label: LabelCategories().
from_iterable(list(parse_meta_file(self._path).keys())) }
self._categories = { AnnotationType.label: LabelCategories.
from_iterable(parse_meta_file(self._path).keys()) }

for subset in self._subsets:
self._load_items(subset)
Expand Down
4 changes: 2 additions & 2 deletions datumaro/plugins/ade20k2020_format.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,8 +47,8 @@ def __init__(self, path):
self._categories = {}

if has_meta_file(self._path):
self._categories = { AnnotationType.label: LabelCategories().
from_iterable(list(parse_meta_file(self._path).keys())) }
self._categories = { AnnotationType.label: LabelCategories.
from_iterable(parse_meta_file(self._path).keys()) }

for subset in self._subsets:
self._load_items(subset)
Expand Down
4 changes: 2 additions & 2 deletions datumaro/plugins/align_celeba_format.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,8 +34,8 @@ def __init__(self, path):

self._categories = { AnnotationType.label: LabelCategories() }
if has_meta_file(path):
self._categories = { AnnotationType.label: LabelCategories().
from_iterable(list(parse_meta_file(path).keys())) }
self._categories = { AnnotationType.label: LabelCategories.
from_iterable(parse_meta_file(path).keys()) }

self._items = list(self._load_items(path).values())

Expand Down
32 changes: 22 additions & 10 deletions datumaro/plugins/camvid_format.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,9 @@
from datumaro.util.annotation_util import make_label_id_mapping
from datumaro.util.image import save_image
from datumaro.util.mask_tools import generate_colormap, lazy_mask, paint_mask
from datumaro.util.meta_file_util import (
has_meta_file, is_meta_file, parse_meta_file,
)

CamvidLabelMap = OrderedDict([
('Void', (0, 0, 0)),
Expand Down Expand Up @@ -170,11 +173,14 @@ def __init__(self, path, subset=None):

def _load_categories(self, path):
label_map = None
label_map_path = osp.join(path, CamvidPath.LABELMAP_FILE)
if osp.isfile(label_map_path):
label_map = parse_label_map(label_map_path)
if has_meta_file(path):
label_map = parse_meta_file(path)
else:
label_map = CamvidLabelMap
label_map_path = osp.join(path, CamvidPath.LABELMAP_FILE)
if osp.isfile(label_map_path):
label_map = parse_label_map(label_map_path)
else:
label_map = CamvidLabelMap
self._labels = [label for label in label_map]
return make_camvid_categories(label_map)

Expand Down Expand Up @@ -335,11 +341,14 @@ def save_segm_lists(self, subset_name, segm_list):
f.write('%s %s\n' % (image_path, mask_path))

def save_label_map(self):
path = osp.join(self._save_dir, CamvidPath.LABELMAP_FILE)
labels = self._extractor.categories()[AnnotationType.label]
if len(self._label_map) > len(labels):
self._label_map.pop('background')
write_label_map(path, self._label_map)
if self._save_dataset_meta:
self._save_meta_file(self._save_dir)
else:
path = osp.join(self._save_dir, CamvidPath.LABELMAP_FILE)
labels = self._extractor.categories()[AnnotationType.label]
if len(self._label_map) > len(labels):
self._label_map.pop('background')
write_label_map(path, self._label_map)

def _load_categories(self, label_map_source):
if label_map_source == LabelmapType.camvid.name:
Expand Down Expand Up @@ -370,7 +379,10 @@ def _load_categories(self, label_map_source):
sorted(label_map_source.items(), key=lambda e: e[0]))

elif isinstance(label_map_source, str) and osp.isfile(label_map_source):
label_map = parse_label_map(label_map_source)
if is_meta_file(label_map_source):
label_map = parse_meta_file(label_map_source)
else:
label_map = parse_label_map(label_map_source)

else:
raise Exception("Wrong labelmap specified, "
Expand Down
4 changes: 2 additions & 2 deletions datumaro/plugins/celeba_format.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,8 +32,8 @@ def __init__(self, path):

self._categories = { AnnotationType.label: LabelCategories() }
if has_meta_file(path):
self._categories = { AnnotationType.label: LabelCategories().
from_iterable(list(parse_meta_file(path).keys())) }
self._categories = { AnnotationType.label: LabelCategories.
from_iterable(parse_meta_file(path).keys()) }

self._items = list(self._load_items(path).values())

Expand Down
4 changes: 2 additions & 2 deletions datumaro/plugins/cifar_format.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,8 +67,8 @@ def __init__(self, path, subset=None):

def _load_categories(self, path):
if has_meta_file(path):
return { AnnotationType.label: LabelCategories().
from_iterable(list(parse_meta_file(path).keys())) }
return { AnnotationType.label: LabelCategories.
from_iterable(parse_meta_file(path).keys()) }

label_cat = LabelCategories()

Expand Down
3 changes: 3 additions & 0 deletions datumaro/plugins/coco_format/converter.py
Original file line number Diff line number Diff line change
Expand Up @@ -651,6 +651,9 @@ def _get_image_id(self, item):
def apply(self):
self._make_dirs()

if self._save_dataset_meta:
self._save_meta_file(self._save_dir)

for subset_name, subset in self._extractor.subsets().items():
task_converters = self._make_task_converters()
for task_conv in task_converters.values():
Expand Down
9 changes: 9 additions & 0 deletions datumaro/plugins/coco_format/extractor.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
from datumaro.components.media import Image
from datumaro.util.image import lazy_image, load_image
from datumaro.util.mask_tools import bgr2index
from datumaro.util.meta_file_util import has_meta_file, parse_meta_file
from datumaro.util.os_util import suppress_output

from .format import CocoPath, CocoTask
Expand Down Expand Up @@ -49,6 +50,7 @@ def __init__(self, path, task, *,
images_dir = osp.join(images_dir, subset or DEFAULT_SUBSET_NAME)
self._images_dir = images_dir
self._task = task
self._rootpath = rootpath

self._merge_instance_polygons = merge_instance_polygons

Expand Down Expand Up @@ -103,6 +105,13 @@ def _load_categories(self, loader, *, keep_original_ids):


def _load_label_categories(self, raw_cats, *, keep_original_ids):
if has_meta_file(self._rootpath):
labels = parse_meta_file(self._rootpath).keys()
self._categories = { AnnotationType.label: LabelCategories.
from_iterable(labels) }
self._label_map = { (i + 1): label for i, label in enumerate(labels) }
return

categories = LabelCategories()
label_map = {}

Expand Down
4 changes: 2 additions & 2 deletions datumaro/plugins/imagenet_txt_format.py
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,7 @@ def __init__(self, path: str, *,
self._generate_labels = True
elif labels_source == _LabelsSource.file:
if has_meta_file(root_dir):
labels = list(parse_meta_file(root_dir).keys())
labels = parse_meta_file(root_dir).keys()
else:
labels = self._parse_labels(
osp.join(root_dir, labels_file))
Expand All @@ -92,7 +92,7 @@ def _parse_labels(path):
return [s.strip() for s in labels_file]

def _load_categories(self, labels):
return { AnnotationType.label: LabelCategories().from_iterable(labels) }
return { AnnotationType.label: LabelCategories.from_iterable(labels) }

def _load_items(self, path):
items = {}
Expand Down
4 changes: 2 additions & 2 deletions datumaro/plugins/kitti_format/extractor.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,8 +38,8 @@ def _load_categories(self, path):
return self._load_categories_segmentation(path)
elif self._task == KittiTask.detection:
if has_meta_file(path):
return { AnnotationType.label: LabelCategories().
from_iterable(list(parse_meta_file(path).keys())) }
return { AnnotationType.label: LabelCategories.
from_iterable(parse_meta_file(path).keys()) }

return {AnnotationType.label: LabelCategories()}

Expand Down
3 changes: 3 additions & 0 deletions datumaro/plugins/kitti_raw_format/converter.py
Original file line number Diff line number Diff line change
Expand Up @@ -433,6 +433,9 @@ def _write_item(self, item, index):
def apply(self):
os.makedirs(self._save_dir, exist_ok=True)

if self._save_dataset_meta:
self._save_meta_file(self._save_dir)

if 1 < len(self._extractor.subsets()):
log.warning("Kitti RAW format supports only a single "
"subset. Subset information will be ignored on export.")
Expand Down
14 changes: 10 additions & 4 deletions datumaro/plugins/kitti_raw_format/extractor.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
from datumaro.components.format_detection import FormatDetectionContext
from datumaro.util import cast
from datumaro.util.image import find_images
from datumaro.util.meta_file_util import has_meta_file, parse_meta_file

from .format import KittiRawPath, OcclusionStates, TruncationStates

Expand Down Expand Up @@ -136,11 +137,16 @@ def _parse(cls, path):

special_attrs = KittiRawPath.SPECIAL_ATTRS
common_attrs = ['occluded']
label_cat = LabelCategories(attributes=common_attrs)
for label, attrs in sorted(labels.items(), key=lambda e: e[0]):
label_cat.add(label, attributes=set(attrs) - special_attrs)

categories = {AnnotationType.label: label_cat}
if has_meta_file(path):
categories = { AnnotationType.label: LabelCategories.
from_iterable(parse_meta_file(path).keys()) }
else:
label_cat = LabelCategories(attributes=common_attrs)
for label, attrs in sorted(labels.items(), key=lambda e: e[0]):
label_cat.add(label, attributes=set(attrs) - special_attrs)

categories = {AnnotationType.label: label_cat}

items = {}
for idx, track in enumerate(tracks):
Expand Down
18 changes: 15 additions & 3 deletions datumaro/plugins/labelme_format.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
from datumaro.util import cast, escape, unescape
from datumaro.util.image import save_image
from datumaro.util.mask_tools import find_mask_bbox, load_mask
from datumaro.util.meta_file_util import has_meta_file, parse_meta_file
from datumaro.util.os_util import split_path


Expand Down Expand Up @@ -53,9 +54,15 @@ def __init__(self, path):
def _parse(self, dataset_root):
items = []
subsets = set()
categories = { AnnotationType.label:
LabelCategories(attributes={ 'occluded', 'username' })
}

if has_meta_file(dataset_root):
categories = { AnnotationType.label:
LabelCategories(attributes={ 'occluded', 'username' }).
from_iterable(parse_meta_file(dataset_root).keys()) }
else:
categories = { AnnotationType.label:
LabelCategories(attributes={ 'occluded', 'username' })
}

for xml_path in sorted(
glob(osp.join(dataset_root, '**', '*.xml'), recursive=True)):
Expand Down Expand Up @@ -323,6 +330,11 @@ class LabelMeConverter(Converter):
DEFAULT_IMAGE_EXT = LabelMePath.IMAGE_EXT

def apply(self):
os.makedirs(self._save_dir, exist_ok=True)

if self._save_dataset_meta:
self._save_meta_file(self._save_dir)

for subset_name, subset in self._extractor.subsets().items():
subset_dir = osp.join(self._save_dir, subset_name)
os.makedirs(subset_dir, exist_ok=True)
Expand Down
4 changes: 2 additions & 2 deletions datumaro/plugins/lfw_format.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,8 +46,8 @@ def __init__(self, path, subset=None):

def _load_categories(self, path):
if has_meta_file(self._dataset_dir):
return { AnnotationType.label: LabelCategories().
from_iterable(list(parse_meta_file(self._dataset_dir).keys())) }
return { AnnotationType.label: LabelCategories.
from_iterable(parse_meta_file(self._dataset_dir).keys()) }

label_cat = LabelCategories()
if osp.isfile(path):
Expand Down
7 changes: 6 additions & 1 deletion datumaro/plugins/mapillary_vistas_format/extractor.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
from datumaro.components.media import Image
from datumaro.util.image import find_images, lazy_image, load_image
from datumaro.util.mask_tools import bgr2index
from datumaro.util.meta_file_util import has_meta_file, parse_meta_file

from .format import (
MapillaryVistasLabelMaps, MapillaryVistasPath, MapillaryVistasTask,
Expand Down Expand Up @@ -56,7 +57,11 @@ def __init__(self, path, task,
self._task = task

if self._task == MapillaryVistasTask.instances:
self._categories = self._load_instances_categories()
if has_meta_file(path):
self._categories = make_mapillary_instance_categories(
parse_meta_file(path))
else:
self._categories = self._load_instances_categories()
self._items = self._load_instances_items()
else:
panoptic_config = self._load_panoptic_config(self._annotations_dir)
Expand Down
4 changes: 2 additions & 2 deletions datumaro/plugins/mnist_csv_format.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,8 +37,8 @@ def __init__(self, path, subset=None):

def _load_categories(self):
if has_meta_file(self._dataset_dir):
return { AnnotationType.label: LabelCategories().
from_iterable(list(parse_meta_file(self._dataset_dir).keys())) }
return { AnnotationType.label: LabelCategories.
from_iterable(parse_meta_file(self._dataset_dir).keys()) }

label_cat = LabelCategories()

Expand Down
4 changes: 2 additions & 2 deletions datumaro/plugins/mnist_format.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,8 +45,8 @@ def __init__(self, path, subset=None):

def _load_categories(self):
if has_meta_file(self._dataset_dir):
return { AnnotationType.label: LabelCategories().
from_iterable(list(parse_meta_file(self._dataset_dir).keys())) }
return { AnnotationType.label: LabelCategories.
from_iterable(parse_meta_file(self._dataset_dir).keys()) }

label_cat = LabelCategories()

Expand Down
16 changes: 11 additions & 5 deletions datumaro/plugins/mot_format.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
from datumaro.components.media import Image
from datumaro.util import cast
from datumaro.util.image import find_images
from datumaro.util.meta_file_util import has_meta_file, parse_meta_file

MotLabel = Enum('MotLabel', [
('pedestrian', 1),
Expand Down Expand Up @@ -87,6 +88,8 @@ def __init__(self, path, labels=None, occlusion_threshold=0, is_gt=None,
is_gt = True
self._is_gt = is_gt

if has_meta_file(seq_root):
labels = list(parse_meta_file(seq_root).keys())
if labels is None:
labels = osp.join(osp.dirname(path), MotPath.LABELS_FILE)
if not osp.isfile(labels):
Expand Down Expand Up @@ -264,8 +267,11 @@ def apply(self):
else:
log.debug("Item '%s' has no image", item.id)

labels_file = osp.join(anno_dir, MotPath.LABELS_FILE)
with open(labels_file, 'w', encoding='utf-8') as f:
f.write('\n'.join(l.name
for l in extractor.categories()[AnnotationType.label])
)
if self._save_dataset_meta:
self._save_meta_file(self._save_dir)
else:
labels_file = osp.join(anno_dir, MotPath.LABELS_FILE)
with open(labels_file, 'w', encoding='utf-8') as f:
f.write('\n'.join(l.name
for l in extractor.categories()[AnnotationType.label])
)
14 changes: 12 additions & 2 deletions datumaro/plugins/mots_format.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
from datumaro.components.extractor import DatasetItem, Importer, SourceExtractor
from datumaro.util.image import find_images, load_image, save_image
from datumaro.util.mask_tools import merge_masks
from datumaro.util.meta_file_util import has_meta_file, parse_meta_file


class MotsPath:
Expand Down Expand Up @@ -44,8 +45,12 @@ def __init__(self, path, subset=None):
super().__init__(subset=subset)
self._images_dir = osp.join(path, 'images')
self._anno_dir = osp.join(path, MotsPath.MASKS_DIR)
self._categories = self._parse_categories(
osp.join(self._anno_dir, MotsPath.LABELS_FILE))
if has_meta_file(path):
self._categories = { AnnotationType.label: LabelCategories.
from_iterable(parse_meta_file(path).keys()) }
else:
self._categories = self._parse_categories(
osp.join(self._anno_dir, MotsPath.LABELS_FILE))
self._items = self._parse_items()

def _parse_categories(self, path):
Expand Down Expand Up @@ -122,6 +127,11 @@ class MotsPngConverter(Converter):
DEFAULT_IMAGE_EXT = MotsPath.IMAGE_EXT

def apply(self):
os.makedirs(self._save_dir, exist_ok=True)

if self._save_dataset_meta:
self._save_meta_file(self._save_dir)

for subset_name, subset in self._extractor.subsets().items():
subset_dir = osp.join(self._save_dir, subset_name)
image_dir = osp.join(subset_dir, MotsPath.IMAGE_DIR)
Expand Down
Loading

0 comments on commit 8e1203a

Please sign in to comment.