diff --git a/datumaro/datumaro/plugins/coco_format/converter.py b/datumaro/datumaro/plugins/coco_format/converter.py index 403a6a83eb6..b5766d046c2 100644 --- a/datumaro/datumaro/plugins/coco_format/converter.py +++ b/datumaro/datumaro/plugins/coco_format/converter.py @@ -17,7 +17,7 @@ AnnotationType, Points ) from datumaro.components.cli_plugin import CliPlugin -from datumaro.util import find +from datumaro.util import find, cast from datumaro.util.image import save_image import datumaro.util.mask_tools as mask_tools import datumaro.util.annotation_tools as anno_tools @@ -25,15 +25,6 @@ from .format import CocoTask, CocoPath -def _cast(value, type_conv, default=None): - if value is None: - return default - try: - return type_conv(value) - except Exception: - return default - - SegmentationMode = Enum('SegmentationMode', ['guess', 'polygons', 'mask']) class _TaskConverter: @@ -82,7 +73,7 @@ def save_image_info(self, item, filename): 'id': self._get_image_id(item), 'width': int(w), 'height': int(h), - 'file_name': _cast(filename, str, ''), + 'file_name': cast(filename, str, ''), 'license': 0, 'flickr_url': '', 'coco_url': '', @@ -162,8 +153,8 @@ def save_categories(self, dataset): for idx, cat in enumerate(label_categories.items): self.categories.append({ 'id': 1 + idx, - 'name': _cast(cat.name, str, ''), - 'supercategory': _cast(cat.parent, str, ''), + 'name': cast(cat.name, str, ''), + 'supercategory': cast(cat.parent, str, ''), }) @classmethod @@ -309,7 +300,7 @@ def convert_instance(self, instance, item): elem = { 'id': self._get_ann_id(ann), 'image_id': self._get_image_id(item), - 'category_id': _cast(ann.label, int, -1) + 1, + 'category_id': cast(ann.label, int, -1) + 1, 'segmentation': segmentation, 'area': float(area), 'bbox': list(map(float, bbox)), @@ -334,10 +325,11 @@ def save_categories(self, dataset): for idx, label_cat in enumerate(label_categories.items): cat = { 'id': 1 + idx, - 'name': _cast(label_cat.name, str, ''), - 'supercategory': 
_cast(label_cat.parent, str, ''), + 'name': cast(label_cat.name, str, ''), + 'supercategory': cast(label_cat.parent, str, ''), 'keypoints': [], 'skeleton': [], + } if point_categories is not None: @@ -416,8 +408,8 @@ def save_categories(self, dataset): for idx, cat in enumerate(label_categories.items): self.categories.append({ 'id': 1 + idx, - 'name': _cast(cat.name, str, ''), - 'supercategory': _cast(cat.parent, str, ''), + 'name': cast(cat.name, str, ''), + 'supercategory': cast(cat.parent, str, ''), }) def save_annotations(self, item): @@ -504,7 +496,7 @@ def _make_task_converters(self): def _get_image_id(self, item): image_id = self._image_ids.get(item.id) if image_id is None: - image_id = _cast(item.id, int, len(self._image_ids) + 1) + image_id = cast(item.id, int, len(self._image_ids) + 1) self._image_ids[item.id] = image_id return image_id diff --git a/datumaro/datumaro/plugins/cvat_format/converter.py b/datumaro/datumaro/plugins/cvat_format/converter.py index a64addad02b..1d364184477 100644 --- a/datumaro/datumaro/plugins/cvat_format/converter.py +++ b/datumaro/datumaro/plugins/cvat_format/converter.py @@ -12,19 +12,12 @@ from datumaro.components.cli_plugin import CliPlugin from datumaro.components.converter import Converter from datumaro.components.extractor import DEFAULT_SUBSET_NAME, AnnotationType +from datumaro.util import cast from datumaro.util.image import save_image from .format import CvatPath -def _cast(value, type_conv, default=None): - if value is None: - return default - try: - return type_conv(value) - except Exception: - return default - def pairwise(iterable): a = iter(iterable) return zip(a, a) @@ -188,7 +181,7 @@ def _save_image(self, item): def _write_item(self, item, index): image_info = OrderedDict([ - ("id", str(_cast(item.id, int, index))), + ("id", str(cast(item.id, int, index))), ]) if item.has_image: size = item.image.size diff --git a/datumaro/datumaro/plugins/datumaro_format/converter.py 
b/datumaro/datumaro/plugins/datumaro_format/converter.py index cc860cbad3d..08dc0062dd2 100644 --- a/datumaro/datumaro/plugins/datumaro_format/converter.py +++ b/datumaro/datumaro/plugins/datumaro_format/converter.py @@ -16,6 +16,7 @@ Label, Mask, RleMask, Points, Polygon, PolyLine, Bbox, Caption, LabelCategories, MaskCategories, PointsCategories ) +from datumaro.util import cast from datumaro.util.image import save_image import pycocotools.mask as mask_utils from datumaro.components.cli_plugin import CliPlugin @@ -23,14 +24,6 @@ from .format import DatumaroPath -def _cast(value, type_conv, default=None): - if value is None: - return default - try: - return type_conv(value) - except Exception: - return default - class _SubsetWriter: def __init__(self, name, context): self._name = name @@ -108,10 +101,10 @@ def _convert_annotation(self, obj): assert isinstance(obj, Annotation) ann_json = { - 'id': _cast(obj.id, int), - 'type': _cast(obj.type.name, str), + 'id': cast(obj.id, int), + 'type': cast(obj.type.name, str), 'attributes': obj.attributes, - 'group': _cast(obj.group, int, 0), + 'group': cast(obj.group, int, 0), } return ann_json @@ -119,7 +112,7 @@ def _convert_label_object(self, obj): converted = self._convert_annotation(obj) converted.update({ - 'label_id': _cast(obj.label, int), + 'label_id': cast(obj.label, int), }) return converted @@ -133,7 +126,7 @@ def _convert_mask_object(self, obj): np.require(obj.image, dtype=np.uint8, requirements='F')) converted.update({ - 'label_id': _cast(obj.label, int), + 'label_id': cast(obj.label, int), 'rle': { # serialize as compressed COCO mask 'counts': rle['counts'].decode('ascii'), @@ -146,7 +139,7 @@ def _convert_polyline_object(self, obj): converted = self._convert_annotation(obj) converted.update({ - 'label_id': _cast(obj.label, int), + 'label_id': cast(obj.label, int), 'points': [float(p) for p in obj.points], }) return converted @@ -155,7 +148,7 @@ def _convert_polygon_object(self, obj): converted = 
self._convert_annotation(obj) converted.update({ - 'label_id': _cast(obj.label, int), + 'label_id': cast(obj.label, int), 'points': [float(p) for p in obj.points], }) return converted @@ -164,7 +157,7 @@ def _convert_bbox_object(self, obj): converted = self._convert_annotation(obj) converted.update({ - 'label_id': _cast(obj.label, int), + 'label_id': cast(obj.label, int), 'bbox': [float(p) for p in obj.get_bbox()], }) return converted @@ -173,7 +166,7 @@ def _convert_points_object(self, obj): converted = self._convert_annotation(obj) converted.update({ - 'label_id': _cast(obj.label, int), + 'label_id': cast(obj.label, int), 'points': [float(p) for p in obj.points], 'visibility': [int(v.value) for v in obj.visibility], }) @@ -183,7 +176,7 @@ def _convert_caption_object(self, obj): converted = self._convert_annotation(obj) converted.update({ - 'caption': _cast(obj.caption, str), + 'caption': cast(obj.caption, str), }) return converted @@ -193,8 +186,8 @@ def _convert_label_categories(self, obj): } for label in obj.items: converted['labels'].append({ - 'name': _cast(label.name, str), - 'parent': _cast(label.parent, str), + 'name': cast(label.name, str), + 'parent': cast(label.parent, str), }) return converted @@ -218,7 +211,7 @@ def _convert_points_categories(self, obj): for label_id, item in obj.items.items(): converted['items'].append({ 'label_id': int(label_id), - 'labels': [_cast(label, str) for label in item.labels], + 'labels': [cast(label, str) for label in item.labels], 'adjacent': [int(v) for v in item.adjacent], }) return converted diff --git a/datumaro/datumaro/plugins/mot_format.py b/datumaro/datumaro/plugins/mot_format.py new file mode 100644 index 00000000000..18d3695b145 --- /dev/null +++ b/datumaro/datumaro/plugins/mot_format.py @@ -0,0 +1,341 @@ + +# Copyright (C) 2020 Intel Corporation +# +# SPDX-License-Identifier: MIT + +# The Multiple Object Tracking Benchmark challenge format support +# Format description: https://arxiv.org/pdf/1906.04567.pdf +# 
Another description: https://motchallenge.net/instructions + +from collections import OrderedDict +import csv +from enum import Enum +import logging as log +import os +import os.path as osp + +from datumaro.components.extractor import (SourceExtractor, + DatasetItem, AnnotationType, Bbox, LabelCategories +) +from datumaro.components.extractor import Importer +from datumaro.components.converter import Converter +from datumaro.components.cli_plugin import CliPlugin +from datumaro.util import cast +from datumaro.util.image import Image, save_image + + +MotLabel = Enum('MotLabel', [ + ('pedestrian', 1), + ('person on vehicle', 2), + ('car', 3), + ('bicycle', 4), + ('motorbike', 5), + ('non motorized vehicle', 6), + ('static person', 7), + ('distractor', 8), + ('occluder', 9), + ('occluder on the ground', 10), + ('occluder full', 11), + ('reflection', 12), +]) + +class MotPath: + IMAGE_DIR = 'img1' + SEQINFO_FILE = 'seqinfo.ini' + LABELS_FILE = 'labels.txt' + GT_FILENAME = 'gt.txt' + DET_FILENAME = 'det.txt' + + IMAGE_EXT = '.jpg' + + FIELDS = [ + 'frame_id', + 'track_id', + 'x', + 'y', + 'w', + 'h', + 'confidence', # or 'not ignored' flag for GT anns + 'class_id', + 'visibility' + ] + + +class MotSeqExtractor(SourceExtractor): + def __init__(self, path, labels=None, occlusion_threshold=0, is_gt=None): + super().__init__() + + assert osp.isfile(path) + self._path = path + seq_root = osp.dirname(osp.dirname(path)) + + self._image_dir = '' + if osp.isdir(osp.join(seq_root, MotPath.IMAGE_DIR)): + self._image_dir = osp.join(seq_root, MotPath.IMAGE_DIR) + + seq_info = osp.join(seq_root, MotPath.SEQINFO_FILE) + if osp.isfile(seq_info): + seq_info = self._parse_seq_info(seq_info) + self._image_dir = osp.join(seq_root, seq_info['imdir']) + else: + seq_info = None + self._seq_info = seq_info + + self._occlusion_threshold = float(occlusion_threshold) + + assert is_gt in {None, True, False} + if is_gt is None: + if osp.basename(path) == MotPath.DET_FILENAME: + is_gt = False + 
else: + is_gt = True + self._is_gt = is_gt + + self._subset = None + + if labels is None: + if osp.isfile(osp.join(seq_root, MotPath.LABELS_FILE)): + labels = osp.join(seq_root, MotPath.LABELS_FILE) + else: + labels = [lbl.name for lbl in MotLabel] + if isinstance(labels, str): + labels = self._parse_labels(labels) + elif isinstance(labels, list): + assert all(isinstance(lbl, str) for lbl in labels), labels + else: + raise TypeError("Unexpected type of 'labels' argument: %s" % labels) + self._categories = self._load_categories(labels) + self._items = self._load_items(path) + + def categories(self): + return self._categories + + def __iter__(self): + for item in self._items.values(): + yield item + + def __len__(self): + return len(self._items) + + def subsets(self): + if self._subset: + return [self._subset] + return None + + def get_subset(self, name): + if name != self._subset: + return None + return self + + @staticmethod + def _parse_labels(path): + with open(path, encoding='utf-8') as labels_file: + return [s.strip() for s in labels_file] + + def _load_categories(self, labels): + attributes = ['track_id'] + if self._is_gt: + attributes += ['occluded', 'visibility', 'ignored'] + else: + attributes += ['score'] + label_cat = LabelCategories(attributes=attributes) + for label in labels: + label_cat.add(label) + + return { AnnotationType.label: label_cat } + + def _load_items(self, path): + labels_count = len(self._categories[AnnotationType.label].items) + items = OrderedDict() + + if self._seq_info: + for frame_id in range(self._seq_info['seqlength']): + items[frame_id] = DatasetItem( + id=frame_id, + subset=self._subset, + image=Image( + path=osp.join(self._image_dir, + '%06d%s' % (frame_id, self._seq_info['imext'])), + size=(self._seq_info['imheight'], self._seq_info['imwidth']) + ) + ) + elif osp.isdir(self._image_dir): + for p in os.listdir(self._image_dir): + if p.endswith(MotPath.IMAGE_EXT): + frame_id = int(osp.splitext(p)[0]) + items[frame_id] = 
DatasetItem( + id=frame_id, + subset=self._subset, + image=osp.join(self._image_dir, p), + ) + + with open(path, newline='', encoding='utf-8') as csv_file: + # NOTE: Different MOT files have different count of fields + # (7, 9 or 10). This is handled by reader: + # - all extra fields go to a separate field + # - all unmet fields have None values + for row in csv.DictReader(csv_file, fieldnames=MotPath.FIELDS): + frame_id = int(row['frame_id']) + item = items.get(frame_id) + if item is None: + item = DatasetItem(id=frame_id, subset=self._subset) + annotations = item.annotations + + x, y = float(row['x']), float(row['y']) + w, h = float(row['w']), float(row['h']) + label_id = row.get('class_id') + if label_id and label_id != '-1': + label_id = int(label_id) - 1 + assert label_id < labels_count, label_id + else: + label_id = None + + attributes = {} + + # Annotations for detection task are not related to any track + track_id = int(row['track_id']) + if 0 < track_id: + attributes['track_id'] = track_id + + confidence = cast(row.get('confidence'), float, 1) + visibility = cast(row.get('visibility'), float, 1) + if self._is_gt: + attributes['visibility'] = visibility + attributes['occluded'] = \ + visibility <= self._occlusion_threshold + attributes['ignored'] = confidence == 0 + else: + attributes['score'] = float(confidence) + + annotations.append(Bbox(x, y, w, h, label=label_id, + attributes=attributes)) + + items[frame_id] = item + return items + + @classmethod + def _parse_seq_info(cls, path): + fields = {} + with open(path, encoding='utf-8') as f: + for line in f: + entry = line.lower().strip().split('=', maxsplit=1) + if len(entry) == 2: + fields[entry[0]] = entry[1] + cls._check_seq_info(fields) + for k in { 'framerate', 'seqlength', 'imwidth', 'imheight' }: + fields[k] = int(fields[k]) + return fields + + @staticmethod + def _check_seq_info(seq_info): + assert set(seq_info) == {'name', 'imdir', 'framerate', 'seqlength', 'imwidth', 'imheight', 'imext'}, seq_info 
+ +class MotSeqImporter(Importer): + _EXTRACTOR_NAME = 'mot_seq' + + @classmethod + def detect(cls, path): + return len(cls.find_subsets(path)) != 0 + + def __call__(self, path, **extra_params): + from datumaro.components.project import Project # cyclic import + project = Project() + + subsets = self.find_subsets(path) + if len(subsets) == 0: + raise Exception("Failed to find 'mot' dataset at '%s'" % path) + + for ann_file in subsets: + log.info("Found a dataset at '%s'" % ann_file) + + source_name = osp.splitext(osp.basename(ann_file))[0] + project.add_source(source_name, { + 'url': ann_file, + 'format': self._EXTRACTOR_NAME, + 'options': extra_params, + }) + + return project + + @staticmethod + def find_subsets(path): + subsets = [] + if path.endswith('.txt') and osp.isfile(path): + subsets = [path] + elif osp.isdir(path): + p = osp.join(path, 'gt', MotPath.GT_FILENAME) + if osp.isfile(p): + subsets.append(p) + return subsets + +class MotSeqGtConverter(Converter, CliPlugin): + @classmethod + def build_cmdline_parser(cls, **kwargs): + parser = super().build_cmdline_parser(**kwargs) + parser.add_argument('--save-images', action='store_true', + help="Save images (default: %(default)s)") + return parser + + def __init__(self, save_images=False): + super().__init__() + + self._save_images = save_images + + def __call__(self, extractor, save_dir): + images_dir = osp.join(save_dir, MotPath.IMAGE_DIR) + os.makedirs(images_dir, exist_ok=True) + self._images_dir = images_dir + + anno_dir = osp.join(save_dir, 'gt') + os.makedirs(anno_dir, exist_ok=True) + anno_file = osp.join(anno_dir, MotPath.GT_FILENAME) + with open(anno_file, 'w', encoding="utf-8") as csv_file: + writer = csv.DictWriter(csv_file, fieldnames=MotPath.FIELDS) + for idx, item in enumerate(extractor): + log.debug("Converting item '%s'", item.id) + + frame_id = cast(item.id, int, 1 + idx) + + for anno in item.annotations: + if anno.type != AnnotationType.bbox: + continue + + writer.writerow({ + 'frame_id': frame_id, +
'track_id': int(anno.attributes.get('track_id', -1)), + 'x': anno.x, + 'y': anno.y, + 'w': anno.w, + 'h': anno.h, + 'confidence': int(anno.attributes.get('ignored') != True), + 'class_id': 1 + cast(anno.label, int, -2), + 'visibility': float( + anno.attributes.get('visibility', + 1 - float( + anno.attributes.get('occluded', False) + ) + ) + ) + }) + + if self._save_images: + if item.has_image and item.image.has_data: + self._save_image(item, index=frame_id) + else: + log.debug("Item '%s' has no image" % item.id) + + labels_file = osp.join(save_dir, MotPath.LABELS_FILE) + with open(labels_file, 'w', encoding='utf-8') as f: + f.write('\n'.join(l.name + for l in extractor.categories()[AnnotationType.label].items) + ) + + def _save_image(self, item, index): + if item.image.filename: + frame_id = osp.splitext(item.image.filename)[0] + else: + frame_id = item.id + frame_id = cast(frame_id, int, index) + image_filename = '%06d%s' % (frame_id, MotPath.IMAGE_EXT) + save_image(osp.join(self._images_dir, image_filename), + item.image.data) \ No newline at end of file diff --git a/datumaro/datumaro/util/__init__.py b/datumaro/datumaro/util/__init__.py index 87c800fe515..7c36fe8efad 100644 --- a/datumaro/datumaro/util/__init__.py +++ b/datumaro/datumaro/util/__init__.py @@ -4,6 +4,7 @@ # SPDX-License-Identifier: MIT import os +import os.path as osp def find(iterable, pred=lambda x: True, default=None): @@ -17,4 +18,28 @@ def dir_items(path, ext, truncate_ext=False): if truncate_ext: f = f[:ext_pos] items.append(f) - return items \ No newline at end of file + return items + +def split_path(path): + path = osp.normpath(path) + parts = [] + + while True: + path, part = osp.split(path) + if part: + parts.append(part) + else: + if path: + parts.append(path) + break + parts.reverse() + + return parts + +def cast(value, type_conv, default=None): + if value is None: + return default + try: + return type_conv(value) + except Exception: + return default \ No newline at end of file diff 
--git a/datumaro/tests/test_mot_format.py b/datumaro/tests/test_mot_format.py new file mode 100644 index 00000000000..efe62502572 --- /dev/null +++ b/datumaro/tests/test_mot_format.py @@ -0,0 +1,146 @@ +import numpy as np + +from unittest import TestCase + +from datumaro.components.extractor import (Extractor, DatasetItem, + AnnotationType, Bbox, LabelCategories +) +from datumaro.plugins.mot_format import MotSeqGtConverter, MotSeqImporter +from datumaro.util.test_utils import TestDir, compare_datasets + + +class MotConverterTest(TestCase): + def _test_save_and_load(self, source_dataset, converter, test_dir, + target_dataset=None, importer_args=None): + converter(source_dataset, test_dir) + + if importer_args is None: + importer_args = {} + parsed_dataset = MotSeqImporter()(test_dir, **importer_args) \ + .make_dataset() + + if target_dataset is None: + target_dataset = source_dataset + + compare_datasets(self, expected=target_dataset, actual=parsed_dataset) + + def test_can_save_bboxes(self): + class SrcExtractor(Extractor): + def __iter__(self): + return iter([ + DatasetItem(id=1, subset='train', + image=np.ones((16, 16, 3)), + annotations=[ + Bbox(0, 4, 4, 8, label=2, attributes={ + 'occluded': True, + }), + Bbox(0, 4, 4, 4, label=3, attributes={ + 'visibility': 0.4, + }), + Bbox(2, 4, 4, 4, attributes={ + 'ignored': True + }), + ] + ), + + DatasetItem(id=2, subset='val', + image=np.ones((8, 8, 3)), + annotations=[ + Bbox(1, 2, 4, 2, label=3), + ] + ), + + DatasetItem(id=3, subset='test', + image=np.ones((5, 4, 3)) * 3, + ), + ]) + + def categories(self): + label_cat = LabelCategories() + for label in range(10): + label_cat.add('label_' + str(label)) + return { + AnnotationType.label: label_cat, + } + + class DstExtractor(Extractor): + def __iter__(self): + return iter([ + DatasetItem(id=1, + image=np.ones((16, 16, 3)), + annotations=[ + Bbox(0, 4, 4, 8, label=2, attributes={ + 'occluded': True, + 'visibility': 0.0, + 'ignored': False, + }), + Bbox(0, 4, 4, 4, 
label=3, attributes={ + 'occluded': False, + 'visibility': 0.4, + 'ignored': False, + }), + Bbox(2, 4, 4, 4, attributes={ + 'occluded': False, + 'visibility': 1.0, + 'ignored': True, + }), + ] + ), + + DatasetItem(id=2, + image=np.ones((8, 8, 3)), + annotations=[ + Bbox(1, 2, 4, 2, label=3, attributes={ + 'occluded': False, + 'visibility': 1.0, + 'ignored': False, + }), + ] + ), + + DatasetItem(id=3, + image=np.ones((5, 4, 3)) * 3, + ), + ]) + + def categories(self): + label_cat = LabelCategories() + for label in range(10): + label_cat.add('label_' + str(label)) + return { + AnnotationType.label: label_cat, + } + + with TestDir() as test_dir: + self._test_save_and_load( + SrcExtractor(), MotSeqGtConverter(save_images=True), + test_dir, target_dataset=DstExtractor()) + +class MotImporterTest(TestCase): + def test_can_detect(self): + class TestExtractor(Extractor): + def __iter__(self): + return iter([ + DatasetItem(id=1, subset='train', + image=np.ones((16, 16, 3)), + annotations=[ + Bbox(0, 4, 4, 8, label=2), + ] + ), + ]) + + def categories(self): + label_cat = LabelCategories() + for label in range(10): + label_cat.add('label_' + str(label)) + return { + AnnotationType.label: label_cat, + } + + def generate_dummy_dataset(path): + MotSeqGtConverter()(TestExtractor(), save_dir=path) + + with TestDir() as test_dir: + generate_dummy_dataset(test_dir) + + self.assertTrue(MotSeqImporter.detect(test_dir))