From 7b703bbd5c5700a12ba806d2a1dfa7fc0bfed2a6 Mon Sep 17 00:00:00 2001 From: Maxim Zhiltsov Date: Sat, 19 Sep 2020 08:32:02 +0300 Subject: [PATCH] Add MOTS png format support (#21) --- CHANGELOG.md | 1 + README.md | 1 + datumaro/components/extractor.py | 34 +++- datumaro/plugins/mots_format.py | 153 ++++++++++++++++++ datumaro/util/image.py | 24 +-- datumaro/util/mask_tools.py | 3 +- docs/user_manual.md | 3 + tests/assets/mots_dataset/train/images/1.jpg | Bin 0 -> 332 bytes tests/assets/mots_dataset/train/images/2.jpg | Bin 0 -> 332 bytes .../assets/mots_dataset/train/instances/1.png | Bin 0 -> 76 bytes .../assets/mots_dataset/train/instances/2.png | Bin 0 -> 72 bytes .../mots_dataset/train/instances/labels.txt | 4 + tests/assets/mots_dataset/val/images/3.jpg | Bin 0 -> 332 bytes tests/assets/mots_dataset/val/instances/3.png | Bin 0 -> 74 bytes .../mots_dataset/val/instances/labels.txt | 4 + tests/test_mots_format.py | 100 ++++++++++++ 16 files changed, 311 insertions(+), 16 deletions(-) create mode 100644 datumaro/plugins/mots_format.py create mode 100644 tests/assets/mots_dataset/train/images/1.jpg create mode 100644 tests/assets/mots_dataset/train/images/2.jpg create mode 100644 tests/assets/mots_dataset/train/instances/1.png create mode 100644 tests/assets/mots_dataset/train/instances/2.png create mode 100644 tests/assets/mots_dataset/train/instances/labels.txt create mode 100644 tests/assets/mots_dataset/val/images/3.jpg create mode 100644 tests/assets/mots_dataset/val/instances/3.png create mode 100644 tests/assets/mots_dataset/val/instances/labels.txt create mode 100644 tests/test_mots_format.py diff --git a/CHANGELOG.md b/CHANGELOG.md index 18da8bbde6d9..dfef9cb38ae8 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,6 +10,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Added - `reindex` option in COCO and CVAT converters () - Support for relative paths in LabelMe format () +- MOTS png mask format support () ### Changed - diff --git a/README.md b/README.md index 584e1d821ec6..6367ec732a95 100644 --- a/README.md +++ b/README.md @@ -112,6 +112,7 @@ CVAT annotations ---> Publication, statistics etc. 
- [YOLO](https://github.com/AlexeyAB/darknet#how-to-train-pascal-voc-data) (`bboxes`) - [TF Detection API](https://github.com/tensorflow/models/blob/master/research/object_detection/g3doc/using_your_own_dataset.md) (`bboxes`, `masks`) - [MOT sequences](https://arxiv.org/pdf/1906.04567.pdf) + - [MOTS PNG](https://www.vision.rwth-aachen.de/page/mots) - [CVAT](https://github.com/opencv/cvat/blob/develop/cvat/apps/documentation/xml_format.md) - [LabelMe](http://labelme.csail.mit.edu/Release3.0) - Dataset building diff --git a/datumaro/components/extractor.py b/datumaro/components/extractor.py index dcb7b036c049..48850c7e7f1e 100644 --- a/datumaro/components/extractor.py +++ b/datumaro/components/extractor.py @@ -3,9 +3,10 @@ # # SPDX-License-Identifier: MIT -from collections import namedtuple from enum import Enum +from typing import List, Dict import numpy as np +import os.path as osp import attr from attr import attrs, attrib @@ -584,6 +585,9 @@ def __init__(self, length=None, subset=None): subset = None self._subset = subset + self._categories = {} + self._items = [] + def subsets(self): return [self._subset] @@ -592,13 +596,39 @@ def get_subset(self, name): raise Exception("Unknown subset '%s' requested" % name) return self + def categories(self): + return self._categories + + def __iter__(self): + for item in self._items: + yield item + + def __len__(self): + return len(self._items) + class Importer: @classmethod def detect(cls, path): + return len(cls.find_subsets(path)) != 0 + + @classmethod + def find_subsets(cls, path) -> List[Dict]: + """Returns a list of Sources""" raise NotImplementedError() def __call__(self, path, **extra_params): - raise NotImplementedError() + from datumaro.components.project import Project # cyclic import + project = Project() + + subsets = self.find_subsets(path) + if len(subsets) == 0: + raise Exception("Failed to find dataset at '%s'" % path) + + for desc in subsets: + source_name = osp.splitext(osp.basename(desc['url']))[0] + project.add_source(source_name, desc) + + return project class Transform(Extractor): @staticmethod diff --git a/datumaro/plugins/mots_format.py b/datumaro/plugins/mots_format.py new file mode 100644 index 000000000000..aae3aefa6366 --- /dev/null +++ b/datumaro/plugins/mots_format.py @@ -0,0 +1,153 @@ +# Copyright (C) 2020 Intel Corporation +# +# SPDX-License-Identifier: MIT + +# Implements MOTS format https://www.vision.rwth-aachen.de/page/mots + +from enum import Enum +from glob import glob +import logging as log +import numpy as np +import os +import os.path as osp + +from datumaro.components.extractor import (SourceExtractor, DEFAULT_SUBSET_NAME, + DatasetItem, AnnotationType, Mask, LabelCategories +) +from datumaro.components.extractor import Importer +from datumaro.components.converter import Converter +from datumaro.util.image import load_image, save_image +from datumaro.util.mask_tools import merge_masks + + +class MotsPath: + MASKS_DIR = 'instances' + IMAGE_DIR = 'images' + IMAGE_EXT = '.jpg' + LABELS_FILE = 'labels.txt' + MAX_INSTANCES = 1000 + +MotsLabels = Enum('MotsLabels', [ + ('background', 0), + ('car', 1), + ('pedestrian', 2), + ('ignored', 10), +]) + +class MotsPngExtractor(SourceExtractor): + @staticmethod + def detect_dataset(path): + if osp.isdir(osp.join(path, MotsPath.MASKS_DIR)): + return [{'url': path, 'format': 'mots_png'}] + return [] + + def __init__(self, path, subset_name=None): + assert osp.isdir(path), path + super().__init__(subset=subset_name) + self._images_dir = osp.join(path, 'images') + 
self._anno_dir = osp.join(path, MotsPath.MASKS_DIR) + self._categories = self._parse_categories( + osp.join(self._anno_dir, MotsPath.LABELS_FILE)) + self._items = self._parse_items() + + def _parse_categories(self, path): + if osp.isfile(path): + with open(path) as f: + labels = [l.strip() for l in f] + else: + labels = [l.name for l in MotsLabels] + return { AnnotationType.label: LabelCategories.from_iterable(labels) } + + def _parse_items(self): + items = [] + for p in sorted(p for p in + glob(self._anno_dir + '/**/*.png', recursive=True)): + item_id = osp.splitext(osp.relpath(p, self._anno_dir))[0] + items.append(DatasetItem(id=item_id, subset=self._subset, + image=osp.join(self._images_dir, item_id + MotsPath.IMAGE_EXT), + annotations=self._parse_annotations(p))) + return items + + @staticmethod + def _lazy_extract_mask(mask, v): + return lambda: mask == v + + def _parse_annotations(self, path): + combined_mask = load_image(path, dtype=np.uint16) + masks = [] + for obj_id in np.unique(combined_mask): + class_id, instance_id = divmod(obj_id, MotsPath.MAX_INSTANCES) + z_order = 0 + if class_id == 0: + continue # background + if class_id == 10 and \ + len(self._categories[AnnotationType.label].items) < 10: + z_order = 1 + class_id = self._categories[AnnotationType.label].find( + MotsLabels.ignored.name)[0] + else: + class_id -= 1 + masks.append(Mask(self._lazy_extract_mask(combined_mask, obj_id), + label=class_id, z_order=z_order, + attributes={'track_id': instance_id})) + return masks + + +class MotsImporter(Importer): + @classmethod + def find_subsets(cls, path): + if not osp.isdir(path): + raise Exception("Expected directory path, got '%s'" % path) + path = osp.normpath(path) + + subsets = [] + subsets.extend(MotsPngExtractor.detect_dataset(path)) + if not subsets: + for p in os.listdir(path): + detected = MotsPngExtractor.detect_dataset(osp.join(path, p)) + for s in detected: + s.setdefault('options', {})['subset_name'] = p + subsets.extend(detected) + return subsets + + +class MotsPngConverter(Converter): + DEFAULT_IMAGE_EXT = MotsPath.IMAGE_EXT + + def apply(self): + for subset_name in self._extractor.subsets(): + subset = self._extractor.get_subset(subset_name) + subset_name = subset_name or DEFAULT_SUBSET_NAME + + subset_dir = osp.join(self._save_dir, subset_name) + images_dir = osp.join(subset_dir, MotsPath.IMAGE_DIR) + anno_dir = osp.join(subset_dir, MotsPath.MASKS_DIR) + + for item in subset: + log.debug("Converting item '%s'", item.id) + + if self._save_images: + if item.has_image and item.image.has_data: + self._save_image(item, + osp.join(images_dir, self._make_image_filename(item))) + else: + log.debug("Item '%s' has no image", item.id) + + self._save_annotations(item, anno_dir) + + with open(osp.join(anno_dir, MotsPath.LABELS_FILE), 'w') as f: + f.write('\n'.join(l.name + for l in subset.categories()[AnnotationType.label].items)) + + def _save_annotations(self, item, anno_dir): + masks = [a for a in item.annotations if a.type == AnnotationType.mask] + if not masks: + return + + instance_ids = [int(a.attributes['track_id']) for a in masks] + masks = sorted(zip(masks, instance_ids), key=lambda e: e[0].z_order) + mask = merge_masks([ + m.image * (MotsPath.MAX_INSTANCES * (1 + m.label) + id) + for m, id in masks]) + save_image(osp.join(anno_dir, item.id + '.png'), mask, + create_dir=True, dtype=np.uint16) diff --git a/datumaro/util/image.py b/datumaro/util/image.py index 860c7f2d505f..625424be5bf8 100644 --- a/datumaro/util/image.py +++ b/datumaro/util/image.py @@ -23,7 +23,7 
@@ from datumaro.util.image_cache import ImageCache as _ImageCache -def load_image(path): +def load_image(path, dtype=np.float32): """ Reads an image in the HWC Grayscale/BGR(A) float [0; 255] format. """ @@ -31,11 +31,11 @@ def load_image(path): if _IMAGE_BACKEND == _IMAGE_BACKENDS.cv2: import cv2 image = cv2.imread(path, cv2.IMREAD_UNCHANGED) - image = image.astype(np.float32) + image = image.astype(dtype) elif _IMAGE_BACKEND == _IMAGE_BACKENDS.PIL: from PIL import Image image = Image.open(path) - image = np.asarray(image, dtype=np.float32) + image = np.asarray(image, dtype=dtype) if len(image.shape) == 3 and image.shape[2] in {3, 4}: image[:, :, :3] = image[:, :, 2::-1] # RGB to BGR else: @@ -48,7 +48,7 @@ def load_image(path): assert image.shape[2] in {3, 4} return image -def save_image(path, image, create_dir=False, **kwargs): +def save_image(path, image, create_dir=False, dtype=np.uint8, **kwargs): # NOTE: Check destination path for existence # OpenCV silently fails if target directory does not exist dst_dir = osp.dirname(path) @@ -72,7 +72,7 @@ def save_image(path, image, create_dir=False, **kwargs): int(cv2.IMWRITE_JPEG_QUALITY), kwargs.get('jpeg_quality', 75) ] - image = image.astype(np.uint8) + image = image.astype(dtype) cv2.imwrite(path, image, params=params) elif _IMAGE_BACKEND == _IMAGE_BACKENDS.PIL: from PIL import Image @@ -82,7 +82,7 @@ def save_image(path, image, create_dir=False, **kwargs): if kwargs.get('jpeg_quality') == 100: params['subsampling'] = 0 - image = image.astype(np.uint8) + image = image.astype(dtype) if len(image.shape) == 3 and image.shape[2] in {3, 4}: image[:, :, :3] = image[:, :, 2::-1] # BGR to RGB image = Image.fromarray(image) @@ -90,7 +90,7 @@ def save_image(path, image, create_dir=False, **kwargs): else: raise NotImplementedError() -def encode_image(image, ext, **kwargs): +def encode_image(image, ext, dtype=np.uint8, **kwargs): if not kwargs: kwargs = {} @@ -107,7 +107,7 @@ def encode_image(image, ext, **kwargs): int(cv2.IMWRITE_JPEG_QUALITY), kwargs.get('jpeg_quality', 75) ] - image = image.astype(np.uint8) + image = image.astype(dtype) success, result = cv2.imencode(ext, image, params=params) if not success: raise Exception("Failed to encode image to '%s' format" % (ext)) @@ -123,7 +123,7 @@ def encode_image(image, ext, **kwargs): if kwargs.get('jpeg_quality') == 100: params['subsampling'] = 0 - image = image.astype(np.uint8) + image = image.astype(dtype) if len(image.shape) == 3 and image.shape[2] in {3, 4}: image[:, :, :3] = image[:, :, 2::-1] # BGR to RGB image = Image.fromarray(image) @@ -133,16 +133,16 @@ def encode_image(image, ext, **kwargs): else: raise NotImplementedError() -def decode_image(image_bytes): +def decode_image(image_bytes, dtype=np.float32): if _IMAGE_BACKEND == _IMAGE_BACKENDS.cv2: import cv2 image = np.frombuffer(image_bytes, dtype=np.uint8) image = cv2.imdecode(image, cv2.IMREAD_UNCHANGED) - image = image.astype(np.float32) + image = image.astype(dtype) elif _IMAGE_BACKEND == _IMAGE_BACKENDS.PIL: from PIL import Image image = Image.open(BytesIO(image_bytes)) - image = np.asarray(image, dtype=np.float32) + image = np.asarray(image, dtype=dtype) if len(image.shape) == 3 and image.shape[2] in {3, 4}: image[:, :, :3] = image[:, :, 2::-1] # RGB to BGR else: diff --git a/datumaro/util/mask_tools.py b/datumaro/util/mask_tools.py index 95c8633a23c9..72224bccf7ee 100644 --- a/datumaro/util/mask_tools.py +++ b/datumaro/util/mask_tools.py @@ -106,8 +106,7 @@ def make_binary_mask(mask): def load_mask(path, inverse_colormap=None): - 
mask = load_image(path) - mask = mask.astype(np.uint8) + mask = load_image(path, dtype=np.uint8) if inverse_colormap is not None: if len(mask.shape) == 3 and mask.shape[2] != 1: mask = unpaint_mask(mask, inverse_colormap) diff --git a/docs/user_manual.md b/docs/user_manual.md index b9276ac9e1f5..11955a66f2c4 100644 --- a/docs/user_manual.md +++ b/docs/user_manual.md @@ -94,6 +94,9 @@ List of supported formats: - MOT sequences - [Format specification](https://arxiv.org/pdf/1906.04567.pdf) - [Dataset example](../tests/assets/mot_dataset) +- MOTS (png) + - [Format specification](https://www.vision.rwth-aachen.de/page/mots) + - [Dataset example](../tests/assets/mots_dataset) - CVAT - [Format specification](https://github.com/opencv/cvat/blob/develop/cvat/apps/documentation/xml_format.md) - [Dataset example](../tests/assets/cvat_dataset) diff --git a/tests/assets/mots_dataset/train/images/1.jpg b/tests/assets/mots_dataset/train/images/1.jpg new file mode 100644 index 0000000000000000000000000000000000000000..4e4102ce4325b60e0817ae6099e23bec5dfd3ac0 GIT binary patch literal 332 zcmex=^(PF6}rMnOeST|r4lSw=>~TvNxu(8R<ECr+Nabot8FYu9hwy!G(W<0ns_J%91?)yGetzkL1n V{m0K=|8FsHfc#_6@PX_9O#rwCY2p9? literal 0 HcmV?d00001 diff --git a/tests/assets/mots_dataset/train/images/2.jpg b/tests/assets/mots_dataset/train/images/2.jpg new file mode 100644 index 0000000000000000000000000000000000000000..4e4102ce4325b60e0817ae6099e23bec5dfd3ac0 GIT binary patch literal 332 zcmex=^(PF6}rMnOeST|r4lSw=>~TvNxu(8R<ECr+Nabot8FYu9hwy!G(W<0ns_J%91?)yGetzkL1n V{m0K=|8FsHfc#_6@PX_9O#rwCY2p9? literal 0 HcmV?d00001 diff --git a/tests/assets/mots_dataset/train/instances/1.png b/tests/assets/mots_dataset/train/instances/1.png new file mode 100644 index 0000000000000000000000000000000000000000..b31985596cffc102104adbc234ee10b2b0562901 GIT binary patch literal 76 zcmeAS@N?(olHy`uVBq!ia0vp^tU$~t03;ZaS)bzopr09Mr#A^-pY literal 0 HcmV?d00001 diff --git a/tests/assets/mots_dataset/train/instances/2.png b/tests/assets/mots_dataset/train/instances/2.png new file mode 100644 index 0000000000000000000000000000000000000000..4d3f5aaef37e1d5d239a80f94c84fb1c79bff235 GIT binary patch literal 72 zcmeAS@N?(olHy`uVBq!ia0vp^tU$~t03;ZaS)b)7z8Sq Un1Wv3E(9s@boFyt=akR{0LWqwZ~y=R literal 0 HcmV?d00001 diff --git a/tests/assets/mots_dataset/train/instances/labels.txt b/tests/assets/mots_dataset/train/instances/labels.txt new file mode 100644 index 000000000000..27a7ea605619 --- /dev/null +++ b/tests/assets/mots_dataset/train/instances/labels.txt @@ -0,0 +1,4 @@ +a +b +c +d \ No newline at end of file diff --git a/tests/assets/mots_dataset/val/images/3.jpg b/tests/assets/mots_dataset/val/images/3.jpg new file mode 100644 index 0000000000000000000000000000000000000000..4e4102ce4325b60e0817ae6099e23bec5dfd3ac0 GIT binary patch literal 332 zcmex=^(PF6}rMnOeST|r4lSw=>~TvNxu(8R<ECr+Nabot8FYu9hwy!G(W<0ns_J%91?)yGetzkL1n V{m0K=|8FsHfc#_6@PX_9O#rwCY2p9? literal 0 HcmV?d00001 diff --git a/tests/assets/mots_dataset/val/instances/3.png b/tests/assets/mots_dataset/val/instances/3.png new file mode 100644 index 0000000000000000000000000000000000000000..a90646dd83cbe495035e19346dd1790504d416e7 GIT binary patch literal 74 zcmeAS@N?(olHy`uVBq!ia0vp^tU$~t03;ZaS)b
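
A short usage sketch of the added plugin, for reference while reviewing. It is not part of the patch: the dataset paths point at the test assets introduced above, the `mots_export` output directory is arbitrary, and the final call assumes the `convert()` classmethod provided by the `Converter` base class.

```python
import numpy as np

from datumaro.components.extractor import AnnotationType
from datumaro.plugins.mots_format import MotsPngConverter, MotsPngExtractor
from datumaro.util.image import load_image

# Read one subset directory; it is expected to contain 'images/' and
# 'instances/' (see tests/assets/mots_dataset/train for the layout).
extractor = MotsPngExtractor('tests/assets/mots_dataset/train',
    subset_name='train')

labels = extractor.categories()[AnnotationType.label]
for item in extractor:
    for ann in item.annotations:
        if ann.type != AnnotationType.mask:
            continue
        # Objects are encoded in the combined png as
        # class_id * MAX_INSTANCES + instance_id; the extractor exposes
        # these as 'label' and the 'track_id' attribute.
        print(item.id, labels.items[ann.label].name,
            ann.attributes['track_id'], int(ann.image.sum()))

# The combined panoptic-style mask can also be read directly with the
# extended image helper, which now accepts a dtype:
combined = load_image('tests/assets/mots_dataset/train/instances/1.png',
    dtype=np.uint16)
class_ids, instance_ids = np.divmod(np.unique(combined), 1000)

# Re-export (assumes the Converter.convert() helper from the base class):
MotsPngConverter.convert(extractor, save_dir='mots_export', save_images=True)
```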