Add MOTS png format support (cvat-ai#21)

TOsmanov · Sep 19, 2020 · 7b703bb · 7b703bb
1 parent c2d6c79
commit 7b703bb
Show file tree

Hide file tree

Showing 16 changed files with 311 additions and 16 deletions.
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -10,6 +10,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 ### Added
 - `reindex` option in COCO and CVAT converters (<https://github.com/openvinotoolkit/datumaro/pull/18>)
 - Support for relative paths in LabelMe format (<https://github.com/openvinotoolkit/datumaro/pull/19>)
+- MOTS png mask format support (<https://github.com/openvinotoolkit/datumaro/21>)
 
 ### Changed
 -

diff --git a/README.md b/README.md
@@ -112,6 +112,7 @@ CVAT annotations                             ---> Publication, statistics etc.
   - [YOLO](https://github.com/AlexeyAB/darknet#how-to-train-pascal-voc-data) (`bboxes`)
   - [TF Detection API](https://github.com/tensorflow/models/blob/master/research/object_detection/g3doc/using_your_own_dataset.md) (`bboxes`, `masks`)
   - [MOT sequences](https://arxiv.org/pdf/1906.04567.pdf)
+  - [MOTS PNG](https://www.vision.rwth-aachen.de/page/mots)
   - [CVAT](https://github.com/opencv/cvat/blob/develop/cvat/apps/documentation/xml_format.md)
   - [LabelMe](http://labelme.csail.mit.edu/Release3.0)
 - Dataset building

diff --git a/datumaro/components/extractor.py b/datumaro/components/extractor.py
@@ -3,9 +3,10 @@
 #
 # SPDX-License-Identifier: MIT
 
-from collections import namedtuple
 from enum import Enum
+from typing import List, Dict
 import numpy as np
+import os.path as osp
 
 import attr
 from attr import attrs, attrib
@@ -584,6 +585,9 @@ def __init__(self, length=None, subset=None):
             subset = None
         self._subset = subset
 
+        self._categories = {}
+        self._items = []
+
     def subsets(self):
         return [self._subset]
 
@@ -592,13 +596,39 @@ def get_subset(self, name):
             raise Exception("Unknown subset '%s' requested" % name)
         return self
 
+    def categories(self):
+        return self._categories
+
+    def __iter__(self):
+        for item in self._items:
+            yield item
+
+    def __len__(self):
+        return len(self._items)
+
 class Importer:
     @classmethod
     def detect(cls, path):
+        return len(cls.find_subsets(path)) != 0
+
+    @classmethod
+    def find_subsets(cls, path) -> List[Dict]:
+        """Returns a list of Sources"""
         raise NotImplementedError()
 
     def __call__(self, path, **extra_params):
-        raise NotImplementedError()
+        from datumaro.components.project import Project # cyclic import
+        project = Project()
+
+        subsets = self.find_subsets(path)
+        if len(subsets) == 0:
+            raise Exception("Failed to find dataset at '%s'" % path)
+
+        for desc in subsets:
+            source_name = osp.splitext(osp.basename(desc['url']))[0]
+            project.add_source(source_name, desc)
+
+        return project
 
 class Transform(Extractor):
     @staticmethod

diff --git a/datumaro/plugins/mots_format.py b/datumaro/plugins/mots_format.py
@@ -0,0 +1,153 @@
+# Copyright (C) 2020 Intel Corporation
+#
+# SPDX-License-Identifier: MIT
+
+# Implements MOTS format https://www.vision.rwth-aachen.de/page/mots
+
+from enum import Enum
+from glob import glob
+import logging as log
+import numpy as np
+import os
+import os.path as osp
+
+from datumaro.components.extractor import (SourceExtractor, DEFAULT_SUBSET_NAME,
+    DatasetItem, AnnotationType, Mask, LabelCategories
+)
+from datumaro.components.extractor import Importer
+from datumaro.components.converter import Converter
+from datumaro.util.image import load_image, save_image
+from datumaro.util.mask_tools import merge_masks
+
+
+class MotsPath:
+    MASKS_DIR = 'instances'
+    IMAGE_DIR = 'images'
+    IMAGE_EXT = '.jpg'
+    LABELS_FILE = 'labels.txt'
+    MAX_INSTANCES = 1000
+
+MotsLabels = Enum('MotsLabels', [
+    ('background', 0),
+    ('car', 1),
+    ('pedestrian', 2),
+    ('ignored', 10),
+])
+
+class MotsPngExtractor(SourceExtractor):
+    @staticmethod
+    def detect_dataset(path):
+        if osp.isdir(osp.join(path, MotsPath.MASKS_DIR)):
+            return [{'url': path, 'format': 'mots_png'}]
+        return []
+
+    def __init__(self, path, subset_name=None):
+        assert osp.isdir(path), path
+        super().__init__(subset=subset_name)
+        self._images_dir = osp.join(path, 'images')
+        self._anno_dir = osp.join(path, MotsPath.MASKS_DIR)
+        self._categories = self._parse_categories(
+            osp.join(self._anno_dir, MotsPath.LABELS_FILE))
+        self._items = self._parse_items()
+
+    def _parse_categories(self, path):
+        if osp.isfile(path):
+            with open(path) as f:
+                labels = [l.strip() for l in f]
+        else:
+            labels = [l.name for l in MotsLabels]
+        return { AnnotationType.label: LabelCategories.from_iterable(labels) }
+
+    def _parse_items(self):
+        items = []
+        for p in sorted(p for p in
+                glob(self._anno_dir + '/**/*.png', recursive=True)):
+            item_id = osp.splitext(osp.relpath(p, self._anno_dir))[0]
+            items.append(DatasetItem(id=item_id, subset=self._subset,
+                image=osp.join(self._images_dir, item_id + MotsPath.IMAGE_EXT),
+                annotations=self._parse_annotations(p)))
+        return items
+
+    @staticmethod
+    def _lazy_extract_mask(mask, v):
+        return lambda: mask == v
+
+    def _parse_annotations(self, path):
+        combined_mask = load_image(path, dtype=np.uint16)
+        masks = []
+        for obj_id in np.unique(combined_mask):
+            class_id, instance_id = divmod(obj_id, MotsPath.MAX_INSTANCES)
+            z_order = 0
+            if class_id == 0:
+                continue # background
+            if class_id == 10 and \
+                    len(self._categories[AnnotationType.label].items) < 10:
+                z_order = 1
+                class_id = self._categories[AnnotationType.label].find(
+                    MotsLabels.ignored.name)[0]
+            else:
+                class_id -= 1
+            masks.append(Mask(self._lazy_extract_mask(combined_mask, obj_id),
+                label=class_id, z_order=z_order,
+                attributes={'track_id': instance_id}))
+        return masks
+
+
+class MotsImporter(Importer):
+    @classmethod
+    def find_subsets(cls, path):
+        if not osp.isdir(path):
+            raise Exception("Expected directory path, got '%s'" % path)
+        path = osp.normpath(path)
+
+        subsets = []
+        subsets.extend(MotsPngExtractor.detect_dataset(path))
+        if not subsets:
+            for p in os.listdir(path):
+                detected = MotsPngExtractor.detect_dataset(osp.join(path, p))
+                for s in detected:
+                    s.setdefault('options', {})['subset_name'] = p
+                subsets.extend(detected)
+        return subsets
+
+
+class MotsPngConverter(Converter):
+    DEFAULT_IMAGE_EXT = MotsPath.IMAGE_EXT
+
+    def apply(self):
+        for subset_name in self._extractor.subsets():
+            subset = self._extractor.get_subset(subset_name)
+            subset_name = subset_name or DEFAULT_SUBSET_NAME
+
+            subset_dir = osp.join(self._save_dir, subset_name)
+            images_dir = osp.join(subset_dir, MotsPath.IMAGE_DIR)
+            anno_dir = osp.join(subset_dir, MotsPath.MASKS_DIR)
+
+            for item in subset:
+                log.debug("Converting item '%s'", item.id)
+
+                if self._save_images:
+                    if item.has_image and item.image.has_data:
+                        self._save_image(item,
+                            osp.join(images_dir, self._make_image_filename(item)))
+                    else:
+                        log.debug("Item '%s' has no image", item.id)
+
+                self._save_annotations(item, anno_dir)
+
+            with open(osp.join(anno_dir, MotsPath.LABELS_FILE), 'w') as f:
+                f.write('\n'.join(l.name
+                    for l in subset.categories()[AnnotationType.label].items))
+
+    def _save_annotations(self, item, anno_dir):
+        masks = [a for a in item.annotations if a.type == AnnotationType.mask]
+        if not masks:
+            return
+
+        instance_ids = [int(a.attributes['track_id']) for a in masks]
+        masks = sorted(zip(masks, instance_ids), key=lambda e: e[0].z_order)
+        mask = merge_masks([
+            m.image * (MotsPath.MAX_INSTANCES * (1 + m.label) + id)
+            for m, id in masks])
+        save_image(osp.join(anno_dir, item.id + '.png'), mask,
+            create_dir=True, dtype=np.uint16)
diff --git a/datumaro/util/image.py b/datumaro/util/image.py
@@ -23,19 +23,19 @@
 from datumaro.util.image_cache import ImageCache as _ImageCache
 
 
-def load_image(path):
+def load_image(path, dtype=np.float32):
     """
     Reads an image in the HWC Grayscale/BGR(A) float [0; 255] format.
     """
 
     if _IMAGE_BACKEND == _IMAGE_BACKENDS.cv2:
         import cv2
         image = cv2.imread(path, cv2.IMREAD_UNCHANGED)
-        image = image.astype(np.float32)
+        image = image.astype(dtype)
     elif _IMAGE_BACKEND == _IMAGE_BACKENDS.PIL:
         from PIL import Image
         image = Image.open(path)
-        image = np.asarray(image, dtype=np.float32)
+        image = np.asarray(image, dtype=dtype)
         if len(image.shape) == 3 and image.shape[2] in {3, 4}:
             image[:, :, :3] = image[:, :, 2::-1] # RGB to BGR
     else:
@@ -48,7 +48,7 @@ def load_image(path):
         assert image.shape[2] in {3, 4}
     return image
 
-def save_image(path, image, create_dir=False, **kwargs):
+def save_image(path, image, create_dir=False, dtype=np.uint8, **kwargs):
     # NOTE: Check destination path for existence
     # OpenCV silently fails if target directory does not exist
     dst_dir = osp.dirname(path)
@@ -72,7 +72,7 @@ def save_image(path, image, create_dir=False, **kwargs):
                 int(cv2.IMWRITE_JPEG_QUALITY), kwargs.get('jpeg_quality', 75)
             ]
 
-        image = image.astype(np.uint8)
+        image = image.astype(dtype)
         cv2.imwrite(path, image, params=params)
     elif _IMAGE_BACKEND == _IMAGE_BACKENDS.PIL:
         from PIL import Image
@@ -82,15 +82,15 @@ def save_image(path, image, create_dir=False, **kwargs):
         if kwargs.get('jpeg_quality') == 100:
             params['subsampling'] = 0
 
-        image = image.astype(np.uint8)
+        image = image.astype(dtype)
         if len(image.shape) == 3 and image.shape[2] in {3, 4}:
             image[:, :, :3] = image[:, :, 2::-1] # BGR to RGB
         image = Image.fromarray(image)
         image.save(path, **params)
     else:
         raise NotImplementedError()
 
-def encode_image(image, ext, **kwargs):
+def encode_image(image, ext, dtype=np.uint8, **kwargs):
     if not kwargs:
         kwargs = {}
 
@@ -107,7 +107,7 @@ def encode_image(image, ext, **kwargs):
                 int(cv2.IMWRITE_JPEG_QUALITY), kwargs.get('jpeg_quality', 75)
             ]
 
-        image = image.astype(np.uint8)
+        image = image.astype(dtype)
         success, result = cv2.imencode(ext, image, params=params)
         if not success:
             raise Exception("Failed to encode image to '%s' format" % (ext))
@@ -123,7 +123,7 @@ def encode_image(image, ext, **kwargs):
         if kwargs.get('jpeg_quality') == 100:
             params['subsampling'] = 0
 
-        image = image.astype(np.uint8)
+        image = image.astype(dtype)
         if len(image.shape) == 3 and image.shape[2] in {3, 4}:
             image[:, :, :3] = image[:, :, 2::-1] # BGR to RGB
         image = Image.fromarray(image)
@@ -133,16 +133,16 @@ def encode_image(image, ext, **kwargs):
     else:
         raise NotImplementedError()
 
-def decode_image(image_bytes):
+def decode_image(image_bytes, dtype=np.float32):
     if _IMAGE_BACKEND == _IMAGE_BACKENDS.cv2:
         import cv2
         image = np.frombuffer(image_bytes, dtype=np.uint8)
         image = cv2.imdecode(image, cv2.IMREAD_UNCHANGED)
-        image = image.astype(np.float32)
+        image = image.astype(dtype)
     elif _IMAGE_BACKEND == _IMAGE_BACKENDS.PIL:
         from PIL import Image
         image = Image.open(BytesIO(image_bytes))
-        image = np.asarray(image, dtype=np.float32)
+        image = np.asarray(image, dtype=dtype)
         if len(image.shape) == 3 and image.shape[2] in {3, 4}:
             image[:, :, :3] = image[:, :, 2::-1] # RGB to BGR
     else:

diff --git a/datumaro/util/mask_tools.py b/datumaro/util/mask_tools.py
@@ -106,8 +106,7 @@ def make_binary_mask(mask):
 
 
 def load_mask(path, inverse_colormap=None):
-    mask = load_image(path)
-    mask = mask.astype(np.uint8)
+    mask = load_image(path, dtype=np.uint8)
     if inverse_colormap is not None:
         if len(mask.shape) == 3 and mask.shape[2] != 1:
             mask = unpaint_mask(mask, inverse_colormap)

diff --git a/docs/user_manual.md b/docs/user_manual.md
@@ -94,6 +94,9 @@ List of supported formats:
 - MOT sequences
   - [Format specification](https://arxiv.org/pdf/1906.04567.pdf)
   - [Dataset example](../tests/assets/mot_dataset)
+- MOTS (png)
+  - [Format specification](https://www.vision.rwth-aachen.de/page/mots)
+  - [Dataset example](../tests/assets/mots_dataset)
 - CVAT
   - [Format specification](https://github.com/opencv/cvat/blob/develop/cvat/apps/documentation/xml_format.md)
   - [Dataset example](../tests/assets/cvat_dataset)

diff --git a/tests/assets/mots_dataset/train/images/1.jpg b/tests/assets/mots_dataset/train/images/1.jpg
diff --git a/tests/assets/mots_dataset/train/images/2.jpg b/tests/assets/mots_dataset/train/images/2.jpg
diff --git a/tests/assets/mots_dataset/train/instances/1.png b/tests/assets/mots_dataset/train/instances/1.png
diff --git a/tests/assets/mots_dataset/train/instances/2.png b/tests/assets/mots_dataset/train/instances/2.png
diff --git a/tests/assets/mots_dataset/train/instances/labels.txt b/tests/assets/mots_dataset/train/instances/labels.txt
@@ -0,0 +1,4 @@
+a
+b
+c
+d
diff --git a/tests/assets/mots_dataset/val/images/3.jpg b/tests/assets/mots_dataset/val/images/3.jpg
diff --git a/tests/assets/mots_dataset/val/instances/3.png b/tests/assets/mots_dataset/val/instances/3.png
diff --git a/tests/assets/mots_dataset/val/instances/labels.txt b/tests/assets/mots_dataset/val/instances/labels.txt
@@ -0,0 +1,4 @@
+a
+b
+c
+d
-Original file line number
+Diff line change
@@ -0,0 +1,4 @@
+    a
+    b
+    c
+    d