Skip to content

Commit

Permalink
Add MOTS png format support (cvat-ai#21)
Browse files Browse the repository at this point in the history
  • Loading branch information
Maxim Zhiltsov authored Sep 19, 2020
1 parent c2d6c79 commit 7b703bb
Show file tree
Hide file tree
Showing 16 changed files with 311 additions and 16 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
### Added
- `reindex` option in COCO and CVAT converters (<https://github.com/openvinotoolkit/datumaro/pull/18>)
- Support for relative paths in LabelMe format (<https://github.com/openvinotoolkit/datumaro/pull/19>)
- MOTS png mask format support (<https://github.com/openvinotoolkit/datumaro/pull/21>)

### Changed
-
Expand Down
1 change: 1 addition & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -112,6 +112,7 @@ CVAT annotations ---> Publication, statistics etc.
- [YOLO](https://github.com/AlexeyAB/darknet#how-to-train-pascal-voc-data) (`bboxes`)
- [TF Detection API](https://github.com/tensorflow/models/blob/master/research/object_detection/g3doc/using_your_own_dataset.md) (`bboxes`, `masks`)
- [MOT sequences](https://arxiv.org/pdf/1906.04567.pdf)
- [MOTS PNG](https://www.vision.rwth-aachen.de/page/mots)
- [CVAT](https://github.com/opencv/cvat/blob/develop/cvat/apps/documentation/xml_format.md)
- [LabelMe](http://labelme.csail.mit.edu/Release3.0)
- Dataset building
Expand Down
34 changes: 32 additions & 2 deletions datumaro/components/extractor.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,10 @@
#
# SPDX-License-Identifier: MIT

from collections import namedtuple
from enum import Enum
from typing import List, Dict
import numpy as np
import os.path as osp

import attr
from attr import attrs, attrib
Expand Down Expand Up @@ -584,6 +585,9 @@ def __init__(self, length=None, subset=None):
subset = None
self._subset = subset

self._categories = {}
self._items = []

def subsets(self):
    """Return the subsets of this extractor as a one-element list.

    The list holds the single subset value stored on the instance.
    """
    only_subset = self._subset
    return [only_subset]

Expand All @@ -592,13 +596,39 @@ def get_subset(self, name):
raise Exception("Unknown subset '%s' requested" % name)
return self

def categories(self):
    """Return the categories mapping stored on this extractor.

    The stored object itself is returned, not a copy.
    """
    stored_categories = self._categories
    return stored_categories

def __iter__(self):
    """Yield the stored dataset items one by one, in their stored order."""
    yield from self._items

def __len__(self):
    """Return the number of stored dataset items."""
    item_count = len(self._items)
    return item_count

class Importer:
    """Base class for dataset importers.

    Subclasses implement `find_subsets()`; `detect()` and `__call__()` are
    built on top of it.
    """

    @classmethod
    def detect(cls, path):
        """Return True if `find_subsets()` reports at least one source
        at `path`, False otherwise."""
        return len(cls.find_subsets(path)) != 0

    @classmethod
    def find_subsets(cls, path) -> List[Dict]:
        """Returns a list of Sources"""
        raise NotImplementedError()

    def __call__(self, path, **extra_params):
        """Create a Project with one source per description found at `path`.

        Each description returned by `find_subsets()` is registered with
        `project.add_source()`. The source name is the last component of
        the description's 'url', with the extension removed.

        Raises:
            Exception: if `find_subsets()` finds nothing at `path`.
        """
        # NOTE(review): extra_params is accepted but not used in this method.
        subsets = self.find_subsets(path)
        if len(subsets) == 0:
            raise Exception("Failed to find dataset at '%s'" % path)

        # Imported here, and only once it is needed, to avoid a cyclic import
        from datumaro.components.project import Project # cyclic import
        project = Project()

        for desc in subsets:
            source_name = osp.splitext(osp.basename(desc['url']))[0]
            project.add_source(source_name, desc)

        return project

class Transform(Extractor):
@staticmethod
Expand Down
153 changes: 153 additions & 0 deletions datumaro/plugins/mots_format.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,153 @@
# Copyright (C) 2020 Intel Corporation
#
# SPDX-License-Identifier: MIT

# Implements MOTS format https://www.vision.rwth-aachen.de/page/mots

from enum import Enum
from glob import glob
import logging as log
import numpy as np
import os
import os.path as osp

from datumaro.components.extractor import (SourceExtractor, DEFAULT_SUBSET_NAME,
DatasetItem, AnnotationType, Mask, LabelCategories
)
from datumaro.components.extractor import Importer
from datumaro.components.converter import Converter
from datumaro.util.image import load_image, save_image
from datumaro.util.mask_tools import merge_masks


class MotsPath:
    """Directory layout and naming constants of the MOTS PNG format."""

    # Subdirectory with the instance mask PNGs and the labels file
    MASKS_DIR = 'instances'
    # Subdirectory with the images
    IMAGE_DIR = 'images'
    # Extension used for image files
    IMAGE_EXT = '.jpg'
    # File with one label name per line
    LABELS_FILE = 'labels.txt'
    # A mask pixel value encodes class_id * MAX_INSTANCES + instance_id
    MAX_INSTANCES = 1000

class MotsLabels(Enum):
    """Default class ids used when a dataset has no labels file."""

    background = 0
    car = 1
    pedestrian = 2
    ignored = 10

class MotsPngExtractor(SourceExtractor):
    """Reads a MOTS PNG dataset directory.

    Every PNG found under the masks directory becomes one dataset item.
    Each distinct non-background pixel value in a PNG becomes one Mask
    annotation of that item.
    """

    @staticmethod
    def detect_dataset(path):
        """Return a one-element list with a source description if `path`
        contains the masks directory, otherwise an empty list."""
        if osp.isdir(osp.join(path, MotsPath.MASKS_DIR)):
            return [{'url': path, 'format': 'mots_png'}]
        return []

    def __init__(self, path, subset_name=None):
        """Load categories and items from the dataset directory `path`.

        Args:
            path: dataset directory; must exist.
            subset_name: subset passed on to the base extractor.
        """
        assert osp.isdir(path), path
        super().__init__(subset=subset_name)
        # Use the shared layout constant instead of a hard-coded name
        self._images_dir = osp.join(path, MotsPath.IMAGE_DIR)
        self._anno_dir = osp.join(path, MotsPath.MASKS_DIR)
        self._categories = self._parse_categories(
            osp.join(self._anno_dir, MotsPath.LABELS_FILE))
        self._items = self._parse_items()

    def _parse_categories(self, path):
        """Build label categories from the labels file at `path`.

        The file is read as one label name per line. If it does not exist,
        the names of MotsLabels are used instead.
        """
        if osp.isfile(path):
            with open(path) as f:
                labels = [l.strip() for l in f]
        else:
            labels = [l.name for l in MotsLabels]
        return { AnnotationType.label: LabelCategories.from_iterable(labels) }

    def _parse_items(self):
        """Return a DatasetItem for every mask PNG, sorted by file path.

        The item id is the PNG path relative to the masks directory,
        without the extension. The image path is built from the same id.
        """
        items = []
        for p in sorted(glob(self._anno_dir + '/**/*.png', recursive=True)):
            item_id = osp.splitext(osp.relpath(p, self._anno_dir))[0]
            items.append(DatasetItem(id=item_id, subset=self._subset,
                image=osp.join(self._images_dir, item_id + MotsPath.IMAGE_EXT),
                annotations=self._parse_annotations(p)))
        return items

    @staticmethod
    def _lazy_extract_mask(mask, v):
        """Return a callable that computes the binary mask `mask == v`.

        A separate function is used so that each callable binds its own `v`.
        """
        return lambda: mask == v

    def _parse_annotations(self, path):
        """Split the combined instance mask at `path` into Mask annotations.

        Each pixel value is decoded as
        class_id * MotsPath.MAX_INSTANCES + instance_id.
        The instance id is stored in the 'track_id' attribute.
        """
        combined_mask = load_image(path, dtype=np.uint16)
        label_cat = self._categories[AnnotationType.label]
        ignored_id = MotsLabels.ignored.value

        masks = []
        for obj_id in np.unique(combined_mask):
            # Convert to a plain int so that no NumPy scalars end up
            # in the label or in the annotation attributes
            class_id, instance_id = divmod(int(obj_id), MotsPath.MAX_INSTANCES)
            z_order = 0
            if class_id == MotsLabels.background.value:
                continue # background
            if class_id == ignored_id and len(label_cat.items) < ignored_id:
                # With fewer labels than the 'ignored' class id, this id
                # cannot be a regular label, so look it up by name
                z_order = 1
                class_id = label_cat.find(MotsLabels.ignored.name)[0]
            else:
                # Stored class ids are shifted by one relative to label indices
                class_id -= 1
            masks.append(Mask(self._lazy_extract_mask(combined_mask, obj_id),
                label=class_id, z_order=z_order,
                attributes={'track_id': instance_id}))
        return masks


class MotsImporter(Importer):
    """Finds MOTS PNG sources in a directory or in its direct children."""

    @classmethod
    def find_subsets(cls, path):
        """Return source descriptions for the MOTS datasets found at `path`.

        If `path` itself is a dataset, only that one is returned. Otherwise
        each entry of `path` is checked, and the entry name is recorded as
        the 'subset_name' option of the sources found in it.

        Raises:
            Exception: if `path` is not a directory.
        """
        if not osp.isdir(path):
            raise Exception("Expected directory path, got '%s'" % path)
        path = osp.normpath(path)

        found = list(MotsPngExtractor.detect_dataset(path))
        if found:
            return found

        for entry in os.listdir(path):
            entry_sources = MotsPngExtractor.detect_dataset(
                osp.join(path, entry))
            for source in entry_sources:
                options = source.setdefault('options', {})
                options['subset_name'] = entry
            found.extend(entry_sources)
        return found


class MotsPngConverter(Converter):
    """Writes a dataset in the MOTS PNG layout.

    Each subset gets its own directory with an images subdirectory and a
    masks subdirectory. The masks subdirectory also receives the labels file.
    """

    DEFAULT_IMAGE_EXT = MotsPath.IMAGE_EXT

    def apply(self):
        """Export every subset of the extractor to the save directory."""
        for subset_name in self._extractor.subsets():
            subset = self._extractor.get_subset(subset_name)
            subset_name = subset_name or DEFAULT_SUBSET_NAME

            subset_dir = osp.join(self._save_dir, subset_name)
            images_dir = osp.join(subset_dir, MotsPath.IMAGE_DIR)
            anno_dir = osp.join(subset_dir, MotsPath.MASKS_DIR)
            # The labels file is always written below. Create the directory
            # up front, because no mask (and so no directory) is written
            # for a subset whose items have no mask annotations.
            os.makedirs(anno_dir, exist_ok=True)

            for item in subset:
                log.debug("Converting item '%s'", item.id)

                if self._save_images:
                    if item.has_image and item.image.has_data:
                        self._save_image(item,
                            osp.join(images_dir, self._make_image_filename(item)))
                    else:
                        log.debug("Item '%s' has no image", item.id)

                self._save_annotations(item, anno_dir)

            with open(osp.join(anno_dir, MotsPath.LABELS_FILE), 'w') as f:
                f.write('\n'.join(l.name
                    for l in subset.categories()[AnnotationType.label].items))

    def _save_annotations(self, item, anno_dir):
        """Write the mask annotations of `item` as one 16-bit PNG.

        Nothing is written for an item without mask annotations. Each mask
        is painted with the value
        MotsPath.MAX_INSTANCES * (1 + label) + track_id.

        Raises:
            KeyError: if a mask has no 'track_id' attribute.
        """
        masks = [a for a in item.annotations if a.type == AnnotationType.mask]
        if not masks:
            return

        instance_ids = [int(a.attributes['track_id']) for a in masks]
        # Masks are merged in ascending z_order.
        # NOTE(review): presumably merge_masks lets later masks overwrite
        # earlier ones - confirm against its implementation.
        ordered = sorted(zip(masks, instance_ids), key=lambda e: e[0].z_order)
        mask = merge_masks([
            m.image * (MotsPath.MAX_INSTANCES * (1 + m.label) + instance_id)
            for m, instance_id in ordered])
        save_image(osp.join(anno_dir, item.id + '.png'), mask,
            create_dir=True, dtype=np.uint16)
24 changes: 12 additions & 12 deletions datumaro/util/image.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,19 +23,19 @@
from datumaro.util.image_cache import ImageCache as _ImageCache


def load_image(path):
def load_image(path, dtype=np.float32):
"""
Reads an image in the HWC Grayscale/BGR(A) float [0; 255] format.
"""

if _IMAGE_BACKEND == _IMAGE_BACKENDS.cv2:
import cv2
image = cv2.imread(path, cv2.IMREAD_UNCHANGED)
image = image.astype(np.float32)
image = image.astype(dtype)
elif _IMAGE_BACKEND == _IMAGE_BACKENDS.PIL:
from PIL import Image
image = Image.open(path)
image = np.asarray(image, dtype=np.float32)
image = np.asarray(image, dtype=dtype)
if len(image.shape) == 3 and image.shape[2] in {3, 4}:
image[:, :, :3] = image[:, :, 2::-1] # RGB to BGR
else:
Expand All @@ -48,7 +48,7 @@ def load_image(path):
assert image.shape[2] in {3, 4}
return image

def save_image(path, image, create_dir=False, **kwargs):
def save_image(path, image, create_dir=False, dtype=np.uint8, **kwargs):
# NOTE: Check destination path for existence
# OpenCV silently fails if target directory does not exist
dst_dir = osp.dirname(path)
Expand All @@ -72,7 +72,7 @@ def save_image(path, image, create_dir=False, **kwargs):
int(cv2.IMWRITE_JPEG_QUALITY), kwargs.get('jpeg_quality', 75)
]

image = image.astype(np.uint8)
image = image.astype(dtype)
cv2.imwrite(path, image, params=params)
elif _IMAGE_BACKEND == _IMAGE_BACKENDS.PIL:
from PIL import Image
Expand All @@ -82,15 +82,15 @@ def save_image(path, image, create_dir=False, **kwargs):
if kwargs.get('jpeg_quality') == 100:
params['subsampling'] = 0

image = image.astype(np.uint8)
image = image.astype(dtype)
if len(image.shape) == 3 and image.shape[2] in {3, 4}:
image[:, :, :3] = image[:, :, 2::-1] # BGR to RGB
image = Image.fromarray(image)
image.save(path, **params)
else:
raise NotImplementedError()

def encode_image(image, ext, **kwargs):
def encode_image(image, ext, dtype=np.uint8, **kwargs):
if not kwargs:
kwargs = {}

Expand All @@ -107,7 +107,7 @@ def encode_image(image, ext, **kwargs):
int(cv2.IMWRITE_JPEG_QUALITY), kwargs.get('jpeg_quality', 75)
]

image = image.astype(np.uint8)
image = image.astype(dtype)
success, result = cv2.imencode(ext, image, params=params)
if not success:
raise Exception("Failed to encode image to '%s' format" % (ext))
Expand All @@ -123,7 +123,7 @@ def encode_image(image, ext, **kwargs):
if kwargs.get('jpeg_quality') == 100:
params['subsampling'] = 0

image = image.astype(np.uint8)
image = image.astype(dtype)
if len(image.shape) == 3 and image.shape[2] in {3, 4}:
image[:, :, :3] = image[:, :, 2::-1] # BGR to RGB
image = Image.fromarray(image)
Expand All @@ -133,16 +133,16 @@ def encode_image(image, ext, **kwargs):
else:
raise NotImplementedError()

def decode_image(image_bytes):
def decode_image(image_bytes, dtype=np.float32):
if _IMAGE_BACKEND == _IMAGE_BACKENDS.cv2:
import cv2
image = np.frombuffer(image_bytes, dtype=np.uint8)
image = cv2.imdecode(image, cv2.IMREAD_UNCHANGED)
image = image.astype(np.float32)
image = image.astype(dtype)
elif _IMAGE_BACKEND == _IMAGE_BACKENDS.PIL:
from PIL import Image
image = Image.open(BytesIO(image_bytes))
image = np.asarray(image, dtype=np.float32)
image = np.asarray(image, dtype=dtype)
if len(image.shape) == 3 and image.shape[2] in {3, 4}:
image[:, :, :3] = image[:, :, 2::-1] # RGB to BGR
else:
Expand Down
3 changes: 1 addition & 2 deletions datumaro/util/mask_tools.py
Original file line number Diff line number Diff line change
Expand Up @@ -106,8 +106,7 @@ def make_binary_mask(mask):


def load_mask(path, inverse_colormap=None):
mask = load_image(path)
mask = mask.astype(np.uint8)
mask = load_image(path, dtype=np.uint8)
if inverse_colormap is not None:
if len(mask.shape) == 3 and mask.shape[2] != 1:
mask = unpaint_mask(mask, inverse_colormap)
Expand Down
3 changes: 3 additions & 0 deletions docs/user_manual.md
Original file line number Diff line number Diff line change
Expand Up @@ -94,6 +94,9 @@ List of supported formats:
- MOT sequences
- [Format specification](https://arxiv.org/pdf/1906.04567.pdf)
- [Dataset example](../tests/assets/mot_dataset)
- MOTS (png)
- [Format specification](https://www.vision.rwth-aachen.de/page/mots)
- [Dataset example](../tests/assets/mots_dataset)
- CVAT
- [Format specification](https://github.com/opencv/cvat/blob/develop/cvat/apps/documentation/xml_format.md)
- [Dataset example](../tests/assets/cvat_dataset)
Expand Down
Binary file added tests/assets/mots_dataset/train/images/1.jpg
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added tests/assets/mots_dataset/train/images/2.jpg
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added tests/assets/mots_dataset/train/instances/1.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added tests/assets/mots_dataset/train/instances/2.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
4 changes: 4 additions & 0 deletions tests/assets/mots_dataset/train/instances/labels.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
a
b
c
d
Binary file added tests/assets/mots_dataset/val/images/3.jpg
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added tests/assets/mots_dataset/val/instances/3.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
4 changes: 4 additions & 0 deletions tests/assets/mots_dataset/val/instances/labels.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
a
b
c
d
Loading

0 comments on commit 7b703bb

Please sign in to comment.