Skip to content

Commit

Permalink
Fix cuboids / 3d / M6 (cvat-ai#320)
Browse files Browse the repository at this point in the history
* CVAT-3D Milestone-6: Added Supervisely Point Cloud and KITTI Raw 3D formats

* Added Cuboid3d annotations

* Added docs for new formats

Co-authored-by: cdp <cdp123>
Co-authored-by: Jayraj <[email protected]>
Co-authored-by: Roman Donchenko <[email protected]>
  • Loading branch information
3 people authored Jul 8, 2021
1 parent d52e4c0 commit da1ecb3
Show file tree
Hide file tree
Showing 39 changed files with 3,567 additions and 48 deletions.
4 changes: 3 additions & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -19,9 +19,11 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
only images and image-level labels can be read/written
(<https://github.com/openvinotoolkit/datumaro/pull/291>,
<https://github.com/openvinotoolkit/datumaro/pull/315>).
- Support for Supervisely Point Cloud dataset format (<https://github.com/openvinotoolkit/datumaro/pull/245>)
- Support for KITTI Raw / Velodyne Points dataset format (<https://github.com/openvinotoolkit/datumaro/pull/245>)

### Changed
- Tensorflow AVX check is made optional in API and is disabled by default (<https://github.com/openvinotoolkit/datumaro/pull/305>)
- Tensorflow AVX check is made optional in API and disabled by default (<https://github.com/openvinotoolkit/datumaro/pull/305>)
- Extensions for images in ImageNet_txt are now mandatory (<https://github.com/openvinotoolkit/datumaro/pull/302>)
- Several dependencies now have lower bounds (<https://github.com/openvinotoolkit/datumaro/pull/308>)

Expand Down
3 changes: 2 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -142,7 +142,8 @@ CVAT annotations ---> Publication, statistics etc.
- [MNIST in CSV](https://pjreddie.com/projects/mnist-in-csv/) (`classification`)
- [CamVid](http://mi.eng.cam.ac.uk/research/projects/VideoRec/CamVid/)
- [Cityscapes](https://www.cityscapes-dataset.com/)
- [Kitti](http://www.cvlibs.net/datasets/kitti/index.php) (`segmentation`, `detection`)
- [Kitti](http://www.cvlibs.net/datasets/kitti/index.php) (`segmentation`, `detection`, `3D raw` / `velodyne points`)
- [Supervisely](https://docs.supervise.ly/data-organization/00_ann_format_navi) (`point cloud`)
- [CVAT](https://github.com/opencv/cvat/blob/develop/cvat/apps/documentation/xml_format.md)
- [LabelMe](http://labelme.csail.mit.edu/Release3.0)
- [ICDAR13/15](https://rrc.cvc.uab.es/?ch=2) (`word_recognition`, `text_localization`, `text_segmentation`)
Expand Down
55 changes: 37 additions & 18 deletions datumaro/components/converter.py
Original file line number Diff line number Diff line change
@@ -1,15 +1,16 @@

# Copyright (C) 2019-2020 Intel Corporation
# Copyright (C) 2019-2021 Intel Corporation
#
# SPDX-License-Identifier: MIT

from typing import Union
import logging as log
import os
import os.path as osp
import shutil

from datumaro.components.cli_plugin import CliPlugin
from datumaro.util.image import ByteImage, save_image
from datumaro.components.extractor import DatasetItem
from datumaro.util.image import Image


class Converter(CliPlugin):
Expand Down Expand Up @@ -50,24 +51,34 @@ def __init__(self, extractor, save_dir, save_images=False,
self._extractor = extractor
self._save_dir = save_dir

def _find_image_ext(self, item):
def _find_image_ext(self, item: Union[DatasetItem, Image]):
src_ext = None
if item.has_image:

if isinstance(item, DatasetItem) and item.has_image:
src_ext = item.image.ext
elif isinstance(item, Image):
src_ext = item.ext

return self._image_ext or src_ext or self._default_image_ext

def _make_image_filename(self, item, *, name=None, subdir=None):
def _make_item_filename(self, item, *, name=None, subdir=None):
name = name or item.id
subdir = subdir or ''
return osp.join(subdir, name + self._find_image_ext(item))
return osp.join(subdir, name)

def _make_image_filename(self, item, *, name=None, subdir=None):
return self._make_item_filename(item, name=name, subdir=subdir) + \
self._find_image_ext(item)

def _make_pcd_filename(self, item, *, name=None, subdir=None):
return self._make_item_filename(item, name=name, subdir=subdir) + '.pcd'

def _save_image(self, item, path=None, *,
name=None, subdir=None, basedir=None):
assert not ((subdir or name or basedir) and path), \
"Can't use both subdir or name or basedir and path arguments"

if not item.image.has_data:
if not item.has_image or not item.image.has_data:
log.warning("Item '%s' has no image", item.id)
return

Expand All @@ -76,15 +87,23 @@ def _save_image(self, item, path=None, *,
self._make_image_filename(item, name=name, subdir=subdir))
path = osp.abspath(path)

src_ext = item.image.ext.lower()
dst_ext = osp.splitext(osp.basename(path))[1].lower()
item.image.save(path)

def _save_point_cloud(self, item=None, path=None, *,
name=None, subdir=None, basedir=None):
assert not ((subdir or name or basedir) and path), \
"Can't use both subdir or name or basedir and path arguments"

if not item.point_cloud:
log.warning("Item '%s' has no pcd", item.id)
return

basedir = basedir or self._save_dir
path = path or osp.join(basedir,
self._make_pcd_filename(item, name=name, subdir=subdir))
path = osp.abspath(path)

os.makedirs(osp.dirname(path), exist_ok=True)
if src_ext == dst_ext and osp.isfile(item.image.path):
if item.image.path != path:
shutil.copyfile(item.image.path, path)
elif src_ext == dst_ext and isinstance(item.image, ByteImage):
with open(path, 'wb') as f:
f.write(item.image.get_bytes())
else:
save_image(path, item.image.data)
if item.point_cloud and osp.isfile(item.point_cloud):
if item.point_cloud != path:
shutil.copyfile(item.point_cloud, path)
101 changes: 96 additions & 5 deletions datumaro/components/extractor.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@

# Copyright (C) 2019-2021 Intel Corporation
#
# SPDX-License-Identifier: MIT
Expand All @@ -25,6 +24,7 @@ class AnnotationType(Enum):
polyline = auto()
bbox = auto()
caption = auto()
cuboid_3d = auto()

_COORDINATE_ROUNDING_DIGITS = 2

Expand Down Expand Up @@ -359,6 +359,72 @@ def as_polygon(self):
def get_area(self):
return 0


@attrs
class Cuboid3d(Annotation):
    """
    A 3D cuboid annotation.

    Internally stored as 9 numbers: position [x, y, z],
    rotation [rx, ry, rz] and scale [sx, sy, sz].
    """

    _type = AnnotationType.cuboid_3d
    _points = attrib(type=list, default=None)
    label = attrib(converter=attr.converters.optional(int),
        default=None, kw_only=True)

    @_points.validator
    def _points_validator(self, attribute, points):
        # Normalizes the raw value: fills in the identity transform when
        # nothing is given, otherwise checks the length and rounds the
        # coordinates uniformly.
        if points is None:
            points = [0, 0, 0, 0, 0, 0, 1, 1, 1]
        else:
            assert len(points) == 3 + 3 + 3, points
            points = [round(p, _COORDINATE_ROUNDING_DIGITS) for p in points]
        self._points = points

    # will be overridden by attrs, then will be overridden again by us
    # attrs' method will be renamed to __attrs_init__
    def __init__(self, position, rotation=None, scale=None, **kwargs):
        """
        position -- [x, y, z], required
        rotation -- [rx, ry, rz], defaults to no rotation
        scale -- [sx, sy, sz], defaults to unit scale
        """
        assert len(position) == 3, position
        if not rotation:
            rotation = [0] * 3
        if not scale:
            scale = [1] * 3
        kwargs.pop('points', None)
        self.__attrs_init__(points=[*position, *rotation, *scale], **kwargs)
    __actual_init__ = __init__ # save pointer

    # NOTE: the getters below return slice *copies* of self._points, so
    # item assignment like "c.position[0] = v" does not persist (and is
    # not rounded) -- assign the whole triple instead.
    #
    # Fixed: a property setter must be named after its property; the
    # previous "_set_poistion" (also a typo) left "obj.position = v"
    # raising AttributeError. The old setter bodies also wrote into the
    # slice copy returned by the getter (self.position[:] = ...), so the
    # assigned values were silently lost; they now update self._points
    # directly.

    @property
    def position(self):
        """[x, y, z]"""
        return self._points[0:3]

    @position.setter
    def position(self, value):
        self._points[0:3] = \
            [round(p, _COORDINATE_ROUNDING_DIGITS) for p in value]

    @property
    def rotation(self):
        """[rx, ry, rz]"""
        return self._points[3:6]

    @rotation.setter
    def rotation(self, value):
        self._points[3:6] = \
            [round(p, _COORDINATE_ROUNDING_DIGITS) for p in value]

    @property
    def scale(self):
        """[sx, sy, sz]"""
        return self._points[6:9]

    @scale.setter
    def scale(self, value):
        self._points[6:9] = \
            [round(p, _COORDINATE_ROUNDING_DIGITS) for p in value]

assert not hasattr(Cuboid3d, '__attrs_init__') # hopefully, it will be supported
setattr(Cuboid3d, '__attrs_init__', Cuboid3d.__init__)
setattr(Cuboid3d, '__init__', Cuboid3d.__actual_init__)

@attrs
class Polygon(_Shape):
_type = AnnotationType.polygon
Expand Down Expand Up @@ -517,24 +583,49 @@ class DatasetItem:
type=str, validator=not_empty)
annotations = attrib(factory=list, validator=default_if_none(list))
subset = attrib(converter=lambda v: v or DEFAULT_SUBSET_NAME, default=None)

# Currently unused
path = attrib(factory=list, validator=default_if_none(list))

# TODO: introduce "media" field with type info. Replace image and pcd.
image = attrib(type=Image, default=None)
@image.validator
def _image_validator(self, attribute, image):
# TODO: introduce pcd type like Image
point_cloud = attrib(type=str, default=None)
related_images = attrib(type=List[Image], default=None)

def __attrs_post_init__(self):
if (self.has_image and self.has_point_cloud):
raise ValueError("Can't set both image and point cloud info")
if self.related_images and not self.has_point_cloud:
raise ValueError("Related images require point cloud")

def _image_converter(image):
if callable(image) or isinstance(image, np.ndarray):
image = Image(data=image)
elif isinstance(image, str):
image = Image(path=image)
assert image is None or isinstance(image, Image)
self.image = image
assert image is None or isinstance(image, Image), type(image)
return image
image.converter = _image_converter

def _related_image_converter(images):
return list(map(__class__._image_converter, images or []))
related_images.converter = _related_image_converter

@point_cloud.validator
def _point_cloud_validator(self, attribute, pcd):
assert pcd is None or isinstance(pcd, str), type(pcd)

attributes = attrib(factory=dict, validator=default_if_none(dict))

@property
def has_image(self):
return self.image is not None

@property
def has_point_cloud(self):
return self.point_cloud is not None

def wrap(item, **kwargs):
return attr.evolve(item, **kwargs)

Expand Down
52 changes: 39 additions & 13 deletions datumaro/components/operations.py
Original file line number Diff line number Diff line change
Expand Up @@ -447,6 +447,8 @@ def _for_type(t, **kwargs):
return _make(PointsMerger, **kwargs)
elif t is AnnotationType.caption:
return _make(CaptionsMerger, **kwargs)
elif t is AnnotationType.cuboid_3d:
return _make(Cuboid3dMerger, **kwargs)
else:
raise NotImplementedError("Type %s is not supported" % t)

Expand Down Expand Up @@ -781,6 +783,11 @@ class CaptionsMatcher(AnnotationMatcher):
def match_annotations(self, sources):
raise NotImplementedError()

@attrs
class Cuboid3dMatcher(_ShapeMatcher):
    # Matcher stub for 3D cuboids: a pairwise distance metric is not
    # implemented yet, so cuboids cannot be clustered by shape overlap.
    @staticmethod
    def distance(a, b):
        raise NotImplementedError()

@attrs(kw_only=True)
class AnnotationMerger:
Expand Down Expand Up @@ -823,19 +830,7 @@ class _ShapeMerger(AnnotationMerger, _ShapeMatcher):
quorum = attrib(converter=int, default=0)

def merge_clusters(self, clusters):
merged = []
for cluster in clusters:
label, label_score = self.find_cluster_label(cluster)
shape, shape_score = self.merge_cluster_shape(cluster)

shape.z_order = max(cluster, key=lambda a: a.z_order).z_order
shape.label = label
shape.attributes['score'] = label_score * shape_score \
if label is not None else shape_score

merged.append(shape)

return merged
return list(map(self.merge_cluster, clusters))

def find_cluster_label(self, cluster):
votes = {}
Expand Down Expand Up @@ -866,6 +861,17 @@ def merge_cluster_shape(self, cluster):
for s in cluster) / len(cluster)
return shape, shape_score

    def merge_cluster(self, cluster):
        """
        Merges one cluster of matched shapes into a single shape:
        takes the voted label and the averaged shape, keeps the highest
        z_order in the cluster, and combines the label and shape scores
        into a single 'score' attribute.
        """
        label, label_score = self.find_cluster_label(cluster)
        shape, shape_score = self.merge_cluster_shape(cluster)

        # The merged shape is drawn on top of everything it was merged from
        shape.z_order = max(cluster, key=lambda a: a.z_order).z_order
        shape.label = label
        # Without a label vote, only the shape similarity contributes
        shape.attributes['score'] = label_score * shape_score \
            if label is not None else shape_score

        return shape

@attrs
class BboxMerger(_ShapeMerger, BboxMatcher):
pass
Expand All @@ -890,6 +896,26 @@ class LineMerger(_ShapeMerger, LineMatcher):
class CaptionsMerger(AnnotationMerger, CaptionsMatcher):
pass

@attrs
class Cuboid3dMerger(_ShapeMerger, Cuboid3dMatcher):
    # Merger for 3D cuboid clusters. Differs from _ShapeMerger.merge_cluster
    # in that cuboids have no z_order, so that step is skipped.

    @staticmethod
    def _merge_cluster_shape_mean_box_nearest(cluster):
        # Not implemented for cuboids yet; the commented-out sketch below
        # mirrors the 2D bbox strategy and is kept as a starting point.
        raise NotImplementedError()
        # mbbox = Bbox(*mean_cuboid(cluster))
        # dist = (segment_iou(mbbox, s) for s in cluster)
        # nearest_pos, _ = max(enumerate(dist), key=lambda e: e[1])
        # return cluster[nearest_pos]

    def merge_cluster(self, cluster):
        # Same as _ShapeMerger.merge_cluster, minus the z_order handling
        # (3D cuboids have no z_order).
        label, label_score = self.find_cluster_label(cluster)
        shape, shape_score = self.merge_cluster_shape(cluster)

        shape.label = label
        shape.attributes['score'] = label_score * shape_score \
            if label is not None else shape_score

        return shape

def match_segments(a_segms, b_segms, distance=segment_iou, dist_thresh=1.0,
label_matcher=lambda a, b: a.label == b.label):
assert callable(distance), distance
Expand Down
Empty file.
Loading

0 comments on commit da1ecb3

Please sign in to comment.