From 530ce1749236ae57e744b3bf2e39bb9dab2281f3 Mon Sep 17 00:00:00 2001
From: Maxim Zhiltsov <zhiltsov.max35@gmail.com>
Date: Thu, 12 Mar 2020 17:11:53 +0300
Subject: [PATCH 01/26] Move formats to dataset manager

---
 cvat/apps/annotation/settings.py              |  2 +-
 cvat/apps/dataset_manager/formats/__init__.py |  0
 .../formats}/coco.py                          | 10 +++--
 .../formats}/cvat.py                          |  0
 .../formats}/labelme.py                       |  0
 .../formats}/mask.py                          | 38 ++++++++++++-------
 .../formats}/mot.py                           |  0
 .../formats}/pascal_voc.py                    | 23 +++++++----
 .../formats}/tfrecord.py                      |  9 ++++-
 .../formats}/yolo.py                          |  8 +++-
 10 files changed, 61 insertions(+), 29 deletions(-)
 create mode 100644 cvat/apps/dataset_manager/formats/__init__.py
 rename cvat/apps/{annotation => dataset_manager/formats}/coco.py (86%)
 rename cvat/apps/{annotation => dataset_manager/formats}/cvat.py (100%)
 rename cvat/apps/{annotation => dataset_manager/formats}/labelme.py (100%)
 rename cvat/apps/{annotation => dataset_manager/formats}/mask.py (61%)
 rename cvat/apps/{annotation => dataset_manager/formats}/mot.py (100%)
 rename cvat/apps/{annotation => dataset_manager/formats}/pascal_voc.py (80%)
 rename cvat/apps/{annotation => dataset_manager/formats}/tfrecord.py (86%)
 rename cvat/apps/{annotation => dataset_manager/formats}/yolo.py (92%)

diff --git a/cvat/apps/annotation/settings.py b/cvat/apps/annotation/settings.py
index e1e5f82b42c8..9099a387c05c 100644
--- a/cvat/apps/annotation/settings.py
+++ b/cvat/apps/annotation/settings.py
@@ -4,7 +4,7 @@
 
 import os
 
-path_prefix = os.path.join('cvat', 'apps', 'annotation')
+path_prefix = os.path.join('cvat', 'apps', 'dataset_manager', 'formats')
 BUILTIN_FORMATS = (
     os.path.join(path_prefix, 'cvat.py'),
     os.path.join(path_prefix, 'pascal_voc.py'),
diff --git a/cvat/apps/dataset_manager/formats/__init__.py b/cvat/apps/dataset_manager/formats/__init__.py
new file mode 100644
index 000000000000..e69de29bb2d1
diff --git a/cvat/apps/annotation/coco.py b/cvat/apps/dataset_manager/formats/coco.py
similarity index 86%
rename from cvat/apps/annotation/coco.py
rename to cvat/apps/dataset_manager/formats/coco.py
index 942aa61f6b33..fe323b645db0 100644
--- a/cvat/apps/annotation/coco.py
+++ b/cvat/apps/dataset_manager/formats/coco.py
@@ -29,15 +29,19 @@ def load(file_object, annotations):
     dm_dataset = CocoInstancesExtractor(file_object.name)
     import_dm_annotations(dm_dataset, annotations)
 
+from datumaro.plugins.coco_format.converter import \
+    CocoInstancesConverter as _CocoInstancesConverter
+class CvatCocoConverter(_CocoInstancesConverter):
+    NAME = 'cvat_coco'
+
 def dump(file_object, annotations):
     import os.path as osp
     import shutil
     from cvat.apps.dataset_manager.bindings import CvatAnnotationsExtractor
-    from datumaro.components.project import Environment
     from tempfile import TemporaryDirectory
+
     extractor = CvatAnnotationsExtractor('', annotations)
-    converter = Environment().make_converter('coco_instances',
-        crop_covered=True)
+    converter = CvatCocoConverter()
     with TemporaryDirectory() as temp_dir:
         converter(extractor, save_dir=temp_dir)
 
diff --git a/cvat/apps/annotation/cvat.py b/cvat/apps/dataset_manager/formats/cvat.py
similarity index 100%
rename from cvat/apps/annotation/cvat.py
rename to cvat/apps/dataset_manager/formats/cvat.py
diff --git a/cvat/apps/annotation/labelme.py b/cvat/apps/dataset_manager/formats/labelme.py
similarity index 100%
rename from cvat/apps/annotation/labelme.py
rename to cvat/apps/dataset_manager/formats/labelme.py
diff --git a/cvat/apps/annotation/mask.py b/cvat/apps/dataset_manager/formats/mask.py
similarity index 61%
rename from cvat/apps/annotation/mask.py
rename to cvat/apps/dataset_manager/formats/mask.py
index dbdb48df1c97..7496ca96e605 100644
--- a/cvat/apps/annotation/mask.py
+++ b/cvat/apps/dataset_manager/formats/mask.py
@@ -22,27 +22,37 @@
     ],
 }
 
+class CvatMaskConverter: # callable wrapper over the 'voc_segmentation' converter
+    def __init__(self, save_images=False):
+        self._save_images = save_images
+
+    def __call__(self, extractor, save_dir):
+        from datumaro.components.project import Environment, Dataset
+
+        env = Environment()
+        polygons_to_masks = env.transforms.get('polygons_to_masks')
+        boxes_to_masks = env.transforms.get('boxes_to_masks')
+        merge_instance_segments = env.transforms.get('merge_instance_segments')
+        id_from_image = env.transforms.get('id_from_image_name')
+
+        extractor = extractor.transform(polygons_to_masks)
+        extractor = extractor.transform(boxes_to_masks)
+        extractor = extractor.transform(merge_instance_segments)
+        extractor = extractor.transform(id_from_image)
+        extractor = Dataset.from_extractors(extractor) # apply lazy transforms
+
+        converter = env.make_converter('voc_segmentation',
+            apply_colormap=True, label_map='source',
+            save_images=self._save_images)
+        converter(extractor, save_dir=save_dir) # write into the caller-provided directory
 
 def dump(file_object, annotations):
     from cvat.apps.dataset_manager.bindings import CvatAnnotationsExtractor
     from cvat.apps.dataset_manager.util import make_zip_archive
-    from datumaro.components.project import Environment, Dataset
     from tempfile import TemporaryDirectory
 
-    env = Environment()
-    polygons_to_masks = env.transforms.get('polygons_to_masks')
-    boxes_to_masks = env.transforms.get('boxes_to_masks')
-    merge_instance_segments = env.transforms.get('merge_instance_segments')
-    id_from_image = env.transforms.get('id_from_image_name')
-
     extractor = CvatAnnotationsExtractor('', annotations)
-    extractor = extractor.transform(polygons_to_masks)
-    extractor = extractor.transform(boxes_to_masks)
-    extractor = extractor.transform(merge_instance_segments)
-    extractor = extractor.transform(id_from_image)
-    extractor = Dataset.from_extractors(extractor) # apply lazy transforms
-    converter = env.make_converter('voc_segmentation',
-        apply_colormap=True, label_map='source')
+    converter = CvatMaskConverter()
     with TemporaryDirectory() as temp_dir:
         converter(extractor, save_dir=temp_dir)
         make_zip_archive(temp_dir, file_object)
diff --git a/cvat/apps/annotation/mot.py b/cvat/apps/dataset_manager/formats/mot.py
similarity index 100%
rename from cvat/apps/annotation/mot.py
rename to cvat/apps/dataset_manager/formats/mot.py
diff --git a/cvat/apps/annotation/pascal_voc.py b/cvat/apps/dataset_manager/formats/pascal_voc.py
similarity index 80%
rename from cvat/apps/annotation/pascal_voc.py
rename to cvat/apps/dataset_manager/formats/pascal_voc.py
index 2dd0aa48f510..302f37c7f71b 100644
--- a/cvat/apps/annotation/pascal_voc.py
+++ b/cvat/apps/dataset_manager/formats/pascal_voc.py
@@ -62,19 +62,28 @@ def load(file_object, annotations):
         dm_dataset = dm_project.make_dataset()
         import_dm_annotations(dm_dataset, annotations)
 
+class CvatVocConverter: # callable wrapper over the 'voc' converter
+    def __init__(self, save_images=False):
+        self._save_images = save_images
+
+    def __call__(self, extractor, save_dir):
+        from datumaro.components.project import Environment, Dataset
+        env = Environment()
+        id_from_image = env.transforms.get('id_from_image_name')
+
+        extractor = extractor.transform(id_from_image)
+        extractor = Dataset.from_extractors(extractor) # apply lazy transforms
+
+        converter = env.make_converter('voc', save_images=self._save_images)
+        converter(extractor, save_dir=save_dir) # write into the caller-provided directory
+
 def dump(file_object, annotations):
     from cvat.apps.dataset_manager.bindings import CvatAnnotationsExtractor
     from cvat.apps.dataset_manager.util import make_zip_archive
-    from datumaro.components.project import Environment, Dataset
     from tempfile import TemporaryDirectory
 
-    env = Environment()
-    id_from_image = env.transforms.get('id_from_image_name')
-
     extractor = CvatAnnotationsExtractor('', annotations)
-    extractor = extractor.transform(id_from_image)
-    extractor = Dataset.from_extractors(extractor) # apply lazy transforms
-    converter = env.make_converter('voc')
+    converter = CvatVocConverter()
     with TemporaryDirectory() as temp_dir:
         converter(extractor, save_dir=temp_dir)
         make_zip_archive(temp_dir, file_object)
\ No newline at end of file
diff --git a/cvat/apps/annotation/tfrecord.py b/cvat/apps/dataset_manager/formats/tfrecord.py
similarity index 86%
rename from cvat/apps/annotation/tfrecord.py
rename to cvat/apps/dataset_manager/formats/tfrecord.py
index 647d5c26442f..db6dee6944fc 100644
--- a/cvat/apps/annotation/tfrecord.py
+++ b/cvat/apps/dataset_manager/formats/tfrecord.py
@@ -22,13 +22,18 @@
     ],
 }
 
+from datumaro.plugins.tf_detection_api_format.converter import \
+    TfDetectionApiConverter as _TfDetectionApiConverter
+class CvatTfrecordConverter(_TfDetectionApiConverter):
+    NAME = 'cvat_tfrecord'
+
 def dump(file_object, annotations):
     from cvat.apps.dataset_manager.bindings import CvatAnnotationsExtractor
     from cvat.apps.dataset_manager.util import make_zip_archive
-    from datumaro.components.project import Environment
     from tempfile import TemporaryDirectory
+
     extractor = CvatAnnotationsExtractor('', annotations)
-    converter = Environment().make_converter('tf_detection_api')
+    converter = CvatTfrecordConverter()
     with TemporaryDirectory() as temp_dir:
         converter(extractor, save_dir=temp_dir)
         make_zip_archive(temp_dir, file_object)
diff --git a/cvat/apps/annotation/yolo.py b/cvat/apps/dataset_manager/formats/yolo.py
similarity index 92%
rename from cvat/apps/annotation/yolo.py
rename to cvat/apps/dataset_manager/formats/yolo.py
index 379ea45abe3d..4d397e35385d 100644
--- a/cvat/apps/annotation/yolo.py
+++ b/cvat/apps/dataset_manager/formats/yolo.py
@@ -57,13 +57,17 @@ def load(file_object, annotations):
         dm_dataset = dm_project.make_dataset()
         import_dm_annotations(dm_dataset, annotations)
 
+from datumaro.plugins.yolo_format.converter import \
+    YoloConverter as _YoloConverter
+class CvatYoloConverter(_YoloConverter):
+    NAME = 'cvat_yolo'
+
 def dump(file_object, annotations):
     from cvat.apps.dataset_manager.bindings import CvatAnnotationsExtractor
     from cvat.apps.dataset_manager.util import make_zip_archive
-    from datumaro.components.project import Environment
     from tempfile import TemporaryDirectory
     extractor = CvatAnnotationsExtractor('', annotations)
-    converter = Environment().make_converter('yolo')
+    converter = CvatYoloConverter()
     with TemporaryDirectory() as temp_dir:
         converter(extractor, save_dir=temp_dir)
         make_zip_archive(temp_dir, file_object)

From 38318a43db4937632197c09460a9f4d83ece4fe5 Mon Sep 17 00:00:00 2001
From: Maxim Zhiltsov <zhiltsov.max35@gmail.com>
Date: Thu, 12 Mar 2020 17:12:21 +0300
Subject: [PATCH 02/26] Unify dataset export and anno export implementations

---
 cvat/apps/dataset_manager/task.py | 16 +++++++++-------
 1 file changed, 9 insertions(+), 7 deletions(-)

diff --git a/cvat/apps/dataset_manager/task.py b/cvat/apps/dataset_manager/task.py
index 4c5e941081ae..1635d4ebf7f2 100644
--- a/cvat/apps/dataset_manager/task.py
+++ b/cvat/apps/dataset_manager/task.py
@@ -94,8 +94,10 @@ def _load(self):
             CvatImagesDirExtractor)
 
     def _import_from_task(self, user):
-        self._project = Project.generate(self._project_dir,
-            config={'project_name': self._db_task.name})
+        self._project = Project.generate(self._project_dir, config={
+            'project_name': self._db_task.name,
+            'plugins_dir': osp.join(osp.dirname(__file__), 'formats'),
+        })
 
         self._project.add_source('task_%s_images' % self._db_task.id, {
             'url': self._db_task.get_data_dirname(),
@@ -314,22 +316,22 @@ def clear_export_cache(task_id, file_path, file_ctime):
     },
     {
         'name': 'PASCAL VOC 2012',
-        'tag': 'voc',
+        'tag': 'cvat_voc',
         'is_default': False,
     },
     {
         'name': 'MS COCO',
-        'tag': 'coco',
+        'tag': 'cvat_coco',
         'is_default': False,
     },
     {
         'name': 'YOLO',
-        'tag': 'yolo',
+        'tag': 'cvat_yolo',
         'is_default': False,
     },
     {
-        'name': 'TF Detection API TFrecord',
-        'tag': 'tf_detection_api',
+        'name': 'TF Detection API',
+        'tag': 'cvat_tfrecord',
         'is_default': False,
     },
 ]

From f459afb4999f5a9f1671d8228dc224fff8d2139f Mon Sep 17 00:00:00 2001
From: Maxim Zhiltsov <zhiltsov.max35@gmail.com>
Date: Mon, 23 Mar 2020 17:54:21 +0300
Subject: [PATCH 03/26] Add track_id to TrackedShape, export tracked shapes

---
 cvat/apps/annotation/annotation.py | 12 +++++++++---
 1 file changed, 9 insertions(+), 3 deletions(-)

diff --git a/cvat/apps/annotation/annotation.py b/cvat/apps/annotation/annotation.py
index 70054255c242..73594f225111 100644
--- a/cvat/apps/annotation/annotation.py
+++ b/cvat/apps/annotation/annotation.py
@@ -104,7 +104,7 @@ class Annotation:
     Attribute = namedtuple('Attribute', 'name, value')
     LabeledShape = namedtuple('LabeledShape', 'type, frame, label, points, occluded, attributes, group, z_order')
     LabeledShape.__new__.__defaults__ = (0, 0)
-    TrackedShape = namedtuple('TrackedShape', 'type, points, occluded, frame, attributes, outside, keyframe, z_order')
+    TrackedShape = namedtuple('TrackedShape', 'type, points, occluded, frame, attributes, outside, keyframe, z_order, track_id')
     TrackedShape.__new__.__defaults__ = (0, )
     Track = namedtuple('Track', 'label, group, shapes')
     Tag = namedtuple('Tag', 'frame, label, attributes, group')
@@ -274,9 +274,11 @@ def _export_tracked_shape(self, shape):
             frame=self._db_task.start_frame + shape["frame"] * self._frame_step,
             points=shape["points"],
             occluded=shape["occluded"],
+            z_order=shape.get("z_order", 0),
+            group=shape.get("group", 0),
             outside=shape.get("outside", False),
             keyframe=shape.get("keyframe", True),
-            z_order=shape["z_order"],
+            track_id=shape["track_id"],
             attributes=self._export_attributes(shape["attributes"]),
         )
 
@@ -323,7 +325,11 @@ def _get_frame(annotations, shape):
         annotations = {}
         data_manager = DataManager(self._annotation_ir)
         for shape in sorted(data_manager.to_shapes(self._db_task.size), key=lambda s: s.get("z_order", 0)):
-            _get_frame(annotations, shape).labeled_shapes.append(self._export_labeled_shape(shape))
+            if 'track_id' in shape:
+                exported_shape = self._export_tracked_shape(shape)
+            else:
+                exported_shape = self._export_labeled_shape(shape)
+            _get_frame(annotations, shape).labeled_shapes.append(exported_shape)
 
         for tag in self._annotation_ir.tags:
             _get_frame(annotations, tag).tags.append(self._export_tag(tag))

From 041889e764f098b4db774069df64ab1f9c37e860 Mon Sep 17 00:00:00 2001
From: Maxim Zhiltsov <zhiltsov.max35@gmail.com>
Date: Mon, 23 Mar 2020 17:55:05 +0300
Subject: [PATCH 04/26] Replace MOT format

---
 cvat/apps/dataset_manager/bindings.py    |   4 +
 cvat/apps/dataset_manager/formats/mot.py | 143 ++++++++++-------------
 cvat/apps/engine/tests/test_rest_api.py  |   2 +-
 3 files changed, 67 insertions(+), 82 deletions(-)

diff --git a/cvat/apps/dataset_manager/bindings.py b/cvat/apps/dataset_manager/bindings.py
index d1d98af279aa..a75956a01b20 100644
--- a/cvat/apps/dataset_manager/bindings.py
+++ b/cvat/apps/dataset_manager/bindings.py
@@ -149,6 +149,10 @@ def convert_attrs(label, cvat_attrs):
             anno_attr['occluded'] = shape_obj.occluded
             anno_attr['z_order'] = shape_obj.z_order
 
+            if hasattr(shape_obj, 'track_id'): # only tracked shapes carry these fields
+                anno_attr['track_id'] = shape_obj.track_id
+                anno_attr['keyframe'] = shape_obj.keyframe
+
             anno_points = shape_obj.points
             if shape_obj.type == ShapeType.POINTS:
                 anno = datumaro.Points(anno_points,
diff --git a/cvat/apps/dataset_manager/formats/mot.py b/cvat/apps/dataset_manager/formats/mot.py
index b7f63d1e79b0..2e8ccca7fa27 100644
--- a/cvat/apps/dataset_manager/formats/mot.py
+++ b/cvat/apps/dataset_manager/formats/mot.py
@@ -5,7 +5,7 @@
         {
             "display_name": "{name} {format} {version}",
             "format": "CSV",
-            "version": "1.0",
+            "version": "1.1",
             "handler": "dump"
         },
     ],
@@ -13,97 +13,78 @@
         {
             "display_name": "{name} {format} {version}",
             "format": "CSV",
-            "version": "1.0",
+            "version": "1.1",
             "handler": "load",
         }
     ],
 }
 
 
-MOT = [
-    "frame_id",
-    "track_id",
-    "xtl",
-    "ytl",
-    "width",
-    "height",
-    "confidence",
-    "class_id",
-    "visibility"
-]
-
+from datumaro.plugins.mot_format import \
+    MotConverter as _MotConverter
+class CvatMotConverter(_MotConverter):
+    NAME = 'cvat_mot'
 
 def dump(file_object, annotations):
-    """ Export track shapes in MOT CSV format. Due to limitations of the MOT
-    format, this process only supports rectangular interpolation mode
-    annotations.
-    """
-    import csv
-    import io
+    from cvat.apps.dataset_manager.bindings import CvatAnnotationsExtractor
+    from cvat.apps.dataset_manager.util import make_zip_archive
+    from tempfile import TemporaryDirectory
 
-    # csv requires a text buffer
-    with io.TextIOWrapper(file_object, encoding="utf-8") as csv_file:
-        writer = csv.DictWriter(csv_file, fieldnames=MOT)
-        for i, track in enumerate(annotations.tracks):
-            for shape in track.shapes:
-                # MOT doesn't support polygons or 'outside' property
-                if shape.type != 'rectangle':
-                    continue
-                writer.writerow({
-                    "frame_id": shape.frame,
-                    "track_id": i,
-                    "xtl": shape.points[0],
-                    "ytl": shape.points[1],
-                    "width": shape.points[2] - shape.points[0],
-                    "height": shape.points[3] - shape.points[1],
-                    "confidence": 1,
-                    "class_id": track.label,
-                    "visibility": 1 - int(shape.occluded)
-                })
+    extractor = CvatAnnotationsExtractor('', annotations)
+    converter = CvatMotConverter()
+    with TemporaryDirectory() as temp_dir:
+        converter(extractor, save_dir=temp_dir)
+        make_zip_archive(temp_dir, file_object)
 
 
 def load(file_object, annotations):
-    """ Read MOT CSV format and convert objects to annotated tracks.
-    """
-    import csv
-    import io
-    tracks = {}
-    # csv requires a text buffer
-    with io.TextIOWrapper(file_object, encoding="utf-8") as csv_file:
-        reader = csv.DictReader(csv_file, fieldnames=MOT)
-        for row in reader:
-            # create one shape per row
-            xtl = float(row["xtl"])
-            ytl = float(row["ytl"])
-            xbr = xtl + float(row["width"])
-            ybr = ytl + float(row["height"])
-            shape = annotations.TrackedShape(
-                type="rectangle",
-                points=[xtl, ytl, xbr, ybr],
-                occluded=float(row["visibility"]) == 0,
-                outside=False,
-                keyframe=False,
-                z_order=0,
-                frame=int(row["frame_id"]),
-                attributes=[],
-            )
-            # build trajectories as lists of shapes in track dict
-            track_id = int(row["track_id"])
-            if track_id not in tracks:
-                tracks[track_id] = annotations.Track(row["class_id"], track_id, [])
-            tracks[track_id].shapes.append(shape)
+    from pyunpack import Archive
+    from tempfile import TemporaryDirectory
+    from datumaro.plugins.mot_format import MotImporter
+    import datumaro.components.extractor as datumaro
+    from cvat.apps.dataset_manager.bindings import match_frame
+
+    archive_file = file_object if isinstance(file_object, str) else getattr(file_object, "name")
+    with TemporaryDirectory() as tmp_dir:
+        Archive(archive_file).extractall(tmp_dir)
+
+        tracks = {}
+
+        dm_dataset = MotImporter()(tmp_dir).make_dataset()
+        label_cat = dm_dataset.categories()[datumaro.AnnotationType.label]
+
+        for item in dm_dataset:
+            frame_id = match_frame(item, annotations)
+
+            for ann in item.annotations:
+                if ann.type != datumaro.AnnotationType.bbox:
+                    continue
+
+                track_id = ann.attributes.get('track_id')
+                if track_id is None:
+                    continue
+
+                shape = annotations.TrackedShape(
+                    type='rectangle',
+                    points=ann.points,
+                    occluded=ann.attributes.get('occluded') == True,
+                    outside=False,
+                    keyframe=False,
+                    z_order=ann.z_order,
+                    frame=frame_id,
+                    attributes=[],
+                )
+
+                # build trajectories as lists of shapes in track dict
+                track_id = int(track_id)
+                if track_id not in tracks:
+                    tracks[track_id] = annotations.Track(
+                        label_cat.items[ann.label].name, 0, [])
+                tracks[track_id].shapes.append(shape)
+
         for track in tracks.values():
-            # Set outside=True for the last shape since MOT has no support
-            # for this flag
-            last = annotations.TrackedShape(
-                type=track.shapes[-1].type,
-                points=track.shapes[-1].points,
-                occluded=track.shapes[-1].occluded,
-                outside=True,
-                keyframe=track.shapes[-1].keyframe,
-                z_order=track.shapes[-1].z_order,
-                frame=track.shapes[-1].frame,
-                attributes=track.shapes[-1].attributes,
-            )
-            track.shapes[-1] = last
+            # MOT annotations do not require frames to be ordered
+            track.shapes.sort(key=lambda t: t.frame)
+            # Set outside=True for the last shape in a track to finish the track
+            track.shapes[-1] = track.shapes[-1]._replace(outside=True)
             annotations.add_track(track)
diff --git a/cvat/apps/engine/tests/test_rest_api.py b/cvat/apps/engine/tests/test_rest_api.py
index f3da0410623c..d7860ea2792b 100644
--- a/cvat/apps/engine/tests/test_rest_api.py
+++ b/cvat/apps/engine/tests/test_rest_api.py
@@ -2717,7 +2717,7 @@ def _get_initial_annotation(annotation_format):
                 annotations["shapes"] = rectangle_shapes_wo_attrs + polygon_shapes_wo_attrs
                 annotations["tracks"] = rectangle_tracks_wo_attrs
 
-            elif annotation_format == "MOT CSV 1.0":
+            elif annotation_format == "MOT CSV 1.1":
                 annotations["tracks"] = rectangle_tracks_wo_attrs
 
             elif annotation_format == "LabelMe ZIP 3.0 for images":

From 23085bcfc66b78fcfa3efb59642e53b854b08716 Mon Sep 17 00:00:00 2001
From: Maxim Zhiltsov <zhiltsov.max35@gmail.com>
Date: Mon, 23 Mar 2020 18:07:59 +0300
Subject: [PATCH 05/26] Replace LabelMe format

---
 cvat/apps/dataset_manager/formats/labelme.py | 312 +++----------------
 cvat/apps/engine/tests/test_rest_api.py      |   2 +-
 2 files changed, 37 insertions(+), 277 deletions(-)

diff --git a/cvat/apps/dataset_manager/formats/labelme.py b/cvat/apps/dataset_manager/formats/labelme.py
index baacb388ef01..3d619a3a839c 100644
--- a/cvat/apps/dataset_manager/formats/labelme.py
+++ b/cvat/apps/dataset_manager/formats/labelme.py
@@ -6,10 +6,10 @@
     "name": "LabelMe",
     "dumpers": [
         {
-            "display_name": "{name} {format} {version} for images",
+            "display_name": "{name} {format} {version}",
             "format": "ZIP",
             "version": "3.0",
-            "handler": "dump_as_labelme_annotation"
+            "handler": "dump"
         }
     ],
     "loaders": [
@@ -23,285 +23,45 @@
 }
 
 
-_DEFAULT_USERNAME = 'cvat'
-_MASKS_DIR = 'Masks'
-
-
-def dump_frame_anno(frame_annotation):
-    from collections import defaultdict
-    from lxml import etree as ET
-
-    root_elem = ET.Element('annotation')
-
-    ET.SubElement(root_elem, 'filename').text = frame_annotation.name
-    ET.SubElement(root_elem, 'folder').text = ''
-
-    source_elem = ET.SubElement(root_elem, 'source')
-    ET.SubElement(source_elem, 'sourceImage').text = ''
-    ET.SubElement(source_elem, 'sourceAnnotation').text = 'CVAT'
-
-    image_elem = ET.SubElement(root_elem, 'imagesize')
-    ET.SubElement(image_elem, 'nrows').text = str(frame_annotation.height)
-    ET.SubElement(image_elem, 'ncols').text = str(frame_annotation.width)
-
-    groups = defaultdict(list)
-
-    for obj_id, shape in enumerate(frame_annotation.labeled_shapes):
-        obj_elem = ET.SubElement(root_elem, 'object')
-        ET.SubElement(obj_elem, 'name').text = str(shape.label)
-        ET.SubElement(obj_elem, 'deleted').text = '0'
-        ET.SubElement(obj_elem, 'verified').text = '0'
-        ET.SubElement(obj_elem, 'occluded').text = \
-            'yes' if shape.occluded else 'no'
-        ET.SubElement(obj_elem, 'date').text = ''
-        ET.SubElement(obj_elem, 'id').text = str(obj_id)
-
-        parts_elem = ET.SubElement(obj_elem, 'parts')
-        if shape.group:
-            groups[shape.group].append((obj_id, parts_elem))
-        else:
-            ET.SubElement(parts_elem, 'hasparts').text = ''
-            ET.SubElement(parts_elem, 'ispartof').text = ''
-
-        if shape.type == 'rectangle':
-            ET.SubElement(obj_elem, 'type').text = 'bounding_box'
-
-            poly_elem = ET.SubElement(obj_elem, 'polygon')
-            x0, y0, x1, y1 = shape.points
-            points = [ (x0, y0), (x1, y0), (x1, y1), (x0, y1) ]
-            for x, y in points:
-                point_elem = ET.SubElement(poly_elem, 'pt')
-                ET.SubElement(point_elem, 'x').text = '%.2f' % x
-                ET.SubElement(point_elem, 'y').text = '%.2f' % y
-
-            ET.SubElement(poly_elem, 'username').text = _DEFAULT_USERNAME
-        elif shape.type == 'polygon':
-            poly_elem = ET.SubElement(obj_elem, 'polygon')
-            for x, y in zip(shape.points[::2], shape.points[1::2]):
-                point_elem = ET.SubElement(poly_elem, 'pt')
-                ET.SubElement(point_elem, 'x').text = '%.2f' % x
-                ET.SubElement(point_elem, 'y').text = '%.2f' % y
-
-            ET.SubElement(poly_elem, 'username').text = _DEFAULT_USERNAME
-        elif shape.type == 'polyline':
-            pass
-        elif shape.type == 'points':
-            pass
-        else:
-            raise NotImplementedError("Unknown shape type '%s'" % shape.type)
-
-        attrs = ['%s=%s' % (a.name, a.value) for a in shape.attributes]
-        ET.SubElement(obj_elem, 'attributes').text = ', '.join(attrs)
-
-    for _, group in groups.items():
-        leader_id, leader_parts_elem = group[0]
-        leader_parts = [str(o_id) for o_id, _ in group[1:]]
-        ET.SubElement(leader_parts_elem, 'hasparts').text = \
-            ','.join(leader_parts)
-        ET.SubElement(leader_parts_elem, 'ispartof').text = ''
-
-        for obj_id, parts_elem in group[1:]:
-            ET.SubElement(parts_elem, 'hasparts').text = ''
-            ET.SubElement(parts_elem, 'ispartof').text = str(leader_id)
-
-    return ET.tostring(root_elem, encoding='unicode', pretty_print=True)
-
-def dump_as_labelme_annotation(file_object, annotations):
-    import os.path as osp
-    from zipfile import ZipFile, ZIP_DEFLATED
-
-    with ZipFile(file_object, 'w', compression=ZIP_DEFLATED) as output_zip:
-        for frame_annotation in annotations.group_by_frame():
-            xml_data = dump_frame_anno(frame_annotation)
-            filename = osp.splitext(frame_annotation.name)[0] + '.xml'
-            output_zip.writestr(filename, xml_data)
-
-def parse_xml_annotations(xml_data, annotations, input_zip):
-    from datumaro.util.mask_tools import mask_to_polygons
-    from io import BytesIO
-    from lxml import etree as ET
-    import numpy as np
-    import os.path as osp
-    from PIL import Image
-
-    def parse_attributes(attributes_string):
-        parsed = []
-        if not attributes_string:
-            return parsed
+class CvatLabelMeConverter: # callable wrapper over the 'label_me' converter
+    def __init__(self, save_images=False):
+        self._save_images = save_images
 
-        read = attributes_string.split(',')
-        read = [a.strip() for a in read if a.strip()]
-        for attr in read:
-            if '=' in attr:
-                name, value = attr.split('=', maxsplit=1)
-                parsed.append(annotations.Attribute(name, value))
-            else:
-                parsed.append(annotations.Attribute(attr, '1'))
+    def __call__(self, extractor, save_dir):
+        from datumaro.components.project import Environment, Dataset
 
-        return parsed
+        env = Environment()
+        id_from_image = env.transforms.get('id_from_image_name')
 
+        extractor = extractor.transform(id_from_image)
+        extractor = Dataset.from_extractors(extractor) # apply lazy transforms
 
-    root_elem = ET.fromstring(xml_data)
+        converter = env.make_converter('label_me', save_images=self._save_images)
+        converter(extractor, save_dir=save_dir) # write into the caller-provided directory
 
-    frame_number = annotations.match_frame(root_elem.find('filename').text)
+def dump(file_object, annotations):
+    from cvat.apps.dataset_manager.bindings import CvatAnnotationsExtractor
+    from cvat.apps.dataset_manager.util import make_zip_archive
+    from tempfile import TemporaryDirectory
 
-    parsed_annotations = dict()
-    group_assignments = dict()
-    root_annotations = set()
-    for obj_elem in root_elem.iter('object'):
-        obj_id = int(obj_elem.find('id').text)
-
-        ann_items = []
-
-        attributes = []
-        attributes_elem = obj_elem.find('attributes')
-        if attributes_elem is not None and attributes_elem.text:
-            attributes = parse_attributes(attributes_elem.text)
-
-        occluded = False
-        occluded_elem = obj_elem.find('occluded')
-        if occluded_elem is not None and occluded_elem.text:
-            occluded = (occluded_elem.text == 'yes')
-
-        deleted = False
-        deleted_elem = obj_elem.find('deleted')
-        if deleted_elem is not None and deleted_elem.text:
-            deleted = bool(int(deleted_elem.text))
-
-        poly_elem = obj_elem.find('polygon')
-        segm_elem = obj_elem.find('segm')
-        type_elem = obj_elem.find('type') # the only value is 'bounding_box'
-        if poly_elem is not None:
-            points = []
-            for point_elem in poly_elem.iter('pt'):
-                x = float(point_elem.find('x').text)
-                y = float(point_elem.find('y').text)
-                points.append(x)
-                points.append(y)
-            label = obj_elem.find('name').text
-            if label and attributes:
-                label_id = annotations._get_label_id(label)
-                if label_id:
-                    attributes = [a for a in attributes
-                        if annotations._get_attribute_id(label_id, a.name)
-                    ]
-                else:
-                    attributes = []
-            else:
-                attributes = []
-
-            if type_elem is not None and type_elem.text == 'bounding_box':
-                xmin = min(points[::2])
-                xmax = max(points[::2])
-                ymin = min(points[1::2])
-                ymax = max(points[1::2])
-                ann_items.append(annotations.LabeledShape(
-                    type='rectangle',
-                    frame=frame_number,
-                    label=label,
-                    points=[xmin, ymin, xmax, ymax],
-                    occluded=occluded,
-                    attributes=attributes,
-                ))
-            else:
-                ann_items.append(annotations.LabeledShape(
-                    type='polygon',
-                    frame=frame_number,
-                    label=label,
-                    points=points,
-                    occluded=occluded,
-                    attributes=attributes,
-                ))
-        elif segm_elem is not None:
-            label = obj_elem.find('name').text
-            if label and attributes:
-                label_id = annotations._get_label_id(label)
-                if label_id:
-                    attributes = [a for a in attributes
-                        if annotations._get_attribute_id(label_id, a.name)
-                    ]
-                else:
-                    attributes = []
-            else:
-                attributes = []
-
-            mask_file = segm_elem.find('mask').text
-            mask = input_zip.read(osp.join(_MASKS_DIR, mask_file))
-            mask = np.asarray(Image.open(BytesIO(mask)).convert('L'))
-            mask = (mask != 0)
-            polygons = mask_to_polygons(mask)
-
-            for polygon in polygons:
-                ann_items.append(annotations.LabeledShape(
-                    type='polygon',
-                    frame=frame_number,
-                    label=label,
-                    points=polygon,
-                    occluded=occluded,
-                    attributes=attributes,
-                ))
-
-        if not deleted:
-            parsed_annotations[obj_id] = ann_items
-
-        parts_elem = obj_elem.find('parts')
-        if parts_elem is not None:
-            children_ids = []
-            hasparts_elem = parts_elem.find('hasparts')
-            if hasparts_elem is not None and hasparts_elem.text:
-                children_ids = [int(c) for c in hasparts_elem.text.split(',')]
-
-            parent_ids = []
-            ispartof_elem = parts_elem.find('ispartof')
-            if ispartof_elem is not None and ispartof_elem.text:
-                parent_ids = [int(c) for c in ispartof_elem.text.split(',')]
-
-            if children_ids and not parent_ids and hasparts_elem.text:
-                root_annotations.add(obj_id)
-            group_assignments[obj_id] = [None, children_ids]
-
-    # assign a single group to the whole subtree
-    current_group_id = 0
-    annotations_to_visit = list(root_annotations)
-    while annotations_to_visit:
-        ann_id = annotations_to_visit.pop()
-        ann_assignment = group_assignments[ann_id]
-        group_id, children_ids = ann_assignment
-        if group_id:
-            continue
-
-        if ann_id in root_annotations:
-            current_group_id += 1 # start a new group
-
-        group_id = current_group_id
-        ann_assignment[0] = group_id
-
-        # continue with children
-        annotations_to_visit.extend(children_ids)
-
-    assert current_group_id == len(root_annotations)
-
-    for ann_id, ann_items in parsed_annotations.items():
-        group_id = 0
-        if ann_id in group_assignments:
-            ann_assignment = group_assignments[ann_id]
-            group_id = ann_assignment[0]
-
-        for ann_item in ann_items:
-            if group_id:
-                ann_item = ann_item._replace(group=group_id)
-            if isinstance(ann_item, annotations.LabeledShape):
-                annotations.add_shape(ann_item)
-            else:
-                raise NotImplementedError()
+    extractor = CvatAnnotationsExtractor('', annotations)
+    converter = CvatMotConverter()
+    with TemporaryDirectory() as temp_dir:
+        converter(extractor, save_dir=temp_dir)
+        make_zip_archive(temp_dir, file_object)
 
 def load(file_object, annotations):
-    from zipfile import ZipFile
-
-    with ZipFile(file_object, 'r') as input_zip:
-        for filename in input_zip.namelist():
-            if not filename.endswith('.xml'):
-                continue
-
-            xml_data = input_zip.read(filename)
-            parse_xml_annotations(xml_data, annotations, input_zip)
+    from pyunpack import Archive
+    from tempfile import TemporaryDirectory
+    from datumaro.plugins.labelme_format import LabelMeImporter
+    from datumaro.components.project import Environment
+    from cvat.apps.dataset_manager.bindings import import_dm_annotations
+
+    archive_file = file_object if isinstance(file_object, str) else getattr(file_object, "name")
+    with TemporaryDirectory() as tmp_dir:
+        Archive(archive_file).extractall(tmp_dir)
+
+        dm_dataset = LabelMeImporter()(tmp_dir).make_dataset()
+        masks_to_polygons = Environment().transforms.get('masks_to_polygons')
+        dm_dataset = dm_dataset.transform(masks_to_polygons)
+        import_dm_annotations(dm_dataset, annotations)
diff --git a/cvat/apps/engine/tests/test_rest_api.py b/cvat/apps/engine/tests/test_rest_api.py
index d7860ea2792b..597af7596e8e 100644
--- a/cvat/apps/engine/tests/test_rest_api.py
+++ b/cvat/apps/engine/tests/test_rest_api.py
@@ -2720,7 +2720,7 @@ def _get_initial_annotation(annotation_format):
             elif annotation_format == "MOT CSV 1.1":
                 annotations["tracks"] = rectangle_tracks_wo_attrs
 
-            elif annotation_format == "LabelMe ZIP 3.0 for images":
+            elif annotation_format == "LabelMe ZIP 3.0":
                 annotations["shapes"] = rectangle_shapes_with_attrs + \
                                         rectangle_shapes_wo_attrs + \
                                         polygon_shapes_wo_attrs + \

From 493be2a546a44c6527f57f34a2125d5ab92fee20 Mon Sep 17 00:00:00 2001
From: Maxim Zhiltsov <zhiltsov.max35@gmail.com>
Date: Mon, 23 Mar 2020 18:13:45 +0300
Subject: [PATCH 06/26] Add new formats to dm

---
 cvat/apps/dataset_manager/task.py | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/cvat/apps/dataset_manager/task.py b/cvat/apps/dataset_manager/task.py
index 1635d4ebf7f2..00a4ec70c1ef 100644
--- a/cvat/apps/dataset_manager/task.py
+++ b/cvat/apps/dataset_manager/task.py
@@ -334,6 +334,16 @@ def clear_export_cache(task_id, file_path, file_ctime):
         'tag': 'cvat_tfrecord',
         'is_default': False,
     },
+    {
+        'name': 'MOT',
+        'tag': 'cvat_mot',
+        'is_default': False,
+    },
+    {
+        'name': 'LabelMe',
+        'tag': 'cvat_label_me',
+        'is_default': False,
+    },
 ]
 
 def get_export_formats():

From 867f8b974e0d66326ea0af66cb1a4861276ea49c Mon Sep 17 00:00:00 2001
From: Maxim Zhiltsov <zhiltsov.max35@gmail.com>
Date: Tue, 24 Mar 2020 16:39:35 +0300
Subject: [PATCH 07/26] Add dm tests

---
 cvat/apps/dataset_manager/_tests.py | 309 ++++++++++++++++++++++++++++
 1 file changed, 309 insertions(+)
 create mode 100644 cvat/apps/dataset_manager/_tests.py

diff --git a/cvat/apps/dataset_manager/_tests.py b/cvat/apps/dataset_manager/_tests.py
new file mode 100644
index 000000000000..3f43a0f18496
--- /dev/null
+++ b/cvat/apps/dataset_manager/_tests.py
@@ -0,0 +1,309 @@
+
+# Copyright (C) 2020 Intel Corporation
+#
+# SPDX-License-Identifier: MIT
+
+class _GitImportFix:
+    import sys
+    former_path = sys.path[:]
+
+    @classmethod
+    def apply(cls):
+        # HACK: work around a name clash between the 'git' application and
+        # the 'git' library module: the app is found earlier on sys.path.
+        # The clash is introduced by unittest test discovery.
+        import sys
+        print('apply')
+
+        apps_dir = __file__[:__file__.rfind('/dataset_manager/')]
+        assert 'apps' in apps_dir
+        try:
+            sys.path.remove(apps_dir)
+        except ValueError:
+            pass
+
+        for name in list(sys.modules):
+            if name.startswith('git.') or name == 'git':
+                m = sys.modules.pop(name, None)
+                del m
+
+        import git
+        assert apps_dir not in git.__file__
+
+    @classmethod
+    def restore(cls):
+        import sys
+        print('restore')
+
+        for name in list(sys.modules):
+            if name.startswith('git.') or name == 'git':
+                m = sys.modules.pop(name)
+                del m
+
+        sys.path.insert(0, __file__[:__file__.rfind('/dataset_manager/')])
+
+        import importlib
+        importlib.invalidate_caches()
+
+def _setUpModule():
+    _GitImportFix.apply()
+    import cvat.apps.dataset_manager.task as dm
+    from cvat.apps.engine.models import Task
+    globals()['dm'] = dm
+    globals()['Task'] = Task
+
+    import sys
+    sys.path.insert(0, __file__[:__file__.rfind('/dataset_manager/')])
+
+def tearDownModule():
+    _GitImportFix.restore()
+
+from io import BytesIO
+import os
+import random
+import tempfile
+
+from PIL import Image
+from django.contrib.auth.models import User, Group
+from rest_framework.test import APITestCase, APIClient
+from rest_framework import status
+
+_setUpModule()
+
+
+def generate_image_file(filename):
+    f = BytesIO()
+    width = random.randint(10, 200)
+    height = random.randint(10, 200)
+    image = Image.new('RGB', size=(width, height))
+    image.save(f, 'jpeg')
+    f.name = filename
+    f.seek(0)
+
+    return f
+
+def create_db_users(cls):
+    group_user, _ = Group.objects.get_or_create(name="user")
+
+    user_dummy = User.objects.create_superuser(username="test", password="test", email="")
+    user_dummy.groups.add(group_user)
+
+    cls.user = user_dummy
+
+class ForceLogin:
+    def __init__(self, user, client):
+        self.user = user
+        self.client = client
+
+    def __enter__(self):
+        if self.user:
+            self.client.force_login(self.user,
+                backend='django.contrib.auth.backends.ModelBackend')
+
+        return self
+
+    def __exit__(self, exception_type, exception_value, traceback):
+        if self.user:
+            self.client.logout()
+
+class TaskExportTest(APITestCase):
+    def setUp(self):
+        self.client = APIClient()
+
+    @classmethod
+    def setUpTestData(cls):
+        create_db_users(cls)
+
+    def _generate_task(self):
+        task = {
+            "name": "my task #1",
+            "owner": '',
+            "assignee": '',
+            "overlap": 0,
+            "segment_size": 100,
+            "z_order": False,
+            "image_quality": 75,
+            "labels": [
+                {
+                    "name": "car",
+                    "attributes": [
+                        {
+                            "name": "model",
+                            "mutable": False,
+                            "input_type": "select",
+                            "default_value": "mazda",
+                            "values": ["bmw", "mazda", "renault"]
+                        },
+                        {
+                            "name": "parked",
+                            "mutable": True,
+                            "input_type": "checkbox",
+                            "default_value": False
+                        },
+                    ]
+                },
+                {"name": "person"},
+            ]
+        }
+        task = self._create_task(task, 3)
+
+        annotations = {
+            "version": 0,
+            "tags": [
+                {
+                    "frame": 0,
+                    "label_id": task["labels"][0]["id"],
+                    "group": None,
+                    "attributes": []
+                }
+            ],
+            "shapes": [
+                {
+                    "frame": 0,
+                    "label_id": task["labels"][0]["id"],
+                    "group": None,
+                    "attributes": [
+                        {
+                            "spec_id": task["labels"][0]["attributes"][0]["id"],
+                            "value": task["labels"][0]["attributes"][0]["values"][0]
+                        },
+                        {
+                            "spec_id": task["labels"][0]["attributes"][1]["id"],
+                            "value": task["labels"][0]["attributes"][1]["default_value"]
+                        }
+                    ],
+                    "points": [1.0, 2.1, 100, 300.222],
+                    "type": "rectangle",
+                    "occluded": False
+                },
+                {
+                    "frame": 1,
+                    "label_id": task["labels"][1]["id"],
+                    "group": None,
+                    "attributes": [],
+                    "points": [2.0, 2.1, 100, 300.222, 400, 500, 1, 3],
+                    "type": "polygon",
+                    "occluded": False
+                },
+            ],
+            "tracks": [
+                {
+                    "frame": 0,
+                    "label_id": task["labels"][0]["id"],
+                    "group": None,
+                    "attributes": [
+                        {
+                            "spec_id": task["labels"][0]["attributes"][0]["id"],
+                            "value": task["labels"][0]["attributes"][0]["values"][0]
+                        },
+                    ],
+                    "shapes": [
+                        {
+                            "frame": 0,
+                            "points": [1.0, 2.1, 100, 300.222],
+                            "type": "rectangle",
+                            "occluded": False,
+                            "outside": False,
+                            "attributes": [
+                                {
+                                    "spec_id": task["labels"][0]["attributes"][1]["id"],
+                                    "value": task["labels"][0]["attributes"][1]["default_value"]
+                                }
+                            ]
+                        },
+                        {
+                            "frame": 1,
+                            "attributes": [],
+                            "points": [2.0, 2.1, 100, 300.222],
+                            "type": "rectangle",
+                            "occluded": True,
+                            "outside": True
+                        },
+                    ]
+                },
+                {
+                    "frame": 1,
+                    "label_id": task["labels"][1]["id"],
+                    "group": None,
+                    "attributes": [],
+                    "shapes": [
+                        {
+                            "frame": 1,
+                            "attributes": [],
+                            "points": [1.0, 2.1, 100, 300.222],
+                            "type": "rectangle",
+                            "occluded": False,
+                            "outside": False
+                        }
+                    ]
+                },
+            ]
+        }
+        self._put_api_v1_task_id_annotations(task["id"], annotations)
+
+        return task, annotations
+
+    def _create_task(self, data, size):
+        with ForceLogin(self.user, self.client):
+            response = self.client.post('/api/v1/tasks', data=data, format="json")
+            assert response.status_code == status.HTTP_201_CREATED, response.status_code
+            tid = response.data["id"]
+
+            images = {
+                "client_files[%d]" % i: generate_image_file("image_%d.jpg" % i)
+                for i in range(size)
+            }
+            response = self.client.post("/api/v1/tasks/{}/data".format(tid), data=images)
+            assert response.status_code == status.HTTP_202_ACCEPTED, response.status_code
+
+            response = self.client.get("/api/v1/tasks/{}".format(tid))
+            task = response.data
+
+        return task
+
+    def _put_api_v1_task_id_annotations(self, tid, data):
+        with ForceLogin(self.user, self.client):
+            response = self.client.put("/api/v1/tasks/{}/annotations".format(tid),
+                data=data, format="json")
+
+        return response
+
+    def _test_export(self, format_name, save_images=False):
+        self.assertTrue(format_name in [f['tag'] for f in dm.EXPORT_FORMATS])
+
+        task, _ = self._generate_task()
+        project = dm.TaskProject.from_task(
+            Task.objects.get(pk=task["id"]), self.user.username)
+
+        with tempfile.TemporaryDirectory() as test_dir:
+            project.export(format_name, test_dir, save_images=save_images)
+
+            self.assertTrue(os.listdir(test_dir))
+
+    def test_datumaro(self):
+        self._test_export(dm.EXPORT_FORMAT_DATUMARO_PROJECT, save_images=False)
+
+    def test_coco(self):
+        self._test_export('coco', save_images=True)
+
+    def test_voc(self):
+        self._test_export('voc', save_images=True)
+
+    def test_tf_detection_api(self):
+        self._test_export('tf_detection_api', save_images=True)
+
+    def test_yolo(self):
+        self._test_export('yolo', save_images=True)
+
+    def test_mot(self):
+        self._test_export('mot', save_images=True)
+
+    def test_labelme(self):
+        self._test_export('label_me', save_images=True)
+
+    def test_formats_query(self):
+        formats = dm.get_export_formats()
+
+        expected = set(f['tag'] for f in dm.EXPORT_FORMATS)
+        actual = set(f['tag'] for f in formats)
+        self.assertSetEqual(expected, actual)

From 6b40c12719649ccc359d8cff106ce27b934d46a9 Mon Sep 17 00:00:00 2001
From: Maxim Zhiltsov <zhiltsov.max35@gmail.com>
Date: Wed, 25 Mar 2020 14:05:01 +0300
Subject: [PATCH 08/26] Extend TrackedShape

---
 cvat/apps/annotation/annotation.py | 12 ++++++++----
 1 file changed, 8 insertions(+), 4 deletions(-)

diff --git a/cvat/apps/annotation/annotation.py b/cvat/apps/annotation/annotation.py
index 73594f225111..de21fd300a16 100644
--- a/cvat/apps/annotation/annotation.py
+++ b/cvat/apps/annotation/annotation.py
@@ -104,8 +104,8 @@ class Annotation:
     Attribute = namedtuple('Attribute', 'name, value')
     LabeledShape = namedtuple('LabeledShape', 'type, frame, label, points, occluded, attributes, group, z_order')
     LabeledShape.__new__.__defaults__ = (0, 0)
-    TrackedShape = namedtuple('TrackedShape', 'type, points, occluded, frame, attributes, outside, keyframe, z_order, track_id')
-    TrackedShape.__new__.__defaults__ = (0, )
+    TrackedShape = namedtuple('TrackedShape', 'type, frame, points, occluded, outside, keyframe, attributes, group, z_order, label, track_id')
+    TrackedShape.__new__.__defaults__ = (0, 0, None, 0)
     Track = namedtuple('Track', 'label, group, shapes')
     Tag = namedtuple('Tag', 'frame, label, attributes, group')
     Tag.__new__.__defaults__ = (0, )
@@ -272,6 +272,7 @@ def _export_tracked_shape(self, shape):
         return Annotation.TrackedShape(
             type=shape["type"],
             frame=self._db_task.start_frame + shape["frame"] * self._frame_step,
+            label=self._get_label_name(shape["label_id"]),
             points=shape["points"],
             occluded=shape["occluded"],
             z_order=shape.get("z_order", 0),
@@ -343,14 +344,17 @@ def shapes(self):
 
     @property
     def tracks(self):
-        for track in self._annotation_ir.tracks:
+        for idx, track in enumerate(self._annotation_ir.tracks):
             tracked_shapes = TrackManager.get_interpolated_shapes(track, 0, self._db_task.size)
             for tracked_shape in tracked_shapes:
                 tracked_shape["attributes"] += track["attributes"]
+                tracked_shape["track_id"] = idx
+                tracked_shape["group"] = track["group"]
+                tracked_shape["label_id"] = track["label_id"]
 
             yield Annotation.Track(
                 label=self._get_label_name(track["label_id"]),
-                group=track['group'],
+                group=track["group"],
                 shapes=[self._export_tracked_shape(shape) for shape in tracked_shapes],
             )
 

From 5ee72e1b056a435b973881ede8177c95366dcef4 Mon Sep 17 00:00:00 2001
From: Maxim Zhiltsov <zhiltsov.max35@gmail.com>
Date: Wed, 25 Mar 2020 14:06:53 +0300
Subject: [PATCH 09/26] Enable dm test in CI

---
 .travis.yml | 1 +
 1 file changed, 1 insertion(+)

diff --git a/.travis.yml b/.travis.yml
index 8cbae296cccb..decc8f27da24 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -13,5 +13,6 @@ before_script:
 
 script:
   - docker-compose -f docker-compose.yml -f docker-compose.ci.yml run cvat_ci /bin/bash -c 'python3 manage.py test cvat/apps utils/cli'
+  - docker-compose -f docker-compose.yml -f docker-compose.ci.yml run cvat_ci /bin/bash -c 'python3 manage.py test --pattern="_tests.py" cvat/apps/dataset_manager'
   - docker-compose -f docker-compose.yml -f docker-compose.ci.yml run cvat_ci /bin/bash -c 'python3 manage.py test datumaro/'
   - docker-compose -f docker-compose.yml -f docker-compose.ci.yml run cvat_ci /bin/bash -c 'cd cvat-core && npm install && npm run test && npm run coveralls'

From b86386e1af599f10c77872843a8726db1087036f Mon Sep 17 00:00:00 2001
From: Maxim Zhiltsov <zhiltsov.max35@gmail.com>
Date: Wed, 25 Mar 2020 14:08:54 +0300
Subject: [PATCH 10/26] Fix tests

---
 cvat/apps/annotation/format.py                  |  8 ++------
 cvat/apps/dataset_manager/_tests.py             | 12 ++++++------
 cvat/apps/dataset_manager/formats/labelme.py    |  5 +++--
 cvat/apps/dataset_manager/formats/mask.py       |  5 +++--
 cvat/apps/dataset_manager/formats/mot.py        |  6 +++---
 cvat/apps/dataset_manager/formats/pascal_voc.py |  5 +++--
 cvat/apps/dataset_manager/task.py               |  9 ++++++---
 cvat/apps/engine/data_manager.py                | 11 +++++------
 cvat/apps/engine/tests/test_rest_api.py         |  7 ++++++-
 9 files changed, 37 insertions(+), 31 deletions(-)

diff --git a/cvat/apps/annotation/format.py b/cvat/apps/annotation/format.py
index 497c38125ba8..9ac2a00ca1b8 100644
--- a/cvat/apps/annotation/format.py
+++ b/cvat/apps/annotation/format.py
@@ -3,22 +3,18 @@
 # SPDX-License-Identifier: MIT
 
 from cvat.apps.annotation import models
-from django.conf import settings
 from django.core.exceptions import ObjectDoesNotExist
 from cvat.apps.annotation.serializers import AnnotationFormatSerializer
 from django.core.files import File
 
-import os
 from copy import deepcopy
 
 def register_format(format_file):
     source_code = open(format_file, 'r').read()
-    global_vars = {
-        "__builtins__": {},
-    }
+    global_vars = {}
     exec(source_code, global_vars)
     if "format_spec" not in global_vars or not isinstance(global_vars["format_spec"], dict):
-        raise Exception("Could not find \'format_spec\' definition in format file specification")
+        raise Exception("Could not find 'format_spec' definition in format file specification")
 
     format_spec = deepcopy(global_vars["format_spec"])
     format_spec["handler_file"] = File(open(format_file))
diff --git a/cvat/apps/dataset_manager/_tests.py b/cvat/apps/dataset_manager/_tests.py
index 3f43a0f18496..4f859463f8f4 100644
--- a/cvat/apps/dataset_manager/_tests.py
+++ b/cvat/apps/dataset_manager/_tests.py
@@ -284,22 +284,22 @@ def test_datumaro(self):
         self._test_export(dm.EXPORT_FORMAT_DATUMARO_PROJECT, save_images=False)
 
     def test_coco(self):
-        self._test_export('coco', save_images=True)
+        self._test_export('cvat_coco', save_images=True)
 
     def test_voc(self):
-        self._test_export('voc', save_images=True)
+        self._test_export('cvat_voc', save_images=True)
 
     def test_tf_detection_api(self):
-        self._test_export('tf_detection_api', save_images=True)
+        self._test_export('cvat_tfrecord', save_images=True)
 
     def test_yolo(self):
-        self._test_export('yolo', save_images=True)
+        self._test_export('cvat_yolo', save_images=True)
 
     def test_mot(self):
-        self._test_export('mot', save_images=True)
+        self._test_export('cvat_mot', save_images=True)
 
     def test_labelme(self):
-        self._test_export('label_me', save_images=True)
+        self._test_export('cvat_label_me', save_images=True)
 
     def test_formats_query(self):
         formats = dm.get_export_formats()
diff --git a/cvat/apps/dataset_manager/formats/labelme.py b/cvat/apps/dataset_manager/formats/labelme.py
index 3d619a3a839c..e0cbc9f4cc9d 100644
--- a/cvat/apps/dataset_manager/formats/labelme.py
+++ b/cvat/apps/dataset_manager/formats/labelme.py
@@ -23,7 +23,8 @@
 }
 
 
-class CvatLabelMeConverter:
+from datumaro.components.converter import Converter
+class CvatLabelMeConverter(Converter):
     def __init__(self, save_images=False):
         self._save_images = save_images
 
@@ -37,7 +38,7 @@ def __call__(self, extractor, save_dir):
         extractor = Dataset.from_extractors(extractor) # apply lazy transforms
 
         converter = env.make_converter('label_me', save_images=self._save_images)
-        converter(extractor, save_dir=temp_dir)
+        converter(extractor, save_dir=save_dir)
 
 def dump(file_object, annotations):
     from cvat.apps.dataset_manager.bindings import CvatAnnotationsExtractor
diff --git a/cvat/apps/dataset_manager/formats/mask.py b/cvat/apps/dataset_manager/formats/mask.py
index 7496ca96e605..c18553b32650 100644
--- a/cvat/apps/dataset_manager/formats/mask.py
+++ b/cvat/apps/dataset_manager/formats/mask.py
@@ -22,7 +22,8 @@
     ],
 }
 
-class CvatMaskConverter:
+from datumaro.components.converter import Converter
+class CvatMaskConverter(Converter):
     def __init__(self, save_images=False):
         self._save_images = save_images
 
@@ -44,7 +45,7 @@ def __call__(self, extractor, save_dir):
         converter = env.make_converter('voc_segmentation',
             apply_colormap=True, label_map='source',
             save_images=self._save_images)
-        converter(extractor, save_dir=temp_dir)
+        converter(extractor, save_dir=save_dir)
 
 def dump(file_object, annotations):
     from cvat.apps.dataset_manager.bindings import CvatAnnotationsExtractor
diff --git a/cvat/apps/dataset_manager/formats/mot.py b/cvat/apps/dataset_manager/formats/mot.py
index 2e8ccca7fa27..025b37a85827 100644
--- a/cvat/apps/dataset_manager/formats/mot.py
+++ b/cvat/apps/dataset_manager/formats/mot.py
@@ -21,7 +21,7 @@
 
 
 from datumaro.plugins.mot_format import \
-    MotConverter as _MotConverter
+    MotSeqGtConverter as _MotConverter
 class CvatMotConverter(_MotConverter):
     NAME = 'cvat_mot'
 
@@ -40,7 +40,7 @@ def dump(file_object, annotations):
 def load(file_object, annotations):
     from pyunpack import Archive
     from tempfile import TemporaryDirectory
-    from datumaro.plugins.mot_format import MotImporter
+    from datumaro.plugins.mot_format import MotSeqImporter
     import datumaro.components.extractor as datumaro
     from cvat.apps.dataset_manager.bindings import match_frame
 
@@ -50,7 +50,7 @@ def load(file_object, annotations):
 
         tracks = {}
 
-        dm_dataset = MotImporter()(tmp_dir).make_dataset()
+        dm_dataset = MotSeqImporter()(tmp_dir).make_dataset()
         label_cat = dm_dataset.categories()[datumaro.AnnotationType.label]
 
         for item in dm_dataset:
diff --git a/cvat/apps/dataset_manager/formats/pascal_voc.py b/cvat/apps/dataset_manager/formats/pascal_voc.py
index f52cb5e3c268..a74d14ba3f2c 100644
--- a/cvat/apps/dataset_manager/formats/pascal_voc.py
+++ b/cvat/apps/dataset_manager/formats/pascal_voc.py
@@ -62,7 +62,8 @@ def load(file_object, annotations):
         dm_dataset = dm_project.make_dataset()
         import_dm_annotations(dm_dataset, annotations)
 
-class CvatVocConverter:
+from datumaro.components.converter import Converter
+class CvatVocConverter(Converter):
     def __init__(self, save_images=False):
         self._save_images = save_images
 
@@ -76,7 +77,7 @@ def __call__(self, extractor, save_dir):
 
         converter = env.make_converter('voc', label_map='source',
             save_images=self._save_images)
-        converter(extractor, save_dir=temp_dir)
+        converter(extractor, save_dir=save_dir)
 
 def dump(file_object, annotations):
     from cvat.apps.dataset_manager.bindings import CvatAnnotationsExtractor
diff --git a/cvat/apps/dataset_manager/task.py b/cvat/apps/dataset_manager/task.py
index 00a4ec70c1ef..7919d89f471d 100644
--- a/cvat/apps/dataset_manager/task.py
+++ b/cvat/apps/dataset_manager/task.py
@@ -18,13 +18,14 @@
 from cvat.apps.engine.models import Task
 from .util import current_function_name, make_zip_archive
 
-_CVAT_ROOT_DIR = __file__[:__file__.rfind('cvat/')]
+_CVAT_ROOT_DIR = __file__[:__file__.rfind(osp.join('cvat', ''))]
 _DATUMARO_REPO_PATH = osp.join(_CVAT_ROOT_DIR, 'datumaro')
 sys.path.append(_DATUMARO_REPO_PATH)
 from datumaro.components.project import Project, Environment
 import datumaro.components.extractor as datumaro
 from .bindings import CvatImagesDirExtractor, CvatTaskExtractor
 
+_FORMATS_DIR = osp.join(osp.dirname(__file__), 'formats')
 
 _MODULE_NAME = __package__ + '.' + osp.splitext(osp.basename(__file__))[0]
 def log_exception(logger=None, exc_info=True):
@@ -96,7 +97,7 @@ def _load(self):
     def _import_from_task(self, user):
         self._project = Project.generate(self._project_dir, config={
             'project_name': self._db_task.name,
-            'plugins_dir': osp.join(osp.dirname(__file__), 'formats'),
+            'plugins_dir': _FORMATS_DIR,
         })
 
         self._project.add_source('task_%s_images' % self._db_task.id, {
@@ -347,7 +348,9 @@ def clear_export_cache(task_id, file_path, file_ctime):
 ]
 
 def get_export_formats():
-    converters = Environment().converters
+    converters = Environment(config={
+        'plugins_dir': _FORMATS_DIR
+    }).converters
 
     available_formats = set(converters.items)
     available_formats.add(EXPORT_FORMAT_DATUMARO_PROJECT)
diff --git a/cvat/apps/engine/data_manager.py b/cvat/apps/engine/data_manager.py
index 00586ea57e0f..4922fc28087b 100644
--- a/cvat/apps/engine/data_manager.py
+++ b/cvat/apps/engine/data_manager.py
@@ -222,12 +222,11 @@ def to_shapes(self, end_frame):
         shapes = []
         for idx, track in enumerate(self.objects):
             for shape in TrackManager.get_interpolated_shapes(track, 0, end_frame):
-                if not shape["outside"]:
-                    shape["label_id"] = track["label_id"]
-                    shape["group"] = track["group"]
-                    shape["track_id"] = idx
-                    shape["attributes"] += track["attributes"]
-                    shapes.append(shape)
+                shape["label_id"] = track["label_id"]
+                shape["group"] = track["group"]
+                shape["track_id"] = idx
+                shape["attributes"] += track["attributes"]
+                shapes.append(shape)
         return shapes
 
     @staticmethod
diff --git a/cvat/apps/engine/tests/test_rest_api.py b/cvat/apps/engine/tests/test_rest_api.py
index 597af7596e8e..6c060bb2ba59 100644
--- a/cvat/apps/engine/tests/test_rest_api.py
+++ b/cvat/apps/engine/tests/test_rest_api.py
@@ -2143,7 +2143,12 @@ def _get_annotation_formats(self, user):
     def _check_response(self, response, data):
         if not response.status_code in [
             status.HTTP_401_UNAUTHORIZED, status.HTTP_403_FORBIDDEN]:
-            compare_objects(self, data, response.data, ignore_keys=["id"])
+            try:
+                compare_objects(self, data, response.data, ignore_keys=["id"])
+            except AssertionError as e:
+                print("Objects are not equal: ", data, response.data)
+                print(e)
+                raise
 
     def _run_api_v1_tasks_id_annotations(self, owner, assignee, annotator):
         task, _ = self._create_task(owner, assignee)

From 04b77d9e654827c16d135f687c216857b26ebd71 Mon Sep 17 00:00:00 2001
From: Maxim Zhiltsov <zhiltsov.max35@gmail.com>
Date: Wed, 25 Mar 2020 14:14:33 +0300
Subject: [PATCH 11/26] Add import

---
 datumaro/datumaro/plugins/labelme_format.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/datumaro/datumaro/plugins/labelme_format.py b/datumaro/datumaro/plugins/labelme_format.py
index 41069da9dab9..8eb8542b769c 100644
--- a/datumaro/datumaro/plugins/labelme_format.py
+++ b/datumaro/datumaro/plugins/labelme_format.py
@@ -10,7 +10,7 @@
 import os
 import os.path as osp
 
-from datumaro.components.extractor import (SourceExtractor,
+from datumaro.components.extractor import (SourceExtractor, DEFAULT_SUBSET_NAME,
     DatasetItem, AnnotationType, Mask, Bbox, Polygon, LabelCategories
 )
 from datumaro.components.extractor import Importer

From a96b06350fc5b7a397f4775eef36083e76fe8870 Mon Sep 17 00:00:00 2001
From: Maxim Zhiltsov <zhiltsov.max35@gmail.com>
Date: Wed, 25 Mar 2020 17:06:16 +0300
Subject: [PATCH 12/26] Fix tests

---
 cvat/apps/dataset_manager/formats/labelme.py |  2 +-
 cvat/apps/dataset_manager/formats/mot.py     |  4 ++--
 cvat/apps/engine/tests/test_rest_api.py      |  2 +-
 datumaro/datumaro/plugins/labelme_format.py  | 14 +++++++++-----
 4 files changed, 13 insertions(+), 9 deletions(-)

diff --git a/cvat/apps/dataset_manager/formats/labelme.py b/cvat/apps/dataset_manager/formats/labelme.py
index e0cbc9f4cc9d..8cc0d880cc18 100644
--- a/cvat/apps/dataset_manager/formats/labelme.py
+++ b/cvat/apps/dataset_manager/formats/labelme.py
@@ -46,7 +46,7 @@ def dump(file_object, annotations):
     from tempfile import TemporaryDirectory
 
     extractor = CvatAnnotationsExtractor('', annotations)
-    converter = CvatMotConverter()
+    converter = CvatLabelMeConverter()
     with TemporaryDirectory() as temp_dir:
         converter(extractor, save_dir=temp_dir)
         make_zip_archive(temp_dir, file_object)
diff --git a/cvat/apps/dataset_manager/formats/mot.py b/cvat/apps/dataset_manager/formats/mot.py
index 025b37a85827..46d9fd27ec7e 100644
--- a/cvat/apps/dataset_manager/formats/mot.py
+++ b/cvat/apps/dataset_manager/formats/mot.py
@@ -4,7 +4,7 @@
     "dumpers": [
         {
             "display_name": "{name} {format} {version}",
-            "format": "CSV",
+            "format": "ZIP",
             "version": "1.1",
             "handler": "dump"
         },
@@ -12,7 +12,7 @@
     "loaders": [
         {
             "display_name": "{name} {format} {version}",
-            "format": "CSV",
+            "format": "ZIP",
             "version": "1.1",
             "handler": "load",
         }
diff --git a/cvat/apps/engine/tests/test_rest_api.py b/cvat/apps/engine/tests/test_rest_api.py
index 6c060bb2ba59..c2e7b66ca682 100644
--- a/cvat/apps/engine/tests/test_rest_api.py
+++ b/cvat/apps/engine/tests/test_rest_api.py
@@ -2722,7 +2722,7 @@ def _get_initial_annotation(annotation_format):
                 annotations["shapes"] = rectangle_shapes_wo_attrs + polygon_shapes_wo_attrs
                 annotations["tracks"] = rectangle_tracks_wo_attrs
 
-            elif annotation_format == "MOT CSV 1.1":
+            elif annotation_format == "MOT ZIP 1.1":
                 annotations["tracks"] = rectangle_tracks_wo_attrs
 
             elif annotation_format == "LabelMe ZIP 3.0":
diff --git a/datumaro/datumaro/plugins/labelme_format.py b/datumaro/datumaro/plugins/labelme_format.py
index 8eb8542b769c..e95f4328e79e 100644
--- a/datumaro/datumaro/plugins/labelme_format.py
+++ b/datumaro/datumaro/plugins/labelme_format.py
@@ -95,9 +95,16 @@ def parse_attributes(attr_str):
             for attr in [a.strip() for a in attr_str.split(',') if a.strip()]:
                 if '=' in attr:
                     name, value = attr.split('=', maxsplit=1)
+                    if value.lower() in {'true', 'false'}:
+                        value = value.lower() == 'true'
+                    else:
+                        try:
+                            value = float(value)
+                        except Exception:
+                            pass
                     parsed.append((name, value))
                 else:
-                    parsed.append((attr, '1'))
+                    parsed.append((attr, True))
 
             return parsed
 
@@ -440,10 +447,7 @@ def _save_item(self, item, subset_dir):
 
             attrs = []
             for k, v in ann.attributes.items():
-                if isinstance(v, bool):
-                    attrs.append(k)
-                else:
-                    attrs.append('%s=%s' % (k, v))
+                attrs.append('%s=%s' % (k, v))
             ET.SubElement(obj_elem, 'attributes').text = ', '.join(attrs)
 
             obj_id += 1

From 93fc0aa1b8efa1b61d1ed7b78b448d17116a1da0 Mon Sep 17 00:00:00 2001
From: Zhiltsov Max <zhiltsov.max35@gmail.com>
Date: Wed, 25 Mar 2020 17:12:23 +0300
Subject: [PATCH 13/26] Fix mot track ids

---
 datumaro/datumaro/plugins/mot_format.py | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/datumaro/datumaro/plugins/mot_format.py b/datumaro/datumaro/plugins/mot_format.py
index 18d3695b1450..2d2046d53223 100644
--- a/datumaro/datumaro/plugins/mot_format.py
+++ b/datumaro/datumaro/plugins/mot_format.py
@@ -291,6 +291,8 @@ def __call__(self, extractor, save_dir):
         anno_file = osp.join(anno_dir, MotPath.GT_FILENAME)
         with open(anno_file, 'w', encoding="utf-8") as csv_file:
             writer = csv.DictWriter(csv_file, fieldnames=MotPath.FIELDS)
+
+            track_id_mapping = {-1: -1}
             for idx, item in enumerate(extractor):
                 log.debug("Converting item '%s'", item.id)
 
@@ -300,9 +302,13 @@ def __call__(self, extractor, save_dir):
                     if anno.type != AnnotationType.bbox:
                         continue
 
+                    track_id = int(anno.attributes.get('track_id', -1))
+                    if track_id not in track_id_mapping:
+                        track_id_mapping[track_id] = len(track_id_mapping)
+                    track_id = track_id_mapping[track_id]
                     writer.writerow({
                         'frame_id': frame_id,
-                        'track_id': int(anno.attributes.get('track_id', -1)),
+                        'track_id': track_id,
                         'x': anno.x,
                         'y': anno.y,
                         'w': anno.w,

From 872b25ecd9790ee00d1af4c45be7b01914467e61 Mon Sep 17 00:00:00 2001
From: Maxim Zhiltsov <zhiltsov.max35@gmail.com>
Date: Wed, 25 Mar 2020 17:15:59 +0300
Subject: [PATCH 14/26] Fix mot format

---
 cvat/apps/dataset_manager/bindings.py    | 2 +-
 cvat/apps/dataset_manager/formats/mot.py | 1 -
 2 files changed, 1 insertion(+), 2 deletions(-)

diff --git a/cvat/apps/dataset_manager/bindings.py b/cvat/apps/dataset_manager/bindings.py
index a75956a01b20..6bf050de8dbb 100644
--- a/cvat/apps/dataset_manager/bindings.py
+++ b/cvat/apps/dataset_manager/bindings.py
@@ -149,7 +149,7 @@ def convert_attrs(label, cvat_attrs):
             anno_attr['occluded'] = shape_obj.occluded
             anno_attr['z_order'] = shape_obj.z_order
 
-            if 'track_id' in shape_obj:
+            if hasattr(shape_obj, 'track_id'):
                 anno_attr['track_id'] = shape_obj.track_id
                 anno_attr['keyframe'] = shape_obj.keyframe
 
diff --git a/cvat/apps/dataset_manager/formats/mot.py b/cvat/apps/dataset_manager/formats/mot.py
index 46d9fd27ec7e..ced2fccabfe8 100644
--- a/cvat/apps/dataset_manager/formats/mot.py
+++ b/cvat/apps/dataset_manager/formats/mot.py
@@ -76,7 +76,6 @@ def load(file_object, annotations):
                 )
 
                 # build trajectories as lists of shapes in track dict
-                track_id = int(track_id)
                 if track_id not in tracks:
                     tracks[track_id] = annotations.Track(
                         label_cat.items[ann.label].name, 0, [])

From 0acfc6c86ed03bd6a5996d58bf5d55319ecd0e92 Mon Sep 17 00:00:00 2001
From: Maxim Zhiltsov <zhiltsov.max35@gmail.com>
Date: Wed, 25 Mar 2020 17:26:17 +0300
Subject: [PATCH 15/26] Update attribute logic in labelme tests

---
 datumaro/tests/test_labelme_format.py | 18 ++++++++++++------
 1 file changed, 12 insertions(+), 6 deletions(-)

diff --git a/datumaro/tests/test_labelme_format.py b/datumaro/tests/test_labelme_format.py
index 35fa2ca848b4..098fd26392ed 100644
--- a/datumaro/tests/test_labelme_format.py
+++ b/datumaro/tests/test_labelme_format.py
@@ -36,7 +36,10 @@ def __iter__(self):
                         annotations=[
                             Bbox(0, 4, 4, 8, label=2, group=2),
                             Polygon([0, 4, 4, 4, 5, 6], label=3, attributes={
-                                'occluded': True
+                                'occluded': True,
+                                'a1': 'qwe',
+                                'a2': True,
+                                'a3': 123,
                             }),
                             Mask(np.array([[0, 1], [1, 0], [1, 1]]), group=2,
                                 attributes={ 'username': 'test' }),
@@ -70,6 +73,9 @@ def __iter__(self):
                             Polygon([0, 4, 4, 4, 5, 6], label=1, id=1,
                                 attributes={
                                     'occluded': True, 'username': '',
+                                    'a1': 'qwe',
+                                    'a2': True,
+                                    'a3': 123,
                                 }
                             ),
                             Mask(np.array([[0, 1], [1, 0], [1, 1]]), group=2,
@@ -150,7 +156,7 @@ def __iter__(self):
                             Polygon([30, 12, 42, 21, 24, 26, 15, 22, 18, 14, 22, 12, 27, 12],
                                 label=2, group=2, id=2,
                                 attributes={
-                                    'a1': '1',
+                                    'a1': True,
                                     'occluded': True,
                                     'username': 'anonymous'
                                 }
@@ -158,21 +164,21 @@ def __iter__(self):
                             Polygon([35, 21, 43, 22, 40, 28, 28, 31, 31, 22, 32, 25],
                                 label=3, group=2, id=3,
                                 attributes={
-                                    'kj': '1',
+                                    'kj': True,
                                     'occluded': False,
                                     'username': 'anonymous'
                                 }
                             ),
                             Bbox(13, 19, 10, 11, label=4, group=2, id=4,
                                 attributes={
-                                    'hg': '1',
+                                    'hg': True,
                                     'occluded': True,
                                     'username': 'anonymous'
                                 }
                             ),
                             Mask(mask2, label=5, group=1, id=5,
                                 attributes={
-                                    'd': '1',
+                                    'd': True,
                                     'occluded': False,
                                     'username': 'anonymous'
                                 }
@@ -180,7 +186,7 @@ def __iter__(self):
                             Polygon([64, 21, 74, 24, 72, 32, 62, 34, 60, 27, 62, 22],
                                 label=6, group=1, id=6,
                                 attributes={
-                                    'gfd lkj lkj hi': '1',
+                                    'gfd lkj lkj hi': True,
                                     'occluded': False,
                                     'username': 'anonymous'
                                 }

From 5030c5d9b95488e7e4c7b3260ec09c1e5cd8fec7 Mon Sep 17 00:00:00 2001
From: Maxim Zhiltsov <zhiltsov.max35@gmail.com>
Date: Thu, 26 Mar 2020 11:13:20 +0300
Subject: [PATCH 16/26] Use common code in yolo

---
 cvat/apps/dataset_manager/formats/yolo.py | 17 ++++++-----------
 1 file changed, 6 insertions(+), 11 deletions(-)

diff --git a/cvat/apps/dataset_manager/formats/yolo.py b/cvat/apps/dataset_manager/formats/yolo.py
index 4d397e35385d..f21ebe43634a 100644
--- a/cvat/apps/dataset_manager/formats/yolo.py
+++ b/cvat/apps/dataset_manager/formats/yolo.py
@@ -27,8 +27,9 @@ def load(file_object, annotations):
     import os.path as osp
     from tempfile import TemporaryDirectory
     from glob import glob
+    from datumaro.components.extractor import DatasetItem
     from datumaro.plugins.yolo_format.importer import YoloImporter
-    from cvat.apps.dataset_manager.bindings import import_dm_annotations
+    from cvat.apps.dataset_manager.bindings import import_dm_annotations, match_frame
 
     archive_file = file_object if isinstance(file_object, str) else getattr(file_object, "name")
     with TemporaryDirectory() as tmp_dir:
@@ -37,21 +38,15 @@ def load(file_object, annotations):
         image_info = {}
         anno_files = glob(osp.join(tmp_dir, '**', '*.txt'), recursive=True)
         for filename in anno_files:
-            filename = osp.basename(filename)
+            filename = osp.splitext(osp.basename(filename))[0]
             frame_info = None
             try:
-                frame_info = annotations.frame_info[
-                    int(osp.splitext(filename)[0])]
-            except Exception:
-                pass
-            try:
-                frame_info = annotations.match_frame(filename)
-                frame_info = annotations.frame_info[frame_info]
+                frame_id = match_frame(DatasetItem(id=filename), annotations)
+                frame_info = annotations.frame_info[frame_id]
             except Exception:
                 pass
             if frame_info is not None:
-                image_info[osp.splitext(filename)[0]] = \
-                    (frame_info['height'], frame_info['width'])
+                image_info[filename] = (frame_info['height'], frame_info['width'])
 
         dm_project = YoloImporter()(tmp_dir, image_info=image_info)
         dm_dataset = dm_project.make_dataset()

From 370118778918682e0caed9fb9344139fdc293612 Mon Sep 17 00:00:00 2001
From: Maxim Zhiltsov <zhiltsov.max35@gmail.com>
Date: Thu, 26 Mar 2020 13:07:03 +0300
Subject: [PATCH 17/26] Put datumaro in path in settings

---
 cvat/apps/dataset_manager/task.py | 5 +----
 cvat/settings/base.py             | 3 +++
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/cvat/apps/dataset_manager/task.py b/cvat/apps/dataset_manager/task.py
index 7919d89f471d..7f091c4ea60a 100644
--- a/cvat/apps/dataset_manager/task.py
+++ b/cvat/apps/dataset_manager/task.py
@@ -8,19 +8,16 @@
 import os
 import os.path as osp
 import shutil
-import sys
 import tempfile
 
 from django.utils import timezone
 import django_rq
 
+from cvat.settings.base import DATUMARO_PATH as _DATUMARO_REPO_PATH
 from cvat.apps.engine.log import slogger
 from cvat.apps.engine.models import Task
 from .util import current_function_name, make_zip_archive
 
-_CVAT_ROOT_DIR = __file__[:__file__.rfind(osp.join('cvat', ''))]
-_DATUMARO_REPO_PATH = osp.join(_CVAT_ROOT_DIR, 'datumaro')
-sys.path.append(_DATUMARO_REPO_PATH)
 from datumaro.components.project import Project, Environment
 import datumaro.components.extractor as datumaro
 from .bindings import CvatImagesDirExtractor, CvatTaskExtractor
diff --git a/cvat/settings/base.py b/cvat/settings/base.py
index e5f577d448b6..b15e20994279 100644
--- a/cvat/settings/base.py
+++ b/cvat/settings/base.py
@@ -401,3 +401,6 @@ def generate_ssh_keys():
 DATA_UPLOAD_MAX_NUMBER_FIELDS = None   # this django check disabled
 LOCAL_LOAD_MAX_FILES_COUNT = 500
 LOCAL_LOAD_MAX_FILES_SIZE = 512 * 1024 * 1024  # 512 MB
+
+DATUMARO_PATH = os.path.join(BASE_DIR, 'datumaro')
+sys.path.append(DATUMARO_PATH)
\ No newline at end of file

From 875c701415548ba9fb17b6e1134de9e138dc8380 Mon Sep 17 00:00:00 2001
From: Maxim Zhiltsov <zhiltsov.max35@gmail.com>
Date: Thu, 26 Mar 2020 14:00:00 +0300
Subject: [PATCH 18/26] Expect labels file in MOT next to annotations file

---
 datumaro/datumaro/plugins/mot_format.py | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/datumaro/datumaro/plugins/mot_format.py b/datumaro/datumaro/plugins/mot_format.py
index 2d2046d53223..331e0e693bb3 100644
--- a/datumaro/datumaro/plugins/mot_format.py
+++ b/datumaro/datumaro/plugins/mot_format.py
@@ -94,9 +94,8 @@ def __init__(self, path, labels=None, occlusion_threshold=0, is_gt=None):
         self._subset = None
 
         if labels is None:
-            if osp.isfile(osp.join(seq_root, MotPath.LABELS_FILE)):
-                labels = osp.join(seq_root, MotPath.LABELS_FILE)
-            else:
+            labels = osp.join(osp.dirname(path), MotPath.LABELS_FILE)
+            if not osp.isfile(labels):
                 labels = [lbl.name for lbl in MotLabel]
         if isinstance(labels, str):
             labels = self._parse_labels(labels)
@@ -330,7 +329,7 @@ def __call__(self, extractor, save_dir):
                     else:
                         log.debug("Item '%s' has no image" % item.id)
 
-        labels_file = osp.join(save_dir, MotPath.LABELS_FILE)
+        labels_file = osp.join(anno_dir, MotPath.LABELS_FILE)
         with open(labels_file, 'w', encoding='utf-8') as f:
             f.write('\n'.join(l.name
                 for l in extractor.categories()[AnnotationType.label].items)

From b316bb0243565b1fe49f082bc03021a9b081e7cd Mon Sep 17 00:00:00 2001
From: Maxim Zhiltsov <zhiltsov.max35@gmail.com>
Date: Thu, 26 Mar 2020 14:00:19 +0300
Subject: [PATCH 19/26] Add MOT format description

---
 cvat/apps/annotation/README.md | 35 +++++++++++++++++++++++++++++++++-
 1 file changed, 34 insertions(+), 1 deletion(-)

diff --git a/cvat/apps/annotation/README.md b/cvat/apps/annotation/README.md
index 279d7a4d6068..867520f23a8b 100644
--- a/cvat/apps/annotation/README.md
+++ b/cvat/apps/annotation/README.md
@@ -564,4 +564,37 @@ python create_pascal_tf_record.py --data_dir <path to VOCdevkit> --set train --y
       └── image2.png
   ```
 - supported shapes: Polygons
-- additional comments: the CVAT task should be created with the full label set that may be in the annotation files
\ No newline at end of file
+- additional comments: the CVAT task should be created with the full label set that may be in the annotation files
+
+### [MOT sequence](https://arxiv.org/pdf/1906.04567.pdf)
+#### Dumper
+- downloaded file: a zip archive of the following structure:
+  ```bash
+  taskname.zip/
+  └── gt/
+      ├── labels.txt
+      └── gt.txt
+
+  # labels.txt
+  cat
+  dog
+  person
+  ...
+
+  # gt.txt
+  # frame_id, track_id, x, y, w, h, not ignored, class_id, visibility, <ignored>
+  1,1,1363,569,103,241,1,1,0.86014
+  ...
+
+  ```
+- supported annotations: Rectangle shapes and tracks
+- supported attributes: `visibility` (number), `ignored` (checkbox)
+
+#### Loader
+- uploaded file: a zip archive of the structure above or:
+  ```bash
+  taskname.zip/
+  ├── labels.txt # for non-official labels
+  └── gt.txt
+  ```
+- supported annotations: Rectangle tracks
\ No newline at end of file

From fba95df78562c5e326f1bb89c1ed02a3efcbf9ff Mon Sep 17 00:00:00 2001
From: Maxim Zhiltsov <zhiltsov.max35@gmail.com>
Date: Thu, 26 Mar 2020 14:02:20 +0300
Subject: [PATCH 20/26] Add import

---
 cvat/apps/dataset_manager/task.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/cvat/apps/dataset_manager/task.py b/cvat/apps/dataset_manager/task.py
index 7f091c4ea60a..33c44287465c 100644
--- a/cvat/apps/dataset_manager/task.py
+++ b/cvat/apps/dataset_manager/task.py
@@ -13,7 +13,8 @@
 from django.utils import timezone
 import django_rq
 
-from cvat.settings.base import DATUMARO_PATH as _DATUMARO_REPO_PATH
+from cvat.settings.base import DATUMARO_PATH as _DATUMARO_REPO_PATH, \
+    BASE_DIR as _CVAT_ROOT_DIR
 from cvat.apps.engine.log import slogger
 from cvat.apps.engine.models import Task
 from .util import current_function_name, make_zip_archive

From b67f7c4e313f28d2709104c15b56e39050c7fb38 Mon Sep 17 00:00:00 2001
From: Maxim Zhiltsov <zhiltsov.max35@gmail.com>
Date: Thu, 26 Mar 2020 14:16:06 +0300
Subject: [PATCH 21/26] Add labelme format description

---
 cvat/apps/annotation/README.md | 25 ++++++++++++++++++++++++-
 1 file changed, 24 insertions(+), 1 deletion(-)

diff --git a/cvat/apps/annotation/README.md b/cvat/apps/annotation/README.md
index 867520f23a8b..b7e8ce6f549d 100644
--- a/cvat/apps/annotation/README.md
+++ b/cvat/apps/annotation/README.md
@@ -571,6 +571,8 @@ python create_pascal_tf_record.py --data_dir <path to VOCdevkit> --set train --y
 - downloaded file: a zip archive of the following structure:
   ```bash
   taskname.zip/
+  ├── images/
+  |   └── img1.jpg
   └── gt/
       ├── labels.txt
       └── gt.txt
@@ -597,4 +599,25 @@ python create_pascal_tf_record.py --data_dir <path to VOCdevkit> --set train --y
   ├── labels.txt # for non-official labels
   └── gt.txt
   ```
-- supported annotations: Rectangle tracks
\ No newline at end of file
+- supported annotations: Rectangle tracks
+
+### [LabelMe](http://labelme.csail.mit.edu/Release3.0)
+#### Dumper
+- downloaded file: a zip archive of the following structure:
+  ```bash
+  taskname.zip/
+  ├── img1.jpg
+  └── img1.xml
+  ```
+- supported annotations: Rectangles, Polygons (with attributes)
+
+#### Loader
+- uploaded file: a zip archive of the following structure:
+  ```bash
+  taskname.zip/
+  ├── Masks/
+  |   └── img1_mask1.png
+  ├── img1.xml
+  └── img2.xml
+  ```
+- supported annotations: Rectangles, Polygons, Masks (as polygons)

From 5ab79373c9f04d75ce9c51ffa5273510e1a57f28 Mon Sep 17 00:00:00 2001
From: Maxim Zhiltsov <zhiltsov.max35@gmail.com>
Date: Thu, 26 Mar 2020 14:30:58 +0300
Subject: [PATCH 22/26] Linter fix

---
 cvat/apps/annotation/README.md | 1 +
 1 file changed, 1 insertion(+)

diff --git a/cvat/apps/annotation/README.md b/cvat/apps/annotation/README.md
index b7e8ce6f549d..17a6e56c5d02 100644
--- a/cvat/apps/annotation/README.md
+++ b/cvat/apps/annotation/README.md
@@ -1,3 +1,4 @@
+<!--lint disable list-item-indent-->
 ## Description
 
 The purpose of this application is to add support for multiple annotation formats for CVAT.

From f08652d7ff1976df165796d86cc7759689173956 Mon Sep 17 00:00:00 2001
From: Maxim Zhiltsov <zhiltsov.max35@gmail.com>
Date: Thu, 26 Mar 2020 14:53:16 +0300
Subject: [PATCH 23/26] Linter fix2

---
 cvat/apps/annotation/README.md | 1 +
 1 file changed, 1 insertion(+)

diff --git a/cvat/apps/annotation/README.md b/cvat/apps/annotation/README.md
index 17a6e56c5d02..e86748a65210 100644
--- a/cvat/apps/annotation/README.md
+++ b/cvat/apps/annotation/README.md
@@ -1,4 +1,5 @@
 <!--lint disable list-item-indent-->
+<!--lint disable no-duplicate-headings-->
 ## Description
 
 The purpose of this application is to add support for multiple annotation formats for CVAT.

From 0df9bc450e9eef45b2319d9c07935493f39adbb7 Mon Sep 17 00:00:00 2001
From: Maxim Zhiltsov <zhiltsov.max35@gmail.com>
Date: Thu, 26 Mar 2020 15:16:55 +0300
Subject: [PATCH 24/26] Compare attributes ordered

---
 cvat/apps/engine/tests/test_rest_api.py | 10 +++++++---
 1 file changed, 7 insertions(+), 3 deletions(-)

diff --git a/cvat/apps/engine/tests/test_rest_api.py b/cvat/apps/engine/tests/test_rest_api.py
index c2e7b66ca682..18ed9526f041 100644
--- a/cvat/apps/engine/tests/test_rest_api.py
+++ b/cvat/apps/engine/tests/test_rest_api.py
@@ -13,7 +13,6 @@
 from django.contrib.auth.models import User, Group
 from cvat.apps.engine.models import (Task, Segment, Job, StatusChoice,
     AttributeType, Project)
-from cvat.apps.annotation.models import AnnotationFormat
 from unittest import mock
 import io
 import xml.etree.ElementTree as ET
@@ -1546,10 +1545,15 @@ def test_api_v1_tasks_id_data_no_auth(self):
 def compare_objects(self, obj1, obj2, ignore_keys, fp_tolerance=.001):
     if isinstance(obj1, dict):
         self.assertTrue(isinstance(obj2, dict), "{} != {}".format(obj1, obj2))
-        for k in obj1.keys():
+        for k, v1 in obj1.items():
             if k in ignore_keys:
                 continue
-            compare_objects(self, obj1[k], obj2.get(k), ignore_keys)
+            v2 = obj2[k]
+            if k == 'attributes':
+                key = lambda a: a['spec_id']
+                v1.sort(key=key)
+                v2.sort(key=key)
+            compare_objects(self, v1, v2, ignore_keys)
     elif isinstance(obj1, list):
         self.assertTrue(isinstance(obj2, list), "{} != {}".format(obj1, obj2))
         self.assertEqual(len(obj1), len(obj2), "{} != {}".format(obj1, obj2))

From f754641cf47a000aff0c269c08439b4780c228fe Mon Sep 17 00:00:00 2001
From: Maxim Zhiltsov <zhiltsov.max35@gmail.com>
Date: Fri, 27 Mar 2020 12:58:28 +0300
Subject: [PATCH 25/26] Update docs

---
 cvat/apps/annotation/README.md | 23 +++++++++++++----------
 1 file changed, 13 insertions(+), 10 deletions(-)

diff --git a/cvat/apps/annotation/README.md b/cvat/apps/annotation/README.md
index e86748a65210..101a28ac2f26 100644
--- a/cvat/apps/annotation/README.md
+++ b/cvat/apps/annotation/README.md
@@ -527,10 +527,10 @@ python create_pascal_tf_record.py --data_dir <path to VOCdevkit> --set train --y
   │   └── Segmentation/
   │       └── default.txt # list of image names without extension
   ├── SegmentationClass/ # merged class masks
-  │   └── image1.png
+  │   ├── image1.png
   │   └── image2.png
   └── SegmentationObject/ # merged instance masks
-      └── image1.png
+      ├── image1.png
       └── image2.png
   ```
   Mask is a png image with several (RGB) channels where each pixel has own color which corresponds to a label.
@@ -559,10 +559,10 @@ python create_pascal_tf_record.py --data_dir <path to VOCdevkit> --set train --y
   │   └── Segmentation/
   │       └── <any_subset_name>.txt
   ├── SegmentationClass/
-  │   └── image1.png
+  │   ├── image1.png
   │   └── image2.png
   └── SegmentationObject/
-      └── image.png
+      ├── image1.png
       └── image2.png
   ```
 - supported shapes: Polygons
@@ -573,8 +573,9 @@ python create_pascal_tf_record.py --data_dir <path to VOCdevkit> --set train --y
 - downloaded file: a zip archive of the following structure:
   ```bash
   taskname.zip/
-  ├── images/
-  |   └── img1.jpg
+  ├── img1/
+  |   ├── image1.jpg
+  |   └── image2.jpg
   └── gt/
       ├── labels.txt
       └── gt.txt
@@ -586,7 +587,7 @@ python create_pascal_tf_record.py --data_dir <path to VOCdevkit> --set train --y
   ...
 
   # gt.txt
-  # frame_id, track_id, x, y, w, h, not ignored, class_id, visibility, <ignored>
+  # frame_id, track_id, x, y, w, h, "not ignored", class_id, visibility, <skipped>
   1,1,1363,569,103,241,1,1,0.86014
   ...
 
@@ -598,7 +599,7 @@ python create_pascal_tf_record.py --data_dir <path to VOCdevkit> --set train --y
 - uploaded file: a zip archive of the structure above or:
   ```bash
   taskname.zip/
-  ├── labels.txt # for non-official labels
+  ├── labels.txt # optional; required only for non-official labels
   └── gt.txt
   ```
 - supported annotations: Rectangle tracks
@@ -618,8 +619,10 @@ python create_pascal_tf_record.py --data_dir <path to VOCdevkit> --set train --y
   ```bash
   taskname.zip/
   ├── Masks/
-  |   └── img1_mask1.png
+  |   ├── img1_mask1.png
+  |   └── img1_mask2.png
   ├── img1.xml
-  └── img2.xml
+  ├── img2.xml
+  └── img3.xml
   ```
 - supported annotations: Rectangles, Polygons, Masks (as polygons)

From 9bd00c69fa6f62bf8d1bbb705f0d382cc8361ff2 Mon Sep 17 00:00:00 2001
From: Maxim Zhiltsov <zhiltsov.max35@gmail.com>
Date: Mon, 30 Mar 2020 13:44:38 +0300
Subject: [PATCH 26/26] Update tests

---
 cvat/apps/dataset_manager/_tests.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/cvat/apps/dataset_manager/_tests.py b/cvat/apps/dataset_manager/_tests.py
index 4f859463f8f4..1a5300756e1f 100644
--- a/cvat/apps/dataset_manager/_tests.py
+++ b/cvat/apps/dataset_manager/_tests.py
@@ -122,7 +122,6 @@ def _generate_task(self):
             "overlap": 0,
             "segment_size": 100,
             "z_order": False,
-            "image_quality": 75,
             "labels": [
                 {
                     "name": "car",
@@ -253,6 +252,7 @@ def _create_task(self, data, size):
                 "client_files[%d]" % i: generate_image_file("image_%d.jpg" % i)
                 for i in range(size)
             }
+            images["image_quality"] = 75
             response = self.client.post("/api/v1/tasks/{}/data".format(tid), data=images)
             assert response.status_code == status.HTTP_202_ACCEPTED, response.status_code