Merged develop

cvat-ai · Jun 22, 2020 · aad9c09 · aad9c09
2 parents f86410b + 2a349d0
commit aad9c09
Show file tree

Hide file tree

Showing 8 changed files with 168 additions and 40 deletions.
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -24,6 +24,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 ### Changed
 - Removed information about e-mail from the basic user information (<https://github.com/opencv/cvat/pull/1627>)
 - Updated the HTTPS install manual to make it easier and more robust; it now includes automatic renewal of Let's Encrypt certificates.
+- Implemented import and export of annotations with relative image paths (<https://github.com/opencv/cvat/pull/1463>)
 - Using only single click to start editing or remove a point (<https://github.com/opencv/cvat/pull/1571>)
 
 ### Deprecated

diff --git a/cvat/apps/dataset_manager/bindings.py b/cvat/apps/dataset_manager/bindings.py
@@ -5,6 +5,7 @@
 
 import os.path as osp
 from collections import OrderedDict, namedtuple
+from pathlib import Path
 
 from django.utils import timezone
 
@@ -125,8 +126,8 @@ def _init_frame_info(self):
             } for db_image in self._db_task.data.images.all()}
 
         self._frame_mapping = {
-            self._get_filename(info["path"]): frame
-            for frame, info in self._frame_info.items()
+            self._get_filename(info["path"]): frame_number
+            for frame_number, info in self._frame_info.items()
         }
 
     def _init_meta(self):
@@ -398,16 +399,27 @@ def db_task(self):
 
     @staticmethod
     def _get_filename(path):
-        return osp.splitext(osp.basename(path))[0]
-
-    def match_frame(self, filename):
-        # try to match by filename
-        _filename = self._get_filename(filename)
-        if _filename in self._frame_mapping:
-            return self._frame_mapping[_filename]
-
-        raise Exception(
-            "Cannot match filename or determine frame number for {} filename".format(filename))
+        return osp.splitext(path)[0]
+
+    def match_frame(self, path, root_hint=None):
+        """
+        Look up the frame number stored for an image path.
+
+        The extension is dropped by _get_filename() before the lookup in
+        self._frame_mapping. If nothing is found and root_hint is given,
+        the lookup is retried with root_hint joined in front of the path
+        (unless the path already starts with root_hint).
+
+        Returns the mapped frame number, or None if neither lookup matches.
+        """
+        path = self._get_filename(path)
+        match = self._frame_mapping.get(path)
+        # Compare with None explicitly: a mapped frame number of 0 is falsy,
+        # so a truthiness check would discard it and retry with the hint
+        if match is None and root_hint and not path.startswith(root_hint):
+            path = osp.join(root_hint, path)
+            match = self._frame_mapping.get(path)
+        return match
+
+    def match_frame_fuzzy(self, path):
+        """
+        Find a frame whose stored path ends with the given path,
+        comparing whole path components rather than characters.
+
+        Preconditions:
+        - The input dataset is complete, i.e. all of its items are present.
+          Matching a partial dataset can't be correct for all input cases.
+        - path is the longest path of the input dataset, counted in
+          path components.
+
+        Returns the value mapped to the first matching entry of
+        self._frame_mapping, or None when no entry matches.
+        """
+        wanted = Path(self._get_filename(path)).parts
+        tail_len = len(wanted)
+        return next(
+            (frame for known, frame in self._frame_mapping.items()
+                # endswith() for paths
+                if Path(known).parts[-tail_len:] == wanted),
+            None)
 
 class CvatTaskDataExtractor(datumaro.SourceExtractor):
     def __init__(self, task_data, include_images=False):
@@ -450,8 +462,7 @@ def categories(self):
     def _load_categories(cvat_anno):
         categories = {}
 
-        label_categories = datumaro.LabelCategories(
-            attributes=['occluded', 'z_order'])
+        label_categories = datumaro.LabelCategories(attributes=['occluded'])
 
         for _, label in cvat_anno.meta['task']['labels']:
             label_categories.add(label['name'])
@@ -537,20 +548,14 @@ def convert_attrs(label, cvat_attrs):
 
         return item_anno
 
-def match_frame(item, task_data):
+def match_dm_item(item, task_data, root_hint=None):
     is_video = task_data.meta['task']['mode'] == 'interpolation'
 
     frame_number = None
     if frame_number is None and item.has_image:
-        try:
-            frame_number = task_data.match_frame(item.image.path)
-        except Exception:
-            pass
+        frame_number = task_data.match_frame(item.image.path, root_hint)
     if frame_number is None:
-        try:
-            frame_number = task_data.match_frame(item.id)
-        except Exception:
-            pass
+        frame_number = task_data.match_frame(item.id, root_hint)
     if frame_number is None:
         frame_number = cast(item.attributes.get('frame', item.id), int)
     if frame_number is None and is_video:
@@ -561,6 +566,19 @@ def match_frame(item, task_data):
             item.id)
     return frame_number
 
+def find_dataset_root(dm_dataset, task_data):
+    """
+    Guess the path prefix that the task image paths have on top of
+    the dataset item ids.
+
+    The dataset item with the most path components is matched against
+    the task frames by path suffix (task_data.match_frame_fuzzy()), and
+    the leading part of the matched frame directory that the item id
+    lacks is returned, without a trailing '/'.
+
+    Returns None if the dataset is empty or no task frame matches.
+    """
+    # An empty dataset has no longest item: max() without a default
+    # would raise ValueError here
+    longest_item = max(dm_dataset, key=lambda x: len(Path(x.id).parts),
+        default=None)
+    if longest_item is None:
+        return None
+
+    longest_path = longest_item.id
+    longest_match = task_data.match_frame_fuzzy(longest_path)
+    if longest_match is None:
+        return None
+
+    longest_match = osp.dirname(task_data.frame_info[longest_match]['path'])
+    # "-0 or None" keeps the whole directory when the id has no directory part
+    prefix = longest_match[:-len(osp.dirname(longest_path)) or None]
+    if prefix.endswith('/'):
+        prefix = prefix[:-1]
+    return prefix
+
+
 def import_dm_annotations(dm_dataset, task_data):
     shapes = {
         datumaro.AnnotationType.bbox: ShapeType.RECTANGLE,
@@ -569,10 +587,16 @@ def import_dm_annotations(dm_dataset, task_data):
         datumaro.AnnotationType.points: ShapeType.POINTS,
     }
 
+    if len(dm_dataset) == 0:
+        return
+
     label_cat = dm_dataset.categories()[datumaro.AnnotationType.label]
 
+    root_hint = find_dataset_root(dm_dataset, task_data)
+
     for item in dm_dataset:
-        frame_number = task_data.abs_frame_id(match_frame(item, task_data))
+        frame_number = task_data.abs_frame_id(
+            match_dm_item(item, task_data, root_hint=root_hint))
 
         # do not store one-item groups
         group_map = {0: 0}

diff --git a/cvat/apps/dataset_manager/formats/yolo.py b/cvat/apps/dataset_manager/formats/yolo.py
@@ -9,10 +9,11 @@
 from pyunpack import Archive
 
 from cvat.apps.dataset_manager.bindings import (CvatTaskDataExtractor,
-    import_dm_annotations, match_frame)
+    import_dm_annotations, match_dm_item, find_dataset_root)
 from cvat.apps.dataset_manager.util import make_zip_archive
 from datumaro.components.extractor import DatasetItem
 from datumaro.components.project import Dataset
+from datumaro.plugins.yolo_format.extractor import YoloExtractor
 
 from .registry import dm_env, exporter, importer
 
@@ -33,17 +34,20 @@ def _import(src_file, task_data):
         Archive(src_file.name).extractall(tmp_dir)
 
         image_info = {}
-        anno_files = glob(osp.join(tmp_dir, '**', '*.txt'), recursive=True)
-        for filename in anno_files:
-            filename = osp.splitext(osp.basename(filename))[0]
+        frames = [YoloExtractor.name_from_path(osp.relpath(p, tmp_dir))
+            for p in glob(osp.join(tmp_dir, '**', '*.txt'), recursive=True)]
+        root_hint = find_dataset_root(
+            [DatasetItem(id=frame) for frame in frames], task_data)
+        for frame in frames:
             frame_info = None
             try:
-                frame_id = match_frame(DatasetItem(id=filename), task_data)
+                frame_id = match_dm_item(DatasetItem(id=frame), task_data,
+                    root_hint=root_hint)
                 frame_info = task_data.frame_info[frame_id]
             except Exception:
                 pass
             if frame_info is not None:
-                image_info[filename] = (frame_info['height'], frame_info['width'])
+                image_info[frame] = (frame_info['height'], frame_info['width'])
 
         dataset = dm_env.make_importer('yolo')(tmp_dir, image_info=image_info) \
             .make_dataset()

diff --git a/cvat/apps/dataset_manager/tests/_test_formats.py b/cvat/apps/dataset_manager/tests/_test_formats.py
@@ -70,6 +70,10 @@ def _setUpModule():
 from rest_framework.test import APITestCase, APIClient
 from rest_framework import status
 
+from cvat.apps.dataset_manager.annotation import AnnotationIR
+from cvat.apps.dataset_manager.bindings import TaskData, find_dataset_root
+from cvat.apps.engine.models import Task
+
 _setUpModule()
 
 from cvat.apps.dataset_manager.annotation import AnnotationIR
@@ -256,7 +260,7 @@ def _generate_annotations(self, task):
         self._put_api_v1_task_id_annotations(task["id"], annotations)
         return annotations
 
-    def _generate_task_images(self, count):
+    def _generate_task_images(self, count): # pylint: disable=no-self-use
         images = {
             "client_files[%d]" % i: generate_image_file("image_%d.jpg" % i)
             for i in range(count)
@@ -385,6 +389,7 @@ def load_dataset(src):
 
                             # NOTE: can't import cvat.utils.cli
                             # for whatever reason, so remove the dependency
+                            #
                             project.config.remove('sources')
 
                             return project.make_dataset()
@@ -436,3 +441,97 @@ def test_can_make_abs_frame_id_from_known(self):
         task_data = TaskData(AnnotationIR(), Task.objects.get(pk=task['id']))
 
         self.assertEqual(5, task_data.abs_frame_id(2))
+
+# Checks how dataset item paths are matched to task frames
+# (TaskData.match_frame) and how the dataset root is detected
+# (find_dataset_root) for tasks whose images are stored under
+# nested relative paths.
+class FrameMatchingTest(_DbTestBase):
+    # Writes one generated image per entry of `paths` into an in-memory
+    # zip archive, using the given relative paths as archive member names,
+    # and returns the archive as 'client_files[0]' with 'image_quality'.
+    def _generate_task_images(self, paths): # pylint: disable=no-self-use
+        f = BytesIO()
+        with zipfile.ZipFile(f, 'w') as archive:
+            for path in paths:
+                archive.writestr(path, generate_image_file(path).getvalue())
+        f.name = 'images.zip'
+        # rewind so that the archive can be read from the start
+        f.seek(0)
+
+        return {
+            'client_files[0]': f,
+            'image_quality': 75,
+        }
+
+    # Builds a task description with two labels ("car" with two attributes
+    # and "person") and passes it with `images` to self._create_task().
+    def _generate_task(self, images):
+        task = {
+            "name": "my task #1",
+            "owner": '',
+            "assignee": '',
+            "overlap": 0,
+            "segment_size": 100,
+            "z_order": False,
+            "labels": [
+                {
+                    "name": "car",
+                    "attributes": [
+                        {
+                            "name": "model",
+                            "mutable": False,
+                            "input_type": "select",
+                            "default_value": "mazda",
+                            "values": ["bmw", "mazda", "renault"]
+                        },
+                        {
+                            "name": "parked",
+                            "mutable": True,
+                            "input_type": "checkbox",
+                            "default_value": False
+                        },
+                    ]
+                },
+                {"name": "person"},
+            ]
+        }
+        return self._create_task(task, images)
+
+    # match_frame() must resolve exact paths, resolve shortened paths
+    # with the help of a root hint, and return None for unknown items.
+    def test_frame_matching(self):
+        task_paths = [
+            'a.jpg',
+            'a/a.jpg',
+            'a/b.jpg',
+            'b/a.jpg',
+            'b/c.jpg',
+            'a/b/c.jpg',
+            'a/b/d.jpg',
+        ]
+
+        images = self._generate_task_images(task_paths)
+        task = self._generate_task(images)
+        task_data = TaskData(AnnotationIR(), Task.objects.get(pk=task["id"]))
+
+        # each case is (input path, expected task path or None, root hint)
+        for input_path, expected, root in [
+            ('z.jpg', None, ''), # unknown item
+            ('z/a.jpg', None, ''), # unknown item
+
+            ('d.jpg', 'a/b/d.jpg', 'a/b'), # match with root hint
+            ('b/d.jpg', 'a/b/d.jpg', 'a'), # match with root hint
+        ] + list(zip(task_paths, task_paths, [None] * len(task_paths))): # exact matches
+            with self.subTest(input=input_path):
+                actual = task_data.match_frame(input_path, root)
+                # translate the matched frame number back to its task path
+                if actual is not None:
+                    actual = task_data.frame_info[actual]['path']
+                self.assertEqual(expected, actual)
+
+    # find_dataset_root() must return the prefix that task paths have on top
+    # of the dataset paths, '' when they coincide, and None without a match.
+    def test_dataset_root(self):
+        # each case is (task image paths, dataset item paths, expected root)
+        for task_paths, dataset_paths, expected in [
+            ([ 'a.jpg', 'b/c/a.jpg' ], [ 'a.jpg', 'b/c/a.jpg' ], ''),
+            ([ 'b/a.jpg', 'b/c/a.jpg' ], [ 'a.jpg', 'c/a.jpg' ], 'b'), # 'images from share' case
+            ([ 'b/c/a.jpg' ], [ 'a.jpg' ], 'b/c'), # 'images from share' case
+            ([ 'a.jpg' ], [ 'z.jpg' ], None),
+        ]:
+            with self.subTest(expected=expected):
+                images = self._generate_task_images(task_paths)
+                task = self._generate_task(images)
+                task_data = TaskData(AnnotationIR(),
+                    Task.objects.get(pk=task["id"]))
+                # dataset item ids are the dataset paths without extension
+                dataset = [
+                    datumaro.components.extractor.DatasetItem(
+                        id=osp.splitext(p)[0])
+                    for p in dataset_paths]
+
+                root = find_dataset_root(dataset, task_data)
+                self.assertEqual(expected, root)
diff --git a/cvat/requirements/base.txt b/cvat/requirements/base.txt
@@ -1,18 +1,18 @@
-click==6.7
+click==7.1.2
 Django==2.2.13
 django-appconf==1.0.4
 django-auth-ldap==2.2.0
 django-cacheops==5.0
 django-compressor==2.4
 django-rq==2.0.0
-EasyProcess==0.2.3
+EasyProcess==0.3
 Pillow==7.1.2
 numpy==1.18.5
 python-ldap==3.2.0
 pytz==2020.1
 pyunpack==0.2.1
 rcssmin==1.0.6
-redis==3.2.0
+redis==3.5.3
 rjsmin==1.1.0
 requests==2.24.0
 rq==1.0.0
@@ -22,7 +22,7 @@ sqlparse==0.2.4
 django-sendfile==0.3.11
 dj-pagination==2.4.0
 python-logstash==0.4.6
-django-revproxy==0.9.15
+django-revproxy==0.10.0
 rules==2.0
 GitPython==3.1.3
 coreapi==2.3.3
@@ -39,9 +39,9 @@ cython==0.29.20
 matplotlib==3.0.3
 scikit-image==0.15.0
 tensorflow==1.15.2
-keras==2.3.1
+keras==2.4.2
 opencv-python==4.1.0.25
-h5py==2.9.0
+h5py==2.10.0
 imgaug==0.4.0
 django-cors-headers==3.3.0
 furl==2.0.0

diff --git a/cvat/requirements/development.txt b/cvat/requirements/development.txt
@@ -6,7 +6,7 @@ mccabe==0.6.1
 pylint==2.5.3
 pylint-django==2.0.15
 pylint-plugin-utils==0.6
-rope==0.11
+rope==0.17.0
 wrapt==1.12.1
 django-extensions==2.0.6
 Werkzeug==0.15.3

diff --git a/docker-compose.yml b/docker-compose.yml
@@ -89,7 +89,7 @@ services:
       - cvat
       - cvat_ui
     environment:
-      CVAT_HOST: localhost
+      CVAT_HOST: ${CVAT_HOST:-localhost}  # defaults to localhost; set CVAT_HOST in the environment to override
     ports:
       - "8080:80"
     volumes:

diff --git a/package.json b/package.json
@@ -15,7 +15,7 @@
     "eslint-plugin-no-unsafe-innerhtml": "^1.0.16",
     "eslint-plugin-no-unsanitized": "^3.0.2",
     "eslint-plugin-react": "^7.14.3",
-    "eslint-plugin-react-hooks": "^2.5.1",
+    "eslint-plugin-react-hooks": "^4.0.4",
     "eslint-plugin-security": "^1.4.0",
     "remark-lint-emphasis-marker": "^2.0.0",
     "remark-lint-list-item-spacing": "^2.0.0",