
Az/custom annotation #233

Merged
merged 42 commits
Dec 26, 2018
Changes from 22 commits
Commits
42 commits
176d632
initial version of custom annotation application
Dec 10, 2018
98760d1
added readme for custom annotation
Dec 13, 2018
20b5d01
Update Readme
azhavoro Dec 13, 2018
b5d90eb
Update README
azhavoro Dec 13, 2018
fdb1d51
update README
Dec 13, 2018
145beea
Merge branch 'az/custom_annotation' of https://github.com/opencv/cvat…
Dec 13, 2018
e81bbbb
minor fixes
Dec 13, 2018
897ad63
custom annotation -> auto annotation
Dec 14, 2018
095bfbb
fixed typos
Dec 14, 2018
8d608db
remove unused method
Dec 14, 2018
039d70f
fixed indents
Dec 14, 2018
0f19d63
restricting usage of built-ins in user's code
Dec 14, 2018
95ac2d9
updted README
Dec 14, 2018
515aafb
fixed typos
Dec 17, 2018
ae3e565
fixed typo
Dec 17, 2018
96b57cc
fixed comments from Boris
Dec 17, 2018
f6a9563
switch from OpenCV to IE to infer directly
Dec 20, 2018
78d19a4
fixed some codacy issues
Dec 25, 2018
ba53010
updated README
Dec 25, 2018
f3e33a1
fix codacy issue
Dec 25, 2018
0240061
fix codacy issues
Dec 25, 2018
dd7ae04
fix codacy issues
Dec 25, 2018
284df75
updated readme
Dec 25, 2018
1d7204d
Update README.md
azhavoro Dec 25, 2018
565d442
Update README.md
azhavoro Dec 26, 2018
8c0c8fd
fixed some typos
Dec 26, 2018
c854dfb
fixed some typos
Dec 26, 2018
6dc611b
Moved information about components into root README.md
Dec 26, 2018
9142fd8
Merge remote-tracking branch 'origin/develop' into az/custom_annotation
Dec 26, 2018
b2805b2
Slightly improved documentation.
Dec 26, 2018
1daf222
Fix typo
nmanovic Dec 26, 2018
f0ff2f1
added public results class to interact with interpretation script
Dec 26, 2018
4f1dc0d
attributes support
Dec 26, 2018
e98080f
fixed codacy issues
Dec 26, 2018
4c20011
added several points for Point shape support
Dec 26, 2018
cd68501
added polylines and polygons support
Dec 26, 2018
0254757
rename add_point -> add_points
Dec 26, 2018
28d86e2
updated readme
Dec 26, 2018
8370bb7
Merge remote-tracking branch 'origin/develop' into az/custom_annotation
Dec 26, 2018
fde65d8
Update CHANGELOG.md
Dec 26, 2018
015571c
support OpenVINO R5
Dec 26, 2018
6cc79f5
Merge branch 'az/custom_annotation' of https://github.com/opencv/cvat…
Dec 26, 2018
114 changes: 114 additions & 0 deletions cvat/apps/auto_annotation/README.md
@@ -0,0 +1,114 @@
## Auto annotation

### Description

This application is enabled automatically if the OpenVINO™ component is installed. It allows using custom models for pre-annotation.
Only the DLDT framework from the OpenVINO™ toolkit is supported. If you would like to annotate a task with a custom model, please convert it to the
intermediate representation (IR) format via the model optimizer tool.
See [OpenVINO documentation](https://software.intel.com/en-us/articles/OpenVINO-InferEngine) for details.
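For reference, a conversion command could look roughly like the following (a sketch: the installation path and model file name are hypothetical and depend on your OpenVINO setup and source framework):

```shell
# Hypothetical paths; adjust to your OpenVINO installation and model
python3 /opt/intel/computer_vision_sdk/deployment_tools/model_optimizer/mo.py \
    --input_model frozen_inference_graph.pb \
    --output_dir ./my_model_ir
```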

### Usage
To annotate a task with a custom model you need to prepare 4 files:
1. __Model config__ - a text file that contains the network configuration. The following file extension is expected:
   * __*.xml__
1. __Model weights__ - a binary file that contains the trained weights. The following file extension is expected:
   * __*.bin__
1. __Label map__ - a simple JSON file that contains the `label_map` dictionary-like object with string values for label numbers.
   Please note that the values in `label_map` should exactly match the labels with which the task was created; otherwise they will be ignored.
Example:
```json
{
"label_map": {
"0": "background",
"1": "aeroplane",
"2": "bicycle",
"3": "bird",
"4": "boat",
"5": "bottle",
"6": "bus",
"7": "car",
"8": "cat",
"9": "chair",
"10": "cow",
"11": "diningtable",
"12": "dog",
"13": "horse",
"14": "motorbike",
"15": "person",
"16": "pottedplant",
"17": "sheep",
"18": "sofa",
"19": "train",
"20": "tvmonitor"
}
}
```
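On the server side such a file is read by a small helper (see `load_label_map` in `model_loader.py` further down in this diff); a standalone sketch:

```python
import json

def load_label_map(labels_path):
    # Return the "label_map" dictionary from a JSON file like the example above
    with open(labels_path, "r") as f:
        return json.load(f)["label_map"]
```

Note that the keys are strings (`"0"`, `"1"`, ...), matching the class indices produced by the network.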
1. __Interpretation script__ - a Python file used to convert the output results from the network to the CVAT format. This code runs inside a restricted environment.
   The list of built-in functions available for use:
* __str__
* __int__
* __float__
* __max__
* __min__
* __range__
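A restricted environment of this kind can be sketched by executing the user script with a whitelisted `__builtins__` dictionary (a simplified illustration, not necessarily CVAT's exact mechanism):

```python
# Only the builtins listed above are exposed to the user script
SAFE_BUILTINS = {"str": str, "int": int, "float": float,
                 "max": max, "min": min, "range": range}

def run_interpretation_script(code, detections, results):
    # The script sees the whitelisted builtins plus the two documented variables
    scope = {"__builtins__": SAFE_BUILTINS,
             "detections": detections,
             "results": results}
    exec(code, scope)
```

Any name outside the whitelist (for example `open`) raises a `NameError` inside the script.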

Also, two variables are available in the scope:
* **detections** - a list with detection results (see description below)
* **results** - a dictionary where conversion results should be added (see examples below for details)

`detections` is a Python list of dictionaries that represent detections for each frame of the task with the following keys:
* __frame_id__ - frame number
* __frame_height__ - frame height
* __frame_width__ - frame width
* __detections__ - output np.ndarray (See [ExecutableNetwork.infer](https://software.intel.com/en-us/articles/OpenVINO-InferEngine#inpage-nav-11-6-3) for details).
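For illustration, one entry of the `detections` list for an SSD-like model might look as follows (the values are placeholders, and the `(1, 1, N, 7)` blob layout is typical for SSD detection output; actual shapes depend on the model):

```python
import numpy as np

# Hypothetical single-frame entry with one detection
frame_result = {
    "frame_id": 0,
    "frame_height": 720,
    "frame_width": 1280,
    # One detection row: [image_id, class_id, confidence, xmin, ymin, xmax, ymax]
    "detections": np.array([[[[0, 15, 0.92, 0.10, 0.20, 0.30, 0.40]]]]),
}
```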

`results` is a dictionary with the following structure:
```python
{
"boxes": [],
"polygons": [],
"polylines": [],
"points": [],
"box_paths": [],
"polygon_paths": [],
"polyline_paths": [],
"points_paths": [],
}
```

Example for an SSD-based network:
```python
def process_results(detections, results):
    def clip(value):
        return max(min(1.0, value), 0.0)

    boxes = results['boxes']

    for frame_results in detections:
        frame_height = frame_results['frame_height']
        frame_width = frame_results['frame_width']
        frame_number = frame_results['frame_id']

        for i in range(frame_results['detections'].shape[2]):
            confidence = frame_results['detections'][0, 0, i, 2]
            if confidence < 0.5:
                continue
            class_id = int(frame_results['detections'][0, 0, i, 1])
            xtl = '{:.2f}'.format(clip(frame_results['detections'][0, 0, i, 3]) * frame_width)
            ytl = '{:.2f}'.format(clip(frame_results['detections'][0, 0, i, 4]) * frame_height)
            xbr = '{:.2f}'.format(clip(frame_results['detections'][0, 0, i, 5]) * frame_width)
            ybr = '{:.2f}'.format(clip(frame_results['detections'][0, 0, i, 6]) * frame_height)

            boxes.append({
                'label': class_id,
                'frame': frame_number,
                'xtl': xtl,
                'ytl': ytl,
                'xbr': xbr,
                'ybr': ybr,
                'attributes': {
                    'confidence': '{:.2f}'.format(confidence),
                }
            })

process_results(detections, results)
```
8 changes: 8 additions & 0 deletions cvat/apps/auto_annotation/__init__.py
@@ -0,0 +1,8 @@

# Copyright (C) 2018 Intel Corporation
#
# SPDX-License-Identifier: MIT

from cvat.settings.base import JS_3RDPARTY

JS_3RDPARTY['dashboard'] = JS_3RDPARTY.get('dashboard', []) + ['auto_annotation/js/auto_annotation.js']
4 changes: 4 additions & 0 deletions cvat/apps/auto_annotation/admin.py
@@ -0,0 +1,4 @@

# Copyright (C) 2018 Intel Corporation
#
# SPDX-License-Identifier: MIT
11 changes: 11 additions & 0 deletions cvat/apps/auto_annotation/apps.py
@@ -0,0 +1,11 @@

# Copyright (C) 2018 Intel Corporation
#
# SPDX-License-Identifier: MIT

from django.apps import AppConfig


class AutoAnnotationConfig(AppConfig):
    name = "auto_annotation"

24 changes: 24 additions & 0 deletions cvat/apps/auto_annotation/image_loader.py
@@ -0,0 +1,24 @@

# Copyright (C) 2018 Intel Corporation
#
# SPDX-License-Identifier: MIT

import cv2

class ImageLoader():
    def __init__(self, image_list):
        self.image_list = image_list

    def __getitem__(self, i):
        return self.image_list[i]

    def __iter__(self):
        for imagename in self.image_list:
            yield imagename, self._load_image(imagename)

    def __len__(self):
        return len(self.image_list)

    @staticmethod
    def _load_image(path_to_image):
        return cv2.imread(path_to_image)
5 changes: 5 additions & 0 deletions cvat/apps/auto_annotation/migrations/__init__.py
@@ -0,0 +1,5 @@

# Copyright (C) 2018 Intel Corporation
#
# SPDX-License-Identifier: MIT

58 changes: 58 additions & 0 deletions cvat/apps/auto_annotation/model_loader.py
@@ -0,0 +1,58 @@

# Copyright (C) 2018 Intel Corporation
#
# SPDX-License-Identifier: MIT

import json
import cv2
import os
import subprocess

from openvino.inference_engine import IENetwork, IEPlugin

class ModelLoader():
    def __init__(self, model, weights):
        self._model = model
        self._weights = weights

        IE_PLUGINS_PATH = os.getenv("IE_PLUGINS_PATH")
        if not IE_PLUGINS_PATH:
            raise OSError("Inference engine plugin path env not found in the system.")

        plugin = IEPlugin(device="CPU", plugin_dirs=[IE_PLUGINS_PATH])
        if self._check_instruction("avx2"):
            plugin.add_cpu_extension(os.path.join(IE_PLUGINS_PATH, "libcpu_extension_avx2.so"))
        elif self._check_instruction("sse4"):
            plugin.add_cpu_extension(os.path.join(IE_PLUGINS_PATH, "libcpu_extension_sse4.so"))
        else:
            raise Exception("Inference engine requires a support of avx2 or sse4.")

        network = IENetwork.from_ir(model=self._model, weights=self._weights)
        supported_layers = plugin.get_supported_layers(network)
        not_supported_layers = [l for l in network.layers.keys() if l not in supported_layers]
        if len(not_supported_layers) != 0:
            raise Exception("Following layers are not supported by the plugin for specified device {}:\n {}".
                format(plugin.device, ", ".join(not_supported_layers)))

        self._input_blob_name = next(iter(network.inputs))
        self._output_blob_name = next(iter(network.outputs))

        self._net = plugin.load(network=network, num_requests=2)
        self._input_layout = network.inputs[self._input_blob_name]

    def infer(self, image):
        _, _, h, w = self._input_layout
        in_frame = image if image.shape[:-1] == (h, w) else cv2.resize(image, (w, h))
        in_frame = in_frame.transpose((2, 0, 1))  # Change data layout from HWC to CHW
        return self._net.infer(inputs={self._input_blob_name: in_frame})[self._output_blob_name].copy()

    @staticmethod
    def _check_instruction(instruction):
        return instruction == str.strip(
            subprocess.check_output(
                "lscpu | grep -o \"{}\" | head -1".format(instruction), shell=True
            ).decode("utf-8"))

def load_label_map(labels_path):
    with open(labels_path, "r") as f:
        return json.load(f)["label_map"]
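The layout change performed in `infer` above can be illustrated in isolation (a sketch with a dummy frame: OpenCV returns images in HWC order, while the Inference Engine expects CHW input):

```python
import numpy as np

frame = np.zeros((480, 640, 3), dtype=np.uint8)  # HWC, as returned by cv2.imread
chw = frame.transpose((2, 0, 1))                 # CHW, as expected by the network
print(chw.shape)  # (3, 480, 640)
```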
4 changes: 4 additions & 0 deletions cvat/apps/auto_annotation/models.py
@@ -0,0 +1,4 @@

# Copyright (C) 2018 Intel Corporation
#
# SPDX-License-Identifier: MIT