From ad11b587b516b1562d78f7fe9c82f9cb9ec161a1 Mon Sep 17 00:00:00 2001
From: Mikhail Treskin
Date: Fri, 18 Feb 2022 15:14:37 +0300
Subject: [PATCH] Handle attributes returned from nuclio detector (#3917)

---
 CHANGELOG.md                                  |   1 +
 cvat-ui/package-lock.json                     |   4 +-
 cvat-ui/package.json                          |   2 +-
 .../controls-side-bar/tools-control.tsx       |  15 ++-
 cvat/apps/lambda_manager/views.py             | 115 +++++++++++++++---
 serverless/common/openvino/model_loader.py    |  19 ++-
 .../intel/face-detection-0205/function.yaml   |  76 ++++++++++++
 .../omz/intel/face-detection-0205/main.py     |  33 +++++
 .../face-detection-0205/model_handler.py      |  71 +++++++++++
 9 files changed, 302 insertions(+), 34 deletions(-)
 create mode 100644 serverless/openvino/omz/intel/face-detection-0205/function.yaml
 create mode 100644 serverless/openvino/omz/intel/face-detection-0205/main.py
 create mode 100644 serverless/openvino/omz/intel/face-detection-0205/model_handler.py

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 62372d0614d0..e8e37d70e02d 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -7,6 +7,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 
 ## \[2.0.0] - Unreleased
 ### Added
+- Handle attributes coming from Nuclio detectors ()
 - Add additional environment variables for Nuclio configuration ()
 - Add KITTI segmentation and detection format ()
 - Add LFW format ()
diff --git a/cvat-ui/package-lock.json b/cvat-ui/package-lock.json
index b2303605db9e..895eedef66b3 100644
--- a/cvat-ui/package-lock.json
+++ b/cvat-ui/package-lock.json
@@ -1,12 +1,12 @@
 {
   "name": "cvat-ui",
-  "version": "1.35.1",
+  "version": "1.35.2",
   "lockfileVersion": 2,
   "requires": true,
   "packages": {
     "": {
       "name": "cvat-ui",
-      "version": "1.35.1",
+      "version": "1.35.2",
       "license": "MIT",
       "dependencies": {
         "@ant-design/icons": "^4.6.3",
diff --git a/cvat-ui/package.json b/cvat-ui/package.json
index e3d604dc692f..e01199e4b3f3 100644
--- a/cvat-ui/package.json
+++ b/cvat-ui/package.json
@@ -1,6 +1,6 @@
 {
   "name": "cvat-ui",
-  "version": "1.35.1",
+  "version": "1.35.2",
   "description": "CVAT single-page application",
   "main": "src/index.tsx",
   "scripts": {
diff --git a/cvat-ui/src/components/annotation-page/standard-workspace/controls-side-bar/tools-control.tsx b/cvat-ui/src/components/annotation-page/standard-workspace/controls-side-bar/tools-control.tsx
index 572b810c878c..e2f8563846a9 100644
--- a/cvat-ui/src/components/annotation-page/standard-workspace/controls-side-bar/tools-control.tsx
+++ b/cvat-ui/src/components/annotation-page/standard-workspace/controls-side-bar/tools-control.tsx
@@ -1,4 +1,4 @@
-// Copyright (C) 2020-2021 Intel Corporation
+// Copyright (C) 2020-2022 Intel Corporation
 //
 // SPDX-License-Identifier: MIT
 
@@ -1014,6 +1014,13 @@ export class ToolsControlComponent extends React.PureComponent<Props, State> {
                 );
             }
 
+            const attrsMap: Record<string, Record<string, number>> = {};
+            jobInstance.labels.forEach((label: any) => {
+                attrsMap[label.name] = {};
+                label.attributes.forEach((attr: any) => {
+                    attrsMap[label.name][attr.name] = attr.id;
+                });
+            });
             return (
                 {
                     frame,
                     occluded: false,
                     source: 'auto',
-                    attributes: {},
+                    attributes: (data.attributes as { name: string, value: string }[])
+                        .reduce((mapping, attr) => {
+                            mapping[attrsMap[data.label][attr.name]] = attr.value;
+                            return mapping;
+                        }, {} as Record<number, string>),
                     zOrder: curZOrder,
                 }),
             );
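
To make the UI change above concrete: the new code builds a label -> attribute name -> attribute spec id lookup from the job's labels, and the .reduce() call then converts a detector result's attribute list into the {spec_id: value} record that ObjectState expects. A minimal sketch of the same remapping in Python (the label, attribute names, and ids here are made up for illustration):

    # Hypothetical lookup built from the job's labels, as attrsMap is above.
    attrs_map = {
        "face": {"age": 11, "gender": 12, "emotion": 13},
    }

    # Hypothetical item as returned by a detector function.
    detection = {
        "label": "face",
        "points": [10, 20, 90, 120],
        "attributes": [
            {"name": "age", "value": "31"},
            {"name": "gender", "value": "male"},
        ],
    }

    # Equivalent of the .reduce() call: attribute names become spec ids.
    mapped = {
        attrs_map[detection["label"]][attr["name"]]: attr["value"]
        for attr in detection["attributes"]
    }
    assert mapped == {11: "31", 12: "male"}
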
diff --git a/cvat/apps/lambda_manager/views.py b/cvat/apps/lambda_manager/views.py
index 18cc4db60640..f5ec851b0669 100644
--- a/cvat/apps/lambda_manager/views.py
+++ b/cvat/apps/lambda_manager/views.py
@@ -1,7 +1,12 @@
+# Copyright (C) 2021 Intel Corporation
+#
+# SPDX-License-Identifier: MIT
+
 import base64
 import json
 from functools import wraps
 from enum import Enum
+from copy import deepcopy
 
 import django_rq
 import requests
@@ -101,6 +106,13 @@ def __init__(self, gateway, data):
                 "`{}` lambda function has non-unique labels".format(self.id),
                 code=status.HTTP_404_NOT_FOUND)
         self.labels = labels
+        # mapping of labels and corresponding supported attributes
+        self.func_attributes = {item['name']: item.get('attributes', []) for item in spec}
+        for label, attributes in self.func_attributes.items():
+            if len([attr['name'] for attr in attributes]) != len(set([attr['name'] for attr in attributes])):
+                raise ValidationError(
+                    "`{}` lambda function has non-unique attributes for label {}".format(self.id, label),
+                    code=status.HTTP_404_NOT_FOUND)
         # state of the function
         self.state = data['status']['state']
         # description of the function
@@ -141,6 +153,10 @@ def to_dict(self):
             response.update({
                 'state': self.state
             })
+        if self.kind is LambdaType.DETECTOR:
+            response.update({
+                'attributes': self.func_attributes
+            })
 
         return response
@@ -153,11 +169,17 @@ def invoke(self, db_task, data):
                 "threshold": threshold,
             })
         quality = data.get("quality")
-        mapping = data.get("mapping")
-        mapping_by_default = {db_label.name:db_label.name
-            for db_label in (
-                db_task.project.label_set if db_task.project_id else db_task.label_set
-            ).all()}
+        mapping = data.get("mapping", {})
+        mapping_by_default = {}
+        task_attributes = {}
+        for db_label in (db_task.project.label_set if db_task.project_id else db_task.label_set).prefetch_related("attributespec_set").all():
+            mapping_by_default[db_label.name] = db_label.name
+            task_attributes[db_label.name] = {}
+            for attribute in db_label.attributespec_set.all():
+                task_attributes[db_label.name][attribute.name] = {
+                    'input_type': attribute.input_type,
+                    'values': attribute.values.split('\n')
+                }
         if not mapping:
             # use mapping by default to avoid labels in mapping which
             # don't exist in the task
             mapping = mapping_by_default
         else:
             # filter labels in mapping which don't exist in the task
             mapping = {k:v for k,v in mapping.items() if v in mapping_by_default}
-
+        supported_attrs = {}
+        for func_label, func_attrs in self.func_attributes.items():
+            if func_label in mapping:
+                supported_attrs[func_label] = {}
+                task_attr_names = [task_attr for task_attr in task_attributes[mapping[func_label]]]
+                for attr in func_attrs:
+                    if attr['name'] in task_attr_names:
+                        supported_attrs[func_label].update({attr["name"] : attr})
         if self.kind == LambdaType.DETECTOR:
             payload.update({
                 "image": self._get_image(db_task, data["frame"], quality)
             })
                 code=status.HTTP_400_BAD_REQUEST)
 
         response = self.gateway.invoke(self, payload)
+        response_filtered = []
+        def check_attr_value(value, func_attr, db_attr):
+            if db_attr is None:
+                return False
+            func_attr_type = func_attr["input_type"]
+            db_attr_type = db_attr["input_type"]
+            # Check if attribute types are equal for function configuration and db spec
+            if func_attr_type == db_attr_type:
+                if func_attr_type == "number":
+                    return value.isnumeric()
+                elif func_attr_type == "checkbox":
+                    return value in ["true", "false"]
+                elif func_attr_type in ["select", "radio", "text"]:
+                    return True
+                else:
+                    return False
+            else:
+                if func_attr_type == "number":
+                    return db_attr_type in ["select", "radio", "text"] and value.isnumeric()
+                elif func_attr_type == "text":
+                    return db_attr_type == "text" or \
+                        (db_attr_type in ["select", "radio"] and len(value.split(" ")) == 1)
+                elif func_attr_type == "select":
+                    return db_attr["input_type"] in ["radio", "text"]
+                elif func_attr_type == "radio":
+                    return db_attr["input_type"] in ["select", "text"]
+                elif func_attr_type == "checkbox":
+                    return value in ["true", "false"]
+                else:
+                    return False
         if self.kind == LambdaType.DETECTOR:
-            if mapping:
-                for item in response:
-                    item["label"] = mapping.get(item["label"])
-                response = [item for item in response if item["label"]]
-
-        return response
+            for item in response:
+                if item['label'] in mapping:
+                    attributes = deepcopy(item.get("attributes", []))
+                    item["attributes"] = []
+                    for attr in attributes:
+                        db_attr = supported_attrs.get(item['label'], {}).get(attr["name"])
+                        func_attr = [func_attr for func_attr in self.func_attributes.get(item['label'], []) if func_attr['name'] == attr["name"]]
+                        # Skip current attribute if it was not declared as supported in function config
+                        if not func_attr:
+                            continue
+                        if attr["name"] in supported_attrs.get(item['label'], {}) and check_attr_value(attr["value"], func_attr[0], db_attr):
+                            item["attributes"].append(attr)
+                    item['label'] = mapping[item['label']]
+                    response_filtered.append(item)
+
+        return response_filtered
 
     def _get_image(self, db_task, frame, quality):
         if quality is None or quality == "original":
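
The acceptance rules in check_attr_value are easiest to read as examples. The cases below are hypothetical; each tuple follows the {"input_type": ...} spec shape used above, and the comment states the outcome the function returns for it:

    # (value, function attr input_type, task attr input_type) -> accepted?
    cases = [
        ("31", "number", "number"),        # True: same type, numeric string
        ("31", "number", "text"),          # True: numbers may map to select/radio/text
        ("male", "select", "text"),        # True: select may map to radio/text
        ("happy", "text", "select"),       # True: single-word text fits a select/radio
        ("very happy", "text", "select"),  # False: multi-word value is rejected
        ("true", "checkbox", "number"),    # True: checkbox only needs "true"/"false"
        ("abc", "number", "number"),       # False: not numeric
    ]
    # An attribute whose db_attr lookup returned None is always rejected.
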
def reset(self):
             break
 
         for anno in annotations:
-            label_id = labels.get(anno["label"])
-            if label_id is None:
+            label = labels.get(anno["label"])
+            if label is None:
                 continue # Invalid label provided
+            if anno.get('attributes'):
+                attrs = [{'spec_id': label['attributes'][attr['name']], 'value': attr['value']} for attr in anno.get('attributes') if attr['name'] in label['attributes']]
+            else:
+                attrs = []
             if anno["type"].lower() == "tag":
                 results.append_tag({
                     "frame": frame,
-                    "label_id": label_id,
+                    "label_id": label['id'],
                     "source": "auto",
-                    "attributes": [],
+                    "attributes": attrs,
                     "group": None,
                 })
             else:
                 results.append_shape({
                     "frame": frame,
-                    "label_id": label_id,
+                    "label_id": label['id'],
                     "type": anno["type"],
                     "occluded": False,
                     "points": anno["points"],
                     "z_order": 0,
                     "group": None,
-                    "attributes": [],
+                    "attributes": attrs,
                     "source": "auto"
                 })
 
def __call__(function, task, quality, cleanup, **kwargs):
         if cleanup:
             dm.task.delete_task_data(db_task.id)
         db_labels = (db_task.project.label_set if db_task.project_id else db_task.label_set).prefetch_related("attributespec_set").all()
-        labels = {db_label.name:db_label.id for db_label in db_labels}
+        labels = {}
+        for label in db_labels:
+            labels[label.name] = {'id':label.id, 'attributes': {}}
+            for attr in label.attributespec_set.values():
+                labels[label.name]['attributes'][attr['name']] = attr['id']
 
         if function.kind == LambdaType.DETECTOR:
             LambdaJob._call_detector(function, db_task, labels, quality,
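
The worker-side labels structure changes shape here: instead of a flat name-to-id map, each entry now carries the label id plus a name-to-spec-id map of its attributes, which is what reset() uses to build {spec_id, value} records. A minimal sketch with hypothetical ids:

    # Hypothetical labels dict, in the shape built in __call__ above.
    labels = {
        "face": {"id": 7, "attributes": {"age": 11, "gender": 12}},
    }

    # Hypothetical detector result; "unknown" is not in the task spec.
    anno = {
        "label": "face",
        "type": "rectangle",
        "points": [10, 20, 90, 120],
        "attributes": [
            {"name": "age", "value": "31"},
            {"name": "unknown", "value": "x"},
        ],
    }

    # The same filtering comprehension reset() uses above.
    label = labels[anno["label"]]
    attrs = [
        {"spec_id": label["attributes"][attr["name"]], "value": attr["value"]}
        for attr in anno.get("attributes", [])
        if attr["name"] in label["attributes"]
    ]
    assert attrs == [{"spec_id": 11, "value": "31"}]
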
diff --git a/serverless/common/openvino/model_loader.py b/serverless/common/openvino/model_loader.py
index ca3354d1aee3..85c547c9a17e 100644
--- a/serverless/common/openvino/model_loader.py
+++ b/serverless/common/openvino/model_loader.py
@@ -1,4 +1,3 @@
-
 # Copyright (C) 2020 Intel Corporation
 #
 # SPDX-License-Identifier: MIT
@@ -13,14 +12,6 @@ def __init__(self, model, weights):
         network = ie_core.read_network(model, weights)
         self._network = network
 
-        # Check compatibility
-        supported_layers = ie_core.query_network(network, "CPU")
-        not_supported_layers = [l for l in network.layers.keys() if l not in supported_layers]
-        if len(not_supported_layers) != 0:
-            raise Exception(
-                "Following layers are not supported by the plugin for specified device {}:\n {}"
-                .format(ie_core.device, ", ".join(not_supported_layers)))
-
         # Initialize input blobs
         self._input_info_name = None
         for blob_name in network.inputs:
@@ -41,7 +32,8 @@ def __init__(self, model, weights):
         input_type = network.inputs[self._input_blob_name]
         self._input_layout = input_type if isinstance(input_type, list) else input_type.shape
 
-    def infer(self, image, preprocessing=True):
+
+    def _prepare_inputs(self, image, preprocessing):
         image = np.array(image)
         _, _, h, w = self._input_layout
         if preprocessing:
 
         inputs = {self._input_blob_name: image}
         if self._input_info_name:
             inputs[self._input_info_name] = [h, w, 1]
+        return inputs
 
+    def infer(self, image, preprocessing=True):
+        inputs = self._prepare_inputs(image, preprocessing)
         results = self._net.infer(inputs)
         if len(results) == 1:
             return results[self._output_blob_name].copy()
         else:
             return results.copy()
 
+    def async_infer(self, image, preprocessing=True, request_id=0):
+        inputs = self._prepare_inputs(image, preprocessing)
+        return self._net.start_async(request_id=request_id, inputs=inputs)
+
     def input_size(self):
         return self._input_layout[2:]
diff --git a/serverless/openvino/omz/intel/face-detection-0205/function.yaml b/serverless/openvino/omz/intel/face-detection-0205/function.yaml
new file mode 100644
index 000000000000..131c8c4fa63c
--- /dev/null
+++ b/serverless/openvino/omz/intel/face-detection-0205/function.yaml
@@ -0,0 +1,76 @@
+metadata:
+  name: openvino-omz-face-detection-0205
+  namespace: cvat
+  annotations:
+    name: Attributed face detection
+    type: detector
+    framework: openvino
+    spec: |
+      [
+        { "id": 0, "name": "face", "attributes": [
+          {
+            "name": "age",
+            "input_type": "number",
+            "values": ["0", "150", "1"]
+          },
+          {
+            "name": "gender",
+            "input_type": "select",
+            "values": ["female", "male"]
+          },
+          {
+            "name": "emotion",
+            "input_type": "select",
+            "values": ["neutral", "happy", "sad", "surprise", "anger"]
+          }]
+        }
+      ]
+
+spec:
+  description: Detection network that finds faces and estimates age, gender and emotion attributes
+  runtime: 'python:3.6'
+  handler: main:handler
+  eventTimeout: 30000s
+  env:
+    - name: NUCLIO_PYTHON_EXE_PATH
+      value: /opt/nuclio/common/openvino/python3
+
+  build:
+    image: cvat/openvino.omz.intel.face-detection-0205
+    baseImage: openvino/ubuntu18_dev:2021.1
+
+    directives:
+      preCopy:
+        - kind: USER
+          value: root
+        - kind: WORKDIR
+          value: /opt/nuclio
+        - kind: RUN
+          value: ln -s /usr/bin/pip3 /usr/bin/pip
+        - kind: RUN
+          value: /opt/intel/openvino/deployment_tools/open_model_zoo/tools/downloader/downloader.py --name face-detection-0205 -o /opt/nuclio/open_model_zoo
+        - kind: RUN
+          value: /opt/intel/openvino/deployment_tools/open_model_zoo/tools/downloader/downloader.py --name emotions-recognition-retail-0003 -o /opt/nuclio/open_model_zoo
+        - kind: RUN
+          value: /opt/intel/openvino/deployment_tools/open_model_zoo/tools/downloader/downloader.py --name age-gender-recognition-retail-0013 -o /opt/nuclio/open_model_zoo
+
+      postCopy:
+        - kind: RUN
+          value: apt update && DEBIAN_FRONTEND=noninteractive apt install --no-install-recommends -y python3-skimage
+        - kind: RUN
+          value: pip3 install "numpy<1.16.0" # workaround for skimage
+
+  triggers:
+    myHttpTrigger:
+      maxWorkers: 2
+      kind: 'http'
+      workerAvailabilityTimeoutMilliseconds: 10000
+      attributes:
+        maxRequestBodySize: 33554432 # 32MB
+
+  platform:
+    attributes:
+      restartPolicy:
+        name: always
+        maximumRetryCount: 3
+      mountMode: volume
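
For reference, one detection item returned by this function looks like the following (values are illustrative; the shape matches model_handler.py below and the attributes declared in the spec block above). Attribute values arrive as strings and are validated server-side against the task's attribute specs:

    # Illustrative response body, as serialized by main.py below.
    sample_response = [
        {
            "confidence": "0.87",
            "label": "face",
            "points": [227, 94, 392, 292],
            "type": "rectangle",
            "attributes": [
                {"name": "age", "value": "34"},
                {"name": "gender", "value": "female"},
                {"name": "emotion", "value": "happy"},
            ],
        },
    ]
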
diff --git a/serverless/openvino/omz/intel/face-detection-0205/main.py b/serverless/openvino/omz/intel/face-detection-0205/main.py
new file mode 100644
index 000000000000..53b4f3e3d819
--- /dev/null
+++ b/serverless/openvino/omz/intel/face-detection-0205/main.py
@@ -0,0 +1,33 @@
+# Copyright (C) 2020-2022 Intel Corporation
+#
+# SPDX-License-Identifier: MIT
+
+import json
+import base64
+from PIL import Image
+import io
+from model_handler import FaceDetectorHandler, AttributesExtractorHandler
+
+def init_context(context):
+    context.logger.info("Init context... 0%")
+
+    # Read the DL model
+    context.user_data.detector_model = FaceDetectorHandler()
+    context.user_data.attributes_model = AttributesExtractorHandler()
+
+    context.logger.info("Init context...100%")
+
+def handler(context, event):
+    context.logger.info("Run face-detection-0205 model")
+    data = event.body
+    buf = io.BytesIO(base64.b64decode(data["image"]))
+    threshold = float(data.get("threshold", 0.5))
+    image = Image.open(buf)
+
+    results, faces = context.user_data.detector_model.infer(image, threshold)
+    for idx, face in enumerate(faces):
+        attributes = context.user_data.attributes_model.infer(face)
+        results[idx].update(attributes)
+
+    return context.Response(body=json.dumps(results), headers={},
+        content_type='application/json', status_code=200)
diff --git a/serverless/openvino/omz/intel/face-detection-0205/model_handler.py b/serverless/openvino/omz/intel/face-detection-0205/model_handler.py
new file mode 100644
index 000000000000..9189d1d17f5a
--- /dev/null
+++ b/serverless/openvino/omz/intel/face-detection-0205/model_handler.py
@@ -0,0 +1,71 @@
+# Copyright (C) 2020-2022 Intel Corporation
+#
+# SPDX-License-Identifier: MIT
+
+import os
+import numpy as np
+from model_loader import ModelLoader
+
+class FaceDetectorHandler:
+    def __init__(self):
+        base_dir = os.path.abspath(os.environ.get("DETECTOR_MODEL_PATH",
+            "/opt/nuclio/open_model_zoo/intel/face-detection-0205/FP32"))
+        model_xml = os.path.join(base_dir, "face-detection-0205.xml")
+        model_bin = os.path.join(base_dir, "face-detection-0205.bin")
+        self.model = ModelLoader(model_xml, model_bin)
+
+    def infer(self, image, threshold):
+        infer_res = self.model.infer(image)["boxes"]
+        infer_res = infer_res[infer_res[:,4] > threshold]
+
+        results = []
+        faces = []
+        h_scale = image.height / 416
+        w_scale = image.width / 416
+        for face in infer_res:
+            xmin = int(face[0] * w_scale)
+            ymin = int(face[1] * h_scale)
+            xmax = int(face[2] * w_scale)
+            ymax = int(face[3] * h_scale)
+            confidence = face[4]
+
+            faces.append(np.array(image)[ymin:ymax, xmin:xmax])
+            results.append({
+                "confidence": str(confidence),
+                "label": "face",
+                "points": [xmin, ymin, xmax, ymax],
+                "type": "rectangle",
+                "attributes": []
+            })
+
+        return results, faces
+
+class AttributesExtractorHandler:
+    def __init__(self):
+        age_gender_base_dir = os.path.abspath(os.environ.get("AGE_GENDER_MODEL_PATH",
+            "/opt/nuclio/open_model_zoo/intel/age-gender-recognition-retail-0013/FP32"))
+        age_gender_model_xml = os.path.join(age_gender_base_dir, "age-gender-recognition-retail-0013.xml")
+        age_gender_model_bin = os.path.join(age_gender_base_dir, "age-gender-recognition-retail-0013.bin")
+        self.age_gender_model = ModelLoader(age_gender_model_xml, age_gender_model_bin)
+        emotions_base_dir = os.path.abspath(os.environ.get("EMOTIONS_MODEL_PATH",
+            "/opt/nuclio/open_model_zoo/intel/emotions-recognition-retail-0003/FP32"))
+        emotions_model_xml = os.path.join(emotions_base_dir, "emotions-recognition-retail-0003.xml")
+        emotions_model_bin = os.path.join(emotions_base_dir, "emotions-recognition-retail-0003.bin")
+        self.emotions_model = ModelLoader(emotions_model_xml, emotions_model_bin)
+        self.genders_map = ["female", "male"]
+        self.emotions_map = ["neutral", "happy", "sad", "surprise", "anger"]
+
+    def infer(self, image):
+        age_gender_request = self.age_gender_model.async_infer(image)
+        emotions_request = self.emotions_model.async_infer(image)
+        # Wait until both age_gender and emotion recognition async inferences finish
+        while not (age_gender_request.wait(0) == 0 and emotions_request.wait(0) == 0):
+            continue
+        age = int(np.squeeze(age_gender_request.output_blobs["age_conv3"].buffer) * 100)
+        gender = self.genders_map[np.argmax(np.squeeze(age_gender_request.output_blobs["prob"].buffer))]
+        emotion = self.emotions_map[np.argmax(np.squeeze(emotions_request.output_blobs['prob_emotion'].buffer))]
+        return {"attributes": [
+            {"name": "age", "value": str(age)},
+            {"name": "gender", "value": gender},
+            {"name": "emotion", "value": emotion}
+        ]}
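
A minimal local smoke test for the new function; this is a sketch only, assuming the OMZ models are already downloaded to the default paths and that model_handler.py/model_loader.py are importable. The Context/Event stubs and the image path are hypothetical, standing in for what Nuclio provides at runtime:

    import base64
    import json
    import logging
    from types import SimpleNamespace

    import main  # the main.py added by this patch


    class Response:
        # Mimics the context.Response object main.handler constructs.
        def __init__(self, body=None, headers=None, content_type=None, status_code=200):
            self.body = body
            self.headers = headers
            self.content_type = content_type
            self.status_code = status_code


    def make_context():
        logging.basicConfig(level=logging.INFO)
        return SimpleNamespace(
            logger=logging.getLogger("nuclio"),
            user_data=SimpleNamespace(),
            Response=Response,
        )


    if __name__ == "__main__":
        context = make_context()
        main.init_context(context)  # loads the three OpenVINO models

        with open("face.jpg", "rb") as f:  # any test image with a face
            image_b64 = base64.b64encode(f.read()).decode("utf-8")

        event = SimpleNamespace(body={"image": image_b64, "threshold": 0.5})
        response = main.handler(context, event)
        print(json.dumps(json.loads(response.body), indent=2))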