From ad11b587b516b1562d78f7fe9c82f9cb9ec161a1 Mon Sep 17 00:00:00 2001
From: Mikhail Treskin
Date: Fri, 18 Feb 2022 15:14:37 +0300
Subject: [PATCH] Handle attributes returned from nuclio detector (#3917)

---
 CHANGELOG.md                                  |   1 +
 cvat-ui/package-lock.json                     |   4 +-
 cvat-ui/package.json                          |   2 +-
 .../controls-side-bar/tools-control.tsx       |  15 ++-
 cvat/apps/lambda_manager/views.py             | 115 +++++++++++++++---
 serverless/common/openvino/model_loader.py    |  19 ++-
 .../intel/face-detection-0205/function.yaml   |  76 ++++++++++++
 .../omz/intel/face-detection-0205/main.py     |  33 +++++
 .../face-detection-0205/model_handler.py      |  71 +++++++++++
 9 files changed, 302 insertions(+), 34 deletions(-)
 create mode 100644 serverless/openvino/omz/intel/face-detection-0205/function.yaml
 create mode 100644 serverless/openvino/omz/intel/face-detection-0205/main.py
 create mode 100644 serverless/openvino/omz/intel/face-detection-0205/model_handler.py

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 62372d0614d0..e8e37d70e02d 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -7,6 +7,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 
 ## \[2.0.0] - Unreleased
 ### Added
+- Handle attributes coming from Nuclio detectors ()
 - Add additional environment variables for Nuclio configuration ()
 - Add KITTI segmentation and detection format ()
 - Add LFW format ()
diff --git a/cvat-ui/package-lock.json b/cvat-ui/package-lock.json
index b2303605db9e..895eedef66b3 100644
--- a/cvat-ui/package-lock.json
+++ b/cvat-ui/package-lock.json
@@ -1,12 +1,12 @@
 {
   "name": "cvat-ui",
-  "version": "1.35.1",
+  "version": "1.35.2",
   "lockfileVersion": 2,
   "requires": true,
   "packages": {
     "": {
       "name": "cvat-ui",
-      "version": "1.35.1",
+      "version": "1.35.2",
       "license": "MIT",
       "dependencies": {
         "@ant-design/icons": "^4.6.3",
diff --git a/cvat-ui/package.json b/cvat-ui/package.json
index e3d604dc692f..e01199e4b3f3 100644
--- a/cvat-ui/package.json
+++ b/cvat-ui/package.json
@@ -1,6 +1,6 @@
 {
   "name": "cvat-ui",
-  "version": "1.35.1",
+  "version": "1.35.2",
   "description": "CVAT single-page application",
   "main": "src/index.tsx",
   "scripts": {
diff --git a/cvat-ui/src/components/annotation-page/standard-workspace/controls-side-bar/tools-control.tsx b/cvat-ui/src/components/annotation-page/standard-workspace/controls-side-bar/tools-control.tsx
index 572b810c878c..e2f8563846a9 100644
--- a/cvat-ui/src/components/annotation-page/standard-workspace/controls-side-bar/tools-control.tsx
+++ b/cvat-ui/src/components/annotation-page/standard-workspace/controls-side-bar/tools-control.tsx
@@ -1,4 +1,4 @@
-// Copyright (C) 2020-2021 Intel Corporation
+// Copyright (C) 2020-2022 Intel Corporation
 //
 // SPDX-License-Identifier: MIT
 
@@ -1014,6 +1014,13 @@ export class ToolsControlComponent extends React.PureComponent<Props, State> {
                 );
             }
 
+            const attrsMap: Record<string, Record<string, number>> = {};
+            jobInstance.labels.forEach((label: any) => {
+                attrsMap[label.name] = {};
+                label.attributes.forEach((attr: any) => {
+                    attrsMap[label.name][attr.name] = attr.id;
+                });
+            });
             return (
                 {
                     frame,
                     occluded: false,
                     source: 'auto',
-                    attributes: {},
+                    attributes: (data.attributes as { name: string, value: string }[])
+                        .reduce((mapping, attr) => {
+                            mapping[attrsMap[data.label][attr.name]] = attr.value;
+                            return mapping;
+                        }, {} as Record<number, string>),
                     zOrder: curZOrder,
                 }),
             );
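
To make the UI change above concrete: the new code builds a label -> attribute name -> attribute spec id lookup from the job's labels, and the .reduce() call then converts a detector result's attribute list into the {spec_id: value} record that ObjectState expects. A minimal sketch of the same remapping in Python (the label, attribute names, and ids here are made up for illustration):

    # Hypothetical lookup built from the job's labels, as attrsMap is above.
    attrs_map = {
        "face": {"age": 11, "gender": 12, "emotion": 13},
    }

    # Hypothetical item as returned by a detector function.
    detection = {
        "label": "face",
        "points": [10, 20, 90, 120],
        "attributes": [
            {"name": "age", "value": "31"},
            {"name": "gender", "value": "male"},
        ],
    }

    # Equivalent of the .reduce() call: attribute names become spec ids.
    mapped = {
        attrs_map[detection["label"]][attr["name"]]: attr["value"]
        for attr in detection["attributes"]
    }
    assert mapped == {11: "31", 12: "male"}
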
diff --git a/cvat/apps/lambda_manager/views.py b/cvat/apps/lambda_manager/views.py
index 18cc4db60640..f5ec851b0669 100644
--- a/cvat/apps/lambda_manager/views.py
+++ b/cvat/apps/lambda_manager/views.py
@@ -1,7 +1,12 @@
+# Copyright (C) 2021 Intel Corporation
+#
+# SPDX-License-Identifier: MIT
+
 import base64
 import json
 from functools import wraps
 from enum import Enum
+from copy import deepcopy
 
 import django_rq
 import requests
@@ -101,6 +106,13 @@ def __init__(self, gateway, data):
                 "`{}` lambda function has non-unique labels".format(self.id),
                 code=status.HTTP_404_NOT_FOUND)
         self.labels = labels
+        # mapping of labels and corresponding supported attributes
+        self.func_attributes = {item['name']: item.get('attributes', []) for item in spec}
+        for label, attributes in self.func_attributes.items():
+            if len([attr['name'] for attr in attributes]) != len(set([attr['name'] for attr in attributes])):
+                raise ValidationError(
+                    "`{}` lambda function has non-unique attributes for label {}".format(self.id, label),
+                    code=status.HTTP_404_NOT_FOUND)
         # state of the function
         self.state = data['status']['state']
         # description of the function
@@ -141,6 +153,10 @@ def to_dict(self):
             response.update({
                 'state': self.state
             })
+        if self.kind is LambdaType.DETECTOR:
+            response.update({
+                'attributes': self.func_attributes
+            })
 
         return response
@@ -153,11 +169,17 @@ def invoke(self, db_task, data):
                 "threshold": threshold,
             })
         quality = data.get("quality")
-        mapping = data.get("mapping")
-        mapping_by_default = {db_label.name:db_label.name
-            for db_label in (
-                db_task.project.label_set if db_task.project_id else db_task.label_set
-            ).all()}
+        mapping = data.get("mapping", {})
+        mapping_by_default = {}
+        task_attributes = {}
+        for db_label in (db_task.project.label_set if db_task.project_id else db_task.label_set).prefetch_related("attributespec_set").all():
+            mapping_by_default[db_label.name] = db_label.name
+            task_attributes[db_label.name] = {}
+            for attribute in db_label.attributespec_set.all():
+                task_attributes[db_label.name][attribute.name] = {
+                    'input_type': attribute.input_type,
+                    'values': attribute.values.split('\n')
+                }
         if not mapping:
             # use mapping by default to avoid labels in mapping which
             # don't exist in the task
             mapping = mapping_by_default
         else:
             # filter labels in mapping which don't exist in the task
             mapping = {k:v for k,v in mapping.items() if v in mapping_by_default}
-
+        supported_attrs = {}
+        for func_label, func_attrs in self.func_attributes.items():
+            if func_label in mapping:
+                supported_attrs[func_label] = {}
+                task_attr_names = [task_attr for task_attr in task_attributes[mapping[func_label]]]
+                for attr in func_attrs:
+                    if attr['name'] in task_attr_names:
+                        supported_attrs[func_label].update({attr["name"] : attr})
         if self.kind == LambdaType.DETECTOR:
             payload.update({
                 "image": self._get_image(db_task, data["frame"], quality)
             })
                 code=status.HTTP_400_BAD_REQUEST)
 
         response = self.gateway.invoke(self, payload)
+        response_filtered = []
+        def check_attr_value(value, func_attr, db_attr):
+            if db_attr is None:
+                return False
+            func_attr_type = func_attr["input_type"]
+            db_attr_type = db_attr["input_type"]
+            # Check if attribute types are equal for function configuration and db spec
+            if func_attr_type == db_attr_type:
+                if func_attr_type == "number":
+                    return value.isnumeric()
+                elif func_attr_type == "checkbox":
+                    return value in ["true", "false"]
+                elif func_attr_type in ["select", "radio", "text"]:
+                    return True
+                else:
+                    return False
+            else:
+                if func_attr_type == "number":
+                    return db_attr_type in ["select", "radio", "text"] and value.isnumeric()
+                elif func_attr_type == "text":
+                    return db_attr_type == "text" or \
+                        (db_attr_type in ["select", "radio"] and len(value.split(" ")) == 1)
+                elif func_attr_type == "select":
+                    return db_attr["input_type"] in ["radio", "text"]
+                elif func_attr_type == "radio":
+                    return db_attr["input_type"] in ["select", "text"]
+                elif func_attr_type == "checkbox":
+                    return value in ["true", "false"]
+                else:
+                    return False
         if self.kind == LambdaType.DETECTOR:
-            if mapping:
-                for item in response:
-                    item["label"] = mapping.get(item["label"])
-                response = [item for item in response if item["label"]]
-
-        return response
+            for item in response:
+                if item['label'] in mapping:
+                    attributes = deepcopy(item.get("attributes", []))
+                    item["attributes"] = []
+                    for attr in attributes:
+                        db_attr = supported_attrs.get(item['label'], {}).get(attr["name"])
+                        func_attr = [func_attr for func_attr in self.func_attributes.get(item['label'], []) if func_attr['name'] == attr["name"]]
+                        # Skip current attribute if it was not declared as supported in function config
+                        if not func_attr:
+                            continue
+                        if attr["name"] in supported_attrs.get(item['label'], {}) and check_attr_value(attr["value"], func_attr[0], db_attr):
+                            item["attributes"].append(attr)
+                    item['label'] = mapping[item['label']]
+                    response_filtered.append(item)
+
+        return response_filtered
 
     def _get_image(self, db_task, frame, quality):
         if quality is None or quality == "original":
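
The acceptance rules in check_attr_value are easiest to read as examples. The cases below are hypothetical; each tuple follows the {"input_type": ...} spec shape used above, and the comment states the outcome the function returns for it:

    # (value, function attr input_type, task attr input_type) -> accepted?
    cases = [
        ("31", "number", "number"),        # True: same type, numeric string
        ("31", "number", "text"),          # True: numbers may map to select/radio/text
        ("male", "select", "text"),        # True: select may map to radio/text
        ("happy", "text", "select"),       # True: single-word text fits a select/radio
        ("very happy", "text", "select"),  # False: multi-word value is rejected
        ("true", "checkbox", "number"),    # True: checkbox only needs "true"/"false"
        ("abc", "number", "number"),       # False: not numeric
    ]
    # An attribute whose db_attr lookup returned None is always rejected.
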
def reset(self):
             break
 
         for anno in annotations:
-            label_id = labels.get(anno["label"])
-            if label_id is None:
+            label = labels.get(anno["label"])
+            if label is None:
                 continue # Invalid label provided
+            if anno.get('attributes'):
+                attrs = [{'spec_id': label['attributes'][attr['name']], 'value': attr['value']} for attr in anno.get('attributes') if attr['name'] in label['attributes']]
+            else:
+                attrs = []
             if anno["type"].lower() == "tag":
                 results.append_tag({
                     "frame": frame,
-                    "label_id": label_id,
+                    "label_id": label['id'],
                     "source": "auto",
-                    "attributes": [],
+                    "attributes": attrs,
                     "group": None,
                 })
             else:
                 results.append_shape({
                     "frame": frame,
-                    "label_id": label_id,
+                    "label_id": label['id'],
                     "type": anno["type"],
                     "occluded": False,
                     "points": anno["points"],
                     "z_order": 0,
                     "group": None,
-                    "attributes": [],
+                    "attributes": attrs,
                     "source": "auto"
                 })
 
def __call__(function, task, quality, cleanup, **kwargs):
         if cleanup:
             dm.task.delete_task_data(db_task.id)
         db_labels = (db_task.project.label_set if db_task.project_id else db_task.label_set).prefetch_related("attributespec_set").all()
-        labels = {db_label.name:db_label.id for db_label in db_labels}
+        labels = {}
+        for label in db_labels:
+            labels[label.name] = {'id':label.id, 'attributes': {}}
+            for attr in label.attributespec_set.values():
+                labels[label.name]['attributes'][attr['name']] = attr['id']
 
         if function.kind == LambdaType.DETECTOR:
             LambdaJob._call_detector(function, db_task, labels, quality,
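
The worker-side labels structure changes shape here: instead of a flat name-to-id map, each entry now carries the label id plus a name-to-spec-id map of its attributes, which is what reset() uses to build {spec_id, value} records. A minimal sketch with hypothetical ids:

    # Hypothetical labels dict, in the shape built in __call__ above.
    labels = {
        "face": {"id": 7, "attributes": {"age": 11, "gender": 12}},
    }

    # Hypothetical detector result; "unknown" is not in the task spec.
    anno = {
        "label": "face",
        "type": "rectangle",
        "points": [10, 20, 90, 120],
        "attributes": [
            {"name": "age", "value": "31"},
            {"name": "unknown", "value": "x"},
        ],
    }

    # The same filtering comprehension reset() uses above.
    label = labels[anno["label"]]
    attrs = [
        {"spec_id": label["attributes"][attr["name"]], "value": attr["value"]}
        for attr in anno.get("attributes", [])
        if attr["name"] in label["attributes"]
    ]
    assert attrs == [{"spec_id": 11, "value": "31"}]
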
diff --git a/serverless/common/openvino/model_loader.py b/serverless/common/openvino/model_loader.py
index ca3354d1aee3..85c547c9a17e 100644
--- a/serverless/common/openvino/model_loader.py
+++ b/serverless/common/openvino/model_loader.py
@@ -1,4 +1,3 @@
-
 # Copyright (C) 2020 Intel Corporation
 #
 # SPDX-License-Identifier: MIT
@@ -13,14 +12,6 @@ def __init__(self, model, weights):
         network = ie_core.read_network(model, weights)
         self._network = network
 
-        # Check compatibility
-        supported_layers = ie_core.query_network(network, "CPU")
-        not_supported_layers = [l for l in network.layers.keys() if l not in supported_layers]
-        if len(not_supported_layers) != 0:
-            raise Exception(
-                "Following layers are not supported by the plugin for specified device {}:\n {}"
-                .format(ie_core.device, ", ".join(not_supported_layers)))
-
         # Initialize input blobs
         self._input_info_name = None
         for blob_name in network.inputs:
@@ -41,7 +32,8 @@ def __init__(self, model, weights):
         input_type = network.inputs[self._input_blob_name]
         self._input_layout = input_type if isinstance(input_type, list) else input_type.shape
 
-    def infer(self, image, preprocessing=True):
+
+    def _prepare_inputs(self, image, preprocessing):
         image = np.array(image)
         _, _, h, w = self._input_layout
         if preprocessing:
 
         inputs = {self._input_blob_name: image}
         if self._input_info_name:
             inputs[self._input_info_name] = [h, w, 1]
+        return inputs
 
+    def infer(self, image, preprocessing=True):
+        inputs = self._prepare_inputs(image, preprocessing)
         results = self._net.infer(inputs)
         if len(results) == 1:
             return results[self._output_blob_name].copy()
         else:
             return results.copy()
 
+    def async_infer(self, image, preprocessing=True, request_id=0):
+        inputs = self._prepare_inputs(image, preprocessing)
+        return self._net.start_async(request_id=request_id, inputs=inputs)
+
     def input_size(self):
         return self._input_layout[2:]
diff --git a/serverless/openvino/omz/intel/face-detection-0205/function.yaml b/serverless/openvino/omz/intel/face-detection-0205/function.yaml
new file mode 100644
index 000000000000..131c8c4fa63c
--- /dev/null
+++ b/serverless/openvino/omz/intel/face-detection-0205/function.yaml
@@ -0,0 +1,76 @@
+metadata:
+  name: openvino-omz-face-detection-0205
+  namespace: cvat
+  annotations:
+    name: Attributed face detection
+    type: detector
+    framework: openvino
+    spec: |
+      [
+        { "id": 0, "name": "face", "attributes": [
+          {
+            "name": "age",
+            "input_type": "number",
+            "values": ["0", "150", "1"]
+          },
+          {
+            "name": "gender",
+            "input_type": "select",
+            "values": ["female", "male"]
+          },
+          {
+            "name": "emotion",
+            "input_type": "select",
+            "values": ["neutral", "happy", "sad", "surprise", "anger"]
+          }]
+        }
+      ]
+
+spec:
+  description: Detection network that finds faces and estimates age, gender and emotion attributes
+  runtime: 'python:3.6'
+  handler: main:handler
+  eventTimeout: 30000s
+  env:
+    - name: NUCLIO_PYTHON_EXE_PATH
+      value: /opt/nuclio/common/openvino/python3
+
+  build:
+    image: cvat/openvino.omz.intel.face-detection-0205
+    baseImage: openvino/ubuntu18_dev:2021.1
+
+    directives:
+      preCopy:
+        - kind: USER
+          value: root
+        - kind: WORKDIR
+          value: /opt/nuclio
+        - kind: RUN
+          value: ln -s /usr/bin/pip3 /usr/bin/pip
+        - kind: RUN
+          value: /opt/intel/openvino/deployment_tools/open_model_zoo/tools/downloader/downloader.py --name face-detection-0205 -o /opt/nuclio/open_model_zoo
+        - kind: RUN
+          value: /opt/intel/openvino/deployment_tools/open_model_zoo/tools/downloader/downloader.py --name emotions-recognition-retail-0003 -o /opt/nuclio/open_model_zoo
+        - kind: RUN
+          value: /opt/intel/openvino/deployment_tools/open_model_zoo/tools/downloader/downloader.py --name age-gender-recognition-retail-0013 -o /opt/nuclio/open_model_zoo
+
+      postCopy:
+        - kind: RUN
+          value: apt update && DEBIAN_FRONTEND=noninteractive apt install --no-install-recommends -y python3-skimage
+        - kind: RUN
+          value: pip3 install "numpy<1.16.0" # workaround for skimage
+
+  triggers:
+    myHttpTrigger:
+      maxWorkers: 2
+      kind: 'http'
+      workerAvailabilityTimeoutMilliseconds: 10000
+      attributes:
+        maxRequestBodySize: 33554432 # 32MB
+
+  platform:
+    attributes:
+      restartPolicy:
+        name: always
+        maximumRetryCount: 3
+      mountMode: volume
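
For reference, one detection item returned by this function looks like the following (values are illustrative; the shape matches model_handler.py below and the attributes declared in the spec block above). Attribute values arrive as strings and are validated server-side against the task's attribute specs:

    # Illustrative response body, as serialized by main.py below.
    sample_response = [
        {
            "confidence": "0.87",
            "label": "face",
            "points": [227, 94, 392, 292],
            "type": "rectangle",
            "attributes": [
                {"name": "age", "value": "34"},
                {"name": "gender", "value": "female"},
                {"name": "emotion", "value": "happy"},
            ],
        },
    ]
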
diff --git a/serverless/openvino/omz/intel/face-detection-0205/main.py b/serverless/openvino/omz/intel/face-detection-0205/main.py
new file mode 100644
index 000000000000..53b4f3e3d819
--- /dev/null
+++ b/serverless/openvino/omz/intel/face-detection-0205/main.py
@@ -0,0 +1,33 @@
+# Copyright (C) 2020-2022 Intel Corporation
+#
+# SPDX-License-Identifier: MIT
+
+import json
+import base64
+from PIL import Image
+import io
+from model_handler import FaceDetectorHandler, AttributesExtractorHandler
+
+def init_context(context):
+    context.logger.info("Init context... 0%")
+
+    # Read the DL model
+    context.user_data.detector_model = FaceDetectorHandler()
+    context.user_data.attributes_model = AttributesExtractorHandler()
+
+    context.logger.info("Init context...100%")
+
+def handler(context, event):
+    context.logger.info("Run face-detection-0205 model")
+    data = event.body
+    buf = io.BytesIO(base64.b64decode(data["image"]))
+    threshold = float(data.get("threshold", 0.5))
+    image = Image.open(buf)
+
+    results, faces = context.user_data.detector_model.infer(image, threshold)
+    for idx, face in enumerate(faces):
+        attributes = context.user_data.attributes_model.infer(face)
+        results[idx].update(attributes)
+
+    return context.Response(body=json.dumps(results), headers={},
+        content_type='application/json', status_code=200)
diff --git a/serverless/openvino/omz/intel/face-detection-0205/model_handler.py b/serverless/openvino/omz/intel/face-detection-0205/model_handler.py
new file mode 100644
index 000000000000..9189d1d17f5a
--- /dev/null
+++ b/serverless/openvino/omz/intel/face-detection-0205/model_handler.py
@@ -0,0 +1,71 @@
+# Copyright (C) 2020-2022 Intel Corporation
+#
+# SPDX-License-Identifier: MIT
+
+import os
+import numpy as np
+from model_loader import ModelLoader
+
+class FaceDetectorHandler:
+    def __init__(self):
+        base_dir = os.path.abspath(os.environ.get("DETECTOR_MODEL_PATH",
+            "/opt/nuclio/open_model_zoo/intel/face-detection-0205/FP32"))
+        model_xml = os.path.join(base_dir, "face-detection-0205.xml")
+        model_bin = os.path.join(base_dir, "face-detection-0205.bin")
+        self.model = ModelLoader(model_xml, model_bin)
+
+    def infer(self, image, threshold):
+        infer_res = self.model.infer(image)["boxes"]
+        infer_res = infer_res[infer_res[:,4] > threshold]
+
+        results = []
+        faces = []
+        h_scale = image.height / 416
+        w_scale = image.width / 416
+        for face in infer_res:
+            xmin = int(face[0] * w_scale)
+            ymin = int(face[1] * h_scale)
+            xmax = int(face[2] * w_scale)
+            ymax = int(face[3] * h_scale)
+            confidence = face[4]
+
+            faces.append(np.array(image)[ymin:ymax, xmin:xmax])
+            results.append({
+                "confidence": str(confidence),
+                "label": "face",
+                "points": [xmin, ymin, xmax, ymax],
+                "type": "rectangle",
+                "attributes": []
+            })
+
+        return results, faces
+
+class AttributesExtractorHandler:
+    def __init__(self):
+        age_gender_base_dir = os.path.abspath(os.environ.get("AGE_GENDER_MODEL_PATH",
+            "/opt/nuclio/open_model_zoo/intel/age-gender-recognition-retail-0013/FP32"))
+        age_gender_model_xml = os.path.join(age_gender_base_dir, "age-gender-recognition-retail-0013.xml")
+        age_gender_model_bin = os.path.join(age_gender_base_dir, "age-gender-recognition-retail-0013.bin")
+        self.age_gender_model = ModelLoader(age_gender_model_xml, age_gender_model_bin)
+        emotions_base_dir = os.path.abspath(os.environ.get("EMOTIONS_MODEL_PATH",
+            "/opt/nuclio/open_model_zoo/intel/emotions-recognition-retail-0003/FP32"))
+        emotions_model_xml = os.path.join(emotions_base_dir, "emotions-recognition-retail-0003.xml")
+        emotions_model_bin = os.path.join(emotions_base_dir, "emotions-recognition-retail-0003.bin")
+        self.emotions_model = ModelLoader(emotions_model_xml, emotions_model_bin)
+        self.genders_map = ["female", "male"]
+        self.emotions_map = ["neutral", "happy", "sad", "surprise", "anger"]
+
+    def infer(self, image):
+        age_gender_request = self.age_gender_model.async_infer(image)
+        emotions_request = self.emotions_model.async_infer(image)
+        # Wait until both age_gender and emotion recognition async inferences finish
+        while not (age_gender_request.wait(0) == 0 and emotions_request.wait(0) == 0):
+            continue
+        age = int(np.squeeze(age_gender_request.output_blobs["age_conv3"].buffer) * 100)
+        gender = self.genders_map[np.argmax(np.squeeze(age_gender_request.output_blobs["prob"].buffer))]
+        emotion = self.emotions_map[np.argmax(np.squeeze(emotions_request.output_blobs['prob_emotion'].buffer))]
+        return {"attributes": [
+            {"name": "age", "value": str(age)},
+            {"name": "gender", "value": gender},
+            {"name": "emotion", "value": emotion}
+        ]}
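
A minimal local smoke test for the new function; this is a sketch only, assuming the OMZ models are already downloaded to the default paths and that model_handler.py/model_loader.py are importable. The Context/Event stubs and the image path are hypothetical, standing in for what Nuclio provides at runtime:

    import base64
    import json
    import logging
    from types import SimpleNamespace

    import main  # the main.py added by this patch


    class Response:
        # Mimics the context.Response object main.handler constructs.
        def __init__(self, body=None, headers=None, content_type=None, status_code=200):
            self.body = body
            self.headers = headers
            self.content_type = content_type
            self.status_code = status_code


    def make_context():
        logging.basicConfig(level=logging.INFO)
        return SimpleNamespace(
            logger=logging.getLogger("nuclio"),
            user_data=SimpleNamespace(),
            Response=Response,
        )


    if __name__ == "__main__":
        context = make_context()
        main.init_context(context)  # loads the three OpenVINO models

        with open("face.jpg", "rb") as f:  # any test image with a face
            image_b64 = base64.b64encode(f.read()).decode("utf-8")

        event = SimpleNamespace(body={"image": image_b64, "threshold": 0.5})
        response = main.handler(context, event)
        print(json.dumps(json.loads(response.body), indent=2))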