From 451ae14b4635f8cb7ea3ef585de9235771fc2980 Mon Sep 17 00:00:00 2001 From: iliya mironov Date: Thu, 22 Jul 2021 21:12:44 +0300 Subject: [PATCH] Add json validate (#6449) * Add json validate * Fix json schema * Fix schema loader * Add unit test * Update bom file * Update all requirements * Update dev requirements * Update requirements * Update path to schema * Update schema * Add some unit tests * Move schema to root dir * Update schema path in bom file * Fix unit test * Fix bom * Change path to schema * update setup * Fix setup * Fix mo args test * Refactoring some code * Refactoring according to review * Update sort imports * Remove id attribute from schema * Refactoring validator * Fix according to review * Move schema from json to dict. Update unit tests. * Fix BOM file * Update bom file --- model-optimizer/automation/package_BOM.txt | 1 + .../mo/utils/custom_replacement_config.py | 34 +++-- model-optimizer/mo/utils/json_schema.py | 129 ++++++++++++++++++ model-optimizer/requirements.txt | 1 + model-optimizer/requirements_caffe.txt | 1 + model-optimizer/requirements_dev.txt | 1 + model-optimizer/requirements_kaldi.txt | 1 + model-optimizer/requirements_mxnet.txt | 1 + model-optimizer/requirements_onnx.txt | 1 + model-optimizer/requirements_tf.txt | 1 + model-optimizer/requirements_tf2.txt | 1 + .../utils/custom_replacement_config_test.py | 40 ++++++ tests/stress_tests/scripts/requirements.txt | 1 + 13 files changed, 204 insertions(+), 9 deletions(-) create mode 100644 model-optimizer/mo/utils/json_schema.py create mode 100644 model-optimizer/unit_tests/mo/utils/custom_replacement_config_test.py diff --git a/model-optimizer/automation/package_BOM.txt b/model-optimizer/automation/package_BOM.txt index 4adb1e22aa2e2d..939f75b685d292 100644 --- a/model-optimizer/automation/package_BOM.txt +++ b/model-optimizer/automation/package_BOM.txt @@ -1070,6 +1070,7 @@ mo/utils/ir_reader/extenders/topk_extender.py mo/utils/ir_reader/extenders/variadic_split_extender.py 
mo/utils/ir_reader/layer_to_class.py mo/utils/ir_reader/restore_graph.py +mo/utils/json_schema.py mo/utils/logger.py mo/utils/model_analysis.py mo/utils/pipeline_config.py diff --git a/model-optimizer/mo/utils/custom_replacement_config.py b/model-optimizer/mo/utils/custom_replacement_config.py index 32f7b6808f563b..47176e36e4ac18 100644 --- a/model-optimizer/mo/utils/custom_replacement_config.py +++ b/model-optimizer/mo/utils/custom_replacement_config.py @@ -1,6 +1,7 @@ # Copyright (C) 2018-2021 Intel Corporation # SPDX-License-Identifier: Apache-2.0 +import fastjsonschema as json_validate import json import logging as log import os @@ -9,7 +10,8 @@ from mo.graph.graph import Node, Graph from mo.utils.error import Error from mo.utils.graph import nodes_matching_name_pattern, sub_graph_between_nodes -from mo.utils.utils import refer_to_faq_msg +from mo.utils.json_schema import schema_dict +from mo.utils.utils import get_mo_root_dir, refer_to_faq_msg class CustomReplacementDescriptor(object): @@ -297,12 +299,12 @@ def update_custom_replacement_attributes(self, graph: Graph): log.debug("Node {} doesn't have output edges. 
Consider it output".format(node_name)) output_tensors.add((generate_pattern_for_node(graph, pattern, node_name), 0)) - if not self.has('inputs'): + if not self.has('inputs') or len(self._replacement_desc['inputs']) == 0: self._replacement_desc['inputs'] = [[{'node': desc[0], 'port': desc[1]} for desc in inp] for inp in sorted(input_nodes_mapping.values())] log.debug('Updated inputs of sub-graph for instance "{}"'.format(self.instances)) - if not self.has('outputs'): + if not self.has('outputs') or len(self._replacement_desc['outputs']) == 0: self._replacement_desc['outputs'] = [{'node': node, 'port': port} for node, port in sorted(output_tensors)] log.debug('Updated outputs of sub-graph for instance "{}"'.format(self.instances)) @@ -342,13 +344,8 @@ def parse_custom_replacement_config_file(file_name: str): if not os.path.exists(file_name): raise Error("Custom replacements configuration file '{}' does not exist. ".format(file_name) + refer_to_faq_msg(69)) - try: - with open(file_name, 'r') as f: - data = json.load(f) - except Exception as exc: - raise Error("Failed to parse custom replacements configuration file '{}': {}. ".format(file_name, exc) + - refer_to_faq_msg(70)) from exc + data = load_and_validate_json_config(file_name) result = list() validation_errors = list() for attrs in data: @@ -394,3 +391,22 @@ def generate_pattern_for_node(graph: Graph, sub_graph_pattern: str, node_name: s raise RuntimeError('The pattern that uniquely identifies node "{}" using sub-graph pattern "{}" has not been found'. format(node_name, sub_graph_pattern)) + + +def load_and_validate_json_config(config_file_name: str): + """ + Reads and validate custom replacement configuration file config_file_name. + :param config_file_name: name of the file to read from. + :return: A dictionary serialized from json config file. 
+ """ + + try: + with open(config_file_name, 'r') as f: + json_config = json.load(f) + validator = json_validate.compile(schema_dict) + validator(json_config) + except Exception as e: + raise Error("Failed to parse custom replacements configuration file '{}': {}. ".format(config_file_name, e) + + refer_to_faq_msg(70)) from e + + return json_config diff --git a/model-optimizer/mo/utils/json_schema.py b/model-optimizer/mo/utils/json_schema.py new file mode 100644 index 00000000000000..a201818eb72719 --- /dev/null +++ b/model-optimizer/mo/utils/json_schema.py @@ -0,0 +1,129 @@ +# Copyright (C) 2018-2021 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +schema_dict = { + "definitions": {}, + "$schema": "http://json-schema.org/draft-07/schema#", + "title": "Root", + "type": "array", + "default": [], + "items": { + "$id": "#root/items", + "title": "Items", + "type": "object", + "required": [ + "id", + "match_kind" + ], + "properties": { + "custom_attributes": { + "$id": "#root/items/custom_attributes", + "title": "Custom_attributes", + "type": "object", + "properties": { + } + }, + "id": { + "$id": "#root/items/id", + "title": "Id", + "type": "string", + "pattern": "^.*$", + "minLength": 1 + }, + "inputs": { + "$id": "#root/items/inputs", + "title": "Inputs", + "type": "array", + "default": [], + "items": { + "$id": "#root/items/inputs/items", + "title": "Items", + "type": "array", + "default": [], + "items": { + "$id": "#root/items/inputs/items/items", + "title": "Items", + "type": "object", + "properties": { + "node": { + "$id": "#root/items/inputs/items/items/node", + "title": "Node", + "type": "string", + "default": "", + "pattern": "^.*$" + }, + "port": { + "$id": "#root/items/inputs/items/items/port", + "title": "Port", + "type": "integer", + "default": 0 + } + }, + "required": ["node", "port"] + } + + } + }, + "instances": { + "$id": "#root/items/instances", + "title": "Instances", + "type": ["array", "object"], + "items": { + "$id": 
"#root/items/instances/items", + "title": "Items", + "type": "string", + "default": "", + "pattern": "^.*$" + } + }, + "match_kind": { + "$id": "#root/items/match_kind", + "title": "Match_kind", + "type": "string", + "enum": ["points", "scope", "general"], + "default": "points", + "pattern": "^.*$" + }, + "outputs": { + "$id": "#root/items/outputs", + "title": "Outputs", + "type": "array", + "default": [], + "items": { + "$id": "#root/items/outputs/items", + "title": "Items", + "type": "object", + "properties": { + "node": { + "$id": "#root/items/outputs/items/node", + "title": "Node", + "type": "string", + "default": "", + "pattern": "^.*$" + }, + "port": { + "$id": "#root/items/outputs/items/port", + "title": "Port", + "type": "integer", + "default": 0 + } + }, + "required": ["node", "port"] + } + + }, + "include_inputs_to_sub_graph": { + "$id": "#root/items/include_inputs_to_sub_graph", + "title": "Include_inputs_to_sub_graph", + "type": "boolean", + "default": False + }, + "include_outputs_to_sub_graph": { + "$id": "#root/items/include_outputs_to_sub_graph", + "title": "Include_outputs_to_sub_graph", + "type": "boolean", + "default": False + } + } + } +} diff --git a/model-optimizer/requirements.txt b/model-optimizer/requirements.txt index cc0ec363dc3e19..b54ca23344b580 100644 --- a/model-optimizer/requirements.txt +++ b/model-optimizer/requirements.txt @@ -8,3 +8,4 @@ onnx>=1.8.1 defusedxml>=0.7.1 urllib3>=1.26.4 requests>=2.25.1 +fastjsonschema~=2.15.1 diff --git a/model-optimizer/requirements_caffe.txt b/model-optimizer/requirements_caffe.txt index 6dd20d0ec83707..d1eef645f44490 100644 --- a/model-optimizer/requirements_caffe.txt +++ b/model-optimizer/requirements_caffe.txt @@ -3,3 +3,4 @@ numpy>=1.16.6,<1.20 protobuf>=3.15.6 defusedxml>=0.7.1 requests>=2.25.1 +fastjsonschema~=2.15.1 diff --git a/model-optimizer/requirements_dev.txt b/model-optimizer/requirements_dev.txt index a1c72e7eeb0ab5..67640a482294cd 100644 --- a/model-optimizer/requirements_dev.txt 
+++ b/model-optimizer/requirements_dev.txt @@ -6,3 +6,4 @@ test-generator==0.1.1 defusedxml>=0.5.0 requests>=2.20.0 pytest>=6.2.4 +fastjsonschema~=2.15.1 diff --git a/model-optimizer/requirements_kaldi.txt b/model-optimizer/requirements_kaldi.txt index dfbdea1a50a2b0..1068d95240cb7b 100644 --- a/model-optimizer/requirements_kaldi.txt +++ b/model-optimizer/requirements_kaldi.txt @@ -2,3 +2,4 @@ networkx~=2.5 numpy>=1.16.6,<1.20 defusedxml>=0.7.1 requests>=2.25.1 +fastjsonschema~=2.15.1 diff --git a/model-optimizer/requirements_mxnet.txt b/model-optimizer/requirements_mxnet.txt index 1fc809413292f1..61897faa5003da 100644 --- a/model-optimizer/requirements_mxnet.txt +++ b/model-optimizer/requirements_mxnet.txt @@ -5,3 +5,4 @@ numpy>=1.16.6,<1.20 defusedxml>=0.7.1 urllib3>=1.26.4 requests>=2.25.1 +fastjsonschema~=2.15.1 diff --git a/model-optimizer/requirements_onnx.txt b/model-optimizer/requirements_onnx.txt index 0997041cfcc4b0..a6415939ccda25 100644 --- a/model-optimizer/requirements_onnx.txt +++ b/model-optimizer/requirements_onnx.txt @@ -3,3 +3,4 @@ networkx~=2.5 numpy>=1.16.6,<1.20 defusedxml>=0.7.1 requests>=2.25.1 +fastjsonschema~=2.15.1 diff --git a/model-optimizer/requirements_tf.txt b/model-optimizer/requirements_tf.txt index 6d75cff9fa95e7..11eee1b8af1abe 100644 --- a/model-optimizer/requirements_tf.txt +++ b/model-optimizer/requirements_tf.txt @@ -4,3 +4,4 @@ networkx~=2.5 numpy>=1.16.6,<1.19 defusedxml>=0.7.1 requests>=2.25.1 +fastjsonschema~=2.15.1 diff --git a/model-optimizer/requirements_tf2.txt b/model-optimizer/requirements_tf2.txt index 02d71d2585d0e3..760999011abc68 100644 --- a/model-optimizer/requirements_tf2.txt +++ b/model-optimizer/requirements_tf2.txt @@ -3,3 +3,4 @@ networkx~=2.5 numpy>=1.16.6,<1.20 defusedxml>=0.7.1 requests>=2.25.1 +fastjsonschema~=2.15.1 diff --git a/model-optimizer/unit_tests/mo/utils/custom_replacement_config_test.py b/model-optimizer/unit_tests/mo/utils/custom_replacement_config_test.py new file mode 100644 index 
00000000000000..3407ca5816e1b9 --- /dev/null +++ b/model-optimizer/unit_tests/mo/utils/custom_replacement_config_test.py @@ -0,0 +1,40 @@ +# Copyright (C) 2018-2021 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +import os +import unittest +from fnmatch import fnmatch +from generator import generator, generate + +from mo.utils.custom_replacement_config import load_and_validate_json_config +from mo.utils.error import Error +from mo.utils.utils import get_mo_root_dir + + +def get_json_configs(mo_root_dir): + config_path = os.path.join(mo_root_dir, 'extensions', 'front') + pattern = "*.json" + config_files_list = [] + for path, subdirs, files in os.walk(config_path): + for name in files: + if fnmatch(name, pattern): + config_files_list.append((os.path.join(path, name),)) + return config_files_list + +@generator +class TestSchema(unittest.TestCase): + base_dir = get_mo_root_dir() + schema_file = os.path.join(base_dir, 'mo', 'utils', 'schema.json') + transformation_configs = get_json_configs(base_dir) + test_json1 = '[{"id": "", "match_kind": "general", "custom_attributes": {}}]' + test_json2 = '[{"id": "someid", "match_kind": "abc", "custom_attributes": {}}]' + + @generate(*transformation_configs) + def test_schema_file(self, transformation_config): + self.assertTrue(load_and_validate_json_config(transformation_config)) + + def test_schema_id_empty(self): + self.assertRaises(Error, load_and_validate_json_config, self.test_json1) + + def test_schema_match_kind_wrong(self): + self.assertRaises(Error, load_and_validate_json_config, self.test_json2) diff --git a/tests/stress_tests/scripts/requirements.txt b/tests/stress_tests/scripts/requirements.txt index 8fe0104b0ae4e0..5d067c9229777f 100644 --- a/tests/stress_tests/scripts/requirements.txt +++ b/tests/stress_tests/scripts/requirements.txt @@ -1,5 +1,6 @@ pymongo Jinja2 PyYAML +fastjsonschema~=2.15.1 h5py<3.0.0 # WA for OMZ Keras models. 
Details: https://github.com/openvinotoolkit/open_model_zoo/issues/1806 \ No newline at end of file