From f7e0d0a17730013f587dfe4647365a54a3ff57c8 Mon Sep 17 00:00:00 2001 From: "Vafin, Maxim" Date: Wed, 19 May 2021 01:06:40 +0300 Subject: [PATCH 1/4] Fix security issue with XML parsing --- .../mo/back/ie_ir_ver_2/emitter.py | 13 ++++--- .../mo/middle/passes/tensor_names.py | 7 +++- .../mo/utils/ir_engine/ir_engine.py | 24 +++++++++---- .../mo/utils/ir_reader/restore_graph_test.py | 35 +++++++++++++++++++ 4 files changed, 67 insertions(+), 12 deletions(-) create mode 100644 model-optimizer/unit_tests/mo/utils/ir_reader/restore_graph_test.py diff --git a/model-optimizer/mo/back/ie_ir_ver_2/emitter.py b/model-optimizer/mo/back/ie_ir_ver_2/emitter.py index 55ead23a271f51..f8a6d9e4c4a1c4 100644 --- a/model-optimizer/mo/back/ie_ir_ver_2/emitter.py +++ b/model-optimizer/mo/back/ie_ir_ver_2/emitter.py @@ -2,8 +2,12 @@ # SPDX-License-Identifier: Apache-2.0 import hashlib -from xml.etree.ElementTree import Element, SubElement, tostring, ElementTree +# xml.etree.ElementTree is imported to modify XML, it is not used to parse. To eliminate a risk of it to be used to +# parse XML in future development defusedxml.defuse_stdlib() is called +from xml.etree.ElementTree import Element, SubElement, tostring # nosec +from defusedxml import defuse_stdlib +from defusedxml.ElementTree import parse from defusedxml.minidom import parseString from mo.graph.graph import * @@ -12,6 +16,8 @@ from mo.utils.utils import refer_to_faq_msg from mo.utils.version import get_version +defuse_stdlib() + def serialize_constants(graph: Graph, bin_file_name: str, data_type=np.float32): """ @@ -444,8 +450,7 @@ def append_ir_info(file: str, meta_info: dict = dict(), mean_data: [list, None] path_to_xml = file + ".xml" path_to_bin = file + ".bin" - et = ElementTree() - et.parse(path_to_xml) + et = parse(path_to_xml) net = et.getroot() if mean_data: @@ -462,4 +467,4 @@ def append_ir_info(file: str, meta_info: dict = dict(), mean_data: [list, None] pretty_xml_as_string = parseString(tostring(net)).toprettyxml() with open(path_to_xml, 'wb') as file: - file.write(bytes(pretty_xml_as_string, "UTF-8")) \ No newline at end of file + file.write(bytes(pretty_xml_as_string, "UTF-8")) diff --git a/model-optimizer/mo/middle/passes/tensor_names.py b/model-optimizer/mo/middle/passes/tensor_names.py index 4e291023679ff5..e71f618ed06277 100644 --- a/model-optimizer/mo/middle/passes/tensor_names.py +++ b/model-optimizer/mo/middle/passes/tensor_names.py @@ -1,12 +1,17 @@ # Copyright (C) 2018-2021 Intel Corporation # SPDX-License-Identifier: Apache-2.0 -from xml.etree.ElementTree import Element, SubElement, tostring +# xml.etree.ElementTree is imported to modify XML, it is not used to parse. To eliminate a risk of it to be used to +# parse XML in future development defusedxml.defuse_stdlib() is called +from xml.etree.ElementTree import Element, SubElement, tostring # nosec +from defusedxml import defuse_stdlib from defusedxml.minidom import parseString from mo.graph.graph import Node, Graph +defuse_stdlib() + def propagate_op_name_to_tensor(graph: Graph): for node in graph.nodes(): diff --git a/model-optimizer/mo/utils/ir_engine/ir_engine.py b/model-optimizer/mo/utils/ir_engine/ir_engine.py index b124e6d30046a6..439e954019000b 100644 --- a/model-optimizer/mo/utils/ir_engine/ir_engine.py +++ b/model-optimizer/mo/utils/ir_engine/ir_engine.py @@ -5,7 +5,12 @@ import logging as log import os import sys -import xml.etree.ElementTree as ET + +# ElementTree is included to build it from Element which is already parsed XML, it is not used to parse anything. To +# eliminate a risk of it to be used to parse XML in future development defusedxml.defuse_stdlib() is called +from xml.etree.ElementTree import ElementTree # nosec +from defusedxml import defuse_stdlib +from defusedxml.ElementTree import parse from argparse import Namespace from collections import namedtuple, defaultdict from pathlib import Path @@ -15,6 +20,8 @@ from mo.graph.graph import Node, Graph from mo.utils.ir_engine.compare_graphs import compare_graphs +defuse_stdlib() + log.basicConfig(format="[ %(levelname)s ] %(message)s", level=log.DEBUG, stream=sys.stdout) @@ -38,7 +45,7 @@ def __init__(self, path_to_xml: str, path_to_bin=None, precision="FP32", xml_tre self.__load_ir() def __load_xml(self): - xml_tree = self.xml_tree or ET.parse(self.path_to_xml) + xml_tree = self.xml_tree or parse(self.path_to_xml) xml_root = xml_tree.getroot() xml_layers = {} xml_edges = [] @@ -50,7 +57,8 @@ def __load_xml(self): self.graph = Graph() self.graph.graph['hashes'] = {} - self.graph.graph['ir_version'] = int(xml_root.attrib['version']) if xml_root.attrib.get('version') is not None else None + self.graph.graph['ir_version'] = int(xml_root.attrib['version']) if xml_root.attrib.get( + 'version') is not None else None self.graph.graph['layout'] = 'NCHW' self.graph.name = xml_root.attrib['name'] if xml_root.attrib.get('name') is not None else None @@ -88,7 +96,6 @@ def __load_xml(self): elif elem.tag in ['version', 'cli_params']: self.meta_data['quantization_parameters'][elem.tag] = elem.attrib['value'] - self.graph.graph['cmd_params'] = Namespace(**self.meta_data) # TODO check what we need all this attrs if len(statistics): @@ -206,8 +213,11 @@ def __load_layer(self, layer): new_attrs = self.__normalize_attrs(attr.attrib) if layer.attrib['type'] == 'Const': assert 'offset' in new_attrs and 'size' in new_attrs, \ - 'Incorrect attributes for Const layer, {} instead of {}!'.format(new_attrs.keys(), ['offset', 'size']) - new_attrs.update(self.__prepare_bin_attrs(layer, 0, 'custom', new_attrs['offset'], new_attrs['size'], layer[1][0].attrib['precision'])) + 'Incorrect attributes for Const layer, {} instead of {}!'.format(new_attrs.keys(), + ['offset', 'size']) + new_attrs.update( + self.__prepare_bin_attrs(layer, 0, 'custom', new_attrs['offset'], new_attrs['size'], + layer[1][0].attrib['precision'])) layer_attrs.update(new_attrs) elif attr.tag == 'input': inputs_counter = len(attr) @@ -237,7 +247,7 @@ def __load_layer(self, layer): body_ir = IREngine(path_to_xml=None, path_to_bin=self.path_to_bin, - xml_tree=ET.ElementTree(xml_body_child[0])) + xml_tree=ElementTree(xml_body_child[0])) self.graph.graph['hashes'].update(body_ir.graph.graph['hashes']) # Find port_map section and take an input_port_map & output_port_map diff --git a/model-optimizer/unit_tests/mo/utils/ir_reader/restore_graph_test.py b/model-optimizer/unit_tests/mo/utils/ir_reader/restore_graph_test.py new file mode 100644 index 00000000000000..988014f3840ef5 --- /dev/null +++ b/model-optimizer/unit_tests/mo/utils/ir_reader/restore_graph_test.py @@ -0,0 +1,35 @@ +# Copyright (C) 2018-2021 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +import os +import unittest +import tempfile + +from mo.utils.ir_reader.restore_graph import restore_graph_from_ir +from defusedxml.common import EntitiesForbidden + + +class TestIRReader(unittest.TestCase): + def setUp(self): + self.xml_bomb = b'\n' \ + b'\n' \ + b' \n' \ + b' \n' \ + b' \n' \ + b' \n' \ + b' \n' \ + b' \n' \ + b' \n' \ + b' \n' \ + b' \n' \ + b' \n' \ + b']>\n' \ + b'&lol9;' + + def test_read_xml_bomb(self): + bomb_file = tempfile.NamedTemporaryFile(delete=False) + bomb_file.write(self.xml_bomb) + bomb_file.close() + self.assertRaises(EntitiesForbidden, restore_graph_from_ir, bomb_file.name) + os.remove(bomb_file.name) From 6d3cc15a592472aac186f2092eab0577e65272cd Mon Sep 17 00:00:00 2001 From: "Vafin, Maxim" Date: Fri, 21 May 2021 14:25:58 +0300 Subject: [PATCH 2/4] Apply review feedback --- model-optimizer/mo/back/ie_ir_ver_2/emitter.py | 5 +++-- model-optimizer/mo/middle/passes/tensor_names.py | 5 +++-- model-optimizer/mo/utils/ir_engine/ir_engine.py | 15 ++++++--------- 3 files changed, 12 insertions(+), 13 deletions(-) diff --git a/model-optimizer/mo/back/ie_ir_ver_2/emitter.py b/model-optimizer/mo/back/ie_ir_ver_2/emitter.py index f8a6d9e4c4a1c4..f37686a616f5f1 100644 --- a/model-optimizer/mo/back/ie_ir_ver_2/emitter.py +++ b/model-optimizer/mo/back/ie_ir_ver_2/emitter.py @@ -2,8 +2,7 @@ # SPDX-License-Identifier: Apache-2.0 import hashlib -# xml.etree.ElementTree is imported to modify XML, it is not used to parse. To eliminate a risk of it to be used to -# parse XML in future development defusedxml.defuse_stdlib() is called +# Objects from xml.etree.ElementTree are imported to modify XML and aren't used to parse. from xml.etree.ElementTree import Element, SubElement, tostring # nosec from defusedxml import defuse_stdlib @@ -16,6 +15,8 @@ from mo.utils.utils import refer_to_faq_msg from mo.utils.version import get_version +# To reduce a risk of xml.etree.ElementTree objects to be used to parse XML in future development +# defusedxml.defuse_stdlib() is called to patch xml library with safe methods. defuse_stdlib() diff --git a/model-optimizer/mo/middle/passes/tensor_names.py b/model-optimizer/mo/middle/passes/tensor_names.py index e71f618ed06277..5b09d8e8f2401e 100644 --- a/model-optimizer/mo/middle/passes/tensor_names.py +++ b/model-optimizer/mo/middle/passes/tensor_names.py @@ -1,8 +1,7 @@ # Copyright (C) 2018-2021 Intel Corporation # SPDX-License-Identifier: Apache-2.0 -# xml.etree.ElementTree is imported to modify XML, it is not used to parse. To eliminate a risk of it to be used to -# parse XML in future development defusedxml.defuse_stdlib() is called +# Objects from xml.etree.ElementTree are imported to modify XML and aren't used to parse. from xml.etree.ElementTree import Element, SubElement, tostring # nosec from defusedxml import defuse_stdlib @@ -10,6 +9,8 @@ from mo.graph.graph import Node, Graph +# To eliminate a risk of xml.etree.ElementTree objects to be used to parse XML in future development +# defusedxml.defuse_stdlib() is called to patch xml library with safe methods. defuse_stdlib() diff --git a/model-optimizer/mo/utils/ir_engine/ir_engine.py b/model-optimizer/mo/utils/ir_engine/ir_engine.py index 439e954019000b..8223f317097cc5 100644 --- a/model-optimizer/mo/utils/ir_engine/ir_engine.py +++ b/model-optimizer/mo/utils/ir_engine/ir_engine.py @@ -6,8 +6,7 @@ import os import sys -# ElementTree is included to build it from Element which is already parsed XML, it is not used to parse anything. To -# eliminate a risk of it to be used to parse XML in future development defusedxml.defuse_stdlib() is called +# ElementTree is included to build it from Element which is already parsed XML, it is not used for parsing. from xml.etree.ElementTree import ElementTree # nosec from defusedxml import defuse_stdlib from defusedxml.ElementTree import parse @@ -20,6 +19,8 @@ from mo.graph.graph import Node, Graph from mo.utils.ir_engine.compare_graphs import compare_graphs +# To reduce a risk of xml.etree.ElementTree objects to be used to parse XML in future development +# defusedxml.defuse_stdlib() is called to patch xml library with safe methods. defuse_stdlib() log.basicConfig(format="[ %(levelname)s ] %(message)s", level=log.DEBUG, stream=sys.stdout) @@ -57,8 +58,7 @@ def __load_xml(self): self.graph = Graph() self.graph.graph['hashes'] = {} - self.graph.graph['ir_version'] = int(xml_root.attrib['version']) if xml_root.attrib.get( - 'version') is not None else None + self.graph.graph['ir_version'] = int(xml_root.attrib['version']) if xml_root.attrib.get('version') is not None else None self.graph.graph['layout'] = 'NCHW' self.graph.name = xml_root.attrib['name'] if xml_root.attrib.get('name') is not None else None @@ -213,11 +213,8 @@ def __load_layer(self, layer): new_attrs = self.__normalize_attrs(attr.attrib) if layer.attrib['type'] == 'Const': assert 'offset' in new_attrs and 'size' in new_attrs, \ - 'Incorrect attributes for Const layer, {} instead of {}!'.format(new_attrs.keys(), - ['offset', 'size']) - new_attrs.update( - self.__prepare_bin_attrs(layer, 0, 'custom', new_attrs['offset'], new_attrs['size'], - layer[1][0].attrib['precision'])) + 'Incorrect attributes for Const layer, {} instead of {}!'.format(new_attrs.keys(), ['offset', 'size']) + new_attrs.update(self.__prepare_bin_attrs(layer, 0, 'custom', new_attrs['offset'], new_attrs['size'], layer[1][0].attrib['precision'])) layer_attrs.update(new_attrs) elif attr.tag == 'input': inputs_counter = len(attr) From 9cd5d7aa05d0c7abe240248326cb4b11608e4f43 Mon Sep 17 00:00:00 2001 From: "Vafin, Maxim" Date: Fri, 21 May 2021 15:06:26 +0300 Subject: [PATCH 3/4] Rework defusing stdlib solution --- .../mo/back/ie_ir_ver_2/emitter.py | 100 +++++++++--------- .../mo/middle/passes/tensor_names.py | 21 ++-- .../mo/utils/ir_engine/ir_engine.py | 10 +- 3 files changed, 62 insertions(+), 69 deletions(-) diff --git a/model-optimizer/mo/back/ie_ir_ver_2/emitter.py b/model-optimizer/mo/back/ie_ir_ver_2/emitter.py index f37686a616f5f1..fe79a672e53e7c 100644 --- a/model-optimizer/mo/back/ie_ir_ver_2/emitter.py +++ b/model-optimizer/mo/back/ie_ir_ver_2/emitter.py @@ -2,11 +2,9 @@ # SPDX-License-Identifier: Apache-2.0 import hashlib -# Objects from xml.etree.ElementTree are imported to modify XML and aren't used to parse. -from xml.etree.ElementTree import Element, SubElement, tostring # nosec from defusedxml import defuse_stdlib -from defusedxml.ElementTree import parse +import defusedxml.ElementTree as ET from defusedxml.minidom import parseString from mo.graph.graph import * @@ -15,9 +13,7 @@ from mo.utils.utils import refer_to_faq_msg from mo.utils.version import get_version -# To reduce a risk of xml.etree.ElementTree objects to be used to parse XML in future development -# defusedxml.defuse_stdlib() is called to patch xml library with safe methods. -defuse_stdlib() +ET_defused = defuse_stdlib()[ET] def serialize_constants(graph: Graph, bin_file_name: str, data_type=np.float32): @@ -110,9 +106,9 @@ def serialize_mean_image(bin_file_name: str, mean_data=[]): return mean_offset, mean_size -def xml_shape(shape: np.ndarray, element: Element): +def xml_shape(shape: np.ndarray, element: ET_defused.Element): for d in shape: - dim = SubElement(element, 'dim') + dim = ET_defused.SubElement(element, 'dim') if d < 0: raise Error('The value "{}" for shape is less 0. May be the input shape of the topology is ' 'wrong.'.format(d)) @@ -124,14 +120,14 @@ def xml_shape(shape: np.ndarray, element: Element): dim.text = str(d) -def xml_ports(node: Node, element: Element, edges: Element): +def xml_ports(node: Node, element: ET_defused.Element, edges: ET_defused.Element): # input ports inputs = None # will create input section only if at least one input is available for u, d in node.get_sorted_inputs(): if 'bin' not in d and ('xml_skip' not in d or not d['xml_skip']): if inputs is None: - inputs = SubElement(element, 'input') - port = SubElement(inputs, 'port') + inputs = ET_defused.SubElement(element, 'input') + port = ET_defused.SubElement(inputs, 'port') port.set('id', str(d['in'])) assert node.graph.node[u]['shape'] is not None, 'Input shape is not calculated properly for node {}'.format( node.id) @@ -142,7 +138,7 @@ def xml_ports(node: Node, element: Element, edges: Element): assert (len(in_nodes) <= 1) if len(in_nodes) == 1: src, _, out_attrs = in_nodes[0] - edge = SubElement(edges, 'edge') + edge = ET_defused.SubElement(edges, 'edge') edge.set('from-layer', str(src)) edge.set('from-port', str(out_attrs['out'])) edge.set('to-layer', str(node.node)) @@ -154,8 +150,8 @@ def xml_ports(node: Node, element: Element, edges: Element): for v, d in node.get_sorted_outputs(): if 'xml_skip' not in d or not d['xml_skip']: if outputs is None: - outputs = SubElement(element, 'output') - port = SubElement(outputs, 'port') + outputs = ET_defused.SubElement(element, 'output') + port = ET_defused.SubElement(outputs, 'port') port.set('id', str(d['out'])) # we need to check operation type, if it is const op, we don't renumber out ports # because they are already counted from zero @@ -173,13 +169,13 @@ def xml_ports(node: Node, element: Element, edges: Element): xml_shape(node.graph.node[v]['shape'], port) -def xml_consts(graph: Graph, node: Node, element: Element): +def xml_consts(graph: Graph, node: Node, element: ET_defused.Element): blobs = None # sub-element that will be created on-demand for u, d in node.get_sorted_inputs(): if 'bin' in d and (node.type != 'Const'): if not blobs: - blobs = SubElement(element, 'blobs') - const = SubElement(blobs, d['bin']) + blobs = ET_defused.SubElement(element, 'blobs') + const = ET_defused.SubElement(blobs, d['bin']) try: const.set('offset', str(graph.node[u]['offset'])) const.set('size', str(graph.node[u]['size'])) @@ -198,11 +194,11 @@ def serialize_element( graph: Graph, node, schema: list, - parent_element: Element, - edges: Element, + parent_element: ET_defused.Element, + edges: ET_defused.Element, unsupported): name, attrs, subelements = schema - element = SubElement(parent_element, name) + element = ET_defused.SubElement(parent_element, name) for attr in attrs: if isinstance(attr, tuple): key = attr[0] @@ -249,8 +245,8 @@ def serialize_node_attributes( graph: Graph, # the current network graph node, # dictionary-like object that should be serialized schema: list, - parent_element: Element, - edges: Element, + parent_element: ET_defused.Element, + edges: ET_defused.Element, unsupported): # the Result op may be marked so it should not appear in the IR. For example, refer to transformation # model-optimizer/extensions/back/TopKNormalizer.py @@ -287,16 +283,16 @@ def serialize_node_attributes( ) from e -def create_pre_process_block_for_image(net: Element, ref_layer_names: list, mean_offset: tuple, +def create_pre_process_block_for_image(net: ET_defused.Element, ref_layer_names: list, mean_offset: tuple, mean_size: tuple): - pre_process = SubElement(net, 'pre-process') + pre_process = ET_defused.SubElement(net, 'pre-process') pre_process.set('mean-precision', 'FP32') # TODO: to think about need to output FP16 mean values # TODO: extend it for several inputs pre_process.set('reference-layer-name', ref_layer_names[0]) for idx in range(len(mean_size)): - channel_xml = SubElement(pre_process, 'channel') + channel_xml = ET_defused.SubElement(pre_process, 'channel') channel_xml.set('id', str(idx)) - mean_xml = SubElement(channel_xml, 'mean') + mean_xml = ET_defused.SubElement(channel_xml, 'mean') mean_xml.set('offset', str(mean_offset[idx])) mean_xml.set('size', str(mean_size[idx])) @@ -313,18 +309,18 @@ def create_pre_process_block(net, ref_layer_name, means, scales=None): Returns: pre-process XML element """ - pre_process = SubElement(net, 'pre-process') + pre_process = ET_defused.SubElement(net, 'pre-process') pre_process.set('reference-layer-name', ref_layer_name) for idx in range(len(means)): - channel_xml = SubElement(pre_process, 'channel') + channel_xml = ET_defused.SubElement(pre_process, 'channel') channel_xml.set('id', str(idx)) - mean_xml = SubElement(channel_xml, 'mean') + mean_xml = ET_defused.SubElement(channel_xml, 'mean') mean_xml.set('value', str(means[idx])) if scales: - scale_xml = SubElement(channel_xml, 'scale') + scale_xml = ET_defused.SubElement(channel_xml, 'scale') scale_xml.set('value', str(scales[idx])) return pre_process @@ -332,45 +328,45 @@ def create_pre_process_block(net, ref_layer_name, means, scales=None): def add_quantization_statistics(graph, net_element): if 'statistics' in graph.graph: - stats = SubElement(net_element, 'statistics') + stats = ET_defused.SubElement(net_element, 'statistics') for tensor, interval in graph.graph['statistics'].items(): - layer = SubElement(stats, 'layer') - name = SubElement(layer, 'name') + layer = ET_defused.SubElement(stats, 'layer') + name = ET_defused.SubElement(layer, 'name') name.text = tensor - min = SubElement(layer, 'min') + min = ET_defused.SubElement(layer, 'min') min.text = interval['min'] - max = SubElement(layer, 'max') + max = ET_defused.SubElement(layer, 'max') max.text = interval['max'] log.info('Statistics were inserted to IR') -def add_quantization_info_section(net: Element, meta_info: dict): +def add_quantization_info_section(net: ET_defused.Element, meta_info: dict): if 'quantization_parameters' in meta_info: parameters = meta_info['quantization_parameters'] - quant_params = SubElement(net, 'quantization_parameters') + quant_params = ET_defused.SubElement(net, 'quantization_parameters') - config = SubElement(quant_params, 'config') + config = ET_defused.SubElement(quant_params, 'config') config.text = parameters['config'] - version = SubElement(quant_params, 'version') + version = ET_defused.SubElement(quant_params, 'version') version.set('value', parameters['version']) - cli_params = SubElement(quant_params, 'cli_params') + cli_params = ET_defused.SubElement(quant_params, 'cli_params') cli_params.set('value', parameters['cli_params']) -def add_meta_data(net: Element, meta_info: dict): - meta = SubElement(net, 'meta_data') - SubElement(meta, 'MO_version').set('value', get_version()) - parameters = SubElement(meta, 'cli_parameters') - [SubElement(parameters, str(key)).set('value', str(meta_info[key])) for key in sorted(meta_info.keys()) if +def add_meta_data(net: ET_defused.Element, meta_info: dict): + meta = ET_defused.SubElement(net, 'meta_data') + ET_defused.SubElement(meta, 'MO_version').set('value', get_version()) + parameters = ET_defused.SubElement(meta, 'cli_parameters') + [ET_defused.SubElement(parameters, str(key)).set('value', str(meta_info[key])) for key in sorted(meta_info.keys()) if key not in ('unset', 'quantization_parameters')] - SubElement(parameters, 'unset').set('unset_cli_parameters', ', '.join(sorted(meta_info['unset']))) + ET_defused.SubElement(parameters, 'unset').set('unset_cli_parameters', ', '.join(sorted(meta_info['unset']))) def serialize_network(graph, net_element, unsupported): - layers = SubElement(net_element, 'layers') - edges = SubElement(net_element, 'edges') + layers = ET_defused.SubElement(net_element, 'layers') + edges = ET_defused.SubElement(net_element, 'edges') if graph is None: return nodes = sorted(graph.nodes()) @@ -405,7 +401,7 @@ def generate_ie_ir(graph: Graph, file_name: str, input_names: tuple = (), mean_o mean_offset: offset in binary file, where mean file values start mean_size: size of the mean file """ - net = Element('net') + net = ET_defused.Element('net') net.set('name', graph.name) net.set('version', str((graph.graph['ir_version']))) @@ -422,7 +418,7 @@ def generate_ie_ir(graph: Graph, file_name: str, input_names: tuple = (), mean_o add_quantization_statistics(graph, net) add_meta_data(net, meta_info) add_quantization_info_section(net, meta_info) - xml_string = tostring(net) + xml_string = ET_defused.tostring(net) xml_doc = parseString(xml_string) pretty_xml_as_string = xml_doc.toprettyxml() if len(unsupported.unsupported): @@ -451,7 +447,7 @@ def append_ir_info(file: str, meta_info: dict = dict(), mean_data: [list, None] path_to_xml = file + ".xml" path_to_bin = file + ".bin" - et = parse(path_to_xml) + et = ET.parse(path_to_xml) net = et.getroot() if mean_data: @@ -466,6 +462,6 @@ def append_ir_info(file: str, meta_info: dict = dict(), mean_data: [list, None] if elem.tail: elem.tail = elem.tail.strip() - pretty_xml_as_string = parseString(tostring(net)).toprettyxml() + pretty_xml_as_string = parseString(ET_defused.tostring(net)).toprettyxml() with open(path_to_xml, 'wb') as file: file.write(bytes(pretty_xml_as_string, "UTF-8")) diff --git a/model-optimizer/mo/middle/passes/tensor_names.py b/model-optimizer/mo/middle/passes/tensor_names.py index 5b09d8e8f2401e..44d6b605edb6bd 100644 --- a/model-optimizer/mo/middle/passes/tensor_names.py +++ b/model-optimizer/mo/middle/passes/tensor_names.py @@ -1,18 +1,12 @@ # Copyright (C) 2018-2021 Intel Corporation # SPDX-License-Identifier: Apache-2.0 -# Objects from xml.etree.ElementTree are imported to modify XML and aren't used to parse. -from xml.etree.ElementTree import Element, SubElement, tostring # nosec - from defusedxml import defuse_stdlib from defusedxml.minidom import parseString +import defusedxml.ElementTree as ET from mo.graph.graph import Node, Graph -# To eliminate a risk of xml.etree.ElementTree objects to be used to parse XML in future development -# defusedxml.defuse_stdlib() is called to patch xml library with safe methods. -defuse_stdlib() - def propagate_op_name_to_tensor(graph: Graph): for node in graph.nodes(): @@ -26,7 +20,10 @@ def propagate_op_name_to_tensor(graph: Graph): def output_tensor_names_map(graph: Graph, xml_file_name: str): - mapping = Element('mapping') + defused = defuse_stdlib() + ET_defused = defused[ET] + + mapping = ET_defused.Element('mapping') for node in graph: node = Node(graph, node) if node.has_valid('fw_tensor_debug_info') and node.has_valid('ie_tensor_name'): @@ -34,9 +31,9 @@ def output_tensor_names_map(graph: Graph, xml_file_name: str): # Check that debug info has valid fw attrs if not all(attr is not None for attr in fw_tensor_debug_info): continue - map = SubElement(mapping, 'map') - fw = SubElement(map, 'framework') - ie = SubElement(map, 'IR') + map = ET_defused.SubElement(mapping, 'map') + fw = ET_defused.SubElement(map, 'framework') + ie = ET_defused.SubElement(map, 'IR') fw.set('name', fw_tensor_debug_info[0]) fw.set('out_port_id', str(fw_tensor_debug_info[1])) @@ -48,4 +45,4 @@ def output_tensor_names_map(graph: Graph, xml_file_name: str): if node.has_valid('ie_tensor_id'): ie.set('id', str(node.ie_tensor_id)) with open(xml_file_name, 'w') as file: - file.write(parseString(tostring(mapping)).toprettyxml()) + file.write(parseString(ET_defused.tostring(mapping)).toprettyxml()) diff --git a/model-optimizer/mo/utils/ir_engine/ir_engine.py b/model-optimizer/mo/utils/ir_engine/ir_engine.py index 8223f317097cc5..5816cd98bd0587 100644 --- a/model-optimizer/mo/utils/ir_engine/ir_engine.py +++ b/model-optimizer/mo/utils/ir_engine/ir_engine.py @@ -6,10 +6,8 @@ import os import sys -# ElementTree is included to build it from Element which is already parsed XML, it is not used for parsing. -from xml.etree.ElementTree import ElementTree # nosec from defusedxml import defuse_stdlib -from defusedxml.ElementTree import parse +import defusedxml.ElementTree as ET from argparse import Namespace from collections import namedtuple, defaultdict from pathlib import Path @@ -46,7 +44,7 @@ def __init__(self, path_to_xml: str, path_to_bin=None, precision="FP32", xml_tre self.__load_ir() def __load_xml(self): - xml_tree = self.xml_tree or parse(self.path_to_xml) + xml_tree = self.xml_tree or ET.parse(self.path_to_xml) xml_root = xml_tree.getroot() xml_layers = {} xml_edges = [] @@ -242,9 +240,11 @@ def __load_layer(self, layer): xml_body_child = list(layer.iterfind('body')) assert len(xml_body_child) == 1 + ET_defused = defuse_stdlib()[ET] + body_ir = IREngine(path_to_xml=None, path_to_bin=self.path_to_bin, - xml_tree=ElementTree(xml_body_child[0])) + xml_tree=ET_defused.ElementTree(xml_body_child[0])) self.graph.graph['hashes'].update(body_ir.graph.graph['hashes']) # Find port_map section and take an input_port_map & output_port_map From e0e311cab76a8e015145b3a02eb69aeb6551e6ff Mon Sep 17 00:00:00 2001 From: "Vafin, Maxim" Date: Tue, 25 May 2021 14:49:00 +0300 Subject: [PATCH 4/4] Apply review feedback --- .../mo/back/ie_ir_ver_2/emitter.py | 95 ++++++++++--------- .../mo/middle/passes/tensor_names.py | 20 ++-- .../mo/utils/ir_engine/ir_engine.py | 11 +-- 3 files changed, 66 insertions(+), 60 deletions(-) diff --git a/model-optimizer/mo/back/ie_ir_ver_2/emitter.py b/model-optimizer/mo/back/ie_ir_ver_2/emitter.py index fe79a672e53e7c..b0cd0d53f32a3b 100644 --- a/model-optimizer/mo/back/ie_ir_ver_2/emitter.py +++ b/model-optimizer/mo/back/ie_ir_ver_2/emitter.py @@ -13,7 +13,12 @@ from mo.utils.utils import refer_to_faq_msg from mo.utils.version import get_version +# defuse_stdlib provide patched version of xml.etree.ElementTree which allows to use objects from xml.etree.ElementTree +# in a safe manner without including unsafe xml.etree.ElementTree ET_defused = defuse_stdlib()[ET] +Element = ET_defused.Element +SubElement = ET_defused.SubElement +tostring = ET_defused.tostring def serialize_constants(graph: Graph, bin_file_name: str, data_type=np.float32): @@ -106,9 +111,9 @@ def serialize_mean_image(bin_file_name: str, mean_data=[]): return mean_offset, mean_size -def xml_shape(shape: np.ndarray, element: ET_defused.Element): +def xml_shape(shape: np.ndarray, element: Element): for d in shape: - dim = ET_defused.SubElement(element, 'dim') + dim = SubElement(element, 'dim') if d < 0: raise Error('The value "{}" for shape is less 0. May be the input shape of the topology is ' 'wrong.'.format(d)) @@ -120,14 +125,14 @@ def xml_shape(shape: np.ndarray, element: ET_defused.Element): dim.text = str(d) -def xml_ports(node: Node, element: ET_defused.Element, edges: ET_defused.Element): +def xml_ports(node: Node, element: Element, edges: Element): # input ports inputs = None # will create input section only if at least one input is available for u, d in node.get_sorted_inputs(): if 'bin' not in d and ('xml_skip' not in d or not d['xml_skip']): if inputs is None: - inputs = ET_defused.SubElement(element, 'input') - port = ET_defused.SubElement(inputs, 'port') + inputs = SubElement(element, 'input') + port = SubElement(inputs, 'port') port.set('id', str(d['in'])) assert node.graph.node[u]['shape'] is not None, 'Input shape is not calculated properly for node {}'.format( node.id) @@ -138,7 +143,7 @@ def xml_ports(node: Node, element: ET_defused.Element, edges: ET_defused.Element assert (len(in_nodes) <= 1) if len(in_nodes) == 1: src, _, out_attrs = in_nodes[0] - edge = ET_defused.SubElement(edges, 'edge') + edge = SubElement(edges, 'edge') edge.set('from-layer', str(src)) edge.set('from-port', str(out_attrs['out'])) edge.set('to-layer', str(node.node)) @@ -150,8 +155,8 @@ def xml_ports(node: Node, element: ET_defused.Element, edges: ET_defused.Element for v, d in node.get_sorted_outputs(): if 'xml_skip' not in d or not d['xml_skip']: if outputs is None: - outputs = ET_defused.SubElement(element, 'output') - port = ET_defused.SubElement(outputs, 'port') + outputs = SubElement(element, 'output') + port = SubElement(outputs, 'port') port.set('id', str(d['out'])) # we need to check operation type, if it is const op, we don't renumber out ports # because they are already counted from zero @@ -169,13 +174,13 @@ def xml_ports(node: Node, element: ET_defused.Element, edges: ET_defused.Element xml_shape(node.graph.node[v]['shape'], port) -def xml_consts(graph: Graph, node: Node, element: ET_defused.Element): +def xml_consts(graph: Graph, node: Node, element: Element): blobs = None # sub-element that will be created on-demand for u, d in node.get_sorted_inputs(): if 'bin' in d and (node.type != 'Const'): if not blobs: - blobs = ET_defused.SubElement(element, 'blobs') - const = ET_defused.SubElement(blobs, d['bin']) + blobs = SubElement(element, 'blobs') + const = SubElement(blobs, d['bin']) try: const.set('offset', str(graph.node[u]['offset'])) const.set('size', str(graph.node[u]['size'])) @@ -194,11 +199,11 @@ def serialize_element( graph: Graph, node, schema: list, - parent_element: ET_defused.Element, - edges: ET_defused.Element, + parent_element: Element, + edges: Element, unsupported): name, attrs, subelements = schema - element = ET_defused.SubElement(parent_element, name) + element = SubElement(parent_element, name) for attr in attrs: if isinstance(attr, tuple): key = attr[0] @@ -245,8 +250,8 @@ def serialize_node_attributes( graph: Graph, # the current network graph node, # dictionary-like object that should be serialized schema: list, - parent_element: ET_defused.Element, - edges: ET_defused.Element, + parent_element: Element, + edges: Element, unsupported): # the Result op may be marked so it should not appear in the IR. For example, refer to transformation # model-optimizer/extensions/back/TopKNormalizer.py @@ -283,16 +288,16 @@ def serialize_node_attributes( ) from e -def create_pre_process_block_for_image(net: ET_defused.Element, ref_layer_names: list, mean_offset: tuple, +def create_pre_process_block_for_image(net: Element, ref_layer_names: list, mean_offset: tuple, mean_size: tuple): - pre_process = ET_defused.SubElement(net, 'pre-process') + pre_process = SubElement(net, 'pre-process') pre_process.set('mean-precision', 'FP32') # TODO: to think about need to output FP16 mean values # TODO: extend it for several inputs pre_process.set('reference-layer-name', ref_layer_names[0]) for idx in range(len(mean_size)): - channel_xml = ET_defused.SubElement(pre_process, 'channel') + channel_xml = SubElement(pre_process, 'channel') channel_xml.set('id', str(idx)) - mean_xml = ET_defused.SubElement(channel_xml, 'mean') + mean_xml = SubElement(channel_xml, 'mean') mean_xml.set('offset', str(mean_offset[idx])) mean_xml.set('size', str(mean_size[idx])) @@ -309,18 +314,18 @@ def create_pre_process_block(net, ref_layer_name, means, scales=None): Returns: pre-process XML element """ - pre_process = ET_defused.SubElement(net, 'pre-process') + pre_process = SubElement(net, 'pre-process') pre_process.set('reference-layer-name', ref_layer_name) for idx in range(len(means)): - channel_xml = ET_defused.SubElement(pre_process, 'channel') + channel_xml = SubElement(pre_process, 'channel') channel_xml.set('id', str(idx)) - mean_xml = ET_defused.SubElement(channel_xml, 'mean') + mean_xml = SubElement(channel_xml, 'mean') mean_xml.set('value', str(means[idx])) if scales: - scale_xml = ET_defused.SubElement(channel_xml, 'scale') + scale_xml = SubElement(channel_xml, 'scale') scale_xml.set('value', str(scales[idx])) return pre_process @@ -328,45 +333,45 @@ def create_pre_process_block(net, ref_layer_name, means, scales=None): def add_quantization_statistics(graph, net_element): if 'statistics' in graph.graph: - stats = ET_defused.SubElement(net_element, 'statistics') + stats = SubElement(net_element, 'statistics') for tensor, interval in graph.graph['statistics'].items(): - layer = ET_defused.SubElement(stats, 'layer') - name = ET_defused.SubElement(layer, 'name') + layer = SubElement(stats, 'layer') + name = SubElement(layer, 'name') name.text = tensor - min = ET_defused.SubElement(layer, 'min') + min = SubElement(layer, 'min') min.text = interval['min'] - max = ET_defused.SubElement(layer, 'max') + max = SubElement(layer, 'max') max.text = interval['max'] log.info('Statistics were inserted to IR') -def add_quantization_info_section(net: ET_defused.Element, meta_info: dict): +def add_quantization_info_section(net: Element, meta_info: dict): if 'quantization_parameters' in meta_info: parameters = meta_info['quantization_parameters'] - quant_params = ET_defused.SubElement(net, 'quantization_parameters') + quant_params = SubElement(net, 'quantization_parameters') - config = ET_defused.SubElement(quant_params, 'config') + config = SubElement(quant_params, 'config') config.text = parameters['config'] - version = ET_defused.SubElement(quant_params, 'version') + version = SubElement(quant_params, 'version') version.set('value', parameters['version']) - cli_params = ET_defused.SubElement(quant_params, 'cli_params') + cli_params = SubElement(quant_params, 'cli_params') cli_params.set('value', parameters['cli_params']) -def add_meta_data(net: ET_defused.Element, meta_info: dict): - meta = ET_defused.SubElement(net, 'meta_data') - ET_defused.SubElement(meta, 'MO_version').set('value', get_version()) - parameters = ET_defused.SubElement(meta, 'cli_parameters') - [ET_defused.SubElement(parameters, str(key)).set('value', str(meta_info[key])) for key in sorted(meta_info.keys()) if +def add_meta_data(net: Element, meta_info: dict): + meta = SubElement(net, 'meta_data') + SubElement(meta, 'MO_version').set('value', get_version()) + parameters = SubElement(meta, 'cli_parameters') + [SubElement(parameters, str(key)).set('value', str(meta_info[key])) for key in sorted(meta_info.keys()) if key not in ('unset', 'quantization_parameters')] - ET_defused.SubElement(parameters, 'unset').set('unset_cli_parameters', ', '.join(sorted(meta_info['unset']))) + SubElement(parameters, 'unset').set('unset_cli_parameters', ', '.join(sorted(meta_info['unset']))) def serialize_network(graph, net_element, unsupported): - layers = ET_defused.SubElement(net_element, 'layers') - edges = ET_defused.SubElement(net_element, 'edges') + layers = SubElement(net_element, 'layers') + edges = SubElement(net_element, 'edges') if graph is None: return nodes = sorted(graph.nodes()) @@ -401,7 +406,7 @@ def generate_ie_ir(graph: Graph, file_name: str, input_names: tuple = (), mean_o mean_offset: offset in binary file, where mean file values start mean_size: size of the mean file """ - net = ET_defused.Element('net') + net = Element('net') net.set('name', graph.name) net.set('version', str((graph.graph['ir_version']))) @@ -418,7 +423,7 @@ def generate_ie_ir(graph: Graph, file_name: str, input_names: tuple = (), mean_o add_quantization_statistics(graph, net) add_meta_data(net, meta_info) add_quantization_info_section(net, meta_info) - xml_string = ET_defused.tostring(net) + xml_string = tostring(net) xml_doc = parseString(xml_string) pretty_xml_as_string = xml_doc.toprettyxml() if len(unsupported.unsupported): @@ -462,6 +467,6 @@ def append_ir_info(file: str, meta_info: dict = dict(), mean_data: [list, None] if elem.tail: elem.tail = elem.tail.strip() - pretty_xml_as_string = parseString(ET_defused.tostring(net)).toprettyxml() + pretty_xml_as_string = parseString(tostring(net)).toprettyxml() with open(path_to_xml, 'wb') as file: file.write(bytes(pretty_xml_as_string, "UTF-8")) diff --git a/model-optimizer/mo/middle/passes/tensor_names.py b/model-optimizer/mo/middle/passes/tensor_names.py index 44d6b605edb6bd..d8bb5591e0c783 100644 --- a/model-optimizer/mo/middle/passes/tensor_names.py +++ b/model-optimizer/mo/middle/passes/tensor_names.py @@ -7,6 +7,13 @@ from mo.graph.graph import Node, Graph +# defuse_stdlib provide patched version of xml.etree.ElementTree which allows to use objects from xml.etree.ElementTree +# in a safe manner without including unsafe xml.etree.ElementTree +ET_defused = defuse_stdlib()[ET] +Element = ET_defused.Element +SubElement = ET_defused.SubElement +tostring = ET_defused.tostring + def propagate_op_name_to_tensor(graph: Graph): for node in graph.nodes(): @@ -20,10 +27,7 @@ def propagate_op_name_to_tensor(graph: Graph): def output_tensor_names_map(graph: Graph, xml_file_name: str): - defused = defuse_stdlib() - ET_defused = defused[ET] - - mapping = ET_defused.Element('mapping') + mapping = Element('mapping') for node in graph: node = Node(graph, node) if node.has_valid('fw_tensor_debug_info') and node.has_valid('ie_tensor_name'): @@ -31,9 +35,9 @@ def output_tensor_names_map(graph: Graph, xml_file_name: str): # Check that debug info has valid fw attrs if not all(attr is not None for attr in fw_tensor_debug_info): continue - map = ET_defused.SubElement(mapping, 'map') - fw = ET_defused.SubElement(map, 'framework') - ie = ET_defused.SubElement(map, 'IR') + map = SubElement(mapping, 'map') + fw = SubElement(map, 'framework') + ie = SubElement(map, 'IR') fw.set('name', fw_tensor_debug_info[0]) fw.set('out_port_id', str(fw_tensor_debug_info[1])) @@ -45,4 +49,4 @@ def output_tensor_names_map(graph: Graph, xml_file_name: str): if node.has_valid('ie_tensor_id'): ie.set('id', str(node.ie_tensor_id)) with open(xml_file_name, 'w') as file: - file.write(parseString(ET_defused.tostring(mapping)).toprettyxml()) + file.write(parseString(tostring(mapping)).toprettyxml()) diff --git a/model-optimizer/mo/utils/ir_engine/ir_engine.py b/model-optimizer/mo/utils/ir_engine/ir_engine.py index 5816cd98bd0587..35d95486c5f734 100644 --- a/model-optimizer/mo/utils/ir_engine/ir_engine.py +++ b/model-optimizer/mo/utils/ir_engine/ir_engine.py @@ -17,12 +17,11 @@ from mo.graph.graph import Node, Graph from mo.utils.ir_engine.compare_graphs import compare_graphs -# To reduce a risk of xml.etree.ElementTree objects to be used to parse XML in future development -# defusedxml.defuse_stdlib() is called to patch xml library with safe methods. -defuse_stdlib() - log.basicConfig(format="[ %(levelname)s ] %(message)s", level=log.DEBUG, stream=sys.stdout) +# defuse_stdlib provide patched version of xml.etree.ElementTree which allows to use objects from xml.etree.ElementTree +# in a safe manner without including unsafe xml.etree.ElementTree +ElementTree = defuse_stdlib()[ET].ElementTree class IREngine(object): def __init__(self, path_to_xml: str, path_to_bin=None, precision="FP32", xml_tree=None): @@ -240,11 +239,9 @@ def __load_layer(self, layer): xml_body_child = list(layer.iterfind('body')) assert len(xml_body_child) == 1 - ET_defused = defuse_stdlib()[ET] - body_ir = IREngine(path_to_xml=None, path_to_bin=self.path_to_bin, - xml_tree=ET_defused.ElementTree(xml_body_child[0])) + xml_tree=ElementTree(xml_body_child[0])) self.graph.graph['hashes'].update(body_ir.graph.graph['hashes']) # Find port_map section and take an input_port_map & output_port_map