From 4bb9bbefd2bdf952868fab1d011b19e622d141a1 Mon Sep 17 00:00:00 2001 From: "Vafin, Maxim" Date: Wed, 19 May 2021 01:06:40 +0300 Subject: [PATCH] Fix security issue with XML parsing --- .../mo/back/ie_ir_ver_2/emitter.py | 13 ++++++---- .../mo/middle/passes/tensor_names.py | 7 +++++- .../mo/utils/ir_engine/ir_engine.py | 24 +++++++++++++------ 3 files changed, 32 insertions(+), 12 deletions(-) diff --git a/model-optimizer/mo/back/ie_ir_ver_2/emitter.py b/model-optimizer/mo/back/ie_ir_ver_2/emitter.py index 55ead23a271f51..f8a6d9e4c4a1c4 100644 --- a/model-optimizer/mo/back/ie_ir_ver_2/emitter.py +++ b/model-optimizer/mo/back/ie_ir_ver_2/emitter.py @@ -2,8 +2,12 @@ # SPDX-License-Identifier: Apache-2.0 import hashlib -from xml.etree.ElementTree import Element, SubElement, tostring, ElementTree +# xml.etree.ElementTree is imported to modify XML, it is not used to parse. To eliminate a risk of it to be used to +# parse XML in future development defusedxml.defuse_stdlib() is called +from xml.etree.ElementTree import Element, SubElement, tostring # nosec +from defusedxml import defuse_stdlib +from defusedxml.ElementTree import parse from defusedxml.minidom import parseString from mo.graph.graph import * @@ -12,6 +16,8 @@ from mo.utils.utils import refer_to_faq_msg from mo.utils.version import get_version +defuse_stdlib() + def serialize_constants(graph: Graph, bin_file_name: str, data_type=np.float32): """ @@ -444,8 +450,7 @@ def append_ir_info(file: str, meta_info: dict = dict(), mean_data: [list, None] path_to_xml = file + ".xml" path_to_bin = file + ".bin" - et = ElementTree() - et.parse(path_to_xml) + et = parse(path_to_xml) net = et.getroot() if mean_data: @@ -462,4 +467,4 @@ def append_ir_info(file: str, meta_info: dict = dict(), mean_data: [list, None] pretty_xml_as_string = parseString(tostring(net)).toprettyxml() with open(path_to_xml, 'wb') as file: - file.write(bytes(pretty_xml_as_string, "UTF-8")) \ No newline at end of file + file.write(bytes(pretty_xml_as_string, "UTF-8")) diff --git a/model-optimizer/mo/middle/passes/tensor_names.py b/model-optimizer/mo/middle/passes/tensor_names.py index 4e291023679ff5..e71f618ed06277 100644 --- a/model-optimizer/mo/middle/passes/tensor_names.py +++ b/model-optimizer/mo/middle/passes/tensor_names.py @@ -1,12 +1,17 @@ # Copyright (C) 2018-2021 Intel Corporation # SPDX-License-Identifier: Apache-2.0 -from xml.etree.ElementTree import Element, SubElement, tostring +# xml.etree.ElementTree is imported to modify XML, it is not used to parse. To eliminate a risk of it to be used to +# parse XML in future development defusedxml.defuse_stdlib() is called +from xml.etree.ElementTree import Element, SubElement, tostring # nosec +from defusedxml import defuse_stdlib from defusedxml.minidom import parseString from mo.graph.graph import Node, Graph +defuse_stdlib() + def propagate_op_name_to_tensor(graph: Graph): for node in graph.nodes(): diff --git a/model-optimizer/mo/utils/ir_engine/ir_engine.py b/model-optimizer/mo/utils/ir_engine/ir_engine.py index b124e6d30046a6..439e954019000b 100644 --- a/model-optimizer/mo/utils/ir_engine/ir_engine.py +++ b/model-optimizer/mo/utils/ir_engine/ir_engine.py @@ -5,7 +5,12 @@ import logging as log import os import sys -import xml.etree.ElementTree as ET + +# ElementTree is included to build it from Element which is already parsed XML, it is not used to parse anything. To +# eliminate a risk of it to be used to parse XML in future development defusedxml.defuse_stdlib() is called +from xml.etree.ElementTree import ElementTree # nosec +from defusedxml import defuse_stdlib +from defusedxml.ElementTree import parse from argparse import Namespace from collections import namedtuple, defaultdict from pathlib import Path @@ -15,6 +20,8 @@ from mo.graph.graph import Node, Graph from mo.utils.ir_engine.compare_graphs import compare_graphs +defuse_stdlib() + log.basicConfig(format="[ %(levelname)s ] %(message)s", level=log.DEBUG, stream=sys.stdout) @@ -38,7 +45,7 @@ def __init__(self, path_to_xml: str, path_to_bin=None, precision="FP32", xml_tre self.__load_ir() def __load_xml(self): - xml_tree = self.xml_tree or ET.parse(self.path_to_xml) + xml_tree = self.xml_tree or parse(self.path_to_xml) xml_root = xml_tree.getroot() xml_layers = {} xml_edges = [] @@ -50,7 +57,8 @@ def __load_xml(self): self.graph = Graph() self.graph.graph['hashes'] = {} - self.graph.graph['ir_version'] = int(xml_root.attrib['version']) if xml_root.attrib.get('version') is not None else None + self.graph.graph['ir_version'] = int(xml_root.attrib['version']) if xml_root.attrib.get( + 'version') is not None else None self.graph.graph['layout'] = 'NCHW' self.graph.name = xml_root.attrib['name'] if xml_root.attrib.get('name') is not None else None @@ -88,7 +96,6 @@ def __load_xml(self): elif elem.tag in ['version', 'cli_params']: self.meta_data['quantization_parameters'][elem.tag] = elem.attrib['value'] - self.graph.graph['cmd_params'] = Namespace(**self.meta_data) # TODO check what we need all this attrs if len(statistics): @@ -206,8 +213,11 @@ def __load_layer(self, layer): new_attrs = self.__normalize_attrs(attr.attrib) if layer.attrib['type'] == 'Const': assert 'offset' in new_attrs and 'size' in new_attrs, \ - 'Incorrect attributes for Const layer, {} instead of {}!'.format(new_attrs.keys(), ['offset', 'size']) - new_attrs.update(self.__prepare_bin_attrs(layer, 0, 'custom', new_attrs['offset'], new_attrs['size'], layer[1][0].attrib['precision'])) + 'Incorrect attributes for Const layer, {} instead of {}!'.format(new_attrs.keys(), + ['offset', 'size']) + new_attrs.update( + self.__prepare_bin_attrs(layer, 0, 'custom', new_attrs['offset'], new_attrs['size'], + layer[1][0].attrib['precision'])) layer_attrs.update(new_attrs) elif attr.tag == 'input': inputs_counter = len(attr) @@ -237,7 +247,7 @@ def __load_layer(self, layer): body_ir = IREngine(path_to_xml=None, path_to_bin=self.path_to_bin, - xml_tree=ET.ElementTree(xml_body_child[0])) + xml_tree=ElementTree(xml_body_child[0])) self.graph.graph['hashes'].update(body_ir.graph.graph['hashes']) # Find port_map section and take an input_port_map & output_port_map