Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix security issue with XML parsing #5686

Merged
merged 5 commits into from
May 26, 2021
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 9 additions & 4 deletions model-optimizer/mo/back/ie_ir_ver_2/emitter.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,12 @@
# SPDX-License-Identifier: Apache-2.0

import hashlib
from xml.etree.ElementTree import Element, SubElement, tostring, ElementTree
# xml.etree.ElementTree is imported to modify XML, it is not used to parse. To eliminate a risk of it to be used to
# parse XML in future development defusedxml.defuse_stdlib() is called
from xml.etree.ElementTree import Element, SubElement, tostring # nosec

from defusedxml import defuse_stdlib
from defusedxml.ElementTree import parse
from defusedxml.minidom import parseString

from mo.graph.graph import *
Expand All @@ -12,6 +16,8 @@
from mo.utils.utils import refer_to_faq_msg
from mo.utils.version import get_version

defuse_stdlib()
mvafin marked this conversation as resolved.
Show resolved Hide resolved


def serialize_constants(graph: Graph, bin_file_name: str, data_type=np.float32):
"""
Expand Down Expand Up @@ -444,8 +450,7 @@ def append_ir_info(file: str, meta_info: dict = dict(), mean_data: [list, None]
path_to_xml = file + ".xml"
path_to_bin = file + ".bin"

et = ElementTree()
et.parse(path_to_xml)
et = parse(path_to_xml)
net = et.getroot()

if mean_data:
Expand All @@ -462,4 +467,4 @@ def append_ir_info(file: str, meta_info: dict = dict(), mean_data: [list, None]

pretty_xml_as_string = parseString(tostring(net)).toprettyxml()
with open(path_to_xml, 'wb') as file:
file.write(bytes(pretty_xml_as_string, "UTF-8"))
file.write(bytes(pretty_xml_as_string, "UTF-8"))
7 changes: 6 additions & 1 deletion model-optimizer/mo/middle/passes/tensor_names.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,17 @@
# Copyright (C) 2018-2021 Intel Corporation
# SPDX-License-Identifier: Apache-2.0

from xml.etree.ElementTree import Element, SubElement, tostring
# xml.etree.ElementTree is imported to modify XML, it is not used to parse. To eliminate a risk of it to be used to
# parse XML in future development defusedxml.defuse_stdlib() is called
from xml.etree.ElementTree import Element, SubElement, tostring # nosec

from defusedxml import defuse_stdlib
from defusedxml.minidom import parseString

from mo.graph.graph import Node, Graph

defuse_stdlib()


def propagate_op_name_to_tensor(graph: Graph):
for node in graph.nodes():
Expand Down
24 changes: 17 additions & 7 deletions model-optimizer/mo/utils/ir_engine/ir_engine.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,12 @@
import logging as log
import os
import sys
import xml.etree.ElementTree as ET

# ElementTree is included to build it from Element which is already parsed XML, it is not used to parse anything. To
# eliminate a risk of it to be used to parse XML in future development defusedxml.defuse_stdlib() is called
from xml.etree.ElementTree import ElementTree # nosec
from defusedxml import defuse_stdlib
from defusedxml.ElementTree import parse
from argparse import Namespace
from collections import namedtuple, defaultdict
from pathlib import Path
Expand All @@ -15,6 +20,8 @@
from mo.graph.graph import Node, Graph
from mo.utils.ir_engine.compare_graphs import compare_graphs

defuse_stdlib()

log.basicConfig(format="[ %(levelname)s ] %(message)s", level=log.DEBUG, stream=sys.stdout)


Expand All @@ -38,7 +45,7 @@ def __init__(self, path_to_xml: str, path_to_bin=None, precision="FP32", xml_tre
self.__load_ir()

def __load_xml(self):
xml_tree = self.xml_tree or ET.parse(self.path_to_xml)
xml_tree = self.xml_tree or parse(self.path_to_xml)
xml_root = xml_tree.getroot()
xml_layers = {}
xml_edges = []
Expand All @@ -50,7 +57,8 @@ def __load_xml(self):
self.graph = Graph()
self.graph.graph['hashes'] = {}

self.graph.graph['ir_version'] = int(xml_root.attrib['version']) if xml_root.attrib.get('version') is not None else None
self.graph.graph['ir_version'] = int(xml_root.attrib['version']) if xml_root.attrib.get(
'version') is not None else None
mvafin marked this conversation as resolved.
Show resolved Hide resolved
self.graph.graph['layout'] = 'NCHW'
self.graph.name = xml_root.attrib['name'] if xml_root.attrib.get('name') is not None else None

Expand Down Expand Up @@ -88,7 +96,6 @@ def __load_xml(self):
elif elem.tag in ['version', 'cli_params']:
self.meta_data['quantization_parameters'][elem.tag] = elem.attrib['value']


self.graph.graph['cmd_params'] = Namespace(**self.meta_data) # TODO check what we need all this attrs

if len(statistics):
Expand Down Expand Up @@ -206,8 +213,11 @@ def __load_layer(self, layer):
new_attrs = self.__normalize_attrs(attr.attrib)
if layer.attrib['type'] == 'Const':
assert 'offset' in new_attrs and 'size' in new_attrs, \
'Incorrect attributes for Const layer, {} instead of {}!'.format(new_attrs.keys(), ['offset', 'size'])
new_attrs.update(self.__prepare_bin_attrs(layer, 0, 'custom', new_attrs['offset'], new_attrs['size'], layer[1][0].attrib['precision']))
'Incorrect attributes for Const layer, {} instead of {}!'.format(new_attrs.keys(),
['offset', 'size'])
new_attrs.update(
self.__prepare_bin_attrs(layer, 0, 'custom', new_attrs['offset'], new_attrs['size'],
layer[1][0].attrib['precision']))
mvafin marked this conversation as resolved.
Show resolved Hide resolved
layer_attrs.update(new_attrs)
elif attr.tag == 'input':
inputs_counter = len(attr)
Expand Down Expand Up @@ -237,7 +247,7 @@ def __load_layer(self, layer):

body_ir = IREngine(path_to_xml=None,
path_to_bin=self.path_to_bin,
xml_tree=ET.ElementTree(xml_body_child[0]))
xml_tree=ElementTree(xml_body_child[0]))
self.graph.graph['hashes'].update(body_ir.graph.graph['hashes'])

# Find port_map section and take an input_port_map & output_port_map
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
# Copyright (C) 2018-2021 Intel Corporation
# SPDX-License-Identifier: Apache-2.0

import os
import unittest
import tempfile

from mo.utils.ir_reader.restore_graph import restore_graph_from_ir
from defusedxml.common import EntitiesForbidden


class TestIRReader(unittest.TestCase):
def setUp(self):
self.xml_bomb = b'<?xml version="1.0"?>\n' \
b'<!DOCTYPE lolz [\n' \
b' <!ENTITY lol "lol">\n' \
b' <!ELEMENT lolz (#PCDATA)>\n' \
b' <!ENTITY lol1 "&lol;&lol;&lol;&lol;&lol;&lol;&lol;&lol;&lol;&lol;">\n' \
b' <!ENTITY lol2 "&lol1;&lol1;&lol1;&lol1;&lol1;&lol1;&lol1;&lol1;&lol1;&lol1;">\n' \
b' <!ENTITY lol3 "&lol2;&lol2;&lol2;&lol2;&lol2;&lol2;&lol2;&lol2;&lol2;&lol2;">\n' \
b' <!ENTITY lol4 "&lol3;&lol3;&lol3;&lol3;&lol3;&lol3;&lol3;&lol3;&lol3;&lol3;">\n' \
b' <!ENTITY lol5 "&lol4;&lol4;&lol4;&lol4;&lol4;&lol4;&lol4;&lol4;&lol4;&lol4;">\n' \
b' <!ENTITY lol6 "&lol5;&lol5;&lol5;&lol5;&lol5;&lol5;&lol5;&lol5;&lol5;&lol5;">\n' \
b' <!ENTITY lol7 "&lol6;&lol6;&lol6;&lol6;&lol6;&lol6;&lol6;&lol6;&lol6;&lol6;">\n' \
b' <!ENTITY lol8 "&lol7;&lol7;&lol7;&lol7;&lol7;&lol7;&lol7;&lol7;&lol7;&lol7;">\n' \
b' <!ENTITY lol9 "&lol8;&lol8;&lol8;&lol8;&lol8;&lol8;&lol8;&lol8;&lol8;&lol8;">\n' \
b']>\n' \
b'<lolz>&lol9;</lolz>'

def test_read_xml_bomb(self):
bomb_file = tempfile.NamedTemporaryFile(delete=False)
bomb_file.write(self.xml_bomb)
bomb_file.close()
self.assertRaises(EntitiesForbidden, restore_graph_from_ir, bomb_file.name)
os.remove(bomb_file.name)