# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT License. See License.txt in the project root for
# license information.
###########################################################################
import itertools
import numpy as np
import onnx
from onnx import helper, numpy_helper
from onnx import onnx_pb as onnx_proto


def _npfloat16_to_int(np_list):
    '''
    Convert numpy float16 to python int.
    :param np_list: numpy float16 list
    :return int_list: python int list
    '''
    # reinterpret each float16's underlying bits as an unsigned 16-bit integer
    return [int(x.view('H')) for x in np_list]
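

# Illustrative bit-level check: float16 1.0 has bit pattern 0x3C00 == 15360,
# so _npfloat16_to_int(np.array([1.0], dtype=np.float16)) -> [15360].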


def convert_np_to_float16(np_array, min_positive_val=1e-7, max_finite_val=1e4):
    '''
    Convert float32 numpy array to float16 without changing sign or finiteness.
    Positive values less than min_positive_val are mapped to min_positive_val.
    Positive finite values greater than max_finite_val are mapped to max_finite_val.
    Similarly for negative values. NaN, 0, inf, and -inf are unchanged.
    '''
    def between(a, b, c):
        return np.logical_and(a < b, b < c)

    # clamp magnitudes into float16's usable range before the cast, so small
    # values don't underflow to 0 and large values don't overflow to inf
    np_array = np.where(between(0, np_array, min_positive_val), min_positive_val, np_array)
    np_array = np.where(between(-min_positive_val, np_array, 0), -min_positive_val, np_array)
    np_array = np.where(between(max_finite_val, np_array, float('inf')), max_finite_val, np_array)
    np_array = np.where(between(float('-inf'), np_array, -max_finite_val), -max_finite_val, np_array)
    return np.float16(np_array)
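

# A quick sanity-check sketch (not part of the original converter): values
# below min_positive_val or above max_finite_val are clamped rather than
# underflowing to 0 or overflowing to inf in float16.
def _demo_convert_np():
    arr = np.array([1e-9, 2.0, 1e9], dtype=np.float32)
    out = convert_np_to_float16(arr)
    assert out.dtype == np.float16
    print(out)  # roughly [1e-07, 2.0, 1e+04]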


def convert_tensor_float_to_float16(tensor, min_positive_val=1e-7, max_finite_val=1e4):
'''
Convert tensor float to float16.
:param tensor: TensorProto object
:return tensor_float16: converted TensorProto object
Example:
::
from onnxmltools.utils.float16_converter import convert_tensor_float_to_float16
new_tensor = convert_tensor_float_to_float16(tensor)
'''
if not isinstance(tensor, onnx_proto.TensorProto):
raise ValueError('Expected input type is an ONNX TensorProto but got %s' % type(tensor))
if tensor.data_type == onnx_proto.TensorProto.FLOAT:
tensor.data_type = onnx_proto.TensorProto.FLOAT16
# convert float_data (float type) to float16 and write to int32_data
if tensor.float_data:
float16_data = convert_np_to_float16(np.array(tensor.float_data), min_positive_val, max_finite_val)
int_list = _npfloat16_to_int(float16_data)
tensor.int32_data[:] = int_list
tensor.float_data[:] = []
        # convert raw_data (bytes type)
        if tensor.raw_data:
            # read tensor.raw_data as float32 values
            # (np.frombuffer replaces the deprecated np.fromstring)
            float32_list = np.frombuffer(tensor.raw_data, dtype='float32')
            # convert float32 to float16
            float16_list = convert_np_to_float16(float32_list, min_positive_val, max_finite_val)
            # convert float16 back to bytes and write to raw_data
            # (ndarray.tobytes replaces the deprecated ndarray.tostring)
            tensor.raw_data = float16_list.tobytes()
return tensor
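

# A minimal sketch (hypothetical tensor name 'w'): build a small float32
# initializer with onnx.helper and convert it in place. Values above
# max_finite_val (default 1e4) are clamped instead of overflowing to inf.
def _demo_convert_tensor():
    t = helper.make_tensor('w', onnx_proto.TensorProto.FLOAT, [2], [1.0, 70000.0])
    t16 = convert_tensor_float_to_float16(t)
    assert t16.data_type == onnx_proto.TensorProto.FLOAT16
    print(numpy_helper.to_array(t16))  # roughly [1.0, 10000.0] as float16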


def make_value_info_from_tensor(tensor):
shape = numpy_helper.to_array(tensor).shape
return helper.make_tensor_value_info(tensor.name, tensor.data_type, shape)


def convert_float_to_float16(model, min_positive_val=1e-7, max_finite_val=1e4,
                             keep_io_types=False, disable_shape_infer=False):
    '''
    Convert tensor float type in the ONNX ModelProto input to tensor float16.
    :param model: ONNX ModelProto object
    :param min_positive_val: positive values smaller than this are clamped to it
    :param max_finite_val: finite values larger than this are clamped to it
    :param keep_io_types: if True, keep graph inputs and outputs as float32
        and insert boundary Cast nodes instead of converting them
    :param disable_shape_infer: type/shape information is needed for the
        conversion to work. Set to True only if the model already has
        type/shape information for all tensors.
    :return: converted ONNX ModelProto object
Examples:
::
Example 1: Convert ONNX ModelProto object:
from onnxmltools.utils.float16_converter import convert_float_to_float16
new_onnx_model = convert_float_to_float16(onnx_model)
Example 2: Convert ONNX model binary file:
from onnxmltools.utils.float16_converter import convert_float_to_float16
from onnxmltools.utils import load_model, save_model
onnx_model = load_model('model.onnx')
new_onnx_model = convert_float_to_float16(onnx_model)
save_model(new_onnx_model, 'new_model.onnx')
'''
    func_infer_shape = None
    # compare version components numerically: a plain string comparison
    # would wrongly report e.g. '1.10' < '1.2'
    onnx_version = tuple(int(v) for v in onnx.__version__.split('.')[:2])
    if not disable_shape_infer and onnx_version >= (1, 2):
        try:
            from onnx.shape_inference import infer_shapes
            func_infer_shape = infer_shapes
        except ImportError:
            # shape inference is unavailable; proceed without it
            pass
if not isinstance(model, onnx_proto.ModelProto):
raise ValueError('Expected model type is an ONNX ModelProto but got %s' % type(model))
# create black list
op_black_list = ['ArrayFeatureExtractor', 'Binarizer', 'CastMap', 'CategoryMapper', 'DictVectorizer',
'FeatureVectorizer', 'Imputer', 'LabelEncoder', 'LinearClassifier', 'LinearRegressor',
'Normalizer', 'OneHotEncoder', 'SVMClassifier', 'SVMRegressor', 'Scaler', 'TreeEnsembleClassifier',
'TreeEnsembleRegressor', 'ZipMap', 'NonMaxSuppression', 'TopK', 'RoiAlign', 'Resize',
'Range', 'CumSum', 'Min', 'Max', 'Upsample']
# create a queue for BFS
queue = []
value_info_list = []
node_list = []
# type inference on input model
if func_infer_shape is not None:
model = func_infer_shape(model)
queue.append(model)
name_mapping = {}
graph_io_to_skip = set()
io_casts = set()
if keep_io_types:
for i, n in enumerate(model.graph.input):
if n.type.tensor_type.elem_type == onnx_proto.TensorProto.FLOAT:
output_name = 'graph_input_cast_' + str(i)
name_mapping[n.name] = output_name
graph_io_to_skip.add(n.name)
node_name = 'graph_input_cast' + str(i)
new_value_info = model.graph.value_info.add()
new_value_info.CopyFrom(n)
new_value_info.name = output_name
new_value_info.type.tensor_type.elem_type = onnx_proto.TensorProto.FLOAT16
                # add a Cast node (from tensor(float) to tensor(float16)) after the
                # graph input; to=10 casts to onnx_proto.TensorProto.FLOAT16
new_node = [helper.make_node('Cast', [n.name], [output_name], to=10, name=node_name)]
model.graph.node.extend(new_node)
value_info_list.append(new_value_info)
io_casts.add(node_name)
for i, n in enumerate(model.graph.output):
if n.type.tensor_type.elem_type == onnx_proto.TensorProto.FLOAT:
input_name = 'graph_output_cast_' + str(i)
name_mapping[n.name] = input_name
graph_io_to_skip.add(n.name)
node_name = 'graph_output_cast' + str(i)
                # add a Cast node (from tensor(float16) to tensor(float)) before the
                # graph output; to=1 casts to onnx_proto.TensorProto.FLOAT
new_value_info = model.graph.value_info.add()
new_value_info.CopyFrom(n)
new_value_info.name = input_name
new_value_info.type.tensor_type.elem_type = onnx_proto.TensorProto.FLOAT16
new_node = [helper.make_node('Cast', [input_name], [n.name], to=1, name=node_name)]
model.graph.node.extend(new_node)
value_info_list.append(new_value_info)
io_casts.add(node_name)
while queue:
next_level = []
for q in queue:
# if q is model, push q.graph (GraphProto)
if isinstance(q, onnx_proto.ModelProto):
next_level.append(q.graph)
# if q is model.graph, push q.node.attribute (AttributeProto)
if isinstance(q, onnx_proto.GraphProto):
for n in q.node:
                    # skip the boundary Cast nodes inserted for keep_io_types
                    if n.name in io_casts:
                        continue
for i in range(len(n.input)):
if n.input[i] in name_mapping:
n.input[i] = name_mapping[n.input[i]]
for i in range(len(n.output)):
if n.output[i] in name_mapping:
n.output[i] = name_mapping[n.output[i]]
                    # if n is in the black list (doesn't support float16), leave it
                    # unconverted and save it so Cast nodes can be added around it later
                    if n.op_type in op_black_list:
                        node_list.append(n)
                    else:
                        if n.op_type == 'Cast':
                            for attr in n.attribute:
                                # retarget Cast-to-float (to=1) to Cast-to-float16 (to=10)
                                if attr.name == 'to' and attr.i == 1:
                                    attr.i = 10
                                    break
for attr in n.attribute:
next_level.append(attr)
# if q is model.graph.node.attribute, push q.g and q.graphs (GraphProto)
# and process node.attribute.t and node.attribute.tensors (TensorProto)
if isinstance(q, onnx_proto.AttributeProto):
next_level.append(q.g)
for n in q.graphs:
next_level.append(n)
q.t.CopyFrom(convert_tensor_float_to_float16(q.t, min_positive_val, max_finite_val))
                for n in q.tensors:
                    # the conversion mutates each TensorProto in place
                    convert_tensor_float_to_float16(n, min_positive_val, max_finite_val)
# if q is graph, process graph.initializer(TensorProto), input, output and value_info (ValueInfoProto)
if isinstance(q, onnx_proto.GraphProto):
for n in q.initializer: # TensorProto type
if n.data_type == onnx_proto.TensorProto.FLOAT:
n = convert_tensor_float_to_float16(n, min_positive_val, max_finite_val)
value_info_list.append(make_value_info_from_tensor(n))
# for all ValueInfoProto with tensor(float) type in input, output and value_info, convert them to
# tensor(float16) except map and seq(map). And save them in value_info_list for further processing
for n in itertools.chain(q.input, q.output, q.value_info):
if n.type.tensor_type.elem_type == onnx_proto.TensorProto.FLOAT:
if n.name not in graph_io_to_skip:
n.type.tensor_type.elem_type = onnx_proto.TensorProto.FLOAT16
value_info_list.append(n)
queue = next_level
    # process the nodes in the black list that don't support tensor(float16)
for node in node_list:
        # if the input's name is in value_info_list, the input is tensor(float16):
        # insert a float16-to-float Cast node before the node, change the node's
        # input name, and create a new value_info for the new name
for i in range(len(node.input)):
input = node.input[i]
for value_info in value_info_list:
if input == value_info.name:
# create new value_info for current node's new input name
new_value_info = model.graph.value_info.add()
new_value_info.CopyFrom(value_info)
output_name = node.name + '_input_cast_' + str(i)
new_value_info.name = output_name
new_value_info.type.tensor_type.elem_type = onnx_proto.TensorProto.FLOAT
                    # add a Cast node (from tensor(float16) to tensor(float)) before the current node
node_name = node.name + '_input_cast' + str(i)
new_node = [helper.make_node('Cast', [input], [output_name], to=1, name=node_name)]
model.graph.node.extend(new_node)
# change current node's input name
node.input[i] = output_name
break
        # if the output's name is in value_info_list, the output is tensor(float16):
        # insert a float-to-float16 Cast node after the node, change the node's
        # output name, and create a new value_info for the new name
for i in range(len(node.output)):
output = node.output[i]
for value_info in value_info_list:
if output == value_info.name:
# create new value_info for current node's new output
new_value_info = model.graph.value_info.add()
new_value_info.CopyFrom(value_info)
input_name = node.name + '_output_cast_' + str(i)
new_value_info.name = input_name
new_value_info.type.tensor_type.elem_type = onnx_proto.TensorProto.FLOAT
                    # add a Cast node (from tensor(float) to tensor(float16)) after the current node
node_name = node.name + '_output_cast' + str(i)
new_node = [helper.make_node('Cast', [input_name], [output], to=10, name=node_name)]
model.graph.node.extend(new_node)
                    # change current node's output name
node.output[i] = input_name
break
return model
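

# A minimal sketch (hypothetical file names) of the keep_io_types option:
# the converted graph computes in float16 internally while its inputs and
# outputs remain float32, with boundary Cast nodes inserted automatically.
def _demo_keep_io_types():
    model = onnx.load('model.onnx')  # assumed to exist
    model_fp16 = convert_float_to_float16(model, keep_io_types=True)
    onnx.save(model_fp16, 'model_fp16.onnx')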


def convert_float_to_float16_model_path(model_path, min_positive_val=1e-7, max_finite_val=1e4, keep_io_types=False):
    '''
    Convert tensor float type in the ONNX model at model_path to tensor float16.
    This exists because infer_shapes cannot be used on models larger than 2GB;
    this path-based variant can be applied to models of any size.
    :param model_path: ONNX Model path
    :return: converted ONNX ModelProto object
    Examples
    ::
        # Convert an ONNX model file and save the converted model:
        from onnxmltools.utils.float16_converter import convert_float_to_float16_model_path
        new_onnx_model = convert_float_to_float16_model_path('model.onnx')
        onnx.save(new_onnx_model, 'new_model.onnx')
    '''
    disable_shape_infer = False
    # compare version components numerically: a string comparison would
    # wrongly report e.g. '1.10' < '1.8'
    onnx_version = tuple(int(v) for v in onnx.__version__.split('.')[:2])
    if onnx_version >= (1, 8):
        try:
            # infer_shapes_path can be applied to models of any size
            from onnx.shape_inference import infer_shapes_path
            import tempfile
            import os
            # shape_infer_model_path should be in the same folder as model_path
            with tempfile.NamedTemporaryFile(dir=os.path.dirname(model_path)) as tmpfile:
                shape_infer_model_path = tmpfile.name
                infer_shapes_path(model_path, shape_infer_model_path)
                model = onnx.load(shape_infer_model_path)
                disable_shape_infer = True
        except Exception:
            # fall back to loading the model without prior shape inference
            pass
if not disable_shape_infer:
model = onnx.load(model_path)
return convert_float_to_float16(model, min_positive_val, max_finite_val, keep_io_types, disable_shape_infer)


if __name__ == '__main__':
    import onnxruntime

    # convert the local model to float16, save it, and confirm that
    # onnxruntime can build a session from the converted file
    new_onnx_model = convert_float_to_float16_model_path('lite.onnx')
    onnx.save(new_onnx_model, 'lite_con.onnx')
    print('saved!!')
    session = onnxruntime.InferenceSession('lite_con.onnx')
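
    # A hedged sanity check (a sketch, not part of the original script): feed
    # random inputs of the session's declared dtype/shape and run the model
    # once. Dynamic dimensions are replaced with 1 for illustration, and only
    # float inputs are assumed.
    feeds = {}
    for inp in session.get_inputs():
        shape = [d if isinstance(d, int) else 1 for d in inp.shape]
        dtype = np.float16 if inp.type == 'tensor(float16)' else np.float32
        feeds[inp.name] = np.random.rand(*shape).astype(dtype)
    outputs = session.run(None, feeds)
    print('inference ok: %d output tensor(s)' % len(outputs))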