Improve original_model_run.py
- use onnxruntime to run all models
- implement dumping intermediate results
- implement batch inference. onnxruntime apparently does not support
  it directly if the ONNX model was not exported with a dynamic batch
  size [1], so I rewrite the model to work around it (see the sketch
  after this message).
- split out model configs into configs.py so that they can be used in
  both transform.py and original_model_run.py

Other changes: implement the start parameter for load_data_mnist()

[1] microsoft/onnxruntime#5577
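
For reference, a minimal sketch of the batch-size rewrite mentioned above. This assumes change_batch_size (as imported from utils in original_model_run.py) works roughly like this; the actual helper may differ:

import onnx

def change_batch_size(model: onnx.ModelProto, batch_dim: str) -> None:
    # Rewrite the first dimension of every graph input and output to a
    # symbolic name (e.g. 'N') so onnxruntime accepts arbitrary batch sizes.
    for value_info in list(model.graph.input) + list(model.graph.output):
        dims = value_info.type.tensor_type.shape.dim
        if dims:
            dims[0].dim_param = batch_dim
    # Stale intermediate shape annotations would still pin the old batch
    # size, so drop them and let shape inference recompute them if needed.
    del model.graph.value_info[:]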
Chih-Hsuan Yen committed Oct 14, 2021
1 parent bbb76a8 commit 2a095f8
Showing 4 changed files with 169 additions and 149 deletions.
53 changes: 53 additions & 0 deletions configs.py
@@ -0,0 +1,53 @@
+from utils import (
+    load_data_mnist,
+    load_data_cifar10,
+    load_data_google_speech,
+)
+
+# intermediate_values_size should be < 65536, or TI's compiler gets confused
+configs = {
+    'mnist': {
+        # https://github.com/onnx/models/raw/master/vision/classification/mnist/model/mnist-8.onnx
+        'onnx_model': 'data/mnist-8.onnx',
+        'scale': 8,
+        'input_scale': 8,
+        'num_slots': 2,
+        'intermediate_values_size': 26000,
+        'data_loader': load_data_mnist,
+        'n_all_samples': 10000,
+        # multiply by 2: each value is a 2-byte Q15 fixed-point number
+        'sample_size': 2 * 28 * 28,
+        'op_filters': 4,
+        'first_sample_outputs': [ -1.247997, 0.624493, 8.609308, 9.392411, -13.685033, -6.018567, -23.386677, 28.214134, -6.762523, 3.924627 ],
+        'fp32_accuracy': 0.9889,
+    },
+    'cifar10': {
+        'onnx_model': 'data/squeezenet_cifar10.onnx',
+        'scale': 8,
+        'input_scale': 8,
+        'num_slots': 3,
+        'intermediate_values_size': 65000,
+        'data_loader': load_data_cifar10,
+        'n_all_samples': 10000,
+        'sample_size': 2 * 32 * 32 * 3,
+        'op_filters': 4,
+        'first_sample_outputs': [ 4.895500, 4.331344, 4.631835, 11.602396, 4.454658, 10.819544, 5.423588, 6.451203, 5.806091, 5.272837 ],
+        'fp32_accuracy': 0.7704,
+    },
+    'kws': {
+        'onnx_model': 'data/KWS-DNN_S.onnx',
+        'scale': 8,
+        'input_scale': 120,
+        'num_slots': 2,
+        'intermediate_values_size': 20000,
+        'data_loader': load_data_google_speech,
+        'n_all_samples': 4890,
+        'sample_size': 2 * 25 * 10,  # MFCC gives 25x10 tensors
+        'op_filters': 4,
+        'first_sample_outputs': [ -29.228327, 5.429047, 22.146973, 3.142066, -10.448060, -9.513299, 15.832925, -4.655487, -14.588447, -1.577156, -5.864228, -6.609077 ],
+        # Much lower than reported in the paper, likely due to mismatched window_size_ms/window_stride_ms
+        # See: https://github.com/ARM-software/ML-KWS-for-MCU/issues/44
+        'fp32_accuracy': 0.6323,
+    },
+}
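
For illustration, a hypothetical consumer of an entry (not part of the diff; it mirrors how original_model_run.py uses the dict):

from configs import configs

config = configs['mnist']
# sample_size is in bytes: each Q15 value occupies 2 bytes
assert config['sample_size'] == 2 * 28 * 28
model_data = config['data_loader'](start=0, limit=1)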

114 changes: 31 additions & 83 deletions original_model_run.py
@@ -1,82 +1,50 @@
 import argparse
-import functools
-import os.path
 
 import numpy as np
 import onnx
 import onnxruntime.backend as backend
-import tensorflow as tf
-from tensorflow.keras import backend as K
 
-from utils import load_data_mnist, load_data_cifar10, load_data_google_speech, GOOGLE_SPEECH_SAMPLE_RATE, kws_dnn_model
+from configs import configs
+from utils import change_batch_size, find_tensor_value_info
 
-def onnxruntime_inference_one(model, images):
+def onnxruntime_inference(model, images):
     rep = backend.prepare(model)
-    return rep.run(images.astype(np.float32))
+    return rep.run(np.concatenate(images).astype(np.float32))
 
 def onnxruntime_get_intermediate_tensor(model, images):
-    # FIXME: only the last layer is returned for now.
-    # Any way to extract intermediate layers?
-    rep = backend.prepare(model)
-    output_name = model.graph.output[0].name
-    outputs = rep.run(images[0].astype(np.float32))
-    yield output_name, outputs
-
-# Modified from https://stackoverflow.com/a/41712013/3786245
-def keras_get_intermediate_tensor(model, images):
-    for layer in model.layers:
-        output = layer.output
-        yield output.name, K.function([model.input], [output])(images)
-
-def keras_inference_one(model, images):
-    layer_outs = model(images)
-    # Tensorflow 2.x uses .numpy instead of .eval for eager execution
-    return layer_outs.numpy()[0]
-
-def tensorflow_inference_layer(decoded_wavs, idx):
-    with tf.compat.v1.Session() as sess:
-        op = sess.graph.get_operations()[idx]
-        tensor = sess.graph.get_tensor_by_name(op.outputs[0].name)
-        return sess.run(tensor, {
-            'decoded_sample_data:0': decoded_wavs[0],
-            'decoded_sample_data:1': GOOGLE_SPEECH_SAMPLE_RATE,
-        })
-
-def tensorflow_get_intermediate_tensor(graph_def, decoded_wavs):
-    for idx, node in enumerate(graph_def.node):
-        if node.op in ('Const', 'Identity', 'Placeholder'):
-            continue
-        tensor_name = node.name
-        tensor_values = tensorflow_inference_layer(decoded_wavs, idx)
-        yield tensor_name, tensor_values
-
-def tensorflow_inference_one(decoded_wav):
-    return tensorflow_inference_layer([decoded_wav], -1)[0]
+    for idx, node in enumerate(model.graph.node):
+        # Creating a new model with a given node as the output
+        # XXX: Is there a faster way?
+        tmp_model = onnx.ModelProto()
+        tmp_model.CopyFrom(model)
+        new_output = find_tensor_value_info(model, node.output[0])
+        tmp_model.graph.output[0].CopyFrom(new_output)
+        onnx.checker.check_model(tmp_model)
+
+        rep = backend.prepare(tmp_model)
+        outputs = rep.run(images[0].astype(np.float32))
+        yield new_output.name, outputs
 
 def print_float(val):
     print('%13.6f' % val, end='')
 
 def print_tensor(tensor):
-    shape = tf.shape(tensor)
+    shape = np.shape(tensor)
     print(f'Original shape: {shape}')
-    dimensions = tf.shape(shape)[0]
+    dimensions = np.shape(shape)[0]
     if dimensions and shape[0] == 1:
         tensor = tensor[0]
         dimensions -= 1
         shape = shape[1:]
     if dimensions and shape[-1] == 1:
         tensor = np.squeeze(tensor, axis=-1)
         dimensions -= 1
         shape = shape[:-1]
     print(f'New shape: {shape}')
     if dimensions == 4:
-        N, H, W, C = shape
+        N, C, H, W = shape
         assert N == 1
         for c in range(C):
             print(f'Channel {c}')
             for h in range(H):
                 for w in range(W):
-                    print_float(tensor[0, h, w, c])
+                    print_float(tensor[0, c, h, w])
                 print()
             print()
     elif dimensions == 2:
@@ -99,52 +67,32 @@ def print_tensor(tensor):
 
 def main():
     parser = argparse.ArgumentParser()
-    parser.add_argument('config', choices=['mnist', 'cifar10', 'kws'])
+    parser.add_argument('config', choices=configs.keys())
     parser.add_argument('--limit', type=int, default=0)
     args = parser.parse_args()
 
     if args.limit == 0:
         args.limit = None
 
-    if args.config == 'mnist':
-        # model is from https://github.com/onnx/models/tree/master/mnist
-        # https://github.com/onnx/onnx/blob/master/docs/PythonAPIOverview.md
-        model = onnx.load_model('./data/mnist-8.onnx')
-        onnx.checker.check_model(model)
-
-        get_intermediate_tensor = functools.partial(onnxruntime_get_intermediate_tensor, model)
-        inference_one = functools.partial(onnxruntime_inference_one, model)
-        model_data = load_data_mnist(start=0, limit=args.limit)
-    elif args.config == 'cifar10':
-        squeezenet_cifar10_path = './data/SqueezeNet_vs_CIFAR10/models'
-        with open(os.path.join(squeezenet_cifar10_path, 'squeeze_net.json')) as f:
-            model_json = f.read()
-        model = tf.keras.models.model_from_json(model_json)
-        model.load_weights(os.path.join(squeezenet_cifar10_path, 'squeeze_net.h5'))
+    config = configs[args.config]
 
-        get_intermediate_tensor = functools.partial(keras_get_intermediate_tensor, model)
-        inference_one = functools.partial(keras_inference_one, model)
-        model_data = load_data_cifar10(start=0, limit=args.limit)
-    elif args.config == 'kws':
-        with open(kws_dnn_model(), 'rb') as f:
-            graph_def = tf.compat.v1.GraphDef()
-            graph_def.ParseFromString(f.read())
-        tf.import_graph_def(graph_def)
+    # https://github.com/onnx/onnx/blob/master/docs/PythonAPIOverview.md
+    model = onnx.load_model(config['onnx_model'].replace('.onnx', '-opt.onnx'))
+    change_batch_size(model, 'N')
+    onnx.checker.check_model(model)
 
-        get_intermediate_tensor = functools.partial(tensorflow_get_intermediate_tensor, graph_def)
-        inference_one = tensorflow_inference_one
-        model_data = load_data_google_speech(start=0, limit=args.limit, for_onnx=False)
+    model_data = config['data_loader'](start=0, limit=args.limit)
 
     # Testing
     if args.limit == 1:
-        for layer_name, layer_out in get_intermediate_tensor(model_data.images):
+        for layer_name, layer_out in onnxruntime_get_intermediate_tensor(model, model_data.images):
             print(f'Layer: {layer_name}')
             print_tensor(layer_out)
     else:
         correct = 0
-        for idx, image in enumerate(model_data.images):
-            layer_outs = inference_one(image)
-            predicted = np.argmax(layer_outs)
+        layer_outs = onnxruntime_inference(model, model_data.images)[0]
+        for idx, layer_out in enumerate(layer_outs):
+            predicted = np.argmax(layer_out)
             if predicted == model_data.labels[idx]:
                 print(f'Correct at idx={idx}')
                 correct += 1
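
The per-node dump above relies on find_tensor_value_info from utils.py, whose diff is not shown here. A minimal sketch of what such a helper could look like, assuming ONNX shape inference is used to obtain value infos for intermediate tensors (the real helper may differ):

import onnx
import onnx.shape_inference

def find_tensor_value_info(model, tensor_name):
    # Shape inference populates graph.value_info with the type and shape
    # of intermediate tensors; graph inputs and outputs are covered directly.
    inferred = onnx.shape_inference.infer_shapes(model)
    graph = inferred.graph
    for value_info in list(graph.value_info) + list(graph.input) + list(graph.output):
        if value_info.name == tensor_name:
            return value_info
    raise ValueError(f'No value info found for tensor {tensor_name}')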
72 changes: 8 additions & 64 deletions transform.py
@@ -18,11 +18,8 @@
 import onnxoptimizer
 import numpy as np
 
-from utils import (
-    load_data_mnist,
-    load_data_cifar10,
-    load_data_google_speech,
-)
+from configs import configs
+from utils import find_initializer
 
 logging.basicConfig()
 logger = logging.getLogger(__name__)
@@ -175,54 +172,6 @@ def __getattr__(self, name):
 def get_prev_node(n):
     return nodes[names[n.input[0]] - Constants.N_INPUT]
 
-
-# intermediate_values_size should < 65536, or TI's compiler gets confused
-configs = {
-    'mnist': {
-        # https://github.com/onnx/models/raw/master/vision/classification/mnist/model/mnist-8.onnx
-        'onnx_model': 'data/mnist-8.onnx',
-        'scale': 8,
-        'input_scale': 8,
-        'num_slots': 2,
-        'intermediate_values_size': 26000,
-        'data_loader': load_data_mnist,
-        'n_all_samples': 10000,
-        # multiply by 2 for Q15
-        'sample_size': 2 * 28 * 28,
-        'op_filters': 4,
-        'first_sample_outputs': [ -1.247997, 0.624493, 8.609308, 9.392411, -13.685033, -6.018567, -23.386677, 28.214134, -6.762523, 3.924627 ],
-        'fp32_accuracy': 0.9889,
-    },
-    'cifar10': {
-        'onnx_model': 'data/squeezenet_cifar10.onnx',
-        'scale': 8,
-        'input_scale': 8,
-        'num_slots': 3,
-        'intermediate_values_size': 65000,
-        'data_loader': load_data_cifar10,
-        'n_all_samples': 10000,
-        'sample_size': 2 * 32 * 32 * 3,
-        'op_filters': 4,
-        'first_sample_outputs': [ 4.895500, 4.331344, 4.631835, 11.602396, 4.454658, 10.819544, 5.423588, 6.451203, 5.806091, 5.272837 ],
-        'fp32_accuracy': 0.7704,
-    },
-    'kws': {
-        'onnx_model': 'data/KWS-DNN_S.onnx',
-        'scale': 8,
-        'input_scale': 120,
-        'num_slots': 2,
-        'intermediate_values_size': 20000,
-        'data_loader': load_data_google_speech,
-        'n_all_samples': 4890,
-        'sample_size': 2 * 25 * 10, # MFCC gives 25x10 tensors
-        'op_filters': 4,
-        'first_sample_outputs': [ -29.228327, 5.429047, 22.146973, 3.142066, -10.448060, -9.513299, 15.832925, -4.655487, -14.588447, -1.577156, -5.864228, -6.609077 ],
-        # Much lower than reported on the paper due to mismatched window_size_ms/window_stride_ms (?)
-        # See: https://github.com/ARM-software/ML-KWS-for-MCU/issues/44
-        'fp32_accuracy': 0.6323,
-    },
-}
-
 lea_buffer_size = {
     # (4096 - 0x138 (LEASTACK) - 2 * 8 (MSP_LEA_MAC_PARAMS)) / sizeof(int16_t)
     'msp430': 1884,
@@ -295,16 +244,11 @@ def get_attr(node, attr_name):
     # Remove Squeeze and Reshape nodes with constants as the input
     replaced_nodes_map = {}
 
-    def find_initializer(name):
-        for initializer in g.initializer:
-            if initializer.name == name:
-                return initializer
-
     def replace_squeeze(node, inp):
         # Since opset 13, axes is an input instead of an attribute
         try:
             axes_name = node.input[1]
-            axes = find_initializer(axes_name).int64_data
+            axes = find_initializer(onnx_model, axes_name).int64_data
         except IndexError:
             axes = get_attr(node, 'axes')
         new_dims = [dim for dim_idx, dim in enumerate(inp.dims) if dim_idx not in axes]
@@ -314,7 +258,7 @@ def replace_squeeze(node, inp):
 
     def replace_reshape(node, inp):
         dims_name = node.input[1]
-        new_dims = find_initializer(dims_name).int64_data
+        new_dims = find_initializer(onnx_model, dims_name).int64_data
         assert new_dims
         inp.dims[:] = new_dims
 
@@ -327,7 +271,7 @@ def replace_nodes():
         for n in g.node:
             if n.op_type not in ('Squeeze', 'Reshape'):
                 continue
-            inp = find_initializer(n.input[0])
+            inp = find_initializer(onnx_model, n.input[0])
             if inp:
                 replace_handlers[n.op_type](n, inp)
                 replaced_nodes_map[n.output[0]] = n.input[0]
@@ -436,11 +380,11 @@ def determine_conv_tile_c(n):
     logger.debug('Determine tile size for Conv node %s', n.name)
 
     output_value_info = find_tensor_value_info(n.output[0])
-    filter_info = find_initializer(n.input[1])
+    filter_info = find_initializer(onnx_model, n.input[1])
     node_flags = n.flags.b.extra.conv
 
     is_separate_tiling = False
-    if not find_initializer(n.input[0]):
+    if not find_initializer(onnx_model, n.input[0]):
         input_node = find_node_by_output(n.input[0])
         if input_node and input_node.op_type == 'Concat':
             is_separate_tiling = True
@@ -499,7 +443,7 @@ def determine_gemm_tile_sizes(n):
     logger.debug('Determine tile size for Gemm node %s', n.name)
 
     A = find_tensor_value_info(n.input[0])
-    B = find_initializer(n.input[1])
+    B = find_initializer(onnx_model, n.input[1])
     A_shape = A.type.tensor_type.shape
     A_rows = A_shape.dim[0].dim_value
     A_cols = A_shape.dim[1].dim_value
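
The fourth changed file did not render above; judging from the new imports it is presumably utils.py, which gains change_batch_size, find_initializer, and find_tensor_value_info. A minimal sketch of find_initializer, assuming it mirrors the local helper removed from transform.py with the model passed explicitly:

def find_initializer(onnx_model, name):
    # Initializers hold the constant tensors (weights) of the graph
    for initializer in onnx_model.graph.initializer:
        if initializer.name == name:
            return initializer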