Improve original_model_run.py
- use onnxruntime to run all models
- implement dumping intermediate results
- implement batch inference. onnxruntime apparently does not support
  it directly if the ONNX model was not exported with a dynamic batch
  size [1], so I rewrite the model to work around it (see the sketch
  after this message).
- split out model configs into configs.py so that they can be used in
  both transform.py and original_model_run.py

Other changes: implement the start parameter for load_data_mnist()

[1] microsoft/onnxruntime#5577
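
For reference, a minimal sketch of the batch-size rewrite mentioned above. This assumes change_batch_size (as imported from utils in original_model_run.py) works roughly like this; the actual helper may differ:

import onnx

def change_batch_size(model: onnx.ModelProto, batch_dim: str) -> None:
    # Rewrite the first dimension of every graph input and output to a
    # symbolic name (e.g. 'N') so onnxruntime accepts arbitrary batch sizes.
    for value_info in list(model.graph.input) + list(model.graph.output):
        dims = value_info.type.tensor_type.shape.dim
        if dims:
            dims[0].dim_param = batch_dim
    # Stale intermediate shape annotations would still pin the old batch
    # size, so drop them and let shape inference recompute them if needed.
    del model.graph.value_info[:]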
Chih-Hsuan Yen committed Oct 14, 2021
1 parent bbb76a8 commit 2a095f8
Showing 4 changed files with 169 additions and 149 deletions.
53 changes: 53 additions & 0 deletions configs.py
@@ -0,0 +1,53 @@
+from utils import (
+    load_data_mnist,
+    load_data_cifar10,
+    load_data_google_speech,
+)
+
+# intermediate_values_size should be < 65536, or TI's compiler gets confused
+configs = {
+    'mnist': {
+        # https://github.com/onnx/models/raw/master/vision/classification/mnist/model/mnist-8.onnx
+        'onnx_model': 'data/mnist-8.onnx',
+        'scale': 8,
+        'input_scale': 8,
+        'num_slots': 2,
+        'intermediate_values_size': 26000,
+        'data_loader': load_data_mnist,
+        'n_all_samples': 10000,
+        # multiply by 2: each value is a 2-byte Q15 fixed-point number
+        'sample_size': 2 * 28 * 28,
+        'op_filters': 4,
+        'first_sample_outputs': [ -1.247997, 0.624493, 8.609308, 9.392411, -13.685033, -6.018567, -23.386677, 28.214134, -6.762523, 3.924627 ],
+        'fp32_accuracy': 0.9889,
+    },
+    'cifar10': {
+        'onnx_model': 'data/squeezenet_cifar10.onnx',
+        'scale': 8,
+        'input_scale': 8,
+        'num_slots': 3,
+        'intermediate_values_size': 65000,
+        'data_loader': load_data_cifar10,
+        'n_all_samples': 10000,
+        'sample_size': 2 * 32 * 32 * 3,
+        'op_filters': 4,
+        'first_sample_outputs': [ 4.895500, 4.331344, 4.631835, 11.602396, 4.454658, 10.819544, 5.423588, 6.451203, 5.806091, 5.272837 ],
+        'fp32_accuracy': 0.7704,
+    },
+    'kws': {
+        'onnx_model': 'data/KWS-DNN_S.onnx',
+        'scale': 8,
+        'input_scale': 120,
+        'num_slots': 2,
+        'intermediate_values_size': 20000,
+        'data_loader': load_data_google_speech,
+        'n_all_samples': 4890,
+        'sample_size': 2 * 25 * 10,  # MFCC gives 25x10 tensors
+        'op_filters': 4,
+        'first_sample_outputs': [ -29.228327, 5.429047, 22.146973, 3.142066, -10.448060, -9.513299, 15.832925, -4.655487, -14.588447, -1.577156, -5.864228, -6.609077 ],
+        # Much lower than reported in the paper, likely due to mismatched window_size_ms/window_stride_ms
+        # See: https://github.com/ARM-software/ML-KWS-for-MCU/issues/44
+        'fp32_accuracy': 0.6323,
+    },
+}
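
For illustration, a hypothetical consumer of an entry (not part of the diff; it mirrors how original_model_run.py uses the dict):

from configs import configs

config = configs['mnist']
# sample_size is in bytes: each Q15 value occupies 2 bytes
assert config['sample_size'] == 2 * 28 * 28
model_data = config['data_loader'](start=0, limit=1)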

114 changes: 31 additions & 83 deletions original_model_run.py
@@ -1,82 +1,50 @@
 import argparse
-import functools
-import os.path
 
 import numpy as np
 import onnx
 import onnxruntime.backend as backend
-import tensorflow as tf
-from tensorflow.keras import backend as K
 
-from utils import load_data_mnist, load_data_cifar10, load_data_google_speech, GOOGLE_SPEECH_SAMPLE_RATE, kws_dnn_model
+from configs import configs
+from utils import change_batch_size, find_tensor_value_info
 
-def onnxruntime_inference_one(model, images):
+def onnxruntime_inference(model, images):
     rep = backend.prepare(model)
-    return rep.run(images.astype(np.float32))
+    return rep.run(np.concatenate(images).astype(np.float32))
 
 def onnxruntime_get_intermediate_tensor(model, images):
-    # FIXME: only the last layer is returned for now.
-    # Any way to extract intermediate layers?
-    rep = backend.prepare(model)
-    output_name = model.graph.output[0].name
-    outputs = rep.run(images[0].astype(np.float32))
-    yield output_name, outputs
-
-# Modified from https://stackoverflow.com/a/41712013/3786245
-def keras_get_intermediate_tensor(model, images):
-    for layer in model.layers:
-        output = layer.output
-        yield output.name, K.function([model.input], [output])(images)
-
-def keras_inference_one(model, images):
-    layer_outs = model(images)
-    # Tensorflow 2.x uses .numpy instead of .eval for eager execution
-    return layer_outs.numpy()[0]
-
-def tensorflow_inference_layer(decoded_wavs, idx):
-    with tf.compat.v1.Session() as sess:
-        op = sess.graph.get_operations()[idx]
-        tensor = sess.graph.get_tensor_by_name(op.outputs[0].name)
-        return sess.run(tensor, {
-            'decoded_sample_data:0': decoded_wavs[0],
-            'decoded_sample_data:1': GOOGLE_SPEECH_SAMPLE_RATE,
-        })
-
-def tensorflow_get_intermediate_tensor(graph_def, decoded_wavs):
-    for idx, node in enumerate(graph_def.node):
-        if node.op in ('Const', 'Identity', 'Placeholder'):
-            continue
-        tensor_name = node.name
-        tensor_values = tensorflow_inference_layer(decoded_wavs, idx)
-        yield tensor_name, tensor_values
-
-def tensorflow_inference_one(decoded_wav):
-    return tensorflow_inference_layer([decoded_wav], -1)[0]
+    for idx, node in enumerate(model.graph.node):
+        # Creating a new model with a given node as the output
+        # XXX: Is there a faster way?
+        tmp_model = onnx.ModelProto()
+        tmp_model.CopyFrom(model)
+        new_output = find_tensor_value_info(model, node.output[0])
+        tmp_model.graph.output[0].CopyFrom(new_output)
+        onnx.checker.check_model(tmp_model)
+
+        rep = backend.prepare(tmp_model)
+        outputs = rep.run(images[0].astype(np.float32))
+        yield new_output.name, outputs
 
 def print_float(val):
     print('%13.6f' % val, end='')
 
 def print_tensor(tensor):
-    shape = tf.shape(tensor)
+    shape = np.shape(tensor)
     print(f'Original shape: {shape}')
-    dimensions = tf.shape(shape)[0]
+    dimensions = np.shape(shape)[0]
     if dimensions and shape[0] == 1:
         tensor = tensor[0]
         dimensions -= 1
         shape = shape[1:]
     if dimensions and shape[-1] == 1:
         tensor = np.squeeze(tensor, axis=-1)
         dimensions -= 1
         shape = shape[:-1]
     print(f'New shape: {shape}')
     if dimensions == 4:
-        N, H, W, C = shape
+        N, C, H, W = shape
         assert N == 1
         for c in range(C):
             print(f'Channel {c}')
             for h in range(H):
                 for w in range(W):
-                    print_float(tensor[0, h, w, c])
+                    print_float(tensor[0, c, h, w])
                 print()
             print()
     elif dimensions == 2:
@@ -99,52 +67,32 @@ def print_tensor(tensor):
 
 def main():
     parser = argparse.ArgumentParser()
-    parser.add_argument('config', choices=['mnist', 'cifar10', 'kws'])
+    parser.add_argument('config', choices=configs.keys())
     parser.add_argument('--limit', type=int, default=0)
     args = parser.parse_args()
 
     if args.limit == 0:
         args.limit = None
 
-    if args.config == 'mnist':
-        # model is from https://github.com/onnx/models/tree/master/mnist
-        # https://github.com/onnx/onnx/blob/master/docs/PythonAPIOverview.md
-        model = onnx.load_model('./data/mnist-8.onnx')
-        onnx.checker.check_model(model)
-
-        get_intermediate_tensor = functools.partial(onnxruntime_get_intermediate_tensor, model)
-        inference_one = functools.partial(onnxruntime_inference_one, model)
-        model_data = load_data_mnist(start=0, limit=args.limit)
-    elif args.config == 'cifar10':
-        squeezenet_cifar10_path = './data/SqueezeNet_vs_CIFAR10/models'
-        with open(os.path.join(squeezenet_cifar10_path, 'squeeze_net.json')) as f:
-            model_json = f.read()
-        model = tf.keras.models.model_from_json(model_json)
-        model.load_weights(os.path.join(squeezenet_cifar10_path, 'squeeze_net.h5'))
+    config = configs[args.config]
 
-        get_intermediate_tensor = functools.partial(keras_get_intermediate_tensor, model)
-        inference_one = functools.partial(keras_inference_one, model)
-        model_data = load_data_cifar10(start=0, limit=args.limit)
-    elif args.config == 'kws':
-        with open(kws_dnn_model(), 'rb') as f:
-            graph_def = tf.compat.v1.GraphDef()
-            graph_def.ParseFromString(f.read())
-        tf.import_graph_def(graph_def)
+    # https://github.com/onnx/onnx/blob/master/docs/PythonAPIOverview.md
+    model = onnx.load_model(config['onnx_model'].replace('.onnx', '-opt.onnx'))
+    change_batch_size(model, 'N')
+    onnx.checker.check_model(model)
 
-        get_intermediate_tensor = functools.partial(tensorflow_get_intermediate_tensor, graph_def)
-        inference_one = tensorflow_inference_one
-        model_data = load_data_google_speech(start=0, limit=args.limit, for_onnx=False)
+    model_data = config['data_loader'](start=0, limit=args.limit)
 
     # Testing
     if args.limit == 1:
-        for layer_name, layer_out in get_intermediate_tensor(model_data.images):
+        for layer_name, layer_out in onnxruntime_get_intermediate_tensor(model, model_data.images):
             print(f'Layer: {layer_name}')
             print_tensor(layer_out)
     else:
         correct = 0
-        for idx, image in enumerate(model_data.images):
-            layer_outs = inference_one(image)
-            predicted = np.argmax(layer_outs)
+        layer_outs = onnxruntime_inference(model, model_data.images)[0]
+        for idx, layer_out in enumerate(layer_outs):
+            predicted = np.argmax(layer_out)
             if predicted == model_data.labels[idx]:
                 print(f'Correct at idx={idx}')
                 correct += 1
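
The per-node dump above relies on find_tensor_value_info from utils.py, whose diff is not shown here. A minimal sketch of what such a helper could look like, assuming ONNX shape inference is used to obtain value infos for intermediate tensors (the real helper may differ):

import onnx
import onnx.shape_inference

def find_tensor_value_info(model, tensor_name):
    # Shape inference populates graph.value_info with the type and shape
    # of intermediate tensors; graph inputs and outputs are covered directly.
    inferred = onnx.shape_inference.infer_shapes(model)
    graph = inferred.graph
    for value_info in list(graph.value_info) + list(graph.input) + list(graph.output):
        if value_info.name == tensor_name:
            return value_info
    raise ValueError(f'No value info found for tensor {tensor_name}')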
72 changes: 8 additions & 64 deletions transform.py
@@ -18,11 +18,8 @@
 import onnxoptimizer
 import numpy as np
 
-from utils import (
-    load_data_mnist,
-    load_data_cifar10,
-    load_data_google_speech,
-)
+from configs import configs
+from utils import find_initializer
 
 logging.basicConfig()
 logger = logging.getLogger(__name__)
@@ -175,54 +172,6 @@ def __getattr__(self, name):
 def get_prev_node(n):
     return nodes[names[n.input[0]] - Constants.N_INPUT]
 
-
-# intermediate_values_size should < 65536, or TI's compiler gets confused
-configs = {
-    'mnist': {
-        # https://github.com/onnx/models/raw/master/vision/classification/mnist/model/mnist-8.onnx
-        'onnx_model': 'data/mnist-8.onnx',
-        'scale': 8,
-        'input_scale': 8,
-        'num_slots': 2,
-        'intermediate_values_size': 26000,
-        'data_loader': load_data_mnist,
-        'n_all_samples': 10000,
-        # multiply by 2 for Q15
-        'sample_size': 2 * 28 * 28,
-        'op_filters': 4,
-        'first_sample_outputs': [ -1.247997, 0.624493, 8.609308, 9.392411, -13.685033, -6.018567, -23.386677, 28.214134, -6.762523, 3.924627 ],
-        'fp32_accuracy': 0.9889,
-    },
-    'cifar10': {
-        'onnx_model': 'data/squeezenet_cifar10.onnx',
-        'scale': 8,
-        'input_scale': 8,
-        'num_slots': 3,
-        'intermediate_values_size': 65000,
-        'data_loader': load_data_cifar10,
-        'n_all_samples': 10000,
-        'sample_size': 2 * 32 * 32 * 3,
-        'op_filters': 4,
-        'first_sample_outputs': [ 4.895500, 4.331344, 4.631835, 11.602396, 4.454658, 10.819544, 5.423588, 6.451203, 5.806091, 5.272837 ],
-        'fp32_accuracy': 0.7704,
-    },
-    'kws': {
-        'onnx_model': 'data/KWS-DNN_S.onnx',
-        'scale': 8,
-        'input_scale': 120,
-        'num_slots': 2,
-        'intermediate_values_size': 20000,
-        'data_loader': load_data_google_speech,
-        'n_all_samples': 4890,
-        'sample_size': 2 * 25 * 10, # MFCC gives 25x10 tensors
-        'op_filters': 4,
-        'first_sample_outputs': [ -29.228327, 5.429047, 22.146973, 3.142066, -10.448060, -9.513299, 15.832925, -4.655487, -14.588447, -1.577156, -5.864228, -6.609077 ],
-        # Much lower than reported on the paper due to mismatched window_size_ms/window_stride_ms (?)
-        # See: https://github.com/ARM-software/ML-KWS-for-MCU/issues/44
-        'fp32_accuracy': 0.6323,
-    },
-}
-
 lea_buffer_size = {
     # (4096 - 0x138 (LEASTACK) - 2 * 8 (MSP_LEA_MAC_PARAMS)) / sizeof(int16_t)
     'msp430': 1884,
@@ -295,16 +244,11 @@ def get_attr(node, attr_name):
     # Remove Squeeze and Reshape nodes with constants as the input
     replaced_nodes_map = {}
 
-    def find_initializer(name):
-        for initializer in g.initializer:
-            if initializer.name == name:
-                return initializer
-
     def replace_squeeze(node, inp):
         # Since opset 13, axes is an input instead of an attribute
         try:
             axes_name = node.input[1]
-            axes = find_initializer(axes_name).int64_data
+            axes = find_initializer(onnx_model, axes_name).int64_data
         except IndexError:
             axes = get_attr(node, 'axes')
         new_dims = [dim for dim_idx, dim in enumerate(inp.dims) if dim_idx not in axes]
@@ -314,7 +258,7 @@ def replace_squeeze(node, inp):
 
     def replace_reshape(node, inp):
         dims_name = node.input[1]
-        new_dims = find_initializer(dims_name).int64_data
+        new_dims = find_initializer(onnx_model, dims_name).int64_data
         assert new_dims
         inp.dims[:] = new_dims
 
@@ -327,7 +271,7 @@ def replace_nodes():
         for n in g.node:
             if n.op_type not in ('Squeeze', 'Reshape'):
                 continue
-            inp = find_initializer(n.input[0])
+            inp = find_initializer(onnx_model, n.input[0])
             if inp:
                 replace_handlers[n.op_type](n, inp)
                 replaced_nodes_map[n.output[0]] = n.input[0]
@@ -436,11 +380,11 @@ def determine_conv_tile_c(n):
     logger.debug('Determine tile size for Conv node %s', n.name)
 
     output_value_info = find_tensor_value_info(n.output[0])
-    filter_info = find_initializer(n.input[1])
+    filter_info = find_initializer(onnx_model, n.input[1])
     node_flags = n.flags.b.extra.conv
 
     is_separate_tiling = False
-    if not find_initializer(n.input[0]):
+    if not find_initializer(onnx_model, n.input[0]):
         input_node = find_node_by_output(n.input[0])
         if input_node and input_node.op_type == 'Concat':
             is_separate_tiling = True
@@ -499,7 +443,7 @@ def determine_gemm_tile_sizes(n):
     logger.debug('Determine tile size for Gemm node %s', n.name)
 
     A = find_tensor_value_info(n.input[0])
-    B = find_initializer(n.input[1])
+    B = find_initializer(onnx_model, n.input[1])
     A_shape = A.type.tensor_type.shape
     A_rows = A_shape.dim[0].dim_value
     A_cols = A_shape.dim[1].dim_value
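
The fourth changed file did not render above; judging from the new imports it is presumably utils.py, which gains change_batch_size, find_initializer, and find_tensor_value_info. A minimal sketch of find_initializer, assuming it mirrors the local helper removed from transform.py with the model passed explicitly:

def find_initializer(onnx_model, name):
    # Initializers hold the constant tensors (weights) of the graph
    for initializer in onnx_model.graph.initializer:
        if initializer.name == name:
            return initializer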