From 5ca146a2f0245c4cc2101f9b7bfd71c80d41241a Mon Sep 17 00:00:00 2001
From: Animesh Jain
Date: Wed, 14 Aug 2019 16:56:58 -0700
Subject: [PATCH] [QNN] Concatenate operator (#3730)

---
 python/tvm/relay/qnn/op/qnn.py             |  85 ++++++++++++
 tests/python/relay/test_qnn_concatenate.py | 154 +++++++++++++++++++++
 2 files changed, 239 insertions(+)
 create mode 100644 tests/python/relay/test_qnn_concatenate.py

diff --git a/python/tvm/relay/qnn/op/qnn.py b/python/tvm/relay/qnn/op/qnn.py
index 1717bc42fe94..62c3da973e9a 100644
--- a/python/tvm/relay/qnn/op/qnn.py
+++ b/python/tvm/relay/qnn/op/qnn.py
@@ -18,6 +18,7 @@
 """QNN dialect operators."""
 
 from __future__ import absolute_import as _abs
+from tvm import relay
 from . import _make
 
 def requantize(data,
@@ -72,3 +73,87 @@ def requantize(data,
                             output_zero_point,
                             rounding,
                             out_dtype)
+
+def concatenate(data,
+                input_scales,
+                input_zero_points,
+                output_scale,
+                output_zero_point,
+                axis):
+    """Concatenate the quantized input tensors along the given axis.
+
+    Inputs whose (scale, zero point) pair differs from the requested output
+    pair are requantized to the output pair. When every input shares one
+    pair, the tensors are concatenated first so that at most a single
+    requantize is emitted.
+
+    Parameters
+    ----------
+    data : Union(List[relay.Expr], Tuple[relay.Expr])
+        The list of quantized tensors.
+
+    input_scales : List[float32]
+        The list of scales of input quantized tensors.
+
+    input_zero_points : List[int32]
+        The list of zero points of input quantized tensors.
+
+    output_scale : float32
+        The scale of the output quantized tensor.
+
+    output_zero_point : int32
+        The zero point of the output quantized tensor.
+
+    axis : int
+        The axis along which the tensors are concatenated.
+
+    Returns
+    -------
+    result: relay.Expr
+        The concatenated quantized tensor.
+
+    Raises
+    ------
+    ValueError
+        If data is empty, or if input_scales or input_zero_points does not
+        have exactly one entry per input tensor.
+    """
+
+    data = list(data)
+    if not data:
+        raise ValueError("qnn.concatenate requires data to be non-empty.")
+    if len(input_scales) != len(data) or len(input_zero_points) != len(data):
+        raise ValueError("qnn.concatenate requires one scale and one zero point per "
+                         "input tensor: got %d tensors, %d scales and %d zero points."
+                         % (len(data), len(input_scales), len(input_zero_points)))
+
+    # Find the dtype of the input expr. This is required for the requantize op. Since this is a
+    # concatenate op, the dtype of the input is the same as the dtype of the output.
+    data0 = relay.transform.infer_type(data[0])
+    in_dtype = data0.checked_type.dtype
+
+    def _to_output_params(expr, scale, zero_point):
+        """Requantize expr to the output qnn params unless it already uses them."""
+        if scale == output_scale and zero_point == output_zero_point:
+            return expr
+        return requantize(data=expr,
+                          input_scale=scale,
+                          input_zero_point=zero_point,
+                          output_scale=output_scale,
+                          output_zero_point=output_zero_point,
+                          out_dtype=in_dtype)
+
+    # First check if all the input qnn params match. If yes, we can call concatenate first,
+    # followed by (at most) a single requantize.
+    same_scales = all(scale == input_scales[0] for scale in input_scales)
+    same_zero_points = all(zp == input_zero_points[0] for zp in input_zero_points)
+    if same_scales and same_zero_points:
+        out = relay.concatenate(tuple(data), axis)
+        return _to_output_params(out, input_scales[0], input_zero_points[0])
+
+    # If the input qnn params differ from one another, requantize each input expr that does not
+    # already use the output qnn params, followed by a concatenate on the resulting exprs.
+    requantized_exprs = [_to_output_params(expr, scale, zero_point)
+                         for expr, scale, zero_point
+                         in zip(data, input_scales, input_zero_points)]
+    return relay.concatenate(tuple(requantized_exprs), axis)
diff --git a/tests/python/relay/test_qnn_concatenate.py b/tests/python/relay/test_qnn_concatenate.py
new file mode 100644
index 000000000000..b0745cf251c4
--- /dev/null
+++ b/tests/python/relay/test_qnn_concatenate.py
@@ -0,0 +1,154 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+import tvm
+import numpy as np
+from tvm import relay
+from tvm.contrib import graph_runtime
+import topi.testing
+
+def test_same_io_qnn_params():
+    """All inputs and the output share qnn params, so no requantize is emitted."""
+    data_dtype = 'int32'
+    axis = 0
+    x_data = np.arange(-32, 32, 1).reshape(1, 64).astype(data_dtype)
+    y_data = np.arange(-64, 64, 2).reshape(1, 64).astype(data_dtype)
+    x_scale = (62 + 64) / (np.power(2, 32) - 1.0)
+    y_scale = (62 + 64) / (np.power(2, 32) - 1.0)
+
+    x = relay.var("x", shape=(1, 64), dtype=data_dtype)
+    y = relay.var("y", shape=(1, 64), dtype=data_dtype)
+    z = relay.qnn.op.concatenate((x, y),
+                                 input_scales=[x_scale, y_scale],
+                                 input_zero_points=[0, 0],
+                                 output_scale=y_scale,
+                                 output_zero_point=0,
+                                 axis=axis)
+
+    func = relay.Function([x, y], z)
+    assert func.astext().count('requantize') == 0
+    mod = relay.Module.from_expr(func)
+    mod = relay.transform.Legalize()(mod)
+    func = mod["main"]
+
+    # Input and output qnn params are identical: the data is expected unchanged.
+    golden_output = np.concatenate((x_data, y_data), axis=axis)
+
+    intrp = relay.create_executor("graph", ctx=tvm.cpu(0), target="llvm")
+    op_res = intrp.evaluate(func)(x_data, y_data)
+    np.testing.assert_equal(op_res.asnumpy(), golden_output)
+
+def test_different_io_qnn_params():
+    """Inputs differ from each other and from the output: each input is requantized."""
+    data_dtype = 'int32'
+    axis = 0
+    x_data = np.arange(-32, 32, 1).reshape(1, 64).astype(data_dtype)
+    y_data = np.arange(-64, 64, 2).reshape(1, 64).astype(data_dtype)
+    x_scale = (62 + 64) / (np.power(2, 32) - 1.0)
+    y_scale = (62 + 64) / (np.power(2, 32) - 1.0)
+
+    x = relay.var("x", shape=(1, 64), dtype=data_dtype)
+    y = relay.var("y", shape=(1, 64), dtype=data_dtype)
+    z = relay.qnn.op.concatenate((x, y),
+                                 input_scales=[x_scale, y_scale],
+                                 input_zero_points=[3, 4],
+                                 output_scale=y_scale,
+                                 output_zero_point=1,
+                                 axis=axis)
+
+    func = relay.Function([x, y], z)
+    assert func.astext().count('requantize') == 2
+    mod = relay.Module.from_expr(func)
+    mod = relay.transform.Legalize()(mod)
+    func = mod["main"]
+
+    # The scales are identical, so each input is expected to shift by
+    # (output_zero_point - input_zero_point): 1 - 3 = -2 for x and 1 - 4 = -3 for y.
+    golden_output = np.concatenate((x_data - 2, y_data - 3), axis=axis)
+
+    intrp = relay.create_executor("graph", ctx=tvm.cpu(0), target="llvm")
+    op_res = intrp.evaluate(func)(x_data, y_data)
+    np.testing.assert_equal(op_res.asnumpy(), golden_output)
+
+def test_few_same_io_qnn_params():
+    """Only x differs from the output qnn params, so only x is requantized."""
+    data_dtype = 'int32'
+    axis = 0
+    x_data = np.arange(-32, 32, 1).reshape(1, 64).astype(data_dtype)
+    y_data = np.arange(-64, 64, 2).reshape(1, 64).astype(data_dtype)
+    x_scale = (62 + 64) / (np.power(2, 32) - 1.0)
+    y_scale = (62 + 64) / (np.power(2, 32) - 1.0)
+
+    x = relay.var("x", shape=(1, 64), dtype=data_dtype)
+    y = relay.var("y", shape=(1, 64), dtype=data_dtype)
+    z = relay.qnn.op.concatenate((x, y),
+                                 input_scales=[x_scale, y_scale],
+                                 input_zero_points=[0, 1],
+                                 output_scale=y_scale,
+                                 output_zero_point=1,
+                                 axis=axis)
+
+    func = relay.Function([x, y], z)
+    assert func.astext().count('requantize') == 1
+    mod = relay.Module.from_expr(func)
+    mod = relay.transform.Legalize()(mod)
+    func = mod["main"]
+
+    # x is expected to shift by 1 - 0 = +1; y already uses the output qnn params.
+    golden_output = np.concatenate((x_data + 1, y_data), axis=axis)
+
+    intrp = relay.create_executor("graph", ctx=tvm.cpu(0), target="llvm")
+    op_res = intrp.evaluate(func)(x_data, y_data)
+    np.testing.assert_equal(op_res.asnumpy(), golden_output)
+
+def test_same_i_qnn_params():
+    """Inputs share qnn params that differ from the output: concatenate, then one requantize."""
+    data_dtype = 'int32'
+    axis = 0
+    x_data = np.arange(-32, 32, 1).reshape(1, 64).astype(data_dtype)
+    y_data = np.arange(-64, 64, 2).reshape(1, 64).astype(data_dtype)
+    x_scale = (62 + 64) / (np.power(2, 32) - 1.0)
+    y_scale = (62 + 64) / (np.power(2, 32) - 1.0)
+
+    x = relay.var("x", shape=(1, 64), dtype=data_dtype)
+    y = relay.var("y", shape=(1, 64), dtype=data_dtype)
+    z = relay.qnn.op.concatenate((x, y),
+                                 input_scales=[x_scale, y_scale],
+                                 input_zero_points=[0, 0],
+                                 output_scale=y_scale,
+                                 output_zero_point=1,
+                                 axis=axis)
+
+    func = relay.Function([x, y], z)
+    assert func.astext().count('requantize') == 1
+    mod = relay.Module.from_expr(func)
+    mod = relay.transform.Legalize()(mod)
+    func = mod["main"]
+
+    # Both inputs are expected to shift by 1 - 0 = +1.
+    golden_output = np.concatenate((x_data + 1, y_data + 1), axis=axis)
+
+    intrp = relay.create_executor("graph", ctx=tvm.cpu(0), target="llvm")
+    op_res = intrp.evaluate(func)(x_data, y_data)
+    np.testing.assert_equal(op_res.asnumpy(), golden_output)
+
+
+if __name__ == '__main__':
+    test_same_io_qnn_params()
+    test_different_io_qnn_params()
+    test_few_same_io_qnn_params()
+    test_same_i_qnn_params()