[QNN] Add operator.
anijain2305 committed Aug 12, 2019
1 parent 3ac27fc commit 64311a1
Showing 2 changed files with 315 additions and 0 deletions.
119 changes: 119 additions & 0 deletions python/tvm/relay/qnn/op/qnn.py
@@ -18,6 +18,8 @@
"""QNN dialect operators."""

from __future__ import absolute_import as _abs
import tvm
from tvm import relay
from . import _make

def requantize(data,
@@ -72,3 +74,120 @@ def requantize(data,
output_zero_point,
rounding,
out_dtype)


def add(lhs, rhs, lhs_scale, lhs_zero_point, rhs_scale, rhs_zero_point, output_scale,
output_zero_point):
"""Quantized addition with numpy-style broadcasting.
Parameters
----------
lhs : relay.Expr
The left hand side quantized input data.
rhs : relay.Expr
The right hand side quantized input data.
lhs_scale: float
The scale of the lhs quantized expr.
lhs_zero_point: int
The zero point of lhs quantized expr.
rhs_scale: float
The scale of the rhs quantized expr.
rhs_zero_point: int
The zero point of rhs quantized expr.
output_scale: float
The scale of the output quantized expr.
output_zero_point: int
The zero point of output quantized expr.

Returns
-------
result : relay.Expr
The computed result.
"""

# Find the dtype of the input expr. This is required for the requantize op. Since this is an
# add op, the dtype of the input is the same as the dtype of the output.
data0 = relay.transform.infer_type(lhs)
in_dtype = data0.checked_type.dtype
assert in_dtype in ('int8', 'uint8')

# First, check if the qnn params of lhs and rhs match. If they do, we can avoid one requantize
# by calling add first and then requantizing once. The whole process can be represented as follows
#
# scale_c * (Q_c - zp_c) = scale * (Q_a - zp) + scale * (Q_b - zp)
# scale_c * (Q_c - zp_c) = scale * (Q_a + Q_b - zp - zp)
#
# RHS looks like a quantized tensor with scale = scale, and zero_point = zp + zp
# This can be handled by first subtracting the zero point, followed by requantize with the
# output qnn params. The add op is done in int16 precision.
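#
# A small worked example (illustrative numbers, not taken from this commit): with scale = 0.5,
# zp = 1, Q_a = 5 and Q_b = 7, the real values are 0.5 * (5 - 1) = 2.0 and 0.5 * (7 - 1) = 3.0.
# If the output also uses scale = 0.5 and zp = 1, then 0.5 * (Q_c - 1) = 5.0 gives Q_c = 11,
# which is exactly Q_a + Q_b - zp = 5 + 7 - 1.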

if lhs_scale == rhs_scale and lhs_zero_point == rhs_zero_point:
lhs = relay.cast(lhs, dtype='int16')
rhs = relay.cast(rhs, dtype='int16')
out = relay.add(lhs, rhs)
out = relay.subtract(out, relay.const(lhs_zero_point, dtype='int16'))
if lhs_scale != output_scale or lhs_zero_point != output_zero_point:
out = requantize(data=out,
input_scale=lhs_scale,
input_zero_point=lhs_zero_point,
output_scale=output_scale,
output_zero_point=output_zero_point,
out_dtype=in_dtype)
else:
out = relay.clip(out,
a_min=tvm.api.min_value(in_dtype).value,
a_max=tvm.api.max_value(in_dtype).value)
out = relay.cast(out, dtype=in_dtype)
return out

# Since the input qnn params can differ from the output qnn params, we first requantize the
# input tensors to the output qnn params. Then we call relay.add on the requantized inputs. This
# addition adds the output zero point one extra time, so we subtract it afterwards. The whole
# process can be represented with the following equations
#
# scale_c * (Q_c - zp_c) = scale_a * (Q_a - zp_a) + scale_b * (Q_b - zp_b)
#
# After requantizing Q_a and Q_b, the equation becomes,
# scale_c * (Q_c - zp_c) = scale_c * (Q_a' - zp_c) + scale_c * (Q_b' - zp_c)
# scale_c * (Q_c - zp_c) = scale_c * (Q_a' + Q_b' - zp_c - zp_c)
#
# Comparing the LHS and RHS, it results in
# Q_c = Q_a' + Q_b' - zp_c
# The add op is done in int16 precision.
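#
# A concrete check (using the qnn params of the last saturation test below): with
# lhs_scale = 0.5, rhs_scale = 0.25, output_scale = 0.125 and all zero points 0, the inputs
# Q_a = 1 and Q_b = 64 represent 0.5 and 16.0. Requantizing to the output scale gives
# Q_a' = 4 and Q_b' = 128, so Q_c = 4 + 128 - 0 = 132, i.e. 16.5 / 0.125.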

requantized_lhs = lhs
if lhs_scale != output_scale or lhs_zero_point != output_zero_point:
requantized_lhs = requantize(data=lhs,
input_scale=lhs_scale,
input_zero_point=lhs_zero_point,
output_scale=output_scale,
output_zero_point=output_zero_point,
out_dtype=in_dtype)

requantized_rhs = rhs
if rhs_scale != output_scale or rhs_zero_point != output_zero_point:
requantized_rhs = requantize(data=rhs,
input_scale=rhs_scale,
input_zero_point=rhs_zero_point,
output_scale=output_scale,
output_zero_point=output_zero_point,
out_dtype=in_dtype)

requantized_lhs = relay.cast(requantized_lhs, dtype='int16')
requantized_rhs = relay.cast(requantized_rhs, dtype='int16')
out = relay.add(requantized_lhs, requantized_rhs)
out = relay.subtract(out, relay.const(output_zero_point, dtype='int16'))
# Go back to in_dtype. Clip to min and max, followed by a cast.
out = relay.clip(out,
a_min=tvm.api.min_value(in_dtype).value,
a_max=tvm.api.max_value(in_dtype).value)
out = relay.cast(out, dtype=in_dtype)
return out
196 changes: 196 additions & 0 deletions tests/python/relay/test_qnn_add.py
@@ -0,0 +1,196 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.

import tvm
import numpy as np
from tvm import relay
from tvm.contrib import graph_runtime
import topi.testing

def test_tflite_same_io_qnn_params():
data_dtype = 'uint8'

x = relay.var("x", shape=(1, 4), dtype=data_dtype)
y = relay.var("y", shape=(1, 4), dtype=data_dtype)
z = relay.qnn.op.add(lhs=x, rhs=y,
lhs_scale=0.00784314,
lhs_zero_point=127,
rhs_scale=0.00784314,
rhs_zero_point=127,
output_scale=0.00784314,
output_zero_point=127)

func = relay.Function([x, y], z)
mod = relay.Module.from_expr(func)
mod = relay.transform.Legalize()(mod)
func = mod["main"]

x_datas = [np.array((140, 153, 165, 178)).reshape((1,4)),
np.array((25, 153, 178, 216)).reshape((1,4)),
np.array((25, 153, 216, 165)).reshape((1,4))]
y_datas = [np.array((204, 178, 165, 140)).reshape((1,4)),
np.array((204, 178, 191, 25)).reshape((1,4)),
np.array((204, 178, 25, 191)).reshape((1,4))]
golden_outputs = [np.array((217,204,203,191)).reshape((1, 4)),
np.array((102, 204, 242, 114)).reshape((1,4)),
np.array((102, 204, 114, 229)).reshape((1,4))]

for i in range(0, 3):
x_data = x_datas[i]
y_data = y_datas[i]
golden_output = golden_outputs[i]

intrp = relay.create_executor("graph", ctx=tvm.cpu(0), target="llvm")
op_res = intrp.evaluate(func)(x_data, y_data)
np.testing.assert_equal(op_res.asnumpy(), golden_output)


def test_tflite_different_io_qnn_params():
data_dtype = 'uint8'

x = relay.var("x", shape=(1, 4), dtype=data_dtype)
y = relay.var("y", shape=(1, 4), dtype=data_dtype)
z = relay.qnn.op.add(lhs=x, rhs=y,
lhs_scale=0.0156863,
lhs_zero_point=127,
rhs_scale=0.0117647,
rhs_zero_point=85,
output_scale=0.0235294,
output_zero_point=128)

func = relay.Function([x, y], z)
mod = relay.Module.from_expr(func)
mod = relay.transform.Legalize()(mod)
func = mod["main"]

x_datas = [np.array((76, 140, 153, 172)).reshape((1,4)),
np.array((133, 140, 146, 153)).reshape((1,4)),
np.array((76, 140, 172, 146)).reshape((1,4))]
y_datas = [np.array((136, 119, 128, 17)).reshape((1,4)),
np.array((136, 119, 111, 94)).reshape((1,4)),
np.array((136, 119, 17, 128)).reshape((1,4))]
golden_outputs = [np.array((120, 154, 167, 124)).reshape((1, 4)),
np.array((158, 154, 154, 150)).reshape((1,4)),
np.array((120, 154, 124, 163)).reshape((1,4))]

for i in range(0, 3):
x_data = x_datas[i]
y_data = y_datas[i]
golden_output = golden_outputs[i]

intrp = relay.create_executor("graph", ctx=tvm.cpu(0), target="llvm")
op_res = intrp.evaluate(func)(x_data, y_data)
np.testing.assert_equal(op_res.asnumpy(), golden_output)


def test_saturation():
# Same params
data_dtype = 'uint8'
x = relay.var("x", shape=(1, 4), dtype=data_dtype)
y = relay.var("y", shape=(1, 4), dtype=data_dtype)
z = relay.qnn.op.add(lhs=x, rhs=y,
lhs_scale=0.125,
lhs_zero_point=0,
rhs_scale=0.125,
rhs_zero_point=0,
output_scale=0.125,
output_zero_point=0)

func = relay.Function([x, y], z)
mod = relay.Module.from_expr(func)
mod = relay.transform.Legalize()(mod)
func = mod["main"]

x_data = np.array((255, 1, 1, 0)).reshape((1,4))
y_data = np.array((255, 255, 128, 0)).reshape((1,4))
golden_output = np.array((255, 255, 129, 0)).reshape((1, 4))
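# With identical qnn params the add happens in int16 and is then clipped to uint8:
# 255 + 255 = 510 and 1 + 255 = 256 both saturate to 255, while 1 + 128 = 129 and 0 + 0 = 0
# pass through unchanged.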

intrp = relay.create_executor("graph", ctx=tvm.cpu(0), target="llvm")
op_res = intrp.evaluate(func)(x_data, y_data)
np.testing.assert_equal(op_res.asnumpy(), golden_output)

# Same params, different scale
z = relay.qnn.op.add(lhs=x, rhs=y,
lhs_scale=0.125,
lhs_zero_point=0,
rhs_scale=0.125,
rhs_zero_point=0,
output_scale=0.25,
output_zero_point=0)

func = relay.Function([x, y], z)
mod = relay.Module.from_expr(func)
mod = relay.transform.Legalize()(mod)
func = mod["main"]

x_data = np.array((255, 1, 1, 0)).reshape((1,4))
y_data = np.array((255, 255, 127, 0)).reshape((1,4))
golden_output = np.array((255, 128, 64, 0)).reshape((1, 4))

intrp = relay.create_executor("graph", ctx=tvm.cpu(0), target="llvm")
op_res = intrp.evaluate(func)(x_data, y_data)
np.testing.assert_equal(op_res.asnumpy(), golden_output)

# Same io params, different output scale
z = relay.qnn.op.add(lhs=x, rhs=y,
lhs_scale=0.125,
lhs_zero_point=0,
rhs_scale=0.125,
rhs_zero_point=0,
output_scale=0.25,
output_zero_point=0)

func = relay.Function([x, y], z)
mod = relay.Module.from_expr(func)
mod = relay.transform.Legalize()(mod)
func = mod["main"]

x_data = np.array((255, 1, 1, 0)).reshape((1,4))
y_data = np.array((255, 255, 127, 0)).reshape((1,4))
golden_output = np.array((255, 128, 64, 0)).reshape((1, 4))

intrp = relay.create_executor("graph", ctx=tvm.cpu(0), target="llvm")
op_res = intrp.evaluate(func)(x_data, y_data)
np.testing.assert_equal(op_res.asnumpy(), golden_output)

# All params different
z = relay.qnn.op.add(lhs=x, rhs=y,
lhs_scale=0.5,
lhs_zero_point=0,
rhs_scale=0.25,
rhs_zero_point=0,
output_scale=0.125,
output_zero_point=0)

func = relay.Function([x, y], z)
mod = relay.Module.from_expr(func)
mod = relay.transform.Legalize()(mod)
func = mod["main"]

x_data = np.array((255, 0, 1, 0)).reshape((1,4))
y_data = np.array((0, 128, 64, 0)).reshape((1,4))
golden_output = np.array((255, 255, 132, 0)).reshape((1, 4))

intrp = relay.create_executor("graph", ctx=tvm.cpu(0), target="llvm")
op_res = intrp.evaluate(func)(x_data, y_data)
np.testing.assert_equal(op_res.asnumpy(), golden_output)


if __name__ == '__main__':
test_tflite_same_io_qnn_params()
test_tflite_different_io_qnn_params()
test_saturation()
