[QNN] Add operator.
anijain2305 committed Aug 12, 2019
1 parent 3ac27fc commit 64311a1
Showing 2 changed files with 315 additions and 0 deletions.
119 changes: 119 additions & 0 deletions python/tvm/relay/qnn/op/qnn.py
@@ -18,6 +18,8 @@
"""QNN dialect operators."""

from __future__ import absolute_import as _abs
import tvm
from tvm import relay
from . import _make

def requantize(data,
@@ -72,3 +74,120 @@ def requantize(data,
output_zero_point,
rounding,
out_dtype)


def add(lhs, rhs, lhs_scale, lhs_zero_point, rhs_scale, rhs_zero_point, output_scale,
output_zero_point):
"""Quantized addition with numpy-style broadcasting.
Parameters
----------
lhs : relay.Expr
The left hand side quantized input data.
rhs : relay.Expr
The right hand side quantized input data.
lhs_scale: float
The scale of the lhs quantized expr.
lhs_zero_point: int
The zero point of lhs quantized expr.
rhs_scale: float
The scale of the rhs quantized expr.
rhs_zero_point: int
The zero point of rhs quantized expr.
output_scale: float
The scale of the output quantized expr.
output_zero_point: int
The zero point of output quantized expr.

Returns
-------
result : relay.Expr
The computed result.
"""

# Find the dtype of the input expr. This is required for the requantize op. Since this is an
# add op, the dtype of the input is the same as the dtype of the output.
data0 = relay.transform.infer_type(lhs)
in_dtype = data0.checked_type.dtype
assert in_dtype in ('int8', 'uint8')

# First, check if the qnn params of lhs and rhs match. If they do, we can avoid one requantize
# by calling add first and then requantizing once. The whole process can be represented as follows
#
# scale_c * (Q_c - zp_c) = scale * (Q_a - zp) + scale * (Q_b - zp)
# scale_c * (Q_c - zp_c) = scale * (Q_a + Q_b - zp - zp)
#
# RHS looks like a quantized tensor with scale = scale, and zero_point = zp + zp
# This can be handled by first subtracting the zero point, followed by requantize with the
# output qnn params. The add op is done in int16 precision.
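#
# A small worked example (illustrative numbers, not taken from this commit): with scale = 0.5,
# zp = 1, Q_a = 5 and Q_b = 7, the real values are 0.5 * (5 - 1) = 2.0 and 0.5 * (7 - 1) = 3.0.
# If the output also uses scale = 0.5 and zp = 1, then 0.5 * (Q_c - 1) = 5.0 gives Q_c = 11,
# which is exactly Q_a + Q_b - zp = 5 + 7 - 1.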

if lhs_scale == rhs_scale and lhs_zero_point == rhs_zero_point:
lhs = relay.cast(lhs, dtype='int16')
rhs = relay.cast(rhs, dtype='int16')
out = relay.add(lhs, rhs)
out = relay.subtract(out, relay.const(lhs_zero_point, dtype='int16'))
if lhs_scale != output_scale or lhs_zero_point != output_zero_point:
out = requantize(data=out,
input_scale=lhs_scale,
input_zero_point=lhs_zero_point,
output_scale=output_scale,
output_zero_point=output_zero_point,
out_dtype=in_dtype)
else:
out = relay.clip(out,
a_min=tvm.api.min_value(in_dtype).value,
a_max=tvm.api.max_value(in_dtype).value)
out = relay.cast(out, dtype=in_dtype)
return out

# Since the input qnn params can differ from the output qnn params, we first requantize the
# input tensors to the output qnn params. Then we call relay.add on the requantized inputs. This
# addition adds the output zero point one extra time, so we subtract it afterwards. The whole
# process can be represented with the following equations
#
# scale_c * (Q_c - zp_c) = scale_a * (Q_a - zp_a) + scale_b * (Q_b - zp_b)
#
# After requantizing Q_a and Q_b, the equation becomes,
# scale_c * (Q_c - zp_c) = scale_c * (Q_a' - zp_c) + scale_c * (Q_b' - zp_c)
# scale_c * (Q_c - zp_c) = scale_c * (Q_a' + Q_b' - zp_c - zp_c)
#
# Comparing the LHS and RHS, it results in
# Q_c = Q_a' + Q_b' - zp_c
# The add op is done in int16 precision.
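#
# A concrete check (using the qnn params of the last saturation test below): with
# lhs_scale = 0.5, rhs_scale = 0.25, output_scale = 0.125 and all zero points 0, the inputs
# Q_a = 1 and Q_b = 64 represent 0.5 and 16.0. Requantizing to the output scale gives
# Q_a' = 4 and Q_b' = 128, so Q_c = 4 + 128 - 0 = 132, i.e. 16.5 / 0.125.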

requantized_lhs = lhs
if lhs_scale != output_scale or lhs_zero_point != output_zero_point:
requantized_lhs = requantize(data=lhs,
input_scale=lhs_scale,
input_zero_point=lhs_zero_point,
output_scale=output_scale,
output_zero_point=output_zero_point,
out_dtype=in_dtype)

requantized_rhs = rhs
if rhs_scale != output_scale or rhs_zero_point != output_zero_point:
requantized_rhs = requantize(data=rhs,
input_scale=rhs_scale,
input_zero_point=rhs_zero_point,
output_scale=output_scale,
output_zero_point=output_zero_point,
out_dtype=in_dtype)

requantized_lhs = relay.cast(requantized_lhs, dtype='int16')
requantized_rhs = relay.cast(requantized_rhs, dtype='int16')
out = relay.add(requantized_lhs, requantized_rhs)
out = relay.subtract(out, relay.const(output_zero_point, dtype='int16'))
# Go back to in_dtype. Clip to min and max, followed by a cast.
out = relay.clip(out,
a_min=tvm.api.min_value(in_dtype).value,
a_max=tvm.api.max_value(in_dtype).value)
out = relay.cast(out, dtype=in_dtype)
return out
196 changes: 196 additions & 0 deletions tests/python/relay/test_qnn_add.py
@@ -0,0 +1,196 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.

import tvm
import numpy as np
from tvm import relay
from tvm.contrib import graph_runtime
import topi.testing

def test_tflite_same_io_qnn_params():
data_dtype = 'uint8'

x = relay.var("x", shape=(1, 4), dtype=data_dtype)
y = relay.var("y", shape=(1, 4), dtype=data_dtype)
z = relay.qnn.op.add(lhs=x, rhs=y,
lhs_scale=0.00784314,
lhs_zero_point=127,
rhs_scale=0.00784314,
rhs_zero_point=127,
output_scale=0.00784314,
output_zero_point=127)

func = relay.Function([x, y], z)
mod = relay.Module.from_expr(func)
mod = relay.transform.Legalize()(mod)
func = mod["main"]

x_datas = [np.array((140, 153, 165, 178)).reshape((1,4)),
np.array((25, 153, 178, 216)).reshape((1,4)),
np.array((25, 153, 216, 165)).reshape((1,4))]
y_datas = [np.array((204, 178, 165, 140)).reshape((1,4)),
np.array((204, 178, 191, 25)).reshape((1,4)),
np.array((204, 178, 25, 191)).reshape((1,4))]
golden_outputs = [np.array((217,204,203,191)).reshape((1, 4)),
np.array((102, 204, 242, 114)).reshape((1,4)),
np.array((102, 204, 114, 229)).reshape((1,4))]

for i in range(0, 3):
x_data = x_datas[i]
y_data = y_datas[i]
golden_output = golden_outputs[i]

intrp = relay.create_executor("graph", ctx=tvm.cpu(0), target="llvm")
op_res = intrp.evaluate(func)(x_data, y_data)
np.testing.assert_equal(op_res.asnumpy(), golden_output)


def test_tflite_different_io_qnn_params():
data_dtype = 'uint8'

x = relay.var("x", shape=(1, 4), dtype=data_dtype)
y = relay.var("y", shape=(1, 4), dtype=data_dtype)
z = relay.qnn.op.add(lhs=x, rhs=y,
lhs_scale=0.0156863,
lhs_zero_point=127,
rhs_scale=0.0117647,
rhs_zero_point=85,
output_scale=0.0235294,
output_zero_point=128)

func = relay.Function([x, y], z)
mod = relay.Module.from_expr(func)
mod = relay.transform.Legalize()(mod)
func = mod["main"]

x_datas = [np.array((76, 140, 153, 172)).reshape((1,4)),
np.array((133, 140, 146, 153)).reshape((1,4)),
np.array((76, 140, 172, 146)).reshape((1,4))]
y_datas = [np.array((136, 119, 128, 17)).reshape((1,4)),
np.array((136, 119, 111, 94)).reshape((1,4)),
np.array((136, 119, 17, 128)).reshape((1,4))]
golden_outputs = [np.array((120, 154, 167, 124)).reshape((1, 4)),
np.array((158, 154, 154, 150)).reshape((1,4)),
np.array((120, 154, 124, 163)).reshape((1,4))]

for i in range(0, 3):
x_data = x_datas[i]
y_data = y_datas[i]
golden_output = golden_outputs[i]

intrp = relay.create_executor("graph", ctx=tvm.cpu(0), target="llvm")
op_res = intrp.evaluate(func)(x_data, y_data)
np.testing.assert_equal(op_res.asnumpy(), golden_output)


def test_saturation():
# Same params
data_dtype = 'uint8'
x = relay.var("x", shape=(1, 4), dtype=data_dtype)
y = relay.var("y", shape=(1, 4), dtype=data_dtype)
z = relay.qnn.op.add(lhs=x, rhs=y,
lhs_scale=0.125,
lhs_zero_point=0,
rhs_scale=0.125,
rhs_zero_point=0,
output_scale=0.125,
output_zero_point=0)

func = relay.Function([x, y], z)
mod = relay.Module.from_expr(func)
mod = relay.transform.Legalize()(mod)
func = mod["main"]

x_data = np.array((255, 1, 1, 0)).reshape((1,4))
y_data = np.array((255, 255, 128, 0)).reshape((1,4))
golden_output = np.array((255, 255, 129, 0)).reshape((1, 4))
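# With identical qnn params the add happens in int16 and is then clipped to uint8:
# 255 + 255 = 510 and 1 + 255 = 256 both saturate to 255, while 1 + 128 = 129 and 0 + 0 = 0
# pass through unchanged.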

intrp = relay.create_executor("graph", ctx=tvm.cpu(0), target="llvm")
op_res = intrp.evaluate(func)(x_data, y_data)
np.testing.assert_equal(op_res.asnumpy(), golden_output)

# Same params, different scale
z = relay.qnn.op.add(lhs=x, rhs=y,
lhs_scale=0.125,
lhs_zero_point=0,
rhs_scale=0.125,
rhs_zero_point=0,
output_scale=0.25,
output_zero_point=0)

func = relay.Function([x, y], z)
mod = relay.Module.from_expr(func)
mod = relay.transform.Legalize()(mod)
func = mod["main"]

x_data = np.array((255, 1, 1, 0)).reshape((1,4))
y_data = np.array((255, 255, 127, 0)).reshape((1,4))
golden_output = np.array((255, 128, 64, 0)).reshape((1, 4))

intrp = relay.create_executor("graph", ctx=tvm.cpu(0), target="llvm")
op_res = intrp.evaluate(func)(x_data, y_data)
np.testing.assert_equal(op_res.asnumpy(), golden_output)

# Same io params, different output scale
z = relay.qnn.op.add(lhs=x, rhs=y,
lhs_scale=0.125,
lhs_zero_point=0,
rhs_scale=0.125,
rhs_zero_point=0,
output_scale=0.25,
output_zero_point=0)

func = relay.Function([x, y], z)
mod = relay.Module.from_expr(func)
mod = relay.transform.Legalize()(mod)
func = mod["main"]

x_data = np.array((255, 1, 1, 0)).reshape((1,4))
y_data = np.array((255, 255, 127, 0)).reshape((1,4))
golden_output = np.array((255, 128, 64, 0)).reshape((1, 4))

intrp = relay.create_executor("graph", ctx=tvm.cpu(0), target="llvm")
op_res = intrp.evaluate(func)(x_data, y_data)
np.testing.assert_equal(op_res.asnumpy(), golden_output)

# All params different
z = relay.qnn.op.add(lhs=x, rhs=y,
lhs_scale=0.5,
lhs_zero_point=0,
rhs_scale=0.25,
rhs_zero_point=0,
output_scale=0.125,
output_zero_point=0)

func = relay.Function([x, y], z)
mod = relay.Module.from_expr(func)
mod = relay.transform.Legalize()(mod)
func = mod["main"]

x_data = np.array((255, 0, 1, 0)).reshape((1,4))
y_data = np.array((0, 128, 64, 0)).reshape((1,4))
golden_output = np.array((255, 255, 132, 0)).reshape((1, 4))

intrp = relay.create_executor("graph", ctx=tvm.cpu(0), target="llvm")
op_res = intrp.evaluate(func)(x_data, y_data)
np.testing.assert_equal(op_res.asnumpy(), golden_output)


if __name__ == '__main__':
test_tflite_same_io_qnn_params()
test_tflite_different_io_qnn_params()
test_saturation()
