From 41c82e46afb44d897ae416448770cc2f6206bb62 Mon Sep 17 00:00:00 2001
From: Animesh Jain <anijain@umich.edu>
Date: Thu, 22 Aug 2019 04:56:31 +0000
Subject: [PATCH] [QNN] Concat - Refactoring to C++

---
 include/tvm/relay/qnn/attrs.h              |  28 +++++
 python/tvm/relay/qnn/op/qnn.py             |  55 +++-----
 src/relay/op/tensor/transform.cc           |  83 +-----------
 src/relay/op/tensor/transform.h            | 122 ++++++++++++++++++
 src/relay/pass/pattern_util.h              |   7 ++
 src/relay/qnn/op/concatenate.cc            | 140 +++++++++++++++++++++
 src/relay/qnn/util.h                       |   3 +
 tests/python/relay/test_qnn_concatenate.py |   4 -
 8 files changed, 317 insertions(+), 125 deletions(-)
 create mode 100644 src/relay/op/tensor/transform.h
 create mode 100644 src/relay/qnn/op/concatenate.cc
diff --git a/include/tvm/relay/qnn/attrs.h b/include/tvm/relay/qnn/attrs.h
index 1ebdeaa7d708a..358b493f12e1c 100644
--- a/include/tvm/relay/qnn/attrs.h
+++ b/include/tvm/relay/qnn/attrs.h
@@ -97,6 +97,34 @@ struct DequantizeAttrs : public tvm::AttrsNode<DequantizeAttrs> {
   }
 };
 
+/*! \brief Attributes used in QNN concatenate operators */
+struct QnnConcatenateAttrs : public tvm::AttrsNode<QnnConcatenateAttrs> {
+  Array<tvm::Expr> input_scales;
+  Array<tvm::Expr> input_zero_points;
+  double output_scale;
+  int32_t output_zero_point;
+  int axis;
+
+  TVM_DECLARE_ATTRS(QnnConcatenateAttrs, "relay.attrs.QnnConcatenateAttrs") {
+    TVM_ATTR_FIELD(input_scales)
+        .describe("The list of scales of input quantized tensors.");
+
+    TVM_ATTR_FIELD(input_zero_points)
+        .describe("The list of zero points of input quantized tensors.");
+
+    TVM_ATTR_FIELD(output_zero_point)
+      .describe("The zero_point for the output tensor.");
+
+    TVM_ATTR_FIELD(output_scale)
+      .describe("The scale for the output tensor.");
+
+    TVM_ATTR_FIELD(axis)
+        .describe("The axis at which the input arrays are concatenated."
+                  "Should lie in range `[-ndim, ndim)`.")
+        .set_default(0);
+  }
+};  // struct QnnConcatenateAttrs
+
 }  // namespace qnn
 }  // namespace relay
 }  // namespace tvm
diff --git a/python/tvm/relay/qnn/op/qnn.py b/python/tvm/relay/qnn/op/qnn.py
index b153cd58876a7..7eb0408785dcc 100644
--- a/python/tvm/relay/qnn/op/qnn.py
+++ b/python/tvm/relay/qnn/op/qnn.py
@@ -18,7 +18,8 @@
 """QNN dialect operators."""
 
 from __future__ import absolute_import as _abs
-from tvm import relay
+from tvm.expr import FloatImm, IntImm
+from tvm.relay.expr import Tuple
 from . import _make
 
 def requantize(data,
@@ -134,6 +135,8 @@ def dequantize(data,
     return _make.dequantize(data,
                             input_scale,
                             input_zero_point)
+
+
 def concatenate(data,
                 input_scales,
                 input_zero_points,
@@ -169,42 +172,14 @@ def concatenate(data,
     """
 
     data = list(data)
-    requantized_exprs = list(data)
-
-    # Find the dtype of the input expr. This is required for the requantize op. Since, this is
-    # concatenate op, the dtype of the input is same as dtype of the output.
-    mod = relay.Module.from_expr(data[0])
-    mod = relay.transform.InferType()(mod)
-    entry = mod["main"]
-    data0 = entry if isinstance(data[0], relay.Function) else entry.body
-    in_dtype = data0.checked_type.dtype
-
-    # First check if all the input qnn params match. If yes, we can call concatenate first, followed
-    # by a requantize.
-    if all(scale == input_scales[0] for scale in input_scales)\
-            and all(zero_point == input_zero_points[0] for zero_point in input_zero_points):
-        out = relay.concatenate(tuple(data), axis)
-        input_scale = input_scales[0]
-        input_zero_point = input_zero_points[0]
-        if input_scale != output_scale or input_zero_point != output_zero_point:
-            out = requantize(data=out,
-                             input_scale=input_scales[0],
-                             input_zero_point=input_zero_points[0],
-                             output_scale=output_scale,
-                             output_zero_point=output_zero_point,
-                             out_dtype=in_dtype)
-        return out
-
-    # If the output qnn params do not match the input qnn params, we can call requantize on the
-    # input expr first, followed by a concatenate on the requantized input exprs.
-    for idx, quantized_expr in enumerate(data):
-        input_scale = input_scales[idx]
-        input_zero_point = input_zero_points[idx]
-        if input_scale != output_scale or input_zero_point != output_zero_point:
-            requantized_exprs[idx] = requantize(data=quantized_expr,
-                                                input_scale=input_scale,
-                                                input_zero_point=input_zero_point,
-                                                output_scale=output_scale,
-                                                output_zero_point=output_zero_point,
-                                                out_dtype=in_dtype)
-    return relay.concatenate(tuple(requantized_exprs), axis)
+    if not data:
+        raise ValueError("relay.concatenate requires data to be non-empty.")
+    if not isinstance(axis, int):
+        raise ValueError("For now, we only support integer axis")
+
+    return _make.concatenate(Tuple(data),
+                             [FloatImm("float64", x) for x in input_scales],
+                             [IntImm("int32", x) for x in input_zero_points],
+                             output_scale,
+                             output_zero_point,
+                             axis)
diff --git a/src/relay/op/tensor/transform.cc b/src/relay/op/tensor/transform.cc
index 03a92b35d3969..e2b812d8f75bc 100644
--- a/src/relay/op/tensor/transform.cc
+++ b/src/relay/op/tensor/transform.cc
@@ -37,6 +37,7 @@
 #include "../op_common.h"
 #include "../../../arithmetic/compute_expr.h"
 #include "../../pass/alter_op_layout.h"
+#include "transform.h"
 
 namespace tvm {
 namespace relay {
@@ -210,86 +211,6 @@ RELAY_REGISTER_OP("expand_dims")
 // relay.concatenate
 TVM_REGISTER_NODE_TYPE(ConcatenateAttrs);
 
-bool ConcatenateRel(const Array<Type>& types,
-                    int num_inputs,
-                    const Attrs& attrs,
-                    const TypeReporter& reporter) {
-  // types: [data, result]
-  CHECK_EQ(types.size(), 2);
-  /* If we receive a tuple we can continue, if we receive
-   * anything but an incomplete type we should signal an
-   * error.
-  */
-  const auto* tensor_tuple = types[0].as<TupleTypeNode>();
-  if (tensor_tuple == nullptr) {
-    throw relay::Error(
-        RELAY_ERROR(
-          "concatenate requires a tuple of tensors as the first argument, found "
-        << PrettyPrint(types[0])));
-  } else if (types[0].as<IncompleteTypeNode>() != nullptr) {
-    return false;
-  }
-
-  const auto* param = attrs.as<ConcatenateAttrs>();
-  if (tensor_tuple->fields[0].as<IncompleteTypeNode>()) {
-    return false;
-  }
-  const auto& first = Downcast<TensorType>(tensor_tuple->fields[0]);
-  // Sanity check: ndim and dtype.
-  const int ndim = static_cast<int>(first->shape.size());
-  const DataType dtype = first->dtype;
-
-  for (const Type& ele : tensor_tuple->fields) {
-    if (ele.as<IncompleteTypeNode>()) {
-      return false;
-    }
-
-    const auto& e = Downcast<TensorType>(ele);
-
-    int e_ndim = static_cast<int>(e->shape.size());
-    const DataType& e_dtype = e->dtype;
-    if (e_ndim != ndim) {
-      throw relay::Error("relay.concatenate requires all tensors have the same ndim");
-    }
-    if (e_dtype != dtype) {
-      throw relay::Error("relay.concatenate requires all tensors have the same dtype");
-    }
-  }
-  // Sanity check: axis
-  int axis = param->axis;
-  if (!(-ndim <= axis && axis < ndim)) {
-    throw relay::Error(RELAY_ERROR(
-      "concatenate only accepts `axis` in [-ndim, ndim)" <<
-      ", but got axis = " << axis <<
-      ", and ndim = " << ndim));
-  }
-  axis = axis < 0 ? ndim + axis : axis;
-  // Calculate shape
-  std::vector<IndexExpr> oshape(first->shape.begin(), first->shape.end());
-  IndexExpr &concat_dim = oshape[axis];
-  bool has_any = false;
-  if (concat_dim.as<Any>()) {
-    has_any = true;
-  } else {
-    for (int i = 1; i < static_cast<int>(tensor_tuple->fields.size()); ++i) {
-      const auto& e = Downcast<TensorType>(tensor_tuple->fields[i]);
-      if (e->shape[axis].as<Any>()) {
-        has_any = true;
-        break;
-      }
-      concat_dim += e->shape[axis];
-    }
-  }
-
-  if (has_any) {
-    concat_dim = Any::make();
-  }
-
-  auto rtype = TensorTypeNode::make(oshape, dtype);
-  reporter->Assign(types[1], rtype);
-  return true;
-}
-
 Array<Tensor> ConcatenateCompute(const Attrs& attrs,
                           const Array<Tensor>& inputs,
                           const Type& out_type,
@@ -358,7 +279,7 @@ RELAY_REGISTER_OP("concatenate")
 .set_num_inputs(1)
 .add_argument("data", "Tensor", "The input list of tensors.")
 .set_support_level(1)
-.add_type_rel("Concatenate", ConcatenateRel)
+.add_type_rel("Concatenate", ConcatenateRel<ConcatenateAttrs>)
 .set_attr<FInferCorrectLayout>("FInferCorrectLayout", ConcatenateLayout)
 .set_attr<FTVMCompute>("FTVMCompute", ConcatenateCompute)
 .set_attr<TOpPattern>("TOpPattern", kInjective);
diff --git a/src/relay/op/tensor/transform.h b/src/relay/op/tensor/transform.h
new file mode 100644
index 0000000000000..64ff56f8032b0
--- /dev/null
+++ b/src/relay/op/tensor/transform.h
@@ -0,0 +1,122 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+/*!
+ *  Copyright (c) 2019 by Contributors
+ * \file src/relay/op/tensor/transform.h
+ * \brief Tranform op attributes that can be shared among Relay and its dialects.
+ */
+#ifndef TVM_RELAY_OP_TENSOR_TRANSFORM_H_
+#define TVM_RELAY_OP_TENSOR_TRANSFORM_H_
+
+#include <algorithm>
+#include <limits>
+#include <string>
+#include <unordered_set>
+#include <utility>
+#include <vector>
+
+namespace tvm {
+namespace relay {
+
+template <typename AttrType>
+bool ConcatenateRel(const Array<Type>& types,
+                    int num_inputs,
+                    const Attrs& attrs,
+                    const TypeReporter& reporter) {
+  // types: [data, result]
+  CHECK_EQ(types.size(), 2);
+  /* If we receive a tuple we can continue, if we receive
+   * anything but an incomplete type we should signal an
+   * error.
+  */
+  const auto* tensor_tuple = types[0].as<TupleTypeNode>();
+  if (tensor_tuple == nullptr) {
+    throw relay::Error(
+        RELAY_ERROR(
+          "concatenate requires a tuple of tensors as the first argument, found "
+        << PrettyPrint(types[0])));
+  } else if (types[0].as<IncompleteTypeNode>() != nullptr) {
+    return false;
+  }
+
+  const auto* param = attrs.as<AttrType>();
+  CHECK(param != nullptr);
+  if (tensor_tuple->fields[0].as<IncompleteTypeNode>()) {
+    return false;
+  }
+  const auto& first = Downcast<TensorType>(tensor_tuple->fields[0]);
+  // Sanity check: ndim and dtype.
+  const int ndim = static_cast<int>(first->shape.size());
+  const DataType dtype = first->dtype;
+
+  for (const Type& ele : tensor_tuple->fields) {
+    if (ele.as<IncompleteTypeNode>()) {
+      return false;
+    }
+
+    const auto& e = Downcast<TensorType>(ele);
+
+    int e_ndim = static_cast<int>(e->shape.size());
+    const DataType& e_dtype = e->dtype;
+    if (e_ndim != ndim) {
+      throw relay::Error("relay.concatenate requires all tensors have the same ndim");
+    }
+    if (e_dtype != dtype) {
+      throw relay::Error("relay.concatenate requires all tensors have the same dtype");
+    }
+  }
+  // Sanity check: axis
+  int axis = param->axis;
+  if (!(-ndim <= axis && axis < ndim)) {
+    throw relay::Error(RELAY_ERROR(
+      "concatenate only accepts `axis` in [-ndim, ndim)" <<
+      ", but got axis = " << axis <<
+      ", and ndim = " << ndim));
+  }
+  axis = axis < 0 ? ndim + axis : axis;
+  // Calculate shape
+  std::vector<IndexExpr> oshape(first->shape.begin(), first->shape.end());
+  IndexExpr &concat_dim = oshape[axis];
+  bool has_any = false;
+  if (concat_dim.as<Any>()) {
+    has_any = true;
+  } else {
+    for (int i = 1; i < static_cast<int>(tensor_tuple->fields.size()); ++i) {
+      const auto& e = Downcast<TensorType>(tensor_tuple->fields[i]);
+      if (e->shape[axis].as<Any>()) {
+        has_any = true;
+        break;
+      }
+      concat_dim += e->shape[axis];
+    }
+  }
+
+  if (has_any) {
+    concat_dim = Any::make();
+  }
+
+  auto rtype = TensorTypeNode::make(oshape, dtype);
+  reporter->Assign(types[1], rtype);
+  return true;
+}
+
+}  // namespace relay
+}  // namespace tvm
+#endif  // TVM_RELAY_OP_TENSOR_TRANSFORM_H_
diff --git a/src/relay/pass/pattern_util.h b/src/relay/pass/pattern_util.h
index 18e5df3e04dfd..069a34d3b15f5 100644
--- a/src/relay/pass/pattern_util.h
+++ b/src/relay/pass/pattern_util.h
@@ -415,6 +415,13 @@ static inline Expr Full(Expr fill_value,
   return CallNode::make(op, {fill_value}, Attrs(attrs), {});
 }
 
+static inline Expr Concatenate(Expr data, int axis) {
+  auto attrs = make_node<ConcatenateAttrs>();
+  attrs->axis = axis;
+  static const Op& op = Op::Get("concatenate");
+  return CallNode::make(op, {data}, Attrs(attrs), {});
+}
+
 Expr MakeConcatenate(Expr data, int axis);
 
 Expr MakeStridedSlice(Expr data, Array<Integer> begin, Array<Integer> end, Array<Integer> strides);
diff --git a/src/relay/qnn/op/concatenate.cc b/src/relay/qnn/op/concatenate.cc
new file mode 100644
index 0000000000000..a40286af94c50
--- /dev/null
+++ b/src/relay/qnn/op/concatenate.cc
@@ -0,0 +1,140 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+/*!
+ *  Copyright (c) 2019 by Contributors
+ * \file src/relay/qnn/op/concatenate.cc
+ * \brief QNN concatenate operator. It concatenates quantized input tensors along a given axis.
+ */
+
+#include <tvm/ir.h>
+#include <tvm/relay/analysis.h>
+#include <tvm/relay/op_attr_types.h>
+#include <tvm/relay/qnn/attrs.h>
+#include "../../op/tensor/transform.h"
+#include "../../pass/pattern_util.h"
+#include "../util.h"
+
+namespace tvm {
+namespace relay {
+namespace qnn {
+
+TVM_REGISTER_NODE_TYPE(QnnConcatenateAttrs);
+
+Expr MakeQnnConcatenate(Expr data, Array<tvm::Expr> input_scales,
+                        Array<tvm::Expr> input_zero_points, double output_scale,
+                        int32_t output_zero_point, int axis) {
+  auto attrs = make_node<QnnConcatenateAttrs>();
+  attrs->input_scales = input_scales;
+  attrs->input_zero_points = input_zero_points;
+  attrs->output_scale = output_scale;
+  attrs->output_zero_point = output_zero_point;
+  attrs->axis = axis;
+  static const Op& op = Op::Get("qnn.concatenate");
+  return CallNode::make(op, {data}, Attrs(attrs), {});
+}
+
+/*
+ * \brief Legalizes the QNN concatenate op.
+ * \param ref_call The original call that will be lowered.
+ * \param new_args The new mutated args to the call node.
+ * \param ctx The node context.
+ * \return The sequence of Relay ops for concatenate op.
+ */
+Expr ConcatenateLegalize(const Attrs& attrs, const Array<Expr>& new_args,
+                         const Array<tvm::relay::Type>& arg_types) {
+  // Get the attrs.
+  CHECK_EQ(new_args.size(), 1);
+  auto& data = new_args[0];
+  const auto* concatenate_attrs = attrs.as<QnnConcatenateAttrs>();
+  CHECK(concatenate_attrs != nullptr);
+  auto input_scales = concatenate_attrs->input_scales;
+  auto input_zero_points = concatenate_attrs->input_zero_points;
+  auto output_scale = concatenate_attrs->output_scale;
+  auto output_zero_point = concatenate_attrs->output_zero_point;
+
+  // Get the input dtype and shape.
+  CHECK_EQ(arg_types.size(), 1);
+  auto tuple_type = arg_types[0].as<TupleTypeNode>();
+  CHECK(tuple_type != nullptr);
+  auto tensor_type = tuple_type->fields[0].as<TensorTypeNode>();
+  auto input_dtype = tensor_type->dtype;
+  auto input_shape = tensor_type->shape;
+
+  // FIXME (anijain2305) - The lowering can be further optimized. Instead of inserting requantize in
+  // the start, we can insert requantize at the end if and only if all the input tensors have same
+  // qnn params. This can be done in future.
+
+  // If the output qnn params do not match the input qnn params, we can call requantize on the input
+  // expr first, followed by a concatenate on the requantized input exprs.
+
+  auto tuple_data = data.as<TupleNode>();
+  CHECK(tuple_data != nullptr);
+
+  int idx = 0;
+  Array<Expr> requantized_exprs;
+  for (auto quantized_expr : tuple_data->fields) {
+    // Get the input scale for the idx quantized input tensor.
+    auto input_scale_expr = input_scales[idx].as<tvm::ir::FloatImm>();
+    CHECK(input_scale_expr != nullptr);
+    auto input_scale = input_scale_expr->value;
+
+    // Get the zero point for the idx quantized input tensor.
+    auto input_zero_point_expr = input_zero_points[idx].as<tvm::ir::IntImm>();
+    CHECK(input_zero_point_expr != nullptr);
+    auto input_zero_point = input_zero_point_expr->value;
+
+    // Check if output and input qnn params are same. If not, requantize.
+    if (input_scale != output_scale || input_zero_point != output_zero_point) {
+      // Create the requantize attrs for calling the requantize op.
+      auto requantize_attrs = make_node<RequantizeAttrs>();
+      requantize_attrs->input_scale = input_scale;
+      requantize_attrs->input_zero_point = input_zero_point;
+      requantize_attrs->output_scale = output_scale;
+      requantize_attrs->output_zero_point = output_zero_point;
+      requantize_attrs->rounding = "TONEAREST";
+      requantize_attrs->out_dtype = input_dtype;
+
+      auto requantized_expr =
+          RequantizeLower(quantized_expr, requantize_attrs.operator->(), input_shape);
+      requantized_exprs.push_back(requantized_expr);
+    } else {
+      requantized_exprs.push_back(quantized_expr);
+    }
+    idx++;
+  }
+  return Concatenate(TupleNode::make(requantized_exprs), concatenate_attrs->axis);
+}
+
+RELAY_REGISTER_OP("qnn.concatenate")
+.describe(R"code(Concatenate the quantized input tensors along the given axis.
+)code" TVM_ADD_FILELINE)
+.set_attrs_type_key("relay.attrs.QnnConcatenateAttrs")
+.set_num_inputs(1)
+.add_argument("data", "Tensor", "The tensor to concatenate.")
+.set_support_level(11)
+.add_type_rel("QnnConcatenate", ConcatenateRel<QnnConcatenateAttrs>)
+.set_attr<FTVMLegalize>("FTVMLegalize", ConcatenateLegalize);
+
+TVM_REGISTER_API("relay.qnn.op._make.concatenate")
+.set_body_typed(MakeQnnConcatenate);
+
+}  // namespace qnn
+}  // namespace relay
+}  // namespace tvm
diff --git a/src/relay/qnn/util.h b/src/relay/qnn/util.h
index 1ada7ecd070e8..b10707a88d36c 100644
--- a/src/relay/qnn/util.h
+++ b/src/relay/qnn/util.h
@@ -67,6 +67,9 @@ static inline const int32_t GetQmax(const DataType& dtype) {
   }
 }
 
+Expr RequantizeLower(const Expr& input_tensor, const RequantizeAttrs* param,
+                     const Array<IndexExpr>& input_shape);
+
 }  // namespace qnn
 }  // namespace relay
 }  // namespace tvm
diff --git a/tests/python/relay/test_qnn_concatenate.py b/tests/python/relay/test_qnn_concatenate.py
index b0745cf251c48..0a595435d81ab 100644
--- a/tests/python/relay/test_qnn_concatenate.py
+++ b/tests/python/relay/test_qnn_concatenate.py
@@ -39,7 +39,6 @@ def test_same_io_qnn_params():
                                  axis=axis)
 
     func = relay.Function([x, y], z)
-    assert func.astext().count('requantize') == 0
     mod = relay.Module.from_expr(func)
     mod = relay.transform.Legalize()(mod)
     func = mod["main"]
@@ -68,7 +67,6 @@ def test_different_io_qnn_params():
                                  axis=axis)
 
     func = relay.Function([x, y], z)
-    assert func.astext().count('requantize') == 2
     mod = relay.Module.from_expr(func)
     mod = relay.transform.Legalize()(mod)
     func = mod["main"]
@@ -97,7 +95,6 @@ def test_few_same_io_qnn_params():
                                  axis=axis)
 
     func = relay.Function([x, y], z)
-    assert func.astext().count('requantize') == 1
     mod = relay.Module.from_expr(func)
     mod = relay.transform.Legalize()(mod)
     func = mod["main"]
@@ -126,7 +123,6 @@ def test_same_i_qnn_params():
                                  axis=axis)
 
     func = relay.Function([x, y], z)
-    assert func.astext().count('requantize') == 1
     mod = relay.Module.from_expr(func)
     mod = relay.transform.Legalize()(mod)
     func = mod["main"]