Skip to content

Commit

Permalink
[ONNX] QLinearConvolution (openvinotoolkit#7210)
Browse files Browse the repository at this point in the history
  • Loading branch information
tsocha authored and akuporos committed Sep 6, 2021
1 parent d384efe commit 868b783
Show file tree
Hide file tree
Showing 21 changed files with 533 additions and 115 deletions.
6 changes: 6 additions & 0 deletions ngraph/core/include/ngraph/op/fake_quantize.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,12 @@ class NGRAPH_API FakeQuantize : public ngraph::op::Op {
m_auto_broadcast = auto_broadcast;
}

/// \brief Evaluates FakeQuantize on host tensors using the reference implementation.
bool evaluate(const HostTensorVector& outputs, const HostTensorVector& inputs) const override;
/// \brief Reports whether evaluate() supports this op's input element type.
bool has_evaluate() const override;
/// \brief Constant folding is deliberately disabled for FakeQuantize: always
/// reports "not folded" so the op stays in the graph.
bool constant_fold(OutputVector& output_values, const OutputVector& inputs_values) override {
return false;
}

private:
std::size_t m_levels;
AutoBroadcastSpec m_auto_broadcast = op::AutoBroadcastType::NUMPY;
Expand Down
79 changes: 79 additions & 0 deletions ngraph/core/src/op/fake_quantize.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,9 @@
#include "ngraph/op/constant.hpp"
#include "ngraph/op/convert.hpp"
#include "ngraph/op/select.hpp"
#include "ngraph/runtime/reference/fake_quantize.hpp"
#include "ngraph/shape.hpp"
#include "ngraph/type/element_type.hpp"

using namespace std;
using namespace ngraph;
Expand Down Expand Up @@ -73,3 +75,80 @@ shared_ptr<Node> op::FakeQuantize::clone_with_new_inputs(const OutputVector& new
m_levels,
m_auto_broadcast);
}

namespace fakequantizeop {
// Typed kernel: runs the reference FakeQuantize implementation for a single
// element type ET. arg0..arg4 are the op's five inputs (data, input_low,
// input_high, output_low, output_high); `parent` supplies the static input
// shapes, the number of quantization levels and the broadcast spec.
// Always returns true (the dispatcher below reports unsupported types).
template <element::Type_t ET>
bool evaluate(const HostTensorPtr& arg0,
const HostTensorPtr& arg1,
const HostTensorPtr& arg2,
const HostTensorPtr& arg3,
const HostTensorPtr& arg4,
const HostTensorPtr& out,
const ngraph::op::FakeQuantize* parent) {
NGRAPH_OP_SCOPE(v0_FakeQuantize_evaluate);
// Map the compile-time element type to its C++ value type.
using T = typename element_type_traits<ET>::value_type;
runtime::reference::fake_quantize<T>(arg0->get_data_ptr<const T>(),
arg1->get_data_ptr<const T>(),
arg2->get_data_ptr<const T>(),
arg3->get_data_ptr<const T>(),
arg4->get_data_ptr<const T>(),
out->get_data_ptr<T>(),
parent->get_input_shape(0),
parent->get_input_shape(1),
parent->get_input_shape(2),
parent->get_input_shape(3),
parent->get_input_shape(4),
parent->get_levels(),
parent->get_auto_broadcast());
return true;
}

// Runtime dispatcher: selects the typed kernel above based on the element
// type of the data input (arg0). Returns false for unsupported types.
// NOTE: the supported-type list must stay in sync with
// FakeQuantize::has_evaluate() below.
bool evaluate_fakequantize(const HostTensorPtr& arg0,
const HostTensorPtr& arg1,
const HostTensorPtr& arg2,
const HostTensorPtr& arg3,
const HostTensorPtr& arg4,
const HostTensorPtr& out,
const ngraph::op::FakeQuantize* parent) {
bool rc = true;
switch (arg0->get_element_type()) {
NGRAPH_TYPE_CASE(evaluate_fakequantize, i32, arg0, arg1, arg2, arg3, arg4, out, parent);
NGRAPH_TYPE_CASE(evaluate_fakequantize, i64, arg0, arg1, arg2, arg3, arg4, out, parent);
NGRAPH_TYPE_CASE(evaluate_fakequantize, u32, arg0, arg1, arg2, arg3, arg4, out, parent);
NGRAPH_TYPE_CASE(evaluate_fakequantize, u64, arg0, arg1, arg2, arg3, arg4, out, parent);
NGRAPH_TYPE_CASE(evaluate_fakequantize, f16, arg0, arg1, arg2, arg3, arg4, out, parent);
NGRAPH_TYPE_CASE(evaluate_fakequantize, f32, arg0, arg1, arg2, arg3, arg4, out, parent);
default:
rc = false;
break;
}
return rc;
}
} // namespace fakequantizeop

// Runs the reference FakeQuantize computation on host tensors by forwarding
// the five inputs, the single output and this op's attributes to the
// type-dispatching helper; returns false for unsupported element types.
bool ngraph::op::FakeQuantize::evaluate(const HostTensorVector& outputs, const HostTensorVector& inputs) const {
    NGRAPH_OP_SCOPE(v0_FakeQuantize_evaluate);
    const auto& data = inputs[0];
    const auto& input_low = inputs[1];
    const auto& input_high = inputs[2];
    const auto& output_low = inputs[3];
    const auto& output_high = inputs[4];
    return fakequantizeop::evaluate_fakequantize(data,
                                                 input_low,
                                                 input_high,
                                                 output_low,
                                                 output_high,
                                                 outputs[0],
                                                 this);
}

// Reports whether evaluate() can handle the current input element type.
// The accepted set mirrors the dispatch in evaluate_fakequantize.
bool ngraph::op::FakeQuantize::has_evaluate() const {
    NGRAPH_OP_SCOPE(v0_FakeQuantize_has_evaluate);
    const auto& input_et = get_input_element_type(0);
    return input_et == ngraph::element::i32 || input_et == ngraph::element::i64 ||
           input_et == ngraph::element::u32 || input_et == ngraph::element::u64 ||
           input_et == ngraph::element::f16 || input_et == ngraph::element::f32;
}
21 changes: 12 additions & 9 deletions ngraph/frontend/onnx/frontend/src/op/conv.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -13,14 +13,15 @@
#include "ngraph/builder/reshape.hpp"
#include "ngraph/op/group_conv.hpp"
#include "ngraph/op/util/attr_types.hpp"
#include "onnx_import/core/null_node.hpp"
#include "utils/convpool.hpp"
#include "utils/reshape.hpp"

namespace ngraph {
namespace onnx_import {
namespace op {
namespace set_1 {
namespace {
namespace detail {
std::shared_ptr<ngraph::op::Op> make_ng_convolution(const Output<ngraph::Node>& data,
const Output<ngraph::Node>& filters,
const ngraph::Strides& strides,
Expand Down Expand Up @@ -57,14 +58,13 @@ std::shared_ptr<ngraph::Node> add_bias(const Output<ngraph::Node>& ng_conv, cons
return {
std::make_shared<default_opset::Add>(ng_conv, reshape::reshape_channel_shaped_node_to_nchw(bias, conv_rank))};
}
} // namespace

OutputVector conv(const Node& node) {
OutputVector conv(const Node& node,
Output<ngraph::Node> data,
Output<ngraph::Node> filters,
Output<ngraph::Node> bias) {
// in the current implementation we assume that the data input rank is static
// and only the 'batch' dimension can be dynamic
const OutputVector& inputs = node.get_ng_inputs();
const auto data = inputs.at(0);
const auto filters = inputs.at(1);
const auto groups = node.get_attribute_value<int64_t>("group", 1);

NGRAPH_CHECK(data.get_partial_shape().rank().is_static(), "The input data tensor's rank has to be known (static)");
Expand All @@ -80,10 +80,9 @@ OutputVector conv(const Node& node) {
make_ng_convolution(data, filters, strides, dilations, padding_below, padding_above, groups, auto_pad_type);

// no bias param
if (inputs.size() < 3) {
if (ngraph::op::is_null(bias)) {
return {conv_node};
} else {
const auto& bias = inputs.at(2);
const auto& bias_ps = bias.get_partial_shape();

NGRAPH_CHECK(bias_ps.rank().is_static() && bias_ps.rank().get_length() == 1,
Expand All @@ -92,7 +91,11 @@ OutputVector conv(const Node& node) {
return {add_bias(conv_node, bias)};
}
}

} // namespace detail
// Entry point for ONNX Conv: unpacks the node's inputs and delegates to
// detail::conv. The optional bias (third input) is replaced by a NullNode
// placeholder when absent, so the detail implementation has a uniform
// four-argument signature shared with QLinearConv.
OutputVector conv(const Node& node) {
    const OutputVector& inputs = node.get_ng_inputs();
    // Guard against malformed models: operator[] on a too-short vector is UB,
    // so validate the required input count and use bounds-checked access.
    NGRAPH_CHECK(inputs.size() >= 2, "The ONNX Conv operation requires at least 2 inputs. Got: ", inputs.size());
    const Output<ngraph::Node> bias =
        inputs.size() < 3 ? std::make_shared<NullNode>()->output(0) : inputs.at(2);
    return detail::conv(node, inputs.at(0), inputs.at(1), bias);
}
} // namespace set_1

} // namespace op
Expand Down
3 changes: 3 additions & 0 deletions ngraph/frontend/onnx/frontend/src/op/conv.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,9 @@ namespace ngraph {
namespace onnx_import {
namespace op {
namespace set_1 {
namespace detail {
OutputVector conv(const Node& node, Output<ngraph::Node> data, Output<ngraph::Node> filters, Output<ngraph::Node> bias);
}
/// \brief Performs ONNX Conv operation.
///
/// \param node The ONNX node object representing this operation.
Expand Down
43 changes: 25 additions & 18 deletions ngraph/frontend/onnx/frontend/src/op/dequantize_linear.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@
namespace ngraph {
namespace onnx_import {
namespace op {
namespace {
namespace detail {
Output<ngraph::Node> get_zero_point(const OutputVector& inputs) {
if (inputs.size() == 3 && !ngraph::op::is_null(inputs[2])) {
auto zero_point = inputs[2];
Expand All @@ -33,7 +33,7 @@ Output<ngraph::Node> get_zero_point(const OutputVector& inputs) {
return default_opset::Constant::create(element::f32, Shape{}, {0});
}
}
} // namespace
} // namespace detail
namespace set_1 {
OutputVector dequantize_linear(const Node& node) {
const OutputVector inputs{node.get_ng_inputs()};
Expand All @@ -44,7 +44,7 @@ OutputVector dequantize_linear(const Node& node) {

const auto x = inputs[0];
const auto scale = inputs[1];
const auto zero_point = get_zero_point(inputs);
const auto zero_point = detail::get_zero_point(inputs);

common::validate_scalar_input("Dequantization scale", scale.get_node_shared_ptr(), {element::f32});
common::validate_scalar_input("Zero point", zero_point.get_node_shared_ptr());
Expand All @@ -58,7 +58,7 @@ OutputVector dequantize_linear(const Node& node) {
} // namespace set_1

namespace set_13 {
namespace {
namespace detail {
void validate_scale(const Output<ngraph::Node> scale, const Output<ngraph::Node> x, const int64_t axis) {
const auto& scale_shape = scale.get_partial_shape();
NGRAPH_CHECK(scale_shape.rank().get_length() == 0 || scale_shape.rank().get_length() == 1,
Expand Down Expand Up @@ -129,25 +129,16 @@ std::shared_ptr<ngraph::Node> reshape_input(const Output<ngraph::Node> input,

return std::make_shared<default_opset::Reshape>(input, target_shape, true);
}
} // namespace

OutputVector dequantize_linear(const Node& node) {
const OutputVector inputs{node.get_ng_inputs()};

NGRAPH_CHECK(2 <= inputs.size() && inputs.size() <= 3,
"The DequantizeLinear op expects 2 required and one optional "
"input. Got: ",
inputs.size());

const auto x = inputs[0];
auto scale = inputs[1];
auto zero_point = get_zero_point(inputs);

OutputVector dequantize_linear(Output<ngraph::Node> x,
Output<ngraph::Node> scale,
Output<ngraph::Node> zero_point,
int64_t axis,
Node node) {
const auto x_shape = x.get_partial_shape();

NGRAPH_CHECK(x_shape.rank().is_static(), "Rank of the input data tensor has to be known (static).");

int64_t axis{node.get_attribute_value<int64_t>("axis", 1)};
axis = ngraph::normalize_axis(node.get_description(), axis, x_shape.rank());

validate_scale(scale, x, axis);
Expand All @@ -163,6 +154,22 @@ OutputVector dequantize_linear(const Node& node) {
std::make_shared<default_opset::Multiply>(std::make_shared<default_opset::Subtract>(converted_x, zero_point),
scale)};
}
} // namespace detail

// ONNX DequantizeLinear (opset 13): validates the input count, resolves the
// optional zero point, and forwards everything together with the "axis"
// attribute (default 1) to the reshaping detail implementation.
OutputVector dequantize_linear(const Node& node) {
    const OutputVector inputs{node.get_ng_inputs()};

    const auto input_count = inputs.size();
    NGRAPH_CHECK(2 <= input_count && input_count <= 3,
                 "The DequantizeLinear op expects 2 required and one optional "
                 "input. Got: ",
                 input_count);

    const auto axis = node.get_attribute_value<int64_t>("axis", 1);
    // these reshapes make sure that dequantization happens over the specified axis
    return detail::dequantize_linear(inputs[0], inputs[1], op::detail::get_zero_point(inputs), axis, node);
}
} // namespace set_13
} // namespace op
} // namespace onnx_import
Expand Down
13 changes: 12 additions & 1 deletion ngraph/frontend/onnx/frontend/src/op/dequantize_linear.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -10,14 +10,25 @@
namespace ngraph {
namespace onnx_import {
namespace op {
namespace detail {
Output<ngraph::Node> get_zero_point(const OutputVector& inputs);
}

namespace set_1 {
OutputVector dequantize_linear(const Node& node);

} // namespace set_1

namespace set_13 {
OutputVector dequantize_linear(const Node& node);
namespace detail {
OutputVector dequantize_linear(Output<ngraph::Node> x,
Output<ngraph::Node> scale,
Output<ngraph::Node> zero_point,
int64_t axis,
Node node);
}
OutputVector dequantize_linear(const Node& node);
} // namespace set_13

} // namespace op

Expand Down
68 changes: 68 additions & 0 deletions ngraph/frontend/onnx/frontend/src/op/qlinear_conv.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
// Copyright (C) 2018-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

// Disabled in CMakeList
// Update to higher opset required

#include "op/qlinear_conv.hpp"

#include <cstddef>
#include <memory>
#include <vector>

#include "conv.hpp"
#include "dequantize_linear.hpp"
#include "exceptions.hpp"
#include "ngraph/opsets/opset6.hpp"
#include "onnx_import/core/null_node.hpp"
#include "quantize_linear.hpp"

namespace ngraph {
namespace onnx_import {
namespace op {
namespace set_1 {
// Implements ONNX QLinearConv by dequantizing the inputs to float, running a
// regular convolution, and re-quantizing the result:
//   y = quantize(conv(dequantize(x), dequantize(w)) + B', y_scale, y_zero_point)
OutputVector qlinear_conv(const Node& node) {
const OutputVector& inputs = node.get_ng_inputs();

// Inputs follow the ONNX QLinearConv order: quantized data and weights with
// their scales and zero points, plus the requested output quantization.
auto x = inputs.at(0);
auto x_scale = inputs.at(1);
auto x_zero_point = inputs.at(2);
auto w = inputs.at(3);
auto w_scale = inputs.at(4);
auto w_zero_point = inputs.at(5);
auto y_scale = inputs.at(6);
auto y_zero_point = inputs.at(7);
// Bias is the optional 9th input; a NullNode output marks it as absent.
Output<ngraph::Node> B = inputs.size() > 8 ? inputs.at(8) : std::make_shared<NullNode>()->output(0);

// Dequantize data and weights to float. Zero points are converted to f32
// so the subtraction in dequantize_linear happens in floating point.
// Axis 1 is passed for the per-axis validation path.
x = set_13::detail::dequantize_linear(x,
                                      x_scale,
                                      std::make_shared<opset6::Convert>(x_zero_point, element::f32),
                                      1,
                                      node)[0];
w = set_13::detail::dequantize_linear(w,
                                      w_scale,
                                      std::make_shared<opset6::Convert>(w_zero_point, element::f32),
                                      1,
                                      node)[0];

if (!ngraph::op::is_null(B)) {
    // Per the ONNX QLinearConv spec the bias is quantized with scale
    // x_scale * w_scale and zero point 0, so converting it and multiplying
    // by both scales dequantizes it to float — TODO confirm against spec.
    B = std::make_shared<opset6::Multiply>(std::make_shared<opset6::Convert>(B, x_scale.get_element_type()),
                                           std::make_shared<opset6::Multiply>(x_scale, w_scale))
            ->output(0);
}

// Float convolution on the dequantized operands (shared with regular Conv).
auto result = detail::conv(node, x, w, B)[0];

// Re-quantize the float result to the requested output scale/zero point.
result = op::detail::make_fake_quantize(y_scale, y_zero_point, result);

return {result};
}

} // namespace set_1

} // namespace op

} // namespace onnx_import

} // namespace ngraph
31 changes: 31 additions & 0 deletions ngraph/frontend/onnx/frontend/src/op/qlinear_conv.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
// Copyright (C) 2018-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

// Disabled in CMakeList
// Update to higher opset required

#pragma once

#include "ngraph/node.hpp"
#include "onnx_import/core/node.hpp"

namespace ngraph {
namespace onnx_import {
namespace op {
namespace set_1 {
/// \brief Performs ONNX QLinearConv operation.
///
/// \param node The ONNX node object representing this operation.
///
/// \return The vector containing Ngraph nodes producing output of ONNX quantized
/// convolution operation.
OutputVector qlinear_conv(const Node& node);

} // namespace set_1

} // namespace op

} // namespace onnx_import

} // namespace ngraph
Loading

0 comments on commit 868b783

Please sign in to comment.