Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[GNA] Depth-wise separable convolution support #7281

Merged
merged 4 commits into from
Sep 16, 2021
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 5 additions & 4 deletions inference-engine/src/gna_plugin/gna_plugin.cpp
Original file line number Diff line number Diff line change
@@ -64,9 +64,10 @@
#include "transformations/convert_matmul_to_pointwise_convolution.hpp"
#include "transformations/split_convolution_with_large_buffer_size.hpp"
#include "transformations/handle_transposes_around_matmul.hpp"
#include "transformations/decompose_2d_conv.hpp"
#include "transformations/convert_padded2valid_conv.hpp"
#include "transformations/decompose_2d_convolution.hpp"
#include "transformations/convert_padded_to_valid_convolution.hpp"
#include "transformations/insert_reshape_around_matmul.hpp"
#include "transformations/convert_dwsc_to_scaleshifts.hpp"
#include "transformations/op_conversions/lstm_cell_decomposition.hpp"
#include "transformations/remove_single_input_concat.hpp"

@@ -716,7 +717,8 @@ void GNAPlugin::LoadNetwork(CNNNetwork & _network) {
manager.register_pass<ngraph::pass::ConvertPriorBox>();
manager.register_pass<ngraph::pass::CommonOptimizations>();
manager.register_pass<ngraph::pass::LSTMCellDecomposition>();
manager.register_pass<ConvertPadded2ValidConv>();
manager.register_pass<ConvertDWSCToScaleShifts>();
manager.register_pass<ConvertPaddedToValidConv>();
if (config.gnaCompileTarget == InferenceEngine::GNAConfigParams::GNA_TARGET_2_0) {
manager.register_pass<Decompose2DConvTransposedWithBiasAF>();
manager.register_pass<Decompose2DConvTransposedWithBias>();
@@ -748,7 +750,6 @@ void GNAPlugin::LoadNetwork(CNNNetwork & _network) {
manager.register_pass<RemoveExtraReshapes>();
// UnrollTI should be the last transformation in the transformation pipeline
manager.register_pass<ngraph::pass::UnrollTensorIterator>();

const auto& pass_config = manager.get_pass_config();
pass_config->disable<ngraph::pass::FakeQuantizeMulFusion>();
pass_config->disable<ngraph::pass::FakeQuantizeReshapeFusion>();
Original file line number Diff line number Diff line change
@@ -0,0 +1,207 @@
// Copyright (C) 2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#include <openvino/cc/ngraph/itt.hpp>

#include "transformations/convert_dwsc_to_scaleshifts.hpp"

#include <ngraph/opsets/opset7.hpp>
#include <ngraph/pattern/op/wrap_type.hpp>
#include <ngraph/pattern/op/or.hpp>
#include <transformations/utils/utils.hpp>
#include <ngraph/rt_info.hpp>
#include <ie_common.h>
#include "utils/transformation_helper.hpp"


using namespace GNAPluginNS;

NGRAPH_RTTI_DEFINITION(ConvertDWSCToScaleShifts, "ConvertDWSCToScaleShifts", 0);

static std::shared_ptr<ngraph::Node> DecomposeDWSC(std::shared_ptr<ngraph::opset7::GroupConvolution> dwsc,
std::shared_ptr<ngraph::opset7::Constant> bias_const, std::shared_ptr<ngraph::opset7::FakeQuantize> fq_bias,
std::shared_ptr<ngraph::opset7::Reshape> flat_input_plane, std::shared_ptr<ngraph::Node> flat_filters_plane) {
std::shared_ptr<ngraph::opset7::Constant> const_zero_padding;
std::shared_ptr<ngraph::Node> reshaped_bias;
ngraph::OutputVector output_chunks;
auto input_channel_count = dwsc->get_input_shape(0)[1];
auto input_width = dwsc->get_input_shape(0)[3];
auto output_width = dwsc->get_output_shape(0)[3];
auto filter_width = dwsc->get_input_shape(1)[4];
auto pads_begin = dwsc->get_pads_begin()[1];
auto stride_width = dwsc->get_strides()[1];
auto dilation_width = dwsc->get_dilations()[1];

// Constant with zero padding
if (pads_begin) {
const_zero_padding = std::make_shared<ngraph::opset7::Constant>(dwsc->get_element_type(), ngraph::Shape{1, input_channel_count}, 0);
copy_runtime_info(dwsc, const_zero_padding);
}

// Reshape bias const
if (bias_const) {
auto bias_size = shape_size(bias_const->get_shape());
reshaped_bias = ngraph::op::util::make_try_fold<ngraph::opset7::Reshape>(bias_const,
ngraph::opset7::Constant::create(ngraph::element::i64, ngraph::Shape{2}, ngraph::Shape{1, bias_size}), false);
}

// Move filter over input performing multiplication and addition (scaleshift), take padding, stride, dilation and bias into account
for (int32_t input_position = -pads_begin, o = 0; o < output_width; input_position += stride_width, o++) {
std::shared_ptr<ngraph::Node> previous_layer_output, last_layer_output;
int32_t filter_end = input_position + filter_width * dilation_width;
bool first = true;

filter_end = filter_end < input_width ? filter_end : input_width;

for (int32_t filter_pos = input_position, filter_idx = 0; filter_pos < filter_end; filter_pos += dilation_width, filter_idx++) {
if (filter_pos >= 0) {
auto conv_input_slice = FlatCrop(flat_input_plane, filter_pos * input_channel_count, input_channel_count);
auto conv_filter_slice = FlatCrop(flat_filters_plane, filter_idx * input_channel_count, input_channel_count);

if (first) {
first = false;
previous_layer_output = std::make_shared<ngraph::opset7::Multiply>(conv_input_slice, conv_filter_slice);
copy_runtime_info(dwsc, previous_layer_output);
if (bias_const) {
previous_layer_output = std::make_shared<ngraph::opset7::Add>(previous_layer_output, reshaped_bias);
copy_runtime_info(dwsc, previous_layer_output);
previous_layer_output = InsertFQLayer(fq_bias, previous_layer_output);
}
last_layer_output = previous_layer_output;
} else {
last_layer_output = std::make_shared<ngraph::opset7::Multiply>(conv_input_slice, conv_filter_slice);
copy_runtime_info(dwsc, last_layer_output);
last_layer_output = std::make_shared<ngraph::opset7::Add>(last_layer_output, previous_layer_output);
copy_runtime_info(dwsc, last_layer_output);
previous_layer_output = last_layer_output;
}
}
}

if (!last_layer_output) {
IE_ASSERT(const_zero_padding);
last_layer_output = const_zero_padding;
}

output_chunks.push_back(last_layer_output);
}

// Concat is only needed when output width > 1
if (output_chunks.size() > 1) {
auto concat_output_plane = std::make_shared<ngraph::opset7::Concat>(output_chunks, 0);
copy_runtime_info(dwsc, concat_output_plane);
return concat_output_plane;
}

return output_chunks[0].get_node_shared_ptr();
}

static bool Convert(std::shared_ptr<ngraph::Node> leading_transpose,
std::shared_ptr<ngraph::Node> dwsc_node,
std::shared_ptr<ngraph::Node> bias_const_node,
std::shared_ptr<ngraph::Node> fq_bias_node,
std::shared_ptr<ngraph::Node> trailing_transpose) {
auto dwsc = std::dynamic_pointer_cast<ngraph::opset7::GroupConvolution>(dwsc_node);
auto bias_const = std::dynamic_pointer_cast<ngraph::opset7::Constant>(bias_const_node);
auto fq_bias = std::dynamic_pointer_cast<ngraph::opset7::FakeQuantize>(fq_bias_node);

// We are looking for Transpose(NHWC->NCHW) => GroupConv => Transpose(NCHW->NHWC)
// or similar cases, so required network must be in NHWC order like in TF
if (!TransposeOrderMatches(std::dynamic_pointer_cast<ngraph::opset7::Transpose>(leading_transpose), {0, 3, 1, 2}))
return false;

if (!TransposeOrderMatches(std::dynamic_pointer_cast<ngraph::opset7::Transpose>(trailing_transpose), {0, 2, 3, 1}))
return false;

auto output_channel_count = dwsc->get_output_shape(0)[1];
auto output_width = dwsc->get_output_shape(0)[3];

// Prepare flat input data
auto flat_input_plane = std::make_shared<ngraph::opset7::Reshape>(leading_transpose->input_value(0),
ngraph::opset7::Constant::create(ngraph::element::i64, ngraph::Shape{2},
ngraph::Shape{1, shape_size(dwsc->input_value(0).get_shape())}), false);

// Prepare flat filter data
auto filters_const = std::dynamic_pointer_cast<ngraph::Node>(dwsc->get_input_node_shared_ptr(1));
auto filters_size = shape_size(filters_const->get_shape());

auto transposed_filters_const = ngraph::op::util::make_try_fold<ngraph::opset7::Transpose>(filters_const,
ngraph::opset7::Constant::create(ngraph::element::i64, ngraph::Shape{5}, ngraph::Shape{4, 1, 2, 3, 0}));

auto flat_filters_plane = ngraph::op::util::make_try_fold<ngraph::opset7::Reshape>(transposed_filters_const,
ngraph::opset7::Constant::create(ngraph::element::i64, ngraph::Shape{2}, ngraph::Shape{1, filters_size}), false);

copy_runtime_info(dwsc, {flat_input_plane, transposed_filters_const, flat_filters_plane});

// Convert DWSC to a set of diagonal layers
auto output_plane = DecomposeDWSC(dwsc, bias_const, fq_bias, flat_input_plane, flat_filters_plane);

// Restore the original output shape
auto result = std::make_shared<ngraph::opset7::Reshape>(output_plane,
ngraph::opset7::Constant::create(ngraph::element::i64, ngraph::Shape{4},
ngraph::Shape{1, output_channel_count, 1, output_width}), false);
copy_runtime_info(dwsc, result);

// We need to put here the original Group Convolution layer name, so the new layer output can be used as a network result
std::string result_name = trailing_transpose->get_friendly_name();
replace_node(trailing_transpose, result);
result->set_friendly_name(result_name);

return true;
}

static bool VerifyDWSC(const ngraph::Output<ngraph::Node>& output) {
auto dwsc = output.get_node();

// Verify it's a 1D convolution
// Verify that filter group count == input channel count
// Verify that per group filter output channel count == 1
if (!consumers_and_rank(1, 4)(output) ||
dwsc->get_input_shape(1)[3] != 1 || dwsc->get_input_shape(0)[2] != 1 || dwsc->get_output_shape(0)[2] != 1 ||
dwsc->get_input_shape(1)[0] != dwsc->get_input_shape(0)[1] ||
dwsc->get_input_shape(1)[1] != 1)
return false;

return true;
}

ConvertDWSCToScaleShifts::ConvertDWSCToScaleShifts() {
MATCHER_SCOPE(ConvertDWSCToScaleShifts);

auto const_input = ngraph::pattern::wrap_type<ngraph::opset7::Constant>();
auto leading_transpose = ngraph::pattern::wrap_type<ngraph::opset7::Transpose>({ngraph::pattern::any_input(), const_input},
consumers_and_rank(1, 4));
auto filters_const_fq = ngraph::pattern::wrap_type<ngraph::opset7::Constant>(ngraph::pattern::rank_equals(4));
auto fq_filters_const = ngraph::pattern::wrap_type<ngraph::opset7::FakeQuantize>({filters_const_fq, const_input, const_input, const_input, const_input},
consumers_and_rank(1, 4));
auto reshape_filters_const = ngraph::pattern::wrap_type<ngraph::opset7::Reshape>({fq_filters_const, const_input}, ngraph::pattern::rank_equals(5));
auto filters_const = ngraph::pattern::wrap_type<ngraph::opset7::Constant>(ngraph::pattern::rank_equals(5));
auto dwsc_filters = std::make_shared<ngraph::pattern::op::Or>(ngraph::OutputVector{filters_const, reshape_filters_const });
auto dwsc = ngraph::pattern::wrap_type<ngraph::opset7::GroupConvolution>({leading_transpose, dwsc_filters}, VerifyDWSC);
auto bias = ngraph::pattern::wrap_type<ngraph::opset7::Add>({dwsc, const_input});
auto fq_bias = ngraph::pattern::wrap_type<ngraph::opset7::FakeQuantize>({bias, const_input, const_input, const_input, const_input},
elilobanova marked this conversation as resolved.
Show resolved Hide resolved
consumers_and_rank(1, 4));
auto transpose_input = std::make_shared<ngraph::pattern::op::Or>(ngraph::OutputVector{dwsc, bias, fq_bias});
auto trailing_transpose = ngraph::pattern::wrap_type<ngraph::opset7::Transpose>({transpose_input, const_input}, consumers_and_rank(1, 4));

ngraph::matcher_pass_callback callback = [=](ngraph::pattern::Matcher& m) {
const auto& pattern_map = m.get_pattern_value_map();
auto bias_it = pattern_map.find(bias);
auto bias_node = (bias_it == std::end(pattern_map) ? nullptr : bias_it->second.get_node_shared_ptr());
std::shared_ptr<ngraph::Node> bias_const = nullptr;

if (bias_node && (bias_const = VerifyBiasGetConst(pattern_map.at(dwsc).get_node_shared_ptr(), bias_node)) == nullptr)
return false;

auto fq_bias_it = pattern_map.find(fq_bias);
auto fq_bias_node = (fq_bias_it == std::end(pattern_map) ? nullptr : fq_bias_it->second.get_node_shared_ptr());

return Convert(pattern_map.at(leading_transpose).get_node_shared_ptr(), pattern_map.at(dwsc).get_node_shared_ptr(),
bias_const, fq_bias_node,
pattern_map.at(trailing_transpose).get_node_shared_ptr());
};

auto m = std::make_shared<ngraph::pattern::Matcher>(trailing_transpose, matcher_name);
this->register_matcher(m, callback);
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
// Copyright (C) 2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#pragma once

#include <ngraph/pass/graph_rewrite.hpp>

namespace GNAPluginNS {

/**
* @brief Convert a depthwise separable convolution (represented by a GroupConvolution) to a set of ScaleShift layers (MatMul + Add)
* Additionally supported are bias and fake quantize layers.
*/
class ConvertDWSCToScaleShifts : public ngraph::pass::MatcherPass {
public:
NGRAPH_RTTI_DECLARATION;
ConvertDWSCToScaleShifts();
};

} // namespace GNAPluginNS
Original file line number Diff line number Diff line change
@@ -4,7 +4,7 @@

#include <openvino/cc/ngraph/itt.hpp>

#include "transformations/convert_padded2valid_conv.hpp"
#include "transformations/convert_padded_to_valid_convolution.hpp"

#include <memory>

@@ -19,7 +19,7 @@

using namespace GNAPluginNS;

NGRAPH_RTTI_DEFINITION(ConvertPadded2ValidConv, "ConvertPadded2ValidConv", 0);
NGRAPH_RTTI_DEFINITION(ConvertPaddedToValidConv, "ConvertPaddedToValidConv", 0);

static bool VerifyAndGetConvData(std::shared_ptr<ngraph::opset7::Convolution> conv, ConvData& conv_data) {
const auto& input = conv->input_value(0);
@@ -34,17 +34,6 @@ static bool VerifyAndGetConvData(std::shared_ptr<ngraph::opset7::Convolution> co
return conv_data.pads_begin_height || conv_data.pads_end_height || conv_data.pads_begin_width || conv_data.pads_end_width;
}

static bool VerifyBias(std::shared_ptr<ngraph::opset7::Add> bias, const size_t& filter_count) {
auto add_const = std::dynamic_pointer_cast<ngraph::opset7::Constant>(bias->input_value(0).get_node_shared_ptr());

// We need to check both inputs of Add when looking for constant
if (!add_const)
add_const = std::dynamic_pointer_cast<ngraph::opset7::Constant>(bias->input_value(1).get_node_shared_ptr());

// The add may be a normal add not convolution bias, then we just go further
return (add_const && shape_size(add_const->get_shape()) == filter_count);
}

static void InsertPadding(ngraph::OutputVector& input_rows_to_concat, size_t size, const std::shared_ptr<ngraph::opset7::Convolution>& conv,
const std::shared_ptr<ngraph::opset7::Constant> padding_const, size_t biggest_padding) {

@@ -181,9 +170,6 @@ static bool Convert(std::shared_ptr<ngraph::Node> leading_transpose,
if (!TransposeOrderMatches(std::dynamic_pointer_cast<ngraph::opset7::Transpose>(trailing_transpose), {0, 2, 3, 1}))
return false;

if (bias && !VerifyBias(std::dynamic_pointer_cast<ngraph::opset7::Add>(bias), conv_data.filter_count))
return false;

GeneratePadding(std::dynamic_pointer_cast<ngraph::opset7::Transpose>(leading_transpose),
std::dynamic_pointer_cast<ngraph::opset7::Convolution>(conv), conv_data);

@@ -196,8 +182,8 @@ static std::function<bool(ngraph::Output<ngraph::Node>)> consumers_and_rank(cons
};
}

ConvertPadded2ValidConv::ConvertPadded2ValidConv() {
MATCHER_SCOPE(ConvertPadded2ValidConv);
ConvertPaddedToValidConv::ConvertPaddedToValidConv() {
MATCHER_SCOPE(ConvertPaddedToValidConv);

auto const_input = ngraph::pattern::wrap_type<ngraph::opset7::Constant>();
auto leading_transpose = ngraph::pattern::wrap_type<ngraph::opset7::Transpose>({ngraph::pattern::any_input(), const_input},
@@ -237,6 +223,9 @@ ConvertPadded2ValidConv::ConvertPadded2ValidConv() {
auto bias_it = pattern_map.find(bias);
auto bias_node = (bias_it == std::end(pattern_map) ? nullptr : bias_it->second.get_node_shared_ptr());

if (bias_node && !VerifyBiasGetConst(pattern_map.at(conv).get_node_shared_ptr(), bias_node))
return false;

return Convert(pattern_map.at(leading_transpose).get_node_shared_ptr(), pattern_map.at(conv).get_node_shared_ptr(),
pattern_map.at(trailing_transpose).get_node_shared_ptr(), bias_node);
};
Original file line number Diff line number Diff line change
@@ -28,10 +28,10 @@ namespace GNAPluginNS {
* Transpose (NCHW -> NHWC) Transpose (NCHW -> NHWC)
*
*/
class ConvertPadded2ValidConv : public ngraph::pass::MatcherPass {
class ConvertPaddedToValidConv : public ngraph::pass::MatcherPass {
public:
NGRAPH_RTTI_DECLARATION;
ConvertPadded2ValidConv();
ConvertPaddedToValidConv();
};

} // namespace GNAPluginNS
Loading