Skip to content

Commit

Permalink
[GNA] Depth-wise separable convolution support (openvinotoolkit#7281)
Browse files Browse the repository at this point in the history
* [GNA] Add support for DWSC, other fixes and code refactoring.

* [GNA] Change supported layout to NHWC

* [GNA] Detect bias const only on second position, move verification of dwsc to matcher
  • Loading branch information
sirzabek authored and dood-apo committed Aug 24, 2023
1 parent 6c351cf commit 92cb12a
Show file tree
Hide file tree
Showing 9 changed files with 311 additions and 84 deletions.
9 changes: 5 additions & 4 deletions inference-engine/src/gna_plugin/gna_plugin.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -64,9 +64,10 @@
#include "transformations/convert_matmul_to_pointwise_convolution.hpp"
#include "transformations/split_convolution_with_large_buffer_size.hpp"
#include "transformations/handle_transposes_around_matmul.hpp"
#include "transformations/decompose_2d_conv.hpp"
#include "transformations/convert_padded2valid_conv.hpp"
#include "transformations/decompose_2d_convolution.hpp"
#include "transformations/convert_padded_to_valid_convolution.hpp"
#include "transformations/insert_reshape_around_matmul.hpp"
#include "transformations/convert_dwsc_to_scaleshifts.hpp"
#include "transformations/op_conversions/lstm_cell_decomposition.hpp"
#include "transformations/remove_single_input_concat.hpp"

Expand Down Expand Up @@ -716,7 +717,8 @@ void GNAPlugin::LoadNetwork(CNNNetwork & _network) {
manager.register_pass<ngraph::pass::ConvertPriorBox>();
manager.register_pass<ngraph::pass::CommonOptimizations>();
manager.register_pass<ngraph::pass::LSTMCellDecomposition>();
manager.register_pass<ConvertPadded2ValidConv>();
manager.register_pass<ConvertDWSCToScaleShifts>();
manager.register_pass<ConvertPaddedToValidConv>();
if (config.gnaCompileTarget == InferenceEngine::GNAConfigParams::GNA_TARGET_2_0) {
manager.register_pass<Decompose2DConvTransposedWithBiasAF>();
manager.register_pass<Decompose2DConvTransposedWithBias>();
Expand Down Expand Up @@ -748,7 +750,6 @@ void GNAPlugin::LoadNetwork(CNNNetwork & _network) {
manager.register_pass<RemoveExtraReshapes>();
// UnrollTI should be the last transformation in the transformation pipeline
manager.register_pass<ngraph::pass::UnrollTensorIterator>();

const auto& pass_config = manager.get_pass_config();
pass_config->disable<ngraph::pass::FakeQuantizeMulFusion>();
pass_config->disable<ngraph::pass::FakeQuantizeReshapeFusion>();
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,207 @@
// Copyright (C) 2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#include <openvino/cc/ngraph/itt.hpp>

#include "transformations/convert_dwsc_to_scaleshifts.hpp"

#include <ngraph/opsets/opset7.hpp>
#include <ngraph/pattern/op/wrap_type.hpp>
#include <ngraph/pattern/op/or.hpp>
#include <transformations/utils/utils.hpp>
#include <ngraph/rt_info.hpp>
#include <ie_common.h>
#include "utils/transformation_helper.hpp"


using namespace GNAPluginNS;

NGRAPH_RTTI_DEFINITION(ConvertDWSCToScaleShifts, "ConvertDWSCToScaleShifts", 0);

/**
 * @brief Builds the output plane of a 1D depth-wise separable convolution out of element-wise Multiply/Add nodes.
 *
 * For every output position one chunk is produced: each filter tap that lands on real input data contributes
 * Multiply(input slice, filter slice), and the products are chained together with Add nodes. When a bias constant
 * is given it is added to the first product of every chunk and the result is passed through InsertFQLayer
 * together with fq_bias. A chunk with no tap on real input data is replaced by a constant row of zeros.
 *
 * @param dwsc group convolution supplying shapes, begin padding, stride, dilation, element type and runtime info
 * @param bias_const optional bias constant (may be null)
 * @param fq_bias optional fake quantize node forwarded to InsertFQLayer (may be null)
 * @param flat_input_plane flattened input data, cropped in slices of one value per channel
 * @param flat_filters_plane flattened filters, cropped in slices of one value per channel
 * @return the single chunk, or a Concat (axis 0) of all chunks when there is more than one output position
 */
static std::shared_ptr<ngraph::Node> DecomposeDWSC(std::shared_ptr<ngraph::opset7::GroupConvolution> dwsc,
    std::shared_ptr<ngraph::opset7::Constant> bias_const, std::shared_ptr<ngraph::opset7::FakeQuantize> fq_bias,
    std::shared_ptr<ngraph::opset7::Reshape> flat_input_plane, std::shared_ptr<ngraph::Node> flat_filters_plane) {
    const auto channel_count = dwsc->get_input_shape(0)[1];
    const auto in_width = dwsc->get_input_shape(0)[3];
    const auto out_width = dwsc->get_output_shape(0)[3];
    const auto kernel_width = dwsc->get_input_shape(1)[4];
    const auto left_padding = dwsc->get_pads_begin()[1];
    const auto stride = dwsc->get_strides()[1];
    const auto dilation = dwsc->get_dilations()[1];

    // A row of zeros is only prepared when the convolution is padded at the beginning
    std::shared_ptr<ngraph::opset7::Constant> const_zero_padding;
    if (left_padding) {
        const_zero_padding = std::make_shared<ngraph::opset7::Constant>(dwsc->get_element_type(),
            ngraph::Shape{1, channel_count}, 0);
        copy_runtime_info(dwsc, const_zero_padding);
    }

    // Flatten the bias to a single row so it can be added to a chunk
    std::shared_ptr<ngraph::Node> bias_row;
    if (bias_const) {
        const auto bias_elements = shape_size(bias_const->get_shape());
        bias_row = ngraph::op::util::make_try_fold<ngraph::opset7::Reshape>(bias_const,
            ngraph::opset7::Constant::create(ngraph::element::i64, ngraph::Shape{2}, ngraph::Shape{1, bias_elements}), false);
    }

    // Slide the filter window over the input; padding, stride, dilation and bias are all handled here
    ngraph::OutputVector chunks;
    for (int32_t window_start = -left_padding, out_idx = 0; out_idx < out_width; window_start += stride, out_idx++) {
        // The window never reaches past the end of the real input data
        int32_t window_end = window_start + kernel_width * dilation;
        if (!(window_end < in_width))
            window_end = in_width;

        std::shared_ptr<ngraph::Node> accumulated;
        bool is_first_tap = true;

        for (int32_t tap_pos = window_start, tap_idx = 0; tap_pos < window_end; tap_pos += dilation, tap_idx++) {
            // Taps located in the begin padding contribute nothing
            if (tap_pos < 0)
                continue;

            auto input_slice = FlatCrop(flat_input_plane, tap_pos * channel_count, channel_count);
            auto filter_slice = FlatCrop(flat_filters_plane, tap_idx * channel_count, channel_count);

            std::shared_ptr<ngraph::Node> product = std::make_shared<ngraph::opset7::Multiply>(input_slice, filter_slice);
            copy_runtime_info(dwsc, product);

            if (is_first_tap) {
                is_first_tap = false;
                accumulated = product;
                // The bias is applied exactly once per chunk, right after the first product
                if (bias_const) {
                    accumulated = std::make_shared<ngraph::opset7::Add>(accumulated, bias_row);
                    copy_runtime_info(dwsc, accumulated);
                    accumulated = InsertFQLayer(fq_bias, accumulated);
                }
            } else {
                accumulated = std::make_shared<ngraph::opset7::Add>(product, accumulated);
                copy_runtime_info(dwsc, accumulated);
            }
        }

        // No tap touched real input data: the chunk is pure padding
        if (!accumulated) {
            IE_ASSERT(const_zero_padding);
            accumulated = const_zero_padding;
        }

        chunks.push_back(accumulated);
    }

    // A single chunk needs no concatenation
    if (chunks.size() <= 1)
        return chunks[0].get_node_shared_ptr();

    auto concat_output_plane = std::make_shared<ngraph::opset7::Concat>(chunks, 0);
    copy_runtime_info(dwsc, concat_output_plane);
    return concat_output_plane;
}

/**
 * @brief Replaces a matched Transpose(NHWC->NCHW) => GroupConvolution [=> Add [=> FakeQuantize]] => Transpose(NCHW->NHWC)
 * chain with a set of element-wise layers working directly on the NHWC data.
 *
 * @param leading_transpose transpose feeding the group convolution; its input is used as the NHWC source data
 * @param dwsc_node the matched group convolution
 * @param bias_const_node bias constant, or null when the pattern has no bias
 * @param fq_bias_node fake quantize following the bias, or null when absent
 * @param trailing_transpose transpose closing the pattern; it is the node that gets replaced
 * @return true when the graph was modified, false when the transposes do not have the expected orders
 */
static bool Convert(std::shared_ptr<ngraph::Node> leading_transpose,
    std::shared_ptr<ngraph::Node> dwsc_node,
    std::shared_ptr<ngraph::Node> bias_const_node,
    std::shared_ptr<ngraph::Node> fq_bias_node,
    std::shared_ptr<ngraph::Node> trailing_transpose) {
    auto dwsc = std::dynamic_pointer_cast<ngraph::opset7::GroupConvolution>(dwsc_node);
    auto bias_const = std::dynamic_pointer_cast<ngraph::opset7::Constant>(bias_const_node);
    auto fq_bias = std::dynamic_pointer_cast<ngraph::opset7::FakeQuantize>(fq_bias_node);

    // The matcher is expected to hand over a GroupConvolution; never dereference a failed cast
    if (!dwsc)
        return false;

    // We are looking for Transpose(NHWC->NCHW) => GroupConv => Transpose(NCHW->NHWC)
    // or similar cases, so required network must be in NHWC order like in TF
    if (!TransposeOrderMatches(std::dynamic_pointer_cast<ngraph::opset7::Transpose>(leading_transpose), {0, 3, 1, 2}))
        return false;

    if (!TransposeOrderMatches(std::dynamic_pointer_cast<ngraph::opset7::Transpose>(trailing_transpose), {0, 2, 3, 1}))
        return false;

    auto output_channel_count = dwsc->get_output_shape(0)[1];
    auto output_width = dwsc->get_output_shape(0)[3];

    // Prepare flat input data (taken from before the leading transpose, i.e. still in NHWC order)
    auto flat_input_plane = std::make_shared<ngraph::opset7::Reshape>(leading_transpose->input_value(0),
        ngraph::opset7::Constant::create(ngraph::element::i64, ngraph::Shape{2},
            ngraph::Shape{1, shape_size(dwsc->input_value(0).get_shape())}), false);

    // Prepare flat filter data
    auto filters_const = dwsc->get_input_node_shared_ptr(1);
    auto filters_size = shape_size(filters_const->get_shape());

    // Move the filter width to the front and the group (channel) dimension to the back,
    // so that every filter position becomes one contiguous row of per-channel weights
    auto transposed_filters_const = ngraph::op::util::make_try_fold<ngraph::opset7::Transpose>(filters_const,
        ngraph::opset7::Constant::create(ngraph::element::i64, ngraph::Shape{5}, ngraph::Shape{4, 1, 2, 3, 0}));

    auto flat_filters_plane = ngraph::op::util::make_try_fold<ngraph::opset7::Reshape>(transposed_filters_const,
        ngraph::opset7::Constant::create(ngraph::element::i64, ngraph::Shape{2}, ngraph::Shape{1, filters_size}), false);

    copy_runtime_info(dwsc, {flat_input_plane, transposed_filters_const, flat_filters_plane});

    // Convert DWSC to a set of diagonal layers
    auto output_plane = DecomposeDWSC(dwsc, bias_const, fq_bias, flat_input_plane, flat_filters_plane);

    // Restore the output shape of the replaced trailing transpose. The decomposed data is laid out
    // position by position with the channels innermost, which is NHWC with a height of 1.
    auto result = std::make_shared<ngraph::opset7::Reshape>(output_plane,
        ngraph::opset7::Constant::create(ngraph::element::i64, ngraph::Shape{4},
            ngraph::Shape{1, 1, output_width, output_channel_count}), false);
    copy_runtime_info(dwsc, result);

    // The new layer takes over the name of the trailing transpose it replaces,
    // so its output can still be used as a network result
    std::string result_name = trailing_transpose->get_friendly_name();
    replace_node(trailing_transpose, result);
    result->set_friendly_name(result_name);

    return true;
}

/**
 * @brief Pattern predicate accepting only group convolutions that are 1D depth-wise separable convolutions.
 *
 * @param output output of the candidate group convolution node
 * @return true when the output passes consumers_and_rank(1, 4) and all shape checks below hold
 */
static bool VerifyDWSC(const ngraph::Output<ngraph::Node>& output) {
    if (!consumers_and_rank(1, 4)(output))
        return false;

    auto node = output.get_node();

    // 1D convolution: filter height, input height and output height must all be 1
    if (node->get_input_shape(1)[3] != 1)
        return false;
    if (node->get_input_shape(0)[2] != 1)
        return false;
    if (node->get_output_shape(0)[2] != 1)
        return false;

    // Filter group count has to be equal to the input channel count
    if (node->get_input_shape(1)[0] != node->get_input_shape(0)[1])
        return false;

    // Every group has to produce exactly one output channel
    return node->get_input_shape(1)[1] == 1;
}

/**
 * Registers a matcher for
 *   Transpose => GroupConvolution [=> Add [=> FakeQuantize]] => Transpose
 * where the filters are either a rank 5 Constant or Constant => FakeQuantize => Reshape (rank 5).
 * The callback validates the optional bias and delegates the graph rewrite to Convert().
 */
ConvertDWSCToScaleShifts::ConvertDWSCToScaleShifts() {
    MATCHER_SCOPE(ConvertDWSCToScaleShifts);

    namespace pattern = ngraph::pattern;
    namespace opset = ngraph::opset7;

    auto const_input = pattern::wrap_type<opset::Constant>();
    auto leading_transpose = pattern::wrap_type<opset::Transpose>({pattern::any_input(), const_input},
        consumers_and_rank(1, 4));

    // Filters: a plain rank 5 constant, or a rank 4 constant that is fake quantized and reshaped to rank 5
    auto filters_const_fq = pattern::wrap_type<opset::Constant>(pattern::rank_equals(4));
    auto fq_filters_const = pattern::wrap_type<opset::FakeQuantize>(
        {filters_const_fq, const_input, const_input, const_input, const_input},
        consumers_and_rank(1, 4));
    auto reshape_filters_const = pattern::wrap_type<opset::Reshape>({fq_filters_const, const_input},
        pattern::rank_equals(5));
    auto filters_const = pattern::wrap_type<opset::Constant>(pattern::rank_equals(5));
    auto dwsc_filters = std::make_shared<pattern::op::Or>(ngraph::OutputVector{filters_const, reshape_filters_const});

    auto dwsc = pattern::wrap_type<opset::GroupConvolution>({leading_transpose, dwsc_filters}, VerifyDWSC);

    // Optional bias and optional fake quantize placed after the bias
    auto bias = pattern::wrap_type<opset::Add>({dwsc, const_input});
    auto fq_bias = pattern::wrap_type<opset::FakeQuantize>(
        {bias, const_input, const_input, const_input, const_input},
        consumers_and_rank(1, 4));

    auto transpose_input = std::make_shared<pattern::op::Or>(ngraph::OutputVector{dwsc, bias, fq_bias});
    auto trailing_transpose = pattern::wrap_type<opset::Transpose>({transpose_input, const_input},
        consumers_and_rank(1, 4));

    ngraph::matcher_pass_callback callback = [=](ngraph::pattern::Matcher& m) {
        const auto& matched = m.get_pattern_value_map();

        // Node matched for an optional part of the pattern, or null when that part is absent
        const auto optional_node = [&matched](const std::shared_ptr<ngraph::Node>& pattern_node) -> std::shared_ptr<ngraph::Node> {
            const auto found = matched.find(pattern_node);
            if (found == std::end(matched))
                return nullptr;
            return found->second.get_node_shared_ptr();
        };

        const auto dwsc_node = matched.at(dwsc).get_node_shared_ptr();

        // When an Add follows the convolution it has to be a real bias, otherwise the match is rejected
        std::shared_ptr<ngraph::Node> bias_const = nullptr;
        if (const auto bias_node = optional_node(bias)) {
            bias_const = VerifyBiasGetConst(dwsc_node, bias_node);
            if (bias_const == nullptr)
                return false;
        }

        const auto fq_bias_node = optional_node(fq_bias);

        return Convert(matched.at(leading_transpose).get_node_shared_ptr(), dwsc_node,
            bias_const, fq_bias_node,
            matched.at(trailing_transpose).get_node_shared_ptr());
    };

    auto m = std::make_shared<ngraph::pattern::Matcher>(trailing_transpose, matcher_name);
    this->register_matcher(m, callback);
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
// Copyright (C) 2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#pragma once

#include <ngraph/pass/graph_rewrite.hpp>

namespace GNAPluginNS {

/**
* @brief Convert a 1D depthwise separable convolution (represented by a GroupConvolution placed between
* two Transpose layers) to a set of ScaleShift layers (element-wise Multiply + Add).
* An optional bias (Add with a constant) and optional FakeQuantize layers are supported as well.
*/
class ConvertDWSCToScaleShifts : public ngraph::pass::MatcherPass {
public:
NGRAPH_RTTI_DECLARATION;
// Builds the pattern and registers the matcher together with its callback
ConvertDWSCToScaleShifts();
};

} // namespace GNAPluginNS
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@

#include <openvino/cc/ngraph/itt.hpp>

#include "transformations/convert_padded2valid_conv.hpp"
#include "transformations/convert_padded_to_valid_convolution.hpp"

#include <memory>

Expand All @@ -19,7 +19,7 @@

using namespace GNAPluginNS;

NGRAPH_RTTI_DEFINITION(ConvertPadded2ValidConv, "ConvertPadded2ValidConv", 0);
NGRAPH_RTTI_DEFINITION(ConvertPaddedToValidConv, "ConvertPaddedToValidConv", 0);

static bool VerifyAndGetConvData(std::shared_ptr<ngraph::opset7::Convolution> conv, ConvData& conv_data) {
const auto& input = conv->input_value(0);
Expand All @@ -34,17 +34,6 @@ static bool VerifyAndGetConvData(std::shared_ptr<ngraph::opset7::Convolution> co
return conv_data.pads_begin_height || conv_data.pads_end_height || conv_data.pads_begin_width || conv_data.pads_end_width;
}

/**
 * @brief Checks whether an Add node looks like a convolution bias.
 *
 * @param bias the Add node to inspect
 * @param filter_count number of convolution filters the constant has to match
 * @return true when the first constant input found (input 0 is checked before input 1)
 *         holds exactly filter_count elements; false for any other Add
 */
static bool VerifyBias(std::shared_ptr<ngraph::opset7::Add> bias, const size_t& filter_count) {
    // The constant may be connected to either input of the Add
    std::shared_ptr<ngraph::opset7::Constant> candidate;
    for (size_t port = 0; port < 2 && !candidate; ++port)
        candidate = std::dynamic_pointer_cast<ngraph::opset7::Constant>(bias->input_value(port).get_node_shared_ptr());

    // An Add without a constant is a regular addition, not a bias
    if (!candidate)
        return false;

    return shape_size(candidate->get_shape()) == filter_count;
}

static void InsertPadding(ngraph::OutputVector& input_rows_to_concat, size_t size, const std::shared_ptr<ngraph::opset7::Convolution>& conv,
const std::shared_ptr<ngraph::opset7::Constant> padding_const, size_t biggest_padding) {

Expand Down Expand Up @@ -181,9 +170,6 @@ static bool Convert(std::shared_ptr<ngraph::Node> leading_transpose,
if (!TransposeOrderMatches(std::dynamic_pointer_cast<ngraph::opset7::Transpose>(trailing_transpose), {0, 2, 3, 1}))
return false;

if (bias && !VerifyBias(std::dynamic_pointer_cast<ngraph::opset7::Add>(bias), conv_data.filter_count))
return false;

GeneratePadding(std::dynamic_pointer_cast<ngraph::opset7::Transpose>(leading_transpose),
std::dynamic_pointer_cast<ngraph::opset7::Convolution>(conv), conv_data);

Expand All @@ -196,8 +182,8 @@ static std::function<bool(ngraph::Output<ngraph::Node>)> consumers_and_rank(cons
};
}

ConvertPadded2ValidConv::ConvertPadded2ValidConv() {
MATCHER_SCOPE(ConvertPadded2ValidConv);
ConvertPaddedToValidConv::ConvertPaddedToValidConv() {
MATCHER_SCOPE(ConvertPaddedToValidConv);

auto const_input = ngraph::pattern::wrap_type<ngraph::opset7::Constant>();
auto leading_transpose = ngraph::pattern::wrap_type<ngraph::opset7::Transpose>({ngraph::pattern::any_input(), const_input},
Expand Down Expand Up @@ -237,6 +223,9 @@ ConvertPadded2ValidConv::ConvertPadded2ValidConv() {
auto bias_it = pattern_map.find(bias);
auto bias_node = (bias_it == std::end(pattern_map) ? nullptr : bias_it->second.get_node_shared_ptr());

if (bias_node && !VerifyBiasGetConst(pattern_map.at(conv).get_node_shared_ptr(), bias_node))
return false;

return Convert(pattern_map.at(leading_transpose).get_node_shared_ptr(), pattern_map.at(conv).get_node_shared_ptr(),
pattern_map.at(trailing_transpose).get_node_shared_ptr(), bias_node);
};
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -28,10 +28,10 @@ namespace GNAPluginNS {
* Transpose (NCHW -> NHWC) Transpose (NCHW -> NHWC)
*
*/
class ConvertPadded2ValidConv : public ngraph::pass::MatcherPass {
class ConvertPaddedToValidConv : public ngraph::pass::MatcherPass {
public:
NGRAPH_RTTI_DECLARATION;
ConvertPadded2ValidConv();
ConvertPaddedToValidConv();
};

} // namespace GNAPluginNS
Loading

0 comments on commit 92cb12a

Please sign in to comment.