[LPT] ConvolutionBackpropData support (#5313)
* [LPT] ConvolutionBackpropData support

* minor fixes

* [Transformations] Legacy subtract precision keep

* [LPT] ConvolutionBackpropData tests improvements

* [LPT] ConvolutionBackpropData weights folding when can't be transformed

* [LPT] CanBeTransformed unification and convolution weights folding

* [LPT] GPU INT8 optimizations condition flag

* [LPT] Concat precision predict improvement

* [LPT] Turn off asymmetric quantization for Deconvolution on GPU

* [LPT] Improvements from review

* [LPT] Check if layer after concat isQuantized and require per-tensor quantize

* [LPT] Improvement for Deconv->FQ pattern

* [LPT] Commented failing tests
vzinovie authored May 17, 2021
1 parent f84b257 commit e41e255
Showing 38 changed files with 1,401 additions and 168 deletions.
4 changes: 4 additions & 0 deletions inference-engine/src/cldnn_engine/cldnn_engine.cpp
@@ -70,6 +70,7 @@
#include <low_precision/pull_reshape_through_dequantization.hpp>
#include <low_precision/pull_transpose_through_dequantization.hpp>
#include <low_precision/transformer.hpp>
#include <low_precision/convolution_backprop_data.hpp>
#include <low_precision/mat_mul.hpp>
#include <low_precision/strided_slice.hpp>
#include <low_precision/network_helper.hpp>
@@ -381,6 +382,9 @@ InferenceEngine::CNNNetwork clDNNEngine::CloneAndTransformNetwork(const Inferenc
.add<MatMulTransformation, ngraph::opset1::MatMul>(LayerTransformation::Params(params)
.setSupportAsymmetricQuantization(false)
.setSupport3DTensorOnActivations(false))
.add<ConvolutionBackpropDataTransformation, ngraph::opset1::ConvolutionBackpropData>(LayerTransformation::Params(params)
.setSupportAsymmetricQuantization(false)
.setDeconvolutionSpecificChannelsRatio(true))
// INT8 StridedSlice not supported
.remove<StridedSliceTransformation, ngraph::opset1::StridedSlice>());
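For orientation, the .add<ConvolutionBackpropDataTransformation, ...> call above plugs into the plugin's LPT setup roughly as follows. This is a hedged, condensed sketch of the surrounding code rather than the exact diff; the getAllTransformations builder and the nGraphFunc variable are assumptions based on the LPT API this plugin used at the time.

    // Sketch only: how clDNNEngine configures and runs LPT with the new transformation.
    using namespace ngraph::pass::low_precision;

    auto lptTransformations = LowPrecisionTransformer::getAllTransformations(params)  // params: LayerTransformation::Params
        .add<ConvolutionBackpropDataTransformation, ngraph::opset1::ConvolutionBackpropData>(
            LayerTransformation::Params(params)
                .setSupportAsymmetricQuantization(false)       // asymmetric INT8 deconvolution is turned off on GPU
                .setDeconvolutionSpecificChannelsRatio(true))  // extra channels-ratio check for deconvolutions
        .remove<StridedSliceTransformation, ngraph::opset1::StridedSlice>();  // INT8 StridedSlice not supported

    LowPrecisionTransformer transformer(lptTransformations);
    transformer.transform(nGraphFunc);  // nGraphFunc: std::shared_ptr<ngraph::Function> being compiled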

@@ -0,0 +1,25 @@
// Copyright (C) 2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#pragma once

#include <ngraph/ngraph.hpp>
#include "weightable_layer_transformation.hpp"

namespace ngraph {
namespace pass {
namespace low_precision {

class TRANSFORMATIONS_API ConvolutionBackpropDataTransformation : public WeightableLayerTransformation {
public:
ConvolutionBackpropDataTransformation(const Params& params);
void registerMatcherIn(GraphRewrite& pass, TransformationContext& context) const override;
bool transform(TransformationContext& context, ngraph::pattern::Matcher &m) const override;
bool canBeTransformed(const TransformationContext& context, std::shared_ptr<Node> op) const override;
bool isQuantized(std::shared_ptr<Node> layer) const noexcept override;
};

} // namespace low_precision
} // namespace pass
} // namespace ngraph
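The header above only declares the transformation. For readers unfamiliar with the target pattern, here is a self-contained, illustrative ngraph snippet of the kind of quantized deconvolution subgraph it is meant to handle; all shapes, intervals, and the helper name makeQuantizedDeconvolution are made up for illustration and are not taken from this commit.

    #include <memory>
    #include <ngraph/ngraph.hpp>
    #include <ngraph/opsets/opset1.hpp>

    using namespace ngraph;

    // FakeQuantize on activations and on weights feeding opset1::ConvolutionBackpropData.
    std::shared_ptr<Function> makeQuantizedDeconvolution() {
        auto data = std::make_shared<opset1::Parameter>(element::f32, Shape{1, 8, 16, 16});

        auto makeFq = [](const Output<Node>& input, float low, float high) {
            auto inLow = opset1::Constant::create(element::f32, Shape{}, {low});
            auto inHigh = opset1::Constant::create(element::f32, Shape{}, {high});
            auto outLow = opset1::Constant::create(element::f32, Shape{}, {low});
            auto outHigh = opset1::Constant::create(element::f32, Shape{}, {high});
            return std::make_shared<opset1::FakeQuantize>(input, inLow, inHigh, outLow, outHigh, 256ul);
        };

        // ConvolutionBackpropData weights are laid out as [C_IN, C_OUT, kH, kW].
        auto weights = opset1::Constant::create(element::f32, Shape{8, 4, 3, 3}, {1.f});

        auto deconvolution = std::make_shared<opset1::ConvolutionBackpropData>(
            makeFq(data, 0.f, 25.5f),        // quantized activations
            makeFq(weights, -1.27f, 1.27f),  // quantized weights
            Strides{2, 2},
            CoordinateDiff{0, 0},
            CoordinateDiff{0, 0},
            Strides{1, 1});

        return std::make_shared<Function>(NodeVector{deconvolution}, ParameterVector{data});
    }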
@@ -45,6 +45,13 @@ class TRANSFORMATIONS_API DataPrecision {
public:
DataPrecision() : precision(element::undefined), min(0.f), max(0.f), hasZeroPoint(false) {}

explicit DataPrecision(const element::Type& precision) {
this->precision = precision;
min = getMinValue(precision, 256);
max = getMaxValue(precision, 256);
hasZeroPoint = false;
}

DataPrecision(const element::Type precision, const float min, const float max, const bool hasZeroPoint) :
precision(precision),
min(min),
@@ -122,29 +129,6 @@ class TRANSFORMATIONS_API DataPrecision {
static element::Type getPrecision(const size_t /* quantizationLevels */, const bool signedInterval) {
return signedInterval ? element::i8 : element::u8;
}

static float getMin(const size_t quantizationLevels, const bool signedInterval) {
if (quantizationLevels == 255) {
return signedInterval ? -127.0f : 0.0f;
} else if (quantizationLevels == 256) {
return signedInterval ? -128.0f : 0.0f;
} else {
// THROW_TRANSFORMATION_EXCEPTION << "quantization level " << quantizationLevels << " is not supported";
// FIXME: not completed
return signedInterval ? -128.0f : 0.0f;
}
}

static float getMax(const size_t quantizationLevels, const bool signedInterval) {
if ((quantizationLevels == 255) || (quantizationLevels == 256)) {
return signedInterval ? 127.0f : 255.0f;
} else {
// THROW_TRANSFORMATION_EXCEPTION << "quantization level " << quantizationLevels << " is not supported";
// FIXME: not completed
// return quantizationLevels - 1.0;
return signedInterval ? 127.0f : 255.0f;
}
}
};

inline bool operator==(const DataPrecision& value1, const DataPrecision& value2) {
@@ -181,15 +165,17 @@ class TRANSFORMATIONS_API LayerTransformation {
std::vector<element::Type> precisionsOnActivations = { element::u8, element::i8 },
std::vector<element::Type> precisionsOnWeights = { element::i8 },
element::Type deqPrecision = element::f32,
bool support3DTensorOnActivations = true) :
bool support3DTensorOnActivations = true,
bool deconvolutionSpecificChannelsRatio = false) :
updatePrecisions(updatePrecisions),
quantizedTensorAlignmentOnActivations(quantizedTensorAlignmentOnActivations),
quantizedTensorAlignmentOnWeights(quantizedTensorAlignmentOnWeights),
supportAsymmetricQuantization(supportAsymmetricQuantization),
precisionsOnActivations(precisionsOnActivations),
precisionsOnWeights(precisionsOnWeights),
deqPrecision(deqPrecision),
support3DTensorOnActivations(support3DTensorOnActivations) {
support3DTensorOnActivations(support3DTensorOnActivations),
deconvolutionSpecificChannelsRatio(deconvolutionSpecificChannelsRatio) {
if (precisionsOnActivations.size() == 0ul) {
THROW_TRANSFORMATION_EXCEPTION << "precisions on activations are not specisifed";
}
@@ -234,6 +220,11 @@ class TRANSFORMATIONS_API LayerTransformation {
return *this;
}

Params& setDeconvolutionSpecificChannelsRatio(const bool deconvolutionSpecificChannelsRatio) {
this->deconvolutionSpecificChannelsRatio = deconvolutionSpecificChannelsRatio;
return *this;
}

bool updatePrecisions;
QuantizedTensorAlignment quantizedTensorAlignmentOnActivations;
QuantizedTensorAlignment quantizedTensorAlignmentOnWeights;
@@ -242,6 +233,7 @@
std::vector<element::Type> precisionsOnWeights;
element::Type deqPrecision;
bool support3DTensorOnActivations;
bool deconvolutionSpecificChannelsRatio;
};

class PrecisionDetails {
@@ -318,6 +310,7 @@ class TRANSFORMATIONS_API LayerTransformation {
std::vector<element::Type> precisionsOnWeights;
element::Type deqPrecision;
bool support3DTensorOnActivations;
bool deconvolutionSpecificChannelsRatio;

// absolute value, used to determine quantization interval asymmetry
float quantizationIntervalAsymmetryThreshold;
@@ -109,7 +109,8 @@ class TRANSFORMATIONS_API NetworkHelper {
const float max,
const bool hasZeroPoint,
const bool updatePrecision,
const element::Type deqPrecision = element::f32);
const element::Type deqPrecision = element::f32,
const size_t outChannelsShapeIndex = 0);

static std::shared_ptr<opset1::FakeQuantize> updateFakeQuantize(
std::shared_ptr<opset1::FakeQuantize> fq,
@@ -183,16 +184,24 @@ class TRANSFORMATIONS_API NetworkHelper {
static std::shared_ptr<Node> toScalarIfPossible(std::shared_ptr<Node> node);

static std::shared_ptr<Node> fold_fake_quantize(const std::shared_ptr<opset1::FakeQuantize>& fq);
static std::shared_ptr<Node> fold_fake_quantize(const std::shared_ptr<opset1::FakeQuantize>& fq, const bool roundValues);
static std::shared_ptr<Node> fold_fake_quantize(const std::shared_ptr<opset1::FakeQuantize>& fq, const bool roundValues, int outChannelsShapeIndex = 0);

static FakeQuantizeDequantization foldDequantization(const std::shared_ptr<Node>& node, const size_t branchIndex, const bool inPlace = false);

static std::shared_ptr<ngraph::Node> separateInStandaloneBranch(std::shared_ptr<ngraph::Node> node);

static std::shared_ptr<opset1::FakeQuantize> fuseConvert(const std::shared_ptr<opset1::FakeQuantize>& fakeQuantize);

static std::vector<element::Type> precisionIntersection(
const std::vector<element::Type>& v1,
const std::vector<element::Type>& v2) noexcept;

private:
static std::shared_ptr<Node> foldFakeQuantize(const std::shared_ptr<opset1::FakeQuantize>& fq, const bool roundValues, const bool roundValuesWasSet);
static std::shared_ptr<Node> foldFakeQuantize(
const std::shared_ptr<opset1::FakeQuantize>& fq,
const bool roundValues,
const bool roundValuesWasSet,
int outChannelsShapeIndex = 0);

// 1 - on weights
// 0 - weightable layer was not found
@@ -303,10 +303,6 @@ class TRANSFORMATIONS_API LowPrecisionTransformer : public IParamsManager, ILaye
std::map<std::string, std::vector<std::pair<std::string, LayerTransformationPtr>>> transformations,
GraphRewrite& pass,
TransformationContext& context);

std::vector<element::Type> precisionIntersection(
const std::vector<element::Type>& v1,
const std::vector<element::Type>& v2) const noexcept;
};

class TRANSFORMATIONS_API TypeRelaxedReplacer : public GraphRewrite {
@@ -22,7 +22,7 @@ class TRANSFORMATIONS_API WeightableLayerTransformation : public LayerTransforma
bool isPrecisionPreserved(std::shared_ptr<Node> layer) const noexcept override;

protected:
void decomposeFakeQuantizeForWeightsPath(std::shared_ptr<Node> weightableLayer) const;
void decomposeFakeQuantizeForWeightsPath(const std::shared_ptr<Node>& weightableLayer, size_t outChannelsShapeIndex = 0ul) const;
static bool isGroup(const std::shared_ptr<Node>& node);
static bool isDepthwise(const std::shared_ptr<Node>& node);
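The new outChannelsShapeIndex parameter reflects a layout difference between the two weightable layers: regular Convolution weights keep output channels in dimension 0, while ConvolutionBackpropData weights keep them in dimension 1, so the per-channel FakeQuantize decomposition on the weights path has to be told which axis is the output-channel axis. A small illustration (the shapes are examples, not values from the diff):

    #include <ngraph/ngraph.hpp>

    // opset1::Convolution filters:             [C_OUT, C_IN, kH, kW] -> output channels at index 0
    // opset1::ConvolutionBackpropData filters: [C_IN, C_OUT, kH, kW] -> output channels at index 1
    const ngraph::Shape convolutionWeights{16, 8, 3, 3};    // decompose with outChannelsShapeIndex = 0
    const ngraph::Shape deconvolutionWeights{8, 16, 3, 3};  // decompose with outChannelsShapeIndex = 1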

@@ -42,6 +42,7 @@ std::shared_ptr<opset1::Subtract> replaceToSubtract(const std::shared_ptr<Node>&
const auto parent = add->get_input_node_shared_ptr(dataBranchIndex);
if (is_type<opset1::Convolution>(parent) ||
is_type<opset1::GroupConvolution>(parent) ||
is_type<opset1::ConvolutionBackpropData>(parent) ||
(is_type<opset1::MatMul>(parent) &&
(is_type<opset1::Constant>(parent->get_input_node_ptr(0)) || is_type<opset1::Constant>(parent->get_input_node_ptr(1))))) {
return nullptr;
27 changes: 13 additions & 14 deletions inference-engine/src/low_precision_transformations/src/concat.cpp
@@ -50,14 +50,14 @@ bool ConcatTransformation::transform(TransformationContext& context, ngraph::pat
return false;
}

DataPrecision dataPrecision = getDataPrecision(fq, QuantizationDetails::getDetails(fq), false);
if (dataPrecision.precision == ngraph::element::undefined) {
std::vector<element::Type> concatParentsChildrensPrecisions = precisionsOnActivations;
fillAvailablePrecisions(subgraph.quantizationLayers[0], concatParentsChildrensPrecisions);
if (concatParentsChildrensPrecisions.empty()) {
return false;
}

std::unordered_map<std::string, ngraph::pass::low_precision::FakeQuantizeDequantization> dequantizations;
for (size_t i = 0; i < subgraph.quantizationLayers.size(); ++i) {
const std::shared_ptr<ngraph::opset1::FakeQuantize> fq = ngraph::as_type_ptr<ngraph::opset1::FakeQuantize>(subgraph.quantizationLayers[i]);
fq = ngraph::as_type_ptr<ngraph::opset1::FakeQuantize>(subgraph.quantizationLayers[i]);
if (fq == nullptr) {
return false;
}
@@ -72,21 +72,20 @@ bool ConcatTransformation::transform(TransformationContext& context, ngraph::pat
if (quantizationDetails.inputHighValues.size() != 1ul) {
return false;
}
std::vector<element::Type> fqChildrensPrecisions = precisionsOnActivations;
fillAvailablePrecisions(subgraph.quantizationLayers[i], fqChildrensPrecisions);
concatParentsChildrensPrecisions = NetworkHelper::precisionIntersection(concatParentsChildrensPrecisions, fqChildrensPrecisions);

const DataPrecision dataPrecision2 = getDataPrecision(subgraph.quantizationLayers[i]->shared_from_this(), quantizationDetails, false);
if (dataPrecision2.precision == ngraph::element::undefined) {
if (concatParentsChildrensPrecisions.empty()) {
return false;
}

if (dataPrecision.precision != dataPrecision2.precision) {
// quantization levels are the same, difference can be in sign
// wider interval (precision) is preferable: use signed if least one interval is signed
dataPrecision = dataPrecision.precision.is_signed() ? dataPrecision : dataPrecision2;
}
}

if (dataPrecision.precision == ngraph::element::undefined) {
return false;
DataPrecision dataPrecision;
if (std::find(concatParentsChildrensPrecisions.begin(), concatParentsChildrensPrecisions.end(), element::i8) != concatParentsChildrensPrecisions.end()) {
dataPrecision = DataPrecision(element::i8);
} else {
dataPrecision = DataPrecision(concatParentsChildrensPrecisions[0]);
}

std::vector<QuantizationDetails> quantizationLayersDetails;
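The branch above relies on the new DataPrecision(element::Type) constructor added earlier in layer_transformation.hpp, which derives the interval from the type assuming 256 quantization levels. The expected behaviour, shown purely for illustration:

    using ngraph::pass::low_precision::DataPrecision;

    DataPrecision signedPrecision(ngraph::element::i8);    // precision = i8, min = -128.f, max = 127.f, hasZeroPoint = false
    DataPrecision unsignedPrecision(ngraph::element::u8);  // precision = u8, min = 0.f,    max = 255.f, hasZeroPoint = false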
@@ -27,7 +27,9 @@ bool ConcatMultiChannelsTransformation::isMultiChannel(const std::vector<std::sh
for (const std::shared_ptr<ngraph::opset1::Concat>& concat : concatLayers) {
const std::vector<std::shared_ptr<ngraph::Node>> children = getChildrenRecursivelyExceptPrecisionPreserved(concat);
for (const std::shared_ptr<ngraph::Node>& child : children) {
if (is_type<ngraph::opset1::Convolution>(child.get())) {
if ((is_type<ngraph::opset1::Convolution>(child.get()) ||
is_type<ngraph::opset1::ConvolutionBackpropData>(child.get())) &&
this->layerTransformationsManager->isQuantized(child)) {
return false;
}
}
@@ -42,7 +42,27 @@ bool ConvolutionTransformation::transform(TransformationContext &context, ngraph
auto convolution = m.get_match_root();

if (!canConvolutionBeTransformed(context, convolution)) {
return false;
auto weightInput = convolution->get_input_node_shared_ptr(1);
std::shared_ptr<opset1::Reshape> reshapeFromWeights = as_type_ptr<opset1::Reshape>(weightInput);
FakeQuantizeDequantization dequantization = reshapeFromWeights == nullptr ?
NetworkHelper::getDequantization(convolution, 1ul) :
NetworkHelper::getDequantization(reshapeFromWeights);
if (dequantization.empty()) {
const auto fqOnWeights = getFakeQuantizeOnWeights(convolution);
std::shared_ptr<ngraph::Node> resultConstant = NetworkHelper::fold_fake_quantize(fqOnWeights);
if (reshapeFromWeights != nullptr) {
resultConstant = fold_reshape<opset1::Reshape>(
resultConstant,
reshapeFromWeights->input_value(1),
false);
}
if (as_type_ptr<opset1::Constant>(resultConstant)) {
replace_node(weightInput, resultConstant);
}
} else {
NetworkHelper::foldDequantization(dequantization.multiply, 0, true);
}
return true;
}

convolution = NetworkHelper::separateInStandaloneBranch(convolution);
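When canConvolutionBeTransformed rejects the layer, the new code above still constant-folds the weights branch so that no dangling FakeQuantize or dequantization subgraph is left behind. The intended effect, sketched as comments (node names are illustrative, not from the commit):

    // Before: Constant(weights) -> FakeQuantize [-> Reshape] -> Convolution   (convolution rejected by LPT)
    // After:  Constant(weights with the FakeQuantize already evaluated) [-> Reshape folded] -> Convolution
    // If a dequantization (Convert/Subtract/Multiply) is found on the weights instead of a FakeQuantize,
    // NetworkHelper::foldDequantization(dequantization.multiply, 0, true) folds it in place.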