Skip to content

Commit

Permalink
part #2
Browse files Browse the repository at this point in the history
  • Loading branch information
eshoguli committed Mar 30, 2022
1 parent 1623356 commit 9a62376
Show file tree
Hide file tree
Showing 41 changed files with 263 additions and 261 deletions.
38 changes: 19 additions & 19 deletions docs/snippets/lpt_intel_cpu_plugin.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

#include <transformations/low_precision/disable_convert_constant_folding_on_const_path.hpp>

#include <low_precision/common/operation_quantization_restriction.hpp>
#include <low_precision/common/quantization_granularity_restriction.hpp>
#include <low_precision/convert_subtract_constant.hpp>
#include <low_precision/convolution.hpp>
#include <low_precision/convolution_backprop_data.hpp>
Expand Down Expand Up @@ -45,7 +45,7 @@ if (useLpt) {
// nGraph common transformations happen here

if (useLpt) {
// convert subtract constant to INT8 to prevent unnecessary FP16 to FP32 conversion
// convert subtract constant to INT8 to prevent unnecessary FP16 to FP32 conversion
manager.register_pass<ngraph::pass::low_precision::ConvertSubtractConstant>(defaultPrecisions);
}

Expand All @@ -70,29 +70,29 @@ manager.run_passes(nGraphFunc);
using namespace ngraph::pass::low_precision;
if (useLpt) {
// Low precision transformations plugin specific configuration: restrictions definition
auto supportedPrecisions = std::vector<OperationPrecisionRestriction>({
OperationPrecisionRestriction::create<ngraph::opset1::Convolution>({
auto supportedPrecisions = std::vector<PrecisionsRestriction>({
PrecisionsRestriction::create<ngraph::opset1::Convolution>({
{0, {ngraph::element::u8}},
{1, {ngraph::element::i8}},
}),
OperationPrecisionRestriction::create<ngraph::opset1::ConvolutionBackpropData>({
PrecisionsRestriction::create<ngraph::opset1::ConvolutionBackpropData>({
{0, {ngraph::element::u8, ngraph::element::i8}},
{1, {ngraph::element::i8}}
}),
OperationPrecisionRestriction::create<ngraph::opset1::GroupConvolution>({
PrecisionsRestriction::create<ngraph::opset1::GroupConvolution>({
{0, {ngraph::element::u8}},
{1, {ngraph::element::i8}}
}),
OperationPrecisionRestriction::create<ngraph::opset1::Multiply>({
PrecisionsRestriction::create<ngraph::opset1::Multiply>({
{0, {ngraph::element::u8}},
{1, {ngraph::element::i8}},
}),
});

// Low precision transformations plugin specific configuration: per-tensor quantization operations definition
auto perTensorQuantization = std::vector<OperationQuantizationRestriction>({
OperationQuantizationRestriction::create<ngraph::opset1::Convolution>({0}),
OperationQuantizationRestriction::create<ngraph::opset1::ConvolutionBackpropData>({0})
auto perTensorQuantization = std::vector<QuantizationGranularityRestriction>({
QuantizationGranularityRestriction::create<ngraph::opset1::Convolution>({0}),
QuantizationGranularityRestriction::create<ngraph::opset1::ConvolutionBackpropData>({0})
});

// Low precision transformations instantiation and registration in pass manager
Expand Down Expand Up @@ -133,8 +133,8 @@ ngraph::pass::Manager manager;

using namespace ngraph::pass::low_precision;
//! [lpt_supported_precisions]
auto supportedPrecisions = std::vector<OperationPrecisionRestriction>({
OperationPrecisionRestriction::create<ngraph::opset1::Convolution>({
auto supportedPrecisions = std::vector<PrecisionsRestriction>({
PrecisionsRestriction::create<ngraph::opset1::Convolution>({
{0, {ngraph::element::u8}},
{1, {ngraph::element::i8}},
}),
Expand All @@ -157,10 +157,10 @@ std::shared_ptr<ov::Model> nGraphFunc;
//! [per_tensor_quantization]
using namespace ngraph::pass::low_precision;

const std::vector<OperationPrecisionRestriction> emptyRestrictions;
const std::vector<PrecisionsRestriction> emptyRestrictions;

auto perTensorQuantization = std::vector<OperationQuantizationRestriction>({
OperationQuantizationRestriction::create<ngraph::opset1::Convolution>({0})
auto perTensorQuantization = std::vector<QuantizationGranularityRestriction>({
QuantizationGranularityRestriction::create<ngraph::opset1::Convolution>({0})
});

ngraph::pass::Manager lptManager;
Expand Down Expand Up @@ -197,15 +197,15 @@ ngraph::pass::Manager manager;

using namespace ngraph::pass::low_precision;
//! [lpt_markup_pipeline]
auto supportedPrecisions = std::vector<OperationPrecisionRestriction>({
OperationPrecisionRestriction::create<ngraph::opset1::Convolution>({
auto supportedPrecisions = std::vector<PrecisionsRestriction>({
PrecisionsRestriction::create<ngraph::opset1::Convolution>({
{0, {ngraph::element::u8}},
{1, {ngraph::element::i8}},
}),
});

auto perTensorQuantization = std::vector<OperationQuantizationRestriction>({
OperationQuantizationRestriction::create<ngraph::opset1::Convolution>({0})
auto perTensorQuantization = std::vector<QuantizationGranularityRestriction>({
QuantizationGranularityRestriction::create<ngraph::opset1::Convolution>({0})
});

ngraph::pass::Manager lptManager;
Expand Down

This file was deleted.

Original file line number Diff line number Diff line change
Expand Up @@ -9,19 +9,18 @@
#include <vector>

#include <ngraph/pass/pass.hpp>
#include "common/operation_quantization_restriction.hpp"
#include "low_precision/lpt_visibility.hpp"
#include "rt_info/quantization_attribute.hpp"
#include "low_precision/rt_info/quantization_granularity_attribute.hpp"

namespace ngraph {
namespace pass {
namespace low_precision {

class PortQuantizationRestriction {
class PortQuantizationGranularityRestriction {
public:
PortQuantizationRestriction(const size_t port, QuantizationAttribute::Type type) : port(port), type(type) {}
PortQuantizationGranularityRestriction(const size_t port, QuantizationGranularityAttribute::Type type) : port(port), type(type) {}
size_t port;
QuantizationAttribute::Type type;
QuantizationGranularityAttribute::Type type;

};

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,16 +19,16 @@ namespace ngraph {
namespace pass {
namespace low_precision {

class OperationPrecisionRestriction {
class PrecisionsRestriction {
public:
using PrecisionsByPort = std::vector<std::pair<size_t, std::vector<ngraph::element::Type>>>;

ngraph::Node::type_info_t operationType;
bool specifyVersion;
std::vector<std::pair<size_t, std::vector<ngraph::element::Type>>> precisionsByPort;

OperationPrecisionRestriction() = default;
OperationPrecisionRestriction(
PrecisionsRestriction() = default;
PrecisionsRestriction(
const ngraph::Node::type_info_t operationType,
const bool specifyVersion,
const PrecisionsByPort& precisionsByPort) :
Expand All @@ -37,14 +37,14 @@ class OperationPrecisionRestriction {
precisionsByPort(precisionsByPort) {}

template <typename T>
static OperationPrecisionRestriction create(
static PrecisionsRestriction create(
const PrecisionsByPort& precisionsByPort,
const bool specifyVersion = false) {
return OperationPrecisionRestriction(T::get_type_info_static(), specifyVersion, precisionsByPort);
return PrecisionsRestriction(T::get_type_info_static(), specifyVersion, precisionsByPort);
}

template <typename T>
static PrecisionsByPort getPrecisionsByOperationType(std::vector<OperationPrecisionRestriction>& restrictions) {
static PrecisionsByPort getPrecisionsByOperationType(std::vector<PrecisionsRestriction>& restrictions) {
for (const auto& restriction : restrictions) {
if (restriction.operationType == T::get_type_info_static()) {
return restriction.precisionsByPort;
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,69 @@
// Copyright (C) 2018-2022 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#pragma once

#include <vector>

#include <ngraph/node.hpp>
#include <ngraph/variant.hpp>

#include <low_precision/lpt_visibility.hpp>
#include <ngraph/pass/graph_rewrite.hpp>
#include <low_precision/rt_info/quantization_granularity_attribute.hpp>
#include <low_precision/common/port_quantization_granularity_restriction.hpp>


namespace ngraph {
namespace pass {
namespace low_precision {

class QuantizationGranularityRestriction {
public:
ngraph::Node::type_info_t operationType;
bool specifyVersion;
std::vector<PortQuantizationGranularityRestriction> restrictedPorts;

QuantizationGranularityRestriction() = default;
QuantizationGranularityRestriction(
const ngraph::Node::type_info_t operationType,
const bool specifyVersion,
const std::vector<PortQuantizationGranularityRestriction>& restrictedPorts) :
operationType(operationType),
specifyVersion(specifyVersion),
restrictedPorts(restrictedPorts) {}

template <typename T>
static QuantizationGranularityRestriction create(
const std::vector<PortQuantizationGranularityRestriction>& restrictedPorts,
const bool specifyVersion) {
return QuantizationGranularityRestriction(T::get_type_info_static(), specifyVersion, restrictedPorts);
}

template <typename T>
static QuantizationGranularityRestriction create(
const std::vector<size_t>& restrictedPorts = {},
const bool specifyVersion = false) {
std::vector<PortQuantizationGranularityRestriction> restrictions;
restrictions.reserve(restrictedPorts.size());
for (auto i = 0ul; i < restrictedPorts.size(); ++i) {
restrictions[i] = PortQuantizationGranularityRestriction(restrictedPorts[i], ngraph::QuantizationGranularityAttribute::Type::PerTensor);
}
return QuantizationGranularityRestriction(T::get_type_info_static(), specifyVersion, restrictions);
}

template <typename T>
static std::vector<PortQuantizationGranularityRestriction> getPrecisionsByOperationType(std::vector<QuantizationGranularityRestriction>& restrictions) {
for (const auto& restriction : restrictions) {
if (restriction.operationType == T::get_type_info_static()) {
return restriction.restrictedPorts;
}
}
return {};
}
};

} // namespace low_precision
} // namespace pass
} // namespace ngraph
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,8 @@

#include <low_precision/lpt_visibility.hpp>
#include <ngraph/pass/graph_rewrite.hpp>
#include <low_precision/common/operation_quantization_restriction.hpp>
#include <low_precision/common/quantization_granularity_restriction.hpp>
#include <low_precision/common/precisions_restriction.hpp>
#include "low_precision/layer_transformation.hpp"
#include "low_precision/markup_precisions.hpp"

Expand All @@ -41,13 +42,13 @@ class ngraph::pass::low_precision::MarkupOptimizations : public ngraph::pass::Fu
public:
OPENVINO_RTTI("MarkupOptimizations", "0");
MarkupOptimizations(
const std::vector<OperationPrecisionRestriction>& precisionRestrictions,
const std::vector<OperationQuantizationRestriction>& quantizationRestrictions,
const std::vector<PrecisionsRestriction>& precisionRestrictions,
const std::vector<QuantizationGranularityRestriction>& quantizationRestrictions,
const AttributeParameters& params);
bool run_on_model(const std::shared_ptr<ngraph::Function>& m) override;
private:
const std::vector<OperationPrecisionRestriction>& precisionRestrictions;
const std::vector<OperationQuantizationRestriction>& quantizationRestrictions;
const std::vector<PrecisionsRestriction>& precisionRestrictions;
const std::vector<QuantizationGranularityRestriction>& quantizationRestrictions;
const AttributeParameters& params;
};

Expand All @@ -61,17 +62,17 @@ class ngraph::pass::low_precision::LowPrecision : public ngraph::pass::FunctionP
public:
OPENVINO_RTTI("LowPrecision", "0");
LowPrecision(
const std::vector<OperationPrecisionRestriction>& precisionRestrictions = {},
const std::vector<OperationQuantizationRestriction>& quantizationRestrictions = {},
const std::vector<PrecisionsRestriction>& precisionRestrictions = {},
const std::vector<QuantizationGranularityRestriction>& quantizationRestrictions = {},
const LayerTransformation::Params = LayerTransformation::Params());
bool run_on_model(const std::shared_ptr<ngraph::Function>& m) override;

static bool isFunctionQuantized(const std::shared_ptr<const ngraph::Function>& function);
static bool isFQLevelsPresent(const std::shared_ptr<const ngraph::Function>& function, const std::set<size_t>& levels);

protected:
std::vector<OperationPrecisionRestriction> precisionRestrictions;
std::vector<OperationQuantizationRestriction> quantizationRestrictions;
std::vector<PrecisionsRestriction> precisionRestrictions;
std::vector<QuantizationGranularityRestriction> quantizationRestrictions;
// remove
LayerTransformation::Params params;
};
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@

#include <ngraph/pass/pass.hpp>
#include "low_precision/lpt_visibility.hpp"
#include "low_precision/common/operation_precision_restriction.hpp"
#include "low_precision/common/precisions_restriction.hpp"

namespace ngraph {
namespace pass {
Expand Down Expand Up @@ -48,7 +48,7 @@ class ngraph::pass::low_precision::MarkupPrecisions : public ngraph::pass::Funct
};

OPENVINO_RTTI("MarkupPrecisions", "0");
explicit MarkupPrecisions(const std::vector<OperationPrecisionRestriction>& restrictions = {},
explicit MarkupPrecisions(const std::vector<PrecisionsRestriction>& restrictions = {},
const std::vector<ngraph::element::Type>& defaultPrecisions = { ngraph::element::u8, ngraph::element::i8 });
bool run_on_model(const std::shared_ptr<ngraph::Function>& m) override;

Expand Down
Loading

0 comments on commit 9a62376

Please sign in to comment.