Snippets: precision propagation (#14996)
eshoguli authored Mar 23, 2023
1 parent 5fa95ff commit 087b10f
Showing 48 changed files with 2,066 additions and 327 deletions.
1 change: 0 additions & 1 deletion src/bindings/python/tests/__init__.py
@@ -117,7 +117,6 @@ def xfail_test(reason="Mark the test as expected to fail", strict=True):

xfail_issue_63033 = xfail_test(reason="BatchNormalization: Training mode is not supported")
xfail_issue_63036 = xfail_test(reason="Changes in ConvTranspose padding")
xfail_issue_63039 = xfail_test(reason="Result mismatches with UINT8 operations")
xfail_issue_63043 = xfail_test(reason="Recurrent node expects constants as W, R, B inputs.")

skip_rng_tests = pytest.mark.skip(reason="Tests use random number generator with no seed.")
5 changes: 0 additions & 5 deletions src/bindings/python/tests/test_onnx/test_backend.py
@@ -37,7 +37,6 @@
xfail_issue_58033,
xfail_issue_63033,
xfail_issue_63036,
xfail_issue_63039,
xfail_issue_63043,
xfail_issue_63137,
xfail_issue_63138,
@@ -278,10 +277,6 @@ def expect_fail(test_case_path, xfail): # type: (str) -> None
"OnnxBackendNodeModelTest.test_batchnorm_example_training_mode_cpu",
),
(xfail_issue_63036, "OnnxBackendNodeModelTest.test_convtranspose_autopad_same_cpu"),
(
xfail_issue_63039,
"OnnxBackendNodeModelTest.test_div_uint8_cpu",
),
(
xfail_issue_63043,
"OnnxBackendNodeModelTest.test_gru_batchwise_cpu",
1 change: 0 additions & 1 deletion src/bindings/python/tests_compatibility/__init__.py
@@ -122,7 +122,6 @@ def xfail_test(reason="Mark the test as expected to fail", strict=True):

xfail_issue_63033 = xfail_test(reason="BatchNormalization: Training mode is not supported")
xfail_issue_63036 = xfail_test(reason="Changes in ConvTranspose padding")
xfail_issue_63039 = xfail_test(reason="Result mismatches with UINT8 operations")
xfail_issue_63043 = xfail_test(reason="Recurrent node expects constants as W, R, B inputs.")

skip_rng_tests = pytest.mark.skip(reason="Tests use random number generator with no seed.")
5 changes: 0 additions & 5 deletions src/bindings/python/tests_compatibility/test_onnx/test_backend.py
@@ -37,7 +37,6 @@
xfail_issue_58033,
xfail_issue_63033,
xfail_issue_63036,
xfail_issue_63039,
xfail_issue_63043,
xfail_issue_63137,
xfail_issue_63138,
@@ -282,10 +281,6 @@ def expect_fail(test_case_path, xfail): # type: (str) -> None
"OnnxBackendNodeModelTest.test_batchnorm_example_training_mode_cpu",
),
(xfail_issue_63036, "OnnxBackendNodeModelTest.test_convtranspose_autopad_same_cpu"),
(
xfail_issue_63039,
"OnnxBackendNodeModelTest.test_div_uint8_cpu",
),
(
xfail_issue_63043,
"OnnxBackendNodeModelTest.test_gru_batchwise_cpu",
15 changes: 13 additions & 2 deletions src/common/snippets/include/snippets/generator.hpp
@@ -16,6 +16,8 @@ namespace snippets {

auto getRegisters(std::shared_ptr<ngraph::Node>& n) -> ngraph::snippets::RegInfo;

typedef std::pair<std::function<std::shared_ptr<Emitter>(const std::shared_ptr<ngraph::Node>&)>,
std::function<std::set<std::vector<element::Type>>(const std::shared_ptr<ngraph::Node>&)>> jitters_value;
/**
* @interface TargetMachine
* @brief Base class for target machine representation. A target derives from this class to provide the generator with information about supported emitters
@@ -51,7 +53,16 @@ class TargetMachine {
if (jitter == jitters.end()) {
throw ngraph_error(std::string("Target code emitter is not available for ") + type.name + " operation.");
}
return jitter->second;
return jitter->second.first;
}

std::function<std::set<std::vector<element::Type>>(const std::shared_ptr<ngraph::Node>&)>
get_supported_precisions(const ngraph::DiscreteTypeInfo type) const {
auto jitter = jitters.find(type);
if (jitter == jitters.end()) {
throw ngraph_error(std::string("Target code emitter is not available for ") + type.name + " operation.");
}
return jitter->second.second;
}

/**
@@ -64,7 +75,7 @@
virtual ~TargetMachine() = default;

protected:
std::map<const ngraph::DiscreteTypeInfo, std::function<std::shared_ptr<Emitter>(std::shared_ptr<ngraph::Node>)>> jitters;
std::map<const ngraph::DiscreteTypeInfo, jitters_value> jitters;
};
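// A sketch of how a target might populate an entry in the reworked jitters map
// (MyConvertEmitter and the listed precision sets are hypothetical; only the pair
// layout follows the jitters_value typedef above):
//
//   jitters[op::ConvertSaturation::get_type_info_static()] = {
//       [](const std::shared_ptr<ngraph::Node>& n) -> std::shared_ptr<Emitter> {
//           return std::make_shared<MyConvertEmitter>(n);  // emitter factory
//       },
//       [](const std::shared_ptr<ngraph::Node>& n) -> std::set<std::vector<element::Type>> {
//           return {{element::f32}, {element::i8}, {element::u8}};  // supported input precisions
//       }};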

/**
16 changes: 11 additions & 5 deletions src/common/snippets/include/snippets/op/subgraph.hpp
@@ -101,11 +101,17 @@ class Subgraph : public ov::op::util::SubGraphOp {
bool is_quantized() const { return config.m_is_quantized; }
bool has_type_relaxed_ops() const { return config.m_has_type_relaxed_ops; }
bool has_domain_sensitive_ops() const { return config.m_has_domain_sensitive_ops; }

snippets::Schedule generate(const BlockedShapeVector& output_shapes, const BlockedShapeVector& input_shapes, ngraph::pass::Manager& opt,
snippets::Schedule generate(const BlockedShapeVector& output_shapes,
const BlockedShapeVector& input_shapes,
ngraph::pass::Manager& pre_dialect,
ngraph::pass::Manager& post_dialect,
ngraph::pass::Manager& post_precision,
const void* compile_params = nullptr);
snippets::Schedule generate(const BlockedShapeVector& output_shapes, const BlockedShapeVector& input_shapes, const void* compile_params = nullptr);
snippets::Schedule generate(ngraph::pass::Manager &opt, const void* compile_params = nullptr);
snippets::Schedule generate(ngraph::pass::Manager& pre_dialect,
ngraph::pass::Manager& post_dialect,
ngraph::pass::Manager& post_precision,
const void* compile_params = nullptr);
snippets::Schedule generate(const void* compile_params = nullptr);
ov::PartialShape canonicalize(const BlockedShapeVector& output_shapes, const BlockedShapeVector& input_shapes);
std::vector<PartialShape> reshape_body(const std::vector<PartialShape>& input_shapes);
@@ -132,6 +138,8 @@ class Subgraph : public ov::op::util::SubGraphOp {
// This check returns true if a Constant op that is an input of this op should be inside the Subgraph body
static auto constant_input_should_be_inside_body(const std::shared_ptr<ov::Node>& node) -> bool;

static bool check_broadcast(const std::shared_ptr<const ov::Node>& node) noexcept;

private:
void align_element_types(const BlockedShapeVector& outputShapes, const BlockedShapeVector& inputShapes);
void convert_to_snippet_dialect();
@@ -164,8 +172,6 @@
public:
// True if Subgraph contains FakeQuantize -> FQ decomposition should be called
bool m_is_quantized = false;
// True if we should align element types inside the body
bool m_is_needed_to_align_precision = false;
// True if Subgraph contains TypeRelaxed nodes -> for several streams in tp mode we should copy the body using mutexes
// because TypeRelaxed::copy_with_new_inputs() isn't a thread-safe method
bool m_has_type_relaxed_ops = false;
46 changes: 0 additions & 46 deletions src/common/snippets/include/snippets/pass/align_element_type.hpp

This file was deleted.

@@ -29,15 +29,15 @@ namespace pass {
*
* Expand brackets:
* round(x * (levels-1) / (ih - il) - il * (levels-1) / (ih - il)) * (oh - ol) / (levels-1) + ol
*
*
* Marking:
* - isc := (levels-1) / (ih - il)
* - ish := -il * isc
* - osc := (oh - ol) / (levels-1)
* - osh := ol
* Final expression:
* round(x * isc + ish) * osc + osh
*
*
* Some optimizations (example for scalars):
* 1. If the FQ output element type is U8 and il = 0, ish = 0, osc = 1, osh = 0, the expression x * isc is sufficient
* 2. If the FQ output element type is I8 and ish ~= 128, osc = 1, osh ~= -128, il * isc ~= -128, ih * isc ~= 127, the expression x * isc is sufficient
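*
* A scalar sketch of the full expression above (illustrative names, assuming float inputs; not part of this header):
*   const float isc = (levels - 1) / (ih - il);   // input scale
*   const float ish = -il * isc;                  // input shift
*   const float osc = (oh - ol) / (levels - 1);   // output scale
*   const float osh = ol;                         // output shift
*   const float y   = std::round(x * isc + ish) * osc + osh;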
@@ -54,7 +54,6 @@ class FakeQuantizeDecomposition : public ngraph::pass::MatcherPass {
public:
FakeQuantizeDecomposition();

static bool isAllScalarConstant(const std::shared_ptr<const ngraph::Node>& node);
static bool getScalesAndShifts(const std::shared_ptr<const ngraph::op::v0::FakeQuantize>& fq_node,
std::vector<float>& cl,
std::vector<float>& ch,
48 changes: 48 additions & 0 deletions src/common/snippets/include/snippets/pass/propagate_precision.hpp
@@ -0,0 +1,48 @@
// Copyright (C) 2023 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#pragma once

#include <memory>
#include <ngraph/pass/pass.hpp>
#include "snippets/generator.hpp"

namespace ngraph {
namespace snippets {
namespace pass {

/**
* @class PropagatePrecision
* @ingroup snippets
* @brief PropagatePrecision transformation propagates precisions from parameters to results.
*/
class PropagatePrecision: public ngraph::pass::FunctionPass {
public:
OPENVINO_RTTI("PropagatePrecision", "0");
PropagatePrecision(const std::shared_ptr<const TargetMachine>& target_machine);
bool run_on_model(const std::shared_ptr<ov::Model>& m) override;

static std::vector<element::Type> get_precisions(
const std::vector<element::Type>& input_precisions,
const std::set<std::vector<element::Type>>& supported_precisions) noexcept;

// if can_be_removed returns true, then the actual conversion (actual_before => actual_after)
// can be replaced with the required one (actual_before => required_after)
static bool can_be_removed(
const element::Type& actual_before,
const element::Type& actual_after,
const element::Type& required_after) noexcept;

// if can_be_fused returns true, then the actual conversion can be replaced with the required one
static bool can_be_fused(
const element::Type& actual,
const element::Type& required) noexcept;

private:
const std::shared_ptr<const TargetMachine> target_machine;
};

} // namespace pass
} // namespace snippets
} // namespace ngraph
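
A minimal usage sketch of the new pass, assuming a TargetMachine instance target_machine and a body model body (it mirrors the registration added in subgraph.cpp below; the variable names are illustrative):

    ngraph::pass::Manager manager;
    manager.register_pass<ngraph::snippets::pass::PropagatePrecision>(target_machine);
    manager.register_pass<ngraph::pass::ConstantFolding>();
    manager.run_passes(body);

For get_precisions, a natural reading of the contract is that an exact match wins: given input precisions {f32, f32} and supported sets {{f32, f32}, {bf16, bf16}}, the pass would keep {f32, f32} and insert no Converts.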
80 changes: 49 additions & 31 deletions src/common/snippets/src/op/subgraph.cpp
@@ -11,14 +11,14 @@
#include "snippets/pass/insert_movebroadcast.hpp"
#include "snippets/pass/broadcast_to_movebroadcast.hpp"
#include "snippets/pass/load_movebroadcast_to_broadcastload.hpp"
#include "snippets/pass/propagate_precision.hpp"
#include "snippets/pass/assign_registers.hpp"
#include "snippets/pass/convert_constants.hpp"
#include "snippets/pass/convert_power_to_powerstatic.hpp"
#include "snippets/pass/vector_to_scalar.hpp"
#include "snippets/pass/insert_loops.hpp"
#include "snippets/pass/transpose_decomposition.hpp"
#include "snippets/pass/transform_convert.hpp"
#include "snippets/pass/align_element_type.hpp"
#include "snippets/pass/matmul_to_brgemm.hpp"
#include "snippets/pass/fuse_transpose_brgemm.hpp"
#include "snippets/pass/softmax_decomposition.hpp"
@@ -62,10 +62,6 @@ void snippets::op::Subgraph::init_config() {
ov::is_type<ov::op::v0::FakeQuantize>(op);
config.m_has_type_relaxed_ops = config.m_has_type_relaxed_ops ||
std::dynamic_pointer_cast<ov::op::TypeRelaxedBase>(op);
config.m_is_needed_to_align_precision = config.m_is_needed_to_align_precision ||
is_quantized() ||
has_type_relaxed_ops() ||
snippets::pass::AlignElementType::opNeedsAlignElementType(op, execution_element_type);
config.m_has_domain_sensitive_ops = config.m_has_domain_sensitive_ops ||
ov::is_type<ov::op::v1::Transpose>(op) ||
ov::is_type<ov::op::v1::Softmax>(op) ||
@@ -359,6 +355,14 @@ ov::PartialShape snippets::op::Subgraph::canonicalize(const BlockedShapeVector&
return master_shape;
}

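// Broadcast is supported unless the node is a binary elementwise op with PDPD
// auto-broadcast and inputs of different rank.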
bool snippets::op::Subgraph::check_broadcast(const std::shared_ptr<const ov::Node>& node) noexcept {
const auto elementwise = std::dynamic_pointer_cast<const ov::op::util::BinaryElementwiseArithmetic>(node);
return
(elementwise == nullptr) ||
(elementwise->get_input_partial_shape(0).size() == elementwise->get_input_partial_shape(1).size()) ||
(elementwise->get_autob().m_type != ov::op::AutoBroadcastType::PDPD);
}

void snippets::op::Subgraph::align_element_types(const BlockedShapeVector& outputShapes,
const BlockedShapeVector& inputShapes) {
// We should insert Convert before Results to set original output element type if needed
@@ -369,35 +373,34 @@ void snippets::op::Subgraph::align_element_types(const BlockedShapeVector& outpu
const auto convert = std::make_shared<ngraph::snippets::op::ConvertSaturation>(
body_results[i]->get_input_node_shared_ptr(0), needed_out_type);
body_results[i]->set_argument(0, convert);
body_results[i]->validate_and_infer_types();
}
}

// We should change existing element type to original for Parameters if needed
const auto& body_parameters = body_ptr()->get_parameters();
const auto& parameters = body_ptr()->get_parameters();
for (size_t i = 0; i < inputShapes.size(); ++i) {
const auto needed_in_type = std::get<2>(inputShapes[i]);
if (body_parameters[i]->get_element_type() != needed_in_type) {
body_parameters[i]->set_element_type(needed_in_type);
config.m_is_needed_to_align_precision = true;
}
}
const auto& parameter = parameters[i];
if (parameter->get_element_type() != needed_in_type) {
const auto parameter_output = parameter->output(0);
const auto convert = std::make_shared<ngraph::snippets::op::ConvertSaturation>(
parameter_output,
parameter_output.get_element_type());
ngraph::copy_runtime_info(parameter, convert);

for (const auto input : parameter_output.get_target_inputs()) {
const auto& input_node = input.get_node();
if (input_node == convert.get()) {
continue;
}
input_node->set_argument(input.get_index(), convert->output(0));
}

// We should align element types inside the body using the corresponding pass:
// - Insert Convert before operations that don't support the original element type for execution
// - Insert reverse Convert before operations that support the original element type
//   but have inputs that don't support it (because a Convert with exec_type will be inserted before them - see the first point)
// - Then we should use the ConstantFolding pass to convert the element type of Scalars before inference.
// - Eliminate redundant Converts which can be inserted by the AlignElementType() pass
ngraph::pass::Manager manager;
if (config.m_is_needed_to_align_precision) {
manager.register_pass<snippets::pass::AlignElementType>(execution_element_type);
manager.register_pass<ov::pass::ConstantFolding>();
// TODO [100041] : In some cases AlignElementType pass can insert extra Convert because
// the pass doesn't know real precisions in real time.
// We call EliminateConverts pass to remove them
manager.register_pass<ov::pass::EliminateConvert>();
parameter->set_element_type(needed_in_type);
parameter->validate_and_infer_types();
}
}
manager.run_passes(body_ptr());
}

void snippets::op::Subgraph::initialize_buffer_scratchpad_size() {
@@ -602,24 +605,39 @@ snippets::Schedule snippets::op::Subgraph::generate(const BlockedShapeVector& ou

snippets::Schedule snippets::op::Subgraph::generate(const BlockedShapeVector& output_shapes,
const BlockedShapeVector& input_shapes,
ngraph::pass::Manager& opt,
ngraph::pass::Manager& pre_dialect,
ngraph::pass::Manager& post_dialect,
ngraph::pass::Manager& post_precision,
const void* compile_params) {
canonicalize(output_shapes, input_shapes);
return generate(opt, compile_params);
return generate(pre_dialect, post_dialect, post_precision, compile_params);
}

snippets::Schedule snippets::op::Subgraph::generate(const void* compile_params) {
auto mngr = ngraph::pass::Manager();
return generate(mngr, compile_params);
return generate(mngr, mngr, mngr, compile_params);
}

snippets::Schedule snippets::op::Subgraph::generate(ngraph::pass::Manager& opt, const void* compile_params) {
snippets::Schedule snippets::op::Subgraph::generate(
ngraph::pass::Manager& pre_dialect,
ngraph::pass::Manager& post_dialect,
ngraph::pass::Manager& post_precision,
const void* compile_params) {
INTERNAL_OP_SCOPE(Subgraph);
OV_ITT_SCOPED_TASK(ngraph::pass::itt::domains::SnippetsTransform, "Snippets::op::generate")
NGRAPH_CHECK(m_generator != nullptr, "generate is called while generator is not set");

pre_dialect.run_passes(body_ptr());
convert_to_snippet_dialect();
opt.run_passes(body_ptr());
post_dialect.run_passes(body_ptr());

ngraph::pass::Manager precision_manager;
precision_manager.register_pass<snippets::pass::PropagatePrecision>(m_generator->get_target_machine());
precision_manager.register_pass<ngraph::pass::ConstantFolding>();
precision_manager.register_pass<snippets::pass::ConvertConstantsToScalars>();
precision_manager.run_passes(body_ptr());

post_precision.run_passes(body_ptr());

// After all passes, when all optimizations are completed and all MemoryAccess ops are inserted,
// we can calculate the common buffer scratchpad size and propagate offsets from Buffers to the corresponding MemoryAccess ops
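
A hedged usage sketch of the new three-stage generate() entry point (the subgraph variable and empty managers are illustrative; a backend would register its own passes on each stage):

    ngraph::pass::Manager pre_dialect, post_dialect, post_precision;
    // Backend-specific passes may be registered on any of the three stages here.
    const auto schedule = subgraph->generate(output_shapes, input_shapes,
                                             pre_dialect, post_dialect, post_precision,
                                             /*compile_params=*/nullptr);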