Skip to content

Commit

Permalink
[Snippets] Created Analyzer-Pass
Browse files Browse the repository at this point in the history
  • Loading branch information
a-sidorova committed Aug 9, 2024
1 parent 9b059c8 commit 4dc5bbb
Show file tree
Hide file tree
Showing 7 changed files with 174 additions and 101 deletions.
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
// Copyright (C) 2024 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#pragma once

#include <cstddef>
#include <map>

#include "openvino/pass/pass.hpp"

namespace ov {
namespace snippets {
namespace pass {

/**
* @interface AnalyzeBroadcastableInputs
* @brief Analyzes body parameters which affects inputs of broadcastable operations(`Broadcast` op should be inserted there).
* Initializes special map `BroadcastableInputsMap = [Index of Parameter -> Index of broadcastable dimension from end]`
* Notes:
* - Must be called after Canonicalization pass
* - Doesn't support `layouts` in PortDescriptors
* @ingroup snippets
*/
class AnalyzeBroadcastableInputs : public ov::pass::ModelPass {
public:
OPENVINO_RTTI("AnalyzeBroadcastableInputs");
using BroadcastableInputsMap = std::map<size_t, size_t>;
AnalyzeBroadcastableInputs(BroadcastableInputsMap& map);

bool run_on_model(const std::shared_ptr<ov::Model>& m) override;

private:
BroadcastableInputsMap& m_broadcastable_inputs;
};

} // namespace pass
} // namespace snippets
} // namespace ov
2 changes: 2 additions & 0 deletions src/common/snippets/include/snippets/pass/manager.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,8 @@ namespace pass {
*/
class Manager : public ov::pass::Manager {
public:
Manager(std::shared_ptr<ov::pass::PassConfig> pass_config = std::make_shared<ov::pass::PassConfig>(),
std::string name = "UnnamedSnippetsManager");
~Manager() override = default;
using PassBase = ov::pass::PassBase;
using Validate = ov::pass::Validate;
Expand Down
14 changes: 9 additions & 5 deletions src/common/snippets/src/op/subgraph.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -398,13 +398,17 @@ void Subgraph::data_flow_transformations(const BlockedShapeVector& blocked_input
INTERNAL_OP_SCOPE(Subgraph);
OV_ITT_SCOPED_TASK(ov::pass::itt::domains::SnippetsTransform, "Snippets::op::data_flow_transformations")

ov::snippets::pass::Manager manager;
std::shared_ptr<ov::pass::PassConfig> pass_config = std::make_shared<ov::pass::PassConfig>();
// If subgraph has its own specific canonicalization, which is different with common behavior, will skip the this common one.
// for example in GN, scale and bias shape [c] are canonicalized to [1,c,1,1], not [1,1,1,c]. Common canonicalization is disabled in this case.
if (!blocked_input_shapes.empty() && !config.m_has_broadcast_sensitive_ops)
manager.register_pass<snippets::pass::Canonicalization>(blocked_input_shapes);
if (!input_precisions.empty() && !output_precisions.empty())
manager.register_pass<snippets::pass::AlignElementTypes>(input_precisions, output_precisions);
if (blocked_input_shapes.empty() || config.m_has_broadcast_sensitive_ops)
pass_config->disable<snippets::pass::Canonicalization>();
if (input_precisions.empty() || output_precisions.empty())
pass_config->disable<snippets::pass::AlignElementTypes>();

ov::snippets::pass::Manager manager(pass_config, "SnippetsDataFlowManager");
manager.register_pass<snippets::pass::Canonicalization>(blocked_input_shapes);
manager.register_pass<snippets::pass::AlignElementTypes>(input_precisions, output_precisions);

if (config.m_has_domain_sensitive_ops) {
manager.register_pass<snippets::pass::MatMulToBrgemm>();
Expand Down
106 changes: 106 additions & 0 deletions src/common/snippets/src/pass/analyze_broadcastable_inputs.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,106 @@
// Copyright (C) 2018-2024 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#include "snippets/pass/analyze_broadcastable_inputs.hpp"

#include "snippets/lowered/pass/insert_broadcastmove.hpp"
#include "snippets/utils/utils.hpp"
#include "snippets/itt.hpp"

namespace ov {
namespace snippets {
namespace pass {

// Stores a reference to the caller-owned output map; the map is populated by run_on_model().
AnalyzeBroadcastableInputs::AnalyzeBroadcastableInputs(BroadcastableInputsMap& map) : m_broadcastable_inputs(map) {}

bool pass::AnalyzeBroadcastableInputs::run_on_model(const std::shared_ptr<ov::Model>& body) {
    RUN_ON_MODEL_SCOPE(AnalyzeBroadcastableInputs);
    OV_ITT_SCOPED_TASK(ov::pass::itt::domains::SnippetsTransform, "Snippets::AnalyzeBroadcastableInputs")
    // Snippets supports tokenization of the following operations:
    // - Unary, Binary and Ternary (Select) Elementwise ops
    // - Softmax, MatMul, Transpose, GroupNorm
    // Binary Elementwise ops (+ Select) require explicit Broadcast op
    // on inputs if broadcasting of the last dimensions is needed.
    // These ops will be start points of DFS - need to go to Parameters and update `broadcastable_inputs_map`.
    // We iterate through all ops in execution order. So if we have already analyzed some op in the input branch - skip this branch.
    // However, there are some ops which can change `processing_dim_idx`:
    // - Transpose has an order which changes `processing_dim_idx`. But Transpose can be only after Parameters and before Results.
    // - MatMul's first input doesn't affect the output's last dimension - skip this branch.
    //   Also MatMul has `transposed_b` which changes `processing_dim_idx`
    m_broadcastable_inputs.clear();
    // Currently Broadcasting can be changed only if there are several Parameters in body
    if (body->get_parameters().size() < 2)
        return false;

    const auto& ops = body->get_ordered_ops();
    // Holds only DFS *start* ops already analyzed (see insertions below); intermediate
    // nodes are not recorded, so shared sub-branches may be traversed more than once.
    std::set<std::shared_ptr<ov::Node>> visited_ops = {};
    for (const auto& op : ops) {
        // Only ops that may need an inserted BroadcastMove seed a DFS.
        if (!ov::snippets::lowered::pass::InsertBroadcastMove::is_broadcasting_supported(op))
            continue;

        // Index of the tracked dimension, counted from the end of the shape.
        size_t processing_dim_idx = 0;

        // We need to propagate `processing_dim_idx` from input of the current node to the parameter.
        // To do it we use DFS
        std::stack<std::shared_ptr<ov::Node>> nodes_to_calculate;
        nodes_to_calculate.push(op);
        while (!nodes_to_calculate.empty()) {
            auto current_node = nodes_to_calculate.top();
            nodes_to_calculate.pop();

            if (const auto& param = ov::as_type_ptr<ov::op::v0::Parameter>(current_node)) {
                const auto consumers = param->get_output_target_inputs(0);
                if (std::any_of(consumers.cbegin(), consumers.cend(),
                                [](const ov::Input<ov::Node>& in) { return ov::is_type<ov::op::v1::Transpose>(in.get_node()); })) {
                    // A Transpose consumer must be the Parameter's only consumer so the order applies unambiguously.
                    OPENVINO_ASSERT(consumers.size() == 1, "Incorrect count of outputs of Parameter!");
                    const auto transpose = consumers.begin()->get_node();
                    std::vector<size_t> order;
                    const auto& constant = ov::as_type_ptr<const opset1::Constant>(transpose->get_input_node_shared_ptr(1));
                    OPENVINO_ASSERT(constant, "Unsupported order node of Transpose");
                    order = constant->cast_vector<size_t>();
                    if (order.empty()) {
                        // Empty order means the default full reversal: [rank-1, ..., 1, 0].
                        order.resize(transpose->get_output_partial_shape(0).size());
                        std::iota(order.rbegin(), order.rend(), 0);
                    }
                    // `processing_dim_idx` starts from the end, so remap it through the
                    // Transpose order and convert back to an index-from-the-end.
                    processing_dim_idx = order.size() - 1 - ov::snippets::utils::get_input_dim_idx(order, processing_dim_idx);
                }
                const auto param_idx = body->get_parameter_index(param);
                if (m_broadcastable_inputs.count(param_idx) == 0) {
                    m_broadcastable_inputs[param_idx] = processing_dim_idx;
                } else {
                    // The same Parameter reached via different branches must agree on the dim index.
                    OPENVINO_ASSERT(m_broadcastable_inputs.at(param_idx) == processing_dim_idx,
                                    "Parameter has been already analyzed and has another processing dim index!");
                }
                // Reset for the remaining DFS branches.
                // NOTE(review): this reset affects every node still on the stack, so a MatMul-adjusted
                // index is assumed not to be needed by sibling branches popped after a Parameter — confirm.
                processing_dim_idx = 0;
                continue;
            } else if (ov::is_type<ov::op::v0::Constant>(current_node)) {
                // NOTE(review): this inserts the DFS start op `op`, not `current_node` — redundant with
                // the insertion after the while-loop; presumably intentional since only start ops are
                // ever looked up in `visited_ops`, but verify.
                visited_ops.insert(op);
                continue;
            }

            ov::OutputVector inputs = current_node->input_values();
            if (const auto mm = ov::as_type_ptr<ov::op::v0::MatMul>(current_node)) {
                // Only MatMul's second input affects the output's last dimension;
                // with transpose_b the tracked dimension moves one position from the end.
                inputs = { current_node->input_value(1) };
                processing_dim_idx = static_cast<size_t>(mm->get_transpose_b());
            }

            // not a leaf - continue to search
            for (const auto& input_value : inputs) {
                const auto& input_node = input_value.get_node()->shared_from_this();
                if (visited_ops.count(input_node) == 0) {
                    nodes_to_calculate.push(input_node);
                }
            }
        }

        // Mark the start op as fully analyzed so later DFS runs skip this branch.
        visited_ops.insert(op);
    }

    return true;
}

} // namespace pass
} // namespace snippets
} // namespace ov
3 changes: 3 additions & 0 deletions src/common/snippets/src/pass/manager.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,9 @@ namespace ov {
namespace snippets {
namespace pass {

// Delegates to the base ov::pass::Manager, forwarding the (possibly shared) pass config
// and a name used for profiling/debugging; defaults come from the header declaration.
Manager::Manager(std::shared_ptr<ov::pass::PassConfig> pass_config, std::string name)
    : ov::pass::Manager(std::move(pass_config), std::move(name)) {}

std::shared_ptr<Manager::PassBase> Manager::register_pass_instance(const PassPosition& position,
const std::shared_ptr<PassBase>& pass) {
pass->set_pass_config(m_pass_config);
Expand Down
111 changes: 17 additions & 94 deletions src/plugins/intel_cpu/src/nodes/subgraph.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -15,11 +15,12 @@
#include "snippets/pass/matmul_to_brgemm.hpp"
#include "snippets/pass/propagate_precision.hpp"
#include "snippets/pass/positioned_pass.hpp"
#include "snippets/pass/canonicalization.hpp"
#include "snippets/pass/analyze_broadcastable_inputs.hpp"
#include "snippets/lowered/linear_ir.hpp"
#include "snippets/lowered/pass/optimize_domain.hpp"
#include "snippets/lowered/pass/insert_loops.hpp"
#include "snippets/lowered/pass/mark_loops.hpp"
#include "snippets/lowered/pass/insert_broadcastmove.hpp"
#include "transformations/defs.hpp"
#include "transformations/cpu_opset/common/pass/convert_to_swish_cpu.hpp"
#include "transformations/snippets/common/pass/mul_add_to_fma.hpp"
Expand Down Expand Up @@ -540,104 +541,12 @@ void Subgraph::createPrimitive() {
initPluginBlockedShapes();
initAttributes();
initStartOffsets();
initBroadcastableInputs();
optimizeIR();
}

Node::createPrimitive();
}

// Fills `broadcastable_inputs` = [input index -> dim index from the end] for inputs that may
// need a Broadcast inserted, then validates the map against the node's actual inputs/shapes.
void Subgraph::initBroadcastableInputs() {
    // Snippets supports tokenization of the following operations:
    // - Unary, Binary and Ternary (Select) Elementwise ops
    // - Softmax, MatMul, Transpose, GroupNorm
    // Binary Elementwise ops (+ Select) require explicit Broadcast op
    // on inputs if broadcasting of the last dimensions is needed.
    // These ops will be start points of DFS - need to go to Parameters and update `broadcastable_inputs_map`.
    // We iterate through all ops in execution order. So if we have already analyzed some op in the input branch - skip this branch.
    // However, there are some ops which can change `processing_dim_idx`:
    // - Transpose has an order which changes `processing_dim_idx`. But Transpose can be only after Parameters
    // - MatMul's first input doesn't affect the output's last dimension - skip this branch.
    //   Also MatMul has `transposed_b` which changes `processing_dim_idx`
    broadcastable_inputs.clear();
    const auto& body = subgraph_attrs->snippet->body_ptr();
    // Currently Broadcasting can be changed only if there are several Parameters in body
    if (body->get_parameters().size() < 2)
        return;

    const auto& ops = body->get_ordered_ops();
    // Holds only DFS start ops already analyzed; intermediate nodes are not recorded.
    std::set<std::shared_ptr<ov::Node>> visited_ops = {};
    for (const auto& op : ops) {
        if (!ov::snippets::lowered::pass::InsertBroadcastMove::is_broadcasting_supported(op))
            continue;

        // Index of the tracked dimension, counted from the end of the shape.
        size_t processing_dim_idx = 0;

        // We need to propagate `processing_dim_idx` from input of the current node to the parameter.
        // To do it we use DFS
        std::stack<std::shared_ptr<ov::Node>> nodes_to_calculate;
        nodes_to_calculate.push(op);
        while (!nodes_to_calculate.empty()) {
            auto current_node = nodes_to_calculate.top();
            nodes_to_calculate.pop();

            if (const auto& param = ov::as_type_ptr<ov::op::v0::Parameter>(current_node)) {
                const auto consumers = param->get_output_target_inputs(0);
                if (std::any_of(consumers.cbegin(), consumers.cend(),
                                [](const ov::Input<ov::Node>& in) { return ov::is_type<ov::op::v1::Transpose>(in.get_node()); })) {
                    // A Transpose consumer must be the Parameter's only consumer so the order applies unambiguously.
                    OPENVINO_ASSERT(consumers.size() == 1, "Incorrect count of outputs of Parameter!");
                    const auto transpose = consumers.begin()->get_node();
                    std::vector<size_t> order;
                    const auto& constant = ov::as_type_ptr<const opset1::Constant>(transpose->get_input_node_shared_ptr(1));
                    OPENVINO_ASSERT(constant, "Unsupported order node of Transpose");
                    order = constant->cast_vector<size_t>();
                    if (order.empty()) {
                        // Empty order means the default full reversal: [rank-1, ..., 1, 0].
                        order.resize(transpose->get_output_partial_shape(0).size());
                        std::iota(order.rbegin(), order.rend(), 0);
                    }
                    // `processing_dim_idx` starts from the end, so remap it through the
                    // Transpose order and convert back to an index-from-the-end.
                    processing_dim_idx = order.size() - 1 - ov::snippets::utils::get_input_dim_idx(order, processing_dim_idx);
                }
                const auto param_idx = body->get_parameter_index(param);
                if (broadcastable_inputs.count(param_idx) == 0) {
                    broadcastable_inputs[param_idx] = processing_dim_idx;
                } else {
                    // The same Parameter reached via different branches must agree on the dim index.
                    OPENVINO_ASSERT(broadcastable_inputs.at(param_idx) == processing_dim_idx,
                                    "Parameter has been already analyzed and has another processing dim index!");
                }
                // Reset for the remaining DFS branches.
                processing_dim_idx = 0;
                continue;
            } else if (ov::is_type<ov::op::v0::Constant>(current_node)) {
                visited_ops.insert(op);
                continue;
            }

            ov::OutputVector inputs = current_node->input_values();
            if (const auto mm = ov::as_type_ptr<ov::op::v0::MatMul>(current_node)) {
                // Only MatMul's second input affects the output's last dimension;
                // with transpose_b the tracked dimension moves one position from the end.
                inputs = { current_node->input_value(1) };
                processing_dim_idx = static_cast<size_t>(mm->get_transpose_b());
            }

            // not a leaf - continue to search
            for (const auto& input_value : inputs) {
                const auto& input_node = input_value.get_node()->shared_from_this();
                if (visited_ops.count(input_node) == 0) {
                    nodes_to_calculate.push(input_node);
                }
            }
        }

        // Mark the start op as fully analyzed so later DFS runs skip this branch.
        visited_ops.insert(op);
    }

    // Fix: guard rbegin() with an emptiness check - dereferencing rbegin() of an empty map is UB
    // (reachable when srcMemPtrs is empty, since 0 < 0 fails the first clause).
    OPENVINO_ASSERT((broadcastable_inputs.size() < srcMemPtrs.size()) ||
                    (!broadcastable_inputs.empty() && broadcastable_inputs.rbegin()->first < srcMemPtrs.size()),
                    "Incorrect indexes of broadcastable inputs of Subgraph");
    // Take entries by const reference to avoid copying each pair.
    for (const auto& broadcastable_input : broadcastable_inputs) {
        OPENVINO_ASSERT(broadcastable_input.second < in_shapes[broadcastable_input.first].size(),
                        "Incorrect processing dimension index of broadcastable index");
    }
}

void Subgraph::initMemoryPtrs() {
srcMemPtrs.resize(input_num);
dstMemPtrs.resize(output_num);
Expand Down Expand Up @@ -711,14 +620,16 @@ void Subgraph::initPluginBlockedShapes() const {
in_shapes[i] = srcMemPtrs[i]->getDescWithType<BlockedMemoryDesc>()->getBlockDims();
}

Subgraph::DataFlowPasses Subgraph::getDataFlowPasses() const {
Subgraph::DataFlowPasses Subgraph::getDataFlowPasses() {
DataFlowPasses backend_passes;

using PassPosition = ov::snippets::pass::PassPosition;
using Place = PassPosition::Place;

# define SNIPPETS_REGISTER_PASS_ABSOLUTE_COMMON(PASS_PLACE, PASS, ...) \
backend_passes.emplace_back(PassPosition(PASS_PLACE), std::make_shared<PASS>(__VA_ARGS__))
# define SNIPPETS_REGISTER_PASS_RELATIVEE_COMMON(PASS_PLACE, TARGET_PASS, PASS, ...) \
backend_passes.emplace_back(PassPosition(PASS_PLACE, TARGET_PASS::get_type_info_static()), std::make_shared<PASS>(__VA_ARGS__))

#if defined(OPENVINO_ARCH_X86_64)
# define SNIPPETS_REGISTER_PASS_ABSOLUTE_X86_64(PASS_PLACE, PASS, ...) \
Expand All @@ -731,6 +642,8 @@ Subgraph::DataFlowPasses Subgraph::getDataFlowPasses() const {
#endif // OPENVINO_ARCH_X86_64

SNIPPETS_REGISTER_PASS_ABSOLUTE_COMMON(Place::PipelineStart, ConvertToSwishCPU);
SNIPPETS_REGISTER_PASS_RELATIVEE_COMMON(Place::After, ov::snippets::pass::Canonicalization,
ov::snippets::pass::AnalyzeBroadcastableInputs, broadcastable_inputs);
if (context->getConfig().inferencePrecision == ov::element::bf16 && subgraph_attrs->snippet->has_domain_sensitive_ops()) {
// enforce BF16 precisions to supported operations
// MatMul has to be decomposed to Brgemm operations before enforcement
Expand All @@ -754,6 +667,7 @@ Subgraph::DataFlowPasses Subgraph::getDataFlowPasses() const {
#endif

#undef SNIPPETS_REGISTER_PASS_ABSOLUTE_COMMON
#undef SNIPPETS_REGISTER_PASS_RELATIVE_COMMON
#undef SNIPPETS_REGISTER_PASS_ABSOLUTE_X86_64
#undef SNIPPETS_REGISTER_PASS_RELATIVE_X86_64

Expand Down Expand Up @@ -808,6 +722,15 @@ void Subgraph::optimizeIR() {
const auto precisions = getIOPrecisions();
subgraph->data_flow_transformations(in_blocked_shapes, precisions.first, precisions.second, getDataFlowPasses());

// DataFlow transformations includes AnalyzeBroadcastableInputs pass - we should verify that the map is aligned with our blocked input shapes
OPENVINO_ASSERT((broadcastable_inputs.size() < in_shapes.size()) ||
(!broadcastable_inputs.empty() && broadcastable_inputs.rbegin()->first < in_shapes.size()),
"Incorrect indexes of broadcastable inputs of Subgraph");
for (const auto broadcastable_input : broadcastable_inputs) {
OPENVINO_ASSERT(broadcastable_input.second < in_shapes[broadcastable_input.first].size(),
"Incorrect processing dimension index of broadcastable index");
}

// TODO: Snippets don't support backend-provided blocking, so we need to reshape body
// using blocked shapes first. This can be removed after [121670]
std::vector<snippets::VectorDimsRef> in_shapes;
Expand Down
3 changes: 1 addition & 2 deletions src/plugins/intel_cpu/src/nodes/subgraph.h
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,6 @@ class Subgraph : public Node {
IShapeInfer::Result shapeInfer() const override;

private:
void initBroadcastableInputs();
void initMemoryPtrs();
void initAttributes();
void initStartOffsets();
Expand All @@ -77,7 +76,7 @@ class Subgraph : public Node {
using DataFlowPasses = std::vector<ov::snippets::pass::Manager::PositionedPassBase>;
using ControlFlowPasses = std::vector<ov::snippets::lowered::pass::PassPipeline::PositionedPassLowered>;

DataFlowPasses getDataFlowPasses() const;
DataFlowPasses getDataFlowPasses();
ControlFlowPasses getControlFlowPasses() const;

// Holds ISA version used is codeGeneration target
Expand Down

0 comments on commit 4dc5bbb

Please sign in to comment.