Added support for custom Plugin ops in Linear IR
a-sidorova committed Mar 30, 2023
1 parent 0ceddc3 commit 61ef97a
Showing 43 changed files with 614 additions and 1,354 deletions.
2 changes: 2 additions & 0 deletions src/common/snippets/include/snippets/lowered_expr.hpp
@@ -50,6 +50,7 @@ class LoweredExpr {
void set_loop_ids(const std::vector<size_t>& loops) { m_loop_ids = loops; }
void set_loop_id(size_t id, size_t idx);
void remove_loop_id(size_t id);
bool is_outside_loop() const { return m_is_outside_loop; }

protected:
void replace_input(size_t port, TensorDescriptorPtr to);
@@ -61,6 +62,7 @@ class LoweredExpr {
RegInfo m_reg_info{{}, {}};
// The order of Loop identifiers: Outer ---> Inner
std::vector<size_t> m_loop_ids;
bool m_is_outside_loop = false;
};

class IOLoweredExpr : public LoweredExpr {
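The loop-id API above tracks which loops an expression belongs to, ordered from the outermost to the innermost loop. A short illustration of the assumed semantics (expr stands for a hypothetical LoweredExpr instance):

    // m_loop_ids is ordered Outer ---> Inner, so index 0 is the outermost loop.
    expr.set_loop_ids({3, 7});       // member of loop 3 (outer) and loop 7 (inner)
    expr.set_loop_id(5, 1);          // assumed: overwrite the id at nesting depth 1 -> {3, 5}
    expr.remove_loop_id(3);          // drop membership in loop 3 -> {5}
    const bool outside = expr.is_outside_loop();  // flag picked up from the node's rt_info at construction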
2 changes: 2 additions & 0 deletions src/common/snippets/include/snippets/op/brgemm.hpp
@@ -34,7 +34,9 @@ class Brgemm : public MemoryAccess {

protected:
ov::element::Type get_output_type() const;
std::vector<ov::PartialShape> get_planar_input_shapes(const std::vector<ov::Output<ov::Node>>& inputs) const;
ov::PartialShape get_output_partial_shape(const std::vector<ov::PartialShape>& input_shapes) const;
ov::PartialShape get_planar_output_shape(const ov::PartialShape& output_shape) const;
};

} // namespace op
3 changes: 3 additions & 0 deletions src/common/snippets/include/snippets/op/load.hpp
@@ -33,6 +33,9 @@ class Load : public MemoryAccess {

void validate_and_infer_types() override;
std::shared_ptr<Node> clone_with_new_inputs(const OutputVector& new_args) const override;

protected:
void validate_memory_access_params() const;
};

/**
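A plausible sketch of the check behind the new validate_memory_access_params() (assumed body; the real implementation lives in load.cpp):

    void Load::validate_memory_access_params() const {
        // Load and the ops derived from it must expose exactly one
        // memory-access input port and no memory-access output ports.
        OPENVINO_ASSERT(is_memory_access_input_port(0) && get_memory_access_input_ports().size() == 1,
                        "Load node must have exactly one memory access input port");
        OPENVINO_ASSERT(get_memory_access_output_ports().empty(),
                        "Load node must not have memory access output ports");
    }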
23 changes: 17 additions & 6 deletions src/common/snippets/include/snippets/op/memory_access.hpp
@@ -14,8 +14,8 @@ namespace op {
* @interface MemoryAccess
* @brief This is a base class for memory access operations (like Load and Store).
* It provides universal interface to manipulate with memory: load/store.
* @param m_input_ports - vector of input descriptors: variables of PortDescriptor class
* @param m_output_ports - vector of output descriptors: variables of PortDescriptor class
* @param m_input_ports - map of input descriptors: variables of PortDescriptor class
* @param m_output_ports - map of output descriptors: variables of PortDescriptor class
* @ingroup snippets
*/

@@ -55,22 +55,33 @@ class MemoryAccess : public ngraph::op::Op {
size_t get_input_offset(size_t idx = 0) const;
size_t get_output_offset(size_t idx = 0) const;

size_t get_input_port_count() const { return m_input_ports.size(); }
size_t get_output_port_count() const { return m_output_ports.size(); }
std::map<size_t, PortDescriptor> get_memory_access_input_ports() const { return m_input_ports; }
std::map<size_t, PortDescriptor> get_memory_access_output_ports() const { return m_output_ports; }

bool is_memory_access_input_port(size_t idx) const;
bool is_memory_access_output_port(size_t idx) const;

// Returns true only if all input and output ports are memory access ports
bool is_full_memory_access_op() const;

bool visit_attributes(AttributeVisitor& visitor) override;

protected:
explicit MemoryAccess(const OutputVector& arguments, size_t input_count = 0, size_t output_count = 0);
explicit MemoryAccess(const OutputVector& arguments, const std::set<size_t>& input_ports, const std::set<size_t>& output_ports);
MemoryAccess() = default;

// This method can be called only in ctors
void ctor_initialize(const std::set<size_t>& input_ports, const std::set<size_t>& output_ports);

void set_input_port_descriptor(const PortDescriptor& desc, const size_t i);
void set_output_port_descriptor(const PortDescriptor& desc, const size_t i);
const PortDescriptor& get_input_port_descriptor(const size_t i) const;
const PortDescriptor& get_output_port_descriptor(const size_t i) const;

std::vector<PortDescriptor> m_input_ports;
std::vector<PortDescriptor> m_output_ports;
// [port_num, port_desc]
std::map<size_t, PortDescriptor> m_input_ports;
std::map<size_t, PortDescriptor> m_output_ports;
};

} // namespace op
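With the port descriptors now stored in std::map keyed by port index, the new predicates reduce to map lookups. A minimal sketch of the assumed bodies (the commit keeps the real ones in memory_access.cpp):

    bool MemoryAccess::is_memory_access_input_port(size_t idx) const {
        return m_input_ports.count(idx) != 0;
    }

    bool MemoryAccess::is_memory_access_output_port(size_t idx) const {
        return m_output_ports.count(idx) != 0;
    }

    bool MemoryAccess::is_full_memory_access_op() const {
        // Every node input and output must have a registered port descriptor.
        return m_input_ports.size() == get_input_size() &&
               m_output_ports.size() == get_output_size();
    }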
@@ -5,6 +5,7 @@
#pragma once

#include "linear_IR_transformation.hpp"
#include "snippets/generator.hpp"

namespace ngraph {
namespace snippets {
@@ -13,19 +13,17 @@ namespace pass {
namespace lowered {

/**
* @interface PropagateOffsetAndResetBuffer
* @brief Propagates Buffer offsets to connected Load/Store (and other MemoryAccess) operations.
* Also, calculates the amount of data stored to the Buffer (via Store inside one or more Loops),
* and resets the corresponding pointer (sets negative finalization offset to the outermost LoopEnd).
* @interface BufferAllocation
* @brief The pass calculates the common size of the buffer scratchpad and propagates Buffer offsets to connected MemoryAccess operations.
* @ingroup snippets
*/

class PropagateOffsetAndResetBuffer : public LinearIRTransformation {
class BufferAllocation : public LinearIRTransformation {
static void propagate_offset(const LoweredExprIR& linear_ir, const LoweredExprPtr& buffer_expr, size_t offset);
size_t m_buffer_scratchpad_size = 0;

public:
OPENVINO_RTTI("PropagateOffsetAndResetBuffer", "LinearIRTransformation")
OPENVINO_RTTI("BufferAllocation", "LinearIRTransformation")
bool run(LoweredExprIR& linear_ir) override;
size_t get_scratchpad_size() const {return m_buffer_scratchpad_size;}
};
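A sketch of how such a pass might accumulate the scratchpad size while assigning each Buffer its offset (assumed logic; get_byte_size() on Buffer and iteration over LoweredExprIR are assumptions here):

    bool BufferAllocation::run(LoweredExprIR& linear_ir) {
        size_t scratchpad_size = 0;
        for (const auto& expr : linear_ir) {
            const auto buffer = ov::as_type_ptr<op::Buffer>(expr->get_node());
            if (!buffer)
                continue;
            propagate_offset(linear_ir, expr, scratchpad_size);  // patch offsets of connected MemoryAccess ops
            scratchpad_size += buffer->get_byte_size();          // reserve this Buffer's slice of the scratchpad
        }
        m_buffer_scratchpad_size = scratchpad_size;
        return true;
    }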
@@ -34,7 +34,6 @@ class BufferInsertion : public LinearIRTransformation {
const LoweredExprIR::LoweredLoopManagerPtr& loop_manager,
const LoweredExprPtr& up_expr, const LoweredExprPtr& down_expr);


int32_t m_buffer_allocation_rank;
};

@@ -28,9 +28,14 @@ class LoopInit : public LinearIRTransformation {
std::vector<int64_t> init_ptr_increments(const std::vector<LoweredExprPort>& loop_inputs,
const std::vector<LoweredExprPort>& loop_outputs,
size_t dim_idx) const;
std::vector<int64_t> init_finalization_offsets(const std::vector<int64_t>& ptr_increments, size_t work_amount) const;
std::vector<int64_t> init_finalization_offsets(const std::vector<int64_t>& finalization_offsets, size_t work_amount) const;
std::vector<int64_t> init_element_type_sizes(const std::vector<LoweredExprPort>& loop_inputs,
const std::vector<LoweredExprPort>& loop_outputs);
void reuse_buffer_increments(std::vector<int64_t>& ptr_increments,
std::vector<int64_t>& finalization_offsets,
const LoweredExprIR& linear_ir,
const std::vector<LoweredExprPort>& loop_inputs,
const std::vector<LoweredExprPort>& loop_outputs);
};

} // namespace lowered
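reuse_buffer_increments is new in this commit; conceptually, ports that read or write a Buffer holding its whole working set must not advance their pointers between iterations, so their increments are suppressed and the Buffer memory is reused. A schematic of the assumed body (port_touches_reused_buffer is an illustrative placeholder):

    for (size_t i = 0; i < ptr_increments.size(); ++i) {
        // Assumed: detect loop ports connected to a reusable Buffer via linear_ir.
        if (port_touches_reused_buffer(i)) {
            ptr_increments[i] = 0;         // pointer stays in place within the loop
            finalization_offsets[i] = 0;   // nothing to rewind after the loop
        }
    }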
29 changes: 0 additions & 29 deletions src/common/snippets/include/snippets/pass/reset_buffer.hpp

This file was deleted.

40 changes: 0 additions & 40 deletions src/common/snippets/include/snippets/pass/vector_to_scalar.hpp

This file was deleted.

4 changes: 3 additions & 1 deletion src/common/snippets/include/snippets/utils.hpp
@@ -24,14 +24,16 @@ inline auto is_scalar_constant(const std::shared_ptr<ngraph::Node>& source_outpu
return ngraph::is_type<ngraph::opset1::Constant>(source_output_node) && ngraph::shape_size(source_output_node->get_shape()) == 1;
}


ov::PartialShape get_port_planar_shape(const Output<Node>& out);
ov::PartialShape get_reordered_planar_shape(const ov::PartialShape& shape, const std::vector<size_t>& layout);
std::vector<size_t> get_node_output_layout(const std::shared_ptr<Node>& node);
std::vector<size_t> get_node_output_layout(const Node* node);
void set_transpose_output_layout(const ov::Output<Node>& port, const std::shared_ptr<opset1::Transpose>& node);
void set_output_layout(const ov::Output<Node>& port, const std::vector<size_t>& layout);

bool get_outside_loop_value(const std::shared_ptr<Node>& node);
void set_outside_loop_value(const std::shared_ptr<Node>& node, bool is_outside = true);

inline ov::Dimension get_inner_dim(const ov::PartialShape &shape) { return *(shape.rbegin()); }
inline ov::Dimension get_outer_dim(const ov::PartialShape &shape) { return *(shape.rbegin() + 1); }

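get_outside_loop_value/set_outside_loop_value presumably keep the flag in the node's runtime info so that LoweredExpr can read it in its constructor (see lowered_expr.cpp below). A sketch under that assumption (the rt_info key name is hypothetical):

    static const char outside_loop_key[] = "snippets_outside_loop";  // hypothetical key

    void set_outside_loop_value(const std::shared_ptr<Node>& node, bool is_outside) {
        node->get_rt_info()[outside_loop_key] = is_outside;
    }

    bool get_outside_loop_value(const std::shared_ptr<Node>& node) {
        const auto& rt_info = node->get_rt_info();
        const auto it = rt_info.find(outside_loop_key);
        return it != rt_info.end() && it->second.as<bool>();
    }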
14 changes: 9 additions & 5 deletions src/common/snippets/src/generator.cpp
@@ -16,7 +16,7 @@
#include "snippets/pass/lowered/load_store_insertion.hpp"
#include "snippets/pass/lowered/vector_to_scalar.hpp"
#include "snippets/pass/lowered/load_movebroadcast_to_broadcastload.hpp"
#include "snippets/pass/lowered/buffer_propagate_offset_and_reset.hpp"
#include "snippets/pass/lowered/buffer_allocation.hpp"
#include "snippets/pass/lowered/propagate_layout.hpp"
#include "snippets/pass/lowered/cleanup_loop_offsets.hpp"
#include "snippets/pass/lowered/softmax_decomposition.hpp"
@@ -40,7 +40,7 @@ Generator::LoweringResult Generator::generate(std::shared_ptr<ov::Model>& m, con
// Note: The pass LoopInit uses LoopInfo that contains entry and exit points of the corresponding Loop.
// To avoid corrupting this Loop information, passes that rework Load/Store ops
// (for example, LoadMoveBroadcastToBroadcastLoad()) should be called after explicit Loop insertion (LoopInit())
const auto propagate_buffer_offsets = std::make_shared<pass::lowered::PropagateOffsetAndResetBuffer>();
const auto buffer_allocation_pass = std::make_shared<pass::lowered::BufferAllocation>();
pass::lowered::LinearIRTransformationPipeline common_pipeline;
common_pipeline.register_transformation<pass::lowered::LoopMarkup>(vector_size);
common_pipeline.register_transformation<pass::lowered::SoftmaxDecomposition>(vector_size);
@@ -53,15 +53,19 @@
common_pipeline.register_transformation<pass::lowered::MoveScalarToConsumer>();
common_pipeline.register_transformation<pass::lowered::LoadMoveBroadcastToBroadcastLoad>();
common_pipeline.register_transformation<pass::lowered::PropagateLayout>();
common_pipeline.register_transformation(propagate_buffer_offsets);
common_pipeline.register_transformation(buffer_allocation_pass);
common_pipeline.register_transformation<pass::lowered::CleanupLoopOffsets>();
common_pipeline.run(linear_ir);

pass::lowered::LinearIRTransformationPipeline target_pipeline = target_specific_transformations();
target_pipeline.run(linear_ir);

std::function<opRegType(const std::shared_ptr<Node>& op)> reg_type_mapper = [&](const std::shared_ptr<Node>& op) -> opRegType {
return get_op_reg_type(op);
};

pass::lowered::LinearIRTransformationPipeline final_pipeline;
final_pipeline.register_transformation<pass::lowered::AssignRegisters>(get_op_reg_type);
final_pipeline.register_transformation<pass::lowered::AssignRegisters>(reg_type_mapper);
final_pipeline.register_transformation<pass::lowered::InsertTailLoop>();
final_pipeline.run(linear_ir);

@@ -85,7 +89,7 @@ Generator::LoweringResult Generator::generate(std::shared_ptr<ov::Model>& m, con
if (config.m_save_lowered_code)
lowered_saved = linear_ir;

return {target->get_snippet(), propagate_buffer_offsets->get_scratchpad_size()};
return {target->get_snippet(), buffer_allocation_pass->get_scratchpad_size()};
}

std::shared_ptr<const TargetMachine> Generator::get_target_machine() const {
8 changes: 5 additions & 3 deletions src/common/snippets/src/lowered_expr.cpp
@@ -3,13 +3,12 @@
//

#include "snippets/lowered_expr.hpp"
#include "snippets/pass/assign_registers.hpp"
#include "snippets/pass/vector_to_scalar.hpp"
#include "snippets/op/loop.hpp"
#include "snippets/op/subgraph.hpp"
#include <snippets/itt.hpp>
#include <snippets/op/serialization_node.hpp>
#include "snippets/tensor_descriptor.hpp"
#include "snippets/utils.hpp"

#include <openvino/core/graph_util.hpp>
#include <openvino/core/type.hpp>
@@ -24,13 +23,15 @@ LoweredExpr::LoweredExpr(const std::shared_ptr<Node>& n) : m_source_node{n}, m_e
m_inputs.emplace_back(get_tensor_descriptor_ptr(in.get_source_output()));
for (const auto& out : n->outputs())
m_outputs.emplace_back(get_tensor_descriptor_ptr(out));
m_is_outside_loop = utils::get_outside_loop_value(n);
}

LoweredExpr::LoweredExpr(const std::shared_ptr<Node>& n, std::vector<TensorDescriptorPtr> inputs, std::vector<TensorDescriptorPtr> outputs)
: m_source_node{n}, m_emitter{nullptr}, m_inputs(std::move(inputs)), m_outputs(std::move(outputs)), m_reg_info{{}, {}} {
if (m_outputs.empty())
for (const auto& out : n->outputs())
m_outputs.emplace_back(get_tensor_descriptor_ptr(out));
m_is_outside_loop = utils::get_outside_loop_value(n);
}

std::shared_ptr<Node> LoweredExpr::get_node() const {
@@ -113,7 +114,8 @@ bool operator!=(const LoweredExprPort& lhs, const LoweredExprPort& rhs) {

bool operator<(const LoweredExprPort& lhs, const LoweredExprPort& rhs) {
OPENVINO_ASSERT(lhs.type == rhs.type, "Incorrect comparison: Ports are from different types!");
return (lhs.expr < rhs.expr) || (lhs.expr == rhs.expr && lhs.port < rhs.port);
// Compare port indices first, then expressions
return (lhs.port < rhs.port) || (lhs.port == rhs.port && lhs.expr < rhs.expr);
}

LoweredExprIR::LoweredExprIR(const std::shared_ptr<ov::Model>& model, LoweringConfig config)
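The comparator change above makes the port index the primary key and falls back to the expression only on ties; the relation stays a strict weak ordering, but iteration over sorted LoweredExprPort containers becomes port-major. Illustration (the member layout in the aggregate init is assumed):

    LoweredExprPort p0{expr, 0, LoweredExprPort::Type::Input};  // assumed member order
    LoweredExprPort p1{expr, 1, LoweredExprPort::Type::Input};
    assert(p0 < p1);     // decided by the port number alone
    assert(!(p1 < p0));  // antisymmetric, as std::set/std::map keys require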
36 changes: 20 additions & 16 deletions src/common/snippets/src/op/brgemm.cpp
@@ -13,11 +13,11 @@ namespace snippets {
namespace op {

Brgemm::Brgemm(const Output<Node>& A, const Output<Node>& B,
const size_t offset_a, const size_t offset_b, const size_t offset_c) : MemoryAccess({A, B}, 2, 1) {
const size_t offset_a, const size_t offset_b, const size_t offset_c) : MemoryAccess({A, B}, std::set<size_t>{0, 1}, std::set<size_t>{0}) {
set_output_size(1);
set_input_offset(offset_a, 0);
set_input_offset(offset_b, 1);
set_output_offset(offset_a, 0);
set_output_offset(offset_c, 0);
constructor_validate_and_infer_types();
}

@@ -27,21 +27,9 @@ void Brgemm::validate_and_infer_types() {
NODE_VALIDATION_CHECK(this, get_input_partial_shape(0).is_static() && get_input_partial_shape(1).is_static(),
"Brgemm currently supports only static shapes.");

std::vector<ov::PartialShape> planar_input_shapes;
for (const auto& in : input_values()) {
const auto& td = ngraph::snippets::get_tensor_descriptor_ptr(in);
const auto& planar_shape = utils::get_reordered_planar_shape(ov::Shape{td->get_tensor()}, td->get_layout());
planar_input_shapes.emplace_back(planar_shape);
}

const auto planar_input_shapes = get_planar_input_shapes(input_values());
auto output_shape = get_output_partial_shape(planar_input_shapes);
const auto& rt_info = get_rt_info();
auto it = rt_info.find(TensorDescriptorPtrVectorAttribute::get_type_info_static());
if (it != rt_info.end()) {
const auto& td = it->second.as<TensorDescriptorPtrVectorAttribute>().m_value[0];
output_shape = utils::get_reordered_planar_shape(output_shape, td->get_layout());
}
set_output_type(0, get_output_type(), output_shape);
set_output_type(0, get_output_type(), get_planar_output_shape(output_shape));
}

std::shared_ptr<Node> Brgemm::clone_with_new_inputs(const OutputVector& new_args) const {
@@ -68,6 +56,22 @@ ov::element::Type Brgemm::get_output_type() const {
}
}

std::vector<ov::PartialShape> Brgemm::get_planar_input_shapes(const std::vector<ov::Output<ov::Node>>& inputs) const {
OPENVINO_ASSERT(inputs.size() == 2, "Brgemm::get_planar_input_shapes() expects 2 inputs");
return { utils::get_port_planar_shape(inputs[0]), utils::get_port_planar_shape(inputs[1]) };
}

ov::PartialShape Brgemm::get_planar_output_shape(const ov::PartialShape& output_shape) const {
// This method can be safely called from validate_and_infer_types() before output creation
const auto& rt_info = get_rt_info();
auto it = rt_info.find(TensorDescriptorPtrVectorAttribute::get_type_info_static());
if (it != rt_info.end()) {
const auto& td = it->second.as<TensorDescriptorPtrVectorAttribute>().m_value[0];
return utils::get_reordered_planar_shape(output_shape, td->get_layout());
}
return output_shape;
}

ov::PartialShape Brgemm::get_output_partial_shape(const std::vector<ov::PartialShape>& input_shapes) const {
NGRAPH_CHECK(input_shapes.size() == 2, "BRGEMM expects 2 input shapes for shape inference");

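The new helpers build on utils::get_port_planar_shape, which reorders a shape by the port's layout. A sketch of the reordering itself, assuming layout[i] names the original dimension placed at position i (reorder_planar is an illustrative stand-in for utils::get_reordered_planar_shape):

    ov::PartialShape reorder_planar(const ov::PartialShape& shape, const std::vector<size_t>& layout) {
        if (layout.empty())
            return shape;  // an empty layout means the shape is already planar
        OPENVINO_ASSERT(layout.size() == shape.size(), "Layout rank must match shape rank");
        ov::PartialShape planar(shape);
        for (size_t i = 0; i < layout.size(); ++i)
            planar[i] = shape[layout[i]];  // place original dimension layout[i] at position i
        return planar;
    }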