From 15a3aaf24eaa16a217ce037f39e88c850561a313 Mon Sep 17 00:00:00 2001 From: Alexandra Sidorova Date: Tue, 2 May 2023 15:50:21 +0400 Subject: [PATCH 01/13] [Snippets] Added support of Port Descriptor --- .../include/snippets/lowered/expression.hpp | 70 +++---- .../snippets/lowered/expression_factory.hpp | 99 +++++++++ .../include/snippets/lowered/linear_ir.hpp | 32 +-- .../include/snippets/lowered/loop_manager.hpp | 31 ++- .../snippets/lowered/pass/fuse_loops.hpp | 6 +- .../snippets/lowered/pass/init_loops.hpp | 8 +- .../snippets/lowered/pass/insert_buffers.hpp | 3 +- .../lowered/pass/insert_load_store.hpp | 4 +- .../include/snippets/lowered/tensor.hpp | 90 ++++++++ .../snippets/include/snippets/op/brgemm.hpp | 2 +- .../snippets/include/snippets/op/subgraph.hpp | 3 - .../snippets/pass/fuse_transpose_brgemm.hpp | 7 + .../snippets/pass/matmul_to_brgemm.hpp | 5 + .../include/snippets/port_descriptor.hpp | 87 ++++++++ .../include/snippets/tensor_descriptor.hpp | 62 ------ .../snippets/include/snippets/utils.hpp | 7 +- src/common/snippets/src/generator.cpp | 1 - .../snippets/src/lowered/expression.cpp | 92 ++++---- .../src/lowered/expression_factory.cpp | 169 +++++++++++++++ src/common/snippets/src/lowered/linear_ir.cpp | 197 +++++++----------- .../snippets/src/lowered/loop_manager.cpp | 119 +++++++---- .../src/lowered/pass/allocate_buffers.cpp | 14 +- .../src/lowered/pass/assign_registers.cpp | 23 +- .../src/lowered/pass/cleanup_loop_offsets.cpp | 2 +- .../snippets/src/lowered/pass/fuse_loops.cpp | 82 ++++---- .../src/lowered/pass/indentify_buffers.cpp | 10 +- .../snippets/src/lowered/pass/init_loops.cpp | 92 ++++---- .../src/lowered/pass/insert_buffers.cpp | 76 +++---- .../src/lowered/pass/insert_load_store.cpp | 50 ++--- .../src/lowered/pass/insert_tail_loop.cpp | 22 +- .../load_movebroadcast_to_broadcastload.cpp | 21 +- .../snippets/src/lowered/pass/mark_loops.cpp | 33 +-- .../pass/move_result_out_from_loop.cpp | 2 +- .../lowered/pass/move_scalar_to_consumer.cpp | 4 
+- .../src/lowered/pass/propagate_layout.cpp | 80 ++++--- .../src/lowered/pass/reset_buffers.cpp | 6 +- .../lowered/pass/softmax_decomposition.cpp | 50 ++--- src/common/snippets/src/lowered/tensor.cpp | 130 ++++++++++++ src/common/snippets/src/op/brgemm.cpp | 18 +- src/common/snippets/src/op/subgraph.cpp | 5 +- .../src/pass/fuse_transpose_brgemm.cpp | 83 +++++--- .../snippets/src/pass/matmul_to_brgemm.cpp | 24 ++- .../src/pass/transpose_decomposition.cpp | 43 ++-- src/common/snippets/src/port_descriptor.cpp | 156 ++++++++++++++ src/common/snippets/src/tensor_descriptor.cpp | 136 ------------ src/common/snippets/src/utils.cpp | 29 ++- .../emitters/x64/jit_snippets_emitters.cpp | 34 +-- .../snippets/x64/op/brgemm_copy_b.cpp | 19 +- .../snippets/x64/op/brgemm_copy_b.hpp | 3 + .../snippets/x64/op/brgemm_cpu.cpp | 51 +++-- .../snippets/x64/op/brgemm_cpu.hpp | 8 +- .../x64/pass/brgemm_to_brgemm_cpu.cpp | 49 ++++- .../lowered/fuse_load_store_and_convert.cpp | 54 +++-- .../src/subgraph_lowered.cpp | 14 +- 54 files changed, 1577 insertions(+), 940 deletions(-) create mode 100644 src/common/snippets/include/snippets/lowered/expression_factory.hpp create mode 100644 src/common/snippets/include/snippets/lowered/tensor.hpp create mode 100644 src/common/snippets/include/snippets/port_descriptor.hpp delete mode 100644 src/common/snippets/include/snippets/tensor_descriptor.hpp create mode 100644 src/common/snippets/src/lowered/expression_factory.cpp create mode 100644 src/common/snippets/src/lowered/tensor.cpp create mode 100644 src/common/snippets/src/port_descriptor.cpp delete mode 100644 src/common/snippets/src/tensor_descriptor.cpp diff --git a/src/common/snippets/include/snippets/lowered/expression.hpp b/src/common/snippets/include/snippets/lowered/expression.hpp index d3367c2abc6475..65864eba7ebe31 100644 --- a/src/common/snippets/include/snippets/lowered/expression.hpp +++ b/src/common/snippets/include/snippets/lowered/expression.hpp @@ -9,9 +9,9 @@ #include #include 
-#include "snippets/tensor_descriptor.hpp" #include "snippets/emitter.hpp" #include "snippets/target_machine.hpp" +#include "snippets/lowered/tensor.hpp" namespace ngraph { @@ -19,30 +19,6 @@ namespace snippets { namespace lowered { class LinearIR; -class Expression; -using ExpressionPtr = std::shared_ptr; - -class ExpressionPort { - friend class Expression; - -public: - enum Type { - Input, - Output - }; - - ExpressionPort() = default; - - Type get_type() const { return m_type; } - - ExpressionPtr expr = nullptr; - size_t port = 0; - -private: - ExpressionPort(const ExpressionPtr& expr, size_t port, Type type); - - Type m_type = Type::Input; -}; class Expression : public std::enable_shared_from_this { friend class LinearIR; @@ -51,11 +27,6 @@ class Expression : public std::enable_shared_from_this { static size_t LOOP_NULL_ID; Expression() = default; - explicit Expression(const std::shared_ptr& n); - // The ctor fills outputs automatically from rt_info and/or tensor shapes - explicit Expression(const std::shared_ptr& n, std::vector inputs); - explicit Expression(const std::shared_ptr& n, std::vector inputs, std::vector outputs); - virtual ~Expression() = default; std::shared_ptr get_node() const; @@ -64,8 +35,10 @@ class Expression : public std::enable_shared_from_this { RegInfo get_reg_info() const { return m_reg_info; } void set_reg_info(RegInfo rinfo) { m_reg_info = std::move(rinfo); } - const std::vector& get_inputs() { return m_inputs; } - const std::vector& get_outputs() { return m_outputs; } + const std::vector& get_inputs() { return m_inputs; } + const std::vector& get_outputs() { return m_outputs; } + size_t get_input_count() const { return m_inputs.size(); } + size_t get_output_count() const { return m_outputs.size(); } std::vector get_loop_ids() const { return m_loop_ids; } void set_loop_ids(const std::vector& loops) { m_loop_ids = loops; } @@ -75,42 +48,49 @@ class Expression : public std::enable_shared_from_this { void init_emitter(const 
std::shared_ptr& target); - ExpressionPort input_port(size_t i); - ExpressionPort output_port(size_t i); + TensorDescriptor input_port(size_t i); + TensorDescriptor output_port(size_t i); protected: - void replace_input(size_t port, TensorDescriptorPtr to); - void replace_output(size_t port, TensorDescriptorPtr to); + // Note: The constructor and tensor initialization are private since an expression can be created only by Linear IR. + // These methods must be used only by Linear IR creator of expressions! + explicit Expression(const std::shared_ptr& n); + void init_inputs_with_validation(const std::vector& inputs); + void init_inputs(const std::vector& inputs) { m_inputs = inputs; } + void init_outputs(const std::vector& outputs) { m_outputs = outputs; } + + // Note: These methods don't control availability of the current expression in this + void replace_input(size_t port, TensorPtr to); + void replace_output(size_t port, TensorPtr to); std::shared_ptr m_source_node{nullptr}; std::shared_ptr m_emitter{nullptr}; - std::vector m_inputs; - std::vector m_outputs; + std::vector m_inputs; + std::vector m_outputs; RegInfo m_reg_info{{}, {}}; // The order Loops identifies: Outer ---> Inner std::vector m_loop_ids; bool m_is_outside_loop = false; }; +using ExpressionPtr = std::shared_ptr; class IOExpression : public Expression { + friend class LinearIR; + public: enum class io_type {INPUT, OUTPUT, UNDEFINED}; - IOExpression(const std::shared_ptr& n, int64_t index); - IOExpression(const std::shared_ptr& n, int64_t index, std::vector inputs); - int64_t get_index() const { return m_index; } io_type get_type() const { return m_type; } private: + explicit IOExpression(const std::shared_ptr& n, int64_t index); + explicit IOExpression(const std::shared_ptr& n, int64_t index); + int64_t m_index = -1; io_type m_type = io_type::UNDEFINED; }; -bool operator==(const ExpressionPort& lhs, const ExpressionPort& rhs); -bool operator!=(const ExpressionPort& lhs, const ExpressionPort& rhs); 
-bool operator<(const ExpressionPort& lhs, const ExpressionPort& rhs); - } // namespace lowered } // namespace snippets } // namespace ngraph diff --git a/src/common/snippets/include/snippets/lowered/expression_factory.hpp b/src/common/snippets/include/snippets/lowered/expression_factory.hpp new file mode 100644 index 00000000000000..0eed43bf1208d7 --- /dev/null +++ b/src/common/snippets/include/snippets/lowered/expression_factory.hpp @@ -0,0 +1,99 @@ +// Copyright (C) 2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include + +#include "linear_ir.hpp" + +namespace ngraph { +namespace snippets { +namespace lowered { + +class LinearIR::BaseExpressionFactory { +public: + BaseExpressionFactory() = default; + BaseExpressionFactory(const LinearIR& linear_ir) : m_linear_ir(linear_ir) {} + + virtual ExpressionPtr build(const std::shared_ptr& n, const std::shared_ptr& model); + virtual ExpressionPtr build(const std::shared_ptr& n, const std::shared_ptr& model, + const std::vector inputs); + virtual ExpressionPtr build(const std::shared_ptr& n, const std::shared_ptr& model, + const std::vector inputs, const std::vector outputs); + + static std::shared_ptr get(const LinearIR& linear_ir, const std::shared_ptr& n); + +protected: + virtual ExpressionPtr create(const std::shared_ptr& n, const std::shared_ptr& model) = 0; + virtual std::vector create_expression_inputs(const ExpressionPtr& expr); + virtual std::vector create_expression_outputs(const ExpressionPtr& expr); + + LinearIR m_linear_ir; +}; + +class LinearIR::ExpressionFactory : public LinearIR::BaseExpressionFactory { +public: + ExpressionFactory() = default; + ExpressionFactory(const LinearIR& linear_ir) : BaseExpressionFactory(linear_ir) {} + + ExpressionPtr build(const std::shared_ptr& n, const std::shared_ptr& model) override; + ExpressionPtr build(const std::shared_ptr& n, const std::shared_ptr& model, + const std::vector inputs) override; + ExpressionPtr build(const 
std::shared_ptr& n, const std::shared_ptr& model, + const std::vector inputs, const std::vector outputs) override; + +protected: + ExpressionPtr create(const std::shared_ptr& n, const std::shared_ptr& model) override; +}; + +class LinearIR::ParameterExpressionFactory : public LinearIR::BaseExpressionFactory { +public: + ParameterExpressionFactory() = default; + ParameterExpressionFactory(const LinearIR& linear_ir) : BaseExpressionFactory(linear_ir) {} + + ExpressionPtr build(const std::shared_ptr& n, const std::shared_ptr& model) override; + +protected: + ExpressionPtr create(const std::shared_ptr& n, const std::shared_ptr& model) override; +}; + +class LinearIR::ResultExpressionFactory : public LinearIR::BaseExpressionFactory { +public: + ResultExpressionFactory() = default; + ResultExpressionFactory(const LinearIR& linear_ir) : BaseExpressionFactory(linear_ir) {} + + ExpressionPtr build(const std::shared_ptr& n, const std::shared_ptr& model) override; + +protected: + ExpressionPtr create(const std::shared_ptr& n, const std::shared_ptr& model) override; +}; + +class LinearIR::LoopBeginExpressionFactory : public LinearIR::BaseExpressionFactory { +public: + LoopBeginExpressionFactory() = default; + LoopBeginExpressionFactory(const LinearIR& linear_ir) : BaseExpressionFactory(linear_ir) {} + + ExpressionPtr build(const std::shared_ptr& n, const std::shared_ptr& model, + const std::vector inputs) override; + +protected: + ExpressionPtr create(const std::shared_ptr& n, const std::shared_ptr& model) override; +}; + +class LinearIR::LoopEndExpressionFactory : public LinearIR::BaseExpressionFactory { +public: + LoopEndExpressionFactory() = default; + LoopEndExpressionFactory(const LinearIR& linear_ir) : BaseExpressionFactory(linear_ir) {} + + ExpressionPtr build(const std::shared_ptr& n, const std::shared_ptr& model, + const std::vector inputs) override; + +protected: + ExpressionPtr create(const std::shared_ptr& n, const std::shared_ptr& model) override; +}; + +} // 
namespace lowered +} // namespace snippets +} // namespace ngraph diff --git a/src/common/snippets/include/snippets/lowered/linear_ir.hpp b/src/common/snippets/include/snippets/lowered/linear_ir.hpp index 3b789e40b1ca79..b5780c741f420e 100644 --- a/src/common/snippets/include/snippets/lowered/linear_ir.hpp +++ b/src/common/snippets/include/snippets/lowered/linear_ir.hpp @@ -18,12 +18,17 @@ class Config { bool m_save_lowered_code = false; // True if we should check runtime info for nodes to call specific needed transformations bool m_need_fill_tail_register = false; - bool m_explicit_loop_insertion = false; ov::PartialShape m_master_shape{}; size_t m_loop_depth = 1; }; class LinearIR { + class BaseExpressionFactory; + class ExpressionFactory; + class ParameterExpressionFactory; + class ResultExpressionFactory; + class LoopBeginExpressionFactory; + class LoopEndExpressionFactory; public: using container = std::list; using io_container = std::list>; @@ -33,6 +38,11 @@ class LinearIR { LinearIR() = default; explicit LinearIR(const std::shared_ptr& m, Config config = {}); + ExpressionPtr create_expression(const std::shared_ptr& n, const std::vector inputs, + const std::shared_ptr& model = nullptr); + ExpressionPtr create_expression(const std::shared_ptr& n, const std::vector inputs, const std::vector outputs, + const std::shared_ptr& model = nullptr); + LinearIR deep_copy() const; static LinearIR::container deep_copy_range(LinearIR::container::const_iterator begin, LinearIR::container::const_iterator end); @@ -41,13 +51,12 @@ class LinearIR { Config get_config() {return m_config; } ExpressionPtr get_expr_by_node(const std::shared_ptr& n) const; - ExpressionPort get_expr_by_output(const TensorDescriptorPtr& n) const; - const std::set& get_exprs_by_input(const TensorDescriptorPtr& n) const; - void replace_input(const ExpressionPort& expr_port, const TensorDescriptorPtr& to); - void replace_input(const ExpressionPtr& expr, size_t port, const TensorDescriptorPtr& to); - 
void replace_output(const ExpressionPort& expr_port, const TensorDescriptorPtr& to); - void replace_output(const ExpressionPtr& expr, size_t port, const TensorDescriptorPtr& to); + void replace_input(const std::vector& consumers, const TensorPtr& to); + void replace_input(const TensorDescriptor& expr_port, const TensorPtr& to); + void replace_input(const ExpressionPtr& expr, size_t port, const TensorPtr& to); + void replace_output(const TensorDescriptor& expr_port, const TensorPtr& to); + void replace_output(const ExpressionPtr& expr, size_t port, const TensorPtr& to); /** * @brief Move an expression from the position "from" to the position immediately before "to". @@ -96,6 +105,10 @@ class LinearIR { const LoopManagerPtr& get_loop_manager() const { return m_loop_manager; } private: + // Default ctor - can be called only from Linear IR initialization as default way + ExpressionPtr create_expression(const std::shared_ptr& n, + const std::shared_ptr& model = nullptr); + void register_expression(const ExpressionPtr& expr); // Like register_expression, but doesn't allow Parameter or Result registration. You can do it only through ctor void register_regular_expression(const ExpressionPtr& expr); @@ -103,11 +116,6 @@ class LinearIR { container m_lowered_ops{}; std::unordered_map, std::shared_ptr> m_node2expression_map; - // Expression must be uniquely identified by an output, so there can't be expressions that have the same output - std::unordered_map m_output2expression_map; - // At the same time, several expressions can have the same input if they are connected to the same parent - // E.g. 
LoopEnd will always have the same input as a Load inside the loop (since it has to increment the same reg) - std::unordered_map> m_input2expression_map; io_container m_io_lowered_ops; Config m_config{}; LoopManagerPtr m_loop_manager = nullptr; diff --git a/src/common/snippets/include/snippets/lowered/loop_manager.hpp b/src/common/snippets/include/snippets/lowered/loop_manager.hpp index 4c3f171995a200..4606e769c0998a 100644 --- a/src/common/snippets/include/snippets/lowered/loop_manager.hpp +++ b/src/common/snippets/include/snippets/lowered/loop_manager.hpp @@ -9,7 +9,7 @@ #include #include -#include "snippets/tensor_descriptor.hpp" +#include "snippets/port_descriptor.hpp" namespace ngraph { namespace snippets { @@ -23,8 +23,8 @@ class LinearIR::LoopManager { public: LoopInfo() = default; LoopInfo(size_t work_amount, size_t increment, - const std::vector& entries, - const std::vector& exits) + const std::vector& entries, + const std::vector& exits) : work_amount(work_amount), increment(increment), entry_exprs(entries), exit_exprs(exits) {} size_t work_amount = 0; size_t increment = 0; @@ -32,8 +32,8 @@ class LinearIR::LoopManager { // - The position before first entry expr is Loop Begin position // - The position after last exit expr is Loop End position // Note: Scalars aren't entry expressions but can be before first entry expr in Linear IR - std::vector entry_exprs = {}; - std::vector exit_exprs = {}; + std::vector entry_exprs = {}; + std::vector exit_exprs = {}; }; using LoopInfoPtr = std::shared_ptr; @@ -46,26 +46,24 @@ class LinearIR::LoopManager { static void skipped_mark(LinearIR::constExprIt loop_begin_pos, LinearIR::constExprIt loop_end_pos, size_t loop_depth); - void mark_loop(LinearIR& linear_ir, - LinearIR::constExprIt loop_begin_pos, + void mark_loop(LinearIR::constExprIt loop_begin_pos, LinearIR::constExprIt loop_end_pos, size_t loop_depth, size_t vector_size); - void mark_loop(LinearIR& linear_ir, - LinearIR::constExprIt loop_begin_pos, + void 
mark_loop(LinearIR::constExprIt loop_begin_pos, LinearIR::constExprIt loop_end_pos, size_t idx, size_t work_amount, size_t work_amount_increment, - const std::vector& entries, - const std::vector& exits); + const std::vector& entries, + const std::vector& exits); void get_loop_bounds(const LinearIR& linear_ir, size_t loop_id, LinearIR::constExprIt& loop_begin_pos, LinearIR::constExprIt& loop_end_pos) const; static void get_loop_bounds(const LinearIR& linear_ir, - const std::vector& entries, - const std::vector& exits, + const std::vector& entries, + const std::vector& exits, LinearIR::constExprIt& loop_begin_pos, LinearIR::constExprIt& loop_end_pos, size_t loop_id = Expression::LOOP_NULL_ID); @@ -74,11 +72,10 @@ class LinearIR::LoopManager { static void exprs_marking(LinearIR::constExprIt loop_begin_pos, LinearIR::constExprIt loop_end_pos, size_t loop_id, size_t idx); - static void get_io_loop_ports(LinearIR& linear_ir, - LinearIR::constExprIt loop_begin_pos, + static void get_io_loop_ports(LinearIR::constExprIt loop_begin_pos, LinearIR::constExprIt loop_end_pos, - std::vector& entries, - std::vector& exits); + std::vector& entries, + std::vector& exits); std::map m_map = {}; size_t next_id = 0; diff --git a/src/common/snippets/include/snippets/lowered/pass/fuse_loops.hpp b/src/common/snippets/include/snippets/lowered/pass/fuse_loops.hpp index 1f355fbe9dfbb6..288c267f33dba3 100644 --- a/src/common/snippets/include/snippets/lowered/pass/fuse_loops.hpp +++ b/src/common/snippets/include/snippets/lowered/pass/fuse_loops.hpp @@ -28,14 +28,14 @@ class FuseLoops : public Transformation { static bool can_be_fused(const LinearIR::LoopManager::LoopInfoPtr& loop_current, const LinearIR::LoopManager::LoopInfoPtr& loop_target); static bool fuse_upper_into_current(LinearIR& linear_ir, const LinearIR::LoopManagerPtr& loop_manager, - const ExpressionPort& current_entry_point, const ExpressionPort& target_exit_point, + const TensorDescriptor& current_entry_point, const 
TensorDescriptor& target_exit_point, size_t current_loop_id, size_t target_loop_id, size_t dim_idx, LinearIR::constExprIt& current_loop_begin_pos, LinearIR::constExprIt& current_loop_end_pos); static bool fuse_lower_into_current(LinearIR& linear_ir, const LinearIR::LoopManagerPtr& loop_manager, - const ExpressionPort& current_entry_point, const ExpressionPort& target_exit_point, + const TensorDescriptor& current_entry_point, const TensorDescriptor& target_exit_point, size_t current_loop_id, size_t target_loop_id, size_t dim_idx, LinearIR::constExprIt& current_loop_begin_pos, LinearIR::constExprIt& current_loop_end_pos); - static void fuse_points(LinearIR& linear_ir, std::vector& exit_points, std::vector& entry_points, + static void fuse_points(std::vector& exit_points, std::vector& entry_points, LinearIR::constExprIt loop_begin_pos, LinearIR::constExprIt loop_end_pos); }; diff --git a/src/common/snippets/include/snippets/lowered/pass/init_loops.hpp b/src/common/snippets/include/snippets/lowered/pass/init_loops.hpp index 064c5200170e52..48bfecd7c471d2 100644 --- a/src/common/snippets/include/snippets/lowered/pass/init_loops.hpp +++ b/src/common/snippets/include/snippets/lowered/pass/init_loops.hpp @@ -27,12 +27,12 @@ class InitLoops : public Transformation { private: bool insertion(LinearIR& linear_ir, const LinearIR::LoopManager::LoopInfoPtr& loop_info, size_t loop_id, size_t dim_idx, bool has_outer_loop); - std::vector init_ptr_increments(const std::vector& loop_inputs, - const std::vector& loop_outputs, + std::vector init_ptr_increments(const std::vector& loop_inputs, + const std::vector& loop_outputs, size_t dim_idx) const; std::vector init_finalization_offsets(const std::vector& finalization_offsets, size_t work_amount) const; - std::vector init_element_type_sizes(const std::vector& loop_inputs, - const std::vector& loop_outputs); + std::vector init_element_type_sizes(const std::vector& loop_inputs, + const std::vector& loop_outputs); }; } // namespace pass 
diff --git a/src/common/snippets/include/snippets/lowered/pass/insert_buffers.hpp b/src/common/snippets/include/snippets/lowered/pass/insert_buffers.hpp index 552ca10ab94863..70d769c8faed5c 100644 --- a/src/common/snippets/include/snippets/lowered/pass/insert_buffers.hpp +++ b/src/common/snippets/include/snippets/lowered/pass/insert_buffers.hpp @@ -5,7 +5,6 @@ #pragma once #include "transformation.hpp" -#include "snippets/tensor_descriptor.hpp" namespace ngraph { namespace snippets { @@ -28,7 +27,7 @@ class InsertBuffers : public Transformation { private: void insertion(LinearIR& linear_ir, const LinearIR::LoopManagerPtr& loop_manager, size_t loop_id, - const std::vector& loop_entries, const std::vector& loop_exits); + const std::vector& loop_entries, const std::vector& loop_exits); LinearIR::constExprIt insertion_position(const LinearIR& linear_ir, const LinearIR::LoopManagerPtr& loop_manager, diff --git a/src/common/snippets/include/snippets/lowered/pass/insert_load_store.hpp b/src/common/snippets/include/snippets/lowered/pass/insert_load_store.hpp index bbc29656084324..6d9bde2b26f3a5 100644 --- a/src/common/snippets/include/snippets/lowered/pass/insert_load_store.hpp +++ b/src/common/snippets/include/snippets/lowered/pass/insert_load_store.hpp @@ -30,9 +30,9 @@ class InsertLoadStore : public Transformation { bool insert_load(LinearIR& linear_ir, const LinearIR::constExprIt& data_expr_it); bool insert_store(LinearIR& linear_ir, const LinearIR::constExprIt& data_expr_it); void update_loops(const LinearIR::LoopManagerPtr& loop_manager, const std::vector& loop_ids, - const ExpressionPort& actual_port, const std::vector& target_ports, bool is_entry = true); + const TensorDescriptor& actual_port, const std::vector& target_ports, bool is_entry = true); void update_loop(const LinearIR::LoopManager::LoopInfoPtr& loop_info, - const ExpressionPort& actual_port, const std::vector& target_ports, bool is_entry = true); + const TensorDescriptor& actual_port, const std::vector& 
target_ports, bool is_entry = true); std::vector get_loops_for_update(const std::vector& loop_ids, size_t loop_id); size_t m_vector_size; diff --git a/src/common/snippets/include/snippets/lowered/tensor.hpp b/src/common/snippets/include/snippets/lowered/tensor.hpp new file mode 100644 index 00000000000000..06487cd80a195c --- /dev/null +++ b/src/common/snippets/include/snippets/lowered/tensor.hpp @@ -0,0 +1,90 @@ +// Copyright (C) 2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include +#include + +#include "snippets/port_descriptor.hpp" + + +namespace ngraph { +namespace snippets { +namespace lowered { + +class Expression; + +class TensorDescriptor { +public: + enum Type { + Input, + Output + }; + + TensorDescriptor() = default; + explicit TensorDescriptor(const std::weak_ptr& expr, Type type, size_t port, + const std::vector& tensor = {}, const std::vector& layout = {}, const std::vector& subtensor = {}); + explicit TensorDescriptor(const std::weak_ptr& expr, Type type, size_t port, const PortDescriptorPtr& port_desc = nullptr); + + std::shared_ptr get_expr_ptr() const; + const std::weak_ptr& get_expr_wptr() const { return m_expr; } + Type get_type() const { return m_type; } + size_t get_index() const { return m_port_index; } + + std::vector get_tensor() const { return m_port_desc->get_tensor(); } + std::vector get_layout() const { return m_port_desc->get_layout(); } + std::vector get_subtensor() const { return m_port_desc->get_subtensor(); } + PortDescriptorPtr get_port_descriptor() const { return m_port_desc; } + + void set_tensor(const std::vector& tensor) { m_port_desc->set_tensor(tensor); } + void set_layout(const std::vector& layout) { m_port_desc->set_layout(layout); } + void set_subtensor(const std::vector& subtensor) { m_port_desc->set_subtensor(subtensor); } + void set_port_descriptor(const PortDescriptorPtr& desc) { m_port_desc = desc; } + + friend bool operator==(const TensorDescriptor& lhs, const 
TensorDescriptor& rhs); + friend bool operator!=(const TensorDescriptor& lhs, const TensorDescriptor& rhs); + friend bool operator<(const TensorDescriptor& lhs, const TensorDescriptor& rhs); + friend std::ostream& operator<<(std::ostream&, const TensorDescriptor& td); + +private: + std::weak_ptr m_expr; + Type m_type = Type::Output; + size_t m_port_index = 0; + PortDescriptorPtr m_port_desc; +}; + +class Tensor { +public: + Tensor() = default; + explicit Tensor(const TensorDescriptor& source_descriptor, const std::vector& consumer_descriptors = {}); + + TensorDescriptor& get_source() { return m_source_port; } + const TensorDescriptor& get_source() const { return m_source_port; } + const std::vector& get_consumers() const { return m_consumer_ports; } + + void add_consumer(const TensorDescriptor& consumer); + void remove_consumer(const TensorDescriptor& consumer); + bool found_consumer(const TensorDescriptor& consumer) const; + std::vector::const_iterator find_consumer(const TensorDescriptor& consumer) const; + std::vector::iterator find_consumer(const TensorDescriptor& consumer); + + std::vector get_conflicted_consumers() const; + bool is_conflicted_consumer(const TensorDescriptor& consumer) const; + + // The scheduling params of Tensor is controlled by source expression port + std::vector get_tensor() const { return m_source_port.get_tensor(); } + std::vector get_layout() const { return m_source_port.get_layout(); } + std::vector get_subtensor() const { return m_source_port.get_subtensor(); } + +private: + TensorDescriptor m_source_port; + std::vector m_consumer_ports; +}; +using TensorPtr = std::shared_ptr; + + +} // namespace lowered +} // namespace snippets +} // namespace ngraph diff --git a/src/common/snippets/include/snippets/op/brgemm.hpp b/src/common/snippets/include/snippets/op/brgemm.hpp index 6d7e08a9d05ffb..c1aec360c4dce7 100644 --- a/src/common/snippets/include/snippets/op/brgemm.hpp +++ b/src/common/snippets/include/snippets/op/brgemm.hpp @@ -34,7 
+34,7 @@ class Brgemm : public MemoryAccess { protected: ov::element::Type get_output_type() const; - std::vector get_planar_input_shapes(const std::vector>& inputs) const; + std::vector get_planar_input_shapes(const std::vector>& inputs) const; ov::PartialShape get_output_partial_shape(const std::vector& input_shapes) const; ov::PartialShape get_planar_output_shape(const ov::PartialShape& output_shape) const; }; diff --git a/src/common/snippets/include/snippets/op/subgraph.hpp b/src/common/snippets/include/snippets/op/subgraph.hpp index f7bae5aaeac815..092d6a35c9d8ca 100644 --- a/src/common/snippets/include/snippets/op/subgraph.hpp +++ b/src/common/snippets/include/snippets/op/subgraph.hpp @@ -172,9 +172,6 @@ class Subgraph : public ov::op::util::SubGraphOp { // True if body has operations that don't support plugin-side domain optimizations // (e.g. Transpose, Softmax, MatMul in general doesn't support dimensions collapsing) bool m_has_domain_sensitive_ops = false; - // True if we should go through whole body to check for where loops should be explicitly inserted. 
- // Otherwise, we insert Loops on Parameters and Results - for example, it's optimized out for subgraph with only Eltwise ops - bool m_explicit_loop_insertion = false; } config; }; diff --git a/src/common/snippets/include/snippets/pass/fuse_transpose_brgemm.hpp b/src/common/snippets/include/snippets/pass/fuse_transpose_brgemm.hpp index 15929f908c774b..6ffa77e53ccfa9 100644 --- a/src/common/snippets/include/snippets/pass/fuse_transpose_brgemm.hpp +++ b/src/common/snippets/include/snippets/pass/fuse_transpose_brgemm.hpp @@ -7,6 +7,10 @@ #include "ngraph/pass/graph_rewrite.hpp" #include "ngraph/pattern/matcher.hpp" +#include "openvino/op/transpose.hpp" + +#include "snippets/port_descriptor.hpp" + namespace ngraph { namespace snippets { namespace pass { @@ -23,6 +27,9 @@ class FuseTransposeBrgemm: public ngraph::pass::MatcherPass { OPENVINO_RTTI("FuseTransposeBrgemm", "0"); FuseTransposeBrgemm(); static const std::set> supported_cases; + +private: + static bool is_supported_transpose(const Output& transpose_port); }; } // namespace pass diff --git a/src/common/snippets/include/snippets/pass/matmul_to_brgemm.hpp b/src/common/snippets/include/snippets/pass/matmul_to_brgemm.hpp index 4cfbd1fa394edb..dbe7d3446d398c 100644 --- a/src/common/snippets/include/snippets/pass/matmul_to_brgemm.hpp +++ b/src/common/snippets/include/snippets/pass/matmul_to_brgemm.hpp @@ -7,6 +7,8 @@ #include "ngraph/pass/graph_rewrite.hpp" #include "ngraph/pattern/matcher.hpp" +#include "snippets/op/brgemm.hpp" + namespace ngraph { namespace snippets { namespace pass { @@ -20,6 +22,9 @@ class MatMulToBrgemm: public ngraph::pass::MatcherPass { public: OPENVINO_RTTI("MatMulToBrgemm", "0"); MatMulToBrgemm(); + +private: + void init_ports(const std::shared_ptr& brgemm) const; }; diff --git a/src/common/snippets/include/snippets/port_descriptor.hpp b/src/common/snippets/include/snippets/port_descriptor.hpp new file mode 100644 index 00000000000000..622df4264f42e5 --- /dev/null +++ 
b/src/common/snippets/include/snippets/port_descriptor.hpp @@ -0,0 +1,87 @@ +// Copyright (C) 2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include "openvino/core/node.hpp" +#include "openvino/core/attribute_visitor.hpp" + + +namespace ngraph { +namespace snippets { + +class PortDescriptor { +public: + explicit PortDescriptor(const ov::Input& node, + std::vector subtensor_shape = {}, + std::vector layout = {}); + explicit PortDescriptor(const ov::Input& node, + std::vector subtensor_shape = {}, + std::vector layout = {}); + explicit PortDescriptor(const ov::Output& node, + std::vector subtensor_shape = {}, + std::vector layout = {}); + explicit PortDescriptor(const ov::Output& node, + std::vector subtensor_shape = {}, + std::vector layout = {}); + PortDescriptor(std::vector shape, std::vector subtensor_shape, std::vector layout = {}); + PortDescriptor() = default; + + std::vector get_tensor() const {return m_tensor_shape;} + std::vector get_subtensor() const {return m_subtensor_shape;} + std::vector get_layout() const {return m_layout;} + + void set_tensor(const std::vector& tensor) { m_tensor_shape = tensor; } + void set_layout(const std::vector& layout) { m_layout = layout; } + void set_subtensor(const std::vector& subtensor) { m_subtensor_shape = subtensor; } + + static PortDescriptor deserialize(const std::string& serialized_info); + std::string serialize() const; + bool empty() const { return m_layout.empty() && m_subtensor_shape.empty();} + + friend bool operator==(const PortDescriptor& lhs, const PortDescriptor& rhs); + friend bool operator!=(const PortDescriptor& lhs, const PortDescriptor& rhs) {return !(lhs == rhs);} + +private: + void validate_arguments(); + /// \brief Original tensor shape + std::vector m_tensor_shape{}; + /// \brief Order of dimensions: NCHW == {0, 1, 2, 3}, NHWC == {0, 2, 3, 1}, NCHW16c == {0, 1, 2, 3, 1} + std::vector m_layout{}; + /// \brief Minimal tensor size that could be processed in one 
call + std::vector m_subtensor_shape{}; +}; +using PortDescriptorPtr = std::shared_ptr; + +class PortManager { +public: + static void set_port_descriptor_ptr(const ov::Input& n, const PortDescriptorPtr& desc); + static void set_port_descriptor_ptr(const ov::Output& n, const PortDescriptorPtr& desc); + + static PortDescriptorPtr get_port_descriptor_ptr(const ov::Input& in); + static PortDescriptorPtr get_port_descriptor_ptr(const ov::Input& out); + static PortDescriptorPtr get_port_descriptor_ptr(const ov::Output& in); + static PortDescriptorPtr get_port_descriptor_ptr(const ov::Output& out); + +private: + static void init_default(std::vector& in_descs, std::vector& out_descs, const std::shared_ptr& node); +}; + +class PortDescriptorVectorAttribute : public ov::RuntimeAttribute { +public: + OPENVINO_RTTI("PortDescriptorVectorAttribute", "0"); + + PortDescriptorVectorAttribute() = default; + explicit PortDescriptorVectorAttribute(std::vector in_descs = {}, std::vector out_descs = {}) + : inputs(std::move(in_descs)), outputs(std::move(out_descs)) {} + + void set_input_port_descriptor(const PortDescriptorPtr& desc, size_t index); + void set_output_port_descriptor(const PortDescriptorPtr& desc, size_t index); + + std::vector inputs{}; + std::vector outputs{}; +}; + +} // namespace snippets +} // namespace ngraph diff --git a/src/common/snippets/include/snippets/tensor_descriptor.hpp b/src/common/snippets/include/snippets/tensor_descriptor.hpp deleted file mode 100644 index bd676222d33ab6..00000000000000 --- a/src/common/snippets/include/snippets/tensor_descriptor.hpp +++ /dev/null @@ -1,62 +0,0 @@ -// Copyright (C) 2023 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 -// - -#pragma once - -#include "openvino/core/node.hpp" -#include "openvino/core/attribute_visitor.hpp" - - -namespace ngraph { -namespace snippets { -class TensorDescriptorAttribute; -class TensorDescriptor { - friend class TensorDescriptorAttribute; -public: -explicit TensorDescriptor(const 
Output& node, - std::vector subtensor_shape = {}, - std::vector layout = {}); -explicit TensorDescriptor(const Output& node, - std::vector subtensor_shape = {}, - std::vector layout = {}); - TensorDescriptor(std::vector tensor_shape, - std::vector subtensor_shape, - std::vector layout = {}); - TensorDescriptor() = default; - static TensorDescriptor deserialize(const std::string& serialized_info); - std::string serialize() const; - std::vector get_tensor() const {return m_tensor_shape;} - std::vector get_subtensor() const {return m_subtensor_shape;} - std::vector get_layout() const {return m_layout;} - bool empty() const { return m_tensor_shape.empty() && m_layout.empty() && m_subtensor_shape.empty();} - friend bool operator==(const TensorDescriptor& lhs, const TensorDescriptor& rhs); - friend bool operator!=(const TensorDescriptor& lhs, const TensorDescriptor& rhs) {return !(lhs == rhs);} - -private: - void validate_arguments(); - /// \brief Original tensor shape - std::vector m_tensor_shape{}; - /// \brief Order of dimensions: NCHW == {0, 1, 2, 3}, NHWC == {0, 2, 3, 1}, NCHW16c == {0, 1, 2, 3, 1} - std::vector m_layout{}; - /// \brief Minimal tensor size that could be processed in one call - std::vector m_subtensor_shape{}; -}; - -std::ostream& operator << (std::ostream&, const TensorDescriptor& td); -using TensorDescriptorPtr = std::shared_ptr; -class TensorDescriptorPtrVectorAttribute : public ov::RuntimeAttribute { -public: - OPENVINO_RTTI("TensorDescriptorVectorAttribute", "0"); - - TensorDescriptorPtrVectorAttribute() = default; - explicit TensorDescriptorPtrVectorAttribute(std::vector descriptor) : m_value(std::move(descriptor)) {} - std::vector m_value{}; -}; - -void set_tensor_descriptor_ptr(const Output& n, const TensorDescriptorPtr& desc); -TensorDescriptorPtr get_tensor_descriptor_ptr(const Output& out); -TensorDescriptorPtr get_tensor_descriptor_ptr(const Output& out); - -} // namespace snippets -} // namespace ngraph diff --git 
a/src/common/snippets/include/snippets/utils.hpp b/src/common/snippets/include/snippets/utils.hpp index ec719971923101..463a8d870526aa 100644 --- a/src/common/snippets/include/snippets/utils.hpp +++ b/src/common/snippets/include/snippets/utils.hpp @@ -24,19 +24,16 @@ inline auto is_scalar_constant(const std::shared_ptr& source_outpu return ngraph::is_type(source_output_node) && ngraph::shape_size(source_output_node->get_shape()) == 1; } +ov::PartialShape get_port_planar_shape(const Input& out); ov::PartialShape get_port_planar_shape(const Output& out); ov::PartialShape get_reordered_planar_shape(const ov::PartialShape& shape, const std::vector& layout); +ov::Shape get_reordered_shape(const ov::Shape& shape, const std::vector& layout); std::vector get_node_output_layout(const std::shared_ptr& node); std::vector get_node_output_layout(const Node* node); -void set_transpose_output_layout(const ov::Output& port, const std::shared_ptr& node); -void set_output_layout(const ov::Output& port, const std::vector& layout); bool get_outside_loop_value(const std::shared_ptr& node); void set_outside_loop_value(const std::shared_ptr& node, bool is_outside = true); -inline ov::Dimension get_inner_dim(const ov::PartialShape &shape) { return *(shape.rbegin()); } -inline ov::Dimension get_outer_dim(const ov::PartialShape &shape) { return *(shape.rbegin() + 1); } - inline auto normalize_rank(int32_t allocation_rank, const size_t shape_rank) -> int32_t { return allocation_rank < 0 ? 
allocation_rank + static_cast(shape_rank) + 1 : allocation_rank; } diff --git a/src/common/snippets/src/generator.cpp b/src/common/snippets/src/generator.cpp index b8968a97d28126..5f166619b1c7f7 100644 --- a/src/common/snippets/src/generator.cpp +++ b/src/common/snippets/src/generator.cpp @@ -24,7 +24,6 @@ #include "snippets/lowered/pass/indentify_buffers.hpp" #include "snippets/op/kernel.hpp" -#include "snippets/tensor_descriptor.hpp" #include diff --git a/src/common/snippets/src/lowered/expression.cpp b/src/common/snippets/src/lowered/expression.cpp index e543e211d57b7f..60c65de14154fd 100644 --- a/src/common/snippets/src/lowered/expression.cpp +++ b/src/common/snippets/src/lowered/expression.cpp @@ -16,31 +16,8 @@ namespace lowered { size_t Expression::LOOP_NULL_ID = SIZE_MAX; -ExpressionPort::ExpressionPort(const ExpressionPtr& expr, size_t port, Type type) : expr(expr), port(port), m_type(type) { - if (type == Type::Input) { - OPENVINO_ASSERT(port < expr->get_inputs().size(), "The input port must be less than input count"); - } else if (type == Type::Output) { - OPENVINO_ASSERT(port < expr->get_outputs().size(), "The output port must be less than output count"); - } -} - Expression::Expression(const std::shared_ptr& n) - : m_source_node{n}, m_emitter{nullptr}, m_reg_info{{}, {}}, m_is_outside_loop(utils::get_outside_loop_value(n)) { - for (const auto& in : n->inputs()) - m_inputs.emplace_back(get_tensor_descriptor_ptr(in.get_source_output())); - for (const auto& out : n->outputs()) - m_outputs.emplace_back(get_tensor_descriptor_ptr(out)); -} - -Expression::Expression(const std::shared_ptr& n, std::vector inputs) - : m_source_node{n}, m_emitter{nullptr}, m_inputs(std::move(inputs)), m_reg_info{{}, {}}, m_is_outside_loop(utils::get_outside_loop_value(n)) { - for (const auto& out : n->outputs()) - m_outputs.emplace_back(get_tensor_descriptor_ptr(out)); -} - -Expression::Expression(const std::shared_ptr& n, std::vector inputs, std::vector outputs) - : 
m_source_node{n}, m_emitter{nullptr}, m_inputs(std::move(inputs)), m_outputs(std::move(outputs)), - m_reg_info{{}, {}}, m_is_outside_loop(utils::get_outside_loop_value(n)) {} + : m_source_node{n}, m_emitter{nullptr}, m_inputs{}, m_outputs{}, m_reg_info{{}, {}}, m_is_outside_loop(utils::get_outside_loop_value(n)) {} std::shared_ptr Expression::get_node() const { if (!m_source_node) @@ -49,19 +26,19 @@ std::shared_ptr Expression::get_node() const { } std::shared_ptr Expression::get_emitter() const { - return m_emitter; + return m_emitter; } void Expression::init_emitter(const std::shared_ptr& target) { m_emitter = target->get(m_source_node->get_type_info())(m_source_node); } -void Expression::replace_input(size_t port, TensorDescriptorPtr to) { +void Expression::replace_input(size_t port, TensorPtr to) { OPENVINO_ASSERT(port < m_inputs.size(), "Failed to replace: target input port must be less than input count!"); m_inputs[port] = std::move(to); } -void Expression::replace_output(size_t port, TensorDescriptorPtr to) { +void Expression::replace_output(size_t port, TensorPtr to) { OPENVINO_ASSERT(port < m_outputs.size(), "Failed to replace: target output port must be less than output count!"); m_outputs[port] = std::move(to); } @@ -81,40 +58,49 @@ void Expression::remove_loop_id(size_t id) { *it = Expression::LOOP_NULL_ID; } -ExpressionPort Expression::input_port(size_t i) { +void Expression::init_inputs_with_validation(const std::vector& inputs) { + auto is_service_expr = [&](){ + return ov::is_type(m_source_node); + }; + for (size_t i = 0; i < inputs.size(); ++i) { + const auto& input = inputs[i]; + const auto consumers = input->get_consumers(); + const auto found = std::find_if(consumers.begin(), consumers.end(), + [&](const TensorDescriptor& desc) { + return desc.get_index() == i && desc.get_expr_ptr().get() == this->shared_from_this().get(); + }); + if (found == consumers.end()) { + const auto port_desc = is_service_expr() ? 
input->get_source().get_port_descriptor() + : PortManager::get_port_descriptor_ptr(m_source_node->input(i)); + const auto tensor_desc = TensorDescriptor(this->shared_from_this(), TensorDescriptor::Type::Input, i, port_desc); + input->add_consumer(tensor_desc); + } + } + m_inputs = inputs; +} + +TensorDescriptor Expression::input_port(size_t i) { OPENVINO_ASSERT(i < m_inputs.size(), "Failed to get input port: target input port must be less than input count!"); - return ExpressionPort(this->shared_from_this(), i, ExpressionPort::Type::Input); + const auto& input = m_inputs[i]; + const auto& consumers = input->get_consumers(); + const auto found = std::find_if(consumers.begin(), consumers.end(), + [&](const TensorDescriptor& desc) { + return desc.get_index() == i && desc.get_expr_ptr().get() == this->shared_from_this().get(); + }); + OPENVINO_ASSERT(found != consumers.end(), "Input TensorDescriptor for Expression hasn't found in input Tensor!"); + return *found; } -ExpressionPort Expression::output_port(size_t i) { +TensorDescriptor Expression::output_port(size_t i) { OPENVINO_ASSERT(i < m_outputs.size(), "Failed to get output port: target output port must be less than output count!"); - return ExpressionPort(this->shared_from_this(), i, ExpressionPort::Type::Output); + return m_outputs[i]->get_source(); } IOExpression::IOExpression(const std::shared_ptr& par, int64_t index) - : Expression(par), m_index(index), m_type{io_type::INPUT} { -} + : Expression(par), m_index(index), m_type{io_type::INPUT} {} +IOExpression::IOExpression(const std::shared_ptr& res, int64_t index) + : Expression(res), m_index(index), m_type{io_type::OUTPUT} {} -IOExpression::IOExpression(const std::shared_ptr& res, int64_t index, std::vector inputs) - : Expression(res, inputs, {}), m_index(index), m_type{io_type::OUTPUT} { -} - -bool operator==(const ExpressionPort& lhs, const ExpressionPort& rhs) { - if (&lhs == &rhs) - return true; - OPENVINO_ASSERT(lhs.get_type() == rhs.get_type(), "Incorrect 
comparison: Ports are from different types!"); - return lhs.expr == rhs.expr && lhs.port == rhs.port; -} - -bool operator!=(const ExpressionPort& lhs, const ExpressionPort& rhs) { - return !(lhs == rhs); -} - -bool operator<(const ExpressionPort& lhs, const ExpressionPort& rhs) { - OPENVINO_ASSERT(lhs.get_type() == rhs.get_type(), "Incorrect comparison: Ports are from different types!"); - // Firstly ports - return (lhs.port < rhs.port) || (lhs.port == rhs.port && lhs.expr < rhs.expr); -} }// namespace lowered }// namespace snippets }// namespace ngraph diff --git a/src/common/snippets/src/lowered/expression_factory.cpp b/src/common/snippets/src/lowered/expression_factory.cpp new file mode 100644 index 00000000000000..ffd13178061656 --- /dev/null +++ b/src/common/snippets/src/lowered/expression_factory.cpp @@ -0,0 +1,169 @@ +// Copyright (C) 2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "snippets/lowered/expression_factory.hpp" + +#include "snippets/snippets_isa.hpp" + +namespace ngraph { +namespace snippets { +namespace lowered { + +ExpressionPtr LinearIR::BaseExpressionFactory::build(const std::shared_ptr& n, const std::shared_ptr& model) { + OPENVINO_THROW("The Factory doesn't support default builder"); +} +ExpressionPtr LinearIR::BaseExpressionFactory::build(const std::shared_ptr& n, const std::shared_ptr& model, + const std::vector inputs) { + OPENVINO_THROW("The Factory doesn't support builder with just input tensors"); +} +ExpressionPtr LinearIR::BaseExpressionFactory::build(const std::shared_ptr& n, const std::shared_ptr& model, + const std::vector inputs, const std::vector outputs) { + OPENVINO_THROW("The Factory doesn't support builder with input and outputs tensors"); +} + +std::shared_ptr LinearIR::BaseExpressionFactory::get(const LinearIR& linear_ir, const std::shared_ptr& n) { + if (ov::is_type(n)) { + return std::make_shared(linear_ir); + } + if (ov::is_type(n)) { + return std::make_shared(linear_ir); + } + if 
(ov::is_type(n)) { + return std::make_shared(linear_ir); + } + if (ov::is_type(n)) { + return std::make_shared(linear_ir); + } + return std::make_shared(linear_ir); +} + +std::vector LinearIR::BaseExpressionFactory::create_expression_inputs(const ExpressionPtr& expr) { + OPENVINO_ASSERT(expr != nullptr, "Failed expression inputs creation: expression is null"); + const auto& node = expr->get_node(); + + std::vector inputs(node->get_input_size(), nullptr); + for (const auto& input : node->inputs()) { + const auto input_source = input.get_source_output(); + const auto in_index = input.get_index(); + const auto out_index = input_source.get_index(); + const auto parent = input_source.get_node_shared_ptr(); + const auto parent_expr = m_linear_ir.get_expr_by_node(parent); + const auto tensor = parent_expr->get_outputs()[out_index]; + const auto tensor_desc = TensorDescriptor(expr, TensorDescriptor::Type::Input, in_index, PortManager::get_port_descriptor_ptr(input)); + tensor->add_consumer(tensor_desc); + inputs[in_index] = tensor; + } + return inputs; +} + +std::vector LinearIR::BaseExpressionFactory::create_expression_outputs(const ExpressionPtr& expr) { + OPENVINO_ASSERT(expr != nullptr, "Failed expression outputs creation: expression is null"); + const auto& node = expr->get_node(); + + std::vector outputs(node->get_output_size(), nullptr); + for (const auto& output : node->outputs()) { + const auto out_index = output.get_index(); + const auto tensor_desc = TensorDescriptor(expr, TensorDescriptor::Type::Output, out_index, PortManager::get_port_descriptor_ptr(output)); + outputs[out_index] = std::make_shared(tensor_desc); + } + return outputs; +} + +ExpressionPtr LinearIR::ExpressionFactory::create(const std::shared_ptr& n, const std::shared_ptr& model) { + // Note: ctor of shared_ptr isn't friend class for Expression + return std::make_shared(Expression(n)); +} + +ExpressionPtr LinearIR::ExpressionFactory::build(const std::shared_ptr& n, const std::shared_ptr& model) { 
+ const auto expr = create(n, model); + expr->init_inputs(create_expression_inputs(expr)); + expr->init_outputs(create_expression_outputs(expr)); + return expr; +} + +ExpressionPtr LinearIR::ExpressionFactory::build(const std::shared_ptr& n, const std::shared_ptr& model, + const std::vector inputs) { + const auto expr = create(n, model); + expr->init_inputs_with_validation(inputs); + expr->init_outputs(create_expression_outputs(expr)); + return expr; +} + +ExpressionPtr LinearIR::ExpressionFactory::build(const std::shared_ptr& n, const std::shared_ptr& model, + const std::vector inputs, const std::vector outputs) { + const auto expr = create(n, model); + expr->init_inputs_with_validation(inputs); + expr->init_outputs(outputs); + return expr; +} + +ExpressionPtr LinearIR::ParameterExpressionFactory::create(const std::shared_ptr& n, const std::shared_ptr& model) { + // Note: ctor of shared_ptr isn't friend class for Expression -> we cannot use directly make_shared(args) + if (const auto& par = as_type_ptr(n)) { + OPENVINO_ASSERT(model != nullptr, "To create IOExpression from Parameter there must be inited model!"); + return std::make_shared(IOExpression(par, model->get_parameter_index(par))); + } + OPENVINO_THROW("ParameterExpressionFactory support only Parameter node"); +} + +ExpressionPtr LinearIR::ParameterExpressionFactory::build(const std::shared_ptr& n, const std::shared_ptr& model) { + const auto expr = create(n, model); + expr->init_inputs({}); + expr->init_outputs(create_expression_outputs(expr)); + return expr; +} + +ExpressionPtr LinearIR::ResultExpressionFactory::create(const std::shared_ptr& n, const std::shared_ptr& model) { + // Note: ctor of shared_ptr isn't friend class for Expression -> we cannot use directly make_shared(args) + if (const auto& res = as_type_ptr(n)) { + OPENVINO_ASSERT(model != nullptr, "To create IOExpression from Result there must be inited model!"); + return std::make_shared(IOExpression(res, model->get_result_index(res))); + } + 
OPENVINO_THROW("ResultExpressionFactory support only Result node"); +} + +ExpressionPtr LinearIR::ResultExpressionFactory::build(const std::shared_ptr& n, const std::shared_ptr& model) { + const auto expr = create(n, model); + expr->init_inputs(create_expression_inputs(expr)); + expr->init_outputs({}); + return expr; +} + +ExpressionPtr LinearIR::LoopBeginExpressionFactory::create(const std::shared_ptr& n, const std::shared_ptr& model) { + // Note: ctor of shared_ptr isn't friend class for Expression -> we cannot use directly make_shared(args) + if (const auto& op = as_type_ptr(n)) { + return std::make_shared(Expression(op)); + } + OPENVINO_THROW("LoopBeginExpressionFactory support only LoopBegin node"); +} + +ExpressionPtr LinearIR::LoopBeginExpressionFactory::build(const std::shared_ptr& n, const std::shared_ptr& model, + const std::vector inputs) { + OPENVINO_ASSERT(inputs.empty(), "LoopBegin cannot have inputs"); + const auto expr = create(n, model); + expr->init_inputs(inputs); + expr->init_outputs(create_expression_outputs(expr)); + return expr; +} + +ExpressionPtr LinearIR::LoopEndExpressionFactory::create(const std::shared_ptr& n, const std::shared_ptr& model) { + // Note: ctor of shared_ptr isn't friend class for Expression -> we cannot use directly make_shared(args) + if (const auto& op = as_type_ptr(n)) { + return std::make_shared(Expression(op)); + } + OPENVINO_THROW("LoopEndExpressionFactory support only LoopEnd node"); +} + +ExpressionPtr LinearIR::LoopEndExpressionFactory::build(const std::shared_ptr& n, const std::shared_ptr& model, + const std::vector inputs) { + const auto expr = create(n, model); + expr->init_inputs_with_validation(inputs); + expr->init_outputs({}); + return expr; +} + + +}// namespace lowered +}// namespace snippets +}// namespace ngraph diff --git a/src/common/snippets/src/lowered/linear_ir.cpp b/src/common/snippets/src/lowered/linear_ir.cpp index 976efd62e7f639..c3b33a52f5d5bd 100644 --- 
a/src/common/snippets/src/lowered/linear_ir.cpp +++ b/src/common/snippets/src/lowered/linear_ir.cpp @@ -7,8 +7,8 @@ #include #include "snippets/lowered/loop_manager.hpp" +#include "snippets/lowered/expression_factory.hpp" #include -#include "snippets/tensor_descriptor.hpp" #include "snippets/utils.hpp" #include @@ -24,41 +24,49 @@ LinearIR::LinearIR(const std::shared_ptr& model, Config config) ExpressionPtr last_param = nullptr; for (const auto& n : get_ordered_ops(model)) { constExprIt insertion_pos = m_lowered_ops.end(); - std::shared_ptr expr; - std::vector input_tds; - for (const auto& in : n->inputs()) { - const auto& out = in.get_source_output(); - const auto& parent_out_tds = m_node2expression_map[out.get_node_shared_ptr()]->get_outputs(); - input_tds.push_back(parent_out_tds[out.get_index()]); + const auto expr = create_expression(n, model); + + // Scalar should be on the Linear IR beginning after Parameters to have valid expression order after Loop passes. + // After these passes we must call pass MoveScalarToConsumer() to have a correct accuracy. 
+ // For more details, please see the pass description + if (const auto& scalar = as_type_ptr(n)) { + if (scalar_pos == m_lowered_ops.end()) { + OPENVINO_ASSERT(last_param, "Scalars must be executed after Parameters"); + scalar_pos = std::find(m_lowered_ops.begin(), m_lowered_ops.end(), last_param); + } + insertion_pos = std::next(scalar_pos); } - if (const auto& par = as_type_ptr(n)) { - auto io_expr = std::make_shared(par, model->get_parameter_index(par)); - m_io_lowered_ops.push_back(io_expr); - expr = io_expr; - last_param = expr; - } else if (const auto& res = as_type_ptr(n)) { - auto io_expr = std::make_shared(res, model->get_result_index(res), input_tds); + + if (const auto io_expr = std::dynamic_pointer_cast(expr)) { + register_expression(expr); m_io_lowered_ops.push_back(io_expr); - expr = io_expr; + if (ov::is_type(n)) + last_param = expr; } else { - if (const auto& scalar = as_type_ptr(n)) { - // Scalar should be on the Linear IR beginning after Parameters to have valid expression order after Loop passes. - // After these passes we must call pass MoveScalarToConsumer() to have a correct accuracy. 
- // For more details, please see the pass description - if (scalar_pos == m_lowered_ops.end()) { - OPENVINO_ASSERT(last_param, "Scalars must be executed after Parameters"); - scalar_pos = std::find(m_lowered_ops.begin(), m_lowered_ops.end(), last_param); - } - insertion_pos = std::next(scalar_pos); - } - // Note that output tds must be empty since they are filled automatically from rt_info and/or tensor shapes - expr = std::make_shared(n, input_tds); + register_regular_expression(expr); } - register_expression(expr); + m_lowered_ops.insert(insertion_pos, expr); } } +ExpressionPtr LinearIR::create_expression(const std::shared_ptr& n, const std::shared_ptr& model) { + const auto factory = BaseExpressionFactory::get(*this, n); + return factory->build(n, model); +} + +ExpressionPtr LinearIR::create_expression(const std::shared_ptr& n, const std::vector inputs, + const std::shared_ptr& model) { + const auto factory = BaseExpressionFactory::get(*this, n); + return factory->build(n, model, inputs); +} + +ExpressionPtr LinearIR::create_expression(const std::shared_ptr& n, const std::vector inputs, const std::vector outputs, + const std::shared_ptr& model) { + const auto factory = BaseExpressionFactory::get(*this, n); + return factory->build(n, model, inputs, outputs); +} + ov::NodeVector LinearIR::get_ordered_ops(const std::shared_ptr& m) { if (!m->get_sinks().empty()) OPENVINO_THROW("Linear IR is not supposed to work for model with sinks. 
Check your transformation pipeline."); @@ -125,7 +133,7 @@ void LinearIR::debug_print(bool tds_as_pointers) const { std::cerr << i << " "; std::cerr << "}"; }; - std::map td2int; + std::map td2int; int td_counter = 0; int counter = 0; for (const auto& expr : m_lowered_ops) { @@ -145,11 +153,11 @@ void LinearIR::debug_print(bool tds_as_pointers) const { std::cerr << td2int.at(out) << ", "; } } else { - for (const auto& in : expr->get_inputs()) - std::cerr << *in << ", "; + for (size_t i = 0; i < expr->get_input_count(); ++i) + std::cerr << expr->input_port(i) << ", "; std::cerr << "\b\b => "; - for (const auto& out : expr->get_outputs()) - std::cerr << *out << ", "; + for (size_t i = 0; i < expr->get_output_count(); ++i) + std::cerr << expr->output_port(i) << ", "; } std::cerr << "\b\b"; const auto& rinfo = expr->get_reg_info(); @@ -171,59 +179,49 @@ ExpressionPtr LinearIR::get_expr_by_node(const std::shared_ptr& n) const { return found == m_node2expression_map.end() ? nullptr : found->second; } -ExpressionPort LinearIR::get_expr_by_output(const TensorDescriptorPtr& td) const { - auto found = m_output2expression_map.find(td); - if (found == m_output2expression_map.end()) - OPENVINO_THROW("Failed to find expression by output tensor descriptor"); - return found->second; -} - -const std::set& LinearIR::get_exprs_by_input(const TensorDescriptorPtr& td) const { - auto found = m_input2expression_map.find(td); - if (found == m_input2expression_map.end()) - OPENVINO_THROW("Failed to find expression by input tensor descriptor"); - return found->second; +void LinearIR::replace_input(const std::vector& consumers, const TensorPtr& to) { + for (const auto& consumer_input : consumers) { + replace_input(consumer_input, to); + } } -void LinearIR::replace_input(const ExpressionPtr& expr, size_t port, const TensorDescriptorPtr& to) { +void LinearIR::replace_input(const ExpressionPtr& expr, size_t port, const TensorPtr& to) { replace_input(expr->input_port(port), to); } -void 
LinearIR::replace_input(const ExpressionPort& expr_port, const TensorDescriptorPtr& to) { - const auto& expr = expr_port.expr; - const auto port = expr_port.port; - OPENVINO_ASSERT(expr_port.get_type() == ExpressionPort::Type::Input, "Failed to replace: target input port must have Input type"); - OPENVINO_ASSERT(port < expr->m_inputs.size(), "Failed to replace: target input port must be less than input count!"); - const auto from = expr->m_inputs[port]; - auto found = m_input2expression_map.find(from); - if (found == m_input2expression_map.end() || found->second.count(expr_port) == 0) - OPENVINO_THROW("Invalid expression of input was provided to replace_input"); - found->second.erase(expr_port); - { - const auto& res = m_input2expression_map.insert({to, std::set{expr_port}}); - // If input is already in the map => add ExprPtr to the mapped set - if (!res.second) { - res.first->second.insert(expr_port); - } +void LinearIR::replace_input(const TensorDescriptor& expr_port, const TensorPtr& to) { + const auto port = expr_port.get_index(); + const auto expr = expr_port.get_expr_ptr(); + + OPENVINO_ASSERT(expr_port.get_type() == TensorDescriptor::Type::Input, "Failed to replace: target input port must have Input type"); + OPENVINO_ASSERT(expr_port.get_index() < expr->get_input_count(), "Failed to replace: target input port must be less than input count!"); + + const auto& from = expr->m_inputs[port]; + if (from.get() == to.get()) + return; + + if (!to->found_consumer(expr_port)) { + to->add_consumer(expr_port); } + from->remove_consumer(expr_port); expr->replace_input(port, std::move(to)); } -void LinearIR::replace_output(const ExpressionPtr& expr, size_t port, const TensorDescriptorPtr& to) { +void LinearIR::replace_output(const ExpressionPtr& expr, size_t port, const TensorPtr& to) { replace_output(expr->output_port(port), to); } -void LinearIR::replace_output(const ExpressionPort& expr_port, const TensorDescriptorPtr& to) { - const auto& expr = expr_port.expr; - const 
auto port = expr_port.port; - OPENVINO_ASSERT(expr_port.get_type() == ExpressionPort::Type::Output, "Failed to replace: target output port must have Output type"); - OPENVINO_ASSERT(port < expr->m_outputs.size(), "Failed to replace: target output port must be less than output count!"); - const auto from = expr->m_outputs[port]; - auto found = m_output2expression_map.find(from); - if (found == m_output2expression_map.end() || found->second != expr_port) - OPENVINO_THROW("Invalid expression of output was provided to replace_output"); - m_output2expression_map.erase(found); - m_output2expression_map[to] = expr_port; +void LinearIR::replace_output(const TensorDescriptor& expr_port, const TensorPtr& to) { + const auto port = expr_port.get_index(); + const auto expr = expr_port.get_expr_ptr(); + + OPENVINO_ASSERT(expr_port.get_type() == TensorDescriptor::Type::Output, "Failed to replace: target output port must have Output type"); + OPENVINO_ASSERT(port < expr->get_output_count(), "Failed to replace: target output port must be less than output count!"); + const auto to_source_td = to->get_source(); + OPENVINO_ASSERT(to_source_td.get_expr_ptr().get() == expr.get() && to_source_td.get_index() == port, + "Failed to replace: incorrect new output Tensor. 
Source expr must be the current expr"); + if (expr->get_outputs()[port].get() == to.get()) + return; expr->replace_output(port, to); } @@ -240,39 +238,12 @@ void LinearIR::register_expression(const ExpressionPtr& expr) { if (!res.second) OPENVINO_THROW("Duplicate node is detected in linear IR: " + std::string(node->get_friendly_name())); } - for (size_t i = 0; i < expr->m_outputs.size(); ++i) { - const auto& out = expr->m_outputs[i]; - m_output2expression_map[out] = expr->output_port(i); - } - - for (size_t i = 0; i < expr->m_inputs.size(); ++i) { - const auto& in = expr->m_inputs[i]; - const auto expr_port = expr->input_port(i); - const auto& res = m_input2expression_map.insert({in, std::set{expr_port}}); - // If input is already in the map => add ExprPtr to the mapped set - if (!res.second) { - res.first->second.insert(expr_port); - } - } } void LinearIR::unregister_expression(const ExpressionPtr& expr) { - for (const auto& out : expr->m_outputs) - m_output2expression_map.erase(out); - - size_t in_port = 0; - for (const auto& in : expr->m_inputs) { - const auto& found = m_input2expression_map.find(in); - if (found != m_input2expression_map.end()) { - // Note: If the input is used by only by this expr => delete the whole entry - // Otherwise delete the expr from the users set - auto& users = found->second; - if (users.size() == 1) - m_input2expression_map.erase(found); - else - users.erase(expr->input_port(in_port)); - } - ++in_port; + for (size_t i = 0; i < expr->get_input_count(); ++i) { + const auto& input = expr->get_inputs()[i]; + input->remove_consumer(expr->input_port(i)); } m_node2expression_map.erase(expr->get_node()); @@ -303,14 +274,7 @@ LinearIR::exprIt LinearIR::insert(constExprIt pos, constExprIt begin, constExprI LinearIR::exprIt LinearIR::insert(LinearIR::constExprIt pos, const NodeVector& nodes) { auto ret = m_lowered_ops.end(); for (const auto& n : nodes) { - std::vector input_tds; - for (const auto& in : n->inputs()) { - const auto& out = 
in.get_source_output(); - const auto& parent_out_tds = m_node2expression_map[out.get_node_shared_ptr()]->get_outputs(); - input_tds.push_back(parent_out_tds[out.get_index()]); - } - // Note that output tds must be empty since they are filled automatically from rt_info and/or tensor shapes - const auto& expr = std::make_shared(n, input_tds); + const auto& expr = create_expression(n); register_regular_expression(expr); ret = m_lowered_ops.insert(pos, expr); } @@ -319,14 +283,7 @@ LinearIR::exprIt LinearIR::insert(LinearIR::constExprIt pos, const NodeVector& n } LinearIR::exprIt LinearIR::insert(LinearIR::constExprIt pos, const std::shared_ptr& n) { - std::vector input_tds; - for (const auto& in : n->inputs()) { - const auto& out = in.get_source_output(); - const auto& parent_out_tds = m_node2expression_map[out.get_node_shared_ptr()]->get_outputs(); - input_tds.push_back(parent_out_tds[out.get_index()]); - } - // Note that output tds must be empty since they are filled automatically from rt_info and/or tensor shapes - const auto& expr = std::make_shared(n, input_tds); + const auto& expr = create_expression(n); register_regular_expression(expr); return m_lowered_ops.insert(pos, expr); } diff --git a/src/common/snippets/src/lowered/loop_manager.cpp b/src/common/snippets/src/lowered/loop_manager.cpp index cf2caeea807631..ef9e736b2532a1 100644 --- a/src/common/snippets/src/lowered/loop_manager.cpp +++ b/src/common/snippets/src/lowered/loop_manager.cpp @@ -5,7 +5,7 @@ #include "snippets/lowered/loop_manager.hpp" #include "snippets/lowered/expression.hpp" -#include "snippets/tensor_descriptor.hpp" +#include "snippets/utils.hpp" #include #include @@ -44,19 +44,19 @@ void LinearIR::LoopManager::get_loop_bounds(const LinearIR &linear_ir, LinearIR::constExprIt &loop_begin_pos, LinearIR::constExprIt &loop_end_pos) const { const auto loop_info = get_loop_info(loop_id); - get_loop_bounds(linear_ir, loop_info->entry_exprs, loop_info->exit_exprs, loop_begin_pos, loop_end_pos, - 
loop_id); + get_loop_bounds(linear_ir, loop_info->entry_exprs, loop_info->exit_exprs, loop_begin_pos, loop_end_pos, loop_id); } void LinearIR::LoopManager::get_loop_bounds(const LinearIR &linear_ir, - const std::vector &entries, - const std::vector &exits, + const std::vector &entries, + const std::vector &exits, LinearIR::constExprIt &loop_begin_pos, LinearIR::constExprIt &loop_end_pos, size_t loop_id) { OPENVINO_ASSERT(!entries.empty(), "Loop must have entry points"); OPENVINO_ASSERT(!exits.empty(), "Loop must have entry points"); - loop_begin_pos = std::find(linear_ir.begin(), linear_ir.end(), entries.front().expr); + const auto& entry_expr = entries.front().get_expr_ptr(); + loop_begin_pos = std::find(linear_ir.begin(), linear_ir.end(), entry_expr); OPENVINO_ASSERT(loop_begin_pos != linear_ir.end(), "Loop begin hasn't been found!"); // Some operations in Loop can be before first entry points: Scalars, VectorBuffer. @@ -68,15 +68,15 @@ void LinearIR::LoopManager::get_loop_bounds(const LinearIR &linear_ir, } // At the moment all Loops must have exit points - loop_end_pos = std::next(std::find(loop_begin_pos, linear_ir.end(), exits.back().expr)); + const auto& exit_expr = exits.back().get_expr_ptr(); + loop_end_pos = std::next(std::find(loop_begin_pos, linear_ir.end(), exit_expr)); OPENVINO_ASSERT(loop_end_pos != linear_ir.end(), "Loop end hasn't been found!"); } -void LinearIR::LoopManager::get_io_loop_ports(LinearIR &linear_ir, - LinearIR::constExprIt loop_begin_pos, +void LinearIR::LoopManager::get_io_loop_ports(LinearIR::constExprIt loop_begin_pos, LinearIR::constExprIt loop_end_pos, - std::vector &entries, - std::vector &exits) { + std::vector &entries, + std::vector &exits) { entries.clear(); exits.clear(); for (auto expr_it = loop_begin_pos; expr_it != loop_end_pos; ++expr_it) { @@ -86,7 +86,7 @@ void LinearIR::LoopManager::get_io_loop_ports(LinearIR &linear_ir, for (size_t in_port = 0; in_port < inputs.size(); ++in_port) { const auto in_td = 
inputs[in_port]; - const auto parent_expr = linear_ir.get_expr_by_output(in_td).expr; + const auto parent_expr = in_td->get_source().get_expr_ptr(); if (!ov::is_type(parent_expr->get_node()) && std::find(loop_begin_pos, expr_it, parent_expr) == expr_it) { entries.push_back(expr->input_port(in_port)); @@ -95,9 +95,10 @@ void LinearIR::LoopManager::get_io_loop_ports(LinearIR &linear_ir, for (size_t out_port = 0; out_port < outputs.size(); ++out_port) { const auto out_td = outputs[out_port]; - const auto consumer_exprs = linear_ir.get_exprs_by_input(out_td); - for (const auto& conumer_expr : consumer_exprs) { - if (std::find(expr_it, loop_end_pos, conumer_expr.expr) == loop_end_pos) { + const auto consumer_ports = out_td->get_consumers(); + for (const auto& consumer : consumer_ports) { + const auto consumer_expr = consumer.get_expr_ptr(); + if (std::find(expr_it, loop_end_pos, consumer_expr) == loop_end_pos) { exits.push_back(expr->output_port(out_port)); break; } @@ -116,13 +117,11 @@ void LinearIR::LoopManager::skipped_mark(LinearIR::constExprIt loop_begin_pos, } } -void LinearIR::LoopManager::mark_loop(LinearIR &linear_ir, - LinearIR::constExprIt loop_begin_pos, +void LinearIR::LoopManager::mark_loop(LinearIR::constExprIt loop_begin_pos, LinearIR::constExprIt loop_end_pos, size_t loop_depth, size_t vector_size) { - std::vector loop_entry_points, loop_exit_points; - LoopManager::get_io_loop_ports(linear_ir, loop_begin_pos, loop_end_pos, loop_entry_points, - loop_exit_points); + std::vector loop_entry_points, loop_exit_points; + LoopManager::get_io_loop_ports(loop_begin_pos, loop_end_pos, loop_entry_points, loop_exit_points); auto broadcast = [](std::vector &lhs, const std::vector &rhs) -> void { if (rhs == lhs) @@ -130,7 +129,6 @@ void LinearIR::LoopManager::mark_loop(LinearIR &linear_ir, const auto lhs_size = lhs.size(); const auto rhs_size = rhs.size(); const auto size = std::max(lhs_size, rhs_size); - std::vector result(size, 1); lhs.resize(size, 1); for (size_t 
i = 0; i < size; ++i) { const auto lhs_value = i < lhs_size ? *(lhs.crbegin() + i) : 1; @@ -141,53 +139,82 @@ void LinearIR::LoopManager::mark_loop(LinearIR &linear_ir, } }; + auto found_port = [](const std::vector& ports, const TensorDescriptor& target) { + return std::find_if(ports.begin(), ports.end(), [&target](const TensorDescriptor& port) { + return port.get_expr_ptr().get() == target.get_expr_ptr().get() && + port.get_index() == target.get_index() && + port.get_type() == target.get_type(); + }) != ports.end(); + }; + std::vector loop_subtensor; std::vector loop_layout; std::vector loop_tensor(1, 1); // Scalar for (const auto& exit_point : loop_exit_points) { - const auto expr = exit_point.expr; - const auto port = exit_point.port; - const auto out_td = expr->get_outputs()[port]; - const auto out_tensor = out_td->get_tensor(); - const auto out_layout = out_td->get_layout(); + const auto out_tensor = utils::get_reordered_shape(exit_point.get_tensor(), exit_point.get_layout()); broadcast(loop_tensor, out_tensor); - if (loop_layout.empty()) - loop_layout = out_layout; - OPENVINO_ASSERT(loop_layout == out_layout, "Output layouts of Loop must be the same!"); + + // SubTensor and Layout inside Loops must be the same. 
+ // We have to verify that input of exit point isn't entry point or Constant to check for subtensor and layout because of + // then this input is not inside Loop + const auto& expr = exit_point.get_expr_ptr(); + for (size_t i = 0; i < expr->get_input_count(); ++i) { + const auto port = expr->input_port(i); + const auto parent = expr->get_inputs()[port.get_index()]->get_source().get_expr_ptr()->get_node(); + if (!found_port(loop_entry_points, port) && !ov::is_type(parent)) { + if (loop_subtensor.empty()) + loop_subtensor = port.get_subtensor(); + if (loop_layout.empty()) + loop_layout = port.get_layout(); + OPENVINO_ASSERT(loop_subtensor == port.get_subtensor(), "SubTensor inside Loop must be the same"); + OPENVINO_ASSERT(loop_layout == port.get_layout(), "Layout inside Loop must be the same"); + } + } } for (const auto& entry_point : loop_entry_points) { - const auto expr = entry_point.expr; - const auto out_td = expr->get_outputs().front(); - const auto out_subtensor = out_td->get_subtensor(); - if (loop_subtensor.empty()) - loop_subtensor = out_subtensor; - OPENVINO_ASSERT(loop_subtensor == out_subtensor, "Subtensors of Loop must be the same!"); + const auto in_tensor = utils::get_reordered_shape(entry_point.get_tensor(), entry_point.get_layout()); + broadcast(loop_tensor, in_tensor); + + // SubTensor and Layout inside Loops must be the same. 
+ // We have to verify that output of entry point isn't exit point to check for subtensor and layout because of + // then this output is not inside Loop + const auto& expr = entry_point.get_expr_ptr(); + for (size_t i = 0; i < expr->get_output_count(); ++i) { + const auto port = expr->output_port(i); + if (!found_port(loop_exit_points, port)) { + if (loop_subtensor.empty()) + loop_subtensor = port.get_subtensor(); + if (loop_layout.empty()) + loop_layout = port.get_layout(); + OPENVINO_ASSERT(loop_subtensor == port.get_subtensor(), "SubTensor inside Loop must be the same"); + OPENVINO_ASSERT(loop_layout == port.get_layout(), "Layout inside Loop must be the same"); + } + } } for (size_t dim_idx = 0; dim_idx < loop_depth; ++dim_idx) { OPENVINO_ASSERT(dim_idx < loop_tensor.size(), "Incorrect indexes of Loop for markup"); - const auto dim = loop_layout.size() >= dim_idx ? *(loop_layout.rbegin() + dim_idx) : 0; - const auto work_amount = loop_tensor.size() > dim ? loop_tensor[dim] : 0; + const auto work_amount = + loop_tensor.size() > dim_idx ? *(loop_tensor.rbegin() + dim_idx) + : 0; const auto work_amount_increment = - loop_subtensor.size() > dim_idx ? *(loop_subtensor.rbegin() + dim_idx) : - dim_idx == 0 ? vector_size : 1; + loop_subtensor.size() > dim_idx ? *(loop_subtensor.rbegin() + dim_idx) + : (dim_idx == 0 ? 
vector_size : 1); - mark_loop(linear_ir, loop_begin_pos, loop_end_pos, loop_depth - dim_idx - 1, work_amount, + mark_loop(loop_begin_pos, loop_end_pos, loop_depth - dim_idx - 1, work_amount, work_amount_increment, loop_entry_points, loop_exit_points); } } -void LinearIR::LoopManager::mark_loop(LinearIR &linear_ir, - LinearIR::constExprIt loop_begin_pos, +void LinearIR::LoopManager::mark_loop(LinearIR::constExprIt loop_begin_pos, LinearIR::constExprIt loop_end_pos, size_t idx, size_t work_amount, size_t work_amount_increment, - const std::vector &entries, - const std::vector &exits) { - const auto loop_info = std::make_shared( - work_amount, work_amount_increment, entries, exits); + const std::vector &entries, + const std::vector &exits) { + const auto loop_info = std::make_shared(work_amount, work_amount_increment, entries, exits); const auto loop_id = this->add_loop_info(loop_info); exprs_marking(loop_begin_pos, loop_end_pos, loop_id, idx); } diff --git a/src/common/snippets/src/lowered/pass/allocate_buffers.cpp b/src/common/snippets/src/lowered/pass/allocate_buffers.cpp index 9e17b573aa274e..4cdd5ec8853bea 100644 --- a/src/common/snippets/src/lowered/pass/allocate_buffers.cpp +++ b/src/common/snippets/src/lowered/pass/allocate_buffers.cpp @@ -22,9 +22,9 @@ void AllocateBuffers::propagate_offset(const LinearIR& linear_ir, const Expressi { if (buffer->is_intermediate_memory()) { OPENVINO_ASSERT(buffer_expr->get_inputs().size() == 1, "Buffer with intermediate memory must have one parent"); - const auto& parent_output = linear_ir.get_expr_by_output(buffer_expr->get_inputs()[0]); - const auto& parent_expr = parent_output.expr; - const auto port = parent_output.port; + const auto& parent_output = buffer_expr->get_inputs()[0]->get_source(); + const auto& parent_expr = parent_output.get_expr_ptr(); + const auto port = parent_output.get_index(); const auto& parent_node = parent_expr->get_node(); auto memory_access = ov::as_type_ptr(parent_node); if (memory_access && 
memory_access->is_memory_access_output_port(port)) { @@ -37,9 +37,9 @@ void AllocateBuffers::propagate_offset(const LinearIR& linear_ir, const Expressi } // Propagate to down: in Load. Buffer can have several Load const auto& buffer_out = buffer_expr->get_outputs()[0]; - for (const auto& child_expr_input : linear_ir.get_exprs_by_input(buffer_out)) { - const auto& child_expr = child_expr_input.expr; - const auto port = child_expr_input.port; + for (const auto& child_expr_input : buffer_out->get_consumers()) { + const auto& child_expr = child_expr_input.get_expr_ptr(); + const auto port = child_expr_input.get_index(); const auto& child_node = child_expr->get_node(); auto memory_access = ov::as_type_ptr(child_node); if (memory_access && memory_access->is_memory_access_input_port(port)) { @@ -70,7 +70,7 @@ bool AllocateBuffers::run(LinearIR& linear_ir) { } if (buffer->is_intermediate_memory()) { - const auto& parent_expr = linear_ir.get_expr_by_output(expr_it->get()->get_inputs()[0]).expr; + const auto& parent_expr = expr_it->get()->get_inputs()[0]->get_source().get_expr_ptr(); const auto& parent_node = parent_expr->get_node(); // Full MemoryAccess ops need new memory. 
Previous logic is to check for parent isn't Loop // TODO: It should be unified in MemoryManager with memory reuse in the near future diff --git a/src/common/snippets/src/lowered/pass/assign_registers.cpp b/src/common/snippets/src/lowered/pass/assign_registers.cpp index 1d770d1b5e6c5e..e15a932eb7fb53 100644 --- a/src/common/snippets/src/lowered/pass/assign_registers.cpp +++ b/src/common/snippets/src/lowered/pass/assign_registers.cpp @@ -19,7 +19,7 @@ namespace pass { bool AssignRegisters::run(LinearIR& linear_ir) { OV_ITT_SCOPED_TASK(ngraph::pass::itt::domains::SnippetsTransform, "Snippets::AssignRegisters") using Reg = size_t; - using tensor = snippets::TensorDescriptorPtr; + using tensor = TensorPtr; auto& expressions = linear_ir.get_ops(); std::vector> typed_ops; @@ -66,19 +66,19 @@ bool AssignRegisters::run(LinearIR& linear_ir) { // We should manually set the one vector register for VectorBuffer and Max/Sum output to simulate a accumulator // TODO [96351]: We should rewrite accumulator pattern using another way const auto input_td = expr->get_inputs()[0]; - const auto& input_expr = linear_ir.get_expr_by_output(input_td).expr; + const auto& input_expr = input_td->get_source().get_expr_ptr(); const auto& input_expr_input_tds = input_expr->get_inputs(); for (const auto& td : input_expr_input_tds) { - if (ov::is_type(linear_ir.get_expr_by_output(td).expr->get_node())) { + if (ov::is_type(td->get_source().get_expr_ptr()->get_node())) { manually_assigned_vecs[td] = static_cast(accumulator_reg); } } const auto output_td = expr->get_outputs()[0]; manually_assigned_vecs[input_td] = static_cast(accumulator_reg); manually_assigned_vecs[output_td] = static_cast(accumulator_reg); - for (const auto& child_expr_input : linear_ir.get_exprs_by_input(output_td)) { - if (ov::is_type(child_expr_input.expr->get_node())) { - manually_assigned_vecs[child_expr_input.expr->get_outputs()[0]] = + for (const auto& child_expr_input : output_td->get_consumers()) { + if 
(ov::is_type(child_expr_input.get_expr_ptr()->get_node())) { + manually_assigned_vecs[child_expr_input.get_expr_ptr()->get_outputs()[0]] = static_cast(accumulator_reg); } } @@ -86,11 +86,11 @@ bool AssignRegisters::run(LinearIR& linear_ir) { // TODO: Fix via common pipeline using LoopEnd: // All operations `outside loop` after Horizon ops should have the same register to avoid using it in the next Loop const auto current_loops_ids = expr->get_loop_ids(); - auto next_expr = linear_ir.get_exprs_by_input(output_td).begin()->expr; + auto next_expr = output_td->get_consumers().begin()->get_expr_ptr(); while (next_expr->get_loop_ids() == current_loops_ids) { manually_assigned_vecs[next_expr->get_outputs()[0]] = static_cast(accumulator_reg); - next_expr = linear_ir.get_exprs_by_input(next_expr->get_outputs()[0]).begin()->expr; + next_expr = next_expr->get_outputs()[0]->get_consumers().begin()->get_expr_ptr(); } accumulator_reg++; @@ -192,8 +192,8 @@ bool AssignRegisters::run(LinearIR& linear_ir) { if (is_type(expr->get_node()) || is_type(expr->get_node())) continue; for (const auto& out : expr->get_outputs()) { - for (const auto& child_expr_input : linear_ir.get_exprs_by_input(out)) { - const auto& child_expr = child_expr_input.expr; + for (const auto& child_expr_input : out->get_consumers()) { + const auto& child_expr = child_expr_input.get_expr_ptr(); auto child_it = linear_ir.begin(); std::advance(child_it, n); size_t k = n; @@ -304,8 +304,7 @@ bool AssignRegisters::run(LinearIR& linear_ir) { std::map assigned_regs(std::move(manually_assigned_gprs)); assigned_regs.insert(manually_assigned_vecs.begin(), manually_assigned_vecs.end()); - auto register_assigned_regs = [=, &assigned_regs](const std::map& unique_regs, - const std::map& unique2reused) { + auto register_assigned_regs = [=, &assigned_regs](const std::map& unique_regs, const std::map& unique2reused) { for (const auto& reg : unique_regs) { if (reg.second == IS_MANUALLY_ALLOCATED_REG) continue; diff --git 
a/src/common/snippets/src/lowered/pass/cleanup_loop_offsets.cpp b/src/common/snippets/src/lowered/pass/cleanup_loop_offsets.cpp index b35043e132b39c..15b835b9ff7123 100644 --- a/src/common/snippets/src/lowered/pass/cleanup_loop_offsets.cpp +++ b/src/common/snippets/src/lowered/pass/cleanup_loop_offsets.cpp @@ -35,7 +35,7 @@ bool CleanupLoopOffsets::run(LinearIR& linear_ir) { } if (auto outer_loop_end = as_type_ptr(next_node)) { auto fin_offsets = loop_end->get_finalization_offsets(); - std::unordered_map per_tensor_offset; + std::unordered_map per_tensor_offset; const auto& loop_inputs = expr_it->get()->get_inputs(); for (size_t i = 0; i < fin_offsets.size(); i++) per_tensor_offset[loop_inputs[i]] = i; diff --git a/src/common/snippets/src/lowered/pass/fuse_loops.cpp b/src/common/snippets/src/lowered/pass/fuse_loops.cpp index 2f49ce4aca13ee..6bf30cf8b6f7fa 100644 --- a/src/common/snippets/src/lowered/pass/fuse_loops.cpp +++ b/src/common/snippets/src/lowered/pass/fuse_loops.cpp @@ -29,20 +29,20 @@ bool FuseLoops::can_be_fused(const LoopInfoPtr& loop_current, const LoopInfoPtr& return supported_work_amount && supported_increment; } -void FuseLoops::fuse_points(LinearIR& linear_ir, std::vector& exit_points, std::vector& entry_points, - LinearIR::constExprIt loop_begin_pos, LinearIR::constExprIt loop_end_pos) { - std::vector new_exit_points; +void FuseLoops::fuse_points(std::vector& exit_points, std::vector& entry_points, + LinearIR::constExprIt loop_begin_pos, LinearIR::constExprIt loop_end_pos) { + std::vector new_exit_points; for (const auto& exit_point : exit_points) { - const auto expr = exit_point.expr; - const auto port = exit_point.port; + const auto expr = exit_point.get_expr_ptr(); + const auto port = exit_point.get_index(); const auto output_td = expr->get_outputs()[port]; - const auto consumers_inputs = linear_ir.get_exprs_by_input(output_td); + const auto consumers_inputs = output_td->get_consumers(); - std::vector mapped_entry_points; + std::vector 
mapped_entry_points; std::vector outside_consumers; for (const auto& consumer_input : consumers_inputs) { - const auto consumer = consumer_input.expr; - const auto consumer_port = consumer_input.port; + const auto consumer = consumer_input.get_expr_ptr(); + const auto consumer_port = consumer_input.get_index(); const auto consumer_point = consumer->input_port(consumer_port); const auto entry_point_it = std::find(entry_points.begin(), entry_points.end(), consumer_point); if (entry_point_it != entry_points.end()) { @@ -73,9 +73,9 @@ void FuseLoops::fuse_points(LinearIR& linear_ir, std::vector& ex } bool FuseLoops::fuse_upper_into_current(LinearIR& linear_ir, const LinearIR::LoopManagerPtr& loop_manager, - const ExpressionPort& current_entry_point, const ExpressionPort& target_exit_point, - size_t current_loop_id, size_t target_loop_id, size_t dim_idx, - LinearIR::constExprIt& current_loop_begin_pos, LinearIR::constExprIt& current_loop_end_pos) { + const TensorDescriptor& current_entry_point, const TensorDescriptor& target_exit_point, + size_t current_loop_id, size_t target_loop_id, size_t dim_idx, + LinearIR::constExprIt& current_loop_begin_pos, LinearIR::constExprIt& current_loop_end_pos) { const auto& loop_current = loop_manager->get_loop_info(current_loop_id); const auto& loop_target = loop_manager->get_loop_info(target_loop_id); if (!can_be_fused(loop_current, loop_target)) @@ -89,13 +89,13 @@ bool FuseLoops::fuse_upper_into_current(LinearIR& linear_ir, const LinearIR::Loo bool is_fusion_allowed = true; for (size_t i = 0; i < loop_target->exit_exprs.size() && is_fusion_allowed; ++i) { const auto target_exit_point = loop_target->exit_exprs[i]; - const auto target_exit_expr = target_exit_point.expr; - const auto port = target_exit_point.port; + const auto target_exit_expr = target_exit_point.get_expr_ptr(); + const auto port = target_exit_point.get_index(); const auto output_td = target_exit_expr->get_outputs()[port]; - const auto consumer_inputs = 
linear_ir.get_exprs_by_input(output_td); + const auto consumer_inputs = output_td->get_consumers(); for (const auto& consumer_input : consumer_inputs) { - const auto consumer = consumer_input.expr; - if (ov::is_type(consumer->get_node()) || consumer == current_entry_point.expr) + const auto consumer = consumer_input.get_expr_ptr(); + if (ov::is_type(consumer->get_node()) || consumer == current_entry_point.get_expr_ptr()) continue; // The fusing is only valid if target Loop consumer (the Consumer is outside of target Loop) // is after current Loop (after Loop_down). @@ -113,7 +113,7 @@ bool FuseLoops::fuse_upper_into_current(LinearIR& linear_ir, const LinearIR::Loo auto current_exit_points = loop_current->exit_exprs; auto target_entry_points = loop_target->entry_exprs; auto target_exit_points = loop_target->exit_exprs; - fuse_points(linear_ir, target_exit_points, current_entry_points, target_loop_begin_pos, target_loop_end_pos); + fuse_points(target_exit_points, current_entry_points, target_loop_begin_pos, target_loop_end_pos); const auto insertion_place = current_loop_begin_pos; const auto is_move_needed = target_loop_end_pos != current_loop_begin_pos; @@ -135,9 +135,9 @@ bool FuseLoops::fuse_upper_into_current(LinearIR& linear_ir, const LinearIR::Loo // Update work_amount for Loop (increment is constant because increments must be the identical for fusion): loop_current->work_amount = std::max(loop_current->work_amount, loop_target->work_amount); - std::vector new_entries = target_entry_points; + std::vector new_entries = target_entry_points; new_entries.insert(new_entries.end(), current_entry_points.begin(), current_entry_points.end()); - std::vector new_exits = target_exit_points; + std::vector new_exits = target_exit_points; new_exits.insert(new_exits.end(), current_exit_points.begin(), current_exit_points.end()); loop_current->entry_exprs = new_entries; @@ -147,9 +147,9 @@ bool FuseLoops::fuse_upper_into_current(LinearIR& linear_ir, const LinearIR::Loo } bool 
FuseLoops::fuse_lower_into_current(LinearIR& linear_ir, const LinearIR::LoopManagerPtr& loop_manager, - const ExpressionPort& current_exit_point, const ExpressionPort& target_entry_point, - size_t current_loop_id, size_t target_loop_id, size_t dim_idx, - LinearIR::constExprIt& current_loop_begin_pos, LinearIR::constExprIt& current_loop_end_pos) { + const TensorDescriptor& current_exit_point, const TensorDescriptor& target_entry_point, + size_t current_loop_id, size_t target_loop_id, size_t dim_idx, + LinearIR::constExprIt& current_loop_begin_pos, LinearIR::constExprIt& current_loop_end_pos) { const auto& loop_current = loop_manager->get_loop_info(current_loop_id); const auto& loop_target = loop_manager->get_loop_info(target_loop_id); if (!can_be_fused(loop_current, loop_target)) @@ -160,12 +160,12 @@ bool FuseLoops::fuse_lower_into_current(LinearIR& linear_ir, const LinearIR::Loo bool is_fusion_allowed = true; for (size_t i = 0; i < loop_target->entry_exprs.size() && is_fusion_allowed; ++i) { const auto target_entry_point = loop_target->entry_exprs[i]; - const auto target_entry_expr = target_entry_point.expr; - const auto port = target_entry_point.port; + const auto target_entry_expr = target_entry_point.get_expr_ptr(); + const auto port = target_entry_point.get_index(); const auto input_td = target_entry_expr->get_inputs()[port]; - const auto parent_expr_output = linear_ir.get_expr_by_output(input_td); - const auto parent_expr = parent_expr_output.expr; - if (ov::is_type(parent_expr->get_node()) || parent_expr == current_exit_point.expr) + const auto parent_expr_output = input_td->get_source(); + const auto parent_expr = parent_expr_output.get_expr_ptr(); + if (ov::is_type(parent_expr->get_node()) || parent_expr == current_exit_point.get_expr_ptr()) continue; is_fusion_allowed = parent_expr->get_loop_ids()[dim_idx] == current_loop_id || // The parent expr is from the same current Loop std::find(linear_ir.cbegin(), current_loop_begin_pos, parent_expr) != 
current_loop_begin_pos; // The parent is before current Loop @@ -182,7 +182,7 @@ bool FuseLoops::fuse_lower_into_current(LinearIR& linear_ir, const LinearIR::Loo auto current_exit_points = loop_current->exit_exprs; auto target_entry_points = loop_target->entry_exprs; auto target_exit_points = loop_target->exit_exprs; - fuse_points(linear_ir, current_exit_points, target_entry_points, current_loop_begin_pos, current_loop_end_pos); + fuse_points(current_exit_points, target_entry_points, current_loop_begin_pos, current_loop_end_pos); const auto insertion_place = current_loop_end_pos; const auto is_move_needed = insertion_place != target_loop_begin_pos; @@ -205,9 +205,9 @@ bool FuseLoops::fuse_lower_into_current(LinearIR& linear_ir, const LinearIR::Loo // Update work_amount for Loop (increment is constant because increments must be the identical for fusion): loop_current->work_amount = std::max(loop_current->work_amount, loop_target->work_amount); - std::vector& new_entries = current_entry_points; + std::vector& new_entries = current_entry_points; new_entries.insert(new_entries.end(), target_entry_points.begin(), target_entry_points.end()); - std::vector& new_exits = current_exit_points; + std::vector& new_exits = current_exit_points; new_exits.insert(new_exits.end(), target_exit_points.begin(), target_exit_points.end()); loop_current->entry_exprs = new_entries; @@ -268,12 +268,12 @@ bool FuseLoops::run(LinearIR& linear_ir) { bool was_fusion_up = false; for (size_t in_port = 0; in_port < entry_points.size() && !was_fusion_up; ++in_port) { const auto entry_point = entry_points[in_port]; - const auto entry_expr = entry_point.expr; - const auto port = entry_point.port; + const auto entry_expr = entry_point.get_expr_ptr(); + const auto port = entry_point.get_index(); const auto input_td = entry_expr->get_inputs()[port]; - const auto parent_expr_output = linear_ir.get_expr_by_output(input_td); - const auto parent_expr = parent_expr_output.expr; - const auto out_port = 
parent_expr_output.port; + const auto parent_expr_output = input_td->get_source(); + const auto parent_expr = parent_expr_output.get_expr_ptr(); + const auto out_port = parent_expr_output.get_index(); const auto parent = parent_expr->get_node(); if (ov::is_type(parent) || ov::is_type(parent) || @@ -309,13 +309,13 @@ bool FuseLoops::run(LinearIR& linear_ir) { bool was_fusion_down = false; for (size_t out_port = 0; out_port < exit_points.size() && !was_fusion_down; ++out_port) { const auto exit_point = exit_points[out_port]; - const auto exit_expr = exit_point.expr; - const auto port = exit_point.port; + const auto exit_expr = exit_point.get_expr_ptr(); + const auto port = exit_point.get_index(); const auto output_td = exit_expr->get_outputs()[port]; - const auto consumer_exprs_inputs = linear_ir.get_exprs_by_input(output_td); + const auto consumer_exprs_inputs = output_td->get_consumers(); for (const auto& consumer_expr_input : consumer_exprs_inputs) { - const auto consumer_expr = consumer_expr_input.expr; - const auto in_port = consumer_expr_input.port; + const auto consumer_expr = consumer_expr_input.get_expr_ptr(); + const auto in_port = consumer_expr_input.get_index(); const auto consumer = consumer_expr->get_node(); if (ov::is_type(consumer) || ov::is_type(consumer)) { diff --git a/src/common/snippets/src/lowered/pass/indentify_buffers.cpp b/src/common/snippets/src/lowered/pass/indentify_buffers.cpp index 769454c36aded2..619ae88fd808b3 100644 --- a/src/common/snippets/src/lowered/pass/indentify_buffers.cpp +++ b/src/common/snippets/src/lowered/pass/indentify_buffers.cpp @@ -60,9 +60,9 @@ std::vector IdentifyBuffers::create_adjacency_matrix(const LinearIR& linea const auto buffer = ov::as_type_ptr(buffer_expr->get_node()); const auto& buffer_td = buffer_input_tds.front(); - const auto buffer_siblings = linear_ir.get_exprs_by_input(buffer_td); + const auto buffer_siblings = buffer_td->get_consumers(); for (const auto& buffer_sibling : buffer_siblings) { - const 
auto& sibling_expr = buffer_sibling.expr; + const auto& sibling_expr = buffer_sibling.get_expr_ptr(); // Skip myself if (sibling_expr == buffer_expr) { continue; @@ -76,7 +76,7 @@ std::vector IdentifyBuffers::create_adjacency_matrix(const LinearIR& linea // Verify Buffers on Loop inputs: for (size_t input_idx = 0; input_idx < input_count; ++input_idx) { - const auto loop_in = linear_ir.get_expr_by_output(loop_tds[input_idx]).expr; + const auto loop_in = loop_tds[input_idx]->get_source().get_expr_ptr(); if (const auto& neighbour_buffer = is_intermediate_buffer(loop_in->get_node())) { const auto neighbour_buffer_loop_port = input_idx; update_adj_matrix(buffer, buffer_idx, neighbour_buffer, @@ -91,9 +91,9 @@ std::vector IdentifyBuffers::create_adjacency_matrix(const LinearIR& linea if (buffer_td == loop_tds[input_count + output_idx]) continue; - const auto& consumer_inputs = linear_ir.get_exprs_by_input(loop_tds[input_count + output_idx]); + const auto& consumer_inputs = loop_tds[input_count + output_idx]->get_consumers(); for (const auto& consumer_input : consumer_inputs) { - const auto& child_node = consumer_input.expr->get_node(); + const auto& child_node = consumer_input.get_expr_ptr()->get_node(); if (const auto& neighbour_buffer = is_intermediate_buffer(child_node)) { const auto neighbour_buffer_loop_port = input_count + output_idx; update_adj_matrix(buffer, buffer_idx, neighbour_buffer, diff --git a/src/common/snippets/src/lowered/pass/init_loops.cpp b/src/common/snippets/src/lowered/pass/init_loops.cpp index 460997d547a14e..0501adcc22f2e5 100644 --- a/src/common/snippets/src/lowered/pass/init_loops.cpp +++ b/src/common/snippets/src/lowered/pass/init_loops.cpp @@ -16,20 +16,20 @@ namespace pass { namespace { void filter_ports(LinearIR& linear_ir, - std::vector& loop_entries, std::vector& loop_exits) { - std::vector new_loop_entries; - std::vector new_loop_exits; + std::vector& loop_entries, std::vector& loop_exits) { + std::vector new_loop_entries; + 
std::vector new_loop_exits; new_loop_entries.reserve(loop_entries.size()); new_loop_exits.reserve(loop_exits.size()); std::set> loop_parents; for (const auto& loop_entry_point : loop_entries) { - const auto& expr = loop_entry_point.expr; - const auto port = loop_entry_point.port; + const auto& expr = loop_entry_point.get_expr_ptr(); + const auto port = loop_entry_point.get_index(); const auto node = expr->get_node(); const auto ma = ov::as_type_ptr(node); if (ma && ma->is_memory_access_input_port(port)) { - const auto& parent_expr = linear_ir.get_expr_by_output(expr->get_inputs()[port]).expr; + const auto& parent_expr = expr->get_inputs()[port]->get_source().get_expr_ptr(); const auto& parent = parent_expr->get_node(); // Todo: Sometimes several Load in one Loop read data from the same Node if (loop_parents.find(parent) == loop_parents.end()) { @@ -40,8 +40,8 @@ void filter_ports(LinearIR& linear_ir, } for (const auto& loop_exit_point : loop_exits) { - const auto& expr = loop_exit_point.expr; - const auto port = loop_exit_point.port; + const auto& expr = loop_exit_point.get_expr_ptr(); + const auto port = loop_exit_point.get_index(); const auto ma = ov::as_type_ptr(expr->get_node()); if (ma && ma->is_memory_access_output_port(port)) { new_loop_exits.push_back(loop_exit_point); @@ -52,12 +52,10 @@ void filter_ports(LinearIR& linear_ir, loop_exits = new_loop_exits; } -int64_t get_dim_stride(const size_t dim, const std::vector& layout, const std::vector& shape) { +int64_t get_dim_stride(const size_t dim, const std::vector& shape) { int64_t stride = 1; - for (int i = static_cast(layout.size()) - 1; i >= 0; i--) { - if (layout[i] == dim) - break; - stride *= static_cast(shape[layout[i]]); + for (size_t i = dim + 1; i < shape.size(); ++i) { + stride *= static_cast(shape[i]); } return stride; } @@ -65,60 +63,44 @@ int64_t get_dim_stride(const size_t dim, const std::vector& layout, cons InitLoops::InitLoops() : Transformation() {} -std::vector 
InitLoops::init_ptr_increments(const std::vector& loop_inputs, - const std::vector& loop_outputs, +std::vector InitLoops::init_ptr_increments(const std::vector& loop_inputs, + const std::vector& loop_outputs, size_t dim_idx) const { - std::vector ptr_increments; - // Note: All loop inputs must have the same layout by definition. - // If this doesn't hold, then we're trying to inject loops in the wrong place. - const std::vector loop_layout{ - !loop_inputs.empty() ? loop_inputs.front().expr->get_inputs()[0]->get_layout() : - !loop_outputs.empty() ? loop_outputs.front().expr->get_outputs()[0]->get_layout() : - std::vector{}}; + std::vector ptr_increments; // Note: Need to find max relevant dim expr to account for broadcasting, collect relevant_dims as well - // Note: At the moment all loop_inputs and loop_outputs - are Load/Store ops in this method. - // So for example, we can call loop_input[i]->get_outputs().front() because Load have one output - size_t max_relevant_dim_size = 0; + size_t max_relevant_dim_size = 1; for (const auto& loop_input : loop_inputs) { - const auto& expr = loop_input.expr; - const auto out_td = expr->get_outputs().front(); - const auto& layout = out_td->get_layout(); - const auto& tensor = out_td->get_tensor(); + const auto& layout = loop_input.get_layout(); + const auto& tensor = loop_input.get_tensor(); const auto& dim = *(layout.rbegin() + dim_idx); max_relevant_dim_size = std::max(tensor[dim], max_relevant_dim_size); } for (const auto& loop_output : loop_outputs) { - const auto& expr = loop_output.expr; - const auto in_td = expr->get_inputs().front(); - const auto& layout = in_td->get_layout(); - const auto& tensor = in_td->get_tensor(); + const auto& layout = loop_output.get_layout(); + const auto& tensor = loop_output.get_tensor(); const auto& dim = *(layout.rbegin() + dim_idx); max_relevant_dim_size = std::max(tensor[dim], max_relevant_dim_size); } + for (const auto& loop_input : loop_inputs) { - const auto& expr = loop_input.expr; - 
const auto out_td = expr->get_outputs().front(); - const auto& layout = out_td->get_layout(); - const auto& tensor = out_td->get_tensor(); + const auto& layout = loop_input.get_layout(); + const auto& tensor = loop_input.get_tensor(); const auto& dim = *(layout.rbegin() + dim_idx); int64_t ptr_increment = 0; // If relevant dim is not broadcasted, then ptr_increment is the dim stride in the new layout if (!(tensor[dim] == 1 && max_relevant_dim_size != 1)) - ptr_increment = get_dim_stride(dim, loop_layout, tensor); + ptr_increment = get_dim_stride(dim, tensor); ptr_increments.push_back(ptr_increment); } - // Note: Le already accounted for loop_input vs inside loops layout mismatch. So we need non-dense output - // ptr_increments only if loop_input_layout doesn't match loop_output_layout + for (const auto& loop_output : loop_outputs) { - const auto& expr = loop_output.expr; - const auto in_td = expr->get_inputs().front(); - const auto& layout = in_td->get_layout(); - const auto& tensor = in_td->get_tensor(); + const auto& layout = loop_output.get_layout(); + const auto& tensor = loop_output.get_tensor(); const auto& dim = *(layout.rbegin() + dim_idx); int64_t ptr_increment = 0; // If relevant dim is not broadcasted, then ptr_increment is the dim stride in the new layout if (!(tensor[dim] == 1 && max_relevant_dim_size != 1)) - ptr_increment = get_dim_stride(dim, layout, tensor); + ptr_increment = get_dim_stride(dim, tensor); ptr_increments.push_back(ptr_increment); } @@ -134,15 +116,15 @@ std::vector InitLoops::init_finalization_offsets(const std::vector InitLoops::init_element_type_sizes(const std::vector& loop_inputs, - const std::vector& loop_outputs) { +std::vector InitLoops::init_element_type_sizes(const std::vector& loop_inputs, + const std::vector& loop_outputs) { std::vector element_types; element_types.reserve(loop_inputs.size() + loop_outputs.size()); for (const auto& in : loop_inputs) { - 
element_types.push_back(in.expr->get_node()->get_input_element_type(in.port).size()); + element_types.push_back(in.get_expr_ptr()->get_node()->get_input_element_type(in.get_index()).size()); } for (const auto& out : loop_outputs) { - element_types.push_back(out.expr->get_node()->get_output_element_type(out.port).size()); + element_types.push_back(out.get_expr_ptr()->get_node()->get_output_element_type(out.get_index()).size()); } return element_types; } @@ -164,7 +146,7 @@ bool InitLoops::insertion(LinearIR& linear_ir, const LinearIR::LoopManager::Loop const auto io_data_sizes = init_element_type_sizes(loop_entries, loop_exits); const auto& loop_begin = std::make_shared(); - const auto& loop_begin_expr = std::make_shared(loop_begin); + const auto& loop_begin_expr = linear_ir.create_expression(loop_begin, std::vector{}); linear_ir.insert(loop_begin_pos, loop_begin_expr); const auto& loop_end = std::make_shared( @@ -172,14 +154,14 @@ bool InitLoops::insertion(LinearIR& linear_ir, const LinearIR::LoopManager::Loop io_data_sizes, loop_entries.size(), loop_exits.size()); loop_end->has_outer_loop = has_outer_loop; - std::vector loop_end_inputs; + std::vector loop_end_inputs; for (const auto& expr_port : loop_entries) - loop_end_inputs.push_back(expr_port.expr->get_inputs()[expr_port.port]); + loop_end_inputs.push_back(expr_port.get_expr_ptr()->get_inputs()[expr_port.get_index()]); for (const auto& expr_port : loop_exits) - loop_end_inputs.push_back(expr_port.expr->get_outputs()[expr_port.port]); - loop_end_inputs.push_back(linear_ir.get_expr_by_node(loop_begin)->get_outputs().front()); + loop_end_inputs.push_back(expr_port.get_expr_ptr()->get_outputs()[expr_port.get_index()]); + loop_end_inputs.push_back(loop_begin_expr->get_outputs()[0]); - const auto& loop_end_expr = std::make_shared(loop_end, loop_end_inputs, std::vector{}); + const auto& loop_end_expr = linear_ir.create_expression(loop_end, loop_end_inputs); linear_ir.insert(loop_end_pos, loop_end_expr); return true; 
} diff --git a/src/common/snippets/src/lowered/pass/insert_buffers.cpp b/src/common/snippets/src/lowered/pass/insert_buffers.cpp index 09efcf3e4b47da..84768a2ffef79c 100644 --- a/src/common/snippets/src/lowered/pass/insert_buffers.cpp +++ b/src/common/snippets/src/lowered/pass/insert_buffers.cpp @@ -58,15 +58,15 @@ LinearIR::constExprIt InsertBuffers::insertion_position(const LinearIR& linear_i } void InsertBuffers::insertion(LinearIR& linear_ir, const LinearIR::LoopManagerPtr& loop_manager, size_t loop_id, - const std::vector& loop_entries, const std::vector& loop_exits) { + const std::vector& loop_entries, const std::vector& loop_exits) { for (const auto& entry_point : loop_entries) { - const auto expr = entry_point.expr; - const auto port = entry_point.port; + const auto expr = entry_point.get_expr_ptr(); + const auto port = entry_point.get_index(); const auto node = expr->get_node(); const auto input_td = expr->get_inputs()[port]; - const auto parent_expr_output = linear_ir.get_expr_by_output(input_td); - const auto& parent_expr = parent_expr_output.expr; - const auto parent_port = parent_expr_output.port; + const auto parent_expr_output = input_td->get_source(); + const auto& parent_expr = parent_expr_output.get_expr_ptr(); + const auto parent_port = parent_expr_output.get_index(); const auto parent = parent_expr->get_node(); if (ov::is_type(parent) || ov::is_type(parent) || @@ -103,33 +103,32 @@ void InsertBuffers::insertion(LinearIR& linear_ir, const LinearIR::LoopManagerPt // Need to insert between 2nd and 4th Loops - after 2nd Loop const auto pos = insertion_position(linear_ir, loop_manager, parent_expr, expr); const auto buffer = std::make_shared(parent->output(parent_port), m_buffer_allocation_rank); - - const auto td = std::make_shared(input_td->get_tensor(), - input_td->get_subtensor(), - input_td->get_layout()); - const std::vector buffer_outs = { td }; - const std::vector parent_outs = { input_td }; - linear_ir.insert(pos, std::make_shared(buffer, 
parent_outs, buffer_outs)); - linear_ir.replace_input(expr, port, td); + PortManager::set_port_descriptor_ptr(buffer->output(0), std::make_shared(input_td->get_tensor(), + input_td->get_subtensor(), + input_td->get_layout())); + // Output td is automatically filled from PortDescriptor + const auto buffer_expr = linear_ir.create_expression(buffer, {input_td}); + linear_ir.insert(pos, buffer_expr); + linear_ir.replace_input(expr, port, buffer_expr->get_outputs()[0]); } } for (const auto& exit_point : loop_exits) { - const auto expr = exit_point.expr; - const auto port = exit_point.port; + const auto expr = exit_point.get_expr_ptr(); + const auto port = exit_point.get_index(); const auto node = expr->get_node(); const auto output_td = expr->get_outputs()[port]; - const auto child_exprs_inputs = linear_ir.get_exprs_by_input(output_td); + const auto child_exprs_inputs = output_td->get_consumers(); const auto current_loops = expr->get_loop_ids(); const auto current_loop_count = current_loops.size(); - const std::vector node_outs = {output_td}; + const std::vector node_outs = {output_td}; - std::set potential_consumers; + std::vector potential_consumers; std::set buffers; const auto current_loop_lvl = std::distance(current_loops.begin(), std::find(current_loops.begin(), current_loops.end(), loop_id)); for (const auto& child_expr_input : child_exprs_inputs) { - const auto& child_expr = child_expr_input.expr; - const auto child_port = child_expr_input.port; + const auto& child_expr = child_expr_input.get_expr_ptr(); + const auto child_port = child_expr_input.get_index(); const auto& child = child_expr->get_node(); if (ov::is_type(child)) continue; @@ -142,7 +141,7 @@ void InsertBuffers::insertion(LinearIR& linear_ir, const LinearIR::LoopManagerPt const auto node_ma = ov::as_type_ptr(node); if ((child_ma && child_ma->is_memory_access_input_port(child_port)) || (node_ma && node_ma->is_memory_access_output_port(port))) { - potential_consumers.insert(child_expr_input); + 
potential_consumers.push_back(child_expr_input); continue; } @@ -153,7 +152,7 @@ void InsertBuffers::insertion(LinearIR& linear_ir, const LinearIR::LoopManagerPt if (current_loops[i] != child_loops[i] && current_loops[i] != Expression::LOOP_NULL_ID && child_loops[i] != Expression::LOOP_NULL_ID) { - potential_consumers.insert(child_expr_input); + potential_consumers.push_back(child_expr_input); break; } } @@ -165,13 +164,9 @@ void InsertBuffers::insertion(LinearIR& linear_ir, const LinearIR::LoopManagerPt if (!buffers.empty()) { for (const auto& buffer : buffers) { const auto buffer_out = buffer->get_outputs().front(); - const auto buffer_consumers_inputs = linear_ir.get_exprs_by_input(buffer_out); - for (const auto& consumer_input : buffer_consumers_inputs) { - const auto consumer = consumer_input.expr; - const auto consumer_port = consumer_input.port; - linear_ir.replace_input(consumer, consumer_port, output_td); - } - potential_consumers.insert(buffer_consumers_inputs.begin(), buffer_consumers_inputs.end()); + const auto buffer_consumers_inputs = buffer_out->get_consumers(); + linear_ir.replace_input(buffer_consumers_inputs, output_td); + potential_consumers.insert(potential_consumers.end(), buffer_consumers_inputs.begin(), buffer_consumers_inputs.end()); linear_ir.erase(std::find(linear_ir.begin(), linear_ir.end(), buffer)); } } @@ -182,12 +177,12 @@ void InsertBuffers::insertion(LinearIR& linear_ir, const LinearIR::LoopManagerPt // Need to insert after 2nd Loops // Note: All potential consumers must have the same count of first equal Loop identifies and the same count of different last identifies // TODO: Need to verify that - const auto pos = insertion_position(linear_ir, loop_manager, expr, (*potential_consumers.begin()).expr); + const auto pos = insertion_position(linear_ir, loop_manager, expr, (*potential_consumers.begin()).get_expr_ptr()); auto buffer = std::make_shared(node->output(port), m_buffer_allocation_rank); - const auto td = 
std::make_shared(output_td->get_tensor(), - output_td->get_subtensor(), - output_td->get_layout()); + PortManager::set_port_descriptor_ptr(buffer->output(0), std::make_shared(output_td->get_tensor(), + output_td->get_subtensor(), + output_td->get_layout())); // We cannot insert Node output tensor on Buffer output because not all consumers of Node needs Buffer // Example: // Add @@ -195,13 +190,10 @@ void InsertBuffers::insertion(LinearIR& linear_ir, const LinearIR::LoopManagerPt // Result Buffer // | <- It should be new TD // Relu - const std::vector buffer_outs = {td}; - linear_ir.insert(pos, std::make_shared(buffer, node_outs, buffer_outs)); - for (const auto& consumer_input : potential_consumers) { - const auto consumer = consumer_input.expr; - const auto consumer_port = consumer_input.port; - linear_ir.replace_input(consumer, consumer_port, td); - } + // Output td is automatically filled from PortDescriptor + const auto buffer_expr = linear_ir.create_expression(buffer, node_outs); + linear_ir.insert(pos, buffer_expr); + linear_ir.replace_input(potential_consumers, buffer_expr->get_outputs().front()); } } } @@ -231,7 +223,7 @@ bool InsertBuffers::run(LinearIR& linear_ir) { const auto input_ports = ma->get_memory_access_input_ports(); const auto output_ports = ma->get_memory_access_output_ports(); - std::vector loop_entries(input_ports.size()), loop_exits(output_ports.size()); + std::vector loop_entries(input_ports.size()), loop_exits(output_ports.size()); // C++17: for (auto const& [loop_id, loop_info] : loop_data_map) for (const auto& p : input_ports) { loop_entries[p.first] = expr->input_port(p.first); diff --git a/src/common/snippets/src/lowered/pass/insert_load_store.cpp b/src/common/snippets/src/lowered/pass/insert_load_store.cpp index f67ff2094382ec..3fae4c6077530b 100644 --- a/src/common/snippets/src/lowered/pass/insert_load_store.cpp +++ b/src/common/snippets/src/lowered/pass/insert_load_store.cpp @@ -33,7 +33,7 @@ using LoopInfoPtr = 
LoopManager::LoopInfoPtr; InsertLoadStore::InsertLoadStore(size_t vector_size) : m_vector_size(vector_size) {} void InsertLoadStore::update_loops(const LinearIR::LoopManagerPtr& loop_manager, const std::vector& loop_ids, - const ExpressionPort& actual_port, const std::vector& target_ports, bool is_entry) { + const TensorDescriptor& actual_port, const std::vector& target_ports, bool is_entry) { for (auto loop_id : loop_ids) { if (loop_id != Expression::LOOP_NULL_ID) update_loop(loop_manager->get_loop_info(loop_id), actual_port, target_ports, is_entry); @@ -41,7 +41,7 @@ void InsertLoadStore::update_loops(const LinearIR::LoopManagerPtr& loop_manager, } void InsertLoadStore::update_loop(const LinearIR::LoopManager::LoopInfoPtr& loop_info, - const ExpressionPort& actual_port, const std::vector& target_ports, bool is_entry) { + const TensorDescriptor& actual_port, const std::vector& target_ports, bool is_entry) { auto& ports = is_entry ? loop_info->entry_exprs : loop_info->exit_exprs; auto port_it = std::find(ports.begin(), ports.end(), actual_port); if (port_it == ports.end()) @@ -55,12 +55,12 @@ bool InsertLoadStore::insert_load(LinearIR& linear_ir, const LinearIR::constExpr const auto& data_expr = *data_expr_it; const auto& data_node = data_expr->get_node(); const auto& output_td = data_expr->get_outputs().front(); - const auto consumer_inputs = linear_ir.get_exprs_by_input(output_td); + const auto consumer_inputs = output_td->get_consumers(); bool was_inserted = false; for (const auto& consumer_input : consumer_inputs) { - const auto& consumer_expr = consumer_input.expr; - const auto port = consumer_input.port; + const auto& consumer_expr = consumer_input.get_expr_ptr(); + const auto port = consumer_input.get_index(); const auto& consumer = consumer_expr->get_node(); const auto ma = ov::as_type_ptr(consumer); if (ma && ma->is_memory_access_input_port(port)) @@ -71,15 +71,13 @@ bool InsertLoadStore::insert_load(LinearIR& linear_ir, const LinearIR::constExpr const 
auto inner_loop = get_inner_loop_id(loop_ids); OPENVINO_ASSERT(inner_loop != Expression::LOOP_NULL_ID, "Loop hasn't been found!"); - const auto load_td = std::make_shared(output_td->get_tensor(), - output_td->get_subtensor(), - output_td->get_layout()); const auto load = std::make_shared(data_node->output(0), m_vector_size); - const auto load_outs = std::vector{ load_td }; - const auto param_outs = std::vector{ output_td }; - const auto load_expr = std::make_shared(load, param_outs, load_outs); + PortManager::set_port_descriptor_ptr(load->output(0), std::make_shared(output_td->get_tensor(), + output_td->get_subtensor(), + output_td->get_layout())); + const auto load_expr = linear_ir.create_expression(load, {output_td}); linear_ir.insert(std::find(data_expr_it, linear_ir.cend(), consumer_expr), load_expr); - linear_ir.replace_input(consumer_expr, port, load_td); + linear_ir.replace_input(consumer_expr, port, load_expr->get_outputs()[0]); // Copy Loop identifies load_expr->set_loop_ids(loop_ids); @@ -97,9 +95,9 @@ bool InsertLoadStore::insert_store(LinearIR& linear_ir, const LinearIR::constExp const auto& loop_manager = linear_ir.get_loop_manager(); const auto& data_expr = *data_expr_it; const auto& input_td = data_expr->get_inputs().front(); - const auto parent_output = linear_ir.get_expr_by_output(input_td); - const auto& parent_expr = parent_output.expr; - const auto port = parent_output.port; + const auto parent_output = input_td->get_source(); + const auto& parent_expr = parent_output.get_expr_ptr(); + const auto port = parent_output.get_index(); const auto& parent = parent_expr->get_node(); const auto ma = ov::as_type_ptr(parent); if (ma && ma->is_memory_access_output_port(port)) @@ -110,17 +108,15 @@ bool InsertLoadStore::insert_store(LinearIR& linear_ir, const LinearIR::constExp const auto inner_loop = get_inner_loop_id(loop_ids); OPENVINO_ASSERT(inner_loop != Expression::LOOP_NULL_ID, "Loop hasn't been found!"); - const auto store_td = 
std::make_shared(input_td->get_tensor(), - input_td->get_subtensor(), - input_td->get_layout()); const auto store = std::make_shared(parent->output(port), m_vector_size); - const auto store_outs = std::vector{ store_td }; - const auto param_outs = std::vector{ input_td }; - const auto store_expr = std::make_shared(store, param_outs, store_outs); + PortManager::set_port_descriptor_ptr(store->output(0), std::make_shared(input_td->get_tensor(), + input_td->get_subtensor(), + input_td->get_layout())); + const auto store_expr = linear_ir.create_expression(store, {input_td}); const auto& reverse_insertion_pos = std::find(std::reverse_iterator(data_expr_it), linear_ir.crend(), parent_expr); const auto& insertion_pos = reverse_insertion_pos.base(); linear_ir.insert(insertion_pos, store_expr); - linear_ir.replace_input(data_expr, 0, store_td); + linear_ir.replace_input(data_expr, 0, store_expr->get_outputs()[0]); // Copy Loop identifies store_expr->set_loop_ids(loop_ids); @@ -128,15 +124,15 @@ bool InsertLoadStore::insert_store(LinearIR& linear_ir, const LinearIR::constExp const auto prev_exit_point = parent_output; // The previous exit point byt one output port can have several consumers that can be potential exit points // So we should verify on the possible future exit points - const auto consumer_inputs = linear_ir.get_exprs_by_input(input_td); + const auto consumer_inputs = input_td->get_consumers(); const auto should_be_saved = std::any_of(consumer_inputs.begin(), consumer_inputs.end(), - [](const ExpressionPort& input_port) { - const auto& node = input_port.expr->get_node(); + [](const TensorDescriptor& input_port) { + const auto& node = input_port.get_expr_ptr()->get_node(); return ov::is_type(node) || ov::is_type(node); }); const auto new_exit_point = store_expr->output_port(0); - const auto new_exit_points = should_be_saved ? std::vector{prev_exit_point, new_exit_point} - : std::vector{new_exit_point}; + const auto new_exit_points = should_be_saved ? 
std::vector{prev_exit_point, new_exit_point} + : std::vector{new_exit_point}; update_loops(loop_manager, loop_ids, prev_exit_point, new_exit_points, false); return true; } diff --git a/src/common/snippets/src/lowered/pass/insert_tail_loop.cpp b/src/common/snippets/src/lowered/pass/insert_tail_loop.cpp index d9bed42e347d0f..b66e6035051ab9 100644 --- a/src/common/snippets/src/lowered/pass/insert_tail_loop.cpp +++ b/src/common/snippets/src/lowered/pass/insert_tail_loop.cpp @@ -41,9 +41,9 @@ void InsertTailLoop::tail_transformations(LinearIR& linear_ir, ov::is_type(op))) { for (size_t i = 0; i < op->inputs().size(); ++i) { if (auto fill = insertFill(op->input(i))) { - std::vector inputs{expr_it->get()->get_inputs()[i]}; + std::vector inputs{expr_it->get()->get_inputs()[i]}; // Note: inputs == outputs, since we want to modify vector reg inplace - auto fill_expr = std::make_shared(fill, inputs, inputs); + auto fill_expr = linear_ir.create_expression(fill, inputs, inputs); auto reg = expr_it->get()->get_reg_info().first[i]; fill_expr->set_reg_info({{reg}, {reg}}); linear_ir.insert(expr_it, fill_expr); @@ -53,13 +53,13 @@ void InsertTailLoop::tail_transformations(LinearIR& linear_ir, // FIXME: C++17 const auto& [port, desc] : memory_access->get_memory_access_input_ports() for (const auto p : memory_access->get_memory_access_input_ports()) { const auto port = p.first; - if (memory_access->is_memory_access_input_port(port) && memory_access->get_input_count(port) > 1) { + if (memory_access->get_input_count(port) > 1) { memory_access->set_input_count(tail_size, port); } } for (const auto p : memory_access->get_memory_access_output_ports()) { const auto port = p.first; - if (memory_access->is_memory_access_output_port(port) && memory_access->get_output_count(port) > 1) { + if (memory_access->get_output_count(port) > 1) { memory_access->set_output_count(tail_size, port); } } @@ -95,14 +95,14 @@ bool InsertTailLoop::run(LinearIR& linear_ir) { } }; auto is_loop_with_buffers = 
[&linear_ir](const std::shared_ptr& loop_end) { - auto is_buffer_input = [&linear_ir](const TensorDescriptorPtr& input) { - const auto parent_expr = linear_ir.get_expr_by_output(input).expr; + auto is_buffer_input = [&linear_ir](const TensorPtr& input) { + const auto parent_expr = input->get_source().get_expr_ptr(); return ov::is_type(parent_expr->get_node()); }; - auto is_buffer_output = [&linear_ir](const TensorDescriptorPtr& output) { - const auto& child_exprs_inputs = linear_ir.get_exprs_by_input(output); + auto is_buffer_output = [&linear_ir](const TensorPtr& output) { + const auto& child_exprs_inputs = output->get_consumers(); return std::any_of(child_exprs_inputs.begin(), child_exprs_inputs.end(), - [](const ExpressionPort& lp) {return ov::is_type(lp.expr->get_node());}); + [](const TensorDescriptor& lp) {return ov::is_type(lp.get_expr_ptr()->get_node());}); }; const auto loop_end_expr = linear_ir.get_expr_by_node(loop_end); @@ -112,8 +112,8 @@ bool InsertTailLoop::run(LinearIR& linear_ir) { OPENVINO_ASSERT(inputs.size() == (in_num + out_num + 1), std::string("The LoopEnd expression must have the count of inputs is") + std::string("equal to count of input and outputs of Loop plus one for work amount")); - const std::vector loop_ins(inputs.begin(), inputs.begin() + in_num); - const std::vector loop_outs(inputs.begin() + in_num, inputs.begin() + in_num + out_num); + const std::vector loop_ins(inputs.begin(), inputs.begin() + in_num); + const std::vector loop_outs(inputs.begin() + in_num, inputs.begin() + in_num + out_num); return std::any_of(loop_ins.begin(), loop_ins.end(), is_buffer_input) || std::any_of(loop_outs.begin(), loop_outs.end(), is_buffer_output); }; diff --git a/src/common/snippets/src/lowered/pass/load_movebroadcast_to_broadcastload.cpp b/src/common/snippets/src/lowered/pass/load_movebroadcast_to_broadcastload.cpp index 8a13cf2328d6c1..cf66f869fafd0f 100644 --- a/src/common/snippets/src/lowered/pass/load_movebroadcast_to_broadcastload.cpp +++ 
b/src/common/snippets/src/lowered/pass/load_movebroadcast_to_broadcastload.cpp @@ -23,17 +23,17 @@ bool LoadMoveBroadcastToBroadcastLoad::run(LinearIR& linear_ir) { // Match on MoveBroadcast because MoveBroadcast is rare node in bodies if (const auto move_broadcast = ov::as_type_ptr(op)) { const auto interm_td = (*expr_it)->get_inputs().front(); - const auto parent_expr = linear_ir.get_expr_by_output(interm_td).expr; + const auto parent_expr = interm_td->get_source().get_expr_ptr(); const auto load = ov::as_type_ptr(parent_expr->get_node()); if (!load) continue; // Cannot rewrite Broadcast + Load if load has more than 1 user // or more than one input, or if Broadcast has several inputs - const auto load_consumers_inputs = linear_ir.get_exprs_by_input(interm_td); + const auto load_consumers_inputs = interm_td->get_consumers(); size_t count = 0; for (const auto& consumer_expr_input : load_consumers_inputs) { - const auto consumer = consumer_expr_input.expr->get_node(); + const auto consumer = consumer_expr_input.get_expr_ptr()->get_node(); if (!ov::is_type(consumer)) count++; } @@ -41,15 +41,20 @@ bool LoadMoveBroadcastToBroadcastLoad::run(LinearIR& linear_ir) { if (count > 1) continue; - auto outshape = move_broadcast->get_output_partial_shape(0); - auto broadcastload = std::make_shared(load->input_value(0), outshape, load->get_offset()); - const auto in_td = std::vector{ parent_expr->get_inputs().front() }; - const auto out_td = std::vector{ (*expr_it)->get_outputs().front() }; + const auto outshape = move_broadcast->get_output_partial_shape(0); + const auto broadcastload = std::make_shared(load->input_value(0), outshape, load->get_offset()); + const auto move_out = (*expr_it)->get_outputs().front(); + const auto move_consumers = move_out->get_consumers(); + PortManager::set_port_descriptor_ptr(broadcastload->output(0), std::make_shared(move_out->get_tensor(), + move_out->get_subtensor(), + move_out->get_layout())); + const auto broadcastload_expr = 
linear_ir.create_expression(broadcastload, { parent_expr->get_inputs().front() }); const auto mv_expr_it = expr_it; const auto insertion_pos = std::next(expr_it); + expr_it = linear_ir.insert(insertion_pos, broadcastload_expr); linear_ir.erase(std::find(linear_ir.begin(), mv_expr_it, parent_expr)); linear_ir.erase(mv_expr_it); - expr_it = linear_ir.insert(insertion_pos, std::make_shared(broadcastload, in_td, out_td)); + linear_ir.replace_input(move_consumers, broadcastload_expr->get_outputs().front()); modified |= true; } } diff --git a/src/common/snippets/src/lowered/pass/mark_loops.cpp b/src/common/snippets/src/lowered/pass/mark_loops.cpp index 4380ec9ca41072..3c9ab0b7e9be64 100644 --- a/src/common/snippets/src/lowered/pass/mark_loops.cpp +++ b/src/common/snippets/src/lowered/pass/mark_loops.cpp @@ -42,9 +42,6 @@ bool MarkLoops::run(LinearIR& linear_ir) { auto loop_begin_pos = expr_it; auto loop_end_pos = loop_begin_pos; - const auto& outputs = expr->get_outputs(); - const auto& loop_inner_layout = outputs.front()->get_layout(); - const auto& loop_inner_subtensor = outputs.front()->get_subtensor(); const bool loop_is_outside = expr->is_outside_loop(); const bool loop_is_inside = !loop_is_outside; @@ -65,21 +62,33 @@ bool MarkLoops::run(LinearIR& linear_ir) { ov::is_type(current_node)) break; - const auto& ins = loop_end_pos->get()->get_inputs(); - current_is_inside = std::all_of(ins.begin(), ins.end(), - [&loop_inner_layout, &loop_inner_subtensor](const TensorDescriptorPtr& td) { - return td->get_layout() == loop_inner_layout && - td->get_subtensor() == loop_inner_subtensor; }); - // If the next expr isn't real customer of prev expr we should finish Loop - auto connected = [&](const TensorDescriptorPtr& td) {return linear_ir.get_expr_by_output(td).expr == prev_expr;}; - if (current_is_inside && std::none_of(ins.begin(), ins.end(), connected)) + // We finish Loop if + // - the next expr isn't a real customer + // - there is a conflict between the corresponding ports + 
bool is_connected = false; + bool is_conflicted = false; + for (size_t i = 0; i < prev_expr->get_output_count(); ++i) { + const auto& loop_td = prev_expr->get_outputs()[i]; + const auto& consumers = loop_td->get_consumers(); + const auto found = std::find_if(consumers.begin(), consumers.end(), [&loop_end_pos](const TensorDescriptor& consumer) { + return consumer.get_expr_ptr().get() == loop_end_pos->get(); + }); + if (found != consumers.end()) { + if (loop_td->is_conflicted_consumer(*found)) { + is_conflicted = true; + break; + } + is_connected = true; + } + } + if (is_conflicted || !is_connected) break; current_is_outside = current_expr->is_outside_loop(); } while (current_is_inside == loop_is_inside && current_is_outside == loop_is_outside); if (loop_is_inside) - loop_manager->mark_loop(linear_ir, loop_begin_pos, loop_end_pos, loop_depth, m_vector_size); + loop_manager->mark_loop(loop_begin_pos, loop_end_pos, loop_depth, m_vector_size); else if (loop_is_outside) loop_manager->skipped_mark(loop_begin_pos, loop_end_pos, loop_depth); diff --git a/src/common/snippets/src/lowered/pass/move_result_out_from_loop.cpp b/src/common/snippets/src/lowered/pass/move_result_out_from_loop.cpp index 82a73e6328d7cf..d2d9b363be3d81 100644 --- a/src/common/snippets/src/lowered/pass/move_result_out_from_loop.cpp +++ b/src/common/snippets/src/lowered/pass/move_result_out_from_loop.cpp @@ -32,7 +32,7 @@ bool MoveResultOutOfLoop::run(LinearIR& linear_ir) { } const auto input_td = expr->get_inputs().front(); - const auto parent_expr = linear_ir.get_expr_by_output(input_td).expr; + const auto parent_expr = input_td->get_source().get_expr_ptr(); const auto parent_loop_ids = parent_expr->get_loop_ids(); int outer_loop_id = static_cast(parent_loop_ids.size()) - 1; for (; outer_loop_id >= 0; --outer_loop_id) { diff --git a/src/common/snippets/src/lowered/pass/move_scalar_to_consumer.cpp b/src/common/snippets/src/lowered/pass/move_scalar_to_consumer.cpp index 808530982446e3..1410ed9f33545d 
100644 --- a/src/common/snippets/src/lowered/pass/move_scalar_to_consumer.cpp +++ b/src/common/snippets/src/lowered/pass/move_scalar_to_consumer.cpp @@ -26,10 +26,10 @@ bool MoveScalarToConsumer::run(LinearIR& linear_ir) { const auto expr = expr_it->get(); if (ov::is_type(expr->get_node())) { const auto& output = expr->get_outputs().front(); - const auto& consumers = linear_ir.get_exprs_by_input(output); + const auto& consumers = output->get_consumers(); OPENVINO_ASSERT(consumers.size() == 1, "Scalar expression is expected to have a single consumer"); - const auto& consumer_expr = consumers.begin()->expr; + const auto& consumer_expr = consumers.begin()->get_expr_ptr(); // Move something only if consumer is not already the next one (previous since the iterator is a reverse one) auto forward_it = std::prev(expr_it.base()); if (consumer_expr != *std::next(forward_it)) { diff --git a/src/common/snippets/src/lowered/pass/propagate_layout.cpp b/src/common/snippets/src/lowered/pass/propagate_layout.cpp index 85c3facb9e7d2a..2986230ae844c5 100644 --- a/src/common/snippets/src/lowered/pass/propagate_layout.cpp +++ b/src/common/snippets/src/lowered/pass/propagate_layout.cpp @@ -16,43 +16,61 @@ namespace pass { bool PropagateLayout::run(LinearIR& linear_ir) { OV_ITT_SCOPED_TASK(ngraph::pass::itt::domains::SnippetsTransform, "Snippets::PropagateLayout") - const auto& io_ops = linear_ir.get_IO_ops(); - auto io_ops_it = io_ops.begin(); + if (linear_ir.empty()) + return false; + for (auto expr_it = linear_ir.begin(); expr_it != linear_ir.end(); expr_it++) { - if (*expr_it == *io_ops_it) { - const auto& expr = io_ops_it->get(); - io_ops_it++; - const bool is_input = expr->get_type() == IOExpression::io_type::INPUT; - const auto& tds = is_input ? 
expr->get_outputs() : expr->get_inputs(); - if (tds.size() != 1) - OPENVINO_THROW("Parameter/Results should have exactly one output/input"); - const auto& target_td = tds[0]; - // If input - we should be looking downstream, if output - upstream - if (is_input) { - const auto& child_exprs_inputs = linear_ir.get_exprs_by_input(target_td); - // Note that here we consider only the first child (which is usually load), - // but often there is another child - LoopEnd - std::vector child_layout{}; - for (const auto& child_input : child_exprs_inputs) { - const auto child = child_input.expr; - const auto& n = child->get_node(); - if (is_type(n) || is_type(n)) { - // Note: this limitation could be relaxed to multiple ops, - // but all of them must have the same shape and layout - if (!child_layout.empty() && child->get_outputs().front()->get_layout() != child_layout) - OPENVINO_THROW("All children of an input expression must have the same layout"); - child_layout = child->get_outputs().front()->get_layout(); - } + const auto& expr = *expr_it; + const auto io_expr = std::dynamic_pointer_cast(expr); + if (!io_expr) + continue; + + const bool is_input = io_expr->get_type() == IOExpression::io_type::INPUT; + const auto& tds = is_input ? 
expr->get_outputs() : expr->get_inputs(); + if (tds.size() != 1) + OPENVINO_THROW("Parameter/Results should have exactly one output/input"); + + // If input - we should be looking downstream, if output - upstream + const auto& target_td = tds.front(); + if (is_input) { + const auto& consumer_inputs = target_td->get_consumers(); + // Note that here we consider only the first child (which is usually load), + // but often there is another child - LoopEnd + std::set> child_layouts; + for (const auto& child_input : consumer_inputs) { + const auto child = child_input.get_expr_ptr(); + const auto port = child_input.get_index(); + const auto& n = child->get_node(); + const auto ma = ov::as_type_ptr(n); + if (ma && ma->is_memory_access_input_port(port)) { + child_layouts.insert(child_input.get_layout()); } - if (!child_layout.empty()) { - auto new_td = TensorDescriptor(target_td.get()->get_tensor(), target_td.get()->get_subtensor(), - child_layout); - (*target_td) = new_td; + } + OPENVINO_ASSERT(child_layouts.size() == 1, "All children of an input expression must have the same layout"); + target_td->get_source().set_layout(*child_layouts.begin()); + } else { + const auto& consumer_inputs = target_td->get_consumers(); + // Note that here we consider only the first child (which is usually Store), + // but often there is another child - LoopEnd + TensorDescriptor result_td; + for (const auto& child_input : consumer_inputs) { + const auto child = child_input.get_expr_ptr(); + if (ov::is_type(child->get_node())) { + continue; } + if (child.get() == io_expr.get()) { + result_td = child_input; + continue; + } + OPENVINO_THROW("Result cannot have any siblings (only LoopEnd's)"); } + + const auto& td_it = target_td->find_consumer(result_td); + td_it->set_layout(target_td->get_layout()); } } -return true; + + return true; } } // namespace pass diff --git a/src/common/snippets/src/lowered/pass/reset_buffers.cpp b/src/common/snippets/src/lowered/pass/reset_buffers.cpp index 
89dad68eb0ed5d..f0957e8bb7499f 100644 --- a/src/common/snippets/src/lowered/pass/reset_buffers.cpp +++ b/src/common/snippets/src/lowered/pass/reset_buffers.cpp @@ -25,7 +25,7 @@ bool ResetBuffers::reuse_buffer_increments(const LinearIR& linear_ir, const Expr std::set resetting_buffers; std::set buffers_ids; for (size_t i = 0; i < input_count; ++i) { - const auto parent_output = linear_ir.get_expr_by_output(loop_tds[i]).expr; + const auto parent_output = loop_tds[i]->get_source().get_expr_ptr(); if (const auto buffer = ov::as_type_ptr(parent_output->get_node())) { // If Buffer is missed in set, Just save - it's first meeting if (buffers_ids.count(buffer->get_id()) == 0) { @@ -37,11 +37,11 @@ bool ResetBuffers::reuse_buffer_increments(const LinearIR& linear_ir, const Expr } } for (size_t i = 0; i < output_count; ++i) { - const auto consumer_inputs = linear_ir.get_exprs_by_input(loop_tds[input_count + i]); + const auto consumer_inputs = loop_tds[input_count + i]->get_consumers(); size_t buffer_count = 0; size_t loop_count = 0; for (const auto& consumer_input : consumer_inputs) { - const auto& child_node = consumer_input.expr->get_node(); + const auto& child_node = consumer_input.get_expr_ptr()->get_node(); if (const auto buffer = ov::as_type_ptr(child_node)) { buffer_count++; // If Buffer is missed in set, Just save - it's first meeting diff --git a/src/common/snippets/src/lowered/pass/softmax_decomposition.cpp b/src/common/snippets/src/lowered/pass/softmax_decomposition.cpp index b491dfe1172fce..f06003dba42619 100644 --- a/src/common/snippets/src/lowered/pass/softmax_decomposition.cpp +++ b/src/common/snippets/src/lowered/pass/softmax_decomposition.cpp @@ -35,10 +35,10 @@ bool SoftmaxDecomposition::run(LinearIR& linear_ir) { const auto& pm = matcher->get_pattern_map(); const auto softmax = pm.at(match_softmax); const auto softmax_expr = *expr_it; - const auto input_tds = softmax_expr->get_inputs(); - const auto output_tds = softmax_expr->get_outputs(); - const auto 
tensor_out = output_tds.front()->get_tensor(); - const auto subtensor_in = input_tds.front()->get_subtensor(); + const auto input_td = softmax_expr->get_inputs().front(); + const auto output_td = softmax_expr->get_outputs().front(); + const auto tensor_out = output_td->get_tensor(); + const auto subtensor_in = input_td->get_subtensor(); const auto inner_work_amount = *(tensor_out.rbegin()); const auto outer_work_amount = *(tensor_out.rbegin() + 1); @@ -61,10 +61,10 @@ bool SoftmaxDecomposition::run(LinearIR& linear_ir) { outer_exprs.push_back(*horizon_max.first); // Markup of ReduceMax Loop - loop_manager->mark_loop(linear_ir, max.first, horizon_max.first, 1, inner_work_amount, m_vector_size, - std::vector{(*max.first)->input_port(0), - (*max.first)->input_port(1)}, - std::vector{(*max.first)->output_port(0)}); + loop_manager->mark_loop(max.first, horizon_max.first, 1, inner_work_amount, m_vector_size, + std::vector{(*max.first)->input_port(0), + (*max.first)->input_port(1)}, + std::vector{(*max.first)->output_port(0)}); const auto broadcast_horizon_max = push_node( std::make_shared(horizon_max.second, horizon_max.second->get_input_partial_shape(0))); @@ -81,12 +81,12 @@ bool SoftmaxDecomposition::run(LinearIR& linear_ir) { outer_exprs.push_back(*horizon_sum.first); // Markup of ReduceMax Loop - loop_manager->mark_loop(linear_ir, sub.first, horizon_sum.first, 1, inner_work_amount, m_vector_size, - std::vector{(*sub.first)->input_port(0), - (*sub.first)->input_port(1), - (*sum.first)->input_port(1)}, - std::vector{(*exp.first)->output_port(0), - (*sum.first)->output_port(0)}); + loop_manager->mark_loop(sub.first, horizon_sum.first, 1, inner_work_amount, m_vector_size, + std::vector{(*sub.first)->input_port(0), + (*sub.first)->input_port(1), + (*sum.first)->input_port(1)}, + std::vector{(*exp.first)->output_port(0), + (*sum.first)->output_port(0)}); // Divide is expensive operation, so we decompose it into 1 / x * y, where 1 / x is executed outside loop const auto 
pow = push_node(std::make_shared(horizon_sum.second, -1.f)); @@ -98,15 +98,15 @@ bool SoftmaxDecomposition::run(LinearIR& linear_ir) { const auto mul = push_node(std::make_shared(exp.second, broadcast_pow.second)); // Transfer original TensorDescriptors - linear_ir.replace_input(*max.first, 0, input_tds.front()); - linear_ir.replace_input(*sub.first, 0, input_tds.front()); - linear_ir.replace_output(*mul.first, 0, output_tds.front()); + linear_ir.replace_input(*max.first, 0, input_td); + linear_ir.replace_input(*sub.first, 0, input_td); + linear_ir.replace_input(output_td->get_consumers(), (*mul.first)->get_outputs().front()); // Markup of Mul Loop - loop_manager->mark_loop(linear_ir, mul.first, expr_it, 1, inner_work_amount, m_vector_size, - std::vector{(*mul.first)->input_port(0), - (*mul.first)->input_port(1)}, - std::vector{(*mul.first)->output_port(0)}); + loop_manager->mark_loop(mul.first, expr_it, 1, inner_work_amount, m_vector_size, + std::vector{(*mul.first)->input_port(0), + (*mul.first)->input_port(1)}, + std::vector{(*mul.first)->output_port(0)}); // Markup inner loop for outside expression with null loop id for (const auto& expr : outer_exprs) { @@ -114,10 +114,10 @@ bool SoftmaxDecomposition::run(LinearIR& linear_ir) { } // Outer Loop - loop_manager->mark_loop(linear_ir, vector_buffer_max.first, expr_it, 0, outer_work_amount, 1, - std::vector{(*max.first)->input_port(0), - (*sub.first)->input_port(0)}, - std::vector{(*mul.first)->output_port(0)}); + loop_manager->mark_loop(vector_buffer_max.first, expr_it, 0, outer_work_amount, 1, + std::vector{(*max.first)->input_port(0), + (*sub.first)->input_port(0)}, + std::vector{(*mul.first)->output_port(0)}); /* =========================================== */ diff --git a/src/common/snippets/src/lowered/tensor.cpp b/src/common/snippets/src/lowered/tensor.cpp new file mode 100644 index 00000000000000..3603fd3a1c337e --- /dev/null +++ b/src/common/snippets/src/lowered/tensor.cpp @@ -0,0 +1,130 @@ +// Copyright (C) 
2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "snippets/lowered/tensor.hpp" + +#include +#include "snippets/utils.hpp" + + +namespace ngraph { +namespace snippets { +namespace lowered { + +TensorDescriptor::TensorDescriptor(const std::weak_ptr& expr, Type type, size_t port, + const std::vector& tensor, const std::vector& layout, const std::vector& subtensor) + : m_expr(expr), m_type(type), m_port_index(port), m_port_desc(std::make_shared(tensor, subtensor, layout)) {} + +TensorDescriptor::TensorDescriptor(const std::weak_ptr& expr, Type type, size_t port, const PortDescriptorPtr& port_desc) + : m_expr(expr), m_type(type), m_port_index(port) { + PortDescriptorPtr local_port_desc = port_desc; + if (!local_port_desc) { + if (type == Type::Input) { + local_port_desc = PortManager::get_port_descriptor_ptr(expr.lock()->get_node()->input(port)); + } else if (type == Type::Output) { + local_port_desc = PortManager::get_port_descriptor_ptr(expr.lock()->get_node()->output(port)); + } else { + OPENVINO_THROW("TensorDescriptor supports only Input and Output type!"); + } + } + + m_port_desc = local_port_desc; +} + +std::shared_ptr TensorDescriptor::get_expr_ptr() const { + auto shared = m_expr.lock(); + OPENVINO_ASSERT(shared != nullptr, "Failed attempt to get shared pointer of source expression: nullptr"); + return shared; +} + +Tensor::Tensor(const TensorDescriptor& source_descriptor, const std::vector& consumer_descriptors) + : m_source_port(source_descriptor), m_consumer_ports(consumer_descriptors) {} + +std::vector::const_iterator Tensor::find_consumer(const TensorDescriptor& consumer) const { + // Note: Find by shared ptr and index port is enough since these parameters must be unique + return std::find_if(m_consumer_ports.begin(), m_consumer_ports.end(), + [&consumer](const TensorDescriptor& td) { + return consumer.get_expr_ptr().get() == td.get_expr_ptr().get() && consumer.get_index() == td.get_index(); + }); +} + +std::vector::iterator 
Tensor::find_consumer(const TensorDescriptor& consumer) { + // Note: Find by shared ptr and index port is enough since these parameters must be unique + return std::find_if(m_consumer_ports.begin(), m_consumer_ports.end(), + [&consumer](const TensorDescriptor& td) { + return consumer.get_expr_ptr().get() == td.get_expr_ptr().get() && consumer.get_index() == td.get_index(); + }); +} + +bool Tensor::found_consumer(const TensorDescriptor& consumer) const { + return find_consumer(consumer) != m_consumer_ports.end(); +} + +void Tensor::add_consumer(const TensorDescriptor& consumer) { + OPENVINO_ASSERT(!found_consumer(consumer), "Consumer has been already added to Tensor!"); + m_consumer_ports.push_back(consumer); +} + +void Tensor::remove_consumer(const TensorDescriptor& consumer) { + const auto& found = find_consumer(consumer); + OPENVINO_ASSERT(found != m_consumer_ports.end(), "Consumer is missed in Tensor!"); + m_consumer_ports.erase(found); +} + +std::vector Tensor::get_conflicted_consumers() const { + std::vector conflicted_consumers; + for (const auto& consumer : m_consumer_ports) { + if (is_conflicted_consumer(consumer)) { + conflicted_consumers.push_back(consumer); + } + } + return conflicted_consumers; +} + +bool Tensor::is_conflicted_consumer(const TensorDescriptor& consumer) const { + return get_tensor() != consumer.get_tensor() || + get_layout() != consumer.get_layout() || + get_subtensor() != consumer.get_subtensor(); +} + +bool operator==(const TensorDescriptor& lhs, const TensorDescriptor& rhs) { + if (&rhs == &lhs) + return true; + return lhs.m_type == rhs.m_type && + lhs.m_expr.lock().get() == rhs.m_expr.lock().get() && + lhs.m_port_index == rhs.m_port_index && + lhs.m_port_desc == rhs.m_port_desc; +} +bool operator!=(const TensorDescriptor& lhs, const TensorDescriptor& rhs) { + return !(lhs == rhs); +} +bool operator<(const TensorDescriptor& lhs, const TensorDescriptor& rhs) { + OPENVINO_ASSERT(lhs.get_type() == rhs.get_type(), "TensorDescriptors must 
be of the same type for comparison!"); + return lhs.get_index() < rhs.get_index() && + lhs.get_expr_ptr().get() < rhs.get_expr_ptr().get() && + lhs.get_tensor() < rhs.get_tensor() && + lhs.get_layout() < rhs.get_layout() && + lhs.get_subtensor() < rhs.get_subtensor(); +} + +std::ostream& operator<<(std::ostream& ss, const TensorDescriptor& td) { + auto print_vector = [&ss](const std::vector& data){ + ss << "["; + for (auto i : data) + ss << i << ","; + ss << (data.empty() ? "]" : "\b]"); + }; + ss << "{Tensor: "; + print_vector(td.get_tensor()); + ss << " Subtensor: "; + print_vector(td.get_subtensor()); + ss << " Layout: "; + print_vector(td.get_layout()); + ss << "}"; + return ss; +} + +}// namespace lowered +}// namespace snippets +}// namespace ngraph diff --git a/src/common/snippets/src/op/brgemm.cpp b/src/common/snippets/src/op/brgemm.cpp index 4c9c2c497fb9a0..4d20bd0ab238f0 100644 --- a/src/common/snippets/src/op/brgemm.cpp +++ b/src/common/snippets/src/op/brgemm.cpp @@ -27,7 +27,7 @@ void Brgemm::validate_and_infer_types() { NODE_VALIDATION_CHECK(this, get_input_partial_shape(0).is_static() && get_input_partial_shape(1).is_static(), "Brgemm currently supports only static shapes."); - const auto planar_input_shapes = get_planar_input_shapes(input_values()); + const auto planar_input_shapes = get_planar_input_shapes(inputs()); auto output_shape = get_output_partial_shape(planar_input_shapes); set_output_type(0, get_output_type(), get_planar_output_shape(output_shape)); } @@ -56,18 +56,22 @@ ov::element::Type Brgemm::get_output_type() const { } } -std::vector Brgemm::get_planar_input_shapes(const std::vector>& inputs) const { +std::vector Brgemm::get_planar_input_shapes(const std::vector>& inputs) const { OPENVINO_ASSERT(inputs.size() == 2, "Brgemm::get_planar_input_shapes() expects 2 inputs"); return { utils::get_port_planar_shape(inputs[0]), utils::get_port_planar_shape(inputs[1]) }; } ov::PartialShape Brgemm::get_planar_output_shape(const ov::PartialShape& 
output_shape) const { // This method can be safely called from validate_and_infer_types() before output creation - const auto& rt_info = get_rt_info(); - auto it = rt_info.find(TensorDescriptorPtrVectorAttribute::get_type_info_static()); - if (it != rt_info.end()) { - const auto& td = it->second.as().m_value[0]; - return utils::get_reordered_planar_shape(output_shape, td->get_layout()); + const auto& key = PortDescriptorVectorAttribute::get_type_info_static(); + auto& rt_info = get_rt_info(); + const auto& found = rt_info.find(key); + if (found != rt_info.end()) { + const auto& out_descs = found->second.as().outputs; + if (out_descs.size() != get_output_size()) + OPENVINO_THROW("Get output port descriptor is failed: incorrect count"); + const auto& port_desc = out_descs[0]; + return utils::get_reordered_planar_shape(output_shape, port_desc->get_layout()); } return output_shape; } diff --git a/src/common/snippets/src/op/subgraph.cpp b/src/common/snippets/src/op/subgraph.cpp index da026c03a57e1b..e1feda5b13cf3d 100644 --- a/src/common/snippets/src/op/subgraph.cpp +++ b/src/common/snippets/src/op/subgraph.cpp @@ -17,7 +17,7 @@ #include "snippets/pass/matmul_to_brgemm.hpp" #include "snippets/pass/fuse_transpose_brgemm.hpp" #include "snippets/utils.hpp" -#include "snippets/tensor_descriptor.hpp" +#include "snippets/port_descriptor.hpp" #include "transformations/common_optimizations/nop_elimination.hpp" #include "transformations/utils/utils.hpp" @@ -64,8 +64,6 @@ void snippets::op::Subgraph::init_config() { config.m_has_domain_sensitive_ops = config.m_has_domain_sensitive_ops || is_domain_sensitive_op(op); } - // Domain sensitive ops are decomposed with explicit Loops. 
So, we should explicitly insert Loops in Subgraph if it contains these ops - config.m_explicit_loop_insertion = config.m_has_domain_sensitive_ops; } auto snippets::op::Subgraph::get_estimated_buffer_count(const ov::NodeVector& ops) -> size_t { @@ -529,7 +527,6 @@ snippets::Schedule snippets::op::Subgraph::generate( lowering_config.m_need_fill_tail_register = config.m_has_domain_sensitive_ops; lowering_config.m_loop_depth = tileRank; lowering_config.m_master_shape = master_shape; - lowering_config.m_explicit_loop_insertion = config.m_explicit_loop_insertion; const auto& lowering_result = m_generator->generate(body_ptr(), lowering_config, compile_params); ngraph::snippets::code ptr = lowering_result.binary_code; m_buffer_scratchpad = lowering_result.buffer_scratchpad_size; diff --git a/src/common/snippets/src/pass/fuse_transpose_brgemm.cpp b/src/common/snippets/src/pass/fuse_transpose_brgemm.cpp index 3f6d2a99d5b2a6..d1be3ee57b0b13 100644 --- a/src/common/snippets/src/pass/fuse_transpose_brgemm.cpp +++ b/src/common/snippets/src/pass/fuse_transpose_brgemm.cpp @@ -17,46 +17,45 @@ namespace ngraph { namespace snippets { namespace pass { + const std::set> FuseTransposeBrgemm::supported_cases = {{0, 2, 1, 3}}; + +bool FuseTransposeBrgemm::is_supported_transpose(const Output& transpose_port) { + const auto transpose_node = transpose_port.get_node_shared_ptr(); + // it's safe to do so because of the patterns we used. 
alternatively we can do it through pattern_values_map + const auto& constant = as_type_ptr(transpose_node->get_input_node_shared_ptr(1)); + // if Transpose in and out layout is not empty => something was already fused on this port + if (!utils::get_node_output_layout(transpose_node).empty() || + !utils::get_node_output_layout(transpose_node->get_input_node_shared_ptr(0)).empty()) + return false; + const auto& transpose_order = constant->cast_vector(); + // todo: this limitation is due to the fact that offsets are calculated in Kernel, and the only way + // to calc them non-default way is to set Parameter rt_info field. This limitation can be removed if + // the rt_info is properly propagated to the corresponding parameter + return is_type(transpose_node->get_input_node_shared_ptr(0)) && + supported_cases.count(transpose_order) != 0; +} + FuseTransposeBrgemm::FuseTransposeBrgemm() { MATCHER_SCOPE(FuseTransposeBrgemm); - auto transpose_is_supported = [](const Output& transpose_port) { - const auto transpose_node = transpose_port.get_node_shared_ptr(); - // it's safe to do so because of the patterns we used. alternatively we can do it through pattern_values_map - const auto& constant = as_type_ptr(transpose_node->get_input_node_shared_ptr(1)); - // if Transpose in and out layout is not empty => something was already fused on this port - if (!utils::get_node_output_layout(transpose_node).empty() || - !utils::get_node_output_layout(transpose_node->get_input_node_shared_ptr(0)).empty()) - return false; - const auto& transpose_order = constant->cast_vector(); - // todo: this limitation is due to the fact that offsets are calculated in Kernel, and the only way - // to calc them non-default way is to set Parameter rt_info field. 
This limitation can be removed if - // the rt_info is properly propagated to the corresponding parameter - if (!is_type(transpose_node->get_input_node_shared_ptr(0)) || - supported_cases.count(transpose_order) == 0) - return false; - return true; - }; auto constant = pattern::wrap_type(); - auto transpose = pattern::wrap_type({pattern::any_input(), constant}, transpose_is_supported); + auto transpose = pattern::wrap_type({pattern::any_input(), constant}, is_supported_transpose); auto transpose_matcher = std::make_shared(transpose); - auto brgemm_any = pattern::wrap_type({pattern::any_input(), pattern::any_input()}); + // Pattern 0: Transpose on 0-th input of MatMul auto brgemm_in0 = pattern::wrap_type({transpose, pattern::any_input()}); + + // Pattern 1: Transpose on 1-st input of MatMul auto brgemm_in1 = pattern::wrap_type({pattern::any_input(), transpose}); - auto brgemm_out0 = pattern::wrap_type({brgemm_any, constant}); - auto brgemm_or_transpose = std::make_shared(OutputVector{brgemm_in0, brgemm_in1, brgemm_out0}); + + // Pattern 2: Transpose on output of MatMul + auto brgemm_out = pattern::wrap_type({pattern::any_input(), pattern::any_input()}); + auto transpose2 = pattern::wrap_type({brgemm_out, constant}); + + auto brgemm_or_transpose = std::make_shared(OutputVector{brgemm_in0, brgemm_in1, transpose2}); auto callback = [=](pattern::Matcher& m) { OV_ITT_SCOPED_TASK(ngraph::pass::itt::domains::SnippetsTransform, "FuseTransposeBrgemm") - auto set_layout_from_order = [](const std::shared_ptr& node, const ov::Output& port) { - const auto& const_order = as_type_ptr(node->get_input_node_shared_ptr(1)); - const auto& td = ngraph::snippets::get_tensor_descriptor_ptr(port); - const auto& tensor = td->get_tensor(); - const auto& subtensor = td->get_subtensor(); - std::vector layout = const_order->cast_vector(); - ngraph::snippets::set_tensor_descriptor_ptr(port, std::make_shared(tensor, subtensor, layout)); - }; auto brgemm = as_type_ptr(m.get_match_root()); // 
Transpose on the Brgemm's output @@ -64,26 +63,44 @@ FuseTransposeBrgemm::FuseTransposeBrgemm() { brgemm = as_type_ptr(m.get_match_root()->get_input_node_shared_ptr(0)); const auto& brgemm_out = brgemm->output(0); const auto& transpose_out = m.get_match_value(); + const auto& const_order = ov::as_type_ptr(transpose_out.get_node_shared_ptr()->get_input_node_shared_ptr(1)); + ngraph::snippets::PortManager::set_port_descriptor_ptr(brgemm_out, + std::make_shared(transpose_out.get_shape(), + std::vector{}, + const_order->cast_vector())); for (const auto& in : transpose_out.get_target_inputs()) in.replace_source_output(brgemm->output(0)); - set_layout_from_order(as_type_ptr(transpose_out.get_node_shared_ptr()), brgemm_out); } + for (size_t i = 0; i < brgemm->get_input_size(); i++) { - const auto& in_value = brgemm->input_value(i); + const auto& in = brgemm->input(i); + const auto& in_value = in.get_source_output(); if (transpose_matcher->match(in_value)) { const auto& transpose = as_type_ptr(in_value.get_node_shared_ptr()); - set_layout_from_order(transpose, transpose->input_value(0)); + const auto& const_order = ov::as_type_ptr(transpose->get_input_node_shared_ptr(1)); brgemm->set_argument(i, transpose->input_value(0)); + ngraph::snippets::PortManager::set_port_descriptor_ptr(in, + std::make_shared(transpose->get_input_shape(0), + std::vector{}, + const_order->cast_vector())); + // At the moment we support fused Transpose only after Parameter -> we can update port descriptor for Paramarer as well. 
+ // Note: It's needed for BrgemmCPU + ngraph::snippets::PortManager::set_port_descriptor_ptr(transpose->input_value(0), + std::make_shared(transpose->get_input_shape(0), + std::vector{}, + const_order->cast_vector())); } } + // need to run validate_and_infer_types manually: either input shapes were updated or // output Layout was updated (out shape will be updated in validate_and_infer_types()) brgemm->validate_and_infer_types(); return true; }; + register_matcher(std::make_shared(brgemm_or_transpose, matcher_name), callback); } } // namespace pass } // namespace snippets -} // namespace ngraph \ No newline at end of file +} // namespace ngraph diff --git a/src/common/snippets/src/pass/matmul_to_brgemm.cpp b/src/common/snippets/src/pass/matmul_to_brgemm.cpp index 42b3775e2536bd..bd93245cd368fa 100644 --- a/src/common/snippets/src/pass/matmul_to_brgemm.cpp +++ b/src/common/snippets/src/pass/matmul_to_brgemm.cpp @@ -9,19 +9,31 @@ #include "snippets/snippets_isa.hpp" #include "snippets/utils.hpp" -#include "ngraph/opsets/opset1.hpp" #include "ngraph/rt_info.hpp" -#include +#include #include "ngraph/pattern/op/wrap_type.hpp" namespace ngraph { namespace snippets { namespace pass { +void MatMulToBrgemm::init_ports(const std::shared_ptr& brgemm) const { + auto get_subtensor = [](const ov::Shape& shape) { + return std::vector{shape[shape.size() - 2], shape[shape.size() - 1]}; + }; + for (const auto& input : brgemm->inputs()) { + const auto tensor = input.get_shape(); + const auto subtensor = get_subtensor(tensor); + PortManager::set_port_descriptor_ptr(input, std::make_shared(tensor, subtensor)); + } + const auto tensor = brgemm->get_output_shape(0); + const auto subtensor = get_subtensor(tensor); + PortManager::set_port_descriptor_ptr(brgemm->output(0), std::make_shared(tensor, subtensor)); +} + MatMulToBrgemm::MatMulToBrgemm() { MATCHER_SCOPE(MatMulToBrgemm); - auto matmul_pattern = ngraph::pattern::wrap_type({ngraph::pattern::any_input(), - 
ngraph::pattern::any_input()}); + auto matmul_pattern = ngraph::pattern::wrap_type({ngraph::pattern::any_input(), ngraph::pattern::any_input()}); auto callback = [=](ngraph::pattern::Matcher& m) { OV_ITT_SCOPED_TASK(ngraph::pass::itt::domains::SnippetsTransform, "ov::intel_cpu::pass::MatMulToBrgemm") @@ -39,9 +51,7 @@ MatMulToBrgemm::MatMulToBrgemm() { brgemm->set_friendly_name(matmul->get_friendly_name()); ngraph::copy_runtime_info(matmul, nodes); ngraph::replace_node(matmul, nodes.back()); - const std::vector tensor = brgemm->get_output_shape(0); - const std::vector subtensor = {tensor[tensor.size() - 2], tensor[tensor.size() - 1]}; - ngraph::snippets::set_tensor_descriptor_ptr(brgemm->output(0), std::make_shared(tensor, subtensor)); + init_ports(brgemm); // TODO: At the moment Brgemm is executed outside Loop. When Blocking is supported, remove it utils::set_outside_loop_value(brgemm, true); return true; diff --git a/src/common/snippets/src/pass/transpose_decomposition.cpp b/src/common/snippets/src/pass/transpose_decomposition.cpp index 08a083558c9760..0083d33b00f4e9 100644 --- a/src/common/snippets/src/pass/transpose_decomposition.cpp +++ b/src/common/snippets/src/pass/transpose_decomposition.cpp @@ -5,17 +5,23 @@ #include #include #include -#include +#include #include #include #include -const std::set> ngraph::snippets::pass::TransposeDecomposition::supported_cases = {{0, 2, 3, 1}}; -ngraph::snippets::pass::TransposeDecomposition::TransposeDecomposition() { + +namespace ngraph { +namespace snippets { +namespace pass { + +const std::set> TransposeDecomposition::supported_cases = {{0, 2, 3, 1}}; + +TransposeDecomposition::TransposeDecomposition() { MATCHER_SCOPE(TransposeDecomposition); - // todo: we need a special transformation that detects and propagates data access pattern to Parameters and Results - // this is needed to communicate access pattern to the plugin node and op::Kernel - // This is the reason we match only to Parameter, this limitation could be 
relaxed if we propagate access pattern - // to the appropriate parameter + // Todo: we need a special transformation that detects and propagates data access pattern to Parameters and Results + // this is needed to communicate access pattern to the plugin node and op::Kernel + // This is the reason we match only to Parameter, this limitation could be relaxed if we propagate access pattern + // to the appropriate parameter auto match_data = ngraph::pattern::wrap_type(); auto match_order = ngraph::pattern::wrap_type(); auto match_transpose = ngraph::pattern::wrap_type({match_data, match_order}); @@ -23,8 +29,8 @@ ngraph::snippets::pass::TransposeDecomposition::TransposeDecomposition() { ngraph::matcher_pass_callback callback = [=](ngraph::pattern::Matcher& m) { OV_ITT_SCOPED_TASK(ngraph::pass::itt::domains::SnippetsTransform, "Snippets::op::TransposeDecomposition") auto& pattern_to_output = m.get_pattern_value_map(); - const auto transpose = ov::as_type_ptr( - pattern_to_output.at(match_transpose).get_node_shared_ptr()); + const auto& data_input = pattern_to_output.at(match_data); + const auto transpose = ov::as_type_ptr(pattern_to_output.at(match_transpose).get_node_shared_ptr()); const auto order = ov::as_type_ptr(pattern_to_output.at(match_order).get_node_shared_ptr()); if (transformation_callback(transpose) || transpose->is_dynamic()) @@ -34,20 +40,19 @@ ngraph::snippets::pass::TransposeDecomposition::TransposeDecomposition() { if (supported_cases.count(order_value) == 0) return false; - auto data_input = pattern_to_output.at(match_data); - const std::vector& tensor_shape {data_input.get_shape()}; // number of elements that can be processed on every iteration. 
For 0,1,2,3 -> 0,2,3,1 we can guarantee only scalar access - const std::vector subtensor_shape {1}; + const auto subtensor_shape = std::vector{1}; const auto& layout = order->cast_vector(); + // todo: LoadReshape used here is essentially Load + an easy way to maintain correct shape propagation // fix this in future and develop a more consistent shape propagation approach. auto load = std::make_shared(data_input, subtensor_shape[0], 0, layout); auto store = std::make_shared(load, subtensor_shape[0]); - ngraph::snippets::set_tensor_descriptor_ptr(load->output(0), std::make_shared(tensor_shape, subtensor_shape, layout)); - ngraph::snippets::set_tensor_descriptor_ptr(store->output(0), - std::make_shared(store->get_output_shape(0), - std::vector{}, - std::vector{})); + + PortManager::set_port_descriptor_ptr(load->input(0), std::make_shared(load->get_input_shape(0), subtensor_shape, layout)); + PortManager::set_port_descriptor_ptr(load->output(0), std::make_shared(load->get_output_shape(0), subtensor_shape)); + PortManager::set_port_descriptor_ptr(store->input(0), std::make_shared(store->get_input_shape(0), subtensor_shape)); + PortManager::set_port_descriptor_ptr(store->output(0), std::make_shared(store->get_output_shape(0), subtensor_shape)); for (auto& input : transpose->output(0).get_target_inputs()) { input.replace_source_output(store->output(0)); @@ -59,3 +64,7 @@ ngraph::snippets::pass::TransposeDecomposition::TransposeDecomposition() { auto m = std::make_shared(match_transpose, matcher_name); register_matcher(m, callback); } + +} // namespace pass +} // namespace snippets +} // namespace ngraph diff --git a/src/common/snippets/src/port_descriptor.cpp b/src/common/snippets/src/port_descriptor.cpp new file mode 100644 index 00000000000000..a8398dceb9c657 --- /dev/null +++ b/src/common/snippets/src/port_descriptor.cpp @@ -0,0 +1,156 @@ +// Copyright (C) 2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "snippets/port_descriptor.hpp" 
+#include "ngraph/except.hpp" +#include + +namespace ngraph { +namespace snippets { + +PortDescriptor::PortDescriptor(const ov::Input& in, std::vector subtensor_shape, std::vector layout) + : PortDescriptor(ov::Input(in.get_node(), in.get_index()), std::move(subtensor_shape), std::move(layout)) {} +PortDescriptor::PortDescriptor(const ov::Input& in, std::vector subtensor_shape, std::vector layout) + : PortDescriptor(in.get_shape(), std::move(subtensor_shape), std::move(layout)) {} + +PortDescriptor::PortDescriptor(const ov::Output& out, std::vector subtensor_shape, std::vector layout) + : PortDescriptor(ov::Output(out.get_node(), out.get_index()), std::move(subtensor_shape), std::move(layout)) {} +PortDescriptor::PortDescriptor(const ov::Output& out, std::vector subtensor_shape, std::vector layout) + : PortDescriptor(out.get_shape(), std::move(subtensor_shape), std::move(layout)) {} + +PortDescriptor::PortDescriptor(std::vector shape, std::vector subtensor_shape, std::vector layout) + : m_tensor_shape(std::move(shape)), m_layout(std::move(layout)), m_subtensor_shape(std::move(subtensor_shape)) { + validate_arguments(); +} + +void PortDescriptor::validate_arguments() { + if (!m_tensor_shape.empty() && m_layout.empty()) { + m_layout.resize(m_tensor_shape.size()); + // NCHW layout by default + std::iota(m_layout.begin(), m_layout.end(), 0); + } else if (m_layout.size() != m_tensor_shape.size()) { + OPENVINO_THROW("Snippets tensor descriptor: Layout size must be equal to the shape size"); + } +} + +PortDescriptor PortDescriptor::deserialize(const std::string& serialized_info) { + std::stringstream sinfo(serialized_info); + auto read_values = [](std::stringstream& ss){ + size_t num = 0; + ss >> num; + std::vector res; + for (size_t i = 0; i < num; i++) { + size_t val; + ss >> val; + res.push_back(val); + } + return res; + }; + const auto& tensor_shape = read_values(sinfo); + const auto& subtensor_shape = read_values(sinfo); + const auto& layout = read_values(sinfo); + 
return {tensor_shape, subtensor_shape, layout}; +} + +std::string PortDescriptor::serialize() const { + std::stringstream ss; + ss << m_tensor_shape.size() << " "; + for (auto val : m_tensor_shape) + ss << val << " "; + ss << m_subtensor_shape.size() << " "; + for (auto val : m_subtensor_shape) + ss << val << " "; + ss << m_layout.size() << " "; + for (auto val : m_layout) + ss << val << " "; + return ss.str(); +} +bool operator==(const PortDescriptor& lhs, const PortDescriptor& rhs) { + return lhs.m_tensor_shape == rhs.m_tensor_shape && + lhs.m_layout == rhs.m_layout && + lhs.m_subtensor_shape == rhs.m_subtensor_shape; +} + +void PortManager::init_default(std::vector& in_descs, std::vector& out_descs, const std::shared_ptr& node) { + in_descs.resize(node->get_input_size()); + out_descs.resize(node->get_output_size()); + for (size_t i = 0; i < node->get_input_size(); ++i) { + in_descs[i] = std::make_shared(node->input(i)); + } + for (size_t i = 0; i < node->get_output_size(); ++i) { + out_descs[i] = std::make_shared(node->output(i)); + } +} + +void PortManager::set_port_descriptor_ptr(const ov::Input& in, const PortDescriptorPtr& desc) { + const auto& node = in.get_node()->shared_from_this(); + auto& rt_info = node->get_rt_info(); + const auto& key = PortDescriptorVectorAttribute::get_type_info_static(); + const auto& found = rt_info.find(key); + if (found == rt_info.end()) { + std::vector in_descs, out_descs; + init_default(in_descs, out_descs, node); + in_descs[in.get_index()] = desc; + rt_info[key] = PortDescriptorVectorAttribute(in_descs, out_descs); + } else { + auto& in_descs = found->second.as().inputs; + if (in_descs.size() != node->get_input_size()) + OPENVINO_THROW("Set input port descriptor is failed: incorrect count"); + in_descs[in.get_index()] = desc; + } +} + +void PortManager::set_port_descriptor_ptr(const ov::Output& out, const PortDescriptorPtr& desc) { + const auto& node = out.get_node_shared_ptr(); + auto& rt_info = node->get_rt_info(); + const 
auto& key = PortDescriptorVectorAttribute::get_type_info_static(); + const auto& found = rt_info.find(key); + if (found == rt_info.end()) { + std::vector in_descs, out_descs; + init_default(in_descs, out_descs, node); + out_descs[out.get_index()] = desc; + rt_info[key] = PortDescriptorVectorAttribute(in_descs, out_descs); + } else { + auto& out_descs = found->second.as().outputs; + if (out_descs.size() != node->get_output_size()) + OPENVINO_THROW("Set output port descriptor is failed: incorrect count"); + out_descs[out.get_index()] = desc; + } +} + +PortDescriptorPtr PortManager::get_port_descriptor_ptr(const ov::Input& in) { + return get_port_descriptor_ptr(ov::Input(in.get_node(), in.get_index())); +} +PortDescriptorPtr PortManager::get_port_descriptor_ptr(const ov::Input& in) { + const auto& node = in.get_node(); + auto& rt_info = node->get_rt_info(); + const auto& key = PortDescriptorVectorAttribute::get_type_info_static(); + const auto& found = rt_info.find(key); + if (found == rt_info.end()) { + return std::make_shared(in); + } + const auto& in_descs = found->second.as().inputs; + if (in_descs.size() != node->get_input_size()) + OPENVINO_THROW("Get input port descriptor is failed: incorrect count"); + return in_descs[in.get_index()]; +} + +PortDescriptorPtr PortManager::get_port_descriptor_ptr(const Output& out) { + return get_port_descriptor_ptr(ov::Output(out.get_node(), out.get_index())); +} +PortDescriptorPtr PortManager::get_port_descriptor_ptr(const Output& out) { + const auto& node = out.get_node(); + const auto& rt_info = node->get_rt_info(); + const auto& key = PortDescriptorVectorAttribute::get_type_info_static(); + const auto& found = rt_info.find(key); + if (found == rt_info.end()) { + return std::make_shared(out); + } + const auto& out_descs = found->second.as().outputs; + if (out_descs.size() != node->get_output_size()) + OPENVINO_THROW("Get output port descriptor is failed: incorrect count"); + return out_descs[out.get_index()]; +} +} // 
namespace snippets +} // namespace ngraph diff --git a/src/common/snippets/src/tensor_descriptor.cpp b/src/common/snippets/src/tensor_descriptor.cpp deleted file mode 100644 index a3182686c80c2a..00000000000000 --- a/src/common/snippets/src/tensor_descriptor.cpp +++ /dev/null @@ -1,136 +0,0 @@ -// Copyright (C) 2023 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 -// - -#include "snippets/tensor_descriptor.hpp" -#include "ngraph/except.hpp" -#include - -namespace ngraph { -namespace snippets { -TensorDescriptor::TensorDescriptor(const Output& out, - std::vector subtensor_shape, - std::vector layout) - : TensorDescriptor(ov::Output(out.get_node(), out.get_index()), - std::move(subtensor_shape), - std::move(layout)) { -} - -TensorDescriptor::TensorDescriptor(const Output& out, - std::vector subtensor_shape, - std::vector layout) - : m_layout(std::move(layout)), m_subtensor_shape(std::move(subtensor_shape)) { - const auto& pshape = out.get_partial_shape(); - // Note: this limitation could be relaxed if necessary - if (pshape.is_dynamic()) - OPENVINO_THROW("Snippets tensor descriptor can be created only for static shapes"); - m_tensor_shape = pshape.get_shape(); - validate_arguments(); -} - -TensorDescriptor::TensorDescriptor(std::vector tensor_shape, - std::vector subtensor_shape, - std::vector layout) : m_tensor_shape(std::move(tensor_shape)), - m_layout(std::move(layout)), m_subtensor_shape(std::move(subtensor_shape)) { - validate_arguments(); -} - -void TensorDescriptor::validate_arguments() { - if (!m_tensor_shape.empty() && m_layout.empty()) { - m_layout.resize(m_tensor_shape.size()); - // NCHW layout by default - std::iota(m_layout.begin(), m_layout.end(), 0); - } else if (m_layout.size() != m_tensor_shape.size()) { - OPENVINO_THROW("Snippets tensor descriptor: Layout size must be equal to the shape size"); - } -} - - -TensorDescriptor TensorDescriptor::deserialize(const std::string& serialized_info) { - std::stringstream sinfo(serialized_info); - auto 
read_values = [](std::stringstream& ss){ - size_t num = 0; - ss >> num; - std::vector res; - for (size_t i = 0; i < num; i++) { - size_t val; - ss >> val; - res.push_back(val); - } - return res; - }; - const auto& tensor_shape = read_values(sinfo); - const auto& subtensor_shape = read_values(sinfo); - const auto& layout = read_values(sinfo); - return {tensor_shape, subtensor_shape, layout}; -} - -std::string TensorDescriptor::serialize() const { - std::stringstream ss; - ss << m_tensor_shape.size() << " "; - for (auto val : m_tensor_shape) - ss << val << " "; - ss << m_subtensor_shape.size() << " "; - for (auto val : m_subtensor_shape) - ss << val << " "; - ss << m_layout.size() << " "; - for (auto val : m_layout) - ss << val << " "; - return ss.str(); -} -bool operator==(const TensorDescriptor& lhs, const TensorDescriptor& rhs) { - return lhs.m_tensor_shape == rhs.m_tensor_shape && - lhs.m_layout == rhs.m_layout && - lhs.m_subtensor_shape == rhs.m_subtensor_shape; -} - -std::ostream& operator << (std::ostream& ss, const TensorDescriptor& td) { - auto print_vector = [&ss](const std::vector& data){ - ss << "["; - for (auto i : data) - ss << i << ","; - ss << (data.empty() ? 
"]" : "\b]"); - }; - ss << "{Tensor: "; - print_vector(td.get_tensor()); - ss << " Subtensor: "; - print_vector(td.get_subtensor()); - ss << " Layout: "; - print_vector(td.get_layout()); - ss << "}"; - return ss; -} - -void set_tensor_descriptor_ptr(const Output& out, const TensorDescriptorPtr& desc) { - const auto& node = out.get_node_shared_ptr(); - auto& rt_info = node->get_rt_info(); - const auto& key = TensorDescriptorPtrVectorAttribute::get_type_info_static(); - const auto& found = rt_info.find(key); - if (found == rt_info.end()) { - std::vector value(node->get_output_size()); - value[out.get_index()] = desc; - rt_info[key] = TensorDescriptorPtrVectorAttribute(value); - } else { - auto& value = found->second.as().m_value; - if (value.size() != node->get_output_size()) - OPENVINO_THROW("Either all or none of Tensor descriptors should be stored in rt_info (set)"); - value[out.get_index()] = desc; - } -} -TensorDescriptorPtr get_tensor_descriptor_ptr(const Output& out) { - return get_tensor_descriptor_ptr(ov::Output(out.get_node(), out.get_index())); -} -TensorDescriptorPtr get_tensor_descriptor_ptr(const Output& out) { - const auto& node = out.get_node_shared_ptr(); - const auto& rt_info = node->get_rt_info(); - auto it = rt_info.find(TensorDescriptorPtrVectorAttribute::get_type_info_static()); - if (it == rt_info.end()) { - return std::make_shared(out); - } - const auto& td_vector = it->second.as().m_value; - if (td_vector.size() != node->get_output_size()) - OPENVINO_THROW("Either all or none of Tensor descriptors should be stored in rt_info (get)"); - return td_vector[out.get_index()]; -} -} // namespace snippets -} // namespace ngraph diff --git a/src/common/snippets/src/utils.cpp b/src/common/snippets/src/utils.cpp index 789a5e6daeb080..75ea99bdedac0d 100644 --- a/src/common/snippets/src/utils.cpp +++ b/src/common/snippets/src/utils.cpp @@ -106,20 +106,29 @@ ov::PartialShape get_reordered_planar_shape(const ov::PartialShape& shape, const return 
reordered_shape; } -ov::PartialShape get_port_planar_shape(const Output& out) { - const auto& td = ngraph::snippets::get_tensor_descriptor_ptr(out); - return utils::get_reordered_planar_shape(ov::Shape{td->get_tensor()}, td->get_layout()); +ov::Shape get_reordered_shape(const ov::Shape& shape, const std::vector& layout) { + if (layout.empty()) + return shape; + ov::Shape reordered_shape(layout.size()); + const size_t rank = shape.size(); + if (layout.size() > rank) + OPENVINO_THROW("Layout rank can't be larger than tensor rank"); + // Note that it can be smaller though, for example tensor shape can be prepended with 1 for scheduling purposes + if (std::any_of(layout.begin(), layout.end(), [=](size_t x) {return x >= rank;})) + OPENVINO_THROW("Invalid layout detected: all layout indexes must be smaller than the tensor rank"); + for (size_t i = 0; i < layout.size(); i++) + reordered_shape[i] = shape[layout[i]]; + return reordered_shape; } -void set_transpose_output_layout(const ov::Output& port, const std::shared_ptr& node) { - const auto& const_order = as_type_ptr(node->get_input_node_shared_ptr(1)); - OPENVINO_ASSERT(const_order != nullptr, "Transpose order must be Constant to set layout!"); - set_output_layout(port, const_order->cast_vector()); +ov::PartialShape get_port_planar_shape(const Input& in) { + const auto& td = PortManager::get_port_descriptor_ptr(in); + return utils::get_reordered_planar_shape(ov::Shape{td->get_tensor()}, td->get_layout()); } -void set_output_layout(const ov::Output& port, const std::vector& layout) { - auto& rt_info = port.get_node_shared_ptr()->get_rt_info(); - rt_info["Layout"] = layout; +ov::PartialShape get_port_planar_shape(const Output& out) { + const auto& td = PortManager::get_port_descriptor_ptr(out); + return utils::get_reordered_planar_shape(ov::Shape{td->get_tensor()}, td->get_layout()); } bool get_outside_loop_value(const std::shared_ptr& node) { diff --git a/src/plugins/intel_cpu/src/emitters/x64/jit_snippets_emitters.cpp 
b/src/plugins/intel_cpu/src/emitters/x64/jit_snippets_emitters.cpp index 731e5c898b7cd7..917d595bd98e79 100644 --- a/src/plugins/intel_cpu/src/emitters/x64/jit_snippets_emitters.cpp +++ b/src/plugins/intel_cpu/src/emitters/x64/jit_snippets_emitters.cpp @@ -15,7 +15,7 @@ #include "transformations/snippets/x64/op//brgemm_cpu.hpp" #include "snippets/snippets_isa.hpp" #include "snippets/op/subgraph.hpp" -#include "snippets/tensor_descriptor.hpp" +#include "snippets/lowered/tensor.hpp" using namespace InferenceEngine; using ngraph::snippets::op::Subgraph; @@ -26,7 +26,7 @@ using namespace dnnl::impl::cpu::x64; using ngraph::snippets::lowered::Expression; using ngraph::snippets::lowered::IOExpression; using ngraph::snippets::lowered::ExpressionPtr; -using ngraph::snippets::TensorDescriptorPtr; +using ngraph::snippets::lowered::TensorPtr; namespace ov { namespace intel_cpu { @@ -121,7 +121,7 @@ KernelEmitter::KernelEmitter(dnnl::impl::cpu::x64::jit_generator* h, dnnl::impl: num_inputs = 0; num_outputs = 0; for (const auto& expr : io_exprs) { - TensorDescriptorPtr td {}; + TensorPtr td {}; element::Type etype; switch (expr->get_type()) { case ngraph::snippets::lowered::IOExpression::io_type::INPUT: { @@ -222,8 +222,7 @@ void KernelEmitter::init_data_pointers(size_t num_inputs, size_t num_params, siz const size_t offset_rank = jcp.master_shape.size() - 1; //const size_t tile_rank = jcp.tile_rank; std::vector> data_offsets(num_params, std::vector{}); - auto offset_calculation = [=](const std::vector& shape, - const std::vector& layout, const size_t data_size) { + auto offset_calculation = [=](const std::vector& shape, const std::vector& layout, const size_t data_size) { // Strides represent distance between consecutive elements of corresponding dimension. 
// If a dim size == 1, then the next dim starts immediately and the stride is 0 // case 1: @@ -724,14 +723,11 @@ BrgemmEmitter::BrgemmEmitter(dnnl::impl::cpu::x64::jit_generator* h, dnnl::impl: if (brgemm_node->is_dynamic()) IE_THROW() << "Snippets don't support code generation for dynamic Brgemm"; const auto brgemm_copy = brgemm_node->is_with_data_repacking() ? brgemm_node->get_brgemm_copy() : nullptr; - const OutputVector io_values {brgemm_node->input_value(0), - brgemm_copy ? brgemm_copy->input_value(0) : brgemm_node->input_value(1), - brgemm_node->output(0)}; + std::vector leading_dimensions; std::vector> io_layouts; - for (const auto& val : io_values) { - const auto& layout = ngraph::snippets::get_tensor_descriptor_ptr(val.get_node_shared_ptr())->get_layout(); - const auto& io_shape = val.get_shape(); + + auto init_scheduling_params = [&](const std::vector& layout, const ov::Shape& io_shape) { if (layout.empty()) { // empty value indicates a planar layout leading_dimensions.push_back(io_shape.back()); @@ -744,17 +740,23 @@ BrgemmEmitter::BrgemmEmitter(dnnl::impl::cpu::x64::jit_generator* h, dnnl::impl: // counting from the end since shape could be prepended with ones const int64_t num_last_dims = layout.end() - std::find(layout.begin(), layout.end(), layout.size() - 2) - 1; if (layout.back() != layout.size() - 1 || num_last_dims < 1) - IE_THROW() << "BrgemmEmitter detected invalid layout values: " << - "check that this shape + layout combination is schedulable"; + IE_THROW() << "BrgemmEmitter detected invalid layout values: check that this shape + layout combination is schedulable"; leading_dimensions.emplace_back( std::accumulate(io_shape.end() - num_last_dims, io_shape.end(), 1, std::multiplies())); io_layouts.push_back(layout); } + }; + + std::vector> brgemm_inputs = {brgemm_node->input(0), + brgemm_copy ? 
brgemm_copy->input(0) : brgemm_node->input(1)}; + for (const auto& input : brgemm_inputs) { + init_scheduling_params(ngraph::snippets::PortManager::get_port_descriptor_ptr(input)->get_layout(), input.get_shape()); } + init_scheduling_params(ngraph::snippets::PortManager::get_port_descriptor_ptr(brgemm_node->output(0))->get_layout(), brgemm_node->output(0).get_shape()); - const auto& A_shape = io_values[0].get_shape(); + const auto& A_shape = brgemm_node->get_input_shape(0); const auto& A_layout = io_layouts[0]; - const auto& C_shape = io_values[2].get_shape(); + const auto& C_shape = brgemm_node->get_output_shape(0); const auto& C_layout = io_layouts[2]; // We need find original M,N,K having layouts and ordered shapes @@ -1105,7 +1107,7 @@ BrgemmCopyBEmitter::BrgemmCopyBEmitter(dnnl::impl::cpu::x64::jit_generator* h, d if (m_with_comp) m_comp_offset = brgemm_repack->get_offset_compensations(); - const auto& layout = ngraph::snippets::get_tensor_descriptor_ptr(brgemm_repack->get_input_node_shared_ptr(0))->get_layout(); + const auto& layout = ngraph::snippets::PortManager::get_port_descriptor_ptr(brgemm_repack->input(0))->get_layout(); const auto& original_shape = brgemm_repack->get_input_shape(0); auto transposed_shape = original_shape; size_t leading_dimension = *(original_shape.rbegin()); diff --git a/src/plugins/intel_cpu/src/transformations/snippets/x64/op/brgemm_copy_b.cpp b/src/plugins/intel_cpu/src/transformations/snippets/x64/op/brgemm_copy_b.cpp index 3502586495a512..aa16bf4b99622a 100644 --- a/src/plugins/intel_cpu/src/transformations/snippets/x64/op/brgemm_copy_b.cpp +++ b/src/plugins/intel_cpu/src/transformations/snippets/x64/op/brgemm_copy_b.cpp @@ -21,7 +21,7 @@ intel_cpu::BrgemmCopyB::BrgemmCopyB(const Output& x, const element::Type s if (is_with_compensations()) { set_output_port_descriptor({0, offset_out1}, 1); } - constructor_validate_and_infer_types(); + ctor_validate_and_infer_types(); } bool 
intel_cpu::BrgemmCopyB::visit_attributes(AttributeVisitor& visitor) { @@ -31,14 +31,27 @@ bool intel_cpu::BrgemmCopyB::visit_attributes(AttributeVisitor& visitor) { return true; } +void intel_cpu::BrgemmCopyB::ctor_validate_and_infer_types() { + INTERNAL_OP_SCOPE(BrgemmRepack_ctor_validate_and_infer_types); + // During ctor call, BrgemmCopyB doesn't know his port descriptors. + // So we use port descs from source inputs + const auto element_type = get_input_element_type(0); + const auto pshape = ngraph::snippets::utils::get_port_planar_shape(input_value(0)); + validate(pshape, element_type); +} + void intel_cpu::BrgemmCopyB::validate_and_infer_types() { INTERNAL_OP_SCOPE(BrgemmRepack_validate_and_infer_types); const auto element_type = get_input_element_type(0); + const auto pshape = ngraph::snippets::utils::get_port_planar_shape(input(0)); + validate(pshape, element_type); +} + +void intel_cpu::BrgemmCopyB::validate(const ov::PartialShape& pshape, const ov::element::Type& element_type) { NGRAPH_CHECK(one_of(element_type, element::bf16, element::i8), - "BrgemmCopyB doesn't support element type" + element_type.get_type_name()); + "BrgemmCopyB doesn't support element type" + element_type.get_type_name()); - const auto pshape = ngraph::snippets::utils::get_port_planar_shape(input_value(0)); if (pshape.is_dynamic()) { set_output_type(0, element_type, ov::PartialShape{ov::Dimension::dynamic()}); if (is_with_compensations()) { diff --git a/src/plugins/intel_cpu/src/transformations/snippets/x64/op/brgemm_copy_b.hpp b/src/plugins/intel_cpu/src/transformations/snippets/x64/op/brgemm_copy_b.hpp index d8db828b4a3e56..73e46e60c41aa3 100644 --- a/src/plugins/intel_cpu/src/transformations/snippets/x64/op/brgemm_copy_b.hpp +++ b/src/plugins/intel_cpu/src/transformations/snippets/x64/op/brgemm_copy_b.hpp @@ -43,6 +43,9 @@ class BrgemmCopyB : public ngraph::snippets::op::MemoryAccess { std::shared_ptr clone_with_new_inputs(const OutputVector& new_args) const override; private: + 
void ctor_validate_and_infer_types(); + void validate(const ov::PartialShape& pshape, const ov::element::Type& element_type); + Type m_type = Type::OnlyRepacking; element::Type m_src_type = ov::element::undefined; // src element type of the corresponding BRGEMM }; diff --git a/src/plugins/intel_cpu/src/transformations/snippets/x64/op/brgemm_cpu.cpp b/src/plugins/intel_cpu/src/transformations/snippets/x64/op/brgemm_cpu.cpp index 011501a53947c2..bc5e8bc9256acb 100644 --- a/src/plugins/intel_cpu/src/transformations/snippets/x64/op/brgemm_cpu.cpp +++ b/src/plugins/intel_cpu/src/transformations/snippets/x64/op/brgemm_cpu.cpp @@ -5,7 +5,7 @@ #include "brgemm_cpu.hpp" #include "snippets/itt.hpp" #include "snippets/utils.hpp" -#include "snippets/tensor_descriptor.hpp" +#include "snippets/port_descriptor.hpp" #include "utils/general_utils.h" @@ -22,7 +22,7 @@ BrgemmCPU::BrgemmCPU(const Output& A, const Output& B, const Type ty set_input_port_descriptor({0, offset_a}, 0); set_input_port_descriptor({0, offset_b}, 1); set_output_port_descriptor({0, offset_c}, 0); - constructor_validate_and_infer_types(); + ctor_validate_and_infer_types(); } BrgemmCPU::BrgemmCPU(const Output& A, const Output& B, const Output& scratch, const Type type, @@ -35,25 +35,41 @@ BrgemmCPU::BrgemmCPU(const Output& A, const Output& B, const Output< set_input_port_descriptor({0, offset_b}, 1); set_output_port_descriptor({0, offset_c}, 0); set_input_port_descriptor({0, offset_scratch}, 2); - constructor_validate_and_infer_types(); + ctor_validate_and_infer_types(); +} + +void BrgemmCPU::ctor_validate_and_infer_types() { + INTERNAL_OP_SCOPE(BrgemmCPU_ctor_validate_and_infer_types); + validate_inputs(); + + // During ctor call, BrgemmCPU doesn't know his port descriptors. + // So we use port descs from source inputs + const auto brgemm_copy = is_with_data_repacking() ? 
get_brgemm_copy() : nullptr; + const auto planar_input_shapes = + std::vector{ ngraph::snippets::utils::get_port_planar_shape(input_value(0)), + brgemm_copy ? ngraph::snippets::utils::get_port_planar_shape(brgemm_copy->input(0)) + : ngraph::snippets::utils::get_port_planar_shape(input_value(1)) }; + auto output_shape = get_output_partial_shape(planar_input_shapes); + set_output_type(0, get_output_type(), get_planar_output_shape(output_shape)); + + //Additional check for 3rd input + validate_with_scratchpad(planar_input_shapes[1].get_shape()); } void BrgemmCPU::validate_and_infer_types() { INTERNAL_OP_SCOPE(BrgemmCPU_validate_and_infer_types); - // If no leading dimensions are provided, assume dense row-major inputs-outputs - NODE_VALIDATION_CHECK(this, get_input_partial_shape(0).is_static() && get_input_partial_shape(1).is_static(), - "BrgemmCPU currently supports only static shapes."); - - OPENVINO_ASSERT(implication(one_of(m_type, Type::Floating, Type::WithDataRepacking), get_input_size() == 2), - "BrgemmCPU expects 2 inputs in cases, when input precisions are f32|f32, u8|i8 or bf16|bf16 (non-AMX system)"); - OPENVINO_ASSERT(implication(one_of(m_type, Type::WithCompensations, Type::AMX), get_input_size() == 3), - "BrgemmCPU expects 3 inputs with input precisions i8|i8 and bf16|bf16 on AMX system"); + validate_inputs(); const auto brgemm_copy = is_with_data_repacking() ? get_brgemm_copy() : nullptr; - const auto planar_input_shapes = get_planar_input_shapes({input_value(0), brgemm_copy ? brgemm_copy->input_value(0) : input_value(1)}); + const auto planar_input_shapes = get_planar_input_shapes({input(0), brgemm_copy ? 
brgemm_copy->input(0) : input(1)}); auto output_shape = get_output_partial_shape(planar_input_shapes); set_output_type(0, get_output_type(), get_planar_output_shape(output_shape)); + //Additional check for 3rd input + validate_with_scratchpad(planar_input_shapes[1].get_shape()); +} + +void BrgemmCPU::validate_with_scratchpad(const ov::Shape& shape_b) const { //Additional check for 3rd input if (one_of(m_type, Type::WithCompensations, Type::AMX)) { const auto shape = get_input_partial_shape(2); @@ -61,7 +77,6 @@ void BrgemmCPU::validate_and_infer_types() { const auto type = get_input_element_type(2); if (is_with_compensations()) { const auto element_type_b = get_input_element_type(0); - const auto shape_b = planar_input_shapes[1].get_shape(); const auto N = *shape_b.rbegin(); const auto N_blk = element_type_b == element::f32 ? N : element_type_b == element::bf16 ? 32 : 64; @@ -76,6 +91,16 @@ void BrgemmCPU::validate_and_infer_types() { } } +void BrgemmCPU::validate_inputs() const { + // If no leading dimensions are provided, assume dense row-major inputs-outputs + NODE_VALIDATION_CHECK(this, get_input_partial_shape(0).is_static() && get_input_partial_shape(1).is_static(), + "BrgemmCPU currently supports only static shapes."); + OPENVINO_ASSERT(implication(one_of(m_type, Type::Floating, Type::WithDataRepacking), get_input_size() == 2), + "BrgemmCPU expects 2 inputs in cases, when input precisions are f32|f32, u8|i8 or bf16|bf16 (non-AMX system)"); + OPENVINO_ASSERT(implication(one_of(m_type, Type::WithCompensations, Type::AMX), get_input_size() == 3), + "BrgemmCPU expects 3 inputs with input precisions i8|i8 and bf16|bf16 on AMX system"); +} + std::shared_ptr BrgemmCPU::clone_with_new_inputs(const OutputVector& new_args) const { INTERNAL_OP_SCOPE(BrgemmCPU_clone_with_new_inputs); check_new_args_count(this, new_args); diff --git a/src/plugins/intel_cpu/src/transformations/snippets/x64/op/brgemm_cpu.hpp 
b/src/plugins/intel_cpu/src/transformations/snippets/x64/op/brgemm_cpu.hpp index 2081ca25c7528f..e93d7d70452fe6 100644 --- a/src/plugins/intel_cpu/src/transformations/snippets/x64/op/brgemm_cpu.hpp +++ b/src/plugins/intel_cpu/src/transformations/snippets/x64/op/brgemm_cpu.hpp @@ -7,6 +7,8 @@ #include "snippets/op/brgemm.hpp" #include "brgemm_copy_b.hpp" +#include "snippets/port_descriptor.hpp" + namespace ov { namespace intel_cpu { @@ -48,7 +50,11 @@ class BrgemmCPU : public ngraph::snippets::op::Brgemm { constexpr static size_t SCRATCH_BYTE_SIZE = 32 * 1024; private: - Type m_type = Type::Floating; + void ctor_validate_and_infer_types(); + void validate_with_scratchpad(const ov::Shape& shape_b) const; + void validate_inputs() const; + + Type m_type = Type::Floating; }; } // namespace intel_cpu diff --git a/src/plugins/intel_cpu/src/transformations/snippets/x64/pass/brgemm_to_brgemm_cpu.cpp b/src/plugins/intel_cpu/src/transformations/snippets/x64/pass/brgemm_to_brgemm_cpu.cpp index 70f46d3f08f2f5..a7ba620a388af6 100644 --- a/src/plugins/intel_cpu/src/transformations/snippets/x64/pass/brgemm_to_brgemm_cpu.cpp +++ b/src/plugins/intel_cpu/src/transformations/snippets/x64/pass/brgemm_to_brgemm_cpu.cpp @@ -5,8 +5,9 @@ #include "snippets/itt.hpp" #include "brgemm_to_brgemm_cpu.hpp" -#include "snippets/snippets_isa.hpp" + #include "snippets/utils.hpp" +#include "snippets/op/brgemm.hpp" #include "transformations/snippets/x64/op/brgemm_copy_b.hpp" #include "transformations/snippets/x64/op/brgemm_cpu.hpp" @@ -56,25 +57,32 @@ pass::BrgemmToBrgemmCPU::BrgemmToBrgemmCPU() { const auto offset_c = brgemm->get_offset_c(); std::shared_ptr brgemm_cpu = nullptr; + std::shared_ptr brgemm_repacking = nullptr; if (element_type_a == ov::element::f32) { brgemm_cpu = std::make_shared(brgemm->input_value(0), brgemm->input_value(1), BrgemmCPU::Type::Floating, offset_a, offset_b, offset_c); } else { const auto copy_b_type = with_comp ? 
BrgemmCopyB::WithCompensations : BrgemmCopyB::OnlyRepacking; - const auto brgemmRepackIn1 = std::make_shared(brgemm->input_value(1), element_type_a, copy_b_type, offset_b); - const auto buffer = std::make_shared(brgemmRepackIn1->output(0)); - ngraph::snippets::utils::set_outside_loop_value(brgemmRepackIn1, true); + brgemm_repacking = std::make_shared(brgemm->input_value(1), element_type_a, copy_b_type, offset_b); + const auto buffer = std::make_shared(brgemm_repacking->output(0)); + ngraph::snippets::utils::set_outside_loop_value(brgemm_repacking, true); ngraph::snippets::utils::set_outside_loop_value(buffer, true); + // copy port desc from MatMul input 1 + const auto& brgemm_in1_desc = ngraph::snippets::PortManager::get_port_descriptor_ptr(brgemm->input(1)); + ngraph::snippets::PortManager::set_port_descriptor_ptr(brgemm_repacking->input(0), + std::make_shared(brgemm_in1_desc->get_tensor(), + brgemm_in1_desc->get_subtensor(), + brgemm_in1_desc->get_layout())); if (with_amx) { const auto scratch = std::make_shared(ov::Shape{BrgemmCPU::SCRATCH_BYTE_SIZE}); brgemm_cpu = std::make_shared(brgemm->input_value(0), buffer, scratch, BrgemmCPU::Type::AMX, - offset_a, offset_b, offset_c); + offset_a, offset_b, 0, offset_c); ngraph::snippets::utils::set_outside_loop_value(scratch, true); } else if (with_comp) { - const auto scratch = std::make_shared(brgemmRepackIn1->output(1)); + const auto scratch = std::make_shared(brgemm_repacking->output(1)); brgemm_cpu = std::make_shared(brgemm->input_value(0), buffer, scratch, BrgemmCPU::Type::WithCompensations, - offset_a, offset_b, offset_c); + offset_a, offset_b, 0, offset_c); ngraph::snippets::utils::set_outside_loop_value(scratch, true); } else if (one_of(element_type_a, ov::element::u8, ov::element::bf16)) { brgemm_cpu = std::make_shared(brgemm->input_value(0), buffer, BrgemmCPU::Type::WithDataRepacking, @@ -85,11 +93,36 @@ pass::BrgemmToBrgemmCPU::BrgemmToBrgemmCPU() { } 
brgemm_cpu->set_friendly_name(brgemm->get_friendly_name()); - ngraph::copy_runtime_info(brgemm, brgemm_cpu); // Copy output layout inside as well ngraph::replace_node(brgemm, brgemm_cpu); // TODO: At the moment Brgemm is executed outside Loop. When Blocking is supported, remove it ngraph::snippets::utils::set_outside_loop_value(brgemm_cpu, true); + // Transfer ports + const auto& brgemm_in0_desc = ngraph::snippets::PortManager::get_port_descriptor_ptr(brgemm->input(0)); + ngraph::snippets::PortManager::set_port_descriptor_ptr(brgemm_cpu->input(0), + std::make_shared(brgemm_in0_desc->get_tensor(), + brgemm_in0_desc->get_subtensor(), + brgemm_in0_desc->get_layout())); + if (!brgemm_repacking) { + const auto& brgemm_in1_desc = ngraph::snippets::PortManager::get_port_descriptor_ptr(brgemm->input(1)); + ngraph::snippets::PortManager::set_port_descriptor_ptr(brgemm_cpu->input(1), + std::make_shared(brgemm_in1_desc->get_tensor(), + brgemm_in1_desc->get_subtensor(), + brgemm_in1_desc->get_layout())); + } + + const auto& brgemm_out_desc = ngraph::snippets::PortManager::get_port_descriptor_ptr(brgemm->output(0)); + ngraph::snippets::PortManager::set_port_descriptor_ptr(brgemm_cpu->output(0), + std::make_shared(brgemm_out_desc->get_tensor(), + brgemm_out_desc->get_subtensor(), + brgemm_out_desc->get_layout())); + + // need to run validate_and_infer_types manually: either input shapes were updated or + // output Layout was updated (out shape will be updated in validate_and_infer_types()) + if (brgemm_repacking) + brgemm_repacking->validate_and_infer_types(); + brgemm_cpu->validate_and_infer_types(); + return true; }; diff --git a/src/plugins/intel_cpu/src/transformations/snippets/x64/pass/lowered/fuse_load_store_and_convert.cpp b/src/plugins/intel_cpu/src/transformations/snippets/x64/pass/lowered/fuse_load_store_and_convert.cpp index 066d3758e74f22..bbe7c85995ed22 100644 --- a/src/plugins/intel_cpu/src/transformations/snippets/x64/pass/lowered/fuse_load_store_and_convert.cpp 
+++ b/src/plugins/intel_cpu/src/transformations/snippets/x64/pass/lowered/fuse_load_store_and_convert.cpp @@ -20,15 +20,15 @@ bool ov::intel_cpu::pass::FuseLoadStoreConvert::fuse_load_convert(ngraph::snippe if (convert->get_destination_type() != ov::element::f32 && convert->get_destination_type() != ov::element::i32) return false; - const auto& load_output = linear_ir.get_expr_by_output(input_td); - const auto& load_expr = load_output.expr; + const auto& load_output = input_td->get_source(); + const auto& load_expr = load_output.get_expr_ptr(); const auto load = ov::as_type_ptr(load_expr->get_node()); if (!load || ov::is_type(load_expr->get_node()) || ov::is_type(load_expr->get_node())) return false; - const auto consumers = linear_ir.get_exprs_by_input(input_td); + const auto consumers = input_td->get_consumers(); if (consumers.size() != 1) return false; @@ -45,13 +45,21 @@ bool ov::intel_cpu::pass::FuseLoadStoreConvert::fuse_load_convert(ngraph::snippe OPENVINO_THROW("Type of Convert op is undefined. 
Supports only fusing Load and ConvertTruncation or ConvertSaturation ops"); } - const auto in_td = std::vector{ load_expr->get_inputs().front() }; - const auto out_td = std::vector{ output_td }; - const auto mv_expr_it = convert_it; - const auto& insertion_pos = std::next(convert_it); - linear_ir.erase(std::find(linear_ir.cbegin(), mv_expr_it, load_expr)); - linear_ir.erase(mv_expr_it); - convert_it = linear_ir.insert(insertion_pos, std::make_shared(load_convert, in_td, out_td)); + const auto convert_out = convert_expr->get_outputs().front(); + const auto convert_consumers = convert_out->get_consumers(); + ngraph::snippets::PortManager::set_port_descriptor_ptr(load_convert->output(0), + std::make_shared(convert_out->get_tensor(), + convert_out->get_subtensor(), + convert_out->get_layout())); + const auto load_convert_expr = linear_ir.create_expression(load_convert, { load_expr->get_inputs().front() }); + const auto convert_expr_it = convert_it; + const auto insertion_pos = std::next(convert_it); + convert_it = linear_ir.insert(insertion_pos, load_convert_expr); + linear_ir.erase(std::find(linear_ir.cbegin(), convert_expr_it, load_expr)); + linear_ir.erase(convert_expr_it); + for (const auto& consumer : convert_consumers) { + linear_ir.replace_input(consumer, load_convert_expr->get_outputs().front()); + } return true; } @@ -64,12 +72,12 @@ bool ov::intel_cpu::pass::FuseLoadStoreConvert::fuse_store_convert(ngraph::snipp if (convert->get_input_element_type(0) != ov::element::f32 && convert->get_input_element_type(0) != ov::element::i32) return false; - const auto consumers = linear_ir.get_exprs_by_input(output_td); + const auto consumers = output_td->get_consumers(); if (consumers.size() != 1) return false; const auto store_input = *(consumers.begin()); - const auto store_expr = store_input.expr; + const auto store_expr = store_input.get_expr_ptr(); const auto store = ov::as_type_ptr(store_expr->get_node()); if (!store) return false; @@ -87,13 +95,21 @@ bool 
ov::intel_cpu::pass::FuseLoadStoreConvert::fuse_store_convert(ngraph::snipp OPENVINO_THROW("Type of Convert op is undefined. Supports only fusing Store and ConvertTruncation or ConvertSaturation ops"); } - const auto in_td = std::vector{ input_td }; - const auto out_td = std::vector{ store_expr->get_outputs().front() }; - const auto store_it = std::find(convert_it, linear_ir.cend(), store_expr); - const auto& insertion_pos = std::next(store_it); - linear_ir.erase(store_it); - convert_it = linear_ir.erase(convert_it); - linear_ir.insert(insertion_pos, std::make_shared(store_convert, in_td, out_td)); + const auto store_out = store_expr->get_outputs().front(); + const auto store_consumers = store_out->get_consumers(); + ngraph::snippets::PortManager::set_port_descriptor_ptr(store_convert->output(0), + std::make_shared(store_out->get_tensor(), + store_out->get_subtensor(), + store_out->get_layout())); + const auto store_convert_expr = linear_ir.create_expression(store_convert, { input_td }); + const auto convert_expr_it = convert_it; + const auto insertion_pos = std::next(convert_it); + convert_it = linear_ir.insert(insertion_pos, store_convert_expr); + linear_ir.erase(std::find(convert_expr_it, linear_ir.cend(), store_expr)); + linear_ir.erase(convert_expr_it); + for (const auto& consumer : store_consumers) { + linear_ir.replace_input(consumer, store_convert_expr->get_outputs().front()); + } return true; } diff --git a/src/tests/ngraph_helpers/snippets_ngraph_functions/src/subgraph_lowered.cpp b/src/tests/ngraph_helpers/snippets_ngraph_functions/src/subgraph_lowered.cpp index 44be5e51dc0c8a..0cf9163ddff77d 100644 --- a/src/tests/ngraph_helpers/snippets_ngraph_functions/src/subgraph_lowered.cpp +++ b/src/tests/ngraph_helpers/snippets_ngraph_functions/src/subgraph_lowered.cpp @@ -78,16 +78,16 @@ std::shared_ptr Transpose0213MatMulLoweredFunction::initLowered() con std::make_shared(precisions[1], input_shapes[1])}; std::vector layout{0, 2, 1, 3}; // Note: validity of 
transpose_position values is checked in Transpose0213MatMulSinhFunction constructor - if (transpose_position <= 1) { + /* if (transpose_position <= 1) { const auto& anchor = data[transpose_position]; - const auto& td = ngraph::snippets::get_tensor_descriptor_ptr(anchor); + const auto& td = ngraph::snippets::PortManager::get_port_descriptor_ptr(anchor); const auto& tensor = td->get_tensor(); const auto& subtensor = td->get_subtensor(); - ngraph::snippets::set_tensor_descriptor_ptr(anchor, - std::make_shared(tensor, subtensor, layout)); - } + ngraph::snippets::PortManager::set_port_descriptor_ptr(anchor, + std::make_shared(tensor, subtensor, layout)); + }*/ auto matmul = std::make_shared(data[0], data[1]); - if (transpose_position == 2) { + /* if (transpose_position == 2) { const auto& anchor = matmul->output(0); const auto& td = ngraph::snippets::get_tensor_descriptor_ptr(anchor); const auto& tensor = td->get_tensor(); @@ -95,7 +95,7 @@ std::shared_ptr Transpose0213MatMulLoweredFunction::initLowered() con ngraph::snippets::set_tensor_descriptor_ptr(anchor, std::make_shared(tensor, subtensor, layout)); matmul->validate_and_infer_types(); - } + }*/ return std::make_shared(NodeVector{matmul}, data); } From 938afe373dfe5e3967072a7a5b8cea43971b2c16 Mon Sep 17 00:00:00 2001 From: Alexandra Sidorova Date: Wed, 3 May 2023 18:13:18 +0400 Subject: [PATCH 02/13] review --- .../include/snippets/lowered/expression.hpp | 13 ++-- .../snippets/lowered/expression_factory.hpp | 19 +++--- .../include/snippets/lowered/linear_ir.hpp | 3 +- .../include/snippets/lowered/tensor.hpp | 10 +++- .../snippets/include/snippets/op/brgemm.hpp | 3 + .../include/snippets/port_descriptor.hpp | 2 +- .../snippets/src/lowered/expression.cpp | 34 ++++------- .../src/lowered/expression_factory.cpp | 60 +++++++++++++++---- src/common/snippets/src/lowered/linear_ir.cpp | 17 +++--- .../snippets/src/lowered/loop_manager.cpp | 10 ++-- .../src/lowered/pass/allocate_buffers.cpp | 8 +-- 
.../src/lowered/pass/assign_registers.cpp | 32 +++++----- .../src/lowered/pass/cleanup_loop_offsets.cpp | 4 +- .../snippets/src/lowered/pass/fuse_loops.cpp | 10 ++-- .../src/lowered/pass/indentify_buffers.cpp | 6 +- .../snippets/src/lowered/pass/init_loops.cpp | 8 +-- .../src/lowered/pass/insert_buffers.cpp | 10 ++-- .../src/lowered/pass/insert_load_store.cpp | 8 +-- .../src/lowered/pass/insert_tail_loop.cpp | 6 +- .../load_movebroadcast_to_broadcastload.cpp | 10 ++-- .../snippets/src/lowered/pass/mark_loops.cpp | 6 +- .../pass/move_result_out_from_loop.cpp | 2 +- .../lowered/pass/move_scalar_to_consumer.cpp | 4 +- .../src/lowered/pass/propagate_layout.cpp | 10 ++-- .../src/lowered/pass/reset_buffers.cpp | 2 +- .../lowered/pass/softmax_decomposition.cpp | 6 +- .../src/lowered/pass/vector_to_scalar.cpp | 4 +- src/common/snippets/src/lowered/tensor.cpp | 9 +-- src/common/snippets/src/op/brgemm.cpp | 21 ++++++- .../emitters/x64/jit_snippets_emitters.cpp | 4 +- .../snippets/x64/op/brgemm_copy_b.cpp | 4 +- .../snippets/x64/op/brgemm_copy_b.hpp | 2 +- .../snippets/x64/op/brgemm_cpu.cpp | 8 +-- .../snippets/x64/op/brgemm_cpu.hpp | 2 +- .../lowered/fuse_load_store_and_convert.cpp | 22 +++---- .../src/subgraph_lowered.cpp | 31 ++++++---- 36 files changed, 233 insertions(+), 177 deletions(-) diff --git a/src/common/snippets/include/snippets/lowered/expression.hpp b/src/common/snippets/include/snippets/lowered/expression.hpp index 65864eba7ebe31..15a6169c5d2eec 100644 --- a/src/common/snippets/include/snippets/lowered/expression.hpp +++ b/src/common/snippets/include/snippets/lowered/expression.hpp @@ -4,8 +4,6 @@ #pragma once -#include - #include #include @@ -35,8 +33,10 @@ class Expression : public std::enable_shared_from_this { RegInfo get_reg_info() const { return m_reg_info; } void set_reg_info(RegInfo rinfo) { m_reg_info = std::move(rinfo); } - const std::vector& get_inputs() { return m_inputs; } - const std::vector& get_outputs() { return m_outputs; } + const TensorPtr& 
input(size_t i) const; + const TensorPtr& output(size_t i) const; + const std::vector& inputs() const { return m_inputs; } + const std::vector& outputs() const { return m_outputs; } size_t get_input_count() const { return m_inputs.size(); } size_t get_output_count() const { return m_outputs.size(); } @@ -53,13 +53,12 @@ class Expression : public std::enable_shared_from_this { protected: // Note: The constructor and tensor initialization are private since an expression can be created only by Linear IR. - // These methods must be used only by Linear IR creator of expressions! + // These methods must be used only by Linear IR builder of expressions! explicit Expression(const std::shared_ptr& n); - void init_inputs_with_validation(const std::vector& inputs); void init_inputs(const std::vector& inputs) { m_inputs = inputs; } void init_outputs(const std::vector& outputs) { m_outputs = outputs; } - // Note: These methods don't control availability of the current expression in this + // Note: These methods don't control availability of the current expression in this Tensor (as Consumer or Source) void replace_input(size_t port, TensorPtr to); void replace_output(size_t port, TensorPtr to); diff --git a/src/common/snippets/include/snippets/lowered/expression_factory.hpp b/src/common/snippets/include/snippets/lowered/expression_factory.hpp index 0eed43bf1208d7..ff561a31d46263 100644 --- a/src/common/snippets/include/snippets/lowered/expression_factory.hpp +++ b/src/common/snippets/include/snippets/lowered/expression_factory.hpp @@ -4,8 +4,6 @@ #pragma once -#include - #include "linear_ir.hpp" namespace ngraph { @@ -19,16 +17,20 @@ class LinearIR::BaseExpressionFactory { virtual ExpressionPtr build(const std::shared_ptr& n, const std::shared_ptr& model); virtual ExpressionPtr build(const std::shared_ptr& n, const std::shared_ptr& model, - const std::vector inputs); + const std::vector& inputs); virtual ExpressionPtr build(const std::shared_ptr& n, const std::shared_ptr& 
model, - const std::vector inputs, const std::vector outputs); + const std::vector& inputs, const std::vector& outputs); static std::shared_ptr get(const LinearIR& linear_ir, const std::shared_ptr& n); protected: virtual ExpressionPtr create(const std::shared_ptr& n, const std::shared_ptr& model) = 0; + // Creates inputs for expression using parent output tensors virtual std::vector create_expression_inputs(const ExpressionPtr& expr); + // Creates new output tensors virtual std::vector create_expression_outputs(const ExpressionPtr& expr); + // The method verifies of input tensors to availability of the expression as consumer and add it if missed + virtual void validate_inputs(const ExpressionPtr& expr, const std::vector& inputs); LinearIR m_linear_ir; }; @@ -40,9 +42,9 @@ class LinearIR::ExpressionFactory : public LinearIR::BaseExpressionFactory { ExpressionPtr build(const std::shared_ptr& n, const std::shared_ptr& model) override; ExpressionPtr build(const std::shared_ptr& n, const std::shared_ptr& model, - const std::vector inputs) override; + const std::vector& inputs) override; ExpressionPtr build(const std::shared_ptr& n, const std::shared_ptr& model, - const std::vector inputs, const std::vector outputs) override; + const std::vector& inputs, const std::vector& outputs) override; protected: ExpressionPtr create(const std::shared_ptr& n, const std::shared_ptr& model) override; @@ -76,7 +78,7 @@ class LinearIR::LoopBeginExpressionFactory : public LinearIR::BaseExpressionFact LoopBeginExpressionFactory(const LinearIR& linear_ir) : BaseExpressionFactory(linear_ir) {} ExpressionPtr build(const std::shared_ptr& n, const std::shared_ptr& model, - const std::vector inputs) override; + const std::vector& inputs) override; protected: ExpressionPtr create(const std::shared_ptr& n, const std::shared_ptr& model) override; @@ -88,10 +90,11 @@ class LinearIR::LoopEndExpressionFactory : public LinearIR::BaseExpressionFactor LoopEndExpressionFactory(const LinearIR& linear_ir) 
: BaseExpressionFactory(linear_ir) {} ExpressionPtr build(const std::shared_ptr& n, const std::shared_ptr& model, - const std::vector inputs) override; + const std::vector& inputs) override; protected: ExpressionPtr create(const std::shared_ptr& n, const std::shared_ptr& model) override; + void validate_inputs(const ExpressionPtr& expr, const std::vector& inputs) override; }; } // namespace lowered diff --git a/src/common/snippets/include/snippets/lowered/linear_ir.hpp b/src/common/snippets/include/snippets/lowered/linear_ir.hpp index b5780c741f420e..ac4730ca79f9d3 100644 --- a/src/common/snippets/include/snippets/lowered/linear_ir.hpp +++ b/src/common/snippets/include/snippets/lowered/linear_ir.hpp @@ -106,8 +106,7 @@ class LinearIR { private: // Default ctor - can be called only from Linear IR initialization as default way - ExpressionPtr create_expression(const std::shared_ptr& n, - const std::shared_ptr& model = nullptr); + ExpressionPtr create_expression(const std::shared_ptr& n, const std::shared_ptr& model = nullptr); void register_expression(const ExpressionPtr& expr); // Like register_expression, but doesn't allow Parameter or Result registration. 
You can do it only through ctor diff --git a/src/common/snippets/include/snippets/lowered/tensor.hpp b/src/common/snippets/include/snippets/lowered/tensor.hpp index 06487cd80a195c..9c291540a39979 100644 --- a/src/common/snippets/include/snippets/lowered/tensor.hpp +++ b/src/common/snippets/include/snippets/lowered/tensor.hpp @@ -36,7 +36,7 @@ class TensorDescriptor { std::vector get_tensor() const { return m_port_desc->get_tensor(); } std::vector get_layout() const { return m_port_desc->get_layout(); } std::vector get_subtensor() const { return m_port_desc->get_subtensor(); } - PortDescriptorPtr get_port_descriptor() const { return m_port_desc; } + const PortDescriptorPtr& get_port_descriptor() const { return m_port_desc; } void set_tensor(const std::vector& tensor) { m_port_desc->set_tensor(tensor); } void set_layout(const std::vector& layout) { m_port_desc->set_layout(layout); } @@ -60,9 +60,8 @@ class Tensor { Tensor() = default; explicit Tensor(const TensorDescriptor& source_descriptor, const std::vector& consumer_descriptors = {}); - TensorDescriptor& get_source() { return m_source_port; } const TensorDescriptor& get_source() const { return m_source_port; } - const std::vector& get_consumers() const { return m_consumer_ports; } + std::vector get_consumers() const { return m_consumer_ports; } void add_consumer(const TensorDescriptor& consumer); void remove_consumer(const TensorDescriptor& consumer); @@ -78,6 +77,11 @@ class Tensor { std::vector get_layout() const { return m_source_port.get_layout(); } std::vector get_subtensor() const { return m_source_port.get_subtensor(); } + void set_tensor(const std::vector& tensor) { m_source_port.set_tensor(tensor); } + void set_layout(const std::vector& layout) { m_source_port.set_layout(layout); } + void set_subtensor(const std::vector& subtensor) { m_source_port.set_subtensor(subtensor); } + void set_port_descriptor(const PortDescriptorPtr& desc) { m_source_port.set_port_descriptor(desc); } + private: TensorDescriptor 
m_source_port; std::vector m_consumer_ports; diff --git a/src/common/snippets/include/snippets/op/brgemm.hpp b/src/common/snippets/include/snippets/op/brgemm.hpp index c1aec360c4dce7..7d1d85e589c2de 100644 --- a/src/common/snippets/include/snippets/op/brgemm.hpp +++ b/src/common/snippets/include/snippets/op/brgemm.hpp @@ -33,6 +33,9 @@ class Brgemm : public MemoryAccess { bool has_evaluate() const override { return false; } protected: + void constructor_validate_and_infer_types(); + void validate_inputs() const; + ov::element::Type get_output_type() const; std::vector get_planar_input_shapes(const std::vector>& inputs) const; ov::PartialShape get_output_partial_shape(const std::vector& input_shapes) const; diff --git a/src/common/snippets/include/snippets/port_descriptor.hpp b/src/common/snippets/include/snippets/port_descriptor.hpp index 622df4264f42e5..976cb7faac2be5 100644 --- a/src/common/snippets/include/snippets/port_descriptor.hpp +++ b/src/common/snippets/include/snippets/port_descriptor.hpp @@ -37,7 +37,7 @@ class PortDescriptor { void set_subtensor(const std::vector& subtensor) { m_subtensor_shape = subtensor; } static PortDescriptor deserialize(const std::string& serialized_info); - std::string serialize() const; + std::string serialize() const; bool empty() const { return m_layout.empty() && m_subtensor_shape.empty();} friend bool operator==(const PortDescriptor& lhs, const PortDescriptor& rhs); diff --git a/src/common/snippets/src/lowered/expression.cpp b/src/common/snippets/src/lowered/expression.cpp index 60c65de14154fd..e161be2290c366 100644 --- a/src/common/snippets/src/lowered/expression.cpp +++ b/src/common/snippets/src/lowered/expression.cpp @@ -19,6 +19,15 @@ size_t Expression::LOOP_NULL_ID = SIZE_MAX; Expression::Expression(const std::shared_ptr& n) : m_source_node{n}, m_emitter{nullptr}, m_inputs{}, m_outputs{}, m_reg_info{{}, {}}, m_is_outside_loop(utils::get_outside_loop_value(n)) {} +const TensorPtr& Expression::input(size_t i) const { + 
OPENVINO_ASSERT(i < m_inputs.size(), "Failed to get input: target input port must be less than input count!"); + return m_inputs[i]; +} +const TensorPtr& Expression::output(size_t i) const { + OPENVINO_ASSERT(i < m_outputs.size(), "Failed to get output: target output port must be less than output count!"); + return m_outputs[i]; +} + std::shared_ptr Expression::get_node() const { if (!m_source_node) OPENVINO_THROW("An attempt to get uninitialized node from lowered expression"); @@ -58,34 +67,13 @@ void Expression::remove_loop_id(size_t id) { *it = Expression::LOOP_NULL_ID; } -void Expression::init_inputs_with_validation(const std::vector& inputs) { - auto is_service_expr = [&](){ - return ov::is_type(m_source_node); - }; - for (size_t i = 0; i < inputs.size(); ++i) { - const auto& input = inputs[i]; - const auto consumers = input->get_consumers(); - const auto found = std::find_if(consumers.begin(), consumers.end(), - [&](const TensorDescriptor& desc) { - return desc.get_index() == i && desc.get_expr_ptr().get() == this->shared_from_this().get(); - }); - if (found == consumers.end()) { - const auto port_desc = is_service_expr() ? 
input->get_source().get_port_descriptor() - : PortManager::get_port_descriptor_ptr(m_source_node->input(i)); - const auto tensor_desc = TensorDescriptor(this->shared_from_this(), TensorDescriptor::Type::Input, i, port_desc); - input->add_consumer(tensor_desc); - } - } - m_inputs = inputs; -} - TensorDescriptor Expression::input_port(size_t i) { OPENVINO_ASSERT(i < m_inputs.size(), "Failed to get input port: target input port must be less than input count!"); const auto& input = m_inputs[i]; - const auto& consumers = input->get_consumers(); + const auto consumers = input->get_consumers(); const auto found = std::find_if(consumers.begin(), consumers.end(), [&](const TensorDescriptor& desc) { - return desc.get_index() == i && desc.get_expr_ptr().get() == this->shared_from_this().get(); + return desc.get_index() == i && desc.get_expr_ptr() == this->shared_from_this(); }); OPENVINO_ASSERT(found != consumers.end(), "Input TensorDescriptor for Expression hasn't found in input Tensor!"); return *found; diff --git a/src/common/snippets/src/lowered/expression_factory.cpp b/src/common/snippets/src/lowered/expression_factory.cpp index ffd13178061656..d3cd1a2edcb283 100644 --- a/src/common/snippets/src/lowered/expression_factory.cpp +++ b/src/common/snippets/src/lowered/expression_factory.cpp @@ -14,11 +14,11 @@ ExpressionPtr LinearIR::BaseExpressionFactory::build(const std::shared_ptr OPENVINO_THROW("The Factory doesn't support default builder"); } ExpressionPtr LinearIR::BaseExpressionFactory::build(const std::shared_ptr& n, const std::shared_ptr& model, - const std::vector inputs) { + const std::vector& inputs) { OPENVINO_THROW("The Factory doesn't support builder with just input tensors"); } ExpressionPtr LinearIR::BaseExpressionFactory::build(const std::shared_ptr& n, const std::shared_ptr& model, - const std::vector inputs, const std::vector outputs) { + const std::vector& inputs, const std::vector& outputs) { OPENVINO_THROW("The Factory doesn't support builder with input 
and outputs tensors"); } @@ -46,10 +46,8 @@ std::vector LinearIR::BaseExpressionFactory::create_expression_inputs for (const auto& input : node->inputs()) { const auto input_source = input.get_source_output(); const auto in_index = input.get_index(); - const auto out_index = input_source.get_index(); - const auto parent = input_source.get_node_shared_ptr(); - const auto parent_expr = m_linear_ir.get_expr_by_node(parent); - const auto tensor = parent_expr->get_outputs()[out_index]; + const auto& parent_expr = m_linear_ir.get_expr_by_node(input_source.get_node_shared_ptr()); + const auto& tensor = parent_expr->output(input_source.get_index()); const auto tensor_desc = TensorDescriptor(expr, TensorDescriptor::Type::Input, in_index, PortManager::get_port_descriptor_ptr(input)); tensor->add_consumer(tensor_desc); inputs[in_index] = tensor; @@ -70,6 +68,22 @@ std::vector LinearIR::BaseExpressionFactory::create_expression_output return outputs; } +void LinearIR::BaseExpressionFactory::validate_inputs(const ExpressionPtr& expr, const std::vector& inputs) { + for (size_t i = 0; i < inputs.size(); ++i) { + const auto& input = inputs[i]; + const auto consumers = input->get_consumers(); + const auto found = std::find_if(consumers.begin(), consumers.end(), + [&](const TensorDescriptor& desc) { + return desc.get_index() == i && desc.get_expr_ptr() == expr; + }); + if (found == consumers.end()) { + const auto port_desc = PortManager::get_port_descriptor_ptr(expr->get_node()->input(i)); + const auto tensor_desc = TensorDescriptor(expr, TensorDescriptor::Type::Input, i, port_desc); + input->add_consumer(tensor_desc); + } + } +} + ExpressionPtr LinearIR::ExpressionFactory::create(const std::shared_ptr& n, const std::shared_ptr& model) { // Note: ctor of shared_ptr isn't friend class for Expression return std::make_shared(Expression(n)); @@ -83,17 +97,19 @@ ExpressionPtr LinearIR::ExpressionFactory::build(const std::shared_ptr& n, } ExpressionPtr 
LinearIR::ExpressionFactory::build(const std::shared_ptr& n, const std::shared_ptr& model, - const std::vector inputs) { + const std::vector& inputs) { const auto expr = create(n, model); - expr->init_inputs_with_validation(inputs); + validate_inputs(expr, inputs); + expr->init_inputs(inputs); expr->init_outputs(create_expression_outputs(expr)); return expr; } ExpressionPtr LinearIR::ExpressionFactory::build(const std::shared_ptr& n, const std::shared_ptr& model, - const std::vector inputs, const std::vector outputs) { + const std::vector& inputs, const std::vector& outputs) { const auto expr = create(n, model); - expr->init_inputs_with_validation(inputs); + validate_inputs(expr, inputs); + expr->init_inputs(inputs); expr->init_outputs(outputs); return expr; } @@ -139,7 +155,7 @@ ExpressionPtr LinearIR::LoopBeginExpressionFactory::create(const std::shared_ptr } ExpressionPtr LinearIR::LoopBeginExpressionFactory::build(const std::shared_ptr& n, const std::shared_ptr& model, - const std::vector inputs) { + const std::vector& inputs) { OPENVINO_ASSERT(inputs.empty(), "LoopBegin cannot have inputs"); const auto expr = create(n, model); expr->init_inputs(inputs); @@ -156,13 +172,31 @@ ExpressionPtr LinearIR::LoopEndExpressionFactory::create(const std::shared_ptr& n, const std::shared_ptr& model, - const std::vector inputs) { + const std::vector& inputs) { const auto expr = create(n, model); - expr->init_inputs_with_validation(inputs); + validate_inputs(expr, inputs); + expr->init_inputs(inputs); expr->init_outputs({}); return expr; } +void LinearIR::LoopEndExpressionFactory::validate_inputs(const ExpressionPtr& expr, const std::vector& inputs) { + for (size_t i = 0; i < inputs.size(); ++i) { + const auto& input = inputs[i]; + const auto consumers = input->get_consumers(); + const auto found = std::find_if(consumers.begin(), consumers.end(), + [&](const TensorDescriptor& desc) { + return desc.get_index() == i && desc.get_expr_ptr()== expr; + }); + if (found == 
consumers.end()) { + // LoopEnd doesn't have input ports. So consumer for the Tensor should have the same Port Descriptor like source + const auto& port_desc = input->get_source().get_port_descriptor(); + const auto tensor_desc = TensorDescriptor(expr, TensorDescriptor::Type::Input, i, port_desc); + input->add_consumer(tensor_desc); + } + } +} + }// namespace lowered }// namespace snippets diff --git a/src/common/snippets/src/lowered/linear_ir.cpp b/src/common/snippets/src/lowered/linear_ir.cpp index c3b33a52f5d5bd..bf1f0c2caf4e4a 100644 --- a/src/common/snippets/src/lowered/linear_ir.cpp +++ b/src/common/snippets/src/lowered/linear_ir.cpp @@ -141,13 +141,13 @@ void LinearIR::debug_print(bool tds_as_pointers) const { std::cerr << counter++ << " : " << node->get_friendly_name() << " : "; if (tds_as_pointers) { - for (const auto& in : expr->get_inputs()) { + for (const auto& in : expr->inputs()) { if (td2int.count(in) == 0) OPENVINO_THROW("Undefined input descriptor for op"); std::cerr << td2int.at(in) << ", "; } std::cerr << "\b\b => "; - for (const auto& out : expr->get_outputs()) { + for (const auto& out : expr->outputs()) { if (td2int.count(out) == 0) td2int.insert({out, td_counter++}); std::cerr << td2int.at(out) << ", "; @@ -176,7 +176,8 @@ void LinearIR::init_emitters(const std::shared_ptr& target) { ExpressionPtr LinearIR::get_expr_by_node(const std::shared_ptr& n) const { auto found = m_node2expression_map.find(n); - return found == m_node2expression_map.end() ? 
nullptr : found->second; + OPENVINO_ASSERT(found != m_node2expression_map.end(), "The node " + n->get_friendly_name() + " hasn't been found in Linear IR"); + return found->second; } void LinearIR::replace_input(const std::vector& consumers, const TensorPtr& to) { @@ -196,8 +197,8 @@ void LinearIR::replace_input(const TensorDescriptor& expr_port, const TensorPtr& OPENVINO_ASSERT(expr_port.get_type() == TensorDescriptor::Type::Input, "Failed to replace: target input port must have Input type"); OPENVINO_ASSERT(expr_port.get_index() < expr->get_input_count(), "Failed to replace: target input port must be less than input count!"); - const auto& from = expr->m_inputs[port]; - if (from.get() == to.get()) + const auto& from = expr->input(port); + if (from == to) return; if (!to->found_consumer(expr_port)) { @@ -218,9 +219,9 @@ void LinearIR::replace_output(const TensorDescriptor& expr_port, const TensorPtr OPENVINO_ASSERT(expr_port.get_type() == TensorDescriptor::Type::Output, "Failed to replace: target output port must have Output type"); OPENVINO_ASSERT(port < expr->get_output_count(), "Failed to replace: target output port must be less than output count!"); const auto to_source_td = to->get_source(); - OPENVINO_ASSERT(to_source_td.get_expr_ptr().get() == expr.get() && to_source_td.get_index() == port, + OPENVINO_ASSERT(to_source_td.get_expr_ptr() == expr && to_source_td.get_index() == port, "Failed to replace: incorrect new output Tensor. 
Source expr must be the current expr"); - if (expr->get_outputs()[port].get() == to.get()) + if (expr->output(port) == to) return; expr->replace_output(port, to); } @@ -242,7 +243,7 @@ void LinearIR::register_expression(const ExpressionPtr& expr) { void LinearIR::unregister_expression(const ExpressionPtr& expr) { for (size_t i = 0; i < expr->get_input_count(); ++i) { - const auto& input = expr->get_inputs()[i]; + const auto& input = expr->input(i); input->remove_consumer(expr->input_port(i)); } diff --git a/src/common/snippets/src/lowered/loop_manager.cpp b/src/common/snippets/src/lowered/loop_manager.cpp index ef9e736b2532a1..4e7d271a66e9ab 100644 --- a/src/common/snippets/src/lowered/loop_manager.cpp +++ b/src/common/snippets/src/lowered/loop_manager.cpp @@ -55,7 +55,7 @@ void LinearIR::LoopManager::get_loop_bounds(const LinearIR &linear_ir, size_t loop_id) { OPENVINO_ASSERT(!entries.empty(), "Loop must have entry points"); OPENVINO_ASSERT(!exits.empty(), "Loop must have entry points"); - const auto& entry_expr = entries.front().get_expr_ptr(); + const auto entry_expr = entries.front().get_expr_ptr(); loop_begin_pos = std::find(linear_ir.begin(), linear_ir.end(), entry_expr); OPENVINO_ASSERT(loop_begin_pos != linear_ir.end(), "Loop begin hasn't been found!"); @@ -81,8 +81,8 @@ void LinearIR::LoopManager::get_io_loop_ports(LinearIR::constExprIt loop_begin_p exits.clear(); for (auto expr_it = loop_begin_pos; expr_it != loop_end_pos; ++expr_it) { const auto& expr = *expr_it; - const auto inputs = expr->get_inputs(); - const auto outputs = expr->get_outputs(); + const auto inputs = expr->inputs(); + const auto outputs = expr->outputs(); for (size_t in_port = 0; in_port < inputs.size(); ++in_port) { const auto in_td = inputs[in_port]; @@ -141,7 +141,7 @@ void LinearIR::LoopManager::mark_loop(LinearIR::constExprIt loop_begin_pos, auto found_port = [](const std::vector& ports, const TensorDescriptor& target) { return std::find_if(ports.begin(), ports.end(), 
[&target](const TensorDescriptor& port) { - return port.get_expr_ptr().get() == target.get_expr_ptr().get() && + return port.get_expr_ptr() == target.get_expr_ptr() && port.get_index() == target.get_index() && port.get_type() == target.get_type(); }) != ports.end(); @@ -160,7 +160,7 @@ void LinearIR::LoopManager::mark_loop(LinearIR::constExprIt loop_begin_pos, const auto& expr = exit_point.get_expr_ptr(); for (size_t i = 0; i < expr->get_input_count(); ++i) { const auto port = expr->input_port(i); - const auto parent = expr->get_inputs()[port.get_index()]->get_source().get_expr_ptr()->get_node(); + const auto parent = expr->input(port.get_index())->get_source().get_expr_ptr()->get_node(); if (!found_port(loop_entry_points, port) && !ov::is_type(parent)) { if (loop_subtensor.empty()) loop_subtensor = port.get_subtensor(); diff --git a/src/common/snippets/src/lowered/pass/allocate_buffers.cpp b/src/common/snippets/src/lowered/pass/allocate_buffers.cpp index 4cdd5ec8853bea..d0f1e2403b5c4e 100644 --- a/src/common/snippets/src/lowered/pass/allocate_buffers.cpp +++ b/src/common/snippets/src/lowered/pass/allocate_buffers.cpp @@ -21,8 +21,8 @@ void AllocateBuffers::propagate_offset(const LinearIR& linear_ir, const Expressi // Propagate to up: in Store. Buffer can have only one Store { if (buffer->is_intermediate_memory()) { - OPENVINO_ASSERT(buffer_expr->get_inputs().size() == 1, "Buffer with intermediate memory must have one parent"); - const auto& parent_output = buffer_expr->get_inputs()[0]->get_source(); + OPENVINO_ASSERT(buffer_expr->inputs().size() == 1, "Buffer with intermediate memory must have one parent"); + const auto& parent_output = buffer_expr->input(0)->get_source(); const auto& parent_expr = parent_output.get_expr_ptr(); const auto port = parent_output.get_index(); const auto& parent_node = parent_expr->get_node(); @@ -36,7 +36,7 @@ void AllocateBuffers::propagate_offset(const LinearIR& linear_ir, const Expressi } } // Propagate to down: in Load. 
Buffer can have several Load - const auto& buffer_out = buffer_expr->get_outputs()[0]; + const auto& buffer_out = buffer_expr->output(0); for (const auto& child_expr_input : buffer_out->get_consumers()) { const auto& child_expr = child_expr_input.get_expr_ptr(); const auto port = child_expr_input.get_index(); @@ -70,7 +70,7 @@ bool AllocateBuffers::run(LinearIR& linear_ir) { } if (buffer->is_intermediate_memory()) { - const auto& parent_expr = expr_it->get()->get_inputs()[0]->get_source().get_expr_ptr(); + const auto& parent_expr = expr_it->get()->input(0)->get_source().get_expr_ptr(); const auto& parent_node = parent_expr->get_node(); // Full MemoryAccess ops need new memory. Previous logic is to check for parent isn't Loop // TODO: It should be unified in MemoryManager with memory reuse in the near future diff --git a/src/common/snippets/src/lowered/pass/assign_registers.cpp b/src/common/snippets/src/lowered/pass/assign_registers.cpp index e15a932eb7fb53..cf941600e6e010 100644 --- a/src/common/snippets/src/lowered/pass/assign_registers.cpp +++ b/src/common/snippets/src/lowered/pass/assign_registers.cpp @@ -47,38 +47,38 @@ bool AssignRegisters::run(LinearIR& linear_ir) { auto op = expr->get_node(); if (const auto io_expr = std::dynamic_pointer_cast(expr)) { if (io_expr->get_type() == IOExpression::io_type::INPUT) - manually_assigned_gprs[expr->get_outputs()[0]] = io_expr->get_index(); + manually_assigned_gprs[expr->output(0)] = io_expr->get_index(); else if (io_expr->get_type() == IOExpression::io_type::OUTPUT) - manually_assigned_gprs[expr->get_inputs()[0]] = num_parameters + io_expr->get_index(); + manually_assigned_gprs[expr->input(0)] = num_parameters + io_expr->get_index(); else OPENVINO_THROW("Unsupported io_type detected"); } else if (const auto& buffer = ov::as_type_ptr(op)) { const auto buffer_id = buffer->get_id(); // All buffers have one common data pointer if (buffer->is_intermediate_memory()) { - manually_assigned_gprs[expr->get_inputs()[0]] = + 
manually_assigned_gprs[expr->input(0)] = static_cast(num_results + num_parameters + buffer_id); } - manually_assigned_gprs[expr->get_outputs()[0]] = + manually_assigned_gprs[expr->output(0)] = static_cast(num_results + num_parameters + buffer_id); } else if (ov::is_type(op) || ov::is_type(op)) { // Only in SoftmaxDecomposition ReduceMax and ReduceSum use HorizonMax/HorizonSum and VectorBuffer. // We should manually set the one vector register for VectorBuffer and Max/Sum output to simulate a accumulator // TODO [96351]: We should rewrite accumulator pattern using another way - const auto input_td = expr->get_inputs()[0]; + const auto input_td = expr->input(0); const auto& input_expr = input_td->get_source().get_expr_ptr(); - const auto& input_expr_input_tds = input_expr->get_inputs(); + const auto& input_expr_input_tds = input_expr->inputs(); for (const auto& td : input_expr_input_tds) { if (ov::is_type(td->get_source().get_expr_ptr()->get_node())) { manually_assigned_vecs[td] = static_cast(accumulator_reg); } } - const auto output_td = expr->get_outputs()[0]; + const auto output_td = expr->output(0); manually_assigned_vecs[input_td] = static_cast(accumulator_reg); manually_assigned_vecs[output_td] = static_cast(accumulator_reg); for (const auto& child_expr_input : output_td->get_consumers()) { if (ov::is_type(child_expr_input.get_expr_ptr()->get_node())) { - manually_assigned_vecs[child_expr_input.get_expr_ptr()->get_outputs()[0]] = + manually_assigned_vecs[child_expr_input.get_expr_ptr()->output(0)] = static_cast(accumulator_reg); } } @@ -88,9 +88,9 @@ bool AssignRegisters::run(LinearIR& linear_ir) { const auto current_loops_ids = expr->get_loop_ids(); auto next_expr = output_td->get_consumers().begin()->get_expr_ptr(); while (next_expr->get_loop_ids() == current_loops_ids) { - manually_assigned_vecs[next_expr->get_outputs()[0]] = + manually_assigned_vecs[next_expr->output(0)] = static_cast(accumulator_reg); - next_expr = 
next_expr->get_outputs()[0]->get_consumers().begin()->get_expr_ptr(); + next_expr = next_expr->output(0)->get_consumers().begin()->get_expr_ptr(); } accumulator_reg++; @@ -103,7 +103,7 @@ bool AssignRegisters::run(LinearIR& linear_ir) { decltype(regs_vec)& reg_map, const std::map& manually_assigned_regs, size_t& counter) { - for (const auto& out_td : expr->get_outputs()) { + for (const auto& out_td : expr->outputs()) { // Note that some ops might have identical input&output tensors (Result and Tile* for ex.) // so we have to check that the tensor has not been enumerated already if (reg_map.count(out_td) == 0) { @@ -143,9 +143,9 @@ bool AssignRegisters::run(LinearIR& linear_ir) { for (size_t i = 0; i < typed_ops.size(); i++) { const auto& t_op = typed_ops[i]; std::vector used_tensors, defined_tensors; - for (const auto& in : t_op.second->get_inputs()) + for (const auto& in : t_op.second->inputs()) used_tensors.push_back(in); - for (const auto& out : t_op.second->get_outputs()) + for (const auto& out : t_op.second->outputs()) defined_tensors.push_back(out); switch (t_op.first) { case Generator::opRegType::vec2vec: @@ -191,7 +191,7 @@ bool AssignRegisters::run(LinearIR& linear_ir) { const auto& expr = typed_ops[n].second; if (is_type(expr->get_node()) || is_type(expr->get_node())) continue; - for (const auto& out : expr->get_outputs()) { + for (const auto& out : expr->outputs()) { for (const auto& child_expr_input : out->get_consumers()) { const auto& child_expr = child_expr_input.get_expr_ptr(); auto child_it = linear_ir.begin(); @@ -319,10 +319,10 @@ bool AssignRegisters::run(LinearIR& linear_ir) { for (auto& t_op : typed_ops) { RegInfo rinfo; const auto& expr = t_op.second; - for (const auto& in : expr->get_inputs()) { + for (const auto& in : expr->inputs()) { rinfo.first.push_back(assigned_regs[in]); } - for (const auto& out : expr->get_outputs()) { + for (const auto& out : expr->outputs()) { rinfo.second.push_back(assigned_regs[out]); } 
t_op.second->set_reg_info(rinfo); diff --git a/src/common/snippets/src/lowered/pass/cleanup_loop_offsets.cpp b/src/common/snippets/src/lowered/pass/cleanup_loop_offsets.cpp index 15b835b9ff7123..9825ac6d6d92e0 100644 --- a/src/common/snippets/src/lowered/pass/cleanup_loop_offsets.cpp +++ b/src/common/snippets/src/lowered/pass/cleanup_loop_offsets.cpp @@ -36,12 +36,12 @@ bool CleanupLoopOffsets::run(LinearIR& linear_ir) { if (auto outer_loop_end = as_type_ptr(next_node)) { auto fin_offsets = loop_end->get_finalization_offsets(); std::unordered_map per_tensor_offset; - const auto& loop_inputs = expr_it->get()->get_inputs(); + const auto& loop_inputs = expr_it->get()->inputs(); for (size_t i = 0; i < fin_offsets.size(); i++) per_tensor_offset[loop_inputs[i]] = i; auto outer_ptr_increments = outer_loop_end->get_ptr_increments(); - const auto& outer_loop_inputs = next_expr_it->get()->get_inputs(); + const auto& outer_loop_inputs = next_expr_it->get()->inputs(); for (size_t i = 0; i < outer_ptr_increments.size(); i++) { const auto& managed_tensor = outer_loop_inputs[i]; const auto& found = per_tensor_offset.find(managed_tensor); diff --git a/src/common/snippets/src/lowered/pass/fuse_loops.cpp b/src/common/snippets/src/lowered/pass/fuse_loops.cpp index 6bf30cf8b6f7fa..c83aed643823aa 100644 --- a/src/common/snippets/src/lowered/pass/fuse_loops.cpp +++ b/src/common/snippets/src/lowered/pass/fuse_loops.cpp @@ -35,7 +35,7 @@ void FuseLoops::fuse_points(std::vector& exit_points, std::vec for (const auto& exit_point : exit_points) { const auto expr = exit_point.get_expr_ptr(); const auto port = exit_point.get_index(); - const auto output_td = expr->get_outputs()[port]; + const auto output_td = expr->output(port); const auto consumers_inputs = output_td->get_consumers(); std::vector mapped_entry_points; @@ -91,7 +91,7 @@ bool FuseLoops::fuse_upper_into_current(LinearIR& linear_ir, const LinearIR::Loo const auto target_exit_point = loop_target->exit_exprs[i]; const auto 
target_exit_expr = target_exit_point.get_expr_ptr(); const auto port = target_exit_point.get_index(); - const auto output_td = target_exit_expr->get_outputs()[port]; + const auto output_td = target_exit_expr->output(port); const auto consumer_inputs = output_td->get_consumers(); for (const auto& consumer_input : consumer_inputs) { const auto consumer = consumer_input.get_expr_ptr(); @@ -162,7 +162,7 @@ bool FuseLoops::fuse_lower_into_current(LinearIR& linear_ir, const LinearIR::Loo const auto target_entry_point = loop_target->entry_exprs[i]; const auto target_entry_expr = target_entry_point.get_expr_ptr(); const auto port = target_entry_point.get_index(); - const auto input_td = target_entry_expr->get_inputs()[port]; + const auto input_td = target_entry_expr->input(port); const auto parent_expr_output = input_td->get_source(); const auto parent_expr = parent_expr_output.get_expr_ptr(); if (ov::is_type(parent_expr->get_node()) || parent_expr == current_exit_point.get_expr_ptr()) @@ -270,7 +270,7 @@ bool FuseLoops::run(LinearIR& linear_ir) { const auto entry_point = entry_points[in_port]; const auto entry_expr = entry_point.get_expr_ptr(); const auto port = entry_point.get_index(); - const auto input_td = entry_expr->get_inputs()[port]; + const auto input_td = entry_expr->input(port); const auto parent_expr_output = input_td->get_source(); const auto parent_expr = parent_expr_output.get_expr_ptr(); const auto out_port = parent_expr_output.get_index(); @@ -311,7 +311,7 @@ bool FuseLoops::run(LinearIR& linear_ir) { const auto exit_point = exit_points[out_port]; const auto exit_expr = exit_point.get_expr_ptr(); const auto port = exit_point.get_index(); - const auto output_td = exit_expr->get_outputs()[port]; + const auto output_td = exit_expr->output(port); const auto consumer_exprs_inputs = output_td->get_consumers(); for (const auto& consumer_expr_input : consumer_exprs_inputs) { const auto consumer_expr = consumer_expr_input.get_expr_ptr(); diff --git 
a/src/common/snippets/src/lowered/pass/indentify_buffers.cpp b/src/common/snippets/src/lowered/pass/indentify_buffers.cpp index 619ae88fd808b3..bf053afd389579 100644 --- a/src/common/snippets/src/lowered/pass/indentify_buffers.cpp +++ b/src/common/snippets/src/lowered/pass/indentify_buffers.cpp @@ -55,7 +55,7 @@ std::vector IdentifyBuffers::create_adjacency_matrix(const LinearIR& linea for (size_t buffer_idx = 0; buffer_idx < buffers.size(); ++buffer_idx) { // Here intermediate Buffer const auto buffer_expr = buffers[buffer_idx]; - const auto buffer_input_tds = buffer_expr->get_inputs(); + const auto buffer_input_tds = buffer_expr->inputs(); OPENVINO_ASSERT(buffer_input_tds.size() == 1, "Intermediate Buffer must have one input"); const auto buffer = ov::as_type_ptr(buffer_expr->get_node()); @@ -67,7 +67,7 @@ std::vector IdentifyBuffers::create_adjacency_matrix(const LinearIR& linea if (sibling_expr == buffer_expr) { continue; } else if (const auto loop_end = ov::as_type_ptr(sibling_expr->get_node())) { - const auto& loop_tds = sibling_expr->get_inputs(); + const auto& loop_tds = sibling_expr->inputs(); const auto input_count = loop_end->get_input_num(); const auto output_count = loop_end->get_output_num(); const auto& ptr_increments = loop_end->get_ptr_increments(); @@ -91,7 +91,7 @@ std::vector IdentifyBuffers::create_adjacency_matrix(const LinearIR& linea if (buffer_td == loop_tds[input_count + output_idx]) continue; - const auto& consumer_inputs = loop_tds[input_count + output_idx]->get_consumers(); + const auto consumer_inputs = loop_tds[input_count + output_idx]->get_consumers(); for (const auto& consumer_input : consumer_inputs) { const auto& child_node = consumer_input.get_expr_ptr()->get_node(); if (const auto& neighbour_buffer = is_intermediate_buffer(child_node)) { diff --git a/src/common/snippets/src/lowered/pass/init_loops.cpp b/src/common/snippets/src/lowered/pass/init_loops.cpp index 0501adcc22f2e5..ca96b43311241e 100644 --- 
a/src/common/snippets/src/lowered/pass/init_loops.cpp +++ b/src/common/snippets/src/lowered/pass/init_loops.cpp @@ -29,7 +29,7 @@ void filter_ports(LinearIR& linear_ir, const auto node = expr->get_node(); const auto ma = ov::as_type_ptr(node); if (ma && ma->is_memory_access_input_port(port)) { - const auto& parent_expr = expr->get_inputs()[port]->get_source().get_expr_ptr(); + const auto& parent_expr = expr->input(port)->get_source().get_expr_ptr(); const auto& parent = parent_expr->get_node(); // Todo: Sometimes several Load in one Loop read data from the same Node if (loop_parents.find(parent) == loop_parents.end()) { @@ -156,10 +156,10 @@ bool InitLoops::insertion(LinearIR& linear_ir, const LinearIR::LoopManager::Loop std::vector loop_end_inputs; for (const auto& expr_port : loop_entries) - loop_end_inputs.push_back(expr_port.get_expr_ptr()->get_inputs()[expr_port.get_index()]); + loop_end_inputs.push_back(expr_port.get_expr_ptr()->input(expr_port.get_index())); for (const auto& expr_port : loop_exits) - loop_end_inputs.push_back(expr_port.get_expr_ptr()->get_outputs()[expr_port.get_index()]); - loop_end_inputs.push_back(loop_begin_expr->get_outputs()[0]); + loop_end_inputs.push_back(expr_port.get_expr_ptr()->output(expr_port.get_index())); + loop_end_inputs.push_back(loop_begin_expr->output(0)); const auto& loop_end_expr = linear_ir.create_expression(loop_end, loop_end_inputs); linear_ir.insert(loop_end_pos, loop_end_expr); diff --git a/src/common/snippets/src/lowered/pass/insert_buffers.cpp b/src/common/snippets/src/lowered/pass/insert_buffers.cpp index 84768a2ffef79c..cd6bc7c4c72116 100644 --- a/src/common/snippets/src/lowered/pass/insert_buffers.cpp +++ b/src/common/snippets/src/lowered/pass/insert_buffers.cpp @@ -63,7 +63,7 @@ void InsertBuffers::insertion(LinearIR& linear_ir, const LinearIR::LoopManagerPt const auto expr = entry_point.get_expr_ptr(); const auto port = entry_point.get_index(); const auto node = expr->get_node(); - const auto input_td = 
expr->get_inputs()[port]; + const auto input_td = expr->input(port); const auto parent_expr_output = input_td->get_source(); const auto& parent_expr = parent_expr_output.get_expr_ptr(); const auto parent_port = parent_expr_output.get_index(); @@ -109,7 +109,7 @@ void InsertBuffers::insertion(LinearIR& linear_ir, const LinearIR::LoopManagerPt // Output td is automatically filled from PortDescriptor const auto buffer_expr = linear_ir.create_expression(buffer, {input_td}); linear_ir.insert(pos, buffer_expr); - linear_ir.replace_input(expr, port, buffer_expr->get_outputs()[0]); + linear_ir.replace_input(expr, port, buffer_expr->output(0)); } } @@ -117,7 +117,7 @@ void InsertBuffers::insertion(LinearIR& linear_ir, const LinearIR::LoopManagerPt const auto expr = exit_point.get_expr_ptr(); const auto port = exit_point.get_index(); const auto node = expr->get_node(); - const auto output_td = expr->get_outputs()[port]; + const auto output_td = expr->output(port); const auto child_exprs_inputs = output_td->get_consumers(); const auto current_loops = expr->get_loop_ids(); const auto current_loop_count = current_loops.size(); @@ -163,7 +163,7 @@ void InsertBuffers::insertion(LinearIR& linear_ir, const LinearIR::LoopManagerPt // we should remove them to insert one common Buffer on one common port if (!buffers.empty()) { for (const auto& buffer : buffers) { - const auto buffer_out = buffer->get_outputs().front(); + const auto& buffer_out = buffer->output(0); const auto buffer_consumers_inputs = buffer_out->get_consumers(); linear_ir.replace_input(buffer_consumers_inputs, output_td); potential_consumers.insert(potential_consumers.end(), buffer_consumers_inputs.begin(), buffer_consumers_inputs.end()); @@ -193,7 +193,7 @@ void InsertBuffers::insertion(LinearIR& linear_ir, const LinearIR::LoopManagerPt // Output td is automatically filled from PortDescriptor const auto buffer_expr = linear_ir.create_expression(buffer, node_outs); linear_ir.insert(pos, buffer_expr); - 
linear_ir.replace_input(potential_consumers, buffer_expr->get_outputs().front()); + linear_ir.replace_input(potential_consumers, buffer_expr->output(0)); } } } diff --git a/src/common/snippets/src/lowered/pass/insert_load_store.cpp b/src/common/snippets/src/lowered/pass/insert_load_store.cpp index 3fae4c6077530b..130dc2170f0387 100644 --- a/src/common/snippets/src/lowered/pass/insert_load_store.cpp +++ b/src/common/snippets/src/lowered/pass/insert_load_store.cpp @@ -54,7 +54,7 @@ bool InsertLoadStore::insert_load(LinearIR& linear_ir, const LinearIR::constExpr const auto& loop_manager = linear_ir.get_loop_manager(); const auto& data_expr = *data_expr_it; const auto& data_node = data_expr->get_node(); - const auto& output_td = data_expr->get_outputs().front(); + const auto& output_td = data_expr->output(0); const auto consumer_inputs = output_td->get_consumers(); bool was_inserted = false; @@ -77,7 +77,7 @@ bool InsertLoadStore::insert_load(LinearIR& linear_ir, const LinearIR::constExpr output_td->get_layout())); const auto load_expr = linear_ir.create_expression(load, {output_td}); linear_ir.insert(std::find(data_expr_it, linear_ir.cend(), consumer_expr), load_expr); - linear_ir.replace_input(consumer_expr, port, load_expr->get_outputs()[0]); + linear_ir.replace_input(consumer_expr, port, load_expr->output(0)); // Copy Loop identifies load_expr->set_loop_ids(loop_ids); @@ -94,7 +94,7 @@ bool InsertLoadStore::insert_load(LinearIR& linear_ir, const LinearIR::constExpr bool InsertLoadStore::insert_store(LinearIR& linear_ir, const LinearIR::constExprIt& data_expr_it) { const auto& loop_manager = linear_ir.get_loop_manager(); const auto& data_expr = *data_expr_it; - const auto& input_td = data_expr->get_inputs().front(); + const auto& input_td = data_expr->input(0); const auto parent_output = input_td->get_source(); const auto& parent_expr = parent_output.get_expr_ptr(); const auto port = parent_output.get_index(); @@ -116,7 +116,7 @@ bool 
InsertLoadStore::insert_store(LinearIR& linear_ir, const LinearIR::constExp const auto& reverse_insertion_pos = std::find(std::reverse_iterator(data_expr_it), linear_ir.crend(), parent_expr); const auto& insertion_pos = reverse_insertion_pos.base(); linear_ir.insert(insertion_pos, store_expr); - linear_ir.replace_input(data_expr, 0, store_expr->get_outputs()[0]); + linear_ir.replace_input(data_expr, 0, store_expr->output(0)); // Copy Loop identifies store_expr->set_loop_ids(loop_ids); diff --git a/src/common/snippets/src/lowered/pass/insert_tail_loop.cpp b/src/common/snippets/src/lowered/pass/insert_tail_loop.cpp index b66e6035051ab9..fffef56513c8ab 100644 --- a/src/common/snippets/src/lowered/pass/insert_tail_loop.cpp +++ b/src/common/snippets/src/lowered/pass/insert_tail_loop.cpp @@ -41,7 +41,7 @@ void InsertTailLoop::tail_transformations(LinearIR& linear_ir, ov::is_type(op))) { for (size_t i = 0; i < op->inputs().size(); ++i) { if (auto fill = insertFill(op->input(i))) { - std::vector inputs{expr_it->get()->get_inputs()[i]}; + std::vector inputs{expr_it->get()->input(i)}; // Note: inputs == outputs, since we want to modify vector reg inplace auto fill_expr = linear_ir.create_expression(fill, inputs, inputs); auto reg = expr_it->get()->get_reg_info().first[i]; @@ -100,13 +100,13 @@ bool InsertTailLoop::run(LinearIR& linear_ir) { return ov::is_type(parent_expr->get_node()); }; auto is_buffer_output = [&linear_ir](const TensorPtr& output) { - const auto& child_exprs_inputs = output->get_consumers(); + const auto child_exprs_inputs = output->get_consumers(); return std::any_of(child_exprs_inputs.begin(), child_exprs_inputs.end(), [](const TensorDescriptor& lp) {return ov::is_type(lp.get_expr_ptr()->get_node());}); }; const auto loop_end_expr = linear_ir.get_expr_by_node(loop_end); - const auto inputs = loop_end_expr->get_inputs(); + const auto inputs = loop_end_expr->inputs(); const auto in_num = loop_end->get_input_num(); const auto out_num = 
loop_end->get_output_num(); OPENVINO_ASSERT(inputs.size() == (in_num + out_num + 1), diff --git a/src/common/snippets/src/lowered/pass/load_movebroadcast_to_broadcastload.cpp b/src/common/snippets/src/lowered/pass/load_movebroadcast_to_broadcastload.cpp index cf66f869fafd0f..743b31a6b04f51 100644 --- a/src/common/snippets/src/lowered/pass/load_movebroadcast_to_broadcastload.cpp +++ b/src/common/snippets/src/lowered/pass/load_movebroadcast_to_broadcastload.cpp @@ -22,7 +22,7 @@ bool LoadMoveBroadcastToBroadcastLoad::run(LinearIR& linear_ir) { const auto& op = (*expr_it)->get_node(); // Match on MoveBroadcast because MoveBroadcast is rare node in bodies if (const auto move_broadcast = ov::as_type_ptr(op)) { - const auto interm_td = (*expr_it)->get_inputs().front(); + const auto& interm_td = (*expr_it)->input(0); const auto parent_expr = interm_td->get_source().get_expr_ptr(); const auto load = ov::as_type_ptr(parent_expr->get_node()); if (!load) @@ -41,20 +41,20 @@ bool LoadMoveBroadcastToBroadcastLoad::run(LinearIR& linear_ir) { if (count > 1) continue; - const auto outshape = move_broadcast->get_output_partial_shape(0); + const auto& outshape = move_broadcast->get_output_partial_shape(0); const auto broadcastload = std::make_shared(load->input_value(0), outshape, load->get_offset()); - const auto move_out = (*expr_it)->get_outputs().front(); + const auto& move_out = (*expr_it)->output(0); const auto move_consumers = move_out->get_consumers(); PortManager::set_port_descriptor_ptr(broadcastload->output(0), std::make_shared(move_out->get_tensor(), move_out->get_subtensor(), move_out->get_layout())); - const auto broadcastload_expr = linear_ir.create_expression(broadcastload, { parent_expr->get_inputs().front() }); + const auto broadcastload_expr = linear_ir.create_expression(broadcastload, { parent_expr->input(0) }); const auto mv_expr_it = expr_it; const auto insertion_pos = std::next(expr_it); expr_it = linear_ir.insert(insertion_pos, broadcastload_expr); 
linear_ir.erase(std::find(linear_ir.begin(), mv_expr_it, parent_expr)); linear_ir.erase(mv_expr_it); - linear_ir.replace_input(move_consumers, broadcastload_expr->get_outputs().front()); + linear_ir.replace_input(move_consumers, broadcastload_expr->output(0)); modified |= true; } } diff --git a/src/common/snippets/src/lowered/pass/mark_loops.cpp b/src/common/snippets/src/lowered/pass/mark_loops.cpp index 3c9ab0b7e9be64..4e4881e565db9e 100644 --- a/src/common/snippets/src/lowered/pass/mark_loops.cpp +++ b/src/common/snippets/src/lowered/pass/mark_loops.cpp @@ -68,10 +68,10 @@ bool MarkLoops::run(LinearIR& linear_ir) { bool is_connected = false; bool is_conflicted = false; for (size_t i = 0; i < prev_expr->get_output_count(); ++i) { - const auto& loop_td = prev_expr->get_outputs()[i]; - const auto& consumers = loop_td->get_consumers(); + const auto& loop_td = prev_expr->output(i); + const auto consumers = loop_td->get_consumers(); const auto found = std::find_if(consumers.begin(), consumers.end(), [&loop_end_pos](const TensorDescriptor& consumer) { - return consumer.get_expr_ptr().get() == loop_end_pos->get(); + return consumer.get_expr_ptr() == *loop_end_pos; }); if (found != consumers.end()) { if (loop_td->is_conflicted_consumer(*found)) { diff --git a/src/common/snippets/src/lowered/pass/move_result_out_from_loop.cpp b/src/common/snippets/src/lowered/pass/move_result_out_from_loop.cpp index d2d9b363be3d81..58f844212b6849 100644 --- a/src/common/snippets/src/lowered/pass/move_result_out_from_loop.cpp +++ b/src/common/snippets/src/lowered/pass/move_result_out_from_loop.cpp @@ -31,7 +31,7 @@ bool MoveResultOutOfLoop::run(LinearIR& linear_ir) { continue; } - const auto input_td = expr->get_inputs().front(); + const auto& input_td = expr->input(0); const auto parent_expr = input_td->get_source().get_expr_ptr(); const auto parent_loop_ids = parent_expr->get_loop_ids(); int outer_loop_id = static_cast(parent_loop_ids.size()) - 1; diff --git 
a/src/common/snippets/src/lowered/pass/move_scalar_to_consumer.cpp b/src/common/snippets/src/lowered/pass/move_scalar_to_consumer.cpp index 1410ed9f33545d..9c3f85270a0bdc 100644 --- a/src/common/snippets/src/lowered/pass/move_scalar_to_consumer.cpp +++ b/src/common/snippets/src/lowered/pass/move_scalar_to_consumer.cpp @@ -25,8 +25,8 @@ bool MoveScalarToConsumer::run(LinearIR& linear_ir) { for (auto expr_it = linear_ir.rbegin(); expr_it != linear_ir.rend(); expr_it++) { const auto expr = expr_it->get(); if (ov::is_type(expr->get_node())) { - const auto& output = expr->get_outputs().front(); - const auto& consumers = output->get_consumers(); + const auto& output = expr->output(0); + const auto consumers = output->get_consumers(); OPENVINO_ASSERT(consumers.size() == 1, "Scalar expression is expected to have a single consumer"); const auto& consumer_expr = consumers.begin()->get_expr_ptr(); diff --git a/src/common/snippets/src/lowered/pass/propagate_layout.cpp b/src/common/snippets/src/lowered/pass/propagate_layout.cpp index 2986230ae844c5..cfa37e9c52b687 100644 --- a/src/common/snippets/src/lowered/pass/propagate_layout.cpp +++ b/src/common/snippets/src/lowered/pass/propagate_layout.cpp @@ -26,14 +26,14 @@ bool PropagateLayout::run(LinearIR& linear_ir) { continue; const bool is_input = io_expr->get_type() == IOExpression::io_type::INPUT; - const auto& tds = is_input ? expr->get_outputs() : expr->get_inputs(); + const auto& tds = is_input ? 
expr->outputs() : expr->inputs(); if (tds.size() != 1) OPENVINO_THROW("Parameter/Results should have exactly one output/input"); // If input - we should be looking downstream, if output - upstream const auto& target_td = tds.front(); if (is_input) { - const auto& consumer_inputs = target_td->get_consumers(); + const auto consumer_inputs = target_td->get_consumers(); // Note that here we consider only the first child (which is usually load), // but often there is another child - LoopEnd std::set> child_layouts; @@ -47,9 +47,9 @@ bool PropagateLayout::run(LinearIR& linear_ir) { } } OPENVINO_ASSERT(child_layouts.size() == 1, "All children of an input expression must have the same layout"); - target_td->get_source().set_layout(*child_layouts.begin()); + target_td->set_layout(*child_layouts.begin()); } else { - const auto& consumer_inputs = target_td->get_consumers(); + const auto consumer_inputs = target_td->get_consumers(); // Note that here we consider only the first child (which is usually Store), // but often there is another child - LoopEnd TensorDescriptor result_td; @@ -58,7 +58,7 @@ bool PropagateLayout::run(LinearIR& linear_ir) { if (ov::is_type(child->get_node())) { continue; } - if (child.get() == io_expr.get()) { + if (child == io_expr) { result_td = child_input; continue; } diff --git a/src/common/snippets/src/lowered/pass/reset_buffers.cpp b/src/common/snippets/src/lowered/pass/reset_buffers.cpp index f0957e8bb7499f..977ade95bad9a7 100644 --- a/src/common/snippets/src/lowered/pass/reset_buffers.cpp +++ b/src/common/snippets/src/lowered/pass/reset_buffers.cpp @@ -18,7 +18,7 @@ bool ResetBuffers::reuse_buffer_increments(const LinearIR& linear_ir, const Expr if (!loop_end) return false; - const auto loop_tds = loop_end_expr->get_inputs(); + const auto loop_tds = loop_end_expr->inputs(); const auto input_count = loop_end->get_input_num(); const auto output_count = loop_end->get_output_num(); diff --git 
a/src/common/snippets/src/lowered/pass/softmax_decomposition.cpp b/src/common/snippets/src/lowered/pass/softmax_decomposition.cpp index f06003dba42619..601911c4b95bdb 100644 --- a/src/common/snippets/src/lowered/pass/softmax_decomposition.cpp +++ b/src/common/snippets/src/lowered/pass/softmax_decomposition.cpp @@ -35,8 +35,8 @@ bool SoftmaxDecomposition::run(LinearIR& linear_ir) { const auto& pm = matcher->get_pattern_map(); const auto softmax = pm.at(match_softmax); const auto softmax_expr = *expr_it; - const auto input_td = softmax_expr->get_inputs().front(); - const auto output_td = softmax_expr->get_outputs().front(); + const auto& input_td = softmax_expr->input(0); + const auto& output_td = softmax_expr->output(0); const auto tensor_out = output_td->get_tensor(); const auto subtensor_in = input_td->get_subtensor(); const auto inner_work_amount = *(tensor_out.rbegin()); @@ -100,7 +100,7 @@ bool SoftmaxDecomposition::run(LinearIR& linear_ir) { // Transfer original TensorDescriptors linear_ir.replace_input(*max.first, 0, input_td); linear_ir.replace_input(*sub.first, 0, input_td); - linear_ir.replace_input(output_td->get_consumers(), (*mul.first)->get_outputs().front()); + linear_ir.replace_input(output_td->get_consumers(), (*mul.first)->output(0)); // Markup of Mul Loop loop_manager->mark_loop(mul.first, expr_it, 1, inner_work_amount, m_vector_size, diff --git a/src/common/snippets/src/lowered/pass/vector_to_scalar.cpp b/src/common/snippets/src/lowered/pass/vector_to_scalar.cpp index 41335b74e7be70..60355de49c8aff 100644 --- a/src/common/snippets/src/lowered/pass/vector_to_scalar.cpp +++ b/src/common/snippets/src/lowered/pass/vector_to_scalar.cpp @@ -23,8 +23,8 @@ bool SetScalarCountForLoadStore::run(LinearIR& linear_ir) { const auto load = ov::as_type_ptr(op); const auto store = ov::as_type_ptr(op); if (load || store) { - const auto td = load ? (*expr_it)->get_inputs().front() : - (*expr_it)->get_outputs().front(); + const auto& td = load ? 
(*expr_it)->input(0) + : (*expr_it)->output(0) const auto& layout = td->get_layout(); const auto& tensor_shape = td->get_tensor(); // Find last dimension by layout diff --git a/src/common/snippets/src/lowered/tensor.cpp b/src/common/snippets/src/lowered/tensor.cpp index 3603fd3a1c337e..1a0f3525690b69 100644 --- a/src/common/snippets/src/lowered/tensor.cpp +++ b/src/common/snippets/src/lowered/tensor.cpp @@ -45,7 +45,7 @@ std::vector::const_iterator Tensor::find_consumer(const Tensor // Note: Find by shared ptr and index port is enough since these parameters must be unique return std::find_if(m_consumer_ports.begin(), m_consumer_ports.end(), [&consumer](const TensorDescriptor& td) { - return consumer.get_expr_ptr().get() == td.get_expr_ptr().get() && consumer.get_index() == td.get_index(); + return consumer.get_expr_ptr() == td.get_expr_ptr() && consumer.get_index() == td.get_index(); }); } @@ -53,7 +53,7 @@ std::vector::iterator Tensor::find_consumer(const TensorDescri // Note: Find by shared ptr and index port is enough since these parameters must be unique return std::find_if(m_consumer_ports.begin(), m_consumer_ports.end(), [&consumer](const TensorDescriptor& td) { - return consumer.get_expr_ptr().get() == td.get_expr_ptr().get() && consumer.get_index() == td.get_index(); + return consumer.get_expr_ptr() == td.get_expr_ptr() && consumer.get_index() == td.get_index(); }); } @@ -83,6 +83,7 @@ std::vector Tensor::get_conflicted_consumers() const { } bool Tensor::is_conflicted_consumer(const TensorDescriptor& consumer) const { + OPENVINO_ASSERT(found_consumer(consumer), "Failed check for conflicted consumer: it's not a consumer for the Tensor"); return get_tensor() != consumer.get_tensor() || get_layout() != consumer.get_layout() || get_subtensor() != consumer.get_subtensor(); @@ -92,7 +93,7 @@ bool operator==(const TensorDescriptor& lhs, const TensorDescriptor& rhs) { if (&rhs == &lhs) return true; return lhs.m_type == rhs.m_type && - lhs.m_expr.lock().get() == 
rhs.m_expr.lock().get() && + lhs.m_expr.lock() == rhs.m_expr.lock() && lhs.m_port_index == rhs.m_port_index && lhs.m_port_desc == rhs.m_port_desc; } @@ -102,7 +103,7 @@ bool operator!=(const TensorDescriptor& lhs, const TensorDescriptor& rhs) { bool operator<(const TensorDescriptor& lhs, const TensorDescriptor& rhs) { OPENVINO_ASSERT(lhs.get_type() == rhs.get_type(), "TensorDescriptors must be of the same type for comparison!"); return lhs.get_index() < rhs.get_index() && - lhs.get_expr_ptr().get() < rhs.get_expr_ptr().get() && + lhs.get_expr_ptr() < rhs.get_expr_ptr() && lhs.get_tensor() < rhs.get_tensor() && lhs.get_layout() < rhs.get_layout() && lhs.get_subtensor() < rhs.get_subtensor(); diff --git a/src/common/snippets/src/op/brgemm.cpp b/src/common/snippets/src/op/brgemm.cpp index 4d20bd0ab238f0..5e627e63f62251 100644 --- a/src/common/snippets/src/op/brgemm.cpp +++ b/src/common/snippets/src/op/brgemm.cpp @@ -21,11 +21,28 @@ Brgemm::Brgemm(const Output& A, const Output& B, constructor_validate_and_infer_types(); } -void Brgemm::validate_and_infer_types() { - INTERNAL_OP_SCOPE(Brgemm_validate_and_infer_types); +void Brgemm::constructor_validate_and_infer_types() { + INTERNAL_OP_SCOPE(BrgemmCPU_constructor_validate_and_infer_types); + validate_inputs(); + + // During ctor call, Brgemm doesn't know his port descriptors. 
+ // So we use port descs from source inputs + const auto planar_input_shapes = + std::vector{ ngraph::snippets::utils::get_port_planar_shape(input_value(0)), + ngraph::snippets::utils::get_port_planar_shape(input_value(1)) }; + auto output_shape = get_output_partial_shape(planar_input_shapes); + set_output_type(0, get_output_type(), get_planar_output_shape(output_shape)); +} + +void Brgemm::validate_inputs() const { // If no leading dimensions are provided, assume dense row-major inputs-outputs NODE_VALIDATION_CHECK(this, get_input_partial_shape(0).is_static() && get_input_partial_shape(1).is_static(), "Brgemm currently supports only static shapes."); +} + +void Brgemm::validate_and_infer_types() { + INTERNAL_OP_SCOPE(Brgemm_validate_and_infer_types); + validate_inputs(); const auto planar_input_shapes = get_planar_input_shapes(inputs()); auto output_shape = get_output_partial_shape(planar_input_shapes); diff --git a/src/plugins/intel_cpu/src/emitters/x64/jit_snippets_emitters.cpp b/src/plugins/intel_cpu/src/emitters/x64/jit_snippets_emitters.cpp index 917d595bd98e79..c180c9ace65d71 100644 --- a/src/plugins/intel_cpu/src/emitters/x64/jit_snippets_emitters.cpp +++ b/src/plugins/intel_cpu/src/emitters/x64/jit_snippets_emitters.cpp @@ -125,14 +125,14 @@ KernelEmitter::KernelEmitter(dnnl::impl::cpu::x64::jit_generator* h, dnnl::impl: element::Type etype; switch (expr->get_type()) { case ngraph::snippets::lowered::IOExpression::io_type::INPUT: { - td = expr->get_outputs()[0]; + td = expr->outputs()[0]; etype = expr->get_node()->get_output_element_type(0); num_inputs++; break; } case ngraph::snippets::lowered::IOExpression::io_type::OUTPUT: { num_outputs++; - td = expr->get_inputs()[0]; + td = expr->inputs()[0]; etype = expr->get_node()->get_input_element_type(0); break; } default : { diff --git a/src/plugins/intel_cpu/src/transformations/snippets/x64/op/brgemm_copy_b.cpp b/src/plugins/intel_cpu/src/transformations/snippets/x64/op/brgemm_copy_b.cpp index 
aa16bf4b99622a..096075106787d3 100644 --- a/src/plugins/intel_cpu/src/transformations/snippets/x64/op/brgemm_copy_b.cpp +++ b/src/plugins/intel_cpu/src/transformations/snippets/x64/op/brgemm_copy_b.cpp @@ -21,7 +21,7 @@ intel_cpu::BrgemmCopyB::BrgemmCopyB(const Output& x, const element::Type s if (is_with_compensations()) { set_output_port_descriptor({0, offset_out1}, 1); } - ctor_validate_and_infer_types(); + constructor_validate_and_infer_types(); } bool intel_cpu::BrgemmCopyB::visit_attributes(AttributeVisitor& visitor) { @@ -31,7 +31,7 @@ bool intel_cpu::BrgemmCopyB::visit_attributes(AttributeVisitor& visitor) { return true; } -void intel_cpu::BrgemmCopyB::ctor_validate_and_infer_types() { +void intel_cpu::BrgemmCopyB::constructor_validate_and_infer_types() { INTERNAL_OP_SCOPE(BrgemmRepack_ctor_validate_and_infer_types); // During ctor call, BrgemmCopyB doesn't know his port descriptors. // So we use port descs from source inputs diff --git a/src/plugins/intel_cpu/src/transformations/snippets/x64/op/brgemm_copy_b.hpp b/src/plugins/intel_cpu/src/transformations/snippets/x64/op/brgemm_copy_b.hpp index 73e46e60c41aa3..eefe39d5b4c70d 100644 --- a/src/plugins/intel_cpu/src/transformations/snippets/x64/op/brgemm_copy_b.hpp +++ b/src/plugins/intel_cpu/src/transformations/snippets/x64/op/brgemm_copy_b.hpp @@ -43,7 +43,7 @@ class BrgemmCopyB : public ngraph::snippets::op::MemoryAccess { std::shared_ptr clone_with_new_inputs(const OutputVector& new_args) const override; private: - void ctor_validate_and_infer_types(); + void constructor_validate_and_infer_types(); void validate(const ov::PartialShape& pshape, const ov::element::Type& element_type); Type m_type = Type::OnlyRepacking; diff --git a/src/plugins/intel_cpu/src/transformations/snippets/x64/op/brgemm_cpu.cpp b/src/plugins/intel_cpu/src/transformations/snippets/x64/op/brgemm_cpu.cpp index bc5e8bc9256acb..be1ba4c460fb8f 100644 --- a/src/plugins/intel_cpu/src/transformations/snippets/x64/op/brgemm_cpu.cpp +++ 
b/src/plugins/intel_cpu/src/transformations/snippets/x64/op/brgemm_cpu.cpp @@ -22,7 +22,7 @@ BrgemmCPU::BrgemmCPU(const Output& A, const Output& B, const Type ty set_input_port_descriptor({0, offset_a}, 0); set_input_port_descriptor({0, offset_b}, 1); set_output_port_descriptor({0, offset_c}, 0); - ctor_validate_and_infer_types(); + constructor_validate_and_infer_types(); } BrgemmCPU::BrgemmCPU(const Output& A, const Output& B, const Output& scratch, const Type type, @@ -35,11 +35,11 @@ BrgemmCPU::BrgemmCPU(const Output& A, const Output& B, const Output< set_input_port_descriptor({0, offset_b}, 1); set_output_port_descriptor({0, offset_c}, 0); set_input_port_descriptor({0, offset_scratch}, 2); - ctor_validate_and_infer_types(); + constructor_validate_and_infer_types(); } -void BrgemmCPU::ctor_validate_and_infer_types() { - INTERNAL_OP_SCOPE(BrgemmCPU_ctor_validate_and_infer_types); +void BrgemmCPU::constructor_validate_and_infer_types() { + INTERNAL_OP_SCOPE(BrgemmCPU_constructor_validate_and_infer_types); validate_inputs(); // During ctor call, BrgemmCPU doesn't know his port descriptors. 
diff --git a/src/plugins/intel_cpu/src/transformations/snippets/x64/op/brgemm_cpu.hpp b/src/plugins/intel_cpu/src/transformations/snippets/x64/op/brgemm_cpu.hpp index e93d7d70452fe6..5b1fb688f7dda5 100644 --- a/src/plugins/intel_cpu/src/transformations/snippets/x64/op/brgemm_cpu.hpp +++ b/src/plugins/intel_cpu/src/transformations/snippets/x64/op/brgemm_cpu.hpp @@ -50,7 +50,7 @@ class BrgemmCPU : public ngraph::snippets::op::Brgemm { constexpr static size_t SCRATCH_BYTE_SIZE = 32 * 1024; private: - void ctor_validate_and_infer_types(); + void constructor_validate_and_infer_types(); void validate_with_scratchpad(const ov::Shape& shape_b) const; void validate_inputs() const; diff --git a/src/plugins/intel_cpu/src/transformations/snippets/x64/pass/lowered/fuse_load_store_and_convert.cpp b/src/plugins/intel_cpu/src/transformations/snippets/x64/pass/lowered/fuse_load_store_and_convert.cpp index bbe7c85995ed22..2a9555bfbf7a6b 100644 --- a/src/plugins/intel_cpu/src/transformations/snippets/x64/pass/lowered/fuse_load_store_and_convert.cpp +++ b/src/plugins/intel_cpu/src/transformations/snippets/x64/pass/lowered/fuse_load_store_and_convert.cpp @@ -15,8 +15,8 @@ bool ov::intel_cpu::pass::FuseLoadStoreConvert::fuse_load_convert(ngraph::snippe ngraph::snippets::lowered::LinearIR::constExprIt& convert_it) { const auto& convert_expr = *convert_it; const auto& convert = ov::as_type_ptr(convert_expr->get_node()); - const auto input_td = convert_expr->get_inputs().front(); - const auto output_td = convert_expr->get_outputs().front(); + const auto input_td = convert_expr->inputs().front(); + const auto output_td = convert_expr->outputs().front(); if (convert->get_destination_type() != ov::element::f32 && convert->get_destination_type() != ov::element::i32) return false; @@ -45,21 +45,19 @@ bool ov::intel_cpu::pass::FuseLoadStoreConvert::fuse_load_convert(ngraph::snippe OPENVINO_THROW("Type of Convert op is undefined. 
Supports only fusing Load and ConvertTruncation or ConvertSaturation ops"); } - const auto convert_out = convert_expr->get_outputs().front(); + const auto convert_out = convert_expr->outputs().front(); const auto convert_consumers = convert_out->get_consumers(); ngraph::snippets::PortManager::set_port_descriptor_ptr(load_convert->output(0), std::make_shared(convert_out->get_tensor(), convert_out->get_subtensor(), convert_out->get_layout())); - const auto load_convert_expr = linear_ir.create_expression(load_convert, { load_expr->get_inputs().front() }); + const auto load_convert_expr = linear_ir.create_expression(load_convert, { load_expr->inputs().front() }); const auto convert_expr_it = convert_it; const auto insertion_pos = std::next(convert_it); convert_it = linear_ir.insert(insertion_pos, load_convert_expr); linear_ir.erase(std::find(linear_ir.cbegin(), convert_expr_it, load_expr)); linear_ir.erase(convert_expr_it); - for (const auto& consumer : convert_consumers) { - linear_ir.replace_input(consumer, load_convert_expr->get_outputs().front()); - } + linear_ir.replace_input(convert_consumers, load_convert_expr->output(0)); return true; } @@ -67,8 +65,8 @@ bool ov::intel_cpu::pass::FuseLoadStoreConvert::fuse_store_convert(ngraph::snipp ngraph::snippets::lowered::LinearIR::constExprIt& convert_it) { const auto& convert_expr = *convert_it; const auto& convert = convert_expr->get_node(); - const auto input_td = convert_expr->get_inputs().front(); - const auto output_td = convert_expr->get_outputs().front(); + const auto input_td = convert_expr->inputs().front(); + const auto output_td = convert_expr->outputs().front(); if (convert->get_input_element_type(0) != ov::element::f32 && convert->get_input_element_type(0) != ov::element::i32) return false; @@ -95,7 +93,7 @@ bool ov::intel_cpu::pass::FuseLoadStoreConvert::fuse_store_convert(ngraph::snipp OPENVINO_THROW("Type of Convert op is undefined. 
Supports only fusing Store and ConvertTruncation or ConvertSaturation ops"); } - const auto store_out = store_expr->get_outputs().front(); + const auto store_out = store_expr->outputs().front(); const auto store_consumers = store_out->get_consumers(); ngraph::snippets::PortManager::set_port_descriptor_ptr(store_convert->output(0), std::make_shared(store_out->get_tensor(), @@ -107,9 +105,7 @@ bool ov::intel_cpu::pass::FuseLoadStoreConvert::fuse_store_convert(ngraph::snipp convert_it = linear_ir.insert(insertion_pos, store_convert_expr); linear_ir.erase(std::find(convert_expr_it, linear_ir.cend(), store_expr)); linear_ir.erase(convert_expr_it); - for (const auto& consumer : store_consumers) { - linear_ir.replace_input(consumer, store_convert_expr->get_outputs().front()); - } + linear_ir.replace_input(store_consumers, store_convert_expr->output(0)); return true; } diff --git a/src/tests/ngraph_helpers/snippets_ngraph_functions/src/subgraph_lowered.cpp b/src/tests/ngraph_helpers/snippets_ngraph_functions/src/subgraph_lowered.cpp index 0cf9163ddff77d..93eec1b1418069 100644 --- a/src/tests/ngraph_helpers/snippets_ngraph_functions/src/subgraph_lowered.cpp +++ b/src/tests/ngraph_helpers/snippets_ngraph_functions/src/subgraph_lowered.cpp @@ -78,24 +78,35 @@ std::shared_ptr Transpose0213MatMulLoweredFunction::initLowered() con std::make_shared(precisions[1], input_shapes[1])}; std::vector layout{0, 2, 1, 3}; // Note: validity of transpose_position values is checked in Transpose0213MatMulSinhFunction constructor - /* if (transpose_position <= 1) { - const auto& anchor = data[transpose_position]; + if (transpose_position < 2) { + const auto& anchor = data[transpose_position]->output(0); const auto& td = ngraph::snippets::PortManager::get_port_descriptor_ptr(anchor); const auto& tensor = td->get_tensor(); const auto& subtensor = td->get_subtensor(); ngraph::snippets::PortManager::set_port_descriptor_ptr(anchor, - std::make_shared(tensor, subtensor, layout)); - }*/ + 
std::make_shared(tensor, subtensor, layout)); + } auto matmul = std::make_shared(data[0], data[1]); - /* if (transpose_position == 2) { + auto result = std::make_shared(matmul); + if (transpose_position == 2) { const auto& anchor = matmul->output(0); - const auto& td = ngraph::snippets::get_tensor_descriptor_ptr(anchor); + const auto& td = ngraph::snippets::PortManager::get_port_descriptor_ptr(anchor); + const auto& tensor = td->get_tensor(); + const auto& subtensor = td->get_subtensor(); + ngraph::snippets::PortManager::set_port_descriptor_ptr(anchor, + std::make_shared(tensor, subtensor, layout)); + ngraph::snippets::PortManager::set_port_descriptor_ptr(result->input(0), + std::make_shared(tensor, subtensor, layout)); + } + if (transpose_position < 2) { + const auto& anchor = data[transpose_position]->output(0); + const auto& td = ngraph::snippets::PortManager::get_port_descriptor_ptr(anchor); const auto& tensor = td->get_tensor(); const auto& subtensor = td->get_subtensor(); - ngraph::snippets::set_tensor_descriptor_ptr(anchor, - std::make_shared(tensor, subtensor, layout)); - matmul->validate_and_infer_types(); - }*/ + ngraph::snippets::PortManager::set_port_descriptor_ptr(matmul->input(transpose_position), + std::make_shared(tensor, subtensor, layout)); + } + matmul->validate_and_infer_types(); return std::make_shared(NodeVector{matmul}, data); } From 88c403fc82339a98f5368f258cf9d05f19ae84a5 Mon Sep 17 00:00:00 2001 From: Alexandra Sidorova Date: Thu, 4 May 2023 16:23:55 +0400 Subject: [PATCH 03/13] Added Softmax support via common pipeline --- .../snippets/pass/schedule_softmax.hpp | 26 ++++++ .../include/snippets/port_descriptor.hpp | 6 ++ .../snippets/src/lowered/expression.cpp | 6 +- .../snippets/src/lowered/loop_manager.cpp | 85 +++++++------------ .../snippets/src/lowered/pass/mark_loops.cpp | 6 +- .../lowered/pass/softmax_decomposition.cpp | 34 ++++++-- src/common/snippets/src/op/subgraph.cpp | 2 + .../snippets/src/pass/schedule_softmax.cpp | 59 
+++++++++++++ src/common/snippets/src/port_descriptor.cpp | 2 + 9 files changed, 155 insertions(+), 71 deletions(-) create mode 100644 src/common/snippets/include/snippets/pass/schedule_softmax.hpp create mode 100644 src/common/snippets/src/pass/schedule_softmax.cpp diff --git a/src/common/snippets/include/snippets/pass/schedule_softmax.hpp b/src/common/snippets/include/snippets/pass/schedule_softmax.hpp new file mode 100644 index 00000000000000..b4ec4f487708eb --- /dev/null +++ b/src/common/snippets/include/snippets/pass/schedule_softmax.hpp @@ -0,0 +1,26 @@ +// Copyright (C) 2018-2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include +#include + +namespace ngraph { +namespace snippets { +namespace pass { + +/** + * @interface ScheduleSoftmax + * @brief The pass updates port descriptors for Softmax to show by which axes there is reducing + * @ingroup snippets + */ +class ScheduleSoftmax: public ngraph::pass::MatcherPass { +public: + ScheduleSoftmax(); +}; + +} // namespace pass +} // namespace snippets +} // namespace ngraph diff --git a/src/common/snippets/include/snippets/port_descriptor.hpp b/src/common/snippets/include/snippets/port_descriptor.hpp index 976cb7faac2be5..f9802d113ce10c 100644 --- a/src/common/snippets/include/snippets/port_descriptor.hpp +++ b/src/common/snippets/include/snippets/port_descriptor.hpp @@ -13,6 +13,12 @@ namespace snippets { class PortDescriptor { public: + // The structure with service values for scheduling parameters + struct Scheduling { + // The value for the subtensor that means that scheduling should be by full dimension + static size_t FULL_DIM; + }; + explicit PortDescriptor(const ov::Input& node, std::vector subtensor_shape = {}, std::vector layout = {}); diff --git a/src/common/snippets/src/lowered/expression.cpp b/src/common/snippets/src/lowered/expression.cpp index e161be2290c366..63c94de00b9db9 100644 --- a/src/common/snippets/src/lowered/expression.cpp +++ 
b/src/common/snippets/src/lowered/expression.cpp @@ -53,8 +53,10 @@ void Expression::replace_output(size_t port, TensorPtr to) { } void Expression::set_loop_id(size_t id, size_t idx) { - OPENVINO_ASSERT((std::find(m_loop_ids.begin(), m_loop_ids.end(), id) == m_loop_ids.end()), - "Expression cannot have several the same Loops"); + if (id != LOOP_NULL_ID) { + OPENVINO_ASSERT((std::find(m_loop_ids.begin(), m_loop_ids.end(), id) == m_loop_ids.end()), + "Expression cannot have several the same Loops"); + } if (m_loop_ids.size() <= idx) { m_loop_ids.resize(idx + 1, LOOP_NULL_ID); } diff --git a/src/common/snippets/src/lowered/loop_manager.cpp b/src/common/snippets/src/lowered/loop_manager.cpp index 4e7d271a66e9ab..469c42f11b54d1 100644 --- a/src/common/snippets/src/lowered/loop_manager.cpp +++ b/src/common/snippets/src/lowered/loop_manager.cpp @@ -123,77 +123,51 @@ void LinearIR::LoopManager::mark_loop(LinearIR::constExprIt loop_begin_pos, std::vector loop_entry_points, loop_exit_points; LoopManager::get_io_loop_ports(loop_begin_pos, loop_end_pos, loop_entry_points, loop_exit_points); - auto broadcast = [](std::vector &lhs, const std::vector &rhs) -> void { + auto broadcast = [](std::vector &lhs, const std::vector &rhs, size_t index) -> void { if (rhs == lhs) return; const auto lhs_size = lhs.size(); const auto rhs_size = rhs.size(); const auto size = std::max(lhs_size, rhs_size); lhs.resize(size, 1); - for (size_t i = 0; i < size; ++i) { - const auto lhs_value = i < lhs_size ? *(lhs.crbegin() + i) : 1; - const auto rhs_value = i < rhs_size ? 
*(rhs.crbegin() + i) : 1; - OPENVINO_ASSERT(lhs_value == rhs_value || lhs_value == 1 || rhs_value == 1, - "Output shapes of Loop must be broadcastable!"); - *(lhs.rbegin() + i) = std::max(lhs_value, rhs_value); - } - }; - - auto found_port = [](const std::vector& ports, const TensorDescriptor& target) { - return std::find_if(ports.begin(), ports.end(), [&target](const TensorDescriptor& port) { - return port.get_expr_ptr() == target.get_expr_ptr() && - port.get_index() == target.get_index() && - port.get_type() == target.get_type(); - }) != ports.end(); + OPENVINO_ASSERT(index < size, "Incorrect index for broadcasting"); + const auto lhs_value = index < lhs_size ? *(lhs.crbegin() + index) : 1; + const auto rhs_value = index < rhs_size ? *(rhs.crbegin() + index) : 1; + OPENVINO_ASSERT(lhs_value == rhs_value || lhs_value == 1 || rhs_value == 1, + "Output shapes of Loop must be broadcastable!"); + *(lhs.rbegin() + index) = std::max(lhs_value, rhs_value); }; std::vector loop_subtensor; - std::vector loop_layout; - std::vector loop_tensor(1, 1); // Scalar + std::vector loop_tensor(loop_depth, 1); for (const auto& exit_point : loop_exit_points) { - const auto out_tensor = utils::get_reordered_shape(exit_point.get_tensor(), exit_point.get_layout()); - broadcast(loop_tensor, out_tensor); - - // SubTensor and Layout inside Loops must be the same. 
- // We have to verify that input of exit point isn't entry point or Constant to check for subtensor and layout because of - // then this input is not inside Loop - const auto& expr = exit_point.get_expr_ptr(); - for (size_t i = 0; i < expr->get_input_count(); ++i) { - const auto port = expr->input_port(i); - const auto parent = expr->input(port.get_index())->get_source().get_expr_ptr()->get_node(); - if (!found_port(loop_entry_points, port) && !ov::is_type(parent)) { - if (loop_subtensor.empty()) - loop_subtensor = port.get_subtensor(); - if (loop_layout.empty()) - loop_layout = port.get_layout(); - OPENVINO_ASSERT(loop_subtensor == port.get_subtensor(), "SubTensor inside Loop must be the same"); - OPENVINO_ASSERT(loop_layout == port.get_layout(), "Layout inside Loop must be the same"); - } + const auto tensor = utils::get_reordered_shape(exit_point.get_tensor(), exit_point.get_layout()); + auto subtensor = exit_point.get_subtensor(); + if (subtensor.empty()) { + subtensor.resize(loop_depth, 1); + subtensor[subtensor.size() - 1] = vector_size; } - } - - for (const auto& entry_point : loop_entry_points) { - const auto in_tensor = utils::get_reordered_shape(entry_point.get_tensor(), entry_point.get_layout()); - broadcast(loop_tensor, in_tensor); - - // SubTensor and Layout inside Loops must be the same. 
- // We have to verify that output of entry point isn't exit point to check for subtensor and layout because of - // then this output is not inside Loop - const auto& expr = entry_point.get_expr_ptr(); - for (size_t i = 0; i < expr->get_output_count(); ++i) { - const auto port = expr->output_port(i); - if (!found_port(loop_exit_points, port)) { - if (loop_subtensor.empty()) - loop_subtensor = port.get_subtensor(); - if (loop_layout.empty()) - loop_layout = port.get_layout(); - OPENVINO_ASSERT(loop_subtensor == port.get_subtensor(), "SubTensor inside Loop must be the same"); - OPENVINO_ASSERT(loop_layout == port.get_layout(), "Layout inside Loop must be the same"); + while (subtensor.size() < loop_depth) + subtensor.insert(subtensor.begin(), 1); + if (loop_subtensor.empty()) + loop_subtensor = subtensor; + OPENVINO_ASSERT(loop_subtensor == subtensor, "Incorrect scheduling parameters for loop"); + + for (size_t dim_idx = 0; dim_idx < loop_depth; ++dim_idx) { + if (*(subtensor.rbegin() + dim_idx) == PortDescriptor::Scheduling::FULL_DIM) { + *(loop_tensor.rbegin() + dim_idx) = PortDescriptor::Scheduling::FULL_DIM; + } else { + broadcast(loop_tensor, tensor, dim_idx); } } } for (size_t dim_idx = 0; dim_idx < loop_depth; ++dim_idx) { + if (*(loop_subtensor.rbegin() + dim_idx) == PortDescriptor::Scheduling::FULL_DIM) { + exprs_marking(loop_begin_pos, loop_end_pos, Expression::LOOP_NULL_ID, loop_depth - dim_idx - 1); + continue; + } + OPENVINO_ASSERT(dim_idx < loop_tensor.size(), "Incorrect indexes of Loop for markup"); const auto work_amount = loop_tensor.size() > dim_idx ? *(loop_tensor.rbegin() + dim_idx) @@ -201,7 +175,6 @@ void LinearIR::LoopManager::mark_loop(LinearIR::constExprIt loop_begin_pos, const auto work_amount_increment = loop_subtensor.size() > dim_idx ? *(loop_subtensor.rbegin() + dim_idx) : (dim_idx == 0 ? 
vector_size : 1); - mark_loop(loop_begin_pos, loop_end_pos, loop_depth - dim_idx - 1, work_amount, work_amount_increment, loop_entry_points, loop_exit_points); } diff --git a/src/common/snippets/src/lowered/pass/mark_loops.cpp b/src/common/snippets/src/lowered/pass/mark_loops.cpp index 4e4881e565db9e..f35352ce20e900 100644 --- a/src/common/snippets/src/lowered/pass/mark_loops.cpp +++ b/src/common/snippets/src/lowered/pass/mark_loops.cpp @@ -29,8 +29,7 @@ bool MarkLoops::run(LinearIR& linear_ir) { auto is_not_start_point = [](const std::shared_ptr& node) { return ov::is_type(node) || ov::is_type(node) || - ov::is_type(node) || - ov::is_type(node); // Softmax is decomposed operation. The marking is in decomposition pass + ov::is_type(node); }; for (auto expr_it = linear_ir.cbegin(); expr_it != linear_ir.cend(); expr_it++) { @@ -57,8 +56,7 @@ bool MarkLoops::run(LinearIR& linear_ir) { // If iterator is the last, we should finish Loop const auto& current_expr = *loop_end_pos; const auto& current_node = current_expr->get_node(); - if (ov::is_type(current_node) || // Softmax is marked in decomposition - ov::is_type(current_node) || + if (ov::is_type(current_node) || ov::is_type(current_node)) break; diff --git a/src/common/snippets/src/lowered/pass/softmax_decomposition.cpp b/src/common/snippets/src/lowered/pass/softmax_decomposition.cpp index 601911c4b95bdb..057b9119b0a8f0 100644 --- a/src/common/snippets/src/lowered/pass/softmax_decomposition.cpp +++ b/src/common/snippets/src/lowered/pass/softmax_decomposition.cpp @@ -35,20 +35,21 @@ bool SoftmaxDecomposition::run(LinearIR& linear_ir) { const auto& pm = matcher->get_pattern_map(); const auto softmax = pm.at(match_softmax); const auto softmax_expr = *expr_it; + const auto softmax_loop_ids = softmax_expr->get_loop_ids(); const auto& input_td = softmax_expr->input(0); const auto& output_td = softmax_expr->output(0); const auto tensor_out = output_td->get_tensor(); - const auto subtensor_in = input_td->get_subtensor(); 
const auto inner_work_amount = *(tensor_out.rbegin()); - const auto outer_work_amount = *(tensor_out.rbegin() + 1); expr_it = linear_ir.erase(expr_it); // Remove Softmax std::vector outer_exprs; // We need an iterator to the inserted element - auto push_node = [&linear_ir, &expr_it](const std::shared_ptr& n) { - return std::make_pair(linear_ir.insert(expr_it, n), n); + auto push_node = [&linear_ir, &expr_it, &softmax_loop_ids](const std::shared_ptr& n) { + const auto expr = linear_ir.insert(expr_it, n); + (*expr)->set_loop_ids(softmax_loop_ids); + return std::make_pair(expr, n); }; // Note: VectorBuffer is a special case, since it should go before the initial Load. So we handle it separately @@ -113,11 +114,26 @@ bool SoftmaxDecomposition::run(LinearIR& linear_ir) { expr->set_loop_id(Expression::LOOP_NULL_ID, 1); } - // Outer Loop - loop_manager->mark_loop(vector_buffer_max.first, expr_it, 0, outer_work_amount, 1, - std::vector{(*max.first)->input_port(0), - (*sub.first)->input_port(0)}, - std::vector{(*mul.first)->output_port(0)}); + auto update_loop_bounds = [&softmax_expr](std::vector& points, + const std::vector& new_points, + const LinearIR::LoopManager::LoopInfoPtr& loop_info) { + auto entry_found = std::find_if(points.begin(), points.end(), [&softmax_expr](const TensorDescriptor& desc) { + return desc.get_expr_ptr() == softmax_expr; + }); + if (entry_found != points.end()) { + entry_found = points.erase(entry_found); + points.insert(entry_found, new_points.begin(), new_points.end()); + } + }; + + // Update Loop info for outer loops + for (auto loop_id : softmax_loop_ids) { + if (loop_id == Expression::LOOP_NULL_ID) + continue; + const auto loop_info = loop_manager->get_loop_info(loop_id); + update_loop_bounds(loop_info->entry_exprs, std::vector{(*max.first)->input_port(0), (*sub.first)->input_port(0)}, loop_info); + update_loop_bounds(loop_info->exit_exprs, std::vector{(*mul.first)->output_port(0)}, loop_info); + } /* 
=========================================== */ diff --git a/src/common/snippets/src/op/subgraph.cpp b/src/common/snippets/src/op/subgraph.cpp index e1feda5b13cf3d..d0b1821c6b0b05 100644 --- a/src/common/snippets/src/op/subgraph.cpp +++ b/src/common/snippets/src/op/subgraph.cpp @@ -16,6 +16,7 @@ #include "snippets/pass/transform_convert.hpp" #include "snippets/pass/matmul_to_brgemm.hpp" #include "snippets/pass/fuse_transpose_brgemm.hpp" +#include "snippets/pass/schedule_softmax.hpp" #include "snippets/utils.hpp" #include "snippets/port_descriptor.hpp" @@ -462,6 +463,7 @@ void snippets::op::Subgraph::convert_to_snippet_dialect() { manager.register_pass(); manager.register_pass(); manager.register_pass(); + manager.register_pass(); } manager.register_pass(); manager.register_pass(); diff --git a/src/common/snippets/src/pass/schedule_softmax.cpp b/src/common/snippets/src/pass/schedule_softmax.cpp new file mode 100644 index 00000000000000..1e4d4ac50d38f0 --- /dev/null +++ b/src/common/snippets/src/pass/schedule_softmax.cpp @@ -0,0 +1,59 @@ +// Copyright (C) 2018-2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include + +#include "snippets/pass/schedule_softmax.hpp" + +#include "snippets/port_descriptor.hpp" + +#include "ngraph/op/softmax.hpp" +#include "ngraph/pattern/op/wrap_type.hpp" +#include "ngraph/pattern/op/or.hpp" +#include "ngraph/validation_util.hpp" + +using namespace ngraph; + +ngraph::snippets::pass::ScheduleSoftmax::ScheduleSoftmax() { + MATCHER_SCOPE(ScheduleSoftmax); + + auto m_softmax_v1 = ngraph::pattern::wrap_type(); + auto m_softmax_v8 = ngraph::pattern::wrap_type(); + auto m_softmax = std::make_shared(OutputVector{m_softmax_v1, m_softmax_v8}); + + auto callback = [](ngraph::pattern::Matcher &m) { + OV_ITT_SCOPED_TASK(ngraph::pass::itt::domains::SnippetsTransform, "Snippets::op::ScheduleSoftmax") + auto root = m.get_match_root(); + + const auto& pshape = root->get_input_partial_shape(0); + if (pshape.is_dynamic()) + return 
false; + + const auto shape = pshape.get_shape(); + const auto rank = shape.size(); + + int64_t axis; + if (const auto softmax_v8 = ngraph::as_type_ptr(root)) { + OPENVINO_SUPPRESS_DEPRECATED_START + axis = ngraph::normalize_axis(root->get_friendly_name(), softmax_v8->get_axis(), rank); + OPENVINO_SUPPRESS_DEPRECATED_END + } else if (const auto softmax_v1 = ngraph::as_type_ptr(root)) { + axis = softmax_v1->get_axis(); + } else { + return false; + } + + OPENVINO_ASSERT(axis < static_cast(rank), "Softmax has incorrect axis"); + std::vector subtensor(rank, 1); + for (size_t i = axis; i < rank; ++i) + subtensor[i] = PortDescriptor::Scheduling::FULL_DIM; + + PortManager::set_port_descriptor_ptr(root->input(0), std::make_shared(root->input(0), subtensor)); + PortManager::set_port_descriptor_ptr(root->output(0), std::make_shared(root->output(0), subtensor)); + + return true; + }; + + register_matcher(std::make_shared(m_softmax, matcher_name), callback); +} diff --git a/src/common/snippets/src/port_descriptor.cpp b/src/common/snippets/src/port_descriptor.cpp index a8398dceb9c657..241f26243ff683 100644 --- a/src/common/snippets/src/port_descriptor.cpp +++ b/src/common/snippets/src/port_descriptor.cpp @@ -9,6 +9,8 @@ namespace ngraph { namespace snippets { +size_t PortDescriptor::Scheduling::FULL_DIM = SIZE_MAX; + PortDescriptor::PortDescriptor(const ov::Input& in, std::vector subtensor_shape, std::vector layout) : PortDescriptor(ov::Input(in.get_node(), in.get_index()), std::move(subtensor_shape), std::move(layout)) {} PortDescriptor::PortDescriptor(const ov::Input& in, std::vector subtensor_shape, std::vector layout) From c2708cedbababcc66c14f5cdbf114ab913e2a75a Mon Sep 17 00:00:00 2001 From: Alexandra Sidorova Date: Fri, 5 May 2023 11:10:39 +0400 Subject: [PATCH 04/13] Added Brgemm marking via general pipeline --- .../include/snippets/lowered/expression.hpp | 2 - .../include/snippets/lowered/loop_manager.hpp | 3 - .../snippets/include/snippets/utils.hpp | 3 - 
.../snippets/src/lowered/expression.cpp | 2 +- .../snippets/src/lowered/loop_manager.cpp | 26 ++++---- .../snippets/src/lowered/pass/mark_loops.cpp | 19 ++---- .../src/pass/fuse_transpose_brgemm.cpp | 16 ++--- .../snippets/src/pass/matmul_to_brgemm.cpp | 4 +- src/common/snippets/src/utils.cpp | 13 ---- .../x64/pass/brgemm_to_brgemm_cpu.cpp | 64 ++++++++++--------- 10 files changed, 59 insertions(+), 93 deletions(-) diff --git a/src/common/snippets/include/snippets/lowered/expression.hpp b/src/common/snippets/include/snippets/lowered/expression.hpp index 15a6169c5d2eec..ac0aa317f5f6b6 100644 --- a/src/common/snippets/include/snippets/lowered/expression.hpp +++ b/src/common/snippets/include/snippets/lowered/expression.hpp @@ -44,7 +44,6 @@ class Expression : public std::enable_shared_from_this { void set_loop_ids(const std::vector& loops) { m_loop_ids = loops; } void set_loop_id(size_t id, size_t idx); void remove_loop_id(size_t id); - bool is_outside_loop() const { return m_is_outside_loop; } void init_emitter(const std::shared_ptr& target); @@ -69,7 +68,6 @@ class Expression : public std::enable_shared_from_this { RegInfo m_reg_info{{}, {}}; // The order Loops identifies: Outer ---> Inner std::vector m_loop_ids; - bool m_is_outside_loop = false; }; using ExpressionPtr = std::shared_ptr; diff --git a/src/common/snippets/include/snippets/lowered/loop_manager.hpp b/src/common/snippets/include/snippets/lowered/loop_manager.hpp index 4606e769c0998a..79c29ce856fa87 100644 --- a/src/common/snippets/include/snippets/lowered/loop_manager.hpp +++ b/src/common/snippets/include/snippets/lowered/loop_manager.hpp @@ -43,9 +43,6 @@ class LinearIR::LoopManager { size_t get_loop_count() const { return m_map.size(); } const std::map& get_map() const; - static void skipped_mark(LinearIR::constExprIt loop_begin_pos, - LinearIR::constExprIt loop_end_pos, - size_t loop_depth); void mark_loop(LinearIR::constExprIt loop_begin_pos, LinearIR::constExprIt loop_end_pos, size_t loop_depth, 
size_t vector_size); diff --git a/src/common/snippets/include/snippets/utils.hpp b/src/common/snippets/include/snippets/utils.hpp index 463a8d870526aa..76ae3cf48fd2f0 100644 --- a/src/common/snippets/include/snippets/utils.hpp +++ b/src/common/snippets/include/snippets/utils.hpp @@ -31,9 +31,6 @@ ov::Shape get_reordered_shape(const ov::Shape& shape, const std::vector& std::vector get_node_output_layout(const std::shared_ptr& node); std::vector get_node_output_layout(const Node* node); -bool get_outside_loop_value(const std::shared_ptr& node); -void set_outside_loop_value(const std::shared_ptr& node, bool is_outside = true); - inline auto normalize_rank(int32_t allocation_rank, const size_t shape_rank) -> int32_t { return allocation_rank < 0 ? allocation_rank + static_cast(shape_rank) + 1 : allocation_rank; } diff --git a/src/common/snippets/src/lowered/expression.cpp b/src/common/snippets/src/lowered/expression.cpp index 63c94de00b9db9..66c0d91bbe38c3 100644 --- a/src/common/snippets/src/lowered/expression.cpp +++ b/src/common/snippets/src/lowered/expression.cpp @@ -17,7 +17,7 @@ namespace lowered { size_t Expression::LOOP_NULL_ID = SIZE_MAX; Expression::Expression(const std::shared_ptr& n) - : m_source_node{n}, m_emitter{nullptr}, m_inputs{}, m_outputs{}, m_reg_info{{}, {}}, m_is_outside_loop(utils::get_outside_loop_value(n)) {} + : m_source_node{n}, m_emitter{nullptr}, m_inputs{}, m_outputs{}, m_reg_info{{}, {}} {} const TensorPtr& Expression::input(size_t i) const { OPENVINO_ASSERT(i < m_inputs.size(), "Failed to get input: target input port must be less than input count!"); diff --git a/src/common/snippets/src/lowered/loop_manager.cpp b/src/common/snippets/src/lowered/loop_manager.cpp index 469c42f11b54d1..cc6c099cb2a354 100644 --- a/src/common/snippets/src/lowered/loop_manager.cpp +++ b/src/common/snippets/src/lowered/loop_manager.cpp @@ -107,16 +107,6 @@ void LinearIR::LoopManager::get_io_loop_ports(LinearIR::constExprIt loop_begin_p } } -void 
LinearIR::LoopManager::skipped_mark(LinearIR::constExprIt loop_begin_pos, - LinearIR::constExprIt loop_end_pos, - size_t loop_depth) { - const auto loop_ids = std::vector(loop_depth, Expression::LOOP_NULL_ID); - for (auto& expr_it = loop_begin_pos; expr_it != loop_end_pos; ++expr_it) { - const auto expr = *expr_it; - expr->set_loop_ids(loop_ids); - } -} - void LinearIR::LoopManager::mark_loop(LinearIR::constExprIt loop_begin_pos, LinearIR::constExprIt loop_end_pos, size_t loop_depth, size_t vector_size) { @@ -138,6 +128,10 @@ void LinearIR::LoopManager::mark_loop(LinearIR::constExprIt loop_begin_pos, *(lhs.rbegin() + index) = std::max(lhs_value, rhs_value); }; + auto is_outside_loop = [](const std::vector& subtensor) { + return std::all_of(subtensor.begin(), subtensor.end(), [](size_t lhs) { return lhs == PortDescriptor::Scheduling::FULL_DIM; }); + }; + std::vector loop_subtensor; std::vector loop_tensor(loop_depth, 1); for (const auto& exit_point : loop_exit_points) { @@ -147,16 +141,18 @@ void LinearIR::LoopManager::mark_loop(LinearIR::constExprIt loop_begin_pos, subtensor.resize(loop_depth, 1); subtensor[subtensor.size() - 1] = vector_size; } + + const size_t resizing_value = is_outside_loop(subtensor) ? 
PortDescriptor::Scheduling::FULL_DIM : 1; while (subtensor.size() < loop_depth) - subtensor.insert(subtensor.begin(), 1); + subtensor.insert(subtensor.begin(), resizing_value); if (loop_subtensor.empty()) loop_subtensor = subtensor; - OPENVINO_ASSERT(loop_subtensor == subtensor, "Incorrect scheduling parameters for loop"); + + OPENVINO_ASSERT(std::equal(loop_subtensor.crbegin(), loop_subtensor.crbegin() + loop_depth, subtensor.crbegin()), + "Incorrect scheduling parameters for loop"); for (size_t dim_idx = 0; dim_idx < loop_depth; ++dim_idx) { - if (*(subtensor.rbegin() + dim_idx) == PortDescriptor::Scheduling::FULL_DIM) { - *(loop_tensor.rbegin() + dim_idx) = PortDescriptor::Scheduling::FULL_DIM; - } else { + if (*(subtensor.rbegin() + dim_idx) != PortDescriptor::Scheduling::FULL_DIM) { broadcast(loop_tensor, tensor, dim_idx); } } diff --git a/src/common/snippets/src/lowered/pass/mark_loops.cpp b/src/common/snippets/src/lowered/pass/mark_loops.cpp index f35352ce20e900..955b95d6cd036d 100644 --- a/src/common/snippets/src/lowered/pass/mark_loops.cpp +++ b/src/common/snippets/src/lowered/pass/mark_loops.cpp @@ -41,11 +41,7 @@ bool MarkLoops::run(LinearIR& linear_ir) { auto loop_begin_pos = expr_it; auto loop_end_pos = loop_begin_pos; - const bool loop_is_outside = expr->is_outside_loop(); - const bool loop_is_inside = !loop_is_outside; - - bool current_is_outside = loop_is_outside; - bool current_is_inside = loop_is_inside; + bool collapse = true; do { const auto& prev_expr = *loop_end_pos; loop_end_pos++; @@ -79,17 +75,10 @@ bool MarkLoops::run(LinearIR& linear_ir) { is_connected = true; } } - if (is_conflicted || !is_connected) - break; - - current_is_outside = current_expr->is_outside_loop(); - } while (current_is_inside == loop_is_inside && current_is_outside == loop_is_outside); - - if (loop_is_inside) - loop_manager->mark_loop(loop_begin_pos, loop_end_pos, loop_depth, m_vector_size); - else if (loop_is_outside) - loop_manager->skipped_mark(loop_begin_pos, 
loop_end_pos, loop_depth); + collapse = is_connected && !is_conflicted; + } while (collapse); + loop_manager->mark_loop(loop_begin_pos, loop_end_pos, loop_depth, m_vector_size); expr_it = std::prev(loop_end_pos); } diff --git a/src/common/snippets/src/pass/fuse_transpose_brgemm.cpp b/src/common/snippets/src/pass/fuse_transpose_brgemm.cpp index d1be3ee57b0b13..ad5de3d5b1e264 100644 --- a/src/common/snippets/src/pass/fuse_transpose_brgemm.cpp +++ b/src/common/snippets/src/pass/fuse_transpose_brgemm.cpp @@ -64,10 +64,9 @@ FuseTransposeBrgemm::FuseTransposeBrgemm() { const auto& brgemm_out = brgemm->output(0); const auto& transpose_out = m.get_match_value(); const auto& const_order = ov::as_type_ptr(transpose_out.get_node_shared_ptr()->get_input_node_shared_ptr(1)); - ngraph::snippets::PortManager::set_port_descriptor_ptr(brgemm_out, - std::make_shared(transpose_out.get_shape(), - std::vector{}, - const_order->cast_vector())); + const auto& original_port = ngraph::snippets::PortManager::get_port_descriptor_ptr(brgemm_out); + original_port->set_tensor(transpose_out.get_shape()); + original_port->set_layout(const_order->cast_vector()); for (const auto& in : transpose_out.get_target_inputs()) in.replace_source_output(brgemm->output(0)); } @@ -79,11 +78,10 @@ FuseTransposeBrgemm::FuseTransposeBrgemm() { const auto& transpose = as_type_ptr(in_value.get_node_shared_ptr()); const auto& const_order = ov::as_type_ptr(transpose->get_input_node_shared_ptr(1)); brgemm->set_argument(i, transpose->input_value(0)); - ngraph::snippets::PortManager::set_port_descriptor_ptr(in, - std::make_shared(transpose->get_input_shape(0), - std::vector{}, - const_order->cast_vector())); - // At the moment we support fused Transpose only after Parameter -> we can update port descriptor for Paramarer as well. 
+ const auto& original_port = ngraph::snippets::PortManager::get_port_descriptor_ptr(in); + original_port->set_tensor(transpose->get_input_shape(0)); + original_port->set_layout(const_order->cast_vector()); + // At the moment we support fused Transpose only after Parameter -> we can update port descriptor for Parameter as well. // Note: It's needed for BrgemmCPU ngraph::snippets::PortManager::set_port_descriptor_ptr(transpose->input_value(0), std::make_shared(transpose->get_input_shape(0), diff --git a/src/common/snippets/src/pass/matmul_to_brgemm.cpp b/src/common/snippets/src/pass/matmul_to_brgemm.cpp index bd93245cd368fa..81745a883921d2 100644 --- a/src/common/snippets/src/pass/matmul_to_brgemm.cpp +++ b/src/common/snippets/src/pass/matmul_to_brgemm.cpp @@ -19,7 +19,7 @@ namespace pass { void MatMulToBrgemm::init_ports(const std::shared_ptr& brgemm) const { auto get_subtensor = [](const ov::Shape& shape) { - return std::vector{shape[shape.size() - 2], shape[shape.size() - 1]}; + return std::vector{ PortDescriptor::Scheduling::FULL_DIM, PortDescriptor::Scheduling::FULL_DIM }; }; for (const auto& input : brgemm->inputs()) { const auto tensor = input.get_shape(); @@ -52,8 +52,6 @@ MatMulToBrgemm::MatMulToBrgemm() { ngraph::copy_runtime_info(matmul, nodes); ngraph::replace_node(matmul, nodes.back()); init_ports(brgemm); - // TODO: At the moment Brgemm is executed outside Loop. 
When Blocking is supported, remove it - utils::set_outside_loop_value(brgemm, true); return true; }; diff --git a/src/common/snippets/src/utils.cpp b/src/common/snippets/src/utils.cpp index 75ea99bdedac0d..efc4ec0bb67d8c 100644 --- a/src/common/snippets/src/utils.cpp +++ b/src/common/snippets/src/utils.cpp @@ -131,19 +131,6 @@ ov::PartialShape get_port_planar_shape(const Output& out) { return utils::get_reordered_planar_shape(ov::Shape{td->get_tensor()}, td->get_layout()); } -bool get_outside_loop_value(const std::shared_ptr& node) { - auto& rt_info = node->get_rt_info(); - const auto& found = rt_info.find("snippets::is_outside_loop"); - if (found == rt_info.end()) { - return false; // Default value: Expression should be executed inside - } - return found->second.as(); -} -void set_outside_loop_value(const std::shared_ptr& node, bool is_outside) { - auto& rt_info = node->get_rt_info(); - rt_info["snippets::is_outside_loop"] = is_outside; -} - } // namespace utils } // namespace snippets } // namespace ngraph diff --git a/src/plugins/intel_cpu/src/transformations/snippets/x64/pass/brgemm_to_brgemm_cpu.cpp b/src/plugins/intel_cpu/src/transformations/snippets/x64/pass/brgemm_to_brgemm_cpu.cpp index a7ba620a388af6..1506231bd6f686 100644 --- a/src/plugins/intel_cpu/src/transformations/snippets/x64/pass/brgemm_to_brgemm_cpu.cpp +++ b/src/plugins/intel_cpu/src/transformations/snippets/x64/pass/brgemm_to_brgemm_cpu.cpp @@ -22,6 +22,21 @@ namespace ov { namespace intel_cpu { +namespace { +inline std::vector make_subtensor(const ov::Shape& tensor) { + return std::vector(std::min(tensor.size(), 2lu), ngraph::snippets::PortDescriptor::Scheduling::FULL_DIM); +} +template +void set_full_port_desc(const T& port) { + const auto& shape = port.get_shape(); + ngraph::snippets::PortManager::set_port_descriptor_ptr(port, std::make_shared(shape, + make_subtensor(shape))); +} +template +void set_port_desc(const T& port, Args... 
params) { + ngraph::snippets::PortManager::set_port_descriptor_ptr(port, std::make_shared(params...)); +} +} // namespace pass::BrgemmToBrgemmCPU::BrgemmToBrgemmCPU() { MATCHER_SCOPE(BrgemmToBrgemmCPU); @@ -40,6 +55,10 @@ pass::BrgemmToBrgemmCPU::BrgemmToBrgemmCPU() { return false; } + const auto& brgemm_in0_desc = ngraph::snippets::PortManager::get_port_descriptor_ptr(brgemm->input(0)); + const auto& brgemm_in1_desc = ngraph::snippets::PortManager::get_port_descriptor_ptr(brgemm->input(1)); + const auto& brgemm_out_desc = ngraph::snippets::PortManager::get_port_descriptor_ptr(brgemm->output(0)); + const auto dimsMatMulIn0 = ngraph::snippets::utils::get_port_planar_shape(brgemm->input_value(0)).get_shape(); const auto dimsMatMulIn1 = ngraph::snippets::utils::get_port_planar_shape(brgemm->input_value(1)).get_shape(); @@ -65,25 +84,25 @@ pass::BrgemmToBrgemmCPU::BrgemmToBrgemmCPU() { const auto copy_b_type = with_comp ? BrgemmCopyB::WithCompensations : BrgemmCopyB::OnlyRepacking; brgemm_repacking = std::make_shared(brgemm->input_value(1), element_type_a, copy_b_type, offset_b); const auto buffer = std::make_shared(brgemm_repacking->output(0)); - ngraph::snippets::utils::set_outside_loop_value(brgemm_repacking, true); - ngraph::snippets::utils::set_outside_loop_value(buffer, true); - // copy port desc from MatMul input 1 - const auto& brgemm_in1_desc = ngraph::snippets::PortManager::get_port_descriptor_ptr(brgemm->input(1)); - ngraph::snippets::PortManager::set_port_descriptor_ptr(brgemm_repacking->input(0), - std::make_shared(brgemm_in1_desc->get_tensor(), - brgemm_in1_desc->get_subtensor(), - brgemm_in1_desc->get_layout())); + set_port_desc(brgemm_repacking->input(0), brgemm_in1_desc->get_tensor(), brgemm_in1_desc->get_subtensor(), brgemm_in1_desc->get_layout()); + set_full_port_desc(brgemm_repacking->output(0)); + set_full_port_desc(buffer->input(0)); + set_full_port_desc(buffer->output(0)); if (with_amx) { const auto scratch = 
std::make_shared(ov::Shape{BrgemmCPU::SCRATCH_BYTE_SIZE}); brgemm_cpu = std::make_shared(brgemm->input_value(0), buffer, scratch, BrgemmCPU::Type::AMX, offset_a, offset_b, 0, offset_c); - ngraph::snippets::utils::set_outside_loop_value(scratch, true); + set_full_port_desc(scratch->output(0)); + set_full_port_desc(brgemm_cpu->input(2)); } else if (with_comp) { const auto scratch = std::make_shared(brgemm_repacking->output(1)); brgemm_cpu = std::make_shared(brgemm->input_value(0), buffer, scratch, BrgemmCPU::Type::WithCompensations, offset_a, offset_b, 0, offset_c); - ngraph::snippets::utils::set_outside_loop_value(scratch, true); + set_full_port_desc(brgemm_repacking->output(1)); + set_full_port_desc(scratch->input(0)); + set_full_port_desc(scratch->output(0)); + set_full_port_desc(brgemm_cpu->input(2)); } else if (one_of(element_type_a, ov::element::u8, ov::element::bf16)) { brgemm_cpu = std::make_shared(brgemm->input_value(0), buffer, BrgemmCPU::Type::WithDataRepacking, offset_a, offset_b, offset_c); @@ -94,28 +113,15 @@ pass::BrgemmToBrgemmCPU::BrgemmToBrgemmCPU() { brgemm_cpu->set_friendly_name(brgemm->get_friendly_name()); ngraph::replace_node(brgemm, brgemm_cpu); - // TODO: At the moment Brgemm is executed outside Loop. 
When Blocking is supported, remove it - ngraph::snippets::utils::set_outside_loop_value(brgemm_cpu, true); // Transfer ports - const auto& brgemm_in0_desc = ngraph::snippets::PortManager::get_port_descriptor_ptr(brgemm->input(0)); - ngraph::snippets::PortManager::set_port_descriptor_ptr(brgemm_cpu->input(0), - std::make_shared(brgemm_in0_desc->get_tensor(), - brgemm_in0_desc->get_subtensor(), - brgemm_in0_desc->get_layout())); - if (!brgemm_repacking) { - const auto& brgemm_in1_desc = ngraph::snippets::PortManager::get_port_descriptor_ptr(brgemm->input(1)); - ngraph::snippets::PortManager::set_port_descriptor_ptr(brgemm_cpu->input(1), - std::make_shared(brgemm_in1_desc->get_tensor(), - brgemm_in1_desc->get_subtensor(), - brgemm_in1_desc->get_layout())); + set_port_desc(brgemm_cpu->input(0), brgemm_in0_desc->get_tensor(), brgemm_in0_desc->get_subtensor(), brgemm_in0_desc->get_layout()); + if (brgemm_repacking) { + set_full_port_desc(brgemm_cpu->input(1)); + } else { + set_port_desc(brgemm_cpu->input(1), brgemm_in1_desc->get_tensor(), brgemm_in1_desc->get_subtensor(), brgemm_in1_desc->get_layout()); } - - const auto& brgemm_out_desc = ngraph::snippets::PortManager::get_port_descriptor_ptr(brgemm->output(0)); - ngraph::snippets::PortManager::set_port_descriptor_ptr(brgemm_cpu->output(0), - std::make_shared(brgemm_out_desc->get_tensor(), - brgemm_out_desc->get_subtensor(), - brgemm_out_desc->get_layout())); + set_port_desc(brgemm_cpu->output(0), brgemm_out_desc->get_tensor(), brgemm_out_desc->get_subtensor(), brgemm_out_desc->get_layout()); // need to run validate_and_infer_types manually: either input shapes were updated or // output Layout was updated (out shape will be updated in validate_and_infer_types()) From 06529f087470a01858227e6d896adcb9f6c17090 Mon Sep 17 00:00:00 2001 From: Alexandra Sidorova Date: Fri, 5 May 2023 11:25:43 +0400 Subject: [PATCH 05/13] TensorDescriptor -> ExpressionPort --- .../include/snippets/lowered/expression.hpp | 5 +- 
.../snippets/lowered/expression_port.hpp | 58 +++++++++++++++++ .../include/snippets/lowered/linear_ir.hpp | 6 +- .../include/snippets/lowered/loop_manager.hpp | 20 +++--- .../snippets/lowered/pass/fuse_loops.hpp | 6 +- .../snippets/lowered/pass/init_loops.hpp | 8 +-- .../snippets/lowered/pass/insert_buffers.hpp | 2 +- .../lowered/pass/insert_load_store.hpp | 4 +- .../include/snippets/lowered/tensor.hpp | 65 ++++--------------- .../snippets/src/lowered/expression.cpp | 8 +-- .../src/lowered/expression_factory.cpp | 12 ++-- .../snippets/src/lowered/expression_port.cpp | 42 ++++++++++++ src/common/snippets/src/lowered/linear_ir.cpp | 10 +-- .../snippets/src/lowered/loop_manager.cpp | 14 ++-- .../snippets/src/lowered/pass/fuse_loops.cpp | 18 ++--- .../snippets/src/lowered/pass/init_loops.cpp | 14 ++-- .../src/lowered/pass/insert_buffers.cpp | 6 +- .../src/lowered/pass/insert_load_store.cpp | 10 +-- .../src/lowered/pass/insert_tail_loop.cpp | 2 +- .../snippets/src/lowered/pass/mark_loops.cpp | 2 +- .../src/lowered/pass/propagate_layout.cpp | 2 +- .../lowered/pass/softmax_decomposition.cpp | 34 +++++----- src/common/snippets/src/lowered/tensor.cpp | 58 +++++------------ 23 files changed, 222 insertions(+), 184 deletions(-) create mode 100644 src/common/snippets/include/snippets/lowered/expression_port.hpp create mode 100644 src/common/snippets/src/lowered/expression_port.cpp diff --git a/src/common/snippets/include/snippets/lowered/expression.hpp b/src/common/snippets/include/snippets/lowered/expression.hpp index ac0aa317f5f6b6..0665f7505559d1 100644 --- a/src/common/snippets/include/snippets/lowered/expression.hpp +++ b/src/common/snippets/include/snippets/lowered/expression.hpp @@ -10,6 +10,7 @@ #include "snippets/emitter.hpp" #include "snippets/target_machine.hpp" #include "snippets/lowered/tensor.hpp" +#include "snippets/lowered/expression_port.hpp" namespace ngraph { @@ -47,8 +48,8 @@ class Expression : public std::enable_shared_from_this { void init_emitter(const 
std::shared_ptr& target); - TensorDescriptor input_port(size_t i); - TensorDescriptor output_port(size_t i); + ExpressionPort input_port(size_t i); + ExpressionPort output_port(size_t i); protected: // Note: The constructor and tensor initialization are private since an expression can be created only by Linear IR. diff --git a/src/common/snippets/include/snippets/lowered/expression_port.hpp b/src/common/snippets/include/snippets/lowered/expression_port.hpp new file mode 100644 index 00000000000000..d1b8b0f088a9fa --- /dev/null +++ b/src/common/snippets/include/snippets/lowered/expression_port.hpp @@ -0,0 +1,58 @@ +// Copyright (C) 2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include +#include + +#include "snippets/port_descriptor.hpp" + + +namespace ngraph { +namespace snippets { +namespace lowered { + +class Expression; +class ExpressionPort { +public: + enum Type { + Input, + Output + }; + + ExpressionPort() = default; + explicit ExpressionPort(const std::weak_ptr& expr, Type type, size_t port, + const std::vector& tensor = {}, const std::vector& layout = {}, const std::vector& subtensor = {}); + explicit ExpressionPort(const std::weak_ptr& expr, Type type, size_t port, const PortDescriptorPtr& port_desc = nullptr); + + std::shared_ptr get_expr_ptr() const; + const std::weak_ptr& get_expr_wptr() const { return m_expr; } + Type get_type() const { return m_type; } + size_t get_index() const { return m_port_index; } + + std::vector get_tensor() const { return m_port_desc->get_tensor(); } + std::vector get_layout() const { return m_port_desc->get_layout(); } + std::vector get_subtensor() const { return m_port_desc->get_subtensor(); } + const PortDescriptorPtr& get_port_descriptor() const { return m_port_desc; } + + void set_tensor(const std::vector& tensor) { m_port_desc->set_tensor(tensor); } + void set_layout(const std::vector& layout) { m_port_desc->set_layout(layout); } + void set_subtensor(const std::vector& subtensor) { 
m_port_desc->set_subtensor(subtensor); } + void set_port_descriptor(const PortDescriptorPtr& desc) { m_port_desc = desc; } + + friend bool operator==(const ExpressionPort& lhs, const ExpressionPort& rhs); + friend bool operator!=(const ExpressionPort& lhs, const ExpressionPort& rhs); + friend bool operator<(const ExpressionPort& lhs, const ExpressionPort& rhs); + friend std::ostream& operator<<(std::ostream&, const ExpressionPort& td); + +private: + std::weak_ptr m_expr; + Type m_type = Type::Output; + size_t m_port_index = 0; + PortDescriptorPtr m_port_desc; +}; +} // namespace lowered +} // namespace snippets +} // namespace ngraph diff --git a/src/common/snippets/include/snippets/lowered/linear_ir.hpp b/src/common/snippets/include/snippets/lowered/linear_ir.hpp index ac4730ca79f9d3..f00f9e272bdb79 100644 --- a/src/common/snippets/include/snippets/lowered/linear_ir.hpp +++ b/src/common/snippets/include/snippets/lowered/linear_ir.hpp @@ -52,10 +52,10 @@ class LinearIR { ExpressionPtr get_expr_by_node(const std::shared_ptr& n) const; - void replace_input(const std::vector& consumers, const TensorPtr& to); - void replace_input(const TensorDescriptor& expr_port, const TensorPtr& to); + void replace_input(const std::vector& consumers, const TensorPtr& to); + void replace_input(const ExpressionPort& expr_port, const TensorPtr& to); void replace_input(const ExpressionPtr& expr, size_t port, const TensorPtr& to); - void replace_output(const TensorDescriptor& expr_port, const TensorPtr& to); + void replace_output(const ExpressionPort& expr_port, const TensorPtr& to); void replace_output(const ExpressionPtr& expr, size_t port, const TensorPtr& to); /** diff --git a/src/common/snippets/include/snippets/lowered/loop_manager.hpp b/src/common/snippets/include/snippets/lowered/loop_manager.hpp index 79c29ce856fa87..225be5ff77f9e3 100644 --- a/src/common/snippets/include/snippets/lowered/loop_manager.hpp +++ b/src/common/snippets/include/snippets/lowered/loop_manager.hpp @@ 
-23,8 +23,8 @@ class LinearIR::LoopManager { public: LoopInfo() = default; LoopInfo(size_t work_amount, size_t increment, - const std::vector& entries, - const std::vector& exits) + const std::vector& entries, + const std::vector& exits) : work_amount(work_amount), increment(increment), entry_exprs(entries), exit_exprs(exits) {} size_t work_amount = 0; size_t increment = 0; @@ -32,8 +32,8 @@ class LinearIR::LoopManager { // - The position before first entry expr is Loop Begin position // - The position after last exit expr is Loop End position // Note: Scalars aren't entry expressions but can be before first entry expr in Linear IR - std::vector entry_exprs = {}; - std::vector exit_exprs = {}; + std::vector entry_exprs = {}; + std::vector exit_exprs = {}; }; using LoopInfoPtr = std::shared_ptr; @@ -51,16 +51,16 @@ class LinearIR::LoopManager { size_t idx, size_t work_amount, size_t work_amount_increment, - const std::vector& entries, - const std::vector& exits); + const std::vector& entries, + const std::vector& exits); void get_loop_bounds(const LinearIR& linear_ir, size_t loop_id, LinearIR::constExprIt& loop_begin_pos, LinearIR::constExprIt& loop_end_pos) const; static void get_loop_bounds(const LinearIR& linear_ir, - const std::vector& entries, - const std::vector& exits, + const std::vector& entries, + const std::vector& exits, LinearIR::constExprIt& loop_begin_pos, LinearIR::constExprIt& loop_end_pos, size_t loop_id = Expression::LOOP_NULL_ID); @@ -71,8 +71,8 @@ class LinearIR::LoopManager { size_t loop_id, size_t idx); static void get_io_loop_ports(LinearIR::constExprIt loop_begin_pos, LinearIR::constExprIt loop_end_pos, - std::vector& entries, - std::vector& exits); + std::vector& entries, + std::vector& exits); std::map m_map = {}; size_t next_id = 0; diff --git a/src/common/snippets/include/snippets/lowered/pass/fuse_loops.hpp b/src/common/snippets/include/snippets/lowered/pass/fuse_loops.hpp index 288c267f33dba3..87314543d50af8 100644 --- 
a/src/common/snippets/include/snippets/lowered/pass/fuse_loops.hpp +++ b/src/common/snippets/include/snippets/lowered/pass/fuse_loops.hpp @@ -28,14 +28,14 @@ class FuseLoops : public Transformation { static bool can_be_fused(const LinearIR::LoopManager::LoopInfoPtr& loop_current, const LinearIR::LoopManager::LoopInfoPtr& loop_target); static bool fuse_upper_into_current(LinearIR& linear_ir, const LinearIR::LoopManagerPtr& loop_manager, - const TensorDescriptor& current_entry_point, const TensorDescriptor& target_exit_point, + const ExpressionPort& current_entry_point, const ExpressionPort& target_exit_point, size_t current_loop_id, size_t target_loop_id, size_t dim_idx, LinearIR::constExprIt& current_loop_begin_pos, LinearIR::constExprIt& current_loop_end_pos); static bool fuse_lower_into_current(LinearIR& linear_ir, const LinearIR::LoopManagerPtr& loop_manager, - const TensorDescriptor& current_entry_point, const TensorDescriptor& target_exit_point, + const ExpressionPort& current_entry_point, const ExpressionPort& target_exit_point, size_t current_loop_id, size_t target_loop_id, size_t dim_idx, LinearIR::constExprIt& current_loop_begin_pos, LinearIR::constExprIt& current_loop_end_pos); - static void fuse_points(std::vector& exit_points, std::vector& entry_points, + static void fuse_points(std::vector& exit_points, std::vector& entry_points, LinearIR::constExprIt loop_begin_pos, LinearIR::constExprIt loop_end_pos); }; diff --git a/src/common/snippets/include/snippets/lowered/pass/init_loops.hpp b/src/common/snippets/include/snippets/lowered/pass/init_loops.hpp index 48bfecd7c471d2..064c5200170e52 100644 --- a/src/common/snippets/include/snippets/lowered/pass/init_loops.hpp +++ b/src/common/snippets/include/snippets/lowered/pass/init_loops.hpp @@ -27,12 +27,12 @@ class InitLoops : public Transformation { private: bool insertion(LinearIR& linear_ir, const LinearIR::LoopManager::LoopInfoPtr& loop_info, size_t loop_id, size_t dim_idx, bool has_outer_loop); - 
std::vector init_ptr_increments(const std::vector& loop_inputs, - const std::vector& loop_outputs, + std::vector init_ptr_increments(const std::vector& loop_inputs, + const std::vector& loop_outputs, size_t dim_idx) const; std::vector init_finalization_offsets(const std::vector& finalization_offsets, size_t work_amount) const; - std::vector init_element_type_sizes(const std::vector& loop_inputs, - const std::vector& loop_outputs); + std::vector init_element_type_sizes(const std::vector& loop_inputs, + const std::vector& loop_outputs); }; } // namespace pass diff --git a/src/common/snippets/include/snippets/lowered/pass/insert_buffers.hpp b/src/common/snippets/include/snippets/lowered/pass/insert_buffers.hpp index 70d769c8faed5c..9abded985e60c7 100644 --- a/src/common/snippets/include/snippets/lowered/pass/insert_buffers.hpp +++ b/src/common/snippets/include/snippets/lowered/pass/insert_buffers.hpp @@ -27,7 +27,7 @@ class InsertBuffers : public Transformation { private: void insertion(LinearIR& linear_ir, const LinearIR::LoopManagerPtr& loop_manager, size_t loop_id, - const std::vector& loop_entries, const std::vector& loop_exits); + const std::vector& loop_entries, const std::vector& loop_exits); LinearIR::constExprIt insertion_position(const LinearIR& linear_ir, const LinearIR::LoopManagerPtr& loop_manager, diff --git a/src/common/snippets/include/snippets/lowered/pass/insert_load_store.hpp b/src/common/snippets/include/snippets/lowered/pass/insert_load_store.hpp index 6d9bde2b26f3a5..bbc29656084324 100644 --- a/src/common/snippets/include/snippets/lowered/pass/insert_load_store.hpp +++ b/src/common/snippets/include/snippets/lowered/pass/insert_load_store.hpp @@ -30,9 +30,9 @@ class InsertLoadStore : public Transformation { bool insert_load(LinearIR& linear_ir, const LinearIR::constExprIt& data_expr_it); bool insert_store(LinearIR& linear_ir, const LinearIR::constExprIt& data_expr_it); void update_loops(const LinearIR::LoopManagerPtr& loop_manager, const 
std::vector& loop_ids, - const TensorDescriptor& actual_port, const std::vector& target_ports, bool is_entry = true); + const ExpressionPort& actual_port, const std::vector& target_ports, bool is_entry = true); void update_loop(const LinearIR::LoopManager::LoopInfoPtr& loop_info, - const TensorDescriptor& actual_port, const std::vector& target_ports, bool is_entry = true); + const ExpressionPort& actual_port, const std::vector& target_ports, bool is_entry = true); std::vector get_loops_for_update(const std::vector& loop_ids, size_t loop_id); size_t m_vector_size; diff --git a/src/common/snippets/include/snippets/lowered/tensor.hpp b/src/common/snippets/include/snippets/lowered/tensor.hpp index 9c291540a39979..b17daa27b0f8dd 100644 --- a/src/common/snippets/include/snippets/lowered/tensor.hpp +++ b/src/common/snippets/include/snippets/lowered/tensor.hpp @@ -9,6 +9,8 @@ #include "snippets/port_descriptor.hpp" +#include "expression_port.hpp" + namespace ngraph { namespace snippets { @@ -16,61 +18,22 @@ namespace lowered { class Expression; -class TensorDescriptor { -public: - enum Type { - Input, - Output - }; - - TensorDescriptor() = default; - explicit TensorDescriptor(const std::weak_ptr& expr, Type type, size_t port, - const std::vector& tensor = {}, const std::vector& layout = {}, const std::vector& subtensor = {}); - explicit TensorDescriptor(const std::weak_ptr& expr, Type type, size_t port, const PortDescriptorPtr& port_desc = nullptr); - - std::shared_ptr get_expr_ptr() const; - const std::weak_ptr& get_expr_wptr() const { return m_expr; } - Type get_type() const { return m_type; } - size_t get_index() const { return m_port_index; } - - std::vector get_tensor() const { return m_port_desc->get_tensor(); } - std::vector get_layout() const { return m_port_desc->get_layout(); } - std::vector get_subtensor() const { return m_port_desc->get_subtensor(); } - const PortDescriptorPtr& get_port_descriptor() const { return m_port_desc; } - - void set_tensor(const 
std::vector& tensor) { m_port_desc->set_tensor(tensor); } - void set_layout(const std::vector& layout) { m_port_desc->set_layout(layout); } - void set_subtensor(const std::vector& subtensor) { m_port_desc->set_subtensor(subtensor); } - void set_port_descriptor(const PortDescriptorPtr& desc) { m_port_desc = desc; } - - friend bool operator==(const TensorDescriptor& lhs, const TensorDescriptor& rhs); - friend bool operator!=(const TensorDescriptor& lhs, const TensorDescriptor& rhs); - friend bool operator<(const TensorDescriptor& lhs, const TensorDescriptor& rhs); - friend std::ostream& operator<<(std::ostream&, const TensorDescriptor& td); - -private: - std::weak_ptr m_expr; - Type m_type = Type::Output; - size_t m_port_index = 0; - PortDescriptorPtr m_port_desc; -}; - class Tensor { public: Tensor() = default; - explicit Tensor(const TensorDescriptor& source_descriptor, const std::vector& consumer_descriptors = {}); + explicit Tensor(const ExpressionPort& source_descriptor, const std::vector& consumer_descriptors = {}); - const TensorDescriptor& get_source() const { return m_source_port; } - std::vector get_consumers() const { return m_consumer_ports; } + const ExpressionPort& get_source() const { return m_source_port; } + std::vector get_consumers() const { return m_consumer_ports; } - void add_consumer(const TensorDescriptor& consumer); - void remove_consumer(const TensorDescriptor& consumer); - bool found_consumer(const TensorDescriptor& consumer) const; - std::vector::const_iterator find_consumer(const TensorDescriptor& consumer) const; - std::vector::iterator find_consumer(const TensorDescriptor& consumer); + void add_consumer(const ExpressionPort& consumer); + void remove_consumer(const ExpressionPort& consumer); + bool found_consumer(const ExpressionPort& consumer) const; + std::vector::const_iterator find_consumer(const ExpressionPort& consumer) const; + std::vector::iterator find_consumer(const ExpressionPort& consumer); - std::vector 
get_conflicted_consumers() const; - bool is_conflicted_consumer(const TensorDescriptor& consumer) const; + std::vector get_conflicted_consumers() const; + bool is_conflicted_consumer(const ExpressionPort& consumer) const; // The scheduling params of Tensor is controlled by source expression port std::vector get_tensor() const { return m_source_port.get_tensor(); } @@ -83,8 +46,8 @@ class Tensor { void set_port_descriptor(const PortDescriptorPtr& desc) { m_source_port.set_port_descriptor(desc); } private: - TensorDescriptor m_source_port; - std::vector m_consumer_ports; + ExpressionPort m_source_port; + std::vector m_consumer_ports; }; using TensorPtr = std::shared_ptr; diff --git a/src/common/snippets/src/lowered/expression.cpp b/src/common/snippets/src/lowered/expression.cpp index 66c0d91bbe38c3..6ac749885e39ef 100644 --- a/src/common/snippets/src/lowered/expression.cpp +++ b/src/common/snippets/src/lowered/expression.cpp @@ -69,19 +69,19 @@ void Expression::remove_loop_id(size_t id) { *it = Expression::LOOP_NULL_ID; } -TensorDescriptor Expression::input_port(size_t i) { +ExpressionPort Expression::input_port(size_t i) { OPENVINO_ASSERT(i < m_inputs.size(), "Failed to get input port: target input port must be less than input count!"); const auto& input = m_inputs[i]; const auto consumers = input->get_consumers(); const auto found = std::find_if(consumers.begin(), consumers.end(), - [&](const TensorDescriptor& desc) { + [&](const ExpressionPort& desc) { return desc.get_index() == i && desc.get_expr_ptr() == this->shared_from_this(); }); - OPENVINO_ASSERT(found != consumers.end(), "Input TensorDescriptor for Expression hasn't found in input Tensor!"); + OPENVINO_ASSERT(found != consumers.end(), "Input ExpressionPort for Expression hasn't found in input Tensor!"); return *found; } -TensorDescriptor Expression::output_port(size_t i) { +ExpressionPort Expression::output_port(size_t i) { OPENVINO_ASSERT(i < m_outputs.size(), "Failed to get output port: target output 
port must be less than output count!"); return m_outputs[i]->get_source(); } diff --git a/src/common/snippets/src/lowered/expression_factory.cpp b/src/common/snippets/src/lowered/expression_factory.cpp index d3cd1a2edcb283..6f95f919b40b08 100644 --- a/src/common/snippets/src/lowered/expression_factory.cpp +++ b/src/common/snippets/src/lowered/expression_factory.cpp @@ -48,7 +48,7 @@ std::vector LinearIR::BaseExpressionFactory::create_expression_inputs const auto in_index = input.get_index(); const auto& parent_expr = m_linear_ir.get_expr_by_node(input_source.get_node_shared_ptr()); const auto& tensor = parent_expr->output(input_source.get_index()); - const auto tensor_desc = TensorDescriptor(expr, TensorDescriptor::Type::Input, in_index, PortManager::get_port_descriptor_ptr(input)); + const auto tensor_desc = ExpressionPort(expr, ExpressionPort::Type::Input, in_index, PortManager::get_port_descriptor_ptr(input)); tensor->add_consumer(tensor_desc); inputs[in_index] = tensor; } @@ -62,7 +62,7 @@ std::vector LinearIR::BaseExpressionFactory::create_expression_output std::vector outputs(node->get_output_size(), nullptr); for (const auto& output : node->outputs()) { const auto out_index = output.get_index(); - const auto tensor_desc = TensorDescriptor(expr, TensorDescriptor::Type::Output, out_index, PortManager::get_port_descriptor_ptr(output)); + const auto tensor_desc = ExpressionPort(expr, ExpressionPort::Type::Output, out_index, PortManager::get_port_descriptor_ptr(output)); outputs[out_index] = std::make_shared(tensor_desc); } return outputs; @@ -73,12 +73,12 @@ void LinearIR::BaseExpressionFactory::validate_inputs(const ExpressionPtr& expr, const auto& input = inputs[i]; const auto consumers = input->get_consumers(); const auto found = std::find_if(consumers.begin(), consumers.end(), - [&](const TensorDescriptor& desc) { + [&](const ExpressionPort& desc) { return desc.get_index() == i && desc.get_expr_ptr() == expr; }); if (found == consumers.end()) { const auto 
port_desc = PortManager::get_port_descriptor_ptr(expr->get_node()->input(i)); - const auto tensor_desc = TensorDescriptor(expr, TensorDescriptor::Type::Input, i, port_desc); + const auto tensor_desc = ExpressionPort(expr, ExpressionPort::Type::Input, i, port_desc); input->add_consumer(tensor_desc); } } @@ -185,13 +185,13 @@ void LinearIR::LoopEndExpressionFactory::validate_inputs(const ExpressionPtr& ex const auto& input = inputs[i]; const auto consumers = input->get_consumers(); const auto found = std::find_if(consumers.begin(), consumers.end(), - [&](const TensorDescriptor& desc) { + [&](const ExpressionPort& desc) { return desc.get_index() == i && desc.get_expr_ptr()== expr; }); if (found == consumers.end()) { // LoopEnd doesn't have input ports. So consumer for the Tensor should have the same Port Descriptor like source const auto& port_desc = input->get_source().get_port_descriptor(); - const auto tensor_desc = TensorDescriptor(expr, TensorDescriptor::Type::Input, i, port_desc); + const auto tensor_desc = ExpressionPort(expr, ExpressionPort::Type::Input, i, port_desc); input->add_consumer(tensor_desc); } } diff --git a/src/common/snippets/src/lowered/expression_port.cpp b/src/common/snippets/src/lowered/expression_port.cpp new file mode 100644 index 00000000000000..6e064509ad21d9 --- /dev/null +++ b/src/common/snippets/src/lowered/expression_port.cpp @@ -0,0 +1,42 @@ +// Copyright (C) 2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "snippets/lowered/expression_port.hpp" + +#include "snippets/utils.hpp" + + +namespace ngraph { +namespace snippets { +namespace lowered { + +ExpressionPort::ExpressionPort(const std::weak_ptr& expr, Type type, size_t port, + const std::vector& tensor, const std::vector& layout, const std::vector& subtensor) + : m_expr(expr), m_type(type), m_port_index(port), m_port_desc(std::make_shared(tensor, subtensor, layout)) {} + +ExpressionPort::ExpressionPort(const std::weak_ptr& expr, Type type, size_t port, 
const PortDescriptorPtr& port_desc) + : m_expr(expr), m_type(type), m_port_index(port) { + PortDescriptorPtr local_port_desc = port_desc; + if (!local_port_desc) { + if (type == Type::Input) { + local_port_desc = PortManager::get_port_descriptor_ptr(expr.lock()->get_node()->input(port)); + } else if (type == Type::Output) { + local_port_desc = PortManager::get_port_descriptor_ptr(expr.lock()->get_node()->output(port)); + } else { + OPENVINO_THROW("ExpressionPort supports only Input and Output type!"); + } + } + + m_port_desc = local_port_desc; +} + +std::shared_ptr ExpressionPort::get_expr_ptr() const { + auto shared = m_expr.lock(); + OPENVINO_ASSERT(shared != nullptr, "Failed attempt to get shared pointer of source expression: nullptr"); + return shared; +} + +}// namespace lowered +}// namespace snippets +}// namespace ngraph diff --git a/src/common/snippets/src/lowered/linear_ir.cpp b/src/common/snippets/src/lowered/linear_ir.cpp index bf1f0c2caf4e4a..d6f36ba88c9bdf 100644 --- a/src/common/snippets/src/lowered/linear_ir.cpp +++ b/src/common/snippets/src/lowered/linear_ir.cpp @@ -180,7 +180,7 @@ ExpressionPtr LinearIR::get_expr_by_node(const std::shared_ptr& n) const { return found->second; } -void LinearIR::replace_input(const std::vector& consumers, const TensorPtr& to) { +void LinearIR::replace_input(const std::vector& consumers, const TensorPtr& to) { for (const auto& consumer_input : consumers) { replace_input(consumer_input, to); } @@ -190,11 +190,11 @@ void LinearIR::replace_input(const ExpressionPtr& expr, size_t port, const Tenso replace_input(expr->input_port(port), to); } -void LinearIR::replace_input(const TensorDescriptor& expr_port, const TensorPtr& to) { +void LinearIR::replace_input(const ExpressionPort& expr_port, const TensorPtr& to) { const auto port = expr_port.get_index(); const auto expr = expr_port.get_expr_ptr(); - OPENVINO_ASSERT(expr_port.get_type() == TensorDescriptor::Type::Input, "Failed to replace: target input port must have Input 
type"); + OPENVINO_ASSERT(expr_port.get_type() == ExpressionPort::Type::Input, "Failed to replace: target input port must have Input type"); OPENVINO_ASSERT(expr_port.get_index() < expr->get_input_count(), "Failed to replace: target input port must be less than input count!"); const auto& from = expr->input(port); @@ -212,11 +212,11 @@ void LinearIR::replace_output(const ExpressionPtr& expr, size_t port, const Tens replace_output(expr->output_port(port), to); } -void LinearIR::replace_output(const TensorDescriptor& expr_port, const TensorPtr& to) { +void LinearIR::replace_output(const ExpressionPort& expr_port, const TensorPtr& to) { const auto port = expr_port.get_index(); const auto expr = expr_port.get_expr_ptr(); - OPENVINO_ASSERT(expr_port.get_type() == TensorDescriptor::Type::Output, "Failed to replace: target output port must have Output type"); + OPENVINO_ASSERT(expr_port.get_type() == ExpressionPort::Type::Output, "Failed to replace: target output port must have Output type"); OPENVINO_ASSERT(port < expr->get_output_count(), "Failed to replace: target output port must be less than output count!"); const auto to_source_td = to->get_source(); OPENVINO_ASSERT(to_source_td.get_expr_ptr() == expr && to_source_td.get_index() == port, diff --git a/src/common/snippets/src/lowered/loop_manager.cpp b/src/common/snippets/src/lowered/loop_manager.cpp index cc6c099cb2a354..730c3e40c5b6db 100644 --- a/src/common/snippets/src/lowered/loop_manager.cpp +++ b/src/common/snippets/src/lowered/loop_manager.cpp @@ -48,8 +48,8 @@ void LinearIR::LoopManager::get_loop_bounds(const LinearIR &linear_ir, } void LinearIR::LoopManager::get_loop_bounds(const LinearIR &linear_ir, - const std::vector &entries, - const std::vector &exits, + const std::vector &entries, + const std::vector &exits, LinearIR::constExprIt &loop_begin_pos, LinearIR::constExprIt &loop_end_pos, size_t loop_id) { @@ -75,8 +75,8 @@ void LinearIR::LoopManager::get_loop_bounds(const LinearIR &linear_ir, void 
LinearIR::LoopManager::get_io_loop_ports(LinearIR::constExprIt loop_begin_pos, LinearIR::constExprIt loop_end_pos, - std::vector &entries, - std::vector &exits) { + std::vector &entries, + std::vector &exits) { entries.clear(); exits.clear(); for (auto expr_it = loop_begin_pos; expr_it != loop_end_pos; ++expr_it) { @@ -110,7 +110,7 @@ void LinearIR::LoopManager::get_io_loop_ports(LinearIR::constExprIt loop_begin_p void LinearIR::LoopManager::mark_loop(LinearIR::constExprIt loop_begin_pos, LinearIR::constExprIt loop_end_pos, size_t loop_depth, size_t vector_size) { - std::vector loop_entry_points, loop_exit_points; + std::vector loop_entry_points, loop_exit_points; LoopManager::get_io_loop_ports(loop_begin_pos, loop_end_pos, loop_entry_points, loop_exit_points); auto broadcast = [](std::vector &lhs, const std::vector &rhs, size_t index) -> void { @@ -181,8 +181,8 @@ void LinearIR::LoopManager::mark_loop(LinearIR::constExprIt loop_begin_pos, size_t idx, size_t work_amount, size_t work_amount_increment, - const std::vector &entries, - const std::vector &exits) { + const std::vector &entries, + const std::vector &exits) { const auto loop_info = std::make_shared(work_amount, work_amount_increment, entries, exits); const auto loop_id = this->add_loop_info(loop_info); exprs_marking(loop_begin_pos, loop_end_pos, loop_id, idx); diff --git a/src/common/snippets/src/lowered/pass/fuse_loops.cpp b/src/common/snippets/src/lowered/pass/fuse_loops.cpp index c83aed643823aa..66d3b586fcc27b 100644 --- a/src/common/snippets/src/lowered/pass/fuse_loops.cpp +++ b/src/common/snippets/src/lowered/pass/fuse_loops.cpp @@ -29,16 +29,16 @@ bool FuseLoops::can_be_fused(const LoopInfoPtr& loop_current, const LoopInfoPtr& return supported_work_amount && supported_increment; } -void FuseLoops::fuse_points(std::vector& exit_points, std::vector& entry_points, +void FuseLoops::fuse_points(std::vector& exit_points, std::vector& entry_points, LinearIR::constExprIt loop_begin_pos, LinearIR::constExprIt 
loop_end_pos) { - std::vector new_exit_points; + std::vector new_exit_points; for (const auto& exit_point : exit_points) { const auto expr = exit_point.get_expr_ptr(); const auto port = exit_point.get_index(); const auto output_td = expr->output(port); const auto consumers_inputs = output_td->get_consumers(); - std::vector mapped_entry_points; + std::vector mapped_entry_points; std::vector outside_consumers; for (const auto& consumer_input : consumers_inputs) { const auto consumer = consumer_input.get_expr_ptr(); @@ -73,7 +73,7 @@ void FuseLoops::fuse_points(std::vector& exit_points, std::vec } bool FuseLoops::fuse_upper_into_current(LinearIR& linear_ir, const LinearIR::LoopManagerPtr& loop_manager, - const TensorDescriptor& current_entry_point, const TensorDescriptor& target_exit_point, + const ExpressionPort& current_entry_point, const ExpressionPort& target_exit_point, size_t current_loop_id, size_t target_loop_id, size_t dim_idx, LinearIR::constExprIt& current_loop_begin_pos, LinearIR::constExprIt& current_loop_end_pos) { const auto& loop_current = loop_manager->get_loop_info(current_loop_id); @@ -135,9 +135,9 @@ bool FuseLoops::fuse_upper_into_current(LinearIR& linear_ir, const LinearIR::Loo // Update work_amount for Loop (increment is constant because increments must be the identical for fusion): loop_current->work_amount = std::max(loop_current->work_amount, loop_target->work_amount); - std::vector new_entries = target_entry_points; + std::vector new_entries = target_entry_points; new_entries.insert(new_entries.end(), current_entry_points.begin(), current_entry_points.end()); - std::vector new_exits = target_exit_points; + std::vector new_exits = target_exit_points; new_exits.insert(new_exits.end(), current_exit_points.begin(), current_exit_points.end()); loop_current->entry_exprs = new_entries; @@ -147,7 +147,7 @@ bool FuseLoops::fuse_upper_into_current(LinearIR& linear_ir, const LinearIR::Loo } bool FuseLoops::fuse_lower_into_current(LinearIR& linear_ir, 
const LinearIR::LoopManagerPtr& loop_manager, - const TensorDescriptor& current_exit_point, const TensorDescriptor& target_entry_point, + const ExpressionPort& current_exit_point, const ExpressionPort& target_entry_point, size_t current_loop_id, size_t target_loop_id, size_t dim_idx, LinearIR::constExprIt& current_loop_begin_pos, LinearIR::constExprIt& current_loop_end_pos) { const auto& loop_current = loop_manager->get_loop_info(current_loop_id); @@ -205,9 +205,9 @@ bool FuseLoops::fuse_lower_into_current(LinearIR& linear_ir, const LinearIR::Loo // Update work_amount for Loop (increment is constant because increments must be the identical for fusion): loop_current->work_amount = std::max(loop_current->work_amount, loop_target->work_amount); - std::vector& new_entries = current_entry_points; + std::vector& new_entries = current_entry_points; new_entries.insert(new_entries.end(), target_entry_points.begin(), target_entry_points.end()); - std::vector& new_exits = current_exit_points; + std::vector& new_exits = current_exit_points; new_exits.insert(new_exits.end(), target_exit_points.begin(), target_exit_points.end()); loop_current->entry_exprs = new_entries; diff --git a/src/common/snippets/src/lowered/pass/init_loops.cpp b/src/common/snippets/src/lowered/pass/init_loops.cpp index ca96b43311241e..e90127129b7e54 100644 --- a/src/common/snippets/src/lowered/pass/init_loops.cpp +++ b/src/common/snippets/src/lowered/pass/init_loops.cpp @@ -16,9 +16,9 @@ namespace pass { namespace { void filter_ports(LinearIR& linear_ir, - std::vector& loop_entries, std::vector& loop_exits) { - std::vector new_loop_entries; - std::vector new_loop_exits; + std::vector& loop_entries, std::vector& loop_exits) { + std::vector new_loop_entries; + std::vector new_loop_exits; new_loop_entries.reserve(loop_entries.size()); new_loop_exits.reserve(loop_exits.size()); @@ -63,8 +63,8 @@ int64_t get_dim_stride(const size_t dim, const std::vector& shape) { InitLoops::InitLoops() : Transformation() {} 
-std::vector InitLoops::init_ptr_increments(const std::vector& loop_inputs, - const std::vector& loop_outputs, +std::vector InitLoops::init_ptr_increments(const std::vector& loop_inputs, + const std::vector& loop_outputs, size_t dim_idx) const { std::vector ptr_increments; // Note: Need to find max relevant dim expr to account for broadcasting, collect relevant_dims as well @@ -116,8 +116,8 @@ std::vector InitLoops::init_finalization_offsets(const std::vector InitLoops::init_element_type_sizes(const std::vector& loop_inputs, - const std::vector& loop_outputs) { +std::vector InitLoops::init_element_type_sizes(const std::vector& loop_inputs, + const std::vector& loop_outputs) { std::vector element_types; element_types.reserve(loop_inputs.size() + loop_outputs.size()); for (const auto& in : loop_inputs) { diff --git a/src/common/snippets/src/lowered/pass/insert_buffers.cpp b/src/common/snippets/src/lowered/pass/insert_buffers.cpp index cd6bc7c4c72116..34c44be68449ca 100644 --- a/src/common/snippets/src/lowered/pass/insert_buffers.cpp +++ b/src/common/snippets/src/lowered/pass/insert_buffers.cpp @@ -58,7 +58,7 @@ LinearIR::constExprIt InsertBuffers::insertion_position(const LinearIR& linear_i } void InsertBuffers::insertion(LinearIR& linear_ir, const LinearIR::LoopManagerPtr& loop_manager, size_t loop_id, - const std::vector& loop_entries, const std::vector& loop_exits) { + const std::vector& loop_entries, const std::vector& loop_exits) { for (const auto& entry_point : loop_entries) { const auto expr = entry_point.get_expr_ptr(); const auto port = entry_point.get_index(); @@ -123,7 +123,7 @@ void InsertBuffers::insertion(LinearIR& linear_ir, const LinearIR::LoopManagerPt const auto current_loop_count = current_loops.size(); const std::vector node_outs = {output_td}; - std::vector potential_consumers; + std::vector potential_consumers; std::set buffers; const auto current_loop_lvl = std::distance(current_loops.begin(), std::find(current_loops.begin(), 
current_loops.end(), loop_id)); for (const auto& child_expr_input : child_exprs_inputs) { @@ -223,7 +223,7 @@ bool InsertBuffers::run(LinearIR& linear_ir) { const auto input_ports = ma->get_memory_access_input_ports(); const auto output_ports = ma->get_memory_access_output_ports(); - std::vector loop_entries(input_ports.size()), loop_exits(output_ports.size()); + std::vector loop_entries(input_ports.size()), loop_exits(output_ports.size()); // C++17: for (auto const& [loop_id, loop_info] : loop_data_map) for (const auto& p : input_ports) { loop_entries[p.first] = expr->input_port(p.first); diff --git a/src/common/snippets/src/lowered/pass/insert_load_store.cpp b/src/common/snippets/src/lowered/pass/insert_load_store.cpp index 130dc2170f0387..d7a326486f51ba 100644 --- a/src/common/snippets/src/lowered/pass/insert_load_store.cpp +++ b/src/common/snippets/src/lowered/pass/insert_load_store.cpp @@ -33,7 +33,7 @@ using LoopInfoPtr = LoopManager::LoopInfoPtr; InsertLoadStore::InsertLoadStore(size_t vector_size) : m_vector_size(vector_size) {} void InsertLoadStore::update_loops(const LinearIR::LoopManagerPtr& loop_manager, const std::vector& loop_ids, - const TensorDescriptor& actual_port, const std::vector& target_ports, bool is_entry) { + const ExpressionPort& actual_port, const std::vector& target_ports, bool is_entry) { for (auto loop_id : loop_ids) { if (loop_id != Expression::LOOP_NULL_ID) update_loop(loop_manager->get_loop_info(loop_id), actual_port, target_ports, is_entry); @@ -41,7 +41,7 @@ void InsertLoadStore::update_loops(const LinearIR::LoopManagerPtr& loop_manager, } void InsertLoadStore::update_loop(const LinearIR::LoopManager::LoopInfoPtr& loop_info, - const TensorDescriptor& actual_port, const std::vector& target_ports, bool is_entry) { + const ExpressionPort& actual_port, const std::vector& target_ports, bool is_entry) { auto& ports = is_entry ? 
loop_info->entry_exprs : loop_info->exit_exprs; auto port_it = std::find(ports.begin(), ports.end(), actual_port); if (port_it == ports.end()) @@ -126,13 +126,13 @@ bool InsertLoadStore::insert_store(LinearIR& linear_ir, const LinearIR::constExp // So we should verify on the possible future exit points const auto consumer_inputs = input_td->get_consumers(); const auto should_be_saved = std::any_of(consumer_inputs.begin(), consumer_inputs.end(), - [](const TensorDescriptor& input_port) { + [](const ExpressionPort& input_port) { const auto& node = input_port.get_expr_ptr()->get_node(); return ov::is_type(node) || ov::is_type(node); }); const auto new_exit_point = store_expr->output_port(0); - const auto new_exit_points = should_be_saved ? std::vector{prev_exit_point, new_exit_point} - : std::vector{new_exit_point}; + const auto new_exit_points = should_be_saved ? std::vector{prev_exit_point, new_exit_point} + : std::vector{new_exit_point}; update_loops(loop_manager, loop_ids, prev_exit_point, new_exit_points, false); return true; } diff --git a/src/common/snippets/src/lowered/pass/insert_tail_loop.cpp b/src/common/snippets/src/lowered/pass/insert_tail_loop.cpp index fffef56513c8ab..f62f798480a87f 100644 --- a/src/common/snippets/src/lowered/pass/insert_tail_loop.cpp +++ b/src/common/snippets/src/lowered/pass/insert_tail_loop.cpp @@ -102,7 +102,7 @@ bool InsertTailLoop::run(LinearIR& linear_ir) { auto is_buffer_output = [&linear_ir](const TensorPtr& output) { const auto child_exprs_inputs = output->get_consumers(); return std::any_of(child_exprs_inputs.begin(), child_exprs_inputs.end(), - [](const TensorDescriptor& lp) {return ov::is_type(lp.get_expr_ptr()->get_node());}); + [](const ExpressionPort& lp) {return ov::is_type(lp.get_expr_ptr()->get_node());}); }; const auto loop_end_expr = linear_ir.get_expr_by_node(loop_end); diff --git a/src/common/snippets/src/lowered/pass/mark_loops.cpp b/src/common/snippets/src/lowered/pass/mark_loops.cpp index 
955b95d6cd036d..fa27bc60dbfb51 100644 --- a/src/common/snippets/src/lowered/pass/mark_loops.cpp +++ b/src/common/snippets/src/lowered/pass/mark_loops.cpp @@ -64,7 +64,7 @@ bool MarkLoops::run(LinearIR& linear_ir) { for (size_t i = 0; i < prev_expr->get_output_count(); ++i) { const auto& loop_td = prev_expr->output(i); const auto consumers = loop_td->get_consumers(); - const auto found = std::find_if(consumers.begin(), consumers.end(), [&loop_end_pos](const TensorDescriptor& consumer) { + const auto found = std::find_if(consumers.begin(), consumers.end(), [&loop_end_pos](const ExpressionPort& consumer) { return consumer.get_expr_ptr() == *loop_end_pos; }); if (found != consumers.end()) { diff --git a/src/common/snippets/src/lowered/pass/propagate_layout.cpp b/src/common/snippets/src/lowered/pass/propagate_layout.cpp index cfa37e9c52b687..e4540f6bcb90a5 100644 --- a/src/common/snippets/src/lowered/pass/propagate_layout.cpp +++ b/src/common/snippets/src/lowered/pass/propagate_layout.cpp @@ -52,7 +52,7 @@ bool PropagateLayout::run(LinearIR& linear_ir) { const auto consumer_inputs = target_td->get_consumers(); // Note that here we consider only the first child (which is usually Store), // but often there is another child - LoopEnd - TensorDescriptor result_td; + ExpressionPort result_td; for (const auto& child_input : consumer_inputs) { const auto child = child_input.get_expr_ptr(); if (ov::is_type(child->get_node())) { diff --git a/src/common/snippets/src/lowered/pass/softmax_decomposition.cpp b/src/common/snippets/src/lowered/pass/softmax_decomposition.cpp index 057b9119b0a8f0..64f49a6781b082 100644 --- a/src/common/snippets/src/lowered/pass/softmax_decomposition.cpp +++ b/src/common/snippets/src/lowered/pass/softmax_decomposition.cpp @@ -63,9 +63,9 @@ bool SoftmaxDecomposition::run(LinearIR& linear_ir) { // Markup of ReduceMax Loop loop_manager->mark_loop(max.first, horizon_max.first, 1, inner_work_amount, m_vector_size, - std::vector{(*max.first)->input_port(0), - 
(*max.first)->input_port(1)}, - std::vector{(*max.first)->output_port(0)}); + std::vector{(*max.first)->input_port(0), + (*max.first)->input_port(1)}, + std::vector{(*max.first)->output_port(0)}); const auto broadcast_horizon_max = push_node( std::make_shared(horizon_max.second, horizon_max.second->get_input_partial_shape(0))); @@ -83,11 +83,11 @@ bool SoftmaxDecomposition::run(LinearIR& linear_ir) { // Markup of ReduceMax Loop loop_manager->mark_loop(sub.first, horizon_sum.first, 1, inner_work_amount, m_vector_size, - std::vector{(*sub.first)->input_port(0), - (*sub.first)->input_port(1), - (*sum.first)->input_port(1)}, - std::vector{(*exp.first)->output_port(0), - (*sum.first)->output_port(0)}); + std::vector{(*sub.first)->input_port(0), + (*sub.first)->input_port(1), + (*sum.first)->input_port(1)}, + std::vector{(*exp.first)->output_port(0), + (*sum.first)->output_port(0)}); // Divide is expensive operation, so we decompose it into 1 / x * y, where 1 / x is executed outside loop const auto pow = push_node(std::make_shared(horizon_sum.second, -1.f)); @@ -98,26 +98,26 @@ bool SoftmaxDecomposition::run(LinearIR& linear_ir) { // Mul (pseudo-Divide loop) const auto mul = push_node(std::make_shared(exp.second, broadcast_pow.second)); - // Transfer original TensorDescriptors + // Transfer original ExpressionPorts linear_ir.replace_input(*max.first, 0, input_td); linear_ir.replace_input(*sub.first, 0, input_td); linear_ir.replace_input(output_td->get_consumers(), (*mul.first)->output(0)); // Markup of Mul Loop loop_manager->mark_loop(mul.first, expr_it, 1, inner_work_amount, m_vector_size, - std::vector{(*mul.first)->input_port(0), - (*mul.first)->input_port(1)}, - std::vector{(*mul.first)->output_port(0)}); + std::vector{(*mul.first)->input_port(0), + (*mul.first)->input_port(1)}, + std::vector{(*mul.first)->output_port(0)}); // Markup inner loop for outside expression with null loop id for (const auto& expr : outer_exprs) { expr->set_loop_id(Expression::LOOP_NULL_ID, 
1); } - auto update_loop_bounds = [&softmax_expr](std::vector& points, - const std::vector& new_points, + auto update_loop_bounds = [&softmax_expr](std::vector& points, + const std::vector& new_points, const LinearIR::LoopManager::LoopInfoPtr& loop_info) { - auto entry_found = std::find_if(points.begin(), points.end(), [&softmax_expr](const TensorDescriptor& desc) { + auto entry_found = std::find_if(points.begin(), points.end(), [&softmax_expr](const ExpressionPort& desc) { return desc.get_expr_ptr() == softmax_expr; }); if (entry_found != points.end()) { @@ -131,8 +131,8 @@ bool SoftmaxDecomposition::run(LinearIR& linear_ir) { if (loop_id == Expression::LOOP_NULL_ID) continue; const auto loop_info = loop_manager->get_loop_info(loop_id); - update_loop_bounds(loop_info->entry_exprs, std::vector{(*max.first)->input_port(0), (*sub.first)->input_port(0)}, loop_info); - update_loop_bounds(loop_info->exit_exprs, std::vector{(*mul.first)->output_port(0)}, loop_info); + update_loop_bounds(loop_info->entry_exprs, std::vector{(*max.first)->input_port(0), (*sub.first)->input_port(0)}, loop_info); + update_loop_bounds(loop_info->exit_exprs, std::vector{(*mul.first)->output_port(0)}, loop_info); } /* =========================================== */ diff --git a/src/common/snippets/src/lowered/tensor.cpp b/src/common/snippets/src/lowered/tensor.cpp index 1a0f3525690b69..70e40160a6f29b 100644 --- a/src/common/snippets/src/lowered/tensor.cpp +++ b/src/common/snippets/src/lowered/tensor.cpp @@ -12,68 +12,42 @@ namespace ngraph { namespace snippets { namespace lowered { -TensorDescriptor::TensorDescriptor(const std::weak_ptr& expr, Type type, size_t port, - const std::vector& tensor, const std::vector& layout, const std::vector& subtensor) - : m_expr(expr), m_type(type), m_port_index(port), m_port_desc(std::make_shared(tensor, subtensor, layout)) {} - -TensorDescriptor::TensorDescriptor(const std::weak_ptr& expr, Type type, size_t port, const PortDescriptorPtr& port_desc) - : 
m_expr(expr), m_type(type), m_port_index(port) { - PortDescriptorPtr local_port_desc = port_desc; - if (!local_port_desc) { - if (type == Type::Input) { - local_port_desc = PortManager::get_port_descriptor_ptr(expr.lock()->get_node()->input(port)); - } else if (type == Type::Output) { - local_port_desc = PortManager::get_port_descriptor_ptr(expr.lock()->get_node()->output(port)); - } else { - OPENVINO_THROW("TensorDescriptor supports only Input and Output type!"); - } - } - - m_port_desc = local_port_desc; -} - -std::shared_ptr TensorDescriptor::get_expr_ptr() const { - auto shared = m_expr.lock(); - OPENVINO_ASSERT(shared != nullptr, "Failed attempt to get shared pointer of source expression: nullptr"); - return shared; -} - -Tensor::Tensor(const TensorDescriptor& source_descriptor, const std::vector& consumer_descriptors) +Tensor::Tensor(const ExpressionPort& source_descriptor, const std::vector& consumer_descriptors) : m_source_port(source_descriptor), m_consumer_ports(consumer_descriptors) {} -std::vector::const_iterator Tensor::find_consumer(const TensorDescriptor& consumer) const { +std::vector::const_iterator Tensor::find_consumer(const ExpressionPort& consumer) const { // Note: Find by shared ptr and index port is enough since these parameters must be unique return std::find_if(m_consumer_ports.begin(), m_consumer_ports.end(), - [&consumer](const TensorDescriptor& td) { + [&consumer](const ExpressionPort& td) { return consumer.get_expr_ptr() == td.get_expr_ptr() && consumer.get_index() == td.get_index(); }); } -std::vector::iterator Tensor::find_consumer(const TensorDescriptor& consumer) { +std::vector::iterator Tensor::find_consumer(const ExpressionPort& consumer) { // Note: Find by shared ptr and index port is enough since these parameters must be unique return std::find_if(m_consumer_ports.begin(), m_consumer_ports.end(), - [&consumer](const TensorDescriptor& td) { + [&consumer](const ExpressionPort& td) { return consumer.get_expr_ptr() == 
td.get_expr_ptr() && consumer.get_index() == td.get_index(); }); } -bool Tensor::found_consumer(const TensorDescriptor& consumer) const { +bool Tensor::found_consumer(const ExpressionPort& consumer) const { return find_consumer(consumer) != m_consumer_ports.end(); } -void Tensor::add_consumer(const TensorDescriptor& consumer) { +void Tensor::add_consumer(const ExpressionPort& consumer) { OPENVINO_ASSERT(!found_consumer(consumer), "Consumer has been already added to Tensor!"); m_consumer_ports.push_back(consumer); } -void Tensor::remove_consumer(const TensorDescriptor& consumer) { +void Tensor::remove_consumer(const ExpressionPort& consumer) { const auto& found = find_consumer(consumer); OPENVINO_ASSERT(found != m_consumer_ports.end(), "Consumer is missed in Tensor!"); m_consumer_ports.erase(found); } -std::vector Tensor::get_conflicted_consumers() const { - std::vector conflicted_consumers; +std::vector Tensor::get_conflicted_consumers() const { + std::vector conflicted_consumers; for (const auto& consumer : m_consumer_ports) { if (is_conflicted_consumer(consumer)) { conflicted_consumers.push_back(consumer); @@ -82,14 +56,14 @@ std::vector Tensor::get_conflicted_consumers() const { return conflicted_consumers; } -bool Tensor::is_conflicted_consumer(const TensorDescriptor& consumer) const { +bool Tensor::is_conflicted_consumer(const ExpressionPort& consumer) const { OPENVINO_ASSERT(found_consumer(consumer), "Failed check for conflicted consumer: it's not a consumer fot the Tensor"); return get_tensor() != consumer.get_tensor() || get_layout() != consumer.get_layout() || get_subtensor() != consumer.get_subtensor(); } -bool operator==(const TensorDescriptor& lhs, const TensorDescriptor& rhs) { +bool operator==(const ExpressionPort& lhs, const ExpressionPort& rhs) { if (&rhs == &lhs) return true; return lhs.m_type == rhs.m_type && @@ -97,11 +71,11 @@ bool operator==(const TensorDescriptor& lhs, const TensorDescriptor& rhs) { lhs.m_port_index == rhs.m_port_index && 
lhs.m_port_desc == rhs.m_port_desc; } -bool operator!=(const TensorDescriptor& lhs, const TensorDescriptor& rhs) { +bool operator!=(const ExpressionPort& lhs, const ExpressionPort& rhs) { return !(lhs == rhs); } -bool operator<(const TensorDescriptor& lhs, const TensorDescriptor& rhs) { - OPENVINO_ASSERT(lhs.get_type() == rhs.get_type(), "TensorDescriptors must be of the same type for comparison!"); +bool operator<(const ExpressionPort& lhs, const ExpressionPort& rhs) { + OPENVINO_ASSERT(lhs.get_type() == rhs.get_type(), "ExpressionPorts must be of the same type for comparison!"); return lhs.get_index() < rhs.get_index() && lhs.get_expr_ptr() < rhs.get_expr_ptr() && lhs.get_tensor() < rhs.get_tensor() && @@ -109,7 +83,7 @@ bool operator<(const TensorDescriptor& lhs, const TensorDescriptor& rhs) { lhs.get_subtensor() < rhs.get_subtensor(); } -std::ostream& operator<<(std::ostream& ss, const TensorDescriptor& td) { +std::ostream& operator<<(std::ostream& ss, const ExpressionPort& td) { auto print_vector = [&ss](const std::vector& data){ ss << "["; for (auto i : data) From dc8daf85fb5dc18f0253c3fc1f26fa2012bddadc Mon Sep 17 00:00:00 2001 From: Alexandra Sidorova Date: Fri, 5 May 2023 13:43:05 +0400 Subject: [PATCH 06/13] Refactored expression factory --- .../snippets/lowered/expression_factory.hpp | 116 ++++------- .../include/snippets/lowered/linear_ir.hpp | 11 +- .../src/lowered/expression_factory.cpp | 182 ++++++------------ src/common/snippets/src/lowered/linear_ir.cpp | 15 +- 4 files changed, 106 insertions(+), 218 deletions(-) diff --git a/src/common/snippets/include/snippets/lowered/expression_factory.hpp b/src/common/snippets/include/snippets/lowered/expression_factory.hpp index ff561a31d46263..a0209e7d6f7efd 100644 --- a/src/common/snippets/include/snippets/lowered/expression_factory.hpp +++ b/src/common/snippets/include/snippets/lowered/expression_factory.hpp @@ -6,95 +6,55 @@ #include "linear_ir.hpp" +#include "snippets/snippets_isa.hpp" + namespace ngraph 
{ namespace snippets { namespace lowered { -class LinearIR::BaseExpressionFactory { +class LinearIR::ExpressionFactory { public: - BaseExpressionFactory() = default; - BaseExpressionFactory(const LinearIR& linear_ir) : m_linear_ir(linear_ir) {} - - virtual ExpressionPtr build(const std::shared_ptr& n, const std::shared_ptr& model); - virtual ExpressionPtr build(const std::shared_ptr& n, const std::shared_ptr& model, + template + static ExpressionPtr build(const std::shared_ptr& n, Args&&... params) { + if (const auto par = ov::as_type_ptr(n)) { + return create(par, params...); + } else if (const auto res = ov::as_type_ptr(n)) { + return create(res, params...); + } else if (const auto loop_begin = ov::as_type_ptr(n)) { + return create(loop_begin, params...); + } else if (const auto loop_end = ov::as_type_ptr(n)) { + return create(loop_end, params...); + } + return create(n, params...); + } + +private: + /* -- Default Builders - initialize input tensors from parents and create new output tensors themselves */ + static ExpressionPtr create(const std::shared_ptr& par, const LinearIR& linear_ir, + const std::shared_ptr& model); + static ExpressionPtr create(const std::shared_ptr& res, const LinearIR& linear_ir, + const std::shared_ptr& model); + static ExpressionPtr create(const std::shared_ptr& n, const LinearIR& linear_ir, + const std::shared_ptr& model); + + /* -- Input Builders - get input tensors from method parameters and create new output tensors themselves */ + static ExpressionPtr create(const std::shared_ptr& n, const LinearIR& linear_ir, + const std::vector& inputs); + static ExpressionPtr create(const std::shared_ptr& n, const LinearIR& linear_ir, + const std::vector& inputs); + static ExpressionPtr create(const std::shared_ptr& n, const LinearIR& linear_ir, const std::vector& inputs); - virtual ExpressionPtr build(const std::shared_ptr& n, const std::shared_ptr& model, - const std::vector& inputs, const std::vector& outputs); - static std::shared_ptr 
get(const LinearIR& linear_ir, const std::shared_ptr& n); + /* -- Full Builders - get input and outputs tensors from parameters */ + static ExpressionPtr create(const std::shared_ptr& n, const LinearIR& linear_ir, + const std::vector& inputs, const std::vector& outputs); -protected: - virtual ExpressionPtr create(const std::shared_ptr& n, const std::shared_ptr& model) = 0; // Creates inputs for expression using parent output tensors - virtual std::vector create_expression_inputs(const ExpressionPtr& expr); + static std::vector create_expression_inputs(const LinearIR& linear_ir, const ExpressionPtr& expr); // Creates new output tensors - virtual std::vector create_expression_outputs(const ExpressionPtr& expr); + static std::vector create_expression_outputs(const LinearIR& linear_ir, const ExpressionPtr& expr); // The method verifies of input tensors to availability of the expression as consumer and add it if missed - virtual void validate_inputs(const ExpressionPtr& expr, const std::vector& inputs); - - LinearIR m_linear_ir; -}; - -class LinearIR::ExpressionFactory : public LinearIR::BaseExpressionFactory { -public: - ExpressionFactory() = default; - ExpressionFactory(const LinearIR& linear_ir) : BaseExpressionFactory(linear_ir) {} - - ExpressionPtr build(const std::shared_ptr& n, const std::shared_ptr& model) override; - ExpressionPtr build(const std::shared_ptr& n, const std::shared_ptr& model, - const std::vector& inputs) override; - ExpressionPtr build(const std::shared_ptr& n, const std::shared_ptr& model, - const std::vector& inputs, const std::vector& outputs) override; - -protected: - ExpressionPtr create(const std::shared_ptr& n, const std::shared_ptr& model) override; -}; - -class LinearIR::ParameterExpressionFactory : public LinearIR::BaseExpressionFactory { -public: - ParameterExpressionFactory() = default; - ParameterExpressionFactory(const LinearIR& linear_ir) : BaseExpressionFactory(linear_ir) {} - - ExpressionPtr build(const std::shared_ptr& n, const 
std::shared_ptr& model) override; - -protected: - ExpressionPtr create(const std::shared_ptr& n, const std::shared_ptr& model) override; -}; - -class LinearIR::ResultExpressionFactory : public LinearIR::BaseExpressionFactory { -public: - ResultExpressionFactory() = default; - ResultExpressionFactory(const LinearIR& linear_ir) : BaseExpressionFactory(linear_ir) {} - - ExpressionPtr build(const std::shared_ptr& n, const std::shared_ptr& model) override; - -protected: - ExpressionPtr create(const std::shared_ptr& n, const std::shared_ptr& model) override; -}; - -class LinearIR::LoopBeginExpressionFactory : public LinearIR::BaseExpressionFactory { -public: - LoopBeginExpressionFactory() = default; - LoopBeginExpressionFactory(const LinearIR& linear_ir) : BaseExpressionFactory(linear_ir) {} - - ExpressionPtr build(const std::shared_ptr& n, const std::shared_ptr& model, - const std::vector& inputs) override; - -protected: - ExpressionPtr create(const std::shared_ptr& n, const std::shared_ptr& model) override; -}; - -class LinearIR::LoopEndExpressionFactory : public LinearIR::BaseExpressionFactory { -public: - LoopEndExpressionFactory() = default; - LoopEndExpressionFactory(const LinearIR& linear_ir) : BaseExpressionFactory(linear_ir) {} - - ExpressionPtr build(const std::shared_ptr& n, const std::shared_ptr& model, - const std::vector& inputs) override; - -protected: - ExpressionPtr create(const std::shared_ptr& n, const std::shared_ptr& model) override; - void validate_inputs(const ExpressionPtr& expr, const std::vector& inputs) override; + static void validate_inputs(const ExpressionPtr& expr, const std::vector& inputs); }; } // namespace lowered diff --git a/src/common/snippets/include/snippets/lowered/linear_ir.hpp b/src/common/snippets/include/snippets/lowered/linear_ir.hpp index f00f9e272bdb79..4de93b464b506e 100644 --- a/src/common/snippets/include/snippets/lowered/linear_ir.hpp +++ b/src/common/snippets/include/snippets/lowered/linear_ir.hpp @@ -23,12 +23,7 @@ 
class Config { }; class LinearIR { - class BaseExpressionFactory; class ExpressionFactory; - class ParameterExpressionFactory; - class ResultExpressionFactory; - class LoopBeginExpressionFactory; - class LoopEndExpressionFactory; public: using container = std::list; using io_container = std::list>; @@ -38,10 +33,8 @@ class LinearIR { LinearIR() = default; explicit LinearIR(const std::shared_ptr& m, Config config = {}); - ExpressionPtr create_expression(const std::shared_ptr& n, const std::vector inputs, - const std::shared_ptr& model = nullptr); - ExpressionPtr create_expression(const std::shared_ptr& n, const std::vector inputs, const std::vector outputs, - const std::shared_ptr& model = nullptr); + ExpressionPtr create_expression(const std::shared_ptr& n, const std::vector inputs); + ExpressionPtr create_expression(const std::shared_ptr& n, const std::vector inputs, const std::vector outputs); LinearIR deep_copy() const; static LinearIR::container deep_copy_range(LinearIR::container::const_iterator begin, LinearIR::container::const_iterator end); diff --git a/src/common/snippets/src/lowered/expression_factory.cpp b/src/common/snippets/src/lowered/expression_factory.cpp index 6f95f919b40b08..4a6824adce624a 100644 --- a/src/common/snippets/src/lowered/expression_factory.cpp +++ b/src/common/snippets/src/lowered/expression_factory.cpp @@ -10,35 +10,7 @@ namespace ngraph { namespace snippets { namespace lowered { -ExpressionPtr LinearIR::BaseExpressionFactory::build(const std::shared_ptr& n, const std::shared_ptr& model) { - OPENVINO_THROW("The Factory doesn't support default builder"); -} -ExpressionPtr LinearIR::BaseExpressionFactory::build(const std::shared_ptr& n, const std::shared_ptr& model, - const std::vector& inputs) { - OPENVINO_THROW("The Factory doesn't support builder with just input tensors"); -} -ExpressionPtr LinearIR::BaseExpressionFactory::build(const std::shared_ptr& n, const std::shared_ptr& model, - const std::vector& inputs, const std::vector& 
outputs) { - OPENVINO_THROW("The Factory doesn't support builder with input and outputs tensors"); -} - -std::shared_ptr LinearIR::BaseExpressionFactory::get(const LinearIR& linear_ir, const std::shared_ptr& n) { - if (ov::is_type(n)) { - return std::make_shared(linear_ir); - } - if (ov::is_type(n)) { - return std::make_shared(linear_ir); - } - if (ov::is_type(n)) { - return std::make_shared(linear_ir); - } - if (ov::is_type(n)) { - return std::make_shared(linear_ir); - } - return std::make_shared(linear_ir); -} - -std::vector LinearIR::BaseExpressionFactory::create_expression_inputs(const ExpressionPtr& expr) { +std::vector LinearIR::ExpressionFactory::create_expression_inputs(const LinearIR& linear_ir, const ExpressionPtr& expr) { OPENVINO_ASSERT(expr != nullptr, "Failed expression inputs creation: expression is null"); const auto& node = expr->get_node(); @@ -46,7 +18,7 @@ std::vector LinearIR::BaseExpressionFactory::create_expression_inputs for (const auto& input : node->inputs()) { const auto input_source = input.get_source_output(); const auto in_index = input.get_index(); - const auto& parent_expr = m_linear_ir.get_expr_by_node(input_source.get_node_shared_ptr()); + const auto& parent_expr = linear_ir.get_expr_by_node(input_source.get_node_shared_ptr()); const auto& tensor = parent_expr->output(input_source.get_index()); const auto tensor_desc = ExpressionPort(expr, ExpressionPort::Type::Input, in_index, PortManager::get_port_descriptor_ptr(input)); tensor->add_consumer(tensor_desc); @@ -55,7 +27,7 @@ std::vector LinearIR::BaseExpressionFactory::create_expression_inputs return inputs; } -std::vector LinearIR::BaseExpressionFactory::create_expression_outputs(const ExpressionPtr& expr) { +std::vector LinearIR::ExpressionFactory::create_expression_outputs(const LinearIR& linear_ir, const ExpressionPtr& expr) { OPENVINO_ASSERT(expr != nullptr, "Failed expression outputs creation: expression is null"); const auto& node = expr->get_node(); @@ -68,7 +40,8 @@ 
std::vector LinearIR::BaseExpressionFactory::create_expression_output return outputs; } -void LinearIR::BaseExpressionFactory::validate_inputs(const ExpressionPtr& expr, const std::vector& inputs) { +// The method verifies of input tensors to availability of the expression as consumer and add it if missed +void LinearIR::ExpressionFactory::validate_inputs(const ExpressionPtr& expr, const std::vector& inputs) { for (size_t i = 0; i < inputs.size(); ++i) { const auto& input = inputs[i]; const auto consumers = input->get_consumers(); @@ -77,127 +50,94 @@ void LinearIR::BaseExpressionFactory::validate_inputs(const ExpressionPtr& expr, return desc.get_index() == i && desc.get_expr_ptr() == expr; }); if (found == consumers.end()) { - const auto port_desc = PortManager::get_port_descriptor_ptr(expr->get_node()->input(i)); + PortDescriptorPtr port_desc = nullptr; + // LoopEnd doesn't have many input ports, so LoopEnd cannot initialize PortDescriptor for ports himself. + // The expression should have the same PortDescriptor as sources + if (ov::is_type(expr->get_node())) { + port_desc = input->get_source().get_port_descriptor(); + } else { + port_desc = PortManager::get_port_descriptor_ptr(expr->get_node()->input(i)); + } const auto tensor_desc = ExpressionPort(expr, ExpressionPort::Type::Input, i, port_desc); input->add_consumer(tensor_desc); } } } -ExpressionPtr LinearIR::ExpressionFactory::create(const std::shared_ptr& n, const std::shared_ptr& model) { - // Note: ctor of shared_ptr isn't friend class for Expression - return std::make_shared(Expression(n)); -} - -ExpressionPtr LinearIR::ExpressionFactory::build(const std::shared_ptr& n, const std::shared_ptr& model) { - const auto expr = create(n, model); - expr->init_inputs(create_expression_inputs(expr)); - expr->init_outputs(create_expression_outputs(expr)); +ExpressionPtr LinearIR::ExpressionFactory::create(const std::shared_ptr& par, + const LinearIR& linear_ir, const std::shared_ptr& model) { + // Note: ctor of 
shared_ptr isn't friend class for Expression -> we cannot use directly make_shared(args) + OPENVINO_ASSERT(model != nullptr, "To create IOExpression from Parameter there must be inited model!"); + const auto expr = std::make_shared(IOExpression(par, model->get_parameter_index(par))); + expr->init_inputs({}); + expr->init_outputs(create_expression_outputs(linear_ir, expr)); return expr; } -ExpressionPtr LinearIR::ExpressionFactory::build(const std::shared_ptr& n, const std::shared_ptr& model, - const std::vector& inputs) { - const auto expr = create(n, model); - validate_inputs(expr, inputs); - expr->init_inputs(inputs); - expr->init_outputs(create_expression_outputs(expr)); +ExpressionPtr LinearIR::ExpressionFactory::create(const std::shared_ptr& res, + const LinearIR& linear_ir, const std::shared_ptr& model) { + // Note: ctor of shared_ptr isn't friend class for Expression -> we cannot use directly make_shared(args) + OPENVINO_ASSERT(model != nullptr, "To create IOExpression from Result there must be inited model!"); + const auto expr = std::make_shared(IOExpression(res, model->get_result_index(res))); + expr->init_inputs(create_expression_inputs(linear_ir, expr)); + expr->init_outputs({}); return expr; } -ExpressionPtr LinearIR::ExpressionFactory::build(const std::shared_ptr& n, const std::shared_ptr& model, - const std::vector& inputs, const std::vector& outputs) { - const auto expr = create(n, model); - validate_inputs(expr, inputs); - expr->init_inputs(inputs); - expr->init_outputs(outputs); +ExpressionPtr LinearIR::ExpressionFactory::create(const std::shared_ptr& n, const LinearIR& linear_ir, + const std::shared_ptr& model) { + OPENVINO_ASSERT(!ov::is_type(n), "Default expression builder doesn't support LoopBegin and LoopEnd"); + // Note: ctor of shared_ptr isn't friend class for Expression + const auto expr = std::make_shared(Expression(n)); + expr->init_inputs(create_expression_inputs(linear_ir, expr)); + 
expr->init_outputs(create_expression_outputs(linear_ir, expr)); return expr; } -ExpressionPtr LinearIR::ParameterExpressionFactory::create(const std::shared_ptr& n, const std::shared_ptr& model) { - // Note: ctor of shared_ptr isn't friend class for Expression -> we cannot use directly make_shared(args) - if (const auto& par = as_type_ptr(n)) { - OPENVINO_ASSERT(model != nullptr, "To create IOExpression from Parameter there must be inited model!"); - return std::make_shared(IOExpression(par, model->get_parameter_index(par))); - } - OPENVINO_THROW("ParameterExpressionFactory support only Parameter node"); -} - -ExpressionPtr LinearIR::ParameterExpressionFactory::build(const std::shared_ptr& n, const std::shared_ptr& model) { - const auto expr = create(n, model); +ExpressionPtr LinearIR::ExpressionFactory::create(const std::shared_ptr& n, const LinearIR& linear_ir, + const std::vector& inputs) { + OPENVINO_ASSERT(inputs.empty(), "LoopBegin cannot have inputs"); + const auto expr = std::make_shared(Expression(n)); + validate_inputs(expr, inputs); expr->init_inputs({}); - expr->init_outputs(create_expression_outputs(expr)); + expr->init_outputs(create_expression_outputs(linear_ir, expr)); return expr; } -ExpressionPtr LinearIR::ResultExpressionFactory::create(const std::shared_ptr& n, const std::shared_ptr& model) { - // Note: ctor of shared_ptr isn't friend class for Expression -> we cannot use directly make_shared(args) - if (const auto& res = as_type_ptr(n)) { - OPENVINO_ASSERT(model != nullptr, "To create IOExpression from Result there must be inited model!"); - return std::make_shared(IOExpression(res, model->get_result_index(res))); - } - OPENVINO_THROW("ResultExpressionFactory support only Result node"); -} - -ExpressionPtr LinearIR::ResultExpressionFactory::build(const std::shared_ptr& n, const std::shared_ptr& model) { - const auto expr = create(n, model); - expr->init_inputs(create_expression_inputs(expr)); +ExpressionPtr 
LinearIR::ExpressionFactory::create(const std::shared_ptr& n, const LinearIR& linear_ir, + const std::vector& inputs) { + const auto expr = std::make_shared(Expression(n)); + validate_inputs(expr, inputs); + expr->init_inputs(inputs); expr->init_outputs({}); return expr; } -ExpressionPtr LinearIR::LoopBeginExpressionFactory::create(const std::shared_ptr& n, const std::shared_ptr& model) { - // Note: ctor of shared_ptr isn't friend class for Expression -> we cannot use directly make_shared(args) - if (const auto& op = as_type_ptr(n)) { - return std::make_shared(Expression(op)); - } - OPENVINO_THROW("LoopBeginExpressionFactory support only LoopBegin node"); -} - -ExpressionPtr LinearIR::LoopBeginExpressionFactory::build(const std::shared_ptr& n, const std::shared_ptr& model, - const std::vector& inputs) { - OPENVINO_ASSERT(inputs.empty(), "LoopBegin cannot have inputs"); - const auto expr = create(n, model); +ExpressionPtr LinearIR::ExpressionFactory::create(const std::shared_ptr& n, const LinearIR& linear_ir, + const std::vector& inputs) { + OPENVINO_ASSERT(!ov::is_type(n) && + !ov::is_type(n), + "Expression builder with inputs doesn't support Result and Parameter"); + const auto expr = std::make_shared(Expression(n)); + validate_inputs(expr, inputs); expr->init_inputs(inputs); - expr->init_outputs(create_expression_outputs(expr)); + expr->init_outputs(create_expression_outputs(linear_ir, expr)); return expr; } -ExpressionPtr LinearIR::LoopEndExpressionFactory::create(const std::shared_ptr& n, const std::shared_ptr& model) { - // Note: ctor of shared_ptr isn't friend class for Expression -> we cannot use directly make_shared(args) - if (const auto& op = as_type_ptr(n)) { - return std::make_shared(Expression(op)); - } - OPENVINO_THROW("LoopEndExpressionFactory support only LoopEnd node"); -} - -ExpressionPtr LinearIR::LoopEndExpressionFactory::build(const std::shared_ptr& n, const std::shared_ptr& model, - const std::vector& inputs) { - const auto expr = create(n, 
model); +ExpressionPtr LinearIR::ExpressionFactory::create(const std::shared_ptr& n, const LinearIR& linear_ir, + const std::vector& inputs, const std::vector& outputs) { + OPENVINO_ASSERT(!ov::is_type(n) && + !ov::is_type(n) && + !ov::is_type(n), + "Expression builder with inputs and outputs doesn't support service Nodes: Parameter, Result and LoopBegin, LoopEnd"); + const auto expr = std::make_shared(Expression(n)); validate_inputs(expr, inputs); expr->init_inputs(inputs); - expr->init_outputs({}); + expr->init_outputs(outputs); return expr; } -void LinearIR::LoopEndExpressionFactory::validate_inputs(const ExpressionPtr& expr, const std::vector& inputs) { - for (size_t i = 0; i < inputs.size(); ++i) { - const auto& input = inputs[i]; - const auto consumers = input->get_consumers(); - const auto found = std::find_if(consumers.begin(), consumers.end(), - [&](const ExpressionPort& desc) { - return desc.get_index() == i && desc.get_expr_ptr()== expr; - }); - if (found == consumers.end()) { - // LoopEnd doesn't have input ports. 
So consumer for the Tensor should have the same Port Descriptor like source - const auto& port_desc = input->get_source().get_port_descriptor(); - const auto tensor_desc = ExpressionPort(expr, ExpressionPort::Type::Input, i, port_desc); - input->add_consumer(tensor_desc); - } - } -} - - }// namespace lowered }// namespace snippets }// namespace ngraph diff --git a/src/common/snippets/src/lowered/linear_ir.cpp b/src/common/snippets/src/lowered/linear_ir.cpp index d6f36ba88c9bdf..77f62d71c9273c 100644 --- a/src/common/snippets/src/lowered/linear_ir.cpp +++ b/src/common/snippets/src/lowered/linear_ir.cpp @@ -51,20 +51,15 @@ LinearIR::LinearIR(const std::shared_ptr& model, Config config) } ExpressionPtr LinearIR::create_expression(const std::shared_ptr& n, const std::shared_ptr& model) { - const auto factory = BaseExpressionFactory::get(*this, n); - return factory->build(n, model); + return ExpressionFactory::build(n, *this, model); } -ExpressionPtr LinearIR::create_expression(const std::shared_ptr& n, const std::vector inputs, - const std::shared_ptr& model) { - const auto factory = BaseExpressionFactory::get(*this, n); - return factory->build(n, model, inputs); +ExpressionPtr LinearIR::create_expression(const std::shared_ptr& n, const std::vector inputs) { + return ExpressionFactory::build(n, *this, inputs); } -ExpressionPtr LinearIR::create_expression(const std::shared_ptr& n, const std::vector inputs, const std::vector outputs, - const std::shared_ptr& model) { - const auto factory = BaseExpressionFactory::get(*this, n); - return factory->build(n, model, inputs, outputs); +ExpressionPtr LinearIR::create_expression(const std::shared_ptr& n, const std::vector inputs, const std::vector outputs) { + return ExpressionFactory::build(n, *this, inputs, outputs); } ov::NodeVector LinearIR::get_ordered_ops(const std::shared_ptr& m) { From eac7afa794a7f0f1aae77ffdc675bb3b04cd0f0a Mon Sep 17 00:00:00 2001 From: Alexandra Sidorova Date: Tue, 9 May 2023 14:05:24 +0400 Subject: 
[PATCH 07/13] ExpressionPort - is interface --- .../include/snippets/lowered/expression.hpp | 29 ++++--- .../snippets/lowered/expression_factory.hpp | 10 +-- .../snippets/lowered/expression_port.hpp | 28 +++---- .../include/snippets/lowered/linear_ir.hpp | 5 +- .../include/snippets/lowered/tensor.hpp | 18 ++-- .../snippets/src/lowered/expression.cpp | 49 +++++------ .../src/lowered/expression_factory.cpp | 82 +++++++------------ .../snippets/src/lowered/expression_port.cpp | 73 +++++++++++------ src/common/snippets/src/lowered/linear_ir.cpp | 46 +++-------- .../snippets/src/lowered/loop_manager.cpp | 16 ++-- .../src/lowered/pass/allocate_buffers.cpp | 12 +-- .../src/lowered/pass/assign_registers.cpp | 42 +++++----- .../src/lowered/pass/cleanup_loop_offsets.cpp | 4 +- .../snippets/src/lowered/pass/fuse_loops.cpp | 47 ++++------- .../src/lowered/pass/indentify_buffers.cpp | 10 +-- .../snippets/src/lowered/pass/init_loops.cpp | 16 ++-- .../src/lowered/pass/insert_buffers.cpp | 32 ++++---- .../src/lowered/pass/insert_load_store.cpp | 18 ++-- .../src/lowered/pass/insert_tail_loop.cpp | 14 ++-- .../load_movebroadcast_to_broadcastload.cpp | 12 +-- .../snippets/src/lowered/pass/mark_loops.cpp | 14 +++- .../pass/move_result_out_from_loop.cpp | 4 +- .../lowered/pass/move_scalar_to_consumer.cpp | 4 +- .../src/lowered/pass/propagate_layout.cpp | 11 ++- .../src/lowered/pass/reset_buffers.cpp | 6 +- .../lowered/pass/softmax_decomposition.cpp | 35 ++++---- .../src/lowered/pass/vector_to_scalar.cpp | 4 +- src/common/snippets/src/lowered/tensor.cpp | 69 ++-------------- .../emitters/x64/jit_snippets_emitters.cpp | 4 +- .../lowered/fuse_load_store_and_convert.cpp | 22 ++--- 30 files changed, 316 insertions(+), 420 deletions(-) diff --git a/src/common/snippets/include/snippets/lowered/expression.hpp b/src/common/snippets/include/snippets/lowered/expression.hpp index 0665f7505559d1..8be97055604b83 100644 --- a/src/common/snippets/include/snippets/lowered/expression.hpp +++ 
b/src/common/snippets/include/snippets/lowered/expression.hpp @@ -21,6 +21,7 @@ class LinearIR; class Expression : public std::enable_shared_from_this { friend class LinearIR; + friend class ExpressionPort; public: static size_t LOOP_NULL_ID; @@ -34,12 +35,13 @@ class Expression : public std::enable_shared_from_this { RegInfo get_reg_info() const { return m_reg_info; } void set_reg_info(RegInfo rinfo) { m_reg_info = std::move(rinfo); } - const TensorPtr& input(size_t i) const; - const TensorPtr& output(size_t i) const; - const std::vector& inputs() const { return m_inputs; } - const std::vector& outputs() const { return m_outputs; } - size_t get_input_count() const { return m_inputs.size(); } - size_t get_output_count() const { return m_outputs.size(); } + const TensorPtr& get_input_tensor(size_t i) const; + const TensorPtr& get_output_tensor(size_t i) const; + std::vector get_input_tensors() const { return m_input_tensors; } + std::vector get_output_tensors() const { return m_output_tensors; } + + size_t get_input_count() const { return m_input_tensors.size(); } + size_t get_output_count() const { return m_output_tensors.size(); } std::vector get_loop_ids() const { return m_loop_ids; } void set_loop_ids(const std::vector& loops) { m_loop_ids = loops; } @@ -48,24 +50,25 @@ class Expression : public std::enable_shared_from_this { void init_emitter(const std::shared_ptr& target); - ExpressionPort input_port(size_t i); - ExpressionPort output_port(size_t i); + ExpressionPort get_input_port(size_t i); + ExpressionPort get_output_port(size_t i); protected: // Note: The constructor and tensor initialization are private since an expression can be created only by Linear IR. // These methods must be used only by Linear IR builder of expressions! 
explicit Expression(const std::shared_ptr& n); - void init_inputs(const std::vector& inputs) { m_inputs = inputs; } - void init_outputs(const std::vector& outputs) { m_outputs = outputs; } + void init_inputs(const std::vector& inputs) { m_input_tensors = inputs; } + void init_outputs(const std::vector& outputs) { m_output_tensors = outputs; } // Note: These methods don't control availability of the current expression in this Tensor (as Consumer or Source) void replace_input(size_t port, TensorPtr to); - void replace_output(size_t port, TensorPtr to); std::shared_ptr m_source_node{nullptr}; std::shared_ptr m_emitter{nullptr}; - std::vector m_inputs; - std::vector m_outputs; + std::vector m_input_tensors{}; + std::vector m_output_tensors{}; + std::vector m_input_port_descriptors{}; + std::vector m_output_port_descriptors{}; RegInfo m_reg_info{{}, {}}; // The order Loops identifies: Outer ---> Inner std::vector m_loop_ids; diff --git a/src/common/snippets/include/snippets/lowered/expression_factory.hpp b/src/common/snippets/include/snippets/lowered/expression_factory.hpp index a0209e7d6f7efd..26f829a12140de 100644 --- a/src/common/snippets/include/snippets/lowered/expression_factory.hpp +++ b/src/common/snippets/include/snippets/lowered/expression_factory.hpp @@ -45,16 +45,12 @@ class LinearIR::ExpressionFactory { static ExpressionPtr create(const std::shared_ptr& n, const LinearIR& linear_ir, const std::vector& inputs); - /* -- Full Builders - get input and outputs tensors from parameters */ - static ExpressionPtr create(const std::shared_ptr& n, const LinearIR& linear_ir, - const std::vector& inputs, const std::vector& outputs); - // Creates inputs for expression using parent output tensors - static std::vector create_expression_inputs(const LinearIR& linear_ir, const ExpressionPtr& expr); + static void create_expression_inputs(const LinearIR& linear_ir, const ExpressionPtr& expr); // Creates new output tensors - static std::vector create_expression_outputs(const 
LinearIR& linear_ir, const ExpressionPtr& expr); + static void create_expression_outputs(const LinearIR& linear_ir, const ExpressionPtr& expr); // The method verifies of input tensors to availability of the expression as consumer and add it if missed - static void validate_inputs(const ExpressionPtr& expr, const std::vector& inputs); + static void init_expression_inputs(const ExpressionPtr& expr, const std::vector& inputs); }; } // namespace lowered diff --git a/src/common/snippets/include/snippets/lowered/expression_port.hpp b/src/common/snippets/include/snippets/lowered/expression_port.hpp index d1b8b0f088a9fa..7aaeeafa47fc88 100644 --- a/src/common/snippets/include/snippets/lowered/expression_port.hpp +++ b/src/common/snippets/include/snippets/lowered/expression_port.hpp @@ -14,6 +14,7 @@ namespace ngraph { namespace snippets { namespace lowered { +class Tensor; class Expression; class ExpressionPort { public: @@ -23,35 +24,30 @@ class ExpressionPort { }; ExpressionPort() = default; - explicit ExpressionPort(const std::weak_ptr& expr, Type type, size_t port, - const std::vector& tensor = {}, const std::vector& layout = {}, const std::vector& subtensor = {}); - explicit ExpressionPort(const std::weak_ptr& expr, Type type, size_t port, const PortDescriptorPtr& port_desc = nullptr); + explicit ExpressionPort(const std::shared_ptr& expr, Type type, size_t port); - std::shared_ptr get_expr_ptr() const; - const std::weak_ptr& get_expr_wptr() const { return m_expr; } + std::shared_ptr get_expr() const { return m_expr; } Type get_type() const { return m_type; } size_t get_index() const { return m_port_index; } - std::vector get_tensor() const { return m_port_desc->get_tensor(); } - std::vector get_layout() const { return m_port_desc->get_layout(); } - std::vector get_subtensor() const { return m_port_desc->get_subtensor(); } - const PortDescriptorPtr& get_port_descriptor() const { return m_port_desc; } + std::vector get_tensor() const; + std::vector get_layout() const; 
+ std::vector get_subtensor() const; + PortDescriptorPtr get_port_descriptor() const; + const std::shared_ptr& get_tensor_ptr() const; - void set_tensor(const std::vector& tensor) { m_port_desc->set_tensor(tensor); } - void set_layout(const std::vector& layout) { m_port_desc->set_layout(layout); } - void set_subtensor(const std::vector& subtensor) { m_port_desc->set_subtensor(subtensor); } - void set_port_descriptor(const PortDescriptorPtr& desc) { m_port_desc = desc; } + void set_tensor(const std::vector& tensor); + void set_layout(const std::vector& layout); + void set_subtensor(const std::vector& subtensor); friend bool operator==(const ExpressionPort& lhs, const ExpressionPort& rhs); friend bool operator!=(const ExpressionPort& lhs, const ExpressionPort& rhs); friend bool operator<(const ExpressionPort& lhs, const ExpressionPort& rhs); - friend std::ostream& operator<<(std::ostream&, const ExpressionPort& td); private: - std::weak_ptr m_expr; + std::shared_ptr m_expr; Type m_type = Type::Output; size_t m_port_index = 0; - PortDescriptorPtr m_port_desc; }; } // namespace lowered } // namespace snippets diff --git a/src/common/snippets/include/snippets/lowered/linear_ir.hpp b/src/common/snippets/include/snippets/lowered/linear_ir.hpp index 4de93b464b506e..68f79e3a818db0 100644 --- a/src/common/snippets/include/snippets/lowered/linear_ir.hpp +++ b/src/common/snippets/include/snippets/lowered/linear_ir.hpp @@ -34,7 +34,6 @@ class LinearIR { explicit LinearIR(const std::shared_ptr& m, Config config = {}); ExpressionPtr create_expression(const std::shared_ptr& n, const std::vector inputs); - ExpressionPtr create_expression(const std::shared_ptr& n, const std::vector inputs, const std::vector outputs); LinearIR deep_copy() const; static LinearIR::container deep_copy_range(LinearIR::container::const_iterator begin, LinearIR::container::const_iterator end); @@ -45,11 +44,9 @@ class LinearIR { ExpressionPtr get_expr_by_node(const std::shared_ptr& n) const; - void 
replace_input(const std::vector& consumers, const TensorPtr& to); + void replace_input(std::set consumers, const TensorPtr& to); void replace_input(const ExpressionPort& expr_port, const TensorPtr& to); void replace_input(const ExpressionPtr& expr, size_t port, const TensorPtr& to); - void replace_output(const ExpressionPort& expr_port, const TensorPtr& to); - void replace_output(const ExpressionPtr& expr, size_t port, const TensorPtr& to); /** * @brief Move an expression from the position "from" to the position immediately before "to". diff --git a/src/common/snippets/include/snippets/lowered/tensor.hpp b/src/common/snippets/include/snippets/lowered/tensor.hpp index b17daa27b0f8dd..29b04138acf407 100644 --- a/src/common/snippets/include/snippets/lowered/tensor.hpp +++ b/src/common/snippets/include/snippets/lowered/tensor.hpp @@ -21,33 +21,25 @@ class Expression; class Tensor { public: Tensor() = default; - explicit Tensor(const ExpressionPort& source_descriptor, const std::vector& consumer_descriptors = {}); + explicit Tensor(const ExpressionPort& source_descriptor, const std::set& consumer_descriptors = {}); const ExpressionPort& get_source() const { return m_source_port; } - std::vector get_consumers() const { return m_consumer_ports; } + std::set get_consumers() const { return m_consumer_ports; } void add_consumer(const ExpressionPort& consumer); void remove_consumer(const ExpressionPort& consumer); bool found_consumer(const ExpressionPort& consumer) const; - std::vector::const_iterator find_consumer(const ExpressionPort& consumer) const; - std::vector::iterator find_consumer(const ExpressionPort& consumer); - - std::vector get_conflicted_consumers() const; - bool is_conflicted_consumer(const ExpressionPort& consumer) const; + std::set::const_iterator find_consumer(const ExpressionPort& consumer) const; + std::set::iterator find_consumer(const ExpressionPort& consumer); // The scheduling params of Tensor is controlled by source expression port std::vector 
get_tensor() const { return m_source_port.get_tensor(); } std::vector get_layout() const { return m_source_port.get_layout(); } std::vector get_subtensor() const { return m_source_port.get_subtensor(); } - void set_tensor(const std::vector& tensor) { m_source_port.set_tensor(tensor); } - void set_layout(const std::vector& layout) { m_source_port.set_layout(layout); } - void set_subtensor(const std::vector& subtensor) { m_source_port.set_subtensor(subtensor); } - void set_port_descriptor(const PortDescriptorPtr& desc) { m_source_port.set_port_descriptor(desc); } - private: ExpressionPort m_source_port; - std::vector m_consumer_ports; + std::set m_consumer_ports; }; using TensorPtr = std::shared_ptr; diff --git a/src/common/snippets/src/lowered/expression.cpp b/src/common/snippets/src/lowered/expression.cpp index 6ac749885e39ef..2b78997522e3d1 100644 --- a/src/common/snippets/src/lowered/expression.cpp +++ b/src/common/snippets/src/lowered/expression.cpp @@ -17,15 +17,24 @@ namespace lowered { size_t Expression::LOOP_NULL_ID = SIZE_MAX; Expression::Expression(const std::shared_ptr& n) - : m_source_node{n}, m_emitter{nullptr}, m_inputs{}, m_outputs{}, m_reg_info{{}, {}} {} + : m_source_node{n}, m_emitter{nullptr}, m_input_tensors{}, m_output_tensors{}, m_reg_info{{}, {}} { + m_input_port_descriptors.reserve(n->get_input_size()); + m_output_port_descriptors.reserve(n->get_output_size()); + for (const auto& input : n->inputs()) { + m_input_port_descriptors.push_back(PortManager::get_port_descriptor_ptr(input)); + } + for (const auto& output : n->outputs()) { + m_output_port_descriptors.push_back(PortManager::get_port_descriptor_ptr(output)); + } +} -const TensorPtr& Expression::input(size_t i) const { - OPENVINO_ASSERT(i < m_inputs.size(), "Failed to get input: target input port must be less than input count!"); - return m_inputs[i]; +const TensorPtr& Expression::get_input_tensor(size_t i) const { + OPENVINO_ASSERT(i < m_input_tensors.size(), "Failed to get input 
tensor: target input port must be less than input count!"); + return m_input_tensors[i]; } -const TensorPtr& Expression::output(size_t i) const { - OPENVINO_ASSERT(i < m_outputs.size(), "Failed to get output: target output port must be less than output count!"); - return m_outputs[i]; +const TensorPtr& Expression::get_output_tensor(size_t i) const { + OPENVINO_ASSERT(i < m_output_tensors.size(), "Failed to get output tensor: target output port must be less than output count!"); + return m_output_tensors[i]; } std::shared_ptr Expression::get_node() const { @@ -43,13 +52,8 @@ void Expression::init_emitter(const std::shared_ptr& target } void Expression::replace_input(size_t port, TensorPtr to) { - OPENVINO_ASSERT(port < m_inputs.size(), "Failed to replace: target input port must be less than input count!"); - m_inputs[port] = std::move(to); -} - -void Expression::replace_output(size_t port, TensorPtr to) { - OPENVINO_ASSERT(port < m_outputs.size(), "Failed to replace: target output port must be less than output count!"); - m_outputs[port] = std::move(to); + OPENVINO_ASSERT(port < m_input_tensors.size(), "Failed to replace: target input port must be less than input count!"); + m_input_tensors[port] = std::move(to); } void Expression::set_loop_id(size_t id, size_t idx) { @@ -69,21 +73,12 @@ void Expression::remove_loop_id(size_t id) { *it = Expression::LOOP_NULL_ID; } -ExpressionPort Expression::input_port(size_t i) { - OPENVINO_ASSERT(i < m_inputs.size(), "Failed to get input port: target input port must be less than input count!"); - const auto& input = m_inputs[i]; - const auto consumers = input->get_consumers(); - const auto found = std::find_if(consumers.begin(), consumers.end(), - [&](const ExpressionPort& desc) { - return desc.get_index() == i && desc.get_expr_ptr() == this->shared_from_this(); - }); - OPENVINO_ASSERT(found != consumers.end(), "Input ExpressionPort for Expression hasn't found in input Tensor!"); - return *found; +ExpressionPort
Expression::get_input_port(size_t i) { + return ExpressionPort(this->shared_from_this(), ExpressionPort::Type::Input, i); } -ExpressionPort Expression::output_port(size_t i) { - OPENVINO_ASSERT(i < m_outputs.size(), "Failed to get output port: target output port must be less than output count!"); - return m_outputs[i]->get_source(); +ExpressionPort Expression::get_output_port(size_t i) { + return ExpressionPort(this->shared_from_this(), ExpressionPort::Type::Output, i); } IOExpression::IOExpression(const std::shared_ptr& par, int64_t index) diff --git a/src/common/snippets/src/lowered/expression_factory.cpp b/src/common/snippets/src/lowered/expression_factory.cpp index 4a6824adce624a..d104a6c03e64fb 100644 --- a/src/common/snippets/src/lowered/expression_factory.cpp +++ b/src/common/snippets/src/lowered/expression_factory.cpp @@ -10,58 +10,47 @@ namespace ngraph { namespace snippets { namespace lowered { -std::vector LinearIR::ExpressionFactory::create_expression_inputs(const LinearIR& linear_ir, const ExpressionPtr& expr) { +void LinearIR::ExpressionFactory::create_expression_inputs(const LinearIR& linear_ir, const ExpressionPtr& expr) { OPENVINO_ASSERT(expr != nullptr, "Failed expression inputs creation: expression is null"); const auto& node = expr->get_node(); - std::vector inputs(node->get_input_size(), nullptr); + expr->m_input_tensors.resize(node->get_input_size(), nullptr); for (const auto& input : node->inputs()) { const auto input_source = input.get_source_output(); const auto in_index = input.get_index(); const auto& parent_expr = linear_ir.get_expr_by_node(input_source.get_node_shared_ptr()); - const auto& tensor = parent_expr->output(input_source.get_index()); - const auto tensor_desc = ExpressionPort(expr, ExpressionPort::Type::Input, in_index, PortManager::get_port_descriptor_ptr(input)); - tensor->add_consumer(tensor_desc); - inputs[in_index] = tensor; + const auto& tensor = parent_expr->get_output_tensor(input_source.get_index()); + 
tensor->add_consumer(expr->get_input_port(in_index)); + expr->m_input_tensors[in_index] = tensor; } - return inputs; } -std::vector LinearIR::ExpressionFactory::create_expression_outputs(const LinearIR& linear_ir, const ExpressionPtr& expr) { +void LinearIR::ExpressionFactory::create_expression_outputs(const LinearIR& linear_ir, const ExpressionPtr& expr) { OPENVINO_ASSERT(expr != nullptr, "Failed expression outputs creation: expression is null"); const auto& node = expr->get_node(); - std::vector outputs(node->get_output_size(), nullptr); + expr->m_output_tensors.resize(node->get_output_size(), nullptr); for (const auto& output : node->outputs()) { const auto out_index = output.get_index(); - const auto tensor_desc = ExpressionPort(expr, ExpressionPort::Type::Output, out_index, PortManager::get_port_descriptor_ptr(output)); - outputs[out_index] = std::make_shared(tensor_desc); + const auto source = expr->get_output_port(out_index); + expr->m_output_tensors[out_index] = std::make_shared(source); } - return outputs; } // The method verifies of input tensors to availability of the expression as consumer and add it if missed -void LinearIR::ExpressionFactory::validate_inputs(const ExpressionPtr& expr, const std::vector& inputs) { +void LinearIR::ExpressionFactory::init_expression_inputs(const ExpressionPtr& expr, const std::vector& inputs) { for (size_t i = 0; i < inputs.size(); ++i) { const auto& input = inputs[i]; const auto consumers = input->get_consumers(); const auto found = std::find_if(consumers.begin(), consumers.end(), [&](const ExpressionPort& desc) { - return desc.get_index() == i && desc.get_expr_ptr() == expr; + return desc.get_index() == i && desc.get_expr() == expr; }); if (found == consumers.end()) { - PortDescriptorPtr port_desc = nullptr; - // LoopEnd doesn't have many input ports, so LoopEnd cannot initialize PortDescriptor for ports himself. 
- // The expression should have the same PortDescriptor as sources - if (ov::is_type(expr->get_node())) { - port_desc = input->get_source().get_port_descriptor(); - } else { - port_desc = PortManager::get_port_descriptor_ptr(expr->get_node()->input(i)); - } - const auto tensor_desc = ExpressionPort(expr, ExpressionPort::Type::Input, i, port_desc); - input->add_consumer(tensor_desc); + input->add_consumer(expr->get_input_port(i)); } } + expr->m_input_tensors = inputs; } ExpressionPtr LinearIR::ExpressionFactory::create(const std::shared_ptr& par, @@ -69,8 +58,7 @@ ExpressionPtr LinearIR::ExpressionFactory::create(const std::shared_ptr we cannot use directly make_shared(args) OPENVINO_ASSERT(model != nullptr, "To create IOExpression from Parameter there must be inited model!"); const auto expr = std::make_shared(IOExpression(par, model->get_parameter_index(par))); - expr->init_inputs({}); - expr->init_outputs(create_expression_outputs(linear_ir, expr)); + create_expression_outputs(linear_ir, expr); return expr; } @@ -79,8 +67,7 @@ ExpressionPtr LinearIR::ExpressionFactory::create(const std::shared_ptr we cannot use directly make_shared(args) OPENVINO_ASSERT(model != nullptr, "To create IOExpression from Result there must be inited model!"); const auto expr = std::make_shared(IOExpression(res, model->get_result_index(res))); - expr->init_inputs(create_expression_inputs(linear_ir, expr)); - expr->init_outputs({}); + create_expression_inputs(linear_ir, expr); return expr; } @@ -89,8 +76,8 @@ ExpressionPtr LinearIR::ExpressionFactory::create(const std::shared_ptr(n), "Default expression builder doesn't support LoopBegin and LoopEnd"); // Note: ctor of shared_ptr isn't friend class for Expression const auto expr = std::make_shared(Expression(n)); - expr->init_inputs(create_expression_inputs(linear_ir, expr)); - expr->init_outputs(create_expression_outputs(linear_ir, expr)); + create_expression_inputs(linear_ir, expr); + create_expression_outputs(linear_ir, expr); return 
expr; } @@ -98,46 +85,33 @@ ExpressionPtr LinearIR::ExpressionFactory::create(const std::shared_ptr& inputs) { OPENVINO_ASSERT(inputs.empty(), "LoopBegin cannot have inputs"); const auto expr = std::make_shared(Expression(n)); - validate_inputs(expr, inputs); - expr->init_inputs({}); - expr->init_outputs(create_expression_outputs(linear_ir, expr)); + init_expression_inputs(expr, inputs); + create_expression_outputs(linear_ir, expr); return expr; } ExpressionPtr LinearIR::ExpressionFactory::create(const std::shared_ptr& n, const LinearIR& linear_ir, const std::vector& inputs) { const auto expr = std::make_shared(Expression(n)); - validate_inputs(expr, inputs); - expr->init_inputs(inputs); - expr->init_outputs({}); + // Copy port descriptor shared pointers to LoopEnd + expr->m_input_port_descriptors.resize(inputs.size()); + for (size_t i = 0; i < inputs.size(); ++i) { + expr->m_input_port_descriptors[i] = inputs[i]->get_source().get_port_descriptor(); + } + init_expression_inputs(expr, inputs); return expr; } ExpressionPtr LinearIR::ExpressionFactory::create(const std::shared_ptr& n, const LinearIR& linear_ir, - const std::vector& inputs) { + const std::vector& inputs) { OPENVINO_ASSERT(!ov::is_type(n) && !ov::is_type(n), "Expression builder with inputs doesn't support Result and Parameter"); const auto expr = std::make_shared(Expression(n)); - validate_inputs(expr, inputs); - expr->init_inputs(inputs); - expr->init_outputs(create_expression_outputs(linear_ir, expr)); - return expr; -} - -ExpressionPtr LinearIR::ExpressionFactory::create(const std::shared_ptr& n, const LinearIR& linear_ir, - const std::vector& inputs, const std::vector& outputs) { - OPENVINO_ASSERT(!ov::is_type(n) && - !ov::is_type(n) && - !ov::is_type(n), - "Expression builder with inputs and outputs doesn't support service Nodes: Parameter, Result and LoopBegin, LoopEnd"); - const auto expr = std::make_shared(Expression(n)); - validate_inputs(expr, inputs); - expr->init_inputs(inputs); - 
expr->init_outputs(outputs); + init_expression_inputs(expr, inputs); + create_expression_outputs(linear_ir, expr); return expr; } - }// namespace lowered }// namespace snippets }// namespace ngraph diff --git a/src/common/snippets/src/lowered/expression_port.cpp b/src/common/snippets/src/lowered/expression_port.cpp index 6e064509ad21d9..bfd419fd84fd57 100644 --- a/src/common/snippets/src/lowered/expression_port.cpp +++ b/src/common/snippets/src/lowered/expression_port.cpp @@ -11,30 +11,55 @@ namespace ngraph { namespace snippets { namespace lowered { -ExpressionPort::ExpressionPort(const std::weak_ptr& expr, Type type, size_t port, - const std::vector& tensor, const std::vector& layout, const std::vector& subtensor) - : m_expr(expr), m_type(type), m_port_index(port), m_port_desc(std::make_shared(tensor, subtensor, layout)) {} - -ExpressionPort::ExpressionPort(const std::weak_ptr& expr, Type type, size_t port, const PortDescriptorPtr& port_desc) - : m_expr(expr), m_type(type), m_port_index(port) { - PortDescriptorPtr local_port_desc = port_desc; - if (!local_port_desc) { - if (type == Type::Input) { - local_port_desc = PortManager::get_port_descriptor_ptr(expr.lock()->get_node()->input(port)); - } else if (type == Type::Output) { - local_port_desc = PortManager::get_port_descriptor_ptr(expr.lock()->get_node()->output(port)); - } else { - OPENVINO_THROW("ExpressionPort supports only Input and Output type!"); - } - } - - m_port_desc = local_port_desc; -} - -std::shared_ptr ExpressionPort::get_expr_ptr() const { - auto shared = m_expr.lock(); - OPENVINO_ASSERT(shared != nullptr, "Failed attempt to get shared pointer of source expression: nullptr"); - return shared; +ExpressionPort::ExpressionPort(const std::shared_ptr& expr, Type type, size_t port) + : m_expr(expr), m_type(type), m_port_index(port) {} + +PortDescriptorPtr ExpressionPort::get_port_descriptor() const { + const auto& descs = m_type == Type::Input ? 
m_expr->m_input_port_descriptors + : m_expr->m_output_port_descriptors; + OPENVINO_ASSERT(m_port_index < descs.size(), "Incorrect index of port"); + return descs[m_port_index]; +} + +const std::shared_ptr& ExpressionPort::get_tensor_ptr() const { + const auto& tensors = m_type == Type::Input ? m_expr->m_input_tensors + : m_expr->m_output_tensors; + OPENVINO_ASSERT(m_port_index < tensors.size(), "Incorrect index of port"); + return tensors[m_port_index]; +} + +std::vector ExpressionPort::get_tensor() const { + return get_port_descriptor()->get_tensor(); +} +std::vector ExpressionPort::get_layout() const { + return get_port_descriptor()->get_layout(); +} +std::vector ExpressionPort::get_subtensor() const { + return get_port_descriptor()->get_subtensor(); +} + +void ExpressionPort::set_tensor(const std::vector& tensor) { + get_port_descriptor()->set_tensor(tensor); +} +void ExpressionPort::set_layout(const std::vector& layout) { + get_port_descriptor()->set_layout(layout); +} +void ExpressionPort::set_subtensor(const std::vector& subtensor) { + get_port_descriptor()->set_subtensor(subtensor); +} + +bool operator==(const ExpressionPort& lhs, const ExpressionPort& rhs) { + if (&lhs == &rhs) + return true; + OPENVINO_ASSERT(lhs.get_type() == rhs.get_type(), "Incorrect ExpressionPort comparison"); + return lhs.get_index() == rhs.get_index() && lhs.get_expr() == rhs.get_expr(); +} +bool operator!=(const ExpressionPort& lhs, const ExpressionPort& rhs) { + return !(lhs == rhs); +} +bool operator<(const ExpressionPort& lhs, const ExpressionPort& rhs) { + OPENVINO_ASSERT(lhs.get_type() == rhs.get_type(), "Incorrect ExpressionPort comparison"); + return (lhs.get_index() < rhs.get_index()) || (lhs.get_index() == rhs.get_index() && lhs.get_expr() < rhs.get_expr()); } }// namespace lowered diff --git a/src/common/snippets/src/lowered/linear_ir.cpp b/src/common/snippets/src/lowered/linear_ir.cpp index 77f62d71c9273c..969427d7e02a7a 100644 --- 
a/src/common/snippets/src/lowered/linear_ir.cpp +++ b/src/common/snippets/src/lowered/linear_ir.cpp @@ -58,10 +58,6 @@ ExpressionPtr LinearIR::create_expression(const std::shared_ptr& n, const return ExpressionFactory::build(n, *this, inputs); } -ExpressionPtr LinearIR::create_expression(const std::shared_ptr& n, const std::vector inputs, const std::vector outputs) { - return ExpressionFactory::build(n, *this, inputs, outputs); -} - ov::NodeVector LinearIR::get_ordered_ops(const std::shared_ptr& m) { if (!m->get_sinks().empty()) OPENVINO_THROW("Linear IR is not supposed to work for model with sinks. Check your transformation pipeline."); @@ -136,23 +132,23 @@ void LinearIR::debug_print(bool tds_as_pointers) const { std::cerr << counter++ << " : " << node->get_friendly_name() << " : "; if (tds_as_pointers) { - for (const auto& in : expr->inputs()) { + for (const auto& in : expr->m_input_tensors) { if (td2int.count(in) == 0) OPENVINO_THROW("Undefined input descriptor for op"); std::cerr << td2int.at(in) << ", "; } std::cerr << "\b\b => "; - for (const auto& out : expr->outputs()) { + for (const auto& out : expr->m_output_tensors) { if (td2int.count(out) == 0) td2int.insert({out, td_counter++}); std::cerr << td2int.at(out) << ", "; } } else { - for (size_t i = 0; i < expr->get_input_count(); ++i) - std::cerr << expr->input_port(i) << ", "; + for (const auto& port_desc : expr->m_input_port_descriptors) + std::cerr << port_desc << ", "; std::cerr << "\b\b => "; - for (size_t i = 0; i < expr->get_output_count(); ++i) - std::cerr << expr->output_port(i) << ", "; + for (const auto& port_desc : expr->m_output_port_descriptors) + std::cerr << port_desc << ", "; } std::cerr << "\b\b"; const auto& rinfo = expr->get_reg_info(); @@ -175,24 +171,24 @@ ExpressionPtr LinearIR::get_expr_by_node(const std::shared_ptr& n) const { return found->second; } -void LinearIR::replace_input(const std::vector& consumers, const TensorPtr& to) { +void LinearIR::replace_input(std::set consumers, 
const TensorPtr& to) { for (const auto& consumer_input : consumers) { replace_input(consumer_input, to); } } void LinearIR::replace_input(const ExpressionPtr& expr, size_t port, const TensorPtr& to) { - replace_input(expr->input_port(port), to); + replace_input(expr->get_input_port(port), to); } void LinearIR::replace_input(const ExpressionPort& expr_port, const TensorPtr& to) { const auto port = expr_port.get_index(); - const auto expr = expr_port.get_expr_ptr(); + const auto expr = expr_port.get_expr(); OPENVINO_ASSERT(expr_port.get_type() == ExpressionPort::Type::Input, "Failed to replace: target input port must have Input type"); OPENVINO_ASSERT(expr_port.get_index() < expr->get_input_count(), "Failed to replace: target input port must be less than input count!"); - const auto& from = expr->input(port); + const auto& from = expr->get_input_tensor(port); if (from == to) return; @@ -203,24 +199,6 @@ void LinearIR::replace_input(const ExpressionPort& expr_port, const TensorPtr& t expr->replace_input(port, std::move(to)); } -void LinearIR::replace_output(const ExpressionPtr& expr, size_t port, const TensorPtr& to) { - replace_output(expr->output_port(port), to); -} - -void LinearIR::replace_output(const ExpressionPort& expr_port, const TensorPtr& to) { - const auto port = expr_port.get_index(); - const auto expr = expr_port.get_expr_ptr(); - - OPENVINO_ASSERT(expr_port.get_type() == ExpressionPort::Type::Output, "Failed to replace: target output port must have Output type"); - OPENVINO_ASSERT(port < expr->get_output_count(), "Failed to replace: target output port must be less than output count!"); - const auto to_source_td = to->get_source(); - OPENVINO_ASSERT(to_source_td.get_expr_ptr() == expr && to_source_td.get_index() == port, - "Failed to replace: incorrect new output Tensor. 
Source expr must be the current expr"); - if (expr->output(port) == to) - return; - expr->replace_output(port, to); -} - void LinearIR::register_regular_expression(const ExpressionPtr& expr) { if (is_type(expr->get_node()) || is_type(expr->get_node())) OPENVINO_THROW("LinearIR::insert can't be used to add Parameters or Results to IR"); @@ -238,8 +216,8 @@ void LinearIR::register_expression(const ExpressionPtr& expr) { void LinearIR::unregister_expression(const ExpressionPtr& expr) { for (size_t i = 0; i < expr->get_input_count(); ++i) { - const auto& input = expr->input(i); - input->remove_consumer(expr->input_port(i)); + const auto& input = expr->get_input_tensor(i); + input->remove_consumer(expr->get_input_port(i)); } m_node2expression_map.erase(expr->get_node()); diff --git a/src/common/snippets/src/lowered/loop_manager.cpp b/src/common/snippets/src/lowered/loop_manager.cpp index 730c3e40c5b6db..54d6e48c78250a 100644 --- a/src/common/snippets/src/lowered/loop_manager.cpp +++ b/src/common/snippets/src/lowered/loop_manager.cpp @@ -55,7 +55,7 @@ void LinearIR::LoopManager::get_loop_bounds(const LinearIR &linear_ir, size_t loop_id) { OPENVINO_ASSERT(!entries.empty(), "Loop must have entry points"); OPENVINO_ASSERT(!exits.empty(), "Loop must have entry points"); - const auto entry_expr = entries.front().get_expr_ptr(); + const auto entry_expr = entries.front().get_expr(); loop_begin_pos = std::find(linear_ir.begin(), linear_ir.end(), entry_expr); OPENVINO_ASSERT(loop_begin_pos != linear_ir.end(), "Loop begin hasn't been found!"); @@ -68,7 +68,7 @@ void LinearIR::LoopManager::get_loop_bounds(const LinearIR &linear_ir, } // At the moment all Loops must have exit points - const auto& exit_expr = exits.back().get_expr_ptr(); + const auto& exit_expr = exits.back().get_expr(); loop_end_pos = std::next(std::find(loop_begin_pos, linear_ir.end(), exit_expr)); OPENVINO_ASSERT(loop_end_pos != linear_ir.end(), "Loop end hasn't been found!"); } @@ -81,15 +81,15 @@ void 
LinearIR::LoopManager::get_io_loop_ports(LinearIR::constExprIt loop_begin_p exits.clear(); for (auto expr_it = loop_begin_pos; expr_it != loop_end_pos; ++expr_it) { const auto& expr = *expr_it; - const auto inputs = expr->inputs(); - const auto outputs = expr->outputs(); + const auto inputs = expr->get_input_tensors(); + const auto outputs = expr->get_output_tensors(); for (size_t in_port = 0; in_port < inputs.size(); ++in_port) { const auto in_td = inputs[in_port]; - const auto parent_expr = in_td->get_source().get_expr_ptr(); + const auto parent_expr = in_td->get_source().get_expr(); if (!ov::is_type(parent_expr->get_node()) && std::find(loop_begin_pos, expr_it, parent_expr) == expr_it) { - entries.push_back(expr->input_port(in_port)); + entries.push_back(expr->get_input_port(in_port)); } } @@ -97,9 +97,9 @@ void LinearIR::LoopManager::get_io_loop_ports(LinearIR::constExprIt loop_begin_p const auto out_td = outputs[out_port]; const auto consumer_ports = out_td->get_consumers(); for (const auto& consumer : consumer_ports) { - const auto consumer_expr = consumer.get_expr_ptr(); + const auto consumer_expr = consumer.get_expr(); if (std::find(expr_it, loop_end_pos, consumer_expr) == loop_end_pos) { - exits.push_back(expr->output_port(out_port)); + exits.push_back(expr->get_output_port(out_port)); break; } } diff --git a/src/common/snippets/src/lowered/pass/allocate_buffers.cpp b/src/common/snippets/src/lowered/pass/allocate_buffers.cpp index d0f1e2403b5c4e..97d5d748a5be19 100644 --- a/src/common/snippets/src/lowered/pass/allocate_buffers.cpp +++ b/src/common/snippets/src/lowered/pass/allocate_buffers.cpp @@ -21,9 +21,9 @@ void AllocateBuffers::propagate_offset(const LinearIR& linear_ir, const Expressi // Propagate to up: in Store. 
Buffer can have only one Store { if (buffer->is_intermediate_memory()) { - OPENVINO_ASSERT(buffer_expr->inputs().size() == 1, "Buffer with intermediate memory must have one parent"); - const auto& parent_output = buffer_expr->input(0)->get_source(); - const auto& parent_expr = parent_output.get_expr_ptr(); + OPENVINO_ASSERT(buffer_expr->get_input_tensors().size() == 1, "Buffer with intermediate memory must have one parent"); + const auto& parent_output = buffer_expr->get_input_tensor(0)->get_source(); + const auto& parent_expr = parent_output.get_expr(); const auto port = parent_output.get_index(); const auto& parent_node = parent_expr->get_node(); auto memory_access = ov::as_type_ptr(parent_node); @@ -36,9 +36,9 @@ void AllocateBuffers::propagate_offset(const LinearIR& linear_ir, const Expressi } } // Propagate to down: in Load. Buffer can have several Load - const auto& buffer_out = buffer_expr->output(0); + const auto& buffer_out = buffer_expr->get_output_tensor(0); for (const auto& child_expr_input : buffer_out->get_consumers()) { - const auto& child_expr = child_expr_input.get_expr_ptr(); + const auto& child_expr = child_expr_input.get_expr(); const auto port = child_expr_input.get_index(); const auto& child_node = child_expr->get_node(); auto memory_access = ov::as_type_ptr(child_node); @@ -70,7 +70,7 @@ bool AllocateBuffers::run(LinearIR& linear_ir) { } if (buffer->is_intermediate_memory()) { - const auto& parent_expr = expr_it->get()->input(0)->get_source().get_expr_ptr(); + const auto& parent_expr = expr_it->get()->get_input_tensor(0)->get_source().get_expr(); const auto& parent_node = parent_expr->get_node(); // Full MemoryAccess ops need new memory. 
Previous logic is to check for parent isn't Loop // TODO: It should be unified in MemoryManager with memory reuse in the near future diff --git a/src/common/snippets/src/lowered/pass/assign_registers.cpp b/src/common/snippets/src/lowered/pass/assign_registers.cpp index cf941600e6e010..04b671fbe03a72 100644 --- a/src/common/snippets/src/lowered/pass/assign_registers.cpp +++ b/src/common/snippets/src/lowered/pass/assign_registers.cpp @@ -47,38 +47,38 @@ bool AssignRegisters::run(LinearIR& linear_ir) { auto op = expr->get_node(); if (const auto io_expr = std::dynamic_pointer_cast(expr)) { if (io_expr->get_type() == IOExpression::io_type::INPUT) - manually_assigned_gprs[expr->output(0)] = io_expr->get_index(); + manually_assigned_gprs[expr->get_output_tensor(0)] = io_expr->get_index(); else if (io_expr->get_type() == IOExpression::io_type::OUTPUT) - manually_assigned_gprs[expr->input(0)] = num_parameters + io_expr->get_index(); + manually_assigned_gprs[expr->get_input_tensor(0)] = num_parameters + io_expr->get_index(); else OPENVINO_THROW("Unsupported io_type detected"); } else if (const auto& buffer = ov::as_type_ptr(op)) { const auto buffer_id = buffer->get_id(); // All buffers have one common data pointer if (buffer->is_intermediate_memory()) { - manually_assigned_gprs[expr->input(0)] = + manually_assigned_gprs[expr->get_input_tensor(0)] = static_cast(num_results + num_parameters + buffer_id); } - manually_assigned_gprs[expr->output(0)] = + manually_assigned_gprs[expr->get_output_tensor(0)] = static_cast(num_results + num_parameters + buffer_id); } else if (ov::is_type(op) || ov::is_type(op)) { // Only in SoftmaxDecomposition ReduceMax and ReduceSum use HorizonMax/HorizonSum and VectorBuffer. 
// We should manually set the one vector register for VectorBuffer and Max/Sum output to simulate a accumulator // TODO [96351]: We should rewrite accumulator pattern using another way - const auto input_td = expr->input(0); - const auto& input_expr = input_td->get_source().get_expr_ptr(); - const auto& input_expr_input_tds = input_expr->inputs(); + const auto input_td = expr->get_input_tensor(0); + const auto& input_expr = input_td->get_source().get_expr(); + const auto& input_expr_input_tds = input_expr->get_input_tensors(); for (const auto& td : input_expr_input_tds) { - if (ov::is_type(td->get_source().get_expr_ptr()->get_node())) { + if (ov::is_type(td->get_source().get_expr()->get_node())) { manually_assigned_vecs[td] = static_cast(accumulator_reg); } } - const auto output_td = expr->output(0); + const auto output_td = expr->get_output_tensor(0); manually_assigned_vecs[input_td] = static_cast(accumulator_reg); manually_assigned_vecs[output_td] = static_cast(accumulator_reg); for (const auto& child_expr_input : output_td->get_consumers()) { - if (ov::is_type(child_expr_input.get_expr_ptr()->get_node())) { - manually_assigned_vecs[child_expr_input.get_expr_ptr()->output(0)] = + if (ov::is_type(child_expr_input.get_expr()->get_node())) { + manually_assigned_vecs[child_expr_input.get_expr()->get_output_tensor(0)] = static_cast(accumulator_reg); } } @@ -86,11 +86,11 @@ bool AssignRegisters::run(LinearIR& linear_ir) { // TODO: Fix via common pipeline using LoopEnd: // All operations `outside loop` after Horizon ops should have the same register to avoid using it in the next Loop const auto current_loops_ids = expr->get_loop_ids(); - auto next_expr = output_td->get_consumers().begin()->get_expr_ptr(); + auto next_expr = output_td->get_consumers().begin()->get_expr(); while (next_expr->get_loop_ids() == current_loops_ids) { - manually_assigned_vecs[next_expr->output(0)] = + manually_assigned_vecs[next_expr->get_output_tensor(0)] = static_cast(accumulator_reg); - 
next_expr = next_expr->output(0)->get_consumers().begin()->get_expr_ptr(); + next_expr = next_expr->get_output_tensor(0)->get_consumers().begin()->get_expr(); } accumulator_reg++; @@ -103,7 +103,7 @@ bool AssignRegisters::run(LinearIR& linear_ir) { decltype(regs_vec)& reg_map, const std::map& manually_assigned_regs, size_t& counter) { - for (const auto& out_td : expr->outputs()) { + for (const auto& out_td : expr->get_output_tensors()) { // Note that some ops might have identical input&output tensors (Result and Tile* for ex.) // so we have to check that the tensor has not been enumerated already if (reg_map.count(out_td) == 0) { @@ -143,9 +143,9 @@ bool AssignRegisters::run(LinearIR& linear_ir) { for (size_t i = 0; i < typed_ops.size(); i++) { const auto& t_op = typed_ops[i]; std::vector used_tensors, defined_tensors; - for (const auto& in : t_op.second->inputs()) + for (const auto& in : t_op.second->get_input_tensors()) used_tensors.push_back(in); - for (const auto& out : t_op.second->outputs()) + for (const auto& out : t_op.second->get_output_tensors()) defined_tensors.push_back(out); switch (t_op.first) { case Generator::opRegType::vec2vec: @@ -191,9 +191,9 @@ bool AssignRegisters::run(LinearIR& linear_ir) { const auto& expr = typed_ops[n].second; if (is_type(expr->get_node()) || is_type(expr->get_node())) continue; - for (const auto& out : expr->outputs()) { + for (const auto& out : expr->get_output_tensors()) { for (const auto& child_expr_input : out->get_consumers()) { - const auto& child_expr = child_expr_input.get_expr_ptr(); + const auto& child_expr = child_expr_input.get_expr(); auto child_it = linear_ir.begin(); std::advance(child_it, n); size_t k = n; @@ -319,10 +319,10 @@ bool AssignRegisters::run(LinearIR& linear_ir) { for (auto& t_op : typed_ops) { RegInfo rinfo; const auto& expr = t_op.second; - for (const auto& in : expr->inputs()) { + for (const auto& in : expr->get_input_tensors()) { rinfo.first.push_back(assigned_regs[in]); } - for (const auto& 
out : expr->outputs()) { + for (const auto& out : expr->get_output_tensors()) { rinfo.second.push_back(assigned_regs[out]); } t_op.second->set_reg_info(rinfo); diff --git a/src/common/snippets/src/lowered/pass/cleanup_loop_offsets.cpp b/src/common/snippets/src/lowered/pass/cleanup_loop_offsets.cpp index 9825ac6d6d92e0..0b82c1d866a693 100644 --- a/src/common/snippets/src/lowered/pass/cleanup_loop_offsets.cpp +++ b/src/common/snippets/src/lowered/pass/cleanup_loop_offsets.cpp @@ -36,12 +36,12 @@ bool CleanupLoopOffsets::run(LinearIR& linear_ir) { if (auto outer_loop_end = as_type_ptr(next_node)) { auto fin_offsets = loop_end->get_finalization_offsets(); std::unordered_map per_tensor_offset; - const auto& loop_inputs = expr_it->get()->inputs(); + const auto& loop_inputs = expr_it->get()->get_input_tensors(); for (size_t i = 0; i < fin_offsets.size(); i++) per_tensor_offset[loop_inputs[i]] = i; auto outer_ptr_increments = outer_loop_end->get_ptr_increments(); - const auto& outer_loop_inputs = next_expr_it->get()->inputs(); + const auto& outer_loop_inputs = next_expr_it->get()->get_input_tensors(); for (size_t i = 0; i < outer_ptr_increments.size(); i++) { const auto& managed_tensor = outer_loop_inputs[i]; const auto& found = per_tensor_offset.find(managed_tensor); diff --git a/src/common/snippets/src/lowered/pass/fuse_loops.cpp b/src/common/snippets/src/lowered/pass/fuse_loops.cpp index 66d3b586fcc27b..e2b1c99f60ff1b 100644 --- a/src/common/snippets/src/lowered/pass/fuse_loops.cpp +++ b/src/common/snippets/src/lowered/pass/fuse_loops.cpp @@ -33,23 +33,18 @@ void FuseLoops::fuse_points(std::vector& exit_points, std::vecto LinearIR::constExprIt loop_begin_pos, LinearIR::constExprIt loop_end_pos) { std::vector new_exit_points; for (const auto& exit_point : exit_points) { - const auto expr = exit_point.get_expr_ptr(); - const auto port = exit_point.get_index(); - const auto output_td = expr->output(port); - const auto consumers_inputs = output_td->get_consumers(); + const 
auto consumers_inputs = exit_point.get_tensor_ptr()->get_consumers(); std::vector mapped_entry_points; std::vector outside_consumers; for (const auto& consumer_input : consumers_inputs) { - const auto consumer = consumer_input.get_expr_ptr(); - const auto consumer_port = consumer_input.get_index(); - const auto consumer_point = consumer->input_port(consumer_port); - const auto entry_point_it = std::find(entry_points.begin(), entry_points.end(), consumer_point); + const auto entry_point_it = std::find(entry_points.begin(), entry_points.end(), consumer_input); if (entry_point_it != entry_points.end()) { mapped_entry_points.push_back(*entry_point_it); continue; } + const auto consumer = consumer_input.get_expr(); const auto inside_it = std::find(loop_begin_pos, loop_end_pos, consumer); if (inside_it == loop_end_pos) { outside_consumers.push_back(consumer); @@ -89,13 +84,10 @@ bool FuseLoops::fuse_upper_into_current(LinearIR& linear_ir, const LinearIR::Loo bool is_fusion_allowed = true; for (size_t i = 0; i < loop_target->exit_exprs.size() && is_fusion_allowed; ++i) { const auto target_exit_point = loop_target->exit_exprs[i]; - const auto target_exit_expr = target_exit_point.get_expr_ptr(); - const auto port = target_exit_point.get_index(); - const auto output_td = target_exit_expr->output(port); - const auto consumer_inputs = output_td->get_consumers(); + const auto consumer_inputs = target_exit_point.get_tensor_ptr()->get_consumers(); for (const auto& consumer_input : consumer_inputs) { - const auto consumer = consumer_input.get_expr_ptr(); - if (ov::is_type(consumer->get_node()) || consumer == current_entry_point.get_expr_ptr()) + const auto consumer = consumer_input.get_expr(); + if (ov::is_type(consumer->get_node()) || consumer == current_entry_point.get_expr()) continue; // The fusing is only valid if target Loop consumer (the Consumer is outside of target Loop) // is after current Loop (after Loop_down). 
@@ -160,12 +152,9 @@ bool FuseLoops::fuse_lower_into_current(LinearIR& linear_ir, const LinearIR::Loo bool is_fusion_allowed = true; for (size_t i = 0; i < loop_target->entry_exprs.size() && is_fusion_allowed; ++i) { const auto target_entry_point = loop_target->entry_exprs[i]; - const auto target_entry_expr = target_entry_point.get_expr_ptr(); - const auto port = target_entry_point.get_index(); - const auto input_td = target_entry_expr->input(port); - const auto parent_expr_output = input_td->get_source(); - const auto parent_expr = parent_expr_output.get_expr_ptr(); - if (ov::is_type(parent_expr->get_node()) || parent_expr == current_exit_point.get_expr_ptr()) + const auto parent_expr_output = target_entry_point.get_tensor_ptr()->get_source(); + const auto parent_expr = parent_expr_output.get_expr(); + if (ov::is_type(parent_expr->get_node()) || parent_expr == current_exit_point.get_expr()) continue; is_fusion_allowed = parent_expr->get_loop_ids()[dim_idx] == current_loop_id || // The parent expr is from the same current Loop std::find(linear_ir.cbegin(), current_loop_begin_pos, parent_expr) != current_loop_begin_pos; // The parent is before current Loop @@ -268,11 +257,8 @@ bool FuseLoops::run(LinearIR& linear_ir) { bool was_fusion_up = false; for (size_t in_port = 0; in_port < entry_points.size() && !was_fusion_up; ++in_port) { const auto entry_point = entry_points[in_port]; - const auto entry_expr = entry_point.get_expr_ptr(); - const auto port = entry_point.get_index(); - const auto input_td = entry_expr->input(port); - const auto parent_expr_output = input_td->get_source(); - const auto parent_expr = parent_expr_output.get_expr_ptr(); + const auto parent_expr_output = entry_point.get_tensor_ptr()->get_source(); + const auto parent_expr = parent_expr_output.get_expr(); const auto out_port = parent_expr_output.get_index(); const auto parent = parent_expr->get_node(); if (ov::is_type(parent) || @@ -290,7 +276,7 @@ bool FuseLoops::run(LinearIR& linear_ir) { 
continue; const auto loop_info_target = loop_manager->get_loop_info(loop_id_target); - const auto target_exit_port = parent_expr->output_port(out_port); + const auto target_exit_port = parent_expr->get_output_port(out_port); if (fuse_upper_into_current(linear_ir, loop_manager, entry_point, target_exit_port, loop_id, loop_id_target, dim_idx, loop_begin_pos, loop_end_pos)) { was_fusion_up = true; @@ -309,12 +295,9 @@ bool FuseLoops::run(LinearIR& linear_ir) { bool was_fusion_down = false; for (size_t out_port = 0; out_port < exit_points.size() && !was_fusion_down; ++out_port) { const auto exit_point = exit_points[out_port]; - const auto exit_expr = exit_point.get_expr_ptr(); - const auto port = exit_point.get_index(); - const auto output_td = exit_expr->output(port); - const auto consumer_exprs_inputs = output_td->get_consumers(); + const auto consumer_exprs_inputs = exit_point.get_tensor_ptr()->get_consumers(); for (const auto& consumer_expr_input : consumer_exprs_inputs) { - const auto consumer_expr = consumer_expr_input.get_expr_ptr(); + const auto consumer_expr = consumer_expr_input.get_expr(); const auto in_port = consumer_expr_input.get_index(); const auto consumer = consumer_expr->get_node(); if (ov::is_type(consumer) || @@ -332,7 +315,7 @@ bool FuseLoops::run(LinearIR& linear_ir) { continue; const auto loop_info_target = loop_manager->get_loop_info(loop_id_target); - const auto target_entry_port = consumer_expr->input_port(in_port); + const auto target_entry_port = consumer_expr->get_input_port(in_port); if (fuse_lower_into_current(linear_ir, loop_manager, exit_point, target_entry_port, loop_id, loop_id_target, dim_idx, loop_begin_pos, loop_end_pos)) { was_fusion_down = true; diff --git a/src/common/snippets/src/lowered/pass/indentify_buffers.cpp b/src/common/snippets/src/lowered/pass/indentify_buffers.cpp index bf053afd389579..a59315e30d29af 100644 --- a/src/common/snippets/src/lowered/pass/indentify_buffers.cpp +++ 
b/src/common/snippets/src/lowered/pass/indentify_buffers.cpp @@ -55,19 +55,19 @@ std::vector IdentifyBuffers::create_adjacency_matrix(const LinearIR& linea for (size_t buffer_idx = 0; buffer_idx < buffers.size(); ++buffer_idx) { // Here intermediate Buffer const auto buffer_expr = buffers[buffer_idx]; - const auto buffer_input_tds = buffer_expr->inputs(); + const auto buffer_input_tds = buffer_expr->get_input_tensors(); OPENVINO_ASSERT(buffer_input_tds.size() == 1, "Intermediate Buffer must have one input"); const auto buffer = ov::as_type_ptr(buffer_expr->get_node()); const auto& buffer_td = buffer_input_tds.front(); const auto buffer_siblings = buffer_td->get_consumers(); for (const auto& buffer_sibling : buffer_siblings) { - const auto& sibling_expr = buffer_sibling.get_expr_ptr(); + const auto& sibling_expr = buffer_sibling.get_expr(); // Skip myself if (sibling_expr == buffer_expr) { continue; } else if (const auto loop_end = ov::as_type_ptr(sibling_expr->get_node())) { - const auto& loop_tds = sibling_expr->inputs(); + const auto& loop_tds = sibling_expr->get_input_tensors(); const auto input_count = loop_end->get_input_num(); const auto output_count = loop_end->get_output_num(); const auto& ptr_increments = loop_end->get_ptr_increments(); @@ -76,7 +76,7 @@ std::vector IdentifyBuffers::create_adjacency_matrix(const LinearIR& linea // Verify Buffers on Loop inputs: for (size_t input_idx = 0; input_idx < input_count; ++input_idx) { - const auto loop_in = loop_tds[input_idx]->get_source().get_expr_ptr(); + const auto loop_in = loop_tds[input_idx]->get_source().get_expr(); if (const auto& neighbour_buffer = is_intermediate_buffer(loop_in->get_node())) { const auto neighbour_buffer_loop_port = input_idx; update_adj_matrix(buffer, buffer_idx, neighbour_buffer, @@ -93,7 +93,7 @@ std::vector IdentifyBuffers::create_adjacency_matrix(const LinearIR& linea const auto consumer_inputs = loop_tds[input_count + output_idx]->get_consumers(); for (const auto& consumer_input : 
consumer_inputs) { - const auto& child_node = consumer_input.get_expr_ptr()->get_node(); + const auto& child_node = consumer_input.get_expr()->get_node(); if (const auto& neighbour_buffer = is_intermediate_buffer(child_node)) { const auto neighbour_buffer_loop_port = input_count + output_idx; update_adj_matrix(buffer, buffer_idx, neighbour_buffer, diff --git a/src/common/snippets/src/lowered/pass/init_loops.cpp b/src/common/snippets/src/lowered/pass/init_loops.cpp index e90127129b7e54..fa9e78f5a9ad55 100644 --- a/src/common/snippets/src/lowered/pass/init_loops.cpp +++ b/src/common/snippets/src/lowered/pass/init_loops.cpp @@ -24,12 +24,12 @@ void filter_ports(LinearIR& linear_ir, std::set> loop_parents; for (const auto& loop_entry_point : loop_entries) { - const auto& expr = loop_entry_point.get_expr_ptr(); + const auto& expr = loop_entry_point.get_expr(); const auto port = loop_entry_point.get_index(); const auto node = expr->get_node(); const auto ma = ov::as_type_ptr(node); if (ma && ma->is_memory_access_input_port(port)) { - const auto& parent_expr = expr->input(port)->get_source().get_expr_ptr(); + const auto& parent_expr = expr->get_input_tensor(port)->get_source().get_expr(); const auto& parent = parent_expr->get_node(); // Todo: Sometimes several Load in one Loop read data from the same Node if (loop_parents.find(parent) == loop_parents.end()) { @@ -40,7 +40,7 @@ void filter_ports(LinearIR& linear_ir, } for (const auto& loop_exit_point : loop_exits) { - const auto& expr = loop_exit_point.get_expr_ptr(); + const auto& expr = loop_exit_point.get_expr(); const auto port = loop_exit_point.get_index(); const auto ma = ov::as_type_ptr(expr->get_node()); if (ma && ma->is_memory_access_output_port(port)) { @@ -121,10 +121,10 @@ std::vector InitLoops::init_element_type_sizes(const std::vector element_types; element_types.reserve(loop_inputs.size() + loop_outputs.size()); for (const auto& in : loop_inputs) { - 
element_types.push_back(in.get_expr_ptr()->get_node()->get_input_element_type(in.get_index()).size()); + element_types.push_back(in.get_expr()->get_node()->get_input_element_type(in.get_index()).size()); } for (const auto& out : loop_outputs) { - element_types.push_back(out.get_expr_ptr()->get_node()->get_output_element_type(out.get_index()).size()); + element_types.push_back(out.get_expr()->get_node()->get_output_element_type(out.get_index()).size()); } return element_types; } @@ -156,10 +156,10 @@ bool InitLoops::insertion(LinearIR& linear_ir, const LinearIR::LoopManager::Loop std::vector loop_end_inputs; for (const auto& expr_port : loop_entries) - loop_end_inputs.push_back(expr_port.get_expr_ptr()->input(expr_port.get_index())); + loop_end_inputs.push_back(expr_port.get_expr()->get_input_tensor(expr_port.get_index())); for (const auto& expr_port : loop_exits) - loop_end_inputs.push_back(expr_port.get_expr_ptr()->output(expr_port.get_index())); - loop_end_inputs.push_back(loop_begin_expr->output(0)); + loop_end_inputs.push_back(expr_port.get_expr()->get_output_tensor(expr_port.get_index())); + loop_end_inputs.push_back(loop_begin_expr->get_output_tensor(0)); const auto& loop_end_expr = linear_ir.create_expression(loop_end, loop_end_inputs); linear_ir.insert(loop_end_pos, loop_end_expr); diff --git a/src/common/snippets/src/lowered/pass/insert_buffers.cpp b/src/common/snippets/src/lowered/pass/insert_buffers.cpp index 34c44be68449ca..7758dab13a32e6 100644 --- a/src/common/snippets/src/lowered/pass/insert_buffers.cpp +++ b/src/common/snippets/src/lowered/pass/insert_buffers.cpp @@ -60,12 +60,12 @@ LinearIR::constExprIt InsertBuffers::insertion_position(const LinearIR& linear_i void InsertBuffers::insertion(LinearIR& linear_ir, const LinearIR::LoopManagerPtr& loop_manager, size_t loop_id, const std::vector& loop_entries, const std::vector& loop_exits) { for (const auto& entry_point : loop_entries) { - const auto expr = entry_point.get_expr_ptr(); + const auto expr 
= entry_point.get_expr(); const auto port = entry_point.get_index(); const auto node = expr->get_node(); - const auto input_td = expr->input(port); + const auto input_td = expr->get_input_tensor(port); const auto parent_expr_output = input_td->get_source(); - const auto& parent_expr = parent_expr_output.get_expr_ptr(); + const auto& parent_expr = parent_expr_output.get_expr(); const auto parent_port = parent_expr_output.get_index(); const auto parent = parent_expr->get_node(); if (ov::is_type(parent) || @@ -109,25 +109,25 @@ void InsertBuffers::insertion(LinearIR& linear_ir, const LinearIR::LoopManagerPt // Output td is automatically filled from PortDescriptor const auto buffer_expr = linear_ir.create_expression(buffer, {input_td}); linear_ir.insert(pos, buffer_expr); - linear_ir.replace_input(expr, port, buffer_expr->output(0)); + linear_ir.replace_input(expr, port, buffer_expr->get_output_tensor(0)); } } for (const auto& exit_point : loop_exits) { - const auto expr = exit_point.get_expr_ptr(); + const auto expr = exit_point.get_expr(); const auto port = exit_point.get_index(); const auto node = expr->get_node(); - const auto output_td = expr->output(port); + const auto output_td = expr->get_output_tensor(port); const auto child_exprs_inputs = output_td->get_consumers(); const auto current_loops = expr->get_loop_ids(); const auto current_loop_count = current_loops.size(); const std::vector node_outs = {output_td}; - std::vector potential_consumers; + std::set potential_consumers; std::set buffers; const auto current_loop_lvl = std::distance(current_loops.begin(), std::find(current_loops.begin(), current_loops.end(), loop_id)); for (const auto& child_expr_input : child_exprs_inputs) { - const auto& child_expr = child_expr_input.get_expr_ptr(); + const auto& child_expr = child_expr_input.get_expr(); const auto child_port = child_expr_input.get_index(); const auto& child = child_expr->get_node(); if (ov::is_type(child)) @@ -141,7 +141,7 @@ void 
InsertBuffers::insertion(LinearIR& linear_ir, const LinearIR::LoopManagerPt const auto node_ma = ov::as_type_ptr(node); if ((child_ma && child_ma->is_memory_access_input_port(child_port)) || (node_ma && node_ma->is_memory_access_output_port(port))) { - potential_consumers.push_back(child_expr_input); + potential_consumers.insert(child_expr_input); continue; } @@ -152,7 +152,7 @@ void InsertBuffers::insertion(LinearIR& linear_ir, const LinearIR::LoopManagerPt if (current_loops[i] != child_loops[i] && current_loops[i] != Expression::LOOP_NULL_ID && child_loops[i] != Expression::LOOP_NULL_ID) { - potential_consumers.push_back(child_expr_input); + potential_consumers.insert(child_expr_input); break; } } @@ -163,10 +163,10 @@ void InsertBuffers::insertion(LinearIR& linear_ir, const LinearIR::LoopManagerPt // we should remove them to insert one common Buffer on one common port if (!buffers.empty()) { for (const auto& buffer : buffers) { - const auto& buffer_out = buffer->output(0); + const auto& buffer_out = buffer->get_output_tensor(0); const auto buffer_consumers_inputs = buffer_out->get_consumers(); linear_ir.replace_input(buffer_consumers_inputs, output_td); - potential_consumers.insert(potential_consumers.end(), buffer_consumers_inputs.begin(), buffer_consumers_inputs.end()); + potential_consumers.insert(buffer_consumers_inputs.begin(), buffer_consumers_inputs.end()); linear_ir.erase(std::find(linear_ir.begin(), linear_ir.end(), buffer)); } } @@ -177,7 +177,7 @@ void InsertBuffers::insertion(LinearIR& linear_ir, const LinearIR::LoopManagerPt // Need to insert after 2nd Loops // Note: All potential consumers must have the same count of first equal Loop identifies and the same count of different last identifies // TODO: Need to verify that - const auto pos = insertion_position(linear_ir, loop_manager, expr, (*potential_consumers.begin()).get_expr_ptr()); + const auto pos = insertion_position(linear_ir, loop_manager, expr, (*potential_consumers.begin()).get_expr()); 
auto buffer = std::make_shared(node->output(port), m_buffer_allocation_rank); PortManager::set_port_descriptor_ptr(buffer->output(0), std::make_shared(output_td->get_tensor(), @@ -193,7 +193,7 @@ void InsertBuffers::insertion(LinearIR& linear_ir, const LinearIR::LoopManagerPt // Output td is automatically filled from PortDescriptor const auto buffer_expr = linear_ir.create_expression(buffer, node_outs); linear_ir.insert(pos, buffer_expr); - linear_ir.replace_input(potential_consumers, buffer_expr->output(0)); + linear_ir.replace_input(potential_consumers, buffer_expr->get_output_tensor(0)); } } } @@ -226,10 +226,10 @@ bool InsertBuffers::run(LinearIR& linear_ir) { std::vector loop_entries(input_ports.size()), loop_exits(output_ports.size()); // C++17: for (auto const& [loop_id, loop_info] : loop_data_map) for (const auto& p : input_ports) { - loop_entries[p.first] = expr->input_port(p.first); + loop_entries[p.first] = expr->get_input_port(p.first); } for (const auto& p : output_ports) { - loop_exits[p.first] = expr->output_port(p.first); + loop_exits[p.first] = expr->get_output_port(p.first); } insertion(linear_ir, loop_manager, Expression::LOOP_NULL_ID, loop_entries, loop_exits); diff --git a/src/common/snippets/src/lowered/pass/insert_load_store.cpp b/src/common/snippets/src/lowered/pass/insert_load_store.cpp index d7a326486f51ba..eeb1bccf118781 100644 --- a/src/common/snippets/src/lowered/pass/insert_load_store.cpp +++ b/src/common/snippets/src/lowered/pass/insert_load_store.cpp @@ -54,12 +54,12 @@ bool InsertLoadStore::insert_load(LinearIR& linear_ir, const LinearIR::constExpr const auto& loop_manager = linear_ir.get_loop_manager(); const auto& data_expr = *data_expr_it; const auto& data_node = data_expr->get_node(); - const auto& output_td = data_expr->output(0); + const auto& output_td = data_expr->get_output_tensor(0); const auto consumer_inputs = output_td->get_consumers(); bool was_inserted = false; for (const auto& consumer_input : consumer_inputs) { - 
const auto& consumer_expr = consumer_input.get_expr_ptr(); + const auto& consumer_expr = consumer_input.get_expr(); const auto port = consumer_input.get_index(); const auto& consumer = consumer_expr->get_node(); const auto ma = ov::as_type_ptr(consumer); @@ -77,13 +77,13 @@ bool InsertLoadStore::insert_load(LinearIR& linear_ir, const LinearIR::constExpr output_td->get_layout())); const auto load_expr = linear_ir.create_expression(load, {output_td}); linear_ir.insert(std::find(data_expr_it, linear_ir.cend(), consumer_expr), load_expr); - linear_ir.replace_input(consumer_expr, port, load_expr->output(0)); + linear_ir.replace_input(consumer_expr, port, load_expr->get_output_tensor(0)); // Copy Loop identifies load_expr->set_loop_ids(loop_ids); // Need to update all the corresponding Loops with the same Entry Point const auto prev_entry_point = consumer_input; - const auto new_entry_point = load_expr->input_port(0); + const auto new_entry_point = load_expr->get_input_port(0); update_loops(loop_manager, loop_ids, prev_entry_point, {new_entry_point}, true); was_inserted = true; } @@ -94,9 +94,9 @@ bool InsertLoadStore::insert_load(LinearIR& linear_ir, const LinearIR::constExpr bool InsertLoadStore::insert_store(LinearIR& linear_ir, const LinearIR::constExprIt& data_expr_it) { const auto& loop_manager = linear_ir.get_loop_manager(); const auto& data_expr = *data_expr_it; - const auto& input_td = data_expr->input(0); + const auto& input_td = data_expr->get_input_tensor(0); const auto parent_output = input_td->get_source(); - const auto& parent_expr = parent_output.get_expr_ptr(); + const auto& parent_expr = parent_output.get_expr(); const auto port = parent_output.get_index(); const auto& parent = parent_expr->get_node(); const auto ma = ov::as_type_ptr(parent); @@ -116,7 +116,7 @@ bool InsertLoadStore::insert_store(LinearIR& linear_ir, const LinearIR::constExp const auto& reverse_insertion_pos = std::find(std::reverse_iterator(data_expr_it), linear_ir.crend(), 
parent_expr); const auto& insertion_pos = reverse_insertion_pos.base(); linear_ir.insert(insertion_pos, store_expr); - linear_ir.replace_input(data_expr, 0, store_expr->output(0)); + linear_ir.replace_input(data_expr, 0, store_expr->get_output_tensor(0)); // Copy Loop identifies store_expr->set_loop_ids(loop_ids); @@ -127,10 +127,10 @@ bool InsertLoadStore::insert_store(LinearIR& linear_ir, const LinearIR::constExp const auto consumer_inputs = input_td->get_consumers(); const auto should_be_saved = std::any_of(consumer_inputs.begin(), consumer_inputs.end(), [](const ExpressionPort& input_port) { - const auto& node = input_port.get_expr_ptr()->get_node(); + const auto& node = input_port.get_expr()->get_node(); return ov::is_type(node) || ov::is_type(node); }); - const auto new_exit_point = store_expr->output_port(0); + const auto new_exit_point = store_expr->get_output_port(0); const auto new_exit_points = should_be_saved ? std::vector{prev_exit_point, new_exit_point} : std::vector{new_exit_point}; update_loops(loop_manager, loop_ids, prev_exit_point, new_exit_points, false); diff --git a/src/common/snippets/src/lowered/pass/insert_tail_loop.cpp b/src/common/snippets/src/lowered/pass/insert_tail_loop.cpp index f62f798480a87f..74bbf109d44bf8 100644 --- a/src/common/snippets/src/lowered/pass/insert_tail_loop.cpp +++ b/src/common/snippets/src/lowered/pass/insert_tail_loop.cpp @@ -41,12 +41,14 @@ void InsertTailLoop::tail_transformations(LinearIR& linear_ir, ov::is_type(op))) { for (size_t i = 0; i < op->inputs().size(); ++i) { if (auto fill = insertFill(op->input(i))) { - std::vector inputs{expr_it->get()->input(i)}; + std::vector inputs{expr_it->get()->get_input_tensor(i)}; + const auto& consumers = inputs.front()->get_consumers(); // Note: inputs == outputs, since we want to modify vector reg inplace - auto fill_expr = linear_ir.create_expression(fill, inputs, inputs); + auto fill_expr = linear_ir.create_expression(fill, inputs); + linear_ir.insert(expr_it, 
fill_expr); + linear_ir.replace_input(consumers, fill_expr->get_output_tensor(0)); auto reg = expr_it->get()->get_reg_info().first[i]; fill_expr->set_reg_info({{reg}, {reg}}); - linear_ir.insert(expr_it, fill_expr); } } } else if (const auto memory_access = std::dynamic_pointer_cast(op)) { @@ -96,17 +98,17 @@ bool InsertTailLoop::run(LinearIR& linear_ir) { }; auto is_loop_with_buffers = [&linear_ir](const std::shared_ptr& loop_end) { auto is_buffer_input = [&linear_ir](const TensorPtr& input) { - const auto parent_expr = input->get_source().get_expr_ptr(); + const auto parent_expr = input->get_source().get_expr(); return ov::is_type(parent_expr->get_node()); }; auto is_buffer_output = [&linear_ir](const TensorPtr& output) { const auto child_exprs_inputs = output->get_consumers(); return std::any_of(child_exprs_inputs.begin(), child_exprs_inputs.end(), - [](const ExpressionPort& lp) {return ov::is_type(lp.get_expr_ptr()->get_node());}); + [](const ExpressionPort& lp) {return ov::is_type(lp.get_expr()->get_node());}); }; const auto loop_end_expr = linear_ir.get_expr_by_node(loop_end); - const auto inputs = loop_end_expr->inputs(); + const auto inputs = loop_end_expr->get_input_tensors(); const auto in_num = loop_end->get_input_num(); const auto out_num = loop_end->get_output_num(); OPENVINO_ASSERT(inputs.size() == (in_num + out_num + 1), diff --git a/src/common/snippets/src/lowered/pass/load_movebroadcast_to_broadcastload.cpp b/src/common/snippets/src/lowered/pass/load_movebroadcast_to_broadcastload.cpp index 743b31a6b04f51..0f65d9d1ff4c31 100644 --- a/src/common/snippets/src/lowered/pass/load_movebroadcast_to_broadcastload.cpp +++ b/src/common/snippets/src/lowered/pass/load_movebroadcast_to_broadcastload.cpp @@ -22,8 +22,8 @@ bool LoadMoveBroadcastToBroadcastLoad::run(LinearIR& linear_ir) { const auto& op = (*expr_it)->get_node(); // Match on MoveBroadcast because MoveBroadcast is rare node in bodies if (const auto move_broadcast = ov::as_type_ptr(op)) { - const 
auto& interm_td = (*expr_it)->input(0); - const auto parent_expr = interm_td->get_source().get_expr_ptr(); + const auto& interm_td = (*expr_it)->get_input_tensor(0); + const auto parent_expr = interm_td->get_source().get_expr(); const auto load = ov::as_type_ptr(parent_expr->get_node()); if (!load) continue; @@ -33,7 +33,7 @@ bool LoadMoveBroadcastToBroadcastLoad::run(LinearIR& linear_ir) { const auto load_consumers_inputs = interm_td->get_consumers(); size_t count = 0; for (const auto& consumer_expr_input : load_consumers_inputs) { - const auto consumer = consumer_expr_input.get_expr_ptr()->get_node(); + const auto consumer = consumer_expr_input.get_expr()->get_node(); if (!ov::is_type(consumer)) count++; } @@ -43,18 +43,18 @@ bool LoadMoveBroadcastToBroadcastLoad::run(LinearIR& linear_ir) { const auto& outshape = move_broadcast->get_output_partial_shape(0); const auto broadcastload = std::make_shared(load->input_value(0), outshape, load->get_offset()); - const auto& move_out = (*expr_it)->output(0); + const auto& move_out = (*expr_it)->get_output_tensor(0); const auto move_consumers = move_out->get_consumers(); PortManager::set_port_descriptor_ptr(broadcastload->output(0), std::make_shared(move_out->get_tensor(), move_out->get_subtensor(), move_out->get_layout())); - const auto broadcastload_expr = linear_ir.create_expression(broadcastload, { parent_expr->input(0) }); + const auto broadcastload_expr = linear_ir.create_expression(broadcastload, { parent_expr->get_input_tensor(0) }); const auto mv_expr_it = expr_it; const auto insertion_pos = std::next(expr_it); expr_it = linear_ir.insert(insertion_pos, broadcastload_expr); linear_ir.erase(std::find(linear_ir.begin(), mv_expr_it, parent_expr)); linear_ir.erase(mv_expr_it); - linear_ir.replace_input(move_consumers, broadcastload_expr->output(0)); + linear_ir.replace_input(move_consumers, broadcastload_expr->get_output_tensor(0)); modified |= true; } } diff --git a/src/common/snippets/src/lowered/pass/mark_loops.cpp 
b/src/common/snippets/src/lowered/pass/mark_loops.cpp index fa27bc60dbfb51..c9436b1c2b3318 100644 --- a/src/common/snippets/src/lowered/pass/mark_loops.cpp +++ b/src/common/snippets/src/lowered/pass/mark_loops.cpp @@ -32,6 +32,14 @@ bool MarkLoops::run(LinearIR& linear_ir) { ov::is_type(node); }; + auto are_conflicted = [](const ExpressionPort& lhs, const ExpressionPort& rhs) { + const auto& lhs_desc = lhs.get_port_descriptor(); + const auto& rhs_desc = rhs.get_port_descriptor(); + return lhs_desc->get_subtensor() != rhs_desc->get_subtensor() || + lhs_desc->get_layout() != rhs_desc->get_layout() || + lhs_desc->get_tensor() != rhs_desc->get_tensor(); + }; + for (auto expr_it = linear_ir.cbegin(); expr_it != linear_ir.cend(); expr_it++) { const auto expr = *expr_it; const auto& node = expr->get_node(); @@ -62,13 +70,13 @@ bool MarkLoops::run(LinearIR& linear_ir) { bool is_connected = false; bool is_conflicted = false; for (size_t i = 0; i < prev_expr->get_output_count(); ++i) { - const auto& loop_td = prev_expr->output(i); + const auto& loop_td = prev_expr->get_output_tensor(i); const auto consumers = loop_td->get_consumers(); const auto found = std::find_if(consumers.begin(), consumers.end(), [&loop_end_pos](const ExpressionPort& consumer) { - return consumer.get_expr_ptr() == *loop_end_pos; + return consumer.get_expr() == *loop_end_pos; }); if (found != consumers.end()) { - if (loop_td->is_conflicted_consumer(*found)) { + if (are_conflicted(*found, loop_td->get_source())) { is_conflicted = true; break; } diff --git a/src/common/snippets/src/lowered/pass/move_result_out_from_loop.cpp b/src/common/snippets/src/lowered/pass/move_result_out_from_loop.cpp index 58f844212b6849..6d287990e8ca26 100644 --- a/src/common/snippets/src/lowered/pass/move_result_out_from_loop.cpp +++ b/src/common/snippets/src/lowered/pass/move_result_out_from_loop.cpp @@ -31,8 +31,8 @@ bool MoveResultOutOfLoop::run(LinearIR& linear_ir) { continue; } - const auto& input_td = expr->input(0); - 
const auto parent_expr = input_td->get_source().get_expr_ptr(); + const auto& input_td = expr->get_input_tensor(0); + const auto parent_expr = input_td->get_source().get_expr(); const auto parent_loop_ids = parent_expr->get_loop_ids(); int outer_loop_id = static_cast(parent_loop_ids.size()) - 1; for (; outer_loop_id >= 0; --outer_loop_id) { diff --git a/src/common/snippets/src/lowered/pass/move_scalar_to_consumer.cpp b/src/common/snippets/src/lowered/pass/move_scalar_to_consumer.cpp index 9c3f85270a0bdc..bf9a15a784b023 100644 --- a/src/common/snippets/src/lowered/pass/move_scalar_to_consumer.cpp +++ b/src/common/snippets/src/lowered/pass/move_scalar_to_consumer.cpp @@ -25,11 +25,11 @@ bool MoveScalarToConsumer::run(LinearIR& linear_ir) { for (auto expr_it = linear_ir.rbegin(); expr_it != linear_ir.rend(); expr_it++) { const auto expr = expr_it->get(); if (ov::is_type(expr->get_node())) { - const auto& output = expr->output(0); + const auto& output = expr->get_output_tensor(0); const auto consumers = output->get_consumers(); OPENVINO_ASSERT(consumers.size() == 1, "Scalar expression is expected to have a single consumer"); - const auto& consumer_expr = consumers.begin()->get_expr_ptr(); + const auto& consumer_expr = consumers.begin()->get_expr(); // Move something only if consumer is not already the next one (previous since the iterator is a reverse one) auto forward_it = std::prev(expr_it.base()); if (consumer_expr != *std::next(forward_it)) { diff --git a/src/common/snippets/src/lowered/pass/propagate_layout.cpp b/src/common/snippets/src/lowered/pass/propagate_layout.cpp index e4540f6bcb90a5..050c9b59be16df 100644 --- a/src/common/snippets/src/lowered/pass/propagate_layout.cpp +++ b/src/common/snippets/src/lowered/pass/propagate_layout.cpp @@ -26,7 +26,7 @@ bool PropagateLayout::run(LinearIR& linear_ir) { continue; const bool is_input = io_expr->get_type() == IOExpression::io_type::INPUT; - const auto& tds = is_input ? 
expr->outputs() : expr->inputs(); + const auto& tds = is_input ? expr->get_output_tensors() : expr->get_input_tensors(); if (tds.size() != 1) OPENVINO_THROW("Parameter/Results should have exactly one output/input"); @@ -38,7 +38,7 @@ bool PropagateLayout::run(LinearIR& linear_ir) { // but often there is another child - LoopEnd std::set> child_layouts; for (const auto& child_input : consumer_inputs) { - const auto child = child_input.get_expr_ptr(); + const auto child = child_input.get_expr(); const auto port = child_input.get_index(); const auto& n = child->get_node(); const auto ma = ov::as_type_ptr(n); @@ -47,14 +47,14 @@ bool PropagateLayout::run(LinearIR& linear_ir) { } } OPENVINO_ASSERT(child_layouts.size() == 1, "All children of an input expression must have the same layout"); - target_td->set_layout(*child_layouts.begin()); + io_expr->get_output_port(0).set_layout(*child_layouts.begin()); } else { const auto consumer_inputs = target_td->get_consumers(); // Note that here we consider only the first child (which is usually Store), // but often there is another child - LoopEnd ExpressionPort result_td; for (const auto& child_input : consumer_inputs) { - const auto child = child_input.get_expr_ptr(); + const auto child = child_input.get_expr(); if (ov::is_type(child->get_node())) { continue; } @@ -65,8 +65,7 @@ bool PropagateLayout::run(LinearIR& linear_ir) { OPENVINO_THROW("Result cannot have any siblings (only LoopEnd's)"); } - const auto& td_it = target_td->find_consumer(result_td); - td_it->set_layout(target_td->get_layout()); + io_expr->get_input_port(0).set_layout(target_td->get_layout()); } } diff --git a/src/common/snippets/src/lowered/pass/reset_buffers.cpp b/src/common/snippets/src/lowered/pass/reset_buffers.cpp index 977ade95bad9a7..350d9a49c69313 100644 --- a/src/common/snippets/src/lowered/pass/reset_buffers.cpp +++ b/src/common/snippets/src/lowered/pass/reset_buffers.cpp @@ -18,14 +18,14 @@ bool ResetBuffers::reuse_buffer_increments(const LinearIR& 
linear_ir, const Expr if (!loop_end) return false; - const auto loop_tds = loop_end_expr->inputs(); + const auto loop_tds = loop_end_expr->get_input_tensors(); const auto input_count = loop_end->get_input_num(); const auto output_count = loop_end->get_output_num(); std::set resetting_buffers; std::set buffers_ids; for (size_t i = 0; i < input_count; ++i) { - const auto parent_output = loop_tds[i]->get_source().get_expr_ptr(); + const auto parent_output = loop_tds[i]->get_source().get_expr(); if (const auto buffer = ov::as_type_ptr(parent_output->get_node())) { // If Buffer is missed in set, Just save - it's first meeting if (buffers_ids.count(buffer->get_id()) == 0) { @@ -41,7 +41,7 @@ bool ResetBuffers::reuse_buffer_increments(const LinearIR& linear_ir, const Expr size_t buffer_count = 0; size_t loop_count = 0; for (const auto& consumer_input : consumer_inputs) { - const auto& child_node = consumer_input.get_expr_ptr()->get_node(); + const auto& child_node = consumer_input.get_expr()->get_node(); if (const auto buffer = ov::as_type_ptr(child_node)) { buffer_count++; // If Buffer is missed in set, Just save - it's first meeting diff --git a/src/common/snippets/src/lowered/pass/softmax_decomposition.cpp b/src/common/snippets/src/lowered/pass/softmax_decomposition.cpp index 64f49a6781b082..eef8066be357b4 100644 --- a/src/common/snippets/src/lowered/pass/softmax_decomposition.cpp +++ b/src/common/snippets/src/lowered/pass/softmax_decomposition.cpp @@ -36,8 +36,8 @@ bool SoftmaxDecomposition::run(LinearIR& linear_ir) { const auto softmax = pm.at(match_softmax); const auto softmax_expr = *expr_it; const auto softmax_loop_ids = softmax_expr->get_loop_ids(); - const auto& input_td = softmax_expr->input(0); - const auto& output_td = softmax_expr->output(0); + const auto& input_td = softmax_expr->get_input_tensor(0); + const auto& output_td = softmax_expr->get_output_tensor(0); const auto tensor_out = output_td->get_tensor(); const auto inner_work_amount = 
*(tensor_out.rbegin()); @@ -63,9 +63,9 @@ bool SoftmaxDecomposition::run(LinearIR& linear_ir) { // Markup of ReduceMax Loop loop_manager->mark_loop(max.first, horizon_max.first, 1, inner_work_amount, m_vector_size, - std::vector{(*max.first)->input_port(0), - (*max.first)->input_port(1)}, - std::vector{(*max.first)->output_port(0)}); + std::vector{(*max.first)->get_input_port(0), + (*max.first)->get_input_port(1)}, + std::vector{(*max.first)->get_output_port(0)}); const auto broadcast_horizon_max = push_node( std::make_shared(horizon_max.second, horizon_max.second->get_input_partial_shape(0))); @@ -83,11 +83,11 @@ bool SoftmaxDecomposition::run(LinearIR& linear_ir) { // Markup of ReduceMax Loop loop_manager->mark_loop(sub.first, horizon_sum.first, 1, inner_work_amount, m_vector_size, - std::vector{(*sub.first)->input_port(0), - (*sub.first)->input_port(1), - (*sum.first)->input_port(1)}, - std::vector{(*exp.first)->output_port(0), - (*sum.first)->output_port(0)}); + std::vector{(*sub.first)->get_input_port(0), + (*sub.first)->get_input_port(1), + (*sum.first)->get_input_port(1)}, + std::vector{(*exp.first)->get_output_port(0), + (*sum.first)->get_output_port(0)}); // Divide is expensive operation, so we decompose it into 1 / x * y, where 1 / x is executed outside loop const auto pow = push_node(std::make_shared(horizon_sum.second, -1.f)); @@ -101,13 +101,13 @@ bool SoftmaxDecomposition::run(LinearIR& linear_ir) { // Transfer original ExpressionPorts linear_ir.replace_input(*max.first, 0, input_td); linear_ir.replace_input(*sub.first, 0, input_td); - linear_ir.replace_input(output_td->get_consumers(), (*mul.first)->output(0)); + linear_ir.replace_input(output_td->get_consumers(), (*mul.first)->get_output_tensor(0)); // Markup of Mul Loop loop_manager->mark_loop(mul.first, expr_it, 1, inner_work_amount, m_vector_size, - std::vector{(*mul.first)->input_port(0), - (*mul.first)->input_port(1)}, - std::vector{(*mul.first)->output_port(0)}); + 
std::vector{(*mul.first)->get_input_port(0), + (*mul.first)->get_input_port(1)}, + std::vector{(*mul.first)->get_output_port(0)}); // Markup inner loop for outside expression with null loop id for (const auto& expr : outer_exprs) { @@ -118,7 +118,7 @@ bool SoftmaxDecomposition::run(LinearIR& linear_ir) { const std::vector& new_points, const LinearIR::LoopManager::LoopInfoPtr& loop_info) { auto entry_found = std::find_if(points.begin(), points.end(), [&softmax_expr](const ExpressionPort& desc) { - return desc.get_expr_ptr() == softmax_expr; + return desc.get_expr() == softmax_expr; }); if (entry_found != points.end()) { entry_found = points.erase(entry_found); @@ -131,8 +131,9 @@ bool SoftmaxDecomposition::run(LinearIR& linear_ir) { if (loop_id == Expression::LOOP_NULL_ID) continue; const auto loop_info = loop_manager->get_loop_info(loop_id); - update_loop_bounds(loop_info->entry_exprs, std::vector{(*max.first)->input_port(0), (*sub.first)->input_port(0)}, loop_info); - update_loop_bounds(loop_info->exit_exprs, std::vector{(*mul.first)->output_port(0)}, loop_info); + update_loop_bounds(loop_info->entry_exprs, std::vector{(*max.first)->get_input_port(0), + (*sub.first)->get_input_port(0)}, loop_info); + update_loop_bounds(loop_info->exit_exprs, std::vector{(*mul.first)->get_output_port(0)}, loop_info); } /* =========================================== */ diff --git a/src/common/snippets/src/lowered/pass/vector_to_scalar.cpp b/src/common/snippets/src/lowered/pass/vector_to_scalar.cpp index 60355de49c8aff..3e60c42875ccf2 100644 --- a/src/common/snippets/src/lowered/pass/vector_to_scalar.cpp +++ b/src/common/snippets/src/lowered/pass/vector_to_scalar.cpp @@ -23,8 +23,8 @@ bool SetScalarCountForLoadStore::run(LinearIR& linear_ir) { const auto load = ov::as_type_ptr(op); const auto store = ov::as_type_ptr(op); if (load || store) { - const auto& td = load ? (*expr_it)->input(0) - : (*expr_it)->output(0); + const auto& td = load ? 
(*expr_it)->get_input_tensor(0) + : (*expr_it)->get_output_tensor(0); const auto& layout = td->get_layout(); const auto& tensor_shape = td->get_tensor(); // Find last dimension by layout diff --git a/src/common/snippets/src/lowered/tensor.cpp b/src/common/snippets/src/lowered/tensor.cpp index 70e40160a6f29b..c35cfb0cf609da 100644 --- a/src/common/snippets/src/lowered/tensor.cpp +++ b/src/common/snippets/src/lowered/tensor.cpp @@ -12,22 +12,22 @@ namespace ngraph { namespace snippets { namespace lowered { -Tensor::Tensor(const ExpressionPort& source_descriptor, const std::vector& consumer_descriptors) +Tensor::Tensor(const ExpressionPort& source_descriptor, const std::set& consumer_descriptors) : m_source_port(source_descriptor), m_consumer_ports(consumer_descriptors) {} -std::vector::const_iterator Tensor::find_consumer(const ExpressionPort& consumer) const { +std::set::const_iterator Tensor::find_consumer(const ExpressionPort& consumer) const { // Note: Find by shared ptr and index port is enough since these parameters must be unique - return std::find_if(m_consumer_ports.begin(), m_consumer_ports.end(), + return std::find_if(m_consumer_ports.cbegin(), m_consumer_ports.cend(), [&consumer](const ExpressionPort& td) { - return consumer.get_expr_ptr() == td.get_expr_ptr() && consumer.get_index() == td.get_index(); + return consumer.get_expr() == td.get_expr() && consumer.get_index() == td.get_index(); }); } -std::vector::iterator Tensor::find_consumer(const ExpressionPort& consumer) { +std::set::iterator Tensor::find_consumer(const ExpressionPort& consumer) { // Note: Find by shared ptr and index port is enough since these parameters must be unique return std::find_if(m_consumer_ports.begin(), m_consumer_ports.end(), [&consumer](const ExpressionPort& td) { - return consumer.get_expr_ptr() == td.get_expr_ptr() && consumer.get_index() == td.get_index(); + return consumer.get_expr() == td.get_expr() && consumer.get_index() == td.get_index(); }); } @@ -37,7 +37,8 @@ bool 
Tensor::found_consumer(const ExpressionPort& consumer) const { void Tensor::add_consumer(const ExpressionPort& consumer) { OPENVINO_ASSERT(!found_consumer(consumer), "Consumer has been already added to Tensor!"); - m_consumer_ports.push_back(consumer); + const auto res = m_consumer_ports.insert(consumer); + OPENVINO_ASSERT(res.second, "Consumer hasn't been added to the Tensor"); } void Tensor::remove_consumer(const ExpressionPort& consumer) { @@ -46,60 +47,6 @@ void Tensor::remove_consumer(const ExpressionPort& consumer) { m_consumer_ports.erase(found); } -std::vector Tensor::get_conflicted_consumers() const { - std::vector conflicted_consumers; - for (const auto& consumer : m_consumer_ports) { - if (is_conflicted_consumer(consumer)) { - conflicted_consumers.push_back(consumer); - } - } - return conflicted_consumers; -} - -bool Tensor::is_conflicted_consumer(const ExpressionPort& consumer) const { - OPENVINO_ASSERT(found_consumer(consumer), "Failed check for conflicted consumer: it's not a consumer fot the Tensor"); - return get_tensor() != consumer.get_tensor() || - get_layout() != consumer.get_layout() || - get_subtensor() != consumer.get_subtensor(); -} - -bool operator==(const ExpressionPort& lhs, const ExpressionPort& rhs) { - if (&rhs == &lhs) - return true; - return lhs.m_type == rhs.m_type && - lhs.m_expr.lock() == rhs.m_expr.lock() && - lhs.m_port_index == rhs.m_port_index && - lhs.m_port_desc == rhs.m_port_desc; -} -bool operator!=(const ExpressionPort& lhs, const ExpressionPort& rhs) { - return !(lhs == rhs); -} -bool operator<(const ExpressionPort& lhs, const ExpressionPort& rhs) { - OPENVINO_ASSERT(lhs.get_type() == rhs.get_type(), "ExpressionPorts must be of the same type for comparison!"); - return lhs.get_index() < rhs.get_index() && - lhs.get_expr_ptr() < rhs.get_expr_ptr() && - lhs.get_tensor() < rhs.get_tensor() && - lhs.get_layout() < rhs.get_layout() && - lhs.get_subtensor() < rhs.get_subtensor(); -} - -std::ostream& operator<<(std::ostream& 
ss, const ExpressionPort& td) { - auto print_vector = [&ss](const std::vector& data){ - ss << "["; - for (auto i : data) - ss << i << ","; - ss << (data.empty() ? "]" : "\b]"); - }; - ss << "{Tensor: "; - print_vector(td.get_tensor()); - ss << " Subtensor: "; - print_vector(td.get_subtensor()); - ss << " Layout: "; - print_vector(td.get_layout()); - ss << "}"; - return ss; -} - }// namespace lowered }// namespace snippets }// namespace ngraph diff --git a/src/plugins/intel_cpu/src/emitters/x64/jit_snippets_emitters.cpp b/src/plugins/intel_cpu/src/emitters/x64/jit_snippets_emitters.cpp index c180c9ace65d71..5346b8059681c6 100644 --- a/src/plugins/intel_cpu/src/emitters/x64/jit_snippets_emitters.cpp +++ b/src/plugins/intel_cpu/src/emitters/x64/jit_snippets_emitters.cpp @@ -125,14 +125,14 @@ KernelEmitter::KernelEmitter(dnnl::impl::cpu::x64::jit_generator* h, dnnl::impl: element::Type etype; switch (expr->get_type()) { case ngraph::snippets::lowered::IOExpression::io_type::INPUT: { - td = expr->outputs()[0]; + td = expr->get_output_tensor(0); etype = expr->get_node()->get_output_element_type(0); num_inputs++; break; } case ngraph::snippets::lowered::IOExpression::io_type::OUTPUT: { num_outputs++; - td = expr->inputs()[0]; + td = expr->get_input_tensor(0); etype = expr->get_node()->get_input_element_type(0); break; } default : { diff --git a/src/plugins/intel_cpu/src/transformations/snippets/x64/pass/lowered/fuse_load_store_and_convert.cpp b/src/plugins/intel_cpu/src/transformations/snippets/x64/pass/lowered/fuse_load_store_and_convert.cpp index 2a9555bfbf7a6b..5e0bd20e561698 100644 --- a/src/plugins/intel_cpu/src/transformations/snippets/x64/pass/lowered/fuse_load_store_and_convert.cpp +++ b/src/plugins/intel_cpu/src/transformations/snippets/x64/pass/lowered/fuse_load_store_and_convert.cpp @@ -15,13 +15,13 @@ bool ov::intel_cpu::pass::FuseLoadStoreConvert::fuse_load_convert(ngraph::snippe ngraph::snippets::lowered::LinearIR::constExprIt& convert_it) { const auto& 
convert_expr = *convert_it; const auto& convert = ov::as_type_ptr(convert_expr->get_node()); - const auto input_td = convert_expr->inputs().front(); - const auto output_td = convert_expr->outputs().front(); + const auto input_td = convert_expr->get_input_tensor(0); + const auto output_td = convert_expr->get_output_tensor(0); if (convert->get_destination_type() != ov::element::f32 && convert->get_destination_type() != ov::element::i32) return false; const auto& load_output = input_td->get_source(); - const auto& load_expr = load_output.get_expr_ptr(); + const auto& load_expr = load_output.get_expr(); const auto load = ov::as_type_ptr(load_expr->get_node()); if (!load || ov::is_type(load_expr->get_node()) || @@ -45,19 +45,19 @@ bool ov::intel_cpu::pass::FuseLoadStoreConvert::fuse_load_convert(ngraph::snippe OPENVINO_THROW("Type of Convert op is undefined. Supports only fusing Load and ConvertTruncation or ConvertSaturation ops"); } - const auto convert_out = convert_expr->outputs().front(); + const auto convert_out = convert_expr->get_output_tensor(0); const auto convert_consumers = convert_out->get_consumers(); ngraph::snippets::PortManager::set_port_descriptor_ptr(load_convert->output(0), std::make_shared(convert_out->get_tensor(), convert_out->get_subtensor(), convert_out->get_layout())); - const auto load_convert_expr = linear_ir.create_expression(load_convert, { load_expr->inputs().front() }); + const auto load_convert_expr = linear_ir.create_expression(load_convert, { load_expr->get_input_tensor(0) }); const auto convert_expr_it = convert_it; const auto insertion_pos = std::next(convert_it); convert_it = linear_ir.insert(insertion_pos, load_convert_expr); linear_ir.erase(std::find(linear_ir.cbegin(), convert_expr_it, load_expr)); linear_ir.erase(convert_expr_it); - linear_ir.replace_input(convert_consumers, load_convert_expr->output(0)); + linear_ir.replace_input(convert_consumers, load_convert_expr->get_output_tensor(0)); return true; } @@ -65,8 +65,8 @@ bool 
ov::intel_cpu::pass::FuseLoadStoreConvert::fuse_store_convert(ngraph::snipp ngraph::snippets::lowered::LinearIR::constExprIt& convert_it) { const auto& convert_expr = *convert_it; const auto& convert = convert_expr->get_node(); - const auto input_td = convert_expr->inputs().front(); - const auto output_td = convert_expr->outputs().front(); + const auto input_td = convert_expr->get_input_tensor(0); + const auto output_td = convert_expr->get_output_tensor(0); if (convert->get_input_element_type(0) != ov::element::f32 && convert->get_input_element_type(0) != ov::element::i32) return false; @@ -75,7 +75,7 @@ bool ov::intel_cpu::pass::FuseLoadStoreConvert::fuse_store_convert(ngraph::snipp return false; const auto store_input = *(consumers.begin()); - const auto store_expr = store_input.get_expr_ptr(); + const auto store_expr = store_input.get_expr(); const auto store = ov::as_type_ptr(store_expr->get_node()); if (!store) return false; @@ -93,7 +93,7 @@ bool ov::intel_cpu::pass::FuseLoadStoreConvert::fuse_store_convert(ngraph::snipp OPENVINO_THROW("Type of Convert op is undefined. 
Supports only fusing Store and ConvertTruncation or ConvertSaturation ops"); } - const auto store_out = store_expr->outputs().front(); + const auto store_out = store_expr->get_output_tensor(0); const auto store_consumers = store_out->get_consumers(); ngraph::snippets::PortManager::set_port_descriptor_ptr(store_convert->output(0), std::make_shared(store_out->get_tensor(), @@ -105,7 +105,7 @@ bool ov::intel_cpu::pass::FuseLoadStoreConvert::fuse_store_convert(ngraph::snipp convert_it = linear_ir.insert(insertion_pos, store_convert_expr); linear_ir.erase(std::find(convert_expr_it, linear_ir.cend(), store_expr)); linear_ir.erase(convert_expr_it); - linear_ir.replace_input(store_consumers, store_convert_expr->output(0)); + linear_ir.replace_input(store_consumers, store_convert_expr->get_output_tensor(0)); return true; } From 1f76a5d943135592385e0e6ea45fc46925c8ba1f Mon Sep 17 00:00:00 2001 From: Alexandra Sidorova Date: Tue, 9 May 2023 16:10:36 +0400 Subject: [PATCH 08/13] refactoring --- .../include/snippets/lowered/expression.hpp | 4 +- .../snippets/lowered/expression_factory.hpp | 11 ++-- .../snippets/lowered/expression_port.hpp | 12 ++-- .../include/snippets/lowered/linear_ir.hpp | 15 ++--- .../snippets/lowered/pass/fuse_loops.hpp | 6 +- .../include/snippets/lowered/tensor.hpp | 4 +- .../snippets/include/snippets/op/brgemm.hpp | 2 +- .../snippets/include/snippets/op/subgraph.hpp | 2 +- ...dule_softmax.hpp => set_softmax_ports.hpp} | 8 +-- .../include/snippets/port_descriptor.hpp | 14 ++-- .../snippets/include/snippets/utils.hpp | 3 - .../snippets/src/lowered/expression.cpp | 6 ++ .../src/lowered/expression_factory.cpp | 53 ++++++++------- .../snippets/src/lowered/expression_port.cpp | 28 +++++--- src/common/snippets/src/lowered/linear_ir.cpp | 64 ++++++------------- .../snippets/src/lowered/loop_manager.cpp | 28 ++++---- .../src/lowered/pass/allocate_buffers.cpp | 5 +- .../src/lowered/pass/assign_registers.cpp | 30 ++++----- 
.../snippets/src/lowered/pass/fuse_loops.cpp | 44 ++++++------- .../src/lowered/pass/indentify_buffers.cpp | 13 ++-- .../snippets/src/lowered/pass/init_loops.cpp | 10 +-- .../src/lowered/pass/insert_buffers.cpp | 32 ++++------ .../src/lowered/pass/insert_load_store.cpp | 26 ++++---- .../src/lowered/pass/insert_tail_loop.cpp | 10 +-- .../load_movebroadcast_to_broadcastload.cpp | 16 ++--- .../snippets/src/lowered/pass/mark_loops.cpp | 14 ++-- .../pass/move_result_out_from_loop.cpp | 4 +- .../lowered/pass/move_scalar_to_consumer.cpp | 3 +- .../src/lowered/pass/propagate_layout.cpp | 24 ++----- .../src/lowered/pass/reset_buffers.cpp | 2 +- .../lowered/pass/softmax_decomposition.cpp | 12 ++-- .../src/lowered/pass/vector_to_scalar.cpp | 8 +-- src/common/snippets/src/lowered/tensor.cpp | 4 +- src/common/snippets/src/op/brgemm.cpp | 4 +- src/common/snippets/src/op/subgraph.cpp | 4 +- .../src/pass/fuse_transpose_brgemm.cpp | 10 +-- ...dule_softmax.cpp => set_softmax_ports.cpp} | 11 ++-- src/common/snippets/src/port_descriptor.cpp | 19 +----- src/common/snippets/src/utils.cpp | 44 ++----------- .../emitters/x64/jit_snippets_emitters.cpp | 2 +- .../snippets/x64/op/brgemm_copy_b.cpp | 4 +- .../snippets/x64/op/brgemm_copy_b.hpp | 2 +- .../snippets/x64/op/brgemm_cpu.cpp | 6 +- .../snippets/x64/op/brgemm_cpu.hpp | 2 +- .../x64/pass/brgemm_to_brgemm_cpu.cpp | 8 +-- .../lowered/fuse_load_store_and_convert.cpp | 17 +++-- .../src/subgraph_lowered.cpp | 6 +- 47 files changed, 276 insertions(+), 380 deletions(-) rename src/common/snippets/include/snippets/pass/{schedule_softmax.hpp => set_softmax_ports.hpp} (62%) rename src/common/snippets/src/pass/{schedule_softmax.cpp => set_softmax_ports.cpp} (91%) diff --git a/src/common/snippets/include/snippets/lowered/expression.hpp b/src/common/snippets/include/snippets/lowered/expression.hpp index 8be97055604b83..4c761d8335bcb7 100644 --- a/src/common/snippets/include/snippets/lowered/expression.hpp +++ 
b/src/common/snippets/include/snippets/lowered/expression.hpp @@ -48,6 +48,7 @@ class Expression : public std::enable_shared_from_this { void set_loop_id(size_t id, size_t idx); void remove_loop_id(size_t id); + void validate() const; void init_emitter(const std::shared_ptr& target); ExpressionPort get_input_port(size_t i); @@ -57,10 +58,7 @@ class Expression : public std::enable_shared_from_this { // Note: The constructor and tensor initialization are private since an expression can be created only by Linear IR. // These methods must be used only by Linear IR builder of expressions! explicit Expression(const std::shared_ptr& n); - void init_inputs(const std::vector& inputs) { m_input_tensors = inputs; } - void init_outputs(const std::vector& outputs) { m_output_tensors = outputs; } - // Note: These methods don't control availability of the current expression in this Tensor (as Consumer or Source) void replace_input(size_t port, TensorPtr to); std::shared_ptr m_source_node{nullptr}; diff --git a/src/common/snippets/include/snippets/lowered/expression_factory.hpp b/src/common/snippets/include/snippets/lowered/expression_factory.hpp index 26f829a12140de..af6a1b74e6c021 100644 --- a/src/common/snippets/include/snippets/lowered/expression_factory.hpp +++ b/src/common/snippets/include/snippets/lowered/expression_factory.hpp @@ -38,17 +38,14 @@ class LinearIR::ExpressionFactory { const std::shared_ptr& model); /* -- Input Builders - get input tensors from method parameters and create new output tensors themselves */ - static ExpressionPtr create(const std::shared_ptr& n, const LinearIR& linear_ir, - const std::vector& inputs); - static ExpressionPtr create(const std::shared_ptr& n, const LinearIR& linear_ir, - const std::vector& inputs); - static ExpressionPtr create(const std::shared_ptr& n, const LinearIR& linear_ir, - const std::vector& inputs); + static ExpressionPtr create(const std::shared_ptr& n, const std::vector& inputs); + static ExpressionPtr create(const 
std::shared_ptr& n, const std::vector& inputs); + static ExpressionPtr create(const std::shared_ptr& n, const std::vector& inputs); // Creates inputs for expression using parent output tensors static void create_expression_inputs(const LinearIR& linear_ir, const ExpressionPtr& expr); // Creates new output tensors - static void create_expression_outputs(const LinearIR& linear_ir, const ExpressionPtr& expr); + static void create_expression_outputs(const ExpressionPtr& expr); // The method verifies of input tensors to availability of the expression as consumer and add it if missed static void init_expression_inputs(const ExpressionPtr& expr, const std::vector& inputs); }; diff --git a/src/common/snippets/include/snippets/lowered/expression_port.hpp b/src/common/snippets/include/snippets/lowered/expression_port.hpp index 7aaeeafa47fc88..239520c8620168 100644 --- a/src/common/snippets/include/snippets/lowered/expression_port.hpp +++ b/src/common/snippets/include/snippets/lowered/expression_port.hpp @@ -26,17 +26,21 @@ class ExpressionPort { ExpressionPort() = default; explicit ExpressionPort(const std::shared_ptr& expr, Type type, size_t port); - std::shared_ptr get_expr() const { return m_expr; } + const std::shared_ptr& get_expr() const { return m_expr; } Type get_type() const { return m_type; } size_t get_index() const { return m_port_index; } - std::vector get_tensor() const; + std::vector get_shape() const; std::vector get_layout() const; std::vector get_subtensor() const; - PortDescriptorPtr get_port_descriptor() const; + const PortDescriptorPtr& get_descriptor_ptr() const; const std::shared_ptr& get_tensor_ptr() const; + // Returns connected ports to the current: + // - Input port returns one source (parent) port + // - Output port returns all consumer ports (children) + std::set get_connected_ports() const; - void set_tensor(const std::vector& tensor); + void set_shape(const std::vector& tensor); void set_layout(const std::vector& layout); void 
set_subtensor(const std::vector& subtensor); diff --git a/src/common/snippets/include/snippets/lowered/linear_ir.hpp b/src/common/snippets/include/snippets/lowered/linear_ir.hpp index 68f79e3a818db0..e230d99d98d239 100644 --- a/src/common/snippets/include/snippets/lowered/linear_ir.hpp +++ b/src/common/snippets/include/snippets/lowered/linear_ir.hpp @@ -33,20 +33,18 @@ class LinearIR { LinearIR() = default; explicit LinearIR(const std::shared_ptr& m, Config config = {}); - ExpressionPtr create_expression(const std::shared_ptr& n, const std::vector inputs); + ExpressionPtr create_expression(const std::shared_ptr& n, const std::vector& inputs); - LinearIR deep_copy() const; static LinearIR::container deep_copy_range(LinearIR::container::const_iterator begin, LinearIR::container::const_iterator end); const container& get_ops() const {return m_lowered_ops; } const io_container& get_IO_ops() const {return m_io_lowered_ops; } Config get_config() {return m_config; } - ExpressionPtr get_expr_by_node(const std::shared_ptr& n) const; + const ExpressionPtr& get_expr_by_node(const std::shared_ptr& n) const; - void replace_input(std::set consumers, const TensorPtr& to); + void replace_input(const std::set& consumers, const TensorPtr& to); void replace_input(const ExpressionPort& expr_port, const TensorPtr& to); - void replace_input(const ExpressionPtr& expr, size_t port, const TensorPtr& to); /** * @brief Move an expression from the position "from" to the position immediately before "to". 
@@ -87,20 +85,17 @@ class LinearIR { void init_emitters(const std::shared_ptr& target); void serialize(const std::string& xml, const std::string& bin); - static ov::NodeVector get_ordered_ops(const std::shared_ptr& model); - class LoopManager; using LoopManagerPtr = std::shared_ptr; const LoopManagerPtr& get_loop_manager() const { return m_loop_manager; } private: + static ov::NodeVector get_ordered_ops(const std::shared_ptr& model); // Default ctor - can be called only from Linear IR initialization as default way ExpressionPtr create_expression(const std::shared_ptr& n, const std::shared_ptr& model = nullptr); - void register_expression(const ExpressionPtr& expr); - // Like register_expression, but doesn't allow Parameter or Result registration. You can do it only through ctor - void register_regular_expression(const ExpressionPtr& expr); + void register_expression(const ExpressionPtr& expr, bool io_allowed = false); void unregister_expression(const ExpressionPtr& expr); container m_lowered_ops{}; diff --git a/src/common/snippets/include/snippets/lowered/pass/fuse_loops.hpp b/src/common/snippets/include/snippets/lowered/pass/fuse_loops.hpp index 87314543d50af8..0f66b4ce55c3a6 100644 --- a/src/common/snippets/include/snippets/lowered/pass/fuse_loops.hpp +++ b/src/common/snippets/include/snippets/lowered/pass/fuse_loops.hpp @@ -27,12 +27,10 @@ class FuseLoops : public Transformation { private: static bool can_be_fused(const LinearIR::LoopManager::LoopInfoPtr& loop_current, const LinearIR::LoopManager::LoopInfoPtr& loop_target); - static bool fuse_upper_into_current(LinearIR& linear_ir, const LinearIR::LoopManagerPtr& loop_manager, - const ExpressionPort& current_entry_point, const ExpressionPort& target_exit_point, + static bool fuse_upper_into_current(LinearIR& linear_ir, const LinearIR::LoopManagerPtr& loop_manager, const ExpressionPort& current_entry_point, size_t current_loop_id, size_t target_loop_id, size_t dim_idx, LinearIR::constExprIt& 
current_loop_begin_pos, LinearIR::constExprIt& current_loop_end_pos); - static bool fuse_lower_into_current(LinearIR& linear_ir, const LinearIR::LoopManagerPtr& loop_manager, - const ExpressionPort& current_entry_point, const ExpressionPort& target_exit_point, + static bool fuse_lower_into_current(LinearIR& linear_ir, const LinearIR::LoopManagerPtr& loop_manager, const ExpressionPort& current_entry_point, size_t current_loop_id, size_t target_loop_id, size_t dim_idx, LinearIR::constExprIt& current_loop_begin_pos, LinearIR::constExprIt& current_loop_end_pos); static void fuse_points(std::vector& exit_points, std::vector& entry_points, diff --git a/src/common/snippets/include/snippets/lowered/tensor.hpp b/src/common/snippets/include/snippets/lowered/tensor.hpp index 29b04138acf407..e9df31c098babe 100644 --- a/src/common/snippets/include/snippets/lowered/tensor.hpp +++ b/src/common/snippets/include/snippets/lowered/tensor.hpp @@ -21,7 +21,7 @@ class Expression; class Tensor { public: Tensor() = default; - explicit Tensor(const ExpressionPort& source_descriptor, const std::set& consumer_descriptors = {}); + explicit Tensor(ExpressionPort source_descriptor, const std::set& consumer_descriptors = {}); const ExpressionPort& get_source() const { return m_source_port; } std::set get_consumers() const { return m_consumer_ports; } @@ -33,7 +33,7 @@ class Tensor { std::set::iterator find_consumer(const ExpressionPort& consumer); // The scheduling params of Tensor is controlled by source expression port - std::vector get_tensor() const { return m_source_port.get_tensor(); } + std::vector get_shape() const { return m_source_port.get_shape(); } std::vector get_layout() const { return m_source_port.get_layout(); } std::vector get_subtensor() const { return m_source_port.get_subtensor(); } diff --git a/src/common/snippets/include/snippets/op/brgemm.hpp b/src/common/snippets/include/snippets/op/brgemm.hpp index 7d1d85e589c2de..f5db719ae03de5 100644 --- 
a/src/common/snippets/include/snippets/op/brgemm.hpp +++ b/src/common/snippets/include/snippets/op/brgemm.hpp @@ -33,7 +33,7 @@ class Brgemm : public MemoryAccess { bool has_evaluate() const override { return false; } protected: - void constructor_validate_and_infer_types(); + void custom_constructor_validate_and_infer_types(); void validate_inputs() const; ov::element::Type get_output_type() const; diff --git a/src/common/snippets/include/snippets/op/subgraph.hpp b/src/common/snippets/include/snippets/op/subgraph.hpp index 092d6a35c9d8ca..05ef134c28eb6d 100644 --- a/src/common/snippets/include/snippets/op/subgraph.hpp +++ b/src/common/snippets/include/snippets/op/subgraph.hpp @@ -195,7 +195,7 @@ static inline auto build_subgraph(const std::shared_ptr& node, con return subgraph; }; -// Need to update tensor name manually, since intel_cpu::Graph::Replicate() looks at input.get_tensor().get_name(); +// Need to update tensor name manually, since intel_cpu::Graph::Replicate() looks at input.get_shape().get_name(); // If subgraph->get_output_size() == 1, then the name will be restored correctly from the node name auto inline update_out_tensor_name(const std::shared_ptr& subgraph) -> void { bool not_set = true; diff --git a/src/common/snippets/include/snippets/pass/schedule_softmax.hpp b/src/common/snippets/include/snippets/pass/set_softmax_ports.hpp similarity index 62% rename from src/common/snippets/include/snippets/pass/schedule_softmax.hpp rename to src/common/snippets/include/snippets/pass/set_softmax_ports.hpp index b4ec4f487708eb..22e7f0b8af7a7e 100644 --- a/src/common/snippets/include/snippets/pass/schedule_softmax.hpp +++ b/src/common/snippets/include/snippets/pass/set_softmax_ports.hpp @@ -12,13 +12,13 @@ namespace snippets { namespace pass { /** - * @interface ScheduleSoftmax - * @brief The pass updates port descriptors for Softmax to show by which axes there is reducing + * @interface SetSoftmaxPorts + * @brief The pass updates port descriptors in accordance 
with the Softmax reduction axis * @ingroup snippets */ -class ScheduleSoftmax: public ngraph::pass::MatcherPass { +class SetSoftmaxPorts: public ngraph::pass::MatcherPass { public: - ScheduleSoftmax(); + SetSoftmaxPorts(); }; } // namespace pass diff --git a/src/common/snippets/include/snippets/port_descriptor.hpp b/src/common/snippets/include/snippets/port_descriptor.hpp index f9802d113ce10c..15570c70a9efce 100644 --- a/src/common/snippets/include/snippets/port_descriptor.hpp +++ b/src/common/snippets/include/snippets/port_descriptor.hpp @@ -11,6 +11,8 @@ namespace ngraph { namespace snippets { +class PortDescriptor; +using PortDescriptorPtr = std::shared_ptr; class PortDescriptor { public: // The structure with service values for scheduling parameters @@ -34,17 +36,17 @@ class PortDescriptor { PortDescriptor(std::vector shape, std::vector subtensor_shape, std::vector layout = {}); PortDescriptor() = default; - std::vector get_tensor() const {return m_tensor_shape;} + std::vector get_shape() const {return m_tensor_shape;} std::vector get_subtensor() const {return m_subtensor_shape;} std::vector get_layout() const {return m_layout;} - void set_tensor(const std::vector& tensor) { m_tensor_shape = tensor; } + void set_shape(const std::vector& tensor) { m_tensor_shape = tensor; } void set_layout(const std::vector& layout) { m_layout = layout; } void set_subtensor(const std::vector& subtensor) { m_subtensor_shape = subtensor; } - static PortDescriptor deserialize(const std::string& serialized_info); std::string serialize() const; bool empty() const { return m_layout.empty() && m_subtensor_shape.empty();} + PortDescriptorPtr clone() const; friend bool operator==(const PortDescriptor& lhs, const PortDescriptor& rhs); friend bool operator!=(const PortDescriptor& lhs, const PortDescriptor& rhs) {return !(lhs == rhs);} @@ -58,7 +60,6 @@ class PortDescriptor { /// \brief Minimal tensor size that could be processed in one call std::vector m_subtensor_shape{}; }; -using 
PortDescriptorPtr = std::shared_ptr; class PortManager { public: @@ -76,15 +77,12 @@ class PortManager { class PortDescriptorVectorAttribute : public ov::RuntimeAttribute { public: - OPENVINO_RTTI("PortDescriptorVectorAttribute", "0"); + OPENVINO_RTTI("PortDescriptorVectorAttribute", "", ov::RuntimeAttribute); PortDescriptorVectorAttribute() = default; explicit PortDescriptorVectorAttribute(std::vector in_descs = {}, std::vector out_descs = {}) : inputs(std::move(in_descs)), outputs(std::move(out_descs)) {} - void set_input_port_descriptor(const PortDescriptorPtr& desc, size_t index); - void set_output_port_descriptor(const PortDescriptorPtr& desc, size_t index); - std::vector inputs{}; std::vector outputs{}; }; diff --git a/src/common/snippets/include/snippets/utils.hpp b/src/common/snippets/include/snippets/utils.hpp index 76ae3cf48fd2f0..63547a226df2f9 100644 --- a/src/common/snippets/include/snippets/utils.hpp +++ b/src/common/snippets/include/snippets/utils.hpp @@ -27,9 +27,6 @@ inline auto is_scalar_constant(const std::shared_ptr& source_outpu ov::PartialShape get_port_planar_shape(const Input& out); ov::PartialShape get_port_planar_shape(const Output& out); ov::PartialShape get_reordered_planar_shape(const ov::PartialShape& shape, const std::vector& layout); -ov::Shape get_reordered_shape(const ov::Shape& shape, const std::vector& layout); -std::vector get_node_output_layout(const std::shared_ptr& node); -std::vector get_node_output_layout(const Node* node); inline auto normalize_rank(int32_t allocation_rank, const size_t shape_rank) -> int32_t { return allocation_rank < 0 ? 
allocation_rank + static_cast(shape_rank) + 1 : allocation_rank; diff --git a/src/common/snippets/src/lowered/expression.cpp b/src/common/snippets/src/lowered/expression.cpp index 2b78997522e3d1..fedd2f6e46605a 100644 --- a/src/common/snippets/src/lowered/expression.cpp +++ b/src/common/snippets/src/lowered/expression.cpp @@ -51,6 +51,12 @@ void Expression::init_emitter(const std::shared_ptr& target m_emitter = target->get(m_source_node->get_type_info())(m_source_node); } +void Expression::validate() const { + OPENVINO_ASSERT(m_input_port_descriptors.size() == m_input_tensors.size(), "The count of input ports and input tensors must be equal"); + OPENVINO_ASSERT(m_output_port_descriptors.size() == m_output_tensors.size(), "The count of output ports and output tensors must be equal"); + OPENVINO_ASSERT(m_source_node != nullptr, "The expression has null source node"); +} + void Expression::replace_input(size_t port, TensorPtr to) { OPENVINO_ASSERT(port < m_input_tensors.size(), "Failed to replace: target input port must be less than input count!"); m_input_tensors[port] = std::move(to); diff --git a/src/common/snippets/src/lowered/expression_factory.cpp b/src/common/snippets/src/lowered/expression_factory.cpp index d104a6c03e64fb..2bf63bb3a631e9 100644 --- a/src/common/snippets/src/lowered/expression_factory.cpp +++ b/src/common/snippets/src/lowered/expression_factory.cpp @@ -25,7 +25,7 @@ void LinearIR::ExpressionFactory::create_expression_inputs(const LinearIR& linea } } -void LinearIR::ExpressionFactory::create_expression_outputs(const LinearIR& linear_ir, const ExpressionPtr& expr) { +void LinearIR::ExpressionFactory::create_expression_outputs(const ExpressionPtr& expr) { OPENVINO_ASSERT(expr != nullptr, "Failed expression outputs creation: expression is null"); const auto& node = expr->get_node(); @@ -57,8 +57,9 @@ ExpressionPtr LinearIR::ExpressionFactory::create(const std::shared_ptr& model) { // Note: ctor of shared_ptr isn't friend class for Expression -> we 
cannot use directly make_shared(args) OPENVINO_ASSERT(model != nullptr, "To create IOExpression from Parameter there must be inited model!"); - const auto expr = std::make_shared(IOExpression(par, model->get_parameter_index(par))); - create_expression_outputs(linear_ir, expr); + auto expr = std::make_shared(IOExpression(par, model->get_parameter_index(par))); + create_expression_outputs(expr); + expr->validate(); return expr; } @@ -66,8 +67,12 @@ ExpressionPtr LinearIR::ExpressionFactory::create(const std::shared_ptr& model) { // Note: ctor of shared_ptr isn't friend class for Expression -> we cannot use directly make_shared(args) OPENVINO_ASSERT(model != nullptr, "To create IOExpression from Result there must be inited model!"); - const auto expr = std::make_shared(IOExpression(res, model->get_result_index(res))); + auto expr = std::make_shared(IOExpression(res, model->get_result_index(res))); create_expression_inputs(linear_ir, expr); + // The Result node don't need output port (because of sense of the node). But each node in ngraph must have one output at least. + // The port descriptors are automatically created in constructor. We manually clean output ports. 
+ expr->m_output_port_descriptors.clear(); + expr->validate(); return expr; } @@ -75,41 +80,45 @@ ExpressionPtr LinearIR::ExpressionFactory::create(const std::shared_ptr& model) { OPENVINO_ASSERT(!ov::is_type(n), "Default expression builder doesn't support LoopBegin and LoopEnd"); // Note: ctor of shared_ptr isn't friend class for Expression - const auto expr = std::make_shared(Expression(n)); + auto expr = std::make_shared(Expression(n)); create_expression_inputs(linear_ir, expr); - create_expression_outputs(linear_ir, expr); + create_expression_outputs(expr); + expr->validate(); return expr; } -ExpressionPtr LinearIR::ExpressionFactory::create(const std::shared_ptr& n, const LinearIR& linear_ir, - const std::vector& inputs) { +ExpressionPtr LinearIR::ExpressionFactory::create(const std::shared_ptr& n, const std::vector& inputs) { OPENVINO_ASSERT(inputs.empty(), "LoopBegin cannot have inputs"); - const auto expr = std::make_shared(Expression(n)); + auto expr = std::make_shared(Expression(n)); init_expression_inputs(expr, inputs); - create_expression_outputs(linear_ir, expr); + create_expression_outputs(expr); + expr->validate(); return expr; } -ExpressionPtr LinearIR::ExpressionFactory::create(const std::shared_ptr& n, const LinearIR& linear_ir, - const std::vector& inputs) { - const auto expr = std::make_shared(Expression(n)); - // Copy port descriptor shared pointers to LoopEnd - expr->m_input_port_descriptors.resize(inputs.size()); - for (size_t i = 0; i < inputs.size(); ++i) { - expr->m_input_port_descriptors[i] = inputs[i]->get_source().get_port_descriptor(); - } +ExpressionPtr LinearIR::ExpressionFactory::create(const std::shared_ptr& n, const std::vector& inputs) { + auto expr = std::make_shared(Expression(n)); + // LoopEnd doesn't have port descriptors on inputs (except input from LoopBegin) + expr->m_input_port_descriptors.resize(inputs.size(), nullptr); + const auto& last_input = inputs.back()->get_source(); + 
OPENVINO_ASSERT(ov::is_type(last_input.get_expr()->get_node()), "LoopEnd expression expects LoopBegin on last input"); + expr->m_input_port_descriptors[inputs.size() - 1] = last_input.get_descriptor_ptr()->clone(); init_expression_inputs(expr, inputs); + // The LoopEnd node doesn't need an output port (due to the node's semantics), but each node in ngraph must have at least one output. + // The port descriptors are automatically created in the constructor. We manually clear the output ports. + expr->m_output_port_descriptors.clear(); + expr->validate(); return expr; } -ExpressionPtr LinearIR::ExpressionFactory::create(const std::shared_ptr& n, const LinearIR& linear_ir, - const std::vector& inputs) { +ExpressionPtr LinearIR::ExpressionFactory::create(const std::shared_ptr& n, const std::vector& inputs) { OPENVINO_ASSERT(!ov::is_type(n) && !ov::is_type(n), "Expression builder with inputs doesn't support Result and Parameter"); - const auto expr = std::make_shared(Expression(n)); + auto expr = std::make_shared(Expression(n)); init_expression_inputs(expr, inputs); - create_expression_outputs(linear_ir, expr); + create_expression_outputs(expr); + expr->validate(); return expr; } }// namespace lowered diff --git a/src/common/snippets/src/lowered/expression_port.cpp b/src/common/snippets/src/lowered/expression_port.cpp index bfd419fd84fd57..08aeae9f533551 100644 --- a/src/common/snippets/src/lowered/expression_port.cpp +++ b/src/common/snippets/src/lowered/expression_port.cpp @@ -14,7 +14,7 @@ namespace lowered { ExpressionPort::ExpressionPort(const std::shared_ptr& expr, Type type, size_t port) : m_expr(expr), m_type(type), m_port_index(port) {} -PortDescriptorPtr ExpressionPort::get_port_descriptor() const { +const PortDescriptorPtr& ExpressionPort::get_descriptor_ptr() const { const auto& descs = m_type == Type::Input ?
m_expr->m_input_port_descriptors : m_expr->m_output_port_descriptors; OPENVINO_ASSERT(m_port_index < descs.size(), "Incorrect index of port"); @@ -28,24 +28,34 @@ const std::shared_ptr& ExpressionPort::get_tensor_ptr() const { return tensors[m_port_index]; } -std::vector ExpressionPort::get_tensor() const { - return get_port_descriptor()->get_tensor(); +std::set ExpressionPort::get_connected_ports() const { + if (ExpressionPort::m_type == Type::Input) { + return { m_expr->m_input_tensors[m_port_index]->get_source() }; + } + if (ExpressionPort::m_type == Type::Output) { + return m_expr->m_output_tensors[m_port_index]->get_consumers(); + } + OPENVINO_THROW("ExpressionPort supports only Input and Output types"); +} + +std::vector ExpressionPort::get_shape() const { + return get_descriptor_ptr()->get_shape(); } std::vector ExpressionPort::get_layout() const { - return get_port_descriptor()->get_layout(); + return get_descriptor_ptr()->get_layout(); } std::vector ExpressionPort::get_subtensor() const { - return get_port_descriptor()->get_subtensor(); + return get_descriptor_ptr()->get_subtensor(); } -void ExpressionPort::set_tensor(const std::vector& tensor) { - get_port_descriptor()->set_tensor(tensor); +void ExpressionPort::set_shape(const std::vector& tensor) { + get_descriptor_ptr()->set_shape(tensor); } void ExpressionPort::set_layout(const std::vector& layout) { - get_port_descriptor()->set_layout(layout); + get_descriptor_ptr()->set_layout(layout); } void ExpressionPort::set_subtensor(const std::vector& subtensor) { - get_port_descriptor()->set_subtensor(subtensor); + get_descriptor_ptr()->set_subtensor(subtensor); } bool operator==(const ExpressionPort& lhs, const ExpressionPort& rhs) { diff --git a/src/common/snippets/src/lowered/linear_ir.cpp b/src/common/snippets/src/lowered/linear_ir.cpp index 969427d7e02a7a..828462e020c9f6 100644 --- a/src/common/snippets/src/lowered/linear_ir.cpp +++ b/src/common/snippets/src/lowered/linear_ir.cpp @@ -20,8 +20,7 @@ 
namespace lowered { LinearIR::LinearIR(const std::shared_ptr& model, Config config) : m_io_lowered_ops{}, m_config{std::move(config)}, m_loop_manager(std::make_shared()) { - constExprIt scalar_pos = m_lowered_ops.begin(); - ExpressionPtr last_param = nullptr; + constExprIt last_param = m_lowered_ops.end(); for (const auto& n : get_ordered_ops(model)) { constExprIt insertion_pos = m_lowered_ops.end(); const auto expr = create_expression(n, model); @@ -30,23 +29,17 @@ LinearIR::LinearIR(const std::shared_ptr& model, Config config) // After these passes we must call pass MoveScalarToConsumer() to have a correct accuracy. // For more details, please see the pass description if (const auto& scalar = as_type_ptr(n)) { - if (scalar_pos == m_lowered_ops.end()) { - OPENVINO_ASSERT(last_param, "Scalars must be executed after Parameters"); - scalar_pos = std::find(m_lowered_ops.begin(), m_lowered_ops.end(), last_param); - } - insertion_pos = std::next(scalar_pos); + insertion_pos = std::next(last_param); } + register_expression(expr, true); + const auto& it = m_lowered_ops.insert(insertion_pos, expr); + if (const auto io_expr = std::dynamic_pointer_cast(expr)) { - register_expression(expr); m_io_lowered_ops.push_back(io_expr); if (ov::is_type(n)) - last_param = expr; - } else { - register_regular_expression(expr); + last_param = it; } - - m_lowered_ops.insert(insertion_pos, expr); } } @@ -54,8 +47,8 @@ ExpressionPtr LinearIR::create_expression(const std::shared_ptr& n, const return ExpressionFactory::build(n, *this, model); } -ExpressionPtr LinearIR::create_expression(const std::shared_ptr& n, const std::vector inputs) { - return ExpressionFactory::build(n, *this, inputs); +ExpressionPtr LinearIR::create_expression(const std::shared_ptr& n, const std::vector& inputs) { + return ExpressionFactory::build(n, inputs); } ov::NodeVector LinearIR::get_ordered_ops(const std::shared_ptr& m) { @@ -105,15 +98,6 @@ LinearIR::container 
LinearIR::deep_copy_range(LinearIR::container::const_iterato return result; } -LinearIR LinearIR::deep_copy() const { - LinearIR result; - auto& result_ops = result.m_lowered_ops; - for (const auto& expr : deep_copy_range(m_lowered_ops.begin(), m_lowered_ops.end())) - result_ops.emplace_back(expr); - result.m_config = m_config; - return result; -} - void LinearIR::debug_print(bool tds_as_pointers) const { auto print_rinfo = [](const RegInfo& rinfo) { std::cerr << " : {"; @@ -165,25 +149,21 @@ void LinearIR::init_emitters(const std::shared_ptr& target) { } } -ExpressionPtr LinearIR::get_expr_by_node(const std::shared_ptr& n) const { +const ExpressionPtr& LinearIR::get_expr_by_node(const std::shared_ptr& n) const { auto found = m_node2expression_map.find(n); OPENVINO_ASSERT(found != m_node2expression_map.end(), "The node " + n->get_friendly_name() + " hasn't been found in Linear IR"); return found->second; } -void LinearIR::replace_input(std::set consumers, const TensorPtr& to) { +void LinearIR::replace_input(const std::set& consumers, const TensorPtr& to) { for (const auto& consumer_input : consumers) { replace_input(consumer_input, to); } } -void LinearIR::replace_input(const ExpressionPtr& expr, size_t port, const TensorPtr& to) { - replace_input(expr->get_input_port(port), to); -} - void LinearIR::replace_input(const ExpressionPort& expr_port, const TensorPtr& to) { const auto port = expr_port.get_index(); - const auto expr = expr_port.get_expr(); + const auto& expr = expr_port.get_expr(); OPENVINO_ASSERT(expr_port.get_type() == ExpressionPort::Type::Input, "Failed to replace: target input port must have Input type"); OPENVINO_ASSERT(expr_port.get_index() < expr->get_input_count(), "Failed to replace: target input port must be less than input count!"); @@ -196,17 +176,13 @@ void LinearIR::replace_input(const ExpressionPort& expr_port, const TensorPtr& t to->add_consumer(expr_port); } from->remove_consumer(expr_port); - expr->replace_input(port, std::move(to)); + 
expr->replace_input(port, to); } -void LinearIR::register_regular_expression(const ExpressionPtr& expr) { - if (is_type(expr->get_node()) || is_type(expr->get_node())) - OPENVINO_THROW("LinearIR::insert can't be used to add Parameters or Results to IR"); - register_expression(expr); -} - -void LinearIR::register_expression(const ExpressionPtr& expr) { +void LinearIR::register_expression(const ExpressionPtr& expr, bool io_allowed) { const auto& node = expr->get_node(); + if (!io_allowed && (is_type(node) || is_type(node))) + OPENVINO_THROW("LinearIR::insert can't be used to add Parameters or Results to IR"); { const auto& res = m_node2expression_map.insert({node, expr}); if (!res.second) @@ -224,12 +200,12 @@ void LinearIR::unregister_expression(const ExpressionPtr& expr) { } LinearIR::exprIt LinearIR::insert(constExprIt pos, container::value_type&& value) { - register_regular_expression(value); + register_expression(value); return m_lowered_ops.insert(pos, value); } LinearIR::exprIt LinearIR::insert(constExprIt pos, const container::value_type& value) { - register_regular_expression(value); + register_expression(value); return m_lowered_ops.insert(pos, value); } @@ -241,7 +217,7 @@ LinearIR::exprIt LinearIR::insert(constExprIt pos, exprIt begin, exprIt end) { LinearIR::exprIt LinearIR::insert(constExprIt pos, constExprIt begin, constExprIt end) { for (auto b = begin; b != end; b++) - register_regular_expression(*b); + register_expression(*b); return m_lowered_ops.insert(pos, begin, end); } @@ -249,7 +225,7 @@ LinearIR::exprIt LinearIR::insert(LinearIR::constExprIt pos, const NodeVector& n auto ret = m_lowered_ops.end(); for (const auto& n : nodes) { const auto& expr = create_expression(n); - register_regular_expression(expr); + register_expression(expr); ret = m_lowered_ops.insert(pos, expr); } // Need to return iterator to the first of the inserted values @@ -258,7 +234,7 @@ LinearIR::exprIt LinearIR::insert(LinearIR::constExprIt pos, const NodeVector& n 
LinearIR::exprIt LinearIR::insert(LinearIR::constExprIt pos, const std::shared_ptr& n) { const auto& expr = create_expression(n); - register_regular_expression(expr); + register_expression(expr); return m_lowered_ops.insert(pos, expr); } diff --git a/src/common/snippets/src/lowered/loop_manager.cpp b/src/common/snippets/src/lowered/loop_manager.cpp index 54d6e48c78250a..225d6e4b129150 100644 --- a/src/common/snippets/src/lowered/loop_manager.cpp +++ b/src/common/snippets/src/lowered/loop_manager.cpp @@ -55,7 +55,7 @@ void LinearIR::LoopManager::get_loop_bounds(const LinearIR &linear_ir, size_t loop_id) { OPENVINO_ASSERT(!entries.empty(), "Loop must have entry points"); OPENVINO_ASSERT(!exits.empty(), "Loop must have entry points"); - const auto entry_expr = entries.front().get_expr(); + const auto& entry_expr = entries.front().get_expr(); loop_begin_pos = std::find(linear_ir.begin(), linear_ir.end(), entry_expr); OPENVINO_ASSERT(loop_begin_pos != linear_ir.end(), "Loop begin hasn't been found!"); @@ -81,25 +81,21 @@ void LinearIR::LoopManager::get_io_loop_ports(LinearIR::constExprIt loop_begin_p exits.clear(); for (auto expr_it = loop_begin_pos; expr_it != loop_end_pos; ++expr_it) { const auto& expr = *expr_it; - const auto inputs = expr->get_input_tensors(); - const auto outputs = expr->get_output_tensors(); - - for (size_t in_port = 0; in_port < inputs.size(); ++in_port) { - const auto in_td = inputs[in_port]; - const auto parent_expr = in_td->get_source().get_expr(); + for (size_t i = 0; i < expr->get_input_count(); ++i) { + const auto in_port = expr->get_input_port(i); + const auto& parent_expr = in_port.get_connected_ports().begin()->get_expr(); if (!ov::is_type(parent_expr->get_node()) && std::find(loop_begin_pos, expr_it, parent_expr) == expr_it) { - entries.push_back(expr->get_input_port(in_port)); + entries.push_back(in_port); } } - - for (size_t out_port = 0; out_port < outputs.size(); ++out_port) { - const auto out_td = outputs[out_port]; - const auto 
consumer_ports = out_td->get_consumers(); + for (size_t i = 0; i < expr->get_output_count(); ++i) { + const auto out_port = expr->get_output_port(i); + const auto consumer_ports = out_port.get_connected_ports(); for (const auto& consumer : consumer_ports) { - const auto consumer_expr = consumer.get_expr(); + const auto& consumer_expr = consumer.get_expr(); if (std::find(expr_it, loop_end_pos, consumer_expr) == loop_end_pos) { - exits.push_back(expr->get_output_port(out_port)); + exits.push_back(out_port); break; } } @@ -113,7 +109,7 @@ void LinearIR::LoopManager::mark_loop(LinearIR::constExprIt loop_begin_pos, std::vector loop_entry_points, loop_exit_points; LoopManager::get_io_loop_ports(loop_begin_pos, loop_end_pos, loop_entry_points, loop_exit_points); - auto broadcast = [](std::vector &lhs, const std::vector &rhs, size_t index) -> void { + auto broadcast = [](std::vector& lhs, const std::vector& rhs, size_t index) -> void { if (rhs == lhs) return; const auto lhs_size = lhs.size(); @@ -135,7 +131,7 @@ void LinearIR::LoopManager::mark_loop(LinearIR::constExprIt loop_begin_pos, std::vector loop_subtensor; std::vector loop_tensor(loop_depth, 1); for (const auto& exit_point : loop_exit_points) { - const auto tensor = utils::get_reordered_shape(exit_point.get_tensor(), exit_point.get_layout()); + const auto tensor = utils::get_reordered_planar_shape(ov::PartialShape(exit_point.get_shape()), exit_point.get_layout()).get_shape(); auto subtensor = exit_point.get_subtensor(); if (subtensor.empty()) { subtensor.resize(loop_depth, 1); diff --git a/src/common/snippets/src/lowered/pass/allocate_buffers.cpp b/src/common/snippets/src/lowered/pass/allocate_buffers.cpp index 97d5d748a5be19..a22c8e19549634 100644 --- a/src/common/snippets/src/lowered/pass/allocate_buffers.cpp +++ b/src/common/snippets/src/lowered/pass/allocate_buffers.cpp @@ -61,7 +61,8 @@ bool AllocateBuffers::run(LinearIR& linear_ir) { bool modified = false; size_t offset = 0; for (auto expr_it = 
linear_ir.begin(); expr_it != linear_ir.end(); expr_it++) { - if (auto buffer = as_type_ptr(expr_it->get()->get_node())) { + const auto& expr = *expr_it; + if (auto buffer = as_type_ptr(expr->get_node())) { const auto buffer_size = buffer->get_byte_size(); // If it's the first buffer, offsets are zero => nothing to propagate, can continue if (m_buffer_scratchpad_size == 0) { @@ -70,7 +71,7 @@ bool AllocateBuffers::run(LinearIR& linear_ir) { } if (buffer->is_intermediate_memory()) { - const auto& parent_expr = expr_it->get()->get_input_tensor(0)->get_source().get_expr(); + const auto& parent_expr = expr->get_input_tensor(0)->get_source().get_expr(); const auto& parent_node = parent_expr->get_node(); // Full MemoryAccess ops need new memory. Previous logic is to check for parent isn't Loop // TODO: It should be unified in MemoryManager with memory reuse in the near future diff --git a/src/common/snippets/src/lowered/pass/assign_registers.cpp b/src/common/snippets/src/lowered/pass/assign_registers.cpp index 04b671fbe03a72..92633245e1b036 100644 --- a/src/common/snippets/src/lowered/pass/assign_registers.cpp +++ b/src/common/snippets/src/lowered/pass/assign_registers.cpp @@ -20,7 +20,7 @@ bool AssignRegisters::run(LinearIR& linear_ir) { OV_ITT_SCOPED_TASK(ngraph::pass::itt::domains::SnippetsTransform, "Snippets::AssignRegisters") using Reg = size_t; using tensor = TensorPtr; - auto& expressions = linear_ir.get_ops(); + const auto& expressions = linear_ir.get_ops(); std::vector> typed_ops; NodeVector ops; @@ -65,18 +65,18 @@ bool AssignRegisters::run(LinearIR& linear_ir) { // Only in SoftmaxDecomposition ReduceMax and ReduceSum use HorizonMax/HorizonSum and VectorBuffer. 
// We should manually set the one vector register for VectorBuffer and Max/Sum output to simulate a accumulator // TODO [96351]: We should rewrite accumulator pattern using another way - const auto input_td = expr->get_input_tensor(0); - const auto& input_expr = input_td->get_source().get_expr(); - const auto& input_expr_input_tds = input_expr->get_input_tensors(); - for (const auto& td : input_expr_input_tds) { - if (ov::is_type(td->get_source().get_expr()->get_node())) { - manually_assigned_vecs[td] = static_cast(accumulator_reg); + const auto& input_tensor = expr->get_input_tensor(0); + const auto& input_expr = input_tensor->get_source().get_expr(); + const auto& input_expr_input_tensors = input_expr->get_input_tensors(); + for (const auto& tensor : input_expr_input_tensors) { + if (ov::is_type(tensor->get_source().get_expr()->get_node())) { + manually_assigned_vecs[tensor] = static_cast(accumulator_reg); } } - const auto output_td = expr->get_output_tensor(0); - manually_assigned_vecs[input_td] = static_cast(accumulator_reg); - manually_assigned_vecs[output_td] = static_cast(accumulator_reg); - for (const auto& child_expr_input : output_td->get_consumers()) { + const auto& output_tensor = expr->get_output_tensor(0); + manually_assigned_vecs[input_tensor] = static_cast(accumulator_reg); + manually_assigned_vecs[output_tensor] = static_cast(accumulator_reg); + for (const auto& child_expr_input : output_tensor->get_consumers()) { if (ov::is_type(child_expr_input.get_expr()->get_node())) { manually_assigned_vecs[child_expr_input.get_expr()->get_output_tensor(0)] = static_cast(accumulator_reg); @@ -86,7 +86,7 @@ bool AssignRegisters::run(LinearIR& linear_ir) { // TODO: Fix via common pipeline using LoopEnd: // All operations `outside loop` after Horizon ops should have the same register to avoid using it in the next Loop const auto current_loops_ids = expr->get_loop_ids(); - auto next_expr = output_td->get_consumers().begin()->get_expr(); + auto next_expr = 
output_tensor->get_consumers().begin()->get_expr(); while (next_expr->get_loop_ids() == current_loops_ids) { manually_assigned_vecs[next_expr->get_output_tensor(0)] = static_cast(accumulator_reg); @@ -103,11 +103,11 @@ bool AssignRegisters::run(LinearIR& linear_ir) { decltype(regs_vec)& reg_map, const std::map& manually_assigned_regs, size_t& counter) { - for (const auto& out_td : expr->get_output_tensors()) { + for (const auto& out_tensor : expr->get_output_tensors()) { // Note that some ops might have identical input&output tensors (Result and Tile* for ex.) // so we have to check that the tensor has not been enumerated already - if (reg_map.count(out_td) == 0) { - reg_map[out_td] = manually_assigned_regs.count(out_td) == 0 ? counter++ : IS_MANUALLY_ALLOCATED_REG; + if (reg_map.count(out_tensor) == 0) { + reg_map[out_tensor] = manually_assigned_regs.count(out_tensor) == 0 ? counter++ : IS_MANUALLY_ALLOCATED_REG; } } }; diff --git a/src/common/snippets/src/lowered/pass/fuse_loops.cpp b/src/common/snippets/src/lowered/pass/fuse_loops.cpp index e2b1c99f60ff1b..f70e33e68ab23f 100644 --- a/src/common/snippets/src/lowered/pass/fuse_loops.cpp +++ b/src/common/snippets/src/lowered/pass/fuse_loops.cpp @@ -33,21 +33,21 @@ void FuseLoops::fuse_points(std::vector& exit_points, std::vecto LinearIR::constExprIt loop_begin_pos, LinearIR::constExprIt loop_end_pos) { std::vector new_exit_points; for (const auto& exit_point : exit_points) { - const auto consumers_inputs = exit_point.get_tensor_ptr()->get_consumers(); + const auto consumers_inputs = exit_point.get_connected_ports(); - std::vector mapped_entry_points; - std::vector outside_consumers; + std::set mapped_entry_points; + std::set outside_consumers; for (const auto& consumer_input : consumers_inputs) { const auto entry_point_it = std::find(entry_points.begin(), entry_points.end(), consumer_input); if (entry_point_it != entry_points.end()) { - mapped_entry_points.push_back(*entry_point_it); + 
mapped_entry_points.insert(*entry_point_it); continue; } - const auto consumer = consumer_input.get_expr(); + const auto& consumer = consumer_input.get_expr(); const auto inside_it = std::find(loop_begin_pos, loop_end_pos, consumer); if (inside_it == loop_end_pos) { - outside_consumers.push_back(consumer); + outside_consumers.insert(consumer); } } @@ -67,8 +67,7 @@ void FuseLoops::fuse_points(std::vector& exit_points, std::vecto exit_points = new_exit_points; } -bool FuseLoops::fuse_upper_into_current(LinearIR& linear_ir, const LinearIR::LoopManagerPtr& loop_manager, - const ExpressionPort& current_entry_point, const ExpressionPort& target_exit_point, +bool FuseLoops::fuse_upper_into_current(LinearIR& linear_ir, const LinearIR::LoopManagerPtr& loop_manager, const ExpressionPort& current_entry_point, size_t current_loop_id, size_t target_loop_id, size_t dim_idx, LinearIR::constExprIt& current_loop_begin_pos, LinearIR::constExprIt& current_loop_end_pos) { const auto& loop_current = loop_manager->get_loop_info(current_loop_id); @@ -84,9 +83,9 @@ bool FuseLoops::fuse_upper_into_current(LinearIR& linear_ir, const LinearIR::Loo bool is_fusion_allowed = true; for (size_t i = 0; i < loop_target->exit_exprs.size() && is_fusion_allowed; ++i) { const auto target_exit_point = loop_target->exit_exprs[i]; - const auto consumer_inputs = target_exit_point.get_tensor_ptr()->get_consumers(); + const auto consumer_inputs = target_exit_point.get_connected_ports(); for (const auto& consumer_input : consumer_inputs) { - const auto consumer = consumer_input.get_expr(); + const auto& consumer = consumer_input.get_expr(); if (ov::is_type(consumer->get_node()) || consumer == current_entry_point.get_expr()) continue; // The fusing is only valid if target Loop consumer (the Consumer is outside of target Loop) @@ -138,8 +137,7 @@ bool FuseLoops::fuse_upper_into_current(LinearIR& linear_ir, const LinearIR::Loo return true; } -bool FuseLoops::fuse_lower_into_current(LinearIR& linear_ir, const 
LinearIR::LoopManagerPtr& loop_manager, - const ExpressionPort& current_exit_point, const ExpressionPort& target_entry_point, +bool FuseLoops::fuse_lower_into_current(LinearIR& linear_ir, const LinearIR::LoopManagerPtr& loop_manager, const ExpressionPort& current_exit_point, size_t current_loop_id, size_t target_loop_id, size_t dim_idx, LinearIR::constExprIt& current_loop_begin_pos, LinearIR::constExprIt& current_loop_end_pos) { const auto& loop_current = loop_manager->get_loop_info(current_loop_id); @@ -152,8 +150,8 @@ bool FuseLoops::fuse_lower_into_current(LinearIR& linear_ir, const LinearIR::Loo bool is_fusion_allowed = true; for (size_t i = 0; i < loop_target->entry_exprs.size() && is_fusion_allowed; ++i) { const auto target_entry_point = loop_target->entry_exprs[i]; - const auto parent_expr_output = target_entry_point.get_tensor_ptr()->get_source(); - const auto parent_expr = parent_expr_output.get_expr(); + const auto parent_expr_output = *target_entry_point.get_connected_ports().begin(); + const auto& parent_expr = parent_expr_output.get_expr(); if (ov::is_type(parent_expr->get_node()) || parent_expr == current_exit_point.get_expr()) continue; is_fusion_allowed = parent_expr->get_loop_ids()[dim_idx] == current_loop_id || // The parent expr is from the same current Loop @@ -257,9 +255,8 @@ bool FuseLoops::run(LinearIR& linear_ir) { bool was_fusion_up = false; for (size_t in_port = 0; in_port < entry_points.size() && !was_fusion_up; ++in_port) { const auto entry_point = entry_points[in_port]; - const auto parent_expr_output = entry_point.get_tensor_ptr()->get_source(); - const auto parent_expr = parent_expr_output.get_expr(); - const auto out_port = parent_expr_output.get_index(); + const auto parent_expr_output = *entry_point.get_connected_ports().begin(); + const auto& parent_expr = parent_expr_output.get_expr(); const auto parent = parent_expr->get_node(); if (ov::is_type(parent) || ov::is_type(parent) || @@ -274,10 +271,8 @@ bool FuseLoops::run(LinearIR& 
linear_ir) { "Loops cannot have parents of entry points with the same identifier"); if (loop_id_target == Expression::LOOP_NULL_ID) continue; - const auto loop_info_target = loop_manager->get_loop_info(loop_id_target); - const auto target_exit_port = parent_expr->get_output_port(out_port); - if (fuse_upper_into_current(linear_ir, loop_manager, entry_point, target_exit_port, loop_id, loop_id_target, + if (fuse_upper_into_current(linear_ir, loop_manager, entry_point, loop_id, loop_id_target, dim_idx, loop_begin_pos, loop_end_pos)) { was_fusion_up = true; loop_manager->remove_loop_info(loop_id_target); @@ -295,10 +290,9 @@ bool FuseLoops::run(LinearIR& linear_ir) { bool was_fusion_down = false; for (size_t out_port = 0; out_port < exit_points.size() && !was_fusion_down; ++out_port) { const auto exit_point = exit_points[out_port]; - const auto consumer_exprs_inputs = exit_point.get_tensor_ptr()->get_consumers(); + const auto consumer_exprs_inputs = exit_point.get_connected_ports(); for (const auto& consumer_expr_input : consumer_exprs_inputs) { - const auto consumer_expr = consumer_expr_input.get_expr(); - const auto in_port = consumer_expr_input.get_index(); + const auto& consumer_expr = consumer_expr_input.get_expr(); const auto consumer = consumer_expr->get_node(); if (ov::is_type(consumer) || ov::is_type(consumer)) { @@ -314,9 +308,7 @@ bool FuseLoops::run(LinearIR& linear_ir) { if (loop_id == loop_id_target || loop_id_target == Expression::LOOP_NULL_ID) continue; - const auto loop_info_target = loop_manager->get_loop_info(loop_id_target); - const auto target_entry_port = consumer_expr->get_input_port(in_port); - if (fuse_lower_into_current(linear_ir, loop_manager, exit_point, target_entry_port, loop_id, loop_id_target, + if (fuse_lower_into_current(linear_ir, loop_manager, exit_point, loop_id, loop_id_target, dim_idx, loop_begin_pos, loop_end_pos)) { was_fusion_down = true; loop_manager->remove_loop_info(loop_id_target); diff --git 
a/src/common/snippets/src/lowered/pass/indentify_buffers.cpp b/src/common/snippets/src/lowered/pass/indentify_buffers.cpp index a59315e30d29af..621ac31be7d101 100644 --- a/src/common/snippets/src/lowered/pass/indentify_buffers.cpp +++ b/src/common/snippets/src/lowered/pass/indentify_buffers.cpp @@ -55,12 +55,9 @@ std::vector IdentifyBuffers::create_adjacency_matrix(const LinearIR& linea for (size_t buffer_idx = 0; buffer_idx < buffers.size(); ++buffer_idx) { // Here intermediate Buffer const auto buffer_expr = buffers[buffer_idx]; - const auto buffer_input_tds = buffer_expr->get_input_tensors(); - OPENVINO_ASSERT(buffer_input_tds.size() == 1, "Intermediate Buffer must have one input"); const auto buffer = ov::as_type_ptr(buffer_expr->get_node()); - - const auto& buffer_td = buffer_input_tds.front(); - const auto buffer_siblings = buffer_td->get_consumers(); + const auto& buffer_tensor = buffer_expr->get_input_tensor(0); + const auto buffer_siblings = buffer_tensor->get_consumers(); for (const auto& buffer_sibling : buffer_siblings) { const auto& sibling_expr = buffer_sibling.get_expr(); // Skip myself @@ -72,11 +69,11 @@ std::vector IdentifyBuffers::create_adjacency_matrix(const LinearIR& linea const auto output_count = loop_end->get_output_num(); const auto& ptr_increments = loop_end->get_ptr_increments(); const auto& io_data_sizes = loop_end->get_element_type_sizes(); - const auto buffer_loop_port = std::distance(loop_tds.begin(), std::find(loop_tds.begin(), loop_tds.end(), buffer_td)); + const auto buffer_loop_port = std::distance(loop_tds.begin(), std::find(loop_tds.begin(), loop_tds.end(), buffer_tensor)); // Verify Buffers on Loop inputs: for (size_t input_idx = 0; input_idx < input_count; ++input_idx) { - const auto loop_in = loop_tds[input_idx]->get_source().get_expr(); + const auto& loop_in = loop_tds[input_idx]->get_source().get_expr(); if (const auto& neighbour_buffer = is_intermediate_buffer(loop_in->get_node())) { const auto neighbour_buffer_loop_port 
= input_idx; update_adj_matrix(buffer, buffer_idx, neighbour_buffer, @@ -88,7 +85,7 @@ std::vector IdentifyBuffers::create_adjacency_matrix(const LinearIR& linea // Verify Buffers on Loop outputs for (size_t output_idx = 0; output_idx < output_count; ++output_idx) { // Skip the current Buffer - if (buffer_td == loop_tds[input_count + output_idx]) + if (buffer_tensor == loop_tds[input_count + output_idx]) continue; const auto consumer_inputs = loop_tds[input_count + output_idx]->get_consumers(); diff --git a/src/common/snippets/src/lowered/pass/init_loops.cpp b/src/common/snippets/src/lowered/pass/init_loops.cpp index fa9e78f5a9ad55..3a42178c53b4ce 100644 --- a/src/common/snippets/src/lowered/pass/init_loops.cpp +++ b/src/common/snippets/src/lowered/pass/init_loops.cpp @@ -29,7 +29,7 @@ void filter_ports(LinearIR& linear_ir, const auto node = expr->get_node(); const auto ma = ov::as_type_ptr(node); if (ma && ma->is_memory_access_input_port(port)) { - const auto& parent_expr = expr->get_input_tensor(port)->get_source().get_expr(); + const auto& parent_expr = loop_entry_point.get_connected_ports().begin()->get_expr(); const auto& parent = parent_expr->get_node(); // Todo: Sometimes several Load in one Loop read data from the same Node if (loop_parents.find(parent) == loop_parents.end()) { @@ -71,20 +71,20 @@ std::vector InitLoops::init_ptr_increments(const std::vector InitLoops::init_ptr_increments(const std::vector& loop_entries, const std::vector& loop_exits) { for (const auto& entry_point : loop_entries) { - const auto expr = entry_point.get_expr(); + const auto& expr = entry_point.get_expr(); const auto port = entry_point.get_index(); const auto node = expr->get_node(); - const auto input_td = expr->get_input_tensor(port); - const auto parent_expr_output = input_td->get_source(); + const auto& input_tensor = expr->get_input_tensor(port); + const auto& parent_expr_output = input_tensor->get_source(); const auto& parent_expr = parent_expr_output.get_expr(); const 
auto parent_port = parent_expr_output.get_index(); const auto parent = parent_expr->get_node(); @@ -103,25 +103,23 @@ void InsertBuffers::insertion(LinearIR& linear_ir, const LinearIR::LoopManagerPt // Need to insert between 2nd and 4th Loops - after 2nd Loop const auto pos = insertion_position(linear_ir, loop_manager, parent_expr, expr); const auto buffer = std::make_shared(parent->output(parent_port), m_buffer_allocation_rank); - PortManager::set_port_descriptor_ptr(buffer->output(0), std::make_shared(input_td->get_tensor(), - input_td->get_subtensor(), - input_td->get_layout())); - // Output td is automatically filled from PortDescriptor - const auto buffer_expr = linear_ir.create_expression(buffer, {input_td}); + PortManager::set_port_descriptor_ptr(buffer->output(0), parent_expr_output.get_descriptor_ptr()->clone()); + // Output tensor is automatically filled from PortDescriptor + const auto buffer_expr = linear_ir.create_expression(buffer, {input_tensor}); linear_ir.insert(pos, buffer_expr); - linear_ir.replace_input(expr, port, buffer_expr->get_output_tensor(0)); + linear_ir.replace_input(entry_point, buffer_expr->get_output_tensor(0)); } } for (const auto& exit_point : loop_exits) { - const auto expr = exit_point.get_expr(); + const auto& expr = exit_point.get_expr(); const auto port = exit_point.get_index(); const auto node = expr->get_node(); - const auto output_td = expr->get_output_tensor(port); - const auto child_exprs_inputs = output_td->get_consumers(); + const auto output_tensor = exit_point.get_tensor_ptr(); + const auto child_exprs_inputs = output_tensor->get_consumers(); const auto current_loops = expr->get_loop_ids(); const auto current_loop_count = current_loops.size(); - const std::vector node_outs = {output_td}; + const std::vector node_outs = {output_tensor}; std::set potential_consumers; std::set buffers; @@ -165,7 +163,7 @@ void InsertBuffers::insertion(LinearIR& linear_ir, const LinearIR::LoopManagerPt for (const auto& buffer : buffers) { 
const auto& buffer_out = buffer->get_output_tensor(0); const auto buffer_consumers_inputs = buffer_out->get_consumers(); - linear_ir.replace_input(buffer_consumers_inputs, output_td); + linear_ir.replace_input(buffer_consumers_inputs, output_tensor); potential_consumers.insert(buffer_consumers_inputs.begin(), buffer_consumers_inputs.end()); linear_ir.erase(std::find(linear_ir.begin(), linear_ir.end(), buffer)); } @@ -180,9 +178,7 @@ void InsertBuffers::insertion(LinearIR& linear_ir, const LinearIR::LoopManagerPt const auto pos = insertion_position(linear_ir, loop_manager, expr, (*potential_consumers.begin()).get_expr()); auto buffer = std::make_shared(node->output(port), m_buffer_allocation_rank); - PortManager::set_port_descriptor_ptr(buffer->output(0), std::make_shared(output_td->get_tensor(), - output_td->get_subtensor(), - output_td->get_layout())); + PortManager::set_port_descriptor_ptr(buffer->output(0), exit_point.get_descriptor_ptr()->clone()); // We cannot insert Node output tensor on Buffer output because not all consumers of Node needs Buffer // Example: // Add @@ -190,7 +186,7 @@ void InsertBuffers::insertion(LinearIR& linear_ir, const LinearIR::LoopManagerPt // Result Buffer // | <- It should be new TD // Relu - // Output td is automatically filled from PortDescriptor + // Output tensor is automatically filled from PortDescriptor const auto buffer_expr = linear_ir.create_expression(buffer, node_outs); linear_ir.insert(pos, buffer_expr); linear_ir.replace_input(potential_consumers, buffer_expr->get_output_tensor(0)); diff --git a/src/common/snippets/src/lowered/pass/insert_load_store.cpp b/src/common/snippets/src/lowered/pass/insert_load_store.cpp index eeb1bccf118781..c4931dfc1ad01a 100644 --- a/src/common/snippets/src/lowered/pass/insert_load_store.cpp +++ b/src/common/snippets/src/lowered/pass/insert_load_store.cpp @@ -54,8 +54,8 @@ bool InsertLoadStore::insert_load(LinearIR& linear_ir, const LinearIR::constExpr const auto& loop_manager = 
linear_ir.get_loop_manager(); const auto& data_expr = *data_expr_it; const auto& data_node = data_expr->get_node(); - const auto& output_td = data_expr->get_output_tensor(0); - const auto consumer_inputs = output_td->get_consumers(); + const auto& output_tensor = data_expr->get_output_tensor(0); + const auto consumer_inputs = output_tensor->get_consumers(); bool was_inserted = false; for (const auto& consumer_input : consumer_inputs) { @@ -72,12 +72,10 @@ bool InsertLoadStore::insert_load(LinearIR& linear_ir, const LinearIR::constExpr OPENVINO_ASSERT(inner_loop != Expression::LOOP_NULL_ID, "Loop hasn't been found!"); const auto load = std::make_shared(data_node->output(0), m_vector_size); - PortManager::set_port_descriptor_ptr(load->output(0), std::make_shared(output_td->get_tensor(), - output_td->get_subtensor(), - output_td->get_layout())); - const auto load_expr = linear_ir.create_expression(load, {output_td}); + PortManager::set_port_descriptor_ptr(load->output(0), consumer_input.get_descriptor_ptr()->clone()); + const auto load_expr = linear_ir.create_expression(load, {output_tensor}); linear_ir.insert(std::find(data_expr_it, linear_ir.cend(), consumer_expr), load_expr); - linear_ir.replace_input(consumer_expr, port, load_expr->get_output_tensor(0)); + linear_ir.replace_input(consumer_input, load_expr->get_output_tensor(0)); // Copy Loop identifies load_expr->set_loop_ids(loop_ids); @@ -94,8 +92,8 @@ bool InsertLoadStore::insert_load(LinearIR& linear_ir, const LinearIR::constExpr bool InsertLoadStore::insert_store(LinearIR& linear_ir, const LinearIR::constExprIt& data_expr_it) { const auto& loop_manager = linear_ir.get_loop_manager(); const auto& data_expr = *data_expr_it; - const auto& input_td = data_expr->get_input_tensor(0); - const auto parent_output = input_td->get_source(); + const auto& input_tensor = data_expr->get_input_tensor(0); + const auto& parent_output = input_tensor->get_source(); const auto& parent_expr = parent_output.get_expr(); const auto 
port = parent_output.get_index(); const auto& parent = parent_expr->get_node(); @@ -109,14 +107,12 @@ bool InsertLoadStore::insert_store(LinearIR& linear_ir, const LinearIR::constExp OPENVINO_ASSERT(inner_loop != Expression::LOOP_NULL_ID, "Loop hasn't been found!"); const auto store = std::make_shared(parent->output(port), m_vector_size); - PortManager::set_port_descriptor_ptr(store->output(0), std::make_shared(input_td->get_tensor(), - input_td->get_subtensor(), - input_td->get_layout())); - const auto store_expr = linear_ir.create_expression(store, {input_td}); + PortManager::set_port_descriptor_ptr(store->output(0), parent_output.get_descriptor_ptr()->clone()); + const auto store_expr = linear_ir.create_expression(store, {input_tensor}); const auto& reverse_insertion_pos = std::find(std::reverse_iterator(data_expr_it), linear_ir.crend(), parent_expr); const auto& insertion_pos = reverse_insertion_pos.base(); linear_ir.insert(insertion_pos, store_expr); - linear_ir.replace_input(data_expr, 0, store_expr->get_output_tensor(0)); + linear_ir.replace_input(data_expr->get_input_port(0), store_expr->get_output_tensor(0)); // Copy Loop identifies store_expr->set_loop_ids(loop_ids); @@ -124,7 +120,7 @@ bool InsertLoadStore::insert_store(LinearIR& linear_ir, const LinearIR::constExp const auto prev_exit_point = parent_output; // The previous exit point byt one output port can have several consumers that can be potential exit points // So we should verify on the possible future exit points - const auto consumer_inputs = input_td->get_consumers(); + const auto consumer_inputs = input_tensor->get_consumers(); const auto should_be_saved = std::any_of(consumer_inputs.begin(), consumer_inputs.end(), [](const ExpressionPort& input_port) { const auto& node = input_port.get_expr()->get_node(); diff --git a/src/common/snippets/src/lowered/pass/insert_tail_loop.cpp b/src/common/snippets/src/lowered/pass/insert_tail_loop.cpp index 74bbf109d44bf8..cfdc9ab8ae66eb 100644 --- 
a/src/common/snippets/src/lowered/pass/insert_tail_loop.cpp +++ b/src/common/snippets/src/lowered/pass/insert_tail_loop.cpp @@ -41,10 +41,10 @@ void InsertTailLoop::tail_transformations(LinearIR& linear_ir, ov::is_type(op))) { for (size_t i = 0; i < op->inputs().size(); ++i) { if (auto fill = insertFill(op->input(i))) { - std::vector inputs{expr_it->get()->get_input_tensor(i)}; - const auto& consumers = inputs.front()->get_consumers(); + const auto& input = expr_it->get()->get_input_tensor(i); + const auto consumers = input->get_consumers(); // Note: inputs == outputs, since we want to modify vector reg inplace - auto fill_expr = linear_ir.create_expression(fill, inputs); + auto fill_expr = linear_ir.create_expression(fill, {input}); linear_ir.insert(expr_it, fill_expr); linear_ir.replace_input(consumers, fill_expr->get_output_tensor(0)); auto reg = expr_it->get()->get_reg_info().first[i]; @@ -98,7 +98,7 @@ bool InsertTailLoop::run(LinearIR& linear_ir) { }; auto is_loop_with_buffers = [&linear_ir](const std::shared_ptr& loop_end) { auto is_buffer_input = [&linear_ir](const TensorPtr& input) { - const auto parent_expr = input->get_source().get_expr(); + const auto& parent_expr = input->get_source().get_expr(); return ov::is_type(parent_expr->get_node()); }; auto is_buffer_output = [&linear_ir](const TensorPtr& output) { @@ -107,7 +107,7 @@ bool InsertTailLoop::run(LinearIR& linear_ir) { [](const ExpressionPort& lp) {return ov::is_type(lp.get_expr()->get_node());}); }; - const auto loop_end_expr = linear_ir.get_expr_by_node(loop_end); + const auto& loop_end_expr = linear_ir.get_expr_by_node(loop_end); const auto inputs = loop_end_expr->get_input_tensors(); const auto in_num = loop_end->get_input_num(); const auto out_num = loop_end->get_output_num(); diff --git a/src/common/snippets/src/lowered/pass/load_movebroadcast_to_broadcastload.cpp b/src/common/snippets/src/lowered/pass/load_movebroadcast_to_broadcastload.cpp index 0f65d9d1ff4c31..b9bcfce87f5394 100644 --- 
a/src/common/snippets/src/lowered/pass/load_movebroadcast_to_broadcastload.cpp +++ b/src/common/snippets/src/lowered/pass/load_movebroadcast_to_broadcastload.cpp @@ -19,18 +19,19 @@ bool LoadMoveBroadcastToBroadcastLoad::run(LinearIR& linear_ir) { bool modified = false; for (auto expr_it = linear_ir.begin(); expr_it != linear_ir.end(); expr_it++) { - const auto& op = (*expr_it)->get_node(); + const auto& expr = *expr_it; + const auto& op = expr->get_node(); // Match on MoveBroadcast because MoveBroadcast is rare node in bodies if (const auto move_broadcast = ov::as_type_ptr(op)) { - const auto& interm_td = (*expr_it)->get_input_tensor(0); - const auto parent_expr = interm_td->get_source().get_expr(); + const auto& interm_tensor = expr->get_input_tensor(0); + const auto parent_expr = interm_tensor->get_source().get_expr(); const auto load = ov::as_type_ptr(parent_expr->get_node()); if (!load) continue; // Cannot rewrite Broadcast + Load if load has more than 1 user // or more than one input, or if Broadcast has several inputs - const auto load_consumers_inputs = interm_td->get_consumers(); + const auto load_consumers_inputs = interm_tensor->get_consumers(); size_t count = 0; for (const auto& consumer_expr_input : load_consumers_inputs) { const auto consumer = consumer_expr_input.get_expr()->get_node(); @@ -43,11 +44,8 @@ bool LoadMoveBroadcastToBroadcastLoad::run(LinearIR& linear_ir) { const auto& outshape = move_broadcast->get_output_partial_shape(0); const auto broadcastload = std::make_shared(load->input_value(0), outshape, load->get_offset()); - const auto& move_out = (*expr_it)->get_output_tensor(0); - const auto move_consumers = move_out->get_consumers(); - PortManager::set_port_descriptor_ptr(broadcastload->output(0), std::make_shared(move_out->get_tensor(), - move_out->get_subtensor(), - move_out->get_layout())); + const auto move_consumers = expr->get_output_tensor(0)->get_consumers(); + PortManager::set_port_descriptor_ptr(broadcastload->output(0), 
expr->get_output_port(0).get_descriptor_ptr()->clone()); const auto broadcastload_expr = linear_ir.create_expression(broadcastload, { parent_expr->get_input_tensor(0) }); const auto mv_expr_it = expr_it; const auto insertion_pos = std::next(expr_it); diff --git a/src/common/snippets/src/lowered/pass/mark_loops.cpp b/src/common/snippets/src/lowered/pass/mark_loops.cpp index c9436b1c2b3318..1b13dbcdbbd4b3 100644 --- a/src/common/snippets/src/lowered/pass/mark_loops.cpp +++ b/src/common/snippets/src/lowered/pass/mark_loops.cpp @@ -33,11 +33,11 @@ bool MarkLoops::run(LinearIR& linear_ir) { }; auto are_conflicted = [](const ExpressionPort& lhs, const ExpressionPort& rhs) { - const auto& lhs_desc = lhs.get_port_descriptor(); - const auto& rhs_desc = rhs.get_port_descriptor(); + const auto& lhs_desc = lhs.get_descriptor_ptr(); + const auto& rhs_desc = rhs.get_descriptor_ptr(); return lhs_desc->get_subtensor() != rhs_desc->get_subtensor() || lhs_desc->get_layout() != rhs_desc->get_layout() || - lhs_desc->get_tensor() != rhs_desc->get_tensor(); + lhs_desc->get_shape() != rhs_desc->get_shape(); }; for (auto expr_it = linear_ir.cbegin(); expr_it != linear_ir.cend(); expr_it++) { @@ -65,18 +65,18 @@ bool MarkLoops::run(LinearIR& linear_ir) { break; // We finish Loop if - // - the next expr isn't real customer + // - the next expr isn't real consumer // - the is conflict between the corresponding ports bool is_connected = false; bool is_conflicted = false; for (size_t i = 0; i < prev_expr->get_output_count(); ++i) { - const auto& loop_td = prev_expr->get_output_tensor(i); - const auto consumers = loop_td->get_consumers(); + const auto& loop_tensor = prev_expr->get_output_tensor(i); + const auto consumers = loop_tensor->get_consumers(); const auto found = std::find_if(consumers.begin(), consumers.end(), [&loop_end_pos](const ExpressionPort& consumer) { return consumer.get_expr() == *loop_end_pos; }); if (found != consumers.end()) { - if (are_conflicted(*found, 
loop_td->get_source())) { + if (are_conflicted(*found, loop_tensor->get_source())) { is_conflicted = true; break; } diff --git a/src/common/snippets/src/lowered/pass/move_result_out_from_loop.cpp b/src/common/snippets/src/lowered/pass/move_result_out_from_loop.cpp index 6d287990e8ca26..c44cb6c6feb03f 100644 --- a/src/common/snippets/src/lowered/pass/move_result_out_from_loop.cpp +++ b/src/common/snippets/src/lowered/pass/move_result_out_from_loop.cpp @@ -31,8 +31,8 @@ bool MoveResultOutOfLoop::run(LinearIR& linear_ir) { continue; } - const auto& input_td = expr->get_input_tensor(0); - const auto parent_expr = input_td->get_source().get_expr(); + const auto& input_tensor = expr->get_input_tensor(0); + const auto& parent_expr = input_tensor->get_source().get_expr(); const auto parent_loop_ids = parent_expr->get_loop_ids(); int outer_loop_id = static_cast(parent_loop_ids.size()) - 1; for (; outer_loop_id >= 0; --outer_loop_id) { diff --git a/src/common/snippets/src/lowered/pass/move_scalar_to_consumer.cpp b/src/common/snippets/src/lowered/pass/move_scalar_to_consumer.cpp index bf9a15a784b023..88961847fe1ce6 100644 --- a/src/common/snippets/src/lowered/pass/move_scalar_to_consumer.cpp +++ b/src/common/snippets/src/lowered/pass/move_scalar_to_consumer.cpp @@ -25,8 +25,7 @@ bool MoveScalarToConsumer::run(LinearIR& linear_ir) { for (auto expr_it = linear_ir.rbegin(); expr_it != linear_ir.rend(); expr_it++) { const auto expr = expr_it->get(); if (ov::is_type(expr->get_node())) { - const auto& output = expr->get_output_tensor(0); - const auto consumers = output->get_consumers(); + const auto consumers = expr->get_output_tensor(0)->get_consumers(); OPENVINO_ASSERT(consumers.size() == 1, "Scalar expression is expected to have a single consumer"); const auto& consumer_expr = consumers.begin()->get_expr(); diff --git a/src/common/snippets/src/lowered/pass/propagate_layout.cpp b/src/common/snippets/src/lowered/pass/propagate_layout.cpp index 050c9b59be16df..7ce9c5a4fbbfaa 100644 
--- a/src/common/snippets/src/lowered/pass/propagate_layout.cpp +++ b/src/common/snippets/src/lowered/pass/propagate_layout.cpp @@ -31,14 +31,14 @@ bool PropagateLayout::run(LinearIR& linear_ir) { OPENVINO_THROW("Parameter/Results should have exactly one output/input"); // If input - we should be looking downstream, if output - upstream - const auto& target_td = tds.front(); + const auto& target_tensor = tds.front(); if (is_input) { - const auto consumer_inputs = target_td->get_consumers(); + const auto consumer_inputs = target_tensor->get_consumers(); // Note that here we consider only the first child (which is usually load), // but often there is another child - LoopEnd std::set> child_layouts; for (const auto& child_input : consumer_inputs) { - const auto child = child_input.get_expr(); + const auto& child = child_input.get_expr(); const auto port = child_input.get_index(); const auto& n = child->get_node(); const auto ma = ov::as_type_ptr(n); @@ -49,23 +49,7 @@ bool PropagateLayout::run(LinearIR& linear_ir) { OPENVINO_ASSERT(child_layouts.size() == 1, "All children of an input expression must have the same layout"); io_expr->get_output_port(0).set_layout(*child_layouts.begin()); } else { - const auto consumer_inputs = target_td->get_consumers(); - // Note that here we consider only the first child (which is usually Store), - // but often there is another child - LoopEnd - ExpressionPort result_td; - for (const auto& child_input : consumer_inputs) { - const auto child = child_input.get_expr(); - if (ov::is_type(child->get_node())) { - continue; - } - if (child == io_expr) { - result_td = child_input; - continue; - } - OPENVINO_THROW("Result cannot have any siblings (only LoopEnd's)"); - } - - io_expr->get_input_port(0).set_layout(target_td->get_layout()); + io_expr->get_input_port(0).set_layout(target_tensor->get_layout()); } } diff --git a/src/common/snippets/src/lowered/pass/reset_buffers.cpp b/src/common/snippets/src/lowered/pass/reset_buffers.cpp index 
350d9a49c69313..7da95d71b9079d 100644 --- a/src/common/snippets/src/lowered/pass/reset_buffers.cpp +++ b/src/common/snippets/src/lowered/pass/reset_buffers.cpp @@ -25,7 +25,7 @@ bool ResetBuffers::reuse_buffer_increments(const LinearIR& linear_ir, const Expr std::set resetting_buffers; std::set buffers_ids; for (size_t i = 0; i < input_count; ++i) { - const auto parent_output = loop_tds[i]->get_source().get_expr(); + const auto& parent_output = loop_tds[i]->get_source().get_expr(); if (const auto buffer = ov::as_type_ptr(parent_output->get_node())) { // If Buffer is missed in set, Just save - it's first meeting if (buffers_ids.count(buffer->get_id()) == 0) { diff --git a/src/common/snippets/src/lowered/pass/softmax_decomposition.cpp b/src/common/snippets/src/lowered/pass/softmax_decomposition.cpp index eef8066be357b4..0661101f43d026 100644 --- a/src/common/snippets/src/lowered/pass/softmax_decomposition.cpp +++ b/src/common/snippets/src/lowered/pass/softmax_decomposition.cpp @@ -36,9 +36,9 @@ bool SoftmaxDecomposition::run(LinearIR& linear_ir) { const auto softmax = pm.at(match_softmax); const auto softmax_expr = *expr_it; const auto softmax_loop_ids = softmax_expr->get_loop_ids(); - const auto& input_td = softmax_expr->get_input_tensor(0); - const auto& output_td = softmax_expr->get_output_tensor(0); - const auto tensor_out = output_td->get_tensor(); + const auto& input_tensor = softmax_expr->get_input_tensor(0); + const auto& output_tensor = softmax_expr->get_output_tensor(0); + const auto tensor_out = output_tensor->get_shape(); const auto inner_work_amount = *(tensor_out.rbegin()); expr_it = linear_ir.erase(expr_it); // Remove Softmax @@ -99,9 +99,9 @@ bool SoftmaxDecomposition::run(LinearIR& linear_ir) { const auto mul = push_node(std::make_shared(exp.second, broadcast_pow.second)); // Transfer original ExpressionPorts - linear_ir.replace_input(*max.first, 0, input_td); - linear_ir.replace_input(*sub.first, 0, input_td); - 
linear_ir.replace_input(output_td->get_consumers(), (*mul.first)->get_output_tensor(0)); + linear_ir.replace_input((*max.first)->get_input_port(0), input_tensor); + linear_ir.replace_input((*sub.first)->get_input_port(0), input_tensor); + linear_ir.replace_input(output_tensor->get_consumers(), (*mul.first)->get_output_tensor(0)); // Markup of Mul Loop loop_manager->mark_loop(mul.first, expr_it, 1, inner_work_amount, m_vector_size, diff --git a/src/common/snippets/src/lowered/pass/vector_to_scalar.cpp b/src/common/snippets/src/lowered/pass/vector_to_scalar.cpp index 3e60c42875ccf2..eb65d05bfc4357 100644 --- a/src/common/snippets/src/lowered/pass/vector_to_scalar.cpp +++ b/src/common/snippets/src/lowered/pass/vector_to_scalar.cpp @@ -23,10 +23,10 @@ bool SetScalarCountForLoadStore::run(LinearIR& linear_ir) { const auto load = ov::as_type_ptr(op); const auto store = ov::as_type_ptr(op); if (load || store) { - const auto& td = load ? (*expr_it)->get_input_tensor(0) - : (*expr_it)->get_output_tensor(0); - const auto& layout = td->get_layout(); - const auto& tensor_shape = td->get_tensor(); + const auto& tensor = load ? 
(*expr_it)->get_input_tensor(0) + : (*expr_it)->get_output_tensor(0); + const auto& layout = tensor->get_layout(); + const auto& tensor_shape = tensor->get_shape(); // Find last dimension by layout const auto last_dim_idx = std::find(layout.begin(), layout.end(), layout.size() - 1); OPENVINO_ASSERT(last_dim_idx != layout.end(), "Load/Store expression have incorrect layout"); diff --git a/src/common/snippets/src/lowered/tensor.cpp b/src/common/snippets/src/lowered/tensor.cpp index c35cfb0cf609da..866e58a49ee021 100644 --- a/src/common/snippets/src/lowered/tensor.cpp +++ b/src/common/snippets/src/lowered/tensor.cpp @@ -12,8 +12,8 @@ namespace ngraph { namespace snippets { namespace lowered { -Tensor::Tensor(const ExpressionPort& source_descriptor, const std::set& consumer_descriptors) - : m_source_port(source_descriptor), m_consumer_ports(consumer_descriptors) {} +Tensor::Tensor(ExpressionPort source_descriptor, const std::set& consumer_descriptors) + : m_source_port(std::move(source_descriptor)), m_consumer_ports(consumer_descriptors) {} std::set::const_iterator Tensor::find_consumer(const ExpressionPort& consumer) const { // Note: Find by shared ptr and index port is enough since these parameters must be unique diff --git a/src/common/snippets/src/op/brgemm.cpp b/src/common/snippets/src/op/brgemm.cpp index 5e627e63f62251..7c2a763ab69862 100644 --- a/src/common/snippets/src/op/brgemm.cpp +++ b/src/common/snippets/src/op/brgemm.cpp @@ -18,10 +18,10 @@ Brgemm::Brgemm(const Output& A, const Output& B, set_input_offset(offset_a, 0); set_input_offset(offset_b, 1); set_output_offset(offset_c, 0); - constructor_validate_and_infer_types(); + custom_constructor_validate_and_infer_types(); } -void Brgemm::constructor_validate_and_infer_types() { +void Brgemm::custom_constructor_validate_and_infer_types() { INTERNAL_OP_SCOPE(BrgemmCPU_constructor_validate_and_infer_types); validate_inputs(); diff --git a/src/common/snippets/src/op/subgraph.cpp 
b/src/common/snippets/src/op/subgraph.cpp index d0b1821c6b0b05..7e1e38d4bc6323 100644 --- a/src/common/snippets/src/op/subgraph.cpp +++ b/src/common/snippets/src/op/subgraph.cpp @@ -16,7 +16,7 @@ #include "snippets/pass/transform_convert.hpp" #include "snippets/pass/matmul_to_brgemm.hpp" #include "snippets/pass/fuse_transpose_brgemm.hpp" -#include "snippets/pass/schedule_softmax.hpp" +#include "snippets/pass/set_softmax_ports.hpp" #include "snippets/utils.hpp" #include "snippets/port_descriptor.hpp" @@ -463,7 +463,7 @@ void snippets::op::Subgraph::convert_to_snippet_dialect() { manager.register_pass(); manager.register_pass(); manager.register_pass(); - manager.register_pass(); + manager.register_pass(); } manager.register_pass(); manager.register_pass(); diff --git a/src/common/snippets/src/pass/fuse_transpose_brgemm.cpp b/src/common/snippets/src/pass/fuse_transpose_brgemm.cpp index ad5de3d5b1e264..348b21f97e6af7 100644 --- a/src/common/snippets/src/pass/fuse_transpose_brgemm.cpp +++ b/src/common/snippets/src/pass/fuse_transpose_brgemm.cpp @@ -25,8 +25,10 @@ bool FuseTransposeBrgemm::is_supported_transpose(const Output& transpose_p // it's safe to do so because of the patterns we used. 
alternatively we can do it through pattern_values_map const auto& constant = as_type_ptr(transpose_node->get_input_node_shared_ptr(1)); // if Transpose in and out layout is not empty => something was already fused on this port - if (!utils::get_node_output_layout(transpose_node).empty() || - !utils::get_node_output_layout(transpose_node->get_input_node_shared_ptr(0)).empty()) + auto default_layout = std::vector(transpose_port.get_shape().size()); + std::iota(default_layout.begin(), default_layout.end(), 0);// NCHW layout by default + if (PortManager::get_port_descriptor_ptr(transpose_port)->get_layout() != default_layout || + PortManager::get_port_descriptor_ptr(transpose_node->input_value(0))->get_layout() != default_layout) return false; const auto& transpose_order = constant->cast_vector(); // todo: this limitation is due to the fact that offsets are calculated in Kernel, and the only way @@ -65,7 +67,7 @@ FuseTransposeBrgemm::FuseTransposeBrgemm() { const auto& transpose_out = m.get_match_value(); const auto& const_order = ov::as_type_ptr(transpose_out.get_node_shared_ptr()->get_input_node_shared_ptr(1)); const auto& original_port = ngraph::snippets::PortManager::get_port_descriptor_ptr(brgemm_out); - original_port->set_tensor(transpose_out.get_shape()); + original_port->set_shape(transpose_out.get_shape()); original_port->set_layout(const_order->cast_vector()); for (const auto& in : transpose_out.get_target_inputs()) in.replace_source_output(brgemm->output(0)); @@ -79,7 +81,7 @@ FuseTransposeBrgemm::FuseTransposeBrgemm() { const auto& const_order = ov::as_type_ptr(transpose->get_input_node_shared_ptr(1)); brgemm->set_argument(i, transpose->input_value(0)); const auto& original_port = ngraph::snippets::PortManager::get_port_descriptor_ptr(in); - original_port->set_tensor(transpose->get_input_shape(0)); + original_port->set_shape(transpose->get_input_shape(0)); original_port->set_layout(const_order->cast_vector()); // At the moment we support fused Transpose 
only after Parameter -> we can update port descriptor for Parameter as well. // Note: It's needed for BrgemmCPU diff --git a/src/common/snippets/src/pass/schedule_softmax.cpp b/src/common/snippets/src/pass/set_softmax_ports.cpp similarity index 91% rename from src/common/snippets/src/pass/schedule_softmax.cpp rename to src/common/snippets/src/pass/set_softmax_ports.cpp index 1e4d4ac50d38f0..a79e53f137d04a 100644 --- a/src/common/snippets/src/pass/schedule_softmax.cpp +++ b/src/common/snippets/src/pass/set_softmax_ports.cpp @@ -2,10 +2,9 @@ // SPDX-License-Identifier: Apache-2.0 // -#include - -#include "snippets/pass/schedule_softmax.hpp" +#include "snippets/pass/set_softmax_ports.hpp" +#include #include "snippets/port_descriptor.hpp" #include "ngraph/op/softmax.hpp" @@ -15,15 +14,15 @@ using namespace ngraph; -ngraph::snippets::pass::ScheduleSoftmax::ScheduleSoftmax() { - MATCHER_SCOPE(ScheduleSoftmax); +ngraph::snippets::pass::SetSoftmaxPorts::SetSoftmaxPorts() { + MATCHER_SCOPE(SetSoftmaxPorts); auto m_softmax_v1 = ngraph::pattern::wrap_type(); auto m_softmax_v8 = ngraph::pattern::wrap_type(); auto m_softmax = std::make_shared(OutputVector{m_softmax_v1, m_softmax_v8}); auto callback = [](ngraph::pattern::Matcher &m) { - OV_ITT_SCOPED_TASK(ngraph::pass::itt::domains::SnippetsTransform, "Snippets::op::ScheduleSoftmax") + OV_ITT_SCOPED_TASK(ngraph::pass::itt::domains::SnippetsTransform, "Snippets::op::SetSoftmaxPorts") auto root = m.get_match_root(); const auto& pshape = root->get_input_partial_shape(0); diff --git a/src/common/snippets/src/port_descriptor.cpp b/src/common/snippets/src/port_descriptor.cpp index 241f26243ff683..b025a79daf2358 100644 --- a/src/common/snippets/src/port_descriptor.cpp +++ b/src/common/snippets/src/port_descriptor.cpp @@ -36,23 +36,8 @@ void PortDescriptor::validate_arguments() { } } -PortDescriptor PortDescriptor::deserialize(const std::string& serialized_info) { - std::stringstream sinfo(serialized_info); - auto read_values = 
[](std::stringstream& ss){ - size_t num = 0; - ss >> num; - std::vector res; - for (size_t i = 0; i < num; i++) { - size_t val; - ss >> val; - res.push_back(val); - } - return res; - }; - const auto& tensor_shape = read_values(sinfo); - const auto& subtensor_shape = read_values(sinfo); - const auto& layout = read_values(sinfo); - return {tensor_shape, subtensor_shape, layout}; +PortDescriptorPtr PortDescriptor::clone() const { + return std::make_shared(m_tensor_shape, m_subtensor_shape, m_layout); } std::string PortDescriptor::serialize() const { diff --git a/src/common/snippets/src/utils.cpp b/src/common/snippets/src/utils.cpp index efc4ec0bb67d8c..ca6c4e4dd3f182 100644 --- a/src/common/snippets/src/utils.cpp +++ b/src/common/snippets/src/utils.cpp @@ -67,27 +67,6 @@ auto get_non_scalar_constant_count_for_fq(const std::shared_ptr get_node_output_layout(const std::shared_ptr& node) { - return get_node_output_layout(node.get()); -} -std::vector get_node_output_layout(const Node* node) { - if (!node) - return {}; - if (node->is_dynamic()) - OPENVINO_THROW("It's illegal to call get_node_output_layout for dynamic nodes"); - auto& rt = node->get_rt_info(); - const auto rinfo = rt.find("Layout"); - if (rinfo != rt.end()) { - std::vector layout(rinfo->second.as>()); - // This might be a little costy, but still useful sanity check. Remove if proved to be unacceptably heavy. 
- std::set unique_elements(layout.begin(), layout.end()); - if (unique_elements.size() < layout.size()) - OPENVINO_THROW("Layout must contain only unique dimension indexes"); - return layout; - } else { - return {}; - } -} ov::PartialShape get_reordered_planar_shape(const ov::PartialShape& shape, const std::vector& layout) { if (layout.empty()) @@ -106,29 +85,14 @@ ov::PartialShape get_reordered_planar_shape(const ov::PartialShape& shape, const return reordered_shape; } -ov::Shape get_reordered_shape(const ov::Shape& shape, const std::vector& layout) { - if (layout.empty()) - return shape; - ov::Shape reordered_shape(layout.size()); - const size_t rank = shape.size(); - if (layout.size() > rank) - OPENVINO_THROW("Layout rank can't be larger than tensor rank"); - // Note that it can be smaller though, for example tensor shape can be prepended with 1 for scheduling purposes - if (std::any_of(layout.begin(), layout.end(), [=](size_t x) {return x >= rank;})) - OPENVINO_THROW("Invalid layout detected: all layout indexes must be smaller than the tensor rank"); - for (size_t i = 0; i < layout.size(); i++) - reordered_shape[i] = shape[layout[i]]; - return reordered_shape; -} - ov::PartialShape get_port_planar_shape(const Input& in) { - const auto& td = PortManager::get_port_descriptor_ptr(in); - return utils::get_reordered_planar_shape(ov::Shape{td->get_tensor()}, td->get_layout()); + const auto& port = PortManager::get_port_descriptor_ptr(in); + return utils::get_reordered_planar_shape(ov::Shape{port->get_shape()}, port->get_layout()); } ov::PartialShape get_port_planar_shape(const Output& out) { - const auto& td = PortManager::get_port_descriptor_ptr(out); - return utils::get_reordered_planar_shape(ov::Shape{td->get_tensor()}, td->get_layout()); + const auto& port = PortManager::get_port_descriptor_ptr(out); + return utils::get_reordered_planar_shape(ov::Shape{port->get_shape()}, port->get_layout()); } } // namespace utils diff --git 
a/src/plugins/intel_cpu/src/emitters/x64/jit_snippets_emitters.cpp b/src/plugins/intel_cpu/src/emitters/x64/jit_snippets_emitters.cpp index 5346b8059681c6..74b3513b68f093 100644 --- a/src/plugins/intel_cpu/src/emitters/x64/jit_snippets_emitters.cpp +++ b/src/plugins/intel_cpu/src/emitters/x64/jit_snippets_emitters.cpp @@ -139,7 +139,7 @@ KernelEmitter::KernelEmitter(dnnl::impl::cpu::x64::jit_generator* h, dnnl::impl: IE_THROW() << "Kernel detected unsupported io_type"; } } - io_shapes.push_back(td->get_tensor()); + io_shapes.push_back(td->get_shape()); io_data_layouts.push_back(td->get_layout()); io_data_sizes.push_back(etype.size()); } diff --git a/src/plugins/intel_cpu/src/transformations/snippets/x64/op/brgemm_copy_b.cpp b/src/plugins/intel_cpu/src/transformations/snippets/x64/op/brgemm_copy_b.cpp index 096075106787d3..f9a3681c3d3c9f 100644 --- a/src/plugins/intel_cpu/src/transformations/snippets/x64/op/brgemm_copy_b.cpp +++ b/src/plugins/intel_cpu/src/transformations/snippets/x64/op/brgemm_copy_b.cpp @@ -21,7 +21,7 @@ intel_cpu::BrgemmCopyB::BrgemmCopyB(const Output& x, const element::Type s if (is_with_compensations()) { set_output_port_descriptor({0, offset_out1}, 1); } - constructor_validate_and_infer_types(); + custom_constructor_validate_and_infer_types(); } bool intel_cpu::BrgemmCopyB::visit_attributes(AttributeVisitor& visitor) { @@ -31,7 +31,7 @@ bool intel_cpu::BrgemmCopyB::visit_attributes(AttributeVisitor& visitor) { return true; } -void intel_cpu::BrgemmCopyB::constructor_validate_and_infer_types() { +void intel_cpu::BrgemmCopyB::custom_constructor_validate_and_infer_types() { INTERNAL_OP_SCOPE(BrgemmRepack_ctor_validate_and_infer_types); // During ctor call, BrgemmCopyB doesn't know his port descriptors. 
// So we use port descs from source inputs diff --git a/src/plugins/intel_cpu/src/transformations/snippets/x64/op/brgemm_copy_b.hpp b/src/plugins/intel_cpu/src/transformations/snippets/x64/op/brgemm_copy_b.hpp index eefe39d5b4c70d..70dc348910506d 100644 --- a/src/plugins/intel_cpu/src/transformations/snippets/x64/op/brgemm_copy_b.hpp +++ b/src/plugins/intel_cpu/src/transformations/snippets/x64/op/brgemm_copy_b.hpp @@ -43,7 +43,7 @@ class BrgemmCopyB : public ngraph::snippets::op::MemoryAccess { std::shared_ptr clone_with_new_inputs(const OutputVector& new_args) const override; private: - void constructor_validate_and_infer_types(); + void custom_constructor_validate_and_infer_types(); void validate(const ov::PartialShape& pshape, const ov::element::Type& element_type); Type m_type = Type::OnlyRepacking; diff --git a/src/plugins/intel_cpu/src/transformations/snippets/x64/op/brgemm_cpu.cpp b/src/plugins/intel_cpu/src/transformations/snippets/x64/op/brgemm_cpu.cpp index be1ba4c460fb8f..55f3e46d1809bb 100644 --- a/src/plugins/intel_cpu/src/transformations/snippets/x64/op/brgemm_cpu.cpp +++ b/src/plugins/intel_cpu/src/transformations/snippets/x64/op/brgemm_cpu.cpp @@ -22,7 +22,7 @@ BrgemmCPU::BrgemmCPU(const Output& A, const Output& B, const Type ty set_input_port_descriptor({0, offset_a}, 0); set_input_port_descriptor({0, offset_b}, 1); set_output_port_descriptor({0, offset_c}, 0); - constructor_validate_and_infer_types(); + custom_constructor_validate_and_infer_types(); } BrgemmCPU::BrgemmCPU(const Output& A, const Output& B, const Output& scratch, const Type type, @@ -35,10 +35,10 @@ BrgemmCPU::BrgemmCPU(const Output& A, const Output& B, const Output< set_input_port_descriptor({0, offset_b}, 1); set_output_port_descriptor({0, offset_c}, 0); set_input_port_descriptor({0, offset_scratch}, 2); - constructor_validate_and_infer_types(); + custom_constructor_validate_and_infer_types(); } -void BrgemmCPU::constructor_validate_and_infer_types() { +void 
BrgemmCPU::custom_constructor_validate_and_infer_types() { INTERNAL_OP_SCOPE(BrgemmCPU_constructor_validate_and_infer_types); validate_inputs(); diff --git a/src/plugins/intel_cpu/src/transformations/snippets/x64/op/brgemm_cpu.hpp b/src/plugins/intel_cpu/src/transformations/snippets/x64/op/brgemm_cpu.hpp index 5b1fb688f7dda5..4f4a192b5a755c 100644 --- a/src/plugins/intel_cpu/src/transformations/snippets/x64/op/brgemm_cpu.hpp +++ b/src/plugins/intel_cpu/src/transformations/snippets/x64/op/brgemm_cpu.hpp @@ -50,7 +50,7 @@ class BrgemmCPU : public ngraph::snippets::op::Brgemm { constexpr static size_t SCRATCH_BYTE_SIZE = 32 * 1024; private: - void constructor_validate_and_infer_types(); + void custom_constructor_validate_and_infer_types(); void validate_with_scratchpad(const ov::Shape& shape_b) const; void validate_inputs() const; diff --git a/src/plugins/intel_cpu/src/transformations/snippets/x64/pass/brgemm_to_brgemm_cpu.cpp b/src/plugins/intel_cpu/src/transformations/snippets/x64/pass/brgemm_to_brgemm_cpu.cpp index 1506231bd6f686..3919b5b23f5d47 100644 --- a/src/plugins/intel_cpu/src/transformations/snippets/x64/pass/brgemm_to_brgemm_cpu.cpp +++ b/src/plugins/intel_cpu/src/transformations/snippets/x64/pass/brgemm_to_brgemm_cpu.cpp @@ -84,7 +84,7 @@ pass::BrgemmToBrgemmCPU::BrgemmToBrgemmCPU() { const auto copy_b_type = with_comp ? 
BrgemmCopyB::WithCompensations : BrgemmCopyB::OnlyRepacking; brgemm_repacking = std::make_shared(brgemm->input_value(1), element_type_a, copy_b_type, offset_b); const auto buffer = std::make_shared(brgemm_repacking->output(0)); - set_port_desc(brgemm_repacking->input(0), brgemm_in1_desc->get_tensor(), brgemm_in1_desc->get_subtensor(), brgemm_in1_desc->get_layout()); + set_port_desc(brgemm_repacking->input(0), brgemm_in1_desc->get_shape(), brgemm_in1_desc->get_subtensor(), brgemm_in1_desc->get_layout()); set_full_port_desc(brgemm_repacking->output(0)); set_full_port_desc(buffer->input(0)); set_full_port_desc(buffer->output(0)); @@ -115,13 +115,13 @@ pass::BrgemmToBrgemmCPU::BrgemmToBrgemmCPU() { ngraph::replace_node(brgemm, brgemm_cpu); // Transfer ports - set_port_desc(brgemm_cpu->input(0), brgemm_in0_desc->get_tensor(), brgemm_in0_desc->get_subtensor(), brgemm_in0_desc->get_layout()); + set_port_desc(brgemm_cpu->input(0), brgemm_in0_desc->get_shape(), brgemm_in0_desc->get_subtensor(), brgemm_in0_desc->get_layout()); if (brgemm_repacking) { set_full_port_desc(brgemm_cpu->input(1)); } else { - set_port_desc(brgemm_cpu->input(1), brgemm_in1_desc->get_tensor(), brgemm_in1_desc->get_subtensor(), brgemm_in1_desc->get_layout()); + set_port_desc(brgemm_cpu->input(1), brgemm_in1_desc->get_shape(), brgemm_in1_desc->get_subtensor(), brgemm_in1_desc->get_layout()); } - set_port_desc(brgemm_cpu->output(0), brgemm_out_desc->get_tensor(), brgemm_out_desc->get_subtensor(), brgemm_out_desc->get_layout()); + set_port_desc(brgemm_cpu->output(0), brgemm_out_desc->get_shape(), brgemm_out_desc->get_subtensor(), brgemm_out_desc->get_layout()); // need to run validate_and_infer_types manually: either input shapes were updated or // output Layout was updated (out shape will be updated in validate_and_infer_types()) diff --git a/src/plugins/intel_cpu/src/transformations/snippets/x64/pass/lowered/fuse_load_store_and_convert.cpp 
b/src/plugins/intel_cpu/src/transformations/snippets/x64/pass/lowered/fuse_load_store_and_convert.cpp index 5e0bd20e561698..6d024d7cd7bf14 100644 --- a/src/plugins/intel_cpu/src/transformations/snippets/x64/pass/lowered/fuse_load_store_and_convert.cpp +++ b/src/plugins/intel_cpu/src/transformations/snippets/x64/pass/lowered/fuse_load_store_and_convert.cpp @@ -15,8 +15,7 @@ bool ov::intel_cpu::pass::FuseLoadStoreConvert::fuse_load_convert(ngraph::snippe ngraph::snippets::lowered::LinearIR::constExprIt& convert_it) { const auto& convert_expr = *convert_it; const auto& convert = ov::as_type_ptr(convert_expr->get_node()); - const auto input_td = convert_expr->get_input_tensor(0); - const auto output_td = convert_expr->get_output_tensor(0); + const auto& input_td = convert_expr->get_input_tensor(0); if (convert->get_destination_type() != ov::element::f32 && convert->get_destination_type() != ov::element::i32) return false; @@ -45,10 +44,10 @@ bool ov::intel_cpu::pass::FuseLoadStoreConvert::fuse_load_convert(ngraph::snippe OPENVINO_THROW("Type of Convert op is undefined. 
Supports only fusing Load and ConvertTruncation or ConvertSaturation ops"); } - const auto convert_out = convert_expr->get_output_tensor(0); + const auto& convert_out = convert_expr->get_output_tensor(0); const auto convert_consumers = convert_out->get_consumers(); ngraph::snippets::PortManager::set_port_descriptor_ptr(load_convert->output(0), - std::make_shared(convert_out->get_tensor(), + std::make_shared(convert_out->get_shape(), convert_out->get_subtensor(), convert_out->get_layout())); const auto load_convert_expr = linear_ir.create_expression(load_convert, { load_expr->get_input_tensor(0) }); @@ -65,8 +64,8 @@ bool ov::intel_cpu::pass::FuseLoadStoreConvert::fuse_store_convert(ngraph::snipp ngraph::snippets::lowered::LinearIR::constExprIt& convert_it) { const auto& convert_expr = *convert_it; const auto& convert = convert_expr->get_node(); - const auto input_td = convert_expr->get_input_tensor(0); - const auto output_td = convert_expr->get_output_tensor(0); + const auto& input_td = convert_expr->get_input_tensor(0); + const auto& output_td = convert_expr->get_output_tensor(0); if (convert->get_input_element_type(0) != ov::element::f32 && convert->get_input_element_type(0) != ov::element::i32) return false; @@ -75,7 +74,7 @@ bool ov::intel_cpu::pass::FuseLoadStoreConvert::fuse_store_convert(ngraph::snipp return false; const auto store_input = *(consumers.begin()); - const auto store_expr = store_input.get_expr(); + const auto& store_expr = store_input.get_expr(); const auto store = ov::as_type_ptr(store_expr->get_node()); if (!store) return false; @@ -93,10 +92,10 @@ bool ov::intel_cpu::pass::FuseLoadStoreConvert::fuse_store_convert(ngraph::snipp OPENVINO_THROW("Type of Convert op is undefined. 
Supports only fusing Store and ConvertTruncation or ConvertSaturation ops"); } - const auto store_out = store_expr->get_output_tensor(0); + const auto& store_out = store_expr->get_output_tensor(0); const auto store_consumers = store_out->get_consumers(); ngraph::snippets::PortManager::set_port_descriptor_ptr(store_convert->output(0), - std::make_shared(store_out->get_tensor(), + std::make_shared(store_out->get_shape(), store_out->get_subtensor(), store_out->get_layout())); const auto store_convert_expr = linear_ir.create_expression(store_convert, { input_td }); diff --git a/src/tests/ngraph_helpers/snippets_ngraph_functions/src/subgraph_lowered.cpp b/src/tests/ngraph_helpers/snippets_ngraph_functions/src/subgraph_lowered.cpp index 93eec1b1418069..8387feacb70f77 100644 --- a/src/tests/ngraph_helpers/snippets_ngraph_functions/src/subgraph_lowered.cpp +++ b/src/tests/ngraph_helpers/snippets_ngraph_functions/src/subgraph_lowered.cpp @@ -81,7 +81,7 @@ std::shared_ptr Transpose0213MatMulLoweredFunction::initLowered() con if (transpose_position < 2) { const auto& anchor = data[transpose_position]->output(0); const auto& td = ngraph::snippets::PortManager::get_port_descriptor_ptr(anchor); - const auto& tensor = td->get_tensor(); + const auto& tensor = td->get_shape(); const auto& subtensor = td->get_subtensor(); ngraph::snippets::PortManager::set_port_descriptor_ptr(anchor, std::make_shared(tensor, subtensor, layout)); @@ -91,7 +91,7 @@ std::shared_ptr Transpose0213MatMulLoweredFunction::initLowered() con if (transpose_position == 2) { const auto& anchor = matmul->output(0); const auto& td = ngraph::snippets::PortManager::get_port_descriptor_ptr(anchor); - const auto& tensor = td->get_tensor(); + const auto& tensor = td->get_shape(); const auto& subtensor = td->get_subtensor(); ngraph::snippets::PortManager::set_port_descriptor_ptr(anchor, std::make_shared(tensor, subtensor, layout)); @@ -101,7 +101,7 @@ std::shared_ptr Transpose0213MatMulLoweredFunction::initLowered() con 
if (transpose_position < 2) { const auto& anchor = data[transpose_position]->output(0); const auto& td = ngraph::snippets::PortManager::get_port_descriptor_ptr(anchor); - const auto& tensor = td->get_tensor(); + const auto& tensor = td->get_shape(); const auto& subtensor = td->get_subtensor(); ngraph::snippets::PortManager::set_port_descriptor_ptr(matmul->input(transpose_position), std::make_shared(tensor, subtensor, layout)); From f08c06f1c6437fe62c7577a748995c7faf369c39 Mon Sep 17 00:00:00 2001 From: Alexandra Sidorova Date: Wed, 10 May 2023 16:39:18 +0400 Subject: [PATCH 09/13] fixed init loops --- .../snippets/src/lowered/pass/init_loops.cpp | 30 +++++++++++-------- 1 file changed, 17 insertions(+), 13 deletions(-) diff --git a/src/common/snippets/src/lowered/pass/init_loops.cpp b/src/common/snippets/src/lowered/pass/init_loops.cpp index 3a42178c53b4ce..daae16b1f25c09 100644 --- a/src/common/snippets/src/lowered/pass/init_loops.cpp +++ b/src/common/snippets/src/lowered/pass/init_loops.cpp @@ -52,10 +52,12 @@ void filter_ports(LinearIR& linear_ir, loop_exits = new_loop_exits; } -int64_t get_dim_stride(const size_t dim, const std::vector& shape) { +int64_t get_dim_stride(const size_t dim, const std::vector& layout, const std::vector& shape) { int64_t stride = 1; - for (size_t i = dim + 1; i < shape.size(); ++i) { - stride *= static_cast(shape[i]); + for (int i = static_cast(layout.size()) - 1; i >= 0; i--) { + if (layout[i] == dim) + break; + stride *= static_cast(shape[layout[i]]); } return stride; } @@ -71,36 +73,38 @@ std::vector InitLoops::init_ptr_increments(const std::vector Date: Wed, 10 May 2023 17:25:45 +0400 Subject: [PATCH 10/13] fixed brgemm ops --- .../snippets/include/snippets/op/brgemm.hpp | 5 ++-- src/common/snippets/src/op/brgemm.cpp | 22 +++++++++------ .../src/pass/fuse_transpose_brgemm.cpp | 6 ---- .../snippets/x64/op/brgemm_copy_b.cpp | 11 ++++---- .../snippets/x64/op/brgemm_copy_b.hpp | 5 ++-- .../snippets/x64/op/brgemm_cpu.cpp | 28 
++++++++++++------- .../snippets/x64/op/brgemm_cpu.hpp | 8 ++++-- .../x64/pass/brgemm_to_brgemm_cpu.cpp | 15 ++++++---- .../src/subgraph_lowered.cpp | 8 ++---- 9 files changed, 62 insertions(+), 46 deletions(-) diff --git a/src/common/snippets/include/snippets/op/brgemm.hpp b/src/common/snippets/include/snippets/op/brgemm.hpp index f5db719ae03de5..f207cafa1ad43c 100644 --- a/src/common/snippets/include/snippets/op/brgemm.hpp +++ b/src/common/snippets/include/snippets/op/brgemm.hpp @@ -20,7 +20,8 @@ class Brgemm : public MemoryAccess { public: OPENVINO_OP("Brgemm", "SnippetsOpset", MemoryAccess); Brgemm(const Output& A, const Output& B, - const size_t offset_a = 0lu, const size_t offset_b = 0lu, const size_t offset_c = 0lu); + const size_t offset_a = 0lu, const size_t offset_b = 0lu, const size_t offset_c = 0lu, + std::vector layout_a = {}, std::vector layout_b = {}, std::vector layout_c = {}); Brgemm() = default; size_t get_offset_a() const { return get_input_offset(0); } @@ -33,7 +34,7 @@ class Brgemm : public MemoryAccess { bool has_evaluate() const override { return false; } protected: - void custom_constructor_validate_and_infer_types(); + void custom_constructor_validate_and_infer_types(std::vector layout_a, std::vector layout_b, std::vector layout_c); void validate_inputs() const; ov::element::Type get_output_type() const; diff --git a/src/common/snippets/src/op/brgemm.cpp b/src/common/snippets/src/op/brgemm.cpp index 7c2a763ab69862..714de1bbf34177 100644 --- a/src/common/snippets/src/op/brgemm.cpp +++ b/src/common/snippets/src/op/brgemm.cpp @@ -13,25 +13,27 @@ namespace snippets { namespace op { Brgemm::Brgemm(const Output& A, const Output& B, - const size_t offset_a, const size_t offset_b, const size_t offset_c) : MemoryAccess({A, B}, std::set{0, 1}, std::set{0}) { + const size_t offset_a, const size_t offset_b, const size_t offset_c, + std::vector layout_a, std::vector layout_b, std::vector layout_c) + : MemoryAccess({A, B}, std::set{0, 1}, std::set{0}) { 
set_output_size(1); set_input_offset(offset_a, 0); set_input_offset(offset_b, 1); set_output_offset(offset_c, 0); - custom_constructor_validate_and_infer_types(); + custom_constructor_validate_and_infer_types(std::move(layout_a), std::move(layout_b), std::move(layout_c)); } -void Brgemm::custom_constructor_validate_and_infer_types() { +void Brgemm::custom_constructor_validate_and_infer_types(std::vector layout_a, std::vector layout_b, std::vector layout_c) { INTERNAL_OP_SCOPE(BrgemmCPU_constructor_validate_and_infer_types); validate_inputs(); // During ctor call, Brgemm doesn't know his port descriptors. - // So we use port descs from source inputs + // So we use explicit layouts from parameters const auto planar_input_shapes = - std::vector{ ngraph::snippets::utils::get_port_planar_shape(input_value(0)), - ngraph::snippets::utils::get_port_planar_shape(input_value(1)) }; + std::vector{ ngraph::snippets::utils::get_reordered_planar_shape(get_input_partial_shape(0), std::move(layout_a)), + ngraph::snippets::utils::get_reordered_planar_shape(get_input_partial_shape(1), std::move(layout_b)) }; auto output_shape = get_output_partial_shape(planar_input_shapes); - set_output_type(0, get_output_type(), get_planar_output_shape(output_shape)); + set_output_type(0, get_output_type(), ngraph::snippets::utils::get_reordered_planar_shape(output_shape, std::move(layout_c))); } void Brgemm::validate_inputs() const { @@ -52,7 +54,11 @@ void Brgemm::validate_and_infer_types() { std::shared_ptr Brgemm::clone_with_new_inputs(const OutputVector& new_args) const { INTERNAL_OP_SCOPE(Brgemm_clone_with_new_inputs); check_new_args_count(this, new_args); - return std::make_shared(new_args.at(0), new_args.at(1), get_offset_a(), get_offset_b(), get_offset_c()); + return std::make_shared(new_args.at(0), new_args.at(1), + get_offset_a(), get_offset_b(), get_offset_c(), + PortManager::get_port_descriptor_ptr(input(0))->get_layout(), + PortManager::get_port_descriptor_ptr(input(1))->get_layout(), 
+ PortManager::get_port_descriptor_ptr(output(0))->get_layout()); } ov::element::Type Brgemm::get_output_type() const { diff --git a/src/common/snippets/src/pass/fuse_transpose_brgemm.cpp b/src/common/snippets/src/pass/fuse_transpose_brgemm.cpp index 348b21f97e6af7..dc437a49a4858f 100644 --- a/src/common/snippets/src/pass/fuse_transpose_brgemm.cpp +++ b/src/common/snippets/src/pass/fuse_transpose_brgemm.cpp @@ -83,12 +83,6 @@ FuseTransposeBrgemm::FuseTransposeBrgemm() { const auto& original_port = ngraph::snippets::PortManager::get_port_descriptor_ptr(in); original_port->set_shape(transpose->get_input_shape(0)); original_port->set_layout(const_order->cast_vector()); - // At the moment we support fused Transpose only after Parameter -> we can update port descriptor for Parameter as well. - // Note: It's needed for BrgemmCPU - ngraph::snippets::PortManager::set_port_descriptor_ptr(transpose->input_value(0), - std::make_shared(transpose->get_input_shape(0), - std::vector{}, - const_order->cast_vector())); } } diff --git a/src/plugins/intel_cpu/src/transformations/snippets/x64/op/brgemm_copy_b.cpp b/src/plugins/intel_cpu/src/transformations/snippets/x64/op/brgemm_copy_b.cpp index f9a3681c3d3c9f..4d1b94f7f61b85 100644 --- a/src/plugins/intel_cpu/src/transformations/snippets/x64/op/brgemm_copy_b.cpp +++ b/src/plugins/intel_cpu/src/transformations/snippets/x64/op/brgemm_copy_b.cpp @@ -13,7 +13,7 @@ using namespace std; using namespace ov; intel_cpu::BrgemmCopyB::BrgemmCopyB(const Output& x, const element::Type src_type, const Type type, - const size_t offset_in, const size_t offset_out0, const size_t offset_out1) + const size_t offset_in, const size_t offset_out0, const size_t offset_out1, std::vector layout_input) : ngraph::snippets::op::MemoryAccess({x}, 1, type == Type::WithCompensations ? 2 : 1), m_type(type), m_src_type(src_type) { set_output_size(type == Type::WithCompensations ? 
2 : 1); set_input_port_descriptor({0, offset_in}, 0); @@ -21,7 +21,7 @@ intel_cpu::BrgemmCopyB::BrgemmCopyB(const Output& x, const element::Type s if (is_with_compensations()) { set_output_port_descriptor({0, offset_out1}, 1); } - custom_constructor_validate_and_infer_types(); + custom_constructor_validate_and_infer_types(std::move(layout_input)); } bool intel_cpu::BrgemmCopyB::visit_attributes(AttributeVisitor& visitor) { @@ -31,12 +31,12 @@ bool intel_cpu::BrgemmCopyB::visit_attributes(AttributeVisitor& visitor) { return true; } -void intel_cpu::BrgemmCopyB::custom_constructor_validate_and_infer_types() { +void intel_cpu::BrgemmCopyB::custom_constructor_validate_and_infer_types(std::vector layout_input) { INTERNAL_OP_SCOPE(BrgemmRepack_ctor_validate_and_infer_types); // During ctor call, BrgemmCopyB doesn't know his port descriptors. // So we use port descs from source inputs const auto element_type = get_input_element_type(0); - const auto pshape = ngraph::snippets::utils::get_port_planar_shape(input_value(0)); + const auto pshape = ngraph::snippets::utils::get_reordered_planar_shape(get_input_partial_shape(0), std::move(layout_input)); validate(pshape, element_type); } @@ -79,7 +79,8 @@ std::shared_ptr intel_cpu::BrgemmCopyB::clone_with_new_inputs(const Output return std::make_shared(new_args.at(0), m_src_type, m_type, get_offset_in(), get_offset_out(), - is_with_compensations() ? get_offset_compensations() : 0); + is_with_compensations() ? 
get_offset_compensations() : 0, + ngraph::snippets::PortManager::get_port_descriptor_ptr(input(0))->get_layout()); } size_t intel_cpu::BrgemmCopyB::get_offset_compensations() const { diff --git a/src/plugins/intel_cpu/src/transformations/snippets/x64/op/brgemm_copy_b.hpp b/src/plugins/intel_cpu/src/transformations/snippets/x64/op/brgemm_copy_b.hpp index 70dc348910506d..dd34e23bdb89e3 100644 --- a/src/plugins/intel_cpu/src/transformations/snippets/x64/op/brgemm_copy_b.hpp +++ b/src/plugins/intel_cpu/src/transformations/snippets/x64/op/brgemm_copy_b.hpp @@ -26,7 +26,8 @@ class BrgemmCopyB : public ngraph::snippets::op::MemoryAccess { }; BrgemmCopyB(const Output& x, const element::Type src_type, const Type type = Type::OnlyRepacking, - const size_t offset_in = 0lu, const size_t offset_out0 = 0lu, const size_t offset_out1 = 0lu); + const size_t offset_in = 0lu, const size_t offset_out0 = 0lu, const size_t offset_out1 = 0lu, + std::vector layout_input = {}); BrgemmCopyB() = default; size_t get_offset_in() const { return get_input_offset(0); } @@ -43,7 +44,7 @@ class BrgemmCopyB : public ngraph::snippets::op::MemoryAccess { std::shared_ptr clone_with_new_inputs(const OutputVector& new_args) const override; private: - void custom_constructor_validate_and_infer_types(); + void custom_constructor_validate_and_infer_types(std::vector layout_input = {}); void validate(const ov::PartialShape& pshape, const ov::element::Type& element_type); Type m_type = Type::OnlyRepacking; diff --git a/src/plugins/intel_cpu/src/transformations/snippets/x64/op/brgemm_cpu.cpp b/src/plugins/intel_cpu/src/transformations/snippets/x64/op/brgemm_cpu.cpp index 55f3e46d1809bb..d55b05ac7db6cd 100644 --- a/src/plugins/intel_cpu/src/transformations/snippets/x64/op/brgemm_cpu.cpp +++ b/src/plugins/intel_cpu/src/transformations/snippets/x64/op/brgemm_cpu.cpp @@ -13,7 +13,8 @@ namespace ov { namespace intel_cpu { BrgemmCPU::BrgemmCPU(const Output& A, const Output& B, const Type type, - const size_t 
offset_a, const size_t offset_b, const size_t offset_c) + const size_t offset_a, const size_t offset_b, const size_t offset_c, + std::vector layout_a, std::vector layout_b, std::vector layout_c) : Brgemm(), m_type(type) { // We call default ctor of Brgemm class to avoid incorrect shape infer in constructor_validate_and_type_infer() call set_arguments({A, B}); @@ -22,11 +23,12 @@ BrgemmCPU::BrgemmCPU(const Output& A, const Output& B, const Type ty set_input_port_descriptor({0, offset_a}, 0); set_input_port_descriptor({0, offset_b}, 1); set_output_port_descriptor({0, offset_c}, 0); - custom_constructor_validate_and_infer_types(); + custom_constructor_validate_and_infer_types(layout_a, layout_b, layout_c); } BrgemmCPU::BrgemmCPU(const Output& A, const Output& B, const Output& scratch, const Type type, - const size_t offset_a, const size_t offset_b, const size_t offset_scratch, const size_t offset_c) + const size_t offset_a, const size_t offset_b, const size_t offset_scratch, const size_t offset_c, + std::vector layout_a, std::vector layout_b, std::vector layout_c) : Brgemm(), m_type(type) { set_arguments({A, B, scratch}); set_output_size(1); @@ -35,10 +37,10 @@ BrgemmCPU::BrgemmCPU(const Output& A, const Output& B, const Output< set_input_port_descriptor({0, offset_b}, 1); set_output_port_descriptor({0, offset_c}, 0); set_input_port_descriptor({0, offset_scratch}, 2); - custom_constructor_validate_and_infer_types(); + custom_constructor_validate_and_infer_types(layout_a, layout_b, layout_c); } -void BrgemmCPU::custom_constructor_validate_and_infer_types() { +void BrgemmCPU::custom_constructor_validate_and_infer_types(std::vector layout_a, std::vector layout_b, std::vector layout_c) { INTERNAL_OP_SCOPE(BrgemmCPU_constructor_validate_and_infer_types); validate_inputs(); @@ -46,11 +48,11 @@ void BrgemmCPU::custom_constructor_validate_and_infer_types() { // So we use port descs from source inputs const auto brgemm_copy = is_with_data_repacking() ? 
get_brgemm_copy() : nullptr; const auto planar_input_shapes = - std::vector{ ngraph::snippets::utils::get_port_planar_shape(input_value(0)), + std::vector{ ngraph::snippets::utils::get_reordered_planar_shape(get_input_partial_shape(0), std::move(layout_a)), brgemm_copy ? ngraph::snippets::utils::get_port_planar_shape(brgemm_copy->input(0)) - : ngraph::snippets::utils::get_port_planar_shape(input_value(1)) }; + : ngraph::snippets::utils::get_reordered_planar_shape(get_input_partial_shape(1), std::move(layout_b)) }; auto output_shape = get_output_partial_shape(planar_input_shapes); - set_output_type(0, get_output_type(), get_planar_output_shape(output_shape)); + set_output_type(0, get_output_type(), ngraph::snippets::utils::get_reordered_planar_shape(output_shape, std::move(layout_c))); //Additional check for 3rd input validate_with_scratchpad(planar_input_shapes[1].get_shape()); @@ -107,10 +109,16 @@ std::shared_ptr BrgemmCPU::clone_with_new_inputs(const OutputVector& new_a std::shared_ptr new_node = nullptr; if (!is_with_scratchpad()) { new_node = std::make_shared(new_args.at(0), new_args.at(1), m_type, - get_offset_a(), get_offset_b(), get_offset_c()); + get_offset_a(), get_offset_b(), get_offset_c(), + ngraph::snippets::PortManager::get_port_descriptor_ptr(input(0))->get_layout(), + ngraph::snippets::PortManager::get_port_descriptor_ptr(input(1))->get_layout(), + ngraph::snippets::PortManager::get_port_descriptor_ptr(output(0))->get_layout()); } else { new_node = std::make_shared(new_args.at(0), new_args.at(1), new_args.at(2), m_type, - get_offset_a(), get_offset_b(), get_offset_scratch(), get_offset_c()); + get_offset_a(), get_offset_b(), get_offset_scratch(), get_offset_c(), + ngraph::snippets::PortManager::get_port_descriptor_ptr(input(0))->get_layout(), + ngraph::snippets::PortManager::get_port_descriptor_ptr(input(1))->get_layout(), + ngraph::snippets::PortManager::get_port_descriptor_ptr(output(0))->get_layout()); } return new_node; } diff --git 
a/src/plugins/intel_cpu/src/transformations/snippets/x64/op/brgemm_cpu.hpp b/src/plugins/intel_cpu/src/transformations/snippets/x64/op/brgemm_cpu.hpp index 4f4a192b5a755c..615ed623faafce 100644 --- a/src/plugins/intel_cpu/src/transformations/snippets/x64/op/brgemm_cpu.hpp +++ b/src/plugins/intel_cpu/src/transformations/snippets/x64/op/brgemm_cpu.hpp @@ -30,9 +30,11 @@ class BrgemmCPU : public ngraph::snippets::op::Brgemm { }; BrgemmCPU(const Output& A, const Output& B, const Type type, - const size_t offset_a = 0, const size_t offset_b = 0, const size_t offset_c = 0); + const size_t offset_a = 0, const size_t offset_b = 0, const size_t offset_c = 0, + std::vector layout_a = {}, std::vector layout_b = {}, std::vector layout_c = {}); BrgemmCPU(const Output& A, const Output& B, const Output& scratch, const Type type, - const size_t offset_a = 0, const size_t offset_b = 0, const size_t offset_scratch = 0, const size_t offset_c = 0); + const size_t offset_a = 0, const size_t offset_b = 0, const size_t offset_scratch = 0, const size_t offset_c = 0, + std::vector layout_a = {}, std::vector layout_b = {}, std::vector layout_c = {}); BrgemmCPU() = default; void validate_and_infer_types() override; @@ -50,7 +52,7 @@ class BrgemmCPU : public ngraph::snippets::op::Brgemm { constexpr static size_t SCRATCH_BYTE_SIZE = 32 * 1024; private: - void custom_constructor_validate_and_infer_types(); + void custom_constructor_validate_and_infer_types(std::vector layout_a, std::vector layout_b, std::vector layout_c); void validate_with_scratchpad(const ov::Shape& shape_b) const; void validate_inputs() const; diff --git a/src/plugins/intel_cpu/src/transformations/snippets/x64/pass/brgemm_to_brgemm_cpu.cpp b/src/plugins/intel_cpu/src/transformations/snippets/x64/pass/brgemm_to_brgemm_cpu.cpp index 3919b5b23f5d47..10cdb4fa9cad83 100644 --- a/src/plugins/intel_cpu/src/transformations/snippets/x64/pass/brgemm_to_brgemm_cpu.cpp +++ 
b/src/plugins/intel_cpu/src/transformations/snippets/x64/pass/brgemm_to_brgemm_cpu.cpp @@ -79,10 +79,12 @@ pass::BrgemmToBrgemmCPU::BrgemmToBrgemmCPU() { std::shared_ptr brgemm_repacking = nullptr; if (element_type_a == ov::element::f32) { brgemm_cpu = std::make_shared(brgemm->input_value(0), brgemm->input_value(1), BrgemmCPU::Type::Floating, - offset_a, offset_b, offset_c); + offset_a, offset_b, offset_c, + brgemm_in0_desc->get_layout(), brgemm_in1_desc->get_layout(), brgemm_out_desc->get_layout()); } else { const auto copy_b_type = with_comp ? BrgemmCopyB::WithCompensations : BrgemmCopyB::OnlyRepacking; - brgemm_repacking = std::make_shared(brgemm->input_value(1), element_type_a, copy_b_type, offset_b); + brgemm_repacking = std::make_shared(brgemm->input_value(1), element_type_a, copy_b_type, offset_b, 0, 0, + brgemm_in1_desc->get_layout()); const auto buffer = std::make_shared(brgemm_repacking->output(0)); set_port_desc(brgemm_repacking->input(0), brgemm_in1_desc->get_shape(), brgemm_in1_desc->get_subtensor(), brgemm_in1_desc->get_layout()); set_full_port_desc(brgemm_repacking->output(0)); @@ -92,20 +94,23 @@ pass::BrgemmToBrgemmCPU::BrgemmToBrgemmCPU() { if (with_amx) { const auto scratch = std::make_shared(ov::Shape{BrgemmCPU::SCRATCH_BYTE_SIZE}); brgemm_cpu = std::make_shared(brgemm->input_value(0), buffer, scratch, BrgemmCPU::Type::AMX, - offset_a, offset_b, 0, offset_c); + offset_a, offset_b, 0, offset_c, + brgemm_in0_desc->get_layout(), std::vector{}, brgemm_out_desc->get_layout()); set_full_port_desc(scratch->output(0)); set_full_port_desc(brgemm_cpu->input(2)); } else if (with_comp) { const auto scratch = std::make_shared(brgemm_repacking->output(1)); brgemm_cpu = std::make_shared(brgemm->input_value(0), buffer, scratch, BrgemmCPU::Type::WithCompensations, - offset_a, offset_b, 0, offset_c); + offset_a, offset_b, 0, offset_c, + brgemm_in0_desc->get_layout(), std::vector{}, brgemm_out_desc->get_layout()); set_full_port_desc(brgemm_repacking->output(1)); 
set_full_port_desc(scratch->input(0)); set_full_port_desc(scratch->output(0)); set_full_port_desc(brgemm_cpu->input(2)); } else if (one_of(element_type_a, ov::element::u8, ov::element::bf16)) { brgemm_cpu = std::make_shared(brgemm->input_value(0), buffer, BrgemmCPU::Type::WithDataRepacking, - offset_a, offset_b, offset_c); + offset_a, offset_b, offset_c, + brgemm_in0_desc->get_layout(), std::vector{}, brgemm_out_desc->get_layout()); } else { IE_THROW() << "Invalid configuration for BRGEMM CPU"; } diff --git a/src/tests/ngraph_helpers/snippets_ngraph_functions/src/subgraph_lowered.cpp b/src/tests/ngraph_helpers/snippets_ngraph_functions/src/subgraph_lowered.cpp index 8387feacb70f77..2a00ffc7b326c2 100644 --- a/src/tests/ngraph_helpers/snippets_ngraph_functions/src/subgraph_lowered.cpp +++ b/src/tests/ngraph_helpers/snippets_ngraph_functions/src/subgraph_lowered.cpp @@ -83,10 +83,10 @@ std::shared_ptr Transpose0213MatMulLoweredFunction::initLowered() con const auto& td = ngraph::snippets::PortManager::get_port_descriptor_ptr(anchor); const auto& tensor = td->get_shape(); const auto& subtensor = td->get_subtensor(); - ngraph::snippets::PortManager::set_port_descriptor_ptr(anchor, - std::make_shared(tensor, subtensor, layout)); } - auto matmul = std::make_shared(data[0], data[1]); + auto matmul = std::make_shared(data[0], data[1], 0, 0, 0, transpose_position == 0 ? layout : std::vector{}, + transpose_position == 1 ? layout : std::vector{}, + transpose_position == 2 ? 
layout : std::vector{}); auto result = std::make_shared(matmul); if (transpose_position == 2) { const auto& anchor = matmul->output(0); @@ -95,8 +95,6 @@ std::shared_ptr Transpose0213MatMulLoweredFunction::initLowered() con const auto& subtensor = td->get_subtensor(); ngraph::snippets::PortManager::set_port_descriptor_ptr(anchor, std::make_shared(tensor, subtensor, layout)); - ngraph::snippets::PortManager::set_port_descriptor_ptr(result->input(0), - std::make_shared(tensor, subtensor, layout)); } if (transpose_position < 2) { const auto& anchor = data[transpose_position]->output(0); From b8c122d6b824f95f7cd10d05ec275d96737e7ca6 Mon Sep 17 00:00:00 2001 From: Alexandra Sidorova Date: Wed, 10 May 2023 17:44:10 +0400 Subject: [PATCH 11/13] Moved PortDescriptor to lowered level --- .../snippets/lowered/expression_port.hpp | 2 +- .../include/snippets/lowered/loop_manager.hpp | 2 +- .../snippets/{ => lowered}/port_descriptor.hpp | 2 ++ .../include/snippets/lowered/tensor.hpp | 2 +- .../snippets/pass/fuse_transpose_brgemm.hpp | 2 +- .../src/{ => lowered}/port_descriptor.cpp | 6 +++--- src/common/snippets/src/op/brgemm.cpp | 10 +++++----- src/common/snippets/src/op/subgraph.cpp | 2 +- .../src/pass/fuse_transpose_brgemm.cpp | 8 ++++---- .../snippets/src/pass/matmul_to_brgemm.cpp | 8 ++++---- .../snippets/src/pass/set_softmax_ports.cpp | 8 ++++---- .../src/pass/transpose_decomposition.cpp | 16 ++++++++-------- src/common/snippets/src/utils.cpp | 4 ++-- .../src/emitters/x64/jit_snippets_emitters.cpp | 8 +++++--- .../snippets/x64/op/brgemm_copy_b.cpp | 2 +- .../snippets/x64/op/brgemm_cpu.cpp | 14 +++++++------- .../snippets/x64/op/brgemm_cpu.hpp | 2 +- .../snippets/x64/pass/brgemm_to_brgemm_cpu.cpp | 16 +++++++++------- .../lowered/fuse_load_store_and_convert.cpp | 18 ++++++------------ .../src/subgraph_lowered.cpp | 18 +++++++++++------- 20 files changed, 77 insertions(+), 73 deletions(-) rename src/common/snippets/include/snippets/{ => lowered}/port_descriptor.hpp (98%) 
rename src/common/snippets/src/{ => lowered}/port_descriptor.cpp (98%) diff --git a/src/common/snippets/include/snippets/lowered/expression_port.hpp b/src/common/snippets/include/snippets/lowered/expression_port.hpp index 239520c8620168..99ecdc58fa31fd 100644 --- a/src/common/snippets/include/snippets/lowered/expression_port.hpp +++ b/src/common/snippets/include/snippets/lowered/expression_port.hpp @@ -7,7 +7,7 @@ #include #include -#include "snippets/port_descriptor.hpp" +#include "port_descriptor.hpp" namespace ngraph { diff --git a/src/common/snippets/include/snippets/lowered/loop_manager.hpp b/src/common/snippets/include/snippets/lowered/loop_manager.hpp index 225be5ff77f9e3..ed31e73c7c0688 100644 --- a/src/common/snippets/include/snippets/lowered/loop_manager.hpp +++ b/src/common/snippets/include/snippets/lowered/loop_manager.hpp @@ -9,7 +9,7 @@ #include #include -#include "snippets/port_descriptor.hpp" +#include "port_descriptor.hpp" namespace ngraph { namespace snippets { diff --git a/src/common/snippets/include/snippets/port_descriptor.hpp b/src/common/snippets/include/snippets/lowered/port_descriptor.hpp similarity index 98% rename from src/common/snippets/include/snippets/port_descriptor.hpp rename to src/common/snippets/include/snippets/lowered/port_descriptor.hpp index 15570c70a9efce..77d41814072655 100644 --- a/src/common/snippets/include/snippets/port_descriptor.hpp +++ b/src/common/snippets/include/snippets/lowered/port_descriptor.hpp @@ -10,6 +10,7 @@ namespace ngraph { namespace snippets { +namespace lowered { class PortDescriptor; using PortDescriptorPtr = std::shared_ptr; @@ -87,5 +88,6 @@ class PortDescriptorVectorAttribute : public ov::RuntimeAttribute { std::vector outputs{}; }; +} // namespace lowered } // namespace snippets } // namespace ngraph diff --git a/src/common/snippets/include/snippets/lowered/tensor.hpp b/src/common/snippets/include/snippets/lowered/tensor.hpp index e9df31c098babe..d1e97e066b00f7 100644 --- 
a/src/common/snippets/include/snippets/lowered/tensor.hpp +++ b/src/common/snippets/include/snippets/lowered/tensor.hpp @@ -7,7 +7,7 @@ #include #include -#include "snippets/port_descriptor.hpp" +#include "port_descriptor.hpp" #include "expression_port.hpp" diff --git a/src/common/snippets/include/snippets/pass/fuse_transpose_brgemm.hpp b/src/common/snippets/include/snippets/pass/fuse_transpose_brgemm.hpp index 6ffa77e53ccfa9..f87b8d03c665d5 100644 --- a/src/common/snippets/include/snippets/pass/fuse_transpose_brgemm.hpp +++ b/src/common/snippets/include/snippets/pass/fuse_transpose_brgemm.hpp @@ -9,7 +9,7 @@ #include "openvino/op/transpose.hpp" -#include "snippets/port_descriptor.hpp" +#include "snippets/lowered/port_descriptor.hpp" namespace ngraph { namespace snippets { diff --git a/src/common/snippets/src/port_descriptor.cpp b/src/common/snippets/src/lowered/port_descriptor.cpp similarity index 98% rename from src/common/snippets/src/port_descriptor.cpp rename to src/common/snippets/src/lowered/port_descriptor.cpp index b025a79daf2358..c37634e06df670 100644 --- a/src/common/snippets/src/port_descriptor.cpp +++ b/src/common/snippets/src/lowered/port_descriptor.cpp @@ -2,12 +2,11 @@ // SPDX-License-Identifier: Apache-2.0 // -#include "snippets/port_descriptor.hpp" -#include "ngraph/except.hpp" -#include +#include "snippets/lowered/port_descriptor.hpp" namespace ngraph { namespace snippets { +namespace lowered { size_t PortDescriptor::Scheduling::FULL_DIM = SIZE_MAX; @@ -139,5 +138,6 @@ PortDescriptorPtr PortManager::get_port_descriptor_ptr(const Output Brgemm::clone_with_new_inputs(const OutputVector& new_args check_new_args_count(this, new_args); return std::make_shared(new_args.at(0), new_args.at(1), get_offset_a(), get_offset_b(), get_offset_c(), - PortManager::get_port_descriptor_ptr(input(0))->get_layout(), - PortManager::get_port_descriptor_ptr(input(1))->get_layout(), - PortManager::get_port_descriptor_ptr(output(0))->get_layout()); + 
lowered::PortManager::get_port_descriptor_ptr(input(0))->get_layout(), + lowered::PortManager::get_port_descriptor_ptr(input(1))->get_layout(), + lowered::PortManager::get_port_descriptor_ptr(output(0))->get_layout()); } ov::element::Type Brgemm::get_output_type() const { @@ -86,11 +86,11 @@ std::vector Brgemm::get_planar_input_shapes(const std::vector< ov::PartialShape Brgemm::get_planar_output_shape(const ov::PartialShape& output_shape) const { // This method can be safely called from validate_and_infer_types() before output creation - const auto& key = PortDescriptorVectorAttribute::get_type_info_static(); + const auto& key = lowered::PortDescriptorVectorAttribute::get_type_info_static(); auto& rt_info = get_rt_info(); const auto& found = rt_info.find(key); if (found != rt_info.end()) { - const auto& out_descs = found->second.as().outputs; + const auto& out_descs = found->second.as().outputs; if (out_descs.size() != get_output_size()) OPENVINO_THROW("Get output port descriptor is failed: incorrect count"); const auto& port_desc = out_descs[0]; diff --git a/src/common/snippets/src/op/subgraph.cpp b/src/common/snippets/src/op/subgraph.cpp index 7e1e38d4bc6323..8e95105dfa41b0 100644 --- a/src/common/snippets/src/op/subgraph.cpp +++ b/src/common/snippets/src/op/subgraph.cpp @@ -18,7 +18,7 @@ #include "snippets/pass/fuse_transpose_brgemm.hpp" #include "snippets/pass/set_softmax_ports.hpp" #include "snippets/utils.hpp" -#include "snippets/port_descriptor.hpp" +#include "snippets/lowered/port_descriptor.hpp" #include "transformations/common_optimizations/nop_elimination.hpp" #include "transformations/utils/utils.hpp" diff --git a/src/common/snippets/src/pass/fuse_transpose_brgemm.cpp b/src/common/snippets/src/pass/fuse_transpose_brgemm.cpp index dc437a49a4858f..25954e66ccb8ed 100644 --- a/src/common/snippets/src/pass/fuse_transpose_brgemm.cpp +++ b/src/common/snippets/src/pass/fuse_transpose_brgemm.cpp @@ -27,8 +27,8 @@ bool 
FuseTransposeBrgemm::is_supported_transpose(const Output& transpose_p // if Transpose in and out layout is not empty => something was already fused on this port auto default_layout = std::vector(transpose_port.get_shape().size()); std::iota(default_layout.begin(), default_layout.end(), 0);// NCHW layout by default - if (PortManager::get_port_descriptor_ptr(transpose_port)->get_layout() != default_layout || - PortManager::get_port_descriptor_ptr(transpose_node->input_value(0))->get_layout() != default_layout) + if (lowered::PortManager::get_port_descriptor_ptr(transpose_port)->get_layout() != default_layout || + lowered::PortManager::get_port_descriptor_ptr(transpose_node->input_value(0))->get_layout() != default_layout) return false; const auto& transpose_order = constant->cast_vector(); // todo: this limitation is due to the fact that offsets are calculated in Kernel, and the only way @@ -66,7 +66,7 @@ FuseTransposeBrgemm::FuseTransposeBrgemm() { const auto& brgemm_out = brgemm->output(0); const auto& transpose_out = m.get_match_value(); const auto& const_order = ov::as_type_ptr(transpose_out.get_node_shared_ptr()->get_input_node_shared_ptr(1)); - const auto& original_port = ngraph::snippets::PortManager::get_port_descriptor_ptr(brgemm_out); + const auto& original_port = ngraph::snippets::lowered::PortManager::get_port_descriptor_ptr(brgemm_out); original_port->set_shape(transpose_out.get_shape()); original_port->set_layout(const_order->cast_vector()); for (const auto& in : transpose_out.get_target_inputs()) @@ -80,7 +80,7 @@ FuseTransposeBrgemm::FuseTransposeBrgemm() { const auto& transpose = as_type_ptr(in_value.get_node_shared_ptr()); const auto& const_order = ov::as_type_ptr(transpose->get_input_node_shared_ptr(1)); brgemm->set_argument(i, transpose->input_value(0)); - const auto& original_port = ngraph::snippets::PortManager::get_port_descriptor_ptr(in); + const auto& original_port = ngraph::snippets::lowered::PortManager::get_port_descriptor_ptr(in); 
original_port->set_shape(transpose->get_input_shape(0)); original_port->set_layout(const_order->cast_vector()); } diff --git a/src/common/snippets/src/pass/matmul_to_brgemm.cpp b/src/common/snippets/src/pass/matmul_to_brgemm.cpp index 81745a883921d2..58417c634684af 100644 --- a/src/common/snippets/src/pass/matmul_to_brgemm.cpp +++ b/src/common/snippets/src/pass/matmul_to_brgemm.cpp @@ -10,7 +10,7 @@ #include "snippets/utils.hpp" #include "ngraph/rt_info.hpp" -#include +#include "snippets/lowered/port_descriptor.hpp" #include "ngraph/pattern/op/wrap_type.hpp" namespace ngraph { @@ -19,16 +19,16 @@ namespace pass { void MatMulToBrgemm::init_ports(const std::shared_ptr& brgemm) const { auto get_subtensor = [](const ov::Shape& shape) { - return std::vector{ PortDescriptor::Scheduling::FULL_DIM, PortDescriptor::Scheduling::FULL_DIM }; + return std::vector{ lowered::PortDescriptor::Scheduling::FULL_DIM, lowered::PortDescriptor::Scheduling::FULL_DIM }; }; for (const auto& input : brgemm->inputs()) { const auto tensor = input.get_shape(); const auto subtensor = get_subtensor(tensor); - PortManager::set_port_descriptor_ptr(input, std::make_shared(tensor, subtensor)); + lowered::PortManager::set_port_descriptor_ptr(input, std::make_shared(tensor, subtensor)); } const auto tensor = brgemm->get_output_shape(0); const auto subtensor = get_subtensor(tensor); - PortManager::set_port_descriptor_ptr(brgemm->output(0), std::make_shared(tensor, subtensor)); + lowered::PortManager::set_port_descriptor_ptr(brgemm->output(0), std::make_shared(tensor, subtensor)); } MatMulToBrgemm::MatMulToBrgemm() { diff --git a/src/common/snippets/src/pass/set_softmax_ports.cpp b/src/common/snippets/src/pass/set_softmax_ports.cpp index a79e53f137d04a..5e0aeae4da6ee8 100644 --- a/src/common/snippets/src/pass/set_softmax_ports.cpp +++ b/src/common/snippets/src/pass/set_softmax_ports.cpp @@ -5,7 +5,7 @@ #include "snippets/pass/set_softmax_ports.hpp" #include -#include "snippets/port_descriptor.hpp" 
+#include "snippets/lowered/port_descriptor.hpp" #include "ngraph/op/softmax.hpp" #include "ngraph/pattern/op/wrap_type.hpp" @@ -46,10 +46,10 @@ ngraph::snippets::pass::SetSoftmaxPorts::SetSoftmaxPorts() { OPENVINO_ASSERT(axis < static_cast(rank), "Softmax has incorrect axis"); std::vector subtensor(rank, 1); for (size_t i = axis; i < rank; ++i) - subtensor[i] = PortDescriptor::Scheduling::FULL_DIM; + subtensor[i] = lowered::PortDescriptor::Scheduling::FULL_DIM; - PortManager::set_port_descriptor_ptr(root->input(0), std::make_shared(root->input(0), subtensor)); - PortManager::set_port_descriptor_ptr(root->output(0), std::make_shared(root->output(0), subtensor)); + lowered::PortManager::set_port_descriptor_ptr(root->input(0), std::make_shared(root->input(0), subtensor)); + lowered::PortManager::set_port_descriptor_ptr(root->output(0), std::make_shared(root->output(0), subtensor)); return true; }; diff --git a/src/common/snippets/src/pass/transpose_decomposition.cpp b/src/common/snippets/src/pass/transpose_decomposition.cpp index 0083d33b00f4e9..b71ba728ab5d90 100644 --- a/src/common/snippets/src/pass/transpose_decomposition.cpp +++ b/src/common/snippets/src/pass/transpose_decomposition.cpp @@ -5,7 +5,7 @@ #include #include #include -#include +#include "snippets/lowered/port_descriptor.hpp" #include #include #include @@ -41,18 +41,18 @@ TransposeDecomposition::TransposeDecomposition() { return false; // number of elements that can be processed on every iteration. For 0,1,2,3 -> 0,2,3,1 we can guarantee only scalar access - const auto subtensor_shape = std::vector{1}; + const auto subtensor = std::vector{1}; const auto& layout = order->cast_vector(); // todo: LoadReshape used here is essentially Load + an easy way to maintain correct shape propagation // fix this in future and develop a more consistent shape propagation approach. 
- auto load = std::make_shared(data_input, subtensor_shape[0], 0, layout); - auto store = std::make_shared(load, subtensor_shape[0]); + auto load = std::make_shared(data_input, subtensor[0], 0, layout); + auto store = std::make_shared(load, subtensor[0]); - PortManager::set_port_descriptor_ptr(load->input(0), std::make_shared(load->get_input_shape(0), subtensor_shape, layout)); - PortManager::set_port_descriptor_ptr(load->output(0), std::make_shared(load->get_output_shape(0), subtensor_shape)); - PortManager::set_port_descriptor_ptr(store->input(0), std::make_shared(store->get_input_shape(0), subtensor_shape)); - PortManager::set_port_descriptor_ptr(store->output(0), std::make_shared(store->get_output_shape(0), subtensor_shape)); + lowered::PortManager::set_port_descriptor_ptr(load->input(0), std::make_shared(load->get_input_shape(0), subtensor, layout)); + lowered::PortManager::set_port_descriptor_ptr(load->output(0), std::make_shared(load->get_output_shape(0), subtensor)); + lowered::PortManager::set_port_descriptor_ptr(store->input(0), std::make_shared(store->get_input_shape(0), subtensor)); + lowered::PortManager::set_port_descriptor_ptr(store->output(0), std::make_shared(store->get_output_shape(0), subtensor)); for (auto& input : transpose->output(0).get_target_inputs()) { input.replace_source_output(store->output(0)); diff --git a/src/common/snippets/src/utils.cpp b/src/common/snippets/src/utils.cpp index ca6c4e4dd3f182..e64aa000028b9b 100644 --- a/src/common/snippets/src/utils.cpp +++ b/src/common/snippets/src/utils.cpp @@ -86,12 +86,12 @@ ov::PartialShape get_reordered_planar_shape(const ov::PartialShape& shape, const } ov::PartialShape get_port_planar_shape(const Input& in) { - const auto& port = PortManager::get_port_descriptor_ptr(in); + const auto& port = lowered::PortManager::get_port_descriptor_ptr(in); return utils::get_reordered_planar_shape(ov::Shape{port->get_shape()}, port->get_layout()); } ov::PartialShape get_port_planar_shape(const Output& 
out) { - const auto& port = PortManager::get_port_descriptor_ptr(out); + const auto& port = lowered::PortManager::get_port_descriptor_ptr(out); return utils::get_reordered_planar_shape(ov::Shape{port->get_shape()}, port->get_layout()); } diff --git a/src/plugins/intel_cpu/src/emitters/x64/jit_snippets_emitters.cpp b/src/plugins/intel_cpu/src/emitters/x64/jit_snippets_emitters.cpp index 74b3513b68f093..8d072f3ca03645 100644 --- a/src/plugins/intel_cpu/src/emitters/x64/jit_snippets_emitters.cpp +++ b/src/plugins/intel_cpu/src/emitters/x64/jit_snippets_emitters.cpp @@ -750,9 +750,11 @@ BrgemmEmitter::BrgemmEmitter(dnnl::impl::cpu::x64::jit_generator* h, dnnl::impl: std::vector> brgemm_inputs = {brgemm_node->input(0), brgemm_copy ? brgemm_copy->input(0) : brgemm_node->input(1)}; for (const auto& input : brgemm_inputs) { - init_scheduling_params(ngraph::snippets::PortManager::get_port_descriptor_ptr(input)->get_layout(), input.get_shape()); + init_scheduling_params(ngraph::snippets::lowered::PortManager::get_port_descriptor_ptr(input)->get_layout(), + input.get_shape()); } - init_scheduling_params(ngraph::snippets::PortManager::get_port_descriptor_ptr(brgemm_node->output(0))->get_layout(), brgemm_node->output(0).get_shape()); + init_scheduling_params(ngraph::snippets::lowered::PortManager::get_port_descriptor_ptr(brgemm_node->output(0))->get_layout(), + brgemm_node->output(0).get_shape()); const auto& A_shape = brgemm_node->get_input_shape(0); const auto& A_layout = io_layouts[0]; @@ -1107,7 +1109,7 @@ BrgemmCopyBEmitter::BrgemmCopyBEmitter(dnnl::impl::cpu::x64::jit_generator* h, d if (m_with_comp) m_comp_offset = brgemm_repack->get_offset_compensations(); - const auto& layout = ngraph::snippets::PortManager::get_port_descriptor_ptr(brgemm_repack->input(0))->get_layout(); + const auto& layout = ngraph::snippets::lowered::PortManager::get_port_descriptor_ptr(brgemm_repack->input(0))->get_layout(); const auto& original_shape = brgemm_repack->get_input_shape(0); auto 
transposed_shape = original_shape; size_t leading_dimension = *(original_shape.rbegin()); diff --git a/src/plugins/intel_cpu/src/transformations/snippets/x64/op/brgemm_copy_b.cpp b/src/plugins/intel_cpu/src/transformations/snippets/x64/op/brgemm_copy_b.cpp index 4d1b94f7f61b85..f0f7ea4adc1a67 100644 --- a/src/plugins/intel_cpu/src/transformations/snippets/x64/op/brgemm_copy_b.cpp +++ b/src/plugins/intel_cpu/src/transformations/snippets/x64/op/brgemm_copy_b.cpp @@ -80,7 +80,7 @@ std::shared_ptr intel_cpu::BrgemmCopyB::clone_with_new_inputs(const Output get_offset_in(), get_offset_out(), is_with_compensations() ? get_offset_compensations() : 0, - ngraph::snippets::PortManager::get_port_descriptor_ptr(input(0))->get_layout()); + ngraph::snippets::lowered::PortManager::get_port_descriptor_ptr(input(0))->get_layout()); } size_t intel_cpu::BrgemmCopyB::get_offset_compensations() const { diff --git a/src/plugins/intel_cpu/src/transformations/snippets/x64/op/brgemm_cpu.cpp b/src/plugins/intel_cpu/src/transformations/snippets/x64/op/brgemm_cpu.cpp index d55b05ac7db6cd..a6cd0201437a3b 100644 --- a/src/plugins/intel_cpu/src/transformations/snippets/x64/op/brgemm_cpu.cpp +++ b/src/plugins/intel_cpu/src/transformations/snippets/x64/op/brgemm_cpu.cpp @@ -5,7 +5,7 @@ #include "brgemm_cpu.hpp" #include "snippets/itt.hpp" #include "snippets/utils.hpp" -#include "snippets/port_descriptor.hpp" +#include "snippets/lowered/port_descriptor.hpp" #include "utils/general_utils.h" @@ -110,15 +110,15 @@ std::shared_ptr BrgemmCPU::clone_with_new_inputs(const OutputVector& new_a if (!is_with_scratchpad()) { new_node = std::make_shared(new_args.at(0), new_args.at(1), m_type, get_offset_a(), get_offset_b(), get_offset_c(), - ngraph::snippets::PortManager::get_port_descriptor_ptr(input(0))->get_layout(), - ngraph::snippets::PortManager::get_port_descriptor_ptr(input(1))->get_layout(), - ngraph::snippets::PortManager::get_port_descriptor_ptr(output(0))->get_layout()); + 
ngraph::snippets::lowered::PortManager::get_port_descriptor_ptr(input(0))->get_layout(), + ngraph::snippets::lowered::PortManager::get_port_descriptor_ptr(input(1))->get_layout(), + ngraph::snippets::lowered::PortManager::get_port_descriptor_ptr(output(0))->get_layout()); } else { new_node = std::make_shared(new_args.at(0), new_args.at(1), new_args.at(2), m_type, get_offset_a(), get_offset_b(), get_offset_scratch(), get_offset_c(), - ngraph::snippets::PortManager::get_port_descriptor_ptr(input(0))->get_layout(), - ngraph::snippets::PortManager::get_port_descriptor_ptr(input(1))->get_layout(), - ngraph::snippets::PortManager::get_port_descriptor_ptr(output(0))->get_layout()); + ngraph::snippets::lowered::PortManager::get_port_descriptor_ptr(input(0))->get_layout(), + ngraph::snippets::lowered::PortManager::get_port_descriptor_ptr(input(1))->get_layout(), + ngraph::snippets::lowered::PortManager::get_port_descriptor_ptr(output(0))->get_layout()); } return new_node; } diff --git a/src/plugins/intel_cpu/src/transformations/snippets/x64/op/brgemm_cpu.hpp b/src/plugins/intel_cpu/src/transformations/snippets/x64/op/brgemm_cpu.hpp index 615ed623faafce..2f744fe50e55c7 100644 --- a/src/plugins/intel_cpu/src/transformations/snippets/x64/op/brgemm_cpu.hpp +++ b/src/plugins/intel_cpu/src/transformations/snippets/x64/op/brgemm_cpu.hpp @@ -7,7 +7,7 @@ #include "snippets/op/brgemm.hpp" #include "brgemm_copy_b.hpp" -#include "snippets/port_descriptor.hpp" +#include "snippets/lowered/port_descriptor.hpp" namespace ov { namespace intel_cpu { diff --git a/src/plugins/intel_cpu/src/transformations/snippets/x64/pass/brgemm_to_brgemm_cpu.cpp b/src/plugins/intel_cpu/src/transformations/snippets/x64/pass/brgemm_to_brgemm_cpu.cpp index 10cdb4fa9cad83..35aa9391bd33e9 100644 --- a/src/plugins/intel_cpu/src/transformations/snippets/x64/pass/brgemm_to_brgemm_cpu.cpp +++ b/src/plugins/intel_cpu/src/transformations/snippets/x64/pass/brgemm_to_brgemm_cpu.cpp @@ -22,19 +22,21 @@ namespace ov { 
namespace intel_cpu { + +using namespace ngraph::snippets::lowered; + namespace { inline std::vector make_subtensor(const ov::Shape& tensor) { - return std::vector(std::min(tensor.size(), 2lu), ngraph::snippets::PortDescriptor::Scheduling::FULL_DIM); + return std::vector(std::min(tensor.size(), 2lu), PortDescriptor::Scheduling::FULL_DIM); } template void set_full_port_desc(const T& port) { const auto& shape = port.get_shape(); - ngraph::snippets::PortManager::set_port_descriptor_ptr(port, std::make_shared(shape, - make_subtensor(shape))); + PortManager::set_port_descriptor_ptr(port, std::make_shared(shape, make_subtensor(shape))); } template void set_port_desc(const T& port, Args... params) { - ngraph::snippets::PortManager::set_port_descriptor_ptr(port, std::make_shared(params...)); + PortManager::set_port_descriptor_ptr(port, std::make_shared(params...)); } } // namespace @@ -55,9 +57,9 @@ pass::BrgemmToBrgemmCPU::BrgemmToBrgemmCPU() { return false; } - const auto& brgemm_in0_desc = ngraph::snippets::PortManager::get_port_descriptor_ptr(brgemm->input(0)); - const auto& brgemm_in1_desc = ngraph::snippets::PortManager::get_port_descriptor_ptr(brgemm->input(1)); - const auto& brgemm_out_desc = ngraph::snippets::PortManager::get_port_descriptor_ptr(brgemm->output(0)); + const auto& brgemm_in0_desc = PortManager::get_port_descriptor_ptr(brgemm->input(0)); + const auto& brgemm_in1_desc = PortManager::get_port_descriptor_ptr(brgemm->input(1)); + const auto& brgemm_out_desc = PortManager::get_port_descriptor_ptr(brgemm->output(0)); const auto dimsMatMulIn0 = ngraph::snippets::utils::get_port_planar_shape(brgemm->input_value(0)).get_shape(); const auto dimsMatMulIn1 = ngraph::snippets::utils::get_port_planar_shape(brgemm->input_value(1)).get_shape(); diff --git a/src/plugins/intel_cpu/src/transformations/snippets/x64/pass/lowered/fuse_load_store_and_convert.cpp b/src/plugins/intel_cpu/src/transformations/snippets/x64/pass/lowered/fuse_load_store_and_convert.cpp index 
6d024d7cd7bf14..0a95316a5c59df 100644 --- a/src/plugins/intel_cpu/src/transformations/snippets/x64/pass/lowered/fuse_load_store_and_convert.cpp +++ b/src/plugins/intel_cpu/src/transformations/snippets/x64/pass/lowered/fuse_load_store_and_convert.cpp @@ -44,12 +44,9 @@ bool ov::intel_cpu::pass::FuseLoadStoreConvert::fuse_load_convert(ngraph::snippe OPENVINO_THROW("Type of Convert op is undefined. Supports only fusing Load and ConvertTruncation or ConvertSaturation ops"); } - const auto& convert_out = convert_expr->get_output_tensor(0); - const auto convert_consumers = convert_out->get_consumers(); - ngraph::snippets::PortManager::set_port_descriptor_ptr(load_convert->output(0), - std::make_shared(convert_out->get_shape(), - convert_out->get_subtensor(), - convert_out->get_layout())); + const auto out_port = convert_expr->get_output_port(0); + const auto convert_consumers = out_port.get_connected_ports(); + ngraph::snippets::lowered::PortManager::set_port_descriptor_ptr(load_convert->output(0), out_port.get_descriptor_ptr()->clone()); const auto load_convert_expr = linear_ir.create_expression(load_convert, { load_expr->get_input_tensor(0) }); const auto convert_expr_it = convert_it; const auto insertion_pos = std::next(convert_it); @@ -92,12 +89,9 @@ bool ov::intel_cpu::pass::FuseLoadStoreConvert::fuse_store_convert(ngraph::snipp OPENVINO_THROW("Type of Convert op is undefined. 
Supports only fusing Store and ConvertTruncation or ConvertSaturation ops"); } - const auto& store_out = store_expr->get_output_tensor(0); - const auto store_consumers = store_out->get_consumers(); - ngraph::snippets::PortManager::set_port_descriptor_ptr(store_convert->output(0), - std::make_shared(store_out->get_shape(), - store_out->get_subtensor(), - store_out->get_layout())); + const auto out_port = store_expr->get_output_port(0); + const auto store_consumers = out_port.get_connected_ports(); + ngraph::snippets::lowered::PortManager::set_port_descriptor_ptr(store_convert->output(0), out_port.get_descriptor_ptr()->clone()); const auto store_convert_expr = linear_ir.create_expression(store_convert, { input_td }); const auto convert_expr_it = convert_it; const auto insertion_pos = std::next(convert_it); diff --git a/src/tests/ngraph_helpers/snippets_ngraph_functions/src/subgraph_lowered.cpp b/src/tests/ngraph_helpers/snippets_ngraph_functions/src/subgraph_lowered.cpp index 2a00ffc7b326c2..09a5cbce0a3424 100644 --- a/src/tests/ngraph_helpers/snippets_ngraph_functions/src/subgraph_lowered.cpp +++ b/src/tests/ngraph_helpers/snippets_ngraph_functions/src/subgraph_lowered.cpp @@ -80,7 +80,7 @@ std::shared_ptr Transpose0213MatMulLoweredFunction::initLowered() con // Note: validity of transpose_position values is checked in Transpose0213MatMulSinhFunction constructor if (transpose_position < 2) { const auto& anchor = data[transpose_position]->output(0); - const auto& td = ngraph::snippets::PortManager::get_port_descriptor_ptr(anchor); + const auto& td = ngraph::snippets::lowered::PortManager::get_port_descriptor_ptr(anchor); const auto& tensor = td->get_shape(); const auto& subtensor = td->get_subtensor(); } @@ -90,19 +90,23 @@ std::shared_ptr Transpose0213MatMulLoweredFunction::initLowered() con auto result = std::make_shared(matmul); if (transpose_position == 2) { const auto& anchor = matmul->output(0); - const auto& td = 
ngraph::snippets::PortManager::get_port_descriptor_ptr(anchor); + const auto& td = ngraph::snippets::lowered::PortManager::get_port_descriptor_ptr(anchor); const auto& tensor = td->get_shape(); const auto& subtensor = td->get_subtensor(); - ngraph::snippets::PortManager::set_port_descriptor_ptr(anchor, - std::make_shared(tensor, subtensor, layout)); + ngraph::snippets::lowered::PortManager::set_port_descriptor_ptr(anchor, + std::make_shared(tensor, + subtensor, + layout)); } if (transpose_position < 2) { const auto& anchor = data[transpose_position]->output(0); - const auto& td = ngraph::snippets::PortManager::get_port_descriptor_ptr(anchor); + const auto& td = ngraph::snippets::lowered::PortManager::get_port_descriptor_ptr(anchor); const auto& tensor = td->get_shape(); const auto& subtensor = td->get_subtensor(); - ngraph::snippets::PortManager::set_port_descriptor_ptr(matmul->input(transpose_position), - std::make_shared(tensor, subtensor, layout)); + ngraph::snippets::lowered::PortManager::set_port_descriptor_ptr(matmul->input(transpose_position), + std::make_shared(tensor, + subtensor, + layout)); } matmul->validate_and_infer_types(); return std::make_shared(NodeVector{matmul}, data); From c623cebb379356ce798954e033ffaef6dc89f56d Mon Sep 17 00:00:00 2001 From: Alexandra Sidorova Date: Thu, 11 May 2023 10:42:54 +0400 Subject: [PATCH 12/13] Removed PortDesc getters and setters from ExpressionPort and Tensor --- .../include/snippets/lowered/expression.hpp | 5 +++++ .../snippets/lowered/expression_port.hpp | 7 ------- .../include/snippets/lowered/tensor.hpp | 5 ----- .../snippets/src/lowered/expression.cpp | 9 +++++++++ .../snippets/src/lowered/expression_port.cpp | 20 ------------------- .../snippets/src/lowered/loop_manager.cpp | 5 +++-- .../snippets/src/lowered/pass/init_loops.cpp | 18 ++++++++--------- .../src/lowered/pass/insert_buffers.cpp | 2 +- .../src/lowered/pass/propagate_layout.cpp | 6 +++--- .../lowered/pass/softmax_decomposition.cpp | 2 +- 
.../src/lowered/pass/vector_to_scalar.cpp | 11 +++++----- .../emitters/x64/jit_snippets_emitters.cpp | 10 +++++----- 12 files changed, 42 insertions(+), 58 deletions(-) diff --git a/src/common/snippets/include/snippets/lowered/expression.hpp b/src/common/snippets/include/snippets/lowered/expression.hpp index 4c761d8335bcb7..3be336599bfdcd 100644 --- a/src/common/snippets/include/snippets/lowered/expression.hpp +++ b/src/common/snippets/include/snippets/lowered/expression.hpp @@ -40,6 +40,11 @@ class Expression : public std::enable_shared_from_this { std::vector get_input_tensors() const { return m_input_tensors; } std::vector get_output_tensors() const { return m_output_tensors; } + const PortDescriptorPtr& get_input_port_descriptor(size_t i) const; + const PortDescriptorPtr& get_output_port_descriptor(size_t i) const; + std::vector get_input_port_descriptors() const { return m_input_port_descriptors; } + std::vector get_output_port_descriptors() const { return m_output_port_descriptors; } + size_t get_input_count() const { return m_input_tensors.size(); } size_t get_output_count() const { return m_output_tensors.size(); } diff --git a/src/common/snippets/include/snippets/lowered/expression_port.hpp b/src/common/snippets/include/snippets/lowered/expression_port.hpp index 99ecdc58fa31fd..bb4ce7366a9a03 100644 --- a/src/common/snippets/include/snippets/lowered/expression_port.hpp +++ b/src/common/snippets/include/snippets/lowered/expression_port.hpp @@ -30,9 +30,6 @@ class ExpressionPort { Type get_type() const { return m_type; } size_t get_index() const { return m_port_index; } - std::vector get_shape() const; - std::vector get_layout() const; - std::vector get_subtensor() const; const PortDescriptorPtr& get_descriptor_ptr() const; const std::shared_ptr& get_tensor_ptr() const; // Returns connected ports to the current: @@ -40,10 +37,6 @@ class ExpressionPort { // - Output port returns all consumer ports (children) std::set get_connected_ports() const; - void 
set_shape(const std::vector& tensor); - void set_layout(const std::vector& layout); - void set_subtensor(const std::vector& subtensor); - friend bool operator==(const ExpressionPort& lhs, const ExpressionPort& rhs); friend bool operator!=(const ExpressionPort& lhs, const ExpressionPort& rhs); friend bool operator<(const ExpressionPort& lhs, const ExpressionPort& rhs); diff --git a/src/common/snippets/include/snippets/lowered/tensor.hpp b/src/common/snippets/include/snippets/lowered/tensor.hpp index d1e97e066b00f7..97a091c6258d41 100644 --- a/src/common/snippets/include/snippets/lowered/tensor.hpp +++ b/src/common/snippets/include/snippets/lowered/tensor.hpp @@ -32,11 +32,6 @@ class Tensor { std::set::const_iterator find_consumer(const ExpressionPort& consumer) const; std::set::iterator find_consumer(const ExpressionPort& consumer); - // The scheduling params of Tensor is controlled by source expression port - std::vector get_shape() const { return m_source_port.get_shape(); } - std::vector get_layout() const { return m_source_port.get_layout(); } - std::vector get_subtensor() const { return m_source_port.get_subtensor(); } - private: ExpressionPort m_source_port; std::set m_consumer_ports; diff --git a/src/common/snippets/src/lowered/expression.cpp b/src/common/snippets/src/lowered/expression.cpp index fedd2f6e46605a..dffc8e03c74355 100644 --- a/src/common/snippets/src/lowered/expression.cpp +++ b/src/common/snippets/src/lowered/expression.cpp @@ -37,6 +37,15 @@ const TensorPtr& Expression::get_output_tensor(size_t i) const { return m_output_tensors[i]; } +const PortDescriptorPtr& Expression::get_input_port_descriptor(size_t i) const { + OPENVINO_ASSERT(i < m_input_port_descriptors.size(), "Failed to get input port descriptor: target input port must be less than input count!"); + return m_input_port_descriptors[i]; +} +const PortDescriptorPtr& Expression::get_output_port_descriptor(size_t i) const { + OPENVINO_ASSERT(i < m_output_port_descriptors.size(), "Failed to 
get output port descriptor: target output port must be less than output count!"); + return m_output_port_descriptors[i]; +} + std::shared_ptr Expression::get_node() const { if (!m_source_node) OPENVINO_THROW("An attempt to get uninitialized node from lowered expression"); diff --git a/src/common/snippets/src/lowered/expression_port.cpp b/src/common/snippets/src/lowered/expression_port.cpp index 08aeae9f533551..d16a12e0da6287 100644 --- a/src/common/snippets/src/lowered/expression_port.cpp +++ b/src/common/snippets/src/lowered/expression_port.cpp @@ -38,26 +38,6 @@ std::set ExpressionPort::get_connected_ports() const { OPENVINO_THROW("ExpressionPort supports only Input and Output types"); } -std::vector ExpressionPort::get_shape() const { - return get_descriptor_ptr()->get_shape(); -} -std::vector ExpressionPort::get_layout() const { - return get_descriptor_ptr()->get_layout(); -} -std::vector ExpressionPort::get_subtensor() const { - return get_descriptor_ptr()->get_subtensor(); -} - -void ExpressionPort::set_shape(const std::vector& tensor) { - get_descriptor_ptr()->set_shape(tensor); -} -void ExpressionPort::set_layout(const std::vector& layout) { - get_descriptor_ptr()->set_layout(layout); -} -void ExpressionPort::set_subtensor(const std::vector& subtensor) { - get_descriptor_ptr()->set_subtensor(subtensor); -} - bool operator==(const ExpressionPort& lhs, const ExpressionPort& rhs) { if (&lhs == &rhs) return true; diff --git a/src/common/snippets/src/lowered/loop_manager.cpp b/src/common/snippets/src/lowered/loop_manager.cpp index 225d6e4b129150..93094b48dae805 100644 --- a/src/common/snippets/src/lowered/loop_manager.cpp +++ b/src/common/snippets/src/lowered/loop_manager.cpp @@ -131,8 +131,9 @@ void LinearIR::LoopManager::mark_loop(LinearIR::constExprIt loop_begin_pos, std::vector loop_subtensor; std::vector loop_tensor(loop_depth, 1); for (const auto& exit_point : loop_exit_points) { - const auto tensor = 
utils::get_reordered_planar_shape(ov::PartialShape(exit_point.get_shape()), exit_point.get_layout()).get_shape(); - auto subtensor = exit_point.get_subtensor(); + const auto& desc = exit_point.get_descriptor_ptr(); + const auto tensor = utils::get_reordered_planar_shape(ov::PartialShape(desc->get_shape()), desc->get_layout()).get_shape(); + auto subtensor = desc->get_subtensor(); if (subtensor.empty()) { subtensor.resize(loop_depth, 1); subtensor[subtensor.size() - 1] = vector_size; diff --git a/src/common/snippets/src/lowered/pass/init_loops.cpp b/src/common/snippets/src/lowered/pass/init_loops.cpp index daae16b1f25c09..550a4b7e7b9552 100644 --- a/src/common/snippets/src/lowered/pass/init_loops.cpp +++ b/src/common/snippets/src/lowered/pass/init_loops.cpp @@ -72,14 +72,14 @@ std::vector InitLoops::init_ptr_increments(const std::vectorget_layout(); + const auto& shape = loop_input.get_descriptor_ptr()->get_shape(); const auto& dim = *(layout.rbegin() + dim_idx); max_relevant_dim_size = std::max(shape[dim], max_relevant_dim_size); } for (const auto& loop_output : loop_outputs) { - const auto& layout = loop_output.get_layout(); - const auto& shape = loop_output.get_shape(); + const auto& layout = loop_output.get_descriptor_ptr()->get_layout(); + const auto& shape = loop_output.get_descriptor_ptr()->get_shape(); const auto& dim = *(layout.rbegin() + dim_idx); max_relevant_dim_size = std::max(shape[dim], max_relevant_dim_size); } @@ -87,19 +87,19 @@ std::vector InitLoops::init_ptr_increments(const std::vectorget_layout(); + const auto& shape = loop_input.get_descriptor_ptr()->get_shape(); const auto& dim = *(layout.rbegin() + dim_idx); int64_t ptr_increment = 0; // If relevant dim is not broadcasted, then ptr_increment is the dim stride in the new layout if (!(shape[dim] == 1 && max_relevant_dim_size != 1)) - ptr_increment = get_dim_stride(dim, source.get_layout(), shape); + ptr_increment = get_dim_stride(dim, source.get_descriptor_ptr()->get_layout(), shape); 
ptr_increments.push_back(ptr_increment); } for (const auto& loop_output : loop_outputs) { - const auto& layout = loop_output.get_layout(); - const auto& shape = loop_output.get_shape(); + const auto& layout = loop_output.get_descriptor_ptr()->get_layout(); + const auto& shape = loop_output.get_descriptor_ptr()->get_shape(); const auto& dim = *(layout.rbegin() + dim_idx); int64_t ptr_increment = 0; // If relevant dim is not broadcasted, then ptr_increment is the dim stride in the new layout diff --git a/src/common/snippets/src/lowered/pass/insert_buffers.cpp b/src/common/snippets/src/lowered/pass/insert_buffers.cpp index 11517170c372b0..4958a8552d5133 100644 --- a/src/common/snippets/src/lowered/pass/insert_buffers.cpp +++ b/src/common/snippets/src/lowered/pass/insert_buffers.cpp @@ -19,7 +19,7 @@ InsertBuffers::InsertBuffers(int32_t buffer_allocation_rank) : Transformation(), m_buffer_allocation_rank(buffer_allocation_rank) {} LinearIR::constExprIt InsertBuffers::insertion_position(const LinearIR& linear_ir, const LinearIR::LoopManagerPtr& loop_manager, - const ExpressionPtr& up_expr, const ExpressionPtr& down_expr) { + const ExpressionPtr& up_expr, const ExpressionPtr& down_expr) { const auto up_loops = up_expr->get_loop_ids(); const auto down_loops = down_expr->get_loop_ids(); OPENVINO_ASSERT(up_loops.size() == down_loops.size(), "The Loop IDs must be normalized!"); diff --git a/src/common/snippets/src/lowered/pass/propagate_layout.cpp b/src/common/snippets/src/lowered/pass/propagate_layout.cpp index 7ce9c5a4fbbfaa..3a12b59a8e173b 100644 --- a/src/common/snippets/src/lowered/pass/propagate_layout.cpp +++ b/src/common/snippets/src/lowered/pass/propagate_layout.cpp @@ -43,13 +43,13 @@ bool PropagateLayout::run(LinearIR& linear_ir) { const auto& n = child->get_node(); const auto ma = ov::as_type_ptr(n); if (ma && ma->is_memory_access_input_port(port)) { - child_layouts.insert(child_input.get_layout()); + 
child_layouts.insert(child_input.get_descriptor_ptr()->get_layout()); } } OPENVINO_ASSERT(child_layouts.size() == 1, "All children of an input expression must have the same layout"); - io_expr->get_output_port(0).set_layout(*child_layouts.begin()); + io_expr->get_output_port_descriptor(0)->set_layout(*child_layouts.begin()); } else { - io_expr->get_input_port(0).set_layout(target_tensor->get_layout()); + io_expr->get_input_port_descriptor(0)->set_layout(target_tensor->get_source().get_descriptor_ptr()->get_layout()); } } diff --git a/src/common/snippets/src/lowered/pass/softmax_decomposition.cpp b/src/common/snippets/src/lowered/pass/softmax_decomposition.cpp index 0661101f43d026..576f2915dded4d 100644 --- a/src/common/snippets/src/lowered/pass/softmax_decomposition.cpp +++ b/src/common/snippets/src/lowered/pass/softmax_decomposition.cpp @@ -38,7 +38,7 @@ bool SoftmaxDecomposition::run(LinearIR& linear_ir) { const auto softmax_loop_ids = softmax_expr->get_loop_ids(); const auto& input_tensor = softmax_expr->get_input_tensor(0); const auto& output_tensor = softmax_expr->get_output_tensor(0); - const auto tensor_out = output_tensor->get_shape(); + const auto tensor_out = softmax_expr->get_output_port_descriptor(0)->get_shape(); const auto inner_work_amount = *(tensor_out.rbegin()); expr_it = linear_ir.erase(expr_it); // Remove Softmax diff --git a/src/common/snippets/src/lowered/pass/vector_to_scalar.cpp b/src/common/snippets/src/lowered/pass/vector_to_scalar.cpp index eb65d05bfc4357..320c9fdb5af9ad 100644 --- a/src/common/snippets/src/lowered/pass/vector_to_scalar.cpp +++ b/src/common/snippets/src/lowered/pass/vector_to_scalar.cpp @@ -19,14 +19,15 @@ bool SetScalarCountForLoadStore::run(LinearIR& linear_ir) { OV_ITT_SCOPED_TASK(ngraph::pass::itt::domains::SnippetsTransform, "Snippets::SetScalarCountForLoadStore") bool modified = false; for (auto expr_it = linear_ir.begin(); expr_it != linear_ir.end(); expr_it++) { - const auto& op = expr_it->get()->get_node(); + 
const auto& expr = *expr_it; + const auto& op = expr->get_node(); const auto load = ov::as_type_ptr(op); const auto store = ov::as_type_ptr(op); if (load || store) { - const auto& tensor = load ? (*expr_it)->get_input_tensor(0) - : (*expr_it)->get_output_tensor(0); - const auto& layout = tensor->get_layout(); - const auto& tensor_shape = tensor->get_shape(); + const auto& layout = load ? expr->get_input_port_descriptor(0)->get_layout() + : expr->get_output_port_descriptor(0)->get_layout(); + const auto& tensor_shape = load ? expr->get_input_port_descriptor(0)->get_shape() + : expr->get_output_port_descriptor(0)->get_shape(); // Find last dimension by layout const auto last_dim_idx = std::find(layout.begin(), layout.end(), layout.size() - 1); OPENVINO_ASSERT(last_dim_idx != layout.end(), "Load/Store expression have incorrect layout"); diff --git a/src/plugins/intel_cpu/src/emitters/x64/jit_snippets_emitters.cpp b/src/plugins/intel_cpu/src/emitters/x64/jit_snippets_emitters.cpp index 8d072f3ca03645..b69a160b807a6f 100644 --- a/src/plugins/intel_cpu/src/emitters/x64/jit_snippets_emitters.cpp +++ b/src/plugins/intel_cpu/src/emitters/x64/jit_snippets_emitters.cpp @@ -121,26 +121,26 @@ KernelEmitter::KernelEmitter(dnnl::impl::cpu::x64::jit_generator* h, dnnl::impl: num_inputs = 0; num_outputs = 0; for (const auto& expr : io_exprs) { - TensorPtr td {}; + ngraph::snippets::lowered::PortDescriptorPtr desc = nullptr; element::Type etype; switch (expr->get_type()) { case ngraph::snippets::lowered::IOExpression::io_type::INPUT: { - td = expr->get_output_tensor(0); + desc = expr->get_output_port_descriptor(0); etype = expr->get_node()->get_output_element_type(0); num_inputs++; break; } case ngraph::snippets::lowered::IOExpression::io_type::OUTPUT: { num_outputs++; - td = expr->get_input_tensor(0); + desc = expr->get_input_port_descriptor(0); etype = expr->get_node()->get_input_element_type(0); break; } default : { IE_THROW() << "Kernel detected unsupported io_type"; } } - 
io_shapes.push_back(td->get_shape()); - io_data_layouts.push_back(td->get_layout()); + io_shapes.push_back(desc->get_shape()); + io_data_layouts.push_back(desc->get_layout()); io_data_sizes.push_back(etype.size()); } From 2288a64816820e53c2cab43f2221facaf65f967b Mon Sep 17 00:00:00 2001 From: Alexandra Sidorova Date: Thu, 11 May 2023 11:01:43 +0400 Subject: [PATCH 13/13] Applied comments --- .../include/snippets/lowered/port_descriptor.hpp | 2 +- src/common/snippets/include/snippets/op/brgemm.hpp | 7 ++++--- src/common/snippets/src/lowered/loop_manager.cpp | 8 ++++---- src/common/snippets/src/lowered/port_descriptor.cpp | 2 +- src/common/snippets/src/op/brgemm.cpp | 6 +++--- src/common/snippets/src/pass/matmul_to_brgemm.cpp | 2 +- src/common/snippets/src/pass/set_softmax_ports.cpp | 2 +- .../transformations/snippets/x64/op/brgemm_copy_b.cpp | 2 +- .../src/transformations/snippets/x64/op/brgemm_cpu.cpp | 10 +++++----- .../snippets/x64/pass/brgemm_to_brgemm_cpu.cpp | 2 +- 10 files changed, 22 insertions(+), 21 deletions(-) diff --git a/src/common/snippets/include/snippets/lowered/port_descriptor.hpp b/src/common/snippets/include/snippets/lowered/port_descriptor.hpp index 77d41814072655..516512b8e655cb 100644 --- a/src/common/snippets/include/snippets/lowered/port_descriptor.hpp +++ b/src/common/snippets/include/snippets/lowered/port_descriptor.hpp @@ -17,7 +17,7 @@ using PortDescriptorPtr = std::shared_ptr; class PortDescriptor { public: // The structure with service values for scheduling parameters - struct Scheduling { + struct ServiceDimensions { // The value for the subtensor that means that scheduling should be by full dimension static size_t FULL_DIM; }; diff --git a/src/common/snippets/include/snippets/op/brgemm.hpp b/src/common/snippets/include/snippets/op/brgemm.hpp index f207cafa1ad43c..7ddcdb6975332a 100644 --- a/src/common/snippets/include/snippets/op/brgemm.hpp +++ b/src/common/snippets/include/snippets/op/brgemm.hpp @@ -34,13 +34,14 @@ class Brgemm : 
public MemoryAccess { bool has_evaluate() const override { return false; } protected: - void custom_constructor_validate_and_infer_types(std::vector layout_a, std::vector layout_b, std::vector layout_c); - void validate_inputs() const; - ov::element::Type get_output_type() const; std::vector get_planar_input_shapes(const std::vector>& inputs) const; ov::PartialShape get_output_partial_shape(const std::vector& input_shapes) const; ov::PartialShape get_planar_output_shape(const ov::PartialShape& output_shape) const; + +private: + void custom_constructor_validate_and_infer_types(std::vector layout_a, std::vector layout_b, std::vector layout_c); + void validate_inputs() const; }; } // namespace op diff --git a/src/common/snippets/src/lowered/loop_manager.cpp b/src/common/snippets/src/lowered/loop_manager.cpp index 93094b48dae805..2e6d41fbde580f 100644 --- a/src/common/snippets/src/lowered/loop_manager.cpp +++ b/src/common/snippets/src/lowered/loop_manager.cpp @@ -125,7 +125,7 @@ void LinearIR::LoopManager::mark_loop(LinearIR::constExprIt loop_begin_pos, }; auto is_outside_loop = [](const std::vector& subtensor) { - return std::all_of(subtensor.begin(), subtensor.end(), [](size_t lhs) { return lhs == PortDescriptor::Scheduling::FULL_DIM; }); + return std::all_of(subtensor.begin(), subtensor.end(), [](size_t lhs) { return lhs == PortDescriptor::ServiceDimensions::FULL_DIM; }); }; std::vector loop_subtensor; @@ -139,7 +139,7 @@ void LinearIR::LoopManager::mark_loop(LinearIR::constExprIt loop_begin_pos, subtensor[subtensor.size() - 1] = vector_size; } - const size_t resizing_value = is_outside_loop(subtensor) ? PortDescriptor::Scheduling::FULL_DIM : 1; + const size_t resizing_value = is_outside_loop(subtensor) ? 
PortDescriptor::ServiceDimensions::FULL_DIM : 1; while (subtensor.size() < loop_depth) subtensor.insert(subtensor.begin(), resizing_value); if (loop_subtensor.empty()) @@ -149,14 +149,14 @@ void LinearIR::LoopManager::mark_loop(LinearIR::constExprIt loop_begin_pos, "Incorrect scheduling parameters for loop"); for (size_t dim_idx = 0; dim_idx < loop_depth; ++dim_idx) { - if (*(subtensor.rbegin() + dim_idx) != PortDescriptor::Scheduling::FULL_DIM) { + if (*(subtensor.rbegin() + dim_idx) != PortDescriptor::ServiceDimensions::FULL_DIM) { broadcast(loop_tensor, tensor, dim_idx); } } } for (size_t dim_idx = 0; dim_idx < loop_depth; ++dim_idx) { - if (*(loop_subtensor.rbegin() + dim_idx) == PortDescriptor::Scheduling::FULL_DIM) { + if (*(loop_subtensor.rbegin() + dim_idx) == PortDescriptor::ServiceDimensions::FULL_DIM) { exprs_marking(loop_begin_pos, loop_end_pos, Expression::LOOP_NULL_ID, loop_depth - dim_idx - 1); continue; } diff --git a/src/common/snippets/src/lowered/port_descriptor.cpp b/src/common/snippets/src/lowered/port_descriptor.cpp index c37634e06df670..9b3591660eb720 100644 --- a/src/common/snippets/src/lowered/port_descriptor.cpp +++ b/src/common/snippets/src/lowered/port_descriptor.cpp @@ -8,7 +8,7 @@ namespace ngraph { namespace snippets { namespace lowered { -size_t PortDescriptor::Scheduling::FULL_DIM = SIZE_MAX; +size_t PortDescriptor::ServiceDimensions::FULL_DIM = SIZE_MAX; PortDescriptor::PortDescriptor(const ov::Input& in, std::vector subtensor_shape, std::vector layout) : PortDescriptor(ov::Input(in.get_node(), in.get_index()), std::move(subtensor_shape), std::move(layout)) {} diff --git a/src/common/snippets/src/op/brgemm.cpp b/src/common/snippets/src/op/brgemm.cpp index 5cb41b3e7c2787..b647835abe9e04 100644 --- a/src/common/snippets/src/op/brgemm.cpp +++ b/src/common/snippets/src/op/brgemm.cpp @@ -30,10 +30,10 @@ void Brgemm::custom_constructor_validate_and_infer_types(std::vector lay // During ctor call, Brgemm doesn't know his port descriptors. 
// So we use explicit layouts from parameters const auto planar_input_shapes = - std::vector{ ngraph::snippets::utils::get_reordered_planar_shape(get_input_partial_shape(0), std::move(layout_a)), - ngraph::snippets::utils::get_reordered_planar_shape(get_input_partial_shape(1), std::move(layout_b)) }; + std::vector{ ngraph::snippets::utils::get_reordered_planar_shape(get_input_partial_shape(0), layout_a), + ngraph::snippets::utils::get_reordered_planar_shape(get_input_partial_shape(1), layout_b) }; auto output_shape = get_output_partial_shape(planar_input_shapes); - set_output_type(0, get_output_type(), ngraph::snippets::utils::get_reordered_planar_shape(output_shape, std::move(layout_c))); + set_output_type(0, get_output_type(), ngraph::snippets::utils::get_reordered_planar_shape(output_shape, layout_c)); } void Brgemm::validate_inputs() const { diff --git a/src/common/snippets/src/pass/matmul_to_brgemm.cpp b/src/common/snippets/src/pass/matmul_to_brgemm.cpp index 58417c634684af..4ceca5802233ed 100644 --- a/src/common/snippets/src/pass/matmul_to_brgemm.cpp +++ b/src/common/snippets/src/pass/matmul_to_brgemm.cpp @@ -19,7 +19,7 @@ namespace pass { void MatMulToBrgemm::init_ports(const std::shared_ptr& brgemm) const { auto get_subtensor = [](const ov::Shape& shape) { - return std::vector{ lowered::PortDescriptor::Scheduling::FULL_DIM, lowered::PortDescriptor::Scheduling::FULL_DIM }; + return std::vector{ lowered::PortDescriptor::ServiceDimensions::FULL_DIM, lowered::PortDescriptor::ServiceDimensions::FULL_DIM }; }; for (const auto& input : brgemm->inputs()) { const auto tensor = input.get_shape(); diff --git a/src/common/snippets/src/pass/set_softmax_ports.cpp b/src/common/snippets/src/pass/set_softmax_ports.cpp index 5e0aeae4da6ee8..09737e69cb4646 100644 --- a/src/common/snippets/src/pass/set_softmax_ports.cpp +++ b/src/common/snippets/src/pass/set_softmax_ports.cpp @@ -46,7 +46,7 @@ ngraph::snippets::pass::SetSoftmaxPorts::SetSoftmaxPorts() { OPENVINO_ASSERT(axis < 
static_cast(rank), "Softmax has incorrect axis"); std::vector subtensor(rank, 1); for (size_t i = axis; i < rank; ++i) - subtensor[i] = lowered::PortDescriptor::Scheduling::FULL_DIM; + subtensor[i] = lowered::PortDescriptor::ServiceDimensions::FULL_DIM; lowered::PortManager::set_port_descriptor_ptr(root->input(0), std::make_shared(root->input(0), subtensor)); lowered::PortManager::set_port_descriptor_ptr(root->output(0), std::make_shared(root->output(0), subtensor)); diff --git a/src/plugins/intel_cpu/src/transformations/snippets/x64/op/brgemm_copy_b.cpp b/src/plugins/intel_cpu/src/transformations/snippets/x64/op/brgemm_copy_b.cpp index f0f7ea4adc1a67..201ea3d23214b2 100644 --- a/src/plugins/intel_cpu/src/transformations/snippets/x64/op/brgemm_copy_b.cpp +++ b/src/plugins/intel_cpu/src/transformations/snippets/x64/op/brgemm_copy_b.cpp @@ -36,7 +36,7 @@ void intel_cpu::BrgemmCopyB::custom_constructor_validate_and_infer_types(std::ve // During ctor call, BrgemmCopyB doesn't know his port descriptors. 
// So we use port descs from source inputs const auto element_type = get_input_element_type(0); - const auto pshape = ngraph::snippets::utils::get_reordered_planar_shape(get_input_partial_shape(0), std::move(layout_input)); + const auto pshape = ngraph::snippets::utils::get_reordered_planar_shape(get_input_partial_shape(0), layout_input); validate(pshape, element_type); } diff --git a/src/plugins/intel_cpu/src/transformations/snippets/x64/op/brgemm_cpu.cpp b/src/plugins/intel_cpu/src/transformations/snippets/x64/op/brgemm_cpu.cpp index a6cd0201437a3b..12fc4b0d2bc821 100644 --- a/src/plugins/intel_cpu/src/transformations/snippets/x64/op/brgemm_cpu.cpp +++ b/src/plugins/intel_cpu/src/transformations/snippets/x64/op/brgemm_cpu.cpp @@ -23,7 +23,7 @@ BrgemmCPU::BrgemmCPU(const Output& A, const Output& B, const Type ty set_input_port_descriptor({0, offset_a}, 0); set_input_port_descriptor({0, offset_b}, 1); set_output_port_descriptor({0, offset_c}, 0); - custom_constructor_validate_and_infer_types(layout_a, layout_b, layout_c); + custom_constructor_validate_and_infer_types(std::move(layout_a), std::move(layout_b), std::move(layout_c)); } BrgemmCPU::BrgemmCPU(const Output& A, const Output& B, const Output& scratch, const Type type, @@ -37,7 +37,7 @@ BrgemmCPU::BrgemmCPU(const Output& A, const Output& B, const Output< set_input_port_descriptor({0, offset_b}, 1); set_output_port_descriptor({0, offset_c}, 0); set_input_port_descriptor({0, offset_scratch}, 2); - custom_constructor_validate_and_infer_types(layout_a, layout_b, layout_c); + custom_constructor_validate_and_infer_types(std::move(layout_a), std::move(layout_b), std::move(layout_c)); } void BrgemmCPU::custom_constructor_validate_and_infer_types(std::vector layout_a, std::vector layout_b, std::vector layout_c) { @@ -48,11 +48,11 @@ void BrgemmCPU::custom_constructor_validate_and_infer_types(std::vector // So we use port descs from source inputs const auto brgemm_copy = is_with_data_repacking() ? 
get_brgemm_copy() : nullptr; const auto planar_input_shapes = - std::vector{ ngraph::snippets::utils::get_reordered_planar_shape(get_input_partial_shape(0), std::move(layout_a)), + std::vector{ ngraph::snippets::utils::get_reordered_planar_shape(get_input_partial_shape(0), layout_a), brgemm_copy ? ngraph::snippets::utils::get_port_planar_shape(brgemm_copy->input(0)) - : ngraph::snippets::utils::get_reordered_planar_shape(get_input_partial_shape(1), std::move(layout_b)) }; + : ngraph::snippets::utils::get_reordered_planar_shape(get_input_partial_shape(1), layout_b) }; auto output_shape = get_output_partial_shape(planar_input_shapes); - set_output_type(0, get_output_type(), ngraph::snippets::utils::get_reordered_planar_shape(output_shape, std::move(layout_c))); + set_output_type(0, get_output_type(), ngraph::snippets::utils::get_reordered_planar_shape(output_shape, layout_c)); //Additional check for 3rd input validate_with_scratchpad(planar_input_shapes[1].get_shape()); diff --git a/src/plugins/intel_cpu/src/transformations/snippets/x64/pass/brgemm_to_brgemm_cpu.cpp b/src/plugins/intel_cpu/src/transformations/snippets/x64/pass/brgemm_to_brgemm_cpu.cpp index 35aa9391bd33e9..15b327288d0e6e 100644 --- a/src/plugins/intel_cpu/src/transformations/snippets/x64/pass/brgemm_to_brgemm_cpu.cpp +++ b/src/plugins/intel_cpu/src/transformations/snippets/x64/pass/brgemm_to_brgemm_cpu.cpp @@ -27,7 +27,7 @@ using namespace ngraph::snippets::lowered; namespace { inline std::vector make_subtensor(const ov::Shape& tensor) { - return std::vector(std::min(tensor.size(), 2lu), PortDescriptor::Scheduling::FULL_DIM); + return std::vector(std::min(tensor.size(), 2lu), PortDescriptor::ServiceDimensions::FULL_DIM); } template void set_full_port_desc(const T& port) {