diff --git a/src/common/snippets/include/snippets/emitter.hpp b/src/common/snippets/include/snippets/emitter.hpp
index 037a70ab19d759..f992bb9ef33b84 100644
--- a/src/common/snippets/include/snippets/emitter.hpp
+++ b/src/common/snippets/include/snippets/emitter.hpp
@@ -21,11 +21,9 @@ class Emitter {
     /**
      * @brief Default constructor
      */
-    Emitter(const std::shared_ptr<ngraph::Node>& n) {
-    }
+    Emitter(const std::shared_ptr<ngraph::Node>& n) {}
 
-    Emitter(std::vector<std::pair<std::shared_ptr<Emitter>, RegInfo>>& region) {
-    }
+    Emitter(std::vector<std::pair<std::shared_ptr<Emitter>, RegInfo>>& region) {}
 
     /**
      * @brief called by generator to emit target code for a specific operation
      * @return void
      */
@@ -44,8 +42,8 @@ class Emitter {
      * @brief called by generator to generate data section, if needed for a specific operation
      * @return void
      */
-    virtual void emit_data() const {
-    }
+    virtual void emit_data() const {}
+
     virtual ~Emitter() = default;
 };
diff --git a/src/common/snippets/include/snippets/generator.hpp b/src/common/snippets/include/snippets/generator.hpp
index 706826c5546e7b..8ac9444e331e2c 100644
--- a/src/common/snippets/include/snippets/generator.hpp
+++ b/src/common/snippets/include/snippets/generator.hpp
@@ -9,10 +9,9 @@
 #pragma once
 
 #include "snippets_isa.hpp"
-#include "emitter.hpp"
-#include "target_machine.hpp"
-#include "lowered_expr.hpp"
-#include "pass/lowered/linear_IR_transformation.hpp"
+
+#include "snippets/lowered/linear_ir.hpp"
+#include "snippets/lowered/pass/transformation.hpp"
 
 namespace ngraph {
 namespace snippets {
@@ -46,7 +45,7 @@ class Schedule {
     bool is_flat {false};
     code ptr {nullptr};
 };
-class LoweredExprIR;
+
 /**
  * @interface Generator
  * @brief Target independent code generator interface
@@ -78,7 +77,7 @@ class Generator {
         code binary_code = nullptr;
         size_t buffer_scratchpad_size = 0;
     };
-    LoweringResult generate(std::shared_ptr<ov::Model>& m, const LoweringConfig& config, const void* compile_params = nullptr);
+    LoweringResult generate(std::shared_ptr<ov::Model>& m, const lowered::Config& config, const void* compile_params = nullptr);
 
     /**
      * @brief gets target machine
@@ -111,12 +110,12 @@ class Generator {
     /**
      * @brief gets target specific transformations for code generation
      */
-    virtual pass::lowered::LinearIRTransformationPipeline target_specific_transformations() const;
+    virtual lowered::pass::TransformationPipeline target_specific_transformations() const;
 
     std::shared_ptr<TargetMachine> target;
     // todo: we need to save the lowered code to access compiled brgemm kernels at execution time (normally the lowered IR is destructed by then).
    //  This is a temporary solution; remove it when kernel caching is implemented. Don't forget to make generate() a const method then.
-    LoweredExprIR lowered_saved;
+    lowered::LinearIR lowered_saved;
 };
 
 } // namespace snippets
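To illustrate how a backend plugs into the new `lowered::pass::TransformationPipeline` hook, here is a minimal sketch of a hypothetical target generator overriding `target_specific_transformations()`. Only the interfaces declared in this diff are assumed; `MyTargetPass` and `MyGenerator` are illustrative names, not part of the PR.

// A hypothetical target-specific pass; illustrative only.
class MyTargetPass : public ngraph::snippets::lowered::pass::Transformation {
public:
    OPENVINO_RTTI("MyTargetPass", "Transformation")
    bool run(ngraph::snippets::lowered::LinearIR& linear_ir) override {
        // Inspect or rewrite expressions here; return true if the IR was changed.
        return false;
    }
};

class MyGenerator : public ngraph::snippets::Generator {
protected:
    ngraph::snippets::lowered::pass::TransformationPipeline target_specific_transformations() const override {
        ngraph::snippets::lowered::pass::TransformationPipeline pipeline;
        pipeline.register_transformation<MyTargetPass>();
        return pipeline;  // presumably run by generate() after the common transformations
    }
};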
diff --git a/src/common/snippets/include/snippets/lowered/expression.hpp b/src/common/snippets/include/snippets/lowered/expression.hpp
new file mode 100644
index 00000000000000..d3367c2abc6475
--- /dev/null
+++ b/src/common/snippets/include/snippets/lowered/expression.hpp
@@ -0,0 +1,116 @@
+// Copyright (C) 2023 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#pragma once
+
+#include
+
+#include
+#include
+
+#include "snippets/tensor_descriptor.hpp"
+#include "snippets/emitter.hpp"
+#include "snippets/target_machine.hpp"
+
+
+namespace ngraph {
+namespace snippets {
+namespace lowered {
+
+class LinearIR;
+class Expression;
+using ExpressionPtr = std::shared_ptr<Expression>;
+
+class ExpressionPort {
+    friend class Expression;
+
+public:
+    enum Type {
+        Input,
+        Output
+    };
+
+    ExpressionPort() = default;
+
+    Type get_type() const { return m_type; }
+
+    ExpressionPtr expr = nullptr;
+    size_t port = 0;
+
+private:
+    ExpressionPort(const ExpressionPtr& expr, size_t port, Type type);
+
+    Type m_type = Type::Input;
+};
+
+class Expression : public std::enable_shared_from_this<Expression> {
+    friend class LinearIR;
+
+public:
+    static size_t LOOP_NULL_ID;
+
+    Expression() = default;
+    explicit Expression(const std::shared_ptr<Node>& n);
+    // The ctor fills outputs automatically from rt_info and/or tensor shapes
+    explicit Expression(const std::shared_ptr<Node>& n, std::vector<TensorDescriptorPtr> inputs);
+    explicit Expression(const std::shared_ptr<Node>& n, std::vector<TensorDescriptorPtr> inputs, std::vector<TensorDescriptorPtr> outputs);
+
+    virtual ~Expression() = default;
+
+    std::shared_ptr<Node> get_node() const;
+    std::shared_ptr<Emitter> get_emitter() const;
+
+    RegInfo get_reg_info() const { return m_reg_info; }
+    void set_reg_info(RegInfo rinfo) { m_reg_info = std::move(rinfo); }
+
+    const std::vector<TensorDescriptorPtr>& get_inputs() { return m_inputs; }
+    const std::vector<TensorDescriptorPtr>& get_outputs() { return m_outputs; }
+
+    std::vector<size_t> get_loop_ids() const { return m_loop_ids; }
+    void set_loop_ids(const std::vector<size_t>& loops) { m_loop_ids = loops; }
+    void set_loop_id(size_t id, size_t idx);
+    void remove_loop_id(size_t id);
+    bool is_outside_loop() const { return m_is_outside_loop; }
+
+    void init_emitter(const std::shared_ptr<TargetMachine>& target);
+
+    ExpressionPort input_port(size_t i);
+    ExpressionPort output_port(size_t i);
+
+protected:
+    void replace_input(size_t port, TensorDescriptorPtr to);
+    void replace_output(size_t port, TensorDescriptorPtr to);
+
+    std::shared_ptr<Node> m_source_node{nullptr};
+    std::shared_ptr<Emitter> m_emitter{nullptr};
+    std::vector<TensorDescriptorPtr> m_inputs;
+    std::vector<TensorDescriptorPtr> m_outputs;
+    RegInfo m_reg_info{{}, {}};
+    // The order of Loop identifiers is: Outer ---> Inner
+    std::vector<size_t> m_loop_ids;
+    bool m_is_outside_loop = false;
+};
+
+class IOExpression : public Expression {
+public:
+    enum class io_type {INPUT, OUTPUT, UNDEFINED};
+
+    IOExpression(const std::shared_ptr<ov::opset1::Parameter>& n, int64_t index);
+    IOExpression(const std::shared_ptr<ov::opset1::Result>& n, int64_t index, std::vector<TensorDescriptorPtr> inputs);
+
+    int64_t get_index() const { return m_index; }
+    io_type get_type() const { return m_type; }
+
+private:
+    int64_t m_index = -1;
+    io_type m_type = io_type::UNDEFINED;
+};
+
+bool operator==(const ExpressionPort& lhs, const ExpressionPort& rhs);
+bool operator!=(const ExpressionPort& lhs, const ExpressionPort& rhs);
+bool operator<(const ExpressionPort& lhs, const ExpressionPort& rhs);
+
+} // namespace lowered
+} // namespace snippets
+} // namespace ngraph
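In this design every node in the Linear IR is wrapped into an Expression whose inputs and outputs are TensorDescriptorPtr values, and an ExpressionPort is just an (expression, port index, direction) triple. A minimal usage sketch, assuming only the interfaces declared above (the helper name and the iostream output are illustrative):

#include <iostream>

// Print basic connectivity info for one expression (illustrative helper).
void dump_expression(const ngraph::snippets::lowered::ExpressionPtr& expr) {
    std::cout << expr->get_node()->get_friendly_name()
              << ": " << expr->get_inputs().size() << " input(s), "
              << expr->get_outputs().size() << " output(s)" << std::endl;
    // Loop ids are ordered from the outermost to the innermost loop;
    // LOOP_NULL_ID marks "no loop" slots.
    for (const auto id : expr->get_loop_ids())
        std::cout << "  covered by loop " << id << std::endl;
}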
diff --git a/src/common/snippets/include/snippets/lowered/linear_ir.hpp b/src/common/snippets/include/snippets/lowered/linear_ir.hpp
new file mode 100644
index 00000000000000..3b789e40b1ca79
--- /dev/null
+++ b/src/common/snippets/include/snippets/lowered/linear_ir.hpp
@@ -0,0 +1,118 @@
+// Copyright (C) 2023 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#pragma once
+
+#include
+
+#include "expression.hpp"
+
+namespace ngraph {
+namespace snippets {
+namespace lowered {
+
+class Config {
+public:
+    // True if the lowered Emitters need to be accessed during runtime. Normally they're destroyed after code emission.
+    bool m_save_lowered_code = false;
+    // True if we should check runtime info of nodes to call the specific transformations they need
+    bool m_need_fill_tail_register = false;
+    bool m_explicit_loop_insertion = false;
+    ov::PartialShape m_master_shape{};
+    size_t m_loop_depth = 1;
+};
+
+class LinearIR {
+public:
+    using container = std::list<ExpressionPtr>;
+    using io_container = std::list<std::shared_ptr<IOExpression>>;
+    using exprIt = container::iterator;
+    using constExprIt = container::const_iterator;
+
+    LinearIR() = default;
+    explicit LinearIR(const std::shared_ptr<ov::Model>& m, Config config = {});
+
+    LinearIR deep_copy() const;
+    static LinearIR::container deep_copy_range(LinearIR::container::const_iterator begin, LinearIR::container::const_iterator end);
+
+    const container& get_ops() const {return m_lowered_ops; }
+    const io_container& get_IO_ops() const {return m_io_lowered_ops; }
+    Config get_config() {return m_config; }
+
+    ExpressionPtr get_expr_by_node(const std::shared_ptr<Node>& n) const;
+    ExpressionPort get_expr_by_output(const TensorDescriptorPtr& n) const;
+    const std::set<ExpressionPort>& get_exprs_by_input(const TensorDescriptorPtr& n) const;
+
+    void replace_input(const ExpressionPort& expr_port, const TensorDescriptorPtr& to);
+    void replace_input(const ExpressionPtr& expr, size_t port, const TensorDescriptorPtr& to);
+    void replace_output(const ExpressionPort& expr_port, const TensorDescriptorPtr& to);
+    void replace_output(const ExpressionPtr& expr, size_t port, const TensorDescriptorPtr& to);
+
+    /**
+     * @brief Move an expression from the position "from" to the position immediately before "to".
+     * Note: this method does NOT take care of data dependencies: no relevant checks are performed
+     * and the internal maps are not touched.
+     */
+    void move(constExprIt from, constExprIt to);
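Producer/consumer navigation goes through the lookup methods declared above: get_expr_by_output() resolves the unique producer port of a tensor descriptor, while get_exprs_by_input() returns every consumer port (several expressions may share one input, e.g. a Load and the matching LoopEnd). A short traversal sketch under those assumptions (the helper name is illustrative):

using namespace ngraph::snippets::lowered;

// For each output of `expr`, visit all registered consumer ports (sketch).
void visit_consumers(const LinearIR& linear_ir, const ExpressionPtr& expr) {
    for (const auto& td : expr->get_outputs()) {
        for (const auto& consumer : linear_ir.get_exprs_by_input(td)) {
            // consumer.expr is the consuming expression,
            // consumer.port is the index of its input that reads `td`.
            (void)consumer.expr;
        }
    }
}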
+
+    bool empty() const noexcept {return m_lowered_ops.empty(); }
+    void debug_print(bool tds_as_pointers = false) const;
+
+    container::reference back() noexcept {return m_lowered_ops.back();}
+    container::const_reference back() const noexcept {return m_lowered_ops.back();}
+    container::reference front() noexcept {return m_lowered_ops.front();}
+    container::const_reference front() const noexcept {return m_lowered_ops.front();}
+
+    exprIt begin() noexcept {return m_lowered_ops.begin();}
+    exprIt end() noexcept {return m_lowered_ops.end();}
+    constExprIt begin() const noexcept {return cbegin();}
+    constExprIt end() const noexcept {return cend();}
+    constExprIt cbegin() const noexcept {return m_lowered_ops.cbegin();}
+    constExprIt cend() const noexcept {return m_lowered_ops.cend();}
+    container::reverse_iterator rbegin() noexcept {return m_lowered_ops.rbegin();}
+    container::reverse_iterator rend() noexcept {return m_lowered_ops.rend();}
+    container::const_reverse_iterator crbegin() const noexcept {return m_lowered_ops.crbegin();}
+    container::const_reverse_iterator crend() const noexcept {return m_lowered_ops.crend();}
+
+    exprIt insert(constExprIt pos, const ov::NodeVector& nodes);
+    exprIt insert(constExprIt pos, const std::shared_ptr<Node>& n);
+    exprIt insert(constExprIt pos, container::value_type&& value);
+    exprIt insert(constExprIt pos, const container::value_type& value);
+    exprIt insert(constExprIt pos, exprIt begin, exprIt end);
+    exprIt insert(constExprIt pos, constExprIt begin, constExprIt end);
+
+    exprIt erase(exprIt pos);
+    exprIt erase(constExprIt pos);
+
+    void init_emitters(const std::shared_ptr<TargetMachine>& target);
+    void serialize(const std::string& xml, const std::string& bin);
+
+    static ov::NodeVector get_ordered_ops(const std::shared_ptr<ov::Model>& model);
+
+    class LoopManager;
+    using LoopManagerPtr = std::shared_ptr<LoopManager>;
+
+    const LoopManagerPtr& get_loop_manager() const { return m_loop_manager; }
+
+private:
+    void register_expression(const ExpressionPtr& expr);
+    // Like register_expression, but doesn't allow Parameter or Result registration. You can do that only through the ctor
+    void register_regular_expression(const ExpressionPtr& expr);
+    void unregister_expression(const ExpressionPtr& expr);
+
+    container m_lowered_ops{};
+    std::unordered_map<std::shared_ptr<Node>, std::shared_ptr<Expression>> m_node2expression_map;
+    // Expression must be uniquely identified by an output, so there can't be expressions that have the same output
+    std::unordered_map<TensorDescriptorPtr, ExpressionPort> m_output2expression_map;
+    // At the same time, several expressions can have the same input if they are connected to the same parent
+    // E.g. LoopEnd will always have the same input as a Load inside the loop (since it has to increment the same reg)
+    std::unordered_map<TensorDescriptorPtr, std::set<ExpressionPort>> m_input2expression_map;
+    io_container m_io_lowered_ops;
+    Config m_config{};
+    LoopManagerPtr m_loop_manager = nullptr;
+};
+
+} // namespace lowered
+} // namespace snippets
+} // namespace ngraph
diff --git a/src/common/snippets/include/snippets/lowered/loop_manager.hpp b/src/common/snippets/include/snippets/lowered/loop_manager.hpp
new file mode 100644
index 00000000000000..4c3f171995a200
--- /dev/null
+++ b/src/common/snippets/include/snippets/lowered/loop_manager.hpp
@@ -0,0 +1,89 @@
+// Copyright (C) 2023 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#pragma once
+
+#include "linear_ir.hpp"
+
+#include
+#include
+
+#include "snippets/tensor_descriptor.hpp"
+
+namespace ngraph {
+namespace snippets {
+namespace lowered {
+
+class LinearIR::LoopManager {
+public:
+    LoopManager() = default;
+
+    class LoopInfo {
+    public:
+        LoopInfo() = default;
+        LoopInfo(size_t work_amount, size_t increment,
+                 const std::vector<ExpressionPort>& entries,
+                 const std::vector<ExpressionPort>& exits)
+            : work_amount(work_amount), increment(increment), entry_exprs(entries), exit_exprs(exits) {}
+        size_t work_amount = 0;
+        size_t increment = 0;
+        // The order of entry and exit expressions is important:
+        //   - The position before first entry expr is Loop Begin position
+        //   - The position after last exit expr is Loop End position
+        // Note: Scalars aren't entry expressions but can be before first entry expr in Linear IR
+        std::vector<ExpressionPort> entry_exprs = {};
+        std::vector<ExpressionPort> exit_exprs = {};
+    };
+    using LoopInfoPtr = std::shared_ptr<LoopInfo>;
+
+    size_t add_loop_info(const LoopInfoPtr& loop);
+    void remove_loop_info(size_t index);
+    LoopInfoPtr get_loop_info(size_t index) const;
+    size_t get_loop_count() const { return m_map.size(); }
+    const std::map<size_t, LoopInfoPtr>& get_map() const;
+
+    static void skipped_mark(LinearIR::constExprIt loop_begin_pos,
+                             LinearIR::constExprIt loop_end_pos,
+                             size_t loop_depth);
+    void mark_loop(LinearIR& linear_ir,
+                   LinearIR::constExprIt loop_begin_pos,
+                   LinearIR::constExprIt loop_end_pos,
+                   size_t loop_depth, size_t vector_size);
+    void mark_loop(LinearIR& linear_ir,
+                   LinearIR::constExprIt loop_begin_pos,
+                   LinearIR::constExprIt loop_end_pos,
+                   size_t idx,
+                   size_t work_amount,
+                   size_t work_amount_increment,
+                   const std::vector<ExpressionPort>& entries,
+                   const std::vector<ExpressionPort>& exits);
+
+    void get_loop_bounds(const LinearIR& linear_ir,
+                         size_t loop_id,
+                         LinearIR::constExprIt& loop_begin_pos,
+                         LinearIR::constExprIt& loop_end_pos) const;
+    static void get_loop_bounds(const LinearIR& linear_ir,
+                                const std::vector<ExpressionPort>& entries,
+                                const std::vector<ExpressionPort>& exits,
+                                LinearIR::constExprIt& loop_begin_pos,
+                                LinearIR::constExprIt& loop_end_pos,
+                                size_t loop_id = Expression::LOOP_NULL_ID);
+
+private:
+    static void exprs_marking(LinearIR::constExprIt loop_begin_pos,
+                              LinearIR::constExprIt loop_end_pos,
+                              size_t loop_id, size_t idx);
+    static void get_io_loop_ports(LinearIR& linear_ir,
+                                  LinearIR::constExprIt loop_begin_pos,
+                                  LinearIR::constExprIt loop_end_pos,
+                                  std::vector<ExpressionPort>& entries,
+                                  std::vector<ExpressionPort>& exits);
+
+    std::map<size_t, LoopInfoPtr> m_map = {};
+    size_t next_id = 0;
+};
+
+} // namespace lowered
+} // namespace snippets
+} // namespace ngraph
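The LoopManager stores per-loop metadata (work amount, increment, entry/exit ports) keyed by a numeric loop id, and mark_loop() attaches those ids to every expression in a range. A minimal sketch of marking a range as a one-dimensional loop, assuming the interfaces above (the vector size value is illustrative):

using namespace ngraph::snippets::lowered;

// Mark [begin, end) as one loop nest of depth 1; for this overload the
// entry/exit ports are presumably derived from the range itself
// (cf. the private get_io_loop_ports helper above).
void mark_as_loop(LinearIR& linear_ir, LinearIR::constExprIt begin, LinearIR::constExprIt end) {
    const size_t loop_depth = 1;
    const size_t vector_size = 8;  // e.g. FP32 lanes of a 256-bit register; illustrative
    linear_ir.get_loop_manager()->mark_loop(linear_ir, begin, end, loop_depth, vector_size);
}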
diff --git a/src/common/snippets/include/snippets/pass/lowered/assign_registers.hpp b/src/common/snippets/include/snippets/lowered/pass/assign_registers.hpp
similarity index 79%
rename from src/common/snippets/include/snippets/pass/lowered/assign_registers.hpp
rename to src/common/snippets/include/snippets/lowered/pass/assign_registers.hpp
index 461e688f40df02..29b889dba27684 100644
--- a/src/common/snippets/include/snippets/pass/lowered/assign_registers.hpp
+++ b/src/common/snippets/include/snippets/lowered/pass/assign_registers.hpp
@@ -4,13 +4,13 @@
 
 #pragma once
 
-#include "linear_IR_transformation.hpp"
+#include "transformation.hpp"
 #include "snippets/generator.hpp"
 
 namespace ngraph {
 namespace snippets {
-namespace pass {
 namespace lowered {
+namespace pass {
 
 /**
  * @interface AssignRegisters
@@ -18,18 +18,18 @@ namespace lowered {
  * Note that changing of the IR is likely to invalidate register assignment.
 * @ingroup snippets
 */
-class AssignRegisters : public LinearIRTransformation {
+class AssignRegisters : public Transformation {
 public:
-    OPENVINO_RTTI("AssignRegisters", "LinearIRTransformation")
+    OPENVINO_RTTI("AssignRegisters", "Transformation")
     explicit AssignRegisters(const std::function<Generator::opRegType(const std::shared_ptr<Node>& op)>& mapper) : m_reg_type_mapper(mapper) {}
-    bool run(LoweredExprIR& linear_ir) override;
+    bool run(LinearIR& linear_ir) override;
 
 private:
     std::function<Generator::opRegType(const std::shared_ptr<Node>& op)> m_reg_type_mapper;
     static constexpr size_t reg_count = 16lu;
 };
 
-} // namespace lowered
 } // namespace pass
+} // namespace lowered
 } // namespace snippets
 } // namespace ngraph
diff --git a/src/common/snippets/include/snippets/pass/lowered/buffer_allocation.hpp b/src/common/snippets/include/snippets/lowered/pass/buffer_allocation.hpp
similarity index 57%
rename from src/common/snippets/include/snippets/pass/lowered/buffer_allocation.hpp
rename to src/common/snippets/include/snippets/lowered/pass/buffer_allocation.hpp
index ff698a435723f3..cf944745d5a63d 100644
--- a/src/common/snippets/include/snippets/pass/lowered/buffer_allocation.hpp
+++ b/src/common/snippets/include/snippets/lowered/pass/buffer_allocation.hpp
@@ -4,13 +4,13 @@
 
 #pragma once
 
-#include "linear_IR_transformation.hpp"
+#include "transformation.hpp"
 #include "snippets/snippets_isa.hpp"
 
 namespace ngraph {
 namespace snippets {
-namespace pass {
 namespace lowered {
+namespace pass {
 
 /**
  * @interface BufferAllocation
@@ -18,17 +18,20 @@ namespace lowered {
  * @ingroup snippets
 */
-class BufferAllocation : public LinearIRTransformation {
-    static void propagate_offset(const LoweredExprIR& linear_ir, const LoweredExprPtr& buffer_expr, size_t offset);
-    size_t m_buffer_scratchpad_size = 0;
-
+class BufferAllocation : public Transformation {
 public:
-    OPENVINO_RTTI("BufferAllocation", "LinearIRTransformation")
-    bool run(LoweredExprIR& linear_ir) override;
-    size_t get_scratchpad_size() const {return m_buffer_scratchpad_size;}
+    OPENVINO_RTTI("BufferAllocation", "Transformation")
+    bool run(lowered::LinearIR& linear_ir) override;
+
+    size_t get_scratchpad_size() const { return m_buffer_scratchpad_size; }
+
+private:
+    static void propagate_offset(const LinearIR& linear_ir, const ExpressionPtr& buffer_expr, size_t offset);
+
+    size_t m_buffer_scratchpad_size = 0;
 };
 
-} // namespace lowered
 } // namespace pass
+} // namespace lowered
 } // namespace snippets
 } // namespace ngraph
diff --git a/src/common/snippets/include/snippets/pass/lowered/buffer_identification.hpp b/src/common/snippets/include/snippets/lowered/pass/buffer_identification.hpp
similarity index 78%
rename from src/common/snippets/include/snippets/pass/lowered/buffer_identification.hpp
rename to src/common/snippets/include/snippets/lowered/pass/buffer_identification.hpp
index 1e609af81efef4..d108e75d869760 100644
--- a/src/common/snippets/include/snippets/pass/lowered/buffer_identification.hpp
+++ b/src/common/snippets/include/snippets/lowered/pass/buffer_identification.hpp
@@ -4,12 +4,12 @@
 
 #pragma once
 
-#include "linear_IR_transformation.hpp"
+#include "transformation.hpp"
 
 namespace ngraph {
 namespace snippets {
-namespace pass {
 namespace lowered {
+namespace pass {
 
 /**
  * @interface BufferIdentification
@@ -26,21 +26,21 @@ namespace lowered {
  * Note: should be called before the BufferReset pass to have correct offsets
 * @ingroup snippets
 */
-class BufferIdentification: public LinearIRTransformation {
+class BufferIdentification: public Transformation {
 public:
-    OPENVINO_RTTI("BufferIdentification", "LinearIRTransformation")
+    OPENVINO_RTTI("BufferIdentification", "Transformation")
     BufferIdentification() = default;
 
-    bool run(LoweredExprIR& linear_ir) override;
+    bool run(LinearIR& linear_ir) override;
 
 private:
-    using BufferSet = std::vector<LoweredExprPtr>;
+    using BufferSet = std::vector<ExpressionPtr>;
 
-    std::vector<bool> create_adjacency_matrix(const LoweredExprIR& linear_ir, const BufferSet& buffers) const;
+    std::vector<bool> create_adjacency_matrix(const LinearIR& linear_ir, const BufferSet& buffers) const;
     std::map<size_t, BufferSet> coloring(BufferSet& buffers, std::vector<bool>& adj);
 };
 
-} // namespace lowered
 } // namespace pass
+} // namespace lowered
 } // namespace snippets
 } // namespace ngraph
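The method names create_adjacency_matrix() and coloring() point at a classic technique: build a conflict graph over Buffers whose lifetimes overlap, then color it so that same-colored Buffers can share the same identifier (and thus memory). A generic greedy-coloring sketch of that idea (not the PR's actual implementation; the adjacency matrix is assumed to be a flattened row-major n x n boolean matrix):

#include <cstddef>
#include <map>
#include <vector>

// Greedy coloring of a conflict graph: vertices i and j must receive
// different colors whenever adj[i*n + j] is true.
std::map<size_t, std::vector<size_t>> greedy_coloring(size_t n, const std::vector<bool>& adj) {
    std::vector<size_t> color(n, 0);
    for (size_t i = 1; i < n; ++i) {
        std::vector<bool> used(n, false);
        for (size_t j = 0; j < i; ++j)
            if (adj[i * n + j])
                used[color[j]] = true;   // color of a conflicting, already-colored vertex
        size_t c = 0;
        while (used[c]) ++c;             // smallest color unused by neighbors
        color[i] = c;
    }
    std::map<size_t, std::vector<size_t>> groups;  // color -> vertices that may share memory
    for (size_t i = 0; i < n; ++i)
        groups[color[i]].push_back(i);
    return groups;
}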
diff --git a/src/common/snippets/include/snippets/pass/lowered/buffer_insertion.hpp b/src/common/snippets/include/snippets/lowered/pass/buffer_insertion.hpp
similarity index 52%
rename from src/common/snippets/include/snippets/pass/lowered/buffer_insertion.hpp
rename to src/common/snippets/include/snippets/lowered/pass/buffer_insertion.hpp
index 2ae5d0cff69ed0..3835502a70c155 100644
--- a/src/common/snippets/include/snippets/pass/lowered/buffer_insertion.hpp
+++ b/src/common/snippets/include/snippets/lowered/pass/buffer_insertion.hpp
@@ -4,13 +4,13 @@
 
 #pragma once
 
-#include "linear_IR_transformation.hpp"
+#include "transformation.hpp"
 #include "snippets/tensor_descriptor.hpp"
 
 namespace ngraph {
 namespace snippets {
-namespace pass {
 namespace lowered {
+namespace pass {
 
 /**
  * @interface BufferInsertion
@@ -20,24 +20,25 @@ namespace lowered {
  * @param m_buffer_allocation_rank - rank of shape for memory allocation: shape[shape_rank - normalize(m_allocation_rank) : shape_rank]
 * @ingroup snippets
 */
-class BufferInsertion : public LinearIRTransformation {
+class BufferInsertion : public Transformation {
 public:
-    OPENVINO_RTTI("BufferInsertion", "LinearIRTransformation")
+    OPENVINO_RTTI("BufferInsertion", "Transformation")
     BufferInsertion(int32_t buffer_allocation_rank);
 
-    bool run(LoweredExprIR& linear_ir) override;
+    bool run(LinearIR& linear_ir) override;
 
 private:
-    void insertion(LoweredExprIR& linear_ir, const LoweredExprIR::LoweredLoopManagerPtr& loop_manager, size_t loop_id,
-                   const std::vector<LoweredExprPort>& loop_entries, const std::vector<LoweredExprPort>& loop_exits);
+    void insertion(LinearIR& linear_ir, const LinearIR::LoopManagerPtr& loop_manager, size_t loop_id,
+                   const std::vector<ExpressionPort>& loop_entries, const std::vector<ExpressionPort>& loop_exits);
 
-    LoweredExprIR::constExprIt insertion_position(const LoweredExprIR& linear_ir,
-                                                  const LoweredExprIR::LoweredLoopManagerPtr& loop_manager,
-                                                  const LoweredExprPtr& up_expr, const LoweredExprPtr& down_expr);
+    LinearIR::constExprIt insertion_position(const LinearIR& linear_ir,
+                                             const LinearIR::LoopManagerPtr& loop_manager,
+                                             const ExpressionPtr& up_expr,
+                                             const ExpressionPtr& down_expr);
 
     int32_t m_buffer_allocation_rank;
 };
 
-} // namespace lowered
 } // namespace pass
+} // namespace lowered
 } // namespace snippets
 } // namespace ngraph
\ No newline at end of file
diff --git a/src/common/snippets/include/snippets/pass/lowered/buffer_reset.hpp b/src/common/snippets/include/snippets/lowered/pass/buffer_reset.hpp
similarity index 76%
rename from src/common/snippets/include/snippets/pass/lowered/buffer_reset.hpp
rename to src/common/snippets/include/snippets/lowered/pass/buffer_reset.hpp
index 23ed0a0859169c..0cfcb78bf9dad9 100644
--- a/src/common/snippets/include/snippets/pass/lowered/buffer_reset.hpp
+++ b/src/common/snippets/include/snippets/lowered/pass/buffer_reset.hpp
@@ -4,12 +4,12 @@
 
 #pragma once
 
-#include "linear_IR_transformation.hpp"
+#include "transformation.hpp"
 
 namespace ngraph {
 namespace snippets {
-namespace pass {
 namespace lowered {
+namespace pass {
 
 /**
  * @interface BufferReset
@@ -21,18 +21,18 @@ namespace lowered {
  * This condition should be removed when Buffers stop being inplace by default.
 * @ingroup snippets
 */
-class BufferReset: public LinearIRTransformation {
+class BufferReset: public Transformation {
 public:
-    OPENVINO_RTTI("BufferReset", "LinearIRTransformation")
+    OPENVINO_RTTI("BufferReset", "Transformation")
     BufferReset() = default;
 
-    bool run(LoweredExprIR& linear_ir) override;
+    bool run(LinearIR& linear_ir) override;
 
 private:
-    bool reuse_buffer_increments(const LoweredExprIR& linear_ir, const LoweredExprPtr& loop_end_expr);
+    bool reuse_buffer_increments(const LinearIR& linear_ir, const ExpressionPtr& loop_end_expr);
 };
 
-} // namespace lowered
 } // namespace pass
+} // namespace lowered
 } // namespace snippets
 } // namespace ngraph
diff --git a/src/common/snippets/include/snippets/pass/lowered/cleanup_loop_offsets.hpp b/src/common/snippets/include/snippets/lowered/pass/cleanup_loop_offsets.hpp
similarity index 72%
rename from src/common/snippets/include/snippets/pass/lowered/cleanup_loop_offsets.hpp
rename to src/common/snippets/include/snippets/lowered/pass/cleanup_loop_offsets.hpp
index 5cc3449c29a950..4cd7f9f1aefb43 100644
--- a/src/common/snippets/include/snippets/pass/lowered/cleanup_loop_offsets.hpp
+++ b/src/common/snippets/include/snippets/lowered/pass/cleanup_loop_offsets.hpp
@@ -4,12 +4,12 @@
 
 #pragma once
 
-#include "linear_IR_transformation.hpp"
+#include "transformation.hpp"
 
 namespace ngraph {
 namespace snippets {
-namespace pass {
 namespace lowered {
+namespace pass {
 
 /**
  * @interface CleanupLoopOffsets
@@ -17,13 +17,13 @@ namespace lowered {
  * This transformation "fuses" the offsets with an outer loop's ptr_increments, and zeroes the offsets before Results.
 * @ingroup snippets
 */
-class CleanupLoopOffsets : public LinearIRTransformation {
+class CleanupLoopOffsets : public Transformation {
 public:
-    OPENVINO_RTTI("CleanupLoopOffsets", "LinearIRTransformation")
-    bool run(LoweredExprIR& linear_ir) override;
+    OPENVINO_RTTI("CleanupLoopOffsets", "Transformation")
+    bool run(LinearIR& linear_ir) override;
 };
 
-} // namespace lowered
 } // namespace pass
+} // namespace lowered
 } // namespace snippets
 } // namespace ngraph
diff --git a/src/common/snippets/include/snippets/pass/lowered/insert_tail_loop.hpp b/src/common/snippets/include/snippets/lowered/pass/insert_tail_loop.hpp
similarity index 50%
rename from src/common/snippets/include/snippets/pass/lowered/insert_tail_loop.hpp
rename to src/common/snippets/include/snippets/lowered/pass/insert_tail_loop.hpp
index e9b1543c13d504..d946933a0bfc61 100644
--- a/src/common/snippets/include/snippets/pass/lowered/insert_tail_loop.hpp
+++ b/src/common/snippets/include/snippets/lowered/pass/insert_tail_loop.hpp
@@ -4,12 +4,12 @@
 
 #pragma once
 
-#include "linear_IR_transformation.hpp"
+#include "transformation.hpp"
 
 namespace ngraph {
 namespace snippets {
-namespace pass {
 namespace lowered {
+namespace pass {
 
 /**
  * @interface InsertTailLoop
@@ -17,17 +17,17 @@ namespace lowered {
  * Additional optimizations are performed if a loop body is executed only once.
 * @ingroup snippets
 */
-class InsertTailLoop : public LinearIRTransformation {
-    static void tail_transformations(LoweredExprIR& linear_ir,
-                                     LoweredExprIR::container::const_iterator tail_begin,
-                                     LoweredExprIR::container::const_iterator tail_end,
-                                     size_t tail_size);
+class InsertTailLoop : public Transformation {
+    static void tail_transformations(LinearIR& linear_ir,
+                                     LinearIR::container::const_iterator tail_begin,
+                                     LinearIR::container::const_iterator tail_end,
+                                     size_t tail_size);
 public:
-    OPENVINO_RTTI("InsertTailLoop", "LinearIRTransformation")
-    bool run(LoweredExprIR& linear_ir) override;
+    OPENVINO_RTTI("InsertTailLoop", "Transformation")
+    bool run(LinearIR& linear_ir) override;
 };
 
-} // namespace lowered
 } // namespace pass
+} // namespace lowered
 } // namespace snippets
 } // namespace ngraph
diff --git a/src/common/snippets/include/snippets/pass/lowered/load_movebroadcast_to_broadcastload.hpp b/src/common/snippets/include/snippets/lowered/pass/load_movebroadcast_to_broadcastload.hpp
similarity index 67%
rename from src/common/snippets/include/snippets/pass/lowered/load_movebroadcast_to_broadcastload.hpp
rename to src/common/snippets/include/snippets/lowered/pass/load_movebroadcast_to_broadcastload.hpp
index f11d8c215ff261..589e237bc7957d 100644
--- a/src/common/snippets/include/snippets/pass/lowered/load_movebroadcast_to_broadcastload.hpp
+++ b/src/common/snippets/include/snippets/lowered/pass/load_movebroadcast_to_broadcastload.hpp
@@ -4,26 +4,26 @@
 
 #pragma once
 
-#include "linear_IR_transformation.hpp"
+#include "transformation.hpp"
 
 namespace ngraph {
 namespace snippets {
-namespace pass {
 namespace lowered {
+namespace pass {
 
 /**
  * @interface LoadMoveBroadcastToBroadcastLoad
 * @brief Fuses consecutive Load and MoveBroadcast into a single load instruction.
 * @ingroup snippets
 */
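In Linear IR terms the fusion opportunity is purely local: a BroadcastMove (the op behind the "MoveBroadcast" in the pass name) whose input descriptor is produced by a Load can be replaced by one BroadcastLoad. A detection sketch under that assumption, using the snippets op types and the LinearIR lookup declared earlier (not the PR's actual matching code):

// Return true when `expr` is a BroadcastMove fed directly by a Load,
// i.e. the pair that can collapse into a single BroadcastLoad.
bool is_load_broadcast_pair(const ngraph::snippets::lowered::LinearIR& linear_ir,
                            const ngraph::snippets::lowered::ExpressionPtr& expr) {
    if (!ov::is_type<ngraph::snippets::op::BroadcastMove>(expr->get_node()))
        return false;
    const auto& input_td = expr->get_inputs().front();
    const auto producer = linear_ir.get_expr_by_output(input_td);
    return ov::is_type<ngraph::snippets::op::Load>(producer.expr->get_node());
}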
-class LoadMoveBroadcastToBroadcastLoad: public LinearIRTransformation {
+class LoadMoveBroadcastToBroadcastLoad: public Transformation {
 public:
     LoadMoveBroadcastToBroadcastLoad() = default;
-    OPENVINO_RTTI("LoadMoveBroadcastToBroadcastLoad", "LinearIRTransformation")
-    bool run(LoweredExprIR& linear_ir) override;
+    OPENVINO_RTTI("LoadMoveBroadcastToBroadcastLoad", "Transformation")
+    bool run(LinearIR& linear_ir) override;
 };
 
-} // namespace lowered
 } // namespace pass
+} // namespace lowered
 } // namespace snippets
 } // namespace ngraph
diff --git a/src/common/snippets/include/snippets/lowered/pass/load_store_insertion.hpp b/src/common/snippets/include/snippets/lowered/pass/load_store_insertion.hpp
new file mode 100644
index 00000000000000..c4fdcfc55ae412
--- /dev/null
+++ b/src/common/snippets/include/snippets/lowered/pass/load_store_insertion.hpp
@@ -0,0 +1,44 @@
+// Copyright (C) 2023 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#pragma once
+
+#include "transformation.hpp"
+
+#include "snippets/lowered/loop_manager.hpp"
+
+namespace ngraph {
+namespace snippets {
+namespace lowered {
+namespace pass {
+
+/**
+ * @interface LoadStoreInsertion
+ * @brief The pass inserts Load and Store expressions in Linear IR after Parameters and Buffers, and before Results and Buffers accordingly.
+ *        Note: The pass should be called after the LoopFusion and BufferInsertion passes so that all possible data expressions are present.
+ * @param m_vector_size - the count of elements to load/store
+ * @ingroup snippets
+ */
+class LoadStoreInsertion : public Transformation {
+public:
+    explicit LoadStoreInsertion(size_t vector_size);
+    OPENVINO_RTTI("LoadStoreInsertion", "Transformation")
+    bool run(LinearIR& linear_ir) override;
+
+private:
+    bool insert_load(LinearIR& linear_ir, const LinearIR::constExprIt& data_expr_it);
+    bool insert_store(LinearIR& linear_ir, const LinearIR::constExprIt& data_expr_it);
+    void update_loops(const LinearIR::LoopManagerPtr& loop_manager, const std::vector<size_t>& loop_ids,
+                      const ExpressionPort& actual_port, const std::vector<ExpressionPort>& target_ports, bool is_entry = true);
+    void update_loop(const LinearIR::LoopManager::LoopInfoPtr& loop_info,
+                     const ExpressionPort& actual_port, const std::vector<ExpressionPort>& target_ports, bool is_entry = true);
+    std::vector<size_t> get_loops_for_update(const std::vector<size_t>& loop_ids, size_t loop_id);
+
+    size_t m_vector_size;
+};
+
+} // namespace pass
+} // namespace lowered
+} // namespace snippets
+} // namespace ngraph
diff --git a/src/common/snippets/include/snippets/lowered/pass/loop_fusion.hpp b/src/common/snippets/include/snippets/lowered/pass/loop_fusion.hpp
new file mode 100644
index 00000000000000..aab90e3232d563
--- /dev/null
+++ b/src/common/snippets/include/snippets/lowered/pass/loop_fusion.hpp
@@ -0,0 +1,45 @@
+// Copyright (C) 2023 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#pragma once
+
+#include "transformation.hpp"
+
+#include "snippets/lowered/loop_manager.hpp"
+
+namespace ngraph {
+namespace snippets {
+namespace lowered {
+namespace pass {
+
+/**
+ * @interface LoopFusion
+ * @brief The pass fuses marked Loops.
+ * @ingroup snippets
+ */
+class LoopFusion : public Transformation {
+public:
+    OPENVINO_RTTI("LoopFusion", "Transformation")
+    LoopFusion();
+    bool run(LinearIR& linear_ir) override;
+
+private:
+    static bool can_be_fused(const LinearIR::LoopManager::LoopInfoPtr& loop_current,
+                             const LinearIR::LoopManager::LoopInfoPtr& loop_target);
+    static bool fuse_upper_into_current(LinearIR& linear_ir, const LinearIR::LoopManagerPtr& loop_manager,
+                                        const ExpressionPort& current_entry_point, const ExpressionPort& target_exit_point,
+                                        size_t current_loop_id, size_t target_loop_id, size_t dim_idx,
+                                        LinearIR::constExprIt& current_loop_begin_pos, LinearIR::constExprIt& current_loop_end_pos);
+    static bool fuse_lower_into_current(LinearIR& linear_ir, const LinearIR::LoopManagerPtr& loop_manager,
+                                        const ExpressionPort& current_entry_point, const ExpressionPort& target_exit_point,
+                                        size_t current_loop_id, size_t target_loop_id, size_t dim_idx,
+                                        LinearIR::constExprIt& current_loop_begin_pos, LinearIR::constExprIt& current_loop_end_pos);
+    static void fuse_points(LinearIR& linear_ir, std::vector<ExpressionPort>& exit_points, std::vector<ExpressionPort>& entry_points,
+                            LinearIR::constExprIt loop_begin_pos, LinearIR::constExprIt loop_end_pos);
+};
+
+} // namespace pass
+} // namespace lowered
+} // namespace snippets
+} // namespace ngraph
diff --git a/src/common/snippets/include/snippets/pass/lowered/loop_init.hpp b/src/common/snippets/include/snippets/lowered/pass/loop_init.hpp
similarity index 59%
rename from src/common/snippets/include/snippets/pass/lowered/loop_init.hpp
rename to src/common/snippets/include/snippets/lowered/pass/loop_init.hpp
index b13c5e8aaab328..cb769196e65b73 100644
--- a/src/common/snippets/include/snippets/pass/lowered/loop_init.hpp
+++ b/src/common/snippets/include/snippets/lowered/pass/loop_init.hpp
@@ -4,36 +4,38 @@
 
 #pragma once
 
-#include "linear_IR_transformation.hpp"
+#include "transformation.hpp"
+
+#include "snippets/lowered/loop_manager.hpp"
 
 namespace ngraph {
 namespace snippets {
-namespace pass {
 namespace lowered {
+namespace pass {
 
 /**
 * @interface LoopInit
 * @brief The pass explicitly inserts LoopBegin and LoopEnd expressions in Linear IR using the Loop markup
 * @ingroup snippets
 */
-class LoopInit : public LinearIRTransformation {
+class LoopInit : public Transformation {
 public:
-    OPENVINO_RTTI("InsertLoops", "LinearIRTransformation")
+    OPENVINO_RTTI("InsertLoops", "Transformation")
     LoopInit();
-    bool run(LoweredExprIR& linear_ir) override;
+    bool run(LinearIR& linear_ir) override;
 
 private:
-    bool insertion(LoweredExprIR& linear_ir, const LoweredExprIR::LoweredLoopManager::LoweredLoopInfoPtr& loop_info,
+    bool insertion(LinearIR& linear_ir, const LinearIR::LoopManager::LoopInfoPtr& loop_info,
                    size_t loop_id, size_t dim_idx, bool has_outer_loop);
-    std::vector<int64_t> init_ptr_increments(const std::vector<LoweredExprPort>& loop_inputs,
-                                             const std::vector<LoweredExprPort>& loop_outputs,
+    std::vector<int64_t> init_ptr_increments(const std::vector<ExpressionPort>& loop_inputs,
+                                             const std::vector<ExpressionPort>& loop_outputs,
                                              size_t dim_idx) const;
     std::vector<int64_t> init_finalization_offsets(const std::vector<int64_t>& finalization_offsets, size_t work_amount) const;
-    std::vector<int64_t> init_element_type_sizes(const std::vector<LoweredExprPort>& loop_inputs,
-                                                 const std::vector<LoweredExprPort>& loop_outputs);
+    std::vector<int64_t> init_element_type_sizes(const std::vector<ExpressionPort>& loop_inputs,
+                                                 const std::vector<ExpressionPort>& loop_outputs);
 };
 
-} // namespace lowered
 } // namespace pass
+} // namespace lowered
 } // namespace snippets
 } // namespace ngraph
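The helpers above compute the per-port pointer arithmetic for one loop dimension: each executed work item advances a data pointer by its ptr_increment (scaled by the element type size at emission time), and the finalization offset undoes that advance so outer loops see untouched pointers. A worked sketch of the usual scheme (illustrative; the PR's exact rules for broadcasting and layouts may differ):

#include <cstdint>

// After a loop that executed `work_amount` work items, a pointer that moved
// `ptr_increment` elements per item is rewound to its starting position.
int64_t rewind_finalization_offset(int64_t ptr_increment, int64_t work_amount) {
    return -ptr_increment * work_amount;
}

// Broadcasted inputs don't advance at all: their increment is simply zero.
int64_t effective_increment(bool is_broadcasted, int64_t stride) {
    return is_broadcasted ? 0 : stride;
}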
diff --git a/src/common/snippets/include/snippets/pass/lowered/loop_markup.hpp b/src/common/snippets/include/snippets/lowered/pass/loop_markup.hpp
similarity index 74%
rename from src/common/snippets/include/snippets/pass/lowered/loop_markup.hpp
rename to src/common/snippets/include/snippets/lowered/pass/loop_markup.hpp
index 10a716ed15b325..a81bb6c1194e94 100644
--- a/src/common/snippets/include/snippets/pass/lowered/loop_markup.hpp
+++ b/src/common/snippets/include/snippets/lowered/pass/loop_markup.hpp
@@ -4,13 +4,13 @@
 
 #pragma once
 
-#include "linear_IR_transformation.hpp"
-#include "snippets/tensor_descriptor.hpp"
+#include "transformation.hpp"
+
 
 namespace ngraph {
 namespace snippets {
-namespace pass {
 namespace lowered {
+namespace pass {
 
 /**
  * @interface LoopMarkup
@@ -20,17 +20,17 @@ namespace lowered {
 *        - the consumer of the expression is explicitly after this expression - the pass marks the branches
 * @ingroup snippets
 */
-class LoopMarkup : public LinearIRTransformation {
+class LoopMarkup : public Transformation {
 public:
-    OPENVINO_RTTI("LoopMarkup", "LinearIRTransformation")
+    OPENVINO_RTTI("LoopMarkup", "Transformation")
     LoopMarkup(size_t vector_size);
-    bool run(LoweredExprIR& linear_ir) override;
+    bool run(LinearIR& linear_ir) override;
 
 private:
     size_t m_vector_size;
 };
 
-} // namespace lowered
 } // namespace pass
+} // namespace lowered
 } // namespace snippets
 } // namespace ngraph
diff --git a/src/common/snippets/include/snippets/pass/lowered/move_result_out_of_loop.hpp b/src/common/snippets/include/snippets/lowered/pass/move_result_out_of_loop.hpp
similarity index 69%
rename from src/common/snippets/include/snippets/pass/lowered/move_result_out_of_loop.hpp
rename to src/common/snippets/include/snippets/lowered/pass/move_result_out_of_loop.hpp
index 9c6afa01501c22..7dc0af34563db6 100644
--- a/src/common/snippets/include/snippets/pass/lowered/move_result_out_of_loop.hpp
+++ b/src/common/snippets/include/snippets/lowered/pass/move_result_out_of_loop.hpp
@@ -4,26 +4,26 @@
 
 #pragma once
 
-#include "linear_IR_transformation.hpp"
+#include "transformation.hpp"
 
 namespace ngraph {
 namespace snippets {
-namespace pass {
 namespace lowered {
+namespace pass {
 
 /**
  * @interface MoveResultOutOfLoop
 * @brief After the passes that work with Loops, Results may be left inside a Loop. The pass extracts them from the Loop and inserts them right after it.
* @ingroup snippets
*/
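Detecting a misplaced Result is cheap with the loop-id bookkeeping introduced earlier: an expression is still covered by some loop if any of its loop ids differs from LOOP_NULL_ID; the pass then only has to find the loop bounds and move() the Result past them. A detection sketch under those assumptions:

using namespace ngraph::snippets::lowered;

// True if `expr` is still covered by at least one real loop
// (LOOP_NULL_ID entries mean "no loop" at that nesting level).
bool is_inside_some_loop(const ExpressionPtr& expr) {
    for (const auto id : expr->get_loop_ids())
        if (id != Expression::LOOP_NULL_ID)
            return true;
    return false;
}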
-class MoveResultOutOfLoop : public LinearIRTransformation {
+class MoveResultOutOfLoop : public Transformation {
 public:
-    OPENVINO_RTTI("MoveResultOutOfLoop", "LinearIRTransformation")
+    OPENVINO_RTTI("MoveResultOutOfLoop", "Transformation")
     MoveResultOutOfLoop() = default;
-    bool run(LoweredExprIR& linear_ir) override;
+    bool run(LinearIR& linear_ir) override;
 };
 
-} // namespace lowered
 } // namespace pass
+} // namespace lowered
 } // namespace snippets
 } // namespace ngraph
diff --git a/src/common/snippets/include/snippets/pass/lowered/move_scalar_to_consumer.hpp b/src/common/snippets/include/snippets/lowered/pass/move_scalar_to_consumer.hpp
similarity index 83%
rename from src/common/snippets/include/snippets/pass/lowered/move_scalar_to_consumer.hpp
rename to src/common/snippets/include/snippets/lowered/pass/move_scalar_to_consumer.hpp
index 82a70182421642..d5151e71540c7a 100644
--- a/src/common/snippets/include/snippets/pass/lowered/move_scalar_to_consumer.hpp
+++ b/src/common/snippets/include/snippets/lowered/pass/move_scalar_to_consumer.hpp
@@ -4,12 +4,12 @@
 
 #pragma once
 
-#include "linear_IR_transformation.hpp"
+#include "transformation.hpp"
 
 namespace ngraph {
 namespace snippets {
-namespace pass {
 namespace lowered {
+namespace pass {
 
 /**
  * @interface MoveScalarToConsumer
@@ -22,14 +22,14 @@ namespace lowered {
 *        To avoid such cases, we move Scalars in the Linear IR to the places right before their Consumers, so that a Scalar is executed on each Loop iteration.
* @ingroup snippets
*/
-class MoveScalarToConsumer : public LinearIRTransformation {
+class MoveScalarToConsumer : public Transformation {
 public:
-    OPENVINO_RTTI("MoveScalarsToConsumer", "LinearIRTransformation")
+    OPENVINO_RTTI("MoveScalarsToConsumer", "Transformation")
     MoveScalarToConsumer() = default;
-    bool run(LoweredExprIR& linear_ir) override;
+    bool run(LinearIR& linear_ir) override;
 };
 
-} // namespace lowered
 } // namespace pass
+} // namespace lowered
 } // namespace snippets
 } // namespace ngraph
diff --git a/src/common/snippets/include/snippets/pass/lowered/propagate_layout.hpp b/src/common/snippets/include/snippets/lowered/pass/propagate_layout.hpp
similarity index 70%
rename from src/common/snippets/include/snippets/pass/lowered/propagate_layout.hpp
rename to src/common/snippets/include/snippets/lowered/pass/propagate_layout.hpp
index 1f02ba7b94ab3e..4f7731b45449a6 100644
--- a/src/common/snippets/include/snippets/pass/lowered/propagate_layout.hpp
+++ b/src/common/snippets/include/snippets/lowered/pass/propagate_layout.hpp
@@ -4,12 +4,12 @@
 
 #pragma once
 
-#include "linear_IR_transformation.hpp"
+#include "transformation.hpp"
 
 namespace ngraph {
 namespace snippets {
-namespace pass {
 namespace lowered {
+namespace pass {
 
 /**
  * @interface PropagateLayout
@@ -17,13 +17,13 @@ namespace lowered {
 *        proper data pointer offsets in the Kernel;
* @ingroup snippets
*/
-class PropagateLayout : public LinearIRTransformation {
+class PropagateLayout : public Transformation {
 public:
-    OPENVINO_RTTI("PropagateLayout", "LinearIRTransformation")
-    bool run(LoweredExprIR& linear_ir) override;
+    OPENVINO_RTTI("PropagateLayout", "Transformation")
+    bool run(LinearIR& linear_ir) override;
 };
 
-} // namespace lowered
 } // namespace pass
+} // namespace lowered
 } // namespace snippets
 } // namespace ngraph
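For the MoveScalarToConsumer pass above, the relocation itself is a one-liner thanks to LinearIR::move(); the interesting part is locating the consumer position. A sketch of the move step, assuming a Scalar with a single registered consumer (finding the consumer iterator via plain std::find is illustrative, not necessarily how the PR does it):

#include <algorithm>

using namespace ngraph::snippets::lowered;

// Relocate the Scalar at `scalar_it` to sit immediately before its consumer.
void move_scalar_before_consumer(LinearIR& linear_ir, LinearIR::constExprIt scalar_it) {
    const auto& out_td = (*scalar_it)->get_outputs().front();
    const auto& consumers = linear_ir.get_exprs_by_input(out_td);
    const auto consumer_it = std::find(linear_ir.cbegin(), linear_ir.cend(),
                                       consumers.begin()->expr);
    // move() performs no dependency checks itself (see its documentation above).
    linear_ir.move(scalar_it, consumer_it);
}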
diff --git a/src/common/snippets/include/snippets/pass/lowered/softmax_decomposition.hpp b/src/common/snippets/include/snippets/lowered/pass/softmax_decomposition.hpp
similarity index 66%
rename from src/common/snippets/include/snippets/pass/lowered/softmax_decomposition.hpp
rename to src/common/snippets/include/snippets/lowered/pass/softmax_decomposition.hpp
index 90d9589ffb59a3..7e86f7107a7611 100644
--- a/src/common/snippets/include/snippets/pass/lowered/softmax_decomposition.hpp
+++ b/src/common/snippets/include/snippets/lowered/pass/softmax_decomposition.hpp
@@ -4,29 +4,29 @@
 
 #pragma once
 
-#include "linear_IR_transformation.hpp"
+#include "transformation.hpp"
 
 namespace ngraph {
 namespace snippets {
-namespace pass {
 namespace lowered {
+namespace pass {
 
 /**
  * @interface SoftmaxDecomposition
 * @brief Decomposes Softmax into a range of low-level operations on linear IR
* @ingroup snippets
*/
-class SoftmaxDecomposition : public LinearIRTransformation {
+class SoftmaxDecomposition : public Transformation {
 public:
     explicit SoftmaxDecomposition(size_t vector_size);
-    OPENVINO_RTTI("SoftmaxDecomposition", "LinearIRTransformation")
-    bool run(LoweredExprIR& linear_ir) override;
+    OPENVINO_RTTI("SoftmaxDecomposition", "Transformation")
+    bool run(LinearIR& linear_ir) override;
 
 private:
     size_t m_vector_size;
 };
 
-} //namespace lowered
 } // namespace pass
+} // namespace lowered
 } // namespace snippets
 } // namespace ngraph
diff --git a/src/common/snippets/include/snippets/pass/lowered/linear_IR_transformation.hpp b/src/common/snippets/include/snippets/lowered/pass/transformation.hpp
similarity index 60%
rename from src/common/snippets/include/snippets/pass/lowered/linear_IR_transformation.hpp
rename to src/common/snippets/include/snippets/lowered/pass/transformation.hpp
index ff9fccba676445..ef00e881662e3b 100644
--- a/src/common/snippets/include/snippets/pass/lowered/linear_IR_transformation.hpp
+++ b/src/common/snippets/include/snippets/lowered/pass/transformation.hpp
@@ -4,28 +4,29 @@
 
 #pragma once
 
-#include "snippets/lowered_expr.hpp"
+#include "snippets/lowered/linear_ir.hpp"
+
 #include "openvino/core/rtti.hpp"
 #include "openvino/core/type.hpp"
 
 namespace ngraph {
 namespace snippets {
-namespace pass {
 namespace lowered {
+namespace pass {
 
 /**
- * @interface linearIRTransformation
+ * @interface Transformation
 * @brief Base class for transformations on linear IR
* @ingroup snippets
*/
-class LinearIRTransformation {
+class Transformation {
 public:
-    LinearIRTransformation() = default;
-    virtual ~LinearIRTransformation() = default;
+    Transformation() = default;
+    virtual ~Transformation() = default;
 
     // Note that get_type_info_static and get_type_info are needed to mimic OPENVINO_RTTI interface,
     // so the standard OPENVINO_RTTI(...) macros could be used in derived classes.
     _OPENVINO_HIDDEN_METHOD static const ::ov::DiscreteTypeInfo& get_type_info_static() {
-        static ::ov::DiscreteTypeInfo type_info_static {"LinearIRTransformation"};
+        static ::ov::DiscreteTypeInfo type_info_static {"Transformation"};
         type_info_static.hash();
         return type_info_static;
     }
@@ -38,29 +39,29 @@ class LinearIRTransformation {
         return get_type_info().name;
     }
 
-    virtual bool run(LoweredExprIR& linear_ir) = 0;
+    virtual bool run(lowered::LinearIR& linear_ir) = 0;
 };
 
-class LinearIRTransformationPipeline {
+class TransformationPipeline {
 public:
-    LinearIRTransformationPipeline() = default;
+    TransformationPipeline() = default;
 
-    void register_transformation(const std::shared_ptr<LinearIRTransformation>& transformation);
+    void register_transformation(const std::shared_ptr<Transformation>& transformation);
 
     template<typename T, class... Args>
     void register_transformation(Args&&... args) {
-        static_assert(std::is_base_of<LinearIRTransformation, T>::value, "Transformation not derived from LinearIRTransformation");
+        static_assert(std::is_base_of<Transformation, T>::value, "Transformation not derived from lowered::Transformation");
         auto transformation = std::make_shared<T>(std::forward<Args>(args)...);
         register_transformation(transformation);
     }
 
-    void run(LoweredExprIR& linear_ir);
+    void run(lowered::LinearIR& linear_ir);
 
 private:
-    std::vector<std::shared_ptr<LinearIRTransformation>> m_transformations;
+    std::vector<std::shared_ptr<Transformation>> m_transformations;
 };
 
-} // namespace lowered
 } // namespace pass
+} // namespace lowered
 } // namespace snippets
 } // namespace ngraph
diff --git a/src/common/snippets/include/snippets/pass/lowered/vector_to_scalar.hpp b/src/common/snippets/include/snippets/lowered/pass/vector_to_scalar.hpp
similarity index 87%
rename from src/common/snippets/include/snippets/pass/lowered/vector_to_scalar.hpp
rename to src/common/snippets/include/snippets/lowered/pass/vector_to_scalar.hpp
index 69c85fa0156f27..b6cb96e9bb977d 100644
--- a/src/common/snippets/include/snippets/pass/lowered/vector_to_scalar.hpp
+++ b/src/common/snippets/include/snippets/lowered/pass/vector_to_scalar.hpp
@@ -4,12 +4,12 @@
 
 #pragma once
 
-#include "linear_IR_transformation.hpp"
+#include "transformation.hpp"
 
 namespace ngraph {
 namespace snippets {
-namespace pass {
 namespace lowered {
+namespace pass {
 
 /**
  * @interface SetScalarCountForLoadStore
@@ -35,14 +35,14 @@ namespace lowered {
 //  Result
 //  Note: Load* should be replaced with ScalarLoad in this example to avoid invalid read in vector Loop.
 
-class SetScalarCountForLoadStore : public LinearIRTransformation {
+class SetScalarCountForLoadStore : public Transformation {
 public:
     explicit SetScalarCountForLoadStore();
-    OPENVINO_RTTI("SetScalarCountForLoadStore", "LinearIRTransformation")
-    bool run(LoweredExprIR& linear_ir) override;
+    OPENVINO_RTTI("SetScalarCountForLoadStore", "Transformation")
+    bool run(lowered::LinearIR& linear_ir) override;
 };
 
-} // namespace lowered
 } // namespace pass
+} // namespace lowered
 } // namespace snippets
 } // namespace ngraph
diff --git a/src/common/snippets/include/snippets/lowered_expr.hpp b/src/common/snippets/include/snippets/lowered_expr.hpp
deleted file mode 100644
index 5a5b9ae3c86dde..00000000000000
--- a/src/common/snippets/include/snippets/lowered_expr.hpp
+++ /dev/null
@@ -1,255 +0,0 @@
-// Copyright (C) 2023 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-//
-
-#pragma once
-
-#include
-#include
-#include
-#include "emitter.hpp"
-#include "target_machine.hpp"
-#include "snippets/tensor_descriptor.hpp"
-
-namespace ngraph {
-namespace snippets {
-
-using code = const uint8_t *;
-using RegInfo = std::pair<std::vector<size_t>, std::vector<size_t>>;
-
-class LoweringConfig {
-public:
-    // True if the lowered Emitters need to be accessed during runtime. Normally they're destroyed after code emission.
-    bool m_save_lowered_code = false;
-    // True if we should check runtime info for nodes to call specific needed transformations
-    bool m_need_fill_tail_register = false;
-    bool m_explicit_loop_insertion = false;
-    ov::PartialShape m_master_shape{};
-    size_t m_loop_depth = 1;
-};
-
-class LoweredExprIR;
-class LoweredExpr {
-    friend LoweredExprIR;
-
-public:
-    static size_t LOOP_NULL_ID;
-
-    explicit LoweredExpr(const std::shared_ptr<Node>& n);
-    explicit LoweredExpr(const std::shared_ptr<Node>& n, std::vector<TensorDescriptorPtr> inputs, std::vector<TensorDescriptorPtr> outputs = {});
-    LoweredExpr() = default;
-    virtual ~LoweredExpr() = default;
-    std::shared_ptr<Node> get_node() const;
-    std::shared_ptr<Emitter> get_emitter() const;
-    void init_emitter(const std::shared_ptr<TargetMachine>& target);
-    RegInfo get_reg_info() const {return m_reg_info;}
-    void set_reg_info(RegInfo rinfo) {m_reg_info = std::move(rinfo);}
-    const std::vector<TensorDescriptorPtr>& get_inputs() {return m_inputs; }
-    const std::vector<TensorDescriptorPtr>& get_outputs() {return m_outputs; }
-    std::vector<size_t> get_loop_ids() const { return m_loop_ids; }
-    void set_loop_ids(const std::vector<size_t>& loops) { m_loop_ids = loops; }
-    void set_loop_id(size_t id, size_t idx);
-    void remove_loop_id(size_t id);
-    bool is_outside_loop() const { return m_is_outside_loop; }
-
-protected:
-    void replace_input(size_t port, TensorDescriptorPtr to);
-    void replace_output(size_t port, TensorDescriptorPtr to);
-    std::shared_ptr<Node> m_source_node{nullptr};
-    std::shared_ptr<Emitter> m_emitter{nullptr};
-    std::vector<TensorDescriptorPtr> m_inputs;
-    std::vector<TensorDescriptorPtr> m_outputs;
-    RegInfo m_reg_info{{}, {}};
-    // The order of Loop identifiers is: Outer ---> Inner
-    std::vector<size_t> m_loop_ids;
-    bool m_is_outside_loop = false;
-};
-
-class IOLoweredExpr : public LoweredExpr {
-public:
-    enum class io_type {INPUT, OUTPUT, UNDEFINED};
-    IOLoweredExpr(const std::shared_ptr<ov::opset1::Parameter>& n, int64_t index);
-    IOLoweredExpr(const std::shared_ptr<ov::opset1::Result>& n, int64_t index, std::vector<TensorDescriptorPtr> inputs);
-    int64_t get_index() const {return m_index;}
-    io_type get_type() const {return m_type; }
-private:
-    int64_t m_index = -1;
-    io_type m_type = io_type::UNDEFINED;
-};
-
-using LoweredExprPtr = std::shared_ptr<LoweredExpr>;
-
-struct LoweredExprPort {
-    enum Type {
-        Input,
-        Output
-    };
-
-    LoweredExprPort() = default;
-
-    static LoweredExprPort make_input(const LoweredExprPtr& expr, size_t port);
-    static LoweredExprPort make_output(const LoweredExprPtr& expr, size_t port);
-
-    LoweredExprPtr expr = nullptr;
-    size_t port = 0;
-    Type type = Type::Input;
-
-private:
-    LoweredExprPort(const LoweredExprPtr& expr, size_t port, Type type);
-};
-
-bool operator==(const LoweredExprPort& lhs, const LoweredExprPort& rhs);
-bool operator!=(const LoweredExprPort& lhs, const LoweredExprPort& rhs);
-bool operator<(const LoweredExprPort& lhs, const LoweredExprPort& rhs);
-
-class LoweredExprIR {
-public:
-    using container = std::list<LoweredExprPtr>;
-    using io_container = std::list<std::shared_ptr<IOLoweredExpr>>;
-    using exprIt = container::iterator;
-    using constExprIt = container::const_iterator;
-
-    explicit LoweredExprIR(const std::shared_ptr<ov::Model>& m, LoweringConfig config = {});
-    LoweredExprIR() = default;
-    LoweredExprIR deep_copy() const;
-    static LoweredExprIR::container deep_copy_range(LoweredExprIR::container::const_iterator begin, LoweredExprIR::container::const_iterator end);
-    const container& get_ops() const {return m_lowered_ops; }
-    const io_container& get_IO_ops() const {return m_io_lowered_ops; }
-    void init_emitters(const std::shared_ptr<TargetMachine>& target);
-    LoweringConfig get_config() {return m_config; }
-    LoweredExprPtr get_expr_by_node(const std::shared_ptr<Node>& n) const;
-    LoweredExprPort get_expr_by_output(const TensorDescriptorPtr& n) const;
-    const std::set<LoweredExprPort>& get_exprs_by_input(const TensorDescriptorPtr& n) const;
-    void replace_input(const LoweredExprPort& expr_port, const TensorDescriptorPtr& to);
-    void replace_input(const LoweredExprPtr& expr, size_t port, const TensorDescriptorPtr& to);
-    void replace_output(const LoweredExprPort& expr_port, const TensorDescriptorPtr& to);
-    void replace_output(const LoweredExprPtr& expr, size_t port, const TensorDescriptorPtr& to);
-    exprIt insert(constExprIt pos, const ov::NodeVector& nodes);
-    exprIt insert(constExprIt pos, const std::shared_ptr<Node>& n);
-    exprIt insert(constExprIt pos, container::value_type&& value);
-    exprIt insert(constExprIt pos, const container::value_type& value);
-    exprIt insert(constExprIt pos, exprIt begin, exprIt end);
-    exprIt insert(constExprIt pos, constExprIt begin, constExprIt end);
-
-    /**
-     * @brief Move an expression from the position "from" to the position immediately before "to".
-     * Note: this method does NOT take care of data dependencies: no relevant checks are performed
-     * and the internal maps are not touched.
-     */
-    void move(constExprIt from, constExprIt to);
-
-    bool empty() const noexcept {return m_lowered_ops.empty(); }
-    void debug_print(bool tds_as_pointers = false) const;
-
-    container::reference back() noexcept {return m_lowered_ops.back();}
-    container::const_reference back() const noexcept {return m_lowered_ops.back();}
-    container::reference front() noexcept {return m_lowered_ops.front();}
-    container::const_reference front() const noexcept {return m_lowered_ops.front();}
-    exprIt erase(exprIt pos);
-    exprIt erase(constExprIt pos);
-    exprIt begin() noexcept {return m_lowered_ops.begin();}
-    exprIt end() noexcept {return m_lowered_ops.end();}
-    constExprIt begin() const noexcept {return cbegin();}
-    constExprIt end() const noexcept {return cend();}
-    constExprIt cbegin() const noexcept {return m_lowered_ops.cbegin();}
-    constExprIt cend() const noexcept {return m_lowered_ops.cend();}
-    container::reverse_iterator rbegin() noexcept {return m_lowered_ops.rbegin();}
-    container::reverse_iterator rend() noexcept {return m_lowered_ops.rend();}
-    container::const_reverse_iterator crbegin() const noexcept {return m_lowered_ops.crbegin();}
-    container::const_reverse_iterator crend() const noexcept {return m_lowered_ops.crend();}
-    static ov::NodeVector get_ordered_ops(const std::shared_ptr<ov::Model>& model);
-    void serialize(const std::string& xml, const std::string& bin);
-
-    class LoweredLoopManager {
-    public:
-        LoweredLoopManager() = default;
-
-        class LoweredLoopInfo {
-        public:
-            LoweredLoopInfo() = default;
-            LoweredLoopInfo(size_t work_amount, size_t increment,
-                            const std::vector<LoweredExprPort>& entries,
-                            const std::vector<LoweredExprPort>& exits)
-                : work_amount(work_amount), increment(increment), entry_exprs(entries), exit_exprs(exits) {}
-            size_t work_amount = 0;
-            size_t increment = 0;
-            // The order of entry and exit expressions is important:
-            //   - The position before first entry expr is Loop Begin position
-            //   - The position after last exit expr is Loop End position
-            // Note: Scalars aren't entry expressions but can be before first entry expr in Linear IR
-            std::vector<LoweredExprPort> entry_exprs = {};
-            std::vector<LoweredExprPort> exit_exprs = {};
-        };
-        using LoweredLoopInfoPtr = std::shared_ptr<LoweredLoopInfo>;
-
-        size_t add_loop_info(const LoweredLoopInfoPtr& loop);
-        void remove_loop_info(size_t index);
-        LoweredLoopInfoPtr get_loop_info(size_t index) const;
-        size_t get_loop_count() const { return m_map.size(); }
-        const std::map<size_t, LoweredLoopInfoPtr>& get_map() const;
-        static void skipped_mark(LoweredExprIR::constExprIt loop_begin_pos,
-                                 LoweredExprIR::constExprIt loop_end_pos,
-                                 size_t loop_depth);
-        void mark_loop(LoweredExprIR& linear_ir,
-                       LoweredExprIR::constExprIt loop_begin_pos,
-                       LoweredExprIR::constExprIt loop_end_pos,
-                       size_t loop_depth, size_t vector_size);
-        void mark_loop(LoweredExprIR& linear_ir,
-                       LoweredExprIR::constExprIt loop_begin_pos,
-                       LoweredExprIR::constExprIt loop_end_pos,
-                       size_t idx,
-                       size_t work_amount,
-                       size_t work_amount_increment,
-                       const std::vector<LoweredExprPort>& entries,
-                       const std::vector<LoweredExprPort>& exits);
-
-        void get_loop_bounds(const LoweredExprIR& linear_ir,
-                             size_t loop_id,
-                             LoweredExprIR::constExprIt& loop_begin_pos,
-                             LoweredExprIR::constExprIt& loop_end_pos) const;
-        static void get_loop_bounds(const LoweredExprIR& linear_ir,
-                                    const std::vector<LoweredExprPort>& entries,
-                                    const std::vector<LoweredExprPort>& exits,
-                                    LoweredExprIR::constExprIt& loop_begin_pos,
-                                    LoweredExprIR::constExprIt& loop_end_pos,
-                                    size_t loop_id = LoweredExpr::LOOP_NULL_ID);
-
-    private:
-        static void exprs_marking(LoweredExprIR::constExprIt loop_begin_pos,
-                                  LoweredExprIR::constExprIt loop_end_pos,
-                                  size_t loop_id, size_t idx);
-        static void get_io_loop_ports(LoweredExprIR& linear_ir,
-                                      LoweredExprIR::constExprIt loop_begin_pos,
-                                      LoweredExprIR::constExprIt loop_end_pos,
-                                      std::vector<LoweredExprPort>& entries,
-                                      std::vector<LoweredExprPort>& exits);
-
-        std::map<size_t, LoweredLoopInfoPtr> m_map = {};
-        size_t next_id = 0;
-    };
-    using LoweredLoopManagerPtr = std::shared_ptr<LoweredLoopManager>;
-
-    const LoweredLoopManagerPtr& get_loop_manager() const { return m_loop_manager; }
-
-private:
-    void register_expression(const LoweredExprPtr& expr);
-    // Like register_expression, but doesn't allow Parameter or Result registration. You can do it only through construction
-    void register_regular_expression(const LoweredExprPtr& expr);
-    void unregister_expression(const LoweredExprPtr& expr);
-    container m_lowered_ops{};
-    std::unordered_map<std::shared_ptr<Node>, std::shared_ptr<LoweredExpr>> m_node2expression_map;
-    // Expression must be uniquely identified by an output, so there can't be expressions that have the same output
-    std::unordered_map<TensorDescriptorPtr, LoweredExprPort> m_output2expression_map;
-    // At the same time, several expressions can have the same input if they are connected to the same parent
-    // E.g. LoopEnd will always have the same input as a Load inside the loop (since it has to increment the same reg)
-    std::unordered_map<TensorDescriptorPtr, std::set<LoweredExprPort>> m_input2expression_map;
-    io_container m_io_lowered_ops;
-    LoweringConfig m_config{};
-    LoweredLoopManagerPtr m_loop_manager = nullptr;
-};
-
-using AllocatedEmitter = std::pair<std::shared_ptr<Emitter>, RegInfo>;
-
-} // namespace snippets
-} // namespace ngraph
\ No newline at end of file
diff --git a/src/common/snippets/include/snippets/op/kernel.hpp b/src/common/snippets/include/snippets/op/kernel.hpp
index a44b7ace630ab8..d1389bffe18847 100644
--- a/src/common/snippets/include/snippets/op/kernel.hpp
+++ b/src/common/snippets/include/snippets/op/kernel.hpp
@@ -5,7 +5,7 @@
 #pragma once
 
 #include "ngraph/op/op.hpp"
-#include "snippets/lowered_expr.hpp"
+#include "snippets/lowered/linear_ir.hpp"
 
 namespace ngraph {
 namespace snippets {
@@ -20,10 +20,10 @@ class Kernel : public ngraph::op::Op {
 public:
     OPENVINO_OP("Kernel", "SnippetsOpset");
 
-    Kernel(LoweredExprIR region);
+    Kernel(lowered::LinearIR region);
     Kernel() = default;
 
-    LoweredExprIR region;
+    lowered::LinearIR region;
 
     std::shared_ptr<Node> clone_with_new_inputs(const OutputVector& inputs) const override {
         return std::make_shared<Kernel>(region);
diff --git a/src/common/snippets/include/snippets/op/serialization_node.hpp b/src/common/snippets/include/snippets/op/serialization_node.hpp
index 8bd2ae9ba4cde0..a3f7f7a9b3ff1a 100644
--- a/src/common/snippets/include/snippets/op/serialization_node.hpp
+++ b/src/common/snippets/include/snippets/op/serialization_node.hpp
@@ -6,7 +6,7 @@
 
 #include
 #include
-#include
+#include
 
 namespace ngraph {
 namespace snippets {
@@ -14,7 +14,7 @@ namespace op {
 
 /**
  * @interface SerializationNode
- * @brief Fake node needed to serialize LoweredExprIR
+ * @brief Fake node needed to serialize lowered::LinearIR
 * @ingroup snippets
 */
 class SerializationNode : public ngraph::op::Op {
@@ -22,7 +22,7 @@ class SerializationNode : public ngraph::op::Op {
     OPENVINO_OP("SerializationNode", "SnippetsOpset");
 
     SerializationNode() = default;
-    SerializationNode(const Output<Node> &arg, const std::shared_ptr<LoweredExpr>& expr)
+    SerializationNode(const Output<Node> &arg, const std::shared_ptr<lowered::Expression>& expr)
         : Op({arg}), m_expr(expr) {
         if (!m_expr || !m_expr->get_node())
             throw ngraph_error("SerializationNode requires a valid expression with non-null node pointer");
@@ -68,9 +68,9 @@ class SerializationNode : public ngraph::op::Op {
     }
 
 private:
-    std::shared_ptr<LoweredExpr> m_expr;
+    std::shared_ptr<lowered::Expression> m_expr;
 };
 
 } // namespace op
 } // namespace snippets
-} // namespace ngraph
\ No newline at end of file
+} // namespace ngraph
diff --git a/src/common/snippets/include/snippets/op/subgraph.hpp b/src/common/snippets/include/snippets/op/subgraph.hpp
index 1f99f99cb555fc..c9a3a6beb1bb26 100644
--- a/src/common/snippets/include/snippets/op/subgraph.hpp
+++ b/src/common/snippets/include/snippets/op/subgraph.hpp
@@ -206,7 +206,7 @@ static inline auto build_subgraph(const std::shared_ptr<ov::Node>& node, con
 auto inline update_out_tensor_name(const std::shared_ptr<ngraph::snippets::op::Subgraph>& subgraph) -> void {
     bool not_set = true;
     for (unsigned int i = 0; i < subgraph->get_output_size() && not_set; i++) {
-        for (const auto &in : subgraph->get_output_target_inputs(i)) {
+        for (const auto& in : subgraph->get_output_target_inputs(i)) {
             if (ov::is_type<ov::op::v0::Result>(in.get_node())) {
                 const auto& body_result = subgraph->body_ptr()->get_output_op(i);
                 const auto& body_result_input = body_result->get_input_source_output(0);
b/src/common/snippets/include/snippets/pass/lowered/load_store_insertion.hpp deleted file mode 100644 index 1d7d2f130ecb2a..00000000000000 --- a/src/common/snippets/include/snippets/pass/lowered/load_store_insertion.hpp +++ /dev/null @@ -1,42 +0,0 @@ -// Copyright (C) 2023 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 -// - -#pragma once - -#include "linear_IR_transformation.hpp" - -namespace ngraph { -namespace snippets { -namespace pass { -namespace lowered { - -/** - * @interface LoadStoreInsertion - * @brief The pass inserts Load and Store expressions in Linear IR after Parameters, Buffers and before Results, Buffers accordingly. - * Note: The pass should be called after LoopFusion and BufferInsertion passes to have all possible data expressions. - * @param m_vector_size - the count of elements for loading/storing - * @ingroup snippets - */ -class LoadStoreInsertion : public LinearIRTransformation { -public: - explicit LoadStoreInsertion(size_t vector_size); - OPENVINO_RTTI("LoadStoreInsertion", "LinearIRTransformation") - bool run(LoweredExprIR& linear_ir) override; - -private: - bool insert_load(LoweredExprIR& linear_ir, const LoweredExprIR::constExprIt& data_expr_it); - bool insert_store(LoweredExprIR& linear_ir, const LoweredExprIR::constExprIt& data_expr_it); - void update_loops(const LoweredExprIR::LoweredLoopManagerPtr& loop_manager, const std::vector& loop_ids, - const LoweredExprPort& actual_port, const std::vector& target_ports, bool is_entry = true); - void update_loop(const LoweredExprIR::LoweredLoopManager::LoweredLoopInfoPtr& loop_info, - const LoweredExprPort& actual_port, const std::vector& target_ports, bool is_entry = true); - std::vector get_loops_for_update(const std::vector& loop_ids, size_t loop_id); - - size_t m_vector_size; -}; - -} //namespace lowered -} // namespace pass -} // namespace snippets -} // namespace ngraph diff --git a/src/common/snippets/include/snippets/pass/lowered/loop_fusion.hpp b/src/common/snippets/include/snippets/pass/lowered/loop_fusion.hpp deleted file mode 100644 index 8d6fdeae7f1ea7..00000000000000 --- a/src/common/snippets/include/snippets/pass/lowered/loop_fusion.hpp +++ /dev/null @@ -1,44 +0,0 @@ -// Copyright (C) 2023 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 -// - -#pragma once - -#include "linear_IR_transformation.hpp" -#include "snippets/tensor_descriptor.hpp" - -namespace ngraph { -namespace snippets { -namespace pass { -namespace lowered { - -/** - * @interface LoopFusion - * @brief The pass fuses marking Loops. 
- * @ingroup snippets - */ -class LoopFusion : public LinearIRTransformation { -public: - OPENVINO_RTTI("LoopFusion", "LinearIRTransformation") - LoopFusion(); - bool run(LoweredExprIR& linear_ir) override; - -private: - static bool can_be_fused(const LoweredExprIR::LoweredLoopManager::LoweredLoopInfoPtr& loop_current, - const LoweredExprIR::LoweredLoopManager::LoweredLoopInfoPtr& loop_target); - static bool fuse_upper_into_current(LoweredExprIR& linear_ir, const LoweredExprIR::LoweredLoopManagerPtr& loop_manager, - const LoweredExprPort& current_entry_point, const LoweredExprPort& target_exit_point, - size_t current_loop_id, size_t target_loop_id, size_t dim_idx, - LoweredExprIR::constExprIt& current_loop_begin_pos, LoweredExprIR::constExprIt& current_loop_end_pos); - static bool fuse_lower_into_current(LoweredExprIR& linear_ir, const LoweredExprIR::LoweredLoopManagerPtr& loop_manager, - const LoweredExprPort& current_entry_point, const LoweredExprPort& target_exit_point, - size_t current_loop_id, size_t target_loop_id, size_t dim_idx, - LoweredExprIR::constExprIt& current_loop_begin_pos, LoweredExprIR::constExprIt& current_loop_end_pos); - static void fuse_points(LoweredExprIR& linear_ir, std::vector& exit_points, std::vector& entry_points, - LoweredExprIR::constExprIt loop_begin_pos, LoweredExprIR::constExprIt loop_end_pos); -}; - -} // namespace lowered -} // namespace pass -} // namespace snippets -} // namespace ngraph diff --git a/src/common/snippets/include/snippets/target_machine.hpp b/src/common/snippets/include/snippets/target_machine.hpp index dd23a8f0c94fa3..606ba6b9d3265a 100644 --- a/src/common/snippets/include/snippets/target_machine.hpp +++ b/src/common/snippets/include/snippets/target_machine.hpp @@ -9,7 +9,6 @@ #pragma once #include "emitter.hpp" -#include "lowered_expr.hpp" namespace ngraph { namespace snippets { diff --git a/src/common/snippets/src/generator.cpp b/src/common/snippets/src/generator.cpp index 9d532e22abe04e..0e72ad86a2e31c 100644 --- a/src/common/snippets/src/generator.cpp +++ b/src/common/snippets/src/generator.cpp @@ -3,77 +3,79 @@ // #include "snippets/generator.hpp" -#include "snippets/lowered_expr.hpp" -#include "snippets/op/loop.hpp" + +#include "snippets/lowered/linear_ir.hpp" +#include "snippets/lowered/pass/assign_registers.hpp" +#include "snippets/lowered/pass/insert_tail_loop.hpp" +#include "snippets/lowered/pass/loop_markup.hpp" +#include "snippets/lowered/pass/loop_fusion.hpp" +#include "snippets/lowered/pass/loop_init.hpp" +#include "snippets/lowered/pass/buffer_insertion.hpp" +#include "snippets/lowered/pass/load_store_insertion.hpp" +#include "snippets/lowered/pass/vector_to_scalar.hpp" +#include "snippets/lowered/pass/load_movebroadcast_to_broadcastload.hpp" +#include "snippets/lowered/pass/buffer_allocation.hpp" +#include "snippets/lowered/pass/propagate_layout.hpp" +#include "snippets/lowered/pass/cleanup_loop_offsets.hpp" +#include "snippets/lowered/pass/softmax_decomposition.hpp" +#include "snippets/lowered/pass/move_scalar_to_consumer.hpp" +#include "snippets/lowered/pass/move_result_out_of_loop.hpp" +#include "snippets/lowered/pass/buffer_reset.hpp" +#include "snippets/lowered/pass/buffer_identification.hpp" + #include "snippets/op/kernel.hpp" -#include -#include "snippets/pass/lowered/assign_registers.hpp" -#include "snippets/pass/lowered/insert_tail_loop.hpp" -#include "snippets/pass/lowered/loop_markup.hpp" -#include "snippets/pass/lowered/loop_fusion.hpp" -#include "snippets/pass/lowered/loop_init.hpp" -#include 
"snippets/pass/lowered/buffer_insertion.hpp" -#include "snippets/pass/lowered/load_store_insertion.hpp" -#include "snippets/pass/lowered/vector_to_scalar.hpp" -#include "snippets/pass/lowered/load_movebroadcast_to_broadcastload.hpp" -#include "snippets/pass/lowered/buffer_allocation.hpp" -#include "snippets/pass/lowered/propagate_layout.hpp" -#include "snippets/pass/lowered/cleanup_loop_offsets.hpp" -#include "snippets/pass/lowered/softmax_decomposition.hpp" -#include "snippets/pass/lowered/move_scalar_to_consumer.hpp" -#include "snippets/pass/lowered/move_result_out_of_loop.hpp" -#include "snippets/pass/lowered/buffer_reset.hpp" -#include "snippets/pass/lowered/buffer_identification.hpp" #include "snippets/tensor_descriptor.hpp" +#include + namespace ngraph { namespace snippets { -Generator::LoweringResult Generator::generate(std::shared_ptr& m, const LoweringConfig& config, const void* compile_params) { +Generator::LoweringResult Generator::generate(std::shared_ptr& m, const lowered::Config& config, const void* compile_params) { OV_ITT_SCOPED_TASK(ngraph::pass::itt::domains::SnippetsTransform, "Snippets::Generator::generate") OV_ITT_TASK_CHAIN(GENERATE, ngraph::pass::itt::domains::SnippetsTransform, "Snippets::Generator", "::Transformations") if (!target->is_supported()) throw ngraph_error("unsupported architecture for code generation"); - auto linear_ir = LoweredExprIR(m, config); + auto linear_ir = lowered::LinearIR(m, config); const size_t vector_size = get_target_machine()->get_lanes(); const int32_t buffer_allocation_rank = static_cast(config.m_loop_depth); // Note: The pass LoopInit uses LoopInfo that contains entry and exit points of the corresponding Loop. // To avoid the Loop information corruption, we should call the passes with Load/Store work // (for example, LoadMoveBroadcastToBroadcastLoad()) after explicit Loop insertion (LoopInit()) - pass::lowered::LinearIRTransformationPipeline common_pipeline; - common_pipeline.register_transformation(vector_size); - common_pipeline.register_transformation(vector_size); - common_pipeline.register_transformation(); - common_pipeline.register_transformation(); - common_pipeline.register_transformation(buffer_allocation_rank); - common_pipeline.register_transformation(vector_size); - common_pipeline.register_transformation(); - common_pipeline.register_transformation(); - common_pipeline.register_transformation(); - common_pipeline.register_transformation(); - common_pipeline.register_transformation(); // or should be in final? + lowered::pass::TransformationPipeline common_pipeline; + common_pipeline.register_transformation(vector_size); + common_pipeline.register_transformation(vector_size); + common_pipeline.register_transformation(); + common_pipeline.register_transformation(); + common_pipeline.register_transformation(buffer_allocation_rank); + common_pipeline.register_transformation(vector_size); + common_pipeline.register_transformation(); + common_pipeline.register_transformation(); + common_pipeline.register_transformation(); + common_pipeline.register_transformation(); + common_pipeline.register_transformation(); // or should be in final? 
common_pipeline.run(linear_ir); - pass::lowered::LinearIRTransformationPipeline target_pipeline = target_specific_transformations(); + lowered::pass::TransformationPipeline target_pipeline = target_specific_transformations(); target_pipeline.run(linear_ir); std::function& op)> reg_type_mapper = [&](const std::shared_ptr& op) -> opRegType { return get_op_reg_type(op); }; - const auto buffer_allocation_pass = std::make_shared(); - pass::lowered::LinearIRTransformationPipeline buffer_pipeline; - buffer_pipeline.register_transformation(); - buffer_pipeline.register_transformation(); + const auto buffer_allocation_pass = std::make_shared(); + lowered::pass::TransformationPipeline buffer_pipeline; + buffer_pipeline.register_transformation(); + buffer_pipeline.register_transformation(); buffer_pipeline.register_transformation(buffer_allocation_pass); buffer_pipeline.run(linear_ir); - pass::lowered::LinearIRTransformationPipeline final_pipeline; - final_pipeline.register_transformation(); - final_pipeline.register_transformation(reg_type_mapper); - final_pipeline.register_transformation(); + lowered::pass::TransformationPipeline final_pipeline; + final_pipeline.register_transformation(); + final_pipeline.register_transformation(reg_type_mapper); + final_pipeline.register_transformation(); final_pipeline.run(linear_ir); linear_ir.init_emitters(target); @@ -138,8 +140,8 @@ Generator::opRegType Generator::get_specific_op_reg_type(const std::shared_ptrget_type_name()) + " isn't determined!"); } -pass::lowered::LinearIRTransformationPipeline Generator::target_specific_transformations() const { - return pass::lowered::LinearIRTransformationPipeline(); +lowered::pass::TransformationPipeline Generator::target_specific_transformations() const { + return lowered::pass::TransformationPipeline(); } }// namespace snippets diff --git a/src/common/snippets/src/lowered/expression.cpp b/src/common/snippets/src/lowered/expression.cpp new file mode 100644 index 00000000000000..bc254fcd7869fc --- /dev/null +++ b/src/common/snippets/src/lowered/expression.cpp @@ -0,0 +1,120 @@ +// Copyright (C) 2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "snippets/lowered/expression.hpp" + +#include +#include "snippets/utils.hpp" + +#include +#include + +namespace ngraph { +namespace snippets { +namespace lowered { + +size_t Expression::LOOP_NULL_ID = SIZE_MAX; + +ExpressionPort::ExpressionPort(const ExpressionPtr& expr, size_t port, Type type) : expr(expr), port(port), m_type(type) { + if (type == Type::Input) { + OPENVINO_ASSERT(port < expr->get_inputs().size(), "The input port must be less than input count"); + } else if (type == Type::Output) { + OPENVINO_ASSERT(port < expr->get_outputs().size(), "The output port must be less than output count"); + } +} + +Expression::Expression(const std::shared_ptr& n) + : m_source_node{n}, m_emitter{nullptr}, m_reg_info{{}, {}}, m_is_outside_loop(utils::get_outside_loop_value(n)) { + for (const auto& in : n->inputs()) + m_inputs.emplace_back(get_tensor_descriptor_ptr(in.get_source_output())); + for (const auto& out : n->outputs()) + m_outputs.emplace_back(get_tensor_descriptor_ptr(out)); +} + +Expression::Expression(const std::shared_ptr& n, std::vector inputs) + : m_source_node{n}, m_emitter{nullptr}, m_inputs(std::move(inputs)), m_reg_info{{}, {}}, m_is_outside_loop(utils::get_outside_loop_value(n)) { + for (const auto& out : n->outputs()) + m_outputs.emplace_back(get_tensor_descriptor_ptr(out)); +} + +Expression::Expression(const std::shared_ptr& n, 
std::vector inputs, std::vector outputs) + : m_source_node{n}, m_emitter{nullptr}, m_inputs(std::move(inputs)), m_outputs(std::move(outputs)), + m_reg_info{{}, {}}, m_is_outside_loop(utils::get_outside_loop_value(n)) {} + +std::shared_ptr Expression::get_node() const { + if (!m_source_node) + throw ngraph_error("An attempt to get uninitialized node from lowered expression"); + return m_source_node; +} + +std::shared_ptr Expression::get_emitter() const { + return m_emitter; +} + +void Expression::init_emitter(const std::shared_ptr& target) { + m_emitter = target->get(m_source_node->get_type_info())(m_source_node); +} + +void Expression::replace_input(size_t port, TensorDescriptorPtr to) { + OPENVINO_ASSERT(port < m_inputs.size(), "Failed to replace: target input port must be less than input count!"); + m_inputs[port] = std::move(to); +} + +void Expression::replace_output(size_t port, TensorDescriptorPtr to) { + OPENVINO_ASSERT(port < m_outputs.size(), "Failed to replace: target output port must be less than output count!"); + m_outputs[port] = std::move(to); +} + +void Expression::set_loop_id(size_t id, size_t idx) { + OPENVINO_ASSERT((std::find(m_loop_ids.begin(), m_loop_ids.end(), id) == m_loop_ids.end()), + "Expression cannot be marked with the same Loop ID twice"); + if (m_loop_ids.size() <= idx) { + m_loop_ids.resize(idx + 1, LOOP_NULL_ID); + } + m_loop_ids[idx] = id; +} + +void Expression::remove_loop_id(size_t id) { + auto it = std::find(m_loop_ids.begin(), m_loop_ids.end(), id); + OPENVINO_ASSERT(it != m_loop_ids.end(), "Expression doesn't have the Loop with ID " + std::to_string(id)); + *it = Expression::LOOP_NULL_ID; +} + +ExpressionPort Expression::input_port(size_t i) { + OPENVINO_ASSERT(i < m_inputs.size(), "Failed to get input port: target input port must be less than input count!"); + return ExpressionPort(this->shared_from_this(), i, ExpressionPort::Type::Input); +} + +ExpressionPort Expression::output_port(size_t i) { + OPENVINO_ASSERT(i < m_outputs.size(), "Failed to get output port: target output port must be less than output count!"); + return ExpressionPort(this->shared_from_this(), i, ExpressionPort::Type::Output); +} + +IOExpression::IOExpression(const std::shared_ptr& par, int64_t index) + : Expression(par), m_index(index), m_type{io_type::INPUT} { +} + +IOExpression::IOExpression(const std::shared_ptr& res, int64_t index, std::vector inputs) + : Expression(res, inputs, {}), m_index(index), m_type{io_type::OUTPUT} { +} + +bool operator==(const ExpressionPort& lhs, const ExpressionPort& rhs) { + if (&lhs == &rhs) + return true; + OPENVINO_ASSERT(lhs.get_type() == rhs.get_type(), "Incorrect comparison: Ports are of different types!"); + return lhs.expr == rhs.expr && lhs.port == rhs.port; +} + +bool operator!=(const ExpressionPort& lhs, const ExpressionPort& rhs) { + return !(lhs == rhs); +} + +bool operator<(const ExpressionPort& lhs, const ExpressionPort& rhs) { + OPENVINO_ASSERT(lhs.get_type() == rhs.get_type(), "Incorrect comparison: Ports are of different types!"); + // Compare port indexes first, then fall back to the expression pointers + return (lhs.port < rhs.port) || (lhs.port == rhs.port && lhs.expr < rhs.expr); +} +}// namespace lowered +}// namespace snippets +}// namespace ngraph diff --git a/src/common/snippets/src/lowered/linear_ir.cpp b/src/common/snippets/src/lowered/linear_ir.cpp new file mode 100644 index 00000000000000..d3887fda6a02fb --- /dev/null +++ b/src/common/snippets/src/lowered/linear_ir.cpp @@ -0,0 +1,351 @@ +// Copyright (C) 2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// +
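[Editor's aside on expression.cpp above] `input_port()`/`output_port()` can hand out `ExpressionPort`s that carry an owning reference back to their expression only because `Expression` derives from `std::enable_shared_from_this`. A minimal sketch of that idiom, with illustrative names rather than the snippets classes:

```cpp
#include <cassert>
#include <cstddef>
#include <memory>

class Expr;
using ExprPtr = std::shared_ptr<Expr>;

class Expr : public std::enable_shared_from_this<Expr> {
public:
    struct Port {
        ExprPtr expr;      // owning back-reference: the port keeps the expression alive
        std::size_t index = 0;
    };
    Port port(std::size_t i) {
        // shared_from_this() is only valid if *this is already owned by a shared_ptr;
        // calling it on a stack-allocated Expr is undefined behavior
        return Port{shared_from_this(), i};
    }
};

int main() {
    auto e = std::make_shared<Expr>();
    Expr::Port p = e->port(0);
    assert(p.expr == e && e.use_count() == 2);  // port and owner share ownership
}
```

This is presumably also why `ExpressionPort` stores an `ExpressionPtr` rather than a raw pointer: a port taken from the IR's maps stays valid even while the expression list is being rewritten.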
+#include "snippets/lowered/linear_ir.hpp" + +#include + +#include "snippets/lowered/loop_manager.hpp" +#include +#include "snippets/tensor_descriptor.hpp" +#include "snippets/utils.hpp" + +#include +#include + +namespace ngraph { +namespace snippets { +namespace lowered { + +LinearIR::LinearIR(const std::shared_ptr& model, Config config) + : m_io_lowered_ops{}, m_config{std::move(config)}, m_loop_manager(std::make_shared()) { + constExprIt scalar_pos = m_lowered_ops.begin(); + ExpressionPtr last_param = nullptr; + for (const auto& n : get_ordered_ops(model)) { + constExprIt insertion_pos = m_lowered_ops.end(); + std::shared_ptr expr; + std::vector input_tds; + for (const auto& in : n->inputs()) { + const auto& out = in.get_source_output(); + const auto& parent_out_tds = m_node2expression_map[out.get_node_shared_ptr()]->get_outputs(); + input_tds.push_back(parent_out_tds[out.get_index()]); + } + if (const auto& par = as_type_ptr(n)) { + auto io_expr = std::make_shared(par, model->get_parameter_index(par)); + m_io_lowered_ops.push_back(io_expr); + expr = io_expr; + last_param = expr; + } else if (const auto& res = as_type_ptr(n)) { + auto io_expr = std::make_shared(res, model->get_result_index(res), input_tds); + m_io_lowered_ops.push_back(io_expr); + expr = io_expr; + } else { + if (const auto& scalar = as_type_ptr(n)) { + // Scalars should be placed at the beginning of the Linear IR, right after Parameters, + // to keep a valid expression order after the Loop passes. + // After these passes, the MoveScalarToConsumer() pass must be called to preserve accuracy. + // For more details, please see the pass description + if (scalar_pos == m_lowered_ops.end()) { + OPENVINO_ASSERT(last_param, "Scalars must be executed after Parameters"); + scalar_pos = std::find(m_lowered_ops.begin(), m_lowered_ops.end(), last_param); + } + insertion_pos = std::next(scalar_pos); + } + // Note that output tds must be empty since they are filled automatically from rt_info and/or tensor shapes + expr = std::make_shared(n, input_tds); + } + register_expression(expr); + m_lowered_ops.insert(insertion_pos, expr); + } +} + +ov::NodeVector LinearIR::get_ordered_ops(const std::shared_ptr& m) { + if (!m->get_sinks().empty()) + throw ngraph_error("Linear IR is not supposed to work for models with sinks.
Check your transformation pipeline."); + + // Note that an important difference between this impl and Model::get_ordered_ops is that Results and Parameters + // are added in REVERSE order, so they will be visited in DIRECT order compared to get_parameters() and get_results() + NodeVector nodes; + const auto& results = m->get_results(); + std::copy(results.rbegin(), results.rend(), std::back_inserter(nodes)); + const auto& params = m->get_parameters(); + std::copy(params.rbegin(), params.rend(), std::back_inserter(nodes)); + + return ov::topological_sort(nodes); +} + +void LinearIR::serialize(const std::string& xml, const std::string& bin) { + auto first_node = std::make_shared(element::f32, Shape{}); + first_node->set_friendly_name("Start"); + first_node->get_rt_info()["execTimeMcs"] = 0; + std::shared_ptr body_node = first_node; + for (const auto& expr : m_lowered_ops) { + body_node = std::make_shared(body_node, expr); + } + auto last_node = std::make_shared(body_node); + last_node->set_friendly_name("End"); + const auto tmp_model = std::make_shared(ResultVector {last_node}, + ParameterVector {first_node}, + "Lowered_IR_Serialization"); + ov::pass::Serialize(xml, bin).run_on_model(tmp_model); +} + +LinearIR::container LinearIR::deep_copy_range(LinearIR::container::const_iterator begin, LinearIR::container::const_iterator end) { + LinearIR::container result; + NodeVector original_nodes; + for (auto it = begin; it != end; it++) + original_nodes.push_back((*it)->get_node()); + NodeMap node_map; + ngraph::clone_nodes(original_nodes, node_map); + for (auto it = begin; it != end; it++) { + // copy by value, so the resulting shared_ptrs point to new objects + Expression new_expr = **it; + new_expr.m_source_node = node_map[(*it)->get_node().get()]; + result.emplace_back(std::make_shared(new_expr)); + } + return result; +} + +LinearIR LinearIR::deep_copy() const { + LinearIR result; + auto& result_ops = result.m_lowered_ops; + for (const auto& expr : deep_copy_range(m_lowered_ops.begin(), m_lowered_ops.end())) + result_ops.emplace_back(expr); + result.m_config = m_config; + return result; +} + +void LinearIR::debug_print(bool tds_as_pointers) const { + auto print_rinfo = [](const RegInfo& rinfo) { + std::cerr << " : {"; + for (auto i : rinfo.first) + std::cerr << i << " "; + std::cerr << " => "; + for (auto i : rinfo.second) + std::cerr << i << " "; + std::cerr << "}"; + }; + std::map td2int; + int td_counter = 0; + int counter = 0; + for (const auto& expr : m_lowered_ops) { + const auto& node = expr->get_node(); + std::cerr << counter++ << " : " << + node->get_friendly_name() << " : "; + if (tds_as_pointers) { + for (const auto& in : expr->get_inputs()) { + if (td2int.count(in) == 0) + throw ngraph_error("Undefined input descriptor for op"); + std::cerr << td2int.at(in) << ", "; + } + std::cerr << "\b\b => "; + for (const auto& out : expr->get_outputs()) { + if (td2int.count(out) == 0) + td2int.insert({out, td_counter++}); + std::cerr << td2int.at(out) << ", "; + } + } else { + for (const auto& in : expr->get_inputs()) + std::cerr << *in << ", "; + std::cerr << "\b\b => "; + for (const auto& out : expr->get_outputs()) + std::cerr << *out << ", "; + } + std::cerr << "\b\b"; + const auto& rinfo = expr->get_reg_info(); + if (!rinfo.first.empty() || !rinfo.second.empty()) + print_rinfo(expr->get_reg_info()); + std::cerr << "\n"; + } +} + +void LinearIR::init_emitters(const std::shared_ptr& target) { + for (auto& expr : m_lowered_ops) { + if (!expr->get_emitter()) + expr->init_emitter(target); + } +} + 
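[Editor's aside on linear_ir.cpp] The `move()` method defined a bit further down delegates to `std::list::splice`, which relinks list nodes in O(1) without copying expressions or invalidating iterators to them, which is presumably why the expression container is a list in the first place. A standalone illustration:

```cpp
#include <iostream>
#include <iterator>
#include <list>

int main() {
    std::list<int> ops{1, 2, 3, 4};
    auto from = std::next(ops.begin());  // points at element 2
    auto to = ops.end();                 // destination: before end()
    // Same-list splice: no element is constructed, destroyed or copied,
    // and `from` still points at the moved element afterwards
    ops.splice(to, ops, from);
    for (int v : ops)
        std::cout << v << ' ';           // prints: 1 3 4 2
    std::cout << '\n';
}
```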
+ExpressionPtr LinearIR::get_expr_by_node(const std::shared_ptr& n) const { + auto found = m_node2expression_map.find(n); + return found == m_node2expression_map.end() ? nullptr : found->second; +} + +ExpressionPort LinearIR::get_expr_by_output(const TensorDescriptorPtr& td) const { + auto found = m_output2expression_map.find(td); + if (found == m_output2expression_map.end()) + throw ngraph_error("Failed to find expression by output tensor descriptor"); + return found->second; +} + +const std::set& LinearIR::get_exprs_by_input(const TensorDescriptorPtr& td) const { + auto found = m_input2expression_map.find(td); + if (found == m_input2expression_map.end()) + throw ngraph_error("Failed to find expression by input tensor descriptor"); + return found->second; +} + +void LinearIR::replace_input(const ExpressionPtr& expr, size_t port, const TensorDescriptorPtr& to) { + replace_input(expr->input_port(port), to); +} + +void LinearIR::replace_input(const ExpressionPort& expr_port, const TensorDescriptorPtr& to) { + const auto& expr = expr_port.expr; + const auto port = expr_port.port; + OPENVINO_ASSERT(expr_port.get_type() == ExpressionPort::Type::Input, "Failed to replace: target input port must have Input type"); + OPENVINO_ASSERT(port < expr->m_inputs.size(), "Failed to replace: target input port must be less than input count!"); + const auto from = expr->m_inputs[port]; + auto found = m_input2expression_map.find(from); + if (found == m_input2expression_map.end() || found->second.count(expr_port) == 0) + throw ngraph_error("Invalid expression of input was provided to replace_input"); + found->second.erase(expr_port); + { + const auto& res = m_input2expression_map.insert({to, std::set{expr_port}}); + // If input is already in the map => add ExprPtr to the mapped set + if (!res.second) { + res.first->second.insert(expr_port); + } + } + expr->replace_input(port, std::move(to)); +} + +void LinearIR::replace_output(const ExpressionPtr& expr, size_t port, const TensorDescriptorPtr& to) { + replace_output(expr->output_port(port), to); +} + +void LinearIR::replace_output(const ExpressionPort& expr_port, const TensorDescriptorPtr& to) { + const auto& expr = expr_port.expr; + const auto port = expr_port.port; + OPENVINO_ASSERT(expr_port.get_type() == ExpressionPort::Type::Output, "Failed to replace: target output port must have Output type"); + OPENVINO_ASSERT(port < expr->m_outputs.size(), "Failed to replace: target output port must be less than output count!"); + const auto from = expr->m_outputs[port]; + auto found = m_output2expression_map.find(from); + if (found == m_output2expression_map.end() || found->second != expr_port) + throw ngraph_error("Invalid expression of output was provided to replace_output"); + m_output2expression_map.erase(found); + m_output2expression_map[to] = expr_port; + expr->replace_output(port, to); +} + +void LinearIR::register_regular_expression(const ExpressionPtr& expr) { + if (is_type(expr->get_node()) || is_type(expr->get_node())) + throw ngraph_error("LinearIR::insert can't be used to add Parameters or Results to IR"); + register_expression(expr); +} + +void LinearIR::register_expression(const ExpressionPtr& expr) { + const auto& node = expr->get_node(); + { + const auto& res = m_node2expression_map.insert({node, expr}); + if (!res.second) + throw ngraph_error("Duplicate node is detected in linear IR: " + std::string(node->get_friendly_name())); + } + for (size_t i = 0; i < expr->m_outputs.size(); ++i) { + const auto& out = expr->m_outputs[i]; + 
m_output2expression_map[out] = expr->output_port(i); + } + + for (size_t i = 0; i < expr->m_inputs.size(); ++i) { + const auto& in = expr->m_inputs[i]; + const auto expr_port = expr->input_port(i); + const auto& res = m_input2expression_map.insert({in, std::set{expr_port}}); + // If input is already in the map => add ExprPtr to the mapped set + if (!res.second) { + res.first->second.insert(expr_port); + } + } +} + +void LinearIR::unregister_expression(const ExpressionPtr& expr) { + for (const auto& out : expr->m_outputs) + m_output2expression_map.erase(out); + + size_t in_port = 0; + for (const auto& in : expr->m_inputs) { + const auto& found = m_input2expression_map.find(in); + if (found != m_input2expression_map.end()) { + // Note: If the input is used only by this expr => delete the whole entry + // Otherwise delete the expr from the users set + auto& users = found->second; + if (users.size() == 1) + m_input2expression_map.erase(found); + else + users.erase(expr->input_port(in_port)); + } + ++in_port; + } + + m_node2expression_map.erase(expr->get_node()); +} + +LinearIR::exprIt LinearIR::insert(constExprIt pos, container::value_type&& value) { + register_regular_expression(value); + return m_lowered_ops.insert(pos, value); +} + +LinearIR::exprIt LinearIR::insert(constExprIt pos, const container::value_type& value) { + register_regular_expression(value); + return m_lowered_ops.insert(pos, value); +} + +LinearIR::exprIt LinearIR::insert(constExprIt pos, exprIt begin, exprIt end) { + constExprIt cbegin = begin; + constExprIt cend = end; + return insert(pos, cbegin, cend); +} + +LinearIR::exprIt LinearIR::insert(constExprIt pos, constExprIt begin, constExprIt end) { + for (auto b = begin; b != end; b++) + register_regular_expression(*b); + return m_lowered_ops.insert(pos, begin, end); +} + +LinearIR::exprIt LinearIR::insert(LinearIR::constExprIt pos, const NodeVector& nodes) { + auto ret = m_lowered_ops.end(); + for (const auto& n : nodes) { + std::vector input_tds; + for (const auto& in : n->inputs()) { + const auto& out = in.get_source_output(); + const auto& parent_out_tds = m_node2expression_map[out.get_node_shared_ptr()]->get_outputs(); + input_tds.push_back(parent_out_tds[out.get_index()]); + } + // Note that output tds must be empty since they are filled automatically from rt_info and/or tensor shapes + const auto& expr = std::make_shared(n, input_tds); + register_regular_expression(expr); + ret = m_lowered_ops.insert(pos, expr); + } + // Need to return iterator to the first of the inserted values + return std::prev(ret, static_cast(nodes.size())); +} + +LinearIR::exprIt LinearIR::insert(LinearIR::constExprIt pos, const std::shared_ptr& n) { + std::vector input_tds; + for (const auto& in : n->inputs()) { + const auto& out = in.get_source_output(); + const auto& parent_out_tds = m_node2expression_map[out.get_node_shared_ptr()]->get_outputs(); + input_tds.push_back(parent_out_tds[out.get_index()]); + } + // Note that output tds must be empty since they are filled automatically from rt_info and/or tensor shapes + const auto& expr = std::make_shared(n, input_tds); + register_regular_expression(expr); + return m_lowered_ops.insert(pos, expr); +} + +LinearIR::exprIt LinearIR::erase(LinearIR::exprIt pos) { + unregister_expression(*pos); + return m_lowered_ops.erase(pos); +} + +LinearIR::exprIt LinearIR::erase(LinearIR::constExprIt pos) { + unregister_expression(*pos); + return m_lowered_ops.erase(pos); +} + +void LinearIR::move(LinearIR::constExprIt from, LinearIR::constExprIt to) { + //
Instead of `insert()` + `erase()`, we use `splice()` for the same list + m_lowered_ops.splice(to, m_lowered_ops, from); +} + +}// namespace lowered +}// namespace snippets +}// namespace ngraph diff --git a/src/common/snippets/src/lowered/loop_manager.cpp b/src/common/snippets/src/lowered/loop_manager.cpp new file mode 100644 index 00000000000000..cf2caeea807631 --- /dev/null +++ b/src/common/snippets/src/lowered/loop_manager.cpp @@ -0,0 +1,205 @@ +// Copyright (C) 2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "snippets/lowered/loop_manager.hpp" + +#include "snippets/lowered/expression.hpp" +#include "snippets/tensor_descriptor.hpp" + +#include +#include + +#include + +namespace ngraph { +namespace snippets { +namespace lowered { + +size_t LinearIR::LoopManager::add_loop_info(const LoopInfoPtr &loop) { + const auto index = next_id; + m_map[index] = loop; + next_id++; + return index; +} + +void LinearIR::LoopManager::remove_loop_info(size_t index) { + m_map.erase(index); +} + +using LoopInfoPtr = LinearIR::LoopManager::LoopInfoPtr; + +const std::map &LinearIR::LoopManager::get_map() const { + return m_map; +} + +LoopInfoPtr LinearIR::LoopManager::get_loop_info(size_t index) const { + const auto it = m_map.find(index); + OPENVINO_ASSERT(it != m_map.end(), "LoopInformation hasn't been found!"); + return it->second; +} + +void LinearIR::LoopManager::get_loop_bounds(const LinearIR &linear_ir, + size_t loop_id, + LinearIR::constExprIt &loop_begin_pos, + LinearIR::constExprIt &loop_end_pos) const { + const auto loop_info = get_loop_info(loop_id); + get_loop_bounds(linear_ir, loop_info->entry_exprs, loop_info->exit_exprs, loop_begin_pos, loop_end_pos, + loop_id); +} + +void LinearIR::LoopManager::get_loop_bounds(const LinearIR &linear_ir, + const std::vector &entries, + const std::vector &exits, + LinearIR::constExprIt &loop_begin_pos, + LinearIR::constExprIt &loop_end_pos, + size_t loop_id) { + OPENVINO_ASSERT(!entries.empty(), "Loop must have entry points"); + OPENVINO_ASSERT(!exits.empty(), "Loop must have exit points"); + loop_begin_pos = std::find(linear_ir.begin(), linear_ir.end(), entries.front().expr); + OPENVINO_ASSERT(loop_begin_pos != linear_ir.end(), "Loop begin hasn't been found!"); + + // Some operations in Loop can be before first entry points: Scalars, VectorBuffer.
+ // We should iterate by them till the expr is in the corresponding Loop + auto prev_loop_ids = (*std::prev(loop_begin_pos))->get_loop_ids(); + while (std::find(prev_loop_ids.begin(), prev_loop_ids.end(), loop_id) != prev_loop_ids.end()) { + loop_begin_pos = std::prev(loop_begin_pos); + prev_loop_ids = (*std::prev(loop_begin_pos))->get_loop_ids(); + } + + // At the moment all Loops must have exit points + loop_end_pos = std::next(std::find(loop_begin_pos, linear_ir.end(), exits.back().expr)); + OPENVINO_ASSERT(loop_end_pos != linear_ir.end(), "Loop end hasn't been found!"); +} + +void LinearIR::LoopManager::get_io_loop_ports(LinearIR &linear_ir, + LinearIR::constExprIt loop_begin_pos, + LinearIR::constExprIt loop_end_pos, + std::vector &entries, + std::vector &exits) { + entries.clear(); + exits.clear(); + for (auto expr_it = loop_begin_pos; expr_it != loop_end_pos; ++expr_it) { + const auto& expr = *expr_it; + const auto inputs = expr->get_inputs(); + const auto outputs = expr->get_outputs(); + + for (size_t in_port = 0; in_port < inputs.size(); ++in_port) { + const auto in_td = inputs[in_port]; + const auto parent_expr = linear_ir.get_expr_by_output(in_td).expr; + if (!ov::is_type(parent_expr->get_node()) && + std::find(loop_begin_pos, expr_it, parent_expr) == expr_it) { + entries.push_back(expr->input_port(in_port)); + } + } + + for (size_t out_port = 0; out_port < outputs.size(); ++out_port) { + const auto out_td = outputs[out_port]; + const auto consumer_exprs = linear_ir.get_exprs_by_input(out_td); + for (const auto& consumer_expr : consumer_exprs) { + if (std::find(expr_it, loop_end_pos, consumer_expr.expr) == loop_end_pos) { + exits.push_back(expr->output_port(out_port)); + break; + } + } + } + } +} + +void LinearIR::LoopManager::skipped_mark(LinearIR::constExprIt loop_begin_pos, + LinearIR::constExprIt loop_end_pos, + size_t loop_depth) { + const auto loop_ids = std::vector(loop_depth, Expression::LOOP_NULL_ID); + for (auto& expr_it = loop_begin_pos; expr_it != loop_end_pos; ++expr_it) { + const auto expr = *expr_it; + expr->set_loop_ids(loop_ids); + } +} + +void LinearIR::LoopManager::mark_loop(LinearIR &linear_ir, + LinearIR::constExprIt loop_begin_pos, + LinearIR::constExprIt loop_end_pos, + size_t loop_depth, size_t vector_size) { + std::vector loop_entry_points, loop_exit_points; + LoopManager::get_io_loop_ports(linear_ir, loop_begin_pos, loop_end_pos, loop_entry_points, + loop_exit_points); + + auto broadcast = [](std::vector &lhs, const std::vector &rhs) -> void { + if (rhs == lhs) + return; + const auto lhs_size = lhs.size(); + const auto rhs_size = rhs.size(); + const auto size = std::max(lhs_size, rhs_size); + // Broadcast right-to-left (numpy-style) into a fresh vector so the reads from lhs stay valid + std::vector result(size, 1); + for (size_t i = 0; i < size; ++i) { + const auto lhs_value = i < lhs_size ? *(lhs.crbegin() + i) : 1; + const auto rhs_value = i < rhs_size ? *(rhs.crbegin() + i) : 1; + OPENVINO_ASSERT(lhs_value == rhs_value || lhs_value == 1 || rhs_value == 1, + "Output shapes of Loop must be broadcastable!"); + *(result.rbegin() + i) = std::max(lhs_value, rhs_value); + } + lhs = std::move(result); + }; + + std::vector loop_subtensor; + std::vector loop_layout; + std::vector loop_tensor(1, 1); // Scalar + for (const auto& exit_point : loop_exit_points) { + const auto expr = exit_point.expr; + const auto port = exit_point.port; + const auto out_td = expr->get_outputs()[port]; + const auto out_tensor = out_td->get_tensor(); + const auto out_layout = out_td->get_layout(); + broadcast(loop_tensor, out_tensor); + if (loop_layout.empty()) + loop_layout = out_layout; + OPENVINO_ASSERT(loop_layout == out_layout, "Output layouts of Loop must be the same!"); + } + + for (const auto& entry_point : loop_entry_points) { + const auto expr = entry_point.expr; + const auto out_td = expr->get_outputs().front(); + const auto out_subtensor = out_td->get_subtensor(); + if (loop_subtensor.empty()) + loop_subtensor = out_subtensor; + OPENVINO_ASSERT(loop_subtensor == out_subtensor, "Subtensors of Loop must be the same!"); + } + + for (size_t dim_idx = 0; dim_idx < loop_depth; ++dim_idx) { + OPENVINO_ASSERT(dim_idx < loop_tensor.size(), "Incorrect indexes of Loop for markup"); + const auto dim = loop_layout.size() > dim_idx ? *(loop_layout.rbegin() + dim_idx) : 0; + const auto work_amount = loop_tensor.size() > dim ? loop_tensor[dim] : 0; + const auto work_amount_increment = + loop_subtensor.size() > dim_idx ? *(loop_subtensor.rbegin() + dim_idx) : + dim_idx == 0 ? vector_size : 1; + + mark_loop(linear_ir, loop_begin_pos, loop_end_pos, loop_depth - dim_idx - 1, work_amount, + work_amount_increment, loop_entry_points, loop_exit_points); + } +} + +void LinearIR::LoopManager::mark_loop(LinearIR &linear_ir, + LinearIR::constExprIt loop_begin_pos, + LinearIR::constExprIt loop_end_pos, + size_t idx, + size_t work_amount, + size_t work_amount_increment, + const std::vector &entries, + const std::vector &exits) { + const auto loop_info = std::make_shared( + work_amount, work_amount_increment, entries, exits); + const auto loop_id = this->add_loop_info(loop_info); + exprs_marking(loop_begin_pos, loop_end_pos, loop_id, idx); +} + +void LinearIR::LoopManager::exprs_marking(LinearIR::constExprIt loop_begin_pos, + LinearIR::constExprIt loop_end_pos, + size_t loop_id, size_t idx) { + for (auto expr_it = loop_begin_pos; expr_it != loop_end_pos; ++expr_it) { + expr_it->get()->set_loop_id(loop_id, idx); + } +} + +}// namespace lowered +}// namespace snippets +}// namespace ngraph diff --git a/src/common/snippets/src/pass/lowered/assign_registers.cpp b/src/common/snippets/src/lowered/pass/assign_registers.cpp similarity index 97% rename from src/common/snippets/src/pass/lowered/assign_registers.cpp rename to src/common/snippets/src/lowered/pass/assign_registers.cpp index e3d4f5fe8a3ea8..d33d6da1d6bfb3 100644 --- a/src/common/snippets/src/pass/lowered/assign_registers.cpp +++ b/src/common/snippets/src/lowered/pass/assign_registers.cpp @@ -2,25 +2,27 @@ // SPDX-License-Identifier: Apache-2.0 // -#include "snippets/pass/lowered/assign_registers.hpp" +#include "snippets/lowered/pass/assign_registers.hpp" + +#include "snippets/lowered/linear_ir.hpp" #include "snippets/snippets_isa.hpp" -#include "snippets/lowered_expr.hpp" #include "snippets/itt.hpp" + // This header is needed to avoid MSVC warning "C2039: 'inserter': is not a member of 'std'" #include namespace ngraph { namespace snippets { -namespace pass {
namespace lowered { +namespace pass { -bool AssignRegisters::run(LoweredExprIR& linear_ir) { +bool AssignRegisters::run(LinearIR& linear_ir) { OV_ITT_SCOPED_TASK(ngraph::pass::itt::domains::SnippetsTransform, "Snippets::AssignRegisters") using Reg = size_t; using tensor = snippets::TensorDescriptorPtr; auto& expressions = linear_ir.get_ops(); - std::vector> typed_ops; + std::vector> typed_ops; NodeVector ops; Reg num_parameters = 0; Reg num_results = 0; @@ -43,10 +45,10 @@ bool AssignRegisters::run(LoweredExprIR& linear_ir) { auto accumulator_reg = 0lu; for (const auto& expr : expressions) { auto op = expr->get_node(); - if (const auto io_expr = std::dynamic_pointer_cast(expr)) { - if (io_expr->get_type() == IOLoweredExpr::io_type::INPUT) + if (const auto io_expr = std::dynamic_pointer_cast(expr)) { + if (io_expr->get_type() == IOExpression::io_type::INPUT) manually_assigned_gprs[expr->get_outputs()[0]] = io_expr->get_index(); - else if (io_expr->get_type() == IOLoweredExpr::io_type::OUTPUT) + else if (io_expr->get_type() == IOExpression::io_type::OUTPUT) manually_assigned_gprs[expr->get_inputs()[0]] = num_parameters + io_expr->get_index(); else throw ngraph_error("Unsupported io_type detected"); @@ -97,7 +99,7 @@ bool AssignRegisters::run(LoweredExprIR& linear_ir) { // Note: have to specify default capture "=" due to MSVC bug (it doesn't capture const expressions implicitly) // Otherwise WIN build fails with "IS_MANUALLY_ALLOCATED_REG cannot be implicitly captured because no default capture mode has been specified" // the same problem with all the other lambdas in this file - auto enumerate_out_tensors = [=] (const LoweredExprPtr& expr, + auto enumerate_out_tensors = [=] (const ExpressionPtr& expr, decltype(regs_vec)& reg_map, const std::map& manually_assigned_regs, size_t& counter) { @@ -329,8 +331,8 @@ bool AssignRegisters::run(LoweredExprIR& linear_ir) { return false; } -} // namespace lowered } // namespace pass +} // namespace lowered } // namespace snippets } // namespace ngraph diff --git a/src/common/snippets/src/pass/lowered/buffer_allocation.cpp b/src/common/snippets/src/lowered/pass/buffer_allocation.cpp similarity index 94% rename from src/common/snippets/src/pass/lowered/buffer_allocation.cpp rename to src/common/snippets/src/lowered/pass/buffer_allocation.cpp index 6c2dd6ce7ed398..25f47c8b0b5600 100644 --- a/src/common/snippets/src/pass/lowered/buffer_allocation.cpp +++ b/src/common/snippets/src/lowered/pass/buffer_allocation.cpp @@ -2,16 +2,17 @@ // SPDX-License-Identifier: Apache-2.0 // -#include "snippets/pass/lowered/buffer_allocation.hpp" +#include "snippets/lowered/pass/buffer_allocation.hpp" + +#include "snippets/lowered/linear_ir.hpp" #include "snippets/itt.hpp" -#include "snippets/lowered_expr.hpp" namespace ngraph { namespace snippets { -namespace pass { namespace lowered { +namespace pass { -void BufferAllocation::propagate_offset(const LoweredExprIR& linear_ir, const LoweredExprPtr& buffer_expr, const size_t offset) { +void BufferAllocation::propagate_offset(const LinearIR& linear_ir, const ExpressionPtr& buffer_expr, const size_t offset) { // If Buffer has offset We set this offset in the connected MemoryAccess ops // to correctly read and write data because all Buffers has the common data pointer on buffer scratchpad @@ -54,7 +55,7 @@ void BufferAllocation::propagate_offset(const LoweredExprIR& linear_ir, const Lo } -bool BufferAllocation::run(LoweredExprIR& linear_ir) { +bool BufferAllocation::run(LinearIR& linear_ir) { 
OV_ITT_SCOPED_TASK(ngraph::pass::itt::domains::SnippetsTransform, "Snippets::BufferAllocation"); bool modified = false; @@ -100,7 +101,7 @@ bool BufferAllocation::run(LoweredExprIR& linear_ir) { return modified; } -} // namespace lowered } // namespace pass +} // namespace lowered } // namespace snippets } // namespace ngraph diff --git a/src/common/snippets/src/pass/lowered/buffer_identification.cpp b/src/common/snippets/src/lowered/pass/buffer_identification.cpp similarity index 97% rename from src/common/snippets/src/pass/lowered/buffer_identification.cpp rename to src/common/snippets/src/lowered/pass/buffer_identification.cpp index 94b798da256f34..0f6f710b422004 100644 --- a/src/common/snippets/src/pass/lowered/buffer_identification.cpp +++ b/src/common/snippets/src/lowered/pass/buffer_identification.cpp @@ -2,15 +2,16 @@ // SPDX-License-Identifier: Apache-2.0 // -#include "snippets/pass/lowered/buffer_identification.hpp" +#include "snippets/lowered/pass/buffer_identification.hpp" + +#include "snippets/lowered/linear_ir.hpp" #include "snippets/snippets_isa.hpp" -#include "snippets/lowered_expr.hpp" #include "snippets/itt.hpp" namespace ngraph { namespace snippets { -namespace pass { namespace lowered { +namespace pass { namespace { auto is_intermediate_buffer(const std::shared_ptr& op) -> std::shared_ptr { @@ -23,7 +24,7 @@ inline size_t index(size_t col_num, size_t row, size_t col) { } } // namespace -std::vector BufferIdentification::create_adjacency_matrix(const LoweredExprIR& linear_ir, const BufferSet& buffers) const { +std::vector BufferIdentification::create_adjacency_matrix(const LinearIR& linear_ir, const BufferSet& buffers) const { // The sync point to check for adjacency is Loop because only in Loop we increment pointers. // So if some Buffers in one Loop have a conflict (cannot be inplace: different ptr increments and data sizes), // they are considered adjacent @@ -155,7 +156,7 @@ auto BufferIdentification::coloring(BufferSet& buffers, std::vector& adj) return color_groups; } -bool BufferIdentification::run(LoweredExprIR& linear_ir) { +bool BufferIdentification::run(LinearIR& linear_ir) { OV_ITT_SCOPED_TASK(ngraph::pass::itt::domains::SnippetsTransform, "Snippets::BufferIdentification") // Unite Buffers using Graph coloring algorithm.
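[Editor's aside] On the graph-coloring comment above: conceptually, BufferIdentification colors the Buffer conflict graph so that adjacent (conflicting) Buffers never share a color, and Buffers with equal colors can then be united under one ID. A generic greedy-coloring sketch of that idea, not the actual implementation (which works on the flattened matrix produced by create_adjacency_matrix):

```cpp
#include <cstddef>
#include <vector>

// adj is a flattened n x n symmetric matrix: adj[i * n + j] == true means
// buffers i and j conflict (e.g. different ptr increments or data sizes in
// one Loop) and therefore must receive different colors (IDs).
std::vector<std::size_t> greedy_coloring(const std::vector<bool>& adj, std::size_t n) {
    std::vector<std::size_t> color(n, 0);  // buffer 0 keeps color 0
    for (std::size_t i = 1; i < n; ++i) {
        std::vector<bool> used(n, false);
        for (std::size_t j = 0; j < i; ++j)  // colors of j < i are already fixed
            if (adj[i * n + j])
                used[color[j]] = true;
        std::size_t c = 0;
        while (used[c])  // smallest color not taken by an adjacent buffer
            ++c;
        color[i] = c;
    }
    return color;  // buffers with equal colors may share one buffer ID
}
```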
// Notes: We identify only Buffer with Intermediate memory because Buffers with new memory are used only in Brgemm case @@ -188,7 +189,7 @@ bool BufferIdentification::run(LoweredExprIR& linear_ir) { return true; } -} // namespace lowered } // namespace pass +} // namespace lowered } // namespace snippets } // namespace ngraph diff --git a/src/common/snippets/src/pass/lowered/buffer_insertion.cpp b/src/common/snippets/src/lowered/pass/buffer_insertion.cpp similarity index 84% rename from src/common/snippets/src/pass/lowered/buffer_insertion.cpp rename to src/common/snippets/src/lowered/pass/buffer_insertion.cpp index 4bcccec2b93094..be44dacdabd077 100644 --- a/src/common/snippets/src/pass/lowered/buffer_insertion.cpp +++ b/src/common/snippets/src/lowered/pass/buffer_insertion.cpp @@ -2,21 +2,24 @@ // SPDX-License-Identifier: Apache-2.0 // -#include "snippets/pass/lowered/buffer_insertion.hpp" +#include "snippets/lowered/pass/buffer_insertion.hpp" + +#include "snippets/lowered/linear_ir.hpp" +#include "snippets/lowered/loop_manager.hpp" #include "snippets/snippets_isa.hpp" #include "snippets/itt.hpp" namespace ngraph { namespace snippets { -namespace pass { namespace lowered { +namespace pass { BufferInsertion::BufferInsertion(int32_t buffer_allocation_rank) - : LinearIRTransformation(), m_buffer_allocation_rank(buffer_allocation_rank) {} + : Transformation(), m_buffer_allocation_rank(buffer_allocation_rank) {} -LoweredExprIR::constExprIt BufferInsertion::insertion_position(const LoweredExprIR& linear_ir, const LoweredExprIR::LoweredLoopManagerPtr& loop_manager, - const LoweredExprPtr& up_expr, const LoweredExprPtr& down_expr) { +LinearIR::constExprIt BufferInsertion::insertion_position(const LinearIR& linear_ir, const LinearIR::LoopManagerPtr& loop_manager, + const ExpressionPtr& up_expr, const ExpressionPtr& down_expr) { const auto up_loops = up_expr->get_loop_ids(); const auto down_loops = down_expr->get_loop_ids(); OPENVINO_ASSERT(up_loops.size() == down_loops.size(), "The Loop IDs must be normalized!"); @@ -27,7 +30,7 @@ LoweredExprIR::constExprIt BufferInsertion::insertion_position(const LoweredExpr } // If loop_ids of expressions are equal and don't contain LOOP_NULL_ID, it's attempt to insert Buffer between expressions from the same Loop! 
- if (loop_idx == up_loops.size() && std::none_of(up_loops.begin(), up_loops.end(), [](const size_t id) { return id == LoweredExpr::LOOP_NULL_ID; })) + if (loop_idx == up_loops.size() && std::none_of(up_loops.begin(), up_loops.end(), [](const size_t id) { return id == Expression::LOOP_NULL_ID; })) throw ov::Exception("Buffer isn't supported in Inner Loop at the moment!"); // If the both expressions are outside Loops, insert Buffer explicitly after first Expression @@ -37,16 +40,16 @@ LoweredExprIR::constExprIt BufferInsertion::insertion_position(const LoweredExpr const auto up_loop_id = up_loops[loop_idx]; const auto down_loop_id = down_loops[loop_idx]; - if (up_loop_id != LoweredExpr::LOOP_NULL_ID) { + if (up_loop_id != Expression::LOOP_NULL_ID) { // If upper expression is inside Loop, we should insert Buffer after this Loop const auto loop_info = loop_manager->get_loop_info(up_loop_id); - LoweredExprIR::constExprIt loop_begin_pos, loop_end_pos; + LinearIR::constExprIt loop_begin_pos, loop_end_pos; loop_manager->get_loop_bounds(linear_ir, up_loop_id, loop_begin_pos, loop_end_pos); return loop_end_pos; - } else if (down_loop_id != LoweredExpr::LOOP_NULL_ID) { + } else if (down_loop_id != Expression::LOOP_NULL_ID) { // If lower expression is inside Loop, we should insert Buffer before this Loop const auto loop_info = loop_manager->get_loop_info(down_loop_id); - LoweredExprIR::constExprIt loop_begin_pos, loop_end_pos; + LinearIR::constExprIt loop_begin_pos, loop_end_pos; loop_manager->get_loop_bounds(linear_ir, down_loop_id, loop_begin_pos, loop_end_pos); return loop_begin_pos; } else { @@ -54,8 +57,8 @@ LoweredExprIR::constExprIt BufferInsertion::insertion_position(const LoweredExpr } } -void BufferInsertion::insertion(LoweredExprIR& linear_ir, const LoweredExprIR::LoweredLoopManagerPtr& loop_manager, size_t loop_id, - const std::vector& loop_entries, const std::vector& loop_exits) { +void BufferInsertion::insertion(LinearIR& linear_ir, const LinearIR::LoopManagerPtr& loop_manager, size_t loop_id, + const std::vector& loop_entries, const std::vector& loop_exits) { for (const auto& entry_point : loop_entries) { const auto expr = entry_point.expr; const auto port = entry_point.port; @@ -85,8 +88,8 @@ void BufferInsertion::insertion(LoweredExprIR& linear_ir, const LoweredExprIR::L const auto current_loop_lvl = std::distance(current_loops.begin(), std::find(current_loops.begin(), current_loops.end(), loop_id)); for (size_t i = current_loop_lvl; i < current_loop_count; i++) { if (current_loops[i] != parent_loops[i] && - current_loops[i] != LoweredExpr::LOOP_NULL_ID && - parent_loops[i] != LoweredExpr::LOOP_NULL_ID) { + current_loops[i] != Expression::LOOP_NULL_ID && + parent_loops[i] != Expression::LOOP_NULL_ID) { is_buffer_needed = true; break; } @@ -106,7 +109,7 @@ void BufferInsertion::insertion(LoweredExprIR& linear_ir, const LoweredExprIR::L input_td->get_layout()); const std::vector buffer_outs = { td }; const std::vector parent_outs = { input_td }; - linear_ir.insert(pos, std::make_shared(buffer, parent_outs, buffer_outs)); + linear_ir.insert(pos, std::make_shared(buffer, parent_outs, buffer_outs)); linear_ir.replace_input(expr, port, td); } } @@ -121,8 +124,8 @@ void BufferInsertion::insertion(LoweredExprIR& linear_ir, const LoweredExprIR::L const auto current_loop_count = current_loops.size(); const std::vector node_outs = {output_td}; - std::set potential_consumers; - std::set buffers; + std::set potential_consumers; + std::set buffers; const auto current_loop_lvl = 
std::distance(current_loops.begin(), std::find(current_loops.begin(), current_loops.end(), loop_id)); for (const auto& child_expr_input : child_exprs_inputs) { const auto& child_expr = child_expr_input.expr; @@ -148,8 +151,8 @@ void BufferInsertion::insertion(LoweredExprIR& linear_ir, const LoweredExprIR::L OPENVINO_ASSERT(current_loop_count == child_loop_count, "The Loop IDs must be normalized!"); for (size_t i = current_loop_lvl; i < child_loop_count; i++) { if (current_loops[i] != child_loops[i] && - current_loops[i] != LoweredExpr::LOOP_NULL_ID && - child_loops[i] != LoweredExpr::LOOP_NULL_ID) { + current_loops[i] != Expression::LOOP_NULL_ID && + child_loops[i] != Expression::LOOP_NULL_ID) { potential_consumers.insert(child_expr_input); break; } @@ -193,7 +196,7 @@ void BufferInsertion::insertion(LoweredExprIR& linear_ir, const LoweredExprIR::L // | <- It should be new TD // Relu const std::vector buffer_outs = {td}; - linear_ir.insert(pos, std::make_shared(buffer, node_outs, buffer_outs)); + linear_ir.insert(pos, std::make_shared(buffer, node_outs, buffer_outs)); for (const auto& consumer_input : potential_consumers) { const auto consumer = consumer_input.expr; const auto consumer_port = consumer_input.port; @@ -203,7 +206,7 @@ void BufferInsertion::insertion(LoweredExprIR& linear_ir, const LoweredExprIR::L } } -bool BufferInsertion::run(LoweredExprIR& linear_ir) { +bool BufferInsertion::run(LinearIR& linear_ir) { OV_ITT_SCOPED_TASK(ngraph::pass::itt::domains::SnippetsTransform, "Snippets::BufferInsertion") if (linear_ir.empty()) return false; @@ -228,22 +231,22 @@ bool BufferInsertion::run(LoweredExprIR& linear_ir) { const auto input_ports = ma->get_memory_access_input_ports(); const auto output_ports = ma->get_memory_access_output_ports(); - std::vector loop_entries(input_ports.size()), loop_exits(output_ports.size()); + std::vector loop_entries(input_ports.size()), loop_exits(output_ports.size()); // C++17: for (auto const& [loop_id, loop_info] : loop_data_map) for (const auto& p : input_ports) { - loop_entries[p.first] = LoweredExprPort::make_input(expr, p.first); + loop_entries[p.first] = expr->input_port(p.first); } for (const auto& p : output_ports) { - loop_exits[p.first] = LoweredExprPort::make_output(expr, p.first); + loop_exits[p.first] = expr->output_port(p.first); } - insertion(linear_ir, loop_manager, LoweredExpr::LOOP_NULL_ID, loop_entries, loop_exits); + insertion(linear_ir, loop_manager, Expression::LOOP_NULL_ID, loop_entries, loop_exits); } return true; } -} // namespace lowered } // namespace pass +} // namespace lowered } // namespace snippets } // namespace ngraph diff --git a/src/common/snippets/src/pass/lowered/buffer_reset.cpp b/src/common/snippets/src/lowered/pass/buffer_reset.cpp similarity index 93% rename from src/common/snippets/src/pass/lowered/buffer_reset.cpp rename to src/common/snippets/src/lowered/pass/buffer_reset.cpp index 84e89db123c847..c826c584c21534 100644 --- a/src/common/snippets/src/pass/lowered/buffer_reset.cpp +++ b/src/common/snippets/src/lowered/pass/buffer_reset.cpp @@ -2,17 +2,18 @@ // SPDX-License-Identifier: Apache-2.0 // -#include "snippets/pass/lowered/buffer_reset.hpp" +#include "snippets/lowered/pass/buffer_reset.hpp" + +#include "snippets/lowered/linear_ir.hpp" #include "snippets/snippets_isa.hpp" -#include "snippets/lowered_expr.hpp" #include "snippets/itt.hpp" namespace ngraph { namespace snippets { -namespace pass { namespace lowered { +namespace pass { -bool BufferReset::reuse_buffer_increments(const LoweredExprIR& 
linear_ir, const LoweredExprPtr& loop_end_expr) { +bool BufferReset::reuse_buffer_increments(const LinearIR& linear_ir, const ExpressionPtr& loop_end_expr) { const auto loop_end = ov::as_type_ptr(loop_end_expr->get_node()); if (!loop_end) return false; @@ -74,7 +75,7 @@ bool BufferReset::reuse_buffer_increments(const LoweredExprIR& linear_ir, const return true; } -bool BufferReset::run(LoweredExprIR& linear_ir) { +bool BufferReset::run(LinearIR& linear_ir) { OV_ITT_SCOPED_TASK(ngraph::pass::itt::domains::SnippetsTransform, "Snippets::BufferReset") bool modified = false; @@ -88,7 +89,7 @@ bool BufferReset::run(LoweredExprIR& linear_ir) { return modified; } -} // namespace lowered } // namespace pass +} // namespace lowered } // namespace snippets } // namespace ngraph diff --git a/src/common/snippets/src/pass/lowered/cleanup_loop_offsets.cpp b/src/common/snippets/src/lowered/pass/cleanup_loop_offsets.cpp similarity index 95% rename from src/common/snippets/src/pass/lowered/cleanup_loop_offsets.cpp rename to src/common/snippets/src/lowered/pass/cleanup_loop_offsets.cpp index 15ccf948eb634e..b35043e132b39c 100644 --- a/src/common/snippets/src/pass/lowered/cleanup_loop_offsets.cpp +++ b/src/common/snippets/src/lowered/pass/cleanup_loop_offsets.cpp @@ -2,16 +2,18 @@ // SPDX-License-Identifier: Apache-2.0 // -#include "snippets/pass/lowered/cleanup_loop_offsets.hpp" +#include "snippets/lowered/pass/cleanup_loop_offsets.hpp" + +#include "snippets/lowered/linear_ir.hpp" #include "snippets/snippets_isa.hpp" #include "snippets/itt.hpp" namespace ngraph { namespace snippets { -namespace pass { namespace lowered { +namespace pass { -bool CleanupLoopOffsets::run(LoweredExprIR& linear_ir) { +bool CleanupLoopOffsets::run(LinearIR& linear_ir) { OV_ITT_SCOPED_TASK(ngraph::pass::itt::domains::SnippetsTransform, "Snippets::CleanupLoopOffsets") if (linear_ir.empty()) return false; @@ -57,8 +59,8 @@ bool CleanupLoopOffsets::run(LoweredExprIR& linear_ir) { return is_modified; } -} // namespace lowered } // namespace pass +} // namespace lowered } // namespace snippets } // namespace ngraph diff --git a/src/common/snippets/src/pass/lowered/insert_tail_loop.cpp b/src/common/snippets/src/lowered/pass/insert_tail_loop.cpp similarity index 92% rename from src/common/snippets/src/pass/lowered/insert_tail_loop.cpp rename to src/common/snippets/src/lowered/pass/insert_tail_loop.cpp index 391d4cd7dd18ff..d9bed42e347d0f 100644 --- a/src/common/snippets/src/pass/lowered/insert_tail_loop.cpp +++ b/src/common/snippets/src/lowered/pass/insert_tail_loop.cpp @@ -2,18 +2,20 @@ // SPDX-License-Identifier: Apache-2.0 // -#include "snippets/pass/lowered/insert_tail_loop.hpp" +#include "snippets/lowered/pass/insert_tail_loop.hpp" + +#include "snippets/lowered/linear_ir.hpp" #include "snippets/snippets_isa.hpp" #include "snippets/itt.hpp" namespace ngraph { namespace snippets { -namespace pass { namespace lowered { +namespace pass { -void InsertTailLoop::tail_transformations(LoweredExprIR& linear_ir, - LoweredExprIR::container::const_iterator tail_begin, - LoweredExprIR::container::const_iterator tail_end, +void InsertTailLoop::tail_transformations(LinearIR& linear_ir, + LinearIR::container::const_iterator tail_begin, + LinearIR::container::const_iterator tail_end, const size_t tail_size) { const auto& config = linear_ir.get_config(); auto insertFill = [tail_size](const ov::Input& input) -> std::shared_ptr { @@ -41,7 +43,7 @@ void InsertTailLoop::tail_transformations(LoweredExprIR& linear_ir, if (auto fill = 
insertFill(op->input(i))) { std::vector inputs{expr_it->get()->get_inputs()[i]}; // Note: inputs == outputs, since we want to modify vector reg inplace - auto fill_expr = std::make_shared(fill, inputs, inputs); + auto fill_expr = std::make_shared(fill, inputs, inputs); auto reg = expr_it->get()->get_reg_info().first[i]; fill_expr->set_reg_info({{reg}, {reg}}); linear_ir.insert(expr_it, fill_expr); @@ -65,7 +67,7 @@ void InsertTailLoop::tail_transformations(LoweredExprIR& linear_ir, } } -bool InsertTailLoop::run(LoweredExprIR& linear_ir) { +bool InsertTailLoop::run(LinearIR& linear_ir) { OV_ITT_SCOPED_TASK(ngraph::pass::itt::domains::SnippetsTransform, "Snippets::insertTailLoop") bool modified = false; // *1* solo vector/tail loop + empty outer loop @@ -100,7 +102,7 @@ bool InsertTailLoop::run(LoweredExprIR& linear_ir) { auto is_buffer_output = [&linear_ir](const TensorDescriptorPtr& output) { const auto& child_exprs_inputs = linear_ir.get_exprs_by_input(output); return std::any_of(child_exprs_inputs.begin(), child_exprs_inputs.end(), - [](const LoweredExprPort& lp) {return ov::is_type(lp.expr->get_node());}); + [](const ExpressionPort& lp) {return ov::is_type(lp.expr->get_node());}); }; const auto loop_end_expr = linear_ir.get_expr_by_node(loop_end); @@ -151,15 +153,15 @@ bool InsertTailLoop::run(LoweredExprIR& linear_ir) { // tail loop is fake loop because for tail we should calculate only // finalization offsets which are supported by LoopEnd. if (need_tail) { - LoweredExprIR::constExprIt tail_begin; - LoweredExprIR::constExprIt tail_end; + LinearIR::constExprIt tail_begin; + LinearIR::constExprIt tail_end; if (need_vector_loop) { // todo: we have to clone nodes here since tail transformations can change the same nodes // (e.g. reset Load&Store count). this is a bit costy. 
- auto vector_loop_deep_copy = LoweredExprIR::deep_copy_range(loop_begin_expr_it, expr_it); - auto is_par_or_res = [](const LoweredExprPtr& expr) { + auto vector_loop_deep_copy = LinearIR::deep_copy_range(loop_begin_expr_it, expr_it); + auto is_par_or_res = [](const ExpressionPtr& expr) { return is_type(expr->get_node()) || is_type(expr->get_node()); }; @@ -197,8 +199,8 @@ bool InsertTailLoop::run(LoweredExprIR& linear_ir) { return modified; } -} // namespace lowered } // namespace pass +} // namespace lowered } // namespace snippets } // namespace ngraph diff --git a/src/common/snippets/src/pass/lowered/load_movebroadcast_to_broadcastload.cpp b/src/common/snippets/src/lowered/pass/load_movebroadcast_to_broadcastload.cpp similarity index 89% rename from src/common/snippets/src/pass/lowered/load_movebroadcast_to_broadcastload.cpp rename to src/common/snippets/src/lowered/pass/load_movebroadcast_to_broadcastload.cpp index 5e8a980bfcc679..8a13cf2328d6c1 100644 --- a/src/common/snippets/src/pass/lowered/load_movebroadcast_to_broadcastload.cpp +++ b/src/common/snippets/src/lowered/pass/load_movebroadcast_to_broadcastload.cpp @@ -2,23 +2,24 @@ // SPDX-License-Identifier: Apache-2.0 // -#include "snippets/itt.hpp" +#include "snippets/lowered/pass/load_movebroadcast_to_broadcastload.hpp" -#include "snippets/pass/lowered/load_movebroadcast_to_broadcastload.hpp" +#include "snippets/lowered/linear_ir.hpp" #include "snippets/snippets_isa.hpp" +#include "snippets/itt.hpp" namespace ngraph { namespace snippets { -namespace pass { namespace lowered { +namespace pass { -bool LoadMoveBroadcastToBroadcastLoad::run(LoweredExprIR& linear_ir) { +bool LoadMoveBroadcastToBroadcastLoad::run(LinearIR& linear_ir) { OV_ITT_SCOPED_TASK(ngraph::pass::itt::domains::SnippetsTransform, "Snippets::LoadMoveBroadcastToBroadcastLoad") bool modified = false; for (auto expr_it = linear_ir.begin(); expr_it != linear_ir.end(); expr_it++) { - const auto &op = (*expr_it)->get_node(); + const auto& op = (*expr_it)->get_node(); // Match on MoveBroadcast because MoveBroadcast is a rare node in bodies if (const auto move_broadcast = ov::as_type_ptr(op)) { const auto interm_td = (*expr_it)->get_inputs().front(); @@ -48,14 +49,14 @@ bool LoadMoveBroadcastToBroadcastLoad::run(LoweredExprIR& linear_ir) { const auto insertion_pos = std::next(expr_it); linear_ir.erase(std::find(linear_ir.begin(), mv_expr_it, parent_expr)); linear_ir.erase(mv_expr_it); - expr_it = linear_ir.insert(insertion_pos, std::make_shared(broadcastload, in_td, out_td)); + expr_it = linear_ir.insert(insertion_pos, std::make_shared(broadcastload, in_td, out_td)); modified |= true; } } return modified; } -} // namespace lowered } // namespace pass +} // namespace lowered } // namespace snippets } // namespace ngraph diff --git a/src/common/snippets/src/pass/lowered/load_store_insertion.cpp b/src/common/snippets/src/lowered/pass/load_store_insertion.cpp similarity index 73% rename from src/common/snippets/src/pass/lowered/load_store_insertion.cpp rename to src/common/snippets/src/lowered/pass/load_store_insertion.cpp index 7a9cde9cf38a5e..b97375e2378d36 100644 --- a/src/common/snippets/src/pass/lowered/load_store_insertion.cpp +++ b/src/common/snippets/src/lowered/pass/load_store_insertion.cpp @@ -2,20 +2,23 @@ // SPDX-License-Identifier: Apache-2.0 // -#include "snippets/pass/lowered/load_store_insertion.hpp" +#include
"snippets/lowered/pass/load_store_insertion.hpp" + +#include "snippets/lowered/linear_ir.hpp" +#include "snippets/lowered/loop_manager.hpp" #include "snippets/snippets_isa.hpp" #include "snippets/itt.hpp" namespace ngraph { namespace snippets { -namespace pass { namespace lowered { +namespace pass { namespace { auto get_inner_loop_id(const std::vector& loop_ids) -> size_t { - size_t inner_loop = LoweredExpr::LOOP_NULL_ID; + size_t inner_loop = Expression::LOOP_NULL_ID; for (int i = static_cast(loop_ids.size()) - 1; i >= 0; --i) { - if (loop_ids[i] != LoweredExpr::LOOP_NULL_ID) { + if (loop_ids[i] != Expression::LOOP_NULL_ID) { inner_loop = loop_ids[i]; break; } @@ -24,21 +27,21 @@ auto get_inner_loop_id(const std::vector& loop_ids) -> size_t { } } // namespace -using LoweredLoopManager = LoweredExprIR::LoweredLoopManager; -using LoweredLoopInfoPtr = LoweredLoopManager::LoweredLoopInfoPtr; +using LoopManager = LinearIR::LoopManager; +using LoopInfoPtr = LoopManager::LoopInfoPtr; LoadStoreInsertion::LoadStoreInsertion(size_t vector_size) : m_vector_size(vector_size) {} -void LoadStoreInsertion::update_loops(const LoweredExprIR::LoweredLoopManagerPtr& loop_manager, const std::vector& loop_ids, - const LoweredExprPort& actual_port, const std::vector& target_ports, bool is_entry) { +void LoadStoreInsertion::update_loops(const LinearIR::LoopManagerPtr& loop_manager, const std::vector& loop_ids, + const ExpressionPort& actual_port, const std::vector& target_ports, bool is_entry) { for (auto loop_id : loop_ids) { - if (loop_id != LoweredExpr::LOOP_NULL_ID) + if (loop_id != Expression::LOOP_NULL_ID) update_loop(loop_manager->get_loop_info(loop_id), actual_port, target_ports, is_entry); } } -void LoadStoreInsertion::update_loop(const LoweredExprIR::LoweredLoopManager::LoweredLoopInfoPtr& loop_info, - const LoweredExprPort& actual_port, const std::vector& target_ports, bool is_entry) { +void LoadStoreInsertion::update_loop(const LinearIR::LoopManager::LoopInfoPtr& loop_info, + const ExpressionPort& actual_port, const std::vector& target_ports, bool is_entry) { auto& ports = is_entry ? 
loop_info->entry_exprs : loop_info->exit_exprs; auto port_it = std::find(ports.begin(), ports.end(), actual_port); if (port_it == ports.end()) @@ -47,7 +50,7 @@ void LoadStoreInsertion::update_loop(const LoweredExprIR::LoweredLoopManager::Lo ports.insert(port_it, target_ports.cbegin(), target_ports.cend()); } -bool LoadStoreInsertion::insert_load(LoweredExprIR& linear_ir, const LoweredExprIR::constExprIt& data_expr_it) { +bool LoadStoreInsertion::insert_load(LinearIR& linear_ir, const LinearIR::constExprIt& data_expr_it) { const auto& loop_manager = linear_ir.get_loop_manager(); const auto& data_expr = *data_expr_it; const auto& data_node = data_expr->get_node(); @@ -66,7 +69,7 @@ bool LoadStoreInsertion::insert_load(LoweredExprIR& linear_ir, const LoweredExpr // Find Inner Loop const auto& loop_ids = consumer_expr->get_loop_ids(); const auto inner_loop = get_inner_loop_id(loop_ids); - OPENVINO_ASSERT(inner_loop != LoweredExpr::LOOP_NULL_ID, "Loop hasn't been found!"); + OPENVINO_ASSERT(inner_loop != Expression::LOOP_NULL_ID, "Loop hasn't been found!"); const auto load_td = std::make_shared(output_td->get_tensor(), output_td->get_subtensor(), @@ -74,7 +77,7 @@ bool LoadStoreInsertion::insert_load(LoweredExprIR& linear_ir, const LoweredExpr const auto load = std::make_shared(data_node->output(0), m_vector_size); const auto load_outs = std::vector{ load_td }; const auto param_outs = std::vector{ output_td }; - const auto load_expr = std::make_shared(load, param_outs, load_outs); + const auto load_expr = std::make_shared(load, param_outs, load_outs); linear_ir.insert(std::find(data_expr_it, linear_ir.cend(), consumer_expr), load_expr); linear_ir.replace_input(consumer_expr, port, load_td); // Copy Loop identifiers @@ -82,7 +85,7 @@ bool LoadStoreInsertion::insert_load(LoweredExprIR& linear_ir, const LoweredExpr // Need to update all the corresponding Loops with the same Entry Point const auto prev_entry_point = consumer_input; - const auto new_entry_point = LoweredExprPort::make_input(load_expr, 0); + const auto new_entry_point = load_expr->input_port(0); update_loops(loop_manager, loop_ids, prev_entry_point, {new_entry_point}, true); was_inserted = true; } @@ -90,7 +93,7 @@ bool LoadStoreInsertion::insert_load(LoweredExprIR& linear_ir, const LoweredExpr return was_inserted; } -bool LoadStoreInsertion::insert_store(LoweredExprIR& linear_ir, const LoweredExprIR::constExprIt& data_expr_it) { +bool LoadStoreInsertion::insert_store(LinearIR& linear_ir, const LinearIR::constExprIt& data_expr_it) { const auto& loop_manager = linear_ir.get_loop_manager(); const auto& data_expr = *data_expr_it; const auto& input_td = data_expr->get_inputs().front(); @@ -105,7 +108,7 @@ bool LoadStoreInsertion::insert_store(LoweredExprIR& linear_ir, const LoweredExp // Find Inner Loop const auto& loop_ids = parent_expr->get_loop_ids(); const auto inner_loop = get_inner_loop_id(loop_ids); - OPENVINO_ASSERT(inner_loop != LoweredExpr::LOOP_NULL_ID, "Loop hasn't been found!"); + OPENVINO_ASSERT(inner_loop != Expression::LOOP_NULL_ID, "Loop hasn't been found!"); const auto store_td = std::make_shared(input_td->get_tensor(), input_td->get_subtensor(), @@ -113,8 +116,8 @@ bool LoadStoreInsertion::insert_store(LoweredExprIR& linear_ir, const LoweredExp const auto store = std::make_shared(parent->output(port), m_vector_size); const auto store_outs = std::vector{ store_td }; const auto param_outs = std::vector{ input_td }; - const auto store_expr = std::make_shared(store, param_outs, store_outs); - const auto&
reverse_insertion_pos = std::find(std::reverse_iterator(data_expr_it), linear_ir.crend(), parent_expr); + const auto store_expr = std::make_shared(store, param_outs, store_outs); + const auto& reverse_insertion_pos = std::find(std::reverse_iterator(data_expr_it), linear_ir.crend(), parent_expr); const auto& insertion_pos = reverse_insertion_pos.base(); linear_ir.insert(insertion_pos, store_expr); linear_ir.replace_input(data_expr, 0, store_td); @@ -127,24 +130,24 @@ bool LoadStoreInsertion::insert_store(LoweredExprIR& linear_ir, const LoweredExp // So we should verify on the possible future exit points const auto consumer_inputs = linear_ir.get_exprs_by_input(input_td); const auto should_be_saved = std::any_of(consumer_inputs.begin(), consumer_inputs.end(), - [](const LoweredExprPort& input_port) { + [](const ExpressionPort& input_port) { const auto& node = input_port.expr->get_node(); return ov::is_type(node) || ov::is_type(node); }); - const auto new_exit_point = LoweredExprPort::make_output(store_expr, 0); - const auto new_exit_points = should_be_saved ? std::vector{prev_exit_point, new_exit_point} - : std::vector{new_exit_point}; + const auto new_exit_point = store_expr->output_port(0); + const auto new_exit_points = should_be_saved ? std::vector{prev_exit_point, new_exit_point} + : std::vector{new_exit_point}; update_loops(loop_manager, loop_ids, prev_exit_point, new_exit_points, false); return true; } -bool LoadStoreInsertion::run(LoweredExprIR& linear_ir) { +bool LoadStoreInsertion::run(LinearIR& linear_ir) { OV_ITT_SCOPED_TASK(ngraph::pass::itt::domains::SnippetsTransform, "Snippets::LoadStoreInsertion") bool modified = false; for (auto expr_it = linear_ir.begin(); expr_it != linear_ir.end(); expr_it++) { const auto expr = *expr_it; - const auto &node = expr->get_node(); + const auto& node = expr->get_node(); if (ov::is_type(node) || ov::is_type(node)) { modified |= insert_load(linear_ir, expr_it); } @@ -157,7 +160,7 @@ bool LoadStoreInsertion::run(LoweredExprIR& linear_ir) { return modified; } -} // namespace lowered } // namespace pass +} // namespace lowered } // namespace snippets } // namespace ngraph diff --git a/src/common/snippets/src/pass/lowered/loop_fusion.cpp b/src/common/snippets/src/lowered/pass/loop_fusion.cpp similarity index 86% rename from src/common/snippets/src/pass/lowered/loop_fusion.cpp rename to src/common/snippets/src/lowered/pass/loop_fusion.cpp index 84c10e39a8b76a..cfc305d5dd245d 100644 --- a/src/common/snippets/src/pass/lowered/loop_fusion.cpp +++ b/src/common/snippets/src/lowered/pass/loop_fusion.cpp @@ -2,21 +2,24 @@ // SPDX-License-Identifier: Apache-2.0 // -#include "snippets/pass/lowered/loop_fusion.hpp" +#include "snippets/lowered/pass/loop_fusion.hpp" + +#include "snippets/lowered/linear_ir.hpp" +#include "snippets/lowered/loop_manager.hpp" #include "snippets/snippets_isa.hpp" #include "snippets/itt.hpp" namespace ngraph { namespace snippets { -namespace pass { namespace lowered { +namespace pass { -using LoweredLoopManager = LoweredExprIR::LoweredLoopManager; -using LoweredLoopInfoPtr = LoweredLoopManager::LoweredLoopInfoPtr; +using LoopManager = LinearIR::LoopManager; +using LoopInfoPtr = LoopManager::LoopInfoPtr; -LoopFusion::LoopFusion() : LinearIRTransformation() {} +LoopFusion::LoopFusion() : Transformation() {} -bool LoopFusion::can_be_fused(const LoweredLoopInfoPtr& loop_current, const LoweredLoopInfoPtr& loop_target) { +bool LoopFusion::can_be_fused(const LoopInfoPtr& loop_current, const LoopInfoPtr& loop_target) { auto 
current_work_amount = loop_current->work_amount; auto current_increment = loop_current->increment; auto target_work_amount = loop_target->work_amount; @@ -26,21 +29,21 @@ bool LoopFusion::can_be_fused(const LoweredLoopInfoPtr& loop_current, const Lowe return supported_work_amount && supported_increment; } -void LoopFusion::fuse_points(LoweredExprIR& linear_ir, std::vector& exit_points, std::vector& entry_points, - LoweredExprIR::constExprIt loop_begin_pos, LoweredExprIR::constExprIt loop_end_pos) { - std::vector new_exit_points; +void LoopFusion::fuse_points(LinearIR& linear_ir, std::vector& exit_points, std::vector& entry_points, + LinearIR::constExprIt loop_begin_pos, LinearIR::constExprIt loop_end_pos) { + std::vector new_exit_points; for (const auto& exit_point : exit_points) { const auto expr = exit_point.expr; const auto port = exit_point.port; const auto output_td = expr->get_outputs()[port]; const auto consumers_inputs = linear_ir.get_exprs_by_input(output_td); - std::vector mapped_entry_points; - std::vector outside_consumers; + std::vector mapped_entry_points; + std::vector outside_consumers; for (const auto& consumer_input : consumers_inputs) { const auto consumer = consumer_input.expr; const auto consumer_port = consumer_input.port; - const auto consumer_point = LoweredExprPort::make_input(consumer, consumer_port); + const auto consumer_point = consumer->input_port(consumer_port); const auto entry_point_it = std::find(entry_points.begin(), entry_points.end(), consumer_point); if (entry_point_it != entry_points.end()) { mapped_entry_points.push_back(*entry_point_it); @@ -69,16 +72,16 @@ void LoopFusion::fuse_points(LoweredExprIR& linear_ir, std::vectorget_loop_info(current_loop_id); const auto& loop_target = loop_manager->get_loop_info(target_loop_id); if (!can_be_fused(loop_current, loop_target)) return false; - LoweredExprIR::constExprIt target_loop_begin_pos, target_loop_end_pos; + LinearIR::constExprIt target_loop_begin_pos, target_loop_end_pos; loop_manager->get_loop_bounds(linear_ir, target_loop_id, target_loop_begin_pos, target_loop_end_pos); // We can fuse Loop_up into Loop_down only when the other consumers of Loop_up are after Loop_down @@ -132,9 +135,9 @@ bool LoopFusion::fuse_upper_into_current(LoweredExprIR& linear_ir, const Lowered // Update work_amount for Loop (increment is constant because increments must be identical for fusion): loop_current->work_amount = std::max(loop_current->work_amount, loop_target->work_amount); - std::vector new_entries = target_entry_points; + std::vector new_entries = target_entry_points; new_entries.insert(new_entries.end(), current_entry_points.begin(), current_entry_points.end()); - std::vector new_exits = target_exit_points; + std::vector new_exits = target_exit_points; new_exits.insert(new_exits.end(), current_exit_points.begin(), current_exit_points.end()); loop_current->entry_exprs = new_entries; @@ -143,10 +146,10 @@ bool LoopFusion::fuse_upper_into_current(LoweredExprIR& linear_ir, const Lowered return true; }
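
As a reference point, here is a minimal sketch of the fusibility predicate that gates both fusion directions, assuming only the LoopInfo fields visible in this diff (work_amount, increment); the exact checks in can_be_fused may differ:

#include <cstddef>

// Hypothetical stand-in for LoopFusion::can_be_fused: increments must be
// identical, and work amounts must match or be broadcastable (one equals 1),
// which is consistent with the std::max merge of work_amount above.
bool can_be_fused_sketch(size_t current_work_amount, size_t current_increment,
                         size_t target_work_amount, size_t target_increment) {
    const bool supported_work_amount = current_work_amount == target_work_amount ||
                                       current_work_amount == 1 || target_work_amount == 1;
    const bool supported_increment = current_increment == target_increment;
    return supported_work_amount && supported_increment;
}
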
-bool LoopFusion::fuse_lower_into_current(LoweredExprIR& linear_ir, const LoweredExprIR::LoweredLoopManagerPtr& loop_manager, - const LoweredExprPort& current_exit_point, const LoweredExprPort& target_entry_point, +bool LoopFusion::fuse_lower_into_current(LinearIR& linear_ir, const LinearIR::LoopManagerPtr& loop_manager, + const ExpressionPort& current_exit_point, const ExpressionPort& target_entry_point, size_t current_loop_id, size_t target_loop_id, size_t dim_idx, - LoweredExprIR::constExprIt& current_loop_begin_pos, LoweredExprIR::constExprIt& current_loop_end_pos) { + LinearIR::constExprIt& current_loop_begin_pos, LinearIR::constExprIt& current_loop_end_pos) { const auto& loop_current = loop_manager->get_loop_info(current_loop_id); const auto& loop_target = loop_manager->get_loop_info(target_loop_id); if (!can_be_fused(loop_current, loop_target)) @@ -171,7 +174,7 @@ bool LoopFusion::fuse_lower_into_current(LoweredExprIR& linear_ir, const Lowered if (!is_fusion_allowed) return false; - LoweredExprIR::constExprIt target_loop_begin_pos, target_loop_end_pos; + LinearIR::constExprIt target_loop_begin_pos, target_loop_end_pos; loop_manager->get_loop_bounds(linear_ir, target_loop_id, target_loop_begin_pos, target_loop_end_pos); // Update entry and exit points in the current Loop information before moving, while the Loop iterators are still valid @@ -202,9 +205,9 @@ bool LoopFusion::fuse_lower_into_current(LoweredExprIR& linear_ir, const Lowered // Update work_amount for Loop (increment is constant because increments must be identical for fusion): loop_current->work_amount = std::max(loop_current->work_amount, loop_target->work_amount); - std::vector& new_entries = current_entry_points; + std::vector& new_entries = current_entry_points; new_entries.insert(new_entries.end(), target_entry_points.begin(), target_entry_points.end()); - std::vector& new_exits = current_exit_points; + std::vector& new_exits = current_exit_points; new_exits.insert(new_exits.end(), target_exit_points.begin(), target_exit_points.end()); loop_current->entry_exprs = new_entries; @@ -213,7 +216,7 @@ bool LoopFusion::fuse_lower_into_current(LoweredExprIR& linear_ir, const Lowered return true; } -bool LoopFusion::run(LoweredExprIR& linear_ir) { +bool LoopFusion::run(LinearIR& linear_ir) { OV_ITT_SCOPED_TASK(ngraph::pass::itt::domains::SnippetsTransform, "Snippets::LoopFusion") if (linear_ir.empty()) return false; @@ -246,11 +249,11 @@ bool LoopFusion::run(LoweredExprIR& linear_ir) { for (size_t dim_idx = diff_idx; dim_idx < loop_depth; ++dim_idx) { const auto loop_id = expr_loops[dim_idx]; - if (loop_id == LoweredExpr::LOOP_NULL_ID) + if (loop_id == Expression::LOOP_NULL_ID) continue; const auto loop_info = loop_manager->get_loop_info(loop_id); - LoweredExprIR::constExprIt loop_begin_pos, loop_end_pos; + LinearIR::constExprIt loop_begin_pos, loop_end_pos; loop_manager->get_loop_bounds(linear_ir, loop_id, loop_begin_pos, loop_end_pos); // We fuse upper Loops into the current one while we can. @@ -283,11 +286,11 @@ bool LoopFusion::run(LoweredExprIR& linear_ir) { const auto loop_id_target = loop_ids_target[dim_idx]; OPENVINO_ASSERT(loop_id != loop_id_target, "Loops cannot have parents of entry points with the same identifier"); - if (loop_id_target == LoweredExpr::LOOP_NULL_ID) + if (loop_id_target == Expression::LOOP_NULL_ID) continue; const auto loop_info_target = loop_manager->get_loop_info(loop_id_target); - const auto target_exit_port = LoweredExprPort::make_output(parent_expr, out_port); + const auto target_exit_port = parent_expr->output_port(out_port); if (fuse_upper_into_current(linear_ir, loop_manager, entry_point, target_exit_port, loop_id, loop_id_target, dim_idx, loop_begin_pos, loop_end_pos)) { was_fusion_up = true; @@ -325,11 +328,11 @@ bool LoopFusion::run(LoweredExprIR& linear_ir) { // The exit point of a Loop can have several consumers, some of which can be in this Loop as well, // so we skip such consumers.
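
Stepping back, the control flow of run() can be summarized by this hedged sketch; the callables are stand-ins for scanning the current loop's entry/exit points, not the real member functions:

#include <functional>

// `try_fuse_up` / `try_fuse_down` stand in for iterating the entry/exit
// points and calling fuse_upper_into_current / fuse_lower_into_current;
// each returns true if at least one fusion happened on this pass.
void fuse_to_fixpoint_sketch(const std::function<bool()>& try_fuse_up,
                             const std::function<bool()>& try_fuse_down) {
    bool fused = true;
    while (fused) {
        const bool up = try_fuse_up();     // may expose new exit points
        const bool down = try_fuse_down(); // may expose new entry points
        fused = up || down;                // stop when neither direction makes progress
    }
}
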
const auto loop_id_target = loop_ids_target[dim_idx]; - if (loop_id == loop_id_target || loop_id_target == LoweredExpr::LOOP_NULL_ID) + if (loop_id == loop_id_target || loop_id_target == Expression::LOOP_NULL_ID) continue; const auto loop_info_target = loop_manager->get_loop_info(loop_id_target); - const auto target_entry_port = LoweredExprPort::make_input(consumer_expr, in_port); + const auto target_entry_port = consumer_expr->input_port(in_port); if (fuse_lower_into_current(linear_ir, loop_manager, exit_point, target_entry_port, loop_id, loop_id_target, dim_idx, loop_begin_pos, loop_end_pos)) { was_fusion_down = true; @@ -350,7 +353,7 @@ bool LoopFusion::run(LoweredExprIR& linear_ir) { return true; } -} // namespace lowered } // namespace pass +} // namespace lowered } // namespace snippets } // namespace ngraph diff --git a/src/common/snippets/src/pass/lowered/loop_init.cpp b/src/common/snippets/src/lowered/pass/loop_init.cpp similarity index 89% rename from src/common/snippets/src/pass/lowered/loop_init.cpp rename to src/common/snippets/src/lowered/pass/loop_init.cpp index cbb0f9ee36d38e..8e03c1853e4973 100644 --- a/src/common/snippets/src/pass/lowered/loop_init.cpp +++ b/src/common/snippets/src/lowered/pass/loop_init.cpp @@ -2,20 +2,23 @@ // SPDX-License-Identifier: Apache-2.0 // -#include "snippets/pass/lowered/loop_init.hpp" +#include "snippets/lowered/pass/loop_init.hpp" + +#include "snippets/lowered/linear_ir.hpp" +#include "snippets/lowered/loop_manager.hpp" #include "snippets/snippets_isa.hpp" #include "snippets/itt.hpp" namespace ngraph { namespace snippets { -namespace pass { namespace lowered { +namespace pass { namespace { -void filter_ports(LoweredExprIR& linear_ir, - std::vector& loop_entries, std::vector& loop_exits) { - std::vector new_loop_entries; - std::vector new_loop_exits; +void filter_ports(LinearIR& linear_ir, + std::vector& loop_entries, std::vector& loop_exits) { + std::vector new_loop_entries; + std::vector new_loop_exits; new_loop_entries.reserve(loop_entries.size()); new_loop_exits.reserve(loop_exits.size()); @@ -60,10 +63,10 @@ int64_t get_dim_stride(const size_t dim, const std::vector& layout, cons } } // namespace -LoopInit::LoopInit() : LinearIRTransformation() {} +LoopInit::LoopInit() : Transformation() {} -std::vector LoopInit::init_ptr_increments(const std::vector& loop_inputs, - const std::vector& loop_outputs, +std::vector LoopInit::init_ptr_increments(const std::vector& loop_inputs, + const std::vector& loop_outputs, size_t dim_idx) const { std::vector ptr_increments; // Note: All loop inputs must have the same layout by definition. 
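
To make the layout/stride relationship concrete, here is a hedged sketch of what a helper like get_dim_stride (declared above) could compute; the names and exact semantics are illustrative assumptions, not this file's implementation:

#include <cstdint>
#include <vector>

// Stride of dimension `dim` in elements: the product of the sizes of all
// dimensions laid out inner to `dim`. `layout` is a permutation of dimension
// indices, outermost first; `shape` is indexed by original dimension index.
// Assumes `dim` occurs in `layout`.
int64_t get_dim_stride_sketch(size_t dim,
                              const std::vector<size_t>& layout,
                              const std::vector<size_t>& shape) {
    int64_t stride = 1;
    for (size_t i = layout.size(); i-- > 0;) {
        if (layout[i] == dim)
            break;
        stride *= static_cast<int64_t>(shape[layout[i]]);
    }
    return stride;
}

A per-port pointer increment for a loop iterating `dim` would then typically be this stride in elements, scaled by the element byte size collected in init_element_type_sizes.
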
@@ -131,8 +134,8 @@ std::vector LoopInit::init_finalization_offsets(const std::vector LoopInit::init_element_type_sizes(const std::vector& loop_inputs, - const std::vector& loop_outputs) { +std::vector LoopInit::init_element_type_sizes(const std::vector& loop_inputs, + const std::vector& loop_outputs) { std::vector element_types; element_types.reserve(loop_inputs.size() + loop_outputs.size()); for (const auto& in : loop_inputs) { @@ -144,15 +147,15 @@ std::vector LoopInit::init_element_type_sizes(const std::vectorentry_exprs; auto loop_exits = loop_info->exit_exprs; const auto work_amount = loop_info->work_amount; const auto work_amount_increment = loop_info->increment; - LoweredExprIR::constExprIt loop_begin_pos, loop_end_pos; - LoweredExprIR::LoweredLoopManager::get_loop_bounds(linear_ir, loop_entries, loop_exits, loop_begin_pos, loop_end_pos, loop_id); + LinearIR::constExprIt loop_begin_pos, loop_end_pos; + LinearIR::LoopManager::get_loop_bounds(linear_ir, loop_entries, loop_exits, loop_begin_pos, loop_end_pos, loop_id); filter_ports(linear_ir, loop_entries, loop_exits); @@ -161,7 +164,7 @@ bool LoopInit::insertion(LoweredExprIR& linear_ir, const LoweredExprIR::LoweredL const auto io_data_sizes = init_element_type_sizes(loop_entries, loop_exits); const auto& loop_begin = std::make_shared(); - const auto& loop_begin_expr = std::make_shared(loop_begin, std::vector{}); + const auto& loop_begin_expr = std::make_shared(loop_begin); linear_ir.insert(loop_begin_pos, loop_begin_expr); const auto& loop_end = std::make_shared( @@ -176,12 +179,12 @@ bool LoopInit::insertion(LoweredExprIR& linear_ir, const LoweredExprIR::LoweredL loop_end_inputs.push_back(expr_port.expr->get_outputs()[expr_port.port]); loop_end_inputs.push_back(linear_ir.get_expr_by_node(loop_begin)->get_outputs().front()); - const auto& loop_end_expr = std::make_shared(loop_end, loop_end_inputs); + const auto& loop_end_expr = std::make_shared(loop_end, loop_end_inputs, std::vector{}); linear_ir.insert(loop_end_pos, loop_end_expr); return true; } -bool LoopInit::run(LoweredExprIR& linear_ir) { +bool LoopInit::run(LinearIR& linear_ir) { OV_ITT_SCOPED_TASK(ngraph::pass::itt::domains::SnippetsTransform, "Snippets::LoopInit") if (linear_ir.empty()) return false; @@ -203,7 +206,7 @@ bool LoopInit::run(LoweredExprIR& linear_ir) { const auto loop_depth = expr_loops.size(); for (size_t i = 0; i < loop_depth; ++i) { const auto loop_id = expr_loops[i]; - if (loop_id == LoweredExpr::LOOP_NULL_ID) + if (loop_id == Expression::LOOP_NULL_ID) continue; bool need_to_insert = inserted_loops.find(loop_id) == inserted_loops.end(); if (need_to_insert) { @@ -220,7 +223,7 @@ bool LoopInit::run(LoweredExprIR& linear_ir) { return true; } -} // namespace lowered } // namespace pass +} // namespace lowered } // namespace snippets } // namespace ngraph diff --git a/src/common/snippets/src/pass/lowered/loop_markup.cpp b/src/common/snippets/src/lowered/pass/loop_markup.cpp similarity index 93% rename from src/common/snippets/src/pass/lowered/loop_markup.cpp rename to src/common/snippets/src/lowered/pass/loop_markup.cpp index bc0a159638fd42..eabb8839317384 100644 --- a/src/common/snippets/src/pass/lowered/loop_markup.cpp +++ b/src/common/snippets/src/lowered/pass/loop_markup.cpp @@ -2,18 +2,21 @@ // SPDX-License-Identifier: Apache-2.0 // -#include "snippets/pass/lowered/loop_markup.hpp" +#include "snippets/lowered/pass/loop_markup.hpp" + +#include "snippets/lowered/linear_ir.hpp" +#include "snippets/lowered/loop_manager.hpp" #include "snippets/snippets_isa.hpp" 
#include "snippets/itt.hpp" namespace ngraph { namespace snippets { -namespace pass { namespace lowered { +namespace pass { -LoopMarkup::LoopMarkup(size_t vector_size) : LinearIRTransformation(), m_vector_size(vector_size) {} +LoopMarkup::LoopMarkup(size_t vector_size) : Transformation(), m_vector_size(vector_size) {} -bool LoopMarkup::run(LoweredExprIR& linear_ir) { +bool LoopMarkup::run(LinearIR& linear_ir) { OV_ITT_SCOPED_TASK(ngraph::pass::itt::domains::SnippetsTransform, "Snippets::LoopMarkup") if (linear_ir.empty()) return false; @@ -86,7 +89,7 @@ bool LoopMarkup::run(LoweredExprIR& linear_ir) { return true; } -} // namespace lowered } // namespace pass +} // namespace lowered } // namespace snippets } // namespace ngraph diff --git a/src/common/snippets/src/pass/lowered/move_result_out_from_loop.cpp b/src/common/snippets/src/lowered/pass/move_result_out_from_loop.cpp similarity index 88% rename from src/common/snippets/src/pass/lowered/move_result_out_from_loop.cpp rename to src/common/snippets/src/lowered/pass/move_result_out_from_loop.cpp index 796020de66d1f7..82a73e6328d7cf 100644 --- a/src/common/snippets/src/pass/lowered/move_result_out_from_loop.cpp +++ b/src/common/snippets/src/lowered/pass/move_result_out_from_loop.cpp @@ -2,16 +2,19 @@ // SPDX-License-Identifier: Apache-2.0 // -#include "snippets/pass/lowered/move_result_out_of_loop.hpp" +#include "snippets/lowered/pass/move_result_out_of_loop.hpp" + +#include "snippets/lowered/linear_ir.hpp" +#include "snippets/lowered/loop_manager.hpp" #include "snippets/snippets_isa.hpp" #include "snippets/itt.hpp" namespace ngraph { namespace snippets { -namespace pass { namespace lowered { +namespace pass { -bool MoveResultOutOfLoop::run(LoweredExprIR& linear_ir) { +bool MoveResultOutOfLoop::run(LinearIR& linear_ir) { OV_ITT_SCOPED_TASK(ngraph::pass::itt::domains::SnippetsTransform, "Snippets::MoveResultOutOfLoop") if (linear_ir.empty()) return false; @@ -33,7 +36,7 @@ bool MoveResultOutOfLoop::run(LoweredExprIR& linear_ir) { const auto parent_loop_ids = parent_expr->get_loop_ids(); int outer_loop_id = static_cast(parent_loop_ids.size()) - 1; for (; outer_loop_id >= 0; --outer_loop_id) { - if (parent_loop_ids[outer_loop_id] != LoweredExpr::LOOP_NULL_ID) { + if (parent_loop_ids[outer_loop_id] != Expression::LOOP_NULL_ID) { break; } } @@ -52,7 +55,7 @@ bool MoveResultOutOfLoop::run(LoweredExprIR& linear_ir) { continue; } - LoweredExprIR::constExprIt loop_begin_pos, loop_end_pos; + LinearIR::constExprIt loop_begin_pos, loop_end_pos; loop_manager->get_loop_bounds(linear_ir, parent_loop_ids[outer_loop_id], loop_begin_pos, loop_end_pos); // If the Result isn't found after Outer LoopEnd, need to move it to there if (std::find(loop_end_pos, linear_ir.cend(), expr) == linear_ir.cend()) { @@ -65,7 +68,7 @@ bool MoveResultOutOfLoop::run(LoweredExprIR& linear_ir) { return modified; } -} // namespace lowered } // namespace pass +} // namespace lowered } // namespace snippets } // namespace ngraph diff --git a/src/common/snippets/src/pass/lowered/move_scalar_to_consumer.cpp b/src/common/snippets/src/lowered/pass/move_scalar_to_consumer.cpp similarity index 89% rename from src/common/snippets/src/pass/lowered/move_scalar_to_consumer.cpp rename to src/common/snippets/src/lowered/pass/move_scalar_to_consumer.cpp index 34403682635081..808530982446e3 100644 --- a/src/common/snippets/src/pass/lowered/move_scalar_to_consumer.cpp +++ b/src/common/snippets/src/lowered/pass/move_scalar_to_consumer.cpp @@ -2,16 +2,19 @@ // SPDX-License-Identifier: Apache-2.0 
// -#include "snippets/pass/lowered/move_scalar_to_consumer.hpp" +#include "snippets/lowered/pass/move_scalar_to_consumer.hpp" + +#include "snippets/lowered/linear_ir.hpp" +#include "snippets/lowered/loop_manager.hpp" #include "snippets/snippets_isa.hpp" #include "snippets/itt.hpp" namespace ngraph { namespace snippets { -namespace pass { namespace lowered { +namespace pass { -bool MoveScalarToConsumer::run(LoweredExprIR& linear_ir) { +bool MoveScalarToConsumer::run(LinearIR& linear_ir) { OV_ITT_SCOPED_TASK(ngraph::pass::itt::domains::SnippetsTransform, "Snippets::MoveScalarToConsumer") if (linear_ir.empty()) return false; @@ -42,7 +45,7 @@ bool MoveScalarToConsumer::run(LoweredExprIR& linear_ir) { return modified; } -} // namespace lowered } // namespace pass +} // namespace lowered } // namespace snippets } // namespace ngraph diff --git a/src/common/snippets/src/pass/lowered/propagate_layout.cpp b/src/common/snippets/src/lowered/pass/propagate_layout.cpp similarity index 89% rename from src/common/snippets/src/pass/lowered/propagate_layout.cpp rename to src/common/snippets/src/lowered/pass/propagate_layout.cpp index 688826c5401d36..fa3de373f0e23a 100644 --- a/src/common/snippets/src/pass/lowered/propagate_layout.cpp +++ b/src/common/snippets/src/lowered/pass/propagate_layout.cpp @@ -2,16 +2,19 @@ // SPDX-License-Identifier: Apache-2.0 // -#include "snippets/pass/lowered/propagate_layout.hpp" +#include "snippets/lowered/pass/propagate_layout.hpp" + +#include "snippets/lowered/linear_ir.hpp" +#include "snippets/lowered/loop_manager.hpp" #include "snippets/snippets_isa.hpp" #include "snippets/itt.hpp" namespace ngraph { namespace snippets { -namespace pass { namespace lowered { +namespace pass { -bool PropagateLayout::run(LoweredExprIR& linear_ir) { +bool PropagateLayout::run(LinearIR& linear_ir) { OV_ITT_SCOPED_TASK(ngraph::pass::itt::domains::SnippetsTransform, "Snippets::PropagateLayout") const auto& io_ops = linear_ir.get_IO_ops(); auto io_ops_it = io_ops.begin(); @@ -19,7 +22,7 @@ bool PropagateLayout::run(LoweredExprIR& linear_ir) { if (*expr_it == *io_ops_it) { const auto& expr = io_ops_it->get(); io_ops_it++; - const bool is_input = expr->get_type() == IOLoweredExpr::io_type::INPUT; + const bool is_input = expr->get_type() == IOExpression::io_type::INPUT; const auto& tds = is_input ? 
expr->get_outputs() : expr->get_inputs(); if (tds.size() != 1) throw ngraph_error("Parameters/Results should have exactly one output/input"); @@ -52,7 +55,7 @@ bool PropagateLayout::run(LoweredExprIR& linear_ir) { return true; } -} // namespace lowered } // namespace pass +} // namespace lowered } // namespace snippets } // namespace ngraph diff --git a/src/common/snippets/src/pass/lowered/softmax_decomposition.cpp b/src/common/snippets/src/lowered/pass/softmax_decomposition.cpp similarity index 75% rename from src/common/snippets/src/pass/lowered/softmax_decomposition.cpp rename to src/common/snippets/src/lowered/pass/softmax_decomposition.cpp index babfd3b590235d..ed6a1a34eb9422 100644 --- a/src/common/snippets/src/pass/lowered/softmax_decomposition.cpp +++ b/src/common/snippets/src/lowered/pass/softmax_decomposition.cpp @@ -2,21 +2,26 @@ // SPDX-License-Identifier: Apache-2.0 // -#include "snippets/pass/lowered/softmax_decomposition.hpp" +#include "snippets/lowered/pass/softmax_decomposition.hpp" + +#include "snippets/lowered/linear_ir.hpp" +#include "snippets/lowered/loop_manager.hpp" +#include "snippets/lowered/pass/loop_markup.hpp" #include "snippets/snippets_isa.hpp" #include "snippets/itt.hpp" -#include + +#include "ngraph/pattern/op/wrap_type.hpp" #include "openvino/pass/pattern/matcher.hpp" -#include "snippets/pass/lowered/loop_markup.hpp" + namespace ngraph { namespace snippets { -namespace pass { namespace lowered { +namespace pass { SoftmaxDecomposition::SoftmaxDecomposition(size_t vector_size) : m_vector_size{vector_size} {} -bool SoftmaxDecomposition::run(LoweredExprIR& linear_ir) { +bool SoftmaxDecomposition::run(LinearIR& linear_ir) { OV_ITT_SCOPED_TASK(ngraph::pass::itt::domains::SnippetsTransform, "Snippets::SoftmaxDecompositionLowered") bool modified = false; const auto& loop_manager = linear_ir.get_loop_manager(); @@ -39,7 +44,7 @@ bool SoftmaxDecomposition::run(LoweredExprIR& linear_ir) { expr_it = linear_ir.erase(expr_it); // Remove Softmax - std::vector outer_exprs; + std::vector outer_exprs; // We need an iterator to the inserted element auto push_node = [&linear_ir, &expr_it](const std::shared_ptr& n) { @@ -57,9 +62,9 @@ bool SoftmaxDecomposition::run(LoweredExprIR& linear_ir) { // Markup of ReduceMax Loop loop_manager->mark_loop(linear_ir, max.first, horizon_max.first, 1, inner_work_amount, m_vector_size, - std::vector{LoweredExprPort::make_input(*max.first, 0), - LoweredExprPort::make_input(*max.first, 1)}, - std::vector{LoweredExprPort::make_output(*max.first, 0)}); + std::vector{(*max.first)->input_port(0), + (*max.first)->input_port(1)}, + std::vector{(*max.first)->output_port(0)}); const auto broadcast_horizon_max = push_node( std::make_shared(horizon_max.second, horizon_max.second->get_input_partial_shape(0))); @@ -77,11 +82,11 @@ bool SoftmaxDecomposition::run(LoweredExprIR& linear_ir) { // Markup of ReduceSum Loop loop_manager->mark_loop(linear_ir, sub.first, horizon_sum.first, 1, inner_work_amount, m_vector_size, - std::vector{LoweredExprPort::make_input(*sub.first, 0), - LoweredExprPort::make_input(*sub.first, 1), - LoweredExprPort::make_input(*sum.first, 1)}, - std::vector{LoweredExprPort::make_output(*exp.first, 0), - LoweredExprPort::make_output(*sum.first, 0)}); + std::vector{(*sub.first)->input_port(0), + (*sub.first)->input_port(1), + (*sum.first)->input_port(1)}, + std::vector{(*exp.first)->output_port(0), + (*sum.first)->output_port(0)}); // Divide is an expensive operation, so we decompose it into 1 / x * y, where 1 / x is executed outside the loop
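
A hedged scalar reference for the decomposition this pass performs, per row of the softmax axis; it mirrors the three marked loops above (ReduceMax; Sub + Exp + ReduceSum; Mul) but is plain C++, not the emitted kernel:

#include <algorithm>
#include <cmath>
#include <cstddef>
#include <vector>

// Assumes a non-empty row `x`.
std::vector<float> softmax_row_reference(const std::vector<float>& x) {
    float max_val = x.front();
    for (float v : x)                       // ReduceMax loop
        max_val = std::max(max_val, v);
    std::vector<float> out(x.size());
    float sum = 0.f;
    for (size_t i = 0; i < x.size(); ++i) { // Sub + Exp + ReduceSum loop
        out[i] = std::exp(x[i] - max_val);
        sum += out[i];
    }
    const float reciprocal = 1.f / sum;     // Power(-1): one divide outside the loop
    for (auto& v : out)                     // Mul loop
        v *= reciprocal;
    return out;
}
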
const auto pow = push_node(std::make_shared(horizon_sum.second, -1.f)); @@ -99,20 +104,20 @@ bool SoftmaxDecomposition::run(LoweredExprIR& linear_ir) { // Markup of Mul Loop loop_manager->mark_loop(linear_ir, mul.first, expr_it, 1, inner_work_amount, m_vector_size, - std::vector{LoweredExprPort::make_input(*mul.first, 0), - LoweredExprPort::make_input(*mul.first, 1)}, - std::vector{LoweredExprPort::make_output(*mul.first, 0)}); + std::vector{(*mul.first)->input_port(0), + (*mul.first)->input_port(1)}, + std::vector{(*mul.first)->output_port(0)}); // Markup inner loop for outside expression with null loop id for (const auto& expr : outer_exprs) { - expr->set_loop_id(LoweredExpr::LOOP_NULL_ID, 1); + expr->set_loop_id(Expression::LOOP_NULL_ID, 1); } // Outer Loop loop_manager->mark_loop(linear_ir, vector_buffer_max.first, expr_it, 0, outer_work_amount, 1, - std::vector{LoweredExprPort::make_input(*max.first, 0), - LoweredExprPort::make_input(*sub.first, 0)}, - std::vector{LoweredExprPort::make_output(*mul.first, 0)}); + std::vector{(*max.first)->input_port(0), + (*sub.first)->input_port(0)}, + std::vector{(*mul.first)->output_port(0)}); /* =========================================== */ @@ -129,7 +134,7 @@ bool SoftmaxDecomposition::run(LoweredExprIR& linear_ir) { return modified; } -} // namespace lowered } // namespace pass +} // namespace lowered } // namespace snippets } // namespace ngraph diff --git a/src/common/snippets/src/pass/lowered/linear_IR_transformation.cpp b/src/common/snippets/src/lowered/pass/transformation.cpp similarity index 55% rename from src/common/snippets/src/pass/lowered/linear_IR_transformation.cpp rename to src/common/snippets/src/lowered/pass/transformation.cpp index c9d4f9b379b0d2..8af054830799e8 100644 --- a/src/common/snippets/src/pass/lowered/linear_IR_transformation.cpp +++ b/src/common/snippets/src/lowered/pass/transformation.cpp @@ -2,27 +2,25 @@ // SPDX-License-Identifier: Apache-2.0 // -#include "snippets/pass/lowered/linear_IR_transformation.hpp" -#include "snippets/snippets_isa.hpp" -#include "snippets/itt.hpp" +#include "snippets/lowered/pass/transformation.hpp" namespace ngraph { namespace snippets { -namespace pass { namespace lowered { +namespace pass { -void LinearIRTransformationPipeline::register_transformation(const std::shared_ptr& transformation) { +void TransformationPipeline::register_transformation(const std::shared_ptr& transformation) { m_transformations.push_back(transformation); } -void LinearIRTransformationPipeline::run(LoweredExprIR& linear_ir) { +void TransformationPipeline::run(LinearIR& linear_ir) { for (const auto& transformation : m_transformations) { transformation->run(linear_ir); } } -} // namespace lowered } // namespace pass +} // namespace lowered } // namespace snippets } // namespace ngraph diff --git a/src/common/snippets/src/pass/lowered/vector_to_scalar.cpp b/src/common/snippets/src/lowered/pass/vector_to_scalar.cpp similarity index 89% rename from src/common/snippets/src/pass/lowered/vector_to_scalar.cpp rename to src/common/snippets/src/lowered/pass/vector_to_scalar.cpp index d7299bcd874f52..41335b74e7be70 100644 --- a/src/common/snippets/src/pass/lowered/vector_to_scalar.cpp +++ b/src/common/snippets/src/lowered/pass/vector_to_scalar.cpp @@ -2,22 +2,24 @@ // SPDX-License-Identifier: Apache-2.0 // -#include "snippets/pass/lowered/vector_to_scalar.hpp" +#include "snippets/lowered/pass/vector_to_scalar.hpp" + #include "snippets/snippets_isa.hpp" #include "snippets/itt.hpp" + namespace ngraph { namespace snippets { 
-namespace pass { namespace lowered { +namespace pass { SetScalarCountForLoadStore::SetScalarCountForLoadStore() {} -bool SetScalarCountForLoadStore::run(LoweredExprIR& linear_ir) { +bool SetScalarCountForLoadStore::run(LinearIR& linear_ir) { OV_ITT_SCOPED_TASK(ngraph::pass::itt::domains::SnippetsTransform, "Snippets::SetScalarCountForLoadStore") bool modified = false; for (auto expr_it = linear_ir.begin(); expr_it != linear_ir.end(); expr_it++) { - const auto &op = expr_it->get()->get_node(); + const auto& op = expr_it->get()->get_node(); const auto load = ov::as_type_ptr(op); const auto store = ov::as_type_ptr(op); if (load || store) { @@ -41,7 +43,7 @@ bool SetScalarCountForLoadStore::run(LoweredExprIR& linear_ir) { -} // namespace lowered } // namespace pass +} // namespace lowered } // namespace snippets } // namespace ngraph diff --git a/src/common/snippets/src/lowered_expr.cpp b/src/common/snippets/src/lowered_expr.cpp deleted file mode 100644 index caa9cc98cee578..00000000000000 --- a/src/common/snippets/src/lowered_expr.cpp +++ /dev/null @@ -1,630 +0,0 @@ -// Copyright (C) 2023 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 -// - -#include "snippets/lowered_expr.hpp" -#include "snippets/op/loop.hpp" -#include "snippets/op/subgraph.hpp" -#include -#include -#include "snippets/tensor_descriptor.hpp" -#include "snippets/utils.hpp" - -#include -#include - -namespace ngraph { -namespace snippets { - -size_t LoweredExpr::LOOP_NULL_ID = SIZE_MAX; - -LoweredExpr::LoweredExpr(const std::shared_ptr& n) : m_source_node{n}, m_emitter{nullptr}, m_reg_info{{}, {}} { - for (const auto& in : n->inputs()) - m_inputs.emplace_back(get_tensor_descriptor_ptr(in.get_source_output())); - for (const auto& out : n->outputs()) - m_outputs.emplace_back(get_tensor_descriptor_ptr(out)); - m_is_outside_loop = utils::get_outside_loop_value(n); -} - -LoweredExpr::LoweredExpr(const std::shared_ptr& n, std::vector inputs, std::vector outputs) - : m_source_node{n}, m_emitter{nullptr}, m_inputs(std::move(inputs)), m_outputs(std::move(outputs)), m_reg_info{{}, {}} { - if (m_outputs.empty()) - for (const auto& out : n->outputs()) - m_outputs.emplace_back(get_tensor_descriptor_ptr(out)); - m_is_outside_loop = utils::get_outside_loop_value(n); -} - -std::shared_ptr LoweredExpr::get_node() const { - if (!m_source_node) - throw ngraph_error("An attempt to get uninitialized node from lowered expression"); - return m_source_node; -} - -std::shared_ptr LoweredExpr::get_emitter() const { - return m_emitter; -} - -void LoweredExpr::init_emitter(const std::shared_ptr& target) { - m_emitter = target->get(m_source_node->get_type_info())(m_source_node); -} - -void LoweredExpr::replace_input(size_t port, TensorDescriptorPtr to) { - OPENVINO_ASSERT(port < m_inputs.size(), "Failed to replace: target input port must be less than input count!"); - m_inputs[port] = std::move(to); -} - -void LoweredExpr::replace_output(size_t port, TensorDescriptorPtr to) { - OPENVINO_ASSERT(port < m_outputs.size(), "Failed to replace: target output port must be less than output count!"); - m_outputs[port] = std::move(to); -} - -void LoweredExpr::set_loop_id(size_t id, size_t idx) { - OPENVINO_ASSERT((std::find(m_loop_ids.begin(), m_loop_ids.end(), id) == m_loop_ids.end()), - "LoweredExpr cannot have several the same Loops"); - if (m_loop_ids.size() <= idx) { - m_loop_ids.resize(idx + 1, LOOP_NULL_ID); - } - m_loop_ids[idx] = id; -} - -void LoweredExpr::remove_loop_id(size_t id) { - auto it = std::find(m_loop_ids.begin(), m_loop_ids.end(), 
id); - OPENVINO_ASSERT(it == m_loop_ids.end(), "LoweredExpr doesn't have the Loop with ID " + std::to_string(id)); - *it = LoweredExpr::LOOP_NULL_ID; -} - -IOLoweredExpr::IOLoweredExpr(const std::shared_ptr& par, int64_t index) - : LoweredExpr(par), m_index(index), m_type{io_type::INPUT} { -} - -IOLoweredExpr::IOLoweredExpr(const std::shared_ptr& res, int64_t index, std::vector inputs) - : LoweredExpr(), m_index(index), m_type{io_type::OUTPUT} { - m_source_node = res; - if (inputs.size() != res->get_input_size()) - throw ngraph_error("Invalid number of inputs for IOLoweredExpr construction"); - m_inputs = std::move(inputs); - m_outputs = {}; -} - -LoweredExprPort::LoweredExprPort(const LoweredExprPtr& expr, size_t port, Type type) : expr(expr), port(port), type(type) { - if (type == Type::Input) { - OPENVINO_ASSERT(port < expr->get_inputs().size(), "The input port must be less than input count"); - } else if (type == Type::Output) { - OPENVINO_ASSERT(port < expr->get_outputs().size(), "The output port must be less than output count"); - } -} - -LoweredExprPort LoweredExprPort::make_input(const LoweredExprPtr& expr, size_t port) { - return LoweredExprPort(expr, port, Type::Input); -} -LoweredExprPort LoweredExprPort::make_output(const LoweredExprPtr& expr, size_t port) { - return LoweredExprPort(expr, port, Type::Output); -} - -bool operator==(const LoweredExprPort& lhs, const LoweredExprPort& rhs) { - if (&lhs == &rhs) - return true; - OPENVINO_ASSERT(lhs.type == rhs.type, "Incorrect comparison: Ports are from different types!"); - return lhs.expr == rhs.expr && lhs.port == rhs.port; -} - -bool operator!=(const LoweredExprPort& lhs, const LoweredExprPort& rhs) { - return !(lhs == rhs); -} - -bool operator<(const LoweredExprPort& lhs, const LoweredExprPort& rhs) { - OPENVINO_ASSERT(lhs.type == rhs.type, "Incorrect comparison: Ports are from different types!"); - // Firstly ports - return (lhs.port < rhs.port) || (lhs.port == rhs.port && lhs.expr < rhs.expr); -} - -LoweredExprIR::LoweredExprIR(const std::shared_ptr& model, LoweringConfig config) - : m_io_lowered_ops{}, m_config{std::move(config)}, m_loop_manager(std::make_shared()) { - constExprIt scalar_pos = m_lowered_ops.begin(); - LoweredExprPtr last_param = nullptr; - for (const auto& n : get_ordered_ops(model)) { - constExprIt insertion_pos = m_lowered_ops.end(); - std::shared_ptr expr; - std::vector input_tds; - for (const auto& in : n->inputs()) { - const auto& out = in.get_source_output(); - const auto& parent_out_tds = m_node2expression_map[out.get_node_shared_ptr()]->get_outputs(); - input_tds.push_back(parent_out_tds[out.get_index()]); - } - if (const auto& par = as_type_ptr(n)) { - auto io_expr = std::make_shared(par, model->get_parameter_index(par)); - m_io_lowered_ops.push_back(io_expr); - expr = io_expr; - last_param = expr; - } else if (const auto& res = as_type_ptr(n)) { - auto io_expr = std::make_shared(res, model->get_result_index(res), input_tds); - m_io_lowered_ops.push_back(io_expr); - expr = io_expr; - } else { - if (const auto& scalar = as_type_ptr(n)) { - // Scalar should be on the Linear IR beginning after Parameters to have valid expression order after Loop passes. - // After these passes we must call pass MoveScalarToConsumer() to have a correct accuracy. 
- // For more details, please see the pass description - if (scalar_pos == m_lowered_ops.end()) { - OPENVINO_ASSERT(last_param, "Scalars must be executed after Parameters"); - scalar_pos = std::find(m_lowered_ops.begin(), m_lowered_ops.end(), last_param); - } - insertion_pos = std::next(scalar_pos); - } - // Note that output tds must be empty since they are filled automatically from rt_info and/or tensor shapes - expr = std::make_shared(n, input_tds, std::vector{}); - } - register_expression(expr); - m_lowered_ops.insert(insertion_pos, expr); - } -} - -ov::NodeVector LoweredExprIR::get_ordered_ops(const std::shared_ptr& m) { - if (!m->get_sinks().empty()) - throw ngraph_error("Linear IR is not supposed to work for model with sinks. Check your transformation pipeline."); - - // Note that an important difference between this impl and Model::get_ordered_ops is that Results and Parameters - // are added in REVERSE order, so they will be visited in DIRECT order compared to get_parameters() and get_results() - NodeVector nodes; - const auto& results = m->get_results(); - std::copy(results.rbegin(), results.rend(), std::back_inserter(nodes)); - const auto& params = m->get_parameters(); - std::copy(params.rbegin(), params.rend(), std::back_inserter(nodes)); - - - return ov::topological_sort(nodes); -} - -void LoweredExprIR::serialize(const std::string& xml, const std::string& bin) { - auto first_node = std::make_shared(element::f32, Shape{}); - first_node->set_friendly_name("Start"); - first_node->get_rt_info()["execTimeMcs"] = 0; - std::shared_ptr body_node = first_node; - for (const auto& expr : m_lowered_ops) { - body_node = std::make_shared(body_node, expr); - } - auto last_node = std::make_shared(body_node); - last_node->set_friendly_name("End"); - const auto tmp_model = std::make_shared(ResultVector {last_node}, - ParameterVector {first_node}, - "Lowered_IR_Serialization"); - ov::pass::Serialize(xml, bin).run_on_model(tmp_model); -} - -LoweredExprIR::container LoweredExprIR::deep_copy_range(LoweredExprIR::container::const_iterator begin, LoweredExprIR::container::const_iterator end) { - LoweredExprIR::container result; - NodeVector original_nodes; - for (auto it = begin; it != end; it++) - original_nodes.push_back((*it)->get_node()); - NodeMap node_map; - ngraph::clone_nodes(original_nodes, node_map); - for (auto it = begin; it != end; it++) { - // copy by value, so result shared_pointer point to new objects - LoweredExpr new_expr = **it; - new_expr.m_source_node = node_map[(*it)->get_node().get()]; - result.emplace_back(std::make_shared(new_expr)); - } - return result; -} - -LoweredExprIR LoweredExprIR::deep_copy() const { - LoweredExprIR result; - auto& result_ops = result.m_lowered_ops; - for (const auto& expr : deep_copy_range(m_lowered_ops.begin(), m_lowered_ops.end())) - result_ops.emplace_back(expr); - result.m_config = m_config; - return result; -} - -void LoweredExprIR::debug_print(bool tds_as_pointers) const { - auto print_rinfo = [](const RegInfo& rinfo) { - std::cerr << " : {"; - for (auto i : rinfo.first) - std::cerr << i << " "; - std::cerr << " => "; - for (auto i : rinfo.second) - std::cerr << i << " "; - std::cerr << "}"; - }; - std::map td2int; - int td_counter = 0; - int counter = 0; - for (const auto& expr : m_lowered_ops) { - const auto& node = expr->get_node(); - std::cerr << counter++ << " : " << - node->get_friendly_name() << " : "; - if (tds_as_pointers) { - for (const auto& in : expr->get_inputs()) { - if (td2int.count(in) == 0) - throw ngraph_error("Undefined input 
descriptor for op"); - std::cerr << td2int.at(in) << ", "; - } - std::cerr << "\b\b => "; - for (const auto& out : expr->get_outputs()) { - if (td2int.count(out) == 0) - td2int.insert({out, td_counter++}); - std::cerr << td2int.at(out) << ", "; - } - } else { - for (const auto& in : expr->get_inputs()) - std::cerr << *in << ", "; - std::cerr << "\b\b => "; - for (const auto& out : expr->get_outputs()) - std::cerr << *out << ", "; - } - std::cerr << "\b\b"; - const auto& rinfo = expr->get_reg_info(); - if (!rinfo.first.empty() || !rinfo.second.empty()) - print_rinfo(expr->get_reg_info()); - std::cerr << "\n"; - } -} - -void LoweredExprIR::init_emitters(const std::shared_ptr& target) { - for (auto& expr : m_lowered_ops) { - if (!expr->get_emitter()) - expr->init_emitter(target); - } -} - -LoweredExprPtr LoweredExprIR::get_expr_by_node(const std::shared_ptr& n) const { - auto found = m_node2expression_map.find(n); - return found == m_node2expression_map.end() ? nullptr : found->second; -} - -LoweredExprPort LoweredExprIR::get_expr_by_output(const TensorDescriptorPtr& td) const { - auto found = m_output2expression_map.find(td); - if (found == m_output2expression_map.end()) - throw ngraph_error("Failed to find expression by output tensor descriptor"); - return found->second; -} - -const std::set& LoweredExprIR::get_exprs_by_input(const TensorDescriptorPtr& td) const { - auto found = m_input2expression_map.find(td); - if (found == m_input2expression_map.end()) - throw ngraph_error("Failed to find expression by input tensor descriptor"); - return found->second; -} - -void LoweredExprIR::replace_input(const LoweredExprPtr& expr, size_t port, const TensorDescriptorPtr& to) { - replace_input(LoweredExprPort::make_input(expr, port), to); -} - -void LoweredExprIR::replace_input(const LoweredExprPort& expr_port, const TensorDescriptorPtr& to) { - const auto& expr = expr_port.expr; - const auto port = expr_port.port; - OPENVINO_ASSERT(expr_port.type == LoweredExprPort::Type::Input, "Failed to replace: target input port must have Input type"); - OPENVINO_ASSERT(port < expr->m_inputs.size(), "Failed to replace: target input port must be less than input count!"); - const auto from = expr->m_inputs[port]; - auto found = m_input2expression_map.find(from); - if (found == m_input2expression_map.end() || found->second.count(expr_port) == 0) - throw ngraph_error("Invalid expression of input was provided to replace_input"); - found->second.erase(expr_port); - { - const auto& res = m_input2expression_map.insert({to, std::set{expr_port}}); - // If input is already in the map => add ExprPtr to the mapped set - if (!res.second) { - res.first->second.insert(expr_port); - } - } - expr->replace_input(port, std::move(to)); -} - -void LoweredExprIR::replace_output(const LoweredExprPtr& expr, size_t port, const TensorDescriptorPtr& to) { - replace_output(LoweredExprPort::make_output(expr, port), to); -} - -void LoweredExprIR::replace_output(const LoweredExprPort& expr_port, const TensorDescriptorPtr& to) { - const auto& expr = expr_port.expr; - const auto port = expr_port.port; - OPENVINO_ASSERT(expr_port.type == LoweredExprPort::Type::Output, "Failed to replace: target output port must have Output type"); - OPENVINO_ASSERT(port < expr->m_outputs.size(), "Failed to replace: target output port must be less than output count!"); - const auto from = expr->m_outputs[port]; - auto found = m_output2expression_map.find(from); - if (found == m_output2expression_map.end() || found->second != expr_port) - throw ngraph_error("Invalid 
expression of output was provided to replace_output"); - m_output2expression_map.erase(found); - m_output2expression_map[to] = expr_port; - expr->replace_output(port, to); -} - -void LoweredExprIR::register_regular_expression(const LoweredExprPtr& expr) { - if (is_type(expr->get_node()) || is_type(expr->get_node())) - throw ngraph_error("LoweredExprIR::insert can't be used to add Parameters or Results to IR"); - register_expression(expr); -} - -void LoweredExprIR::register_expression(const LoweredExprPtr& expr) { - const auto& node = expr->get_node(); - { - const auto& res = m_node2expression_map.insert({node, expr}); - if (!res.second) - throw ngraph_error("Duplicate node is detected in linear IR: " + std::string(node->get_friendly_name())); - } - for (size_t i = 0; i < expr->m_outputs.size(); ++i) { - const auto& out = expr->m_outputs[i]; - m_output2expression_map[out] = LoweredExprPort::make_output(expr, i); - } - - for (size_t i = 0; i < expr->m_inputs.size(); ++i) { - const auto& in = expr->m_inputs[i]; - const auto expr_port = LoweredExprPort::make_input(expr, i); - const auto& res = m_input2expression_map.insert({in, std::set{expr_port}}); - // If input is already in the map => add ExprPtr to the mapped set - if (!res.second) { - res.first->second.insert(expr_port); - } - } -} - -void LoweredExprIR::unregister_expression(const LoweredExprPtr& expr) { - for (const auto& out : expr->m_outputs) - m_output2expression_map.erase(out); - - size_t in_port = 0; - for (const auto& in : expr->m_inputs) { - const auto& found = m_input2expression_map.find(in); - if (found != m_input2expression_map.end()) { - // Note: If the input is used by only by this expr => delete the whole entry - // Otherwise delete the expr from the users set - auto& users = found->second; - if (users.size() == 1) - m_input2expression_map.erase(found); - else - users.erase(LoweredExprPort::make_input(expr, in_port)); - } - ++in_port; - } - - m_node2expression_map.erase(expr->get_node()); -} - -LoweredExprIR::exprIt LoweredExprIR::insert(constExprIt pos, container::value_type&& value) { - register_regular_expression(value); - return m_lowered_ops.insert(pos, value); -} - -LoweredExprIR::exprIt LoweredExprIR::insert(constExprIt pos, const container::value_type& value) { - register_regular_expression(value); - return m_lowered_ops.insert(pos, value); -} - -LoweredExprIR::exprIt LoweredExprIR::insert(constExprIt pos, exprIt begin, exprIt end) { - constExprIt cbegin = begin; - constExprIt cend = end; - return insert(pos, cbegin, cend); -} - -LoweredExprIR::exprIt LoweredExprIR::insert(constExprIt pos, constExprIt begin, constExprIt end) { - for (auto b = begin; b != end; b++) - register_regular_expression(*b); - return m_lowered_ops.insert(pos, begin, end); -} - -LoweredExprIR::exprIt LoweredExprIR::insert(LoweredExprIR::constExprIt pos, const NodeVector& nodes) { - auto ret = m_lowered_ops.end(); - for (const auto& n : nodes) { - std::vector input_tds; - for (const auto& in : n->inputs()) { - const auto& out = in.get_source_output(); - const auto& parent_out_tds = m_node2expression_map[out.get_node_shared_ptr()]->get_outputs(); - input_tds.push_back(parent_out_tds[out.get_index()]); - } - // Note that output tds must be empty since they are filled automatically from rt_info and/or tensor shapes - const auto& expr = std::make_shared(n, input_tds, std::vector{}); - register_regular_expression(expr); - ret = m_lowered_ops.insert(pos, expr); - } - // Need to return iterator to the first of the inserted values - return 
std::prev(ret, static_cast(nodes.size())); -} -// todo reuse for node vector to avoid code duplication -LoweredExprIR::exprIt LoweredExprIR::insert(LoweredExprIR::constExprIt pos, const std::shared_ptr& n) { - std::vector input_tds; - for (const auto& in : n->inputs()) { - const auto& out = in.get_source_output(); - const auto& parent_out_tds = m_node2expression_map[out.get_node_shared_ptr()]->get_outputs(); - input_tds.push_back(parent_out_tds[out.get_index()]); - } - // Note that output tds must be empty since they are filled automatically from rt_info and/or tensor shapes - const auto& expr = std::make_shared(n, input_tds, std::vector{}); - register_regular_expression(expr); - return m_lowered_ops.insert(pos, expr); -} - -LoweredExprIR::exprIt LoweredExprIR::erase(LoweredExprIR::exprIt pos) { - unregister_expression(*pos); - return m_lowered_ops.erase(pos); -} - -LoweredExprIR::exprIt LoweredExprIR::erase(LoweredExprIR::constExprIt pos) { - unregister_expression(*pos); - return m_lowered_ops.erase(pos); -} - -void LoweredExprIR::move(LoweredExprIR::constExprIt from, LoweredExprIR::constExprIt to) { - // Instead of `insert()` + `erase()`, we use `splice()` for the same list - m_lowered_ops.splice(to, m_lowered_ops, from); -} - -size_t LoweredExprIR::LoweredLoopManager::add_loop_info(const LoweredLoopInfoPtr& loop) { - const auto index = next_id; - m_map[index] = loop; - next_id++; - return index; -} - -void LoweredExprIR::LoweredLoopManager::remove_loop_info(size_t index) { - m_map.erase(index); -} - -using LoweredLoopInfoPtr = LoweredExprIR::LoweredLoopManager::LoweredLoopInfoPtr; - -const std::map& LoweredExprIR::LoweredLoopManager::get_map() const { - return m_map; -} - -LoweredLoopInfoPtr LoweredExprIR::LoweredLoopManager::get_loop_info(size_t index) const { - const auto it = m_map.find(index); - OPENVINO_ASSERT(it != m_map.end(), "LoopInformation hasn't been found!"); - return it->second; -} - -void LoweredExprIR::LoweredLoopManager::get_loop_bounds(const LoweredExprIR& linear_ir, - size_t loop_id, - LoweredExprIR::constExprIt& loop_begin_pos, - LoweredExprIR::constExprIt& loop_end_pos) const { - const auto loop_info = get_loop_info(loop_id); - get_loop_bounds(linear_ir, loop_info->entry_exprs, loop_info->exit_exprs, loop_begin_pos, loop_end_pos, loop_id); -} - -void LoweredExprIR::LoweredLoopManager::get_loop_bounds(const LoweredExprIR& linear_ir, - const std::vector& entries, - const std::vector& exits, - LoweredExprIR::constExprIt& loop_begin_pos, - LoweredExprIR::constExprIt& loop_end_pos, - size_t loop_id) { - OPENVINO_ASSERT(!entries.empty(), "Loop must have entry points"); - OPENVINO_ASSERT(!exits.empty(), "Loop must have entry points"); - loop_begin_pos = std::find(linear_ir.begin(), linear_ir.end(), entries.front().expr); - OPENVINO_ASSERT(loop_begin_pos != linear_ir.end(), "Loop begin hasn't been found!"); - - // Some operations in Loop can be before first entry points: Scalars, VectorBuffer. 
-    // We should iterate by them till the expr is in the corresponding Loop
-    auto prev_loop_ids = (*std::prev(loop_begin_pos))->get_loop_ids();
-    while (std::find(prev_loop_ids.begin(), prev_loop_ids.end(), loop_id) != prev_loop_ids.end()) {
-        loop_begin_pos = std::prev(loop_begin_pos);
-        prev_loop_ids = (*std::prev(loop_begin_pos))->get_loop_ids();
-    }
-
-    // At the moment all Loops must have exit points
-    loop_end_pos = std::next(std::find(loop_begin_pos, linear_ir.end(), exits.back().expr));
-    OPENVINO_ASSERT(loop_end_pos != linear_ir.end(), "Loop end hasn't been found!");
-}
-
-void LoweredExprIR::LoweredLoopManager::get_io_loop_ports(LoweredExprIR& linear_ir,
-                                                          LoweredExprIR::constExprIt loop_begin_pos,
-                                                          LoweredExprIR::constExprIt loop_end_pos,
-                                                          std::vector& entries,
-                                                          std::vector& exits) {
-    entries.clear();
-    exits.clear();
-    for (auto expr_it = loop_begin_pos; expr_it != loop_end_pos; ++expr_it) {
-        const auto& expr = *expr_it;
-        const auto inputs = expr->get_inputs();
-        const auto outputs = expr->get_outputs();
-
-        for (size_t in_port = 0; in_port < inputs.size(); ++in_port) {
-            const auto in_td = inputs[in_port];
-            const auto parent_expr = linear_ir.get_expr_by_output(in_td).expr;
-            if (!ov::is_type(parent_expr->get_node()) &&
-                std::find(loop_begin_pos, expr_it, parent_expr) == expr_it) {
-                entries.push_back(LoweredExprPort::make_input(expr, in_port));
-            }
-        }
-
-        for (size_t out_port = 0; out_port < outputs.size(); ++out_port) {
-            const auto out_td = outputs[out_port];
-            const auto consumer_exprs = linear_ir.get_exprs_by_input(out_td);
-            for (const auto& conumer_expr : consumer_exprs) {
-                if (std::find(expr_it, loop_end_pos, conumer_expr.expr) == loop_end_pos) {
-                    exits.push_back(LoweredExprPort::make_output(expr, out_port));
-                    break;
-                }
-            }
-        }
-    }
-}
-
-void LoweredExprIR::LoweredLoopManager::skipped_mark(LoweredExprIR::constExprIt loop_begin_pos,
-                                                     LoweredExprIR::constExprIt loop_end_pos,
-                                                     size_t loop_depth) {
-    const auto loop_ids = std::vector(loop_depth, LoweredExpr::LOOP_NULL_ID);
-    for (auto& expr_it = loop_begin_pos; expr_it != loop_end_pos; ++expr_it) {
-        const auto expr = *expr_it;
-        expr->set_loop_ids(loop_ids);
-    }
-}
-
-void LoweredExprIR::LoweredLoopManager::mark_loop(LoweredExprIR& linear_ir,
-                                                  LoweredExprIR::constExprIt loop_begin_pos,
-                                                  LoweredExprIR::constExprIt loop_end_pos,
-                                                  size_t loop_depth, size_t vector_size) {
-    std::vector loop_entry_points, loop_exit_points;
-    LoweredLoopManager::get_io_loop_ports(linear_ir, loop_begin_pos, loop_end_pos, loop_entry_points, loop_exit_points);
-
-    auto broadcast = [](std::vector& lhs, const std::vector& rhs) -> void {
-        if (rhs == lhs)
-            return;
-        const auto lhs_size = lhs.size();
-        const auto rhs_size = rhs.size();
-        const auto size = std::max(lhs_size, rhs_size);
-        std::vector result(size, 1);
-        lhs.resize(size, 1);
-        for (size_t i = 0; i < size; ++i) {
-            const auto lhs_value = i < lhs_size ? *(lhs.crbegin() + i) : 1;
-            const auto rhs_value = i < rhs_size ? *(rhs.crbegin() + i) : 1;
-            OPENVINO_ASSERT(lhs_value == rhs_value || lhs_value == 1 || rhs_value == 1, "Output shapes of Loop must be broadcastable!");
-            *(lhs.rbegin() + i) = std::max(lhs_value, rhs_value);
-        }
-    };
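// For reference, the lambda above implements numpy-style right-aligned shape
// broadcasting; a hypothetical standalone equivalent (illustrative, not the
// real snippets API):
//
//   std::vector<size_t> broadcast(const std::vector<size_t>& lhs, const std::vector<size_t>& rhs) {
//       const auto size = std::max(lhs.size(), rhs.size());
//       std::vector<size_t> out(size, 1);
//       for (size_t i = 0; i < size; ++i) {
//           const size_t l = i < lhs.size() ? *(lhs.crbegin() + i) : 1;
//           const size_t r = i < rhs.size() ? *(rhs.crbegin() + i) : 1;
//           assert(l == r || l == 1 || r == 1);  // dims must match or be 1
//           *(out.rbegin() + i) = std::max(l, r);
//       }
//       return out;
//   }
//
//   broadcast({2, 3, 4}, {3, 1}) yields {2, 3, 4}: {3, 1} right-aligns onto {3, 4}.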
-
-    std::vector loop_subtensor;
-    std::vector loop_layout;
-    std::vector loop_tensor(1, 1);  // Scalar
-    for (const auto& exit_point : loop_exit_points) {
-        const auto expr = exit_point.expr;
-        const auto port = exit_point.port;
-        const auto out_td = expr->get_outputs()[port];
-        const auto out_tensor = out_td->get_tensor();
-        const auto out_layout = out_td->get_layout();
-        broadcast(loop_tensor, out_tensor);
-        if (loop_layout.empty())
-            loop_layout = out_layout;
-        OPENVINO_ASSERT(loop_layout == out_layout, "Output layouts of Loop must be the same!");
-    }
-
-    for (const auto& entry_point : loop_entry_points) {
-        const auto expr = entry_point.expr;
-        const auto out_td = expr->get_outputs().front();
-        const auto out_subtensor = out_td->get_subtensor();
-        if (loop_subtensor.empty())
-            loop_subtensor = out_subtensor;
-        OPENVINO_ASSERT(loop_subtensor == out_subtensor, "Subtensors of Loop must be the same!");
-    }
-
-    for (size_t dim_idx = 0; dim_idx < loop_depth; ++dim_idx) {
-        OPENVINO_ASSERT(dim_idx < loop_tensor.size(), "Incorrect indexes of Loop for markup");
-        const auto dim = loop_layout.size() >= dim_idx ? *(loop_layout.rbegin() + dim_idx) : 0;
-        const auto work_amount = loop_tensor.size() > dim ? loop_tensor[dim] : 0;
-        const auto work_amount_increment = loop_subtensor.size() > dim_idx ? *(loop_subtensor.rbegin() + dim_idx) :
-                                                                             dim_idx == 0 ? vector_size : 1;
-
-        mark_loop(linear_ir, loop_begin_pos, loop_end_pos, loop_depth - dim_idx - 1, work_amount, work_amount_increment, loop_entry_points, loop_exit_points);
-    }
-}
-
-void LoweredExprIR::LoweredLoopManager::mark_loop(LoweredExprIR& linear_ir,
-                                                  LoweredExprIR::constExprIt loop_begin_pos,
-                                                  LoweredExprIR::constExprIt loop_end_pos,
-                                                  size_t idx,
-                                                  size_t work_amount,
-                                                  size_t work_amount_increment,
-                                                  const std::vector& entries,
-                                                  const std::vector& exits) {
-    const auto loop_info = std::make_shared(
-            work_amount, work_amount_increment, entries, exits);
-    const auto loop_id = this->add_loop_info(loop_info);
-    exprs_marking(loop_begin_pos, loop_end_pos, loop_id, idx);
-}
-
-void LoweredExprIR::LoweredLoopManager::exprs_marking(LoweredExprIR::constExprIt loop_begin_pos,
-                                                      LoweredExprIR::constExprIt loop_end_pos,
-                                                      size_t loop_id, size_t idx) {
-    for (auto expr_it = loop_begin_pos; expr_it != loop_end_pos; ++expr_it) {
-        expr_it->get()->set_loop_id(loop_id, idx);
-    }
-}
-
-}// namespace snippets
-}// namespace ngraph
diff --git a/src/common/snippets/src/op/kernel.cpp b/src/common/snippets/src/op/kernel.cpp
index 5ed375d6a82fd9..0ce01faf22b131 100644
--- a/src/common/snippets/src/op/kernel.cpp
+++ b/src/common/snippets/src/op/kernel.cpp
@@ -8,8 +8,7 @@ namespace ngraph {
 namespace snippets {
 namespace op {
 
-Kernel::Kernel(LoweredExprIR nested) : Op(), region(std::move(nested)) {
-}
+Kernel::Kernel(lowered::LinearIR nested) : Op(), region(std::move(nested)) {}
 
 } // namespace op
 } // namespace snippets
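Stepping back to the per-dimension loop markup removed above: the work amount comes from the broadcasted loop tensor, and the increment comes from the subtensor when one is set, otherwise vector_size for the innermost dimension and 1 elsewhere. A simplified sketch (names are illustrative and layout handling is omitted):

#include <cstddef>
#include <vector>

struct LoopParams {
    size_t work_amount;
    size_t increment;
};

// dim_idx counts from the innermost dimension (0) outwards.
LoopParams loop_params_for_dim(const std::vector<size_t>& loop_tensor,
                               const std::vector<size_t>& loop_subtensor,
                               size_t dim_idx,
                               size_t vector_size) {
    const size_t work_amount = *(loop_tensor.rbegin() + dim_idx);
    const size_t increment = dim_idx < loop_subtensor.size()
            ? *(loop_subtensor.rbegin() + dim_idx)
            : (dim_idx == 0 ? vector_size : 1);  // vectorize only the innermost loop
    return {work_amount, increment};
}

// e.g. loop_tensor {2, 3, 16}, empty subtensor, vector_size 8:
//   dim 0 -> {16, 8}: innermost loop steps by whole SIMD vectors
//   dim 1 -> {3, 1}:  outer loop steps one row at a time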
"transformations/common_optimizations/nop_elimination.hpp" #include "transformations/utils/utils.hpp" @@ -25,7 +26,6 @@ #include "ngraph/pass/constant_folding.hpp" #include "ov_ops/type_relaxed.hpp" #include -#include "snippets/tensor_descriptor.hpp" #include #include @@ -315,7 +315,7 @@ ov::PartialShape snippets::op::Subgraph::canonicalize(const BlockedShapeVector& const auto baseRank = baseShape.size(); const bool baseIsBlocked = baseOrder.size() != std::set(baseOrder.begin(), baseOrder.end()).size(); for (size_t i = 0; i < inputShapes.size(); i++) { - const auto &blockedShape = inputShapes[i]; + const auto& blockedShape = inputShapes[i]; PartialShape inShape; AxisVector inOrder; element::Type inType; @@ -453,7 +453,7 @@ void snippets::op::Subgraph::align_element_types(const BlockedShapeVector& outpu void snippets::op::Subgraph::convert_to_snippet_dialect() { INTERNAL_OP_SCOPE(Subgraph); OV_ITT_SCOPED_TASK(ngraph::pass::itt::domains::SnippetsTransform, "Snippets::convert_to_snippet_dialect") - const auto & params = body_ptr()->get_parameters(); + const auto& params = body_ptr()->get_parameters(); bool inputs_has_dynamic_last_dims = std::any_of(params.begin(), params.end(), [](const shared_ptr& p){ @@ -524,7 +524,7 @@ snippets::Schedule snippets::op::Subgraph::generate( const auto ops = body_ptr()->get_ops(); // actual code emission - LoweringConfig lowering_config; + lowered::Config lowering_config; lowering_config.m_save_lowered_code = config.m_has_domain_sensitive_ops; lowering_config.m_need_fill_tail_register = config.m_has_domain_sensitive_ops; lowering_config.m_loop_depth = tileRank; diff --git a/src/common/snippets/src/pass/collapse_subgraph.cpp b/src/common/snippets/src/pass/collapse_subgraph.cpp index 4811aadb8332ed..2b28df4223d9ec 100644 --- a/src/common/snippets/src/pass/collapse_subgraph.cpp +++ b/src/common/snippets/src/pass/collapse_subgraph.cpp @@ -181,11 +181,11 @@ auto has_supported_in_out(const std::shared_ptr &n) -> bool { (ov::is_type(n) || ov::is_type(n)))); }; - const auto & inputs = n->inputs(); - const auto & outputs = n->outputs(); + const auto& inputs = n->inputs(); + const auto& outputs = n->outputs(); // todo: Is this check necessary? 
     for (const auto& out : outputs) {
-        for (const auto &in_out : out.get_target_inputs()) {
+        for (const auto& in_out : out.get_target_inputs()) {
             if (ov::is_type(in_out.get_node()->shared_from_this())) {
                 return false;
             }
@@ -196,7 +196,7 @@ auto has_supported_in_out(const std::shared_ptr &n) -> bool {
 }
 
 auto has_result_child(const std::shared_ptr &node) -> bool {
-    for (const auto &child : node->get_users()) {
+    for (const auto& child : node->get_users()) {
         if (ov::is_type(child)) {
             return true;
         }
@@ -206,7 +206,7 @@ auto has_result_child(const std::shared_ptr &node) -> bool {
 
 auto get_num_result_children(const std::shared_ptr &node) -> size_t {
     size_t result = 0;
-    for (const auto &child : node->get_users()) {
+    for (const auto& child : node->get_users()) {
         if (ov::is_type(child)) {
             result++;
         }
@@ -312,14 +312,14 @@ TokenizeSnippets::TokenizeSnippets() {
         */
         const auto cyclicDependencyIsIntoduced = [&node](const std::shared_ptr& nodeToExamine, std::pair& currentBounds) -> bool {
             assert(currentBounds.first < currentBounds.second && "Invalid currentBounds passed");
-            const auto &parentNodes = ngraph::as_node_vector(nodeToExamine->input_values());
+            const auto& parentNodes = ngraph::as_node_vector(nodeToExamine->input_values());
             const int64_t maxParentOrder = std::accumulate(parentNodes.begin(), parentNodes.end(), currentBounds.first,
                                                            [](int64_t maxOrder, std::shared_ptr n){
                 if (ngraph::op::is_constant(n) || ngraph::op::is_parameter(n))
                     return maxOrder;
                 return std::max(maxOrder, GetTopologicalOrder(n));
             });
-            const auto &childNodes = nodeToExamine->get_users();
+            const auto& childNodes = nodeToExamine->get_users();
             // Skip the node being attached, since it will be a part of subgraph and can't introduce loop dependency
             const int64_t minChildOrder = std::accumulate(childNodes.begin(), childNodes.end(), currentBounds.second,
                                                           [&node](int64_t minOrder, std::shared_ptr n){
@@ -334,7 +334,7 @@ TokenizeSnippets::TokenizeSnippets() {
             return true;
         };
 
-        for (const auto &input_node : ngraph::as_node_vector(input_values)) {
+        for (const auto& input_node : ngraph::as_node_vector(input_values)) {
            if (auto subgraph = ov::as_type_ptr(input_node)) {
                if (!clones.count(input_node)) {
                    auto f = subgraph->body().clone();
@@ -386,7 +386,7 @@ TokenizeSnippets::TokenizeSnippets() {
                 // Todo: here we rely on friendly_name uniqueness. Propose a different algorithm.
                 size_t current_input_index = body_parameters.size();
                 for (size_t p_ind = 0; p_ind < body_parameters.size(); p_ind++) {
-                    const auto & p = body_parameters[p_ind];
+                    const auto& p = body_parameters[p_ind];
                     if (p->get_friendly_name() == found->get_node_shared_ptr()->get_friendly_name()) {
                        current_input_index = p_ind;
                        break;
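The tokenization.cpp hunks below touch the rt_info accessors; the pattern they implement is a typed attribute stored in the node's string-keyed runtime-info map, with an explicit not-set path. A minimal sketch, with a plain std::map standing in for the real ov::Any-based map returned by node->get_rt_info():

#include <cstdint>
#include <map>
#include <stdexcept>
#include <string>

// Stand-in for the node's runtime-info map (the real one stores ov::Any).
using RTMap = std::map<std::string, int64_t>;

void set_topological_order(RTMap& rt, int64_t order) {
    rt["TopologicalOrder"] = order;  // create or overwrite the attribute
}

int64_t get_topological_order(const RTMap& rt) {
    const auto rinfo = rt.find("TopologicalOrder");
    if (rinfo == rt.end())
        throw std::runtime_error("Topological order is required, but not set.");
    return rinfo->second;
}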
diff --git a/src/common/snippets/src/pass/tokenization.cpp b/src/common/snippets/src/pass/tokenization.cpp
index e3d264e33e7db1..c39f349e8176a8 100644
--- a/src/common/snippets/src/pass/tokenization.cpp
+++ b/src/common/snippets/src/pass/tokenization.cpp
@@ -13,13 +13,13 @@ namespace snippets {
 namespace pass {
 
 void SetSnippetsNodeType(const std::shared_ptr &node, SnippetsNodeType nodeType) {
-    auto &rt = node->get_rt_info();
+    auto& rt = node->get_rt_info();
     rt["SnippetsNodeType"] = nodeType;
 }
 
 SnippetsNodeType GetSnippetsNodeType(const std::shared_ptr &node) {
     OV_ITT_SCOPED_TASK(ngraph::pass::itt::domains::SnippetsTransform, "Snippets::GetSnippetsNodeType")
-    auto &rt = node->get_rt_info();
+    auto& rt = node->get_rt_info();
     const auto rinfo = rt.find("SnippetsNodeType");
     if (rinfo == rt.end())
         return SnippetsNodeType::NotSet;
@@ -28,12 +28,12 @@ SnippetsNodeType GetSnippetsNodeType(const std::shared_ptr &node) {
 
 void SetTopologicalOrder(const std::shared_ptr &node, int64_t order) {
     OV_ITT_SCOPED_TASK(ngraph::pass::itt::domains::SnippetsTransform, "Snippets::SetTopologicalOrder")
-    auto &rt = node->get_rt_info();
+    auto& rt = node->get_rt_info();
     rt["TopologicalOrder"] = order;
 }
 
 int64_t GetTopologicalOrder(const std::shared_ptr &node) {
-    auto &rt = node->get_rt_info();
+    auto& rt = node->get_rt_info();
     const auto rinfo = rt.find("TopologicalOrder");
     if (rinfo == rt.end())
         throw ngraph_error("Topological order is required, but not set.");
@@ -44,7 +44,7 @@ bool EnumerateNodes::run_on_model(const std::shared_ptr &m) {
     OV_ITT_SCOPED_TASK(ngraph::pass::itt::domains::SnippetsTransform, "Snippets::EnumerateNodes")
     int64_t order = 0;
     // Todo: We don't really have to set order for every node, just for subgraph parents and children would be enough
-    for (auto &node : m->get_ordered_ops()) {
+    for (auto& node : m->get_ordered_ops()) {
         SetTopologicalOrder(node, order++);
     }
     return true;
diff --git a/src/common/snippets/src/utils.cpp b/src/common/snippets/src/utils.cpp
index 5740120767f195..ba2b9df27542b9 100644
--- a/src/common/snippets/src/utils.cpp
+++ b/src/common/snippets/src/utils.cpp
@@ -75,7 +75,7 @@ std::vector get_node_output_layout(const Node* node) {
         return {};
     if (node->is_dynamic())
         throw ngraph_error("It's illegal to call get_node_output_layout for dynamic nodes");
-    auto &rt = node->get_rt_info();
+    auto& rt = node->get_rt_info();
     const auto rinfo = rt.find("Layout");
     if (rinfo != rt.end()) {
         std::vector layout(rinfo->second.as>());
diff --git a/src/common/snippets/tests/src/lowering_utils.cpp b/src/common/snippets/tests/src/lowering_utils.cpp
index daf1c5bb0fbe76..be7f6514f6cd4b 100644
--- a/src/common/snippets/tests/src/lowering_utils.cpp
+++ b/src/common/snippets/tests/src/lowering_utils.cpp
@@ -82,7 +82,7 @@ void LoweringTests::TearDown() {
 
 std::shared_ptr LoweringTests::getSubgraph(const std::shared_ptr& f) {
     std::shared_ptr subgraph;
-    for (const auto &op : f->get_ops()) {
+    for (const auto& op : f->get_ops()) {
         bool is_subgraph = is_type(op);
         if (is_subgraph) {
             NGRAPH_CHECK(subgraph.use_count() == 0,
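getSubgraph above enforces a single-subgraph invariant while scanning the model's ops. Distilled to a standalone sketch (Op and is_subgraph are placeholders for the real node types, not the actual test API):

#include <memory>
#include <stdexcept>
#include <vector>

struct Op {
    bool is_subgraph = false;
};

std::shared_ptr<Op> get_single_subgraph(const std::vector<std::shared_ptr<Op>>& ops) {
    std::shared_ptr<Op> subgraph;
    for (const auto& op : ops) {
        if (op->is_subgraph) {
            if (subgraph)  // a second match violates the invariant
                throw std::runtime_error("model contains more than one subgraph");
            subgraph = op;
        }
    }
    return subgraph;  // may be null if the model has no subgraph at all
}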
diff --git a/src/common/snippets/tests/src/pass/canonicalization.cpp b/src/common/snippets/tests/src/pass/canonicalization.cpp
index d96e3c817be27f..3bd3805e26a9fb 100644
--- a/src/common/snippets/tests/src/pass/canonicalization.cpp
+++ b/src/common/snippets/tests/src/pass/canonicalization.cpp
@@ -19,7 +19,7 @@ std::string CanonicalizationTests::getTestCaseName(testing::TestParamInfo
-        const auto &blockedshape = std::get<1>(inputs[i]);
+        const auto& blockedshape = std::get<1>(inputs[i]);
         // input shape
         result << "IS[" << i << "]=" << CommonTestUtils::vec2str(std::get<0>(inputs[i])) << "_";
         // input blocked shape
diff --git a/src/common/snippets/tests/src/pass/collapse_subgraph.cpp b/src/common/snippets/tests/src/pass/collapse_subgraph.cpp
index 086d3bdd9c131e..b42f7da9ee3066 100644
--- a/src/common/snippets/tests/src/pass/collapse_subgraph.cpp
+++ b/src/common/snippets/tests/src/pass/collapse_subgraph.cpp
@@ -26,56 +26,56 @@ void CollapseSubgraphTests::run() {
 }
 
 TEST_F(CollapseSubgraphTests, smoke_Snippets_Eltwise) {
-    const auto &f = EltwiseFunction(std::vector {{2, 3}, {1, 3}});
+    const auto& f = EltwiseFunction(std::vector {{2, 3}, {1, 3}});
     function = f.getOriginal();
     function_ref = f.getReference();
     run();
 }
 
 TEST_F(CollapseSubgraphTests, smoke_Snippets_MatMulWithEltwise) {
-    const auto &f = MatMulEltwiseBranchesFunction(std::vector {{1, 3, 4, 4}, {1, 3, 4, 4}});
+    const auto& f = MatMulEltwiseBranchesFunction(std::vector {{1, 3, 4, 4}, {1, 3, 4, 4}});
     function = f.getOriginal();
     function_ref = f.getReference();
     run();
 }
 
 TEST_F(CollapseSubgraphTests, smoke_Snippets_AvoidLoopEltwise) {
-    const auto &f = EltwiseLogLoopFunction(std::vector {{2, 5}, {2, 1}});
+    const auto& f = EltwiseLogLoopFunction(std::vector {{2, 5}, {2, 1}});
     function = f.getOriginal();
     function_ref = f.getReference();
     run();
 }
 
 TEST_F(CollapseSubgraphTests, smoke_Snippets_OneConvert) {
-    const auto &f = ConvertFunction(std::vector{{2, 5}});
+    const auto& f = ConvertFunction(std::vector{{2, 5}});
     function = f.getOriginal();
     function_ref = f.getReference();
     run();
 }
 
 TEST_F(CollapseSubgraphTests, smoke_Snippets_ConvertInput) {
-    const auto &f = ConvertInputFunction(std::vector{{2, 5}, {1, 5}});
+    const auto& f = ConvertInputFunction(std::vector{{2, 5}, {1, 5}});
    function = f.getOriginal();
    function_ref = f.getReference();
    run();
 }
 
 TEST_F(CollapseSubgraphTests, smoke_Snippets_ConvertOutput) {
-    const auto &f = ConvertOutputFunction(std::vector{{2, 5}, {1, 5}});
+    const auto& f = ConvertOutputFunction(std::vector{{2, 5}, {1, 5}});
     function = f.getOriginal();
     function_ref = f.getReference();
     run();
 }
 
 TEST_F(CollapseSubgraphTests, smoke_Snippets_ConvertStub) {
-    const auto &f = ConvertStubFunction(std::vector{{2, 5, 2}, {1, 5, 1}});
+    const auto& f = ConvertStubFunction(std::vector{{2, 5, 2}, {1, 5, 1}});
     function = f.getOriginal();
     function_ref = f.getReference();
     run();
 }
 
 TEST_F(CollapseSubgraphTests, smoke_Snippets_ConvertPartialInputsAndResults) {
-    const auto &f = ConvertPartialInputsAndResultsFunction(std::vector{{2, 5, 1}, {1, 5, 1}, {2, 1, 10}},
+    const auto& f = ConvertPartialInputsAndResultsFunction(std::vector{{2, 5, 1}, {1, 5, 1}, {2, 1, 10}},
                                                            std::vector{ov::element::i8, ov::element::bf16, ov::element::f32},
                                                            std::vector{ov::element::f32, ov::element::i8});
     function = f.getOriginal();
@@ -84,7 +84,7 @@ TEST_F(CollapseSubgraphTests, smoke_Snippets_ConvertPartialInputsAndResults) {
 }
 
 TEST_F(CollapseSubgraphTests, smoke_Snippets_EltwiseTwoResultsFunction) {
-    const auto &f = EltwiseTwoResultsFunction(std::vector{{2, 5}, {2, 1}});
+    const auto& f = EltwiseTwoResultsFunction(std::vector{{2, 5}, {2, 1}});
     function = f.getOriginal();
     function_ref = f.getReference();
     comparator.enable(FunctionsComparator::CmpValues::NAMES);
@@ -92,7 +92,7 @@ TEST_F(CollapseSubgraphTests, smoke_Snippets_EltwiseTwoResultsFunction) {
 }
 
 TEST_F(CollapseSubgraphTests, smoke_Snippets_ThreeFQFunction) {
-    const auto &f = ThreeFQFunction(std::vector{});
+    const auto& f = ThreeFQFunction(std::vector{});
     function = f.getOriginal();
     function_ref = f.getReference();
     run();
diff --git a/src/common/snippets/tests/src/pass/mha_tokenization.cpp b/src/common/snippets/tests/src/pass/mha_tokenization.cpp
index c5e7dc983c6715..c6f9cc8f25485c 100644
--- a/src/common/snippets/tests/src/pass/mha_tokenization.cpp
+++ b/src/common/snippets/tests/src/pass/mha_tokenization.cpp
@@ -20,14 +20,14 @@ void TokenizeMHASnippetsTests::run() {
 }
 
 TEST_F(TokenizeMHASnippetsTests, smoke_Snippets_MHA) {
-    const auto &f = MHAFunction(std::vector{{1, 128, 12, 64}, {1, 128, 12, 64}, {1, 12, 128, 128}, {1, 128, 12, 64}});
+    const auto& f = MHAFunction(std::vector{{1, 128, 12, 64}, {1, 128, 12, 64}, {1, 12, 128, 128}, {1, 128, 12, 64}});
     function = f.getOriginal();
     function_ref = f.getReference();
     run();
 }
 
 TEST_F(TokenizeMHASnippetsTests, smoke_Snippets_MHA_with_MatMul0_Transpose) {
-    const auto &f = MHAMatMul0TransposeFunction(std::vector{{1, 128, 12, 64}, {1, 128, 12, 64}, {1, 12, 128, 128}, {1, 128, 12, 64}});
+    const auto& f = MHAMatMul0TransposeFunction(std::vector{{1, 128, 12, 64}, {1, 128, 12, 64}, {1, 12, 128, 128}, {1, 128, 12, 64}});
     function = f.getOriginal();
     function_ref = f.getReference();
     run();
diff --git a/src/plugins/intel_cpu/src/emitters/cpu_generator.cpp b/src/plugins/intel_cpu/src/emitters/cpu_generator.cpp
index 5dc01048d4788c..53c3e1c58ee552 100644
--- a/src/plugins/intel_cpu/src/emitters/cpu_generator.cpp
+++ b/src/plugins/intel_cpu/src/emitters/cpu_generator.cpp
@@ -185,8 +185,8 @@ ngraph::snippets::Generator::opRegType ov::intel_cpu::CPUGenerator::get_specific
         throw ov::Exception("Register type of the operation " + std::string(op->get_type_name()) + " isn't determined!");
 }
 
-ngraph::snippets::pass::lowered::LinearIRTransformationPipeline ov::intel_cpu::CPUGenerator::target_specific_transformations() const {
-    ngraph::snippets::pass::lowered::LinearIRTransformationPipeline target_specific_transformation;
+ngraph::snippets::lowered::pass::TransformationPipeline ov::intel_cpu::CPUGenerator::target_specific_transformations() const {
+    ngraph::snippets::lowered::pass::TransformationPipeline target_specific_transformation;
     target_specific_transformation.register_transformation();
     return target_specific_transformation;
 }
diff --git a/src/plugins/intel_cpu/src/emitters/cpu_generator.hpp b/src/plugins/intel_cpu/src/emitters/cpu_generator.hpp
index 54747477aa4f6b..c20a8db060b9c3 100644
--- a/src/plugins/intel_cpu/src/emitters/cpu_generator.hpp
+++ b/src/plugins/intel_cpu/src/emitters/cpu_generator.hpp
@@ -32,7 +32,7 @@ class CPUGenerator : public ngraph::snippets::Generator {
 
 protected:
     opRegType get_specific_op_reg_type(const std::shared_ptr& op) const override;
-    ngraph::snippets::pass::lowered::LinearIRTransformationPipeline target_specific_transformations() const override;
+    ngraph::snippets::lowered::pass::TransformationPipeline target_specific_transformations() const override;
 };
 
 } // namespace intel_cpu
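For context on the renamed TransformationPipeline used above: the usual shape of such a linear-IR pass pipeline, sketched with simplified types. The interface below is an assumption for illustration, not the actual snippets API; only register_transformation being templated on the pass type is taken from the hunk.

#include <memory>
#include <utility>
#include <vector>

struct LinearIR {};  // stand-in for ngraph::snippets::lowered::LinearIR

struct Transformation {
    virtual bool run(LinearIR& ir) = 0;
    virtual ~Transformation() = default;
};

class TransformationPipeline {
public:
    template <typename T, typename... Args>
    void register_transformation(Args&&... args) {
        m_passes.push_back(std::make_shared<T>(std::forward<Args>(args)...));
    }

    bool run(LinearIR& ir) {
        bool modified = false;
        for (const auto& pass : m_passes)
            modified |= pass->run(ir);  // each pass reports whether it changed the IR
        return modified;
    }

private:
    std::vector<std::shared_ptr<Transformation>> m_passes;
};

// Usage mirroring the CPU generator hunk above, where the target registers
// its FuseLoadStoreConvert pass:
//   TransformationPipeline pipeline;
//   pipeline.register_transformation<ov::intel_cpu::pass::FuseLoadStoreConvert>();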
diff --git a/src/plugins/intel_cpu/src/emitters/jit_snippets_emitters.cpp b/src/plugins/intel_cpu/src/emitters/jit_snippets_emitters.cpp
index 2a45ccca07282c..18e996e6f651f8 100644
--- a/src/plugins/intel_cpu/src/emitters/jit_snippets_emitters.cpp
+++ b/src/plugins/intel_cpu/src/emitters/jit_snippets_emitters.cpp
@@ -6,12 +6,13 @@
 #include 
 
 #include "jit_snippets_emitters.hpp"
+
+#include "snippets/lowered/expression.hpp"
 #include "snippets/op/subgraph.hpp"
+#include "snippets/snippets_isa.hpp"
 #include "snippets/utils.hpp"
 #include "snippets_transformations/op/brgemm_copy_b.hpp"
 #include "snippets_transformations/op/brgemm_cpu.hpp"
-#include "snippets/snippets_isa.hpp"
-#include "snippets/op/subgraph.hpp"
 #include "snippets/tensor_descriptor.hpp"
 
 using namespace InferenceEngine;
@@ -20,9 +21,9 @@ using ngraph::snippets::AllocatedEmitter;
 using namespace Xbyak;
 using namespace dnnl::impl;
 using namespace dnnl::impl::cpu::x64;
 
-using ngraph::snippets::LoweredExpr;
-using ngraph::snippets::IOLoweredExpr;
-using ngraph::snippets::LoweredExprPtr;
+using ngraph::snippets::lowered::Expression;
+using ngraph::snippets::lowered::IOExpression;
+using ngraph::snippets::lowered::ExpressionPtr;
 using ngraph::snippets::TensorDescriptorPtr;
 
 namespace ov {
@@ -43,7 +44,7 @@ jit_container_emitter::jit_container_emitter(dnnl::impl::cpu::x64::jit_generator
 }
 
 void jit_container_emitter::map_abstract_registers(mapping_info& gpr_map_pool, mapping_info& vec_map_pool,
-                                                   ngraph::snippets::LoweredExprIR::container& expressions) const {
+                                                   ngraph::snippets::lowered::LinearIR::container& expressions) const {
     if (expressions.empty())
         IE_THROW() << "Cannot map registers when there is no allocated_emitters provided";
     auto map_regs = [](const std::vector& abstract_regs, mapping_info& mapping) {
@@ -121,13 +122,13 @@ KernelEmitter::KernelEmitter(dnnl::impl::cpu::x64::jit_generator* h, dnnl::impl:
         TensorDescriptorPtr td {};
         element::Type etype;
         switch (expr->get_type()) {
-            case IOLoweredExpr::io_type::INPUT: {
+            case ngraph::snippets::lowered::IOExpression::io_type::INPUT: {
                 td = expr->get_outputs()[0];
                 etype = expr->get_node()->get_output_element_type(0);
                 num_inputs++;
                 break;
             }
-            case IOLoweredExpr::io_type::OUTPUT: {
+            case ngraph::snippets::lowered::IOExpression::io_type::OUTPUT: {
                 num_outputs++;
                 td = expr->get_inputs()[0];
                 etype = expr->get_node()->get_input_element_type(0);
@@ -161,14 +162,14 @@ KernelEmitter::KernelEmitter(dnnl::impl::cpu::x64::jit_generator* h, dnnl::impl:
     mapping_info gpr_map_pool({}, gp_regs_pool);
     mapping_info vec_map_pool({}, vec_regs_pool);
 
-    ngraph::snippets::LoweredExprIR::container mem_access_exprs;
-    ngraph::snippets::LoweredExprIR::container general_exprs;
+    ngraph::snippets::lowered::LinearIR::container mem_access_exprs;
+    ngraph::snippets::lowered::LinearIR::container general_exprs;
     std::set unique_buffers;
 
     for (const auto& expr : body) {
         // Brgemm is a special case since it incorporates input and output (we use onednn kernel)
        // Just like Load & Store it requires offsets calculation
-        if (std::dynamic_pointer_cast(expr)) {
+        if (std::dynamic_pointer_cast(expr)) {
             mem_access_exprs.emplace_back(expr);
         } else if (const auto buffer = ov::as_type_ptr(expr->get_node())) {
             const auto buffer_id = buffer->get_id();
diff --git a/src/plugins/intel_cpu/src/emitters/jit_snippets_emitters.hpp b/src/plugins/intel_cpu/src/emitters/jit_snippets_emitters.hpp
index 4657915fcb6fe5..f5442cd1482563 100644
--- a/src/plugins/intel_cpu/src/emitters/jit_snippets_emitters.hpp
+++ b/src/plugins/intel_cpu/src/emitters/jit_snippets_emitters.hpp
@@ -6,7 +6,8 @@
 #include 
 #include 
-#include "snippets/lowered_expr.hpp"
+
+#include "snippets/lowered/linear_ir.hpp"
 #include "jit_emitter.hpp"
 #include "jit_load_store_emitters.hpp"
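The map_abstract_registers signature renamed in these hunks assigns each abstract register index a physical register drawn from a pool, reusing the same physical register for repeated abstract indexes. A hypothetical distilled sketch of that mapping step (the real mapping_info also tracks vector registers and used-register sets):

#include <cstddef>
#include <map>
#include <set>
#include <stdexcept>

struct mapping_info {
    std::map<size_t, size_t> assigned;  // abstract -> physical
    std::set<size_t> pool;              // free physical registers
};

size_t map_reg(mapping_info& m, size_t abstract) {
    const auto it = m.assigned.find(abstract);
    if (it != m.assigned.end())
        return it->second;                // already mapped: reuse the same register
    if (m.pool.empty())
        throw std::runtime_error("out of physical registers");
    const size_t phys = *m.pool.begin();  // grab the lowest free register
    m.pool.erase(m.pool.begin());
    m.assigned[abstract] = phys;
    return phys;
}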
@@ -51,8 +52,8 @@ class jit_container_emitter: public jit_emitter {
     // maps gpr and vec abstract registers to physical ones. Physical reg indexes are taken from the provided pools
     // (the first 2 args). All the used gpr and vec registers are also stored in the provided sets (the second 2 args).
     void map_abstract_registers(mapping_info& gpr_map_pool, mapping_info& vec_map_pool,
-                                ngraph::snippets::LoweredExprIR::container& expressions) const;
-    ngraph::snippets::LoweredExprIR body;
+                                ngraph::snippets::lowered::LinearIR::container& expressions) const;
+    ngraph::snippets::lowered::LinearIR body;
 };
 ///
 /// \brief  Kernel is the only entry point to Codegen JIT compilation. Kernel performs abstract-to-physical register
diff --git a/src/plugins/intel_cpu/src/snippets_transformations/lowered/fuse_load_store_and_convert.cpp b/src/plugins/intel_cpu/src/snippets_transformations/lowered/fuse_load_store_and_convert.cpp
index f6cd67e0fd5309..5c05c312a2e4ed 100644
--- a/src/plugins/intel_cpu/src/snippets_transformations/lowered/fuse_load_store_and_convert.cpp
+++ b/src/plugins/intel_cpu/src/snippets_transformations/lowered/fuse_load_store_and_convert.cpp
@@ -11,8 +11,8 @@
 #include "snippets_transformations/op/store_convert.hpp"
 
-bool ov::intel_cpu::pass::FuseLoadStoreConvert::fuse_load_convert(ngraph::snippets::LoweredExprIR& linear_ir,
-                                                                  ngraph::snippets::LoweredExprIR::constExprIt& convert_it) {
+bool ov::intel_cpu::pass::FuseLoadStoreConvert::fuse_load_convert(ngraph::snippets::lowered::LinearIR& linear_ir,
+                                                                  ngraph::snippets::lowered::LinearIR::constExprIt& convert_it) {
     const auto& convert_expr = *convert_it;
     const auto& convert = ov::as_type_ptr(convert_expr->get_node());
     const auto input_td = convert_expr->get_inputs().front();
@@ -51,12 +51,12 @@ bool ov::intel_cpu::pass::FuseLoadStoreConvert::fuse_load_convert(ngraph::snippe
     const auto& insertion_pos = std::next(convert_it);
     linear_ir.erase(std::find(linear_ir.cbegin(), mv_expr_it, load_expr));
     linear_ir.erase(mv_expr_it);
-    convert_it = linear_ir.insert(insertion_pos, std::make_shared(load_convert, in_td, out_td));
+    convert_it = linear_ir.insert(insertion_pos, std::make_shared(load_convert, in_td, out_td));
     return true;
 }
 
-bool ov::intel_cpu::pass::FuseLoadStoreConvert::fuse_store_convert(ngraph::snippets::LoweredExprIR& linear_ir,
-                                                                   ngraph::snippets::LoweredExprIR::constExprIt& convert_it) {
+bool ov::intel_cpu::pass::FuseLoadStoreConvert::fuse_store_convert(ngraph::snippets::lowered::LinearIR& linear_ir,
+                                                                   ngraph::snippets::lowered::LinearIR::constExprIt& convert_it) {
     const auto& convert_expr = *convert_it;
     const auto& convert = convert_expr->get_node();
     const auto input_td = convert_expr->get_inputs().front();
@@ -93,11 +93,11 @@ bool ov::intel_cpu::pass::FuseLoadStoreConvert::fuse_store_convert(ngraph::snipp
     const auto& insertion_pos = std::next(store_it);
     linear_ir.erase(store_it);
     convert_it = linear_ir.erase(convert_it);
-    linear_ir.insert(insertion_pos, std::make_shared(store_convert, in_td, out_td));
+    linear_ir.insert(insertion_pos, std::make_shared(store_convert, in_td, out_td));
     return true;
 }
 
-bool ov::intel_cpu::pass::FuseLoadStoreConvert::run(ngraph::snippets::LoweredExprIR& linear_ir) {
+bool ov::intel_cpu::pass::FuseLoadStoreConvert::run(ngraph::snippets::lowered::LinearIR& linear_ir) {
     OV_ITT_SCOPED_TASK(ngraph::pass::itt::domains::SnippetsTransform, "Snippets::FuseLoadStoreConvert")
 
     bool modified = false;
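fuse_load_convert above removes the Load and Convert expressions and inserts the fused op at a position saved beforehand; the subtle part is the iterator discipline, which works because the underlying container is a list whose erase invalidates only the erased iterators. A toy reproduction on std::list, with strings standing in for expressions:

#include <list>
#include <string>

int main() {
    std::list<std::string> ir{"load", "convert", "store"};
    auto convert_it = std::next(ir.begin());       // points at "convert"
    const auto insertion_pos = std::next(convert_it);
    ir.erase(ir.begin());                          // drop "load"; other iterators stay valid
    ir.erase(convert_it);                          // drop "convert"; insertion_pos stays valid
    ir.insert(insertion_pos, "load_convert");      // fused op takes their place
    // ir is now: load_convert, store
}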
diff --git a/src/plugins/intel_cpu/src/snippets_transformations/lowered/fuse_load_store_and_convert.hpp b/src/plugins/intel_cpu/src/snippets_transformations/lowered/fuse_load_store_and_convert.hpp
index ef7d4e87d088ff..45a466b3691aa6 100644
--- a/src/plugins/intel_cpu/src/snippets_transformations/lowered/fuse_load_store_and_convert.hpp
+++ b/src/plugins/intel_cpu/src/snippets_transformations/lowered/fuse_load_store_and_convert.hpp
@@ -4,7 +4,7 @@
 
 #pragma once
 
-#include "snippets/pass/lowered/linear_IR_transformation.hpp"
+#include "snippets/lowered/pass/transformation.hpp"
 
 namespace ov {
 namespace intel_cpu {
@@ -18,17 +18,17 @@ namespace pass {
 *        Fuse Store and ConvertTruncation into one op StoreConvertTruncation
 * @ingroup snippets
 */
-class FuseLoadStoreConvert: public ngraph::snippets::pass::lowered::LinearIRTransformation {
+class FuseLoadStoreConvert: public ngraph::snippets::lowered::pass::Transformation {
 public:
     FuseLoadStoreConvert() = default;
     OPENVINO_RTTI("FuseLoadStoreConvert", "LinearIRTransformation");
 
-    bool run(ngraph::snippets::LoweredExprIR& linear_ir) override;
+    bool run(ngraph::snippets::lowered::LinearIR& linear_ir) override;
 
 private:
-    bool fuse_load_convert(ngraph::snippets::LoweredExprIR& linear_ir,
-                           ngraph::snippets::LoweredExprIR::constExprIt& convert_it);
-    bool fuse_store_convert(ngraph::snippets::LoweredExprIR& linear_ir,
-                            ngraph::snippets::LoweredExprIR::constExprIt& convert_it);
+    bool fuse_load_convert(ngraph::snippets::lowered::LinearIR& linear_ir,
+                           ngraph::snippets::lowered::LinearIR::constExprIt& convert_it);
+    bool fuse_store_convert(ngraph::snippets::lowered::LinearIR& linear_ir,
+                            ngraph::snippets::lowered::LinearIR::constExprIt& convert_it);
 };
 
 } // namespace pass