diff --git a/src/common/snippets/include/snippets/lowered/loop_manager.hpp b/src/common/snippets/include/snippets/lowered/loop_manager.hpp index 2c82ff1c46706a..93d1620f5fdbe7 100644 --- a/src/common/snippets/include/snippets/lowered/loop_manager.hpp +++ b/src/common/snippets/include/snippets/lowered/loop_manager.hpp @@ -21,18 +21,7 @@ class LinearIR::LoopManager { struct LoopPort { LoopPort() = default; - LoopPort(const ExpressionPort& port, bool is_incremented = true, size_t dim_idx = 0) - : expr_port(std::make_shared(port)), - is_incremented(is_incremented), - dim_idx(dim_idx) { - OPENVINO_ASSERT(dim_idx < port.get_descriptor_ptr()->get_shape().size(), - "LoopPort dim_idx (", - dim_idx, - ") must be less than the corresponding expression port shape rank (", - port.get_descriptor_ptr()->get_shape().size(), - ")"); - } - + LoopPort(const ExpressionPort& port, bool is_incremented = true, size_t dim_idx = 0); std::shared_ptr clone_with_new_expr(const ExpressionPtr& new_expr) const; friend bool operator==(const LoopPort& lhs, const LoopPort& rhs); @@ -51,35 +40,63 @@ class LinearIR::LoopManager { class LoopInfo { public: + enum {UNDEFINED_DIM_IDX = std::numeric_limits::max()}; LoopInfo() = default; LoopInfo(size_t work_amount, size_t increment, const std::vector& entries, - const std::vector& exits) - : work_amount(work_amount), increment(increment), - entry_points(entries), exit_points(exits), outer_splited_loop(false) {} + const std::vector& exits, + bool outer_splited_loop = false) + : m_work_amount(work_amount), m_increment(increment), + m_entry_points(entries), m_exit_points(exits), m_outer_splited_loop(outer_splited_loop) {} LoopInfo(size_t work_amount, size_t increment, const std::vector& entries, - const std::vector& exits); + const std::vector& exits, + bool outer_splited_loop = false); std::shared_ptr clone_with_new_expr(const ExressionMap& expr_map) const; - // Returns dimension index if dimension indices for all entry and exit points are equal, and 
SIZE_MAX otherwise - size_t get_dim_idx() const; - // TODO: replace this temporary solution when ticket 119851 is implemented + // Returns dimension index if dimension indices for all entry and exit points are equal, and UNDEFINED_DIM_IDX otherwise + size_t get_dim_idx() const; + size_t get_work_amount() const; + size_t get_increment() const; + const std::vector& get_entry_points() const; + const std::vector& get_exit_points() const; + bool get_outer_splited_loop() const; + + /** + * \brief Inserts a separate body for first loop iteration processing if needed. + * Can also modify both main and first iter loop bodies. + * TODO: replace this temporary solution when ticket 119851 is implemented + * + * \param linear_ir LIR which should be modified + * \param loop_end_it iterator on LoopEnd expression for which the handler is called + * + * \return bool value which indicates whether the linear_ir was changed or not. + */ using FirstIterHandler = std::function; + const FirstIterHandler& get_first_iter_handler() const; + + // Sets dim_idx to all entry and exit points + void set_dim_idx(size_t dim_idx); + void set_work_amount(size_t work_amount); + void set_increment(size_t increment); + void set_entry_points(std::vector entry_points); + void set_exit_points(std::vector exit_points); + void set_outer_splited_loop(bool outer_splited_loop); void set_first_iter_handler(FirstIterHandler handler); - FirstIterHandler fst_iter_handler = nullptr; - size_t work_amount = 0; - size_t increment = 0; + private: + size_t m_work_amount = 0; + size_t m_increment = 0; // The order of entry and exit expressions is important: // - The position before first entry expr is Loop Begin position // - The position after last exit expr is Loop End position // Note: Scalars aren't entry expressions but can be before first entry expr in Linear IR - std::vector entry_points = {}; - std::vector exit_points = {}; + std::vector m_entry_points = {}; + std::vector m_exit_points = {}; // True if this Loop 
is outer Loop for nested Loops that splits the same dimension - bool outer_splited_loop = false; + bool m_outer_splited_loop = false; + FirstIterHandler m_first_iter_handler = nullptr; }; using LoopInfoPtr = std::shared_ptr; @@ -106,12 +123,7 @@ class LinearIR::LoopManager { const std::vector& entries, const std::vector& exits) { const auto loop_info = std::make_shared(work_amount, work_amount_increment, entries, exits); - auto set_common_dim_idx = [dim_idx](std::vector& ports) { - for (auto& port : ports) - port.dim_idx = dim_idx; - }; - set_common_dim_idx(loop_info->entry_points); - set_common_dim_idx(loop_info->exit_points); + loop_info->set_dim_idx(dim_idx); const auto loop_id = this->add_loop_info(loop_info); for (auto expr_it = loop_begin_pos; expr_it != loop_end_pos; ++expr_it) { insert_loop_id(*expr_it, loop_id); @@ -134,13 +146,14 @@ class LinearIR::LoopManager { return loop_id; } - size_t mark_loop_with_old_loop_replacement(LinearIR::constExprIt loop_begin_pos, - LinearIR::constExprIt loop_end_pos, - size_t work_amount, - size_t increment, - const std::vector& entries, - const std::vector& exits, - const size_t old_id); + size_t replace_with_new_loop(const LinearIR& linear_ir, + LinearIR::constExprIt loop_begin_pos, + LinearIR::constExprIt loop_end_pos, + size_t work_amount, + size_t increment, + const std::vector& entries, + const std::vector& exits, + const size_t old_id); void fuse_loops(const LinearIR& linear_ir, size_t loop_id_upper, size_t loop_id_lower, bool fuse_into_upper = true); void fuse_loops(LinearIR::constExprIt loop_begin_target, LinearIR::constExprIt loop_end_target, diff --git a/src/common/snippets/include/snippets/lowered/pass/init_loops.hpp b/src/common/snippets/include/snippets/lowered/pass/init_loops.hpp index 7ef0f75c1bdaa3..3e00f3eec682cf 100644 --- a/src/common/snippets/include/snippets/lowered/pass/init_loops.hpp +++ b/src/common/snippets/include/snippets/lowered/pass/init_loops.hpp @@ -25,14 +25,9 @@ class InitLoops : public 
Pass { bool run(LinearIR& linear_ir) override; private: - static void init_ptr_increments(std::vector& loop_inputs, - std::vector& loop_outputs, - size_t work_amount); - static void init_finalization_offsets(std::vector& loop_inputs, - std::vector& loop_outputs, - size_t work_amount); - static void init_element_type_sizes(std::vector& loop_inputs, - std::vector& loop_outputs); + static void init_ptr_increments(const LinearIR::LoopManager::LoopInfoPtr& loop_info); + static void init_finalization_offsets(const LinearIR::LoopManager::LoopInfoPtr& loop_info); + static void init_element_type_sizes(const LinearIR::LoopManager::LoopInfoPtr& loop_info); }; } // namespace pass diff --git a/src/common/snippets/include/snippets/op/broadcastload.hpp b/src/common/snippets/include/snippets/op/broadcastload.hpp index 7ca851ccf9f05b..a46311d30151ff 100644 --- a/src/common/snippets/include/snippets/op/broadcastload.hpp +++ b/src/common/snippets/include/snippets/op/broadcastload.hpp @@ -30,7 +30,7 @@ class BroadcastLoad : public MemoryAccess { std::shared_ptr clone_with_new_inputs(const OutputVector& new_args) const override; void validate_and_infer_types() override; const ov::Dimension& get_bcast_dimension() {return bcast_dimension;} - void set_bcast_dimension(const ov::Dimension new_dim) {bcast_dimension = new_dim;} + void set_bcast_dimension(ov::Dimension new_dim) {bcast_dimension = std::move(new_dim);} // Note:BroadcastMove and BroadcastLoad are implemented as separate classes, // but have identical shapeInfer semantics. 
In order to avoid code duplication, diff --git a/src/common/snippets/include/snippets/op/broadcastmove.hpp b/src/common/snippets/include/snippets/op/broadcastmove.hpp index 5d8bd450e5a1ca..95579c174841c5 100644 --- a/src/common/snippets/include/snippets/op/broadcastmove.hpp +++ b/src/common/snippets/include/snippets/op/broadcastmove.hpp @@ -29,7 +29,7 @@ class BroadcastMove : public ov::op::Op { void validate_and_infer_types() override; const ov::Dimension& get_bcast_dimension() {return bcast_dimension;} - void set_bcast_dimension(const ov::Dimension new_dim) {bcast_dimension = new_dim;} + void set_bcast_dimension(ov::Dimension new_dim) {bcast_dimension = std::move(new_dim);} // Note:BroadcastMove and BroadcastLoad are implemented as separate classes, // but have identical shapeInfer semantics. In order to avoid code duplication, // we created dummy ShapeInfer classes that are essentially instantiations diff --git a/src/common/snippets/src/generator.cpp b/src/common/snippets/src/generator.cpp index cb8550146543f4..a9ffbc1f7a6d12 100644 --- a/src/common/snippets/src/generator.cpp +++ b/src/common/snippets/src/generator.cpp @@ -27,6 +27,13 @@ void Generator::generate(lowered::LinearIR& linear_ir, LoweringResult& result, c return get_op_reg_type(op); }; lowered::pass::PassPipeline lowered_pipeline; + // Note: the order of all passes in this pipeline must not be changed since they have hard dependencies + // 1. InsertTailLoop must be called after AssignRegisters since tail loop expressions must have the same + // assigned registers as the corresponding ops in the main body. + // 2. CleanupLoopOffsets must be called after InsertTailLoop to avoid violating the proportionality of the pointer increments + // (this might happen if tail loop and main loop have different increments) + // 3. 
OptimizeLoopSingleEvaluation must be called after CleanupLoopOffsets + // since CleanupLoopOffsets can't handle loops with evaluate_once = true lowered_pipeline.register_pass(reg_type_mapper); lowered_pipeline.register_pass(); lowered_pipeline.register_pass(); diff --git a/src/common/snippets/src/lowered/loop_manager.cpp b/src/common/snippets/src/lowered/loop_manager.cpp index cb22910bfcfd6f..e7e83361ee0a39 100644 --- a/src/common/snippets/src/lowered/loop_manager.cpp +++ b/src/common/snippets/src/lowered/loop_manager.cpp @@ -19,21 +19,38 @@ using LoopManager = LinearIR::LoopManager; using LoopPort = LoopManager::LoopPort; using LoopInfo = LoopManager::LoopInfo; +LoopPort::LoopPort(const ExpressionPort& port, bool is_incremented, size_t dim_idx) + : expr_port(std::make_shared(port)), + is_incremented(is_incremented), + dim_idx(dim_idx) { + OPENVINO_ASSERT(dim_idx < port.get_descriptor_ptr()->get_shape().size(), + "LoopPort dim_idx (", + dim_idx, + ") must be less than the corresponding expression port shape rank (", + port.get_descriptor_ptr()->get_shape().size(), + ")"); +} + std::shared_ptr LoopPort::clone_with_new_expr(const ExpressionPtr& new_expr) const { auto new_loop_port = std::make_shared(*this); new_loop_port->expr_port = expr_port->clone_with_new_expr(new_expr); return new_loop_port; } -LinearIR::LoopManager::LoopInfo::LoopInfo(size_t work_amount, size_t increment, - const std::vector& entries, const std::vector& exits) - : work_amount(work_amount), increment(increment), outer_splited_loop(false) { - entry_points.reserve(entries.size()); - exit_points.reserve(exits.size()); +LinearIR::LoopManager::LoopInfo::LoopInfo(size_t work_amount, + size_t increment, + const std::vector& entries, + const std::vector& exits, + bool outer_splited_loop) + : m_work_amount(work_amount), + m_increment(increment), + m_outer_splited_loop(outer_splited_loop) { + m_entry_points.reserve(entries.size()); + m_exit_points.reserve(exits.size()); for (const auto& port : entries) - 
entry_points.emplace_back(port); + m_entry_points.emplace_back(port); for (const auto& port : exits) - exit_points.emplace_back(port); + m_exit_points.emplace_back(port); } std::shared_ptr LoopInfo::clone_with_new_expr(const ExressionMap& expr_map) const { @@ -48,38 +65,80 @@ std::shared_ptr LoopInfo::clone_with_new_expr(const ExressionMap& expr } return cloned_port_points; }; - const auto& new_entry_points = clone_loop_ports(entry_points); - const auto& new_exit_points = clone_loop_ports(exit_points); + const auto& new_entry_points = clone_loop_ports(m_entry_points); + const auto& new_exit_points = clone_loop_ports(m_exit_points); - auto new_loop_info = std::make_shared(work_amount, increment, new_entry_points, new_exit_points); - new_loop_info->outer_splited_loop = outer_splited_loop; + return std::make_shared(m_work_amount, m_increment, new_entry_points, new_exit_points, m_outer_splited_loop); +} - return new_loop_info; +size_t LoopInfo::get_work_amount() const { + return m_work_amount; } -std::shared_ptr LoopManager::clone_with_new_expr(const ExressionMap& expr_map) const { - auto new_loop_manager = std::make_shared(); - for (const auto& id_info : m_map) - new_loop_manager->m_map.insert({id_info.first, id_info.second->clone_with_new_expr(expr_map)}); - new_loop_manager->next_id = next_id; - return new_loop_manager; +size_t LoopInfo::get_increment() const { + return m_increment; +} + +const std::vector& LoopInfo::get_entry_points() const { + return m_entry_points; +} + +const std::vector& LoopInfo::get_exit_points() const { + return m_exit_points; +} + +bool LoopInfo::get_outer_splited_loop() const { + return m_outer_splited_loop; +} + +const LoopInfo::FirstIterHandler& LoopInfo::get_first_iter_handler() const { + return m_first_iter_handler; } size_t LinearIR::LoopManager::LoopInfo::get_dim_idx() const { - OPENVINO_ASSERT(!entry_points.empty(), "Loop info must have at least one entry point"); + OPENVINO_ASSERT(!m_entry_points.empty(), "Loop info must have at 
least one entry point"); auto equal_dim_idxes = [&](const LinearIR::LoopManager::LoopPort& p) { - return p.dim_idx == entry_points[0].dim_idx; + return p.dim_idx == m_entry_points[0].dim_idx; }; - if (std::all_of(entry_points.begin(), entry_points.end(), equal_dim_idxes) && - std::all_of(exit_points.begin(), exit_points.end(), equal_dim_idxes)) { - return entry_points[0].dim_idx; + if (std::all_of(m_entry_points.begin(), m_entry_points.end(), equal_dim_idxes) && + std::all_of(m_exit_points.begin(), m_exit_points.end(), equal_dim_idxes)) { + return m_entry_points[0].dim_idx; } else { - return SIZE_MAX; + return UNDEFINED_DIM_IDX; } } -void LinearIR::LoopManager::LoopInfo::set_first_iter_handler(FirstIterHandler handler) { - fst_iter_handler = std::move(handler); +void LoopInfo::set_dim_idx(size_t dim_idx) { + auto set_common_dim_idx = [dim_idx](std::vector& ports) { + for (auto& port : ports) + port.dim_idx = dim_idx; + }; + set_common_dim_idx(m_entry_points); + set_common_dim_idx(m_exit_points); +} + +void LoopInfo::set_work_amount(size_t work_amount) { + m_work_amount = work_amount; +} + +void LoopInfo::set_increment(size_t increment) { + m_increment = increment; +} + +void LoopInfo::set_entry_points(std::vector entry_points) { + m_entry_points = std::move(entry_points); +} + +void LoopInfo::set_exit_points(std::vector exit_points) { + m_exit_points = std::move(exit_points); +} + +void LoopInfo::set_outer_splited_loop(bool outer_splited_loop) { + m_outer_splited_loop = outer_splited_loop; +} + +void LoopInfo::set_first_iter_handler(LoopInfo::FirstIterHandler first_iter_handler) { + m_first_iter_handler = std::move(first_iter_handler); } bool operator==(const LinearIR::LoopManager::LoopPort& lhs, const LinearIR::LoopManager::LoopPort& rhs) { @@ -97,6 +156,14 @@ bool operator<(const LinearIR::LoopManager::LoopPort& lhs, const LinearIR::LoopM (lhs.is_incremented == rhs.is_incremented && lhs.dim_idx < rhs.dim_idx))); } +std::shared_ptr 
LoopManager::clone_with_new_expr(const ExressionMap& expr_map) const { + auto new_loop_manager = std::make_shared(); + for (const auto& id_info : m_map) + new_loop_manager->m_map.insert({id_info.first, id_info.second->clone_with_new_expr(expr_map)}); + new_loop_manager->next_id = next_id; + return new_loop_manager; +} + size_t LinearIR::LoopManager::add_loop_info(const LoopInfoPtr &loop) { const auto index = next_id; m_map[index] = loop; @@ -133,7 +200,7 @@ void LinearIR::LoopManager::get_loop_bounds(const LinearIR &linear_ir, LinearIR::constExprIt &loop_end_pos, bool loop_ops_inserted) const { const auto loop_info = get_loop_info(loop_id); - get_loop_bounds(linear_ir, loop_info->entry_points, loop_info->exit_points, loop_begin_pos, loop_end_pos, loop_id, loop_ops_inserted); + get_loop_bounds(linear_ir, loop_info->get_entry_points(), loop_info->get_exit_points(), loop_begin_pos, loop_end_pos, loop_id, loop_ops_inserted); } void LinearIR::LoopManager::get_loop_bounds(const LinearIR &linear_ir, @@ -177,8 +244,8 @@ LinearIR::LoopManager::LoopPort LinearIR::LoopManager::get_loop_port_by_expr_por return *it; }; const auto& loop_info = get_loop_info(loop_id); - return expr_port.get_type() == ExpressionPort::Input ? get_loop_port(loop_info->entry_points) - : get_loop_port(loop_info->exit_points); + return expr_port.get_type() == ExpressionPort::Input ? 
get_loop_port(loop_info->get_entry_points()) + : get_loop_port(loop_info->get_exit_points()); } void LinearIR::LoopManager::get_io_loop_ports(LinearIR::constExprIt loop_begin_pos, @@ -278,18 +345,28 @@ void LinearIR::LoopManager::mark_loop(LinearIR::constExprIt loop_begin_pos, } } -size_t LinearIR::LoopManager::mark_loop_with_old_loop_replacement(LinearIR::constExprIt loop_begin_pos, - LinearIR::constExprIt loop_end_pos, - size_t work_amount, - size_t increment, - const std::vector& entries, - const std::vector& exits, - const size_t old_id) { +size_t LinearIR::LoopManager::replace_with_new_loop(const LinearIR& linear_ir, + LinearIR::constExprIt loop_begin_pos, + LinearIR::constExprIt loop_end_pos, + size_t work_amount, + size_t increment, + const std::vector& entries, + const std::vector& exits, + const size_t old_id) { const auto loop_info = std::make_shared(work_amount, increment, entries, exits); const auto loop_id = this->add_loop_info(loop_info); for (auto expr_it = loop_begin_pos; expr_it != loop_end_pos; ++expr_it) { replace_loop_id(*expr_it, old_id, loop_id); } + + const auto old_loop_info = this->get_loop_info(old_id); + const auto old_loop_begin_pos = linear_ir.find(old_loop_info->get_entry_points().front().expr_port->get_expr()); + const auto old_loop_end_pos = linear_ir.find(old_loop_info->get_exit_points().back().expr_port->get_expr()); + // If new bounds are equal to old loop bounds, this means that old Loop is removed totally from LIR + // In this case old loop info must be completely removed from loop manager + if (loop_begin_pos == old_loop_begin_pos && loop_end_pos == old_loop_end_pos) { + this->remove_loop_info(old_id); + } return loop_id; } @@ -307,10 +384,10 @@ void LinearIR::LoopManager::fuse_loops(LinearIR::constExprIt loop_begin_target, const auto& loop_info_upper = m_map[loop_id_upper]; const auto& loop_info_lower = m_map[loop_id_lower]; - auto entry_points_upper = loop_info_upper->entry_points; - auto exit_points_upper = 
loop_info_upper->exit_points; - auto entry_points_lower = loop_info_lower->entry_points; - auto exit_points_lower = loop_info_lower->exit_points; + auto entry_points_upper = loop_info_upper->get_entry_points(); + auto exit_points_upper = loop_info_upper->get_exit_points(); + auto entry_points_lower = loop_info_lower->get_entry_points(); + auto exit_points_lower = loop_info_lower->get_exit_points(); fuse_loop_ports(exit_points_upper, entry_points_lower, loop_id_upper); std::vector new_entries = entry_points_upper; @@ -319,8 +396,8 @@ void LinearIR::LoopManager::fuse_loops(LinearIR::constExprIt loop_begin_target, new_exits.insert(new_exits.end(), exit_points_lower.begin(), exit_points_lower.end()); auto& loop_info = fuse_into_upper ? loop_info_upper : loop_info_lower; - loop_info->entry_points = new_entries; - loop_info->exit_points = new_exits; + loop_info->set_entry_points(new_entries); + loop_info->set_exit_points(new_exits); const auto& from = fuse_into_upper ? loop_id_lower : loop_id_upper; const auto& to = fuse_into_upper ? loop_id_upper : loop_id_lower; @@ -382,7 +459,7 @@ template<> void LinearIR::LoopManager::update_loop_port(size_t loop_id, const ExpressionPort& actual_port, const std::vector& target_ports, bool is_entry) { const auto& loop_info = get_loop_info(loop_id); - auto& ports = is_entry ? loop_info->entry_points : loop_info->exit_points; + auto ports = is_entry ? loop_info->get_entry_points() : loop_info->get_exit_points(); auto port_it = std::find_if(ports.begin(), ports.end(), [&actual_port](const LoopPort& point) { return *point.expr_port.get() == actual_port; }); // In some cases actual ExpressionPort may not be LoopPort. We shouldn't throw exception here since ExpressionPort is not strong condition as LoopPort @@ -400,18 +477,20 @@ void LinearIR::LoopManager::update_loop_port(size_t loop_id, const ExpressionPor }); port_it = ports.erase(port_it); ports.insert(port_it, target_loop_ports.cbegin(), target_loop_ports.cend()); + is_entry ? 
loop_info->set_entry_points(ports) : loop_info->set_exit_points(ports); } template<> void LinearIR::LoopManager::update_loop_port(size_t loop_id, const LoopPort& actual_port, const std::vector& target_ports, bool is_entry) { const auto& loop_info = get_loop_info(loop_id); - auto& ports = is_entry ? loop_info->entry_points : loop_info->exit_points; + auto ports = is_entry ? loop_info->get_entry_points() : loop_info->get_exit_points(); auto port_it = std::find_if(ports.begin(), ports.end(), [&actual_port](const LoopPort& point) { return point == actual_port; }); OPENVINO_ASSERT(port_it != ports.end(), "Failed update_loop_port: existing loop ports has not been found"); port_it = ports.erase(port_it); ports.insert(port_it, target_ports.cbegin(), target_ports.cend()); + is_entry ? loop_info->set_entry_points(ports) : loop_info->set_exit_points(ports); } void LinearIR::LoopManager::expression_replacement(constExprIt new_expr_begin, constExprIt new_expr_end, const ExpressionPtr& decomposed_expr, @@ -446,8 +525,8 @@ void LinearIR::LoopManager::sort_loop_ports(LinearIR::constExprIt& loop_begin_po } }; auto loop_info = get_loop_info(loop_id); - const auto& loop_entries = loop_info->entry_points; - const auto& loop_exits = loop_info->exit_points; + const auto& loop_entries = loop_info->get_entry_points(); + const auto& loop_exits = loop_info->get_exit_points(); std::vector entries, exits; entries.reserve(loop_entries.size()); exits.reserve(loop_exits.size()); @@ -456,8 +535,8 @@ void LinearIR::LoopManager::sort_loop_ports(LinearIR::constExprIt& loop_begin_po push(loop_entries, entries, expr); push(loop_exits, exits, expr); } - loop_info->entry_points = entries; - loop_info->exit_points = exits; + loop_info->set_entry_points(entries); + loop_info->set_exit_points(exits); } void LinearIR::LoopManager::insert_loop_id(const ExpressionPtr& expr, size_t new_id, bool before, size_t target_id) { diff --git a/src/common/snippets/src/lowered/pass/cleanup_loop_offsets.cpp 
b/src/common/snippets/src/lowered/pass/cleanup_loop_offsets.cpp index 775a48bad1893e..79c9a115718c1f 100644 --- a/src/common/snippets/src/lowered/pass/cleanup_loop_offsets.cpp +++ b/src/common/snippets/src/lowered/pass/cleanup_loop_offsets.cpp @@ -35,7 +35,6 @@ bool CleanupLoopOffsets::run(LinearIR& linear_ir) { } if (auto outer_loop_end = as_type_ptr(next_node)) { auto fin_offsets = loop_end->get_finalization_offsets(); - const auto& is_incremented = loop_end->get_is_incremented(); std::unordered_map per_port_connector_offset; const auto& loop_inputs = expr_it->get()->get_input_port_connectors(); for (size_t i = 0; i < fin_offsets.size(); i++) @@ -43,15 +42,11 @@ bool CleanupLoopOffsets::run(LinearIR& linear_ir) { const auto outer_increment = static_cast(outer_loop_end->get_increment()); auto outer_ptr_increments = outer_loop_end->get_ptr_increments(); - const auto& outer_is_incremented = outer_loop_end->get_is_incremented(); const auto& outer_loop_inputs = next_expr_it->get()->get_input_port_connectors(); for (size_t i = 0; i < outer_ptr_increments.size(); i++) { - if (outer_is_incremented[i] == false) - continue; - const auto& managed_connector = outer_loop_inputs[i]; const auto& found = per_port_connector_offset.find(managed_connector); - if (found != per_port_connector_offset.end() && is_incremented[found->second] == true) { + if (found != per_port_connector_offset.end()) { // Since data ptr is incremented on [ptr_increment x increment], // we should guarantee proportionality of ptr shifts. 
// If the data ptr can't be proportionally shifted, the optimization is not applied diff --git a/src/common/snippets/src/lowered/pass/fuse_loops.cpp b/src/common/snippets/src/lowered/pass/fuse_loops.cpp index 43c4c1e9bcda70..1738d6d8fe9574 100644 --- a/src/common/snippets/src/lowered/pass/fuse_loops.cpp +++ b/src/common/snippets/src/lowered/pass/fuse_loops.cpp @@ -28,7 +28,7 @@ bool FuseLoops::loop_ports_are_compatible(const LinearIR::LoopManagerPtr& loop_m const size_t loop_lower_id, const size_t loop_upper_id) { const auto loop_lower = loop_manager->get_loop_info(loop_lower_id); - for (const auto& entry : loop_lower->entry_points) { + for (const auto& entry : loop_lower->get_entry_points()) { const auto& src_port = entry.expr_port->get_port_connector_ptr()->get_source(); if (is_loop_id_found(src_port.get_expr()->get_loop_ids(), loop_upper_id)) { if (!entry.is_incremented) @@ -44,8 +44,8 @@ bool FuseLoops::loop_ports_are_compatible(const LinearIR::LoopManagerPtr& loop_m } bool FuseLoops::can_be_fused(const LoopInfoPtr& loop_current, const LoopInfoPtr& loop_target) { - auto current_work_amount = loop_current->work_amount; - auto target_work_amount = loop_target->work_amount; + auto current_work_amount = loop_current->get_work_amount(); + auto target_work_amount = loop_target->get_work_amount(); // Loop fusion is supported only if Loops have equal increments and the equal/broadcastable work amounts. 
// Note: For example, Broadcastable work amounts are possible in the following case: // Relu_0 [16x1] Relu_1 [16x128] @@ -56,7 +56,7 @@ bool FuseLoops::can_be_fused(const LoopInfoPtr& loop_current, const LoopInfoPtr& // - Relu_1 and Add with work amount `128` and increment `vector size` // We can fuse them into one Loop with work amount `128` and increment `vector size` const auto supported_work_amount = current_work_amount == target_work_amount || current_work_amount == 1 || target_work_amount == 1; - const auto supported_increment = loop_current->increment == loop_target->increment; + const auto supported_increment = loop_current->get_increment() == loop_target->get_increment(); return supported_work_amount && supported_increment; } @@ -103,8 +103,8 @@ bool FuseLoops::fuse_upper_into_current(LinearIR& linear_ir, const LinearIR::Loo // We can fuse Loop_up to Loop_down only in cases when other consumers of Loop_up are after Loop_down // Because Loop_up should be explicitly moved before Loop_down in linear IR, and we must save control dependency bool is_fusion_allowed = true; - for (size_t i = 0; i < loop_target->exit_points.size() && is_fusion_allowed; ++i) { - const auto target_exit_point = loop_target->exit_points[i]; + for (size_t i = 0; i < loop_target->get_exit_points().size() && is_fusion_allowed; ++i) { + const auto target_exit_point = loop_target->get_exit_points()[i]; const auto consumer_inputs = target_exit_point.expr_port->get_connected_ports(); for (const auto& consumer_input : consumer_inputs) { const auto& consumer = consumer_input.get_expr(); @@ -125,10 +125,10 @@ bool FuseLoops::fuse_upper_into_current(LinearIR& linear_ir, const LinearIR::Loo loop_manager->get_loop_bounds(linear_ir, target_loop_id, target_loop_begin_pos, target_loop_end_pos); loop_manager->fuse_loops(target_loop_begin_pos, target_loop_end_pos, target_loop_id, current_loop_id, false); // Update work_amount for Loop (increment is constant because increments must be the identical for 
fusion): - loop_current->work_amount = std::max(loop_current->work_amount, loop_target->work_amount); + loop_current->set_work_amount(std::max(loop_current->get_work_amount(), loop_target->get_work_amount())); // If one of the Loops is outer for nested loops that splits the same dimension, // after fusion new common Loop save this status - loop_current->outer_splited_loop = loop_current->outer_splited_loop || loop_target->outer_splited_loop; + loop_current->set_outer_splited_loop(loop_current->get_outer_splited_loop() || loop_target->get_outer_splited_loop()); const auto insertion_place = current_loop_begin_pos; const auto is_move_needed = target_loop_end_pos != current_loop_begin_pos; @@ -153,8 +153,8 @@ bool FuseLoops::fuse_lower_into_current(LinearIR& linear_ir, const LinearIR::Loo // We can fuse Loop_down to Loop_up only in cases when other parents of Loop_down are before Loop_up // Because Loop_down should be explicitly moved after Loop_up in linear IR, and we must save control dependency bool is_fusion_allowed = true; - for (size_t i = 0; i < loop_target->entry_points.size() && is_fusion_allowed; ++i) { - const auto target_entry_port = loop_target->entry_points[i]; + for (size_t i = 0; i < loop_target->get_entry_points().size() && is_fusion_allowed; ++i) { + const auto target_entry_port = loop_target->get_entry_points()[i]; const auto parent_expr_output = *target_entry_port.expr_port->get_connected_ports().begin(); const auto& parent_expr = parent_expr_output.get_expr(); if (ov::is_type(parent_expr->get_node()) || parent_expr == current_exit_point->get_expr()) @@ -170,10 +170,10 @@ bool FuseLoops::fuse_lower_into_current(LinearIR& linear_ir, const LinearIR::Loo loop_manager->get_loop_bounds(linear_ir, target_loop_id, target_loop_begin_pos, target_loop_end_pos); loop_manager->fuse_loops(target_loop_begin_pos, target_loop_end_pos, current_loop_id, target_loop_id); // Update work_amount for Loop (increment is constant because increments must be the identical for 
fusion): - loop_current->work_amount = std::max(loop_current->work_amount, loop_target->work_amount); + loop_current->set_work_amount(std::max(loop_current->get_work_amount(), loop_target->get_work_amount())); // If one of the Loops is outer for nested loops that splits the same dimension, // after fusion new common Loop save this status - loop_current->outer_splited_loop = loop_current->outer_splited_loop || loop_target->outer_splited_loop; + loop_current->set_outer_splited_loop(loop_current->get_outer_splited_loop() || loop_target->get_outer_splited_loop()); const auto insertion_place = current_loop_end_pos; const auto is_move_needed = insertion_place != target_loop_begin_pos; @@ -222,7 +222,7 @@ bool FuseLoops::run(LinearIR& linear_ir) { // Loop_0 (Upper) | // | => | // Loop_1 (Current) Loop_0 + Loop_1 => new `Loop_1` - auto entry_points = current_loop_info->entry_points; + auto entry_points = current_loop_info->get_entry_points(); bool was_fusion_up = false; for (size_t in_port = 0; in_port < entry_points.size() && !was_fusion_up; ++in_port) { const auto entry_point = entry_points[in_port]; @@ -260,13 +260,13 @@ bool FuseLoops::run(LinearIR& linear_ir) { } // If Loops were fused and there are new entry_points, we should check for possible fusion again - if (was_fusion_up && entry_points != current_loop_info->entry_points) + if (was_fusion_up && entry_points != current_loop_info->get_entry_points()) continue; // Loop_0 (Current) Loop_0 + Loop_1 => new `Loop_0` // | => | // Loop_1 (Lower) | - auto exit_points = current_loop_info->exit_points; + auto exit_points = current_loop_info->get_exit_points(); bool was_fusion_down = false; for (size_t out_port = 0; out_port < exit_points.size() && !was_fusion_down; ++out_port) { const auto exit_point = exit_points[out_port]; diff --git a/src/common/snippets/src/lowered/pass/init_loops.cpp b/src/common/snippets/src/lowered/pass/init_loops.cpp index 2b54452bc8607c..68e8cc7757e13f 100644 --- 
a/src/common/snippets/src/lowered/pass/init_loops.cpp +++ b/src/common/snippets/src/lowered/pass/init_loops.cpp @@ -37,63 +37,75 @@ int64_t get_output_stride(size_t dim, const VectorDims& shape) { InitLoops::InitLoops() : Pass() {} -void InitLoops::init_ptr_increments(std::vector& loop_inputs, std::vector& loop_outputs, size_t work_amount) { - for (auto& loop_input : loop_inputs) { - loop_input.ptr_increment = 0; - if (loop_input.is_incremented) { - const auto& port = loop_input.expr_port; +void InitLoops::init_ptr_increments(const LinearIR::LoopManager::LoopInfoPtr& loop_info) { + const auto work_amount = loop_info->get_work_amount(); + auto loop_entries = loop_info->get_entry_points(); + auto loop_exits = loop_info->get_exit_points(); + + for (auto& loop_entry : loop_entries) { + loop_entry.ptr_increment = 0; + if (loop_entry.is_incremented) { + const auto& port = loop_entry.expr_port; const auto source = *port->get_connected_ports().begin(); const auto loop_ids = port->get_expr()->get_loop_ids(); const auto& layout = port->get_descriptor_ptr()->get_layout(); const auto& shape = port->get_descriptor_ptr()->get_shape(); - const auto& dim = *(layout.rbegin() + loop_input.dim_idx); + const auto& dim = *(layout.rbegin() + loop_entry.dim_idx); // If relevant dim is not broadcasted, then ptr_increment is the dim stride in the new layout if (!(shape[dim] == 1 && work_amount != 1)) { // Input layout shows how we should read data by which order and strides - loop_input.ptr_increment = get_input_stride(dim, source.get_descriptor_ptr()->get_layout(), shape); + loop_entry.ptr_increment = get_input_stride(dim, source.get_descriptor_ptr()->get_layout(), shape); } } } - for (auto& loop_output : loop_outputs) { - loop_output.ptr_increment = 0; - if (loop_output.is_incremented) { - const auto& port = loop_output.expr_port; + for (auto& loop_exit : loop_exits) { + loop_exit.ptr_increment = 0; + if (loop_exit.is_incremented) { + const auto& port = loop_exit.expr_port; const auto 
loop_ids = port->get_expr()->get_loop_ids(); const auto& layout = port->get_descriptor_ptr()->get_layout(); const auto& shape = port->get_descriptor_ptr()->get_shape(); - const auto original_dim = layout.size() - 1 - loop_output.dim_idx; + const auto original_dim = layout.size() - 1 - loop_exit.dim_idx; const auto& dim = std::distance(layout.cbegin(), std::find(layout.cbegin(), layout.cend(), original_dim)); // If relevant dim is not broadcasted, then ptr_increment is the dim stride in the new layout if (!(shape[dim] == 1 && work_amount != 1)) { // Output layout shows how we already written data by which order and strides - loop_output.ptr_increment = get_output_stride(dim, shape); + loop_exit.ptr_increment = get_output_stride(dim, shape); } } } + loop_info->set_entry_points(loop_entries); + loop_info->set_exit_points(loop_exits); } -void InitLoops::init_finalization_offsets(std::vector& loop_inputs, - std::vector& loop_outputs, - size_t work_amount) { - for (auto& loop_input : loop_inputs) { - loop_input.finalization_offset = -1 * loop_input.ptr_increment * work_amount; +void InitLoops::init_finalization_offsets(const LinearIR::LoopManager::LoopInfoPtr& loop_info) { + const auto work_amount = loop_info->get_work_amount(); + auto loop_entries = loop_info->get_entry_points(); + auto loop_exits = loop_info->get_exit_points(); + for (auto& loop_entry : loop_entries) { + loop_entry.finalization_offset = -1 * loop_entry.ptr_increment * work_amount; } - for (auto& loop_output : loop_outputs) { - loop_output.finalization_offset = -1 * loop_output.ptr_increment * work_amount; + for (auto& loop_exit : loop_exits) { + loop_exit.finalization_offset = -1 * loop_exit.ptr_increment * work_amount; } + loop_info->set_entry_points(loop_entries); + loop_info->set_exit_points(loop_exits); } -void InitLoops::init_element_type_sizes(std::vector& loop_inputs, - std::vector& loop_outputs) { - for (auto& loop_input : loop_inputs) { - const auto& port = loop_input.expr_port; - 
loop_input.data_size = static_cast(port->get_expr()->get_node()->get_input_element_type(port->get_index()).size()); +void InitLoops::init_element_type_sizes(const LinearIR::LoopManager::LoopInfoPtr& loop_info) { + auto loop_entries = loop_info->get_entry_points(); + auto loop_exits = loop_info->get_exit_points(); + for (auto& loop_entry : loop_entries) { + const auto& port = loop_entry.expr_port; + loop_entry.data_size = static_cast(port->get_expr()->get_node()->get_input_element_type(port->get_index()).size()); } - for (auto& loop_output : loop_outputs) { - const auto& port = loop_output.expr_port; - loop_output.data_size = static_cast(port->get_expr()->get_node()->get_output_element_type(port->get_index()).size()); + for (auto& loop_exit : loop_exits) { + const auto& port = loop_exit.expr_port; + loop_exit.data_size = static_cast(port->get_expr()->get_node()->get_output_element_type(port->get_index()).size()); } + loop_info->set_entry_points(loop_entries); + loop_info->set_exit_points(loop_exits); } bool InitLoops::run(LinearIR& linear_ir) { @@ -105,9 +117,9 @@ bool InitLoops::run(LinearIR& linear_ir) { const auto& loops = loop_manager->get_map(); for (const auto& loop : loops) { const auto loop_info = loop.second; - init_ptr_increments(loop_info->entry_points, loop_info->exit_points, loop_info->work_amount); - init_finalization_offsets(loop_info->entry_points, loop_info->exit_points, loop_info->work_amount); - init_element_type_sizes(loop_info->entry_points, loop_info->exit_points); + init_ptr_increments(loop_info); + init_finalization_offsets(loop_info); + init_element_type_sizes(loop_info); } return true; diff --git a/src/common/snippets/src/lowered/pass/insert_buffers.cpp b/src/common/snippets/src/lowered/pass/insert_buffers.cpp index aefaca42f4094e..d2f8bba9074c0b 100644 --- a/src/common/snippets/src/lowered/pass/insert_buffers.cpp +++ b/src/common/snippets/src/lowered/pass/insert_buffers.cpp @@ -61,12 +61,12 @@ ov::Shape compute_allocation_shape(const 
LinearIR::LoopManagerPtr& loop_manager, // TODO: Use general logic with the help of memory counts for allocation shape computation if (buffer_loop_ids.back() == parent_loop_ids.back()) { const auto buffer_loop = loop_manager->get_loop_info(buffer_loop_ids.back()); - *(allocation_shape.rbegin() + 1) = buffer_loop->increment; + *(allocation_shape.rbegin() + 1) = buffer_loop->get_increment(); set_rest_dims_to_ones(2); } else { for (size_t i = 0; i < std::min(rank, parent_loop_ids.size()); ++i) { const auto loop = loop_manager->get_loop_info(*(parent_loop_ids.rbegin() + i)); - *(allocation_shape.rbegin() + i) = loop->work_amount; + *(allocation_shape.rbegin() + i) = loop->get_work_amount(); } set_rest_dims_to_ones(static_cast(parent_loop_ids.size())); } @@ -275,8 +275,8 @@ bool InsertBuffers::run(LinearIR& linear_ir) { const auto loop_data_map = loop_manager->get_map(); for (const auto& loop_data : loop_data_map) { const auto loop_info = loop_data.second; - const auto loop_entries = loop_info->entry_points; - const auto loop_exits = loop_info->exit_points; + const auto loop_entries = loop_info->get_entry_points(); + const auto loop_exits = loop_info->get_exit_points(); // using begin() as expr_it because we work with LoopInfo, not expressions in Linear IR insertion(linear_ir, linear_ir.cbegin(), loop_manager, loop_entries, loop_exits); } diff --git a/src/common/snippets/src/lowered/pass/insert_loops.cpp b/src/common/snippets/src/lowered/pass/insert_loops.cpp index c8f381b0476730..3eab6e97df33fb 100644 --- a/src/common/snippets/src/lowered/pass/insert_loops.cpp +++ b/src/common/snippets/src/lowered/pass/insert_loops.cpp @@ -55,10 +55,10 @@ void InsertLoops::filter_ports(std::vector& loop_entries, std::vector< void InsertLoops::insertion(LinearIR& linear_ir, const LinearIR::LoopManagerPtr& loop_manager, size_t loop_id, bool has_outer_loop) { const auto loop_info = loop_manager->get_loop_info(loop_id); - auto loop_entries = loop_info->entry_points; - auto loop_exits = 
loop_info->exit_points; - const auto work_amount = loop_info->work_amount; - const auto work_amount_increment = loop_info->increment; + auto loop_entries = loop_info->get_entry_points(); + auto loop_exits = loop_info->get_exit_points(); + const auto work_amount = loop_info->get_work_amount(); + const auto work_amount_increment = loop_info->get_increment(); LinearIR::constExprIt loop_begin_pos, loop_end_pos; loop_manager->get_loop_bounds(linear_ir, loop_id, loop_begin_pos, loop_end_pos); diff --git a/src/common/snippets/src/lowered/pass/insert_tail_loop.cpp b/src/common/snippets/src/lowered/pass/insert_tail_loop.cpp index 1feaa9056c2851..dac1f7bb029120 100644 --- a/src/common/snippets/src/lowered/pass/insert_tail_loop.cpp +++ b/src/common/snippets/src/lowered/pass/insert_tail_loop.cpp @@ -23,7 +23,7 @@ void InsertTailLoop::propagate_updated_subtensor_through_loop(const LinearIR& li std::map original_shapes; // First step: set new dim value to the corresponding entry_points' dimensions if (new_dim_value != SIZE_MAX) { - for (const auto& port : loop_info->entry_points) { + for (const auto& port : loop_info->get_entry_points()) { if (port.is_incremented) { const auto& expr = port.expr_port->get_expr(); const auto node = expr->get_node(); @@ -98,7 +98,7 @@ void InsertTailLoop::propagate_updated_subtensor_through_loop(const LinearIR& li // The corresponding shapes of inner loops entry points must be updated using existing subtensor values if (new_dim_value == SIZE_MAX) { - for (const auto& port : loop_info->entry_points) + for (const auto& port : loop_info->get_entry_points()) update_only_dim_idx_with_subtensor_value(port); } propagate_updated_subtensor_through_loop(linear_ir, inner_loop_info, inner_begin, inner_end); @@ -131,34 +131,22 @@ LinearIR::container InsertTailLoop::copy_loop(const LinearIR& linear_ir, const s LinearIR::constExprIt loop_begin_pos, loop_end_pos; loop_manager->get_loop_bounds(linear_ir, loop_id, loop_begin_pos, loop_end_pos, true); ExressionMap 
expression_map; - auto loop_copy_range = LinearIR::deep_copy_range(loop_begin_pos, std::next(loop_end_pos), expression_map); - - auto update_loop_ports = [](const ExpressionPtr& expr, - const ExpressionPtr& tail_expr, - std::vector& ports) { - auto find_if_predicate = [&](const LinearIR::LoopManager::LoopPort& port) { - return port.expr_port->get_expr()->get_node() == expr->get_node(); - }; - auto pos = std::find_if(ports.begin(), ports.end(), find_if_predicate); - while (pos != ports.end()) { - pos->expr_port = std::make_shared(tail_expr, pos->expr_port->get_type(), pos->expr_port->get_index()); - pos = std::find_if(pos, ports.end(), find_if_predicate); - } - }; + const auto& loop_copy_range = LinearIR::deep_copy_range(loop_begin_pos, std::next(loop_end_pos), expression_map); const auto original_loop_info = loop_manager->get_loop_info(loop_id); - auto new_entry_points = original_loop_info->entry_points; - auto new_exit_points = original_loop_info->exit_points; + std::vector new_entry_points, new_exit_points; + // Clone loop ports from original loop info to new loop info + for (const auto& entry : original_loop_info->get_entry_points()) + new_entry_points.push_back(*entry.clone_with_new_expr(expression_map[entry.expr_port->get_expr().get()])); + for (const auto& exit : original_loop_info->get_exit_points()) + new_exit_points.push_back(*exit.clone_with_new_expr(expression_map[exit.expr_port->get_expr().get()])); + for (const auto& elem : expression_map) { const auto expr = elem.first->shared_from_this(); const auto& new_expr = elem.second; // Loop begin/end ops can't be loop ports if (ov::is_type(expr->get_node())) continue; - // Clone loop ports from original loop info to new loop info - update_loop_ports(expr, new_expr, new_entry_points); - update_loop_ports(expr, new_expr, new_exit_points); - // Update loop info of all outer loops with new loop ports const auto outer_loop_ids = LinearIR::LoopManager::get_outer_expr_loops(expr, loop_id); for (size_t i = 0; i < 
expr->get_input_count(); ++i) @@ -169,13 +157,14 @@ LinearIR::container InsertTailLoop::copy_loop(const LinearIR& linear_ir, const s const auto new_loop_begin_pos = loop_copy_range.begin(); const auto new_loop_end_pos = loop_copy_range.end(); - const auto new_id = loop_manager->mark_loop_with_old_loop_replacement(std::next(new_loop_begin_pos), - std::prev(new_loop_end_pos), - original_loop_info->work_amount, - original_loop_info->increment, - new_entry_points, - new_exit_points, - loop_id); + const auto new_id = loop_manager->replace_with_new_loop(linear_ir, + std::next(new_loop_begin_pos), + std::prev(new_loop_end_pos), + original_loop_info->get_work_amount(), + original_loop_info->get_increment(), + new_entry_points, + new_exit_points, + loop_id); const auto loop_end = ov::as_type_ptr(std::prev(new_loop_end_pos)->get()->get_node()); OPENVINO_ASSERT(loop_end, "Cloned Loop does not contain LoopEnd op at the expected place."); loop_end->set_id(new_id); @@ -204,26 +193,29 @@ void InsertTailLoop::create_tail_loop(LinearIR& linear_ir, // Note: new loop body is inserted before the original loop // So new loop becomes a main vector loop, the original loop becomes tail loop + // This is done in such way to have original ops from the main body at the end: + // this allows us to conveniently interact with outer loops in further passes linear_ir.insert(begin, new_loop_range.begin(), new_loop_range.end()); - const auto new_vector_loop_wa = original_loop_info->work_amount - tail_size; - original_loop_info->work_amount = new_vector_loop_wa; + const auto new_vector_loop_wa = original_loop_info->get_work_amount() - tail_size; + original_loop_info->set_work_amount(new_vector_loop_wa); new_loop_end->set_work_amount(new_vector_loop_wa); - original_loop_info->outer_splited_loop = tail_loop_info->outer_splited_loop; + original_loop_info->set_outer_splited_loop(tail_loop_info->get_outer_splited_loop()); // Note that finalization offsets should be applied after the last iteration. 
// So if there is a tail, then we should apply offsets after it, but not now. new_loop_end->set_finalization_offsets(std::vector(loop_end->get_finalization_offsets().size(), 0)); } loop_end->set_increment(tail_size); loop_end->set_work_amount(tail_size); - tail_loop_info->increment = tail_size; - tail_loop_info->work_amount = tail_size; + tail_loop_info->set_increment(tail_size); + tail_loop_info->set_work_amount(tail_size); // We have to check the loop body for any nested loops that work on the same dimension // and rescale their work_amount and increment accordingly - if (original_loop_info->outer_splited_loop) { + if (original_loop_info->get_outer_splited_loop()) { const auto current_dim_idx = original_loop_info->get_dim_idx(); - OPENVINO_ASSERT(current_dim_idx != SIZE_MAX, "Outer splitted loop unexpectedly iterates by several dimension indices"); + OPENVINO_ASSERT(current_dim_idx != LinearIR::LoopManager::LoopInfo::UNDEFINED_DIM_IDX, + "Outer splitted loop unexpectedly iterates by several dimension indices"); for (auto it = std::next(begin); it != std::prev(end); ++it) { const auto& expr = *it; const auto inner_loop_end = ov::as_type_ptr(expr->get_node()); @@ -337,9 +329,9 @@ bool InsertTailLoop::run(LinearIR& linear_ir) { continue; const auto loop_info = loop_manager->get_loop_info(loop_end->get_id()); - if (loop_info->fst_iter_handler) { - modified |= loop_info->fst_iter_handler(linear_ir, expr_it); - continue; + const auto& first_iter_handler = loop_info->get_first_iter_handler(); + if (first_iter_handler) { + modified |= first_iter_handler(linear_ir, expr_it); } const auto work_amount = loop_end->get_work_amount(); diff --git a/src/common/snippets/src/lowered/pass/split_loops.cpp b/src/common/snippets/src/lowered/pass/split_loops.cpp index d1bffd669c5769..ba036eca8011f9 100644 --- a/src/common/snippets/src/lowered/pass/split_loops.cpp +++ b/src/common/snippets/src/lowered/pass/split_loops.cpp @@ -15,6 +15,7 @@ namespace snippets { namespace lowered { 
namespace pass { using LoopManager = LinearIR::LoopManager; +using LoopInfo = LoopManager::LoopInfo; using LoopInfoPtr = LoopManager::LoopInfoPtr; SplitLoops::SplitLoops() : Pass() {} @@ -22,8 +23,8 @@ SplitLoops::SplitLoops() : Pass() {} bool SplitLoops::can_be_split(const LoopInfoPtr& current, const LoopInfoPtr& parent) { const auto current_dim_idx = current->get_dim_idx(); const auto parent_dim_idx = parent->get_dim_idx(); - const bool equal_dim_idxes = current_dim_idx != SIZE_MAX && current_dim_idx == parent_dim_idx; - return current->work_amount == parent->work_amount && current->increment != parent->increment && equal_dim_idxes; + const bool equal_dim_idxes = current_dim_idx != LoopInfo::UNDEFINED_DIM_IDX && current_dim_idx == parent_dim_idx; + return current->get_work_amount() == parent->get_work_amount() && current->get_increment() != parent->get_increment() && equal_dim_idxes; } bool SplitLoops::run(LinearIR& linear_ir) { @@ -44,7 +45,7 @@ bool SplitLoops::run(LinearIR& linear_ir) { // be in the same set of outer loops. Otherwise they won't be fused. const auto& loop_id = loop_ids.front(); const auto loop = loop_manager->get_loop_info(loop_id); - for (const auto& entry_point : loop->entry_points) { + for (const auto& entry_point : loop->get_entry_points()) { const auto& parent_port = entry_point.expr_port->get_port_connector_ptr()->get_source(); const auto& parent_expr = parent_port.get_expr(); const auto parent_loop_ids = parent_expr->get_loop_ids(); @@ -60,27 +61,27 @@ bool SplitLoops::run(LinearIR& linear_ir) { const auto parent_loop = loop_manager->get_loop_info(parent_loop_id); if (can_be_split(loop, parent_loop)) { loop_was_split = true; - const bool split_parent = parent_loop->increment < loop->increment; + const bool split_parent = parent_loop->get_increment() < loop->get_increment(); const auto& loop_to_split = split_parent ? parent_loop : loop; const auto& loop_to_split_id = split_parent ? 
parent_loop_id : loop_id; const auto& loop_to_fuse = !split_parent ? parent_loop : loop; - loop_to_split->work_amount = loop_to_fuse->increment; + loop_to_split->set_work_amount(loop_to_fuse->get_increment()); LinearIR::constExprIt loop_begin_pos, loop_end_pos; LoopManager::get_loop_bounds(linear_ir, - loop_to_split->entry_points, - loop_to_split->exit_points, + loop_to_split->get_entry_points(), + loop_to_split->get_exit_points(), loop_begin_pos, loop_end_pos, loop_to_split_id); const auto split_loop_id = loop_manager->mark_loop(loop_begin_pos, loop_end_pos, - loop_to_fuse->work_amount, - loop_to_fuse->increment, + loop_to_fuse->get_work_amount(), + loop_to_fuse->get_increment(), loop_to_split->get_dim_idx(), - loop_to_split->entry_points, - loop_to_split->exit_points); - loop_manager->get_loop_info(split_loop_id)->outer_splited_loop = true; + loop_to_split->get_entry_points(), + loop_to_split->get_exit_points()); + loop_manager->get_loop_info(split_loop_id)->set_outer_splited_loop(true); break; } } diff --git a/src/common/snippets/src/lowered/pass/validate_loops.cpp b/src/common/snippets/src/lowered/pass/validate_loops.cpp index ec1d6a0cddeba9..c652f51585c010 100644 --- a/src/common/snippets/src/lowered/pass/validate_loops.cpp +++ b/src/common/snippets/src/lowered/pass/validate_loops.cpp @@ -40,8 +40,8 @@ bool ValidateLoops::run(LinearIR& linear_ir) { std::vector dim_indexes; - auto validate_loop_ports = [&loop_manager, &dim_indexes, &validated_nested_loops, &is_already_verified](std::vector& loop_ports) { - for (auto& loop_port : loop_ports) { + auto validate_loop_ports = [&loop_manager, &dim_indexes, &validated_nested_loops, &is_already_verified](const std::vector& loop_ports) { + for (const auto& loop_port : loop_ports) { const auto expr = loop_port.expr_port->get_expr(); const auto loop_ids = expr->get_loop_ids(); // If loop_ids of the current port is subsequence of already validated IDs, skip @@ -54,15 +54,15 @@ bool ValidateLoops::run(LinearIR& linear_ir) { 
for (size_t i = 0; i < loop_ids.size(); ++i) { const auto id = loop_ids[i]; const auto dim_idx = loop_manager->get_loop_info(id)->get_dim_idx(); - // if the loop has different dimension indexes, it don't have to meet the next requirements - if (dim_idx == SIZE_MAX) + // if the loop has different dimension indexes, it don't have to meet the split loop related requirements + if (dim_idx == LinearIR::LoopManager::LoopInfo::UNDEFINED_DIM_IDX) continue; if (std::find(dim_indexes.cbegin(), dim_indexes.cend(), dim_idx) != dim_indexes.cend()) { OPENVINO_ASSERT(*dim_indexes.rbegin() == dim_idx, "Incorrect Loop ID configuration: the Loops with splitted dimension should be successively nested"); - OPENVINO_ASSERT(loop_manager->get_loop_info(loop_ids[i - 1])->increment == loop_manager->get_loop_info(id)->work_amount, + OPENVINO_ASSERT(loop_manager->get_loop_info(loop_ids[i - 1])->get_increment() == loop_manager->get_loop_info(id)->get_work_amount(), "Incorrect Loop ID configuration: the Loops with splitted dimension should be successively nested"); - OPENVINO_ASSERT(loop_manager->get_loop_info(loop_ids[i - 1])->outer_splited_loop, + OPENVINO_ASSERT(loop_manager->get_loop_info(loop_ids[i - 1])->get_outer_splited_loop(), "Incorrect Loop ID configuration: the outer Loop with splitted dimension should have `outer_splited_loop=True`"); } dim_indexes.push_back(dim_idx); @@ -71,10 +71,32 @@ bool ValidateLoops::run(LinearIR& linear_ir) { } }; + auto add_ports_dims_to_unique_dims = [](const std::vector& loop_ports, std::set& unique_dims) { + for (const auto& loop_port : loop_ports) { + if (!loop_port.is_incremented) + continue; + const auto& desc = loop_port.expr_port->get_descriptor_ptr(); + const auto& shape = desc->get_shape(); + const auto& layout = desc->get_layout(); + const auto& dim = shape[*(layout.rbegin() + loop_port.dim_idx)]; + // Since dim == 1 can be broadcasted to any value, it's not necessary to add it to unique dims + if (dim != 1) + unique_dims.insert(dim); + } + }; 
+ for (const auto& pair : loops) { const auto& loop_info = pair.second; - validate_loop_ports(loop_info->entry_points); - validate_loop_ports(loop_info->exit_points); + const auto& entry_points = loop_info->get_entry_points(); + const auto& exit_points = loop_info->get_exit_points(); + validate_loop_ports(entry_points); + validate_loop_ports(exit_points); + + std::set unique_dimensions; + add_ports_dims_to_unique_dims(entry_points, unique_dimensions); + add_ports_dims_to_unique_dims(exit_points, unique_dimensions); + OPENVINO_ASSERT(unique_dimensions.size() <= 1, + "Loop ports have incompatible dimensions, by which the loop iterates"); } return true; diff --git a/src/common/snippets/src/op/subgraph.cpp b/src/common/snippets/src/op/subgraph.cpp index 344ae36e314545..5571635ab0d2cb 100644 --- a/src/common/snippets/src/op/subgraph.cpp +++ b/src/common/snippets/src/op/subgraph.cpp @@ -427,6 +427,10 @@ void Subgraph::control_flow_transformations(lowered::LinearIR& linear_ir, const size_t vector_size = get_generator()->get_target_machine()->get_lanes(); const int32_t buffer_allocation_rank = static_cast(linear_ir.get_config().m_loop_depth); + // We have to call MarkLoops before backend markup passes + // because these passes can update subtensor but not insert Loop (e.g. 
when loop increment is equal to the corresponding dim) + // If MarkLoops is called on such LIR, it inserts Eltwise-like loops which might not reflect backend expectations + // It should be fixed by ticket 113666 lowered::pass::PassPipeline markup_pipeline; markup_pipeline.register_pass(vector_size); markup_pipeline.run(linear_ir); diff --git a/src/common/snippets/tests/src/lowered/pass/loop.cpp b/src/common/snippets/tests/src/lowered/pass/loop.cpp index 6bf74cbd53558a..1e29efa955d517 100644 --- a/src/common/snippets/tests/src/lowered/pass/loop.cpp +++ b/src/common/snippets/tests/src/lowered/pass/loop.cpp @@ -46,7 +46,7 @@ static void init_linear_ir(const std::vector& in_shapes, Linea loop_manager->mark_loop(expr_it, std::next(expr_it), inner_wa, inner_inc, 0, loop_entry_points, loop_exit_points); loop_manager->mark_loop(expr_it, std::next(expr_it), blocked_wa, blocked_inc, 1, loop_entry_points, loop_exit_points); const auto loop_id = loop_manager->mark_loop(expr_it, std::next(expr_it), outer_wa, outer_inc, 1, loop_entry_points, loop_exit_points); - loop_manager->get_loop_info(loop_id)->outer_splited_loop = true; + loop_manager->get_loop_info(loop_id)->set_outer_splited_loop(true); } static void init_pipeline(pass::PassPipeline& pass_pipeline) { diff --git a/src/plugins/intel_cpu/src/emitters/x64/jit_snippets_emitters.cpp b/src/plugins/intel_cpu/src/emitters/x64/jit_snippets_emitters.cpp index e90b892605b87f..e660a945485f2a 100644 --- a/src/plugins/intel_cpu/src/emitters/x64/jit_snippets_emitters.cpp +++ b/src/plugins/intel_cpu/src/emitters/x64/jit_snippets_emitters.cpp @@ -783,12 +783,6 @@ BrgemmEmitter::BrgemmEmitter(jit_generator* h, cpu_isa_t isa, const ExpressionPt } init_out_scheduling_params(output_desc); - const auto& output_subtensor = output_desc->get_subtensor(); - const auto& input_0_subtensor = input_0_desc->get_subtensor(); - m_K = *input_0_subtensor.rbegin(); - m_M = *(output_subtensor.rbegin() + 1); - m_N = *output_subtensor.rbegin(); - auto brg0Prc 
= InferenceEngine::details::convertPrecision(brgemm_node->get_input_element_type(0)); auto brg1Prc = InferenceEngine::details::convertPrecision(brgemm_node->get_input_element_type(1)); bool brgWithAMX = brgemm_node->is_amx(); @@ -801,9 +795,20 @@ BrgemmEmitter::BrgemmEmitter(jit_generator* h, cpu_isa_t isa, const ExpressionPt m_with_comp = brgemm_node->is_with_compensations(); m_with_scratch = brgemm_node->is_with_scratchpad(); - m_brgCtx.M = m_M; - m_brgCtx.N = m_N; - m_brgCtx.K = m_K; + const auto& output_subtensor = output_desc->get_subtensor(); + const auto& input_0_subtensor = input_0_desc->get_subtensor(); + const auto& input_1_subtensor = input_1_desc->get_subtensor(); + + OPENVINO_ASSERT(*(output_subtensor.rbegin() + 1) == *(input_0_subtensor.rbegin() + 1), + "Brgemm has different M dimension subtensors on input0 and output"); + OPENVINO_ASSERT(*output_subtensor.rbegin() == *input_1_subtensor.rbegin(), + "Brgemm has different N dimension subtensors on input1 and output"); + OPENVINO_ASSERT(*input_0_subtensor.rbegin() == *(input_1_subtensor.rbegin() + 1), + "Brgemm has different K dimension subtensors on input0 and input1"); + + m_brgCtx.M = *(output_subtensor.rbegin() + 1); + m_brgCtx.N = *output_subtensor.rbegin(); + m_brgCtx.K = *input_0_subtensor.rbegin(); m_brgCtx.LDA = leading_dimensions[0]; m_brgCtx.LDB = leading_dimensions[1]; m_brgCtx.LDC = leading_dimensions[2]; diff --git a/src/plugins/intel_cpu/src/emitters/x64/jit_snippets_emitters.hpp b/src/plugins/intel_cpu/src/emitters/x64/jit_snippets_emitters.hpp index 3689f07e67836a..a89a398605ffa7 100644 --- a/src/plugins/intel_cpu/src/emitters/x64/jit_snippets_emitters.hpp +++ b/src/plugins/intel_cpu/src/emitters/x64/jit_snippets_emitters.hpp @@ -396,10 +396,6 @@ class BrgemmEmitter : public jit_emitter { brgemmCtx m_brgCtx; std::unique_ptr m_brgKernel = nullptr; - size_t m_M = 0lu; - size_t m_K = 0lu; - size_t m_N = 0lu; - bool m_with_scratch = false; bool m_with_comp = false; diff --git 
a/src/plugins/intel_cpu/src/transformations/snippets/x64/op/brgemm_cpu.cpp b/src/plugins/intel_cpu/src/transformations/snippets/x64/op/brgemm_cpu.cpp index 76c69a831af276..5895ba4487b413 100644 --- a/src/plugins/intel_cpu/src/transformations/snippets/x64/op/brgemm_cpu.cpp +++ b/src/plugins/intel_cpu/src/transformations/snippets/x64/op/brgemm_cpu.cpp @@ -17,7 +17,7 @@ BrgemmCPU::BrgemmCPU(const Output& A, const Output& B, const Type ty const size_t offset_a, const size_t offset_b, const size_t offset_c, std::vector layout_a, std::vector layout_b, std::vector layout_c, const size_t blk_size_m, const size_t blk_size_k, const size_t blk_size_n, const float beta) - : Brgemm(), m_type(type) { + : Brgemm(), m_type(type), m_beta(beta) { // We call default ctor of Brgemm class to avoid incorrect shape infer in constructor_validate_and_type_infer() call set_arguments({A, B}); set_output_size(1); diff --git a/src/plugins/intel_cpu/src/transformations/snippets/x64/pass/lowered/brgemm_blocking.cpp b/src/plugins/intel_cpu/src/transformations/snippets/x64/pass/lowered/brgemm_blocking.cpp index 797afeb3e4c9e7..ee34b2f9076abe 100644 --- a/src/plugins/intel_cpu/src/transformations/snippets/x64/pass/lowered/brgemm_blocking.cpp +++ b/src/plugins/intel_cpu/src/transformations/snippets/x64/pass/lowered/brgemm_blocking.cpp @@ -38,8 +38,8 @@ bool BrgemmBlocking::run(LinearIR& linear_ir) { const auto& loop_ids = brgemm_expr->get_loop_ids(); for (const auto& id : loop_ids) { const auto loop = loop_manager->get_loop_info(id); - if (std::any_of(loop->entry_points.begin(), loop->entry_points.end(), check_port) || - std::any_of(loop->exit_points.begin(), loop->exit_points.end(), check_port)) { + if (std::any_of(loop->get_entry_points().begin(), loop->get_entry_points().end(), check_port) || + std::any_of(loop->get_exit_points().begin(), loop->get_exit_points().end(), check_port)) { return true; } } @@ -53,27 +53,27 @@ bool BrgemmBlocking::run(LinearIR& linear_ir) { if (!brgemm || 
blocking_loop_exists(brgemm_expr, brgemm)) continue; - const auto& input_0_desc = brgemm_expr->get_input_port_descriptor(0); - const auto& input_1_desc = brgemm_expr->get_input_port_descriptor(1); - const auto& output_desc = brgemm_expr->get_output_port_descriptor(0); + const auto& in_0_desc = brgemm_expr->get_input_port_descriptor(0); + const auto& in_1_desc = brgemm_expr->get_input_port_descriptor(1); + const auto& out_desc = brgemm_expr->get_output_port_descriptor(0); - auto input_0_subtensor = input_0_desc->get_subtensor(); - auto input_1_subtensor = input_1_desc->get_subtensor(); - auto output_subtensor = output_desc->get_subtensor(); + const auto& in_0_planar_dims = ov::snippets::utils::get_planar_vdims(in_0_desc->get_shape(), in_0_desc->get_layout()); + const auto& in_1_planar_dims = ov::snippets::utils::get_planar_vdims(in_1_desc->get_shape(), in_1_desc->get_layout()); + const auto& out_preordered_dims = ov::snippets::utils::get_preordered_vdims(out_desc->get_shape(), out_desc->get_layout()); - auto apply_m_blocking = [&]() { - const auto& output_shape = output_desc->get_shape(); - const auto& output_layout = output_desc->get_layout(); + auto in_0_subtensor = in_0_desc->get_subtensor(); + auto in_1_subtensor = in_1_desc->get_subtensor(); + auto out_subtensor = out_desc->get_subtensor(); - const auto& m_idx = *(output_layout.rbegin() + 1); - const auto& m = output_shape[m_idx]; + auto apply_m_blocking = [&]() { + const auto& m = *(out_preordered_dims.rbegin() + 1); const auto block_size_m = brgemm->get_m_block_size(); if (block_size_m >= m) { - *(input_0_subtensor.rbegin() + 1) = m; - *(output_subtensor.rbegin() + 1) = m; + *(in_0_subtensor.rbegin() + 1) = m; + *(out_subtensor.rbegin() + 1) = m; } else { - *(input_0_subtensor.rbegin() + 1) = block_size_m; - *(output_subtensor.rbegin() + 1) = block_size_m; + *(in_0_subtensor.rbegin() + 1) = block_size_m; + *(out_subtensor.rbegin() + 1) = block_size_m; std::vector 
entries{LoopPort(brgemm_expr->get_input_port(0), true), LoopPort(brgemm_expr->get_input_port(1), false)}; @@ -85,18 +85,14 @@ bool BrgemmBlocking::run(LinearIR& linear_ir) { }; auto apply_n_blocking = [&]() { - const auto& output_shape = output_desc->get_shape(); - const auto& output_layout = output_desc->get_layout(); - - const auto& n_idx = *output_layout.rbegin(); - const auto& n = output_shape[n_idx]; + const auto& n = *out_preordered_dims.rbegin(); const auto block_size_n = brgemm->get_n_block_size(); if (block_size_n >= n) { - *input_1_subtensor.rbegin() = n; - *output_subtensor.rbegin() = n; + *in_1_subtensor.rbegin() = n; + *out_subtensor.rbegin() = n; } else { - *input_1_subtensor.rbegin() = block_size_n; - *output_subtensor.rbegin() = block_size_n; + *in_1_subtensor.rbegin() = block_size_n; + *out_subtensor.rbegin() = block_size_n; std::vector entries{LoopPort(brgemm_expr->get_input_port(0), false), LoopPort(brgemm_expr->get_input_port(1), true)}; @@ -111,18 +107,15 @@ bool BrgemmBlocking::run(LinearIR& linear_ir) { }; auto apply_k_blocking = [&]() { - const auto& input_shape_0 = input_0_desc->get_shape(); - const auto& input_layout_0 = input_0_desc->get_layout(); - - const auto& k_idx = *input_layout_0.rbegin(); - const auto& k = input_shape_0[k_idx]; + const auto& k = *in_0_planar_dims.rbegin(); + OPENVINO_ASSERT(k == *(in_1_planar_dims.rbegin() + 1), "Brgemm input descriptors have different K dimension value."); const auto block_size_k = brgemm->get_k_block_size(); if (block_size_k >= k) { - *input_0_subtensor.rbegin() = k; - *(input_1_subtensor.rbegin() + 1) = k; + *in_0_subtensor.rbegin() = k; + *(in_1_subtensor.rbegin() + 1) = k; } else { - *input_0_subtensor.rbegin() = block_size_k; - *(input_1_subtensor.rbegin() + 1) = block_size_k; + *in_0_subtensor.rbegin() = block_size_k; + *(in_1_subtensor.rbegin() + 1) = block_size_k; std::vector entries{LoopPort(brgemm_expr->get_input_port(0), true, 0), LoopPort(brgemm_expr->get_input_port(1), true, 1)}; @@ 
-132,42 +125,38 @@ bool BrgemmBlocking::run(LinearIR& linear_ir) { auto loop_id = loop_manager->mark_loop(expr_it, std::next(expr_it), k, block_size_k, entries, exits); const auto loop_info = loop_manager->get_loop_info(loop_id); - auto first_iter_handler = [](LinearIR& linear_ir, LinearIR::constExprIt expr_it) { - const auto loop_end = ov::as_type_ptr(expr_it->get()->get_node()); + auto first_iter_handler = [](LinearIR& linear_ir, LinearIR::constExprIt loop_end_it) { + const auto loop_end = ov::as_type_ptr(loop_end_it->get()->get_node()); OPENVINO_ASSERT(loop_end, "First loop iteraton handler must be called on LoopEnd expression"); const auto loop_id = loop_end->get_id(); const auto& loop_manager = linear_ir.get_loop_manager(); const auto& loop_info = loop_manager->get_loop_info(loop_id); - const auto work_amount = loop_info->work_amount; - const auto increment = loop_info->increment; + const auto work_amount = loop_info->get_work_amount(); + const auto increment = loop_info->get_increment(); if (work_amount <= increment) return false; auto new_loop_range = snippets::lowered::pass::InsertTailLoop::copy_loop(linear_ir, loop_id); - const auto new_loop_end = ov::as_type_ptr(std::prev(new_loop_range.end())->get()->get_node()); - auto new_loop_info = loop_manager->get_loop_info(new_loop_end->get_id()); + const auto firt_iter_loop_end = ov::as_type_ptr(std::prev(new_loop_range.end())->get()->get_node()); + auto first_iter_loop_info = loop_manager->get_loop_info(firt_iter_loop_end->get_id()); + firt_iter_loop_end->set_work_amount(increment); + first_iter_loop_info->set_work_amount(increment); + firt_iter_loop_end->set_finalization_offsets(std::vector(loop_end->get_finalization_offsets().size(), 0)); + + const auto loop_begin_it = linear_ir.find(linear_ir.get_expr_by_node(loop_end->get_loop_begin())); + linear_ir.insert(loop_begin_it, new_loop_range.begin(), new_loop_range.end()); + const auto new_work_amount = work_amount - increment; - 
new_loop_end->set_work_amount(new_work_amount); - new_loop_info->work_amount = new_work_amount; - for (const auto& expr : new_loop_range) { - if (const auto brgemm = ov::as_type_ptr(expr->get_node())) { + loop_info->set_work_amount(new_work_amount); + loop_end->set_work_amount(new_work_amount); + + // Update original body's Brgemms with new beta parameter + for (auto expr_it = loop_begin_it; expr_it != loop_end_it; ++expr_it) { + const auto& expr_node = expr_it->get()->get_node(); + if (const auto brgemm = ov::as_type_ptr(expr_node)) { brgemm->set_beta(1.f); } } - - linear_ir.insert(std::next(expr_it), new_loop_range.begin(), new_loop_range.end()); - - loop_info->work_amount = increment; - loop_end->set_work_amount(increment); - loop_end->set_finalization_offsets(std::vector(loop_end->get_finalization_offsets().size(), 0)); - const auto begin_it = linear_ir.find(linear_ir.get_expr_by_node(new_loop_end->get_loop_begin())); - const auto end_it = linear_ir.find(linear_ir.get_expr_by_node(new_loop_end)); - snippets::lowered::pass::InsertTailLoop::propagate_updated_subtensor_through_loop( - linear_ir, - new_loop_info, - std::next(begin_it), - end_it, - increment); return true; }; loop_info->set_first_iter_handler(first_iter_handler); @@ -178,10 +167,9 @@ bool BrgemmBlocking::run(LinearIR& linear_ir) { apply_n_blocking(); apply_m_blocking(); - brgemm_expr->get_input_port_descriptor(0)->set_subtensor(input_0_subtensor); - brgemm_expr->get_input_port_descriptor(1)->set_subtensor(input_1_subtensor); - brgemm_expr->get_output_port_descriptor(0)->set_subtensor(output_subtensor); - + brgemm_expr->get_input_port_descriptor(0)->set_subtensor(in_0_subtensor); + brgemm_expr->get_input_port_descriptor(1)->set_subtensor(in_1_subtensor); + brgemm_expr->get_output_port_descriptor(0)->set_subtensor(out_subtensor); modified = true; } diff --git a/src/plugins/intel_cpu/src/transformations/snippets/x64/pass/lowered/set_brgemm_copy_b_buffers_shape.hpp 
b/src/plugins/intel_cpu/src/transformations/snippets/x64/pass/lowered/set_brgemm_copy_b_buffers_shape.hpp index 8475958a8fffcf..c7eec92700a16a 100644 --- a/src/plugins/intel_cpu/src/transformations/snippets/x64/pass/lowered/set_brgemm_copy_b_buffers_shape.hpp +++ b/src/plugins/intel_cpu/src/transformations/snippets/x64/pass/lowered/set_brgemm_copy_b_buffers_shape.hpp @@ -13,7 +13,7 @@ namespace pass { /** * @interface SetBrgemmCopyBBuffersShape * @brief Sets the allocation shape for the Buffers after BrgemmCopyB node using BrgemmCopyB parameters - * This pass is a workaround until we have Buffer memory allocation based on subtensors + * This pass may be deprecated when a more generic memory management approach is introduced. * Ticket: 113744 * @ingroup snippets */ diff --git a/src/plugins/intel_cpu/src/transformations/snippets/x64/pass/set_brgemm_cpu_blocking_params.cpp b/src/plugins/intel_cpu/src/transformations/snippets/x64/pass/set_brgemm_cpu_blocking_params.cpp index 6ea13889d59f79..bd87737ed2c96e 100644 --- a/src/plugins/intel_cpu/src/transformations/snippets/x64/pass/set_brgemm_cpu_blocking_params.cpp +++ b/src/plugins/intel_cpu/src/transformations/snippets/x64/pass/set_brgemm_cpu_blocking_params.cpp @@ -19,7 +19,6 @@ #include "cpu_shape.h" #include "utils/general_utils.h" - namespace ov { namespace intel_cpu { pass::SetBrgemmCPUBlockingParams::SetBrgemmCPUBlockingParams() {