diff --git a/src/common/snippets/include/snippets/lowered/port_descriptor.hpp b/src/common/snippets/include/snippets/lowered/port_descriptor.hpp index 3fc429bec4df1e..926136abf0e8f8 100644 --- a/src/common/snippets/include/snippets/lowered/port_descriptor.hpp +++ b/src/common/snippets/include/snippets/lowered/port_descriptor.hpp @@ -54,6 +54,9 @@ class PortDescriptor { void set_reg_type(RegType type) { m_reg.type = type; } void set_reg_idx(size_t idx) { m_reg.idx = idx; } + // Indexing starts from the end (rbegin() + idx) + void set_subtensor_value(size_t idx, VectorDims::value_type value); + std::string serialize() const; bool empty() const { return m_layout.empty() && m_subtensor_shape.empty();} PortDescriptorPtr clone() const; diff --git a/src/common/snippets/src/lowered/pass/propagate_subtensors.cpp b/src/common/snippets/src/lowered/pass/propagate_subtensors.cpp index 168bcf522585a1..ef64c4205b901a 100644 --- a/src/common/snippets/src/lowered/pass/propagate_subtensors.cpp +++ b/src/common/snippets/src/lowered/pass/propagate_subtensors.cpp @@ -18,11 +18,11 @@ namespace { // The algorithm uses the following special values in subtensors/shapes: // 1. Dynamic value in subtensor/shape : SIZE_MAX -// 2. Full fimension in subtensor : SIZE_MAX - 1 +// 2. Full dimension in subtensor : SIZE_MAX - 1 // 3. Default value of `new_dim_value` : SIZE_MAX - 2 // 4. `Forced` special dynamic value : SIZE_MAX - 3 // -// We have to introduce `SPECIAL_DYNAMIC_VALUE` to distinguish `new_dim_value = DYNAMIC` +// We have to introduce `FORCED_DYNAMIC_VALUE` to distinguish `new_dim_value = DYNAMIC` // from the real dynamic values in subtensors and shapes and force this value in subtensors. // For example, there is Brgemm with the following info in the tail Loop: // Input 0: shape [?, ?], existing subtensor [32, FULL_DIM] @@ -36,7 +36,7 @@ namespace { // 3. Update subtensor on output using shape: // new_subtensor[i] = std::min(planar_shape[i], subtensor[i]); // i = 0: std::min(SIZE_MAX(?), 32) // new subtensor [32, FULL_DIM] - has not been changed! But should be [?, FULL_DIM] -// Conculsion: we have to distinguish forced dynamic value with existing dynamic values in shape and subtensor +// Conclusion: we have to distinguish forced dynamic value with existing dynamic values in shape and subtensor constexpr size_t NEW_DEFAULT_VALUE = SIZE_MAX - 2; constexpr size_t FORCED_DYNAMIC_VALUE = SIZE_MAX - 3; @@ -61,9 +61,8 @@ void propagate_updated_subtensor_through_loop(const LinearIR& linear_ir, const auto& expr = port.expr_port->get_expr(); const auto& desc = port.expr_port->get_descriptor_ptr(); auto subtensor = desc->get_subtensor(); - if (port.dim_idx < subtensor.size()) { - *(subtensor.rbegin() + port.dim_idx) = new_dim_value; - desc->set_subtensor(subtensor); + if (port.dim_idx < desc->get_subtensor().size()) { + desc->set_subtensor_value(port.dim_idx, new_dim_value); } const auto parent_desc = expr->get_input_port_connector(port.expr_port->get_index())->get_source().get_descriptor_ptr(); diff --git a/src/common/snippets/src/lowered/port_descriptor.cpp b/src/common/snippets/src/lowered/port_descriptor.cpp index 522f03367f51c1..979f3120a68476 100644 --- a/src/common/snippets/src/lowered/port_descriptor.cpp +++ b/src/common/snippets/src/lowered/port_descriptor.cpp @@ -54,6 +54,11 @@ void PortDescriptor::set_shape(const VectorDims& tensor) { *m_tensor_shape = tensor; } +void PortDescriptor::set_subtensor_value(size_t idx, VectorDims::value_type value) { + OPENVINO_ASSERT(idx < m_subtensor_shape.size(), "Failed to set subtensor value: idx should be less than size"); + *(m_subtensor_shape.rbegin() + idx) = value; +} + PortDescriptorPtr PortDescriptor::clone() const { auto desc = std::make_shared(*m_tensor_shape, m_subtensor_shape, m_layout); desc->set_reg(m_reg); diff --git a/src/common/snippets/src/runtime_configurator.cpp b/src/common/snippets/src/runtime_configurator.cpp index c0725c4f5b38aa..971e7a03858fcf 100644 --- a/src/common/snippets/src/runtime_configurator.cpp +++ b/src/common/snippets/src/runtime_configurator.cpp @@ -186,7 +186,7 @@ void RuntimeConfigurator::update_loop_info(const std::shared_ptrget_work_amount(); if (expanded_loop_info->is_evaluate_once()) { - expanded_loop_info->update_ptr_increments(std::vector(ptr_increments.size(), 0)); + // Update only `finalization offsets`. `Ptr increments` are always zeroed in this case auto updated_finalization_offsets = current_work_amount > 0 ? std::vector(finalization_offsets.size(), 0) : finalization_offsets; // work_amount is equal to increment in cases with `evaluate_once` for (size_t i = 0; i < updated_finalization_offsets.size(); ++i) diff --git a/src/common/snippets/tests/include/lir_test_utils.hpp b/src/common/snippets/tests/include/lir_test_utils.hpp index ebe2e63cc2e66f..b653c86af8ab0b 100644 --- a/src/common/snippets/tests/include/lir_test_utils.hpp +++ b/src/common/snippets/tests/include/lir_test_utils.hpp @@ -44,40 +44,6 @@ void init_expr_descriptors(const ov::snippets::lowered::ExpressionPtr& expr, const std::vector& subtensors = {}, const std::vector& layouts = {}); -/** - * @brief Creates unified loop info based on provided entry and exit points, and adds it to the linear_ir's loops map - * @attention This helper wraps LoopManager::mark_loop method, but only for LoopInfo creation (whereas original - * mark_loop method also marks expressions with the corresponding loop info). - * @param linear_ir linear_ir in which loop info should be added - * @param entries entry points of loop - * @param exits exit points of loop - * @return ID of created loop - */ -size_t create_and_add_unified_loop_info(const std::shared_ptr& linear_ir, - size_t work_amount, - size_t increment, - const std::vector& entries, - const std::vector& exits, - bool add_default_handlers = true); -/** - * @brief Creates unified loop info based on provided entry and exit points, and adds it to the linear_ir's loops map. - * Meanwhile set loop id to expr range [loop_begin_pos, loop_end_pos). - * @attention This helper wraps LoopManager::mark_loop method, which also marks expressions with the corresponding loop info - * @param linear_ir linear_ir in which loop info should be added - * @param loop_begin_pos begin expr postion in this loop - * @param loop_end_pos end expr postion in this loop - * @param entries entry points of loop - * @param exits exit points of loop - * @return ID of created loop - */ -size_t create_and_add_unified_loop_info(const std::shared_ptr& linear_ir, - ov::snippets::lowered::LinearIR::constExprIt loop_begin_pos, - ov::snippets::lowered::LinearIR::constExprIt loop_end_pos, - size_t work_amount, - size_t increment, - const std::vector& entries, - const std::vector& exits, - bool add_default_handlers = true); } // namespace snippets } // namespace test } // namespace ov diff --git a/src/common/snippets/tests/src/lir_test_utils.cpp b/src/common/snippets/tests/src/lir_test_utils.cpp index 9b6a1c8f84af69..084a5a34a3bd55 100644 --- a/src/common/snippets/tests/src/lir_test_utils.cpp +++ b/src/common/snippets/tests/src/lir_test_utils.cpp @@ -85,28 +85,6 @@ void init_expr_descriptors(const ov::snippets::lowered::ExpressionPtr& expr, } } -size_t create_and_add_unified_loop_info(const LinearIRPtr& linear_ir, - size_t work_amount, - size_t increment, - const std::vector& entries, - const std::vector& exits, - bool set_default_handlers) { - // Equal begin and end iterators are set to avoid expressions marking with new loop id - return create_and_add_unified_loop_info(linear_ir, linear_ir->begin(), linear_ir->begin(), work_amount, increment, entries, exits, set_default_handlers); -} - -size_t create_and_add_unified_loop_info(const LinearIRPtr& linear_ir, - ov::snippets::lowered::LinearIR::constExprIt loop_begin_pos, - ov::snippets::lowered::LinearIR::constExprIt loop_end_pos, - size_t work_amount, - size_t increment, - const std::vector& entries, - const std::vector& exits, - bool set_default_handlers) { - const auto& loop_manager = linear_ir->get_loop_manager(); - return loop_manager->mark_loop(loop_begin_pos, loop_end_pos, work_amount, increment, entries, exits, set_default_handlers); -} - } // namespace snippets } // namespace test } // namespace ov diff --git a/src/common/snippets/tests/src/lowered/pass/extracted_loop_invariants.cpp b/src/common/snippets/tests/src/lowered/pass/extracted_loop_invariants.cpp index c3f4f5ea7f6877..ee762f4bfca746 100644 --- a/src/common/snippets/tests/src/lowered/pass/extracted_loop_invariants.cpp +++ b/src/common/snippets/tests/src/lowered/pass/extracted_loop_invariants.cpp @@ -66,11 +66,11 @@ TEST_F(ExtractLoopInvariantsTest, ExtractedLoopInvariantsWithParams) { auto result = linear_ir->push_node(sub.second); auto begin = multiply.first; auto end = result.first; - create_and_add_unified_loop_info(linear_ir, begin, end, 512, vector_size, - {LoopPort((*multiply.first)->get_input_port(0)), - LoopPort((*multiply.first)->get_input_port(1)), - LoopPort((*sub.first)->get_input_port(0))}, - {LoopPort((*sub.first)->get_output_port(0))}); + linear_ir->get_loop_manager()->mark_loop(begin, end, 512, vector_size, + std::vector{LoopPort((*multiply.first)->get_input_port(0)), + LoopPort((*multiply.first)->get_input_port(1)), + LoopPort((*sub.first)->get_input_port(0))}, + std::vector{LoopPort((*sub.first)->get_output_port(0))}); linear_ir->set_loop_depth(1); } { @@ -85,10 +85,10 @@ TEST_F(ExtractLoopInvariantsTest, ExtractedLoopInvariantsWithParams) { auto result = linear_ir_ref->push_node(sub.second); auto begin = sub.first; auto end = result.first; - create_and_add_unified_loop_info(linear_ir_ref, begin, end, 512, vector_size, - {LoopPort((*sub.first)->get_input_port(0)), - LoopPort((*sub.first)->get_input_port(1))}, - {LoopPort((*sub.first)->get_output_port(0))}); + linear_ir_ref->get_loop_manager()->mark_loop(begin, end, 512, vector_size, + std::vector{LoopPort((*sub.first)->get_input_port(0)), + LoopPort((*sub.first)->get_input_port(1))}, + std::vector{LoopPort((*sub.first)->get_output_port(0))}); } } @@ -124,10 +124,10 @@ TEST_F(ExtractLoopInvariantsTest, ExtractedLoopInvariantsWithScalar) { auto result = linear_ir->push_node(sub.second); auto begin = scalar.first; auto end = result.first; - create_and_add_unified_loop_info(linear_ir, begin, end, 512, vector_size, - {LoopPort((*multiply.first)->get_input_port(0)), - LoopPort((*sub.first)->get_input_port(0))}, - {LoopPort((*sub.first)->get_output_port(0))}); + linear_ir->get_loop_manager()->mark_loop(begin, end, 512, vector_size, + std::vector{LoopPort((*multiply.first)->get_input_port(0)), + LoopPort((*sub.first)->get_input_port(0))}, + std::vector{LoopPort((*sub.first)->get_output_port(0))}); linear_ir->set_loop_depth(1); } { @@ -142,10 +142,10 @@ TEST_F(ExtractLoopInvariantsTest, ExtractedLoopInvariantsWithScalar) { auto result = linear_ir_ref->push_node(sub.second); auto begin = sub.first; auto end = result.first; - create_and_add_unified_loop_info(linear_ir_ref, begin, end, 512, vector_size, - {LoopPort((*sub.first)->get_input_port(0)), - LoopPort((*sub.first)->get_input_port(1))}, - {LoopPort((*sub.first)->get_output_port(0))}); + linear_ir_ref->get_loop_manager()->mark_loop(begin, end, 512, vector_size, + std::vector{LoopPort((*sub.first)->get_input_port(0)), + LoopPort((*sub.first)->get_input_port(1))}, + std::vector{LoopPort((*sub.first)->get_output_port(0))}); } } @@ -187,20 +187,20 @@ TEST_F(ExtractLoopInvariantsTest, ExtractedLoopInvariantsOutputLoopUpdateNotNeed auto result1 = linear_ir->push_node(sub.second); auto begin = multiply.first; auto end = result1.first; - create_and_add_unified_loop_info(linear_ir, begin, end, 16, vector_size, - {LoopPort((*multiply.first)->get_input_port(0), true, 0), - LoopPort((*multiply.first)->get_input_port(1), true, 0), - LoopPort((*add.first)->get_input_port(0), true, 0), - LoopPort((*sub.first)->get_input_port(0), true, 0)}, - {LoopPort((*add.first)->get_output_port(0), true, 0), - LoopPort((*sub.first)->get_output_port(0), true, 0)}); - create_and_add_unified_loop_info(linear_ir, begin, end, 3, 1, - {LoopPort((*multiply.first)->get_input_port(0), true, 1), - LoopPort((*multiply.first)->get_input_port(1), true, 1), - LoopPort((*add.first)->get_input_port(0), true, 1), - LoopPort((*sub.first)->get_input_port(0), true, 1)}, - {LoopPort((*add.first)->get_output_port(0), true, 1), - LoopPort((*sub.first)->get_output_port(0), true, 1)}); + linear_ir->get_loop_manager()->mark_loop(begin, end, 16, vector_size, + std::vector{LoopPort((*multiply.first)->get_input_port(0), true, 0), + LoopPort((*multiply.first)->get_input_port(1), true, 0), + LoopPort((*add.first)->get_input_port(0), true, 0), + LoopPort((*sub.first)->get_input_port(0), true, 0)}, + std::vector{LoopPort((*add.first)->get_output_port(0), true, 0), + LoopPort((*sub.first)->get_output_port(0), true, 0)}); + linear_ir->get_loop_manager()->mark_loop(begin, end, 3, 1, + std::vector{LoopPort((*multiply.first)->get_input_port(0), true, 1), + LoopPort((*multiply.first)->get_input_port(1), true, 1), + LoopPort((*add.first)->get_input_port(0), true, 1), + LoopPort((*sub.first)->get_input_port(0), true, 1)}, + std::vector{LoopPort((*add.first)->get_output_port(0), true, 1), + LoopPort((*sub.first)->get_output_port(0), true, 1)}); linear_ir->set_loop_depth(2); } { @@ -218,21 +218,21 @@ TEST_F(ExtractLoopInvariantsTest, ExtractedLoopInvariantsOutputLoopUpdateNotNeed auto result1 = linear_ir_ref->push_node(sub.second); auto begin_inner = add.first; auto end_inner = result1.first; - create_and_add_unified_loop_info(linear_ir_ref, begin_inner, end_inner, 16, vector_size, - {LoopPort((*add.first)->get_input_port(0), true, 0), - LoopPort((*add.first)->get_input_port(1), true, 0), - LoopPort((*sub.first)->get_input_port(0), true, 0)}, - {LoopPort((*add.first)->get_output_port(0), true, 0), - LoopPort((*sub.first)->get_output_port(0), true, 0)}); + linear_ir_ref->get_loop_manager()->mark_loop(begin_inner, end_inner, 16, vector_size, + std::vector{LoopPort((*add.first)->get_input_port(0), true, 0), + LoopPort((*add.first)->get_input_port(1), true, 0), + LoopPort((*sub.first)->get_input_port(0), true, 0)}, + std::vector{LoopPort((*add.first)->get_output_port(0), true, 0), + LoopPort((*sub.first)->get_output_port(0), true, 0)}); auto begin_outer = multiply.first; auto end_outer = result1.first; - create_and_add_unified_loop_info(linear_ir_ref, begin_outer, end_outer, 3, 1, - {LoopPort((*multiply.first)->get_input_port(0), true, 1), - LoopPort((*multiply.first)->get_input_port(1), true, 1), - LoopPort((*add.first)->get_input_port(0), true, 1), - LoopPort((*sub.first)->get_input_port(0), true, 1)}, - {LoopPort((*add.first)->get_output_port(0), true, 1), - LoopPort((*sub.first)->get_output_port(0), true, 1)}); + linear_ir_ref->get_loop_manager()->mark_loop(begin_outer, end_outer, 3, 1, + std::vector{LoopPort((*multiply.first)->get_input_port(0), true, 1), + LoopPort((*multiply.first)->get_input_port(1), true, 1), + LoopPort((*add.first)->get_input_port(0), true, 1), + LoopPort((*sub.first)->get_input_port(0), true, 1)}, + std::vector{LoopPort((*add.first)->get_output_port(0), true, 1), + LoopPort((*sub.first)->get_output_port(0), true, 1)}); } } @@ -263,14 +263,14 @@ TEST_F(ExtractLoopInvariantsTest, ExtractedLoopInvariantsFromInnermostToLoopOuts auto add = linear_ir->push_node(param_0.second, broadcastmove.second); init_expr_descriptors(*add.first, {subtensor, subtensor, subtensor}, {layout, layout, layout}); auto result = linear_ir->push_node(add.second); - create_and_add_unified_loop_info(linear_ir, broadcastmove.first, result.first, 3, 1, - {LoopPort((*broadcastmove.first)->get_input_port(0), true, 1), - LoopPort((*add.first)->get_input_port(0), true, 1)}, - {LoopPort((*add.first)->get_output_port(0), true, 1)}); - create_and_add_unified_loop_info(linear_ir, broadcastmove.first, result.first, 512, vector_size, - {LoopPort((*broadcastmove.first)->get_input_port(0), true, 0), - LoopPort((*add.first)->get_input_port(0), true, 0)}, - {LoopPort((*add.first)->get_output_port(0), true, 0)}); + linear_ir->get_loop_manager()->mark_loop(broadcastmove.first, result.first, 3, 1, + std::vector{LoopPort((*broadcastmove.first)->get_input_port(0), true, 1), + LoopPort((*add.first)->get_input_port(0), true, 1)}, + std::vector{LoopPort((*add.first)->get_output_port(0), true, 1)}); + linear_ir->get_loop_manager()->mark_loop(broadcastmove.first, result.first, 512, vector_size, + std::vector{LoopPort((*broadcastmove.first)->get_input_port(0), true, 0), + LoopPort((*add.first)->get_input_port(0), true, 0)}, + std::vector{LoopPort((*add.first)->get_output_port(0), true, 0)}); linear_ir->set_loop_depth(2); } { @@ -281,14 +281,14 @@ TEST_F(ExtractLoopInvariantsTest, ExtractedLoopInvariantsFromInnermostToLoopOuts auto add = linear_ir_ref->push_node(param_0.second, broadcastmove.second); init_expr_descriptors(*add.first, {subtensor, subtensor, subtensor}, {layout, layout, layout}); auto result = linear_ir_ref->push_node(add.second); - create_and_add_unified_loop_info(linear_ir_ref, add.first, result.first, 3, 1, - {LoopPort((*add.first)->get_input_port(0), true, 1), - LoopPort((*add.first)->get_input_port(1), true, 1)}, - {LoopPort((*add.first)->get_output_port(0), true, 1)}); - create_and_add_unified_loop_info(linear_ir_ref, add.first, result.first, 512, vector_size, - {LoopPort((*add.first)->get_input_port(0), true, 0), - LoopPort((*add.first)->get_input_port(1), true, 0)}, - {LoopPort((*add.first)->get_output_port(0), true, 0)}); + linear_ir_ref->get_loop_manager()->mark_loop(add.first, result.first, 3, 1, + std::vector{LoopPort((*add.first)->get_input_port(0), true, 1), + LoopPort((*add.first)->get_input_port(1), true, 1)}, + std::vector{LoopPort((*add.first)->get_output_port(0), true, 1)}); + linear_ir_ref->get_loop_manager()->mark_loop(add.first, result.first, 512, vector_size, + std::vector{LoopPort((*add.first)->get_input_port(0), true, 0), + LoopPort((*add.first)->get_input_port(1), true, 0)}, + std::vector{LoopPort((*add.first)->get_output_port(0), true, 0)}); } } @@ -356,31 +356,31 @@ TEST_F(ExtractLoopInvariantsRemoveLoopsTest, ExtractedLoopInvariantsAllExprsInLo init_expr_descriptors(*multiply.first, {subtensor, subtensor, subtensor}, {layout, layout, layout}); auto result = linear_ir->push_node(multiply.second); // 3 inner loop - create_and_add_unified_loop_info(linear_ir, max.first, hmax.first, 1, vector_size, - {LoopPort((*max.first)->get_input_port(0), true, 0), - LoopPort((*max.first)->get_input_port(1), true, 0)}, - {LoopPort((*max.first)->get_output_port(0), true, 0)}); - create_and_add_unified_loop_info(linear_ir, sub.first, hsum.first, 1, vector_size, - {LoopPort((*sub.first)->get_input_port(0), true, 0), - LoopPort((*sub.first)->get_input_port(1), true, 0), - LoopPort((*add.first)->get_input_port(1), true, 0)}, - {LoopPort((*exp.first)->get_output_port(0), true, 0), - LoopPort((*add.first)->get_output_port(0), true, 0)}); - create_and_add_unified_loop_info(linear_ir, multiply.first, result.first, 1, vector_size, - {LoopPort((*multiply.first)->get_input_port(0), true, 0), - LoopPort((*multiply.first)->get_input_port(1), true, 0)}, - {LoopPort((*multiply.first)->get_output_port(0), true, 0)}); + linear_ir->get_loop_manager()->mark_loop(max.first, hmax.first, 1, vector_size, + std::vector{LoopPort((*max.first)->get_input_port(0), true, 0), + LoopPort((*max.first)->get_input_port(1), true, 0)}, + std::vector{LoopPort((*max.first)->get_output_port(0), true, 0)}); + linear_ir->get_loop_manager()->mark_loop(sub.first, hsum.first, 1, vector_size, + std::vector{LoopPort((*sub.first)->get_input_port(0), true, 0), + LoopPort((*sub.first)->get_input_port(1), true, 0), + LoopPort((*add.first)->get_input_port(1), true, 0)}, + std::vector{LoopPort((*exp.first)->get_output_port(0), true, 0), + LoopPort((*add.first)->get_output_port(0), true, 0)}); + linear_ir->get_loop_manager()->mark_loop(multiply.first, result.first, 1, vector_size, + std::vector{LoopPort((*multiply.first)->get_input_port(0), true, 0), + LoopPort((*multiply.first)->get_input_port(1), true, 0)}, + std::vector{LoopPort((*multiply.first)->get_output_port(0), true, 0)}); // outer loop info const auto loop_begin = std::make_shared(); auto loop_begin_expr = linear_ir->insert_node(loop_begin, std::vector{}, {}, false, max.first); const auto loop_end = std::make_shared(); std::vector loop_end_inputs{(*loop_begin_expr)->get_output_port_connector(0)}; auto loop_end_expr = linear_ir->insert_node(loop_end, loop_end_inputs, {}, false, result.first); - create_and_add_unified_loop_info(linear_ir, loop_begin_expr, result.first, 10, 1, - {LoopPort((*max.first)->get_input_port(0), true, 1), - LoopPort((*max.first)->get_input_port(1), true, 0), - LoopPort((*add.first)->get_input_port(1), true, 0)}, - {LoopPort((*multiply.first)->get_output_port(0), true, 1)}); + linear_ir->get_loop_manager()->mark_loop(loop_begin_expr, result.first, 10, 1, + std::vector{LoopPort((*max.first)->get_input_port(0), true, 1), + LoopPort((*max.first)->get_input_port(1), true, 0), + LoopPort((*add.first)->get_input_port(1), true, 0)}, + std::vector{LoopPort((*multiply.first)->get_output_port(0), true, 1)}); loop_end->set_id((*loop_end_expr)->get_loop_ids().back()); linear_ir->set_loop_depth(2); } @@ -409,11 +409,11 @@ TEST_F(ExtractLoopInvariantsRemoveLoopsTest, ExtractedLoopInvariantsAllExprsInLo const auto loop_end = std::make_shared(); std::vector loop_end_inputs{(*loop_begin_expr)->get_output_port_connector(0)}; auto loop_end_expr = linear_ir_ref->insert_node(loop_end, loop_end_inputs, {}, false, result.first); - create_and_add_unified_loop_info(linear_ir_ref, loop_begin_expr, result.first, 10, 1, - {LoopPort((*max.first)->get_input_port(0), true, 1), - LoopPort((*max.first)->get_input_port(1), true, 0), - LoopPort((*add.first)->get_input_port(1), true, 0)}, - {LoopPort((*multiply.first)->get_output_port(0), true, 1)}); + linear_ir_ref->get_loop_manager()->mark_loop(loop_begin_expr, result.first, 10, 1, + std::vector{LoopPort((*max.first)->get_input_port(0), true, 1), + LoopPort((*max.first)->get_input_port(1), true, 0), + LoopPort((*add.first)->get_input_port(1), true, 0)}, + std::vector{LoopPort((*multiply.first)->get_output_port(0), true, 1)}); loop_end->set_id((*loop_end_expr)->get_loop_ids().back()); } } diff --git a/src/plugins/intel_cpu/src/emitters/snippets/cpu_runtime_configurator.cpp b/src/plugins/intel_cpu/src/emitters/snippets/cpu_runtime_configurator.cpp index 1d3cbf022ed299..d062ab1b919ea0 100644 --- a/src/plugins/intel_cpu/src/emitters/snippets/cpu_runtime_configurator.cpp +++ b/src/plugins/intel_cpu/src/emitters/snippets/cpu_runtime_configurator.cpp @@ -51,7 +51,7 @@ void CPURuntimeConfigurator::initialization(const std::shared_ptrget_loop_info(loop_ids.front()); const auto& block_size_m = expanded_loop_info->get_work_amount(); - const auto& in_desc = brgemm_expr->get_input_port_descriptor(0); - const auto& out_desc = brgemm_expr->get_output_port_descriptor(0); - - auto in_subtensor = in_desc->get_subtensor(); - auto out_subtensor = out_desc->get_subtensor(); - *++in_subtensor.rbegin() = block_size_m; - *++out_subtensor.rbegin() = block_size_m; - in_desc->set_subtensor(in_subtensor); - out_desc->set_subtensor(out_subtensor); + brgemm_expr->get_input_port_descriptor(0)->set_subtensor_value(1, block_size_m); + brgemm_expr->get_output_port_descriptor(0)->set_subtensor_value(1, block_size_m); } } diff --git a/src/plugins/intel_cpu/src/emitters/snippets/cpu_runtime_configurator.hpp b/src/plugins/intel_cpu/src/emitters/snippets/cpu_runtime_configurator.hpp index b5ea975bb0c3c0..39ab1977f878d1 100644 --- a/src/plugins/intel_cpu/src/emitters/snippets/cpu_runtime_configurator.hpp +++ b/src/plugins/intel_cpu/src/emitters/snippets/cpu_runtime_configurator.hpp @@ -42,16 +42,18 @@ class CPURuntimeConfigurator : public ov::snippets::RuntimeConfigurator { void init_tensor_rank(const std::shared_ptr& linear_ir) const override; /** * @brief Calculate Loop parameters of Loop emitters and update these values in CPURuntimeConfig - * @param linear_ir LinearIR + * @param loop_manager Loop Manager */ void update_loop_args(const ov::snippets::lowered::LoopManagerPtr& loop_manager) const; /** * @brief Update subtensors of Brgemms + * @param loop_manager Loop Manager */ void update_brgemms(const ov::snippets::lowered::LoopManagerPtr& loop_manager) const; const size_t rank6D = 6; - std::vector m_dynamic_brgemms = {}; + // Brgemm expressions with subtensors with dynamic values + std::unordered_set m_dynamic_brgemms = {}; }; } // namespace intel_cpu diff --git a/src/plugins/intel_cpu/src/transformations/snippets/x64/pass/lowered/brgemm_blocking.cpp b/src/plugins/intel_cpu/src/transformations/snippets/x64/pass/lowered/brgemm_blocking.cpp index 5e4cb94d5bdd4f..03f63f2fb6e603 100644 --- a/src/plugins/intel_cpu/src/transformations/snippets/x64/pass/lowered/brgemm_blocking.cpp +++ b/src/plugins/intel_cpu/src/transformations/snippets/x64/pass/lowered/brgemm_blocking.cpp @@ -57,6 +57,15 @@ LinearIR::constExprIt BrgemmBlocking::get_loop_begin_pos(LinearIR& linear_ir, co return loop_begin_it; } +snippets::lowered::SpecificIterationHandlers BrgemmBlocking::get_default_blocking_loop_handlers(size_t work_amount, size_t block_size) { + SpecificIterationHandlers handlers; + const auto tail_size = snippets::utils::is_dynamic_value(work_amount) ? snippets::utils::get_dynamic_value() : work_amount % block_size; + if (tail_size != 0) + handlers.register_pass(tail_size); + handlers.register_pass(); + return handlers; +} + bool BrgemmBlocking::run(LinearIR& linear_ir, LinearIR::constExprIt begin, LinearIR::constExprIt end) { OV_ITT_SCOPED_TASK(ov::pass::itt::domains::SnippetsTransform, "Snippets::BrgemmBlocking") const auto& loop_manager = linear_ir.get_loop_manager(); @@ -111,22 +120,18 @@ bool BrgemmBlocking::run(LinearIR& linear_ir, LinearIR::constExprIt begin, Linea // If block_size is dynamic, it means that Brgemm will process full tensor: // subtensor[i] = FULL_DIM as by default if (!snippets::utils::is_dynamic_value(block_size_m)) { - *++in_0_subtensor.rbegin() = block_size_m; - *++out_subtensor.rbegin() = block_size_m; + brgemm_expr->get_input_port_descriptor(0)->set_subtensor_value(1, block_size_m); + brgemm_expr->get_output_port_descriptor(0)->set_subtensor_value(1, block_size_m); } if (!snippets::utils::is_dynamic_value(block_size_n)) { - *in_1_subtensor.rbegin() = block_size_n; - *out_subtensor.rbegin() = block_size_n; + brgemm_expr->get_input_port_descriptor(1)->set_subtensor_value(0, block_size_n); + brgemm_expr->get_output_port_descriptor(0)->set_subtensor_value(0, block_size_n); } if (!snippets::utils::is_dynamic_value(block_size_k)) { - *in_0_subtensor.rbegin() = block_size_k; - *++in_1_subtensor.rbegin() = block_size_k; + brgemm_expr->get_input_port_descriptor(0)->set_subtensor_value(0, block_size_k); + brgemm_expr->get_input_port_descriptor(1)->set_subtensor_value(1, block_size_k); } - brgemm_expr->get_input_port_descriptor(0)->set_subtensor(in_0_subtensor); - brgemm_expr->get_input_port_descriptor(1)->set_subtensor(in_1_subtensor); - brgemm_expr->get_output_port_descriptor(0)->set_subtensor(out_subtensor); - ov::snippets::lowered::ExpressionPtr copy_b_expr = nullptr; if (brgemm_cpu && brgemm_cpu->is_with_data_repacking()) { const auto copy_b = brgemm_cpu->get_brgemm_copy(); @@ -150,15 +155,6 @@ bool BrgemmBlocking::run(LinearIR& linear_ir, LinearIR::constExprIt begin, Linea } } - auto get_default_handlers = [](size_t work_amount, size_t block_size) { - SpecificIterationHandlers handlers; - const auto tail_size = snippets::utils::is_dynamic_value(work_amount) ? snippets::utils::get_dynamic_value() : work_amount % block_size; - if (tail_size != 0) - handlers.register_pass(tail_size); - handlers.register_pass(true); - return handlers; - }; - auto mark_m_blocking = [&](bool include_repacking) { const auto loop_begin_it = get_loop_begin_pos(linear_ir, expr_it, include_repacking); const auto loop_end_it = std::next(expr_it); @@ -173,7 +169,7 @@ bool BrgemmBlocking::run(LinearIR& linear_ir, LinearIR::constExprIt begin, Linea const std::vector exits{LoopPort(brgemm_expr->get_output_port(0), true)}; const auto id = loop_manager->mark_loop(loop_begin_it, loop_end_it, m, block_size_m, 1, entries, exits, false); - loop_manager->get_loop_info(id)->set_handlers(get_default_handlers(m, block_size_m)); + loop_manager->get_loop_info(id)->set_handlers(get_default_blocking_loop_handlers(m, block_size_m)); }; auto mark_n_blocking = [&]() { @@ -186,7 +182,7 @@ bool BrgemmBlocking::run(LinearIR& linear_ir, LinearIR::constExprIt begin, Linea const std::vector exits{LoopPort(brgemm_expr->get_output_port(0), true)}; const auto id = loop_manager->mark_loop(loop_begin_it, loop_end_it, n, block_size_n, 0, entries, exits, false); - loop_manager->get_loop_info(id)->set_handlers(get_default_handlers(n, block_size_n)); + loop_manager->get_loop_info(id)->set_handlers(get_default_blocking_loop_handlers(n, block_size_n)); }; auto mark_k_blocking = [&]() { @@ -198,7 +194,7 @@ bool BrgemmBlocking::run(LinearIR& linear_ir, LinearIR::constExprIt begin, Linea LoopPort(brgemm_cpu && brgemm_cpu->is_with_data_repacking() ? copy_b_expr->get_input_port(0) : brgemm_expr->get_input_port(1), true, 1)}; const std::vector exits{LoopPort(brgemm_expr->get_output_port(0), false)}; - auto handlers = get_default_handlers(k, block_size_k); + auto handlers = get_default_blocking_loop_handlers(k, block_size_k); handlers.register_pass(0.f); const auto id = loop_manager->mark_loop(loop_begin_it, loop_end_it, k, block_size_k, entries, exits, false); diff --git a/src/plugins/intel_cpu/src/transformations/snippets/x64/pass/lowered/brgemm_blocking.hpp b/src/plugins/intel_cpu/src/transformations/snippets/x64/pass/lowered/brgemm_blocking.hpp index cdc2d05cffd1e5..4d29267f034fc9 100644 --- a/src/plugins/intel_cpu/src/transformations/snippets/x64/pass/lowered/brgemm_blocking.hpp +++ b/src/plugins/intel_cpu/src/transformations/snippets/x64/pass/lowered/brgemm_blocking.hpp @@ -5,6 +5,7 @@ #pragma once #include "snippets/lowered/pass/pass.hpp" +#include "snippets/lowered/specific_loop_iter_handlers.hpp" namespace ov { namespace intel_cpu { @@ -24,6 +25,8 @@ class BrgemmBlocking : public snippets::lowered::pass::RangedPass { snippets::lowered::LinearIR::constExprIt begin, snippets::lowered::LinearIR::constExprIt end) override; + static snippets::lowered::SpecificIterationHandlers get_default_blocking_loop_handlers(size_t work_amount, size_t block_size); + private: static snippets::lowered::LinearIR::constExprIt move_new_memory_buffer(snippets::lowered::LinearIR& linear_ir, const snippets::lowered::LinearIR::constExprIt& brgemm_it); diff --git a/src/plugins/intel_cpu/src/transformations/snippets/x64/pass/lowered/cpu_iter_handlers.cpp b/src/plugins/intel_cpu/src/transformations/snippets/x64/pass/lowered/cpu_iter_handlers.cpp index 97c8c2a5299f6b..6acf68fe245467 100644 --- a/src/plugins/intel_cpu/src/transformations/snippets/x64/pass/lowered/cpu_iter_handlers.cpp +++ b/src/plugins/intel_cpu/src/transformations/snippets/x64/pass/lowered/cpu_iter_handlers.cpp @@ -36,24 +36,16 @@ std::shared_ptr SetBrgemmBeta::merge(const st return merged_pass; } -SetEvaluanceOnce::SetEvaluanceOnce(bool evaluation) : snippets::lowered::pass::RangedPass(), m_evaluation(evaluation) {} - bool SetEvaluanceOnce::run(LinearIR& linear_ir, LinearIR::constExprIt begin, LinearIR::constExprIt end) { const auto& loop_end = ov::as_type_ptr(end->get()->get_node()); OPENVINO_ASSERT(loop_end, "SetEvaluanceOnce expected LoopEnd node in iterator `end`."); const auto& loop_info = linear_ir.get_loop_manager()->get_loop_info(loop_end->get_id()); - loop_info->set_evaluate_once(m_evaluation); + loop_info->set_evaluate_once(true); return true; } std::shared_ptr SetEvaluanceOnce::merge(const std::shared_ptr& other) { - const auto merged_pass = std::make_shared(m_evaluation); - if (other == nullptr) - return merged_pass; - const auto casted_pass = ov::as_type_ptr(other); - if (!casted_pass || m_evaluation != casted_pass->m_evaluation) - return nullptr; - return merged_pass; + return !other || ov::is_type(other) ? std::make_shared() : nullptr; } } // namespace pass diff --git a/src/plugins/intel_cpu/src/transformations/snippets/x64/pass/lowered/cpu_iter_handlers.hpp b/src/plugins/intel_cpu/src/transformations/snippets/x64/pass/lowered/cpu_iter_handlers.hpp index 7616954bc2cca5..2a9492713e0c46 100644 --- a/src/plugins/intel_cpu/src/transformations/snippets/x64/pass/lowered/cpu_iter_handlers.hpp +++ b/src/plugins/intel_cpu/src/transformations/snippets/x64/pass/lowered/cpu_iter_handlers.hpp @@ -30,22 +30,18 @@ class SetBrgemmBeta : public snippets::lowered::pass::RangedPass { /** * @interface SetEvaluanceOnce - * @brief The pass set `evaluate once` only to ExpandedLoopInfo which is mapped on LoopEnd in the passed iterator `end`. + * @brief The pass set `evaluate once = true` only to ExpandedLoopInfo which is mapped on LoopEnd in the passed iterator `end`. * The pointer arithmetic should be updated in the separate optimization `OptimizeLoopSingleEvaluation` - * @param m_evaluation - value which must be set * @ingroup snippets */ class SetEvaluanceOnce : public snippets::lowered::pass::RangedPass { public: - SetEvaluanceOnce(bool evaluation); + SetEvaluanceOnce() = default; OPENVINO_RTTI("SetEvaluanceOnce", "RangedPass") bool run(snippets::lowered::LinearIR& linear_ir, snippets::lowered::LinearIR::constExprIt begin, snippets::lowered::LinearIR::constExprIt end) override; std::shared_ptr merge(const std::shared_ptr& other) override; - -private: - bool m_evaluation = false; }; } // namespace pass } // namespace intel_cpu diff --git a/src/plugins/intel_cpu/tests/functional/shared_tests_instances/snippets/matmul.cpp b/src/plugins/intel_cpu/tests/functional/shared_tests_instances/snippets/matmul.cpp index 8e1959224ef6a1..1089bdc3faffaa 100644 --- a/src/plugins/intel_cpu/tests/functional/shared_tests_instances/snippets/matmul.cpp +++ b/src/plugins/intel_cpu/tests/functional/shared_tests_instances/snippets/matmul.cpp @@ -96,7 +96,7 @@ std::vector> input_shapes_dynamic{ }, // Only N dimension is dynamic { - STATIC_SHAPE(2, 2, 65, 550), + {PartialShape{}, {{2, 2, 65, 550}}}, {PartialShape{2, 2, 550, -1}, {{2, 2, 550, 70}, {2, 2, 550, 12}, {2, 2, 550, 70}, {2, 2, 550, 12}, {2, 2, 550, 10}}} }, diff --git a/src/plugins/intel_cpu/tests/unit/snippets_transformations/x64/lowered/brgemm_blocking.cpp b/src/plugins/intel_cpu/tests/unit/snippets_transformations/x64/lowered/brgemm_blocking.cpp index 9b55564ea09186..f416d06791e182 100644 --- a/src/plugins/intel_cpu/tests/unit/snippets_transformations/x64/lowered/brgemm_blocking.cpp +++ b/src/plugins/intel_cpu/tests/unit/snippets_transformations/x64/lowered/brgemm_blocking.cpp @@ -7,8 +7,8 @@ #include "lir_test_utils.hpp" #include "openvino/opsets/opset10.hpp" #include "snippets/lowered/linear_ir.hpp" +#include "snippets/lowered/loop_info.hpp" #include "snippets/lowered/pass/propagate_subtensors.hpp" -#include "snippets/lowered/pass/serialize_control_flow.hpp" #include "snippets/snippets_isa.hpp" #include "transformations/snippets/x64/op/brgemm_copy_b.hpp" #include "transformations/snippets/x64/op/brgemm_cpu.hpp" @@ -23,14 +23,6 @@ using namespace ov::snippets::lowered; using namespace ov::snippets; namespace { -SpecificIterationHandlers get_default_handlers(size_t work_amount, size_t block_size) { - SpecificIterationHandlers handlers; - const auto tail_size = snippets::utils::is_dynamic_value(work_amount) ? snippets::utils::get_dynamic_value() : work_amount % block_size; - if (tail_size != 0) - handlers.register_pass(tail_size); - handlers.register_pass(true); - return handlers; -} void create_brgemm_loop_infos(const LinearIRPtr& linear_ir, const ExpressionPtr& brgemm_expr, @@ -41,29 +33,30 @@ void create_brgemm_loop_infos(const LinearIRPtr& linear_ir, const bool n_block = k != 0 && k_blk != 0; const bool m_block = m != 0 && m_blk != 0; if (k_block) { - const size_t loop_id = create_and_add_unified_loop_info(linear_ir, k, k_blk, - {LoopPort(brgemm_expr->get_input_port(0)), - LoopPort(brgemm_expr->get_input_port(1), true, 1)}, - {LoopPort(brgemm_expr->get_output_port(0), false)}, false); - const auto& loop_info = linear_ir->get_loop_manager()->get_loop_info(loop_id); - loop_info->set_handlers(get_default_handlers(k, k_block)); + const auto loop_info = + std::make_shared(k, k_blk, + std::vector{LoopPort(brgemm_expr->get_input_port(0)), + LoopPort(brgemm_expr->get_input_port(1), true, 1)}, + std::vector{LoopPort(brgemm_expr->get_output_port(0), false)}, + ov::intel_cpu::pass::BrgemmBlocking::get_default_blocking_loop_handlers(k, k_block)); loop_info->register_pass_to_handler(0.f); + linear_ir->get_loop_manager()->add_loop_info(loop_info); } if (n_block) { - const size_t loop_id = create_and_add_unified_loop_info(linear_ir, n, n_blk, - {LoopPort(brgemm_expr->get_input_port(0), false), - LoopPort(brgemm_expr->get_input_port(1))}, - {LoopPort(brgemm_expr->get_output_port(0))}, false); - const auto& loop_info = linear_ir->get_loop_manager()->get_loop_info(loop_id); - loop_info->set_handlers(get_default_handlers(n, n_block)); + linear_ir->get_loop_manager()->add_loop_info( + std::make_shared(n, n_blk, + std::vector{LoopPort(brgemm_expr->get_input_port(0), false), + LoopPort(brgemm_expr->get_input_port(1))}, + std::vector{LoopPort(brgemm_expr->get_output_port(0))}, + ov::intel_cpu::pass::BrgemmBlocking::get_default_blocking_loop_handlers(n, n_block))); } if (m_block) { - const size_t loop_id = create_and_add_unified_loop_info(linear_ir, m, m_blk, - {LoopPort(brgemm_expr->get_input_port(0), true, 1), - LoopPort(brgemm_expr->get_input_port(1), false, 1)}, - {LoopPort(brgemm_expr->get_output_port(0), true, 1)}, false); - const auto& loop_info = linear_ir->get_loop_manager()->get_loop_info(loop_id); - loop_info->set_handlers(get_default_handlers(m, m_block)); + linear_ir->get_loop_manager()->add_loop_info( + std::make_shared(m, m_blk, + std::vector{LoopPort(brgemm_expr->get_input_port(0), true, 1), + LoopPort(brgemm_expr->get_input_port(1), false, 1)}, + std::vector{LoopPort(brgemm_expr->get_output_port(0), true, 1)}, + ov::intel_cpu::pass::BrgemmBlocking::get_default_blocking_loop_handlers(m, m_block))); } } @@ -77,30 +70,31 @@ void create_brgemm_with_copy_b_loop_infos(const LinearIRPtr& linear_ir, const bool n_block = k != 0 && k_blk != 0; const bool m_block = m != 0 && m_blk != 0; if (k_block) { - const size_t loop_id = create_and_add_unified_loop_info(linear_ir, k, k_blk, - {LoopPort(brgemm_expr->get_input_port(0)), - LoopPort(copy_b_expr->get_input_port(0), true, 1)}, - {LoopPort(brgemm_expr->get_output_port(0), false)}); - const auto& loop_info = linear_ir->get_loop_manager()->get_loop_info(loop_id); - loop_info->set_handlers(get_default_handlers(k, k_block)); + const auto loop_info = + std::make_shared(k, k_blk, + std::vector{LoopPort(brgemm_expr->get_input_port(0)), + LoopPort(copy_b_expr->get_input_port(0), true, 1)}, + std::vector{LoopPort(brgemm_expr->get_output_port(0), false)}, + ov::intel_cpu::pass::BrgemmBlocking::get_default_blocking_loop_handlers(k, k_block)); loop_info->register_pass_to_handler(0.f); + linear_ir->get_loop_manager()->add_loop_info(loop_info); } if (n_block) { - const size_t loop_id = create_and_add_unified_loop_info(linear_ir, n, n_blk, - {LoopPort(brgemm_expr->get_input_port(0), false), - LoopPort(copy_b_expr->get_input_port(0))}, - {LoopPort(brgemm_expr->get_output_port(0))}); - const auto& loop_info = linear_ir->get_loop_manager()->get_loop_info(loop_id); - loop_info->set_handlers(get_default_handlers(n, n_block)); + linear_ir->get_loop_manager()->add_loop_info( + std::make_shared(n, n_blk, + std::vector{LoopPort(brgemm_expr->get_input_port(0), false), + LoopPort(copy_b_expr->get_input_port(0))}, + std::vector{LoopPort(brgemm_expr->get_output_port(0))}, + ov::intel_cpu::pass::BrgemmBlocking::get_default_blocking_loop_handlers(n, n_block))); } if (m_block) { const auto& second_input_port = k_block || n_block ? copy_b_expr->get_input_port(0) : brgemm_expr->get_input_port(1); - const size_t loop_id = create_and_add_unified_loop_info(linear_ir, m, m_blk, - {LoopPort(brgemm_expr->get_input_port(0), true, 1), - LoopPort(second_input_port, false, 1)}, - {LoopPort(brgemm_expr->get_output_port(0), true, 1)}); - const auto& loop_info = linear_ir->get_loop_manager()->get_loop_info(loop_id); - loop_info->set_handlers(get_default_handlers(m, m_block)); + linear_ir->get_loop_manager()->add_loop_info( + std::make_shared(m, m_blk, + std::vector{LoopPort(brgemm_expr->get_input_port(0), true, 1), + LoopPort(second_input_port, false, 1)}, + std::vector{LoopPort(brgemm_expr->get_output_port(0), true, 1)}, + ov::intel_cpu::pass::BrgemmBlocking::get_default_blocking_loop_handlers(m, m_block))); } } } // namespace