From 1462a374c63e4beaa262cfb5c3b49a8003381876 Mon Sep 17 00:00:00 2001
From: Vladislav Golubev
Date: Fri, 15 Sep 2023 13:17:35 +0200
Subject: [PATCH] Cleanup

---
 .../include/snippets/lowered/loop_manager.hpp | 34 ++++++++-----------
 .../snippets/src/lowered/loop_manager.cpp     | 15 ++++++++
 .../src/lowered/pass/validate_loops.cpp       |  3 +-
 .../emitters/x64/jit_snippets_emitters.cpp    | 24 +------------
 .../snippets/x64/op/brgemm_cpu.cpp            |  2 --
 .../x64/pass/lowered/brgemm_blocking.cpp      | 16 ++++-----
 .../pass/set_brgemm_cpu_blocking_params.cpp   |  2 ++
 7 files changed, 42 insertions(+), 54 deletions(-)

diff --git a/src/common/snippets/include/snippets/lowered/loop_manager.hpp b/src/common/snippets/include/snippets/lowered/loop_manager.hpp
index 0fa4ed6d0dfd39..2b71e8605ab393 100644
--- a/src/common/snippets/include/snippets/lowered/loop_manager.hpp
+++ b/src/common/snippets/include/snippets/lowered/loop_manager.hpp
@@ -53,6 +53,7 @@ class LinearIR::LoopManager {
         // Returns dimension index if dimension indices for all entry and exit points are equal, and SIZE_MAX otherwise
         size_t get_dim_idx() const;
 
+        // TODO: replace this temporary solution when ticket 119851 is implemented
         using FirstIterHandler = std::function<bool(LinearIR&, LinearIR::constExprIt)>;
         void set_first_iter_handler(FirstIterHandler handler);
         FirstIterHandler fst_iter_handler = nullptr;
@@ -85,10 +86,12 @@ class LinearIR::LoopManager {
    // Return Loop ID
    template <typename T>
    size_t mark_loop(LinearIR::constExprIt loop_begin_pos,
-                    LinearIR::constExprIt loop_end_pos,
-                    size_t work_amount, size_t work_amount_increment, size_t dim_idx,
-                    const std::vector<T>& entries,
-                    const std::vector<T>& exits) {
+                    LinearIR::constExprIt loop_end_pos,
+                    size_t work_amount,
+                    size_t work_amount_increment,
+                    size_t dim_idx,
+                    const std::vector<T>& entries,
+                    const std::vector<T>& exits) {
        const auto loop_info = std::make_shared<LoopInfo>(work_amount, work_amount_increment, entries, exits);
        for (auto& entry : loop_info->entry_points) {
            entry.dim_idx = dim_idx;
        }
@@ -105,10 +108,11 @@ class LinearIR::LoopManager {
    template <typename T>
    size_t mark_loop(LinearIR::constExprIt loop_begin_pos,
-                    LinearIR::constExprIt loop_end_pos,
-                    size_t work_amount, size_t increment,
-                    const std::vector<T>& entries,
-                    const std::vector<T>& exits) {
+                    LinearIR::constExprIt loop_end_pos,
+                    size_t work_amount,
+                    size_t increment,
+                    const std::vector<T>& entries,
+                    const std::vector<T>& exits) {
        const auto loop_info = std::make_shared<LoopInfo>(work_amount, increment, entries, exits);
        const auto loop_id = this->add_loop_info(loop_info);
        for (auto expr_it = loop_begin_pos; expr_it != loop_end_pos; ++expr_it) {
@@ -117,21 +121,13 @@ class LinearIR::LoopManager {
            insert_loop_id(*expr_it, loop_id);
        }
        return loop_id;
    }
 
-    template <typename T>
    size_t mark_loop_with_old_loop_replacement(LinearIR::constExprIt loop_begin_pos,
                                               LinearIR::constExprIt loop_end_pos,
                                               size_t work_amount,
                                               size_t increment,
-                                              const std::vector<T>& entries,
-                                              const std::vector<T>& exits,
-                                              const size_t old_id) {
-        const auto loop_info = std::make_shared<LoopInfo>(work_amount, increment, entries, exits);
-        const auto loop_id = this->add_loop_info(loop_info);
-        for (auto expr_it = loop_begin_pos; expr_it != loop_end_pos; ++expr_it) {
-            replace_loop_id(*expr_it, old_id, loop_id);
-        }
-        return loop_id;
-    }
+                                              const std::vector<ExpressionPort>& entries,
+                                              const std::vector<ExpressionPort>& exits,
+                                              const size_t old_id);
 
    void fuse_loops(const LinearIR& linear_ir, size_t loop_id_upper, size_t loop_id_lower, bool fuse_into_upper = true);
    void fuse_loops(LinearIR::constExprIt loop_begin_target, LinearIR::constExprIt loop_end_target,
diff --git a/src/common/snippets/src/lowered/loop_manager.cpp b/src/common/snippets/src/lowered/loop_manager.cpp
index 0183268e5f8720..e3a0d2aec40250 100644
--- a/src/common/snippets/src/lowered/loop_manager.cpp
+++ b/src/common/snippets/src/lowered/loop_manager.cpp
@@ -242,6 +242,21 @@ void LinearIR::LoopManager::mark_loop(LinearIR::constExprIt loop_begin_pos,
    }
 }
 
+size_t LinearIR::LoopManager::mark_loop_with_old_loop_replacement(LinearIR::constExprIt loop_begin_pos,
+                                                                  LinearIR::constExprIt loop_end_pos,
+                                                                  size_t work_amount,
+                                                                  size_t increment,
+                                                                  const std::vector<ExpressionPort>& entries,
+                                                                  const std::vector<ExpressionPort>& exits,
+                                                                  const size_t old_id) {
+    const auto loop_info = std::make_shared<LoopInfo>(work_amount, increment, entries, exits);
+    const auto loop_id = this->add_loop_info(loop_info);
+    for (auto expr_it = loop_begin_pos; expr_it != loop_end_pos; ++expr_it) {
+        replace_loop_id(*expr_it, old_id, loop_id);
+    }
+    return loop_id;
+}
+
 void LinearIR::LoopManager::fuse_loops(const LinearIR& linear_ir, size_t loop_id_upper, size_t loop_id_lower, bool fuse_into_upper) {
    LinearIR::constExprIt loop_begin_target, loop_end_target;
    get_loop_bounds(linear_ir, fuse_into_upper ? loop_id_lower : loop_id_upper, loop_begin_target, loop_end_target);
diff --git a/src/common/snippets/src/lowered/pass/validate_loops.cpp b/src/common/snippets/src/lowered/pass/validate_loops.cpp
index 7e428c89a2935a..ec1d6a0cddeba9 100644
--- a/src/common/snippets/src/lowered/pass/validate_loops.cpp
+++ b/src/common/snippets/src/lowered/pass/validate_loops.cpp
@@ -54,6 +54,7 @@ bool ValidateLoops::run(LinearIR& linear_ir) {
        for (size_t i = 0; i < loop_ids.size(); ++i) {
            const auto id = loop_ids[i];
            const auto dim_idx = loop_manager->get_loop_info(id)->get_dim_idx();
+            // if the loop has different dimension indexes, it doesn't have to meet the next requirements
            if (dim_idx == SIZE_MAX)
                continue;
            if (std::find(dim_indexes.cbegin(), dim_indexes.cend(), dim_idx) != dim_indexes.cend()) {
@@ -64,8 +65,6 @@ bool ValidateLoops::run(LinearIR& linear_ir) {
                OPENVINO_ASSERT(loop_manager->get_loop_info(loop_ids[i - 1])->outer_splited_loop,
                                "Incorrect Loop ID configuration: the outer Loop with splitted dimension should have `outer_splited_loop=True`");
            }
-            OPENVINO_ASSERT(i == 0 || loop_manager->get_loop_info(loop_ids[i - 1])->get_dim_idx() >= dim_idx,
-                            "Incorrect Loop ID configuration: dim_idx should be sorted in accordance with loop nesting");
            dim_indexes.push_back(dim_idx);
        }
        validated_nested_loops.insert(loop_ids);
diff --git a/src/plugins/intel_cpu/src/emitters/x64/jit_snippets_emitters.cpp b/src/plugins/intel_cpu/src/emitters/x64/jit_snippets_emitters.cpp
index 91009e5361acd5..044782e2225414 100644
--- a/src/plugins/intel_cpu/src/emitters/x64/jit_snippets_emitters.cpp
+++ b/src/plugins/intel_cpu/src/emitters/x64/jit_snippets_emitters.cpp
@@ -713,18 +713,12 @@ BrgemmEmitter::BrgemmEmitter(jit_generator* h, cpu_isa_t isa, const ExpressionPt
    const auto& brgemm_node = as_type_ptr<ov::intel_cpu::BrgemmCPU>(expr->get_node());
    if (brgemm_node->is_dynamic())
        IE_THROW() << "Snippets don't support code generation for dynamic Brgemm";
 
-    const auto brgemm_copy = brgemm_node->is_with_data_repacking() ? brgemm_node->get_brgemm_copy() : nullptr;
    std::vector<size_t> leading_dimensions;
-    std::vector<std::vector<size_t>> io_layouts;
-
    auto init_scheduling_params = [&](const std::vector<size_t>& layout, const ov::Shape& io_shape) {
        if (layout.empty()) {
            // empty value indicates a planar layout
            leading_dimensions.push_back(io_shape.back());
-            std::vector<size_t> default_layout(io_shape.size());
-            std::iota(default_layout.begin(), default_layout.end(), 0);
-            io_layouts.push_back(default_layout);
        } else {
            // The idea here is to find "2" (for 4D shapes) in the layout and multiply dimensions that are to the right
            // This implies that "3" is the last layout value, otherwise this layout is not supported.
@@ -734,7 +728,6 @@ BrgemmEmitter::BrgemmEmitter(jit_generator* h, cpu_isa_t isa, const ExpressionPt
                IE_THROW() << "BrgemmEmitter detected invalid layout values: check that this shape + layout combination is schedulable";
            leading_dimensions.emplace_back(
                std::accumulate(io_shape.end() - num_last_dims, io_shape.end(), 1, std::multiplies<size_t>()));
-            io_layouts.push_back(layout);
        }
    };
 
@@ -746,26 +739,11 @@ BrgemmEmitter::BrgemmEmitter(jit_generator* h, cpu_isa_t isa, const ExpressionPt
    init_scheduling_params(input_1_desc->get_layout(), input_1_desc->get_shape());
    init_scheduling_params(output_desc->get_layout(), output_desc->get_shape());
 
-    // We need find original M,N,K having layouts and ordered shapes
-    // Layout: 0, 1, 2, 3 => New layout: 0, 2, 1, 3
-    // Shape: 1, 3, 5, 9 => New Shape: 1, 5, 3, 9
-    // To find original 2nd dimension, we should find index of position value `2` in new layout
-    // and get dimension from new shape by this index
-    auto get_ordered_idx = [](const std::vector<size_t>& layout, size_t idx) {
-        return std::distance(layout.begin(), std::find(layout.begin(), layout.end(), idx));
-    };
-
    const auto& output_subtensor = output_desc->get_subtensor();
    const auto& input_0_subtensor = input_0_desc->get_subtensor();
    m_K = *input_0_subtensor.rbegin();
    m_M = *(output_subtensor.rbegin() + 1);
    m_N = *output_subtensor.rbegin();
-    // TODO: N dim on input can be not equal to N dim on output. This case must be handled
-    if (brgemm_node->is_with_data_repacking()) {
-        const auto& C_shape = brgemm_node->get_output_shape(0);
-        const auto& C_layout = io_layouts[2];
-        m_N = C_shape[get_ordered_idx(C_layout, C_layout.size() - 1)];
-    }
 
    auto brg0Prc = InferenceEngine::details::convertPrecision(brgemm_node->get_input_element_type(0));
    auto brg1Prc = InferenceEngine::details::convertPrecision(brgemm_node->get_input_element_type(1));
@@ -783,7 +761,7 @@ BrgemmEmitter::BrgemmEmitter(jit_generator* h, cpu_isa_t isa, const ExpressionPt
    m_brgCtx.N = m_N;
    m_brgCtx.K = m_K;
    m_brgCtx.LDA = leading_dimensions[0];
-    m_brgCtx.LDB = brgemm_node->is_with_data_repacking() ? rnd_up(m_N, brgemm_copy->get_n_block_size()) : leading_dimensions[1];
+    m_brgCtx.LDB = brgemm_node->is_with_data_repacking() ? rnd_up(m_N, brgemm_node->get_brgemm_copy()->get_n_block_size()) : leading_dimensions[1];
    m_brgCtx.LDC = leading_dimensions[2];
    m_brgCtx.dt_in0 = static_cast<dnnl_data_type_t>(DnnlExtensionUtils::IEPrecisionToDataType(brg0Prc));
    m_brgCtx.dt_in1 = static_cast<dnnl_data_type_t>(DnnlExtensionUtils::IEPrecisionToDataType(brg1Prc));
diff --git a/src/plugins/intel_cpu/src/transformations/snippets/x64/op/brgemm_cpu.cpp b/src/plugins/intel_cpu/src/transformations/snippets/x64/op/brgemm_cpu.cpp
index 32036dfd86d9b2..f2c00dfbeff7c8 100644
--- a/src/plugins/intel_cpu/src/transformations/snippets/x64/op/brgemm_cpu.cpp
+++ b/src/plugins/intel_cpu/src/transformations/snippets/x64/op/brgemm_cpu.cpp
@@ -199,8 +199,6 @@ bool BrgemmCPU::visit_attributes(AttributeVisitor& visitor) {
    visitor.on_attribute("blk_K", m_K_blk);
    visitor.on_attribute("blk_N", m_N_blk);
    visitor.on_attribute("beta", m_beta);
-    auto instance_id = get_instance_id();
-    visitor.on_attribute("instance_id", instance_id);
    return true;
 }
 
diff --git a/src/plugins/intel_cpu/src/transformations/snippets/x64/pass/lowered/brgemm_blocking.cpp b/src/plugins/intel_cpu/src/transformations/snippets/x64/pass/lowered/brgemm_blocking.cpp
index af0cc9ac020ef9..851e9d9270a4f6 100644
--- a/src/plugins/intel_cpu/src/transformations/snippets/x64/pass/lowered/brgemm_blocking.cpp
+++ b/src/plugins/intel_cpu/src/transformations/snippets/x64/pass/lowered/brgemm_blocking.cpp
@@ -61,11 +61,11 @@ bool BrgemmBlocking::run(LinearIR& linear_ir) {
        auto output_subtensor = output_desc->get_subtensor();
 
        auto apply_m_blocking = [&]() {
-            const auto& input_shape_0 = input_0_desc->get_shape();
-            const auto& input_layout_0 = input_0_desc->get_layout();
+            const auto& output_shape = output_desc->get_shape();
+            const auto& output_layout = output_desc->get_layout();
 
-            const auto& m_idx = *(input_layout_0.rbegin() + 1);
-            const auto& m = input_shape_0[m_idx];
+            const auto& m_idx = *(output_layout.rbegin() + 1);
+            const auto& m = output_shape[m_idx];
            const auto block_size_m = brgemm->get_m_block_size();
            if (block_size_m >= m) {
                *(input_0_subtensor.rbegin() + 1) = m;
@@ -83,11 +83,11 @@ bool BrgemmBlocking::run(LinearIR& linear_ir) {
        };
 
        auto apply_n_blocking = [&]() {
-            const auto& input_shape_1 = input_1_desc->get_shape();
-            const auto& input_layout_1 = input_1_desc->get_layout();
+            const auto& output_shape = output_desc->get_shape();
+            const auto& output_layout = output_desc->get_layout();
 
-            const auto& n_idx = *input_layout_1.rbegin();
-            const auto& n = input_shape_1[n_idx];
+            const auto& n_idx = *output_layout.rbegin();
+            const auto& n = output_shape[n_idx];
            const auto block_size_n = brgemm->get_n_block_size();
            if (block_size_n >= n) {
                *input_1_subtensor.rbegin() = n;
diff --git a/src/plugins/intel_cpu/src/transformations/snippets/x64/pass/set_brgemm_cpu_blocking_params.cpp b/src/plugins/intel_cpu/src/transformations/snippets/x64/pass/set_brgemm_cpu_blocking_params.cpp
index 9e1b8c06ae9c5d..ab4aa46df972cf 100644
--- a/src/plugins/intel_cpu/src/transformations/snippets/x64/pass/set_brgemm_cpu_blocking_params.cpp
+++ b/src/plugins/intel_cpu/src/transformations/snippets/x64/pass/set_brgemm_cpu_blocking_params.cpp
@@ -67,6 +67,8 @@ pass::SetBrgemmCPUBlockingParams::SetBrgemmCPUBlockingParams() {
        if (brgemm->is_with_data_repacking()) {
            const auto brgemm_copy_b = brgemm->get_brgemm_copy();
            const auto out_dims = snippets::utils::get_planar_pshape(brgemm_copy_b->output(0)).get_shape();
+            // Due to the semantics of the BrgemmCopyB operation, its N dimension might not be equal
+            // to the corresponding BrgemmCPU dimension.
            const auto N = *out_dims.rbegin();
 
            const bool isAMXSupported = dnnl::impl::cpu::x64::mayiuse(dnnl::impl::cpu::x64::avx512_core_amx);