From 879ae7a564503baa48422a2d8c95f198a7dfe975 Mon Sep 17 00:00:00 2001
From: Tikhonov Ivan
Date: Tue, 17 Dec 2024 11:56:22 +0400
Subject: [PATCH] delete debug prints

---
 .../position_ids_replacer.cpp                   |  6 -----
 .../prev_sequence_length_pattern.cpp            |  1 -
 .../total_sequence_length_pattern.cpp           | 17 ++++++--------
 src/core/include/openvino/core/version.hpp      |  4 ++--
 src/core/src/pass/sdpa_to_paged_attention.cpp   |  4 ++--
 src/core/src/pass/visualize_tree.cpp            | 18 +++++++--------
 src/frontends/ir/src/frontend.cpp               |  4 ----
 .../nodes/kernels/scaled_attn/executor_pa.cpp   | 22 +++----------------
 .../intel_cpu/src/nodes/scaled_attn.cpp         |  7 +-----
 9 files changed, 24 insertions(+), 59 deletions(-)

diff --git a/src/common/transformations/src/transformations/sdpa_to_paged_attention/position_ids_replacer.cpp b/src/common/transformations/src/transformations/sdpa_to_paged_attention/position_ids_replacer.cpp
index 4db9e95008933c..3eb954d5daaac9 100644
--- a/src/common/transformations/src/transformations/sdpa_to_paged_attention/position_ids_replacer.cpp
+++ b/src/common/transformations/src/transformations/sdpa_to_paged_attention/position_ids_replacer.cpp
@@ -36,7 +36,6 @@ ov::pass::PositionIDsReplacer::PositionIDsReplacer(const Output<Node>& position_
     auto add = pattern::wrap_type<v1::Add>({mul, position_embed});
 
     ov::matcher_pass_callback callback = [=](ov::pass::pattern::Matcher& m) {
-        // std::cout << "XXXXXX PositionIDsReplacer" << std::endl;
         const auto& pattern_map = m.get_pattern_value_map();
         replace_node(pattern_map.at(position_ids_pattern).get_node_shared_ptr(), position_ids.get_node_shared_ptr());
         return true;
@@ -62,7 +61,6 @@ ov::pass::PositionIDsReplacerQwen::PositionIDsReplacerQwen(const Output<Node>& p
         const auto& pattern_map = m.get_pattern_value_map();
         auto max_context_len = pattern_map.at(max_context_len_pattern).get_node_shared_ptr();
         if (max_context_len->get_friendly_name() != "max_context_len") {
-            // std::cout << "XXXX return false;" << std::endl;
             return false;
         }
 
@@ -80,10 +78,6 @@ ov::pass::PositionIDsReplacerQwen::PositionIDsReplacerQwen(const Output<Node>& p
         replace_node(slice_2, reshape);
         gather->validate_and_infer_types();
-        /* std::cout << "slice_2 in(0) " << slice_2->input(0).get_partial_shape() << std::endl;
-        std::cout << "slice_2 out " << slice_2->output(0).get_partial_shape() << std::endl;
-        std::cout << "gather in " << gather->input(0).get_partial_shape() << std::endl;
-        std::cout << "gather out " << gather->output(0).get_partial_shape() << std::endl;*/
 
         return true;
     };
 
diff --git a/src/common/transformations/src/transformations/sdpa_to_paged_attention/prev_sequence_length_pattern.cpp b/src/common/transformations/src/transformations/sdpa_to_paged_attention/prev_sequence_length_pattern.cpp
index 19bb21637e2a52..36d9d88975b2e0 100644
--- a/src/common/transformations/src/transformations/sdpa_to_paged_attention/prev_sequence_length_pattern.cpp
+++ b/src/common/transformations/src/transformations/sdpa_to_paged_attention/prev_sequence_length_pattern.cpp
@@ -26,7 +26,6 @@ ov::pass::PrevSequenceLengthPattern::PrevSequenceLengthPattern(std::shared_ptr
     auto seq = pattern::wrap_type<v8::Gather>({kv_shape, pattern::any_input(), pattern::any_input()});
 
     ov::matcher_pass_callback callback = [=](ov::pass::pattern::Matcher& m) {
-        std::cout << "XXXXXX PrevSequenceLengthPattern" << std::endl;
        // TODO: Check that seq has axis that really takes sequence len but not any other dimension -- use symbolics or
        // look at the constant input
        // Detect the case by taking initialization expression for ReadValue and compare it with the second gather index
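Note: for orientation, the passes touched above are ov::pass::MatcherPass subclasses. Below is a minimal, self-contained sketch of that shape; the class name ExampleMatcher and the matched Add pattern are illustrative only and not part of this patch.

#include <memory>
#include "openvino/op/add.hpp"
#include "openvino/pass/graph_rewrite.hpp"
#include "openvino/pass/pattern/op/wrap_type.hpp"

// Illustrative pass: matches any v1::Add and inspects the pattern map, the
// same way the PositionIDsReplacer callback above walks its matched subgraph.
class ExampleMatcher : public ov::pass::MatcherPass {
public:
    ExampleMatcher() {
        auto add = ov::pass::pattern::wrap_type<ov::op::v1::Add>();
        ov::matcher_pass_callback callback = [=](ov::pass::pattern::Matcher& m) {
            const auto& pattern_map = m.get_pattern_value_map();
            auto matched = pattern_map.at(add).get_node_shared_ptr();
            (void)matched;  // a real pass would rewrite the graph here
            return false;   // false: the model was not modified
        };
        register_matcher(std::make_shared<ov::pass::pattern::Matcher>(add, "ExampleMatcher"), callback);
    }
};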
diff --git a/src/common/transformations/src/transformations/sdpa_to_paged_attention/total_sequence_length_pattern.cpp b/src/common/transformations/src/transformations/sdpa_to_paged_attention/total_sequence_length_pattern.cpp
index 24f20696c83657..e0f6ee41249f07 100644
--- a/src/common/transformations/src/transformations/sdpa_to_paged_attention/total_sequence_length_pattern.cpp
+++ b/src/common/transformations/src/transformations/sdpa_to_paged_attention/total_sequence_length_pattern.cpp
@@ -35,7 +35,6 @@ ov::pass::TotalSequenceLengthPattern::TotalSequenceLengthPattern(
     ov::matcher_pass_callback callback = [=](ov::pass::pattern::Matcher& m) {
         // TODO: Check that seq has axis that really takes sequence len but not any other dimension --
         // use symbolic infra or look at the constant input
-        std::cout << "XXXXXX TotalSequenceLengthPattern" << std::endl;
         const auto& pattern_map = m.get_pattern_value_map();
         auto concat = ov::as_type_ptr<v0::Concat>(pattern_map.at(kv_concat).get_node_shared_ptr());
 
@@ -109,24 +108,22 @@ ov::pass::TotalSequenceLengthPatternQwen::TotalSequenceLengthPatternQwen(
     const std::shared_ptr<ov::op::v0::Parameter>& max_context_len) {
     MATCHER_SCOPE(TotalSequenceLengthPatternQwen);
 
-    auto kv_past = pattern::wrap_type<v6::ReadValue>({pattern::any_input()});
-    auto kv_gather = pattern::wrap_type<v8::Gather>({kv_past, pattern::any_input(), pattern::any_input()});
-    auto kv_shape = pattern::wrap_type<v3::ShapeOf>({kv_gather});
-    auto seq_past = pattern::wrap_type<v8::Gather>({kv_shape, pattern::any_input(), pattern::any_input()});
+    auto prev_max_seq_len = pattern::wrap_type();
+    auto opt_convert_1 = pattern::optional<v0::Convert>(prev_max_seq_len);
+    auto opt_reshape_1 = pattern::optional<v1::Reshape>({opt_convert_1, pattern::any_input()});
 
     auto input_ids = pattern::wrap_type();
     auto unsqueeze = pattern::wrap_type<v0::Unsqueeze>({input_ids, pattern::any_input()});
-    auto optional_reshape = pattern::optional<v1::Reshape>({unsqueeze, pattern::any_input()});
-    auto optional_convert = pattern::optional<v0::Convert>(optional_reshape);
-    auto kv_shape_current = pattern::wrap_type<v3::ShapeOf>({optional_convert});
+    auto opt_reshape_2 = pattern::optional<v1::Reshape>({unsqueeze, pattern::any_input()});
+    auto opt_convert_2 = pattern::optional<v0::Convert>(opt_reshape_2);
+    auto kv_shape_current = pattern::wrap_type<v3::ShapeOf>({opt_convert_2});
     auto seq_current = pattern::wrap_type<v8::Gather>({kv_shape_current, pattern::any_input(), pattern::any_input()});
-    auto pattern_total_seq = pattern::wrap_type<v1::Add>({seq_current, seq_past});
+    auto pattern_total_seq = pattern::wrap_type<v1::Add>({seq_current, opt_reshape_1});
 
     ov::matcher_pass_callback callback = [=](ov::pass::pattern::Matcher& m) {
         // TODO: Check that seq has axis that really takes sequence len but not any other dimension --
         // use symbolic infra or look at the constant input
-        std::cout << "XXXXXX TotalSequenceLengthPatternQwen" << std::endl;
         const auto& pattern_map = m.get_pattern_value_map();
         auto total_seq = pattern_map.at(pattern_total_seq).get_node_shared_ptr();
 
diff --git a/src/core/include/openvino/core/version.hpp b/src/core/include/openvino/core/version.hpp
index db1595f7addcb6..79f688795ce37c 100644
--- a/src/core/include/openvino/core/version.hpp
+++ b/src/core/include/openvino/core/version.hpp
@@ -19,8 +19,8 @@
  * @brief Defines OpenVINO patch version
  */
 
-#define OPENVINO_VERSION_MAJOR 2024
-#define OPENVINO_VERSION_MINOR 5
+#define OPENVINO_VERSION_MAJOR 2025
+#define OPENVINO_VERSION_MINOR 0
 #define OPENVINO_VERSION_PATCH 0
 
 namespace ov {
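The version.hpp hunk bumps the master branch to the 2025.0 line. The macros surface at runtime through ov::get_openvino_version(); a small sketch:

#include <iostream>

#include "openvino/core/version.hpp"

int main() {
    // Runtime view of the macros changed above (2024.5 -> 2025.0).
    const ov::Version version = ov::get_openvino_version();
    std::cout << version.description << " (" << version.buildNumber << ")\n";

    // The macros also work in preprocessor checks:
#if OPENVINO_VERSION_MAJOR >= 2025
    std::cout << "Built against the 2025.x line or newer\n";
#endif
    return 0;
}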
diff --git a/src/core/src/pass/sdpa_to_paged_attention.cpp b/src/core/src/pass/sdpa_to_paged_attention.cpp
index 5a8497a02d0080..0641ca99d44070 100644
--- a/src/core/src/pass/sdpa_to_paged_attention.cpp
+++ b/src/core/src/pass/sdpa_to_paged_attention.cpp
@@ -12,7 +12,6 @@
 #include "openvino/op/subtract.hpp"
 #include "openvino/op/unsqueeze.hpp"
 #include "openvino/pass/manager.hpp"
-#include "openvino/pass/visualize_tree.hpp"
 #include "transformations/sdpa_to_paged_attention/position_ids_replacer.hpp"
 #include "transformations/sdpa_to_paged_attention/prev_sequence_length_pattern.hpp"
 #include "transformations/sdpa_to_paged_attention/state_management_pattern.hpp"
@@ -128,9 +127,10 @@ bool ov::pass::SDPAToPagedAttention::run_on_model(const std::shared_ptr<ov::Model
 
-    manager.register_pass<PrevSequenceLengthPattern>(max_context_len);
+    manager.register_pass<PrevSequenceLengthPattern>(prev_max_seq_len, batch_dim);
     manager.register_pass<TotalSequenceLengthPattern>(max_context_len);
+    manager.register_pass<TotalSequenceLengthPatternQwen>(max_context_len);
     manager.register_pass<PositionIDsReplacer>(unsqueezed_position_ids->output(0));
     manager.register_pass<PositionIDsReplacerQwen>(unsqueezed_position_ids->output(0));
     manager.run_passes(model);
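The registration hunk above relies on ov::pass::Manager executing passes in registration order, so the Qwen-specific variant simply runs after the generic one. A minimal sketch of the idiom, with ConstantFolding standing in for the passes registered here:

#include <memory>

#include "openvino/core/model.hpp"
#include "openvino/pass/constant_folding.hpp"
#include "openvino/pass/manager.hpp"

// Passes run in the order they are registered; a model-specific variant
// (like TotalSequenceLengthPatternQwen above) is just one more entry.
void run_example_pipeline(const std::shared_ptr<ov::Model>& model) {
    ov::pass::Manager manager("Example:pipeline");
    manager.register_pass<ov::pass::ConstantFolding>();
    manager.run_passes(model);
}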
diff --git a/src/core/src/pass/visualize_tree.cpp b/src/core/src/pass/visualize_tree.cpp
index ccf1c07899b1a9..cf7bb6e85bdf37 100644
--- a/src/core/src/pass/visualize_tree.cpp
+++ b/src/core/src/pass/visualize_tree.cpp
@@ -216,7 +216,7 @@ static void collect_symbol_print_values(const std::shared_ptr<ov::Model>& m,
 bool ov::pass::VisualizeTree::run_on_model(const std::shared_ptr<ov::Model>& f) {
     RUN_ON_MODEL_SCOPE(VisualizeTree);
-    static const bool ovasp = true;
+    static const bool ovasp = ov::util::getenv_bool("OV_VISUALIZE_APPLY_SYMBOLIC_PROPAGATION");
     if (ovasp) {
         std::cerr << "Warning: OV_VISUALIZE_APPLY_SYMBOLIC_PROPAGATION enabled. ov::pass::SymbolicPropagation will be "
                      "triggered"
@@ -561,11 +561,11 @@ std::string ov::pass::VisualizeTree::get_attributes(std::shared_ptr<Node> node)
     std::stringstream label;
     label << "label=\"" << get_node_name(node);
 
-    static const bool nvtos = true;
-    static const bool nvtot = true;
-    static const bool nvtio = true;
-    static const bool nvtrti = true;
-    static const bool ovpvl = true;
+    static const bool nvtos = ov::util::getenv_bool("OV_VISUALIZE_TREE_OUTPUT_SHAPES");
+    static const bool nvtot = ov::util::getenv_bool("OV_VISUALIZE_TREE_OUTPUT_TYPES");
+    static const bool nvtio = ov::util::getenv_bool("OV_VISUALIZE_TREE_IO");
+    static const bool nvtrti = ov::util::getenv_bool("OV_VISUALIZE_TREE_RUNTIME_INFO");
+    static const bool ovpvl = ov::util::getenv_bool("OV_VISUALIZE_PARTIAL_VALUES_AND_LABELS");
 
     if (nvtos || nvtot || nvtio) {
         if (nvtio) {
@@ -618,7 +618,7 @@ std::string ov::pass::VisualizeTree::get_attributes(std::shared_ptr<Node> node)
 }
 
 std::string ov::pass::VisualizeTree::get_node_name(std::shared_ptr<Node> node) {
-    static const bool nvtmn = true;
+    static const bool nvtmn = ov::util::getenv_bool("OV_VISUALIZE_TREE_MEMBERS_NAME");
     std::string rc = (nvtmn ? std::string("friendly_name: ") : "") + node->get_friendly_name();
     if (node->get_friendly_name() != node->get_name()) {
         rc += "\\n" + (nvtmn ? std::string("name: ") : "") + node->get_name();
@@ -627,7 +627,7 @@ std::string ov::pass::VisualizeTree::get_node_name(std::shared_ptr<Node> node) {
     rc += "\\n" + (nvtmn ? std::string("type_name: ") : "") + std::string(type_info.version_id) +
           "::" + std::string(type_info.name);
 
-    static const bool nvttn = true;
+    static const bool nvttn = ov::util::getenv_bool("OV_VISUALIZE_TREE_TENSORS_NAME");
     if (nvttn) {
         auto to_string = [](const std::unordered_set<std::string>& names) {
             std::stringstream ss;
@@ -665,7 +665,7 @@ std::string ov::pass::VisualizeTree::get_node_name(std::shared_ptr<Node> node) {
         }
     }
 
-    static const bool nvtrti = true;
+    static const bool nvtrti = ov::util::getenv_bool("OV_VISUALIZE_TREE_RUNTIME_INFO");
     if (nvtrti) {
         const auto& rt = node->get_rt_info();
         if (!rt.empty()) {
diff --git a/src/frontends/ir/src/frontend.cpp b/src/frontends/ir/src/frontend.cpp
index ec5338bfd36d67..c5e137e1decc89 100644
--- a/src/frontends/ir/src/frontend.cpp
+++ b/src/frontends/ir/src/frontend.cpp
@@ -11,8 +11,6 @@
 #include "input_model.hpp"
 #include "openvino/core/any.hpp"
 #include "openvino/core/so_extension.hpp"
-#include "openvino/pass/sdpa_to_paged_attention.hpp"
-#include "openvino/pass/visualize_tree.hpp"
 #include "openvino/runtime/aligned_buffer.hpp"
 #include "openvino/runtime/shared_buffer.hpp"
 #include "openvino/util/file_util.hpp"
@@ -304,8 +302,6 @@ std::string FrontEnd::get_name() const {
 void FrontEnd::normalize(const std::shared_ptr<ov::Model>& model) const {
     ov::pass::Manager manager("Frontend:IR:normalize");
     manager.register_pass();
-    // manager.register_pass<ov::pass::VisualizeTree>("baichuan_sdpa.svg");
-    // manager.register_pass<ov::pass::SDPAToPagedAttention>();
     manager.run_passes(model);
 }
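The visualize_tree.cpp hunks restore environment-variable gating that the debug changes had hardcoded to true. A standalone sketch of the idiom follows; env_flag is a hypothetical stand-in for ov::util::getenv_bool, whose exact parsing rules may differ.

#include <cstdlib>
#include <string>

// Hypothetical stand-in for ov::util::getenv_bool: unset reads as false;
// "0", "false" and "off" read as false; any other value enables the flag.
static bool env_flag(const char* name) {
    const char* raw = std::getenv(name);
    if (raw == nullptr)
        return false;
    const std::string value(raw);
    return !(value.empty() || value == "0" || value == "false" || value == "off");
}

bool output_shapes_enabled() {
    // Function-local static: the environment is read once per process, the
    // same caching the `static const bool` declarations above provide.
    static const bool enabled = env_flag("OV_VISUALIZE_TREE_OUTPUT_SHAPES");
    return enabled;
}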
diff --git a/src/plugins/intel_cpu/src/nodes/kernels/scaled_attn/executor_pa.cpp b/src/plugins/intel_cpu/src/nodes/kernels/scaled_attn/executor_pa.cpp
index 99d6b4153bf214..a74021d8ac0d05 100644
--- a/src/plugins/intel_cpu/src/nodes/kernels/scaled_attn/executor_pa.cpp
+++ b/src/plugins/intel_cpu/src/nodes/kernels/scaled_attn/executor_pa.cpp
@@ -1775,26 +1775,15 @@ struct AttentionExecutor : public PagedAttentionExecutor {
         auto H = q.size(1) / S;
         auto h_each_group_len = 1;
         if (Hk != H) {
-            h_each_group_len = H / Hk;
+            h_each_group_len = H / Hk;
         }
         auto B_seq = past_lens.size(0);
-        std::cout << "1 assert " << std::endl;
-        q.assert_dims({B_token, H * S});
-        std::cout << "b token" << B_token << std::endl;
-        std::cout << "Hk " << Hk << std::endl;
-        std::cout << "S " << S << std::endl;
-        std::cout << "SV " << SV << std::endl;
-        std::cout << "3 assert " << std::endl;
-        v.assert_dims({B_token, Hk * SV});
-        std::cout << "2 assert " << std::endl;
+        q.assert_dims({B_token, H * S});
         k.assert_dims({B_token, Hk * S});
-
-        std::cout << "4 assert " << std::endl;
+        v.assert_dims({B_token, Hk * SV});
         q = q.reshape({B_token, H, 1, S});
-        std::cout << "5 assert " << std::endl;
         k = k.reshape({B_token, Hk, 1, S});
-        std::cout << "6 assert " << std::endl;
         v = v.reshape({B_token, Hk, 1, SV});
         if (k_cache.m_dt == ov::element::Type_t::u8) {
             k_cache.assert_dims({0, Hk, block_size, S + sizeof(float) * 2}, true);
@@ -1803,20 +1792,15 @@ struct AttentionExecutor : public PagedAttentionExecutor {
             k_cache.assert_dims({0, Hk, block_size, S}, true);
             v_cache.assert_dims({k_cache.m_dims[0], Hk, block_size, SV});
         }
-        std::cout << "7 assert " << std::endl;
         past_lens.assert_dims({B_seq});
-        std::cout << "8 assert " << std::endl;
         subsequence_begins.assert_dims({B_seq + 1});
-        std::cout << "9 assert " << std::endl;
         block_indices.assert_dims({0}, true);
-        std::cout << "10 assert " << std::endl;
         block_indices_begins.assert_dims({B_seq + 1});
         if (scale == 0.0f)
             scale = 1.0f / sqrt(S);
         if (alibi_slopes) {
             alibi_slopes.assert_dims({H});
         }
-        std::cout << "11 assert " << std::endl;
         output_emb.assert_dims({B_token, H * SV});
         output_emb = output_emb.reshape({B_token, 1, H * SV});
diff --git a/src/plugins/intel_cpu/src/nodes/scaled_attn.cpp b/src/plugins/intel_cpu/src/nodes/scaled_attn.cpp
index bb06b229cffd08..7fe3fc8dc5045d 100644
--- a/src/plugins/intel_cpu/src/nodes/scaled_attn.cpp
+++ b/src/plugins/intel_cpu/src/nodes/scaled_attn.cpp
@@ -973,11 +973,7 @@ struct ScaledDotProductAttention::AttentionExecutor : public ScaledDotProductAtt
             SV = v_input.size(3);
             L0 = present_key.size(2) - L1;
             auto Hk = k_input.size(1);
-            std::cout << "B: " << B << std::endl;
-            std::cout << "Hk: " << Hk << std::endl;
-            std::cout << "S: " << S << std::endl;
-            std::cout << "L1: " << L1 << std::endl;
-            std::cout << "SV: " << SV << std::endl;
+
             if (fuse_concat) {
                 k_input.assert_dims({B, Hk, L1, S});
                 v_input.assert_dims({B, Hk, L1, SV});
@@ -1199,7 +1195,6 @@ void ScaledDotProductAttention::createPrimitive() {
 }
 
 void ScaledDotProductAttention::execute(dnnl::stream strm) {
-    std::cout << "XXXXXXX SDPA 2" << std::endl;
     auto orginSDPInputNumber = getOriginalInputsNumber() - (m_config.config.fuse_concat ? 3 : 0);
     std::vector<MemoryPtr> inputs(orginSDPInputNumber);
     auto output = getDstMemoryAtPort(0);
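Besides dropping the numbered "N assert" traces, the executor_pa.cpp hunk puts the q/k/v checks back into q, k, v order. For orientation, here is a hypothetical stand-in for what an assert_dims-style check does; the real PlainTensor::assert_dims lives in the CPU plugin, and reading the boolean flag in calls like k_cache.assert_dims({0, ...}, true) as "0 matches any extent" is an assumption here.

#include <cstddef>
#include <stdexcept>
#include <string>
#include <vector>

// Hypothetical dimension check: verifies rank, then each axis extent.
// When zero_is_wildcard is true, an expected extent of 0 matches any size.
void assert_dims_sketch(const std::vector<size_t>& actual,
                        const std::vector<size_t>& expected,
                        bool zero_is_wildcard = false) {
    if (actual.size() != expected.size())
        throw std::runtime_error("rank mismatch: expected " + std::to_string(expected.size()) +
                                 ", got " + std::to_string(actual.size()));
    for (size_t i = 0; i < actual.size(); ++i) {
        if (zero_is_wildcard && expected[i] == 0)
            continue;  // 0 acts as "any extent"
        if (actual[i] != expected[i])
            throw std::runtime_error("dim mismatch at axis " + std::to_string(i));
    }
}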