From 22f795ae33642176547e45d082459a04ebeaa84e Mon Sep 17 00:00:00 2001 From: Vladislav Golubev Date: Wed, 16 Oct 2024 13:31:53 +0200 Subject: [PATCH] Minor corrections --- src/common/transformations/include/ov_ops/lora_subgraph.hpp | 3 ++- src/common/transformations/src/ov_ops/lora_subgraph.cpp | 4 ++++ .../common_optimizations/lora_subgraph_fusion.cpp | 2 +- 3 files changed, 7 insertions(+), 2 deletions(-) diff --git a/src/common/transformations/include/ov_ops/lora_subgraph.hpp b/src/common/transformations/include/ov_ops/lora_subgraph.hpp index 5f16700e92eef8..ecfc330750626c 100644 --- a/src/common/transformations/include/ov_ops/lora_subgraph.hpp +++ b/src/common/transformations/include/ov_ops/lora_subgraph.hpp @@ -16,7 +16,8 @@ namespace internal { * @brief LoraSubgraph operation, which is used for LoRA subgraphs fusion. * It always has only 1 output, and the following inputs, whose order is fixed: * 1. main_flow_input: input from original model. - * 2. LoRA_input: input to which the Low-Rank adaptation is applied. The adapted input is combined with `main_flow_input`. + * 2. LoRA_input: input to which the Low-Rank adaptation is applied. + * The adapted input is combined with `main_flow_input`. * 3. LoRA_matrices: 3 Low-Rank adaptation matrices applied to `LoRA_input`. * The fused subgraph can be optimized in runtime based on LoRA semantic. * For instance, `main_flow_input` can be fast-forwarded to output in case of empty `LoRA_matrices`. diff --git a/src/common/transformations/src/ov_ops/lora_subgraph.cpp b/src/common/transformations/src/ov_ops/lora_subgraph.cpp index e729682fa6cada..8a7a5a75c69c7e 100644 --- a/src/common/transformations/src/ov_ops/lora_subgraph.cpp +++ b/src/common/transformations/src/ov_ops/lora_subgraph.cpp @@ -4,6 +4,8 @@ #include "ov_ops/lora_subgraph.hpp" +#include "itt.hpp" + namespace ov { namespace op { namespace internal { @@ -18,11 +20,13 @@ LoraSubgraph::LoraSubgraph(const OutputVector& args, const std::shared_ptr LoraSubgraph::clone_with_new_inputs(const OutputVector& new_args) const { + INTERNAL_OP_SCOPE(internal_LoraSubgraph_clone_with_new_inputs); check_new_args_count(this, new_args); return std::make_shared(new_args, get_function()->clone()); } void LoraSubgraph::validate_and_infer_types() { + INTERNAL_OP_SCOPE(internal_LoraSubgraph_validate_and_infer_types); OPENVINO_ASSERT(get_input_size() == 5, "LoraSubgraph must have 5 inputs whereas it has ", get_input_size()); OPENVINO_ASSERT(get_output_size() == 1, "LoraSubgraph must have 1 output whereas it has ", get_output_size()); const auto& body = get_function(); diff --git a/src/common/transformations/src/transformations/common_optimizations/lora_subgraph_fusion.cpp b/src/common/transformations/src/transformations/common_optimizations/lora_subgraph_fusion.cpp index a0da48a5b16241..366ce00894242e 100644 --- a/src/common/transformations/src/transformations/common_optimizations/lora_subgraph_fusion.cpp +++ b/src/common/transformations/src/transformations/common_optimizations/lora_subgraph_fusion.cpp @@ -65,8 +65,8 @@ ov::pass::LoraSubgraphFusion::LoraSubgraphFusion() { // Note: internal_inputs/external_connections order corresponds to LoraSubgraph semantic const std::vector> internal_inputs{ - find_connected_input(add.get_node(), main_flow.get_node()), // For commutative eltwise ops, input idx may be any, so it must be computed + find_connected_input(add.get_node(), main_flow.get_node()), pattern_map.count(transpose1_m) ? pattern_map.at(transpose1_m).get_node()->input(0) : matmul1.get_node()->input(0), matmul1.get_node()->input(1),