From 2f6fbd7c8c2897be4ae4ba7e3e61b49e5bfbf7d1 Mon Sep 17 00:00:00 2001 From: Kelvin Choi Date: Wed, 15 Mar 2023 20:56:01 +0900 Subject: [PATCH] [GPU] Need to exclude fused mem_dep from shape_infer_dep --- .../src/graph/include/program_node.h | 4 +++- .../intel_gpu/src/graph/primitive_inst.cpp | 6 +++++- .../intel_gpu/src/graph/program_node.cpp | 20 ++++++++++++++++++- 3 files changed, 27 insertions(+), 3 deletions(-) diff --git a/src/plugins/intel_gpu/src/graph/include/program_node.h b/src/plugins/intel_gpu/src/graph/include/program_node.h index 1f72b49bd8b5d1..845936ed18d0f8 100644 --- a/src/plugins/intel_gpu/src/graph/include/program_node.h +++ b/src/plugins/intel_gpu/src/graph/include/program_node.h @@ -99,7 +99,9 @@ struct program_node { return false; } - std::map get_const_memory_deps() const; + bool is_fused_dep(size_t dep_idx) const; + + std::map get_const_memory_deps(bool exclude_fused_dep = false) const; virtual std::unique_ptr get_kernel_impl_params() const { return get_kernel_impl_params(get_input_layouts(), output_layouts); diff --git a/src/plugins/intel_gpu/src/graph/primitive_inst.cpp b/src/plugins/intel_gpu/src/graph/primitive_inst.cpp index 29b97f400be713..ad1e2e61b5b3e5 100644 --- a/src/plugins/intel_gpu/src/graph/primitive_inst.cpp +++ b/src/plugins/intel_gpu/src/graph/primitive_inst.cpp @@ -165,7 +165,7 @@ void primitive_inst::update_shape() { set_shape_change(); // Even though the predecessors' shapes are not changed, the output shape might be udpated by the mem_dep - auto memory_deps = _node->get_const_memory_deps(); + auto memory_deps = _node->get_const_memory_deps(true); for (auto& i : _node->get_shape_infer_dependencies()) { if (memory_deps.count(i) > 0) { continue; @@ -190,6 +190,10 @@ void primitive_inst::update_shape() { } auto& dep = _node->get_dependency(i); auto dep_id = dep.id(); + // exclude fused node from memory_deps + if (_node->is_fused_dep(i)) { + break; + } // Events may be not created for in-order queue, so take them for OOO 
queue only
             if (_network.has_event(dep.id()) && queue_type == QueueTypes::out_of_order) {
                 dependencies_events.push_back(_network.get_primitive_event(dep_id));
diff --git a/src/plugins/intel_gpu/src/graph/program_node.cpp b/src/plugins/intel_gpu/src/graph/program_node.cpp
index 140562e8d9354d..7f7508116ad28d 100644
--- a/src/plugins/intel_gpu/src/graph/program_node.cpp
+++ b/src/plugins/intel_gpu/src/graph/program_node.cpp
@@ -378,13 +378,31 @@ bool program_node::has_padded_dependency() const {
     });
 }
 
-std::map program_node::get_const_memory_deps() const {
+bool program_node::is_fused_dep(size_t dep_idx) const {
+    // dep_idx is fused when it is >= dep_start_idx of any fused primitive; read-only scan, so bind by const ref.
+    for (const auto& fused : get_fused_primitives()) {
+        if (dep_idx >= fused.dep_start_idx) {
+            return true;
+        }
+    }
+    return false;
+}
+
+std::map program_node::get_const_memory_deps(bool exclude_fused_dep) const {
     std::map mem_deps;
     for (auto& i : get_shape_infer_dependencies()) {
         // Some primitives may have flexible count of deps (e.g. reshape), thus allow skipping some deps
         if (i >= get_dependencies().size())
             continue;
 
+        // exclude fused dependency
+        if (exclude_fused_dep) {
+            if (is_fused_dep(i)) {
+                continue;
+            }
+        }
+
+        // constant type only
         auto& dep = get_dependency(i);
         if (dep.is_type()) {
             mem_deps.insert({i, dep.as().get_attached_memory_ptr()});