diff --git a/src/plugins/intel_gpu/src/graph/include/fused_primitive_desc.h b/src/plugins/intel_gpu/include/intel_gpu/graph/fused_primitive_desc.hpp
similarity index 91%
rename from src/plugins/intel_gpu/src/graph/include/fused_primitive_desc.h
rename to src/plugins/intel_gpu/include/intel_gpu/graph/fused_primitive_desc.hpp
index d00da8e567bc6b..538378a8b747c7 100644
--- a/src/plugins/intel_gpu/src/graph/include/fused_primitive_desc.h
+++ b/src/plugins/intel_gpu/include/intel_gpu/graph/fused_primitive_desc.hpp
@@ -5,7 +5,7 @@
 #pragma once
 
 #include "intel_gpu/primitives/primitive.hpp"
-#include "meta_utils.h"
+#include "intel_gpu/runtime/utils.hpp"
 
 namespace cldnn {
 
@@ -41,6 +41,17 @@ struct fused_primitive_desc {
         return p;
     }
 
+    bool operator==(const fused_primitive_desc& rhs) const {
+        if (total_num_deps != rhs.total_num_deps)
+            return false;
+        if (dep_start_idx != rhs.dep_start_idx)
+            return false;
+
+        return *desc == *rhs.desc;
+    }
+
+    bool operator!=(const fused_primitive_desc& rhs) const { return !(*this == rhs); }
+
     std::shared_ptr<const primitive> desc;
     layout input_layout = layout(data_types::f32, format::bfyx, tensor());
 
diff --git a/src/plugins/intel_gpu/src/graph/include/kernel_impl_params.hpp b/src/plugins/intel_gpu/include/intel_gpu/graph/kernel_impl_params.hpp
similarity index 72%
rename from src/plugins/intel_gpu/src/graph/include/kernel_impl_params.hpp
rename to src/plugins/intel_gpu/include/intel_gpu/graph/kernel_impl_params.hpp
index 40cbc5095886c2..796ab93a4b867e 100644
--- a/src/plugins/intel_gpu/src/graph/include/kernel_impl_params.hpp
+++ b/src/plugins/intel_gpu/include/intel_gpu/graph/kernel_impl_params.hpp
@@ -11,8 +11,7 @@
 #include "intel_gpu/runtime/tensor.hpp"
 #include "intel_gpu/primitives/primitive.hpp"
 
-#include "tensor_type.h"
-#include "fused_primitive_desc.h"
+#include "intel_gpu/graph/fused_primitive_desc.hpp"
 
 #include 
 #include 
@@ -49,7 +48,7 @@ struct kernel_impl_params {
 
     memory::ptr reordered_weights = nullptr;
 
-    kernel_impl_params() {}
+    kernel_impl_params() : prog(nullptr), desc(nullptr), unique_id(0) {}
 
     kernel_impl_params(program& _prog,
                        std::shared_ptr<const primitive> _desc,
@@ -116,6 +115,54 @@ struct kernel_impl_params {
         OPENVINO_ASSERT(prog != nullptr, "[GPU] Program pointer in kernel_impl_params is not initialized");
         return *prog;
     }
+
+    size_t hash() const {
+        size_t seed = desc->hash();
+        const size_t prime_number = 2654435761; // magic number to reduce hash collision rate.
+        for (auto& in : input_layouts) {
+            seed = hash_combine(seed, in.hash() * prime_number);
+        }
+        for (auto& out : output_layouts) {
+            seed = hash_combine(seed, out.hash() * prime_number);
+        }
+
+        // hash for fused prims
+        for (auto& fd : fused_desc) {
+            seed = hash_combine(seed, fd.desc->hash());
+        }
+        return seed;
+    }
+
+    bool operator==(const kernel_impl_params& rhs) const {
+        if (*desc != *rhs.desc)
+            return false;
+
+        if (rhs.input_layouts.size() != input_layouts.size())
+            return false;
+
+        if (rhs.output_layouts.size() != output_layouts.size())
+            return false;
+
+        for (size_t i = 0; i < input_layouts.size(); i++) {
+            if (input_layouts[i] != rhs.input_layouts[i])
+                return false;
+        }
+
+        for (size_t i = 0; i < output_layouts.size(); i++) {
+            if (output_layouts[i] != rhs.output_layouts[i])
+                return false;
+        }
+
+        if (fused_desc.size() != rhs.fused_desc.size())
+            return false;
+
+        for (size_t i = 0; i < rhs.fused_desc.size(); i++) {
+            if (fused_desc[i] != rhs.fused_desc[i])
+                return false;
+        }
+
+        return true;
+    }
 };
 
 }  // namespace cldnn
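The `hash()`/`operator==` pair added above is what makes `kernel_impl_params` usable as a cache key: `hash()` mixes the primitive's own hash with every input and output layout (plus the fused-primitive hashes), while `operator==` re-compares all of those fields so that two keys with colliding hashes are still told apart. A minimal standalone sketch of that contract, using simplified stand-in types (the boost-style `hash_combine` below approximates cldnn's helper and is not the plugin's exact code):

```cpp
// Sketch of the key contract: hash() for bucketing, operator== for identity.
#include <cstddef>
#include <cstdint>
#include <functional>
#include <vector>

inline std::size_t hash_combine(std::size_t seed, std::size_t value) {
    return seed ^ (value + 0x9e3779b9 + (seed << 6) + (seed >> 2));
}

struct params_key {
    std::size_t prim_hash;                        // stands in for desc->hash()
    std::vector<std::vector<int64_t>> in_shapes;  // stands in for input_layouts

    std::size_t hash() const {
        std::size_t seed = prim_hash;
        const std::size_t prime_number = 2654435761;  // same mixing constant as the diff
        for (auto& shape : in_shapes) {
            std::size_t layout_hash = shape.size();
            for (auto d : shape)
                layout_hash = hash_combine(layout_hash, std::hash<int64_t>()(d));
            seed = hash_combine(seed, layout_hash * prime_number);
        }
        return seed;
    }

    // Equality must cover at least everything the hash covers, so a hash
    // collision can never alias two genuinely different keys.
    bool operator==(const params_key& rhs) const {
        return prim_hash == rhs.prim_hash && in_shapes == rhs.in_shapes;
    }
};
```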
diff --git a/src/plugins/intel_gpu/include/intel_gpu/graph/program.hpp b/src/plugins/intel_gpu/include/intel_gpu/graph/program.hpp
index e9f4254ed72508..b30aafd6977742 100644
--- a/src/plugins/intel_gpu/include/intel_gpu/graph/program.hpp
+++ b/src/plugins/intel_gpu/include/intel_gpu/graph/program.hpp
@@ -8,6 +8,7 @@
 #include "intel_gpu/runtime/stream.hpp"
 #include "intel_gpu/runtime/lru_cache.hpp"
 #include "intel_gpu/runtime/execution_config.hpp"
+#include "intel_gpu/graph/kernel_impl_params.hpp"
 
 #include 
 #include 
@@ -249,8 +250,14 @@ struct program {
     std::pair<int64_t, int64_t> get_estimated_device_mem_usage();
     void remove_kernel(kernel_id id);
 
-    void calc_nodes_hash();
+    struct ImplHasher {
+        size_t operator()(const kernel_impl_params &k) const {
+            return k.hash();
+        }
+    };
+
+    using ImplementationsCache = cldnn::LruCacheThreadSafe<kernel_impl_params, std::shared_ptr<primitive_impl>, ImplHasher>;
 
     ImplementationsCache& get_implementations_cache() const { return *_impls_cache; }
     ICompilationContext& get_compilation_context() const { return *_compilation_context; }
     void cancel_compilation_context();
diff --git a/src/plugins/intel_gpu/include/intel_gpu/runtime/lru_cache.hpp b/src/plugins/intel_gpu/include/intel_gpu/runtime/lru_cache.hpp
index 81e0dbcf774ee7..6ef82e4421f18d 100644
--- a/src/plugins/intel_gpu/include/intel_gpu/runtime/lru_cache.hpp
+++ b/src/plugins/intel_gpu/include/intel_gpu/runtime/lru_cache.hpp
@@ -18,7 +18,7 @@ namespace cldnn {
 struct primitive_impl;
 
 /// @brief LRU cache which removes the least recently used data when cache is full.
-template <typename Key, typename Value>
+template <typename Key, typename Value, typename KeyHasher = std::hash<Key>>
 class LruCache {
 public:
     using data_type = std::pair<Key, Value>;
@@ -141,7 +141,7 @@ class LruCache {
     using lru_data_list_iter = typename lru_data_list_type::iterator;
 
     std::list<data_type> _lru_data_list;
-    std::unordered_map<Key, lru_data_list_iter> _key_map;
+    std::unordered_map<Key, lru_data_list_iter, KeyHasher> _key_map;
     const size_t _capacity;
 
     /**
@@ -168,11 +168,13 @@ class LruCache {
 
 using KernelsCache = cldnn::LruCache<size_t, kernel::ptr>;
 
-template <typename Key, typename Value>
-class LruCacheThreadSafe : LruCache<Key, Value> {
+template <typename Key, typename Value, typename KeyHasher = std::hash<Key>>
+class LruCacheThreadSafe : public LruCache<Key, Value, KeyHasher> {
 public:
-    using parent = LruCache<Key, Value>;
-    using FuncRemoveItem = std::function<void(std::pair<Key, Value>&)>;
+    using parent = LruCache<Key, Value, KeyHasher>;
+    using ItemType = std::pair<Key, Value>;
+    using FuncRemoveItem = std::function<void(ItemType&)>;
+    using parent::parent;
 
     explicit LruCacheThreadSafe(size_t caps) : parent(caps) { }
 
@@ -205,7 +207,4 @@ class LruCacheThreadSafe : LruCache {
 
     mutable std::mutex _mutex;
 };
-
-using ImplementationsCache = cldnn::LruCacheThreadSafe<size_t, std::shared_ptr<primitive_impl>>;
-
 }  // namespace cldnn
diff --git a/src/plugins/intel_gpu/include/intel_gpu/runtime/utils.hpp b/src/plugins/intel_gpu/include/intel_gpu/runtime/utils.hpp
index 9a72338b727320..69347683064d20 100644
--- a/src/plugins/intel_gpu/include/intel_gpu/runtime/utils.hpp
+++ b/src/plugins/intel_gpu/include/intel_gpu/runtime/utils.hpp
@@ -47,6 +47,13 @@ struct all : public std::true_type {};
 template <typename T, typename... Ts>
 struct all<T, Ts...> : public std::integral_constant<bool, T::value && all<Ts...>::value> {};
 
+template <class T>
+struct is_primitive
+    : public std::integral_constant<bool,
+                                    std::is_base_of<primitive, T>::value &&
+                                    !std::is_same<primitive, typename std::remove_cv<T>::type>::value &&
+                                    std::is_same<T, typename std::remove_cv<T>::type>::value> {};
+
 }  // namespace meta
 
 /// @cond CPP_HELPERS
diff --git a/src/plugins/intel_gpu/src/graph/impls/ocl/kernel_selector_helper.h b/src/plugins/intel_gpu/src/graph/impls/ocl/kernel_selector_helper.h
index e43363e7b9b8c5..a5885860e5ca4b 100644
--- a/src/plugins/intel_gpu/src/graph/impls/ocl/kernel_selector_helper.h
+++ b/src/plugins/intel_gpu/src/graph/impls/ocl/kernel_selector_helper.h
@@ -5,6 +5,8 @@
 #pragma once
 
 #include "intel_gpu/graph/serialization/binary_buffer.hpp"
+#include "intel_gpu/graph/kernel_impl_params.hpp"
+#include "intel_gpu/graph/fused_primitive_desc.hpp"
 #include "intel_gpu/runtime/engine.hpp"
 #include "intel_gpu/runtime/utils.hpp"
 #include "intel_gpu/runtime/tensor.hpp"
@@ -15,9 +17,7 @@
 #include "kernel_selector_params.h"
 #include "kernel_selector_common.h"
 
-#include "kernel_impl_params.hpp"
 #include "tensor_type.h"
-#include "fused_primitive_desc.h"
 
 #include 
 #include 
diff --git a/src/plugins/intel_gpu/src/graph/include/implementation_map.hpp b/src/plugins/intel_gpu/src/graph/include/implementation_map.hpp
index 1282b0e05bbc87..1ca0415ad96866 100644
--- a/src/plugins/intel_gpu/src/graph/include/implementation_map.hpp
+++ b/src/plugins/intel_gpu/src/graph/include/implementation_map.hpp
@@ -5,7 +5,7 @@
 #pragma once
 
 #include "intel_gpu/primitives/implementation_desc.hpp"
-#include "kernel_impl_params.hpp"
+#include "intel_gpu/graph/kernel_impl_params.hpp"
 
 #include 
 #include 
diff --git a/src/plugins/intel_gpu/src/graph/include/meta_utils.h b/src/plugins/intel_gpu/src/graph/include/meta_utils.h
deleted file mode 100644
index 89c859b49913ae..00000000000000
--- a/src/plugins/intel_gpu/src/graph/include/meta_utils.h
+++ /dev/null
@@ -1,26 +0,0 @@
-// Copyright (C) 2018-2023 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-//
-
-#pragma once
-
-#include "intel_gpu/runtime/utils.hpp"
-
-#include <type_traits>
-
-namespace cldnn {
-
-struct primitive;
-
-namespace meta {
-
-template <class T>
-struct is_primitive
-    : public std::integral_constant<bool,
-                                    std::is_base_of<primitive, T>::value &&
-                                    !std::is_same<primitive, typename std::remove_cv<T>::type>::value &&
-                                    std::is_same<T, typename std::remove_cv<T>::type>::value> {};
-
-
-} // namespace meta
-} // namespace cldnn
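The new `KeyHasher` template parameter is what allows keying the cache by a struct instead of a precomputed `size_t`: the internal `std::unordered_map` delegates hashing to the supplied functor and falls back on the key's `operator==` to resolve collisions. A compilable sketch of the same pattern, independent of the plugin (`SimpleLruCache` is illustrative only, not the real `LruCache`):

```cpp
// Minimal LRU cache with a pluggable hasher, mirroring the shape of the
// LruCache change above: a list keeps recency order, a map gives O(1) lookup.
#include <cstddef>
#include <functional>
#include <list>
#include <unordered_map>
#include <utility>

template <typename Key, typename Value, typename KeyHasher = std::hash<Key>>
class SimpleLruCache {
public:
    explicit SimpleLruCache(std::size_t capacity) : _capacity(capacity) {}

    void add(const Key& key, const Value& value) {
        auto it = _index.find(key);
        if (it != _index.end())
            _items.erase(it->second);  // overwrite: drop the stale entry
        _items.emplace_front(key, value);
        _index[key] = _items.begin();
        if (_capacity > 0 && _items.size() > _capacity) {
            _index.erase(_items.back().first);  // evict least recently used
            _items.pop_back();
        }
    }

    Value* get(const Key& key) {
        auto it = _index.find(key);
        if (it == _index.end())
            return nullptr;
        // Splice the hit to the front; list iterators stay valid.
        _items.splice(_items.begin(), _items, it->second);
        return &it->second->second;
    }

private:
    using item = std::pair<Key, Value>;
    std::list<item> _items;  // front = most recently used
    std::unordered_map<Key, typename std::list<item>::iterator, KeyHasher> _index;
    const std::size_t _capacity;
};
```

With a functor equivalent to `program::ImplHasher`, `SimpleLruCache<kernel_impl_params, std::shared_ptr<primitive_impl>, ImplHasher>` has the same shape as the `ImplementationsCache` alias introduced in program.hpp above.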
diff --git a/src/plugins/intel_gpu/src/graph/include/primitive_inst.h b/src/plugins/intel_gpu/src/graph/include/primitive_inst.h
index 50ab0b037cea0e..8cb23f520f375a 100644
--- a/src/plugins/intel_gpu/src/graph/include/primitive_inst.h
+++ b/src/plugins/intel_gpu/src/graph/include/primitive_inst.h
@@ -8,7 +8,7 @@
 #include "intel_gpu/runtime/event.hpp"
 #include "intel_gpu/runtime/memory.hpp"
 #include "intel_gpu/graph/network.hpp"
-#include "meta_utils.h"
+#include "intel_gpu/runtime/utils.hpp"
 #include "program_node.h"
 #include "primitive_type.h"
 #include "intel_gpu/graph/serialization/binary_buffer.hpp"
@@ -219,9 +219,6 @@ class primitive_inst {
 
     virtual void update_output_memory() {}
 
-    virtual size_t get_impl_key(const kernel_impl_params& params) const;
-    virtual size_t get_impl_key() const;
-
 protected:
     primitive_inst(network& network, program_node const& node, bool allocate_memory);
 
diff --git a/src/plugins/intel_gpu/src/graph/include/primitive_type.h b/src/plugins/intel_gpu/src/graph/include/primitive_type.h
index 475da762b5a5bc..2803e74a92bedd 100644
--- a/src/plugins/intel_gpu/src/graph/include/primitive_type.h
+++ b/src/plugins/intel_gpu/src/graph/include/primitive_type.h
@@ -6,7 +6,7 @@
 
 #include "intel_gpu/runtime/layout.hpp"
 #include "intel_gpu/runtime/memory.hpp"
-#include "kernel_impl_params.hpp"
+#include "intel_gpu/graph/kernel_impl_params.hpp"
 
 #include 
 #include 
diff --git a/src/plugins/intel_gpu/src/graph/include/primitive_type_base.h b/src/plugins/intel_gpu/src/graph/include/primitive_type_base.h
index 7e859f65ac5816..20cda5d157e98f 100644
--- a/src/plugins/intel_gpu/src/graph/include/primitive_type_base.h
+++ b/src/plugins/intel_gpu/src/graph/include/primitive_type_base.h
@@ -8,7 +8,7 @@
 #include "intel_gpu/runtime/layout.hpp"
 #include "intel_gpu/runtime/debug_configuration.hpp"
 
-#include "meta_utils.h"
+#include "intel_gpu/runtime/utils.hpp"
 #include "primitive_type.h"
 #include "program_node.h"
 #include "primitive_inst.h"
diff --git a/src/plugins/intel_gpu/src/graph/include/program_node.h b/src/plugins/intel_gpu/src/graph/include/program_node.h
index 0526694d07d782..1f72b49bd8b5d1 100644
--- a/src/plugins/intel_gpu/src/graph/include/program_node.h
+++ b/src/plugins/intel_gpu/src/graph/include/program_node.h
@@ -9,9 +9,9 @@
 #include "intel_gpu/primitives/implementation_desc.hpp"
 #include "intel_gpu/graph/program.hpp"
 
-#include "fused_primitive_desc.h"
-#include "kernel_impl_params.hpp"
-#include "meta_utils.h"
+#include "intel_gpu/graph/fused_primitive_desc.hpp"
+#include "intel_gpu/graph/kernel_impl_params.hpp"
+#include "intel_gpu/runtime/utils.hpp"
 
 #include 
 #include 
@@ -386,10 +386,6 @@ struct program_node {
     void set_preferred_input_fmt(size_t idx, format::type type);
     void set_preferred_output_fmt(size_t idx, format::type type);
 
-    virtual void calculate_hash() {}
-
-    size_t get_hash() const { return seed; }
-
 protected:
     size_t unique_id = 0;
     static thread_local size_t cur_id;
@@ -430,8 +426,6 @@ struct program_node {
 
     void invalidate_users() const;
 
-    size_t seed = 0;
-
 private:
 #ifdef ENABLE_ONEDNN_FOR_GPU
     std::vector<fused_primitive_desc_onednn> fused_prims_onednn;
@@ -475,16 +469,6 @@ struct typed_program_node_base : public program_node {
         return std::static_pointer_cast<const PType>(program_node::get_primitive());
     }
 
-    void calculate_hash() override {
-        // hash for primitive
-        seed = get_primitive()->hash();
-
-        // hash for fused prims
-        for (auto& prim : fused_prims) {
-            seed = hash_combine(seed, prim.desc->hash());
-        }
-    }
-
 protected:
     std::shared_ptr<const PType> typed_desc() const { return std::static_pointer_cast<const PType>(desc); }
 };
diff --git a/src/plugins/intel_gpu/src/graph/include/sliding_window_utils.hpp b/src/plugins/intel_gpu/src/graph/include/sliding_window_utils.hpp
index e39007e7060a61..6899ef9bc3c2ef 100644
--- a/src/plugins/intel_gpu/src/graph/include/sliding_window_utils.hpp
+++ b/src/plugins/intel_gpu/src/graph/include/sliding_window_utils.hpp
@@ -10,7 +10,7 @@
 #include "openvino/core/coordinate_diff.hpp"
 #include "openvino/core/strides.hpp"
 
-#include "meta_utils.h"
+#include "intel_gpu/runtime/utils.hpp"
 
 #include 
 #include 
diff --git a/src/plugins/intel_gpu/src/graph/primitive_inst.cpp b/src/plugins/intel_gpu/src/graph/primitive_inst.cpp
index f072f798facbe2..185d3cfe5f7270 100644
--- a/src/plugins/intel_gpu/src/graph/primitive_inst.cpp
+++ b/src/plugins/intel_gpu/src/graph/primitive_inst.cpp
@@ -323,13 +323,12 @@ bool primitive_inst::update_impl() {
     if (!_node->is_type<data>() && !(_node->is_type<mutable_data>() && _node->get_dependencies().empty())) {
         // Update param if fake_alignment is available
         auto updated_params = _node->type()->get_fake_aligned_params(*_impl_params);
-        auto impl_key = get_impl_key(updated_params);
         auto& cache = get_network().get_program()->get_implementations_cache();
-        bool has_cached_impl = false;
+        std::shared_ptr<primitive_impl> cached_impl = nullptr;
         {
-            has_cached_impl = cache.has(impl_key);
-            if (has_cached_impl) {
-                _impl = cache.get(impl_key)->clone();
+            cached_impl = cache.get(updated_params);
+            if (cached_impl) {
+                _impl = cached_impl->clone();
                 GPU_DEBUG_PROFILED_STAGE_CACHE_HIT(true);
                 GPU_DEBUG_TRACE_DETAIL << id() << ": get impl from cache " << _impl->get_kernel_name() << std::endl;
                 // impl is not replaced
@@ -337,10 +336,10 @@ bool primitive_inst::update_impl() {
                 return false;
             }
         }
-        if (!has_cached_impl) {
+        if (!cached_impl) {
             if (_dynamic_impl) {
                 auto& compilation_context = get_network().get_program()->get_compilation_context();
-                compilation_context.push_task(impl_key, [this, &compilation_context, updated_params, impl_key]() {
+                compilation_context.push_task(updated_params.hash(), [this, &compilation_context, updated_params]() {
                     if (compilation_context.is_stopped())
                         return;
                     auto _program = get_network().get_program();
@@ -348,14 +347,14 @@ bool primitive_inst::update_impl() {
                     {
                         // Check existence in the cache one more time as several iterations of model execution could happen and multiple compilation
                         // tasks created for same shapes
-                        if (cache.has(impl_key))
+                        if (cache.has(updated_params))
                             return;
                     }
 
                     auto impl = _node->type()->choose_impl(*_node, updated_params);
                     auto kernels = _program->get_kernels_cache().compile(impl->get_kernels_source());
                     impl->set_kernels(kernels);
-                    cache.add(impl_key, impl->clone());
+                    cache.add(updated_params, impl->clone());
                 });
                 _impl = _dynamic_impl->clone();
                 _impl->update_dispatch_data(*_impl_params);
@@ -366,7 +365,7 @@
             auto& kernels_cache = get_network().get_program()->get_kernels_cache();
             auto kernels = kernels_cache.compile(_impl->get_kernels_source());
             _impl->set_kernels(kernels);
-            cache.add(impl_key, _impl->clone());
+            cache.add(updated_params, _impl->clone());
 
             auto new_impl_str = _impl != nullptr ? _impl->get_kernel_name() : "nullptr";
             GPU_DEBUG_TRACE_DETAIL << id() << ": update impl from " << prev_impl_str << " to " << new_impl_str << std::endl;
@@ -685,7 +684,7 @@ event::ptr primitive_inst::update_weights() {
     auto& engine = _network.get_engine();
 
     auto get_kernel_key = [&]() -> size_t {
-        auto seed = _node->get_hash();
+        auto seed = _node->get_primitive()->hash();
         seed = hash_combine(seed, expected_layout.hash());
         seed = hash_combine(seed, original_layout.hash());
         return seed;
@@ -1268,20 +1267,4 @@ void primitive_inst::load(cldnn::BinaryInputBuffer& ib) {
     }
 }
 
-size_t primitive_inst::get_impl_key(const kernel_impl_params& params) const {
-    size_t seed = _node->get_hash();
-    const size_t prime_number = 2654435761; // magic number to avoid hash collision.
-    for (auto& in : params.input_layouts) {
-        seed = hash_combine(seed, in.hash() * prime_number);
-    }
-    for (auto& out : params.output_layouts) {
-        seed = hash_combine(seed, out.hash() * prime_number);
-    }
-    return seed;
-}
-
-size_t primitive_inst::get_impl_key() const {
-    auto updated_params = _node->type()->get_fake_aligned_params(*_impl_params);
-    return get_impl_key(updated_params);
-}
 }  // namespace cldnn
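The rewritten `update_impl()` above queries the cache with the full fake-aligned params object and only falls back to compilation on a miss; the compilation task re-checks the cache first, since several iterations of a dynamic model can enqueue the same shape. A condensed sketch of just that control flow, with stand-in template parameters (the real code clones impls and pushes the miss path onto the compilation context's worker thread):

```cpp
#include <memory>

// lookup_or_compile: cache hit -> clone the cached impl; miss -> double-check,
// then compile and publish a clone for future iterations.
template <typename Cache, typename Params, typename CompileFn>
auto lookup_or_compile(Cache& cache, const Params& params, CompileFn compile)
    -> decltype(compile(params)) {
    if (auto cached = cache.get(params))    // keyed by params, not a raw size_t
        return cached->clone();             // hit: reuse the compiled kernels
    if (cache.has(params))                  // re-check: another task may have
        return cache.get(params)->clone();  // compiled this shape meanwhile
    auto impl = compile(params);            // miss: actually build the impl
    cache.add(params, impl->clone());
    return impl;
}
```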
diff --git a/src/plugins/intel_gpu/src/graph/program.cpp b/src/plugins/intel_gpu/src/graph/program.cpp
index b429b7c20ed616..d40ed83e763f47 100644
--- a/src/plugins/intel_gpu/src/graph/program.cpp
+++ b/src/plugins/intel_gpu/src/graph/program.cpp
@@ -124,7 +124,6 @@ program::program(engine& engine_ref,
     } else {
         build_program(is_internal);
     }
-    calc_nodes_hash();
 }
 
 program::program(engine& engine_ref,
@@ -142,7 +141,6 @@ program::program(engine& engine_ref,
     init_program();
     prepare_nodes(nodes);
     build_program(is_internal);
-    calc_nodes_hash();
 }
 
 program::program(engine& engine)
@@ -150,6 +148,7 @@ program::program(engine& engine)
       _stream(_engine.create_stream({})),
       _config(),
       processing_order() {
+    init_primitives();
     _config.apply_user_properties(_engine.get_device_info());
 }
 
@@ -171,8 +170,8 @@ void program::init_program() {
     _impls_cache = cldnn::make_unique<ImplementationsCache>(_impls_cache_capacity);
     // Remove items from the compilation context's internal queue when some impl is popped in kernels_cache
    // as the compilation context's queue checks duplication of inserted tasks
-    _impls_cache->set_remove_item_callback([this](std::pair<size_t, std::shared_ptr<primitive_impl>>& item) {
-        get_compilation_context().remove_keys({item.first});
+    _impls_cache->set_remove_item_callback([this](ImplementationsCache::ItemType& item) {
+        get_compilation_context().remove_keys({item.first.hash()});
     });
 }
 
@@ -504,12 +503,6 @@ void program::set_options() {
     }
 }
 
-void program::calc_nodes_hash() {
-    for (auto& node : processing_order) {
-        node->calculate_hash();
-    }
-}
-
 void program::build_program(bool is_internal) {
     init_graph();
     { pre_optimize_graph(is_internal); }
diff --git a/src/plugins/intel_gpu/tests/test_cases/hash_key_gpu_test.cpp b/src/plugins/intel_gpu/tests/test_cases/hash_key_gpu_test.cpp
index 39eecb5b28a957..154ca0f08eb019 100644
--- a/src/plugins/intel_gpu/tests/test_cases/hash_key_gpu_test.cpp
+++ b/src/plugins/intel_gpu/tests/test_cases/hash_key_gpu_test.cpp
@@ -42,12 +42,10 @@ TEST(check_hash_value, eltwise_basic) {
     const auto& prog_node = net.get_program()->get_node(key_prim_id);
 
     const auto primitive_hash = primitve->hash();
-    const auto prog_node_hash = prog_node.get_hash();
-    const auto prim_inst_hash = prim_inst->get_impl_key();
+    const auto params_hash = prog_node.get_kernel_impl_params()->hash();
 
     ASSERT_EQ(primitive_hash, 11385140218618178073UL);
-    ASSERT_EQ(prog_node_hash, 11385140218618178073UL);
-    ASSERT_EQ(prim_inst_hash, 10460622021476296271UL);
+    ASSERT_EQ(params_hash, 10460622021476296271UL);
 }
 
 TEST(check_hash_value, fc_basic) {
@@ -74,12 +72,10 @@ TEST(check_hash_value, fc_basic) {
     const auto& prog_node = net.get_program()->get_node(key_prim_id);
 
     const auto primitive_hash = primitve->hash();
-    const auto prog_node_hash = prog_node.get_hash();
-    const auto prim_inst_hash = prim_inst->get_impl_key();
+    const auto params_hash = prog_node.type()->get_fake_aligned_params(*prog_node.get_kernel_impl_params()).hash();
 
     ASSERT_EQ(primitive_hash, 7881065839556591629UL);
-    ASSERT_EQ(prog_node_hash, 7881065839556591629UL);
-    ASSERT_EQ(prim_inst_hash, 12327057149074647711UL);
+    ASSERT_EQ(params_hash, 12327057149074647711UL);
 }
 
 TEST(check_hash_value, gather_basic) {
@@ -107,12 +103,10 @@ TEST(check_hash_value, gather_basic) {
     const auto& prog_node = net.get_program()->get_node(key_prim_id);
 
     const auto primitive_hash = primitve->hash();
-    const auto prog_node_hash = prog_node.get_hash();
-    const auto prim_inst_hash = prim_inst->get_impl_key();
+    const auto params_hash = prog_node.get_kernel_impl_params()->hash();
 
     ASSERT_EQ(primitive_hash, 93320679543770233UL);
-    ASSERT_EQ(prog_node_hash, 93320679543770233UL);
-    ASSERT_EQ(prim_inst_hash, 18126277300376770566UL);
+    ASSERT_EQ(params_hash, 18126277300376770566UL);
 }
 
 TEST(check_hash_value, gemm_basic) {
@@ -135,12 +129,10 @@ TEST(check_hash_value, gemm_basic) {
     const auto& prog_node = net.get_program()->get_node(key_prim_id);
 
     const auto primitive_hash = primitve->hash();
-    const auto prog_node_hash = prog_node.get_hash();
-    const auto prim_inst_hash = prim_inst->get_impl_key();
+    const auto params_hash = prog_node.get_kernel_impl_params()->hash();
 
     ASSERT_EQ(primitive_hash, 8009877756431655269UL);
-    ASSERT_EQ(prog_node_hash, 8009877756431655269UL);
-    ASSERT_EQ(prim_inst_hash, 2966249915421110547UL);
+    ASSERT_EQ(params_hash, 2966249915421110547UL);
 }
 
 TEST(check_hash_value, permute_basic) {
@@ -160,12 +152,10 @@ TEST(check_hash_value, permute_basic) {
     const auto& prog_node = net.get_program()->get_node(key_prim_id);
 
     const auto primitive_hash = primitve->hash();
-    const auto prog_node_hash = prog_node.get_hash();
-    const auto prim_inst_hash = prim_inst->get_impl_key();
+    const auto params_hash = prog_node.get_kernel_impl_params()->hash();
 
     ASSERT_EQ(primitive_hash, 4658575237077439700UL);
-    ASSERT_EQ(prog_node_hash, 4658575237077439700UL);
-    ASSERT_EQ(prim_inst_hash, 4319508487906266226UL);
+    ASSERT_EQ(params_hash, 4319508487906266226UL);
 }
 
 TEST(check_hash_value, reorder_basic) {
@@ -191,12 +181,10 @@ TEST(check_hash_value, reorder_basic) {
     const auto& prog_node = net.get_program()->get_node(key_prim_id);
 
     const auto primitive_hash = primitve->hash();
-    const auto prog_node_hash = prog_node.get_hash();
-    const auto prim_inst_hash = prim_inst->get_impl_key();
+    const auto params_hash = prog_node.get_kernel_impl_params()->hash();
 
     ASSERT_EQ(primitive_hash, 16293979194373117693UL);
-    ASSERT_EQ(prog_node_hash, 16293979194373117693UL);
-    ASSERT_EQ(prim_inst_hash, 1719378641386629286UL);
+    ASSERT_EQ(params_hash, 1719378641386629286UL);
 }
 
 TEST(check_hash_value, reshape_basic) {
@@ -219,12 +207,10 @@ TEST(check_hash_value, reshape_basic) {
     const auto& prog_node = net.get_program()->get_node(key_prim_id);
 
     const auto primitive_hash = primitve->hash();
-    const auto prog_node_hash = prog_node.get_hash();
-    const auto prim_inst_hash = prim_inst->get_impl_key();
+    const auto params_hash = prog_node.get_kernel_impl_params()->hash();
 
     ASSERT_EQ(primitive_hash, 1534749073560581535UL);
-    ASSERT_EQ(prog_node_hash, 1534749073560581535UL);
-    ASSERT_EQ(prim_inst_hash, 1686780870642992006UL);
+    ASSERT_EQ(params_hash, 1686780870642992006UL);
 }
 
 TEST(check_hash_value, conv_basic) {
@@ -248,12 +234,10 @@ TEST(check_hash_value, conv_basic) {
     const auto& prog_node = net.get_program()->get_node(key_prim_id);
 
     const auto primitive_hash = primitve->hash();
-    const auto prog_node_hash = prog_node.get_hash();
-    const auto prim_inst_hash = prim_inst->get_impl_key();
+    const auto params_hash = prog_node.get_kernel_impl_params()->hash();
 
     ASSERT_EQ(primitive_hash, 14591385718963138714UL);
-    ASSERT_EQ(prog_node_hash, 14591385718963138714UL);
-    ASSERT_EQ(prim_inst_hash, 6876197578014654797UL);
+    ASSERT_EQ(params_hash, 6876197578014654797UL);
 }
 
 TEST(check_hash_value, quantize_basic) {
@@ -283,10 +267,8 @@ TEST(check_hash_value, quantize_basic) {
     const auto& prog_node = net.get_program()->get_node(key_prim_id);
 
     const auto primitive_hash = primitve->hash();
-    const auto prog_node_hash = prog_node.get_hash();
-    const auto prim_inst_hash = prim_inst->get_impl_key();
+    const auto params_hash = prog_node.get_kernel_impl_params()->hash();
 
     ASSERT_EQ(primitive_hash, 4135863035456568493UL);
-    ASSERT_EQ(prog_node_hash, 4135863035456568493UL);
-    ASSERT_EQ(prim_inst_hash, 13898649554943348250UL);
+    ASSERT_EQ(params_hash, 13898649554943348250UL);
 }
diff --git a/src/plugins/intel_gpu/tests/test_cases/lru_caches_gpu_test.cpp b/src/plugins/intel_gpu/tests/test_cases/lru_caches_gpu_test.cpp
index 22ff33c3fd5c0c..8016ccddad6a3e 100644
--- a/src/plugins/intel_gpu/tests/test_cases/lru_caches_gpu_test.cpp
+++ b/src/plugins/intel_gpu/tests/test_cases/lru_caches_gpu_test.cpp
@@ -3,15 +3,16 @@
 //
 
 #include "test_utils.h"
+#include "program_wrapper.h"
 
 #include "intel_gpu/runtime/lru_cache.hpp"
+#include "shape_of_inst.h"
+
 #include 
 
 using namespace cldnn;
 using namespace ::tests;
 
-
-
 TEST(lru_cache, basic_data_type) {
     const size_t cap = 4;
@@ -122,3 +123,58 @@ TEST(lru_cache, custom_data_type) {
         ASSERT_EQ(key, expected_keys[idx--]);
     }
 }
+
+namespace {
+struct ImplHasher {
+    size_t operator()(const kernel_impl_params &k) const {
+        return k.hash();
+    }
+};
+}  // namespace
+
+TEST(lru_cache, collisions) {
+    auto l1 = layout{{8, 3, 131, 384}, data_types::f32, format::bfyx};
+    auto l2 = layout{{8, 5, 1, 384}, data_types::f32, format::bfyx};
+    auto input1_prim = std::make_shared<input_layout>("input1", l1);
+    auto input2_prim = std::make_shared<input_layout>("input2", l2);
+    auto shape_of1_prim = std::make_shared<shape_of>("shape_of1", input_info("input1"), 4, data_types::i64);
+    auto shape_of2_prim = std::make_shared<shape_of>("shape_of2", input_info("input2"), 4, data_types::i64);
+
+    using ImplementationsCache = cldnn::LruCacheThreadSafe<kernel_impl_params, std::shared_ptr<primitive_impl>, ImplHasher>;
+    ImplementationsCache cache(0);
+
+    program prog(get_test_engine());
+    auto& input1_node = prog.get_or_create(input1_prim);
+    auto& input2_node = prog.get_or_create(input2_prim);
+    auto& shape_of1_node = prog.get_or_create(shape_of1_prim);
+    auto& shape_of2_node = prog.get_or_create(shape_of2_prim);
+    program_wrapper::add_connection(prog, input1_node, shape_of1_node);
+    program_wrapper::add_connection(prog, input2_node, shape_of2_node);
+
+    auto params1 = *shape_of1_node.get_kernel_impl_params();
+    auto params2 = *shape_of2_node.get_kernel_impl_params();
+
+    auto out_layouts1 = shape_of_inst::calc_output_layouts<ov::PartialShape>(shape_of1_node, params1);
+    auto out_layouts2 = shape_of_inst::calc_output_layouts<ov::PartialShape>(shape_of2_node, params2);
+
+    shape_of1_node.set_output_layouts(out_layouts1);
+    shape_of2_node.set_output_layouts(out_layouts2);
+
+    shape_of1_node.set_preferred_impl_type(impl_types::ocl);
+    shape_of2_node.set_preferred_impl_type(impl_types::ocl);
+
+    auto impl1 = shape_of1_node.type()->choose_impl(shape_of1_node);
+    auto impl2 = shape_of2_node.type()->choose_impl(shape_of2_node);
+
+    // Ensure that hashes for primitive, input layouts and full impl params are the same due to collision
+    ASSERT_EQ(shape_of1_prim->hash(), shape_of2_prim->hash());
+    ASSERT_EQ(l1.hash(), l2.hash());
+    ASSERT_EQ(shape_of1_node.get_kernel_impl_params()->hash(), shape_of2_node.get_kernel_impl_params()->hash());
+    ASSERT_FALSE(*shape_of1_node.get_kernel_impl_params() == *shape_of2_node.get_kernel_impl_params());
+
+    cache.add(*shape_of1_node.get_kernel_impl_params(), impl1->clone());
+    cache.add(*shape_of2_node.get_kernel_impl_params(), impl2->clone());
+
+    // But the cache still contains both entries, as input layouts are different - thus kernels are different
+    ASSERT_EQ(cache.size(), 2);
+}
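The collision test above hinges on a property of every unordered container: the hash only selects a bucket, and the key's `operator==` decides whether two entries are actually the same. A self-contained illustration with a deliberately degenerate hasher (plain `std::unordered_map` with `std::string` keys; not the plugin's cache):

```cpp
// Two distinct keys with identical hashes still coexist, because equality,
// not the hash, is the final arbiter inside the bucket.
#include <cassert>
#include <cstddef>
#include <string>
#include <unordered_map>

struct colliding_hasher {
    std::size_t operator()(const std::string&) const { return 42; }  // every key collides
};

int main() {
    std::unordered_map<std::string, int, colliding_hasher> cache;
    cache.emplace("shape_of1", 1);
    cache.emplace("shape_of2", 2);  // same hash bucket, different key
    assert(cache.size() == 2);      // operator== keeps both entries distinct
    assert(cache.at("shape_of1") == 1 && cache.at("shape_of2") == 2);
    return 0;
}
```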