openvinotoolkit · vladimir-paramuzov · Mar 8, 2023 · Mar 7, 2023 · Mar 8, 2023 · sshlyapn
@@ -5,7 +5,7 @@
 #pragma once
 
 #include "intel_gpu/primitives/primitive.hpp"
-#include "meta_utils.h"
+#include "intel_gpu/runtime/utils.hpp"
 
 namespace cldnn {
 
@@ -41,6 +41,17 @@ struct fused_primitive_desc {
         return p;
     }
 
+    /// Equal when the dependency bookkeeping matches and the wrapped primitives compare equal.
+    bool operator==(const fused_primitive_desc& rhs) const {
+        const bool same_deps = total_num_deps == rhs.total_num_deps &&
+                               dep_start_idx == rhs.dep_start_idx;
+        if (!same_deps)
+            return false;
+        return *desc == *rhs.desc;
+    }
+
+    bool operator!=(const fused_primitive_desc& rhs) const { return !operator==(rhs); }  // negation of operator==
+
     std::shared_ptr<const primitive> desc;
 
     layout input_layout = layout(data_types::f32, format::bfyx, tensor());

@@ -11,8 +11,7 @@
 #include "intel_gpu/runtime/tensor.hpp"
 #include "intel_gpu/primitives/primitive.hpp"
 
-#include "tensor_type.h"
-#include "fused_primitive_desc.h"
+#include "intel_gpu/graph/fused_primitive_desc.hpp"
 
 #include <cstdint>
 #include <string>
@@ -49,7 +48,7 @@ struct kernel_impl_params {
 
     memory::ptr reordered_weights = nullptr;
 
-    kernel_impl_params() {}
+    kernel_impl_params() : prog(nullptr), desc(nullptr), unique_id(0) {}  // empty params: prog and desc start as nullptr, unique_id as 0
 
     kernel_impl_params(program& _prog,
                        std::shared_ptr<const primitive> _desc,
@@ -116,6 +115,54 @@ struct kernel_impl_params {
         OPENVINO_ASSERT(prog != nullptr, "[GPU] Program pointer in kernel_impl_params in not initialized");
         return *prog;
     }
+
+    /// Hash over the primitive, every input/output layout and every fused primitive descriptor.
+    size_t hash() const {
+        // desc is nullptr after default construction, so start from 0 instead of dereferencing it.
+        size_t seed = desc != nullptr ? desc->hash() : 0;
+        const size_t prime_number = 2654435761; // magic number to reduce hash collision rate.
+        for (const auto& in : input_layouts) {
+            seed = hash_combine(seed, in.hash() * prime_number);
+        }
+        for (const auto& out : output_layouts) {
+            seed = hash_combine(seed, out.hash() * prime_number);
+        }
+        for (const auto& fd : fused_desc) {
+            seed = hash_combine(seed, fd.desc->hash());
+        }
+        return seed;
+    }
+
+    /// Equal when the primitive, every input/output layout and every fused descriptor match.
+    bool operator==(const kernel_impl_params& rhs) const {
+        // desc is nullptr after default construction, so it is checked before being dereferenced:
+        // two unset descriptors match, a set and an unset one do not.
+        if ((desc == nullptr) != (rhs.desc == nullptr))
+            return false;
+        if (desc != nullptr && *desc != *rhs.desc)
+            return false;
+
+        if (rhs.input_layouts.size() != input_layouts.size())
+            return false;
+        if (rhs.output_layouts.size() != output_layouts.size())
+            return false;
+        if (rhs.fused_desc.size() != fused_desc.size())
+            return false;
+
+        for (size_t i = 0; i < input_layouts.size(); i++) {
+            if (input_layouts[i] != rhs.input_layouts[i])
+                return false;
+        }
+        for (size_t i = 0; i < output_layouts.size(); i++) {
+            if (output_layouts[i] != rhs.output_layouts[i])
+                return false;
+        }
+        for (size_t i = 0; i < fused_desc.size(); i++) {
+            if (fused_desc[i] != rhs.fused_desc[i])
+                return false;
+        }
+        return true;
+    }
 };
 
 }  // namespace cldnn
@@ -8,6 +8,7 @@
 #include "intel_gpu/runtime/stream.hpp"
 #include "intel_gpu/runtime/lru_cache.hpp"
 #include "intel_gpu/runtime/execution_config.hpp"
+#include "intel_gpu/graph/kernel_impl_params.hpp"
 
 #include <list>
 #include <string>
@@ -249,8 +250,14 @@ struct program {
     std::pair<int64_t/*const alloc*/, int64_t/*general alloc*/> get_estimated_device_mem_usage();
 
     void remove_kernel(kernel_id id);
-    void calc_nodes_hash();
 
+    /// Hash functor for kernel_impl_params keys;
+    /// forwards to kernel_impl_params::hash().
+    struct ImplHasher {
+        size_t operator()(const kernel_impl_params& params) const { return params.hash(); }
+    };
+
+    using ImplementationsCache = cldnn::LruCacheThreadSafe<kernel_impl_params, std::shared_ptr<primitive_impl>, ImplHasher>;
     ImplementationsCache& get_implementations_cache() const { return *_impls_cache; }
     ICompilationContext& get_compilation_context() const { return *_compilation_context; }
     void cancel_compilation_context();

@@ -18,7 +18,7 @@ namespace cldnn {
 struct primitive_impl;
 
 /// @brief LRU cache which remove the least recently used data when cache is full.
-template<typename Key, typename Value>
+template<typename Key, typename Value, typename KeyHasher = std::hash<Key>>
 class LruCache {
 public:
     using data_type = std::pair<Key, Value>;
@@ -141,7 +141,7 @@ class LruCache {
     using lru_data_list_iter = typename lru_data_list_type::iterator;
 
     std::list<data_type> _lru_data_list;
-    std::unordered_map<Key, lru_data_list_iter> _key_map;
+    std::unordered_map<Key, lru_data_list_iter, KeyHasher> _key_map;
     const size_t _capacity;
 
     /**
@@ -168,11 +168,13 @@ class LruCache {
 
 using KernelsCache = cldnn::LruCache<size_t, cldnn::kernel::ptr>;
 
-template<typename Key, typename Value>
-class LruCacheThreadSafe : LruCache<Key, Value> {
+template<typename Key, typename Value, typename KeyHasher = std::hash<Key>>
+class LruCacheThreadSafe : public LruCache<Key, Value, KeyHasher> {
 public:
-    using parent = LruCache<Key, Value>;
-    using FuncRemoveItem = std::function<void(std::pair<Key, Value>&)>;
+    using parent = LruCache<Key, Value, KeyHasher>;
+    using ItemType = std::pair<Key, Value>;
+    using FuncRemoveItem = std::function<void(ItemType&)>;
+    using parent::parent;
 
     explicit LruCacheThreadSafe(size_t caps) : parent(caps) { }
 
@@ -205,7 +207,4 @@ class LruCacheThreadSafe : LruCache<Key, Value> {
     mutable std::mutex _mutex;
 };
 
-
-using ImplementationsCache = cldnn::LruCacheThreadSafe<size_t, std::shared_ptr<primitive_impl>>;
-
 }  // namespace cldnn
@@ -47,6 +47,13 @@ struct all : public std::true_type {};
 template <bool Val, bool... Values>
 struct all<Val, Values...> : public std::integral_constant<bool, Val && all<Values...>::value> {};
 
+/// True only for cv-unqualified types derived from primitive, excluding primitive itself.
+template <class T>
+struct is_primitive : public std::integral_constant<bool,
+    std::is_same<T, typename std::remove_cv<T>::type>::value &&
+    !std::is_same<primitive, typename std::remove_cv<T>::type>::value &&
+    std::is_base_of<primitive, T>::value> {};
+
 }  // namespace meta
 
 /// @cond CPP_HELPERS

@@ -5,6 +5,8 @@
 #pragma once
 
 #include "intel_gpu/graph/serialization/binary_buffer.hpp"
+#include "intel_gpu/graph/kernel_impl_params.hpp"
+#include "intel_gpu/graph/fused_primitive_desc.hpp"
 #include "intel_gpu/runtime/engine.hpp"
 #include "intel_gpu/runtime/utils.hpp"
 #include "intel_gpu/runtime/tensor.hpp"
@@ -15,9 +17,7 @@
 
 #include "kernel_selector_params.h"
 #include "kernel_selector_common.h"
-#include "kernel_impl_params.hpp"
 #include "tensor_type.h"
-#include "fused_primitive_desc.h"
 
 #include <cstdint>
 #include <string>

@@ -5,7 +5,7 @@
 #pragma once
 
 #include "intel_gpu/primitives/implementation_desc.hpp"
-#include "kernel_impl_params.hpp"
+#include "intel_gpu/graph/kernel_impl_params.hpp"
 
 #include <functional>
 #include <map>

@@ -8,7 +8,7 @@
 #include "intel_gpu/runtime/event.hpp"
 #include "intel_gpu/runtime/memory.hpp"
 #include "intel_gpu/graph/network.hpp"
-#include "meta_utils.h"
+#include "intel_gpu/runtime/utils.hpp"
 #include "program_node.h"
 #include "primitive_type.h"
 #include "intel_gpu/graph/serialization/binary_buffer.hpp"
@@ -219,9 +219,6 @@ class primitive_inst {
 
     virtual void update_output_memory() {}
 
-    virtual size_t get_impl_key(const kernel_impl_params& params) const;
-    virtual size_t get_impl_key() const;
-
 protected:
     primitive_inst(network& network, program_node const& node, bool allocate_memory);
 

@@ -6,7 +6,7 @@
 
 #include "intel_gpu/runtime/layout.hpp"
 #include "intel_gpu/runtime/memory.hpp"
-#include "kernel_impl_params.hpp"
+#include "intel_gpu/graph/kernel_impl_params.hpp"
 
 #include <memory>
 #include <string>

@@ -8,7 +8,7 @@
 #include "intel_gpu/runtime/layout.hpp"
 #include "intel_gpu/runtime/debug_configuration.hpp"
 
-#include "meta_utils.h"
+#include "intel_gpu/runtime/utils.hpp"
 #include "primitive_type.h"
 #include "program_node.h"
 #include "primitive_inst.h"

@@ -9,9 +9,9 @@
 #include "intel_gpu/primitives/implementation_desc.hpp"
 #include "intel_gpu/graph/program.hpp"
 
-#include "fused_primitive_desc.h"
-#include "kernel_impl_params.hpp"
-#include "meta_utils.h"
+#include "intel_gpu/graph/fused_primitive_desc.hpp"
+#include "intel_gpu/graph/kernel_impl_params.hpp"
+#include "intel_gpu/runtime/utils.hpp"
 
 #include <set>
 #include <array>
@@ -386,10 +386,6 @@ struct program_node {
     void set_preferred_input_fmt(size_t idx, format::type type);
     void set_preferred_output_fmt(size_t idx, format::type type);
 
-    virtual void calculate_hash() {}
-
-    size_t get_hash() const { return seed; }
-
 protected:
     size_t unique_id = 0;
     static thread_local size_t cur_id;
@@ -430,8 +426,6 @@ struct program_node {
 
     void invalidate_users() const;
 
-    size_t seed = 0;
-
 private:
 #ifdef ENABLE_ONEDNN_FOR_GPU
     std::vector<fused_primitive_desc_onednn> fused_prims_onednn;
@@ -475,16 +469,6 @@ struct typed_program_node_base : public program_node {
         return std::static_pointer_cast<const PType>(program_node::get_primitive());
     }
 
-    void calculate_hash() override {
-        // hash for primitive
-        seed = get_primitive()->hash();
-
-        // hash for fused prims
-        for (auto& prim : fused_prims) {
-            seed = hash_combine(seed, prim.desc->hash());
-        }
-    }
-
 protected:
     std::shared_ptr<PType> typed_desc() const { return std::static_pointer_cast<PType>(desc); }
 };

@@ -10,7 +10,7 @@
 #include "openvino/core/coordinate_diff.hpp"
 #include "openvino/core/strides.hpp"
 
-#include "meta_utils.h"
+#include "intel_gpu/runtime/utils.hpp"
 
 #include <algorithm>
 #include <cassert>