Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[GPU] Changed impls cache key type to avoid hash collisions #16130

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
#pragma once

#include "intel_gpu/primitives/primitive.hpp"
#include "meta_utils.h"
#include "intel_gpu/runtime/utils.hpp"

namespace cldnn {

Expand Down Expand Up @@ -41,6 +41,17 @@ struct fused_primitive_desc {
return p;
}

/// @brief Equality check for fused primitive descriptors.
/// Two descriptors are equal when their dependency bookkeeping
/// (total_num_deps, dep_start_idx) matches and the primitives they point to
/// compare equal.
bool operator==(const fused_primitive_desc& rhs) const {
    // Cheap integer comparisons first; the primitives are only compared
    // (and the pointers only dereferenced) when the dependency info agrees.
    const bool same_deps = total_num_deps == rhs.total_num_deps &&
                           dep_start_idx == rhs.dep_start_idx;
    return same_deps && *desc == *rhs.desc;
}

/// @brief Inequality check; the exact negation of operator==.
bool operator!=(const fused_primitive_desc& rhs) const {
    return !operator==(rhs);
}

std::shared_ptr<const primitive> desc;

layout input_layout = layout(data_types::f32, format::bfyx, tensor());
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -11,8 +11,7 @@
#include "intel_gpu/runtime/tensor.hpp"
#include "intel_gpu/primitives/primitive.hpp"

#include "tensor_type.h"
#include "fused_primitive_desc.h"
#include "intel_gpu/graph/fused_primitive_desc.hpp"

#include <cstdint>
#include <string>
Expand Down Expand Up @@ -49,7 +48,7 @@ struct kernel_impl_params {

memory::ptr reordered_weights = nullptr;

kernel_impl_params() {}
kernel_impl_params() : prog(nullptr), desc(nullptr), unique_id(0) {}

kernel_impl_params(program& _prog,
std::shared_ptr<const primitive> _desc,
Expand Down Expand Up @@ -116,6 +115,54 @@ struct kernel_impl_params {
OPENVINO_ASSERT(prog != nullptr, "[GPU] Program pointer in kernel_impl_params in not initialized");
return *prog;
}

/// @brief Computes a hash from the primitive descriptor, all input and output
/// layouts and the descriptors of all fused primitives.
/// @return Combined hash value. Objects that compare equal with operator==
/// produce the same value.
size_t hash() const {
    // A default-constructed object has a null descriptor; start from 0
    // instead of dereferencing it.
    size_t seed = (desc != nullptr) ? desc->hash() : 0;
    const size_t prime_number = 2654435761;  // magic number to reduce hash collision rate.
    for (const auto& in : input_layouts) {
        seed = hash_combine(seed, in.hash() * prime_number);
    }
    for (const auto& out : output_layouts) {
        seed = hash_combine(seed, out.hash() * prime_number);
    }

    // hash for fused prims
    for (const auto& fd : fused_desc) {
        seed = hash_combine(seed, fd.desc->hash());
    }
    return seed;
}

/// @brief Full equality check over everything that hash() takes into account:
/// the primitive descriptor, input layouts, output layouts and fused primitives.
/// @return true when all of those compare equal, false otherwise.
bool operator==(const kernel_impl_params& rhs) const {
    // A default-constructed object has a null descriptor. Two null descriptors
    // are treated as equal; a null and a non-null one never are.
    const bool lhs_has_desc = desc != nullptr;
    const bool rhs_has_desc = rhs.desc != nullptr;
    if (lhs_has_desc != rhs_has_desc)
        return false;
    if (lhs_has_desc && *desc != *rhs.desc)
        return false;

    // Size checks up front so the element-wise loops below can index both sides.
    if (rhs.input_layouts.size() != input_layouts.size())
        return false;

    if (rhs.output_layouts.size() != output_layouts.size())
        return false;

    if (fused_desc.size() != rhs.fused_desc.size())
        return false;

    for (size_t i = 0; i < input_layouts.size(); i++) {
        if (input_layouts[i] != rhs.input_layouts[i])
            return false;
    }

    for (size_t i = 0; i < output_layouts.size(); i++) {
        if (output_layouts[i] != rhs.output_layouts[i])
            return false;
    }

    for (size_t i = 0; i < fused_desc.size(); i++) {
        if (fused_desc[i] != rhs.fused_desc[i])
            return false;
    }

    return true;
}
};

} // namespace cldnn
9 changes: 8 additions & 1 deletion src/plugins/intel_gpu/include/intel_gpu/graph/program.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
#include "intel_gpu/runtime/stream.hpp"
#include "intel_gpu/runtime/lru_cache.hpp"
#include "intel_gpu/runtime/execution_config.hpp"
#include "intel_gpu/graph/kernel_impl_params.hpp"

#include <list>
#include <string>
Expand Down Expand Up @@ -249,8 +250,14 @@ struct program {
std::pair<int64_t/*const alloc*/, int64_t/*general alloc*/> get_estimated_device_mem_usage();

void remove_kernel(kernel_id id);
void calc_nodes_hash();

struct ImplHasher {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Does it not make sense to make this structure a part of kernel_impl_params.hpp?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Makes sense. I will update it in a follow-up PR.

/// @brief Hash functor call: forwards to kernel_impl_params::hash().
size_t operator()(const kernel_impl_params &k) const {
    const size_t key_hash = k.hash();
    return key_hash;
}
};

using ImplementationsCache = cldnn::LruCacheThreadSafe<kernel_impl_params, std::shared_ptr<primitive_impl>, ImplHasher>;
ImplementationsCache& get_implementations_cache() const { return *_impls_cache; }
ICompilationContext& get_compilation_context() const { return *_compilation_context; }
void cancel_compilation_context();
Expand Down
17 changes: 8 additions & 9 deletions src/plugins/intel_gpu/include/intel_gpu/runtime/lru_cache.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ namespace cldnn {
struct primitive_impl;

/// @brief LRU cache that removes the least recently used data when the cache is full.
template<typename Key, typename Value>
template<typename Key, typename Value, typename KeyHasher = std::hash<Key>>
class LruCache {
public:
using data_type = std::pair<Key, Value>;
Expand Down Expand Up @@ -141,7 +141,7 @@ class LruCache {
using lru_data_list_iter = typename lru_data_list_type::iterator;

std::list<data_type> _lru_data_list;
std::unordered_map<Key, lru_data_list_iter> _key_map;
std::unordered_map<Key, lru_data_list_iter, KeyHasher> _key_map;
const size_t _capacity;

/**
Expand All @@ -168,11 +168,13 @@ class LruCache {

using KernelsCache = cldnn::LruCache<size_t, cldnn::kernel::ptr>;

template<typename Key, typename Value>
class LruCacheThreadSafe : LruCache<Key, Value> {
template<typename Key, typename Value, typename KeyHasher = std::hash<Key>>
class LruCacheThreadSafe : public LruCache<Key, Value, KeyHasher> {
public:
using parent = LruCache<Key, Value>;
using FuncRemoveItem = std::function<void(std::pair<Key, Value>&)>;
using parent = LruCache<Key, Value, KeyHasher>;
using ItemType = std::pair<Key, Value>;
using FuncRemoveItem = std::function<void(ItemType&)>;
using parent::parent;

explicit LruCacheThreadSafe(size_t caps) : parent(caps) { }

Expand Down Expand Up @@ -205,7 +207,4 @@ class LruCacheThreadSafe : LruCache<Key, Value> {
mutable std::mutex _mutex;
};


using ImplementationsCache = cldnn::LruCacheThreadSafe<size_t, std::shared_ptr<primitive_impl>>;

} // namespace cldnn
7 changes: 7 additions & 0 deletions src/plugins/intel_gpu/include/intel_gpu/runtime/utils.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,13 @@ struct all : public std::true_type {};
template <bool Val, bool... Values>
struct all<Val, Values...> : public std::integral_constant<bool, Val && all<Values...>::value> {};

/// @brief Type trait that is true only when T is a cv-unqualified type derived
/// from `primitive`, and is not `primitive` itself.
template <class T>
struct is_primitive
    : public std::integral_constant<
          bool,
          // T must carry no const/volatile qualifiers ...
          std::is_same<T, typename std::remove_cv<T>::type>::value &&
          // ... must not be the base class itself ...
          !std::is_same<primitive, typename std::remove_cv<T>::type>::value &&
          // ... and must inherit from primitive.
          std::is_base_of<primitive, T>::value> {};

} // namespace meta

/// @cond CPP_HELPERS
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@
#pragma once

#include "intel_gpu/graph/serialization/binary_buffer.hpp"
#include "intel_gpu/graph/kernel_impl_params.hpp"
#include "intel_gpu/graph/fused_primitive_desc.hpp"
#include "intel_gpu/runtime/engine.hpp"
#include "intel_gpu/runtime/utils.hpp"
#include "intel_gpu/runtime/tensor.hpp"
Expand All @@ -15,9 +17,7 @@

#include "kernel_selector_params.h"
#include "kernel_selector_common.h"
#include "kernel_impl_params.hpp"
#include "tensor_type.h"
#include "fused_primitive_desc.h"

#include <cstdint>
#include <string>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
#pragma once

#include "intel_gpu/primitives/implementation_desc.hpp"
#include "kernel_impl_params.hpp"
#include "intel_gpu/graph/kernel_impl_params.hpp"

#include <functional>
#include <map>
Expand Down
26 changes: 0 additions & 26 deletions src/plugins/intel_gpu/src/graph/include/meta_utils.h

This file was deleted.

5 changes: 1 addition & 4 deletions src/plugins/intel_gpu/src/graph/include/primitive_inst.h
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
#include "intel_gpu/runtime/event.hpp"
#include "intel_gpu/runtime/memory.hpp"
#include "intel_gpu/graph/network.hpp"
#include "meta_utils.h"
#include "intel_gpu/runtime/utils.hpp"
#include "program_node.h"
#include "primitive_type.h"
#include "intel_gpu/graph/serialization/binary_buffer.hpp"
Expand Down Expand Up @@ -219,9 +219,6 @@ class primitive_inst {

virtual void update_output_memory() {}

virtual size_t get_impl_key(const kernel_impl_params& params) const;
virtual size_t get_impl_key() const;

protected:
primitive_inst(network& network, program_node const& node, bool allocate_memory);

Expand Down
2 changes: 1 addition & 1 deletion src/plugins/intel_gpu/src/graph/include/primitive_type.h
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@

#include "intel_gpu/runtime/layout.hpp"
#include "intel_gpu/runtime/memory.hpp"
#include "kernel_impl_params.hpp"
#include "intel_gpu/graph/kernel_impl_params.hpp"

#include <memory>
#include <string>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
#include "intel_gpu/runtime/layout.hpp"
#include "intel_gpu/runtime/debug_configuration.hpp"

#include "meta_utils.h"
#include "intel_gpu/runtime/utils.hpp"
#include "primitive_type.h"
#include "program_node.h"
#include "primitive_inst.h"
Expand Down
22 changes: 3 additions & 19 deletions src/plugins/intel_gpu/src/graph/include/program_node.h
Original file line number Diff line number Diff line change
Expand Up @@ -9,9 +9,9 @@
#include "intel_gpu/primitives/implementation_desc.hpp"
#include "intel_gpu/graph/program.hpp"

#include "fused_primitive_desc.h"
#include "kernel_impl_params.hpp"
#include "meta_utils.h"
#include "intel_gpu/graph/fused_primitive_desc.hpp"
#include "intel_gpu/graph/kernel_impl_params.hpp"
#include "intel_gpu/runtime/utils.hpp"

#include <set>
#include <array>
Expand Down Expand Up @@ -386,10 +386,6 @@ struct program_node {
void set_preferred_input_fmt(size_t idx, format::type type);
void set_preferred_output_fmt(size_t idx, format::type type);

virtual void calculate_hash() {}

size_t get_hash() const { return seed; }

protected:
size_t unique_id = 0;
static thread_local size_t cur_id;
Expand Down Expand Up @@ -430,8 +426,6 @@ struct program_node {

void invalidate_users() const;

size_t seed = 0;

private:
#ifdef ENABLE_ONEDNN_FOR_GPU
std::vector<fused_primitive_desc_onednn> fused_prims_onednn;
Expand Down Expand Up @@ -475,16 +469,6 @@ struct typed_program_node_base : public program_node {
return std::static_pointer_cast<const PType>(program_node::get_primitive());
}

/// @brief Recomputes `seed` from this node's primitive and its fused primitives.
void calculate_hash() override {
    // Start with the hash of the node's own primitive ...
    seed = get_primitive()->hash();

    // ... then fold in the hash of each fused primitive, in order.
    for (const auto& fused : fused_prims)
        seed = hash_combine(seed, fused.desc->hash());
}

protected:
std::shared_ptr<PType> typed_desc() const { return std::static_pointer_cast<PType>(desc); }
};
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
#include "openvino/core/coordinate_diff.hpp"
#include "openvino/core/strides.hpp"

#include "meta_utils.h"
#include "intel_gpu/runtime/utils.hpp"

#include <algorithm>
#include <cassert>
Expand Down
Loading