Skip to content

Commit

Permalink
[GPU] Changed impls cache key type to avoid hash collisions (openvino…
Browse files Browse the repository at this point in the history
  • Loading branch information
vladimir-paramuzov authored and andrei-cv committed Mar 21, 2023
1 parent 6470148 commit ee2d229
Show file tree
Hide file tree
Showing 17 changed files with 184 additions and 144 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
#pragma once

#include "intel_gpu/primitives/primitive.hpp"
#include "meta_utils.h"
#include "intel_gpu/runtime/utils.hpp"

namespace cldnn {

Expand Down Expand Up @@ -41,6 +41,17 @@ struct fused_primitive_desc {
return p;
}

/// @brief Equality comparison of two fused primitive descriptors.
/// @param rhs Descriptor to compare against.
/// @return true when total_num_deps and dep_start_idx match and the primitives
///         pointed to by desc compare equal; false otherwise.
/// @note Both desc pointers are dereferenced without a null check.
bool operator==(const fused_primitive_desc& rhs) const {
    // Compare the plain dependency bookkeeping first; the pointed-to primitives
    // are only compared when both counters agree.
    if (total_num_deps != rhs.total_num_deps || dep_start_idx != rhs.dep_start_idx) {
        return false;
    }

    return *desc == *rhs.desc;
}

/// @brief Inequality comparison; the exact negation of operator==.
/// @param rhs Descriptor to compare against.
bool operator!=(const fused_primitive_desc& rhs) const {
    return !operator==(rhs);
}

std::shared_ptr<const primitive> desc;

layout input_layout = layout(data_types::f32, format::bfyx, tensor());
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -11,8 +11,7 @@
#include "intel_gpu/runtime/tensor.hpp"
#include "intel_gpu/primitives/primitive.hpp"

#include "tensor_type.h"
#include "fused_primitive_desc.h"
#include "intel_gpu/graph/fused_primitive_desc.hpp"

#include <cstdint>
#include <string>
Expand Down Expand Up @@ -49,7 +48,7 @@ struct kernel_impl_params {

memory::ptr reordered_weights = nullptr;

kernel_impl_params() {}
kernel_impl_params() : prog(nullptr), desc(nullptr), unique_id(0) {}

kernel_impl_params(program& _prog,
std::shared_ptr<const primitive> _desc,
Expand Down Expand Up @@ -116,6 +115,54 @@ struct kernel_impl_params {
OPENVINO_ASSERT(prog != nullptr, "[GPU] Program pointer in kernel_impl_params in not initialized");
return *prog;
}

size_t hash() const {
size_t seed = desc->hash();
const size_t prime_number = 2654435761; // magic number to reduce hash collision rate.
for (auto& in : input_layouts) {
seed = hash_combine(seed, in.hash() * prime_number);
}
for (auto& out : output_layouts) {
seed = hash_combine(seed, out.hash() * prime_number);
}

// hash for fused prims
for (auto& fd : fused_desc) {
seed = hash_combine(seed, fd.desc->hash());
}
return seed;
}

/// @brief Equality comparison of two parameter sets.
///
/// Two objects are equal when the primitives pointed to by desc compare equal,
/// the input and output layout lists have the same length and equal elements,
/// and the fused descriptor lists have the same length and equal elements.
/// @param rhs Parameters to compare against.
/// @return true when all of the above hold; false at the first mismatch.
/// @note Both desc pointers are dereferenced without a null check.
bool operator==(const kernel_impl_params& rhs) const {
    if (*desc != *rhs.desc) {
        return false;
    }

    // Lengths are checked before any element-wise comparison so the indexed
    // loops below never read past the end of rhs' containers.
    const auto num_inputs = input_layouts.size();
    if (rhs.input_layouts.size() != num_inputs) {
        return false;
    }

    const auto num_outputs = output_layouts.size();
    if (rhs.output_layouts.size() != num_outputs) {
        return false;
    }

    for (size_t idx = 0; idx < num_inputs; ++idx) {
        if (input_layouts[idx] != rhs.input_layouts[idx]) {
            return false;
        }
    }

    for (size_t idx = 0; idx < num_outputs; ++idx) {
        if (output_layouts[idx] != rhs.output_layouts[idx]) {
            return false;
        }
    }

    const auto num_fused = rhs.fused_desc.size();
    if (fused_desc.size() != num_fused) {
        return false;
    }

    for (size_t idx = 0; idx < num_fused; ++idx) {
        if (fused_desc[idx] != rhs.fused_desc[idx]) {
            return false;
        }
    }

    return true;
}
};

} // namespace cldnn
9 changes: 8 additions & 1 deletion src/plugins/intel_gpu/include/intel_gpu/graph/program.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
#include "intel_gpu/runtime/stream.hpp"
#include "intel_gpu/runtime/lru_cache.hpp"
#include "intel_gpu/runtime/execution_config.hpp"
#include "intel_gpu/graph/kernel_impl_params.hpp"

#include <list>
#include <string>
Expand Down Expand Up @@ -249,8 +250,14 @@ struct program {
std::pair<int64_t/*const alloc*/, int64_t/*general alloc*/> get_estimated_device_mem_usage();

void remove_kernel(kernel_id id);
void calc_nodes_hash();

/// @brief Hash functor for kernel_impl_params keys.
///
/// Forwards to kernel_impl_params::hash(); passed as the hasher template
/// argument of ImplementationsCache.
struct ImplHasher {
    /// @param k Key to hash.
    /// @return The value of k.hash().
    size_t operator()(const kernel_impl_params& k) const {
        const size_t key_hash = k.hash();
        return key_hash;
    }
};

/// Cache type keyed by kernel_impl_params (hashed through ImplHasher) and
/// holding shared pointers to primitive_impl.
using ImplementationsCache = cldnn::LruCacheThreadSafe<kernel_impl_params, std::shared_ptr<primitive_impl>, ImplHasher>;
/// @return Reference to the object pointed to by _impls_cache (dereferenced without a null check).
ImplementationsCache& get_implementations_cache() const { return *_impls_cache; }
/// @return Reference to the object pointed to by _compilation_context (dereferenced without a null check).
ICompilationContext& get_compilation_context() const { return *_compilation_context; }
void cancel_compilation_context();
Expand Down
17 changes: 8 additions & 9 deletions src/plugins/intel_gpu/include/intel_gpu/runtime/lru_cache.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ namespace cldnn {
struct primitive_impl;

/// @brief LRU cache which removes the least recently used data when the cache is full.
template<typename Key, typename Value>
template<typename Key, typename Value, typename KeyHasher = std::hash<Key>>
class LruCache {
public:
using data_type = std::pair<Key, Value>;
Expand Down Expand Up @@ -141,7 +141,7 @@ class LruCache {
using lru_data_list_iter = typename lru_data_list_type::iterator;

std::list<data_type> _lru_data_list;
std::unordered_map<Key, lru_data_list_iter> _key_map;
std::unordered_map<Key, lru_data_list_iter, KeyHasher> _key_map;
const size_t _capacity;

/**
Expand All @@ -168,11 +168,13 @@ class LruCache {

using KernelsCache = cldnn::LruCache<size_t, cldnn::kernel::ptr>;

template<typename Key, typename Value>
class LruCacheThreadSafe : LruCache<Key, Value> {
template<typename Key, typename Value, typename KeyHasher = std::hash<Key>>
class LruCacheThreadSafe : public LruCache<Key, Value, KeyHasher> {
public:
using parent = LruCache<Key, Value>;
using FuncRemoveItem = std::function<void(std::pair<Key, Value>&)>;
using parent = LruCache<Key, Value, KeyHasher>;
using ItemType = std::pair<Key, Value>;
using FuncRemoveItem = std::function<void(ItemType&)>;
using parent::parent;

explicit LruCacheThreadSafe(size_t caps) : parent(caps) { }

Expand Down Expand Up @@ -205,7 +207,4 @@ class LruCacheThreadSafe : LruCache<Key, Value> {
mutable std::mutex _mutex;
};


using ImplementationsCache = cldnn::LruCacheThreadSafe<size_t, std::shared_ptr<primitive_impl>>;

} // namespace cldnn
7 changes: 7 additions & 0 deletions src/plugins/intel_gpu/include/intel_gpu/runtime/utils.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,13 @@ struct all : public std::true_type {};
template <bool Val, bool... Values>
struct all<Val, Values...> : public std::integral_constant<bool, Val && all<Values...>::value> {};

/// @brief Compile-time check that T is a concrete primitive type.
///
/// The value is true only when T derives from `primitive`, is not `primitive`
/// itself, and carries no top-level const/volatile qualifier.
template <class T>
struct is_primitive
    : public std::integral_constant<bool,
                                    std::is_base_of<primitive, T>::value &&
                                    // T must be cv-unqualified...
                                    !std::is_const<T>::value &&
                                    !std::is_volatile<T>::value &&
                                    // ...and must not be the base type itself.
                                    !std::is_same<primitive, T>::value> {};

} // namespace meta

/// @cond CPP_HELPERS
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@
#pragma once

#include "intel_gpu/graph/serialization/binary_buffer.hpp"
#include "intel_gpu/graph/kernel_impl_params.hpp"
#include "intel_gpu/graph/fused_primitive_desc.hpp"
#include "intel_gpu/runtime/engine.hpp"
#include "intel_gpu/runtime/utils.hpp"
#include "intel_gpu/runtime/tensor.hpp"
Expand All @@ -15,9 +17,7 @@

#include "kernel_selector_params.h"
#include "kernel_selector_common.h"
#include "kernel_impl_params.hpp"
#include "tensor_type.h"
#include "fused_primitive_desc.h"

#include <cstdint>
#include <string>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
#pragma once

#include "intel_gpu/primitives/implementation_desc.hpp"
#include "kernel_impl_params.hpp"
#include "intel_gpu/graph/kernel_impl_params.hpp"

#include <functional>
#include <map>
Expand Down
26 changes: 0 additions & 26 deletions src/plugins/intel_gpu/src/graph/include/meta_utils.h

This file was deleted.

5 changes: 1 addition & 4 deletions src/plugins/intel_gpu/src/graph/include/primitive_inst.h
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
#include "intel_gpu/runtime/event.hpp"
#include "intel_gpu/runtime/memory.hpp"
#include "intel_gpu/graph/network.hpp"
#include "meta_utils.h"
#include "intel_gpu/runtime/utils.hpp"
#include "program_node.h"
#include "primitive_type.h"
#include "intel_gpu/graph/serialization/binary_buffer.hpp"
Expand Down Expand Up @@ -219,9 +219,6 @@ class primitive_inst {

virtual void update_output_memory() {}

virtual size_t get_impl_key(const kernel_impl_params& params) const;
virtual size_t get_impl_key() const;

protected:
primitive_inst(network& network, program_node const& node, bool allocate_memory);

Expand Down
2 changes: 1 addition & 1 deletion src/plugins/intel_gpu/src/graph/include/primitive_type.h
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@

#include "intel_gpu/runtime/layout.hpp"
#include "intel_gpu/runtime/memory.hpp"
#include "kernel_impl_params.hpp"
#include "intel_gpu/graph/kernel_impl_params.hpp"

#include <memory>
#include <string>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
#include "intel_gpu/runtime/layout.hpp"
#include "intel_gpu/runtime/debug_configuration.hpp"

#include "meta_utils.h"
#include "intel_gpu/runtime/utils.hpp"
#include "primitive_type.h"
#include "program_node.h"
#include "primitive_inst.h"
Expand Down
22 changes: 3 additions & 19 deletions src/plugins/intel_gpu/src/graph/include/program_node.h
Original file line number Diff line number Diff line change
Expand Up @@ -9,9 +9,9 @@
#include "intel_gpu/primitives/implementation_desc.hpp"
#include "intel_gpu/graph/program.hpp"

#include "fused_primitive_desc.h"
#include "kernel_impl_params.hpp"
#include "meta_utils.h"
#include "intel_gpu/graph/fused_primitive_desc.hpp"
#include "intel_gpu/graph/kernel_impl_params.hpp"
#include "intel_gpu/runtime/utils.hpp"

#include <set>
#include <array>
Expand Down Expand Up @@ -386,10 +386,6 @@ struct program_node {
void set_preferred_input_fmt(size_t idx, format::type type);
void set_preferred_output_fmt(size_t idx, format::type type);

virtual void calculate_hash() {}

size_t get_hash() const { return seed; }

protected:
size_t unique_id = 0;
static thread_local size_t cur_id;
Expand Down Expand Up @@ -430,8 +426,6 @@ struct program_node {

void invalidate_users() const;

size_t seed = 0;

private:
#ifdef ENABLE_ONEDNN_FOR_GPU
std::vector<fused_primitive_desc_onednn> fused_prims_onednn;
Expand Down Expand Up @@ -475,16 +469,6 @@ struct typed_program_node_base : public program_node {
return std::static_pointer_cast<const PType>(program_node::get_primitive());
}

void calculate_hash() override {
// hash for primitive
seed = get_primitive()->hash();

// hash for fused prims
for (auto& prim : fused_prims) {
seed = hash_combine(seed, prim.desc->hash());
}
}

protected:
/// @brief Returns `desc` cast to a shared pointer of the concrete primitive type PType.
/// @note Uses std::static_pointer_cast, so no runtime type check is performed.
std::shared_ptr<PType> typed_desc() const { return std::static_pointer_cast<PType>(desc); }
};
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
#include "openvino/core/coordinate_diff.hpp"
#include "openvino/core/strides.hpp"

#include "meta_utils.h"
#include "intel_gpu/runtime/utils.hpp"

#include <algorithm>
#include <cassert>
Expand Down
Loading

0 comments on commit ee2d229

Please sign in to comment.