Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[GPU] Graph serialization for GPU #13801

Merged
merged 26 commits into from
Nov 14, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
26 commits
Select commit Hold shift + click to select a range
9fba555
gpu graph serialization
e-ddykim Nov 2, 2022
5f1d5c4
fix to rebase
e-ddykim Nov 2, 2022
72dc32e
onednn_gpu.patch for serialization
e-ddykim Nov 3, 2022
dee8c2e
git apply --verbose to --quiet
e-ddykim Nov 3, 2022
4278920
functional tests
e-ddykim Nov 6, 2022
7998c91
removed reference of max_unpooling.hpp
e-ddykim Nov 7, 2022
60aef22
git apply --verbose
e-ddykim Nov 7, 2022
ab34465
add no args ctor for proposal_impl
e-ddykim Nov 7, 2022
59f3ef3
changed kernel_cache save/load error messages
e-ddykim Nov 7, 2022
843d443
gpu model caching control env. variable
e-ddykim Nov 7, 2022
e03c7fa
fixed nonnull warning
e-ddykim Nov 7, 2022
59811aa
impl_params are added to save and load
e-ddykim Nov 7, 2022
593b500
changed a way to use kernel_impl_params in save and load
e-ddykim Nov 7, 2022
eeac92a
get_arguments_by_idx is added
e-ddykim Nov 7, 2022
4289eb4
setenv is disabled in windows
e-ddykim Nov 8, 2022
98ee767
added missed part for onednn
e-ddykim Nov 8, 2022
2aba64a
code refactoring based on code review
e-ddykim Nov 8, 2022
6b59b36
fixed to use get_node_output_layout()
e-ddykim Nov 9, 2022
48d406d
OV_GPU_MODEL_CACHING is changed to OV_GPU_CACHE_MODEL
e-ddykim Nov 9, 2022
2155a98
reference to node and primitive are removed
e-ddykim Nov 9, 2022
4e12dff
fixed hash of enum class
e-ddykim Nov 10, 2022
41b4227
restored CanShareContextWith
e-ddykim Nov 10, 2022
cb1a1cb
serialization of intermediate memory
e-ddykim Nov 10, 2022
53ea9be
fix to rebase
e-ddykim Nov 11, 2022
d002f5e
multiclass_nms serialization
e-ddykim Nov 12, 2022
efa8436
caching_properties added
e-ddykim Nov 12, 2022
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions src/plugins/intel_gpu/include/intel_gpu/graph/network.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
#include "intel_gpu/runtime/event.hpp"
#include "intel_gpu/runtime/stream.hpp"
#include "intel_gpu/runtime/lru_cache.hpp"
#include "serialization/binary_buffer.hpp"
vladimir-paramuzov marked this conversation as resolved.
Show resolved Hide resolved

#include <map>
#include <vector>
Expand Down Expand Up @@ -79,8 +80,11 @@ struct network {

network(program::ptr program, stream::ptr stream, uint16_t stream_id);

network(cldnn::BinaryInputBuffer& ifs, stream::ptr stream, engine& engine, uint16_t stream_id = 0);

~network();

void save(cldnn::BinaryOutputBuffer& ob);

static ptr build_network(engine& engine,
const topology& topology,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,10 @@ class CompiledModel : public InferenceEngine::ExecutableNetworkThreadSafeDefault
typedef std::shared_ptr<CompiledModel> Ptr;

CompiledModel(InferenceEngine::CNNNetwork &network, std::shared_ptr<InferenceEngine::RemoteContext> context, Config config);
CompiledModel(std::istream& networkModel, std::shared_ptr<InferenceEngine::RemoteContext> context, Config config);

void Export(std::ostream& networkModel) override;
bool isSerializable();
vladimir-paramuzov marked this conversation as resolved.
Show resolved Hide resolved
std::shared_ptr<ngraph::Function> GetExecGraphInfo() override;
InferenceEngine::IInferRequestInternal::Ptr CreateInferRequest() override;
InferenceEngine::IInferRequestInternal::Ptr CreateInferRequestImpl(InferenceEngine::InputsDataMap networkInputs,
Expand Down
2 changes: 2 additions & 0 deletions src/plugins/intel_gpu/include/intel_gpu/plugin/graph.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,9 @@ class Graph {
using variable_states_map = std::map<std::string, std::vector<cldnn::network::VariableState::Ptr>>;

Graph(InferenceEngine::CNNNetwork& network, InferenceEngine::gpu::ClContext::Ptr context, Config config, uint16_t stream_id = 0);
Graph(cldnn::BinaryInputBuffer& ib, InferenceEngine::gpu::ClContext::Ptr context, Config config, uint16_t stream_id = 0);
explicit Graph(std::shared_ptr<Graph> graph, uint16_t stream_id = 0);
void Export(cldnn::BinaryOutputBuffer &ob);
std::shared_ptr<ngraph::Function> GetExecGraphInfo();

bool IsLoaded() const;
Expand Down
3 changes: 3 additions & 0 deletions src/plugins/intel_gpu/include/intel_gpu/plugin/plugin.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ class Plugin : public InferenceEngine::IInferencePlugin,
std::shared_ptr<impl> _impl;
bool streamsSet = false;
bool throttlingSet = false;
bool isModelCachingEnabled = false;

// key: device_id, value: cldnn device
std::map<std::string, cldnn::device::ptr> device_map;
Expand Down Expand Up @@ -58,6 +59,8 @@ class Plugin : public InferenceEngine::IInferencePlugin,
InferenceEngine::Parameter GetMetric(const std::string& name, const std::map<std::string, InferenceEngine::Parameter>& options) const override;
InferenceEngine::QueryNetworkResult QueryNetwork(const InferenceEngine::CNNNetwork& network,
const std::map<std::string, std::string>& config) const override;
InferenceEngine::IExecutableNetworkInternal::Ptr ImportNetwork(std::istream& networkModel,
const std::map<std::string, std::string>& config) override;

std::shared_ptr<InferenceEngine::RemoteContext> CreateContext(const InferenceEngine::ParamMap& params) override;
std::shared_ptr<InferenceEngine::RemoteContext> GetDefaultContext(const InferenceEngine::ParamMap& params) override;
Expand Down
55 changes: 55 additions & 0 deletions src/plugins/intel_gpu/include/intel_gpu/runtime/kernel_args.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -114,6 +114,61 @@ struct kernel_arguments_data {
const scalars_desc* scalars = nullptr;
};

// Serializable, index-based mirror of kernel_arguments_data: instead of holding
// memory pointers, each field stores an integer index identifying the
// corresponding argument so that kernel arguments can be re-bound after a
// network is deserialized.
// NOTE(review): `scalars` is held by value but is NOT streamed in save()/load()
// below — presumably it is restored elsewhere; confirm against the callers.
struct kernel_arguments_data_idx {
    std::vector<int32_t> inputs;
    // Default-initialize every index: save() streams all of these fields, and
    // before this fix any member that was never assigned was serialized with an
    // indeterminate value (undefined behavior on read).
    int32_t weights = 0;
    int32_t recurrent = 0;
    int32_t hidden = 0;
    int32_t cell = 0;
    int32_t bias = 0;
    int32_t weights_zero_points = 0;
    int32_t activations_zero_points = 0;
    int32_t compensation = 0;
    int32_t lookup_table = 0;
    int32_t scale_table = 0;
    int32_t slope = 0;

    std::vector<int32_t> fused_op_inputs;
    int32_t split = 0;
    scalars_desc scalars;

    /// Streams every index field to @p ob. The field order here defines the
    /// binary format and must stay in sync with load().
    template <typename BufferType>
    void save(BufferType& ob) const {
        ob << inputs;
        ob << weights;
        ob << recurrent;
        ob << hidden;
        ob << cell;
        ob << bias;
        ob << weights_zero_points;
        ob << activations_zero_points;
        ob << compensation;
        ob << lookup_table;
        ob << scale_table;
        ob << slope;
        ob << fused_op_inputs;
        ob << split;
    }

    /// Reads the index fields from @p ib in exactly the order written by save().
    template <typename BufferType>
    void load(BufferType& ib) {
        ib >> inputs;
        ib >> weights;
        ib >> recurrent;
        ib >> hidden;
        ib >> cell;
        ib >> bias;
        ib >> weights_zero_points;
        ib >> activations_zero_points;
        ib >> compensation;
        ib >> lookup_table;
        ib >> scale_table;
        ib >> slope;
        ib >> fused_op_inputs;
        ib >> split;
    }
};

////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
// KernelString
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
Expand Down
6 changes: 6 additions & 0 deletions src/plugins/intel_gpu/include/intel_gpu/runtime/layout.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -371,6 +371,12 @@ struct layout {

layout(const layout& other) = default;

    // Default constructor, required so a layout can be default-constructed
    // during deserialization. Produces a placeholder value (bin data type,
    // 'any' format, empty padding, empty partial shape) that is expected to
    // be overwritten when the real layout is read from the stream.
    layout()
    : data_type(cldnn::data_types::bin)
    , format(cldnn::format::any)
    , data_padding(padding())
    , size(ov::PartialShape()) { }

layout& operator=(const layout& other) {
if (this == &other)
return *this;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -90,7 +90,7 @@ struct padded_pool_comparer {
class memory_pool {
memory_pool();

memory_ptr alloc_memory(const layout& layout, allocation_type type);
memory_ptr alloc_memory(const layout& layout, allocation_type type, bool reset = true);
static bool has_conflict(const memory_set&, const std::set<primitive_id>&, uint32_t network_id);

std::multimap<uint64_t, memory_record> _non_padded_pool;
Expand All @@ -107,7 +107,7 @@ class memory_pool {
const std::set<primitive_id>& restrictions,
allocation_type type,
bool reusable = true); // get from pool or create memory allocation
memory_ptr get_memory(const layout& layout, allocation_type type);
memory_ptr get_memory(const layout& layout, allocation_type type, bool reset = true);
memory_ptr get_from_non_padded_pool(const layout& layout,
const primitive_id& id,
uint32_t network_id,
Expand Down
16 changes: 16 additions & 0 deletions src/plugins/intel_gpu/src/graph/convolution.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -585,4 +585,20 @@ convolution_inst::typed_primitive_inst(network& network, convolution_node const&
"Weights/ifm mismatch");
}
}

// Serializes this convolution instance: base primitive_inst state first, then
// the convolution-specific fields. Field order must mirror load() exactly.
void convolution_inst::save(cldnn::BinaryOutputBuffer& ob) const {
    parent::save(ob);

    ob << _groups;
    ob << _split;
    ob << _deform_conv_dep_offset;
}

// Restores this convolution instance from the stream; reads base state first,
// then the convolution-specific fields in the exact order written by save().
void convolution_inst::load(cldnn::BinaryInputBuffer& ib) {
    parent::load(ib);

    ib >> _groups;
    ib >> _split;
    ib >> _deform_conv_dep_offset;
}
} // namespace cldnn
9 changes: 5 additions & 4 deletions src/plugins/intel_gpu/src/graph/crop.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -244,7 +244,7 @@ crop_inst::typed_primitive_inst(network& network, crop_node const& node) : paren
}

void crop_inst::on_execute() {
if (!node->can_be_optimized())
if (!can_be_optimized())
return;

if (_outputs[0] && _network.get_engine().is_the_same_buffer(output_memory(), input_memory()))
Expand All @@ -254,17 +254,18 @@ void crop_inst::on_execute() {
}

// Reuses the input buffer as this crop's output; delegates to
// update_output_memory(), which performs the actual buffer reinterpretation.
void crop_inst::reuse_input() {
    update_output_memory();
}

// Points this crop's output at the input buffer when the node is optimized
// out. Uses the instance-level can_be_optimized() and _impl_params (instead of
// the node) so it also works on deserialized networks that have no node.
void crop_inst::update_output_memory() {
    // Nothing to do unless this crop was marked as optimizable (zero-copy).
    if (!can_be_optimized())
        return;

    // Already aliased to the input buffer — avoid redundant reinterpretation.
    if (_outputs[0] && _network.get_engine().is_the_same_buffer(output_memory(), input_memory()))
        return;

    // Reinterpret the input memory with this primitive's output layout; mark
    // that this instance does not own a separate output allocation.
    _outputs[0] = _network.get_engine().reinterpret_buffer(input_memory(), _impl_params->output_layout);
    _mem_allocated = false;
}

} // namespace cldnn
18 changes: 17 additions & 1 deletion src/plugins/intel_gpu/src/graph/deconvolution.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -141,7 +141,9 @@ std::string deconvolution_inst::to_string(deconvolution_node const& node) {
}

deconvolution_inst::typed_primitive_inst(network& network, deconvolution_node const& node)
: parent(network, node) {
: parent(network, node),
_groups(node.get_groups()),
_split(node.get_split()) {
auto stride = argument->stride;
auto pad = argument->pad;

Expand Down Expand Up @@ -220,4 +222,18 @@ deconvolution_inst::typed_primitive_inst(network& network, deconvolution_node co
"Weights/ifm mismatch");
}
}

// Serializes this deconvolution instance: base primitive_inst state first,
// then the deconvolution-specific fields. Order must mirror load() exactly.
void deconvolution_inst::save(cldnn::BinaryOutputBuffer& ob) const {
    parent::save(ob);

    ob << _groups;
    ob << _split;
}

// Restores this deconvolution instance; reads fields in the exact order
// written by save().
void deconvolution_inst::load(cldnn::BinaryInputBuffer& ib) {
    parent::load(ib);

    ib >> _groups;
    ib >> _split;
}
} // namespace cldnn
88 changes: 88 additions & 0 deletions src/plugins/intel_gpu/src/graph/detection_output.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
#include "intel_gpu/runtime/error_handler.hpp"
#include "json_object.h"
#include <string>
#include "serialization/string_serializer.hpp"

namespace cldnn {
primitive_type_id detection_output::type_id() {
Expand Down Expand Up @@ -180,4 +181,91 @@ detection_output_inst::typed_primitive_inst(network& network, detection_output_n
"Detection output layer doesn't support input padding in Prior-Box input");
}

// Serializes this detection_output instance. After the base state, every field
// of the detection_output primitive descriptor is streamed individually so the
// descriptor can be fully reconstructed in load() without a program/node.
// The field order here defines the binary format and must mirror load().
void detection_output_inst::save(cldnn::BinaryOutputBuffer& ob) const {
    parent::save(ob);

    // argument (struct detection_output)
    ob << argument->id;
    ob << argument->input[0];
    ob << argument->input[1];
    ob << argument->input[2];
    // Non-trivially-streamable members (padding, enum) are written as raw
    // bytes via make_data; load() reads them back the same way.
    ob << cldnn::make_data(&argument->output_padding, sizeof(argument->output_padding));
    ob << argument->num_classes;
    ob << argument->keep_top_k;
    ob << argument->share_location;
    ob << argument->background_label_id;
    ob << argument->nms_threshold;
    ob << argument->top_k;
    ob << argument->eta;
    ob << cldnn::make_data(&argument->code_type, sizeof(argument->code_type));
    ob << argument->variance_encoded_in_target;
    ob << argument->confidence_threshold;
    ob << argument->prior_info_size;
    ob << argument->prior_coordinates_offset;
    ob << argument->prior_is_normalized;
    ob << argument->input_width;
    ob << argument->input_height;
    ob << argument->decrease_label_id;
    ob << argument->clip_before_nms;
    ob << argument->clip_after_nms;
}

// Restores this detection_output instance. Reads every descriptor field into
// locals — in exactly the order written by save() — and then rebuilds the
// detection_output primitive descriptor, since no program/node is available
// when a network is imported from a binary buffer.
void detection_output_inst::load(cldnn::BinaryInputBuffer& ib) {
    parent::load(ib);

    primitive_id id;
    primitive_id input_location;
    primitive_id input_confidence;
    primitive_id input_prior_box;
    uint32_t num_classes;
    uint32_t keep_top_k;
    bool share_location;
    int background_label_id;
    float nms_threshold;
    int top_k;
    float eta;
    prior_box_code_type code_type;
    bool variance_encoded_in_target;
    float confidence_threshold;
    int32_t prior_info_size;
    int32_t prior_coordinates_offset;
    bool prior_is_normalized;
    int32_t input_width;
    int32_t input_height;
    bool decrease_label_id;
    bool clip_before_nms;
    bool clip_after_nms;
    // primitive_id ext_prim_id;
    padding output_padding;

    ib >> id;
    ib >> input_location;
    ib >> input_confidence;
    ib >> input_prior_box;
    // padding and the code_type enum were written as raw bytes by save().
    ib >> cldnn::make_data(&output_padding, sizeof(output_padding));
    ib >> num_classes;
    ib >> keep_top_k;
    ib >> share_location;
    ib >> background_label_id;
    ib >> nms_threshold;
    ib >> top_k;
    ib >> eta;
    ib >> cldnn::make_data(&code_type, sizeof(code_type));
    ib >> variance_encoded_in_target;
    ib >> confidence_threshold;
    ib >> prior_info_size;
    ib >> prior_coordinates_offset;
    ib >> prior_is_normalized;
    ib >> input_width;
    ib >> input_height;
    ib >> decrease_label_id;
    ib >> clip_before_nms;
    ib >> clip_after_nms;

    // Rebuild the primitive descriptor from the restored fields.
    argument = std::make_shared<detection_output>(id, input_location, input_confidence, input_prior_box,
        num_classes, keep_top_k, share_location, background_label_id, nms_threshold, top_k, eta, code_type,
        variance_encoded_in_target, confidence_threshold, prior_info_size, prior_coordinates_offset,
        prior_is_normalized, input_width, input_height, decrease_label_id, clip_before_nms, clip_after_nms,
        output_padding);
}
} // namespace cldnn
Loading