Skip to content

Commit

Permalink
[GPU] Graph serialization for GPU (#13801)
Browse files Browse the repository at this point in the history
* gpu graph serialization

* fix to rebase

* onednn_gpu.patch for serialization

* git apply --verbode to --quiet

* functional tests

* removed reference of max_unpooling.hpp

* git apply --verbose

* add no args ctor for proposal_impl

* changed kernel_cache save/load error messages

* gpu model caching control env. variable

* fixed nonnull warning

* impl_params are added to save and load

* changed a way to use kernel_impl_params in save and load

* get_arguments_by_idx is added

* setenv is disabled in windows

* added missed part for onednn

* code refactoring based on code review

* fixed to use get_node_output_layout()

* OV_GPU_MODEL_CACHING is changed to OV_GPU_CACHE_MODEL

* reference to node and primitive are removed

* fixed hash of enum class

* restored CanShareContextWith

* serialization of intermediate memory

* fix to rebase

* multiclass_nms serialization

* caching_properties added
  • Loading branch information
e-ddykim authored Nov 14, 2022
1 parent e4b21aa commit f488e6c
Show file tree
Hide file tree
Showing 234 changed files with 3,943 additions and 110 deletions.
4 changes: 4 additions & 0 deletions src/plugins/intel_gpu/include/intel_gpu/graph/network.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
#include "intel_gpu/runtime/event.hpp"
#include "intel_gpu/runtime/stream.hpp"
#include "intel_gpu/runtime/lru_cache.hpp"
#include "serialization/binary_buffer.hpp"

#include <map>
#include <vector>
Expand Down Expand Up @@ -79,8 +80,11 @@ struct network {

network(program::ptr program, stream::ptr stream, uint16_t stream_id);

network(cldnn::BinaryInputBuffer& ifs, stream::ptr stream, engine& engine, uint16_t stream_id = 0);

~network();

void save(cldnn::BinaryOutputBuffer& ob);

static ptr build_network(engine& engine,
const topology& topology,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,10 @@ class CompiledModel : public InferenceEngine::ExecutableNetworkThreadSafeDefault
typedef std::shared_ptr<CompiledModel> Ptr;

CompiledModel(InferenceEngine::CNNNetwork &network, std::shared_ptr<InferenceEngine::RemoteContext> context, Config config);
CompiledModel(std::istream& networkModel, std::shared_ptr<InferenceEngine::RemoteContext> context, Config config);

void Export(std::ostream& networkModel) override;
bool isSerializable();
std::shared_ptr<ngraph::Function> GetExecGraphInfo() override;
InferenceEngine::IInferRequestInternal::Ptr CreateInferRequest() override;
InferenceEngine::IInferRequestInternal::Ptr CreateInferRequestImpl(InferenceEngine::InputsDataMap networkInputs,
Expand Down
2 changes: 2 additions & 0 deletions src/plugins/intel_gpu/include/intel_gpu/plugin/graph.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,9 @@ class Graph {
using variable_states_map = std::map<std::string, std::vector<cldnn::network::VariableState::Ptr>>;

Graph(InferenceEngine::CNNNetwork& network, InferenceEngine::gpu::ClContext::Ptr context, Config config, uint16_t stream_id = 0);
Graph(cldnn::BinaryInputBuffer& ib, InferenceEngine::gpu::ClContext::Ptr context, Config config, uint16_t stream_id = 0);
explicit Graph(std::shared_ptr<Graph> graph, uint16_t stream_id = 0);
void Export(cldnn::BinaryOutputBuffer &ob);
std::shared_ptr<ngraph::Function> GetExecGraphInfo();

bool IsLoaded() const;
Expand Down
3 changes: 3 additions & 0 deletions src/plugins/intel_gpu/include/intel_gpu/plugin/plugin.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ class Plugin : public InferenceEngine::IInferencePlugin,
std::shared_ptr<impl> _impl;
bool streamsSet = false;
bool throttlingSet = false;
bool isModelCachingEnabled = false;

// key: device_id, value: cldnn device
std::map<std::string, cldnn::device::ptr> device_map;
Expand Down Expand Up @@ -58,6 +59,8 @@ class Plugin : public InferenceEngine::IInferencePlugin,
InferenceEngine::Parameter GetMetric(const std::string& name, const std::map<std::string, InferenceEngine::Parameter>& options) const override;
InferenceEngine::QueryNetworkResult QueryNetwork(const InferenceEngine::CNNNetwork& network,
const std::map<std::string, std::string>& config) const override;
InferenceEngine::IExecutableNetworkInternal::Ptr ImportNetwork(std::istream& networkModel,
const std::map<std::string, std::string>& config) override;

std::shared_ptr<InferenceEngine::RemoteContext> CreateContext(const InferenceEngine::ParamMap& params) override;
std::shared_ptr<InferenceEngine::RemoteContext> GetDefaultContext(const InferenceEngine::ParamMap& params) override;
Expand Down
55 changes: 55 additions & 0 deletions src/plugins/intel_gpu/include/intel_gpu/runtime/kernel_args.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -114,6 +114,61 @@ struct kernel_arguments_data {
const scalars_desc* scalars = nullptr;
};

// Serializable, index-based mirror of kernel_arguments_data: instead of live
// memory pointers it stores integer indices identifying each kernel argument,
// so a network's runtime argument bindings can be written to and restored
// from a binary buffer.
struct kernel_arguments_data_idx {
    std::vector<int32_t> inputs;
    // All scalar index fields are zero-initialized so a default-constructed
    // instance is deterministic before load() populates it (previously only
    // `split` had an initializer, leaving the rest indeterminate).
    // NOTE(review): confirm 0 is an acceptable "unset" value for these
    // indices (vs. a -1 sentinel).
    int32_t weights = 0;
    int32_t recurrent = 0;
    int32_t hidden = 0;
    int32_t cell = 0;
    int32_t bias = 0;
    int32_t weights_zero_points = 0;
    int32_t activations_zero_points = 0;
    int32_t compensation = 0;
    int32_t lookup_table = 0;
    int32_t scale_table = 0;
    int32_t slope = 0;

    std::vector<int32_t> fused_op_inputs;
    int32_t split = 0;
    // NOTE(review): `scalars` is not included in save()/load() below —
    // confirm it is repopulated elsewhere after deserialization.
    scalars_desc scalars;

    // Writes all index fields to the buffer. Field order must match load().
    template <typename BufferType>
    void save(BufferType& ob) const {
        ob << inputs;
        ob << weights;
        ob << recurrent;
        ob << hidden;
        ob << cell;
        ob << bias;
        ob << weights_zero_points;
        ob << activations_zero_points;
        ob << compensation;
        ob << lookup_table;
        ob << scale_table;
        ob << slope;
        ob << fused_op_inputs;
        ob << split;
    }

    // Reads all index fields back in the exact order written by save().
    template <typename BufferType>
    void load(BufferType& ib) {
        ib >> inputs;
        ib >> weights;
        ib >> recurrent;
        ib >> hidden;
        ib >> cell;
        ib >> bias;
        ib >> weights_zero_points;
        ib >> activations_zero_points;
        ib >> compensation;
        ib >> lookup_table;
        ib >> scale_table;
        ib >> slope;
        ib >> fused_op_inputs;
        ib >> split;
    }
};

////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
// KernelString
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
Expand Down
6 changes: 6 additions & 0 deletions src/plugins/intel_gpu/include/intel_gpu/runtime/layout.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -371,6 +371,12 @@ struct layout {

layout(const layout& other) = default;

// Default constructor: produces a placeholder layout, e.g. as a target that
// deserialization will overwrite.
// NOTE(review): data_types::bin and format::any look like sentinel values —
// confirm nothing treats a default-constructed layout as a usable buffer
// description.
layout()
: data_type(cldnn::data_types::bin)
, format(cldnn::format::any)
, data_padding(padding())
, size(ov::PartialShape()) { }

layout& operator=(const layout& other) {
if (this == &other)
return *this;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -90,7 +90,7 @@ struct padded_pool_comparer {
class memory_pool {
memory_pool();

memory_ptr alloc_memory(const layout& layout, allocation_type type);
memory_ptr alloc_memory(const layout& layout, allocation_type type, bool reset = true);
static bool has_conflict(const memory_set&, const std::set<primitive_id>&, uint32_t network_id);

std::multimap<uint64_t, memory_record> _non_padded_pool;
Expand All @@ -107,7 +107,7 @@ class memory_pool {
const std::set<primitive_id>& restrictions,
allocation_type type,
bool reusable = true); // get from pool or create memory allocation
memory_ptr get_memory(const layout& layout, allocation_type type);
memory_ptr get_memory(const layout& layout, allocation_type type, bool reset = true);
memory_ptr get_from_non_padded_pool(const layout& layout,
const primitive_id& id,
uint32_t network_id,
Expand Down
16 changes: 16 additions & 0 deletions src/plugins/intel_gpu/src/graph/convolution.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -585,4 +585,20 @@ convolution_inst::typed_primitive_inst(network& network, convolution_node const&
"Weights/ifm mismatch");
}
}

// Serializes convolution-specific instance state on top of the base
// primitive_inst data. Field order must match convolution_inst::load().
void convolution_inst::save(cldnn::BinaryOutputBuffer& ob) const {
parent::save(ob);

ob << _groups;
ob << _split;
ob << _deform_conv_dep_offset;
}

// Restores convolution-specific instance state; reads fields in the exact
// order written by convolution_inst::save().
void convolution_inst::load(cldnn::BinaryInputBuffer& ib) {
parent::load(ib);

ib >> _groups;
ib >> _split;
ib >> _deform_conv_dep_offset;
}
} // namespace cldnn
9 changes: 5 additions & 4 deletions src/plugins/intel_gpu/src/graph/crop.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -244,7 +244,7 @@ crop_inst::typed_primitive_inst(network& network, crop_node const& node) : paren
}

void crop_inst::on_execute() {
if (!node->can_be_optimized())
if (!can_be_optimized())
return;

if (_outputs[0] && _network.get_engine().is_the_same_buffer(output_memory(), input_memory()))
Expand All @@ -254,17 +254,18 @@ void crop_inst::on_execute() {
}

// Rebinds the output to the input buffer by delegating to
// update_output_memory(), which holds the actual reuse logic.
void crop_inst::reuse_input() {
update_output_memory();
}
}

// When this crop is an optimized (in-place) primitive, point output[0] at a
// reinterpreted view of the input buffer instead of a separate allocation.
void crop_inst::update_output_memory() {
    if (!can_be_optimized())
        return;

    auto& engine = _network.get_engine();
    const bool output_already_aliases_input =
        _outputs[0] && engine.is_the_same_buffer(output_memory(), input_memory());
    if (output_already_aliases_input)
        return;

    _outputs[0] = engine.reinterpret_buffer(input_memory(), _impl_params->output_layout);
    _mem_allocated = false;
}

} // namespace cldnn
18 changes: 17 additions & 1 deletion src/plugins/intel_gpu/src/graph/deconvolution.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -141,7 +141,9 @@ std::string deconvolution_inst::to_string(deconvolution_node const& node) {
}

deconvolution_inst::typed_primitive_inst(network& network, deconvolution_node const& node)
: parent(network, node) {
: parent(network, node),
_groups(node.get_groups()),
_split(node.get_split()) {
auto stride = argument->stride;
auto pad = argument->pad;

Expand Down Expand Up @@ -220,4 +222,18 @@ deconvolution_inst::typed_primitive_inst(network& network, deconvolution_node co
"Weights/ifm mismatch");
}
}

// Serializes deconvolution-specific instance state on top of the base
// primitive_inst data. Field order must match deconvolution_inst::load().
void deconvolution_inst::save(cldnn::BinaryOutputBuffer& ob) const {
parent::save(ob);

ob << _groups;
ob << _split;
}

// Restores deconvolution-specific instance state; reads fields in the exact
// order written by deconvolution_inst::save().
void deconvolution_inst::load(cldnn::BinaryInputBuffer& ib) {
parent::load(ib);

ib >> _groups;
ib >> _split;
}
} // namespace cldnn
88 changes: 88 additions & 0 deletions src/plugins/intel_gpu/src/graph/detection_output.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
#include "intel_gpu/runtime/error_handler.hpp"
#include "json_object.h"
#include <string>
#include "serialization/string_serializer.hpp"

namespace cldnn {
primitive_type_id detection_output::type_id() {
Expand Down Expand Up @@ -180,4 +181,91 @@ detection_output_inst::typed_primitive_inst(network& network, detection_output_n
"Detection output layer doesn't support input padding in Prior-Box input");
}

// Serializes the detection_output primitive descriptor field by field.
// The write order here is the wire format: it must stay in lock-step with
// detection_output_inst::load() below.
void detection_output_inst::save(cldnn::BinaryOutputBuffer& ob) const {
parent::save(ob);

// argument (struct detection_output)
ob << argument->id;
ob << argument->input[0];
ob << argument->input[1];
ob << argument->input[2];
// POD-style raw copies for padding and the code-type enum.
ob << cldnn::make_data(&argument->output_padding, sizeof(argument->output_padding));
ob << argument->num_classes;
ob << argument->keep_top_k;
ob << argument->share_location;
ob << argument->background_label_id;
ob << argument->nms_threshold;
ob << argument->top_k;
ob << argument->eta;
ob << cldnn::make_data(&argument->code_type, sizeof(argument->code_type));
ob << argument->variance_encoded_in_target;
ob << argument->confidence_threshold;
ob << argument->prior_info_size;
ob << argument->prior_coordinates_offset;
ob << argument->prior_is_normalized;
ob << argument->input_width;
ob << argument->input_height;
ob << argument->decrease_label_id;
ob << argument->clip_before_nms;
ob << argument->clip_after_nms;
}

// Restores the detection_output primitive descriptor. Each field is read in
// the exact order written by save(); once all values are decoded, `argument`
// is rebuilt from them.
void detection_output_inst::load(cldnn::BinaryInputBuffer& ib) {
    parent::load(ib);

    primitive_id prim_id;
    ib >> prim_id;
    primitive_id location_input;
    ib >> location_input;
    primitive_id confidence_input;
    ib >> confidence_input;
    primitive_id prior_box_input;
    ib >> prior_box_input;
    // padding and the code-type enum travel as raw bytes, mirroring save().
    padding out_padding;
    ib >> cldnn::make_data(&out_padding, sizeof(out_padding));
    uint32_t class_count;
    ib >> class_count;
    uint32_t keep_top;
    ib >> keep_top;
    bool shares_location;
    ib >> shares_location;
    int bg_label_id;
    ib >> bg_label_id;
    float nms_thresh;
    ib >> nms_thresh;
    int top_k_value;
    ib >> top_k_value;
    float eta_value;
    ib >> eta_value;
    prior_box_code_type box_code_type;
    ib >> cldnn::make_data(&box_code_type, sizeof(box_code_type));
    bool variance_encoded;
    ib >> variance_encoded;
    float conf_thresh;
    ib >> conf_thresh;
    int32_t prior_info;
    ib >> prior_info;
    int32_t prior_coord_offset;
    ib >> prior_coord_offset;
    bool prior_normalized;
    ib >> prior_normalized;
    int32_t in_width;
    ib >> in_width;
    int32_t in_height;
    ib >> in_height;
    bool decrease_label;
    ib >> decrease_label;
    bool clip_pre_nms;
    ib >> clip_pre_nms;
    bool clip_post_nms;
    ib >> clip_post_nms;

    argument = std::make_shared<detection_output>(prim_id, location_input, confidence_input, prior_box_input,
        class_count, keep_top, shares_location, bg_label_id, nms_thresh, top_k_value, eta_value, box_code_type,
        variance_encoded, conf_thresh, prior_info, prior_coord_offset,
        prior_normalized, in_width, in_height, decrease_label, clip_pre_nms, clip_post_nms,
        out_padding);
}
} // namespace cldnn
Loading

0 comments on commit f488e6c

Please sign in to comment.