From 34c2c224bbdb0300a7725096edd6227c8c80838b Mon Sep 17 00:00:00 2001 From: Eddy Kim Date: Thu, 23 Feb 2023 22:03:19 +0900 Subject: [PATCH] updated to share constant data memories across multiple streams --- .../graph/serialization/binary_buffer.hpp | 9 ++++++-- src/plugins/intel_gpu/src/graph/data.cpp | 23 +++++++++++++------ .../intel_gpu/src/graph/primitive_inst.cpp | 4 +--- .../intel_gpu/src/plugin/compiled_model.cpp | 3 +++ 4 files changed, 27 insertions(+), 12 deletions(-) diff --git a/src/plugins/intel_gpu/include/intel_gpu/graph/serialization/binary_buffer.hpp b/src/plugins/intel_gpu/include/intel_gpu/graph/serialization/binary_buffer.hpp index 7880b79a85eb4d..182865306e4611 100644 --- a/src/plugins/intel_gpu/include/intel_gpu/graph/serialization/binary_buffer.hpp +++ b/src/plugins/intel_gpu/include/intel_gpu/graph/serialization/binary_buffer.hpp @@ -14,7 +14,8 @@ namespace cldnn { class BinaryOutputBuffer : public OutputBuffer { public: - BinaryOutputBuffer(std::ostream& stream) : OutputBuffer(this), stream(stream) {} + BinaryOutputBuffer(std::ostream& stream) + : OutputBuffer(this), stream(stream), _impl_params(nullptr) {} void write(void const * data, std::streamsize size) { auto const written_size = stream.rdbuf()->sputn(reinterpret_cast(data), size); @@ -32,7 +33,8 @@ class BinaryOutputBuffer : public OutputBuffer { class BinaryInputBuffer : public InputBuffer { public: - BinaryInputBuffer(std::istream& stream, engine& engine) : InputBuffer(this, engine), stream(stream) {} + BinaryInputBuffer(std::istream& stream, engine& engine) + : InputBuffer(this, engine), stream(stream), _impl_params(nullptr), _network(nullptr) {} void read(void* const data, std::streamsize size) { auto const read_size = stream.rdbuf()->sgetn(reinterpret_cast(data), size); @@ -42,6 +44,8 @@ class BinaryInputBuffer : public InputBuffer { void setKernlImplParams(void* impl_params) { _impl_params = impl_params; } void* getKernlImplParams() const { return _impl_params; } + void setNetwork(void* network) { _network = network; } + void* getNetwork() const { return _network; } std::streampos tellg() { return stream.tellg(); } void seekg(std::streampos pos) { stream.seekg(pos); } @@ -49,6 +53,7 @@ class BinaryInputBuffer : public InputBuffer { private: std::istream& stream; void* _impl_params; + void* _network; }; template diff --git a/src/plugins/intel_gpu/src/graph/data.cpp b/src/plugins/intel_gpu/src/graph/data.cpp index 10be2a3504e81a..16e0edb6d2d033 100644 --- a/src/plugins/intel_gpu/src/graph/data.cpp +++ b/src/plugins/intel_gpu/src/graph/data.cpp @@ -85,15 +85,24 @@ void data_inst::load(BinaryInputBuffer& ib) { size_t data_size; ib >> make_data(&data_size, sizeof(size_t)); - _outputs[0] = get_network().get_memory_pool().get_memory(output_layout, _allocation_type, false); - if (_allocation_type == allocation_type::usm_host || _allocation_type == allocation_type::usm_shared) { - ib >> make_data(_outputs[0]->buffer_ptr(), data_size); + if (ib.getNetwork()) { + const network* primary_network = reinterpret_cast(ib.getNetwork()); + _outputs[0] = primary_network->get_primitive(id())->output_memory_ptr(); + auto pos = ib.tellg(); + pos += data_size; + ib.seekg(pos); } else { - std::vector _buf; - _buf.resize(data_size); - ib >> make_data(_buf.data(), data_size); - _outputs[0]->copy_from(get_network().get_stream(), _buf.data()); + _outputs[0] = get_network().get_memory_pool().get_memory(output_layout, _allocation_type, false); + + if (_allocation_type == allocation_type::usm_host || _allocation_type == allocation_type::usm_shared) { + ib >> make_data(_outputs[0]->buffer_ptr(), data_size); + } else { + std::vector _buf; + _buf.resize(data_size); + ib >> make_data(_buf.data(), data_size); + _outputs[0]->copy_from(get_network().get_stream(), _buf.data()); + } } } diff --git a/src/plugins/intel_gpu/src/graph/primitive_inst.cpp b/src/plugins/intel_gpu/src/graph/primitive_inst.cpp index 697384adb3b0f2..2eb6b3ec55a776 100644 --- a/src/plugins/intel_gpu/src/graph/primitive_inst.cpp +++ b/src/plugins/intel_gpu/src/graph/primitive_inst.cpp @@ -518,7 +518,7 @@ void primitive_inst::rebuild_exec_deps( primitive_inst::primitive_inst(network& network) : _network(network) , _node(nullptr) - , _impl_params(nullptr) + , _impl_params(make_unique()) , _impl(nullptr) , _dynamic_impl(nullptr) , _outputs({memory::ptr()}) @@ -1149,8 +1149,6 @@ int32_t primitive_inst::get_index_in_deps(memory::cptr arg) const { } void primitive_inst::load(cldnn::BinaryInputBuffer& ib) { - _impl_params.release(); - _impl_params = make_unique(); _impl_params->load(ib); ib.setKernlImplParams(_impl_params.get()); diff --git a/src/plugins/intel_gpu/src/plugin/compiled_model.cpp b/src/plugins/intel_gpu/src/plugin/compiled_model.cpp index d638e38dec1304..c699b379984c94 100644 --- a/src/plugins/intel_gpu/src/plugin/compiled_model.cpp +++ b/src/plugins/intel_gpu/src/plugin/compiled_model.cpp @@ -241,6 +241,9 @@ CompiledModel::CompiledModel(std::istream& networkModel, InferenceEngine::Remote ib.seekg(pos); auto graph = std::make_shared(ib, context_impl, m_config, n); m_graphs.push_back(graph); + if (n == 0) { + ib.setNetwork(graph->GetNetwork().get()); + } } }