Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[GPU] Graph serialization for GPU #13801

Merged
merged 26 commits into from
Nov 14, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
26 commits
Select commit Hold shift + click to select a range
9fba555
gpu graph serialization
e-ddykim Nov 2, 2022
5f1d5c4
fix to rebase
e-ddykim Nov 2, 2022
72dc32e
onednn_gpu.patch for serialization
e-ddykim Nov 3, 2022
dee8c2e
git apply --verbose to --quiet
e-ddykim Nov 3, 2022
4278920
functional tests
e-ddykim Nov 6, 2022
7998c91
removed reference of max_unpooling.hpp
e-ddykim Nov 7, 2022
60aef22
git apply --verbose
e-ddykim Nov 7, 2022
ab34465
add no args ctor for proposal_impl
e-ddykim Nov 7, 2022
59f3ef3
changed kernel_cache save/load error messages
e-ddykim Nov 7, 2022
843d443
gpu model caching control env. variable
e-ddykim Nov 7, 2022
e03c7fa
fixed nonnull warning
e-ddykim Nov 7, 2022
59811aa
impl_params are added to save and load
e-ddykim Nov 7, 2022
593b500
changed a way to use kernel_impl_params in save and load
e-ddykim Nov 7, 2022
eeac92a
get_arguments_by_idx is added
e-ddykim Nov 7, 2022
4289eb4
setenv is disabled in windows
e-ddykim Nov 8, 2022
98ee767
added missed part for onednn
e-ddykim Nov 8, 2022
2aba64a
code refactoring based on code review
e-ddykim Nov 8, 2022
6b59b36
fixed to use get_node_output_layout()
e-ddykim Nov 9, 2022
48d406d
OV_GPU_MODEL_CACHING is changed to OV_GPU_CACHE_MODEL
e-ddykim Nov 9, 2022
2155a98
reference to node and primitive are removed
e-ddykim Nov 9, 2022
4e12dff
fixed hash of enum class
e-ddykim Nov 10, 2022
41b4227
restored CanShareContextWith
e-ddykim Nov 10, 2022
cb1a1cb
serialization of intermediate memory
e-ddykim Nov 10, 2022
53ea9be
fix to rebase
e-ddykim Nov 11, 2022
d002f5e
multiclass_nms serialization
e-ddykim Nov 12, 2022
efa8436
caching_properties added
e-ddykim Nov 12, 2022
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions src/plugins/intel_gpu/include/intel_gpu/graph/network.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
#include "intel_gpu/runtime/event.hpp"
#include "intel_gpu/runtime/stream.hpp"
#include "intel_gpu/runtime/lru_cache.hpp"
#include "serialization/binary_buffer.hpp"
vladimir-paramuzov marked this conversation as resolved.
Show resolved Hide resolved

#include <map>
#include <vector>
Expand Down Expand Up @@ -79,8 +80,11 @@ struct network {

network(program::ptr program, stream::ptr stream, uint16_t stream_id);

network(cldnn::BinaryInputBuffer& ifs, stream::ptr stream, engine& engine, uint16_t stream_id = 0);

~network();

void save(cldnn::BinaryOutputBuffer& ob);

static ptr build_network(engine& engine,
const topology& topology,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,10 @@ class CompiledModel : public InferenceEngine::ExecutableNetworkThreadSafeDefault
typedef std::shared_ptr<CompiledModel> Ptr;

CompiledModel(InferenceEngine::CNNNetwork &network, std::shared_ptr<InferenceEngine::RemoteContext> context, Config config);
CompiledModel(std::istream& networkModel, std::shared_ptr<InferenceEngine::RemoteContext> context, Config config);

void Export(std::ostream& networkModel) override;
bool isSerializable();
vladimir-paramuzov marked this conversation as resolved.
Show resolved Hide resolved
std::shared_ptr<ngraph::Function> GetExecGraphInfo() override;
InferenceEngine::IInferRequestInternal::Ptr CreateInferRequest() override;
InferenceEngine::IInferRequestInternal::Ptr CreateInferRequestImpl(InferenceEngine::InputsDataMap networkInputs,
Expand Down
2 changes: 2 additions & 0 deletions src/plugins/intel_gpu/include/intel_gpu/plugin/graph.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,9 @@ class Graph {
using variable_states_map = std::map<std::string, std::vector<cldnn::network::VariableState::Ptr>>;

Graph(InferenceEngine::CNNNetwork& network, InferenceEngine::gpu::ClContext::Ptr context, Config config, uint16_t stream_id = 0);
Graph(cldnn::BinaryInputBuffer& ib, InferenceEngine::gpu::ClContext::Ptr context, Config config, uint16_t stream_id = 0);
explicit Graph(std::shared_ptr<Graph> graph, uint16_t stream_id = 0);
void Export(cldnn::BinaryOutputBuffer &ob);
std::shared_ptr<ngraph::Function> GetExecGraphInfo();

bool IsLoaded() const;
Expand Down
3 changes: 3 additions & 0 deletions src/plugins/intel_gpu/include/intel_gpu/plugin/plugin.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ class Plugin : public InferenceEngine::IInferencePlugin,
std::shared_ptr<impl> _impl;
bool streamsSet = false;
bool throttlingSet = false;
bool isModelCachingEnabled = false;

// key: device_id, value: cldnn device
std::map<std::string, cldnn::device::ptr> device_map;
Expand Down Expand Up @@ -58,6 +59,8 @@ class Plugin : public InferenceEngine::IInferencePlugin,
InferenceEngine::Parameter GetMetric(const std::string& name, const std::map<std::string, InferenceEngine::Parameter>& options) const override;
InferenceEngine::QueryNetworkResult QueryNetwork(const InferenceEngine::CNNNetwork& network,
const std::map<std::string, std::string>& config) const override;
InferenceEngine::IExecutableNetworkInternal::Ptr ImportNetwork(std::istream& networkModel,
const std::map<std::string, std::string>& config) override;

std::shared_ptr<InferenceEngine::RemoteContext> CreateContext(const InferenceEngine::ParamMap& params) override;
std::shared_ptr<InferenceEngine::RemoteContext> GetDefaultContext(const InferenceEngine::ParamMap& params) override;
Expand Down
55 changes: 55 additions & 0 deletions src/plugins/intel_gpu/include/intel_gpu/runtime/kernel_args.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -114,6 +114,61 @@ struct kernel_arguments_data {
const scalars_desc* scalars = nullptr;
};

// Serializable, index-based mirror of kernel_arguments_data: instead of holding
// memory pointers, each field stores an integer index identifying the
// corresponding argument so that kernel arguments can be re-bound after a
// network is deserialized.
// NOTE(review): `scalars` is held by value but is NOT streamed in save()/load()
// below — presumably it is restored elsewhere; confirm against the callers.
struct kernel_arguments_data_idx {
    std::vector<int32_t> inputs;
    // Default-initialize every index: save() streams all of these fields, and
    // before this fix any member that was never assigned was serialized with an
    // indeterminate value (undefined behavior on read).
    int32_t weights = 0;
    int32_t recurrent = 0;
    int32_t hidden = 0;
    int32_t cell = 0;
    int32_t bias = 0;
    int32_t weights_zero_points = 0;
    int32_t activations_zero_points = 0;
    int32_t compensation = 0;
    int32_t lookup_table = 0;
    int32_t scale_table = 0;
    int32_t slope = 0;

    std::vector<int32_t> fused_op_inputs;
    int32_t split = 0;
    scalars_desc scalars;

    /// Streams every index field to @p ob. The field order here defines the
    /// binary format and must stay in sync with load().
    template <typename BufferType>
    void save(BufferType& ob) const {
        ob << inputs;
        ob << weights;
        ob << recurrent;
        ob << hidden;
        ob << cell;
        ob << bias;
        ob << weights_zero_points;
        ob << activations_zero_points;
        ob << compensation;
        ob << lookup_table;
        ob << scale_table;
        ob << slope;
        ob << fused_op_inputs;
        ob << split;
    }

    /// Reads the index fields from @p ib in exactly the order written by save().
    template <typename BufferType>
    void load(BufferType& ib) {
        ib >> inputs;
        ib >> weights;
        ib >> recurrent;
        ib >> hidden;
        ib >> cell;
        ib >> bias;
        ib >> weights_zero_points;
        ib >> activations_zero_points;
        ib >> compensation;
        ib >> lookup_table;
        ib >> scale_table;
        ib >> slope;
        ib >> fused_op_inputs;
        ib >> split;
    }
};

////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
// KernelString
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
Expand Down
6 changes: 6 additions & 0 deletions src/plugins/intel_gpu/include/intel_gpu/runtime/layout.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -371,6 +371,12 @@ struct layout {

layout(const layout& other) = default;

    // Default constructor, required so a layout can be default-constructed
    // during deserialization. Produces a placeholder value (bin data type,
    // 'any' format, empty padding, empty partial shape) that is expected to
    // be overwritten when the real layout is read from the stream.
    layout()
    : data_type(cldnn::data_types::bin)
    , format(cldnn::format::any)
    , data_padding(padding())
    , size(ov::PartialShape()) { }

layout& operator=(const layout& other) {
if (this == &other)
return *this;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -90,7 +90,7 @@ struct padded_pool_comparer {
class memory_pool {
memory_pool();

memory_ptr alloc_memory(const layout& layout, allocation_type type);
memory_ptr alloc_memory(const layout& layout, allocation_type type, bool reset = true);
static bool has_conflict(const memory_set&, const std::set<primitive_id>&, uint32_t network_id);

std::multimap<uint64_t, memory_record> _non_padded_pool;
Expand All @@ -107,7 +107,7 @@ class memory_pool {
const std::set<primitive_id>& restrictions,
allocation_type type,
bool reusable = true); // get from pool or create memory allocation
memory_ptr get_memory(const layout& layout, allocation_type type);
memory_ptr get_memory(const layout& layout, allocation_type type, bool reset = true);
memory_ptr get_from_non_padded_pool(const layout& layout,
const primitive_id& id,
uint32_t network_id,
Expand Down
16 changes: 16 additions & 0 deletions src/plugins/intel_gpu/src/graph/convolution.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -585,4 +585,20 @@ convolution_inst::typed_primitive_inst(network& network, convolution_node const&
"Weights/ifm mismatch");
}
}

// Serializes this convolution instance: base primitive_inst state first, then
// the convolution-specific fields. Field order must mirror load() exactly.
void convolution_inst::save(cldnn::BinaryOutputBuffer& ob) const {
    parent::save(ob);

    ob << _groups;
    ob << _split;
    ob << _deform_conv_dep_offset;
}

// Restores this convolution instance from the stream; reads base state first,
// then the convolution-specific fields in the exact order written by save().
void convolution_inst::load(cldnn::BinaryInputBuffer& ib) {
    parent::load(ib);

    ib >> _groups;
    ib >> _split;
    ib >> _deform_conv_dep_offset;
}
} // namespace cldnn
9 changes: 5 additions & 4 deletions src/plugins/intel_gpu/src/graph/crop.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -244,7 +244,7 @@ crop_inst::typed_primitive_inst(network& network, crop_node const& node) : paren
}

void crop_inst::on_execute() {
if (!node->can_be_optimized())
if (!can_be_optimized())
return;

if (_outputs[0] && _network.get_engine().is_the_same_buffer(output_memory(), input_memory()))
Expand All @@ -254,17 +254,18 @@ void crop_inst::on_execute() {
}

// Reuses the input buffer as this crop's output; delegates to
// update_output_memory(), which performs the actual buffer reinterpretation.
void crop_inst::reuse_input() {
    update_output_memory();
}

// Points this crop's output at the input buffer when the node is optimized
// out. Uses the instance-level can_be_optimized() and _impl_params (instead of
// the node) so it also works on deserialized networks that have no node.
void crop_inst::update_output_memory() {
    // Nothing to do unless this crop was marked as optimizable (zero-copy).
    if (!can_be_optimized())
        return;

    // Already aliased to the input buffer — avoid redundant reinterpretation.
    if (_outputs[0] && _network.get_engine().is_the_same_buffer(output_memory(), input_memory()))
        return;

    // Reinterpret the input memory with this primitive's output layout; mark
    // that this instance does not own a separate output allocation.
    _outputs[0] = _network.get_engine().reinterpret_buffer(input_memory(), _impl_params->output_layout);
    _mem_allocated = false;
}

} // namespace cldnn
18 changes: 17 additions & 1 deletion src/plugins/intel_gpu/src/graph/deconvolution.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -141,7 +141,9 @@ std::string deconvolution_inst::to_string(deconvolution_node const& node) {
}

deconvolution_inst::typed_primitive_inst(network& network, deconvolution_node const& node)
: parent(network, node) {
: parent(network, node),
_groups(node.get_groups()),
_split(node.get_split()) {
auto stride = argument->stride;
auto pad = argument->pad;

Expand Down Expand Up @@ -220,4 +222,18 @@ deconvolution_inst::typed_primitive_inst(network& network, deconvolution_node co
"Weights/ifm mismatch");
}
}

// Serializes this deconvolution instance: base primitive_inst state first,
// then the deconvolution-specific fields. Order must mirror load() exactly.
void deconvolution_inst::save(cldnn::BinaryOutputBuffer& ob) const {
    parent::save(ob);

    ob << _groups;
    ob << _split;
}

// Restores this deconvolution instance; reads fields in the exact order
// written by save().
void deconvolution_inst::load(cldnn::BinaryInputBuffer& ib) {
    parent::load(ib);

    ib >> _groups;
    ib >> _split;
}
} // namespace cldnn
88 changes: 88 additions & 0 deletions src/plugins/intel_gpu/src/graph/detection_output.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
#include "intel_gpu/runtime/error_handler.hpp"
#include "json_object.h"
#include <string>
#include "serialization/string_serializer.hpp"

namespace cldnn {
primitive_type_id detection_output::type_id() {
Expand Down Expand Up @@ -180,4 +181,91 @@ detection_output_inst::typed_primitive_inst(network& network, detection_output_n
"Detection output layer doesn't support input padding in Prior-Box input");
}

// Serializes this detection_output instance. After the base state, every field
// of the detection_output primitive descriptor is streamed individually so the
// descriptor can be fully reconstructed in load() without a program/node.
// The field order here defines the binary format and must mirror load().
void detection_output_inst::save(cldnn::BinaryOutputBuffer& ob) const {
    parent::save(ob);

    // argument (struct detection_output)
    ob << argument->id;
    ob << argument->input[0];
    ob << argument->input[1];
    ob << argument->input[2];
    // Non-trivially-streamable members (padding, enum) are written as raw
    // bytes via make_data; load() reads them back the same way.
    ob << cldnn::make_data(&argument->output_padding, sizeof(argument->output_padding));
    ob << argument->num_classes;
    ob << argument->keep_top_k;
    ob << argument->share_location;
    ob << argument->background_label_id;
    ob << argument->nms_threshold;
    ob << argument->top_k;
    ob << argument->eta;
    ob << cldnn::make_data(&argument->code_type, sizeof(argument->code_type));
    ob << argument->variance_encoded_in_target;
    ob << argument->confidence_threshold;
    ob << argument->prior_info_size;
    ob << argument->prior_coordinates_offset;
    ob << argument->prior_is_normalized;
    ob << argument->input_width;
    ob << argument->input_height;
    ob << argument->decrease_label_id;
    ob << argument->clip_before_nms;
    ob << argument->clip_after_nms;
}

// Restores this detection_output instance. Reads every descriptor field into
// locals — in exactly the order written by save() — and then rebuilds the
// detection_output primitive descriptor, since no program/node is available
// when a network is imported from a binary buffer.
void detection_output_inst::load(cldnn::BinaryInputBuffer& ib) {
    parent::load(ib);

    primitive_id id;
    primitive_id input_location;
    primitive_id input_confidence;
    primitive_id input_prior_box;
    uint32_t num_classes;
    uint32_t keep_top_k;
    bool share_location;
    int background_label_id;
    float nms_threshold;
    int top_k;
    float eta;
    prior_box_code_type code_type;
    bool variance_encoded_in_target;
    float confidence_threshold;
    int32_t prior_info_size;
    int32_t prior_coordinates_offset;
    bool prior_is_normalized;
    int32_t input_width;
    int32_t input_height;
    bool decrease_label_id;
    bool clip_before_nms;
    bool clip_after_nms;
    // primitive_id ext_prim_id;
    padding output_padding;

    ib >> id;
    ib >> input_location;
    ib >> input_confidence;
    ib >> input_prior_box;
    // padding and the code_type enum were written as raw bytes by save().
    ib >> cldnn::make_data(&output_padding, sizeof(output_padding));
    ib >> num_classes;
    ib >> keep_top_k;
    ib >> share_location;
    ib >> background_label_id;
    ib >> nms_threshold;
    ib >> top_k;
    ib >> eta;
    ib >> cldnn::make_data(&code_type, sizeof(code_type));
    ib >> variance_encoded_in_target;
    ib >> confidence_threshold;
    ib >> prior_info_size;
    ib >> prior_coordinates_offset;
    ib >> prior_is_normalized;
    ib >> input_width;
    ib >> input_height;
    ib >> decrease_label_id;
    ib >> clip_before_nms;
    ib >> clip_after_nms;

    // Rebuild the primitive descriptor from the restored fields.
    argument = std::make_shared<detection_output>(id, input_location, input_confidence, input_prior_box,
        num_classes, keep_top_k, share_location, background_label_id, nms_threshold, top_k, eta, code_type,
        variance_encoded_in_target, confidence_threshold, prior_info_size, prior_coordinates_offset,
        prior_is_normalized, input_width, input_height, decrease_label_id, clip_before_nms, clip_after_nms,
        output_padding);
}
} // namespace cldnn
Loading