From 9fba55584e8372ca85c453aae79ddc37338906ab Mon Sep 17 00:00:00 2001 From: Eddy Kim Date: Thu, 3 Nov 2022 00:53:43 +0900 Subject: [PATCH 01/26] gpu graph serialization --- .../include/intel_gpu/graph/network.hpp | 4 + .../intel_gpu/plugin/compiled_model.hpp | 3 + .../include/intel_gpu/plugin/graph.hpp | 2 + .../include/intel_gpu/plugin/plugin.hpp | 2 + .../include/intel_gpu/runtime/kernel_args.hpp | 59 ++++ .../include/intel_gpu/runtime/layout.hpp | 6 + .../include/intel_gpu/runtime/memory_pool.hpp | 4 +- .../intel_gpu/src/graph/convolution.cpp | 16 + src/plugins/intel_gpu/src/graph/crop.cpp | 2 +- .../intel_gpu/src/graph/detection_output.cpp | 88 +++++ .../intel_gpu/src/graph/get_type_id.cpp | 198 +++++++++++ .../graph/impls/common/wait_for_events.cpp | 14 + .../intel_gpu/src/graph/impls/cpu/assign.cpp | 4 + .../src/graph/impls/cpu/detection_output.cpp | 19 ++ .../graph/impls/cpu/non_max_suppression.cpp | 4 + .../src/graph/impls/cpu/proposal.cpp | 7 + .../src/graph/impls/cpu/read_value.cpp | 4 + .../src/graph/impls/ocl/activation.cpp | 19 +- .../src/graph/impls/ocl/adaptive_pooling.cpp | 6 +- .../src/graph/impls/ocl/arg_max_min.cpp | 6 +- .../src/graph/impls/ocl/average_unpooling.cpp | 6 +- .../src/graph/impls/ocl/batch_to_space.cpp | 4 + .../graph/impls/ocl/binary_convolution.cpp | 18 +- .../intel_gpu/src/graph/impls/ocl/border.cpp | 4 + .../src/graph/impls/ocl/broadcast.cpp | 4 + .../src/graph/impls/ocl/bucketize.cpp | 4 + .../src/graph/impls/ocl/concatenation.cpp | 17 + .../src/graph/impls/ocl/convert_color.cpp | 6 +- .../src/graph/impls/ocl/convolution.cpp | 22 +- .../intel_gpu/src/graph/impls/ocl/crop.cpp | 16 + .../graph/impls/ocl/ctc_greedy_decoder.cpp | 4 + .../src/graph/impls/ocl/ctc_loss.cpp | 4 + .../intel_gpu/src/graph/impls/ocl/cum_sum.cpp | 4 + .../src/graph/impls/ocl/custom_primitive.cpp | 21 ++ .../src/graph/impls/ocl/deconvolution.cpp | 20 +- .../impls/ocl/deformable_convolution.cpp | 23 +- .../src/graph/impls/ocl/depth_to_space.cpp | 4 + 
.../src/graph/impls/ocl/detection_output.cpp | 4 + .../intel_gpu/src/graph/impls/ocl/dft.cpp | 4 + .../intel_gpu/src/graph/impls/ocl/eltwise.cpp | 6 +- .../src/graph/impls/ocl/embedding_bag.cpp | 4 + ...xperimental_detectron_detection_output.cpp | 7 +- ...ectron_generate_proposals_single_image.cpp | 7 +- ...imental_detectron_prior_grid_generator.cpp | 5 + ...mental_detectron_roi_feature_extractor.cpp | 7 +- .../ocl/experimental_detectron_topk_rois.cpp | 5 + .../graph/impls/ocl/extract_image_patches.cpp | 4 + .../intel_gpu/src/graph/impls/ocl/eye.cpp | 4 + .../src/graph/impls/ocl/fully_connected.cpp | 6 +- .../intel_gpu/src/graph/impls/ocl/gather.cpp | 4 + .../src/graph/impls/ocl/gather_elements.cpp | 4 + .../src/graph/impls/ocl/gather_nd.cpp | 4 + .../src/graph/impls/ocl/gather_tree.cpp | 4 + .../intel_gpu/src/graph/impls/ocl/gemm.cpp | 4 + .../graph/impls/ocl/generate_proposals.cpp | 6 +- .../src/graph/impls/ocl/generic_layer.cpp | 25 +- .../src/graph/impls/ocl/grid_sample.cpp | 4 + .../intel_gpu/src/graph/impls/ocl/grn.cpp | 4 + .../intel_gpu/src/graph/impls/ocl/lrn.cpp | 4 + .../graph/impls/ocl/lstm_dynamic_input.cpp | 6 +- .../graph/impls/ocl/lstm_dynamic_timeloop.cpp | 6 +- .../src/graph/impls/ocl/lstm_elt.cpp | 6 +- .../src/graph/impls/ocl/lstm_gemm.cpp | 6 +- .../src/graph/impls/ocl/mutable_data.cpp | 4 + .../intel_gpu/src/graph/impls/ocl/mvn.cpp | 4 + .../graph/impls/ocl/non_max_suppression.cpp | 6 +- .../src/graph/impls/ocl/non_zero.cpp | 7 + .../src/graph/impls/ocl/normalize.cpp | 6 +- .../intel_gpu/src/graph/impls/ocl/one_hot.cpp | 4 + .../intel_gpu/src/graph/impls/ocl/permute.cpp | 4 + .../intel_gpu/src/graph/impls/ocl/pooling.cpp | 6 +- .../src/graph/impls/ocl/primitive_base.hpp | 148 +++++++- .../src/graph/impls/ocl/prior_box.cpp | 4 + .../src/graph/impls/ocl/pyramid_roi_align.cpp | 4 + .../src/graph/impls/ocl/quantize.cpp | 6 +- .../src/graph/impls/ocl/random_uniform.cpp | 4 + .../intel_gpu/src/graph/impls/ocl/range.cpp | 4 + 
.../intel_gpu/src/graph/impls/ocl/reduce.cpp | 4 + .../src/graph/impls/ocl/region_yolo.cpp | 4 + .../intel_gpu/src/graph/impls/ocl/reorder.cpp | 20 +- .../src/graph/impls/ocl/reorg_yolo.cpp | 4 + .../src/graph/impls/ocl/resample.cpp | 4 + .../intel_gpu/src/graph/impls/ocl/reshape.cpp | 4 + .../intel_gpu/src/graph/impls/ocl/reverse.cpp | 4 + .../src/graph/impls/ocl/reverse_sequence.cpp | 4 + .../src/graph/impls/ocl/roi_align.cpp | 6 +- .../src/graph/impls/ocl/roi_pooling.cpp | 6 +- .../intel_gpu/src/graph/impls/ocl/roll.cpp | 4 + .../impls/ocl/scatter_elements_update.cpp | 5 + .../src/graph/impls/ocl/scatter_nd_update.cpp | 4 + .../src/graph/impls/ocl/scatter_update.cpp | 4 + .../intel_gpu/src/graph/impls/ocl/select.cpp | 4 + .../src/graph/impls/ocl/shape_of.cpp | 4 + .../src/graph/impls/ocl/shuffle_channels.cpp | 4 + .../intel_gpu/src/graph/impls/ocl/slice.cpp | 4 + .../intel_gpu/src/graph/impls/ocl/softmax.cpp | 4 + .../src/graph/impls/ocl/space_to_batch.cpp | 4 + .../src/graph/impls/ocl/space_to_depth.cpp | 4 + .../src/graph/impls/ocl/strided_slice.cpp | 4 + .../intel_gpu/src/graph/impls/ocl/tile.cpp | 4 + .../impls/onednn/concatenation_onednn.cpp | 72 +++- .../graph/impls/onednn/convolution_onednn.cpp | 36 +- .../impls/onednn/deconvolution_onednn.cpp | 27 ++ .../impls/onednn/fully_connected_onednn.cpp | 27 ++ .../src/graph/impls/onednn/gemm_onednn.cpp | 27 ++ .../src/graph/impls/onednn/pooling_onednn.cpp | 27 ++ .../impls/onednn/primitive_onednn_base.h | 1 + .../graph/impls/onednn/reduction_onednn.cpp | 27 ++ .../src/graph/impls/onednn/reorder_onednn.cpp | 48 ++- .../src/graph/include/activation_inst.h | 1 + .../src/graph/include/adaptive_pooling_inst.h | 1 + .../src/graph/include/arg_max_min_inst.h | 1 + .../intel_gpu/src/graph/include/assign_inst.h | 1 + .../graph/include/average_unpooling_inst.h | 1 + .../src/graph/include/batch_to_space_inst.h | 1 + .../graph/include/binary_convolution_inst.h | 1 + .../intel_gpu/src/graph/include/border_inst.h | 1 + 
.../src/graph/include/broadcast_inst.h | 1 + .../src/graph/include/concatenation_inst.h | 1 + .../src/graph/include/condition_inst.h | 1 + .../src/graph/include/convert_color_inst.h | 1 + .../src/graph/include/convolution_inst.h | 4 + .../intel_gpu/src/graph/include/crop_inst.h | 1 + .../graph/include/ctc_greedy_decoder_inst.h | 1 + .../src/graph/include/cum_sum_inst.h | 1 + .../graph/include/custom_gpu_primitive_inst.h | 1 + .../intel_gpu/src/graph/include/data_inst.h | 1 + .../src/graph/include/deconvolution_inst.h | 1 + .../include/deformable_convolution_inst.h | 3 + .../src/graph/include/depth_to_space_inst.h | 1 + .../src/graph/include/detection_output_inst.h | 4 + .../src/graph/include/eltwise_inst.h | 1 + .../src/graph/include/embedding_bag_inst.h | 1 + ...mental_detectron_detection_output_inst.hpp | 1 + ...n_generate_proposals_single_image_inst.hpp | 1 + .../experimental_detectron_topk_rois_inst.h | 1 + .../include/extract_image_patches_inst.h | 1 + .../intel_gpu/src/graph/include/eye_inst.h | 1 + .../src/graph/include/fully_connected_inst.h | 1 + .../src/graph/include/gather_elements_inst.h | 1 + .../intel_gpu/src/graph/include/gather_inst.h | 1 + .../src/graph/include/gather_nd_inst.h | 1 + .../src/graph/include/gather_tree_inst.h | 1 + .../intel_gpu/src/graph/include/gemm_inst.h | 1 + .../graph/include/generate_proposals_inst.h | 1 + .../src/graph/include/generic_layer_inst.h | 1 + .../intel_gpu/src/graph/include/grn_inst.h | 1 + .../src/graph/include/input_layout_inst.h | 1 + .../graph/include/kernel_selector_helper.h | 6 + .../intel_gpu/src/graph/include/loop_inst.h | 1 + .../intel_gpu/src/graph/include/lrn_inst.h | 1 + .../graph/include/lstm_dynamic_input_inst.h | 1 + .../src/graph/include/lstm_dynamic_inst.h | 1 + .../include/lstm_dynamic_timeloop_inst.h | 1 + .../src/graph/include/lstm_elt_inst.h | 1 + .../src/graph/include/lstm_gemm_inst.h | 1 + .../intel_gpu/src/graph/include/lstm_inst.h | 1 + .../src/graph/include/mutable_data_inst.h | 4 + 
.../intel_gpu/src/graph/include/mvn_inst.h | 1 + .../graph/include/non_max_suppression_inst.h | 1 + .../src/graph/include/non_zero_inst.h | 2 + .../src/graph/include/normalize_inst.h | 1 + .../src/graph/include/one_hot_inst.h | 1 + .../src/graph/include/permute_inst.h | 1 + .../src/graph/include/primitive_inst.h | 48 ++- .../src/graph/include/primitive_type.h | 1 + .../src/graph/include/primitive_type_base.h | 4 + .../src/graph/include/prior_box_inst.h | 1 + .../src/graph/include/proposal_inst.h | 1 + .../graph/include/pyramid_roi_align_inst.h | 1 + .../src/graph/include/quantize_inst.h | 1 + .../src/graph/include/random_uniform_inst.h | 1 + .../intel_gpu/src/graph/include/range_inst.h | 3 + .../src/graph/include/read_value_inst.h | 1 + .../intel_gpu/src/graph/include/reduce_inst.h | 1 + .../src/graph/include/region_yolo_inst.h | 1 + .../src/graph/include/reorder_inst.h | 7 + .../src/graph/include/reorg_yolo_inst.h | 1 + .../src/graph/include/resample_inst.h | 1 + .../src/graph/include/reshape_inst.h | 1 + .../src/graph/include/reverse_inst.h | 1 + .../src/graph/include/reverse_sequence_inst.h | 1 + .../src/graph/include/roi_align_inst.h | 1 + .../include/scatter_elements_update_inst.h | 1 + .../graph/include/scatter_nd_update_inst.h | 1 + .../src/graph/include/scatter_update_inst.h | 1 + .../intel_gpu/src/graph/include/select_inst.h | 1 + .../include/serialization/binary_buffer.hpp | 85 +++++ .../src/graph/include/serialization/bind.hpp | 178 ++++++++++ .../graph/include/serialization/buffer.hpp | 84 +++++ .../cl_kernel_data_serializer.hpp | 59 ++++ .../graph/include/serialization/helpers.hpp | 26 ++ .../serialization/layout_serializer.hpp | 65 ++++ .../include/serialization/map_serializer.hpp | 40 +++ .../include/serialization/object_types.hpp | 119 +++++++ .../serialization/polymorphic_serializer.hpp | 52 +++ .../include/serialization/serializer.hpp | 20 ++ .../include/serialization/set_serializer.hpp | 39 +++ .../include/serialization/static_instance.hpp | 56 
+++ .../serialization/string_serializer.hpp | 34 ++ .../serialization/vector_serializer.hpp | 64 ++++ .../src/graph/include/shape_of_inst.h | 1 + .../src/graph/include/shuffle_channels_inst.h | 1 + .../intel_gpu/src/graph/include/slice_inst.h | 1 + .../src/graph/include/softmax_inst.h | 1 + .../src/graph/include/space_to_batch_inst.h | 1 + .../src/graph/include/space_to_depth_inst.h | 1 + .../intel_gpu/src/graph/include/split_inst.h | 1 + .../src/graph/include/strided_slice_inst.h | 1 + .../intel_gpu/src/graph/include/tile_inst.h | 1 + .../intel_gpu/src/graph/input_layout.cpp | 4 +- .../src/graph/kernel_selector_helper.cpp | 43 +++ .../intel_gpu/src/graph/mutable_data.cpp | 29 ++ src/plugins/intel_gpu/src/graph/network.cpp | 163 ++++++++- .../intel_gpu/src/graph/primitive_inst.cpp | 260 ++++++++++++++ src/plugins/intel_gpu/src/graph/program.cpp | 2 +- src/plugins/intel_gpu/src/graph/reorder.cpp | 20 +- .../intel_gpu/src/plugin/compiled_model.cpp | 318 ++++++++++++++++++ src/plugins/intel_gpu/src/plugin/graph.cpp | 33 +- src/plugins/intel_gpu/src/plugin/plugin.cpp | 41 +++ .../intel_gpu/src/runtime/CMakeLists.txt | 5 +- .../intel_gpu/src/runtime/kernels_cache.cpp | 96 ++++++ .../intel_gpu/src/runtime/kernels_cache.hpp | 5 + .../intel_gpu/src/runtime/memory_pool.cpp | 8 +- 224 files changed, 3454 insertions(+), 70 deletions(-) create mode 100644 src/plugins/intel_gpu/src/graph/get_type_id.cpp create mode 100644 src/plugins/intel_gpu/src/graph/include/serialization/binary_buffer.hpp create mode 100644 src/plugins/intel_gpu/src/graph/include/serialization/bind.hpp create mode 100644 src/plugins/intel_gpu/src/graph/include/serialization/buffer.hpp create mode 100644 src/plugins/intel_gpu/src/graph/include/serialization/cl_kernel_data_serializer.hpp create mode 100644 src/plugins/intel_gpu/src/graph/include/serialization/helpers.hpp create mode 100644 src/plugins/intel_gpu/src/graph/include/serialization/layout_serializer.hpp create mode 100644 
src/plugins/intel_gpu/src/graph/include/serialization/map_serializer.hpp create mode 100644 src/plugins/intel_gpu/src/graph/include/serialization/object_types.hpp create mode 100644 src/plugins/intel_gpu/src/graph/include/serialization/polymorphic_serializer.hpp create mode 100644 src/plugins/intel_gpu/src/graph/include/serialization/serializer.hpp create mode 100644 src/plugins/intel_gpu/src/graph/include/serialization/set_serializer.hpp create mode 100644 src/plugins/intel_gpu/src/graph/include/serialization/static_instance.hpp create mode 100644 src/plugins/intel_gpu/src/graph/include/serialization/string_serializer.hpp create mode 100644 src/plugins/intel_gpu/src/graph/include/serialization/vector_serializer.hpp diff --git a/src/plugins/intel_gpu/include/intel_gpu/graph/network.hpp b/src/plugins/intel_gpu/include/intel_gpu/graph/network.hpp index 805db2b576b365..6ede25b2b348df 100644 --- a/src/plugins/intel_gpu/include/intel_gpu/graph/network.hpp +++ b/src/plugins/intel_gpu/include/intel_gpu/graph/network.hpp @@ -12,6 +12,7 @@ #include "intel_gpu/runtime/event.hpp" #include "intel_gpu/runtime/stream.hpp" #include "intel_gpu/runtime/lru_cache.hpp" +#include "serialization/binary_buffer.hpp" #include #include @@ -79,8 +80,11 @@ struct network { network(program::ptr program, stream::ptr stream, uint16_t stream_id); + network(cldnn::BinaryInputBuffer& ifs, stream::ptr stream, engine& engine, uint16_t stream_id = 0); + ~network(); + void save(cldnn::BinaryOutputBuffer& ob); static ptr build_network(engine& engine, const topology& topology, diff --git a/src/plugins/intel_gpu/include/intel_gpu/plugin/compiled_model.hpp b/src/plugins/intel_gpu/include/intel_gpu/plugin/compiled_model.hpp index ca23fa687ee7af..09aa0a03bf7cb2 100644 --- a/src/plugins/intel_gpu/include/intel_gpu/plugin/compiled_model.hpp +++ b/src/plugins/intel_gpu/include/intel_gpu/plugin/compiled_model.hpp @@ -25,7 +25,10 @@ class CompiledModel : public InferenceEngine::ExecutableNetworkThreadSafeDefault 
typedef std::shared_ptr Ptr; CompiledModel(InferenceEngine::CNNNetwork &network, std::shared_ptr context, Config config); + CompiledModel(std::istream& networkModel, std::shared_ptr context, Config config); + void Export(std::ostream& networkModel) override; + bool isSerializable(); std::shared_ptr GetExecGraphInfo() override; InferenceEngine::IInferRequestInternal::Ptr CreateInferRequest() override; InferenceEngine::IInferRequestInternal::Ptr CreateInferRequestImpl(InferenceEngine::InputsDataMap networkInputs, diff --git a/src/plugins/intel_gpu/include/intel_gpu/plugin/graph.hpp b/src/plugins/intel_gpu/include/intel_gpu/plugin/graph.hpp index 8bf698f641b1d1..47c2e99ac00460 100644 --- a/src/plugins/intel_gpu/include/intel_gpu/plugin/graph.hpp +++ b/src/plugins/intel_gpu/include/intel_gpu/plugin/graph.hpp @@ -41,7 +41,9 @@ class Graph { using variable_states_map = std::map>; Graph(InferenceEngine::CNNNetwork& network, InferenceEngine::gpu::ClContext::Ptr context, Config config, uint16_t stream_id = 0); + Graph(cldnn::BinaryInputBuffer& ib, InferenceEngine::gpu::ClContext::Ptr context, Config config, uint16_t stream_id = 0); explicit Graph(std::shared_ptr graph, uint16_t stream_id = 0); + void Export(cldnn::BinaryOutputBuffer &ob); std::shared_ptr GetExecGraphInfo(); bool IsLoaded() const; diff --git a/src/plugins/intel_gpu/include/intel_gpu/plugin/plugin.hpp b/src/plugins/intel_gpu/include/intel_gpu/plugin/plugin.hpp index ec002873ce1438..4259ee5eb5ff1b 100644 --- a/src/plugins/intel_gpu/include/intel_gpu/plugin/plugin.hpp +++ b/src/plugins/intel_gpu/include/intel_gpu/plugin/plugin.hpp @@ -58,6 +58,8 @@ class Plugin : public InferenceEngine::IInferencePlugin, InferenceEngine::Parameter GetMetric(const std::string& name, const std::map& options) const override; InferenceEngine::QueryNetworkResult QueryNetwork(const InferenceEngine::CNNNetwork& network, const std::map& config) const override; + InferenceEngine::IExecutableNetworkInternal::Ptr 
ImportNetwork(std::istream& networkModel, + const std::map& config) override; std::shared_ptr CreateContext(const InferenceEngine::ParamMap& params) override; std::shared_ptr GetDefaultContext(const InferenceEngine::ParamMap& params) override; diff --git a/src/plugins/intel_gpu/include/intel_gpu/runtime/kernel_args.hpp b/src/plugins/intel_gpu/include/intel_gpu/runtime/kernel_args.hpp index ad82ba65a18fdc..060fe254675d4a 100644 --- a/src/plugins/intel_gpu/include/intel_gpu/runtime/kernel_args.hpp +++ b/src/plugins/intel_gpu/include/intel_gpu/runtime/kernel_args.hpp @@ -114,6 +114,65 @@ struct kernel_arguments_data { const scalars_desc* scalars = nullptr; }; +struct kernel_arguments_data_idx { + std::vector inputs; + std::vector intermediates; + // std::vector outputs; + int32_t weights; + int32_t recurrent; + int32_t hidden; + int32_t cell; + int32_t bias; + int32_t weights_zero_points; + int32_t activations_zero_points; + int32_t compensation; + int32_t lookup_table; + int32_t scale_table; + int32_t slope; + + std::vector fused_op_inputs; + int32_t split = 0; + scalars_desc scalars; + + template + void save(BufferType& ob) const { + ob << inputs; + ob << intermediates; + ob << weights; + ob << recurrent; + ob << hidden; + ob << cell; + ob << bias; + ob << weights_zero_points; + ob << activations_zero_points; + ob << compensation; + ob << lookup_table; + ob << scale_table; + ob << slope; + ob << fused_op_inputs; + ob << split; + } + + template + void load(BufferType& ib) { + ib >> inputs; + ib >> intermediates; + ib >> weights; + ib >> recurrent; + ib >> hidden; + ib >> cell; + ib >> bias; + ib >> weights_zero_points; + ib >> activations_zero_points; + ib >> compensation; + ib >> lookup_table; + ib >> scale_table; + ib >> slope; + ib >> fused_op_inputs; + ib >> split; + } +}; + //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// // KernelString 
//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// diff --git a/src/plugins/intel_gpu/include/intel_gpu/runtime/layout.hpp b/src/plugins/intel_gpu/include/intel_gpu/runtime/layout.hpp index 60a59ee8007ae4..9e058f772fd5b4 100644 --- a/src/plugins/intel_gpu/include/intel_gpu/runtime/layout.hpp +++ b/src/plugins/intel_gpu/include/intel_gpu/runtime/layout.hpp @@ -371,6 +371,12 @@ struct layout { layout(const layout& other) = default; + layout() + : data_type(cldnn::data_types::bin) + , format(cldnn::format::any) + , data_padding(padding()) + , size(ov::PartialShape()) { } + layout& operator=(const layout& other) { if (this == &other) return *this; diff --git a/src/plugins/intel_gpu/include/intel_gpu/runtime/memory_pool.hpp b/src/plugins/intel_gpu/include/intel_gpu/runtime/memory_pool.hpp index 4e1b773ea8bb65..4771fcb7759fea 100644 --- a/src/plugins/intel_gpu/include/intel_gpu/runtime/memory_pool.hpp +++ b/src/plugins/intel_gpu/include/intel_gpu/runtime/memory_pool.hpp @@ -90,7 +90,7 @@ struct padded_pool_comparer { class memory_pool { memory_pool(); - memory_ptr alloc_memory(const layout& layout, allocation_type type); + memory_ptr alloc_memory(const layout& layout, allocation_type type, bool reset = true); static bool has_conflict(const memory_set&, const std::set&, uint32_t network_id); std::multimap _non_padded_pool; @@ -107,7 +107,7 @@ class memory_pool { const std::set& restrictions, allocation_type type, bool reusable = true); // get from pool or create memory allocation - memory_ptr get_memory(const layout& layout, allocation_type type); + memory_ptr get_memory(const layout& layout, allocation_type type, bool reset = true); memory_ptr get_from_non_padded_pool(const layout& layout, const primitive_id& id, uint32_t network_id, diff --git a/src/plugins/intel_gpu/src/graph/convolution.cpp b/src/plugins/intel_gpu/src/graph/convolution.cpp index 02b5e1fcf7016f..2c0ff9e3a4f691 100644 --- 
a/src/plugins/intel_gpu/src/graph/convolution.cpp +++ b/src/plugins/intel_gpu/src/graph/convolution.cpp @@ -585,4 +585,20 @@ convolution_inst::typed_primitive_inst(network& network, convolution_node const& "Weights/ifm mismatch"); } } + +void convolution_inst::save(cldnn::BinaryOutputBuffer& ob) const { + parent::save(ob); + + ob << _groups; + ob << _split; + ob << _deform_conv_dep_offset; +} + +void convolution_inst::load(cldnn::BinaryInputBuffer& ib) { + parent::load(ib); + + ib >> _groups; + ib >> _split; + ib >> _deform_conv_dep_offset; +} } // namespace cldnn diff --git a/src/plugins/intel_gpu/src/graph/crop.cpp b/src/plugins/intel_gpu/src/graph/crop.cpp index bced7ab850a60c..df6411599ad517 100644 --- a/src/plugins/intel_gpu/src/graph/crop.cpp +++ b/src/plugins/intel_gpu/src/graph/crop.cpp @@ -244,7 +244,7 @@ crop_inst::typed_primitive_inst(network& network, crop_node const& node) : paren } void crop_inst::on_execute() { - if (!node->can_be_optimized()) + if (!can_be_optimized()) return; if (_outputs[0] && _network.get_engine().is_the_same_buffer(output_memory(), input_memory())) diff --git a/src/plugins/intel_gpu/src/graph/detection_output.cpp b/src/plugins/intel_gpu/src/graph/detection_output.cpp index a47b5a209d7c6a..c25ca051922148 100644 --- a/src/plugins/intel_gpu/src/graph/detection_output.cpp +++ b/src/plugins/intel_gpu/src/graph/detection_output.cpp @@ -7,6 +7,7 @@ #include "intel_gpu/runtime/error_handler.hpp" #include "json_object.h" #include +#include "serialization/string_serializer.hpp" namespace cldnn { primitive_type_id detection_output::type_id() { @@ -180,4 +181,91 @@ detection_output_inst::typed_primitive_inst(network& network, detection_output_n "Detection output layer doesn't support input padding in Prior-Box input"); } +void detection_output_inst::save(cldnn::BinaryOutputBuffer& ob) const { + parent::save(ob); + + // argument (struct detection_output) + ob << argument->id; + ob << argument->input[0]; + ob << argument->input[1]; + ob << 
argument->input[2]; + ob << cldnn::make_data(&argument->output_padding, sizeof(argument->output_padding)); + ob << argument->num_classes; + ob << argument->keep_top_k; + ob << argument->share_location; + ob << argument->background_label_id; + ob << argument->nms_threshold; + ob << argument->top_k; + ob << argument->eta; + ob << cldnn::make_data(&argument->code_type, sizeof(argument->code_type)); + ob << argument->variance_encoded_in_target; + ob << argument->confidence_threshold; + ob << argument->prior_info_size; + ob << argument->prior_coordinates_offset; + ob << argument->prior_is_normalized; + ob << argument->input_width; + ob << argument->input_height; + ob << argument->decrease_label_id; + ob << argument->clip_before_nms; + ob << argument->clip_after_nms; +} + +void detection_output_inst::load(cldnn::BinaryInputBuffer& ib) { + parent::load(ib); + + primitive_id id; + primitive_id input_location; + primitive_id input_confidence; + primitive_id input_prior_box; + uint32_t num_classes; + uint32_t keep_top_k; + bool share_location; + int background_label_id; + float nms_threshold; + int top_k; + float eta; + prior_box_code_type code_type; + bool variance_encoded_in_target; + float confidence_threshold; + int32_t prior_info_size; + int32_t prior_coordinates_offset; + bool prior_is_normalized; + int32_t input_width; + int32_t input_height; + bool decrease_label_id; + bool clip_before_nms; + bool clip_after_nms; + // primitive_id ext_prim_id; + padding output_padding; + + ib >> id; + ib >> input_location; + ib >> input_confidence; + ib >> input_prior_box; + ib >> cldnn::make_data(&output_padding, sizeof(output_padding)); + ib >> num_classes; + ib >> keep_top_k; + ib >> share_location; + ib >> background_label_id; + ib >> nms_threshold; + ib >> top_k; + ib >> eta; + ib >> cldnn::make_data(&code_type, sizeof(code_type)); + ib >> variance_encoded_in_target; + ib >> confidence_threshold; + ib >> prior_info_size; + ib >> prior_coordinates_offset; + ib >> 
prior_is_normalized; + ib >> input_width; + ib >> input_height; + ib >> decrease_label_id; + ib >> clip_before_nms; + ib >> clip_after_nms; + + argument = std::make_shared(id, input_location, input_confidence, input_prior_box, + num_classes, keep_top_k, share_location, background_label_id, nms_threshold, top_k, eta, code_type, + variance_encoded_in_target, confidence_threshold, prior_info_size, prior_coordinates_offset, + prior_is_normalized, input_width, input_height, decrease_label_id, clip_before_nms, clip_after_nms, + output_padding); +} } // namespace cldnn diff --git a/src/plugins/intel_gpu/src/graph/get_type_id.cpp b/src/plugins/intel_gpu/src/graph/get_type_id.cpp new file mode 100644 index 00000000000000..7e0d4906c800f1 --- /dev/null +++ b/src/plugins/intel_gpu/src/graph/get_type_id.cpp @@ -0,0 +1,198 @@ +// Copyright (C) 2018-2022 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +/////////////////////////////////////////////////////////////////////////////////////////////////// + +#include "primitive_inst.h" +#include "generic_layer.hpp" +#include "intel_gpu/primitives/activation.hpp" +#include "intel_gpu/primitives/adaptive_pooling.hpp" +#include "intel_gpu/primitives/arg_max_min.hpp" +#include "intel_gpu/primitives/assign.hpp" +#include "intel_gpu/primitives/average_unpooling.hpp" +#include "intel_gpu/primitives/batch_to_space.hpp" +#include "intel_gpu/primitives/binary_convolution.hpp" +#include "intel_gpu/primitives/border.hpp" +#include "intel_gpu/primitives/broadcast.hpp" +#include "intel_gpu/primitives/bucketize.hpp" +#include "intel_gpu/primitives/concatenation.hpp" +#include "intel_gpu/primitives/condition.hpp" +#include "intel_gpu/primitives/convert_color.hpp" +#include "intel_gpu/primitives/convolution.hpp" +#include "intel_gpu/primitives/crop.hpp" +#include "intel_gpu/primitives/ctc_greedy_decoder.hpp" +#include "intel_gpu/primitives/ctc_loss.hpp" +#include "intel_gpu/primitives/cum_sum.hpp" +#include 
"intel_gpu/primitives/custom_gpu_primitive.hpp" +#include "intel_gpu/primitives/data.hpp" +#include "intel_gpu/primitives/deconvolution.hpp" +#include "intel_gpu/primitives/depth_to_space.hpp" +#include "intel_gpu/primitives/detection_output.hpp" +#include "intel_gpu/primitives/dft.hpp" +#include "intel_gpu/primitives/eltwise.hpp" +#include "intel_gpu/primitives/embedding_bag.hpp" +#include "intel_gpu/primitives/experimental_detectron_detection_output.hpp" +#include "intel_gpu/primitives/experimental_detectron_generate_proposals_single_image.hpp" +#include "intel_gpu/primitives/experimental_detectron_prior_grid_generator.hpp" +#include "intel_gpu/primitives/experimental_detectron_roi_feature_extractor.hpp" +#include "intel_gpu/primitives/experimental_detectron_topk_rois.hpp" +#include "intel_gpu/primitives/extract_image_patches.hpp" +#include "intel_gpu/primitives/eye.hpp" +#include "intel_gpu/primitives/fully_connected.hpp" +#include "intel_gpu/primitives/gather_elements.hpp" +#include "intel_gpu/primitives/gather_nd.hpp" +#include "intel_gpu/primitives/gather_tree.hpp" +#include "intel_gpu/primitives/gather.hpp" +#include "intel_gpu/primitives/gemm.hpp" +#include "intel_gpu/primitives/generate_proposals.hpp" +#include "intel_gpu/primitives/grn.hpp" +#include "intel_gpu/primitives/grn.hpp" +#include "intel_gpu/primitives/input_layout.hpp" +#include "intel_gpu/primitives/loop.hpp" +#include "intel_gpu/primitives/lrn.hpp" +#include "intel_gpu/primitives/lstm_dynamic_input.hpp" +#include "intel_gpu/primitives/lstm_dynamic_timeloop.hpp" +#include "intel_gpu/primitives/lstm_dynamic.hpp" +#include "intel_gpu/primitives/lstm.hpp" +#include "intel_gpu/primitives/max_unpooling.hpp" +#include "intel_gpu/primitives/mutable_data.hpp" +#include "intel_gpu/primitives/mvn.hpp" +#include "intel_gpu/primitives/non_max_suppression.hpp" +#include "intel_gpu/primitives/non_zero.hpp" +#include "intel_gpu/primitives/normalize.hpp" +#include "intel_gpu/primitives/one_hot.hpp" +#include 
"intel_gpu/primitives/permute.hpp" +#include "intel_gpu/primitives/pooling.hpp" +#include "intel_gpu/primitives/prior_box.hpp" +#include "intel_gpu/primitives/proposal.hpp" +#include "intel_gpu/primitives/pyramid_roi_align.hpp" +#include "intel_gpu/primitives/quantize.hpp" +#include "intel_gpu/primitives/random_uniform.hpp" +#include "intel_gpu/primitives/range.hpp" +#include "intel_gpu/primitives/read_value.hpp" +#include "intel_gpu/primitives/reduce.hpp" +#include "intel_gpu/primitives/region_yolo.hpp" +#include "intel_gpu/primitives/reorder.hpp" +#include "intel_gpu/primitives/reorg_yolo.hpp" +#include "intel_gpu/primitives/resample.hpp" +#include "intel_gpu/primitives/reshape.hpp" +#include "intel_gpu/primitives/reverse_sequence.hpp" +#include "intel_gpu/primitives/reverse.hpp" +#include "intel_gpu/primitives/roi_align.hpp" +#include "intel_gpu/primitives/roi_pooling.hpp" +#include "intel_gpu/primitives/roll.hpp" +#include "intel_gpu/primitives/scatter_elements_update.hpp" +#include "intel_gpu/primitives/scatter_nd_update.hpp" +#include "intel_gpu/primitives/scatter_update.hpp" +#include "intel_gpu/primitives/select.hpp" +#include "intel_gpu/primitives/shape_of.hpp" +#include "intel_gpu/primitives/shuffle_channels.hpp" +#include "intel_gpu/primitives/slice.hpp" +#include "intel_gpu/primitives/softmax.hpp" +#include "intel_gpu/primitives/space_to_batch.hpp" +#include "intel_gpu/primitives/space_to_depth.hpp" +#include "intel_gpu/primitives/split.hpp" +#include "intel_gpu/primitives/strided_slice.hpp" +#include "intel_gpu/primitives/tile.hpp" + +cldnn::primitive_type_id cldnn::get_type_id(std::string type_str) { + static std::unordered_map primitive_inst_types = { + {"activation", cldnn::activation::type_id()}, + {"adaptive_pooling", cldnn::adaptive_pooling::type_id()}, + {"arg_max_min", cldnn::arg_max_min::type_id()}, + {"assign", cldnn::assign::type_id()}, + {"average_unpooling", cldnn::average_unpooling::type_id()}, + {"binary_convolution", 
cldnn::binary_convolution::type_id()}, + {"border", cldnn::border::type_id()}, + {"broadcast", cldnn::broadcast::type_id()}, + {"bucketize", cldnn::bucketize::type_id()}, + {"batch_to_space", cldnn::batch_to_space::type_id()}, + {"space_to_batch", cldnn::space_to_batch::type_id()}, + {"concatenation", cldnn::concatenation::type_id()}, + {"condition", cldnn::condition::type_id()}, + {"convert_color", cldnn::convert_color::type_id()}, + {"convolution", cldnn::convolution::type_id()}, + {"count_nonzero", cldnn::count_nonzero::type_id()}, + {"crop", cldnn::crop::type_id()}, + {"ctc_greedy_decoder", cldnn::ctc_greedy_decoder::type_id()}, + {"ctc_loss", cldnn::ctc_loss::type_id()}, + {"cum_sum", cldnn::cum_sum::type_id()}, + {"custom_gpu_primitive", cldnn::custom_gpu_primitive::type_id()}, + {"data", cldnn::data::type_id()}, + {"deconvolution", cldnn::deconvolution::type_id()}, + {"deformable_conv", cldnn::deformable_conv::type_id()}, + {"deformable_interp", cldnn::deformable_interp::type_id()}, + {"depth_to_space", cldnn::depth_to_space::type_id()}, + {"detection_output", cldnn::detection_output::type_id()}, + {"dft", cldnn::dft::type_id()}, + {"eltwise", cldnn::eltwise::type_id()}, + {"embedding_bag", cldnn::embedding_bag::type_id()}, + {"experimental_detectron_detection_output", cldnn::experimental_detectron_detection_output::type_id()}, + {"experimental_detectron_generate_proposals_single_image", cldnn::experimental_detectron_generate_proposals_single_image::type_id()}, + {"experimental_detectron_prior_grid_generator", cldnn::experimental_detectron_prior_grid_generator::type_id()}, + {"experimental_detectron_roi_feature_extractor", cldnn::experimental_detectron_roi_feature_extractor::type_id()}, + {"experimental_detectron_topk_rois", cldnn::experimental_detectron_topk_rois::type_id()}, + {"extract_image_patches", cldnn::extract_image_patches::type_id()}, + {"eye", cldnn::eye::type_id()}, + {"fully_connected", cldnn::fully_connected::type_id()}, + {"gather", 
cldnn::gather::type_id()}, + {"gather_elements", cldnn::gather_elements::type_id()}, + {"gather_nd", cldnn::gather_nd::type_id()}, + {"gather_nonzero", cldnn::gather_nonzero::type_id()}, + {"gather_tree", cldnn::gather_tree::type_id()}, + {"gemm", cldnn::gemm::type_id()}, + {"generate_proposals", cldnn::generate_proposals::type_id()}, + {"generic_layer", cldnn::generic_layer::type_id()}, + {"grn", cldnn::grn::type_id()}, + {"input_layout", cldnn::input_layout::type_id()}, + {"loop", cldnn::loop::type_id()}, + {"lrn", cldnn::lrn::type_id()}, + {"lstm", cldnn::lstm::type_id()}, + {"lstm_dynamic", cldnn::lstm_dynamic::type_id()}, + {"lstm_dynamic_input", cldnn::lstm_dynamic_input::type_id()}, + {"lstm_dynamic_timeloop", cldnn::lstm_dynamic_timeloop::type_id()}, + {"lstm_elt", cldnn::lstm_elt::type_id()}, + {"lstm_gemm", cldnn::lstm_gemm::type_id()}, + {"max_unpooling", cldnn::max_unpooling::type_id()}, + {"mutable_data", cldnn::mutable_data::type_id()}, + {"mvn", cldnn::mvn::type_id()}, + {"non_max_suppression", cldnn::non_max_suppression::type_id()}, + {"normalize", cldnn::normalize::type_id()}, + {"one_hot", cldnn::one_hot::type_id()}, + {"permute", cldnn::permute::type_id()}, + {"pooling", cldnn::pooling::type_id()}, + {"prior_box", cldnn::prior_box::type_id()}, + {"proposal", cldnn::proposal::type_id()}, + {"pyramid_roi_align", cldnn::pyramid_roi_align::type_id()}, + {"quantize", cldnn::quantize::type_id()}, + {"random_uniform", cldnn::random_uniform::type_id()}, + {"range", cldnn::range::type_id()}, + {"read_value", cldnn::read_value::type_id()}, + {"reduce", cldnn::reduce::type_id()}, + {"region_yolo", cldnn::region_yolo::type_id()}, + {"reorder", cldnn::reorder::type_id()}, + {"reorg_yolo", cldnn::reorg_yolo::type_id()}, + {"resample", cldnn::resample::type_id()}, + {"reshape", cldnn::reshape::type_id()}, + {"reverse", cldnn::reverse::type_id()}, + {"reverse_sequence", cldnn::reverse_sequence::type_id()}, + {"roi_align", cldnn::roi_align::type_id()}, + 
{"roi_pooling", cldnn::roi_pooling::type_id()}, + {"roll", cldnn::roll::type_id()}, + {"scatter_elements_update", cldnn::scatter_elements_update::type_id()}, + {"scatter_nd_update", cldnn::scatter_nd_update::type_id()}, + {"scatter_update", cldnn::scatter_update::type_id()}, + {"select", cldnn::select::type_id()}, + {"shape_of", cldnn::shape_of::type_id()}, + {"shuffle_channels", cldnn::shuffle_channels::type_id()}, + {"slice", cldnn::slice::type_id()}, + {"softmax", cldnn::softmax::type_id()}, + {"space_to_depth", cldnn::space_to_depth::type_id()}, + {"split", cldnn::split::type_id()}, + {"strided_slice", cldnn::strided_slice::type_id()}, + {"tile", cldnn::tile::type_id()}, + }; + + return primitive_inst_types[type_str]; +} \ No newline at end of file diff --git a/src/plugins/intel_gpu/src/graph/impls/common/wait_for_events.cpp b/src/plugins/intel_gpu/src/graph/impls/common/wait_for_events.cpp index 4a13c8674f9edc..c0eae1e6c74320 100644 --- a/src/plugins/intel_gpu/src/graph/impls/common/wait_for_events.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/common/wait_for_events.cpp @@ -8,22 +8,34 @@ #include "input_layout_inst.h" #include "impls/implementation_map.hpp" #include "register.hpp" +#include "serialization/binary_buffer.hpp" #include namespace cldnn { namespace common { class wait_for_events_impl : public primitive_impl { + using primitive_impl::primitive_impl; + public: explicit wait_for_events_impl(const program_node& /*node*/) : primitive_impl(kernel_selector::weights_reorder_params{}, "wait_for_events") { } + wait_for_events_impl() : primitive_impl() {} + + DECLARE_OBJECT_TYPE_SERIALIZATION + std::unique_ptr clone() const override { return make_unique(*this); } void init_kernels(const kernels_cache&) override {} void set_arguments(primitive_inst& /*instance*/) override {} + void set_arguments(kernel_arguments_data_idx& /*instance*/) override {} + kernel_arguments_data get_arguments(const primitive_inst& /*instance*/) const override { + 
kernel_arguments_data args; + return args; + } std::vector get_internal_buffer_layouts() const override { return {}; } event::ptr execute(const std::vector& events, primitive_inst& instance) override { @@ -62,3 +74,5 @@ attach_prior_box_common::attach_prior_box_common() { } // namespace detail } // namespace common } // namespace cldnn + +BIND_BINARY_BUFFER_WITH_TYPE(cldnn::common::wait_for_events_impl, cldnn::object_type::WAIT_FOR_EVENTS_IMPL) \ No newline at end of file diff --git a/src/plugins/intel_gpu/src/graph/impls/cpu/assign.cpp b/src/plugins/intel_gpu/src/graph/impls/cpu/assign.cpp index f1ead25d8bfdbf..6a8fea664e070e 100644 --- a/src/plugins/intel_gpu/src/graph/impls/cpu/assign.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/cpu/assign.cpp @@ -10,6 +10,8 @@ namespace cldnn { namespace cpu { struct assign_impl : public typed_primitive_impl { + DECLARE_OBJECT_TYPE_SERIALIZATION + std::unique_ptr clone() const override { return make_unique(*this); } @@ -53,3 +55,5 @@ attach_assign_impl::attach_assign_impl() { } // namespace detail } // namespace cpu } // namespace cldnn + +BIND_BINARY_BUFFER_WITH_TYPE(cldnn::cpu::assign_impl, cldnn::object_type::ASSIGN_IMPL) \ No newline at end of file diff --git a/src/plugins/intel_gpu/src/graph/impls/cpu/detection_output.cpp b/src/plugins/intel_gpu/src/graph/impls/cpu/detection_output.cpp index 4bbe066eb8fe39..fac3a6428734a9 100644 --- a/src/plugins/intel_gpu/src/graph/impls/cpu/detection_output.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/cpu/detection_output.cpp @@ -43,12 +43,21 @@ bool comp_score_descend>(const std::pair { + using parent = typed_primitive_impl; + using parent::parent; + +public: enum NMSType {CAFFE, MXNET}; NMSType nms_type; + DECLARE_OBJECT_TYPE_SERIALIZATION + std::unique_ptr clone() const override { return make_unique(*this); } + + detection_output_impl() : parent() {} + explicit detection_output_impl(const detection_output_node& outer) { set_node_params(outer); } @@ -59,6 +68,14 @@ struct 
detection_output_impl : typed_primitive_impl { nms_type = (node.get_primitive()->decrease_label_id ? NMSType::MXNET : NMSType::CAFFE); } + void save(BinaryOutputBuffer& ob) const override { + ob << make_data(&nms_type, sizeof(NMSType)); + } + + void load(BinaryInputBuffer& ib) override { + ib >> make_data(&nms_type, sizeof(NMSType)); + } + static inline void intersect_bbox(const bounding_box& bbox1, const bounding_box& bbox2, bounding_box& intersect_bbox) { @@ -854,3 +871,5 @@ attach_detection_output_impl::attach_detection_output_impl() { } // namespace cpu } // namespace cldnn + +BIND_BINARY_BUFFER_WITH_TYPE(cldnn::cpu::detection_output_impl, cldnn::object_type::DETECTION_OUTPUT_IMPL_CPU) \ No newline at end of file diff --git a/src/plugins/intel_gpu/src/graph/impls/cpu/non_max_suppression.cpp b/src/plugins/intel_gpu/src/graph/impls/cpu/non_max_suppression.cpp index b6c3491041ecb6..720f6cbc41cdb5 100644 --- a/src/plugins/intel_gpu/src/graph/impls/cpu/non_max_suppression.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/cpu/non_max_suppression.cpp @@ -383,6 +383,8 @@ void run(non_max_suppression_inst& instance) { struct non_max_suppression_impl : typed_primitive_impl { using parent = typed_primitive_impl; + DECLARE_OBJECT_TYPE_SERIALIZATION + std::unique_ptr clone() const override { return make_unique(*this); } @@ -421,3 +423,5 @@ attach_non_max_suppression_impl::attach_non_max_suppression_impl() { } // namespace detail } // namespace cpu } // namespace cldnn + +BIND_BINARY_BUFFER_WITH_TYPE(cldnn::cpu::non_max_suppression_impl, cldnn::object_type::NON_MAX_SUPPRESSION_IMPL_CPU) \ No newline at end of file diff --git a/src/plugins/intel_gpu/src/graph/impls/cpu/proposal.cpp b/src/plugins/intel_gpu/src/graph/impls/cpu/proposal.cpp index a9174c5506af0b..a94095ead7e9b4 100644 --- a/src/plugins/intel_gpu/src/graph/impls/cpu/proposal.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/cpu/proposal.cpp @@ -188,8 +188,13 @@ struct im_info_t { }; struct proposal_impl : 
typed_primitive_impl { + using parent = typed_primitive_impl; + using parent::parent; + explicit proposal_impl(const proposal_node& arg) {} + DECLARE_OBJECT_TYPE_SERIALIZATION + std::unique_ptr clone() const override { return make_unique(*this); } @@ -453,3 +458,5 @@ attach_proposal_impl::attach_proposal_impl() { } // namespace detail } // namespace cpu } // namespace cldnn + +BIND_BINARY_BUFFER_WITH_TYPE(cldnn::cpu::proposal_impl, cldnn::object_type::PROPOSAL_IMPL_CPU) \ No newline at end of file diff --git a/src/plugins/intel_gpu/src/graph/impls/cpu/read_value.cpp b/src/plugins/intel_gpu/src/graph/impls/cpu/read_value.cpp index e20a0bd1fed11f..1be3ccd7d839fe 100644 --- a/src/plugins/intel_gpu/src/graph/impls/cpu/read_value.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/cpu/read_value.cpp @@ -10,6 +10,8 @@ namespace cldnn { namespace cpu { struct read_value_impl : public typed_primitive_impl { + DECLARE_OBJECT_TYPE_SERIALIZATION + std::unique_ptr clone() const override { return make_unique(*this); } @@ -53,3 +55,5 @@ attach_read_value_impl::attach_read_value_impl() { } // namespace detail } // namespace cpu } // namespace cldnn + +BIND_BINARY_BUFFER_WITH_TYPE(cldnn::cpu::read_value_impl, cldnn::object_type::READ_VALUE_IMPL) \ No newline at end of file diff --git a/src/plugins/intel_gpu/src/graph/impls/ocl/activation.cpp b/src/plugins/intel_gpu/src/graph/impls/ocl/activation.cpp index 8843b98831af59..c3bb3bac00f000 100644 --- a/src/plugins/intel_gpu/src/graph/impls/ocl/activation.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/ocl/activation.cpp @@ -17,10 +17,14 @@ struct activation_impl : typed_primitive_impl_ocl { using parent = typed_primitive_impl_ocl; using parent::parent; + DECLARE_OBJECT_TYPE_SERIALIZATION + std::unique_ptr clone() const override { return make_unique(*this); } + activation_impl() : parent() {} + explicit activation_impl(const activation_impl& other) : parent(other), _is_parameterized(other._is_parameterized) {} @@ -34,7 +38,7 @@ struct 
activation_impl : typed_primitive_impl_ocl { _is_parameterized = node.is_parameterized(); } - kernel_arguments_data get_arguments(typed_primitive_inst& instance, int32_t split) const override { + kernel_arguments_data get_arguments(const typed_primitive_inst& instance, int32_t split) const override { kernel_arguments_data args = parent::get_arguments(instance, split); if (_is_parameterized) { @@ -43,6 +47,17 @@ struct activation_impl : typed_primitive_impl_ocl { return args; } + + void save(BinaryOutputBuffer& ob) const override { + parent::save(ob); + ob << _is_parameterized; + } + + void load(BinaryInputBuffer& ib) override { + parent::load(ib); + ib >> _is_parameterized; + } + static primitive_impl* create(const activation_node& arg, const kernel_impl_params& impl_param) { const auto& prim = arg.get_primitive(); auto activation_params = get_default_params(impl_param); @@ -153,3 +168,5 @@ attach_activation_impl::attach_activation_impl() { } // namespace detail } // namespace ocl } // namespace cldnn + +BIND_BINARY_BUFFER_WITH_TYPE(cldnn::ocl::activation_impl, cldnn::object_type::ACTIVATION_IMPL) \ No newline at end of file diff --git a/src/plugins/intel_gpu/src/graph/impls/ocl/adaptive_pooling.cpp b/src/plugins/intel_gpu/src/graph/impls/ocl/adaptive_pooling.cpp index e32af5a88e2ccc..098257b1b97ada 100644 --- a/src/plugins/intel_gpu/src/graph/impls/ocl/adaptive_pooling.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/ocl/adaptive_pooling.cpp @@ -17,12 +17,14 @@ struct adaptive_pooling_impl : public typed_primitive_impl_ocl using parent = typed_primitive_impl_ocl; using parent::parent; + DECLARE_OBJECT_TYPE_SERIALIZATION + std::unique_ptr clone() const override { return make_unique(*this); } protected: - kernel_arguments_data get_arguments(typed_primitive_inst& instance, int32_t) const override { + kernel_arguments_data get_arguments(const typed_primitive_inst& instance, int32_t) const override { kernel_arguments_data args; const auto num_inputs = 
instance.inputs_memory_count(); for (size_t i = 0; i < num_inputs; ++i) { @@ -96,3 +98,5 @@ attach_adaptive_pooling_impl::attach_adaptive_pooling_impl() { } // namespace detail } // namespace ocl } // namespace cldnn + +BIND_BINARY_BUFFER_WITH_TYPE(cldnn::ocl::adaptive_pooling_impl, cldnn::object_type::ADAPTIVE_POOLING_IMPL) \ No newline at end of file diff --git a/src/plugins/intel_gpu/src/graph/impls/ocl/arg_max_min.cpp b/src/plugins/intel_gpu/src/graph/impls/ocl/arg_max_min.cpp index 5fe96a4e9e9cd9..3b8fdd92f9cfd5 100644 --- a/src/plugins/intel_gpu/src/graph/impls/ocl/arg_max_min.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/ocl/arg_max_min.cpp @@ -40,12 +40,14 @@ struct arg_max_min_impl : typed_primitive_impl_ocl { using parent = typed_primitive_impl_ocl; using parent::parent; + DECLARE_OBJECT_TYPE_SERIALIZATION + std::unique_ptr clone() const override { return make_unique(*this); } protected: - kernel_arguments_data get_arguments(typed_primitive_inst& instance, int32_t) const override { + kernel_arguments_data get_arguments(const typed_primitive_inst& instance, int32_t) const override { kernel_arguments_data args = parent::get_arguments(instance, 0); if (instance.node->has_second_output()) { @@ -129,3 +131,5 @@ attach_arg_max_min_impl::attach_arg_max_min_impl() { } // namespace detail } // namespace ocl } // namespace cldnn + +BIND_BINARY_BUFFER_WITH_TYPE(cldnn::ocl::arg_max_min_impl, cldnn::object_type::ARG_MAX_MIN_IMPL) \ No newline at end of file diff --git a/src/plugins/intel_gpu/src/graph/impls/ocl/average_unpooling.cpp b/src/plugins/intel_gpu/src/graph/impls/ocl/average_unpooling.cpp index 6683701a68db45..6775827e85ca73 100644 --- a/src/plugins/intel_gpu/src/graph/impls/ocl/average_unpooling.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/ocl/average_unpooling.cpp @@ -17,12 +17,14 @@ struct average_unpooling_impl : typed_primitive_impl_ocl { using parent = typed_primitive_impl_ocl; using parent::parent; + DECLARE_OBJECT_TYPE_SERIALIZATION + 
std::unique_ptr clone() const override { return make_unique(*this); } protected: - kernel_arguments_data get_arguments(typed_primitive_inst& instance, int32_t split) const override { + kernel_arguments_data get_arguments(const typed_primitive_inst& instance, int32_t split) const override { kernel_arguments_data args = parent::get_arguments(instance, split); return args; } @@ -77,3 +79,5 @@ attach_average_unpooling_impl::attach_average_unpooling_impl() { } // namespace detail } // namespace ocl } // namespace cldnn + +BIND_BINARY_BUFFER_WITH_TYPE(cldnn::ocl::average_unpooling_impl, cldnn::object_type::AVERAGE_UNPOOLING_IMPL) \ No newline at end of file diff --git a/src/plugins/intel_gpu/src/graph/impls/ocl/batch_to_space.cpp b/src/plugins/intel_gpu/src/graph/impls/ocl/batch_to_space.cpp index 8f499a81ee50dc..14dcf7c00ec442 100644 --- a/src/plugins/intel_gpu/src/graph/impls/ocl/batch_to_space.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/ocl/batch_to_space.cpp @@ -20,6 +20,8 @@ struct batch_to_space_impl : typed_primitive_impl_ocl { using parent = typed_primitive_impl_ocl; using parent::parent; + DECLARE_OBJECT_TYPE_SERIALIZATION + std::unique_ptr clone() const override { return make_unique(*this); } @@ -75,3 +77,5 @@ attach_batch_to_space_impl::attach_batch_to_space_impl() { } // namespace detail } // namespace ocl } // namespace cldnn + +BIND_BINARY_BUFFER_WITH_TYPE(cldnn::ocl::batch_to_space_impl, cldnn::object_type::BATCH_TO_SPACE_IMPL) \ No newline at end of file diff --git a/src/plugins/intel_gpu/src/graph/impls/ocl/binary_convolution.cpp b/src/plugins/intel_gpu/src/graph/impls/ocl/binary_convolution.cpp index 3a0c6dd18a71f5..7d851c891643ad 100644 --- a/src/plugins/intel_gpu/src/graph/impls/ocl/binary_convolution.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/ocl/binary_convolution.cpp @@ -21,10 +21,14 @@ struct binary_convolution_impl : typed_primitive_impl_ocl { using parent = typed_primitive_impl_ocl; using parent::parent; + 
DECLARE_OBJECT_TYPE_SERIALIZATION + std::unique_ptr clone() const override { return make_unique(*this); } + binary_convolution_impl() : parent() {} + explicit binary_convolution_impl(const binary_convolution_impl& other) : parent(other), _split(other._split) {} @@ -61,7 +65,7 @@ struct binary_convolution_impl : typed_primitive_impl_ocl { return res; } - kernel_arguments_data get_arguments(typed_primitive_inst& instance, int32_t split) const override { + kernel_arguments_data get_arguments(const typed_primitive_inst& instance, int32_t split) const override { kernel_arguments_data args = parent::get_arguments(instance, split); args.weights = instance.weights_memory(split); @@ -71,6 +75,16 @@ struct binary_convolution_impl : typed_primitive_impl_ocl { int32_t get_split() const override { return _split; } public: + void save(BinaryOutputBuffer& ob) const override { + parent::save(ob); + ob << _split; + } + + void load(BinaryInputBuffer& ib) override { + parent::load(ib); + ib >> _split; + } + static primitive_impl* create(const binary_convolution_node& arg, const kernel_impl_params& impl_param) { const auto& primitive = arg.get_primitive(); const auto& weights_layout = (*impl_param.weights_layout).convert_to_weights_layout(false); @@ -154,3 +168,5 @@ attach_binary_convolution_impl::attach_binary_convolution_impl() { } // namespace detail } // namespace ocl } // namespace cldnn + +BIND_BINARY_BUFFER_WITH_TYPE(cldnn::ocl::binary_convolution_impl, cldnn::object_type::BINARY_CONVOLUTION_IMPL) \ No newline at end of file diff --git a/src/plugins/intel_gpu/src/graph/impls/ocl/border.cpp b/src/plugins/intel_gpu/src/graph/impls/ocl/border.cpp index ec6de5732c045f..371709ba51e14c 100644 --- a/src/plugins/intel_gpu/src/graph/impls/ocl/border.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/ocl/border.cpp @@ -18,6 +18,8 @@ struct border_impl : typed_primitive_impl_ocl { using parent = typed_primitive_impl_ocl; using parent::parent; + DECLARE_OBJECT_TYPE_SERIALIZATION + 
std::unique_ptr clone() const override { return make_unique(*this); } @@ -173,3 +175,5 @@ attach_border_impl::attach_border_impl() { } // namespace detail } // namespace ocl } // namespace cldnn + +BIND_BINARY_BUFFER_WITH_TYPE(cldnn::ocl::border_impl, cldnn::object_type::BORDER_IMPL) \ No newline at end of file diff --git a/src/plugins/intel_gpu/src/graph/impls/ocl/broadcast.cpp b/src/plugins/intel_gpu/src/graph/impls/ocl/broadcast.cpp index 13653a28fe08ff..e5bdc30e881534 100644 --- a/src/plugins/intel_gpu/src/graph/impls/ocl/broadcast.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/ocl/broadcast.cpp @@ -18,6 +18,8 @@ struct broadcast_impl : typed_primitive_impl_ocl { using parent = typed_primitive_impl_ocl; using parent::parent; + DECLARE_OBJECT_TYPE_SERIALIZATION + std::unique_ptr clone() const override { return make_unique(*this); } @@ -172,3 +174,5 @@ attach_broadcast_impl::attach_broadcast_impl() { } // namespace detail } // namespace ocl } // namespace cldnn + +BIND_BINARY_BUFFER_WITH_TYPE(cldnn::ocl::broadcast_impl, cldnn::object_type::BROADCAST_IMPL) \ No newline at end of file diff --git a/src/plugins/intel_gpu/src/graph/impls/ocl/bucketize.cpp b/src/plugins/intel_gpu/src/graph/impls/ocl/bucketize.cpp index 8f566a2abae57e..9aded41a719679 100644 --- a/src/plugins/intel_gpu/src/graph/impls/ocl/bucketize.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/ocl/bucketize.cpp @@ -15,6 +15,8 @@ struct bucketize_impl : typed_primitive_impl_ocl { using parent = typed_primitive_impl_ocl; using parent::parent; + DECLARE_OBJECT_TYPE_SERIALIZATION + std::unique_ptr clone() const override { return make_unique(*this); } @@ -74,3 +76,5 @@ attach_bucketize_impl::attach_bucketize_impl() { } // namespace ocl } // namespace cldnn + +BIND_BINARY_BUFFER_WITH_TYPE(cldnn::ocl::bucketize_impl, cldnn::object_type::BUCKETIZE_IMPL) \ No newline at end of file diff --git a/src/plugins/intel_gpu/src/graph/impls/ocl/concatenation.cpp 
b/src/plugins/intel_gpu/src/graph/impls/ocl/concatenation.cpp index fba658603fc722..54a8caa445083d 100644 --- a/src/plugins/intel_gpu/src/graph/impls/ocl/concatenation.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/ocl/concatenation.cpp @@ -47,11 +47,16 @@ kernel_selector::concat_axis convert_axis(int64_t axis, size_t rank) { struct concatenation_impl : typed_primitive_impl_ocl { using parent = typed_primitive_impl_ocl; + using parent::parent; + + DECLARE_OBJECT_TYPE_SERIALIZATION std::unique_ptr clone() const override { return make_unique(*this); } + concatenation_impl() : parent() {} + explicit concatenation_impl(const concatenation_impl& other) : parent(other), _can_be_optimized(other._can_be_optimized) {} @@ -80,6 +85,16 @@ struct concatenation_impl : typed_primitive_impl_ocl { } public: + void save(BinaryOutputBuffer& ob) const override { + parent::save(ob); + ob << _can_be_optimized; + } + + void load(BinaryInputBuffer& ib) override { + parent::load(ib); + ib >> _can_be_optimized; + } + static primitive_impl* create(const concatenation_node& arg, const kernel_impl_params& impl_param) { if (arg.can_be_optimized()) { return new concatenation_impl(arg, {}); @@ -194,3 +209,5 @@ attach_concatenation_impl::attach_concatenation_impl() { } // namespace detail } // namespace ocl } // namespace cldnn + +BIND_BINARY_BUFFER_WITH_TYPE(cldnn::ocl::concatenation_impl, cldnn::object_type::CONCATENATION_IMPL) \ No newline at end of file diff --git a/src/plugins/intel_gpu/src/graph/impls/ocl/convert_color.cpp b/src/plugins/intel_gpu/src/graph/impls/ocl/convert_color.cpp index 75cffe426896d6..c855b2d453b3bc 100644 --- a/src/plugins/intel_gpu/src/graph/impls/ocl/convert_color.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/ocl/convert_color.cpp @@ -20,12 +20,14 @@ struct convert_color_impl : typed_primitive_impl_ocl { using parent = typed_primitive_impl_ocl; using parent::parent; + DECLARE_OBJECT_TYPE_SERIALIZATION + std::unique_ptr clone() const override { return 
make_unique(*this); } protected: - kernel_arguments_data get_arguments(typed_primitive_inst& instance, int32_t split) const override { + kernel_arguments_data get_arguments(const typed_primitive_inst& instance, int32_t split) const override { kernel_arguments_data args = parent::get_arguments(instance, split); return args; } @@ -76,3 +78,5 @@ attach_convert_color_impl::attach_convert_color_impl() { } // namespace detail } // namespace ocl } // namespace cldnn + +BIND_BINARY_BUFFER_WITH_TYPE(cldnn::ocl::convert_color_impl, cldnn::object_type::CONVERT_COLOR_IMPL) \ No newline at end of file diff --git a/src/plugins/intel_gpu/src/graph/impls/ocl/convolution.cpp b/src/plugins/intel_gpu/src/graph/impls/ocl/convolution.cpp index be8e308d0dc428..aa7a918aaa1028 100644 --- a/src/plugins/intel_gpu/src/graph/impls/ocl/convolution.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/ocl/convolution.cpp @@ -21,10 +21,14 @@ struct convolution_impl : typed_primitive_impl_ocl { using parent = typed_primitive_impl_ocl; using parent::parent; + DECLARE_OBJECT_TYPE_SERIALIZATION + std::unique_ptr clone() const override { return make_unique(*this); } + convolution_impl() : parent() {} + explicit convolution_impl(const convolution_impl& other) : parent(other), _split(other._split), _groups(other._groups), @@ -59,7 +63,7 @@ struct convolution_impl : typed_primitive_impl_ocl { return res; } - kernel_arguments_data get_arguments(typed_primitive_inst& instance, int32_t split) const override { + kernel_arguments_data get_arguments(const typed_primitive_inst& instance, int32_t split) const override { kernel_arguments_data args = parent::get_arguments(instance, split); args.weights = instance.weights_memory(split); @@ -76,6 +80,20 @@ struct convolution_impl : typed_primitive_impl_ocl { bool get_depthwise_sep_opt() const override { return _depthwise_sep_opt; } public: + void save(BinaryOutputBuffer& ob) const override { + parent::save(ob); + ob << _split; + ob << _groups; + ob << _depthwise_sep_opt; 
+ } + + void load(BinaryInputBuffer& ib) override { + parent::load(ib); + ib >> _split; + ib >> _groups; + ib >> _depthwise_sep_opt; + } + static primitive_impl* create(const convolution_node& arg, const kernel_impl_params& impl_param) { const auto& primitive = arg.get_primitive(); @@ -266,3 +284,5 @@ attach_convolution_impl::attach_convolution_impl() { } // namespace detail } // namespace ocl } // namespace cldnn + +BIND_BINARY_BUFFER_WITH_TYPE(cldnn::ocl::convolution_impl, cldnn::object_type::CONVOLUTION_IMPL) \ No newline at end of file diff --git a/src/plugins/intel_gpu/src/graph/impls/ocl/crop.cpp b/src/plugins/intel_gpu/src/graph/impls/ocl/crop.cpp index d6b1eb9f1e522c..0d861a61fbcb6d 100644 --- a/src/plugins/intel_gpu/src/graph/impls/ocl/crop.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/ocl/crop.cpp @@ -17,10 +17,14 @@ struct crop_impl : typed_primitive_impl_ocl { using parent = typed_primitive_impl_ocl; using parent::parent; + DECLARE_OBJECT_TYPE_SERIALIZATION + std::unique_ptr clone() const override { return make_unique(*this); } + crop_impl() : parent() {} + explicit crop_impl(const crop_impl& other) : parent(other), _can_be_optimized(other._can_be_optimized) {} @@ -40,6 +44,16 @@ struct crop_impl : typed_primitive_impl_ocl { } public: + void save(BinaryOutputBuffer& ob) const override { + parent::save(ob); + ob << _can_be_optimized; + } + + void load(BinaryInputBuffer& ib) override { + parent::load(ib); + ib >> _can_be_optimized; + } + static primitive_impl* create(const crop_node& arg, const kernel_impl_params& impl_param) { auto ew_params = get_default_params(impl_param, 1); auto ew_optional_params = get_default_optional_params(arg.get_program()); @@ -151,3 +165,5 @@ attach_crop_impl::attach_crop_impl() { } // namespace detail } // namespace ocl } // namespace cldnn + +BIND_BINARY_BUFFER_WITH_TYPE(cldnn::ocl::crop_impl, cldnn::object_type::CROP_IMPL) \ No newline at end of file diff --git 
a/src/plugins/intel_gpu/src/graph/impls/ocl/ctc_greedy_decoder.cpp b/src/plugins/intel_gpu/src/graph/impls/ocl/ctc_greedy_decoder.cpp index 41ec3b01838533..6702dc8da4cb62 100644 --- a/src/plugins/intel_gpu/src/graph/impls/ocl/ctc_greedy_decoder.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/ocl/ctc_greedy_decoder.cpp @@ -21,6 +21,8 @@ struct ctc_greedy_decoder_impl : typed_primitive_impl_ocl { using parent = typed_primitive_impl_ocl; using parent::parent; + DECLARE_OBJECT_TYPE_SERIALIZATION + std::unique_ptr clone() const override { return make_unique(*this); } @@ -71,3 +73,5 @@ attach_ctc_greedy_decoder_impl::attach_ctc_greedy_decoder_impl() { } // namespace detail } // namespace ocl } // namespace cldnn + +BIND_BINARY_BUFFER_WITH_TYPE(cldnn::ocl::ctc_greedy_decoder_impl, cldnn::object_type::CTC_GREEDY_DECODER_IMPL) \ No newline at end of file diff --git a/src/plugins/intel_gpu/src/graph/impls/ocl/ctc_loss.cpp b/src/plugins/intel_gpu/src/graph/impls/ocl/ctc_loss.cpp index ce5a60688d3478..cdb98540fffb30 100644 --- a/src/plugins/intel_gpu/src/graph/impls/ocl/ctc_loss.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/ocl/ctc_loss.cpp @@ -15,6 +15,8 @@ struct ctc_loss_impl : typed_primitive_impl_ocl { using parent = typed_primitive_impl_ocl; using parent::parent; + DECLARE_OBJECT_TYPE_SERIALIZATION + std::unique_ptr clone() const override { return make_unique(*this); } @@ -62,3 +64,5 @@ attach_ctc_loss_impl::attach_ctc_loss_impl() { } // namespace detail } // namespace ocl } // namespace cldnn + +BIND_BINARY_BUFFER_WITH_TYPE(cldnn::ocl::ctc_loss_impl, cldnn::object_type::CTC_LOSS_IMPL) \ No newline at end of file diff --git a/src/plugins/intel_gpu/src/graph/impls/ocl/cum_sum.cpp b/src/plugins/intel_gpu/src/graph/impls/ocl/cum_sum.cpp index 0dd0cb1c11baf1..29109505dcb030 100644 --- a/src/plugins/intel_gpu/src/graph/impls/ocl/cum_sum.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/ocl/cum_sum.cpp @@ -51,6 +51,8 @@ struct cum_sum_impl : typed_primitive_impl_ocl { using 
parent = typed_primitive_impl_ocl; using parent::parent; + DECLARE_OBJECT_TYPE_SERIALIZATION + std::unique_ptr clone() const override { return make_unique(*this); } @@ -104,3 +106,5 @@ attach_cum_sum_impl::attach_cum_sum_impl() { } // namespace detail } // namespace ocl } // namespace cldnn + +BIND_BINARY_BUFFER_WITH_TYPE(cldnn::ocl::cum_sum_impl, cldnn::object_type::CUM_SUM_IMPL) \ No newline at end of file diff --git a/src/plugins/intel_gpu/src/graph/impls/ocl/custom_primitive.cpp b/src/plugins/intel_gpu/src/graph/impls/ocl/custom_primitive.cpp index 7108b44b696d4f..3421e8c5718b07 100644 --- a/src/plugins/intel_gpu/src/graph/impls/ocl/custom_primitive.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/ocl/custom_primitive.cpp @@ -24,6 +24,11 @@ namespace cldnn { namespace ocl { struct custom_gpu_primitive_impl : typed_primitive_impl { + using parent = typed_primitive_impl; + using parent::parent; + + DECLARE_OBJECT_TYPE_SERIALIZATION + std::shared_ptr cl_kernel; std::vector _kernels; kernel_id _kernel_id; @@ -32,6 +37,9 @@ struct custom_gpu_primitive_impl : typed_primitive_impl { return make_unique(*this); } + custom_gpu_primitive_impl() + : _kernels() {} + custom_gpu_primitive_impl(const custom_gpu_primitive_impl& other) : cl_kernel(other.cl_kernel) , _kernels({}) @@ -76,6 +84,17 @@ struct custom_gpu_primitive_impl : typed_primitive_impl { std::vector get_kernel_ids() override { return {_kernel_id}; } + + void save(BinaryOutputBuffer& ob) const override { + ob << *cl_kernel; + ob << _kernel_id; + } + + void load(BinaryInputBuffer& ib) override { + cl_kernel = std::make_shared(); + ib >> *cl_kernel; + ib >> _kernel_id; + } }; static kernel_selector::kernel_argument_element get_arg(custom_gpu_primitive::arg_desc arg) { @@ -238,3 +257,5 @@ attach_custom_gpu_primitive_impl::attach_custom_gpu_primitive_impl() { } // namespace detail } // namespace ocl } // namespace cldnn + +BIND_BINARY_BUFFER_WITH_TYPE(cldnn::ocl::custom_gpu_primitive_impl, 
cldnn::object_type::CUSTOM_GPU_PRIMITIVE_IMPL) \ No newline at end of file diff --git a/src/plugins/intel_gpu/src/graph/impls/ocl/deconvolution.cpp b/src/plugins/intel_gpu/src/graph/impls/ocl/deconvolution.cpp index 1adf091c57bc1b..7d939b66288e70 100644 --- a/src/plugins/intel_gpu/src/graph/impls/ocl/deconvolution.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/ocl/deconvolution.cpp @@ -18,10 +18,14 @@ struct deconvolution_impl : typed_primitive_impl_ocl { using parent = typed_primitive_impl_ocl; using parent::parent; + DECLARE_OBJECT_TYPE_SERIALIZATION + std::unique_ptr clone() const override { return make_unique(*this); } + deconvolution_impl() : parent() {} + explicit deconvolution_impl(const deconvolution_impl& other) : parent(other), _split(other._split), _groups(other._groups) {} @@ -37,6 +41,18 @@ struct deconvolution_impl : typed_primitive_impl_ocl { _groups = node.get_groups(); } + void save(BinaryOutputBuffer& ob) const override { + parent::save(ob); + ob << _split; + ob << _groups; + } + + void load(BinaryInputBuffer& ib) override { + parent::load(ib); + ib >> _split; + ib >> _groups; + } + protected: // TODO: share it with convolution and fully connected bool validate_impl(const typed_primitive_inst& instance) const override { @@ -52,7 +68,7 @@ struct deconvolution_impl : typed_primitive_impl_ocl { return res; } - kernel_arguments_data get_arguments(typed_primitive_inst& instance, int32_t split) const override { + kernel_arguments_data get_arguments(const typed_primitive_inst& instance, int32_t split) const override { kernel_arguments_data args = parent::get_arguments(instance, split); args.weights = instance.weights_memory(split); @@ -158,3 +174,5 @@ attach_deconvolution_impl::attach_deconvolution_impl() { } // namespace detail } // namespace ocl } // namespace cldnn + +BIND_BINARY_BUFFER_WITH_TYPE(cldnn::ocl::deconvolution_impl, cldnn::object_type::DECONVOLUTION_IMPL) \ No newline at end of file diff --git 
a/src/plugins/intel_gpu/src/graph/impls/ocl/deformable_convolution.cpp b/src/plugins/intel_gpu/src/graph/impls/ocl/deformable_convolution.cpp index 0e801dff396d3c..57255cc12c3468 100644 --- a/src/plugins/intel_gpu/src/graph/impls/ocl/deformable_convolution.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/ocl/deformable_convolution.cpp @@ -19,10 +19,14 @@ struct deformable_conv_impl : typed_primitive_impl_ocl { using parent = typed_primitive_impl_ocl; using parent::parent; + DECLARE_OBJECT_TYPE_SERIALIZATION + std::unique_ptr clone() const override { return make_unique(*this); } + deformable_conv_impl() : parent() {} + explicit deformable_conv_impl(const deformable_conv_impl& other) : parent(other), _split(other._split), _groups(other._groups) {} @@ -38,8 +42,20 @@ struct deformable_conv_impl : typed_primitive_impl_ocl { _groups = node.get_groups(); } + void save(BinaryOutputBuffer& ob) const override { + parent::save(ob); + ob << _split; + ob << _groups; + } + + void load(BinaryInputBuffer& ib) override { + parent::load(ib); + ib >> _split; + ib >> _groups; + } + protected: - kernel_arguments_data get_arguments(typed_primitive_inst& instance, int32_t split) const override { + kernel_arguments_data get_arguments(const typed_primitive_inst& instance, int32_t split) const override { kernel_arguments_data args = parent::get_arguments(instance, split); args.weights = instance.weights_memory(split); @@ -101,6 +117,8 @@ struct deformable_interp_impl : typed_primitive_impl_ocl { using parent = typed_primitive_impl_ocl; using parent::parent; + DECLARE_OBJECT_TYPE_SERIALIZATION + std::unique_ptr clone() const override { return make_unique(*this); } @@ -193,3 +211,6 @@ attach_deformable_interp_impl::attach_deformable_interp_impl() { } // namespace detail } // namespace ocl } // namespace cldnn + +BIND_BINARY_BUFFER_WITH_TYPE(cldnn::ocl::deformable_conv_impl, cldnn::object_type::DEFORMABLE_CONV_IMPL) +BIND_BINARY_BUFFER_WITH_TYPE(cldnn::ocl::deformable_interp_impl, 
cldnn::object_type::DEFORMABLE_INTERP_IMPL) \ No newline at end of file diff --git a/src/plugins/intel_gpu/src/graph/impls/ocl/depth_to_space.cpp b/src/plugins/intel_gpu/src/graph/impls/ocl/depth_to_space.cpp index 8af82345cfe866..67a6d243d6ddae 100644 --- a/src/plugins/intel_gpu/src/graph/impls/ocl/depth_to_space.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/ocl/depth_to_space.cpp @@ -19,6 +19,8 @@ struct depth_to_space_impl : typed_primitive_impl_ocl { using parent = typed_primitive_impl_ocl; using parent::parent; + DECLARE_OBJECT_TYPE_SERIALIZATION + std::unique_ptr clone() const override { return make_unique(*this); } @@ -73,3 +75,5 @@ attach_depth_to_space_impl::attach_depth_to_space_impl() { } // namespace detail } // namespace ocl } // namespace cldnn + +BIND_BINARY_BUFFER_WITH_TYPE(cldnn::ocl::depth_to_space_impl, cldnn::object_type::DEPTH_TO_SPACE_IMPL) \ No newline at end of file diff --git a/src/plugins/intel_gpu/src/graph/impls/ocl/detection_output.cpp b/src/plugins/intel_gpu/src/graph/impls/ocl/detection_output.cpp index dd7885447ecd15..73ff0dc10796c6 100644 --- a/src/plugins/intel_gpu/src/graph/impls/ocl/detection_output.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/ocl/detection_output.cpp @@ -18,6 +18,8 @@ struct detection_output_impl : typed_primitive_impl_ocl { using parent = typed_primitive_impl_ocl; using parent::parent; + DECLARE_OBJECT_TYPE_SERIALIZATION + std::unique_ptr clone() const override { return make_unique(*this); } @@ -101,3 +103,5 @@ attach_detection_output_impl::attach_detection_output_impl() { } // namespace detail } // namespace ocl } // namespace cldnn + +BIND_BINARY_BUFFER_WITH_TYPE(cldnn::ocl::detection_output_impl, cldnn::object_type::DETECTION_OUTPUT_IMPL_OCL) \ No newline at end of file diff --git a/src/plugins/intel_gpu/src/graph/impls/ocl/dft.cpp b/src/plugins/intel_gpu/src/graph/impls/ocl/dft.cpp index c6b69428359f8d..c470fdbb56c73a 100644 --- a/src/plugins/intel_gpu/src/graph/impls/ocl/dft.cpp +++ 
b/src/plugins/intel_gpu/src/graph/impls/ocl/dft.cpp @@ -18,6 +18,8 @@ namespace ocl { struct dft_impl : typed_primitive_impl_ocl { using typed_primitive_impl_ocl::typed_primitive_impl_ocl; + DECLARE_OBJECT_TYPE_SERIALIZATION + std::unique_ptr clone() const override { return make_unique(*this); } @@ -109,3 +111,5 @@ attach_dft_impl::attach_dft_impl() { } // namespace detail } // namespace ocl } // namespace cldnn + +BIND_BINARY_BUFFER_WITH_TYPE(cldnn::ocl::dft_impl, cldnn::object_type::DFT_IMPL) \ No newline at end of file diff --git a/src/plugins/intel_gpu/src/graph/impls/ocl/eltwise.cpp b/src/plugins/intel_gpu/src/graph/impls/ocl/eltwise.cpp index 484df750d5c3d8..4f66169b4b1493 100644 --- a/src/plugins/intel_gpu/src/graph/impls/ocl/eltwise.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/ocl/eltwise.cpp @@ -18,12 +18,14 @@ struct eltwise_impl : typed_primitive_impl_ocl { using parent = typed_primitive_impl_ocl; using parent::parent; + DECLARE_OBJECT_TYPE_SERIALIZATION + std::unique_ptr clone() const override { return make_unique(*this); } protected: - kernel_arguments_data get_arguments(typed_primitive_inst& instance, int32_t split) const override { + kernel_arguments_data get_arguments(const typed_primitive_inst& instance, int32_t split) const override { kernel_arguments_data args = parent::get_arguments(instance, split); return args; } @@ -285,3 +287,5 @@ attach_eltwise_impl::attach_eltwise_impl() { } // namespace detail } // namespace ocl } // namespace cldnn + +BIND_BINARY_BUFFER_WITH_TYPE(cldnn::ocl::eltwise_impl, cldnn::object_type::ELTWISE_IMPL) \ No newline at end of file diff --git a/src/plugins/intel_gpu/src/graph/impls/ocl/embedding_bag.cpp b/src/plugins/intel_gpu/src/graph/impls/ocl/embedding_bag.cpp index 68efe7ca8d195c..4f17ef5d2f3297 100644 --- a/src/plugins/intel_gpu/src/graph/impls/ocl/embedding_bag.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/ocl/embedding_bag.cpp @@ -19,6 +19,8 @@ struct embedding_bag_impl : typed_primitive_impl_ocl { using 
parent = typed_primitive_impl_ocl; using parent::parent; + DECLARE_OBJECT_TYPE_SERIALIZATION + std::unique_ptr clone() const override { return make_unique(*this); } @@ -77,3 +79,5 @@ attach_embedding_bag_impl::attach_embedding_bag_impl() { } // namespace detail } // namespace ocl } // namespace cldnn + +BIND_BINARY_BUFFER_WITH_TYPE(cldnn::ocl::embedding_bag_impl, cldnn::object_type::EMBEDDING_BAG_IMPL) \ No newline at end of file diff --git a/src/plugins/intel_gpu/src/graph/impls/ocl/experimental_detectron_detection_output.cpp b/src/plugins/intel_gpu/src/graph/impls/ocl/experimental_detectron_detection_output.cpp index b148716df33367..bed045f0916551 100644 --- a/src/plugins/intel_gpu/src/graph/impls/ocl/experimental_detectron_detection_output.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/ocl/experimental_detectron_detection_output.cpp @@ -16,12 +16,14 @@ struct experimental_detectron_detection_output_impl using parent = typed_primitive_impl_ocl; using parent::parent; + DECLARE_OBJECT_TYPE_SERIALIZATION + std::unique_ptr clone() const override { return make_unique(*this); } protected: - kernel_arguments_data get_arguments(typed_primitive_inst& instance, + kernel_arguments_data get_arguments(const typed_primitive_inst& instance, int32_t unused) const override { kernel_arguments_data args = parent::get_arguments(instance, unused); args.inputs.push_back(instance.output_classes_memory()); @@ -85,3 +87,6 @@ attach_experimental_detectron_detection_output_impl::attach_experimental_detectr } // namespace detail } // namespace ocl } // namespace cldnn + +BIND_BINARY_BUFFER_WITH_TYPE(cldnn::ocl::experimental_detectron_detection_output_impl, + cldnn::object_type::EXPERIMENTAL_DETECTRON_DETECTION_OUTPUT_IMPL) \ No newline at end of file diff --git a/src/plugins/intel_gpu/src/graph/impls/ocl/experimental_detectron_generate_proposals_single_image.cpp b/src/plugins/intel_gpu/src/graph/impls/ocl/experimental_detectron_generate_proposals_single_image.cpp index 79e352f48fc332..ad7e29cc3ccc68 100644 --- 
a/src/plugins/intel_gpu/src/graph/impls/ocl/experimental_detectron_generate_proposals_single_image.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/ocl/experimental_detectron_generate_proposals_single_image.cpp @@ -17,12 +17,14 @@ struct experimental_detectron_generate_proposals_single_image_impl using parent = typed_primitive_impl_ocl; using parent::parent; + DECLARE_OBJECT_TYPE_SERIALIZATION + std::unique_ptr clone() const override { return make_unique(*this); } protected: - kernel_arguments_data get_arguments(typed_primitive_inst& instance, int32_t) const override { + kernel_arguments_data get_arguments(const typed_primitive_inst& instance, int32_t) const override { kernel_arguments_data args; const auto num_inputs = instance.inputs_memory_count(); for (size_t i = 0; i < num_inputs; ++i) { @@ -86,3 +88,6 @@ attach_experimental_detectron_generate_proposals_single_image_impl::attach_exper } // namespace detail } // namespace ocl } // namespace cldnn + +BIND_BINARY_BUFFER_WITH_TYPE(cldnn::ocl::experimental_detectron_generate_proposals_single_image_impl, + cldnn::object_type::EXPERIMENTAL_DETECTRON_GENERATE_PROPOSALS_SINGLE_IMAGE_IMPL) \ No newline at end of file diff --git a/src/plugins/intel_gpu/src/graph/impls/ocl/experimental_detectron_prior_grid_generator.cpp b/src/plugins/intel_gpu/src/graph/impls/ocl/experimental_detectron_prior_grid_generator.cpp index 51a1dba15f99fd..65f571f31032e9 100644 --- a/src/plugins/intel_gpu/src/graph/impls/ocl/experimental_detectron_prior_grid_generator.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/ocl/experimental_detectron_prior_grid_generator.cpp @@ -22,6 +22,8 @@ struct experimental_detectron_prior_grid_generator_impl using parent = typed_primitive_impl_ocl; using parent::parent; + DECLARE_OBJECT_TYPE_SERIALIZATION + std::unique_ptr clone() const override { return make_unique(*this); } @@ -68,3 +70,6 @@ attach_experimental_detectron_prior_grid_generator_impl::attach_experimental_det } // namespace detail } // namespace ocl } 
// namespace cldnn + +BIND_BINARY_BUFFER_WITH_TYPE(cldnn::ocl::experimental_detectron_prior_grid_generator_impl, + cldnn::object_type::EXPERIMENTAL_DETECTRON_PRIOR_GRID_GENERATOR_IMPL) \ No newline at end of file diff --git a/src/plugins/intel_gpu/src/graph/impls/ocl/experimental_detectron_roi_feature_extractor.cpp b/src/plugins/intel_gpu/src/graph/impls/ocl/experimental_detectron_roi_feature_extractor.cpp index ceb727413b276b..58863dacf9fbcd 100644 --- a/src/plugins/intel_gpu/src/graph/impls/ocl/experimental_detectron_roi_feature_extractor.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/ocl/experimental_detectron_roi_feature_extractor.cpp @@ -16,12 +16,14 @@ struct experimental_detectron_roi_feature_extractor_impl : public typed_primitiv using parent = typed_primitive_impl_ocl; using parent::parent; + DECLARE_OBJECT_TYPE_SERIALIZATION + std::unique_ptr clone() const override { return make_unique(*this); } protected: - kernel_arguments_data get_arguments(experimental_detectron_roi_feature_extractor_inst& instance, int32_t) const override { + kernel_arguments_data get_arguments(const experimental_detectron_roi_feature_extractor_inst& instance, int32_t) const override { kernel_arguments_data args; for (std::size_t i = 0; i < instance.inputs_memory_count(); i++) { @@ -85,3 +87,6 @@ attach_experimental_detectron_roi_feature_extractor_impl::attach_experimental_de } // namespace detail } // namespace ocl } // namespace cldnn + +BIND_BINARY_BUFFER_WITH_TYPE(cldnn::ocl::experimental_detectron_roi_feature_extractor_impl, + cldnn::object_type::EXPERIMENTAL_DETECTRON_ROI_FEATURE_EXTRACTOR_IMPL) \ No newline at end of file diff --git a/src/plugins/intel_gpu/src/graph/impls/ocl/experimental_detectron_topk_rois.cpp b/src/plugins/intel_gpu/src/graph/impls/ocl/experimental_detectron_topk_rois.cpp index dc37efccd25fee..db4ffef4fad63b 100644 --- a/src/plugins/intel_gpu/src/graph/impls/ocl/experimental_detectron_topk_rois.cpp +++ 
b/src/plugins/intel_gpu/src/graph/impls/ocl/experimental_detectron_topk_rois.cpp @@ -17,6 +17,8 @@ struct experimental_detectron_topk_rois_impl : typed_primitive_impl_ocl; using parent::parent; + DECLARE_OBJECT_TYPE_SERIALIZATION + std::unique_ptr clone() const override { return make_unique(*this); } @@ -58,3 +60,6 @@ attach_experimental_detectron_topk_rois_impl::attach_experimental_detectron_topk } // namespace ocl } // namespace cldnn + +BIND_BINARY_BUFFER_WITH_TYPE(cldnn::ocl::experimental_detectron_topk_rois_impl, + cldnn::object_type::EXPERIMENTAL_DETECTRON_TOPK_ROIS_IMPL) \ No newline at end of file diff --git a/src/plugins/intel_gpu/src/graph/impls/ocl/extract_image_patches.cpp b/src/plugins/intel_gpu/src/graph/impls/ocl/extract_image_patches.cpp index 93b2033f338caf..f127183a9ebb35 100644 --- a/src/plugins/intel_gpu/src/graph/impls/ocl/extract_image_patches.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/ocl/extract_image_patches.cpp @@ -18,6 +18,8 @@ struct extract_image_patches_impl : typed_primitive_impl_ocl; using parent::parent; + DECLARE_OBJECT_TYPE_SERIALIZATION + std::unique_ptr clone() const override { return make_unique(*this); } @@ -64,3 +66,5 @@ attach_extract_image_patches_impl::attach_extract_image_patches_impl() { } // namespace detail } // namespace ocl } // namespace cldnn + +BIND_BINARY_BUFFER_WITH_TYPE(cldnn::ocl::extract_image_patches_impl, cldnn::object_type::EXTRACT_IMAGE_PATCHES_IMPL) \ No newline at end of file diff --git a/src/plugins/intel_gpu/src/graph/impls/ocl/eye.cpp b/src/plugins/intel_gpu/src/graph/impls/ocl/eye.cpp index 78b071e9ba79ee..b3975c9e1f49a0 100644 --- a/src/plugins/intel_gpu/src/graph/impls/ocl/eye.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/ocl/eye.cpp @@ -22,6 +22,8 @@ struct eye_impl : typed_primitive_impl_ocl { using parent = typed_primitive_impl_ocl; using parent::parent; + DECLARE_OBJECT_TYPE_SERIALIZATION + std::unique_ptr clone() const override { return make_unique(*this); } @@ -77,3 +79,5 @@ 
attach_eye_impl::attach_eye_impl() { } // namespace ocl } // namespace cldnn + +BIND_BINARY_BUFFER_WITH_TYPE(cldnn::ocl::eye_impl, cldnn::object_type::EYE_IMPL) \ No newline at end of file diff --git a/src/plugins/intel_gpu/src/graph/impls/ocl/fully_connected.cpp b/src/plugins/intel_gpu/src/graph/impls/ocl/fully_connected.cpp index 7668797fd58a70..1cb8538ffbdb63 100644 --- a/src/plugins/intel_gpu/src/graph/impls/ocl/fully_connected.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/ocl/fully_connected.cpp @@ -26,12 +26,14 @@ struct fully_connected_impl : typed_primitive_impl_ocl { using parent = typed_primitive_impl_ocl; using parent::parent; + DECLARE_OBJECT_TYPE_SERIALIZATION + std::unique_ptr clone() const override { return make_unique(*this); } protected: - kernel_arguments_data get_arguments(typed_primitive_inst& instance, int32_t split) const override { + kernel_arguments_data get_arguments(const typed_primitive_inst& instance, int32_t split) const override { kernel_arguments_data args = parent::get_arguments(instance, split); args.weights = instance.weights_memory(); @@ -159,3 +161,5 @@ attach_fully_connected_impl::attach_fully_connected_impl() { } // namespace detail } // namespace ocl } // namespace cldnn + +BIND_BINARY_BUFFER_WITH_TYPE(cldnn::ocl::fully_connected_impl, cldnn::object_type::FULLY_CONNECTED_IMPL) \ No newline at end of file diff --git a/src/plugins/intel_gpu/src/graph/impls/ocl/gather.cpp b/src/plugins/intel_gpu/src/graph/impls/ocl/gather.cpp index 74312f9644669e..42be9e16b7f909 100644 --- a/src/plugins/intel_gpu/src/graph/impls/ocl/gather.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/ocl/gather.cpp @@ -63,6 +63,8 @@ struct gather_impl : typed_primitive_impl_ocl { using parent = typed_primitive_impl_ocl; using parent::parent; + DECLARE_OBJECT_TYPE_SERIALIZATION + std::unique_ptr clone() const override { return make_unique(*this); } @@ -218,3 +220,5 @@ attach_gather_impl::attach_gather_impl() { } // namespace detail } // namespace ocl } // 
namespace cldnn + +BIND_BINARY_BUFFER_WITH_TYPE(cldnn::ocl::gather_impl, cldnn::object_type::GATHER_IMPL) \ No newline at end of file diff --git a/src/plugins/intel_gpu/src/graph/impls/ocl/gather_elements.cpp b/src/plugins/intel_gpu/src/graph/impls/ocl/gather_elements.cpp index b1c9d72c155181..78e7297049bb63 100644 --- a/src/plugins/intel_gpu/src/graph/impls/ocl/gather_elements.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/ocl/gather_elements.cpp @@ -50,6 +50,8 @@ struct gather_elements_impl : typed_primitive_impl_ocl { using parent = typed_primitive_impl_ocl; using parent::parent; + DECLARE_OBJECT_TYPE_SERIALIZATION + std::unique_ptr clone() const override { return make_unique(*this); } @@ -101,3 +103,5 @@ attach_gather_elements_impl::attach_gather_elements_impl() { } // namespace detail } // namespace ocl } // namespace cldnn + +BIND_BINARY_BUFFER_WITH_TYPE(cldnn::ocl::gather_elements_impl, cldnn::object_type::GATHER_ELEMENTS_IMPL) \ No newline at end of file diff --git a/src/plugins/intel_gpu/src/graph/impls/ocl/gather_nd.cpp b/src/plugins/intel_gpu/src/graph/impls/ocl/gather_nd.cpp index 3c5a79e7c97c7b..739bd3fb02bfb5 100644 --- a/src/plugins/intel_gpu/src/graph/impls/ocl/gather_nd.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/ocl/gather_nd.cpp @@ -18,6 +18,8 @@ struct gather_nd_impl : typed_primitive_impl_ocl { using parent = typed_primitive_impl_ocl; using parent::parent; + DECLARE_OBJECT_TYPE_SERIALIZATION + std::unique_ptr clone() const override { return make_unique(*this); } @@ -67,3 +69,5 @@ attach_gather_nd_impl::attach_gather_nd_impl() { } // namespace detail } // namespace ocl } // namespace cldnn + +BIND_BINARY_BUFFER_WITH_TYPE(cldnn::ocl::gather_nd_impl, cldnn::object_type::GATHER_ND_IMPL) \ No newline at end of file diff --git a/src/plugins/intel_gpu/src/graph/impls/ocl/gather_tree.cpp b/src/plugins/intel_gpu/src/graph/impls/ocl/gather_tree.cpp index 03149ddea81f45..6c18e8969fe3bc 100644 --- 
a/src/plugins/intel_gpu/src/graph/impls/ocl/gather_tree.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/ocl/gather_tree.cpp @@ -18,6 +18,8 @@ struct gather_tree_impl : typed_primitive_impl_ocl { using parent = typed_primitive_impl_ocl; using parent::parent; + DECLARE_OBJECT_TYPE_SERIALIZATION + std::unique_ptr clone() const override { return make_unique(*this); } @@ -66,3 +68,5 @@ attach_gather_tree_impl::attach_gather_tree_impl() { } // namespace detail } // namespace ocl } // namespace cldnn + +BIND_BINARY_BUFFER_WITH_TYPE(cldnn::ocl::gather_tree_impl, cldnn::object_type::GATHER_TREE_IMPL) \ No newline at end of file diff --git a/src/plugins/intel_gpu/src/graph/impls/ocl/gemm.cpp b/src/plugins/intel_gpu/src/graph/impls/ocl/gemm.cpp index 0b48e9a68cfed0..6ee129eb98d787 100644 --- a/src/plugins/intel_gpu/src/graph/impls/ocl/gemm.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/ocl/gemm.cpp @@ -18,6 +18,8 @@ struct gemm_impl : typed_primitive_impl_ocl { using parent = typed_primitive_impl_ocl; using parent::parent; + DECLARE_OBJECT_TYPE_SERIALIZATION + std::unique_ptr clone() const override { return make_unique(*this); } @@ -170,3 +172,5 @@ attach_gemm_impl::attach_gemm_impl() { } // namespace detail } // namespace ocl } // namespace cldnn + +BIND_BINARY_BUFFER_WITH_TYPE(cldnn::ocl::gemm_impl, cldnn::object_type::GEMM_IMPL) \ No newline at end of file diff --git a/src/plugins/intel_gpu/src/graph/impls/ocl/generate_proposals.cpp b/src/plugins/intel_gpu/src/graph/impls/ocl/generate_proposals.cpp index 23abd87e31afe9..614123adbfe110 100644 --- a/src/plugins/intel_gpu/src/graph/impls/ocl/generate_proposals.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/ocl/generate_proposals.cpp @@ -18,12 +18,14 @@ struct generate_proposals_impl using parent = typed_primitive_impl_ocl; using parent::parent; + DECLARE_OBJECT_TYPE_SERIALIZATION + std::unique_ptr clone() const override { return make_unique(*this); } protected: - kernel_arguments_data get_arguments(typed_primitive_inst& 
instance, int32_t) const override { + kernel_arguments_data get_arguments(const typed_primitive_inst& instance, int32_t) const override { auto args = parent::get_arguments(instance, 0); args.inputs.push_back(instance.output_rois_scores_memory()); args.inputs.push_back(instance.output_rois_nums_memory()); @@ -87,3 +89,5 @@ namespace detail { } // namespace detail } // namespace ocl } // namespace cldnn + +BIND_BINARY_BUFFER_WITH_TYPE(cldnn::ocl::generate_proposals_impl, cldnn::object_type::GENERATE_PROPOSALS_IMPL) \ No newline at end of file diff --git a/src/plugins/intel_gpu/src/graph/impls/ocl/generic_layer.cpp b/src/plugins/intel_gpu/src/graph/impls/ocl/generic_layer.cpp index 7f9822c3d5c2fc..a25203538dd00e 100644 --- a/src/plugins/intel_gpu/src/graph/impls/ocl/generic_layer.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/ocl/generic_layer.cpp @@ -13,14 +13,21 @@ namespace cldnn { namespace ocl { struct generic_layer_impl : typed_primitive_impl { - const kernel_selector::cl_kernel_data& _cl_kernel_data; + using parent = typed_primitive_impl; + using parent::parent; + + kernel_selector::cl_kernel_data _cl_kernel_data; std::vector _kernels; kernel_id _kernel_id; + DECLARE_OBJECT_TYPE_SERIALIZATION + std::unique_ptr clone() const override { return make_unique(*this); } + generic_layer_impl() : parent() {} + generic_layer_impl(const generic_layer_impl& other) : _cl_kernel_data(other._cl_kernel_data) , _kernels({}) @@ -28,7 +35,7 @@ struct generic_layer_impl : typed_primitive_impl { if (other._kernels.empty()) { throw std::runtime_error("Can't copy generic_layer_impl node: kernels vector is empty"); } - _kernels.push_back(other._kernels.front()->clone()); + _kernels.push_back(std::move(other._kernels.front()->clone())); } generic_layer_impl(const generic_layer_node& arg) @@ -37,8 +44,18 @@ struct generic_layer_impl : typed_primitive_impl { _kernel_id = arg.get_program().add_kernel(arg.get_primitive()->generic_params.clKernel->code.kernelString); } + void 
save(BinaryOutputBuffer& ob) const override { + ob <<_cl_kernel_data; + ob << _kernel_id; + } + + void load(BinaryInputBuffer& ib) override { + ib >> _cl_kernel_data; + ib >> _kernel_id; + } + void init_kernels(const kernels_cache& kernels_cache) override { - _kernels.push_back(kernels_cache.get_kernel(_kernel_id)); + _kernels.push_back(std::move(kernels_cache.get_kernel(_kernel_id))); } void set_arguments_impl(generic_layer_inst& instance) override { @@ -118,3 +135,5 @@ attach_generic_layer_impl::attach_generic_layer_impl() { } // namespace detail } // namespace ocl } // namespace cldnn + +BIND_BINARY_BUFFER_WITH_TYPE(cldnn::ocl::generic_layer_impl, cldnn::object_type::GENERIC_LAYER_IMPL) \ No newline at end of file diff --git a/src/plugins/intel_gpu/src/graph/impls/ocl/grid_sample.cpp b/src/plugins/intel_gpu/src/graph/impls/ocl/grid_sample.cpp index fbf0e5c8e3adc7..c4cfe3a798ca6b 100644 --- a/src/plugins/intel_gpu/src/graph/impls/ocl/grid_sample.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/ocl/grid_sample.cpp @@ -43,6 +43,8 @@ struct grid_sample_impl : public typed_primitive_impl_ocl { using parent = typed_primitive_impl_ocl; using parent::parent; + DECLARE_OBJECT_TYPE_SERIALIZATION + std::unique_ptr clone() const override { return make_unique(*this); } @@ -90,3 +92,5 @@ attach_grid_sample_impl::attach_grid_sample_impl() { } // namespace detail } // namespace ocl } // namespace cldnn + +BIND_BINARY_BUFFER_WITH_TYPE(cldnn::ocl::grid_sample_impl, cldnn::object_type::GRID_SAMPLE_IMPL) \ No newline at end of file diff --git a/src/plugins/intel_gpu/src/graph/impls/ocl/grn.cpp b/src/plugins/intel_gpu/src/graph/impls/ocl/grn.cpp index 182869a8d57cfb..ce06c0750931ab 100644 --- a/src/plugins/intel_gpu/src/graph/impls/ocl/grn.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/ocl/grn.cpp @@ -21,6 +21,8 @@ struct grn_impl : typed_primitive_impl_ocl { using parent = typed_primitive_impl_ocl; using parent::parent; + DECLARE_OBJECT_TYPE_SERIALIZATION + std::unique_ptr clone() 
const override { return make_unique(*this); } @@ -59,3 +61,5 @@ attach_grn_impl::attach_grn_impl() { } // namespace detail } // namespace ocl } // namespace cldnn + +BIND_BINARY_BUFFER_WITH_TYPE(cldnn::ocl::grn_impl, cldnn::object_type::GRN_IMPL) diff --git a/src/plugins/intel_gpu/src/graph/impls/ocl/lrn.cpp b/src/plugins/intel_gpu/src/graph/impls/ocl/lrn.cpp index d7aa1cefbf13dc..ce357fc800c28a 100644 --- a/src/plugins/intel_gpu/src/graph/impls/ocl/lrn.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/ocl/lrn.cpp @@ -17,6 +17,8 @@ struct lrn_impl : typed_primitive_impl_ocl { using parent = typed_primitive_impl_ocl; using parent::parent; + DECLARE_OBJECT_TYPE_SERIALIZATION + std::unique_ptr clone() const override { return make_unique(*this); } @@ -98,3 +100,5 @@ attach_lrn_impl::attach_lrn_impl() { } // namespace detail } // namespace ocl } // namespace cldnn + +BIND_BINARY_BUFFER_WITH_TYPE(cldnn::ocl::lrn_impl, cldnn::object_type::LRN_IMPL) diff --git a/src/plugins/intel_gpu/src/graph/impls/ocl/lstm_dynamic_input.cpp b/src/plugins/intel_gpu/src/graph/impls/ocl/lstm_dynamic_input.cpp index 4546d7274f2f19..4f2b120ab9ac8a 100644 --- a/src/plugins/intel_gpu/src/graph/impls/ocl/lstm_dynamic_input.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/ocl/lstm_dynamic_input.cpp @@ -19,12 +19,14 @@ struct lstm_dynamic_input_impl : typed_primitive_impl_ocl { using parent = typed_primitive_impl_ocl; using parent::parent; + DECLARE_OBJECT_TYPE_SERIALIZATION + std::unique_ptr clone() const override { return make_unique(*this); } protected: - kernel_arguments_data get_arguments(typed_primitive_inst& instance, int32_t) const override { + kernel_arguments_data get_arguments(const typed_primitive_inst& instance, int32_t) const override { kernel_arguments_data args; args.inputs = { instance.input_memory_ptr(), instance.dyn_length_memory()}; args.outputs = { instance.output_memory_ptr() }; @@ -85,3 +87,5 @@ attach_lstm_dynamic_input_impl::attach_lstm_dynamic_input_impl() { } // namespace 
detail } // namespace ocl } // namespace cldnn + +BIND_BINARY_BUFFER_WITH_TYPE(cldnn::ocl::lstm_dynamic_input_impl, cldnn::object_type::LSTM_DYNAMIC_INPUT_IMPL) diff --git a/src/plugins/intel_gpu/src/graph/impls/ocl/lstm_dynamic_timeloop.cpp b/src/plugins/intel_gpu/src/graph/impls/ocl/lstm_dynamic_timeloop.cpp index ecec3c48a93547..f808f54d12be4f 100644 --- a/src/plugins/intel_gpu/src/graph/impls/ocl/lstm_dynamic_timeloop.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/ocl/lstm_dynamic_timeloop.cpp @@ -19,12 +19,14 @@ struct lstm_dynamic_timeloop_impl : typed_primitive_impl_ocl; using parent::parent; + DECLARE_OBJECT_TYPE_SERIALIZATION + std::unique_ptr clone() const override { return make_unique(*this); } protected: - kernel_arguments_data get_arguments(typed_primitive_inst& instance, int32_t) const override { + kernel_arguments_data get_arguments(const typed_primitive_inst& instance, int32_t) const override { kernel_arguments_data args; args.inputs = {instance.input_memory_ptr(), instance.dyn_length_memory()}; if (instance.last_hidden_output_term()) @@ -102,3 +104,5 @@ attach_lstm_dynamic_timeloop_impl::attach_lstm_dynamic_timeloop_impl() { } // namespace detail } // namespace ocl } // namespace cldnn + +BIND_BINARY_BUFFER_WITH_TYPE(cldnn::ocl::lstm_dynamic_timeloop_impl, cldnn::object_type::LSTM_DYNAMIC_TIMELOOP_IMPL) diff --git a/src/plugins/intel_gpu/src/graph/impls/ocl/lstm_elt.cpp b/src/plugins/intel_gpu/src/graph/impls/ocl/lstm_elt.cpp index d59c34d55658ab..1fa20e06a151ff 100644 --- a/src/plugins/intel_gpu/src/graph/impls/ocl/lstm_elt.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/ocl/lstm_elt.cpp @@ -19,12 +19,14 @@ struct lstm_elt_impl : typed_primitive_impl_ocl { using parent = typed_primitive_impl_ocl; using parent::parent; + DECLARE_OBJECT_TYPE_SERIALIZATION + std::unique_ptr clone() const override { return make_unique(*this); } protected: - kernel_arguments_data get_arguments(typed_primitive_inst& instance, int32_t) const override { + 
kernel_arguments_data get_arguments(const typed_primitive_inst& instance, int32_t) const override { kernel_arguments_data args = parent::get_arguments(instance, 0); args.cell = instance.cell_term() ? instance.cell_memory() : nullptr; @@ -105,3 +107,5 @@ attach_lstm_elt_impl::attach_lstm_elt_impl() { } // namespace detail } // namespace ocl } // namespace cldnn + +BIND_BINARY_BUFFER_WITH_TYPE(cldnn::ocl::lstm_elt_impl, cldnn::object_type::LSTM_ELT_IMPL) diff --git a/src/plugins/intel_gpu/src/graph/impls/ocl/lstm_gemm.cpp b/src/plugins/intel_gpu/src/graph/impls/ocl/lstm_gemm.cpp index 12f8aea5e5a334..d85b8cf90b9463 100644 --- a/src/plugins/intel_gpu/src/graph/impls/ocl/lstm_gemm.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/ocl/lstm_gemm.cpp @@ -19,12 +19,14 @@ struct lstm_gemm_impl : typed_primitive_impl_ocl { using parent = typed_primitive_impl_ocl; using parent::parent; + DECLARE_OBJECT_TYPE_SERIALIZATION + std::unique_ptr clone() const override { return make_unique(*this); } protected: - kernel_arguments_data get_arguments(typed_primitive_inst& instance, int32_t) const override { + kernel_arguments_data get_arguments(const typed_primitive_inst& instance, int32_t) const override { kernel_arguments_data args = parent::get_arguments(instance, 0); args.outputs = { instance.output_memory_ptr() }; @@ -106,3 +108,5 @@ attach_lstm_gemm_impl::attach_lstm_gemm_impl() { } // namespace detail } // namespace ocl } // namespace cldnn + +BIND_BINARY_BUFFER_WITH_TYPE(cldnn::ocl::lstm_gemm_impl, cldnn::object_type::LSTM_GEMM_IMPL) diff --git a/src/plugins/intel_gpu/src/graph/impls/ocl/mutable_data.cpp b/src/plugins/intel_gpu/src/graph/impls/ocl/mutable_data.cpp index a3d876d6ec9e98..4c5cb9a9d89042 100644 --- a/src/plugins/intel_gpu/src/graph/impls/ocl/mutable_data.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/ocl/mutable_data.cpp @@ -13,6 +13,8 @@ struct mutable_data_impl : public typed_primitive_impl_ocl { using parent = typed_primitive_impl_ocl; using parent::parent; + 
DECLARE_OBJECT_TYPE_SERIALIZATION + std::unique_ptr clone() const override { return make_unique(*this); } @@ -30,3 +32,5 @@ attach_mutable_data_impl::attach_mutable_data_impl() { } // namespace detail } // namespace ocl } // namespace cldnn + +BIND_BINARY_BUFFER_WITH_TYPE(cldnn::ocl::mutable_data_impl, cldnn::object_type::MUTABLE_DATA_IMPL) diff --git a/src/plugins/intel_gpu/src/graph/impls/ocl/mvn.cpp b/src/plugins/intel_gpu/src/graph/impls/ocl/mvn.cpp index 0830cb0307ea0e..285f89e33a12be 100644 --- a/src/plugins/intel_gpu/src/graph/impls/ocl/mvn.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/ocl/mvn.cpp @@ -21,6 +21,8 @@ struct mvn_impl : typed_primitive_impl_ocl { using parent = typed_primitive_impl_ocl; using parent::parent; + DECLARE_OBJECT_TYPE_SERIALIZATION + std::unique_ptr clone() const override { return make_unique(*this); } @@ -107,3 +109,5 @@ attach_mvn_impl::attach_mvn_impl() { } // namespace detail } // namespace ocl } // namespace cldnn + +BIND_BINARY_BUFFER_WITH_TYPE(cldnn::ocl::mvn_impl, cldnn::object_type::MVN_IMPL) diff --git a/src/plugins/intel_gpu/src/graph/impls/ocl/non_max_suppression.cpp b/src/plugins/intel_gpu/src/graph/impls/ocl/non_max_suppression.cpp index 63279d297670ea..a7521183e6bc27 100644 --- a/src/plugins/intel_gpu/src/graph/impls/ocl/non_max_suppression.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/ocl/non_max_suppression.cpp @@ -16,12 +16,14 @@ struct non_max_suppression_impl : typed_primitive_impl_ocl using parent = typed_primitive_impl_ocl; using parent::parent; + DECLARE_OBJECT_TYPE_SERIALIZATION + std::unique_ptr clone() const override { return make_unique(*this); } protected: - kernel_arguments_data get_arguments(typed_primitive_inst& instance, int32_t) const override { + kernel_arguments_data get_arguments(const typed_primitive_inst& instance, int32_t) const override { kernel_arguments_data args; for (size_t i = 0; i < instance.inputs_memory_count(); i++) { args.inputs.push_back(instance.input_memory_ptr(i)); @@ -205,3 
+207,5 @@ attach_non_max_suppression_impl::attach_non_max_suppression_impl() { } // namespace detail } // namespace ocl } // namespace cldnn + +BIND_BINARY_BUFFER_WITH_TYPE(cldnn::ocl::non_max_suppression_impl, cldnn::object_type::NON_MAX_SUPPRESSION_IMPL_OCL) diff --git a/src/plugins/intel_gpu/src/graph/impls/ocl/non_zero.cpp b/src/plugins/intel_gpu/src/graph/impls/ocl/non_zero.cpp index d688f16f19c963..f5e716ae5b1046 100644 --- a/src/plugins/intel_gpu/src/graph/impls/ocl/non_zero.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/ocl/non_zero.cpp @@ -21,6 +21,8 @@ struct count_nonzero_impl : typed_primitive_impl_ocl { using parent = typed_primitive_impl_ocl; using parent::parent; + DECLARE_OBJECT_TYPE_SERIALIZATION + std::unique_ptr clone() const override { return make_unique(*this); } @@ -45,6 +47,8 @@ struct gather_nonzero_impl : typed_primitive_impl_ocl { using parent = typed_primitive_impl_ocl; using parent::parent; + DECLARE_OBJECT_TYPE_SERIALIZATION + std::unique_ptr clone() const override { return make_unique(*this); } @@ -118,3 +122,6 @@ attach_gather_nonzero_impl::attach_gather_nonzero_impl() { } // namespace detail } // namespace ocl } // namespace cldnn + +BIND_BINARY_BUFFER_WITH_TYPE(cldnn::ocl::count_nonzero_impl, cldnn::object_type::COUNT_NONZERO_IMPL) +BIND_BINARY_BUFFER_WITH_TYPE(cldnn::ocl::gather_nonzero_impl, cldnn::object_type::GATHER_NONZERO_IMPL) diff --git a/src/plugins/intel_gpu/src/graph/impls/ocl/normalize.cpp b/src/plugins/intel_gpu/src/graph/impls/ocl/normalize.cpp index cef72d0f4dc69f..9e55ecc0538f2d 100644 --- a/src/plugins/intel_gpu/src/graph/impls/ocl/normalize.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/ocl/normalize.cpp @@ -21,12 +21,14 @@ struct normalize_impl : typed_primitive_impl_ocl { using parent = typed_primitive_impl_ocl; using parent::parent; + DECLARE_OBJECT_TYPE_SERIALIZATION + std::unique_ptr clone() const override { return make_unique(*this); } protected: - kernel_arguments_data get_arguments(typed_primitive_inst& 
instance, int32_t split) const override { + kernel_arguments_data get_arguments(const typed_primitive_inst& instance, int32_t split) const override { kernel_arguments_data args = parent::get_arguments(instance, split); args.scale_table = instance.scale_memory(); return args; @@ -84,3 +86,5 @@ attach_normalize_impl::attach_normalize_impl() { } // namespace detail } // namespace ocl } // namespace cldnn + +BIND_BINARY_BUFFER_WITH_TYPE(cldnn::ocl::normalize_impl, cldnn::object_type::NORMALIZE_IMPL) diff --git a/src/plugins/intel_gpu/src/graph/impls/ocl/one_hot.cpp b/src/plugins/intel_gpu/src/graph/impls/ocl/one_hot.cpp index 61229524250d9b..5d1bfd004866ac 100644 --- a/src/plugins/intel_gpu/src/graph/impls/ocl/one_hot.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/ocl/one_hot.cpp @@ -19,6 +19,8 @@ struct one_hot_impl : typed_primitive_impl_ocl { using parent = typed_primitive_impl_ocl; using parent::parent; + DECLARE_OBJECT_TYPE_SERIALIZATION + std::unique_ptr clone() const override { return make_unique(*this); } @@ -71,3 +73,5 @@ attach_one_hot_impl::attach_one_hot_impl() { } // namespace detail } // namespace ocl } // namespace cldnn + +BIND_BINARY_BUFFER_WITH_TYPE(cldnn::ocl::one_hot_impl, cldnn::object_type::ONE_HOT_IMPL) diff --git a/src/plugins/intel_gpu/src/graph/impls/ocl/permute.cpp b/src/plugins/intel_gpu/src/graph/impls/ocl/permute.cpp index 555c8e23ef1157..51ed0963db9bb7 100644 --- a/src/plugins/intel_gpu/src/graph/impls/ocl/permute.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/ocl/permute.cpp @@ -46,6 +46,8 @@ struct permute_impl : typed_primitive_impl_ocl { using parent = typed_primitive_impl_ocl; using parent::parent; + DECLARE_OBJECT_TYPE_SERIALIZATION + std::unique_ptr clone() const override { return make_unique(*this); } @@ -82,3 +84,5 @@ attach_permute_impl::attach_permute_impl() { } // namespace detail } // namespace ocl } // namespace cldnn + +BIND_BINARY_BUFFER_WITH_TYPE(cldnn::ocl::permute_impl, cldnn::object_type::PERMUTE_IMPL) diff --git 
a/src/plugins/intel_gpu/src/graph/impls/ocl/pooling.cpp b/src/plugins/intel_gpu/src/graph/impls/ocl/pooling.cpp index ced941dfbdb032..07ac921204e8a2 100644 --- a/src/plugins/intel_gpu/src/graph/impls/ocl/pooling.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/ocl/pooling.cpp @@ -59,12 +59,14 @@ struct pooling_impl : typed_primitive_impl_ocl { using parent = typed_primitive_impl_ocl; using parent::parent; + DECLARE_OBJECT_TYPE_SERIALIZATION + std::unique_ptr clone() const override { return make_unique(*this); } protected: - kernel_arguments_data get_arguments(typed_primitive_inst& instance, int32_t split) const override { + kernel_arguments_data get_arguments(const typed_primitive_inst& instance, int32_t split) const override { kernel_arguments_data args = parent::get_arguments(instance, split); return args; } @@ -223,3 +225,5 @@ attach_pooling_impl::attach_pooling_impl() { } // namespace detail } // namespace ocl } // namespace cldnn + +BIND_BINARY_BUFFER_WITH_TYPE(cldnn::ocl::pooling_impl, cldnn::object_type::POOLING_IMPL) diff --git a/src/plugins/intel_gpu/src/graph/impls/ocl/primitive_base.hpp b/src/plugins/intel_gpu/src/graph/impls/ocl/primitive_base.hpp index 370aa55e04ca0b..cd538973ca49c7 100644 --- a/src/plugins/intel_gpu/src/graph/impls/ocl/primitive_base.hpp +++ b/src/plugins/intel_gpu/src/graph/impls/ocl/primitive_base.hpp @@ -11,6 +11,12 @@ #include "intel_gpu/runtime/error_handler.hpp" #include "kernel_selector_helper.h" #include "intel_gpu/graph/network.hpp" +#include "serialization/binary_buffer.hpp" +#include "serialization/cl_kernel_data_serializer.hpp" +#include "serialization/helpers.hpp" +#include "serialization/set_serializer.hpp" +#include "serialization/string_serializer.hpp" +#include "serialization/vector_serializer.hpp" #include "register.hpp" #include #include @@ -25,10 +31,17 @@ For example, all gpu convolution implementations should derive from typed_primit */ template struct typed_primitive_impl_ocl : public typed_primitive_impl { - 
const primitive_id& _node_id; + primitive_id _node_id; kernel_selector::kernel_data _kernel_data; std::vector _kernel_ids; std::vector _kernels; + kernel_arguments_data_idx _kernel_args; + + typed_primitive_impl_ocl() : _node_id(""), _kernel_data({}), _kernel_ids({}), _kernels({}) { + _kernel_data.weightsReorderParams.engine = kernel_selector::generic_kernel_params::Engine::NONE; + _kernel_data.weightsReorderParams.cpuKernel = nullptr; + _kernel_data.weightsReorderParams.clKernel = nullptr; + } typed_primitive_impl_ocl(const typed_primitive_impl_ocl& other) : typed_primitive_impl(other._weights_reorder_params, other._kernel_name) @@ -54,10 +67,26 @@ struct typed_primitive_impl_ocl : public typed_primitive_impl { bool is_cpu() const override { return false; } + void save(BinaryOutputBuffer& ob) const override { + ob << make_data(&_kernel_data.internalBufferDataType, sizeof(kernel_selector::Datatype)); + ob << _kernel_data.internalBufferSizes; + ob << _kernel_data.kernels; + ob << _kernel_ids; + ob << _kernel_args; + } + + void load(BinaryInputBuffer& ib) override { + ib >> make_data(&_kernel_data.internalBufferDataType, sizeof(kernel_selector::Datatype)); + ib >> _kernel_data.internalBufferSizes; + ib >> _kernel_data.kernels; + ib >> _kernel_ids; + ib >> _kernel_args; + } + protected: virtual bool optimized_out(typed_primitive_inst&) const { return false; } - virtual kernel_arguments_data get_arguments(typed_primitive_inst& instance, int32_t /*split*/) const { + virtual kernel_arguments_data get_arguments(const typed_primitive_inst& instance, int32_t /*split*/) const { kernel_arguments_data args; for (size_t i = 0; i < instance.inputs_memory_count(); i++) { @@ -132,6 +161,64 @@ struct typed_primitive_impl_ocl : public typed_primitive_impl { stream& stream = instance.get_network().get_stream(); + // we iterate over split first in order to be able parallelism with OOOQ mechanism. 
+ for (size_t k = 0; k < _kernels.size(); ++k) { + for (decltype(split) i = 0; i < split; i++) { + kernel_arguments_data args; + + // [TODO] get args from cache + if (_kernel_args.inputs.size() > 0) { + for (uint32_t i = 0; i < _kernel_args.inputs.size(); i++) { + args.inputs.push_back(instance.dep_memory_ptr(_kernel_args.inputs[i])); + } + + for (uint32_t i = 0; i < _kernel_args.intermediates.size(); i++) { + args.intermediates.push_back(instance.dep_memory_ptr(_kernel_args.intermediates[i])); + } + + args.weights = (_kernel_args.weights >= 0) ? instance.dep_memory_ptr(_kernel_args.weights) : args.weights; + args.recurrent = (_kernel_args.recurrent >= 0) ? instance.dep_memory_ptr(_kernel_args.recurrent) : args.recurrent; + args.hidden = (_kernel_args.hidden >= 0) ? instance.dep_memory_ptr(_kernel_args.hidden) : args.hidden; + args.cell = (_kernel_args.cell >= 0) ? instance.dep_memory_ptr(_kernel_args.cell) : args.cell; + args.cell = (_kernel_args.cell >= 0) ? instance.dep_memory_ptr(_kernel_args.cell) : args.cell; + args.bias = (_kernel_args.bias >= 0) ? instance.dep_memory_ptr(_kernel_args.bias) : args.bias; + args.weights_zero_points = (_kernel_args.weights_zero_points >= 0) ? + instance.dep_memory_ptr(_kernel_args.weights_zero_points) : args.weights_zero_points; + args.activations_zero_points = (_kernel_args.activations_zero_points >= 0) ? + instance.dep_memory_ptr(_kernel_args.activations_zero_points) : args.activations_zero_points; + args.compensation = (_kernel_args.compensation >= 0) ? instance.dep_memory_ptr(_kernel_args.compensation) : args.compensation; + args.lookup_table = (_kernel_args.lookup_table >= 0) ? instance.dep_memory_ptr(_kernel_args.lookup_table) : args.lookup_table; + args.scale_table = (_kernel_args.scale_table >= 0) ? instance.dep_memory_ptr(_kernel_args.scale_table) : args.scale_table; + args.slope = (_kernel_args.slope >= 0) ? 
instance.dep_memory_ptr(_kernel_args.slope) : args.slope; + + for (size_t i = 0; i < _kernel_args.fused_op_inputs.size(); i++) { + args.fused_op_inputs.push_back(instance.dep_memory_ptr(_kernel_args.fused_op_inputs[i])); + } + + args.outputs.push_back(instance.output_memory_ptr()); + } else { + args = get_arguments(instance, i); + + for (const auto& m : instance.get_intermediates_memories()) { + args.intermediates.push_back(m); + } + } + + args.scalars = &_kernel_data.kernels[k].params.scalars; + args.split = i; + + stream.set_arguments(*_kernels[k], _kernel_data.kernels[k].params, args); + } + } + } + + void set_arguments_impl(kernel_arguments_data_idx& args_idx) override { + this->_kernel_args = args_idx; + } + + kernel_arguments_data get_arguments_impl(const typed_primitive_inst& instance) const override { + auto split = get_split(); + // we iterate over split first in order to be able parallelism with OOOQ mechanism. for (size_t k = 0; k < _kernels.size(); ++k) { for (decltype(split) i = 0; i < split; i++) { @@ -143,10 +230,12 @@ struct typed_primitive_impl_ocl : public typed_primitive_impl { args.intermediates.push_back(m); } - - stream.set_arguments(*_kernels[k], _kernel_data.kernels[k].params, args); + return args; } } + + kernel_arguments_data args; + return args; } event::ptr execute_impl(const std::vector& events, @@ -167,17 +256,54 @@ struct typed_primitive_impl_ocl : public typed_primitive_impl { std::vector new_events; for (decltype(split) i = 0; i < split; i++) { // is any user of the prim's users is an detecion output, set prim as a output event (event won't be nullptr) - auto users = instance.node->get_users(); - bool is_output_event = is_any_user_cpu(users) || instance.node->is_output(); + // [TODO] + // auto users = instance.node->get_users(); + // bool is_output_event = is_any_user_cpu(users) || instance.node->is_output(); + bool is_output_event = instance.is_output(); + + kernel_arguments_data args; + + // [TODO] get args from cache + if 
(_kernel_args.inputs.size() > 0) { + for (uint32_t i = 0; i < _kernel_args.inputs.size(); i++) { + args.inputs.push_back(instance.dep_memory_ptr(_kernel_args.inputs[i])); + } + + for (uint32_t i = 0; i < _kernel_args.intermediates.size(); i++) { + args.intermediates.push_back(instance.dep_memory_ptr(_kernel_args.intermediates[i])); + } + + args.weights = (_kernel_args.weights >= 0) ? instance.dep_memory_ptr(_kernel_args.weights) : args.weights; + args.recurrent = (_kernel_args.recurrent >= 0) ? instance.dep_memory_ptr(_kernel_args.recurrent) : args.recurrent; + args.hidden = (_kernel_args.hidden >= 0) ? instance.dep_memory_ptr(_kernel_args.hidden) : args.hidden; + args.cell = (_kernel_args.cell >= 0) ? instance.dep_memory_ptr(_kernel_args.cell) : args.cell; + args.cell = (_kernel_args.cell >= 0) ? instance.dep_memory_ptr(_kernel_args.cell) : args.cell; + args.bias = (_kernel_args.bias >= 0) ? instance.dep_memory_ptr(_kernel_args.bias) : args.bias; + args.weights_zero_points = (_kernel_args.weights_zero_points >= 0) ? + instance.dep_memory_ptr(_kernel_args.weights_zero_points) : args.weights_zero_points; + args.activations_zero_points = (_kernel_args.activations_zero_points >= 0) ? + instance.dep_memory_ptr(_kernel_args.activations_zero_points) : args.activations_zero_points; + args.compensation = (_kernel_args.compensation >= 0) ? instance.dep_memory_ptr(_kernel_args.compensation) : args.compensation; + args.lookup_table = (_kernel_args.lookup_table >= 0) ? instance.dep_memory_ptr(_kernel_args.lookup_table) : args.lookup_table; + args.scale_table = (_kernel_args.scale_table >= 0) ? instance.dep_memory_ptr(_kernel_args.scale_table) : args.scale_table; + args.slope = (_kernel_args.slope >= 0) ? 
instance.dep_memory_ptr(_kernel_args.slope) : args.slope; + + for (size_t i = 0; i < _kernel_args.fused_op_inputs.size(); i++) { + args.fused_op_inputs.push_back(instance.dep_memory_ptr(_kernel_args.fused_op_inputs[i])); + } + + args.outputs.push_back(instance.output_memory_ptr()); + } else { + args = get_arguments(instance, i); + + for (const auto& m : instance.get_intermediates_memories()) { + args.intermediates.push_back(m); + } + } - auto args = get_arguments(instance, i); args.scalars = &_kernel_data.kernels[k].params.scalars; args.split = i; - for (const auto& m : instance.get_intermediates_memories()) { - args.intermediates.push_back(m); - } - auto ev = stream.enqueue_kernel(*_kernels[k], _kernel_data.kernels[k].params, args, tmp_events, is_output_event); new_events.push_back(ev); all_events.push_back(ev); diff --git a/src/plugins/intel_gpu/src/graph/impls/ocl/prior_box.cpp b/src/plugins/intel_gpu/src/graph/impls/ocl/prior_box.cpp index 3b72edd393ceb4..0270c22ab34fcb 100644 --- a/src/plugins/intel_gpu/src/graph/impls/ocl/prior_box.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/ocl/prior_box.cpp @@ -19,6 +19,8 @@ struct prior_box_impl : typed_primitive_impl_ocl { using parent = typed_primitive_impl_ocl; using parent::parent; + DECLARE_OBJECT_TYPE_SERIALIZATION + std::unique_ptr clone() const override { return make_unique(*this); } @@ -100,3 +102,5 @@ attach_prior_box_impl::attach_prior_box_impl() { } // namespace ocl } // namespace cldnn + +BIND_BINARY_BUFFER_WITH_TYPE(cldnn::ocl::prior_box_impl, cldnn::object_type::PRIOR_BOX_IMPL) diff --git a/src/plugins/intel_gpu/src/graph/impls/ocl/pyramid_roi_align.cpp b/src/plugins/intel_gpu/src/graph/impls/ocl/pyramid_roi_align.cpp index bae6f50467e839..114a1fb922e6df 100644 --- a/src/plugins/intel_gpu/src/graph/impls/ocl/pyramid_roi_align.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/ocl/pyramid_roi_align.cpp @@ -19,6 +19,8 @@ struct pyramid_roi_align_impl : typed_primitive_impl_ocl { using parent = 
typed_primitive_impl_ocl; using parent::parent; + DECLARE_OBJECT_TYPE_SERIALIZATION + std::unique_ptr clone() const override { return make_unique(*this); } @@ -77,3 +79,5 @@ attach_pyramid_roi_align_impl::attach_pyramid_roi_align_impl() { } // namespace detail } // namespace ocl } // namespace cldnn + +BIND_BINARY_BUFFER_WITH_TYPE(cldnn::ocl::pyramid_roi_align_impl, cldnn::object_type::PYRAMID_ROI_ALIGN_IMPL) diff --git a/src/plugins/intel_gpu/src/graph/impls/ocl/quantize.cpp b/src/plugins/intel_gpu/src/graph/impls/ocl/quantize.cpp index f6401a7cbd9923..af06585954b5ae 100644 --- a/src/plugins/intel_gpu/src/graph/impls/ocl/quantize.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/ocl/quantize.cpp @@ -19,12 +19,14 @@ struct quantize_impl : typed_primitive_impl_ocl { using parent = typed_primitive_impl_ocl; using parent::parent; + DECLARE_OBJECT_TYPE_SERIALIZATION + std::unique_ptr clone() const override { return make_unique(*this); } protected: - kernel_arguments_data get_arguments(typed_primitive_inst& instance, int32_t) const override { + kernel_arguments_data get_arguments(const typed_primitive_inst& instance, int32_t) const override { kernel_arguments_data args; for (size_t i = 0; i < instance.inputs_memory_count(); i++) { @@ -205,3 +207,5 @@ attach_quantize_impl::attach_quantize_impl() { } // namespace detail } // namespace ocl } // namespace cldnn + +BIND_BINARY_BUFFER_WITH_TYPE(cldnn::ocl::quantize_impl, cldnn::object_type::QUANTIZE_IMPL) diff --git a/src/plugins/intel_gpu/src/graph/impls/ocl/random_uniform.cpp b/src/plugins/intel_gpu/src/graph/impls/ocl/random_uniform.cpp index f187509b2d34f9..e12fd7b63fac61 100644 --- a/src/plugins/intel_gpu/src/graph/impls/ocl/random_uniform.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/ocl/random_uniform.cpp @@ -17,6 +17,8 @@ struct random_uniform_impl : typed_primitive_impl_ocl { using parent = typed_primitive_impl_ocl; using parent::parent; + DECLARE_OBJECT_TYPE_SERIALIZATION + std::unique_ptr clone() const override { 
return make_unique(*this); } @@ -63,3 +65,5 @@ attach_random_uniform_impl::attach_random_uniform_impl() { } // namespace ocl } // namespace cldnn + +BIND_BINARY_BUFFER_WITH_TYPE(cldnn::ocl::random_uniform_impl, cldnn::object_type::RANDOM_UNIFORM_IMPL) diff --git a/src/plugins/intel_gpu/src/graph/impls/ocl/range.cpp b/src/plugins/intel_gpu/src/graph/impls/ocl/range.cpp index 16387895d8bc1c..87e071b046a596 100644 --- a/src/plugins/intel_gpu/src/graph/impls/ocl/range.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/ocl/range.cpp @@ -16,6 +16,8 @@ namespace ocl { struct range_impl : typed_primitive_impl_ocl { using typed_primitive_impl_ocl::typed_primitive_impl_ocl; + DECLARE_OBJECT_TYPE_SERIALIZATION + std::unique_ptr clone() const override { return make_unique(*this); } @@ -58,3 +60,5 @@ attach_range_impl::attach_range_impl() { } // namespace detail } // namespace ocl } // namespace cldnn + +BIND_BINARY_BUFFER_WITH_TYPE(cldnn::ocl::range_impl, cldnn::object_type::RANGE_IMPL) diff --git a/src/plugins/intel_gpu/src/graph/impls/ocl/reduce.cpp b/src/plugins/intel_gpu/src/graph/impls/ocl/reduce.cpp index 7b990f7e9c4d42..577b2792e7512c 100644 --- a/src/plugins/intel_gpu/src/graph/impls/ocl/reduce.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/ocl/reduce.cpp @@ -70,6 +70,8 @@ struct reduce_impl : typed_primitive_impl_ocl { using parent = typed_primitive_impl_ocl; using parent::parent; + DECLARE_OBJECT_TYPE_SERIALIZATION + std::unique_ptr clone() const override { return make_unique(*this); } @@ -141,3 +143,5 @@ attach_reduce_impl::attach_reduce_impl() { } // namespace detail } // namespace ocl } // namespace cldnn + +BIND_BINARY_BUFFER_WITH_TYPE(cldnn::ocl::reduce_impl, cldnn::object_type::REDUCE_IMPL) diff --git a/src/plugins/intel_gpu/src/graph/impls/ocl/region_yolo.cpp b/src/plugins/intel_gpu/src/graph/impls/ocl/region_yolo.cpp index 7e139a018d3557..3dd2bd648feada 100644 --- a/src/plugins/intel_gpu/src/graph/impls/ocl/region_yolo.cpp +++ 
b/src/plugins/intel_gpu/src/graph/impls/ocl/region_yolo.cpp @@ -17,6 +17,8 @@ struct region_yolo_impl : typed_primitive_impl_ocl { using parent = typed_primitive_impl_ocl; using parent::parent; + DECLARE_OBJECT_TYPE_SERIALIZATION + std::unique_ptr clone() const override { return make_unique(*this); } @@ -65,3 +67,5 @@ attach_region_yolo_impl::attach_region_yolo_impl() { } // namespace detail } // namespace ocl } // namespace cldnn + +BIND_BINARY_BUFFER_WITH_TYPE(cldnn::ocl::region_yolo_impl, cldnn::object_type::REGION_YOLO_IMPL) diff --git a/src/plugins/intel_gpu/src/graph/impls/ocl/reorder.cpp b/src/plugins/intel_gpu/src/graph/impls/ocl/reorder.cpp index 526ea72133c785..0e6c6f8f24ba10 100644 --- a/src/plugins/intel_gpu/src/graph/impls/ocl/reorder.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/ocl/reorder.cpp @@ -17,10 +17,14 @@ struct reorder_impl : typed_primitive_impl_ocl { using parent = typed_primitive_impl_ocl; using parent::parent; + DECLARE_OBJECT_TYPE_SERIALIZATION + std::unique_ptr clone() const override { return make_unique(*this); } + reorder_impl() : parent() {} + explicit reorder_impl(const reorder_impl& other) : parent(other), _can_be_optimized(other._can_be_optimized), _has_mean(other._has_mean) {} @@ -36,12 +40,24 @@ struct reorder_impl : typed_primitive_impl_ocl { _has_mean = node.has_mean(); } + void save(BinaryOutputBuffer& ob) const override { + parent::save(ob); + ob << _can_be_optimized; + ob << _has_mean; + } + + void load(BinaryInputBuffer& ib) override { + parent::load(ib); + ib >> _can_be_optimized; + ib >> _has_mean; + } + protected: bool optimized_out(reorder_inst& instance) const override { return parent::optimized_out(instance) || _can_be_optimized; } - kernel_arguments_data get_arguments(reorder_inst& instance, int32_t split) const override { + kernel_arguments_data get_arguments(const reorder_inst& instance, int32_t split) const override { kernel_arguments_data args = parent::get_arguments(instance, split); auto input = 
&instance.input_memory(); auto input_layout = input->get_layout(); @@ -142,3 +158,5 @@ attach_reorder_impl::attach_reorder_impl() { } // namespace detail } // namespace ocl } // namespace cldnn + +BIND_BINARY_BUFFER_WITH_TYPE(cldnn::ocl::reorder_impl, cldnn::object_type::REORDER_IMPL) diff --git a/src/plugins/intel_gpu/src/graph/impls/ocl/reorg_yolo.cpp b/src/plugins/intel_gpu/src/graph/impls/ocl/reorg_yolo.cpp index fbccf7439b95e2..f7aecae890687a 100644 --- a/src/plugins/intel_gpu/src/graph/impls/ocl/reorg_yolo.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/ocl/reorg_yolo.cpp @@ -17,6 +17,8 @@ struct reorg_yolo_impl : typed_primitive_impl_ocl { using parent = typed_primitive_impl_ocl; using parent::parent; + DECLARE_OBJECT_TYPE_SERIALIZATION + std::unique_ptr clone() const override { return make_unique(*this); } @@ -64,3 +66,5 @@ attach_reorg_yolo_impl::attach_reorg_yolo_impl() { } // namespace detail } // namespace ocl } // namespace cldnn + +BIND_BINARY_BUFFER_WITH_TYPE(cldnn::ocl::reorg_yolo_impl, cldnn::object_type::REORG_YOLO_IMPL) diff --git a/src/plugins/intel_gpu/src/graph/impls/ocl/resample.cpp b/src/plugins/intel_gpu/src/graph/impls/ocl/resample.cpp index debca5c451c03a..e0df7fbd66f52b 100644 --- a/src/plugins/intel_gpu/src/graph/impls/ocl/resample.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/ocl/resample.cpp @@ -129,6 +129,8 @@ struct resample_impl : typed_primitive_impl_ocl { using parent = typed_primitive_impl_ocl; using parent::parent; + DECLARE_OBJECT_TYPE_SERIALIZATION + std::unique_ptr clone() const override { return make_unique(*this); } @@ -214,3 +216,5 @@ attach_resample_impl::attach_resample_impl() { } // namespace detail } // namespace ocl } // namespace cldnn + +BIND_BINARY_BUFFER_WITH_TYPE(cldnn::ocl::resample_impl, cldnn::object_type::RESAMPLE_IMPL) diff --git a/src/plugins/intel_gpu/src/graph/impls/ocl/reshape.cpp b/src/plugins/intel_gpu/src/graph/impls/ocl/reshape.cpp index 40be6e75f9961b..f3e7fa73486fc8 100644 --- 
a/src/plugins/intel_gpu/src/graph/impls/ocl/reshape.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/ocl/reshape.cpp @@ -17,6 +17,8 @@ struct reshape_impl : public typed_primitive_impl_ocl { using parent = typed_primitive_impl_ocl; using parent::parent; + DECLARE_OBJECT_TYPE_SERIALIZATION + std::unique_ptr clone() const override { return make_unique(*this); } @@ -53,3 +55,5 @@ attach_reshape_impl::attach_reshape_impl() { } // namespace detail } // namespace ocl } // namespace cldnn + +BIND_BINARY_BUFFER_WITH_TYPE(cldnn::ocl::reshape_impl, cldnn::object_type::RESHAPE_IMPL) diff --git a/src/plugins/intel_gpu/src/graph/impls/ocl/reverse.cpp b/src/plugins/intel_gpu/src/graph/impls/ocl/reverse.cpp index 856dac7fbf35ef..91a901afb26c56 100644 --- a/src/plugins/intel_gpu/src/graph/impls/ocl/reverse.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/ocl/reverse.cpp @@ -19,6 +19,8 @@ struct reverse_impl : typed_primitive_impl_ocl { using parent = typed_primitive_impl_ocl; using parent::parent; + DECLARE_OBJECT_TYPE_SERIALIZATION + std::unique_ptr clone() const override { return make_unique(*this); } @@ -82,3 +84,5 @@ attach_reverse_impl::attach_reverse_impl() { } // namespace detail } // namespace ocl } // namespace cldnn + +BIND_BINARY_BUFFER_WITH_TYPE(cldnn::ocl::reverse_impl, cldnn::object_type::REVERSE_IMPL) diff --git a/src/plugins/intel_gpu/src/graph/impls/ocl/reverse_sequence.cpp b/src/plugins/intel_gpu/src/graph/impls/ocl/reverse_sequence.cpp index 766104a681f7d3..7c18f080d4ae87 100644 --- a/src/plugins/intel_gpu/src/graph/impls/ocl/reverse_sequence.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/ocl/reverse_sequence.cpp @@ -18,6 +18,8 @@ struct reverse_sequence_impl : typed_primitive_impl_ocl { using parent = typed_primitive_impl_ocl; using parent::parent; + DECLARE_OBJECT_TYPE_SERIALIZATION + std::unique_ptr clone() const override { return make_unique(*this); } @@ -63,3 +65,5 @@ attach_reverse_sequence_impl::attach_reverse_sequence_impl() { } // namespace detail } // 
namespace ocl } // namespace cldnn + +BIND_BINARY_BUFFER_WITH_TYPE(cldnn::ocl::reverse_sequence_impl, cldnn::object_type::REVERSE_SEQUENCE_IMPL) diff --git a/src/plugins/intel_gpu/src/graph/impls/ocl/roi_align.cpp b/src/plugins/intel_gpu/src/graph/impls/ocl/roi_align.cpp index f8d62db162f098..a87ee0e696905c 100644 --- a/src/plugins/intel_gpu/src/graph/impls/ocl/roi_align.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/ocl/roi_align.cpp @@ -41,12 +41,14 @@ struct roi_align_impl : typed_primitive_impl_ocl { using parent = typed_primitive_impl_ocl; using parent::parent; + DECLARE_OBJECT_TYPE_SERIALIZATION + std::unique_ptr clone() const override { return make_unique(*this); } protected: - kernel_arguments_data get_arguments(typed_primitive_inst& instance, int32_t) const override { + kernel_arguments_data get_arguments(const typed_primitive_inst& instance, int32_t) const override { kernel_arguments_data args; args.inputs = {instance.input_memory_ptr(), instance.rois_memory(), instance.batches_memory()}; args.outputs = {instance.output_memory_ptr()}; @@ -118,3 +120,5 @@ attach_roi_align_impl::attach_roi_align_impl() { } // namespace detail } // namespace ocl } // namespace cldnn + +BIND_BINARY_BUFFER_WITH_TYPE(cldnn::ocl::roi_align_impl, cldnn::object_type::ROI_ALIGN_IMPL) diff --git a/src/plugins/intel_gpu/src/graph/impls/ocl/roi_pooling.cpp b/src/plugins/intel_gpu/src/graph/impls/ocl/roi_pooling.cpp index 35601e816f08c7..133860faf15495 100644 --- a/src/plugins/intel_gpu/src/graph/impls/ocl/roi_pooling.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/ocl/roi_pooling.cpp @@ -37,12 +37,14 @@ struct roi_pooling_impl : typed_primitive_impl_ocl { using parent = typed_primitive_impl_ocl; using parent::parent; + DECLARE_OBJECT_TYPE_SERIALIZATION + std::unique_ptr clone() const override { return make_unique(*this); } protected: - kernel_arguments_data get_arguments(typed_primitive_inst& instance, int32_t) const override { + kernel_arguments_data get_arguments(const 
typed_primitive_inst& instance, int32_t) const override { kernel_arguments_data args; if (instance.argument->mode == pooling_mode::deformable_bilinear && !instance.argument->no_trans) @@ -129,3 +131,5 @@ attach_roi_pooling_impl::attach_roi_pooling_impl() { } // namespace detail } // namespace ocl } // namespace cldnn + +BIND_BINARY_BUFFER_WITH_TYPE(cldnn::ocl::roi_pooling_impl, cldnn::object_type::ROI_POOLING_IMPL) diff --git a/src/plugins/intel_gpu/src/graph/impls/ocl/roll.cpp b/src/plugins/intel_gpu/src/graph/impls/ocl/roll.cpp index c5e2232ae39c9a..4c8d524abd6057 100644 --- a/src/plugins/intel_gpu/src/graph/impls/ocl/roll.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/ocl/roll.cpp @@ -14,6 +14,8 @@ struct roll_impl : typed_primitive_impl_ocl { using parent = typed_primitive_impl_ocl; using parent::parent; + DECLARE_OBJECT_TYPE_SERIALIZATION + std::unique_ptr clone() const override { return make_unique(*this); } @@ -73,3 +75,5 @@ attach_roll_impl::attach_roll_impl() { } // namespace ocl } // namespace cldnn + +BIND_BINARY_BUFFER_WITH_TYPE(cldnn::ocl::roll_impl, cldnn::object_type::ROLL_IMPL) diff --git a/src/plugins/intel_gpu/src/graph/impls/ocl/scatter_elements_update.cpp b/src/plugins/intel_gpu/src/graph/impls/ocl/scatter_elements_update.cpp index b644fb95bdef33..58e6c733b91534 100644 --- a/src/plugins/intel_gpu/src/graph/impls/ocl/scatter_elements_update.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/ocl/scatter_elements_update.cpp @@ -46,6 +46,8 @@ struct scatter_elements_update_impl : typed_primitive_impl_ocl; using parent::parent; + DECLARE_OBJECT_TYPE_SERIALIZATION + std::unique_ptr clone() const override { return make_unique(*this); } @@ -104,3 +106,6 @@ attach_scatter_elements_update_impl::attach_scatter_elements_update_impl() { } // namespace detail } // namespace ocl } // namespace cldnn + +BIND_BINARY_BUFFER_WITH_TYPE(cldnn::ocl::scatter_elements_update_impl, + cldnn::object_type::SCATTER_ELEMENTS_UPDATE_IMPL) diff --git 
a/src/plugins/intel_gpu/src/graph/impls/ocl/scatter_nd_update.cpp b/src/plugins/intel_gpu/src/graph/impls/ocl/scatter_nd_update.cpp index 645b6ac23dd896..6f8500a5ddd13a 100644 --- a/src/plugins/intel_gpu/src/graph/impls/ocl/scatter_nd_update.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/ocl/scatter_nd_update.cpp @@ -19,6 +19,8 @@ struct scatter_nd_update_impl : typed_primitive_impl_ocl { using parent = typed_primitive_impl_ocl; using parent::parent; + DECLARE_OBJECT_TYPE_SERIALIZATION + std::unique_ptr clone() const override { return make_unique(*this); } @@ -147,3 +149,5 @@ attach_scatter_nd_update_impl::attach_scatter_nd_update_impl() { } // namespace detail } // namespace ocl } // namespace cldnn + +BIND_BINARY_BUFFER_WITH_TYPE(cldnn::ocl::scatter_nd_update_impl, cldnn::object_type::SCATTER_ND_UPDATE_IMPL) diff --git a/src/plugins/intel_gpu/src/graph/impls/ocl/scatter_update.cpp b/src/plugins/intel_gpu/src/graph/impls/ocl/scatter_update.cpp index 52f80c45f45eee..cc2b44e3abbdbf 100644 --- a/src/plugins/intel_gpu/src/graph/impls/ocl/scatter_update.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/ocl/scatter_update.cpp @@ -45,6 +45,8 @@ struct scatter_update_impl : typed_primitive_impl_ocl { using parent = typed_primitive_impl_ocl; using parent::parent; + DECLARE_OBJECT_TYPE_SERIALIZATION + std::unique_ptr clone() const override { return make_unique(*this); } @@ -101,3 +103,5 @@ attach_scatter_update_impl::attach_scatter_update_impl() { } // namespace detail } // namespace ocl } // namespace cldnn + +BIND_BINARY_BUFFER_WITH_TYPE(cldnn::ocl::scatter_update_impl, cldnn::object_type::SCATTER_UPDATE_IMPL) diff --git a/src/plugins/intel_gpu/src/graph/impls/ocl/select.cpp b/src/plugins/intel_gpu/src/graph/impls/ocl/select.cpp index e779b5b8781688..af5c4762be4e61 100644 --- a/src/plugins/intel_gpu/src/graph/impls/ocl/select.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/ocl/select.cpp @@ -17,6 +17,8 @@ struct select_impl : typed_primitive_impl_ocl; using parent::parent; 
+ DECLARE_OBJECT_TYPE_SERIALIZATION + std::unique_ptr clone() const override { return make_unique(*this); } @@ -94,3 +96,5 @@ attach_select_impl::attach_select_impl() { } // namespace detail } // namespace ocl } // namespace cldnn + +BIND_BINARY_BUFFER_WITH_TYPE(cldnn::ocl::select_impl, cldnn::object_type::SELECT_IMPL) diff --git a/src/plugins/intel_gpu/src/graph/impls/ocl/shape_of.cpp b/src/plugins/intel_gpu/src/graph/impls/ocl/shape_of.cpp index ca043bf6c0c95c..bb6759a3d3c948 100644 --- a/src/plugins/intel_gpu/src/graph/impls/ocl/shape_of.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/ocl/shape_of.cpp @@ -17,6 +17,8 @@ struct shape_of_impl : typed_primitive_impl_ocl { using parent = typed_primitive_impl_ocl; using parent::parent; + DECLARE_OBJECT_TYPE_SERIALIZATION + std::unique_ptr clone() const override { return make_unique(*this); } @@ -52,3 +54,5 @@ attach_shape_of_impl::attach_shape_of_impl() { } // namespace detail } // namespace ocl } // namespace cldnn + +BIND_BINARY_BUFFER_WITH_TYPE(cldnn::ocl::shape_of_impl, cldnn::object_type::SHAPE_OF_IMPL) diff --git a/src/plugins/intel_gpu/src/graph/impls/ocl/shuffle_channels.cpp b/src/plugins/intel_gpu/src/graph/impls/ocl/shuffle_channels.cpp index 88de36549d9f26..1839e3eace1b52 100644 --- a/src/plugins/intel_gpu/src/graph/impls/ocl/shuffle_channels.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/ocl/shuffle_channels.cpp @@ -19,6 +19,8 @@ struct shuffle_channels_impl : typed_primitive_impl_ocl { using parent = typed_primitive_impl_ocl; using parent::parent; + DECLARE_OBJECT_TYPE_SERIALIZATION + std::unique_ptr clone() const override { return make_unique(*this); } @@ -81,3 +83,5 @@ attach_shuffle_channels_impl::attach_shuffle_channels_impl() { } // namespace detail } // namespace ocl } // namespace cldnn + +BIND_BINARY_BUFFER_WITH_TYPE(cldnn::ocl::shuffle_channels_impl, cldnn::object_type::SHUFFLE_CHANNELS_IMPL) diff --git a/src/plugins/intel_gpu/src/graph/impls/ocl/slice.cpp 
b/src/plugins/intel_gpu/src/graph/impls/ocl/slice.cpp index e736934db38a62..9ba6994b61cfa1 100644 --- a/src/plugins/intel_gpu/src/graph/impls/ocl/slice.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/ocl/slice.cpp @@ -68,6 +68,8 @@ struct slice_impl : typed_primitive_impl_ocl { kInputsNum }; + DECLARE_OBJECT_TYPE_SERIALIZATION + std::unique_ptr clone() const override { return make_unique(*this); } @@ -135,3 +137,5 @@ attach_slice_impl::attach_slice_impl() { } // namespace ocl } // namespace cldnn + +BIND_BINARY_BUFFER_WITH_TYPE(cldnn::ocl::slice_impl, cldnn::object_type::SLICE_IMPL) diff --git a/src/plugins/intel_gpu/src/graph/impls/ocl/softmax.cpp b/src/plugins/intel_gpu/src/graph/impls/ocl/softmax.cpp index 836d151884d927..67b3bd93bf7f80 100644 --- a/src/plugins/intel_gpu/src/graph/impls/ocl/softmax.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/ocl/softmax.cpp @@ -39,6 +39,8 @@ struct softmax_impl : typed_primitive_impl_ocl { using parent = typed_primitive_impl_ocl; using parent::parent; + DECLARE_OBJECT_TYPE_SERIALIZATION + std::unique_ptr clone() const override { return make_unique(*this); } @@ -83,3 +85,5 @@ attach_softmax_impl::attach_softmax_impl() { } // namespace detail } // namespace ocl } // namespace cldnn + +BIND_BINARY_BUFFER_WITH_TYPE(cldnn::ocl::softmax_impl, cldnn::object_type::SOFTMAX_IMPL) diff --git a/src/plugins/intel_gpu/src/graph/impls/ocl/space_to_batch.cpp b/src/plugins/intel_gpu/src/graph/impls/ocl/space_to_batch.cpp index 209ee8a72beffb..38d58bf98d506d 100644 --- a/src/plugins/intel_gpu/src/graph/impls/ocl/space_to_batch.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/ocl/space_to_batch.cpp @@ -20,6 +20,8 @@ struct space_to_batch_impl : typed_primitive_impl_ocl { using parent = typed_primitive_impl_ocl; using parent::parent; + DECLARE_OBJECT_TYPE_SERIALIZATION + std::unique_ptr clone() const override { return make_unique(*this); } @@ -75,3 +77,5 @@ attach_space_to_batch_impl::attach_space_to_batch_impl() { } // namespace detail } // 
namespace ocl } // namespace cldnn + +BIND_BINARY_BUFFER_WITH_TYPE(cldnn::ocl::space_to_batch_impl, cldnn::object_type::SPACE_TO_BATCH_IMPL) diff --git a/src/plugins/intel_gpu/src/graph/impls/ocl/space_to_depth.cpp b/src/plugins/intel_gpu/src/graph/impls/ocl/space_to_depth.cpp index 822851a3b46528..89b757f69f8b6c 100644 --- a/src/plugins/intel_gpu/src/graph/impls/ocl/space_to_depth.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/ocl/space_to_depth.cpp @@ -18,6 +18,8 @@ struct space_to_depth_impl : typed_primitive_impl_ocl { using parent = typed_primitive_impl_ocl; using parent::parent; + DECLARE_OBJECT_TYPE_SERIALIZATION + std::unique_ptr clone() const override { return make_unique(*this); } @@ -75,3 +77,5 @@ attach_space_to_depth_impl::attach_space_to_depth_impl() { } // namespace detail } // namespace ocl } // namespace cldnn + +BIND_BINARY_BUFFER_WITH_TYPE(cldnn::ocl::space_to_depth_impl, cldnn::object_type::SPACE_TO_DEPTH_IMPL) diff --git a/src/plugins/intel_gpu/src/graph/impls/ocl/strided_slice.cpp b/src/plugins/intel_gpu/src/graph/impls/ocl/strided_slice.cpp index 6421a5595e4aa8..a828ec6501461f 100644 --- a/src/plugins/intel_gpu/src/graph/impls/ocl/strided_slice.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/ocl/strided_slice.cpp @@ -49,6 +49,8 @@ struct strided_slice_impl : typed_primitive_impl_ocl { using parent = typed_primitive_impl_ocl; using parent::parent; + DECLARE_OBJECT_TYPE_SERIALIZATION + std::unique_ptr clone() const override { return make_unique(*this); } @@ -167,3 +169,5 @@ attach_strided_slice_impl::attach_strided_slice_impl() { } // namespace detail } // namespace ocl } // namespace cldnn + +BIND_BINARY_BUFFER_WITH_TYPE(cldnn::ocl::strided_slice_impl, cldnn::object_type::STRIDED_SLICE_IMPL) diff --git a/src/plugins/intel_gpu/src/graph/impls/ocl/tile.cpp b/src/plugins/intel_gpu/src/graph/impls/ocl/tile.cpp index 4fe072a2318a0a..0194306c6b0df8 100644 --- a/src/plugins/intel_gpu/src/graph/impls/ocl/tile.cpp +++ 
b/src/plugins/intel_gpu/src/graph/impls/ocl/tile.cpp @@ -19,6 +19,8 @@ struct tile_impl : typed_primitive_impl_ocl { using parent = typed_primitive_impl_ocl; using parent::parent; + DECLARE_OBJECT_TYPE_SERIALIZATION + std::unique_ptr clone() const override { return make_unique(*this); } @@ -81,3 +83,5 @@ attach_tile_impl::attach_tile_impl() { } // namespace detail } // namespace ocl } // namespace cldnn + +BIND_BINARY_BUFFER_WITH_TYPE(cldnn::ocl::tile_impl, cldnn::object_type::TILE_IMPL) diff --git a/src/plugins/intel_gpu/src/graph/impls/onednn/concatenation_onednn.cpp b/src/plugins/intel_gpu/src/graph/impls/onednn/concatenation_onednn.cpp index d6250f1f700118..999b7eb58a16ad 100644 --- a/src/plugins/intel_gpu/src/graph/impls/onednn/concatenation_onednn.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/onednn/concatenation_onednn.cpp @@ -21,7 +21,11 @@ struct concatenation_onednn : typed_primitive_onednn_impl; using parent::parent; + DECLARE_OBJECT_TYPE_SERIALIZATION + protected: + const concatenation_node* _outer; + std::unique_ptr clone() const override { return make_unique(*this); } @@ -62,6 +66,68 @@ struct concatenation_onednn : typed_primitive_onednn_implget_dependencies().size(); + for (auto& input : _outer->get_dependencies()) { + ob << input->get_output_layout(); + } + ob << _outer->get_primitive()->axis; + ob << _outer->get_output_layout(); + + std::vector prim_cache; + prim_cache = _prim.get_cache_blob(); + ob << prim_cache; + } + + void load(BinaryInputBuffer& ib) override { + bool has_prim; + ib >> has_prim; + + if (!has_prim) + return; + + parent::load(ib); + + size_t num_deps; + ib >> num_deps; + + std::vector input_mds; + for (size_t idx = 0; idx < num_deps; ++idx) { + layout input_layout = layout(cldnn::data_types::bin, cldnn::format::any, cldnn::tensor()); + ib >> input_layout; + input_mds.push_back(onednn::layout_to_memory_desc(input_layout)); + } + + int64_t prim_axis; + ib >> prim_axis; + + layout output_layout = layout(cldnn::data_types::bin, 
cldnn::format::any, cldnn::tensor()); + ib >> output_layout; + auto output_md = onednn::layout_to_memory_desc(output_layout); + + auto desc = std::make_shared( + output_md, + prim_axis, + input_mds, + ib.get_engine().get_onednn_engine()); + + _pd = *desc; + + std::vector prim_cache; + ib >> prim_cache; + + _prim = dnnl::concat(_pd, prim_cache); + } + static primitive_impl* create(const concatenation_node& arg, const kernel_impl_params& impl_params) { auto& engine = impl_params.prog->get_engine(); if (arg.can_be_optimized()) @@ -71,7 +137,9 @@ struct concatenation_onednn : typed_primitive_onednn_impl dummy = nullptr; - return new concatenation_onednn(engine, dummy, attr, *desc); + auto new_impl = new concatenation_onednn(engine, dummy, attr, *desc); + new_impl->_outer = &arg; + return new_impl; } }; @@ -108,3 +176,5 @@ attach_concatenation_onednn::attach_concatenation_onednn() { } // namespace detail } // namespace onednn } // namespace cldnn + +BIND_BINARY_BUFFER_WITH_TYPE(cldnn::onednn::concatenation_onednn, cldnn::object_type::CONCATENATION_ONEDNN) diff --git a/src/plugins/intel_gpu/src/graph/impls/onednn/convolution_onednn.cpp b/src/plugins/intel_gpu/src/graph/impls/onednn/convolution_onednn.cpp index c204f03d1879c7..f10dde97ceaa96 100644 --- a/src/plugins/intel_gpu/src/graph/impls/onednn/convolution_onednn.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/onednn/convolution_onednn.cpp @@ -26,6 +26,8 @@ struct convolution_onednn : typed_primitive_onednn_impl; using parent::parent; + DECLARE_OBJECT_TYPE_SERIALIZATION + protected: std::unique_ptr clone() const override { return make_unique(*this); @@ -50,7 +52,6 @@ struct convolution_onednn : typed_primitive_onednn_impl get_arguments(convolution_inst& instance) const override { std::unordered_map args = parent::get_arguments(instance); - auto attrs = instance.get_node().get_onednn_primitive_attributes(); { auto weights = instance.weights_memory(0); @@ -62,7 +63,7 @@ struct convolution_onednn : 
typed_primitive_onednn_implget_onednn_memory(_pd.weights_desc(1))}); } - if (has_zero_points(DNNL_ARG_SRC, attrs)) { + if (has_zero_points(DNNL_ARG_SRC, _attrs)) { auto a_zp = instance.activations_zero_points_memory(0); dnnl::memory::desc desc = onednn::layout_to_memory_desc(a_zp->get_layout(), dnnl::memory::format_tag::a, true); args.insert({DNNL_ARG_ATTR_ZERO_POINTS | DNNL_ARG_SRC, a_zp->get_onednn_memory(desc)}); @@ -72,7 +73,7 @@ struct convolution_onednn : typed_primitive_onednn_implget_onednn_memory(desc); void *mapped_ptr = dnnl_mem.map_data(); if (mapped_ptr) { - GPU_DEBUG_COUT << instance.get_node().id() << " activations_zero_points: "; + GPU_DEBUG_COUT << instance.id() << " activations_zero_points: "; for (size_t i = 0; i < desc.get_size(); ++i) { std::cout << static_cast(mapped_ptr)[i] << " "; } @@ -82,7 +83,7 @@ struct convolution_onednn : typed_primitive_onednn_implget_layout(), dnnl::memory::format_tag::a, true); args.insert({DNNL_ARG_ATTR_ZERO_POINTS | DNNL_ARG_WEIGHTS, w_zp->get_onednn_memory(desc)}); @@ -92,7 +93,7 @@ struct convolution_onednn : typed_primitive_onednn_implget_onednn_memory(desc); void *mapped_ptr = dnnl_mem.map_data(); if (mapped_ptr) { - GPU_DEBUG_COUT << instance.get_node().id() << " weights_zero_points: "; + GPU_DEBUG_COUT << instance.id() << " weights_zero_points: "; for (size_t i = 0; i < desc.get_size(); ++i) { std::cout << static_cast(mapped_ptr)[i] << " "; } @@ -184,6 +185,29 @@ struct convolution_onednn : typed_primitive_onednn_impldata, sizeof(dnnl_convolution_desc_t)); + + std::vector prim_cache; + prim_cache = _prim.get_cache_blob(); + ob << prim_cache; + } + + void load(BinaryInputBuffer& ib) override { + parent::load(ib); + + _desc = std::make_shared(); + ib >> make_data(&_desc->data, sizeof(dnnl_convolution_desc_t)); + + std::vector prim_cache; + ib >> prim_cache; + + _pd = dnnl::primitive_desc(&_desc->data, _attrs.get(), ib.get_engine().get_onednn_engine(), nullptr); + _prim = dnnl::primitive(_pd, prim_cache); + } + 
static primitive_impl* create(const convolution_node& arg, const kernel_impl_params& impl_params) { auto& engine = impl_params.prog->get_engine(); auto desc = get_convolution_descriptor(impl_params); @@ -241,3 +265,5 @@ attach_convolution_onednn::attach_convolution_onednn() { } // namespace detail } // namespace onednn } // namespace cldnn + +BIND_BINARY_BUFFER_WITH_TYPE(cldnn::onednn::convolution_onednn, cldnn::object_type::CONVOLUTION_ONEDNN) diff --git a/src/plugins/intel_gpu/src/graph/impls/onednn/deconvolution_onednn.cpp b/src/plugins/intel_gpu/src/graph/impls/onednn/deconvolution_onednn.cpp index c99569071095cd..8be3740fbdff54 100644 --- a/src/plugins/intel_gpu/src/graph/impls/onednn/deconvolution_onednn.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/onednn/deconvolution_onednn.cpp @@ -22,6 +22,8 @@ struct deconvolution_onednn : typed_primitive_onednn_impl; using parent::parent; + DECLARE_OBJECT_TYPE_SERIALIZATION + protected: std::unique_ptr clone() const override { return make_unique(*this); @@ -102,6 +104,29 @@ struct deconvolution_onednn : typed_primitive_onednn_impldata, sizeof(dnnl_deconvolution_desc_t)); + + std::vector prim_cache; + prim_cache = _prim.get_cache_blob(); + ob << prim_cache; + } + + void load(BinaryInputBuffer& ib) override { + parent::load(ib); + + _desc = std::make_shared(); + ib >> make_data(&_desc->data, sizeof(dnnl_deconvolution_desc_t)); + + std::vector prim_cache; + ib >> prim_cache; + + _pd = dnnl::primitive_desc(&_desc->data, _attrs.get(), ib.get_engine().get_onednn_engine(), nullptr); + _prim = dnnl::primitive(_pd, prim_cache); + } + static primitive_impl* create(const deconvolution_node& arg, const kernel_impl_params& impl_params) { auto& engine = impl_params.prog->get_engine(); auto desc = get_deconvolution_descriptor(impl_params); @@ -142,3 +167,5 @@ attach_deconvolution_onednn::attach_deconvolution_onednn() { } // namespace detail } // namespace onednn } // namespace cldnn + 
+BIND_BINARY_BUFFER_WITH_TYPE(cldnn::onednn::deconvolution_onednn, cldnn::object_type::DECONVOLUTION_ONEDNN) diff --git a/src/plugins/intel_gpu/src/graph/impls/onednn/fully_connected_onednn.cpp b/src/plugins/intel_gpu/src/graph/impls/onednn/fully_connected_onednn.cpp index 5bd611c3f192fa..af875b5f6b38b9 100644 --- a/src/plugins/intel_gpu/src/graph/impls/onednn/fully_connected_onednn.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/onednn/fully_connected_onednn.cpp @@ -19,6 +19,8 @@ struct fully_connected_onednn : typed_primitive_onednn_impl; using parent::parent; + DECLARE_OBJECT_TYPE_SERIALIZATION + private: static std::vector reshape_to_2d(const ov::PartialShape& shape, int64_t feature) { auto staticShape = shape.to_shape(); @@ -164,6 +166,29 @@ struct fully_connected_onednn : typed_primitive_onednn_impldata, sizeof(dnnl_inner_product_desc_t)); + + std::vector prim_cache; + prim_cache = _prim.get_cache_blob(); + ob << prim_cache; + } + + void load(BinaryInputBuffer& ib) override { + parent::load(ib); + + _desc = std::make_shared(); + ib >> make_data(&_desc->data, sizeof(dnnl_inner_product_desc_t)); + + std::vector prim_cache; + ib >> prim_cache; + + _pd = dnnl::primitive_desc(&_desc->data, _attrs.get(), ib.get_engine().get_onednn_engine(), nullptr); + _prim = dnnl::primitive(_pd, prim_cache); + } + static primitive_impl* create(const fully_connected_node& arg, const kernel_impl_params& impl_params) { auto& engine = impl_params.prog->get_engine(); auto desc = get_fully_connected_descriptor(impl_params); @@ -193,3 +218,5 @@ attach_fully_connected_onednn::attach_fully_connected_onednn() { } // namespace detail } // namespace onednn } // namespace cldnn + +BIND_BINARY_BUFFER_WITH_TYPE(cldnn::onednn::fully_connected_onednn, cldnn::object_type::FULLY_CONNECTED_ONEDNN) diff --git a/src/plugins/intel_gpu/src/graph/impls/onednn/gemm_onednn.cpp b/src/plugins/intel_gpu/src/graph/impls/onednn/gemm_onednn.cpp index 2e0b00dc09ca7d..126c0c59b9c893 100644 --- 
a/src/plugins/intel_gpu/src/graph/impls/onednn/gemm_onednn.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/onednn/gemm_onednn.cpp @@ -19,6 +19,8 @@ struct gemm_onednn : typed_primitive_onednn_impl { using parent = typed_primitive_onednn_impl; using parent::parent; + DECLARE_OBJECT_TYPE_SERIALIZATION + protected: std::unique_ptr clone() const override { return make_unique(*this); @@ -207,6 +209,29 @@ struct gemm_onednn : typed_primitive_onednn_impl { } public: + void save(BinaryOutputBuffer& ob) const override { + parent::save(ob); + + ob << make_data(&_desc->data, sizeof(dnnl_matmul_desc_t)); + + std::vector prim_cache; + prim_cache = _prim.get_cache_blob(); + ob << prim_cache; + } + + void load(BinaryInputBuffer& ib) override { + parent::load(ib); + + _desc = std::make_shared(); + ib >> make_data(&_desc->data, sizeof(dnnl_matmul_desc_t)); + + std::vector prim_cache; + ib >> prim_cache; + + _pd = dnnl::primitive_desc(&_desc->data, _attrs.get(), ib.get_engine().get_onednn_engine(), nullptr); + _prim = dnnl::primitive(_pd, prim_cache); + } + static primitive_impl* create(const gemm_node& arg, const kernel_impl_params& impl_params) { auto& engine = impl_params.prog->get_engine(); auto desc = get_gemm_descriptor(impl_params); @@ -237,3 +262,5 @@ attach_gemm_onednn::attach_gemm_onednn() { } // namespace detail } // namespace onednn } // namespace cldnn + +BIND_BINARY_BUFFER_WITH_TYPE(cldnn::onednn::gemm_onednn, cldnn::object_type::GEMM_ONEDNN) diff --git a/src/plugins/intel_gpu/src/graph/impls/onednn/pooling_onednn.cpp b/src/plugins/intel_gpu/src/graph/impls/onednn/pooling_onednn.cpp index 9bd38957e44f1f..6707da450898d0 100644 --- a/src/plugins/intel_gpu/src/graph/impls/onednn/pooling_onednn.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/onednn/pooling_onednn.cpp @@ -19,6 +19,8 @@ struct pooling_onednn : typed_primitive_onednn_impl; using parent::parent; + DECLARE_OBJECT_TYPE_SERIALIZATION + protected: std::unique_ptr clone() const override { return make_unique(*this); 
@@ -62,6 +64,29 @@ struct pooling_onednn : typed_primitive_onednn_impldata, sizeof(dnnl_pooling_desc_t)); + + std::vector prim_cache; + prim_cache = _prim.get_cache_blob(); + ob << prim_cache; + } + + void load(BinaryInputBuffer& ib) override { + parent::load(ib); + + _desc = std::make_shared(); + ib >> make_data(&_desc->data, sizeof(dnnl_pooling_desc_t)); + + std::vector prim_cache; + ib >> prim_cache; + + _pd = dnnl::primitive_desc(&_desc->data, _attrs.get(), ib.get_engine().get_onednn_engine(), nullptr); + _prim = dnnl::primitive(_pd, prim_cache); + } + static primitive_impl* create(const pooling_node& arg, const kernel_impl_params& impl_params) { auto& engine = impl_params.prog->get_engine(); auto desc = get_pooling_descriptor(impl_params); @@ -103,3 +128,5 @@ attach_pooling_onednn::attach_pooling_onednn() { } // namespace detail } // namespace onednn } // namespace cldnn + +BIND_BINARY_BUFFER_WITH_TYPE(cldnn::onednn::pooling_onednn, cldnn::object_type::POOLING_ONEDNN) diff --git a/src/plugins/intel_gpu/src/graph/impls/onednn/primitive_onednn_base.h b/src/plugins/intel_gpu/src/graph/impls/onednn/primitive_onednn_base.h index 2fe3de8598ba67..042dd9c067c71d 100644 --- a/src/plugins/intel_gpu/src/graph/impls/onednn/primitive_onednn_base.h +++ b/src/plugins/intel_gpu/src/graph/impls/onednn/primitive_onednn_base.h @@ -17,6 +17,7 @@ #include "reorder/reorder_weights_kernel_selector.h" #include "reorder/reorder_kernel_base.h" +#include "serialization/binary_buffer.hpp" #include #include diff --git a/src/plugins/intel_gpu/src/graph/impls/onednn/reduction_onednn.cpp b/src/plugins/intel_gpu/src/graph/impls/onednn/reduction_onednn.cpp index db0542137e7956..1f25295e4b3fac 100644 --- a/src/plugins/intel_gpu/src/graph/impls/onednn/reduction_onednn.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/onednn/reduction_onednn.cpp @@ -41,6 +41,8 @@ struct reduction_onednn : typed_primitive_onednn_impl; using parent::parent; + DECLARE_OBJECT_TYPE_SERIALIZATION + protected: 
std::unique_ptr clone() const override { return make_unique(*this); @@ -91,6 +93,29 @@ struct reduction_onednn : typed_primitive_onednn_impldata, sizeof(dnnl_reduction_desc_t)); + + std::vector prim_cache; + prim_cache = _prim.get_cache_blob(); + ob << prim_cache; + } + + void load(BinaryInputBuffer& ib) override { + parent::load(ib); + + _desc = std::make_shared(); + ib >> make_data(&_desc->data, sizeof(dnnl_reduction_desc_t)); + + std::vector prim_cache; + ib >> prim_cache; + + _pd = dnnl::primitive_desc(&_desc->data, _attrs.get(), ib.get_engine().get_onednn_engine(), nullptr); + _prim = dnnl::primitive(_pd, prim_cache); + } + static primitive_impl* create(const reduce_node& arg, const kernel_impl_params& impl_params) { auto& engine = impl_params.prog->get_engine(); auto desc = get_reduction_descriptor(impl_params); @@ -132,3 +157,5 @@ attach_reduction_onednn::attach_reduction_onednn() { } // namespace detail } // namespace onednn } // namespace cldnn + +BIND_BINARY_BUFFER_WITH_TYPE(cldnn::onednn::reduction_onednn, cldnn::object_type::REDUCTION_ONEDNN) diff --git a/src/plugins/intel_gpu/src/graph/impls/onednn/reorder_onednn.cpp b/src/plugins/intel_gpu/src/graph/impls/onednn/reorder_onednn.cpp index 89ebf88d62d319..aeeccb0f043787 100644 --- a/src/plugins/intel_gpu/src/graph/impls/onednn/reorder_onednn.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/onednn/reorder_onednn.cpp @@ -19,7 +19,11 @@ struct reorder_onednn : typed_primitive_onednn_impl; using parent::parent; + DECLARE_OBJECT_TYPE_SERIALIZATION + protected: + const reorder_node* _outer; + std::unique_ptr clone() const override { return make_unique(*this); } @@ -60,6 +64,44 @@ struct reorder_onednn : typed_primitive_onednn_implget_dependency(0).get_output_layout(); + ob << _outer->get_output_layout(); + + std::vector prim_cache; + prim_cache = _prim.get_cache_blob(); + ob << prim_cache; + } + + void load(BinaryInputBuffer& ib) override { + parent::load(ib); + + layout input_layout = 
layout(cldnn::data_types::bin, cldnn::format::any, cldnn::tensor()); + ib >> input_layout; + + layout output_layout = layout(cldnn::data_types::bin, cldnn::format::any, cldnn::tensor()); + ib >> output_layout; + + auto input_md = onednn::layout_to_memory_desc(input_layout); + auto output_md = onednn::layout_to_memory_desc(output_layout); + + auto desc = std::make_shared( + ib.get_engine().get_onednn_engine(), + input_md, + ib.get_engine().get_onednn_engine(), + output_md, + *(_attrs)); + + _pd = *desc; + + std::vector prim_cache; + ib >> prim_cache; + + _prim = dnnl::reorder(_pd, prim_cache); + } + static primitive_impl* create(const reorder_node& arg, const kernel_impl_params& impl_params) { auto& engine = impl_params.prog->get_engine(); auto attr = arg.get_onednn_primitive_attributes(); @@ -67,7 +109,9 @@ struct reorder_onednn : typed_primitive_onednn_impl dummy = nullptr; - return new reorder_onednn(engine, dummy, attr, *desc); + auto new_impl = new reorder_onednn(engine, dummy, attr, *desc); + new_impl->_outer = &arg; + return new_impl; } }; @@ -80,3 +124,5 @@ attach_reorder_onednn::attach_reorder_onednn() { } // namespace detail } // namespace onednn } // namespace cldnn + +BIND_BINARY_BUFFER_WITH_TYPE(cldnn::onednn::reorder_onednn, cldnn::object_type::REORDER_ONEDNN) diff --git a/src/plugins/intel_gpu/src/graph/include/activation_inst.h b/src/plugins/intel_gpu/src/graph/include/activation_inst.h index f3f6533fdcfd83..9b5b5128a5df5b 100644 --- a/src/plugins/intel_gpu/src/graph/include/activation_inst.h +++ b/src/plugins/intel_gpu/src/graph/include/activation_inst.h @@ -42,6 +42,7 @@ using activation_node = typed_program_node; template <> class typed_primitive_inst : public typed_primitive_inst_base { using parent = typed_primitive_inst_base; + using parent::parent; public: static layout calc_output_layout(activation_node const& node, kernel_impl_params const& impl_param); diff --git a/src/plugins/intel_gpu/src/graph/include/adaptive_pooling_inst.h 
b/src/plugins/intel_gpu/src/graph/include/adaptive_pooling_inst.h index 3cb115480967c4..4a4d81324c3a79 100644 --- a/src/plugins/intel_gpu/src/graph/include/adaptive_pooling_inst.h +++ b/src/plugins/intel_gpu/src/graph/include/adaptive_pooling_inst.h @@ -33,6 +33,7 @@ using adaptive_pooling_node = typed_program_node; template <> class typed_primitive_inst : public typed_primitive_inst_base { using parent = typed_primitive_inst_base; + using parent::parent; public: static layout calc_output_layout(const adaptive_pooling_node& node, kernel_impl_params const& impl_param); diff --git a/src/plugins/intel_gpu/src/graph/include/arg_max_min_inst.h b/src/plugins/intel_gpu/src/graph/include/arg_max_min_inst.h index d8d9c7ba293e52..16ba786c9d543c 100644 --- a/src/plugins/intel_gpu/src/graph/include/arg_max_min_inst.h +++ b/src/plugins/intel_gpu/src/graph/include/arg_max_min_inst.h @@ -32,6 +32,7 @@ using arg_max_min_node = typed_program_node; template <> class typed_primitive_inst : public typed_primitive_inst_base { using parent = typed_primitive_inst_base; + using parent::parent; public: static layout calc_output_layout(arg_max_min_node const& node, kernel_impl_params const& impl_param); diff --git a/src/plugins/intel_gpu/src/graph/include/assign_inst.h b/src/plugins/intel_gpu/src/graph/include/assign_inst.h index cf81036da0a1b7..34349ee1d4b813 100644 --- a/src/plugins/intel_gpu/src/graph/include/assign_inst.h +++ b/src/plugins/intel_gpu/src/graph/include/assign_inst.h @@ -36,6 +36,7 @@ class typed_primitive_inst : public typed_primitive_inst_base, p public: typed_primitive_inst(network& network, const assign_node& desc); + typed_primitive_inst(network& network) : parent(network), memory_state::variable("") {} }; using assign_inst = typed_primitive_inst; diff --git a/src/plugins/intel_gpu/src/graph/include/average_unpooling_inst.h b/src/plugins/intel_gpu/src/graph/include/average_unpooling_inst.h index 1d39955c38a055..4f73223fe64286 100644 --- 
a/src/plugins/intel_gpu/src/graph/include/average_unpooling_inst.h +++ b/src/plugins/intel_gpu/src/graph/include/average_unpooling_inst.h @@ -16,6 +16,7 @@ using average_unpooling_node = typed_program_node; template <> class typed_primitive_inst : public typed_primitive_inst_base { using parent = typed_primitive_inst_base; + using parent::parent; public: typed_primitive_inst(network& network, average_unpooling_node const& desc); diff --git a/src/plugins/intel_gpu/src/graph/include/batch_to_space_inst.h b/src/plugins/intel_gpu/src/graph/include/batch_to_space_inst.h index 83c4b4581e6ecb..82b99eb854d662 100644 --- a/src/plugins/intel_gpu/src/graph/include/batch_to_space_inst.h +++ b/src/plugins/intel_gpu/src/graph/include/batch_to_space_inst.h @@ -16,6 +16,7 @@ using batch_to_space_node = typed_program_node; template <> class typed_primitive_inst : public typed_primitive_inst_base { using parent = typed_primitive_inst_base; + using parent::parent; public: static layout calc_output_layout(batch_to_space_node const& node, kernel_impl_params const& impl_param); diff --git a/src/plugins/intel_gpu/src/graph/include/binary_convolution_inst.h b/src/plugins/intel_gpu/src/graph/include/binary_convolution_inst.h index 05d067b40f917c..a6125510b5d0ea 100644 --- a/src/plugins/intel_gpu/src/graph/include/binary_convolution_inst.h +++ b/src/plugins/intel_gpu/src/graph/include/binary_convolution_inst.h @@ -57,6 +57,7 @@ using binary_convolution_node = typed_program_node; template <> class typed_primitive_inst : public typed_primitive_inst_base { using parent = typed_primitive_inst_base; + using parent::parent; public: static layout calc_output_layout(binary_convolution_node const& node, kernel_impl_params const& impl_param); diff --git a/src/plugins/intel_gpu/src/graph/include/border_inst.h b/src/plugins/intel_gpu/src/graph/include/border_inst.h index 94b85438c53af2..0041af30485c4b 100644 --- a/src/plugins/intel_gpu/src/graph/include/border_inst.h +++ 
b/src/plugins/intel_gpu/src/graph/include/border_inst.h @@ -31,6 +31,7 @@ using border_node = typed_program_node; template <> class typed_primitive_inst : public typed_primitive_inst_base { using parent = typed_primitive_inst_base; + using parent::parent; public: template diff --git a/src/plugins/intel_gpu/src/graph/include/broadcast_inst.h b/src/plugins/intel_gpu/src/graph/include/broadcast_inst.h index be55667f613d5b..b24149c6ae23d6 100644 --- a/src/plugins/intel_gpu/src/graph/include/broadcast_inst.h +++ b/src/plugins/intel_gpu/src/graph/include/broadcast_inst.h @@ -32,6 +32,7 @@ using broadcast_node = typed_program_node; template <> class typed_primitive_inst : public typed_primitive_inst_base { using parent = typed_primitive_inst_base; + using parent::parent; public: template diff --git a/src/plugins/intel_gpu/src/graph/include/concatenation_inst.h b/src/plugins/intel_gpu/src/graph/include/concatenation_inst.h index 6f11acb4bc0bc9..34d6570a1409a9 100644 --- a/src/plugins/intel_gpu/src/graph/include/concatenation_inst.h +++ b/src/plugins/intel_gpu/src/graph/include/concatenation_inst.h @@ -33,6 +33,7 @@ using concatenation_node = typed_program_node; template <> class typed_primitive_inst : public typed_primitive_inst_base { using parent = typed_primitive_inst_base; + using parent::parent; public: template diff --git a/src/plugins/intel_gpu/src/graph/include/condition_inst.h b/src/plugins/intel_gpu/src/graph/include/condition_inst.h index 7f42334a68ca11..91f6adc19b8ecb 100644 --- a/src/plugins/intel_gpu/src/graph/include/condition_inst.h +++ b/src/plugins/intel_gpu/src/graph/include/condition_inst.h @@ -83,6 +83,7 @@ using condition_node = typed_program_node; template <> class typed_primitive_inst : public typed_primitive_inst_base { using parent = typed_primitive_inst_base; + using parent::parent; public: static layout calc_output_layout(condition_node const& node, kernel_impl_params const& impl_param); diff --git 
a/src/plugins/intel_gpu/src/graph/include/convert_color_inst.h b/src/plugins/intel_gpu/src/graph/include/convert_color_inst.h index fb34d321b1d999..6866192a34bdc3 100644 --- a/src/plugins/intel_gpu/src/graph/include/convert_color_inst.h +++ b/src/plugins/intel_gpu/src/graph/include/convert_color_inst.h @@ -23,6 +23,7 @@ using convert_color_node = typed_program_node; template <> class typed_primitive_inst : public typed_primitive_inst_base { using parent = typed_primitive_inst_base; + using parent::parent; public: static layout calc_output_layout(convert_color_node const& node, kernel_impl_params const& impl_param); diff --git a/src/plugins/intel_gpu/src/graph/include/convolution_inst.h b/src/plugins/intel_gpu/src/graph/include/convolution_inst.h index fe0bd1f84dcd80..677ea1e488a640 100644 --- a/src/plugins/intel_gpu/src/graph/include/convolution_inst.h +++ b/src/plugins/intel_gpu/src/graph/include/convolution_inst.h @@ -149,6 +149,7 @@ using convolution_node = typed_program_node; template <> class typed_primitive_inst : public typed_primitive_inst_base { using parent = typed_primitive_inst_base; + using parent::parent; public: template @@ -215,6 +216,9 @@ class typed_primitive_inst : public typed_primitive_inst_basecompensation_layout.has_value(); } bool activations_zero_points_term() const { return _impl_params->activations_zero_points_layout.has_value(); } + void save(cldnn::BinaryOutputBuffer& ob) const override; + void load(cldnn::BinaryInputBuffer& ib) override; + private: uint32_t _groups; int32_t _split; diff --git a/src/plugins/intel_gpu/src/graph/include/crop_inst.h b/src/plugins/intel_gpu/src/graph/include/crop_inst.h index 01875bfb2c1a60..0cbb2bb3f50932 100644 --- a/src/plugins/intel_gpu/src/graph/include/crop_inst.h +++ b/src/plugins/intel_gpu/src/graph/include/crop_inst.h @@ -47,6 +47,7 @@ using crop_node = typed_program_node; template <> class typed_primitive_inst : public typed_primitive_inst_base { using parent = typed_primitive_inst_base; + using 
parent::parent; public: template diff --git a/src/plugins/intel_gpu/src/graph/include/ctc_greedy_decoder_inst.h b/src/plugins/intel_gpu/src/graph/include/ctc_greedy_decoder_inst.h index 314183592cf1f0..dddd0f2f80960f 100644 --- a/src/plugins/intel_gpu/src/graph/include/ctc_greedy_decoder_inst.h +++ b/src/plugins/intel_gpu/src/graph/include/ctc_greedy_decoder_inst.h @@ -30,6 +30,7 @@ using ctc_greedy_decoder_node = typed_program_node; template <> class typed_primitive_inst : public typed_primitive_inst_base { using parent = typed_primitive_inst_base; + using parent::parent; public: static layout calc_output_layout(ctc_greedy_decoder_node const& node, kernel_impl_params const& impl_param); diff --git a/src/plugins/intel_gpu/src/graph/include/cum_sum_inst.h b/src/plugins/intel_gpu/src/graph/include/cum_sum_inst.h index e90388ba457af6..0feb8f1ceeeda1 100644 --- a/src/plugins/intel_gpu/src/graph/include/cum_sum_inst.h +++ b/src/plugins/intel_gpu/src/graph/include/cum_sum_inst.h @@ -24,6 +24,7 @@ using cum_sum_node = typed_program_node; template <> class typed_primitive_inst : public typed_primitive_inst_base { using parent = typed_primitive_inst_base; + using parent::parent; public: static layout calc_output_layout(cum_sum_node const& node, kernel_impl_params const& impl_param); diff --git a/src/plugins/intel_gpu/src/graph/include/custom_gpu_primitive_inst.h b/src/plugins/intel_gpu/src/graph/include/custom_gpu_primitive_inst.h index e692f42746376d..de77f8a2ae8747 100644 --- a/src/plugins/intel_gpu/src/graph/include/custom_gpu_primitive_inst.h +++ b/src/plugins/intel_gpu/src/graph/include/custom_gpu_primitive_inst.h @@ -16,6 +16,7 @@ using custom_gpu_primitive_node = typed_program_node; template <> class typed_primitive_inst : public typed_primitive_inst_base { using parent = typed_primitive_inst_base; + using parent::parent; public: static layout calc_output_layout(custom_gpu_primitive_node const& node, kernel_impl_params const& impl_param) { diff --git 
a/src/plugins/intel_gpu/src/graph/include/data_inst.h b/src/plugins/intel_gpu/src/graph/include/data_inst.h index ca9610ed2b254b..bc872844e160e7 100644 --- a/src/plugins/intel_gpu/src/graph/include/data_inst.h +++ b/src/plugins/intel_gpu/src/graph/include/data_inst.h @@ -31,6 +31,7 @@ using data_node = typed_program_node; template <> class typed_primitive_inst : public typed_primitive_inst_base { using parent = typed_primitive_inst_base; + using parent::parent; public: static layout calc_output_layout(data_node const& node, kernel_impl_params const& impl_param) { diff --git a/src/plugins/intel_gpu/src/graph/include/deconvolution_inst.h b/src/plugins/intel_gpu/src/graph/include/deconvolution_inst.h index 8e01b3c1f4d35f..1eee63df6ab551 100644 --- a/src/plugins/intel_gpu/src/graph/include/deconvolution_inst.h +++ b/src/plugins/intel_gpu/src/graph/include/deconvolution_inst.h @@ -94,6 +94,7 @@ using deconvolution_node = typed_program_node; template <> class typed_primitive_inst : public typed_primitive_inst_base { using parent = typed_primitive_inst_base; + using parent::parent; public: static layout calc_output_layout(deconvolution_node const& node, kernel_impl_params const& impl_param); diff --git a/src/plugins/intel_gpu/src/graph/include/deformable_convolution_inst.h b/src/plugins/intel_gpu/src/graph/include/deformable_convolution_inst.h index fc6c26dee6106c..fb59bd1d0d6d58 100644 --- a/src/plugins/intel_gpu/src/graph/include/deformable_convolution_inst.h +++ b/src/plugins/intel_gpu/src/graph/include/deformable_convolution_inst.h @@ -76,6 +76,7 @@ using deformable_conv_node = typed_program_node; template <> class typed_primitive_inst : public typed_primitive_inst_base { using parent = typed_primitive_inst_base; + using parent::parent; public: static layout calc_output_layout(deformable_conv_node const& node, kernel_impl_params const& impl_param); @@ -112,6 +113,7 @@ using deformable_conv_inst = typed_primitive_inst; template <> struct typed_program_node : public 
typed_program_node_base { using parent = typed_program_node_base; + using parent::parent; public: typed_program_node(std::shared_ptr prim, program& prog) @@ -156,6 +158,7 @@ using deformable_interp_node = typed_program_node; template <> class typed_primitive_inst : public typed_primitive_inst_base { using parent = typed_primitive_inst_base; + using parent::parent; public: static layout calc_output_layout(deformable_interp_node const& node, kernel_impl_params const& impl_param); diff --git a/src/plugins/intel_gpu/src/graph/include/depth_to_space_inst.h b/src/plugins/intel_gpu/src/graph/include/depth_to_space_inst.h index 9fc1e71414ad7a..edaa8571ecbf17 100644 --- a/src/plugins/intel_gpu/src/graph/include/depth_to_space_inst.h +++ b/src/plugins/intel_gpu/src/graph/include/depth_to_space_inst.h @@ -30,6 +30,7 @@ using depth_to_space_node = typed_program_node; template <> class typed_primitive_inst : public typed_primitive_inst_base { using parent = typed_primitive_inst_base; + using parent::parent; public: static layout calc_output_layout(depth_to_space_node const& node, kernel_impl_params const& impl_param); diff --git a/src/plugins/intel_gpu/src/graph/include/detection_output_inst.h b/src/plugins/intel_gpu/src/graph/include/detection_output_inst.h index e84e547f351bac..938751e7884d31 100644 --- a/src/plugins/intel_gpu/src/graph/include/detection_output_inst.h +++ b/src/plugins/intel_gpu/src/graph/include/detection_output_inst.h @@ -33,6 +33,7 @@ using detection_output_node = typed_program_node; template <> class typed_primitive_inst : public typed_primitive_inst_base { using parent = typed_primitive_inst_base; + using parent::parent; public: static layout calc_output_layout(detection_output_node const& node, kernel_impl_params const& impl_param); @@ -44,6 +45,9 @@ class typed_primitive_inst : public typed_primitive_inst_base< memory::ptr location_memory() const { return dep_memory_ptr(0); } memory::ptr confidence_memory() const { return dep_memory_ptr(1); } 
memory::ptr prior_box_memory() const { return dep_memory_ptr(2); } + + void save(cldnn::BinaryOutputBuffer& ob) const override; + void load(cldnn::BinaryInputBuffer& ib) override; }; using detection_output_inst = typed_primitive_inst; diff --git a/src/plugins/intel_gpu/src/graph/include/eltwise_inst.h b/src/plugins/intel_gpu/src/graph/include/eltwise_inst.h index 245e52c356e1dd..d52147dc99c2ed 100644 --- a/src/plugins/intel_gpu/src/graph/include/eltwise_inst.h +++ b/src/plugins/intel_gpu/src/graph/include/eltwise_inst.h @@ -83,6 +83,7 @@ using eltwise_node = typed_program_node; template <> class typed_primitive_inst : public typed_primitive_inst_base { using parent = typed_primitive_inst_base; + using parent::parent; static void check_inputs_count(eltwise_node const& node); public: diff --git a/src/plugins/intel_gpu/src/graph/include/embedding_bag_inst.h b/src/plugins/intel_gpu/src/graph/include/embedding_bag_inst.h index 649eeb95ab5c4b..07b9984bf8a909 100644 --- a/src/plugins/intel_gpu/src/graph/include/embedding_bag_inst.h +++ b/src/plugins/intel_gpu/src/graph/include/embedding_bag_inst.h @@ -26,6 +26,7 @@ using embedding_bag_node = typed_program_node; template <> class typed_primitive_inst : public typed_primitive_inst_base { using parent = typed_primitive_inst_base; + using parent::parent; public: static layout calc_output_layout(embedding_bag_node const& node, kernel_impl_params const& impl_param); diff --git a/src/plugins/intel_gpu/src/graph/include/experimental_detectron_detection_output_inst.hpp b/src/plugins/intel_gpu/src/graph/include/experimental_detectron_detection_output_inst.hpp index 93ed26582a7dd5..50266346ac7092 100644 --- a/src/plugins/intel_gpu/src/graph/include/experimental_detectron_detection_output_inst.hpp +++ b/src/plugins/intel_gpu/src/graph/include/experimental_detectron_detection_output_inst.hpp @@ -45,6 +45,7 @@ template <> class typed_primitive_inst : public typed_primitive_inst_base { using parent = typed_primitive_inst_base; + using 
parent::parent; public: static layout calc_output_layout(const experimental_detectron_detection_output_node& node, kernel_impl_params const& impl_param); diff --git a/src/plugins/intel_gpu/src/graph/include/experimental_detectron_generate_proposals_single_image_inst.hpp b/src/plugins/intel_gpu/src/graph/include/experimental_detectron_generate_proposals_single_image_inst.hpp index b5a533b409a1be..5c4bca022ae392 100644 --- a/src/plugins/intel_gpu/src/graph/include/experimental_detectron_generate_proposals_single_image_inst.hpp +++ b/src/plugins/intel_gpu/src/graph/include/experimental_detectron_generate_proposals_single_image_inst.hpp @@ -32,6 +32,7 @@ template <> class typed_primitive_inst : public typed_primitive_inst_base { using parent = typed_primitive_inst_base; + using parent::parent; public: static layout calc_output_layout(const experimental_detectron_generate_proposals_single_image_node& node, kernel_impl_params const& impl_param); diff --git a/src/plugins/intel_gpu/src/graph/include/experimental_detectron_topk_rois_inst.h b/src/plugins/intel_gpu/src/graph/include/experimental_detectron_topk_rois_inst.h index b454afff274cc1..f199003d41e11d 100644 --- a/src/plugins/intel_gpu/src/graph/include/experimental_detectron_topk_rois_inst.h +++ b/src/plugins/intel_gpu/src/graph/include/experimental_detectron_topk_rois_inst.h @@ -15,6 +15,7 @@ using experimental_detectron_topk_rois_node = typed_program_node class typed_primitive_inst : public typed_primitive_inst_base { using parent = typed_primitive_inst_base; + using parent::parent; public: static layout calc_output_layout(experimental_detectron_topk_rois_node const &node, kernel_impl_params const& impl_param); diff --git a/src/plugins/intel_gpu/src/graph/include/extract_image_patches_inst.h b/src/plugins/intel_gpu/src/graph/include/extract_image_patches_inst.h index 5d1f6b3ca81cfa..e98a569e938fa6 100644 --- a/src/plugins/intel_gpu/src/graph/include/extract_image_patches_inst.h +++ 
b/src/plugins/intel_gpu/src/graph/include/extract_image_patches_inst.h @@ -14,6 +14,7 @@ using extract_image_patches_node = typed_program_node; template <> class typed_primitive_inst : public typed_primitive_inst_base { using parent = typed_primitive_inst_base; + using parent::parent; public: static layout calc_output_layout(extract_image_patches_node const& node, kernel_impl_params const& impl_param); diff --git a/src/plugins/intel_gpu/src/graph/include/eye_inst.h b/src/plugins/intel_gpu/src/graph/include/eye_inst.h index 39c56bf487f3b0..2b3009551a8d55 100644 --- a/src/plugins/intel_gpu/src/graph/include/eye_inst.h +++ b/src/plugins/intel_gpu/src/graph/include/eye_inst.h @@ -15,6 +15,7 @@ using eye_node = typed_program_node; template <> class typed_primitive_inst : public typed_primitive_inst_base { using parent = typed_primitive_inst_base; + using parent::parent; public: static layout calc_output_layout(eye_node const& node, const kernel_impl_params& impl_param); diff --git a/src/plugins/intel_gpu/src/graph/include/fully_connected_inst.h b/src/plugins/intel_gpu/src/graph/include/fully_connected_inst.h index 1f5f8a367afaee..b9e010fc39c689 100644 --- a/src/plugins/intel_gpu/src/graph/include/fully_connected_inst.h +++ b/src/plugins/intel_gpu/src/graph/include/fully_connected_inst.h @@ -42,6 +42,7 @@ using fully_connected_node = typed_program_node; template <> class typed_primitive_inst : public typed_primitive_inst_base { using parent = typed_primitive_inst_base; + using parent::parent; public: template diff --git a/src/plugins/intel_gpu/src/graph/include/gather_elements_inst.h b/src/plugins/intel_gpu/src/graph/include/gather_elements_inst.h index 58a0f0d2c1c625..cb944702af0673 100644 --- a/src/plugins/intel_gpu/src/graph/include/gather_elements_inst.h +++ b/src/plugins/intel_gpu/src/graph/include/gather_elements_inst.h @@ -37,6 +37,7 @@ using gather_elements_node = typed_program_node; template <> class typed_primitive_inst : public typed_primitive_inst_base { 
using parent = typed_primitive_inst_base; + using parent::parent; public: template diff --git a/src/plugins/intel_gpu/src/graph/include/gather_inst.h b/src/plugins/intel_gpu/src/graph/include/gather_inst.h index 44eae39f1f87cc..e6295f608ee1ef 100644 --- a/src/plugins/intel_gpu/src/graph/include/gather_inst.h +++ b/src/plugins/intel_gpu/src/graph/include/gather_inst.h @@ -26,6 +26,7 @@ using gather_node = typed_program_node; template <> class typed_primitive_inst : public typed_primitive_inst_base { using parent = typed_primitive_inst_base; + using parent::parent; public: template diff --git a/src/plugins/intel_gpu/src/graph/include/gather_nd_inst.h b/src/plugins/intel_gpu/src/graph/include/gather_nd_inst.h index c0dc83b9683beb..c4edc947efe3b7 100644 --- a/src/plugins/intel_gpu/src/graph/include/gather_nd_inst.h +++ b/src/plugins/intel_gpu/src/graph/include/gather_nd_inst.h @@ -14,6 +14,7 @@ using gather_nd_node = typed_program_node; template <> class typed_primitive_inst : public typed_primitive_inst_base { using parent = typed_primitive_inst_base; + using parent::parent; public: static layout calc_output_layout(gather_nd_node const& node, kernel_impl_params const& impl_param); diff --git a/src/plugins/intel_gpu/src/graph/include/gather_tree_inst.h b/src/plugins/intel_gpu/src/graph/include/gather_tree_inst.h index d32a08f2d385bd..c09c758c7e894b 100644 --- a/src/plugins/intel_gpu/src/graph/include/gather_tree_inst.h +++ b/src/plugins/intel_gpu/src/graph/include/gather_tree_inst.h @@ -16,6 +16,7 @@ using gather_tree_node = typed_program_node; template <> class typed_primitive_inst : public typed_primitive_inst_base { using parent = typed_primitive_inst_base; + using parent::parent; public: static layout calc_output_layout(gather_tree_node const& node, kernel_impl_params const& impl_param); diff --git a/src/plugins/intel_gpu/src/graph/include/gemm_inst.h b/src/plugins/intel_gpu/src/graph/include/gemm_inst.h index ce831c977bfdf3..585fbe094f7fa8 100644 --- 
a/src/plugins/intel_gpu/src/graph/include/gemm_inst.h +++ b/src/plugins/intel_gpu/src/graph/include/gemm_inst.h @@ -27,6 +27,7 @@ using gemm_node = typed_program_node; template <> class typed_primitive_inst : public typed_primitive_inst_base { using parent = typed_primitive_inst_base; + using parent::parent; public: template diff --git a/src/plugins/intel_gpu/src/graph/include/generate_proposals_inst.h b/src/plugins/intel_gpu/src/graph/include/generate_proposals_inst.h index 46789b1fad8387..d647f50780045f 100644 --- a/src/plugins/intel_gpu/src/graph/include/generate_proposals_inst.h +++ b/src/plugins/intel_gpu/src/graph/include/generate_proposals_inst.h @@ -33,6 +33,7 @@ template <> class typed_primitive_inst : public typed_primitive_inst_base { using parent = typed_primitive_inst_base; + using parent::parent; public: static layout calc_output_layout(const generate_proposals_node& node, kernel_impl_params const& impl_param); diff --git a/src/plugins/intel_gpu/src/graph/include/generic_layer_inst.h b/src/plugins/intel_gpu/src/graph/include/generic_layer_inst.h index 7ace6b15d6c475..520c7a10c55242 100644 --- a/src/plugins/intel_gpu/src/graph/include/generic_layer_inst.h +++ b/src/plugins/intel_gpu/src/graph/include/generic_layer_inst.h @@ -28,6 +28,7 @@ using generic_layer_node = typed_program_node; template <> class typed_primitive_inst : public typed_primitive_inst_base { using parent = typed_primitive_inst_base; + using parent::parent; public: static layout calc_output_layout(generic_layer_node const& node, kernel_impl_params const& impl_param) { diff --git a/src/plugins/intel_gpu/src/graph/include/grn_inst.h b/src/plugins/intel_gpu/src/graph/include/grn_inst.h index 5e04b9afdddf62..30ecac24f00bcd 100644 --- a/src/plugins/intel_gpu/src/graph/include/grn_inst.h +++ b/src/plugins/intel_gpu/src/graph/include/grn_inst.h @@ -16,6 +16,7 @@ using grn_node = typed_program_node; template <> class typed_primitive_inst : public typed_primitive_inst_base { using parent = 
typed_primitive_inst_base; + using parent::parent; public: static layout calc_output_layout(grn_node const& node, kernel_impl_params const& impl_param); diff --git a/src/plugins/intel_gpu/src/graph/include/input_layout_inst.h b/src/plugins/intel_gpu/src/graph/include/input_layout_inst.h index 9471faf298521f..49e3747ce6ac97 100644 --- a/src/plugins/intel_gpu/src/graph/include/input_layout_inst.h +++ b/src/plugins/intel_gpu/src/graph/include/input_layout_inst.h @@ -26,6 +26,7 @@ using input_layout_node = typed_program_node; template <> class typed_primitive_inst : public typed_primitive_inst_base { using parent = typed_primitive_inst_base; + using parent::parent; public: static layout calc_output_layout(input_layout_node const& node, kernel_impl_params const& impl_param) { diff --git a/src/plugins/intel_gpu/src/graph/include/kernel_selector_helper.h b/src/plugins/intel_gpu/src/graph/include/kernel_selector_helper.h index 5b6ca266aba718..52dae6c793cbe2 100644 --- a/src/plugins/intel_gpu/src/graph/include/kernel_selector_helper.h +++ b/src/plugins/intel_gpu/src/graph/include/kernel_selector_helper.h @@ -17,6 +17,7 @@ #include "kernel_selector_common.h" #include "tensor_type.h" #include "fused_primitive_desc.h" +#include "serialization/binary_buffer.hpp" #include #include @@ -133,6 +134,8 @@ struct kernel_impl_params { memory::ptr reordered_weights = nullptr; + kernel_impl_params() {} + kernel_impl_params(program& _prog, std::shared_ptr _desc, size_t _uid, @@ -177,6 +180,9 @@ struct kernel_impl_params { template std::shared_ptr typed_desc() const { return std::static_pointer_cast(desc); } + + void save(BinaryOutputBuffer& ob) const; + void load(BinaryInputBuffer& ib); }; template diff --git a/src/plugins/intel_gpu/src/graph/include/loop_inst.h b/src/plugins/intel_gpu/src/graph/include/loop_inst.h index b3681f103bb557..a166becad9ec35 100644 --- a/src/plugins/intel_gpu/src/graph/include/loop_inst.h +++ b/src/plugins/intel_gpu/src/graph/include/loop_inst.h @@ -361,6 +361,7 
@@ using loop_node = typed_program_node; template <> class typed_primitive_inst : public typed_primitive_inst_base { using parent = typed_primitive_inst_base; + using parent::parent; public: struct backedge_memory_mapping { diff --git a/src/plugins/intel_gpu/src/graph/include/lrn_inst.h b/src/plugins/intel_gpu/src/graph/include/lrn_inst.h index f72b0f101fa0c6..2f1db2e7e08041 100644 --- a/src/plugins/intel_gpu/src/graph/include/lrn_inst.h +++ b/src/plugins/intel_gpu/src/graph/include/lrn_inst.h @@ -16,6 +16,7 @@ using lrn_node = typed_program_node; template <> class typed_primitive_inst : public typed_primitive_inst_base { using parent = typed_primitive_inst_base; + using parent::parent; public: static layout calc_output_layout(lrn_node const& node, kernel_impl_params const& impl_param); diff --git a/src/plugins/intel_gpu/src/graph/include/lstm_dynamic_input_inst.h b/src/plugins/intel_gpu/src/graph/include/lstm_dynamic_input_inst.h index 102e3c1921e4ee..9dfc11b98720dc 100644 --- a/src/plugins/intel_gpu/src/graph/include/lstm_dynamic_input_inst.h +++ b/src/plugins/intel_gpu/src/graph/include/lstm_dynamic_input_inst.h @@ -39,6 +39,7 @@ using lstm_dynamic_input_node = typed_program_node; template <> class typed_primitive_inst : public typed_primitive_inst_base { using parent = typed_primitive_inst_base; + using parent::parent; public: static layout calc_output_layout(lstm_dynamic_input_node const& node, kernel_impl_params const& impl_param); diff --git a/src/plugins/intel_gpu/src/graph/include/lstm_dynamic_inst.h b/src/plugins/intel_gpu/src/graph/include/lstm_dynamic_inst.h index dc786bf42d9935..aa9972f6c50b91 100644 --- a/src/plugins/intel_gpu/src/graph/include/lstm_dynamic_inst.h +++ b/src/plugins/intel_gpu/src/graph/include/lstm_dynamic_inst.h @@ -35,6 +35,7 @@ using lstm_dynamic_node = typed_program_node; template <> class typed_primitive_inst : public typed_primitive_inst_base { using parent = typed_primitive_inst_base; + using parent::parent; public: static 
layout calc_output_layout(lstm_dynamic_node const& node, kernel_impl_params const& impl_param); diff --git a/src/plugins/intel_gpu/src/graph/include/lstm_dynamic_timeloop_inst.h b/src/plugins/intel_gpu/src/graph/include/lstm_dynamic_timeloop_inst.h index a219e5036cbdf9..88356b8a09d146 100644 --- a/src/plugins/intel_gpu/src/graph/include/lstm_dynamic_timeloop_inst.h +++ b/src/plugins/intel_gpu/src/graph/include/lstm_dynamic_timeloop_inst.h @@ -60,6 +60,7 @@ using lstm_dynamic_timeloop_node = typed_program_node; template <> class typed_primitive_inst : public typed_primitive_inst_base { using parent = typed_primitive_inst_base; + using parent::parent; public: static layout calc_output_layout(lstm_dynamic_timeloop_node const& node, kernel_impl_params const& impl_param); diff --git a/src/plugins/intel_gpu/src/graph/include/lstm_elt_inst.h b/src/plugins/intel_gpu/src/graph/include/lstm_elt_inst.h index ad06268fbdf8ff..64b74d93eb0d68 100644 --- a/src/plugins/intel_gpu/src/graph/include/lstm_elt_inst.h +++ b/src/plugins/intel_gpu/src/graph/include/lstm_elt_inst.h @@ -36,6 +36,7 @@ using lstm_elt_node = typed_program_node; template <> class typed_primitive_inst : public typed_primitive_inst_base { using parent = typed_primitive_inst_base; + using parent::parent; public: static layout calc_output_layout(lstm_elt_node const& node, kernel_impl_params const& impl_param); diff --git a/src/plugins/intel_gpu/src/graph/include/lstm_gemm_inst.h b/src/plugins/intel_gpu/src/graph/include/lstm_gemm_inst.h index ce3dfb44b48a6a..382894e83b7eb6 100644 --- a/src/plugins/intel_gpu/src/graph/include/lstm_gemm_inst.h +++ b/src/plugins/intel_gpu/src/graph/include/lstm_gemm_inst.h @@ -32,6 +32,7 @@ using lstm_gemm_node = typed_program_node; template <> class typed_primitive_inst : public typed_primitive_inst_base { using parent = typed_primitive_inst_base; + using parent::parent; public: static layout calc_output_layout(lstm_gemm_node const& node, kernel_impl_params const& impl_param); diff 
--git a/src/plugins/intel_gpu/src/graph/include/lstm_inst.h b/src/plugins/intel_gpu/src/graph/include/lstm_inst.h index b2e188e5efeaf6..7fc160633f3340 100644 --- a/src/plugins/intel_gpu/src/graph/include/lstm_inst.h +++ b/src/plugins/intel_gpu/src/graph/include/lstm_inst.h @@ -44,6 +44,7 @@ using lstm_node = typed_program_node; template <> class typed_primitive_inst : public typed_primitive_inst_base { using parent = typed_primitive_inst_base; + using parent::parent; public: static layout calc_output_layout(lstm_node const& node, kernel_impl_params const& impl_param); diff --git a/src/plugins/intel_gpu/src/graph/include/mutable_data_inst.h b/src/plugins/intel_gpu/src/graph/include/mutable_data_inst.h index ad07c31a8d0864..c86171c75f6045 100644 --- a/src/plugins/intel_gpu/src/graph/include/mutable_data_inst.h +++ b/src/plugins/intel_gpu/src/graph/include/mutable_data_inst.h @@ -33,6 +33,7 @@ using mutable_data_node = typed_program_node; template <> class typed_primitive_inst : public typed_primitive_inst_base { using parent = typed_primitive_inst_base; + using parent::parent; public: static layout calc_output_layout(mutable_data_node const& node, kernel_impl_params const& impl_param) { @@ -43,6 +44,9 @@ class typed_primitive_inst : public typed_primitive_inst_base; diff --git a/src/plugins/intel_gpu/src/graph/include/mvn_inst.h b/src/plugins/intel_gpu/src/graph/include/mvn_inst.h index 8ab1d3d0c0d434..1c96a57ee97ab3 100644 --- a/src/plugins/intel_gpu/src/graph/include/mvn_inst.h +++ b/src/plugins/intel_gpu/src/graph/include/mvn_inst.h @@ -27,6 +27,7 @@ using mvn_node = typed_program_node; template <> class typed_primitive_inst : public typed_primitive_inst_base { using parent = typed_primitive_inst_base; + using parent::parent; public: template diff --git a/src/plugins/intel_gpu/src/graph/include/non_max_suppression_inst.h b/src/plugins/intel_gpu/src/graph/include/non_max_suppression_inst.h index 2708d5ad6c2b73..e849a1d9c5409a 100644 --- 
a/src/plugins/intel_gpu/src/graph/include/non_max_suppression_inst.h +++ b/src/plugins/intel_gpu/src/graph/include/non_max_suppression_inst.h @@ -87,6 +87,7 @@ using non_max_suppression_node = typed_program_node; template <> class typed_primitive_inst : public typed_primitive_inst_base { using parent = typed_primitive_inst_base; + using parent::parent; public: typed_primitive_inst(network& network, non_max_suppression_node const& node) diff --git a/src/plugins/intel_gpu/src/graph/include/non_zero_inst.h b/src/plugins/intel_gpu/src/graph/include/non_zero_inst.h index c4abe9cc3f7601..d01fa9d43995d4 100644 --- a/src/plugins/intel_gpu/src/graph/include/non_zero_inst.h +++ b/src/plugins/intel_gpu/src/graph/include/non_zero_inst.h @@ -33,6 +33,7 @@ using count_nonzero_node = typed_program_node; template <> class typed_primitive_inst : public typed_primitive_inst_base { using parent = typed_primitive_inst_base; + using parent::parent; public: template @@ -72,6 +73,7 @@ using gather_nonzero_node = typed_program_node; template <> class typed_primitive_inst : public typed_primitive_inst_base { using parent = typed_primitive_inst_base; + using parent::parent; public: template diff --git a/src/plugins/intel_gpu/src/graph/include/normalize_inst.h b/src/plugins/intel_gpu/src/graph/include/normalize_inst.h index 458e687b207fc3..0009c5a8177e6f 100644 --- a/src/plugins/intel_gpu/src/graph/include/normalize_inst.h +++ b/src/plugins/intel_gpu/src/graph/include/normalize_inst.h @@ -27,6 +27,7 @@ using normalize_node = typed_program_node; template <> class typed_primitive_inst : public typed_primitive_inst_base { using parent = typed_primitive_inst_base; + using parent::parent; public: static layout calc_output_layout(normalize_node const& node, kernel_impl_params const& impl_param); diff --git a/src/plugins/intel_gpu/src/graph/include/one_hot_inst.h b/src/plugins/intel_gpu/src/graph/include/one_hot_inst.h index af1da5de26dc3c..1823dcfbfadbd8 100644 --- 
a/src/plugins/intel_gpu/src/graph/include/one_hot_inst.h +++ b/src/plugins/intel_gpu/src/graph/include/one_hot_inst.h @@ -31,6 +31,7 @@ using one_hot_node = typed_program_node; template <> class typed_primitive_inst : public typed_primitive_inst_base { using parent = typed_primitive_inst_base; + using parent::parent; public: template diff --git a/src/plugins/intel_gpu/src/graph/include/permute_inst.h b/src/plugins/intel_gpu/src/graph/include/permute_inst.h index 6be1827d7dfaf7..2304c42949eb4c 100644 --- a/src/plugins/intel_gpu/src/graph/include/permute_inst.h +++ b/src/plugins/intel_gpu/src/graph/include/permute_inst.h @@ -44,6 +44,7 @@ using permute_node = typed_program_node; template <> class typed_primitive_inst : public typed_primitive_inst_base { using parent = typed_primitive_inst_base; + using parent::parent; public: template diff --git a/src/plugins/intel_gpu/src/graph/include/primitive_inst.h b/src/plugins/intel_gpu/src/graph/include/primitive_inst.h index 27543aa70feb4d..a015b60c72ebe6 100644 --- a/src/plugins/intel_gpu/src/graph/include/primitive_inst.h +++ b/src/plugins/intel_gpu/src/graph/include/primitive_inst.h @@ -14,6 +14,14 @@ #include "meta_utils.h" #include "program_node.h" #include "primitive_type.h" +#include "serialization/binary_buffer.hpp" +#include "serialization/helpers.hpp" +#include "serialization/cl_kernel_data_serializer.hpp" +#include "serialization/object_types.hpp" +#include "serialization/polymorphic_serializer.hpp" +#include "serialization/string_serializer.hpp" +#include "serialization/layout_serializer.hpp" +#include "serialization/vector_serializer.hpp" #include "runtime/kernels_cache.hpp" #include @@ -25,6 +33,8 @@ namespace cldnn { // checks if any user in a list is a cpu primitive bool is_any_user_cpu(const std::list& users); +primitive_type_id get_type_id(std::string type_str); + class primitive_inst; template @@ -41,7 +51,10 @@ struct primitive_impl { virtual std::vector get_internal_buffer_layouts() const = 0; virtual 
void set_node_params(const program_node&) {} + virtual object_type get_type() const { return object_type::NONE; } virtual void set_arguments(primitive_inst& instance) = 0; + virtual void set_arguments(kernel_arguments_data_idx& args_idx) = 0; + virtual kernel_arguments_data get_arguments(const primitive_inst& instance) const = 0; virtual event::ptr execute(const std::vector& events, primitive_inst& instance) = 0; virtual bool validate(const primitive_inst& instance) const = 0; std::string get_kernel_name() const { return _kernel_name; } @@ -57,6 +70,8 @@ struct primitive_impl { virtual std::vector> get_kernels_source() { return {}; } virtual void set_kernels(std::vector) {} virtual void set_kernel_ids(std::vector kernel_ids) {} + virtual void save(cldnn::BinaryOutputBuffer& ob) const {} + virtual void load(cldnn::BinaryInputBuffer& ib) {} // If this flag is set as false, the memory allocated for this primitive is not allowed to be reused bool can_reuse_memory = true; @@ -76,6 +91,7 @@ class primitive_inst { friend class typed_primitive_inst; public: + primitive_inst(network& network); virtual ~primitive_inst() = default; const std::vector>& dependencies() const { @@ -139,7 +155,9 @@ class primitive_inst { } event::ptr execute(const std::vector& events); - void init_kernels(); + void init_kernels(const kernels_cache& kernels_cache) { + _impl->init_kernels(kernels_cache); + } void set_arguments(); bool validate() const { @@ -181,6 +199,12 @@ class primitive_inst { std::vector get_intermediates_memories() const { return _intermediates_memory; } + virtual void save(cldnn::BinaryOutputBuffer& ob) const; + virtual void load(cldnn::BinaryInputBuffer& ib); + void rebuild_deps( + std::unordered_map> const& primitives); + void rebuild_exec_deps( + std::list> const& primitives); std::string get_implementation_name() const; void add_profiling_data(instrumentation::pipeline_stage stage, bool cache_hit, int64_t time); @@ -201,7 +225,7 @@ class primitive_inst { network& _network; 
program_node const* _node; - const layout _node_output_layout; + layout _node_output_layout; std::unique_ptr _impl_params; std::unique_ptr _impl; @@ -210,6 +234,7 @@ class primitive_inst { // it should be added to this set std::vector> _deps; std::vector, int32_t>> _deps_new; + std::vector _dep_ids; // this is a set of dependencies in terms of execution // execution of all primitives from this set should be enough to guarantee that all memory deps (see _deps) @@ -219,6 +244,7 @@ class primitive_inst { // manner) in general - this member is introduced to relax logical connection between primitives which have to be // executed and memories which are used by this primitive std::vector> _exec_deps; + std::vector _exec_dep_ids; // This is sub-network generated on demand to execute unfused primitives sequence instead of single fused primitive // Needed for dynamic path only, as fusion in some cases may be illegal, but it can't be checked on program build phase, @@ -257,6 +283,8 @@ class primitive_inst { std::vector allocate_outputs(); static std::vector> build_exec_deps( std::vector> const& mem_deps); + void convert_args(const kernel_arguments_data& args, kernel_arguments_data_idx& args_idx) const; + int32_t get_index_in_deps(memory::cptr arg) const; // event function called by primitive_inst::execute after checking if primitive should rerun and before calling // _impl->execute() mainly for reshape (to update output memory if reshape_node.is_in_place() == true) @@ -335,7 +363,20 @@ struct typed_primitive_impl : public primitive_impl { return set_arguments_impl(reinterpret_cast&>(instance)); } + void set_arguments(kernel_arguments_data_idx& args_idx) override { + return set_arguments_impl(args_idx); + } + + kernel_arguments_data get_arguments(const primitive_inst& instance) const override { + return get_arguments_impl(reinterpret_cast&>(instance)); + } + virtual void set_arguments_impl(typed_primitive_inst& /*instance*/) {} + virtual void 
set_arguments_impl(kernel_arguments_data_idx& /*args_idx*/) {} + virtual kernel_arguments_data get_arguments_impl(const typed_primitive_inst& /*instance*/) const { + kernel_arguments_data args; + return args; + } virtual event::ptr execute_impl(const std::vector& event, typed_primitive_inst& instance) = 0; @@ -370,6 +411,9 @@ class typed_primitive_inst_base : public primitive_inst { typed_primitive_inst_base(network& network, typed_node const& node) : typed_primitive_inst_base(network, node, do_allocate_memory(node)) {} + typed_primitive_inst_base(network& network) + : primitive_inst(network), node(nullptr), argument(nullptr) {} + protected: typed_primitive_inst_base(network& network, typed_node const& node, bool allocate_memory) : primitive_inst(network, node, allocate_memory), node(&node), argument(node.get_primitive()) {} diff --git a/src/plugins/intel_gpu/src/graph/include/primitive_type.h b/src/plugins/intel_gpu/src/graph/include/primitive_type.h index 7ed8e4732cee90..5f6d13930930de 100644 --- a/src/plugins/intel_gpu/src/graph/include/primitive_type.h +++ b/src/plugins/intel_gpu/src/graph/include/primitive_type.h @@ -28,6 +28,7 @@ struct primitive_type { const std::shared_ptr prim) const = 0; virtual std::shared_ptr create_instance(network& network, const program_node& node) const = 0; + virtual std::shared_ptr create_instance(network& network) const = 0; virtual std::unique_ptr choose_impl(const program_node& node) const = 0; virtual std::unique_ptr choose_impl(const program_node& node, const kernel_impl_params& params) const = 0; diff --git a/src/plugins/intel_gpu/src/graph/include/primitive_type_base.h b/src/plugins/intel_gpu/src/graph/include/primitive_type_base.h index 964b79a53e9eb8..bb96069e39a679 100644 --- a/src/plugins/intel_gpu/src/graph/include/primitive_type_base.h +++ b/src/plugins/intel_gpu/src/graph/include/primitive_type_base.h @@ -32,6 +32,10 @@ struct primitive_type_base : primitive_type { return std::make_shared>(network, node); } + 
std::shared_ptr create_instance(network& network) const override { + return std::make_shared>(network); + } + // TODO: Should we get rid of engine type in impl map? Or we must pass internal build engine to get real ocl type? std::unique_ptr choose_impl(const cldnn::program_node& node) const override { return choose_impl(node, *node.get_kernel_impl_params()); diff --git a/src/plugins/intel_gpu/src/graph/include/prior_box_inst.h b/src/plugins/intel_gpu/src/graph/include/prior_box_inst.h index 5eec00b4a4cb29..c914652b93d0d0 100644 --- a/src/plugins/intel_gpu/src/graph/include/prior_box_inst.h +++ b/src/plugins/intel_gpu/src/graph/include/prior_box_inst.h @@ -33,6 +33,7 @@ using prior_box_node = typed_program_node; template <> class typed_primitive_inst : public typed_primitive_inst_base { using parent = typed_primitive_inst_base; + using parent::parent; public: static layout calc_output_layout(prior_box_node const& node, kernel_impl_params const& impl_param); diff --git a/src/plugins/intel_gpu/src/graph/include/proposal_inst.h b/src/plugins/intel_gpu/src/graph/include/proposal_inst.h index 7eee356468e1da..d4eb8c31b9bb6d 100644 --- a/src/plugins/intel_gpu/src/graph/include/proposal_inst.h +++ b/src/plugins/intel_gpu/src/graph/include/proposal_inst.h @@ -27,6 +27,7 @@ using proposal_node = typed_program_node; template <> class typed_primitive_inst : public typed_primitive_inst_base { using parent = typed_primitive_inst_base; + using parent::parent; public: struct anchor { diff --git a/src/plugins/intel_gpu/src/graph/include/pyramid_roi_align_inst.h b/src/plugins/intel_gpu/src/graph/include/pyramid_roi_align_inst.h index 0d670464286ebb..7f2ade5baede2f 100644 --- a/src/plugins/intel_gpu/src/graph/include/pyramid_roi_align_inst.h +++ b/src/plugins/intel_gpu/src/graph/include/pyramid_roi_align_inst.h @@ -30,6 +30,7 @@ using pyramid_roi_align_node = typed_program_node; template <> class typed_primitive_inst : public typed_primitive_inst_base { using parent = 
typed_primitive_inst_base; + using parent::parent; public: static layout calc_output_layout(pyramid_roi_align_node const& node, kernel_impl_params const& impl_param); diff --git a/src/plugins/intel_gpu/src/graph/include/quantize_inst.h b/src/plugins/intel_gpu/src/graph/include/quantize_inst.h index a81fe81bd6d785..783142b4d3e9c5 100644 --- a/src/plugins/intel_gpu/src/graph/include/quantize_inst.h +++ b/src/plugins/intel_gpu/src/graph/include/quantize_inst.h @@ -133,6 +133,7 @@ using quantize_node = typed_program_node; template <> class typed_primitive_inst : public typed_primitive_inst_base { using parent = typed_primitive_inst_base; + using parent::parent; public: template diff --git a/src/plugins/intel_gpu/src/graph/include/random_uniform_inst.h b/src/plugins/intel_gpu/src/graph/include/random_uniform_inst.h index 2391fb6f259700..3d912a4466e3a6 100644 --- a/src/plugins/intel_gpu/src/graph/include/random_uniform_inst.h +++ b/src/plugins/intel_gpu/src/graph/include/random_uniform_inst.h @@ -15,6 +15,7 @@ using random_uniform_node = typed_program_node; template<> class typed_primitive_inst : public typed_primitive_inst_base { using parent = typed_primitive_inst_base; + using parent::parent; public: static layout calc_output_layout(random_uniform_node const &node, kernel_impl_params const& impl_param); diff --git a/src/plugins/intel_gpu/src/graph/include/range_inst.h b/src/plugins/intel_gpu/src/graph/include/range_inst.h index 7c9a4d160fa339..ea085f880b5721 100644 --- a/src/plugins/intel_gpu/src/graph/include/range_inst.h +++ b/src/plugins/intel_gpu/src/graph/include/range_inst.h @@ -26,6 +26,9 @@ using range_node = typed_program_node; template <> class typed_primitive_inst : public typed_primitive_inst_base { + using parent = typed_primitive_inst_base; + using parent::parent; + public: template static std::vector calc_output_layouts(range_node const& /*node*/, const kernel_impl_params& impl_param); diff --git 
a/src/plugins/intel_gpu/src/graph/include/read_value_inst.h b/src/plugins/intel_gpu/src/graph/include/read_value_inst.h index 22c31219f96503..ebd9272381a40c 100644 --- a/src/plugins/intel_gpu/src/graph/include/read_value_inst.h +++ b/src/plugins/intel_gpu/src/graph/include/read_value_inst.h @@ -23,6 +23,7 @@ class typed_primitive_inst : public typed_primitive_inst_base; diff --git a/src/plugins/intel_gpu/src/graph/include/reduce_inst.h b/src/plugins/intel_gpu/src/graph/include/reduce_inst.h index 7dd3526a6e3570..48aadb2d7907c9 100644 --- a/src/plugins/intel_gpu/src/graph/include/reduce_inst.h +++ b/src/plugins/intel_gpu/src/graph/include/reduce_inst.h @@ -16,6 +16,7 @@ using reduce_node = typed_program_node; template <> class typed_primitive_inst : public typed_primitive_inst_base { using parent = typed_primitive_inst_base; + using parent::parent; public: template diff --git a/src/plugins/intel_gpu/src/graph/include/region_yolo_inst.h b/src/plugins/intel_gpu/src/graph/include/region_yolo_inst.h index 22b479f1ad674e..9ebf8a6ab4a299 100644 --- a/src/plugins/intel_gpu/src/graph/include/region_yolo_inst.h +++ b/src/plugins/intel_gpu/src/graph/include/region_yolo_inst.h @@ -15,6 +15,7 @@ using region_yolo_node = typed_program_node; template <> class typed_primitive_inst : public typed_primitive_inst_base { using parent = typed_primitive_inst_base; + using parent::parent; public: static layout calc_output_layout(region_yolo_node const& node, kernel_impl_params const& impl_param); diff --git a/src/plugins/intel_gpu/src/graph/include/reorder_inst.h b/src/plugins/intel_gpu/src/graph/include/reorder_inst.h index 29ea902c8be0c1..265b43caaf84ff 100644 --- a/src/plugins/intel_gpu/src/graph/include/reorder_inst.h +++ b/src/plugins/intel_gpu/src/graph/include/reorder_inst.h @@ -53,6 +53,7 @@ using reorder_node = typed_program_node; template <> class typed_primitive_inst : public typed_primitive_inst_base { using parent = typed_primitive_inst_base; + using parent::parent; public: 
template @@ -68,10 +69,16 @@ class typed_primitive_inst : public typed_primitive_inst_base bool has_mean() const { return !argument->mean.empty(); } void update_output_memory() override; + bool requires_reinterpret() const { return _req_reinterpr; } + + void save(cldnn::BinaryOutputBuffer& ob) const override; + void load(cldnn::BinaryInputBuffer& ib) override; private: void on_execute() override; void reuse_input(); + + bool _req_reinterpr = false; }; using reorder_inst = typed_primitive_inst; diff --git a/src/plugins/intel_gpu/src/graph/include/reorg_yolo_inst.h b/src/plugins/intel_gpu/src/graph/include/reorg_yolo_inst.h index ba680748990333..cc9abd460ed89a 100644 --- a/src/plugins/intel_gpu/src/graph/include/reorg_yolo_inst.h +++ b/src/plugins/intel_gpu/src/graph/include/reorg_yolo_inst.h @@ -15,6 +15,7 @@ using reorg_yolo_node = typed_program_node; template <> class typed_primitive_inst : public typed_primitive_inst_base { using parent = typed_primitive_inst_base; + using parent::parent; public: static layout calc_output_layout(reorg_yolo_node const& node, kernel_impl_params const& impl_param); diff --git a/src/plugins/intel_gpu/src/graph/include/resample_inst.h b/src/plugins/intel_gpu/src/graph/include/resample_inst.h index 0e7180e5d19154..b8220f547f8fce 100644 --- a/src/plugins/intel_gpu/src/graph/include/resample_inst.h +++ b/src/plugins/intel_gpu/src/graph/include/resample_inst.h @@ -32,6 +32,7 @@ using resample_node = typed_program_node; template <> class typed_primitive_inst : public typed_primitive_inst_base { using parent = typed_primitive_inst_base; + using parent::parent; public: template diff --git a/src/plugins/intel_gpu/src/graph/include/reshape_inst.h b/src/plugins/intel_gpu/src/graph/include/reshape_inst.h index 17933725d8b469..05e5c304c1e66d 100644 --- a/src/plugins/intel_gpu/src/graph/include/reshape_inst.h +++ b/src/plugins/intel_gpu/src/graph/include/reshape_inst.h @@ -42,6 +42,7 @@ using reshape_node = typed_program_node; template <> class 
typed_primitive_inst : public typed_primitive_inst_base { using parent = typed_primitive_inst_base; + using parent::parent; public: template diff --git a/src/plugins/intel_gpu/src/graph/include/reverse_inst.h b/src/plugins/intel_gpu/src/graph/include/reverse_inst.h index 64c5b613f07686..599a46adfc6c8b 100644 --- a/src/plugins/intel_gpu/src/graph/include/reverse_inst.h +++ b/src/plugins/intel_gpu/src/graph/include/reverse_inst.h @@ -16,6 +16,7 @@ using reverse_node = typed_program_node; template <> class typed_primitive_inst : public typed_primitive_inst_base { using parent = typed_primitive_inst_base; + using parent::parent; public: static layout calc_output_layout(reverse_node const& node, kernel_impl_params const& impl_param); diff --git a/src/plugins/intel_gpu/src/graph/include/reverse_sequence_inst.h b/src/plugins/intel_gpu/src/graph/include/reverse_sequence_inst.h index 944d24e8de9a51..3c16a002a143cb 100644 --- a/src/plugins/intel_gpu/src/graph/include/reverse_sequence_inst.h +++ b/src/plugins/intel_gpu/src/graph/include/reverse_sequence_inst.h @@ -16,6 +16,7 @@ using reverse_sequence_node = typed_program_node; template <> class typed_primitive_inst : public typed_primitive_inst_base { using parent = typed_primitive_inst_base; + using parent::parent; public: static layout calc_output_layout(reverse_sequence_node const& node, kernel_impl_params const& impl_param); diff --git a/src/plugins/intel_gpu/src/graph/include/roi_align_inst.h b/src/plugins/intel_gpu/src/graph/include/roi_align_inst.h index 71d8f45ae4aa7f..443b0429d47503 100644 --- a/src/plugins/intel_gpu/src/graph/include/roi_align_inst.h +++ b/src/plugins/intel_gpu/src/graph/include/roi_align_inst.h @@ -14,6 +14,7 @@ using roi_align_node = typed_program_node; template <> class typed_primitive_inst : public typed_primitive_inst_base { using parent = typed_primitive_inst_base; + using parent::parent; public: static layout calc_output_layout(roi_align_node const& node, kernel_impl_params const& 
impl_param); diff --git a/src/plugins/intel_gpu/src/graph/include/scatter_elements_update_inst.h b/src/plugins/intel_gpu/src/graph/include/scatter_elements_update_inst.h index cf0c50a608bdb7..bc354e0420ad00 100644 --- a/src/plugins/intel_gpu/src/graph/include/scatter_elements_update_inst.h +++ b/src/plugins/intel_gpu/src/graph/include/scatter_elements_update_inst.h @@ -15,6 +15,7 @@ using scatter_elements_update_node = typed_program_node template <> class typed_primitive_inst : public typed_primitive_inst_base { using parent = typed_primitive_inst_base; + using parent::parent; public: static layout calc_output_layout(scatter_elements_update_node const& node, kernel_impl_params const& impl_param); diff --git a/src/plugins/intel_gpu/src/graph/include/scatter_nd_update_inst.h b/src/plugins/intel_gpu/src/graph/include/scatter_nd_update_inst.h index 6d27fcae28da46..7479a18d108beb 100644 --- a/src/plugins/intel_gpu/src/graph/include/scatter_nd_update_inst.h +++ b/src/plugins/intel_gpu/src/graph/include/scatter_nd_update_inst.h @@ -15,6 +15,7 @@ using scatter_nd_update_node = typed_program_node; template <> class typed_primitive_inst : public typed_primitive_inst_base { using parent = typed_primitive_inst_base; + using parent::parent; public: static layout calc_output_layout(scatter_nd_update_node const& node, kernel_impl_params const& impl_param); diff --git a/src/plugins/intel_gpu/src/graph/include/scatter_update_inst.h b/src/plugins/intel_gpu/src/graph/include/scatter_update_inst.h index 1aaef332d8caea..09bb778317d380 100644 --- a/src/plugins/intel_gpu/src/graph/include/scatter_update_inst.h +++ b/src/plugins/intel_gpu/src/graph/include/scatter_update_inst.h @@ -16,6 +16,7 @@ using scatter_update_node = typed_program_node; template <> class typed_primitive_inst : public typed_primitive_inst_base { using parent = typed_primitive_inst_base; + using parent::parent; public: template diff --git a/src/plugins/intel_gpu/src/graph/include/select_inst.h 
b/src/plugins/intel_gpu/src/graph/include/select_inst.h index 7d36dae16a68b8..b49898ccb6806b 100644 --- a/src/plugins/intel_gpu/src/graph/include/select_inst.h +++ b/src/plugins/intel_gpu/src/graph/include/select_inst.h @@ -26,6 +26,7 @@ using select_node = typed_program_node : public typed_primitive_inst_base; + using parent::parent; public: template diff --git a/src/plugins/intel_gpu/src/graph/include/serialization/binary_buffer.hpp b/src/plugins/intel_gpu/src/graph/include/serialization/binary_buffer.hpp new file mode 100644 index 00000000000000..7b19fb3ca2b142 --- /dev/null +++ b/src/plugins/intel_gpu/src/graph/include/serialization/binary_buffer.hpp @@ -0,0 +1,85 @@ +// Copyright (C) 2018-2022 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +/////////////////////////////////////////////////////////////////////////////////////////////////// +#pragma once + +#include +#include +#include +#include "buffer.hpp" +#include "helpers.hpp" +#include "bind.hpp" + +namespace cldnn { +class BinaryOutputBuffer : public OutputBuffer { +public: + BinaryOutputBuffer(std::ostream& stream) : OutputBuffer(this), stream(stream) {} + + void write(void const * data, std::streamsize size) { + auto const written_size = stream.rdbuf()->sputn(reinterpret_cast(data), size); + if (written_size != size) { + throw std::runtime_error("Failed to write " + std::to_string(size) + " bytes to stream! Wrote " + std::to_string(written_size)); + } + } + +private: + std::ostream& stream; +}; + +class BinaryInputBuffer : public InputBuffer { +public: + BinaryInputBuffer(std::istream& stream, engine& engine) : InputBuffer(this, engine), stream(stream) {} + + void read(void* const data, std::streamsize size) { + auto const read_size = stream.rdbuf()->sgetn(reinterpret_cast(data), size); + if (read_size != size) { + throw std::runtime_error("Failed to read " + std::to_string(size) + " bytes from stream! 
Read " + std::to_string(read_size)); + } + } + +private: + std::istream& stream; +}; + +template +class Serializer::value>::type> { +public: + static void save(BinaryOutputBuffer& buffer, const T& object) { + buffer.write(std::addressof(object), sizeof(object)); + } +}; + +template +class Serializer::value>::type> { +public: + static void load(BinaryInputBuffer& buffer, T& object) { + buffer.read(std::addressof(object), sizeof(object)); + } +}; + +template +class Serializer> { +public: + static void save(BinaryOutputBuffer& buffer, const Data& bin_data) { + buffer.write(bin_data.data, static_cast(bin_data.number_of_bytes)); + } +}; + +template +class Serializer> { +public: + static void load(BinaryInputBuffer& buffer, Data& bin_data) { + buffer.read(bin_data.data, static_cast(bin_data.number_of_bytes)); + } +}; + +} // namespace cldnn + +#define BIND_BINARY_BUFFER_WITH_TYPE(cls_name, obj_type) \ + namespace cldnn { \ + const object_type cls_name::type = obj_type; \ + BIND_TO_BUFFER(BinaryOutputBuffer, cls_name) \ + BIND_TO_BUFFER(BinaryInputBuffer, cls_name) \ + } diff --git a/src/plugins/intel_gpu/src/graph/include/serialization/bind.hpp b/src/plugins/intel_gpu/src/graph/include/serialization/bind.hpp new file mode 100644 index 00000000000000..258274b606e409 --- /dev/null +++ b/src/plugins/intel_gpu/src/graph/include/serialization/bind.hpp @@ -0,0 +1,178 @@ +// Copyright (C) 2018-2022 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +/////////////////////////////////////////////////////////////////////////////////////////////////// +#pragma once + +#include +#include +#include +#include "buffer.hpp" +#include "static_instance.hpp" +#include "object_types.hpp" + +#define DECLARE_OBJECT_TYPE_SERIALIZATION \ + static const object_type type; \ + object_type get_type() const override { return type; } + +#define BIND_TO_BUFFER(buffer, type) \ + template <> \ + class bind_creator { \ + private: \ + static const instance_creator& creator; \ + }; \ + const 
instance_creator& bind_creator::creator = \ + static_instance>::get_instance().instantiate(); + +namespace cldnn { + +template +struct saver_storage { + using save_function = std::function; + using value_type = typename std::unordered_map::value_type; + + static saver_storage& instance() { + static saver_storage instance; + return instance; + } + + const save_function& get_save_function(const object_type& type) const { + return map.at(type); + } + + void set_save_function(const value_type& pair) { + map.insert(pair); + } + +private: + saver_storage() = default; + saver_storage(const saver_storage&) = delete; + void operator=(const saver_storage&) = delete; + + std::unordered_map map; +}; + +template +struct void_deleter { + void operator()(const T*) const { } +}; + +template +struct loader_storage { + using value_type = typename std::unordered_map::value_type; + + static loader_storage& instance() { + static loader_storage instance; + return instance; + } + + const FuncT& get_load_function(const object_type& type) { + return map.at(type); + } + + void set_load_function(const value_type& pair) { + map.insert(pair); + } + +private: + loader_storage() = default; + loader_storage(const loader_storage&) = delete; + void operator=(const loader_storage&) = delete; + + std::unordered_map map; +}; + +template +using def = loader_storage>&)>>; + +template +using dif = loader_storage>&, engine&)>>; + +template +class buffer_binder; + +template +class buffer_binder, BufferType>::value>::type> { +public: + static buffer_binder& instance() { + static buffer_binder instance; + return instance; + } + +private: + buffer_binder() { + saver_storage::instance().set_save_function({T::type, save}); + } + + buffer_binder(const buffer_binder&) = delete; + void operator=(const buffer_binder&) = delete; + + template + static const Derived* downcast(const void* base_ptr) { + return static_cast(base_ptr); + } + + static void save(BufferType& buffer, const void* base_ptr) { + const auto 
derived_ptr = downcast(base_ptr); + derived_ptr->save(buffer); + } +}; + +template +class buffer_binder, BufferType>::value && + std::is_default_constructible::value>::type> { +public: + static buffer_binder& instance() { + static buffer_binder instance; + return instance; + } + +private: + buffer_binder() { + def::instance().set_load_function({T::type, [](BufferType& buffer, std::unique_ptr>& result_ptr) { + std::unique_ptr derived_ptr = std::unique_ptr(new T()); + derived_ptr->load(buffer); + result_ptr.reset(derived_ptr.release()); + }}); + } + + buffer_binder(const buffer_binder&) = delete; + void operator=(const buffer_binder&) = delete; +}; + +template +class buffer_binder, BufferType>::value && + !std::is_default_constructible::value>::type> { +public: + static buffer_binder& instance() { + static buffer_binder instance; + return instance; + } + +private: + buffer_binder() { + dif::instance().set_load_function({T::type, [](BufferType& buffer, std::unique_ptr>& result_ptr, engine& engine) { + std::unique_ptr derived_ptr = std::unique_ptr(new T(engine)); + derived_ptr->load(buffer); + result_ptr.reset(derived_ptr.release()); + }}); + } + + buffer_binder(const buffer_binder&) = delete; + void operator=(const buffer_binder&) = delete; +}; + +template +class bind_creator; + +template +class instance_creator { +public: + const instance_creator& instantiate() { + static_instance>::get_instance(); + return *this; + } +}; + +} // namespace cldnn diff --git a/src/plugins/intel_gpu/src/graph/include/serialization/buffer.hpp b/src/plugins/intel_gpu/src/graph/include/serialization/buffer.hpp new file mode 100644 index 00000000000000..9b0dcfc153f06c --- /dev/null +++ b/src/plugins/intel_gpu/src/graph/include/serialization/buffer.hpp @@ -0,0 +1,84 @@ +// Copyright (C) 2018-2022 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +/////////////////////////////////////////////////////////////////////////////////////////////////// +#pragma once +#include +#include 
+#include "intel_gpu/runtime/engine.hpp" +#include "serializer.hpp" + +namespace cldnn { + +template +class Buffer { +public: + Buffer(BufferType* const buffer) : buffer(buffer) {} + + template + inline BufferType& operator()(Types&& ... args) { + process(std::forward(args)...); + return *buffer; + } + +protected: + inline BufferType& getBuffer() { + return *buffer; + } + + BufferType* const buffer; + +private: + template + inline void process(T&& first, OtherTypes&& ... remains) { + process(std::forward(first)); + process(std::forward(remains)...); + } + + template + inline void process(T&& object){ + buffer->process(std::forward(object)); + } +}; + +template +class OutputBuffer : public Buffer { + friend class Buffer; +public: + OutputBuffer(BufferType* const buffer) : Buffer(buffer) {} + + template + inline BufferType& operator<<(T&& arg) { + process(std::forward(arg)); + return Buffer::getBuffer(); + } +private: + template + inline void process(T&& object) { + Serializer::type>::type>::save(*Buffer::buffer, object); + } +}; + +template +class InputBuffer : public Buffer { + friend class Buffer; +public: + InputBuffer(BufferType* const buffer, engine& engine) : Buffer(buffer), _engine(engine) {} + + template + inline BufferType& operator>>(T&& arg) { + process(std::forward(arg)); + return Buffer::getBuffer(); + } + + engine& get_engine() { return _engine; } +private: + template + inline void process(T&& object) { + Serializer::type>::load(*Buffer::buffer, object); + } + + engine& _engine; +}; +} // namespace cldnn \ No newline at end of file diff --git a/src/plugins/intel_gpu/src/graph/include/serialization/cl_kernel_data_serializer.hpp b/src/plugins/intel_gpu/src/graph/include/serialization/cl_kernel_data_serializer.hpp new file mode 100644 index 00000000000000..1d8b95f37f3fda --- /dev/null +++ b/src/plugins/intel_gpu/src/graph/include/serialization/cl_kernel_data_serializer.hpp @@ -0,0 +1,59 @@ +// Copyright (C) 2018-2022 Intel Corporation +// 
SPDX-License-Identifier: Apache-2.0 +// + +/////////////////////////////////////////////////////////////////////////////////////////////////// +#pragma once + +#include +#include "buffer.hpp" +#include "helpers.hpp" +#include "kernel_selector_common.h" +#include "intel_gpu/runtime/kernel_args.hpp" + +namespace cldnn { + +template +class Serializer, BufferType>::value>::type> { +public: + static void save(BufferType& buffer, const kernel_selector::clKernelData& data) { + const auto& params = data.params; + buffer(params.workGroups.global, params.workGroups.local); + buffer << params.arguments.size(); + for (const auto& arg : params.arguments) { + buffer << make_data(&arg.t, sizeof(argument_desc::Types)) << arg.index; + } + buffer << params.scalars.size(); + for (const auto& scalar : params.scalars) { + buffer << make_data(&scalar.t, sizeof(scalar_desc::Types)) << make_data(&scalar.v, sizeof(scalar_desc::ValueT)); + } + buffer << params.layerID; + } +}; + +template +class Serializer, BufferType>::value>::type> { +public: + static void load(BufferType& buffer, kernel_selector::clKernelData& data) { + auto& params = data.params; + buffer(params.workGroups.global, params.workGroups.local); + + typename arguments_desc::size_type arguments_desc_size = 0UL; + buffer >> arguments_desc_size; + params.arguments.resize(arguments_desc_size); + for (auto& arg : params.arguments) { + buffer >> make_data(&arg.t, sizeof(argument_desc::Types)) >> arg.index; + } + + typename scalars_desc::size_type scalars_desc_size = 0UL; + buffer >> scalars_desc_size; + params.scalars.resize(scalars_desc_size); + for (auto& scalar : params.scalars) { + buffer >> make_data(&scalar.t, sizeof(scalar_desc::Types)) >> make_data(&scalar.v, sizeof(scalar_desc::ValueT)); + } + + buffer >> params.layerID; + } +}; + +} // namespace cldnn \ No newline at end of file diff --git a/src/plugins/intel_gpu/src/graph/include/serialization/helpers.hpp 
b/src/plugins/intel_gpu/src/graph/include/serialization/helpers.hpp new file mode 100644 index 00000000000000..cede64c119fd9c --- /dev/null +++ b/src/plugins/intel_gpu/src/graph/include/serialization/helpers.hpp @@ -0,0 +1,26 @@ +// Copyright (C) 2018-2022 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +/////////////////////////////////////////////////////////////////////////////////////////////////// +#pragma once +#include +#include + +namespace cldnn { +template +struct Data { + using DataType = typename std::conditional::type>::type>::value, + const void*, void*>::type; + + Data(T&& data, uint64_t number_of_bytes) : data(std::forward(data)), number_of_bytes(number_of_bytes) {} + + DataType data; + uint64_t number_of_bytes; +}; + +template +static Data make_data(T&& data, uint64_t number_of_bytes) { + return {std::forward(data), number_of_bytes}; +} +} // namespace cldnn \ No newline at end of file diff --git a/src/plugins/intel_gpu/src/graph/include/serialization/layout_serializer.hpp b/src/plugins/intel_gpu/src/graph/include/serialization/layout_serializer.hpp new file mode 100644 index 00000000000000..31c1ebdc68f8ad --- /dev/null +++ b/src/plugins/intel_gpu/src/graph/include/serialization/layout_serializer.hpp @@ -0,0 +1,65 @@ +// Copyright (C) 2018-2022 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +/////////////////////////////////////////////////////////////////////////////////////////////////// +#pragma once + +#include +#include +#include "buffer.hpp" +#include "helpers.hpp" +#include "intel_gpu/runtime/layout.hpp" + +namespace cldnn { +template +class Serializer, BufferType>::value>::type> { +public: + static void save(BufferType& buffer, const cldnn::layout& _layout) { + buffer << make_data(&_layout.data_type, sizeof(cldnn::data_types)); + buffer << make_data(&_layout.format, sizeof(cldnn::format)); + buffer << _layout.data_padding.filling_value(); + buffer << _layout.data_padding.lower_size().sizes(); + buffer << 
_layout.data_padding.upper_size().sizes(); + + std::vector _sizes = _layout.get_tensor().sizes(_layout.format); + // Temp WA for bs_x_bsv16 + if (_layout.format == cldnn::format::bs_x_bsv16) { + std::vector _tmp_sizes = _layout.get_tensor().sizes(); + _sizes[0] = _tmp_sizes[0]; + _sizes[1] = _tmp_sizes[1]; + } + buffer << _sizes; + } +}; + +template +class Serializer, BufferType>::value>::type> { +public: + static void load(BufferType& buffer, cldnn::layout& _layout) { + buffer >> make_data(&_layout.data_type, sizeof(cldnn::data_types)); + buffer >> make_data(&_layout.format, sizeof(cldnn::format)); + + { + float _filling_value; + buffer >> _filling_value; + std::vector _lower_size; + buffer >> _lower_size; + std::vector _upper_size; + buffer >> _upper_size; + _layout.data_padding = cldnn::padding(_lower_size, _upper_size, _filling_value); + } + + std::vector _sizes; + buffer >> _sizes; + + // Temp WA for bs_x_bsv16 + if (_layout.format == cldnn::format::bs_x_bsv16) { + _layout.set_tensor(tensor(_sizes)); + } else { + _layout.set_tensor(tensor(_layout.format, _sizes)); + } + } +}; + +} // namespace cldnn \ No newline at end of file diff --git a/src/plugins/intel_gpu/src/graph/include/serialization/map_serializer.hpp b/src/plugins/intel_gpu/src/graph/include/serialization/map_serializer.hpp new file mode 100644 index 00000000000000..aac326c7bed975 --- /dev/null +++ b/src/plugins/intel_gpu/src/graph/include/serialization/map_serializer.hpp @@ -0,0 +1,40 @@ +// Copyright (C) 2018-2022 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +/////////////////////////////////////////////////////////////////////////////////////////////////// +#pragma once + +#include +#include +#include "buffer.hpp" + +namespace cldnn { + +template +class Serializer, typename std::enable_if, BufferType>::value>::type> { +public: + static void save(BufferType& buffer, const std::map& map) { + buffer << map.size(); + for (const auto& pair : map) { + buffer(pair.first, pair.second); 
+ } + } +}; + +template +class Serializer, typename std::enable_if, BufferType>::value>::type> { +public: + static void load(BufferType& buffer, std::map& map) { + typename std::map::size_type map_size = 0UL; + buffer >> map_size; + map.clear(); + Key key; + for (size_t i = 0; i < map_size; i++) { + buffer >> key; + buffer >> map[std::move(key)]; + } + } +}; + +} // namespace cldnn \ No newline at end of file diff --git a/src/plugins/intel_gpu/src/graph/include/serialization/object_types.hpp b/src/plugins/intel_gpu/src/graph/include/serialization/object_types.hpp new file mode 100644 index 00000000000000..820aac71acc22c --- /dev/null +++ b/src/plugins/intel_gpu/src/graph/include/serialization/object_types.hpp @@ -0,0 +1,119 @@ +// Copyright (C) 2018-2022 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +/////////////////////////////////////////////////////////////////////////////////////////////////// +#pragma once + +namespace cldnn { + +enum class object_type { + ACTIVATION_IMPL, + ADAPTIVE_POOLING_IMPL, + ARG_MAX_MIN_IMPL, + ASSIGN_IMPL, + AVERAGE_UNPOOLING_IMPL, + BATCH_TO_SPACE_IMPL, + BINARY_CONVOLUTION_IMPL, + BORDER_IMPL, + BROADCAST_IMPL, + BUCKETIZE_IMPL, + CONCATENATION_IMPL, + CONVERT_COLOR_IMPL, + CONVOLUTION_IMPL, + CROP_IMPL, + CTC_GREEDY_DECODER_IMPL, + CTC_LOSS_IMPL, + CUM_SUM_IMPL, + CUSTOM_GPU_PRIMITIVE_IMPL, + DECONVOLUTION_IMPL, + DEFORMABLE_CONV_IMPL, + DEFORMABLE_INTERP_IMPL, + DEPTH_TO_SPACE_IMPL, + DETECTION_OUTPUT_IMPL_OCL, + DFT_IMPL, + ELTWISE_IMPL, + EMBEDDING_BAG_IMPL, + EXPERIMENTAL_DETECTRON_DETECTION_OUTPUT_IMPL, + EXPERIMENTAL_DETECTRON_GENERATE_PROPOSALS_SINGLE_IMAGE_IMPL, + EXPERIMENTAL_DETECTRON_PRIOR_GRID_GENERATOR_IMPL, + EXPERIMENTAL_DETECTRON_ROI_FEATURE_EXTRACTOR_IMPL, + EXPERIMENTAL_DETECTRON_TOPK_ROIS_IMPL, + EXTRACT_IMAGE_PATCHES_IMPL, + EYE_IMPL, + FULLY_CONNECTED_IMPL, + FUSED_CONV_ELTWISE_IMPL, + GATHER_ELEMENTS_IMPL, + GATHER_ND_IMPL, + GATHER_TREE_IMPL, + GATHER_IMPL, + GEMM_IMPL, + 
GENERATE_PROPOSALS_IMPL, + GENERIC_LAYER_IMPL, + GRID_SAMPLE_IMPL, + GRN_IMPL, + LRN_IMPL, + LSTM_DYNAMIC_INPUT_IMPL, + LSTM_DYNAMIC_TIMELOOP_IMPL, + LSTM_ELT_IMPL, + LSTM_GEMM_IMPL, + MAX_UNPOOLING_IMPL, + MUTABLE_DATA_IMPL, + MVN_IMPL, + NON_MAX_SUPPRESSION_IMPL_OCL, + COUNT_NONZERO_IMPL, + GATHER_NONZERO_IMPL, + NORMALIZE_IMPL, + ONE_HOT_IMPL, + PERMUTE_IMPL, + POOLING_IMPL, + PRIOR_BOX_IMPL, + PYRAMID_ROI_ALIGN_IMPL, + QUANTIZE_IMPL, + RANDOM_UNIFORM_IMPL, + RANGE_IMPL, + READ_VALUE_IMPL, + REDUCE_IMPL, + REGION_YOLO_IMPL, + REORDER_IMPL, + REORG_YOLO_IMPL, + RESAMPLE_IMPL, + RESHAPE_IMPL, + REVERSE_SEQUENCE_IMPL, + REVERSE_IMPL, + ROI_ALIGN_IMPL, + ROI_POOLING_IMPL, + ROLL_IMPL, + SCALE_IMPL, + SCATTER_ELEMENTS_UPDATE_IMPL, + SCATTER_ND_UPDATE_IMPL, + SCATTER_UPDATE_IMPL, + SELECT_IMPL, + SHAPE_OF_IMPL, + SHUFFLE_CHANNELS_IMPL, + SLICE_IMPL, + SOFTMAX_IMPL, + SPACE_TO_BATCH_IMPL, + SPACE_TO_DEPTH_IMPL, + STRIDED_SLICE_IMPL, + TILE_IMPL, + WAIT_FOR_EVENTS_IMPL, + CONDITION_IMPL, + LOOP_IMPL, + DETECTION_OUTPUT_IMPL_CPU, + NON_MAX_SUPPRESSION_IMPL_CPU, + PROPOSAL_IMPL_CPU, + DATA_INST, + EXECUTABLE_INST, + CONVOLUTION_ONEDNN, + POOLING_ONEDNN, + CONCATENATION_ONEDNN, + DECONVOLUTION_ONEDNN, + FULLY_CONNECTED_ONEDNN, + GEMM_ONEDNN, + REDUCTION_ONEDNN, + REORDER_ONEDNN, + NONE +}; + +} //namespace cldnn \ No newline at end of file diff --git a/src/plugins/intel_gpu/src/graph/include/serialization/polymorphic_serializer.hpp b/src/plugins/intel_gpu/src/graph/include/serialization/polymorphic_serializer.hpp new file mode 100644 index 00000000000000..b6d7dd730eb186 --- /dev/null +++ b/src/plugins/intel_gpu/src/graph/include/serialization/polymorphic_serializer.hpp @@ -0,0 +1,52 @@ +// Copyright (C) 2018-2022 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +/////////////////////////////////////////////////////////////////////////////////////////////////// +#pragma once + +#include +#include +#include +#include "intel_gpu/runtime/engine.hpp" +#include 
"buffer.hpp" +#include "bind.hpp" +#include "helpers.hpp" +#include "object_types.hpp" + +namespace cldnn { + +template +class Serializer, typename std::enable_if, BufferType>::value>::type> { +public: + static void save(BufferType& buffer, const std::unique_ptr& ptr) { + const auto& type = ptr->get_type(); + buffer << cldnn::make_data(&type, sizeof(object_type)); + const auto save_func = saver_storage::instance().get_save_function(type); + save_func(buffer, ptr.get()); + } +}; + +template +class Serializer, typename std::enable_if, BufferType>::value>::type> { +public: + static void load(BufferType& buffer, std::unique_ptr& ptr, engine& engine) { + object_type type; + buffer >> cldnn::make_data(&type, sizeof(object_type)); + const auto load_func = dif::instance().get_load_function(type); + std::unique_ptr> result; + load_func(buffer, result, engine); + ptr.reset(static_cast(result.release())); + } + + static void load(BufferType& buffer, std::unique_ptr& ptr) { + object_type type; + buffer >> cldnn::make_data(&type, sizeof(object_type)); + const auto load_func = def::instance().get_load_function(type); + std::unique_ptr> result; + load_func(buffer, result); + ptr.reset(static_cast(result.release())); + } +}; + +} // namespace cldnn \ No newline at end of file diff --git a/src/plugins/intel_gpu/src/graph/include/serialization/serializer.hpp b/src/plugins/intel_gpu/src/graph/include/serialization/serializer.hpp new file mode 100644 index 00000000000000..ccec0a1b9fba62 --- /dev/null +++ b/src/plugins/intel_gpu/src/graph/include/serialization/serializer.hpp @@ -0,0 +1,20 @@ +// Copyright (C) 2018-2022 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +/////////////////////////////////////////////////////////////////////////////////////////////////// +#pragma once + +namespace cldnn { +template +class Serializer { +public: + static void save(BufferType& buffer, const T& object) { + object.save(buffer); + } + + static void load(BufferType& buffer, T& 
object) { + object.load(buffer); + } +}; +} // namespace cldnn \ No newline at end of file diff --git a/src/plugins/intel_gpu/src/graph/include/serialization/set_serializer.hpp b/src/plugins/intel_gpu/src/graph/include/serialization/set_serializer.hpp new file mode 100644 index 00000000000000..93645abf849915 --- /dev/null +++ b/src/plugins/intel_gpu/src/graph/include/serialization/set_serializer.hpp @@ -0,0 +1,39 @@ +// Copyright (C) 2018-2022 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +/////////////////////////////////////////////////////////////////////////////////////////////////// +#pragma once + +#include +#include +#include "buffer.hpp" +#include "helpers.hpp" + +namespace cldnn { +template +class Serializer, typename std::enable_if, BufferType>::value>::type> { +public: + static void save(BufferType& buffer, const std::set& set) { + buffer << set.size(); + for (const auto& el : set) { + buffer << el; + } + } +}; + +template +class Serializer, typename std::enable_if, BufferType>::value>::type> { +public: + static void load(BufferType& buffer, std::set& set) { + typename std::set::size_type set_size = 0UL; + buffer >> set_size; + + for (long unsigned int i = 0; i < set_size; i++) { + T el; + buffer >> el; + set.insert(el); + } + } +}; +} // namespace cldnn \ No newline at end of file diff --git a/src/plugins/intel_gpu/src/graph/include/serialization/static_instance.hpp b/src/plugins/intel_gpu/src/graph/include/serialization/static_instance.hpp new file mode 100644 index 00000000000000..41de5e090721e1 --- /dev/null +++ b/src/plugins/intel_gpu/src/graph/include/serialization/static_instance.hpp @@ -0,0 +1,56 @@ +// Copyright (C) 2018-2022 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +/////////////////////////////////////////////////////////////////////////////////////////////////// +#pragma once + +#include +#include +#include + +namespace cldnn { + +template +class static_instance; + +template +class 
static_instance::value>::type> { +public: + static T& get_instance() { + return instantiate(); + } + +private: + static T& instantiate() { + static T singleton; + (void)instance; + return singleton; + } + + static const T& instance; +}; + +template +const T& static_instance::value>::type>::instance = static_instance::instantiate(); + +template +class static_instance::value>::type> { +public: + static T& get_instance() { + return instantiate(); + } + +private: + static T& instantiate() { + (void)instance; + return T::instance(); + } + + static const T& instance; +}; + +template +const T& static_instance::value>::type>::instance = static_instance::instantiate(); + +} // namespace cldnn \ No newline at end of file diff --git a/src/plugins/intel_gpu/src/graph/include/serialization/string_serializer.hpp b/src/plugins/intel_gpu/src/graph/include/serialization/string_serializer.hpp new file mode 100644 index 00000000000000..df9fbbd2ea9cce --- /dev/null +++ b/src/plugins/intel_gpu/src/graph/include/serialization/string_serializer.hpp @@ -0,0 +1,34 @@ +// Copyright (C) 2018-2022 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +/////////////////////////////////////////////////////////////////////////////////////////////////// +#pragma once + +#include +#include "buffer.hpp" +#include "helpers.hpp" + +namespace cldnn { + +template +class Serializer, BufferType>::value>::type> { +public: + static void save(BufferType& buffer, const std::string& str) { + buffer << str.size(); + buffer << make_data(str.data(), static_cast(str.size() * sizeof(std::string::value_type))); + } +}; + +template +class Serializer, BufferType>::value>::type> { +public: + static void load(BufferType& buffer, std::string& str) { + std::string::size_type size; + buffer >> size; + str.resize(size); + buffer >> make_data(const_cast(str.data()), static_cast(size * sizeof(std::string::value_type))); + } +}; + +} // namespace cldnn \ No newline at end of file diff --git 
a/src/plugins/intel_gpu/src/graph/include/serialization/vector_serializer.hpp b/src/plugins/intel_gpu/src/graph/include/serialization/vector_serializer.hpp new file mode 100644 index 00000000000000..61aa4d9cfa40d5 --- /dev/null +++ b/src/plugins/intel_gpu/src/graph/include/serialization/vector_serializer.hpp @@ -0,0 +1,64 @@ +// Copyright (C) 2018-2022 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +/////////////////////////////////////////////////////////////////////////////////////////////////// +#pragma once + +#include +#include +#include "buffer.hpp" +#include "helpers.hpp" + +namespace cldnn { +template +class Serializer, typename std::enable_if, BufferType>::value && + std::is_arithmetic::value && + !std::is_same::value>::type> { +public: + static void save(BufferType& buffer, const std::vector& vector) { + buffer << vector.size(); //static_cast() + buffer << make_data(vector.data(), static_cast(vector.size() * sizeof(T))); + } +}; + +template +class Serializer, typename std::enable_if, BufferType>::value && + std::is_arithmetic::value && + !std::is_same::value>::type> { +public: + static void load(BufferType& buffer, std::vector& vector) { + typename std::vector::size_type vector_size = 0UL; + buffer >> vector_size; + vector.resize(vector_size); + buffer >> make_data(vector.data(), static_cast(vector_size * sizeof(T))); + } +}; + +template +class Serializer, typename std::enable_if, BufferType>::value && + !std::is_arithmetic::value>::type> { +public: + static void save(BufferType& buffer, const std::vector& vector) { + buffer << vector.size(); + for (const auto& el : vector) { + buffer << el; + } + } +}; + +template +class Serializer, typename std::enable_if, BufferType>::value && + !std::is_arithmetic::value>::type> { +public: + static void load(BufferType& buffer, std::vector& vector) { + typename std::vector::size_type vector_size = 0UL; + buffer >> vector_size; + vector.resize(vector_size); + for (auto& el : vector) { + buffer >> el; + 
} + } +}; + +} // namespace cldnn \ No newline at end of file diff --git a/src/plugins/intel_gpu/src/graph/include/shape_of_inst.h b/src/plugins/intel_gpu/src/graph/include/shape_of_inst.h index fb0254fe465a4d..a4b0b7984a2a4e 100644 --- a/src/plugins/intel_gpu/src/graph/include/shape_of_inst.h +++ b/src/plugins/intel_gpu/src/graph/include/shape_of_inst.h @@ -30,6 +30,7 @@ using shape_of_node = typed_program_node; template <> class typed_primitive_inst : public typed_primitive_inst_base { using parent = typed_primitive_inst_base; + using parent::parent; public: template diff --git a/src/plugins/intel_gpu/src/graph/include/shuffle_channels_inst.h b/src/plugins/intel_gpu/src/graph/include/shuffle_channels_inst.h index 4a522d6398eef1..1882a3f41a37f8 100644 --- a/src/plugins/intel_gpu/src/graph/include/shuffle_channels_inst.h +++ b/src/plugins/intel_gpu/src/graph/include/shuffle_channels_inst.h @@ -16,6 +16,7 @@ using shuffle_channels_node = typed_program_node; template <> class typed_primitive_inst : public typed_primitive_inst_base { using parent = typed_primitive_inst_base; + using parent::parent; public: static layout calc_output_layout(shuffle_channels_node const& node, kernel_impl_params const& impl_param); diff --git a/src/plugins/intel_gpu/src/graph/include/slice_inst.h b/src/plugins/intel_gpu/src/graph/include/slice_inst.h index 8c7583f4e1c288..d9a684401e5b13 100644 --- a/src/plugins/intel_gpu/src/graph/include/slice_inst.h +++ b/src/plugins/intel_gpu/src/graph/include/slice_inst.h @@ -14,6 +14,7 @@ using slice_node = typed_program_node; template <> class typed_primitive_inst : public typed_primitive_inst_base { using parent = typed_primitive_inst_base; + using parent::parent; public: static layout calc_output_layout(slice_node const& node, kernel_impl_params const& impl_param); diff --git a/src/plugins/intel_gpu/src/graph/include/softmax_inst.h b/src/plugins/intel_gpu/src/graph/include/softmax_inst.h index 6750df843b6409..7b39ecb5c024de 100644 --- 
a/src/plugins/intel_gpu/src/graph/include/softmax_inst.h +++ b/src/plugins/intel_gpu/src/graph/include/softmax_inst.h @@ -24,6 +24,7 @@ using softmax_node = typed_program_node; template <> class typed_primitive_inst : public typed_primitive_inst_base { using parent = typed_primitive_inst_base; + using parent::parent; public: static layout calc_output_layout(softmax_node const& node, kernel_impl_params const& impl_param); diff --git a/src/plugins/intel_gpu/src/graph/include/space_to_batch_inst.h b/src/plugins/intel_gpu/src/graph/include/space_to_batch_inst.h index f9d6b7838ae906..8f0af9eab10d61 100644 --- a/src/plugins/intel_gpu/src/graph/include/space_to_batch_inst.h +++ b/src/plugins/intel_gpu/src/graph/include/space_to_batch_inst.h @@ -17,6 +17,7 @@ using space_to_batch_node = typed_program_node; template <> class typed_primitive_inst : public typed_primitive_inst_base { using parent = typed_primitive_inst_base; + using parent::parent; public: static layout calc_output_layout(space_to_batch_node const& node, kernel_impl_params const& impl_param); diff --git a/src/plugins/intel_gpu/src/graph/include/space_to_depth_inst.h b/src/plugins/intel_gpu/src/graph/include/space_to_depth_inst.h index 16b7b3d9e882fb..c74bed2aeb3c0f 100644 --- a/src/plugins/intel_gpu/src/graph/include/space_to_depth_inst.h +++ b/src/plugins/intel_gpu/src/graph/include/space_to_depth_inst.h @@ -17,6 +17,7 @@ using space_to_depth_node = typed_program_node; template <> class typed_primitive_inst : public typed_primitive_inst_base { using parent = typed_primitive_inst_base; + using parent::parent; public: static layout calc_output_layout(space_to_depth_node const& node, kernel_impl_params const& impl_param); diff --git a/src/plugins/intel_gpu/src/graph/include/split_inst.h b/src/plugins/intel_gpu/src/graph/include/split_inst.h index 5c00c952d65967..b099a7f8204b06 100644 --- a/src/plugins/intel_gpu/src/graph/include/split_inst.h +++ b/src/plugins/intel_gpu/src/graph/include/split_inst.h @@ -17,6 
+17,7 @@ using split_node = typed_program_node; template <> class typed_primitive_inst : public typed_primitive_inst_base { using parent = typed_primitive_inst_base; + using parent::parent; public: static layout calc_output_layout(split_node const& node, kernel_impl_params const& impl_param); diff --git a/src/plugins/intel_gpu/src/graph/include/strided_slice_inst.h b/src/plugins/intel_gpu/src/graph/include/strided_slice_inst.h index 8bd2c5e437570d..ea989c12563b1e 100644 --- a/src/plugins/intel_gpu/src/graph/include/strided_slice_inst.h +++ b/src/plugins/intel_gpu/src/graph/include/strided_slice_inst.h @@ -30,6 +30,7 @@ using strided_slice_node = typed_program_node; template <> class typed_primitive_inst : public typed_primitive_inst_base { using parent = typed_primitive_inst_base; + using parent::parent; public: template diff --git a/src/plugins/intel_gpu/src/graph/include/tile_inst.h b/src/plugins/intel_gpu/src/graph/include/tile_inst.h index 3353209263e993..a8202178b2b6a3 100644 --- a/src/plugins/intel_gpu/src/graph/include/tile_inst.h +++ b/src/plugins/intel_gpu/src/graph/include/tile_inst.h @@ -28,6 +28,7 @@ using tile_node = typed_program_node; template <> class typed_primitive_inst : public typed_primitive_inst_base { using parent = typed_primitive_inst_base; + using parent::parent; public: template diff --git a/src/plugins/intel_gpu/src/graph/input_layout.cpp b/src/plugins/intel_gpu/src/graph/input_layout.cpp index 884eecef029e60..a755695871b3da 100644 --- a/src/plugins/intel_gpu/src/graph/input_layout.cpp +++ b/src/plugins/intel_gpu/src/graph/input_layout.cpp @@ -41,9 +41,7 @@ input_layout_inst::typed_primitive_inst(network& network, input_layout_node cons } void input_layout_inst::set_data(memory::ptr mem) { - auto ol = node->get_output_layout(); - - check_memory_to_set(*mem, ol); + check_memory_to_set(*mem, _impl_params->output_layout); if (mem->is_allocated_by(get_network().get_engine())) { OPENVINO_ASSERT(!_outputs.empty(), "[GPU] Can't set data for 
empty input memory"); diff --git a/src/plugins/intel_gpu/src/graph/kernel_selector_helper.cpp b/src/plugins/intel_gpu/src/graph/kernel_selector_helper.cpp index c4cf7757d8605c..0c22e28e37457f 100644 --- a/src/plugins/intel_gpu/src/graph/kernel_selector_helper.cpp +++ b/src/plugins/intel_gpu/src/graph/kernel_selector_helper.cpp @@ -8,6 +8,9 @@ #include "kernel_selector_params.h" #include "to_string_utils.h" #include "program_node.h" +#include "serialization/layout_serializer.hpp" +#include "serialization/string_serializer.hpp" +#include "serialization/vector_serializer.hpp" #include #include @@ -1039,3 +1042,43 @@ void set_optional_params(const program& program, kernel_selector::optional_param params.tuningParams.mode = to_tuning_mode(tuning_config->config.mode); params.tuningParams.cacheFilePath = tuning_config->config.cache_file_path; } + +void kernel_impl_params::save(BinaryOutputBuffer& ob) const { + ob << has_runtime_layouts; + ob << unique_id; + ob << input_layouts; + ob << output_layout; + ob << primary_input_idx; + ob << fused_desc.size(); +#ifdef ENABLE_ONEDNN_FOR_GPU + size_t num_fused_prims = fused_desc_onednn.size(); + ob << num_fused_prims; + for (auto fused_prim : fused_desc_onednn) { + ob << make_data(&fused_prim, sizeof(fused_primitive_desc_onednn)); + } +#endif // ENABLE_ONEDNN_FOR_GPU +} + +void kernel_impl_params::load(BinaryInputBuffer& ib) { + ib >> has_runtime_layouts; + ib >> unique_id; + ib >> input_layouts; + ib >> output_layout; + ib >> primary_input_idx; + { + // Fake fused_desc just for has_fused_primitives() + size_t num_fused_desc; + ib >> num_fused_desc; + if (num_fused_desc > 0) { + fused_desc.emplace_back(cldnn::fused_primitive_desc(nullptr)); + } + } +#ifdef ENABLE_ONEDNN_FOR_GPU + size_t num_fused_prims; + ib >> num_fused_prims; + fused_desc_onednn.resize(num_fused_prims); + for (size_t idx = 0; idx < num_fused_prims; ++idx) { + ib >> make_data(&fused_desc_onednn[idx], sizeof(fused_primitive_desc_onednn)); + } +#endif // 
ENABLE_ONEDNN_FOR_GPU +} diff --git a/src/plugins/intel_gpu/src/graph/mutable_data.cpp b/src/plugins/intel_gpu/src/graph/mutable_data.cpp index 296977ac4e6377..cf3e5a763e8145 100644 --- a/src/plugins/intel_gpu/src/graph/mutable_data.cpp +++ b/src/plugins/intel_gpu/src/graph/mutable_data.cpp @@ -79,4 +79,33 @@ void mutable_data_inst::set_output_memory(memory::ptr mem_new, bool check, size_ mutable_data_inst::typed_primitive_inst(network& network, mutable_data_node const& node) : parent(network, node, attach_or_copy_data(network, node.get_attached_memory_ptr(), network.is_primary_stream())) {} +void mutable_data_inst::save(cldnn::BinaryOutputBuffer& ob) const { + parent::save(ob); + + if (!_mem_allocated) { + for (size_t dep_idx = 0; dep_idx < _deps.size(); ++dep_idx) { + for (size_t m_idx = 0; m_idx < _deps[dep_idx]->_deps.size(); ++m_idx) { + if (get_network().get_engine().is_the_same_buffer(*_outputs[0], *_deps[dep_idx]->_deps[m_idx]->_outputs[0])) { + ob << true << dep_idx << m_idx; + return; + } + } + } + } + ob << false; +} + +void mutable_data_inst::load(cldnn::BinaryInputBuffer& ib) { + parent::load(ib); + + bool from_dep; + ib >> from_dep; + if (from_dep && !_mem_allocated) { + size_t dep_idx, m_idx; + ib >> dep_idx >> m_idx; + + auto prev_node = get_network().get_primitive(_dep_ids[dep_idx]); + _outputs[0] = get_network().get_primitive(prev_node->_dep_ids[m_idx])->output_memory_ptr(); + } +} } // namespace cldnn diff --git a/src/plugins/intel_gpu/src/graph/network.cpp b/src/plugins/intel_gpu/src/graph/network.cpp index 665ebcfc18a33e..4f7e35f83552eb 100644 --- a/src/plugins/intel_gpu/src/graph/network.cpp +++ b/src/plugins/intel_gpu/src/graph/network.cpp @@ -32,6 +32,7 @@ #include "program_helpers.h" #include "runtime/cldnn_itt.hpp" #include "kernels_cache.hpp" +#include "serialization/map_serializer.hpp" #include #include @@ -42,6 +43,7 @@ #include #include #include +#include #ifdef GPU_DEBUG_CONFIG #include @@ -321,6 +323,101 @@ 
network::network(program::ptr program, uint16_t stream_id) network::network(program::ptr program, stream::ptr stream, uint16_t stream_id) : network(program, stream, false, stream_id == 0) {} +network::network(cldnn::BinaryInputBuffer& ib, stream::ptr stream, engine& engine, uint16_t stream_id) + : _program(nullptr) + , _engine(ib.get_engine()) + , _stream(stream) + , _memory_pool(new memory_pool(engine)) + , _internal(false) + , _is_primary_stream(false) + , _reset_arguments(true) { + net_id += 1; + + uint32_t prog_id; + std::vector batch_header_str; + ib >> prog_id; + ib >> batch_header_str; + kernels_cache kernels_cache(get_engine(), prog_id, batch_header_str); + ib >> kernels_cache; + + int num_data_nodes; + ib >> num_data_nodes; + + _memory_pool->clear_pool_for_network(net_id); + + for (int i = 0; i < num_data_nodes; ++i) { + std::string type; + std::string _primitive_id; + ib >> type >> _primitive_id; + std::shared_ptr new_primitive_inst = cldnn::get_type_id(type)->create_instance(*this); + ib >> *new_primitive_inst; + _primitives[_primitive_id] = new_primitive_inst; + } + + int exec_order_size; + ib >> exec_order_size; + _exec_order.clear(); + + std::vector _exec_order_types; + _exec_order_types.resize(exec_order_size); + + for (auto& type : _exec_order_types) { + ib >> type; + std::shared_ptr new_primitive_inst = cldnn::get_type_id(type)->create_instance(*this); + _exec_order.emplace_back(new_primitive_inst); + } + + _outputs.clear(); + _output_chains.clear(); + + for (const auto& p_inst : _exec_order) { + ib >> *p_inst; + _primitives[p_inst->id()] = p_inst; + if (p_inst->is_input()) + _inputs.push_back(p_inst); + if (p_inst->is_output()) + _outputs.push_back(p_inst); + + p_inst->init_kernels(kernels_cache); + } + + for (auto p_inst : _exec_order) { + p_inst->rebuild_deps(_primitives); + p_inst->rebuild_exec_deps(_exec_order); + + if (p_inst->type() == cldnn::concatenation::type_id() && p_inst->can_be_optimized()) { + // implicit concat + std::list>*> stack 
= {&p_inst->dependencies()}; + while (!stack.empty()) { + auto nodes_list = stack.front(); + stack.pop_front(); + + for (auto processed_node : *nodes_list) { + auto dep_node = _primitives[processed_node->id()]; + dep_node->set_output_memory(p_inst->output_memory_ptr(), false); + if (processed_node->type() == concatenation::type_id() && processed_node->can_be_optimized()) { + if (!processed_node->dependencies().empty()) + stack.push_back(&processed_node->dependencies()); + } + } + } + } + } + + std::map reuse_map; + ib >> reuse_map; + + for (auto reuse_pair : reuse_map) { + auto& eltw_inst = _primitives.at(reuse_pair.second); + auto& prim_inst = _primitives.at(reuse_pair.first); + auto& eltw_mem = eltw_inst->output_memory(); + auto new_mem = eltw_mem.get_engine()->reinterpret_buffer(eltw_mem, prim_inst->output_memory_ptr()->get_layout()); + prim_inst->set_output_memory(new_mem); + } + + add_default_output_chains(); +} + network::~network() { _memory_pool->clear_pool_for_network(net_id); GPU_DEBUG_GET_INSTANCE(debug_config); @@ -329,6 +426,64 @@ network::~network() { } } +void network::save(cldnn::BinaryOutputBuffer& ob) { + ob << _program->get_kernels_cache(); + + int num_data_nodes = 0; + for (const auto& p_inst : _primitives) { + if (p_inst.second->type() == cldnn::data::type_id() || + (p_inst.second->type() == cldnn::mutable_data::type_id() && p_inst.second->get_impl() == nullptr)) { + num_data_nodes += 1; + } + } + ob << num_data_nodes; + + for (const auto& p_inst : _primitives) { + if (p_inst.second->type() == cldnn::data::type_id() || + (p_inst.second->type() == cldnn::mutable_data::type_id() && p_inst.second->get_impl() == nullptr)) { + ob << p_inst.second->get_node().get_primitive()->type_string(); + ob << p_inst.second->id(); + ob << *(p_inst.second); + } + } + + int exec_order_size; + exec_order_size = _exec_order.size(); + ob << exec_order_size; + + for (const auto& p_inst : _exec_order) { + ob << p_inst->get_node().get_primitive()->type_string(); + } + + 
for (const auto& p_inst : _exec_order) { + ob << *p_inst; + } + + std::map reuse_map; + + auto& po = _program->get_processing_order(); + for (auto const& node : po) { + if (node->get_preferred_impl_type() == impl_types::onednn) { + size_t eltw_dep = 0; + for (auto& fused_op : node->get_fused_primitives()) { + if (fused_op.is_type() && fused_op.deps.size() == 1) { + // If it is first sum, reuse the buffer + auto fusing_type = onednn_add_fusing_helpers::get_add_fusing_type(*node, fused_op); + if (fusing_type != add_fusing_type::sum || eltw_dep != 0) + continue; + eltw_dep = fused_op.dep_start_idx; + auto& eltw_in = node->get_dependency(eltw_dep); + if (_primitives.find(eltw_in.id()) != _primitives.end() && _primitives.find(node->id()) != _primitives.end()) { + reuse_map[node->id()] = eltw_in.id(); + } + } + } + } + } + + ob << reuse_map; +} + network::ptr network::allocate_network(stream::ptr stream, program::ptr program, bool is_internal, bool is_primary_stream) { return std::make_shared(program, stream, is_internal, is_primary_stream); } @@ -508,8 +663,10 @@ void network::set_output_memory(const primitive_id& id, memory::ptr mem_new) { for (auto& prim : o_iter->second) { prim->set_output_memory(eng.reinterpret_buffer(*mem_new, prim->output_memory().get_layout()), false); - if (!_reset_arguments && - (!prim->get_node().is_type() && !(prim->get_node().is_type() && prim->get_node().get_dependencies().empty()))) { + // [TODO] + // if (!_reset_arguments && + // (!prim->get_node().is_type() && !(prim->get_node().is_type() && prim->get_node().get_dependencies().empty()))) { + if (!_reset_arguments) { prim->set_arguments(); } } @@ -770,6 +927,7 @@ void network::execute_impl(const std::vector& events) { auto store_events = get_stream().get_queue_type() == queue_types::out_of_order || get_engine().configuration().enable_profiling; if (store_events) { + if (_program != nullptr) { for (auto& inst : _program->get_processing_order()) { // Special handling for mutable data. 
The event should be the same as the user or dependency with highest // processing_num as the mutable_data can be updated when is both user or dependency. @@ -794,6 +952,7 @@ void network::execute_impl(const std::vector& events) { } } } + } for (auto& dout : _data_outputs) { // data primitives are not executed so if they are marked as output we need to add // them valid events manually diff --git a/src/plugins/intel_gpu/src/graph/primitive_inst.cpp b/src/plugins/intel_gpu/src/graph/primitive_inst.cpp index 2564ae6651bf2a..361fa86d5151d7 100644 --- a/src/plugins/intel_gpu/src/graph/primitive_inst.cpp +++ b/src/plugins/intel_gpu/src/graph/primitive_inst.cpp @@ -24,6 +24,7 @@ #include "intel_gpu/runtime/error_handler.hpp" #include "intel_gpu/runtime/debug_configuration.hpp" +#include "serialization/set_serializer.hpp" #include "json_object.h" #include #include @@ -425,6 +426,9 @@ void primitive_inst::set_arguments() { } void primitive_inst::build_deps() { + if (_node == nullptr) + return; + if (_deps.empty() && !_node->get_dependencies().empty()) { _deps = _network.get_primitives(_node->get_dependencies()); _exec_deps = build_exec_deps(_deps); @@ -434,6 +438,47 @@ void primitive_inst::build_deps() { } } +void primitive_inst::rebuild_deps( + std::unordered_map> const& primitives) { + + _deps.resize(_dep_ids.size()); + for (size_t i = 0; i < _dep_ids.size(); i++) { + if (primitives.count(_dep_ids[i]) > 0) { + _deps[i] = primitives.at(_dep_ids[i]); + } else { + std::cout << _dep_ids[i] << " is not found in _primitives" << std::endl; + } + } +} + +void primitive_inst::rebuild_exec_deps( + std::list> const& primitives) { + + _exec_deps.resize(_exec_dep_ids.size()); + for (size_t i = 0; i < _exec_dep_ids.size(); i++) { + bool found = false; + for (auto& prim_inst : primitives) { + if (prim_inst->id().compare(_exec_dep_ids[i]) == 0) { + _exec_deps[i] = prim_inst; + found = true; + break; + } + } + if (found == false) { + std::cout << "not found in _exec_order" << std::endl; + 
} + } +} + +primitive_inst::primitive_inst(network& network) + : _network(network) + , _node(nullptr) + , _impl_params(nullptr) + , _impl(nullptr) + , _outputs({memory::ptr()}) + , _output_changed(false) + , _mem_allocated(false) {} + primitive_inst::primitive_inst(network& network, program_node const& node, bool allocate_memory) : _network(network) , _node(&node) @@ -904,4 +949,219 @@ std::string primitive_inst::get_implementation_name() const { return "undef"; } +void primitive_inst::save(cldnn::BinaryOutputBuffer& ob) const { + if (type() == cldnn::data::type_id() || + (type() == cldnn::mutable_data::type_id() && _impl == nullptr)) { + object_type _object_type = object_type::DATA_INST; + ob << cldnn::make_data(&_object_type, sizeof(object_type)); + ob << _node->get_primitive()->type_string(); + _impl_params->save(ob); + ob << _outputs[0]->get_layout(); + + const auto _allocation_type = _outputs[0]->get_allocation_type(); + ob << make_data(&_allocation_type, sizeof(_allocation_type)); + + size_t data_size = _outputs[0]->size(); + ob << cldnn::make_data(&data_size, sizeof(size_t)); + + if (_allocation_type == allocation_type::usm_host || _allocation_type == allocation_type::usm_shared) { + ob << cldnn::make_data(_outputs[0]->buffer_ptr(), data_size); + } else { + mem_lock lock{_outputs[0], get_node().get_program().get_stream()}; + ob << cldnn::make_data(lock.begin(), data_size); + } + } else { + object_type _object_type = object_type::EXECUTABLE_INST; + ob << cldnn::make_data(&_object_type, sizeof(object_type)); + + kernel_arguments_data args = _impl->get_arguments(*this); + kernel_arguments_data_idx args_idx; + convert_args(args, args_idx); + _impl->set_arguments(args_idx); + + ob << _impl; + _impl_params->save(ob); + + ob << _node_output_layout; + ob << has_mutable_input(); + ob << mem_allocated(); + ob << is_dynamic(); + ob << _node->get_primitive()->type_string(); + ob << id(); + ob << org_id(); + ob << is_input(); + ob << is_output(); + ob << 
inputs_memory_count(); + ob << outputs_memory_count(); + ob << get_fused_mem_count(); + ob << get_fused_mem_offset(); + ob << can_be_optimized(); + ob << can_share_buffer(); + ob << is_constant(); + + ob << _outputs[0]->get_layout(); + const auto _allocation_type = _outputs[0]->get_allocation_type(); + ob << make_data(&_allocation_type, sizeof(_allocation_type)); + + ob << _node->get_memory_dependencies(); + + ob << _deps.size(); + for (const auto& dep : _deps) { + ob << dep->id(); + } + + ob << _exec_deps.size(); + for (const auto& dep : _exec_deps) { + ob << dep->id(); + } + } +} + +void primitive_inst::convert_args(const kernel_arguments_data& args, kernel_arguments_data_idx& args_idx) const { + if (args.inputs.size() > 0) { + args_idx.inputs.resize(args.inputs.size()); + for (uint32_t idx = 0; idx < args.inputs.size(); ++idx) { + args_idx.inputs[idx] = get_index_in_deps(args.inputs[idx]); + } + } + + if (args.intermediates.size() > 0) { + args_idx.intermediates.resize(args.intermediates.size()); + for (uint32_t idx = 0; idx < args.intermediates.size(); ++idx) { + args_idx.intermediates[idx] = get_index_in_deps(args.intermediates[idx]); + } + } + + args_idx.weights = (args.weights == nullptr) ? -1 : get_index_in_deps(args.weights); + args_idx.recurrent = (args.recurrent == nullptr) ? -1 : get_index_in_deps(args.recurrent); + args_idx.hidden = (args.hidden == nullptr) ? -1 : get_index_in_deps(args.hidden); + args_idx.cell = (args.cell == nullptr) ? -1 : get_index_in_deps(args.cell); + args_idx.bias = (args.bias == nullptr) ? -1 : get_index_in_deps(args.bias); + args_idx.weights_zero_points = (args.weights_zero_points == nullptr) ? -1 : get_index_in_deps(args.weights_zero_points); + args_idx.activations_zero_points = (args.activations_zero_points == nullptr) ? -1 : get_index_in_deps(args.activations_zero_points); + args_idx.compensation = (args.compensation == nullptr) ? 
-1 : get_index_in_deps(args.compensation); + args_idx.lookup_table = (args.lookup_table == nullptr) ? -1 : get_index_in_deps(args.lookup_table); + args_idx.scale_table = (args.scale_table == nullptr) ? -1 : get_index_in_deps(args.scale_table); + args_idx.slope = (args.slope == nullptr) ? -1 : get_index_in_deps(args.slope); + + if (args.fused_op_inputs.size() > 0) { + args_idx.fused_op_inputs.resize(args.fused_op_inputs.size()); + for (uint32_t idx = 0; idx < args.fused_op_inputs.size(); ++idx) { + args_idx.fused_op_inputs[idx] = get_index_in_deps(args.fused_op_inputs[idx]); + } + } + + args_idx.split = args.split; +} + +int32_t primitive_inst::get_index_in_deps(memory::cptr arg) const { + uint32_t idx = 0; + + for (idx = 0; idx < _deps.size(); ++idx) { + if (arg == _deps[idx]->_outputs[0]) + break; + } + + if (idx == _deps.size()) + std::cout << "[get_index_in_deps]: not found" << std::endl; + + return (idx == _deps.size()) ? -1 : idx; +} + +void primitive_inst::load(cldnn::BinaryInputBuffer& ib) { + object_type _object_type; + ib >> make_data(&_object_type, sizeof(object_type)); + + if (_object_type == object_type::DATA_INST) { + std::string type_str; + ib >> type_str; + _type = get_type_id(type_str); + + _impl_params.release(); + _impl_params = make_unique(); + _impl_params->load(ib); + + layout output_layout = layout(cldnn::data_types::bin, cldnn::format::any, cldnn::tensor()); + ib >> output_layout; + + allocation_type _allocation_type; + ib >> make_data(&_allocation_type, sizeof(_allocation_type)); + + size_t data_size; // = _output->size(); + ib >> cldnn::make_data(&data_size, sizeof(size_t)); + _outputs[0] = get_network().get_memory_pool().get_memory(output_layout, _allocation_type, false); + + if (_allocation_type == allocation_type::usm_host || _allocation_type == allocation_type::usm_shared) { + ib >> cldnn::make_data(_outputs[0]->buffer_ptr(), data_size); + } else { + char *_buf = new char[data_size]; + ib >> cldnn::make_data(_buf, data_size); + 
_outputs[0]->copy_from(get_network().get_stream(), _buf); + delete[] _buf; + } + } else if (_object_type == object_type::EXECUTABLE_INST) { + // primitive_impl + _impl.release(); + ib >> _impl; + _impl_params.release(); + _impl_params = make_unique(); + _impl_params->load(ib); + + ib >> _node_output_layout; + ib >> _has_mutable_input; + ib >> _mem_allocated; + ib >> _is_dynamic; + std::string type_str; + ib >> type_str; + _type = get_type_id(type_str); + ib >> _id; + ib >> _org_id; + ib >> _is_input; + ib >> _is_output; + ib >> _inputs_memory_count; + ib >> _outputs_memory_count; + ib >> _fused_mem_count; + ib >> _fused_mem_offset; + ib >> _can_be_optimized; + ib >> _can_share_buffer; + ib >> _is_constant; + + // output memory + layout output_layout = layout(cldnn::data_types::bin, cldnn::format::any, cldnn::tensor()); + ib >> output_layout; + + allocation_type _allocation_type; + ib >> make_data(&_allocation_type, sizeof(_allocation_type)); + + std::set _node_mem_deps; + ib >> _node_mem_deps; + + size_t vector_size = 0UL; + ib >> vector_size; + _dep_ids.resize(vector_size); + for (auto& el : _dep_ids) { + ib >> el; + } + + ib >> vector_size; + _exec_dep_ids.resize(vector_size); + for (auto& el : _exec_dep_ids) { + ib >> el; + } + + _outputs[0] = nullptr; + if (!_mem_allocated) { + if (can_be_optimized()) + _outputs[0] = get_network().get_engine().reinterpret_buffer(get_network().get_primitive(_dep_ids[0])->output_memory(), output_layout); + } else { + if ((!can_share_buffer()) || can_be_optimized() || is_output()) { + _outputs[0] = get_network().get_engine().allocate_memory(output_layout, _allocation_type); + } else { + _outputs[0] = get_network().get_memory_pool().get_memory(output_layout, id(), get_network_id(), _node_mem_deps, _allocation_type, true); + } + } + _output_changed = false; + } +} + } // namespace cldnn diff --git a/src/plugins/intel_gpu/src/graph/program.cpp b/src/plugins/intel_gpu/src/graph/program.cpp index 3eb99669c83b09..881a8dc12acccf 100644 
--- a/src/plugins/intel_gpu/src/graph/program.cpp +++ b/src/plugins/intel_gpu/src/graph/program.cpp @@ -762,7 +762,7 @@ void program::cleanup() { } } } - _kernels_cache->reset(); + // _kernels_cache->reset(); } void program::add_split_outputs() { diff --git a/src/plugins/intel_gpu/src/graph/reorder.cpp b/src/plugins/intel_gpu/src/graph/reorder.cpp index f618e6706369a6..56bb828751ca5a 100644 --- a/src/plugins/intel_gpu/src/graph/reorder.cpp +++ b/src/plugins/intel_gpu/src/graph/reorder.cpp @@ -200,7 +200,8 @@ std::string reorder_inst::to_string(reorder_node const& node) { } reorder_inst::typed_primitive_inst(network& network, reorder_node const& node) - : parent(network, node, (!node.can_be_optimized() && node.get_output_layout().is_static()) ? true : false) { + : parent(network, node, (!node.can_be_optimized() && node.get_output_layout().is_static()) ? true : false) + , _req_reinterpr(node.requires_reinterpret()) { if (node.can_be_optimized()) reuse_input(); @@ -238,7 +239,7 @@ reorder_inst::typed_primitive_inst(network& network, reorder_node const& node) } void reorder_inst::on_execute() { - if (node->can_be_optimized()) + if (can_be_optimized()) reuse_input(); } @@ -247,7 +248,7 @@ void reorder_inst::reuse_input() { } void reorder_inst::update_output_memory() { - if (!node->can_be_optimized()) + if (!can_be_optimized()) return; if (static_cast(_outputs[0]) && _network.get_engine().is_the_same_buffer(output_memory(), input_memory())) @@ -255,11 +256,20 @@ void reorder_inst::update_output_memory() { build_deps(); - if (node->requires_reinterpret()) { - _outputs[0] = _network.get_engine().reinterpret_buffer(input_memory(), node->get_output_layout()); + if (requires_reinterpret()) { + _outputs[0] = _network.get_engine().reinterpret_buffer(input_memory(), get_output_layout()); } else { _outputs[0] = input_memory_ptr(); } } +void reorder_inst::save(cldnn::BinaryOutputBuffer& ob) const { + parent::save(ob); + ob << _req_reinterpr; +} + +void 
reorder_inst::load(cldnn::BinaryInputBuffer& ib) { + parent::load(ib); + ib >> _req_reinterpr; +} } // namespace cldnn diff --git a/src/plugins/intel_gpu/src/plugin/compiled_model.cpp b/src/plugins/intel_gpu/src/plugin/compiled_model.cpp index 7cce4c131b5d26..6117260aebddf1 100644 --- a/src/plugins/intel_gpu/src/plugin/compiled_model.cpp +++ b/src/plugins/intel_gpu/src/plugin/compiled_model.cpp @@ -10,6 +10,8 @@ #include "intel_gpu/plugin/async_infer_request.hpp" #include "intel_gpu/plugin/async_infer_request_legacy.hpp" #include "openvino/runtime/intel_gpu/properties.hpp" +#include "serialization/binary_buffer.hpp" +#include "serialization/string_serializer.hpp" #include #include @@ -64,6 +66,207 @@ CompiledModel::CompiledModel(InferenceEngine::CNNNetwork &network, std::shared_p } } +InferenceEngine::Layout layout_from_string(const std::string & name) { + static const std::unordered_map layouts = { + { "ANY", InferenceEngine::Layout::ANY }, + { "NCHW", InferenceEngine::Layout::NCHW }, + { "NHWC", InferenceEngine::Layout::NHWC }, + { "NCDHW", InferenceEngine::Layout::NCDHW }, + { "NDHWC", InferenceEngine::Layout::NDHWC }, + { "OIHW", InferenceEngine::Layout::OIHW }, + { "C", InferenceEngine::Layout::C }, + { "CHW", InferenceEngine::Layout::CHW }, + { "HWC", InferenceEngine::Layout::HWC }, + { "HW", InferenceEngine::Layout::HW }, + { "NC", InferenceEngine::Layout::NC }, + { "CN", InferenceEngine::Layout::CN }, + { "BLOCKED", InferenceEngine::Layout::BLOCKED } + }; + auto it = layouts.find(name); + if (it != layouts.end()) { + return it->second; + } + IE_THROW(NetworkNotRead) << "Unknown layout with name '" << name << "'"; +} + +CompiledModel::CompiledModel(std::istream& networkModel, std::shared_ptr context, Config config) : + InferenceEngine::ExecutableNetworkThreadSafeDefault{[&]() -> InferenceEngine::ITaskExecutor::Ptr { + if (config.exclusiveAsyncRequests) { + //exclusiveAsyncRequests essentially disables the streams (and hence should be checked first) => 
aligned with the CPU behavior + return executorManager()->getExecutor("GPU"); + } else if (config.throughput_streams > 1) { + return std::make_shared( + IStreamsExecutor::Config{"Intel GPU plugin executor", config.throughput_streams}); + } else { + return std::make_shared( + IStreamsExecutor::Config{"Intel GPU plugin executor", 1}); + } + }()}, + m_config(config), + m_taskExecutor{ _taskExecutor }, + m_waitExecutor(executorManager()->getIdleCPUStreamsExecutor({ "GPUWaitExecutor" })) { + auto casted_context = std::dynamic_pointer_cast(context); + + if (nullptr == casted_context) { + IE_THROW() << "Invalid remote context"; + } + + m_context = casted_context; + + cldnn::BinaryInputBuffer ib(networkModel, *getContextImpl(m_context)->GetEngine()); + + // InputsInfo and OutputsInfor for CNNNetwork + { + size_t inputSize; + ib >> inputSize; + + InputsDataMap inputs; + + for (size_t idx = 0; idx < inputSize; ++idx) { + std::string name; + std::string precision; + std::string layout; + ib >> name; + ib >> precision; + ib >> layout; + + DataPtr input = std::make_shared(name, Precision::FromStr(precision), layout_from_string(layout)); + InputInfo::Ptr infoNew = std::make_shared(); + infoNew->setInputData(input); + inputs.emplace(std::make_pair(name, infoNew)); + } + + size_t outputSize; + ib >> outputSize; + + OutputsDataMap outputs; + + for (size_t idx = 0; idx < outputSize; ++idx) { + std::string name; + std::string precision; + std::string layout; + ib >> name; + ib >> precision; + ib >> layout; + + DataPtr output = std::make_shared(name, Precision::FromStr(precision), layout_from_string(layout)); + outputs.emplace(std::make_pair(name, output)); + } + + setNetworkInputs(inputs); + setNetworkOutputs(outputs); + } + + { + std::vector> new_params; + size_t num_params; + ib >> num_params; + + for (size_t idx = 0; idx < num_params; ++idx) { + std::string param_name; + ib >> param_name; + ov::element::Type param_element_type; + std::string str_element_type; + ib >> 
str_element_type; + std::stringstream oss(str_element_type); + oss >> param_element_type; + ov::Shape param_shape; + size_t shape_size; + ib >> shape_size; + param_shape.resize(shape_size); + for (size_t i = 0; i < shape_size; ++i) { + size_t dim; + ib >> dim; + param_shape[i] = dim; + } + std::string str_layout; + ib >> str_layout; + ov::Layout param_layout(str_layout); + std::unordered_set param_names; + size_t num_names; + ib >> num_names; + for (size_t i = 0; i < num_names; ++i) { + std::string name; + ib >> name; + param_names.emplace(name); + } + + auto new_param = std::make_shared(param_element_type, param_shape); + new_param->set_friendly_name(param_name); + new_param->set_element_type(param_element_type); + new_param->set_layout(param_layout); + // hoho->output(0).get_rt_info() = param_rt_info; + new_param->output(0).get_tensor().set_names(param_names); + new_param->validate_and_infer_types(); + new_params.emplace_back(new_param); + } + + setInputs(new_params); + } + + { + std::vector> new_results; + size_t num_results; + ib >> num_results; + + for (size_t idx = 0; idx < num_results; ++idx) { + ov::element::Type fake_element_type; + std::string str_element_type; + ib >> str_element_type; + std::stringstream oss(str_element_type); + oss >> fake_element_type; + + ov::Shape fake_shape; + size_t shape_size; + ib >> shape_size; + fake_shape.resize(shape_size); + for (size_t i = 0; i < shape_size; ++i) { + size_t dim; + ib >> dim; + fake_shape[i] = dim; + } + + std::string fake_name; + ib >> fake_name; + + std::string param_name; + ib >> param_name; + + std::string str_layout; + ib >> str_layout; + ov::Layout param_layout(str_layout); + + std::unordered_set param_names; + size_t num_names; + ib >> num_names; + for (size_t i = 0; i < num_names; ++i) { + std::string name; + ib >> name; + param_names.emplace(name); + } + + auto fake_param = std::make_shared(fake_element_type, fake_shape); + fake_param->set_friendly_name(fake_name); + 
fake_param->validate_and_infer_types(); + + auto new_result = std::make_shared(fake_param); + new_result->set_friendly_name(param_name); + new_result->set_layout(param_layout); + new_result->output(0).get_tensor().set_names(param_names); + new_result->validate_and_infer_types(); + new_results.emplace_back(new_result); + } + + setOutputs(new_results); + } + + auto graph_base = std::make_shared(ib, m_context, m_config, 0); + for (uint16_t n = 0; n < m_config.throughput_streams; n++) { + auto graph = n == 0 ? graph_base : std::make_shared(graph_base, n); + m_graphs.push_back(graph); + } +} + template IInferRequestInternal::Ptr CompiledModel::GetInferRequestImpl(const std::vector>& inputs, const std::vector>& outputs) { @@ -144,6 +347,121 @@ IInferRequestInternal::Ptr CompiledModel::CreateInferRequest() { _callbackExecutor); } +bool CompiledModel::isSerializable() { + // Model with multiple graphs is not yet supported. + if (m_graphs.size() != 1) + return false; + + // Dynamic model serialization is not yet supported. 
+ if (m_graphs[0]->GetNetwork()->is_dynamic()) + return false; + + return true; +} + +void CompiledModel::Export(std::ostream& networkModel) { + OV_ITT_SCOPED_TASK(itt::domains::intel_gpu_plugin, "CompiledModel::Export"); + if (m_graphs.empty()) + IE_THROW(NetworkNotLoaded); + + if (!isSerializable()) + return; + + cldnn::BinaryOutputBuffer ob(networkModel); + + // InputsInfo and OutputsInfo for CNNNetwork + { + ob << GetInputsInfo().size(); + + for (const auto & in : GetInputsInfo()) { + ob << in.first; + std::string precision(in.second->getPrecision().name()); + ob << precision; + std::stringstream ss; + ss << in.second->getInputData()->getLayout(); + ob << ss.str(); + } + + ob << GetOutputsInfo().size(); + + for (const auto & out : GetOutputsInfo()) { + ob << out.first; + std::string precision(out.second->getPrecision().name()); + ob << precision; + std::stringstream ss; + ss << out.second->getLayout(); + ob << ss.str(); + } + } + + // Inputs + { + std::vector> const_params = getInputs(); + ob << const_params.size(); + + for (const auto& param : const_params) { + auto new_param = ov::as_type_ptr(param); + std::string param_name = new_param->get_friendly_name(); + ov::element::Type param_element_type = new_param->get_element_type(); + ov::PartialShape param_shape = new_param->get_partial_shape(); + ov::Layout param_layout = new_param->get_layout(); + // ov::RTMap param_rt_info = new_param->output(0).get_rt_info(); + auto param_names = new_param->output(0).get_tensor().get_names(); + + ob << param_name; + std::stringstream ss; + ss << param_element_type; + ob << ss.str(); + ov::Shape static_shape = param_shape.get_shape(); + ob << static_shape.size(); + for (size_t dim : static_shape) { + ob << dim; + } + ob << param_layout.to_string(); + ob << param_names.size(); + for (auto name : param_names) { + ob << name; + } + } + } + + // Outputs + { + std::vector> const_results = getOutputs(); + ob << const_results.size(); + + for (const auto& param : const_results) { + 
auto new_param = ov::as_type_ptr(param); + + ov::element::Type fake_element_type = new_param->get_input_element_type(0); + ov::PartialShape fake_shape = new_param->get_input_partial_shape(0); + std::string fake_name = new_param->get_input_node_ptr(0)->get_friendly_name(); + + std::string param_name = new_param->get_friendly_name(); + ov::Layout param_layout = new_param->get_layout(); + auto param_names = new_param->output(0).get_tensor().get_names(); + + std::stringstream ss; + ss << fake_element_type; + ob << ss.str(); + ov::Shape static_shape = fake_shape.get_shape(); + ob << static_shape.size(); + for (size_t dim : static_shape) { + ob << dim; + } + ob << fake_name; + ob << param_name; + ob << param_layout.to_string(); + ob << param_names.size(); + for (auto name : param_names) { + ob << name; + } + } + } + + return m_graphs.front()->Export(ob); +} + std::shared_ptr CompiledModel::GetExecGraphInfo() { if (m_graphs.empty()) IE_THROW(NetworkNotLoaded); diff --git a/src/plugins/intel_gpu/src/plugin/graph.cpp b/src/plugins/intel_gpu/src/plugin/graph.cpp index 2275ed515258ca..ba88004c3b7cc6 100644 --- a/src/plugins/intel_gpu/src/plugin/graph.cpp +++ b/src/plugins/intel_gpu/src/plugin/graph.cpp @@ -11,6 +11,11 @@ #include #include "intel_gpu/plugin/infer_request.hpp" #include "intel_gpu/plugin/itt.hpp" +#include "serialization/binary_buffer.hpp" +#include "serialization/map_serializer.hpp" +#include "serialization/layout_serializer.hpp" +#include "serialization/string_serializer.hpp" +#include "serialization/vector_serializer.hpp" #include #include @@ -53,6 +58,22 @@ Graph::Graph(InferenceEngine::CNNNetwork& network, gpu::ClContext::Ptr context, Build(); } +Graph::Graph(cldnn::BinaryInputBuffer &ib, gpu::ClContext::Ptr context, Config config, uint16_t stream_id) + : m_context(context) + , m_config(config) + , m_stream_id(stream_id) + , m_state(0) { + m_program = std::make_shared(GetEngine(), m_config); + if (m_program->m_max_batch > 1) + m_config.max_dynamic_batch = 
m_program->m_max_batch; + + ib >> m_program->inputLayouts; + ib >> primitiveIDs; + ib >> outputDims; + + m_networks.emplace_back(std::make_shared(ib, GetEngine()->create_stream(), *GetEngine(), m_stream_id)); +} + Graph::Graph(std::shared_ptr graph, uint16_t stream_id) : m_context(graph->m_context) , m_program(graph->m_program) @@ -447,9 +468,19 @@ std::shared_ptr Graph::GetExecGraphInfoByPrimitivesInfo(std::v return std::make_shared(results, params, "runtime_gpu_graph"); } +void Graph::Export(cldnn::BinaryOutputBuffer &ob) { + ob << m_program->inputLayouts; + ob << primitiveIDs; + ob << outputDims; + + auto m_network = m_networks.back(); + + m_network->save(ob); +} + std::shared_ptr Graph::GetExecGraphInfo() { auto primitives_info = GetNetwork()->get_primitives_info(); - return GetExecGraphInfoByPrimitivesInfo(primitives_info, true); + return GetExecGraphInfoByPrimitivesInfo(primitives_info, false); } diff --git a/src/plugins/intel_gpu/src/plugin/plugin.cpp b/src/plugins/intel_gpu/src/plugin/plugin.cpp index 0767499de32aab..bbec4eca94101d 100644 --- a/src/plugins/intel_gpu/src/plugin/plugin.cpp +++ b/src/plugins/intel_gpu/src/plugin/plugin.cpp @@ -442,6 +442,44 @@ QueryNetworkResult Plugin::QueryNetwork(const CNNNetwork& network, return res; } +InferenceEngine::IExecutableNetworkInternal::Ptr Plugin::ImportNetwork(std::istream& networkModel, + const std::map& orig_config) { + OV_ITT_SCOPED_TASK(itt::domains::intel_gpu_plugin, "Plugin::ImportNetwork"); + Configs confs = _impl->m_configs; + std::string device_id = GetDeviceIDFromConfig(orig_config); + Config conf = confs.GetConfig(device_id); + + auto config = ConvertPerfHintsToConfig(orig_config, conf); + + RemoteCLContext::Ptr context; + + auto canReuseDefaultContext = [&]() -> bool { + if (m_defaultContexts.find(conf.device_id) == m_defaultContexts.end()) + return false; + + return m_defaultContexts.at(conf.device_id)->GetConfig().CanShareContextWith(conf); + }; + + { + 
OV_ITT_SCOPED_TASK(itt::domains::intel_gpu_plugin, "Plugin::ImportNetwork::CreateContext"); + std::lock_guard lock(engine_mutex); + if (!canReuseDefaultContext()) { + context = std::make_shared(shared_from_this(), AnyMap(), conf); + m_defaultContexts[conf.device_id] = context; + } + } + + context = m_defaultContexts[conf.device_id]; + + { + OV_ITT_SCOPED_TASK(itt::domains::intel_gpu_plugin, "Plugin::ImportNetwork::CreateExeNetwork"); + CompiledModel::Ptr exeNetwork = std::make_shared(networkModel, context, conf); + exeNetwork->SetPointerToPlugin(shared_from_this()); + UpdateStatistics(context); + return exeNetwork; + } +} + Parameter Plugin::GetConfig(const std::string& name, const std::map& options) const { OV_ITT_SCOPED_TASK(itt::domains::intel_gpu_plugin, "Plugin::GetConfig"); Parameter result; @@ -575,6 +613,7 @@ Parameter Plugin::GetMetric(const std::string& name, const std::map(max_batch_size)}; + } else if (name == METRIC_KEY(IMPORT_EXPORT_SUPPORT)) { + IE_SET_METRIC_RETURN(IMPORT_EXPORT_SUPPORT, true); } else { IE_THROW() << "Unsupported metric key " << name; } diff --git a/src/plugins/intel_gpu/src/runtime/CMakeLists.txt b/src/plugins/intel_gpu/src/runtime/CMakeLists.txt index cbe9760953b708..294f6ad0fef2fb 100644 --- a/src/plugins/intel_gpu/src/runtime/CMakeLists.txt +++ b/src/plugins/intel_gpu/src/runtime/CMakeLists.txt @@ -29,8 +29,9 @@ endif() target_include_directories(${TARGET_NAME} PUBLIC $ - $) - + $ + $) + target_compile_options(${TARGET_NAME} PRIVATE $<$:$,/Os,-Os>>) diff --git a/src/plugins/intel_gpu/src/runtime/kernels_cache.cpp b/src/plugins/intel_gpu/src/runtime/kernels_cache.cpp index 5a6b8ab563f992..fa674b0f0db4a5 100644 --- a/src/plugins/intel_gpu/src/runtime/kernels_cache.cpp +++ b/src/plugins/intel_gpu/src/runtime/kernels_cache.cpp @@ -9,6 +9,10 @@ #include "ocl/ocl_common.hpp" #include "intel_gpu/runtime/debug_configuration.hpp" #include "openvino/util/file_util.hpp" +#include "serialization/set_serializer.hpp" +#include 
"serialization/vector_serializer.hpp" +#include "serialization/map_serializer.hpp" +#include "serialization/string_serializer.hpp" #include #include @@ -466,4 +470,96 @@ void kernels_cache::compile() { malloc_trim(0); #endif } +void kernels_cache::save(BinaryOutputBuffer& ob) const { + OPENVINO_ASSERT(_engine.type() == engine_types::ocl, "[GPU] not supported engine type"); + + ob << _prog_id; + ob << batch_header_str; + + std::map entry_point_to_id; + for (auto iter = _kernels.begin(); iter != _kernels.end(); iter++) { + std::string k_id = iter->first; + kernel::ptr kernel = iter->second; + + auto ocl_kernel = std::static_pointer_cast(kernel); + const auto& entry_point = ocl_kernel->get_handle().getInfo(); + + entry_point_to_id[entry_point] = k_id; + } + ob << entry_point_to_id; + + std::unique_ptr build_engine = + cldnn::make_unique(_engine.get_device(), runtime_types::ocl, _engine.configuration(), _engine.get_task_executor()); + + std::vector> precompiled_kernels; + + for (auto iter = _kernels.begin(); iter != _kernels.end(); iter++) { + kernel::ptr kernel = iter->second; + auto ocl_kernel = std::static_pointer_cast(kernel); + auto program = ocl_kernel->get_handle().getInfo(); + const auto& entry_point = ocl_kernel->get_handle().getInfo(); + const auto& k_id = entry_point_to_id.find(entry_point); + + if (k_id != entry_point_to_id.end()) { + cl::Program::Binaries binary_kernels = {getProgramBinaries(program)}; + + try { + cl::vector kernels; + cl::Program programs(build_engine->get_cl_context(), {build_engine->get_cl_device()}, binary_kernels); + programs.build(build_engine->get_cl_device()); + programs.createKernels(&kernels); + + for (auto& k : kernels) { + const auto& entry_point = k.getInfo(); + entry_point_to_id.erase(entry_point); + } + + precompiled_kernels.push_back(std::move(binary_kernels[0])); + } catch (const cl::BuildError& err) { + std::cout << "+++++ OpenCL build error" << std::endl; + } + } + } + ob << precompiled_kernels; +} + +void 
kernels_cache::load(BinaryInputBuffer& ib) { + OPENVINO_ASSERT(_engine.type() == engine_types::ocl, "[GPU] not supported engine type"); + + std::unique_ptr build_engine = + cldnn::make_unique(_engine.get_device(), runtime_types::ocl, _engine.configuration(), _engine.get_task_executor()); + + std::map entry_point_to_id; + std::vector> precompiled_kernels; + ib >> entry_point_to_id; + ib >> precompiled_kernels; + + try { + std::lock_guard lock(_mutex); + _kernels.clear(); + + for (auto& binary_kernels : precompiled_kernels) { + cl::vector kernels; + cl::Program program(build_engine->get_cl_context(), {build_engine->get_cl_device()}, {binary_kernels}); + program.build(build_engine->get_cl_device()); + program.createKernels(&kernels); + + for (auto& k : kernels) { + const auto& entry_point = k.getInfo(); + const auto& k_id = entry_point_to_id.find(entry_point); + if (k_id != entry_point_to_id.end()) { + cl_kernel cl_kernel = k.get(); + cl_context cl_context = build_engine->get_cl_context().get(); + kernel::ptr kernel = kernels_factory::create(_engine, cl_context, cl_kernel, entry_point); + _kernels.insert({k_id->second, kernel}); + } else { + throw std::runtime_error("Could not find entry point"); + } + } + } + } catch (const cl::BuildError& err) { + std::cout << "+++++ OpenCL build error" << std::endl; + } +} + } // namespace cldnn diff --git a/src/plugins/intel_gpu/src/runtime/kernels_cache.hpp b/src/plugins/intel_gpu/src/runtime/kernels_cache.hpp index cf47c68258705c..1aea4775a8edc5 100644 --- a/src/plugins/intel_gpu/src/runtime/kernels_cache.hpp +++ b/src/plugins/intel_gpu/src/runtime/kernels_cache.hpp @@ -16,6 +16,9 @@ #include #include +#include "kernels_factory.hpp" +#include "ocl/ocl_engine.hpp" +#include "serialization/binary_buffer.hpp" namespace cldnn { class kernels_cache { @@ -106,6 +109,8 @@ class kernels_cache { } std::vector add_kernels_source(std::vector> kernel_sources, bool dump_custom_program = false); void compile(); + void save(BinaryOutputBuffer& 
ob) const; + void load(BinaryInputBuffer& ib); }; } // namespace cldnn diff --git a/src/plugins/intel_gpu/src/runtime/memory_pool.cpp b/src/plugins/intel_gpu/src/runtime/memory_pool.cpp index 513e9429b3d2f9..5bf970f22c7073 100644 --- a/src/plugins/intel_gpu/src/runtime/memory_pool.cpp +++ b/src/plugins/intel_gpu/src/runtime/memory_pool.cpp @@ -24,8 +24,8 @@ memory_record::memory_record(memory_set users, allocation_type type) : _users(users), _memory(memory), _network_id(net_id), _type(type) {} -memory::ptr memory_pool::alloc_memory(const layout& layout, allocation_type type) { - return _engine->allocate_memory(layout, type); +memory::ptr memory_pool::alloc_memory(const layout& layout, allocation_type type, bool reset) { + return _engine->allocate_memory(layout, type, reset); } memory_pool::~memory_pool() {} @@ -218,8 +218,8 @@ memory::ptr memory_pool::get_from_across_networks_pool(const layout& layout, return mem; } -memory::ptr memory_pool::get_memory(const layout& layout, allocation_type type) { - return alloc_memory(layout, type); +memory::ptr memory_pool::get_memory(const layout& layout, allocation_type type, bool reset) { + return alloc_memory(layout, type, reset); } memory::ptr memory_pool::get_memory(const layout& layout, From 5f1d5c4adba6258f748f9c8f8ff935ee9abd0846 Mon Sep 17 00:00:00 2001 From: Eddy Kim Date: Thu, 3 Nov 2022 02:15:16 +0900 Subject: [PATCH 02/26] fix to rebase --- .../intel_gpu/src/graph/impls/ocl/matrix_nms.cpp | 2 +- .../src/graph/impls/ocl/primitive_base.hpp | 13 +++++++------ .../intel_gpu/src/graph/include/matrix_nms_inst.h | 1 + src/plugins/intel_gpu/src/graph/network.cpp | 6 ++---- 4 files changed, 11 insertions(+), 11 deletions(-) diff --git a/src/plugins/intel_gpu/src/graph/impls/ocl/matrix_nms.cpp b/src/plugins/intel_gpu/src/graph/impls/ocl/matrix_nms.cpp index 23d1b42aa4ae14..636aa5fd9e40c5 100644 --- a/src/plugins/intel_gpu/src/graph/impls/ocl/matrix_nms.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/ocl/matrix_nms.cpp @@ 
-46,7 +46,7 @@ struct matrix_nms_impl : typed_primitive_impl_ocl { } protected: - kernel_arguments_data get_arguments(matrix_nms_inst& instance, int32_t) const override { + kernel_arguments_data get_arguments(const matrix_nms_inst& instance, int32_t) const override { kernel_arguments_data args; args.inputs = {instance.input_boxes_mem(), instance.input_scores_mem(), diff --git a/src/plugins/intel_gpu/src/graph/impls/ocl/primitive_base.hpp b/src/plugins/intel_gpu/src/graph/impls/ocl/primitive_base.hpp index cd538973ca49c7..d6697cf777947e 100644 --- a/src/plugins/intel_gpu/src/graph/impls/ocl/primitive_base.hpp +++ b/src/plugins/intel_gpu/src/graph/impls/ocl/primitive_base.hpp @@ -166,7 +166,6 @@ struct typed_primitive_impl_ocl : public typed_primitive_impl { for (decltype(split) i = 0; i < split; i++) { kernel_arguments_data args; - // [TODO] get args from cache if (_kernel_args.inputs.size() > 0) { for (uint32_t i = 0; i < _kernel_args.inputs.size(); i++) { args.inputs.push_back(instance.dep_memory_ptr(_kernel_args.inputs[i])); @@ -256,14 +255,16 @@ struct typed_primitive_impl_ocl : public typed_primitive_impl { std::vector new_events; for (decltype(split) i = 0; i < split; i++) { // is any user of the prim's users is an detecion output, set prim as a output event (event won't be nullptr) - // [TODO] - // auto users = instance.node->get_users(); - // bool is_output_event = is_any_user_cpu(users) || instance.node->is_output(); - bool is_output_event = instance.is_output(); + bool is_output_event; + if (instance.node != nullptr) { + auto users = instance.node->get_users(); + is_output_event = is_any_user_cpu(users) || instance.node->is_output(); + } else { + is_output_event = instance.is_output(); + } kernel_arguments_data args; - // [TODO] get args from cache if (_kernel_args.inputs.size() > 0) { for (uint32_t i = 0; i < _kernel_args.inputs.size(); i++) { args.inputs.push_back(instance.dep_memory_ptr(_kernel_args.inputs[i])); diff --git 
a/src/plugins/intel_gpu/src/graph/include/matrix_nms_inst.h b/src/plugins/intel_gpu/src/graph/include/matrix_nms_inst.h index 15c400e282acac..4a97ffb8c09f56 100644 --- a/src/plugins/intel_gpu/src/graph/include/matrix_nms_inst.h +++ b/src/plugins/intel_gpu/src/graph/include/matrix_nms_inst.h @@ -17,6 +17,7 @@ using matrix_nms_node = typed_program_node; template <> class typed_primitive_inst : public typed_primitive_inst_base { using parent = typed_primitive_inst_base; + using parent::parent; public: typed_primitive_inst(network& network, const matrix_nms_node& node) : parent(network, node) {} diff --git a/src/plugins/intel_gpu/src/graph/network.cpp b/src/plugins/intel_gpu/src/graph/network.cpp index 4f7e35f83552eb..4617471033ccdc 100644 --- a/src/plugins/intel_gpu/src/graph/network.cpp +++ b/src/plugins/intel_gpu/src/graph/network.cpp @@ -663,10 +663,8 @@ void network::set_output_memory(const primitive_id& id, memory::ptr mem_new) { for (auto& prim : o_iter->second) { prim->set_output_memory(eng.reinterpret_buffer(*mem_new, prim->output_memory().get_layout()), false); - // [TODO] - // if (!_reset_arguments && - // (!prim->get_node().is_type() && !(prim->get_node().is_type() && prim->get_node().get_dependencies().empty()))) { - if (!_reset_arguments) { + if (!_reset_arguments && + (prim->type() != cldnn::data::type_id() && !(prim->type() == cldnn::mutable_data::type_id() && prim->dependencies().empty()))) { prim->set_arguments(); } } From 72dc32e4ac87946f024fd05d1623f7fc3c89b705 Mon Sep 17 00:00:00 2001 From: Eddy Kim Date: Thu, 3 Nov 2022 11:58:26 +0900 Subject: [PATCH 03/26] onednn_gpu.patch for serialization --- .../intel_gpu/thirdparty/CMakeLists.txt | 2 + .../intel_gpu/thirdparty/onednn_gpu.patch | 49 +++++++++++++++++++ 2 files changed, 51 insertions(+) create mode 100644 src/plugins/intel_gpu/thirdparty/onednn_gpu.patch diff --git a/src/plugins/intel_gpu/thirdparty/CMakeLists.txt b/src/plugins/intel_gpu/thirdparty/CMakeLists.txt index 
0d25a3aa66cf68..cef26d50ecc684 100644 --- a/src/plugins/intel_gpu/thirdparty/CMakeLists.txt +++ b/src/plugins/intel_gpu/thirdparty/CMakeLists.txt @@ -20,6 +20,8 @@ if(ENABLE_ONEDNN_FOR_GPU) set(ONEDNN_BUILD_DIR "${CMAKE_CURRENT_BINARY_DIR}/onednn_gpu_build/") set(ONEDNN_INSTALL_DIR "${CMAKE_CURRENT_BINARY_DIR}/onednn_gpu_install/") set(ONEDNN_PREFIX_DIR "${CMAKE_CURRENT_BINARY_DIR}/onednn_gpu_root") + execute_process(COMMAND git apply --verbose ../onednn_gpu.patch + WORKING_DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}/onednn_gpu") if(CMAKE_COMPILER_IS_GNUCXX) ie_add_compiler_flags(-Wno-undef -Wno-suggest-override -Wno-missing-declarations) if(OV_COMPILER_IS_CONDA) diff --git a/src/plugins/intel_gpu/thirdparty/onednn_gpu.patch b/src/plugins/intel_gpu/thirdparty/onednn_gpu.patch new file mode 100644 index 00000000000000..1c04035c36aa10 --- /dev/null +++ b/src/plugins/intel_gpu/thirdparty/onednn_gpu.patch @@ -0,0 +1,49 @@ +diff --git a/include/oneapi/dnnl/dnnl.hpp b/include/oneapi/dnnl/dnnl.hpp +index 9114c0a5f..28907d98f 100644 +--- a/include/oneapi/dnnl/dnnl.hpp ++++ b/include/oneapi/dnnl/dnnl.hpp +@@ -4686,6 +4686,8 @@ struct convolution_forward : public primitive { + struct desc { + dnnl_convolution_desc_t data; + ++ desc() {} ++ + /// Constructs a descriptor for a convolution forward propagation + /// primitive with bias. + /// +@@ -5433,6 +5435,8 @@ struct deconvolution_forward : public primitive { + struct desc { + dnnl_deconvolution_desc_t data; + ++ desc() {} ++ + /// Constructs a descriptor for a deconvolution forward propagation + /// primitive with bias. + /// +@@ -6390,6 +6394,8 @@ struct pooling_forward : public primitive { + struct desc { + dnnl_pooling_desc_t data; + ++ desc() {} ++ + /// Constructs a descriptor for pooling forward propagation primitive. 
+ /// + /// Arrays @p strides, @p kernel, @p padding_l, and @p padding_r +@@ -8241,6 +8247,8 @@ struct inner_product_forward : public primitive { + struct desc { + dnnl_inner_product_desc_t data; + ++ desc() {} ++ + /// Constructs a descriptor for an inner product forward propagation + /// primitive with bias. + /// +@@ -11972,6 +11980,8 @@ struct matmul : public primitive { + struct desc { + dnnl_matmul_desc_t data; + ++ desc() {} ++ + /// Constructs a descriptor for a matmul primitive. + /// + /// @param src_desc Memory descriptor for source (matrix A). From dee8c2e3348d4dae9426d2a002e16dfaafcadc4f Mon Sep 17 00:00:00 2001 From: Eddy Kim Date: Thu, 3 Nov 2022 14:34:41 +0900 Subject: [PATCH 04/26] git apply --verbode to --quiet --- src/plugins/intel_gpu/thirdparty/CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/plugins/intel_gpu/thirdparty/CMakeLists.txt b/src/plugins/intel_gpu/thirdparty/CMakeLists.txt index cef26d50ecc684..2fbc958d04dd1c 100644 --- a/src/plugins/intel_gpu/thirdparty/CMakeLists.txt +++ b/src/plugins/intel_gpu/thirdparty/CMakeLists.txt @@ -20,7 +20,7 @@ if(ENABLE_ONEDNN_FOR_GPU) set(ONEDNN_BUILD_DIR "${CMAKE_CURRENT_BINARY_DIR}/onednn_gpu_build/") set(ONEDNN_INSTALL_DIR "${CMAKE_CURRENT_BINARY_DIR}/onednn_gpu_install/") set(ONEDNN_PREFIX_DIR "${CMAKE_CURRENT_BINARY_DIR}/onednn_gpu_root") - execute_process(COMMAND git apply --verbose ../onednn_gpu.patch + execute_process(COMMAND git apply --quiet ../onednn_gpu.patch WORKING_DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}/onednn_gpu") if(CMAKE_COMPILER_IS_GNUCXX) ie_add_compiler_flags(-Wno-undef -Wno-suggest-override -Wno-missing-declarations) From 427892050978901a27edd8e4dfa134232df8ec4d Mon Sep 17 00:00:00 2001 From: Eddy Kim Date: Sun, 6 Nov 2022 23:26:31 +0900 Subject: [PATCH 05/26] functional tests --- src/plugins/intel_gpu/src/graph/crop.cpp | 7 ++++--- src/plugins/intel_gpu/src/graph/network.cpp | 2 +- .../intel_gpu/src/graph/primitive_inst.cpp | 2 +- 
src/plugins/intel_gpu/src/graph/reorder.cpp | 1 + src/plugins/intel_gpu/src/graph/reshape.cpp | 5 +++-- .../intel_gpu/src/plugin/compiled_model.cpp | 2 +- .../intel_gpu/src/plugin/device_config.cpp | 1 - src/plugins/intel_gpu/src/plugin/plugin.cpp | 15 ++++++++------- .../shared_tests_instances/skip_tests_config.cpp | 4 +++- 9 files changed, 22 insertions(+), 17 deletions(-) diff --git a/src/plugins/intel_gpu/src/graph/crop.cpp b/src/plugins/intel_gpu/src/graph/crop.cpp index df6411599ad517..305eb168b0988e 100644 --- a/src/plugins/intel_gpu/src/graph/crop.cpp +++ b/src/plugins/intel_gpu/src/graph/crop.cpp @@ -254,17 +254,18 @@ void crop_inst::on_execute() { } void crop_inst::reuse_input() { - _outputs[0] = _network.get_engine().reinterpret_buffer(input_memory(), node->get_output_layout()); + update_output_memory(); } void crop_inst::update_output_memory() { - if (!node->can_be_optimized()) + if (!can_be_optimized()) return; if (_outputs[0] && _network.get_engine().is_the_same_buffer(output_memory(), input_memory())) return; - _outputs[0] = _network.get_engine().reinterpret_buffer(input_memory(), node->get_output_layout()); + _outputs[0] = _network.get_engine().reinterpret_buffer(input_memory(), _impl_params->output_layout); + _mem_allocated = false; } } // namespace cldnn diff --git a/src/plugins/intel_gpu/src/graph/network.cpp b/src/plugins/intel_gpu/src/graph/network.cpp index 4617471033ccdc..305f4c6cf635a1 100644 --- a/src/plugins/intel_gpu/src/graph/network.cpp +++ b/src/plugins/intel_gpu/src/graph/network.cpp @@ -898,7 +898,7 @@ void network::execute_impl(const std::vector& events) { GPU_DEBUG_IF(debug_config->dump_layers_path.length() > 0) { const std::string layer_name = inst->id(); GPU_DEBUG_IF(debug_config->verbose >= 2) { - std::cerr << get_primitive_info(inst->id()) << std::endl; + std::cerr << inst->id() << std::endl; } GPU_DEBUG_IF(debug_config->dump_layers_dst_only == 0 && diff --git a/src/plugins/intel_gpu/src/graph/primitive_inst.cpp 
b/src/plugins/intel_gpu/src/graph/primitive_inst.cpp index 361fa86d5151d7..d87f525612a1c9 100644 --- a/src/plugins/intel_gpu/src/graph/primitive_inst.cpp +++ b/src/plugins/intel_gpu/src/graph/primitive_inst.cpp @@ -1151,7 +1151,7 @@ void primitive_inst::load(cldnn::BinaryInputBuffer& ib) { _outputs[0] = nullptr; if (!_mem_allocated) { - if (can_be_optimized()) + if (can_be_optimized() && type() != cldnn::concatenation::type_id()) _outputs[0] = get_network().get_engine().reinterpret_buffer(get_network().get_primitive(_dep_ids[0])->output_memory(), output_layout); } else { if ((!can_share_buffer()) || can_be_optimized() || is_output()) { diff --git a/src/plugins/intel_gpu/src/graph/reorder.cpp b/src/plugins/intel_gpu/src/graph/reorder.cpp index 56bb828751ca5a..ed1c0beafa6619 100644 --- a/src/plugins/intel_gpu/src/graph/reorder.cpp +++ b/src/plugins/intel_gpu/src/graph/reorder.cpp @@ -261,6 +261,7 @@ void reorder_inst::update_output_memory() { } else { _outputs[0] = input_memory_ptr(); } + _mem_allocated = false; } void reorder_inst::save(cldnn::BinaryOutputBuffer& ob) const { diff --git a/src/plugins/intel_gpu/src/graph/reshape.cpp b/src/plugins/intel_gpu/src/graph/reshape.cpp index abb36487703985..0edb989b1fa9f7 100644 --- a/src/plugins/intel_gpu/src/graph/reshape.cpp +++ b/src/plugins/intel_gpu/src/graph/reshape.cpp @@ -180,7 +180,7 @@ reshape_inst::typed_primitive_inst(network& network, reshape_node const& node) : } void reshape_inst::on_execute() { - if (!node->can_be_optimized()) + if (!can_be_optimized()) return; if (_outputs[0] && _network.get_engine().is_the_same_buffer(output_memory(), input_memory())) @@ -194,7 +194,7 @@ void reshape_inst::reuse_input() { } void reshape_inst::update_output_memory() { - if (!node->can_be_optimized()) + if (!can_be_optimized()) return; if (_outputs[0] && _network.get_engine().is_the_same_buffer(output_memory(), input_memory())) @@ -203,6 +203,7 @@ void reshape_inst::update_output_memory() { build_deps(); // reshape need deps 
OPENVINO_ASSERT(input_memory_ptr() != nullptr, "[GPU] Failed to reuse input in ", id(), " primitive: input memory was not allocated"); _outputs = {_network.get_engine().reinterpret_buffer(input_memory(), _impl_params->output_layout)}; + _mem_allocated = false; } } // namespace cldnn diff --git a/src/plugins/intel_gpu/src/plugin/compiled_model.cpp b/src/plugins/intel_gpu/src/plugin/compiled_model.cpp index 6117260aebddf1..c4ec038d6acee7 100644 --- a/src/plugins/intel_gpu/src/plugin/compiled_model.cpp +++ b/src/plugins/intel_gpu/src/plugin/compiled_model.cpp @@ -66,7 +66,7 @@ CompiledModel::CompiledModel(InferenceEngine::CNNNetwork &network, std::shared_p } } -InferenceEngine::Layout layout_from_string(const std::string & name) { +static InferenceEngine::Layout layout_from_string(const std::string & name) { static const std::unordered_map layouts = { { "ANY", InferenceEngine::Layout::ANY }, { "NCHW", InferenceEngine::Layout::NCHW }, diff --git a/src/plugins/intel_gpu/src/plugin/device_config.cpp b/src/plugins/intel_gpu/src/plugin/device_config.cpp index 550b80e6553be1..720918902aa3cf 100644 --- a/src/plugins/intel_gpu/src/plugin/device_config.cpp +++ b/src/plugins/intel_gpu/src/plugin/device_config.cpp @@ -566,7 +566,6 @@ bool Config::CanShareContextWith(const Config& other) const { this->sources_dumps_dir == other.sources_dumps_dir && this->tuningConfig.mode == other.tuningConfig.mode && this->tuningConfig.cache_file_path == other.tuningConfig.cache_file_path && - this->kernels_cache_dir == other.kernels_cache_dir && this->device_id == other.device_id && this->task_exec_config._streams == other.task_exec_config._streams && this->task_exec_config._threadPreferredCoreType == other.task_exec_config._threadPreferredCoreType && diff --git a/src/plugins/intel_gpu/src/plugin/plugin.cpp b/src/plugins/intel_gpu/src/plugin/plugin.cpp index bbec4eca94101d..a7aed24c9230f7 100644 --- a/src/plugins/intel_gpu/src/plugin/plugin.cpp +++ b/src/plugins/intel_gpu/src/plugin/plugin.cpp 
@@ -261,8 +261,6 @@ IExecutableNetworkInternal::Ptr Plugin::LoadExeNetworkImpl(const InferenceEngine auto config = ConvertPerfHintsToConfig(orig_config, conf); UpdateConfig(conf, network, config); - RemoteCLContext::Ptr context; - auto canReuseDefaultContext = [&]() -> bool { if (m_defaultContexts.find(conf.device_id) == m_defaultContexts.end()) return false; @@ -274,16 +272,19 @@ IExecutableNetworkInternal::Ptr Plugin::LoadExeNetworkImpl(const InferenceEngine OV_ITT_SCOPED_TASK(itt::domains::intel_gpu_plugin, "Plugin::LoadExeNetworkImpl::CreateContext"); std::lock_guard lock(engine_mutex); if (!canReuseDefaultContext()) { - context = std::make_shared(shared_from_this(), AnyMap(), conf); - m_defaultContexts[conf.device_id] = context; + // if (m_defaultContexts.find(conf.device_id) != m_defaultContexts.end()) { + // statistics_map.erase(m_defaultContexts[conf.device_id]); + // } + m_defaultContexts[conf.device_id] = std::make_shared(shared_from_this(), AnyMap(), conf); + } else { + m_defaultContexts[conf.device_id]->GetConfig().kernels_cache_dir = conf.kernels_cache_dir; } } - context = m_defaultContexts[conf.device_id]; - auto transformedNetwork = CloneAndTransformNetwork(network, conf); { OV_ITT_SCOPED_TASK(itt::domains::intel_gpu_plugin, "Plugin::LoadExeNetworkImpl::CreateExeNetwork"); + RemoteCLContext::Ptr context = m_defaultContexts[conf.device_id]; CompiledModel::Ptr exeNetwork = std::make_shared(transformedNetwork, context, conf); UpdateStatistics(context); return exeNetwork; @@ -763,7 +764,7 @@ Parameter Plugin::GetMetric(const std::string& name, const std::map range = std::make_tuple(1, 2, 1); diff --git a/src/tests/functional/plugin/gpu/shared_tests_instances/skip_tests_config.cpp b/src/tests/functional/plugin/gpu/shared_tests_instances/skip_tests_config.cpp index 5401fd1f558286..768e6775a4357a 100644 --- a/src/tests/functional/plugin/gpu/shared_tests_instances/skip_tests_config.cpp +++ 
b/src/tests/functional/plugin/gpu/shared_tests_instances/skip_tests_config.cpp @@ -91,7 +91,9 @@ std::vector disabledTestPatterns() { // Issue: 76197 R"(.*registerPluginsXMLUnicodePath.*)", // Not supported yet - R"(.*CompileModelCacheTestBase.*)", + R"(.*CompileModelCacheTestBase.*KSOFunction.*)", + R"(.*CompileModelCacheTestBase.*ReadConcatSplitAssign.*)", + R"(.*LoadNetworkCacheTestBase.*)", // Issue: 83014 R"(.*smoke_RemoteBlob.*canInferOnUserQueue.*)", // Issue: CVS-76980 From 7998c91eff4c96b16a390524c3439f0727930200 Mon Sep 17 00:00:00 2001 From: Eddy Kim Date: Mon, 7 Nov 2022 11:19:23 +0900 Subject: [PATCH 06/26] removed referece of mas_unpooling.hpp --- src/plugins/intel_gpu/src/graph/get_type_id.cpp | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/plugins/intel_gpu/src/graph/get_type_id.cpp b/src/plugins/intel_gpu/src/graph/get_type_id.cpp index 7e0d4906c800f1..5e85e705f38ae1 100644 --- a/src/plugins/intel_gpu/src/graph/get_type_id.cpp +++ b/src/plugins/intel_gpu/src/graph/get_type_id.cpp @@ -55,7 +55,6 @@ #include "intel_gpu/primitives/lstm_dynamic_timeloop.hpp" #include "intel_gpu/primitives/lstm_dynamic.hpp" #include "intel_gpu/primitives/lstm.hpp" -#include "intel_gpu/primitives/max_unpooling.hpp" #include "intel_gpu/primitives/mutable_data.hpp" #include "intel_gpu/primitives/mvn.hpp" #include "intel_gpu/primitives/non_max_suppression.hpp" @@ -154,7 +153,6 @@ cldnn::primitive_type_id cldnn::get_type_id(std::string type_str) { {"lstm_dynamic_timeloop", cldnn::lstm_dynamic_timeloop::type_id()}, {"lstm_elt", cldnn::lstm_elt::type_id()}, {"lstm_gemm", cldnn::lstm_gemm::type_id()}, - {"max_unpooling", cldnn::max_unpooling::type_id()}, {"mutable_data", cldnn::mutable_data::type_id()}, {"mvn", cldnn::mvn::type_id()}, {"non_max_suppression", cldnn::non_max_suppression::type_id()}, From 60aef229bf06830799386112614687536ff68627 Mon Sep 17 00:00:00 2001 From: Eddy Kim Date: Mon, 7 Nov 2022 12:09:25 +0900 Subject: [PATCH 07/26] git apply --verbose --- 
src/plugins/intel_gpu/thirdparty/CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/plugins/intel_gpu/thirdparty/CMakeLists.txt b/src/plugins/intel_gpu/thirdparty/CMakeLists.txt index 2fbc958d04dd1c..cef26d50ecc684 100644 --- a/src/plugins/intel_gpu/thirdparty/CMakeLists.txt +++ b/src/plugins/intel_gpu/thirdparty/CMakeLists.txt @@ -20,7 +20,7 @@ if(ENABLE_ONEDNN_FOR_GPU) set(ONEDNN_BUILD_DIR "${CMAKE_CURRENT_BINARY_DIR}/onednn_gpu_build/") set(ONEDNN_INSTALL_DIR "${CMAKE_CURRENT_BINARY_DIR}/onednn_gpu_install/") set(ONEDNN_PREFIX_DIR "${CMAKE_CURRENT_BINARY_DIR}/onednn_gpu_root") - execute_process(COMMAND git apply --quiet ../onednn_gpu.patch + execute_process(COMMAND git apply --verbose ../onednn_gpu.patch WORKING_DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}/onednn_gpu") if(CMAKE_COMPILER_IS_GNUCXX) ie_add_compiler_flags(-Wno-undef -Wno-suggest-override -Wno-missing-declarations) From ab344657fab884f967d9c482b7320cad5304d368 Mon Sep 17 00:00:00 2001 From: Eddy Kim Date: Mon, 7 Nov 2022 13:05:11 +0900 Subject: [PATCH 08/26] add no args ctor for proposal_impl --- src/plugins/intel_gpu/src/graph/impls/cpu/proposal.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/plugins/intel_gpu/src/graph/impls/cpu/proposal.cpp b/src/plugins/intel_gpu/src/graph/impls/cpu/proposal.cpp index a94095ead7e9b4..12de7321c9d33b 100644 --- a/src/plugins/intel_gpu/src/graph/impls/cpu/proposal.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/cpu/proposal.cpp @@ -191,6 +191,8 @@ struct proposal_impl : typed_primitive_impl { using parent = typed_primitive_impl; using parent::parent; + proposal_impl() : parent() {} + explicit proposal_impl(const proposal_node& arg) {} DECLARE_OBJECT_TYPE_SERIALIZATION From 59f3ef356347abf6264a038bea30c15ea77bc966 Mon Sep 17 00:00:00 2001 From: Eddy Kim Date: Mon, 7 Nov 2022 13:48:02 +0900 Subject: [PATCH 09/26] changed kernel_cache save/load error messages --- src/plugins/intel_gpu/src/runtime/kernels_cache.cpp | 12 
++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/src/plugins/intel_gpu/src/runtime/kernels_cache.cpp b/src/plugins/intel_gpu/src/runtime/kernels_cache.cpp index fa674b0f0db4a5..57ed5e03d5f8e0 100644 --- a/src/plugins/intel_gpu/src/runtime/kernels_cache.cpp +++ b/src/plugins/intel_gpu/src/runtime/kernels_cache.cpp @@ -516,7 +516,11 @@ void kernels_cache::save(BinaryOutputBuffer& ob) const { precompiled_kernels.push_back(std::move(binary_kernels[0])); } catch (const cl::BuildError& err) { - std::cout << "+++++ OpenCL build error" << std::endl; + std::string err_log = ""; + for (auto& p : err.getBuildLog()) { + err_log += p.second + '\n'; + } + IE_THROW() << err_log; } } } @@ -558,7 +562,11 @@ void kernels_cache::load(BinaryInputBuffer& ib) { } } } catch (const cl::BuildError& err) { - std::cout << "+++++ OpenCL build error" << std::endl; + std::string err_log = ""; + for (auto& p : err.getBuildLog()) { + err_log += p.second + '\n'; + } + IE_THROW() << err_log; } } From 843d4439b2674f60d3ab89555a39c62f10c787c1 Mon Sep 17 00:00:00 2001 From: Eddy Kim Date: Mon, 7 Nov 2022 17:11:30 +0900 Subject: [PATCH 10/26] gpu model cacning control env. 
variable --- .../intel_gpu/include/intel_gpu/plugin/plugin.hpp | 1 + .../graph/impls/onednn/primitive_onednn_base.h | 7 +++++++ src/plugins/intel_gpu/src/plugin/plugin.cpp | 15 ++++++++++++--- .../intel_gpu/src/runtime/kernels_cache.cpp | 6 ++++++ .../src/behavior/ov_plugin/caching_tests.cpp | 6 ++++++ 5 files changed, 32 insertions(+), 3 deletions(-) diff --git a/src/plugins/intel_gpu/include/intel_gpu/plugin/plugin.hpp b/src/plugins/intel_gpu/include/intel_gpu/plugin/plugin.hpp index 4259ee5eb5ff1b..4d6bf160e0c598 100644 --- a/src/plugins/intel_gpu/include/intel_gpu/plugin/plugin.hpp +++ b/src/plugins/intel_gpu/include/intel_gpu/plugin/plugin.hpp @@ -23,6 +23,7 @@ class Plugin : public InferenceEngine::IInferencePlugin, std::shared_ptr _impl; bool streamsSet = false; bool throttlingSet = false; + bool isModelCachingEnabled = false; // key: device_id, value: cldnn device std::map device_map; diff --git a/src/plugins/intel_gpu/src/graph/impls/onednn/primitive_onednn_base.h b/src/plugins/intel_gpu/src/graph/impls/onednn/primitive_onednn_base.h index 042dd9c067c71d..b7b5baa2ca66ac 100644 --- a/src/plugins/intel_gpu/src/graph/impls/onednn/primitive_onednn_base.h +++ b/src/plugins/intel_gpu/src/graph/impls/onednn/primitive_onednn_base.h @@ -87,6 +87,13 @@ struct typed_primitive_onednn_impl : public typed_primitive_impl { void build_primitive() { auto cache_outpath = get_cache_directory(); + + if (const char* env_p = std::getenv("OV_GPU_MODEL_CACHING")) { + if (env_p[0] == '1') { + cache_outpath = ""; + } + } + if (cache_outpath.empty()) { _prim = PrimType(_pd); } else { diff --git a/src/plugins/intel_gpu/src/plugin/plugin.cpp b/src/plugins/intel_gpu/src/plugin/plugin.cpp index a7aed24c9230f7..a8893b93eced31 100644 --- a/src/plugins/intel_gpu/src/plugin/plugin.cpp +++ b/src/plugins/intel_gpu/src/plugin/plugin.cpp @@ -162,6 +162,13 @@ Plugin::Plugin() : m_defaultContexts({}) { for (auto& config : _impl->m_configs) { CustomLayer::LoadFromFile(config_path, 
config.second.customLayers, true); } + + isModelCachingEnabled = false; + if (const char* env_p = std::getenv("OV_GPU_MODEL_CACHING")) { + if (env_p[0] == '1') { + isModelCachingEnabled = true; + } + } } auto check_inputs = [](InferenceEngine::InputsDataMap _networkInputs) { @@ -614,7 +621,8 @@ Parameter Plugin::GetMetric(const std::string& name, const std::map range = std::make_tuple(1, 2, 1); @@ -957,7 +966,7 @@ Parameter Plugin::GetMetric(const std::string& name, const std::map(max_batch_size)}; - } else if (name == METRIC_KEY(IMPORT_EXPORT_SUPPORT)) { + } else if (isModelCachingEnabled && name == METRIC_KEY(IMPORT_EXPORT_SUPPORT)) { IE_SET_METRIC_RETURN(IMPORT_EXPORT_SUPPORT, true); } else { IE_THROW() << "Unsupported metric key " << name; diff --git a/src/plugins/intel_gpu/src/runtime/kernels_cache.cpp b/src/plugins/intel_gpu/src/runtime/kernels_cache.cpp index 57ed5e03d5f8e0..59cf36bde71bee 100644 --- a/src/plugins/intel_gpu/src/runtime/kernels_cache.cpp +++ b/src/plugins/intel_gpu/src/runtime/kernels_cache.cpp @@ -70,6 +70,12 @@ std::string kernels_cache::get_cache_path() const { } bool kernels_cache::is_cache_enabled() const { + if (const char* env_p = std::getenv("OV_GPU_MODEL_CACHING")) { + if (env_p[0] == '1') { + return false; + } + } + return !_engine.configuration().kernels_cache_path.empty(); } diff --git a/src/tests/functional/plugin/shared/src/behavior/ov_plugin/caching_tests.cpp b/src/tests/functional/plugin/shared/src/behavior/ov_plugin/caching_tests.cpp index e6a3576e521ce8..e8f7307eaa6b66 100644 --- a/src/tests/functional/plugin/shared/src/behavior/ov_plugin/caching_tests.cpp +++ b/src/tests/functional/plugin/shared/src/behavior/ov_plugin/caching_tests.cpp @@ -181,6 +181,9 @@ void CompileModelCacheTestBase::run() { } init_input_shapes(static_shapes_to_test_representation(inShapes)); } + if ((targetDevice.find("GPU") != std::string::npos)) { + setenv("OV_GPU_MODEL_CACHING", "1", 1); + } if ((targetDevice.find("AUTO") == std::string::npos) && 
!importExportSupported(*core)) { GTEST_COUT << "Plugin doesn't support import and export - skipping test" << std::endl; GTEST_SKIP(); @@ -217,6 +220,9 @@ void CompileModelCacheTestBase::run() { ASSERT_EQ(CommonTestUtils::listFilesWithExt(m_cacheFolderName, "blob").size(), 1); compare(originalOutputs, get_plugin_outputs()); } + if ((targetDevice.find("GPU") != std::string::npos)) { + setenv("OV_GPU_MODEL_CACHING", NULL, 1); + } } TEST_P(CompileModelCacheTestBase, CompareWithRefImpl) { From e03c7fa283e6f027280f7b0bb30845c11c2d6a83 Mon Sep 17 00:00:00 2001 From: Eddy Kim Date: Mon, 7 Nov 2022 18:00:52 +0900 Subject: [PATCH 11/26] fixed nonnull warning --- .../plugin/shared/src/behavior/ov_plugin/caching_tests.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/tests/functional/plugin/shared/src/behavior/ov_plugin/caching_tests.cpp b/src/tests/functional/plugin/shared/src/behavior/ov_plugin/caching_tests.cpp index e8f7307eaa6b66..ebdc4da6ed5936 100644 --- a/src/tests/functional/plugin/shared/src/behavior/ov_plugin/caching_tests.cpp +++ b/src/tests/functional/plugin/shared/src/behavior/ov_plugin/caching_tests.cpp @@ -221,7 +221,7 @@ void CompileModelCacheTestBase::run() { compare(originalOutputs, get_plugin_outputs()); } if ((targetDevice.find("GPU") != std::string::npos)) { - setenv("OV_GPU_MODEL_CACHING", NULL, 1); + setenv("OV_GPU_MODEL_CACHING", "", 1); } } From 59811aaaf69563529fa6591b531f929fdf11a921 Mon Sep 17 00:00:00 2001 From: Eddy Kim Date: Mon, 7 Nov 2022 19:06:32 +0900 Subject: [PATCH 12/26] impl_params are added to save and load --- .../src/graph/impls/cpu/detection_output.cpp | 4 +- .../src/graph/impls/ocl/activation.cpp | 8 +-- .../graph/impls/ocl/binary_convolution.cpp | 8 +-- .../src/graph/impls/ocl/concatenation.cpp | 8 +-- .../src/graph/impls/ocl/convolution.cpp | 8 +-- .../intel_gpu/src/graph/impls/ocl/crop.cpp | 8 +-- .../src/graph/impls/ocl/custom_primitive.cpp | 4 +- .../src/graph/impls/ocl/deconvolution.cpp | 8 +-- 
.../impls/ocl/deformable_convolution.cpp | 8 +-- .../src/graph/impls/ocl/generic_layer.cpp | 4 +- .../src/graph/impls/ocl/primitive_base.hpp | 4 +- .../intel_gpu/src/graph/impls/ocl/reorder.cpp | 8 +-- .../impls/onednn/concatenation_onednn.cpp | 55 +++++-------------- .../graph/impls/onednn/convolution_onednn.cpp | 8 +-- .../impls/onednn/deconvolution_onednn.cpp | 8 +-- .../impls/onednn/fully_connected_onednn.cpp | 8 +-- .../src/graph/impls/onednn/gemm_onednn.cpp | 8 +-- .../src/graph/impls/onednn/pooling_onednn.cpp | 8 +-- .../graph/impls/onednn/reduction_onednn.cpp | 8 +-- .../src/graph/impls/onednn/reorder_onednn.cpp | 41 ++++---------- .../intel_gpu/src/graph/primitive_inst.cpp | 6 +- 21 files changed, 91 insertions(+), 139 deletions(-) diff --git a/src/plugins/intel_gpu/src/graph/impls/cpu/detection_output.cpp b/src/plugins/intel_gpu/src/graph/impls/cpu/detection_output.cpp index fac3a6428734a9..d211b610c0c3d7 100644 --- a/src/plugins/intel_gpu/src/graph/impls/cpu/detection_output.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/cpu/detection_output.cpp @@ -68,11 +68,11 @@ struct detection_output_impl : typed_primitive_impl { nms_type = (node.get_primitive()->decrease_label_id ? 
NMSType::MXNET : NMSType::CAFFE); } - void save(BinaryOutputBuffer& ob) const override { + void save(BinaryOutputBuffer& ob, const kernel_impl_params* impl_params = nullptr) const override { ob << make_data(&nms_type, sizeof(NMSType)); } - void load(BinaryInputBuffer& ib) override { + void load(BinaryInputBuffer& ib, const kernel_impl_params* impl_params = nullptr) override { ib >> make_data(&nms_type, sizeof(NMSType)); } diff --git a/src/plugins/intel_gpu/src/graph/impls/ocl/activation.cpp b/src/plugins/intel_gpu/src/graph/impls/ocl/activation.cpp index c3bb3bac00f000..6b367a00b14d71 100644 --- a/src/plugins/intel_gpu/src/graph/impls/ocl/activation.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/ocl/activation.cpp @@ -48,13 +48,13 @@ struct activation_impl : typed_primitive_impl_ocl { return args; } - void save(BinaryOutputBuffer& ob) const override { - parent::save(ob); + void save(BinaryOutputBuffer& ob, const kernel_impl_params* impl_params = nullptr) const override { + parent::save(ob, impl_params); ob << _is_parameterized; } - void load(BinaryInputBuffer& ib) override { - parent::load(ib); + void load(BinaryInputBuffer& ib, const kernel_impl_params* impl_params = nullptr) override { + parent::load(ib, impl_params); ib >> _is_parameterized; } diff --git a/src/plugins/intel_gpu/src/graph/impls/ocl/binary_convolution.cpp b/src/plugins/intel_gpu/src/graph/impls/ocl/binary_convolution.cpp index 7d851c891643ad..096aa56db787da 100644 --- a/src/plugins/intel_gpu/src/graph/impls/ocl/binary_convolution.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/ocl/binary_convolution.cpp @@ -75,13 +75,13 @@ struct binary_convolution_impl : typed_primitive_impl_ocl { int32_t get_split() const override { return _split; } public: - void save(BinaryOutputBuffer& ob) const override { - parent::save(ob); + void save(BinaryOutputBuffer& ob, const kernel_impl_params* impl_params = nullptr) const override { + parent::save(ob, impl_params); ob << _split; } - void load(BinaryInputBuffer& ib) 
override { - parent::load(ib); + void load(BinaryInputBuffer& ib, const kernel_impl_params* impl_params = nullptr) override { + parent::load(ib, impl_params); ib >> _split; } diff --git a/src/plugins/intel_gpu/src/graph/impls/ocl/concatenation.cpp b/src/plugins/intel_gpu/src/graph/impls/ocl/concatenation.cpp index 54a8caa445083d..5286df10e71868 100644 --- a/src/plugins/intel_gpu/src/graph/impls/ocl/concatenation.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/ocl/concatenation.cpp @@ -85,13 +85,13 @@ struct concatenation_impl : typed_primitive_impl_ocl { } public: - void save(BinaryOutputBuffer& ob) const override { - parent::save(ob); + void save(BinaryOutputBuffer& ob, const kernel_impl_params* impl_params = nullptr) const override { + parent::save(ob, impl_params); ob << _can_be_optimized; } - void load(BinaryInputBuffer& ib) override { - parent::load(ib); + void load(BinaryInputBuffer& ib, const kernel_impl_params* impl_params = nullptr) override { + parent::load(ib, impl_params); ib >> _can_be_optimized; } diff --git a/src/plugins/intel_gpu/src/graph/impls/ocl/convolution.cpp b/src/plugins/intel_gpu/src/graph/impls/ocl/convolution.cpp index aa7a918aaa1028..e0110774718bbb 100644 --- a/src/plugins/intel_gpu/src/graph/impls/ocl/convolution.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/ocl/convolution.cpp @@ -80,15 +80,15 @@ struct convolution_impl : typed_primitive_impl_ocl { bool get_depthwise_sep_opt() const override { return _depthwise_sep_opt; } public: - void save(BinaryOutputBuffer& ob) const override { - parent::save(ob); + void save(BinaryOutputBuffer& ob, const kernel_impl_params* impl_params = nullptr) const override { + parent::save(ob, impl_params); ob << _split; ob << _groups; ob << _depthwise_sep_opt; } - void load(BinaryInputBuffer& ib) override { - parent::load(ib); + void load(BinaryInputBuffer& ib, const kernel_impl_params* impl_params = nullptr) override { + parent::load(ib, impl_params); ib >> _split; ib >> _groups; ib >> _depthwise_sep_opt; 
diff --git a/src/plugins/intel_gpu/src/graph/impls/ocl/crop.cpp b/src/plugins/intel_gpu/src/graph/impls/ocl/crop.cpp index 0d861a61fbcb6d..4807a8ddf4634a 100644 --- a/src/plugins/intel_gpu/src/graph/impls/ocl/crop.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/ocl/crop.cpp @@ -44,13 +44,13 @@ struct crop_impl : typed_primitive_impl_ocl { } public: - void save(BinaryOutputBuffer& ob) const override { - parent::save(ob); + void save(BinaryOutputBuffer& ob, const kernel_impl_params* impl_params = nullptr) const override { + parent::save(ob, impl_params); ob << _can_be_optimized; } - void load(BinaryInputBuffer& ib) override { - parent::load(ib); + void load(BinaryInputBuffer& ib, const kernel_impl_params* impl_params = nullptr) override { + parent::load(ib, impl_params); ib >> _can_be_optimized; } diff --git a/src/plugins/intel_gpu/src/graph/impls/ocl/custom_primitive.cpp b/src/plugins/intel_gpu/src/graph/impls/ocl/custom_primitive.cpp index 3421e8c5718b07..f384e70797c210 100644 --- a/src/plugins/intel_gpu/src/graph/impls/ocl/custom_primitive.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/ocl/custom_primitive.cpp @@ -85,12 +85,12 @@ struct custom_gpu_primitive_impl : typed_primitive_impl { return {_kernel_id}; } - void save(BinaryOutputBuffer& ob) const override { + void save(BinaryOutputBuffer& ob, const kernel_impl_params* impl_params = nullptr) const override { ob << *cl_kernel; ob << _kernel_id; } - void load(BinaryInputBuffer& ib) override { + void load(BinaryInputBuffer& ib, const kernel_impl_params* impl_params = nullptr) override { cl_kernel = std::make_shared(); ib >> *cl_kernel; ib >> _kernel_id; diff --git a/src/plugins/intel_gpu/src/graph/impls/ocl/deconvolution.cpp b/src/plugins/intel_gpu/src/graph/impls/ocl/deconvolution.cpp index 7d939b66288e70..e214b33962a17f 100644 --- a/src/plugins/intel_gpu/src/graph/impls/ocl/deconvolution.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/ocl/deconvolution.cpp @@ -41,14 +41,14 @@ struct deconvolution_impl : 
typed_primitive_impl_ocl { _groups = node.get_groups(); } - void save(BinaryOutputBuffer& ob) const override { - parent::save(ob); + void save(BinaryOutputBuffer& ob, const kernel_impl_params* impl_params = nullptr) const override { + parent::save(ob, impl_params); ob << _split; ob << _groups; } - void load(BinaryInputBuffer& ib) override { - parent::load(ib); + void load(BinaryInputBuffer& ib, const kernel_impl_params* impl_params = nullptr) override { + parent::load(ib, impl_params); ib >> _split; ib >> _groups; } diff --git a/src/plugins/intel_gpu/src/graph/impls/ocl/deformable_convolution.cpp b/src/plugins/intel_gpu/src/graph/impls/ocl/deformable_convolution.cpp index 57255cc12c3468..50fa6ca5f4d2c8 100644 --- a/src/plugins/intel_gpu/src/graph/impls/ocl/deformable_convolution.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/ocl/deformable_convolution.cpp @@ -42,14 +42,14 @@ struct deformable_conv_impl : typed_primitive_impl_ocl { _groups = node.get_groups(); } - void save(BinaryOutputBuffer& ob) const override { - parent::save(ob); + void save(BinaryOutputBuffer& ob, const kernel_impl_params* impl_params = nullptr) const override { + parent::save(ob, impl_params); ob << _split; ob << _groups; } - void load(BinaryInputBuffer& ib) override { - parent::load(ib); + void load(BinaryInputBuffer& ib, const kernel_impl_params* impl_params = nullptr) override { + parent::load(ib, impl_params); ib >> _split; ib >> _groups; } diff --git a/src/plugins/intel_gpu/src/graph/impls/ocl/generic_layer.cpp b/src/plugins/intel_gpu/src/graph/impls/ocl/generic_layer.cpp index a25203538dd00e..7283b3f56c5128 100644 --- a/src/plugins/intel_gpu/src/graph/impls/ocl/generic_layer.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/ocl/generic_layer.cpp @@ -44,12 +44,12 @@ struct generic_layer_impl : typed_primitive_impl { _kernel_id = arg.get_program().add_kernel(arg.get_primitive()->generic_params.clKernel->code.kernelString); } - void save(BinaryOutputBuffer& ob) const override { + void 
save(BinaryOutputBuffer& ob, const kernel_impl_params* impl_params = nullptr) const override { ob <<_cl_kernel_data; ob << _kernel_id; } - void load(BinaryInputBuffer& ib) override { + void load(BinaryInputBuffer& ib, const kernel_impl_params* impl_params = nullptr) override { ib >> _cl_kernel_data; ib >> _kernel_id; } diff --git a/src/plugins/intel_gpu/src/graph/impls/ocl/primitive_base.hpp b/src/plugins/intel_gpu/src/graph/impls/ocl/primitive_base.hpp index d6697cf777947e..bf45115b5162fa 100644 --- a/src/plugins/intel_gpu/src/graph/impls/ocl/primitive_base.hpp +++ b/src/plugins/intel_gpu/src/graph/impls/ocl/primitive_base.hpp @@ -67,7 +67,7 @@ struct typed_primitive_impl_ocl : public typed_primitive_impl { bool is_cpu() const override { return false; } - void save(BinaryOutputBuffer& ob) const override { + void save(BinaryOutputBuffer& ob, const kernel_impl_params* impl_params = nullptr) const override { ob << make_data(&_kernel_data.internalBufferDataType, sizeof(kernel_selector::Datatype)); ob << _kernel_data.internalBufferSizes; ob << _kernel_data.kernels; @@ -75,7 +75,7 @@ struct typed_primitive_impl_ocl : public typed_primitive_impl { ob << _kernel_args; } - void load(BinaryInputBuffer& ib) override { + void load(BinaryInputBuffer& ib, const kernel_impl_params* impl_params = nullptr) override { ib >> make_data(&_kernel_data.internalBufferDataType, sizeof(kernel_selector::Datatype)); ib >> _kernel_data.internalBufferSizes; ib >> _kernel_data.kernels; diff --git a/src/plugins/intel_gpu/src/graph/impls/ocl/reorder.cpp b/src/plugins/intel_gpu/src/graph/impls/ocl/reorder.cpp index 0e6c6f8f24ba10..c5b24f5a7ae7de 100644 --- a/src/plugins/intel_gpu/src/graph/impls/ocl/reorder.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/ocl/reorder.cpp @@ -40,14 +40,14 @@ struct reorder_impl : typed_primitive_impl_ocl { _has_mean = node.has_mean(); } - void save(BinaryOutputBuffer& ob) const override { - parent::save(ob); + void save(BinaryOutputBuffer& ob, const 
kernel_impl_params* impl_params = nullptr) const override { + parent::save(ob, impl_params); ob << _can_be_optimized; ob << _has_mean; } - void load(BinaryInputBuffer& ib) override { - parent::load(ib); + void load(BinaryInputBuffer& ib, const kernel_impl_params* impl_params = nullptr) override { + parent::load(ib, impl_params); ib >> _can_be_optimized; ib >> _has_mean; } diff --git a/src/plugins/intel_gpu/src/graph/impls/onednn/concatenation_onednn.cpp b/src/plugins/intel_gpu/src/graph/impls/onednn/concatenation_onednn.cpp index 999b7eb58a16ad..3c6bbeac5d71f0 100644 --- a/src/plugins/intel_gpu/src/graph/impls/onednn/concatenation_onednn.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/onednn/concatenation_onednn.cpp @@ -24,8 +24,6 @@ struct concatenation_onednn : typed_primitive_onednn_impl clone() const override { return make_unique(*this); } @@ -49,10 +47,9 @@ struct concatenation_onednn : typed_primitive_onednn_impl get_concatenation_descriptor(const kernel_impl_params& impl_params) { - auto prim = impl_params.typed_desc(); - - auto& engine = impl_params.prog->get_engine(); + static std::shared_ptr get_concatenation_descriptor(const kernel_impl_params& impl_params, + const int64_t axis, + const cldnn::engine& engine) { std::vector input_mds; for (size_t i = 0; i < impl_params.input_layouts.size(); i++) { input_mds.push_back(onednn::layout_to_memory_desc(impl_params.get_input_layout(i))); @@ -60,13 +57,13 @@ struct concatenation_onednn : typed_primitive_onednn_impl( output_md, - prim->axis, + axis, input_mds, engine.get_onednn_engine()); } public: - void save(BinaryOutputBuffer& ob) const override { + void save(BinaryOutputBuffer& ob, const kernel_impl_params* impl_params = nullptr) const override { if (_prim.get(true) == nullptr) { ob << false; return; @@ -74,52 +71,29 @@ struct concatenation_onednn : typed_primitive_onednn_implget_dependencies().size(); - for (auto& input : _outer->get_dependencies()) { - ob << input->get_output_layout(); - } - ob << 
_outer->get_primitive()->axis; - ob << _outer->get_output_layout(); + auto prim = impl_params->typed_desc(); + ob << prim->axis; std::vector prim_cache; prim_cache = _prim.get_cache_blob(); ob << prim_cache; } - void load(BinaryInputBuffer& ib) override { + void load(BinaryInputBuffer& ib, const kernel_impl_params* impl_params = nullptr) override { bool has_prim; ib >> has_prim; if (!has_prim) return; - parent::load(ib); - - size_t num_deps; - ib >> num_deps; - - std::vector input_mds; - for (size_t idx = 0; idx < num_deps; ++idx) { - layout input_layout = layout(cldnn::data_types::bin, cldnn::format::any, cldnn::tensor()); - ib >> input_layout; - input_mds.push_back(onednn::layout_to_memory_desc(input_layout)); - } + parent::load(ib, impl_params); int64_t prim_axis; ib >> prim_axis; - layout output_layout = layout(cldnn::data_types::bin, cldnn::format::any, cldnn::tensor()); - ib >> output_layout; - auto output_md = onednn::layout_to_memory_desc(output_layout); - - auto desc = std::make_shared( - output_md, - prim_axis, - input_mds, - ib.get_engine().get_onednn_engine()); - + auto desc = get_concatenation_descriptor(*impl_params, prim_axis, ib.get_engine()); _pd = *desc; std::vector prim_cache; @@ -132,14 +106,13 @@ struct concatenation_onednn : typed_primitive_onednn_implget_engine(); if (arg.can_be_optimized()) return new concatenation_onednn(engine); - auto desc = get_concatenation_descriptor(impl_params); + auto prim = impl_params.typed_desc(); + auto desc = get_concatenation_descriptor(impl_params, prim->axis, impl_params.prog->get_engine()); auto attr = arg.get_onednn_primitive_attributes(); std::shared_ptr dummy = nullptr; - auto new_impl = new concatenation_onednn(engine, dummy, attr, *desc); - new_impl->_outer = &arg; - return new_impl; + return new concatenation_onednn(engine, dummy, attr, *desc); } }; diff --git a/src/plugins/intel_gpu/src/graph/impls/onednn/convolution_onednn.cpp b/src/plugins/intel_gpu/src/graph/impls/onednn/convolution_onednn.cpp 
index f10dde97ceaa96..27c1f2f94f8f01 100644 --- a/src/plugins/intel_gpu/src/graph/impls/onednn/convolution_onednn.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/onednn/convolution_onednn.cpp @@ -185,8 +185,8 @@ struct convolution_onednn : typed_primitive_onednn_impldata, sizeof(dnnl_convolution_desc_t)); @@ -195,8 +195,8 @@ struct convolution_onednn : typed_primitive_onednn_impl(); ib >> make_data(&_desc->data, sizeof(dnnl_convolution_desc_t)); diff --git a/src/plugins/intel_gpu/src/graph/impls/onednn/deconvolution_onednn.cpp b/src/plugins/intel_gpu/src/graph/impls/onednn/deconvolution_onednn.cpp index 8be3740fbdff54..cf40323f8d1528 100644 --- a/src/plugins/intel_gpu/src/graph/impls/onednn/deconvolution_onednn.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/onednn/deconvolution_onednn.cpp @@ -104,8 +104,8 @@ struct deconvolution_onednn : typed_primitive_onednn_impldata, sizeof(dnnl_deconvolution_desc_t)); @@ -114,8 +114,8 @@ struct deconvolution_onednn : typed_primitive_onednn_impl(); ib >> make_data(&_desc->data, sizeof(dnnl_deconvolution_desc_t)); diff --git a/src/plugins/intel_gpu/src/graph/impls/onednn/fully_connected_onednn.cpp b/src/plugins/intel_gpu/src/graph/impls/onednn/fully_connected_onednn.cpp index af875b5f6b38b9..99b5aea13bc91a 100644 --- a/src/plugins/intel_gpu/src/graph/impls/onednn/fully_connected_onednn.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/onednn/fully_connected_onednn.cpp @@ -166,8 +166,8 @@ struct fully_connected_onednn : typed_primitive_onednn_impldata, sizeof(dnnl_inner_product_desc_t)); @@ -176,8 +176,8 @@ struct fully_connected_onednn : typed_primitive_onednn_impl(); ib >> make_data(&_desc->data, sizeof(dnnl_inner_product_desc_t)); diff --git a/src/plugins/intel_gpu/src/graph/impls/onednn/gemm_onednn.cpp b/src/plugins/intel_gpu/src/graph/impls/onednn/gemm_onednn.cpp index 126c0c59b9c893..20f8ced65186a1 100644 --- a/src/plugins/intel_gpu/src/graph/impls/onednn/gemm_onednn.cpp +++ 
b/src/plugins/intel_gpu/src/graph/impls/onednn/gemm_onednn.cpp @@ -209,8 +209,8 @@ struct gemm_onednn : typed_primitive_onednn_impl { } public: - void save(BinaryOutputBuffer& ob) const override { - parent::save(ob); + void save(BinaryOutputBuffer& ob, const kernel_impl_params* impl_params = nullptr) const override { + parent::save(ob, impl_params); ob << make_data(&_desc->data, sizeof(dnnl_matmul_desc_t)); @@ -219,8 +219,8 @@ struct gemm_onednn : typed_primitive_onednn_impl { ob << prim_cache; } - void load(BinaryInputBuffer& ib) override { - parent::load(ib); + void load(BinaryInputBuffer& ib, const kernel_impl_params* impl_params = nullptr) override { + parent::load(ib, impl_params); _desc = std::make_shared(); ib >> make_data(&_desc->data, sizeof(dnnl_matmul_desc_t)); diff --git a/src/plugins/intel_gpu/src/graph/impls/onednn/pooling_onednn.cpp b/src/plugins/intel_gpu/src/graph/impls/onednn/pooling_onednn.cpp index 6707da450898d0..20eae95f89c00b 100644 --- a/src/plugins/intel_gpu/src/graph/impls/onednn/pooling_onednn.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/onednn/pooling_onednn.cpp @@ -64,8 +64,8 @@ struct pooling_onednn : typed_primitive_onednn_impldata, sizeof(dnnl_pooling_desc_t)); @@ -74,8 +74,8 @@ struct pooling_onednn : typed_primitive_onednn_impl(); ib >> make_data(&_desc->data, sizeof(dnnl_pooling_desc_t)); diff --git a/src/plugins/intel_gpu/src/graph/impls/onednn/reduction_onednn.cpp b/src/plugins/intel_gpu/src/graph/impls/onednn/reduction_onednn.cpp index 1f25295e4b3fac..e4a6834183882d 100644 --- a/src/plugins/intel_gpu/src/graph/impls/onednn/reduction_onednn.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/onednn/reduction_onednn.cpp @@ -93,8 +93,8 @@ struct reduction_onednn : typed_primitive_onednn_impldata, sizeof(dnnl_reduction_desc_t)); @@ -103,8 +103,8 @@ struct reduction_onednn : typed_primitive_onednn_impl(); ib >> make_data(&_desc->data, sizeof(dnnl_reduction_desc_t)); diff --git 
a/src/plugins/intel_gpu/src/graph/impls/onednn/reorder_onednn.cpp b/src/plugins/intel_gpu/src/graph/impls/onednn/reorder_onednn.cpp index aeeccb0f043787..1575e388088287 100644 --- a/src/plugins/intel_gpu/src/graph/impls/onednn/reorder_onednn.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/onednn/reorder_onednn.cpp @@ -22,8 +22,6 @@ struct reorder_onednn : typed_primitive_onednn_impl clone() const override { return make_unique(*this); } @@ -45,12 +43,13 @@ struct reorder_onednn : typed_primitive_onednn_impl get_reorder_descriptor(const kernel_impl_params& impl_params, const dnnl::primitive_attr& attr) { + static std::shared_ptr get_reorder_descriptor(const kernel_impl_params& impl_params, + const dnnl::primitive_attr& attr, + const cldnn::engine& engine) { auto prim = impl_params.typed_desc(); auto input_layout = impl_params.get_input_layout(0); auto output_layout = impl_params.output_layout; - auto& engine = impl_params.prog->get_engine(); auto input_md = onednn::layout_to_memory_desc(input_layout); auto output_md = onednn::layout_to_memory_desc(output_layout); @@ -64,36 +63,18 @@ struct reorder_onednn : typed_primitive_onednn_implget_dependency(0).get_output_layout(); - ob << _outer->get_output_layout(); + void save(BinaryOutputBuffer& ob, const kernel_impl_params* impl_params = nullptr) const override { + parent::save(ob, impl_params); std::vector prim_cache; prim_cache = _prim.get_cache_blob(); ob << prim_cache; } - void load(BinaryInputBuffer& ib) override { - parent::load(ib); - - layout input_layout = layout(cldnn::data_types::bin, cldnn::format::any, cldnn::tensor()); - ib >> input_layout; - - layout output_layout = layout(cldnn::data_types::bin, cldnn::format::any, cldnn::tensor()); - ib >> output_layout; - - auto input_md = onednn::layout_to_memory_desc(input_layout); - auto output_md = onednn::layout_to_memory_desc(output_layout); - - auto desc = std::make_shared( - ib.get_engine().get_onednn_engine(), - input_md, - ib.get_engine().get_onednn_engine(), - 
output_md, - *(_attrs)); + void load(BinaryInputBuffer& ib, const kernel_impl_params* impl_params = nullptr) override { + parent::load(ib, impl_params); + auto desc = get_reorder_descriptor(*impl_params, *_attrs, ib.get_engine()); _pd = *desc; std::vector prim_cache; @@ -105,13 +86,11 @@ struct reorder_onednn : typed_primitive_onednn_implget_engine(); auto attr = arg.get_onednn_primitive_attributes(); - auto desc = get_reorder_descriptor(impl_params, *attr); + auto desc = get_reorder_descriptor(impl_params, *attr, impl_params.prog->get_engine()); std::shared_ptr dummy = nullptr; - auto new_impl = new reorder_onednn(engine, dummy, attr, *desc); - new_impl->_outer = &arg; - return new_impl; + return new reorder_onednn(engine, dummy, attr, *desc); } }; diff --git a/src/plugins/intel_gpu/src/graph/primitive_inst.cpp b/src/plugins/intel_gpu/src/graph/primitive_inst.cpp index d87f525612a1c9..8954c398e61188 100644 --- a/src/plugins/intel_gpu/src/graph/primitive_inst.cpp +++ b/src/plugins/intel_gpu/src/graph/primitive_inst.cpp @@ -979,8 +979,8 @@ void primitive_inst::save(cldnn::BinaryOutputBuffer& ob) const { convert_args(args, args_idx); _impl->set_arguments(args_idx); - ob << _impl; _impl_params->save(ob); + _impl->save(ob, _impl_params.get()); ob << _node_output_layout; ob << has_mutable_input(); @@ -1101,11 +1101,11 @@ void primitive_inst::load(cldnn::BinaryInputBuffer& ib) { } } else if (_object_type == object_type::EXECUTABLE_INST) { // primitive_impl - _impl.release(); - ib >> _impl; _impl_params.release(); _impl_params = make_unique(); _impl_params->load(ib); + _impl.release(); + _impl->load(ib, _impl_params.get()); ib >> _node_output_layout; ib >> _has_mutable_input; From 593b500f96b2d610373db82a9da393af5d2848d4 Mon Sep 17 00:00:00 2001 From: Eddy Kim Date: Mon, 7 Nov 2022 20:42:51 +0900 Subject: [PATCH 13/26] changed a way to use kernel_impl_params in save and load --- .../intel_gpu/src/graph/impls/cpu/detection_output.cpp | 4 ++-- 
.../intel_gpu/src/graph/impls/ocl/activation.cpp | 8 ++++---- .../src/graph/impls/ocl/binary_convolution.cpp | 8 ++++---- .../intel_gpu/src/graph/impls/ocl/concatenation.cpp | 8 ++++---- .../intel_gpu/src/graph/impls/ocl/convolution.cpp | 8 ++++---- src/plugins/intel_gpu/src/graph/impls/ocl/crop.cpp | 8 ++++---- .../intel_gpu/src/graph/impls/ocl/custom_primitive.cpp | 4 ++-- .../intel_gpu/src/graph/impls/ocl/deconvolution.cpp | 8 ++++---- .../src/graph/impls/ocl/deformable_convolution.cpp | 8 ++++---- .../intel_gpu/src/graph/impls/ocl/generic_layer.cpp | 4 ++-- .../intel_gpu/src/graph/impls/ocl/primitive_base.hpp | 4 ++-- src/plugins/intel_gpu/src/graph/impls/ocl/reorder.cpp | 8 ++++---- .../src/graph/impls/onednn/concatenation_onednn.cpp | 10 ++++++---- .../src/graph/impls/onednn/convolution_onednn.cpp | 8 ++++---- .../src/graph/impls/onednn/deconvolution_onednn.cpp | 8 ++++---- .../src/graph/impls/onednn/fully_connected_onednn.cpp | 8 ++++---- .../intel_gpu/src/graph/impls/onednn/gemm_onednn.cpp | 8 ++++---- .../src/graph/impls/onednn/pooling_onednn.cpp | 8 ++++---- .../src/graph/impls/onednn/reduction_onednn.cpp | 8 ++++---- .../src/graph/impls/onednn/reorder_onednn.cpp | 9 +++++---- .../src/graph/include/serialization/binary_buffer.hpp | 8 ++++++++ src/plugins/intel_gpu/src/graph/primitive_inst.cpp | 6 ++++-- 22 files changed, 87 insertions(+), 74 deletions(-) diff --git a/src/plugins/intel_gpu/src/graph/impls/cpu/detection_output.cpp b/src/plugins/intel_gpu/src/graph/impls/cpu/detection_output.cpp index d211b610c0c3d7..fac3a6428734a9 100644 --- a/src/plugins/intel_gpu/src/graph/impls/cpu/detection_output.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/cpu/detection_output.cpp @@ -68,11 +68,11 @@ struct detection_output_impl : typed_primitive_impl { nms_type = (node.get_primitive()->decrease_label_id ? 
NMSType::MXNET : NMSType::CAFFE); } - void save(BinaryOutputBuffer& ob, const kernel_impl_params* impl_params = nullptr) const override { + void save(BinaryOutputBuffer& ob) const override { ob << make_data(&nms_type, sizeof(NMSType)); } - void load(BinaryInputBuffer& ib, const kernel_impl_params* impl_params = nullptr) override { + void load(BinaryInputBuffer& ib) override { ib >> make_data(&nms_type, sizeof(NMSType)); } diff --git a/src/plugins/intel_gpu/src/graph/impls/ocl/activation.cpp b/src/plugins/intel_gpu/src/graph/impls/ocl/activation.cpp index 6b367a00b14d71..c3bb3bac00f000 100644 --- a/src/plugins/intel_gpu/src/graph/impls/ocl/activation.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/ocl/activation.cpp @@ -48,13 +48,13 @@ struct activation_impl : typed_primitive_impl_ocl { return args; } - void save(BinaryOutputBuffer& ob, const kernel_impl_params* impl_params = nullptr) const override { - parent::save(ob, impl_params); + void save(BinaryOutputBuffer& ob) const override { + parent::save(ob); ob << _is_parameterized; } - void load(BinaryInputBuffer& ib, const kernel_impl_params* impl_params = nullptr) override { - parent::load(ib, impl_params); + void load(BinaryInputBuffer& ib) override { + parent::load(ib); ib >> _is_parameterized; } diff --git a/src/plugins/intel_gpu/src/graph/impls/ocl/binary_convolution.cpp b/src/plugins/intel_gpu/src/graph/impls/ocl/binary_convolution.cpp index 096aa56db787da..7d851c891643ad 100644 --- a/src/plugins/intel_gpu/src/graph/impls/ocl/binary_convolution.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/ocl/binary_convolution.cpp @@ -75,13 +75,13 @@ struct binary_convolution_impl : typed_primitive_impl_ocl { int32_t get_split() const override { return _split; } public: - void save(BinaryOutputBuffer& ob, const kernel_impl_params* impl_params = nullptr) const override { - parent::save(ob, impl_params); + void save(BinaryOutputBuffer& ob) const override { + parent::save(ob); ob << _split; } - void load(BinaryInputBuffer& ib, 
const kernel_impl_params* impl_params = nullptr) override { - parent::load(ib, impl_params); + void load(BinaryInputBuffer& ib) override { + parent::load(ib); ib >> _split; } diff --git a/src/plugins/intel_gpu/src/graph/impls/ocl/concatenation.cpp b/src/plugins/intel_gpu/src/graph/impls/ocl/concatenation.cpp index 5286df10e71868..54a8caa445083d 100644 --- a/src/plugins/intel_gpu/src/graph/impls/ocl/concatenation.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/ocl/concatenation.cpp @@ -85,13 +85,13 @@ struct concatenation_impl : typed_primitive_impl_ocl { } public: - void save(BinaryOutputBuffer& ob, const kernel_impl_params* impl_params = nullptr) const override { - parent::save(ob, impl_params); + void save(BinaryOutputBuffer& ob) const override { + parent::save(ob); ob << _can_be_optimized; } - void load(BinaryInputBuffer& ib, const kernel_impl_params* impl_params = nullptr) override { - parent::load(ib, impl_params); + void load(BinaryInputBuffer& ib) override { + parent::load(ib); ib >> _can_be_optimized; } diff --git a/src/plugins/intel_gpu/src/graph/impls/ocl/convolution.cpp b/src/plugins/intel_gpu/src/graph/impls/ocl/convolution.cpp index e0110774718bbb..aa7a918aaa1028 100644 --- a/src/plugins/intel_gpu/src/graph/impls/ocl/convolution.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/ocl/convolution.cpp @@ -80,15 +80,15 @@ struct convolution_impl : typed_primitive_impl_ocl { bool get_depthwise_sep_opt() const override { return _depthwise_sep_opt; } public: - void save(BinaryOutputBuffer& ob, const kernel_impl_params* impl_params = nullptr) const override { - parent::save(ob, impl_params); + void save(BinaryOutputBuffer& ob) const override { + parent::save(ob); ob << _split; ob << _groups; ob << _depthwise_sep_opt; } - void load(BinaryInputBuffer& ib, const kernel_impl_params* impl_params = nullptr) override { - parent::load(ib, impl_params); + void load(BinaryInputBuffer& ib) override { + parent::load(ib); ib >> _split; ib >> _groups; ib >> _depthwise_sep_opt; 
diff --git a/src/plugins/intel_gpu/src/graph/impls/ocl/crop.cpp b/src/plugins/intel_gpu/src/graph/impls/ocl/crop.cpp index 4807a8ddf4634a..0d861a61fbcb6d 100644 --- a/src/plugins/intel_gpu/src/graph/impls/ocl/crop.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/ocl/crop.cpp @@ -44,13 +44,13 @@ struct crop_impl : typed_primitive_impl_ocl { } public: - void save(BinaryOutputBuffer& ob, const kernel_impl_params* impl_params = nullptr) const override { - parent::save(ob, impl_params); + void save(BinaryOutputBuffer& ob) const override { + parent::save(ob); ob << _can_be_optimized; } - void load(BinaryInputBuffer& ib, const kernel_impl_params* impl_params = nullptr) override { - parent::load(ib, impl_params); + void load(BinaryInputBuffer& ib) override { + parent::load(ib); ib >> _can_be_optimized; } diff --git a/src/plugins/intel_gpu/src/graph/impls/ocl/custom_primitive.cpp b/src/plugins/intel_gpu/src/graph/impls/ocl/custom_primitive.cpp index f384e70797c210..3421e8c5718b07 100644 --- a/src/plugins/intel_gpu/src/graph/impls/ocl/custom_primitive.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/ocl/custom_primitive.cpp @@ -85,12 +85,12 @@ struct custom_gpu_primitive_impl : typed_primitive_impl { return {_kernel_id}; } - void save(BinaryOutputBuffer& ob, const kernel_impl_params* impl_params = nullptr) const override { + void save(BinaryOutputBuffer& ob) const override { ob << *cl_kernel; ob << _kernel_id; } - void load(BinaryInputBuffer& ib, const kernel_impl_params* impl_params = nullptr) override { + void load(BinaryInputBuffer& ib) override { cl_kernel = std::make_shared(); ib >> *cl_kernel; ib >> _kernel_id; diff --git a/src/plugins/intel_gpu/src/graph/impls/ocl/deconvolution.cpp b/src/plugins/intel_gpu/src/graph/impls/ocl/deconvolution.cpp index e214b33962a17f..7d939b66288e70 100644 --- a/src/plugins/intel_gpu/src/graph/impls/ocl/deconvolution.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/ocl/deconvolution.cpp @@ -41,14 +41,14 @@ struct deconvolution_impl : 
typed_primitive_impl_ocl { _groups = node.get_groups(); } - void save(BinaryOutputBuffer& ob, const kernel_impl_params* impl_params = nullptr) const override { - parent::save(ob, impl_params); + void save(BinaryOutputBuffer& ob) const override { + parent::save(ob); ob << _split; ob << _groups; } - void load(BinaryInputBuffer& ib, const kernel_impl_params* impl_params = nullptr) override { - parent::load(ib, impl_params); + void load(BinaryInputBuffer& ib) override { + parent::load(ib); ib >> _split; ib >> _groups; } diff --git a/src/plugins/intel_gpu/src/graph/impls/ocl/deformable_convolution.cpp b/src/plugins/intel_gpu/src/graph/impls/ocl/deformable_convolution.cpp index 50fa6ca5f4d2c8..57255cc12c3468 100644 --- a/src/plugins/intel_gpu/src/graph/impls/ocl/deformable_convolution.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/ocl/deformable_convolution.cpp @@ -42,14 +42,14 @@ struct deformable_conv_impl : typed_primitive_impl_ocl { _groups = node.get_groups(); } - void save(BinaryOutputBuffer& ob, const kernel_impl_params* impl_params = nullptr) const override { - parent::save(ob, impl_params); + void save(BinaryOutputBuffer& ob) const override { + parent::save(ob); ob << _split; ob << _groups; } - void load(BinaryInputBuffer& ib, const kernel_impl_params* impl_params = nullptr) override { - parent::load(ib, impl_params); + void load(BinaryInputBuffer& ib) override { + parent::load(ib); ib >> _split; ib >> _groups; } diff --git a/src/plugins/intel_gpu/src/graph/impls/ocl/generic_layer.cpp b/src/plugins/intel_gpu/src/graph/impls/ocl/generic_layer.cpp index 7283b3f56c5128..a25203538dd00e 100644 --- a/src/plugins/intel_gpu/src/graph/impls/ocl/generic_layer.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/ocl/generic_layer.cpp @@ -44,12 +44,12 @@ struct generic_layer_impl : typed_primitive_impl { _kernel_id = arg.get_program().add_kernel(arg.get_primitive()->generic_params.clKernel->code.kernelString); } - void save(BinaryOutputBuffer& ob, const kernel_impl_params* 
impl_params = nullptr) const override { + void save(BinaryOutputBuffer& ob) const override { ob <<_cl_kernel_data; ob << _kernel_id; } - void load(BinaryInputBuffer& ib, const kernel_impl_params* impl_params = nullptr) override { + void load(BinaryInputBuffer& ib) override { ib >> _cl_kernel_data; ib >> _kernel_id; } diff --git a/src/plugins/intel_gpu/src/graph/impls/ocl/primitive_base.hpp b/src/plugins/intel_gpu/src/graph/impls/ocl/primitive_base.hpp index bf45115b5162fa..d6697cf777947e 100644 --- a/src/plugins/intel_gpu/src/graph/impls/ocl/primitive_base.hpp +++ b/src/plugins/intel_gpu/src/graph/impls/ocl/primitive_base.hpp @@ -67,7 +67,7 @@ struct typed_primitive_impl_ocl : public typed_primitive_impl { bool is_cpu() const override { return false; } - void save(BinaryOutputBuffer& ob, const kernel_impl_params* impl_params = nullptr) const override { + void save(BinaryOutputBuffer& ob) const override { ob << make_data(&_kernel_data.internalBufferDataType, sizeof(kernel_selector::Datatype)); ob << _kernel_data.internalBufferSizes; ob << _kernel_data.kernels; @@ -75,7 +75,7 @@ struct typed_primitive_impl_ocl : public typed_primitive_impl { ob << _kernel_args; } - void load(BinaryInputBuffer& ib, const kernel_impl_params* impl_params = nullptr) override { + void load(BinaryInputBuffer& ib) override { ib >> make_data(&_kernel_data.internalBufferDataType, sizeof(kernel_selector::Datatype)); ib >> _kernel_data.internalBufferSizes; ib >> _kernel_data.kernels; diff --git a/src/plugins/intel_gpu/src/graph/impls/ocl/reorder.cpp b/src/plugins/intel_gpu/src/graph/impls/ocl/reorder.cpp index c5b24f5a7ae7de..0e6c6f8f24ba10 100644 --- a/src/plugins/intel_gpu/src/graph/impls/ocl/reorder.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/ocl/reorder.cpp @@ -40,14 +40,14 @@ struct reorder_impl : typed_primitive_impl_ocl { _has_mean = node.has_mean(); } - void save(BinaryOutputBuffer& ob, const kernel_impl_params* impl_params = nullptr) const override { - parent::save(ob, 
impl_params); + void save(BinaryOutputBuffer& ob) const override { + parent::save(ob); ob << _can_be_optimized; ob << _has_mean; } - void load(BinaryInputBuffer& ib, const kernel_impl_params* impl_params = nullptr) override { - parent::load(ib, impl_params); + void load(BinaryInputBuffer& ib) override { + parent::load(ib); ib >> _can_be_optimized; ib >> _has_mean; } diff --git a/src/plugins/intel_gpu/src/graph/impls/onednn/concatenation_onednn.cpp b/src/plugins/intel_gpu/src/graph/impls/onednn/concatenation_onednn.cpp index 3c6bbeac5d71f0..bf89db82a19fde 100644 --- a/src/plugins/intel_gpu/src/graph/impls/onednn/concatenation_onednn.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/onednn/concatenation_onednn.cpp @@ -63,7 +63,7 @@ struct concatenation_onednn : typed_primitive_onednn_impl(ob.getKernlImplParams()); auto prim = impl_params->typed_desc(); ob << prim->axis; @@ -81,18 +82,19 @@ struct concatenation_onednn : typed_primitive_onednn_impl> has_prim; if (!has_prim) return; - parent::load(ib, impl_params); + parent::load(ib); int64_t prim_axis; ib >> prim_axis; + const kernel_impl_params* impl_params = reinterpret_cast(ib.getKernlImplParams()); auto desc = get_concatenation_descriptor(*impl_params, prim_axis, ib.get_engine()); _pd = *desc; diff --git a/src/plugins/intel_gpu/src/graph/impls/onednn/convolution_onednn.cpp b/src/plugins/intel_gpu/src/graph/impls/onednn/convolution_onednn.cpp index 27c1f2f94f8f01..f10dde97ceaa96 100644 --- a/src/plugins/intel_gpu/src/graph/impls/onednn/convolution_onednn.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/onednn/convolution_onednn.cpp @@ -185,8 +185,8 @@ struct convolution_onednn : typed_primitive_onednn_impldata, sizeof(dnnl_convolution_desc_t)); @@ -195,8 +195,8 @@ struct convolution_onednn : typed_primitive_onednn_impl(); ib >> make_data(&_desc->data, sizeof(dnnl_convolution_desc_t)); diff --git a/src/plugins/intel_gpu/src/graph/impls/onednn/deconvolution_onednn.cpp 
b/src/plugins/intel_gpu/src/graph/impls/onednn/deconvolution_onednn.cpp index cf40323f8d1528..8be3740fbdff54 100644 --- a/src/plugins/intel_gpu/src/graph/impls/onednn/deconvolution_onednn.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/onednn/deconvolution_onednn.cpp @@ -104,8 +104,8 @@ struct deconvolution_onednn : typed_primitive_onednn_impldata, sizeof(dnnl_deconvolution_desc_t)); @@ -114,8 +114,8 @@ struct deconvolution_onednn : typed_primitive_onednn_impl(); ib >> make_data(&_desc->data, sizeof(dnnl_deconvolution_desc_t)); diff --git a/src/plugins/intel_gpu/src/graph/impls/onednn/fully_connected_onednn.cpp b/src/plugins/intel_gpu/src/graph/impls/onednn/fully_connected_onednn.cpp index 99b5aea13bc91a..af875b5f6b38b9 100644 --- a/src/plugins/intel_gpu/src/graph/impls/onednn/fully_connected_onednn.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/onednn/fully_connected_onednn.cpp @@ -166,8 +166,8 @@ struct fully_connected_onednn : typed_primitive_onednn_impldata, sizeof(dnnl_inner_product_desc_t)); @@ -176,8 +176,8 @@ struct fully_connected_onednn : typed_primitive_onednn_impl(); ib >> make_data(&_desc->data, sizeof(dnnl_inner_product_desc_t)); diff --git a/src/plugins/intel_gpu/src/graph/impls/onednn/gemm_onednn.cpp b/src/plugins/intel_gpu/src/graph/impls/onednn/gemm_onednn.cpp index 20f8ced65186a1..126c0c59b9c893 100644 --- a/src/plugins/intel_gpu/src/graph/impls/onednn/gemm_onednn.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/onednn/gemm_onednn.cpp @@ -209,8 +209,8 @@ struct gemm_onednn : typed_primitive_onednn_impl { } public: - void save(BinaryOutputBuffer& ob, const kernel_impl_params* impl_params = nullptr) const override { - parent::save(ob, impl_params); + void save(BinaryOutputBuffer& ob) const override { + parent::save(ob); ob << make_data(&_desc->data, sizeof(dnnl_matmul_desc_t)); @@ -219,8 +219,8 @@ struct gemm_onednn : typed_primitive_onednn_impl { ob << prim_cache; } - void load(BinaryInputBuffer& ib, const kernel_impl_params* impl_params = 
nullptr) override { - parent::load(ib, impl_params); + void load(BinaryInputBuffer& ib) override { + parent::load(ib); _desc = std::make_shared(); ib >> make_data(&_desc->data, sizeof(dnnl_matmul_desc_t)); diff --git a/src/plugins/intel_gpu/src/graph/impls/onednn/pooling_onednn.cpp b/src/plugins/intel_gpu/src/graph/impls/onednn/pooling_onednn.cpp index 20eae95f89c00b..6707da450898d0 100644 --- a/src/plugins/intel_gpu/src/graph/impls/onednn/pooling_onednn.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/onednn/pooling_onednn.cpp @@ -64,8 +64,8 @@ struct pooling_onednn : typed_primitive_onednn_impldata, sizeof(dnnl_pooling_desc_t)); @@ -74,8 +74,8 @@ struct pooling_onednn : typed_primitive_onednn_impl(); ib >> make_data(&_desc->data, sizeof(dnnl_pooling_desc_t)); diff --git a/src/plugins/intel_gpu/src/graph/impls/onednn/reduction_onednn.cpp b/src/plugins/intel_gpu/src/graph/impls/onednn/reduction_onednn.cpp index e4a6834183882d..1f25295e4b3fac 100644 --- a/src/plugins/intel_gpu/src/graph/impls/onednn/reduction_onednn.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/onednn/reduction_onednn.cpp @@ -93,8 +93,8 @@ struct reduction_onednn : typed_primitive_onednn_impldata, sizeof(dnnl_reduction_desc_t)); @@ -103,8 +103,8 @@ struct reduction_onednn : typed_primitive_onednn_impl(); ib >> make_data(&_desc->data, sizeof(dnnl_reduction_desc_t)); diff --git a/src/plugins/intel_gpu/src/graph/impls/onednn/reorder_onednn.cpp b/src/plugins/intel_gpu/src/graph/impls/onednn/reorder_onednn.cpp index 1575e388088287..bfe776d9a64b46 100644 --- a/src/plugins/intel_gpu/src/graph/impls/onednn/reorder_onednn.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/onednn/reorder_onednn.cpp @@ -63,17 +63,18 @@ struct reorder_onednn : typed_primitive_onednn_impl prim_cache; prim_cache = _prim.get_cache_blob(); ob << prim_cache; } - void load(BinaryInputBuffer& ib, const kernel_impl_params* impl_params = nullptr) override { - parent::load(ib, impl_params); + void load(BinaryInputBuffer& ib) override { + 
parent::load(ib); + const kernel_impl_params* impl_params = reinterpret_cast(ib.getKernlImplParams()); auto desc = get_reorder_descriptor(*impl_params, *_attrs, ib.get_engine()); _pd = *desc; diff --git a/src/plugins/intel_gpu/src/graph/include/serialization/binary_buffer.hpp b/src/plugins/intel_gpu/src/graph/include/serialization/binary_buffer.hpp index 7b19fb3ca2b142..1d211cc6d5caf4 100644 --- a/src/plugins/intel_gpu/src/graph/include/serialization/binary_buffer.hpp +++ b/src/plugins/intel_gpu/src/graph/include/serialization/binary_buffer.hpp @@ -24,8 +24,12 @@ class BinaryOutputBuffer : public OutputBuffer { } } + void setKernlImplParams(void* impl_params) { _impl_params = impl_params; } + void* getKernlImplParams() const { return _impl_params; } + private: std::ostream& stream; + void* _impl_params; }; class BinaryInputBuffer : public InputBuffer { @@ -39,8 +43,12 @@ class BinaryInputBuffer : public InputBuffer { } } + void setKernlImplParams(void* impl_params) { _impl_params = impl_params; } + void* getKernlImplParams() const { return _impl_params; } + private: std::istream& stream; + void* _impl_params; }; template diff --git a/src/plugins/intel_gpu/src/graph/primitive_inst.cpp b/src/plugins/intel_gpu/src/graph/primitive_inst.cpp index 8954c398e61188..8e1487993227eb 100644 --- a/src/plugins/intel_gpu/src/graph/primitive_inst.cpp +++ b/src/plugins/intel_gpu/src/graph/primitive_inst.cpp @@ -980,7 +980,8 @@ void primitive_inst::save(cldnn::BinaryOutputBuffer& ob) const { _impl->set_arguments(args_idx); _impl_params->save(ob); - _impl->save(ob, _impl_params.get()); + ob.setKernlImplParams(_impl_params.get()); + ob << _impl; ob << _node_output_layout; ob << has_mutable_input(); @@ -1105,7 +1106,8 @@ void primitive_inst::load(cldnn::BinaryInputBuffer& ib) { _impl_params = make_unique(); _impl_params->load(ib); _impl.release(); - _impl->load(ib, _impl_params.get()); + ib.setKernlImplParams(_impl_params.get()); + ib >> _impl; ib >> _node_output_layout; ib >> 
_has_mutable_input; From eeac92a5024f0e4426d50699179e41e2d0722a81 Mon Sep 17 00:00:00 2001 From: Eddy Kim Date: Mon, 7 Nov 2022 21:06:24 +0900 Subject: [PATCH 14/26] get_arguments_by_idx is added --- .../src/graph/impls/ocl/primitive_base.hpp | 92 ++++++++----------- 1 file changed, 36 insertions(+), 56 deletions(-) diff --git a/src/plugins/intel_gpu/src/graph/impls/ocl/primitive_base.hpp b/src/plugins/intel_gpu/src/graph/impls/ocl/primitive_base.hpp index d6697cf777947e..f7de871985e819 100644 --- a/src/plugins/intel_gpu/src/graph/impls/ocl/primitive_base.hpp +++ b/src/plugins/intel_gpu/src/graph/impls/ocl/primitive_base.hpp @@ -107,6 +107,40 @@ struct typed_primitive_impl_ocl : public typed_primitive_impl { return args; } + kernel_arguments_data get_arguments_by_idx(const typed_primitive_inst& instance, int32_t /*split*/) const { + kernel_arguments_data args; + + for (uint32_t i = 0; i < _kernel_args.inputs.size(); i++) { + args.inputs.push_back(instance.dep_memory_ptr(_kernel_args.inputs[i])); + } + + for (uint32_t i = 0; i < _kernel_args.intermediates.size(); i++) { + args.intermediates.push_back(instance.dep_memory_ptr(_kernel_args.intermediates[i])); + } + + args.weights = (_kernel_args.weights >= 0) ? instance.dep_memory_ptr(_kernel_args.weights) : args.weights; + args.recurrent = (_kernel_args.recurrent >= 0) ? instance.dep_memory_ptr(_kernel_args.recurrent) : args.recurrent; + args.hidden = (_kernel_args.hidden >= 0) ? instance.dep_memory_ptr(_kernel_args.hidden) : args.hidden; + args.cell = (_kernel_args.cell >= 0) ? instance.dep_memory_ptr(_kernel_args.cell) : args.cell; + args.bias = (_kernel_args.bias >= 0) ? instance.dep_memory_ptr(_kernel_args.bias) : args.bias; + args.weights_zero_points = (_kernel_args.weights_zero_points >= 0) ? + instance.dep_memory_ptr(_kernel_args.weights_zero_points) : args.weights_zero_points; + args.activations_zero_points = (_kernel_args.activations_zero_points >= 0) ? 
+ instance.dep_memory_ptr(_kernel_args.activations_zero_points) : args.activations_zero_points; + args.compensation = (_kernel_args.compensation >= 0) ? instance.dep_memory_ptr(_kernel_args.compensation) : args.compensation; + args.lookup_table = (_kernel_args.lookup_table >= 0) ? instance.dep_memory_ptr(_kernel_args.lookup_table) : args.lookup_table; + args.scale_table = (_kernel_args.scale_table >= 0) ? instance.dep_memory_ptr(_kernel_args.scale_table) : args.scale_table; + args.slope = (_kernel_args.slope >= 0) ? instance.dep_memory_ptr(_kernel_args.slope) : args.slope; + + for (size_t i = 0; i < _kernel_args.fused_op_inputs.size(); i++) { + args.fused_op_inputs.push_back(instance.dep_memory_ptr(_kernel_args.fused_op_inputs[i])); + } + + args.outputs.push_back(instance.output_memory_ptr()); + + return args; + } + virtual int32_t get_split() const { return 1; } virtual uint32_t get_groups() const { return 1; } virtual bool get_depthwise_sep_opt() const { return false; } @@ -167,34 +201,7 @@ struct typed_primitive_impl_ocl : public typed_primitive_impl { kernel_arguments_data args; if (_kernel_args.inputs.size() > 0) { - for (uint32_t i = 0; i < _kernel_args.inputs.size(); i++) { - args.inputs.push_back(instance.dep_memory_ptr(_kernel_args.inputs[i])); - } - - for (uint32_t i = 0; i < _kernel_args.intermediates.size(); i++) { - args.intermediates.push_back(instance.dep_memory_ptr(_kernel_args.intermediates[i])); - } - - args.weights = (_kernel_args.weights >= 0) ? instance.dep_memory_ptr(_kernel_args.weights) : args.weights; - args.recurrent = (_kernel_args.recurrent >= 0) ? instance.dep_memory_ptr(_kernel_args.recurrent) : args.recurrent; - args.hidden = (_kernel_args.hidden >= 0) ? instance.dep_memory_ptr(_kernel_args.hidden) : args.hidden; - args.cell = (_kernel_args.cell >= 0) ? instance.dep_memory_ptr(_kernel_args.cell) : args.cell; - args.cell = (_kernel_args.cell >= 0) ? 
instance.dep_memory_ptr(_kernel_args.cell) : args.cell; - args.bias = (_kernel_args.bias >= 0) ? instance.dep_memory_ptr(_kernel_args.bias) : args.bias; - args.weights_zero_points = (_kernel_args.weights_zero_points >= 0) ? - instance.dep_memory_ptr(_kernel_args.weights_zero_points) : args.weights_zero_points; - args.activations_zero_points = (_kernel_args.activations_zero_points >= 0) ? - instance.dep_memory_ptr(_kernel_args.activations_zero_points) : args.activations_zero_points; - args.compensation = (_kernel_args.compensation >= 0) ? instance.dep_memory_ptr(_kernel_args.compensation) : args.compensation; - args.lookup_table = (_kernel_args.lookup_table >= 0) ? instance.dep_memory_ptr(_kernel_args.lookup_table) : args.lookup_table; - args.scale_table = (_kernel_args.scale_table >= 0) ? instance.dep_memory_ptr(_kernel_args.scale_table) : args.scale_table; - args.slope = (_kernel_args.slope >= 0) ? instance.dep_memory_ptr(_kernel_args.slope) : args.slope; - - for (size_t i = 0; i < _kernel_args.fused_op_inputs.size(); i++) { - args.fused_op_inputs.push_back(instance.dep_memory_ptr(_kernel_args.fused_op_inputs[i])); - } - - args.outputs.push_back(instance.output_memory_ptr()); + args = get_arguments_by_idx(instance, i); } else { args = get_arguments(instance, i); @@ -266,34 +273,7 @@ struct typed_primitive_impl_ocl : public typed_primitive_impl { kernel_arguments_data args; if (_kernel_args.inputs.size() > 0) { - for (uint32_t i = 0; i < _kernel_args.inputs.size(); i++) { - args.inputs.push_back(instance.dep_memory_ptr(_kernel_args.inputs[i])); - } - - for (uint32_t i = 0; i < _kernel_args.intermediates.size(); i++) { - args.intermediates.push_back(instance.dep_memory_ptr(_kernel_args.intermediates[i])); - } - - args.weights = (_kernel_args.weights >= 0) ? instance.dep_memory_ptr(_kernel_args.weights) : args.weights; - args.recurrent = (_kernel_args.recurrent >= 0) ? 
instance.dep_memory_ptr(_kernel_args.recurrent) : args.recurrent; - args.hidden = (_kernel_args.hidden >= 0) ? instance.dep_memory_ptr(_kernel_args.hidden) : args.hidden; - args.cell = (_kernel_args.cell >= 0) ? instance.dep_memory_ptr(_kernel_args.cell) : args.cell; - args.cell = (_kernel_args.cell >= 0) ? instance.dep_memory_ptr(_kernel_args.cell) : args.cell; - args.bias = (_kernel_args.bias >= 0) ? instance.dep_memory_ptr(_kernel_args.bias) : args.bias; - args.weights_zero_points = (_kernel_args.weights_zero_points >= 0) ? - instance.dep_memory_ptr(_kernel_args.weights_zero_points) : args.weights_zero_points; - args.activations_zero_points = (_kernel_args.activations_zero_points >= 0) ? - instance.dep_memory_ptr(_kernel_args.activations_zero_points) : args.activations_zero_points; - args.compensation = (_kernel_args.compensation >= 0) ? instance.dep_memory_ptr(_kernel_args.compensation) : args.compensation; - args.lookup_table = (_kernel_args.lookup_table >= 0) ? instance.dep_memory_ptr(_kernel_args.lookup_table) : args.lookup_table; - args.scale_table = (_kernel_args.scale_table >= 0) ? instance.dep_memory_ptr(_kernel_args.scale_table) : args.scale_table; - args.slope = (_kernel_args.slope >= 0) ? 
instance.dep_memory_ptr(_kernel_args.slope) : args.slope; - - for (size_t i = 0; i < _kernel_args.fused_op_inputs.size(); i++) { - args.fused_op_inputs.push_back(instance.dep_memory_ptr(_kernel_args.fused_op_inputs[i])); - } - - args.outputs.push_back(instance.output_memory_ptr()); + args = get_arguments_by_idx(instance, i); } else { args = get_arguments(instance, i); From 4289eb48b6f272d9e8c34fb3b52af6ccb047c5c1 Mon Sep 17 00:00:00 2001 From: Eddy Kim Date: Tue, 8 Nov 2022 11:21:36 +0900 Subject: [PATCH 15/26] setenv is disabled in windows --- .../plugin/shared/src/behavior/ov_plugin/caching_tests.cpp | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/tests/functional/plugin/shared/src/behavior/ov_plugin/caching_tests.cpp b/src/tests/functional/plugin/shared/src/behavior/ov_plugin/caching_tests.cpp index ebdc4da6ed5936..40901540a97d55 100644 --- a/src/tests/functional/plugin/shared/src/behavior/ov_plugin/caching_tests.cpp +++ b/src/tests/functional/plugin/shared/src/behavior/ov_plugin/caching_tests.cpp @@ -182,7 +182,9 @@ void CompileModelCacheTestBase::run() { init_input_shapes(static_shapes_to_test_representation(inShapes)); } if ((targetDevice.find("GPU") != std::string::npos)) { +#if !defined(_WIN32) && !defined(_WIN64) setenv("OV_GPU_MODEL_CACHING", "1", 1); +#endif } if ((targetDevice.find("AUTO") == std::string::npos) && !importExportSupported(*core)) { GTEST_COUT << "Plugin doesn't support import and export - skipping test" << std::endl; @@ -221,7 +223,9 @@ void CompileModelCacheTestBase::run() { compare(originalOutputs, get_plugin_outputs()); } if ((targetDevice.find("GPU") != std::string::npos)) { +#if !defined(_WIN32) && !defined(_WIN64) setenv("OV_GPU_MODEL_CACHING", "", 1); +#endif } } From 98ee767d5ba24efa5b23878e57970dc2527199aa Mon Sep 17 00:00:00 2001 From: Eddy Kim Date: Tue, 8 Nov 2022 14:42:55 +0900 Subject: [PATCH 16/26] added missed part for onednn --- .../impls/onednn/primitive_onednn_base.h | 304 +++++++++++++++++- 
src/plugins/intel_gpu/src/plugin/graph.cpp | 2 +- 2 files changed, 301 insertions(+), 5 deletions(-) diff --git a/src/plugins/intel_gpu/src/graph/impls/onednn/primitive_onednn_base.h b/src/plugins/intel_gpu/src/graph/impls/onednn/primitive_onednn_base.h index b7b5baa2ca66ac..15f5f3c112ce13 100644 --- a/src/plugins/intel_gpu/src/graph/impls/onednn/primitive_onednn_base.h +++ b/src/plugins/intel_gpu/src/graph/impls/onednn/primitive_onednn_base.h @@ -32,7 +32,7 @@ static std::mutex cacheAccessMutex; template struct typed_primitive_onednn_impl : public typed_primitive_impl { - const engine& _engine; + const engine* _engine; std::shared_ptr _desc; std::shared_ptr _attrs; PrimDescType _pd; @@ -45,7 +45,7 @@ struct typed_primitive_onednn_impl : public typed_primitive_impl { const PrimDescType& pd, kernel_selector::WeightsReorderParams weights_reorder = {}) : typed_primitive_impl(weights_reorder, pd.impl_info_str()), - _engine(engine), + _engine(&engine), _desc(desc), _attrs(attrs), _pd(pd) { @@ -54,16 +54,312 @@ struct typed_primitive_onednn_impl : public typed_primitive_impl { typed_primitive_onednn_impl(const engine& engine) : typed_primitive_impl({}, "undef"), - _engine(engine), + _engine(&engine), _pd(), _prim() { } + typed_primitive_onednn_impl() + : typed_primitive_impl({}, "undef"), + _desc(nullptr), _pd(), _prim() { + _attrs = std::make_shared(); + } + bool is_cpu() const override { return false; } + void save(BinaryOutputBuffer& ob) const override { + if (_attrs.get() == nullptr) { + ob << false; + } else { + ob << true; + } + + if (_attrs.get() != nullptr) { + { + int mask; + std::vector scales; + std::vector zero_points; + + _attrs.get()->get_output_scales(mask, scales); + ob << mask << scales; + + scales.clear(); + _attrs.get()->get_scales(DNNL_ARG_SRC_0, mask, scales); + ob << mask << scales; + scales.clear(); + _attrs.get()->get_scales(DNNL_ARG_SRC_1, mask, scales); + ob << mask << scales; + + _attrs.get()->get_zero_points(DNNL_ARG_SRC, mask, zero_points); + 
ob << mask << zero_points; + zero_points.clear(); + _attrs.get()->get_zero_points(DNNL_ARG_WEIGHTS, mask, zero_points); + ob << mask << zero_points; + zero_points.clear(); + _attrs.get()->get_zero_points(DNNL_ARG_DST, mask, zero_points); + ob << mask << zero_points; + } + { + dnnl::scratchpad_mode _scratchpad_mode = _attrs.get()->get_scratchpad_mode(); + ob << make_data(&_scratchpad_mode, sizeof(dnnl::scratchpad_mode)); + } + { + dnnl::fpmath_mode _fmath_mode = _attrs.get()->get_fpmath_mode(); + ob << make_data(&_fmath_mode, sizeof(dnnl::fpmath_mode)); + } + { + const dnnl::post_ops _post_ops = _attrs.get()->get_post_ops(); + + ob << _post_ops.len(); + for (int idx = 0; idx < _post_ops.len(); ++idx) { + dnnl::primitive::kind _kind = _post_ops.kind(idx); + + ob << make_data(&_kind, sizeof(dnnl::primitive::kind)); + + if (_kind == dnnl::primitive::kind::sum) { + float scale; + int32_t zero_point; + dnnl::memory::data_type data_type; + + _post_ops.get_params_sum(idx, scale, zero_point, data_type); + + ob << scale; + ob << zero_point; + ob << make_data(&data_type, sizeof(dnnl::memory::data_type)); + } else if (_kind == dnnl::primitive::kind::eltwise) { + float scale; + dnnl::algorithm aalgorithm; + float alpha; + float beta; + + _post_ops.get_params_eltwise(idx, scale, aalgorithm, alpha, beta); + ob << scale; + ob << make_data(&aalgorithm, sizeof(dnnl::algorithm)); + ob << alpha; + ob << beta; + } else if (_kind == dnnl::primitive::kind::convolution) { + dnnl::memory::data_type weights_data_type; + dnnl::memory::data_type bias_data_type; + dnnl::memory::data_type dst_data_type; + int mask; + std::vector scales; + + try { + _post_ops.get_params_dw_k3s1p1(idx, weights_data_type, bias_data_type, dst_data_type, mask, scales); + int stride = 1; + ob << stride; + } catch (...) 
{ + _post_ops.get_params_dw_k3s2p1(idx, weights_data_type, bias_data_type, dst_data_type, mask, scales); + int stride = 2; + ob << stride; + } + + ob << make_data(&weights_data_type, sizeof(dnnl::memory::data_type)); + ob << make_data(&bias_data_type, sizeof(dnnl::memory::data_type)); + ob << make_data(&dst_data_type, sizeof(dnnl::memory::data_type)); + ob << mask; + ob << scales; + } else if (_kind == dnnl::primitive::kind::binary) { + dnnl::algorithm aalgorithm; + dnnl::memory::desc src1_desc; + + _post_ops.get_params_binary(idx, aalgorithm, src1_desc); + + ob << make_data(&aalgorithm, sizeof(dnnl::algorithm)); + ob << make_data(&src1_desc, sizeof(dnnl::memory::desc)); + } else if (_kind == dnnl::primitive::kind::prelu) { + int mask; + + _post_ops.get_params_prelu(idx, mask); + + ob << mask; + } + } + } + { + float scale, shift; + _attrs.get()->get_rnn_data_qparams(scale, shift); + ob << scale << shift; + } + { + int mask; + std::vector scales; + + _attrs.get()->get_rnn_weights_qparams(mask, scales); + + ob << mask; + ob << scales; + } + { + int mask; + std::vector scales; + + _attrs.get()->get_rnn_weights_projection_qparams(mask, scales); + + ob << mask; + ob << scales; + } + } + } + + void load(BinaryInputBuffer& ib) override { + bool has_attrs; + ib >> has_attrs; + + if (has_attrs) { + { + int mask; + std::vector scales; + ib >> mask >> scales; + + _attrs.get()->set_output_scales(mask, scales); + } + { + int mask; + std::vector scales; + bool default_output_scales = true; + + _attrs.get()->get_output_scales(mask, scales); + for (float scale : scales) { + if (scale != 1.) 
{ + default_output_scales = false; + break; + } + } + + scales.clear(); + ib >> mask >> scales; + if (default_output_scales) + _attrs.get()->set_scales(DNNL_ARG_SRC_0, mask, scales); + scales.clear(); + ib >> mask >> scales; + if (default_output_scales) + _attrs.get()->set_scales(DNNL_ARG_SRC_1, mask, scales); + } + { + int mask; + std::vector zero_points; + ib >> mask >> zero_points; + _attrs.get()->set_zero_points(DNNL_ARG_SRC, mask, zero_points); + zero_points.clear(); + ib >> mask >> zero_points; + _attrs.get()->set_zero_points(DNNL_ARG_WEIGHTS, mask, zero_points); + zero_points.clear(); + ib >> mask >> zero_points; + _attrs.get()->set_zero_points(DNNL_ARG_DST, mask, zero_points); + } + { + dnnl::scratchpad_mode _scratchpad_mode; + ib >> make_data(&_scratchpad_mode, sizeof(dnnl::scratchpad_mode)); + _attrs.get()->set_scratchpad_mode(_scratchpad_mode); + } + { + dnnl::fpmath_mode _fmath_mode; + ib >> make_data(&_fmath_mode, sizeof(dnnl::fpmath_mode)); + _attrs.get()->set_fpmath_mode(_fmath_mode); + } + { + dnnl::post_ops _post_ops; + + int post_ops_len; + + ib >> post_ops_len; + for (int idx = 0; idx < post_ops_len; ++idx) { + dnnl::primitive::kind _kind; + + ib >> make_data(&_kind, sizeof(dnnl::primitive::kind)); + + if (_kind == dnnl::primitive::kind::sum) { + float scale; + int32_t zero_point; + dnnl::memory::data_type data_type; + + ib >> scale; + ib >> zero_point; + ib >> make_data(&data_type, sizeof(dnnl::memory::data_type)); + + _post_ops.append_sum(scale, zero_point, data_type); + } else if (_kind == dnnl::primitive::kind::eltwise) { + float scale; + dnnl::algorithm aalgorithm; + float alpha; + float beta; + + ib >> scale; + ib >> make_data(&aalgorithm, sizeof(dnnl::algorithm)); + ib >> alpha; + ib >> beta; + _post_ops.append_eltwise(scale, aalgorithm, alpha, beta); + } else if (_kind == dnnl::primitive::kind::convolution) { + int stride; + dnnl::memory::data_type weights_data_type; + dnnl::memory::data_type bias_data_type; + dnnl::memory::data_type 
dst_data_type; + int mask; + std::vector scales; + + ib >> stride; + ib >> make_data(&weights_data_type, sizeof(dnnl::memory::data_type)); + ib >> make_data(&bias_data_type, sizeof(dnnl::memory::data_type)); + ib >> make_data(&dst_data_type, sizeof(dnnl::memory::data_type)); + ib >> mask; + ib >> scales; + + if (stride == 1) { + _post_ops.append_dw_k3s1p1(weights_data_type, bias_data_type, dst_data_type, mask, scales); + } else { + _post_ops.append_dw_k3s2p1(weights_data_type, bias_data_type, dst_data_type, mask, scales); + } + } else if (_kind == dnnl::primitive::kind::binary) { + dnnl::algorithm aalgorithm; + dnnl::memory::desc src1_desc; + + ib >> make_data(&aalgorithm, sizeof(dnnl::algorithm)); + ib >> make_data(&src1_desc, sizeof(dnnl::memory::desc)); + + _post_ops.append_binary(aalgorithm, src1_desc); + } else if (_kind == dnnl::primitive::kind::prelu) { + int mask; + ib >> mask; + _post_ops.append_prelu(mask); + } + } + + _attrs.get()->set_post_ops(_post_ops); + } + { + float scale; + float shift; + + ib >> scale >> shift; + _attrs.get()->set_rnn_data_qparams(scale, shift); + } + { + int mask; + std::vector scales; + + ib >> mask; + ib >> scales; + + _attrs.get()->set_rnn_weights_qparams(mask, scales); + } + { + int mask; + std::vector scales; + + ib >> mask; + ib >> scales; + + _attrs.get()->set_rnn_weights_projection_qparams(mask, scales); + } + + _engine = &ib.get_engine(); + } + } + private: std::string get_cache_directory() const { - auto path = _engine.configuration().kernels_cache_path; + auto path = _engine->configuration().kernels_cache_path; if (path.empty()) { return {}; } diff --git a/src/plugins/intel_gpu/src/plugin/graph.cpp b/src/plugins/intel_gpu/src/plugin/graph.cpp index ba88004c3b7cc6..e6289b18bbb089 100644 --- a/src/plugins/intel_gpu/src/plugin/graph.cpp +++ b/src/plugins/intel_gpu/src/plugin/graph.cpp @@ -480,7 +480,7 @@ void Graph::Export(cldnn::BinaryOutputBuffer &ob) { std::shared_ptr Graph::GetExecGraphInfo() { auto primitives_info = 
GetNetwork()->get_primitives_info(); - return GetExecGraphInfoByPrimitivesInfo(primitives_info, false); + return GetExecGraphInfoByPrimitivesInfo(primitives_info, true); } From 2aba64a20e014ce78635955e9ddbc1f308e7a113 Mon Sep 17 00:00:00 2001 From: Eddy Kim Date: Tue, 8 Nov 2022 18:36:43 +0900 Subject: [PATCH 17/26] code refactoring based on code review --- .../src/graph/kernel_selector_helper.cpp | 87 ++++++++++++++++++- .../intel_gpu/src/graph/primitive_inst.cpp | 16 +--- src/plugins/intel_gpu/src/graph/program.cpp | 4 +- .../intel_gpu/src/plugin/compiled_model.cpp | 9 +- src/plugins/intel_gpu/src/plugin/plugin.cpp | 4 - 5 files changed, 94 insertions(+), 26 deletions(-) diff --git a/src/plugins/intel_gpu/src/graph/kernel_selector_helper.cpp b/src/plugins/intel_gpu/src/graph/kernel_selector_helper.cpp index 0c22e28e37457f..216f30848231b3 100644 --- a/src/plugins/intel_gpu/src/graph/kernel_selector_helper.cpp +++ b/src/plugins/intel_gpu/src/graph/kernel_selector_helper.cpp @@ -1048,7 +1048,46 @@ void kernel_impl_params::save(BinaryOutputBuffer& ob) const { ob << unique_id; ob << input_layouts; ob << output_layout; - ob << primary_input_idx; + ob << input_offsets.size(); + for (size_t i = 0; i < input_offsets.size(); i++) { + ob << input_offsets[i].sizes(); + } + + if (weights_layout.has_value()) { + ob << true; + ob << weights_layout.value(); + } else { + ob << false; + } + + if (bias_layout.has_value()) { + ob << true; + ob << bias_layout.value(); + } else { + ob << false; + } + + if (weights_zero_points_layout.has_value()) { + ob << true; + ob << weights_zero_points_layout.value(); + } else { + ob << false; + } + + if (activations_zero_points_layout.has_value()) { + ob << true; + ob << activations_zero_points_layout.value(); + } else { + ob << false; + } + + if (compensation_layout.has_value()) { + ob << true; + ob << compensation_layout.value(); + } else { + ob << false; + } + ob << fused_desc.size(); #ifdef ENABLE_ONEDNN_FOR_GPU size_t num_fused_prims = 
fused_desc_onednn.size(); @@ -1057,6 +1096,7 @@ void kernel_impl_params::save(BinaryOutputBuffer& ob) const { ob << make_data(&fused_prim, sizeof(fused_primitive_desc_onednn)); } #endif // ENABLE_ONEDNN_FOR_GPU + ob << primary_input_idx; } void kernel_impl_params::load(BinaryInputBuffer& ib) { @@ -1064,7 +1104,49 @@ void kernel_impl_params::load(BinaryInputBuffer& ib) { ib >> unique_id; ib >> input_layouts; ib >> output_layout; - ib >> primary_input_idx; + { + size_t num_input_offsets; + ib >> num_input_offsets; + input_offsets.resize(num_input_offsets); + for (size_t i = 0; i < num_input_offsets; i++) { + std::vector sizes; + ib >> sizes; + input_offsets[i] = cldnn::tensor(sizes); + } + } + bool has_value = false; + layout layout_buf; + + ib >> has_value; + if (has_value) { + ib >> layout_buf; + weights_layout = layout_buf; + } + + ib >> has_value; + if (has_value) { + ib >> layout_buf; + bias_layout = layout_buf; + } + + ib >> has_value; + if (has_value) { + ib >> layout_buf; + weights_zero_points_layout = layout_buf; + } + + ib >> has_value; + if (has_value) { + ib >> layout_buf; + activations_zero_points_layout = layout_buf; + } + + ib >> has_value; + if (has_value) { + ib >> layout_buf; + compensation_layout = layout_buf; + } + { // Fake fused_desc just for has_fused_primitives() size_t num_fused_desc; @@ -1081,4 +1163,5 @@ void kernel_impl_params::load(BinaryInputBuffer& ib) { ib >> make_data(&fused_desc_onednn[idx], sizeof(fused_primitive_desc_onednn)); } #endif // ENABLE_ONEDNN_FOR_GPU + ib >> primary_input_idx; } diff --git a/src/plugins/intel_gpu/src/graph/primitive_inst.cpp b/src/plugins/intel_gpu/src/graph/primitive_inst.cpp index 8e1487993227eb..ff01247c5b8ee1 100644 --- a/src/plugins/intel_gpu/src/graph/primitive_inst.cpp +++ b/src/plugins/intel_gpu/src/graph/primitive_inst.cpp @@ -443,11 +443,8 @@ void primitive_inst::rebuild_deps( _deps.resize(_dep_ids.size()); for (size_t i = 0; i < _dep_ids.size(); i++) { - if (primitives.count(_dep_ids[i]) > 0) { 
- _deps[i] = primitives.at(_dep_ids[i]); - } else { - std::cout << _dep_ids[i] << " is not found in _primitives" << std::endl; - } + OPENVINO_ASSERT((primitives.count(_dep_ids[i]) > 0), _dep_ids[i], "is not found in _primitives"); + _deps[i] = primitives.at(_dep_ids[i]); } } @@ -464,9 +461,7 @@ void primitive_inst::rebuild_exec_deps( break; } } - if (found == false) { - std::cout << "not found in _exec_order" << std::endl; - } + OPENVINO_ASSERT(found, _exec_dep_ids[i], "not found in _exec_order"); } } @@ -1088,7 +1083,7 @@ void primitive_inst::load(cldnn::BinaryInputBuffer& ib) { allocation_type _allocation_type; ib >> make_data(&_allocation_type, sizeof(_allocation_type)); - size_t data_size; // = _output->size(); + size_t data_size; ib >> cldnn::make_data(&data_size, sizeof(size_t)); _outputs[0] = get_network().get_memory_pool().get_memory(output_layout, _allocation_type, false); @@ -1101,7 +1096,6 @@ void primitive_inst::load(cldnn::BinaryInputBuffer& ib) { delete[] _buf; } } else if (_object_type == object_type::EXECUTABLE_INST) { - // primitive_impl _impl_params.release(); _impl_params = make_unique(); _impl_params->load(ib); @@ -1128,7 +1122,6 @@ void primitive_inst::load(cldnn::BinaryInputBuffer& ib) { ib >> _can_share_buffer; ib >> _is_constant; - // output memory layout output_layout = layout(cldnn::data_types::bin, cldnn::format::any, cldnn::tensor()); ib >> output_layout; @@ -1165,5 +1158,4 @@ void primitive_inst::load(cldnn::BinaryInputBuffer& ib) { _output_changed = false; } } - } // namespace cldnn diff --git a/src/plugins/intel_gpu/src/graph/program.cpp b/src/plugins/intel_gpu/src/graph/program.cpp index 881a8dc12acccf..ef43eeb12d0b7b 100644 --- a/src/plugins/intel_gpu/src/graph/program.cpp +++ b/src/plugins/intel_gpu/src/graph/program.cpp @@ -762,7 +762,9 @@ void program::cleanup() { } } } - // _kernels_cache->reset(); + + if (_engine.configuration().kernels_cache_path.empty()) + _kernels_cache->reset(); } void program::add_split_outputs() { diff 
--git a/src/plugins/intel_gpu/src/plugin/compiled_model.cpp b/src/plugins/intel_gpu/src/plugin/compiled_model.cpp index c4ec038d6acee7..422f8533a3a0ed 100644 --- a/src/plugins/intel_gpu/src/plugin/compiled_model.cpp +++ b/src/plugins/intel_gpu/src/plugin/compiled_model.cpp @@ -53,9 +53,7 @@ CompiledModel::CompiledModel(InferenceEngine::CNNNetwork &network, std::shared_p m_waitExecutor(executorManager()->getIdleCPUStreamsExecutor({ "GPUWaitExecutor" })) { auto casted_context = std::dynamic_pointer_cast(context); - if (nullptr == casted_context) { - IE_THROW() << "Invalid remote context"; - } + OPENVINO_ASSERT((casted_context != nullptr), "Invalid remote context"); m_context = casted_context; @@ -107,9 +105,7 @@ CompiledModel::CompiledModel(std::istream& networkModel, std::shared_ptrgetIdleCPUStreamsExecutor({ "GPUWaitExecutor" })) { auto casted_context = std::dynamic_pointer_cast(context); - if (nullptr == casted_context) { - IE_THROW() << "Invalid remote context"; - } + OPENVINO_ASSERT((casted_context != nullptr), "Invalid remote context"); m_context = casted_context; @@ -195,7 +191,6 @@ CompiledModel::CompiledModel(std::istream& networkModel, std::shared_ptrset_friendly_name(param_name); new_param->set_element_type(param_element_type); new_param->set_layout(param_layout); - // hoho->output(0).get_rt_info() = param_rt_info; new_param->output(0).get_tensor().set_names(param_names); new_param->validate_and_infer_types(); new_params.emplace_back(new_param); diff --git a/src/plugins/intel_gpu/src/plugin/plugin.cpp b/src/plugins/intel_gpu/src/plugin/plugin.cpp index a8893b93eced31..05fd568f6494e3 100644 --- a/src/plugins/intel_gpu/src/plugin/plugin.cpp +++ b/src/plugins/intel_gpu/src/plugin/plugin.cpp @@ -163,7 +163,6 @@ Plugin::Plugin() : m_defaultContexts({}) { CustomLayer::LoadFromFile(config_path, config.second.customLayers, true); } - isModelCachingEnabled = false; if (const char* env_p = std::getenv("OV_GPU_MODEL_CACHING")) { if (env_p[0] == '1') { 
isModelCachingEnabled = true; @@ -279,9 +278,6 @@ IExecutableNetworkInternal::Ptr Plugin::LoadExeNetworkImpl(const InferenceEngine OV_ITT_SCOPED_TASK(itt::domains::intel_gpu_plugin, "Plugin::LoadExeNetworkImpl::CreateContext"); std::lock_guard lock(engine_mutex); if (!canReuseDefaultContext()) { - // if (m_defaultContexts.find(conf.device_id) != m_defaultContexts.end()) { - // statistics_map.erase(m_defaultContexts[conf.device_id]); - // } m_defaultContexts[conf.device_id] = std::make_shared(shared_from_this(), AnyMap(), conf); } else { m_defaultContexts[conf.device_id]->GetConfig().kernels_cache_dir = conf.kernels_cache_dir; From 6b59b3690bed6b4dc72402c94e2fec801f86fee7 Mon Sep 17 00:00:00 2001 From: Eddy Kim Date: Wed, 9 Nov 2022 17:25:02 +0900 Subject: [PATCH 18/26] fixed to use get_node_output_layout() --- src/plugins/intel_gpu/src/graph/input_layout.cpp | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/plugins/intel_gpu/src/graph/input_layout.cpp b/src/plugins/intel_gpu/src/graph/input_layout.cpp index a755695871b3da..1fa652dc073cb2 100644 --- a/src/plugins/intel_gpu/src/graph/input_layout.cpp +++ b/src/plugins/intel_gpu/src/graph/input_layout.cpp @@ -41,7 +41,9 @@ input_layout_inst::typed_primitive_inst(network& network, input_layout_node cons } void input_layout_inst::set_data(memory::ptr mem) { - check_memory_to_set(*mem, _impl_params->output_layout); + auto ol = get_node_output_layout(); + + check_memory_to_set(*mem, ol); if (mem->is_allocated_by(get_network().get_engine())) { OPENVINO_ASSERT(!_outputs.empty(), "[GPU] Can't set data for empty input memory"); From 48d406dacbade5154e3057cc080511fe47dfdc3a Mon Sep 17 00:00:00 2001 From: Eddy Kim Date: Wed, 9 Nov 2022 17:26:54 +0900 Subject: [PATCH 19/26] OV_GPU_MODEL_CACHING is changed to OV_GPU_CACHE_MODEL --- .../intel_gpu/src/graph/impls/onednn/primitive_onednn_base.h | 2 +- src/plugins/intel_gpu/src/plugin/plugin.cpp | 2 +- src/plugins/intel_gpu/src/runtime/kernels_cache.cpp | 2 +- 
.../plugin/shared/src/behavior/ov_plugin/caching_tests.cpp | 4 ++-- 4 files changed, 5 insertions(+), 5 deletions(-) diff --git a/src/plugins/intel_gpu/src/graph/impls/onednn/primitive_onednn_base.h b/src/plugins/intel_gpu/src/graph/impls/onednn/primitive_onednn_base.h index 15f5f3c112ce13..2207245d0e3e94 100644 --- a/src/plugins/intel_gpu/src/graph/impls/onednn/primitive_onednn_base.h +++ b/src/plugins/intel_gpu/src/graph/impls/onednn/primitive_onednn_base.h @@ -384,7 +384,7 @@ struct typed_primitive_onednn_impl : public typed_primitive_impl { void build_primitive() { auto cache_outpath = get_cache_directory(); - if (const char* env_p = std::getenv("OV_GPU_MODEL_CACHING")) { + if (const char* env_p = std::getenv("OV_GPU_CACHE_MODEL")) { if (env_p[0] == '1') { cache_outpath = ""; } diff --git a/src/plugins/intel_gpu/src/plugin/plugin.cpp b/src/plugins/intel_gpu/src/plugin/plugin.cpp index 05fd568f6494e3..9902ffe280bdbf 100644 --- a/src/plugins/intel_gpu/src/plugin/plugin.cpp +++ b/src/plugins/intel_gpu/src/plugin/plugin.cpp @@ -163,7 +163,7 @@ Plugin::Plugin() : m_defaultContexts({}) { CustomLayer::LoadFromFile(config_path, config.second.customLayers, true); } - if (const char* env_p = std::getenv("OV_GPU_MODEL_CACHING")) { + if (const char* env_p = std::getenv("OV_GPU_CACHE_MODEL")) { if (env_p[0] == '1') { isModelCachingEnabled = true; } diff --git a/src/plugins/intel_gpu/src/runtime/kernels_cache.cpp b/src/plugins/intel_gpu/src/runtime/kernels_cache.cpp index 59cf36bde71bee..4b05d74318c676 100644 --- a/src/plugins/intel_gpu/src/runtime/kernels_cache.cpp +++ b/src/plugins/intel_gpu/src/runtime/kernels_cache.cpp @@ -70,7 +70,7 @@ std::string kernels_cache::get_cache_path() const { } bool kernels_cache::is_cache_enabled() const { - if (const char* env_p = std::getenv("OV_GPU_MODEL_CACHING")) { + if (const char* env_p = std::getenv("OV_GPU_CACHE_MODEL")) { if (env_p[0] == '1') { return false; } diff --git 
a/src/tests/functional/plugin/shared/src/behavior/ov_plugin/caching_tests.cpp b/src/tests/functional/plugin/shared/src/behavior/ov_plugin/caching_tests.cpp index 40901540a97d55..4b8869369ef47b 100644 --- a/src/tests/functional/plugin/shared/src/behavior/ov_plugin/caching_tests.cpp +++ b/src/tests/functional/plugin/shared/src/behavior/ov_plugin/caching_tests.cpp @@ -183,7 +183,7 @@ void CompileModelCacheTestBase::run() { } if ((targetDevice.find("GPU") != std::string::npos)) { #if !defined(_WIN32) && !defined(_WIN64) - setenv("OV_GPU_MODEL_CACHING", "1", 1); + setenv("OV_GPU_CACHE_MODEL", "1", 1); #endif } if ((targetDevice.find("AUTO") == std::string::npos) && !importExportSupported(*core)) { @@ -224,7 +224,7 @@ void CompileModelCacheTestBase::run() { } if ((targetDevice.find("GPU") != std::string::npos)) { #if !defined(_WIN32) && !defined(_WIN64) - setenv("OV_GPU_MODEL_CACHING", "", 1); + setenv("OV_GPU_CACHE_MODEL", "", 1); #endif } } From 2155a98661c90a78805e4ed0a730b7badd110b32 Mon Sep 17 00:00:00 2001 From: Eddy Kim Date: Wed, 9 Nov 2022 22:16:37 +0900 Subject: [PATCH 20/26] reference to node and primitive are removed --- .../intel_gpu/src/graph/include/fully_connected_inst.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/plugins/intel_gpu/src/graph/include/fully_connected_inst.h b/src/plugins/intel_gpu/src/graph/include/fully_connected_inst.h index b9e010fc39c689..652b45ae7ba521 100644 --- a/src/plugins/intel_gpu/src/graph/include/fully_connected_inst.h +++ b/src/plugins/intel_gpu/src/graph/include/fully_connected_inst.h @@ -54,11 +54,11 @@ class typed_primitive_inst : public typed_primitive_inst_baseis_dynamic() && _impl_params->reordered_weights != nullptr ? _impl_params->reordered_weights : dep_memory_ptr(1); + return is_dynamic() && _impl_params->reordered_weights != nullptr ? 
_impl_params->reordered_weights : dep_memory_ptr(1); } memory::ptr bias_memory() const { return dep_memory_ptr(2); } - bool bias_term() const { return !argument->bias.empty(); } + bool bias_term() const { return _impl_params->bias_layout.has_value(); } }; using fully_connected_inst = typed_primitive_inst; From 4e12dfff1a41539782e33a4b6731973ba551bf57 Mon Sep 17 00:00:00 2001 From: Eddy Kim Date: Thu, 10 Nov 2022 13:49:41 +0900 Subject: [PATCH 21/26] fixed hash of enum class --- .../src/graph/include/serialization/bind.hpp | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/src/plugins/intel_gpu/src/graph/include/serialization/bind.hpp b/src/plugins/intel_gpu/src/graph/include/serialization/bind.hpp index 258274b606e409..9002eaf209373a 100644 --- a/src/plugins/intel_gpu/src/graph/include/serialization/bind.hpp +++ b/src/plugins/intel_gpu/src/graph/include/serialization/bind.hpp @@ -25,12 +25,19 @@ const instance_creator& bind_creator::creator = \ static_instance>::get_instance().instantiate(); +// It's a defect, and was fixed in C++14 +// https://www.open-std.org/jtc1/sc22/wg21/docs/lwg-defects.html#2148 +struct enum_class_hash { + template + std::size_t operator()(T t) const { return static_cast(t); } +}; + namespace cldnn { template struct saver_storage { using save_function = std::function; - using value_type = typename std::unordered_map::value_type; + using value_type = typename std::unordered_map::value_type; static saver_storage& instance() { static saver_storage instance; @@ -50,7 +57,7 @@ struct saver_storage { saver_storage(const saver_storage&) = delete; void operator=(const saver_storage&) = delete; - std::unordered_map map; + std::unordered_map map; }; template @@ -60,7 +67,7 @@ struct void_deleter { template struct loader_storage { - using value_type = typename std::unordered_map::value_type; + using value_type = typename std::unordered_map::value_type; static loader_storage& instance() { static loader_storage instance; 
@@ -80,7 +87,7 @@ struct loader_storage { loader_storage(const loader_storage&) = delete; void operator=(const loader_storage&) = delete; - std::unordered_map map; + std::unordered_map map; }; template From 41b4227261321d84aceeecaf45c53debb935d882 Mon Sep 17 00:00:00 2001 From: Eddy Kim Date: Thu, 10 Nov 2022 16:23:27 +0900 Subject: [PATCH 22/26] restored CanShareContextWith --- src/plugins/intel_gpu/src/plugin/device_config.cpp | 1 + src/plugins/intel_gpu/src/plugin/plugin.cpp | 10 +++++----- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/src/plugins/intel_gpu/src/plugin/device_config.cpp b/src/plugins/intel_gpu/src/plugin/device_config.cpp index 720918902aa3cf..550b80e6553be1 100644 --- a/src/plugins/intel_gpu/src/plugin/device_config.cpp +++ b/src/plugins/intel_gpu/src/plugin/device_config.cpp @@ -566,6 +566,7 @@ bool Config::CanShareContextWith(const Config& other) const { this->sources_dumps_dir == other.sources_dumps_dir && this->tuningConfig.mode == other.tuningConfig.mode && this->tuningConfig.cache_file_path == other.tuningConfig.cache_file_path && + this->kernels_cache_dir == other.kernels_cache_dir && this->device_id == other.device_id && this->task_exec_config._streams == other.task_exec_config._streams && this->task_exec_config._threadPreferredCoreType == other.task_exec_config._threadPreferredCoreType && diff --git a/src/plugins/intel_gpu/src/plugin/plugin.cpp b/src/plugins/intel_gpu/src/plugin/plugin.cpp index 9902ffe280bdbf..54b39d0a3fca49 100644 --- a/src/plugins/intel_gpu/src/plugin/plugin.cpp +++ b/src/plugins/intel_gpu/src/plugin/plugin.cpp @@ -267,6 +267,8 @@ IExecutableNetworkInternal::Ptr Plugin::LoadExeNetworkImpl(const InferenceEngine auto config = ConvertPerfHintsToConfig(orig_config, conf); UpdateConfig(conf, network, config); + RemoteCLContext::Ptr context; + auto canReuseDefaultContext = [&]() -> bool { if (m_defaultContexts.find(conf.device_id) == m_defaultContexts.end()) return false; @@ -277,17 +279,15 @@ 
IExecutableNetworkInternal::Ptr Plugin::LoadExeNetworkImpl(const InferenceEngine { OV_ITT_SCOPED_TASK(itt::domains::intel_gpu_plugin, "Plugin::LoadExeNetworkImpl::CreateContext"); std::lock_guard lock(engine_mutex); - if (!canReuseDefaultContext()) { + if (!canReuseDefaultContext()) m_defaultContexts[conf.device_id] = std::make_shared(shared_from_this(), AnyMap(), conf); - } else { - m_defaultContexts[conf.device_id]->GetConfig().kernels_cache_dir = conf.kernels_cache_dir; - } } + context = m_defaultContexts[conf.device_id]; + auto transformedNetwork = CloneAndTransformNetwork(network, conf); { OV_ITT_SCOPED_TASK(itt::domains::intel_gpu_plugin, "Plugin::LoadExeNetworkImpl::CreateExeNetwork"); - RemoteCLContext::Ptr context = m_defaultContexts[conf.device_id]; CompiledModel::Ptr exeNetwork = std::make_shared(transformedNetwork, context, conf); UpdateStatistics(context); return exeNetwork; From cb1a1cbeaf4a619b2324e9fab1e690ed9c85454e Mon Sep 17 00:00:00 2001 From: Eddy Kim Date: Thu, 10 Nov 2022 22:35:51 +0900 Subject: [PATCH 23/26] serialization of intermediate memory --- .../include/intel_gpu/runtime/kernel_args.hpp | 4 --- .../intel_gpu/src/graph/deconvolution.cpp | 18 ++++++++++++- .../src/graph/impls/ocl/primitive_base.hpp | 10 +++---- .../src/graph/include/deconvolution_inst.h | 27 ++++++++++--------- .../intel_gpu/src/graph/primitive_inst.cpp | 25 ++++++++++++----- 5 files changed, 52 insertions(+), 32 deletions(-) diff --git a/src/plugins/intel_gpu/include/intel_gpu/runtime/kernel_args.hpp b/src/plugins/intel_gpu/include/intel_gpu/runtime/kernel_args.hpp index 060fe254675d4a..3a1d25067142fd 100644 --- a/src/plugins/intel_gpu/include/intel_gpu/runtime/kernel_args.hpp +++ b/src/plugins/intel_gpu/include/intel_gpu/runtime/kernel_args.hpp @@ -116,8 +116,6 @@ struct kernel_arguments_data { struct kernel_arguments_data_idx { std::vector inputs; - std::vector intermediates; - // std::vector outputs; int32_t weights; int32_t recurrent; int32_t hidden; @@ -137,7 
+135,6 @@ struct kernel_arguments_data_idx { template void save(BufferType& ob) const { ob << inputs; - ob << intermediates; ob << weights; ob << recurrent; ob << hidden; @@ -156,7 +153,6 @@ struct kernel_arguments_data_idx { template void load(BufferType& ib) { ib >> inputs; - ib >> intermediates; ib >> weights; ib >> recurrent; ib >> hidden; diff --git a/src/plugins/intel_gpu/src/graph/deconvolution.cpp b/src/plugins/intel_gpu/src/graph/deconvolution.cpp index 0b0c8950c904b8..dac38d87a7577a 100644 --- a/src/plugins/intel_gpu/src/graph/deconvolution.cpp +++ b/src/plugins/intel_gpu/src/graph/deconvolution.cpp @@ -141,7 +141,9 @@ std::string deconvolution_inst::to_string(deconvolution_node const& node) { } deconvolution_inst::typed_primitive_inst(network& network, deconvolution_node const& node) - : parent(network, node) { + : parent(network, node), + _groups(node.get_groups()), + _split(node.get_split()) { auto stride = argument->stride; auto pad = argument->pad; @@ -220,4 +222,18 @@ deconvolution_inst::typed_primitive_inst(network& network, deconvolution_node co "Weights/ifm mismatch"); } } + +void deconvolution_inst::save(cldnn::BinaryOutputBuffer& ob) const { + parent::save(ob); + + ob << _groups; + ob << _split; +} + +void deconvolution_inst::load(cldnn::BinaryInputBuffer& ib) { + parent::load(ib); + + ib >> _groups; + ib >> _split; +} } // namespace cldnn diff --git a/src/plugins/intel_gpu/src/graph/impls/ocl/primitive_base.hpp b/src/plugins/intel_gpu/src/graph/impls/ocl/primitive_base.hpp index f7de871985e819..025baa085d80d3 100644 --- a/src/plugins/intel_gpu/src/graph/impls/ocl/primitive_base.hpp +++ b/src/plugins/intel_gpu/src/graph/impls/ocl/primitive_base.hpp @@ -114,10 +114,6 @@ struct typed_primitive_impl_ocl : public typed_primitive_impl { args.inputs.push_back(instance.dep_memory_ptr(_kernel_args.inputs[i])); } - for (uint32_t i = 0; i < _kernel_args.intermediates.size(); i++) { - 
args.intermediates.push_back(instance.dep_memory_ptr(_kernel_args.intermediates[i])); - } - args.weights = (_kernel_args.weights >= 0) ? instance.dep_memory_ptr(_kernel_args.weights) : args.weights; args.recurrent = (_kernel_args.recurrent >= 0) ? instance.dep_memory_ptr(_kernel_args.recurrent) : args.recurrent; args.hidden = (_kernel_args.hidden >= 0) ? instance.dep_memory_ptr(_kernel_args.hidden) : args.hidden; @@ -204,10 +200,10 @@ struct typed_primitive_impl_ocl : public typed_primitive_impl { args = get_arguments_by_idx(instance, i); } else { args = get_arguments(instance, i); + } - for (const auto& m : instance.get_intermediates_memories()) { - args.intermediates.push_back(m); - } + for (const auto& m : instance.get_intermediates_memories()) { + args.intermediates.push_back(m); } args.scalars = &_kernel_data.kernels[k].params.scalars; diff --git a/src/plugins/intel_gpu/src/graph/include/deconvolution_inst.h b/src/plugins/intel_gpu/src/graph/include/deconvolution_inst.h index 1eee63df6ab551..c6f1cfa00c6499 100644 --- a/src/plugins/intel_gpu/src/graph/include/deconvolution_inst.h +++ b/src/plugins/intel_gpu/src/graph/include/deconvolution_inst.h @@ -104,10 +104,10 @@ class typed_primitive_inst : public typed_primitive_inst_baseis_dynamic() && _impl_params->reordered_weights != nullptr) { + if (is_dynamic() && _impl_params->reordered_weights != nullptr) { return _impl_params->reordered_weights; - } else if (node->get_groups() == 1) { - if (static_cast(index) >= node->get_split()) + } else if (_groups == 1) { + if (static_cast(index) >= _split) throw std::range_error("weights offset too big"); return dep_memory_ptr(1 + index); } else { // all weights are in one buffer @@ -116,23 +116,24 @@ class typed_primitive_inst : public typed_primitive_inst_baseget_groups() == 1) { - if (argument->bias.size() == 0 && static_cast(index) >= node->get_split()) + if (_groups == 1) { + if (!bias_term() && static_cast(index) >= _split) throw std::range_error("no bias data"); - if 
(static_cast(index) > node->get_split()) + if (static_cast(index) > _split) throw std::range_error("bias offset too big"); - return dep_memory_ptr(1 + node->get_split() + index); + return dep_memory_ptr(1 + _split + index); } else { // all bias are in one buffer return dep_memory_ptr(2); } } - bool bias_term() const { - if (argument->bias.size() != 0) - return true; - else - return false; - } + bool bias_term() const { return _impl_params->bias_layout.has_value(); } + void save(cldnn::BinaryOutputBuffer& ob) const override; + void load(cldnn::BinaryInputBuffer& ib) override; + +private: + uint32_t _groups; + int32_t _split; }; using deconvolution_inst = typed_primitive_inst; diff --git a/src/plugins/intel_gpu/src/graph/primitive_inst.cpp b/src/plugins/intel_gpu/src/graph/primitive_inst.cpp index ff01247c5b8ee1..2e32cdb35ee047 100644 --- a/src/plugins/intel_gpu/src/graph/primitive_inst.cpp +++ b/src/plugins/intel_gpu/src/graph/primitive_inst.cpp @@ -1010,6 +1010,13 @@ void primitive_inst::save(cldnn::BinaryOutputBuffer& ob) const { for (const auto& dep : _exec_deps) { ob << dep->id(); } + + ob << _intermediates_memory.size(); + for (const auto& ibuf : _intermediates_memory) { + ob << ibuf->get_layout(); + const auto _allocation_type = ibuf->get_allocation_type(); + ob << make_data(&_allocation_type, sizeof(_allocation_type)); + } } } @@ -1021,13 +1028,6 @@ void primitive_inst::convert_args(const kernel_arguments_data& args, kernel_argu } } - if (args.intermediates.size() > 0) { - args_idx.intermediates.resize(args.intermediates.size()); - for (uint32_t idx = 0; idx < args.intermediates.size(); ++idx) { - args_idx.intermediates[idx] = get_index_in_deps(args.intermediates[idx]); - } - } - args_idx.weights = (args.weights == nullptr) ? -1 : get_index_in_deps(args.weights); args_idx.recurrent = (args.recurrent == nullptr) ? -1 : get_index_in_deps(args.recurrent); args_idx.hidden = (args.hidden == nullptr) ? 
-1 : get_index_in_deps(args.hidden); @@ -1156,6 +1156,17 @@ void primitive_inst::load(cldnn::BinaryInputBuffer& ib) { } } _output_changed = false; + + ib >> vector_size; + _intermediates_memory.resize(vector_size); + for (size_t i = 0; i < vector_size; i++) { + layout ibuf_layout = layout(cldnn::data_types::bin, cldnn::format::any, cldnn::tensor()); + ib >> ibuf_layout; + allocation_type _allocation_type; + ib >> make_data(&_allocation_type, sizeof(_allocation_type)); + + _intermediates_memory[i] = get_network().get_engine().allocate_memory(ibuf_layout, _allocation_type); + } } } } // namespace cldnn From 53ea9befb984c1fccc5b387552df9867a83ac7c0 Mon Sep 17 00:00:00 2001 From: Eddy Kim Date: Fri, 11 Nov 2022 22:21:55 +0900 Subject: [PATCH 24/26] fix to rebase --- .../intel_gpu/src/graph/impls/ocl/multiclass_nms.cpp | 6 +++++- .../intel_gpu/src/graph/include/multiclass_nms_inst.h | 1 + .../src/graph/include/serialization/object_types.hpp | 1 + .../plugin/gpu/shared_tests_instances/skip_tests_config.cpp | 1 + 4 files changed, 8 insertions(+), 1 deletion(-) diff --git a/src/plugins/intel_gpu/src/graph/impls/ocl/multiclass_nms.cpp b/src/plugins/intel_gpu/src/graph/impls/ocl/multiclass_nms.cpp index b986e6056f94f3..d8313c269b0174 100644 --- a/src/plugins/intel_gpu/src/graph/impls/ocl/multiclass_nms.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/ocl/multiclass_nms.cpp @@ -51,12 +51,14 @@ struct multiclass_nms_impl : public typed_primitive_impl_ocl { using parent = typed_primitive_impl_ocl; using parent::parent; + DECLARE_OBJECT_TYPE_SERIALIZATION + std::unique_ptr clone() const override { return make_unique(*this); } protected: - kernel_arguments_data get_arguments(typed_primitive_inst& instance, int32_t unused) const override { + kernel_arguments_data get_arguments(const typed_primitive_inst& instance, int32_t unused) const override { kernel_arguments_data args = parent::get_arguments(instance, unused); args.inputs.push_back(instance.output_indices_memory()); 
args.inputs.push_back(instance.output_num_memory()); @@ -122,3 +124,5 @@ attach_multiclass_nms_impl::attach_multiclass_nms_impl() { } // namespace detail } // namespace ocl } // namespace cldnn + +BIND_BINARY_BUFFER_WITH_TYPE(cldnn::ocl::multiclass_nms_impl, cldnn::object_type::MULTICLASS_NMS_IMPL) \ No newline at end of file diff --git a/src/plugins/intel_gpu/src/graph/include/multiclass_nms_inst.h b/src/plugins/intel_gpu/src/graph/include/multiclass_nms_inst.h index d1d28e444bfd43..73a619abc8bd9d 100644 --- a/src/plugins/intel_gpu/src/graph/include/multiclass_nms_inst.h +++ b/src/plugins/intel_gpu/src/graph/include/multiclass_nms_inst.h @@ -57,6 +57,7 @@ using multiclass_nms_node = typed_program_node; template <> class typed_primitive_inst : public typed_primitive_inst_base { using parent = typed_primitive_inst_base; + using parent::parent; public: static layout calc_output_layout(const multiclass_nms_node& node, const kernel_impl_params& impl_param); diff --git a/src/plugins/intel_gpu/src/graph/include/serialization/object_types.hpp b/src/plugins/intel_gpu/src/graph/include/serialization/object_types.hpp index 820aac71acc22c..3fcf15d8eaec26 100644 --- a/src/plugins/intel_gpu/src/graph/include/serialization/object_types.hpp +++ b/src/plugins/intel_gpu/src/graph/include/serialization/object_types.hpp @@ -58,6 +58,7 @@ enum class object_type { LSTM_ELT_IMPL, LSTM_GEMM_IMPL, MAX_UNPOOLING_IMPL, + MULTICLASS_NMS_IMPL, MUTABLE_DATA_IMPL, MVN_IMPL, NON_MAX_SUPPRESSION_IMPL_OCL, diff --git a/src/tests/functional/plugin/gpu/shared_tests_instances/skip_tests_config.cpp b/src/tests/functional/plugin/gpu/shared_tests_instances/skip_tests_config.cpp index 768e6775a4357a..37b012394437e3 100644 --- a/src/tests/functional/plugin/gpu/shared_tests_instances/skip_tests_config.cpp +++ b/src/tests/functional/plugin/gpu/shared_tests_instances/skip_tests_config.cpp @@ -91,6 +91,7 @@ std::vector disabledTestPatterns() { // Issue: 76197 R"(.*registerPluginsXMLUnicodePath.*)", // Not 
supported yet + R"(.*CompileModelCacheTestBase.*ConvBias.*)", R"(.*CompileModelCacheTestBase.*KSOFunction.*)", R"(.*CompileModelCacheTestBase.*ReadConcatSplitAssign.*)", R"(.*LoadNetworkCacheTestBase.*)", From d002f5e02ce320dee19909bc58ab66ad93134b7b Mon Sep 17 00:00:00 2001 From: Eddy Kim Date: Sat, 12 Nov 2022 11:06:01 +0900 Subject: [PATCH 25/26] multiclass_nms serialization --- src/plugins/intel_gpu/src/graph/get_type_id.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/plugins/intel_gpu/src/graph/get_type_id.cpp b/src/plugins/intel_gpu/src/graph/get_type_id.cpp index 5e85e705f38ae1..7e68782abb59f2 100644 --- a/src/plugins/intel_gpu/src/graph/get_type_id.cpp +++ b/src/plugins/intel_gpu/src/graph/get_type_id.cpp @@ -55,6 +55,7 @@ #include "intel_gpu/primitives/lstm_dynamic_timeloop.hpp" #include "intel_gpu/primitives/lstm_dynamic.hpp" #include "intel_gpu/primitives/lstm.hpp" +#include "intel_gpu/primitives/multiclass_nms.hpp" #include "intel_gpu/primitives/mutable_data.hpp" #include "intel_gpu/primitives/mvn.hpp" #include "intel_gpu/primitives/non_max_suppression.hpp" @@ -153,6 +154,7 @@ cldnn::primitive_type_id cldnn::get_type_id(std::string type_str) { {"lstm_dynamic_timeloop", cldnn::lstm_dynamic_timeloop::type_id()}, {"lstm_elt", cldnn::lstm_elt::type_id()}, {"lstm_gemm", cldnn::lstm_gemm::type_id()}, + {"multiclass_nms", cldnn::multiclass_nms::type_id()}, {"mutable_data", cldnn::mutable_data::type_id()}, {"mvn", cldnn::mvn::type_id()}, {"non_max_suppression", cldnn::non_max_suppression::type_id()}, From efa84360be29066b9e976b0edb8770eb73d1dae7 Mon Sep 17 00:00:00 2001 From: Eddy Kim Date: Sat, 12 Nov 2022 22:19:28 +0900 Subject: [PATCH 26/26] caching_properties added --- src/plugins/intel_gpu/src/plugin/plugin.cpp | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/plugins/intel_gpu/src/plugin/plugin.cpp b/src/plugins/intel_gpu/src/plugin/plugin.cpp index 54b39d0a3fca49..6ccaf319aa486c 100644 --- a/src/plugins/intel_gpu/src/plugin/plugin.cpp 
+++ b/src/plugins/intel_gpu/src/plugin/plugin.cpp @@ -579,6 +579,7 @@ Parameter Plugin::GetMetric(const std::string& name, const std::map(max_batch_size)}; } else if (isModelCachingEnabled && name == METRIC_KEY(IMPORT_EXPORT_SUPPORT)) { IE_SET_METRIC_RETURN(IMPORT_EXPORT_SUPPORT, true); + } else if (name == ov::caching_properties) { + std::vector cachingProperties; + return decltype(ov::caching_properties)::value_type(cachingProperties); } else { IE_THROW() << "Unsupported metric key " << name; }