diff --git a/src/plugins/intel_gpu/src/graph/impls/cpu/detection_output.cpp b/src/plugins/intel_gpu/src/graph/impls/cpu/detection_output.cpp index d211b610c0c3d7..fac3a6428734a9 100644 --- a/src/plugins/intel_gpu/src/graph/impls/cpu/detection_output.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/cpu/detection_output.cpp @@ -68,11 +68,11 @@ struct detection_output_impl : typed_primitive_impl { nms_type = (node.get_primitive()->decrease_label_id ? NMSType::MXNET : NMSType::CAFFE); } - void save(BinaryOutputBuffer& ob, const kernel_impl_params* impl_params = nullptr) const override { + void save(BinaryOutputBuffer& ob) const override { ob << make_data(&nms_type, sizeof(NMSType)); } - void load(BinaryInputBuffer& ib, const kernel_impl_params* impl_params = nullptr) override { + void load(BinaryInputBuffer& ib) override { ib >> make_data(&nms_type, sizeof(NMSType)); } diff --git a/src/plugins/intel_gpu/src/graph/impls/ocl/activation.cpp b/src/plugins/intel_gpu/src/graph/impls/ocl/activation.cpp index 6b367a00b14d71..c3bb3bac00f000 100644 --- a/src/plugins/intel_gpu/src/graph/impls/ocl/activation.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/ocl/activation.cpp @@ -48,13 +48,13 @@ struct activation_impl : typed_primitive_impl_ocl { return args; } - void save(BinaryOutputBuffer& ob, const kernel_impl_params* impl_params = nullptr) const override { - parent::save(ob, impl_params); + void save(BinaryOutputBuffer& ob) const override { + parent::save(ob); ob << _is_parameterized; } - void load(BinaryInputBuffer& ib, const kernel_impl_params* impl_params = nullptr) override { - parent::load(ib, impl_params); + void load(BinaryInputBuffer& ib) override { + parent::load(ib); ib >> _is_parameterized; } diff --git a/src/plugins/intel_gpu/src/graph/impls/ocl/binary_convolution.cpp b/src/plugins/intel_gpu/src/graph/impls/ocl/binary_convolution.cpp index 096aa56db787da..7d851c891643ad 100644 --- a/src/plugins/intel_gpu/src/graph/impls/ocl/binary_convolution.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/ocl/binary_convolution.cpp @@ -75,13 +75,13 @@ struct binary_convolution_impl : typed_primitive_impl_ocl { int32_t get_split() const override { return _split; } public: - void save(BinaryOutputBuffer& ob, const kernel_impl_params* impl_params = nullptr) const override { - parent::save(ob, impl_params); + void save(BinaryOutputBuffer& ob) const override { + parent::save(ob); ob << _split; } - void load(BinaryInputBuffer& ib, const kernel_impl_params* impl_params = nullptr) override { - parent::load(ib, impl_params); + void load(BinaryInputBuffer& ib) override { + parent::load(ib); ib >> _split; } diff --git a/src/plugins/intel_gpu/src/graph/impls/ocl/concatenation.cpp b/src/plugins/intel_gpu/src/graph/impls/ocl/concatenation.cpp index 5286df10e71868..54a8caa445083d 100644 --- a/src/plugins/intel_gpu/src/graph/impls/ocl/concatenation.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/ocl/concatenation.cpp @@ -85,13 +85,13 @@ struct concatenation_impl : typed_primitive_impl_ocl { } public: - void save(BinaryOutputBuffer& ob, const kernel_impl_params* impl_params = nullptr) const override { - parent::save(ob, impl_params); + void save(BinaryOutputBuffer& ob) const override { + parent::save(ob); ob << _can_be_optimized; } - void load(BinaryInputBuffer& ib, const kernel_impl_params* impl_params = nullptr) override { - parent::load(ib, impl_params); + void load(BinaryInputBuffer& ib) override { + parent::load(ib); ib >> _can_be_optimized; } diff --git a/src/plugins/intel_gpu/src/graph/impls/ocl/convolution.cpp b/src/plugins/intel_gpu/src/graph/impls/ocl/convolution.cpp index e0110774718bbb..aa7a918aaa1028 100644 --- a/src/plugins/intel_gpu/src/graph/impls/ocl/convolution.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/ocl/convolution.cpp @@ -80,15 +80,15 @@ struct convolution_impl : typed_primitive_impl_ocl { bool get_depthwise_sep_opt() const override { return _depthwise_sep_opt; } public: - void save(BinaryOutputBuffer& ob, const kernel_impl_params* impl_params = nullptr) const override { - parent::save(ob, impl_params); + void save(BinaryOutputBuffer& ob) const override { + parent::save(ob); ob << _split; ob << _groups; ob << _depthwise_sep_opt; } - void load(BinaryInputBuffer& ib, const kernel_impl_params* impl_params = nullptr) override { - parent::load(ib, impl_params); + void load(BinaryInputBuffer& ib) override { + parent::load(ib); ib >> _split; ib >> _groups; ib >> _depthwise_sep_opt; diff --git a/src/plugins/intel_gpu/src/graph/impls/ocl/crop.cpp b/src/plugins/intel_gpu/src/graph/impls/ocl/crop.cpp index 4807a8ddf4634a..0d861a61fbcb6d 100644 --- a/src/plugins/intel_gpu/src/graph/impls/ocl/crop.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/ocl/crop.cpp @@ -44,13 +44,13 @@ struct crop_impl : typed_primitive_impl_ocl { } public: - void save(BinaryOutputBuffer& ob, const kernel_impl_params* impl_params = nullptr) const override { - parent::save(ob, impl_params); + void save(BinaryOutputBuffer& ob) const override { + parent::save(ob); ob << _can_be_optimized; } - void load(BinaryInputBuffer& ib, const kernel_impl_params* impl_params = nullptr) override { - parent::load(ib, impl_params); + void load(BinaryInputBuffer& ib) override { + parent::load(ib); ib >> _can_be_optimized; } diff --git a/src/plugins/intel_gpu/src/graph/impls/ocl/custom_primitive.cpp b/src/plugins/intel_gpu/src/graph/impls/ocl/custom_primitive.cpp index f384e70797c210..3421e8c5718b07 100644 --- a/src/plugins/intel_gpu/src/graph/impls/ocl/custom_primitive.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/ocl/custom_primitive.cpp @@ -85,12 +85,12 @@ struct custom_gpu_primitive_impl : typed_primitive_impl { return {_kernel_id}; } - void save(BinaryOutputBuffer& ob, const kernel_impl_params* impl_params = nullptr) const override { + void save(BinaryOutputBuffer& ob) const override { ob << *cl_kernel; ob << _kernel_id; } - void load(BinaryInputBuffer& ib, const kernel_impl_params* impl_params = nullptr) override { + void load(BinaryInputBuffer& ib) override { cl_kernel = std::make_shared(); ib >> *cl_kernel; ib >> _kernel_id; diff --git a/src/plugins/intel_gpu/src/graph/impls/ocl/deconvolution.cpp b/src/plugins/intel_gpu/src/graph/impls/ocl/deconvolution.cpp index e214b33962a17f..7d939b66288e70 100644 --- a/src/plugins/intel_gpu/src/graph/impls/ocl/deconvolution.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/ocl/deconvolution.cpp @@ -41,14 +41,14 @@ struct deconvolution_impl : typed_primitive_impl_ocl { _groups = node.get_groups(); } - void save(BinaryOutputBuffer& ob, const kernel_impl_params* impl_params = nullptr) const override { - parent::save(ob, impl_params); + void save(BinaryOutputBuffer& ob) const override { + parent::save(ob); ob << _split; ob << _groups; } - void load(BinaryInputBuffer& ib, const kernel_impl_params* impl_params = nullptr) override { - parent::load(ib, impl_params); + void load(BinaryInputBuffer& ib) override { + parent::load(ib); ib >> _split; ib >> _groups; } diff --git a/src/plugins/intel_gpu/src/graph/impls/ocl/deformable_convolution.cpp b/src/plugins/intel_gpu/src/graph/impls/ocl/deformable_convolution.cpp index 50fa6ca5f4d2c8..57255cc12c3468 100644 --- a/src/plugins/intel_gpu/src/graph/impls/ocl/deformable_convolution.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/ocl/deformable_convolution.cpp @@ -42,14 +42,14 @@ struct deformable_conv_impl : typed_primitive_impl_ocl { _groups = node.get_groups(); } - void save(BinaryOutputBuffer& ob, const kernel_impl_params* impl_params = nullptr) const override { - parent::save(ob, impl_params); + void save(BinaryOutputBuffer& ob) const override { + parent::save(ob); ob << _split; ob << _groups; } - void load(BinaryInputBuffer& ib, const kernel_impl_params* impl_params = nullptr) override { - parent::load(ib, impl_params); + void load(BinaryInputBuffer& ib) override { + parent::load(ib); ib >> _split; ib >> _groups; } diff --git a/src/plugins/intel_gpu/src/graph/impls/ocl/generic_layer.cpp b/src/plugins/intel_gpu/src/graph/impls/ocl/generic_layer.cpp index 7283b3f56c5128..a25203538dd00e 100644 --- a/src/plugins/intel_gpu/src/graph/impls/ocl/generic_layer.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/ocl/generic_layer.cpp @@ -44,12 +44,12 @@ struct generic_layer_impl : typed_primitive_impl { _kernel_id = arg.get_program().add_kernel(arg.get_primitive()->generic_params.clKernel->code.kernelString); } - void save(BinaryOutputBuffer& ob, const kernel_impl_params* impl_params = nullptr) const override { + void save(BinaryOutputBuffer& ob) const override { ob <<_cl_kernel_data; ob << _kernel_id; } - void load(BinaryInputBuffer& ib, const kernel_impl_params* impl_params = nullptr) override { + void load(BinaryInputBuffer& ib) override { ib >> _cl_kernel_data; ib >> _kernel_id; } diff --git a/src/plugins/intel_gpu/src/graph/impls/ocl/primitive_base.hpp b/src/plugins/intel_gpu/src/graph/impls/ocl/primitive_base.hpp index 05711264aa017d..ee4fd671733ab3 100644 --- a/src/plugins/intel_gpu/src/graph/impls/ocl/primitive_base.hpp +++ b/src/plugins/intel_gpu/src/graph/impls/ocl/primitive_base.hpp @@ -73,7 +73,7 @@ struct typed_primitive_impl_ocl : public typed_primitive_impl { bool is_cpu() const override { return false; } - void save(BinaryOutputBuffer& ob, const kernel_impl_params* impl_params = nullptr) const override { + void save(BinaryOutputBuffer& ob) const override { ob << make_data(&_kernel_data.internalBufferDataType, sizeof(kernel_selector::Datatype)); ob << _kernel_data.internalBufferSizes; ob << _kernel_data.kernels; @@ -81,7 +81,7 @@ struct typed_primitive_impl_ocl : public typed_primitive_impl { ob << _kernel_args; } - void load(BinaryInputBuffer& ib, const kernel_impl_params* impl_params = nullptr) override { + void load(BinaryInputBuffer& ib) override { ib >> make_data(&_kernel_data.internalBufferDataType, sizeof(kernel_selector::Datatype)); ib >> _kernel_data.internalBufferSizes; ib >> _kernel_data.kernels; diff --git a/src/plugins/intel_gpu/src/graph/impls/ocl/reorder.cpp b/src/plugins/intel_gpu/src/graph/impls/ocl/reorder.cpp index c5b24f5a7ae7de..0e6c6f8f24ba10 100644 --- a/src/plugins/intel_gpu/src/graph/impls/ocl/reorder.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/ocl/reorder.cpp @@ -40,14 +40,14 @@ struct reorder_impl : typed_primitive_impl_ocl { _has_mean = node.has_mean(); } - void save(BinaryOutputBuffer& ob, const kernel_impl_params* impl_params = nullptr) const override { - parent::save(ob, impl_params); + void save(BinaryOutputBuffer& ob) const override { + parent::save(ob); ob << _can_be_optimized; ob << _has_mean; } - void load(BinaryInputBuffer& ib, const kernel_impl_params* impl_params = nullptr) override { - parent::load(ib, impl_params); + void load(BinaryInputBuffer& ib) override { + parent::load(ib); ib >> _can_be_optimized; ib >> _has_mean; } diff --git a/src/plugins/intel_gpu/src/graph/impls/onednn/concatenation_onednn.cpp b/src/plugins/intel_gpu/src/graph/impls/onednn/concatenation_onednn.cpp index 3c6bbeac5d71f0..bf89db82a19fde 100644 --- a/src/plugins/intel_gpu/src/graph/impls/onednn/concatenation_onednn.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/onednn/concatenation_onednn.cpp @@ -63,7 +63,7 @@ struct concatenation_onednn : typed_primitive_onednn_impl(ob.getKernlImplParams()); auto prim = impl_params->typed_desc(); ob << prim->axis; @@ -81,18 +82,19 @@ struct concatenation_onednn : typed_primitive_onednn_impl> has_prim; if (!has_prim) return; - parent::load(ib, impl_params); + parent::load(ib); int64_t prim_axis; ib >> prim_axis; + const kernel_impl_params* impl_params = reinterpret_cast(ib.getKernlImplParams()); auto desc = get_concatenation_descriptor(*impl_params, prim_axis, ib.get_engine()); _pd = *desc; diff --git a/src/plugins/intel_gpu/src/graph/impls/onednn/convolution_onednn.cpp b/src/plugins/intel_gpu/src/graph/impls/onednn/convolution_onednn.cpp index 27c1f2f94f8f01..f10dde97ceaa96 100644 --- a/src/plugins/intel_gpu/src/graph/impls/onednn/convolution_onednn.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/onednn/convolution_onednn.cpp @@ -185,8 +185,8 @@ struct convolution_onednn : typed_primitive_onednn_impldata, sizeof(dnnl_convolution_desc_t)); @@ -195,8 +195,8 @@ struct convolution_onednn : typed_primitive_onednn_impl(); ib >> make_data(&_desc->data, sizeof(dnnl_convolution_desc_t)); diff --git a/src/plugins/intel_gpu/src/graph/impls/onednn/deconvolution_onednn.cpp b/src/plugins/intel_gpu/src/graph/impls/onednn/deconvolution_onednn.cpp index cf40323f8d1528..8be3740fbdff54 100644 --- a/src/plugins/intel_gpu/src/graph/impls/onednn/deconvolution_onednn.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/onednn/deconvolution_onednn.cpp @@ -104,8 +104,8 @@ struct deconvolution_onednn : typed_primitive_onednn_impldata, sizeof(dnnl_deconvolution_desc_t)); @@ -114,8 +114,8 @@ struct deconvolution_onednn : typed_primitive_onednn_impl(); ib >> make_data(&_desc->data, sizeof(dnnl_deconvolution_desc_t)); diff --git a/src/plugins/intel_gpu/src/graph/impls/onednn/fully_connected_onednn.cpp b/src/plugins/intel_gpu/src/graph/impls/onednn/fully_connected_onednn.cpp index 99b5aea13bc91a..af875b5f6b38b9 100644 --- a/src/plugins/intel_gpu/src/graph/impls/onednn/fully_connected_onednn.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/onednn/fully_connected_onednn.cpp @@ -166,8 +166,8 @@ struct fully_connected_onednn : typed_primitive_onednn_impldata, sizeof(dnnl_inner_product_desc_t)); @@ -176,8 +176,8 @@ struct fully_connected_onednn : typed_primitive_onednn_impl(); ib >> make_data(&_desc->data, sizeof(dnnl_inner_product_desc_t)); diff --git a/src/plugins/intel_gpu/src/graph/impls/onednn/gemm_onednn.cpp b/src/plugins/intel_gpu/src/graph/impls/onednn/gemm_onednn.cpp index 20f8ced65186a1..126c0c59b9c893 100644 --- a/src/plugins/intel_gpu/src/graph/impls/onednn/gemm_onednn.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/onednn/gemm_onednn.cpp @@ -209,8 +209,8 @@ struct gemm_onednn : typed_primitive_onednn_impl { } public: - void save(BinaryOutputBuffer& ob, const kernel_impl_params* impl_params = nullptr) const override { - parent::save(ob, impl_params); + void save(BinaryOutputBuffer& ob) const override { + parent::save(ob); ob << make_data(&_desc->data, sizeof(dnnl_matmul_desc_t)); @@ -219,8 +219,8 @@ struct gemm_onednn : typed_primitive_onednn_impl { ob << prim_cache; } - void load(BinaryInputBuffer& ib, const kernel_impl_params* impl_params = nullptr) override { - parent::load(ib, impl_params); + void load(BinaryInputBuffer& ib) override { + parent::load(ib); _desc = std::make_shared(); ib >> make_data(&_desc->data, sizeof(dnnl_matmul_desc_t)); diff --git a/src/plugins/intel_gpu/src/graph/impls/onednn/pooling_onednn.cpp b/src/plugins/intel_gpu/src/graph/impls/onednn/pooling_onednn.cpp index 20eae95f89c00b..6707da450898d0 100644 --- a/src/plugins/intel_gpu/src/graph/impls/onednn/pooling_onednn.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/onednn/pooling_onednn.cpp @@ -64,8 +64,8 @@ struct pooling_onednn : typed_primitive_onednn_impldata, sizeof(dnnl_pooling_desc_t)); @@ -74,8 +74,8 @@ struct pooling_onednn : typed_primitive_onednn_impl(); ib >> make_data(&_desc->data, sizeof(dnnl_pooling_desc_t)); diff --git a/src/plugins/intel_gpu/src/graph/impls/onednn/reduction_onednn.cpp b/src/plugins/intel_gpu/src/graph/impls/onednn/reduction_onednn.cpp index e4a6834183882d..1f25295e4b3fac 100644 --- a/src/plugins/intel_gpu/src/graph/impls/onednn/reduction_onednn.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/onednn/reduction_onednn.cpp @@ -93,8 +93,8 @@ struct reduction_onednn : typed_primitive_onednn_impldata, sizeof(dnnl_reduction_desc_t)); @@ -103,8 +103,8 @@ struct reduction_onednn : typed_primitive_onednn_impl(); ib >> make_data(&_desc->data, sizeof(dnnl_reduction_desc_t)); diff --git a/src/plugins/intel_gpu/src/graph/impls/onednn/reorder_onednn.cpp b/src/plugins/intel_gpu/src/graph/impls/onednn/reorder_onednn.cpp index 1575e388088287..bfe776d9a64b46 100644 --- a/src/plugins/intel_gpu/src/graph/impls/onednn/reorder_onednn.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/onednn/reorder_onednn.cpp @@ -63,17 +63,18 @@ struct reorder_onednn : typed_primitive_onednn_impl prim_cache; prim_cache = _prim.get_cache_blob(); ob << prim_cache; } - void load(BinaryInputBuffer& ib, const kernel_impl_params* impl_params = nullptr) override { - parent::load(ib, impl_params); + void load(BinaryInputBuffer& ib) override { + parent::load(ib); + const kernel_impl_params* impl_params = reinterpret_cast(ib.getKernlImplParams()); auto desc = get_reorder_descriptor(*impl_params, *_attrs, ib.get_engine()); _pd = *desc; diff --git a/src/plugins/intel_gpu/src/graph/include/primitive_inst.h b/src/plugins/intel_gpu/src/graph/include/primitive_inst.h index 0e18ad256f029b..27d82b20911cf9 100644 --- a/src/plugins/intel_gpu/src/graph/include/primitive_inst.h +++ b/src/plugins/intel_gpu/src/graph/include/primitive_inst.h @@ -67,8 +67,8 @@ struct primitive_impl { virtual std::vector get_kernel_ids() { return {}; } - virtual void save(cldnn::BinaryOutputBuffer& ob, const kernel_impl_params* impl_params = nullptr) const {} - virtual void load(cldnn::BinaryInputBuffer& ib, const kernel_impl_params* impl_params = nullptr) {} + virtual void save(cldnn::BinaryOutputBuffer& ob) const {} + virtual void load(cldnn::BinaryInputBuffer& ib) {} // If this flag is set as false, the memory allocated for this primitive is not allowed to be reused bool can_reuse_memory = true; diff --git a/src/plugins/intel_gpu/src/graph/include/serialization/binary_buffer.hpp b/src/plugins/intel_gpu/src/graph/include/serialization/binary_buffer.hpp index 7b19fb3ca2b142..1d211cc6d5caf4 100644 --- a/src/plugins/intel_gpu/src/graph/include/serialization/binary_buffer.hpp +++ b/src/plugins/intel_gpu/src/graph/include/serialization/binary_buffer.hpp @@ -24,8 +24,12 @@ class BinaryOutputBuffer : public OutputBuffer { } } + void setKernlImplParams(void* impl_params) { _impl_params = impl_params; } + void* getKernlImplParams() const { return _impl_params; } + private: std::ostream& stream; + void* _impl_params; }; class BinaryInputBuffer : public InputBuffer { @@ -39,8 +43,12 @@ class BinaryInputBuffer : public InputBuffer { } } + void setKernlImplParams(void* impl_params) { _impl_params = impl_params; } + void* getKernlImplParams() const { return _impl_params; } + private: std::istream& stream; + void* _impl_params; }; template diff --git a/src/plugins/intel_gpu/src/graph/primitive_inst.cpp b/src/plugins/intel_gpu/src/graph/primitive_inst.cpp index 0859f777ec29ca..1a892f45b597f1 100644 --- a/src/plugins/intel_gpu/src/graph/primitive_inst.cpp +++ b/src/plugins/intel_gpu/src/graph/primitive_inst.cpp @@ -975,7 +975,8 @@ void primitive_inst::save(cldnn::BinaryOutputBuffer& ob) const { _impl->set_arguments(args_idx); _impl_params->save(ob); - _impl->save(ob, _impl_params.get()); + ob.setKernlImplParams(_impl_params.get()); + ob << _impl; ob << _node_output_layout; ob << has_mutable_input(); @@ -1100,7 +1101,8 @@ void primitive_inst::load(cldnn::BinaryInputBuffer& ib) { _impl_params = make_unique(); _impl_params->load(ib); _impl.release(); - _impl->load(ib, _impl_params.get()); + ib.setKernlImplParams(_impl_params.get()); + ib >> _impl; ib >> _node_output_layout; ib >> _has_mutable_input;