diff --git a/src/plugins/intel_gpu/include/intel_gpu/runtime/layout.hpp b/src/plugins/intel_gpu/include/intel_gpu/runtime/layout.hpp index 2317c550826cd1..84f559799343c2 100644 --- a/src/plugins/intel_gpu/include/intel_gpu/runtime/layout.hpp +++ b/src/plugins/intel_gpu/include/intel_gpu/runtime/layout.hpp @@ -68,6 +68,7 @@ class optional_data_type { storage_type storage; }; + /// Converts C++ type to @ref data_types . template struct type_to_data_type; @@ -429,6 +430,38 @@ struct layout { tensor size; }; +class optional_layout { +public: + optional_layout() {} + optional_layout(const layout& lay) { + this->opt_layout_ptr = make_unique(lay); + } + + optional_layout(const optional_layout& new_opt_lay) { + if (new_opt_lay) { + layout copied_lay = *new_opt_lay; + this->opt_layout_ptr = make_unique(copied_lay); + } + } + + operator bool() const { + return this->opt_layout_ptr != nullptr; + } + + layout operator*() const { + if (opt_layout_ptr == nullptr) + throw std::runtime_error("Attempt to access uninitialized optional layout!"); + return *this->opt_layout_ptr; + } + + std::unique_ptr& get_layout_ptr() { + return opt_layout_ptr; + } + +private: + std::unique_ptr opt_layout_ptr = nullptr; +}; + /// @} /// @} } // namespace cldnn diff --git a/src/plugins/intel_gpu/src/graph/graph_optimizer/add_onednn_optimization_attributes.cpp b/src/plugins/intel_gpu/src/graph/graph_optimizer/add_onednn_optimization_attributes.cpp index 1dfc07822f135e..9c4b6ce82f61e7 100644 --- a/src/plugins/intel_gpu/src/graph/graph_optimizer/add_onednn_optimization_attributes.cpp +++ b/src/plugins/intel_gpu/src/graph/graph_optimizer/add_onednn_optimization_attributes.cpp @@ -25,8 +25,7 @@ void add_onednn_optimization_attributes::run(program& p) { // Reshape fused ops tensors for OneDNN FC if needed if (fc_prim->input_size == 3) { for (auto& fused_prim : node->get_fused_primitives()) { - auto fused_node = fused_prim.node; - if (fused_node->is_type()) { + if (fused_prim.is_type()) { auto& dependency = node->get_dependency(fused_prim.dep_start_idx); auto original_layout = dependency.get_output_layout(); onednn::combine_bf_with_first_spatial_dim(original_layout); diff --git a/src/plugins/intel_gpu/src/graph/graph_optimizer/basic_memory_dependencies.cpp b/src/plugins/intel_gpu/src/graph/graph_optimizer/basic_memory_dependencies.cpp index 526a5828b5195f..4d1024c227f70b 100644 --- a/src/plugins/intel_gpu/src/graph/graph_optimizer/basic_memory_dependencies.cpp +++ b/src/plugins/intel_gpu/src/graph/graph_optimizer/basic_memory_dependencies.cpp @@ -41,7 +41,7 @@ void basic_memory_dependencies::run(program& p) { && (node->is_type() || node->is_type())) { size_t eltw_dep = 0; for (auto& fused_op : node->get_fused_primitives()) { - if (fused_op.node->is_type() && fused_op.deps.size() == 1) { + if (fused_op.is_type() && fused_op.deps.size() == 1) { // If it is first sum, reuse the buffer auto fusing_type = onednn_add_fusing_helpers::get_add_fusing_type(*node, fused_op); if (fusing_type != add_fusing_type::sum || eltw_dep != 0) diff --git a/src/plugins/intel_gpu/src/graph/graph_optimizer/prepare_primitive_fusing.cpp b/src/plugins/intel_gpu/src/graph/graph_optimizer/prepare_primitive_fusing.cpp index f5389a48e2b150..6cf9ea167d67d7 100644 --- a/src/plugins/intel_gpu/src/graph/graph_optimizer/prepare_primitive_fusing.cpp +++ b/src/plugins/intel_gpu/src/graph/graph_optimizer/prepare_primitive_fusing.cpp @@ -674,10 +674,10 @@ void prepare_primitive_fusing::fuse_simple_primitives(program &p) { auto& fused_descs = input_data.get_fused_primitives(); auto origin_input_iter = std::find_if(fused_descs.begin(), fused_descs.end(), [&](cldnn::fused_primitive_desc& desc) { - return (desc.node->id() == prim_id.first); + return (desc.desc->id == prim_id.first); }); if (origin_input_iter != fused_descs.end()) { - auto users = get_users_from_fusing_history(origin_input_iter->node->id()); + auto users = get_users_from_fusing_history(origin_input_iter->desc->id); if (users.size() != 1) { return false; } @@ -1167,10 +1167,10 @@ void prepare_primitive_fusing::optimize_fused_ops(program& p) { auto remove_deps_of_node = [&](cldnn::fused_primitive_desc& desc) { for (auto& prim : fused_prims) { - if (desc.node->id() == prim.node->id()) { + if (desc.desc->id == prim.desc->id) { continue; } - auto rm_iter = prim.fused_deps.find(desc.node->id()); + auto rm_iter = prim.fused_deps.find(desc.desc->id); if (rm_iter != prim.fused_deps.end()) { prim.fused_deps.erase(rm_iter); prim.fused_deps.insert(desc.fused_deps.begin(), desc.fused_deps.end()); @@ -1187,16 +1187,13 @@ void prepare_primitive_fusing::optimize_fused_ops(program& p) { auto& fp = *curr_itr; auto& fp_next = *fp_itr; + if (fp.is_type() && fp_next.is_type()) { + const auto& act_prim = fp.typed_desc();; + const auto& quant_param = fp_next.get_typed_fuse_params(); - if (fp.node->is_type() && fp_next.node->is_type()) { - auto& activation_node = fp.node->as(); - auto& quantize_node = fp_next.node->as(); - bool can_skip = activation_node.get_primitive()->activation_function == activation_func::relu && - activation_node.get_primitive()->additional_params.a == 0.0f && - fp.deps.empty() && - data_type_traits::is_i8_u8(quantize_node.get_output_layout().data_type) && - quantize_node.get_scale_shift_opt() && - !quantize_node.get_need_pre_shift(); + bool can_skip = fp.deps.empty() && data_type_traits::is_i8_u8(fp_next.output_layout.data_type); + can_skip &= ((act_prim->activation_function == activation_func::relu) && (act_prim->additional_params.a == 0.0f)); + can_skip &= (quant_param->scale_shift_opt && !quant_param->has_pre_shift); if (can_skip) { remove_deps_of_node(fp); diff --git a/src/plugins/intel_gpu/src/graph/graph_optimizer/remove_redundant_reorders.cpp b/src/plugins/intel_gpu/src/graph/graph_optimizer/remove_redundant_reorders.cpp index f77a9a1e08d0d3..7191cd2f88d7e2 100644 --- a/src/plugins/intel_gpu/src/graph/graph_optimizer/remove_redundant_reorders.cpp +++ b/src/plugins/intel_gpu/src/graph/graph_optimizer/remove_redundant_reorders.cpp @@ -485,14 +485,14 @@ void remove_redundant_reorders::run(program& p) { input.set_output_padding(node->get_output_layout().data_padding); // Add fused_primitive_desc of reorder to convolution which propagate original output layout to jitter - fused_primitive_desc local_desc; - local_desc.node = p.get_node_ptr(node->id()); + fused_primitive_desc local_desc(node->get_primitive()); + local_desc.input_layout = input.get_dependency(0).get_output_layout(); // original convolution's output layout + node->set_input_layout(local_desc.input_layout); + local_desc.f_param = node->get_fuse_params(); local_desc.dep_start_idx = input.get_fused_primitives().size(); local_desc.output_layout = output_layout; - local_desc.input_layout = input.get_dependency(0).get_output_layout(); // original convolution's output layout local_desc.activation = activation_func::none; input.add_fused_primitive(local_desc); - node->set_input_layout(local_desc.input_layout); // remove reorder node LOG_NODE_REMOVAL(node->id()); diff --git a/src/plugins/intel_gpu/src/graph/graph_optimizer/reorder_inputs.cpp b/src/plugins/intel_gpu/src/graph/graph_optimizer/reorder_inputs.cpp index f7c91977e13088..d3c47ca6f67f45 100644 --- a/src/plugins/intel_gpu/src/graph/graph_optimizer/reorder_inputs.cpp +++ b/src/plugins/intel_gpu/src/graph/graph_optimizer/reorder_inputs.cpp @@ -695,7 +695,7 @@ void reorder_inputs::run(program& p, layout_optimizer& lo, reorder_factory& rf) // changes the input format of eltwise sum post-op to use binary add. if (conv_node.get_preferred_impl_type() == impl_types::onednn) { onednn_add_fusing_helpers::for_eltwise(conv_node, eltwise_mode::sum, - [&](const program_node& p_node, const eltwise_node& e_node, const fused_primitive_desc& desc) { + [&](const program_node& p_node, const fused_primitive_desc& desc) { auto fusing_type = onednn_add_fusing_helpers::get_add_fusing_type(p_node, desc); if (fusing_type == add_fusing_type::binary_per_tensor) { auto& dep_node = p_node.get_dependency(desc.dep_start_idx); diff --git a/src/plugins/intel_gpu/src/graph/impls/common/condition.cpp b/src/plugins/intel_gpu/src/graph/impls/common/condition.cpp index cd30eefe655251..92cedf20e6e308 100644 --- a/src/plugins/intel_gpu/src/graph/impls/common/condition.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/common/condition.cpp @@ -41,7 +41,7 @@ struct condition_impl : typed_primitive_impl { return ev; } - static primitive_impl* create(const condition_node& arg) { return new condition_impl(arg); } + static primitive_impl* create(const condition_node& arg, std::shared_ptr) { return new condition_impl(arg); } void init_kernels() override {} diff --git a/src/plugins/intel_gpu/src/graph/impls/common/loop.cpp b/src/plugins/intel_gpu/src/graph/impls/common/loop.cpp index 0a8f05cacb3b9b..a9f6b36f4c081d 100644 --- a/src/plugins/intel_gpu/src/graph/impls/common/loop.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/common/loop.cpp @@ -164,7 +164,7 @@ struct loop_impl : typed_primitive_impl { return ev; } - static primitive_impl* create(const loop_node& arg) { return new loop_impl(arg); } + static primitive_impl* create(const loop_node& arg, std::shared_ptr) { return new loop_impl(arg); } }; namespace detail { diff --git a/src/plugins/intel_gpu/src/graph/impls/common/wait_for_events.cpp b/src/plugins/intel_gpu/src/graph/impls/common/wait_for_events.cpp index 7f77619f7f2444..6a85557bce5f1e 100644 --- a/src/plugins/intel_gpu/src/graph/impls/common/wait_for_events.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/common/wait_for_events.cpp @@ -32,13 +32,13 @@ class wait_for_events_impl : public primitive_impl { bool validate(const primitive_inst&) const override { return true; } - static primitive_impl* create_data(const data_node& data) { return new wait_for_events_impl(data); } + static primitive_impl* create_data(const data_node& data, std::shared_ptr) { return new wait_for_events_impl(data); } - static primitive_impl* create_input_layout(const input_layout_node& input) { + static primitive_impl* create_input_layout(const input_layout_node& input, std::shared_ptr) { return new wait_for_events_impl(input); } - static primitive_impl* create_prior_box(const prior_box_node& prior_box) { + static primitive_impl* create_prior_box(const prior_box_node& prior_box, std::shared_ptr) { // This primitive is being executed on CPU during network compilation. return new wait_for_events_impl(prior_box); } diff --git a/src/plugins/intel_gpu/src/graph/impls/cpu/assign.cpp b/src/plugins/intel_gpu/src/graph/impls/cpu/assign.cpp index 031ff30cf725ac..3d18e359285a91 100644 --- a/src/plugins/intel_gpu/src/graph/impls/cpu/assign.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/cpu/assign.cpp @@ -38,7 +38,9 @@ struct assign_impl : public typed_primitive_impl { void init_kernels() override {} public: - static primitive_impl* create(assign_node const& arg) { return new assign_impl{}; } + static primitive_impl* create(const assign_node& arg, std::shared_ptr impl_param) { + return new assign_impl{}; + } }; diff --git a/src/plugins/intel_gpu/src/graph/impls/cpu/detection_output.cpp b/src/plugins/intel_gpu/src/graph/impls/cpu/detection_output.cpp index 377ef6368faed3..c0fd99697e5907 100644 --- a/src/plugins/intel_gpu/src/graph/impls/cpu/detection_output.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/cpu/detection_output.cpp @@ -833,7 +833,7 @@ struct detection_output_impl : typed_primitive_impl { void init_kernels() override {} - static primitive_impl* create(const detection_output_node& arg) { return new detection_output_impl(arg); } + static primitive_impl* create(const detection_output_node& arg, std::shared_ptr) { return new detection_output_impl(arg); } }; namespace detail { diff --git a/src/plugins/intel_gpu/src/graph/impls/cpu/non_max_suppression.cpp b/src/plugins/intel_gpu/src/graph/impls/cpu/non_max_suppression.cpp index 6b41ef35f68e5b..6aeecee9016eb5 100644 --- a/src/plugins/intel_gpu/src/graph/impls/cpu/non_max_suppression.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/cpu/non_max_suppression.cpp @@ -401,7 +401,7 @@ struct non_max_suppression_impl : typed_primitive_impl { return ev; } - static primitive_impl* create(const non_max_suppression_node&) { + static primitive_impl* create(const non_max_suppression_node&, std::shared_ptr) { return new non_max_suppression_impl(); } void init_kernels() override {} diff --git a/src/plugins/intel_gpu/src/graph/impls/cpu/proposal.cpp b/src/plugins/intel_gpu/src/graph/impls/cpu/proposal.cpp index 984a8577cab75a..0d81194cd7e9ae 100644 --- a/src/plugins/intel_gpu/src/graph/impls/cpu/proposal.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/cpu/proposal.cpp @@ -427,8 +427,8 @@ struct proposal_impl : typed_primitive_impl { void init_kernels() override {} - static primitive_impl* create(const proposal_node& arg) { - const layout& l = arg.image_info().get_output_layout(); + static primitive_impl* create(const proposal_node& arg, std::shared_ptr impl_param) { + const layout& l = impl_param->input_layouts[2]; const size_t count = l.feature() == 1 ? static_cast(l.batch()) : static_cast(l.feature()); // Supported image_info sizes and components meaning: diff --git a/src/plugins/intel_gpu/src/graph/impls/cpu/read_value.cpp b/src/plugins/intel_gpu/src/graph/impls/cpu/read_value.cpp index 6fa28707ed3867..a213906dc970a0 100644 --- a/src/plugins/intel_gpu/src/graph/impls/cpu/read_value.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/cpu/read_value.cpp @@ -39,7 +39,9 @@ struct read_value_impl : public typed_primitive_impl { void init_kernels() override {} public: - static primitive_impl* create(read_value_node const& arg) { return new read_value_impl{}; } + static primitive_impl* create(const read_value_node& arg, std::shared_ptr impl_param) { + return new read_value_impl{}; + } }; namespace detail { diff --git a/src/plugins/intel_gpu/src/graph/impls/implementation_map.hpp b/src/plugins/intel_gpu/src/graph/impls/implementation_map.hpp index ed008ef7a31400..5e1b135f824657 100644 --- a/src/plugins/intel_gpu/src/graph/impls/implementation_map.hpp +++ b/src/plugins/intel_gpu/src/graph/impls/implementation_map.hpp @@ -11,6 +11,8 @@ #include #include #include "to_string_utils.h" +#include "kernel_selector_helper.h" +#include "activation_inst.h" namespace cldnn { @@ -145,42 +147,39 @@ class implementation_map { public: using key_builder = implementation_key; using key_type = typename key_builder::type; - using factory_type = std::function&)>; + using factory_type = std::function&, std::shared_ptr)>; using map_type = singleton_map, factory_type>>; - static factory_type get(const typed_program_node& primitive) { - impl_types target_impl_type = primitive.get_preferred_impl_type(); - // lookup in database; throw if not found - auto key = key_builder()(primitive); + static factory_type get(std::shared_ptr impl_param, impl_types preferred_impl_type) { + auto key = key_builder()(impl_param->input_layouts[0]); for (auto& kv : map_type::instance()) { impl_types impl_type = kv.first; - if ((target_impl_type & impl_type) != impl_type) + if ((preferred_impl_type & impl_type) != impl_type) continue; - std::set& keys_set = kv.second.first; auto& factory = kv.second.second; - if (keys_set.empty() || keys_set.find(key) != keys_set.end()) { + if (keys_set.empty() || keys_set.find(key) != keys_set.end()) { return factory; } } std::stringstream target_impl_type_ss; - target_impl_type_ss << target_impl_type; + target_impl_type_ss << preferred_impl_type; throw std::runtime_error(std::string("implementation_map for ") + typeid(primitive_kind).name() + " could not find any implementation to match key: " + - get_key_name(key) + ", impl_type: " + target_impl_type_ss.str() + ", node_id: " + primitive.id()); + get_key_name(key) + ", impl_type: " + target_impl_type_ss.str() + ", node_id: " + impl_param->desc->id); } // check if for a given engine and type there exist an implementation - static bool check(const typed_program_node& primitive) { + static bool check(const typed_program_node& primitive, std::shared_ptr impl_params) { impl_types target_impl_type = primitive.get_preferred_impl_type(); - auto key = key_builder()(primitive); + auto key = key_builder()(impl_params->input_layouts[0]); return check_key(target_impl_type, key); } // check if there exists a kernel implementation of a primitive with output set it primitive's output layout - static bool check_io_eq(const typed_program_node& primitive) { + static bool check_io_eq(const typed_program_node& primitive, std::shared_ptr impl_params) { impl_types target_impl_type = primitive.get_preferred_impl_type(); - auto key = key_builder()(primitive.get_output_layout()); + auto key = key_builder()(impl_params->output_layout); return check_key(target_impl_type, key); } diff --git a/src/plugins/intel_gpu/src/graph/impls/ocl/activation.cpp b/src/plugins/intel_gpu/src/graph/impls/ocl/activation.cpp index d7fb9da8b76c11..4508a771e35cfe 100644 --- a/src/plugins/intel_gpu/src/graph/impls/ocl/activation.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/ocl/activation.cpp @@ -30,17 +30,17 @@ struct activation_impl : typed_primitive_impl_ocl { return args; } - - static primitive_impl* create(const activation_node& arg) { - auto activation_params = get_default_params(arg); + static primitive_impl* create(const activation_node& arg, std::shared_ptr impl_param) { + const auto& prim = arg.get_primitive(); + auto activation_params = get_default_params(*impl_param); auto activation_optional_params = get_default_optional_params(arg.get_program()); - convert_new_activation_func(arg.get_primitive(), activation_params.activations); + convert_new_activation_func(prim, activation_params.activations); if (arg.is_parameterized()) { - const auto& slope_layout = arg.slope_input().get_output_layout(); - const auto& output_layout = arg.get_output_layout(); + const auto& slope_layout = impl_param->input_layouts[1]; + const auto& output_layout = impl_param->output_layout; const auto params_num = kernel_selector::GetActivationAdditionalParamsNumber(activation_params.activations[0].function); diff --git a/src/plugins/intel_gpu/src/graph/impls/ocl/adaptive_pooling.cpp b/src/plugins/intel_gpu/src/graph/impls/ocl/adaptive_pooling.cpp index 84a9fde6f0fd50..35e06fb78c35fd 100644 --- a/src/plugins/intel_gpu/src/graph/impls/ocl/adaptive_pooling.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/ocl/adaptive_pooling.cpp @@ -35,8 +35,8 @@ struct adaptive_pooling_impl : public typed_primitive_impl_ocl } public: - static primitive_impl* create(const adaptive_pooling_node& arg) { - auto params = get_default_params(arg); + static primitive_impl* create(const adaptive_pooling_node& arg, std::shared_ptr impl_param) { + auto params = get_default_params(*impl_param); auto optional_params = get_default_optional_params(arg.get_program()); const auto& primitive = arg.get_primitive(); diff --git a/src/plugins/intel_gpu/src/graph/impls/ocl/arg_max_min.cpp b/src/plugins/intel_gpu/src/graph/impls/ocl/arg_max_min.cpp index 1df36cf25c47be..641affe72ed2ec 100644 --- a/src/plugins/intel_gpu/src/graph/impls/ocl/arg_max_min.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/ocl/arg_max_min.cpp @@ -34,9 +34,8 @@ struct arg_max_min_impl : typed_primitive_impl_ocl { } public: - static primitive_impl* create(const arg_max_min_node& arg) { + static primitive_impl* create(const arg_max_min_node& arg, std::shared_ptr impl_param) { const auto& primitive = arg.get_primitive(); - const auto& axis = primitive->axis; const auto& top_k = primitive->top_k; const auto& out_type = primitive->output_type; @@ -45,7 +44,7 @@ struct arg_max_min_impl : typed_primitive_impl_ocl { const auto& values_first = primitive->values_first; const auto& outputs_num = primitive->input.size() == 3 ? 2 : 1; // second output passed as input for TOP_K layer - auto argm_params = get_default_params(arg); + auto argm_params = get_default_params(*impl_param); auto argm_optional_params = get_default_optional_params(arg.get_program()); @@ -84,7 +83,7 @@ struct arg_max_min_impl : typed_primitive_impl_ocl { argm_params.argMaxMinSortType = kernel_selector::argm_sort::INDEX; if (outputs_num == 2) { - argm_params.inputs.push_back(convert_data_tensor(arg.get_dependency(2).get_output_layout())); + argm_params.inputs.push_back(convert_data_tensor(impl_param->input_layouts[2])); } argm_params.values_first = values_first; diff --git a/src/plugins/intel_gpu/src/graph/impls/ocl/average_unpooling.cpp b/src/plugins/intel_gpu/src/graph/impls/ocl/average_unpooling.cpp index 09d1beddb1d822..4d9787057f85e8 100644 --- a/src/plugins/intel_gpu/src/graph/impls/ocl/average_unpooling.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/ocl/average_unpooling.cpp @@ -28,13 +28,13 @@ struct average_unpooling_impl : typed_primitive_impl_ocl { } public: - static primitive_impl* create(const average_unpooling_node& arg) { - auto average_unpooling_params = get_default_params(arg); + static primitive_impl* create(const average_unpooling_node& arg, std::shared_ptr impl_param) { + auto primitive = arg.get_primitive(); + auto average_unpooling_params = get_default_params(*impl_param); auto average_unpooling_optional_params = get_default_optional_params(arg.get_program()); auto& params = average_unpooling_params; - auto primitive = arg.get_primitive(); auto stride = primitive->stride; params.unpoolSize = { diff --git a/src/plugins/intel_gpu/src/graph/impls/ocl/batch_to_space.cpp b/src/plugins/intel_gpu/src/graph/impls/ocl/batch_to_space.cpp index 52dba85e6a7ef7..3b086d0593bba2 100644 --- a/src/plugins/intel_gpu/src/graph/impls/ocl/batch_to_space.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/ocl/batch_to_space.cpp @@ -25,13 +25,12 @@ struct batch_to_space_impl : typed_primitive_impl_ocl { } public: - static primitive_impl* create(const batch_to_space_node& arg) { - auto batch_to_space_params = get_default_params(arg); + static primitive_impl* create(const batch_to_space_node& arg, std::shared_ptr impl_param) { + auto primitive = arg.get_primitive(); + auto batch_to_space_params = get_default_params(*impl_param); auto batch_to_space_optional_params = get_default_optional_params(arg.get_program()); - auto primitive = arg.get_primitive(); - batch_to_space_params.block_shape = convert_dim_vector(primitive->block_shape); batch_to_space_params.crops_begin = convert_dim_vector(primitive->crops_begin); batch_to_space_params.crops_end = convert_dim_vector(primitive->crops_end); diff --git a/src/plugins/intel_gpu/src/graph/impls/ocl/binary_convolution.cpp b/src/plugins/intel_gpu/src/graph/impls/ocl/binary_convolution.cpp index 8fbbc1ee9f14ae..f0b290841b8eb5 100644 --- a/src/plugins/intel_gpu/src/graph/impls/ocl/binary_convolution.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/ocl/binary_convolution.cpp @@ -60,9 +60,9 @@ struct binary_convolution_impl : typed_primitive_impl_ocl { int32_t get_split() const override { return _outer.get_split(); } public: - static primitive_impl* create(const binary_convolution_node& arg) { + static primitive_impl* create(const binary_convolution_node& arg, std::shared_ptr impl_param) { const auto& primitive = arg.get_primitive(); - const auto& weights_layout = arg.weights(0).get_output_layout().convert_to_weights_layout(false); + const auto& weights_layout = (*impl_param->weights_layout).convert_to_weights_layout(false); const auto& weights_size = weights_layout.get_tensor(); const auto& split = primitive->split(); @@ -74,12 +74,10 @@ struct binary_convolution_impl : typed_primitive_impl_ocl { const auto depthwise_separable_opt = arg.get_depthwise_sep_opt(); const auto actual_split = depthwise_separable_opt ? (decltype(split))1 : split; - assert(arg.get_output_layout().feature() / primitive->split() == weights_layout.batch()); + assert(impl_param->output_layout.feature() / primitive->split() == weights_layout.batch()); - auto conv_params = - get_weights_bias_default_params(arg, actual_split); - auto conv_optional_params = - get_default_weights_bias_optional_params( + auto conv_params = get_weights_bias_default_params(*impl_param, actual_split); + auto conv_optional_params = get_default_weights_bias_optional_params( arg.get_program()); conv_params.pad_value = primitive->pad_value; diff --git a/src/plugins/intel_gpu/src/graph/impls/ocl/border.cpp b/src/plugins/intel_gpu/src/graph/impls/ocl/border.cpp index 702005b54471b4..b3ee4358dbeb34 100644 --- a/src/plugins/intel_gpu/src/graph/impls/ocl/border.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/ocl/border.cpp @@ -22,13 +22,13 @@ struct border_impl : typed_primitive_impl_ocl { return make_unique(*this); } - static primitive_impl* create(const border_node& arg) { - auto b_params = get_default_params(arg, 1); + static primitive_impl* create(const border_node& arg, std::shared_ptr impl_param) { + auto desc = arg.get_primitive(); + + auto b_params = get_default_params(*impl_param, 1); auto b_optional_params = get_default_optional_params(arg.get_program()); - auto desc = arg.get_primitive(); - b_params.lt_sizes = convert_dim_vector(desc->left_top_sizes); b_params.rb_sizes = convert_dim_vector(desc->right_bottom_sizes); b_params.border_value = desc->border_value; diff --git a/src/plugins/intel_gpu/src/graph/impls/ocl/broadcast.cpp b/src/plugins/intel_gpu/src/graph/impls/ocl/broadcast.cpp index 240c5a276aae2c..9961c5a81a8dea 100644 --- a/src/plugins/intel_gpu/src/graph/impls/ocl/broadcast.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/ocl/broadcast.cpp @@ -22,15 +22,16 @@ struct broadcast_impl : typed_primitive_impl_ocl { return make_unique(*this); } - static primitive_impl* create(const broadcast_node& arg) { - auto bc_params = get_default_params(arg, 1); + static primitive_impl* create(const broadcast_node& arg, std::shared_ptr impl_param) { + const auto& primitive = arg.get_primitive(); + auto bc_params = get_default_params(*impl_param, 1); auto bc_optional_params = get_default_optional_params(arg.get_program()); - const auto format = arg.get_output_layout().format; + const auto format = impl_param->output_layout.format; size_t max_axes_num = format.dimension(); - const auto& broadcast_axes = arg.get_primitive()->broadcast_axes; + const auto& broadcast_axes = primitive->broadcast_axes; uint16_t index = (uint16_t)0; uint16_t input_index = (uint16_t)broadcast_axes.size(); diff --git a/src/plugins/intel_gpu/src/graph/impls/ocl/bucketize.cpp b/src/plugins/intel_gpu/src/graph/impls/ocl/bucketize.cpp index ddf972fd77b182..8e49e7e9c2fd15 100644 --- a/src/plugins/intel_gpu/src/graph/impls/ocl/bucketize.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/ocl/bucketize.cpp @@ -24,8 +24,8 @@ struct bucketize_impl : typed_primitive_impl_ocl { return make_unique(*this); } - static primitive_impl* create(const bucketize_node& arg) { - auto params = get_default_params(arg); + static primitive_impl* create(const bucketize_node& arg, std::shared_ptr impl_param) { + auto params = get_default_params(*impl_param); auto optional_params = get_default_optional_params(arg.get_program()); diff --git a/src/plugins/intel_gpu/src/graph/impls/ocl/concatenation.cpp b/src/plugins/intel_gpu/src/graph/impls/ocl/concatenation.cpp index e0e22e31762cab..645ee08f1830ab 100644 --- a/src/plugins/intel_gpu/src/graph/impls/ocl/concatenation.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/ocl/concatenation.cpp @@ -69,23 +69,22 @@ struct concatenation_impl : typed_primitive_impl_ocl { } public: - static primitive_impl* create(const concatenation_node& arg) { + static primitive_impl* create(const concatenation_node& arg, std::shared_ptr impl_param) { if (arg.can_be_optimized()) { return new concatenation_impl(arg, {}); } - - auto concat_params = get_default_params(arg); - auto concat_optional_params = - get_default_optional_params(arg.get_program()); - auto axis = arg.get_primitive()->axis; + const auto& primitive = arg.get_primitive(); + auto concat_params = get_default_params(*impl_param); + auto concat_optional_params = get_default_optional_params(arg.get_program()); + auto axis = primitive->axis; concat_params.inputs.resize(arg.inputs_count()); for (size_t i = 0; i < arg.inputs_count(); ++i) { - const layout& input_layout = arg.input(i).get_output_layout(); + const layout& input_layout = impl_param->input_layouts[i]; concat_params.inputs[i] = convert_data_tensor(input_layout); } - concat_params.axis = convert_axis(axis, arg.get_output_layout().get_rank()); + concat_params.axis = convert_axis(axis, impl_param->output_layout.get_rank()); concat_optional_params.kernelPerInput = true; auto& kernel_selector = kernel_selector::concatenation_kernel_selector::Instance(); diff --git a/src/plugins/intel_gpu/src/graph/impls/ocl/convert_color.cpp b/src/plugins/intel_gpu/src/graph/impls/ocl/convert_color.cpp index a8c1920afe7dc6..133c8ec9654326 100644 --- a/src/plugins/intel_gpu/src/graph/impls/ocl/convert_color.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/ocl/convert_color.cpp @@ -31,17 +31,17 @@ struct convert_color_impl : typed_primitive_impl_ocl { } public: - static primitive_impl* create(const convert_color_node& arg) { - auto convert_color_params = get_default_params(arg); + static primitive_impl* create(const convert_color_node& arg, std::shared_ptr impl_param) { + auto primitive = arg.get_primitive(); + + auto convert_color_params = get_default_params(*impl_param); auto convert_color_optional_params = get_default_optional_params(arg.get_program()); - for (size_t i = 1; i < arg.inputs_count(); ++i) { - convert_color_params.inputs.push_back(convert_data_tensor(arg.input(i).get_output_layout())); + for (size_t i = 1; i < impl_param->input_layouts.size(); ++i) { + convert_color_params.inputs.push_back(convert_data_tensor(impl_param->input_layouts[i])); } - auto primitive = arg.get_primitive(); - convert_color_params.input_color_format = static_cast(primitive->input_color_format); convert_color_params.output_color_format = static_cast(primitive->output_color_format); convert_color_params.mem_type = static_cast(primitive->mem_type); diff --git a/src/plugins/intel_gpu/src/graph/impls/ocl/convolution.cpp b/src/plugins/intel_gpu/src/graph/impls/ocl/convolution.cpp index 41f8295c0a357d..9fc27bc70049fc 100644 --- a/src/plugins/intel_gpu/src/graph/impls/ocl/convolution.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/ocl/convolution.cpp @@ -60,9 +60,8 @@ struct convolution_impl : typed_primitive_impl_ocl { bool get_depthwise_sep_opt() const override { return _outer.get_depthwise_sep_opt(); } public: - static primitive_impl* create(const convolution_node& arg) { + static primitive_impl* create(const convolution_node& arg, std::shared_ptr impl_param) { const auto& primitive = arg.get_primitive(); - const auto& weights_layout = arg.weights(0).get_output_layout().convert_to_weights_layout(primitive->grouped_weights_shape); const auto &split = primitive->split(); auto stride = primitive->stride; @@ -73,15 +72,15 @@ struct convolution_impl : typed_primitive_impl_ocl { const auto transposed = arg.get_transposed(); auto conv_params = get_weight_bias_zero_point_default_params( - arg, split, 1, primitive->grouped_weights_shape); + *impl_param, split, 1, primitive->grouped_weights_shape); auto conv_optional_params = get_default_weights_bias_optional_params(arg.get_program()); if (primitive->deformable_mode) { - conv_params.inputs.push_back(convert_data_tensor(arg.trans().get_output_layout())); + conv_params.inputs.push_back(convert_data_tensor(impl_param->input_layouts[1])); conv_params.deformable_mode = true; if (primitive->input.size() == 3) { - conv_params.inputs.push_back(convert_data_tensor(arg.mask().get_output_layout())); + conv_params.inputs.push_back(convert_data_tensor(impl_param->input_layouts[2])); conv_params.deformable_mask_enabled = true; } conv_params.bilinear_interpolation_pad = arg.bilinear_interpolation_pad(); @@ -93,6 +92,8 @@ struct convolution_impl : typed_primitive_impl_ocl { conv_params.split = split; conv_params.groups = groups; + const auto& weights_layout = impl_param->input_layouts[1 + 0 + arg.get_deform_conv_dep_offset()] + .convert_to_weights_layout(primitive->grouped_weights_shape); uint32_t kx = weights_layout.spatial(0); uint32_t ky = weights_layout.spatial(1); uint32_t kz = weights_layout.spatial(2); @@ -113,9 +114,9 @@ struct convolution_impl : typed_primitive_impl_ocl { uint32_t dilation_x = dilation.size() >= 1 ? dilation[dilation.size() - 1] : 1; conv_params.dilation = {dilation_x, dilation_y, dilation_z}; - if ((arg.get_dependency(0).get_output_layout().data_type == data_types::u8 || - arg.get_dependency(0).get_output_layout().data_type == data_types::i8) && - arg.get_dependency(1).get_output_layout().data_type == data_types::i8) { + if ((impl_param->input_layouts[0].data_type == data_types::u8 || + impl_param->input_layouts[0].data_type == data_types::i8) && + impl_param->input_layouts[1].data_type == data_types::i8) { if (!primitive->weights_zero_points.empty() && !primitive->activations_zero_points.empty()) { conv_params.quantization = kernel_selector::QuantizationType::ASYMMETRIC_DATA_AND_WEIGHTS; } else if (!primitive->weights_zero_points.empty()) { @@ -129,7 +130,7 @@ struct convolution_impl : typed_primitive_impl_ocl { conv_params.quantization = kernel_selector::QuantizationType::NONE; } - auto format = arg.get_output_layout().format; + auto format = impl_param->output_layout.format; if (format == format::b_fs_zyx_fsv16 || format == format::bs_fs_zyx_bsv16_fsv16 || format == format::bs_fs_yx_bsv16_fsv16 || diff --git a/src/plugins/intel_gpu/src/graph/impls/ocl/crop.cpp b/src/plugins/intel_gpu/src/graph/impls/ocl/crop.cpp index 5136a426814213..deb8fb573142c1 100644 --- a/src/plugins/intel_gpu/src/graph/impls/ocl/crop.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/ocl/crop.cpp @@ -27,16 +27,17 @@ struct crop_impl : typed_primitive_impl_ocl { } public: - static primitive_impl* create(const crop_node& arg) { - auto ew_params = get_default_params(arg, 1); - auto ew_optional_params = - get_default_optional_params(arg.get_program()); + static primitive_impl* create(const crop_node& arg, std::shared_ptr impl_param) { + const auto& primitive = arg.get_primitive(); + + auto ew_params = get_default_params(*impl_param, 1); + auto ew_optional_params = get_default_optional_params(arg.get_program()); ew_params.operations.push_back( {{kernel_selector::eltwise_params::InputType::Buffer(0)}, kernel_selector::eltwise_mode::ASSIGN}); - const auto& input_layout = arg.input().get_output_layout(); - ew_params.inputs[0] = convert_data_tensor(input_layout, 1, arg.get_primitive()->offsets); + const auto& input_layout = impl_param->input_layouts[0]; + ew_params.inputs[0] = convert_data_tensor(input_layout, 1, primitive->offsets); auto& kernel_selector = kernel_selector::eltwise_kernel_selector::Instance(); auto best_kernels = kernel_selector.GetBestKernels(ew_params, ew_optional_params); diff --git a/src/plugins/intel_gpu/src/graph/impls/ocl/ctc_greedy_decoder.cpp b/src/plugins/intel_gpu/src/graph/impls/ocl/ctc_greedy_decoder.cpp index 89eaa6672a3a67..3fbd097019faf6 100644 --- a/src/plugins/intel_gpu/src/graph/impls/ocl/ctc_greedy_decoder.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/ocl/ctc_greedy_decoder.cpp @@ -26,20 +26,20 @@ struct ctc_greedy_decoder_impl : typed_primitive_impl_ocl { } public: - static primitive_impl* create(const ctc_greedy_decoder_node& arg) { - auto ctc_gd_params = get_default_params(arg); - auto ctc_gd_optional_params = get_default_optional_params(arg.get_program()); + static primitive_impl* create(const ctc_greedy_decoder_node& arg, std::shared_ptr impl_param) { auto prim = arg.get_primitive(); - ctc_gd_params.inputs.push_back( - convert_data_tensor(arg.seq_indicators().get_output_layout())); + auto ctc_gd_params = get_default_params(*impl_param); + auto ctc_gd_optional_params = get_default_optional_params(arg.get_program()); + + ctc_gd_params.inputs.push_back(convert_data_tensor(impl_param->input_layouts[1])); ctc_gd_params.merge_repeated = prim->ctc_merge_repeated; ctc_gd_params.blank_index = prim->blank_index; ctc_gd_params.outputs_num = arg.has_second_output() ? 2 : 1; if (ctc_gd_params.outputs_num == 2) { - ctc_gd_params.inputs.push_back( - convert_data_tensor(arg.second_output().get_output_layout())); + const auto& second_output_layout = impl_param->input_layouts[1]; + ctc_gd_params.inputs.push_back(convert_data_tensor(second_output_layout)); } auto& kernel_selector = kernel_selector::ctc_greedy_decoder_kernel_selector::Instance(); diff --git a/src/plugins/intel_gpu/src/graph/impls/ocl/cum_sum.cpp b/src/plugins/intel_gpu/src/graph/impls/ocl/cum_sum.cpp index 3a6b08e365f1fd..fa855a3f1c2d27 100644 --- a/src/plugins/intel_gpu/src/graph/impls/ocl/cum_sum.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/ocl/cum_sum.cpp @@ -45,12 +45,14 @@ struct cum_sum_impl : typed_primitive_impl_ocl { } public: - static primitive_impl* create(const cum_sum_node& arg) { - auto cum_sum_params = get_default_params(arg); + static primitive_impl* create(const cum_sum_node& arg, std::shared_ptr impl_param) { + const auto& prim = arg.get_primitive(); + + auto cum_sum_params = get_default_params(*impl_param); auto cum_sum_optional_params = get_default_optional_params(arg.get_program()); - cum_sum_params.axis = convert_axis(arg.get_primitive()->axis); + cum_sum_params.axis = convert_axis(prim->axis); cum_sum_params.exclusive = arg.get_primitive()->exclusive; cum_sum_params.reverse = arg.get_primitive()->reverse; diff --git a/src/plugins/intel_gpu/src/graph/impls/ocl/custom_primitive.cpp b/src/plugins/intel_gpu/src/graph/impls/ocl/custom_primitive.cpp index 87e922ca9560b5..1c843bbc508117 100644 --- a/src/plugins/intel_gpu/src/graph/impls/ocl/custom_primitive.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/ocl/custom_primitive.cpp @@ -179,7 +179,7 @@ static void add_layout_to_jit(kernel_selector::jit_constants& mem_consts, const mem_consts.AddConstant(kernel_selector::MakeJitConstant(name + "_OFFSET", std::to_string(offset))); } -static std::string get_jit_constant(const custom_gpu_primitive_node& outer) { +static std::string get_jit_constant(const custom_gpu_primitive_node& outer, const kernel_impl_params& impl_param) { kernel_selector::jit_constants mem_consts{ kernel_selector::MakeJitConstant("NUM_INPUTS", std::to_string(outer.get_dependencies().size()))}; const auto primitive = outer.get_primitive().get(); @@ -189,11 +189,11 @@ static std::string get_jit_constant(const custom_gpu_primitive_node& outer) { kernel_selector::MakeJitConstant("LOCAL_WORKSIZE", primitive->lws), }); - for (size_t i = 0; i < outer.get_dependencies().size(); i++) { - add_layout_to_jit(mem_consts, "INPUT" + std::to_string(i), outer.input(i).get_output_layout()); + for (size_t i = 0; i < impl_param.input_layouts.size(); i++) { + add_layout_to_jit(mem_consts, "INPUT" + std::to_string(i), impl_param.input_layouts[i]); } - add_layout_to_jit(mem_consts, "OUTPUT0", outer.get_output_layout()); + add_layout_to_jit(mem_consts, "OUTPUT0", impl_param.output_layout); std::ostringstream oss; oss << "// Custom Layer Built-ins\n\n"; @@ -204,14 +204,14 @@ static std::string get_jit_constant(const custom_gpu_primitive_node& outer) { return oss.str(); } -static primitive_impl* create(const custom_gpu_primitive_node& arg) { +static primitive_impl* create(const custom_gpu_primitive_node& arg, std::shared_ptr impl_param) { const auto primitive = arg.get_primitive().get(); auto cl_kernel = std::make_shared(); cl_kernel->code.kernelString = std::make_shared(); cl_kernel->code.kernelString->entry_point = primitive->kernel_entry_point; cl_kernel->code.kernelString->options = primitive->build_options; - cl_kernel->code.kernelString->jit = get_jit_constant(arg); + cl_kernel->code.kernelString->jit = get_jit_constant(arg, *impl_param); for (const auto& s : primitive->kernels_code) { cl_kernel->code.kernelString->str += s + "\n"; } diff --git a/src/plugins/intel_gpu/src/graph/impls/ocl/deconvolution.cpp b/src/plugins/intel_gpu/src/graph/impls/ocl/deconvolution.cpp index 133a3f876d956a..189d098553afdd 100644 --- a/src/plugins/intel_gpu/src/graph/impls/ocl/deconvolution.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/ocl/deconvolution.cpp @@ -51,24 +51,18 @@ struct deconvolution_impl : typed_primitive_impl_ocl { uint32_t get_groups() const override { return _outer.get_groups(); } public: - static primitive_impl* create(const deconvolution_node& arg) { + static primitive_impl* create(const deconvolution_node& arg, std::shared_ptr impl_param) { const auto& primitive = arg.get_primitive(); - const auto& weights_layout = arg.weights(0).get_output_layout().convert_to_weights_layout(primitive->grouped_weights_shape); - const auto& split = primitive->split(); const auto& stride = primitive->stride; -#if 0 // TODO: support dilation - const auto& dilation = primitive->dilation; -#else - const ov::Strides dilation(arg.get_output_layout().get_spatial_rank(), 1); -#endif + const ov::Strides dilation(impl_param->output_layout.get_spatial_rank(), 1); const auto actual_split = split; const auto& pad = primitive->pad; const auto& groups = primitive->groups; auto deconv_params = get_weights_bias_default_params( - arg, + *impl_param, (groups > 1) ? 1 : actual_split, 1, primitive->grouped_weights_shape); @@ -78,6 +72,8 @@ struct deconvolution_impl : typed_primitive_impl_ocl { deconv_params.split = split; deconv_params.groups = groups; + const auto weights_idx = 1 + 0; + const auto& weights_layout = impl_param->input_layouts[weights_idx].convert_to_weights_layout(primitive->grouped_weights_shape); uint32_t kx = weights_layout.spatial(0); uint32_t ky = weights_layout.spatial(1); uint32_t kz = weights_layout.spatial(2); diff --git a/src/plugins/intel_gpu/src/graph/impls/ocl/deformable_convolution.cpp b/src/plugins/intel_gpu/src/graph/impls/ocl/deformable_convolution.cpp index afe76537b8b704..db0c7b05182e87 100644 --- a/src/plugins/intel_gpu/src/graph/impls/ocl/deformable_convolution.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/ocl/deformable_convolution.cpp @@ -37,11 +37,8 @@ struct deformable_conv_impl : typed_primitive_impl_ocl { uint32_t get_groups() const override { return _outer.get_groups(); } public: - static primitive_impl* create(const deformable_conv_node& arg) { + static primitive_impl* create(const deformable_conv_node& arg, std::shared_ptr impl_param) { const auto& primitive = arg.get_primitive(); - const auto& weights_layout = arg.weights(0).get_output_layout().convert_to_weights_layout(false); - const auto& weights_size = weights_layout.get_tensor(); - const auto& split = primitive->split(); const auto& groups = primitive->groups; @@ -49,12 +46,16 @@ struct deformable_conv_impl : typed_primitive_impl_ocl { const auto actual_split = depthwise_separable_opt ? (decltype(split))1 : split; auto conv_params = get_weights_bias_default_params( - arg, + *impl_param, (groups > 1 && !depthwise_separable_opt) ? groups : actual_split, groups); auto conv_optional_params = get_default_weights_bias_optional_params(arg.get_program()); + const auto weight_idx = 1 + 0; + const auto& weights_layout = impl_param->input_layouts[weight_idx].convert_to_weights_layout(false); + const auto& weights_size = weights_layout.get_tensor(); + conv_params.depthwise_separable_opt = depthwise_separable_opt; conv_params.split = split; conv_params.groups = groups; @@ -91,9 +92,12 @@ struct deformable_interp_impl : typed_primitive_impl_ocl { uint32_t get_groups() const override { return 1; } public: - static primitive_impl* create(const deformable_interp_node& arg) { + static primitive_impl* create(const deformable_interp_node& arg, std::shared_ptr impl_param) { + const auto input_idx = 0; + const auto trans_idx = 1; + const auto mask_idx = 2; const auto& primitive = arg.get_primitive(); - const auto& input_layout = arg.input().get_output_layout(); + const auto& input_layout = impl_param->input_layouts[input_idx]; const auto& kernel_size = primitive->kernel_size; auto stride = primitive->stride; @@ -102,7 +106,7 @@ struct deformable_interp_impl : typed_primitive_impl_ocl { const auto& groups = primitive->groups; const auto& deformable_groups = primitive->deformable_groups; - auto conv_params = get_default_params(arg, groups); + auto conv_params = get_default_params(*impl_param, groups); auto conv_optional_params = get_default_optional_params(arg.get_program()); @@ -110,9 +114,9 @@ struct deformable_interp_impl : typed_primitive_impl_ocl { auto weights_layout = layout(input_layout.data_type, input_layout.format, kernel_size); conv_params.weights = convert_weights_tensor(weights_layout); - conv_params.inputs.push_back(convert_data_tensor(arg.trans().get_output_layout())); + conv_params.inputs.push_back(convert_data_tensor(impl_param->input_layouts[trans_idx])); if (primitive->input.size() == 3) { - conv_params.inputs.push_back(convert_data_tensor(arg.mask().get_output_layout())); + conv_params.inputs.push_back(convert_data_tensor(impl_param->input_layouts[mask_idx])); conv_params.deformable_mask_enabled = true; } conv_params.bilinear_interpolation_pad = primitive->bilinear_interpolation_pad; diff --git a/src/plugins/intel_gpu/src/graph/impls/ocl/depth_to_space.cpp b/src/plugins/intel_gpu/src/graph/impls/ocl/depth_to_space.cpp index 4feb9df1f5f9c7..c86db2ac2842aa 100644 --- a/src/plugins/intel_gpu/src/graph/impls/ocl/depth_to_space.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/ocl/depth_to_space.cpp @@ -24,13 +24,15 @@ struct depth_to_space_impl : typed_primitive_impl_ocl { } public: - static primitive_impl* create(const depth_to_space_node& arg) { - auto depth_to_space_params = get_default_params(arg); + static primitive_impl* create(const depth_to_space_node& arg, std::shared_ptr impl_param) { + const auto& prim = arg.get_primitive(); + + auto depth_to_space_params = get_default_params(*impl_param); auto depth_to_space_optional_params = get_default_optional_params(arg.get_program()); - depth_to_space_params.block_size = arg.get_primitive()->block_size; - depth_to_space_params.mode = arg.get_primitive()->mode == depth_to_space_mode::blocks_first ? kernel_selector::depth_to_space_mode::BLOCKS_FIRST + depth_to_space_params.block_size = prim->block_size; + depth_to_space_params.mode = prim->mode == depth_to_space_mode::blocks_first ? kernel_selector::depth_to_space_mode::BLOCKS_FIRST : kernel_selector::depth_to_space_mode::DEPTH_FIRST; auto& kernel_selector = kernel_selector::depth_to_space_kernel_selector::Instance(); diff --git a/src/plugins/intel_gpu/src/graph/impls/ocl/detection_output.cpp b/src/plugins/intel_gpu/src/graph/impls/ocl/detection_output.cpp index fad68beb69e506..ec14a098eeda9b 100644 --- a/src/plugins/intel_gpu/src/graph/impls/ocl/detection_output.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/ocl/detection_output.cpp @@ -51,13 +51,15 @@ struct detection_output_impl : typed_primitive_impl_ocl { } public: - static primitive_impl* create(const detection_output_node& arg) { - auto detect_out_params = get_default_params(arg); + static primitive_impl* create(const detection_output_node& arg, std::shared_ptr impl_param) { + auto detect_out_params = get_default_params(*impl_param); auto detect_out_optional_params = get_default_optional_params(arg.get_program()); - detect_out_params.inputs.push_back(convert_data_tensor(arg.confidence().get_output_layout())); - detect_out_params.inputs.push_back(convert_data_tensor(arg.prior_box().get_output_layout())); + const auto confidence_idx = 1; + const auto prior_box_idx = 2; + detect_out_params.inputs.push_back(convert_data_tensor(impl_param->input_layouts[confidence_idx])); + detect_out_params.inputs.push_back(convert_data_tensor(impl_param->input_layouts[prior_box_idx])); set_detection_output_specific_params(detect_out_params.detectOutParams, arg); auto& kernel_selector = kernel_selector::detection_output_kernel_selector::Instance(); diff --git a/src/plugins/intel_gpu/src/graph/impls/ocl/dft.cpp b/src/plugins/intel_gpu/src/graph/impls/ocl/dft.cpp index ed3e69cbea1a90..9580155bc118c4 100644 --- a/src/plugins/intel_gpu/src/graph/impls/ocl/dft.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/ocl/dft.cpp @@ -22,8 +22,8 @@ struct dft_impl : typed_primitive_impl_ocl { return make_unique(*this); } - static primitive_impl* create(const dft_node& arg) { - auto params = get_default_params(arg); + static primitive_impl* create(const dft_node& arg, std::shared_ptr impl_param) { + auto params = get_default_params(*impl_param); auto primitive = arg.get_primitive(); params.axes = primitive->axes; if (primitive->kind == dft_kind::inverse) { diff --git a/src/plugins/intel_gpu/src/graph/impls/ocl/eltwise.cpp b/src/plugins/intel_gpu/src/graph/impls/ocl/eltwise.cpp index b5f9033e0c5741..6892f26daa46dd 100644 --- a/src/plugins/intel_gpu/src/graph/impls/ocl/eltwise.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/ocl/eltwise.cpp @@ -29,16 +29,17 @@ struct eltwise_impl : typed_primitive_impl_ocl { } public: - static primitive_impl* create(const eltwise_node& arg) { - auto ew_params = get_default_params(arg); + static primitive_impl* create(const eltwise_node& arg, std::shared_ptr impl_param) { + const auto& primitive = arg.get_primitive(); + + auto ew_params = get_default_params(*impl_param); auto ew_optional_params = get_default_optional_params(arg.get_program()); for (size_t i = 1; i < arg.inputs_count(); i++) { - ew_params.inputs.push_back(convert_data_tensor(arg.input(i).get_output_layout())); + ew_params.inputs.push_back(convert_data_tensor(impl_param->input_layouts[i])); } - const auto& primitive = arg.get_primitive(); ew_params.operations.push_back({{kernel_selector::eltwise_params::InputType::Buffer(0), kernel_selector::eltwise_params::InputType::Buffer(1)}, @@ -56,8 +57,8 @@ struct eltwise_impl : typed_primitive_impl_ocl { for (size_t i = 0; i < ew_params.inputs.size(); i++) { if (!ew_params.inputs[i].SameDims(ew_params.outputs[0])) { - std::vector input_size = arg.input(i).get_output_layout().get_tensor().raw.vector(); - std::vector output_size = arg.get_output_layout().get_tensor().raw.vector(); + std::vector input_size = impl_param->input_layouts[i].get_tensor().raw.vector(); + std::vector output_size = impl_param->output_layout.get_tensor().raw.vector(); bool broadcast = false; for (size_t d = 0; d < output_size.size(); d++) { if (output_size[d] != 1 && input_size[d] == 1) @@ -98,8 +99,8 @@ struct eltwise_impl : typed_primitive_impl_ocl { // TODO [LOW PRECISION]: check if this parameter's really needed. Maybe data types are enough bool quantization = true; for (size_t i = 0; i < arg.inputs_count(); i++) { - if (arg.input(i).get_output_layout().data_type != data_types::u8 && - arg.input(i).get_output_layout().data_type != data_types::i8) { + if (impl_param->input_layouts[i].data_type != data_types::u8 && + impl_param->input_layouts[i].data_type != data_types::i8) { quantization = false; } } diff --git a/src/plugins/intel_gpu/src/graph/impls/ocl/embedding_bag.cpp b/src/plugins/intel_gpu/src/graph/impls/ocl/embedding_bag.cpp index 936c47ec639f6b..61a09629391c12 100644 --- a/src/plugins/intel_gpu/src/graph/impls/ocl/embedding_bag.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/ocl/embedding_bag.cpp @@ -24,12 +24,13 @@ struct embedding_bag_impl : typed_primitive_impl_ocl { } public: - static primitive_impl* create(const embedding_bag_node& arg) { - auto embedding_bag_params = get_default_params(arg); + static primitive_impl* create(const embedding_bag_node& arg, std::shared_ptr impl_param) { + const auto& primitive = arg.get_primitive(); + auto embedding_bag_params = get_default_params(*impl_param); auto embedding_bag_optional_params = get_default_optional_params(arg.get_program()); - switch (arg.get_primitive()->type) { + switch (primitive->type) { case embedding_bag::packed_sum: embedding_bag_params.type = kernel_selector::EmbeddingBagType::PACKED_SUM; break; @@ -45,7 +46,7 @@ struct embedding_bag_impl : typed_primitive_impl_ocl { } for (size_t i = 1; i < arg.inputs_count(); i++) { - embedding_bag_params.inputs.push_back(convert_data_tensor(arg.input(i).get_output_layout())); + embedding_bag_params.inputs.push_back(convert_data_tensor(impl_param->input_layouts[i])); } embedding_bag_params.default_index = arg.get_primitive()->default_index; diff --git a/src/plugins/intel_gpu/src/graph/impls/ocl/experimental_detectron_detection_output.cpp b/src/plugins/intel_gpu/src/graph/impls/ocl/experimental_detectron_detection_output.cpp index 57d57b47ef3545..ac529fe4c30efe 100644 --- a/src/plugins/intel_gpu/src/graph/impls/ocl/experimental_detectron_detection_output.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/ocl/experimental_detectron_detection_output.cpp @@ -31,8 +31,8 @@ struct experimental_detectron_detection_output_impl } public: - static primitive_impl* create(const experimental_detectron_detection_output_node& arg) { - auto params = get_default_params(arg); + static primitive_impl* create(const experimental_detectron_detection_output_node& arg, std::shared_ptr impl_param) { + auto params = get_default_params(*impl_param); auto optional_params = get_default_optional_params( arg.get_program()); diff --git a/src/plugins/intel_gpu/src/graph/impls/ocl/experimental_detectron_generate_proposals_single_image.cpp b/src/plugins/intel_gpu/src/graph/impls/ocl/experimental_detectron_generate_proposals_single_image.cpp index 3fa8265c34d174..430ca1149505d7 100644 --- a/src/plugins/intel_gpu/src/graph/impls/ocl/experimental_detectron_generate_proposals_single_image.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/ocl/experimental_detectron_generate_proposals_single_image.cpp @@ -37,8 +37,8 @@ struct experimental_detectron_generate_proposals_single_image_impl } public: - static primitive_impl* create(const experimental_detectron_generate_proposals_single_image_node& arg) { - auto params = get_default_params(arg); + static primitive_impl* create(const experimental_detectron_generate_proposals_single_image_node& arg, std::shared_ptr impl_param) { + auto params = get_default_params(*impl_param); auto optional_params = get_default_optional_params< kernel_selector::experimental_detectron_generate_proposals_single_image_optional_params>(arg.get_program()); diff --git a/src/plugins/intel_gpu/src/graph/impls/ocl/experimental_detectron_prior_grid_generator.cpp b/src/plugins/intel_gpu/src/graph/impls/ocl/experimental_detectron_prior_grid_generator.cpp index a0b3d373aa9835..73f15315b0fb7d 100644 --- a/src/plugins/intel_gpu/src/graph/impls/ocl/experimental_detectron_prior_grid_generator.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/ocl/experimental_detectron_prior_grid_generator.cpp @@ -26,8 +26,8 @@ struct experimental_detectron_prior_grid_generator_impl return make_unique(*this); } - static primitive_impl* create(const experimental_detectron_prior_grid_generator_node& arg) { - auto params = get_default_params(arg); + static primitive_impl* create(const experimental_detectron_prior_grid_generator_node& arg, std::shared_ptr impl_param) { + auto params = get_default_params(*impl_param); auto primPtr = arg.get_primitive(); auto& prim = *primPtr; diff --git a/src/plugins/intel_gpu/src/graph/impls/ocl/experimental_detectron_roi_feature_extractor.cpp b/src/plugins/intel_gpu/src/graph/impls/ocl/experimental_detectron_roi_feature_extractor.cpp index 21b9a9c21dbea4..4ba3b50a89c107 100644 --- a/src/plugins/intel_gpu/src/graph/impls/ocl/experimental_detectron_roi_feature_extractor.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/ocl/experimental_detectron_roi_feature_extractor.cpp @@ -39,8 +39,9 @@ struct experimental_detectron_roi_feature_extractor_impl : public typed_primitiv } public: - static primitive_impl* create(const experimental_detectron_roi_feature_extractor_node& arg) { - const auto output_layout = arg.get_output_layout(); + static primitive_impl* create(const experimental_detectron_roi_feature_extractor_node& arg, std::shared_ptr impl_param) { + const auto& primitive = arg.get_primitive(); + const auto output_layout = impl_param->output_layout; const auto padding_filling_value = output_layout.data_padding.filling_value(); CLDNN_ERROR_NOT_EQUAL(arg.id(), "experimental_detectron_roi_feature_extractor padding filling value", @@ -48,14 +49,12 @@ struct experimental_detectron_roi_feature_extractor_impl : public typed_primitiv "padding mode", 0.0f, "Unknown padding mode in experimental_detectron_roi_feature_extractor."); - - auto params = get_default_params(arg); + auto params = get_default_params(*impl_param); auto optional_params = get_default_optional_params(arg.get_program()); - const auto& primitive = arg.get_primitive(); size_t number_of_inputs = primitive->input_size() - 1; for (std::size_t i = 1; i < number_of_inputs; i++) { - params.inputs.push_back(convert_data_tensor(arg.input(i).get_output_layout())); + params.inputs.push_back(convert_data_tensor(impl_param->input_layouts[i])); } params.output_dim = primitive->output_dim; diff --git a/src/plugins/intel_gpu/src/graph/impls/ocl/experimental_detectron_topk_rois.cpp b/src/plugins/intel_gpu/src/graph/impls/ocl/experimental_detectron_topk_rois.cpp index fa33287cf158f6..97589c7e53b6ab 100644 --- a/src/plugins/intel_gpu/src/graph/impls/ocl/experimental_detectron_topk_rois.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/ocl/experimental_detectron_topk_rois.cpp @@ -21,13 +21,12 @@ struct experimental_detectron_topk_rois_impl : typed_primitive_impl_ocl(*this); } - static primitive_impl *create(const experimental_detectron_topk_rois_node &arg) { - auto params = get_default_params( - arg); + static primitive_impl *create(const experimental_detectron_topk_rois_node &arg, std::shared_ptr impl_param) { + const auto& primitive = arg.get_primitive(); + auto params = get_default_params(*impl_param); const auto& experimental_detectron_topk_rois_kernel_selector = kernel_selector::experimental_detectron_topk_rois_kernel_selector::Instance(); - const auto& primitive = arg.get_primitive(); - params.inputs.push_back(convert_data_tensor(arg.input(1).get_output_layout())); + params.inputs.push_back(convert_data_tensor(impl_param->input_layouts[1])); params.max_rois = primitive->max_rois; auto best_kernels = experimental_detectron_topk_rois_kernel_selector.GetBestKernels(params, kernel_selector::experimental_detectron_topk_roi_optional_params()); diff --git a/src/plugins/intel_gpu/src/graph/impls/ocl/extract_image_patches.cpp b/src/plugins/intel_gpu/src/graph/impls/ocl/extract_image_patches.cpp index 38083b4ee17651..33131d073c548b 100644 --- a/src/plugins/intel_gpu/src/graph/impls/ocl/extract_image_patches.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/ocl/extract_image_patches.cpp @@ -23,15 +23,16 @@ struct extract_image_patches_impl : typed_primitive_impl_ocl(arg); + static primitive_impl* create(const extract_image_patches_node& arg, std::shared_ptr impl_param) { + const auto& prim = arg.get_primitive(); + auto params = get_default_params(*impl_param); auto optional_params = get_default_optional_params(arg.get_program()); - params.sizes = arg.get_primitive()->sizes; - params.strides = arg.get_primitive()->strides; - params.rates = arg.get_primitive()->rates; - params.auto_pad = arg.get_primitive()->auto_pad; + params.sizes = prim->sizes; + params.strides = prim->strides; + params.rates = prim->rates; + params.auto_pad = prim->auto_pad; auto& kernel_selector = kernel_selector::extract_image_patches_kernel_selector::Instance(); auto best_kernels = kernel_selector.GetBestKernels(params, optional_params); diff --git a/src/plugins/intel_gpu/src/graph/impls/ocl/fully_connected.cpp b/src/plugins/intel_gpu/src/graph/impls/ocl/fully_connected.cpp index 2ea1e22bf72a93..36ae094934063f 100644 --- a/src/plugins/intel_gpu/src/graph/impls/ocl/fully_connected.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/ocl/fully_connected.cpp @@ -40,21 +40,20 @@ struct fully_connected_impl : typed_primitive_impl_ocl { } public: - static primitive_impl* create(const fully_connected_node& arg) { - auto fc_params = get_weights_bias_default_params(arg); + static primitive_impl* create(const fully_connected_node& arg, std::shared_ptr impl_param) { + const auto primitive = arg.get_primitive(); + auto fc_params = get_weights_bias_default_params(*impl_param); auto fc_optional_params = get_default_weights_bias_optional_params( arg.get_program()); fc_optional_params.allowInputReordering = true; - const auto primitive = arg.get_primitive(); - if (primitive->input_size != 3) fc_params.outputs = { fc_params.outputs[0].FlattenFeatureAndSpatials() }; bool is_quantized = true; - for (auto& input : arg.get_dependencies()) - is_quantized &= data_type_traits::is_quantized(input->get_output_layout().data_type); + for (auto& input : impl_param->input_layouts) + is_quantized &= data_type_traits::is_quantized(input.data_type); if (is_quantized) { fc_params.quantization = kernel_selector::QuantizationType::SYMMETRIC; diff --git a/src/plugins/intel_gpu/src/graph/impls/ocl/gather.cpp b/src/plugins/intel_gpu/src/graph/impls/ocl/gather.cpp index c309520291d624..434724d7b5338a 100644 --- a/src/plugins/intel_gpu/src/graph/impls/ocl/gather.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/ocl/gather.cpp @@ -68,17 +68,18 @@ struct gather_impl : typed_primitive_impl_ocl { } public: - static primitive_impl* create(const gather_node& arg) { - auto gather_params = get_default_params(arg); + static primitive_impl* create(const gather_node& arg, std::shared_ptr impl_param) { + const auto& prim = arg.get_primitive(); + auto gather_params = get_default_params(*impl_param); auto gather_optional_params = get_default_optional_params(arg.get_program()); - auto input_layout = arg.get_dependency(0).get_output_layout(); - gather_params.axis = convert_axis(arg.get_primitive()->axis, input_layout.get_rank()); - gather_params.batch_dim = size_t(arg.get_primitive()->batch_dim); - gather_params.support_neg_ind = arg.get_primitive()->support_neg_ind; + auto input_layout = impl_param->input_layouts[0]; + gather_params.axis = convert_axis(prim->axis, input_layout.get_rank()); + gather_params.batch_dim = size_t(prim->batch_dim); + gather_params.support_neg_ind = prim->support_neg_ind; - gather_params.inputs.push_back(convert_data_tensor(arg.input(1).get_output_layout())); + gather_params.inputs.push_back(convert_data_tensor(impl_param->input_layouts[1])); auto& kernel_selector = kernel_selector::gather_kernel_selector::Instance(); auto best_kernels = kernel_selector.GetBestKernels(gather_params, gather_optional_params); diff --git a/src/plugins/intel_gpu/src/graph/impls/ocl/gather_elements.cpp b/src/plugins/intel_gpu/src/graph/impls/ocl/gather_elements.cpp index d0b004c90fce93..e4b45e6fac4210 100644 --- a/src/plugins/intel_gpu/src/graph/impls/ocl/gather_elements.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/ocl/gather_elements.cpp @@ -42,14 +42,15 @@ struct gather_elements_impl : typed_primitive_impl_ocl { } public: - static primitive_impl* create(const gather_elements_node& arg) { - auto gather_elements_params = get_default_params(arg); + static primitive_impl* create(const gather_elements_node& arg, std::shared_ptr impl_param) { + const auto& prim = arg.get_primitive(); + auto gather_elements_params = get_default_params(*impl_param); auto gather_elements_optional_params = get_default_optional_params(arg.get_program()); - gather_elements_params.axis = convert_axis(arg.get_primitive()->axis); + gather_elements_params.axis = convert_axis(prim->axis); - gather_elements_params.inputs.push_back(convert_data_tensor(arg.input(1).get_output_layout())); + gather_elements_params.inputs.push_back(convert_data_tensor(impl_param->input_layouts[1])); auto& kernel_selector = kernel_selector::gather_elements_kernel_selector::Instance(); auto best_kernels = kernel_selector.GetBestKernels(gather_elements_params, gather_elements_optional_params); diff --git a/src/plugins/intel_gpu/src/graph/impls/ocl/gather_nd.cpp b/src/plugins/intel_gpu/src/graph/impls/ocl/gather_nd.cpp index 9291c1977b26c8..03b8cfe03c83a7 100644 --- a/src/plugins/intel_gpu/src/graph/impls/ocl/gather_nd.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/ocl/gather_nd.cpp @@ -22,16 +22,17 @@ struct gather_nd_impl : typed_primitive_impl_ocl { return make_unique(*this); } - static primitive_impl* create(const gather_nd_node& arg) { - auto gather_nd_params = get_default_params(arg); + static primitive_impl* create(const gather_nd_node& arg, std::shared_ptr impl_param) { + const auto& prim = arg.get_primitive(); + auto gather_nd_params = get_default_params(*impl_param); auto gather_nd_optional_params = get_default_optional_params(arg.get_program()); - gather_nd_params.indices_rank = arg.get_primitive()->indices_rank; - gather_nd_params.batch_dims = arg.get_primitive()->batch_dims; - gather_nd_params.batch_merged_output = arg.get_primitive()->batch_merged_output; + gather_nd_params.indices_rank = prim->indices_rank; + gather_nd_params.batch_dims = prim->batch_dims; + gather_nd_params.batch_merged_output = prim->batch_merged_output; - gather_nd_params.inputs.push_back(convert_data_tensor(arg.input(1).get_output_layout())); + gather_nd_params.inputs.push_back(convert_data_tensor(impl_param->input_layouts[1])); auto& kernel_selector = kernel_selector::gather_nd_kernel_selector::Instance(); auto best_kernels = kernel_selector.GetBestKernels(gather_nd_params, gather_nd_optional_params); diff --git a/src/plugins/intel_gpu/src/graph/impls/ocl/gather_tree.cpp b/src/plugins/intel_gpu/src/graph/impls/ocl/gather_tree.cpp index f519b49efc64fc..51c405a1939bb9 100644 --- a/src/plugins/intel_gpu/src/graph/impls/ocl/gather_tree.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/ocl/gather_tree.cpp @@ -22,14 +22,14 @@ struct gather_tree_impl : typed_primitive_impl_ocl { return make_unique(*this); } - static primitive_impl* create(const gather_tree_node& arg) { - auto b_params = get_default_params(arg, 1); + static primitive_impl* create(const gather_tree_node& arg, std::shared_ptr impl_param) { + auto desc = arg.get_primitive(); + auto b_params = get_default_params(*impl_param, 1); auto b_optional_params = get_default_optional_params(arg.get_program()); for (size_t i = 1; i < arg.get_dependencies().size(); i++) { - b_params.inputs.push_back(convert_data_tensor(arg.get_dependency(i).get_output_layout(), 1)); + b_params.inputs.push_back(convert_data_tensor(impl_param->input_layouts[i], 1)); } - auto desc = arg.get_primitive(); auto& kernel_selector = kernel_selector::gather_tree_kernel_selector::Instance(); auto best_kernels = kernel_selector.GetBestKernels(b_params, b_optional_params); diff --git a/src/plugins/intel_gpu/src/graph/impls/ocl/gemm.cpp b/src/plugins/intel_gpu/src/graph/impls/ocl/gemm.cpp index 6dd549a7666151..d5ff619b204bfe 100644 --- a/src/plugins/intel_gpu/src/graph/impls/ocl/gemm.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/ocl/gemm.cpp @@ -23,24 +23,24 @@ struct gemm_impl : typed_primitive_impl_ocl { } public: - static primitive_impl* create(const gemm_node& arg) { - auto gemm_params = get_default_params(arg, 1); + static primitive_impl* create(const gemm_node& arg, std::shared_ptr impl_param) { + auto desc = arg.get_primitive(); + auto gemm_params = get_default_params(*impl_param, 1); auto gemm_optional_params = get_default_optional_params(arg.get_program()); for (size_t i = 1; i < arg.inputs_count(); i++) { - gemm_params.inputs.push_back(convert_data_tensor(arg.input(i).get_output_layout())); + gemm_params.inputs.push_back(convert_data_tensor(impl_param->input_layouts[i])); } - auto desc = arg.get_primitive(); gemm_params.alpha = desc->alpha; gemm_params.beta = desc->beta; gemm_params.transpose_input0 = desc->transpose_input0; gemm_params.transpose_input1 = desc->transpose_input1; bool is_quantized = true; - for (auto& input : arg.get_dependencies()) - is_quantized &= data_type_traits::is_quantized(input->get_output_layout().data_type); + for (auto& input : impl_param->input_layouts) + is_quantized &= data_type_traits::is_quantized(input.data_type); if (is_quantized) { gemm_params.quantization = kernel_selector::QuantizationType::SYMMETRIC; diff --git a/src/plugins/intel_gpu/src/graph/impls/ocl/generic_layer.cpp b/src/plugins/intel_gpu/src/graph/impls/ocl/generic_layer.cpp index c6e715b1299418..c958be7780ee9b 100644 --- a/src/plugins/intel_gpu/src/graph/impls/ocl/generic_layer.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/ocl/generic_layer.cpp @@ -105,7 +105,7 @@ struct generic_layer_cpu : typed_primitive_impl { void init_kernels() override {} }; -static primitive_impl* create(const generic_layer_node& arg) { +static primitive_impl* create(const generic_layer_node& arg, std::shared_ptr) { if (arg.get_primitive()->generic_params.engine == kernel_selector::generic_kernel_params::Engine::GPU) { return new generic_layer_impl(arg); } else { diff --git a/src/plugins/intel_gpu/src/graph/impls/ocl/grn.cpp b/src/plugins/intel_gpu/src/graph/impls/ocl/grn.cpp index 089be7f615814e..9f404ba258ed52 100644 --- a/src/plugins/intel_gpu/src/graph/impls/ocl/grn.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/ocl/grn.cpp @@ -26,11 +26,12 @@ struct grn_impl : typed_primitive_impl_ocl { } public: - static primitive_impl* create(const grn_node& arg) { - auto grn_params = get_default_params(arg); + static primitive_impl* create(const grn_node& arg, std::shared_ptr impl_param) { + const auto& prim = arg.get_primitive(); + auto grn_params = get_default_params(*impl_param); auto grn_optional_params = get_default_optional_params(arg.get_program()); - grn_params.bias = arg.get_primitive()->bias; + grn_params.bias = prim->bias; auto& kernel_selector = kernel_selector::grn_kernel_selector::Instance(); auto best_kernels = kernel_selector.GetBestKernels(grn_params, grn_optional_params); diff --git a/src/plugins/intel_gpu/src/graph/impls/ocl/lrn.cpp b/src/plugins/intel_gpu/src/graph/impls/ocl/lrn.cpp index 90c429f9c66dde..959ffce8cc8049 100644 --- a/src/plugins/intel_gpu/src/graph/impls/ocl/lrn.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/ocl/lrn.cpp @@ -21,11 +21,10 @@ struct lrn_impl : typed_primitive_impl_ocl { return make_unique(*this); } - static primitive_impl* create(const lrn_node& arg) { - auto lrn_params = get_default_params(arg); - auto lrn_optional_params = get_default_optional_params(arg.get_program()); - + static primitive_impl* create(const lrn_node& arg, std::shared_ptr impl_param) { const auto& primitive = arg.get_primitive(); + auto lrn_params = get_default_params(*impl_param); + auto lrn_optional_params = get_default_optional_params(arg.get_program()); lrn_params.alpha = primitive->alpha; lrn_params.beta = primitive->beta; diff --git a/src/plugins/intel_gpu/src/graph/impls/ocl/lstm_dynamic_input.cpp b/src/plugins/intel_gpu/src/graph/impls/ocl/lstm_dynamic_input.cpp index 2e6dfa8ba5fbf0..4a7b5da78eab64 100644 --- a/src/plugins/intel_gpu/src/graph/impls/ocl/lstm_dynamic_input.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/ocl/lstm_dynamic_input.cpp @@ -34,19 +34,23 @@ struct lstm_dynamic_input_impl : typed_primitive_impl_ocl { } public: - static primitive_impl* create(const lstm_dynamic_input_node& arg) { - auto dlstm_input_params = get_default_params(arg); + static primitive_impl* create(const lstm_dynamic_input_node& arg, std::shared_ptr impl_param) { + auto dlstm_input_params = get_default_params(*impl_param); - const auto& weights_layout = arg.weights().get_output_layout(); + const auto dyn_len_idx = 1; + const auto weights_idx = 2; + const auto bias_idx = 3; + + const auto& weights_layout = impl_param->input_layouts[weights_idx]; dlstm_input_params.weights = convert_weights_tensor(weights_layout); if (arg.bias_term()) { - const auto& bias_layout = arg.bias().get_output_layout(); + const auto& bias_layout = impl_param->input_layouts[bias_idx]; dlstm_input_params.bias.push_back(convert_data_tensor(bias_layout)); } // dyn length - const auto& dyn_length_tensor = arg.dyn_length().get_output_layout(); + const auto& dyn_length_tensor = impl_param->input_layouts[dyn_len_idx]; dlstm_input_params.inputs.push_back(convert_data_tensor(dyn_length_tensor)); dlstm_input_params.direction = arg.direction(); diff --git a/src/plugins/intel_gpu/src/graph/impls/ocl/lstm_dynamic_timeloop.cpp b/src/plugins/intel_gpu/src/graph/impls/ocl/lstm_dynamic_timeloop.cpp index cec50564903f4f..ac549b71ea0a7d 100644 --- a/src/plugins/intel_gpu/src/graph/impls/ocl/lstm_dynamic_timeloop.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/ocl/lstm_dynamic_timeloop.cpp @@ -39,36 +39,36 @@ struct lstm_dynamic_timeloop_impl : typed_primitive_impl_ocl(arg); + static primitive_impl* create(const lstm_dynamic_timeloop_node& arg, std::shared_ptr impl_param) { + auto dlstm_timeloop_params = get_default_params(*impl_param); // dyn length - const auto& dyn_length_tensor = arg.dyn_length().get_output_layout(); + const auto& dyn_length_tensor = impl_param->input_layouts[arg.get_dependency_idx("dyn_length")]; dlstm_timeloop_params.inputs.push_back(convert_data_tensor(dyn_length_tensor)); // recurrent - const auto& recurrent_layout = arg.recurrent().get_output_layout(); + const auto& recurrent_layout = impl_param->input_layouts[arg.get_dependency_idx("recurrent")]; dlstm_timeloop_params.recurrent = convert_data_tensor(recurrent_layout); dlstm_timeloop_params.direction = arg.direction(); if (arg.initial_cell_term()) { - const auto& cell_layout = arg.initial_cell().get_output_layout(); + const auto& cell_layout = impl_param->input_layouts[arg.get_dependency_idx("initial_cell")]; dlstm_timeloop_params.set_cell(convert_data_tensor(cell_layout)); } if (arg.last_hidden_output_term()) { - const auto& last_hidden_output_layout = arg.last_hidden_state().get_output_layout(); + const auto& last_hidden_output_layout = impl_param->input_layouts[arg.get_dependency_idx("last_hidden_output")]; dlstm_timeloop_params.set_last_hidden_output(convert_data_tensor(last_hidden_output_layout)); } if (arg.initial_hidden_term()) { - const auto& hidden_layout = arg.initial_hidden().get_output_layout(); + const auto& hidden_layout = impl_param->input_layouts[arg.get_dependency_idx("initial_hidden")]; dlstm_timeloop_params.set_hidden(convert_data_tensor(hidden_layout)); } if (arg.last_cell_output_term()) { - const auto& last_cell_state_layout = arg.last_cell_state().get_output_layout(); + const auto& last_cell_state_layout = impl_param->input_layouts[arg.get_dependency_idx("last_cell_output")]; dlstm_timeloop_params.set_last_cell_output(convert_data_tensor(last_cell_state_layout)); } diff --git a/src/plugins/intel_gpu/src/graph/impls/ocl/lstm_elt.cpp b/src/plugins/intel_gpu/src/graph/impls/ocl/lstm_elt.cpp index 959c9e13873893..0978e7363e839f 100644 --- a/src/plugins/intel_gpu/src/graph/impls/ocl/lstm_elt.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/ocl/lstm_elt.cpp @@ -34,13 +34,15 @@ struct lstm_elt_impl : typed_primitive_impl_ocl { } public: - static primitive_impl* create(const lstm_elt_node& arg) { - auto lstm_elt_params = get_default_params(arg); + static primitive_impl* create(const lstm_elt_node& arg, std::shared_ptr impl_param) { + const auto& prim = arg.get_primitive(); + auto lstm_elt_params = get_default_params(*impl_param); auto lstm_elt_optional_params = get_default_optional_params(arg.get_program()); if (arg.cell_term()) { - const auto& cell_layout = arg.cell().get_output_layout(); + const auto& cell_idx = 1; + const auto& cell_layout = impl_param->input_layouts[cell_idx]; lstm_elt_params.SetCell(convert_data_tensor(cell_layout)); // TODO: make a generic function to get the direction if (cell_layout.spatial(1) > 1) { @@ -48,7 +50,6 @@ struct lstm_elt_impl : typed_primitive_impl_ocl { } } - const auto& prim = arg.get_primitive(); if (!prim->activations.empty()) { auto a_sz = prim->activations.size(); auto param_sz = prim->activation_params.size(); diff --git a/src/plugins/intel_gpu/src/graph/impls/ocl/lstm_gemm.cpp b/src/plugins/intel_gpu/src/graph/impls/ocl/lstm_gemm.cpp index e69a361ca06ae1..35041425fc1c32 100644 --- a/src/plugins/intel_gpu/src/graph/impls/ocl/lstm_gemm.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/ocl/lstm_gemm.cpp @@ -37,21 +37,26 @@ struct lstm_gemm_impl : typed_primitive_impl_ocl { } public: - static primitive_impl* create(const lstm_gemm_node& arg) { - const auto& weights_layout = arg.weights().get_output_layout(); - - auto lstm_gemm_params = get_default_params(arg); + static primitive_impl* create(const lstm_gemm_node& arg, std::shared_ptr impl_param) { + const auto input_idx = 0; + const auto weight_idx = 1; + const auto recurrent_idx = 2; + const auto bias_idx = 3; + const auto hidden_idx = arg.bias_term() ? 4 : 3; + + const auto& weights_layout = impl_param->input_layouts[weight_idx]; + auto lstm_gemm_params = get_default_params(*impl_param); lstm_gemm_params.weights = convert_data_tensor(weights_layout); if (arg.bias_term()) { - const auto& bias_layout = arg.bias().get_output_layout(); + const auto& bias_layout = impl_param->input_layouts[bias_idx]; lstm_gemm_params.SetBias(convert_data_tensor(bias_layout)); } if (arg.hidden_term()) { - const auto& recurrent_layout = arg.recurrent().get_output_layout(); + const auto& recurrent_layout = impl_param->input_layouts[recurrent_idx]; lstm_gemm_params.recurrent = convert_data_tensor(recurrent_layout); - const auto& hidden_layout = arg.hidden().get_output_layout(); + const auto& hidden_layout = impl_param->input_layouts[hidden_idx]; lstm_gemm_params.SetHidden(convert_data_tensor(hidden_layout)); // TODO: make a generic function to get the direction if (hidden_layout.spatial(1) > 1) { @@ -61,7 +66,7 @@ struct lstm_gemm_impl : typed_primitive_impl_ocl { lstm_gemm_params.direction = arg.direction(); // Update the direction of the input for the gemm kernel - const auto& input_layout = arg.input().get_output_layout(); + const auto& input_layout = impl_param->input_layouts[input_idx]; size_t input_directions = input_layout.spatial(1); if (input_directions > 1) { // For bidirection input, input direction can be 1 or 0 diff --git a/src/plugins/intel_gpu/src/graph/impls/ocl/max_unpooling.cpp b/src/plugins/intel_gpu/src/graph/impls/ocl/max_unpooling.cpp index 72d3ebe5d9590f..36b680b5f99fb6 100644 --- a/src/plugins/intel_gpu/src/graph/impls/ocl/max_unpooling.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/ocl/max_unpooling.cpp @@ -40,12 +40,13 @@ struct max_unpooling_impl : typed_primitive_impl_ocl { return parent::execute_impl(tmp_events, instance); } - static primitive_impl* create(const max_unpooling_node& arg) { - auto max_unpooling_params = get_default_params(arg); + static primitive_impl* create(const max_unpooling_node& arg, std::shared_ptr impl_param) { + auto max_unpooling_params = get_default_params(*impl_param); auto max_unpooling_optional_params = get_default_optional_params(arg.get_program()); - max_unpooling_params.inputs.push_back(convert_data_tensor(arg.argmax().get_output_layout())); + const auto max_idx = 1; + max_unpooling_params.inputs.push_back(convert_data_tensor(impl_param->input_layouts[max_idx])); auto& kernel_selector = kernel_selector::max_unpooling_kernel_selector::Instance(); auto best_kernels = kernel_selector.GetBestKernels(max_unpooling_params, max_unpooling_optional_params); diff --git a/src/plugins/intel_gpu/src/graph/impls/ocl/mutable_data.cpp b/src/plugins/intel_gpu/src/graph/impls/ocl/mutable_data.cpp index 3d1b88caf02071..a6c043cb7bdb2c 100644 --- a/src/plugins/intel_gpu/src/graph/impls/ocl/mutable_data.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/ocl/mutable_data.cpp @@ -18,7 +18,7 @@ struct mutable_data_impl : public typed_primitive_impl_ocl { } public: - static primitive_impl* create(mutable_data_node const& arg) { return new mutable_data_impl(arg, {}); } + static primitive_impl* create(mutable_data_node const& arg, std::shared_ptr) { return new mutable_data_impl(arg, {}); } }; namespace detail { diff --git a/src/plugins/intel_gpu/src/graph/impls/ocl/mvn.cpp b/src/plugins/intel_gpu/src/graph/impls/ocl/mvn.cpp index ff993b6ffc71e9..b3c36ce39eea78 100644 --- a/src/plugins/intel_gpu/src/graph/impls/ocl/mvn.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/ocl/mvn.cpp @@ -26,16 +26,17 @@ struct mvn_impl : typed_primitive_impl_ocl { } public: - static primitive_impl* create(const mvn_node& arg) { - auto mvn_params = get_default_params(arg); + static primitive_impl* create(const mvn_node& arg, std::shared_ptr impl_param) { + const auto& prim = arg.get_primitive(); + auto mvn_params = get_default_params(*impl_param); auto mvn_optional_params = get_default_optional_params(arg.get_program()); - mvn_params.mvnMode = arg.get_primitive()->across_channels ? kernel_selector::mvn_mode::ACROSS_CHANNELS + mvn_params.mvnMode = prim->across_channels ? kernel_selector::mvn_mode::ACROSS_CHANNELS : kernel_selector::mvn_mode::WITHIN_CHANNELS; - mvn_params.mvnNormalizeVariance = arg.get_primitive()->normalize_variance; - mvn_params.epsilon = arg.get_primitive()->epsilon; + mvn_params.mvnNormalizeVariance = prim->normalize_variance; + mvn_params.epsilon = prim->epsilon; - mvn_params.mvnEpsMode = arg.get_primitive()->eps_inside_sqrt ? kernel_selector::mvn_eps_mode::INSIDE_SQRT + mvn_params.mvnEpsMode = prim->eps_inside_sqrt ? kernel_selector::mvn_eps_mode::INSIDE_SQRT : kernel_selector::mvn_eps_mode::OUTSIDE_SQRT; auto& kernel_selector = kernel_selector::mvn_kernel_selector::Instance(); diff --git a/src/plugins/intel_gpu/src/graph/impls/ocl/non_max_suppression.cpp b/src/plugins/intel_gpu/src/graph/impls/ocl/non_max_suppression.cpp index 37e5bac447ff05..df40e77ab7374b 100644 --- a/src/plugins/intel_gpu/src/graph/impls/ocl/non_max_suppression.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/ocl/non_max_suppression.cpp @@ -53,13 +53,14 @@ struct non_max_suppression_impl : typed_primitive_impl_ocl } public: - static primitive_impl* create(const non_max_suppression_node& arg) { - auto params = get_default_params(arg); + static primitive_impl* create(const non_max_suppression_node& arg, std::shared_ptr impl_param) { + const auto& primitive = arg.get_primitive(); + auto params = get_default_params(*impl_param); auto optional_params = get_default_optional_params(arg.get_program()); - const auto& primitive = arg.get_primitive(); - params.inputs.push_back(convert_data_tensor(arg.input_scores().get_output_layout())); + const auto input_scores_idx = 1; + params.inputs.push_back(convert_data_tensor(impl_param->input_layouts[input_scores_idx])); if (arg.has_num_select_per_class()) { cldnn::program_node& node = arg.num_select_per_class_node(); @@ -68,7 +69,7 @@ struct non_max_suppression_impl : typed_primitive_impl_ocl params.num_select_per_class = get_value(node); } else { params.num_select_per_class_type = kernel_selector::NmsArgType::Input; - params.inputs.push_back(convert_data_tensor(node.get_output_layout())); + params.inputs.push_back(convert_data_tensor(impl_param->output_layout)); } } @@ -79,7 +80,7 @@ struct non_max_suppression_impl : typed_primitive_impl_ocl params.iou_threshold = get_value(node); } else { params.iou_threshold_type = kernel_selector::NmsArgType::Input; - params.inputs.push_back(convert_data_tensor(node.get_output_layout())); + params.inputs.push_back(convert_data_tensor(impl_param->output_layout)); } } @@ -90,7 +91,7 @@ struct non_max_suppression_impl : typed_primitive_impl_ocl params.score_threshold = get_value(node); } else { params.score_threshold_type = kernel_selector::NmsArgType::Input; - params.inputs.push_back(convert_data_tensor(node.get_output_layout())); + params.inputs.push_back(convert_data_tensor(impl_param->output_layout)); } } @@ -101,21 +102,28 @@ struct non_max_suppression_impl : typed_primitive_impl_ocl params.soft_nms_sigma = get_value(node); } else { params.soft_nms_sigma_type = kernel_selector::NmsArgType::Input; - params.inputs.push_back(convert_data_tensor(node.get_output_layout())); + params.inputs.push_back(convert_data_tensor(impl_param->output_layout)); } } + auto get_additional_output_node_idx = [&] (bool is_third) { + size_t offset = 2; + offset += arg.has_num_select_per_class(); + offset += arg.has_iou_threshold(); + offset += arg.has_score_threshold(); + offset += arg.has_soft_nms_sigma(); + if (is_third) + offset += arg.has_second_output(); + return offset; + }; + if (arg.has_second_output()) { - layout second_output_layout = arg.second_output_node().get_output_layout(); - second_output_layout.format = arg.input_scores().get_output_layout().format; - params.inputs.push_back(convert_data_tensor(second_output_layout)); + params.inputs.push_back(convert_data_tensor(impl_param->input_layouts[get_additional_output_node_idx(false)])); params.has_second_output = true; } if (arg.has_third_output()) { - layout third_output_layout = arg.third_output_node().get_output_layout(); - third_output_layout.format = arg.input_scores().get_output_layout().format; - params.inputs.push_back(convert_data_tensor(third_output_layout)); + params.inputs.push_back(convert_data_tensor(impl_param->input_layouts[get_additional_output_node_idx(true)])); params.has_third_output = true; } diff --git a/src/plugins/intel_gpu/src/graph/impls/ocl/normalize.cpp b/src/plugins/intel_gpu/src/graph/impls/ocl/normalize.cpp index 0b4280def2a075..b81625c96fa6ef 100644 --- a/src/plugins/intel_gpu/src/graph/impls/ocl/normalize.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/ocl/normalize.cpp @@ -33,16 +33,17 @@ struct normalize_impl : typed_primitive_impl_ocl { } public: - static primitive_impl* create(const normalize_node& arg) { - auto norm_params = get_default_params(arg); + static primitive_impl* create(const normalize_node& arg, std::shared_ptr impl_param) { + const auto& prim = arg.get_primitive(); + auto norm_params = get_default_params(*impl_param); auto norm_optional_params = get_default_optional_params(arg.get_program()); - const auto& scale_layout = arg.scale().get_output_layout(); + const auto& scale_layout = impl_param->input_layouts[1]; - norm_params.normMode = arg.get_primitive()->across_spatial ? kernel_selector::normalize_mode::ACROSS_SPATIAL + norm_params.normMode = prim->across_spatial ? kernel_selector::normalize_mode::ACROSS_SPATIAL : kernel_selector::normalize_mode::WITHIN_SPATIAL; - norm_params.epsilon = arg.get_primitive()->epsilon; + norm_params.epsilon = prim->epsilon; norm_params.scaleTable = convert_data_tensor(scale_layout).FlattenFeatureAndSpatials(); auto& kernel_selector = kernel_selector::normalize_kernel_selector::Instance(); diff --git a/src/plugins/intel_gpu/src/graph/impls/ocl/one_hot.cpp b/src/plugins/intel_gpu/src/graph/impls/ocl/one_hot.cpp index 482e1b16c70d5c..0981927d845cc2 100644 --- a/src/plugins/intel_gpu/src/graph/impls/ocl/one_hot.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/ocl/one_hot.cpp @@ -23,16 +23,17 @@ struct one_hot_impl : typed_primitive_impl_ocl { return make_unique(*this); } - static primitive_impl* create(const one_hot_node& arg) { - auto oh_params = get_default_params(arg, 1); + static primitive_impl* create(const one_hot_node& arg, std::shared_ptr impl_param) { + const auto& prim = arg.get_primitive(); + auto oh_params = get_default_params(*impl_param, 1); auto oh_optional_params = get_default_optional_params(arg.get_program()); - oh_params.one_hot_axis = arg.get_primitive()->one_hot_axis; - oh_params.on_value = arg.get_primitive()->on_value; - oh_params.off_value = arg.get_primitive()->off_value; + oh_params.one_hot_axis = prim->one_hot_axis; + oh_params.on_value = prim->on_value; + oh_params.off_value = prim->off_value; - auto output_sizes = arg.get_output_layout().get_dims(); + auto output_sizes = impl_param->output_layout.get_dims(); oh_params.one_hot_limit = output_sizes[oh_params.one_hot_axis]; diff --git a/src/plugins/intel_gpu/src/graph/impls/ocl/permute.cpp b/src/plugins/intel_gpu/src/graph/impls/ocl/permute.cpp index 68ddafbb1277fa..e8655297659882 100644 --- a/src/plugins/intel_gpu/src/graph/impls/ocl/permute.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/ocl/permute.cpp @@ -50,13 +50,14 @@ struct permute_impl : typed_primitive_impl_ocl { return make_unique(*this); } - static primitive_impl* create(const permute_node& arg) { - auto permute_params = get_default_params(arg); + static primitive_impl* create(const permute_node& arg, std::shared_ptr impl_param) { + const auto& prim = arg.get_primitive(); + auto permute_params = get_default_params(*impl_param); auto permute_optional_params = get_default_optional_params(arg.get_program()); - auto in_rank = arg.get_dependency(0).get_output_layout().get_rank(); - auto permute_order = convert_permute_order(arg.get_primitive()->permute_order, in_rank); + auto in_rank = impl_param->input_layouts[0].get_rank(); + auto permute_order = convert_permute_order(prim->permute_order, in_rank); permute_params.order = permute_order; auto& kernel_selector = kernel_selector::permute_kernel_selector::Instance(); auto best_kernels = kernel_selector.GetBestKernels(permute_params, permute_optional_params); diff --git a/src/plugins/intel_gpu/src/graph/impls/ocl/pooling.cpp b/src/plugins/intel_gpu/src/graph/impls/ocl/pooling.cpp index 09673fde42766b..ace6a61c0f9e0c 100644 --- a/src/plugins/intel_gpu/src/graph/impls/ocl/pooling.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/ocl/pooling.cpp @@ -77,15 +77,13 @@ struct pooling_impl : typed_primitive_impl_ocl { } public: - static primitive_impl* create(const pooling_node& arg) { + static primitive_impl* create(const pooling_node& arg, std::shared_ptr impl_param) { validate_args(arg); - - auto pool_params = get_default_params(arg); + const auto primitive = arg.get_primitive(); + auto pool_params = get_default_params(*impl_param); auto pool_optional_params = get_default_optional_params(arg.get_program()); - const auto primitive = arg.get_primitive(); - pool_params.maxPoolOpset8Features = primitive->maxPoolOpset8Features; if (pool_params.maxPoolOpset8Features) { switch (primitive->index_element_type) { @@ -107,8 +105,8 @@ struct pooling_impl : typed_primitive_impl_ocl { const auto& pad = primitive->pad; const auto& dilation = primitive->dilation; auto kernel = primitive->size; - const auto& input_layout = arg.input().get_output_layout(); - const auto& output_layout = arg.get_output_layout(); + const auto& input_layout = impl_param->input_layouts[0]; + const auto& output_layout = impl_param->output_layout; auto spatial_rank = output_layout.get_spatial_rank(); auto& pp = pool_params; diff --git a/src/plugins/intel_gpu/src/graph/impls/ocl/pyramid_roi_align.cpp b/src/plugins/intel_gpu/src/graph/impls/ocl/pyramid_roi_align.cpp index ab7da77053eb65..69f32ba4b3e6b4 100644 --- a/src/plugins/intel_gpu/src/graph/impls/ocl/pyramid_roi_align.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/ocl/pyramid_roi_align.cpp @@ -23,23 +23,27 @@ struct pyramid_roi_align_impl : typed_primitive_impl_ocl { return make_unique(*this); } - static primitive_impl* create(const pyramid_roi_align_node& arg) { + static primitive_impl* create(const pyramid_roi_align_node& arg, std::shared_ptr impl_param) { auto prim = arg.get_primitive(); - auto params = get_default_params(arg, 1); + auto params = get_default_params(*impl_param, 1); auto optional_params = get_default_optional_params(arg.get_program()); - params.inputs.push_back(convert_data_tensor(arg.P2().get_output_layout())); - params.inputs.push_back(convert_data_tensor(arg.P3().get_output_layout())); - params.inputs.push_back(convert_data_tensor(arg.P4().get_output_layout())); - params.inputs.push_back(convert_data_tensor(arg.P5().get_output_layout())); + const auto P2_idx = 1; + const auto P3_idx = 2; + const auto P4_idx = 3; + const auto P5_idx = 4; + params.inputs.push_back(convert_data_tensor(impl_param->input_layouts[P2_idx])); + params.inputs.push_back(convert_data_tensor(impl_param->input_layouts[P3_idx])); + params.inputs.push_back(convert_data_tensor(impl_param->input_layouts[P4_idx])); + params.inputs.push_back(convert_data_tensor(impl_param->input_layouts[P5_idx])); params.sampling_ratio_x = prim->sampling_ratio; params.sampling_ratio_y = prim->sampling_ratio; auto first_layer_scale = prim->pyramid_scales[0]; - auto image_size_x = arg.P2().get_output_layout().spatial(0) * first_layer_scale; - auto image_size_y = arg.P2().get_output_layout().spatial(1) * first_layer_scale; + auto image_size_x = impl_param->input_layouts[P2_idx].spatial(0) * first_layer_scale; + auto image_size_y = impl_param->input_layouts[P2_idx].spatial(1) * first_layer_scale; params.image_size_x = image_size_x; params.image_size_y = image_size_y; diff --git a/src/plugins/intel_gpu/src/graph/impls/ocl/quantize.cpp b/src/plugins/intel_gpu/src/graph/impls/ocl/quantize.cpp index b73a4f69758216..e0bddaecb645ab 100644 --- a/src/plugins/intel_gpu/src/graph/impls/ocl/quantize.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/ocl/quantize.cpp @@ -43,8 +43,8 @@ struct quantize_impl : typed_primitive_impl_ocl { } public: - static primitive_impl* create(const quantize_node& arg) { - auto quantize_params = get_default_params(arg); + static primitive_impl* create(const quantize_node& arg, std::shared_ptr impl_param) { + auto quantize_params = get_default_params(*impl_param); auto quantize_optional_params = get_default_optional_params(arg.get_program()); @@ -75,9 +75,9 @@ struct quantize_impl : typed_primitive_impl_ocl { quantize_params.out_shift = arg.get_output_shift_val(); for (size_t i = 1; i < arg.inputs_count(); i++) { - quantize_params.inputs.push_back(convert_data_tensor(arg.input(i).get_output_layout())); + quantize_params.inputs.push_back(convert_data_tensor(impl_param->input_layouts[i])); } - const auto& output_layout = arg.get_output_layout(); + const auto& output_layout = impl_param->output_layout; quantize_params.outputs = { convert_data_tensor(output_layout) }; auto& kernel_selector = kernel_selector::quantize_kernel_selector::Instance(); diff --git a/src/plugins/intel_gpu/src/graph/impls/ocl/random_uniform.cpp b/src/plugins/intel_gpu/src/graph/impls/ocl/random_uniform.cpp index 5334aa972b0529..1bc142234237f6 100644 --- a/src/plugins/intel_gpu/src/graph/impls/ocl/random_uniform.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/ocl/random_uniform.cpp @@ -21,16 +21,15 @@ struct random_uniform_impl : typed_primitive_impl_ocl { return make_unique(*this); } - static primitive_impl *create(const random_uniform_node &arg) { - auto params = get_default_params( - arg); + static primitive_impl *create(const random_uniform_node &arg, std::shared_ptr impl_param) { + const auto &primitive = arg.get_primitive(); + auto params = get_default_params(*impl_param); auto &random_uniform_kernel_selector = kernel_selector::random_uniform_kernel_selector::Instance(); - const auto &primitive = arg.get_primitive(); params.global_seed = primitive->global_seed; params.op_seed = primitive->op_seed; - params.inputs.push_back(convert_data_tensor(arg.input(1).get_output_layout())); - params.inputs.push_back(convert_data_tensor(arg.input(2).get_output_layout())); + params.inputs.push_back(convert_data_tensor(impl_param->input_layouts[1])); + params.inputs.push_back(convert_data_tensor(impl_param->input_layouts[2])); auto best_kernels = random_uniform_kernel_selector.GetBestKernels(params, kernel_selector::random_uniform_optional_params()); CLDNN_ERROR_BOOL(arg.id(), diff --git a/src/plugins/intel_gpu/src/graph/impls/ocl/range.cpp b/src/plugins/intel_gpu/src/graph/impls/ocl/range.cpp index ffdb35a1e32584..370aa0579a5b15 100644 --- a/src/plugins/intel_gpu/src/graph/impls/ocl/range.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/ocl/range.cpp @@ -20,10 +20,10 @@ struct range_impl : typed_primitive_impl_ocl { return make_unique(*this); } - static primitive_impl* create(const range_node& arg) { - auto params = get_default_params(arg); + static primitive_impl* create(const range_node& arg, std::shared_ptr impl_param) { + auto params = get_default_params(*impl_param); for (int i : {1, 2}) - params.inputs.push_back(convert_data_tensor(arg.input(i).get_output_layout())); + params.inputs.push_back(convert_data_tensor(impl_param->input_layouts[i])); auto optional_params = get_default_optional_params(arg.get_program()); diff --git a/src/plugins/intel_gpu/src/graph/impls/ocl/reduce.cpp b/src/plugins/intel_gpu/src/graph/impls/ocl/reduce.cpp index ca1709d8db68a5..80a319c6bff7bb 100644 --- a/src/plugins/intel_gpu/src/graph/impls/ocl/reduce.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/ocl/reduce.cpp @@ -58,13 +58,14 @@ struct reduce_impl : typed_primitive_impl_ocl { } public: - static primitive_impl* create(const reduce_node& arg) { - auto reduce_params = get_default_params(arg); + static primitive_impl* create(const reduce_node& arg, std::shared_ptr impl_param) { + const auto& prim = arg.get_primitive(); + auto reduce_params = get_default_params(*impl_param); auto reduce_optional_params = get_default_optional_params(arg.get_program()); - reduce_params.reduceAxes = arg.get_primitive()->axes; - reduce_params.keepDims = arg.get_primitive()->keep_dims; - reduce_params.reduceMode = cldnn_2_reduce_mode(arg.get_primitive()->mode); + reduce_params.reduceAxes = prim->axes; + reduce_params.keepDims = prim->keep_dims; + reduce_params.reduceMode = cldnn_2_reduce_mode(prim->mode); auto& kernel_selector = kernel_selector::reduce_kernel_selector::Instance(); auto best_kernels = kernel_selector.GetBestKernels(reduce_params, reduce_optional_params); diff --git a/src/plugins/intel_gpu/src/graph/impls/ocl/region_yolo.cpp b/src/plugins/intel_gpu/src/graph/impls/ocl/region_yolo.cpp index c785470315da40..2b0449cf3615cd 100644 --- a/src/plugins/intel_gpu/src/graph/impls/ocl/region_yolo.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/ocl/region_yolo.cpp @@ -21,8 +21,8 @@ struct region_yolo_impl : typed_primitive_impl_ocl { return make_unique(*this); } - static primitive_impl* create(const region_yolo_node& arg) { - auto ry_params = get_default_params(arg); + static primitive_impl* create(const region_yolo_node& arg, std::shared_ptr impl_param) { + auto ry_params = get_default_params(*impl_param); auto ry_optional_params = get_default_optional_params(arg.get_program()); diff --git a/src/plugins/intel_gpu/src/graph/impls/ocl/reorder.cpp b/src/plugins/intel_gpu/src/graph/impls/ocl/reorder.cpp index 8c56e21e426382..60f3e1e210bc13 100644 --- a/src/plugins/intel_gpu/src/graph/impls/ocl/reorder.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/ocl/reorder.cpp @@ -41,40 +41,40 @@ struct reorder_impl : typed_primitive_impl_ocl { } public: - static primitive_impl* create(const reorder_node& arg) { - auto&& input_layout = arg.input().get_output_layout(); - auto&& output_layout = arg.get_output_layout(); - - auto reorder_params = get_default_params(arg); + static primitive_impl* create(const reorder_node& arg, std::shared_ptr impl_param) { + const auto& prim = arg.get_primitive(); + auto&& output_layout = impl_param->output_layout; + auto reorder_params = get_default_params(*impl_param); auto reorder_optional_params = get_default_optional_params(arg.get_program()); for (size_t i = 1; i < arg.inputs_count(); i++) { - reorder_params.inputs.push_back(convert_data_tensor(arg.input(i).get_output_layout())); + reorder_params.inputs.push_back(convert_data_tensor(impl_param->input_layouts[i])); } - if (arg.get_output_layout().data_padding) { + if (impl_param->output_layout.data_padding) { reorder_params.has_padded_output = true; } if (arg.has_mean()) { - if (input_layout.format == cldnn::format::nv12) { + if (impl_param->input_layouts[0].format == cldnn::format::nv12) { const auto& mean_layout = arg.mean_nv12().get_output_layout(); reorder_params.mean = convert_data_tensor(mean_layout); reorder_params.mode = kernel_selector::mean_subtruct_mode::IN_BUFFER; } else { - const auto& mean_layout = arg.mean().get_output_layout(); + const auto mean_idx = 1; + const auto& mean_layout = impl_param->input_layouts[mean_idx]; reorder_params.mean = convert_data_tensor(mean_layout); reorder_params.mode = kernel_selector::mean_subtruct_mode::IN_BUFFER; } - } else if (arg.get_primitive()->subtract_per_feature.empty() == false) { + } else if (prim->subtract_per_feature.empty() == false) { reorder_params.mode = kernel_selector::mean_subtruct_mode::INSIDE_PARAMS; - reorder_params.meanValues = arg.get_primitive()->subtract_per_feature; + reorder_params.meanValues = prim->subtract_per_feature; } else { reorder_params.mode = kernel_selector::mean_subtruct_mode::NONE; } if (reorder_params.mode != kernel_selector::mean_subtruct_mode::NONE) { - switch (arg.get_primitive()->mean_mode) { + switch (prim->mean_mode) { case reorder_mean_mode::none: reorder_params.mean_op = kernel_selector::mean_op::NONE; break; @@ -98,7 +98,7 @@ struct reorder_impl : typed_primitive_impl_ocl { reorder_params.winograd_nr_tiles_x = ceil_div(output_layout.spatial(0), 4); } - reorder_params.winograd = input_layout.format.is_winograd() || output_layout.format.is_winograd(); + reorder_params.winograd = impl_param->input_layouts[0].format.is_winograd() || output_layout.format.is_winograd(); auto& kernel_selector = kernel_selector::reorder_kernel_selector::Instance(); auto best_kernels = kernel_selector.GetBestKernels(reorder_params, reorder_optional_params); diff --git a/src/plugins/intel_gpu/src/graph/impls/ocl/reorg_yolo.cpp b/src/plugins/intel_gpu/src/graph/impls/ocl/reorg_yolo.cpp index 474d7ef645ef0b..58b6372bb3531f 100644 --- a/src/plugins/intel_gpu/src/graph/impls/ocl/reorg_yolo.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/ocl/reorg_yolo.cpp @@ -21,13 +21,12 @@ struct reorg_yolo_impl : typed_primitive_impl_ocl { return make_unique(*this); } - static primitive_impl* create(const reorg_yolo_node& arg) { - auto ry_params = get_default_params(arg); + static primitive_impl* create(const reorg_yolo_node& arg, std::shared_ptr impl_param) { + const auto& primitive = arg.get_primitive(); + auto ry_params = get_default_params(*impl_param); auto ry_optional_params = get_default_optional_params(arg.get_program()); - const auto& primitive = arg.get_primitive(); - ry_params.stride = primitive->stride; auto& kernel_selector = kernel_selector::reorg_yolo_kernel_selector::Instance(); diff --git a/src/plugins/intel_gpu/src/graph/impls/ocl/resample.cpp b/src/plugins/intel_gpu/src/graph/impls/ocl/resample.cpp index 0f2dc1b0d87f4c..26a663fbb6ec84 100644 --- a/src/plugins/intel_gpu/src/graph/impls/ocl/resample.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/ocl/resample.cpp @@ -104,13 +104,13 @@ struct resample_impl : typed_primitive_impl_ocl { return make_unique(*this); } - static primitive_impl* create(const resample_node& arg) { - auto us_params = get_default_params(arg); + static primitive_impl* create(const resample_node& arg, std::shared_ptr impl_param) { + const auto& primitive = arg.get_primitive(); + auto us_params = get_default_params(*impl_param); auto us_optional_params = get_default_optional_params(arg.get_program()); - const auto& primitive = arg.get_primitive(); - size_t dimsNum = arg.get_output_layout().format.dimension(); + size_t dimsNum = impl_param->output_layout.format.dimension(); us_params.resampleType = convert_to_sample_type(primitive->operation_type); us_params.nearestMode = convert_to_nearest_mode(primitive->round_mode); us_params.coordTransMode = convert_to_coord_transform_mode(primitive->coord_trans_mode); diff --git a/src/plugins/intel_gpu/src/graph/impls/ocl/reshape.cpp b/src/plugins/intel_gpu/src/graph/impls/ocl/reshape.cpp index 9c3d53c2dcf8de..2e2d175650954c 100644 --- a/src/plugins/intel_gpu/src/graph/impls/ocl/reshape.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/ocl/reshape.cpp @@ -22,12 +22,11 @@ struct reshape_impl : public typed_primitive_impl_ocl { } public: - static primitive_impl* create(reshape_node const& arg) { + static primitive_impl* create(reshape_node const& arg, std::shared_ptr impl_param) { if (arg.can_be_optimized()) { return new reshape_impl(arg, {}); } - - auto reorder_params = get_default_params(arg); + auto reorder_params = get_default_params(*impl_param); auto reorder_optional_params = get_default_optional_params(arg.get_program()); diff --git a/src/plugins/intel_gpu/src/graph/impls/ocl/reverse.cpp b/src/plugins/intel_gpu/src/graph/impls/ocl/reverse.cpp index 2529756c7e47ab..f7ecf729353c56 100644 --- a/src/plugins/intel_gpu/src/graph/impls/ocl/reverse.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/ocl/reverse.cpp @@ -24,8 +24,8 @@ struct reverse_impl : typed_primitive_impl_ocl { } public: - static primitive_impl* create(const reverse_node& arg) { - auto params = get_default_params(arg); + static primitive_impl* create(const reverse_node& arg, std::shared_ptr impl_param) { + auto params = get_default_params(*impl_param); const auto optional_params = get_default_optional_params(arg.get_program()); diff --git a/src/plugins/intel_gpu/src/graph/impls/ocl/reverse_sequence.cpp b/src/plugins/intel_gpu/src/graph/impls/ocl/reverse_sequence.cpp index cd212f7e3bbe60..217cad0daa2fd6 100644 --- a/src/plugins/intel_gpu/src/graph/impls/ocl/reverse_sequence.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/ocl/reverse_sequence.cpp @@ -23,15 +23,16 @@ struct reverse_sequence_impl : typed_primitive_impl_ocl { } public: - static primitive_impl* create(const reverse_sequence_node& arg) { - auto reverse_sequence_params = get_default_params(arg); + static primitive_impl* create(const reverse_sequence_node& arg, std::shared_ptr impl_param) { + const auto& prim = arg.get_primitive(); + auto reverse_sequence_params = get_default_params(*impl_param); auto reverse_sequence_optional_params = get_default_optional_params(arg.get_program()); - reverse_sequence_params.seq_axis = arg.get_primitive()->seq_axis; - reverse_sequence_params.batch_axis = arg.get_primitive()->batch_axis; + reverse_sequence_params.seq_axis = prim->seq_axis; + reverse_sequence_params.batch_axis = prim->batch_axis; - reverse_sequence_params.inputs.push_back(convert_data_tensor(arg.input(1).get_output_layout())); + reverse_sequence_params.inputs.push_back(convert_data_tensor(impl_param->input_layouts[1])); auto& kernel_selector = kernel_selector::reverse_sequence_kernel_selector::Instance(); auto best_kernels = kernel_selector.GetBestKernels(reverse_sequence_params, reverse_sequence_optional_params); diff --git a/src/plugins/intel_gpu/src/graph/impls/ocl/roi_align.cpp b/src/plugins/intel_gpu/src/graph/impls/ocl/roi_align.cpp index 19b574383cebda..d97e999350fb11 100644 --- a/src/plugins/intel_gpu/src/graph/impls/ocl/roi_align.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/ocl/roi_align.cpp @@ -55,11 +55,11 @@ struct roi_align_impl : typed_primitive_impl_ocl { } public: - static primitive_impl* create(const roi_align_node& arg) { - const auto& input_layout = arg.input().get_output_layout(); - const auto& output_layout = arg.get_output_layout(); - const auto& rois_layout = arg.input(1).get_output_layout(); - const auto& batches_layout = arg.input(2).get_output_layout(); + static primitive_impl* create(const roi_align_node& arg, std::shared_ptr impl_param) { + const auto& input_layout = impl_param->input_layouts[0]; + const auto& output_layout = impl_param->output_layout; + const auto& rois_layout = impl_param->input_layouts[1]; + const auto& batches_layout = impl_param->input_layouts[2]; const auto& primitive = arg.get_primitive(); const auto padding_filling_value = output_layout.data_padding.filling_value(); @@ -75,8 +75,7 @@ struct roi_align_impl : typed_primitive_impl_ocl { input_layout.format.value, "output_layout.format", output_layout.format); - - auto roi_align_params = get_default_params(arg); + auto roi_align_params = get_default_params(*impl_param); auto roi_align_optional_params = get_default_optional_params(arg.get_program()); diff --git a/src/plugins/intel_gpu/src/graph/impls/ocl/roi_pooling.cpp b/src/plugins/intel_gpu/src/graph/impls/ocl/roi_pooling.cpp index 3eda85eb2f1546..3c5b42b5bf52ec 100644 --- a/src/plugins/intel_gpu/src/graph/impls/ocl/roi_pooling.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/ocl/roi_pooling.cpp @@ -59,10 +59,10 @@ struct roi_pooling_impl : typed_primitive_impl_ocl { } public: - static primitive_impl* create(const roi_pooling_node& arg) { - const auto& input_layout = arg.input().get_output_layout(); - const auto& output_layout = arg.get_output_layout(); - const auto& rois_layout = arg.rois().get_output_layout(); + static primitive_impl* create(const roi_pooling_node& arg, std::shared_ptr impl_param) { + const auto& input_layout = impl_param->input_layouts[0]; + const auto& output_layout = impl_param->output_layout; + const auto& rois_layout = impl_param->input_layouts[1]; const auto& primitive = arg.get_primitive(); const auto padding_filling_value = output_layout.data_padding.filling_value(); @@ -78,8 +78,7 @@ struct roi_pooling_impl : typed_primitive_impl_ocl { input_layout.format.value, "output_layout.format", output_layout.format); - - auto roi_params = get_default_params(arg); + auto roi_params = get_default_params(*impl_param); auto roi_optional_params = get_default_optional_params(arg.get_program()); @@ -87,7 +86,7 @@ struct roi_pooling_impl : typed_primitive_impl_ocl { const auto roi_bf = roi_bfyx.FlattenFeatureAndSpatials(); roi_params.inputs.push_back(roi_bf); if (primitive->mode == pooling_mode::deformable_bilinear && !primitive->no_trans) - roi_params.inputs.push_back(convert_data_tensor(arg.trans().get_output_layout())); + roi_params.inputs.push_back(convert_data_tensor(impl_param->input_layouts[2])); roi_params.mode = cldnn_2_pool_type(primitive->mode); roi_params.position_sensitive = primitive->position_sensitive; roi_params.pooled_width = primitive->pooled_width; diff --git a/src/plugins/intel_gpu/src/graph/impls/ocl/roll.cpp b/src/plugins/intel_gpu/src/graph/impls/ocl/roll.cpp index 113d9fe1535baa..0e0a3e79b2566f 100644 --- a/src/plugins/intel_gpu/src/graph/impls/ocl/roll.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/ocl/roll.cpp @@ -24,8 +24,8 @@ struct roll_impl : typed_primitive_impl_ocl { return make_unique(*this); } - static primitive_impl* create(const roll_node& arg) { - auto roll_params = get_default_params(arg); + static primitive_impl* create(const roll_node& arg, std::shared_ptr impl_param) { + auto roll_params = get_default_params(*impl_param); auto roll_optional_params = get_default_optional_params(arg.get_program()); diff --git a/src/plugins/intel_gpu/src/graph/impls/ocl/scale.cpp b/src/plugins/intel_gpu/src/graph/impls/ocl/scale.cpp index f20019a6de9db7..2556ac7496328d 100644 --- a/src/plugins/intel_gpu/src/graph/impls/ocl/scale.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/ocl/scale.cpp @@ -36,19 +36,19 @@ struct scale_impl : typed_primitive_impl_ocl { } public: - static primitive_impl* create(const scale_node& arg) { - auto ew_params = get_default_params(arg); + static primitive_impl* create(const scale_node& arg, std::shared_ptr impl_param) { + auto ew_params = get_default_params(*impl_param); auto ew_optional_params = get_default_optional_params(arg.get_program()); - ew_params.inputs.push_back(convert_data_tensor(arg.scale_in().get_output_layout())); + ew_params.inputs.push_back(convert_data_tensor(impl_param->input_layouts[1])); ew_params.operations.push_back({{kernel_selector::eltwise_params::InputType::Buffer(0), kernel_selector::eltwise_params::InputType::Buffer(1)}, kernel_selector::eltwise_mode::MUL}); if (arg.bias_term()) { - ew_params.inputs.push_back(convert_data_tensor(arg.bias().get_output_layout())); + ew_params.inputs.push_back(convert_data_tensor(impl_param->input_layouts[2])); ew_params.operations.push_back({{kernel_selector::eltwise_params::InputType::Intermediate(0), kernel_selector::eltwise_params::InputType::Buffer(2)}, kernel_selector::eltwise_mode::ADD}); diff --git a/src/plugins/intel_gpu/src/graph/impls/ocl/scatter_elements_update.cpp b/src/plugins/intel_gpu/src/graph/impls/ocl/scatter_elements_update.cpp index 2b080a27928130..cadd1fc5edb3e9 100644 --- a/src/plugins/intel_gpu/src/graph/impls/ocl/scatter_elements_update.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/ocl/scatter_elements_update.cpp @@ -43,15 +43,16 @@ struct scatter_elements_update_impl : typed_primitive_impl_ocl(arg); + static primitive_impl* create(const scatter_elements_update_node& arg, std::shared_ptr impl_param) { + const auto& prim = arg.get_primitive(); + auto scatter_elements_update_params = get_default_params(*impl_param); auto scatter_elements_update_optional_params = get_default_optional_params(arg.get_program()); - scatter_elements_update_params.axis = convert_axis(arg.get_primitive()->axis, arg); + scatter_elements_update_params.axis = convert_axis(prim->axis, arg); - scatter_elements_update_params.inputs.push_back(convert_data_tensor(arg.input(1).get_output_layout())); - scatter_elements_update_params.inputs.push_back(convert_data_tensor(arg.input(2).get_output_layout())); + scatter_elements_update_params.inputs.push_back(convert_data_tensor(impl_param->input_layouts[1])); + scatter_elements_update_params.inputs.push_back(convert_data_tensor(impl_param->input_layouts[2])); auto& kernel_selector = kernel_selector::scatter_elements_update_kernel_selector::Instance(); auto best_kernels = kernel_selector.GetBestKernels(scatter_elements_update_params, scatter_elements_update_optional_params); diff --git a/src/plugins/intel_gpu/src/graph/impls/ocl/scatter_nd_update.cpp b/src/plugins/intel_gpu/src/graph/impls/ocl/scatter_nd_update.cpp index 912358211ce484..ce95ac3c87f35b 100644 --- a/src/plugins/intel_gpu/src/graph/impls/ocl/scatter_nd_update.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/ocl/scatter_nd_update.cpp @@ -24,15 +24,15 @@ struct scatter_nd_update_impl : typed_primitive_impl_ocl { } public: - static primitive_impl* create(const scatter_nd_update_node& arg) { - auto scatter_nd_update_params = get_default_params(arg); + static primitive_impl* create(const scatter_nd_update_node& arg, std::shared_ptr impl_param) { + auto scatter_nd_update_params = get_default_params(*impl_param); auto scatter_nd_update_optional_params = get_default_optional_params(arg.get_program()); scatter_nd_update_params.indices_rank = arg.get_primitive()->indices_rank; - scatter_nd_update_params.inputs.push_back(convert_data_tensor(arg.input(1).get_output_layout())); - scatter_nd_update_params.inputs.push_back(convert_data_tensor(arg.input(2).get_output_layout())); + scatter_nd_update_params.inputs.push_back(convert_data_tensor(impl_param->input_layouts[1])); + scatter_nd_update_params.inputs.push_back(convert_data_tensor(impl_param->input_layouts[2])); auto& kernel_selector = kernel_selector::scatter_nd_update_kernel_selector::Instance(); auto best_kernels = kernel_selector.GetBestKernels(scatter_nd_update_params, scatter_nd_update_optional_params); diff --git a/src/plugins/intel_gpu/src/graph/impls/ocl/scatter_update.cpp b/src/plugins/intel_gpu/src/graph/impls/ocl/scatter_update.cpp index b563720edd7b48..bb6cf69246de38 100644 --- a/src/plugins/intel_gpu/src/graph/impls/ocl/scatter_update.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/ocl/scatter_update.cpp @@ -43,15 +43,15 @@ struct scatter_update_impl : typed_primitive_impl_ocl { } public: - static primitive_impl* create(const scatter_update_node& arg) { - auto scatter_update_params = get_default_params(arg); + static primitive_impl* create(const scatter_update_node& arg, std::shared_ptr impl_param) { + auto scatter_update_params = get_default_params(*impl_param); auto scatter_update_optional_params = get_default_optional_params(arg.get_program()); scatter_update_params.axis = convert_axis(arg.get_primitive()->axis, arg); - scatter_update_params.inputs.push_back(convert_data_tensor(arg.input(1).get_output_layout())); - scatter_update_params.inputs.push_back(convert_data_tensor(arg.input(2).get_output_layout())); + scatter_update_params.inputs.push_back(convert_data_tensor(impl_param->input_layouts[1])); + scatter_update_params.inputs.push_back(convert_data_tensor(impl_param->input_layouts[2])); auto& kernel_selector = kernel_selector::scatter_update_kernel_selector::Instance(); auto best_kernels = kernel_selector.GetBestKernels(scatter_update_params, scatter_update_optional_params); diff --git a/src/plugins/intel_gpu/src/graph/impls/ocl/select.cpp b/src/plugins/intel_gpu/src/graph/impls/ocl/select.cpp index e376a0f46a63b1..8dd504336f6ac3 100644 --- a/src/plugins/intel_gpu/src/graph/impls/ocl/select.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/ocl/select.cpp @@ -22,13 +22,13 @@ struct select_impl : typed_primitive_impl_ocl