[GPU] Fix get_default_params & choose_impl not to depend on program_node #12239

33 changes: 33 additions & 0 deletions src/plugins/intel_gpu/include/intel_gpu/runtime/layout.hpp
@@ -68,6 +68,7 @@ class optional_data_type {
storage_type storage;
};


/// Converts C++ type to @ref data_types .
template <typename T>
struct type_to_data_type;
@@ -429,6 +430,38 @@ struct layout {
tensor size;
};

class optional_layout {
public:
    optional_layout() {}
    optional_layout(const layout& lay) {
        this->opt_layout_ptr = make_unique<layout>(lay);
    }

    optional_layout(const optional_layout& new_opt_lay) {
        if (new_opt_lay) {
            layout copied_lay = *new_opt_lay;
            this->opt_layout_ptr = make_unique<layout>(copied_lay);
        }
    }

    operator bool() const {
        return this->opt_layout_ptr != nullptr;
    }

    layout operator*() const {
        if (opt_layout_ptr == nullptr)
            throw std::runtime_error("Attempt to access uninitialized optional layout!");
        return *this->opt_layout_ptr;
    }

    std::unique_ptr<layout>& get_layout_ptr() {
        return opt_layout_ptr;
    }

private:
    std::unique_ptr<layout> opt_layout_ptr = nullptr;
};

/// @}
/// @}
} // namespace cldnn
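
The optional_layout class added above behaves like a minimal std::optional<layout> with deep-copy semantics: the copy constructor clones the owned layout, and operator* returns a copy by value and throws when the wrapper is empty. The following standalone usage sketch is illustrative only and not part of the diff; it assumes the usual cldnn::layout(data_types, format, tensor) constructor and the public intel_gpu runtime header.

// Usage sketch (editor's illustration, not part of the PR).
#include "intel_gpu/runtime/layout.hpp"
#include <iostream>

int main() {
    cldnn::optional_layout empty_lay;                                      // holds no layout
    std::cout << std::boolalpha << static_cast<bool>(empty_lay) << "\n";   // false

    cldnn::layout l(cldnn::data_types::f32, cldnn::format::bfyx, cldnn::tensor(1, 3, 224, 224));
    cldnn::optional_layout opt_lay(l);                                     // deep-copies 'l'
    if (opt_lay) {
        cldnn::layout copy = *opt_lay;                                     // operator* returns a copy by value
        std::cout << copy.count() << "\n";                                 // element count of the copied layout
    }

    cldnn::optional_layout another = opt_lay;                              // copy ctor clones the owned layout
    // Dereferencing 'empty_lay' here would throw std::runtime_error.
    return 0;
}

get_layout_ptr() additionally exposes the underlying unique_ptr, so a caller can reset or replace the stored layout in place.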
@@ -25,8 +25,7 @@ void add_onednn_optimization_attributes::run(program& p) {
// Reshape fused ops tensors for OneDNN FC if needed
if (fc_prim->input_size == 3) {
for (auto& fused_prim : node->get_fused_primitives()) {
-    auto fused_node = fused_prim.node;
-    if (fused_node->is_type<eltwise>()) {
+    if (fused_prim.is_type<eltwise>()) {
auto& dependency = node->get_dependency(fused_prim.dep_start_idx);
auto original_layout = dependency.get_output_layout();
onednn::combine_bf_with_first_spatial_dim(original_layout);
@@ -41,7 +41,7 @@ void basic_memory_dependencies::run(program& p) {
&& (node->is_type<convolution>() || node->is_type<deconvolution>())) {
size_t eltw_dep = 0;
for (auto& fused_op : node->get_fused_primitives()) {
-    if (fused_op.node->is_type<eltwise>() && fused_op.deps.size() == 1) {
+    if (fused_op.is_type<eltwise>() && fused_op.deps.size() == 1) {
// If it is first sum, reuse the buffer
auto fusing_type = onednn_add_fusing_helpers::get_add_fusing_type(*node, fused_op);
if (fusing_type != add_fusing_type::sum || eltw_dep != 0)
@@ -674,10 +674,10 @@ void prepare_primitive_fusing::fuse_simple_primitives(program &p) {
auto& fused_descs = input_data.get_fused_primitives();
auto origin_input_iter = std::find_if(fused_descs.begin(), fused_descs.end(),
[&](cldnn::fused_primitive_desc& desc) {
-    return (desc.node->id() == prim_id.first);
+    return (desc.desc->id == prim_id.first);
});
if (origin_input_iter != fused_descs.end()) {
-    auto users = get_users_from_fusing_history(origin_input_iter->node->id());
+    auto users = get_users_from_fusing_history(origin_input_iter->desc->id);
if (users.size() != 1) {
return false;
}
@@ -1167,10 +1167,10 @@ void prepare_primitive_fusing::optimize_fused_ops(program& p) {

auto remove_deps_of_node = [&](cldnn::fused_primitive_desc& desc) {
for (auto& prim : fused_prims) {
-    if (desc.node->id() == prim.node->id()) {
+    if (desc.desc->id == prim.desc->id) {
continue;
}
-    auto rm_iter = prim.fused_deps.find(desc.node->id());
+    auto rm_iter = prim.fused_deps.find(desc.desc->id);
if (rm_iter != prim.fused_deps.end()) {
prim.fused_deps.erase(rm_iter);
prim.fused_deps.insert(desc.fused_deps.begin(), desc.fused_deps.end());
@@ -1187,16 +1187,13 @@ void prepare_primitive_fusing::optimize_fused_ops(program& p) {

auto& fp = *curr_itr;
auto& fp_next = *fp_itr;
-    if (fp.node->is_type<activation>() && fp_next.node->is_type<quantize>()) {
-        auto& activation_node = fp.node->as<activation>();
-        auto& quantize_node = fp_next.node->as<quantize>();
-        bool can_skip = activation_node.get_primitive()->activation_function == activation_func::relu &&
-                        activation_node.get_primitive()->additional_params.a == 0.0f &&
-                        fp.deps.empty() &&
-                        data_type_traits::is_i8_u8(quantize_node.get_output_layout().data_type) &&
-                        quantize_node.get_scale_shift_opt() &&
-                        !quantize_node.get_need_pre_shift();
+    if (fp.is_type<activation>() && fp_next.is_type<quantize>()) {
+        const auto& act_prim = fp.typed_desc<activation>();
+        const auto& quant_param = fp_next.get_typed_fuse_params<kernel_selector::quantize_fuse_params>();
+
+        bool can_skip = fp.deps.empty() && data_type_traits::is_i8_u8(fp_next.output_layout.data_type);
+        can_skip &= ((act_prim->activation_function == activation_func::relu) && (act_prim->additional_params.a == 0.0f));
+        can_skip &= (quant_param->scale_shift_opt && !quant_param->has_pre_shift);

if (can_skip) {
remove_deps_of_node(fp);
@@ -485,14 +485,14 @@ void remove_redundant_reorders::run(program& p) {
input.set_output_padding(node->get_output_layout().data_padding);

// Add fused_primitive_desc of reorder to convolution which propagate original output layout to jitter
-    fused_primitive_desc local_desc;
-    local_desc.node = p.get_node_ptr(node->id());
+    fused_primitive_desc local_desc(node->get_primitive());
+    local_desc.input_layout = input.get_dependency(0).get_output_layout(); // original convolution's output layout
+    node->set_input_layout(local_desc.input_layout);
+    local_desc.f_param = node->get_fuse_params();
     local_desc.dep_start_idx = input.get_fused_primitives().size();
     local_desc.output_layout = output_layout;
-    local_desc.input_layout = input.get_dependency(0).get_output_layout(); // original convolution's output layout
     local_desc.activation = activation_func::none;
     input.add_fused_primitive(local_desc);
-    node->set_input_layout(local_desc.input_layout);

// remove reorder node
LOG_NODE_REMOVAL(node->id());
@@ -695,7 +695,7 @@ void reorder_inputs::run(program& p, layout_optimizer& lo, reorder_factory& rf)
// changes the input format of eltwise sum post-op to use binary add.
if (conv_node.get_preferred_impl_type() == impl_types::onednn) {
onednn_add_fusing_helpers::for_eltwise(conv_node, eltwise_mode::sum,
-    [&](const program_node& p_node, const eltwise_node& e_node, const fused_primitive_desc& desc) {
+    [&](const program_node& p_node, const fused_primitive_desc& desc) {
auto fusing_type = onednn_add_fusing_helpers::get_add_fusing_type(p_node, desc);
if (fusing_type == add_fusing_type::binary_per_tensor) {
auto& dep_node = p_node.get_dependency(desc.dep_start_idx);
@@ -41,7 +41,7 @@ struct condition_impl : typed_primitive_impl<condition> {
return ev;
}

-    static primitive_impl* create(const condition_node& arg) { return new condition_impl(arg); }
+    static primitive_impl* create(const condition_node& arg, std::shared_ptr<kernel_impl_params>) { return new condition_impl(arg); }

void init_kernels() override {}

2 changes: 1 addition & 1 deletion src/plugins/intel_gpu/src/graph/impls/common/loop.cpp
@@ -164,7 +164,7 @@ struct loop_impl : typed_primitive_impl<loop> {
return ev;
}

-    static primitive_impl* create(const loop_node& arg) { return new loop_impl(arg); }
+    static primitive_impl* create(const loop_node& arg, std::shared_ptr<kernel_impl_params>) { return new loop_impl(arg); }
};

namespace detail {
@@ -32,13 +32,13 @@ class wait_for_events_impl : public primitive_impl {

bool validate(const primitive_inst&) const override { return true; }

-    static primitive_impl* create_data(const data_node& data) { return new wait_for_events_impl(data); }
+    static primitive_impl* create_data(const data_node& data, std::shared_ptr<kernel_impl_params>) { return new wait_for_events_impl(data); }

-    static primitive_impl* create_input_layout(const input_layout_node& input) {
+    static primitive_impl* create_input_layout(const input_layout_node& input, std::shared_ptr<kernel_impl_params>) {
return new wait_for_events_impl(input);
}

-    static primitive_impl* create_prior_box(const prior_box_node& prior_box) {
+    static primitive_impl* create_prior_box(const prior_box_node& prior_box, std::shared_ptr<kernel_impl_params>) {
// This primitive is being executed on CPU during network compilation.
return new wait_for_events_impl(prior_box);
}
4 changes: 3 additions & 1 deletion src/plugins/intel_gpu/src/graph/impls/cpu/assign.cpp
@@ -38,7 +38,9 @@ struct assign_impl : public typed_primitive_impl<assign> {
void init_kernels() override {}

public:
-    static primitive_impl* create(assign_node const& arg) { return new assign_impl{}; }
+    static primitive_impl* create(const assign_node& arg, std::shared_ptr<kernel_impl_params> impl_param) {
+        return new assign_impl{};
+    }
};


@@ -833,7 +833,7 @@ struct detection_output_impl : typed_primitive_impl<detection_output> {

void init_kernels() override {}

-    static primitive_impl* create(const detection_output_node& arg) { return new detection_output_impl(arg); }
+    static primitive_impl* create(const detection_output_node& arg, std::shared_ptr<kernel_impl_params>) { return new detection_output_impl(arg); }
};

namespace detail {
@@ -401,7 +401,7 @@ struct non_max_suppression_impl : typed_primitive_impl<non_max_suppression> {
return ev;
}

-    static primitive_impl* create(const non_max_suppression_node&) {
+    static primitive_impl* create(const non_max_suppression_node&, std::shared_ptr<kernel_impl_params>) {
return new non_max_suppression_impl();
}
void init_kernels() override {}
4 changes: 2 additions & 2 deletions src/plugins/intel_gpu/src/graph/impls/cpu/proposal.cpp
@@ -427,8 +427,8 @@ struct proposal_impl : typed_primitive_impl<proposal> {

void init_kernels() override {}

-    static primitive_impl* create(const proposal_node& arg) {
-        const layout& l = arg.image_info().get_output_layout();
+    static primitive_impl* create(const proposal_node& arg, std::shared_ptr<kernel_impl_params> impl_param) {
+        const layout& l = impl_param->input_layouts[2];
const size_t count = l.feature() == 1 ? static_cast<size_t>(l.batch()) : static_cast<size_t>(l.feature());

// Supported image_info sizes and components meaning:
4 changes: 3 additions & 1 deletion src/plugins/intel_gpu/src/graph/impls/cpu/read_value.cpp
@@ -39,7 +39,9 @@ struct read_value_impl : public typed_primitive_impl<read_value> {
void init_kernels() override {}

public:
-    static primitive_impl* create(read_value_node const& arg) { return new read_value_impl{}; }
+    static primitive_impl* create(const read_value_node& arg, std::shared_ptr<kernel_impl_params> impl_param) {
+        return new read_value_impl{};
+    }
};

namespace detail {
27 changes: 13 additions & 14 deletions src/plugins/intel_gpu/src/graph/impls/implementation_map.hpp
@@ -11,6 +11,8 @@
#include <string>
#include <sstream>
#include "to_string_utils.h"
#include "kernel_selector_helper.h"
#include "activation_inst.h"

namespace cldnn {

@@ -145,42 +147,39 @@ class implementation_map {
public:
using key_builder = implementation_key<primitive_kind>;
using key_type = typename key_builder::type;
-    using factory_type = std::function<primitive_impl*(const typed_program_node<primitive_kind>&)>;
+    using factory_type = std::function<primitive_impl*(const typed_program_node<primitive_kind>&, std::shared_ptr<kernel_impl_params>)>;
using map_type = singleton_map<impl_types, std::pair<std::set<key_type>, factory_type>>;

-    static factory_type get(const typed_program_node<primitive_kind>& primitive) {
-        impl_types target_impl_type = primitive.get_preferred_impl_type();
-        // lookup in database; throw if not found
-        auto key = key_builder()(primitive);
+    static factory_type get(std::shared_ptr<kernel_impl_params> impl_param, impl_types preferred_impl_type) {
+        auto key = key_builder()(impl_param->input_layouts[0]);
for (auto& kv : map_type::instance()) {
impl_types impl_type = kv.first;
-        if ((target_impl_type & impl_type) != impl_type)
+        if ((preferred_impl_type & impl_type) != impl_type)
continue;

std::set<key_type>& keys_set = kv.second.first;
auto& factory = kv.second.second;
if (keys_set.empty() || keys_set.find(key) != keys_set.end()) {
return factory;
}
}
std::stringstream target_impl_type_ss;
-    target_impl_type_ss << target_impl_type;
+    target_impl_type_ss << preferred_impl_type;
throw std::runtime_error(std::string("implementation_map for ") + typeid(primitive_kind).name() +
" could not find any implementation to match key: " +
-        get_key_name(key) + ", impl_type: " + target_impl_type_ss.str() + ", node_id: " + primitive.id());
+        get_key_name(key) + ", impl_type: " + target_impl_type_ss.str() + ", node_id: " + impl_param->desc->id);
}

// check if for a given engine and type there exist an implementation
-    static bool check(const typed_program_node<primitive_kind>& primitive) {
+    static bool check(const typed_program_node<primitive_kind>& primitive, std::shared_ptr<kernel_impl_params> impl_params) {
impl_types target_impl_type = primitive.get_preferred_impl_type();
-    auto key = key_builder()(primitive);
+    auto key = key_builder()(impl_params->input_layouts[0]);
return check_key(target_impl_type, key);
}

// check if there exists a kernel implementation of a primitive with output set it primitive's output layout
-    static bool check_io_eq(const typed_program_node<primitive_kind>& primitive) {
+    static bool check_io_eq(const typed_program_node<primitive_kind>& primitive, std::shared_ptr<kernel_impl_params> impl_params) {
impl_types target_impl_type = primitive.get_preferred_impl_type();
-    auto key = key_builder()(primitive.get_output_layout());
+    auto key = key_builder()(impl_params->output_layout);
return check_key(target_impl_type, key);
}

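Taken together, the implementation_map changes mean that a factory is now looked up and invoked with a pre-built kernel_impl_params object rather than by querying program_node directly, which is what allows get_default_params and choose_impl to run without the node. The following is a simplified, self-contained sketch of that contract; the type names are illustrative stand-ins, not the real cldnn classes.

// Simplified sketch of the new factory contract (editor's illustration, illustrative types only).
#include <functional>
#include <map>
#include <memory>
#include <string>

struct kernel_impl_params {                // stand-in for cldnn::kernel_impl_params
    std::string desc_id;                   // id of the primitive descriptor
    std::string input_layout;              // layouts cached when the params were created
};
struct program_node { std::string id; };   // stand-in for cldnn::program_node
struct primitive_impl { virtual ~primitive_impl() = default; };
struct my_impl : primitive_impl {};

// New-style factory: the node is still passed, but all shape/layout data
// comes from the kernel_impl_params argument.
using factory_type =
    std::function<primitive_impl*(const program_node&, std::shared_ptr<kernel_impl_params>)>;

int main() {
    std::map<std::string, factory_type> impl_map;
    impl_map["my_prim"] = [](const program_node&, std::shared_ptr<kernel_impl_params> params) {
        // Build kernel parameters from 'params' only; no program_node traversal needed.
        return params ? new my_impl() : nullptr;
    };

    program_node node{"node0"};
    auto params = std::make_shared<kernel_impl_params>(kernel_impl_params{"node0", "bfyx:f32"});
    std::unique_ptr<primitive_impl> impl(impl_map["my_prim"](node, params));
    return impl ? 0 : 1;
}
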
12 changes: 6 additions & 6 deletions src/plugins/intel_gpu/src/graph/impls/ocl/activation.cpp
@@ -30,17 +30,17 @@ struct activation_impl : typed_primitive_impl_ocl<activation> {

return args;
}

-    static primitive_impl* create(const activation_node& arg) {
-        auto activation_params = get_default_params<kernel_selector::activation_params>(arg);
+    static primitive_impl* create(const activation_node& arg, std::shared_ptr<kernel_impl_params> impl_param) {
+        const auto& prim = arg.get_primitive();
+        auto activation_params = get_default_params<kernel_selector::activation_params>(*impl_param);
auto activation_optional_params =
get_default_optional_params<kernel_selector::activation_optional_params>(arg.get_program());

-    convert_new_activation_func(arg.get_primitive(), activation_params.activations);
+    convert_new_activation_func(prim, activation_params.activations);

if (arg.is_parameterized()) {
-        const auto& slope_layout = arg.slope_input().get_output_layout();
-        const auto& output_layout = arg.get_output_layout();
+        const auto& slope_layout = impl_param->input_layouts[1];
+        const auto& output_layout = impl_param->output_layout;

const auto params_num =
kernel_selector::GetActivationAdditionalParamsNumber(activation_params.activations[0].function);
@@ -35,8 +35,8 @@ struct adaptive_pooling_impl : public typed_primitive_impl_ocl<adaptive_pooling>
}

public:
-    static primitive_impl* create(const adaptive_pooling_node& arg) {
-        auto params = get_default_params<kernel_selector::adaptive_pooling_params>(arg);
+    static primitive_impl* create(const adaptive_pooling_node& arg, std::shared_ptr<kernel_impl_params> impl_param) {
+        auto params = get_default_params<kernel_selector::adaptive_pooling_params>(*impl_param);
auto optional_params = get_default_optional_params<kernel_selector::adaptive_pooling_optional_params>(arg.get_program());

const auto& primitive = arg.get_primitive();
7 changes: 3 additions & 4 deletions src/plugins/intel_gpu/src/graph/impls/ocl/arg_max_min.cpp
@@ -34,9 +34,8 @@ struct arg_max_min_impl : typed_primitive_impl_ocl<arg_max_min> {
}

public:
-    static primitive_impl* create(const arg_max_min_node& arg) {
+    static primitive_impl* create(const arg_max_min_node& arg, std::shared_ptr<kernel_impl_params> impl_param) {
const auto& primitive = arg.get_primitive();

const auto& axis = primitive->axis;
const auto& top_k = primitive->top_k;
const auto& out_type = primitive->output_type;
@@ -45,7 +44,7 @@ struct arg_max_min_impl : typed_primitive_impl_ocl<arg_max_min> {
const auto& values_first = primitive->values_first;
const auto& outputs_num = primitive->input.size() == 3 ? 2 : 1; // second output passed as input for TOP_K layer

-    auto argm_params = get_default_params<kernel_selector::arg_max_min_params>(arg);
+    auto argm_params = get_default_params<kernel_selector::arg_max_min_params>(*impl_param);
auto argm_optional_params =
get_default_optional_params<kernel_selector::arg_max_min_optional_params>(arg.get_program());

@@ -84,7 +83,7 @@ struct arg_max_min_impl : typed_primitive_impl_ocl<arg_max_min> {
argm_params.argMaxMinSortType = kernel_selector::argm_sort::INDEX;

if (outputs_num == 2) {
-    argm_params.inputs.push_back(convert_data_tensor(arg.get_dependency(2).get_output_layout()));
+    argm_params.inputs.push_back(convert_data_tensor(impl_param->input_layouts[2]));
}

argm_params.values_first = values_first;
@@ -28,13 +28,13 @@ struct average_unpooling_impl : typed_primitive_impl_ocl<average_unpooling> {
}

public:
-    static primitive_impl* create(const average_unpooling_node& arg) {
-        auto average_unpooling_params = get_default_params<kernel_selector::average_unpooling_params>(arg);
+    static primitive_impl* create(const average_unpooling_node& arg, std::shared_ptr<kernel_impl_params> impl_param) {
+        auto primitive = arg.get_primitive();
+        auto average_unpooling_params = get_default_params<kernel_selector::average_unpooling_params>(*impl_param);
auto average_unpooling_optional_params =
get_default_optional_params<kernel_selector::average_unpooling_optional_params>(arg.get_program());
auto& params = average_unpooling_params;

-    auto primitive = arg.get_primitive();
auto stride = primitive->stride;

params.unpoolSize = {