[GPU] Fix get_default_params & choose_impl not to depend on program_node (openvinotoolkit#12239)

* Getting rid of get_default_params' dependency on typed_program_node

* Fix bug

* Enable two paths for calling choose_impl / does_possible_impl_exists / does_an_impl_exists so they can use a given layout

* Replaced the impl factory API to take a kernel_impl_params pointer

* Update for recently added primitives

* Add and apply optional_layout

* Fix kernel_impl_params to be handled as a unique_ptr

* Applied review comments

* Fix rebase conflict

* Fix CI error
yeonbok authored Jul 27, 2022
1 parent 101e1ea commit 361ca20
Showing 118 changed files with 845 additions and 596 deletions.
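
Across the diff below, the recurring change is that implementation factories receive a std::shared_ptr<kernel_impl_params> alongside the node and read layouts from it instead of from the program_node. The following is a minimal sketch of that signature shift using simplified stand-in types (layout, kernel_impl_params, etc. are placeholders here, not the real intel_gpu declarations):

```cpp
#include <memory>
#include <vector>

// Simplified stand-ins for illustration only; the real types live in the intel_gpu plugin.
struct layout {};
struct primitive_impl {};
struct kernel_impl_params {                 // layouts captured once, outside the factory
    std::vector<layout> input_layouts;
    layout output_layout;
};
template <typename PType> struct typed_program_node {};
struct activation {};

// Old factory shape: create(const typed_program_node<activation>&) pulled layouts from the node.
// New factory shape: layouts come from kernel_impl_params, so the node is no longer needed for them.
static primitive_impl* create(const typed_program_node<activation>& /*node*/,
                              std::shared_ptr<kernel_impl_params> impl_param) {
    const layout& in0 = impl_param->input_layouts[0];   // e.g. instead of node.input().get_output_layout()
    (void)in0;
    return new primitive_impl{};
}
```
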
33 changes: 33 additions & 0 deletions src/plugins/intel_gpu/include/intel_gpu/runtime/layout.hpp
@@ -68,6 +68,7 @@ class optional_data_type {
storage_type storage;
};


/// Converts C++ type to @ref data_types .
template <typename T>
struct type_to_data_type;
@@ -429,6 +430,38 @@ struct layout {
tensor size;
};

class optional_layout {
public:
optional_layout() {}
optional_layout(const layout& lay) {
this->opt_layout_ptr = make_unique<layout>(lay);
}

optional_layout(const optional_layout& new_opt_lay) {
if (new_opt_lay) {
layout copied_lay = *new_opt_lay;
this->opt_layout_ptr = make_unique<layout>(copied_lay);
}
}

operator bool() const {
return this->opt_layout_ptr != nullptr;
}

layout operator*() const {
if (opt_layout_ptr == nullptr)
throw std::runtime_error("Attempt to access uninitialized optional layout!");
return *this->opt_layout_ptr;
}

std::unique_ptr<layout>& get_layout_ptr() {
return opt_layout_ptr;
}

private:
std::unique_ptr<layout> opt_layout_ptr = nullptr;
};

/// @}
/// @}
} // namespace cldnn
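
For reference, a small usage sketch of the optional_layout class added above; the helper function name is made up for illustration, but the operators match the definition in the hunk:

```cpp
#include <iostream>
#include "intel_gpu/runtime/layout.hpp"   // the header modified above

// Illustrative helper (not part of the commit): inspect a layout only if one was provided.
void describe(const cldnn::optional_layout& maybe_layout) {
    if (maybe_layout) {                      // operator bool(): true once a layout has been set
        cldnn::layout copy = *maybe_layout;  // operator*(): returns a copy, throws if empty
        (void)copy;
        std::cout << "layout is set" << std::endl;
    } else {
        std::cout << "no layout provided" << std::endl;
    }
}
```

Unlike std::optional, the value is held through a std::unique_ptr, so each copy of an optional_layout heap-allocates its own layout.
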
@@ -25,8 +25,7 @@ void add_onednn_optimization_attributes::run(program& p) {
// Reshape fused ops tensors for OneDNN FC if needed
if (fc_prim->input_size == 3) {
for (auto& fused_prim : node->get_fused_primitives()) {
auto fused_node = fused_prim.node;
if (fused_node->is_type<eltwise>()) {
if (fused_prim.is_type<eltwise>()) {
auto& dependency = node->get_dependency(fused_prim.dep_start_idx);
auto original_layout = dependency.get_output_layout();
onednn::combine_bf_with_first_spatial_dim(original_layout);
@@ -41,7 +41,7 @@ void basic_memory_dependencies::run(program& p) {
&& (node->is_type<convolution>() || node->is_type<deconvolution>())) {
size_t eltw_dep = 0;
for (auto& fused_op : node->get_fused_primitives()) {
if (fused_op.node->is_type<eltwise>() && fused_op.deps.size() == 1) {
if (fused_op.is_type<eltwise>() && fused_op.deps.size() == 1) {
// If it is first sum, reuse the buffer
auto fusing_type = onednn_add_fusing_helpers::get_add_fusing_type(*node, fused_op);
if (fusing_type != add_fusing_type::sum || eltw_dep != 0)
@@ -674,10 +674,10 @@ void prepare_primitive_fusing::fuse_simple_primitives(program &p) {
auto& fused_descs = input_data.get_fused_primitives();
auto origin_input_iter = std::find_if(fused_descs.begin(), fused_descs.end(),
[&](cldnn::fused_primitive_desc& desc) {
return (desc.node->id() == prim_id.first);
return (desc.desc->id == prim_id.first);
});
if (origin_input_iter != fused_descs.end()) {
auto users = get_users_from_fusing_history(origin_input_iter->node->id());
auto users = get_users_from_fusing_history(origin_input_iter->desc->id);
if (users.size() != 1) {
return false;
}
@@ -1167,10 +1167,10 @@ void prepare_primitive_fusing::optimize_fused_ops(program& p) {

auto remove_deps_of_node = [&](cldnn::fused_primitive_desc& desc) {
for (auto& prim : fused_prims) {
if (desc.node->id() == prim.node->id()) {
if (desc.desc->id == prim.desc->id) {
continue;
}
auto rm_iter = prim.fused_deps.find(desc.node->id());
auto rm_iter = prim.fused_deps.find(desc.desc->id);
if (rm_iter != prim.fused_deps.end()) {
prim.fused_deps.erase(rm_iter);
prim.fused_deps.insert(desc.fused_deps.begin(), desc.fused_deps.end());
@@ -1187,16 +1187,13 @@ void prepare_primitive_fusing::optimize_fused_ops(program& p) {

auto& fp = *curr_itr;
auto& fp_next = *fp_itr;
if (fp.is_type<activation>() && fp_next.is_type<quantize>()) {
const auto& act_prim = fp.typed_desc<activation>();;
const auto& quant_param = fp_next.get_typed_fuse_params<kernel_selector::quantize_fuse_params>();

if (fp.node->is_type<activation>() && fp_next.node->is_type<quantize>()) {
auto& activation_node = fp.node->as<activation>();
auto& quantize_node = fp_next.node->as<quantize>();
bool can_skip = activation_node.get_primitive()->activation_function == activation_func::relu &&
activation_node.get_primitive()->additional_params.a == 0.0f &&
fp.deps.empty() &&
data_type_traits::is_i8_u8(quantize_node.get_output_layout().data_type) &&
quantize_node.get_scale_shift_opt() &&
!quantize_node.get_need_pre_shift();
bool can_skip = fp.deps.empty() && data_type_traits::is_i8_u8(fp_next.output_layout.data_type);
can_skip &= ((act_prim->activation_function == activation_func::relu) && (act_prim->additional_params.a == 0.0f));
can_skip &= (quant_param->scale_shift_opt && !quant_param->has_pre_shift);

if (can_skip) {
remove_deps_of_node(fp);
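
The prepare_primitive_fusing hunks above replace fused_primitive_desc::node with direct access to the fused primitive's descriptor and fuse parameters (desc->id, is_type<T>(), typed_desc<T>(), get_typed_fuse_params<T>(), output_layout). A rough, self-contained model of that access pattern follows; the struct and helpers below are stand-ins, not the plugin's actual definitions:

```cpp
#include <memory>
#include <string>

// Stand-ins sketching what the call sites above rely on.
struct primitive {
    std::string id;
    virtual ~primitive() = default;
};
struct eltwise : primitive {};      // hypothetical typed primitives for the example
struct activation : primitive {};

struct fused_primitive_desc_sketch {
    std::shared_ptr<const primitive> desc;   // replaces the old program_node pointer

    template <typename PType>
    bool is_type() const {                   // e.g. fused_prim.is_type<eltwise>()
        return dynamic_cast<const PType*>(desc.get()) != nullptr;
    }
    template <typename PType>
    std::shared_ptr<const PType> typed_desc() const {   // e.g. fp.typed_desc<activation>()
        return std::dynamic_pointer_cast<const PType>(desc);
    }
};

// Old call sites used desc.node->id(); the id now comes from the primitive itself.
inline std::string fused_id(const fused_primitive_desc_sketch& d) { return d.desc->id; }
```
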
@@ -485,14 +485,14 @@ void remove_redundant_reorders::run(program& p) {
input.set_output_padding(node->get_output_layout().data_padding);

// Add fused_primitive_desc of reorder to convolution which propagate original output layout to jitter
fused_primitive_desc local_desc;
local_desc.node = p.get_node_ptr(node->id());
fused_primitive_desc local_desc(node->get_primitive());
local_desc.input_layout = input.get_dependency(0).get_output_layout(); // original convolution's output layout
node->set_input_layout(local_desc.input_layout);
local_desc.f_param = node->get_fuse_params();
local_desc.dep_start_idx = input.get_fused_primitives().size();
local_desc.output_layout = output_layout;
local_desc.input_layout = input.get_dependency(0).get_output_layout(); // original convolution's output layout
local_desc.activation = activation_func::none;
input.add_fused_primitive(local_desc);
node->set_input_layout(local_desc.input_layout);

// remove reorder node
LOG_NODE_REMOVAL(node->id());
@@ -695,7 +695,7 @@ void reorder_inputs::run(program& p, layout_optimizer& lo, reorder_factory& rf)
// changes the input format of eltwise sum post-op to use binary add.
if (conv_node.get_preferred_impl_type() == impl_types::onednn) {
onednn_add_fusing_helpers::for_eltwise(conv_node, eltwise_mode::sum,
[&](const program_node& p_node, const eltwise_node& e_node, const fused_primitive_desc& desc) {
[&](const program_node& p_node, const fused_primitive_desc& desc) {
auto fusing_type = onednn_add_fusing_helpers::get_add_fusing_type(p_node, desc);
if (fusing_type == add_fusing_type::binary_per_tensor) {
auto& dep_node = p_node.get_dependency(desc.dep_start_idx);
2 changes: 1 addition & 1 deletion src/plugins/intel_gpu/src/graph/impls/common/condition.cpp
@@ -41,7 +41,7 @@ struct condition_impl : typed_primitive_impl<condition> {
return ev;
}

static primitive_impl* create(const condition_node& arg) { return new condition_impl(arg); }
static primitive_impl* create(const condition_node& arg, std::shared_ptr<kernel_impl_params>) { return new condition_impl(arg); }

void init_kernels() override {}

2 changes: 1 addition & 1 deletion src/plugins/intel_gpu/src/graph/impls/common/loop.cpp
@@ -164,7 +164,7 @@ struct loop_impl : typed_primitive_impl<loop> {
return ev;
}

static primitive_impl* create(const loop_node& arg) { return new loop_impl(arg); }
static primitive_impl* create(const loop_node& arg, std::shared_ptr<kernel_impl_params>) { return new loop_impl(arg); }
};

namespace detail {
@@ -32,13 +32,13 @@ class wait_for_events_impl : public primitive_impl {

bool validate(const primitive_inst&) const override { return true; }

static primitive_impl* create_data(const data_node& data) { return new wait_for_events_impl(data); }
static primitive_impl* create_data(const data_node& data, std::shared_ptr<kernel_impl_params>) { return new wait_for_events_impl(data); }

static primitive_impl* create_input_layout(const input_layout_node& input) {
static primitive_impl* create_input_layout(const input_layout_node& input, std::shared_ptr<kernel_impl_params>) {
return new wait_for_events_impl(input);
}

static primitive_impl* create_prior_box(const prior_box_node& prior_box) {
static primitive_impl* create_prior_box(const prior_box_node& prior_box, std::shared_ptr<kernel_impl_params>) {
// This primitive is being executed on CPU during network compilation.
return new wait_for_events_impl(prior_box);
}
4 changes: 3 additions & 1 deletion src/plugins/intel_gpu/src/graph/impls/cpu/assign.cpp
@@ -38,7 +38,9 @@ struct assign_impl : public typed_primitive_impl<assign> {
void init_kernels() override {}

public:
static primitive_impl* create(assign_node const& arg) { return new assign_impl{}; }
static primitive_impl* create(const assign_node& arg, std::shared_ptr<kernel_impl_params> impl_param) {
return new assign_impl{};
}
};


@@ -833,7 +833,7 @@ struct detection_output_impl : typed_primitive_impl<detection_output> {

void init_kernels() override {}

static primitive_impl* create(const detection_output_node& arg) { return new detection_output_impl(arg); }
static primitive_impl* create(const detection_output_node& arg, std::shared_ptr<kernel_impl_params>) { return new detection_output_impl(arg); }
};

namespace detail {
@@ -401,7 +401,7 @@ struct non_max_suppression_impl : typed_primitive_impl<non_max_suppression> {
return ev;
}

static primitive_impl* create(const non_max_suppression_node&) {
static primitive_impl* create(const non_max_suppression_node&, std::shared_ptr<kernel_impl_params>) {
return new non_max_suppression_impl();
}
void init_kernels() override {}
4 changes: 2 additions & 2 deletions src/plugins/intel_gpu/src/graph/impls/cpu/proposal.cpp
@@ -427,8 +427,8 @@ struct proposal_impl : typed_primitive_impl<proposal> {

void init_kernels() override {}

static primitive_impl* create(const proposal_node& arg) {
const layout& l = arg.image_info().get_output_layout();
static primitive_impl* create(const proposal_node& arg, std::shared_ptr<kernel_impl_params> impl_param) {
const layout& l = impl_param->input_layouts[2];
const size_t count = l.feature() == 1 ? static_cast<size_t>(l.batch()) : static_cast<size_t>(l.feature());

// Supported image_info sizes and components meaning:
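
A practical consequence visible in the proposal change above: named node accessors such as arg.image_info() give way to positional lookups into impl_param->input_layouts, so the index has to match the dependency order. A hedged sketch of the same computation with stand-in types:

```cpp
#include <cstddef>
#include <memory>
#include <vector>

// Stand-ins for illustration only.
struct layout {
    int batch() const { return b; }
    int feature() const { return f; }
    int b = 1, f = 3;
};
struct kernel_impl_params { std::vector<layout> input_layouts; };

// Mirrors the proposal_impl logic above: input 2 is the image_info tensor, and the
// element count is taken from batch or feature depending on its shape.
std::size_t image_info_count(const std::shared_ptr<kernel_impl_params>& impl_param) {
    const layout& l = impl_param->input_layouts[2];   // was: arg.image_info().get_output_layout()
    return l.feature() == 1 ? static_cast<std::size_t>(l.batch())
                            : static_cast<std::size_t>(l.feature());
}
```
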
4 changes: 3 additions & 1 deletion src/plugins/intel_gpu/src/graph/impls/cpu/read_value.cpp
@@ -39,7 +39,9 @@ struct read_value_impl : public typed_primitive_impl<read_value> {
void init_kernels() override {}

public:
static primitive_impl* create(read_value_node const& arg) { return new read_value_impl{}; }
static primitive_impl* create(const read_value_node& arg, std::shared_ptr<kernel_impl_params> impl_param) {
return new read_value_impl{};
}
};

namespace detail {
27 changes: 13 additions & 14 deletions src/plugins/intel_gpu/src/graph/impls/implementation_map.hpp
@@ -11,6 +11,8 @@
#include <string>
#include <sstream>
#include "to_string_utils.h"
#include "kernel_selector_helper.h"
#include "activation_inst.h"

namespace cldnn {

@@ -145,42 +147,39 @@ class implementation_map {
public:
using key_builder = implementation_key<primitive_kind>;
using key_type = typename key_builder::type;
using factory_type = std::function<primitive_impl*(const typed_program_node<primitive_kind>&)>;
using factory_type = std::function<primitive_impl*(const typed_program_node<primitive_kind>&, std::shared_ptr<kernel_impl_params>)>;
using map_type = singleton_map<impl_types, std::pair<std::set<key_type>, factory_type>>;

static factory_type get(const typed_program_node<primitive_kind>& primitive) {
impl_types target_impl_type = primitive.get_preferred_impl_type();
// lookup in database; throw if not found
auto key = key_builder()(primitive);
static factory_type get(std::shared_ptr<kernel_impl_params> impl_param, impl_types preferred_impl_type) {
auto key = key_builder()(impl_param->input_layouts[0]);
for (auto& kv : map_type::instance()) {
impl_types impl_type = kv.first;
if ((target_impl_type & impl_type) != impl_type)
if ((preferred_impl_type & impl_type) != impl_type)
continue;

std::set<key_type>& keys_set = kv.second.first;
auto& factory = kv.second.second;
if (keys_set.empty() || keys_set.find(key) != keys_set.end()) {
if (keys_set.empty() || keys_set.find(key) != keys_set.end()) {
return factory;
}
}
std::stringstream target_impl_type_ss;
target_impl_type_ss << target_impl_type;
target_impl_type_ss << preferred_impl_type;
throw std::runtime_error(std::string("implementation_map for ") + typeid(primitive_kind).name() +
" could not find any implementation to match key: " +
get_key_name(key) + ", impl_type: " + target_impl_type_ss.str() + ", node_id: " + primitive.id());
get_key_name(key) + ", impl_type: " + target_impl_type_ss.str() + ", node_id: " + impl_param->desc->id);
}

// check if for a given engine and type there exist an implementation
static bool check(const typed_program_node<primitive_kind>& primitive) {
static bool check(const typed_program_node<primitive_kind>& primitive, std::shared_ptr<kernel_impl_params> impl_params) {
impl_types target_impl_type = primitive.get_preferred_impl_type();
auto key = key_builder()(primitive);
auto key = key_builder()(impl_params->input_layouts[0]);
return check_key(target_impl_type, key);
}

// check if there exists a kernel implementation of a primitive with output set it primitive's output layout
static bool check_io_eq(const typed_program_node<primitive_kind>& primitive) {
static bool check_io_eq(const typed_program_node<primitive_kind>& primitive, std::shared_ptr<kernel_impl_params> impl_params) {
impl_types target_impl_type = primitive.get_preferred_impl_type();
auto key = key_builder()(primitive.get_output_layout());
auto key = key_builder()(impl_params->output_layout);
return check_key(target_impl_type, key);
}

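
To make the implementation_map change above concrete, here is a condensed, stand-alone model of the new lookup: the key is built from impl_param->input_layouts[0] and the preferred impl type is passed in explicitly, instead of both being read from the program_node. The types, key function, and table are simplified stand-ins, and the real code matches impl types with a bitmask rather than equality:

```cpp
#include <functional>
#include <map>
#include <memory>
#include <set>
#include <stdexcept>
#include <utility>
#include <vector>

// Simplified stand-ins; the real implementation_map is templated per primitive type.
enum class impl_types { ocl, cpu, common };
struct layout { int format_id = 0; };
struct kernel_impl_params { std::vector<layout> input_layouts; };
struct primitive_impl {};

using key_type = int;                        // the real key also encodes the data type
using factory_type = std::function<primitive_impl*(std::shared_ptr<kernel_impl_params>)>;
using table_type = std::map<impl_types, std::pair<std::set<key_type>, factory_type>>;

key_type build_key(const layout& l) { return l.format_id; }   // stand-in for implementation_key

factory_type get(const table_type& table,
                 std::shared_ptr<kernel_impl_params> impl_param,
                 impl_types preferred_impl_type) {
    auto key = build_key(impl_param->input_layouts[0]);  // key derived from the layout, not the node
    for (const auto& kv : table) {
        if (kv.first != preferred_impl_type)
            continue;
        const auto& keys_set = kv.second.first;
        if (keys_set.empty() || keys_set.count(key))
            return kv.second.second;
    }
    throw std::runtime_error("no implementation matches the given layout / impl type");
}
```
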
12 changes: 6 additions & 6 deletions src/plugins/intel_gpu/src/graph/impls/ocl/activation.cpp
@@ -30,17 +30,17 @@ struct activation_impl : typed_primitive_impl_ocl<activation> {

return args;
}

static primitive_impl* create(const activation_node& arg) {
auto activation_params = get_default_params<kernel_selector::activation_params>(arg);
static primitive_impl* create(const activation_node& arg, std::shared_ptr<kernel_impl_params> impl_param) {
const auto& prim = arg.get_primitive();
auto activation_params = get_default_params<kernel_selector::activation_params>(*impl_param);
auto activation_optional_params =
get_default_optional_params<kernel_selector::activation_optional_params>(arg.get_program());

convert_new_activation_func(arg.get_primitive(), activation_params.activations);
convert_new_activation_func(prim, activation_params.activations);

if (arg.is_parameterized()) {
const auto& slope_layout = arg.slope_input().get_output_layout();
const auto& output_layout = arg.get_output_layout();
const auto& slope_layout = impl_param->input_layouts[1];
const auto& output_layout = impl_param->output_layout;

const auto params_num =
kernel_selector::GetActivationAdditionalParamsNumber(activation_params.activations[0].function);
@@ -35,8 +35,8 @@ struct adaptive_pooling_impl : public typed_primitive_impl_ocl<adaptive_pooling>
}

public:
static primitive_impl* create(const adaptive_pooling_node& arg) {
auto params = get_default_params<kernel_selector::adaptive_pooling_params>(arg);
static primitive_impl* create(const adaptive_pooling_node& arg, std::shared_ptr<kernel_impl_params> impl_param) {
auto params = get_default_params<kernel_selector::adaptive_pooling_params>(*impl_param);
auto optional_params = get_default_optional_params<kernel_selector::adaptive_pooling_optional_params>(arg.get_program());

const auto& primitive = arg.get_primitive();
7 changes: 3 additions & 4 deletions src/plugins/intel_gpu/src/graph/impls/ocl/arg_max_min.cpp
@@ -34,9 +34,8 @@ struct arg_max_min_impl : typed_primitive_impl_ocl<arg_max_min> {
}

public:
static primitive_impl* create(const arg_max_min_node& arg) {
static primitive_impl* create(const arg_max_min_node& arg, std::shared_ptr<kernel_impl_params> impl_param) {
const auto& primitive = arg.get_primitive();

const auto& axis = primitive->axis;
const auto& top_k = primitive->top_k;
const auto& out_type = primitive->output_type;
@@ -45,7 +44,7 @@ struct arg_max_min_impl : typed_primitive_impl_ocl<arg_max_min> {
const auto& values_first = primitive->values_first;
const auto& outputs_num = primitive->input.size() == 3 ? 2 : 1; // second output passed as input for TOP_K layer

auto argm_params = get_default_params<kernel_selector::arg_max_min_params>(arg);
auto argm_params = get_default_params<kernel_selector::arg_max_min_params>(*impl_param);
auto argm_optional_params =
get_default_optional_params<kernel_selector::arg_max_min_optional_params>(arg.get_program());

@@ -84,7 +83,7 @@ struct arg_max_min_impl : typed_primitive_impl_ocl<arg_max_min> {
argm_params.argMaxMinSortType = kernel_selector::argm_sort::INDEX;

if (outputs_num == 2) {
argm_params.inputs.push_back(convert_data_tensor(arg.get_dependency(2).get_output_layout()));
argm_params.inputs.push_back(convert_data_tensor(impl_param->input_layouts[2]));
}

argm_params.values_first = values_first;
@@ -28,13 +28,13 @@ struct average_unpooling_impl : typed_primitive_impl_ocl<average_unpooling> {
}

public:
static primitive_impl* create(const average_unpooling_node& arg) {
auto average_unpooling_params = get_default_params<kernel_selector::average_unpooling_params>(arg);
static primitive_impl* create(const average_unpooling_node& arg, std::shared_ptr<kernel_impl_params> impl_param) {
auto primitive = arg.get_primitive();
auto average_unpooling_params = get_default_params<kernel_selector::average_unpooling_params>(*impl_param);
auto average_unpooling_optional_params =
get_default_optional_params<kernel_selector::average_unpooling_optional_params>(arg.get_program());
auto& params = average_unpooling_params;

auto primitive = arg.get_primitive();
auto stride = primitive->stride;

params.unpoolSize = {
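
Putting the pieces together, a typical OCL create() after this commit follows roughly the pattern sketched below: layouts come from kernel_impl_params, while the primitive descriptor and the program (used for optional params) are still taken from the node. All types and helpers here are simplified stand-ins rather than the plugin's actual declarations:

```cpp
#include <memory>
#include <vector>

// Simplified stand-ins for the plugin types used by the create() functions above.
struct layout {};
struct program {};
struct primitive {};
struct kernel_impl_params {
    std::shared_ptr<const primitive> desc;
    std::vector<layout> input_layouts;
    layout output_layout;
};
struct kernel_params { std::vector<layout> inputs; layout output; };
struct optional_params {};
struct primitive_impl {};
struct example_node {                        // stands in for typed_program_node<T>
    std::shared_ptr<const primitive> get_primitive() const { return prim; }
    program& get_program() const { return *prog; }
    std::shared_ptr<const primitive> prim;
    program* prog = nullptr;
};

// Stand-ins for get_default_params / get_default_optional_params.
kernel_params get_default_params(const kernel_impl_params& p) {
    return kernel_params{p.input_layouts, p.output_layout};   // no program_node required any more
}
optional_params get_default_optional_params(const program&) { return {}; }

static primitive_impl* create(const example_node& arg, std::shared_ptr<kernel_impl_params> impl_param) {
    auto prim = arg.get_primitive();                           // descriptor still comes from the node
    auto params = get_default_params(*impl_param);             // layouts come from kernel_impl_params
    auto opt = get_default_optional_params(arg.get_program()); // program-level options unchanged
    if (impl_param->input_layouts.size() > 1)
        params.inputs.push_back(impl_param->input_layouts[1]); // extra inputs indexed positionally
    (void)prim; (void)opt;
    return new primitive_impl{};
}
```
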