
Commit

Fixes
vladimir-paramuzov committed Aug 23, 2024
1 parent 11e99dd commit 36f795b
Showing 60 changed files with 714 additions and 473 deletions.
@@ -6,7 +6,6 @@
#include "program_node.h"

#ifdef ENABLE_ONEDNN_FOR_GPU
#include "fully_connected_inst.h"
#include <impls/onednn/utils.hpp>
#endif

@@ -52,6 +52,7 @@ void add_required_reorders::add_reorder(program& p, program_node* node, program_
throw std::runtime_error("Internal Error: container index out of range exception.");
}
p.add_intermediate(new_reorder_node, *usr, idx);
new_reorder_node.recalc_output_layouts(false);
}

bool add_required_reorders::test_format(cldnn::program_node& node, format requested_format) {
@@ -65,12 +66,17 @@ bool add_required_reorders::test_format(cldnn::program_node& node, format requested_format) {
const auto& dep_with_port = node.get_dependency_with_port(i);
auto& dep = dep_with_port.first;

auto current_format = dep->get_output_layout(false, dep_with_port.second).format;

if (format::is_weights_format(current_format))
continue;

if (dep->is_type<reorder>()) {
auto& port = dep_with_port.second;
auto new_layout = dep->get_output_layout(false, port);
new_layout.format = requested_format;
dep->set_output_layout(new_layout, false, port);
} else {
} else if (current_format != requested_format) {
add_reorder(node.get_program(), dep_with_port.first, &node, true);
}
}
@@ -88,6 +94,10 @@ void add_required_reorders::run(program& p) {
if (usr->is_type<data>())
continue;

if (!usr->is_all_valid_output_layouts()) {
usr->get_output_layouts(false);
}

// If usr is assign and its input and output data types differ,
// add a reorder with usr's output data type between dep and usr
if (usr->is_type<assign>()) {
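
A pattern that recurs throughout this commit (see also the handle_reshape and reorder_inputs hunks below): whenever a node is spliced into the graph with add_intermediate, its cached output layout is immediately recalculated so later passes don't consume a stale layout. A minimal sketch of the idea with mock stand-ins — the type and method names below are illustrative, not the actual cldnn API:

#include <cstddef>
#include <cstdio>
#include <vector>

struct layout { bool valid = false; };

struct mock_node {
    std::vector<mock_node*> deps;
    layout out_layout;
    // Recompute this node's output layout from its dependencies; the bool
    // mirrors the optional flag seen in the real calls (an assumption).
    void recalc_output_layouts(bool invalidate_users = true) {
        (void)invalidate_users;
        out_layout.valid = true;
    }
};

struct mock_program {
    // Splice 'inter' between 'usr' and its idx-th dependency. The spliced
    // node's cached layout is stale until the caller recalculates it.
    void add_intermediate(mock_node& inter, mock_node& usr, std::size_t idx) {
        inter.deps.push_back(usr.deps[idx]);
        usr.deps[idx] = &inter;
    }
};

int main() {
    mock_program p;
    mock_node producer, usr, reorder;
    usr.deps.push_back(&producer);

    p.add_intermediate(reorder, usr, 0);
    reorder.recalc_output_layouts(false); // keep cached layouts consistent
    std::printf("reorder layout valid: %d\n", reorder.out_layout.valid ? 1 : 0);
    return 0;
}
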
@@ -28,14 +28,16 @@ void compile_graph::run(program& p) {
std::vector<ov::threading::Task> tasks;
std::exception_ptr exception;

auto forcing_map = p.get_config().get_property(ov::intel_gpu::force_implementations);

for (size_t idx = 0; idx < proc_order.size(); idx++) {
auto& node = *(std::next(proc_order.begin(), idx));

bool can_select_impl = !node->is_type<data>() &&
!(node->is_type<mutable_data>() && node->get_dependencies().empty());

if (can_select_impl) {
tasks.push_back([node, &exception] {
tasks.push_back([node, &exception, &forcing_map] {
try {
const auto& params = node->get_kernel_impl_params();
auto shape_type = ImplementationManager::get_shape_type(*params);
@@ -46,6 +48,11 @@
if (impl_type != impl_types::cpu) {
impl_type = impl_types::any;
}
if (forcing_map.count(node->id())) {
auto forced_impl = forcing_map.at(node->id()).impl_type;
if (forced_impl != impl_types::any)
impl_type = forced_impl;
}
auto selected_impl_manager = node->type()->choose_impl(*node, *node->get_kernel_impl_params(), impl_type, shape_type);
if (selected_impl_manager) {
node->selected_impl = selected_impl_manager->create(*node, *params);
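
The compile_graph change reads the force_implementations property once, before the parallel tasks are created, and captures the resulting map by reference in each task, so a per-node user override can replace the deduced impl type. A sketch of the lookup logic under assumed type names (forced_impl_desc here is hypothetical):

#include <cstdio>
#include <map>
#include <string>

enum class impl_types { any, cpu, ocl, onednn };

// Hypothetical stand-in for the per-primitive forcing entry; only the
// impl_type member is suggested by the diff above.
struct forced_impl_desc { impl_types impl_type = impl_types::any; };

impl_types resolve_impl_type(const std::string& node_id,
                             impl_types deduced,
                             const std::map<std::string, forced_impl_desc>& forcing_map) {
    auto it = forcing_map.find(node_id);
    if (it != forcing_map.end() && it->second.impl_type != impl_types::any)
        return it->second.impl_type; // explicit user override wins
    return deduced;
}

int main() {
    const std::map<std::string, forced_impl_desc> forcing = {{"conv1", {impl_types::onednn}}};
    auto chosen = resolve_impl_type("conv1", impl_types::any, forcing);
    std::printf("forced impl selected: %d\n", static_cast<int>(chosen));
    return 0;
}
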
@@ -165,6 +165,7 @@ void handle_reshape::run(program& p) {
auto& new_reshape_node = p.get_or_create(new_reshape);
user->replace_dependency(0, input_node);
p.add_intermediate(new_reshape_node, *user, 0);
new_reshape_node.recalc_output_layouts();
if (new_reshape->input_size() == 2) {
p.add_connection(prim_node.get_dependency(1), new_reshape_node);
}
@@ -198,6 +199,7 @@
reshape_input_node.get_dependencies().empty());
reshape_reorder_id++;
reshape_input_node.recalc_output_layout();
node->recalc_output_layouts();
}
}

@@ -223,6 +225,7 @@
auto& reshape_input_node = p.get_or_create(reshape_input);
p.add_intermediate(reshape_input_node, *node, 0, reshape_input_node.get_dependencies().empty());
reshape_input_node.recalc_output_layout();
node->recalc_output_layouts();
}

// Check whether output reorder is required for format change
@@ -770,6 +770,7 @@ void reorder_inputs::run(program& p, layout_optimizer& lo, reorder_factory& rf)

if (new_input.first) {
p.add_intermediate(new_input.first, detection_output_node, i, !new_input.second);
detection_output_node.recalc_output_layouts();
}
}
}
@@ -784,6 +785,7 @@ void reorder_inputs::run(program& p, layout_optimizer& lo, reorder_factory& rf)
layout{ input_layout.get_partial_shape(), input_layout.data_type, new_format });
if (reorder.first) {
p.add_intermediate(reorder.first, deconv_node, 0, !reorder.second);
deconv_node.recalc_output_layouts();
}
}

@@ -907,6 +909,7 @@ void reorder_inputs::run(program& p, layout_optimizer& lo, reorder_factory& rf)
auto new_input = rf.get_reorder(input.id(), input_layout, new_layout);
if (new_input.first) {
p.add_intermediate(new_input.first, fc_node, 0, !new_input.second);
fc_node.recalc_output_layouts();
}
}

@@ -933,6 +936,7 @@
auto new_input = rf.get_reorder(input->id(), dep.second, input_layout, new_layout);
if (new_input.first) {
p.add_intermediate(new_input.first, pooling_node, 0);
pooling_node.recalc_output_layouts();
}
}
};
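
reorder_inputs obtains its reorders from a reorder_factory that returns a (node, flag) pair; judging by the call sites above, the flag appears to indicate whether a cached reorder was reused. An illustrative caching factory in that spirit — a sketch of the pattern, not the actual reorder_factory implementation:

#include <cstdio>
#include <map>
#include <memory>
#include <string>
#include <utility>

struct reorder_desc { std::string id, from_fmt, to_fmt; };

// Illustrative caching factory: get_reorder returns the node plus a flag
// telling the caller whether an existing (cached) reorder was reused.
class reorder_cache {
    std::map<std::string, std::shared_ptr<reorder_desc>> cache_;
public:
    std::pair<std::shared_ptr<reorder_desc>, bool>
    get_reorder(const std::string& input_id, const std::string& from, const std::string& to) {
        if (from == to)
            return {nullptr, false}; // formats already match: nothing to insert
        const auto key = input_id + ":" + from + "->" + to;
        auto it = cache_.find(key);
        if (it != cache_.end())
            return {it->second, true}; // reused from cache
        auto node = std::make_shared<reorder_desc>(reorder_desc{key, from, to});
        cache_.emplace(key, node);
        return {node, false}; // freshly created
    }
};

int main() {
    reorder_cache rf;
    auto r1 = rf.get_reorder("input", "bfyx", "b_fs_yx_fsv16");
    auto r2 = rf.get_reorder("input", "bfyx", "b_fs_yx_fsv16");
    std::printf("first reused=%d, second reused=%d\n", r1.second ? 1 : 0, r2.second ? 1 : 0);
    return 0;
}
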
@@ -2,6 +2,7 @@
// SPDX-License-Identifier: Apache-2.0
//

#include "impls/registry/implementation_manager.hpp"
#include "pass_manager.h"
#include "program_node.h"
#include "openvino/core/except.hpp"
@@ -97,7 +98,13 @@ void select_preferred_formats::run(program& p) {

const auto& params = n->get_kernel_impl_params();
auto shape_type = ImplementationManager::get_shape_type(*params);
if (auto factory = n->type()->choose_impl(*n, *n->get_kernel_impl_params(), impl_type, shape_type)) {
// temporarily set format to any, as we need to query it from the impl and don't want the impl to be rejected
auto factory = test_format<std::shared_ptr<ImplementationManager>>(*n, format::any,
[&impl_type, &shape_type](const program_node& n) {
return n.type()->choose_impl(n, *n.get_kernel_impl_params(), impl_type, shape_type);
});

if (factory) {
try {
auto fmts = factory->query_formats(*n);
for (size_t i = 0; i < fmts.first.size(); i++) {
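
The new test_format<T> call wraps choose_impl so the node is queried with format::any and then restored, keeping the query free of side effects. A sketch of what such a helper plausibly looks like (the names and std::function signature are assumptions; production code would restore the format via RAII to stay exception safe):

#include <cstdio>
#include <functional>
#include <string>

// Hypothetical node with a mutable output format.
struct node_t { std::string fmt = "bfyx"; };

// Sketch of the test_format helper pattern: temporarily swap the node's
// format, evaluate the callback, then restore the original format.
template <typename T>
T test_format(node_t& node, const std::string& tmp_fmt,
              const std::function<T(const node_t&)>& func) {
    const auto prev = node.fmt;
    node.fmt = tmp_fmt;
    T result = func(node);
    node.fmt = prev; // restore the original format
    return result;
}

int main() {
    node_t n;
    bool saw_any = test_format<bool>(n, "any", [](const node_t& nn) {
        return nn.fmt == "any"; // the callback observes the temporary format
    });
    std::printf("queried with any: %d, restored to: %s\n", saw_any ? 1 : 0, n.fmt.c_str());
    return 0;
}
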
1 change: 0 additions & 1 deletion src/plugins/intel_gpu/src/graph/impls/common/register.cpp
@@ -15,7 +15,6 @@ void register_implementations() {
REGISTER_COMMON(data);
REGISTER_COMMON(input_layout);
REGISTER_COMMON(loop);
REGISTER_COMMON(prior_box);
}

} // namespace common
1 change: 0 additions & 1 deletion src/plugins/intel_gpu/src/graph/impls/common/register.hpp
@@ -26,7 +26,6 @@ REGISTER_COMMON(condition);
REGISTER_COMMON(data);
REGISTER_COMMON(input_layout);
REGISTER_COMMON(loop);
REGISTER_COMMON(prior_box);

#undef REGISTER_COMMON

@@ -49,11 +49,6 @@ class wait_for_events_impl : public primitive_impl {
return make_unique<wait_for_events_impl>(input);
}

static std::unique_ptr<primitive_impl> create_prior_box(const prior_box_node& prior_box, const kernel_impl_params&) {
// This primitive is being executed on CPU during network compilation.
return make_unique<wait_for_events_impl>(prior_box);
}

void update(primitive_inst& inst, const kernel_impl_params& impl_param) override { }
};

@@ -67,10 +62,6 @@ attach_input_layout_common::attach_input_layout_common() {
implementation_map<input_layout>::add(impl_types::common, shape_types::any, wait_for_events_impl::create_input_layout, {});
}

attach_prior_box_common::attach_prior_box_common() {
implementation_map<prior_box>::add(impl_types::common, wait_for_events_impl::create_prior_box, {});
}

} // namespace detail
} // namespace common
} // namespace cldnn
15 changes: 6 additions & 9 deletions src/plugins/intel_gpu/src/graph/impls/ocl/convolution.hpp
@@ -16,8 +16,8 @@ struct ConvolutionImplementationManager : public ImplementationManager {

std::unique_ptr<primitive_impl> create_impl(const program_node& node, const kernel_impl_params& params) const override;

bool validate(const program_node& node) const override {
OPENVINO_ASSERT(node.is_type<convolution>());
bool validate_impl(const program_node& node) const override {
assert(node.is_type<convolution>());

const auto& input_layout = node.get_input_layout(0);
const auto& weights_layout = node.as<convolution>().weights().get_output_layout();
@@ -85,21 +85,18 @@ struct ConvolutionImplementationManager : public ImplementationManager {
format::bs_fs_yx_bsv4_fsv2,
};

bool fp_case = data_type_traits::is_floating_point(in_dt) &&
bool fp_common_case = data_type_traits::is_floating_point(in_dt) &&
(one_of(input_fmt.value, supported_fp_only_formats) || one_of(input_fmt.value, supported_common_formats));
bool fp16_case = in_dt == ov::element::f16 && input_fmt == format::fs_b_yx_fsv32;
bool fp16_case = everyone_is(ov::element::f16, in_dt, wei_dt) && (input_fmt == format::fs_b_yx_fsv32 || output_fmt == format::fs_b_yx_fsv32);
bool i8u8_case = data_type_traits::is_i8_u8(in_dt) &&
(one_of(input_fmt.value, supported_int_only_formats) || one_of(input_fmt.value, supported_common_formats));

if (!fp_case && !fp16_case && !i8u8_case)
if (!fp_common_case && !fp16_case && !i8u8_case)
return false;
}

return ImplementationManager::validate(node);
return true;
}

in_out_fmts_t query_formats(const program_node&) const override { OPENVINO_NOT_IMPLEMENTED; }
bool support_shapes(const kernel_impl_params&) const override { return true; }
};

} // namespace ocl
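
The convolution manager's override is renamed from validate() to validate_impl() and no longer chains to ImplementationManager::validate(node), which suggests the base class now applies the common checks itself and delegates only the primitive-specific part — the classic template-method split. A sketch of that assumed structure (the base-class body is not part of this diff):

#include <cstdio>

// Assumed template-method split behind the validate -> validate_impl rename:
// the base class keeps the shared checks in a non-virtual validate() and
// derived managers override only the primitive-specific validate_impl().
struct implementation_manager_sketch {
    bool validate() const {
        if (!common_checks())
            return false;       // shared checks run exactly once, in the base
        return validate_impl(); // then the primitive-specific part
    }
    virtual ~implementation_manager_sketch() = default;

protected:
    virtual bool validate_impl() const { return true; }
    bool common_checks() const { return true; } // placeholder for base-class logic
};

struct convolution_manager_sketch : implementation_manager_sketch {
protected:
    bool validate_impl() const override {
        // format/data-type checks specific to convolution would go here;
        // note it no longer re-invokes the base validate() at the end.
        return true;
    }
};

int main() {
    convolution_manager_sketch m;
    std::printf("valid: %d\n", m.validate() ? 1 : 0);
    return 0;
}
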
18 changes: 4 additions & 14 deletions src/plugins/intel_gpu/src/graph/impls/ocl/detection_output.cpp
@@ -4,6 +4,7 @@

#include "primitive_base.hpp"

#include "detection_output.hpp"
#include "detection_output_inst.h"
#include "detection_output/detection_output_kernel_selector.h"
#include "detection_output/detection_output_kernel_ref.h"
@@ -62,22 +63,11 @@ struct detection_output_impl : typed_primitive_impl_ocl<detection_output> {
}
};

namespace detail {

attach_detection_output_impl::attach_detection_output_impl() {
std::vector<data_types> dt = {
data_types::f32,
data_types::f16,
};
std::vector<format::type> fmt = {
format::bfyx,
format::bs_fs_yx_bsv16_fsv32,
format::bs_fs_zyx_bsv16_fsv32,
};
implementation_map<detection_output>::add(impl_types::ocl, typed_primitive_impl_ocl<detection_output>::create<detection_output_impl>, dt, fmt);
std::unique_ptr<primitive_impl> DetectionOutputImplementationManager::create_impl(const program_node& node, const kernel_impl_params& params) const {
assert(node.is_type<detection_output>());
return typed_primitive_impl_ocl<detection_output>::create<detection_output_impl>(static_cast<const detection_output_node&>(node), params);
}

} // namespace detail
} // namespace ocl
} // namespace cldnn

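
detection_output and gather_nd (below) follow the same migration: the detail-namespace attach_*_impl registration that enumerated supported data types and formats is deleted, and the impl is instead produced by a per-primitive manager's create_impl override, with the supported-configuration checks moving into validate_impl (see gather_nd.hpp below). A sketch of the resulting shape, with placeholder types:

#include <cstdio>
#include <memory>

// Placeholder types sketching the "after" shape of the registration change:
// each primitive gets a manager whose create_impl builds the impl, instead
// of a static implementation_map entry enumerating types and formats.
struct primitive_impl_sketch { virtual ~primitive_impl_sketch() = default; };
struct node_sketch { /* stands in for program_node */ };

struct manager_base {
    virtual ~manager_base() = default;
    virtual std::unique_ptr<primitive_impl_sketch> create_impl(const node_sketch&) const = 0;
};

struct detection_output_manager_sketch : manager_base {
    std::unique_ptr<primitive_impl_sketch> create_impl(const node_sketch&) const override {
        // the real override forwards to
        // typed_primitive_impl_ocl<detection_output>::create<detection_output_impl>
        return std::make_unique<primitive_impl_sketch>();
    }
};

int main() {
    detection_output_manager_sketch m;
    node_sketch n;
    auto impl = m.create_impl(n);
    std::printf("impl created: %d\n", impl ? 1 : 0);
    return 0;
}
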
1 change: 1 addition & 0 deletions src/plugins/intel_gpu/src/graph/impls/ocl/dft.cpp
@@ -116,6 +116,7 @@ attach_dft_impl::attach_dft_impl() {
format::bfyx,
format::b_fs_yx_fsv16,
format::b_fs_yx_fsv32,
format::bs_fs_yx_bsv16_fsv32,
format::bs_fs_yx_bsv16_fsv16,
format::bs_fs_yx_bsv32_fsv32,
format::bs_fs_yx_bsv32_fsv16,
37 changes: 4 additions & 33 deletions src/plugins/intel_gpu/src/graph/impls/ocl/gather_nd.cpp
@@ -4,6 +4,7 @@

#include "primitive_base.hpp"

#include "gather_nd.hpp"
#include "gather_nd_inst.h"
#include "gather/gather_nd_kernel_selector.h"
#include "gather/gather_nd_kernel_ref.h"
@@ -55,41 +56,11 @@ struct gather_nd_impl : typed_primitive_impl_ocl<gather_nd> {
}
};

namespace detail {

attach_gather_nd_impl::attach_gather_nd_impl() {
auto types = {
data_types::f32,
data_types::f16,
data_types::i32
};

auto static_formats = {
format::bfyx,
format::bfzyx,
format::bfwzyx
};

implementation_map<gather_nd>::add(impl_types::ocl,
shape_types::static_shape,
typed_primitive_impl_ocl<gather_nd>::create<gather_nd_impl>,
types,
static_formats);

auto dyn_formats = {
format::bfyx,
format::bfzyx,
format::bfwzyx
};

implementation_map<gather_nd>::add(impl_types::ocl,
shape_types::dynamic_shape,
typed_primitive_impl_ocl<gather_nd>::create<gather_nd_impl>,
types,
dyn_formats);
std::unique_ptr<primitive_impl> GatherNDImplementationManager::create_impl(const program_node& node, const kernel_impl_params& params) const {
assert(node.is_type<gather_nd>());
return typed_primitive_impl_ocl<gather_nd>::create<gather_nd_impl>(static_cast<const gather_nd_node&>(node), params);
}

} // namespace detail
} // namespace ocl
} // namespace cldnn

54 changes: 54 additions & 0 deletions src/plugins/intel_gpu/src/graph/impls/ocl/gather_nd.hpp
@@ -0,0 +1,54 @@
// Copyright (C) 2024 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#include "impls/registry/implementation_manager.hpp"
#include "program_node.h"

#include <memory>
namespace cldnn {
namespace ocl {

struct GatherNDImplementationManager : public ImplementationManager {
OV_GPU_PRIMITIVE_IMPL("GatherNDImplementationOCL")
GatherNDImplementationManager(shape_types shape_type, ValidateFunc vf = nullptr) : ImplementationManager(impl_types::ocl, shape_type, vf) {}
std::unique_ptr<primitive_impl> create_impl(const program_node& node, const kernel_impl_params& params) const override;
bool validate_impl(const program_node& node) const override {
static const std::vector<format> supported_fmts = {
format::bfyx,
format::bfzyx,
format::bfwzyx
};

static const std::vector<ov::element::Type_t> supported_in_types = {
ov::element::f32,
ov::element::f16,
ov::element::i32
};

static const std::vector<ov::element::Type_t> supported_out_types = {
ov::element::f32,
ov::element::f16,
ov::element::i32,
ov::element::i8,
ov::element::u8,
};

const auto& in0_layout = node.get_input_layout(0);
const auto& in1_layout = node.get_input_layout(1);
const auto& out_layout = node.get_output_layout(0);
if (!one_of(in0_layout.format, supported_fmts) || !one_of(out_layout.format, supported_fmts))
return false;

if (!one_of(in0_layout.data_type, supported_in_types) || !one_of(in1_layout.data_type, supported_in_types))
return false;

if (!one_of(out_layout.data_type, supported_out_types))
return false;

return true;
}
};

} // namespace ocl
} // namespace cldnn
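
For reference, a minimal sketch of the one_of membership helper that validate_impl above relies on. The semantics — a linear search over the supported values — are assumed from the call sites; strings stand in here for the real format and data-type enums:

#include <algorithm>
#include <cstdio>
#include <iterator>
#include <string>
#include <vector>

// Assumed one_of: true if 'value' appears in the candidate list.
template <typename T, typename C>
bool one_of(const T& value, const C& candidates) {
    return std::find(std::begin(candidates), std::end(candidates), value) != std::end(candidates);
}

int main() {
    const std::vector<std::string> supported_fmts = {"bfyx", "bfzyx", "bfwzyx"};
    std::printf("bfyx: %d\n", one_of(std::string("bfyx"), supported_fmts) ? 1 : 0);
    std::printf("fs_b_yx_fsv32: %d\n", one_of(std::string("fs_b_yx_fsv32"), supported_fmts) ? 1 : 0);
    return 0;
}
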
2 changes: 2 additions & 0 deletions src/plugins/intel_gpu/src/graph/impls/ocl/mvn.cpp
@@ -163,6 +163,8 @@ attach_mvn_impl::attach_mvn_impl() {

std::make_tuple(data_types::u8, format::bs_fs_yx_bsv32_fsv32),
std::make_tuple(data_types::i8, format::bs_fs_yx_bsv32_fsv32),
std::make_tuple(data_types::f32, format::bs_fs_yx_bsv32_fsv32),
std::make_tuple(data_types::f16, format::bs_fs_yx_bsv32_fsv32),

std::make_tuple(data_types::f16, format::bs_fs_yx_bsv32_fsv16),
});