Skip to content

Commit

Permalink
[GPU] SYCL FC opt kernel + few fixes
Browse files (browse the repository at this point in the history)
  • Loading branch information
vladimir-paramuzov committed Sep 12, 2024
1 parent c563648 commit b425389
Show file tree
Hide file tree
Showing 9 changed files with 2,941 additions and 7 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,11 @@ void post_optimize_weights::optimize_weights(T& node, program& p) {
program_node& prev_node = node.get_dependency(i);

if (weights_reorder_params != nullptr) {
auto in = weights_reorder_params->get_input_layout().convert_to_weights_layout(false);
auto out = weights_reorder_params->get_output_layout();
if (in.identical(out))
continue;

bool can_be_fused = prev_node.is_type<reorder>() &&
prev_node.as<reorder>().is_simple_reorder() &&
prev_node.get_users().size() == 1 &&
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -54,12 +54,12 @@
using namespace cldnn;

// Entry point of prepare_primitive_fusing: calls each step below on program `p`,
// one after another, in the order written.
void prepare_primitive_fusing::run(program& p) {
fuse_reorders(p);
remove_redundant_reshape(p);
fuse_bias(p);
fuse_simple_primitives(p);
fuse_constant_transposes(p);
optimize_fused_ops(p);
// NOTE(review): the commented-out calls below repeat the six calls above one-for-one.
// This text appears to come from a rendered diff, so these are presumably the "after"
// side of the change (every step disabled) rather than dead code sitting next to the
// active calls — confirm against the real file. If all steps are in fact disabled,
// that looks like a debugging leftover and should be reverted or justified.
// fuse_reorders(p);
// remove_redundant_reshape(p);
// fuse_bias(p);
// fuse_simple_primitives(p);
// fuse_constant_transposes(p);
// optimize_fused_ops(p);
}

void prepare_primitive_fusing::remove_redundant_reshape(program &p) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -509,6 +509,9 @@ static void optimize_weights_decompression_parameters(fully_connected_node& fc_n
};

auto need_reorder = [&](size_t dep_id) {
if (fc_node.can_use(impl_types::sycl))
return false;

auto dep_layout = fc_node.get_input_layout(dep_id);
auto dep_pshape = dep_layout.get_partial_shape();
// Group for scale_idx is always 1, whereas zero_point_idx is 0.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -11,13 +11,18 @@
#include "impls/onednn/fully_connected_onednn.hpp"
#endif

#if OV_GPU_WITH_SYCL
#include "impls/sycl/fully_connected_sycl.hpp"
#endif

namespace ov {
namespace intel_gpu {

using namespace cldnn;

const std::vector<std::shared_ptr<cldnn::ImplementationManager>>& Registry<fully_connected>::get_implementations() {
static const std::vector<std::shared_ptr<ImplementationManager>> impls = {
OV_GPU_CREATE_INSTANCE_SYCL(sycl::FCImplementationManagerSYCL, shape_types::dynamic_shape)
OV_GPU_CREATE_INSTANCE_ONEDNN(onednn::FullyConnectedImplementationManager, shape_types::static_shape)
OV_GPU_GET_INSTANCE_OCL(fully_connected, shape_types::static_shape)
OV_GPU_GET_INSTANCE_OCL(fully_connected, shape_types::dynamic_shape,
Expand Down
Loading

0 comments on commit b425389

Please sign in to comment.