Skip to content

Commit

Permalink
[GPU] Added initial ocl impl
Browse files Browse the repository at this point in the history
  • Loading branch information
Lyamin-Roman committed Jan 28, 2024
1 parent 3c418e1 commit be71494
Show file tree
Hide file tree
Showing 13 changed files with 372 additions and 2 deletions.
1 change: 1 addition & 0 deletions src/plugins/intel_gpu/src/graph/impls/ocl/register.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -94,6 +94,7 @@ void register_implementations() {
REGISTER_OCL(eye);
REGISTER_OCL(unique_count);
REGISTER_OCL(unique_gather);
REGISTER_OCL(rope);
}

} // namespace ocl
Expand Down
2 changes: 2 additions & 0 deletions src/plugins/intel_gpu/src/graph/impls/ocl/register.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,7 @@
#include "intel_gpu/primitives/eye.hpp"
#include "intel_gpu/primitives/unique.hpp"
#include "intel_gpu/primitives/kv_cache.hpp"
#include "intel_gpu/primitives/rope.hpp"

namespace cldnn {
namespace ocl {
Expand Down Expand Up @@ -172,6 +173,7 @@ REGISTER_OCL(gather_nonzero);
REGISTER_OCL(eye);
REGISTER_OCL(unique_count);
REGISTER_OCL(unique_gather);
REGISTER_OCL(rope);

#undef REGISTER_OCL

Expand Down
80 changes: 80 additions & 0 deletions src/plugins/intel_gpu/src/graph/impls/ocl/rope.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,80 @@
// Copyright (C) 2024 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#include "primitive_base.hpp"

#include "rope_inst.h"
#include "rope/rope_kernel_selector.h"
#include "rope/rope_kernel_ref.h"

namespace cldnn {
namespace ocl {

struct rope_impl : typed_primitive_impl_ocl<rope> {
using parent = typed_primitive_impl_ocl<rope>;
using parent::parent;
using kernel_selector_t = kernel_selector::rope_kernel_selector;
using kernel_params_t = std::pair<kernel_selector::rope_params, kernel_selector::rope_optional_params>;

DECLARE_OBJECT_TYPE_SERIALIZATION(cldnn::ocl::rope_impl);

std::unique_ptr<primitive_impl> clone() const override {
return make_unique<rope_impl>(*this);
}

void load(BinaryInputBuffer& ib) override {
parent::load(ib);
if (is_dynamic()) {
auto& kernel_selector = kernel_selector_t::Instance();
auto kernel_impl = kernel_selector.GetImplementation(_kernel_data.kernelName);
kernel_impl->GetUpdateDispatchDataFunc(_kernel_data);
}
}

static kernel_params_t get_kernel_params(const kernel_impl_params& impl_param, bool is_shape_agnostic = false) {
const auto& primitive = impl_param.typed_desc<rope>();
auto params = get_default_params<kernel_selector::rope_params>(impl_param, is_shape_agnostic);
auto optional_params = get_default_optional_params<kernel_selector::rope_optional_params>(impl_param.get_program());

params.head_cnt = primitive->config.head_cnt;
params.head_size = primitive->config.head_size;
params.rotary_ndims = primitive->config.rotary_ndims;

for (size_t i = 1; i < impl_param.input_layouts.size(); ++i) {
params.inputs.push_back(convert_data_tensor(impl_param.get_input_layout(i)));
}
return {params, optional_params};
}

void update_dispatch_data(const kernel_impl_params& impl_param) override {
auto kernel_params = get_kernel_params(impl_param, true);
(_kernel_data.update_dispatch_data_func)(kernel_params.first, _kernel_data);
}
};

namespace detail {

attach_rope_impl::attach_rope_impl() {
auto types = {
data_types::f32,
data_types::f16
};

auto formats = {
format::bfyx
};

implementation_map<rope>::add(impl_types::ocl,
shape_types::any,
typed_primitive_impl_ocl<rope>::create<rope_impl>,
types,
formats);
}

} // namespace detail
} // namespace ocl
} // namespace cldnn

BIND_BINARY_BUFFER_WITH_TYPE(cldnn::ocl::rope_impl)
BIND_BINARY_BUFFER_WITH_TYPE(cldnn::rope)
3 changes: 3 additions & 0 deletions src/plugins/intel_gpu/src/graph/layout_optimizer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
#include "fully_connected_inst.h"
#include "non_max_suppression_inst.h"
#include "eltwise_inst.h"
#include "rope_inst.h"
#include "pooling_inst.h"
#include "reduce_inst.h"
#include "one_hot_inst.h"
Expand Down Expand Up @@ -1458,6 +1459,8 @@ impl_types layout_optimizer::get_preferred_impl_type(program_node& node, format
preferred_impl = _forcing_map.at(node.id()).second;
} else if (node.is_type<condition>()) {
preferred_impl = impl_types::common;
} else if (node.is_type<rope>()) {
preferred_impl = impl_types::ocl;
} else if (node.is_type<detection_output>()) {
const auto& program = node.get_program();
const auto& device_info = program.get_engine().get_device_info();
Expand Down
31 changes: 31 additions & 0 deletions src/plugins/intel_gpu/src/kernel_selector/cl_kernels/rope_ref.cl
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
// Copyright (C) 2024 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#include "include/fetch_utils.cl"

KERNEL(rope_ref)(
OPTIONAL_SHAPE_INFO_ARG
const __global INPUT0_TYPE* input,
const __global INPUT1_TYPE* cos_sin,
__global OUTPUT_TYPE* output)
{
const uint p = get_global_id(0);
const uint b = get_global_id(1);
const uint h = get_global_id(2) / HALF_ROTARY_NDIMS;
const uint r = get_global_id(2) % HALF_ROTARY_NDIMS * 2;

uint input_idx = INPUT0_GET_INDEX(p, b, h * HEAD_SIZE, 0);
uint cos_sin_idx = INPUT1_GET_INDEX(p, b, 0, 0);
uint output_idx = OUTPUT_GET_INDEX(p, b, h, 0);

INPUT1_TYPE cosv = cos_sin[cos_sin_idx + r];
INPUT1_TYPE sinv = cos_sin[cos_sin_idx + r + 1];

output[output_idx + r] = cosv * input[input_idx + r] - sinv * input[input_idx + r + 1];
output[output_idx + r + 1] = sinv * input[input_idx + r] + cosv * input[input_idx + r + 1];

for (uint i = HALF_ROTARY_NDIMS * 2; i < HEAD_SIZE; ++i) {
output[output_idx + i] = input[input_idx + i];
}
}
1 change: 1 addition & 0 deletions src/plugins/intel_gpu/src/kernel_selector/common_types.h
Original file line number Diff line number Diff line change
Expand Up @@ -94,6 +94,7 @@ enum class KernelType {
UNIQUE_COUNT,
UNIQUE_GATHER,
RMS,
ROPE
};

////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,78 @@
// Copyright (C) 2024 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#include "rope_kernel_base.h"
#include "kernel_selector_utils.h"

namespace kernel_selector {
bool RoPEKernelBase::Validate(const Params& p, const optional_params& o) const {
return KernelBaseOpenCL::Validate(p, o);
}

JitConstants RoPEKernelBase::GetJitConstants(const rope_params& params, RoPEKernelBase::DispatchData) const {
JitConstants jit = MakeBaseParamsJitConstants(params);

jit.AddConstant(MakeJitConstant("HEAD_SIZE", params.head_size));
jit.AddConstant(MakeJitConstant("HALF_ROTARY_NDIMS", params.rotary_ndims / 2));

return jit;
}

RoPEKernelBase::DispatchData RoPEKernelBase::SetDefault(const rope_params& params) const {
DispatchData dispatchData;
const auto& input = params.inputs[0];

dispatchData.gws = {input.Batch().v, input.Feature().v, params.head_cnt * (params.rotary_ndims / 2)};
dispatchData.lws = GetOptimalLocalWorkGroupSizes(dispatchData.gws, params.engineInfo);

return dispatchData;
}

void RoPEKernelBase::GetUpdateDispatchDataFunc(KernelData& kd) const {
kd.update_dispatch_data_func = [this](const Params& params, KernelData& kd) {
const auto& prim_params = static_cast<const rope_params&>(params);
auto dispatchData = SetDefault(prim_params);
OPENVINO_ASSERT(kd.kernels.size() == 1, "[GPU] Invalid kernels size for update dispatch data func");
kd.kernels[0].params.workGroups.global = dispatchData.gws;
kd.kernels[0].params.workGroups.local = dispatchData.lws;
kd.kernels[0].skip_execution = KernelData::SkipKernelExecution(prim_params);
};
}

KernelsData RoPEKernelBase::GetCommonKernelsData(const Params& params, const optional_params& options) const {
assert(params.GetType() == KernelType::ROPE);

if (!Validate(params, options))
return {};

const rope_params& orgParams = static_cast<const rope_params&>(params);
auto dispatchData = SetDefault(orgParams);

KernelData kd = KernelData::Default<rope_params>(params);

auto cldnn_jit = GetJitConstants(orgParams, dispatchData);
auto entry_point = GetEntryPoint(kernelName, orgParams.layerID, params, options);
auto jit = CreateJit(kernelName, cldnn_jit, entry_point);

GetUpdateDispatchDataFunc(kd);

auto& kernel = kd.kernels[0];
FillCLKernelData(kernel,
dispatchData,
params.engineInfo,
kernelName,
jit,
entry_point,
EXE_MODE_DEFAULT,
false,
false,
2, // TODO: Change num of inputs
GetFusedPrimitiveInputsCount(params),
1,
orgParams.outputs[0].is_dynamic());

return {kd};
}

} // namespace kernel_selector
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
// Copyright (C) 2024 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#pragma once

#include "kernel_base_opencl.h"

namespace kernel_selector {
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
// rope_params
///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
struct rope_params : public base_params {
rope_params() : base_params(KernelType::ROPE) {}
size_t head_cnt;
size_t head_size;
size_t rotary_ndims;
};

////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
// rope_optional_params
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
struct rope_optional_params : optional_params {
rope_optional_params() : optional_params(KernelType::ROPE) {}
};

////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
// RoPEKernelBase
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
class RoPEKernelBase : public KernelBaseOpenCL {
public:
using KernelBaseOpenCL::KernelBaseOpenCL;
virtual ~RoPEKernelBase() {}

struct DispatchData : public CommonDispatchData {};

protected:
bool Validate(const Params&, const optional_params&) const override;
virtual JitConstants GetJitConstants(const rope_params& params, DispatchData dispatchData) const;
virtual DispatchData SetDefault(const rope_params& params) const;
KernelsData GetCommonKernelsData(const Params& params, const optional_params&) const;
void GetUpdateDispatchDataFunc(KernelData& kd) const override;
};
} // namespace kernel_selector
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
// Copyright (C) 2024 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#include "rope_kernel_ref.h"
#include "kernel_selector_utils.h"
#include <string>

namespace kernel_selector {
ParamsKey RoPEKernelRef::GetSupportedKey() const {
ParamsKey k;
k.EnableInputDataType(Datatype::F16);
k.EnableInputDataType(Datatype::F32);
k.EnableOutputDataType(Datatype::F16);
k.EnableOutputDataType(Datatype::F32);
k.EnableInputLayout(DataLayout::bfyx);
k.EnableOutputLayout(DataLayout::bfyx);

k.EnableTensorOffset();
k.EnableTensorPitches();
k.EnableBatching();
k.EnableDifferentTypes();
k.EnableDynamicShapesSupport();
return k;
}

KernelsData RoPEKernelRef::GetKernelsData(const Params& params, const optional_params& options) const {
return GetCommonKernelsData(params, options);
}

KernelsPriority RoPEKernelRef::GetKernelsPriority(const Params& /*params*/, const optional_params& /*options*/) const {
return FORCE_PRIORITY_9;
}
} // namespace kernel_selector
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
// Copyright (C) 2024 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#pragma once

#include "rope_kernel_base.h"

namespace kernel_selector {
class RoPEKernelRef : public RoPEKernelBase {
public:
using Parent = RoPEKernelBase;
RoPEKernelRef() : RoPEKernelBase("rope_ref") {}
virtual ~RoPEKernelRef() {}

KernelsData GetKernelsData(const Params& params, const optional_params& options) const override;
KernelsPriority GetKernelsPriority(const Params& params, const optional_params& options) const override;
ParamsKey GetSupportedKey() const override;
};
} // namespace kernel_selector
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
// Copyright (C) 2024 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#include "rope_kernel_selector.h"
#include "rope_kernel_ref.h"

namespace kernel_selector {
rope_kernel_selector::rope_kernel_selector() {
Attach<RoPEKernelRef>();
}

KernelsData rope_kernel_selector::GetBestKernels(const Params& params, const optional_params& options) const {
return GetNaiveBestKernel(params, options, KernelType::ROPE);
}
} // namespace kernel_selector
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
// Copyright (C) 2024 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#pragma once

#include "kernel_selector.h"

namespace kernel_selector {
class rope_kernel_selector : public kernel_selector_base {
public:
static rope_kernel_selector& Instance() {
static rope_kernel_selector instance_;
return instance_;
}

rope_kernel_selector();

virtual ~rope_kernel_selector() {}

KernelsData GetBestKernels(const Params& params, const optional_params& options) const override;
};
} // namespace kernel_selector
Loading

0 comments on commit be71494

Please sign in to comment.