Skip to content

Commit

Permalink
[IE CLDNN] DispatchData refactoring (openvinotoolkit#2508)
Browse files Browse the repository at this point in the history
  • Loading branch information
vladimir-paramuzov authored and mryzhov committed Dec 15, 2020
1 parent af8f09e commit d00bf25
Show file tree
Hide file tree
Showing 298 changed files with 2,529 additions and 3,223 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -23,31 +23,21 @@ namespace kernel_selector {
// Computes the default ND-range for the activation kernel based on the output
// tensor layout: GWS axes are chosen per layout, LWS is either a fixed SIMD-16
// shape (for b_fs_yx_fsv16) or derived via GetOptimalLocalWorkGroupSizes.
// NOTE(review): efficiency is a scheduling priority hint consumed by the
// kernel selector, not a measured metric — confirm against KernelData usage.
ActivationKernelBase::DispatchData ActivationKernelBase::SetDefault(const activation_params& arg) const {
    const auto& out = arg.output;

    DispatchData dispatchData;
    if (out.GetLayout() == DataLayout::yxfb) {
        dispatchData.gws = {out.Feature().v * out.Batch().v, out.X().v, out.Y().v};
        dispatchData.lws = GetOptimalLocalWorkGroupSizes(dispatchData.gws, arg.engineInfo);
    } else if (out.GetLayout() == DataLayout::b_fs_yx_fsv16) {
        // Feature axis is padded to the fsv16 block size so each work-group
        // covers exactly one 16-wide feature slice.
        dispatchData.gws = {Align(out.Feature().v, 16) * out.Batch().v, out.X().v, out.Y().v};
        dispatchData.lws = {16, 1, 1};
    } else {
        dispatchData.gws = {out.X().v, out.Y().v * out.Z().v, out.Feature().v * out.Batch().v};
        dispatchData.lws = GetOptimalLocalWorkGroupSizes(dispatchData.gws, arg.engineInfo);
    }

    dispatchData.efficiency = DONT_USE_IF_HAVE_SOMETHING_ELSE;

    return dispatchData;
}

JitConstants ActivationKernelBase::GetJitConstants(const activation_params& params, DispatchData) const {
Expand Down Expand Up @@ -94,20 +84,20 @@ KernelsData ActivationKernelBase::GetCommonKernelsData(const Params& params, con
activation_params& newParams = *static_cast<activation_params*>(kd.params.get());
const std::string kernel_id = GetEntryPoint(kernelName, params.layerID, options);

auto runInfo = SetDefault(newParams);
auto cldnn_jit = GetJitConstants(newParams, runInfo);
auto dispatchData = SetDefault(newParams);
auto cldnn_jit = GetJitConstants(newParams, dispatchData);
auto entry_point = GetEntryPoint(kernelName, newParams.layerID, options);
auto jit = CreateJit(kernelName, cldnn_jit, entry_point);

auto& kernel = kd.kernels[0];
FillCLKernelData(kernel, runInfo, params.engineInfo, kernelName, jit, entry_point,
FillCLKernelData(kernel, dispatchData, params.engineInfo, kernelName, jit, entry_point,
DEFAULT, false, false, 1, GetFusedPrimitiveInputsCount(params));

if (!newParams.inputActivationParams.empty()) {
kernel.arguments.push_back({ArgumentDescriptor::Types::SLOPE, 0});
}

kd.estimatedTime = runInfo.efficiency;
kd.estimatedTime = dispatchData.efficiency;

return {kd};
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,7 @@ class ActivationKernelBase : public common_kernel_base {

protected:
bool Validate(const Params& p, const optional_params& o) const override;
virtual JitConstants GetJitConstants(const activation_params& params, DispatchData kd) const;
virtual JitConstants GetJitConstants(const activation_params& params, DispatchData dispatchData) const;
virtual DispatchData SetDefault(const activation_params& arg) const;
KernelsData GetCommonKernelsData(const Params& params, const optional_params& options) const;
};
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -38,24 +38,16 @@ ParamsKey ActivationKernelOpt::GetSupportedKey() const {
}

// Overrides the base dispatch data with a flat 1D range: the optimized kernel
// processes NUM_COLS_WI elements per work-item over the linearized input, so
// GWS collapses to totalSize / NUM_COLS_WI (Validate() elsewhere is expected
// to guarantee divisibility — TODO confirm).
ActivationKernelOpt::Parent::DispatchData ActivationKernelOpt::SetDefault(const activation_params& params) const {
    auto dispatchData = Parent::SetDefault(params);

    const auto totalSize = params.inputs[0].LogicalSize();

    dispatchData.gws = { totalSize / NUM_COLS_WI, 1, 1 };
    dispatchData.lws = GetOptimalLocalWorkGroupSizes(dispatchData.gws, params.engineInfo);

    // Prefer this kernel over the reference one when both are applicable.
    dispatchData.efficiency = FORCE_PRIORITY_6;

    return dispatchData;
}

bool ActivationKernelOpt::Validate(const Params& p, const optional_params& o) const {
Expand Down Expand Up @@ -87,8 +79,8 @@ bool ActivationKernelOpt::Validate(const Params& p, const optional_params& o) co
return true;
}

JitConstants ActivationKernelOpt::GetJitConstants(const activation_params& params, DispatchData kd) const {
auto jit = ActivationKernelBase::GetJitConstants(params, kd);
JitConstants ActivationKernelOpt::GetJitConstants(const activation_params& params, DispatchData dispatchData) const {
auto jit = ActivationKernelBase::GetJitConstants(params, dispatchData);
auto input_dt = params.inputs[0].GetDType();

jit.AddConstant(MakeJitConstant("NUM_COLS_WI", NUM_COLS_WI));
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ class ActivationKernelOpt : public ActivationKernelBase {
static const int NUM_COLS_WI = 4;
DispatchData SetDefault(const activation_params& arg) const override;
bool Validate(const Params& p, const optional_params& o) const override;
JitConstants GetJitConstants(const activation_params& params, DispatchData kd) const override;
JitConstants GetJitConstants(const activation_params& params, DispatchData dispatchData) const override;
std::vector<FusedOpType> GetSupportedFusedOps() const override {
return {FusedOpType::QUANTIZE,
FusedOpType::SCALE,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -41,8 +41,8 @@ ParamsKey ActivationKernelRef::GetSupportedKey() const {
return k;
}

JitConstants ActivationKernelRef::GetJitConstants(const activation_params& params, DispatchData kd) const {
auto jit = ActivationKernelBase::GetJitConstants(params, kd);
JitConstants ActivationKernelRef::GetJitConstants(const activation_params& params, DispatchData dispatchData) const {
auto jit = ActivationKernelBase::GetJitConstants(params, dispatchData);
auto input_dt = params.inputs[0].GetDType();

if (!params.fused_ops.empty()) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ class ActivationKernelRef : public ActivationKernelBase {

KernelsData GetKernelsData(const Params& params, const optional_params& options) const override;
ParamsKey GetSupportedKey() const override;
JitConstants GetJitConstants(const activation_params& params, DispatchData kd) const override;
JitConstants GetJitConstants(const activation_params& params, DispatchData dispatchData) const override;
std::vector<FusedOpType> GetSupportedFusedOps() const override {
return {FusedOpType::QUANTIZE,
FusedOpType::SCALE,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -86,22 +86,11 @@ KernelsData ArgMaxMinKernelAxis::GetKernelsData(const Params& params, const opti
}
const arg_max_min_params& orgParams = static_cast<const arg_max_min_params&>(params);

DispatchData runInfo;
runInfo.fp16UnitUsed = orgParams.inputs[0].GetDType() == Datatype::F16;

size_t sort_size = orgParams.argMaxMinSortType == ArgMaxMinSortType::VALUE ? getSortSize(orgParams) : 1;

std::vector<size_t> local, global;
global = { Align(getOperationNumber(orgParams), 32), sort_size, 1 };
local = GetOptimalLocalWorkGroupSizes(global, params.engineInfo);

runInfo.gws0 = global[0];
runInfo.gws1 = global[1];
runInfo.gws2 = global[2];

runInfo.lws0 = local[0];
runInfo.lws1 = local[1];
runInfo.lws2 = local[2];
DispatchData dispatchData;
dispatchData.gws = { Align(getOperationNumber(orgParams), 32), sort_size, 1 };
dispatchData.lws = GetOptimalLocalWorkGroupSizes(dispatchData.gws, params.engineInfo);

KernelData kd = KernelData::Default<arg_max_min_params>(params);

Expand All @@ -110,7 +99,7 @@ KernelsData ArgMaxMinKernelAxis::GetKernelsData(const Params& params, const opti
auto jit = CreateJit(kernelName, cldnn_jit, entry_point);

auto& kernel = kd.kernels[0];
FillCLKernelData(kernel, runInfo, params.engineInfo, kernelName, jit, entry_point);
FillCLKernelData(kernel, dispatchData, params.engineInfo, kernelName, jit, entry_point);

if (orgParams.outputs_num == 2) {
kernel.arguments.push_back({ArgumentDescriptor::Types::INPUT, 1});
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
// Copyright (c) 2018 Intel Corporation
// Copyright (c) 2018-2020 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -37,20 +37,12 @@ JitConstants ArgMaxMinKernelBase::GetJitConstants(const arg_max_min_params& para
}

// Default dispatch data for arg_max/arg_min: a fixed 128-wide work-group per
// batch element (GWS = {128, batch, 1}, LWS = {128, 1, 1}).
// NOTE(review): 128 looks tied to the kernel's internal tile/sort width —
// confirm against the OpenCL kernel source before changing.
ArgMaxMinKernelBase::DispatchData ArgMaxMinKernelBase::SetDefault(const arg_max_min_params& params) const {
    DispatchData dispatchData;

    dispatchData.gws = { 128, params.inputs[0].Batch().v, 1 };
    dispatchData.lws = { 128, 1, 1 };

    return dispatchData;
}

KernelsData ArgMaxMinKernelBase::GetCommonKernelsData(const Params& params, const optional_params& options, float estimatedTime) const {
Expand All @@ -60,7 +52,7 @@ KernelsData ArgMaxMinKernelBase::GetCommonKernelsData(const Params& params, cons

const arg_max_min_params& orgParams = static_cast<const arg_max_min_params&>(params);

DispatchData runInfo = SetDefault(orgParams);
DispatchData dispatchData = SetDefault(orgParams);

KernelData kd = KernelData::Default<arg_max_min_params>(params);

Expand All @@ -69,7 +61,7 @@ KernelsData ArgMaxMinKernelBase::GetCommonKernelsData(const Params& params, cons
auto jit = CreateJit(kernelName, cldnn_jit, entry_point);

auto& kernel = kd.kernels[0];
FillCLKernelData(kernel, runInfo, params.engineInfo, kernelName, jit, entry_point);
FillCLKernelData(kernel, dispatchData, params.engineInfo, kernelName, jit, entry_point);

kd.estimatedTime = estimatedTime;

Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
// Copyright (c) 2018 Intel Corporation
// Copyright (c) 2018-2020 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -37,11 +37,11 @@ KernelsData ArgMaxMinKernelOpt::GetKernelsData(const Params& params, const optio

const arg_max_min_params& orgParams = static_cast<const arg_max_min_params&>(params);

int topK = orgParams.topK;
long size = (long)(orgParams.inputs[0].X().v * orgParams.inputs[0].Y().v * orgParams.inputs[0].Feature().v) / 8;
long outSize = size / 16 * topK;
size_t topK = orgParams.topK;
size_t size = (size_t)(orgParams.inputs[0].X().v * orgParams.inputs[0].Y().v * orgParams.inputs[0].Feature().v) / 8;
size_t outSize = size / 16 * topK;
int kernelAmount = 1;
for (; outSize > 128; outSize = (long)((outSize / 128 + 1) * topK)) {
for (; outSize > 128; outSize = (size_t)((outSize / 128 + 1) * topK)) {
kernelAmount++;
}
KernelData kd = KernelData::Default<arg_max_min_params>(params, kernelAmount);
Expand All @@ -57,22 +57,15 @@ KernelsData ArgMaxMinKernelOpt::GetKernelsData(const Params& params, const optio
newParams.inputs[0] = input;

auto& kernel = kd.kernels[i];
DispatchData runInfo = SetDefault(newParams);
DispatchData dispatchData = SetDefault(newParams);
auto cldnnJit = GetJitConstants(newParams);
auto entryPoint = GetEntryPoint(kernelName, newParams.layerID, options);
auto jit = CreateJit(kernelName, cldnnJit, entryPoint);

runInfo.fp16UnitUsed = orgParams.inputs[0].GetDType() == Datatype::F16;
dispatchData.gws = { Align(size, 16), orgParams.inputs[0].Batch().v, 1 };
dispatchData.lws = { 16, 1, 1 };

runInfo.gws0 = Align(size, 16);
runInfo.gws1 = orgParams.inputs[0].Batch().v; // B
runInfo.gws2 = 1;

runInfo.lws0 = 16;
runInfo.lws1 = 1;
runInfo.lws2 = 1;

FillCLKernelData(kernel, runInfo, params.engineInfo, kernelName, jit, entryPoint);
FillCLKernelData(kernel, dispatchData, params.engineInfo, kernelName, jit, entryPoint);
size = (size / 128 + 1) * topK;
}

Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
// Copyright (c) 2018 Intel Corporation
// Copyright (c) 2018-2020 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -38,32 +38,30 @@ AverageUnpoolingKernelBase::DispatchData AverageUnpoolingKernelBase::SetDefault(
const average_unpooling_params& params) const {
const auto& input = params.inputs[0];

DispatchData kd;
DispatchData dispatchData;

if (input.GetLayout() == DataLayout::bfyx || input.GetLayout() == DataLayout::byxf) {
// Determine global work sizes.
kd.gws2 = input.Batch().v * input.Feature().v; // B, F
kd.gws0 = Align(input.X().v, 32); // X
kd.gws1 = input.Y().v; // Y
dispatchData.gws = { Align(input.X().v, 32), // X
input.Y().v, // Y
input.Batch().v * input.Feature().v, // B, F
};

kd.lws0 = 32;
kd.lws1 = 1;
kd.lws2 = 1;
dispatchData.lws = { 32, 1, 1 };
} else {
// Determine global work sizes.
kd.gws0 = input.Batch().v * input.Feature().v; // B, F
kd.gws1 = input.X().v; // X
kd.gws2 = input.Y().v; // Y

kd.lws0 = std::min(std::max(kd.gws0, static_cast<size_t>(1)), static_cast<size_t>(32));
while (kd.gws0 % kd.lws0 != 0) {
--kd.lws0;
dispatchData.gws = { input.Batch().v * input.Feature().v, // B, F
input.X().v, // X
input.Y().v }; // Y

dispatchData.lws = {1, 1, 1};
dispatchData.lws[0] = std::min(std::max(dispatchData.gws[0], static_cast<size_t>(1)), static_cast<size_t>(32));
while (dispatchData.gws[0] % dispatchData.lws[0] != 0) {
--dispatchData.lws[0];
}
kd.lws1 = 1;
kd.lws2 = 1;
}

return kd;
return dispatchData;
}

KernelsData AverageUnpoolingKernelBase::GetCommonKernelsData(const Params& params,
Expand All @@ -75,7 +73,7 @@ KernelsData AverageUnpoolingKernelBase::GetCommonKernelsData(const Params& param

const average_unpooling_params& orgParams = static_cast<const average_unpooling_params&>(params);

DispatchData runInfo = SetDefault(orgParams);
DispatchData dispatchData = SetDefault(orgParams);

KernelData kd = KernelData::Default<average_unpooling_params>(params);

Expand All @@ -84,10 +82,10 @@ KernelsData AverageUnpoolingKernelBase::GetCommonKernelsData(const Params& param
auto jit = CreateJit(kernelName, cldnn_jit, entry_point);

auto& kernel = kd.kernels[0];
FillCLKernelData(kernel, runInfo, params.engineInfo, kernelName, jit, entry_point);
FillCLKernelData(kernel, dispatchData, params.engineInfo, kernelName, jit, entry_point);

kd.estimatedTime = estimatedTime;

return {kd};
}
} // namespace kernel_selector
} // namespace kernel_selector
Loading

0 comments on commit d00bf25

Please sign in to comment.