Skip to content

Commit

Permalink
[VPU][OpenCL] Update custom kernels (openvinotoolkit#2131)
Browse files Browse the repository at this point in the history
* [Custom CL] Updated OpenCL kernels and tests

* [Custom CL] Update OpenCL compiler

* Update firmware to 1365

* Disable ExpGenerateProposals tests

* VPU: new firmware no. 1370

* Myriad: re-enable ExpGenerateProposals tests

Co-authored-by: Maxim Kurin <[email protected]>
  • Loading branch information
Evgeny Latkin and maxim-kurin authored Sep 9, 2020
1 parent 867340e commit 5ad4811
Show file tree
Hide file tree
Showing 49 changed files with 2,950 additions and 4,075 deletions.
4 changes: 2 additions & 2 deletions inference-engine/cmake/vpu_dependencies.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -19,8 +19,8 @@ set(VPU_SUPPORTED_FIRMWARES usb-ma2450 usb-ma2x8x pcie-ma248x)
# Default packages
#

set(FIRMWARE_PACKAGE_VERSION 1360)
set(VPU_CLC_MA2X8X_VERSION "movi-cltools-20.02.0")
set(FIRMWARE_PACKAGE_VERSION 1370)
set(VPU_CLC_MA2X8X_VERSION "movi-cltools-20.09.0")

#
# CMake variables to override default firmware files
Expand Down
8 changes: 7 additions & 1 deletion inference-engine/src/vpu/common/src/utils/simple_math.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -65,9 +65,14 @@ void MathExpression::parse(const std::string& expression) {
// parse number
if (std::isdigit(*it)) {
size_t len = 0;
// parse number and use its length
const auto value = std::stof(&*it, &len);
(void) value;
// copy sub string that represents a number
auto substring = std::string{it, it + len};

_parsedTokens.emplace_back(TokenType::Value, ValueType{value}, "");
auto token = Token{TokenType::Value, ValueType{substring}, ""};
_parsedTokens.push_back(std::move(token));

std::advance(it, len - 1);
continue;
Expand All @@ -84,6 +89,7 @@ void MathExpression::parse(const std::string& expression) {
tokenStack.push(token);
continue;
}

if (_vars.find(token) != _vars.end()) {
_parsedTokens.emplace_back(TokenType::Value, ValueType{_vars.at(token)}, "");
continue;
Expand Down
67 changes: 67 additions & 0 deletions inference-engine/src/vpu/custom_kernels/binarization.cl
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
// Copyright (C) 2018-2020 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#pragma OPENCL EXTENSION cl_khr_fp16 : enable
#pragma OPENCL EXTENSION cl_khr_extended_async_copies : enable

__kernel void binarization(
const __global half *__restrict src_data,
const __global half *__restrict input_low_high,
const __global half *__restrict dst_data,
int switch_out,
int input_low_high_size,
int W,
int H)
{
__local half local_src[15 * 1024];
__local half local_dst[15 * 1024];

event_t e1 = async_work_group_copy(local_src, src_data + get_group_id(2) * W * H, W * H, 0);
wait_group_events(1, &e1);

int c = get_global_id(2);
int C = get_global_size(2);

half dst_low = switch_out ? 1.h : -1.h;
half dst_high = switch_out ? -1.h : 1.h;

half s_ilow_ihigh = input_low_high_size == 1 ? input_low_high[0] : input_low_high[c];

for (int h = 0; h < H; h++) {

__local const half *__restrict addr_src = local_src + h * W;
__local half *__restrict addr_dst = local_dst + h * W;

#if 1
for (int w = 0; w < W / 8; w++) {

half8 h_src_val8 = (*((__local half8 *)addr_src + w));

short8 cond1;
cond1.s0 = (h_src_val8.s0 <= s_ilow_ihigh);
cond1.s1 = (h_src_val8.s1 <= s_ilow_ihigh);
cond1.s2 = (h_src_val8.s2 <= s_ilow_ihigh);
cond1.s3 = (h_src_val8.s3 <= s_ilow_ihigh);
cond1.s4 = (h_src_val8.s4 <= s_ilow_ihigh);
cond1.s5 = (h_src_val8.s5 <= s_ilow_ihigh);
cond1.s6 = (h_src_val8.s6 <= s_ilow_ihigh);
cond1.s7 = (h_src_val8.s7 <= s_ilow_ihigh);

cond1 = ~(cond1 - (short8)1);

short8 res = cond1 & as_short8((half8)dst_low) | ~cond1 & as_short8((half8)dst_high);

*((__local half8 *)addr_dst + w) = as_half8(res);
}
#endif
for (int w = W & (~0x7); w < W; w++) {
addr_dst[w] = (addr_src[w] <= s_ilow_ihigh) ? dst_low : dst_high;
}
}

barrier(CLK_LOCAL_MEM_FENCE);

event_t e2 = async_work_group_copy(dst_data + get_group_id(2) * W * H, local_dst, W * H, 0);
wait_group_events(1, &e2);
}
95 changes: 95 additions & 0 deletions inference-engine/src/vpu/custom_kernels/binary_convolution.cl
Original file line number Diff line number Diff line change
@@ -0,0 +1,95 @@
// Copyright (C) 2018-2020 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#pragma OPENCL EXTENSION cl_khr_fp16 : enable

int extract_weights(uchar val, int bit) { return ((val >> bit) & 1); }

__kernel void binary_convolution(
const __global half *restrict src_data,
const __global uchar *restrict weights_data,
__global half *restrict dst_data,
float pad_value,

int IW,
int IH,
int IC,

int DW,
int DH,

int GC,

int KW,
int KH,

int PW,
int PH,

int SW,
int SH)
{
int ipad_value = ((pad_value > 0.f) ? 1 : 0);
int c = get_global_id(2);
int y = get_global_id(1);
int x = get_global_id(0);

int OC = get_global_size(2);
int OH = get_global_size(1);
int OW = get_global_size(0);

int KD = 1;
int SD = 0;
int DD = 0;
int PD = 0;
int ID = 1;
int OD = 1;

int nbits = 8;

int g = c % GC;
int oc = c / GC;
int oh = y;
int ow = x;

for (int od = 0; od < OD; od++) {
int oidx = g * OC / GC * OD * OH * OW + oc * OD * OH * OW + od * OH * OW + oh * OW + ow;

int res = 0;

for (int ic = 0; ic < IC / GC; ic++) {
for (int kd = 0; kd < KD; kd++) {
for (int kh = 0; kh < KH; kh++) {
for (int kw = 0; kw < KW; kw++) {
int widx = g * OC / GC * IC / GC * KD * KH * KW
+ oc * IC / GC * KD * KH * KW + ic * KD * KH * KW + kd * KH * KW
+ kh * KW + kw;

int w = extract_weights(weights_data[widx / nbits], (widx % nbits));

int s;

int iw = ow * SW - PW + kw * DW;
int ih = oh * SH - PH + kh * DH;
int id = od * SD - PD + kd * DD;

if (iw < 0 || iw >= (int)IW || ih < 0 || ih >= (int)IH || id < 0
|| id >= (int)ID) {
s = ipad_value;
} else {
int iidx = g * IC / GC * ID * IH * IW + ic * ID * IH * IW + id * IH * IW
+ ih * IW + iw;

s = ((src_data[iidx] > 0.f) ? 1 : 0);
}

res += s ^ w;
}
}
}
}

dst_data[oidx] = (half)(IC / GC * KD * KH * KW - 2 * res);
}
}
Loading

0 comments on commit 5ad4811

Please sign in to comment.