forked from openvinotoolkit/openvino
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
[VPU][OpenCL] Update custom kernels (openvinotoolkit#2131)
* [Custom CL] Updated OpenCL kernels and tests * [Custom CL] Update OpenCL compiler * Update firmware to 1365 * Disable ExpGenerateProposals tests * VPU: new firmware no. 1370 * Myriad: re-enable ExpGenerateProposals tests Co-authored-by: Maxim Kurin <[email protected]>
- Loading branch information
1 parent
867340e
commit 5ad4811
Showing
49 changed files
with
2,950 additions
and
4,075 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,67 @@ | ||
// Copyright (C) 2018-2020 Intel Corporation | ||
// SPDX-License-Identifier: Apache-2.0 | ||
// | ||
|
||
#pragma OPENCL EXTENSION cl_khr_fp16 : enable | ||
#pragma OPENCL EXTENSION cl_khr_extended_async_copies : enable | ||
|
||
__kernel void binarization( | ||
const __global half *__restrict src_data, | ||
const __global half *__restrict input_low_high, | ||
const __global half *__restrict dst_data, | ||
int switch_out, | ||
int input_low_high_size, | ||
int W, | ||
int H) | ||
{ | ||
__local half local_src[15 * 1024]; | ||
__local half local_dst[15 * 1024]; | ||
|
||
event_t e1 = async_work_group_copy(local_src, src_data + get_group_id(2) * W * H, W * H, 0); | ||
wait_group_events(1, &e1); | ||
|
||
int c = get_global_id(2); | ||
int C = get_global_size(2); | ||
|
||
half dst_low = switch_out ? 1.h : -1.h; | ||
half dst_high = switch_out ? -1.h : 1.h; | ||
|
||
half s_ilow_ihigh = input_low_high_size == 1 ? input_low_high[0] : input_low_high[c]; | ||
|
||
for (int h = 0; h < H; h++) { | ||
|
||
__local const half *__restrict addr_src = local_src + h * W; | ||
__local half *__restrict addr_dst = local_dst + h * W; | ||
|
||
#if 1 | ||
for (int w = 0; w < W / 8; w++) { | ||
|
||
half8 h_src_val8 = (*((__local half8 *)addr_src + w)); | ||
|
||
short8 cond1; | ||
cond1.s0 = (h_src_val8.s0 <= s_ilow_ihigh); | ||
cond1.s1 = (h_src_val8.s1 <= s_ilow_ihigh); | ||
cond1.s2 = (h_src_val8.s2 <= s_ilow_ihigh); | ||
cond1.s3 = (h_src_val8.s3 <= s_ilow_ihigh); | ||
cond1.s4 = (h_src_val8.s4 <= s_ilow_ihigh); | ||
cond1.s5 = (h_src_val8.s5 <= s_ilow_ihigh); | ||
cond1.s6 = (h_src_val8.s6 <= s_ilow_ihigh); | ||
cond1.s7 = (h_src_val8.s7 <= s_ilow_ihigh); | ||
|
||
cond1 = ~(cond1 - (short8)1); | ||
|
||
short8 res = cond1 & as_short8((half8)dst_low) | ~cond1 & as_short8((half8)dst_high); | ||
|
||
*((__local half8 *)addr_dst + w) = as_half8(res); | ||
} | ||
#endif | ||
for (int w = W & (~0x7); w < W; w++) { | ||
addr_dst[w] = (addr_src[w] <= s_ilow_ihigh) ? dst_low : dst_high; | ||
} | ||
} | ||
|
||
barrier(CLK_LOCAL_MEM_FENCE); | ||
|
||
event_t e2 = async_work_group_copy(dst_data + get_group_id(2) * W * H, local_dst, W * H, 0); | ||
wait_group_events(1, &e2); | ||
} |
95 changes: 95 additions & 0 deletions
95
inference-engine/src/vpu/custom_kernels/binary_convolution.cl
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,95 @@ | ||
// Copyright (C) 2018-2020 Intel Corporation | ||
// SPDX-License-Identifier: Apache-2.0 | ||
// | ||
|
||
#pragma OPENCL EXTENSION cl_khr_fp16 : enable | ||
|
||
int extract_weights(uchar val, int bit) { return ((val >> bit) & 1); } | ||
|
||
__kernel void binary_convolution( | ||
const __global half *restrict src_data, | ||
const __global uchar *restrict weights_data, | ||
__global half *restrict dst_data, | ||
float pad_value, | ||
|
||
int IW, | ||
int IH, | ||
int IC, | ||
|
||
int DW, | ||
int DH, | ||
|
||
int GC, | ||
|
||
int KW, | ||
int KH, | ||
|
||
int PW, | ||
int PH, | ||
|
||
int SW, | ||
int SH) | ||
{ | ||
int ipad_value = ((pad_value > 0.f) ? 1 : 0); | ||
int c = get_global_id(2); | ||
int y = get_global_id(1); | ||
int x = get_global_id(0); | ||
|
||
int OC = get_global_size(2); | ||
int OH = get_global_size(1); | ||
int OW = get_global_size(0); | ||
|
||
int KD = 1; | ||
int SD = 0; | ||
int DD = 0; | ||
int PD = 0; | ||
int ID = 1; | ||
int OD = 1; | ||
|
||
int nbits = 8; | ||
|
||
int g = c % GC; | ||
int oc = c / GC; | ||
int oh = y; | ||
int ow = x; | ||
|
||
for (int od = 0; od < OD; od++) { | ||
int oidx = g * OC / GC * OD * OH * OW + oc * OD * OH * OW + od * OH * OW + oh * OW + ow; | ||
|
||
int res = 0; | ||
|
||
for (int ic = 0; ic < IC / GC; ic++) { | ||
for (int kd = 0; kd < KD; kd++) { | ||
for (int kh = 0; kh < KH; kh++) { | ||
for (int kw = 0; kw < KW; kw++) { | ||
int widx = g * OC / GC * IC / GC * KD * KH * KW | ||
+ oc * IC / GC * KD * KH * KW + ic * KD * KH * KW + kd * KH * KW | ||
+ kh * KW + kw; | ||
|
||
int w = extract_weights(weights_data[widx / nbits], (widx % nbits)); | ||
|
||
int s; | ||
|
||
int iw = ow * SW - PW + kw * DW; | ||
int ih = oh * SH - PH + kh * DH; | ||
int id = od * SD - PD + kd * DD; | ||
|
||
if (iw < 0 || iw >= (int)IW || ih < 0 || ih >= (int)IH || id < 0 | ||
|| id >= (int)ID) { | ||
s = ipad_value; | ||
} else { | ||
int iidx = g * IC / GC * ID * IH * IW + ic * ID * IH * IW + id * IH * IW | ||
+ ih * IW + iw; | ||
|
||
s = ((src_data[iidx] > 0.f) ? 1 : 0); | ||
} | ||
|
||
res += s ^ w; | ||
} | ||
} | ||
} | ||
} | ||
|
||
dst_data[oidx] = (half)(IC / GC * KD * KH * KW - 2 * res); | ||
} | ||
} |
Oops, something went wrong.