Skip to content

Commit

Permalink
Corrections related to capacity of cl_khr_fp16 tests in bruteforce (K…
Browse files Browse the repository at this point in the history
  • Loading branch information
shajder committed Nov 28, 2023
1 parent 64db7f5 commit 2944911
Show file tree
Hide file tree
Showing 3 changed files with 42 additions and 63 deletions.
42 changes: 17 additions & 25 deletions test_conformance/math_brute_force/unary_two_results_half.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -51,10 +51,11 @@ int TestFunc_Half2_Half(const Func *f, MTdata d, bool relaxedMode)
float maxErrorVal1 = 0.0f;
uint64_t step = getTestStep(sizeof(cl_half), BUFFER_SIZE);

constexpr size_t bufferElements = BUFFER_SIZE / sizeof(cl_half);
int scale = (int)((1ULL << 16) / (16 * bufferElements) + 1);
size_t bufferElements = std::min(BUFFER_SIZE / sizeof(cl_half),
size_t(1ULL << (sizeof(cl_half) * 8)));
size_t bufferSize = bufferElements * sizeof(cl_half);

cl_uchar overflow[bufferElements];
std::vector<cl_uchar> overflow(bufferElements);
int isFract = 0 == strcmp("fract", f->nameInCode);
int skipNanInf = isFract;

Expand All @@ -73,19 +74,10 @@ int TestFunc_Half2_Half(const Func *f, MTdata d, bool relaxedMode)
{
// Init input array
cl_half *pIn = (cl_half *)gIn;
if (gWimpyMode)
{
for (size_t j = 0; j < bufferElements; j++)
pIn[j] = (cl_ushort)i + j * scale;
}
else
{
for (size_t j = 0; j < bufferElements; j++)
pIn[j] = (cl_ushort)i + j;
}
for (size_t j = 0; j < bufferElements; j++) pIn[j] = (cl_ushort)i + j;

if ((error = clEnqueueWriteBuffer(gQueue, gInBuffer, CL_FALSE, 0,
BUFFER_SIZE, gIn, 0, NULL, NULL)))
bufferSize, gIn, 0, NULL, NULL)))
{
vlog_error("\n*** Error %d in clEnqueueWriteBuffer ***\n", error);
return error;
Expand All @@ -97,9 +89,9 @@ int TestFunc_Half2_Half(const Func *f, MTdata d, bool relaxedMode)
uint32_t pattern = 0xacdcacdc;
if (gHostFill)
{
memset_pattern4(gOut[j], &pattern, BUFFER_SIZE);
memset_pattern4(gOut[j], &pattern, bufferSize);
if ((error = clEnqueueWriteBuffer(gQueue, gOutBuffer[j],
CL_FALSE, 0, BUFFER_SIZE,
CL_FALSE, 0, bufferSize,
gOut[j], 0, NULL, NULL)))
{
vlog_error(
Expand All @@ -108,9 +100,9 @@ int TestFunc_Half2_Half(const Func *f, MTdata d, bool relaxedMode)
return error;
}

memset_pattern4(gOut2[j], &pattern, BUFFER_SIZE);
memset_pattern4(gOut2[j], &pattern, bufferSize);
if ((error = clEnqueueWriteBuffer(gQueue, gOutBuffer2[j],
CL_FALSE, 0, BUFFER_SIZE,
CL_FALSE, 0, bufferSize,
gOut2[j], 0, NULL, NULL)))
{
vlog_error(
Expand All @@ -122,12 +114,12 @@ int TestFunc_Half2_Half(const Func *f, MTdata d, bool relaxedMode)
else
{
error = clEnqueueFillBuffer(gQueue, gOutBuffer[j], &pattern,
sizeof(pattern), 0, BUFFER_SIZE, 0,
sizeof(pattern), 0, bufferSize, 0,
NULL, NULL);
test_error(error, "clEnqueueFillBuffer 1 failed!\n");

error = clEnqueueFillBuffer(gQueue, gOutBuffer[j], &pattern,
sizeof(pattern), 0, BUFFER_SIZE, 0,
sizeof(pattern), 0, bufferSize, 0,
NULL, NULL);
test_error(error, "clEnqueueFillBuffer 2 failed!\n");
}
Expand All @@ -137,7 +129,7 @@ int TestFunc_Half2_Half(const Func *f, MTdata d, bool relaxedMode)
for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
{
size_t vectorSize = sizeValues[j] * sizeof(cl_half);
size_t localCount = (BUFFER_SIZE + vectorSize - 1) / vectorSize;
size_t localCount = (bufferSize + vectorSize - 1) / vectorSize;
if ((error = clSetKernelArg(kernels[j][thread_id], 0,
sizeof(gOutBuffer[j]), &gOutBuffer[j])))
{
Expand Down Expand Up @@ -225,14 +217,14 @@ int TestFunc_Half2_Half(const Func *f, MTdata d, bool relaxedMode)
{
if ((error =
clEnqueueReadBuffer(gQueue, gOutBuffer[j], CL_TRUE, 0,
BUFFER_SIZE, gOut[j], 0, NULL, NULL)))
bufferSize, gOut[j], 0, NULL, NULL)))
{
vlog_error("ReadArray failed %d\n", error);
return error;
}
if ((error =
clEnqueueReadBuffer(gQueue, gOutBuffer2[j], CL_TRUE, 0,
BUFFER_SIZE, gOut2[j], 0, NULL, NULL)))
bufferSize, gOut2[j], 0, NULL, NULL)))
{
vlog_error("ReadArray2 failed %d\n", error);
return error;
Expand Down Expand Up @@ -432,8 +424,8 @@ int TestFunc_Half2_Half(const Func *f, MTdata d, bool relaxedMode)
if (gVerboseBruteForce)
{
vlog("base:%14" PRIu64 " step:%10" PRIu64
" bufferSize:%10d \n",
i, step, BUFFER_SIZE);
" bufferSize:%10zu \n",
i, step, bufferSize);
}
else
{
Expand Down
48 changes: 21 additions & 27 deletions test_conformance/math_brute_force/unary_two_results_i_half.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -61,8 +61,11 @@ int TestFunc_HalfI_Half(const Func *f, MTdata d, bool relaxedMode)
// sizeof(cl_half) < sizeof(int32_t), so the element count must be derived
// from the larger type: using sizeof(int32_t) as the divisor in the buffer
// size calculation prevents overflowing gOut_Ref2.
constexpr size_t bufferElements = BUFFER_SIZE / sizeof(int32_t);
int scale = (int)((1ULL << 16) / (16 * bufferElements) + 1);
size_t bufferElements = std::min(BUFFER_SIZE / sizeof(cl_int),
size_t(1ULL << (sizeof(cl_half) * 8)));

size_t bufferSizeLo = bufferElements * sizeof(cl_half);
size_t bufferSizeHi = bufferElements * sizeof(cl_int);

cl_ulong maxiError = 0;

Expand All @@ -83,19 +86,10 @@ int TestFunc_HalfI_Half(const Func *f, MTdata d, bool relaxedMode)
{
// Init input array
cl_half *pIn = (cl_half *)gIn;
if (gWimpyMode)
{
for (size_t j = 0; j < bufferElements; j++)
pIn[j] = (cl_ushort)i + j * scale;
}
else
{
for (size_t j = 0; j < bufferElements; j++)
pIn[j] = (cl_ushort)i + j;
}
for (size_t j = 0; j < bufferElements; j++) pIn[j] = (cl_ushort)i + j;

if ((error = clEnqueueWriteBuffer(gQueue, gInBuffer, CL_FALSE, 0,
BUFFER_SIZE, gIn, 0, NULL, NULL)))
bufferSizeLo, gIn, 0, NULL, NULL)))
{
vlog_error("\n*** Error %d in clEnqueueWriteBuffer ***\n", error);
return error;
Expand All @@ -107,9 +101,9 @@ int TestFunc_HalfI_Half(const Func *f, MTdata d, bool relaxedMode)
uint32_t pattern = 0xacdcacdc;
if (gHostFill)
{
memset_pattern4(gOut[j], &pattern, BUFFER_SIZE);
memset_pattern4(gOut[j], &pattern, bufferSizeLo);
if ((error = clEnqueueWriteBuffer(gQueue, gOutBuffer[j],
CL_FALSE, 0, BUFFER_SIZE,
CL_FALSE, 0, bufferSizeLo,
gOut[j], 0, NULL, NULL)))
{
vlog_error(
Expand All @@ -118,9 +112,9 @@ int TestFunc_HalfI_Half(const Func *f, MTdata d, bool relaxedMode)
return error;
}

memset_pattern4(gOut2[j], &pattern, BUFFER_SIZE);
memset_pattern4(gOut2[j], &pattern, bufferSizeHi);
if ((error = clEnqueueWriteBuffer(gQueue, gOutBuffer2[j],
CL_FALSE, 0, BUFFER_SIZE,
CL_FALSE, 0, bufferSizeHi,
gOut2[j], 0, NULL, NULL)))
{
vlog_error(
Expand All @@ -132,12 +126,12 @@ int TestFunc_HalfI_Half(const Func *f, MTdata d, bool relaxedMode)
else
{
error = clEnqueueFillBuffer(gQueue, gOutBuffer[j], &pattern,
sizeof(pattern), 0, BUFFER_SIZE, 0,
sizeof(pattern), 0, bufferSizeLo, 0,
NULL, NULL);
test_error(error, "clEnqueueFillBuffer 1 failed!\n");

error = clEnqueueFillBuffer(gQueue, gOutBuffer2[j], &pattern,
sizeof(pattern), 0, BUFFER_SIZE, 0,
sizeof(pattern), 0, bufferSizeHi, 0,
NULL, NULL);
test_error(error, "clEnqueueFillBuffer 2 failed!\n");
}
Expand All @@ -147,8 +141,8 @@ int TestFunc_HalfI_Half(const Func *f, MTdata d, bool relaxedMode)
for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
{
// align working group size with the bigger output type
size_t vectorSize = sizeValues[j] * sizeof(int32_t);
size_t localCount = (BUFFER_SIZE + vectorSize - 1) / vectorSize;
size_t vectorSize = sizeValues[j] * sizeof(cl_int);
size_t localCount = (bufferSizeHi + vectorSize - 1) / vectorSize;
if ((error = clSetKernelArg(kernels[j][thread_id], 0,
sizeof(gOutBuffer[j]), &gOutBuffer[j])))
{
Expand Down Expand Up @@ -198,14 +192,14 @@ int TestFunc_HalfI_Half(const Func *f, MTdata d, bool relaxedMode)
(j + 1 < gMaxVectorSizeIndex) ? CL_FALSE : CL_TRUE;
if ((error =
clEnqueueReadBuffer(gQueue, gOutBuffer[j], blocking, 0,
BUFFER_SIZE, gOut[j], 0, NULL, NULL)))
bufferSizeLo, gOut[j], 0, NULL, NULL)))
{
vlog_error("ReadArray failed %d\n", error);
return error;
}
if ((error =
clEnqueueReadBuffer(gQueue, gOutBuffer2[j], blocking, 0,
BUFFER_SIZE, gOut2[j], 0, NULL, NULL)))
if ((error = clEnqueueReadBuffer(gQueue, gOutBuffer2[j], blocking,
0, bufferSizeHi, gOut2[j], 0, NULL,
NULL)))
{
vlog_error("ReadArray2 failed %d\n", error);
return error;
Expand Down Expand Up @@ -325,8 +319,8 @@ int TestFunc_HalfI_Half(const Func *f, MTdata d, bool relaxedMode)
if (gVerboseBruteForce)
{
vlog("base:%14" PRIu64 " step:%10" PRIu64
" bufferSize:%10d \n",
i, step, BUFFER_SIZE);
" bufferSize:%10zu \n",
i, step, bufferSizeHi);
}
else
{
Expand Down
15 changes: 4 additions & 11 deletions test_conformance/math_brute_force/unary_u_half.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -48,10 +48,10 @@ int TestFunc_Half_UShort(const Func *f, MTdata d, bool relaxedMode)
float maxError = 0.0f;
int ftz = f->ftz || gForceFTZ || 0 == (CL_FP_DENORM & gHalfCapabilities);
float maxErrorVal = 0.0f;
size_t bufferSize = BUFFER_SIZE;
size_t bufferElements = bufferSize / sizeof(cl_half);
uint64_t step = getTestStep(sizeof(cl_half), BUFFER_SIZE);
int scale = (int)((1ULL << 32) / (16 * bufferElements) + 1);
size_t bufferElements = std::min(BUFFER_SIZE / sizeof(cl_half),
size_t(1ULL << (sizeof(cl_half) * 8)));
size_t bufferSize = bufferElements * sizeof(cl_half);
logFunctionInfo(f->name, sizeof(cl_half), relaxedMode);
const char *name = f->name;
float half_ulps = f->half_ulps;
Expand All @@ -69,14 +69,7 @@ int TestFunc_Half_UShort(const Func *f, MTdata d, bool relaxedMode)
{
// Init input array
cl_ushort *p = (cl_ushort *)gIn;
if (gWimpyMode)
{
for (size_t j = 0; j < bufferElements; j++) p[j] = i + j * scale;
}
else
{
for (size_t j = 0; j < bufferElements; j++) p[j] = (uint16_t)i + j;
}
for (size_t j = 0; j < bufferElements; j++) p[j] = (uint16_t)i + j;

if ((error = clEnqueueWriteBuffer(gQueue, gInBuffer, CL_FALSE, 0,
bufferSize, gIn, 0, NULL, NULL)))
Expand Down

0 comments on commit 2944911

Please sign in to comment.