From 29449119f66cd2797abc38b88523f8570015f0bc Mon Sep 17 00:00:00 2001 From: Marcin Hajder Date: Tue, 28 Nov 2023 18:24:30 +0100 Subject: [PATCH] Corrections related to capacity of cl_khr_fp16 tests in bruteforce (#142) --- .../unary_two_results_half.cpp | 42 +++++++--------- .../unary_two_results_i_half.cpp | 48 ++++++++----------- .../math_brute_force/unary_u_half.cpp | 15 ++---- 3 files changed, 42 insertions(+), 63 deletions(-) diff --git a/test_conformance/math_brute_force/unary_two_results_half.cpp b/test_conformance/math_brute_force/unary_two_results_half.cpp index 9284fbd760..70a9f4c79e 100644 --- a/test_conformance/math_brute_force/unary_two_results_half.cpp +++ b/test_conformance/math_brute_force/unary_two_results_half.cpp @@ -51,10 +51,11 @@ int TestFunc_Half2_Half(const Func *f, MTdata d, bool relaxedMode) float maxErrorVal1 = 0.0f; uint64_t step = getTestStep(sizeof(cl_half), BUFFER_SIZE); - constexpr size_t bufferElements = BUFFER_SIZE / sizeof(cl_half); - int scale = (int)((1ULL << 16) / (16 * bufferElements) + 1); + size_t bufferElements = std::min(BUFFER_SIZE / sizeof(cl_half), + size_t(1ULL << (sizeof(cl_half) * 8))); + size_t bufferSize = bufferElements * sizeof(cl_half); - cl_uchar overflow[bufferElements]; + std::vector<cl_uchar> overflow(bufferElements); int isFract = 0 == strcmp("fract", f->nameInCode); int skipNanInf = isFract; @@ -73,19 +74,10 @@ int TestFunc_Half2_Half(const Func *f, MTdata d, bool relaxedMode) { // Init input array cl_half *pIn = (cl_half *)gIn; - if (gWimpyMode) - { - for (size_t j = 0; j < bufferElements; j++) - pIn[j] = (cl_ushort)i + j * scale; - } - else - { - for (size_t j = 0; j < bufferElements; j++) - pIn[j] = (cl_ushort)i + j; - } + for (size_t j = 0; j < bufferElements; j++) pIn[j] = (cl_ushort)i + j; if ((error = clEnqueueWriteBuffer(gQueue, gInBuffer, CL_FALSE, 0, - BUFFER_SIZE, gIn, 0, NULL, NULL))) + bufferSize, gIn, 0, NULL, NULL))) { vlog_error("\n*** Error %d in clEnqueueWriteBuffer ***\n", error); return error; 
@@ -97,9 +89,9 @@ int TestFunc_Half2_Half(const Func *f, MTdata d, bool relaxedMode) uint32_t pattern = 0xacdcacdc; if (gHostFill) { - memset_pattern4(gOut[j], &pattern, BUFFER_SIZE); + memset_pattern4(gOut[j], &pattern, bufferSize); if ((error = clEnqueueWriteBuffer(gQueue, gOutBuffer[j], - CL_FALSE, 0, BUFFER_SIZE, + CL_FALSE, 0, bufferSize, gOut[j], 0, NULL, NULL))) { vlog_error( @@ -108,9 +100,9 @@ int TestFunc_Half2_Half(const Func *f, MTdata d, bool relaxedMode) return error; } - memset_pattern4(gOut2[j], &pattern, BUFFER_SIZE); + memset_pattern4(gOut2[j], &pattern, bufferSize); if ((error = clEnqueueWriteBuffer(gQueue, gOutBuffer2[j], - CL_FALSE, 0, BUFFER_SIZE, + CL_FALSE, 0, bufferSize, gOut2[j], 0, NULL, NULL))) { vlog_error( @@ -122,12 +114,12 @@ int TestFunc_Half2_Half(const Func *f, MTdata d, bool relaxedMode) else { error = clEnqueueFillBuffer(gQueue, gOutBuffer[j], &pattern, - sizeof(pattern), 0, BUFFER_SIZE, 0, + sizeof(pattern), 0, bufferSize, 0, NULL, NULL); test_error(error, "clEnqueueFillBuffer 1 failed!\n"); error = clEnqueueFillBuffer(gQueue, gOutBuffer[j], &pattern, - sizeof(pattern), 0, BUFFER_SIZE, 0, + sizeof(pattern), 0, bufferSize, 0, NULL, NULL); test_error(error, "clEnqueueFillBuffer 2 failed!\n"); } @@ -137,7 +129,7 @@ int TestFunc_Half2_Half(const Func *f, MTdata d, bool relaxedMode) for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++) { size_t vectorSize = sizeValues[j] * sizeof(cl_half); - size_t localCount = (BUFFER_SIZE + vectorSize - 1) / vectorSize; + size_t localCount = (bufferSize + vectorSize - 1) / vectorSize; if ((error = clSetKernelArg(kernels[j][thread_id], 0, sizeof(gOutBuffer[j]), &gOutBuffer[j]))) { @@ -225,14 +217,14 @@ int TestFunc_Half2_Half(const Func *f, MTdata d, bool relaxedMode) { if ((error = clEnqueueReadBuffer(gQueue, gOutBuffer[j], CL_TRUE, 0, - BUFFER_SIZE, gOut[j], 0, NULL, NULL))) + bufferSize, gOut[j], 0, NULL, NULL))) { vlog_error("ReadArray failed %d\n", error); return error; } if ((error 
= clEnqueueReadBuffer(gQueue, gOutBuffer2[j], CL_TRUE, 0, - BUFFER_SIZE, gOut2[j], 0, NULL, NULL))) + bufferSize, gOut2[j], 0, NULL, NULL))) { vlog_error("ReadArray2 failed %d\n", error); return error; @@ -432,8 +424,8 @@ int TestFunc_Half2_Half(const Func *f, MTdata d, bool relaxedMode) if (gVerboseBruteForce) { vlog("base:%14" PRIu64 " step:%10" PRIu64 - " bufferSize:%10d \n", - i, step, BUFFER_SIZE); + " bufferSize:%10zu \n", + i, step, bufferSize); } else { diff --git a/test_conformance/math_brute_force/unary_two_results_i_half.cpp b/test_conformance/math_brute_force/unary_two_results_i_half.cpp index 007f169686..5906c2837a 100644 --- a/test_conformance/math_brute_force/unary_two_results_i_half.cpp +++ b/test_conformance/math_brute_force/unary_two_results_i_half.cpp @@ -61,8 +61,11 @@ int TestFunc_HalfI_Half(const Func *f, MTdata d, bool relaxedMode) // sizeof(cl_half) < sizeof (int32_t) // to prevent overflowing gOut_Ref2 it is necessary to use // bigger type as denominator for buffer size calculation - constexpr size_t bufferElements = BUFFER_SIZE / sizeof(int32_t); - int scale = (int)((1ULL << 16) / (16 * bufferElements) + 1); + size_t bufferElements = std::min(BUFFER_SIZE / sizeof(cl_int), + size_t(1ULL << (sizeof(cl_half) * 8))); + + size_t bufferSizeLo = bufferElements * sizeof(cl_half); + size_t bufferSizeHi = bufferElements * sizeof(cl_int); cl_ulong maxiError = 0; @@ -83,19 +86,10 @@ int TestFunc_HalfI_Half(const Func *f, MTdata d, bool relaxedMode) { // Init input array cl_half *pIn = (cl_half *)gIn; - if (gWimpyMode) - { - for (size_t j = 0; j < bufferElements; j++) - pIn[j] = (cl_ushort)i + j * scale; - } - else - { - for (size_t j = 0; j < bufferElements; j++) - pIn[j] = (cl_ushort)i + j; - } + for (size_t j = 0; j < bufferElements; j++) pIn[j] = (cl_ushort)i + j; if ((error = clEnqueueWriteBuffer(gQueue, gInBuffer, CL_FALSE, 0, - BUFFER_SIZE, gIn, 0, NULL, NULL))) + bufferSizeLo, gIn, 0, NULL, NULL))) { vlog_error("\n*** Error %d in 
clEnqueueWriteBuffer ***\n", error); return error; @@ -107,9 +101,9 @@ int TestFunc_HalfI_Half(const Func *f, MTdata d, bool relaxedMode) uint32_t pattern = 0xacdcacdc; if (gHostFill) { - memset_pattern4(gOut[j], &pattern, BUFFER_SIZE); + memset_pattern4(gOut[j], &pattern, bufferSizeLo); if ((error = clEnqueueWriteBuffer(gQueue, gOutBuffer[j], - CL_FALSE, 0, BUFFER_SIZE, + CL_FALSE, 0, bufferSizeLo, gOut[j], 0, NULL, NULL))) { vlog_error( @@ -118,9 +112,9 @@ int TestFunc_HalfI_Half(const Func *f, MTdata d, bool relaxedMode) return error; } - memset_pattern4(gOut2[j], &pattern, BUFFER_SIZE); + memset_pattern4(gOut2[j], &pattern, bufferSizeHi); if ((error = clEnqueueWriteBuffer(gQueue, gOutBuffer2[j], - CL_FALSE, 0, BUFFER_SIZE, + CL_FALSE, 0, bufferSizeHi, gOut2[j], 0, NULL, NULL))) { vlog_error( @@ -132,12 +126,12 @@ int TestFunc_HalfI_Half(const Func *f, MTdata d, bool relaxedMode) else { error = clEnqueueFillBuffer(gQueue, gOutBuffer[j], &pattern, - sizeof(pattern), 0, BUFFER_SIZE, 0, + sizeof(pattern), 0, bufferSizeLo, 0, NULL, NULL); test_error(error, "clEnqueueFillBuffer 1 failed!\n"); error = clEnqueueFillBuffer(gQueue, gOutBuffer2[j], &pattern, - sizeof(pattern), 0, BUFFER_SIZE, 0, + sizeof(pattern), 0, bufferSizeHi, 0, NULL, NULL); test_error(error, "clEnqueueFillBuffer 2 failed!\n"); } @@ -147,8 +141,8 @@ int TestFunc_HalfI_Half(const Func *f, MTdata d, bool relaxedMode) for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++) { // align working group size with the bigger output type - size_t vectorSize = sizeValues[j] * sizeof(int32_t); - size_t localCount = (BUFFER_SIZE + vectorSize - 1) / vectorSize; + size_t vectorSize = sizeValues[j] * sizeof(cl_int); + size_t localCount = (bufferSizeHi + vectorSize - 1) / vectorSize; if ((error = clSetKernelArg(kernels[j][thread_id], 0, sizeof(gOutBuffer[j]), &gOutBuffer[j]))) { @@ -198,14 +192,14 @@ int TestFunc_HalfI_Half(const Func *f, MTdata d, bool relaxedMode) (j + 1 < gMaxVectorSizeIndex) ? 
CL_FALSE : CL_TRUE; if ((error = clEnqueueReadBuffer(gQueue, gOutBuffer[j], blocking, 0, - BUFFER_SIZE, gOut[j], 0, NULL, NULL))) + bufferSizeLo, gOut[j], 0, NULL, NULL))) { vlog_error("ReadArray failed %d\n", error); return error; } - if ((error = - clEnqueueReadBuffer(gQueue, gOutBuffer2[j], blocking, 0, - BUFFER_SIZE, gOut2[j], 0, NULL, NULL))) + if ((error = clEnqueueReadBuffer(gQueue, gOutBuffer2[j], blocking, + 0, bufferSizeHi, gOut2[j], 0, NULL, + NULL))) { vlog_error("ReadArray2 failed %d\n", error); return error; @@ -325,8 +319,8 @@ int TestFunc_HalfI_Half(const Func *f, MTdata d, bool relaxedMode) if (gVerboseBruteForce) { vlog("base:%14" PRIu64 " step:%10" PRIu64 - " bufferSize:%10d \n", - i, step, BUFFER_SIZE); + " bufferSize:%10zu \n", + i, step, bufferSizeHi); } else { diff --git a/test_conformance/math_brute_force/unary_u_half.cpp b/test_conformance/math_brute_force/unary_u_half.cpp index 04b2b16b2b..6f21ef3eee 100644 --- a/test_conformance/math_brute_force/unary_u_half.cpp +++ b/test_conformance/math_brute_force/unary_u_half.cpp @@ -48,10 +48,10 @@ int TestFunc_Half_UShort(const Func *f, MTdata d, bool relaxedMode) float maxError = 0.0f; int ftz = f->ftz || gForceFTZ || 0 == (CL_FP_DENORM & gHalfCapabilities); float maxErrorVal = 0.0f; - size_t bufferSize = BUFFER_SIZE; - size_t bufferElements = bufferSize / sizeof(cl_half); uint64_t step = getTestStep(sizeof(cl_half), BUFFER_SIZE); - int scale = (int)((1ULL << 32) / (16 * bufferElements) + 1); + size_t bufferElements = std::min(BUFFER_SIZE / sizeof(cl_half), + size_t(1ULL << (sizeof(cl_half) * 8))); + size_t bufferSize = bufferElements * sizeof(cl_half); logFunctionInfo(f->name, sizeof(cl_half), relaxedMode); const char *name = f->name; float half_ulps = f->half_ulps; @@ -69,14 +69,7 @@ int TestFunc_Half_UShort(const Func *f, MTdata d, bool relaxedMode) { // Init input array cl_ushort *p = (cl_ushort *)gIn; - if (gWimpyMode) - { - for (size_t j = 0; j < bufferElements; j++) p[j] = i + j * scale; - 
} - else - { - for (size_t j = 0; j < bufferElements; j++) p[j] = (uint16_t)i + j; - } + for (size_t j = 0; j < bufferElements; j++) p[j] = (uint16_t)i + j; if ((error = clEnqueueWriteBuffer(gQueue, gInBuffer, CL_FALSE, 0, bufferSize, gIn, 0, NULL, NULL)))