From 241579a1169f1d28ca2cf88d411f9b2484cdfe2d Mon Sep 17 00:00:00 2001 From: Tomoaki Teshima Date: Sun, 31 Jan 2021 07:31:52 +0900 Subject: [PATCH] get rid of kernel build failure * prevent out-of-bounds access * prevent deallocate before return * fix build warning --- modules/rgbd/src/opencl/tsdf.cl | 14 ++++++++------ modules/rgbd/src/tsdf.cpp | 13 ++++++------- 2 files changed, 14 insertions(+), 13 deletions(-) diff --git a/modules/rgbd/src/opencl/tsdf.cl b/modules/rgbd/src/opencl/tsdf.cl index e57bbdbabae..b9186826236 100644 --- a/modules/rgbd/src/opencl/tsdf.cl +++ b/modules/rgbd/src/opencl/tsdf.cl @@ -4,8 +4,7 @@ // This code is also subject to the license terms in the LICENSE_KinectFusion.md file found in this module's directory -typedef __INT8_TYPE__ int8_t; - +typedef char int8_t; typedef int8_t TsdfType; typedef uchar WeightType; @@ -28,14 +27,17 @@ static inline float tsdfToFloat(TsdfType num) } __kernel void preCalculationPixNorm (__global float * pixNorms, + int pix_step, int pix_offset, + int pix_rows, int pix_cols, const __global float * xx, const __global float * yy, - int width) + int width, int height) { int i = get_global_id(0); int j = get_global_id(1); int idx = i*width + j; - pixNorms[idx] = sqrt(xx[j] * xx[j] + yy[i] * yy[i] + 1.0f); + if(i < height && j < width && idx < pix_cols) + pixNorms[idx] = sqrt(xx[j] * xx[j] + yy[i] * yy[i] + 1.0f); } __kernel void integrate(__global const char * depthptr, @@ -85,7 +87,7 @@ __kernel void integrate(__global const char * depthptr, int volYidx = x*volDims.x + y*volDims.y; int startZ, endZ; - if(fabs(zStep.z) > 1e-5) + if(fabs(zStep.z) > 1e-5f) { int baseZ = convert_int(-basePt.z / zStep.z); if(zStep.z > 0) @@ -162,7 +164,7 @@ __kernel void integrate(__global const char * depthptr, if(v == 0) continue; - int idx = projected.y * depth_rows + projected.x; + int idx = projected.y * depth_cols + projected.x; float pixNorm = pixNorms[idx]; //float pixNorm = length(camPixVec); diff --git a/modules/rgbd/src/tsdf.cpp b/modules/rgbd/src/tsdf.cpp index a35899d9ab7..f80ba986a7a 100644 --- a/modules/rgbd/src/tsdf.cpp +++ b/modules/rgbd/src/tsdf.cpp @@ -836,11 +836,11 @@ void TSDFVolumeGPU::reset() volume.setTo(Scalar(0, 0)); } -static cv::UMat preCalculationPixNormGPU(int depth_rows, int depth_cols, Vec2f fxy, Vec2f cxy) +static void preCalculationPixNormGPU(int depth_rows, int depth_cols, Vec2f fxy, Vec2f cxy, UMat& pixNorm) { Mat x(1, depth_cols, CV_32F); Mat y(1, depth_rows, CV_32F); - Mat _pixNorm(1, depth_rows * depth_cols, CV_32F); + pixNorm.create(1, depth_rows * depth_cols, CV_32F); for (int i = 0; i < depth_cols; i++) x.at(0, i) = (i - cxy[0]) / fxy[0]; @@ -859,14 +859,13 @@ static cv::UMat preCalculationPixNormGPU(int depth_rows, int depth_cols, Vec2f f throw std::runtime_error("Failed to create kernel: " + errorStr); AccessFlag af = ACCESS_READ; - UMat pixNorm = _pixNorm.getUMat(af); UMat xx = x.getUMat(af); UMat yy = y.getUMat(af); - kk.args(ocl::KernelArg::PtrReadWrite(pixNorm), + kk.args(ocl::KernelArg::ReadWrite(pixNorm), ocl::KernelArg::PtrReadOnly(xx), ocl::KernelArg::PtrReadOnly(yy), - depth_cols); + depth_cols, depth_rows); size_t globalSize[2]; globalSize[0] = depth_rows; @@ -875,7 +874,7 @@ static cv::UMat preCalculationPixNormGPU(int depth_rows, int depth_cols, Vec2f f if (!kk.run(2, globalSize, NULL, true)) throw std::runtime_error("Failed to run kernel"); - return pixNorm; + return; } // use depth instead of distance (optimization) @@ -910,7 +909,7 @@ void TSDFVolumeGPU::integrate(InputArray _depth, float depthFactor, frameParams[2] = intrinsics.fx; frameParams[3] = intrinsics.fy; frameParams[4] = intrinsics.cx; frameParams[5] = intrinsics.cy; - pixNorms = preCalculationPixNormGPU(depth.rows, depth.cols, fxy, cxy); + preCalculationPixNormGPU(depth.rows, depth.cols, fxy, cxy, pixNorms); } // TODO: optimization possible