Skip to content

Commit

Permalink
Compiles with -pedantic enabled. Added rvalue versions of setArg. Fur…
Browse files Browse the repository at this point in the history
…ther progress on ParallelFor
  • Loading branch information
rickwebiii committed Mar 14, 2012
1 parent 26e7df9 commit 38d363a
Show file tree
Hide file tree
Showing 5 changed files with 60 additions and 11 deletions.
9 changes: 5 additions & 4 deletions Include/clUtil_kernel.h
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,7 @@ namespace clUtil

_Grid();
};
};
}

//Wrapper function to create a grid object of the correct type so you don't
//have to spell out the template arguments yourself.
Expand All @@ -81,11 +81,11 @@ template<typename... Args> clUtil::_Grid<Args...> clUtilGrid(Args... args)

//This function is a hack needed because variadic partial specialization doesn't
//seem to work quite right as of g++-4.5.2

//void setArg_(cl_kernel kernel, size_t argIndex, clUtil::Memory&& curArg);
//Non-template setArg_ overloads for clUtil memory objects. Each one takes the
//kernel to configure, the index of the argument being set, and the memory
//object to bind at that index.
//The Image and Buffer overloads come in lvalue (&) and rvalue (&&) flavours so
//that both named objects and temporaries can be passed as kernel arguments.
//NOTE(review): the Memory* overload's definition is not visible here --
//presumably it binds the pointed-to object's cl_mem handle; confirm.
void setArg_(cl_kernel kernel, size_t argIndex, clUtil::Memory* curArg);
void setArg_(cl_kernel kernel, size_t argIndex, clUtil::Image& curArg);
void setArg_(cl_kernel kernel, size_t argIndex, clUtil::Image&& curArg);
void setArg_(cl_kernel kernel, size_t argIndex, clUtil::Buffer& curArg);
void setArg_(cl_kernel kernel, size_t argIndex, clUtil::Buffer&& curArg);

template<typename T> void setArg_(cl_kernel kernel, size_t argIndex, T curArg)
{
Expand Down Expand Up @@ -124,7 +124,8 @@ void clUtilEnqueueKernel(const char* kernelName,
{
cl_int err;
clUtil::Device& currentDevice = clUtil::Device::GetCurrentDevice();
cl_kernel kernel = currentDevice.getKernel(std::string(kernelName));
cl_kernel kernel =
currentDevice.getKernel(std::move(std::string(kernelName)));

clUtilSetArgs(kernel,
kernelName,
Expand Down
2 changes: 1 addition & 1 deletion Makefile.inc
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
CXX=g++
USERFLAGS=-g3 -O0 -Wall -Werror
USERFLAGS=-g3 -O0 -Wall -Werror -pedantic
OpenCLInclude=$(AMDAPPSDKROOT)/include
18 changes: 18 additions & 0 deletions clUtilKernel.cc
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,15 @@ void setArg_(cl_kernel kernel, size_t argIndex, clUtil::Image& curArg)
clUtilCheckError(err);
}

//Rvalue overload for images: fetches the cl_mem handle held by curArg and
//installs it as argument number argIndex of kernel. The status returned by
//clSetKernelArg is handed to clUtilCheckError.
void setArg_(cl_kernel kernel, size_t argIndex, clUtil::Image&& curArg)
{
  //clSetKernelArg reads the argument value through a pointer, so the handle
  //must live in an addressable local.
  cl_mem imageHandle = curArg.getMemHandle();

  cl_int status = clSetKernelArg(kernel,
                                 argIndex,
                                 sizeof(imageHandle),
                                 &imageHandle);
  clUtilCheckError(status);
}

void setArg_(cl_kernel kernel, size_t argIndex, clUtil::Buffer& curArg)
{
cl_int err;
Expand All @@ -27,6 +36,15 @@ void setArg_(cl_kernel kernel, size_t argIndex, clUtil::Buffer& curArg)
clUtilCheckError(err);
}

//Rvalue overload for buffers: fetches the cl_mem handle held by curArg and
//installs it as argument number argIndex of kernel. The status returned by
//clSetKernelArg is handed to clUtilCheckError.
void setArg_(cl_kernel kernel, size_t argIndex, clUtil::Buffer&& curArg)
{
  //clSetKernelArg reads the argument value through a pointer, so the handle
  //must live in an addressable local.
  cl_mem bufferHandle = curArg.getMemHandle();

  cl_int status = clSetKernelArg(kernel,
                                 argIndex,
                                 sizeof(bufferHandle),
                                 &bufferHandle);
  clUtilCheckError(status);
}

void clUtilSetArgs(cl_kernel kernel,
const char* kernelName,
clUtil::Grid& workGrid,
Expand Down
34 changes: 32 additions & 2 deletions clUtilParallelFor.cc
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,17 @@ ParallelForPerformanceModel::ParallelForPerformanceModel(size_t numSamples,
iterationOffset = newTask.EndIndex + 1;

mPendingSampleQueues[curDeviceGroup].push(newTask);

//Mark remaining work vector
if(curDeviceGroup == 0 && curSample > 0)
{
mRemainingWork[curSample - 1].End = newTask.StartIndex - 1;
}

if(curDeviceGroup == numDeviceGroups - 1 && curSample < numSamples - 1)
{
mRemainingWork[curSample].Start = newTask.EndIndex + 1;
}

#if 0
cout << "DeviceGroup: " << curDeviceGroup
Expand All @@ -92,6 +103,25 @@ PendingTask ParallelForPerformanceModel::getWork(size_t deviceGroup)
}
else //If empty, use model to get work
{
vector<Sample>& curDeviceModel = mModel[deviceGroup];
//size_t bestSample = 0;
//bool leftOfSample = false;

for(size_t curSample = 0; curSample < curDeviceModel.size(); curSample++)
{
//Check to the left of the sample
if(curSample > 0 &&
mRemainingWork[curSample - 1].Start <=
mRemainingWork[curSample - 1].End)
{

}
if(curSample < curDeviceModel.size() - 1) //Check to the right of sample
{
}

}

return work;
}
}
Expand Down Expand Up @@ -172,7 +202,7 @@ void ParallelForPerformanceModel::updateModel(size_t start,
}
else //Updating existing sample
{
if(start > curSample.End) //Work appears to left of sample
if(start > curSample.End) //Work appears to right of sample
{
Sample& nextSample = deviceModel[sampleNumber + 1];

Expand All @@ -184,7 +214,7 @@ void ParallelForPerformanceModel::updateModel(size_t start,

curSample.End = end;
}
else if(end < curSample.Start) //Work appears to right of sample
else if(end < curSample.Start) //Work appears to left of sample
{
Sample& previousSample = deviceModel[sampleNumber - 1];

Expand Down
8 changes: 4 additions & 4 deletions clUtilPlatform.cc
Original file line number Diff line number Diff line change
Expand Up @@ -55,18 +55,18 @@ vector<Platform> Platform::GetPlatforms()
&majorVersion,
&minorVersion) != 2)
{
printf("Warning: platform %zu (%s) has malformed platform version string."
printf("Warning: platform %u (%s) has malformed platform version string."
" As such, clUtil will not use it.\n",
curPlatformID,
(unsigned int)curPlatformID,
curPlatform.mPlatformInfo.Name.c_str());
continue;
}

if(majorVersion < 1 || (majorVersion == 1 && minorVersion < 1))
{
printf("Warning: platform %zu (%s) has version < 1.1."
printf("Warning: platform %u (%s) has version < 1.1."
" As such, clUtil will not use it.\n",
curPlatformID,
(unsigned int)curPlatformID,
curPlatform.mPlatformInfo.Name.c_str());
continue;
}
Expand Down

0 comments on commit 38d363a

Please sign in to comment.