Skip to content

Commit

Permalink
Added profiling and profile dumping. Fixed 1D image sample
Browse files Browse the repository at this point in the history
  • Loading branch information
rickwebiii committed Mar 22, 2012
1 parent 7e3eacb commit e821248
Show file tree
Hide file tree
Showing 6 changed files with 238 additions and 13 deletions.
31 changes: 28 additions & 3 deletions Include/clUtilDevice.h
Original file line number Diff line number Diff line change
Expand Up @@ -75,18 +75,21 @@ namespace clUtil
private:
cl_device_id mDeviceID;
cl_context mContext;
cl_command_queue mCommandQueue;
cl_program mProgram;
std::map<std::string, cl_kernel> mKernels;
DeviceInfo mDeviceInfo;
bool mInfoInitialized;
size_t mDeviceNumber;
std::vector<std::vector<cl_event>> mProfileEvents;
std::vector<cl_command_queue> mCommandQueues;
size_t mCurrentCommandQueue;

static size_t CurrentDevice;
static bool DevicesInitialized;
static bool DevicesFetched;
static std::vector<Device> Devices;

static const size_t NumCommandQueues;

std::string fileToString(const char* filename);
cl_int loadBinary(const char* cachename);
cl_int buildProgram(const char** filenames,
Expand All @@ -98,15 +101,36 @@ namespace clUtil
const char* options);
void dumpBinary(const char* filename);
void getKernels();

public:
Device(cl_device_id deviceID);
const DeviceInfo& getDeviceInfo() const {return mDeviceInfo;}
cl_device_id getDeviceID() const {return mDeviceID;}
cl_context getContext() const {return mContext;}
cl_command_queue getCommandQueue() const {return mCommandQueue;}
void flush();
void finish();
void addProfilingEvent(cl_event event);

// Returns the command queue currently selected for this device, i.e. the
// entry of mCommandQueues at index mCurrentCommandQueue.
cl_command_queue getCommandQueue() const
{
  const size_t activeQueue = mCurrentCommandQueue;

  return mCommandQueues[activeQueue];
}

// Selects which of this device's command queues getCommandQueue() returns.
//
// id: zero-based index into mCommandQueues.
// Throws clUtilException when id does not name an existing queue.
void setCommandQueue(size_t id)
{
  // Valid indices are [0, size()). Comparing with '>' let id == size()
  // through, after which getCommandQueue() would read one element past the
  // end of mCommandQueues.
  if(id >= mCommandQueues.size())
  {
    throw clUtilException("Invalid command queue ID");
  }

  mCurrentCommandQueue = id;
}

cl_kernel getKernel(std::string&& kernelName) const;
size_t getDeviceNumber() const {return mDeviceNumber;}

static void Flush();
static void Finish();
static const std::vector<Device>& GetDevices() {return Devices; }
static void FetchDevices();
static void InitializeDevices(const char** sourceFiles,
Expand All @@ -115,6 +139,7 @@ namespace clUtil
const char* options = "");
static Device& GetCurrentDevice() { return Devices[CurrentDevice]; }
static size_t GetCurrentDeviceNum() { return CurrentDevice; }
static void DumpProfilingData();
static void SetCurrentDevice(size_t deviceNum)
{
if(deviceNum > Devices.size())
Expand Down
4 changes: 2 additions & 2 deletions Include/clUtilMemory.h
Original file line number Diff line number Diff line change
Expand Up @@ -11,11 +11,11 @@ namespace clUtil
{
protected:
cl_mem mMemHandle;
const Device& mDevice;
Device& mDevice;
cl_event mLastAccess;
public:
Memory(const Device& device = Device::GetCurrentDevice()) :
mDevice(device),
mDevice(const_cast<Device&>(device)),
mLastAccess(NULL)
{
}
Expand Down
6 changes: 4 additions & 2 deletions Include/clUtil_kernel.h
Original file line number Diff line number Diff line change
Expand Up @@ -167,13 +167,15 @@ void clUtilEnqueueKernel(const char* kernelName,
events.size() > 0 ? &eventArray[0] : NULL,
&outputEvent);
clUtilCheckError(err);


currentDevice.addProfilingEvent(outputEvent);

//Update the last event for each Memory object passed to this kernel
for(auto i = memories.begin(); i < memories.end(); i++)
{
(*i)->setLastAccess(outputEvent);
}

err = clRelease(outputEvent);
err = clReleaseEvent(outputEvent);
clUtilCheckError(err);
}
189 changes: 184 additions & 5 deletions clUtilDevice.cc
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ size_t Device::CurrentDevice = 0;
bool Device::DevicesInitialized = false;
bool Device::DevicesFetched = false;
vector<Device> Device::Devices;
const size_t Device::NumCommandQueues = 2;

void DeviceInfo::initialize(cl_device_id deviceID)
{
Expand Down Expand Up @@ -214,10 +215,16 @@ void DeviceInfo::initialize(cl_device_id deviceID)

Device::Device(cl_device_id deviceID) :
mDeviceID(deviceID),
mDeviceInfo(),
mInfoInitialized(false)
mContext(NULL),
mProgram(NULL),
mKernels(),
mInfoInitialized(false),
mDeviceNumber(0),
mProfileEvents(Device::NumCommandQueues),
mCommandQueues(),
mCurrentCommandQueue(0)
{
mDeviceInfo.initialize(mDeviceID);
mDeviceInfo.initialize(mDeviceID);
}

void Device::FetchDevices()
Expand Down Expand Up @@ -330,8 +337,16 @@ void Device::initialize(const char** filenames,
mContext = clCreateContext(NULL, 1, &mDeviceID, NULL, NULL, &err);
clUtilCheckError(err);

mCommandQueue = clCreateCommandQueue(mContext, mDeviceID, 0, &err);
clUtilCheckError(err);
for(size_t curQueue = 0; curQueue < Device::NumCommandQueues; curQueue++)
{
cl_command_queue queue = clCreateCommandQueue(mContext,
mDeviceID,
CL_QUEUE_PROFILING_ENABLE,
&err);
clUtilCheckError(err);

mCommandQueues.push_back(queue);
}

if(cachename != NULL)
{
Expand Down Expand Up @@ -365,3 +380,167 @@ cl_kernel Device::getKernel(std::string&& kernelName) const

return kernel->second;
}

// Records an event so that its timings can be written out later by
// DumpProfilingData(). The event is stored in the mProfileEvents bucket whose
// index matches the position, within mCommandQueues, of the queue the event
// reports as its own. The event is retained before being stored.
//
// Throws clUtilException if the event's queue is not one of this device's
// command queues.
void Device::addProfilingEvent(cl_event event)
{
  //Ask the runtime which command queue this event was issued on
  cl_command_queue owningQueue;
  cl_int status = clGetEventInfo(event,
                                 CL_EVENT_COMMAND_QUEUE,
                                 sizeof(owningQueue),
                                 &owningQueue,
                                 NULL);
  clUtilCheckError(status);

  //Scan for that queue; slot == queueCount afterwards means "not found"
  const size_t queueCount = mCommandQueues.size();
  size_t slot = 0;

  while(slot < queueCount && !(mCommandQueues[slot] == owningQueue))
  {
    ++slot;
  }

  if(slot == queueCount)
  {
    throw clUtilException("Bad command queue for profiling event");
  }

  //Take our own reference before storing the handle
  status = clRetainEvent(event);
  clUtilCheckError(status);

  mProfileEvents[slot].push_back(event);
}

void Device::DumpProfilingData()
{
ofstream outputFile("clUtilProfile.out");

outputFile << "<Profile>" << endl;

for(size_t curDeviceID = 0; curDeviceID < Devices.size(); curDeviceID++)
{
Device& curDevice = Devices[curDeviceID];

outputFile << "\t<Device id='" << curDeviceID
<< "' name='" << curDevice.mDeviceInfo.Name
<< "'>" << endl;

for(size_t curQueueID = 0;
curQueueID < curDevice.mProfileEvents.size();
curQueueID++)
{
outputFile << "\t\t<Queue id='" << curQueueID << "'>" << endl;

vector<cl_event>& profileSet = curDevice.mProfileEvents[curQueueID];

for(size_t curEventNum = 0;
curEventNum < profileSet.size();
curEventNum++)
{
cl_event curEvent = profileSet[curEventNum];
cl_ulong startTime;
cl_ulong stopTime;
cl_int err;
const char* eventType;
cl_command_type commandType;

err = clGetEventProfilingInfo(curEvent,
CL_PROFILING_COMMAND_START,
sizeof(startTime),
&startTime,
NULL);
clUtilCheckError(err);

err = clGetEventProfilingInfo(curEvent,
CL_PROFILING_COMMAND_END,
sizeof(stopTime),
&stopTime,
NULL);
clUtilCheckError(err);

err = clGetEventInfo(curEvent,
CL_EVENT_COMMAND_TYPE,
sizeof(commandType),
&commandType,
NULL);
clUtilCheckError(err);

switch(commandType)
{
case CL_COMMAND_NDRANGE_KERNEL:
case CL_COMMAND_NATIVE_KERNEL:
eventType = "kernel";
break;
case CL_COMMAND_READ_BUFFER:
case CL_COMMAND_WRITE_BUFFER:
eventType = "buffer read/write";
break;
case CL_COMMAND_READ_IMAGE:
case CL_COMMAND_WRITE_IMAGE:
eventType = "image read/write";
break;
default:
eventType = "other";
break;
}

outputFile << "\t\t\t<Task type='" << eventType
<< "' startTime='" << startTime
<< "' stopTime='" << stopTime
<< "'/>" << endl;

err = clReleaseEvent(curEvent);
clUtilCheckError(err);
}

outputFile << "\t</Queue>" << endl;

profileSet.clear();
}

outputFile << "\t</Device>" << endl;
}

outputFile << "</Profile>" << endl;
}

void Device::flush()
{
for(size_t curQueue = 0; curQueue < mCommandQueues.size(); curQueue++)
{
cl_int err;

err = clFlush(mCommandQueues[curQueue]);
clUtilCheckError(err);
}
}

void Device::finish()
{
for(size_t curQueue = 0; curQueue < mCommandQueues.size(); curQueue++)
{
cl_int err;

err = clFinish(mCommandQueues[curQueue]);
clUtilCheckError(err);
}
}

void Device::Flush()
{
for(auto device = Devices.begin(); device < Devices.end(); device++)
{
device->flush();
}
}

void Device::Finish()
{
for(auto device = Devices.begin(); device < Devices.end(); device++)
{
device->finish();
}
}
16 changes: 16 additions & 0 deletions clUtilMemory.cc
Original file line number Diff line number Diff line change
Expand Up @@ -104,6 +104,8 @@ void Image::put(void const* pointer, const size_t len)
clUtilCheckError(err);

setLastAccess(nextEvent);

mDevice.addProfilingEvent(nextEvent);

err = clReleaseEvent(nextEvent);
clUtilCheckError(err);
Expand All @@ -130,6 +132,8 @@ void Image::put(void const* pointer, const size_t len)
clUtilCheckError(err);

setLastAccess(nextEvent);

mDevice.addProfilingEvent(nextEvent);

err = clReleaseEvent(nextEvent);
clUtilCheckError(err);
Expand All @@ -153,6 +157,8 @@ void Image::put(void const* pointer, const size_t len)
clUtilCheckError(err);

setLastAccess(nextEvent);

mDevice.addProfilingEvent(nextEvent);

err = clReleaseEvent(nextEvent);
clUtilCheckError(err);
Expand Down Expand Up @@ -204,6 +210,8 @@ void Image::get(void* const pointer, const size_t len)
clUtilCheckError(err);

setLastAccess(nextEvent);

mDevice.addProfilingEvent(nextEvent);

err = clReleaseEvent(nextEvent);
clUtilCheckError(err);
Expand All @@ -230,6 +238,8 @@ void Image::get(void* const pointer, const size_t len)
clUtilCheckError(err);

setLastAccess(nextEvent);

mDevice.addProfilingEvent(nextEvent);

err = clReleaseEvent(nextEvent);
clUtilCheckError(err);
Expand All @@ -253,6 +263,8 @@ void Image::get(void* const pointer, const size_t len)
clUtilCheckError(err);

setLastAccess(nextEvent);

mDevice.addProfilingEvent(nextEvent);

err = clReleaseEvent(nextEvent);
clUtilCheckError(err);
Expand Down Expand Up @@ -283,6 +295,8 @@ void Buffer::put(const void* const pointer, const size_t len)

setLastAccess(nextEvent);

mDevice.addProfilingEvent(nextEvent);

err = clReleaseEvent(nextEvent);
clUtilCheckError(err);
}
Expand All @@ -306,6 +320,8 @@ void Buffer::get(void* const pointer, const size_t len)

setLastAccess(nextEvent);

mDevice.addProfilingEvent(nextEvent);

err = clReleaseEvent(nextEvent);
clUtilCheckError(err);
}
Loading

0 comments on commit e821248

Please sign in to comment.