diff --git a/Include/clUtilDevice.h b/Include/clUtilDevice.h
index 83bdc6f..034fc0d 100644
--- a/Include/clUtilDevice.h
+++ b/Include/clUtilDevice.h
@@ -75,18 +75,22 @@ namespace clUtil
     private:
       cl_device_id mDeviceID;
       cl_context mContext;
-      cl_command_queue mCommandQueue;
       cl_program mProgram;
       std::map<std::string, cl_kernel> mKernels;
       DeviceInfo mDeviceInfo;
       bool mInfoInitialized;
       size_t mDeviceNumber;
+      //mProfileEvents[q] lists the retained events enqueued on mCommandQueues[q]
+      std::vector<std::vector<cl_event>> mProfileEvents;
+      std::vector<cl_command_queue> mCommandQueues;
+      size_t mCurrentCommandQueue;
 
       static size_t CurrentDevice;
       static bool DevicesInitialized;
       static bool DevicesFetched;
       static std::vector<Device> Devices;
-
+      static const size_t NumCommandQueues;
+
       std::string fileToString(const char* filename);
       cl_int loadBinary(const char* cachename);
       cl_int buildProgram(const char** filenames,
@@ -98,15 +102,46 @@ namespace clUtil
                           const char* options);
       void dumpBinary(const char* filename);
       void getKernels();
+
     public:
       Device(cl_device_id deviceID);
       const DeviceInfo& getDeviceInfo() const {return mDeviceInfo;}
       cl_device_id getDeviceID() const {return mDeviceID;}
       cl_context getContext() const {return mContext;}
-      cl_command_queue getCommandQueue() const {return mCommandQueue;}
+
+      //Issue clFlush / clFinish on every command queue of this device
+      void flush();
+      void finish();
+
+      //Retain event and file it under the command queue it was enqueued
+      //on. Throws clUtilException if that queue is not one of this device's
+      void addProfilingEvent(cl_event event);
+
+      //Command queue currently selected by setCommandQueue() (0 by default)
+      cl_command_queue getCommandQueue() const
+      {
+        return mCommandQueues[mCurrentCommandQueue];
+      }
+
+      //Select the queue returned by getCommandQueue(). Valid IDs are
+      //0 .. mCommandQueues.size() - 1; any other value throws
+      void setCommandQueue(size_t id)
+      {
+        //>= (not >): id == size() is already one past the last queue
+        if(id >= mCommandQueues.size())
+        {
+          throw clUtilException("Invalid command queue ID");
+        }
+
+        mCurrentCommandQueue = id;
+      }
+
       cl_kernel getKernel(std::string&& kernelName) const;
       size_t getDeviceNumber() const {return mDeviceNumber;}
 
+      //Flush() / Finish() call flush() / finish() on every device in Devices
+      static void Flush();
+      static void Finish();
       static const std::vector<Device>& GetDevices() {return Devices; }
       static void FetchDevices();
       static void InitializeDevices(const char** sourceFiles,
@@ -115,6 +150,8 @@ namespace clUtil
                                     const char* options = "");
       static Device& GetCurrentDevice() { return Devices[CurrentDevice]; }
       static size_t GetCurrentDeviceNum() { return CurrentDevice; }
+      //Writes all recorded profiling events to clUtilProfile.out
+      static void DumpProfilingData();
       static void SetCurrentDevice(size_t deviceNum)
       {
         if(deviceNum > Devices.size())
diff --git a/Include/clUtilMemory.h b/Include/clUtilMemory.h
index c3867eb..b92fa85 100644
--- a/Include/clUtilMemory.h
+++ b/Include/clUtilMemory.h
@@ -11,11 +11,12 @@ namespace clUtil
   {
     protected:
       cl_mem mMemHandle;
-      const Device& mDevice;
+      //Non-const: put()/get() call the non-const Device::addProfilingEvent()
+      Device& mDevice;
       cl_event mLastAccess;
     public:
       Memory(const Device& device = Device::GetCurrentDevice()) :
-        mDevice(device),
+        mDevice(const_cast<Device&>(device)),
         mLastAccess(NULL)
       {
       }
diff --git a/Include/clUtil_kernel.h b/Include/clUtil_kernel.h
index e813cbf..53e9c1e 100644
--- a/Include/clUtil_kernel.h
+++ b/Include/clUtil_kernel.h
@@ -167,13 +167,15 @@ void clUtilEnqueueKernel(const char* kernelName,
                                events.size() > 0 ? &eventArray[0] : NULL,
                                &outputEvent);
   clUtilCheckError(err);
-
+
+  currentDevice.addProfilingEvent(outputEvent);
+
   //Update the last event for each Memory object passed to this kernel
   for(auto i = memories.begin(); i < memories.end(); i++)
   {
     (*i)->setLastAccess(outputEvent);
   }
 
-  err = clRelease(outputEvent);
+  err = clReleaseEvent(outputEvent);
   clUtilCheckError(err);
 }
diff --git a/clUtilDevice.cc b/clUtilDevice.cc
index 449fdcc..146a523 100644
--- a/clUtilDevice.cc
+++ b/clUtilDevice.cc
@@ -8,6 +8,7 @@ size_t Device::CurrentDevice = 0;
 bool Device::DevicesInitialized = false;
 bool Device::DevicesFetched = false;
 vector<Device> Device::Devices;
+const size_t Device::NumCommandQueues = 2;
 
 void DeviceInfo::initialize(cl_device_id deviceID)
 {
@@ -214,10 +215,16 @@ void DeviceInfo::initialize(cl_device_id deviceID)
 
 Device::Device(cl_device_id deviceID) :
   mDeviceID(deviceID),
-  mDeviceInfo(),
-  mInfoInitialized(false)
+  mContext(NULL),
+  mProgram(NULL),
+  mKernels(),
+  mInfoInitialized(false),
+  mDeviceNumber(0),
+  mProfileEvents(Device::NumCommandQueues),
+  mCommandQueues(),
+  mCurrentCommandQueue(0)
 {
-    mDeviceInfo.initialize(mDeviceID);
+  mDeviceInfo.initialize(mDeviceID);
 }
 
 void Device::FetchDevices()
@@ -330,8 +337,18 @@ void Device::initialize(const char** filenames,
   mContext = clCreateContext(NULL, 1, &mDeviceID, NULL, NULL, &err);
   clUtilCheckError(err);
 
-  mCommandQueue = clCreateCommandQueue(mContext, mDeviceID, 0, &err);
-  clUtilCheckError(err);
+  //Profiling has to be enabled when a queue is created, otherwise
+  //clGetEventProfilingInfo cannot report timestamps for its events
+  for(size_t curQueue = 0; curQueue < Device::NumCommandQueues; curQueue++)
+  {
+    cl_command_queue queue = clCreateCommandQueue(mContext,
+                                                  mDeviceID,
+                                                  CL_QUEUE_PROFILING_ENABLE,
+                                                  &err);
+    clUtilCheckError(err);
+
+    mCommandQueues.push_back(queue);
+  }
 
   if(cachename != NULL)
   {
@@ -365,3 +382,181 @@ cl_kernel Device::getKernel(std::string&& kernelName) const
 
   return kernel->second;
 }
+
+//Retains event and files it under the command queue it was enqueued on, so
+//DumpProfilingData() can report it later (and release it there). Throws
+//clUtilException if that queue is not one of this device's queues.
+void Device::addProfilingEvent(cl_event event)
+{
+  //size() doubles as the "queue not found" marker for the search below
+  size_t commandQueueID = mCommandQueues.size();
+  cl_int err;
+  cl_command_queue queue;
+
+  err = clGetEventInfo(event,
+                       CL_EVENT_COMMAND_QUEUE,
+                       sizeof(queue),
+                       &queue,
+                       NULL);
+  clUtilCheckError(err);
+
+  for(size_t curQueue = 0; curQueue < mCommandQueues.size(); curQueue++)
+  {
+    if(queue == mCommandQueues[curQueue])
+    {
+      commandQueueID = curQueue;
+      break;
+    }
+  }
+
+  if(commandQueueID == mCommandQueues.size())
+  {
+    throw clUtilException("Bad command queue for profiling event");
+  }
+
+  //Keep the event alive after the caller releases its own reference
+  err = clRetainEvent(event);
+  clUtilCheckError(err);
+
+  mProfileEvents[commandQueueID].push_back(event);
+}
+
+//Writes type, start time and stop time of every recorded event, grouped by
+//device and command queue, to clUtilProfile.out (overwriting it). Every
+//reported event is released and its list cleared, so each event is dumped
+//at most once.
+//NOTE(review): the markup inside this function's string literals was missing
+//from the copy of the patch under review; the element and attribute names
+//used here are a reconstruction - confirm them against the original commit.
+void Device::DumpProfilingData()
+{
+  ofstream outputFile("clUtilProfile.out");
+
+  outputFile << "<clUtilProfile>" << endl;
+
+  for(size_t curDeviceID = 0; curDeviceID < Devices.size(); curDeviceID++)
+  {
+    Device& curDevice = Devices[curDeviceID];
+
+    outputFile << "\t<device id=\"" << curDeviceID << "\">" << endl;
+
+    for(size_t curQueueID = 0;
+        curQueueID < curDevice.mProfileEvents.size();
+        curQueueID++)
+    {
+      outputFile << "\t\t<queue id=\"" << curQueueID << "\">" << endl;
+
+      vector<cl_event>& profileSet = curDevice.mProfileEvents[curQueueID];
+
+      for(size_t curEventNum = 0;
+          curEventNum < profileSet.size();
+          curEventNum++)
+      {
+        cl_event curEvent = profileSet[curEventNum];
+        cl_ulong startTime;
+        cl_ulong stopTime;
+        cl_int err;
+        const char* eventType;
+        cl_command_type commandType;
+
+        err = clGetEventProfilingInfo(curEvent,
+                                      CL_PROFILING_COMMAND_START,
+                                      sizeof(startTime),
+                                      &startTime,
+                                      NULL);
+        clUtilCheckError(err);
+
+        err = clGetEventProfilingInfo(curEvent,
+                                      CL_PROFILING_COMMAND_END,
+                                      sizeof(stopTime),
+                                      &stopTime,
+                                      NULL);
+        clUtilCheckError(err);
+
+        err = clGetEventInfo(curEvent,
+                             CL_EVENT_COMMAND_TYPE,
+                             sizeof(commandType),
+                             &commandType,
+                             NULL);
+        clUtilCheckError(err);
+
+        switch(commandType)
+        {
+          case CL_COMMAND_NDRANGE_KERNEL:
+          case CL_COMMAND_NATIVE_KERNEL:
+            eventType = "kernel";
+            break;
+          case CL_COMMAND_READ_BUFFER:
+          case CL_COMMAND_WRITE_BUFFER:
+            eventType = "buffer read/write";
+            break;
+          case CL_COMMAND_READ_IMAGE:
+          case CL_COMMAND_WRITE_IMAGE:
+            eventType = "image read/write";
+            break;
+          default:
+            eventType = "other";
+            break;
+        }
+
+        outputFile << "\t\t\t<event type=\"" << eventType << "\""
+                   << " start=\"" << startTime << "\""
+                   << " stop=\"" << stopTime << "\"/>"
+                   << endl;
+
+        err = clReleaseEvent(curEvent);
+        clUtilCheckError(err);
+      }
+
+      outputFile << "\t\t</queue>" << endl;
+
+      profileSet.clear();
+    }
+
+    outputFile << "\t</device>" << endl;
+  }
+
+  outputFile << "</clUtilProfile>" << endl;
+}
+
+//Issues clFlush on each of this device's command queues
+void Device::flush()
+{
+  for(size_t curQueue = 0; curQueue < mCommandQueues.size(); curQueue++)
+  {
+    cl_int err;
+
+    err = clFlush(mCommandQueues[curQueue]);
+    clUtilCheckError(err);
+  }
+}
+
+//Issues clFinish on each of this device's command queues
+void Device::finish()
+{
+  for(size_t curQueue = 0; curQueue < mCommandQueues.size(); curQueue++)
+  {
+    cl_int err;
+
+    err = clFinish(mCommandQueues[curQueue]);
+    clUtilCheckError(err);
+  }
+}
+
+//Calls flush() on every device in Devices
+void Device::Flush()
+{
+  for(auto device = Devices.begin(); device < Devices.end(); device++)
+  {
+    device->flush();
+  }
+}
+
+//Calls finish() on every device in Devices
+void Device::Finish()
+{
+  for(auto device = Devices.begin(); device < Devices.end(); device++)
+  {
+    device->finish();
+  }
+}
diff --git a/clUtilMemory.cc b/clUtilMemory.cc
index fe65498..9acb8d0 100644
--- a/clUtilMemory.cc
+++ b/clUtilMemory.cc
@@ -104,6 +104,8 @@ void Image::put(void const* pointer, const size_t len)
       clUtilCheckError(err);
 
       setLastAccess(nextEvent);
+
+      mDevice.addProfilingEvent(nextEvent);
 
       err = clReleaseEvent(nextEvent);
       clUtilCheckError(err);
@@ -130,6 +132,8 @@ void Image::put(void const* pointer, const size_t len)
       clUtilCheckError(err);
 
       setLastAccess(nextEvent);
+
+      mDevice.addProfilingEvent(nextEvent);
 
       err = clReleaseEvent(nextEvent);
       clUtilCheckError(err);
@@ -153,6 +157,8 @@ void Image::put(void const* pointer, const size_t len)
       clUtilCheckError(err);
 
       setLastAccess(nextEvent);
+
+      mDevice.addProfilingEvent(nextEvent);
 
       err = clReleaseEvent(nextEvent);
       clUtilCheckError(err);
@@ -204,6 +210,8 @@ void Image::get(void* const pointer, const size_t len)
       clUtilCheckError(err);
 
       setLastAccess(nextEvent);
+
+      mDevice.addProfilingEvent(nextEvent);
 
       err = clReleaseEvent(nextEvent);
       clUtilCheckError(err);
@@ -230,6 +238,8 @@ void Image::get(void* const pointer, const size_t len)
       clUtilCheckError(err);
 
       setLastAccess(nextEvent);
+
+      mDevice.addProfilingEvent(nextEvent);
 
       err = clReleaseEvent(nextEvent);
       clUtilCheckError(err);
@@ -253,6 +263,8 @@ void Image::get(void* const pointer, const size_t len)
       clUtilCheckError(err);
 
       setLastAccess(nextEvent);
+
+      mDevice.addProfilingEvent(nextEvent);
 
       err = clReleaseEvent(nextEvent);
       clUtilCheckError(err);
@@ -283,6 +295,8 @@ void Buffer::put(const void* const pointer, const size_t len)
 
   setLastAccess(nextEvent);
 
+  mDevice.addProfilingEvent(nextEvent);
+
   err = clReleaseEvent(nextEvent);
   clUtilCheckError(err);
 }
@@ -306,6 +320,8 @@ void Buffer::get(void* const pointer, const size_t len)
 
   setLastAccess(nextEvent);
 
+  mDevice.addProfilingEvent(nextEvent);
+
   err = clReleaseEvent(nextEvent);
   clUtilCheckError(err);
 }
diff --git a/examples/1DImage/1DImage.cc b/examples/1DImage/1DImage.cc
index d409cee..b4a2fd5 100644
--- a/examples/1DImage/1DImage.cc
+++ b/examples/1DImage/1DImage.cc
@@ -5,7 +5,7 @@
 using namespace clUtil;
 using namespace std;
 
-const unsigned int kImageSize = 1234;
+const unsigned int kImageSize = 1234567;
 
 struct float4
 {
@@ -46,6 +46,7 @@ int main(int argc, char** argv)
                     kImageSize);
 
     c.get(result.get());
+    Device::Finish();
   }
   catch(clUtilException& err)
   {
@@ -61,5 +62,7 @@ int main(int argc, char** argv)
     }
   }
 
+  Device::DumpProfilingData();
+
   printf("Success!\n");
 }