From d64373109d3d5a84362d32835e7d932848498c1b Mon Sep 17 00:00:00 2001 From: Cedric Nugteren Date: Sun, 22 Nov 2015 12:19:55 +0100 Subject: [PATCH 1/8] Prepared the changelog for the next release --- CHANGELOG | 3 +++ 1 file changed, 3 insertions(+) diff --git a/CHANGELOG b/CHANGELOG index aaca86b..4a3ace4 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -1,4 +1,7 @@ +Development version (next release) +- + Version 2.0.0 - Added support for machine learning models. These models can be trained on a small fraction of the tuning configurations and can be used to predict the remainder. Two models are supported: From dcddd80f96248a4a9524ddefe3ea6a2a64948327 Mon Sep 17 00:00:00 2001 From: Cedric Nugteren Date: Sat, 23 Jan 2016 16:08:14 +0100 Subject: [PATCH 2/8] Updated FindOpenCL for Intel Linux OpenCL paths --- cmake/Modules/FindOpenCL.cmake | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/cmake/Modules/FindOpenCL.cmake b/cmake/Modules/FindOpenCL.cmake index 3ca8fa8..b8d47bb 100644 --- a/cmake/Modules/FindOpenCL.cmake +++ b/cmake/Modules/FindOpenCL.cmake @@ -45,6 +45,7 @@ set(OPENCL_HINTS set(OPENCL_PATHS /usr/local/cuda /opt/cuda + /opt/intel/opencl /usr /usr/local ) @@ -63,7 +64,7 @@ mark_as_advanced(OPENCL_INCLUDE_DIRS) find_library(OPENCL_LIBRARIES NAMES OpenCL HINTS ${OPENCL_HINTS} - PATH_SUFFIXES lib lib64 lib/x86_64 lib/x64 lib/x86 lib/Win32 OpenCL/common/lib/x64 + PATH_SUFFIXES lib lib64 lib/x86_64 lib/x86_64/sdk lib/x64 lib/x86 lib/Win32 OpenCL/common/lib/x64 PATHS ${OPENCL_PATHS} DOC "OpenCL library" ) From b5a3a8b4f32e735d7e3463d59014753fde4f7134 Mon Sep 17 00:00:00 2001 From: williamjshipman Date: Sun, 31 Jan 2016 00:48:10 +0200 Subject: [PATCH 3/8] Samples now support a platform parameter in their command lines, in addition to the device number. 
--- samples/conv/conv.cc | 17 +++++++++++------ samples/gemm/gemm.cc | 15 ++++++++++----- 2 files changed, 21 insertions(+), 11 deletions(-) diff --git a/samples/conv/conv.cc b/samples/conv/conv.cc index 29a6bc7..1804d0b 100644 --- a/samples/conv/conv.cc +++ b/samples/conv/conv.cc @@ -46,6 +46,7 @@ bool IsMultiple(size_t a, size_t b) { // Constants constexpr auto kDefaultDevice = size_t{0}; +constexpr auto kDefaultPlatform = size_t{0}; constexpr auto kDefaultSearchMethod = size_t{1}; constexpr auto kDefaultSearchParameter1 = size_t{4}; @@ -73,16 +74,20 @@ int main(int argc, char* argv[]) { // Selects the device, the search method and its first parameter. These parameters are all // optional and are thus also given default values. auto device_id = kDefaultDevice; + auto platform_id = kDefaultPlatform; auto method = kDefaultSearchMethod; auto search_param_1 = kDefaultSearchParameter1; if (argc >= 2) { device_id = static_cast(std::stoi(std::string{argv[1]})); - if (argc >= 3) { - method = static_cast(std::stoi(std::string{argv[2]})); + if (argc >= 3) { + platform_id = static_cast(std::stoi(std::string{argv[2]})); if (argc >= 4) { - search_param_1 = static_cast(std::stoi(std::string{argv[3]})); + method = static_cast(std::stoi(std::string{argv[3]})); + if (argc >= 5) { + search_param_1 = static_cast(std::stoi(std::string{argv[4]})); + } } - } + } } // Creates data structures @@ -115,8 +120,8 @@ int main(int argc, char* argv[]) { // =============================================================================================== - // Initializes the tuner (platform 0, device 'device_id') - cltune::Tuner tuner(size_t{0}, static_cast(device_id)); + // Initializes the tuner (platform 'platform_id', device 'device_id') + cltune::Tuner tuner(static_cast(platform_id), static_cast(device_id)); // Sets one of the following search methods: // 0) Random search diff --git a/samples/gemm/gemm.cc b/samples/gemm/gemm.cc index 9bdf4e6..c169328 100644 --- a/samples/gemm/gemm.cc +++ 
b/samples/gemm/gemm.cc @@ -46,6 +46,7 @@ bool IsMultiple(size_t a, size_t b) { // Constants constexpr auto kDefaultDevice = size_t{0}; +constexpr auto kDefaultPlatform = size_t{0}; constexpr auto kDefaultSearchMethod = size_t{1}; constexpr auto kDefaultSearchParameter1 = size_t{4}; @@ -71,16 +72,20 @@ int main(int argc, char* argv[]) { // Selects the device, the search method and its first parameter. These parameters are all // optional and are thus also given default values. auto device_id = kDefaultDevice; + auto platform_id = kDefaultPlatform; auto method = kDefaultSearchMethod; auto search_param_1 = kDefaultSearchParameter1; if (argc >= 2) { device_id = static_cast(std::stoi(std::string{argv[1]})); - if (argc >= 3) { - method = static_cast(std::stoi(std::string{argv[2]})); + if (argc >= 3) { + platform_id = static_cast(std::stoi(std::string{argv[2]})); if (argc >= 4) { - search_param_1 = static_cast(std::stoi(std::string{argv[3]})); + method = static_cast(std::stoi(std::string{argv[3]})); + if (argc >= 5) { + search_param_1 = static_cast(std::stoi(std::string{argv[4]})); + } } - } + } } // Creates input matrices @@ -99,7 +104,7 @@ int main(int argc, char* argv[]) { for (auto &item: mat_c) { item = 0.0f; } // Initializes the tuner (platform 0, device 'device_id') - cltune::Tuner tuner(size_t{0}, static_cast(device_id)); + cltune::Tuner tuner(static_cast(platform_id), static_cast(device_id)); // Sets one of the following search methods: // 0) Random search From 59faefac6783fa0e1bb3f69c4ba98fc5ea333332 Mon Sep 17 00:00:00 2001 From: williamjshipman Date: Sun, 31 Jan 2016 01:07:16 +0200 Subject: [PATCH 4/8] Updated the README to show that the platform ID is one of the command line parameters and updated the samples so that the order of the parameters matches all parts of the README. 
--- README.md | 2 +- samples/conv/conv.cc | 4 ++-- samples/gemm/gemm.cc | 4 ++-- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/README.md b/README.md index 01244d8..efe206f 100644 --- a/README.md +++ b/README.md @@ -89,7 +89,7 @@ Several examples are included as part of the CLTune distribution. They illustrat * `gemm.cc` providing an advanced and heavily tunable implementation of matrix-matrix multiplication (GEMM) * `conv.cc` providing an advanced and heavily tunable implementation of 2D convolution -The latter two optionally take command-line arguments. The first argument is an integer for the device to run on, the second argument is an integer to select a search strategy (0=random, 1=annealing, 2=PSO, 3=fullsearch), and the third an optional search-strategy parameter. +The latter two optionally take command-line arguments. The first argument is an integer to select the platform (NVIDIA, AMD, etc.), the second argument is an integer for the device to run on, the third argument is an integer to select a search strategy (0=random, 1=annealing, 2=PSO, 3=fullsearch), and the fourth an optional search-strategy parameter. 
Search strategies and machine-learning diff --git a/samples/conv/conv.cc b/samples/conv/conv.cc index 1804d0b..b37fbc9 100644 --- a/samples/conv/conv.cc +++ b/samples/conv/conv.cc @@ -78,9 +78,9 @@ int main(int argc, char* argv[]) { auto method = kDefaultSearchMethod; auto search_param_1 = kDefaultSearchParameter1; if (argc >= 2) { - device_id = static_cast(std::stoi(std::string{argv[1]})); + platform_id = static_cast(std::stoi(std::string{argv[1]})); if (argc >= 3) { - platform_id = static_cast(std::stoi(std::string{argv[2]})); + device_id = static_cast(std::stoi(std::string{argv[2]})); if (argc >= 4) { method = static_cast(std::stoi(std::string{argv[3]})); if (argc >= 5) { diff --git a/samples/gemm/gemm.cc b/samples/gemm/gemm.cc index c169328..b580bfb 100644 --- a/samples/gemm/gemm.cc +++ b/samples/gemm/gemm.cc @@ -76,9 +76,9 @@ int main(int argc, char* argv[]) { auto method = kDefaultSearchMethod; auto search_param_1 = kDefaultSearchParameter1; if (argc >= 2) { - device_id = static_cast(std::stoi(std::string{argv[1]})); + platform_id = static_cast(std::stoi(std::string{argv[1]})); if (argc >= 3) { - platform_id = static_cast(std::stoi(std::string{argv[2]})); + device_id = static_cast(std::stoi(std::string{argv[2]})); if (argc >= 4) { method = static_cast(std::stoi(std::string{argv[3]})); if (argc >= 5) { From 1d3c1599b46fff18eb24fc29b477fd1cdc798474 Mon Sep 17 00:00:00 2001 From: CNugteren Date: Mon, 21 Mar 2016 20:56:35 +0100 Subject: [PATCH 5/8] Added dllexport to be able to build a DLL under Windows --- CHANGELOG | 3 +- include/cltune.h | 86 ++++++++++++++++++++++++++---------------------- src/cltune.cc | 38 ++++++++++----------- 3 files changed, 68 insertions(+), 59 deletions(-) diff --git a/CHANGELOG b/CHANGELOG index 4a3ace4..fcabe81 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -1,6 +1,7 @@ Development version (next release) -- +- Added exports to be able to create a DLL on Windows (thanks to Marco Hutter) +- Added command-line OpenCL platform selection in 
the examples (thanks to William J Shipman) Version 2.0.0 - Added support for machine learning models. These models can be trained on a small fraction of the diff --git a/include/cltune.h b/include/cltune.h index af8dfdc..eaf8a72 100644 --- a/include/cltune.h +++ b/include/cltune.h @@ -35,6 +35,14 @@ #include // std::function #include // std::pair +// Exports library functions under Windows when building a DLL. See also: +// https://msdn.microsoft.com/en-us/library/a90k134d.aspx +#ifdef _WIN32 + #define PUBLIC_API __declspec(dllexport) +#else + #define PUBLIC_API +#endif + namespace cltune { // ================================================================================================= @@ -58,52 +66,52 @@ class Tuner { public: // Initializes the tuner either with platform 0 and device 0 or with a custom platform/device - explicit Tuner(); - explicit Tuner(size_t platform_id, size_t device_id); - ~Tuner(); + explicit PUBLIC_API Tuner(); + explicit PUBLIC_API Tuner(size_t platform_id, size_t device_id); + PUBLIC_API ~Tuner(); // Adds a new kernel to the list of tuning-kernels and returns a unique ID (to be used when // adding tuning parameters). Either loads the source from filenames or from string. - size_t AddKernel(const std::vector &filenames, const std::string &kernel_name, - const IntRange &global, const IntRange &local); - size_t AddKernelFromString(const std::string &source, const std::string &kernel_name, - const IntRange &global, const IntRange &local); + size_t PUBLIC_API AddKernel(const std::vector &filenames, const std::string &kernel_name, + const IntRange &global, const IntRange &local); + size_t PUBLIC_API AddKernelFromString(const std::string &source, const std::string &kernel_name, + const IntRange &global, const IntRange &local); // Sets the reference kernel. Same as the AddKernel function, but in this case there is only one // reference kernel. Calling this function again will overwrite the previous reference kernel. 
- void SetReference(const std::vector &filenames, - const std::string &kernel_name, - const IntRange &global, const IntRange &local); - void SetReferenceFromString(const std::string &source, - const std::string &kernel_name, - const IntRange &global, const IntRange &local); + void PUBLIC_API SetReference(const std::vector &filenames, + const std::string &kernel_name, + const IntRange &global, const IntRange &local); + void PUBLIC_API SetReferenceFromString(const std::string &source, + const std::string &kernel_name, + const IntRange &global, const IntRange &local); // Adds a new tuning parameter for a kernel with a specific ID. The parameter has a name, the // number of values, and a list of values. - void AddParameter(const size_t id, const std::string &parameter_name, - const std::initializer_list &values); + void PUBLIC_API AddParameter(const size_t id, const std::string &parameter_name, + const std::initializer_list &values); // As above, but now adds a single valued parameter to the reference - void AddParameterReference(const std::string &parameter_name, const size_t value); + void PUBLIC_API AddParameterReference(const std::string &parameter_name, const size_t value); // Modifies the global or local thread-size (integers) by one of the parameters (strings). The // modifier can be multiplication or division. - void MulGlobalSize(const size_t id, const StringRange range); - void DivGlobalSize(const size_t id, const StringRange range); - void MulLocalSize(const size_t id, const StringRange range); - void DivLocalSize(const size_t id, const StringRange range); + void PUBLIC_API MulGlobalSize(const size_t id, const StringRange range); + void PUBLIC_API DivGlobalSize(const size_t id, const StringRange range); + void PUBLIC_API MulLocalSize(const size_t id, const StringRange range); + void PUBLIC_API DivLocalSize(const size_t id, const StringRange range); // Adds a new constraint to the set of parameters (e.g. must be equal or larger than). 
The // constraints come in the form of a function object which takes a number of tuning parameters, // given as a vector of strings (parameter names). Their names are later substituted by actual // values. - void AddConstraint(const size_t id, ConstraintFunction valid_if, - const std::vector &parameters); + void PUBLIC_API AddConstraint(const size_t id, ConstraintFunction valid_if, + const std::vector &parameters); // As above, but for local memory usage. If this function is not called, it is assumed that the // local memory usage is 0: no configurations will be excluded because of too much local memory. - void SetLocalMemoryUsage(const size_t id, LocalMemoryFunction amount, - const std::vector &parameters); + void PUBLIC_API SetLocalMemoryUsage(const size_t id, LocalMemoryFunction amount, + const std::vector &parameters); // Functions to add kernel-arguments for input buffers, output buffers, and scalars. Make sure to // call these in the order in which the arguments appear in the kernel. @@ -113,35 +121,35 @@ class Tuner { // Configures a specific search method. The default search method is "FullSearch". These are // implemented as separate functions since they each take a different number of arguments. 
- void UseFullSearch(); - void UseRandomSearch(const double fraction); - void UseAnnealing(const double fraction, const double max_temperature); - void UsePSO(const double fraction, const size_t swarm_size, const double influence_global, - const double influence_local, const double influence_random); + void PUBLIC_API UseFullSearch(); + void PUBLIC_API UseRandomSearch(const double fraction); + void PUBLIC_API UseAnnealing(const double fraction, const double max_temperature); + void PUBLIC_API UsePSO(const double fraction, const size_t swarm_size, const double influence_global, + const double influence_local, const double influence_random); // Outputs the search process to a file - void OutputSearchLog(const std::string &filename); + void PUBLIC_API OutputSearchLog(const std::string &filename); // Starts the tuning process: compile all kernels and run them for each permutation of the tuning- // parameters. Note that this might take a while. - void Tune(); + void PUBLIC_API Tune(); // Trains a machine learning model based on the search space explored so far. Then, all the // missing data-points are estimated based on this model. This is only useful if a fraction of // the search space is explored, as is the case when doing random-search. - void ModelPrediction(const Model model_type, const float validation_fraction, - const size_t test_top_x_configurations); + void PUBLIC_API ModelPrediction(const Model model_type, const float validation_fraction, + const size_t test_top_x_configurations); // Prints the results of the tuning either to screen (stdout) or to a specific output-file. // Returns the execution time in miliseconds. 
- double PrintToScreen() const; - void PrintFormatted() const; - void PrintJSON(const std::string &filename, - const std::vector> &descriptions) const; - void PrintToFile(const std::string &filename) const; + double PUBLIC_API PrintToScreen() const; + void PUBLIC_API PrintFormatted() const; + void PUBLIC_API PrintJSON(const std::string &filename, + const std::vector> &descriptions) const; + void PUBLIC_API PrintToFile(const std::string &filename) const; // Disables all further printing to stdout - void SuppressOutput(); + void PUBLIC_API SuppressOutput(); private: diff --git a/src/cltune.cc b/src/cltune.cc index 8d3ce73..356eb21 100644 --- a/src/cltune.cc +++ b/src/cltune.cc @@ -152,7 +152,7 @@ void Tuner::SetLocalMemoryUsage(const size_t id, LocalMemoryFunction amount, throw std::runtime_error("Invalid parameter"); } } - pimpl->kernels_[id].SetLocalMemoryUsage(amount, parameters); + pimpl->kernels_[id].SetLocalMemoryUsage(amount, parameters); } @@ -170,12 +170,12 @@ void Tuner::AddArgumentInput(const std::vector &source) { } // Compiles the function for various data-types -template void Tuner::AddArgumentInput(const std::vector&); -template void Tuner::AddArgumentInput(const std::vector&); -template void Tuner::AddArgumentInput(const std::vector&); -template void Tuner::AddArgumentInput(const std::vector&); -template void Tuner::AddArgumentInput(const std::vector&); -template void Tuner::AddArgumentInput(const std::vector&); +template void PUBLIC_API Tuner::AddArgumentInput(const std::vector&); +template void PUBLIC_API Tuner::AddArgumentInput(const std::vector&); +template void PUBLIC_API Tuner::AddArgumentInput(const std::vector&); +template void PUBLIC_API Tuner::AddArgumentInput(const std::vector&); +template void PUBLIC_API Tuner::AddArgumentInput(const std::vector&); +template void PUBLIC_API Tuner::AddArgumentInput(const std::vector&); // Similar to the above function, but now marked as output buffer. 
Output buffers are special in the // sense that they will be checked in the verification process. @@ -188,32 +188,32 @@ void Tuner::AddArgumentOutput(const std::vector &source) { } // Compiles the function for various data-types -template void Tuner::AddArgumentOutput(const std::vector&); -template void Tuner::AddArgumentOutput(const std::vector&); -template void Tuner::AddArgumentOutput(const std::vector&); -template void Tuner::AddArgumentOutput(const std::vector&); -template void Tuner::AddArgumentOutput(const std::vector&); -template void Tuner::AddArgumentOutput(const std::vector&); +template void PUBLIC_API Tuner::AddArgumentOutput(const std::vector&); +template void PUBLIC_API Tuner::AddArgumentOutput(const std::vector&); +template void PUBLIC_API Tuner::AddArgumentOutput(const std::vector&); +template void PUBLIC_API Tuner::AddArgumentOutput(const std::vector&); +template void PUBLIC_API Tuner::AddArgumentOutput(const std::vector&); +template void PUBLIC_API Tuner::AddArgumentOutput(const std::vector&); // Sets a scalar value as an argument to the kernel. Since a vector of scalars of any type doesn't // exist, there is no general implemenation. Instead, each data-type has its specialised version in // which it stores to a specific vector. 
-template <> void Tuner::AddArgumentScalar(const int argument) { +template <> void PUBLIC_API Tuner::AddArgumentScalar(const int argument) { pimpl->arguments_int_.push_back({pimpl->argument_counter_++, argument}); } -template <> void Tuner::AddArgumentScalar(const size_t argument) { +template <> void PUBLIC_API Tuner::AddArgumentScalar(const size_t argument) { pimpl->arguments_size_t_.push_back({pimpl->argument_counter_++, argument}); } -template <> void Tuner::AddArgumentScalar(const float argument) { +template <> void PUBLIC_API Tuner::AddArgumentScalar(const float argument) { pimpl->arguments_float_.push_back({pimpl->argument_counter_++, argument}); } -template <> void Tuner::AddArgumentScalar(const double argument) { +template <> void PUBLIC_API Tuner::AddArgumentScalar(const double argument) { pimpl->arguments_double_.push_back({pimpl->argument_counter_++, argument}); } -template <> void Tuner::AddArgumentScalar(const float2 argument) { +template <> void PUBLIC_API Tuner::AddArgumentScalar(const float2 argument) { pimpl->arguments_float2_.push_back({pimpl->argument_counter_++, argument}); } -template <> void Tuner::AddArgumentScalar(const double2 argument) { +template <> void PUBLIC_API Tuner::AddArgumentScalar(const double2 argument) { pimpl->arguments_double2_.push_back({pimpl->argument_counter_++, argument}); } From 0b90c0c36ae324d9981a1b2b66e2b8236872e14f Mon Sep 17 00:00:00 2001 From: CNugteren Date: Mon, 21 Mar 2016 20:57:35 +0100 Subject: [PATCH 6/8] Fixes for minor warnings under Visual Studio --- CMakeLists.txt | 2 +- samples/gemm/gemm.cc | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 67069a2..56da1d9 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -78,7 +78,7 @@ endif() # C++ compiler settings if ("${CMAKE_CXX_COMPILER_ID}" STREQUAL "MSVC") - set(FLAGS "/Ox") + set(FLAGS "/Ox /wd4715 /wd4996") else () set(FLAGS "-O3 -std=c++11") if("${CMAKE_CXX_COMPILER_ID}" STREQUAL "GNU") diff --git 
a/samples/gemm/gemm.cc b/samples/gemm/gemm.cc index b580bfb..5a2f111 100644 --- a/samples/gemm/gemm.cc +++ b/samples/gemm/gemm.cc @@ -113,7 +113,7 @@ int main(int argc, char* argv[]) { // 3) Full search auto fraction = 1.0f/2048.0f; if (method == 0) { tuner.UseRandomSearch(fraction); } - else if (method == 1) { tuner.UseAnnealing(fraction, static_cast(search_param_1)); } + else if (method == 1) { tuner.UseAnnealing(fraction, static_cast(search_param_1)); } else if (method == 2) { tuner.UsePSO(fraction, static_cast(search_param_1), 0.4, 0.0, 0.4); } else { tuner.UseFullSearch(); } From 0dc2a995c07747e14cb8f868eca878a3dd568bf4 Mon Sep 17 00:00:00 2001 From: Cedric Nugteren Date: Mon, 21 Mar 2016 22:27:54 +0100 Subject: [PATCH 7/8] Updated the README --- README.md | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/README.md b/README.md index efe206f..a446c67 100644 --- a/README.md +++ b/README.md @@ -91,6 +91,8 @@ Several examples are included as part of the CLTune distribution. They illustrat The latter two optionally take command-line arguments. The first argument is an integer to select the platform (NVIDIA, AMD, etc.), the second argument is an integer for the device to run on, the third argument is an integer to select a search strategy (0=random, 1=annealing, 2=PSO, 3=fullsearch), and the fourth an optional search-strategy parameter. +Other examples are found in the [CLTuneDemos repository](https://github.com/williamjshipman/CLTuneDemos). CLTune is also used in the [CLBlast library](https://github.com/CNugteren/CLBlast). + Search strategies and machine-learning ------------- @@ -123,9 +125,7 @@ The samples ship with a basic header to convert the included OpenCL samples to C Development and tests ------------- -The CLTune project follows the Google C++ styleguide (with some exceptions) and uses a tab-size of two spaces and a max-width of 100 characters per line. 
It is furthermore based on practises from the third edition of Effective C++ and the first edition of Effective Modern C++. The project is licensed under the APACHE 2.0 license by SURFsara, (c) 2014. The contributing authors so far are: - -* Cedric Nugteren +The CLTune project follows the Google C++ styleguide (with some exceptions) and uses a tab-size of two spaces and a max-width of 100 characters per line. It is furthermore based on practises from the third edition of Effective C++ and the first edition of Effective Modern C++. The project is licensed under the APACHE 2.0 license by SURFsara, (c) 2014. CLTune is packaged with Catch 1.2.1 and a custom test suite. No external dependencies are needed. The tests will be compiled when providing the `TESTS=ON` option to CMake. Running the tests goes as follows: @@ -137,9 +137,11 @@ However, the more useful tests are the provided examples, since they include a v ./sample_gemm X Y -Citation +More information ------------- -If you refer to this work in a scientific publication, please cite the corresponding CLTune paper published in MCSoC '15: +A how-to-use CLTune tutorial written by William J Shipman is available on [his blog](https://williamjshipman.wordpress.com/2016/01/31/autotuning-opencl-kernels-cltune-on-windows-7/). + +More in-depth information and experimental results are also available in a scientific paper. If you refer to this work in a scientific publication, please cite the corresponding CLTune paper published in MCSoC '15: > Cedric Nugteren and Valeriu Codreanu. CLTune: A Generic Auto-Tuner for OpenCL Kernels. In: MCSoC: 9th International Symposium on Embedded Multicore/Many-core Systems-on-Chip. IEEE, 2015. 
From 58021489567babe0d0b301f41e2e860cb1e117b5 Mon Sep 17 00:00:00 2001 From: cnugteren Date: Wed, 30 Mar 2016 21:08:35 -0700 Subject: [PATCH 8/8] Updated to version 2.1.0 --- CHANGELOG | 2 +- CMakeLists.txt | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/CHANGELOG b/CHANGELOG index fcabe81..9445bcb 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -1,5 +1,5 @@ -Development version (next release) +Version 2.1.0 - Added exports to be able to create a DLL on Windows (thanks to Marco Hutter) - Added command-line OpenCL platform selection in the examples (thanks to William J Shipman) diff --git a/CMakeLists.txt b/CMakeLists.txt index 56da1d9..03fb90e 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -27,7 +27,7 @@ cmake_minimum_required(VERSION 2.8.10) project("cltune" CXX) set(cltune_VERSION_MAJOR 2) -set(cltune_VERSION_MINOR 0) +set(cltune_VERSION_MINOR 1) set(cltune_VERSION_PATCH 0) # Options