diff --git a/benchmarks/CMakeLists.txt b/benchmarks/CMakeLists.txt
new file mode 100644
index 0000000000..0604ba0c84
--- /dev/null
+++ b/benchmarks/CMakeLists.txt
@@ -0,0 +1,38 @@
+cmake_minimum_required(VERSION 3.18)
+
+project(libcudacxx-benchmarks LANGUAGES CXX CUDA)
+
+set(CMAKE_BUILD_TYPE "RelWithDebInfo")
+
+set(GPU_ARCHS "70;72;75;80" CACHE STRING "List of GPU architectures to compile for.")
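+# Override from the command line if needed, e.g.: cmake -DGPU_ARCHS="80" ..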
+
+message(STATUS "Enabled CUDA architectures: ${GPU_ARCHS}")
+
+find_package(Threads REQUIRED)
+find_package(OpenMP)
+
+function(ConfigureHostBench BENCH_NAME BENCH_SRC)
+ add_executable("${BENCH_NAME}" "${BENCH_SRC}")
+ target_link_libraries("${BENCH_NAME}" PRIVATE Threads::Threads)
+
+ # TODO: Link against libcudacxx interface target instead
+ target_include_directories("${BENCH_NAME}" PRIVATE
+ "${CMAKE_CURRENT_SOURCE_DIR}/../include")
+ if(OpenMP_CXX_FOUND)
+ target_link_libraries("${BENCH_NAME}" PRIVATE OpenMP::OpenMP_CXX)
+ endif()
+endfunction(ConfigureHostBench)
+
+function(ConfigureDeviceBench BENCH_NAME BENCH_SRC)
+ add_executable("${BENCH_NAME}" "${BENCH_SRC}")
+ set_property(TARGET "${BENCH_NAME}" PROPERTY CUDA_ARCHITECTURES "${GPU_ARCHS}")
+ # TODO: Link against libcudacxx interface target instead
+ target_include_directories("${BENCH_NAME}" PRIVATE
+ "${CMAKE_CURRENT_SOURCE_DIR}/../include")
+ target_compile_options("${BENCH_NAME}" PRIVATE --expt-extended-lambda --expt-relaxed-constexpr)
+endfunction(ConfigureDeviceBench)
+
+ConfigureHostBench(concurrency_host concurrency.cpp)
+
+ConfigureDeviceBench(concurrency_device concurrency.cu)
+
diff --git a/samples/benchmark.cpp b/benchmarks/concurrency.cpp
similarity index 100%
rename from samples/benchmark.cpp
rename to benchmarks/concurrency.cpp
diff --git a/samples/benchmark.cu b/benchmarks/concurrency.cu
similarity index 100%
rename from samples/benchmark.cu
rename to benchmarks/concurrency.cu
diff --git a/docs/readme.md b/docs/readme.md
index 5cd8622844..f11b60e0d8 100644
--- a/docs/readme.md
+++ b/docs/readme.md
@@ -1,7 +1,7 @@
# libcu++: The C++ Standard Library for Your Entire System
diff --git a/examples/CMakeLists.txt b/examples/CMakeLists.txt
new file mode 100644
index 0000000000..5347513325
--- /dev/null
+++ b/examples/CMakeLists.txt
@@ -0,0 +1,53 @@
+cmake_minimum_required(VERSION 3.18)
+
+project(libcudacxx-examples LANGUAGES CXX CUDA)
+
+set(CMAKE_BUILD_TYPE "RelWithDebInfo")
+
+find_package(CUDAToolkit REQUIRED)
+find_package(Threads REQUIRED)
+find_package(OpenMP)
+
+# Download input files for the trie examples.
+if(NOT (EXISTS books))
+ execute_process(COMMAND mkdir books)
+ file(DOWNLOAD https://www.gutenberg.org/files/2600/2600-0.txt books/2600-0.txt SHOW_PROGRESS)
+ file(DOWNLOAD http://www.gutenberg.org/cache/epub/996/pg996.txt books/pg996.txt SHOW_PROGRESS)
+ file(DOWNLOAD http://www.gutenberg.org/cache/epub/55/pg55.txt books/pg55.txt SHOW_PROGRESS)
+ file(DOWNLOAD https://www.gutenberg.org/files/8800/8800.txt books/8800.txt SHOW_PROGRESS)
+ file(DOWNLOAD https://www.gutenberg.org/files/84/84-0.txt books/84-0.txt SHOW_PROGRESS)
+ file(DOWNLOAD http://www.gutenberg.org/cache/epub/6130/pg6130.txt books/pg6130.txt SHOW_PROGRESS)
+ file(DOWNLOAD http://www.gutenberg.org/cache/epub/1727/pg1727.txt books/pg1727.txt SHOW_PROGRESS)
+ file(DOWNLOAD https://www.gutenberg.org/files/2701/2701-0.txt books/2701-0.txt SHOW_PROGRESS)
+ file(DOWNLOAD https://www.gutenberg.org/files/35/35-0.txt books/35-0.txt SHOW_PROGRESS)
+ file(DOWNLOAD https://www.gutenberg.org/files/1342/1342-0.txt books/1342-0.txt SHOW_PROGRESS)
+endif()
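+# The trie examples open these files via the relative path "books/...", so run
+# them from the directory that contains the downloaded "books" directory.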
+
+add_executable(trie_st trie_st.cpp)
+target_compile_features(trie_st PRIVATE cxx_std_11)
+
+add_executable(trie_mt trie_mt.cpp)
+target_compile_features(trie_mt PRIVATE cxx_std_11)
+target_link_libraries(trie_mt Threads::Threads)
+
+if(CUDAToolkit_VERSION VERSION_GREATER_EQUAL 11.1)
+ add_executable(trie_cuda trie.cu)
+ target_compile_features(trie_cuda PRIVATE cxx_std_11 cuda_std_11)
+ target_compile_options(trie_cuda PRIVATE --expt-relaxed-constexpr)
+ set_property(TARGET trie_cuda PROPERTY CUDA_ARCHITECTURES 70)
+else()
+ message(STATUS "Insufficient CUDA version. Skipping trie.cu example.")
+endif()
+
+if(CUDAToolkit_VERSION VERSION_GREATER 10.2)
+ add_executable(rtc rtc_example.cpp)
+ target_link_libraries(rtc CUDA::nvrtc)
+ target_compile_features(rtc PRIVATE cxx_std_11)
+else()
+ message(STATUS "Insufficient CUDA version. Skipping rtc_example.cpp example.")
+endif()
+
+add_executable(hash_map concurrent_hash_table.cu)
+target_compile_features(hash_map PRIVATE cxx_std_14 cuda_std_14)
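+# sm_70 is targeted here; the hash table's spin-waits rely on independent
+# thread scheduling, which is only available on Volta (sm_70) and newer.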
+set_property(TARGET hash_map PROPERTY CUDA_ARCHITECTURES 70)
+target_compile_options(hash_map PRIVATE --expt-extended-lambda)
diff --git a/examples/concurrent_hash_table.cu b/examples/concurrent_hash_table.cu
new file mode 100644
index 0000000000..f41c6b692a
--- /dev/null
+++ b/examples/concurrent_hash_table.cu
@@ -0,0 +1,260 @@
+// Copyright (c) 2018-2020 NVIDIA Corporation
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+// Released under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+
+#include <cuda/std/atomic>
+#include <cuda/std/cstdint>
+
+// TODO: It would be great if this example could NOT depend on Thrust.
+#include <thrust/allocate_unique.h>
+#include <thrust/device_vector.h>
+#include <thrust/host_vector.h>
+#include <thrust/execution_policy.h>
+#include <thrust/for_each.h>
+#include <thrust/functional.h>
+#include <thrust/generate.h>
+#include <thrust/pair.h>
+#include <thrust/uninitialized_fill.h>
+#include <thrust/iterator/counting_iterator.h>
+#include <thrust/mr/allocator.h>
+#include <thrust/system/cuda/memory_resource.h>
+
+#include <cassert>
+#include <iostream>
+#include <random>
+#include <utility>
+
+// TODO: This should be upstreamed and then removed.
+namespace thrust {
+
+using universal_raw_memory_resource =
+ thrust::system::cuda::detail::cuda_memory_resource<
+ thrust::system::cuda::detail::cudaMallocManaged, cudaFree, void*
+ >;
+
+template <typename T>
+using universal_allocator =
+ thrust::mr::stateless_resource_allocator<T, universal_raw_memory_resource>;
+
+template <typename T>
+using universal_vector = thrust::device_vector<T, universal_allocator<T>>;
+
+} // thrust
+
+template <
+ typename Key, typename Value,
+ typename Hash = thrust::identity<Key>,
+ typename KeyEqual = thrust::equal_to<Key>,
+ typename MemoryResource = thrust::universal_raw_memory_resource
+>
+struct concurrent_hash_table {
+ // Elements transition from state_empty -> state_reserved ->
+ // state_filled; no other transitions are allowed.
+ enum state_type {
+ state_empty, state_reserved, state_filled
+ };
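+ // state_reserved acts as a per-element lock: the inserting thread holds it
+ // while constructing the key and value in place, then publishes them by
+ // storing state_filled.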
+
+ using key_type = Key;
+ using mapped_type = Value;
+ using size_type = cuda::std::uint64_t;
+
+ using key_allocator = thrust::mr::stateless_resource_allocator<
+ key_type, MemoryResource
+ >;
+ using mapped_allocator = thrust::mr::stateless_resource_allocator<
+ mapped_type, MemoryResource
+ >;
+ using state_allocator = thrust::mr::stateless_resource_allocator<
+ cuda::std::atomic<state_type>, MemoryResource
+ >;
+
+ using key_iterator = typename key_allocator::pointer;
+ using value_iterator = typename mapped_allocator::pointer;
+ using state_iterator = typename state_allocator::pointer;
+
+ // This whole thing is silly and should be a lambda, or at least a private
+ // nested class, but alas, NVCC doesn't like that.
+ struct element_destroyer {
+ private:
+ size_type capacity_;
+ key_iterator keys_;
+ value_iterator values_;
+ state_iterator states_;
+
+ public:
+ __host__ __device__
+ element_destroyer(size_type capacity,
+ key_iterator keys,
+ value_iterator values,
+ state_iterator states)
+ : capacity_(capacity), keys_(keys), values_(values), states_(states)
+ {}
+
+ element_destroyer(element_destroyer const&) = default;
+
+ __host__ __device__
+ void operator()(size_type i) {
+ if (state_empty != states_[i]) {
+ (keys_ + i)->~key_type();
+ (values_ + i)->~mapped_type();
+ }
+ }
+ };
+
+private:
+ size_type capacity_;
+ key_iterator keys_;
+ value_iterator values_;
+ state_iterator states_;
+ Hash hash_;
+ KeyEqual key_equal_;
+
+public:
+ __host__
+ concurrent_hash_table(size_type capacity,
+ Hash hash = Hash(),
+ KeyEqual key_equal = KeyEqual())
+ : capacity_(capacity)
+ , keys_(key_allocator{}.allocate(capacity_))
+ , values_(mapped_allocator{}.allocate(capacity_))
+ , states_(state_allocator{}.allocate(capacity_))
+ , hash_(std::move(hash))
+ , key_equal_(std::move(key_equal))
+ {
+ thrust::uninitialized_fill(thrust::device,
+ states_, states_ + capacity_,
+ state_empty);
+ }
+
+ __host__
+ ~concurrent_hash_table()
+ {
+ thrust::for_each(thrust::device,
+ thrust::counting_iterator<size_type>(0),
+ thrust::counting_iterator<size_type>(capacity_),
+ element_destroyer(capacity_, keys_, values_, states_));
+ }
+
+ // TODO: Change return type to an enum with three possible values, succeeded,
+ // exists, and full.
+ template <typename UKey, typename... Args>
+ __host__ __device__
+ thrust::pair<value_iterator, bool>
+ try_emplace(UKey&& key, Args&&... args) {
+ auto index{hash_(key) % capacity_};
+ // Linearly probe the storage space up to `capacity_` times; if we haven't
+ // succeeded by then, the container is full.
+ for (size_type i = 0; i < capacity_; ++i) {
+ state_type old = states_[index].load(cuda::std::memory_order_acquire);
+ while (old == state_empty) {
+ // As long as the state of this element is empty, attempt to set it to
+ // reserved.
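+ // compare_exchange_weak may fail spuriously, so it sits in a loop
+ // that retries while the slot still appears empty.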
+ if (states_[index].compare_exchange_weak(
+ old, state_reserved, cuda::std::memory_order_acq_rel))
+ {
+ // We succeeded; the element is now "locked" as reserved.
+ new (keys_ + index) key_type(std::forward<UKey>(key));
+ new (values_ + index) mapped_type(std::forward<Args>(args)...);
+ states_[index].store(state_filled, cuda::std::memory_order_release);
+ return thrust::make_pair(values_ + index, true);
+ }
+ }
+ // If we are here, the element we are probing is not empty and we didn't
+ // fill it, so we need to wait for it to be filled.
+ while (state_filled != states_[index].load(cuda::std::memory_order_acquire))
+ ;
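+ // (A minimal spin-wait; it assumes concurrent forward progress between
+ // threads, e.g. independent thread scheduling on sm_70+.)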
+ // Now we know that the element we are probing has been filled by someone
+ // else, so we check if our key is equal to it.
+ if (key_equal_(keys_[index], key))
+ // It is, so the element already exists.
+ return thrust::make_pair(values_ + index, false);
+ // Otherwise, the element isn't a match, so move on to the next element.
+ index = (index + 1) % capacity_;
+ }
+ // If we are here, the container is full.
+ return thrust::make_pair(value_iterator{}, false);
+ }
+
+ __host__ __device__
+ mapped_type& operator[](key_type const& key) {
+ return (*try_emplace(key).first);
+ }
+ __host__ __device__
+ mapped_type& operator[](key_type&& key) {
+ return (*try_emplace(std::move(key)).first);
+ }
+};
+
+template <typename T>
+struct identity_modulo {
+private:
+ T const modulo_;
+
+public:
+ __host__ __device__
+ identity_modulo(T modulo) : modulo_(std::move(modulo)) {}
+
+ identity_modulo(identity_modulo const&) = default;
+
+ __host__ __device__
+ T operator()(T i) { return i % modulo_; }
+};
+
+int main() {
+ {
+ using table = concurrent_hash_table<int, cuda::std::atomic<int>>;
+
+ auto freq = thrust::allocate_unique<table>(thrust::universal_allocator<table>{}, 8);
+
+ thrust::universal_vector<int> input = [] {
+ thrust::universal_vector<int> v(2048);
+ std::mt19937 gen(1337);
+ std::uniform_int_distribution<int> dis(0, 7);
+ thrust::generate(v.begin(), v.end(), [&] { return dis(gen); });
+ return v;
+ }();
+
+ thrust::for_each(thrust::device, input.begin(), input.end(),
+ [freq = freq.get()] __device__ (int i) {
+ (*freq)[i].fetch_add(1, cuda::std::memory_order_relaxed);
+ }
+ );
+
+ thrust::host_vector<int> gold(8);
+ thrust::for_each(input.begin(), input.end(), [&] (int i) { ++gold[i]; });
+
+ for (cuda::std::uint64_t i = 0; i < 8; ++i)
+ std::cout << "i: " << i
+ << " gold: " << gold[i]
+ << " observed: " << (*freq)[i] << "\n";
+
+ assert(cudaSuccess == cudaDeviceSynchronize());
+ }
+ {
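+ // Same frequency count as above, but with an identity-modulo-4 "hash":
+ // the 8 distinct keys collide into 4 home slots, exercising try_emplace's
+ // linear-probing path.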
+ using table = concurrent_hash_table<int, cuda::std::atomic<int>, identity_modulo<int>>;
+
+ auto freq = thrust::allocate_unique<table>(thrust::universal_allocator<table>{}, 8, identity_modulo<int>(4));
+
+ thrust::universal_vector<int> input = [] {
+ thrust::universal_vector<int> v(2048);
+ std::mt19937 gen(1337);
+ std::uniform_int_distribution<int> dis(0, 7);
+ thrust::generate(v.begin(), v.end(), [&] { return dis(gen); });
+ return v;
+ }();
+
+ thrust::for_each(thrust::device, input.begin(), input.end(),
+ [freq = freq.get()] __device__ (int i) {
+ (*freq)[i].fetch_add(1, cuda::std::memory_order_relaxed);
+ }
+ );
+
+ thrust::host_vector<int> gold(8);
+ thrust::for_each(input.begin(), input.end(), [&] (int i) { ++gold[i]; });
+
+ for (cuda::std::uint64_t i = 0; i < 8; ++i)
+ std::cout << "i: " << i
+ << " gold: " << gold[i]
+ << " observed: " << (*freq)[i] << "\n";
+
+ assert(cudaSuccess == cudaDeviceSynchronize());
+ }
+}
+
diff --git a/samples/rtc_example.cpp b/examples/rtc_example.cpp
similarity index 81%
rename from samples/rtc_example.cpp
rename to examples/rtc_example.cpp
index 85fb047d3a..b97376ae4e 100644
--- a/samples/rtc_example.cpp
+++ b/examples/rtc_example.cpp
@@ -39,19 +39,19 @@ THE SOFTWARE.
const char *trie =
R"xxx(
-#include <simt/atomic>
-#include <simt/cstddef>
-#include <simt/cstdint>
+#include <cuda/std/atomic>
+#include <cuda/std/cstddef>
+#include <cuda/std/cstdint>
template<class T> static constexpr T min(T a, T b) { return a < b ? a : b; }
struct trie {
struct ref {
- simt::std::atomic<trie*> ptr = ATOMIC_VAR_INIT(nullptr);
+ cuda::std::atomic<trie*> ptr = ATOMIC_VAR_INIT(nullptr);
// the flag will protect against multiple pointer updates
- simt::std::atomic_flag flag = ATOMIC_FLAG_INIT;
+ cuda::std::atomic_flag flag = ATOMIC_FLAG_INIT;
} next[26];
- simt::std::atomic<int> count = ATOMIC_VAR_INIT(0);
+ cuda::std::atomic<int> count = ATOMIC_VAR_INIT(0);
};
__host__ __device__
int index_of(char c) {
@@ -61,7 +61,7 @@ int index_of(char c) {
};
__host__ __device__
void make_trie(/* trie to insert word counts into */ trie& root,
- /* bump allocator to get new nodes*/ simt::std::atomic<trie*>& bump,
+ /* bump allocator to get new nodes*/ cuda::std::atomic<trie*>& bump,
/* input */ const char* begin, const char* end,
/* thread this invocation is for */ unsigned index,
/* how many threads there are */ unsigned domain) {
@@ -80,7 +80,7 @@ void make_trie(/* trie to insert word counts into */ trie& root,
auto const index = off >= size ? -1 : index_of(c);
if(index == -1) {
if(n != &root) {
- n->count.fetch_add(1, simt::std::memory_order_relaxed);
+ n->count.fetch_add(1, cuda::std::memory_order_relaxed);
n = &root;
}
//end of last word?
@@ -89,20 +89,20 @@ void make_trie(/* trie to insert word counts into */ trie& root,
else
continue;
}
- if(n->next[index].ptr.load(simt::std::memory_order_acquire) == nullptr) {
- if(n->next[index].flag.test_and_set(simt::std::memory_order_relaxed))
- while(n->next[index].ptr.load(simt::std::memory_order_acquire) == nullptr);
+ if(n->next[index].ptr.load(cuda::std::memory_order_acquire) == nullptr) {
+ if(n->next[index].flag.test_and_set(cuda::std::memory_order_relaxed))
+ while(n->next[index].ptr.load(cuda::std::memory_order_acquire) == nullptr);
else {
- auto next = bump.fetch_add(1, simt::std::memory_order_relaxed);
- n->next[index].ptr.store(next, simt::std::memory_order_release);
+ auto next = bump.fetch_add(1, cuda::std::memory_order_relaxed);
+ n->next[index].ptr.store(next, cuda::std::memory_order_release);
}
}
- n = n->next[index].ptr.load(simt::std::memory_order_relaxed);
+ n = n->next[index].ptr.load(cuda::std::memory_order_relaxed);
}
}
__global__ // __launch_bounds__(1024, 1)
-void call_make_trie(trie* t, simt::std::atomic<trie*>* bump, const char* begin, const char* end) {
+void call_make_trie(trie* t, cuda::std::atomic<trie*>* bump, const char* begin, const char* end) {
auto const index = blockDim.x * blockIdx.x + threadIdx.x;
auto const domain = gridDim.x * blockDim.x;
@@ -127,15 +127,13 @@ int main(int argc, char *argv[])
NULL)); // includeNames
const char *opts[] = {"-std=c++11",
- "-I/usr/include/linux",
- "-I/usr/include/c++/7.3.0",
"-I/usr/local/cuda/include",
- "-I/home/olivier/freestanding/include",
+ "-I../../include",
"--gpu-architecture=compute_70",
"--relocatable-device-code=true",
"-default-device"};
nvrtcResult compileResult = nvrtcCompileProgram(prog, // prog
- 8, // numOptions
+ 6, // numOptions
opts); // options
// Obtain compilation log from the program.
size_t logSize;
diff --git a/samples/trie.cu b/examples/trie.cu
similarity index 95%
rename from samples/trie.cu
rename to examples/trie.cu
index 9abef0763e..ba8f99b47e 100644
--- a/samples/trie.cu
+++ b/examples/trie.cu
@@ -179,13 +179,18 @@ int main() {
std::basic_string<char, std::char_traits<char>, managed_allocator<char>> input;
char const* files[] = {
- "2600-0.txt", "2701-0.txt", "35-0.txt", "84-0.txt", "8800.txt",
- "pg1727.txt", "pg55.txt", "pg6130.txt", "pg996.txt", "1342-0.txt"
+ "books/2600-0.txt", "books/2701-0.txt", "books/35-0.txt", "books/84-0.txt", "books/8800.txt",
+ "books/pg1727.txt", "books/pg55.txt", "books/pg6130.txt", "books/pg996.txt", "books/1342-0.txt"
};
for(auto* ptr : files) {
+ std::cout << ptr << std::endl;
auto const cur = input.size();
std::ifstream in(ptr);
+ if(in.fail()) {
+ std::cerr << "Failed to open file: " << ptr << std::endl;
+ return -1;
+ }
in.seekg(0, std::ios_base::end);
auto const pos = in.tellg();
input.resize(cur + pos);
diff --git a/samples/trie_mt.cpp b/examples/trie_mt.cpp
similarity index 93%
rename from samples/trie_mt.cpp
rename to examples/trie_mt.cpp
index e59407b0c5..3dc7fd8f20 100644
--- a/samples/trie_mt.cpp
+++ b/examples/trie_mt.cpp
@@ -120,13 +120,18 @@ int main() {
std::string input;
char const* files[] = {
- "2600-0.txt", "2701-0.txt", "35-0.txt", "84-0.txt", "8800.txt",
- "pg1727.txt", "pg55.txt", "pg6130.txt", "pg996.txt", "1342-0.txt"
+ "books/2600-0.txt", "books/2701-0.txt", "books/35-0.txt", "books/84-0.txt", "books/8800.txt",
+ "books/pg1727.txt", "books/pg55.txt", "books/pg6130.txt", "books/pg996.txt", "books/1342-0.txt"
};
for(auto* ptr : files) {
+ std::cout << ptr << std::endl;
auto const cur = input.size();
std::ifstream in(ptr);
+ if(in.fail()) {
+ std::cerr << "Failed to open file: " << ptr << std::endl;
+ return -1;
+ }
in.seekg(0, std::ios_base::end);
auto const pos = in.tellg();
input.resize(cur + pos);
diff --git a/samples/trie_st.cpp b/examples/trie_st.cpp
similarity index 89%
rename from samples/trie_st.cpp
rename to examples/trie_st.cpp
index f0fb4f1c92..678c71e252 100644
--- a/samples/trie_st.cpp
+++ b/examples/trie_st.cpp
@@ -91,13 +91,18 @@ int main() {
std::string input;
char const* files[] = {
- "2600-0.txt", "2701-0.txt", "35-0.txt", "84-0.txt", "8800.txt",
- "pg1727.txt", "pg55.txt", "pg6130.txt", "pg996.txt", "1342-0.txt"
+ "books/2600-0.txt", "books/2701-0.txt", "books/35-0.txt", "books/84-0.txt", "books/8800.txt",
+ "books/pg1727.txt", "books/pg55.txt", "books/pg6130.txt", "books/pg996.txt", "books/1342-0.txt"
};
for(auto* ptr : files) {
+ std::cout << ptr << std::endl;
auto const cur = input.size();
std::ifstream in(ptr);
+ if(in.fail()) {
+ std::cerr << "Failed to open file: " << ptr << std::endl;
+ return -1;
+ }
in.seekg(0, std::ios_base::end);
auto const pos = in.tellg();
input.resize(cur + pos);
diff --git a/samples/CMakeLists.txt b/samples/CMakeLists.txt
deleted file mode 100644
index 75fd8fab73..0000000000
--- a/samples/CMakeLists.txt
+++ /dev/null
@@ -1,58 +0,0 @@
-cmake_minimum_required(VERSION 3.8)
-project(libcudacxx-samples CXX)
-
-if (NOT "${CMAKE_CUDA_HOST_COMPILER}" STREQUAL "")
- unset(CMAKE_CUDA_HOST_COMPILER CACHE)
- message(FATAL_ERROR "Thrust tests and examples require the C++ compiler"
- " and the CUDA host compiler to be the same; to set this compiler, please"
- " use the CMAKE_CXX_COMPILER variable, not the CMAKE_CUDA_HOST_COMPILER"
- " variable.")
- endif ()
-set(CMAKE_CUDA_HOST_COMPILER ${CMAKE_CXX_COMPILER})
-enable_language(CUDA)
-
-set(CMAKE_BUILD_TYPE "RelWithDebInfo")
-set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} --expt-relaxed-constexpr --expt-extended-lambda")
-
-set(HIGHEST_COMPUTE_ARCH 80)
-set(KNOWN_COMPUTE_ARCHS 70 72 75 80)
-
-option(DISABLE_ARCH_BY_DEFAULT "If ON, then all CUDA architectures are disabled on the initial CMake run." OFF)
-set(OPTION_INIT ON)
-if (DISABLE_ARCH_BY_DEFAULT)
- set(OPTION_INIT OFF)
-endif ()
-if (NOT ${HIGHEST_COMPUTE_ARCH} IN_LIST KNOWN_COMPUTE_ARCHS)
- message(FATAL_ERROR "When changing the highest compute version, don't forget to add it to the list!")
-endif ()
-
-foreach (COMPUTE_ARCH IN LISTS KNOWN_COMPUTE_ARCHS)
- option(ENABLE_COMPUTE_${COMPUTE_ARCH} "Enable code generation for tests for sm_${COMPUTE_ARCH}" ${OPTION_INIT})
- if (ENABLE_COMPUTE_${COMPUTE_ARCH})
- set(COMPUTE_ARCHS "${COMPUTE_ARCHS} ${COMPUTE_ARCH}")
- set(COMPUTE_MESSAGE "${COMPUTE_MESSAGE} sm_${COMPUTE_ARCH}")
- set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -gencode arch=compute_${COMPUTE_ARCH},code=sm_${COMPUTE_ARCH}")
- endif ()
-endforeach ()
-
-option(ENABLE_COMPUTE_FUTURE "Enable code generation for tests for compute_${HIGHEST_COMPUTE_ARCH}" ${OPTION_INIT})
-if (ENABLE_COMPUTE_FUTURE)
- set(COMPUTE_MESSAGE "${COMPUTE_MESSAGE} compute_${HIGHEST_COMPUTE_ARCH}")
- set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -gencode arch=compute_${HIGHEST_COMPUTE_ARCH},code=compute_${HIGHEST_COMPUTE_ARCH}")
-endif ()
-
-message(STATUS "Enabled CUDA architectures:${COMPUTE_MESSAGE}")
-
-include_directories(${CMAKE_SOURCE_DIR}/../include)
-
-find_package(Threads REQUIRED)
-find_package(OpenMP)
-
-add_executable(benchmark_host benchmark.cpp)
-
-target_link_libraries(benchmark_host PUBLIC Threads::Threads)
-if(OpenMP_CXX_FOUND)
- target_link_libraries(benchmark_host PUBLIC OpenMP::OpenMP_CXX)
-endif()
-
-add_executable(benchmark_device benchmark.cu)
diff --git a/samples/books.sh b/samples/books.sh
deleted file mode 100755
index 434119267d..0000000000
--- a/samples/books.sh
+++ /dev/null
@@ -1,11 +0,0 @@
-#!/usr/bin/env bash
-curl -O -# https://www.gutenberg.org/files/2600/2600-0.txt
-curl -O -# http://www.gutenberg.org/cache/epub/996/pg996.txt
-curl -O -# http://www.gutenberg.org/cache/epub/55/pg55.txt
-curl -O -# https://www.gutenberg.org/files/8800/8800.txt
-curl -O -# https://www.gutenberg.org/files/84/84-0.txt
-curl -O -# http://www.gutenberg.org/cache/epub/6130/pg6130.txt
-curl -O -# http://www.gutenberg.org/cache/epub/1727/pg1727.txt
-curl -O -# https://www.gutenberg.org/files/2701/2701-0.txt
-curl -O -# https://www.gutenberg.org/files/35/35-0.txt
-curl -O -# https://www.gutenberg.org/files/1342/1342-0.txt
diff --git a/samples/linux.sh b/samples/linux.sh
deleted file mode 100755
index abde3255b3..0000000000
--- a/samples/linux.sh
+++ /dev/null
@@ -1,6 +0,0 @@
-#!/usr/bin/env bash
-g++ -std=c++11 trie_st.cpp -O2 -o trie_st
-g++ -std=c++11 trie_mt.cpp -O2 -o trie_mt -pthread
-nvcc -I../include -arch=compute_70 -std=c++11 -O2 trie.cu --expt-relaxed-constexpr -o trie
-g++ -I../include -std=c++14 benchmark.cpp -O2 -lpthread -o benchmark
-nvcc -I../include -arch=compute_70 -std=c++14 benchmark.cu -O2 -lpthread --expt-relaxed-constexpr --expt-extended-lambda -o benchmark
diff --git a/samples/linux_clang.sh b/samples/linux_clang.sh
deleted file mode 100755
index 852f12f504..0000000000
--- a/samples/linux_clang.sh
+++ /dev/null
@@ -1,2 +0,0 @@
-#!/usr/bin/env bash
-clang++-7 -I../include --cuda-gpu-arch=sm_70 -std=c++11 -O2 trie.cu -L/usr/local/cuda/lib64/ -lcudart_static -pthread -ldl -lrt -o trie
diff --git a/samples/linux_rtc.sh b/samples/linux_rtc.sh
deleted file mode 100755
index 0d9e08716e..0000000000
--- a/samples/linux_rtc.sh
+++ /dev/null
@@ -1,2 +0,0 @@
-#!/usr/bin/env bash
-nvcc rtc_example.cpp -lnvrtc -o rtc; ./rtc
diff --git a/samples/openmp.sh b/samples/openmp.sh
deleted file mode 100755
index 2950adf1d9..0000000000
--- a/samples/openmp.sh
+++ /dev/null
@@ -1 +0,0 @@
-clang++ -D_LIBCPP_BARRIER_BUTTERFLY -I../include -fopenmp=libomp -L../../llvm-project/build/lib/ -std=c++11 -O2 benchmark.cpp -lstdc++ -lpthread -lm -o benchmark
diff --git a/samples/windows.bat b/samples/windows.bat
deleted file mode 100644
index 176a7a01a3..0000000000
--- a/samples/windows.bat
+++ /dev/null
@@ -1,4 +0,0 @@
-call vcvars64.bat
-cl /EHsc trie_st.cpp /O2
-cl /EHsc trie_mt.cpp /O2
-nvcc -I../include -arch=compute_70 -O2 trie.cu --expt-relaxed-constexpr -Xcompiler /Zc:__cplusplus -o trie