diff --git a/benchmarks/CMakeLists.txt b/benchmarks/CMakeLists.txt
new file mode 100644
index 0000000000..0604ba0c84
--- /dev/null
+++ b/benchmarks/CMakeLists.txt
@@ -0,0 +1,38 @@
+cmake_minimum_required(VERSION 3.18)
+
+project(libcudacxx-benchmarks LANGUAGES CXX CUDA)
+
+set(CMAKE_BUILD_TYPE "RelWithDebInfo")
+
+set(GPU_ARCHS "70;72;75;80" CACHE STRING "List of GPU architectures to compile for.")
+
+message(STATUS "Enabled CUDA architectures: ${GPU_ARCHS}")
+
+find_package(Threads REQUIRED)
+find_package(OpenMP)
+
+function(ConfigureHostBench BENCH_NAME BENCH_SRC)
+  add_executable("${BENCH_NAME}" "${BENCH_SRC}")
+  target_link_libraries("${BENCH_NAME}" PRIVATE Threads::Threads)
+
+  # TODO: Link against the libcudacxx interface target instead.
+  target_include_directories("${BENCH_NAME}" PRIVATE
+    "${CMAKE_CURRENT_SOURCE_DIR}/../include")
+  if(OpenMP_CXX_FOUND)
+    target_link_libraries("${BENCH_NAME}" PRIVATE OpenMP::OpenMP_CXX)
+  endif()
+endfunction(ConfigureHostBench)
+
+function(ConfigureDeviceBench BENCH_NAME BENCH_SRC)
+  add_executable("${BENCH_NAME}" "${BENCH_SRC}")
+  set_property(TARGET "${BENCH_NAME}" PROPERTY CUDA_ARCHITECTURES "${GPU_ARCHS}")
+  # TODO: Link against the libcudacxx interface target instead.
+  target_include_directories("${BENCH_NAME}" PRIVATE
+    "${CMAKE_CURRENT_SOURCE_DIR}/../include")
+  target_compile_options("${BENCH_NAME}" PRIVATE --expt-extended-lambda --expt-relaxed-constexpr)
+endfunction(ConfigureDeviceBench)
+
+ConfigureHostBench(concurrency_host concurrency.cpp)
+
+ConfigureDeviceBench(concurrency_device concurrency.cu)
diff --git a/samples/benchmark.cpp b/benchmarks/concurrency.cpp
similarity index 100%
rename from samples/benchmark.cpp
rename to benchmarks/concurrency.cpp
diff --git a/samples/benchmark.cu b/benchmarks/concurrency.cu
similarity index 100%
rename from samples/benchmark.cu
rename to benchmarks/concurrency.cu
diff --git a/docs/readme.md b/docs/readme.md
index 5cd8622844..f11b60e0d8 100644
--- a/docs/readme.md
+++ b/docs/readme.md
@@ -1,7 +1,7 @@
 # libcu++: The C++ Standard Library for Your Entire System
 
 <table><tr>
-<th><b><a href=".../samples">Examples</a></b></th>
+<th><b><a href=".../examples">Examples</a></b></th>
 <th><b><a href="...">Godbolt</a></b></th>
 <th><b><a href="...">Documentation</a></b></th>
 </tr></table>
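The two Configure*Bench targets build the same concurrency benchmark for the host (C++ with Threads and optional OpenMP) and for the device (CUDA). For readers unfamiliar with what such a target compiles, here is a minimal host-side sketch of the kind of cuda::std::atomic contention these benchmarks measure; it is illustrative only, not the contents of concurrency.cpp:

#include <cuda/std/atomic>
#include <cstdio>
#include <thread>

// Hypothetical micro-benchmark body: two host threads hammer a single
// cuda::std::atomic counter (libcu++ is usable from plain host code).
int main() {
    cuda::std::atomic<int> counter{0};
    auto work = [&] {
        for (int i = 0; i < 1000000; ++i)
            counter.fetch_add(1, cuda::std::memory_order_relaxed);
    };
    std::thread t1(work), t2(work);
    t1.join(); t2.join();
    std::printf("final: %d\n", counter.load());
    return 0;
}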
diff --git a/examples/CMakeLists.txt b/examples/CMakeLists.txt
new file mode 100644
index 0000000000..5347513325
--- /dev/null
+++ b/examples/CMakeLists.txt
@@ -0,0 +1,53 @@
+cmake_minimum_required(VERSION 3.18)
+
+project(libcudacxx-examples LANGUAGES CXX CUDA)
+
+set(CMAKE_BUILD_TYPE "RelWithDebInfo")
+
+find_package(CUDAToolkit REQUIRED)
+find_package(Threads REQUIRED)
+find_package(OpenMP)
+
+# Download input files for the trie examples.
+if(NOT (EXISTS books))
+  execute_process(COMMAND mkdir books)
+  file(DOWNLOAD https://www.gutenberg.org/files/2600/2600-0.txt books/2600-0.txt SHOW_PROGRESS)
+  file(DOWNLOAD http://www.gutenberg.org/cache/epub/996/pg996.txt books/pg996.txt SHOW_PROGRESS)
+  file(DOWNLOAD http://www.gutenberg.org/cache/epub/55/pg55.txt books/pg55.txt SHOW_PROGRESS)
+  file(DOWNLOAD https://www.gutenberg.org/files/8800/8800.txt books/8800.txt SHOW_PROGRESS)
+  file(DOWNLOAD https://www.gutenberg.org/files/84/84-0.txt books/84-0.txt SHOW_PROGRESS)
+  file(DOWNLOAD http://www.gutenberg.org/cache/epub/6130/pg6130.txt books/pg6130.txt SHOW_PROGRESS)
+  file(DOWNLOAD http://www.gutenberg.org/cache/epub/1727/pg1727.txt books/pg1727.txt SHOW_PROGRESS)
+  file(DOWNLOAD https://www.gutenberg.org/files/2701/2701-0.txt books/2701-0.txt SHOW_PROGRESS)
+  file(DOWNLOAD https://www.gutenberg.org/files/35/35-0.txt books/35-0.txt SHOW_PROGRESS)
+  file(DOWNLOAD https://www.gutenberg.org/files/1342/1342-0.txt books/1342-0.txt SHOW_PROGRESS)
+endif()
+
+add_executable(trie_st trie_st.cpp)
+target_compile_features(trie_st PRIVATE cxx_std_11)
+
+add_executable(trie_mt trie_mt.cpp)
+target_compile_features(trie_mt PRIVATE cxx_std_11)
+target_link_libraries(trie_mt Threads::Threads)
+
+if(CUDAToolkit_VERSION VERSION_GREATER_EQUAL 11.1)
+  add_executable(trie_cuda trie.cu)
+  target_compile_features(trie_cuda PRIVATE cxx_std_11 cuda_std_11)
+  target_compile_options(trie_cuda PRIVATE --expt-relaxed-constexpr)
+  set_property(TARGET trie_cuda PROPERTY CUDA_ARCHITECTURES 70)
+else()
+  message(STATUS "Insufficient CUDA version. Skipping trie.cu example.")
+endif()
+
+if(CUDAToolkit_VERSION VERSION_GREATER 10.2)
+  add_executable(rtc rtc_example.cpp)
+  target_link_libraries(rtc CUDA::nvrtc)
+  target_compile_features(rtc PRIVATE cxx_std_11)
+else()
+  message(STATUS "Insufficient CUDA version. Skipping rtc_example.cpp example.")
+endif()
+
+add_executable(hash_map concurrent_hash_table.cu)
+target_compile_features(hash_map PRIVATE cxx_std_14 cuda_std_14)
+set_property(TARGET hash_map PROPERTY CUDA_ARCHITECTURES 70)
+target_compile_options(hash_map PRIVATE --expt-extended-lambda)
diff --git a/examples/concurrent_hash_table.cu b/examples/concurrent_hash_table.cu
new file mode 100644
index 0000000000..f41c6b692a
--- /dev/null
+++ b/examples/concurrent_hash_table.cu
@@ -0,0 +1,260 @@
+// Copyright (c) 2018-2020 NVIDIA Corporation
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+// Released under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+
+#include <cuda/std/atomic>
+#include <cuda/std/cstdint>
+
+// TODO: It would be great if this example could NOT depend on Thrust.
+#include <thrust/allocate_unique.h>
+#include <thrust/device_vector.h>
+#include <thrust/execution_policy.h>
+#include <thrust/for_each.h>
+#include <thrust/functional.h>
+#include <thrust/generate.h>
+#include <thrust/host_vector.h>
+#include <thrust/iterator/counting_iterator.h>
+#include <thrust/mr/allocator.h>
+#include <thrust/pair.h>
+#include <thrust/system/cuda/memory_resource.h>
+#include <thrust/uninitialized_fill.h>
+
+#include <cassert>
+#include <iostream>
+#include <new>
+#include <random>
+#include <utility>
+
+// TODO: This should be upstreamed and then removed.
+namespace thrust {
+
+using universal_raw_memory_resource =
+  thrust::system::cuda::detail::cuda_memory_resource<
+    thrust::system::cuda::detail::cudaMallocManaged, cudaFree, void*
+  >;
+
+template <typename T>
+using universal_allocator =
+  thrust::mr::stateless_resource_allocator<T, universal_raw_memory_resource>;
+
+template <typename T>
+using universal_vector = thrust::device_vector<T, universal_allocator<T>>;
+
+} // thrust
+
+template <
+  typename Key, typename Value,
+  typename Hash = thrust::identity<Key>,
+  typename KeyEqual = thrust::equal_to<Key>,
+  typename MemoryResource = thrust::universal_raw_memory_resource
+>
+struct concurrent_hash_table {
+  // Elements transition from state_empty -> state_reserved ->
+  // state_filled; no other transitions are allowed.
+  enum state_type {
+    state_empty, state_reserved, state_filled
+  };
+
+  using key_type    = Key;
+  using mapped_type = Value;
+  using size_type   = cuda::std::uint64_t;
+
+  using key_allocator = thrust::mr::stateless_resource_allocator<
+    key_type, MemoryResource
+  >;
+  using mapped_allocator = thrust::mr::stateless_resource_allocator<
+    mapped_type, MemoryResource
+  >;
+  using state_allocator = thrust::mr::stateless_resource_allocator<
+    cuda::std::atomic<state_type>, MemoryResource
+  >;
+
+  using key_iterator   = typename key_allocator::pointer;
+  using value_iterator = typename mapped_allocator::pointer;
+  using state_iterator = typename state_allocator::pointer;
+
+  // This whole thing is silly and should be a lambda, or at least a private
+  // nested class, but alas, NVCC doesn't like that.
+  struct element_destroyer {
+  private:
+    size_type      capacity_;
+    key_iterator   keys_;
+    value_iterator values_;
+    state_iterator states_;
+
+  public:
+    __host__ __device__
+    element_destroyer(size_type capacity,
+                      key_iterator keys,
+                      value_iterator values,
+                      state_iterator states)
+      : capacity_(capacity), keys_(keys), values_(values), states_(states)
+    {}
+
+    element_destroyer(element_destroyer const&) = default;
+
+    __host__ __device__
+    void operator()(size_type i) {
+      if (state_empty != states_[i]) {
+        (keys_ + i)->~key_type();
+        (values_ + i)->~mapped_type();
+      }
+    }
+  };
+
+private:
+  size_type      capacity_;
+  key_iterator   keys_;
+  value_iterator values_;
+  state_iterator states_;
+  Hash           hash_;
+  KeyEqual       key_equal_;
+
+public:
+  __host__
+  concurrent_hash_table(size_type capacity,
+                        Hash hash = Hash(),
+                        KeyEqual key_equal = KeyEqual())
+    : capacity_(capacity)
+    , keys_(key_allocator{}.allocate(capacity_))
+    , values_(mapped_allocator{}.allocate(capacity_))
+    , states_(state_allocator{}.allocate(capacity_))
+    , hash_(std::move(hash))
+    , key_equal_(std::move(key_equal))
+  {
+    thrust::uninitialized_fill(thrust::device,
+                               states_, states_ + capacity_,
+                               state_empty);
+  }
+
+  __host__
+  ~concurrent_hash_table()
+  {
+    thrust::for_each(thrust::device,
+                     thrust::counting_iterator<size_type>(0),
+                     thrust::counting_iterator<size_type>(capacity_),
+                     element_destroyer(capacity_, keys_, values_, states_));
+  }
+
+  // TODO: Change the return type to an enum with three possible values:
+  // succeeded, exists, and full.
+  template <typename UKey, typename... Args>
+  __host__ __device__
+  thrust::pair<value_iterator, bool>
+  try_emplace(UKey&& key, Args&&... args) {
+    auto index{hash_(key) % capacity_};
+    // Linearly probe the storage space up to `capacity_` times; if we haven't
+    // succeeded by then, the container is full.
+    for (size_type i = 0; i < capacity_; ++i) {
+      state_type old = states_[index].load(cuda::std::memory_order_acquire);
+      while (old == state_empty) {
+        // As long as the state of this element is empty, attempt to set it to
+        // reserved.
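+        // `compare_exchange_weak` may fail spuriously, so it sits in a
+        // retry loop; on any failure it reloads `old` with the state it
+        // actually observed, and the loop exits as soon as another thread
+        // has claimed the slot. The acq_rel ordering makes a successful
+        // reservation both observe prior writes to the slot and publish
+        // our claim to other threads.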
+        if (states_[index].compare_exchange_weak(
+              old, state_reserved, cuda::std::memory_order_acq_rel))
+        {
+          // We succeeded; the element is now "locked" as reserved.
+          new (keys_ + index) key_type(std::forward<UKey>(key));
+          new (values_ + index) mapped_type(std::forward<Args>(args)...);
+          states_[index].store(state_filled, cuda::std::memory_order_release);
+          return thrust::make_pair(values_ + index, true);
+        }
+      }
+      // If we are here, the element we are probing is not empty and we didn't
+      // fill it, so we need to wait for it to be filled.
+      while (state_filled != states_[index].load(cuda::std::memory_order_acquire))
+        ;
+      // Now we know that the element we are probing has been filled by someone
+      // else, so we check if our key is equal to it.
+      if (key_equal_(keys_[index], key))
+        // It is, so the element already exists.
+        return thrust::make_pair(values_ + index, false);
+      // Otherwise, the element isn't a match, so move on to the next element.
+      index = (index + 1) % capacity_;
+    }
+    // If we are here, the container is full.
+    return thrust::make_pair(value_iterator{}, false);
+  }
+
+  __host__ __device__
+  mapped_type& operator[](key_type const& key) {
+    return (*try_emplace(key).first);
+  }
+  __host__ __device__
+  mapped_type& operator[](key_type&& key) {
+    return (*try_emplace(std::move(key)).first);
+  }
+};
+
+template <typename T>
+struct identity_modulo {
+private:
+  T const modulo_;
+
+public:
+  __host__ __device__
+  identity_modulo(T modulo) : modulo_(std::move(modulo)) {}
+
+  identity_modulo(identity_modulo const&) = default;
+
+  __host__ __device__
+  T operator()(T i) { return i % modulo_; }
+};
+
+int main() {
+  {
+    using table = concurrent_hash_table<int, cuda::std::atomic<int>>;
+
+    auto freq = thrust::allocate_unique<table>(thrust::universal_allocator<table>{}, 8);
+
+    thrust::universal_vector<int> input = [] {
+      thrust::universal_vector<int> v(2048);
+      std::mt19937 gen(1337);
+      std::uniform_int_distribution<int> dis(0, 7);
+      thrust::generate(v.begin(), v.end(), [&] { return dis(gen); });
+      return v;
+    }();
+
+    thrust::for_each(thrust::device, input.begin(), input.end(),
+      [freq = freq.get()] __device__ (int i) {
+        (*freq)[i].fetch_add(1, cuda::std::memory_order_relaxed);
+      }
+    );
+
+    thrust::host_vector<int> gold(8);
+    thrust::for_each(input.begin(), input.end(), [&] (int i) { ++gold[i]; });
+
+    for (cuda::std::uint64_t i = 0; i < 8; ++i)
+      std::cout << "i: " << i
+                << " gold: " << gold[i]
+                << " observed: " << (*freq)[i] << "\n";
+
+    assert(cudaSuccess == cudaDeviceSynchronize());
+  }
+  {
+    using table = concurrent_hash_table<int, cuda::std::atomic<int>, identity_modulo<int>>;
+
+    auto freq = thrust::allocate_unique<table>(thrust::universal_allocator<table>{}, 8, identity_modulo<int>(4));
+
+    thrust::universal_vector<int> input = [] {
+      thrust::universal_vector<int> v(2048);
+      std::mt19937 gen(1337);
+      std::uniform_int_distribution<int> dis(0, 7);
+      thrust::generate(v.begin(), v.end(), [&] { return dis(gen); });
+      return v;
+    }();
+
+    thrust::for_each(thrust::device, input.begin(), input.end(),
+      [freq = freq.get()] __device__ (int i) {
+        (*freq)[i].fetch_add(1, cuda::std::memory_order_relaxed);
+      }
+    );
+
+    thrust::host_vector<int> gold(8);
+    thrust::for_each(input.begin(), input.end(), [&] (int i) { ++gold[i]; });
+
+    for (cuda::std::uint64_t i = 0; i < 8; ++i)
+      std::cout << "i: " << i
+                << " gold: " << gold[i]
+                << " observed: " << (*freq)[i] << "\n";
+
+    assert(cudaSuccess == cudaDeviceSynchronize());
+  }
+}
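`try_emplace` mirrors `std::unordered_map::try_emplace`: the returned boolean distinguishes a fresh insertion from a pre-existing key, and the iterator points at the slot either way. A minimal host-side usage sketch of the table above (the `table_t` alias, `count_twice`, and the key `42` are illustrative, not part of the example):

// Illustrative usage of the concurrent_hash_table defined above; works on
// the host because the table's storage is CUDA managed memory.
using table_t = concurrent_hash_table<int, cuda::std::atomic<int>>;

void count_twice(table_t& t) {
    auto r1 = t.try_emplace(42, 0);  // r1.second == true: 42 newly inserted
    r1.first->fetch_add(1, cuda::std::memory_order_relaxed);

    auto r2 = t.try_emplace(42, 0);  // r2.second == false: 42 already exists;
                                     // r2.first points at the same slot
    r2.first->fetch_add(1, cuda::std::memory_order_relaxed);
}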
diff --git a/samples/rtc_example.cpp b/examples/rtc_example.cpp
similarity index 81%
rename from samples/rtc_example.cpp
rename to examples/rtc_example.cpp
index 85fb047d3a..b97376ae4e 100644
--- a/samples/rtc_example.cpp
+++ b/examples/rtc_example.cpp
@@ -39,19 +39,19 @@ THE SOFTWARE.
 
 const char *trie = R"xxx(
-#include <simt/atomic>
-#include <simt/cstddef>
-#include <simt/cstdint>
+#include <cuda/std/atomic>
+#include <cuda/std/cstddef>
+#include <cuda/std/cstdint>
 
 template<class T> static constexpr T min(T a, T b) { return a < b ? a : b; }
 
 struct trie {
     struct ref {
-        simt::std::atomic<trie*> ptr = ATOMIC_VAR_INIT(nullptr);
+        cuda::std::atomic<trie*> ptr = ATOMIC_VAR_INIT(nullptr);
         // the flag will protect against multiple pointer updates
-        simt::std::atomic_flag flag = ATOMIC_FLAG_INIT;
+        cuda::std::atomic_flag flag = ATOMIC_FLAG_INIT;
     } next[26];
-    simt::std::atomic<short> count = ATOMIC_VAR_INIT(0);
+    cuda::std::atomic<short> count = ATOMIC_VAR_INIT(0);
 };
 __host__ __device__
 int index_of(char c) {
@@ -61,7 +61,7 @@ int index_of(char c) {
 };
 __host__ __device__
 void make_trie(/* trie to insert word counts into */ trie& root,
-               /* bump allocator to get new nodes*/ simt::std::atomic<trie*>& bump,
+               /* bump allocator to get new nodes*/ cuda::std::atomic<trie*>& bump,
                /* input */ const char* begin, const char* end,
                /* thread this invocation is for */ unsigned index,
                /* how many threads there are */ unsigned domain) {
@@ -80,7 +80,7 @@ void make_trie(/* trie to insert word counts into */ trie& root,
         auto const index = off >= size ? -1 : index_of(c);
         if(index == -1) {
             if(n != &root) {
-                n->count.fetch_add(1, simt::std::memory_order_relaxed);
+                n->count.fetch_add(1, cuda::std::memory_order_relaxed);
                 n = &root;
             }
             //end of last word?
@@ -89,20 +89,20 @@ void make_trie(/* trie to insert word counts into */ trie& root,
             else continue;
         }
-        if(n->next[index].ptr.load(simt::std::memory_order_acquire) == nullptr) {
-            if(n->next[index].flag.test_and_set(simt::std::memory_order_relaxed))
-                while(n->next[index].ptr.load(simt::std::memory_order_acquire) == nullptr);
+        if(n->next[index].ptr.load(cuda::std::memory_order_acquire) == nullptr) {
+            if(n->next[index].flag.test_and_set(cuda::std::memory_order_relaxed))
+                while(n->next[index].ptr.load(cuda::std::memory_order_acquire) == nullptr);
             else {
-                auto next = bump.fetch_add(1, simt::std::memory_order_relaxed);
-                n->next[index].ptr.store(next, simt::std::memory_order_release);
+                auto next = bump.fetch_add(1, cuda::std::memory_order_relaxed);
+                n->next[index].ptr.store(next, cuda::std::memory_order_release);
             }
         }
-        n = n->next[index].ptr.load(simt::std::memory_order_relaxed);
+        n = n->next[index].ptr.load(cuda::std::memory_order_relaxed);
     }
 }
 
 __global__ // __launch_bounds__(1024, 1)
-void call_make_trie(trie* t, simt::std::atomic<trie*>* bump, const char* begin, const char* end) {
+void call_make_trie(trie* t, cuda::std::atomic<trie*>* bump, const char* begin, const char* end) {
 
    auto const index = blockDim.x * blockIdx.x + threadIdx.x;
    auto const domain = gridDim.x * blockDim.x;
@@ -127,15 +127,13 @@ int main(int argc, char *argv[])
                                        NULL));          // includeNames
 
   const char *opts[] = {"-std=c++11",
-                        "-I/usr/include/linux",
-                        "-I/usr/include/c++/7.3.0",
                         "-I/usr/local/cuda/include",
-                        "-I/home/olivier/freestanding/include",
+                        "-I../../include",
                         "--gpu-architecture=compute_70",
                         "--relocatable-device-code=true",
                         "-default-device"};
   nvrtcResult compileResult = nvrtcCompileProgram(prog,  // prog
-                                                  8,     // numOptions
+                                                  6,     // numOptions
                                                   opts); // options
 
   // Obtain compilation log from the program.
   size_t logSize;
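The option array and `numOptions` have to be kept in sync by hand here (this hunk shrinks both from 8 to 6). A minimal sketch of a self-counting alternative, assuming the same `opts` contents; this refactor is not part of the change:

// Sketch: derive the option count from the array itself so numOptions
// can never go stale when options are added or removed.
const char* opts[] = {"-std=c++11",
                      "-I/usr/local/cuda/include",
                      "-I../../include",
                      "--gpu-architecture=compute_70",
                      "--relocatable-device-code=true",
                      "-default-device"};
nvrtcResult compileResult =
    nvrtcCompileProgram(prog,
                        static_cast<int>(sizeof(opts) / sizeof(opts[0])),
                        opts);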
diff --git a/samples/trie.cu b/examples/trie.cu
similarity index 95%
rename from samples/trie.cu
rename to examples/trie.cu
index 9abef0763e..ba8f99b47e 100644
--- a/samples/trie.cu
+++ b/examples/trie.cu
@@ -179,13 +179,18 @@ int main() {
 
     std::basic_string<char, std::char_traits<char>, managed_allocator<char>> input;
     char const* files[] = {
-        "2600-0.txt", "2701-0.txt", "35-0.txt", "84-0.txt", "8800.txt",
-        "pg1727.txt", "pg55.txt", "pg6130.txt", "pg996.txt", "1342-0.txt"
+        "books/2600-0.txt", "books/2701-0.txt", "books/35-0.txt", "books/84-0.txt", "books/8800.txt",
+        "books/pg1727.txt", "books/pg55.txt", "books/pg6130.txt", "books/pg996.txt", "books/1342-0.txt"
     };
     for(auto* ptr : files) {
+        std::cout << ptr << std::endl;
         auto const cur = input.size();
         std::ifstream in(ptr);
+        if(in.fail()) {
+            std::cerr << "Failed to open file: " << ptr << std::endl;
+            return -1;
+        }
         in.seekg(0, std::ios_base::end);
         auto const pos = in.tellg();
         input.resize(cur + pos);
diff --git a/samples/trie_mt.cpp b/examples/trie_mt.cpp
similarity index 93%
rename from samples/trie_mt.cpp
rename to examples/trie_mt.cpp
index e59407b0c5..3dc7fd8f20 100644
--- a/samples/trie_mt.cpp
+++ b/examples/trie_mt.cpp
@@ -120,13 +120,18 @@ int main() {
 
     std::string input;
     char const* files[] = {
-        "2600-0.txt", "2701-0.txt", "35-0.txt", "84-0.txt", "8800.txt",
-        "pg1727.txt", "pg55.txt", "pg6130.txt", "pg996.txt", "1342-0.txt"
+        "books/2600-0.txt", "books/2701-0.txt", "books/35-0.txt", "books/84-0.txt", "books/8800.txt",
+        "books/pg1727.txt", "books/pg55.txt", "books/pg6130.txt", "books/pg996.txt", "books/1342-0.txt"
     };
     for(auto* ptr : files) {
+        std::cout << ptr << std::endl;
        auto const cur = input.size();
        std::ifstream in(ptr);
+        if(in.fail()) {
+            std::cerr << "Failed to open file: " << ptr << std::endl;
+            return -1;
+        }
        in.seekg(0, std::ios_base::end);
        auto const pos = in.tellg();
        input.resize(cur + pos);
diff --git a/samples/trie_st.cpp b/examples/trie_st.cpp
similarity index 89%
rename from samples/trie_st.cpp
rename to examples/trie_st.cpp
index f0fb4f1c92..678c71e252 100644
--- a/samples/trie_st.cpp
+++ b/examples/trie_st.cpp
@@ -91,13 +91,18 @@ int main() {
 
     std::string input;
     char const* files[] = {
-        "2600-0.txt", "2701-0.txt", "35-0.txt", "84-0.txt", "8800.txt",
-        "pg1727.txt", "pg55.txt", "pg6130.txt", "pg996.txt", "1342-0.txt"
+        "books/2600-0.txt", "books/2701-0.txt", "books/35-0.txt", "books/84-0.txt", "books/8800.txt",
+        "books/pg1727.txt", "books/pg55.txt", "books/pg6130.txt", "books/pg996.txt", "books/1342-0.txt"
     };
     for(auto* ptr : files) {
+        std::cout << ptr << std::endl;
        auto const cur = input.size();
        std::ifstream in(ptr);
+        if(in.fail()) {
+            std::cerr << "Failed to open file: " << ptr << std::endl;
+            return -1;
+        }
        in.seekg(0, std::ios_base::end);
        auto const pos = in.tellg();
        input.resize(cur + pos);
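All three trie programs now prefix the book paths with books/ (matching the CMake download step) and fail fast when a file is missing instead of silently reading nothing. Their shared read loop, condensed into one function for reference; `read_file_into` is an illustrative name, and the final read-back lines follow the same shape as the samples' code after the hunks above:

#include <fstream>
#include <iostream>
#include <string>

// Condensed version of the pattern the trie examples use: open, verify,
// size the file via seekg/tellg, then append its contents onto `input`.
bool read_file_into(std::string& input, char const* path) {
    std::ifstream in(path);
    if (in.fail()) {
        std::cerr << "Failed to open file: " << path << std::endl;
        return false;
    }
    auto const cur = input.size();
    in.seekg(0, std::ios_base::end);
    auto const pos = in.tellg();
    input.resize(cur + pos);
    in.seekg(0, std::ios_base::beg);
    in.read(&input[cur], pos);
    return true;
}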
"${CMAKE_CUDA_FLAGS} --expt-relaxed-constexpr --expt-extended-lambda") - -set(HIGHEST_COMPUTE_ARCH 80) -set(KNOWN_COMPUTE_ARCHS 70 72 75 80) - -option(DISABLE_ARCH_BY_DEFAULT "If ON, then all CUDA architectures are disabled on the initial CMake run." OFF) -set(OPTION_INIT ON) -if (DISABLE_ARCH_BY_DEFAULT) - set(OPTION_INIT OFF) -endif () -if (NOT ${HIGHEST_COMPUTE_ARCH} IN_LIST KNOWN_COMPUTE_ARCHS) - message(FATAL_ERROR "When changing the highest compute version, don't forget to add it to the list!") -endif () - -foreach (COMPUTE_ARCH IN LISTS KNOWN_COMPUTE_ARCHS) - option(ENABLE_COMPUTE_${COMPUTE_ARCH} "Enable code generation for tests for sm_${COMPUTE_ARCH}" ${OPTION_INIT}) - if (ENABLE_COMPUTE_${COMPUTE_ARCH}) - set(COMPUTE_ARCHS "${COMPUTE_ARCHS} ${COMPUTE_ARCH}") - set(COMPUTE_MESSAGE "${COMPUTE_MESSAGE} sm_${COMPUTE_ARCH}") - set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -gencode arch=compute_${COMPUTE_ARCH},code=sm_${COMPUTE_ARCH}") - endif () -endforeach () - -option(ENABLE_COMPUTE_FUTURE "Enable code generation for tests for compute_${HIGHEST_COMPUTE_ARCH}" ${OPTION_INIT}) -if (ENABLE_COMPUTE_FUTURE) - set(COMPUTE_MESSAGE "${COMPUTE_MESSAGE} compute_${HIGHEST_COMPUTE_ARCH}") - set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -gencode arch=compute_${HIGHEST_COMPUTE_ARCH},code=compute_${HIGHEST_COMPUTE_ARCH}") -endif () - -message(STATUS "Enabled CUDA architectures:${COMPUTE_MESSAGE}") - -include_directories(${CMAKE_SOURCE_DIR}/../include) - -find_package(Threads REQUIRED) -find_package(OpenMP) - -add_executable(benchmark_host benchmark.cpp) - -target_link_libraries(benchmark_host PUBLIC Threads::Threads) -if(OpenMP_CXX_FOUND) - target_link_libraries(benchmark_host PUBLIC OpenMP::OpenMP_CXX) -endif() - -add_executable(benchmark_device benchmark.cu) diff --git a/samples/books.sh b/samples/books.sh deleted file mode 100755 index 434119267d..0000000000 --- a/samples/books.sh +++ /dev/null @@ -1,11 +0,0 @@ -#!/usr/bin/env bash -curl -O -# https://www.gutenberg.org/files/2600/2600-0.txt -curl -O -# http://www.gutenberg.org/cache/epub/996/pg996.txt -curl -O -# http://www.gutenberg.org/cache/epub/55/pg55.txt -curl -O -# https://www.gutenberg.org/files/8800/8800.txt -curl -O -# https://www.gutenberg.org/files/84/84-0.txt -curl -O -# http://www.gutenberg.org/cache/epub/6130/pg6130.txt -curl -O -# http://www.gutenberg.org/cache/epub/1727/pg1727.txt -curl -O -# https://www.gutenberg.org/files/2701/2701-0.txt -curl -O -# https://www.gutenberg.org/files/35/35-0.txt -curl -O -# https://www.gutenberg.org/files/1342/1342-0.txt diff --git a/samples/linux.sh b/samples/linux.sh deleted file mode 100755 index abde3255b3..0000000000 --- a/samples/linux.sh +++ /dev/null @@ -1,6 +0,0 @@ -#!/usr/bin/env bash -g++ -std=c++11 trie_st.cpp -O2 -o trie_st -g++ -std=c++11 trie_mt.cpp -O2 -o trie_mt -pthread -nvcc -I../include -arch=compute_70 -std=c++11 -O2 trie.cu --expt-relaxed-constexpr -o trie -g++ -I../include -std=c++14 benchmark.cpp -O2 -lpthread -o benchmark -nvcc -I../include -arch=compute_70 -std=c++14 benchmark.cu -O2 -lpthread --expt-relaxed-constexpr --expt-extended-lambda -o benchmark diff --git a/samples/linux_clang.sh b/samples/linux_clang.sh deleted file mode 100755 index 852f12f504..0000000000 --- a/samples/linux_clang.sh +++ /dev/null @@ -1,2 +0,0 @@ -#!/usr/bin/env bash -clang++-7 -I../include --cuda-gpu-arch=sm_70 -std=c++11 -O2 trie.cu -L/usr/local/cuda/lib64/ -lcudart_static -pthread -ldl -lrt -o trie diff --git a/samples/linux_rtc.sh b/samples/linux_rtc.sh deleted file mode 100755 index 
diff --git a/samples/linux_rtc.sh b/samples/linux_rtc.sh
deleted file mode 100755
index 0d9e08716e..0000000000
--- a/samples/linux_rtc.sh
+++ /dev/null
@@ -1,2 +0,0 @@
-#!/usr/bin/env bash
-nvcc rtc_example.cpp -lnvrtc -o rtc; ./rtc
diff --git a/samples/openmp.sh b/samples/openmp.sh
deleted file mode 100755
index 2950adf1d9..0000000000
--- a/samples/openmp.sh
+++ /dev/null
@@ -1 +0,0 @@
-clang++ -D_LIBCPP_BARRIER_BUTTERFLY -I../include -fopenmp=libomp -L../../llvm-project/build/lib/ -std=c++11 -O2 benchmark.cpp -lstdc++ -lpthread -lm -o benchmark
diff --git a/samples/windows.bat b/samples/windows.bat
deleted file mode 100644
index 176a7a01a3..0000000000
--- a/samples/windows.bat
+++ /dev/null
@@ -1,4 +0,0 @@
-call vcvars64.bat
-cl /EHsc trie_st.cpp /O2
-cl /EHsc trie_mt.cpp /O2
-nvcc -I../include -arch=compute_70 -O2 trie.cu --expt-relaxed-constexpr -Xcompiler /Zc:__cplusplus -o trie