Skip to content

Commit

Permalink
Merge pull request #174 from PointKernel/template-contains
Browse files Browse the repository at this point in the history
Make device `contains` take a template key parameter
  • Loading branch information
PointKernel authored Jun 13, 2022
2 parents 5502903 + d19d09d commit 0a832bc
Show file tree
Hide file tree
Showing 18 changed files with 410 additions and 102 deletions.
2 changes: 0 additions & 2 deletions ci/gpu/build.sh
Original file line number Diff line number Diff line change
Expand Up @@ -37,8 +37,6 @@ conda activate cuda

gpuci_logger "Check versions"
python --version
$CC --version
$CXX --version

gpuci_logger "Check conda environment"
conda info
Expand Down
4 changes: 3 additions & 1 deletion include/cuco/detail/hash_functions.cuh
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2017-2021, NVIDIA CORPORATION.
* Copyright (c) 2017-2022, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand All @@ -18,6 +18,8 @@

namespace cuco {

using hash_value_type = uint32_t;

namespace detail {

// MurmurHash3_32 implementation from
Expand Down
18 changes: 12 additions & 6 deletions include/cuco/detail/probe_sequence_impl.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,8 @@

#include <cuda/std/atomic>

#include <cooperative_groups.h>

namespace cuco {
namespace detail {

Expand Down Expand Up @@ -186,13 +188,15 @@ class linear_probing_impl
*
* If vector-load is enabled, the return slot is always even to avoid illegal memory access.
*
* @tparam CG CUDA Cooperative Groups type
* @tparam ProbeKey Probe key type
*
* @param g the Cooperative Group for which the initial slot is needed
* @param k The key to get the slot for
* @return Pointer to the initial slot for `k`
*/
template <typename CG>
__device__ __forceinline__ iterator initial_slot(CG const& g, Key const k) noexcept
template <typename ProbeKey>
__device__ __forceinline__ iterator
initial_slot(cooperative_groups::thread_block_tile<cg_size> const& g, ProbeKey const& k) noexcept
{
auto const hash_value = [&]() {
auto const tmp = hash_(k);
Expand Down Expand Up @@ -307,13 +311,15 @@ class double_hashing_impl
* If vector-load is enabled, the return slot is always a multiple of (`cg_size` * `vector_width`)
* to avoid illegal memory access.
*
* @tparam CG CUDA Cooperative Groups type
* @tparam ProbeKey Probe key type
*
* @param g the Cooperative Group for which the initial slot is needed
* @param k The key to get the slot for
* @return Pointer to the initial slot for `k`
*/
template <typename CG>
__device__ __forceinline__ iterator initial_slot(CG const& g, Key const k) noexcept
template <typename ProbeKey>
__device__ __forceinline__ iterator
initial_slot(cooperative_groups::thread_block_tile<cg_size> const& g, ProbeKey const& k) noexcept
{
std::size_t index;
auto const hash_value = hash1_(k);
Expand Down
13 changes: 8 additions & 5 deletions include/cuco/detail/static_map.inl
Original file line number Diff line number Diff line change
Expand Up @@ -715,9 +715,9 @@ static_map<Key, Value, Scope, Allocator>::device_view::find(CG g,
}

template <typename Key, typename Value, cuda::thread_scope Scope, typename Allocator>
template <typename Hash, typename KeyEqual>
template <typename ProbeKey, typename Hash, typename KeyEqual>
__device__ bool static_map<Key, Value, Scope, Allocator>::device_view::contains(
Key const& k, Hash hash, KeyEqual key_equal) const noexcept
ProbeKey const& k, Hash hash, KeyEqual key_equal) const noexcept
{
auto current_slot = initial_slot(k, hash);

Expand All @@ -733,9 +733,12 @@ __device__ bool static_map<Key, Value, Scope, Allocator>::device_view::contains(
}

template <typename Key, typename Value, cuda::thread_scope Scope, typename Allocator>
template <typename CG, typename Hash, typename KeyEqual>
__device__ bool static_map<Key, Value, Scope, Allocator>::device_view::contains(
CG g, Key const& k, Hash hash, KeyEqual key_equal) const noexcept
template <typename CG, typename ProbeKey, typename Hash, typename KeyEqual>
__device__ std::enable_if_t<std::is_invocable_v<KeyEqual, ProbeKey, Key>, bool>
static_map<Key, Value, Scope, Allocator>::device_view::contains(CG const& g,
ProbeKey const& k,
Hash hash,
KeyEqual key_equal) const noexcept
{
auto current_slot = initial_slot(g, k, hash);

Expand Down
42 changes: 28 additions & 14 deletions include/cuco/detail/static_multimap/device_view_impl.inl
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2021, NVIDIA CORPORATION.
* Copyright (c) 2021-2022, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand All @@ -15,13 +15,15 @@
*/

#include <cuco/detail/bitwise_compare.cuh>
#include <cuco/detail/static_multimap/kernels.cuh>
#include <cuco/detail/utils.cuh>

#include <thrust/tuple.h>
#include <thrust/type_traits/is_contiguous_iterator.h>

namespace cuco {
#include <cooperative_groups.h>

namespace cuco {
template <typename Key,
typename Value,
cuda::thread_scope Scope,
Expand Down Expand Up @@ -69,13 +71,16 @@ class static_multimap<Key, Value, Scope, Allocator, ProbeSequence>::device_view_
*
* To be used for Cooperative Group based probing.
*
* @tparam CG Cooperative Group type
* @tparam ProbeKey Probe key type
*
* @param g the Cooperative Group for which the initial slot is needed
* @param k The key to get the slot for
* @return Pointer to the initial slot for `k`
*/
template <typename CG>
__device__ __forceinline__ iterator initial_slot(CG const& g, Key const& k) noexcept
template <typename ProbeKey>
__device__ __forceinline__ iterator
initial_slot(cooperative_groups::thread_block_tile<ProbeSequence::cg_size> const& g,
ProbeKey const& k) noexcept
{
return probe_sequence_.initial_slot(g, k);
}
Expand All @@ -85,13 +90,16 @@ class static_multimap<Key, Value, Scope, Allocator, ProbeSequence>::device_view_
*
* To be used for Cooperative Group based probing.
*
* @tparam CG Cooperative Group type
* @tparam ProbeKey Probe key type
*
* @param g the Cooperative Group for which the initial slot is needed
* @param k The key to get the slot for
* @return Pointer to the initial slot for `k`
*/
template <typename CG>
__device__ __forceinline__ const_iterator initial_slot(CG g, Key const& k) const noexcept
template <typename ProbeKey>
__device__ __forceinline__ const_iterator
initial_slot(cooperative_groups::thread_block_tile<ProbeSequence::cg_size> const& g,
ProbeKey const& k) const noexcept
{
return probe_sequence_.initial_slot(g, k);
}
Expand Down Expand Up @@ -568,18 +576,21 @@ class static_multimap<Key, Value, Scope, Allocator, ProbeSequence>::device_view_
* `contains` at moderate to high load factors.
*
* @tparam uses_vector_load Boolean flag indicating whether vector loads are used
* @tparam CG Cooperative Group type
* @tparam ProbeKey Probe key type
* @tparam KeyEqual Binary callable type
*
* @param g The Cooperative Group used to perform the contains operation
* @param k The key to search for
* @param key_equal The binary callable used to compare two keys
* for equality
* @return A boolean indicating whether the key/value pair
* containing `k` was inserted
*/
template <bool uses_vector_load, typename CG, typename KeyEqual>
template <bool uses_vector_load, typename ProbeKey, typename KeyEqual>
__device__ __forceinline__ std::enable_if_t<uses_vector_load, bool> contains(
CG g, Key const& k, KeyEqual key_equal) noexcept
cooperative_groups::thread_block_tile<ProbeSequence::cg_size> const& g,
ProbeKey const& k,
KeyEqual key_equal) noexcept
{
auto current_slot = initial_slot(g, k);

Expand Down Expand Up @@ -616,18 +627,21 @@ class static_multimap<Key, Value, Scope, Allocator, ProbeSequence>::device_view_
* `contains` at moderate to high load factors.
*
* @tparam uses_vector_load Boolean flag indicating whether vector loads are used
* @tparam CG Cooperative Group type
* @tparam ProbeKey Probe key type
* @tparam KeyEqual Binary callable type
*
* @param g The Cooperative Group used to perform the contains operation
* @param k The key to search for
* @param key_equal The binary callable used to compare two keys
* for equality
* @return A boolean indicating whether the key/value pair
* containing `k` was inserted
*/
template <bool uses_vector_load, typename CG, typename KeyEqual>
template <bool uses_vector_load, typename ProbeKey, typename KeyEqual>
__device__ __forceinline__ std::enable_if_t<not uses_vector_load, bool> contains(
CG g, Key const& k, KeyEqual key_equal) noexcept
cooperative_groups::thread_block_tile<ProbeSequence::cg_size> const& g,
ProbeKey const& k,
KeyEqual key_equal) noexcept
{
auto current_slot = initial_slot(g, k);

Expand Down
6 changes: 3 additions & 3 deletions include/cuco/detail/static_multimap/static_multimap.inl
Original file line number Diff line number Diff line change
Expand Up @@ -108,7 +108,7 @@ template <typename Key,
class ProbeSequence>
template <typename InputIt, typename OutputIt, typename KeyEqual>
void static_multimap<Key, Value, Scope, Allocator, ProbeSequence>::contains(
InputIt first, InputIt last, OutputIt output_begin, cudaStream_t stream, KeyEqual key_equal) const
InputIt first, InputIt last, OutputIt output_begin, KeyEqual key_equal, cudaStream_t stream) const
{
auto const num_keys = std::distance(first, last);
if (num_keys == 0) { return; }
Expand Down Expand Up @@ -536,11 +536,11 @@ template <typename Key,
cuda::thread_scope Scope,
typename Allocator,
class ProbeSequence>
template <typename KeyEqual>
template <typename ProbeKey, typename KeyEqual>
__device__ __forceinline__ bool
static_multimap<Key, Value, Scope, Allocator, ProbeSequence>::device_view::contains(
cooperative_groups::thread_block_tile<ProbeSequence::cg_size> const& g,
Key const& k,
ProbeKey const& k,
KeyEqual key_equal) noexcept
{
return impl_.contains<uses_vector_load()>(g, k, key_equal);
Expand Down
Loading

0 comments on commit 0a832bc

Please sign in to comment.