Skip to content

Commit

Permalink
Address primitives performance regression with the added edge masking…
Browse files Browse the repository at this point in the history
… support (when edge masking is not in use) (#4119)

This addresses the performance regression introduced by the added edge masking support, which is observed even when edge masking is disabled.

We addressed the issue for `per_v_transform_reduce_incoming_e` and `per_v_transform_reduce_outgoing_e` in #4085.

Using a similar approach, we address the issue for `transform_e`, `transform_reduce_e`, and `detail::nbr_intersection` in this PR.

Authors:
  - Seunghwa Kang (https://github.com/seunghwak)

Approvers:
  - Chuck Hastings (https://github.com/ChuckHastings)
  - Naim (https://github.com/naimnv)

URL: #4119
  • Loading branch information
seunghwak authored Feb 9, 2024
1 parent 0e753b8 commit 38a8cdb
Show file tree
Hide file tree
Showing 3 changed files with 408 additions and 251 deletions.
73 changes: 48 additions & 25 deletions cpp/src/prims/detail/nbr_intersection.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -319,7 +319,8 @@ struct pick_min_degree_t {
}
};

template <typename InputKeyIterator0,
template <bool check_edge_mask,
typename InputKeyIterator0,
typename InputKeyIterator1,
typename InputValueIterator0, // should be void* if invalid
typename InputValueIterator1, // should be void* if invalid
Expand Down Expand Up @@ -356,10 +357,14 @@ __device__ edge_t set_intersection_by_key_with_mask(InputKeyIterator0 input_key_
auto output_idx = output_start_offset;
while ((idx0 < (input_start_offset0 + input_size0)) &&
(idx1 < (input_start_offset1 + input_size1))) {
bool valid0 = apply_mask0 ? check_bit_set(idx0) : true;
bool valid1 = apply_mask1 ? check_bit_set(idx1) : true;
if (!valid0) { ++idx0; }
if (!valid1) { ++idx1; }
bool valid0 = true;
bool valid1 = true;
if constexpr (check_edge_mask) {
valid0 = apply_mask0 ? check_bit_set(idx0) : true;
valid1 = apply_mask1 ? check_bit_set(idx1) : true;
if (!valid0) { ++idx0; }
if (!valid1) { ++idx1; }
}

if (valid0 && valid1) {
auto key0 = *(input_key_first0 + idx0);
Expand Down Expand Up @@ -522,24 +527,42 @@ struct copy_intersecting_nbrs_and_update_intersection_size_t {
// vertices in a single warp (better optimize if this becomes a performance
// bottleneck)

auto mask_first = edge_partition_e_mask ? (*edge_partition_e_mask).value_first()
: static_cast<uint32_t const*>(nullptr);
auto intersection_size = set_intersection_by_key_with_mask(
indices0,
indices1,
edge_property_values0,
edge_property_values1,
mask_first,
nbr_intersection_indices.begin(),
nbr_intersection_e_property_values0,
nbr_intersection_e_property_values1,
local_edge_offset0,
local_degree0,
(std::is_same_v<FirstElementToIdxMap, void*> && edge_partition_e_mask),
local_edge_offset1,
local_degree1,
(std::is_same_v<SecondElementToIdxMap, void*> && edge_partition_e_mask),
nbr_intersection_offsets[i]);
edge_t intersection_size{};
if (edge_partition_e_mask) {
intersection_size =
set_intersection_by_key_with_mask<true>(indices0,
indices1,
edge_property_values0,
edge_property_values1,
(*edge_partition_e_mask).value_first(),
nbr_intersection_indices.begin(),
nbr_intersection_e_property_values0,
nbr_intersection_e_property_values1,
local_edge_offset0,
local_degree0,
std::is_same_v<FirstElementToIdxMap, void*>,
local_edge_offset1,
local_degree1,
std::is_same_v<SecondElementToIdxMap, void*>,
nbr_intersection_offsets[i]);
} else {
intersection_size =
set_intersection_by_key_with_mask<false>(indices0,
indices1,
edge_property_values0,
edge_property_values1,
static_cast<uint32_t const*>(nullptr),
nbr_intersection_indices.begin(),
nbr_intersection_e_property_values0,
nbr_intersection_e_property_values1,
local_edge_offset0,
local_degree0,
false,
local_edge_offset1,
local_degree1,
false,
nbr_intersection_offsets[i]);
}

thrust::fill(
thrust::seq,
Expand Down Expand Up @@ -714,7 +737,7 @@ nbr_intersection(raft::handle_t const& handle,
auto edge_mask_view = graph_view.edge_mask_view();

std::optional<std::unique_ptr<kv_store_t<vertex_t, vertex_t, false>>> major_to_idx_map_ptr{
std::nullopt};
std::nullopt}; // idx to major_nbr_offsets
std::optional<rmm::device_uvector<edge_t>> major_nbr_offsets{std::nullopt};
std::optional<rmm::device_uvector<vertex_t>> major_nbr_indices{std::nullopt};

Expand Down Expand Up @@ -1041,7 +1064,7 @@ nbr_intersection(raft::handle_t const& handle,
// 3. Collect neighbor list for minors (for the neighbors within the minor range for this GPU)

std::optional<std::unique_ptr<kv_store_t<vertex_t, vertex_t, false>>> minor_to_idx_map_ptr{
std::nullopt};
std::nullopt}; // idx to minor_nbr_offsets
std::optional<rmm::device_uvector<size_t>> minor_nbr_offsets{std::nullopt};
std::optional<rmm::device_uvector<vertex_t>> minor_nbr_indices{std::nullopt};

Expand Down
Loading

0 comments on commit 38a8cdb

Please sign in to comment.