Skip to content

Commit

Permalink
Fix MG weighted similarity test failure (#4054)
Browse files Browse the repository at this point in the history
MG weighted similarity tests assume symmetric graphs (undirected). When we remove multi-edges, we pick arbitrary edges and this can lead to an asymmetry in edge weights. This PR adds an additional flag to keep minimum value edges in `remove_multi_edges` and use this function if the input graph is symmetric to maintain weight symmetry.

Applying the non-breaking label as this PR does not change existing code behavior if `keep_min_value_edge` is not provided.

Authors:
  - Seunghwa Kang (https://github.com/seunghwak)

Approvers:
  - Chuck Hastings (https://github.com/ChuckHastings)
  - Naim (https://github.com/naimnv)

URL: #4054
  • Loading branch information
seunghwak authored Jan 10, 2024
1 parent cd5fc6f commit 5e8e9b5
Show file tree
Hide file tree
Showing 7 changed files with 97 additions and 51 deletions.
21 changes: 16 additions & 5 deletions cpp/include/cugraph/graph_functions.hpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2020-2023, NVIDIA CORPORATION.
* Copyright (c) 2020-2024, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -1005,9 +1005,14 @@ remove_self_loops(raft::handle_t const& handle,
std::optional<rmm::device_uvector<edge_type_t>>&& edgelist_edge_types);

/**
* @brief Remove all but one edge when a multi-edge exists. Note that this function does not use
* stable methods. When a multi-edge exists, one of the edges will remain, there is no
* guarantee on which one will remain.
* @brief Remove all but one edge when a multi-edge exists.
*
* When a multi-edge exists, one of the edges will remain. If @p keep_min_value_edge is false, an
* arbitrary edge will be selected among the edges in the multi-edge. If @p keep_min_value_edge is
* true, the edge with the minimum value will be selected. The edge weights will be first compared
* (if @p edgelist_weights.has_value() is true); edge IDs will be compared next (if @p
* edgelist_edge_ids.has_value() is true); and edge types (if @p edgelist_edge_types.has_value() is
* true) will compared last.
*
* In an MG context it is assumed that edges have been shuffled to the proper GPU,
* in which case any multi-edges will be on the same GPU.
Expand All @@ -1024,6 +1029,11 @@ remove_self_loops(raft::handle_t const& handle,
* @param edgelist_weights Optional list of edge weights
* @param edgelist_edge_ids Optional list of edge ids
* @param edgelist_edge_types Optional list of edge types
* @param keep_min_value_edge Flag indicating whether to keep an arbitrary edge (false) or the
* minimum value edge (true) among the edges in a multi-edge. Relevant only if @p
* edgelist_weights.has_value() | @p edgelist_edge_ids.has_value() | @p
* edgelist_edge_types.has_value() is true. Setting this to true incurs performance overhead as this
* requires more comparisons.
* @return Tuple of vectors storing edge sources, destinations, optional weights,
* optional edge ids, optional edge types.
*/
Expand All @@ -1038,6 +1048,7 @@ remove_multi_edges(raft::handle_t const& handle,
rmm::device_uvector<vertex_t>&& edgelist_dsts,
std::optional<rmm::device_uvector<weight_t>>&& edgelist_weights,
std::optional<rmm::device_uvector<edge_t>>&& edgelist_edge_ids,
std::optional<rmm::device_uvector<edge_type_t>>&& edgelist_edge_types);
std::optional<rmm::device_uvector<edge_type_t>>&& edgelist_edge_types,
bool keep_min_value_edge = false);

} // namespace cugraph
7 changes: 5 additions & 2 deletions cpp/src/c_api/graph_mg.cpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2021-2023, NVIDIA CORPORATION.
* Copyright (c) 2021-2024, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -217,7 +217,10 @@ struct create_graph_functor : public cugraph::c_api::abstract_functor {
std::move(edgelist_dsts),
std::move(edgelist_weights),
std::move(edgelist_edge_ids),
std::move(edgelist_edge_types));
std::move(edgelist_edge_types),
properties_->is_symmetric
? true /* keep minimum weight edges to maintain symmetry */
: false);
}

std::tie(*graph, new_edge_weights, new_edge_ids, new_edge_types, new_number_map) =
Expand Down
7 changes: 5 additions & 2 deletions cpp/src/c_api/graph_sg.cpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2021-2023, NVIDIA CORPORATION.
* Copyright (c) 2021-2024, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -200,7 +200,10 @@ struct create_graph_functor : public cugraph::c_api::abstract_functor {
std::move(edgelist_dsts),
std::move(edgelist_weights),
std::move(edgelist_edge_ids),
std::move(edgelist_edge_types));
std::move(edgelist_edge_types),
properties_->is_symmetric
? true /* keep minimum weight edges to maintain symmetry */
: false);
}

std::tie(*graph, new_edge_weights, new_edge_ids, new_edge_types, new_number_map) =
Expand Down
20 changes: 13 additions & 7 deletions cpp/src/structure/remove_multi_edges.cu
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2023, NVIDIA CORPORATION.
* Copyright (c) 2023-2024, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -27,7 +27,8 @@ remove_multi_edges(raft::handle_t const& handle,
rmm::device_uvector<int32_t>&& edgelist_dsts,
std::optional<rmm::device_uvector<float>>&& edgelist_weights,
std::optional<rmm::device_uvector<int32_t>>&& edgelist_edge_ids,
std::optional<rmm::device_uvector<int32_t>>&& edgelist_edge_types);
std::optional<rmm::device_uvector<int32_t>>&& edgelist_edge_types,
bool keep_min_value_edge);

template std::tuple<rmm::device_uvector<int32_t>,
rmm::device_uvector<int32_t>,
Expand All @@ -39,7 +40,8 @@ remove_multi_edges(raft::handle_t const& handle,
rmm::device_uvector<int32_t>&& edgelist_dsts,
std::optional<rmm::device_uvector<float>>&& edgelist_weights,
std::optional<rmm::device_uvector<int64_t>>&& edgelist_edge_ids,
std::optional<rmm::device_uvector<int32_t>>&& edgelist_edge_types);
std::optional<rmm::device_uvector<int32_t>>&& edgelist_edge_types,
bool keep_min_value_edge);

template std::tuple<rmm::device_uvector<int64_t>,
rmm::device_uvector<int64_t>,
Expand All @@ -51,7 +53,8 @@ remove_multi_edges(raft::handle_t const& handle,
rmm::device_uvector<int64_t>&& edgelist_dsts,
std::optional<rmm::device_uvector<float>>&& edgelist_weights,
std::optional<rmm::device_uvector<int64_t>>&& edgelist_edge_ids,
std::optional<rmm::device_uvector<int32_t>>&& edgelist_edge_types);
std::optional<rmm::device_uvector<int32_t>>&& edgelist_edge_types,
bool keep_min_value_edge);

template std::tuple<rmm::device_uvector<int32_t>,
rmm::device_uvector<int32_t>,
Expand All @@ -63,7 +66,8 @@ remove_multi_edges(raft::handle_t const& handle,
rmm::device_uvector<int32_t>&& edgelist_dsts,
std::optional<rmm::device_uvector<double>>&& edgelist_weights,
std::optional<rmm::device_uvector<int32_t>>&& edgelist_edge_ids,
std::optional<rmm::device_uvector<int32_t>>&& edgelist_edge_types);
std::optional<rmm::device_uvector<int32_t>>&& edgelist_edge_types,
bool keep_min_value_edge);

template std::tuple<rmm::device_uvector<int32_t>,
rmm::device_uvector<int32_t>,
Expand All @@ -75,7 +79,8 @@ remove_multi_edges(raft::handle_t const& handle,
rmm::device_uvector<int32_t>&& edgelist_dsts,
std::optional<rmm::device_uvector<double>>&& edgelist_weights,
std::optional<rmm::device_uvector<int64_t>>&& edgelist_edge_ids,
std::optional<rmm::device_uvector<int32_t>>&& edgelist_edge_types);
std::optional<rmm::device_uvector<int32_t>>&& edgelist_edge_types,
bool keep_min_value_edge);

template std::tuple<rmm::device_uvector<int64_t>,
rmm::device_uvector<int64_t>,
Expand All @@ -87,6 +92,7 @@ remove_multi_edges(raft::handle_t const& handle,
rmm::device_uvector<int64_t>&& edgelist_dsts,
std::optional<rmm::device_uvector<double>>&& edgelist_weights,
std::optional<rmm::device_uvector<int64_t>>&& edgelist_edge_ids,
std::optional<rmm::device_uvector<int32_t>>&& edgelist_edge_types);
std::optional<rmm::device_uvector<int32_t>>&& edgelist_edge_types,
bool keep_min_value_edge);

} // namespace cugraph
63 changes: 41 additions & 22 deletions cpp/src/structure/remove_multi_edges_impl.cuh
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2023, NVIDIA CORPORATION.
* Copyright (c) 2023-2024, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -104,10 +104,12 @@ group_multi_edges(
rmm::device_uvector<vertex_t>&& edgelist_srcs,
rmm::device_uvector<vertex_t>&& edgelist_dsts,
decltype(allocate_dataframe_buffer<edge_value_t>(0, rmm::cuda_stream_view{}))&& edgelist_values,
size_t mem_frugal_threshold)
size_t mem_frugal_threshold,
bool keep_min_value_edge)
{
auto pair_first = thrust::make_zip_iterator(edgelist_srcs.begin(), edgelist_dsts.begin());
auto value_first = get_dataframe_buffer_begin(edgelist_values);
auto edge_first = thrust::make_zip_iterator(pair_first, value_first);

if (edgelist_srcs.size() > mem_frugal_threshold) {
// FIXME: Tuning parameter to address high frequency multi-edges
Expand All @@ -128,19 +130,28 @@ group_multi_edges(
raft::update_host(
h_group_counts.data(), group_counts.data(), group_counts.size(), handle.get_stream());

thrust::sort_by_key(handle.get_thrust_policy(),
pair_first,
pair_first + h_group_counts[0],
get_dataframe_buffer_begin(edgelist_values));
thrust::sort_by_key(handle.get_thrust_policy(),
pair_first + h_group_counts[0],
pair_first + edgelist_srcs.size(),
get_dataframe_buffer_begin(edgelist_values) + h_group_counts[0]);
if (keep_min_value_edge) {
thrust::sort(handle.get_thrust_policy(), edge_first, edge_first + h_group_counts[0]);
thrust::sort(handle.get_thrust_policy(),
edge_first + h_group_counts[0],
edge_first + edgelist_srcs.size());
} else {
thrust::sort_by_key(
handle.get_thrust_policy(), pair_first, pair_first + h_group_counts[0], value_first);
thrust::sort_by_key(handle.get_thrust_policy(),
pair_first + h_group_counts[0],
pair_first + edgelist_srcs.size(),
value_first + h_group_counts[0]);
}
} else {
thrust::sort_by_key(handle.get_thrust_policy(),
pair_first,
pair_first + edgelist_srcs.size(),
get_dataframe_buffer_begin(edgelist_values));
if (keep_min_value_edge) {
thrust::sort(handle.get_thrust_policy(), edge_first, edge_first + edgelist_srcs.size());
} else {
thrust::sort_by_key(handle.get_thrust_policy(),
pair_first,
pair_first + edgelist_srcs.size(),
get_dataframe_buffer_begin(edgelist_values));
}
}

return std::make_tuple(
Expand All @@ -160,7 +171,8 @@ remove_multi_edges(raft::handle_t const& handle,
rmm::device_uvector<vertex_t>&& edgelist_dsts,
std::optional<rmm::device_uvector<weight_t>>&& edgelist_weights,
std::optional<rmm::device_uvector<edge_t>>&& edgelist_edge_ids,
std::optional<rmm::device_uvector<edge_type_t>>&& edgelist_edge_types)
std::optional<rmm::device_uvector<edge_type_t>>&& edgelist_edge_types,
bool keep_min_value_edge)
{
auto total_global_mem = handle.get_device_properties().totalGlobalMem;
size_t element_size = sizeof(vertex_t) * 2;
Expand All @@ -187,7 +199,8 @@ remove_multi_edges(raft::handle_t const& handle,
std::make_tuple(std::move(*edgelist_weights),
std::move(*edgelist_edge_ids),
std::move(*edgelist_edge_types)),
mem_frugal_threshold);
mem_frugal_threshold,
keep_min_value_edge);
} else {
std::forward_as_tuple(
edgelist_srcs, edgelist_dsts, std::tie(edgelist_weights, edgelist_edge_ids)) =
Expand All @@ -196,7 +209,8 @@ remove_multi_edges(raft::handle_t const& handle,
std::move(edgelist_srcs),
std::move(edgelist_dsts),
std::make_tuple(std::move(*edgelist_weights), std::move(*edgelist_edge_ids)),
mem_frugal_threshold);
mem_frugal_threshold,
keep_min_value_edge);
}
} else {
if (edgelist_edge_types) {
Expand All @@ -207,15 +221,17 @@ remove_multi_edges(raft::handle_t const& handle,
std::move(edgelist_srcs),
std::move(edgelist_dsts),
std::make_tuple(std::move(*edgelist_weights), std::move(*edgelist_edge_types)),
mem_frugal_threshold);
mem_frugal_threshold,
keep_min_value_edge);
} else {
std::forward_as_tuple(edgelist_srcs, edgelist_dsts, std::tie(edgelist_weights)) =
detail::group_multi_edges<vertex_t, thrust::tuple<weight_t>>(
handle,
std::move(edgelist_srcs),
std::move(edgelist_dsts),
std::make_tuple(std::move(*edgelist_weights)),
mem_frugal_threshold);
mem_frugal_threshold,
keep_min_value_edge);
}
}
} else {
Expand All @@ -228,15 +244,17 @@ remove_multi_edges(raft::handle_t const& handle,
std::move(edgelist_srcs),
std::move(edgelist_dsts),
std::make_tuple(std::move(*edgelist_edge_ids), std::move(*edgelist_edge_types)),
mem_frugal_threshold);
mem_frugal_threshold,
keep_min_value_edge);
} else {
std::forward_as_tuple(edgelist_srcs, edgelist_dsts, std::tie(edgelist_edge_ids)) =
detail::group_multi_edges<vertex_t, thrust::tuple<edge_t>>(
handle,
std::move(edgelist_srcs),
std::move(edgelist_dsts),
std::make_tuple(std::move(*edgelist_edge_ids)),
mem_frugal_threshold);
mem_frugal_threshold,
keep_min_value_edge);
}
} else {
if (edgelist_edge_types) {
Expand All @@ -246,7 +264,8 @@ remove_multi_edges(raft::handle_t const& handle,
std::move(edgelist_srcs),
std::move(edgelist_dsts),
std::make_tuple(std::move(*edgelist_edge_types)),
mem_frugal_threshold);
mem_frugal_threshold,
keep_min_value_edge);
} else {
std::tie(edgelist_srcs, edgelist_dsts) = detail::group_multi_edges(
handle, std::move(edgelist_srcs), std::move(edgelist_dsts), mem_frugal_threshold);
Expand Down
14 changes: 8 additions & 6 deletions cpp/tests/link_prediction/weighted_similarity_test.cpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2022-2023, NVIDIA CORPORATION.
* Copyright (c) 2022-2024, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -27,9 +27,9 @@

struct Similarity_Usecase {
bool use_weights{false};
bool check_correctness{true};
size_t max_seeds{std::numeric_limits<size_t>::max()};
size_t max_vertex_pairs_to_check{std::numeric_limits<size_t>::max()};
bool check_correctness{true};
};

template <typename input_usecase_t>
Expand Down Expand Up @@ -293,7 +293,7 @@ INSTANTIATE_TEST_SUITE_P(
// Disable weighted computation testing in 22.10
//::testing::Values(Similarity_Usecase{true, true, 20, 100}, Similarity_Usecase{false, true, 20,
//: 100}),
::testing::Values(Similarity_Usecase{true, true, 20, 100}),
::testing::Values(Similarity_Usecase{true, 20, 100, true}),
::testing::Values(cugraph::test::File_Usecase("test/datasets/karate.mtx"),
cugraph::test::File_Usecase("test/datasets/dolphins.mtx"))));

Expand All @@ -305,7 +305,7 @@ INSTANTIATE_TEST_SUITE_P(
// Disable weighted computation testing in 22.10
//::testing::Values(Similarity_Usecase{true, true, 20, 100},
//: Similarity_Usecase{false,true,20,100}),
::testing::Values(Similarity_Usecase{true, true, 20, 100}),
::testing::Values(Similarity_Usecase{true, 20, 100, true}),
::testing::Values(cugraph::test::Rmat_Usecase(10, 16, 0.57, 0.19, 0.19, 0, true, false))));

INSTANTIATE_TEST_SUITE_P(
Expand All @@ -319,7 +319,8 @@ INSTANTIATE_TEST_SUITE_P(
// disable correctness checks
// Disable weighted computation testing in 22.10
//::testing::Values(Similarity_Usecase{false, false}, Similarity_Usecase{true, false}),
::testing::Values(Similarity_Usecase{true, true}),
::testing::Values(Similarity_Usecase{
true, std::numeric_limits<size_t>::max(), std::numeric_limits<size_t>::max(), true}),
::testing::Values(cugraph::test::File_Usecase("test/datasets/karate.mtx"))));

INSTANTIATE_TEST_SUITE_P(
Expand All @@ -332,7 +333,8 @@ INSTANTIATE_TEST_SUITE_P(
::testing::Combine(
// disable correctness checks for large graphs
//::testing::Values(Similarity_Usecase{false, false}, Similarity_Usecase{true, false}),
::testing::Values(Similarity_Usecase{true, false}),
::testing::Values(Similarity_Usecase{
true, std::numeric_limits<size_t>::max(), std::numeric_limits<size_t>::max(), false}),
::testing::Values(cugraph::test::Rmat_Usecase(10, 16, 0.57, 0.19, 0.19, 0, true, false))));

CUGRAPH_TEST_PROGRAM_MAIN()
16 changes: 9 additions & 7 deletions cpp/tests/utilities/test_graphs.hpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2021-2023, NVIDIA CORPORATION.
* Copyright (c) 2021-2024, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -633,12 +633,14 @@ construct_graph(raft::handle_t const& handle,

if (drop_multi_edges) {
std::tie(d_src_v, d_dst_v, d_weights_v, std::ignore, std::ignore) =
cugraph::remove_multi_edges<vertex_t, edge_t, weight_t, int32_t>(handle,
std::move(d_src_v),
std::move(d_dst_v),
std::move(d_weights_v),
std::nullopt,
std::nullopt);
cugraph::remove_multi_edges<vertex_t, edge_t, weight_t, int32_t>(
handle,
std::move(d_src_v),
std::move(d_dst_v),
std::move(d_weights_v),
std::nullopt,
std::nullopt,
is_symmetric ? true /* keep minimum weight edges to maintain symmetry */ : false);
}

graph_t<vertex_t, edge_t, store_transposed, multi_gpu> graph(handle);
Expand Down

0 comments on commit 5e8e9b5

Please sign in to comment.