From 5e8e9b546f413fdbfd0cb1886b9ec5e715346019 Mon Sep 17 00:00:00 2001 From: Seunghwa Kang <45857425+seunghwak@users.noreply.github.com> Date: Wed, 10 Jan 2024 08:00:43 -0800 Subject: [PATCH] Fix MG weighted similarity test failure (#4054) MG weighted similarity tests assume symmetric graphs (undirected). When we remove multi-edges, we pick arbitrary edges and this can lead to an asymmetry in edge weights. This PR adds an additional flag to keep minimum value edges in `remove_multi_edges` and use this function if the input graph is symmetric to maintain weight symmetry. Applying the non-breaking label as this PR does not change existing code behavior if `keep_min_value_edge` is not provided. Authors: - Seunghwa Kang (https://github.com/seunghwak) Approvers: - Chuck Hastings (https://github.com/ChuckHastings) - Naim (https://github.com/naimnv) URL: https://github.com/rapidsai/cugraph/pull/4054 --- cpp/include/cugraph/graph_functions.hpp | 21 +++++-- cpp/src/c_api/graph_mg.cpp | 7 ++- cpp/src/c_api/graph_sg.cpp | 7 ++- cpp/src/structure/remove_multi_edges.cu | 20 +++--- cpp/src/structure/remove_multi_edges_impl.cuh | 63 ++++++++++++------- .../weighted_similarity_test.cpp | 14 +++-- cpp/tests/utilities/test_graphs.hpp | 16 ++--- 7 files changed, 97 insertions(+), 51 deletions(-) diff --git a/cpp/include/cugraph/graph_functions.hpp b/cpp/include/cugraph/graph_functions.hpp index 6a75a420bf8..6684d31d8fd 100644 --- a/cpp/include/cugraph/graph_functions.hpp +++ b/cpp/include/cugraph/graph_functions.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2023, NVIDIA CORPORATION. + * Copyright (c) 2020-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -1005,9 +1005,14 @@ remove_self_loops(raft::handle_t const& handle, std::optional>&& edgelist_edge_types); /** - * @brief Remove all but one edge when a multi-edge exists. Note that this function does not use - * stable methods. When a multi-edge exists, one of the edges will remain, there is no - * guarantee on which one will remain. + * @brief Remove all but one edge when a multi-edge exists. + * + * When a multi-edge exists, one of the edges will remain. If @p keep_min_value_edge is false, an + * arbitrary edge will be selected among the edges in the multi-edge. If @p keep_min_value_edge is + * true, the edge with the minimum value will be selected. The edge weights will be first compared + * (if @p edgelist_weights.has_value() is true); edge IDs will be compared next (if @p + * edgelist_edge_ids.has_value() is true); and edge types (if @p edgelist_edge_types.has_value() is + * true) will compared last. * * In an MG context it is assumed that edges have been shuffled to the proper GPU, * in which case any multi-edges will be on the same GPU. @@ -1024,6 +1029,11 @@ remove_self_loops(raft::handle_t const& handle, * @param edgelist_weights Optional list of edge weights * @param edgelist_edge_ids Optional list of edge ids * @param edgelist_edge_types Optional list of edge types + * @param keep_min_value_edge Flag indicating whether to keep an arbitrary edge (false) or the + * minimum value edge (true) among the edges in a multi-edge. Relevant only if @p + * edgelist_weights.has_value() | @p edgelist_edge_ids.has_value() | @p + * edgelist_edge_types.has_value() is true. Setting this to true incurs performance overhead as this + * requires more comparisons. * @return Tuple of vectors storing edge sources, destinations, optional weights, * optional edge ids, optional edge types. */ @@ -1038,6 +1048,7 @@ remove_multi_edges(raft::handle_t const& handle, rmm::device_uvector&& edgelist_dsts, std::optional>&& edgelist_weights, std::optional>&& edgelist_edge_ids, - std::optional>&& edgelist_edge_types); + std::optional>&& edgelist_edge_types, + bool keep_min_value_edge = false); } // namespace cugraph diff --git a/cpp/src/c_api/graph_mg.cpp b/cpp/src/c_api/graph_mg.cpp index 326022a3fa9..57a589caf02 100644 --- a/cpp/src/c_api/graph_mg.cpp +++ b/cpp/src/c_api/graph_mg.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2023, NVIDIA CORPORATION. + * Copyright (c) 2021-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -217,7 +217,10 @@ struct create_graph_functor : public cugraph::c_api::abstract_functor { std::move(edgelist_dsts), std::move(edgelist_weights), std::move(edgelist_edge_ids), - std::move(edgelist_edge_types)); + std::move(edgelist_edge_types), + properties_->is_symmetric + ? true /* keep minimum weight edges to maintain symmetry */ + : false); } std::tie(*graph, new_edge_weights, new_edge_ids, new_edge_types, new_number_map) = diff --git a/cpp/src/c_api/graph_sg.cpp b/cpp/src/c_api/graph_sg.cpp index 7793458b53a..6745be01f95 100644 --- a/cpp/src/c_api/graph_sg.cpp +++ b/cpp/src/c_api/graph_sg.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2023, NVIDIA CORPORATION. + * Copyright (c) 2021-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -200,7 +200,10 @@ struct create_graph_functor : public cugraph::c_api::abstract_functor { std::move(edgelist_dsts), std::move(edgelist_weights), std::move(edgelist_edge_ids), - std::move(edgelist_edge_types)); + std::move(edgelist_edge_types), + properties_->is_symmetric + ? true /* keep minimum weight edges to maintain symmetry */ + : false); } std::tie(*graph, new_edge_weights, new_edge_ids, new_edge_types, new_number_map) = diff --git a/cpp/src/structure/remove_multi_edges.cu b/cpp/src/structure/remove_multi_edges.cu index ba07d068c0e..54403f0b034 100644 --- a/cpp/src/structure/remove_multi_edges.cu +++ b/cpp/src/structure/remove_multi_edges.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2023, NVIDIA CORPORATION. + * Copyright (c) 2023-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -27,7 +27,8 @@ remove_multi_edges(raft::handle_t const& handle, rmm::device_uvector&& edgelist_dsts, std::optional>&& edgelist_weights, std::optional>&& edgelist_edge_ids, - std::optional>&& edgelist_edge_types); + std::optional>&& edgelist_edge_types, + bool keep_min_value_edge); template std::tuple, rmm::device_uvector, @@ -39,7 +40,8 @@ remove_multi_edges(raft::handle_t const& handle, rmm::device_uvector&& edgelist_dsts, std::optional>&& edgelist_weights, std::optional>&& edgelist_edge_ids, - std::optional>&& edgelist_edge_types); + std::optional>&& edgelist_edge_types, + bool keep_min_value_edge); template std::tuple, rmm::device_uvector, @@ -51,7 +53,8 @@ remove_multi_edges(raft::handle_t const& handle, rmm::device_uvector&& edgelist_dsts, std::optional>&& edgelist_weights, std::optional>&& edgelist_edge_ids, - std::optional>&& edgelist_edge_types); + std::optional>&& edgelist_edge_types, + bool keep_min_value_edge); template std::tuple, rmm::device_uvector, @@ -63,7 +66,8 @@ remove_multi_edges(raft::handle_t const& handle, rmm::device_uvector&& edgelist_dsts, std::optional>&& edgelist_weights, std::optional>&& edgelist_edge_ids, - std::optional>&& edgelist_edge_types); + std::optional>&& edgelist_edge_types, + bool keep_min_value_edge); template std::tuple, rmm::device_uvector, @@ -75,7 +79,8 @@ remove_multi_edges(raft::handle_t const& handle, rmm::device_uvector&& edgelist_dsts, std::optional>&& edgelist_weights, std::optional>&& edgelist_edge_ids, - std::optional>&& edgelist_edge_types); + std::optional>&& edgelist_edge_types, + bool keep_min_value_edge); template std::tuple, rmm::device_uvector, @@ -87,6 +92,7 @@ remove_multi_edges(raft::handle_t const& handle, rmm::device_uvector&& edgelist_dsts, std::optional>&& edgelist_weights, std::optional>&& edgelist_edge_ids, - std::optional>&& edgelist_edge_types); + std::optional>&& edgelist_edge_types, + bool keep_min_value_edge); } // namespace cugraph diff --git a/cpp/src/structure/remove_multi_edges_impl.cuh b/cpp/src/structure/remove_multi_edges_impl.cuh index fdd3059f874..651876ac8b1 100644 --- a/cpp/src/structure/remove_multi_edges_impl.cuh +++ b/cpp/src/structure/remove_multi_edges_impl.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2023, NVIDIA CORPORATION. + * Copyright (c) 2023-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -104,10 +104,12 @@ group_multi_edges( rmm::device_uvector&& edgelist_srcs, rmm::device_uvector&& edgelist_dsts, decltype(allocate_dataframe_buffer(0, rmm::cuda_stream_view{}))&& edgelist_values, - size_t mem_frugal_threshold) + size_t mem_frugal_threshold, + bool keep_min_value_edge) { auto pair_first = thrust::make_zip_iterator(edgelist_srcs.begin(), edgelist_dsts.begin()); auto value_first = get_dataframe_buffer_begin(edgelist_values); + auto edge_first = thrust::make_zip_iterator(pair_first, value_first); if (edgelist_srcs.size() > mem_frugal_threshold) { // FIXME: Tuning parameter to address high frequency multi-edges @@ -128,19 +130,28 @@ group_multi_edges( raft::update_host( h_group_counts.data(), group_counts.data(), group_counts.size(), handle.get_stream()); - thrust::sort_by_key(handle.get_thrust_policy(), - pair_first, - pair_first + h_group_counts[0], - get_dataframe_buffer_begin(edgelist_values)); - thrust::sort_by_key(handle.get_thrust_policy(), - pair_first + h_group_counts[0], - pair_first + edgelist_srcs.size(), - get_dataframe_buffer_begin(edgelist_values) + h_group_counts[0]); + if (keep_min_value_edge) { + thrust::sort(handle.get_thrust_policy(), edge_first, edge_first + h_group_counts[0]); + thrust::sort(handle.get_thrust_policy(), + edge_first + h_group_counts[0], + edge_first + edgelist_srcs.size()); + } else { + thrust::sort_by_key( + handle.get_thrust_policy(), pair_first, pair_first + h_group_counts[0], value_first); + thrust::sort_by_key(handle.get_thrust_policy(), + pair_first + h_group_counts[0], + pair_first + edgelist_srcs.size(), + value_first + h_group_counts[0]); + } } else { - thrust::sort_by_key(handle.get_thrust_policy(), - pair_first, - pair_first + edgelist_srcs.size(), - get_dataframe_buffer_begin(edgelist_values)); + if (keep_min_value_edge) { + thrust::sort(handle.get_thrust_policy(), edge_first, edge_first + edgelist_srcs.size()); + } else { + thrust::sort_by_key(handle.get_thrust_policy(), + pair_first, + pair_first + edgelist_srcs.size(), + get_dataframe_buffer_begin(edgelist_values)); + } } return std::make_tuple( @@ -160,7 +171,8 @@ remove_multi_edges(raft::handle_t const& handle, rmm::device_uvector&& edgelist_dsts, std::optional>&& edgelist_weights, std::optional>&& edgelist_edge_ids, - std::optional>&& edgelist_edge_types) + std::optional>&& edgelist_edge_types, + bool keep_min_value_edge) { auto total_global_mem = handle.get_device_properties().totalGlobalMem; size_t element_size = sizeof(vertex_t) * 2; @@ -187,7 +199,8 @@ remove_multi_edges(raft::handle_t const& handle, std::make_tuple(std::move(*edgelist_weights), std::move(*edgelist_edge_ids), std::move(*edgelist_edge_types)), - mem_frugal_threshold); + mem_frugal_threshold, + keep_min_value_edge); } else { std::forward_as_tuple( edgelist_srcs, edgelist_dsts, std::tie(edgelist_weights, edgelist_edge_ids)) = @@ -196,7 +209,8 @@ remove_multi_edges(raft::handle_t const& handle, std::move(edgelist_srcs), std::move(edgelist_dsts), std::make_tuple(std::move(*edgelist_weights), std::move(*edgelist_edge_ids)), - mem_frugal_threshold); + mem_frugal_threshold, + keep_min_value_edge); } } else { if (edgelist_edge_types) { @@ -207,7 +221,8 @@ remove_multi_edges(raft::handle_t const& handle, std::move(edgelist_srcs), std::move(edgelist_dsts), std::make_tuple(std::move(*edgelist_weights), std::move(*edgelist_edge_types)), - mem_frugal_threshold); + mem_frugal_threshold, + keep_min_value_edge); } else { std::forward_as_tuple(edgelist_srcs, edgelist_dsts, std::tie(edgelist_weights)) = detail::group_multi_edges>( @@ -215,7 +230,8 @@ remove_multi_edges(raft::handle_t const& handle, std::move(edgelist_srcs), std::move(edgelist_dsts), std::make_tuple(std::move(*edgelist_weights)), - mem_frugal_threshold); + mem_frugal_threshold, + keep_min_value_edge); } } } else { @@ -228,7 +244,8 @@ remove_multi_edges(raft::handle_t const& handle, std::move(edgelist_srcs), std::move(edgelist_dsts), std::make_tuple(std::move(*edgelist_edge_ids), std::move(*edgelist_edge_types)), - mem_frugal_threshold); + mem_frugal_threshold, + keep_min_value_edge); } else { std::forward_as_tuple(edgelist_srcs, edgelist_dsts, std::tie(edgelist_edge_ids)) = detail::group_multi_edges>( @@ -236,7 +253,8 @@ remove_multi_edges(raft::handle_t const& handle, std::move(edgelist_srcs), std::move(edgelist_dsts), std::make_tuple(std::move(*edgelist_edge_ids)), - mem_frugal_threshold); + mem_frugal_threshold, + keep_min_value_edge); } } else { if (edgelist_edge_types) { @@ -246,7 +264,8 @@ remove_multi_edges(raft::handle_t const& handle, std::move(edgelist_srcs), std::move(edgelist_dsts), std::make_tuple(std::move(*edgelist_edge_types)), - mem_frugal_threshold); + mem_frugal_threshold, + keep_min_value_edge); } else { std::tie(edgelist_srcs, edgelist_dsts) = detail::group_multi_edges( handle, std::move(edgelist_srcs), std::move(edgelist_dsts), mem_frugal_threshold); diff --git a/cpp/tests/link_prediction/weighted_similarity_test.cpp b/cpp/tests/link_prediction/weighted_similarity_test.cpp index ca644b76c5a..99e752c0b02 100644 --- a/cpp/tests/link_prediction/weighted_similarity_test.cpp +++ b/cpp/tests/link_prediction/weighted_similarity_test.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022-2023, NVIDIA CORPORATION. + * Copyright (c) 2022-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -27,9 +27,9 @@ struct Similarity_Usecase { bool use_weights{false}; - bool check_correctness{true}; size_t max_seeds{std::numeric_limits::max()}; size_t max_vertex_pairs_to_check{std::numeric_limits::max()}; + bool check_correctness{true}; }; template @@ -293,7 +293,7 @@ INSTANTIATE_TEST_SUITE_P( // Disable weighted computation testing in 22.10 //::testing::Values(Similarity_Usecase{true, true, 20, 100}, Similarity_Usecase{false, true, 20, //: 100}), - ::testing::Values(Similarity_Usecase{true, true, 20, 100}), + ::testing::Values(Similarity_Usecase{true, 20, 100, true}), ::testing::Values(cugraph::test::File_Usecase("test/datasets/karate.mtx"), cugraph::test::File_Usecase("test/datasets/dolphins.mtx")))); @@ -305,7 +305,7 @@ INSTANTIATE_TEST_SUITE_P( // Disable weighted computation testing in 22.10 //::testing::Values(Similarity_Usecase{true, true, 20, 100}, //: Similarity_Usecase{false,true,20,100}), - ::testing::Values(Similarity_Usecase{true, true, 20, 100}), + ::testing::Values(Similarity_Usecase{true, 20, 100, true}), ::testing::Values(cugraph::test::Rmat_Usecase(10, 16, 0.57, 0.19, 0.19, 0, true, false)))); INSTANTIATE_TEST_SUITE_P( @@ -319,7 +319,8 @@ INSTANTIATE_TEST_SUITE_P( // disable correctness checks // Disable weighted computation testing in 22.10 //::testing::Values(Similarity_Usecase{false, false}, Similarity_Usecase{true, false}), - ::testing::Values(Similarity_Usecase{true, true}), + ::testing::Values(Similarity_Usecase{ + true, std::numeric_limits::max(), std::numeric_limits::max(), true}), ::testing::Values(cugraph::test::File_Usecase("test/datasets/karate.mtx")))); INSTANTIATE_TEST_SUITE_P( @@ -332,7 +333,8 @@ INSTANTIATE_TEST_SUITE_P( ::testing::Combine( // disable correctness checks for large graphs //::testing::Values(Similarity_Usecase{false, false}, Similarity_Usecase{true, false}), - ::testing::Values(Similarity_Usecase{true, false}), + ::testing::Values(Similarity_Usecase{ + true, std::numeric_limits::max(), std::numeric_limits::max(), false}), ::testing::Values(cugraph::test::Rmat_Usecase(10, 16, 0.57, 0.19, 0.19, 0, true, false)))); CUGRAPH_TEST_PROGRAM_MAIN() diff --git a/cpp/tests/utilities/test_graphs.hpp b/cpp/tests/utilities/test_graphs.hpp index 8cc87b26f1d..5a9dc9c90d4 100644 --- a/cpp/tests/utilities/test_graphs.hpp +++ b/cpp/tests/utilities/test_graphs.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2023, NVIDIA CORPORATION. + * Copyright (c) 2021-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -633,12 +633,14 @@ construct_graph(raft::handle_t const& handle, if (drop_multi_edges) { std::tie(d_src_v, d_dst_v, d_weights_v, std::ignore, std::ignore) = - cugraph::remove_multi_edges(handle, - std::move(d_src_v), - std::move(d_dst_v), - std::move(d_weights_v), - std::nullopt, - std::nullopt); + cugraph::remove_multi_edges( + handle, + std::move(d_src_v), + std::move(d_dst_v), + std::move(d_weights_v), + std::nullopt, + std::nullopt, + is_symmetric ? true /* keep minimum weight edges to maintain symmetry */ : false); } graph_t graph(handle);