diff --git a/cpp/src/prims/per_v_random_select_transform_outgoing_e.cuh b/cpp/src/prims/per_v_random_select_transform_outgoing_e.cuh index 2a11a403f28..fbdabd659a7 100644 --- a/cpp/src/prims/per_v_random_select_transform_outgoing_e.cuh +++ b/cpp/src/prims/per_v_random_select_transform_outgoing_e.cuh @@ -30,6 +30,7 @@ #include #endif +#include #include #include #include @@ -90,39 +91,35 @@ struct invalid_col_comm_rank_t { }; template -struct transform_and_count_local_nbr_indices_t { +struct transform_local_nbr_indices_t { using key_t = typename thrust::iterator_traits::value_type; using vertex_t = typename GraphViewType::vertex_type; using edge_t = typename GraphViewType::edge_type; edge_partition_device_view_t edge_partition{}; - UniqueKeyIdxIterator unique_key_idx_first{}; + thrust::optional local_key_indices{thrust::nullopt}; KeyIterator key_first{}; - OffsetIterator offset_first{}; LocalNbrIdxIterator local_nbr_idx_first{}; OutputValueIterator output_value_first{}; - thrust::optional output_count_first{}; EdgePartitionSrcValueInputWrapper edge_partition_src_value_input; EdgePartitionDstValueInputWrapper edge_partition_dst_value_input; EdgePartitionEdgeValueInputWrapper edge_partition_e_value_input; EdgeOp e_op{}; edge_t invalid_idx{}; thrust::optional invalid_value{thrust::nullopt}; + size_t K{}; __device__ void operator()(size_t i) const { - auto key_idx = *(unique_key_idx_first + i); + auto key_idx = local_key_indices ? (*local_key_indices)[i] : (i / K); auto key = *(key_first + key_idx); vertex_t major{}; if constexpr (std::is_same_v) { @@ -149,65 +146,85 @@ struct transform_and_count_local_nbr_indices_t { } else { thrust::tie(indices, edge_offset, local_degree) = edge_partition.local_edges(major_offset); } - auto start_offset = *(offset_first + i); - auto end_offset = *(offset_first + (i + 1)); - - size_t num_valid_local_nbr_indices{0}; - for (size_t i = start_offset; i < end_offset; ++i) { - auto local_nbr_idx = *(local_nbr_idx_first + i); - if (local_nbr_idx != invalid_idx) { - assert(local_nbr_idx < local_degree); - auto minor = indices[local_nbr_idx]; - auto minor_offset = edge_partition.minor_offset_from_minor_nocheck(minor); - - std::conditional_t - key_or_src{}; // key if major - std::conditional_t - key_or_dst{}; // key if major - if constexpr (GraphViewType::is_storage_transposed) { - key_or_src = minor; - key_or_dst = key; - } else { - key_or_src = key; - key_or_dst = minor; - } - auto src_offset = GraphViewType::is_storage_transposed ? minor_offset : major_offset; - auto dst_offset = GraphViewType::is_storage_transposed ? major_offset : minor_offset; - *(output_value_first + i) = - e_op(key_or_src, - key_or_dst, - edge_partition_src_value_input.get(src_offset), - edge_partition_dst_value_input.get(dst_offset), - edge_partition_e_value_input.get(edge_offset + local_nbr_idx)); - ++num_valid_local_nbr_indices; - } else if (invalid_value) { - *(output_value_first + i) = *invalid_value; + auto local_nbr_idx = *(local_nbr_idx_first + i); + if (local_nbr_idx != invalid_idx) { + auto minor = indices[local_nbr_idx]; + auto minor_offset = edge_partition.minor_offset_from_minor_nocheck(minor); + + std::conditional_t + key_or_src{}; // key if major + std::conditional_t + key_or_dst{}; // key if major + if constexpr (GraphViewType::is_storage_transposed) { + key_or_src = minor; + key_or_dst = key; } else { - assert(output_count_first); + key_or_src = key; + key_or_dst = minor; } + auto src_offset = GraphViewType::is_storage_transposed ? minor_offset : major_offset; + auto dst_offset = GraphViewType::is_storage_transposed ? major_offset : minor_offset; + *(output_value_first + i) = + e_op(key_or_src, + key_or_dst, + edge_partition_src_value_input.get(src_offset), + edge_partition_dst_value_input.get(dst_offset), + edge_partition_e_value_input.get(edge_offset + local_nbr_idx)); + } else if (invalid_value) { + *(output_value_first + i) = *invalid_value; } - if (output_count_first) { *(*output_count_first + key_idx) = num_valid_local_nbr_indices; } } }; -template -struct copy_and_fill_sample_e_op_results_t { - raft::device_span sample_counts{}; - raft::device_span sample_displacements{}; - InputIterator input_first{}; - OutputIterator output_first{}; +template +struct count_valids_t { + raft::device_span sample_local_nbr_indices{}; size_t K{}; - typename thrust::iterator_traits::value_type invalid_value; + edge_t invalid_idx{}; - __device__ void operator()(size_t i) const + __device__ size_t operator()(size_t i) const { - auto num_valid_samples = sample_counts[i]; - for (size_t j = 0; j < num_valid_samples; ++j) { // copy - *(output_first + K * i + j) = *(input_first + sample_displacements[i] + j); - } - for (size_t j = num_valid_samples; j < K; ++j) { // fill - *(output_first + K * i + j) = invalid_value; - } + auto first = sample_local_nbr_indices.begin() + i * K; + return static_cast( + thrust::distance(first, thrust::find(thrust::seq, first, first + K, invalid_idx))); + } +}; + +struct count_t { + raft::device_span sample_counts{}; + + __device__ void operator()(size_t key_idx) const + { + cuda::std::atomic_ref counter(sample_counts[key_idx]); + counter.fetch_add(size_t{1}, cuda::std::memory_order_relaxed); + } +}; + +template +struct copy_with_counter_t { + raft::device_span sample_counts{}; + thrust::optional> sample_offsets{}; + Iterator output_first{}; + size_t K{}; + + __device__ void operator()(thrust::tuple pair) const + { + auto e_op_result = thrust::get<0>(pair); + auto key_idx = thrust::get<1>(pair); + cuda::std::atomic_ref counter(*(sample_counts.begin() + key_idx)); + auto sample_idx = counter.fetch_add(size_t{1}, cuda::std::memory_order_relaxed); + auto output_offset = (sample_offsets ? (*sample_offsets)[key_idx] : key_idx * K) + sample_idx; + *(output_first + output_offset) = e_op_result; + } +}; + +template +struct check_invalid_t { + edge_t invalid_idx{}; + + __device__ bool operator()(thrust::tuple pair) const + { + return thrust::get<0>(pair) == invalid_idx; } }; @@ -281,6 +298,11 @@ per_v_random_select_transform_e(raft::handle_t const& handle, CUGRAPH_EXPECTS(K >= size_t{1}, "Invalid input argument: invalid K, K should be a positive integer."); + auto col_comm_size = + GraphViewType::is_multi_gpu + ? handle.get_subcomm(cugraph::partition_2d::key_naming_t().col_name()).get_size() + : int{1}; + if (do_expensive_check) { // FIXME: better re-factor this check function? vertex_t const* frontier_vertex_first{nullptr}; @@ -311,7 +333,7 @@ per_v_random_select_transform_e(raft::handle_t const& handle, auto frontier_key_last = frontier.end(); std::vector local_frontier_sizes{}; - if constexpr (GraphViewType::is_multi_gpu) { + if (col_comm_size > 1) { auto& col_comm = handle.get_subcomm(cugraph::partition_2d::key_naming_t().col_name()); local_frontier_sizes = host_scalar_allgather( col_comm, @@ -330,11 +352,11 @@ per_v_random_select_transform_e(raft::handle_t const& handle, // 1. aggregate frontier auto aggregate_local_frontier_keys = - GraphViewType::is_multi_gpu + (col_comm_size > 1) ? std::make_optional( local_frontier_displacements.back() + local_frontier_sizes.back(), handle.get_stream()) : std::nullopt; - if constexpr (GraphViewType::is_multi_gpu) { + if (col_comm_size > 1) { auto& col_comm = handle.get_subcomm(cugraph::partition_2d::key_naming_t().col_name()); device_allgatherv(col_comm, frontier_key_first, @@ -347,7 +369,7 @@ per_v_random_select_transform_e(raft::handle_t const& handle, // 2. compute degrees auto aggregate_local_frontier_local_degrees = - GraphViewType::is_multi_gpu + (col_comm_size > 1) ? std::make_optional>( local_frontier_displacements.back() + local_frontier_sizes.back(), handle.get_stream()) : std::nullopt; @@ -360,8 +382,8 @@ per_v_random_select_transform_e(raft::handle_t const& handle, vertex_t const* edge_partition_frontier_major_first{nullptr}; auto edge_partition_frontier_key_first = - (GraphViewType::is_multi_gpu ? get_dataframe_buffer_begin(*aggregate_local_frontier_keys) - : frontier_key_first) + + ((col_comm_size > 1) ? get_dataframe_buffer_begin(*aggregate_local_frontier_keys) + : frontier_key_first) + local_frontier_displacements[i]; if constexpr (std::is_same_v) { edge_partition_frontier_major_first = edge_partition_frontier_key_first; @@ -374,7 +396,7 @@ per_v_random_select_transform_e(raft::handle_t const& handle, local_frontier_sizes[i]), handle.get_stream()); - if constexpr (GraphViewType::is_multi_gpu) { + if (col_comm_size > 1) { // FIXME: this copy is unnecessary if edge_partition.compute_local_degrees() takes a pointer // to the output array thrust::copy( @@ -388,12 +410,11 @@ per_v_random_select_transform_e(raft::handle_t const& handle, } auto frontier_gathered_local_degrees = - GraphViewType::is_multi_gpu + (col_comm_size > 1) ? std::make_optional>(size_t{0}, handle.get_stream()) : std::nullopt; - if constexpr (GraphViewType::is_multi_gpu) { - auto& col_comm = handle.get_subcomm(cugraph::partition_2d::key_naming_t().col_name()); - auto const col_comm_size = col_comm.get_size(); + if (col_comm_size > 1) { + auto& col_comm = handle.get_subcomm(cugraph::partition_2d::key_naming_t().col_name()); std::tie(frontier_gathered_local_degrees, std::ignore) = shuffle_values(col_comm, @@ -431,12 +452,11 @@ per_v_random_select_transform_e(raft::handle_t const& handle, sample_nbr_indices); // neighbor index within an edge partition (note that each vertex's // neighbors are distributed in col_comm_size partitions) std::optional> sample_key_indices{ - std::nullopt}; // relevant only when multi-GPU + std::nullopt}; // relevant only when (col_comm_size > 1) auto local_frontier_sample_counts = std::vector{}; auto local_frontier_sample_displacements = std::vector{}; - if constexpr (GraphViewType::is_multi_gpu) { - auto& col_comm = handle.get_subcomm(cugraph::partition_2d::key_naming_t().col_name()); - auto const col_comm_size = col_comm.get_size(); + if (col_comm_size > 1) { + auto& col_comm = handle.get_subcomm(cugraph::partition_2d::key_naming_t().col_name()); sample_key_indices = rmm::device_uvector(sample_local_nbr_indices.size(), handle.get_stream()); @@ -507,10 +527,6 @@ per_v_random_select_transform_e(raft::handle_t const& handle, // 5. transform - auto sample_counts = - (!GraphViewType::is_multi_gpu && !invalid_value) - ? std::make_optional>(frontier.size(), handle.get_stream()) - : std::nullopt; auto sample_e_op_results = allocate_dataframe_buffer( local_frontier_sample_displacements.back() + local_frontier_sample_counts.back(), handle.get_stream()); @@ -520,8 +536,8 @@ per_v_random_select_transform_e(raft::handle_t const& handle, graph_view.local_edge_partition_view(i)); auto edge_partition_frontier_key_first = - (GraphViewType::is_multi_gpu ? get_dataframe_buffer_begin(*aggregate_local_frontier_keys) - : frontier_key_first) + + ((col_comm_size > 1) ? get_dataframe_buffer_begin(*aggregate_local_frontier_keys) + : frontier_key_first) + local_frontier_displacements[i]; auto edge_partition_sample_local_nbr_index_first = sample_local_nbr_indices.begin() + local_frontier_sample_displacements[i]; @@ -542,97 +558,59 @@ per_v_random_select_transform_e(raft::handle_t const& handle, } auto edge_partition_e_value_input = edge_partition_e_input_device_view_t(edge_value_input, i); - if constexpr (GraphViewType::is_multi_gpu) { - thrust::sort_by_key(handle.get_thrust_policy(), - (*sample_key_indices).begin() + local_frontier_sample_displacements[i], - (*sample_key_indices).begin() + local_frontier_sample_displacements[i] + - local_frontier_sample_counts[i], - edge_partition_sample_local_nbr_index_first); - auto num_unique_key_indices = - thrust::count_if(handle.get_thrust_policy(), - thrust::make_counting_iterator(size_t{0}), - thrust::make_counting_iterator(local_frontier_sample_counts[i]), - is_first_in_run_t{(*sample_key_indices).data() + - local_frontier_sample_displacements[i]}); - rmm::device_uvector unique_key_indices(num_unique_key_indices, handle.get_stream()); - rmm::device_uvector unique_key_local_nbr_idx_counts(num_unique_key_indices, - handle.get_stream()); - thrust::reduce_by_key(handle.get_thrust_policy(), - (*sample_key_indices).begin() + local_frontier_sample_displacements[i], - (*sample_key_indices).begin() + local_frontier_sample_displacements[i] + - local_frontier_sample_counts[i], - thrust::make_constant_iterator(edge_t{1}), - unique_key_indices.begin(), - unique_key_local_nbr_idx_counts.begin()); - rmm::device_uvector unique_key_local_nbr_idx_offsets(num_unique_key_indices + 1, - handle.get_stream()); - unique_key_local_nbr_idx_offsets.set_element_to_zero_async(size_t{0}, handle.get_stream()); - thrust::inclusive_scan(handle.get_thrust_policy(), - unique_key_local_nbr_idx_counts.begin(), - unique_key_local_nbr_idx_counts.end(), - unique_key_local_nbr_idx_offsets.begin() + 1); - auto offset_first = unique_key_local_nbr_idx_offsets.begin(); + if (col_comm_size > 1) { + auto edge_partition_sample_key_index_first = + (*sample_key_indices).begin() + local_frontier_sample_displacements[i]; thrust::for_each( handle.get_thrust_policy(), thrust::make_counting_iterator(size_t{0}), - thrust::make_counting_iterator(unique_key_indices.size()), - transform_and_count_local_nbr_indices_t{edge_partition, - unique_key_indices.begin(), - edge_partition_frontier_key_first, - offset_first, - edge_partition_sample_local_nbr_index_first, - edge_partition_sample_e_op_result_first, - thrust::nullopt, - edge_partition_src_value_input, - edge_partition_dst_value_input, - edge_partition_e_value_input, - e_op, - cugraph::ops::gnn::graph::INVALID_ID, - to_thrust_optional(invalid_value)}); + thrust::make_counting_iterator(local_frontier_sample_counts[i]), + transform_local_nbr_indices_t{ + edge_partition, + thrust::make_optional(edge_partition_sample_key_index_first), + edge_partition_frontier_key_first, + edge_partition_sample_local_nbr_index_first, + edge_partition_sample_e_op_result_first, + edge_partition_src_value_input, + edge_partition_dst_value_input, + edge_partition_e_value_input, + e_op, + cugraph::ops::gnn::graph::INVALID_ID, + to_thrust_optional(invalid_value), + K}); } else { - auto offset_first = thrust::make_transform_iterator(thrust::make_counting_iterator(size_t{0}), - multiplier_t{K}); thrust::for_each( handle.get_thrust_policy(), thrust::make_counting_iterator(size_t{0}), - thrust::make_counting_iterator(frontier.size()), - transform_and_count_local_nbr_indices_t< - GraphViewType, - decltype(thrust::make_counting_iterator(size_t{0})), - decltype(edge_partition_frontier_key_first), - decltype(offset_first), - decltype(edge_partition_sample_local_nbr_index_first), - decltype(edge_partition_sample_e_op_result_first), - size_t*, - edge_partition_src_input_device_view_t, - edge_partition_dst_input_device_view_t, - edge_partition_e_input_device_view_t, - EdgeOp, - T>{edge_partition, - thrust::make_counting_iterator(size_t{0}), - edge_partition_frontier_key_first, - offset_first, - edge_partition_sample_local_nbr_index_first, - edge_partition_sample_e_op_result_first, - sample_counts ? thrust::optional((*sample_counts).data()) : thrust::nullopt, - edge_partition_src_value_input, - edge_partition_dst_value_input, - edge_partition_e_value_input, - e_op, - cugraph::ops::gnn::graph::INVALID_ID, - to_thrust_optional(invalid_value)}); + thrust::make_counting_iterator(frontier.size() * K), + transform_local_nbr_indices_t{edge_partition, + thrust::nullopt, + edge_partition_frontier_key_first, + edge_partition_sample_local_nbr_index_first, + edge_partition_sample_e_op_result_first, + edge_partition_src_value_input, + edge_partition_dst_value_input, + edge_partition_e_value_input, + e_op, + cugraph::ops::gnn::graph::INVALID_ID, + to_thrust_optional(invalid_value), + K}); } } @@ -641,81 +619,93 @@ per_v_random_select_transform_e(raft::handle_t const& handle, auto sample_offsets = invalid_value ? std::nullopt : std::make_optional>( frontier.size() + 1, handle.get_stream()); - if (GraphViewType::is_multi_gpu) { + if (col_comm_size > 1) { auto& col_comm = handle.get_subcomm(cugraph::partition_2d::key_naming_t().col_name()); - std::vector rx_counts{}; - std::tie(sample_e_op_results, rx_counts) = + std::tie(sample_e_op_results, std::ignore) = shuffle_values(col_comm, get_dataframe_buffer_begin(sample_e_op_results), local_frontier_sample_counts, handle.get_stream()); std::tie(sample_key_indices, std::ignore) = shuffle_values( col_comm, (*sample_key_indices).begin(), local_frontier_sample_counts, handle.get_stream()); - // FIXME: better refactor this sort-and-reduce-by-key - thrust::sort_by_key(handle.get_thrust_policy(), - (*sample_key_indices).begin(), - (*sample_key_indices).end(), - get_dataframe_buffer_begin(sample_e_op_results)); - auto num_unique_key_indices = - thrust::count_if(handle.get_thrust_policy(), - thrust::make_counting_iterator(size_t{0}), - thrust::make_counting_iterator((*sample_key_indices).size()), - is_first_in_run_t{(*sample_key_indices).data()}); - rmm::device_uvector unique_key_indices(num_unique_key_indices, handle.get_stream()); - rmm::device_uvector unique_key_sample_counts(num_unique_key_indices, - handle.get_stream()); - thrust::reduce_by_key(handle.get_thrust_policy(), - (*sample_key_indices).begin(), - (*sample_key_indices).end(), - thrust::make_constant_iterator(edge_t{1}), - unique_key_indices.begin(), - unique_key_sample_counts.begin()); - sample_counts = rmm::device_uvector(frontier.size(), handle.get_stream()); - thrust::fill( - handle.get_thrust_policy(), (*sample_counts).begin(), (*sample_counts).end(), size_t{0}); - thrust::scatter(handle.get_thrust_policy(), - unique_key_sample_counts.begin(), - unique_key_sample_counts.end(), - unique_key_indices.begin(), - (*sample_counts).begin()); + + rmm::device_uvector sample_counts(frontier.size(), handle.get_stream()); + thrust::fill(handle.get_thrust_policy(), sample_counts.begin(), sample_counts.end(), size_t{0}); + auto input_pair_first = thrust::make_zip_iterator(thrust::make_tuple( + get_dataframe_buffer_begin(sample_e_op_results), (*sample_key_indices).begin())); if (invalid_value) { - rmm::device_uvector sample_displacements((*sample_counts).size(), - handle.get_stream()); - thrust::exclusive_scan(handle.get_thrust_policy(), - (*sample_counts).begin(), - (*sample_counts).end(), - sample_displacements.begin()); auto tmp_sample_e_op_results = allocate_dataframe_buffer(frontier.size() * K, handle.get_stream()); - auto input_first = get_dataframe_buffer_begin(sample_e_op_results); - auto output_first = get_dataframe_buffer_begin(tmp_sample_e_op_results); - thrust::for_each( - handle.get_thrust_policy(), - thrust::make_counting_iterator(size_t{0}), - thrust::make_counting_iterator(frontier.size()), - copy_and_fill_sample_e_op_results_t{ - raft::device_span((*sample_counts).data(), (*sample_counts).size()), - raft::device_span(sample_displacements.data(), sample_displacements.size()), - input_first, - output_first, - K, - *invalid_value}); + auto tmp_sample_e_op_result_first = get_dataframe_buffer_begin(tmp_sample_e_op_results); + thrust::fill(handle.get_thrust_policy(), + get_dataframe_buffer_begin(tmp_sample_e_op_results), + get_dataframe_buffer_end(tmp_sample_e_op_results), + *invalid_value); + thrust::for_each(handle.get_thrust_policy(), + input_pair_first, + input_pair_first + size_dataframe_buffer(sample_e_op_results), + copy_with_counter_t{ + raft::device_span(sample_counts.data(), sample_counts.size()), + thrust::nullopt, + tmp_sample_e_op_result_first, + K}); sample_e_op_results = std::move(tmp_sample_e_op_results); } else { + thrust::for_each( + handle.get_thrust_policy(), + (*sample_key_indices).begin(), + (*sample_key_indices).end(), + count_t{raft::device_span(sample_counts.data(), sample_counts.size())}); (*sample_offsets).set_element_to_zero_async(size_t{0}, handle.get_stream()); thrust::inclusive_scan(handle.get_thrust_policy(), - (*sample_counts).begin(), - (*sample_counts).end(), + sample_counts.begin(), + sample_counts.end(), (*sample_offsets).begin() + 1); + auto tmp_sample_e_op_results = allocate_dataframe_buffer( + (*sample_offsets).back_element(handle.get_stream()), handle.get_stream()); + auto tmp_sample_e_op_result_first = get_dataframe_buffer_begin(tmp_sample_e_op_results); + thrust::fill( + handle.get_thrust_policy(), sample_counts.begin(), sample_counts.end(), size_t{0}); + thrust::for_each( + handle.get_thrust_policy(), + input_pair_first, + input_pair_first + size_dataframe_buffer(sample_e_op_results), + copy_with_counter_t{ + raft::device_span(sample_counts.data(), sample_counts.size()), + raft::device_span((*sample_offsets).data(), (*sample_offsets).size()), + tmp_sample_e_op_result_first, + K}); + sample_e_op_results = std::move(tmp_sample_e_op_results); } } else { if (!invalid_value) { + rmm::device_uvector sample_counts(frontier.size(), handle.get_stream()); + thrust::tabulate( + handle.get_thrust_policy(), + sample_counts.begin(), + sample_counts.end(), + count_valids_t{raft::device_span(sample_local_nbr_indices.data(), + sample_local_nbr_indices.size()), + K, + cugraph::ops::gnn::graph::INVALID_ID}); (*sample_offsets).set_element_to_zero_async(size_t{0}, handle.get_stream()); thrust::inclusive_scan(handle.get_thrust_policy(), - (*sample_counts).begin(), - (*sample_counts).end(), + sample_counts.begin(), + sample_counts.end(), (*sample_offsets).begin() + 1); + auto pair_first = thrust::make_zip_iterator(thrust::make_tuple( + sample_local_nbr_indices.begin(), get_dataframe_buffer_begin(sample_e_op_results))); + resize_dataframe_buffer( + sample_e_op_results, + thrust::distance(pair_first, + thrust::remove_if(handle.get_thrust_policy(), + pair_first, + pair_first + sample_local_nbr_indices.size(), + check_invalid_t{ + cugraph::ops::gnn::graph::INVALID_ID})), + handle.get_stream()); + shrink_to_fit_dataframe_buffer(sample_e_op_results, handle.get_stream()); } } diff --git a/cpp/tests/prims/mg_count_if_e.cu b/cpp/tests/prims/mg_count_if_e.cu index 4689a8bedaa..dd49c4ff5bf 100644 --- a/cpp/tests/prims/mg_count_if_e.cu +++ b/cpp/tests/prims/mg_count_if_e.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2022, NVIDIA CORPORATION. + * Copyright (c) 2021-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -193,6 +193,22 @@ TEST_P(Tests_MGCountIfE_Rmat, CheckInt32Int32FloatTupleIntFloatTransposeFalse) cugraph::test::override_Rmat_Usecase_with_cmd_line_arguments(std::get<1>(param))); } +TEST_P(Tests_MGCountIfE_Rmat, CheckInt32Int64FloatTupleIntFloatTransposeFalse) +{ + auto param = GetParam(); + run_current_test, false>( + std::get<0>(param), + cugraph::test::override_Rmat_Usecase_with_cmd_line_arguments(std::get<1>(param))); +} + +TEST_P(Tests_MGCountIfE_Rmat, CheckInt64Int64FloatTupleIntFloatTransposeFalse) +{ + auto param = GetParam(); + run_current_test, false>( + std::get<0>(param), + cugraph::test::override_Rmat_Usecase_with_cmd_line_arguments(std::get<1>(param))); +} + TEST_P(Tests_MGCountIfE_File, CheckInt32Int32FloatTupleIntFloatTransposeTrue) { auto param = GetParam(); @@ -208,6 +224,22 @@ TEST_P(Tests_MGCountIfE_Rmat, CheckInt32Int32FloatTupleIntFloatTransposeTrue) cugraph::test::override_Rmat_Usecase_with_cmd_line_arguments(std::get<1>(param))); } +TEST_P(Tests_MGCountIfE_Rmat, CheckInt32Int64FloatTupleIntFloatTransposeTrue) +{ + auto param = GetParam(); + run_current_test, true>( + std::get<0>(param), + cugraph::test::override_Rmat_Usecase_with_cmd_line_arguments(std::get<1>(param))); +} + +TEST_P(Tests_MGCountIfE_Rmat, CheckInt64Int64FloatTupleIntFloatTransposeTrue) +{ + auto param = GetParam(); + run_current_test, true>( + std::get<0>(param), + cugraph::test::override_Rmat_Usecase_with_cmd_line_arguments(std::get<1>(param))); +} + TEST_P(Tests_MGCountIfE_File, CheckInt32Int32FloatTransposeFalse) { auto param = GetParam(); @@ -222,6 +254,22 @@ TEST_P(Tests_MGCountIfE_Rmat, CheckInt32Int32FloatTransposeFalse) cugraph::test::override_Rmat_Usecase_with_cmd_line_arguments(std::get<1>(param))); } +TEST_P(Tests_MGCountIfE_Rmat, CheckInt32Int64FloatTransposeFalse) +{ + auto param = GetParam(); + run_current_test( + std::get<0>(param), + cugraph::test::override_Rmat_Usecase_with_cmd_line_arguments(std::get<1>(param))); +} + +TEST_P(Tests_MGCountIfE_Rmat, CheckInt64Int64FloatTransposeFalse) +{ + auto param = GetParam(); + run_current_test( + std::get<0>(param), + cugraph::test::override_Rmat_Usecase_with_cmd_line_arguments(std::get<1>(param))); +} + TEST_P(Tests_MGCountIfE_File, CheckInt32Int32FloatTransposeTrue) { auto param = GetParam(); @@ -236,6 +284,22 @@ TEST_P(Tests_MGCountIfE_Rmat, CheckInt32Int32FloatTransposeTrue) cugraph::test::override_Rmat_Usecase_with_cmd_line_arguments(std::get<1>(param))); } +TEST_P(Tests_MGCountIfE_Rmat, CheckInt32Int64FloatTransposeTrue) +{ + auto param = GetParam(); + run_current_test( + std::get<0>(param), + cugraph::test::override_Rmat_Usecase_with_cmd_line_arguments(std::get<1>(param))); +} + +TEST_P(Tests_MGCountIfE_Rmat, CheckInt64Int64FloatTransposeTrue) +{ + auto param = GetParam(); + run_current_test( + std::get<0>(param), + cugraph::test::override_Rmat_Usecase_with_cmd_line_arguments(std::get<1>(param))); +} + INSTANTIATE_TEST_SUITE_P( file_test, Tests_MGCountIfE_File, @@ -254,7 +318,11 @@ INSTANTIATE_TEST_SUITE_P( 10, 16, 0.57, 0.19, 0.19, 0, false, false, 0, true)))); INSTANTIATE_TEST_SUITE_P( - rmat_large_test, + rmat_benchmark_test, /* note that scale & edge factor can be overridden in benchmarking (with + --gtest_filter to select only the rmat_benchmark_test with a specific + vertex & edge type combination) by command line arguments and do not + include more than one Rmat_Usecase that differ only in scale or edge + factor (to avoid running same benchmarks more than once) */ Tests_MGCountIfE_Rmat, ::testing::Combine(::testing::Values(Prims_Usecase{false}), ::testing::Values(cugraph::test::Rmat_Usecase( diff --git a/cpp/tests/prims/mg_per_v_pair_transform_dst_nbr_intersection.cu b/cpp/tests/prims/mg_per_v_pair_transform_dst_nbr_intersection.cu index 0048e1dd73b..3af09953154 100644 --- a/cpp/tests/prims/mg_per_v_pair_transform_dst_nbr_intersection.cu +++ b/cpp/tests/prims/mg_per_v_pair_transform_dst_nbr_intersection.cu @@ -246,6 +246,22 @@ TEST_P(Tests_MGPerVPairTransformDstNbrIntersection_Rmat, CheckInt32Int32FloatTup cugraph::test::override_Rmat_Usecase_with_cmd_line_arguments(std::get<1>(param))); } +TEST_P(Tests_MGPerVPairTransformDstNbrIntersection_Rmat, CheckInt32Int64FloatTupleIntFloat) +{ + auto param = GetParam(); + run_current_test>( + std::get<0>(param), + cugraph::test::override_Rmat_Usecase_with_cmd_line_arguments(std::get<1>(param))); +} + +TEST_P(Tests_MGPerVPairTransformDstNbrIntersection_Rmat, CheckInt64Int64FloatTupleIntFloat) +{ + auto param = GetParam(); + run_current_test>( + std::get<0>(param), + cugraph::test::override_Rmat_Usecase_with_cmd_line_arguments(std::get<1>(param))); +} + TEST_P(Tests_MGPerVPairTransformDstNbrIntersection_File, CheckInt32Int32Float) { auto param = GetParam(); @@ -260,6 +276,22 @@ TEST_P(Tests_MGPerVPairTransformDstNbrIntersection_Rmat, CheckInt32Int32Float) cugraph::test::override_Rmat_Usecase_with_cmd_line_arguments(std::get<1>(param))); } +TEST_P(Tests_MGPerVPairTransformDstNbrIntersection_Rmat, CheckInt32Int64Float) +{ + auto param = GetParam(); + run_current_test( + std::get<0>(param), + cugraph::test::override_Rmat_Usecase_with_cmd_line_arguments(std::get<1>(param))); +} + +TEST_P(Tests_MGPerVPairTransformDstNbrIntersection_Rmat, CheckInt64Int64Float) +{ + auto param = GetParam(); + run_current_test( + std::get<0>(param), + cugraph::test::override_Rmat_Usecase_with_cmd_line_arguments(std::get<1>(param))); +} + INSTANTIATE_TEST_SUITE_P( file_test, Tests_MGPerVPairTransformDstNbrIntersection_File, @@ -278,7 +310,11 @@ INSTANTIATE_TEST_SUITE_P( 10, 16, 0.57, 0.19, 0.19, 0, false, false, 0, true)))); INSTANTIATE_TEST_SUITE_P( - rmat_large_test, + rmat_benchmark_test, /* note that scale & edge factor can be overridden in benchmarking (with + --gtest_filter to select only the rmat_benchmark_test with a specific + vertex & edge type combination) by command line arguments and do not + include more than one Rmat_Usecase that differ only in scale or edge + factor (to avoid running same benchmarks more than once) */ Tests_MGPerVPairTransformDstNbrIntersection_Rmat, ::testing::Combine(::testing::Values(Prims_Usecase{size_t{1024 * 1024}, false}), ::testing::Values(cugraph::test::Rmat_Usecase( diff --git a/cpp/tests/prims/mg_per_v_random_select_transform_outgoing_e.cu b/cpp/tests/prims/mg_per_v_random_select_transform_outgoing_e.cu index 06028bfbc7c..42ab506562c 100644 --- a/cpp/tests/prims/mg_per_v_random_select_transform_outgoing_e.cu +++ b/cpp/tests/prims/mg_per_v_random_select_transform_outgoing_e.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2022, NVIDIA CORPORATION. + * Copyright (c) 2021-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -32,13 +32,21 @@ #include #include #include +#if 1 // for random seed selection +#include +#endif +#include #include #include #include #include #include #include +#if 1 // for random seed selection +#include +#include +#endif #include @@ -68,6 +76,7 @@ struct e_op_t { }; struct Prims_Usecase { + size_t num_seeds{0}; size_t K{0}; bool with_replacement{false}; bool use_invalid_value{false}; @@ -81,7 +90,16 @@ class Tests_MGPerVRandomSelectTransformOutgoingE public: Tests_MGPerVRandomSelectTransformOutgoingE() {} - static void SetUpTestCase() { handle_ = cugraph::test::initialize_mg_handle(); } + static void SetUpTestCase() + { + handle_ = cugraph::test::initialize_mg_handle(); +#if 1 // FIXME: for benchmarking, delete once benchmarking is finished. + cugraph::test::enforce_p2p_initialization(handle_->get_comms(), handle_->get_stream()); + cugraph::test::enforce_p2p_initialization( + handle_->get_subcomm(cugraph::partition_2d::key_naming_t().col_name()), + handle_->get_stream()); +#endif + } static void TearDownTestCase() { handle_.reset(); } @@ -131,13 +149,59 @@ class Tests_MGPerVRandomSelectTransformOutgoingE auto mg_dst_prop = cugraph::test::generate::dst_property( *handle_, mg_graph_view, mg_vertex_prop); + // FIXME: better refactor this random seed generation code for reuse +#if 1 auto mg_vertex_buffer = rmm::device_uvector( mg_graph_view.local_vertex_partition_range_size(), handle_->get_stream()); thrust::sequence(handle_->get_thrust_policy(), - cugraph::get_dataframe_buffer_begin(mg_vertex_buffer), - cugraph::get_dataframe_buffer_end(mg_vertex_buffer), + mg_vertex_buffer.begin(), + mg_vertex_buffer.end(), mg_graph_view.local_vertex_partition_range_first()); + thrust::shuffle(handle_->get_thrust_policy(), + mg_vertex_buffer.begin(), + mg_vertex_buffer.end(), + thrust::default_random_engine()); + + std::vector tx_value_counts(comm_size); + for (int i = 0; i < comm_size; ++i) { + tx_value_counts[i] = + mg_vertex_buffer.size() / comm_size + + (static_cast(i) < static_cast(mg_vertex_buffer.size() % comm_size) ? 1 : 0); + } + std::tie(mg_vertex_buffer, std::ignore) = cugraph::shuffle_values( + handle_->get_comms(), mg_vertex_buffer.begin(), tx_value_counts, handle_->get_stream()); + thrust::shuffle(handle_->get_thrust_policy(), + mg_vertex_buffer.begin(), + mg_vertex_buffer.end(), + thrust::default_random_engine()); + + auto num_seeds = + std::min(prims_usecase.num_seeds, static_cast(mg_graph_view.number_of_vertices())); + auto num_seeds_this_gpu = + num_seeds / comm_size + + (static_cast(comm_rank) < static_cast(num_seeds % comm_size ? 1 : 0)); + + auto buffer_sizes = cugraph::host_scalar_allgather( + handle_->get_comms(), mg_vertex_buffer.size(), handle_->get_stream()); + auto min_buffer_size = *std::min_element(buffer_sizes.begin(), buffer_sizes.end()); + if (min_buffer_size <= num_seeds / comm_size) { + auto new_sizes = std::vector(comm_size, min_buffer_size); + auto num_deficits = num_seeds - min_buffer_size * comm_size; + for (int i = 0; i < comm_size; ++i) { + auto delta = std::min(num_deficits, mg_vertex_buffer.size() - new_sizes[i]); + new_sizes[i] += delta; + num_deficits -= delta; + } + num_seeds_this_gpu = new_sizes[comm_rank]; + } + mg_vertex_buffer.resize(num_seeds_this_gpu, handle_->get_stream()); + mg_vertex_buffer.shrink_to_fit(handle_->get_stream()); + + mg_vertex_buffer = cugraph::detail::shuffle_int_vertices_to_local_gpu_by_vertex_partitioning( + *handle_, std::move(mg_vertex_buffer), mg_graph_view.vertex_partition_range_lasts()); +#endif + constexpr size_t bucket_idx_cur = 0; constexpr size_t num_buckets = 1; @@ -355,6 +419,22 @@ TEST_P(Tests_MGPerVRandomSelectTransformOutgoingE_Rmat, CheckInt32Int32FloatTupl cugraph::test::override_Rmat_Usecase_with_cmd_line_arguments(std::get<1>(param))); } +TEST_P(Tests_MGPerVRandomSelectTransformOutgoingE_Rmat, CheckInt32Int64FloatTupleIntFloat) +{ + auto param = GetParam(); + run_current_test>( + std::get<0>(param), + cugraph::test::override_Rmat_Usecase_with_cmd_line_arguments(std::get<1>(param))); +} + +TEST_P(Tests_MGPerVRandomSelectTransformOutgoingE_Rmat, CheckInt64Int64FloatTupleIntFloat) +{ + auto param = GetParam(); + run_current_test>( + std::get<0>(param), + cugraph::test::override_Rmat_Usecase_with_cmd_line_arguments(std::get<1>(param))); +} + TEST_P(Tests_MGPerVRandomSelectTransformOutgoingE_File, CheckInt32Int32Float) { auto param = GetParam(); @@ -369,14 +449,30 @@ TEST_P(Tests_MGPerVRandomSelectTransformOutgoingE_Rmat, CheckInt32Int32Float) cugraph::test::override_Rmat_Usecase_with_cmd_line_arguments(std::get<1>(param))); } +TEST_P(Tests_MGPerVRandomSelectTransformOutgoingE_Rmat, CheckInt32Int64Float) +{ + auto param = GetParam(); + run_current_test( + std::get<0>(param), + cugraph::test::override_Rmat_Usecase_with_cmd_line_arguments(std::get<1>(param))); +} + +TEST_P(Tests_MGPerVRandomSelectTransformOutgoingE_Rmat, CheckInt64Int64Float) +{ + auto param = GetParam(); + run_current_test( + std::get<0>(param), + cugraph::test::override_Rmat_Usecase_with_cmd_line_arguments(std::get<1>(param))); +} + INSTANTIATE_TEST_SUITE_P( file_test, Tests_MGPerVRandomSelectTransformOutgoingE_File, ::testing::Combine( - ::testing::Values(Prims_Usecase{size_t{4}, false, false, false, true}, - Prims_Usecase{size_t{4}, false, true, false, true}, - Prims_Usecase{size_t{4}, true, false, false, true}, - Prims_Usecase{size_t{4}, true, true, false, true}), + ::testing::Values(Prims_Usecase{size_t{1000}, size_t{4}, false, false, false, true}, + Prims_Usecase{size_t{1000}, size_t{4}, false, true, false, true}, + Prims_Usecase{size_t{1000}, size_t{4}, true, false, false, true}, + Prims_Usecase{size_t{1000}, size_t{4}, true, true, false, true}), ::testing::Values(cugraph::test::File_Usecase("test/datasets/karate.mtx"), cugraph::test::File_Usecase("test/datasets/web-Google.mtx"), cugraph::test::File_Usecase("test/datasets/ljournal-2008.mtx"), @@ -385,21 +481,27 @@ INSTANTIATE_TEST_SUITE_P( INSTANTIATE_TEST_SUITE_P( rmat_small_test, Tests_MGPerVRandomSelectTransformOutgoingE_Rmat, - ::testing::Combine(::testing::Values(Prims_Usecase{size_t{4}, false, false, false, true}, - Prims_Usecase{size_t{4}, false, true, false, true}, - Prims_Usecase{size_t{4}, true, false, false, true}, - Prims_Usecase{size_t{4}, true, true, false, true}), - ::testing::Values(cugraph::test::Rmat_Usecase( - 10, 16, 0.57, 0.19, 0.19, 0, false, false, 0, true)))); + ::testing::Combine( + ::testing::Values(Prims_Usecase{size_t{1000}, size_t{4}, false, false, false, true}, + Prims_Usecase{size_t{1000}, size_t{4}, false, true, false, true}, + Prims_Usecase{size_t{1000}, size_t{4}, true, false, false, true}, + Prims_Usecase{size_t{1000}, size_t{4}, true, true, false, true}), + ::testing::Values( + cugraph::test::Rmat_Usecase(10, 16, 0.57, 0.19, 0.19, 0, false, false, 0, true)))); INSTANTIATE_TEST_SUITE_P( - rmat_large_test, + rmat_benchmark_test, /* note that scale & edge factor can be overridden in benchmarking (with + --gtest_filter to select only the rmat_benchmark_test with a specific + vertex & edge type combination) by command line arguments and do not + include more than one Rmat_Usecase that differ only in scale or edge + factor (to avoid running same benchmarks more than once) */ Tests_MGPerVRandomSelectTransformOutgoingE_Rmat, - ::testing::Combine(::testing::Values(Prims_Usecase{size_t{4}, false, false, false, false}, - Prims_Usecase{size_t{4}, false, true, false, false}, - Prims_Usecase{size_t{4}, true, false, false, false}, - Prims_Usecase{size_t{4}, true, true, false, false}), - ::testing::Values(cugraph::test::Rmat_Usecase( - 20, 32, 0.57, 0.19, 0.19, 0, false, false, 0, true)))); + ::testing::Combine( + ::testing::Values(Prims_Usecase{size_t{10000000}, size_t{25}, false, false, false, false}, + Prims_Usecase{size_t{10000000}, size_t{25}, false, true, false, false}, + Prims_Usecase{size_t{10000000}, size_t{25}, true, false, false, false}, + Prims_Usecase{size_t{10000000}, size_t{25}, true, true, false, false}), + ::testing::Values( + cugraph::test::Rmat_Usecase(20, 32, 0.57, 0.19, 0.19, 0, false, false, 0, true)))); CUGRAPH_MG_TEST_PROGRAM_MAIN() diff --git a/cpp/tests/prims/mg_per_v_transform_reduce_incoming_outgoing_e.cu b/cpp/tests/prims/mg_per_v_transform_reduce_incoming_outgoing_e.cu index aec160c6703..5655e08081c 100644 --- a/cpp/tests/prims/mg_per_v_transform_reduce_incoming_outgoing_e.cu +++ b/cpp/tests/prims/mg_per_v_transform_reduce_incoming_outgoing_e.cu @@ -463,6 +463,24 @@ TEST_P(Tests_MGPerVTransformReduceIncomingOutgoingE_Rmat, cugraph::test::override_Rmat_Usecase_with_cmd_line_arguments(std::get<1>(param))); } +TEST_P(Tests_MGPerVTransformReduceIncomingOutgoingE_Rmat, + CheckInt32Int64FloatTupleIntFloatTransposeFalse) +{ + auto param = GetParam(); + run_current_test, false>( + std::get<0>(param), + cugraph::test::override_Rmat_Usecase_with_cmd_line_arguments(std::get<1>(param))); +} + +TEST_P(Tests_MGPerVTransformReduceIncomingOutgoingE_Rmat, + CheckInt64Int64FloatTupleIntFloatTransposeFalse) +{ + auto param = GetParam(); + run_current_test, false>( + std::get<0>(param), + cugraph::test::override_Rmat_Usecase_with_cmd_line_arguments(std::get<1>(param))); +} + TEST_P(Tests_MGPerVTransformReduceIncomingOutgoingE_File, CheckInt32Int32FloatTupleIntFloatTransposeTrue) { @@ -480,6 +498,24 @@ TEST_P(Tests_MGPerVTransformReduceIncomingOutgoingE_Rmat, cugraph::test::override_Rmat_Usecase_with_cmd_line_arguments(std::get<1>(param))); } +TEST_P(Tests_MGPerVTransformReduceIncomingOutgoingE_Rmat, + CheckInt32Int64FloatTupleIntFloatTransposeTrue) +{ + auto param = GetParam(); + run_current_test, true>( + std::get<0>(param), + cugraph::test::override_Rmat_Usecase_with_cmd_line_arguments(std::get<1>(param))); +} + +TEST_P(Tests_MGPerVTransformReduceIncomingOutgoingE_Rmat, + CheckInt64Int64FloatTupleIntFloatTransposeTrue) +{ + auto param = GetParam(); + run_current_test, true>( + std::get<0>(param), + cugraph::test::override_Rmat_Usecase_with_cmd_line_arguments(std::get<1>(param))); +} + TEST_P(Tests_MGPerVTransformReduceIncomingOutgoingE_File, CheckInt32Int32FloatTransposeFalse) { auto param = GetParam(); @@ -494,6 +530,22 @@ TEST_P(Tests_MGPerVTransformReduceIncomingOutgoingE_Rmat, CheckInt32Int32FloatTr cugraph::test::override_Rmat_Usecase_with_cmd_line_arguments(std::get<1>(param))); } +TEST_P(Tests_MGPerVTransformReduceIncomingOutgoingE_Rmat, CheckInt32Int64FloatTransposeFalse) +{ + auto param = GetParam(); + run_current_test( + std::get<0>(param), + cugraph::test::override_Rmat_Usecase_with_cmd_line_arguments(std::get<1>(param))); +} + +TEST_P(Tests_MGPerVTransformReduceIncomingOutgoingE_Rmat, CheckInt64Int64FloatTransposeFalse) +{ + auto param = GetParam(); + run_current_test( + std::get<0>(param), + cugraph::test::override_Rmat_Usecase_with_cmd_line_arguments(std::get<1>(param))); +} + TEST_P(Tests_MGPerVTransformReduceIncomingOutgoingE_File, CheckInt32Int32FloatTransposeTrue) { auto param = GetParam(); @@ -508,6 +560,22 @@ TEST_P(Tests_MGPerVTransformReduceIncomingOutgoingE_Rmat, CheckInt32Int32FloatTr cugraph::test::override_Rmat_Usecase_with_cmd_line_arguments(std::get<1>(param))); } +TEST_P(Tests_MGPerVTransformReduceIncomingOutgoingE_Rmat, CheckInt32Int64FloatTransposeTrue) +{ + auto param = GetParam(); + run_current_test( + std::get<0>(param), + cugraph::test::override_Rmat_Usecase_with_cmd_line_arguments(std::get<1>(param))); +} + +TEST_P(Tests_MGPerVTransformReduceIncomingOutgoingE_Rmat, CheckInt64Int64FloatTransposeTrue) +{ + auto param = GetParam(); + run_current_test( + std::get<0>(param), + cugraph::test::override_Rmat_Usecase_with_cmd_line_arguments(std::get<1>(param))); +} + INSTANTIATE_TEST_SUITE_P( file_test, Tests_MGPerVTransformReduceIncomingOutgoingE_File, @@ -526,7 +594,11 @@ INSTANTIATE_TEST_SUITE_P( 10, 16, 0.57, 0.19, 0.19, 0, false, false, 0, true)))); INSTANTIATE_TEST_SUITE_P( - rmat_large_test, + rmat_benchmark_test, /* note that scale & edge factor can be overridden in benchmarking (with + --gtest_filter to select only the rmat_benchmark_test with a specific + vertex & edge type combination) by command line arguments and do not + include more than one Rmat_Usecase that differ only in scale or edge + factor (to avoid running same benchmarks more than once) */ Tests_MGPerVTransformReduceIncomingOutgoingE_Rmat, ::testing::Combine(::testing::Values(Prims_Usecase{false}), ::testing::Values(cugraph::test::Rmat_Usecase( diff --git a/cpp/tests/prims/mg_transform_reduce_e.cu b/cpp/tests/prims/mg_transform_reduce_e.cu index cd76226458c..af7d8e9051c 100644 --- a/cpp/tests/prims/mg_transform_reduce_e.cu +++ b/cpp/tests/prims/mg_transform_reduce_e.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2022, NVIDIA CORPORATION. + * Copyright (c) 2021-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -247,6 +247,22 @@ TEST_P(Tests_MGTransformReduceE_Rmat, CheckInt32Int32FloatTupleIntFloatTranspose cugraph::test::override_Rmat_Usecase_with_cmd_line_arguments(std::get<1>(param))); } +TEST_P(Tests_MGTransformReduceE_Rmat, CheckInt32Int64FloatTupleIntFloatTransposeFalse) +{ + auto param = GetParam(); + run_current_test, false>( + std::get<0>(param), + cugraph::test::override_Rmat_Usecase_with_cmd_line_arguments(std::get<1>(param))); +} + +TEST_P(Tests_MGTransformReduceE_Rmat, CheckInt64Int64FloatTupleIntFloatTransposeFalse) +{ + auto param = GetParam(); + run_current_test, false>( + std::get<0>(param), + cugraph::test::override_Rmat_Usecase_with_cmd_line_arguments(std::get<1>(param))); +} + TEST_P(Tests_MGTransformReduceE_File, CheckInt32Int32FloatTupleIntFloatTransposeTrue) { auto param = GetParam(); @@ -262,6 +278,22 @@ TEST_P(Tests_MGTransformReduceE_Rmat, CheckInt32Int32FloatTupleIntFloatTranspose cugraph::test::override_Rmat_Usecase_with_cmd_line_arguments(std::get<1>(param))); } +TEST_P(Tests_MGTransformReduceE_Rmat, CheckInt32Int64FloatTupleIntFloatTransposeTrue) +{ + auto param = GetParam(); + run_current_test, true>( + std::get<0>(param), + cugraph::test::override_Rmat_Usecase_with_cmd_line_arguments(std::get<1>(param))); +} + +TEST_P(Tests_MGTransformReduceE_Rmat, CheckInt64Int64FloatTupleIntFloatTransposeTrue) +{ + auto param = GetParam(); + run_current_test, true>( + std::get<0>(param), + cugraph::test::override_Rmat_Usecase_with_cmd_line_arguments(std::get<1>(param))); +} + TEST_P(Tests_MGTransformReduceE_File, CheckInt32Int32FloatTransposeFalse) { auto param = GetParam(); @@ -276,6 +308,22 @@ TEST_P(Tests_MGTransformReduceE_Rmat, CheckInt32Int32FloatTransposeFalse) cugraph::test::override_Rmat_Usecase_with_cmd_line_arguments(std::get<1>(param))); } +TEST_P(Tests_MGTransformReduceE_Rmat, CheckInt32Int64FloatTransposeFalse) +{ + auto param = GetParam(); + run_current_test( + std::get<0>(param), + cugraph::test::override_Rmat_Usecase_with_cmd_line_arguments(std::get<1>(param))); +} + +TEST_P(Tests_MGTransformReduceE_Rmat, CheckInt64Int64FloatTransposeFalse) +{ + auto param = GetParam(); + run_current_test( + std::get<0>(param), + cugraph::test::override_Rmat_Usecase_with_cmd_line_arguments(std::get<1>(param))); +} + TEST_P(Tests_MGTransformReduceE_File, CheckInt32Int32FloatTransposeTrue) { auto param = GetParam(); @@ -290,6 +338,22 @@ TEST_P(Tests_MGTransformReduceE_Rmat, CheckInt32Int32FloatTransposeTrue) cugraph::test::override_Rmat_Usecase_with_cmd_line_arguments(std::get<1>(param))); } +TEST_P(Tests_MGTransformReduceE_Rmat, CheckInt32Int64FloatTransposeTrue) +{ + auto param = GetParam(); + run_current_test( + std::get<0>(param), + cugraph::test::override_Rmat_Usecase_with_cmd_line_arguments(std::get<1>(param))); +} + +TEST_P(Tests_MGTransformReduceE_Rmat, CheckInt64Int64FloatTransposeTrue) +{ + auto param = GetParam(); + run_current_test( + std::get<0>(param), + cugraph::test::override_Rmat_Usecase_with_cmd_line_arguments(std::get<1>(param))); +} + INSTANTIATE_TEST_SUITE_P( file_test, Tests_MGTransformReduceE_File, @@ -308,7 +372,11 @@ INSTANTIATE_TEST_SUITE_P( 10, 16, 0.57, 0.19, 0.19, 0, false, false, 0, true)))); INSTANTIATE_TEST_SUITE_P( - rmat_large_test, + rmat_benchmark_test, /* note that scale & edge factor can be overridden in benchmarking (with + --gtest_filter to select only the rmat_benchmark_test with a specific + vertex & edge type combination) by command line arguments and do not + include more than one Rmat_Usecase that differ only in scale or edge + factor (to avoid running same benchmarks more than once) */ Tests_MGTransformReduceE_Rmat, ::testing::Combine(::testing::Values(Prims_Usecase{false}), ::testing::Values(cugraph::test::Rmat_Usecase( diff --git a/cpp/tests/prims/mg_transform_reduce_v.cu b/cpp/tests/prims/mg_transform_reduce_v.cu index c290dfe3fd2..6ff9a5cf17d 100644 --- a/cpp/tests/prims/mg_transform_reduce_v.cu +++ b/cpp/tests/prims/mg_transform_reduce_v.cu @@ -280,6 +280,22 @@ TEST_P(Tests_MGTransformReduceV_Rmat, CheckInt32Int32FloatTupleIntFloatTranspose cugraph::test::override_Rmat_Usecase_with_cmd_line_arguments(std::get<1>(param))); } +TEST_P(Tests_MGTransformReduceV_Rmat, CheckInt32Int64FloatTupleIntFloatTransposeFalse) +{ + auto param = GetParam(); + run_current_test, false>( + std::get<0>(param), + cugraph::test::override_Rmat_Usecase_with_cmd_line_arguments(std::get<1>(param))); +} + +TEST_P(Tests_MGTransformReduceV_Rmat, CheckInt64Int64FloatTupleIntFloatTransposeFalse) +{ + auto param = GetParam(); + run_current_test, false>( + std::get<0>(param), + cugraph::test::override_Rmat_Usecase_with_cmd_line_arguments(std::get<1>(param))); +} + TEST_P(Tests_MGTransformReduceV_File, CheckInt32Int32FloatTupleIntFloatTransposeTrue) { auto param = GetParam(); @@ -295,6 +311,22 @@ TEST_P(Tests_MGTransformReduceV_Rmat, CheckInt32Int32FloatTupleIntFloatTranspose cugraph::test::override_Rmat_Usecase_with_cmd_line_arguments(std::get<1>(param))); } +TEST_P(Tests_MGTransformReduceV_Rmat, CheckInt32Int64FloatTupleIntFloatTransposeTrue) +{ + auto param = GetParam(); + run_current_test, true>( + std::get<0>(param), + cugraph::test::override_Rmat_Usecase_with_cmd_line_arguments(std::get<1>(param))); +} + +TEST_P(Tests_MGTransformReduceV_Rmat, CheckInt64Int64FloatTupleIntFloatTransposeTrue) +{ + auto param = GetParam(); + run_current_test, true>( + std::get<0>(param), + cugraph::test::override_Rmat_Usecase_with_cmd_line_arguments(std::get<1>(param))); +} + TEST_P(Tests_MGTransformReduceV_File, CheckInt32Int32FloatTransposeFalse) { auto param = GetParam(); @@ -309,6 +341,22 @@ TEST_P(Tests_MGTransformReduceV_Rmat, CheckInt32Int32FloatTransposeFalse) cugraph::test::override_Rmat_Usecase_with_cmd_line_arguments(std::get<1>(param))); } +TEST_P(Tests_MGTransformReduceV_Rmat, CheckInt32Int64FloatTransposeFalse) +{ + auto param = GetParam(); + run_current_test( + std::get<0>(param), + cugraph::test::override_Rmat_Usecase_with_cmd_line_arguments(std::get<1>(param))); +} + +TEST_P(Tests_MGTransformReduceV_Rmat, CheckInt64Int64FloatTransposeFalse) +{ + auto param = GetParam(); + run_current_test( + std::get<0>(param), + cugraph::test::override_Rmat_Usecase_with_cmd_line_arguments(std::get<1>(param))); +} + TEST_P(Tests_MGTransformReduceV_File, CheckInt32Int32FloatTransposeTrue) { auto param = GetParam(); @@ -323,6 +371,22 @@ TEST_P(Tests_MGTransformReduceV_Rmat, CheckInt32Int32FloatTransposeTrue) cugraph::test::override_Rmat_Usecase_with_cmd_line_arguments(std::get<1>(param))); } +TEST_P(Tests_MGTransformReduceV_Rmat, CheckInt32Int64FloatTransposeTrue) +{ + auto param = GetParam(); + run_current_test( + std::get<0>(param), + cugraph::test::override_Rmat_Usecase_with_cmd_line_arguments(std::get<1>(param))); +} + +TEST_P(Tests_MGTransformReduceV_Rmat, CheckInt64Int64FloatTransposeTrue) +{ + auto param = GetParam(); + run_current_test( + std::get<0>(param), + cugraph::test::override_Rmat_Usecase_with_cmd_line_arguments(std::get<1>(param))); +} + INSTANTIATE_TEST_SUITE_P( file_test, Tests_MGTransformReduceV_File, @@ -341,7 +405,11 @@ INSTANTIATE_TEST_SUITE_P( 10, 16, 0.57, 0.19, 0.19, 0, false, false, 0, true)))); INSTANTIATE_TEST_SUITE_P( - rmat_large_test, + rmat_benchmark_test, /* note that scale & edge factor can be overridden in benchmarking (with + --gtest_filter to select only the rmat_benchmark_test with a specific + vertex & edge type combination) by command line arguments and do not + include more than one Rmat_Usecase that differ only in scale or edge + factor (to avoid running same benchmarks more than once) */ Tests_MGTransformReduceV_Rmat, ::testing::Combine(::testing::Values(Prims_Usecase{false}), ::testing::Values(cugraph::test::Rmat_Usecase( diff --git a/cpp/tests/utilities/mg_utilities.cpp b/cpp/tests/utilities/mg_utilities.cpp index 7c898ee1924..a0475dcc734 100644 --- a/cpp/tests/utilities/mg_utilities.cpp +++ b/cpp/tests/utilities/mg_utilities.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2022, NVIDIA CORPORATION. + * Copyright (c) 2021-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -66,13 +66,12 @@ std::unique_ptr initialize_mg_handle(size_t pool_size) return std::move(handle); } -void enforce_p2p_initialization(raft::handle_t const& handle) +void enforce_p2p_initialization(raft::comms::comms_t const& comm, rmm::cuda_stream_view stream) { - auto& comm = handle.get_comms(); auto const comm_size = comm.get_size(); - rmm::device_uvector tx_ints(comm_size, handle.get_stream()); - rmm::device_uvector rx_ints(comm_size, handle.get_stream()); + rmm::device_uvector tx_ints(comm_size, stream); + rmm::device_uvector rx_ints(comm_size, stream); std::vector tx_sizes(comm_size, size_t{1}); std::vector tx_offsets(comm_size); std::iota(tx_offsets.begin(), tx_offsets.end(), size_t{0}); @@ -90,9 +89,9 @@ void enforce_p2p_initialization(raft::handle_t const& handle) rx_sizes, rx_offsets, rx_ranks, - handle.get_stream()); + stream); - handle.sync_stream(); + CUDA_TRY(cudaStreamSynchronize(stream)); } } // namespace test diff --git a/cpp/tests/utilities/mg_utilities.hpp b/cpp/tests/utilities/mg_utilities.hpp index cbaad5a023a..9f98245387d 100644 --- a/cpp/tests/utilities/mg_utilities.hpp +++ b/cpp/tests/utilities/mg_utilities.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2022, NVIDIA CORPORATION. + * Copyright (c) 2021-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -33,7 +33,7 @@ std::unique_ptr initialize_mg_handle(size_t pool_size = 64); // NCCL lazily initializes for P2P, and this enforces P2P initialization for better performance // measurements -void enforce_p2p_initialization(raft::handle_t const& handle); +void enforce_p2p_initialization(raft::comms::comms_t const& comm, rmm::cuda_stream_view stream); } // namespace test } // namespace cugraph diff --git a/cpp/tests/utilities/thrust_wrapper.cu b/cpp/tests/utilities/thrust_wrapper.cu index 802d12f2145..7ad2b4f1392 100644 --- a/cpp/tests/utilities/thrust_wrapper.cu +++ b/cpp/tests/utilities/thrust_wrapper.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2022, NVIDIA CORPORATION. + * Copyright (c) 2021-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -106,6 +106,11 @@ template std::tuple, rmm::device_uvector> s rmm::device_uvector const& keys, rmm::device_uvector const& values); +template std::tuple, rmm::device_uvector> sort_by_key( + raft::handle_t const& handle, + rmm::device_uvector const& keys, + rmm::device_uvector const& values); + template std::tuple, rmm::device_uvector> sort_by_key( raft::handle_t const& handle, rmm::device_uvector const& keys, @@ -117,6 +122,12 @@ sort_by_key(raft::handle_t const& handle, rmm::device_uvector const& keys, std::tuple, rmm::device_uvector> const& values); +template std::tuple, + std::tuple, rmm::device_uvector>> +sort_by_key(raft::handle_t const& handle, + rmm::device_uvector const& keys, + std::tuple, rmm::device_uvector> const& values); + template vertex_t max_element(raft::handle_t const& handle, raft::device_span vertices) {