Skip to content

Commit

Permalink
Sampling primitive performance optimization. (#3061)
Browse files Browse the repository at this point in the history
Improve performance for 1 & 2 GPU test cases (there will be follow-up PRs for larger scale testing/performance improvements)
Bug fix for the single GPU & invalid_value = thrust::nullopt case.

Authors:
  - Seunghwa Kang (https://github.com/seunghwak)

Approvers:
  - Naim (https://github.com/naimnv)
  - Chuck Hastings (https://github.com/ChuckHastings)

URL: #3061
  • Loading branch information
seunghwak authored Jan 12, 2023
1 parent a02e876 commit 9f5718b
Show file tree
Hide file tree
Showing 10 changed files with 668 additions and 254 deletions.
422 changes: 206 additions & 216 deletions cpp/src/prims/per_v_random_select_transform_outgoing_e.cuh

Large diffs are not rendered by default.

72 changes: 70 additions & 2 deletions cpp/tests/prims/mg_count_if_e.cu
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2021-2022, NVIDIA CORPORATION.
* Copyright (c) 2021-2023, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -193,6 +193,22 @@ TEST_P(Tests_MGCountIfE_Rmat, CheckInt32Int32FloatTupleIntFloatTransposeFalse)
cugraph::test::override_Rmat_Usecase_with_cmd_line_arguments(std::get<1>(param)));
}

TEST_P(Tests_MGCountIfE_Rmat, CheckInt32Int64FloatTupleIntFloatTransposeFalse)
{
  // Per the test name: int32/int64 integer types, float, a tuple<int, float> value type, and the
  // transpose flag set to false. The R-mat use case is passed through the command-line override
  // helper before the test body runs.
  auto [prims_usecase, rmat_usecase] = GetParam();
  run_current_test<int32_t, int64_t, float, thrust::tuple<int, float>, false>(
    prims_usecase, cugraph::test::override_Rmat_Usecase_with_cmd_line_arguments(rmat_usecase));
}

TEST_P(Tests_MGCountIfE_Rmat, CheckInt64Int64FloatTupleIntFloatTransposeFalse)
{
  // Per the test name: int64/int64 integer types, float, a tuple<int, float> value type, and the
  // transpose flag set to false. The R-mat use case is passed through the command-line override
  // helper before the test body runs.
  auto [prims_usecase, rmat_usecase] = GetParam();
  run_current_test<int64_t, int64_t, float, thrust::tuple<int, float>, false>(
    prims_usecase, cugraph::test::override_Rmat_Usecase_with_cmd_line_arguments(rmat_usecase));
}

TEST_P(Tests_MGCountIfE_File, CheckInt32Int32FloatTupleIntFloatTransposeTrue)
{
auto param = GetParam();
Expand All @@ -208,6 +224,22 @@ TEST_P(Tests_MGCountIfE_Rmat, CheckInt32Int32FloatTupleIntFloatTransposeTrue)
cugraph::test::override_Rmat_Usecase_with_cmd_line_arguments(std::get<1>(param)));
}

TEST_P(Tests_MGCountIfE_Rmat, CheckInt32Int64FloatTupleIntFloatTransposeTrue)
{
  // Per the test name: int32/int64 integer types, float, a tuple<int, float> value type, and the
  // transpose flag set to true. The R-mat use case is passed through the command-line override
  // helper before the test body runs.
  auto [prims_usecase, rmat_usecase] = GetParam();
  run_current_test<int32_t, int64_t, float, thrust::tuple<int, float>, true>(
    prims_usecase, cugraph::test::override_Rmat_Usecase_with_cmd_line_arguments(rmat_usecase));
}

TEST_P(Tests_MGCountIfE_Rmat, CheckInt64Int64FloatTupleIntFloatTransposeTrue)
{
  // Per the test name: int64/int64 integer types, float, a tuple<int, float> value type, and the
  // transpose flag set to true. The R-mat use case is passed through the command-line override
  // helper before the test body runs.
  auto [prims_usecase, rmat_usecase] = GetParam();
  run_current_test<int64_t, int64_t, float, thrust::tuple<int, float>, true>(
    prims_usecase, cugraph::test::override_Rmat_Usecase_with_cmd_line_arguments(rmat_usecase));
}

TEST_P(Tests_MGCountIfE_File, CheckInt32Int32FloatTransposeFalse)
{
auto param = GetParam();
Expand All @@ -222,6 +254,22 @@ TEST_P(Tests_MGCountIfE_Rmat, CheckInt32Int32FloatTransposeFalse)
cugraph::test::override_Rmat_Usecase_with_cmd_line_arguments(std::get<1>(param)));
}

TEST_P(Tests_MGCountIfE_Rmat, CheckInt32Int64FloatTransposeFalse)
{
  // Per the test name: int32/int64 integer types, float, a plain int value type, and the
  // transpose flag set to false. The R-mat use case is passed through the command-line override
  // helper before the test body runs.
  auto [prims_usecase, rmat_usecase] = GetParam();
  run_current_test<int32_t, int64_t, float, int, false>(
    prims_usecase, cugraph::test::override_Rmat_Usecase_with_cmd_line_arguments(rmat_usecase));
}

TEST_P(Tests_MGCountIfE_Rmat, CheckInt64Int64FloatTransposeFalse)
{
  // Per the test name: int64/int64 integer types, float, a plain int value type, and the
  // transpose flag set to false. The R-mat use case is passed through the command-line override
  // helper before the test body runs.
  auto [prims_usecase, rmat_usecase] = GetParam();
  run_current_test<int64_t, int64_t, float, int, false>(
    prims_usecase, cugraph::test::override_Rmat_Usecase_with_cmd_line_arguments(rmat_usecase));
}

TEST_P(Tests_MGCountIfE_File, CheckInt32Int32FloatTransposeTrue)
{
auto param = GetParam();
Expand All @@ -236,6 +284,22 @@ TEST_P(Tests_MGCountIfE_Rmat, CheckInt32Int32FloatTransposeTrue)
cugraph::test::override_Rmat_Usecase_with_cmd_line_arguments(std::get<1>(param)));
}

TEST_P(Tests_MGCountIfE_Rmat, CheckInt32Int64FloatTransposeTrue)
{
  // Per the test name: int32/int64 integer types, float, a plain int value type, and the
  // transpose flag set to true. The R-mat use case is passed through the command-line override
  // helper before the test body runs.
  auto [prims_usecase, rmat_usecase] = GetParam();
  run_current_test<int32_t, int64_t, float, int, true>(
    prims_usecase, cugraph::test::override_Rmat_Usecase_with_cmd_line_arguments(rmat_usecase));
}

TEST_P(Tests_MGCountIfE_Rmat, CheckInt64Int64FloatTransposeTrue)
{
  // Per the test name: int64/int64 integer types, float, a plain int value type, and the
  // transpose flag set to true. The R-mat use case is passed through the command-line override
  // helper before the test body runs.
  auto [prims_usecase, rmat_usecase] = GetParam();
  run_current_test<int64_t, int64_t, float, int, true>(
    prims_usecase, cugraph::test::override_Rmat_Usecase_with_cmd_line_arguments(rmat_usecase));
}

INSTANTIATE_TEST_SUITE_P(
file_test,
Tests_MGCountIfE_File,
Expand All @@ -254,7 +318,11 @@ INSTANTIATE_TEST_SUITE_P(
10, 16, 0.57, 0.19, 0.19, 0, false, false, 0, true))));

INSTANTIATE_TEST_SUITE_P(
rmat_large_test,
rmat_benchmark_test, /* NOTE: when benchmarking, scale & edge factor can be overridden by command
line arguments (use --gtest_filter to select only the rmat_benchmark_test
for a specific vertex & edge type combination). Do not include more than
one Rmat_Usecase that differs only in scale or edge factor; otherwise the
same benchmark would run more than once. */
Tests_MGCountIfE_Rmat,
::testing::Combine(::testing::Values(Prims_Usecase{false}),
::testing::Values(cugraph::test::Rmat_Usecase(
Expand Down
38 changes: 37 additions & 1 deletion cpp/tests/prims/mg_per_v_pair_transform_dst_nbr_intersection.cu
Original file line number Diff line number Diff line change
Expand Up @@ -246,6 +246,22 @@ TEST_P(Tests_MGPerVPairTransformDstNbrIntersection_Rmat, CheckInt32Int32FloatTup
cugraph::test::override_Rmat_Usecase_with_cmd_line_arguments(std::get<1>(param)));
}

TEST_P(Tests_MGPerVPairTransformDstNbrIntersection_Rmat, CheckInt32Int64FloatTupleIntFloat)
{
  // Per the test name: int32/int64 integer types, float, and a tuple<int, float> value type.
  // The R-mat use case is passed through the command-line override helper before the test runs.
  auto [prims_usecase, rmat_usecase] = GetParam();
  run_current_test<int32_t, int64_t, float, thrust::tuple<int, float>>(
    prims_usecase, cugraph::test::override_Rmat_Usecase_with_cmd_line_arguments(rmat_usecase));
}

TEST_P(Tests_MGPerVPairTransformDstNbrIntersection_Rmat, CheckInt64Int64FloatTupleIntFloat)
{
  // Per the test name: int64/int64 integer types, float, and a tuple<int, float> value type.
  // The R-mat use case is passed through the command-line override helper before the test runs.
  auto [prims_usecase, rmat_usecase] = GetParam();
  run_current_test<int64_t, int64_t, float, thrust::tuple<int, float>>(
    prims_usecase, cugraph::test::override_Rmat_Usecase_with_cmd_line_arguments(rmat_usecase));
}

TEST_P(Tests_MGPerVPairTransformDstNbrIntersection_File, CheckInt32Int32Float)
{
auto param = GetParam();
Expand All @@ -260,6 +276,22 @@ TEST_P(Tests_MGPerVPairTransformDstNbrIntersection_Rmat, CheckInt32Int32Float)
cugraph::test::override_Rmat_Usecase_with_cmd_line_arguments(std::get<1>(param)));
}

TEST_P(Tests_MGPerVPairTransformDstNbrIntersection_Rmat, CheckInt32Int64Float)
{
  // Per the test name: int32/int64 integer types, float, and a plain int value type.
  // The R-mat use case is passed through the command-line override helper before the test runs.
  auto [prims_usecase, rmat_usecase] = GetParam();
  run_current_test<int32_t, int64_t, float, int>(
    prims_usecase, cugraph::test::override_Rmat_Usecase_with_cmd_line_arguments(rmat_usecase));
}

TEST_P(Tests_MGPerVPairTransformDstNbrIntersection_Rmat, CheckInt64Int64Float)
{
  // Per the test name: int64/int64 integer types, float, and a plain int value type.
  // The R-mat use case is passed through the command-line override helper before the test runs.
  auto [prims_usecase, rmat_usecase] = GetParam();
  run_current_test<int64_t, int64_t, float, int>(
    prims_usecase, cugraph::test::override_Rmat_Usecase_with_cmd_line_arguments(rmat_usecase));
}

INSTANTIATE_TEST_SUITE_P(
file_test,
Tests_MGPerVPairTransformDstNbrIntersection_File,
Expand All @@ -278,7 +310,11 @@ INSTANTIATE_TEST_SUITE_P(
10, 16, 0.57, 0.19, 0.19, 0, false, false, 0, true))));

INSTANTIATE_TEST_SUITE_P(
rmat_large_test,
rmat_benchmark_test, /* NOTE: when benchmarking, scale & edge factor can be overridden by command
line arguments (use --gtest_filter to select only the rmat_benchmark_test
for a specific vertex & edge type combination). Do not include more than
one Rmat_Usecase that differs only in scale or edge factor; otherwise the
same benchmark would run more than once. */
Tests_MGPerVPairTransformDstNbrIntersection_Rmat,
::testing::Combine(::testing::Values(Prims_Usecase{size_t{1024 * 1024}, false}),
::testing::Values(cugraph::test::Rmat_Usecase(
Expand Down
144 changes: 123 additions & 21 deletions cpp/tests/prims/mg_per_v_random_select_transform_outgoing_e.cu
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2021-2022, NVIDIA CORPORATION.
* Copyright (c) 2021-2023, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -32,13 +32,21 @@
#include <cugraph/utilities/high_res_timer.hpp>
#include <cugraph/utilities/host_scalar_comm.hpp>
#include <cugraph/utilities/thrust_tuple_utils.hpp>
#if 1 // for random seed selection
#include <cugraph/utilities/shuffle_comm.cuh>
#endif

#include <raft/comms/comms.hpp>
#include <raft/comms/mpi_comms.hpp>
#include <raft/core/comms.hpp>
#include <raft/core/handle.hpp>
#include <rmm/device_uvector.hpp>
#include <thrust/iterator/counting_iterator.h>
#include <thrust/tuple.h>
#if 1 // for random seed selection
#include <thrust/random.h>
#include <thrust/shuffle.h>
#endif

#include <gtest/gtest.h>

Expand Down Expand Up @@ -68,6 +76,7 @@ struct e_op_t {
};

struct Prims_Usecase {
size_t num_seeds{0};
size_t K{0};
bool with_replacement{false};
bool use_invalid_value{false};
Expand All @@ -81,7 +90,16 @@ class Tests_MGPerVRandomSelectTransformOutgoingE
public:
// Default constructor: intentionally empty.
Tests_MGPerVRandomSelectTransformOutgoingE() {}

static void SetUpTestCase() { handle_ = cugraph::test::initialize_mg_handle(); }
// Suite-level fixture hook: creates the multi-GPU handle stored in the static handle_ member.
static void SetUpTestCase()
{
handle_ = cugraph::test::initialize_mg_handle();
#if 1 // FIXME: for benchmarking, delete once benchmarking is finished.
// Temporary benchmarking aid: calls enforce_p2p_initialization on the global communicator and on
// the sub-communicator named by key_naming_t().col_name(), both on the handle's stream.
// NOTE(review): presumably this keeps one-time P2P setup cost out of timed regions - confirm.
cugraph::test::enforce_p2p_initialization(handle_->get_comms(), handle_->get_stream());
cugraph::test::enforce_p2p_initialization(
handle_->get_subcomm(cugraph::partition_2d::key_naming_t().col_name()),
handle_->get_stream());
#endif
}

// Suite-level fixture hook: resets handle_, releasing the handle created in SetUpTestCase().
static void TearDownTestCase() { handle_.reset(); }

Expand Down Expand Up @@ -131,13 +149,59 @@ class Tests_MGPerVRandomSelectTransformOutgoingE
auto mg_dst_prop = cugraph::test::generate<vertex_t, property_t>::dst_property(
*handle_, mg_graph_view, mg_vertex_prop);

// FIXME: better refactor this random seed generation code for reuse
#if 1
auto mg_vertex_buffer = rmm::device_uvector<vertex_t>(
mg_graph_view.local_vertex_partition_range_size(), handle_->get_stream());
thrust::sequence(handle_->get_thrust_policy(),
cugraph::get_dataframe_buffer_begin(mg_vertex_buffer),
cugraph::get_dataframe_buffer_end(mg_vertex_buffer),
mg_vertex_buffer.begin(),
mg_vertex_buffer.end(),
mg_graph_view.local_vertex_partition_range_first());

thrust::shuffle(handle_->get_thrust_policy(),
mg_vertex_buffer.begin(),
mg_vertex_buffer.end(),
thrust::default_random_engine());

std::vector<size_t> tx_value_counts(comm_size);
for (int i = 0; i < comm_size; ++i) {
tx_value_counts[i] =
mg_vertex_buffer.size() / comm_size +
(static_cast<size_t>(i) < static_cast<size_t>(mg_vertex_buffer.size() % comm_size) ? 1 : 0);
}
std::tie(mg_vertex_buffer, std::ignore) = cugraph::shuffle_values(
handle_->get_comms(), mg_vertex_buffer.begin(), tx_value_counts, handle_->get_stream());
thrust::shuffle(handle_->get_thrust_policy(),
mg_vertex_buffer.begin(),
mg_vertex_buffer.end(),
thrust::default_random_engine());

// Total number of seeds to use: the requested count, capped at the number of vertices in the
// graph.
auto num_seeds =
  std::min(prims_usecase.num_seeds, static_cast<size_t>(mg_graph_view.number_of_vertices()));
// Even split across ranks: every rank gets num_seeds / comm_size seeds and the first
// (num_seeds % comm_size) ranks get one extra, so the per-rank counts sum to num_seeds.
// The "? 1 : 0" must apply to the rank-vs-remainder comparison (as in the tx_value_counts
// computation earlier in this function); with it inside the cast, only rank 0 ever received an
// extra seed and the total fell short whenever the remainder exceeded 1.
auto num_seeds_this_gpu =
  num_seeds / comm_size +
  (static_cast<size_t>(comm_rank) < static_cast<size_t>(num_seeds % comm_size) ? 1 : 0);

auto buffer_sizes = cugraph::host_scalar_allgather(
handle_->get_comms(), mg_vertex_buffer.size(), handle_->get_stream());
auto min_buffer_size = *std::min_element(buffer_sizes.begin(), buffer_sizes.end());
if (min_buffer_size <= num_seeds / comm_size) {
auto new_sizes = std::vector<size_t>(comm_size, min_buffer_size);
auto num_deficits = num_seeds - min_buffer_size * comm_size;
for (int i = 0; i < comm_size; ++i) {
auto delta = std::min(num_deficits, mg_vertex_buffer.size() - new_sizes[i]);
new_sizes[i] += delta;
num_deficits -= delta;
}
num_seeds_this_gpu = new_sizes[comm_rank];
}
mg_vertex_buffer.resize(num_seeds_this_gpu, handle_->get_stream());
mg_vertex_buffer.shrink_to_fit(handle_->get_stream());

mg_vertex_buffer = cugraph::detail::shuffle_int_vertices_to_local_gpu_by_vertex_partitioning(
*handle_, std::move(mg_vertex_buffer), mg_graph_view.vertex_partition_range_lasts());
#endif

constexpr size_t bucket_idx_cur = 0;
constexpr size_t num_buckets = 1;

Expand Down Expand Up @@ -355,6 +419,22 @@ TEST_P(Tests_MGPerVRandomSelectTransformOutgoingE_Rmat, CheckInt32Int32FloatTupl
cugraph::test::override_Rmat_Usecase_with_cmd_line_arguments(std::get<1>(param)));
}

TEST_P(Tests_MGPerVRandomSelectTransformOutgoingE_Rmat, CheckInt32Int64FloatTupleIntFloat)
{
  // Per the test name: int32/int64 integer types, float, and a tuple<int, float> value type.
  // The R-mat use case is passed through the command-line override helper before the test runs.
  auto [prims_usecase, rmat_usecase] = GetParam();
  run_current_test<int32_t, int64_t, float, thrust::tuple<int, float>>(
    prims_usecase, cugraph::test::override_Rmat_Usecase_with_cmd_line_arguments(rmat_usecase));
}

TEST_P(Tests_MGPerVRandomSelectTransformOutgoingE_Rmat, CheckInt64Int64FloatTupleIntFloat)
{
  // Per the test name: int64/int64 integer types, float, and a tuple<int, float> value type.
  // The R-mat use case is passed through the command-line override helper before the test runs.
  auto [prims_usecase, rmat_usecase] = GetParam();
  run_current_test<int64_t, int64_t, float, thrust::tuple<int, float>>(
    prims_usecase, cugraph::test::override_Rmat_Usecase_with_cmd_line_arguments(rmat_usecase));
}

TEST_P(Tests_MGPerVRandomSelectTransformOutgoingE_File, CheckInt32Int32Float)
{
auto param = GetParam();
Expand All @@ -369,14 +449,30 @@ TEST_P(Tests_MGPerVRandomSelectTransformOutgoingE_Rmat, CheckInt32Int32Float)
cugraph::test::override_Rmat_Usecase_with_cmd_line_arguments(std::get<1>(param)));
}

TEST_P(Tests_MGPerVRandomSelectTransformOutgoingE_Rmat, CheckInt32Int64Float)
{
  // Per the test name: int32/int64 integer types, float, and a plain int value type.
  // The R-mat use case is passed through the command-line override helper before the test runs.
  auto [prims_usecase, rmat_usecase] = GetParam();
  run_current_test<int32_t, int64_t, float, int>(
    prims_usecase, cugraph::test::override_Rmat_Usecase_with_cmd_line_arguments(rmat_usecase));
}

TEST_P(Tests_MGPerVRandomSelectTransformOutgoingE_Rmat, CheckInt64Int64Float)
{
  // Per the test name: int64/int64 integer types, float, and a plain int value type.
  // The R-mat use case is passed through the command-line override helper before the test runs.
  auto [prims_usecase, rmat_usecase] = GetParam();
  run_current_test<int64_t, int64_t, float, int>(
    prims_usecase, cugraph::test::override_Rmat_Usecase_with_cmd_line_arguments(rmat_usecase));
}

INSTANTIATE_TEST_SUITE_P(
file_test,
Tests_MGPerVRandomSelectTransformOutgoingE_File,
::testing::Combine(
::testing::Values(Prims_Usecase{size_t{4}, false, false, false, true},
Prims_Usecase{size_t{4}, false, true, false, true},
Prims_Usecase{size_t{4}, true, false, false, true},
Prims_Usecase{size_t{4}, true, true, false, true}),
::testing::Values(Prims_Usecase{size_t{1000}, size_t{4}, false, false, false, true},
Prims_Usecase{size_t{1000}, size_t{4}, false, true, false, true},
Prims_Usecase{size_t{1000}, size_t{4}, true, false, false, true},
Prims_Usecase{size_t{1000}, size_t{4}, true, true, false, true}),
::testing::Values(cugraph::test::File_Usecase("test/datasets/karate.mtx"),
cugraph::test::File_Usecase("test/datasets/web-Google.mtx"),
cugraph::test::File_Usecase("test/datasets/ljournal-2008.mtx"),
Expand All @@ -385,21 +481,27 @@ INSTANTIATE_TEST_SUITE_P(
// Small R-mat instantiation: four Prims_Usecase configurations combined with a single
// Rmat_Usecase whose first two arguments are 10 and 16.
// NOTE(review): this span shows both the pre-change initializers (leading size_t{4}) and the
// post-change ones (leading size_t{1000}, size_t{4}); in the new form the first two fields are
// num_seeds and K per the Prims_Usecase declaration.
INSTANTIATE_TEST_SUITE_P(
rmat_small_test,
Tests_MGPerVRandomSelectTransformOutgoingE_Rmat,
::testing::Combine(::testing::Values(Prims_Usecase{size_t{4}, false, false, false, true},
Prims_Usecase{size_t{4}, false, true, false, true},
Prims_Usecase{size_t{4}, true, false, false, true},
Prims_Usecase{size_t{4}, true, true, false, true}),
::testing::Values(cugraph::test::Rmat_Usecase(
10, 16, 0.57, 0.19, 0.19, 0, false, false, 0, true))));
::testing::Combine(
::testing::Values(Prims_Usecase{size_t{1000}, size_t{4}, false, false, false, true},
Prims_Usecase{size_t{1000}, size_t{4}, false, true, false, true},
Prims_Usecase{size_t{1000}, size_t{4}, true, false, false, true},
Prims_Usecase{size_t{1000}, size_t{4}, true, true, false, true}),
::testing::Values(
cugraph::test::Rmat_Usecase(10, 16, 0.57, 0.19, 0.19, 0, false, false, 0, true))));

INSTANTIATE_TEST_SUITE_P(
rmat_large_test,
rmat_benchmark_test, /* NOTE: when benchmarking, scale & edge factor can be overridden by command
line arguments (use --gtest_filter to select only the rmat_benchmark_test
for a specific vertex & edge type combination). Do not include more than
one Rmat_Usecase that differs only in scale or edge factor; otherwise the
same benchmark would run more than once. */
Tests_MGPerVRandomSelectTransformOutgoingE_Rmat,
::testing::Combine(::testing::Values(Prims_Usecase{size_t{4}, false, false, false, false},
Prims_Usecase{size_t{4}, false, true, false, false},
Prims_Usecase{size_t{4}, true, false, false, false},
Prims_Usecase{size_t{4}, true, true, false, false}),
::testing::Values(cugraph::test::Rmat_Usecase(
20, 32, 0.57, 0.19, 0.19, 0, false, false, 0, true))));
::testing::Combine(
::testing::Values(Prims_Usecase{size_t{10000000}, size_t{25}, false, false, false, false},
Prims_Usecase{size_t{10000000}, size_t{25}, false, true, false, false},
Prims_Usecase{size_t{10000000}, size_t{25}, true, false, false, false},
Prims_Usecase{size_t{10000000}, size_t{25}, true, true, false, false}),
::testing::Values(
cugraph::test::Rmat_Usecase(20, 32, 0.57, 0.19, 0.19, 0, false, false, 0, true))));

CUGRAPH_MG_TEST_PROGRAM_MAIN()
Loading

0 comments on commit 9f5718b

Please sign in to comment.