Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Update select_random_vertices to sample from a given distributed set or from [0, V) #3455

Merged
merged 22 commits into from
Apr 21, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
22 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 6 additions & 3 deletions cpp/include/cugraph/graph_functions.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -892,8 +892,9 @@ weight_t compute_total_edge_weight(
* or multi-GPU (true).
* @param handle RAFT handle object to encapsulate resources (e.g. CUDA stream, communicator, and
* handles to various CUDA libraries) to run graph algorithms.
* @param graph_view Graph view object of the input graph to compute the maximum per-vertex outgoing
* edge weight sums.
* @param graph_view Graph view object of the input graph to select random vertices from.
* @param given_set Distributed set to sample from. If @p given_set is not specified, sample from
* the entire vertex range provided by @p graph_view.
naimnv marked this conversation as resolved.
Show resolved Hide resolved
* @param rng_state The RngState instance holding pseudo-random number generator state.
* @param select_count The number of vertices to select (from @p given_set if specified, otherwise from the graph).
* @param with_replacement If true, select with replacement, if false select without replacement
Expand All @@ -904,9 +905,11 @@ template <typename vertex_t, typename edge_t, bool store_transposed, bool multi_
rmm::device_uvector<vertex_t> select_random_vertices(
raft::handle_t const& handle,
graph_view_t<vertex_t, edge_t, store_transposed, multi_gpu> const& graph_view,
std::optional<raft::device_span<vertex_t const>> given_set,
raft::random::RngState& rng_state,
size_t select_count,
bool with_replacement,
bool sort_vertices);
bool sort_vertices,
bool do_expensive_check = false);

} // namespace cugraph
36 changes: 36 additions & 0 deletions cpp/include/cugraph/partition_manager.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,9 @@

#pragma once

#include <cugraph/utilities/host_scalar_comm.hpp>
#include <cugraph/utilities/shuffle_comm.cuh>

#include <raft/core/comms.hpp>
#include <raft/core/handle.hpp>

Expand Down Expand Up @@ -93,6 +96,39 @@ class partition_manager {
return std::string(map_major_comm_to_gpu_row_comm ? "gpu_col_comm" : "gpu_row_comm");
}

/**
 * @brief Compute the cumulative (inclusive) end offsets of all partitions from per-rank sizes.
 *
 * Every rank of the global communicator contributes the size of its own partition together with
 * the vertex partition ID derived from its major/minor sub-communicator ranks. The sizes are
 * placed in partition-ID order and prefix-summed, so entry @c i of the result is the sum of the
 * sizes of partitions @c 0 through @c i.
 *
 * This performs two host-side all-gathers on the global communicator, so it has to be called by
 * every rank.
 *
 * @tparam vertex_t Integer type of the partition sizes and of the returned offsets.
 * @param handle RAFT handle providing the global communicator, the major/minor sub-communicators
 * and the stream used for the all-gathers.
 * @param local_partition_size Number of elements in the partition held by the calling rank.
 * @return Vector with one entry per rank of the global communicator holding the cumulative
 * partition sizes in partition-ID order.
 */
template <typename vertex_t>
static std::vector<vertex_t> compute_partition_range_lasts(raft::handle_t const& handle,
                                                           vertex_t local_partition_size)
{
  auto& comm           = handle.get_comms();
  auto const num_ranks = comm.get_size();

  auto& major_comm = handle.get_subcomm(cugraph::partition_manager::major_comm_name());
  auto& minor_comm = handle.get_subcomm(cugraph::partition_manager::minor_comm_name());

  // Vertex partition ID that corresponds to the calling rank.
  auto const local_partition_id =
    partition_manager::compute_vertex_partition_id_from_graph_subcomm_ranks(
      major_comm.get_size(), minor_comm.get_size(), major_comm.get_rank(), minor_comm.get_rank());

  // Collective calls: sizes first, then partition IDs (same order on every rank).
  auto const gathered_sizes =
    host_scalar_allgather(comm, local_partition_size, handle.get_stream());
  auto const gathered_ids = host_scalar_allgather(comm, local_partition_id, handle.get_stream());

  // Re-order the gathered sizes from rank order to partition-ID order.
  // NOTE(review): assumes the gathered IDs are distinct and lie in [0, num_ranks); a slot that no
  // rank reports keeps size 0.
  std::vector<vertex_t> range_lasts(num_ranks, vertex_t{0});
  for (int rank = 0; rank < num_ranks; ++rank) {
    range_lasts[gathered_ids[rank]] = gathered_sizes[rank];
  }

  // An in-place inclusive prefix sum turns per-partition sizes into per-partition end offsets.
  std::inclusive_scan(range_lasts.begin(), range_lasts.end(), range_lasts.begin());

  return range_lasts;
}

static void init_subcomm(raft::handle_t& handle, int gpu_row_comm_size)
{
auto& comm = handle.get_comms();
Expand Down
8 changes: 7 additions & 1 deletion cpp/src/c_api/random.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,13 @@ struct select_random_vertices_functor : public cugraph::c_api::abstract_functor
rmm::device_uvector<vertex_t> local_vertices(0, handle_.get_stream());

local_vertices = cugraph::select_random_vertices(
handle_, graph_view, rng_state_->rng_state_, num_vertices_, false, false);
handle_,
graph_view,
std::optional<raft::device_span<vertex_t const>>{std::nullopt},
rng_state_->rng_state_,
num_vertices_,
false,
false);

cugraph::unrenumber_int_vertices<vertex_t, multi_gpu>(
handle_,
Expand Down
146 changes: 121 additions & 25 deletions cpp/src/structure/select_random_vertices_impl.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -18,9 +18,11 @@
#include <cugraph/detail/shuffle_wrappers.hpp>
#include <cugraph/detail/utility_wrappers.hpp>
#include <cugraph/graph_functions.hpp>
#include <cugraph/utilities/device_functors.cuh>
#include <cugraph/utilities/error.hpp>
#include <cugraph/utilities/host_scalar_comm.hpp>
#include <cugraph/utilities/shuffle_comm.cuh>
#include <detail/graph_partition_utils.cuh>

#include <raft/core/handle.hpp>
#include <rmm/device_scalar.hpp>
Expand All @@ -30,31 +32,66 @@
#include <cugraph-ops/graph/sampling.hpp>
#endif

#include <thrust/functional.h>
#include <thrust/gather.h>
#include <thrust/logical.h>
#include <thrust/sequence.h>
#include <thrust/sort.h>

namespace cugraph {
#include <chrono>
#include <cstdlib>
#include <iostream>
naimnv marked this conversation as resolved.
Show resolved Hide resolved

namespace cugraph {
template <typename vertex_t, typename edge_t, bool store_transposed, bool multi_gpu>
rmm::device_uvector<vertex_t> select_random_vertices(
raft::handle_t const& handle,
graph_view_t<vertex_t, edge_t, store_transposed, multi_gpu> const& graph_view,
std::optional<raft::device_span<vertex_t const>> given_set,
raft::random::RngState& rng_state,
size_t select_count,
bool with_replacement,
bool sort_vertices)
bool sort_vertices,
bool do_expensive_check)
{
CUGRAPH_EXPECTS(
with_replacement || select_count <= static_cast<size_t>(graph_view.number_of_vertices()),
"Invalid input arguments: select_count should not exceed the number of vertices if "
"with_replacement == false.");
size_t num_of_elements_in_given_set{0};
if (given_set) {
if (do_expensive_check) {
CUGRAPH_EXPECTS(static_cast<size_t>(thrust::count_if(
handle.get_thrust_policy(),
(*given_set).begin(),
(*given_set).begin() + (*given_set).size(),
detail::check_out_of_range_t<vertex_t>{
graph_view.local_vertex_partition_range_first(),
graph_view.local_vertex_partition_range_last()})) == size_t{0},
"Invalid input argument: vertex IDs in the given set must be within vertex "
"partition assigned to this GPU");
}
num_of_elements_in_given_set = static_cast<size_t>((*given_set).size());
if constexpr (multi_gpu) {
num_of_elements_in_given_set = host_scalar_allreduce(handle.get_comms(),
num_of_elements_in_given_set,
raft::comms::op_t::SUM,
handle.get_stream());
}
CUGRAPH_EXPECTS(
with_replacement || select_count <= num_of_elements_in_given_set,
"Invalid input arguments: select_count should not exceed the number of given vertices if "
"with_replacement == false.");
} else {
CUGRAPH_EXPECTS(
with_replacement || select_count <= static_cast<size_t>(graph_view.number_of_vertices()),
"Invalid input arguments: select_count should not exceed the number of vertices if "
"with_replacement == false.");
}

rmm::device_uvector<vertex_t> mg_sample_buffer(0, handle.get_stream());

size_t this_gpu_select_count{0};
if constexpr (multi_gpu) {
auto const comm_rank = handle.get_comms().get_rank();
auto const comm_size = handle.get_comms().get_size();
auto& comm = handle.get_comms();
auto const comm_size = comm.get_size();
auto const comm_rank = comm.get_rank();

this_gpu_select_count =
select_count / static_cast<size_t>(comm_size) +
Expand All @@ -64,26 +101,53 @@ rmm::device_uvector<vertex_t> select_random_vertices(
this_gpu_select_count = select_count;
}

std::vector<vertex_t> partition_range_lasts;

vertex_t local_int_vertex_first{0};
vertex_t local_int_vertex_last{given_set ? static_cast<vertex_t>(given_set->size())
: graph_view.number_of_vertices()};
naimnv marked this conversation as resolved.
Show resolved Hide resolved

if constexpr (multi_gpu) {
partition_range_lasts = given_set ? cugraph::partition_manager::compute_partition_range_lasts(
handle, static_cast<vertex_t>((*given_set).size()))
: graph_view.vertex_partition_range_lasts();

auto& comm = handle.get_comms();
auto const comm_size = comm.get_size();
auto const comm_rank = comm.get_rank();
auto& major_comm = handle.get_subcomm(cugraph::partition_manager::major_comm_name());
auto const major_comm_size = major_comm.get_size();
auto const major_comm_rank = major_comm.get_rank();
auto& minor_comm = handle.get_subcomm(cugraph::partition_manager::minor_comm_name());
auto const minor_comm_size = minor_comm.get_size();
auto const minor_comm_rank = minor_comm.get_rank();

auto vertex_partition_id =
partition_manager::compute_vertex_partition_id_from_graph_subcomm_ranks(
major_comm_size, minor_comm_size, major_comm_rank, minor_comm_rank);

local_int_vertex_first =
vertex_partition_id == 0 ? vertex_t{0} : partition_range_lasts[vertex_partition_id - 1];
local_int_vertex_last = partition_range_lasts[vertex_partition_id];
}

naimnv marked this conversation as resolved.
Show resolved Hide resolved
if (with_replacement) {
// FIXME: need to double check uniform_random_fill generates random numbers in [0, V) (not [0,
// V])
mg_sample_buffer.resize(this_gpu_select_count, handle.get_stream());
cugraph::detail::uniform_random_fill(handle.get_stream(),
mg_sample_buffer.data(),
mg_sample_buffer.size(),
vertex_t{0},
graph_view.number_of_vertices(),
given_set
? static_cast<vertex_t>(num_of_elements_in_given_set)
: graph_view.number_of_vertices(),
rng_state);
} else {
auto local_vertex_partition_range_first = graph_view.local_vertex_partition_range_first();
auto local_vertex_partition_range_last = graph_view.local_vertex_partition_range_last();

mg_sample_buffer = rmm::device_uvector<vertex_t>(
local_vertex_partition_range_last - local_vertex_partition_range_first, handle.get_stream());
mg_sample_buffer = rmm::device_uvector<vertex_t>(local_int_vertex_last - local_int_vertex_first,
handle.get_stream());
thrust::sequence(handle.get_thrust_policy(),
mg_sample_buffer.begin(),
mg_sample_buffer.end(),
local_vertex_partition_range_first);
local_int_vertex_first);

{ // random shuffle (use this instead of thrust::shuffle to use raft::random::RngState)
rmm::device_uvector<float> random_numbers(mg_sample_buffer.size(), handle.get_stream());
Expand All @@ -100,16 +164,37 @@ rmm::device_uvector<vertex_t> select_random_vertices(
}

if constexpr (multi_gpu) {
auto const comm_rank = handle.get_comms().get_rank();
auto const comm_size = handle.get_comms().get_size();
auto& comm = handle.get_comms();
auto const comm_size = comm.get_size();
auto const comm_rank = comm.get_rank();

std::vector<size_t> tx_value_counts(comm_size);
for (int i = 0; i < comm_size; ++i) {
tx_value_counts[i] =
mg_sample_buffer.size() / comm_size +
(static_cast<size_t>(i) < static_cast<size_t>(mg_sample_buffer.size() % comm_size) ? 1
: 0);
std::fill(
tx_value_counts.begin(), tx_value_counts.end(), mg_sample_buffer.size() / comm_size);

std::vector<vertex_t> h_random_numbers;
{
rmm::device_uvector<vertex_t> d_random_numbers(mg_sample_buffer.size() % comm_size,
handle.get_stream());
cugraph::detail::uniform_random_fill(handle.get_stream(),
d_random_numbers.data(),
d_random_numbers.size(),
vertex_t{0},
vertex_t{comm_size},
rng_state);

h_random_numbers.resize(d_random_numbers.size());

raft::update_host(h_random_numbers.data(),
d_random_numbers.data(),
d_random_numbers.size(),
handle.get_stream());
}

for (int i = 0; i < static_cast<int>(mg_sample_buffer.size() % comm_size); i++) {
tx_value_counts[h_random_numbers[i]]++;
}

std::tie(mg_sample_buffer, std::ignore) = cugraph::shuffle_values(
handle.get_comms(), mg_sample_buffer.begin(), tx_value_counts, handle.get_stream());

Expand Down Expand Up @@ -148,7 +233,18 @@ rmm::device_uvector<vertex_t> select_random_vertices(

if constexpr (multi_gpu) {
mg_sample_buffer = cugraph::detail::shuffle_int_vertices_to_local_gpu_by_vertex_partitioning(
handle, std::move(mg_sample_buffer), graph_view.vertex_partition_range_lasts());
handle, std::move(mg_sample_buffer), partition_range_lasts);
}

if (given_set) {
thrust::gather(
handle.get_thrust_policy(),
thrust::make_transform_iterator(
mg_sample_buffer.begin(), cugraph::detail::shift_left_t<vertex_t>{local_int_vertex_first}),
thrust::make_transform_iterator(
mg_sample_buffer.end(), cugraph::detail::shift_left_t<vertex_t>{local_int_vertex_first}),
(*given_set).begin(),
mg_sample_buffer.begin());
}

if (sort_vertices) {
Expand Down
24 changes: 18 additions & 6 deletions cpp/src/structure/select_random_vertices_mg.cu
Original file line number Diff line number Diff line change
Expand Up @@ -21,49 +21,61 @@ namespace cugraph {
template rmm::device_uvector<int32_t> select_random_vertices(
raft::handle_t const& handle,
graph_view_t<int32_t, int32_t, false, true> const& graph_view,
std::optional<raft::device_span<int32_t const>> given_set,
raft::random::RngState& rng_state,
size_t select_count,
bool with_replacement,
bool sort_vertices);
bool sort_vertices,
bool do_expensive_check);

template rmm::device_uvector<int32_t> select_random_vertices(
raft::handle_t const& handle,
graph_view_t<int32_t, int64_t, false, true> const& graph_view,
std::optional<raft::device_span<int32_t const>> given_set,
raft::random::RngState& rng_state,
size_t select_count,
bool with_replacement,
bool sort_vertices);
bool sort_vertices,
bool do_expensive_check);

template rmm::device_uvector<int64_t> select_random_vertices(
raft::handle_t const& handle,
graph_view_t<int64_t, int64_t, false, true> const& graph_view,
std::optional<raft::device_span<int64_t const>> given_set,
raft::random::RngState& rng_state,
size_t select_count,
bool with_replacement,
bool sort_vertices);
bool sort_vertices,
bool do_expensive_check);

template rmm::device_uvector<int32_t> select_random_vertices(
raft::handle_t const& handle,
graph_view_t<int32_t, int32_t, true, true> const& graph_view,
std::optional<raft::device_span<int32_t const>> given_set,
raft::random::RngState& rng_state,
size_t select_count,
bool with_replacement,
bool sort_vertices);
bool sort_vertices,
bool do_expensive_check);

template rmm::device_uvector<int32_t> select_random_vertices(
raft::handle_t const& handle,
graph_view_t<int32_t, int64_t, true, true> const& graph_view,
std::optional<raft::device_span<int32_t const>> given_set,
raft::random::RngState& rng_state,
size_t select_count,
bool with_replacement,
bool sort_vertices);
bool sort_vertices,
bool do_expensive_check);

template rmm::device_uvector<int64_t> select_random_vertices(
raft::handle_t const& handle,
graph_view_t<int64_t, int64_t, true, true> const& graph_view,
std::optional<raft::device_span<int64_t const>> given_set,
raft::random::RngState& rng_state,
size_t select_count,
bool with_replacement,
bool sort_vertices);
bool sort_vertices,
bool do_expensive_check);

} // namespace cugraph
Loading