Skip to content

Commit

Permalink
Use new sampling primitives (#2751)
Browse files Browse the repository at this point in the history
Closes #2581 
Closes #2582
Closes #2665

Update the neighborhood sampling algorithm to use the new neighborhood sampling primitive defined in #2703

Authors:
  - Chuck Hastings (https://github.com/ChuckHastings)

Approvers:
  - Seunghwa Kang (https://github.com/seunghwak)

URL: #2751
  • Loading branch information
ChuckHastings authored Oct 3, 2022
1 parent 26d47ff commit 83f27a1
Show file tree
Hide file tree
Showing 14 changed files with 524 additions and 1,547 deletions.
4 changes: 4 additions & 0 deletions cpp/include/cugraph/algorithms.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -1316,6 +1316,10 @@ extract_ego(raft::handle_t const& handle,
* @brief returns random walks (RW) from starting sources, where each path is of given maximum
* length. Uniform distribution is assumed for the random engine.
*
* @deprecated This algorithm will be deprecated once all of the functionality is migrated
 * to the newer APIs: uniform_random_walks(), biased_random_walks(), and
* node2vec_random_walks().
*
* @tparam graph_t Type of graph/view (typically, graph_view_t).
* @tparam index_t Type used to store indexing and sizes.
* @param handle RAFT handle object to encapsulate resources (e.g. CUDA stream, communicator, and
Expand Down
15 changes: 15 additions & 0 deletions cpp/include/cugraph/detail/utility_wrappers.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,21 @@ void uniform_random_fill(rmm::cuda_stream_view const& stream_view,
value_t max_value,
uint64_t seed);

/**
 * @brief Fill a device buffer with a constant value
 *
 * @tparam value_t type of the value to operate on
 *
 * @param[in]  handle  RAFT handle object to encapsulate resources (e.g. CUDA stream, communicator,
 *                     and handles to various CUDA libraries) to run graph algorithms.
 * @param[out] d_value device array to fill
 * @param[in]  size    number of elements in array
 * @param[in]  value   value assigned to each of the @p size elements of @p d_value
 *
 */
template <typename value_t>
void scalar_fill(raft::handle_t const& handle, value_t* d_value, size_t size, value_t value);

/**
* @brief Fill a buffer with a sequence of values
*
Expand Down
25 changes: 25 additions & 0 deletions cpp/src/detail/utility_wrappers.cu
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,26 @@ template void uniform_random_fill(rmm::cuda_stream_view const& stream_view,
double max_value,
uint64_t seed);

// Fill the first `size` elements of the device buffer `d_value` with `value`.
template <typename value_t>
void scalar_fill(raft::handle_t const& handle, value_t* d_value, size_t size, value_t value)
{
  // Assign `value` over the half-open range [d_value, d_value + size);
  // equivalent to thrust::fill_n(policy, d_value, size, value).
  thrust::fill(handle.get_thrust_policy(), d_value, d_value + size, value);
}

// Explicit instantiations for the supported value types.
template void scalar_fill(
  raft::handle_t const& handle, int32_t* d_value, size_t size, int32_t value);

template void scalar_fill(
  raft::handle_t const& handle, int64_t* d_value, size_t size, int64_t value);

template void scalar_fill(raft::handle_t const& handle, float* d_value, size_t size, float value);

template void scalar_fill(raft::handle_t const& handle, double* d_value, size_t size, double value);

template <typename value_t>
void sequence_fill(rmm::cuda_stream_view const& stream_view,
value_t* d_value,
Expand All @@ -79,6 +99,11 @@ template void sequence_fill(rmm::cuda_stream_view const& stream_view,
size_t size,
int64_t start_value);

// Explicit instantiation of sequence_fill for uint64_t values.
template void sequence_fill(rmm::cuda_stream_view const& stream_view,
                            uint64_t* d_value,
                            size_t size,
                            uint64_t start_value);

template <typename vertex_t>
vertex_t compute_maximum_vertex_id(rmm::cuda_stream_view const& stream_view,
vertex_t const* d_edgelist_srcs,
Expand Down
6 changes: 4 additions & 2 deletions cpp/src/prims/detail/extract_transform_v_frontier_e.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -813,14 +813,16 @@ extract_transform_v_frontier_e(raft::handle_t const& handle,
auto frontier_key_last = frontier.end();
auto frontier_keys = allocate_dataframe_buffer<key_t>(size_t{0}, handle.get_stream());
if constexpr (!VertexFrontierBucketType::is_sorted_unique) {
frontier_keys = resize_dataframe_buffer(frontier_keys, frontier.size(), handle.get_stream());
resize_dataframe_buffer(frontier_keys, frontier.size(), handle.get_stream());
thrust::copy(handle.get_thrust_policy(),
frontier_key_first,
frontier_key_last,
get_dataframe_buffer_begin(frontier_keys));
thrust::sort(handle.get_thrust_policy(),
get_dataframe_buffer_begin(frontier_keys),
get_dataframe_buffer_end(frontier_keys));
frontier_key_first = get_dataframe_buffer_begin(frontier_keys);
frontier_key_last = get_dataframe_buffer_end(frontier_keys);
thrust::sort(handle.get_thrust_policy(), frontier_key_first, frontier_key_last);
}

// 1. fill the buffers
Expand Down
2 changes: 1 addition & 1 deletion cpp/src/prims/extract_transform_v_frontier_outgoing_e.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -81,7 +81,7 @@ extract_transform_v_frontier_outgoing_e(raft::handle_t const& handle,
static_assert(!GraphViewType::is_storage_transposed);

using e_op_result_t = typename evaluate_edge_op<GraphViewType,
key_t,
typename VertexFrontierBucketType::key_type,
EdgeSrcValueInputWrapper,
EdgeDstValueInputWrapper,
EdgeOp>::result_type;
Expand Down
4 changes: 3 additions & 1 deletion cpp/src/prims/per_v_random_select_transform_outgoing_e.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,10 @@

#include <prims/property_op_utils.cuh>

#include <cugraph/edge_partition_device_view.cuh>
#include <cugraph/edge_partition_endpoint_property_device_view.cuh>
#include <cugraph/utilities/dataframe_buffer.hpp>
#include <cugraph/utilities/host_scalar_comm.hpp>
#include <cugraph/utilities/misc_utils.cuh>
#include <cugraph/utilities/shuffle_comm.cuh>

Expand Down Expand Up @@ -127,7 +130,6 @@ struct transform_and_count_local_nbr_indices_t {
major = thrust::get<0>(key);
}
auto major_offset = edge_partition.major_offset_from_major_nocheck(major);
printf("major=%d major_offste=%d\n", (int)major, (int)major_offset);
vertex_t const* indices{nullptr};
thrust::optional<weight_t const*> weights{thrust::nullopt};
[[maybe_unused]] edge_t local_degree{0};
Expand Down
138 changes: 27 additions & 111 deletions cpp/src/sampling/detail/graph_functions.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -25,62 +25,6 @@ namespace detail {
// in implementation, naming and documentation. We should review these and
// consider updating things to support an arbitrary value for store_transposed

/**
* @brief Compute local out degrees of the majors belonging to the adjacency matrices
* stored on each gpu
*
* Iterate through partitions and store their local degrees
*
* @tparam GraphViewType Type of the passed non-owning graph object.
* @param handle RAFT handle object to encapsulate resources (e.g. CUDA stream, communicator, and
* handles to various CUDA libraries) to run graph algorithms.
* @param graph_view Non-owning graph object.
* @return A single vector containing the local out degrees of the majors belong to the adjacency
* matrices
*/
template <typename GraphViewType>
rmm::device_uvector<typename GraphViewType::edge_type> compute_local_major_degrees(
raft::handle_t const& handle, GraphViewType const& graph_view);

/**
* @brief Calculate global degree information for all vertices represented by current gpu
*
* Calculate local degree and perform row wise exclusive scan over all gpus in column
* communicator.
*
* @tparam GraphViewType Type of the passed non-owning graph object.
* @param handle RAFT handle object to encapsulate resources (e.g. CUDA stream, communicator, and
* handles to various CUDA libraries) to run graph algorithms.
* @param graph_view Non-owning graph object.
* @return Tuple of two device vectors. The first one contains per source edge-count encountered
* by gpus in the column communicator before current gpu. The second device vector contains the
* global out degree for every source represented by current gpu
*/
template <typename GraphViewType>
std::tuple<rmm::device_uvector<typename GraphViewType::edge_type>,
rmm::device_uvector<typename GraphViewType::edge_type>>
get_global_degree_information(raft::handle_t const& handle, GraphViewType const& graph_view);

/**
* @brief Calculate global adjacency offset for all majors represented by current gpu
*
* @tparam GraphViewType Type of the passed non-owning graph object.
* @param handle RAFT handle object to encapsulate resources (e.g. CUDA stream, communicator, and
* handles to various CUDA libraries) to run graph algorithms.
* @param graph_view Non-owning graph object.
* @param[in] global_degree_offsets Global degree offset to local adjacency list for every major
* represented by current gpu
* @param global_out_degrees Global out degrees for every source represented by current gpu
* @return Device vector containing the number of edges that are prior to the adjacency list of
* every major that can be represented by the current gpu
*/
template <typename GraphViewType>
rmm::device_uvector<typename GraphViewType::edge_type> get_global_adjacency_offset(
raft::handle_t const& handle,
GraphViewType const& graph_view,
const rmm::device_uvector<typename GraphViewType::edge_type>& global_degree_offsets,
const rmm::device_uvector<typename GraphViewType::edge_type>& global_out_degrees);

/**
* @brief Gather active majors across gpus in a column communicator
*
Expand All @@ -98,93 +42,65 @@ template <typename vertex_t>
rmm::device_uvector<vertex_t> allgather_active_majors(raft::handle_t const& handle,
rmm::device_uvector<vertex_t>&& d_in);

// FIXME: Need docs if this function survives
//
// NOTE(review): from the signature this appears to take ownership of a
// (src, dst, wgt) edge list and return a 4-tuple of device vectors — presumably
// the deduplicated edges plus a per-edge count (edge_t) of how many duplicates
// were collapsed. Confirm against the implementation before relying on this.
template <typename vertex_t, typename edge_t, typename weight_t>
std::tuple<rmm::device_uvector<vertex_t>,
           rmm::device_uvector<vertex_t>,
           rmm::device_uvector<weight_t>,
           rmm::device_uvector<edge_t>>
count_and_remove_duplicates(raft::handle_t const& handle,
                            rmm::device_uvector<vertex_t>&& src,
                            rmm::device_uvector<vertex_t>&& dst,
                            rmm::device_uvector<weight_t>&& wgt);

/**
* @brief Return global out degrees of active majors
* @brief Gather edge list for specified vertices
*
* Get partition information of all graph partitions on the gpu and select
* global degrees of all active majors
* Collect all the edges that are present in the adjacency lists on the current gpu
*
* @tparam GraphViewType Type of the passed non-owning graph object.
* @param handle RAFT handle object to encapsulate resources (e.g. CUDA stream, communicator, and
* handles to various CUDA libraries) to run graph algorithms.
* @param graph_view Non-owning graph object.
* @param active_majors Device vector containing all the vertex id that are processed by
* gpus in the column communicator
* @param global_out_degrees Global out degrees for every source represented by current gpu
* @return Global out degrees of all majors in active_majors
*/
template <typename GraphViewType>
rmm::device_uvector<typename GraphViewType::edge_type> get_active_major_global_degrees(
raft::handle_t const& handle,
GraphViewType const& graph_view,
const rmm::device_uvector<typename GraphViewType::vertex_type>& active_majors,
const rmm::device_uvector<typename GraphViewType::edge_type>& global_out_degrees);

/**
* @brief Gather specified edges present on the current gpu
*
* Collect all the edges that are present in the adjacency lists on the current gpu
*
* @tparam GraphViewType Type of the passed non-owning graph object.
* @param[in] handle RAFT handle object to encapsulate resources (e.g. CUDA stream, communicator,
* and handles to various CUDA libraries) to run graph algorithms.
* @param[in] graph_view Non-owning graph object.
* @param[in] active_majors Device vector containing all the vertex id that are processed by
* gpus in the column communicator
* @param[in] minor_map Device vector of minor indices (modifiable in-place) corresponding to
* vertex IDs being returned
* @param[in] indices_per_major Number of indices supplied for every major in the range
* [vertex_input_first, vertex_input_last)
* @param[in] global_degree_offsets Global degree offset to local adjacency list for every major
* represented by current gpu
* @return A tuple of device vector containing the majors, minors, and weights gathered
* locally
* @return A tuple of device vector containing the majors, minors and weights gathered locally
*/
template <typename GraphViewType>
std::tuple<rmm::device_uvector<typename GraphViewType::vertex_type>,
rmm::device_uvector<typename GraphViewType::vertex_type>,
std::optional<rmm::device_uvector<typename GraphViewType::weight_type>>>
gather_local_edges(
gather_one_hop_edgelist(
raft::handle_t const& handle,
GraphViewType const& graph_view,
const rmm::device_uvector<typename GraphViewType::vertex_type>& active_majors,
rmm::device_uvector<typename GraphViewType::edge_type>&& minor_map,
typename GraphViewType::edge_type indices_per_major,
const rmm::device_uvector<typename GraphViewType::edge_type>& global_degree_offsets,
bool remove_invalid_vertices = true);
bool do_expensive_check = false);

/**
* @brief Gather edge list for specified vertices
*
* Collect all the edges that are present in the adjacency lists on the current gpu
* @brief Randomly sample edges from the adjacency list of specified vertices
*
* @tparam GraphViewType Type of the passed non-owning graph object.
* @param handle RAFT handle object to encapsulate resources (e.g. CUDA stream, communicator, and
* handles to various CUDA libraries) to run graph algorithms.
* @param rng_state Random number generator state
* @param graph_view Non-owning graph object.
* @param active_majors Device vector containing all the vertex id that are processed by
* gpus in the column communicator
* @param fanout How many edges to sample for each vertex
* @param with_replacement If true sample with replacement, otherwise sample without replacement
* @param invalid_vertex_id Value to use for an invalid vertex
* @return A tuple of device vector containing the majors, minors and weights gathered locally
*/
template <typename GraphViewType>
std::tuple<rmm::device_uvector<typename GraphViewType::vertex_type>,
rmm::device_uvector<typename GraphViewType::vertex_type>,
std::optional<rmm::device_uvector<typename GraphViewType::weight_type>>>
gather_one_hop_edgelist(
raft::handle_t const& handle,
GraphViewType const& graph_view,
const rmm::device_uvector<typename GraphViewType::vertex_type>& active_majors);

template <typename vertex_t, typename edge_t, typename weight_t>
std::tuple<rmm::device_uvector<vertex_t>,
rmm::device_uvector<vertex_t>,
rmm::device_uvector<weight_t>,
rmm::device_uvector<edge_t>>
count_and_remove_duplicates(raft::handle_t const& handle,
rmm::device_uvector<vertex_t>&& src,
rmm::device_uvector<vertex_t>&& dst,
rmm::device_uvector<weight_t>&& wgt);
sample_edges(raft::handle_t const& handle,
GraphViewType const& graph_view,
raft::random::RngState& rng_state,
rmm::device_uvector<typename GraphViewType::vertex_type> const& active_majors,
size_t fanout,
bool with_replacement);

} // namespace detail

} // namespace cugraph
Loading

0 comments on commit 83f27a1

Please sign in to comment.