
Update graph partitioning scheme (#1443)
Partially addresses Issue #1442

Update the graph partitioning scheme to better control the trade-off between memory footprint and concurrency for large-scale graph processing on large clusters. The new partitioning scheme also simplifies the communication patterns among GPUs, which can potentially improve scalability.
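For illustration only (not part of this commit), a minimal sketch of the 2D layout the new scheme builds on: comm_size GPUs viewed as a col_comm_size x row_comm_size grid, with ranks laid out row-major as the index arithmetic in the diffs below suggests. The names here are hypothetical, not cuGraph API.

// Illustrative sketch only; names are hypothetical, not cuGraph API.
struct grid_position_t {
  int row;  // index along col_comm, range [0, col_comm_size)
  int col;  // index along row_comm, range [0, row_comm_size)
};

// Assumes rank == row * row_comm_size + col (row-major layout of the GPU grid).
inline grid_position_t to_grid_position(int comm_rank, int row_comm_size)
{
  return grid_position_t{comm_rank / row_comm_size, comm_rank % row_comm_size};
}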

Authors:
  - Seunghwa Kang (https://github.com/seunghwak)

Approvers:
  - Chuck Hastings (https://github.com/ChuckHastings)
  - Alex Fender (https://github.com/afender)
  - Andrei Schaffer (https://github.com/aschaffer)

URL: #1443
seunghwak authored Apr 6, 2021
1 parent a6edf62 commit 9a1ab09
Showing 64 changed files with 5,481 additions and 2,841 deletions.
1 change: 1 addition & 0 deletions cpp/CMakeLists.txt
@@ -432,6 +432,7 @@ add_library(cugraph SHARED
src/experimental/graph_view.cu
src/experimental/coarsen_graph.cu
src/experimental/renumber_edgelist.cu
src/experimental/renumber_utils.cu
src/experimental/relabel.cu
src/experimental/induced_subgraph.cu
src/experimental/bfs.cu
2 changes: 1 addition & 1 deletion cpp/include/dendrogram.hpp
@@ -27,7 +27,7 @@ class Dendrogram {
public:
void add_level(vertex_t first_index,
vertex_t num_verts,
cudaStream_t stream = 0,
cudaStream_t stream,
rmm::mr::device_memory_resource *mr = rmm::mr::get_current_device_resource())
{
level_ptr_.push_back(std::make_unique<rmm::device_uvector<vertex_t>>(num_verts, stream, mr));
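A hypothetical caller sketch (the function, values, and includes are illustrative, not code from this PR) showing the effect of this change: the CUDA stream must now be passed explicitly because the "= 0" default was removed.

#include <cstdint>
#include <raft/handle.hpp>

// Hypothetical caller; works for any dendrogram type exposing add_level() as above.
template <typename dendrogram_t>
void add_first_level(dendrogram_t& dendrogram, raft::handle_t const& handle)
{
  int32_t const first_index = 0;
  int32_t const num_verts   = 1000;
  // The stream is now a required argument (no default); the
  // device_memory_resource argument still defaults to the current resource.
  dendrogram.add_level(first_index, num_verts, handle.get_stream());
}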
80 changes: 28 additions & 52 deletions cpp/include/experimental/detail/graph_utils.cuh
@@ -56,65 +56,32 @@ rmm::device_uvector<edge_t> compute_major_degrees(
rmm::device_uvector<edge_t> degrees(0, handle.get_stream());

vertex_t max_num_local_degrees{0};
for (int i = 0; i < (partition.is_hypergraph_partitioned() ? col_comm_size : row_comm_size);
++i) {
auto vertex_partition_idx = partition.is_hypergraph_partitioned()
? static_cast<size_t>(i * row_comm_size + row_comm_rank)
: static_cast<size_t>(col_comm_rank * row_comm_size + i);
for (int i = 0; i < col_comm_size; ++i) {
auto vertex_partition_idx = static_cast<size_t>(i * row_comm_size + row_comm_rank);
auto vertex_partition_size = partition.get_vertex_partition_size(vertex_partition_idx);
max_num_local_degrees = std::max(max_num_local_degrees, vertex_partition_size);
if (i == (partition.is_hypergraph_partitioned() ? col_comm_rank : row_comm_rank)) {
degrees.resize(vertex_partition_size, handle.get_stream());
}
if (i == col_comm_rank) { degrees.resize(vertex_partition_size, handle.get_stream()); }
}
local_degrees.resize(max_num_local_degrees, handle.get_stream());
for (int i = 0; i < (partition.is_hypergraph_partitioned() ? col_comm_size : row_comm_size);
++i) {
auto vertex_partition_idx = partition.is_hypergraph_partitioned()
? static_cast<size_t>(i * row_comm_size + row_comm_rank)
: static_cast<size_t>(col_comm_rank * row_comm_size + i);
for (int i = 0; i < col_comm_size; ++i) {
auto vertex_partition_idx = static_cast<size_t>(i * row_comm_size + row_comm_rank);
vertex_t major_first{};
vertex_t major_last{};
std::tie(major_first, major_last) = partition.get_vertex_partition_range(vertex_partition_idx);
auto p_offsets =
partition.is_hypergraph_partitioned()
? adj_matrix_partition_offsets[i]
: adj_matrix_partition_offsets[0] +
(major_first - partition.get_vertex_partition_first(col_comm_rank * row_comm_size));
auto p_offsets = adj_matrix_partition_offsets[i];
thrust::transform(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()),
thrust::make_counting_iterator(vertex_t{0}),
thrust::make_counting_iterator(major_last - major_first),
local_degrees.data(),
[p_offsets] __device__(auto i) { return p_offsets[i + 1] - p_offsets[i]; });
if (partition.is_hypergraph_partitioned()) {
col_comm.reduce(local_degrees.data(),
i == col_comm_rank ? degrees.data() : static_cast<edge_t *>(nullptr),
static_cast<size_t>(major_last - major_first),
raft::comms::op_t::SUM,
i,
handle.get_stream());
} else {
row_comm.reduce(local_degrees.data(),
i == row_comm_rank ? degrees.data() : static_cast<edge_t *>(nullptr),
static_cast<size_t>(major_last - major_first),
raft::comms::op_t::SUM,
i,
handle.get_stream());
}
col_comm.reduce(local_degrees.data(),
i == col_comm_rank ? degrees.data() : static_cast<edge_t *>(nullptr),
static_cast<size_t>(major_last - major_first),
raft::comms::op_t::SUM,
i,
handle.get_stream());
}

raft::comms::status_t status{};
if (partition.is_hypergraph_partitioned()) {
status =
col_comm.sync_stream(handle.get_stream()); // this is necessary as local_degrees will become
// out-of-scope once this function returns.
} else {
status =
row_comm.sync_stream(handle.get_stream()); // this is necessary as local_degrees will become
// out-of-scope once this function returns.
}
CUGRAPH_EXPECTS(status == raft::comms::status_t::SUCCESS, "sync_stream() failure.");

return degrees;
}
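A small worked illustration of the simplified loop above, with hypothetical communicator sizes:

#include <cstddef>
#include <vector>

// Hypothetical sizes for illustration only: a 3 x 2 GPU grid.
std::vector<std::size_t> example_vertex_partition_indices()
{
  int const row_comm_size = 2;
  int const col_comm_size = 3;
  int const row_comm_rank = 1;

  // Mirrors the simplified loop: this GPU holds one adjacency matrix partition per
  // GPU in its column of the grid, covering the major ranges of vertex partitions
  // 1, 3 and 5, so the per-partition local degrees are always reduced over col_comm
  // (the old is_hypergraph_partitioned() branch is no longer needed).
  std::vector<std::size_t> indices;
  for (int i = 0; i < col_comm_size; ++i) {
    indices.push_back(static_cast<std::size_t>(i * row_comm_size + row_comm_rank));
  }
  return indices;  // {1, 3, 5}
}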

Expand Down Expand Up @@ -170,7 +137,6 @@ struct compute_gpu_id_from_vertex_t {

template <typename vertex_t>
struct compute_gpu_id_from_edge_t {
bool hypergraph_partitioned{false};
int comm_size{0};
int row_comm_size{0};
int col_comm_size{0};
@@ -180,12 +146,22 @@ struct compute_gpu_id_from_edge_t {
cuco::detail::MurmurHash3_32<vertex_t> hash_func{};
auto major_comm_rank = static_cast<int>(hash_func(major) % comm_size);
auto minor_comm_rank = static_cast<int>(hash_func(minor) % comm_size);
if (hypergraph_partitioned) {
return (minor_comm_rank / col_comm_size) * row_comm_size + (major_comm_rank % row_comm_size);
} else {
return (major_comm_rank - (major_comm_rank % row_comm_size)) +
(minor_comm_rank / col_comm_size);
}
return (minor_comm_rank / row_comm_size) * row_comm_size + (major_comm_rank % row_comm_size);
}
};
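A worked example with hypothetical sizes, to show the single mapping that replaces the old two-branch version:

// Hypothetical: comm_size = 6 GPUs arranged as col_comm_size = 3 x row_comm_size = 2.
// If hash_func(major) % 6 == 5 and hash_func(minor) % 6 == 4, the edge is assigned to
// GPU (4 / 2) * 2 + (5 % 2) == 5, i.e. grid row 2 (picked by the minor vertex) and
// grid column 1 (picked by the major vertex); results always fall in [0, comm_size).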

template <typename vertex_t>
struct compute_partition_id_from_edge_t {
int comm_size{0};
int row_comm_size{0};
int col_comm_size{0};

__device__ int operator()(vertex_t major, vertex_t minor) const
{
cuco::detail::MurmurHash3_32<vertex_t> hash_func{};
auto major_comm_rank = static_cast<int>(hash_func(major) % comm_size);
auto minor_comm_rank = static_cast<int>(hash_func(minor) % comm_size);
return major_comm_rank * col_comm_size + minor_comm_rank / row_comm_size;
}
};
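With the same hypothetical sizes and hashed ranks, the partition id for that edge works out as follows:

// Hypothetical: comm_size = 6, row_comm_size = 2, col_comm_size = 3, and hashed ranks
// major -> 5, minor -> 4 as above: 5 * 3 + 4 / 2 == 17. Partition ids therefore range
// over [0, comm_size * col_comm_size), consistent with each GPU holding col_comm_size
// adjacency matrix partitions (as in compute_major_degrees above).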

14 changes: 14 additions & 0 deletions cpp/include/experimental/graph.hpp
@@ -188,6 +188,20 @@ template <typename edge_t>
struct invalid_edge_id : invalid_idx<edge_t> {
};

template <typename vertex_t>
__host__ __device__ std::enable_if_t<std::is_signed<vertex_t>::value, bool> is_valid_vertex(
vertex_t num_vertices, vertex_t v)
{
return (v >= 0) && (v < num_vertices);
}

template <typename vertex_t>
__host__ __device__ std::enable_if_t<std::is_unsigned<vertex_t>::value, bool> is_valid_vertex(
vertex_t num_vertices, vertex_t v)
{
return v < num_vertices;
}

} // namespace experimental
} // namespace cugraph
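A minimal usage sketch of the two overloads above; the values and the include path are assumptions for illustration.

#include <cstdint>
#include <experimental/graph.hpp>  // assumed include path for the header shown above

// Signed vertex ids: both bounds are checked.
bool const ok  = cugraph::experimental::is_valid_vertex(int32_t{100}, int32_t{42});    // true
bool const neg = cugraph::experimental::is_valid_vertex(int32_t{100}, int32_t{-1});    // false
// Unsigned vertex ids: only the upper bound needs checking.
bool const oob = cugraph::experimental::is_valid_vertex(uint32_t{100}, uint32_t{100}); // false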
