Skip to content

Commit

Permalink
Improve memory scaling for low average vertex degree graphs & many GP…
Browse files Browse the repository at this point in the history
…Us (#1823)

Assuming sqrt(P) is an integer, matrix row/column property array sizes in each GPU scale as V/sqrt(P), while the storage requirement for graph edges scales as E/P. If E/V is small and P is large, the O(V/sqrt(P)) part will dominate the memory requirement, and analyzing N times larger graphs will require N^2 times more GPUs; this is unacceptable. However, in this case, at most E/P elements of the V/sqrt(P)-sized array will be accessed, so there is no need to store all V/sqrt(P) values. Instead, we can store row/column properties as (key, value) pairs, limiting the memory requirement to the minimum of V/sqrt(P) and E/P.

This PR supports storing matrix row/column properties in (key, value) pairs if the percentage of actually accessed elements is lower than the threshold value (the code has been tested only up to 8 GPUs, and there was no clear benefit at this scale; currently the threshold value is set to 0 and (key, value) pair support is never enabled, but the threshold value will be adjusted later after large scale testing).

Authors:
  - Seunghwa Kang (https://github.com/seunghwak)

Approvers:
  - Chuck Hastings (https://github.com/ChuckHastings)

URL: #1823
  • Loading branch information
seunghwak authored Sep 22, 2021
1 parent 4e457dc commit 7cabcd0
Show file tree
Hide file tree
Showing 17 changed files with 847 additions and 288 deletions.
26 changes: 26 additions & 0 deletions cpp/include/cugraph/graph.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,9 @@ struct graph_meta_t<vertex_t, edge_t, multi_gpu, std::enable_if_t<multi_gpu>> {

// segment offsets based on vertex degree, relevant only if vertex IDs are renumbered
std::optional<std::vector<vertex_t>> segment_offsets{std::nullopt};

vertex_t num_local_unique_edge_rows{};
vertex_t num_local_unique_edge_cols{};
};

// single-GPU version
Expand Down Expand Up @@ -136,6 +139,22 @@ class graph_t<vertex_t, edge_t, weight_t, store_transposed, multi_gpu, std::enab
this->get_graph_properties(),
partition_,
adj_matrix_partition_segment_offsets_,
local_sorted_unique_edge_rows_
? std::optional<vertex_t const*>{(*local_sorted_unique_edge_rows_).data()}
: std::nullopt,
local_sorted_unique_edge_rows_
? std::optional<vertex_t const*>{(*local_sorted_unique_edge_rows_).data() +
(*local_sorted_unique_edge_rows_).size()}
: std::nullopt,
local_sorted_unique_edge_row_offsets_,
local_sorted_unique_edge_cols_
? std::optional<vertex_t const*>{(*local_sorted_unique_edge_cols_).data()}
: std::nullopt,
local_sorted_unique_edge_cols_
? std::optional<vertex_t const*>{(*local_sorted_unique_edge_cols_).data() +
(*local_sorted_unique_edge_cols_).size()}
: std::nullopt,
local_sorted_unique_edge_col_offsets_,
},
false);
}
Expand All @@ -155,6 +174,13 @@ class graph_t<vertex_t, edge_t, weight_t, store_transposed, multi_gpu, std::enab
// segment offsets within the vertex partition based on vertex degree, relevant only if
// segment_offsets.size() > 0
std::optional<std::vector<vertex_t>> adj_matrix_partition_segment_offsets_{std::nullopt};

// if valid, store row/column properties in key/value pairs (this saves memory if # unique edge
// rows/cols << V / row_comm_size|col_comm_size).
std::optional<rmm::device_uvector<vertex_t>> local_sorted_unique_edge_rows_{std::nullopt};
std::optional<rmm::device_uvector<vertex_t>> local_sorted_unique_edge_cols_{std::nullopt};
std::optional<std::vector<vertex_t>> local_sorted_unique_edge_row_offsets_{std::nullopt};
std::optional<std::vector<vertex_t>> local_sorted_unique_edge_col_offsets_{std::nullopt};
};

// single-GPU version
Expand Down
9 changes: 6 additions & 3 deletions cpp/include/cugraph/graph_functions.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,9 @@ struct renumber_meta_t<vertex_t, edge_t, multi_gpu, std::enable_if_t<multi_gpu>>
edge_t number_of_edges{};
partition_t<vertex_t> partition{};
std::vector<vertex_t> segment_offsets{};

vertex_t num_local_unique_edge_majors{};
vertex_t num_local_unique_edge_minors{};
};

template <typename vertex_t, typename edge_t, bool multi_gpu>
Expand Down Expand Up @@ -88,9 +91,9 @@ struct renumber_meta_t<vertex_t, edge_t, multi_gpu, std::enable_if_t<!multi_gpu>
* Tuple of labels (vertex IDs before renumbering) for the entire set of vertices (assigned to this
* process in multi-GPU) and meta-data collected while renumbering. The meta-data includes total
* number of vertices, total number of edges, partition_t object storing graph partitioning
* information, and vertex partition segment offsets (a vertex partition is partitioned to multiple
* segments based on vertex degrees). This meta-data is expected to be used in graph construction &
* graph primitives.
* information, vertex partition segment offsets (a vertex partition is partitioned to multiple
* segments based on vertex degrees), and the number of local unique edge major & minor vertex IDs.
* This meta-data is expected to be used in graph construction & graph primitives.
*/
template <typename vertex_t, typename edge_t, bool multi_gpu>
std::enable_if_t<
Expand Down
27 changes: 26 additions & 1 deletion cpp/include/cugraph/graph_view.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -223,6 +223,11 @@ namespace detail {

using namespace cugraph::visitors;

// Fill-ratio cutoff for storing row/column properties in (key, value) pairs: the pair-based
// storage is used if (# unique edge rows/cols) / (V / row_comm_size|col_comm_size) is smaller than
// this value.
// FIXME: this threshold value requires tuning (currently disabled)
double constexpr row_col_properties_kv_pair_fill_ratio_threshold{0.0};

// FIXME: threshold values require tuning
// use the hypersparse format (currently, DCSR or DCSC) for the vertices with their degrees smaller
// than col_comm_size * hypersparse_threshold_ratio, should be less than 1.0
Expand Down Expand Up @@ -300,6 +305,13 @@ struct graph_view_meta_t<vertex_t, edge_t, multi_gpu, std::enable_if_t<multi_gpu

// segment offsets based on vertex degree, relevant only if vertex IDs are renumbered
std::optional<std::vector<vertex_t>> adj_matrix_partition_segment_offsets{};

std::optional<vertex_t const*> local_sorted_unique_edge_row_first{std::nullopt};
std::optional<vertex_t const*> local_sorted_unique_edge_row_last{std::nullopt};
std::optional<std::vector<vertex_t>> local_sorted_unique_edge_row_offsets{std::nullopt};
std::optional<vertex_t const*> local_sorted_unique_edge_col_first{std::nullopt};
std::optional<vertex_t const*> local_sorted_unique_edge_col_last{std::nullopt};
std::optional<std::vector<vertex_t>> local_sorted_unique_edge_col_offsets{std::nullopt};
};

// single-GPU version
Expand Down Expand Up @@ -587,6 +599,11 @@ class graph_view_t<vertex_t,
return local_sorted_unique_edge_row_last_;
}

// Returns the local sorted unique edge row offsets held by this view (std::nullopt if unset).
// The result is returned by value, so the std::vector (if present) is copied on every call.
// NOTE(review): presumably set only when row properties are stored in (key, value) pairs —
// confirm against the graph_view_t constructor.
std::optional<std::vector<vertex_t>> get_local_sorted_unique_edge_row_offsets() const
{
return local_sorted_unique_edge_row_offsets_;
}

std::optional<vertex_t const*> get_local_sorted_unique_edge_col_begin() const
{
return local_sorted_unique_edge_col_first_;
Expand All @@ -597,6 +614,11 @@ class graph_view_t<vertex_t,
return local_sorted_unique_edge_col_last_;
}

// Returns the local sorted unique edge column offsets held by this view (std::nullopt if unset).
// The result is returned by value, so the std::vector (if present) is copied on every call.
// NOTE(review): presumably set only when column properties are stored in (key, value) pairs —
// confirm against the graph_view_t constructor.
std::optional<std::vector<vertex_t>> get_local_sorted_unique_edge_col_offsets() const
{
return local_sorted_unique_edge_col_offsets_;
}

private:
std::vector<edge_t const*> adj_matrix_partition_offsets_{};
std::vector<vertex_t const*> adj_matrix_partition_indices_{};
Expand All @@ -613,11 +635,14 @@ class graph_view_t<vertex_t,
// segment offsets based on vertex degree, relevant only if vertex IDs are renumbered
std::optional<std::vector<vertex_t>> adj_matrix_partition_segment_offsets_{};

// FIXME: to be implemented.
// if valid, store row/column properties in key/value pairs (this saves memory if # unique edge
// rows/cols << V / row_comm_size|col_comm_size).
std::optional<vertex_t const*> local_sorted_unique_edge_row_first_{std::nullopt};
std::optional<vertex_t const*> local_sorted_unique_edge_row_last_{std::nullopt};
std::optional<std::vector<vertex_t>> local_sorted_unique_edge_row_offsets_{std::nullopt};
std::optional<vertex_t const*> local_sorted_unique_edge_col_first_{std::nullopt};
std::optional<vertex_t const*> local_sorted_unique_edge_col_last_{std::nullopt};
std::optional<std::vector<vertex_t>> local_sorted_unique_edge_col_offsets_{std::nullopt};
};

// single-GPU version
Expand Down
Loading

0 comments on commit 7cabcd0

Please sign in to comment.