Skip to content

Commit

Permalink
Multiple graph generator with power law distribution on sizes (#1483)
Browse files Browse the repository at this point in the history
Add a function that leverages the RMAT generator and creates k graphs with a power-law or uniform distribution of graph sizes.
Closes #1458 
Consider adding bindings for this as part of #1473

Authors:
  - Alex Fender (https://github.com/afender)

Approvers:
  - Seunghwa Kang (https://github.com/seunghwak)
  - Brad Rees (https://github.com/BradReesWork)

URL: #1483
  • Loading branch information
afender authored Mar 31, 2021
1 parent f2e5a87 commit 7a2b02b
Show file tree
Hide file tree
Showing 3 changed files with 217 additions and 2 deletions.
55 changes: 54 additions & 1 deletion cpp/include/experimental/graph_generator.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -72,13 +72,66 @@ template <typename vertex_t>
std::tuple<rmm::device_uvector<vertex_t>, rmm::device_uvector<vertex_t>> generate_rmat_edgelist(
raft::handle_t const& handle,
size_t scale,
size_t edge_factor = 16,
size_t num_edges,
double a = 0.57,
double b = 0.19,
double c = 0.19,
uint64_t seed = 0,
bool clip_and_flip = false,
bool scramble_vertex_ids = false);

/**
 * @brief Distribution selector used by the multi-graph R-mat generator.
 */
enum class generator_distribution_t {
  POWER_LAW = 0,  ///< power-law distribution
  UNIFORM   = 1   ///< uniform distribution
};

/**
 * @brief Generate multiple edge lists using the R-mat graph generator.
 *
 * This function allows multi-edges and self-loops similar to the Graph 500 reference
 * implementation.
 *
 * @p scramble_vertex_ids needs to be set to `true` to generate graphs conforming to the Graph 500
 * specification (note that scrambling does not affect cuGraph's graph construction performance, so
 * this is generally unnecessary). To generate undirected graphs, set @p clip_and_flip = true; all
 * the resulting edges will then be placed in the lower triangular part (including the diagonal) of
 * each graph adjacency matrix.
 *
 * Unlike generate_rmat_edgelist(), this function takes no `num_edges` or R-mat `a`, `b`, `c`
 * arguments: the number of edges of each graph is derived from @p edge_factor and the R-mat
 * parameters are selected by @p edge_distribution.
 *
 * @tparam vertex_t Type of vertex identifiers. Needs to be an integral type.
 * @param handle RAFT handle object to encapsulate resources (e.g. CUDA stream, communicator, and
 * handles to various CUDA libraries) to run graph algorithms.
 * @param n_edgelists Number of edge lists (graphs) to generate.
 * @param min_scale Scale factor to set the minimum number of vertices in a graph. Must be greater
 * than 0.
 * @param max_scale Scale factor to set the maximum number of vertices in a graph.
 * (size_t{1} << @p max_scale) must be representable in @p vertex_t.
 * @param edge_factor Factor used to derive the number of edges of each graph from its scale.
 * NOTE(review): the implementation accompanying this declaration requests `scale * edge_factor`
 * edges per graph rather than `(size_t{1} << scale) * edge_factor`; confirm which is intended.
 * @param size_distribution Distribution of the graph sizes, impacts the scale parameter of the
 * R-MAT generator.
 * @param edge_distribution Edge distribution for each graph, impacts how the R-MAT parameters
 * a, b, c, d are set.
 * @param seed Seed value for the random number generator.
 * @param clip_and_flip Flag controlling whether to generate edges only in the lower triangular part
 * (including the diagonal) of the graph adjacency matrix (if set to `true`) or not (if set to
 * `false`).
 * @param scramble_vertex_ids Flag controlling whether to scramble vertex ID bits (if set to `true`)
 * or not (if set to `false`); scrambling vertex ID bits breaks correlation between vertex ID values
 * and vertex degrees. The scramble code here follows the algorithm in the Graph 500 reference
 * implementation version 3.0.0.
 * @return A vector of std::tuple<rmm::device_uvector<vertex_t>, rmm::device_uvector<vertex_t>> of
 * size @p n_edgelists, each vector element being a tuple of rmm::device_uvector objects for edge
 * source vertex IDs and edge destination vertex IDs.
 */
template <typename vertex_t>
std::vector<std::tuple<rmm::device_uvector<vertex_t>, rmm::device_uvector<vertex_t>>>
generate_rmat_edgelists(
raft::handle_t const& handle,
size_t n_edgelists,
size_t min_scale,
size_t max_scale,
size_t edge_factor = 16,
generator_distribution_t size_distribution = generator_distribution_t::POWER_LAW,
generator_distribution_t edge_distribution = generator_distribution_t::POWER_LAW,
uint64_t seed = 0,
bool clip_and_flip = false,
bool scramble_vertex_ids = false);

} // namespace experimental
} // namespace cugraph
78 changes: 77 additions & 1 deletion cpp/src/experimental/generate_rmat_edgelist.cu
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,9 @@
#include <thrust/iterator/zip_iterator.h>
#include <thrust/tuple.h>

#include <random>
#include <tuple>
#include "rmm/detail/error.hpp"

namespace cugraph {
namespace experimental {
Expand Down Expand Up @@ -121,7 +123,57 @@ std::tuple<rmm::device_uvector<vertex_t>, rmm::device_uvector<vertex_t>> generat
return std::make_tuple(std::move(srcs), std::move(dsts));
}

// explicit instantiation
/**
 * @brief Generate @p n_edgelists R-mat edge lists whose scales are drawn at random.
 *
 * A scale is drawn on the host for every edge list, then generate_rmat_edgelist() is called once
 * per edge list with that scale and `scale * edge_factor` as the requested number of edges.
 * NOTE(review): `scale * edge_factor` (not `(1 << scale) * edge_factor`) is what the accompanying
 * test checks; confirm this is the intended edge count.
 *
 * @tparam vertex_t Type of vertex identifiers.
 * @param handle RAFT handle forwarded to generate_rmat_edgelist().
 * @param n_edgelists Number of edge lists to generate.
 * @param min_scale Smallest scale that may be drawn; must be greater than 0 and not larger than
 * @p max_scale.
 * @param max_scale Largest scale; (size_t{1} << @p max_scale) must fit in @p vertex_t.
 * @param edge_factor Multiplier applied to each drawn scale to obtain the number of edges.
 * @param component_distribution UNIFORM draws scales uniformly from [min_scale, max_scale];
 * otherwise scales are drawn from an exponential distribution folded into
 * [min_scale, max_scale).
 * @param edge_distribution UNIFORM selects a = b = c = 0.25; otherwise a = 0.57, b = c = 0.19.
 * @param seed Seeds the host engine that draws the scales; `seed + i` seeds the generation of the
 * i-th edge list.
 * @param clip_and_flip Forwarded to generate_rmat_edgelist().
 * @param scramble_vertex_ids Forwarded to generate_rmat_edgelist().
 * @return One (sources, destinations) tuple per generated edge list, in generation order.
 */
template <typename vertex_t>
std::vector<std::tuple<rmm::device_uvector<vertex_t>, rmm::device_uvector<vertex_t>>>
generate_rmat_edgelists(raft::handle_t const& handle,
                        size_t n_edgelists,
                        size_t min_scale,
                        size_t max_scale,
                        size_t edge_factor,
                        generator_distribution_t component_distribution,
                        generator_distribution_t edge_distribution,
                        uint64_t seed,
                        bool clip_and_flip,
                        bool scramble_vertex_ids)
{
  CUGRAPH_EXPECTS(min_scale > 0, "minimum graph scale is 1.");
  // An inverted range would underflow `max_scale - min_scale` below and violates the
  // precondition (a <= b) of std::uniform_int_distribution.
  CUGRAPH_EXPECTS(min_scale <= max_scale,
                  "Invalid input argument: min_scale should not exceed max_scale.");
  // Check the shift width first: shifting by the bit width of size_t or more is undefined.
  CUGRAPH_EXPECTS(
    (max_scale < static_cast<size_t>(std::numeric_limits<size_t>::digits)) &&
      ((size_t{1} << max_scale) <= static_cast<size_t>(std::numeric_limits<vertex_t>::max())),
    "Invalid input argument: scale too large for vertex_t.");

  std::vector<std::tuple<rmm::device_uvector<vertex_t>, rmm::device_uvector<vertex_t>>> output{};
  output.reserve(n_edgelists);
  std::vector<vertex_t> scale(n_edgelists);

  std::default_random_engine eng;
  eng.seed(seed);
  if (component_distribution == generator_distribution_t::UNIFORM) {
    std::uniform_int_distribution<vertex_t> dist(static_cast<vertex_t>(min_scale),
                                                 static_cast<vertex_t>(max_scale));
    std::generate(scale.begin(), scale.end(), [&dist, &eng]() { return dist(eng); });
  } else {
    auto const range = max_scale - min_scale;
    if (range == 0) {
      // Only one scale is possible; the modulo below would otherwise divide by zero.
      scale.assign(n_edgelists, static_cast<vertex_t>(min_scale));
    } else {
      // May expose this as a parameter in the future
      std::exponential_distribution<float> dist(4);
      // The modulo is here to protect the range because exponential distribution is defined on
      // [0,infinity). With exponent 4 most values are between 0 and 1
      std::generate(scale.begin(), scale.end(), [&dist, &eng, &min_scale, &range]() {
        return min_scale + static_cast<vertex_t>(static_cast<float>(range) * dist(eng)) % range;
      });
    }
  }

  // initialized to standard powerlaw values
  double a = 0.57, b = 0.19, c = 0.19;
  if (edge_distribution == generator_distribution_t::UNIFORM) {
    a = 0.25;
    b = a;
    c = a;
  }

  for (size_t i = 0; i < n_edgelists; i++) {
    // Offset the user seed by the graph index so that `seed` influences the generated edges while
    // every graph of one call still gets a distinct seed (identical to the index when seed == 0).
    output.push_back(generate_rmat_edgelist<vertex_t>(handle,
                                                      scale[i],
                                                      scale[i] * edge_factor,
                                                      a,
                                                      b,
                                                      c,
                                                      seed + i,
                                                      clip_and_flip,
                                                      scramble_vertex_ids));
  }
  return output;
}

template std::tuple<rmm::device_uvector<int32_t>, rmm::device_uvector<int32_t>>
generate_rmat_edgelist<int32_t>(raft::handle_t const& handle,
Expand All @@ -145,5 +197,29 @@ generate_rmat_edgelist<int64_t>(raft::handle_t const& handle,
bool clip_and_flip,
bool scramble_vertex_ids);

// Explicit instantiation of generate_rmat_edgelists for 32-bit vertex identifiers.
template std::vector<std::tuple<rmm::device_uvector<int32_t>, rmm::device_uvector<int32_t>>>
generate_rmat_edgelists<int32_t>(raft::handle_t const& handle,
size_t n_edgelists,
size_t min_scale,
size_t max_scale,
size_t edge_factor,
generator_distribution_t component_distribution,
generator_distribution_t edge_distribution,
uint64_t seed,
bool clip_and_flip,
bool scramble_vertex_ids);

// Explicit instantiation of generate_rmat_edgelists for 64-bit vertex identifiers.
template std::vector<std::tuple<rmm::device_uvector<int64_t>, rmm::device_uvector<int64_t>>>
generate_rmat_edgelists<int64_t>(raft::handle_t const& handle,
size_t n_edgelists,
size_t min_scale,
size_t max_scale,
size_t edge_factor,
generator_distribution_t component_distribution,
generator_distribution_t edge_distribution,
uint64_t seed,
bool clip_and_flip,
bool scramble_vertex_ids);

} // namespace experimental
} // namespace cugraph
86 changes: 86 additions & 0 deletions cpp/tests/experimental/generate_rmat_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
* limitations under the License.
*/

#include <tuple>
#include <utilities/base_fixture.hpp>
#include <utilities/test_utilities.hpp>

Expand Down Expand Up @@ -281,5 +282,90 @@ INSTANTIATE_TEST_CASE_P(simple_test,
GenerateRmat_Usecase(20, 16, 0.57, 0.19, 0.19, false),
GenerateRmat_Usecase(20, 16, 0.45, 0.22, 0.22, true),
GenerateRmat_Usecase(20, 16, 0.45, 0.22, 0.22, false)));
/**
 * @brief Parameters of one generate_rmat_edgelists() test case.
 *
 * Both the tag name (GenerateRmats_Usecase_t) and the alias (GenerateRmats_Usecase) are kept so
 * existing uses of either spelling continue to compile.
 */
struct GenerateRmats_Usecase_t {
  size_t n_edgelists{0};  // number of edge lists to request
  size_t min_scale{0};    // lower bound passed as min_scale
  size_t max_scale{0};    // upper bound passed as max_scale
  size_t edge_factor{0};  // edge factor passed to the generator
  cugraph::experimental::generator_distribution_t component_distribution;
  cugraph::experimental::generator_distribution_t edge_distribution;

  // Members are initialized in declaration order (edge_factor before the distributions) so the
  // initializer list matches the order the compiler actually uses.
  GenerateRmats_Usecase_t(size_t n_edgelists,
                          size_t min_scale,
                          size_t max_scale,
                          size_t edge_factor,
                          cugraph::experimental::generator_distribution_t component_distribution,
                          cugraph::experimental::generator_distribution_t edge_distribution)
    : n_edgelists(n_edgelists),
      min_scale(min_scale),
      max_scale(max_scale),
      edge_factor(edge_factor),
      component_distribution(component_distribution),
      edge_distribution(edge_distribution)
  {
  }
};
using GenerateRmats_Usecase = GenerateRmats_Usecase_t;
// Value-parameterized fixture: generates a batch of R-mat edge lists for one
// GenerateRmats_Usecase and checks how many lists come back and how large each one is.
class Tests_GenerateRmats : public ::testing::TestWithParam<GenerateRmats_Usecase> {
 public:
  Tests_GenerateRmats() {}
  static void SetupTestCase() {}
  static void TearDownTestCase() {}

  virtual void SetUp() {}
  virtual void TearDown() {}

  // Runs the generator with vertex type vertex_t and seed 0, then verifies that
  //  - exactly n_edgelists edge lists are returned,
  //  - source and destination arrays of every list have the same length,
  //  - that length lies in [min_scale * edge_factor, max_scale * edge_factor].
  template <typename vertex_t>
  void run_current_test(GenerateRmats_Usecase const& configuration)
  {
    raft::handle_t handle{};

    CUDA_TRY(cudaDeviceSynchronize());  // for consistent performance measurement

    auto edgelists =
      cugraph::experimental::generate_rmat_edgelists<vertex_t>(handle,
                                                               configuration.n_edgelists,
                                                               configuration.min_scale,
                                                               configuration.max_scale,
                                                               configuration.edge_factor,
                                                               configuration.component_distribution,
                                                               configuration.edge_distribution,
                                                               uint64_t{0});

    CUDA_TRY(cudaDeviceSynchronize());  // for consistent performance measurement

    ASSERT_EQ(configuration.n_edgelists, edgelists.size());

    auto const fewest_edges = configuration.min_scale * configuration.edge_factor;
    auto const most_edges   = configuration.max_scale * configuration.edge_factor;
    for (auto const& edgelist : edgelists) {
      auto const& sources      = std::get<0>(edgelist);
      auto const& destinations = std::get<1>(edgelist);
      ASSERT_EQ(sources.size(), destinations.size());
      ASSERT_TRUE(fewest_edges <= sources.size());
      ASSERT_TRUE(most_edges >= sources.size());
    }
  }
};
// Runs the multi-graph generator check with 32-bit vertex identifiers for every parameter set.
TEST_P(Tests_GenerateRmats, CheckInt32) { run_current_test<int32_t>(GetParam()); }

// Each use case is (n_edgelists, min_scale, max_scale, edge_factor, component_distribution,
// edge_distribution). The four cases cover every UNIFORM / POWER_LAW combination of the two
// distribution arguments.
INSTANTIATE_TEST_CASE_P(
simple_test,
Tests_GenerateRmats,
::testing::Values(
GenerateRmats_Usecase(8,
1,
16,
32,
cugraph::experimental::generator_distribution_t::UNIFORM,
cugraph::experimental::generator_distribution_t::UNIFORM),
GenerateRmats_Usecase(8,
1,
16,
32,
cugraph::experimental::generator_distribution_t::UNIFORM,
cugraph::experimental::generator_distribution_t::POWER_LAW),
GenerateRmats_Usecase(8,
3,
16,
32,
cugraph::experimental::generator_distribution_t::POWER_LAW,
cugraph::experimental::generator_distribution_t::UNIFORM),
GenerateRmats_Usecase(8,
3,
16,
32,
cugraph::experimental::generator_distribution_t::POWER_LAW,
cugraph::experimental::generator_distribution_t::POWER_LAW)));
CUGRAPH_TEST_PROGRAM_MAIN()

0 comments on commit 7a2b02b

Please sign in to comment.