diff --git a/cpp/include/experimental/graph_generator.hpp b/cpp/include/experimental/graph_generator.hpp index b8495ed7581..bc7337944f3 100644 --- a/cpp/include/experimental/graph_generator.hpp +++ b/cpp/include/experimental/graph_generator.hpp @@ -72,7 +72,7 @@ template std::tuple, rmm::device_uvector> generate_rmat_edgelist( raft::handle_t const& handle, size_t scale, - size_t edge_factor = 16, + size_t num_edges, double a = 0.57, double b = 0.19, double c = 0.19, @@ -80,5 +80,58 @@ std::tuple, rmm::device_uvector> generat bool clip_and_flip = false, bool scramble_vertex_ids = false); +enum class generator_distribution_t { POWER_LAW = 0, UNIFORM }; + +/** + * @brief generate multiple edge lists using the R-mat graph generator. + * + * This function allows multi-edges and self-loops similar to the Graph 500 reference + * implementation. + * + * @p scramble_vertex_ids needs to be set to `true` to generate a graph conforming to the Graph 500 + * specification (note that scrambling does not affect cuGraph's graph construction performance, so + * this is generally unnecessary). If `edge_factor` is given (e.g. Graph 500), set @p num_edges to + * (size_t{1} << @p scale) * `edge_factor`. To generate an undirected graph, set @p b == @p c and @p + * clip_and_flip = true. All the resulting edges will be placed in the lower triangular part + * (including the diagonal) of the graph adjacency matrix. + * + * + * @tparam vertex_t Type of vertex identifiers. Needs to be an integral type. + * @param handle RAFT handle object to encapsulate resources (e.g. CUDA stream, communicator, and + * handles to various CUDA libraries) to run graph algorithms. + * @param n_edgelists Number of edge lists (graphs) to generate + * @param min_scale Scale factor to set the minimum number of vertices in the graph. + * @param max_scale Scale factor to set the maximum number of vertices in the graph. + * @param edge_factor Average number of edges per vertex to generate. NOTE(review): the implementation added in this change passes `scale * edge_factor` (not `(size_t{1} << scale) * edge_factor`) as the edge count of each graph; confirm which is intended. 
+ * @param size_distribution Distribution of the graph sizes, impacts the scale parameter of the + * R-MAT generator + * @param edge_distribution Edges distribution for each graph, impacts how R-MAT parameters a,b,c,d, + * are set. + * @param seed Seed value for the random number generator. + * @param clip_and_flip Flag controlling whether to generate edges only in the lower triangular part + * (including the diagonal) of the graph adjacency matrix (if set to `true`) or not (if set to + * `false`). + * @param scramble_vertex_ids Flag controlling whether to scramble vertex ID bits (if set to `true`) + * or not (if set to `false`); scrambling vertex ID bits breaks correlation between vertex ID values + * and vertex degrees. The scramble code here follows the algorithm in the Graph 500 reference + * implementation version 3.0.0. + * @return A vector of std::tuple, rmm::device_uvector> of + *size @p n_edgelists, each vector element being a tuple of rmm::device_uvector objects for edge + *source vertex IDs and edge destination vertex IDs. 
+ */ +template +std::vector, rmm::device_uvector>> +generate_rmat_edgelists( + raft::handle_t const& handle, + size_t n_edgelists, + size_t min_scale, + size_t max_scale, + size_t edge_factor = 16, + generator_distribution_t size_distribution = generator_distribution_t::POWER_LAW, + generator_distribution_t edge_distribution = generator_distribution_t::POWER_LAW, + uint64_t seed = 0, + bool clip_and_flip = false, + bool scramble_vertex_ids = false); + } // namespace experimental } // namespace cugraph diff --git a/cpp/src/experimental/generate_rmat_edgelist.cu b/cpp/src/experimental/generate_rmat_edgelist.cu index 0a6d666432f..185fa837a70 100644 --- a/cpp/src/experimental/generate_rmat_edgelist.cu +++ b/cpp/src/experimental/generate_rmat_edgelist.cu @@ -27,7 +27,9 @@ #include #include +#include #include +#include "rmm/detail/error.hpp" namespace cugraph { namespace experimental { @@ -121,7 +123,57 @@ std::tuple, rmm::device_uvector> generat return std::make_tuple(std::move(srcs), std::move(dsts)); } -// explicit instantiation +template +std::vector, rmm::device_uvector>> +generate_rmat_edgelists(raft::handle_t const& handle, + size_t n_edgelists, + size_t min_scale, + size_t max_scale, + size_t edge_factor, + generator_distribution_t component_distribution, + generator_distribution_t edge_distribution, + uint64_t seed, + bool clip_and_flip, + bool scramble_vertex_ids) +{ + CUGRAPH_EXPECTS(min_scale > 0, "minimum graph scale is 1."); + CUGRAPH_EXPECTS(size_t{1} << max_scale <= std::numeric_limits::max(), + "Invalid input argument: scale too large for vertex_t."); + + std::vector, rmm::device_uvector>> output{}; + output.reserve(n_edgelists); + std::vector scale(n_edgelists); + + std::default_random_engine eng; + eng.seed(seed); + if (component_distribution == generator_distribution_t::UNIFORM) { + std::uniform_int_distribution dist(min_scale, max_scale); + std::generate(scale.begin(), scale.end(), [&dist, &eng]() { return dist(eng); }); + } else { + // May expose 
this as a parameter in the future + std::exponential_distribution dist(4); + // The modulo is here to protect the range because exponential distribution is defined on + // [0,infinity). With exponent 4 most values are between 0 and 1. NOTE(review): `% range` keeps the result in [min_scale, max_scale), so max_scale itself is never produced, and range is 0 when min_scale == max_scale (modulo by zero); confirm callers never pass equal scales. + auto range = max_scale - min_scale; + std::generate(scale.begin(), scale.end(), [&dist, &eng, &min_scale, &range]() { + return min_scale + static_cast(static_cast(range) * dist(eng)) % range; + }); + } + + // initialized to standard powerlaw values + double a = 0.57, b = 0.19, c = 0.19; + if (edge_distribution == generator_distribution_t::UNIFORM) { + a = 0.25; + b = a; + c = a; + } + + for (size_t i = 0; i < n_edgelists; i++) { + output.push_back(generate_rmat_edgelist( + handle, scale[i], scale[i] * edge_factor, a, b, c, i, clip_and_flip, scramble_vertex_ids)); + } + return output; +} template std::tuple, rmm::device_uvector> generate_rmat_edgelist(raft::handle_t const& handle, @@ -145,5 +197,29 @@ generate_rmat_edgelist(raft::handle_t const& handle, bool clip_and_flip, bool scramble_vertex_ids); +template std::vector, rmm::device_uvector>> +generate_rmat_edgelists(raft::handle_t const& handle, + size_t n_edgelists, + size_t min_scale, + size_t max_scale, + size_t edge_factor, + generator_distribution_t component_distribution, + generator_distribution_t edge_distribution, + uint64_t seed, + bool clip_and_flip, + bool scramble_vertex_ids); + +template std::vector, rmm::device_uvector>> +generate_rmat_edgelists(raft::handle_t const& handle, + size_t n_edgelists, + size_t min_scale, + size_t max_scale, + size_t edge_factor, + generator_distribution_t component_distribution, + generator_distribution_t edge_distribution, + uint64_t seed, + bool clip_and_flip, + bool scramble_vertex_ids); + } // namespace experimental } // namespace cugraph diff --git a/cpp/tests/experimental/generate_rmat_test.cpp b/cpp/tests/experimental/generate_rmat_test.cpp index 249a1a3c6c8..666106d62ca 100644 --- a/cpp/tests/experimental/generate_rmat_test.cpp +++ 
b/cpp/tests/experimental/generate_rmat_test.cpp @@ -14,6 +14,7 @@ * limitations under the License. */ +#include #include #include @@ -281,5 +282,90 @@ INSTANTIATE_TEST_CASE_P(simple_test, GenerateRmat_Usecase(20, 16, 0.57, 0.19, 0.19, false), GenerateRmat_Usecase(20, 16, 0.45, 0.22, 0.22, true), GenerateRmat_Usecase(20, 16, 0.45, 0.22, 0.22, false))); +typedef struct GenerateRmats_Usecase_t { + size_t n_edgelists{0}; + size_t min_scale{0}; + size_t max_scale{0}; + size_t edge_factor{0}; + cugraph::experimental::generator_distribution_t component_distribution; + cugraph::experimental::generator_distribution_t edge_distribution; + + GenerateRmats_Usecase_t(size_t n_edgelists, + size_t min_scale, + size_t max_scale, + size_t edge_factor, + cugraph::experimental::generator_distribution_t component_distribution, + cugraph::experimental::generator_distribution_t edge_distribution) + : n_edgelists(n_edgelists), + min_scale(min_scale), + max_scale(max_scale), + component_distribution(component_distribution), + edge_distribution(edge_distribution), + edge_factor(edge_factor){}; +} GenerateRmats_Usecase; +class Tests_GenerateRmats : public ::testing::TestWithParam { + public: + Tests_GenerateRmats() {} + static void SetupTestCase() {} + static void TearDownTestCase() {} + + virtual void SetUp() {} + virtual void TearDown() {} + template + void run_current_test(GenerateRmats_Usecase const& configuration) + { + raft::handle_t handle{}; + + CUDA_TRY(cudaDeviceSynchronize()); // for consistent performance measurement + + auto outputs = + cugraph::experimental::generate_rmat_edgelists(handle, + configuration.n_edgelists, + configuration.min_scale, + configuration.max_scale, + configuration.edge_factor, + configuration.component_distribution, + configuration.edge_distribution, + uint64_t{0}); + + CUDA_TRY(cudaDeviceSynchronize()); // for consistent performance measurement + ASSERT_EQ(configuration.n_edgelists, outputs.size()); + for (auto i = outputs.begin(); i != outputs.end(); 
++i) { + ASSERT_EQ(std::get<0>(*i).size(), std::get<1>(*i).size()); + ASSERT_TRUE((configuration.min_scale * configuration.edge_factor) <= std::get<0>(*i).size()); + ASSERT_TRUE((configuration.max_scale * configuration.edge_factor) >= std::get<0>(*i).size()); + } + } +}; +TEST_P(Tests_GenerateRmats, CheckInt32) { run_current_test(GetParam()); } + +INSTANTIATE_TEST_CASE_P( + simple_test, + Tests_GenerateRmats, + ::testing::Values( + GenerateRmats_Usecase(8, + 1, + 16, + 32, + cugraph::experimental::generator_distribution_t::UNIFORM, + cugraph::experimental::generator_distribution_t::UNIFORM), + GenerateRmats_Usecase(8, + 1, + 16, + 32, + cugraph::experimental::generator_distribution_t::UNIFORM, + cugraph::experimental::generator_distribution_t::POWER_LAW), + GenerateRmats_Usecase(8, + 3, + 16, + 32, + cugraph::experimental::generator_distribution_t::POWER_LAW, + cugraph::experimental::generator_distribution_t::UNIFORM), + GenerateRmats_Usecase(8, + 3, + 16, + 32, + cugraph::experimental::generator_distribution_t::POWER_LAW, + cugraph::experimental::generator_distribution_t::POWER_LAW))); CUGRAPH_TEST_PROGRAM_MAIN()