From f2e5a8755e18aadfc151ca65787dd4a3775efb85 Mon Sep 17 00:00:00 2001 From: Andrei Schaffer <37386037+aschaffer@users.noreply.github.com> Date: Tue, 30 Mar 2021 13:09:33 -0500 Subject: [PATCH] Implement C/CUDA RandomWalks functionality (#1439) This PR tracks work on issue: https://github.com/rapidsai/cugraph/issues/1380. Authors: - Andrei Schaffer (@aschaffer) Approvers: - Brad Rees (@BradReesWork) - Chuck Hastings (@ChuckHastings) - Seunghwa Kang (@seunghwak) URL: https://github.com/rapidsai/cugraph/pull/1439 --- cpp/CMakeLists.txt | 1 + cpp/include/algorithms.hpp | 28 + cpp/include/utilities/cython.hpp | 24 + cpp/include/utilities/graph_traits.hpp | 61 ++ cpp/src/experimental/random_walks.cuh | 887 ++++++++++++++++++ cpp/src/sampling/random_walks.cu | 78 ++ cpp/src/utilities/cython.cu | 76 ++ cpp/tests/CMakeLists.txt | 14 + cpp/tests/experimental/random_walks_test.cu | 152 +++ cpp/tests/experimental/random_walks_utils.cuh | 152 +++ cpp/tests/experimental/rw_low_level_test.cu | 783 ++++++++++++++++ 11 files changed, 2256 insertions(+) create mode 100644 cpp/include/utilities/graph_traits.hpp create mode 100644 cpp/src/experimental/random_walks.cuh create mode 100644 cpp/src/sampling/random_walks.cu create mode 100644 cpp/tests/experimental/random_walks_test.cu create mode 100644 cpp/tests/experimental/random_walks_utils.cuh create mode 100644 cpp/tests/experimental/rw_low_level_test.cu diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index 34ea935e31d..57f324a60a9 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -420,6 +420,7 @@ add_library(cugraph SHARED src/community/triangles_counting.cu src/community/extract_subgraph_by_vertex.cu src/community/egonet.cu + src/sampling/random_walks.cu src/cores/core_number.cu src/traversal/two_hop_neighbors.cu src/components/connectivity.cu diff --git a/cpp/include/algorithms.hpp b/cpp/include/algorithms.hpp index b8706d81e21..0b45b799357 100644 --- a/cpp/include/algorithms.hpp +++ b/cpp/include/algorithms.hpp @@ -1252,5 +1252,33 @@ extract_ego(raft::handle_t const &handle, vertex_t *source_vertex, vertex_t n_subgraphs, vertex_t radius); + +/** + * @brief returns random walks (RW) from starting sources, where each path is of given maximum + * length. Uniform distribution is assumed for the random engine. + * + * @tparam graph_t Type of graph/view (typically, graph_view_t). + * @tparam index_t Type used to store indexing and sizes. + * @param handle RAFT handle object to encapsulate resources (e.g. CUDA stream, communicator, and + * handles to various CUDA libraries) to run graph algorithms. + * @param graph Graph (view )object to generate RW on. + * @param ptr_d_start Device pointer to set of starting vertex indices for the RW. + * @param num_paths = number(paths). + * @param max_depth maximum length of RWs. + * @return std::tuple, device_vec_t, + * device_vec_t> Triplet of coalesced RW paths, with corresponding edge weights for + * each, and corresponding path sizes. This is meant to minimize the number of DF's to be passed to + * the Python layer. The meaning of "coalesced" here is that a 2D array of paths of different sizes + * is represented as a 1D array. + */ +template +std::tuple, + rmm::device_uvector, + rmm::device_uvector> +random_walks(raft::handle_t const &handle, + graph_t const &graph, + typename graph_t::vertex_type const *ptr_d_start, + index_t num_paths, + index_t max_depth); } // namespace experimental } // namespace cugraph diff --git a/cpp/include/utilities/cython.hpp b/cpp/include/utilities/cython.hpp index 98e850abbf0..a58331d465a 100644 --- a/cpp/include/utilities/cython.hpp +++ b/cpp/include/utilities/cython.hpp @@ -19,6 +19,7 @@ #include #include #include +#include namespace cugraph { namespace cython { @@ -185,6 +186,19 @@ struct major_minor_weights_t { rmm::device_uvector shuffled_weights_; }; +// aggregate for random_walks() return type +// to be exposed to cython: +// +struct random_walk_ret_t { + size_t coalesced_sz_v_; + size_t coalesced_sz_w_; + size_t num_paths_; + size_t max_depth_; + std::unique_ptr d_coalesced_v_; + std::unique_ptr d_coalesced_w_; + std::unique_ptr d_sizes_; +}; + // wrapper for renumber_edgelist() return // (unrenumbering maps, etc.) // @@ -442,6 +456,16 @@ std::unique_ptr call_egonet(raft::handle_t const& handle, vertex_t* source_vertex, vertex_t n_subgraphs, vertex_t radius); +// wrapper for random_walks. +// +template +std::enable_if_t::value, + std::unique_ptr> +call_random_walks(raft::handle_t const& handle, + graph_container_t const& graph_container, + vertex_t const* ptr_start_set, + edge_t num_paths, + edge_t max_depth); // wrapper for shuffling: // diff --git a/cpp/include/utilities/graph_traits.hpp b/cpp/include/utilities/graph_traits.hpp new file mode 100644 index 00000000000..363a13190be --- /dev/null +++ b/cpp/include/utilities/graph_traits.hpp @@ -0,0 +1,61 @@ +/* + * Copyright (c) 2021, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include + +namespace cugraph { +namespace experimental { + +// primary template: +// +template +struct is_one_of; // purposely empty + +// partial specializations: +// +template +struct is_one_of { + static constexpr bool value = std::is_same::value || is_one_of::value; +}; + +template +struct is_one_of { + static constexpr bool value = false; +}; + +// meta-function that constrains +// vertex_t and edge_t template param candidates: +// +template +struct is_vertex_edge_combo { + static constexpr bool value = is_one_of::value && + is_one_of::value && + (sizeof(vertex_t) <= sizeof(edge_t)); +}; + +// meta-function that constrains +// all 3 template param candidates: +// +template +struct is_candidate { + static constexpr bool value = + is_vertex_edge_combo::value && is_one_of::value; +}; + +} // namespace experimental +} // namespace cugraph diff --git a/cpp/src/experimental/random_walks.cuh b/cpp/src/experimental/random_walks.cuh new file mode 100644 index 00000000000..aea8f3d8420 --- /dev/null +++ b/cpp/src/experimental/random_walks.cuh @@ -0,0 +1,887 @@ +/* + * Copyright (c) 2021, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +// Andrei Schaffer, aschaffer@nvidia.com +// +#pragma once + +#include + +#include + +#include +#include +#include + +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +namespace cugraph { +namespace experimental { + +namespace detail { + +template +using device_vec_t = rmm::device_uvector; + +template +using device_v_it = typename device_vec_t::iterator; + +template +value_t* raw_ptr(device_vec_t& dv) +{ + return dv.data(); +} + +template +value_t const* raw_const_ptr(device_vec_t const& dv) +{ + return dv.data(); +} + +template +struct device_const_vector_view { + device_const_vector_view(value_t const* d_buffer, index_t size) : d_buffer_(d_buffer), size_(size) + { + } + + device_const_vector_view(device_const_vector_view const& other) = delete; + device_const_vector_view& operator=(device_const_vector_view const& other) = delete; + + device_const_vector_view(device_const_vector_view&& other) + { + d_buffer_ = other.d_buffer_; + size_ = other.size_; + } + device_const_vector_view& operator=(device_const_vector_view&& other) + { + d_buffer_ = other.d_buffer_; + size_ = other.size_; + + return *this; + } + + value_t const* begin(void) const { return d_buffer_; } + + value_t const* end() const { return d_buffer_ + size_; } + + index_t size(void) const { return size_; } + + private: + value_t const* d_buffer_{nullptr}; + index_t size_; +}; + +// raft random generator: +// (using upper-bound cached "map" +// giving out_deg(v) for each v in [0, |V|); +// and a pre-generated vector of float random values +// in [0,1] to be brought into [0, d_ub[v])) +// +template +struct rrandom_gen_t { + using seed_type = seed_t; + using real_type = real_t; + + rrandom_gen_t(raft::handle_t const& handle, + index_t num_paths, + device_vec_t& d_random, // scratch-pad, non-coalesced + device_vec_t const& d_crt_out_deg, // non-coalesced + seed_t seed = seed_t{}) + : handle_(handle), + seed_(seed), + num_paths_(num_paths), + d_ptr_out_degs_(raw_const_ptr(d_crt_out_deg)), + d_ptr_random_(raw_ptr(d_random)) + { + auto rnd_sz = d_random.size(); + + CUGRAPH_EXPECTS(rnd_sz >= static_cast(num_paths), + "Un-allocated random buffer."); + + // done in constructor; + // this must be done at each step, + // but this object is constructed at each step; + // + raft::random::Rng rng(seed_); + rng.uniform( + d_ptr_random_, num_paths, real_t{0.0}, real_t{1.0}, handle.get_stream()); + } + + // in place: + // for each v in [0, num_paths) { + // if out_deg(v) > 0 + // d_col_indx[v] = random index in [0, out_deg(v)) + //} + void generate_col_indices(device_vec_t& d_col_indx) const + { + thrust::transform_if( + rmm::exec_policy(handle_.get_stream())->on(handle_.get_stream()), + d_ptr_random_, + d_ptr_random_ + num_paths_, // input1 + d_ptr_out_degs_, // input2 + d_ptr_out_degs_, // also stencil + d_col_indx.begin(), + [] __device__(real_t rnd_vindx, edge_t crt_out_deg) { + real_t max_ub = static_cast(crt_out_deg - 1); + auto interp_vindx = rnd_vindx * max_ub + real_t{.5}; + vertex_t v_indx = static_cast(interp_vindx); + return (v_indx >= crt_out_deg ? crt_out_deg - 1 : v_indx); + }, + [] __device__(auto crt_out_deg) { return crt_out_deg > 0; }); + } + + private: + raft::handle_t const& handle_; + index_t num_paths_; + edge_t const* d_ptr_out_degs_; // device buffer with out-deg of current set of vertices (most + // recent vertex in each path); size = num_paths_ + real_t* d_ptr_random_; // device buffer with real random values; size = num_paths_ + seed_t seed_; // seed to be used for current batch +}; + +// seeding policy: time (clock) dependent, +// to avoid RW calls repeating same random data: +// +template +struct clock_seeding_t { + clock_seeding_t(void) = default; + + seed_t operator()(void) { return static_cast(std::time(nullptr)); } +}; + +// seeding policy: fixed for debug/testing repro +// +template +struct fixed_seeding_t { + // purposely no default cnstr. + + fixed_seeding_t(seed_t seed) : seed_(seed) {} + seed_t operator()(void) { return seed_; } + + private: + seed_t seed_; +}; + +// classes abstracting the next vertex extraction mechanism: +// +// primary template, purposely undefined +template +struct col_indx_extract_t; + +// specialization for single-gpu functionality: +// +template +struct col_indx_extract_t> { + using vertex_t = typename graph_t::vertex_type; + using edge_t = typename graph_t::edge_type; + using weight_t = typename graph_t::weight_type; + + col_indx_extract_t(raft::handle_t const& handle, + device_vec_t const& d_indices, + device_vec_t const& d_offsets, + device_vec_t const& d_values, + device_vec_t const& d_crt_out_degs, + device_vec_t const& d_sizes, + index_t num_paths, + index_t max_depth) + : handle_(handle), + col_indices_(raw_const_ptr(d_indices)), + row_offsets_(raw_const_ptr(d_offsets)), + values_(raw_const_ptr(d_values)), + out_degs_(raw_const_ptr(d_crt_out_degs)), + sizes_(raw_const_ptr(d_sizes)), + num_paths_(num_paths), + max_depth_(max_depth) + { + } + + col_indx_extract_t(raft::handle_t const& handle, + vertex_t const* p_d_indices, + edge_t const* p_d_offsets, + weight_t const* p_d_values, + edge_t const* p_d_crt_out_degs, + index_t const* p_d_sizes, + index_t num_paths, + index_t max_depth) + : handle_(handle), + col_indices_(p_d_indices), + row_offsets_(p_d_offsets), + values_(p_d_values), + out_degs_(p_d_crt_out_degs), + sizes_(p_d_sizes), + num_paths_(num_paths), + max_depth_(max_depth) + { + } + + col_indx_extract_t(raft::handle_t const& handle, + graph_t const& graph, + edge_t const* p_d_crt_out_degs, + index_t const* p_d_sizes, + index_t num_paths, + index_t max_depth) + : handle_(handle), + col_indices_(graph.indices()), + row_offsets_(graph.offsets()), + values_(graph.weights()), + out_degs_(p_d_crt_out_degs), + sizes_(p_d_sizes), + num_paths_(num_paths), + max_depth_(max_depth) + { + } + + // in-place extractor of next set of vertices and weights, + // (d_v_next_vertices, d_v_next_weights), + // given start set of vertices. d_v_src_vertices, + // and corresponding column index set, d_v_col_indx: + // + // for each indx in [0, num_paths){ + // v_indx = d_v_src_vertices[indx*max_depth + d_sizes[indx] - 1]; + // if( out_degs_[v_indx] > 0 ) { + // start_row = row_offsets_[v_indx]; + // delta = d_v_col_indx[indx]; + // d_v_next_vertices[indx] = col_indices_[start_row + delta]; + // } + // (use tranform_if() with transform iterator) + // + void operator()( + device_vec_t const& d_coalesced_src_v, // in: coalesced vector of vertices + device_vec_t const& + d_v_col_indx, // in: column indices, given by stepper's random engine + device_vec_t& d_v_next_vertices, // out: set of destination vertices, for next step + device_vec_t& + d_v_next_weights) // out: set of weights between src and destination vertices, for next step + const + { + thrust::transform_if( + rmm::exec_policy(handle_.get_stream())->on(handle_.get_stream()), + thrust::make_counting_iterator(0), + thrust::make_counting_iterator(num_paths_), // input1 + d_v_col_indx.begin(), // input2 + out_degs_, // stencil + thrust::make_zip_iterator( + thrust::make_tuple(d_v_next_vertices.begin(), d_v_next_weights.begin())), // output + [max_depth = max_depth_, + ptr_d_sizes = sizes_, + ptr_d_coalesced_v = raw_const_ptr(d_coalesced_src_v), + row_offsets = row_offsets_, + col_indices = col_indices_, + values = values_] __device__(auto indx, auto col_indx) { + auto delta = ptr_d_sizes[indx] - 1; + auto v_indx = ptr_d_coalesced_v[indx * max_depth + delta]; + auto start_row = row_offsets[v_indx]; + return thrust::make_tuple(col_indices[start_row + col_indx], values[start_row + col_indx]); + }, + [] __device__(auto crt_out_deg) { return crt_out_deg > 0; }); + } + + private: + raft::handle_t const& handle_; + vertex_t const* col_indices_; + edge_t const* row_offsets_; + weight_t const* values_; + + edge_t const* out_degs_; + index_t const* sizes_; + index_t num_paths_; + index_t max_depth_; +}; + +/** + * @brief Class abstracting the RW initialization, stepping, and stopping functionality + * The outline of the algorithm is as follows: + * + * (1) vertex sets are coalesced into d_coalesced_v, + * weight sets are coalesced into d_coalesced_w; + * i.e., the 2 coalesced vectors are allocated to + * num_paths * max_depth, and num_paths * (max_depth -1), respectively + * (since each path has a number of edges equal one + * less than the number of vertices); + * d_coalesced_v is initialized for each i*max_depth entry + * (i=0,,,,num_paths-1) to the corresponding starting vertices; + * (2) d_sizes maintains the current size is for each path; + * Note that a path may end prematurely if it reaches a sink vertex; + * (3) d_crt_out_degs maintains the out-degree of each of the latest + * vertices in the path; i.e., if N(v) := set of destination + * vertices from v, then this vector stores |N(v)| + * for last v in each path; i.e., + * d_crt_out_degs[i] = + * out-degree( d_coalesced_v[i*max_depth + d_sizes[i]-1] ), + * for i in {0,..., num_paths-1}; + * (4) a set of num_paths floating point numbers between [0,1] + * are generated at each step; then they get translated into + * _indices_ k in {0,...d_crt_out_degs[i]-1}; + * (5) the next vertex v is then picked as the k-th out-neighbor: + * next(v) = N(v)[k]; + * (6) d_sizes are incremented accordingly; i.e., for those paths whose + * corresponding last vertex has out-degree > 0; + * (7) then next(v) and corresponding weight of (v, next(v)) are stored + * at appropriate location in their corresponding coalesced vectors; + * (8) the client of this class (the random_walks() function) then repeats + * this process max_depth times or until all paths + * have reached sinks; i.e., d_crt_out_degs = {0, 0,...,0}, + * whichever comes first; + * (9) in the end some post-processing is done (stop()) to remove + * unused entries from the 2 coalesced vectors; + * (10) the triplet made of the 2 coalesced vectors and d_sizes is then returned; + * + */ +template , + typename index_t = typename graph_t::edge_type> +struct random_walker_t { + using vertex_t = typename graph_t::vertex_type; + using edge_t = typename graph_t::edge_type; + using weight_t = typename graph_t::weight_type; + using seed_t = typename random_engine_t::seed_type; + using real_t = typename random_engine_t::real_type; + + random_walker_t(raft::handle_t const& handle, + graph_t const& graph, + index_t num_paths, + index_t max_depth) + : handle_(handle), + num_paths_(num_paths), + max_depth_(max_depth), + d_cached_out_degs_(graph.compute_out_degrees(handle_)) + { + } + + // for each i in [0..num_paths_) { + // d_paths_v_set[i*max_depth] = d_src_init_v[i]; + // + void start(device_const_vector_view& d_src_init_v, // in: start set + device_vec_t& d_paths_v_set, // out: coalesced v + device_vec_t& d_sizes) const // out: init sizes to {1,...} + { + // intialize path sizes to 1, as they contain at least one vertex each: + // the initial set: d_src_init_v; + // + thrust::copy_n(rmm::exec_policy(handle_.get_stream())->on(handle_.get_stream()), + thrust::make_constant_iterator(1), + num_paths_, + d_sizes.begin()); + + // scatter d_src_init_v to coalesced vertex vector: + // + auto dlambda = [stride = max_depth_] __device__(auto indx) { return indx * stride; }; + + // use the transform iterator as map: + // + auto map_it_begin = + thrust::make_transform_iterator(thrust::make_counting_iterator(0), dlambda); + + thrust::scatter(rmm::exec_policy(handle_.get_stream())->on(handle_.get_stream()), + d_src_init_v.begin(), + d_src_init_v.end(), + map_it_begin, + d_paths_v_set.begin()); + } + + // overload for start() with device_uvector d_v_start + // (handy for testing) + // + void start(device_vec_t const& d_start, // in: start set + device_vec_t& d_paths_v_set, // out: coalesced v + device_vec_t& d_sizes) const // out: init sizes to {1,...} + { + device_const_vector_view d_start_cview{d_start.data(), + static_cast(d_start.size())}; + + start(d_start_cview, d_paths_v_set, d_sizes); + } + + // in-place updates its arguments from one step to next + // (to avoid copying); all "crt" arguments are updated at each step() + // and passed as scratchpad space to avoid copying them + // from one step to another + // + // take one step in sync for all paths that have not reached sinks: + // + void step( + graph_t const& graph, + seed_t seed, + device_vec_t& d_coalesced_v, // crt coalesced vertex set + device_vec_t& d_coalesced_w, // crt coalesced weight set + device_vec_t& d_paths_sz, // crt paths sizes + device_vec_t& d_crt_out_degs, // crt out-degs for current set of vertices + device_vec_t& d_random, // crt set of random real values + device_vec_t& d_col_indx, // crt col col indices to be used for retrieving next step + device_vec_t& d_next_v, // crt set of destination vertices, for next step + device_vec_t& d_next_w) + const // set of weights between src and destination vertices, for next step + { + // update crt snapshot of out-degs, + // from cached out degs, using + // latest vertex in each path as source: + // + gather_from_coalesced( + d_coalesced_v, d_cached_out_degs_, d_paths_sz, d_crt_out_degs, max_depth_, num_paths_); + + // generate random destination indices: + // + random_engine_t rgen(handle_, num_paths_, d_random, d_crt_out_degs, seed); + + rgen.generate_col_indices(d_col_indx); + + // dst extraction from dst indices: + // + col_indx_extract_t col_extractor(handle_, + graph, + raw_const_ptr(d_crt_out_degs), + raw_const_ptr(d_paths_sz), + num_paths_, + max_depth_); + + // The following steps update the next entry in each path, + // except the paths that reached sinks; + // + // for each indx in [0..num_paths) { + // v_indx = d_v_rnd_n_indx[indx]; + // + // -- get the `v_indx`-th out-vertex of d_v_paths_v_set[indx] vertex: + // -- also, note the size deltas increased by 1 in dst (d_sizes[]): + // + // d_coalesced_v[indx*num_paths + d_sizes[indx]] = + // get_out_vertex(graph, d_coalesced_v[indx*num_paths + d_sizes[indx] -1)], v_indx); + // d_coalesced_w[indx*(num_paths-1) + d_sizes[indx] - 1] = + // get_out_edge_weight(graph, d_coalesced_v[indx*num_paths + d_sizes[indx]-2], v_indx); + // + // (1) generate actual vertex destinations: + // + col_extractor(d_coalesced_v, d_col_indx, d_next_v, d_next_w); + + // (2) update path sizes: + // + update_path_sizes(d_crt_out_degs, d_paths_sz); + + // (3) actual coalesced updates: + // + scatter_vertices(d_next_v, d_coalesced_v, d_crt_out_degs, d_paths_sz); + scatter_weights(d_next_w, d_coalesced_w, d_crt_out_degs, d_paths_sz); + } + + // returns true if all paths reached sinks: + // + bool all_paths_stopped(device_vec_t const& d_crt_out_degs) const + { + auto how_many_stopped = + thrust::count_if(rmm::exec_policy(handle_.get_stream())->on(handle_.get_stream()), + d_crt_out_degs.begin(), + d_crt_out_degs.end(), + [] __device__(auto crt_out_deg) { return crt_out_deg == 0; }); + return (static_cast(how_many_stopped) == d_crt_out_degs.size()); + } + + // wrap-up, post-process: + // truncate v_set, w_set to actual space used + // + void stop(device_vec_t& d_coalesced_v, // coalesced vertex set + device_vec_t& d_coalesced_w, // coalesced weight set + device_vec_t const& d_sizes) const // paths sizes + { + assert(max_depth_ > 1); // else, no need to step; and no edges + + index_t const* ptr_d_sizes = d_sizes.data(); + + auto predicate_v = [max_depth = max_depth_, ptr_d_sizes] __device__(auto indx) { + auto row_indx = indx / max_depth; + auto col_indx = indx % max_depth; + + return (col_indx >= ptr_d_sizes[row_indx]); + }; + + auto predicate_w = [max_depth = max_depth_, ptr_d_sizes] __device__(auto indx) { + auto row_indx = indx / (max_depth - 1); + auto col_indx = indx % (max_depth - 1); + + return (col_indx >= ptr_d_sizes[row_indx] - 1); + }; + + auto new_end_v = + thrust::remove_if(rmm::exec_policy(handle_.get_stream())->on(handle_.get_stream()), + d_coalesced_v.begin(), + d_coalesced_v.end(), + thrust::make_counting_iterator(0), + predicate_v); + + auto new_end_w = + thrust::remove_if(rmm::exec_policy(handle_.get_stream())->on(handle_.get_stream()), + d_coalesced_w.begin(), + d_coalesced_w.end(), + thrust::make_counting_iterator(0), + predicate_w); + + CUDA_TRY(cudaStreamSynchronize(handle_.get_stream())); + + d_coalesced_v.resize(thrust::distance(d_coalesced_v.begin(), new_end_v), handle_.get_stream()); + d_coalesced_w.resize(thrust::distance(d_coalesced_w.begin(), new_end_w), handle_.get_stream()); + } + + // in-place non-static (needs handle_): + // for indx in [0, nelems): + // gather d_result[indx] = d_src[d_coalesced[indx*stride + d_sizes[indx] -1]] + // + template + void gather_from_coalesced( + device_vec_t const& d_coalesced, // |gather map| = stride*nelems + device_vec_t const& d_src, // |gather input| = nelems + device_vec_t const& d_sizes, // |paths sizes| = nelems, elems in [1, stride] + device_vec_t& d_result, // |output| = nelems + index_t stride, // stride = coalesce block size (typically max_depth) + index_t nelems) const // nelems = number of elements to gather (typically num_paths_) + { + vertex_t const* ptr_d_coalesced = raw_const_ptr(d_coalesced); + index_t const* ptr_d_sizes = raw_const_ptr(d_sizes); + + // delta = ptr_d_sizes[indx] - 1 + // + auto dlambda = [stride, ptr_d_sizes, ptr_d_coalesced] __device__(auto indx) { + auto delta = ptr_d_sizes[indx] - 1; + return ptr_d_coalesced[indx * stride + delta]; + }; + + // use the transform iterator as map: + // + auto map_it_begin = + thrust::make_transform_iterator(thrust::make_counting_iterator(0), dlambda); + + thrust::gather(rmm::exec_policy(handle_.get_stream())->on(handle_.get_stream()), + map_it_begin, + map_it_begin + nelems, + d_src.begin(), + d_result.begin()); + } + + // in-place non-static (needs handle_); + // pre-condition: path sizes are assumed updated + // to reflect new vertex additions; + // + // for indx in [0, nelems): + // if ( d_crt_out_degs[indx] > 0 ) + // d_coalesced[indx*stride + (d_sizes[indx] - adjust)- 1] = d_src[indx] + // + // adjust := 0 for coalesced vertices; 1 for weights + // (because |edges| = |vertices| - 1, in each path); + // + template + void scatter_to_coalesced( + device_vec_t const& d_src, // |scatter input| = nelems + device_vec_t& d_coalesced, // |scatter input| = stride*nelems + device_vec_t const& d_crt_out_degs, // |current set of vertex out degrees| = nelems, + // to be used as stencil (don't scatter if 0) + device_vec_t const& + d_sizes, // paths sizes used to provide delta in coalesced paths; + // pre-condition: assumed as updated to reflect new vertex additions; + // also, this is the number of _vertices_ in each path; + // hence for scattering weights this needs to be adjusted; hence the `adjust` parameter + index_t + stride, // stride = coalesce block size (max_depth for vertices; max_depth-1 for weights) + index_t nelems, // nelems = number of elements to gather (typically num_paths_) + index_t adjust = 0) + const // adjusting parameter for scattering vertices (0) or weights (1); see above for more; + { + index_t const* ptr_d_sizes = raw_const_ptr(d_sizes); + + auto dlambda = [stride, adjust, ptr_d_sizes] __device__(auto indx) { + auto delta = ptr_d_sizes[indx] - adjust - 1; + return indx * stride + delta; + }; + + // use the transform iterator as map: + // + auto map_it_begin = + thrust::make_transform_iterator(thrust::make_counting_iterator(0), dlambda); + + thrust::scatter_if(rmm::exec_policy(handle_.get_stream())->on(handle_.get_stream()), + d_src.begin(), + d_src.end(), + map_it_begin, + d_crt_out_degs.begin(), + d_coalesced.begin(), + [] __device__(auto crt_out_deg) { + return crt_out_deg > 0; // predicate + }); + } + + // updates the entries in the corresponding coalesced vector, + // for which out_deg > 0 + // + void scatter_vertices(device_vec_t const& d_src, + device_vec_t& d_coalesced, + device_vec_t const& d_crt_out_degs, + device_vec_t const& d_sizes) const + { + scatter_to_coalesced(d_src, d_coalesced, d_crt_out_degs, d_sizes, max_depth_, num_paths_); + } + // + void scatter_weights(device_vec_t const& d_src, + device_vec_t& d_coalesced, + device_vec_t const& d_crt_out_degs, + device_vec_t const& d_sizes) const + { + scatter_to_coalesced( + d_src, d_coalesced, d_crt_out_degs, d_sizes, max_depth_ - 1, num_paths_, 1); + } + + // in-place update (increment) path sizes for paths + // that have not reached a sink; i.e., for which + // d_crt_out_degs[indx]>0: + // + void update_path_sizes(device_vec_t const& d_crt_out_degs, + device_vec_t& d_sizes) const + { + thrust::transform_if( + rmm::exec_policy(handle_.get_stream())->on(handle_.get_stream()), + d_sizes.begin(), + d_sizes.end(), // input + d_crt_out_degs.begin(), // stencil + d_sizes.begin(), // output: in-place + [] __device__(auto crt_sz) { return crt_sz + 1; }, + [] __device__(auto crt_out_deg) { return crt_out_deg > 0; }); + } + + device_vec_t const& get_out_degs(void) const { return d_cached_out_degs_; } + + private: + raft::handle_t const& handle_; + index_t num_paths_; + index_t max_depth_; + device_vec_t d_cached_out_degs_; +}; + +/** + * @brief returns random walks (RW) from starting sources, where each path is of given maximum + * length. Single-GPU specialization. + * + * @tparam graph_t Type of graph (view). + * @tparam random_engine_t Type of random engine used to generate RW. + * @param handle RAFT handle object to encapsulate resources (e.g. CUDA stream, communicator, and + * handles to various CUDA libraries) to run graph algorithms. + * @param graph Graph object to generate RW on. + * @param d_v_start Device (view) set of starting vertex indices for the RW. + * number(paths) == d_v_start.size(). + * @param max_depth maximum length of RWs. + * @return std::tuple, device_vec_t, + * device_vec_t, seed> Quadruplet of coalesced RW paths, with corresponding edge weights + * for each, and corresponding path sizes. This is meant to minimize the number of DF's to be passed + * to the Python layer. Also returning seed for testing / debugging repro. The meaning of + * "coalesced" here is that a 2D array of paths of different sizes is represented as a 1D array. + */ +template , + typename seeding_policy_t = clock_seeding_t, + typename index_t = typename graph_t::edge_type> +std::enable_if_t, + device_vec_t, + device_vec_t, + typename random_engine_t::seed_type>> +random_walks_impl(raft::handle_t const& handle, + graph_t const& graph, + device_const_vector_view& d_v_start, + index_t max_depth, + seeding_policy_t seeder = clock_seeding_t{}) +{ + using vertex_t = typename graph_t::vertex_type; + using edge_t = typename graph_t::edge_type; + using weight_t = typename graph_t::weight_type; + using seed_t = typename random_engine_t::seed_type; + using real_t = typename random_engine_t::real_type; + + vertex_t num_vertices = graph.get_number_of_vertices(); + + auto how_many_valid = + thrust::count_if(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + d_v_start.begin(), + d_v_start.end(), + [num_vertices] __device__(auto crt_vertex) { + return (crt_vertex >= 0) && (crt_vertex < num_vertices); + }); + + CUGRAPH_EXPECTS(static_cast(how_many_valid) == d_v_start.size(), + "Invalid set of starting vertices."); + + auto num_paths = d_v_start.size(); + auto stream = handle.get_stream(); + + random_walker_t rand_walker{ + handle, graph, static_cast(num_paths), static_cast(max_depth)}; + + // pre-allocate num_paths * max_depth; + // + auto coalesced_sz = num_paths * max_depth; + device_vec_t d_coalesced_v(coalesced_sz, stream); // coalesced vertex set + device_vec_t d_coalesced_w(coalesced_sz, stream); // coalesced weight set + device_vec_t d_paths_sz(num_paths, stream); // paths sizes + device_vec_t d_crt_out_degs(num_paths, stream); // out-degs for current set of vertices + device_vec_t d_random(num_paths, stream); + device_vec_t d_col_indx(num_paths, stream); + device_vec_t d_next_v(num_paths, stream); + device_vec_t d_next_w(num_paths, stream); + + // abstracted out seed initialization: + // + seed_t seed0 = static_cast(seeder()); + + // very first vertex, for each path: + // + rand_walker.start(d_v_start, d_coalesced_v, d_paths_sz); + + // start from 1, as 0-th was initialized above: + // + for (decltype(max_depth) step_indx = 1; step_indx < max_depth; ++step_indx) { + // take one-step in-sync for each path in parallel: + // + rand_walker.step(graph, + seed0 + static_cast(step_indx), + d_coalesced_v, + d_coalesced_w, + d_paths_sz, + d_crt_out_degs, + d_random, + d_col_indx, + d_next_v, + d_next_w); + + // early exit: all paths have reached sinks: + // + if (rand_walker.all_paths_stopped(d_crt_out_degs)) break; + } + + // wrap-up, post-process: + // truncate v_set, w_set to actual space used + // + rand_walker.stop(d_coalesced_v, d_coalesced_w, d_paths_sz); + + // because device_uvector is not copy-cnstr-able: + // + return std::make_tuple(std::move(d_coalesced_v), + std::move(d_coalesced_w), + std::move(d_paths_sz), + seed0); // also return seed for repro +} + +/** + * @brief returns random walks (RW) from starting sources, where each path is of given maximum + * length. Multi-GPU specialization. + * + * @tparam graph_t Type of graph (view). + * @tparam random_engine_t Type of random engine used to generate RW. + * @param handle RAFT handle object to encapsulate resources (e.g. CUDA stream, communicator, and + * handles to various CUDA libraries) to run graph algorithms. + * @param graph Graph object to generate RW on. + * @param d_v_start Device (view) set of starting vertex indices for the RW. number(RW) == + * d_v_start.size(). + * @param max_depth maximum length of RWs. + * @return std::tuple, device_vec_t, + * device_vec_t, seed> Quadruplet of coalesced RW paths, with corresponding edge weights + * for each, and coresponding path sizes. This is meant to minimize the number of DF's to be passed + * to the Python layer. Also returning seed for testing / debugging repro. The meaning of + * "coalesced" here is that a 2D array of paths of different sizes is represented as a 1D array. + */ +template , + typename seeding_policy_t = clock_seeding_t, + typename index_t = typename graph_t::edge_type> +std::enable_if_t, + device_vec_t, + device_vec_t, + typename random_engine_t::seed_type>> +random_walks_impl(raft::handle_t const& handle, + graph_t const& graph, + device_const_vector_view& d_v_start, + index_t max_depth, + seeding_policy_t seeder = clock_seeding_t{}) +{ + CUGRAPH_FAIL("Not implemented yet."); +} + +} // namespace detail + +/** + * @brief returns random walks (RW) from starting sources, where each path is of given maximum + * length. Uniform distribution is assumed for the random engine. + * + * @tparam graph_t Type of graph (view). + * @tparam index_t Type used to store indexing and sizes. + * @param handle RAFT handle object to encapsulate resources (e.g. CUDA stream, communicator, and + * handles to various CUDA libraries) to run graph algorithms. + * @param graph Graph object to generate RW on. + * @param ptr_d_start Device pointer to set of starting vertex indices for the RW. + * @param num_paths = number(paths). + * @param max_depth maximum length of RWs. + * @return std::tuple, device_vec_t, + * device_vec_t> Triplet of coalesced RW paths, with corresponding edge weights for + * each, and coresponding path sizes. This is meant to minimize the number of DF's to be passed to + * the Python layer. + */ +template +std::tuple, + rmm::device_uvector, + rmm::device_uvector> +random_walks(raft::handle_t const& handle, + graph_t const& graph, + typename graph_t::vertex_type const* ptr_d_start, + index_t num_paths, + index_t max_depth) +{ + using vertex_t = typename graph_t::vertex_type; + + // 0-copy const device view: + // + detail::device_const_vector_view d_v_start{ptr_d_start, num_paths}; + + auto quad_tuple = detail::random_walks_impl(handle, graph, d_v_start, max_depth); + // ignore last element of the quad, seed, + // since it's meant for testing / debugging, only: + // + return std::make_tuple(std::move(std::get<0>(quad_tuple)), + std::move(std::get<1>(quad_tuple)), + std::move(std::get<2>(quad_tuple))); +} +} // namespace experimental +} // namespace cugraph diff --git a/cpp/src/sampling/random_walks.cu b/cpp/src/sampling/random_walks.cu new file mode 100644 index 00000000000..88d5d9ed5c8 --- /dev/null +++ b/cpp/src/sampling/random_walks.cu @@ -0,0 +1,78 @@ +/* + * Copyright (c) 2021, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +// Andrei Schaffer, aschaffer@nvidia.com +// +#include +#include + +namespace cugraph { +namespace experimental { +// template explicit instantiation directives (EIDir's): +// +// SG FP32{ +template std:: + tuple, rmm::device_uvector, rmm::device_uvector> + random_walks(raft::handle_t const& handle, + graph_view_t const& gview, + int32_t const* ptr_d_start, + int32_t num_paths, + int32_t max_depth); + +template std:: + tuple, rmm::device_uvector, rmm::device_uvector> + random_walks(raft::handle_t const& handle, + graph_view_t const& gview, + int32_t const* ptr_d_start, + int64_t num_paths, + int64_t max_depth); + +template std:: + tuple, rmm::device_uvector, rmm::device_uvector> + random_walks(raft::handle_t const& handle, + graph_view_t const& gview, + int64_t const* ptr_d_start, + int64_t num_paths, + int64_t max_depth); +//} +// +// SG FP64{ +template std:: + tuple, rmm::device_uvector, rmm::device_uvector> + random_walks(raft::handle_t const& handle, + graph_view_t const& gview, + int32_t const* ptr_d_start, + int32_t num_paths, + int32_t max_depth); + +template std:: + tuple, rmm::device_uvector, rmm::device_uvector> + random_walks(raft::handle_t const& handle, + graph_view_t const& gview, + int32_t const* ptr_d_start, + int64_t num_paths, + int64_t max_depth); + +template std:: + tuple, rmm::device_uvector, rmm::device_uvector> + random_walks(raft::handle_t const& handle, + graph_view_t const& gview, + int64_t const* ptr_d_start, + int64_t num_paths, + int64_t max_depth); +//} +} // namespace experimental +} // namespace cugraph diff --git a/cpp/src/utilities/cython.cu b/cpp/src/utilities/cython.cu index 5382b4856f3..a9e3146bbcd 100644 --- a/cpp/src/utilities/cython.cu +++ b/cpp/src/utilities/cython.cu @@ -696,6 +696,61 @@ std::unique_ptr call_egonet(raft::handle_t const& handle, } } +// Wrapper for random_walks() through a graph container +// to expose the API to cython. +// +template +std::enable_if_t::value, + std::unique_ptr> +call_random_walks(raft::handle_t const& handle, + graph_container_t const& graph_container, + vertex_t const* ptr_start_set, + edge_t num_paths, + edge_t max_depth) +{ + if (graph_container.weightType == numberTypeEnum::floatType) { + using weight_t = float; + + auto graph = + detail::create_graph(handle, graph_container); + + auto triplet = cugraph::experimental::random_walks( + handle, graph->view(), ptr_start_set, num_paths, max_depth); + + random_walk_ret_t rw_tri{std::get<0>(triplet).size(), + std::get<1>(triplet).size(), + static_cast(num_paths), + static_cast(max_depth), + std::make_unique(std::get<0>(triplet).release()), + std::make_unique(std::get<1>(triplet).release()), + std::make_unique(std::get<2>(triplet).release())}; + + return std::make_unique(std::move(rw_tri)); + + } else if (graph_container.weightType == numberTypeEnum::doubleType) { + using weight_t = double; + + auto graph = + detail::create_graph(handle, graph_container); + + auto triplet = cugraph::experimental::random_walks( + handle, graph->view(), ptr_start_set, num_paths, max_depth); + + random_walk_ret_t rw_tri{std::get<0>(triplet).size(), + std::get<1>(triplet).size(), + static_cast(num_paths), + static_cast(max_depth), + std::make_unique(std::get<0>(triplet).release()), + std::make_unique(std::get<1>(triplet).release()), + std::make_unique(std::get<2>(triplet).release())}; + + return std::make_unique(std::move(rw_tri)); + + } else { + CUGRAPH_FAIL("Unsupported weight type."); + } +} + // Wrapper for calling SSSP through a graph container template void call_sssp(raft::handle_t const& handle, @@ -1038,6 +1093,27 @@ template std::unique_ptr call_egonet( int64_t n_subgraphs, int64_t radius); +template std::unique_ptr call_random_walks( + raft::handle_t const& handle, + graph_container_t const& graph_container, + int32_t const* ptr_start_set, + int32_t num_paths, + int32_t max_depth); + +template std::unique_ptr call_random_walks( + raft::handle_t const& handle, + graph_container_t const& graph_container, + int32_t const* ptr_start_set, + int64_t num_paths, + int64_t max_depth); + +template std::unique_ptr call_random_walks( + raft::handle_t const& handle, + graph_container_t const& graph_container, + int64_t const* ptr_start_set, + int64_t num_paths, + int64_t max_depth); + template void call_sssp(raft::handle_t const& handle, graph_container_t const& graph_container, int32_t* identifiers, diff --git a/cpp/tests/CMakeLists.txt b/cpp/tests/CMakeLists.txt index 5292f9f9997..3b65b0edb29 100644 --- a/cpp/tests/CMakeLists.txt +++ b/cpp/tests/CMakeLists.txt @@ -415,6 +415,20 @@ set(EXPERIMENTAL_KATZ_CENTRALITY_TEST_SRCS ConfigureTest(EXPERIMENTAL_KATZ_CENTRALITY_TEST "${EXPERIMENTAL_KATZ_CENTRALITY_TEST_SRCS}") +################################################################################################### +# - Experimental RANDOM_WALKS tests ------------------------------------------------------------ + +set(EXPERIMENTAL_RANDOM_WALKS_TEST_SRCS + "${CMAKE_CURRENT_SOURCE_DIR}/experimental/random_walks_test.cu") + +ConfigureTest(EXPERIMENTAL_RANDOM_WALKS_TEST "${EXPERIMENTAL_RANDOM_WALKS_TEST_SRCS}") + +################################################################################################### +set(EXPERIMENTAL_RANDOM_WALKS_LOW_LEVEL_SRCS + "${CMAKE_CURRENT_SOURCE_DIR}/experimental/rw_low_level_test.cu") + +ConfigureTest(EXPERIMENTAL_RANDOM_WALKS_LOW_LEVEL_TEST "${EXPERIMENTAL_RANDOM_WALKS_LOW_LEVEL_SRCS}") + ################################################################################################### # - MG tests -------------------------------------------------------------------------------------- diff --git a/cpp/tests/experimental/random_walks_test.cu b/cpp/tests/experimental/random_walks_test.cu new file mode 100644 index 00000000000..9fb1716f62b --- /dev/null +++ b/cpp/tests/experimental/random_walks_test.cu @@ -0,0 +1,152 @@ +/* + * Copyright (c) 2021, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "cuda_profiler_api.h" +#include "gtest/gtest.h" + +#include +#include + +#include +#include + +#include +#include +#include + +#include +#include + +#include "random_walks_utils.cuh" + +#include +#include +#include +#include +#include +#include + +namespace { // anonym. +template +void fill_start(raft::handle_t const& handle, + rmm::device_uvector& d_start, + index_t num_vertices) +{ + index_t num_paths = d_start.size(); + + thrust::transform(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + thrust::make_counting_iterator(0), + thrust::make_counting_iterator(num_paths), + + d_start.begin(), + [num_vertices] __device__(auto indx) { return indx % num_vertices; }); +} +} // namespace + +struct RandomWalks_Usecase { + std::string graph_file_full_path{}; + bool test_weighted{false}; + + RandomWalks_Usecase(std::string const& graph_file_path, bool test_weighted) + : test_weighted(test_weighted) + { + if ((graph_file_path.length() > 0) && (graph_file_path[0] != '/')) { + graph_file_full_path = cugraph::test::get_rapids_dataset_root_dir() + "/" + graph_file_path; + } else { + graph_file_full_path = graph_file_path; + } + }; +}; + +class Tests_RandomWalks : public ::testing::TestWithParam { + public: + Tests_RandomWalks() {} + static void SetupTestCase() {} + static void TearDownTestCase() {} + + virtual void SetUp() {} + virtual void TearDown() {} + + template + void run_current_test(RandomWalks_Usecase const& configuration) + { + raft::handle_t handle{}; + + // debuf info: + // + // std::cout << "read graph file: " << configuration.graph_file_full_path << std::endl; + + cugraph::experimental::graph_t graph(handle); + std::tie(graph, std::ignore) = + cugraph::test::read_graph_from_matrix_market_file( + handle, configuration.graph_file_full_path, configuration.test_weighted, false); + + auto graph_view = graph.view(); + + // call random_walks: + start_random_walks(graph_view); + } + + template + void start_random_walks(graph_vt const& graph_view) + { + using vertex_t = typename graph_vt::vertex_type; + using edge_t = typename graph_vt::edge_type; + using weight_t = typename graph_vt::weight_type; + + raft::handle_t handle{}; + edge_t num_paths = 10; + rmm::device_uvector d_start(num_paths, handle.get_stream()); + + vertex_t num_vertices = graph_view.get_number_of_vertices(); + fill_start(handle, d_start, num_vertices); + + // 0-copy const device view: + // + cugraph::experimental::detail::device_const_vector_view d_start_view{ + d_start.data(), num_paths}; + + edge_t max_depth{10}; + + auto ret_tuple = + cugraph::experimental::detail::random_walks_impl(handle, graph_view, d_start_view, max_depth); + + // check results: + // + bool test_all_paths = cugraph::test::host_check_rw_paths( + handle, graph_view, std::get<0>(ret_tuple), std::get<1>(ret_tuple), std::get<2>(ret_tuple)); + + if (!test_all_paths) + std::cout << "starting seed on failure: " << std::get<3>(ret_tuple) << '\n'; + + ASSERT_TRUE(test_all_paths); + } +}; + +TEST_P(Tests_RandomWalks, Initialize_i32_i32_f) +{ + run_current_test(GetParam()); +} + +INSTANTIATE_TEST_CASE_P( + simple_test, + Tests_RandomWalks, + ::testing::Values(RandomWalks_Usecase("test/datasets/karate.mtx", true), + RandomWalks_Usecase("test/datasets/web-Google.mtx", true), + RandomWalks_Usecase("test/datasets/ljournal-2008.mtx", true), + RandomWalks_Usecase("test/datasets/webbase-1M.mtx", true))); + +CUGRAPH_TEST_PROGRAM_MAIN() diff --git a/cpp/tests/experimental/random_walks_utils.cuh b/cpp/tests/experimental/random_walks_utils.cuh new file mode 100644 index 00000000000..863094dc310 --- /dev/null +++ b/cpp/tests/experimental/random_walks_utils.cuh @@ -0,0 +1,152 @@ +/* + * Copyright (c) 2021, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#pragma once + +#include +#include +#include + +#include + +#include +#include +#include +#include +#include +#include + +// utilities for testing / verification of Random Walks functionality: +// +namespace cugraph { +namespace test { + +template +using vector_test_t = cugraph::experimental::detail::device_vec_t; // for debug purposes + +// host side utility to check a if a sequence of vertices is connected: +// +template +bool host_check_path(std::vector const& row_offsets, + std::vector const& col_inds, + std::vector const& values, + typename std::vector::const_iterator v_path_begin, + typename std::vector::const_iterator v_path_end, + typename std::vector::const_iterator w_path_begin) +{ + bool assert1 = (row_offsets.size() > 0); + bool assert2 = (col_inds.size() == values.size()); + + vertex_t num_rows = row_offsets.size() - 1; + edge_t nnz = row_offsets.back(); + + bool assert3 = (nnz == static_cast(col_inds.size())); + if (assert1 == false || assert2 == false || assert3 == false) { + std::cout << "CSR inconsistency\n"; + return false; + } + + auto it_w = w_path_begin; + for (auto it_v = v_path_begin; it_v != v_path_end - 1; ++it_v, ++it_w) { + auto crt_vertex = *it_v; + auto next_vertex = *(it_v + 1); + + auto begin = col_inds.begin() + row_offsets[crt_vertex]; + auto end = col_inds.begin() + row_offsets[crt_vertex + 1]; + auto found_next = std::find_if( + begin, end, [next_vertex](auto dst_vertex) { return dst_vertex == next_vertex; }); + if (found_next == end) { + std::cout << "vertex not found: " << next_vertex << " as neighbor of " << crt_vertex << '\n'; + return false; + } + + auto delta = row_offsets[crt_vertex] + std::distance(begin, found_next); + + // std::cout << "delta in ci: " << delta << '\n'; + auto found_edge = values.begin() + delta; + if (*found_edge != *it_w) { + std::cout << "weight not found: " << *found_edge << " between " << crt_vertex << " and " + << next_vertex << '\n'; + return false; + } + } + return true; +} + +template +bool host_check_rw_paths( + raft::handle_t const& handle, + cugraph::experimental::graph_view_t const& graph_view, + vector_test_t const& d_coalesced_v, + vector_test_t const& d_coalesced_w, + vector_test_t const& d_sizes) +{ + edge_t num_edges = graph_view.get_number_of_edges(); + vertex_t num_vertices = graph_view.get_number_of_vertices(); + + edge_t const* offsets = graph_view.offsets(); + vertex_t const* indices = graph_view.indices(); + weight_t const* values = graph_view.weights(); + + std::vector v_ro(num_vertices + 1); + std::vector v_ci(num_edges); + std::vector v_vals(num_edges); + + raft::update_host(v_ro.data(), offsets, v_ro.size(), handle.get_stream()); + raft::update_host(v_ci.data(), indices, v_ci.size(), handle.get_stream()); + raft::update_host(v_vals.data(), values, v_vals.size(), handle.get_stream()); + + std::vector v_coalesced(d_coalesced_v.size()); + std::vector w_coalesced(d_coalesced_w.size()); + std::vector v_sizes(d_sizes.size()); + + raft::update_host(v_coalesced.data(), + cugraph::experimental::detail::raw_const_ptr(d_coalesced_v), + d_coalesced_v.size(), + handle.get_stream()); + raft::update_host(w_coalesced.data(), + cugraph::experimental::detail::raw_const_ptr(d_coalesced_w), + d_coalesced_w.size(), + handle.get_stream()); + raft::update_host(v_sizes.data(), + cugraph::experimental::detail::raw_const_ptr(d_sizes), + d_sizes.size(), + handle.get_stream()); + + auto it_v_begin = v_coalesced.begin(); + auto it_w_begin = w_coalesced.begin(); + for (auto&& crt_sz : v_sizes) { + auto it_v_end = it_v_begin + crt_sz; + + bool test_path = host_check_path(v_ro, v_ci, v_vals, it_v_begin, it_v_end, it_w_begin); + + it_v_begin = it_v_end; + it_w_begin += crt_sz - 1; + + if (!test_path) { // something went wrong; print to debug (since it's random) + raft::print_host_vector("sizes", v_sizes.data(), v_sizes.size(), std::cout); + + raft::print_host_vector("coalesced v", v_coalesced.data(), v_coalesced.size(), std::cout); + + raft::print_host_vector("coalesced w", w_coalesced.data(), w_coalesced.size(), std::cout); + + return false; + } + } + return true; +} + +} // namespace test +} // namespace cugraph diff --git a/cpp/tests/experimental/rw_low_level_test.cu b/cpp/tests/experimental/rw_low_level_test.cu new file mode 100644 index 00000000000..a32e258d366 --- /dev/null +++ b/cpp/tests/experimental/rw_low_level_test.cu @@ -0,0 +1,783 @@ +/* + * Copyright (c) 2021, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include "cuda_profiler_api.h" + +#include +#include + +#include +#include + +#include +#include +#include + +#include +#include + +#include "random_walks_utils.cuh" + +#include +#include +#include +#include +#include +#include +#include + +using namespace cugraph::experimental; + +template +using vector_test_t = detail::device_vec_t; // for debug purposes + +namespace { // anonym. + +template +graph_t make_graph(raft::handle_t const& handle, + std::vector const& v_src, + std::vector const& v_dst, + std::vector const& v_w, + vertex_t num_vertices, + edge_t num_edges) +{ + vector_test_t d_src(num_edges, handle.get_stream()); + vector_test_t d_dst(num_edges, handle.get_stream()); + vector_test_t d_weights(num_edges, handle.get_stream()); + + raft::update_device(d_src.data(), v_src.data(), d_src.size(), handle.get_stream()); + raft::update_device(d_dst.data(), v_dst.data(), d_dst.size(), handle.get_stream()); + raft::update_device(d_weights.data(), v_w.data(), d_weights.size(), handle.get_stream()); + + edgelist_t edgelist{ + d_src.data(), d_dst.data(), d_weights.data(), num_edges}; + + graph_t graph( + handle, edgelist, num_vertices, graph_properties_t{}, false); + + return graph; +} + +template +bool check_col_indices(raft::handle_t const& handle, + vector_test_t const& d_crt_out_degs, + vector_test_t const& d_col_indx, + index_t num_paths) +{ + bool all_indices_within_degs = thrust::all_of( + rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + thrust::make_counting_iterator(0), + thrust::make_counting_iterator(num_paths), + [p_d_col_indx = detail::raw_const_ptr(d_col_indx), + p_d_crt_out_degs = detail::raw_const_ptr(d_crt_out_degs)] __device__(auto indx) { + if (p_d_crt_out_degs[indx] > 0) + return ((p_d_col_indx[indx] >= 0) && (p_d_col_indx[indx] < p_d_crt_out_degs[indx])); + else + return true; + }); + return all_indices_within_degs; +} + +} // namespace + +// FIXME (per rlratzel request): +// This test may be considered an e2e test +// which could be moved to a different test suite: +// +struct RandomWalksPrimsTest : public ::testing::Test { +}; + +TEST_F(RandomWalksPrimsTest, SimpleGraphRWStart) +{ + using namespace cugraph::experimental::detail; + + using vertex_t = int32_t; + using edge_t = vertex_t; + using weight_t = float; + using index_t = vertex_t; + + raft::handle_t handle{}; + + edge_t num_edges = 8; + vertex_t num_vertices = 6; + + std::vector v_src{0, 1, 1, 2, 2, 2, 3, 4}; + std::vector v_dst{1, 3, 4, 0, 1, 3, 5, 5}; + std::vector v_w{0.1, 1.1, 2.1, 3.1, 4.1, 5.1, 6.1, 7.1}; + + auto graph = make_graph(handle, v_src, v_dst, v_w, num_vertices, num_edges); + + auto graph_view = graph.view(); + + edge_t const* offsets = graph_view.offsets(); + vertex_t const* indices = graph_view.indices(); + weight_t const* values = graph_view.weights(); + + std::vector v_ro(num_vertices + 1); + std::vector v_ci(num_edges); + std::vector v_vs(num_edges); + + raft::update_host(v_ro.data(), offsets, num_vertices + 1, handle.get_stream()); + raft::update_host(v_ci.data(), indices, num_edges, handle.get_stream()); + raft::update_host(v_vs.data(), values, num_edges, handle.get_stream()); + + std::vector v_ro_expected{0, 1, 3, 6, 7, 8, 8}; + std::vector v_ci_expected{1, 3, 4, 0, 1, 3, 5, 5}; + std::vector v_vs_expected{0.1, 1.1, 2.1, 3.1, 4.1, 5.1, 6.1, 7.1}; + + EXPECT_EQ(v_ro, v_ro_expected); + EXPECT_EQ(v_ci, v_ci_expected); + EXPECT_EQ(v_vs, v_vs_expected); + + index_t num_paths = 4; + index_t max_depth = 3; + index_t total_sz = num_paths * max_depth; + + std::vector v_coalesced(total_sz, -1); + std::vector w_coalesced(total_sz - num_paths, -1); + + vector_test_t d_coalesced_v(total_sz, handle.get_stream()); + vector_test_t d_coalesced_w(total_sz - num_paths, handle.get_stream()); + + raft::update_device( + d_coalesced_v.data(), v_coalesced.data(), d_coalesced_v.size(), handle.get_stream()); + raft::update_device( + d_coalesced_w.data(), w_coalesced.data(), d_coalesced_w.size(), handle.get_stream()); + + std::vector v_start{1, 0, 4, 2}; + vector_test_t d_start(num_paths, handle.get_stream()); + + raft::update_device(d_start.data(), v_start.data(), d_start.size(), handle.get_stream()); + + vector_test_t d_sizes(num_paths, handle.get_stream()); + + random_walker_t rand_walker{handle, graph_view, num_paths, max_depth}; + + rand_walker.start(d_start, d_coalesced_v, d_sizes); + + std::vector v_coalesced_exp{1, -1, -1, 0, -1, -1, 4, -1, -1, 2, -1, -1}; + raft::update_host( + v_coalesced.data(), raw_const_ptr(d_coalesced_v), total_sz, handle.get_stream()); + EXPECT_EQ(v_coalesced, v_coalesced_exp); + + std::vector v_sizes{1, 1, 1, 1}; + std::vector v_sz_exp(num_paths); + raft::update_host(v_sz_exp.data(), raw_const_ptr(d_sizes), num_paths, handle.get_stream()); + + EXPECT_EQ(v_sizes, v_sz_exp); +} + +TEST_F(RandomWalksPrimsTest, SimpleGraphCoalesceExperiments) +{ + using namespace cugraph::experimental::detail; + + using vertex_t = int32_t; + using edge_t = vertex_t; + using weight_t = float; + using index_t = vertex_t; + + raft::handle_t handle{}; + + edge_t num_edges = 8; + vertex_t num_vertices = 6; + + std::vector v_src{0, 1, 1, 2, 2, 2, 3, 4}; + std::vector v_dst{1, 3, 4, 0, 1, 3, 5, 5}; + std::vector v_w{0.1, 1.1, 2.1, 3.1, 4.1, 5.1, 6.1, 7.1}; + + auto graph = make_graph(handle, v_src, v_dst, v_w, num_vertices, num_edges); + + auto graph_view = graph.view(); + + edge_t const* offsets = graph_view.offsets(); + vertex_t const* indices = graph_view.indices(); + weight_t const* values = graph_view.weights(); + + index_t num_paths = 4; + index_t max_depth = 3; + index_t total_sz = num_paths * max_depth; + + std::vector v_coalesced(total_sz, -1); + std::vector w_coalesced(total_sz - num_paths, -1); + + vector_test_t d_coalesced_v(total_sz, handle.get_stream()); + vector_test_t d_coalesced_w(total_sz - num_paths, handle.get_stream()); + + raft::update_device( + d_coalesced_v.data(), v_coalesced.data(), d_coalesced_v.size(), handle.get_stream()); + raft::update_device( + d_coalesced_w.data(), w_coalesced.data(), d_coalesced_w.size(), handle.get_stream()); + + std::vector v_start{1, 0, 4, 2}; + vector_test_t d_start(num_paths, handle.get_stream()); + + raft::update_device(d_start.data(), v_start.data(), d_start.size(), handle.get_stream()); + + vector_test_t d_sizes(num_paths, handle.get_stream()); + + random_walker_t rand_walker{handle, graph_view, num_paths, max_depth}; + + auto const& d_out_degs = rand_walker.get_out_degs(); + EXPECT_EQ(static_cast(num_vertices), d_out_degs.size()); + + std::vector v_out_degs(num_vertices); + raft::update_host( + v_out_degs.data(), raw_const_ptr(d_out_degs), num_vertices, handle.get_stream()); + + std::vector v_out_degs_exp{1, 2, 3, 1, 1, 0}; + EXPECT_EQ(v_out_degs, v_out_degs_exp); + + rand_walker.start(d_start, d_coalesced_v, d_sizes); + + // update crt_out_degs: + // + vector_test_t d_crt_out_degs(num_paths, handle.get_stream()); + rand_walker.gather_from_coalesced( + d_coalesced_v, d_out_degs, d_sizes, d_crt_out_degs, max_depth, num_paths); + + std::vector v_crt_out_degs(num_paths); + raft::update_host( + v_crt_out_degs.data(), raw_const_ptr(d_crt_out_degs), num_paths, handle.get_stream()); + + std::vector v_crt_out_degs_exp{2, 1, 1, 3}; + EXPECT_EQ(v_crt_out_degs, v_crt_out_degs_exp); +} + +TEST_F(RandomWalksPrimsTest, SimpleGraphColExtraction) +{ + using namespace cugraph::experimental::detail; + + using vertex_t = int32_t; + using edge_t = vertex_t; + using weight_t = float; + using index_t = vertex_t; + + raft::handle_t handle{}; + + edge_t num_edges = 8; + vertex_t num_vertices = 6; + + std::vector v_src{0, 1, 1, 2, 2, 2, 3, 4}; + std::vector v_dst{1, 3, 4, 0, 1, 3, 5, 5}; + std::vector v_w{0.1, 1.1, 2.1, 3.1, 4.1, 5.1, 6.1, 7.1}; + + auto graph = make_graph(handle, v_src, v_dst, v_w, num_vertices, num_edges); + + auto graph_view = graph.view(); + + edge_t const* offsets = graph_view.offsets(); + vertex_t const* indices = graph_view.indices(); + weight_t const* values = graph_view.weights(); + + index_t num_paths = 4; + index_t max_depth = 3; + index_t total_sz = num_paths * max_depth; + + std::vector v_coalesced(total_sz, -1); + std::vector w_coalesced(total_sz - num_paths, -1); + + vector_test_t d_coalesced_v(total_sz, handle.get_stream()); + vector_test_t d_coalesced_w(total_sz - num_paths, handle.get_stream()); + + raft::update_device( + d_coalesced_v.data(), v_coalesced.data(), d_coalesced_v.size(), handle.get_stream()); + raft::update_device( + d_coalesced_w.data(), w_coalesced.data(), d_coalesced_w.size(), handle.get_stream()); + + std::vector v_start{1, 0, 4, 2}; + vector_test_t d_start(num_paths, handle.get_stream()); + + raft::update_device(d_start.data(), v_start.data(), d_start.size(), handle.get_stream()); + + vector_test_t d_sizes(num_paths, handle.get_stream()); + + random_walker_t rand_walker{handle, graph_view, num_paths, max_depth}; + + auto const& d_out_degs = rand_walker.get_out_degs(); + + rand_walker.start(d_start, d_coalesced_v, d_sizes); + + // update crt_out_degs: + // + vector_test_t d_crt_out_degs(num_paths, handle.get_stream()); + rand_walker.gather_from_coalesced( + d_coalesced_v, d_out_degs, d_sizes, d_crt_out_degs, max_depth, num_paths); + + col_indx_extract_t col_extractor{handle, + graph_view, + raw_const_ptr(d_crt_out_degs), + raw_const_ptr(d_sizes), + num_paths, + max_depth}; + + // typically given by random engine: + // + std::vector v_col_indx{1, 0, 0, 2}; + vector_test_t d_col_indx(num_paths, handle.get_stream()); + + raft::update_device(d_col_indx.data(), v_col_indx.data(), d_col_indx.size(), handle.get_stream()); + + vector_test_t d_next_v(num_paths, handle.get_stream()); + vector_test_t d_next_w(num_paths, handle.get_stream()); + + col_extractor(d_coalesced_v, d_col_indx, d_next_v, d_next_w); + + std::vector v_next_v(num_paths); + std::vector v_next_w(num_paths); + + raft::update_host(v_next_v.data(), raw_const_ptr(d_next_v), num_paths, handle.get_stream()); + raft::update_host(v_next_w.data(), raw_const_ptr(d_next_w), num_paths, handle.get_stream()); + + std::vector v_next_v_exp{4, 1, 5, 3}; + std::vector v_next_w_exp{2.1f, 0.1f, 7.1f, 5.1f}; + + EXPECT_EQ(v_next_v, v_next_v_exp); + EXPECT_EQ(v_next_w, v_next_w_exp); +} + +TEST_F(RandomWalksPrimsTest, SimpleGraphRndGenColIndx) +{ + using namespace cugraph::experimental::detail; + + using vertex_t = int32_t; + using edge_t = vertex_t; + using weight_t = float; + using index_t = vertex_t; + using real_t = float; + using seed_t = long; + + using random_engine_t = rrandom_gen_t; + + raft::handle_t handle{}; + + edge_t num_edges = 8; + vertex_t num_vertices = 6; + + std::vector v_src{0, 1, 1, 2, 2, 2, 3, 4}; + std::vector v_dst{1, 3, 4, 0, 1, 3, 5, 5}; + std::vector v_w{0.1, 1.1, 2.1, 3.1, 4.1, 5.1, 6.1, 7.1}; + + auto graph = make_graph(handle, v_src, v_dst, v_w, num_vertices, num_edges); + + auto graph_view = graph.view(); + + edge_t const* offsets = graph_view.offsets(); + vertex_t const* indices = graph_view.indices(); + weight_t const* values = graph_view.weights(); + + index_t num_paths = 4; + index_t max_depth = 3; + index_t total_sz = num_paths * max_depth; + + std::vector v_coalesced(total_sz, -1); + std::vector w_coalesced(total_sz - num_paths, -1); + + vector_test_t d_coalesced_v(total_sz, handle.get_stream()); + vector_test_t d_coalesced_w(total_sz - num_paths, handle.get_stream()); + + raft::update_device( + d_coalesced_v.data(), v_coalesced.data(), d_coalesced_v.size(), handle.get_stream()); + raft::update_device( + d_coalesced_w.data(), w_coalesced.data(), d_coalesced_w.size(), handle.get_stream()); + + std::vector v_start{1, 0, 4, 2}; + vector_test_t d_start(num_paths, handle.get_stream()); + + raft::update_device(d_start.data(), v_start.data(), d_start.size(), handle.get_stream()); + + vector_test_t d_sizes(num_paths, handle.get_stream()); + + random_walker_t rand_walker{handle, graph_view, num_paths, max_depth}; + + auto const& d_out_degs = rand_walker.get_out_degs(); + + rand_walker.start(d_start, d_coalesced_v, d_sizes); + + // update crt_out_degs: + // + vector_test_t d_crt_out_degs(num_paths, handle.get_stream()); + rand_walker.gather_from_coalesced( + d_coalesced_v, d_out_degs, d_sizes, d_crt_out_degs, max_depth, num_paths); + + // random engine generated: + // + vector_test_t d_col_indx(num_paths, handle.get_stream()); + vector_test_t d_random(num_paths, handle.get_stream()); + + seed_t seed = static_cast(std::time(nullptr)); + random_engine_t rgen(handle, num_paths, d_random, d_crt_out_degs, seed); + rgen.generate_col_indices(d_col_indx); + + bool all_indices_within_degs = check_col_indices(handle, d_crt_out_degs, d_col_indx, num_paths); + + ASSERT_TRUE(all_indices_within_degs); +} + +TEST_F(RandomWalksPrimsTest, SimpleGraphUpdatePathSizes) +{ + using namespace cugraph::experimental::detail; + + using vertex_t = int32_t; + using edge_t = vertex_t; + using weight_t = float; + using index_t = vertex_t; + using real_t = float; + using seed_t = long; + + using random_engine_t = rrandom_gen_t; + + raft::handle_t handle{}; + + edge_t num_edges = 8; + vertex_t num_vertices = 6; + + std::vector v_src{0, 1, 1, 2, 2, 2, 3, 4}; + std::vector v_dst{1, 3, 4, 0, 1, 3, 5, 5}; + std::vector v_w{0.1, 1.1, 2.1, 3.1, 4.1, 5.1, 6.1, 7.1}; + + auto graph = make_graph(handle, v_src, v_dst, v_w, num_vertices, num_edges); + + auto graph_view = graph.view(); + + edge_t const* offsets = graph_view.offsets(); + vertex_t const* indices = graph_view.indices(); + weight_t const* values = graph_view.weights(); + + index_t num_paths = 4; + index_t max_depth = 3; + index_t total_sz = num_paths * max_depth; + + std::vector v_coalesced(total_sz, -1); + std::vector w_coalesced(total_sz - num_paths, -1); + + vector_test_t d_coalesced_v(total_sz, handle.get_stream()); + vector_test_t d_coalesced_w(total_sz - num_paths, handle.get_stream()); + + raft::update_device( + d_coalesced_v.data(), v_coalesced.data(), d_coalesced_v.size(), handle.get_stream()); + raft::update_device( + d_coalesced_w.data(), w_coalesced.data(), d_coalesced_w.size(), handle.get_stream()); + + std::vector v_start{1, 0, 4, 2}; + vector_test_t d_start(num_paths, handle.get_stream()); + + raft::update_device(d_start.data(), v_start.data(), d_start.size(), handle.get_stream()); + + vector_test_t d_sizes(num_paths, handle.get_stream()); + + random_walker_t rand_walker{handle, graph_view, num_paths, max_depth}; + + auto const& d_out_degs = rand_walker.get_out_degs(); + + rand_walker.start(d_start, d_coalesced_v, d_sizes); + + // Fixed set of out-degs, as opposed to have them generated by the algorithm. + // That's because I want to test a certain functionality in isolation + // + std::vector v_crt_out_degs{2, 0, 1, 0}; + vector_test_t d_crt_out_degs(num_paths, handle.get_stream()); + raft::update_device( + d_crt_out_degs.data(), v_crt_out_degs.data(), d_crt_out_degs.size(), handle.get_stream()); + + rand_walker.update_path_sizes(d_crt_out_degs, d_sizes); + + std::vector v_sizes(num_paths); + raft::update_host(v_sizes.data(), raw_const_ptr(d_sizes), num_paths, handle.get_stream()); + std::vector v_sizes_exp{2, 1, 2, 1}; + // i.e., corresponding 0-entries in crt-out-degs, don't get updated; + + EXPECT_EQ(v_sizes, v_sizes_exp); +} + +TEST_F(RandomWalksPrimsTest, SimpleGraphScatterUpdate) +{ + using namespace cugraph::experimental::detail; + + using vertex_t = int32_t; + using edge_t = vertex_t; + using weight_t = float; + using index_t = vertex_t; + + raft::handle_t handle{}; + + edge_t num_edges = 8; + vertex_t num_vertices = 6; + + std::vector v_src{0, 1, 1, 2, 2, 2, 3, 4}; + std::vector v_dst{1, 3, 4, 0, 1, 3, 5, 5}; + std::vector v_w{0.1, 1.1, 2.1, 3.1, 4.1, 5.1, 6.1, 7.1}; + + auto graph = make_graph(handle, v_src, v_dst, v_w, num_vertices, num_edges); + + auto graph_view = graph.view(); + + edge_t const* offsets = graph_view.offsets(); + vertex_t const* indices = graph_view.indices(); + weight_t const* values = graph_view.weights(); + + index_t num_paths = 4; + index_t max_depth = 3; + index_t total_sz = num_paths * max_depth; + + std::vector v_coalesced(total_sz, -1); + std::vector w_coalesced(total_sz - num_paths, -1); + + vector_test_t d_coalesced_v(total_sz, handle.get_stream()); + vector_test_t d_coalesced_w(total_sz - num_paths, handle.get_stream()); + + raft::update_device( + d_coalesced_v.data(), v_coalesced.data(), d_coalesced_v.size(), handle.get_stream()); + raft::update_device( + d_coalesced_w.data(), w_coalesced.data(), d_coalesced_w.size(), handle.get_stream()); + + std::vector v_start{1, 0, 4, 2}; + vector_test_t d_start(num_paths, handle.get_stream()); + + raft::update_device(d_start.data(), v_start.data(), d_start.size(), handle.get_stream()); + + vector_test_t d_sizes(num_paths, handle.get_stream()); + + random_walker_t rand_walker{handle, graph_view, num_paths, max_depth}; + + auto const& d_out_degs = rand_walker.get_out_degs(); + + rand_walker.start(d_start, d_coalesced_v, d_sizes); + + // update crt_out_degs: + // + vector_test_t d_crt_out_degs(num_paths, handle.get_stream()); + rand_walker.gather_from_coalesced( + d_coalesced_v, d_out_degs, d_sizes, d_crt_out_degs, max_depth, num_paths); + + col_indx_extract_t col_extractor{handle, + graph_view, + raw_const_ptr(d_crt_out_degs), + raw_const_ptr(d_sizes), + num_paths, + max_depth}; + + // typically given by random engine: + // + std::vector v_col_indx{1, 0, 0, 2}; + vector_test_t d_col_indx(num_paths, handle.get_stream()); + + raft::update_device(d_col_indx.data(), v_col_indx.data(), d_col_indx.size(), handle.get_stream()); + + vector_test_t d_next_v(num_paths, handle.get_stream()); + vector_test_t d_next_w(num_paths, handle.get_stream()); + + col_extractor(d_coalesced_v, d_col_indx, d_next_v, d_next_w); + + rand_walker.update_path_sizes(d_crt_out_degs, d_sizes); + + // check start(): + // + { + std::vector v_coalesced_exp{1, -1, -1, 0, -1, -1, 4, -1, -1, 2, -1, -1}; + raft::update_host( + v_coalesced.data(), raw_const_ptr(d_coalesced_v), total_sz, handle.get_stream()); + EXPECT_EQ(v_coalesced, v_coalesced_exp); + } + + // check crt_out_degs: + // + { + std::vector v_crt_out_degs(num_paths); + raft::update_host( + v_crt_out_degs.data(), raw_const_ptr(d_crt_out_degs), num_paths, handle.get_stream()); + std::vector v_crt_out_degs_exp{2, 1, 1, 3}; + EXPECT_EQ(v_crt_out_degs, v_crt_out_degs_exp); + } + + // check paths sizes update: + // + { + std::vector v_sizes(num_paths); + raft::update_host(v_sizes.data(), raw_const_ptr(d_sizes), num_paths, handle.get_stream()); + std::vector v_sizes_exp{2, 2, 2, 2}; + // i.e., corresponding 0-entries in crt-out-degs, don't get updated; + EXPECT_EQ(v_sizes, v_sizes_exp); + } + + // check next step: + // + { + std::vector v_next_v(num_paths); + std::vector v_next_w(num_paths); + + raft::update_host(v_next_v.data(), raw_const_ptr(d_next_v), num_paths, handle.get_stream()); + raft::update_host(v_next_w.data(), raw_const_ptr(d_next_w), num_paths, handle.get_stream()); + + std::vector v_next_v_exp{4, 1, 5, 3}; + std::vector v_next_w_exp{2.1f, 0.1f, 7.1f, 5.1f}; + + EXPECT_EQ(v_next_v, v_next_v_exp); + EXPECT_EQ(v_next_w, v_next_w_exp); + } + + rand_walker.scatter_vertices(d_next_v, d_coalesced_v, d_crt_out_degs, d_sizes); + rand_walker.scatter_weights(d_next_w, d_coalesced_w, d_crt_out_degs, d_sizes); + + // check vertex/weight scatter: + // + { + raft::update_host( + v_coalesced.data(), raw_const_ptr(d_coalesced_v), total_sz, handle.get_stream()); + raft::update_host( + w_coalesced.data(), raw_const_ptr(d_coalesced_w), total_sz - num_paths, handle.get_stream()); + + std::vector v_coalesced_exp{1, 4, -1, 0, 1, -1, 4, 5, -1, 2, 3, -1}; + std::vector w_coalesced_exp{2.1, -1, 0.1, -1, 7.1, -1, 5.1, -1}; + + EXPECT_EQ(v_coalesced, v_coalesced_exp); + EXPECT_EQ(w_coalesced, w_coalesced_exp); + } +} + +TEST_F(RandomWalksPrimsTest, SimpleGraphCoalesceDefragment) +{ + using namespace cugraph::experimental::detail; + + using vertex_t = int32_t; + using edge_t = vertex_t; + using weight_t = float; + using index_t = vertex_t; + + raft::handle_t handle{}; + + edge_t num_edges = 8; + vertex_t num_vertices = 6; + + std::vector v_src{0, 1, 1, 2, 2, 2, 3, 4}; + std::vector v_dst{1, 3, 4, 0, 1, 3, 5, 5}; + std::vector v_w{0.1, 1.1, 2.1, 3.1, 4.1, 5.1, 6.1, 7.1}; + + auto graph = make_graph(handle, v_src, v_dst, v_w, num_vertices, num_edges); + + auto graph_view = graph.view(); + + edge_t const* offsets = graph_view.offsets(); + vertex_t const* indices = graph_view.indices(); + weight_t const* values = graph_view.weights(); + + index_t num_paths = 4; + index_t max_depth = 3; + index_t total_sz = num_paths * max_depth; + + std::vector v_sizes{1, 2, 2, 1}; + vector_test_t d_sizes(num_paths, handle.get_stream()); + raft::update_device(d_sizes.data(), v_sizes.data(), d_sizes.size(), handle.get_stream()); + + std::vector v_coalesced(total_sz, -1); + v_coalesced[0] = 3; + v_coalesced[max_depth] = 5; + v_coalesced[max_depth + 1] = 2; + v_coalesced[2 * max_depth] = 4; + v_coalesced[2 * max_depth + 1] = 0; + v_coalesced[3 * max_depth] = 1; + + std::vector w_coalesced(total_sz - num_paths, -1); + w_coalesced[max_depth - 1] = 10.1; + w_coalesced[2 * max_depth - 2] = 11.2; + + vector_test_t d_coalesced_v(total_sz, handle.get_stream()); + vector_test_t d_coalesced_w(total_sz - num_paths, handle.get_stream()); + + raft::update_device( + d_coalesced_v.data(), v_coalesced.data(), d_coalesced_v.size(), handle.get_stream()); + raft::update_device( + d_coalesced_w.data(), w_coalesced.data(), d_coalesced_w.size(), handle.get_stream()); + + random_walker_t rand_walker{handle, graph_view, num_paths, max_depth}; + + rand_walker.stop(d_coalesced_v, d_coalesced_w, d_sizes); + + // check vertex/weight defragment: + // + { + v_coalesced.resize(d_coalesced_v.size()); + w_coalesced.resize(d_coalesced_w.size()); + + raft::update_host( + v_coalesced.data(), raw_const_ptr(d_coalesced_v), d_coalesced_v.size(), handle.get_stream()); + raft::update_host( + w_coalesced.data(), raw_const_ptr(d_coalesced_w), d_coalesced_w.size(), handle.get_stream()); + + std::vector v_coalesced_exp{3, 5, 2, 4, 0, 1}; + std::vector w_coalesced_exp{10.1, 11.2}; + + EXPECT_EQ(v_coalesced, v_coalesced_exp); + EXPECT_EQ(w_coalesced, w_coalesced_exp); + } +} + +TEST_F(RandomWalksPrimsTest, SimpleGraphRandomWalk) +{ + using vertex_t = int32_t; + using edge_t = vertex_t; + using weight_t = float; + using index_t = vertex_t; + + raft::handle_t handle{}; + + edge_t num_edges = 8; + vertex_t num_vertices = 6; + + std::vector v_src{0, 1, 1, 2, 2, 2, 3, 4}; + std::vector v_dst{1, 3, 4, 0, 1, 3, 5, 5}; + std::vector v_w{0.1, 1.1, 2.1, 3.1, 4.1, 5.1, 6.1, 7.1}; + + auto graph = make_graph(handle, v_src, v_dst, v_w, num_vertices, num_edges); + + auto graph_view = graph.view(); + + edge_t const* offsets = graph_view.offsets(); + vertex_t const* indices = graph_view.indices(); + weight_t const* values = graph_view.weights(); + + std::vector v_ro(num_vertices + 1); + std::vector v_ci(num_edges); + std::vector v_vals(num_edges); + + raft::update_host(v_ro.data(), offsets, v_ro.size(), handle.get_stream()); + raft::update_host(v_ci.data(), indices, v_ci.size(), handle.get_stream()); + raft::update_host(v_vals.data(), values, v_vals.size(), handle.get_stream()); + + std::vector v_start{1, 0, 4, 2}; + vector_test_t d_v_start(v_start.size(), handle.get_stream()); + raft::update_device(d_v_start.data(), v_start.data(), d_v_start.size(), handle.get_stream()); + + index_t num_paths = v_start.size(); + index_t max_depth = 5; + + // 0-copy const device view: + // + detail::device_const_vector_view d_start_view{d_v_start.data(), num_paths}; + auto quad = detail::random_walks_impl(handle, graph_view, d_start_view, max_depth); + + auto& d_coalesced_v = std::get<0>(quad); + auto& d_coalesced_w = std::get<1>(quad); + auto& d_sizes = std::get<2>(quad); + auto seed0 = std::get<3>(quad); + + bool test_all_paths = + cugraph::test::host_check_rw_paths(handle, graph_view, d_coalesced_v, d_coalesced_w, d_sizes); + + if (!test_all_paths) std::cout << "starting seed on failure: " << seed0 << '\n'; + + ASSERT_TRUE(test_all_paths); +}