From 637c13947fb443d42985a62a7ec904066b267224 Mon Sep 17 00:00:00 2001 From: Iroy30 <41401566+Iroy30@users.noreply.github.com> Date: Thu, 3 Jun 2021 14:51:52 -0500 Subject: [PATCH] Update the Random Walk binding (#1599) Closes #1579 Authors: - https://github.com/Iroy30 - Brad Rees (https://github.com/BradReesWork) Approvers: - Brad Rees (https://github.com/BradReesWork) - Andrei Schaffer (https://github.com/aschaffer) - Rick Ratzel (https://github.com/rlratzel) - Joseph Nke (https://github.com/jnke2016) - Chuck Hastings (https://github.com/ChuckHastings) URL: https://github.com/rapidsai/cugraph/pull/1599 --- cpp/include/cugraph/utilities/cython.hpp | 14 +- cpp/src/utilities/cython.cu | 36 +++- notebooks/sampling/RandomWalk.ipynb | 178 +++--------------- python/cugraph/__init__.py | 2 +- python/cugraph/sampling/__init__.py | 2 +- python/cugraph/sampling/random_walks.pxd | 8 +- python/cugraph/sampling/random_walks.py | 77 ++++---- .../cugraph/sampling/random_walks_wrapper.pyx | 59 +++++- python/cugraph/structure/graph_utilities.pxd | 5 + python/cugraph/tests/test_random_walks.py | 106 +++++++---- 10 files changed, 251 insertions(+), 236 deletions(-) diff --git a/cpp/include/cugraph/utilities/cython.hpp b/cpp/include/cugraph/utilities/cython.hpp index f187a985108..273e55bae25 100644 --- a/cpp/include/cugraph/utilities/cython.hpp +++ b/cpp/include/cugraph/utilities/cython.hpp @@ -210,6 +210,12 @@ struct random_walk_ret_t { std::unique_ptr d_sizes_; }; +struct random_walk_path_t { + std::unique_ptr d_v_offsets; + std::unique_ptr d_w_sizes; + std::unique_ptr d_w_offsets; +}; + struct graph_generator_t { std::unique_ptr d_source; std::unique_ptr d_destination; @@ -538,7 +544,13 @@ call_random_walks(raft::handle_t const& handle, graph_container_t const& graph_container, vertex_t const* ptr_start_set, edge_t num_paths, - edge_t max_depth); + edge_t max_depth, + bool use_padding); + +template +std::unique_ptr call_rw_paths(raft::handle_t const& handle, + index_t num_paths, + 
index_t const* vertex_path_sizes); // convertor from random_walks return type to COO: // diff --git a/cpp/src/utilities/cython.cu b/cpp/src/utilities/cython.cu index a95e4eb5421..e9bf9ffe031 100644 --- a/cpp/src/utilities/cython.cu +++ b/cpp/src/utilities/cython.cu @@ -879,7 +879,8 @@ call_random_walks(raft::handle_t const& handle, graph_container_t const& graph_container, vertex_t const* ptr_start_set, edge_t num_paths, - edge_t max_depth) + edge_t max_depth, + bool use_padding) { if (graph_container.weightType == numberTypeEnum::floatType) { using weight_t = float; @@ -888,7 +889,7 @@ call_random_walks(raft::handle_t const& handle, detail::create_graph(handle, graph_container); auto triplet = cugraph::experimental::random_walks( - handle, graph->view(), ptr_start_set, num_paths, max_depth); + handle, graph->view(), ptr_start_set, num_paths, max_depth, use_padding); random_walk_ret_t rw_tri{std::get<0>(triplet).size(), std::get<1>(triplet).size(), @@ -907,7 +908,7 @@ call_random_walks(raft::handle_t const& handle, detail::create_graph(handle, graph_container); auto triplet = cugraph::experimental::random_walks( - handle, graph->view(), ptr_start_set, num_paths, max_depth); + handle, graph->view(), ptr_start_set, num_paths, max_depth, use_padding); random_walk_ret_t rw_tri{std::get<0>(triplet).size(), std::get<1>(triplet).size(), @@ -924,6 +925,20 @@ call_random_walks(raft::handle_t const& handle, } } +template +std::unique_ptr call_rw_paths(raft::handle_t const& handle, + index_t num_paths, + index_t const* vertex_path_sizes) +{ + auto triplet = + cugraph::experimental::query_rw_sizes_offsets(handle, num_paths, vertex_path_sizes); + random_walk_path_t rw_path_tri{ + std::make_unique(std::get<0>(triplet).release()), + std::make_unique(std::get<1>(triplet).release()), + std::make_unique(std::get<2>(triplet).release())}; + return std::make_unique(std::move(rw_path_tri)); +} + template std::unique_ptr random_walks_to_coo(raft::handle_t const& handle, 
random_walk_ret_t& rw_tri) @@ -1354,21 +1369,30 @@ template std::unique_ptr call_random_walks( graph_container_t const& graph_container, int32_t const* ptr_start_set, int32_t num_paths, - int32_t max_depth); + int32_t max_depth, + bool use_padding); template std::unique_ptr call_random_walks( raft::handle_t const& handle, graph_container_t const& graph_container, int32_t const* ptr_start_set, int64_t num_paths, - int64_t max_depth); + int64_t max_depth, + bool use_padding); template std::unique_ptr call_random_walks( raft::handle_t const& handle, graph_container_t const& graph_container, int64_t const* ptr_start_set, int64_t num_paths, - int64_t max_depth); + int64_t max_depth, + bool use_padding); + +template std::unique_ptr call_rw_paths( + raft::handle_t const& handle, int32_t num_paths, int32_t const* vertex_path_sizes); + +template std::unique_ptr call_rw_paths( + raft::handle_t const& handle, int64_t num_paths, int64_t const* vertex_path_sizes); template std::unique_ptr random_walks_to_coo( raft::handle_t const& handle, random_walk_ret_t& rw_tri); diff --git a/notebooks/sampling/RandomWalk.ipynb b/notebooks/sampling/RandomWalk.ipynb index 31a521db1c1..84f8e1db07f 100644 --- a/notebooks/sampling/RandomWalk.ipynb +++ b/notebooks/sampling/RandomWalk.ipynb @@ -34,7 +34,7 @@ }, { "cell_type": "code", - "execution_count": 1, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -45,7 +45,7 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -58,7 +58,7 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -67,7 +67,7 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -78,20 +78,9 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "(34, 
78)" - ] - }, - "execution_count": 5, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "# some stats on the graph\n", "(G.number_of_nodes(), G.number_of_edges() )" @@ -99,7 +88,7 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -109,11 +98,21 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ - "rw, so = cugraph.random_walks(G, seeds, 4)" + "# random walk path length\n", + "path_length = 4" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "rw, so, sz = cugraph.random_walks(G, seeds, path_length, use_padding=True)" ] }, { @@ -131,144 +130,27 @@ }, { "cell_type": "code", - "execution_count": 8, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "0 0\n", - "1 3\n", - "2 6\n", - "dtype: int64" - ] - }, - "execution_count": 8, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "so" - ] - }, - { - "cell_type": "code", - "execution_count": 9, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
srcdstweight
01761.0
16171.0
21761.0
319331.0
433311.0
53121.0
\n", - "
" - ], - "text/plain": [ - " src dst weight\n", - "0 17 6 1.0\n", - "1 6 17 1.0\n", - "2 17 6 1.0\n", - "3 19 33 1.0\n", - "4 33 31 1.0\n", - "5 31 2 1.0" - ] - }, - "execution_count": 9, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ - "rw" + "rw.head(10)" ] }, { "cell_type": "code", - "execution_count": 10, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "seed 17 starts at index 0 and is 3 rows\n", - "seed 19 starts at index 3 and is 3 rows\n" - ] - } - ], + "outputs": [], "source": [ + "idx = 0\n", "for i in range(len(seeds)):\n", - " print(f\"seed {seeds[i]} starts at index {so[i]} and is {so[1 + 1] - so[1]} rows\")" + " for j in range(path_length):\n", + " print(f\"{rw[idx]}\", end=\" \")\n", + " idx += 1\n", + " print(\" \")" ] }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - }, { "cell_type": "markdown", "metadata": {}, @@ -305,7 +187,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.8.8" + "version": "3.8.10" } }, "nbformat": 4, diff --git a/python/cugraph/__init__.py b/python/cugraph/__init__.py index 1a113b93d8d..55c35fa7b4b 100644 --- a/python/cugraph/__init__.py +++ b/python/cugraph/__init__.py @@ -107,7 +107,7 @@ from cugraph.raft import raft_include_test from cugraph.comms import comms -from cugraph.sampling import random_walks +from cugraph.sampling import random_walks, rw_path # Versioneer from ._version import get_versions diff --git a/python/cugraph/sampling/__init__.py b/python/cugraph/sampling/__init__.py index fd9d072d4f8..ab0bfab0c66 100644 --- a/python/cugraph/sampling/__init__.py +++ b/python/cugraph/sampling/__init__.py @@ -11,4 +11,4 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-from cugraph.sampling.random_walks import random_walks +from cugraph.sampling.random_walks import random_walks, rw_path diff --git a/python/cugraph/sampling/random_walks.pxd b/python/cugraph/sampling/random_walks.pxd index 1eaea92c3e5..f86d6396c98 100644 --- a/python/cugraph/sampling/random_walks.pxd +++ b/python/cugraph/sampling/random_walks.pxd @@ -19,4 +19,10 @@ cdef extern from "cugraph/utilities/cython.hpp" namespace "cugraph::cython": const graph_container_t &g, const vertex_t *ptr_d_start, edge_t num_paths, - edge_t max_depth) except + + edge_t max_depth, + bool use_padding) except + + + cdef unique_ptr[random_walk_path_t] call_rw_paths[index_t]( + const handle_t &handle, + index_t num_paths, + const index_t* sizes) except + diff --git a/python/cugraph/sampling/random_walks.py b/python/cugraph/sampling/random_walks.py index 84fde262010..fc21abd3bc4 100644 --- a/python/cugraph/sampling/random_walks.py +++ b/python/cugraph/sampling/random_walks.py @@ -14,16 +14,12 @@ import cudf from cugraph.sampling import random_walks_wrapper import cugraph -from collections import defaultdict -# FIXME might be more efficient to return either (df + offset) or 3 cudf.Series - -def random_walks( - G, - start_vertices, - max_depth=None -): +def random_walks(G, + start_vertices, + max_depth=None, + use_padding=False): """ compute random walks for each nodes in 'start_vertices' @@ -43,16 +39,20 @@ def random_walks( max_depth : int The maximum depth of the random walks + use_padding : bool + If True, padded paths are returned else coalesced paths are returned. Returns ------- - random_walks_edge_lists : cudf.DataFrame - GPU data frame containing all random walks sources identifiers, - destination identifiers, edge weights + vertex_paths : cudf.Series or cudf.DataFrame + Series containing the vertices of edges/paths in the random walk. 
+ + edge_weight_paths: cudf.Series + Series containing the edge weights of edges represented by the + returned vertex_paths - seeds_offsets: cudf.Series - Series containing the starting offset in the returned edge list - for each vertex in start_vertices. + sizes: cudf.Series or None + Sizes of the coalesced paths; None when use_padding is True. """ if max_depth is None: raise TypeError("must specify a 'max_depth'") @@ -74,7 +74,7 @@ def random_walks( start_vertices = G.lookup_internal_vertex_id(start_vertices) vertex_set, edge_set, sizes = random_walks_wrapper.random_walks( - G, start_vertices, max_depth) + G, start_vertices, max_depth, use_padding) if G.renumbered: df_ = cudf.DataFrame() @@ -82,21 +82,32 @@ def random_walks( df_ = G.unrenumber(df_, 'vertex_set', preserve_order=True) vertex_set = cudf.Series(df_['vertex_set']) - edge_list = defaultdict(list) - next_path_idx = 0 - offsets = [0] - - df = cudf.DataFrame() - for s in sizes.values_host: - for i in range(next_path_idx, s+next_path_idx-1): - edge_list['src'].append(vertex_set.values_host[i]) - edge_list['dst'].append(vertex_set.values_host[i+1]) - next_path_idx += s - df = df.append(edge_list, ignore_index=True) - offsets.append(df.index[-1]+1) - edge_list['src'].clear() - edge_list['dst'].clear() - df['weight'] = edge_set - offsets = cudf.Series(offsets) - - return df, offsets + if use_padding: + edge_set_sz = (max_depth-1)*len(start_vertices) + return vertex_set, edge_set[:edge_set_sz], sizes + + vertex_set_sz = sizes.sum() + edge_set_sz = vertex_set_sz - len(start_vertices) + return vertex_set[:vertex_set_sz], edge_set[:edge_set_sz], sizes + + +def rw_path(num_paths, sizes): + """ + Retrieve more information on the obtained paths in case use_padding + is False. + + Parameters + ---------- + num_paths: int + Number of paths in the random walk output. + + sizes: cudf.Series + Path sizes returned in random walk output. 
+ + Returns + ------- + path_data : cudf.DataFrame + Dataframe containing vertex path offsets, edge weight offsets and + edge weight sizes for each path. + """ + return random_walks_wrapper.rw_path_retrieval(num_paths, sizes) diff --git a/python/cugraph/sampling/random_walks_wrapper.pyx b/python/cugraph/sampling/random_walks_wrapper.pyx index f186a972413..64194976e87 100644 --- a/python/cugraph/sampling/random_walks_wrapper.pyx +++ b/python/cugraph/sampling/random_walks_wrapper.pyx @@ -10,7 +10,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -from cugraph.sampling.random_walks cimport call_random_walks +from cugraph.sampling.random_walks cimport call_random_walks, call_rw_paths #from cugraph.structure.graph_primtypes cimport * from cugraph.structure.graph_utilities cimport * from libcpp cimport bool @@ -24,7 +24,9 @@ import numpy.ctypeslib as ctypeslib from rmm._lib.device_buffer cimport DeviceBuffer from cudf.core.buffer import Buffer from cython.operator cimport dereference as deref -def random_walks(input_graph, start_vertices, max_depth): + + +def random_walks(input_graph, start_vertices, max_depth, use_padding): """ Call random_walks """ @@ -89,32 +91,71 @@ def random_walks(input_graph, start_vertices, max_depth): graph_container, c_start_vertex_ptr, num_paths, - max_depth)) + max_depth, + use_padding)) else: # (edge_t == np.dtype("int64")): rw_ret_ptr = move(call_random_walks[int, long]( deref(handle_), graph_container, c_start_vertex_ptr, num_paths, - max_depth)) + max_depth, + use_padding)) else: # (vertex_t == edge_t == np.dtype("int64")): rw_ret_ptr = move(call_random_walks[long, long]( deref(handle_), graph_container, c_start_vertex_ptr, num_paths, - max_depth)) + max_depth, + use_padding)) rw_ret= move(rw_ret_ptr.get()[0]) vertex_set = DeviceBuffer.c_from_unique_ptr(move(rw_ret.d_coalesced_v_)) edge_set = 
DeviceBuffer.c_from_unique_ptr(move(rw_ret.d_coalesced_w_)) - sizes = DeviceBuffer.c_from_unique_ptr(move(rw_ret.d_sizes_)) vertex_set = Buffer(vertex_set) edge_set = Buffer(edge_set) - sizes = Buffer(sizes) set_vertex = cudf.Series(data=vertex_set, dtype=vertex_t) set_edge = cudf.Series(data=edge_set, dtype=weight_t) - set_sizes = cudf.Series(data=sizes, dtype=edge_t) + + if not use_padding: + sizes = DeviceBuffer.c_from_unique_ptr(move(rw_ret.d_sizes_)) + sizes = Buffer(sizes) + set_sizes = cudf.Series(data=sizes, dtype=edge_t) + else: + set_sizes = None return set_vertex, set_edge, set_sizes - + + +def rw_path_retrieval(num_paths, sizes): + cdef unique_ptr[handle_t] handle_ptr + handle_ptr.reset(new handle_t()) + handle_ = handle_ptr.get() + index_t = sizes.dtype + + cdef unique_ptr[random_walk_path_t] rw_path_ptr + cdef uintptr_t c_sizes = sizes.__cuda_array_interface__['data'][0] + + if index_t == np.dtype("int32"): + rw_path_ptr = move(call_rw_paths[int](deref(handle_), + num_paths, + c_sizes)) + else: # index_t == np.dtype("int64"): + rw_path_ptr = move(call_rw_paths[long](deref(handle_), + num_paths, + c_sizes)) + + rw_path = move(rw_path_ptr.get()[0]) + vertex_offsets = DeviceBuffer.c_from_unique_ptr(move(rw_path.d_v_offsets)) + weight_sizes = DeviceBuffer.c_from_unique_ptr(move(rw_path.d_w_sizes)) + weight_offsets = DeviceBuffer.c_from_unique_ptr(move(rw_path.d_w_offsets)) + vertex_offsets = Buffer(vertex_offsets) + weight_sizes = Buffer(weight_sizes) + weight_offsets = Buffer(weight_offsets) + + df = cudf.DataFrame() + df['vertex_offsets'] = cudf.Series(data=vertex_offsets, dtype=index_t) + df['weight_sizes'] = cudf.Series(data=weight_sizes, dtype=index_t) + df['weight_offsets'] = cudf.Series(data=weight_offsets, dtype=index_t) + return df diff --git a/python/cugraph/structure/graph_utilities.pxd b/python/cugraph/structure/graph_utilities.pxd index a19ed4c600c..2d5b081dd0c 100644 --- a/python/cugraph/structure/graph_utilities.pxd +++ 
b/python/cugraph/structure/graph_utilities.pxd @@ -94,6 +94,11 @@ cdef extern from "cugraph/utilities/cython.hpp" namespace "cugraph::cython": unique_ptr[device_buffer] d_coalesced_w_ unique_ptr[device_buffer] d_sizes_ + cdef cppclass random_walk_path_t: + unique_ptr[device_buffer] d_v_offsets + unique_ptr[device_buffer] d_w_sizes + unique_ptr[device_buffer] d_w_offsets + cdef cppclass graph_generator_t: unique_ptr[device_buffer] d_source unique_ptr[device_buffer] d_destination diff --git a/python/cugraph/tests/test_random_walks.py b/python/cugraph/tests/test_random_walks.py index ba0cd6eadc9..302a93cd02a 100644 --- a/python/cugraph/tests/test_random_walks.py +++ b/python/cugraph/tests/test_random_walks.py @@ -29,11 +29,10 @@ DATASETS_SMALL = [pytest.param(d) for d in utils.DATASETS_SMALL] -def calc_random_walks( - graph_file, - directed=False, - max_depth=None -): +def calc_random_walks(graph_file, + directed=False, + max_depth=None, + use_padding=False): """ compute random walks for each nodes in 'start_vertices' @@ -52,16 +51,20 @@ def calc_random_walks( max_depth : int The maximum depth of the random walks + use_padding : bool + If True, padded paths are returned else coalesced paths are returned. Returns ------- - random_walks_edge_lists : cudf.DataFrame - GPU data frame containing all random walks sources identifiers, - destination identifiers, edge weights + vertex_paths : cudf.Series or cudf.DataFrame + Series containing the vertices of edges/paths in the random walk. + + edge_weight_paths: cudf.Series + Series containing the edge weights of edges represented by the + returned vertex_paths - seeds_offsets: cudf.Series - Series containing the starting offset in the returned edge list - for each vertex in start_vertices. + sizes: cudf.Series or None + Sizes of the coalesced paths; None when use_padding is True. 
""" G = utils.generate_cugraph_graph_from_file( graph_file, directed=directed, edgevals=True) @@ -69,45 +72,47 @@ def calc_random_walks( k = random.randint(1, 10) start_vertices = random.sample(range(G.number_of_vertices()), k) - df, offsets = cugraph.random_walks(G, start_vertices, max_depth) + vertex_paths, edge_weights, vertex_path_sizes = cugraph.random_walks( + G, start_vertices, max_depth, use_padding) - return df, offsets, start_vertices + return (vertex_paths, edge_weights, vertex_path_sizes), start_vertices -def check_random_walks(df, offsets, seeds, df_G=None): +def check_random_walks(path_data, seeds, df_G=None): invalid_edge = 0 invalid_seeds = 0 - invalid_weight = 0 offsets_idx = 0 - for i in range(len(df.index)): - src, dst, weight = df.iloc[i].to_array() - if i == offsets[offsets_idx]: - if df['src'].iloc[i] != seeds[offsets_idx]: + next_path_idx = 0 + v_paths = path_data[0] + sizes = path_data[2].to_array().tolist() + + for s in sizes: + for i in range(next_path_idx, next_path_idx+s-1): + src, dst = v_paths.iloc[i], v_paths.iloc[i+1] + if i == next_path_idx and src != seeds[offsets_idx]: invalid_seeds += 1 print( "[ERR] Invalid seed: " " src {} != src {}" - .format(df['src'].iloc[i], offsets[offsets_idx]) + .format(src, seeds[offsets_idx]) ) - offsets_idx += 1 + offsets_idx += 1 + next_path_idx += s - edge = df.loc[(df['src'] == (src)) & (df['dst'] == (dst))].reset_index( - drop=True) exp_edge = df_G.loc[ (df_G['src'] == (src)) & ( df_G['dst'] == (dst))].reset_index(drop=True) - if not exp_edge.equals(edge[:1]): + if not (exp_edge['src'].loc[0], exp_edge['dst'].loc[0]) == (src, dst): print( "[ERR] Invalid edge: " - "There is no edge src {} dst {} weight {}" - .format(src, dst, weight) + "There is no edge src {} dst {}" + .format(src, dst) ) - invalid_weight += 1 + invalid_edge += 1 assert invalid_edge == 0 assert invalid_seeds == 0 - assert invalid_weight == 0 # ============================================================================= # Pytest 
Setup / Teardown - called for each test function @@ -121,11 +126,9 @@ def prepare_test(): @pytest.mark.parametrize("graph_file", utils.DATASETS_SMALL) @pytest.mark.parametrize("directed", DIRECTED_GRAPH_OPTIONS) @pytest.mark.parametrize("max_depth", [None]) -def test_random_walks_invalid_max_dept( - graph_file, - directed, - max_depth -): +def test_random_walks_invalid_max_dept(graph_file, + directed, + max_depth): prepare_test() with pytest.raises(TypeError): df, offsets, seeds = calc_random_walks( @@ -137,7 +140,7 @@ def test_random_walks_invalid_max_dept( @pytest.mark.parametrize("graph_file", utils.DATASETS_SMALL) @pytest.mark.parametrize("directed", DIRECTED_GRAPH_OPTIONS) -def test_random_walks( +def test_random_walks_coalesced( graph_file, directed ): @@ -145,12 +148,43 @@ def test_random_walks( df_G = utils.read_csv_file(graph_file) df_G.rename( columns={"0": "src", "1": "dst", "2": "weight"}, inplace=True) - df, offsets, seeds = calc_random_walks( + path_data, seeds = calc_random_walks( graph_file, directed, max_depth=max_depth ) - check_random_walks(df, offsets, seeds, df_G) + check_random_walks(path_data, seeds, df_G) + + # Check path query output + df = cugraph.rw_path(len(seeds), path_data[2]) + v_offsets = [0] + path_data[2].cumsum()[:-1].to_array().tolist() + w_offsets = [0] + (path_data[2]-1).cumsum()[:-1].to_array().tolist() + + assert df['weight_sizes'].equals(path_data[2]-1) + assert df['vertex_offsets'].to_array().tolist() == v_offsets + assert df['weight_offsets'].to_array().tolist() == w_offsets + + +@pytest.mark.parametrize("graph_file", utils.DATASETS_SMALL) +@pytest.mark.parametrize("directed", DIRECTED_GRAPH_OPTIONS) +def test_random_walks_padded( + graph_file, + directed +): + max_depth = random.randint(2, 10) + df_G = utils.read_csv_file(graph_file) + df_G.rename( + columns={"0": "src", "1": "dst", "2": "weight"}, inplace=True) + path_data, seeds = calc_random_walks( + graph_file, + directed, + max_depth=max_depth, + use_padding=True + ) 
+ v_paths = path_data[0] + e_weights = path_data[1] + assert len(v_paths) == max_depth*len(seeds) + assert len(e_weights) == (max_depth - 1)*len(seeds) """@pytest.mark.parametrize("graph_file", utils.DATASETS_SMALL)