Skip to content

Commit

Permalink
Add utility function for computing a secondary cost for BFS and SSSP …
Browse files Browse the repository at this point in the history
…output (#1376)

Solves: #1373

Authors:
  - Hugo Linsenmaier (https://github.com/hlinsen)

Approvers:
  - Chuck Hastings (https://github.com/ChuckHastings)
  - Andrei Schaffer (https://github.com/aschaffer)
  - Alex Fender (https://github.com/afender)

URL: #1376
  • Loading branch information
hlinsen authored Apr 7, 2021
1 parent 1b34e26 commit 79c3ba0
Show file tree
Hide file tree
Showing 8 changed files with 411 additions and 1 deletion.
1 change: 1 addition & 0 deletions cpp/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -402,6 +402,7 @@ endif(BUILD_STATIC_FAISS)
add_library(cugraph SHARED
src/utilities/spmv_1D.cu
src/utilities/cython.cu
src/utilities/path_retrieval.cu
src/structure/graph.cu
src/linear_assignment/hungarian.cu
src/link_analysis/gunrock_hits.cpp
Expand Down
45 changes: 45 additions & 0 deletions cpp/include/utilities/path_retrieval.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
/*
* Copyright (c) 2021, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#pragma once

#include <raft/handle.hpp>

namespace cugraph {

/**
* @brief Takes the results of BFS or SSSP function call and sums the given
* weights along the path to the starting vertex.
*
* For every entry the predecessor chain is followed until a predecessor equal
* to @p stop_vertex is found; the weights of all entries visited on the way
* (including the entry itself) are added together.
*
* @tparam vertex_t Type of vertex identifiers. Needs to be an integral type.
* @tparam weight_t Type of edge weights. Needs to be a floating point type.
* @param handle RAFT handle object to encapsulate resources (e.g. CUDA stream, communicator, and
* handles to various CUDA libraries) to run graph algorithms. Must have at least one worker stream.
* @param vertices Pointer to vertex ids (num_vertices entries).
* @param preds Pointer to predecessors (num_vertices entries); preds[i] is the predecessor of
* vertices[i].
* @param info_weights Secondary weights along the edge from predecessor to vertex
* (num_vertices entries).
* @param out Contains for each index the sum of weights along the path unfolding
* (num_vertices entries, must not be null).
* @param stop_vertex Predecessor value that terminates the path unfolding.
* @param num_vertices Number of entries in each of the arrays above. Must be strictly positive.
**/
template <typename vertex_t, typename weight_t>
void get_traversed_cost(raft::handle_t const &handle,
vertex_t const *vertices,
vertex_t const *preds,
weight_t const *info_weights,
weight_t *out,
vertex_t stop_vertex,
vertex_t num_vertices);
} // namespace cugraph
133 changes: 133 additions & 0 deletions cpp/src/utilities/path_retrieval.cu
Original file line number Diff line number Diff line change
@@ -0,0 +1,133 @@
/*
* Copyright (c) 2021, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#include <rmm/thrust_rmm_allocator.h>
#include <rmm/device_uvector.hpp>

#include <raft/handle.hpp>

#include <utilities/error.hpp>
#include <utilities/path_retrieval.hpp>

namespace cugraph {
namespace detail {

/**
 * @brief For each entry i, accumulates info_weights along the predecessor chain of i.
 *
 * Starting from entry i, the kernel adds info_weights[i], then repeatedly looks up the
 * row of the current predecessor through vtx_map and adds that row's weight, until the
 * predecessor equals stop_vertex. The total is written to out[i].
 *
 * Launch layout: 1D grid of 1D blocks; any grid/block size is valid because each thread
 * strides over the entries. No shared memory is used.
 *
 * NOTE(review): vtx_map is indexed directly by predecessor id, so ids are presumably
 * expected to be in [0, num_vertices) -- confirm against the caller.
 * NOTE(review): the walk only ends when stop_vertex is met; a predecessor chain that
 * never reaches it will not terminate.
 *
 * @param vertices      Vertex ids (not read by the kernel; kept for interface stability).
 * @param preds         preds[i] is the predecessor of entry i.
 * @param vtx_map       Maps a predecessor id to the row that holds that vertex.
 * @param info_weights  Weight associated with each row.
 * @param out           Output: accumulated weight for each row.
 * @param stop_vertex   Predecessor value that ends the walk.
 * @param num_vertices  Number of rows in every array.
 */
template <typename vertex_t, typename weight_t>
__global__ void get_traversed_cost_kernel(vertex_t const *vertices,
                                          vertex_t const *preds,
                                          vertex_t const *vtx_map,
                                          weight_t const *info_weights,
                                          weight_t *out,
                                          vertex_t stop_vertex,
                                          vertex_t num_vertices)
{
  // Index arithmetic is done in 64 bits: blockIdx.x * blockDim.x and gridDim.x * blockDim.x
  // are 32-bit unsigned products that can wrap, and adding the stride into a 32-bit signed
  // vertex_t can turn the index negative and keep the loop running out of bounds.
  int64_t const stride = static_cast<int64_t>(gridDim.x) * static_cast<int64_t>(blockDim.x);
  int64_t const count  = static_cast<int64_t>(num_vertices);
  int64_t const first =
    static_cast<int64_t>(blockIdx.x) * static_cast<int64_t>(blockDim.x) + threadIdx.x;

  for (int64_t i = first; i < count; i += stride) {
    weight_t sum  = info_weights[i];
    vertex_t pred = preds[i];
    while (pred != stop_vertex) {
      // Jump to the row describing the predecessor and keep accumulating.
      vertex_t pos = vtx_map[pred];
      sum += info_weights[pos];
      pred = preds[pos];
    }
    out[i] = sum;
  }
}

/**
 * @brief Builds the vertex lookup table and launches get_traversed_cost_kernel.
 *
 * A copy of `vertices` is sorted together with the sequence 0..num_vertices-1, so that
 * afterwards vtx_map[k] holds the original row of the k-th smallest vertex id. The kernel
 * uses this table to find the row of a predecessor.
 *
 * All device work (copy, sequence, sort, kernel, temporary buffer lifetime) is ordered on
 * the stream returned by handle.get_stream(). The function does not synchronize; callers
 * must synchronize that stream before reading `out`.
 *
 * @param handle        RAFT handle providing the stream and device properties.
 * @param vertices      Device pointer to vertex ids (num_vertices entries).
 * @param preds         Device pointer to predecessors (num_vertices entries).
 * @param info_weights  Device pointer to per-row weights (num_vertices entries).
 * @param out           Device pointer receiving the accumulated weights.
 * @param stop_vertex   Predecessor value that ends the path unfolding.
 * @param num_vertices  Number of rows; expected to be strictly positive.
 */
template <typename vertex_t, typename weight_t>
void get_traversed_cost_impl(raft::handle_t const &handle,
                             vertex_t const *vertices,
                             vertex_t const *preds,
                             weight_t const *info_weights,
                             weight_t *out,
                             vertex_t stop_vertex,
                             vertex_t num_vertices)
{
  auto stream          = handle.get_stream();
  vertex_t max_blocks  = handle.get_device_properties().maxGridSize[0];
  vertex_t max_threads = handle.get_device_properties().maxThreadsPerBlock;

  dim3 nthreads, nblocks;
  nthreads.x = std::min<vertex_t>(num_vertices, max_threads);
  nthreads.y = 1;
  nthreads.z = 1;
  // Ceil-div so every row is covered, capped at the device grid limit; the kernel strides
  // over the remainder when the cap is hit.
  nblocks.x = std::min<vertex_t>((num_vertices + nthreads.x - 1) / nthreads.x, max_blocks);
  nblocks.y = 1;
  nblocks.z = 1;

  rmm::device_uvector<vertex_t> vtx_map_v(num_vertices, stream);
  rmm::device_uvector<vertex_t> vtx_keys_v(num_vertices, stream);
  vertex_t *vtx_map  = vtx_map_v.data();
  vertex_t *vtx_keys = vtx_keys_v.data();
  raft::copy(vtx_keys, vertices, num_vertices, stream);

  thrust::sequence(rmm::exec_policy(stream)->on(stream), vtx_map, vtx_map + num_vertices);

  // After the sort, vtx_map[k] is the original row of the k-th smallest vertex id.
  thrust::stable_sort_by_key(
    rmm::exec_policy(stream)->on(stream), vtx_keys, vtx_keys + num_vertices, vtx_map);

  // Launch on the handle's stream: the lookup table is produced and later freed in stream
  // order, so running the kernel on the default stream could read it before the sort has
  // finished or after the buffer has been released.
  get_traversed_cost_kernel<<<nblocks, nthreads, 0, stream>>>(
    vertices, preds, vtx_map, info_weights, out, stop_vertex, num_vertices);

  // Kernel launches do not report configuration errors by themselves.
  CUGRAPH_EXPECTS(cudaGetLastError() == cudaSuccess, "get_traversed_cost_kernel launch failed");
}
} // namespace detail

/**
 * @brief Validates the arguments and forwards to detail::get_traversed_cost_impl.
 *
 * @param handle        RAFT handle providing the stream used for all device work.
 * @param vertices      Device pointer to vertex ids (num_vertices entries).
 * @param preds         Device pointer to predecessors (num_vertices entries).
 * @param info_weights  Device pointer to per-row weights (num_vertices entries).
 * @param out           Device pointer receiving the accumulated weight of each row.
 * @param stop_vertex   Predecessor value that ends the path unfolding.
 * @param num_vertices  Number of rows in every array; must be strictly positive.
 *
 * Fails through CUGRAPH_EXPECTS when num_vertices is not positive or when any of the
 * array pointers is null.
 */
template <typename vertex_t, typename weight_t>
void get_traversed_cost(raft::handle_t const &handle,
                        vertex_t const *vertices,
                        vertex_t const *preds,
                        weight_t const *info_weights,
                        weight_t *out,
                        vertex_t stop_vertex,
                        vertex_t num_vertices)
{
  CUGRAPH_EXPECTS(num_vertices > 0, "num_vertices should be strictly positive");
  // Every array is read (or written) on the device; reject null pointers up front instead
  // of faulting asynchronously inside the copy or the kernel.
  CUGRAPH_EXPECTS(vertices != nullptr, "vertices should not be null");
  CUGRAPH_EXPECTS(preds != nullptr, "preds should not be null");
  CUGRAPH_EXPECTS(info_weights != nullptr, "info_weights should not be null");
  CUGRAPH_EXPECTS(out != nullptr, "out should not be null and should be of size num_vertices");
  cugraph::detail::get_traversed_cost_impl(
    handle, vertices, preds, info_weights, out, stop_vertex, num_vertices);
}

// Explicit instantiations: the template is defined in this translation unit only, so the
// supported (vertex_t, weight_t) combinations are emitted here for callers that include
// just the declaration. Supported: {int32_t, int64_t} x {float, double}.
template void get_traversed_cost<int32_t, float>(raft::handle_t const &handle,
int32_t const *vertices,
int32_t const *preds,
float const *info_weights,
float *out,
int32_t stop_vertex,
int32_t num_vertices);

template void get_traversed_cost<int32_t, double>(raft::handle_t const &handle,
int32_t const *vertices,
int32_t const *preds,
double const *info_weights,
double *out,
int32_t stop_vertex,
int32_t num_vertices);

template void get_traversed_cost<int64_t, float>(raft::handle_t const &handle,
int64_t const *vertices,
int64_t const *preds,
float const *info_weights,
float *out,
int64_t stop_vertex,
int64_t num_vertices);

template void get_traversed_cost<int64_t, double>(raft::handle_t const &handle,
int64_t const *vertices,
int64_t const *preds,
double const *info_weights,
double *out,
int64_t stop_vertex,
int64_t num_vertices);
} // namespace cugraph
28 changes: 28 additions & 0 deletions python/cugraph/tests/test_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,9 @@
import pytest

import cugraph
import cudf
from cugraph.tests import utils
import numpy as np


def test_bfs_paths():
Expand Down Expand Up @@ -68,3 +70,29 @@ def test_bfs_paths_array():
answer = cugraph.utils.get_traversed_path_list(df, 100)

assert "not in the result set" in str(ErrorMsg)


@pytest.mark.parametrize("graph_file", utils.DATASETS)
def test_get_traversed_cost(graph_file):
    """
    Check that summing the SSSP edge attribute along each predecessor path
    with get_traversed_cost reproduces the distances reported by SSSP.
    """
    cu_M = utils.read_csv_file(graph_file)

    # Seeded generator so that a failing dataset can be reproduced exactly.
    rng = np.random.RandomState(42)
    noise = cudf.Series(rng.randint(10, size=(cu_M.shape[0])))
    cu_M['info'] = cu_M['2'] + noise

    G = cugraph.Graph()
    G.from_cudf_edgelist(cu_M, source='0', destination='1', edge_attr='info')

    # run SSSP starting at vertex 16; the same id must be given to
    # get_traversed_cost so that the source gets a zero cost
    source = 16
    df = cugraph.sssp(G, source)

    answer = cugraph.utilities.path_retrieval.get_traversed_cost(
        df,
        source,
        cu_M['0'],
        cu_M['1'],
        cu_M['info']
    )

    df = df.sort_values(by='vertex').reset_index()
    answer = answer.sort_values(by='vertex').reset_index()

    assert df.shape[0] == answer.shape[0]
    assert np.allclose(df['distance'], answer['info'])
3 changes: 2 additions & 1 deletion python/cugraph/utilities/__init__.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Copyright (c) 2019-2020, NVIDIA CORPORATION.
# Copyright (c) 2019-2021, NVIDIA CORPORATION.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
Expand All @@ -25,3 +25,4 @@
is_cp_matrix_type,
is_sp_matrix_type,
)
from cugraph.utilities.path_retrieval import get_traversed_cost
30 changes: 30 additions & 0 deletions python/cugraph/utilities/path_retrieval.pxd
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
# Copyright (c) 2021, NVIDIA CORPORATION.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# cython: profile=False
# distutils: language = c++
# cython: embedsignature = True
# cython: language_level = 3

from cugraph.structure.graph_primtypes cimport *

# Cython view of the C++ template declared in utilities/path_retrieval.hpp
# (namespace cugraph). The argument list mirrors the C++ signature one to one.
cdef extern from "utilities/path_retrieval.hpp" namespace "cugraph":

# `except +` lets Cython translate C++ exceptions raised by the call into
# Python exceptions instead of terminating the process.
cdef void get_traversed_cost[vertex_t, weight_t](const handle_t &handle,
const vertex_t *vertices,
const vertex_t *preds,
const weight_t *info_weights,
weight_t *out,
vertex_t stop_vertex,
vertex_t num_vertices) except +

100 changes: 100 additions & 0 deletions python/cugraph/utilities/path_retrieval.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,100 @@
# Copyright (c) 2021, NVIDIA CORPORATION.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import numpy as np
import cudf

from cugraph.structure.symmetrize import symmetrize
from cugraph.structure.number_map import NumberMap
from cugraph.utilities import path_retrieval_wrapper


def get_traversed_cost(df, source, source_col, dest_col, value_col):
    """
    Take the DataFrame result from a BFS or SSSP function call and sum
    the given weights along the path to the starting vertex.
    The source_col, dest_col identifiers need to match with the vertex and
    predecessor columns of df.

    Parameters
    ----------
    df : cudf.DataFrame
        The dataframe containing the results of a BFS or SSSP call. It must
        have the columns 'vertex', 'distance' and 'predecessor'.
    source : int
        Index of the source vertex.
    source_col : cudf.Series
        This cudf.Series wraps a gdf_column of size E (E: number of edges).
        The gdf column contains the source index for each edge.
        Source indices must be an integer type.
    dest_col : cudf.Series
        This cudf.Series wraps a gdf_column of size E (E: number of edges).
        The gdf column contains the destination index for each edge.
        Destination indices must be an integer type.
    value_col : cudf.Series
        This cudf.Series wraps a gdf_column of size E (E: number of edges).
        The gdf column contains values associated with this edge.
        Weight should be a floating type.

    Returns
    -------
    df : cudf.DataFrame
        DataFrame containing two columns 'vertex' and 'info'.
        Unreachable vertices will have value the max value of the weight type.

    Raises
    ------
    ValueError
        If df lacks one of the 'vertex', 'distance' or 'predecessor' columns.
    """

    # Same checks, same order as before; only the message typo ("SSP") is fixed.
    for column in ('vertex', 'distance', 'predecessor'):
        if column not in df.columns:
            raise ValueError("DataFrame does not appear to be a BFS or "
                             "SSSP result - '" + column + "' column missing")

    # Make every edge available in both directions so that the
    # (vertex, predecessor) pairs of df can be matched against it.
    src, dst, val = symmetrize(source_col,
                               dest_col,
                               value_col)

    symmetrized_df = cudf.DataFrame()
    symmetrized_df['source'] = src
    symmetrized_df['destination'] = dst
    symmetrized_df['weights'] = val

    # Attach to each result row the weight of the edge linking the vertex to
    # its predecessor; rows without such an edge get a null weight.
    input_df = df.merge(symmetrized_df,
                        left_on=['vertex', 'predecessor'],
                        right_on=['source', 'destination'],
                        how="left"
                        )

    # Set unreachable vertex weights to max float and source vertex weight to 0
    max_val = np.finfo(val.dtype).max
    input_df[['weights']] = input_df[['weights']].fillna(max_val)
    input_df.loc[input_df['vertex'] == source, 'weights'] = 0

    # Renumber
    renumbered_gdf, renumber_map = NumberMap.renumber(input_df,
                                                      ["vertex"],
                                                      ["predecessor"],
                                                      preserve_order=True)
    renumbered_gdf = renumbered_gdf.rename(columns={'src': 'vertex',
                                                    'dst': 'predecessor'})
    # -1 is looked up in the renumbering map so the wrapper receives the
    # internal id that marks the end of a predecessor chain.
    stop_vertex = renumber_map.to_internal_vertex_id(cudf.Series(-1)).values[0]

    out_df = path_retrieval_wrapper.get_traversed_cost(renumbered_gdf,
                                                       stop_vertex)

    # Unrenumber
    out_df['vertex'] = renumber_map.unrenumber(renumbered_gdf, 'vertex',
                                               preserve_order=True)["vertex"]
    return out_df
Loading

0 comments on commit 79c3ba0

Please sign in to comment.