Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add utility function for computing a secondary cost for BFS and SSSP output #1376

Merged
merged 27 commits into from
Apr 7, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions cpp/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -402,6 +402,7 @@ endif(BUILD_STATIC_FAISS)
add_library(cugraph SHARED
src/utilities/spmv_1D.cu
src/utilities/cython.cu
src/utilities/path_retrieval.cu
src/structure/graph.cu
src/linear_assignment/hungarian.cu
src/link_analysis/gunrock_hits.cpp
Expand Down
45 changes: 45 additions & 0 deletions cpp/include/utilities/path_retrieval.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
/*
* Copyright (c) 2021, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#pragma once

#include <raft/handle.hpp>

namespace cugraph {

/**
* @brief Takes the results of BFS or SSSP function call and sums the given
* weights along the path to the starting vertex.
*
* For each index i, the sum starts with info_weights[i] and then follows the
* predecessor chain beginning at preds[i], adding the weight stored for every
* predecessor visited, until the predecessor equals stop_vertex.
*
* @tparam vertex_t Type of vertex identifiers. Needs to be an integral type.
* @tparam weight_t Type of edge weights. Needs to be a floating point type.
* @param handle RAFT handle object to encapsulate resources (e.g. CUDA stream, communicator, and
* handles to various CUDA libraries) to run graph algorithms. Must have at least one worker stream.
* @param vertices Pointer to vertex ids.
* @param preds Pointer to predecessors.
* @param info_weights Secondary weights along the edge from predecessor to vertex.
* @param out Contains for each index the sum of weights along the path unfolding.
* @param stop_vertex Predecessor value that terminates the walk along a path (the walk for an
* entry ends as soon as its current predecessor equals this value).
* @param num_vertices Number of vertices.
*
* NOTE(review): vertices, preds, info_weights and out are presumably device-accessible arrays of
* num_vertices elements each - confirm against the callers.
**/
template <typename vertex_t, typename weight_t>
void get_traversed_cost(raft::handle_t const &handle,
vertex_t const *vertices,
vertex_t const *preds,
weight_t const *info_weights,
weight_t *out,
vertex_t stop_vertex,
vertex_t num_vertices);
} // namespace cugraph
133 changes: 133 additions & 0 deletions cpp/src/utilities/path_retrieval.cu
Original file line number Diff line number Diff line change
@@ -0,0 +1,133 @@
/*
* Copyright (c) 2021, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#include <rmm/thrust_rmm_allocator.h>
#include <rmm/device_uvector.hpp>

#include <raft/handle.hpp>

#include <utilities/error.hpp>
#include <utilities/path_retrieval.hpp>

namespace cugraph {
namespace detail {

/**
 * @brief Accumulates, for every entry, the secondary weights found while
 * following predecessor links until stop_vertex is reached.
 *
 * Each thread starts at its global thread index and advances by the total
 * number of launched threads, so any 1D launch configuration covers all
 * num_vertices entries.
 *
 * @param vertices     Vertex ids (not read by this kernel).
 * @param preds        preds[i] is the predecessor of entry i.
 * @param vtx_map      Maps a predecessor id to the position of that vertex in
 *                     preds / info_weights.
 * @param info_weights Weight associated with each entry.
 * @param out          out[i] receives the accumulated weight for entry i.
 * @param stop_vertex  Predecessor value that ends the walk.
 * @param num_vertices Number of entries to process.
 *
 * NOTE(review): the inner walk only terminates if every predecessor chain
 * eventually reaches stop_vertex - callers must guarantee this.
 */
template <typename vertex_t, typename weight_t>
__global__ void get_traversed_cost_kernel(vertex_t const *vertices,
                                          vertex_t const *preds,
                                          vertex_t const *vtx_map,
                                          weight_t const *info_weights,
                                          weight_t *out,
                                          vertex_t stop_vertex,
                                          vertex_t num_vertices)
{
  vertex_t idx = threadIdx.x + blockIdx.x * blockDim.x;
  while (idx < num_vertices) {
    // Start with the entry's own weight, then climb the predecessor chain.
    weight_t acc = info_weights[idx];
    for (vertex_t cur = preds[idx]; cur != stop_vertex;) {
      vertex_t const slot = vtx_map[cur];
      acc += info_weights[slot];
      cur = preds[slot];
    }
    out[idx] = acc;

    idx += gridDim.x * blockDim.x;
  }
}

/**
 * @brief Builds the vertex-id -> position lookup and launches
 * get_traversed_cost_kernel on the handle's stream.
 *
 * The lookup is obtained by sorting a copy of the vertex ids together with a
 * 0..num_vertices-1 sequence: after the sort, vtx_map[k] holds the original
 * position of the k-th smallest vertex id.
 *
 * All work (copy, sequence, sort, kernel) is enqueued on handle.get_stream()
 * so the kernel is ordered after the sort and before the stream-ordered
 * release of the temporary buffers.
 *
 * @param handle       RAFT handle providing the stream and device properties.
 * @param vertices     Vertex ids, num_vertices elements.
 * @param preds        Predecessor of each vertex, num_vertices elements.
 * @param info_weights Weight of each entry, num_vertices elements.
 * @param out          Output buffer, num_vertices elements.
 * @param stop_vertex  Predecessor value that ends a path walk.
 * @param num_vertices Number of entries; expected to be > 0 (checked by the
 *                     public entry point).
 */
template <typename vertex_t, typename weight_t>
void get_traversed_cost_impl(raft::handle_t const &handle,
                             vertex_t const *vertices,
                             vertex_t const *preds,
                             weight_t const *info_weights,
                             weight_t *out,
                             vertex_t stop_vertex,
                             vertex_t num_vertices)
{
  auto stream          = handle.get_stream();
  vertex_t max_blocks  = handle.get_device_properties().maxGridSize[0];
  vertex_t max_threads = handle.get_device_properties().maxThreadsPerBlock;

  // 1D launch: at most one thread per entry, capped by the device limits. The
  // kernel strides over the data, so capping the grid does not drop entries.
  dim3 nthreads, nblocks;
  nthreads.x = std::min<vertex_t>(num_vertices, max_threads);
  nthreads.y = 1;
  nthreads.z = 1;
  nblocks.x  = std::min<vertex_t>((num_vertices + nthreads.x - 1) / nthreads.x, max_blocks);
  nblocks.y  = 1;
  nblocks.z  = 1;

  rmm::device_uvector<vertex_t> vtx_map_v(num_vertices, stream);
  rmm::device_uvector<vertex_t> vtx_keys_v(num_vertices, stream);
  vertex_t *vtx_map  = vtx_map_v.data();
  vertex_t *vtx_keys = vtx_keys_v.data();
  raft::copy(vtx_keys, vertices, num_vertices, stream);

  thrust::sequence(rmm::exec_policy(stream)->on(stream), vtx_map, vtx_map + num_vertices);

  // Sorting the ids drags the positions along, yielding id-rank -> position.
  thrust::stable_sort_by_key(
    rmm::exec_policy(stream)->on(stream), vtx_keys, vtx_keys + num_vertices, vtx_map);

  // Launch on the same stream as the preparation above. Launching on the
  // default stream would not be ordered against a non-blocking handle stream
  // and could read vtx_map before the sort finished or after it was released.
  get_traversed_cost_kernel<<<nblocks, nthreads, 0, stream>>>(
    vertices, preds, vtx_map, info_weights, out, stop_vertex, num_vertices);

  // Kernel launches do not report configuration errors by themselves.
  CUGRAPH_EXPECTS(cudaGetLastError() == cudaSuccess, "get_traversed_cost_kernel launch failed");
}
} // namespace detail

/**
 * @brief Public entry point: validates the arguments and forwards to
 * detail::get_traversed_cost_impl.
 *
 * @param handle       RAFT handle providing the CUDA stream.
 * @param vertices     Pointer to vertex ids.
 * @param preds        Pointer to predecessors.
 * @param info_weights Secondary weights along the edge from predecessor to vertex.
 * @param out          Receives, for each index, the sum of weights along the path.
 * @param stop_vertex  Predecessor value that ends a path walk.
 * @param num_vertices Number of vertices; must be strictly positive.
 *
 * Fails through CUGRAPH_EXPECTS when num_vertices is not positive or when any
 * of the array pointers is null.
 */
template <typename vertex_t, typename weight_t>
void get_traversed_cost(raft::handle_t const &handle,
                        vertex_t const *vertices,
                        vertex_t const *preds,
                        weight_t const *info_weights,
                        weight_t *out,
                        vertex_t stop_vertex,
                        vertex_t num_vertices)
{
  CUGRAPH_EXPECTS(num_vertices > 0, "num_vertices should be strictly positive");
  CUGRAPH_EXPECTS(out != nullptr, "out should be of size num_vertices");
  // The kernel dereferences all three inputs for every entry; reject null
  // pointers here instead of faulting on the device.
  CUGRAPH_EXPECTS(vertices != nullptr, "vertices should be of size num_vertices");
  CUGRAPH_EXPECTS(preds != nullptr, "preds should be of size num_vertices");
  CUGRAPH_EXPECTS(info_weights != nullptr, "info_weights should be of size num_vertices");
  cugraph::detail::get_traversed_cost_impl(
    handle, vertices, preds, info_weights, out, stop_vertex, num_vertices);
}

// Explicit instantiations of get_traversed_cost for every supported
// (vertex_t, weight_t) pair: int32_t / int64_t vertex ids combined with
// float / double weights. The header only declares the template, so these
// are the instantiations this translation unit provides.

// 32-bit vertex ids, single-precision weights.
template void get_traversed_cost<int32_t, float>(raft::handle_t const &handle,
int32_t const *vertices,
int32_t const *preds,
float const *info_weights,
float *out,
int32_t stop_vertex,
int32_t num_vertices);

// 32-bit vertex ids, double-precision weights.
template void get_traversed_cost<int32_t, double>(raft::handle_t const &handle,
int32_t const *vertices,
int32_t const *preds,
double const *info_weights,
double *out,
int32_t stop_vertex,
int32_t num_vertices);

// 64-bit vertex ids, single-precision weights.
template void get_traversed_cost<int64_t, float>(raft::handle_t const &handle,
int64_t const *vertices,
int64_t const *preds,
float const *info_weights,
float *out,
int64_t stop_vertex,
int64_t num_vertices);

// 64-bit vertex ids, double-precision weights.
template void get_traversed_cost<int64_t, double>(raft::handle_t const &handle,
int64_t const *vertices,
int64_t const *preds,
double const *info_weights,
double *out,
int64_t stop_vertex,
int64_t num_vertices);
} // namespace cugraph
28 changes: 28 additions & 0 deletions python/cugraph/tests/test_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,9 @@
import pytest

import cugraph
import cudf
from cugraph.tests import utils
import numpy as np


def test_bfs_paths():
Expand Down Expand Up @@ -68,3 +70,29 @@ def test_bfs_paths_array():
answer = cugraph.utils.get_traversed_path_list(df, 100)

assert "not in the result set" in str(ErrorMsg)


@pytest.mark.parametrize("graph_file", utils.DATASETS)
def test_get_traversed_cost(graph_file):
    """
    Run SSSP on a graph whose edge attribute is the original weight plus
    integer noise, then check that get_traversed_cost, given the same
    attribute as the secondary cost, reproduces the SSSP distances.
    """
    cu_M = utils.read_csv_file(graph_file)

    # Seeded generator: the noise (and therefore any failure) is reproducible.
    rng = np.random.RandomState(42)
    noise = cudf.Series(rng.randint(10, size=(cu_M.shape[0])))
    cu_M['info'] = cu_M['2'] + noise

    G = cugraph.Graph()
    G.from_cudf_edgelist(cu_M, source='0', destination='1', edge_attr='info')

    # run SSSP starting at vertex 16
    source = 16
    df = cugraph.sssp(G, source)

    answer = cugraph.utilities.path_retrieval.get_traversed_cost(df, source,
                                                                 cu_M['0'],
                                                                 cu_M['1'],
                                                                 cu_M['info']
                                                                 )

    # Align both results on the vertex id before comparing row by row.
    df = df.sort_values(by='vertex').reset_index()
    answer = answer.sort_values(by='vertex').reset_index()

    assert df.shape[0] == answer.shape[0]
    assert np.allclose(df['distance'], answer['info'])
3 changes: 2 additions & 1 deletion python/cugraph/utilities/__init__.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Copyright (c) 2019-2020, NVIDIA CORPORATION.
# Copyright (c) 2019-2021, NVIDIA CORPORATION.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
Expand All @@ -25,3 +25,4 @@
is_cp_matrix_type,
is_sp_matrix_type,
)
from cugraph.utilities.path_retrieval import get_traversed_cost
30 changes: 30 additions & 0 deletions python/cugraph/utilities/path_retrieval.pxd
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
# Copyright (c) 2021, NVIDIA CORPORATION.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# cython: profile=False
# distutils: language = c++
# cython: embedsignature = True
# cython: language_level = 3

from cugraph.structure.graph_primtypes cimport *

# Cython declaration of the C++ template cugraph::get_traversed_cost from
# "utilities/path_retrieval.hpp". The parameter list mirrors the C++
# declaration; `except +` lets Cython turn a C++ exception thrown by the call
# into a Python exception.
cdef extern from "utilities/path_retrieval.hpp" namespace "cugraph":

cdef void get_traversed_cost[vertex_t, weight_t](const handle_t &handle,
const vertex_t *vertices,
const vertex_t *preds,
const weight_t *info_weights,
weight_t *out,
vertex_t stop_vertex,
vertex_t num_vertices) except +

100 changes: 100 additions & 0 deletions python/cugraph/utilities/path_retrieval.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,100 @@
# Copyright (c) 2021, NVIDIA CORPORATION.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import numpy as np
import cudf

from cugraph.structure.symmetrize import symmetrize
from cugraph.structure.number_map import NumberMap
from cugraph.utilities import path_retrieval_wrapper


def get_traversed_cost(df, source, source_col, dest_col, value_col):
    """
    Sum a per-edge value along the path from every vertex back to the
    starting vertex, using the DataFrame produced by a BFS or SSSP call.

    The identifiers in source_col and dest_col must match the ones used in
    the 'vertex' and 'predecessor' columns of df.

    Parameters
    ----------
    df : cudf.DataFrame
        Result of a BFS or SSSP call. Must contain the columns 'vertex',
        'distance' and 'predecessor'.
    source : int
        Identifier of the starting vertex; its own weight is set to 0.
    source_col : cudf.Series
        Source vertex of each edge (size E, E: number of edges).
        Source indices must be an integer type.
    dest_col : cudf.Series
        Destination vertex of each edge (size E).
        Destination indices must be an integer type.
    value_col : cudf.Series
        Value associated with each edge (size E).
        Should be a floating point type.

    Returns
    -------
    df : cudf.DataFrame
        DataFrame containing two columns 'vertex' and 'info'.
        Rows of df whose (vertex, predecessor) pair is not found in the
        symmetrized edge list are given the maximum value of the weight
        type before the summation.

    Raises
    ------
    ValueError
        If df lacks the 'vertex', 'distance' or 'predecessor' column.
    """

    # Same checks, same order, same messages as three separate tests would give.
    for required in ('vertex', 'distance', 'predecessor'):
        if required not in df.columns:
            raise ValueError("DataFrame does not appear to be a BFS or "
                             f"SSP result - '{required}' column missing")

    sym_src, sym_dst, sym_val = symmetrize(source_col, dest_col, value_col)

    edge_df = cudf.DataFrame()
    edge_df['source'] = sym_src
    edge_df['destination'] = sym_dst
    edge_df['weights'] = sym_val

    # Attach to every (vertex, predecessor) row the value of the matching edge.
    merged = df.merge(edge_df,
                      left_on=['vertex', 'predecessor'],
                      right_on=['source', 'destination'],
                      how="left")

    # Rows without a matching edge get the max float; the source vertex gets 0.
    unmatched_cost = np.finfo(sym_val.dtype).max
    merged[['weights']] = merged[['weights']].fillna(unmatched_cost)
    is_source = merged['vertex'] == source
    merged.loc[is_source, 'weights'] = 0

    # Renumber
    renumbered, number_map = NumberMap.renumber(merged,
                                                ["vertex"],
                                                ["predecessor"],
                                                preserve_order=True)
    renumbered = renumbered.rename(columns={'src': 'vertex',
                                            'dst': 'predecessor'})
    # Internal id of the predecessor value -1, passed on as the stop vertex.
    sentinel = cudf.Series(-1)
    stop_vertex = number_map.to_internal_vertex_id(sentinel).values[0]

    result = path_retrieval_wrapper.get_traversed_cost(renumbered,
                                                       stop_vertex)

    # Unrenumber
    restored = number_map.unrenumber(renumbered, 'vertex',
                                     preserve_order=True)
    result['vertex'] = restored["vertex"]
    return result
Loading