Skip to content

Commit

Permalink
Add Eigenvector Centrality to pylibcugraph, cugraph APIs (#2255)
Browse files Browse the repository at this point in the history
This PR:

1.  Adds Eigenvector Centrality to the pylibcugraph and cugraph software stacks, which started from #2180 and is followed up by future PRs in order to close #2146  
2. Minor improvements to pylibcugraph Katz Centrality
3. Added functionality to `test_doctests.py` so that certain docstrings can be skipped on different architecture configs (such as ktruss in CUDA 11.4) 
4. Added undirected/directed versions of graph example used in C tests in `datasets`
5. Removed cugraph copy of warning wrapper from pylibcugraph and have it call the pylibcugraph version
6. Testing for both Python eigenvector centrality wrappers

This PR is identical to #2243, except that the branch name is different.

Authors:
  - https://github.com/betochimas
  - Chuck Hastings (https://github.com/ChuckHastings)

Approvers:
  - Rick Ratzel (https://github.com/rlratzel)

URL: #2255
  • Loading branch information
betochimas authored May 27, 2022
1 parent b0c6a9e commit 6d0239d
Show file tree
Hide file tree
Showing 21 changed files with 878 additions and 107 deletions.
10 changes: 9 additions & 1 deletion datasets/toy_graph.csv
Original file line number Diff line number Diff line change
Expand Up @@ -5,4 +5,12 @@
2 1 3.1
2 3 4.1
3 5 7.2
4 5 3.2
4 5 3.2
1 0 0.1
3 1 2.1
4 1 1.1
0 2 5.1
1 2 3.1
3 2 4.1
5 3 7.2
5 4 3.2
8 changes: 8 additions & 0 deletions datasets/toy_graph_undirected.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
0 1 0.1
1 3 2.1
1 4 1.1
2 0 5.1
2 1 3.1
2 3 4.1
3 5 7.2
4 5 3.2
14 changes: 14 additions & 0 deletions docs/cugraph/source/api_docs/centrality.rst
Original file line number Diff line number Diff line change
Expand Up @@ -33,3 +33,17 @@ Degree Centrality
:toctree: api/

cugraph.degree_centrality

Eigenvector Centrality
----------------------
.. autosummary::
:toctree: api/

cugraph.centrality.eigenvector_centrality

Eigenvector Centrality (MG)
---------------------------
.. autosummary::
:toctree: api/

cugraph.dask.centrality.eigenvector_centrality.eigenvector_centrality
1 change: 1 addition & 0 deletions python/cugraph/cugraph/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,7 @@
edge_betweenness_centrality,
katz_centrality,
degree_centrality,
eigenvector_centrality,
)

from cugraph.cores import core_number, k_core
Expand Down
1 change: 1 addition & 0 deletions python/cugraph/cugraph/centrality/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,3 +17,4 @@
edge_betweenness_centrality,
)
from cugraph.centrality.degree_centrality import degree_centrality
from cugraph.centrality.eigenvector_centrality import eigenvector_centrality
122 changes: 122 additions & 0 deletions python/cugraph/cugraph/centrality/eigenvector_centrality.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,122 @@
# Copyright (c) 2022, NVIDIA CORPORATION.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from pylibcugraph import (ResourceHandle,
GraphProperties,
SGGraph,
eigenvector_centrality as pylib_eigen
)
from cugraph.utilities import (ensure_cugraph_obj_for_nx,
df_score_to_dictionary,
)
import cudf
import cupy


def eigenvector_centrality(
    G, max_iter=100, tol=1.0e-6, normalized=True
):
    """
    Compute the eigenvector centrality for a graph G.

    Eigenvector centrality computes the centrality for a node based on the
    centrality of its neighbors. The eigenvector centrality for node i is the
    i-th element of the vector x defined by the eigenvector equation.

    Parameters
    ----------
    G : cuGraph.Graph or networkx.Graph
        cuGraph graph descriptor with connectivity information. The graph can
        contain either directed or undirected edges.

    max_iter : int, optional (default=100)
        The maximum number of iterations before an answer is returned. This can
        be used to limit the execution time and do an early exit before the
        solver reaches the convergence tolerance. Must be a positive integer.

    tol : float, optional (default=1e-6)
        Set the tolerance of the approximation; this parameter should be a
        small magnitude value.
        The lower the tolerance the better the approximation. Must be a
        positive float: 0.0 and negative values are rejected with a
        ValueError rather than replaced by the default.
        Setting too small a tolerance can lead to non-convergence due to
        numerical roundoff. Usually values between 1e-2 and 1e-6 are
        acceptable.

    normalized : bool, optional, default=True
        Intended to request normalization of the resulting eigenvector
        centrality values.
        NOTE: this function does not currently read this argument; it is not
        forwarded to the underlying pylibcugraph call.

    Returns
    -------
    df : cudf.DataFrame or Dictionary if using NetworkX
        GPU data frame containing two cudf.Series of size V: the vertex
        identifiers and the corresponding eigenvector centrality values.

        df['vertex'] : cudf.Series
            Contains the vertex identifiers
        df['eigenvector_centrality'] : cudf.Series
            Contains the eigenvector centrality of vertices

    Raises
    ------
    ValueError
        If 'max_iter' is not a positive integer, or if 'tol' is not a
        positive float.

    Examples
    --------
    >>> gdf = cudf.read_csv(datasets_path / 'karate.csv', delimiter=' ',
    ...                     dtype=['int32', 'int32', 'float32'], header=None)
    >>> G = cugraph.Graph()
    >>> G.from_cudf_edgelist(gdf, source='0', destination='1')
    >>> ec = cugraph.eigenvector_centrality(G)

    """
    # Validate the solver arguments before touching the graph so that bad
    # input fails fast, without any conversion or GPU work.
    if (not isinstance(max_iter, int)) or max_iter <= 0:
        raise ValueError(
            f"'max_iter' must be a positive integer, got: {max_iter}"
        )
    if (not isinstance(tol, float)) or (tol <= 0.0):
        raise ValueError(f"'tol' must be a positive float, got: {tol}")

    # isNx records whether the result must be handed back as a dictionary.
    G, isNx = ensure_cugraph_obj_for_nx(G)

    edgelist_df = G.edgelist.edgelist_df
    srcs = edgelist_df['src']
    dsts = edgelist_df['dst']
    if 'weights' in edgelist_df.columns:
        weights = edgelist_df['weights']
    else:
        # FIXME: If weights column is not imported, a weights column of 1s
        # with type hardcoded to float32 is passed into wrapper
        weights = cudf.Series(cupy.ones(srcs.size, dtype="float32"))

    resource_handle = ResourceHandle()
    graph_props = GraphProperties(is_multigraph=G.is_multigraph())
    store_transposed = False
    renumber = False
    do_expensive_check = False

    sg = SGGraph(resource_handle, graph_props, srcs, dsts, weights,
                 store_transposed, renumber, do_expensive_check)

    vertices, values = pylib_eigen(resource_handle, sg,
                                   tol, max_iter,
                                   do_expensive_check)

    df = cudf.DataFrame()
    df["vertex"] = cudf.Series(vertices)
    df["eigenvector_centrality"] = cudf.Series(values)

    # The SGGraph was built with renumber=False from the graph's edge list
    # columns, so map vertex ids back when the input graph was renumbered.
    if G.renumbered:
        df = G.unrenumber(df, "vertex")

    if isNx is True:
        return df_score_to_dictionary(df, "eigenvector_centrality")
    return df
19 changes: 8 additions & 11 deletions python/cugraph/cugraph/centrality/katz_centrality.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,11 +11,11 @@
# See the License for the specific language governing permissions and
# limitations under the License.

from pylibcugraph.experimental import (ResourceHandle,
GraphProperties,
SGGraph,
katz_centrality as pylibcugraph_katz
)
from pylibcugraph import (ResourceHandle,
GraphProperties,
SGGraph,
katz_centrality as pylibcugraph_katz
)
from cugraph.utilities import (ensure_cugraph_obj_for_nx,
df_score_to_dictionary,
)
Expand Down Expand Up @@ -74,8 +74,6 @@ def katz_centrality(
The maximum number of iterations before an answer is returned. This can
be used to limit the execution time and do an early exit before the
solver reaches the convergence tolerance.
If this value is lower or equal to 0 cuGraph will use the default
value, which is 100.
tol : float, optional (default=1.0e-6)
Set the tolerance the approximation, this parameter should be a small
Expand Down Expand Up @@ -124,10 +122,9 @@ def katz_centrality(
elif (not isinstance(beta, float)) or (beta <= 0.0):
raise ValueError(f"'beta' must be a positive float or None, "
f"got: {beta}")
if (not isinstance(max_iter, int)):
raise ValueError(f"'max_iter' must be an integer, got: {max_iter}")
elif max_iter <= 0:
max_iter = 100
if (not isinstance(max_iter, int)) or (max_iter <= 0):
raise ValueError(f"'max_iter' must be a positive integer"
f", got: {max_iter}")
if (not isinstance(tol, float)) or (tol <= 0.0):
raise ValueError(f"'tol' must be a positive float, got: {tol}")

Expand Down
1 change: 1 addition & 0 deletions python/cugraph/cugraph/dask/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,3 +19,4 @@
from .community.louvain import louvain
from .centrality.katz_centrality import katz_centrality
from .components.connectivity import weakly_connected_components
from .centrality.eigenvector_centrality import eigenvector_centrality
Loading

0 comments on commit 6d0239d

Please sign in to comment.