Skip to content

Commit

Permalink
Refactor k-core (#2731)
Browse files Browse the repository at this point in the history
This PR refactors `k-core` by leveraging the C API and updates the Python cugraph tests. A multi-GPU (MG) implementation of k-core is also included in this PR.

closes #2689 
closes #2634 
closes #2637 
closes #2638

Authors:
  - Joseph Nke (https://github.com/jnke2016)
  - Chuck Hastings (https://github.com/ChuckHastings)

Approvers:
  - Chuck Hastings (https://github.com/ChuckHastings)
  - Rick Ratzel (https://github.com/rlratzel)

URL: #2731
  • Loading branch information
jnke2016 authored Nov 29, 2022
1 parent a6d85c0 commit 2eed6eb
Show file tree
Hide file tree
Showing 49 changed files with 833 additions and 315 deletions.
1 change: 0 additions & 1 deletion python/cugraph/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -93,7 +93,6 @@ rapids_cython_init()
add_subdirectory(cugraph/centrality)
add_subdirectory(cugraph/community)
add_subdirectory(cugraph/components)
add_subdirectory(cugraph/cores)
add_subdirectory(cugraph/dask/comms)
add_subdirectory(cugraph/dask/structure)
add_subdirectory(cugraph/generators)
Expand Down
3 changes: 0 additions & 3 deletions python/cugraph/cugraph/community/leiden.py
Original file line number Diff line number Diff line change
Expand Up @@ -74,9 +74,6 @@ def leiden(G, max_iter=100, resolution=1.0):
"""
G, isNx = ensure_cugraph_obj_for_nx(G)

if not G.edgelist.weights:
raise RuntimeError("input graph must be weighted")

if G.is_directed():
raise ValueError("input graph must be undirected")

Expand Down
3 changes: 0 additions & 3 deletions python/cugraph/cugraph/community/louvain.py
Original file line number Diff line number Diff line change
Expand Up @@ -77,9 +77,6 @@ def louvain(G, max_iter=100, resolution=1.0):

G, isNx = ensure_cugraph_obj_for_nx(G)

if not G.edgelist.weights:
raise RuntimeError("input graph must be weighted")

if G.is_directed():
raise ValueError("input graph must be undirected")

Expand Down
3 changes: 0 additions & 3 deletions python/cugraph/cugraph/community/subgraph_extraction.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,9 +58,6 @@ def subgraph(G, vertices):

G, isNx = ensure_cugraph_obj_for_nx(G)

if not G.edgelist.weights:
raise RuntimeError("input graph must be weighted")

if G.renumbered:
if isinstance(vertices, cudf.DataFrame):
vertices = G.lookup_internal_vertex_id(vertices, vertices.columns)
Expand Down
22 changes: 0 additions & 22 deletions python/cugraph/cugraph/cores/CMakeLists.txt

This file was deleted.

22 changes: 8 additions & 14 deletions python/cugraph/cugraph/cores/core_number.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,12 +16,11 @@
df_score_to_dictionary,
)
import cudf
import warnings

from pylibcugraph import core_number as pylibcugraph_core_number, ResourceHandle


def core_number(G, degree_type=None):
def core_number(G, degree_type="bidirectional"):
"""
Compute the core numbers for the nodes of the graph G. A k-core of a graph
is a maximal subgraph that contains nodes of degree k or more.
Expand All @@ -36,13 +35,12 @@ def core_number(G, degree_type=None):
represented as directed edges in both directions. While this graph
can contain edge weights, they don't participate in the calculation
of the core numbers.
The current implementation only supports undirected graphs.
degree_type: str
degree_type: str, (default="bidirectional")
This option determines if the core number computation should be based
on input, output, or both directed edges, with valid values being
"incoming", "outgoing", and "bidirectional" respectively.
This option is currently ignored in this release, and setting it will
result in a warning.
Returns
-------
Expand All @@ -65,19 +63,15 @@ def core_number(G, degree_type=None):

G, isNx = ensure_cugraph_obj_for_nx(G)

if degree_type is not None:
warning_msg = "The 'degree_type' parameter is ignored in this release."
warnings.warn(warning_msg, Warning)

if G.is_directed():
raise ValueError("input graph must be undirected")

# FIXME: enable this check once 'degree_type' is supported
"""
if degree_type not in ["incoming", "outgoing", "bidirectional"]:
raise ValueError(f"'degree_type' must be either incoming, "
f"outgoing or bidirectional, got: {degree_type}")
"""
raise ValueError(
f"'degree_type' must be either incoming, "
f"outgoing or bidirectional, got: {degree_type}"
)

vertex, core_number = pylibcugraph_core_number(
resource_handle=ResourceHandle(),
graph=G._plc_graph,
Expand Down
28 changes: 0 additions & 28 deletions python/cugraph/cugraph/cores/k_core.pxd

This file was deleted.

59 changes: 47 additions & 12 deletions python/cugraph/cugraph/cores/k_core.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,20 +11,25 @@
# See the License for the specific language governing permissions and
# limitations under the License.

from cugraph.cores import k_core_wrapper
import cudf
from pylibcugraph import core_number as pylibcugraph_core_number, ResourceHandle

from pylibcugraph import (
core_number as pylibcugraph_core_number,
k_core as pylibcugraph_k_core,
ResourceHandle,
)

from cugraph.utilities import (
ensure_cugraph_obj_for_nx,
cugraph_to_nx,
)


def _call_plc_core_number(G):
def _call_plc_core_number(G, degree_type):
vertex, core_number = pylibcugraph_core_number(
resource_handle=ResourceHandle(),
graph=G._plc_graph,
degree_type=None,
degree_type=degree_type,
do_expensive_check=False,
)

Expand All @@ -34,7 +39,7 @@ def _call_plc_core_number(G):
return df


def k_core(G, k=None, core_number=None):
def k_core(G, k=None, core_number=None, degree_type="bidirectional"):
"""
Compute the k-core of the graph G based on the out degree of its nodes. A
k-core of a graph is a maximal subgraph that contains nodes of degree k or
Expand All @@ -48,11 +53,17 @@ def k_core(G, k=None, core_number=None):
should contain undirected edges where undirected edges are represented
as directed edges in both directions. While this graph can contain edge
weights, they don't participate in the calculation of the k-core.
The current implementation only supports undirected graphs.
k : int, optional (default=None)
Order of the core. This value must not be negative. If set to None, the
main core is returned.
degree_type: str, (default="bidirectional")
This option determines if the core number computation should be based
on input, output, or both directed edges, with valid values being
"incoming", "outgoing", and "bidirectional" respectively.
core_number : cudf.DataFrame, optional (default=None)
Precomputed core number of the nodes of the graph G containing two
cudf.Series of size V: the vertex identifiers and the corresponding
Expand All @@ -79,34 +90,58 @@ def k_core(G, k=None, core_number=None):

G, isNx = ensure_cugraph_obj_for_nx(G)

if degree_type not in ["incoming", "outgoing", "bidirectional"]:
raise ValueError(
f"'degree_type' must be either incoming, "
f"outgoing or bidirectional, got: {degree_type}"
)

mytype = type(G)

KCoreGraph = mytype()

if G.is_directed():
raise ValueError("G must be an undirected Graph instance")

if core_number is not None:
if G.renumbered is True:
if core_number is None:
core_number = _call_plc_core_number(G, degree_type=degree_type)
else:
if G.renumbered:
if len(G.renumber_map.implementation.col_names) > 1:
cols = core_number.columns[:-1].to_list()
else:
cols = "vertex"
core_number = G.add_internal_vertex_id(core_number, "vertex", cols)

else:
core_number = _call_plc_core_number(G)
core_number = core_number.rename(columns={"core_number": "values"}, copy=False)
core_number = G.add_internal_vertex_id(core_number, "vertex", cols)

core_number = core_number.rename(columns={"core_number": "values"})
if k is None:
k = core_number["values"].max()

k_core_df = k_core_wrapper.k_core(G, k, core_number)
src_vertices, dst_vertices, weights = pylibcugraph_k_core(
resource_handle=ResourceHandle(),
graph=G._plc_graph,
degree_type=degree_type,
k=k,
core_result=core_number,
do_expensive_check=False,
)

k_core_df = cudf.DataFrame()
k_core_df["src"] = src_vertices
k_core_df["dst"] = dst_vertices
k_core_df["weight"] = weights

if G.renumbered:
k_core_df, src_names = G.unrenumber(k_core_df, "src", get_column_names=True)
k_core_df, dst_names = G.unrenumber(k_core_df, "dst", get_column_names=True)

else:
src_names = k_core_df.columns[0]
dst_names = k_core_df.columns[1]

if G.edgelist.weights:

KCoreGraph.from_cudf_edgelist(
k_core_df, source=src_names, destination=dst_names, edge_attr="weight"
)
Expand Down
59 changes: 0 additions & 59 deletions python/cugraph/cugraph/cores/k_core_wrapper.pyx

This file was deleted.

1 change: 1 addition & 0 deletions python/cugraph/cugraph/dask/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@
from .sampling.random_walks import random_walks
from .centrality.eigenvector_centrality import eigenvector_centrality
from .cores.core_number import core_number
from .cores.k_core import k_core
from .link_prediction.jaccard import jaccard
from .link_prediction.sorensen import sorensen
from .link_prediction.overlap import overlap
2 changes: 1 addition & 1 deletion python/cugraph/cugraph/dask/community/egonet.py
Original file line number Diff line number Diff line change
Expand Up @@ -80,7 +80,7 @@ def ego_graph(input_graph, n, radius=1, center=True):
Parameters
----------
G : cugraph.Graph, networkx.Graph
input_graph : cugraph.Graph, networkx.Graph
Graph or matrix object, which should contain the connectivity
information. Edge weights, if present, should be single or double
precision floating point values.
Expand Down
1 change: 1 addition & 0 deletions python/cugraph/cugraph/dask/cores/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,3 +12,4 @@
# limitations under the License.

from .core_number import core_number
from .k_core import k_core
Loading

0 comments on commit 2eed6eb

Please sign in to comment.