From 2eed6ebf07efb5ef2c3acef8a264fbe849cca135 Mon Sep 17 00:00:00 2001 From: Joseph Nke <76006812+jnke2016@users.noreply.github.com> Date: Tue, 29 Nov 2022 16:05:27 -0600 Subject: [PATCH] Refactor k-core (#2731) This PR refactors `k-core` by leveraging the CAPI and updates the python cugraph tests. An MG implementation of k-core is also included in this PR closes #2689 closes #2634 closes #2637 closes #2638 Authors: - Joseph Nke (https://github.com/jnke2016) - Chuck Hastings (https://github.com/ChuckHastings) Approvers: - Chuck Hastings (https://github.com/ChuckHastings) - Rick Ratzel (https://github.com/rlratzel) URL: https://github.com/rapidsai/cugraph/pull/2731 --- python/cugraph/CMakeLists.txt | 1 - python/cugraph/cugraph/community/leiden.py | 3 - python/cugraph/cugraph/community/louvain.py | 3 - .../cugraph/community/subgraph_extraction.py | 3 - python/cugraph/cugraph/cores/CMakeLists.txt | 22 -- python/cugraph/cugraph/cores/core_number.py | 22 +- python/cugraph/cugraph/cores/k_core.pxd | 28 --- python/cugraph/cugraph/cores/k_core.py | 59 +++-- .../cugraph/cugraph/cores/k_core_wrapper.pyx | 59 ----- python/cugraph/cugraph/dask/__init__.py | 1 + .../cugraph/cugraph/dask/community/egonet.py | 2 +- python/cugraph/cugraph/dask/cores/__init__.py | 1 + .../cugraph/cugraph/dask/cores/core_number.py | 20 +- python/cugraph/cugraph/dask/cores/k_core.py | 203 ++++++++++++++++++ .../cugraph/dask/link_prediction/jaccard.py | 12 +- .../cugraph/dask/link_prediction/overlap.py | 12 +- .../cugraph/dask/link_prediction/sorensen.py | 12 +- python/cugraph/cugraph/dask/traversal/sssp.py | 10 + .../experimental/link_prediction/jaccard.py | 7 +- .../experimental/link_prediction/overlap.py | 7 +- .../experimental/link_prediction/sorensen.py | 7 +- .../cugraph/link_prediction/jaccard.py | 2 +- .../simpleDistributedGraph.py | 5 +- .../graph_implementation/simpleGraph.py | 9 +- .../cugraph/tests/mg/test_mg_core_number.py | 21 +- .../cugraph/tests/mg/test_mg_jaccard.py | 2 +- 
.../cugraph/tests/mg/test_mg_k_core.py | 189 ++++++++++++++++ .../cugraph/tests/mg/test_mg_overlap.py | 2 +- .../cugraph/tests/mg/test_mg_sorensen.py | 2 +- .../cugraph/cugraph/tests/mg/test_mg_sssp.py | 30 +++ .../cugraph/cugraph/tests/test_core_number.py | 21 +- python/cugraph/cugraph/tests/test_jaccard.py | 7 +- python/cugraph/cugraph/tests/test_k_core.py | 33 ++- python/cugraph/cugraph/tests/test_leiden.py | 10 - python/cugraph/cugraph/tests/test_louvain.py | 6 - python/cugraph/cugraph/tests/test_overlap.py | 8 +- python/cugraph/cugraph/tests/test_sorensen.py | 6 +- python/cugraph/cugraph/tests/test_sssp.py | 8 +- .../cugraph/tests/test_subgraph_extraction.py | 6 - python/cugraph/cugraph/traversal/sssp.py | 7 +- .../pylibcugraph/pylibcugraph/CMakeLists.txt | 1 + python/pylibcugraph/pylibcugraph/__init__.py | 2 + .../_cugraph_c/core_algorithms.pxd | 48 ++++- .../pylibcugraph/pylibcugraph/core_number.pyx | 13 -- python/pylibcugraph/pylibcugraph/egonet.pyx | 3 - .../pylibcugraph/jaccard_coefficients.pyx | 15 +- python/pylibcugraph/pylibcugraph/k_core.pyx | 169 +++++++++++++++ .../pylibcugraph/overlap_coefficients.pyx | 14 +- .../pylibcugraph/sorensen_coefficients.pyx | 15 +- 49 files changed, 833 insertions(+), 315 deletions(-) delete mode 100644 python/cugraph/cugraph/cores/CMakeLists.txt delete mode 100644 python/cugraph/cugraph/cores/k_core.pxd delete mode 100644 python/cugraph/cugraph/cores/k_core_wrapper.pyx create mode 100644 python/cugraph/cugraph/dask/cores/k_core.py create mode 100644 python/cugraph/cugraph/tests/mg/test_mg_k_core.py create mode 100644 python/pylibcugraph/pylibcugraph/k_core.pyx diff --git a/python/cugraph/CMakeLists.txt b/python/cugraph/CMakeLists.txt index edd080524aa..d6f6ba47787 100644 --- a/python/cugraph/CMakeLists.txt +++ b/python/cugraph/CMakeLists.txt @@ -93,7 +93,6 @@ rapids_cython_init() add_subdirectory(cugraph/centrality) add_subdirectory(cugraph/community) add_subdirectory(cugraph/components) -add_subdirectory(cugraph/cores) 
add_subdirectory(cugraph/dask/comms) add_subdirectory(cugraph/dask/structure) add_subdirectory(cugraph/generators) diff --git a/python/cugraph/cugraph/community/leiden.py b/python/cugraph/cugraph/community/leiden.py index 6a5dc6dd105..5b81e34f4a1 100644 --- a/python/cugraph/cugraph/community/leiden.py +++ b/python/cugraph/cugraph/community/leiden.py @@ -74,9 +74,6 @@ def leiden(G, max_iter=100, resolution=1.0): """ G, isNx = ensure_cugraph_obj_for_nx(G) - if not G.edgelist.weights: - raise RuntimeError("input graph must be weighted") - if G.is_directed(): raise ValueError("input graph must be undirected") diff --git a/python/cugraph/cugraph/community/louvain.py b/python/cugraph/cugraph/community/louvain.py index 310468a032c..a313aa44048 100644 --- a/python/cugraph/cugraph/community/louvain.py +++ b/python/cugraph/cugraph/community/louvain.py @@ -77,9 +77,6 @@ def louvain(G, max_iter=100, resolution=1.0): G, isNx = ensure_cugraph_obj_for_nx(G) - if not G.edgelist.weights: - raise RuntimeError("input graph must be weighted") - if G.is_directed(): raise ValueError("input graph must be undirected") diff --git a/python/cugraph/cugraph/community/subgraph_extraction.py b/python/cugraph/cugraph/community/subgraph_extraction.py index 44479b2b11c..e83c1623d48 100644 --- a/python/cugraph/cugraph/community/subgraph_extraction.py +++ b/python/cugraph/cugraph/community/subgraph_extraction.py @@ -58,9 +58,6 @@ def subgraph(G, vertices): G, isNx = ensure_cugraph_obj_for_nx(G) - if not G.edgelist.weights: - raise RuntimeError("input graph must be weighted") - if G.renumbered: if isinstance(vertices, cudf.DataFrame): vertices = G.lookup_internal_vertex_id(vertices, vertices.columns) diff --git a/python/cugraph/cugraph/cores/CMakeLists.txt b/python/cugraph/cugraph/cores/CMakeLists.txt deleted file mode 100644 index 713a09bdf47..00000000000 --- a/python/cugraph/cugraph/cores/CMakeLists.txt +++ /dev/null @@ -1,22 +0,0 @@ -# 
============================================================================= -# Copyright (c) 2022, NVIDIA CORPORATION. -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except -# in compliance with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software distributed under the License -# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express -# or implied. See the License for the specific language governing permissions and limitations under -# the License. -# ============================================================================= - -set(cython_sources k_core_wrapper.pyx) -set(linked_libraries cugraph::cugraph) -rapids_cython_create_modules( - CXX - SOURCE_FILES "${cython_sources}" - LINKED_LIBRARIES "${linked_libraries}" MODULE_PREFIX cores_ - ASSOCIATED_TARGETS cugraph -) diff --git a/python/cugraph/cugraph/cores/core_number.py b/python/cugraph/cugraph/cores/core_number.py index 6c6f663e6f2..84153632f58 100644 --- a/python/cugraph/cugraph/cores/core_number.py +++ b/python/cugraph/cugraph/cores/core_number.py @@ -16,12 +16,11 @@ df_score_to_dictionary, ) import cudf -import warnings from pylibcugraph import core_number as pylibcugraph_core_number, ResourceHandle -def core_number(G, degree_type=None): +def core_number(G, degree_type="bidirectional"): """ Compute the core numbers for the nodes of the graph G. A k-core of a graph is a maximal subgraph that contains nodes of degree k or more. @@ -36,13 +35,12 @@ def core_number(G, degree_type=None): represented as directed edges in both directions. While this graph can contain edge weights, they don't participate in the calculation of the core numbers. + The current implementation only supports undirected graphs. 
- degree_type: str + degree_type: str, (default="bidirectional") This option determines if the core number computation should be based on input, output, or both directed edges, with valid values being "incoming", "outgoing", and "bidirectional" respectively. - This option is currently ignored in this release, and setting it will - result in a warning. Returns ------- @@ -65,19 +63,15 @@ def core_number(G, degree_type=None): G, isNx = ensure_cugraph_obj_for_nx(G) - if degree_type is not None: - warning_msg = "The 'degree_type' parameter is ignored in this release." - warnings.warn(warning_msg, Warning) - if G.is_directed(): raise ValueError("input graph must be undirected") - # FIXME: enable this check once 'degree_type' is supported - """ if degree_type not in ["incoming", "outgoing", "bidirectional"]: - raise ValueError(f"'degree_type' must be either incoming, " - f"outgoing or bidirectional, got: {degree_type}") - """ + raise ValueError( + f"'degree_type' must be either incoming, " + f"outgoing or bidirectional, got: {degree_type}" + ) + vertex, core_number = pylibcugraph_core_number( resource_handle=ResourceHandle(), graph=G._plc_graph, diff --git a/python/cugraph/cugraph/cores/k_core.pxd b/python/cugraph/cugraph/cores/k_core.pxd deleted file mode 100644 index 1d22e7ac4d2..00000000000 --- a/python/cugraph/cugraph/cores/k_core.pxd +++ /dev/null @@ -1,28 +0,0 @@ -# Copyright (c) 2019-2021, NVIDIA CORPORATION. -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -# cython: profile=False -# distutils: language = c++ -# cython: embedsignature = True -# cython: language_level = 3 - -from cugraph.structure.graph_primtypes cimport * - -cdef extern from "cugraph/algorithms.hpp" namespace "cugraph": - - cdef unique_ptr[GraphCOO[VT,ET,WT]] k_core[VT,ET,WT]( - const GraphCOOView[VT,ET,WT] &in_graph, - int k, - const VT *vertex_id, - const VT *core_number, - VT num_vertex_ids) except + diff --git a/python/cugraph/cugraph/cores/k_core.py b/python/cugraph/cugraph/cores/k_core.py index eae390d20ba..b1cc796a7dd 100644 --- a/python/cugraph/cugraph/cores/k_core.py +++ b/python/cugraph/cugraph/cores/k_core.py @@ -11,20 +11,25 @@ # See the License for the specific language governing permissions and # limitations under the License. -from cugraph.cores import k_core_wrapper import cudf -from pylibcugraph import core_number as pylibcugraph_core_number, ResourceHandle + +from pylibcugraph import ( + core_number as pylibcugraph_core_number, + k_core as pylibcugraph_k_core, + ResourceHandle, +) + from cugraph.utilities import ( ensure_cugraph_obj_for_nx, cugraph_to_nx, ) -def _call_plc_core_number(G): +def _call_plc_core_number(G, degree_type): vertex, core_number = pylibcugraph_core_number( resource_handle=ResourceHandle(), graph=G._plc_graph, - degree_type=None, + degree_type=degree_type, do_expensive_check=False, ) @@ -34,7 +39,7 @@ def _call_plc_core_number(G): return df -def k_core(G, k=None, core_number=None): +def k_core(G, k=None, core_number=None, degree_type="bidirectional"): """ Compute the k-core of the graph G based on the out degree of its nodes. A k-core of a graph is a maximal subgraph that contains nodes of degree k or @@ -48,11 +53,17 @@ def k_core(G, k=None, core_number=None): should contain undirected edges where undirected edges are represented as directed edges in both directions. While this graph can contain edge weights, they don't participate in the calculation of the k-core. 
+ The current implementation only supports undirected graphs. k : int, optional (default=None) Order of the core. This value must not be negative. If set to None, the main core is returned. + degree_type: str, (default="bidirectional") + This option determines if the core number computation should be based + on input, output, or both directed edges, with valid values being + "incoming", "outgoing", and "bidirectional" respectively. + core_number : cudf.DataFrame, optional (default=None) Precomputed core number of the nodes of the graph G containing two cudf.Series of size V: the vertex identifiers and the corresponding @@ -79,34 +90,58 @@ def k_core(G, k=None, core_number=None): G, isNx = ensure_cugraph_obj_for_nx(G) + if degree_type not in ["incoming", "outgoing", "bidirectional"]: + raise ValueError( + f"'degree_type' must be either incoming, " + f"outgoing or bidirectional, got: {degree_type}" + ) + mytype = type(G) + KCoreGraph = mytype() if G.is_directed(): raise ValueError("G must be an undirected Graph instance") - if core_number is not None: - if G.renumbered is True: + if core_number is None: + core_number = _call_plc_core_number(G, degree_type=degree_type) + else: + if G.renumbered: if len(G.renumber_map.implementation.col_names) > 1: cols = core_number.columns[:-1].to_list() else: cols = "vertex" - core_number = G.add_internal_vertex_id(core_number, "vertex", cols) - else: - core_number = _call_plc_core_number(G) - core_number = core_number.rename(columns={"core_number": "values"}, copy=False) + core_number = G.add_internal_vertex_id(core_number, "vertex", cols) + core_number = core_number.rename(columns={"core_number": "values"}) if k is None: k = core_number["values"].max() - k_core_df = k_core_wrapper.k_core(G, k, core_number) + src_vertices, dst_vertices, weights = pylibcugraph_k_core( + resource_handle=ResourceHandle(), + graph=G._plc_graph, + degree_type=degree_type, + k=k, + core_result=core_number, + do_expensive_check=False, + ) + + k_core_df = 
cudf.DataFrame() + k_core_df["src"] = src_vertices + k_core_df["dst"] = dst_vertices + k_core_df["weight"] = weights if G.renumbered: k_core_df, src_names = G.unrenumber(k_core_df, "src", get_column_names=True) k_core_df, dst_names = G.unrenumber(k_core_df, "dst", get_column_names=True) + else: + src_names = k_core_df.columns[0] + dst_names = k_core_df.columns[1] + if G.edgelist.weights: + KCoreGraph.from_cudf_edgelist( k_core_df, source=src_names, destination=dst_names, edge_attr="weight" ) diff --git a/python/cugraph/cugraph/cores/k_core_wrapper.pyx b/python/cugraph/cugraph/cores/k_core_wrapper.pyx deleted file mode 100644 index 28bb191f4f4..00000000000 --- a/python/cugraph/cugraph/cores/k_core_wrapper.pyx +++ /dev/null @@ -1,59 +0,0 @@ -# Copyright (c) 2019-2021, NVIDIA CORPORATION. -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -# cython: profile=False -# distutils: language = c++ -# cython: embedsignature = True -# cython: language_level = 3 - -from cugraph.cores.k_core cimport k_core as c_k_core -from cugraph.structure.graph_primtypes cimport * -from cugraph.structure import graph_primtypes_wrapper -from libc.stdint cimport uintptr_t -import numpy as np - - -#### FIXME: Should return data frame instead of passing in k_core_graph... 
-#### Ripple down through implementation (algorithms.hpp, core_number.cu) - -cdef (uintptr_t, uintptr_t) core_number_params(core_number): - [core_number['vertex'], core_number['values']] = graph_primtypes_wrapper.datatype_cast([core_number['vertex'], core_number['values']], [np.int32]) - cdef uintptr_t c_vertex = core_number['vertex'].__cuda_array_interface__['data'][0] - cdef uintptr_t c_values = core_number['values'].__cuda_array_interface__['data'][0] - return (c_vertex, c_values) - - -def k_core_float(input_graph, k, core_number): - c_vertex, c_values = core_number_params(core_number) - cdef GraphCOOViewFloat in_graph = get_graph_view[GraphCOOViewFloat](input_graph) - return coo_to_df(move(c_k_core[int,int,float](in_graph, k, c_vertex, c_values, len(core_number)))) - - -def k_core_double(input_graph, k, core_number): - c_vertex, c_values = core_number_params(core_number) - cdef GraphCOOViewDouble in_graph = get_graph_view[GraphCOOViewDouble](input_graph) - return coo_to_df(move(c_k_core[int,int,double](in_graph, k, c_vertex, c_values, len(core_number)))) - - -def k_core(input_graph, k, core_number): - """ - Call k_core - """ - [input_graph.edgelist.edgelist_df['src'], - input_graph.edgelist.edgelist_df['dst']] = graph_primtypes_wrapper.datatype_cast([input_graph.edgelist.edgelist_df['src'], - input_graph.edgelist.edgelist_df['dst']], - [np.int32]) - if graph_primtypes_wrapper.weight_type(input_graph) == np.float64: - return k_core_double(input_graph, k, core_number) - else: - return k_core_float(input_graph, k, core_number) diff --git a/python/cugraph/cugraph/dask/__init__.py b/python/cugraph/cugraph/dask/__init__.py index ec685150824..ef9503b4349 100644 --- a/python/cugraph/cugraph/dask/__init__.py +++ b/python/cugraph/cugraph/dask/__init__.py @@ -26,6 +26,7 @@ from .sampling.random_walks import random_walks from .centrality.eigenvector_centrality import eigenvector_centrality from .cores.core_number import core_number +from .cores.k_core import k_core from 
.link_prediction.jaccard import jaccard from .link_prediction.sorensen import sorensen from .link_prediction.overlap import overlap diff --git a/python/cugraph/cugraph/dask/community/egonet.py b/python/cugraph/cugraph/dask/community/egonet.py index 32feee12370..e7a0e7c2870 100644 --- a/python/cugraph/cugraph/dask/community/egonet.py +++ b/python/cugraph/cugraph/dask/community/egonet.py @@ -80,7 +80,7 @@ def ego_graph(input_graph, n, radius=1, center=True): Parameters ---------- - G : cugraph.Graph, networkx.Graph + input_graph : cugraph.Graph, networkx.Graph Graph or matrix object, which should contain the connectivity information. Edge weights, if present, should be single or double precision floating point values. diff --git a/python/cugraph/cugraph/dask/cores/__init__.py b/python/cugraph/cugraph/dask/cores/__init__.py index 4dff071ce91..76241cbf17f 100644 --- a/python/cugraph/cugraph/dask/cores/__init__.py +++ b/python/cugraph/cugraph/dask/cores/__init__.py @@ -12,3 +12,4 @@ # limitations under the License. from .core_number import core_number +from .k_core import k_core diff --git a/python/cugraph/cugraph/dask/cores/core_number.py b/python/cugraph/cugraph/dask/cores/core_number.py index d0d11b7de75..2b12722e35a 100644 --- a/python/cugraph/cugraph/dask/cores/core_number.py +++ b/python/cugraph/cugraph/dask/cores/core_number.py @@ -17,7 +17,6 @@ import cugraph.dask.comms.comms as Comms import dask_cudf import cudf -import warnings from pylibcugraph import ResourceHandle, core_number as pylibcugraph_core_number @@ -43,7 +42,7 @@ def _call_plc_core_number(sID, mg_graph_x, dt_x, do_expensive_check): ) -def core_number(input_graph, degree_type=None): +def core_number(input_graph, degree_type="bidirectional"): """ Compute the core numbers for the nodes of the graph G. A k-core of a graph is a maximal subgraph that contains nodes of degree k or more. @@ -58,12 +57,10 @@ def core_number(input_graph, degree_type=None): (edge weights are not used in this algorithm). 
The current implementation only supports undirected graphs. - degree_type: str + degree_type: str, (default="bidirectional") This option determines if the core number computation should be based on input, output, or both directed edges, with valid values being "incoming", "outgoing", and "bidirectional" respectively. - This option is currently ignored in this release, and setting it will - result in a warning. Returns @@ -80,16 +77,11 @@ def core_number(input_graph, degree_type=None): if input_graph.is_directed(): raise ValueError("input graph must be undirected") - if degree_type is not None: - warning_msg = "The 'degree_type' parameter is ignored in this release." - warnings.warn(warning_msg, Warning) - - # FIXME: enable this check once 'degree_type' is supported - """ if degree_type not in ["incoming", "outgoing", "bidirectional"]: - raise ValueError(f"'degree_type' must be either incoming, " - f"outgoing or bidirectional, got: {degree_type}") - """ + raise ValueError( + f"'degree_type' must be either incoming, " + f"outgoing or bidirectional, got: {degree_type}" + ) # Initialize dask client client = input_graph._client diff --git a/python/cugraph/cugraph/dask/cores/k_core.py b/python/cugraph/cugraph/dask/cores/k_core.py new file mode 100644 index 00000000000..beef1e434d2 --- /dev/null +++ b/python/cugraph/cugraph/dask/cores/k_core.py @@ -0,0 +1,203 @@ +# Copyright (c) 2022, NVIDIA CORPORATION. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# + +from dask.distributed import wait +import cugraph.dask.comms.comms as Comms +from cugraph.dask.common.input_utils import get_distributed_data +import dask_cudf +import cudf + +import cugraph.dask as dcg +from pylibcugraph import ResourceHandle, k_core as pylibcugraph_k_core + + +def convert_to_cudf(cp_arrays): + """ + Creates a cudf DataFrame from cupy arrays from pylibcugraph wrapper + """ + cupy_src_vertices, cupy_dst_vertices, cupy_weights = cp_arrays + df = cudf.DataFrame() + df["src"] = cupy_src_vertices + df["dst"] = cupy_dst_vertices + df["weights"] = cupy_weights + + return df + + +def _call_plc_k_core(sID, mg_graph_x, k, degree_type, core_result, do_expensive_check): + + return pylibcugraph_k_core( + resource_handle=ResourceHandle(Comms.get_handle(sID).getHandle()), + graph=mg_graph_x, + k=k, + degree_type=degree_type, + core_result=core_result, + do_expensive_check=do_expensive_check, + ) + + +def k_core(input_graph, k=None, core_number=None, degree_type="bidirectional"): + """ + Compute the k-core of the graph G based on the out degree of its nodes. A + k-core of a graph is a maximal subgraph that contains nodes of degree k or + more. This call does not support a graph with self-loops and parallel + edges. + + Parameters + ---------- + input_graph : cuGraph.Graph + cuGraph graph descriptor with connectivity information. The graph + should contain undirected edges where undirected edges are represented + as directed edges in both directions. While this graph can contain edge + weights, they don't participate in the calculation of the k-core. + The current implementation only supports undirected graphs. + + k : int, optional (default=None) + Order of the core. This value must not be negative. If set to None, the + main core is returned. 
+ + degree_type: str (default="bidirectional") + This option determines if the core number computation should be based + on input, output, or both directed edges, with valid values being + "incoming", "outgoing", and "bidirectional" respectively. + + core_number : cudf.DataFrame or dask_cudf.DataFrame, optional (default=None) + Precomputed core number of the nodes of the graph G containing two + cudf.Series of size V: the vertex identifiers and the corresponding + core number values. If set to None, the core numbers of the nodes are + calculated internally. + + core_number['vertex'] : cudf.Series or dask_cudf.Series + Contains the vertex identifiers + core_number['values'] : cudf.Series or dask_cudf.Series + Contains the core number of vertices + + Returns + ------- + result : dask_cudf.DataFrame + GPU distributed data frame containing the K Core of the input graph + + ddf['src']: dask_cudf.Series + Contains sources of the K Core + ddf['dst']: dask_cudf.Series + Contains destinations of the K Core + + and/or + + ddf['weights']: dask_cudf.Series + Contains weights of the K Core + + + Examples + -------- + >>> import cugraph.dask as dcg + >>> import dask_cudf + >>> # ... Init a DASK Cluster + >>> # see https://docs.rapids.ai/api/cugraph/stable/dask-cugraph.html + >>> # Download dataset from https://github.com/rapidsai/cugraph/datasets/.. + >>> chunksize = dcg.get_chunksize(datasets_path / "karate.csv") + >>> ddf = dask_cudf.read_csv(datasets_path / "karate.csv", + ... chunksize=chunksize, delimiter=" ", + ... names=["src", "dst", "value"], + ... dtype=["int32", "int32", "float32"]) + >>> dg = cugraph.Graph(directed=False) + >>> dg.from_dask_cudf_edgelist(ddf, source='src', destination='dst', + ... 
edge_attr='value') + >>> KCore_df = dcg.k_core(dg) + """ + + if degree_type not in ["incoming", "outgoing", "bidirectional"]: + raise ValueError( + f"'degree_type' must be either incoming, " + f"outgoing or bidirectional, got: {degree_type}" + ) + + if input_graph.is_directed(): + raise ValueError("input graph must be undirected") + + if core_number is None: + core_number = dcg.core_number(input_graph) + + if input_graph.renumbered is True: + + if len(input_graph.renumber_map.implementation.col_names) > 1: + cols = core_number.columns[:-1].to_list() + else: + cols = "vertex" + + core_number = input_graph.add_internal_vertex_id( + core_number, "vertex", cols + ) + + if not isinstance(core_number, dask_cudf.DataFrame): + if isinstance(core_number, cudf.DataFrame): + # convert to dask_cudf in order to distribute the edges + core_number = dask_cudf.from_cudf(core_number, input_graph._npartitions) + + else: + raise TypeError( + f"'core_number' must be either None or of" + f"type cudf/dask_cudf, got: {type(core_number)}" + ) + + core_number = core_number.rename(columns={"core_number": "values"}) + if k is None: + k = core_number["values"].max().compute() + + core_number = get_distributed_data(core_number) + wait(core_number) + core_number = core_number.worker_to_parts + + client = input_graph._client + + do_expensive_check = False + + result = [ + client.submit( + _call_plc_k_core, + Comms.get_session_id(), + input_graph._plc_graph[w], + k, + degree_type, + core_number[w][0], + do_expensive_check, + workers=[w], + allow_other_workers=False, + ) + for w in Comms.get_workers() + ] + + wait(result) + + cudf_result = [client.submit(convert_to_cudf, cp_arrays) for cp_arrays in result] + + wait(cudf_result) + + ddf = dask_cudf.from_delayed(cudf_result).persist() + wait(ddf) + + # FIXME: Dask doesn't always release it fast enough. 
+ # For instance if the algo is run several times with + # the same PLC graph, the current iteration might try to cache + # the past iteration's futures and this can cause a hang if some + # of those futures get released midway + del result + del cudf_result + + if input_graph.renumbered: + ddf = input_graph.unrenumber(ddf, "src") + ddf = input_graph.unrenumber(ddf, "dst") + + return ddf diff --git a/python/cugraph/cugraph/dask/link_prediction/jaccard.py b/python/cugraph/cugraph/dask/link_prediction/jaccard.py index 031a34473a3..62cb9a14a86 100644 --- a/python/cugraph/cugraph/dask/link_prediction/jaccard.py +++ b/python/cugraph/cugraph/dask/link_prediction/jaccard.py @@ -103,7 +103,7 @@ def jaccard(input_graph, vertex_pair=None, use_weight=False): ---------- input_graph : cugraph.Graph cuGraph Graph instance, should contain the connectivity information - as an edge list (edge weights are not used for this algorithm). The + as an edge list (edge weights are not supported yet for this algorithm). The graph should be undirected where an undirected edge is represented by a directed edge in both direction. The adjacency list will be computed if not already present. 
@@ -145,14 +145,10 @@ def jaccard(input_graph, vertex_pair=None, use_weight=False): vertex_pair_col_name = vertex_pair.columns if use_weight: - raise ValueError( - "'use_weight' is currently not supported and must be set to 'False'" - ) + raise ValueError("'use_weight' is currently not supported.") - # FIXME: Implement a better way to check if the graph is weighted similar - # to 'simpleGraph' - if len(input_graph.edgelist.edgelist_df.columns) == 3: - raise RuntimeError("input graph must be unweighted") + if input_graph.is_weighted(): + raise ValueError("Weighted graphs are currently not supported.") if isinstance(vertex_pair, (dask_cudf.DataFrame, cudf.DataFrame)): vertex_pair = renumber_vertex_pair(input_graph, vertex_pair) diff --git a/python/cugraph/cugraph/dask/link_prediction/overlap.py b/python/cugraph/cugraph/dask/link_prediction/overlap.py index 024676ea849..def274cf974 100644 --- a/python/cugraph/cugraph/dask/link_prediction/overlap.py +++ b/python/cugraph/cugraph/dask/link_prediction/overlap.py @@ -81,7 +81,7 @@ def overlap(input_graph, vertex_pair=None, use_weight=False): ---------- input_graph : cugraph.Graph cuGraph Graph instance, should contain the connectivity information - as an edge list (edge weights are not used for this algorithm). The + as an edge list (edge weights are not supported yet for this algorithm). The graph should be undirected where an undirected edge is represented by a directed edge in both direction. The adjacency list will be computed if not already present. 
@@ -123,14 +123,10 @@ def overlap(input_graph, vertex_pair=None, use_weight=False): vertex_pair_col_name = vertex_pair.columns if use_weight: - raise ValueError( - "'use_weight' is currently not supported and must be set to 'False'" - ) + raise ValueError("'use_weight' is currently not supported.") - # FIXME: Implement a better way to check if the graph is weighted similar - # to 'simpleGraph' - if len(input_graph.edgelist.edgelist_df.columns) == 3: - raise RuntimeError("input graph must be unweighted") + if input_graph.is_weighted(): + raise ValueError("Weighted graphs are currently not supported.") if isinstance(vertex_pair, (dask_cudf.DataFrame, cudf.DataFrame)): vertex_pair = renumber_vertex_pair(input_graph, vertex_pair) diff --git a/python/cugraph/cugraph/dask/link_prediction/sorensen.py b/python/cugraph/cugraph/dask/link_prediction/sorensen.py index a42e7730a04..de1116fa6ce 100644 --- a/python/cugraph/cugraph/dask/link_prediction/sorensen.py +++ b/python/cugraph/cugraph/dask/link_prediction/sorensen.py @@ -77,7 +77,7 @@ def sorensen(input_graph, vertex_pair=None, use_weight=False): ---------- input_graph : cugraph.Graph cuGraph Graph instance, should contain the connectivity information - as an edge list (edge weights are not used for this algorithm). The + as an edge list (edge weights are not supported yet for this algorithm). The graph should be undirected where an undirected edge is represented by a directed edge in both direction. The adjacency list will be computed if not already present. 
@@ -119,14 +119,10 @@ def sorensen(input_graph, vertex_pair=None, use_weight=False): vertex_pair_col_name = vertex_pair.columns if use_weight: - raise ValueError( - "'use_weight' is currently not supported and must be set to 'False'" - ) + raise ValueError("'use_weight' is currently not supported.") - # FIXME: Implement a better way to check if the graph is weighted similar - # to 'simpleGraph' - if len(input_graph.edgelist.edgelist_df.columns) == 3: - raise RuntimeError("input graph must be unweighted") + if input_graph.is_weighted(): + raise ValueError("Weighted graphs are currently not supported.") if isinstance(vertex_pair, (dask_cudf.DataFrame, cudf.DataFrame)): vertex_pair = renumber_vertex_pair(input_graph, vertex_pair) diff --git a/python/cugraph/cugraph/dask/traversal/sssp.py b/python/cugraph/cugraph/dask/traversal/sssp.py index bd92e30f835..16789b547da 100644 --- a/python/cugraph/cugraph/dask/traversal/sssp.py +++ b/python/cugraph/cugraph/dask/traversal/sssp.py @@ -20,6 +20,7 @@ import cudf import dask_cudf from pylibcugraph import sssp as pylibcugraph_sssp, ResourceHandle +import warnings def _call_plc_sssp( @@ -99,6 +100,15 @@ def sssp(input_graph, source, cutoff=None, check_source=True): """ + # FIXME: Implement a better way to check if the graph is weighted similar + # to 'simpleGraph' + if len(input_graph.edgelist.edgelist_df.columns) != 3: + warning_msg = ( + "'SSSP' requires the input graph to be weighted: Unweighted " + "graphs will not be supported in the next release." 
+ ) + warnings.warn(warning_msg, PendingDeprecationWarning) + client = input_graph._client def check_valid_vertex(G, source): diff --git a/python/cugraph/cugraph/experimental/link_prediction/jaccard.py b/python/cugraph/cugraph/experimental/link_prediction/jaccard.py index 867e30ae5a4..84aee3561d3 100644 --- a/python/cugraph/cugraph/experimental/link_prediction/jaccard.py +++ b/python/cugraph/cugraph/experimental/link_prediction/jaccard.py @@ -120,11 +120,14 @@ def EXPERIMENTAL__jaccard(G, vertex_pair=None, use_weight=False): raise ValueError("Input must be an undirected Graph.") if G.edgelist.weights: - raise RuntimeError("input graph must be unweighted") + raise ValueError( + "Weighted graphs are currently not supported " + "but will be in the next release." + ) if use_weight: raise ValueError( - "'use_weight' is currently not supported and must be set to 'False'" + "'use_weight' is currently not supported but will " "be in the next release" ) if vertex_pair is None: diff --git a/python/cugraph/cugraph/experimental/link_prediction/overlap.py b/python/cugraph/cugraph/experimental/link_prediction/overlap.py index c120e4e6282..372cd93e727 100644 --- a/python/cugraph/cugraph/experimental/link_prediction/overlap.py +++ b/python/cugraph/cugraph/experimental/link_prediction/overlap.py @@ -155,11 +155,14 @@ def EXPERIMENTAL__overlap(G, vertex_pair=None, use_weight=False): raise ValueError("Input must be an undirected Graph.") if G.edgelist.weights: - raise RuntimeError("input graph must be unweighted") + raise ValueError( + "Weighted graphs are currently not supported " + "but will be in the next release." 
+ ) if use_weight: raise ValueError( - "'use_weight' is currently not supported and must be set to 'False'" + "'use_weight' is currently not supported but will " "be in the next release" ) if vertex_pair is None: diff --git a/python/cugraph/cugraph/experimental/link_prediction/sorensen.py b/python/cugraph/cugraph/experimental/link_prediction/sorensen.py index 2f7bec52973..ca2069ec44c 100644 --- a/python/cugraph/cugraph/experimental/link_prediction/sorensen.py +++ b/python/cugraph/cugraph/experimental/link_prediction/sorensen.py @@ -88,11 +88,14 @@ def EXPERIMENTAL__sorensen(G, vertex_pair=None, use_weight=False): raise ValueError("Input must be an undirected Graph.") if G.edgelist.weights: - raise RuntimeError("input graph must be unweighted") + raise ValueError( + "Weighted graphs are currently not supported " + "but will be in the next release." + ) if use_weight: raise ValueError( - "'use_weight' is currently not supported and must be set to 'False'" + "'use_weight' is currently not supported but will " "be in the next release" ) if vertex_pair is None: diff --git a/python/cugraph/cugraph/link_prediction/jaccard.py b/python/cugraph/cugraph/link_prediction/jaccard.py index 2c6a1d5d905..2e5590a0cca 100644 --- a/python/cugraph/cugraph/link_prediction/jaccard.py +++ b/python/cugraph/cugraph/link_prediction/jaccard.py @@ -66,7 +66,7 @@ def jaccard(input_graph, vertex_pair=None): Parameters ---------- - graph : cugraph.Graph + input_graph : cugraph.Graph cuGraph Graph instance, should contain the connectivity information as an edge list (edge weights are not used for this algorithm). 
The graph should be undirected where an undirected edge is represented by a diff --git a/python/cugraph/cugraph/structure/graph_implementation/simpleDistributedGraph.py b/python/cugraph/cugraph/structure/graph_implementation/simpleDistributedGraph.py index ae05960dafd..1d78b2c89de 100644 --- a/python/cugraph/cugraph/structure/graph_implementation/simpleDistributedGraph.py +++ b/python/cugraph/cugraph/structure/graph_implementation/simpleDistributedGraph.py @@ -15,6 +15,7 @@ from cugraph.structure.graph_primtypes_wrapper import Direction from cugraph.structure.number_map import NumberMap from cugraph.structure.symmetrize import symmetrize +import cupy import cudf import dask_cudf @@ -85,8 +86,8 @@ def _make_plc_graph( elif values.dtype == "int64": values = values.astype("float64") else: - # values = cudf.Series(cupy.ones(len(edata_x[0]), dtype="float32")) - values = None + # Some algos require the graph to be weighted + values = cudf.Series(cupy.ones(len(edata_x[0]), dtype="float32")) if simpleDistributedGraphImpl.edgeIdCol in edata_x[0]: if simpleDistributedGraphImpl.edgeTypeCol not in edata_x[0]: diff --git a/python/cugraph/cugraph/structure/graph_implementation/simpleGraph.py b/python/cugraph/cugraph/structure/graph_implementation/simpleGraph.py index c4e027dce8c..992b8160b46 100644 --- a/python/cugraph/cugraph/structure/graph_implementation/simpleGraph.py +++ b/python/cugraph/cugraph/structure/graph_implementation/simpleGraph.py @@ -16,6 +16,7 @@ from cugraph.structure.symmetrize import symmetrize from cugraph.structure.number_map import NumberMap import cugraph.dask.common.mg_utils as mg_utils +import cupy import cudf import dask_cudf import cugraph.dask.comms.comms as Comms @@ -369,6 +370,7 @@ def view_edge_list(self): edgelist_df, simpleGraphImpl.dstCol ) + # FIXME: revisit this approach if not self.properties.directed: edgelist_df = edgelist_df[ edgelist_df[simpleGraphImpl.srcCol] @@ -879,7 +881,12 @@ def _make_plc_graph(self, value_col=None, 
store_transposed=False, renumber=True) else: raise ValueError(f"Illegal value col {type(value_col)}") - if weight_col is not None: + if weight_col is None: + # Some algos require the graph to be weighted + weight_col = cudf.Series( + cupy.ones(len(self.edgelist.edgelist_df), dtype="float32") + ) + else: weight_t = weight_col.dtype if weight_t == "int32": diff --git a/python/cugraph/cugraph/tests/mg/test_mg_core_number.py b/python/cugraph/cugraph/tests/mg/test_mg_core_number.py index f2e627ea37e..ef2c43b5cdc 100644 --- a/python/cugraph/cugraph/tests/mg/test_mg_core_number.py +++ b/python/cugraph/cugraph/tests/mg/test_mg_core_number.py @@ -32,7 +32,7 @@ def setup_function(): # Pytest fixtures # ============================================================================= datasets = utils.DATASETS_UNDIRECTED -degree_type = ["incoming", "outgoing"] +degree_type = ["incoming", "outgoing", "bidirectional"] fixture_params = utils.genFixtureParamsProduct( (datasets, "graph_file"), @@ -106,11 +106,8 @@ def test_sg_core_number(dask_client, benchmark, input_expected_output): sg_core_number_results = None G = input_expected_output["SGGraph"] degree_type = input_expected_output["degree_type"] - warning_msg = "The 'degree_type' parameter is ignored in this release." - # FIXME: Remove this warning test once 'degree_type' is supported" - with pytest.warns(Warning, match=warning_msg): - sg_core_number_results = benchmark(cugraph.core_number, G, degree_type) + sg_core_number_results = benchmark(cugraph.core_number, G, degree_type) assert sg_core_number_results is not None @@ -119,11 +116,7 @@ def test_core_number(dask_client, benchmark, input_expected_output): dg = input_expected_output["MGGraph"] degree_type = input_expected_output["degree_type"] - warning_msg = "The 'degree_type' parameter is ignored in this release." 
- - # FIXME: Remove this warning test once 'degree_type' is supported" - with pytest.warns(Warning, match=warning_msg): - result_core_number = benchmark(dcg.core_number, dg, degree_type) + result_core_number = benchmark(dcg.core_number, dg, degree_type) result_core_number = ( result_core_number.drop_duplicates() @@ -167,13 +160,7 @@ def test_core_number_invalid_input(input_expected_output): legacy_renum_only=True, ) - with pytest.raises(ValueError): - dcg.core_number(dg) - - # FIXME: enable this check once 'degree_type' is supported - """ invalid_degree_type = 3 dg = input_expected_output["MGGraph"] with pytest.raises(ValueError): - cugraph.core_number(dg, invalid_degree_type) - """ + dcg.core_number(dg, invalid_degree_type) diff --git a/python/cugraph/cugraph/tests/mg/test_mg_jaccard.py b/python/cugraph/cugraph/tests/mg/test_mg_jaccard.py index 0a3a7e58014..4a0e3b66465 100644 --- a/python/cugraph/cugraph/tests/mg/test_mg_jaccard.py +++ b/python/cugraph/cugraph/tests/mg/test_mg_jaccard.py @@ -176,7 +176,7 @@ def test_dask_weighted_jaccard(): legacy_renum_only=True, store_transposed=True, ) - with pytest.raises(RuntimeError): + with pytest.raises(ValueError): dcg.jaccard(dg) dg = cugraph.Graph(directed=False) diff --git a/python/cugraph/cugraph/tests/mg/test_mg_k_core.py b/python/cugraph/cugraph/tests/mg/test_mg_k_core.py new file mode 100644 index 00000000000..6c3b6384a53 --- /dev/null +++ b/python/cugraph/cugraph/tests/mg/test_mg_k_core.py @@ -0,0 +1,189 @@ +# Copyright (c) 2022, NVIDIA CORPORATION. +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. + +import gc + +import pytest + +import cugraph +from cugraph.testing import utils +import cugraph.dask as dcg +import dask_cudf +from cugraph.structure.symmetrize import symmetrize_df +from cudf.testing.testing import assert_frame_equal + + +# ============================================================================= +# Pytest Setup / Teardown - called for each test function +# ============================================================================= +def setup_function(): + gc.collect() + + +# ============================================================================= +# Pytest fixtures +# ============================================================================= +datasets = utils.DATASETS_UNDIRECTED + +core_number = [True, False] +degree_type = ["bidirectional", "outgoing", "incoming"] + +fixture_params = utils.genFixtureParamsProduct( + (datasets, "graph_file"), (core_number, "core_number"), (degree_type, "degree_type") +) + + +@pytest.fixture(scope="module", params=fixture_params) +def input_combo(request): + """ + Simply return the current combination of params as a dictionary for use in + tests or other parameterized fixtures. + """ + parameters = dict(zip(("graph_file", "core_number", "degree_type"), request.param)) + + return parameters + + +@pytest.fixture(scope="module") +def input_expected_output(dask_client, input_combo): + """ + This fixture returns the inputs and expected results from the k-core + algo. 
+ """ + core_number = input_combo["core_number"] + degree_type = input_combo["degree_type"] + input_data_path = input_combo["graph_file"] + G = utils.generate_cugraph_graph_from_file( + input_data_path, directed=False, edgevals=True + ) + + if core_number: + # compute the core_number + core_number = cugraph.core_number(G, degree_type=degree_type) + else: + core_number = None + + input_combo["core_number"] = core_number + + input_combo["SGGraph"] = G + + sg_k_core_graph = cugraph.k_core( + G, core_number=core_number, degree_type=degree_type + ) + sg_k_core_results = sg_k_core_graph.view_edge_list() + # FIXME: The result comes back asymmetric. Symmetrize the results + sg_k_core_results = ( + symmetrize_df(sg_k_core_results, "src", "dst", "weights") + .sort_values(["src", "dst"]) + .reset_index(drop=True) + ) + + input_combo["sg_k_core_results"] = sg_k_core_results + + # Creating an edgelist from a dask cudf dataframe + chunksize = dcg.get_chunksize(input_data_path) + ddf = dask_cudf.read_csv( + input_data_path, + chunksize=chunksize, + delimiter=" ", + names=["src", "dst", "value"], + dtype=["int32", "int32", "float32"], + ) + + dg = cugraph.Graph(directed=False) + # FIXME: False when renumbering (C++ and python renumbering) + dg.from_dask_cudf_edgelist( + ddf, + source="src", + destination="dst", + edge_attr="value", + renumber=True, + legacy_renum_only=True, + ) + + input_combo["MGGraph"] = dg + + return input_combo + + +# ============================================================================= +# Tests +# ============================================================================= +def test_sg_k_core(dask_client, benchmark, input_expected_output): + # This test is only for benchmark purposes.
+ sg_k_core = None + G = input_expected_output["SGGraph"] + core_number = input_expected_output["core_number"] + degree_type = input_expected_output["degree_type"] + + sg_k_core = benchmark( + cugraph.k_core, G, core_number=core_number, degree_type=degree_type + ) + assert sg_k_core is not None + + +def test_dask_k_core(dask_client, benchmark, input_expected_output): + + dg = input_expected_output["MGGraph"] + core_number = input_expected_output["core_number"] + + k_core_results = benchmark(dcg.k_core, dg, core_number=core_number) + + expected_k_core_results = input_expected_output["sg_k_core_results"] + + k_core_results = ( + k_core_results.compute().sort_values(["src", "dst"]).reset_index(drop=True) + ) + + assert_frame_equal( + expected_k_core_results, k_core_results, check_dtype=False, check_like=True + ) + + +def test_dask_k_core_invalid_input(dask_client): + input_data_path = datasets[0] + chunksize = dcg.get_chunksize(input_data_path) + ddf = dask_cudf.read_csv( + input_data_path, + chunksize=chunksize, + delimiter=" ", + names=["src", "dst", "value"], + dtype=["int32", "int32", "float32"], + ) + + dg = cugraph.Graph(directed=True) + dg.from_dask_cudf_edgelist( + ddf, + source="src", + destination="dst", + edge_attr="value", + renumber=True, + legacy_renum_only=True, + store_transposed=True, + ) + with pytest.raises(ValueError): + dcg.k_core(dg) + + dg = cugraph.Graph(directed=False) + dg.from_dask_cudf_edgelist( + ddf, + source="src", + destination="dst", + edge_attr="value", + legacy_renum_only=True, + store_transposed=True, + ) + + degree_type = "invalid" + with pytest.raises(ValueError): + dcg.k_core(dg, degree_type=degree_type) diff --git a/python/cugraph/cugraph/tests/mg/test_mg_overlap.py b/python/cugraph/cugraph/tests/mg/test_mg_overlap.py index 488fb74ab93..08fc7c27bac 100644 --- a/python/cugraph/cugraph/tests/mg/test_mg_overlap.py +++ b/python/cugraph/cugraph/tests/mg/test_mg_overlap.py @@ -176,7 +176,7 @@ def test_dask_weighted_overlap(): 
legacy_renum_only=True, store_transposed=True, ) - with pytest.raises(RuntimeError): + with pytest.raises(ValueError): dcg.overlap(dg) dg = cugraph.Graph(directed=False) diff --git a/python/cugraph/cugraph/tests/mg/test_mg_sorensen.py b/python/cugraph/cugraph/tests/mg/test_mg_sorensen.py index d07d88fb67b..b26cbc12d56 100644 --- a/python/cugraph/cugraph/tests/mg/test_mg_sorensen.py +++ b/python/cugraph/cugraph/tests/mg/test_mg_sorensen.py @@ -176,7 +176,7 @@ def test_dask_weighted_sorensen(): legacy_renum_only=True, store_transposed=True, ) - with pytest.raises(RuntimeError): + with pytest.raises(ValueError): dcg.sorensen(dg) dg = cugraph.Graph(directed=False) diff --git a/python/cugraph/cugraph/tests/mg/test_mg_sssp.py b/python/cugraph/cugraph/tests/mg/test_mg_sssp.py index df7a341e5a0..42403180059 100644 --- a/python/cugraph/cugraph/tests/mg/test_mg_sssp.py +++ b/python/cugraph/cugraph/tests/mg/test_mg_sssp.py @@ -84,3 +84,33 @@ def test_dask_sssp(dask_client, directed): ): err = err + 1 assert err == 0 + + +def test_dask_unweighted_sssp(dask_client): + input_data_path = input_data_path = ( + RAPIDS_DATASET_ROOT_DIR_PATH / "karate.csv" + ).as_posix() + chunksize = dcg.get_chunksize(input_data_path) + ddf = dask_cudf.read_csv( + input_data_path, + chunksize=chunksize, + delimiter=" ", + names=["src", "dst", "value"], + dtype=["int32", "int32", "float32"], + ) + + dg = cugraph.Graph(directed=False) + dg.from_dask_cudf_edgelist( + ddf, + source="src", + destination="dst", + legacy_renum_only=True, + store_transposed=True, + ) + + warning_msg = ( + "'SSSP' requires the input graph to be weighted: Unweighted " + "graphs will not be supported in the next release." 
+ ) + with pytest.warns(PendingDeprecationWarning, match=warning_msg): + dcg.sssp(dg, 0) diff --git a/python/cugraph/cugraph/tests/test_core_number.py b/python/cugraph/cugraph/tests/test_core_number.py index f5d6c7ae260..b7a9175a5bc 100644 --- a/python/cugraph/cugraph/tests/test_core_number.py +++ b/python/cugraph/cugraph/tests/test_core_number.py @@ -79,16 +79,12 @@ def test_core_number(input_combo): drop=True ) - warning_msg = "The 'degree_type' parameter is ignored in this release." - - # FIXME: Remove this warning test once 'degree_type' is supported" - with pytest.warns(Warning, match=warning_msg): - core_number_results = ( - cugraph.core_number(G, degree_type) - .sort_values("vertex") - .reset_index(drop=True) - .rename(columns={"core_number": "cugraph_core_number"}) - ) + core_number_results = ( + cugraph.core_number(G, degree_type) + .sort_values("vertex") + .reset_index(drop=True) + .rename(columns={"core_number": "cugraph_core_number"}) + ) # Compare the nx core number results with cugraph core_number_results["nx_core_number"] = nx_core_number_results["core_number"] @@ -113,10 +109,7 @@ def test_core_number_invalid_input(input_combo): with pytest.raises(ValueError): cugraph.core_number(G) - # FIXME: enable this check once 'degree_type' is supported - """ invalid_degree_type = "invalid" G = input_combo["G"] with pytest.raises(ValueError): - experimental_core_number(G, invalid_degree_type) - """ + cugraph.core_number(G, invalid_degree_type) diff --git a/python/cugraph/cugraph/tests/test_jaccard.py b/python/cugraph/cugraph/tests/test_jaccard.py index 5322bbd6f54..10d0206bd84 100644 --- a/python/cugraph/cugraph/tests/test_jaccard.py +++ b/python/cugraph/cugraph/tests/test_jaccard.py @@ -292,7 +292,10 @@ def test_jaccard_multi_column(read_csv): "1_dst": "1_destination", } ) - assert_frame_equal(df_res, df_plc_exp, check_dtype=False, check_like=True) + + jaccard_res = df_res["jaccard_coeff"].sort_values().reset_index(drop=True) + jaccard_plc_exp = 
df_plc_exp["jaccard_coeff"].sort_values().reset_index(drop=True) + assert_series_equal(jaccard_res, jaccard_plc_exp) G2 = cugraph.Graph() G2.from_cudf_edgelist(cu_M, source="src_0", destination="dst_0") @@ -307,7 +310,7 @@ def test_jaccard_multi_column(read_csv): def test_weighted_exp_jaccard(): karate = DATASETS_UNDIRECTED[0] G = karate.get_graph() - with pytest.raises(RuntimeError): + with pytest.raises(ValueError): exp_jaccard(G) G = karate.get_graph(ignore_weights=True) diff --git a/python/cugraph/cugraph/tests/test_k_core.py b/python/cugraph/cugraph/tests/test_k_core.py index 0e4bf360c29..74c262505b9 100644 --- a/python/cugraph/cugraph/tests/test_k_core.py +++ b/python/cugraph/cugraph/tests/test_k_core.py @@ -34,6 +34,13 @@ print("Networkx version : {} ".format(nx.__version__)) +# ============================================================================= +# Pytest Setup / Teardown - called for each test function +# ============================================================================= +def setup_function(): + gc.collect() + + def calc_k_cores(graph_file, directed=True): # directed is used to create either a Graph or DiGraph so the returned # cugraph can be compared to nx graph of same type. 
@@ -67,7 +74,6 @@ def compare_edges(cg, nxg): @pytest.mark.parametrize("graph_file", DATASETS_UNDIRECTED) def test_k_core_Graph(graph_file): - gc.collect() cu_kcore, nx_kcore = calc_k_cores(graph_file, False) @@ -76,7 +82,6 @@ def test_k_core_Graph(graph_file): @pytest.mark.parametrize("graph_file", DATASETS_UNDIRECTED) def test_k_core_Graph_nx(graph_file): - gc.collect() dataset_path = graph_file.get_path() NM = utils.read_csv_for_nx(dataset_path) Gnx = nx.from_pandas_edgelist(NM, source="0", target="1", create_using=nx.Graph()) @@ -88,7 +93,6 @@ def test_k_core_Graph_nx(graph_file): @pytest.mark.parametrize("graph_file", DATASETS_UNDIRECTED) def test_k_core_corenumber_multicolumn(graph_file): - gc.collect() dataset_path = graph_file.get_path() cu_M = utils.read_csv_file(dataset_path) cu_M.rename(columns={"0": "src_0", "1": "dst_0"}, inplace=True) @@ -103,17 +107,34 @@ def test_k_core_corenumber_multicolumn(graph_file): corenumber_G1 = cugraph.core_number(G1) corenumber_G1.rename(columns={"core_number": "values"}, inplace=True) corenumber_G1 = corenumber_G1[["0_vertex", "1_vertex", "values"]] - + corenumber_G1 = None ck_res = cugraph.k_core(G1, core_number=corenumber_G1) G2 = cugraph.Graph() - G2.from_cudf_edgelist(cu_M, source="src_0", destination="dst_0") - ck_exp = cugraph.k_core(G2) + G2.from_cudf_edgelist(cu_M, source="src_0", destination="dst_0", renumber=False) + + corenumber_G2 = cugraph.core_number(G2) + corenumber_G2.rename(columns={"core_number": "values"}, inplace=True) + corenumber_G2 = corenumber_G2[["vertex", "values"]] + ck_exp = cugraph.k_core(G2, core_number=corenumber_G2) # FIXME: Replace with multi-column view_edge_list() edgelist_df = ck_res.edgelist.edgelist_df edgelist_df_res = ck_res.unrenumber(edgelist_df, "src") edgelist_df_res = ck_res.unrenumber(edgelist_df_res, "dst") + for i in range(len(edgelist_df_res)): assert ck_exp.has_edge( edgelist_df_res["0_src"].iloc[i], edgelist_df_res["0_dst"].iloc[i] ) + + +def test_k_core_invalid_input(): 
+ karate = DATASETS_UNDIRECTED[0] + G = karate.get_graph(create_using=cugraph.Graph(directed=True)) + with pytest.raises(ValueError): + cugraph.k_core(G) + + G = karate.get_graph() + degree_type = "invalid" + with pytest.raises(ValueError): + cugraph.k_core(G, degree_type=degree_type) diff --git a/python/cugraph/cugraph/tests/test_leiden.py b/python/cugraph/cugraph/tests/test_leiden.py index d405820791e..768115491e5 100644 --- a/python/cugraph/cugraph/tests/test_leiden.py +++ b/python/cugraph/cugraph/tests/test_leiden.py @@ -98,13 +98,3 @@ def test_leiden_directed_graph(): with pytest.raises(ValueError): parts, mod = cugraph_leiden(G) - - -def test_leiden_unweighted_graph(): - - G = karate_asymmetric.get_graph( - create_using=cugraph.Graph(directed=True), ignore_weights=True - ) - - with pytest.raises(RuntimeError): - parts, mod = cugraph_leiden(G) diff --git a/python/cugraph/cugraph/tests/test_louvain.py b/python/cugraph/cugraph/tests/test_louvain.py index a530deba0ba..1392894aab7 100644 --- a/python/cugraph/cugraph/tests/test_louvain.py +++ b/python/cugraph/cugraph/tests/test_louvain.py @@ -107,9 +107,3 @@ def test_louvain(graph_file): def test_louvain_directed_graph(): with pytest.raises(ValueError): cugraph_call(karate_asymmetric, edgevals=True, directed=True) - - -@pytest.mark.parametrize("graph_file", DATASETS_UNDIRECTED) -def test_louvain_with_no_edgevals(graph_file): - with pytest.raises(RuntimeError): - cugraph_call(karate_asymmetric, edgevals=False) diff --git a/python/cugraph/cugraph/tests/test_overlap.py b/python/cugraph/cugraph/tests/test_overlap.py index b21c00b9801..3b940d8b39c 100644 --- a/python/cugraph/cugraph/tests/test_overlap.py +++ b/python/cugraph/cugraph/tests/test_overlap.py @@ -145,7 +145,7 @@ def extract_two_hop(read_csv): # Test -def test_overlap_1(gpubenchmark, read_csv, extract_two_hop): +def test_overlap(gpubenchmark, read_csv, extract_two_hop): M, graph_file = read_csv pairs = extract_two_hop @@ -197,7 +197,9 @@ def 
test_overlap_multi_column(graph_file): "1_dst": "1_destination", } ) - assert_frame_equal(df_res, df_plc_exp, check_dtype=False, check_like=True) + overlap_res = df_res["overlap_coeff"].sort_values().reset_index(drop=True) + overlap_plc_exp = df_plc_exp["overlap_coeff"].sort_values().reset_index(drop=True) + assert_series_equal(overlap_res, overlap_plc_exp) G2 = cugraph.Graph() G2.from_cudf_edgelist(cu_M, source="src_0", destination="dst_0") @@ -212,7 +214,7 @@ def test_overlap_multi_column(graph_file): def test_weighted_exp_overlap(): karate = DATASETS_UNDIRECTED[0] G = karate.get_graph() - with pytest.raises(RuntimeError): + with pytest.raises(ValueError): exp_overlap(G) G = karate.get_graph(ignore_weights=True) diff --git a/python/cugraph/cugraph/tests/test_sorensen.py b/python/cugraph/cugraph/tests/test_sorensen.py index 030e865ab61..b827cf32665 100644 --- a/python/cugraph/cugraph/tests/test_sorensen.py +++ b/python/cugraph/cugraph/tests/test_sorensen.py @@ -257,7 +257,9 @@ def test_sorensen_multi_column(read_csv): "1_dst": "1_destination", } ) - assert_frame_equal(df_res, df_plc_exp, check_dtype=False, check_like=True) + sorensen_res = df_res["sorensen_coeff"].sort_values().reset_index(drop=True) + sorensen_plc_exp = df_plc_exp["sorensen_coeff"].sort_values().reset_index(drop=True) + assert_series_equal(sorensen_res, sorensen_plc_exp) G2 = cugraph.Graph() G2.from_cudf_edgelist(cu_M, source="src_0", destination="dst_0") @@ -272,7 +274,7 @@ def test_sorensen_multi_column(read_csv): def test_weighted_exp_sorensen(): karate = DATASETS_UNDIRECTED[0] G = karate.get_graph() - with pytest.raises(RuntimeError): + with pytest.raises(ValueError): exp_sorensen(G) G = karate.get_graph(ignore_weights=True) diff --git a/python/cugraph/cugraph/tests/test_sssp.py b/python/cugraph/cugraph/tests/test_sssp.py index a6cfddefce6..518b0aac622 100644 --- a/python/cugraph/cugraph/tests/test_sssp.py +++ b/python/cugraph/cugraph/tests/test_sssp.py @@ -161,7 +161,7 @@ def 
networkx_call(graph_file, source, edgevals=True): nx_paths = nx.single_source_dijkstra_path_length(Gnx, source) G = graph_file.get_graph( - create_using=cugraph.Graph(directed=True), ignore_weights=not edgevals + create_using=cugraph.Graph(directed=True), ignore_weights=True ) t2 = time.time() - t1 @@ -443,5 +443,9 @@ def test_scipy_api_compat(): def test_sssp_with_no_edgevals(): G = datasets.karate.get_graph(ignore_weights=True) - with pytest.raises(RuntimeError): + warning_msg = ( + "'SSSP' requires the input graph to be weighted: Unweighted " + "graphs will not be supported in the next release." + ) + with pytest.warns(PendingDeprecationWarning, match=warning_msg): cugraph.sssp(G, 1) diff --git a/python/cugraph/cugraph/tests/test_subgraph_extraction.py b/python/cugraph/cugraph/tests/test_subgraph_extraction.py index 3d5f04516af..fcfd063b61a 100644 --- a/python/cugraph/cugraph/tests/test_subgraph_extraction.py +++ b/python/cugraph/cugraph/tests/test_subgraph_extraction.py @@ -179,9 +179,3 @@ def test_subgraph_extraction_graph_not_renumbered(): assert Sg.number_of_vertices() == 3 assert Sg.number_of_edges() == 3 - - -def test_subgraph_with_no_edgevals(): - G = karate.get_graph(ignore_weights=True) - with pytest.raises(RuntimeError): - cugraph.subgraph(G, 1) diff --git a/python/cugraph/cugraph/traversal/sssp.py b/python/cugraph/cugraph/traversal/sssp.py index 550bc26248d..7e04ff7678b 100644 --- a/python/cugraph/cugraph/traversal/sssp.py +++ b/python/cugraph/cugraph/traversal/sssp.py @@ -12,6 +12,7 @@ # limitations under the License. import numpy as np +import warnings import cudf from cugraph.structure import Graph, DiGraph, MultiGraph, MultiDiGraph @@ -216,7 +217,11 @@ def sssp( ) if not G.edgelist.weights: - raise RuntimeError("input graph must be weighted") + warning_msg = ( + "'SSSP' requires the input graph to be weighted: Unweighted " + "graphs will not be supported in the next release." 
+ ) + warnings.warn(warning_msg, PendingDeprecationWarning) if G.renumbered: if isinstance(source, cudf.DataFrame): diff --git a/python/pylibcugraph/pylibcugraph/CMakeLists.txt b/python/pylibcugraph/pylibcugraph/CMakeLists.txt index e7f1c4f5b54..492ff738f1e 100644 --- a/python/pylibcugraph/pylibcugraph/CMakeLists.txt +++ b/python/pylibcugraph/pylibcugraph/CMakeLists.txt @@ -23,6 +23,7 @@ set(cython_sources graph_properties.pyx graphs.pyx hits.pyx + k_core.pyx jaccard_coefficients.pyx sorensen_coefficients.pyx overlap_coefficients.pyx diff --git a/python/pylibcugraph/pylibcugraph/__init__.py b/python/pylibcugraph/pylibcugraph/__init__.py index a00d202a040..753c78e7072 100644 --- a/python/pylibcugraph/pylibcugraph/__init__.py +++ b/python/pylibcugraph/pylibcugraph/__init__.py @@ -43,6 +43,8 @@ from pylibcugraph.core_number import core_number +from pylibcugraph.k_core import k_core + from pylibcugraph.two_hop_neighbors import get_two_hop_neighbors from pylibcugraph.louvain import louvain diff --git a/python/pylibcugraph/pylibcugraph/_cugraph_c/core_algorithms.pxd b/python/pylibcugraph/pylibcugraph/_cugraph_c/core_algorithms.pxd index 1830cc71a49..4d3509e8b7f 100644 --- a/python/pylibcugraph/pylibcugraph/_cugraph_c/core_algorithms.pxd +++ b/python/pylibcugraph/pylibcugraph/_cugraph_c/core_algorithms.pxd @@ -65,4 +65,50 @@ cdef extern from "cugraph_c/core_algorithms.h": bool_t do_expensive_check, cugraph_core_result_t** result, cugraph_error_t** error - ) \ No newline at end of file + ) + + ########################################################################### + # k-core + ctypedef struct cugraph_k_core_result_t: + pass + + cdef cugraph_type_erased_device_array_view_t* \ + cugraph_k_core_result_get_src_vertices( + cugraph_k_core_result_t* result + ) + + cdef cugraph_type_erased_device_array_view_t* \ + cugraph_k_core_result_get_dst_vertices( + cugraph_k_core_result_t* result + ) + + cdef cugraph_type_erased_device_array_view_t* \ + 
cugraph_k_core_result_get_weights( + cugraph_k_core_result_t* result + ) + + cdef void \ + cugraph_k_core_result_free( + cugraph_k_core_result_t* result + ) + + cdef cugraph_error_code_t \ + cugraph_core_result_create( + const cugraph_resource_handle_t* handle, + cugraph_type_erased_device_array_view_t* vertices, + cugraph_type_erased_device_array_view_t* core_numbers, + cugraph_core_result_t** core_result, + cugraph_error_t** error + ) + + cdef cugraph_error_code_t \ + cugraph_k_core( + const cugraph_resource_handle_t* handle, + cugraph_graph_t* graph, + size_t k, + cugraph_k_core_degree_type_t degree_type, + const cugraph_core_result_t* core_result, + bool_t do_expensive_check, + cugraph_k_core_result_t** result, + cugraph_error_t** error + ) diff --git a/python/pylibcugraph/pylibcugraph/core_number.pyx b/python/pylibcugraph/pylibcugraph/core_number.pyx index 7e80774335f..7d0c42f7dd0 100644 --- a/python/pylibcugraph/pylibcugraph/core_number.pyx +++ b/python/pylibcugraph/pylibcugraph/core_number.pyx @@ -15,7 +15,6 @@ # cython: language_level = 3 from libc.stdint cimport uintptr_t -import warnings from pylibcugraph._cugraph_c.resource_handle cimport ( bool_t, @@ -28,8 +27,6 @@ from pylibcugraph._cugraph_c.error cimport ( ) from pylibcugraph._cugraph_c.array cimport ( cugraph_type_erased_device_array_view_t, - cugraph_type_erased_device_array_view_create, - cugraph_type_erased_device_array_view_free, ) from pylibcugraph._cugraph_c.graph cimport ( cugraph_graph_t, @@ -51,7 +48,6 @@ from pylibcugraph.graphs cimport ( from pylibcugraph.utils cimport ( assert_success, copy_to_cupy_array, - assert_CAI_type, get_c_type_from_numpy_type, ) @@ -77,8 +73,6 @@ def core_number(ResourceHandle resource_handle, "incoming", "outgoing", and "bidirectional" respectively. This option is currently ignored in this release, and setting it will result in a warning. - - This implementation only supports bidirectional edges. 
do_expensive_check: bool If True, performs more extensive tests on the inputs to ensure @@ -103,13 +97,6 @@ def core_number(ResourceHandle resource_handle, cdef cugraph_error_code_t error_code cdef cugraph_error_t* error_ptr - if degree_type is not None: - warning_msg = ( - "The 'degree_type' parameter is ignored in this release.") - warnings.warn(warning_msg, Warning) - - degree_type = "bidirectional" - degree_type_map = { "incoming": cugraph_k_core_degree_type_t.K_CORE_DEGREE_TYPE_IN, "outgoing": cugraph_k_core_degree_type_t.K_CORE_DEGREE_TYPE_OUT, diff --git a/python/pylibcugraph/pylibcugraph/egonet.pyx b/python/pylibcugraph/pylibcugraph/egonet.pyx index 6ed0e31ef92..639e4c386a7 100644 --- a/python/pylibcugraph/pylibcugraph/egonet.pyx +++ b/python/pylibcugraph/pylibcugraph/egonet.pyx @@ -27,8 +27,6 @@ from pylibcugraph._cugraph_c.error cimport ( ) from pylibcugraph._cugraph_c.array cimport ( cugraph_type_erased_device_array_view_t, - cugraph_type_erased_device_array_view_create, - cugraph_type_erased_device_array_view_free, ) from pylibcugraph._cugraph_c.graph cimport ( cugraph_graph_t, @@ -53,7 +51,6 @@ from pylibcugraph.graphs cimport ( ) from pylibcugraph.utils cimport ( assert_success, - assert_CAI_type, copy_to_cupy_array, create_cugraph_type_erased_device_array_view_from_py_obj, ) diff --git a/python/pylibcugraph/pylibcugraph/jaccard_coefficients.pyx b/python/pylibcugraph/pylibcugraph/jaccard_coefficients.pyx index 1ce7950f097..c7f8e2368b4 100644 --- a/python/pylibcugraph/pylibcugraph/jaccard_coefficients.pyx +++ b/python/pylibcugraph/pylibcugraph/jaccard_coefficients.pyx @@ -50,7 +50,6 @@ from pylibcugraph.graphs cimport ( ) from pylibcugraph.utils cimport ( assert_success, - assert_CAI_type, copy_to_cupy_array, create_cugraph_type_erased_device_array_view_from_py_obj ) @@ -63,13 +62,9 @@ def EXPERIMENTAL__jaccard_coefficients(ResourceHandle resource_handle, bool_t use_weight, bool_t do_expensive_check): """ - Compute the similarity for the specified 
vertex_pairs + Compute the Jaccard coefficients for the specified vertex_pairs. - Note that Jaccard similarity must run on a symmetric graph - - The HITS algorithm computes two numbers for a node. Authorities - estimates the node value based on the incoming links. Hubs estimates - the node value based on outgoing links. + Note that Jaccard similarity must run on a symmetric graph. Parameters ---------- @@ -95,10 +90,8 @@ def EXPERIMENTAL__jaccard_coefficients(ResourceHandle resource_handle, Returns ------- - A tuple of device arrays, where the third item in the tuple is a device - array containing the vertex identifiers, the first and second items are device - arrays containing respectively the hubs and authorities values for the corresponding - vertices + A tuple of device arrays containing the vertex pairs with + their corresponding Jaccard coefficient scores. Examples -------- diff --git a/python/pylibcugraph/pylibcugraph/k_core.pyx b/python/pylibcugraph/pylibcugraph/k_core.pyx new file mode 100644 index 00000000000..50344469b11 --- /dev/null +++ b/python/pylibcugraph/pylibcugraph/k_core.pyx @@ -0,0 +1,169 @@ +# Copyright (c) 2022, NVIDIA CORPORATION. +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
def k_core(ResourceHandle resource_handle,
           _GPUGraph graph,
           size_t k,
           degree_type,
           core_result,
           bool_t do_expensive_check):
    """
    Compute the k-core of the graph G.

    A k-core of a graph is a maximal subgraph that contains nodes of
    degree k or more. This call does not support a graph with self-loops
    and parallel edges.

    Parameters
    ----------
    resource_handle: ResourceHandle
        Handle to the underlying device and host resource needed for
        referencing data and running algorithms.

    graph : SGGraph or MGGraph
        The input graph, for either Single or Multi-GPU operations.

    k : size_t
        Order of the core. This value must not be negative. It is a
        required argument of this wrapper (it has no default).

    degree_type: str
        Determines if the core number computation should be based on
        input, output, or both directed edges, with valid values being
        "incoming", "outgoing", and "bidirectional" respectively. The
        matching cugraph_k_core_degree_type_t value is forwarded to
        cugraph_k_core.

    core_result : mapping of device array types
        Precomputed core numbers of the nodes of the graph G. It is
        indexed with the keys "vertex" (vertex identifiers) and "values"
        (the core number of each of those vertices), so it must be
        provided; None is not accepted by this wrapper.

    do_expensive_check: bool
        If True, performs more extensive tests on the inputs to ensure
        validity, at the expense of increased run time.

    Returns
    -------
    A tuple of device arrays containing the sources, destinations vertices
    and the weights.

    Raises
    ------
    KeyError
        If degree_type is not one of "incoming", "outgoing" or
        "bidirectional".

    Examples
    --------
    # FIXME: No example yet

    """
    cdef cugraph_resource_handle_t* c_resource_handle_ptr = \
        resource_handle.c_resource_handle_ptr
    cdef cugraph_graph_t* c_graph_ptr = graph.c_graph_ptr

    # Both result pointers start out NULL so the cleanup code below can tell
    # whether the corresponding C object was actually created.
    cdef cugraph_core_result_t* core_result_ptr = NULL
    cdef cugraph_k_core_result_t* k_core_result_ptr = NULL
    cdef cugraph_error_code_t error_code
    cdef cugraph_error_t* error_ptr

    # Declared up front: cdef statements are not allowed inside the
    # try/finally block used further down.
    cdef cugraph_type_erased_device_array_view_t* src_vertices_ptr
    cdef cugraph_type_erased_device_array_view_t* dst_vertices_ptr
    cdef cugraph_type_erased_device_array_view_t* weights_ptr

    degree_type_map = {
        "incoming": cugraph_k_core_degree_type_t.K_CORE_DEGREE_TYPE_IN,
        "outgoing": cugraph_k_core_degree_type_t.K_CORE_DEGREE_TYPE_OUT,
        "bidirectional": cugraph_k_core_degree_type_t.K_CORE_DEGREE_TYPE_INOUT}

    # NOTE(review): the two views created below are never released in this
    # function - confirm whether cugraph_core_result_create takes ownership
    # of them or whether they should be freed once the result is built.
    cdef cugraph_type_erased_device_array_view_t* \
        vertices_view_ptr = \
            create_cugraph_type_erased_device_array_view_from_py_obj(
                core_result["vertex"])

    cdef cugraph_type_erased_device_array_view_t* \
        core_numbers_view_ptr = \
            create_cugraph_type_erased_device_array_view_from_py_obj(
                core_result["values"])

    # Wrap the precomputed core numbers in a C core_number result object.
    error_code = cugraph_core_result_create(c_resource_handle_ptr,
                                            vertices_view_ptr,
                                            core_numbers_view_ptr,
                                            &core_result_ptr,
                                            &error_ptr)
    assert_success(error_code, error_ptr, "cugraph_core_result_create")

    # From here on core_result_ptr is live. The try/finally guarantees the C
    # results are released even if the k-core call, the degree_type lookup
    # or one of the device-to-cupy copies raises.
    try:
        # compute k_core
        error_code = cugraph_k_core(c_resource_handle_ptr,
                                    c_graph_ptr,
                                    k,
                                    degree_type_map[degree_type],
                                    core_result_ptr,
                                    do_expensive_check,
                                    &k_core_result_ptr,
                                    &error_ptr)
        assert_success(error_code, error_ptr, "cugraph_k_core")

        src_vertices_ptr = \
            cugraph_k_core_result_get_src_vertices(k_core_result_ptr)
        dst_vertices_ptr = \
            cugraph_k_core_result_get_dst_vertices(k_core_result_ptr)
        weights_ptr = \
            cugraph_k_core_result_get_weights(k_core_result_ptr)

        # Copy the results out before the C result object (which the views
        # above were obtained from) is freed.
        cupy_src_vertices = copy_to_cupy_array(
            c_resource_handle_ptr, src_vertices_ptr)
        cupy_dst_vertices = copy_to_cupy_array(
            c_resource_handle_ptr, dst_vertices_ptr)
        cupy_weights = copy_to_cupy_array(
            c_resource_handle_ptr, weights_ptr)
    finally:
        # k_core_result_ptr is still NULL if cugraph_k_core was never
        # reached or did not produce a result.
        if k_core_result_ptr != NULL:
            cugraph_k_core_result_free(k_core_result_ptr)
        cugraph_core_result_free(core_result_ptr)

    return (cupy_src_vertices, cupy_dst_vertices, cupy_weights)
Authorities - estimates the node value based on the incoming links. Hubs estimates - the node value based on outgoing links. Parameters ---------- @@ -95,10 +91,8 @@ def EXPERIMENTAL__overlap_coefficients(ResourceHandle resource_handle, Returns ------- - A tuple of device arrays, where the third item in the tuple is a device - array containing the vertex identifiers, the first and second items are device - arrays containing respectively the hubs and authorities values for the corresponding - vertices + A tuple of device arrays containing the vertex pairs with + their corresponding Overlap coefficient scores. Examples -------- diff --git a/python/pylibcugraph/pylibcugraph/sorensen_coefficients.pyx b/python/pylibcugraph/pylibcugraph/sorensen_coefficients.pyx index f7f04a50630..8c4755f10ee 100644 --- a/python/pylibcugraph/pylibcugraph/sorensen_coefficients.pyx +++ b/python/pylibcugraph/pylibcugraph/sorensen_coefficients.pyx @@ -50,7 +50,6 @@ from pylibcugraph.graphs cimport ( ) from pylibcugraph.utils cimport ( assert_success, - assert_CAI_type, copy_to_cupy_array, create_cugraph_type_erased_device_array_view_from_py_obj ) @@ -63,13 +62,9 @@ def EXPERIMENTAL__sorensen_coefficients(ResourceHandle resource_handle, bool_t use_weight, bool_t do_expensive_check): """ - Compute the similarity for the specified vertex_pairs + Compute the Sorensen coefficients for the specified vertex_pairs. - Note that Sorensen similarity must run on a symmetric graph - - The HITS algorithm computes two numbers for a node. Authorities - estimates the node value based on the incoming links. Hubs estimates - the node value based on outgoing links. + Note that Sorensen similarity must run on a symmetric graph. 
Parameters ---------- @@ -95,10 +90,8 @@ def EXPERIMENTAL__sorensen_coefficients(ResourceHandle resource_handle, Returns ------- - A tuple of device arrays, where the third item in the tuple is a device - array containing the vertex identifiers, the first and second items are device - arrays containing respectively the hubs and authorities values for the corresponding - vertices + A tuple of device arrays containing the vertex pairs with + their corresponding Sorensen coefficient scores. Examples --------