From c5d2a9a43ea34924a82c3f3176a68c5f428fdd31 Mon Sep 17 00:00:00 2001 From: Erik Welch Date: Thu, 18 Jan 2024 05:22:11 -0600 Subject: [PATCH] `nx-cugraph`: add `to_undirected` method; add reciprocity algorithms (#4063) Getting `G.to_undirected` to work was more involved than I expected, but at least we got two algorithms "for free" out of the effort! We raise `NotImplementedError` for `multidigraph.to_undirected()` for now. I would say that understanding the reciprocity algorithms is the first step to understanding `to_undirected`. Authors: - Erik Welch (https://github.com/eriknw) - Rick Ratzel (https://github.com/rlratzel) Approvers: - Rick Ratzel (https://github.com/rlratzel) URL: https://github.com/rapidsai/cugraph/pull/4063 --- python/nx-cugraph/_nx_cugraph/__init__.py | 2 + python/nx-cugraph/lint.yaml | 14 +-- .../nx_cugraph/algorithms/__init__.py | 3 +- .../nx_cugraph/algorithms/reciprocity.py | 93 ++++++++++++++ .../nx-cugraph/nx_cugraph/classes/digraph.py | 114 +++++++++++++++++- python/nx-cugraph/nx_cugraph/classes/graph.py | 7 +- .../nx_cugraph/classes/multidigraph.py | 10 +- .../nx_cugraph/classes/multigraph.py | 4 +- .../nx_cugraph/tests/test_generators.py | 27 +---- .../nx_cugraph/tests/test_graph_methods.py | 67 ++++++++++ .../nx_cugraph/tests/testing_utils.py | 38 ++++++ 11 files changed, 342 insertions(+), 37 deletions(-) create mode 100644 python/nx-cugraph/nx_cugraph/algorithms/reciprocity.py create mode 100644 python/nx-cugraph/nx_cugraph/tests/test_graph_methods.py create mode 100644 python/nx-cugraph/nx_cugraph/tests/testing_utils.py diff --git a/python/nx-cugraph/_nx_cugraph/__init__.py b/python/nx-cugraph/_nx_cugraph/__init__.py index d9b997411ae..69320e6b55c 100644 --- a/python/nx-cugraph/_nx_cugraph/__init__.py +++ b/python/nx-cugraph/_nx_cugraph/__init__.py @@ -91,10 +91,12 @@ "number_weakly_connected_components", "octahedral_graph", "out_degree_centrality", + "overall_reciprocity", "pagerank", "pappus_graph", "path_graph", "petersen_graph", + "reciprocity", "sedgewick_maze_graph", "single_source_shortest_path_length", "single_target_shortest_path_length", diff --git a/python/nx-cugraph/lint.yaml b/python/nx-cugraph/lint.yaml index de6f20bc439..0d4f0b59413 100644 --- a/python/nx-cugraph/lint.yaml +++ b/python/nx-cugraph/lint.yaml @@ -1,4 +1,4 @@ -# Copyright (c) 2023, NVIDIA CORPORATION. +# Copyright (c) 2023-2024, NVIDIA CORPORATION. # # https://pre-commit.com/ # @@ -36,7 +36,7 @@ repos: - id: autoflake args: [--in-place] - repo: https://github.com/pycqa/isort - rev: 5.12.0 + rev: 5.13.2 hooks: - id: isort - repo: https://github.com/asottile/pyupgrade @@ -45,23 +45,23 @@ repos: - id: pyupgrade args: [--py39-plus] - repo: https://github.com/psf/black - rev: 23.11.0 + rev: 23.12.1 hooks: - id: black # - id: black-jupyter - repo: https://github.com/astral-sh/ruff-pre-commit - rev: v0.1.7 + rev: v0.1.13 hooks: - id: ruff args: [--fix-only, --show-fixes] # --unsafe-fixes] - repo: https://github.com/PyCQA/flake8 - rev: 6.1.0 + rev: 7.0.0 hooks: - id: flake8 args: ['--per-file-ignores=_nx_cugraph/__init__.py:E501', '--extend-ignore=SIM105'] # Why is this necessary? additional_dependencies: &flake8_dependencies # These versions need updated manually - - flake8==6.1.0 + - flake8==7.0.0 - flake8-bugbear==23.12.2 - flake8-simplify==0.21.0 - repo: https://github.com/asottile/yesqa @@ -77,7 +77,7 @@ repos: additional_dependencies: [tomli] files: ^(nx_cugraph|docs)/ - repo: https://github.com/astral-sh/ruff-pre-commit - rev: v0.1.7 + rev: v0.1.13 hooks: - id: ruff - repo: https://github.com/pre-commit/pre-commit-hooks diff --git a/python/nx-cugraph/nx_cugraph/algorithms/__init__.py b/python/nx-cugraph/nx_cugraph/algorithms/__init__.py index d28a629fe63..65700838f41 100644 --- a/python/nx-cugraph/nx_cugraph/algorithms/__init__.py +++ b/python/nx-cugraph/nx_cugraph/algorithms/__init__.py @@ -1,4 +1,4 @@ -# Copyright (c) 2023, NVIDIA CORPORATION. +# Copyright (c) 2023-2024, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at @@ -26,5 +26,6 @@ from .dag import * from .isolate import * from .link_analysis import * +from .reciprocity import * from .shortest_paths import * from .traversal import * diff --git a/python/nx-cugraph/nx_cugraph/algorithms/reciprocity.py b/python/nx-cugraph/nx_cugraph/algorithms/reciprocity.py new file mode 100644 index 00000000000..c87abdf9fa7 --- /dev/null +++ b/python/nx-cugraph/nx_cugraph/algorithms/reciprocity.py @@ -0,0 +1,93 @@ +# Copyright (c) 2023-2024, NVIDIA CORPORATION. +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import cupy as cp +import networkx as nx +import numpy as np + +from nx_cugraph.convert import _to_directed_graph +from nx_cugraph.utils import networkx_algorithm, not_implemented_for + +__all__ = ["reciprocity", "overall_reciprocity"] + + +@not_implemented_for("undirected", "multigraph") +@networkx_algorithm(version_added="24.02") +def reciprocity(G, nodes=None): + if nodes is None: + return overall_reciprocity(G) + G = _to_directed_graph(G) + N = G._N + # 'nodes' can also be a single node identifier + if nodes in G: + index = nodes if G.key_to_id is None else G.key_to_id[nodes] + mask = (G.src_indices == index) | (G.dst_indices == index) + src_indices = G.src_indices[mask] + if src_indices.size == 0: + raise nx.NetworkXError("Not defined for isolated nodes.") + dst_indices = G.dst_indices[mask] + # Create two lists of edge identifiers, one for each direction. + # Edge identifiers can be created from a pair of node + # identifiers. Simply adding src IDs to dst IDs is not adequate, so + # make one set of values (either src or dst depending on direction) + # unique by multiplying values by N. + # Upcast to int64 so indices don't overflow. + edges_a_b = N * src_indices.astype(np.int64) + dst_indices + edges_b_a = src_indices + N * dst_indices.astype(np.int64) + # Find the matching edge identifiers in each list. The edge identifier + # generation ensures the ID for A->B == the ID for B->A + recip_indices = cp.intersect1d( + edges_a_b, + edges_b_a, + # assume_unique=True, # cupy <= 12.2.0 also assumes sorted + ) + num_selfloops = (src_indices == dst_indices).sum().tolist() + return (recip_indices.size - num_selfloops) / edges_a_b.size + + # Don't include self-loops + mask = G.src_indices != G.dst_indices + src_indices = G.src_indices[mask] + dst_indices = G.dst_indices[mask] + # Create two lists of edges, one for each direction, and find the matching + # IDs in each list (see description above). + edges_a_b = N * src_indices.astype(np.int64) + dst_indices + edges_b_a = src_indices + N * dst_indices.astype(np.int64) + recip_indices = cp.intersect1d( + edges_a_b, + edges_b_a, + # assume_unique=True, # cupy <= 12.2.0 also assumes sorted + ) + numer = cp.bincount(recip_indices // N, minlength=N) + denom = cp.bincount(src_indices, minlength=N) + denom += cp.bincount(dst_indices, minlength=N) + recip = 2 * numer / denom + node_ids = G._nodekeys_to_nodearray(nodes) + return G._nodearrays_to_dict(node_ids, recip[node_ids]) + + +@not_implemented_for("undirected", "multigraph") +@networkx_algorithm(version_added="24.02") +def overall_reciprocity(G): + G = _to_directed_graph(G) + if G.number_of_edges() == 0: + raise nx.NetworkXError("Not defined for empty graphs") + # Create two lists of edges, one for each direction, and find the matching + # IDs in each list (see description in reciprocity()). + edges_a_b = G._N * G.src_indices.astype(np.int64) + G.dst_indices + edges_b_a = G.src_indices + G._N * G.dst_indices.astype(np.int64) + recip_indices = cp.intersect1d( + edges_a_b, + edges_b_a, + # assume_unique=True, # cupy <= 12.2.0 also assumes sorted + ) + num_selfloops = (G.src_indices == G.dst_indices).sum().tolist() + return (recip_indices.size - num_selfloops) / edges_a_b.size diff --git a/python/nx-cugraph/nx_cugraph/classes/digraph.py b/python/nx-cugraph/nx_cugraph/classes/digraph.py index 52ea2334c85..3392f336201 100644 --- a/python/nx-cugraph/nx_cugraph/classes/digraph.py +++ b/python/nx-cugraph/nx_cugraph/classes/digraph.py @@ -1,4 +1,4 @@ -# Copyright (c) 2023, NVIDIA CORPORATION. +# Copyright (c) 2023-2024, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at @@ -12,13 +12,16 @@ # limitations under the License. from __future__ import annotations +from copy import deepcopy from typing import TYPE_CHECKING import cupy as cp import networkx as nx +import numpy as np import nx_cugraph as nxcg +from ..utils import index_dtype from .graph import Graph if TYPE_CHECKING: # pragma: no cover @@ -59,6 +62,115 @@ def number_of_edges( def reverse(self, copy: bool = True) -> DiGraph: return self._copy(not copy, self.__class__, reverse=True) + @networkx_api + def to_undirected(self, reciprocal=False, as_view=False): + N = self._N + # Upcast to int64 so indices don't overflow + src_dst_indices_old = N * self.src_indices.astype(np.int64) + self.dst_indices + if reciprocal: + src_dst_indices_new = cp.intersect1d( + src_dst_indices_old, + self.src_indices + N * self.dst_indices.astype(np.int64), + # assume_unique=True, # cupy <= 12.2.0 also assumes sorted + ) + if self.edge_values: + sorter = cp.argsort(src_dst_indices_old) + idx = cp.searchsorted( + src_dst_indices_old, src_dst_indices_new, sorter=sorter + ) + indices = sorter[idx] + src_indices = self.src_indices[indices].copy() + dst_indices = self.dst_indices[indices].copy() + edge_values = { + key: val[indices].copy() for key, val in self.edge_values.items() + } + edge_masks = { + key: val[indices].copy() for key, val in self.edge_masks.items() + } + else: + src_indices, dst_indices = cp.divmod( + src_dst_indices_new, N, dtype=index_dtype + ) + else: + src_dst_indices_old_T = self.src_indices + N * self.dst_indices.astype( + np.int64 + ) + if self.edge_values: + src_dst_extra = cp.setdiff1d( + src_dst_indices_old_T, src_dst_indices_old, assume_unique=True + ) + sorter = cp.argsort(src_dst_indices_old_T) + idx = cp.searchsorted( + src_dst_indices_old_T, src_dst_extra, sorter=sorter + ) + indices = sorter[idx] + src_indices = cp.hstack((self.src_indices, self.dst_indices[indices])) + dst_indices = cp.hstack((self.dst_indices, self.src_indices[indices])) + edge_values = { + key: cp.hstack((val, val[indices])) + for key, val in self.edge_values.items() + } + edge_masks = { + key: cp.hstack((val, val[indices])) + for key, val in self.edge_masks.items() + } + else: + src_dst_indices_new = cp.union1d( + src_dst_indices_old, src_dst_indices_old_T + ) + src_indices, dst_indices = cp.divmod( + src_dst_indices_new, N, dtype=index_dtype + ) + + if self.edge_values: + recip_indices = cp.lexsort(cp.vstack((src_indices, dst_indices))) + for key, mask in edge_masks.items(): + # Make sure we choose a value that isn't masked out + val = edge_values[key] + rmask = mask[recip_indices] + recip_only = rmask & ~mask + val[recip_only] = val[recip_indices[recip_only]] + only = mask & ~rmask + val[recip_indices[only]] = val[only] + mask |= mask[recip_indices] + # Arbitrarily choose to use value from (j > i) edge + mask = src_indices < dst_indices + left_idx = cp.nonzero(mask)[0] + right_idx = recip_indices[mask] + for val in edge_values.values(): + val[left_idx] = val[right_idx] + else: + edge_values = {} + edge_masks = {} + + node_values = self.node_values + node_masks = self.node_masks + key_to_id = self.key_to_id + id_to_key = None if key_to_id is None else self._id_to_key + if not as_view: + node_values = {key: val.copy() for key, val in node_values.items()} + node_masks = {key: val.copy() for key, val in node_masks.items()} + if key_to_id is not None: + key_to_id = key_to_id.copy() + if id_to_key is not None: + id_to_key = id_to_key.copy() + rv = self.to_undirected_class().from_coo( + N, + src_indices, + dst_indices, + edge_values, + edge_masks, + node_values, + node_masks, + key_to_id=key_to_id, + id_to_key=id_to_key, + ) + if as_view: + rv.graph = self.graph + else: + rv.graph.update(deepcopy(self.graph)) + return rv + # Many more methods to implement... ################### diff --git a/python/nx-cugraph/nx_cugraph/classes/graph.py b/python/nx-cugraph/nx_cugraph/classes/graph.py index 251e92b70ec..45f81ad1117 100644 --- a/python/nx-cugraph/nx_cugraph/classes/graph.py +++ b/python/nx-cugraph/nx_cugraph/classes/graph.py @@ -531,7 +531,7 @@ def to_directed(self, as_view: bool = False) -> nxcg.DiGraph: @networkx_api def to_undirected(self, as_view: bool = False) -> Graph: # Does deep copy in networkx - return self.copy(as_view) + return self._copy(as_view, self.to_undirected_class()) # Not implemented... # adj, adjacency, add_edge, add_edges_from, add_node, @@ -742,6 +742,11 @@ def _degrees_array(self): _out_degrees_array = _degrees_array # Data conversions + def _nodekeys_to_nodearray(self, nodes: Iterable[NodeKey]) -> cp.array[IndexValue]: + if self.key_to_id is None: + return cp.fromiter(nodes, dtype=index_dtype) + return cp.fromiter(map(self.key_to_id.__getitem__, nodes), dtype=index_dtype) + def _nodeiter_to_iter(self, node_ids: Iterable[IndexValue]) -> Iterable[NodeKey]: """Convert an iterable of node IDs to an iterable of node keys.""" if (id_to_key := self.id_to_key) is not None: diff --git a/python/nx-cugraph/nx_cugraph/classes/multidigraph.py b/python/nx-cugraph/nx_cugraph/classes/multidigraph.py index 2c7bfc00752..2e7a55a9eb1 100644 --- a/python/nx-cugraph/nx_cugraph/classes/multidigraph.py +++ b/python/nx-cugraph/nx_cugraph/classes/multidigraph.py @@ -1,4 +1,4 @@ -# Copyright (c) 2023, NVIDIA CORPORATION. +# Copyright (c) 2023-2024, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at @@ -33,3 +33,11 @@ def is_directed(cls) -> bool: @classmethod def to_networkx_class(cls) -> type[nx.MultiDiGraph]: return nx.MultiDiGraph + + ########################## + # NetworkX graph methods # + ########################## + + @networkx_api + def to_undirected(self, reciprocal=False, as_view=False): + raise NotImplementedError diff --git a/python/nx-cugraph/nx_cugraph/classes/multigraph.py b/python/nx-cugraph/nx_cugraph/classes/multigraph.py index 23466dc7dd4..fb787369e58 100644 --- a/python/nx-cugraph/nx_cugraph/classes/multigraph.py +++ b/python/nx-cugraph/nx_cugraph/classes/multigraph.py @@ -1,4 +1,4 @@ -# Copyright (c) 2023, NVIDIA CORPORATION. +# Copyright (c) 2023-2024, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at @@ -399,7 +399,7 @@ def to_directed(self, as_view: bool = False) -> nxcg.MultiDiGraph: @networkx_api def to_undirected(self, as_view: bool = False) -> MultiGraph: # Does deep copy in networkx - return self.copy(as_view) + return self._copy(as_view, self.to_undirected_class()) ################### # Private methods # diff --git a/python/nx-cugraph/nx_cugraph/tests/test_generators.py b/python/nx-cugraph/nx_cugraph/tests/test_generators.py index 511f8dcd8e2..c751b0fe2b3 100644 --- a/python/nx-cugraph/nx_cugraph/tests/test_generators.py +++ b/python/nx-cugraph/nx_cugraph/tests/test_generators.py @@ -1,4 +1,4 @@ -# Copyright (c) 2023, NVIDIA CORPORATION. +# Copyright (c) 2023-2024, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at @@ -17,30 +17,9 @@ import nx_cugraph as nxcg -nxver = parse(nx.__version__) - +from .testing_utils import assert_graphs_equal -def assert_graphs_equal(Gnx, Gcg): - assert isinstance(Gnx, nx.Graph) - assert isinstance(Gcg, nxcg.Graph) - assert Gnx.number_of_nodes() == Gcg.number_of_nodes() - assert Gnx.number_of_edges() == Gcg.number_of_edges() - assert Gnx.is_directed() == Gcg.is_directed() - assert Gnx.is_multigraph() == Gcg.is_multigraph() - G = nxcg.to_networkx(Gcg) - rv = nx.utils.graphs_equal(G, Gnx) - if not rv: - print("GRAPHS ARE NOT EQUAL!") - assert sorted(G) == sorted(Gnx) - assert sorted(G._adj) == sorted(Gnx._adj) - assert sorted(G._node) == sorted(Gnx._node) - for k in sorted(G._adj): - print(k, sorted(G._adj[k]), sorted(Gnx._adj[k])) - print(nx.to_scipy_sparse_array(G).todense()) - print(nx.to_scipy_sparse_array(Gnx).todense()) - print(G.graph) - print(Gnx.graph) - assert rv +nxver = parse(nx.__version__) if nxver.major == 3 and nxver.minor < 2: diff --git a/python/nx-cugraph/nx_cugraph/tests/test_graph_methods.py b/python/nx-cugraph/nx_cugraph/tests/test_graph_methods.py new file mode 100644 index 00000000000..3120995a2b2 --- /dev/null +++ b/python/nx-cugraph/nx_cugraph/tests/test_graph_methods.py @@ -0,0 +1,67 @@ +# Copyright (c) 2023-2024, NVIDIA CORPORATION. +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import networkx as nx +import pytest + +import nx_cugraph as nxcg + +from .testing_utils import assert_graphs_equal + + +def _create_Gs(): + rv = [] + rv.append(nx.DiGraph()) + G = nx.DiGraph() + G.add_edge(0, 1) + G.add_edge(1, 0) + rv.append(G) + G = G.copy() + G.add_edge(0, 2) + rv.append(G) + G = G.copy() + G.add_edge(1, 1) + rv.append(G) + G = nx.DiGraph() + G.add_edge(0, 1, x=1, y=2) + G.add_edge(1, 0, x=10, z=3) + rv.append(G) + G = G.copy() + G.add_edge(0, 2, a=42) + rv.append(G) + G = G.copy() + G.add_edge(1, 1, a=4) + rv.append(G) + return rv + + +@pytest.mark.parametrize("Gnx", _create_Gs()) +@pytest.mark.parametrize("reciprocal", [False, True]) +def test_to_undirected_directed(Gnx, reciprocal): + Gcg = nxcg.DiGraph(Gnx) + assert_graphs_equal(Gnx, Gcg) + Hnx1 = Gnx.to_undirected(reciprocal=reciprocal) + Hcg1 = Gcg.to_undirected(reciprocal=reciprocal) + assert_graphs_equal(Hnx1, Hcg1) + Hnx2 = Hnx1.to_directed() + Hcg2 = Hcg1.to_directed() + assert_graphs_equal(Hnx2, Hcg2) + + +def test_multidigraph_to_undirected(): + Gnx = nx.MultiDiGraph() + Gnx.add_edge(0, 1) + Gnx.add_edge(0, 1) + Gnx.add_edge(1, 0) + Gcg = nxcg.MultiDiGraph(Gnx) + with pytest.raises(NotImplementedError): + Gcg.to_undirected() diff --git a/python/nx-cugraph/nx_cugraph/tests/testing_utils.py b/python/nx-cugraph/nx_cugraph/tests/testing_utils.py new file mode 100644 index 00000000000..6d4741c9ca6 --- /dev/null +++ b/python/nx-cugraph/nx_cugraph/tests/testing_utils.py @@ -0,0 +1,38 @@ +# Copyright (c) 2023-2024, NVIDIA CORPORATION. +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import networkx as nx + +import nx_cugraph as nxcg + + +def assert_graphs_equal(Gnx, Gcg): + assert isinstance(Gnx, nx.Graph) + assert isinstance(Gcg, nxcg.Graph) + assert Gnx.number_of_nodes() == Gcg.number_of_nodes() + assert Gnx.number_of_edges() == Gcg.number_of_edges() + assert Gnx.is_directed() == Gcg.is_directed() + assert Gnx.is_multigraph() == Gcg.is_multigraph() + G = nxcg.to_networkx(Gcg) + rv = nx.utils.graphs_equal(G, Gnx) + if not rv: + print("GRAPHS ARE NOT EQUAL!") + assert sorted(G) == sorted(Gnx) + assert sorted(G._adj) == sorted(Gnx._adj) + assert sorted(G._node) == sorted(Gnx._node) + for k in sorted(G._adj): + print(k, sorted(G._adj[k]), sorted(Gnx._adj[k])) + print(nx.to_scipy_sparse_array(G).todense()) + print(nx.to_scipy_sparse_array(Gnx).todense()) + print(G.graph) + print(Gnx.graph) + assert rv