Skip to content

Commit

Permalink
nx-cugraph: handle louvain with isolated nodes (#3897)
Browse files Browse the repository at this point in the history
This handles isolated nodes in `louvain_communities` similar to what is done in #3886. This is expected to be a temporary fix until pylibcugraph can handle isolated nodes.

As a bonus, I added the `isolates` algorithms (`is_isolate`, `isolates`, and `number_of_isolates`) 🎉

CC @naimnv @rlratzel

Authors:
  - Erik Welch (https://github.com/eriknw)

Approvers:
  - Rick Ratzel (https://github.com/rlratzel)

URL: #3897
  • Loading branch information
eriknw authored Oct 3, 2023
1 parent a863835 commit 5ce3ee1
Show file tree
Hide file tree
Showing 9 changed files with 151 additions and 13 deletions.
7 changes: 5 additions & 2 deletions python/nx-cugraph/_nx_cugraph/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,20 +31,23 @@
# BEGIN: functions
"betweenness_centrality",
"edge_betweenness_centrality",
"is_isolate",
"isolates",
"louvain_communities",
"number_of_isolates",
# END: functions
},
"extra_docstrings": {
# BEGIN: extra_docstrings
"betweenness_centrality": "`weight` parameter is not yet supported.",
"edge_betweenness_centrality": "`weight` parameter is not yet supported.",
"louvain_communities": "`threshold` and `seed` parameters are currently ignored.",
"louvain_communities": "`seed` parameter is currently ignored.",
# END: extra_docstrings
},
"extra_parameters": {
# BEGIN: extra_parameters
"louvain_communities": {
"max_level : int, optional": "Upper limit of the number of macro-iterations.",
"max_level : int, optional": "Upper limit of the number of macro-iterations (max: 500).",
},
# END: extra_parameters
},
Expand Down
2 changes: 1 addition & 1 deletion python/nx-cugraph/lint.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,7 @@ repos:
# These versions need to be updated manually
- flake8==6.1.0
- flake8-bugbear==23.9.16
- flake8-simplify==0.20.0
- flake8-simplify==0.21.0
- repo: https://github.com/asottile/yesqa
rev: v1.5.0
hooks:
Expand Down
1 change: 1 addition & 0 deletions python/nx-cugraph/nx_cugraph/algorithms/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,3 +12,4 @@
# limitations under the License.
from . import centrality, community
from .centrality import *
from .isolate import *
28 changes: 23 additions & 5 deletions python/nx-cugraph/nx_cugraph/algorithms/community/louvain.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import sys
import warnings

import pylibcugraph as plc

Expand All @@ -22,27 +22,38 @@
not_implemented_for,
)

from ..isolate import _isolates

__all__ = ["louvain_communities"]


@not_implemented_for("directed")
@networkx_algorithm(
extra_params={
"max_level : int, optional": "Upper limit of the number of macro-iterations."
"max_level : int, optional": (
"Upper limit of the number of macro-iterations (max: 500)."
)
}
)
def louvain_communities(
G, weight="weight", resolution=1, threshold=0.0000001, seed=None, *, max_level=None
):
"""`threshold` and `seed` parameters are currently ignored."""
"""`seed` parameter is currently ignored."""
# NetworkX allows both directed and undirected, but cugraph only allows undirected.
seed = _seed_to_int(seed) # Unused, but ensure it's valid for future compatibility
G = _to_undirected_graph(G, weight)
if G.row_indices.size == 0:
# TODO: PLC doesn't handle empty graphs gracefully!
return [{key} for key in G._nodeiter_to_iter(range(len(G)))]
if max_level is None:
max_level = sys.maxsize
max_level = 500
elif max_level > 500:
warnings.warn(
f"max_level is set too high (={max_level}), setting it to 500.",
UserWarning,
stacklevel=2,
)
max_level = 500
vertices, clusters, modularity = plc.louvain(
resource_handle=plc.ResourceHandle(),
graph=G._get_plc_graph(),
Expand All @@ -52,7 +63,14 @@ def louvain_communities(
do_expensive_check=False,
)
groups = _groupby(clusters, vertices)
return [set(G._nodearray_to_list(node_ids)) for node_ids in groups.values()]
rv = [set(G._nodearray_to_list(node_ids)) for node_ids in groups.values()]
# TODO: PLC doesn't handle isolated vertices yet, so this is a temporary fix
isolates = _isolates(G)
if isolates.size > 0:
isolates = isolates[isolates > vertices.max()]
if isolates.size > 0:
rv.extend({node} for node in G._nodearray_to_list(isolates))
return rv


@louvain_communities._can_run
Expand Down
63 changes: 63 additions & 0 deletions python/nx-cugraph/nx_cugraph/algorithms/isolate.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
# Copyright (c) 2023, NVIDIA CORPORATION.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import annotations

from typing import TYPE_CHECKING

import cupy as cp

from nx_cugraph.convert import _to_graph
from nx_cugraph.utils import networkx_algorithm

if TYPE_CHECKING: # pragma: no cover
from nx_cugraph.typing import IndexValue

__all__ = ["is_isolate", "isolates", "number_of_isolates"]


@networkx_algorithm
def is_isolate(G, n):
    # Report whether node `n` has no incident edges in `G`.
    #
    # NOTE(review): documented with comments rather than a docstring on purpose;
    # docstrings of `@networkx_algorithm` functions appear to be surfaced as
    # user-facing "extra docstrings" elsewhere in the package -- confirm before
    # adding one here.
    graph = _to_graph(G)
    # Translate the user-facing node key into the internal integer index
    # (graphs without a key mapping use the key itself as the index).
    if graph.key_to_id is None:
        node_id = n
    else:
        node_id = graph.key_to_id[n]
    # Any edge whose row index is this node means it is not isolated.
    if (graph.row_indices == node_id).any().tolist():
        return False
    # Only directed graphs additionally need the column indices checked.
    # (Presumably undirected graphs store each edge in both directions, so
    # the row check above suffices -- TODO confirm.)
    if not graph.is_directed():
        return True
    return not (graph.col_indices == node_id).any().tolist()


def _mark_isolates(G) -> cp.ndarray[bool]:
    """Build a boolean mask over node indices that is True for isolated nodes.

    Every index starts out flagged as isolated.  Indices that occur in
    ``G.row_indices`` are then cleared and, for directed graphs only, indices
    that occur in ``G.col_indices`` are cleared as well.
    """
    node_count = len(G)
    isolated = cp.ones(node_count, dtype=bool)
    # A node that appears as the row endpoint of any edge is not isolated.
    isolated[G.row_indices] = False
    if not G.is_directed():
        return isolated
    # Directed graphs: a node that only receives edges is not isolated either.
    isolated[G.col_indices] = False
    return isolated


def _isolates(G) -> cp.ndarray[IndexValue]:
    """Return an array of the indices of isolated nodes.

    Array-returning counterpart of ``isolates``, which instead yields nodes
    through an iterator.
    """
    graph = _to_graph(G)
    isolated_mask = _mark_isolates(graph)
    # The mask is one-dimensional, so `nonzero` yields exactly one index array.
    (isolated_ids,) = cp.nonzero(isolated_mask)
    return isolated_ids


@networkx_algorithm
def isolates(G):
    # Return an iterator over the isolated nodes of `G`.
    #
    # NOTE(review): comments are used instead of a docstring on purpose; see how
    # docstrings of `@networkx_algorithm` functions are reused as user-facing
    # text elsewhere in the package -- confirm before adding one.
    graph = _to_graph(G)
    # Copy the isolated indices into a plain Python list before iterating.
    isolated_ids = _isolates(graph).tolist()
    # Let the graph turn the index iterator into an iterator over its nodes.
    return graph._nodeiter_to_iter(iter(isolated_ids))


@networkx_algorithm
def number_of_isolates(G):
    # Count the isolated nodes of `G`.
    #
    # NOTE(review): no docstring on purpose; docstrings of
    # `@networkx_algorithm` functions appear to be surfaced as user-facing
    # text elsewhere in the package -- confirm before adding one.
    graph = _to_graph(G)
    isolated_mask = _mark_isolates(graph)
    # Summing the boolean mask counts the True entries; `.tolist()` converts
    # the array scalar into a plain Python number.
    return isolated_mask.sum().tolist()
2 changes: 1 addition & 1 deletion python/nx-cugraph/nx_cugraph/classes/digraph.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@

from .graph import Graph

if TYPE_CHECKING:
if TYPE_CHECKING: # pragma: no cover
from nx_cugraph.typing import NodeKey

__all__ = ["DiGraph"]
Expand Down
6 changes: 3 additions & 3 deletions python/nx-cugraph/nx_cugraph/classes/graph.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@

import nx_cugraph as nxcg

if TYPE_CHECKING:
if TYPE_CHECKING: # pragma: no cover
from collections.abc import Iterable, Iterator

from nx_cugraph.typing import (
Expand Down Expand Up @@ -245,9 +245,9 @@ def from_dcsc(
def __new__(cls, incoming_graph_data=None, **attr) -> Graph:
if incoming_graph_data is None:
new_graph = cls.from_coo(0, cp.empty(0, np.int32), cp.empty(0, np.int32))
elif incoming_graph_data.__class__ is new_graph.__class__:
elif incoming_graph_data.__class__ is cls:
new_graph = incoming_graph_data.copy()
elif incoming_graph_data.__class__ is new_graph.to_networkx_class():
elif incoming_graph_data.__class__ is cls.to_networkx_class():
new_graph = nxcg.from_networkx(incoming_graph_data, preserve_all_attrs=True)
else:
raise NotImplementedError
Expand Down
2 changes: 1 addition & 1 deletion python/nx-cugraph/nx_cugraph/convert.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@

import nx_cugraph as nxcg

if TYPE_CHECKING:
if TYPE_CHECKING: # pragma: no cover
from nx_cugraph.typing import AttrKey, Dtype, EdgeValue, NodeValue

__all__ = [
Expand Down
53 changes: 53 additions & 0 deletions python/nx-cugraph/nx_cugraph/tests/test_community.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
# Copyright (c) 2023, NVIDIA CORPORATION.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import networkx as nx
import pytest

import nx_cugraph as nxcg


def test_louvain_isolated_nodes():
    """Check nx-cugraph's Louvain against NetworkX on graphs with isolated nodes.

    One graph is grown step by step (empty -> five edgeless nodes -> one
    edge -> one extra self-loop) and the two implementations are compared
    after every step.
    """
    # The presence of `nx.classes.backends` is used to detect NetworkX 3.0/3.1.
    is_nx_30_or_31 = hasattr(nx.classes, "backends")

    def assert_same_partition(left, right):
        # Same number of communities and the same communities, in any order.
        assert len(left) == len(right)
        assert {frozenset(c) for c in left} == {frozenset(c) for c in right}

    def compare_backends(graph):
        nx_result = nx.community.louvain_communities(graph)
        cg_result = nxcg.community.louvain_communities(graph)
        assert_same_partition(nx_result, cg_result)

    def compare_edgeless(graph):
        # On NetworkX 3.0/3.1 the reference implementation is expected to
        # raise ZeroDivisionError for graphs without edges, so only that
        # failure is asserted; otherwise both backends are compared.
        if is_nx_30_or_31:
            with pytest.raises(ZeroDivisionError):
                nx.community.louvain_communities(graph)
        else:
            compare_backends(graph)

    # Empty graph (no nodes)
    G = nx.Graph()
    compare_edgeless(G)
    # Graph with no edges
    G.add_nodes_from(range(5))
    compare_edgeless(G)
    # Graph with isolated nodes
    G.add_edge(1, 2)
    compare_backends(G)
    # Another one: add a self-loop on node 4
    G.add_edge(4, 4)
    compare_backends(G)

0 comments on commit 5ce3ee1

Please sign in to comment.