Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fixing Nx and Graph/DiGraph issues #1882

Merged
merged 13 commits into from
Nov 1, 2021
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,6 @@
df_score_to_dictionary,
ensure_cugraph_obj_for_nx,
)
import cugraph


# NOTE: result_type=float could be an intuitive way to indicate the result type
Expand Down Expand Up @@ -262,7 +261,7 @@ def edge_betweenness_centrality(
df = G.unrenumber(df, "src")
df = G.unrenumber(df, "dst")

if type(G) is cugraph.Graph:
if G.is_directed() is False:
# select the lower triangle of the df based on src/dst vertex value
lower_triangle = df['src'] >= df['dst']
# swap the src and dst vertices for the lower triangle only. Because
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -88,7 +88,7 @@ def __from_edgelist(
raise Exception(
"edge_attr column name not found in input."
"Recheck the edge_attr parameter")
self.weighted = True
self.properties.weighted = True
ddf_columns = ddf_columns + [edge_attr]
input_ddf = input_ddf[ddf_columns]

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -152,7 +152,7 @@ def __from_edgelist(
dest_col = elist[destination]

if edge_attr is not None:
self.weighted = True
self.properties.weighted = True
value_col = elist[edge_attr]
else:
value_col = None
Expand Down
17 changes: 9 additions & 8 deletions python/cugraph/cugraph/tests/test_bfs.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,15 @@
from cugraph.tests import utils
import random

import pandas as pd
import cupy as cp
from cupyx.scipy.sparse.coo import coo_matrix as cp_coo_matrix
from cupyx.scipy.sparse.csr import csr_matrix as cp_csr_matrix
from cupyx.scipy.sparse.csc import csc_matrix as cp_csc_matrix
from scipy.sparse.coo import coo_matrix as sp_coo_matrix
from scipy.sparse.csr import csr_matrix as sp_csr_matrix
from scipy.sparse.csc import csc_matrix as sp_csc_matrix

# Temporarily suppress warnings till networkX fixes deprecation warnings
# (Using or importing the ABCs from 'collections' instead of from
# 'collections.abc' is deprecated, and in 3.8 it will stop working) for
Expand All @@ -33,14 +42,6 @@
import networkx as nx
import networkx.algorithms.centrality.betweenness as nxacb

import pandas as pd
import cupy as cp
from cupyx.scipy.sparse.coo import coo_matrix as cp_coo_matrix
from cupyx.scipy.sparse.csr import csr_matrix as cp_csr_matrix
from cupyx.scipy.sparse.csc import csc_matrix as cp_csc_matrix
from scipy.sparse.coo import coo_matrix as sp_coo_matrix
from scipy.sparse.csr import csr_matrix as sp_csr_matrix
from scipy.sparse.csc import csc_matrix as sp_csc_matrix

# =============================================================================
# Parameters
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -496,6 +496,7 @@ def test_edge_betweenness_centrality_nx(
prepare_test()

Gnx = utils.generate_nx_graph_from_file(graph_file, directed, edgevals)
assert nx.is_directed(Gnx) == directed

nx_bc = nx.edge_betweenness_centrality(Gnx)
cu_bc = cugraph.edge_betweenness_centrality(Gnx)
Expand All @@ -504,7 +505,8 @@ def test_edge_betweenness_centrality_nx(
networkx_bc = sorted(nx_bc.items(), key=lambda x: x[0])
cugraph_bc = sorted(cu_bc.items(), key=lambda x: x[0])
err = 0
assert len(cugraph_bc) == len(networkx_bc)

assert len(networkx_bc) == len(cugraph_bc)
for i in range(len(cugraph_bc)):
if (
abs(cugraph_bc[i][1] - networkx_bc[i][1]) > 0.01
Expand Down
38 changes: 32 additions & 6 deletions python/cugraph/cugraph/tests/test_modularity.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,9 @@
import cudf
import cugraph
from cugraph.tests import utils
from cugraph.utilities import ensure_cugraph_obj_for_nx

import networkx as nx


def cugraph_call(G, partitions):
Expand Down Expand Up @@ -51,7 +54,6 @@ def random_call(G, partitions):
PARTITIONS = [2, 4, 8]


# Test all combinations of default/managed and pooled/non-pooled allocation
@pytest.mark.parametrize("graph_file", utils.DATASETS)
@pytest.mark.parametrize("partitions", PARTITIONS)
def test_modularity_clustering(graph_file, partitions):
Expand All @@ -71,12 +73,38 @@ def test_modularity_clustering(graph_file, partitions):
assert cu_score > rand_score


# Test all combinations of default/managed and pooled/non-pooled allocation
@pytest.mark.parametrize("graph_file", utils.DATASETS)
@pytest.mark.parametrize("partitions", PARTITIONS)
def test_modularity_clustering_multi_column(graph_file, partitions):
gc.collect()
def test_modularity_clustering_nx(graph_file, partitions):
    """Check modularity clustering on a graph that originates from NetworkX.

    A weighted NetworkX DiGraph is built from the dataset, converted with
    ``ensure_cugraph_obj_for_nx``, and the modularity score of the computed
    partitioning is required to beat the score of a random assignment.
    """
    # Load the edge list (including the weight column) as a pandas frame
    edge_pdf = utils.read_csv_for_nx(graph_file, read_weights_in_sp=True)

    # Build the directed, weighted NetworkX graph and sanity-check it
    nx_graph = nx.from_pandas_edgelist(
        edge_pdf,
        source="0",
        target="1",
        edge_attr="weight",
        create_using=nx.DiGraph(),
    )
    assert nx.is_directed(nx_graph) is True
    assert nx.is_weighted(nx_graph) is True

    # The conversion must keep both the direction and the weights
    cu_graph, _is_nx = ensure_cugraph_obj_for_nx(nx_graph)
    assert cugraph.is_directed(cu_graph) is True
    assert cugraph.is_weighted(cu_graph) is True

    # Modularity score of the real partitioning vs. a random assignment
    clustered_score = cugraph_call(cu_graph, partitions)
    random_score = random_call(cu_graph, partitions)

    # The computed partitioning has to score better than the random one
    assert clustered_score > random_score


@pytest.mark.parametrize("graph_file", utils.DATASETS)
@pytest.mark.parametrize("partitions", PARTITIONS)
def test_modularity_clustering_multi_column(graph_file, partitions):
# Read in the graph and get a cugraph object
cu_M = utils.read_csv_file(graph_file, read_weights_in_sp=False)
cu_M.rename(columns={'0': 'src_0', '1': 'dst_0'}, inplace=True)
Expand Down Expand Up @@ -113,8 +141,6 @@ def test_modularity_clustering_multi_column(graph_file, partitions):


def test_digraph_rejected():
gc.collect()

df = cudf.DataFrame()
df["src"] = cudf.Series(range(10))
df["dst"] = cudf.Series(range(10))
Expand Down
66 changes: 56 additions & 10 deletions python/cugraph/cugraph/tests/test_nx_convert.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,8 +11,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.

import gc

import pandas as pd
import pytest
import cudf

Expand All @@ -39,13 +38,33 @@ def _compare_graphs(nxG, cuG, has_wt=True):
cu_df = cuG.view_edge_list().to_pandas()
BradReesWork marked this conversation as resolved.
Show resolved Hide resolved
if has_wt is True:
cu_df = cu_df.drop(columns=["weights"])

out_of_order = cu_df[cu_df['src'] > cu_df['dst']]
if len(out_of_order) > 0:
out_of_order = out_of_order.rename(
columns={"src": "dst", "dst": "src"})
right_order = cu_df[cu_df['src'] < cu_df['dst']]
cu_df = pd.concat([out_of_order, right_order])
del out_of_order
del right_order
cu_df = cu_df.sort_values(by=["src", "dst"]).reset_index(drop=True)

nx_df = nx.to_pandas_edgelist(nxG)
if has_wt is True:
nx_df = nx_df.drop(columns=["weight"])
nx_df = nx_df.rename(columns={"source": "src", "target": "dst"})
nx_df = nx_df.astype('int32')

out_of_order = nx_df[nx_df['src'] > nx_df['dst']]
BradReesWork marked this conversation as resolved.
Show resolved Hide resolved
if len(out_of_order) > 0:
out_of_order = out_of_order.rename(
columns={"src": "dst", "dst": "src"})
right_order = nx_df[nx_df['src'] < nx_df['dst']]

nx_df = pd.concat([out_of_order, right_order])
del out_of_order
del right_order

nx_df = nx_df.sort_values(by=["src", "dst"]).reset_index(drop=True)

assert cu_df.to_dict() == nx_df.to_dict()
Expand All @@ -54,9 +73,6 @@ def _compare_graphs(nxG, cuG, has_wt=True):
@pytest.mark.parametrize("graph_file", utils.DATASETS)
def test_networkx_compatibility(graph_file):
# test to make sure cuGraph and Nx build similar Graphs

gc.collect()

# Read in the graph
M = utils.read_csv_for_nx(graph_file, read_weights_in_sp=True)

Expand All @@ -81,22 +97,52 @@ def test_networkx_compatibility(graph_file):


@pytest.mark.parametrize("graph_file", utils.DATASETS)
def test_nx_convert(graph_file):
gc.collect()

def test_nx_convert_undirected(graph_file):
    """Convert an unweighted, undirected NetworkX Graph and compare edges.

    The graph produced by ``convert_from_nx`` must also report itself as
    undirected and unweighted, and ``_compare_graphs`` must accept the pair.
    """
    # Load the edge list and build the NetworkX Graph from it
    edge_pdf = utils.read_csv_for_nx(graph_file)
    nx_graph = nx.from_pandas_edgelist(
        edge_pdf, source="0", target="1", create_using=nx.Graph
    )
    assert nx.is_directed(nx_graph) is False
    assert nx.is_weighted(nx_graph) is False

    # Convert and verify the properties carried over
    cu_graph = cugraph.utilities.convert_from_nx(nx_graph)
    assert cu_graph.is_directed() is False
    assert cu_graph.is_weighted() is False

    _compare_graphs(nx_graph, cu_graph, has_wt=False)


@pytest.mark.parametrize("graph_file", utils.DATASETS)
def test_nx_convert_directed(graph_file):
    """Convert an unweighted NetworkX DiGraph and compare edges.

    The graph produced by ``convert_from_nx`` must report itself as directed
    and unweighted, and ``_compare_graphs`` must accept the pair.
    """
    # Load the edge list and build the NetworkX DiGraph from it
    edge_pdf = utils.read_csv_for_nx(graph_file)
    nx_graph = nx.from_pandas_edgelist(
        edge_pdf, source="0", target="1", create_using=nx.DiGraph
    )
    assert nx_graph.is_directed() is True

    # Convert and verify the properties carried over
    cu_graph = cugraph.utilities.convert_from_nx(nx_graph)
    assert cu_graph.is_directed() is True
    assert cu_graph.is_weighted() is False

    _compare_graphs(nx_graph, cu_graph, has_wt=False)


@pytest.mark.parametrize("graph_file", utils.DATASETS)
def test_nx_convert_multicol(graph_file):
gc.collect()
def test_nx_convert_weighted(graph_file):
    """Convert a weighted NetworkX DiGraph and compare edges.

    The graph produced by ``convert_from_nx`` must report itself as directed
    and weighted, and ``_compare_graphs`` must accept the pair.
    """
    # Load the edge list with its weight column and build the DiGraph
    edge_pdf = utils.read_csv_for_nx(graph_file, read_weights_in_sp=True)
    nx_graph = nx.from_pandas_edgelist(
        edge_pdf,
        source="0",
        target="1",
        edge_attr="weight",
        create_using=nx.DiGraph,
    )
    assert nx.is_directed(nx_graph) is True
    assert nx.is_weighted(nx_graph) is True

    # Convert and verify direction and weights carried over
    cu_graph = cugraph.utilities.convert_from_nx(nx_graph)
    assert cugraph.is_directed(cu_graph) is True
    assert cugraph.is_weighted(cu_graph) is True

    _compare_graphs(nx_graph, cu_graph, has_wt=True)


@pytest.mark.parametrize("graph_file", utils.DATASETS)
def test_nx_convert_multicol(graph_file):
# read data and create a Nx Graph
nx_df = utils.read_csv_for_nx(graph_file)

Expand Down
33 changes: 21 additions & 12 deletions python/cugraph/cugraph/utilities/nx_factory.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,20 +28,23 @@
nx = import_optional("networkx")


def convert_from_nx(nxG, weight=None):
def convert_from_nx(nxG, weight=None, do_renumber=True):
BradReesWork marked this conversation as resolved.
Show resolved Hide resolved
"""
weight, if given, is the string/name of the edge attr in nxG to use for
weights in the resulting cugraph obj. If nxG has no edge attributes,
weight is ignored even if specified.
"""
if type(nxG) == nx.classes.graph.Graph:
if isinstance(nxG, nx.classes.digraph.DiGraph):
G = cugraph.Graph(directed=True)
elif isinstance(nxG, nx.classes.graph.Graph):
G = cugraph.Graph()
elif type(nxG) == nx.classes.digraph.DiGraph:
G = cugraph.DiGraph()
else:
raise ValueError("nxG does not appear to be a NetworkX graph type")
raise ValueError(
"nxG does not appear to be a supported NetworkX graph type")

is_weighted = nx.is_weighted(nxG)
pdf = nx.to_pandas_edgelist(nxG)

# Convert vertex columns to strings if they are not integers
# This allows support for any vertex input type
if pdf["source"].dtype not in [np.int32, np.int64] or \
Expand All @@ -54,20 +57,26 @@ def convert_from_nx(nxG, weight=None):
if num_col < 2:
raise ValueError("NetworkX graph did not contain edges")

if weight is None:
num_col == 2
if num_col == 2:
pdf = pdf[["source", "target"]]

if num_col >= 3 and weight is not None:
pdf = pdf[["source", "target", weight]]
num_col = 3
if num_col >= 3:
if is_weighted is False:
pdf = pdf[["source", "target"]]
elif weight is None:
pdf = pdf[["source", "target", "weight"]]
weight = "weight"
BradReesWork marked this conversation as resolved.
Show resolved Hide resolved
else:
pdf = pdf[["source", "target", weight]]

gdf = from_pandas(pdf)

if num_col == 2:
G.from_cudf_edgelist(gdf, "source", "target")
G.from_cudf_edgelist(gdf, source="source", destination="target",
renumber=do_renumber)
else:
G.from_cudf_edgelist(gdf, "source", "target", weight)
G.from_cudf_edgelist(gdf, source="source", destination="target",
edge_attr=weight, renumber=do_renumber)

del gdf
del pdf
Expand Down