diff --git a/python/cugraph/cugraph/centrality/betweenness_centrality.py b/python/cugraph/cugraph/centrality/betweenness_centrality.py index d86349504da..458f282dc2b 100644 --- a/python/cugraph/cugraph/centrality/betweenness_centrality.py +++ b/python/cugraph/cugraph/centrality/betweenness_centrality.py @@ -20,7 +20,6 @@ df_score_to_dictionary, ensure_cugraph_obj_for_nx, ) -import cugraph # NOTE: result_type=float could be an intuitive way to indicate the result type @@ -262,7 +261,7 @@ def edge_betweenness_centrality( df = G.unrenumber(df, "src") df = G.unrenumber(df, "dst") - if type(G) is cugraph.Graph: + if G.is_directed() is False: # select the lower triangle of the df based on src/dst vertex value lower_triangle = df['src'] >= df['dst'] # swap the src and dst vertices for the lower triangle only. Because diff --git a/python/cugraph/cugraph/structure/graph_implementation/simpleDistributedGraph.py b/python/cugraph/cugraph/structure/graph_implementation/simpleDistributedGraph.py index 10d46cc3fed..6e80c94c9ef 100644 --- a/python/cugraph/cugraph/structure/graph_implementation/simpleDistributedGraph.py +++ b/python/cugraph/cugraph/structure/graph_implementation/simpleDistributedGraph.py @@ -88,7 +88,7 @@ def __from_edgelist( raise Exception( "edge_attr column name not found in input." "Recheck the edge_attr parameter") - self.weighted = True + self.properties.weighted = True ddf_columns = ddf_columns + [edge_attr] input_ddf = input_ddf[ddf_columns] diff --git a/python/cugraph/cugraph/structure/graph_implementation/simpleGraph.py b/python/cugraph/cugraph/structure/graph_implementation/simpleGraph.py index e74b04c00b5..56a24d6e114 100644 --- a/python/cugraph/cugraph/structure/graph_implementation/simpleGraph.py +++ b/python/cugraph/cugraph/structure/graph_implementation/simpleGraph.py @@ -152,7 +152,7 @@ def __from_edgelist( dest_col = elist[destination] if edge_attr is not None: - self.weighted = True + self.properties.weighted = True value_col = elist[edge_attr] else: value_col = None diff --git a/python/cugraph/cugraph/tests/test_bfs.py b/python/cugraph/cugraph/tests/test_bfs.py index a8547d692c2..a2f16500d47 100644 --- a/python/cugraph/cugraph/tests/test_bfs.py +++ b/python/cugraph/cugraph/tests/test_bfs.py @@ -21,6 +21,15 @@ from cugraph.tests import utils import random +import pandas as pd +import cupy as cp +from cupyx.scipy.sparse.coo import coo_matrix as cp_coo_matrix +from cupyx.scipy.sparse.csr import csr_matrix as cp_csr_matrix +from cupyx.scipy.sparse.csc import csc_matrix as cp_csc_matrix +from scipy.sparse.coo import coo_matrix as sp_coo_matrix +from scipy.sparse.csr import csr_matrix as sp_csr_matrix +from scipy.sparse.csc import csc_matrix as sp_csc_matrix + # Temporarily suppress warnings till networkX fixes deprecation warnings # (Using or importing the ABCs from 'collections' instead of from # 'collections.abc' is deprecated, and in 3.8 it will stop working) for @@ -33,14 +42,6 @@ import networkx as nx import networkx.algorithms.centrality.betweenness as nxacb -import pandas as pd -import cupy as cp -from cupyx.scipy.sparse.coo import coo_matrix as cp_coo_matrix -from cupyx.scipy.sparse.csr import csr_matrix as cp_csr_matrix -from cupyx.scipy.sparse.csc import csc_matrix as cp_csc_matrix -from scipy.sparse.coo import coo_matrix as sp_coo_matrix -from scipy.sparse.csr import csr_matrix as sp_csr_matrix -from scipy.sparse.csc import csc_matrix as sp_csc_matrix # ============================================================================= # Parameters diff --git a/python/cugraph/cugraph/tests/test_edge_betweenness_centrality.py b/python/cugraph/cugraph/tests/test_edge_betweenness_centrality.py index 6caad0d9fad..c70a17839aa 100644 --- a/python/cugraph/cugraph/tests/test_edge_betweenness_centrality.py +++ b/python/cugraph/cugraph/tests/test_edge_betweenness_centrality.py @@ -496,6 +496,7 @@ def test_edge_betweenness_centrality_nx( prepare_test() Gnx = utils.generate_nx_graph_from_file(graph_file, directed, edgevals) + assert nx.is_directed(Gnx) == directed nx_bc = nx.edge_betweenness_centrality(Gnx) cu_bc = cugraph.edge_betweenness_centrality(Gnx) @@ -504,7 +505,8 @@ def test_edge_betweenness_centrality_nx( networkx_bc = sorted(nx_bc.items(), key=lambda x: x[0]) cugraph_bc = sorted(cu_bc.items(), key=lambda x: x[0]) err = 0 - assert len(cugraph_bc) == len(networkx_bc) + + assert len(networkx_bc) == len(cugraph_bc) for i in range(len(cugraph_bc)): if ( abs(cugraph_bc[i][1] - networkx_bc[i][1]) > 0.01 diff --git a/python/cugraph/cugraph/tests/test_modularity.py b/python/cugraph/cugraph/tests/test_modularity.py index 21b8adae6e6..9bbff9fc83c 100644 --- a/python/cugraph/cugraph/tests/test_modularity.py +++ b/python/cugraph/cugraph/tests/test_modularity.py @@ -19,6 +19,9 @@ import cudf import cugraph from cugraph.tests import utils +from cugraph.utilities import ensure_cugraph_obj_for_nx + +import networkx as nx def cugraph_call(G, partitions): @@ -51,7 +54,6 @@ def random_call(G, partitions): PARTITIONS = [2, 4, 8] -# Test all combinations of default/managed and pooled/non-pooled allocation @pytest.mark.parametrize("graph_file", utils.DATASETS) @pytest.mark.parametrize("partitions", PARTITIONS) def test_modularity_clustering(graph_file, partitions): @@ -71,12 +73,38 @@ def test_modularity_clustering(graph_file, partitions): assert cu_score > rand_score -# Test all combinations of default/managed and pooled/non-pooled allocation @pytest.mark.parametrize("graph_file", utils.DATASETS) @pytest.mark.parametrize("partitions", PARTITIONS) -def test_modularity_clustering_multi_column(graph_file, partitions): - gc.collect() +def test_modularity_clustering_nx(graph_file, partitions): + # Read in the graph and get a cugraph object + csv_data = utils.read_csv_for_nx(graph_file, read_weights_in_sp=True) + + nxG = nx.from_pandas_edgelist( + csv_data, + source="0", + target="1", + edge_attr="weight", + create_using=nx.DiGraph(), + ) + assert nx.is_directed(nxG) is True + assert nx.is_weighted(nxG) is True + + cuG, isNx = ensure_cugraph_obj_for_nx(nxG) + assert cugraph.is_directed(cuG) is True + assert cugraph.is_weighted(cuG) is True + + # Get the modularity score for partitioning versus random assignment + cu_score = cugraph_call(cuG, partitions) + rand_score = random_call(cuG, partitions) + + # Assert that the partitioning has better modularity than the random + # assignment + assert cu_score > rand_score + +@pytest.mark.parametrize("graph_file", utils.DATASETS) +@pytest.mark.parametrize("partitions", PARTITIONS) +def test_modularity_clustering_multi_column(graph_file, partitions): # Read in the graph and get a cugraph object cu_M = utils.read_csv_file(graph_file, read_weights_in_sp=False) cu_M.rename(columns={'0': 'src_0', '1': 'dst_0'}, inplace=True) @@ -113,8 +141,6 @@ def test_modularity_clustering_multi_column(graph_file, partitions): def test_digraph_rejected(): - gc.collect() - df = cudf.DataFrame() df["src"] = cudf.Series(range(10)) df["dst"] = cudf.Series(range(10)) diff --git a/python/cugraph/cugraph/tests/test_nx_convert.py b/python/cugraph/cugraph/tests/test_nx_convert.py index 98cc8a11dc7..5bd32213864 100644 --- a/python/cugraph/cugraph/tests/test_nx_convert.py +++ b/python/cugraph/cugraph/tests/test_nx_convert.py @@ -11,8 +11,7 @@ # See the License for the specific language governing permissions and # limitations under the License. -import gc - +import pandas as pd import pytest import cudf @@ -39,6 +38,15 @@ def _compare_graphs(nxG, cuG, has_wt=True): cu_df = cuG.view_edge_list().to_pandas() if has_wt is True: cu_df = cu_df.drop(columns=["weights"]) + + out_of_order = cu_df[cu_df['src'] > cu_df['dst']] + if len(out_of_order) > 0: + out_of_order = out_of_order.rename( + columns={"src": "dst", "dst": "src"}) + right_order = cu_df[cu_df['src'] < cu_df['dst']] + cu_df = pd.concat([out_of_order, right_order]) + del out_of_order + del right_order cu_df = cu_df.sort_values(by=["src", "dst"]).reset_index(drop=True) nx_df = nx.to_pandas_edgelist(nxG) @@ -46,6 +54,17 @@ def _compare_graphs(nxG, cuG, has_wt=True): nx_df = nx_df.drop(columns=["weight"]) nx_df = nx_df.rename(columns={"source": "src", "target": "dst"}) nx_df = nx_df.astype('int32') + + out_of_order = nx_df[nx_df['src'] > nx_df['dst']] + if len(out_of_order) > 0: + out_of_order = out_of_order.rename( + columns={"src": "dst", "dst": "src"}) + right_order = nx_df[nx_df['src'] < nx_df['dst']] + + nx_df = pd.concat([out_of_order, right_order]) + del out_of_order + del right_order + nx_df = nx_df.sort_values(by=["src", "dst"]).reset_index(drop=True) assert cu_df.to_dict() == nx_df.to_dict() @@ -54,9 +73,6 @@ def _compare_graphs(nxG, cuG, has_wt=True): @pytest.mark.parametrize("graph_file", utils.DATASETS) def test_networkx_compatibility(graph_file): # test to make sure cuGraph and Nx build similar Graphs - - gc.collect() - # Read in the graph M = utils.read_csv_for_nx(graph_file, read_weights_in_sp=True) @@ -81,22 +97,52 @@ def test_networkx_compatibility(graph_file): @pytest.mark.parametrize("graph_file", utils.DATASETS) -def test_nx_convert(graph_file): - gc.collect() - +def test_nx_convert_undirected(graph_file): # read data and create a Nx Graph nx_df = utils.read_csv_for_nx(graph_file) + nxG = nx.from_pandas_edgelist(nx_df, "0", "1", create_using=nx.Graph) + assert nx.is_directed(nxG) is False + assert nx.is_weighted(nxG) is False + + cuG = cugraph.utilities.convert_from_nx(nxG) + assert cuG.is_directed() is False + assert cuG.is_weighted() is False + + _compare_graphs(nxG, cuG, has_wt=False) + + +@pytest.mark.parametrize("graph_file", utils.DATASETS) +def test_nx_convert_directed(graph_file): + # read data and create a Nx DiGraph + nx_df = utils.read_csv_for_nx(graph_file) nxG = nx.from_pandas_edgelist(nx_df, "0", "1", create_using=nx.DiGraph) + assert nxG.is_directed() is True cuG = cugraph.utilities.convert_from_nx(nxG) + assert cuG.is_directed() is True + assert cuG.is_weighted() is False _compare_graphs(nxG, cuG, has_wt=False) @pytest.mark.parametrize("graph_file", utils.DATASETS) -def test_nx_convert_multicol(graph_file): - gc.collect() +def test_nx_convert_weighted(graph_file): + # read data and create a Nx DiGraph + nx_df = utils.read_csv_for_nx(graph_file, read_weights_in_sp=True) + nxG = nx.from_pandas_edgelist(nx_df, "0", "1", "weight", + create_using=nx.DiGraph) + assert nx.is_directed(nxG) is True + assert nx.is_weighted(nxG) is True + cuG = cugraph.utilities.convert_from_nx(nxG) + assert cugraph.is_directed(cuG) is True + assert cugraph.is_weighted(cuG) is True + + _compare_graphs(nxG, cuG, has_wt=True) + + +@pytest.mark.parametrize("graph_file", utils.DATASETS) +def test_nx_convert_multicol(graph_file): # read data and create a Nx Graph nx_df = utils.read_csv_for_nx(graph_file) diff --git a/python/cugraph/cugraph/utilities/nx_factory.py b/python/cugraph/cugraph/utilities/nx_factory.py index c5d8ee22f83..e2ee1f14ab5 100644 --- a/python/cugraph/cugraph/utilities/nx_factory.py +++ b/python/cugraph/cugraph/utilities/nx_factory.py @@ -28,20 +28,23 @@ nx = import_optional("networkx") -def convert_from_nx(nxG, weight=None): +def convert_from_nx(nxG, weight=None, do_renumber=True): """ weight, if given, is the string/name of the edge attr in nxG to use for weights in the resulting cugraph obj. If nxG has no edge attributes, weight is ignored even if specified. """ - if type(nxG) == nx.classes.graph.Graph: + if isinstance(nxG, nx.classes.digraph.DiGraph): + G = cugraph.Graph(directed=True) + elif isinstance(nxG, nx.classes.graph.Graph): G = cugraph.Graph() - elif type(nxG) == nx.classes.digraph.DiGraph: - G = cugraph.DiGraph() else: - raise ValueError("nxG does not appear to be a NetworkX graph type") + raise ValueError( + "nxG does not appear to be a supported NetworkX graph type") + is_weighted = nx.is_weighted(nxG) pdf = nx.to_pandas_edgelist(nxG) + # Convert vertex columns to strings if they are not integers # This allows support for any vertex input type if pdf["source"].dtype not in [np.int32, np.int64] or \ @@ -54,20 +57,26 @@ def convert_from_nx(nxG, weight=None): if num_col < 2: raise ValueError("NetworkX graph did not contain edges") - if weight is None: - num_col == 2 + if num_col == 2: pdf = pdf[["source", "target"]] - if num_col >= 3 and weight is not None: - pdf = pdf[["source", "target", weight]] - num_col = 3 + if num_col >= 3: + if is_weighted is False: + pdf = pdf[["source", "target"]] + elif weight is None: + pdf = pdf[["source", "target", "weight"]] + weight = "weight" + else: + pdf = pdf[["source", "target", weight]] gdf = from_pandas(pdf) if num_col == 2: - G.from_cudf_edgelist(gdf, "source", "target") + G.from_cudf_edgelist(gdf, source="source", destination="target", + renumber=do_renumber) else: - G.from_cudf_edgelist(gdf, "source", "target", weight) + G.from_cudf_edgelist(gdf, source="source", destination="target", + edge_attr=weight, renumber=do_renumber) del gdf del pdf