From 7f7451b2293cd90267c518997dec5d24187ecb9f Mon Sep 17 00:00:00 2001 From: Rick Ratzel Date: Fri, 22 Oct 2021 21:57:14 -0500 Subject: [PATCH] Bug fix for https://github.com/rapidsai/cugraph/issues/1899 - added test and bug fix to ensure a graph that has not been renumbered is handled correctly by cugraph.subgraph() --- .../cugraph/community/subgraph_extraction.py | 14 +++--- .../cugraph/tests/test_subgraph_extraction.py | 43 +++++++++++-------- python/cugraph/pytest.ini | 8 ++-- 3 files changed, 39 insertions(+), 26 deletions(-) diff --git a/python/cugraph/cugraph/community/subgraph_extraction.py b/python/cugraph/cugraph/community/subgraph_extraction.py index 14173311a94..6ab499558e1 100644 --- a/python/cugraph/cugraph/community/subgraph_extraction.py +++ b/python/cugraph/cugraph/community/subgraph_extraction.py @@ -11,19 +11,19 @@ # See the License for the specific language governing permissions and # limitations under the License. +import cudf + from cugraph.community import subgraph_extraction_wrapper from cugraph.utilities import (ensure_cugraph_obj_for_nx, cugraph_to_nx, ) -import cudf - def subgraph(G, vertices): """ Compute a subgraph of the existing graph including only the specified - vertices. This algorithm works for both directed and undirected graphs, - it does not actually traverse the edges, simply pulls out any edges that + vertices. This algorithm works for both directed and undirected graphs, and + does not traverse the edges, but instead it simply pulls out any edges that are incident on vertices that are both contained in the vertices list. Parameters @@ -66,10 +66,12 @@ def subgraph(G, vertices): result_graph = type(G)() df = subgraph_extraction_wrapper.subgraph(G, vertices) + src_names = "src" + dst_names = "dst" if G.renumbered: - df, src_names = G.unrenumber(df, "src", get_column_names=True) - df, dst_names = G.unrenumber(df, "dst", get_column_names=True) + df, src_names = G.unrenumber(df, src_names, get_column_names=True) + df, dst_names = G.unrenumber(df, dst_names, get_column_names=True) if G.edgelist.weights: result_graph.from_cudf_edgelist( diff --git a/python/cugraph/cugraph/tests/test_subgraph_extraction.py b/python/cugraph/cugraph/tests/test_subgraph_extraction.py index 389a7716e48..49c6e929684 100644 --- a/python/cugraph/cugraph/tests/test_subgraph_extraction.py +++ b/python/cugraph/cugraph/tests/test_subgraph_extraction.py @@ -15,24 +15,20 @@ import numpy as np import pytest +import networkx as nx import cudf import cugraph from cugraph.tests import utils -# Temporarily suppress warnings till networkX fixes deprecation warnings -# (Using or importing the ABCs from 'collections' instead of from -# 'collections.abc' is deprecated, and in 3.8 it will stop working) for -# python 3.7. Also, this import networkx needs to be relocated in the -# third-party group once this gets fixed. -import warnings - -with warnings.catch_warnings(): - warnings.filterwarnings("ignore", category=DeprecationWarning) - import networkx as nx +################################################################################ +# pytest setup - called for each test function +def setup_function(): + gc.collect() +################################################################################ def compare_edges(cg, nxg): edgelist_df = cg.view_edge_list() assert cg.edgelist.weights is False @@ -71,10 +67,9 @@ def nx_call(M, verts, directed=True): return nx.subgraph(G, verts) +################################################################################ @pytest.mark.parametrize("graph_file", utils.DATASETS) def test_subgraph_extraction_DiGraph(graph_file): - gc.collect() - M = utils.read_csv_for_nx(graph_file) verts = np.zeros(3, dtype=np.int32) verts[0] = 0 @@ -87,8 +82,6 @@ def test_subgraph_extraction_DiGraph(graph_file): @pytest.mark.parametrize("graph_file", utils.DATASETS) def test_subgraph_extraction_Graph(graph_file): - gc.collect() - M = utils.read_csv_for_nx(graph_file) verts = np.zeros(3, dtype=np.int32) verts[0] = 0 @@ -101,7 +94,6 @@ def test_subgraph_extraction_Graph(graph_file): @pytest.mark.parametrize("graph_file", utils.DATASETS) def test_subgraph_extraction_Graph_nx(graph_file): - gc.collect() directed = False verts = np.zeros(3, dtype=np.int32) verts[0] = 0 @@ -130,8 +122,6 @@ def test_subgraph_extraction_Graph_nx(graph_file): @pytest.mark.parametrize("graph_file", utils.DATASETS) def test_subgraph_extraction_multi_column(graph_file): - gc.collect() - M = utils.read_csv_for_nx(graph_file) cu_M = cudf.DataFrame() @@ -162,3 +152,22 @@ def test_subgraph_extraction_multi_column(graph_file): for i in range(len(edgelist_df_res)): assert sG2.has_edge(edgelist_df_res["0_src"].iloc[i], edgelist_df_res["0_dst"].iloc[i]) + + +# FIXME: the coverage provided by this test could probably be handled by another +# test that also checks using renumber=False +def test_subgraph_extraction_graph_not_renumbered(): + """ + Ensure subgraph() works with a Graph that has not been renumbered + """ + graph_file = utils.RAPIDS_DATASET_ROOT_DIR_PATH / "karate.csv" + gdf = cudf.read_csv(graph_file, delimiter = " ", + dtype=["int32", "int32", "float32"], header=None) + verts = np.array([0, 1, 2], dtype=np.int32) + sverts = cudf.Series(verts) + G = cugraph.Graph() + G.from_cudf_edgelist(gdf, source="0", destination="1", renumber=False) + Sg = cugraph.subgraph(G, sverts) + + assert Sg.number_of_vertices() == 3 + assert Sg.number_of_edges() == 3 diff --git a/python/cugraph/pytest.ini b/python/cugraph/pytest.ini index a1933ea34aa..046f972801c 100644 --- a/python/cugraph/pytest.ini +++ b/python/cugraph/pytest.ini @@ -4,9 +4,11 @@ addopts = --benchmark-max-time=0 --benchmark-min-rounds=1 --benchmark-columns="mean, rounds" - --benchmark-gpu-disable - --cov=cugraph - --cov-report term-missing:skip-covered + ## for use with rapids-pytest-benchmark plugin + #--benchmark-gpu-disable + ## for use with pytest-cov plugin + #--cov=cugraph + #--cov-report term-missing:skip-covered markers = managedmem_on: RMM managed memory enabled