From 9884dd9b110c2b1f888d2965ba3cb69641f5625a Mon Sep 17 00:00:00 2001 From: Ralph Liu Date: Fri, 23 Feb 2024 07:24:37 -0800 Subject: [PATCH 01/20] Clean up centrality unit tests --- .../test_batch_betweenness_centrality_mg.py | 29 ++++---- ...st_batch_edge_betweenness_centrality_mg.py | 27 ++++--- .../test_betweenness_centrality_mg.py | 72 ++++++++++--------- 3 files changed, 67 insertions(+), 61 deletions(-) diff --git a/python/cugraph/cugraph/tests/centrality/test_batch_betweenness_centrality_mg.py b/python/cugraph/cugraph/tests/centrality/test_batch_betweenness_centrality_mg.py index 8ccbbfc9ec5..aeef3ba539c 100644 --- a/python/cugraph/cugraph/tests/centrality/test_batch_betweenness_centrality_mg.py +++ b/python/cugraph/cugraph/tests/centrality/test_batch_betweenness_centrality_mg.py @@ -1,4 +1,4 @@ -# Copyright (c) 2021-2023, NVIDIA CORPORATION. +# Copyright (c) 2021-2024, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
# You may obtain a copy of the License at @@ -24,32 +24,37 @@ compare_scores, ) -DIRECTED_GRAPH_OPTIONS = [False, True] -WEIGHTED_GRAPH_OPTIONS = [False, True] -ENDPOINTS_OPTIONS = [False, True] -NORMALIZED_OPTIONS = [False, True] -DEFAULT_EPSILON = 0.0001 -SUBSET_SIZE_OPTIONS = [4, None] -SUBSET_SEED_OPTIONS = [42] - # ============================================================================= # Parameters # ============================================================================= + DATASETS = [karate] +DEFAULT_EPSILON = 0.0001 +DIRECTED_GRAPH_OPTIONS = [False, True] +ENDPOINTS_OPTIONS = [False, True] +NORMALIZED_OPTIONS = [False, True] +RESULT_DTYPE_OPTIONS = [np.float64] +SUBSET_SIZE_OPTIONS = [4, None] +SUBSET_SEED_OPTIONS = [42] # FIXME: The "preset_gpu_count" from 21.08 and below are currently not # supported and have been removed - -RESULT_DTYPE_OPTIONS = [np.float64] - +WEIGHTED_GRAPH_OPTIONS = [False, True] # ============================================================================= # Pytest Setup / Teardown - called for each test function # ============================================================================= + + def setup_function(): gc.collect() +# ============================================================================= +# Tests +# ============================================================================= + + @pytest.mark.mg @pytest.mark.skipif(is_single_gpu(), reason="skipping MG testing on Single GPU system") @pytest.mark.parametrize( diff --git a/python/cugraph/cugraph/tests/centrality/test_batch_edge_betweenness_centrality_mg.py b/python/cugraph/cugraph/tests/centrality/test_batch_edge_betweenness_centrality_mg.py index 154477a1a67..52e6ffdccd6 100644 --- a/python/cugraph/cugraph/tests/centrality/test_batch_edge_betweenness_centrality_mg.py +++ b/python/cugraph/cugraph/tests/centrality/test_batch_edge_betweenness_centrality_mg.py @@ -1,4 +1,4 @@ -# Copyright (c) 2019-2023, NVIDIA CORPORATION. 
+# Copyright (c) 2019-2024, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at @@ -19,39 +19,38 @@ from cugraph.dask.common.mg_utils import is_single_gpu from cugraph.datasets import karate, netscience -# Get parameters from standard betwenness_centrality_test -# As tests directory is not a module, we need to add it to the path -# FIXME: Test must be reworked to import from 'cugraph.testing' instead of -# importing from other tests -from test_edge_betweenness_centrality import ( - DIRECTED_GRAPH_OPTIONS, - NORMALIZED_OPTIONS, - DEFAULT_EPSILON, - SUBSET_SIZE_OPTIONS, -) - from test_edge_betweenness_centrality import ( calc_edge_betweenness_centrality, compare_scores, ) + # ============================================================================= # Parameters # ============================================================================= -DATASETS = [karate, netscience] +DATASETS = [karate, netscience] +DIRECTED_GRAPH_OPTIONS = [False, True] +NORMALIZED_OPTIONS = [False, True] +DEFAULT_EPSILON = 0.0001 +SUBSET_SIZE_OPTIONS = [4, None] # FIXME: The "preset_gpu_count" from 21.08 and below are not supported and have # been removed RESULT_DTYPE_OPTIONS = [np.float32, np.float64] - # ============================================================================= # Pytest Setup / Teardown - called for each test function # ============================================================================= + + def setup_function(): gc.collect() +# ============================================================================= +# Tests +# ============================================================================= + # FIXME: Fails for directed = False(bc score twice as much) and normalized = True. 
@pytest.mark.mg @pytest.mark.skipif(is_single_gpu(), reason="skipping MG testing on Single GPU system") diff --git a/python/cugraph/cugraph/tests/centrality/test_betweenness_centrality_mg.py b/python/cugraph/cugraph/tests/centrality/test_betweenness_centrality_mg.py index 1e20287d1e5..6fa73e95961 100644 --- a/python/cugraph/cugraph/tests/centrality/test_betweenness_centrality_mg.py +++ b/python/cugraph/cugraph/tests/centrality/test_betweenness_centrality_mg.py @@ -1,4 +1,4 @@ -# Copyright (c) 2022-2023, NVIDIA CORPORATION. +# Copyright (c) 2022-2024, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at @@ -15,13 +15,13 @@ import pytest -import dask_cudf import cupy import cudf import cugraph import cugraph.dask as dcg +import dask_cudf +from cugraph.datasets import karate, dolphins from cugraph.testing import utils -from pylibcugraph.testing import gen_fixture_params_product # ============================================================================= @@ -33,48 +33,50 @@ def setup_function(): gc.collect() -IS_DIRECTED = [True, False] +# ============================================================================= +# Parameters +# ============================================================================= +DATASETS = [karate, dolphins] +IS_DIRECTED = [True, False] +NORMALIZED = [False, True] +ENDPOINTS = [False, True] +SUBSET_SEED = [42, None] +SUBSET_SIZE = [None, 15] +VERTEX_LIST_TYPE = [list, cudf] # ============================================================================= -# Pytest fixtures +# Helper functions # ============================================================================= -datasets = utils.DATASETS_UNDIRECTED -fixture_params = gen_fixture_params_product( - (datasets, "graph_file"), - ([False, True], "normalized"), - ([False, True], "endpoints"), - ([42, None], "subset_seed"), - ([None, 15], 
"subset_size"), - (IS_DIRECTED, "directed"), - ([list, cudf], "vertex_list_type"), -) +def get_sg_graph(dataset, directed): + G = dataset.get_graph(create_using=cugraph.Graph(directed=directed)) + return G -@pytest.fixture(scope="module", params=fixture_params) -def input_combo(request): - """ - Simply return the current combination of params as a dictionary for use in - tests or other parameterized fixtures. - """ - parameters = dict( - zip( - ( - "graph_file", - "normalized", - "endpoints", - "subset_seed", - "subset_size", - "directed", - "vertex_list_type", - ), - request.param, - ) + +def get_mg_graph(dataset, directed): + input_data_path = dataset.get_path() + blocksize = dcg.get_chunksize(input_data_path) + ddf = dask_cudf.read_csv( + input_data_path, + blocksize=blocksize, + delimiter=dataset.metadata["delim"], + names=dataset.metadata["col_names"], + dtype=dataset.metadata["col_types"], + ) + dg = cugraph.Graph(directed=directed) + dg.from_dask_cudf_edgelist( + ddf, + source="src", + destination="dst", + edge_attr="wgt", + renumber=True, + store_transposed=True, ) - return parameters + return dg @pytest.fixture(scope="module") From 07aac44a5beb5a7ce7365c51be876983243daabc Mon Sep 17 00:00:00 2001 From: Ralph Liu Date: Mon, 26 Feb 2024 12:34:49 -0800 Subject: [PATCH 02/20] Update MG centrality tests --- .../test_batch_betweenness_centrality_mg.py | 30 ++-- ...st_batch_edge_betweenness_centrality_mg.py | 21 +-- .../test_betweenness_centrality_mg.py | 131 ++++++------------ .../centrality/test_degree_centrality_mg.py | 75 +++++----- 4 files changed, 110 insertions(+), 147 deletions(-) diff --git a/python/cugraph/cugraph/tests/centrality/test_batch_betweenness_centrality_mg.py b/python/cugraph/cugraph/tests/centrality/test_batch_betweenness_centrality_mg.py index aeef3ba539c..7050d0c9e55 100644 --- a/python/cugraph/cugraph/tests/centrality/test_batch_betweenness_centrality_mg.py +++ 
b/python/cugraph/cugraph/tests/centrality/test_batch_betweenness_centrality_mg.py @@ -29,17 +29,17 @@ # Parameters # ============================================================================= + DATASETS = [karate] DEFAULT_EPSILON = 0.0001 -DIRECTED_GRAPH_OPTIONS = [False, True] -ENDPOINTS_OPTIONS = [False, True] -NORMALIZED_OPTIONS = [False, True] -RESULT_DTYPE_OPTIONS = [np.float64] -SUBSET_SIZE_OPTIONS = [4, None] -SUBSET_SEED_OPTIONS = [42] -# FIXME: The "preset_gpu_count" from 21.08 and below are currently not -# supported and have been removed -WEIGHTED_GRAPH_OPTIONS = [False, True] +IS_DIRECTED = [False, True] +ENDPOINTS = [False, True] +IS_NORMALIZED = [False, True] +RESULT_DTYPES = [np.float64] +SUBSET_SIZES = [4, None] +SUBSET_SEEDS = [42] +IS_WEIGHTED = [False, True] + # ============================================================================= # Pytest Setup / Teardown - called for each test function @@ -60,13 +60,13 @@ def setup_function(): @pytest.mark.parametrize( "graph_file", DATASETS, ids=[f"dataset={d.get_path().stem}" for d in DATASETS] ) -@pytest.mark.parametrize("directed", DIRECTED_GRAPH_OPTIONS) -@pytest.mark.parametrize("subset_size", SUBSET_SIZE_OPTIONS) -@pytest.mark.parametrize("normalized", NORMALIZED_OPTIONS) +@pytest.mark.parametrize("directed", IS_DIRECTED) +@pytest.mark.parametrize("subset_size", SUBSET_SIZES) +@pytest.mark.parametrize("normalized", IS_NORMALIZED) @pytest.mark.parametrize("weight", [None]) -@pytest.mark.parametrize("endpoints", ENDPOINTS_OPTIONS) -@pytest.mark.parametrize("subset_seed", SUBSET_SEED_OPTIONS) -@pytest.mark.parametrize("result_dtype", RESULT_DTYPE_OPTIONS) +@pytest.mark.parametrize("endpoints", ENDPOINTS) +@pytest.mark.parametrize("subset_seed", SUBSET_SEEDS) +@pytest.mark.parametrize("result_dtype", RESULT_DTYPES) def test_mg_betweenness_centrality( graph_file, directed, diff --git a/python/cugraph/cugraph/tests/centrality/test_batch_edge_betweenness_centrality_mg.py 
b/python/cugraph/cugraph/tests/centrality/test_batch_edge_betweenness_centrality_mg.py index 52e6ffdccd6..48364a4a79a 100644 --- a/python/cugraph/cugraph/tests/centrality/test_batch_edge_betweenness_centrality_mg.py +++ b/python/cugraph/cugraph/tests/centrality/test_batch_edge_betweenness_centrality_mg.py @@ -29,14 +29,14 @@ # Parameters # ============================================================================= + DATASETS = [karate, netscience] -DIRECTED_GRAPH_OPTIONS = [False, True] -NORMALIZED_OPTIONS = [False, True] +IS_DIRECTED = [True, False] +IS_NORMALIZED = [True, False] DEFAULT_EPSILON = 0.0001 -SUBSET_SIZE_OPTIONS = [4, None] -# FIXME: The "preset_gpu_count" from 21.08 and below are not supported and have -# been removed -RESULT_DTYPE_OPTIONS = [np.float32, np.float64] +SUBSET_SIZES = [4, None] +RESULT_DTYPES = [np.float32, np.float64] + # ============================================================================= # Pytest Setup / Teardown - called for each test function @@ -51,16 +51,17 @@ def setup_function(): # Tests # ============================================================================= + # FIXME: Fails for directed = False(bc score twice as much) and normalized = True. 
@pytest.mark.mg @pytest.mark.skipif(is_single_gpu(), reason="skipping MG testing on Single GPU system") @pytest.mark.parametrize( "graph_file", DATASETS, ids=[f"dataset={d.get_path().stem}" for d in DATASETS] ) -@pytest.mark.parametrize("directed", DIRECTED_GRAPH_OPTIONS) -@pytest.mark.parametrize("subset_size", SUBSET_SIZE_OPTIONS) -@pytest.mark.parametrize("normalized", NORMALIZED_OPTIONS) -@pytest.mark.parametrize("result_dtype", RESULT_DTYPE_OPTIONS) +@pytest.mark.parametrize("directed", IS_DIRECTED) +@pytest.mark.parametrize("subset_size", SUBSET_SIZES) +@pytest.mark.parametrize("normalized", IS_NORMALIZED) +@pytest.mark.parametrize("result_dtype", RESULT_DTYPES) def test_mg_edge_betweenness_centrality( graph_file, directed, diff --git a/python/cugraph/cugraph/tests/centrality/test_betweenness_centrality_mg.py b/python/cugraph/cugraph/tests/centrality/test_betweenness_centrality_mg.py index 6fa73e95961..48fbe796bb4 100644 --- a/python/cugraph/cugraph/tests/centrality/test_betweenness_centrality_mg.py +++ b/python/cugraph/cugraph/tests/centrality/test_betweenness_centrality_mg.py @@ -19,9 +19,7 @@ import cudf import cugraph import cugraph.dask as dcg -import dask_cudf from cugraph.datasets import karate, dolphins -from cugraph.testing import utils # ============================================================================= @@ -39,11 +37,11 @@ def setup_function(): DATASETS = [karate, dolphins] IS_DIRECTED = [True, False] -NORMALIZED = [False, True] -ENDPOINTS = [False, True] -SUBSET_SEED = [42, None] -SUBSET_SIZE = [None, 15] -VERTEX_LIST_TYPE = [list, cudf] +IS_NORMALIZED = [True, False] +ENDPOINTS = [True, False] +SUBSET_SEEDS = [42, None] +SUBSET_SIZES = [None, 15] +VERTEX_LIST_TYPES = [list, cudf] # ============================================================================= # Helper functions @@ -57,15 +55,7 @@ def get_sg_graph(dataset, directed): def get_mg_graph(dataset, directed): - input_data_path = dataset.get_path() - blocksize = 
dcg.get_chunksize(input_data_path) - ddf = dask_cudf.read_csv( - input_data_path, - blocksize=blocksize, - delimiter=dataset.metadata["delim"], - names=dataset.metadata["col_names"], - dtype=dataset.metadata["col_types"], - ) + ddf = dataset.get_dask_edgelist() dg = cugraph.Graph(directed=directed) dg.from_dask_cudf_edgelist( ddf, @@ -79,101 +69,64 @@ def get_mg_graph(dataset, directed): return dg -@pytest.fixture(scope="module") -def input_expected_output(input_combo): - """ - This fixture returns the inputs and expected results from the - betweenness_centrality algo based on cuGraph betweenness_centrality) which can - be used for validation. - """ +# ============================================================================= +# Tests +# ============================================================================= - input_data_path = input_combo["graph_file"] - normalized = input_combo["normalized"] - endpoints = input_combo["endpoints"] - random_state = input_combo["subset_seed"] - subset_size = input_combo["subset_size"] - directed = input_combo["directed"] - vertex_list_type = input_combo["vertex_list_type"] - G = utils.generate_cugraph_graph_from_file(input_data_path, directed=directed) +@pytest.mark.mg +@pytest.mark.parametrize("dataset", DATASETS) +@pytest.mark.parametrize("directed", IS_DIRECTED) +@pytest.mark.parametrize("normalized", IS_NORMALIZED) +@pytest.mark.parametrize("endpoint", ENDPOINTS) +@pytest.mark.parametrize("subset_seed", SUBSET_SEEDS) +@pytest.mark.parametrize("subset_size", SUBSET_SIZES) +@pytest.mark.parametrize("v_list_type", VERTEX_LIST_TYPES) +def test_dask_mg_betweenness_centrality( + dataset, + directed, + normalized, + endpoint, + subset_seed, + subset_size, + v_list_type, + dask_client, + benchmark, +): + g = get_sg_graph(dataset, directed) + dataset.unload() + dg = get_mg_graph(dataset, directed) + random_state = subset_seed if subset_size is None: k = subset_size elif isinstance(subset_size, int): # Select random vertices - k 
= G.select_random_vertices( + k = g.select_random_vertices( random_state=random_state, num_vertices=subset_size ) - if vertex_list_type is list: + if v_list_type is list: k = k.to_arrow().to_pylist() print("the seeds are \n", k) - if vertex_list_type is int: + if v_list_type is int: # This internally sample k vertices in betweenness centrality. # Since the nodes that will be sampled by each implementation will # be random, therefore sample all vertices which will make the test # consistent. - k = len(G.nodes()) - - input_combo["k"] = k + k = len(g.nodes()) sg_cugraph_bc = cugraph.betweenness_centrality( - G, k=k, normalized=normalized, endpoints=endpoints, random_state=random_state + g, k=k, normalized=normalized, endpoints=endpoint, random_state=random_state ) - # Save the results back to the input_combo dictionary to prevent redundant - # cuGraph runs. Other tests using the input_combo fixture will look for - # them, and if not present they will have to re-run the same cuGraph call. sg_cugraph_bc = sg_cugraph_bc.sort_values("vertex").reset_index(drop=True) - input_combo["sg_cugraph_results"] = sg_cugraph_bc - chunksize = dcg.get_chunksize(input_data_path) - ddf = dask_cudf.read_csv( - input_data_path, - chunksize=chunksize, - delimiter=" ", - names=["src", "dst", "value"], - dtype=["int32", "int32", "float32"], - ) - - dg = cugraph.Graph(directed=directed) - dg.from_dask_cudf_edgelist( - ddf, - source="src", - destination="dst", - edge_attr="value", - renumber=True, - store_transposed=True, - ) - - input_combo["MGGraph"] = dg - - return input_combo - - -# ============================================================================= -# Tests -# ============================================================================= - - -# @pytest.mark.skipif( -# is_single_gpu(), reason="skipping MG testing on Single GPU system" -# ) - - -@pytest.mark.mg -def test_dask_mg_betweenness_centrality(dask_client, benchmark, input_expected_output): - - dg = 
input_expected_output["MGGraph"] - k = input_expected_output["k"] - endpoints = input_expected_output["endpoints"] - normalized = input_expected_output["normalized"] - random_state = input_expected_output["subset_seed"] mg_bc_results = benchmark( dcg.betweenness_centrality, dg, k=k, normalized=normalized, - endpoints=endpoints, + endpoints=endpoint, random_state=random_state, ) @@ -181,11 +134,9 @@ def test_dask_mg_betweenness_centrality(dask_client, benchmark, input_expected_o mg_bc_results.compute().sort_values("vertex").reset_index(drop=True) )["betweenness_centrality"].to_cupy() - sg_bc_results = ( - input_expected_output["sg_cugraph_results"] - .sort_values("vertex") - .reset_index(drop=True) - )["betweenness_centrality"].to_cupy() + sg_bc_results = (sg_cugraph_bc.sort_values("vertex").reset_index(drop=True))[ + "betweenness_centrality" + ].to_cupy() diff = cupy.isclose(mg_bc_results, sg_bc_results) diff --git a/python/cugraph/cugraph/tests/centrality/test_degree_centrality_mg.py b/python/cugraph/cugraph/tests/centrality/test_degree_centrality_mg.py index 1bef1e0872b..8606649c745 100644 --- a/python/cugraph/cugraph/tests/centrality/test_degree_centrality_mg.py +++ b/python/cugraph/cugraph/tests/centrality/test_degree_centrality_mg.py @@ -15,12 +15,12 @@ import pytest -import cudf -import dask_cudf import cugraph -from cugraph.testing.utils import RAPIDS_DATASET_ROOT_DIR_PATH +from cugraph.dask.common.mg_utils import is_single_gpu +from cugraph.datasets import karate_asymmetric, polbooks, email_Eu_core from cudf.testing import assert_series_equal + # ============================================================================= # Pytest Setup / Teardown - called for each test function # ============================================================================= @@ -30,44 +30,55 @@ def setup_function(): gc.collect() +# ============================================================================= +# Parameters +# 
============================================================================= + + +DATASETS = [karate_asymmetric, polbooks, email_Eu_core] IS_DIRECTED = [True, False] -DATA_PATH = [ - (RAPIDS_DATASET_ROOT_DIR_PATH / "karate-asymmetric.csv").as_posix(), - (RAPIDS_DATASET_ROOT_DIR_PATH / "polbooks.csv").as_posix(), - (RAPIDS_DATASET_ROOT_DIR_PATH / "email-Eu-core.csv").as_posix(), -] + +# ============================================================================= +# Helper functions +# ============================================================================= -@pytest.mark.mg -@pytest.mark.parametrize("directed", IS_DIRECTED) -@pytest.mark.parametrize("data_file", DATA_PATH) -def test_dask_mg_degree(dask_client, directed, data_file): - - input_data_path = data_file - chunksize = cugraph.dask.get_chunksize(input_data_path) - - ddf = dask_cudf.read_csv( - input_data_path, - chunksize=chunksize, - delimiter=" ", - names=["src", "dst", "value"], - dtype=["int32", "int32", "float32"], - ) +def get_sg_graph(dataset, directed): + G = dataset.get_graph(create_using=cugraph.Graph(directed=directed)) - df = cudf.read_csv( - input_data_path, - delimiter=" ", - names=["src", "dst", "value"], - dtype=["int32", "int32", "float32"], - ) + return G + +def get_mg_graph(dataset, directed): + ddf = dataset.get_dask_edgelist() dg = cugraph.Graph(directed=directed) - dg.from_dask_cudf_edgelist(ddf, "src", "dst") + dg.from_dask_cudf_edgelist( + ddf, + source="src", + destination="dst", + edge_attr="wgt", + renumber=True, + store_transposed=True, + ) + + return dg + + +# ============================================================================= +# Tests +# ============================================================================= + + +@pytest.mark.mg +@pytest.mark.skipif(is_single_gpu(), reason="skipping MG testing on Single GPU system") +@pytest.mark.parametrize("dataset", DATASETS) +@pytest.mark.parametrize("directed", IS_DIRECTED) +def test_dask_mg_degree(dask_client, 
dataset, directed): + dg = get_mg_graph(dataset, directed) dg.compute_renumber_edge_list() - g = cugraph.Graph(directed=directed) - g.from_cudf_edgelist(df, "src", "dst") + g = get_sg_graph(dataset, directed) merge_df_in_degree = ( dg.in_degree() From d4a610224ebb300a3ab0c68b3dc6d7ea0a1b88e1 Mon Sep 17 00:00:00 2001 From: Ralph Liu Date: Tue, 27 Feb 2024 08:55:34 -0800 Subject: [PATCH 03/20] Refactor edge_betweenness_centrality --- .../test_edge_betweenness_centrality_mg.py | 228 +++++++----------- 1 file changed, 88 insertions(+), 140 deletions(-) diff --git a/python/cugraph/cugraph/tests/centrality/test_edge_betweenness_centrality_mg.py b/python/cugraph/cugraph/tests/centrality/test_edge_betweenness_centrality_mg.py index 478b7e655d5..ae44917dc3a 100644 --- a/python/cugraph/cugraph/tests/centrality/test_edge_betweenness_centrality_mg.py +++ b/python/cugraph/cugraph/tests/centrality/test_edge_betweenness_centrality_mg.py @@ -1,4 +1,4 @@ -# Copyright (c) 2022-2023, NVIDIA CORPORATION. +# Copyright (c) 2022-2024, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
# You may obtain a copy of the License at @@ -14,14 +14,9 @@ import gc import pytest -import dask_cudf -from pylibcugraph.testing.utils import gen_fixture_params_product -from cugraph.datasets import karate, dolphins - import cugraph import cugraph.dask as dcg - -# from cugraph.dask.common.mg_utils import is_single_gpu +from cugraph.datasets import karate, dolphins # ============================================================================= @@ -33,79 +28,34 @@ def setup_function(): gc.collect() -IS_DIRECTED = [True, False] -INCLUDE_WEIGHTS = [False, True] -INCLUDE_EDGE_IDS = [False, True] -NORMALIZED_OPTIONS = [False, True] -SUBSET_SIZE_OPTIONS = [4, None] - - -# email_Eu_core is too expensive to test -datasets = [karate, dolphins] - - # ============================================================================= -# Pytest fixtures +# Parameters # ============================================================================= -fixture_params = gen_fixture_params_product( - (datasets, "graph_file"), - (IS_DIRECTED, "directed"), - (INCLUDE_WEIGHTS, "include_weights"), - (INCLUDE_EDGE_IDS, "include_edgeids"), - (NORMALIZED_OPTIONS, "normalized"), - (SUBSET_SIZE_OPTIONS, "subset_size"), -) - - -@pytest.fixture(scope="module", params=fixture_params) -def input_combo(request): - """ - Simply return the current combination of params as a dictionary for use in - tests or other parameterized fixtures. 
- """ - parameters = dict( - zip( - ( - "graph_file", - "directed", - "include_weights", - "include_edge_ids", - "normalized", - "subset_size", - "subset_seed", - ), - request.param, - ) - ) +DATASETS = [karate, dolphins] +IS_DIRECTED = [True, False] +IS_WEIGHTED = [True, False] +INCLUDE_EDGE_IDS = [True, False] +IS_NORMALIZED = [True, False] +SUBSET_SIZES = [4, None] - return parameters +# ============================================================================= +# Helper functions +# ============================================================================= -@pytest.fixture(scope="module") -def input_expected_output(input_combo): - """ - This fixture returns the inputs and expected results from the edge - betweenness centrality algo. - (based on cuGraph edge betweenness centrality) which can be used - for validation. - """ - directed = input_combo["directed"] - normalized = input_combo["normalized"] - k = input_combo["subset_size"] - subset_seed = 42 - edge_ids = input_combo["include_edge_ids"] - weight = input_combo["include_weights"] - df = input_combo["graph_file"].get_edgelist() +def get_sg_graph(dataset, directed, edge_ids): + dataset.unload() + df = dataset.get_edgelist() if edge_ids: if not directed: # Edge ids not supported for undirected graph - return - dtype = df.dtypes[0] + return None + dtype = df.dtypes.iloc[0] edge_id = "edge_id" - df["edge_id"] = df.index + df[edge_id] = df.index df = df.astype(dtype) else: @@ -115,30 +65,13 @@ def input_expected_output(input_combo): G.from_cudf_edgelist( df, source="src", destination="dst", weight="wgt", edge_id=edge_id ) - if isinstance(k, int): - k = G.select_random_vertices(subset_seed, k) - input_combo["k"] = k - # Save the results back to the input_combo dictionary to prevent redundant - # cuGraph runs. Other tests using the input_combo fixture will look for - # them, and if not present they will have to re-run the same cuGraph call. 
- sg_cugraph_edge_bc = ( - cugraph.edge_betweenness_centrality(G, k, normalized) - .sort_values(["src", "dst"]) - .reset_index(drop=True) - ) + return G - input_data_path = input_combo["graph_file"].get_path() - input_combo["sg_cugraph_results"] = sg_cugraph_edge_bc - chunksize = dcg.get_chunksize(input_data_path) - ddf = dask_cudf.read_csv( - input_data_path, - chunksize=chunksize, - delimiter=" ", - names=["src", "dst", "value"], - dtype=["int32", "int32", "float32"], - ) +def get_mg_graph(dataset, directed, edge_ids, weight): + dataset.unload() + ddf = dataset.get_dask_edgelist() if weight: weight = ddf @@ -154,20 +87,16 @@ def input_expected_output(input_combo): edge_id = None dg = cugraph.Graph(directed=directed) - dg.from_dask_cudf_edgelist( ddf, source="src", destination="dst", - weight="value", + weight="wgt", edge_id=edge_id, renumber=True, ) - input_combo["MGGraph"] = dg - input_combo["include_weights"] = weight - - return input_combo + return dg, weight # ============================================================================= @@ -175,57 +104,76 @@ def input_expected_output(input_combo): # ============================================================================= -# @pytest.mark.skipif( -# is_single_gpu(), reason="skipping MG testing on Single GPU system" -# ) @pytest.mark.mg +@pytest.mark.parametrize("dataset", DATASETS) +@pytest.mark.parametrize("directed", IS_DIRECTED) +@pytest.mark.parametrize("weighted", IS_WEIGHTED) +@pytest.mark.parametrize("edge_ids", INCLUDE_EDGE_IDS) +@pytest.mark.parametrize("normalized", IS_NORMALIZED) +@pytest.mark.parametrize("subset_size", SUBSET_SIZES) def test_dask_mg_edge_betweenness_centrality( - dask_client, benchmark, input_expected_output + dask_client, + dataset, + directed, + weighted, + edge_ids, + normalized, + subset_size, + benchmark, ): - if input_expected_output is not None: - dg = input_expected_output["MGGraph"] - k = input_expected_output["k"] - normalized = input_expected_output["normalized"] - 
weight = input_expected_output["include_weights"] - if weight is not None: - with pytest.raises(NotImplementedError): - result_edge_bc = benchmark( - dcg.edge_betweenness_centrality, dg, k, normalized, weight=weight - ) - - else: + g = get_sg_graph(dataset, directed, edge_ids) + + if g is None: + pytest.skip("Edge_ids not supported for undirected graph") + + dg, weight = get_mg_graph(dataset, directed, edge_ids, weighted) + subset_seed = 42 + + k = subset_size + if isinstance(k, int): + k = g.select_random_vertices(subset_seed, k) + + sg_cugraph_edge_bc = ( + cugraph.edge_betweenness_centrality(g, k, normalized) + .sort_values(["src", "dst"]) + .reset_index(drop=True) + ) + + if weight is not None: + with pytest.raises(NotImplementedError): result_edge_bc = benchmark( dcg.edge_betweenness_centrality, dg, k, normalized, weight=weight ) - result_edge_bc = ( - result_edge_bc.compute() - .sort_values(["src", "dst"]) - .reset_index(drop=True) - .rename(columns={"betweenness_centrality": "mg_betweenness_centrality"}) - ) - if len(result_edge_bc.columns) > 3: - result_edge_bc = result_edge_bc.rename( - columns={"edge_id": "mg_edge_id"} - ) + else: + result_edge_bc = benchmark( + dcg.edge_betweenness_centrality, dg, k, normalized, weight=weight + ) + result_edge_bc = ( + result_edge_bc.compute() + .sort_values(["src", "dst"]) + .reset_index(drop=True) + .rename(columns={"betweenness_centrality": "mg_betweenness_centrality"}) + ) - expected_output = input_expected_output["sg_cugraph_results"].reset_index( - drop=True - ) - result_edge_bc["betweenness_centrality"] = expected_output[ - "betweenness_centrality" - ] - if len(expected_output.columns) > 3: - result_edge_bc["edge_id"] = expected_output["edge_id"] - edge_id_diff = result_edge_bc.query("mg_edge_id != edge_id") - assert len(edge_id_diff) == 0 - - edge_bc_diffs1 = result_edge_bc.query( - "mg_betweenness_centrality - betweenness_centrality > 0.01" - ) - edge_bc_diffs2 = result_edge_bc.query( - "betweenness_centrality - 
mg_betweenness_centrality < -0.01" - ) + if len(result_edge_bc.columns) > 3: + result_edge_bc = result_edge_bc.rename(columns={"edge_id": "mg_edge_id"}) + + expected_output = sg_cugraph_edge_bc.reset_index(drop=True) + result_edge_bc["betweenness_centrality"] = expected_output[ + "betweenness_centrality" + ] + if len(expected_output.columns) > 3: + result_edge_bc["edge_id"] = expected_output["edge_id"] + edge_id_diff = result_edge_bc.query("mg_edge_id != edge_id") + assert len(edge_id_diff) == 0 + + edge_bc_diffs1 = result_edge_bc.query( + "mg_betweenness_centrality - betweenness_centrality > 0.01" + ) + edge_bc_diffs2 = result_edge_bc.query( + "betweenness_centrality - mg_betweenness_centrality < -0.01" + ) - assert len(edge_bc_diffs1) == 0 - assert len(edge_bc_diffs2) == 0 + assert len(edge_bc_diffs1) == 0 + assert len(edge_bc_diffs2) == 0 From f7c78b95ac3ba62ce292a06c80e80d406847c6fc Mon Sep 17 00:00:00 2001 From: Ralph Liu Date: Sun, 10 Mar 2024 20:00:29 -0700 Subject: [PATCH 04/20] Revert batch_betweenness_centrality --- .../test_batch_betweenness_centrality_mg.py | 41 ++++++++----------- 1 file changed, 18 insertions(+), 23 deletions(-) diff --git a/python/cugraph/cugraph/tests/centrality/test_batch_betweenness_centrality_mg.py b/python/cugraph/cugraph/tests/centrality/test_batch_betweenness_centrality_mg.py index 7050d0c9e55..9d858919786 100644 --- a/python/cugraph/cugraph/tests/centrality/test_batch_betweenness_centrality_mg.py +++ b/python/cugraph/cugraph/tests/centrality/test_batch_betweenness_centrality_mg.py @@ -24,49 +24,44 @@ compare_scores, ) +DIRECTED_GRAPH_OPTIONS = [False, True] +WEIGHTED_GRAPH_OPTIONS = [False, True] +ENDPOINTS_OPTIONS = [False, True] +NORMALIZED_OPTIONS = [False, True] +DEFAULT_EPSILON = 0.0001 +SUBSET_SIZE_OPTIONS = [4, None] +SUBSET_SEED_OPTIONS = [42] + # ============================================================================= # Parameters # ============================================================================= - 
- DATASETS = [karate] -DEFAULT_EPSILON = 0.0001 -IS_DIRECTED = [False, True] -ENDPOINTS = [False, True] -IS_NORMALIZED = [False, True] -RESULT_DTYPES = [np.float64] -SUBSET_SIZES = [4, None] -SUBSET_SEEDS = [42] -IS_WEIGHTED = [False, True] +# FIXME: The "preset_gpu_count" from 21.08 and below are currently not +# supported and have been removed + +RESULT_DTYPE_OPTIONS = [np.float64] # ============================================================================= # Pytest Setup / Teardown - called for each test function # ============================================================================= - - def setup_function(): gc.collect() -# ============================================================================= -# Tests -# ============================================================================= - - @pytest.mark.mg @pytest.mark.skipif(is_single_gpu(), reason="skipping MG testing on Single GPU system") @pytest.mark.parametrize( "graph_file", DATASETS, ids=[f"dataset={d.get_path().stem}" for d in DATASETS] ) -@pytest.mark.parametrize("directed", IS_DIRECTED) -@pytest.mark.parametrize("subset_size", SUBSET_SIZES) -@pytest.mark.parametrize("normalized", IS_NORMALIZED) +@pytest.mark.parametrize("directed", DIRECTED_GRAPH_OPTIONS) +@pytest.mark.parametrize("subset_size", SUBSET_SIZE_OPTIONS) +@pytest.mark.parametrize("normalized", NORMALIZED_OPTIONS) @pytest.mark.parametrize("weight", [None]) -@pytest.mark.parametrize("endpoints", ENDPOINTS) -@pytest.mark.parametrize("subset_seed", SUBSET_SEEDS) -@pytest.mark.parametrize("result_dtype", RESULT_DTYPES) +@pytest.mark.parametrize("endpoints", ENDPOINTS_OPTIONS) +@pytest.mark.parametrize("subset_seed", SUBSET_SEED_OPTIONS) +@pytest.mark.parametrize("result_dtype", RESULT_DTYPE_OPTIONS) def test_mg_betweenness_centrality( graph_file, directed, From 32e0ad977e9cf5e6cf96c9550e178e1c962b604d Mon Sep 17 00:00:00 2001 From: Ralph Liu Date: Sun, 10 Mar 2024 21:32:19 -0700 Subject: [PATCH 05/20] Revert 
batch_edge_betweenness_centrality --- ...st_batch_edge_betweenness_centrality_mg.py | 38 +++++++++---------- 1 file changed, 19 insertions(+), 19 deletions(-) diff --git a/python/cugraph/cugraph/tests/centrality/test_batch_edge_betweenness_centrality_mg.py b/python/cugraph/cugraph/tests/centrality/test_batch_edge_betweenness_centrality_mg.py index 48364a4a79a..eb1dec7e5bd 100644 --- a/python/cugraph/cugraph/tests/centrality/test_batch_edge_betweenness_centrality_mg.py +++ b/python/cugraph/cugraph/tests/centrality/test_batch_edge_betweenness_centrality_mg.py @@ -19,49 +19,49 @@ from cugraph.dask.common.mg_utils import is_single_gpu from cugraph.datasets import karate, netscience +# Get parameters from standard betwenness_centrality_test +# As tests directory is not a module, we need to add it to the path +# FIXME: Test must be reworked to import from 'cugraph.testing' instead of +# importing from other tests +from test_edge_betweenness_centrality import ( + DIRECTED_GRAPH_OPTIONS, + NORMALIZED_OPTIONS, + DEFAULT_EPSILON, + SUBSET_SIZE_OPTIONS, +) + from test_edge_betweenness_centrality import ( calc_edge_betweenness_centrality, compare_scores, ) - # ============================================================================= # Parameters # ============================================================================= - - DATASETS = [karate, netscience] -IS_DIRECTED = [True, False] -IS_NORMALIZED = [True, False] -DEFAULT_EPSILON = 0.0001 -SUBSET_SIZES = [4, None] -RESULT_DTYPES = [np.float32, np.float64] + +# FIXME: The "preset_gpu_count" from 21.08 and below are not supported and have +# been removed +RESULT_DTYPE_OPTIONS = [np.float32, np.float64] # ============================================================================= # Pytest Setup / Teardown - called for each test function # ============================================================================= - - def setup_function(): gc.collect() -# 
============================================================================= -# Tests -# ============================================================================= - - # FIXME: Fails for directed = False(bc score twice as much) and normalized = True. @pytest.mark.mg @pytest.mark.skipif(is_single_gpu(), reason="skipping MG testing on Single GPU system") @pytest.mark.parametrize( "graph_file", DATASETS, ids=[f"dataset={d.get_path().stem}" for d in DATASETS] ) -@pytest.mark.parametrize("directed", IS_DIRECTED) -@pytest.mark.parametrize("subset_size", SUBSET_SIZES) -@pytest.mark.parametrize("normalized", IS_NORMALIZED) -@pytest.mark.parametrize("result_dtype", RESULT_DTYPES) +@pytest.mark.parametrize("directed", DIRECTED_GRAPH_OPTIONS) +@pytest.mark.parametrize("subset_size", SUBSET_SIZE_OPTIONS) +@pytest.mark.parametrize("normalized", NORMALIZED_OPTIONS) +@pytest.mark.parametrize("result_dtype", RESULT_DTYPE_OPTIONS) def test_mg_edge_betweenness_centrality( graph_file, directed, From f98b0bc3df43a5ea1185b5f4bd62a363eb2130f2 Mon Sep 17 00:00:00 2001 From: Ralph Liu Date: Sun, 10 Mar 2024 22:59:10 -0700 Subject: [PATCH 06/20] Revert test_betweenness_centrality --- .../test_betweenness_centrality_mg.py | 177 +++++++++++------- 1 file changed, 112 insertions(+), 65 deletions(-) diff --git a/python/cugraph/cugraph/tests/centrality/test_betweenness_centrality_mg.py b/python/cugraph/cugraph/tests/centrality/test_betweenness_centrality_mg.py index 48fbe796bb4..60d344eb0fd 100644 --- a/python/cugraph/cugraph/tests/centrality/test_betweenness_centrality_mg.py +++ b/python/cugraph/cugraph/tests/centrality/test_betweenness_centrality_mg.py @@ -15,11 +15,13 @@ import pytest +import dask_cudf import cupy import cudf import cugraph import cugraph.dask as dcg -from cugraph.datasets import karate, dolphins +from cugraph.testing import utils +from pylibcugraph.testing import gen_fixture_params_product # ============================================================================= 
@@ -31,102 +33,145 @@ def setup_function(): gc.collect() -# ============================================================================= -# Parameters -# ============================================================================= - -DATASETS = [karate, dolphins] IS_DIRECTED = [True, False] -IS_NORMALIZED = [True, False] -ENDPOINTS = [True, False] -SUBSET_SEEDS = [42, None] -SUBSET_SIZES = [None, 15] -VERTEX_LIST_TYPES = [list, cudf] + # ============================================================================= -# Helper functions +# Pytest fixtures # ============================================================================= - -def get_sg_graph(dataset, directed): - G = dataset.get_graph(create_using=cugraph.Graph(directed=directed)) - - return G - - -def get_mg_graph(dataset, directed): - ddf = dataset.get_dask_edgelist() - dg = cugraph.Graph(directed=directed) - dg.from_dask_cudf_edgelist( - ddf, - source="src", - destination="dst", - edge_attr="wgt", - renumber=True, - store_transposed=True, +datasets = utils.DATASETS_UNDIRECTED + +fixture_params = gen_fixture_params_product( + (datasets, "graph_file"), + ([False, True], "normalized"), + ([False, True], "endpoints"), + ([42, None], "subset_seed"), + ([None, 15], "subset_size"), + (IS_DIRECTED, "directed"), + ([list, cudf], "vertex_list_type"), +) + + +@pytest.fixture(scope="module", params=fixture_params) +def input_combo(request): + """ + Simply return the current combination of params as a dictionary for use in + tests or other parameterized fixtures. 
+ """ + parameters = dict( + zip( + ( + "graph_file", + "normalized", + "endpoints", + "subset_seed", + "subset_size", + "directed", + "vertex_list_type", + ), + request.param, + ) ) - return dg + return parameters -# ============================================================================= -# Tests -# ============================================================================= +@pytest.fixture(scope="module") +def input_expected_output(input_combo): + """ + This fixture returns the inputs and expected results from the + betweenness_centrality algo based on cuGraph betweenness_centrality) which can + be used for validation. + """ + input_data_path = input_combo["graph_file"] + normalized = input_combo["normalized"] + endpoints = input_combo["endpoints"] + random_state = input_combo["subset_seed"] + subset_size = input_combo["subset_size"] + directed = input_combo["directed"] + vertex_list_type = input_combo["vertex_list_type"] -@pytest.mark.mg -@pytest.mark.parametrize("dataset", DATASETS) -@pytest.mark.parametrize("directed", IS_DIRECTED) -@pytest.mark.parametrize("normalized", IS_NORMALIZED) -@pytest.mark.parametrize("endpoint", ENDPOINTS) -@pytest.mark.parametrize("subset_seed", SUBSET_SEEDS) -@pytest.mark.parametrize("subset_size", SUBSET_SIZES) -@pytest.mark.parametrize("v_list_type", VERTEX_LIST_TYPES) -def test_dask_mg_betweenness_centrality( - dataset, - directed, - normalized, - endpoint, - subset_seed, - subset_size, - v_list_type, - dask_client, - benchmark, -): - g = get_sg_graph(dataset, directed) - dataset.unload() - dg = get_mg_graph(dataset, directed) - random_state = subset_seed + G = utils.generate_cugraph_graph_from_file(input_data_path, directed=directed) if subset_size is None: k = subset_size elif isinstance(subset_size, int): # Select random vertices - k = g.select_random_vertices( + k = G.select_random_vertices( random_state=random_state, num_vertices=subset_size ) - if v_list_type is list: + if vertex_list_type is list: k = 
k.to_arrow().to_pylist() print("the seeds are \n", k) - if v_list_type is int: + if vertex_list_type is int: # This internally sample k vertices in betweenness centrality. # Since the nodes that will be sampled by each implementation will # be random, therefore sample all vertices which will make the test # consistent. - k = len(g.nodes()) + k = len(G.nodes()) + + input_combo["k"] = k sg_cugraph_bc = cugraph.betweenness_centrality( - g, k=k, normalized=normalized, endpoints=endpoint, random_state=random_state + G, k=k, normalized=normalized, endpoints=endpoints, random_state=random_state ) + # Save the results back to the input_combo dictionary to prevent redundant + # cuGraph runs. Other tests using the input_combo fixture will look for + # them, and if not present they will have to re-run the same cuGraph call. sg_cugraph_bc = sg_cugraph_bc.sort_values("vertex").reset_index(drop=True) + input_combo["sg_cugraph_results"] = sg_cugraph_bc + chunksize = dcg.get_chunksize(input_data_path) + ddf = dask_cudf.read_csv( + input_data_path, + chunksize=chunksize, + delimiter=" ", + names=["src", "dst", "value"], + dtype=["int32", "int32", "float32"], + ) + + dg = cugraph.Graph(directed=directed) + dg.from_dask_cudf_edgelist( + ddf, + source="src", + destination="dst", + edge_attr="value", + renumber=True, + store_transposed=True, + ) + + input_combo["MGGraph"] = dg + + return input_combo + + +# ============================================================================= +# Tests +# ============================================================================= + + +# @pytest.mark.skipif( +# is_single_gpu(), reason="skipping MG testing on Single GPU system" +# ) + + +@pytest.mark.mg +def test_dask_mg_betweenness_centrality(dask_client, benchmark, input_expected_output): + + dg = input_expected_output["MGGraph"] + k = input_expected_output["k"] + endpoints = input_expected_output["endpoints"] + normalized = input_expected_output["normalized"] + random_state = 
input_expected_output["subset_seed"] mg_bc_results = benchmark( dcg.betweenness_centrality, dg, k=k, normalized=normalized, - endpoints=endpoint, + endpoints=endpoints, random_state=random_state, ) @@ -134,9 +179,11 @@ def test_dask_mg_betweenness_centrality( mg_bc_results.compute().sort_values("vertex").reset_index(drop=True) )["betweenness_centrality"].to_cupy() - sg_bc_results = (sg_cugraph_bc.sort_values("vertex").reset_index(drop=True))[ - "betweenness_centrality" - ].to_cupy() + sg_bc_results = ( + input_expected_output["sg_cugraph_results"] + .sort_values("vertex") + .reset_index(drop=True) + )["betweenness_centrality"].to_cupy() diff = cupy.isclose(mg_bc_results, sg_bc_results) From 36b31c9c2f5c575ba4ea99f5e2a407a449d9a10e Mon Sep 17 00:00:00 2001 From: Ralph Liu Date: Mon, 11 Mar 2024 06:46:48 -0700 Subject: [PATCH 07/20] Revert degree_centrality --- .../centrality/test_degree_centrality_mg.py | 75 ++++++++----------- 1 file changed, 33 insertions(+), 42 deletions(-) diff --git a/python/cugraph/cugraph/tests/centrality/test_degree_centrality_mg.py b/python/cugraph/cugraph/tests/centrality/test_degree_centrality_mg.py index 8606649c745..18a9941ab06 100644 --- a/python/cugraph/cugraph/tests/centrality/test_degree_centrality_mg.py +++ b/python/cugraph/cugraph/tests/centrality/test_degree_centrality_mg.py @@ -15,12 +15,13 @@ import pytest +import cudf +import dask_cudf import cugraph from cugraph.dask.common.mg_utils import is_single_gpu -from cugraph.datasets import karate_asymmetric, polbooks, email_Eu_core +from cugraph.testing.utils import RAPIDS_DATASET_ROOT_DIR_PATH from cudf.testing import assert_series_equal - # ============================================================================= # Pytest Setup / Teardown - called for each test function # ============================================================================= @@ -30,55 +31,45 @@ def setup_function(): gc.collect() -# 
============================================================================= -# Parameters -# ============================================================================= - - -DATASETS = [karate_asymmetric, polbooks, email_Eu_core] IS_DIRECTED = [True, False] - -# ============================================================================= -# Helper functions -# ============================================================================= - - -def get_sg_graph(dataset, directed): - G = dataset.get_graph(create_using=cugraph.Graph(directed=directed)) - - return G - - -def get_mg_graph(dataset, directed): - ddf = dataset.get_dask_edgelist() - dg = cugraph.Graph(directed=directed) - dg.from_dask_cudf_edgelist( - ddf, - source="src", - destination="dst", - edge_attr="wgt", - renumber=True, - store_transposed=True, - ) - - return dg - - -# ============================================================================= -# Tests -# ============================================================================= +DATA_PATH = [ + (RAPIDS_DATASET_ROOT_DIR_PATH / "karate-asymmetric.csv").as_posix(), + (RAPIDS_DATASET_ROOT_DIR_PATH / "polbooks.csv").as_posix(), + (RAPIDS_DATASET_ROOT_DIR_PATH / "email-Eu-core.csv").as_posix(), +] @pytest.mark.mg @pytest.mark.skipif(is_single_gpu(), reason="skipping MG testing on Single GPU system") -@pytest.mark.parametrize("dataset", DATASETS) @pytest.mark.parametrize("directed", IS_DIRECTED) -def test_dask_mg_degree(dask_client, dataset, directed): - dg = get_mg_graph(dataset, directed) +@pytest.mark.parametrize("data_file", DATA_PATH) +def test_dask_mg_degree(dask_client, directed, data_file): + + input_data_path = data_file + chunksize = cugraph.dask.get_chunksize(input_data_path) + + ddf = dask_cudf.read_csv( + input_data_path, + chunksize=chunksize, + delimiter=" ", + names=["src", "dst", "value"], + dtype=["int32", "int32", "float32"], + ) + + df = cudf.read_csv( + input_data_path, + delimiter=" ", + names=["src", "dst", "value"], + 
dtype=["int32", "int32", "float32"], + ) + + dg = cugraph.Graph(directed=directed) + dg.from_dask_cudf_edgelist(ddf, "src", "dst") dg.compute_renumber_edge_list() - g = get_sg_graph(dataset, directed) + g = cugraph.Graph(directed=directed) + g.from_cudf_edgelist(df, "src", "dst") merge_df_in_degree = ( dg.in_degree() From 786dc9f5bf1a77b9528a50a560ee4902d63ecd9f Mon Sep 17 00:00:00 2001 From: Ralph Liu Date: Mon, 11 Mar 2024 08:13:00 -0700 Subject: [PATCH 08/20] Revert edge_betweenness_centrality --- .../test_edge_betweenness_centrality_mg.py | 226 +++++++++++------- 1 file changed, 139 insertions(+), 87 deletions(-) diff --git a/python/cugraph/cugraph/tests/centrality/test_edge_betweenness_centrality_mg.py b/python/cugraph/cugraph/tests/centrality/test_edge_betweenness_centrality_mg.py index ae44917dc3a..6c066a947ac 100644 --- a/python/cugraph/cugraph/tests/centrality/test_edge_betweenness_centrality_mg.py +++ b/python/cugraph/cugraph/tests/centrality/test_edge_betweenness_centrality_mg.py @@ -14,9 +14,14 @@ import gc import pytest +import dask_cudf +from pylibcugraph.testing.utils import gen_fixture_params_product +from cugraph.datasets import karate, dolphins + import cugraph import cugraph.dask as dcg -from cugraph.datasets import karate, dolphins + +# from cugraph.dask.common.mg_utils import is_single_gpu # ============================================================================= @@ -28,34 +33,79 @@ def setup_function(): gc.collect() -# ============================================================================= -# Parameters -# ============================================================================= +IS_DIRECTED = [True, False] +INCLUDE_WEIGHTS = [False, True] +INCLUDE_EDGE_IDS = [False, True] +NORMALIZED_OPTIONS = [False, True] +SUBSET_SIZE_OPTIONS = [4, None] -DATASETS = [karate, dolphins] -IS_DIRECTED = [True, False] -IS_WEIGHTED = [True, False] -INCLUDE_EDGE_IDS = [True, False] -IS_NORMALIZED = [True, False] -SUBSET_SIZES = [4, None] +# 
email_Eu_core is too expensive to test +datasets = [karate, dolphins] # ============================================================================= -# Helper functions +# Pytest fixtures # ============================================================================= -def get_sg_graph(dataset, directed, edge_ids): - dataset.unload() - df = dataset.get_edgelist() +fixture_params = gen_fixture_params_product( + (datasets, "graph_file"), + (IS_DIRECTED, "directed"), + (INCLUDE_WEIGHTS, "include_weights"), + (INCLUDE_EDGE_IDS, "include_edgeids"), + (NORMALIZED_OPTIONS, "normalized"), + (SUBSET_SIZE_OPTIONS, "subset_size"), +) + + +@pytest.fixture(scope="module", params=fixture_params) +def input_combo(request): + """ + Simply return the current combination of params as a dictionary for use in + tests or other parameterized fixtures. + """ + parameters = dict( + zip( + ( + "graph_file", + "directed", + "include_weights", + "include_edge_ids", + "normalized", + "subset_size", + "subset_seed", + ), + request.param, + ) + ) + + return parameters + + +@pytest.fixture(scope="module") +def input_expected_output(input_combo): + """ + This fixture returns the inputs and expected results from the edge + betweenness centrality algo. + (based on cuGraph edge betweenness centrality) which can be used + for validation. 
+ """ + directed = input_combo["directed"] + normalized = input_combo["normalized"] + k = input_combo["subset_size"] + subset_seed = 42 + edge_ids = input_combo["include_edge_ids"] + weight = input_combo["include_weights"] + + df = input_combo["graph_file"].get_edgelist() if edge_ids: if not directed: # Edge ids not supported for undirected graph - return None - dtype = df.dtypes.iloc[0] + return + dtype = df.dtypes[0] edge_id = "edge_id" - df[edge_id] = df.index + df["edge_id"] = df.index df = df.astype(dtype) else: @@ -65,13 +115,30 @@ def get_sg_graph(dataset, directed, edge_ids): G.from_cudf_edgelist( df, source="src", destination="dst", weight="wgt", edge_id=edge_id ) + if isinstance(k, int): + k = G.select_random_vertices(subset_seed, k) - return G + input_combo["k"] = k + # Save the results back to the input_combo dictionary to prevent redundant + # cuGraph runs. Other tests using the input_combo fixture will look for + # them, and if not present they will have to re-run the same cuGraph call. 
+ sg_cugraph_edge_bc = ( + cugraph.edge_betweenness_centrality(G, k, normalized) + .sort_values(["src", "dst"]) + .reset_index(drop=True) + ) + input_data_path = input_combo["graph_file"].get_path() -def get_mg_graph(dataset, directed, edge_ids, weight): - dataset.unload() - ddf = dataset.get_dask_edgelist() + input_combo["sg_cugraph_results"] = sg_cugraph_edge_bc + chunksize = dcg.get_chunksize(input_data_path) + ddf = dask_cudf.read_csv( + input_data_path, + chunksize=chunksize, + delimiter=" ", + names=["src", "dst", "value"], + dtype=["int32", "int32", "float32"], + ) if weight: weight = ddf @@ -87,16 +154,20 @@ def get_mg_graph(dataset, directed, edge_ids, weight): edge_id = None dg = cugraph.Graph(directed=directed) + dg.from_dask_cudf_edgelist( ddf, source="src", destination="dst", - weight="wgt", + weight="value", edge_id=edge_id, renumber=True, ) - return dg, weight + input_combo["MGGraph"] = dg + input_combo["include_weights"] = weight + + return input_combo # ============================================================================= @@ -104,76 +175,57 @@ def get_mg_graph(dataset, directed, edge_ids, weight): # ============================================================================= +# @pytest.mark.skipif( +# is_single_gpu(), reason="skipping MG testing on Single GPU system" +# ) @pytest.mark.mg -@pytest.mark.parametrize("dataset", DATASETS) -@pytest.mark.parametrize("directed", IS_DIRECTED) -@pytest.mark.parametrize("weighted", IS_WEIGHTED) -@pytest.mark.parametrize("edge_ids", INCLUDE_EDGE_IDS) -@pytest.mark.parametrize("normalized", IS_NORMALIZED) -@pytest.mark.parametrize("subset_size", SUBSET_SIZES) def test_dask_mg_edge_betweenness_centrality( - dask_client, - dataset, - directed, - weighted, - edge_ids, - normalized, - subset_size, - benchmark, + dask_client, benchmark, input_expected_output ): - g = get_sg_graph(dataset, directed, edge_ids) - - if g is None: - pytest.skip("Edge_ids not supported for undirected graph") - - dg, weight = 
get_mg_graph(dataset, directed, edge_ids, weighted) - subset_seed = 42 - - k = subset_size - if isinstance(k, int): - k = g.select_random_vertices(subset_seed, k) - - sg_cugraph_edge_bc = ( - cugraph.edge_betweenness_centrality(g, k, normalized) - .sort_values(["src", "dst"]) - .reset_index(drop=True) - ) - - if weight is not None: - with pytest.raises(NotImplementedError): + if input_expected_output is not None: + dg = input_expected_output["MGGraph"] + k = input_expected_output["k"] + normalized = input_expected_output["normalized"] + weight = input_expected_output["include_weights"] + if weight is not None: + with pytest.raises(NotImplementedError): + result_edge_bc = benchmark( + dcg.edge_betweenness_centrality, dg, k, normalized, weight=weight + ) + + else: result_edge_bc = benchmark( dcg.edge_betweenness_centrality, dg, k, normalized, weight=weight ) + result_edge_bc = ( + result_edge_bc.compute() + .sort_values(["src", "dst"]) + .reset_index(drop=True) + .rename(columns={"betweenness_centrality": "mg_betweenness_centrality"}) + ) - else: - result_edge_bc = benchmark( - dcg.edge_betweenness_centrality, dg, k, normalized, weight=weight - ) - result_edge_bc = ( - result_edge_bc.compute() - .sort_values(["src", "dst"]) - .reset_index(drop=True) - .rename(columns={"betweenness_centrality": "mg_betweenness_centrality"}) - ) - - if len(result_edge_bc.columns) > 3: - result_edge_bc = result_edge_bc.rename(columns={"edge_id": "mg_edge_id"}) - - expected_output = sg_cugraph_edge_bc.reset_index(drop=True) - result_edge_bc["betweenness_centrality"] = expected_output[ - "betweenness_centrality" - ] - if len(expected_output.columns) > 3: - result_edge_bc["edge_id"] = expected_output["edge_id"] - edge_id_diff = result_edge_bc.query("mg_edge_id != edge_id") - assert len(edge_id_diff) == 0 + if len(result_edge_bc.columns) > 3: + result_edge_bc = result_edge_bc.rename( + columns={"edge_id": "mg_edge_id"} + ) - edge_bc_diffs1 = result_edge_bc.query( - 
"mg_betweenness_centrality - betweenness_centrality > 0.01" - ) - edge_bc_diffs2 = result_edge_bc.query( - "betweenness_centrality - mg_betweenness_centrality < -0.01" - ) + expected_output = input_expected_output["sg_cugraph_results"].reset_index( + drop=True + ) + result_edge_bc["betweenness_centrality"] = expected_output[ + "betweenness_centrality" + ] + if len(expected_output.columns) > 3: + result_edge_bc["edge_id"] = expected_output["edge_id"] + edge_id_diff = result_edge_bc.query("mg_edge_id != edge_id") + assert len(edge_id_diff) == 0 + + edge_bc_diffs1 = result_edge_bc.query( + "mg_betweenness_centrality - betweenness_centrality > 0.01" + ) + edge_bc_diffs2 = result_edge_bc.query( + "betweenness_centrality - mg_betweenness_centrality < -0.01" + ) - assert len(edge_bc_diffs1) == 0 - assert len(edge_bc_diffs2) == 0 + assert len(edge_bc_diffs1) == 0 + assert len(edge_bc_diffs2) == 0 From c9d0e3047ee95b8492ef21e3ce4ed783a3ea30b7 Mon Sep 17 00:00:00 2001 From: Ralph Liu Date: Mon, 11 Mar 2024 12:48:45 -0700 Subject: [PATCH 09/20] Add back changes except edge_betweenness_centrality --- .../test_batch_betweenness_centrality_mg.py | 41 ++-- ...st_batch_edge_betweenness_centrality_mg.py | 38 ++-- .../test_betweenness_centrality_mg.py | 177 +++++++----------- .../centrality/test_degree_centrality_mg.py | 75 ++++---- 4 files changed, 149 insertions(+), 182 deletions(-) diff --git a/python/cugraph/cugraph/tests/centrality/test_batch_betweenness_centrality_mg.py b/python/cugraph/cugraph/tests/centrality/test_batch_betweenness_centrality_mg.py index 9d858919786..7050d0c9e55 100644 --- a/python/cugraph/cugraph/tests/centrality/test_batch_betweenness_centrality_mg.py +++ b/python/cugraph/cugraph/tests/centrality/test_batch_betweenness_centrality_mg.py @@ -24,44 +24,49 @@ compare_scores, ) -DIRECTED_GRAPH_OPTIONS = [False, True] -WEIGHTED_GRAPH_OPTIONS = [False, True] -ENDPOINTS_OPTIONS = [False, True] -NORMALIZED_OPTIONS = [False, True] -DEFAULT_EPSILON = 0.0001 
-SUBSET_SIZE_OPTIONS = [4, None] -SUBSET_SEED_OPTIONS = [42] - # ============================================================================= # Parameters # ============================================================================= -DATASETS = [karate] -# FIXME: The "preset_gpu_count" from 21.08 and below are currently not -# supported and have been removed -RESULT_DTYPE_OPTIONS = [np.float64] + +DATASETS = [karate] +DEFAULT_EPSILON = 0.0001 +IS_DIRECTED = [False, True] +ENDPOINTS = [False, True] +IS_NORMALIZED = [False, True] +RESULT_DTYPES = [np.float64] +SUBSET_SIZES = [4, None] +SUBSET_SEEDS = [42] +IS_WEIGHTED = [False, True] # ============================================================================= # Pytest Setup / Teardown - called for each test function # ============================================================================= + + def setup_function(): gc.collect() +# ============================================================================= +# Tests +# ============================================================================= + + @pytest.mark.mg @pytest.mark.skipif(is_single_gpu(), reason="skipping MG testing on Single GPU system") @pytest.mark.parametrize( "graph_file", DATASETS, ids=[f"dataset={d.get_path().stem}" for d in DATASETS] ) -@pytest.mark.parametrize("directed", DIRECTED_GRAPH_OPTIONS) -@pytest.mark.parametrize("subset_size", SUBSET_SIZE_OPTIONS) -@pytest.mark.parametrize("normalized", NORMALIZED_OPTIONS) +@pytest.mark.parametrize("directed", IS_DIRECTED) +@pytest.mark.parametrize("subset_size", SUBSET_SIZES) +@pytest.mark.parametrize("normalized", IS_NORMALIZED) @pytest.mark.parametrize("weight", [None]) -@pytest.mark.parametrize("endpoints", ENDPOINTS_OPTIONS) -@pytest.mark.parametrize("subset_seed", SUBSET_SEED_OPTIONS) -@pytest.mark.parametrize("result_dtype", RESULT_DTYPE_OPTIONS) +@pytest.mark.parametrize("endpoints", ENDPOINTS) +@pytest.mark.parametrize("subset_seed", SUBSET_SEEDS) 
+@pytest.mark.parametrize("result_dtype", RESULT_DTYPES) def test_mg_betweenness_centrality( graph_file, directed, diff --git a/python/cugraph/cugraph/tests/centrality/test_batch_edge_betweenness_centrality_mg.py b/python/cugraph/cugraph/tests/centrality/test_batch_edge_betweenness_centrality_mg.py index eb1dec7e5bd..48364a4a79a 100644 --- a/python/cugraph/cugraph/tests/centrality/test_batch_edge_betweenness_centrality_mg.py +++ b/python/cugraph/cugraph/tests/centrality/test_batch_edge_betweenness_centrality_mg.py @@ -19,49 +19,49 @@ from cugraph.dask.common.mg_utils import is_single_gpu from cugraph.datasets import karate, netscience -# Get parameters from standard betwenness_centrality_test -# As tests directory is not a module, we need to add it to the path -# FIXME: Test must be reworked to import from 'cugraph.testing' instead of -# importing from other tests -from test_edge_betweenness_centrality import ( - DIRECTED_GRAPH_OPTIONS, - NORMALIZED_OPTIONS, - DEFAULT_EPSILON, - SUBSET_SIZE_OPTIONS, -) - from test_edge_betweenness_centrality import ( calc_edge_betweenness_centrality, compare_scores, ) + # ============================================================================= # Parameters # ============================================================================= -DATASETS = [karate, netscience] -# FIXME: The "preset_gpu_count" from 21.08 and below are not supported and have -# been removed -RESULT_DTYPE_OPTIONS = [np.float32, np.float64] + +DATASETS = [karate, netscience] +IS_DIRECTED = [True, False] +IS_NORMALIZED = [True, False] +DEFAULT_EPSILON = 0.0001 +SUBSET_SIZES = [4, None] +RESULT_DTYPES = [np.float32, np.float64] # ============================================================================= # Pytest Setup / Teardown - called for each test function # ============================================================================= + + def setup_function(): gc.collect() +# ============================================================================= 
+# Tests +# ============================================================================= + + # FIXME: Fails for directed = False(bc score twice as much) and normalized = True. @pytest.mark.mg @pytest.mark.skipif(is_single_gpu(), reason="skipping MG testing on Single GPU system") @pytest.mark.parametrize( "graph_file", DATASETS, ids=[f"dataset={d.get_path().stem}" for d in DATASETS] ) -@pytest.mark.parametrize("directed", DIRECTED_GRAPH_OPTIONS) -@pytest.mark.parametrize("subset_size", SUBSET_SIZE_OPTIONS) -@pytest.mark.parametrize("normalized", NORMALIZED_OPTIONS) -@pytest.mark.parametrize("result_dtype", RESULT_DTYPE_OPTIONS) +@pytest.mark.parametrize("directed", IS_DIRECTED) +@pytest.mark.parametrize("subset_size", SUBSET_SIZES) +@pytest.mark.parametrize("normalized", IS_NORMALIZED) +@pytest.mark.parametrize("result_dtype", RESULT_DTYPES) def test_mg_edge_betweenness_centrality( graph_file, directed, diff --git a/python/cugraph/cugraph/tests/centrality/test_betweenness_centrality_mg.py b/python/cugraph/cugraph/tests/centrality/test_betweenness_centrality_mg.py index 60d344eb0fd..48fbe796bb4 100644 --- a/python/cugraph/cugraph/tests/centrality/test_betweenness_centrality_mg.py +++ b/python/cugraph/cugraph/tests/centrality/test_betweenness_centrality_mg.py @@ -15,13 +15,11 @@ import pytest -import dask_cudf import cupy import cudf import cugraph import cugraph.dask as dcg -from cugraph.testing import utils -from pylibcugraph.testing import gen_fixture_params_product +from cugraph.datasets import karate, dolphins # ============================================================================= @@ -33,145 +31,102 @@ def setup_function(): gc.collect() -IS_DIRECTED = [True, False] +# ============================================================================= +# Parameters +# ============================================================================= +DATASETS = [karate, dolphins] +IS_DIRECTED = [True, False] +IS_NORMALIZED = [True, False] +ENDPOINTS = [True, False] 
+SUBSET_SEEDS = [42, None] +SUBSET_SIZES = [None, 15] +VERTEX_LIST_TYPES = [list, cudf] # ============================================================================= -# Pytest fixtures +# Helper functions # ============================================================================= -datasets = utils.DATASETS_UNDIRECTED - -fixture_params = gen_fixture_params_product( - (datasets, "graph_file"), - ([False, True], "normalized"), - ([False, True], "endpoints"), - ([42, None], "subset_seed"), - ([None, 15], "subset_size"), - (IS_DIRECTED, "directed"), - ([list, cudf], "vertex_list_type"), -) - - -@pytest.fixture(scope="module", params=fixture_params) -def input_combo(request): - """ - Simply return the current combination of params as a dictionary for use in - tests or other parameterized fixtures. - """ - parameters = dict( - zip( - ( - "graph_file", - "normalized", - "endpoints", - "subset_seed", - "subset_size", - "directed", - "vertex_list_type", - ), - request.param, - ) + +def get_sg_graph(dataset, directed): + G = dataset.get_graph(create_using=cugraph.Graph(directed=directed)) + + return G + + +def get_mg_graph(dataset, directed): + ddf = dataset.get_dask_edgelist() + dg = cugraph.Graph(directed=directed) + dg.from_dask_cudf_edgelist( + ddf, + source="src", + destination="dst", + edge_attr="wgt", + renumber=True, + store_transposed=True, ) - return parameters + return dg -@pytest.fixture(scope="module") -def input_expected_output(input_combo): - """ - This fixture returns the inputs and expected results from the - betweenness_centrality algo based on cuGraph betweenness_centrality) which can - be used for validation. 
- """ +# ============================================================================= +# Tests +# ============================================================================= - input_data_path = input_combo["graph_file"] - normalized = input_combo["normalized"] - endpoints = input_combo["endpoints"] - random_state = input_combo["subset_seed"] - subset_size = input_combo["subset_size"] - directed = input_combo["directed"] - vertex_list_type = input_combo["vertex_list_type"] - G = utils.generate_cugraph_graph_from_file(input_data_path, directed=directed) +@pytest.mark.mg +@pytest.mark.parametrize("dataset", DATASETS) +@pytest.mark.parametrize("directed", IS_DIRECTED) +@pytest.mark.parametrize("normalized", IS_NORMALIZED) +@pytest.mark.parametrize("endpoint", ENDPOINTS) +@pytest.mark.parametrize("subset_seed", SUBSET_SEEDS) +@pytest.mark.parametrize("subset_size", SUBSET_SIZES) +@pytest.mark.parametrize("v_list_type", VERTEX_LIST_TYPES) +def test_dask_mg_betweenness_centrality( + dataset, + directed, + normalized, + endpoint, + subset_seed, + subset_size, + v_list_type, + dask_client, + benchmark, +): + g = get_sg_graph(dataset, directed) + dataset.unload() + dg = get_mg_graph(dataset, directed) + random_state = subset_seed if subset_size is None: k = subset_size elif isinstance(subset_size, int): # Select random vertices - k = G.select_random_vertices( + k = g.select_random_vertices( random_state=random_state, num_vertices=subset_size ) - if vertex_list_type is list: + if v_list_type is list: k = k.to_arrow().to_pylist() print("the seeds are \n", k) - if vertex_list_type is int: + if v_list_type is int: # This internally sample k vertices in betweenness centrality. # Since the nodes that will be sampled by each implementation will # be random, therefore sample all vertices which will make the test # consistent. 
- k = len(G.nodes()) - - input_combo["k"] = k + k = len(g.nodes()) sg_cugraph_bc = cugraph.betweenness_centrality( - G, k=k, normalized=normalized, endpoints=endpoints, random_state=random_state + g, k=k, normalized=normalized, endpoints=endpoint, random_state=random_state ) - # Save the results back to the input_combo dictionary to prevent redundant - # cuGraph runs. Other tests using the input_combo fixture will look for - # them, and if not present they will have to re-run the same cuGraph call. sg_cugraph_bc = sg_cugraph_bc.sort_values("vertex").reset_index(drop=True) - input_combo["sg_cugraph_results"] = sg_cugraph_bc - chunksize = dcg.get_chunksize(input_data_path) - ddf = dask_cudf.read_csv( - input_data_path, - chunksize=chunksize, - delimiter=" ", - names=["src", "dst", "value"], - dtype=["int32", "int32", "float32"], - ) - - dg = cugraph.Graph(directed=directed) - dg.from_dask_cudf_edgelist( - ddf, - source="src", - destination="dst", - edge_attr="value", - renumber=True, - store_transposed=True, - ) - - input_combo["MGGraph"] = dg - - return input_combo - - -# ============================================================================= -# Tests -# ============================================================================= - - -# @pytest.mark.skipif( -# is_single_gpu(), reason="skipping MG testing on Single GPU system" -# ) - - -@pytest.mark.mg -def test_dask_mg_betweenness_centrality(dask_client, benchmark, input_expected_output): - - dg = input_expected_output["MGGraph"] - k = input_expected_output["k"] - endpoints = input_expected_output["endpoints"] - normalized = input_expected_output["normalized"] - random_state = input_expected_output["subset_seed"] mg_bc_results = benchmark( dcg.betweenness_centrality, dg, k=k, normalized=normalized, - endpoints=endpoints, + endpoints=endpoint, random_state=random_state, ) @@ -179,11 +134,9 @@ def test_dask_mg_betweenness_centrality(dask_client, benchmark, input_expected_o 
mg_bc_results.compute().sort_values("vertex").reset_index(drop=True) )["betweenness_centrality"].to_cupy() - sg_bc_results = ( - input_expected_output["sg_cugraph_results"] - .sort_values("vertex") - .reset_index(drop=True) - )["betweenness_centrality"].to_cupy() + sg_bc_results = (sg_cugraph_bc.sort_values("vertex").reset_index(drop=True))[ + "betweenness_centrality" + ].to_cupy() diff = cupy.isclose(mg_bc_results, sg_bc_results) diff --git a/python/cugraph/cugraph/tests/centrality/test_degree_centrality_mg.py b/python/cugraph/cugraph/tests/centrality/test_degree_centrality_mg.py index 18a9941ab06..8606649c745 100644 --- a/python/cugraph/cugraph/tests/centrality/test_degree_centrality_mg.py +++ b/python/cugraph/cugraph/tests/centrality/test_degree_centrality_mg.py @@ -15,13 +15,12 @@ import pytest -import cudf -import dask_cudf import cugraph from cugraph.dask.common.mg_utils import is_single_gpu -from cugraph.testing.utils import RAPIDS_DATASET_ROOT_DIR_PATH +from cugraph.datasets import karate_asymmetric, polbooks, email_Eu_core from cudf.testing import assert_series_equal + # ============================================================================= # Pytest Setup / Teardown - called for each test function # ============================================================================= @@ -31,45 +30,55 @@ def setup_function(): gc.collect() +# ============================================================================= +# Parameters +# ============================================================================= + + +DATASETS = [karate_asymmetric, polbooks, email_Eu_core] IS_DIRECTED = [True, False] -DATA_PATH = [ - (RAPIDS_DATASET_ROOT_DIR_PATH / "karate-asymmetric.csv").as_posix(), - (RAPIDS_DATASET_ROOT_DIR_PATH / "polbooks.csv").as_posix(), - (RAPIDS_DATASET_ROOT_DIR_PATH / "email-Eu-core.csv").as_posix(), -] +# ============================================================================= +# Helper functions +# 
============================================================================= -@pytest.mark.mg -@pytest.mark.skipif(is_single_gpu(), reason="skipping MG testing on Single GPU system") -@pytest.mark.parametrize("directed", IS_DIRECTED) -@pytest.mark.parametrize("data_file", DATA_PATH) -def test_dask_mg_degree(dask_client, directed, data_file): - - input_data_path = data_file - chunksize = cugraph.dask.get_chunksize(input_data_path) - - ddf = dask_cudf.read_csv( - input_data_path, - chunksize=chunksize, - delimiter=" ", - names=["src", "dst", "value"], - dtype=["int32", "int32", "float32"], - ) - df = cudf.read_csv( - input_data_path, - delimiter=" ", - names=["src", "dst", "value"], - dtype=["int32", "int32", "float32"], - ) +def get_sg_graph(dataset, directed): + G = dataset.get_graph(create_using=cugraph.Graph(directed=directed)) + + return G + +def get_mg_graph(dataset, directed): + ddf = dataset.get_dask_edgelist() dg = cugraph.Graph(directed=directed) - dg.from_dask_cudf_edgelist(ddf, "src", "dst") + dg.from_dask_cudf_edgelist( + ddf, + source="src", + destination="dst", + edge_attr="wgt", + renumber=True, + store_transposed=True, + ) + + return dg + + +# ============================================================================= +# Tests +# ============================================================================= + + +@pytest.mark.mg +@pytest.mark.skipif(is_single_gpu(), reason="skipping MG testing on Single GPU system") +@pytest.mark.parametrize("dataset", DATASETS) +@pytest.mark.parametrize("directed", IS_DIRECTED) +def test_dask_mg_degree(dask_client, dataset, directed): + dg = get_mg_graph(dataset, directed) dg.compute_renumber_edge_list() - g = cugraph.Graph(directed=directed) - g.from_cudf_edgelist(df, "src", "dst") + g = get_sg_graph(dataset, directed) merge_df_in_degree = ( dg.in_degree() From 2ad3aeb2e66a668d93a18a3f0fe8d7e3e6bed40a Mon Sep 17 00:00:00 2001 From: Ralph Liu Date: Wed, 13 Mar 2024 12:06:14 -0700 Subject: [PATCH 10/20] Add call to 
deload internal dataset edge list --- .../test_batch_betweenness_centrality_mg.py | 11 +- ...st_batch_edge_betweenness_centrality_mg.py | 10 +- .../test_betweenness_centrality_mg.py | 5 + .../centrality/test_degree_centrality_mg.py | 5 + .../test_edge_betweenness_centrality_mg.py | 229 +++++++----------- .../test_eigenvector_centrality_mg.py | 50 ++-- .../centrality/test_katz_centrality_mg.py | 71 +++--- 7 files changed, 170 insertions(+), 211 deletions(-) diff --git a/python/cugraph/cugraph/tests/centrality/test_batch_betweenness_centrality_mg.py b/python/cugraph/cugraph/tests/centrality/test_batch_betweenness_centrality_mg.py index 7050d0c9e55..1c73ebb0216 100644 --- a/python/cugraph/cugraph/tests/centrality/test_batch_betweenness_centrality_mg.py +++ b/python/cugraph/cugraph/tests/centrality/test_batch_betweenness_centrality_mg.py @@ -57,9 +57,7 @@ def setup_function(): @pytest.mark.mg @pytest.mark.skipif(is_single_gpu(), reason="skipping MG testing on Single GPU system") -@pytest.mark.parametrize( - "graph_file", DATASETS, ids=[f"dataset={d.get_path().stem}" for d in DATASETS] -) +@pytest.mark.parametrize("dataset", DATASETS) @pytest.mark.parametrize("directed", IS_DIRECTED) @pytest.mark.parametrize("subset_size", SUBSET_SIZES) @pytest.mark.parametrize("normalized", IS_NORMALIZED) @@ -68,7 +66,7 @@ def setup_function(): @pytest.mark.parametrize("subset_seed", SUBSET_SEEDS) @pytest.mark.parametrize("result_dtype", RESULT_DTYPES) def test_mg_betweenness_centrality( - graph_file, + dataset, directed, subset_size, normalized, @@ -79,7 +77,7 @@ def test_mg_betweenness_centrality( dask_client, ): sorted_df = calc_betweenness_centrality( - graph_file, + dataset, directed=directed, normalized=normalized, k=subset_size, @@ -95,3 +93,6 @@ def test_mg_betweenness_centrality( second_key="ref_bc", epsilon=DEFAULT_EPSILON, ) + + # Clean-up stored dataset edge-lists + dataset.unload() diff --git 
a/python/cugraph/cugraph/tests/centrality/test_batch_edge_betweenness_centrality_mg.py b/python/cugraph/cugraph/tests/centrality/test_batch_edge_betweenness_centrality_mg.py index 48364a4a79a..4530dd3da86 100644 --- a/python/cugraph/cugraph/tests/centrality/test_batch_edge_betweenness_centrality_mg.py +++ b/python/cugraph/cugraph/tests/centrality/test_batch_edge_betweenness_centrality_mg.py @@ -55,15 +55,13 @@ def setup_function(): # FIXME: Fails for directed = False(bc score twice as much) and normalized = True. @pytest.mark.mg @pytest.mark.skipif(is_single_gpu(), reason="skipping MG testing on Single GPU system") -@pytest.mark.parametrize( - "graph_file", DATASETS, ids=[f"dataset={d.get_path().stem}" for d in DATASETS] -) +@pytest.mark.parametrize("dataset", DATASETS) @pytest.mark.parametrize("directed", IS_DIRECTED) @pytest.mark.parametrize("subset_size", SUBSET_SIZES) @pytest.mark.parametrize("normalized", IS_NORMALIZED) @pytest.mark.parametrize("result_dtype", RESULT_DTYPES) def test_mg_edge_betweenness_centrality( - graph_file, + dataset, directed, subset_size, normalized, @@ -71,7 +69,7 @@ def test_mg_edge_betweenness_centrality( dask_client, ): sorted_df = calc_edge_betweenness_centrality( - graph_file, + dataset, directed=directed, normalized=normalized, k=subset_size, @@ -86,3 +84,5 @@ def test_mg_edge_betweenness_centrality( second_key="ref_bc", epsilon=DEFAULT_EPSILON, ) + # Clean-up stored dataset edge-lists + dataset.unload() diff --git a/python/cugraph/cugraph/tests/centrality/test_betweenness_centrality_mg.py b/python/cugraph/cugraph/tests/centrality/test_betweenness_centrality_mg.py index 48fbe796bb4..c94c2dcaff6 100644 --- a/python/cugraph/cugraph/tests/centrality/test_betweenness_centrality_mg.py +++ b/python/cugraph/cugraph/tests/centrality/test_betweenness_centrality_mg.py @@ -49,12 +49,14 @@ def setup_function(): def get_sg_graph(dataset, directed): + dataset.unload() G = dataset.get_graph(create_using=cugraph.Graph(directed=directed)) return 
G def get_mg_graph(dataset, directed): + dataset.unload() ddf = dataset.get_dask_edgelist() dg = cugraph.Graph(directed=directed) dg.from_dask_cudf_edgelist( @@ -141,3 +143,6 @@ def test_dask_mg_betweenness_centrality( diff = cupy.isclose(mg_bc_results, sg_bc_results) assert diff.all() + + # Clean-up stored dataset edge-lists + dataset.unload() diff --git a/python/cugraph/cugraph/tests/centrality/test_degree_centrality_mg.py b/python/cugraph/cugraph/tests/centrality/test_degree_centrality_mg.py index 8606649c745..68daff9238c 100644 --- a/python/cugraph/cugraph/tests/centrality/test_degree_centrality_mg.py +++ b/python/cugraph/cugraph/tests/centrality/test_degree_centrality_mg.py @@ -45,12 +45,14 @@ def setup_function(): def get_sg_graph(dataset, directed): + dataset.unload() G = dataset.get_graph(create_using=cugraph.Graph(directed=directed)) return G def get_mg_graph(dataset, directed): + dataset.unload() ddf = dataset.get_dask_edgelist() dg = cugraph.Graph(directed=directed) dg.from_dask_cudf_edgelist( @@ -116,3 +118,6 @@ def test_dask_mg_degree(dask_client, dataset, directed): check_names=False, check_dtype=False, ) + + # Clean-up stored dataset edge-lists + dataset.unload() diff --git a/python/cugraph/cugraph/tests/centrality/test_edge_betweenness_centrality_mg.py b/python/cugraph/cugraph/tests/centrality/test_edge_betweenness_centrality_mg.py index 6c066a947ac..c3a559da5c9 100644 --- a/python/cugraph/cugraph/tests/centrality/test_edge_betweenness_centrality_mg.py +++ b/python/cugraph/cugraph/tests/centrality/test_edge_betweenness_centrality_mg.py @@ -14,14 +14,9 @@ import gc import pytest -import dask_cudf -from pylibcugraph.testing.utils import gen_fixture_params_product -from cugraph.datasets import karate, dolphins - import cugraph import cugraph.dask as dcg - -# from cugraph.dask.common.mg_utils import is_single_gpu +from cugraph.datasets import karate, dolphins # ============================================================================= @@ -33,79 
+28,34 @@ def setup_function(): gc.collect() -IS_DIRECTED = [True, False] -INCLUDE_WEIGHTS = [False, True] -INCLUDE_EDGE_IDS = [False, True] -NORMALIZED_OPTIONS = [False, True] -SUBSET_SIZE_OPTIONS = [4, None] - - -# email_Eu_core is too expensive to test -datasets = [karate, dolphins] - - # ============================================================================= -# Pytest fixtures +# Parameters # ============================================================================= -fixture_params = gen_fixture_params_product( - (datasets, "graph_file"), - (IS_DIRECTED, "directed"), - (INCLUDE_WEIGHTS, "include_weights"), - (INCLUDE_EDGE_IDS, "include_edgeids"), - (NORMALIZED_OPTIONS, "normalized"), - (SUBSET_SIZE_OPTIONS, "subset_size"), -) - - -@pytest.fixture(scope="module", params=fixture_params) -def input_combo(request): - """ - Simply return the current combination of params as a dictionary for use in - tests or other parameterized fixtures. - """ - parameters = dict( - zip( - ( - "graph_file", - "directed", - "include_weights", - "include_edge_ids", - "normalized", - "subset_size", - "subset_seed", - ), - request.param, - ) - ) +DATASETS = [karate, dolphins] +IS_DIRECTED = [True, False] +IS_WEIGHTED = [True, False] +INCLUDE_EDGE_IDS = [True, False] +IS_NORMALIZED = [True, False] +SUBSET_SIZES = [4, None] - return parameters +# ============================================================================= +# Helper functions +# ============================================================================= -@pytest.fixture(scope="module") -def input_expected_output(input_combo): - """ - This fixture returns the inputs and expected results from the edge - betweenness centrality algo. - (based on cuGraph edge betweenness centrality) which can be used - for validation. 
- """ - directed = input_combo["directed"] - normalized = input_combo["normalized"] - k = input_combo["subset_size"] - subset_seed = 42 - edge_ids = input_combo["include_edge_ids"] - weight = input_combo["include_weights"] - df = input_combo["graph_file"].get_edgelist() +def get_sg_graph(dataset, directed, edge_ids): + dataset.unload() + df = dataset.get_edgelist() if edge_ids: if not directed: # Edge ids not supported for undirected graph - return - dtype = df.dtypes[0] + return None + dtype = df.dtypes.iloc[0] edge_id = "edge_id" - df["edge_id"] = df.index + df[edge_id] = df.index df = df.astype(dtype) else: @@ -115,30 +65,13 @@ def input_expected_output(input_combo): G.from_cudf_edgelist( df, source="src", destination="dst", weight="wgt", edge_id=edge_id ) - if isinstance(k, int): - k = G.select_random_vertices(subset_seed, k) - input_combo["k"] = k - # Save the results back to the input_combo dictionary to prevent redundant - # cuGraph runs. Other tests using the input_combo fixture will look for - # them, and if not present they will have to re-run the same cuGraph call. 
- sg_cugraph_edge_bc = ( - cugraph.edge_betweenness_centrality(G, k, normalized) - .sort_values(["src", "dst"]) - .reset_index(drop=True) - ) + return G - input_data_path = input_combo["graph_file"].get_path() - input_combo["sg_cugraph_results"] = sg_cugraph_edge_bc - chunksize = dcg.get_chunksize(input_data_path) - ddf = dask_cudf.read_csv( - input_data_path, - chunksize=chunksize, - delimiter=" ", - names=["src", "dst", "value"], - dtype=["int32", "int32", "float32"], - ) +def get_mg_graph(dataset, directed, edge_ids, weight): + dataset.unload() + ddf = dataset.get_dask_edgelist() if weight: weight = ddf @@ -154,20 +87,16 @@ def input_expected_output(input_combo): edge_id = None dg = cugraph.Graph(directed=directed) - dg.from_dask_cudf_edgelist( ddf, source="src", destination="dst", - weight="value", + weight="wgt", edge_id=edge_id, renumber=True, ) - input_combo["MGGraph"] = dg - input_combo["include_weights"] = weight - - return input_combo + return dg, weight # ============================================================================= @@ -175,57 +104,79 @@ def input_expected_output(input_combo): # ============================================================================= -# @pytest.mark.skipif( -# is_single_gpu(), reason="skipping MG testing on Single GPU system" -# ) @pytest.mark.mg +@pytest.mark.parametrize("dataset", DATASETS) +@pytest.mark.parametrize("directed", IS_DIRECTED) +@pytest.mark.parametrize("weighted", IS_WEIGHTED) +@pytest.mark.parametrize("edge_ids", INCLUDE_EDGE_IDS) +@pytest.mark.parametrize("normalized", IS_NORMALIZED) +@pytest.mark.parametrize("subset_size", SUBSET_SIZES) def test_dask_mg_edge_betweenness_centrality( - dask_client, benchmark, input_expected_output + dask_client, + dataset, + directed, + weighted, + edge_ids, + normalized, + subset_size, + benchmark, ): - if input_expected_output is not None: - dg = input_expected_output["MGGraph"] - k = input_expected_output["k"] - normalized = input_expected_output["normalized"] - 
weight = input_expected_output["include_weights"] - if weight is not None: - with pytest.raises(NotImplementedError): - result_edge_bc = benchmark( - dcg.edge_betweenness_centrality, dg, k, normalized, weight=weight - ) - - else: + g = get_sg_graph(dataset, directed, edge_ids) + + if g is None: + pytest.skip("Edge_ids not supported for undirected graph") + + dg, weight = get_mg_graph(dataset, directed, edge_ids, weighted) + subset_seed = 42 + + k = subset_size + if isinstance(k, int): + k = g.select_random_vertices(subset_seed, k) + + sg_cugraph_edge_bc = ( + cugraph.edge_betweenness_centrality(g, k, normalized) + .sort_values(["src", "dst"]) + .reset_index(drop=True) + ) + + if weight is not None: + with pytest.raises(NotImplementedError): result_edge_bc = benchmark( dcg.edge_betweenness_centrality, dg, k, normalized, weight=weight ) - result_edge_bc = ( - result_edge_bc.compute() - .sort_values(["src", "dst"]) - .reset_index(drop=True) - .rename(columns={"betweenness_centrality": "mg_betweenness_centrality"}) - ) - if len(result_edge_bc.columns) > 3: - result_edge_bc = result_edge_bc.rename( - columns={"edge_id": "mg_edge_id"} - ) + else: + result_edge_bc = benchmark( + dcg.edge_betweenness_centrality, dg, k, normalized, weight=weight + ) + result_edge_bc = ( + result_edge_bc.compute() + .sort_values(["src", "dst"]) + .reset_index(drop=True) + .rename(columns={"betweenness_centrality": "mg_betweenness_centrality"}) + ) - expected_output = input_expected_output["sg_cugraph_results"].reset_index( - drop=True - ) - result_edge_bc["betweenness_centrality"] = expected_output[ - "betweenness_centrality" - ] - if len(expected_output.columns) > 3: - result_edge_bc["edge_id"] = expected_output["edge_id"] - edge_id_diff = result_edge_bc.query("mg_edge_id != edge_id") - assert len(edge_id_diff) == 0 - - edge_bc_diffs1 = result_edge_bc.query( - "mg_betweenness_centrality - betweenness_centrality > 0.01" - ) - edge_bc_diffs2 = result_edge_bc.query( - "betweenness_centrality - 
mg_betweenness_centrality < -0.01" - ) + if len(result_edge_bc.columns) > 3: + result_edge_bc = result_edge_bc.rename(columns={"edge_id": "mg_edge_id"}) + + expected_output = sg_cugraph_edge_bc.reset_index(drop=True) + result_edge_bc["betweenness_centrality"] = expected_output[ + "betweenness_centrality" + ] + if len(expected_output.columns) > 3: + result_edge_bc["edge_id"] = expected_output["edge_id"] + edge_id_diff = result_edge_bc.query("mg_edge_id != edge_id") + assert len(edge_id_diff) == 0 + + edge_bc_diffs1 = result_edge_bc.query( + "mg_betweenness_centrality - betweenness_centrality > 0.01" + ) + edge_bc_diffs2 = result_edge_bc.query( + "betweenness_centrality - mg_betweenness_centrality < -0.01" + ) + + assert len(edge_bc_diffs1) == 0 + assert len(edge_bc_diffs2) == 0 - assert len(edge_bc_diffs1) == 0 - assert len(edge_bc_diffs2) == 0 + # Clean-up stored dataset edge-lists + dataset.unload() diff --git a/python/cugraph/cugraph/tests/centrality/test_eigenvector_centrality_mg.py b/python/cugraph/cugraph/tests/centrality/test_eigenvector_centrality_mg.py index e2ce7d2c341..60d39273777 100644 --- a/python/cugraph/cugraph/tests/centrality/test_eigenvector_centrality_mg.py +++ b/python/cugraph/cugraph/tests/centrality/test_eigenvector_centrality_mg.py @@ -1,4 +1,4 @@ -# Copyright (c) 2022-2023, NVIDIA CORPORATION. +# Copyright (c) 2022-2024, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
# You may obtain a copy of the License at @@ -16,11 +16,10 @@ import pytest import cudf -import dask_cudf import cugraph import cugraph.dask as dcg from cugraph.dask.common.mg_utils import is_single_gpu -from cugraph.testing.utils import DATASETS +from cugraph.datasets import karate_disjoint, dolphins, netscience # ============================================================================= @@ -32,28 +31,33 @@ def setup_function(): gc.collect() +# ============================================================================= +# Parameters +# ============================================================================= + + +DATASETS = [karate_disjoint, dolphins, netscience] IS_DIRECTED = [True, False] +# ============================================================================= +# Tests +# ============================================================================= + + @pytest.mark.mg @pytest.mark.skipif(is_single_gpu(), reason="skipping MG testing on Single GPU system") +@pytest.mark.parametrize("dataset", DATASETS) @pytest.mark.parametrize("directed", IS_DIRECTED) -@pytest.mark.parametrize("input_data_path", DATASETS) -def test_dask_mg_eigenvector_centrality(dask_client, directed, input_data_path): - input_data_path = input_data_path.as_posix() +def test_dask_mg_eigenvector_centrality(dask_client, dataset, directed): + input_data_path = dataset.get_path() print(f"dataset={input_data_path}") - chunksize = dcg.get_chunksize(input_data_path) - ddf = dask_cudf.read_csv( - input_data_path, - chunksize=chunksize, - delimiter=" ", - names=["src", "dst", "value"], - dtype=["int32", "int32", "float32"], - ) + ddf = dataset.get_dask_edgelist() dg = cugraph.Graph(directed=True) dg.from_dask_cudf_edgelist(ddf, "src", "dst", store_transposed=True) mg_res = dcg.eigenvector_centrality(dg, tol=1e-6) mg_res = mg_res.compute() + import networkx as nx from cugraph.testing import utils @@ -84,20 +88,15 @@ def test_dask_mg_eigenvector_centrality(dask_client, directed, 
input_data_path): err = err + 1 assert err == 0 + # Clean-up stored dataset edge-lists + dataset.unload() + @pytest.mark.mg def test_dask_mg_eigenvector_centrality_transposed_false(dask_client): - input_data_path = DATASETS[0] + dataset = DATASETS[0] - chunksize = dcg.get_chunksize(input_data_path) - - ddf = dask_cudf.read_csv( - input_data_path, - chunksize=chunksize, - delimiter=" ", - names=["src", "dst", "value"], - dtype=["int32", "int32", "float32"], - ) + ddf = dataset.get_dask_edgelist() dg = cugraph.Graph(directed=True) dg.from_dask_cudf_edgelist(ddf, "src", "dst", store_transposed=False) @@ -110,3 +109,6 @@ def test_dask_mg_eigenvector_centrality_transposed_false(dask_client): with pytest.warns(UserWarning, match=warning_msg): dcg.eigenvector_centrality(dg) + + # Clean-up stored dataset edge-lists + dataset.unload() diff --git a/python/cugraph/cugraph/tests/centrality/test_katz_centrality_mg.py b/python/cugraph/cugraph/tests/centrality/test_katz_centrality_mg.py index 72b81ce50bb..d1a899eba06 100644 --- a/python/cugraph/cugraph/tests/centrality/test_katz_centrality_mg.py +++ b/python/cugraph/cugraph/tests/centrality/test_katz_centrality_mg.py @@ -1,4 +1,4 @@ -# Copyright (c) 2020-2023, NVIDIA CORPORATION. +# Copyright (c) 2020-2024, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
# You may obtain a copy of the License at @@ -16,11 +16,10 @@ import pytest import cudf -import dask_cudf import cugraph import cugraph.dask as dcg from cugraph.dask.common.mg_utils import is_single_gpu -from cugraph.testing.utils import RAPIDS_DATASET_ROOT_DIR_PATH +from cugraph.datasets import karate # ============================================================================= @@ -32,25 +31,30 @@ def setup_function(): gc.collect() +# ============================================================================= +# Parameters +# ============================================================================= + + +DATASETS = [karate] IS_DIRECTED = [True, False] +# ============================================================================= +# Tests +# ============================================================================= + + @pytest.mark.mg @pytest.mark.skipif(is_single_gpu(), reason="skipping MG testing on Single GPU system") +@pytest.mark.parametrize("dataset", DATASETS) @pytest.mark.parametrize("directed", IS_DIRECTED) -def test_dask_mg_katz_centrality(dask_client, directed): +def test_dask_mg_katz_centrality(dask_client, dataset, directed): - input_data_path = (RAPIDS_DATASET_ROOT_DIR_PATH / "karate.csv").as_posix() + input_data_path = dataset.get_path() print(f"dataset={input_data_path}") - chunksize = dcg.get_chunksize(input_data_path) - - ddf = dask_cudf.read_csv( - input_data_path, - chunksize=chunksize, - delimiter=" ", - names=["src", "dst", "value"], - dtype=["int32", "int32", "float32"], - ) + + ddf = dataset.get_dask_edgelist() dg = cugraph.Graph(directed=True) dg.from_dask_cudf_edgelist(ddf, "src", "dst", store_transposed=True) @@ -92,22 +96,16 @@ def test_dask_mg_katz_centrality(dask_client, directed): err = err + 1 assert err == 0 + # Clean-up stored dataset edge-lists + dataset.unload() + @pytest.mark.mg @pytest.mark.skipif(is_single_gpu(), reason="skipping MG testing on Single GPU system") +@pytest.mark.parametrize("dataset", DATASETS) 
@pytest.mark.parametrize("directed", IS_DIRECTED) -def test_dask_mg_katz_centrality_nstart(dask_client, directed): - input_data_path = (RAPIDS_DATASET_ROOT_DIR_PATH / "karate.csv").as_posix() - print(f"dataset={input_data_path}") - chunksize = dcg.get_chunksize(input_data_path) - - ddf = dask_cudf.read_csv( - input_data_path, - chunksize=chunksize, - delimiter=" ", - names=["src", "dst", "value"], - dtype=["int32", "int32", "float32"], - ) +def test_dask_mg_katz_centrality_nstart(dask_client, dataset, directed): + ddf = dataset.get_dask_edgelist() dg = cugraph.Graph(directed=True) dg.from_dask_cudf_edgelist(ddf, "src", "dst", store_transposed=True) @@ -139,20 +137,14 @@ def test_dask_mg_katz_centrality_nstart(dask_client, directed): err = err + 1 assert err == 0 + # Clean-up stored dataset edge-lists + dataset.unload() -@pytest.mark.mg -def test_dask_mg_katz_centrality_transposed_false(dask_client): - input_data_path = (RAPIDS_DATASET_ROOT_DIR_PATH / "karate.csv").as_posix() - chunksize = dcg.get_chunksize(input_data_path) - - ddf = dask_cudf.read_csv( - input_data_path, - chunksize=chunksize, - delimiter=" ", - names=["src", "dst", "value"], - dtype=["int32", "int32", "float32"], - ) +@pytest.mark.mg +@pytest.mark.parametrize("dataset", DATASETS) +def test_dask_mg_katz_centrality_transposed_false(dask_client, dataset): + ddf = dataset.get_dask_edgelist() dg = cugraph.Graph(directed=True) dg.from_dask_cudf_edgelist(ddf, "src", "dst", store_transposed=False) @@ -165,3 +157,6 @@ def test_dask_mg_katz_centrality_transposed_false(dask_client): with pytest.warns(UserWarning, match=warning_msg): dcg.katz_centrality(dg) + + # Clean-up stored dataset edge-lists + dataset.unload() From 5b8000603ebc918ebe05e6d6893b7ee7365b3c2c Mon Sep 17 00:00:00 2001 From: Ralph Liu Date: Wed, 13 Mar 2024 14:43:10 -0700 Subject: [PATCH 11/20] Unload pre-existing internal DF --- .../cugraph/tests/centrality/test_eigenvector_centrality_mg.py | 3 ++- 1 file changed, 2 insertions(+), 1 
deletion(-) diff --git a/python/cugraph/cugraph/tests/centrality/test_eigenvector_centrality_mg.py b/python/cugraph/cugraph/tests/centrality/test_eigenvector_centrality_mg.py index 60d39273777..8cd77fb5e24 100644 --- a/python/cugraph/cugraph/tests/centrality/test_eigenvector_centrality_mg.py +++ b/python/cugraph/cugraph/tests/centrality/test_eigenvector_centrality_mg.py @@ -52,6 +52,7 @@ def setup_function(): def test_dask_mg_eigenvector_centrality(dask_client, dataset, directed): input_data_path = dataset.get_path() print(f"dataset={input_data_path}") + dataset.unload() ddf = dataset.get_dask_edgelist() dg = cugraph.Graph(directed=True) dg.from_dask_cudf_edgelist(ddf, "src", "dst", store_transposed=True) @@ -96,8 +97,8 @@ def test_dask_mg_eigenvector_centrality(dask_client, dataset, directed): def test_dask_mg_eigenvector_centrality_transposed_false(dask_client): dataset = DATASETS[0] + dataset.unload() ddf = dataset.get_dask_edgelist() - dg = cugraph.Graph(directed=True) dg.from_dask_cudf_edgelist(ddf, "src", "dst", store_transposed=False) From e62cad98b2a3874074cb936e80330b7392a78c63 Mon Sep 17 00:00:00 2001 From: Ralph Liu Date: Wed, 13 Mar 2024 14:44:18 -0700 Subject: [PATCH 12/20] Unload pre-existing internal DF --- .../cugraph/tests/centrality/test_katz_centrality_mg.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/python/cugraph/cugraph/tests/centrality/test_katz_centrality_mg.py b/python/cugraph/cugraph/tests/centrality/test_katz_centrality_mg.py index d1a899eba06..ebbe5974814 100644 --- a/python/cugraph/cugraph/tests/centrality/test_katz_centrality_mg.py +++ b/python/cugraph/cugraph/tests/centrality/test_katz_centrality_mg.py @@ -50,12 +50,11 @@ def setup_function(): @pytest.mark.parametrize("dataset", DATASETS) @pytest.mark.parametrize("directed", IS_DIRECTED) def test_dask_mg_katz_centrality(dask_client, dataset, directed): - input_data_path = dataset.get_path() print(f"dataset={input_data_path}") + dataset.unload() ddf = 
dataset.get_dask_edgelist() - dg = cugraph.Graph(directed=True) dg.from_dask_cudf_edgelist(ddf, "src", "dst", store_transposed=True) @@ -105,8 +104,8 @@ def test_dask_mg_katz_centrality(dask_client, dataset, directed): @pytest.mark.parametrize("dataset", DATASETS) @pytest.mark.parametrize("directed", IS_DIRECTED) def test_dask_mg_katz_centrality_nstart(dask_client, dataset, directed): + dataset.unload() ddf = dataset.get_dask_edgelist() - dg = cugraph.Graph(directed=True) dg.from_dask_cudf_edgelist(ddf, "src", "dst", store_transposed=True) @@ -144,8 +143,8 @@ def test_dask_mg_katz_centrality_nstart(dask_client, dataset, directed): @pytest.mark.mg @pytest.mark.parametrize("dataset", DATASETS) def test_dask_mg_katz_centrality_transposed_false(dask_client, dataset): + dataset.unload() ddf = dataset.get_dask_edgelist() - dg = cugraph.Graph(directed=True) dg.from_dask_cudf_edgelist(ddf, "src", "dst", store_transposed=False) From 945c866760b532e9c1d79f81257f7a5f9ec890a7 Mon Sep 17 00:00:00 2001 From: Ralph Liu Date: Fri, 15 Mar 2024 08:00:33 -0700 Subject: [PATCH 13/20] Update comms and community MG tests --- .../cugraph/tests/comms/test_comms_mg.py | 99 ++++++-------- .../community/test_induced_subgraph_mg.py | 20 ++- .../cugraph/tests/community/test_leiden_mg.py | 126 +++++------------- .../tests/community/test_louvain_mg.py | 125 ++++------------- .../tests/community/test_triangle_count_mg.py | 125 +++++++---------- 5 files changed, 156 insertions(+), 339 deletions(-) diff --git a/python/cugraph/cugraph/tests/comms/test_comms_mg.py b/python/cugraph/cugraph/tests/comms/test_comms_mg.py index 747ef935e01..18d4db2d77f 100644 --- a/python/cugraph/cugraph/tests/comms/test_comms_mg.py +++ b/python/cugraph/cugraph/tests/comms/test_comms_mg.py @@ -1,4 +1,4 @@ -# Copyright (c) 2020-2023, NVIDIA CORPORATION. +# Copyright (c) 2020-2024, NVIDIA CORPORATION. 
# Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at @@ -16,10 +16,9 @@ import pytest import cugraph.dask as dcg -import cudf -import dask_cudf import cugraph -from cugraph.testing.utils import RAPIDS_DATASET_ROOT_DIR_PATH +from cugraph.datasets import karate, dolphins + # ============================================================================= # Pytest Setup / Teardown - called for each test function @@ -30,12 +29,37 @@ def setup_function(): gc.collect() +# ============================================================================= +# Parameters +# ============================================================================= + + +DATASETS = [karate, dolphins] IS_DIRECTED = [True, False] -# @pytest.mark.skipif( -# is_single_gpu(), reason="skipping MG testing on Single GPU system" -# ) +# ============================================================================= +# Helper Functions +# ============================================================================= + + +def get_pagerank_result(dataset, is_mg): + """Return the cugraph.pagerank result for an MG or SG graph""" + dataset.unload() + + if is_mg: + dg = dataset.get_dask_graph(store_transposed=True) + return dcg.pagerank(dg).compute() + else: + g = dataset.get_graph(store_transposed=True) + return cugraph.pagerank(g) + + +# ============================================================================= +# Tests +# ============================================================================= + + @pytest.mark.mg @pytest.mark.parametrize("directed", IS_DIRECTED) def test_dask_mg_pagerank(dask_client, directed): @@ -43,62 +67,17 @@ def test_dask_mg_pagerank(dask_client, directed): # Initialize and run pagerank on two distributed graphs # with same communicator - input_data_path1 = (RAPIDS_DATASET_ROOT_DIR_PATH / "karate.csv").as_posix() + input_data_path1 = karate.get_path() 
print(f"dataset1={input_data_path1}") - chunksize1 = dcg.get_chunksize(input_data_path1) + result_pr1 = get_pagerank_result(karate, is_mg=True) - input_data_path2 = (RAPIDS_DATASET_ROOT_DIR_PATH / "dolphins.csv").as_posix() + input_data_path2 = dolphins.get_path() print(f"dataset2={input_data_path2}") - chunksize2 = dcg.get_chunksize(input_data_path2) - - ddf1 = dask_cudf.read_csv( - input_data_path1, - chunksize=chunksize1, - delimiter=" ", - names=["src", "dst", "value"], - dtype=["int32", "int32", "float32"], - ) - - dg1 = cugraph.Graph(directed=directed) - dg1.from_dask_cudf_edgelist(ddf1, "src", "dst") - - result_pr1 = dcg.pagerank(dg1).compute() - - ddf2 = dask_cudf.read_csv( - input_data_path2, - chunksize=chunksize2, - delimiter=" ", - names=["src", "dst", "value"], - dtype=["int32", "int32", "float32"], - ) - - dg2 = cugraph.Graph(directed=directed) - dg2.from_dask_cudf_edgelist(ddf2, "src", "dst") - - result_pr2 = dcg.pagerank(dg2).compute() + result_pr2 = get_pagerank_result(dolphins, is_mg=True) # Calculate single GPU pagerank for verification of results - df1 = cudf.read_csv( - input_data_path1, - delimiter=" ", - names=["src", "dst", "value"], - dtype=["int32", "int32", "float32"], - ) - - g1 = cugraph.Graph(directed=directed) - g1.from_cudf_edgelist(df1, "src", "dst") - expected_pr1 = cugraph.pagerank(g1) - - df2 = cudf.read_csv( - input_data_path2, - delimiter=" ", - names=["src", "dst", "value"], - dtype=["int32", "int32", "float32"], - ) - - g2 = cugraph.Graph(directed=directed) - g2.from_cudf_edgelist(df2, "src", "dst") - expected_pr2 = cugraph.pagerank(g2) + expected_pr1 = get_pagerank_result(karate, is_mg=False) + expected_pr2 = get_pagerank_result(dolphins, is_mg=False) # Compare and verify pagerank results @@ -134,3 +113,7 @@ def test_dask_mg_pagerank(dask_client, directed): err2 = err2 + 1 print("Mismatches in ", input_data_path2, ": ", err2) assert err1 == err2 == 0 + + # Clean-up stored dataset edge-lists + karate.unload() + 
dolphins.unload() diff --git a/python/cugraph/cugraph/tests/community/test_induced_subgraph_mg.py b/python/cugraph/cugraph/tests/community/test_induced_subgraph_mg.py index 45ec8eca0e8..9e199840fbb 100644 --- a/python/cugraph/cugraph/tests/community/test_induced_subgraph_mg.py +++ b/python/cugraph/cugraph/tests/community/test_induced_subgraph_mg.py @@ -17,7 +17,6 @@ import cugraph import cugraph.dask as dcg -import dask_cudf from cudf.testing.testing import assert_frame_equal from cugraph.dask.common.mg_utils import is_single_gpu from cugraph.datasets import karate, dolphins, email_Eu_core @@ -36,32 +35,28 @@ def setup_function(): # Parameters # ============================================================================= + DATASETS = [karate, dolphins, email_Eu_core] IS_DIRECTED = [True, False] NUM_VERTICES = [2, 5, 10, 20] OFFSETS = [None] + # ============================================================================= # Helper functions # ============================================================================= def get_sg_graph(dataset, directed): + dataset.unload() G = dataset.get_graph(create_using=cugraph.Graph(directed=directed)) return G def get_mg_graph(dataset, directed): - input_data_path = dataset.get_path() - blocksize = dcg.get_chunksize(input_data_path) - ddf = dask_cudf.read_csv( - input_data_path, - blocksize=blocksize, - delimiter=dataset.metadata["delim"], - names=dataset.metadata["col_names"], - dtype=dataset.metadata["col_types"], - ) + dataset.unload() + ddf = dataset.get_dask_edgelist() dg = cugraph.Graph(directed=directed) dg.from_dask_cudf_edgelist( ddf, @@ -108,7 +103,7 @@ def test_mg_induced_subgraph( # FIXME: This parameter is not yet tested # mg_offsets = mg_offsets.compute().reset_index(drop=True) - mg_df, mg_offsets = result_induced_subgraph + mg_df, _ = result_induced_subgraph if mg_df is not None and sg_induced_subgraph is not None: # FIXME: 'edges()' or 'view_edgelist()' takes half the edges out if @@ -126,3 +121,6 @@ def 
test_mg_induced_subgraph( # of all the vertices and ensure that there is None assert sg_induced_subgraph is None assert mg_df is None + + # Clean-up stored dataset edge-lists + dataset.unload() diff --git a/python/cugraph/cugraph/tests/community/test_leiden_mg.py b/python/cugraph/cugraph/tests/community/test_leiden_mg.py index 69fccdae260..4ed7244fe29 100644 --- a/python/cugraph/cugraph/tests/community/test_leiden_mg.py +++ b/python/cugraph/cugraph/tests/community/test_leiden_mg.py @@ -1,4 +1,4 @@ -# Copyright (c) 2020-2023, NVIDIA CORPORATION. +# Copyright (c) 2020-2024, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at @@ -13,123 +13,58 @@ import pytest - -import dask_cudf import cugraph import cugraph.dask as dcg -from cugraph.testing import utils - +from cugraph.datasets import karate_asymmetric, karate, dolphins -try: - from rapids_pytest_benchmark import setFixtureParamNames -except ImportError: - print( - "\n\nWARNING: rapids_pytest_benchmark is not installed, " - "falling back to pytest_benchmark fixtures.\n" - ) - # if rapids_pytest_benchmark is not available, just perfrom time-only - # benchmarking and replace the util functions with nops - import pytest_benchmark +# ============================================================================= +# Parameters +# ============================================================================= - gpubenchmark = pytest_benchmark.plugin.benchmark - def setFixtureParamNames(*args, **kwargs): - pass +DATASETS = [karate, dolphins] +DATASETS_ASYMMETRIC = [karate_asymmetric] # ============================================================================= -# Parameters +# Helper Functions # ============================================================================= -DATASETS_ASYMMETRIC = [utils.RAPIDS_DATASET_ROOT_DIR_PATH / "karate-asymmetric.csv"] - - 
-############################################################################### -# Fixtures -# @pytest.mark.skipif( -# is_single_gpu(), reason="skipping MG testing on Single GPU system" -# ) -@pytest.fixture( - scope="module", - params=DATASETS_ASYMMETRIC, - ids=[f"dataset={d.as_posix()}" for d in DATASETS_ASYMMETRIC], -) -def daskGraphFromDataset(request, dask_client): - """ - Returns a new dask dataframe created from the dataset file param. - This creates a directed Graph. - """ - # Since parameterized fixtures do not assign param names to param values, - # manually call the helper to do so. - setFixtureParamNames(request, ["dataset"]) - dataset = request.param - - chunksize = dcg.get_chunksize(dataset) - ddf = dask_cudf.read_csv( - dataset, - chunksize=chunksize, - delimiter=" ", - names=["src", "dst", "value"], - dtype=["int32", "int32", "float32"], - ) - - dg = cugraph.Graph(directed=True) - dg.from_dask_cudf_edgelist(ddf, "src", "dst", "value") - return dg -@pytest.fixture( - scope="module", - params=utils.DATASETS_UNDIRECTED, - ids=[f"dataset={d.as_posix()}" for d in utils.DATASETS_UNDIRECTED], -) -def uddaskGraphFromDataset(request, dask_client): - """ - Returns a new dask dataframe created from the dataset file param. - This creates an undirected Graph. - """ - # Since parameterized fixtures do not assign param names to param - # values, manually call the helper to do so. 
- setFixtureParamNames(request, ["dataset"]) - dataset = request.param - - chunksize = dcg.get_chunksize(dataset) - ddf = dask_cudf.read_csv( - dataset, - chunksize=chunksize, - delimiter=" ", - names=["src", "dst", "value"], - dtype=["int32", "int32", "float32"], - ) - - dg = cugraph.Graph(directed=False) - dg.from_dask_cudf_edgelist(ddf, "src", "dst", "value") +def get_mg_graph(dataset, directed): + """Returns an MG graph""" + ddf = dataset.get_dask_edgelist() + + dg = cugraph.Graph(directed=directed) + dg.from_dask_cudf_edgelist(ddf, "src", "dst", "wgt") return dg -############################################################################### +# ============================================================================= # Tests -# @pytest.mark.skipif( -# is_single_gpu(), reason="skipping MG testing on Single GPU system" -# ) +# ============================================================================= # FIXME: Implement more robust tests + + @pytest.mark.mg -def test_mg_leiden_with_edgevals_directed_graph(daskGraphFromDataset): +@pytest.mark.parametrize("dataset", DATASETS_ASYMMETRIC) +def test_mg_leiden_with_edgevals_directed_graph(dask_client, dataset): + dg = get_mg_graph(dataset, directed=True) # Directed graphs are not supported by Leiden and a ValueError should be # raised with pytest.raises(ValueError): - parts, mod = dcg.leiden(daskGraphFromDataset) + parts, mod = dcg.leiden(dg) + + # Clean-up stored dataset edge-lists + dataset.unload() -############################################################################### -# Tests -# @pytest.mark.skipif( -# is_single_gpu(), reason="skipping MG testing on Single GPU system" -# ) -# FIXME: Implement more robust tests @pytest.mark.mg -def test_mg_leiden_with_edgevals_undirected_graph(uddaskGraphFromDataset): - parts, mod = dcg.leiden(uddaskGraphFromDataset) +@pytest.mark.parametrize("dataset", DATASETS) +def test_mg_leiden_with_edgevals_undirected_graph(dask_client, dataset): + dg = get_mg_graph(dataset, 
directed=False) + parts, mod = dcg.leiden(dg) # FIXME: either call Nx with the same dataset and compare results, or # hardcode golden results to compare to. @@ -137,3 +72,6 @@ def test_mg_leiden_with_edgevals_undirected_graph(uddaskGraphFromDataset): print(parts.compute()) print(mod) print() + + # Clean-up stored dataset edge-lists + dataset.unload() diff --git a/python/cugraph/cugraph/tests/community/test_louvain_mg.py b/python/cugraph/cugraph/tests/community/test_louvain_mg.py index 5318262fe26..ce89f7f62a2 100644 --- a/python/cugraph/cugraph/tests/community/test_louvain_mg.py +++ b/python/cugraph/cugraph/tests/community/test_louvain_mg.py @@ -1,4 +1,4 @@ -# Copyright (c) 2020-2023, NVIDIA CORPORATION. +# Copyright (c) 2020-2024, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at @@ -14,122 +14,44 @@ import pytest import cugraph.dask as dcg +from cugraph.datasets import karate_asymmetric, karate, dolphins -import cugraph -import dask_cudf -from cugraph.testing import utils +from test_leiden_mg import get_mg_graph -try: - from rapids_pytest_benchmark import setFixtureParamNames -except ImportError: - print( - "\n\nWARNING: rapids_pytest_benchmark is not installed, " - "falling back to pytest_benchmark fixtures.\n" - ) - - # if rapids_pytest_benchmark is not available, just perfrom time-only - # benchmarking and replace the util functions with nops - import pytest_benchmark +# ============================================================================= +# Parameters +# ============================================================================= - gpubenchmark = pytest_benchmark.plugin.benchmark - def setFixtureParamNames(*args, **kwargs): - pass +DATASETS_ASYMMETRIC = DATASETS_ASYMMETRIC = [karate_asymmetric] +DATASETS = [karate, dolphins] # ============================================================================= -# 
Parameters -# ============================================================================= -DATASETS_ASYMMETRIC = [utils.RAPIDS_DATASET_ROOT_DIR_PATH / "karate-asymmetric.csv"] - - -############################################################################### -# Fixtures -# @pytest.mark.skipif( -# is_single_gpu(), reason="skipping MG testing on Single GPU system" -# ) -@pytest.fixture( - scope="module", - params=DATASETS_ASYMMETRIC, - ids=[f"dataset={d.as_posix()}" for d in DATASETS_ASYMMETRIC], -) -def daskGraphFromDataset(request, dask_client): - """ - Returns a new dask dataframe created from the dataset file param. - This creates a directed Graph. - """ - # Since parameterized fixtures do not assign param names to param values, - # manually call the helper to do so. - setFixtureParamNames(request, ["dataset"]) - dataset = request.param - - chunksize = dcg.get_chunksize(dataset) - ddf = dask_cudf.read_csv( - dataset, - chunksize=chunksize, - delimiter=" ", - names=["src", "dst", "value"], - dtype=["int32", "int32", "float32"], - ) - - dg = cugraph.Graph(directed=True) - dg.from_dask_cudf_edgelist(ddf, "src", "dst", "value") - return dg - - -@pytest.fixture( - scope="module", - params=utils.DATASETS_UNDIRECTED, - ids=[f"dataset={d.as_posix()}" for d in utils.DATASETS_UNDIRECTED], -) -def uddaskGraphFromDataset(request, dask_client): - """ - Returns a new dask dataframe created from the dataset file param. - This creates an undirected Graph. - """ - # Since parameterized fixtures do not assign param names to param - # values, manually call the helper to do so. 
- setFixtureParamNames(request, ["dataset"]) - dataset = request.param - - chunksize = dcg.get_chunksize(dataset) - ddf = dask_cudf.read_csv( - dataset, - chunksize=chunksize, - delimiter=" ", - names=["src", "dst", "value"], - dtype=["int32", "int32", "float32"], - ) - - dg = cugraph.Graph(directed=False) - dg.from_dask_cudf_edgelist(ddf, "src", "dst", "value") - return dg - - -############################################################################### # Tests -# @pytest.mark.skipif( -# is_single_gpu(), reason="skipping MG testing on Single GPU system" -# ) +# ============================================================================= # FIXME: Implement more robust tests + + @pytest.mark.mg -def test_mg_louvain_with_edgevals_directed_graph(daskGraphFromDataset): +@pytest.mark.parametrize("dataset", DATASETS_ASYMMETRIC) +def test_mg_louvain_with_edgevals_directed_graph(dask_client, dataset): + dg = get_mg_graph(dataset, directed=True) # Directed graphs are not supported by Louvain and a ValueError should be # raised with pytest.raises(ValueError): - parts, mod = dcg.louvain(daskGraphFromDataset) + parts, mod = dcg.louvain(dg) + + # Clean-up stored dataset edge-lists + dataset.unload() -############################################################################### -# Tests -# @pytest.mark.skipif( -# is_single_gpu(), reason="skipping MG testing on Single GPU system" -# ) -# FIXME: Implement more robust tests @pytest.mark.mg -def test_mg_louvain_with_edgevals_undirected_graph(uddaskGraphFromDataset): - parts, mod = dcg.louvain(uddaskGraphFromDataset) +@pytest.mark.parametrize("dataset", DATASETS) +def test_mg_louvain_with_edgevals_undirected_graph(dask_client, dataset): + dg = get_mg_graph(dataset, directed=False) + parts, mod = dcg.louvain(dg) # FIXME: either call Nx with the same dataset and compare results, or # hardcode golden results to compare to. 
@@ -137,3 +59,6 @@ def test_mg_louvain_with_edgevals_undirected_graph(uddaskGraphFromDataset): print(parts.compute()) print(mod) print() + + # Clean-up stored dataset edge-lists + dataset.unload() diff --git a/python/cugraph/cugraph/tests/community/test_triangle_count_mg.py b/python/cugraph/cugraph/tests/community/test_triangle_count_mg.py index 0f7bb14581f..02723d75527 100644 --- a/python/cugraph/cugraph/tests/community/test_triangle_count_mg.py +++ b/python/cugraph/cugraph/tests/community/test_triangle_count_mg.py @@ -1,4 +1,4 @@ -# Copyright (c) 2022-2023, NVIDIA CORPORATION. +# Copyright (c) 2022-2024, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at @@ -16,115 +16,85 @@ import random import pytest -import cudf -import dask_cudf import cugraph import cugraph.dask as dcg -from cugraph.testing import utils -from pylibcugraph.testing.utils import gen_fixture_params_product +from cugraph.datasets import karate, dolphins # ============================================================================= # Pytest Setup / Teardown - called for each test function # ============================================================================= + + def setup_function(): gc.collect() # ============================================================================= -# Pytest fixtures +# Parameters # ============================================================================= -datasets = utils.DATASETS_UNDIRECTED -fixture_params = gen_fixture_params_product( - (datasets, "graph_file"), - ([True, False], "start_list"), -) - - -@pytest.fixture(scope="module", params=fixture_params) -def input_combo(request): - """ - Simply return the current combination of params as a dictionary for use in - tests or other parameterized fixtures. 
- """ - parameters = dict(zip(("graph_file", "start_list", "edgevals"), request.param)) - - return parameters - - -@pytest.fixture(scope="module") -def input_expected_output(dask_client, input_combo): - """ - This fixture returns the inputs and expected results from the triangle - count algo. - """ - start_list = input_combo["start_list"] - input_data_path = input_combo["graph_file"] - G = utils.generate_cugraph_graph_from_file( - input_data_path, directed=False, edgevals=True - ) - input_combo["SGGraph"] = G - if start_list: +DATASETS = [karate, dolphins] +START_LIST = [True, False] + + +# ============================================================================= +# Helper Functions +# ============================================================================= + + +def get_sg_graph(dataset, directed, start): + dataset.unload() + G = dataset.get_graph(create_using=cugraph.Graph(directed=directed)) + if start: # sample k nodes from the cuGraph graph - k = random.randint(1, 10) - srcs = G.view_edge_list()[G.source_columns] - dsts = G.view_edge_list()[G.destination_columns] - nodes = cudf.concat([srcs, dsts]).drop_duplicates() - start_list = nodes.sample(k) + start = G.select_random_vertices(num_vertices=random.randint(1, 10)) else: - start_list = None + start = None - sg_triangle_results = cugraph.triangle_count(G, start_list) - sg_triangle_results = sg_triangle_results.sort_values("vertex").reset_index( - drop=True - ) + return G, start - input_combo["sg_triangle_results"] = sg_triangle_results - input_combo["start_list"] = start_list - - # Creating an edgelist from a dask cudf dataframe - chunksize = dcg.get_chunksize(input_data_path) - ddf = dask_cudf.read_csv( - input_data_path, - chunksize=chunksize, - delimiter=" ", - names=["src", "dst", "value"], - dtype=["int32", "int32", "float32"], - ) - dg = cugraph.Graph(directed=False) +def get_mg_graph(dataset, directed): + dataset.unload() + ddf = dataset.get_dask_edgelist() + dg = 
cugraph.Graph(directed=directed) dg.from_dask_cudf_edgelist( - ddf, source="src", destination="dst", edge_attr="value", renumber=True + ddf, source="src", destination="dst", edge_attr="wgt", renumber=True ) - input_combo["MGGraph"] = dg - - return input_combo + return dg # ============================================================================= # Tests # ============================================================================= + + @pytest.mark.mg -def test_sg_triangles(dask_client, benchmark, input_expected_output): +@pytest.mark.parametrize("dataset", DATASETS) +@pytest.mark.parametrize("start", START_LIST) +def test_sg_triangles(dask_client, dataset, start, benchmark): # This test is only for benchmark purposes. sg_triangle_results = None - G = input_expected_output["SGGraph"] - start_list = input_expected_output["start_list"] - sg_triangle_results = benchmark(cugraph.triangle_count, G, start_list) + G, start = get_sg_graph(dataset, False, start) + + sg_triangle_results = benchmark(cugraph.triangle_count, G, start) + sg_triangle_results.sort_values("vertex").reset_index(drop=True) assert sg_triangle_results is not None + # Clean-up stored dataset edge-lists + dataset.unload() @pytest.mark.mg -def test_triangles(dask_client, benchmark, input_expected_output): - - dg = input_expected_output["MGGraph"] - start_list = input_expected_output["start_list"] - - result_counts = benchmark(dcg.triangle_count, dg, start_list) +@pytest.mark.parametrize("dataset", DATASETS) +@pytest.mark.parametrize("start", START_LIST) +def test_triangles(dask_client, dataset, start, benchmark): + G, start = get_sg_graph(dataset, False, start) + dg = get_mg_graph(dataset, False) + result_counts = benchmark(dcg.triangle_count, dg, start) result_counts = ( result_counts.drop_duplicates() .compute() @@ -132,8 +102,9 @@ def test_triangles(dask_client, benchmark, input_expected_output): .reset_index(drop=True) .rename(columns={"counts": "mg_counts"}) ) - - expected_output = 
input_expected_output["sg_triangle_results"] + expected_output = ( + cugraph.triangle_count(G, start).sort_values("vertex").reset_index(drop=True) + ) # Update the mg triangle count with sg triangle count results # for easy comparison using cuDF DataFrame methods. @@ -141,3 +112,5 @@ def test_triangles(dask_client, benchmark, input_expected_output): counts_diffs = result_counts.query("mg_counts != sg_counts") assert len(counts_diffs) == 0 + # Clean-up stored dataset edge-lists + dataset.unload() From 2eef14ef5f7a4ef1281292fc710e3a37c182f33f Mon Sep 17 00:00:00 2001 From: Ralph Liu Date: Thu, 4 Apr 2024 09:23:24 -0700 Subject: [PATCH 14/20] Refactor test_connectivity_mg --- .../tests/components/test_connectivity_mg.py | 54 ++++++++----------- 1 file changed, 23 insertions(+), 31 deletions(-) diff --git a/python/cugraph/cugraph/tests/components/test_connectivity_mg.py b/python/cugraph/cugraph/tests/components/test_connectivity_mg.py index 217c9f0f09f..b1f571cd896 100644 --- a/python/cugraph/cugraph/tests/components/test_connectivity_mg.py +++ b/python/cugraph/cugraph/tests/components/test_connectivity_mg.py @@ -1,4 +1,4 @@ -# Copyright (c) 2020-2023, NVIDIA CORPORATION. +# Copyright (c) 2020-2024, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
# You may obtain a copy of the License at @@ -15,11 +15,9 @@ import pytest -import cudf -import dask_cudf import cugraph import cugraph.dask as dcg -from cugraph.testing.utils import RAPIDS_DATASET_ROOT_DIR_PATH +from cugraph.datasets import netscience # ============================================================================= @@ -31,41 +29,35 @@ def setup_function(): gc.collect() +# ============================================================================= +# Parameters +# ============================================================================= + + +DATASETS = [netscience] # Directed graph is not currently supported IS_DIRECTED = [False, True] -# @pytest.mark.skipif( -# is_single_gpu(), reason="skipping MG testing on Single GPU system" -# ) +# ============================================================================= +# Tests +# ============================================================================= + + @pytest.mark.mg +@pytest.mark.parametrize("dataset", DATASETS) @pytest.mark.parametrize("directed", IS_DIRECTED) -def test_dask_mg_wcc(dask_client, directed): +def test_dask_mg_wcc(dask_client, directed, dataset): - input_data_path = (RAPIDS_DATASET_ROOT_DIR_PATH / "netscience.csv").as_posix() + input_data_path = dataset.get_path() print(f"dataset={input_data_path}") - chunksize = dcg.get_chunksize(input_data_path) - - ddf = dask_cudf.read_csv( - input_data_path, - chunksize=chunksize, - delimiter=" ", - names=["src", "dst", "value"], - dtype=["int32", "int32", "float32"], - ) - - df = cudf.read_csv( - input_data_path, - delimiter=" ", - names=["src", "dst", "value"], - dtype=["int32", "int32", "float32"], - ) - - g = cugraph.Graph(directed=directed) - g.from_cudf_edgelist(df, "src", "dst", renumber=True) - - dg = cugraph.Graph(directed=directed) - dg.from_dask_cudf_edgelist(ddf, "src", "dst") + create_using = cugraph.Graph(directed=directed) + + g = dataset.get_graph(create_using=create_using) + dataset.unload() + + dg = 
dataset.get_dask_graph(create_using=create_using) + dataset.unload() if not directed: expected_dist = cugraph.weakly_connected_components(g) From 90c3a1772564f66988ba22e7fab43f4f3b13c2ea Mon Sep 17 00:00:00 2001 From: Ralph Liu Date: Thu, 4 Apr 2024 09:23:41 -0700 Subject: [PATCH 15/20] Refactor core tests --- .../cugraph/tests/core/test_core_number_mg.py | 126 ++++---------- .../cugraph/tests/core/test_k_core_mg.py | 162 ++++++------------ 2 files changed, 86 insertions(+), 202 deletions(-) diff --git a/python/cugraph/cugraph/tests/core/test_core_number_mg.py b/python/cugraph/cugraph/tests/core/test_core_number_mg.py index 23214b5f51b..b52711c3c75 100644 --- a/python/cugraph/cugraph/tests/core/test_core_number_mg.py +++ b/python/cugraph/cugraph/tests/core/test_core_number_mg.py @@ -1,4 +1,4 @@ -# Copyright (c) 2022-2023, NVIDIA CORPORATION. +# Copyright (c) 2022-2024, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
# You may obtain a copy of the License at @@ -15,107 +15,67 @@ import pytest -import dask_cudf import cugraph import cugraph.dask as dcg -from cugraph.testing import utils -from pylibcugraph.testing.utils import gen_fixture_params_product +from cugraph.datasets import karate, dolphins, karate_asymmetric # ============================================================================= # Pytest Setup / Teardown - called for each test function # ============================================================================= + + def setup_function(): gc.collect() # ============================================================================= -# Pytest fixtures +# Parameters # ============================================================================= -datasets = utils.DATASETS_UNDIRECTED -degree_type = ["incoming", "outgoing", "bidirectional"] - -fixture_params = gen_fixture_params_product( - (datasets, "graph_file"), - (degree_type, "degree_type"), -) - - -@pytest.fixture(scope="module", params=fixture_params) -def input_combo(request): - """ - Simply return the current combination of params as a dictionary for use in - tests or other parameterized fixtures. - """ - parameters = dict(zip(("graph_file", "degree_type"), request.param)) - - return parameters - - -@pytest.fixture(scope="module") -def input_expected_output(dask_client, input_combo): - """ - This fixture returns the inputs and expected results from the Core number - algo. 
- """ - degree_type = input_combo["degree_type"] - input_data_path = input_combo["graph_file"] - G = utils.generate_cugraph_graph_from_file( - input_data_path, directed=False, edgevals=True - ) - input_combo["SGGraph"] = G - sg_core_number_results = cugraph.core_number(G, degree_type) - sg_core_number_results = sg_core_number_results.sort_values("vertex").reset_index( - drop=True - ) +DATASETS = [karate, dolphins] +DEGREE_TYPE = ["incoming", "outgoing", "bidirectional"] - input_combo["sg_core_number_results"] = sg_core_number_results - input_combo["degree_type"] = degree_type - - # Creating an edgelist from a dask cudf dataframe - chunksize = dcg.get_chunksize(input_data_path) - ddf = dask_cudf.read_csv( - input_data_path, - chunksize=chunksize, - delimiter=" ", - names=["src", "dst", "value"], - dtype=["int32", "int32", "float32"], - ) - dg = cugraph.Graph(directed=False) - dg.from_dask_cudf_edgelist( - ddf, source="src", destination="dst", edge_attr="value", renumber=True - ) +# ============================================================================= +# Helper Functions +# ============================================================================= - input_combo["MGGraph"] = dg - return input_combo +def get_sg_results(dataset, degree_type): + dataset.unload() + G = dataset.get_graph(create_using=cugraph.Graph(directed=False)) + res = cugraph.core_number(G, degree_type) + res = res.sort_values("vertex").reset_index(drop=True) + return res # ============================================================================= # Tests # ============================================================================= + + @pytest.mark.mg -def test_sg_core_number(dask_client, benchmark, input_expected_output): +@pytest.mark.parametrize("dataset", DATASETS) +@pytest.mark.parametrize("degree_type", DEGREE_TYPE) +def test_sg_core_number(dask_client, dataset, degree_type, benchmark): # This test is only for benchmark purposes. 
sg_core_number_results = None - G = input_expected_output["SGGraph"] - degree_type = input_expected_output["degree_type"] - + G = dataset.get_graph(create_using=cugraph.Graph(directed=False)) + dataset.unload() sg_core_number_results = benchmark(cugraph.core_number, G, degree_type) assert sg_core_number_results is not None @pytest.mark.mg -def test_core_number(dask_client, benchmark, input_expected_output): - - dg = input_expected_output["MGGraph"] - degree_type = input_expected_output["degree_type"] +@pytest.mark.parametrize("dataset", DATASETS) +@pytest.mark.parametrize("degree_type", DEGREE_TYPE) +def test_core_number(dask_client, dataset, degree_type, benchmark): + dataset.unload() + dg = dataset.get_dask_graph(create_using=cugraph.Graph(directed=False)) result_core_number = benchmark(dcg.core_number, dg, degree_type) - result_core_number = ( result_core_number.drop_duplicates() .compute() @@ -124,7 +84,7 @@ def test_core_number(dask_client, benchmark, input_expected_output): .rename(columns={"core_number": "mg_core_number"}) ) - expected_output = input_expected_output["sg_core_number_results"] + expected_output = get_sg_results(dataset, degree_type) # Update the mg core number with sg core number results # for easy comparison using cuDF DataFrame methods. 
@@ -132,33 +92,13 @@ def test_core_number(dask_client, benchmark, input_expected_output): counts_diffs = result_core_number.query("mg_core_number != sg_core_number") assert len(counts_diffs) == 0 + dataset.unload() @pytest.mark.mg -def test_core_number_invalid_input(input_expected_output): - input_data_path = ( - utils.RAPIDS_DATASET_ROOT_DIR_PATH / "karate-asymmetric.csv" - ).as_posix() - - chunksize = dcg.get_chunksize(input_data_path) - ddf = dask_cudf.read_csv( - input_data_path, - chunksize=chunksize, - delimiter=" ", - names=["src", "dst", "value"], - dtype=["int32", "int32", "float32"], - ) - - dg = cugraph.Graph(directed=True) - dg.from_dask_cudf_edgelist( - ddf, - source="src", - destination="dst", - edge_attr="value", - renumber=True, - ) - +def test_core_number_invalid_input(): + dg = karate_asymmetric.get_graph(create_using=cugraph.Graph(directed=True)) invalid_degree_type = 3 - dg = input_expected_output["MGGraph"] + with pytest.raises(ValueError): dcg.core_number(dg, invalid_degree_type) diff --git a/python/cugraph/cugraph/tests/core/test_k_core_mg.py b/python/cugraph/cugraph/tests/core/test_k_core_mg.py index 32c4f4553a2..98ab58c3656 100644 --- a/python/cugraph/cugraph/tests/core/test_k_core_mg.py +++ b/python/cugraph/cugraph/tests/core/test_k_core_mg.py @@ -1,4 +1,4 @@ -# Copyright (c) 2023, NVIDIA CORPORATION. +# Copyright (c) 2024, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
# You may obtain a copy of the License at @@ -15,58 +15,40 @@ import pytest -import dask_cudf import cugraph import cugraph.dask as dcg -from cugraph.testing import utils +from cugraph.datasets import karate, dolphins from cudf.testing.testing import assert_frame_equal from cugraph.structure.symmetrize import symmetrize_df -from pylibcugraph.testing import gen_fixture_params_product # ============================================================================= # Pytest Setup / Teardown - called for each test function # ============================================================================= + + def setup_function(): gc.collect() # ============================================================================= -# Pytest fixtures +# Parameters +# ============================================================================= + + +DATASETS = [karate, dolphins] +CORE_NUMBER = [True, False] +DEGREE_TYPE = ["bidirectional", "outgoing", "incoming"] + + +# ============================================================================= +# Helper Functions # ============================================================================= -datasets = utils.DATASETS_UNDIRECTED - -core_number = [True, False] -degree_type = ["bidirectional", "outgoing", "incoming"] - -fixture_params = gen_fixture_params_product( - (datasets, "graph_file"), (core_number, "core_number"), (degree_type, "degree_type") -) - - -@pytest.fixture(scope="module", params=fixture_params) -def input_combo(request): - """ - Simply return the current combination of params as a dictionary for use in - tests or other parameterized fixtures. - """ - parameters = dict(zip(("graph_file", "core_number", "degree_type"), request.param)) - - return parameters - - -@pytest.fixture(scope="module") -def input_expected_output(dask_client, input_combo): - """ - This fixture returns the inputs and expected results from the Core number - algo. 
- """ - core_number = input_combo["core_number"] - degree_type = input_combo["degree_type"] - input_data_path = input_combo["graph_file"] - G = utils.generate_cugraph_graph_from_file( - input_data_path, directed=False, edgevals=True - ) + + +def get_sg_results(dataset, core_number, degree_type): + dataset.unload() + G = dataset.get_graph(create_using=cugraph.Graph(directed=False)) if core_number: # compute the core_number @@ -74,78 +56,60 @@ def input_expected_output(dask_client, input_combo): else: core_number = None - input_combo["core_number"] = core_number - - input_combo["SGGraph"] = G - sg_k_core_graph = cugraph.k_core( G, core_number=core_number, degree_type=degree_type ) - sg_k_core_results = sg_k_core_graph.view_edge_list() + res = sg_k_core_graph.view_edge_list() # FIXME: The result will come asymetric. Symmetrize the results srcCol = sg_k_core_graph.source_columns dstCol = sg_k_core_graph.destination_columns wgtCol = sg_k_core_graph.weight_column - sg_k_core_results = ( - symmetrize_df(sg_k_core_results, srcCol, dstCol, wgtCol) + res = ( + symmetrize_df(res, srcCol, dstCol, wgtCol) .sort_values([srcCol, dstCol]) .reset_index(drop=True) ) - - input_combo["sg_k_core_results"] = sg_k_core_results - - # Creating an edgelist from a dask cudf dataframe - chunksize = dcg.get_chunksize(input_data_path) - ddf = dask_cudf.read_csv( - input_data_path, - chunksize=chunksize, - delimiter=" ", - names=["src", "dst", "value"], - dtype=["int32", "int32", "float32"], - ) - - dg = cugraph.Graph(directed=False) - # FIXME: False when renumbering (C++ and python renumbering) - dg.from_dask_cudf_edgelist( - ddf, - source="src", - destination="dst", - edge_attr="value", - renumber=True, - ) - - input_combo["MGGraph"] = dg - - return input_combo + return res, core_number # ============================================================================= # Tests # ============================================================================= + + @pytest.mark.mg -def 
test_sg_k_core(dask_client, benchmark, input_expected_output): +@pytest.mark.parametrize("dataset", DATASETS) +@pytest.mark.parametrize("core_number", CORE_NUMBER) +@pytest.mark.parametrize("degree_type", DEGREE_TYPE) +def test_sg_k_core(dask_client, dataset, core_number, degree_type, benchmark): # This test is only for benchmark purposes. sg_k_core = None - G = input_expected_output["SGGraph"] - core_number = input_expected_output["core_number"] - degree_type = input_expected_output["degree_type"] - + dataset.unload() + G = dataset.get_graph(create_using=cugraph.Graph(directed=False)) + if core_number: + # compute the core_number + core_number = cugraph.core_number(G, degree_type=degree_type) + else: + core_number = None sg_k_core = benchmark( cugraph.k_core, G, core_number=core_number, degree_type=degree_type ) assert sg_k_core is not None + dataset.unload() @pytest.mark.mg -def test_dask_mg_k_core(dask_client, benchmark, input_expected_output): - - dg = input_expected_output["MGGraph"] - core_number = input_expected_output["core_number"] +@pytest.mark.parametrize("dataset", DATASETS) +@pytest.mark.parametrize("core_number", CORE_NUMBER) +@pytest.mark.parametrize("degree_type", DEGREE_TYPE) +def test_dask_mg_k_core(dask_client, dataset, core_number, degree_type, benchmark): + expected_k_core_results, core_number = get_sg_results( + dataset, core_number, degree_type + ) + dataset.unload() + dg = dataset.get_dask_graph(create_using=cugraph.Graph(directed=False)) k_core_results = benchmark(dcg.k_core, dg, core_number=core_number) - - expected_k_core_results = input_expected_output["sg_k_core_results"] - k_core_results = ( k_core_results.compute() .sort_values(["src", "dst"]) @@ -156,40 +120,20 @@ def test_dask_mg_k_core(dask_client, benchmark, input_expected_output): assert_frame_equal( expected_k_core_results, k_core_results, check_dtype=False, check_like=True ) + dataset.unload() @pytest.mark.mg def test_dask_mg_k_core_invalid_input(dask_client): - input_data_path 
= datasets[0] - chunksize = dcg.get_chunksize(input_data_path) - ddf = dask_cudf.read_csv( - input_data_path, - chunksize=chunksize, - delimiter=" ", - names=["src", "dst", "value"], - dtype=["int32", "int32", "float32"], - ) + dataset = DATASETS[0] + dataset.unload() + dg = dataset.get_dask_graph(create_using=cugraph.Graph(directed=True)) - dg = cugraph.Graph(directed=True) - dg.from_dask_cudf_edgelist( - ddf, - source="src", - destination="dst", - edge_attr="value", - renumber=True, - store_transposed=True, - ) with pytest.raises(ValueError): dcg.k_core(dg) - dg = cugraph.Graph(directed=False) - dg.from_dask_cudf_edgelist( - ddf, - source="src", - destination="dst", - edge_attr="value", - store_transposed=True, - ) + dataset.unload() + dg = dataset.get_dask_graph(create_using=cugraph.Graph(directed=False)) degree_type = "invalid" with pytest.raises(ValueError): From 5658418728985fad263aad1a11880550279d3a2f Mon Sep 17 00:00:00 2001 From: Ralph Liu Date: Tue, 9 Apr 2024 12:58:01 -0700 Subject: [PATCH 16/20] Updates to internals MG tests --- .../tests/internals/test_renumber_mg.py | 98 ++++++++++--------- .../internals/test_replicate_edgelist_mg.py | 71 +++++--------- .../tests/internals/test_symmetrize_mg.py | 4 +- 3 files changed, 79 insertions(+), 94 deletions(-) diff --git a/python/cugraph/cugraph/tests/internals/test_renumber_mg.py b/python/cugraph/cugraph/tests/internals/test_renumber_mg.py index e9521f16594..64917d0c747 100644 --- a/python/cugraph/cugraph/tests/internals/test_renumber_mg.py +++ b/python/cugraph/cugraph/tests/internals/test_renumber_mg.py @@ -1,4 +1,4 @@ -# Copyright (c) 2019-2023, NVIDIA CORPORATION. +# Copyright (c) 2019-2024, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
# You may obtain a copy of the License at @@ -24,33 +24,61 @@ import dask_cudf import cugraph.dask as dcg import cugraph +from cugraph.datasets import karate, karate_disjoint from cugraph.testing import utils from cugraph.structure.number_map import NumberMap from cugraph.dask.common.mg_utils import is_single_gpu -from cugraph.testing.utils import RAPIDS_DATASET_ROOT_DIR_PATH from cudf.testing import assert_frame_equal, assert_series_equal # ============================================================================= # Pytest Setup / Teardown - called for each test function # ============================================================================= + + def setup_function(): gc.collect() +# ============================================================================= +# Parameters +# ============================================================================= + + +DATASETS = [karate] +DATASETS_UNRENUMBERED = [karate_disjoint] IS_DIRECTED = [True, False] +# ============================================================================= +# Helper Functions +# ============================================================================= + + +def get_sg_graph(dataset, directed): + dataset.unload() + g = dataset.get_graph(create_using=cugraph.Graph(directed=directed)) + + return g + + +def get_mg_graph(dataset, directed): + dataset.unload() + dg = dataset.get_dask_graph(create_using=cugraph.Graph(directed=directed)) + + return dg + + +# ============================================================================= +# Tests +# ============================================================================= + + @pytest.mark.mg @pytest.mark.skipif(is_single_gpu(), reason="skipping MG testing on Single GPU system") -@pytest.mark.parametrize( - "graph_file", - utils.DATASETS_UNRENUMBERED, - ids=[f"dataset={d.as_posix()}" for d in utils.DATASETS_UNRENUMBERED], -) -def test_mg_renumber(graph_file, dask_client): - - M = utils.read_csv_for_nx(graph_file) 
+@pytest.mark.parametrize("dataset", DATASETS_UNRENUMBERED) +def test_mg_renumber(dataset, dask_client): + M = utils.read_csv_for_nx(dataset.get_path()) sources = cudf.Series(M["0"]) destinations = cudf.Series(M["1"]) @@ -96,13 +124,9 @@ def test_mg_renumber(graph_file, dask_client): @pytest.mark.mg @pytest.mark.skipif(is_single_gpu(), reason="skipping MG testing on Single GPU system") -@pytest.mark.parametrize( - "graph_file", - utils.DATASETS_UNRENUMBERED, - ids=[f"dataset={d.as_posix()}" for d in utils.DATASETS_UNRENUMBERED], -) -def test_mg_renumber_add_internal_vertex_id(graph_file, dask_client): - M = utils.read_csv_for_nx(graph_file) +@pytest.mark.parametrize("dataset", DATASETS_UNRENUMBERED) +def test_mg_renumber_add_internal_vertex_id(dataset, dask_client): + M = utils.read_csv_for_nx(dataset.get_path()) sources = cudf.Series(M["0"]) destinations = cudf.Series(M["1"]) @@ -131,33 +155,13 @@ def test_mg_renumber_add_internal_vertex_id(graph_file, dask_client): @pytest.mark.mg @pytest.mark.skipif(is_single_gpu(), reason="skipping MG testing on Single GPU system") +@pytest.mark.parametrize("dataset", DATASETS) @pytest.mark.parametrize("directed", IS_DIRECTED) -def test_dask_mg_pagerank(dask_client, directed): +def test_dask_mg_pagerank(dask_client, dataset, directed): pandas.set_option("display.max_rows", 10000) - input_data_path = (RAPIDS_DATASET_ROOT_DIR_PATH / "karate.csv").as_posix() - chunksize = dcg.get_chunksize(input_data_path) - - ddf = dask_cudf.read_csv( - input_data_path, - chunksize=chunksize, - delimiter=" ", - names=["src", "dst", "value"], - dtype=["int32", "int32", "float32"], - ) - - df = cudf.read_csv( - input_data_path, - delimiter=" ", - names=["src", "dst", "value"], - dtype=["int32", "int32", "float32"], - ) - - g = cugraph.Graph(directed=directed) - g.from_cudf_edgelist(df, "src", "dst") - - dg = cugraph.Graph(directed=directed) - dg.from_dask_cudf_edgelist(ddf, "src", "dst") + g = get_sg_graph(dataset, directed) + dg = 
get_mg_graph(dataset, directed) expected_pr = cugraph.pagerank(g) result_pr = dcg.pagerank(dg).compute() @@ -178,20 +182,18 @@ def test_dask_mg_pagerank(dask_client, directed): print("Mismatches:", err) assert err == 0 + dataset.unload() + @pytest.mark.mg @pytest.mark.skipif(is_single_gpu(), reason="skipping MG testing on Single GPU system") -@pytest.mark.parametrize( - "graph_file", - utils.DATASETS_UNRENUMBERED, - ids=[f"dataset={d.as_posix()}" for d in utils.DATASETS_UNRENUMBERED], -) -def test_mg_renumber_common_col_names(graph_file, dask_client): +@pytest.mark.parametrize("dataset", DATASETS_UNRENUMBERED) +def test_mg_renumber_common_col_names(dataset, dask_client): """ Ensure that commonly-used column names in the input do not conflict with names used internally by NumberMap. """ - M = utils.read_csv_for_nx(graph_file) + M = utils.read_csv_for_nx(dataset.get_path()) sources = cudf.Series(M["0"]) destinations = cudf.Series(M["1"]) diff --git a/python/cugraph/cugraph/tests/internals/test_replicate_edgelist_mg.py b/python/cugraph/cugraph/tests/internals/test_replicate_edgelist_mg.py index 3bdb5c079ef..09936e954e8 100644 --- a/python/cugraph/cugraph/tests/internals/test_replicate_edgelist_mg.py +++ b/python/cugraph/cugraph/tests/internals/test_replicate_edgelist_mg.py @@ -1,4 +1,4 @@ -# Copyright (c) 2022-2023, NVIDIA CORPORATION. +# Copyright (c) 2022-2024, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
# You may obtain a copy of the License at @@ -17,73 +17,54 @@ import dask_cudf import numpy as np -from cugraph.testing import UNDIRECTED_DATASETS, karate_disjoint - +from cugraph.datasets import karate, dolphins, karate_disjoint from cugraph.structure.replicate_edgelist import replicate_edgelist from cudf.testing.testing import assert_frame_equal -from pylibcugraph.testing.utils import gen_fixture_params_product # ============================================================================= # Pytest Setup / Teardown - called for each test function # ============================================================================= + + def setup_function(): gc.collect() +# ============================================================================= +# Parameters +# ============================================================================= + + edgeWeightCol = "weights" edgeIdCol = "edge_id" edgeTypeCol = "edge_type" srcCol = "src" dstCol = "dst" - -input_data = UNDIRECTED_DATASETS + [karate_disjoint] -datasets = [pytest.param(d) for d in input_data] - -fixture_params = gen_fixture_params_product( - (datasets, "graph_file"), - ([True, False], "distributed"), - ([True, False], "use_weights"), - ([True, False], "use_edge_ids"), - ([True, False], "use_edge_type_ids"), -) - - -@pytest.fixture(scope="module", params=fixture_params) -def input_combo(request): - """ - Simply return the current combination of params as a dictionary for use in - tests or other parameterized fixtures. 
- """ - return dict( - zip( - ( - "graph_file", - "use_weights", - "use_edge_ids", - "use_edge_type_ids", - "distributed", - ), - request.param, - ) - ) +DATASETS = [karate, dolphins, karate_disjoint] +IS_DISTRIBUTED = [True, False] +USE_WEIGHTS = [True, False] +USE_EDGE_IDS = [True, False] +USE_EDGE_TYPE_IDS = [True, False] # ============================================================================= # Tests # ============================================================================= -# @pytest.mark.skipif( -# is_single_gpu(), reason="skipping MG testing on Single GPU system" -# ) -@pytest.mark.mg -def test_mg_replicate_edgelist(dask_client, input_combo): - df = input_combo["graph_file"].get_edgelist() - distributed = input_combo["distributed"] - use_weights = input_combo["use_weights"] - use_edge_ids = input_combo["use_edge_ids"] - use_edge_type_ids = input_combo["use_edge_type_ids"] + +@pytest.mark.mg +@pytest.mark.parametrize("dataset", DATASETS) +@pytest.mark.parametrize("distributed", IS_DISTRIBUTED) +@pytest.mark.parametrize("use_weights", USE_WEIGHTS) +@pytest.mark.parametrize("use_edge_ids", USE_EDGE_IDS) +@pytest.mark.parametrize("use_edge_type_ids", USE_EDGE_TYPE_IDS) +def test_mg_replicate_edgelist( + dask_client, dataset, distributed, use_weights, use_edge_ids, use_edge_type_ids +): + dataset.unload() + df = dataset.get_edgelist() columns = [srcCol, dstCol] weight = None diff --git a/python/cugraph/cugraph/tests/internals/test_symmetrize_mg.py b/python/cugraph/cugraph/tests/internals/test_symmetrize_mg.py index 05cc06e6282..913443fe400 100644 --- a/python/cugraph/cugraph/tests/internals/test_symmetrize_mg.py +++ b/python/cugraph/cugraph/tests/internals/test_symmetrize_mg.py @@ -1,4 +1,4 @@ -# Copyright (c) 2022-2023, NVIDIA CORPORATION. +# Copyright (c) 2022-2024, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
# You may obtain a copy of the License at @@ -25,6 +25,8 @@ # ============================================================================= # Pytest Setup / Teardown - called for each test function # ============================================================================= + + def setup_function(): gc.collect() From 5d79f42b75bf04e791945b4fbcbe0b8e27c96bce Mon Sep 17 00:00:00 2001 From: Ralph Liu Date: Tue, 7 May 2024 17:13:29 -0700 Subject: [PATCH 17/20] Remove calls to unload() in MG tests --- .../test_batch_betweenness_centrality_mg.py | 3 --- .../test_batch_edge_betweenness_centrality_mg.py | 2 -- .../centrality/test_betweenness_centrality_mg.py | 6 ------ .../tests/centrality/test_degree_centrality_mg.py | 5 ----- .../test_edge_betweenness_centrality_mg.py | 5 ----- .../centrality/test_eigenvector_centrality_mg.py | 8 -------- .../tests/centrality/test_katz_centrality_mg.py | 12 ------------ python/cugraph/cugraph/tests/comms/test_comms_mg.py | 5 ----- .../tests/community/test_induced_subgraph_mg.py | 5 ----- .../cugraph/tests/community/test_leiden_mg.py | 6 ------ .../cugraph/tests/community/test_louvain_mg.py | 6 ------ .../tests/community/test_triangle_count_mg.py | 6 ------ .../cugraph/tests/components/test_connectivity_mg.py | 3 --- .../cugraph/tests/core/test_core_number_mg.py | 4 ---- python/cugraph/cugraph/tests/core/test_k_core_mg.py | 7 ------- 15 files changed, 83 deletions(-) diff --git a/python/cugraph/cugraph/tests/centrality/test_batch_betweenness_centrality_mg.py b/python/cugraph/cugraph/tests/centrality/test_batch_betweenness_centrality_mg.py index 1c73ebb0216..9f0980d4199 100644 --- a/python/cugraph/cugraph/tests/centrality/test_batch_betweenness_centrality_mg.py +++ b/python/cugraph/cugraph/tests/centrality/test_batch_betweenness_centrality_mg.py @@ -93,6 +93,3 @@ def test_mg_betweenness_centrality( second_key="ref_bc", epsilon=DEFAULT_EPSILON, ) - - # Clean-up stored dataset edge-lists - dataset.unload() diff --git
a/python/cugraph/cugraph/tests/centrality/test_batch_edge_betweenness_centrality_mg.py b/python/cugraph/cugraph/tests/centrality/test_batch_edge_betweenness_centrality_mg.py index 4530dd3da86..4764c01f0fc 100644 --- a/python/cugraph/cugraph/tests/centrality/test_batch_edge_betweenness_centrality_mg.py +++ b/python/cugraph/cugraph/tests/centrality/test_batch_edge_betweenness_centrality_mg.py @@ -84,5 +84,3 @@ def test_mg_edge_betweenness_centrality( second_key="ref_bc", epsilon=DEFAULT_EPSILON, ) - # Clean-up stored dataset edge-lists - dataset.unload() diff --git a/python/cugraph/cugraph/tests/centrality/test_betweenness_centrality_mg.py b/python/cugraph/cugraph/tests/centrality/test_betweenness_centrality_mg.py index c94c2dcaff6..35e199093ce 100644 --- a/python/cugraph/cugraph/tests/centrality/test_betweenness_centrality_mg.py +++ b/python/cugraph/cugraph/tests/centrality/test_betweenness_centrality_mg.py @@ -49,14 +49,12 @@ def setup_function(): def get_sg_graph(dataset, directed): - dataset.unload() G = dataset.get_graph(create_using=cugraph.Graph(directed=directed)) return G def get_mg_graph(dataset, directed): - dataset.unload() ddf = dataset.get_dask_edgelist() dg = cugraph.Graph(directed=directed) dg.from_dask_cudf_edgelist( @@ -96,7 +94,6 @@ def test_dask_mg_betweenness_centrality( benchmark, ): g = get_sg_graph(dataset, directed) - dataset.unload() dg = get_mg_graph(dataset, directed) random_state = subset_seed @@ -143,6 +140,3 @@ def test_dask_mg_betweenness_centrality( diff = cupy.isclose(mg_bc_results, sg_bc_results) assert diff.all() - - # Clean-up stored dataset edge-lists - dataset.unload() diff --git a/python/cugraph/cugraph/tests/centrality/test_degree_centrality_mg.py b/python/cugraph/cugraph/tests/centrality/test_degree_centrality_mg.py index 68daff9238c..8606649c745 100644 --- a/python/cugraph/cugraph/tests/centrality/test_degree_centrality_mg.py +++ b/python/cugraph/cugraph/tests/centrality/test_degree_centrality_mg.py @@ -45,14 +45,12 @@ def 
setup_function(): def get_sg_graph(dataset, directed): - dataset.unload() G = dataset.get_graph(create_using=cugraph.Graph(directed=directed)) return G def get_mg_graph(dataset, directed): - dataset.unload() ddf = dataset.get_dask_edgelist() dg = cugraph.Graph(directed=directed) dg.from_dask_cudf_edgelist( @@ -118,6 +116,3 @@ def test_dask_mg_degree(dask_client, dataset, directed): check_names=False, check_dtype=False, ) - - # Clean-up stored dataset edge-lists - dataset.unload() diff --git a/python/cugraph/cugraph/tests/centrality/test_edge_betweenness_centrality_mg.py b/python/cugraph/cugraph/tests/centrality/test_edge_betweenness_centrality_mg.py index 80acfe1c4ad..5b83a05e2a2 100644 --- a/python/cugraph/cugraph/tests/centrality/test_edge_betweenness_centrality_mg.py +++ b/python/cugraph/cugraph/tests/centrality/test_edge_betweenness_centrality_mg.py @@ -47,7 +47,6 @@ def setup_function(): def get_sg_graph(dataset, directed, edge_ids): - dataset.unload() df = dataset.get_edgelist() if edge_ids: if not directed: @@ -71,7 +70,6 @@ def get_sg_graph(dataset, directed, edge_ids): def get_mg_graph(dataset, directed, edge_ids, weight): - dataset.unload() ddf = dataset.get_dask_edgelist() if weight: @@ -178,6 +176,3 @@ def test_dask_mg_edge_betweenness_centrality( assert len(edge_bc_diffs1) == 0 assert len(edge_bc_diffs2) == 0 - - # Clean-up stored dataset edge-lists - dataset.unload() diff --git a/python/cugraph/cugraph/tests/centrality/test_eigenvector_centrality_mg.py b/python/cugraph/cugraph/tests/centrality/test_eigenvector_centrality_mg.py index 8cd77fb5e24..3a840c82e95 100644 --- a/python/cugraph/cugraph/tests/centrality/test_eigenvector_centrality_mg.py +++ b/python/cugraph/cugraph/tests/centrality/test_eigenvector_centrality_mg.py @@ -52,7 +52,6 @@ def setup_function(): def test_dask_mg_eigenvector_centrality(dask_client, dataset, directed): input_data_path = dataset.get_path() print(f"dataset={input_data_path}") - dataset.unload() ddf = 
dataset.get_dask_edgelist() dg = cugraph.Graph(directed=True) dg.from_dask_cudf_edgelist(ddf, "src", "dst", store_transposed=True) @@ -89,15 +88,11 @@ def test_dask_mg_eigenvector_centrality(dask_client, dataset, directed): err = err + 1 assert err == 0 - # Clean-up stored dataset edge-lists - dataset.unload() - @pytest.mark.mg def test_dask_mg_eigenvector_centrality_transposed_false(dask_client): dataset = DATASETS[0] - dataset.unload() ddf = dataset.get_dask_edgelist() dg = cugraph.Graph(directed=True) dg.from_dask_cudf_edgelist(ddf, "src", "dst", store_transposed=False) @@ -110,6 +105,3 @@ def test_dask_mg_eigenvector_centrality_transposed_false(dask_client): with pytest.warns(UserWarning, match=warning_msg): dcg.eigenvector_centrality(dg) - - # Clean-up stored dataset edge-lists - dataset.unload() diff --git a/python/cugraph/cugraph/tests/centrality/test_katz_centrality_mg.py b/python/cugraph/cugraph/tests/centrality/test_katz_centrality_mg.py index ebbe5974814..5dcbd8173df 100644 --- a/python/cugraph/cugraph/tests/centrality/test_katz_centrality_mg.py +++ b/python/cugraph/cugraph/tests/centrality/test_katz_centrality_mg.py @@ -53,7 +53,6 @@ def test_dask_mg_katz_centrality(dask_client, dataset, directed): input_data_path = dataset.get_path() print(f"dataset={input_data_path}") - dataset.unload() ddf = dataset.get_dask_edgelist() dg = cugraph.Graph(directed=True) dg.from_dask_cudf_edgelist(ddf, "src", "dst", store_transposed=True) @@ -95,16 +94,12 @@ def test_dask_mg_katz_centrality(dask_client, dataset, directed): err = err + 1 assert err == 0 - # Clean-up stored dataset edge-lists - dataset.unload() - @pytest.mark.mg @pytest.mark.skipif(is_single_gpu(), reason="skipping MG testing on Single GPU system") @pytest.mark.parametrize("dataset", DATASETS) @pytest.mark.parametrize("directed", IS_DIRECTED) def test_dask_mg_katz_centrality_nstart(dask_client, dataset, directed): - dataset.unload() ddf = dataset.get_dask_edgelist() dg = cugraph.Graph(directed=True) 
dg.from_dask_cudf_edgelist(ddf, "src", "dst", store_transposed=True) @@ -136,14 +131,10 @@ def test_dask_mg_katz_centrality_nstart(dask_client, dataset, directed): err = err + 1 assert err == 0 - # Clean-up stored dataset edge-lists - dataset.unload() - @pytest.mark.mg @pytest.mark.parametrize("dataset", DATASETS) def test_dask_mg_katz_centrality_transposed_false(dask_client, dataset): - dataset.unload() ddf = dataset.get_dask_edgelist() dg = cugraph.Graph(directed=True) dg.from_dask_cudf_edgelist(ddf, "src", "dst", store_transposed=False) @@ -156,6 +147,3 @@ def test_dask_mg_katz_centrality_transposed_false(dask_client, dataset): with pytest.warns(UserWarning, match=warning_msg): dcg.katz_centrality(dg) - - # Clean-up stored dataset edge-lists - dataset.unload() diff --git a/python/cugraph/cugraph/tests/comms/test_comms_mg.py b/python/cugraph/cugraph/tests/comms/test_comms_mg.py index 18d4db2d77f..d096eb7e5c2 100644 --- a/python/cugraph/cugraph/tests/comms/test_comms_mg.py +++ b/python/cugraph/cugraph/tests/comms/test_comms_mg.py @@ -45,7 +45,6 @@ def setup_function(): def get_pagerank_result(dataset, is_mg): """Return the cugraph.pagerank result for an MG or SG graph""" - dataset.unload() if is_mg: dg = dataset.get_dask_graph(store_transposed=True) @@ -113,7 +112,3 @@ def test_dask_mg_pagerank(dask_client, directed): err2 = err2 + 1 print("Mismatches in ", input_data_path2, ": ", err2) assert err1 == err2 == 0 - - # Clean-up stored dataset edge-lists - karate.unload() - dolphins.unload() diff --git a/python/cugraph/cugraph/tests/community/test_induced_subgraph_mg.py b/python/cugraph/cugraph/tests/community/test_induced_subgraph_mg.py index 9e199840fbb..311fd7a24bc 100644 --- a/python/cugraph/cugraph/tests/community/test_induced_subgraph_mg.py +++ b/python/cugraph/cugraph/tests/community/test_induced_subgraph_mg.py @@ -48,14 +48,12 @@ def setup_function(): def get_sg_graph(dataset, directed): - dataset.unload() G = 
dataset.get_graph(create_using=cugraph.Graph(directed=directed)) return G def get_mg_graph(dataset, directed): - dataset.unload() ddf = dataset.get_dask_edgelist() dg = cugraph.Graph(directed=directed) dg.from_dask_cudf_edgelist( @@ -121,6 +119,3 @@ def test_mg_induced_subgraph( # of all the vertices and ensure that there is None assert sg_induced_subgraph is None assert mg_df is None - - # Clean-up stored dataset edge-lists - dataset.unload() diff --git a/python/cugraph/cugraph/tests/community/test_leiden_mg.py b/python/cugraph/cugraph/tests/community/test_leiden_mg.py index 4ed7244fe29..5bbb513a615 100644 --- a/python/cugraph/cugraph/tests/community/test_leiden_mg.py +++ b/python/cugraph/cugraph/tests/community/test_leiden_mg.py @@ -56,9 +56,6 @@ def test_mg_leiden_with_edgevals_directed_graph(dask_client, dataset): with pytest.raises(ValueError): parts, mod = dcg.leiden(dg) - # Clean-up stored dataset edge-lists - dataset.unload() - @pytest.mark.mg @pytest.mark.parametrize("dataset", DATASETS) @@ -72,6 +69,3 @@ def test_mg_leiden_with_edgevals_undirected_graph(dask_client, dataset): print(parts.compute()) print(mod) print() - - # Clean-up stored dataset edge-lists - dataset.unload() diff --git a/python/cugraph/cugraph/tests/community/test_louvain_mg.py b/python/cugraph/cugraph/tests/community/test_louvain_mg.py index ce89f7f62a2..0dff7f1c8b0 100644 --- a/python/cugraph/cugraph/tests/community/test_louvain_mg.py +++ b/python/cugraph/cugraph/tests/community/test_louvain_mg.py @@ -43,9 +43,6 @@ def test_mg_louvain_with_edgevals_directed_graph(dask_client, dataset): with pytest.raises(ValueError): parts, mod = dcg.louvain(dg) - # Clean-up stored dataset edge-lists - dataset.unload() - @pytest.mark.mg @pytest.mark.parametrize("dataset", DATASETS) @@ -59,6 +56,3 @@ def test_mg_louvain_with_edgevals_undirected_graph(dask_client, dataset): print(parts.compute()) print(mod) print() - - # Clean-up stored dataset edge-lists - dataset.unload() diff --git 
a/python/cugraph/cugraph/tests/community/test_triangle_count_mg.py b/python/cugraph/cugraph/tests/community/test_triangle_count_mg.py index 02723d75527..e2c47af8a1b 100644 --- a/python/cugraph/cugraph/tests/community/test_triangle_count_mg.py +++ b/python/cugraph/cugraph/tests/community/test_triangle_count_mg.py @@ -45,7 +45,6 @@ def setup_function(): def get_sg_graph(dataset, directed, start): - dataset.unload() G = dataset.get_graph(create_using=cugraph.Graph(directed=directed)) if start: # sample k nodes from the cuGraph graph @@ -57,7 +56,6 @@ def get_sg_graph(dataset, directed, start): def get_mg_graph(dataset, directed): - dataset.unload() ddf = dataset.get_dask_edgelist() dg = cugraph.Graph(directed=directed) dg.from_dask_cudf_edgelist( @@ -83,8 +81,6 @@ def test_sg_triangles(dask_client, dataset, start, benchmark): sg_triangle_results = benchmark(cugraph.triangle_count, G, start) sg_triangle_results.sort_values("vertex").reset_index(drop=True) assert sg_triangle_results is not None - # Clean-up stored dataset edge-lists - dataset.unload() @pytest.mark.mg @@ -112,5 +108,3 @@ def test_triangles(dask_client, dataset, start, benchmark): counts_diffs = result_counts.query("mg_counts != sg_counts") assert len(counts_diffs) == 0 - # Clean-up stored dataset edge-lists - dataset.unload() diff --git a/python/cugraph/cugraph/tests/components/test_connectivity_mg.py b/python/cugraph/cugraph/tests/components/test_connectivity_mg.py index b1f571cd896..d1f6ddae604 100644 --- a/python/cugraph/cugraph/tests/components/test_connectivity_mg.py +++ b/python/cugraph/cugraph/tests/components/test_connectivity_mg.py @@ -54,10 +54,7 @@ def test_dask_mg_wcc(dask_client, directed, dataset): create_using = cugraph.Graph(directed=directed) g = dataset.get_graph(create_using=create_using) - dataset.unload() - dg = dataset.get_dask_graph(create_using=create_using) - dataset.unload() if not directed: expected_dist = cugraph.weakly_connected_components(g) diff --git 
a/python/cugraph/cugraph/tests/core/test_core_number_mg.py b/python/cugraph/cugraph/tests/core/test_core_number_mg.py index b52711c3c75..a85181da7c1 100644 --- a/python/cugraph/cugraph/tests/core/test_core_number_mg.py +++ b/python/cugraph/cugraph/tests/core/test_core_number_mg.py @@ -44,7 +44,6 @@ def setup_function(): def get_sg_results(dataset, degree_type): - dataset.unload() G = dataset.get_graph(create_using=cugraph.Graph(directed=False)) res = cugraph.core_number(G, degree_type) res = res.sort_values("vertex").reset_index(drop=True) @@ -63,7 +62,6 @@ def test_sg_core_number(dask_client, dataset, degree_type, benchmark): # This test is only for benchmark purposes. sg_core_number_results = None G = dataset.get_graph(create_using=cugraph.Graph(directed=False)) - dataset.unload() sg_core_number_results = benchmark(cugraph.core_number, G, degree_type) assert sg_core_number_results is not None @@ -72,7 +70,6 @@ def test_sg_core_number(dask_client, dataset, degree_type, benchmark): @pytest.mark.parametrize("dataset", DATASETS) @pytest.mark.parametrize("degree_type", DEGREE_TYPE) def test_core_number(dask_client, dataset, degree_type, benchmark): - dataset.unload() dg = dataset.get_dask_graph(create_using=cugraph.Graph(directed=False)) result_core_number = benchmark(dcg.core_number, dg, degree_type) @@ -92,7 +89,6 @@ def test_core_number(dask_client, dataset, degree_type, benchmark): counts_diffs = result_core_number.query("mg_core_number != sg_core_number") assert len(counts_diffs) == 0 - dataset.unload() @pytest.mark.mg diff --git a/python/cugraph/cugraph/tests/core/test_k_core_mg.py b/python/cugraph/cugraph/tests/core/test_k_core_mg.py index 98ab58c3656..3e8f97f6b1d 100644 --- a/python/cugraph/cugraph/tests/core/test_k_core_mg.py +++ b/python/cugraph/cugraph/tests/core/test_k_core_mg.py @@ -47,7 +47,6 @@ def setup_function(): def get_sg_results(dataset, core_number, degree_type): - dataset.unload() G = dataset.get_graph(create_using=cugraph.Graph(directed=False)) 
if core_number: @@ -84,7 +83,6 @@ def get_sg_results(dataset, core_number, degree_type): def test_sg_k_core(dask_client, dataset, core_number, degree_type, benchmark): # This test is only for benchmark purposes. sg_k_core = None - dataset.unload() G = dataset.get_graph(create_using=cugraph.Graph(directed=False)) if core_number: # compute the core_number @@ -95,7 +93,6 @@ def test_sg_k_core(dask_client, dataset, core_number, degree_type, benchmark): cugraph.k_core, G, core_number=core_number, degree_type=degree_type ) assert sg_k_core is not None - dataset.unload() @pytest.mark.mg @@ -107,7 +104,6 @@ def test_dask_mg_k_core(dask_client, dataset, core_number, degree_type, benchmar dataset, core_number, degree_type ) - dataset.unload() dg = dataset.get_dask_graph(create_using=cugraph.Graph(directed=False)) k_core_results = benchmark(dcg.k_core, dg, core_number=core_number) k_core_results = ( @@ -120,19 +116,16 @@ def test_dask_mg_k_core(dask_client, dataset, core_number, degree_type, benchmar assert_frame_equal( expected_k_core_results, k_core_results, check_dtype=False, check_like=True ) - dataset.unload() @pytest.mark.mg def test_dask_mg_k_core_invalid_input(dask_client): dataset = DATASETS[0] - dataset.unload() dg = dataset.get_dask_graph(create_using=cugraph.Graph(directed=True)) with pytest.raises(ValueError): dcg.k_core(dg) - dataset.unload() dg = dataset.get_dask_graph(create_using=cugraph.Graph(directed=False)) degree_type = "invalid" From b55e935a67cd41fa777a388edc1855e207f3f549 Mon Sep 17 00:00:00 2001 From: Ralph Liu Date: Wed, 8 May 2024 11:26:03 -0700 Subject: [PATCH 18/20] Add MG vs. 
SG Check --- .../centrality/test_betweenness_centrality.py | 21 ++++++++++++------- 1 file changed, 13 insertions(+), 8 deletions(-) diff --git a/python/cugraph/cugraph/tests/centrality/test_betweenness_centrality.py b/python/cugraph/cugraph/tests/centrality/test_betweenness_centrality.py index db34c68a054..ff8859a01b1 100644 --- a/python/cugraph/cugraph/tests/centrality/test_betweenness_centrality.py +++ b/python/cugraph/cugraph/tests/centrality/test_betweenness_centrality.py @@ -1,4 +1,4 @@ -# Copyright (c) 2020-2023, NVIDIA CORPORATION.: +# Copyright (c) 2020-2024, NVIDIA CORPORATION.: # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at @@ -111,11 +111,18 @@ def calc_betweenness_centrality( else: edge_attr = None - G = graph_file.get_graph( - download=True, - create_using=cugraph.Graph(directed=directed), - ignore_weights=not edgevals, - ) + G = None + if multi_gpu_batch: + G = graph_file.get_dask_graph( + create_using=cugraph.Graph(directed=directed), ignore_weights=not edgevals + ) + G.enable_batch() + else: + G = graph_file.get_graph( + download=True, + create_using=cugraph.Graph(directed=directed), + ignore_weights=not edgevals, + ) M = G.to_pandas_edgelist().rename( columns={"src": "0", "dst": "1", "wgt": edge_attr} @@ -130,8 +137,6 @@ def calc_betweenness_centrality( ) assert G is not None and Gnx is not None - if multi_gpu_batch: - G.enable_batch() calc_func = None if k is not None and seed is not None: From b8e22178b21d3670eaa5dcca4f3004b6583a9a91 Mon Sep 17 00:00:00 2001 From: Ralph Liu Date: Thu, 9 May 2024 08:14:45 -0700 Subject: [PATCH 19/20] Style --- python/cugraph/cugraph/tests/components/test_connectivity_mg.py | 1 - 1 file changed, 1 deletion(-) diff --git a/python/cugraph/cugraph/tests/components/test_connectivity_mg.py b/python/cugraph/cugraph/tests/components/test_connectivity_mg.py index 5cb13433d5b..d1f6ddae604 100644 
--- a/python/cugraph/cugraph/tests/components/test_connectivity_mg.py +++ b/python/cugraph/cugraph/tests/components/test_connectivity_mg.py @@ -56,7 +56,6 @@ def test_dask_mg_wcc(dask_client, directed, dataset): g = dataset.get_graph(create_using=create_using) dg = dataset.get_dask_graph(create_using=create_using) - if not directed: expected_dist = cugraph.weakly_connected_components(g) result_dist = dcg.weakly_connected_components(dg) From 264e37ed5e7ecebfb6e9dc1b943abf4432e02b31 Mon Sep 17 00:00:00 2001 From: Ralph Liu Date: Fri, 10 May 2024 09:32:27 -0700 Subject: [PATCH 20/20] Add MG graph helper to test_connectivity_mg.py --- .../tests/components/test_connectivity_mg.py | 24 +++++++++++++++---- 1 file changed, 19 insertions(+), 5 deletions(-) diff --git a/python/cugraph/cugraph/tests/components/test_connectivity_mg.py b/python/cugraph/cugraph/tests/components/test_connectivity_mg.py index d1f6ddae604..4ab251c0e29 100644 --- a/python/cugraph/cugraph/tests/components/test_connectivity_mg.py +++ b/python/cugraph/cugraph/tests/components/test_connectivity_mg.py @@ -39,6 +39,21 @@ def setup_function(): IS_DIRECTED = [False, True] +# ============================================================================= +# Helper +# ============================================================================= + + +def get_mg_graph(dataset, directed): + """Returns an MG graph""" + ddf = dataset.get_dask_edgelist() + + dg = cugraph.Graph(directed=directed) + dg.from_dask_cudf_edgelist(ddf, "src", "dst", "wgt") + + return dg + + # ============================================================================= # Tests # ============================================================================= @@ -47,15 +62,14 @@ def setup_function(): @pytest.mark.mg @pytest.mark.parametrize("dataset", DATASETS) @pytest.mark.parametrize("directed", IS_DIRECTED) -def test_dask_mg_wcc(dask_client, directed, dataset): - +def test_dask_mg_wcc(dask_client, dataset, directed): input_data_path = 
dataset.get_path() print(f"dataset={input_data_path}") - create_using = cugraph.Graph(directed=directed) - g = dataset.get_graph(create_using=create_using) - dg = dataset.get_dask_graph(create_using=create_using) + g = dataset.get_graph(create_using=cugraph.Graph(directed=directed)) + dg = get_mg_graph(dataset, directed) + # breakpoint() if not directed: expected_dist = cugraph.weakly_connected_components(g) result_dist = dcg.weakly_connected_components(dg)