From 9884dd9b110c2b1f888d2965ba3cb69641f5625a Mon Sep 17 00:00:00 2001
From: Ralph Liu <ralphl@nvidia.com>
Date: Fri, 23 Feb 2024 07:24:37 -0800
Subject: [PATCH 01/20] Clean up centrality unit tests

---
 .../test_batch_betweenness_centrality_mg.py   | 29 ++++----
 ...st_batch_edge_betweenness_centrality_mg.py | 27 ++++---
 .../test_betweenness_centrality_mg.py         | 72 ++++++++++---------
 3 files changed, 67 insertions(+), 61 deletions(-)

diff --git a/python/cugraph/cugraph/tests/centrality/test_batch_betweenness_centrality_mg.py b/python/cugraph/cugraph/tests/centrality/test_batch_betweenness_centrality_mg.py
index 8ccbbfc9ec5..aeef3ba539c 100644
--- a/python/cugraph/cugraph/tests/centrality/test_batch_betweenness_centrality_mg.py
+++ b/python/cugraph/cugraph/tests/centrality/test_batch_betweenness_centrality_mg.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2021-2023, NVIDIA CORPORATION.
+# Copyright (c) 2021-2024, NVIDIA CORPORATION.
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
@@ -24,32 +24,37 @@
     compare_scores,
 )
 
-DIRECTED_GRAPH_OPTIONS = [False, True]
-WEIGHTED_GRAPH_OPTIONS = [False, True]
-ENDPOINTS_OPTIONS = [False, True]
-NORMALIZED_OPTIONS = [False, True]
-DEFAULT_EPSILON = 0.0001
-SUBSET_SIZE_OPTIONS = [4, None]
-SUBSET_SEED_OPTIONS = [42]
-
 
 # =============================================================================
 # Parameters
 # =============================================================================
+
 DATASETS = [karate]
+DEFAULT_EPSILON = 0.0001
+DIRECTED_GRAPH_OPTIONS = [False, True]
+ENDPOINTS_OPTIONS = [False, True]
+NORMALIZED_OPTIONS = [False, True]
+RESULT_DTYPE_OPTIONS = [np.float64]
+SUBSET_SIZE_OPTIONS = [4, None]
+SUBSET_SEED_OPTIONS = [42]
 # FIXME: The "preset_gpu_count" from 21.08 and below are currently not
 # supported and have been removed
-
-RESULT_DTYPE_OPTIONS = [np.float64]
-
+WEIGHTED_GRAPH_OPTIONS = [False, True]
 
 # =============================================================================
 # Pytest Setup / Teardown - called for each test function
 # =============================================================================
+
+
 def setup_function():
     gc.collect()
 
 
+# =============================================================================
+# Tests
+# =============================================================================
+
+
 @pytest.mark.mg
 @pytest.mark.skipif(is_single_gpu(), reason="skipping MG testing on Single GPU system")
 @pytest.mark.parametrize(
diff --git a/python/cugraph/cugraph/tests/centrality/test_batch_edge_betweenness_centrality_mg.py b/python/cugraph/cugraph/tests/centrality/test_batch_edge_betweenness_centrality_mg.py
index 154477a1a67..52e6ffdccd6 100644
--- a/python/cugraph/cugraph/tests/centrality/test_batch_edge_betweenness_centrality_mg.py
+++ b/python/cugraph/cugraph/tests/centrality/test_batch_edge_betweenness_centrality_mg.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2019-2023, NVIDIA CORPORATION.
+# Copyright (c) 2019-2024, NVIDIA CORPORATION.
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
@@ -19,39 +19,38 @@
 from cugraph.dask.common.mg_utils import is_single_gpu
 from cugraph.datasets import karate, netscience
 
-# Get parameters from standard betwenness_centrality_test
-# As tests directory is not a module, we need to add it to the path
-# FIXME: Test must be reworked to import from 'cugraph.testing' instead of
-# importing from other tests
-from test_edge_betweenness_centrality import (
-    DIRECTED_GRAPH_OPTIONS,
-    NORMALIZED_OPTIONS,
-    DEFAULT_EPSILON,
-    SUBSET_SIZE_OPTIONS,
-)
-
 from test_edge_betweenness_centrality import (
     calc_edge_betweenness_centrality,
     compare_scores,
 )
 
+
 # =============================================================================
 # Parameters
 # =============================================================================
-DATASETS = [karate, netscience]
 
+DATASETS = [karate, netscience]
+DIRECTED_GRAPH_OPTIONS = [False, True]
+NORMALIZED_OPTIONS = [False, True]
+DEFAULT_EPSILON = 0.0001
+SUBSET_SIZE_OPTIONS = [4, None]
 # FIXME: The "preset_gpu_count" from 21.08 and below are not supported and have
 # been removed
 RESULT_DTYPE_OPTIONS = [np.float32, np.float64]
 
-
 # =============================================================================
 # Pytest Setup / Teardown - called for each test function
 # =============================================================================
+
+
 def setup_function():
     gc.collect()
 
 
+# =============================================================================
+# Tests
+# =============================================================================
+
 # FIXME: Fails for directed = False(bc score twice as much) and normalized = True.
 @pytest.mark.mg
 @pytest.mark.skipif(is_single_gpu(), reason="skipping MG testing on Single GPU system")
diff --git a/python/cugraph/cugraph/tests/centrality/test_betweenness_centrality_mg.py b/python/cugraph/cugraph/tests/centrality/test_betweenness_centrality_mg.py
index 1e20287d1e5..6fa73e95961 100644
--- a/python/cugraph/cugraph/tests/centrality/test_betweenness_centrality_mg.py
+++ b/python/cugraph/cugraph/tests/centrality/test_betweenness_centrality_mg.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2022-2023, NVIDIA CORPORATION.
+# Copyright (c) 2022-2024, NVIDIA CORPORATION.
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
@@ -15,13 +15,13 @@
 
 import pytest
 
-import dask_cudf
 import cupy
 import cudf
 import cugraph
 import cugraph.dask as dcg
+import dask_cudf
+from cugraph.datasets import karate, dolphins
 from cugraph.testing import utils
-from pylibcugraph.testing import gen_fixture_params_product
 
 
 # =============================================================================
@@ -33,48 +33,50 @@ def setup_function():
     gc.collect()
 
 
-IS_DIRECTED = [True, False]
+# =============================================================================
+# Parameters
+# =============================================================================
 
+DATASETS = [karate, dolphins]
+IS_DIRECTED = [True, False]
+NORMALIZED = [False, True]
+ENDPOINTS = [False, True]
+SUBSET_SEED = [42, None]
+SUBSET_SIZE = [None, 15]
+VERTEX_LIST_TYPE = [list, cudf]
 
 # =============================================================================
-# Pytest fixtures
+# Helper functions
 # =============================================================================
 
-datasets = utils.DATASETS_UNDIRECTED
 
-fixture_params = gen_fixture_params_product(
-    (datasets, "graph_file"),
-    ([False, True], "normalized"),
-    ([False, True], "endpoints"),
-    ([42, None], "subset_seed"),
-    ([None, 15], "subset_size"),
-    (IS_DIRECTED, "directed"),
-    ([list, cudf], "vertex_list_type"),
-)
+def get_sg_graph(dataset, directed):
+    G = dataset.get_graph(create_using=cugraph.Graph(directed=directed))
 
+    return G
 
-@pytest.fixture(scope="module", params=fixture_params)
-def input_combo(request):
-    """
-    Simply return the current combination of params as a dictionary for use in
-    tests or other parameterized fixtures.
-    """
-    parameters = dict(
-        zip(
-            (
-                "graph_file",
-                "normalized",
-                "endpoints",
-                "subset_seed",
-                "subset_size",
-                "directed",
-                "vertex_list_type",
-            ),
-            request.param,
-        )
+
+def get_mg_graph(dataset, directed):
+    input_data_path = dataset.get_path()
+    blocksize = dcg.get_chunksize(input_data_path)
+    ddf = dask_cudf.read_csv(
+        input_data_path,
+        blocksize=blocksize,
+        delimiter=dataset.metadata["delim"],
+        names=dataset.metadata["col_names"],
+        dtype=dataset.metadata["col_types"],
+    )
+    dg = cugraph.Graph(directed=directed)
+    dg.from_dask_cudf_edgelist(
+        ddf,
+        source="src",
+        destination="dst",
+        edge_attr="wgt",
+        renumber=True,
+        store_transposed=True,
     )
 
-    return parameters
+    return dg
 
 
 @pytest.fixture(scope="module")

From 07aac44a5beb5a7ce7365c51be876983243daabc Mon Sep 17 00:00:00 2001
From: Ralph Liu <ralphl@nvidia.com>
Date: Mon, 26 Feb 2024 12:34:49 -0800
Subject: [PATCH 02/20] Update MG centrality tests

---
 .../test_batch_betweenness_centrality_mg.py   |  30 ++--
 ...st_batch_edge_betweenness_centrality_mg.py |  21 +--
 .../test_betweenness_centrality_mg.py         | 131 ++++++------------
 .../centrality/test_degree_centrality_mg.py   |  75 +++++-----
 4 files changed, 110 insertions(+), 147 deletions(-)

diff --git a/python/cugraph/cugraph/tests/centrality/test_batch_betweenness_centrality_mg.py b/python/cugraph/cugraph/tests/centrality/test_batch_betweenness_centrality_mg.py
index aeef3ba539c..7050d0c9e55 100644
--- a/python/cugraph/cugraph/tests/centrality/test_batch_betweenness_centrality_mg.py
+++ b/python/cugraph/cugraph/tests/centrality/test_batch_betweenness_centrality_mg.py
@@ -29,17 +29,17 @@
 # Parameters
 # =============================================================================
 
+
 DATASETS = [karate]
 DEFAULT_EPSILON = 0.0001
-DIRECTED_GRAPH_OPTIONS = [False, True]
-ENDPOINTS_OPTIONS = [False, True]
-NORMALIZED_OPTIONS = [False, True]
-RESULT_DTYPE_OPTIONS = [np.float64]
-SUBSET_SIZE_OPTIONS = [4, None]
-SUBSET_SEED_OPTIONS = [42]
-# FIXME: The "preset_gpu_count" from 21.08 and below are currently not
-# supported and have been removed
-WEIGHTED_GRAPH_OPTIONS = [False, True]
+IS_DIRECTED = [False, True]
+ENDPOINTS = [False, True]
+IS_NORMALIZED = [False, True]
+RESULT_DTYPES = [np.float64]
+SUBSET_SIZES = [4, None]
+SUBSET_SEEDS = [42]
+IS_WEIGHTED = [False, True]
+
 
 # =============================================================================
 # Pytest Setup / Teardown - called for each test function
@@ -60,13 +60,13 @@ def setup_function():
 @pytest.mark.parametrize(
     "graph_file", DATASETS, ids=[f"dataset={d.get_path().stem}" for d in DATASETS]
 )
-@pytest.mark.parametrize("directed", DIRECTED_GRAPH_OPTIONS)
-@pytest.mark.parametrize("subset_size", SUBSET_SIZE_OPTIONS)
-@pytest.mark.parametrize("normalized", NORMALIZED_OPTIONS)
+@pytest.mark.parametrize("directed", IS_DIRECTED)
+@pytest.mark.parametrize("subset_size", SUBSET_SIZES)
+@pytest.mark.parametrize("normalized", IS_NORMALIZED)
 @pytest.mark.parametrize("weight", [None])
-@pytest.mark.parametrize("endpoints", ENDPOINTS_OPTIONS)
-@pytest.mark.parametrize("subset_seed", SUBSET_SEED_OPTIONS)
-@pytest.mark.parametrize("result_dtype", RESULT_DTYPE_OPTIONS)
+@pytest.mark.parametrize("endpoints", ENDPOINTS)
+@pytest.mark.parametrize("subset_seed", SUBSET_SEEDS)
+@pytest.mark.parametrize("result_dtype", RESULT_DTYPES)
 def test_mg_betweenness_centrality(
     graph_file,
     directed,
diff --git a/python/cugraph/cugraph/tests/centrality/test_batch_edge_betweenness_centrality_mg.py b/python/cugraph/cugraph/tests/centrality/test_batch_edge_betweenness_centrality_mg.py
index 52e6ffdccd6..48364a4a79a 100644
--- a/python/cugraph/cugraph/tests/centrality/test_batch_edge_betweenness_centrality_mg.py
+++ b/python/cugraph/cugraph/tests/centrality/test_batch_edge_betweenness_centrality_mg.py
@@ -29,14 +29,14 @@
 # Parameters
 # =============================================================================
 
+
 DATASETS = [karate, netscience]
-DIRECTED_GRAPH_OPTIONS = [False, True]
-NORMALIZED_OPTIONS = [False, True]
+IS_DIRECTED = [True, False]
+IS_NORMALIZED = [True, False]
 DEFAULT_EPSILON = 0.0001
-SUBSET_SIZE_OPTIONS = [4, None]
-# FIXME: The "preset_gpu_count" from 21.08 and below are not supported and have
-# been removed
-RESULT_DTYPE_OPTIONS = [np.float32, np.float64]
+SUBSET_SIZES = [4, None]
+RESULT_DTYPES = [np.float32, np.float64]
+
 
 # =============================================================================
 # Pytest Setup / Teardown - called for each test function
@@ -51,16 +51,17 @@ def setup_function():
 # Tests
 # =============================================================================
 
+
 # FIXME: Fails for directed = False(bc score twice as much) and normalized = True.
 @pytest.mark.mg
 @pytest.mark.skipif(is_single_gpu(), reason="skipping MG testing on Single GPU system")
 @pytest.mark.parametrize(
     "graph_file", DATASETS, ids=[f"dataset={d.get_path().stem}" for d in DATASETS]
 )
-@pytest.mark.parametrize("directed", DIRECTED_GRAPH_OPTIONS)
-@pytest.mark.parametrize("subset_size", SUBSET_SIZE_OPTIONS)
-@pytest.mark.parametrize("normalized", NORMALIZED_OPTIONS)
-@pytest.mark.parametrize("result_dtype", RESULT_DTYPE_OPTIONS)
+@pytest.mark.parametrize("directed", IS_DIRECTED)
+@pytest.mark.parametrize("subset_size", SUBSET_SIZES)
+@pytest.mark.parametrize("normalized", IS_NORMALIZED)
+@pytest.mark.parametrize("result_dtype", RESULT_DTYPES)
 def test_mg_edge_betweenness_centrality(
     graph_file,
     directed,
diff --git a/python/cugraph/cugraph/tests/centrality/test_betweenness_centrality_mg.py b/python/cugraph/cugraph/tests/centrality/test_betweenness_centrality_mg.py
index 6fa73e95961..48fbe796bb4 100644
--- a/python/cugraph/cugraph/tests/centrality/test_betweenness_centrality_mg.py
+++ b/python/cugraph/cugraph/tests/centrality/test_betweenness_centrality_mg.py
@@ -19,9 +19,7 @@
 import cudf
 import cugraph
 import cugraph.dask as dcg
-import dask_cudf
 from cugraph.datasets import karate, dolphins
-from cugraph.testing import utils
 
 
 # =============================================================================
@@ -39,11 +37,11 @@ def setup_function():
 
 DATASETS = [karate, dolphins]
 IS_DIRECTED = [True, False]
-NORMALIZED = [False, True]
-ENDPOINTS = [False, True]
-SUBSET_SEED = [42, None]
-SUBSET_SIZE = [None, 15]
-VERTEX_LIST_TYPE = [list, cudf]
+IS_NORMALIZED = [True, False]
+ENDPOINTS = [True, False]
+SUBSET_SEEDS = [42, None]
+SUBSET_SIZES = [None, 15]
+VERTEX_LIST_TYPES = [list, cudf]
 
 # =============================================================================
 # Helper functions
@@ -57,15 +55,7 @@ def get_sg_graph(dataset, directed):
 
 
 def get_mg_graph(dataset, directed):
-    input_data_path = dataset.get_path()
-    blocksize = dcg.get_chunksize(input_data_path)
-    ddf = dask_cudf.read_csv(
-        input_data_path,
-        blocksize=blocksize,
-        delimiter=dataset.metadata["delim"],
-        names=dataset.metadata["col_names"],
-        dtype=dataset.metadata["col_types"],
-    )
+    ddf = dataset.get_dask_edgelist()
     dg = cugraph.Graph(directed=directed)
     dg.from_dask_cudf_edgelist(
         ddf,
@@ -79,101 +69,64 @@ def get_mg_graph(dataset, directed):
     return dg
 
 
-@pytest.fixture(scope="module")
-def input_expected_output(input_combo):
-    """
-    This fixture returns the inputs and expected results from the
-    betweenness_centrality algo based on cuGraph betweenness_centrality) which can
-    be used for validation.
-    """
+# =============================================================================
+# Tests
+# =============================================================================
 
-    input_data_path = input_combo["graph_file"]
-    normalized = input_combo["normalized"]
-    endpoints = input_combo["endpoints"]
-    random_state = input_combo["subset_seed"]
-    subset_size = input_combo["subset_size"]
-    directed = input_combo["directed"]
-    vertex_list_type = input_combo["vertex_list_type"]
 
-    G = utils.generate_cugraph_graph_from_file(input_data_path, directed=directed)
+@pytest.mark.mg
+@pytest.mark.parametrize("dataset", DATASETS)
+@pytest.mark.parametrize("directed", IS_DIRECTED)
+@pytest.mark.parametrize("normalized", IS_NORMALIZED)
+@pytest.mark.parametrize("endpoint", ENDPOINTS)
+@pytest.mark.parametrize("subset_seed", SUBSET_SEEDS)
+@pytest.mark.parametrize("subset_size", SUBSET_SIZES)
+@pytest.mark.parametrize("v_list_type", VERTEX_LIST_TYPES)
+def test_dask_mg_betweenness_centrality(
+    dataset,
+    directed,
+    normalized,
+    endpoint,
+    subset_seed,
+    subset_size,
+    v_list_type,
+    dask_client,
+    benchmark,
+):
+    g = get_sg_graph(dataset, directed)
+    dataset.unload()
+    dg = get_mg_graph(dataset, directed)
+    random_state = subset_seed
 
     if subset_size is None:
         k = subset_size
     elif isinstance(subset_size, int):
         # Select random vertices
-        k = G.select_random_vertices(
+        k = g.select_random_vertices(
             random_state=random_state, num_vertices=subset_size
         )
-        if vertex_list_type is list:
+        if v_list_type is list:
             k = k.to_arrow().to_pylist()
 
         print("the seeds are \n", k)
-        if vertex_list_type is int:
+        if v_list_type is int:
             # This internally sample k vertices in betweenness centrality.
             # Since the nodes that will be sampled by each implementation will
             # be random, therefore sample all vertices which will make the test
             # consistent.
-            k = len(G.nodes())
-
-    input_combo["k"] = k
+            k = len(g.nodes())
 
     sg_cugraph_bc = cugraph.betweenness_centrality(
-        G, k=k, normalized=normalized, endpoints=endpoints, random_state=random_state
+        g, k=k, normalized=normalized, endpoints=endpoint, random_state=random_state
     )
-    # Save the results back to the input_combo dictionary to prevent redundant
-    # cuGraph runs. Other tests using the input_combo fixture will look for
-    # them, and if not present they will have to re-run the same cuGraph call.
     sg_cugraph_bc = sg_cugraph_bc.sort_values("vertex").reset_index(drop=True)
 
-    input_combo["sg_cugraph_results"] = sg_cugraph_bc
-    chunksize = dcg.get_chunksize(input_data_path)
-    ddf = dask_cudf.read_csv(
-        input_data_path,
-        chunksize=chunksize,
-        delimiter=" ",
-        names=["src", "dst", "value"],
-        dtype=["int32", "int32", "float32"],
-    )
-
-    dg = cugraph.Graph(directed=directed)
-    dg.from_dask_cudf_edgelist(
-        ddf,
-        source="src",
-        destination="dst",
-        edge_attr="value",
-        renumber=True,
-        store_transposed=True,
-    )
-
-    input_combo["MGGraph"] = dg
-
-    return input_combo
-
-
-# =============================================================================
-# Tests
-# =============================================================================
-
-
-# @pytest.mark.skipif(
-#    is_single_gpu(), reason="skipping MG testing on Single GPU system"
-# )
-
-
-@pytest.mark.mg
-def test_dask_mg_betweenness_centrality(dask_client, benchmark, input_expected_output):
-
-    dg = input_expected_output["MGGraph"]
-    k = input_expected_output["k"]
-    endpoints = input_expected_output["endpoints"]
-    normalized = input_expected_output["normalized"]
-    random_state = input_expected_output["subset_seed"]
     mg_bc_results = benchmark(
         dcg.betweenness_centrality,
         dg,
         k=k,
         normalized=normalized,
-        endpoints=endpoints,
+        endpoints=endpoint,
         random_state=random_state,
     )
 
@@ -181,11 +134,9 @@ def test_dask_mg_betweenness_centrality(dask_client, benchmark, input_expected_o
         mg_bc_results.compute().sort_values("vertex").reset_index(drop=True)
     )["betweenness_centrality"].to_cupy()
 
-    sg_bc_results = (
-        input_expected_output["sg_cugraph_results"]
-        .sort_values("vertex")
-        .reset_index(drop=True)
-    )["betweenness_centrality"].to_cupy()
+    sg_bc_results = (sg_cugraph_bc.sort_values("vertex").reset_index(drop=True))[
+        "betweenness_centrality"
+    ].to_cupy()
 
     diff = cupy.isclose(mg_bc_results, sg_bc_results)
 
diff --git a/python/cugraph/cugraph/tests/centrality/test_degree_centrality_mg.py b/python/cugraph/cugraph/tests/centrality/test_degree_centrality_mg.py
index 1bef1e0872b..8606649c745 100644
--- a/python/cugraph/cugraph/tests/centrality/test_degree_centrality_mg.py
+++ b/python/cugraph/cugraph/tests/centrality/test_degree_centrality_mg.py
@@ -15,12 +15,12 @@
 
 import pytest
 
-import cudf
-import dask_cudf
 import cugraph
-from cugraph.testing.utils import RAPIDS_DATASET_ROOT_DIR_PATH
+from cugraph.dask.common.mg_utils import is_single_gpu
+from cugraph.datasets import karate_asymmetric, polbooks, email_Eu_core
 from cudf.testing import assert_series_equal
 
+
 # =============================================================================
 # Pytest Setup / Teardown - called for each test function
 # =============================================================================
@@ -30,44 +30,55 @@ def setup_function():
     gc.collect()
 
 
+# =============================================================================
+# Parameters
+# =============================================================================
+
+
+DATASETS = [karate_asymmetric, polbooks, email_Eu_core]
 IS_DIRECTED = [True, False]
 
-DATA_PATH = [
-    (RAPIDS_DATASET_ROOT_DIR_PATH / "karate-asymmetric.csv").as_posix(),
-    (RAPIDS_DATASET_ROOT_DIR_PATH / "polbooks.csv").as_posix(),
-    (RAPIDS_DATASET_ROOT_DIR_PATH / "email-Eu-core.csv").as_posix(),
-]
+
+# =============================================================================
+# Helper functions
+# =============================================================================
 
 
-@pytest.mark.mg
-@pytest.mark.parametrize("directed", IS_DIRECTED)
-@pytest.mark.parametrize("data_file", DATA_PATH)
-def test_dask_mg_degree(dask_client, directed, data_file):
-
-    input_data_path = data_file
-    chunksize = cugraph.dask.get_chunksize(input_data_path)
-
-    ddf = dask_cudf.read_csv(
-        input_data_path,
-        chunksize=chunksize,
-        delimiter=" ",
-        names=["src", "dst", "value"],
-        dtype=["int32", "int32", "float32"],
-    )
+def get_sg_graph(dataset, directed):
+    G = dataset.get_graph(create_using=cugraph.Graph(directed=directed))
 
-    df = cudf.read_csv(
-        input_data_path,
-        delimiter=" ",
-        names=["src", "dst", "value"],
-        dtype=["int32", "int32", "float32"],
-    )
+    return G
 
+
+def get_mg_graph(dataset, directed):
+    ddf = dataset.get_dask_edgelist()
     dg = cugraph.Graph(directed=directed)
-    dg.from_dask_cudf_edgelist(ddf, "src", "dst")
+    dg.from_dask_cudf_edgelist(
+        ddf,
+        source="src",
+        destination="dst",
+        edge_attr="wgt",
+        renumber=True,
+        store_transposed=True,
+    )
+
+    return dg
+
+
+# =============================================================================
+# Tests
+# =============================================================================
+
+
+@pytest.mark.mg
+@pytest.mark.skipif(is_single_gpu(), reason="skipping MG testing on Single GPU system")
+@pytest.mark.parametrize("dataset", DATASETS)
+@pytest.mark.parametrize("directed", IS_DIRECTED)
+def test_dask_mg_degree(dask_client, dataset, directed):
+    dg = get_mg_graph(dataset, directed)
     dg.compute_renumber_edge_list()
 
-    g = cugraph.Graph(directed=directed)
-    g.from_cudf_edgelist(df, "src", "dst")
+    g = get_sg_graph(dataset, directed)
 
     merge_df_in_degree = (
         dg.in_degree()

From d4a610224ebb300a3ab0c68b3dc6d7ea0a1b88e1 Mon Sep 17 00:00:00 2001
From: Ralph Liu <ralphl@nvidia.com>
Date: Tue, 27 Feb 2024 08:55:34 -0800
Subject: [PATCH 03/20] Refactor edge_betweenness_centrality

---
 .../test_edge_betweenness_centrality_mg.py    | 228 +++++++-----------
 1 file changed, 88 insertions(+), 140 deletions(-)

diff --git a/python/cugraph/cugraph/tests/centrality/test_edge_betweenness_centrality_mg.py b/python/cugraph/cugraph/tests/centrality/test_edge_betweenness_centrality_mg.py
index 478b7e655d5..ae44917dc3a 100644
--- a/python/cugraph/cugraph/tests/centrality/test_edge_betweenness_centrality_mg.py
+++ b/python/cugraph/cugraph/tests/centrality/test_edge_betweenness_centrality_mg.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2022-2023, NVIDIA CORPORATION.
+# Copyright (c) 2022-2024, NVIDIA CORPORATION.
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
@@ -14,14 +14,9 @@
 import gc
 import pytest
 
-import dask_cudf
-from pylibcugraph.testing.utils import gen_fixture_params_product
-from cugraph.datasets import karate, dolphins
-
 import cugraph
 import cugraph.dask as dcg
-
-# from cugraph.dask.common.mg_utils import is_single_gpu
+from cugraph.datasets import karate, dolphins
 
 
 # =============================================================================
@@ -33,79 +28,34 @@ def setup_function():
     gc.collect()
 
 
-IS_DIRECTED = [True, False]
-INCLUDE_WEIGHTS = [False, True]
-INCLUDE_EDGE_IDS = [False, True]
-NORMALIZED_OPTIONS = [False, True]
-SUBSET_SIZE_OPTIONS = [4, None]
-
-
-# email_Eu_core is too expensive to test
-datasets = [karate, dolphins]
-
-
 # =============================================================================
-# Pytest fixtures
+# Parameters
 # =============================================================================
 
 
-fixture_params = gen_fixture_params_product(
-    (datasets, "graph_file"),
-    (IS_DIRECTED, "directed"),
-    (INCLUDE_WEIGHTS, "include_weights"),
-    (INCLUDE_EDGE_IDS, "include_edgeids"),
-    (NORMALIZED_OPTIONS, "normalized"),
-    (SUBSET_SIZE_OPTIONS, "subset_size"),
-)
-
-
-@pytest.fixture(scope="module", params=fixture_params)
-def input_combo(request):
-    """
-    Simply return the current combination of params as a dictionary for use in
-    tests or other parameterized fixtures.
-    """
-    parameters = dict(
-        zip(
-            (
-                "graph_file",
-                "directed",
-                "include_weights",
-                "include_edge_ids",
-                "normalized",
-                "subset_size",
-                "subset_seed",
-            ),
-            request.param,
-        )
-    )
+DATASETS = [karate, dolphins]
+IS_DIRECTED = [True, False]
+IS_WEIGHTED = [True, False]
+INCLUDE_EDGE_IDS = [True, False]
+IS_NORMALIZED = [True, False]
+SUBSET_SIZES = [4, None]
 
-    return parameters
 
+# =============================================================================
+# Helper functions
+# =============================================================================
 
-@pytest.fixture(scope="module")
-def input_expected_output(input_combo):
-    """
-    This fixture returns the inputs and expected results from the edge
-    betweenness centrality algo.
-    (based on cuGraph edge betweenness centrality) which can be used
-    for validation.
-    """
-    directed = input_combo["directed"]
-    normalized = input_combo["normalized"]
-    k = input_combo["subset_size"]
-    subset_seed = 42
-    edge_ids = input_combo["include_edge_ids"]
-    weight = input_combo["include_weights"]
 
-    df = input_combo["graph_file"].get_edgelist()
+def get_sg_graph(dataset, directed, edge_ids):
+    dataset.unload()
+    df = dataset.get_edgelist()
     if edge_ids:
         if not directed:
             # Edge ids not supported for undirected graph
-            return
-        dtype = df.dtypes[0]
+            return None
+        dtype = df.dtypes.iloc[0]
         edge_id = "edge_id"
-        df["edge_id"] = df.index
+        df[edge_id] = df.index
         df = df.astype(dtype)
 
     else:
@@ -115,30 +65,13 @@ def input_expected_output(input_combo):
     G.from_cudf_edgelist(
         df, source="src", destination="dst", weight="wgt", edge_id=edge_id
     )
-    if isinstance(k, int):
-        k = G.select_random_vertices(subset_seed, k)
 
-    input_combo["k"] = k
-    # Save the results back to the input_combo dictionary to prevent redundant
-    # cuGraph runs. Other tests using the input_combo fixture will look for
-    # them, and if not present they will have to re-run the same cuGraph call.
-    sg_cugraph_edge_bc = (
-        cugraph.edge_betweenness_centrality(G, k, normalized)
-        .sort_values(["src", "dst"])
-        .reset_index(drop=True)
-    )
+    return G
 
-    input_data_path = input_combo["graph_file"].get_path()
 
-    input_combo["sg_cugraph_results"] = sg_cugraph_edge_bc
-    chunksize = dcg.get_chunksize(input_data_path)
-    ddf = dask_cudf.read_csv(
-        input_data_path,
-        chunksize=chunksize,
-        delimiter=" ",
-        names=["src", "dst", "value"],
-        dtype=["int32", "int32", "float32"],
-    )
+def get_mg_graph(dataset, directed, edge_ids, weight):
+    dataset.unload()
+    ddf = dataset.get_dask_edgelist()
 
     if weight:
         weight = ddf
@@ -154,20 +87,16 @@ def input_expected_output(input_combo):
         edge_id = None
 
     dg = cugraph.Graph(directed=directed)
-
     dg.from_dask_cudf_edgelist(
         ddf,
         source="src",
         destination="dst",
-        weight="value",
+        weight="wgt",
         edge_id=edge_id,
         renumber=True,
     )
 
-    input_combo["MGGraph"] = dg
-    input_combo["include_weights"] = weight
-
-    return input_combo
+    return dg, weight
 
 
 # =============================================================================
@@ -175,57 +104,76 @@ def input_expected_output(input_combo):
 # =============================================================================
 
 
-# @pytest.mark.skipif(
-#    is_single_gpu(), reason="skipping MG testing on Single GPU system"
-# )
 @pytest.mark.mg
+@pytest.mark.parametrize("dataset", DATASETS)
+@pytest.mark.parametrize("directed", IS_DIRECTED)
+@pytest.mark.parametrize("weighted", IS_WEIGHTED)
+@pytest.mark.parametrize("edge_ids", INCLUDE_EDGE_IDS)
+@pytest.mark.parametrize("normalized", IS_NORMALIZED)
+@pytest.mark.parametrize("subset_size", SUBSET_SIZES)
 def test_dask_mg_edge_betweenness_centrality(
-    dask_client, benchmark, input_expected_output
+    dask_client,
+    dataset,
+    directed,
+    weighted,
+    edge_ids,
+    normalized,
+    subset_size,
+    benchmark,
 ):
-    if input_expected_output is not None:
-        dg = input_expected_output["MGGraph"]
-        k = input_expected_output["k"]
-        normalized = input_expected_output["normalized"]
-        weight = input_expected_output["include_weights"]
-        if weight is not None:
-            with pytest.raises(NotImplementedError):
-                result_edge_bc = benchmark(
-                    dcg.edge_betweenness_centrality, dg, k, normalized, weight=weight
-                )
-
-        else:
+    g = get_sg_graph(dataset, directed, edge_ids)
+
+    if g is None:
+        pytest.skip("Edge_ids not supported for undirected graph")
+
+    dg, weight = get_mg_graph(dataset, directed, edge_ids, weighted)
+    subset_seed = 42
+
+    k = subset_size
+    if isinstance(k, int):
+        k = g.select_random_vertices(subset_seed, k)
+
+    sg_cugraph_edge_bc = (
+        cugraph.edge_betweenness_centrality(g, k, normalized)
+        .sort_values(["src", "dst"])
+        .reset_index(drop=True)
+    )
+
+    if weight is not None:
+        with pytest.raises(NotImplementedError):
             result_edge_bc = benchmark(
                 dcg.edge_betweenness_centrality, dg, k, normalized, weight=weight
             )
-            result_edge_bc = (
-                result_edge_bc.compute()
-                .sort_values(["src", "dst"])
-                .reset_index(drop=True)
-                .rename(columns={"betweenness_centrality": "mg_betweenness_centrality"})
-            )
 
-            if len(result_edge_bc.columns) > 3:
-                result_edge_bc = result_edge_bc.rename(
-                    columns={"edge_id": "mg_edge_id"}
-                )
+    else:
+        result_edge_bc = benchmark(
+            dcg.edge_betweenness_centrality, dg, k, normalized, weight=weight
+        )
+        result_edge_bc = (
+            result_edge_bc.compute()
+            .sort_values(["src", "dst"])
+            .reset_index(drop=True)
+            .rename(columns={"betweenness_centrality": "mg_betweenness_centrality"})
+        )
 
-            expected_output = input_expected_output["sg_cugraph_results"].reset_index(
-                drop=True
-            )
-            result_edge_bc["betweenness_centrality"] = expected_output[
-                "betweenness_centrality"
-            ]
-            if len(expected_output.columns) > 3:
-                result_edge_bc["edge_id"] = expected_output["edge_id"]
-                edge_id_diff = result_edge_bc.query("mg_edge_id != edge_id")
-                assert len(edge_id_diff) == 0
-
-            edge_bc_diffs1 = result_edge_bc.query(
-                "mg_betweenness_centrality - betweenness_centrality > 0.01"
-            )
-            edge_bc_diffs2 = result_edge_bc.query(
-                "betweenness_centrality - mg_betweenness_centrality < -0.01"
-            )
+        if len(result_edge_bc.columns) > 3:
+            result_edge_bc = result_edge_bc.rename(columns={"edge_id": "mg_edge_id"})
+
+        expected_output = sg_cugraph_edge_bc.reset_index(drop=True)
+        result_edge_bc["betweenness_centrality"] = expected_output[
+            "betweenness_centrality"
+        ]
+        if len(expected_output.columns) > 3:
+            result_edge_bc["edge_id"] = expected_output["edge_id"]
+            edge_id_diff = result_edge_bc.query("mg_edge_id != edge_id")
+            assert len(edge_id_diff) == 0
+
+        edge_bc_diffs1 = result_edge_bc.query(
+            "mg_betweenness_centrality - betweenness_centrality > 0.01"
+        )
+        edge_bc_diffs2 = result_edge_bc.query(
+            "betweenness_centrality - mg_betweenness_centrality < -0.01"
+        )
 
-            assert len(edge_bc_diffs1) == 0
-            assert len(edge_bc_diffs2) == 0
+        assert len(edge_bc_diffs1) == 0
+        assert len(edge_bc_diffs2) == 0

From f7c78b95ac3ba62ce292a06c80e80d406847c6fc Mon Sep 17 00:00:00 2001
From: Ralph Liu <ralphl@nvidia.com>
Date: Sun, 10 Mar 2024 20:00:29 -0700
Subject: [PATCH 04/20] Revert batch_betweenness_centrality

---
 .../test_batch_betweenness_centrality_mg.py   | 41 ++++++++-----------
 1 file changed, 18 insertions(+), 23 deletions(-)

diff --git a/python/cugraph/cugraph/tests/centrality/test_batch_betweenness_centrality_mg.py b/python/cugraph/cugraph/tests/centrality/test_batch_betweenness_centrality_mg.py
index 7050d0c9e55..9d858919786 100644
--- a/python/cugraph/cugraph/tests/centrality/test_batch_betweenness_centrality_mg.py
+++ b/python/cugraph/cugraph/tests/centrality/test_batch_betweenness_centrality_mg.py
@@ -24,49 +24,44 @@
     compare_scores,
 )
 
+DIRECTED_GRAPH_OPTIONS = [False, True]
+WEIGHTED_GRAPH_OPTIONS = [False, True]
+ENDPOINTS_OPTIONS = [False, True]
+NORMALIZED_OPTIONS = [False, True]
+DEFAULT_EPSILON = 0.0001
+SUBSET_SIZE_OPTIONS = [4, None]
+SUBSET_SEED_OPTIONS = [42]
+
 
 # =============================================================================
 # Parameters
 # =============================================================================
-
-
 DATASETS = [karate]
-DEFAULT_EPSILON = 0.0001
-IS_DIRECTED = [False, True]
-ENDPOINTS = [False, True]
-IS_NORMALIZED = [False, True]
-RESULT_DTYPES = [np.float64]
-SUBSET_SIZES = [4, None]
-SUBSET_SEEDS = [42]
-IS_WEIGHTED = [False, True]
+# FIXME: The "preset_gpu_count" from 21.08 and below are currently not
+# supported and have been removed
+
+RESULT_DTYPE_OPTIONS = [np.float64]
 
 
 # =============================================================================
 # Pytest Setup / Teardown - called for each test function
 # =============================================================================
-
-
 def setup_function():
     gc.collect()
 
 
-# =============================================================================
-# Tests
-# =============================================================================
-
-
 @pytest.mark.mg
 @pytest.mark.skipif(is_single_gpu(), reason="skipping MG testing on Single GPU system")
 @pytest.mark.parametrize(
     "graph_file", DATASETS, ids=[f"dataset={d.get_path().stem}" for d in DATASETS]
 )
-@pytest.mark.parametrize("directed", IS_DIRECTED)
-@pytest.mark.parametrize("subset_size", SUBSET_SIZES)
-@pytest.mark.parametrize("normalized", IS_NORMALIZED)
+@pytest.mark.parametrize("directed", DIRECTED_GRAPH_OPTIONS)
+@pytest.mark.parametrize("subset_size", SUBSET_SIZE_OPTIONS)
+@pytest.mark.parametrize("normalized", NORMALIZED_OPTIONS)
 @pytest.mark.parametrize("weight", [None])
-@pytest.mark.parametrize("endpoints", ENDPOINTS)
-@pytest.mark.parametrize("subset_seed", SUBSET_SEEDS)
-@pytest.mark.parametrize("result_dtype", RESULT_DTYPES)
+@pytest.mark.parametrize("endpoints", ENDPOINTS_OPTIONS)
+@pytest.mark.parametrize("subset_seed", SUBSET_SEED_OPTIONS)
+@pytest.mark.parametrize("result_dtype", RESULT_DTYPE_OPTIONS)
 def test_mg_betweenness_centrality(
     graph_file,
     directed,

From 32e0ad977e9cf5e6cf96c9550e178e1c962b604d Mon Sep 17 00:00:00 2001
From: Ralph Liu <ralphl@nvidia.com>
Date: Sun, 10 Mar 2024 21:32:19 -0700
Subject: [PATCH 05/20] Revert batch_edge_betweenness_centrality

---
 ...st_batch_edge_betweenness_centrality_mg.py | 38 +++++++++----------
 1 file changed, 19 insertions(+), 19 deletions(-)

diff --git a/python/cugraph/cugraph/tests/centrality/test_batch_edge_betweenness_centrality_mg.py b/python/cugraph/cugraph/tests/centrality/test_batch_edge_betweenness_centrality_mg.py
index 48364a4a79a..eb1dec7e5bd 100644
--- a/python/cugraph/cugraph/tests/centrality/test_batch_edge_betweenness_centrality_mg.py
+++ b/python/cugraph/cugraph/tests/centrality/test_batch_edge_betweenness_centrality_mg.py
@@ -19,49 +19,49 @@
 from cugraph.dask.common.mg_utils import is_single_gpu
 from cugraph.datasets import karate, netscience
 
+# Get parameters from standard betwenness_centrality_test
+# As tests directory is not a module, we need to add it to the path
+# FIXME: Test must be reworked to import from 'cugraph.testing' instead of
+# importing from other tests
+from test_edge_betweenness_centrality import (
+    DIRECTED_GRAPH_OPTIONS,
+    NORMALIZED_OPTIONS,
+    DEFAULT_EPSILON,
+    SUBSET_SIZE_OPTIONS,
+)
+
 from test_edge_betweenness_centrality import (
     calc_edge_betweenness_centrality,
     compare_scores,
 )
 
-
 # =============================================================================
 # Parameters
 # =============================================================================
-
-
 DATASETS = [karate, netscience]
-IS_DIRECTED = [True, False]
-IS_NORMALIZED = [True, False]
-DEFAULT_EPSILON = 0.0001
-SUBSET_SIZES = [4, None]
-RESULT_DTYPES = [np.float32, np.float64]
+
+# FIXME: The "preset_gpu_count" from 21.08 and below are not supported and have
+# been removed
+RESULT_DTYPE_OPTIONS = [np.float32, np.float64]
 
 
 # =============================================================================
 # Pytest Setup / Teardown - called for each test function
 # =============================================================================
-
-
 def setup_function():
     gc.collect()
 
 
-# =============================================================================
-# Tests
-# =============================================================================
-
-
 # FIXME: Fails for directed = False(bc score twice as much) and normalized = True.
 @pytest.mark.mg
 @pytest.mark.skipif(is_single_gpu(), reason="skipping MG testing on Single GPU system")
 @pytest.mark.parametrize(
     "graph_file", DATASETS, ids=[f"dataset={d.get_path().stem}" for d in DATASETS]
 )
-@pytest.mark.parametrize("directed", IS_DIRECTED)
-@pytest.mark.parametrize("subset_size", SUBSET_SIZES)
-@pytest.mark.parametrize("normalized", IS_NORMALIZED)
-@pytest.mark.parametrize("result_dtype", RESULT_DTYPES)
+@pytest.mark.parametrize("directed", DIRECTED_GRAPH_OPTIONS)
+@pytest.mark.parametrize("subset_size", SUBSET_SIZE_OPTIONS)
+@pytest.mark.parametrize("normalized", NORMALIZED_OPTIONS)
+@pytest.mark.parametrize("result_dtype", RESULT_DTYPE_OPTIONS)
 def test_mg_edge_betweenness_centrality(
     graph_file,
     directed,

From f98b0bc3df43a5ea1185b5f4bd62a363eb2130f2 Mon Sep 17 00:00:00 2001
From: Ralph Liu <ralphl@nvidia.com>
Date: Sun, 10 Mar 2024 22:59:10 -0700
Subject: [PATCH 06/20] Revert test_betweenness_centrality

---
 .../test_betweenness_centrality_mg.py         | 177 +++++++++++-------
 1 file changed, 112 insertions(+), 65 deletions(-)

diff --git a/python/cugraph/cugraph/tests/centrality/test_betweenness_centrality_mg.py b/python/cugraph/cugraph/tests/centrality/test_betweenness_centrality_mg.py
index 48fbe796bb4..60d344eb0fd 100644
--- a/python/cugraph/cugraph/tests/centrality/test_betweenness_centrality_mg.py
+++ b/python/cugraph/cugraph/tests/centrality/test_betweenness_centrality_mg.py
@@ -15,11 +15,13 @@
 
 import pytest
 
+import dask_cudf
 import cupy
 import cudf
 import cugraph
 import cugraph.dask as dcg
-from cugraph.datasets import karate, dolphins
+from cugraph.testing import utils
+from pylibcugraph.testing import gen_fixture_params_product
 
 
 # =============================================================================
@@ -31,102 +33,145 @@ def setup_function():
     gc.collect()
 
 
-# =============================================================================
-# Parameters
-# =============================================================================
-
-DATASETS = [karate, dolphins]
 IS_DIRECTED = [True, False]
-IS_NORMALIZED = [True, False]
-ENDPOINTS = [True, False]
-SUBSET_SEEDS = [42, None]
-SUBSET_SIZES = [None, 15]
-VERTEX_LIST_TYPES = [list, cudf]
+
 
 # =============================================================================
-# Helper functions
+# Pytest fixtures
 # =============================================================================
 
-
-def get_sg_graph(dataset, directed):
-    G = dataset.get_graph(create_using=cugraph.Graph(directed=directed))
-
-    return G
-
-
-def get_mg_graph(dataset, directed):
-    ddf = dataset.get_dask_edgelist()
-    dg = cugraph.Graph(directed=directed)
-    dg.from_dask_cudf_edgelist(
-        ddf,
-        source="src",
-        destination="dst",
-        edge_attr="wgt",
-        renumber=True,
-        store_transposed=True,
+datasets = utils.DATASETS_UNDIRECTED
+
+fixture_params = gen_fixture_params_product(
+    (datasets, "graph_file"),
+    ([False, True], "normalized"),
+    ([False, True], "endpoints"),
+    ([42, None], "subset_seed"),
+    ([None, 15], "subset_size"),
+    (IS_DIRECTED, "directed"),
+    ([list, cudf], "vertex_list_type"),
+)
+
+
+@pytest.fixture(scope="module", params=fixture_params)
+def input_combo(request):
+    """
+    Simply return the current combination of params as a dictionary for use in
+    tests or other parameterized fixtures.
+    """
+    parameters = dict(
+        zip(
+            (
+                "graph_file",
+                "normalized",
+                "endpoints",
+                "subset_seed",
+                "subset_size",
+                "directed",
+                "vertex_list_type",
+            ),
+            request.param,
+        )
     )
 
-    return dg
+    return parameters
 
 
-# =============================================================================
-# Tests
-# =============================================================================
+@pytest.fixture(scope="module")
+def input_expected_output(input_combo):
+    """
+    This fixture returns the inputs and expected results from the
+    betweenness_centrality algo based on cuGraph betweenness_centrality) which can
+    be used for validation.
+    """
 
+    input_data_path = input_combo["graph_file"]
+    normalized = input_combo["normalized"]
+    endpoints = input_combo["endpoints"]
+    random_state = input_combo["subset_seed"]
+    subset_size = input_combo["subset_size"]
+    directed = input_combo["directed"]
+    vertex_list_type = input_combo["vertex_list_type"]
 
-@pytest.mark.mg
-@pytest.mark.parametrize("dataset", DATASETS)
-@pytest.mark.parametrize("directed", IS_DIRECTED)
-@pytest.mark.parametrize("normalized", IS_NORMALIZED)
-@pytest.mark.parametrize("endpoint", ENDPOINTS)
-@pytest.mark.parametrize("subset_seed", SUBSET_SEEDS)
-@pytest.mark.parametrize("subset_size", SUBSET_SIZES)
-@pytest.mark.parametrize("v_list_type", VERTEX_LIST_TYPES)
-def test_dask_mg_betweenness_centrality(
-    dataset,
-    directed,
-    normalized,
-    endpoint,
-    subset_seed,
-    subset_size,
-    v_list_type,
-    dask_client,
-    benchmark,
-):
-    g = get_sg_graph(dataset, directed)
-    dataset.unload()
-    dg = get_mg_graph(dataset, directed)
-    random_state = subset_seed
+    G = utils.generate_cugraph_graph_from_file(input_data_path, directed=directed)
 
     if subset_size is None:
         k = subset_size
     elif isinstance(subset_size, int):
         # Select random vertices
-        k = g.select_random_vertices(
+        k = G.select_random_vertices(
             random_state=random_state, num_vertices=subset_size
         )
-        if v_list_type is list:
+        if vertex_list_type is list:
             k = k.to_arrow().to_pylist()
 
         print("the seeds are \n", k)
-        if v_list_type is int:
+        if vertex_list_type is int:
             # This internally sample k vertices in betweenness centrality.
             # Since the nodes that will be sampled by each implementation will
             # be random, therefore sample all vertices which will make the test
             # consistent.
-            k = len(g.nodes())
+            k = len(G.nodes())
+
+    input_combo["k"] = k
 
     sg_cugraph_bc = cugraph.betweenness_centrality(
-        g, k=k, normalized=normalized, endpoints=endpoint, random_state=random_state
+        G, k=k, normalized=normalized, endpoints=endpoints, random_state=random_state
     )
+    # Save the results back to the input_combo dictionary to prevent redundant
+    # cuGraph runs. Other tests using the input_combo fixture will look for
+    # them, and if not present they will have to re-run the same cuGraph call.
     sg_cugraph_bc = sg_cugraph_bc.sort_values("vertex").reset_index(drop=True)
 
+    input_combo["sg_cugraph_results"] = sg_cugraph_bc
+    chunksize = dcg.get_chunksize(input_data_path)
+    ddf = dask_cudf.read_csv(
+        input_data_path,
+        chunksize=chunksize,
+        delimiter=" ",
+        names=["src", "dst", "value"],
+        dtype=["int32", "int32", "float32"],
+    )
+
+    dg = cugraph.Graph(directed=directed)
+    dg.from_dask_cudf_edgelist(
+        ddf,
+        source="src",
+        destination="dst",
+        edge_attr="value",
+        renumber=True,
+        store_transposed=True,
+    )
+
+    input_combo["MGGraph"] = dg
+
+    return input_combo
+
+
+# =============================================================================
+# Tests
+# =============================================================================
+
+
+# @pytest.mark.skipif(
+#    is_single_gpu(), reason="skipping MG testing on Single GPU system"
+# )
+
+
+@pytest.mark.mg
+def test_dask_mg_betweenness_centrality(dask_client, benchmark, input_expected_output):
+
+    dg = input_expected_output["MGGraph"]
+    k = input_expected_output["k"]
+    endpoints = input_expected_output["endpoints"]
+    normalized = input_expected_output["normalized"]
+    random_state = input_expected_output["subset_seed"]
     mg_bc_results = benchmark(
         dcg.betweenness_centrality,
         dg,
         k=k,
         normalized=normalized,
-        endpoints=endpoint,
+        endpoints=endpoints,
         random_state=random_state,
     )
 
@@ -134,9 +179,11 @@ def test_dask_mg_betweenness_centrality(
         mg_bc_results.compute().sort_values("vertex").reset_index(drop=True)
     )["betweenness_centrality"].to_cupy()
 
-    sg_bc_results = (sg_cugraph_bc.sort_values("vertex").reset_index(drop=True))[
-        "betweenness_centrality"
-    ].to_cupy()
+    sg_bc_results = (
+        input_expected_output["sg_cugraph_results"]
+        .sort_values("vertex")
+        .reset_index(drop=True)
+    )["betweenness_centrality"].to_cupy()
 
     diff = cupy.isclose(mg_bc_results, sg_bc_results)
 

From 36b31c9c2f5c575ba4ea99f5e2a407a449d9a10e Mon Sep 17 00:00:00 2001
From: Ralph Liu <ralphl@nvidia.com>
Date: Mon, 11 Mar 2024 06:46:48 -0700
Subject: [PATCH 07/20] Revert degree_centrality

---
 .../centrality/test_degree_centrality_mg.py   | 75 ++++++++-----------
 1 file changed, 33 insertions(+), 42 deletions(-)

diff --git a/python/cugraph/cugraph/tests/centrality/test_degree_centrality_mg.py b/python/cugraph/cugraph/tests/centrality/test_degree_centrality_mg.py
index 8606649c745..18a9941ab06 100644
--- a/python/cugraph/cugraph/tests/centrality/test_degree_centrality_mg.py
+++ b/python/cugraph/cugraph/tests/centrality/test_degree_centrality_mg.py
@@ -15,12 +15,13 @@
 
 import pytest
 
+import cudf
+import dask_cudf
 import cugraph
 from cugraph.dask.common.mg_utils import is_single_gpu
-from cugraph.datasets import karate_asymmetric, polbooks, email_Eu_core
+from cugraph.testing.utils import RAPIDS_DATASET_ROOT_DIR_PATH
 from cudf.testing import assert_series_equal
 
-
 # =============================================================================
 # Pytest Setup / Teardown - called for each test function
 # =============================================================================
@@ -30,55 +31,45 @@ def setup_function():
     gc.collect()
 
 
-# =============================================================================
-# Parameters
-# =============================================================================
-
-
-DATASETS = [karate_asymmetric, polbooks, email_Eu_core]
 IS_DIRECTED = [True, False]
 
-
-# =============================================================================
-# Helper functions
-# =============================================================================
-
-
-def get_sg_graph(dataset, directed):
-    G = dataset.get_graph(create_using=cugraph.Graph(directed=directed))
-
-    return G
-
-
-def get_mg_graph(dataset, directed):
-    ddf = dataset.get_dask_edgelist()
-    dg = cugraph.Graph(directed=directed)
-    dg.from_dask_cudf_edgelist(
-        ddf,
-        source="src",
-        destination="dst",
-        edge_attr="wgt",
-        renumber=True,
-        store_transposed=True,
-    )
-
-    return dg
-
-
-# =============================================================================
-# Tests
-# =============================================================================
+DATA_PATH = [
+    (RAPIDS_DATASET_ROOT_DIR_PATH / "karate-asymmetric.csv").as_posix(),
+    (RAPIDS_DATASET_ROOT_DIR_PATH / "polbooks.csv").as_posix(),
+    (RAPIDS_DATASET_ROOT_DIR_PATH / "email-Eu-core.csv").as_posix(),
+]
 
 
 @pytest.mark.mg
 @pytest.mark.skipif(is_single_gpu(), reason="skipping MG testing on Single GPU system")
-@pytest.mark.parametrize("dataset", DATASETS)
 @pytest.mark.parametrize("directed", IS_DIRECTED)
-def test_dask_mg_degree(dask_client, dataset, directed):
-    dg = get_mg_graph(dataset, directed)
+@pytest.mark.parametrize("data_file", DATA_PATH)
+def test_dask_mg_degree(dask_client, directed, data_file):
+
+    input_data_path = data_file
+    chunksize = cugraph.dask.get_chunksize(input_data_path)
+
+    ddf = dask_cudf.read_csv(
+        input_data_path,
+        chunksize=chunksize,
+        delimiter=" ",
+        names=["src", "dst", "value"],
+        dtype=["int32", "int32", "float32"],
+    )
+
+    df = cudf.read_csv(
+        input_data_path,
+        delimiter=" ",
+        names=["src", "dst", "value"],
+        dtype=["int32", "int32", "float32"],
+    )
+
+    dg = cugraph.Graph(directed=directed)
+    dg.from_dask_cudf_edgelist(ddf, "src", "dst")
     dg.compute_renumber_edge_list()
 
-    g = get_sg_graph(dataset, directed)
+    g = cugraph.Graph(directed=directed)
+    g.from_cudf_edgelist(df, "src", "dst")
 
     merge_df_in_degree = (
         dg.in_degree()

From 786dc9f5bf1a77b9528a50a560ee4902d63ecd9f Mon Sep 17 00:00:00 2001
From: Ralph Liu <ralphl@nvidia.com>
Date: Mon, 11 Mar 2024 08:13:00 -0700
Subject: [PATCH 08/20] Revert edge_betweenness_centrality

---
 .../test_edge_betweenness_centrality_mg.py    | 226 +++++++++++-------
 1 file changed, 139 insertions(+), 87 deletions(-)

diff --git a/python/cugraph/cugraph/tests/centrality/test_edge_betweenness_centrality_mg.py b/python/cugraph/cugraph/tests/centrality/test_edge_betweenness_centrality_mg.py
index ae44917dc3a..6c066a947ac 100644
--- a/python/cugraph/cugraph/tests/centrality/test_edge_betweenness_centrality_mg.py
+++ b/python/cugraph/cugraph/tests/centrality/test_edge_betweenness_centrality_mg.py
@@ -14,9 +14,14 @@
 import gc
 import pytest
 
+import dask_cudf
+from pylibcugraph.testing.utils import gen_fixture_params_product
+from cugraph.datasets import karate, dolphins
+
 import cugraph
 import cugraph.dask as dcg
-from cugraph.datasets import karate, dolphins
+
+# from cugraph.dask.common.mg_utils import is_single_gpu
 
 
 # =============================================================================
@@ -28,34 +33,79 @@ def setup_function():
     gc.collect()
 
 
-# =============================================================================
-# Parameters
-# =============================================================================
+IS_DIRECTED = [True, False]
+INCLUDE_WEIGHTS = [False, True]
+INCLUDE_EDGE_IDS = [False, True]
+NORMALIZED_OPTIONS = [False, True]
+SUBSET_SIZE_OPTIONS = [4, None]
 
 
-DATASETS = [karate, dolphins]
-IS_DIRECTED = [True, False]
-IS_WEIGHTED = [True, False]
-INCLUDE_EDGE_IDS = [True, False]
-IS_NORMALIZED = [True, False]
-SUBSET_SIZES = [4, None]
+# email_Eu_core is too expensive to test
+datasets = [karate, dolphins]
 
 
 # =============================================================================
-# Helper functions
+# Pytest fixtures
 # =============================================================================
 
 
-def get_sg_graph(dataset, directed, edge_ids):
-    dataset.unload()
-    df = dataset.get_edgelist()
+fixture_params = gen_fixture_params_product(
+    (datasets, "graph_file"),
+    (IS_DIRECTED, "directed"),
+    (INCLUDE_WEIGHTS, "include_weights"),
+    (INCLUDE_EDGE_IDS, "include_edgeids"),
+    (NORMALIZED_OPTIONS, "normalized"),
+    (SUBSET_SIZE_OPTIONS, "subset_size"),
+)
+
+
+@pytest.fixture(scope="module", params=fixture_params)
+def input_combo(request):
+    """
+    Simply return the current combination of params as a dictionary for use in
+    tests or other parameterized fixtures.
+    """
+    parameters = dict(
+        zip(
+            (
+                "graph_file",
+                "directed",
+                "include_weights",
+                "include_edge_ids",
+                "normalized",
+                "subset_size",
+                "subset_seed",
+            ),
+            request.param,
+        )
+    )
+
+    return parameters
+
+
+@pytest.fixture(scope="module")
+def input_expected_output(input_combo):
+    """
+    This fixture returns the inputs and expected results from the edge
+    betweenness centrality algo.
+    (based on cuGraph edge betweenness centrality) which can be used
+    for validation.
+    """
+    directed = input_combo["directed"]
+    normalized = input_combo["normalized"]
+    k = input_combo["subset_size"]
+    subset_seed = 42
+    edge_ids = input_combo["include_edge_ids"]
+    weight = input_combo["include_weights"]
+
+    df = input_combo["graph_file"].get_edgelist()
     if edge_ids:
         if not directed:
             # Edge ids not supported for undirected graph
-            return None
-        dtype = df.dtypes.iloc[0]
+            return
+        dtype = df.dtypes[0]
         edge_id = "edge_id"
-        df[edge_id] = df.index
+        df["edge_id"] = df.index
         df = df.astype(dtype)
 
     else:
@@ -65,13 +115,30 @@ def get_sg_graph(dataset, directed, edge_ids):
     G.from_cudf_edgelist(
         df, source="src", destination="dst", weight="wgt", edge_id=edge_id
     )
+    if isinstance(k, int):
+        k = G.select_random_vertices(subset_seed, k)
 
-    return G
+    input_combo["k"] = k
+    # Save the results back to the input_combo dictionary to prevent redundant
+    # cuGraph runs. Other tests using the input_combo fixture will look for
+    # them, and if not present they will have to re-run the same cuGraph call.
+    sg_cugraph_edge_bc = (
+        cugraph.edge_betweenness_centrality(G, k, normalized)
+        .sort_values(["src", "dst"])
+        .reset_index(drop=True)
+    )
 
+    input_data_path = input_combo["graph_file"].get_path()
 
-def get_mg_graph(dataset, directed, edge_ids, weight):
-    dataset.unload()
-    ddf = dataset.get_dask_edgelist()
+    input_combo["sg_cugraph_results"] = sg_cugraph_edge_bc
+    chunksize = dcg.get_chunksize(input_data_path)
+    ddf = dask_cudf.read_csv(
+        input_data_path,
+        chunksize=chunksize,
+        delimiter=" ",
+        names=["src", "dst", "value"],
+        dtype=["int32", "int32", "float32"],
+    )
 
     if weight:
         weight = ddf
@@ -87,16 +154,20 @@ def get_mg_graph(dataset, directed, edge_ids, weight):
         edge_id = None
 
     dg = cugraph.Graph(directed=directed)
+
     dg.from_dask_cudf_edgelist(
         ddf,
         source="src",
         destination="dst",
-        weight="wgt",
+        weight="value",
         edge_id=edge_id,
         renumber=True,
     )
 
-    return dg, weight
+    input_combo["MGGraph"] = dg
+    input_combo["include_weights"] = weight
+
+    return input_combo
 
 
 # =============================================================================
@@ -104,76 +175,57 @@ def get_mg_graph(dataset, directed, edge_ids, weight):
 # =============================================================================
 
 
+# @pytest.mark.skipif(
+#    is_single_gpu(), reason="skipping MG testing on Single GPU system"
+# )
 @pytest.mark.mg
-@pytest.mark.parametrize("dataset", DATASETS)
-@pytest.mark.parametrize("directed", IS_DIRECTED)
-@pytest.mark.parametrize("weighted", IS_WEIGHTED)
-@pytest.mark.parametrize("edge_ids", INCLUDE_EDGE_IDS)
-@pytest.mark.parametrize("normalized", IS_NORMALIZED)
-@pytest.mark.parametrize("subset_size", SUBSET_SIZES)
 def test_dask_mg_edge_betweenness_centrality(
-    dask_client,
-    dataset,
-    directed,
-    weighted,
-    edge_ids,
-    normalized,
-    subset_size,
-    benchmark,
+    dask_client, benchmark, input_expected_output
 ):
-    g = get_sg_graph(dataset, directed, edge_ids)
-
-    if g is None:
-        pytest.skip("Edge_ids not supported for undirected graph")
-
-    dg, weight = get_mg_graph(dataset, directed, edge_ids, weighted)
-    subset_seed = 42
-
-    k = subset_size
-    if isinstance(k, int):
-        k = g.select_random_vertices(subset_seed, k)
-
-    sg_cugraph_edge_bc = (
-        cugraph.edge_betweenness_centrality(g, k, normalized)
-        .sort_values(["src", "dst"])
-        .reset_index(drop=True)
-    )
-
-    if weight is not None:
-        with pytest.raises(NotImplementedError):
+    if input_expected_output is not None:
+        dg = input_expected_output["MGGraph"]
+        k = input_expected_output["k"]
+        normalized = input_expected_output["normalized"]
+        weight = input_expected_output["include_weights"]
+        if weight is not None:
+            with pytest.raises(NotImplementedError):
+                result_edge_bc = benchmark(
+                    dcg.edge_betweenness_centrality, dg, k, normalized, weight=weight
+                )
+
+        else:
             result_edge_bc = benchmark(
                 dcg.edge_betweenness_centrality, dg, k, normalized, weight=weight
             )
+            result_edge_bc = (
+                result_edge_bc.compute()
+                .sort_values(["src", "dst"])
+                .reset_index(drop=True)
+                .rename(columns={"betweenness_centrality": "mg_betweenness_centrality"})
+            )
 
-    else:
-        result_edge_bc = benchmark(
-            dcg.edge_betweenness_centrality, dg, k, normalized, weight=weight
-        )
-        result_edge_bc = (
-            result_edge_bc.compute()
-            .sort_values(["src", "dst"])
-            .reset_index(drop=True)
-            .rename(columns={"betweenness_centrality": "mg_betweenness_centrality"})
-        )
-
-        if len(result_edge_bc.columns) > 3:
-            result_edge_bc = result_edge_bc.rename(columns={"edge_id": "mg_edge_id"})
-
-        expected_output = sg_cugraph_edge_bc.reset_index(drop=True)
-        result_edge_bc["betweenness_centrality"] = expected_output[
-            "betweenness_centrality"
-        ]
-        if len(expected_output.columns) > 3:
-            result_edge_bc["edge_id"] = expected_output["edge_id"]
-            edge_id_diff = result_edge_bc.query("mg_edge_id != edge_id")
-            assert len(edge_id_diff) == 0
+            if len(result_edge_bc.columns) > 3:
+                result_edge_bc = result_edge_bc.rename(
+                    columns={"edge_id": "mg_edge_id"}
+                )
 
-        edge_bc_diffs1 = result_edge_bc.query(
-            "mg_betweenness_centrality - betweenness_centrality > 0.01"
-        )
-        edge_bc_diffs2 = result_edge_bc.query(
-            "betweenness_centrality - mg_betweenness_centrality < -0.01"
-        )
+            expected_output = input_expected_output["sg_cugraph_results"].reset_index(
+                drop=True
+            )
+            result_edge_bc["betweenness_centrality"] = expected_output[
+                "betweenness_centrality"
+            ]
+            if len(expected_output.columns) > 3:
+                result_edge_bc["edge_id"] = expected_output["edge_id"]
+                edge_id_diff = result_edge_bc.query("mg_edge_id != edge_id")
+                assert len(edge_id_diff) == 0
+
+            edge_bc_diffs1 = result_edge_bc.query(
+                "mg_betweenness_centrality - betweenness_centrality > 0.01"
+            )
+            edge_bc_diffs2 = result_edge_bc.query(
+                "betweenness_centrality - mg_betweenness_centrality < -0.01"
+            )
 
-        assert len(edge_bc_diffs1) == 0
-        assert len(edge_bc_diffs2) == 0
+            assert len(edge_bc_diffs1) == 0
+            assert len(edge_bc_diffs2) == 0

From c9d0e3047ee95b8492ef21e3ce4ed783a3ea30b7 Mon Sep 17 00:00:00 2001
From: Ralph Liu <ralphl@nvidia.com>
Date: Mon, 11 Mar 2024 12:48:45 -0700
Subject: [PATCH 09/20] Add back changes except edge_betweenness_centrality

---
 .../test_batch_betweenness_centrality_mg.py   |  41 ++--
 ...st_batch_edge_betweenness_centrality_mg.py |  38 ++--
 .../test_betweenness_centrality_mg.py         | 177 +++++++-----------
 .../centrality/test_degree_centrality_mg.py   |  75 ++++----
 4 files changed, 149 insertions(+), 182 deletions(-)

diff --git a/python/cugraph/cugraph/tests/centrality/test_batch_betweenness_centrality_mg.py b/python/cugraph/cugraph/tests/centrality/test_batch_betweenness_centrality_mg.py
index 9d858919786..7050d0c9e55 100644
--- a/python/cugraph/cugraph/tests/centrality/test_batch_betweenness_centrality_mg.py
+++ b/python/cugraph/cugraph/tests/centrality/test_batch_betweenness_centrality_mg.py
@@ -24,44 +24,49 @@
     compare_scores,
 )
 
-DIRECTED_GRAPH_OPTIONS = [False, True]
-WEIGHTED_GRAPH_OPTIONS = [False, True]
-ENDPOINTS_OPTIONS = [False, True]
-NORMALIZED_OPTIONS = [False, True]
-DEFAULT_EPSILON = 0.0001
-SUBSET_SIZE_OPTIONS = [4, None]
-SUBSET_SEED_OPTIONS = [42]
-
 
 # =============================================================================
 # Parameters
 # =============================================================================
-DATASETS = [karate]
-# FIXME: The "preset_gpu_count" from 21.08 and below are currently not
-# supported and have been removed
 
-RESULT_DTYPE_OPTIONS = [np.float64]
+
+DATASETS = [karate]
+DEFAULT_EPSILON = 0.0001
+IS_DIRECTED = [False, True]
+ENDPOINTS = [False, True]
+IS_NORMALIZED = [False, True]
+RESULT_DTYPES = [np.float64]
+SUBSET_SIZES = [4, None]
+SUBSET_SEEDS = [42]
+IS_WEIGHTED = [False, True]
 
 
 # =============================================================================
 # Pytest Setup / Teardown - called for each test function
 # =============================================================================
+
+
 def setup_function():
     gc.collect()
 
 
+# =============================================================================
+# Tests
+# =============================================================================
+
+
 @pytest.mark.mg
 @pytest.mark.skipif(is_single_gpu(), reason="skipping MG testing on Single GPU system")
 @pytest.mark.parametrize(
     "graph_file", DATASETS, ids=[f"dataset={d.get_path().stem}" for d in DATASETS]
 )
-@pytest.mark.parametrize("directed", DIRECTED_GRAPH_OPTIONS)
-@pytest.mark.parametrize("subset_size", SUBSET_SIZE_OPTIONS)
-@pytest.mark.parametrize("normalized", NORMALIZED_OPTIONS)
+@pytest.mark.parametrize("directed", IS_DIRECTED)
+@pytest.mark.parametrize("subset_size", SUBSET_SIZES)
+@pytest.mark.parametrize("normalized", IS_NORMALIZED)
 @pytest.mark.parametrize("weight", [None])
-@pytest.mark.parametrize("endpoints", ENDPOINTS_OPTIONS)
-@pytest.mark.parametrize("subset_seed", SUBSET_SEED_OPTIONS)
-@pytest.mark.parametrize("result_dtype", RESULT_DTYPE_OPTIONS)
+@pytest.mark.parametrize("endpoints", ENDPOINTS)
+@pytest.mark.parametrize("subset_seed", SUBSET_SEEDS)
+@pytest.mark.parametrize("result_dtype", RESULT_DTYPES)
 def test_mg_betweenness_centrality(
     graph_file,
     directed,
diff --git a/python/cugraph/cugraph/tests/centrality/test_batch_edge_betweenness_centrality_mg.py b/python/cugraph/cugraph/tests/centrality/test_batch_edge_betweenness_centrality_mg.py
index eb1dec7e5bd..48364a4a79a 100644
--- a/python/cugraph/cugraph/tests/centrality/test_batch_edge_betweenness_centrality_mg.py
+++ b/python/cugraph/cugraph/tests/centrality/test_batch_edge_betweenness_centrality_mg.py
@@ -19,49 +19,49 @@
 from cugraph.dask.common.mg_utils import is_single_gpu
 from cugraph.datasets import karate, netscience
 
-# Get parameters from standard betwenness_centrality_test
-# As tests directory is not a module, we need to add it to the path
-# FIXME: Test must be reworked to import from 'cugraph.testing' instead of
-# importing from other tests
-from test_edge_betweenness_centrality import (
-    DIRECTED_GRAPH_OPTIONS,
-    NORMALIZED_OPTIONS,
-    DEFAULT_EPSILON,
-    SUBSET_SIZE_OPTIONS,
-)
-
 from test_edge_betweenness_centrality import (
     calc_edge_betweenness_centrality,
     compare_scores,
 )
 
+
 # =============================================================================
 # Parameters
 # =============================================================================
-DATASETS = [karate, netscience]
 
-# FIXME: The "preset_gpu_count" from 21.08 and below are not supported and have
-# been removed
-RESULT_DTYPE_OPTIONS = [np.float32, np.float64]
+
+DATASETS = [karate, netscience]
+IS_DIRECTED = [True, False]
+IS_NORMALIZED = [True, False]
+DEFAULT_EPSILON = 0.0001
+SUBSET_SIZES = [4, None]
+RESULT_DTYPES = [np.float32, np.float64]
 
 
 # =============================================================================
 # Pytest Setup / Teardown - called for each test function
 # =============================================================================
+
+
 def setup_function():
     gc.collect()
 
 
+# =============================================================================
+# Tests
+# =============================================================================
+
+
 # FIXME: Fails for directed = False(bc score twice as much) and normalized = True.
 @pytest.mark.mg
 @pytest.mark.skipif(is_single_gpu(), reason="skipping MG testing on Single GPU system")
 @pytest.mark.parametrize(
     "graph_file", DATASETS, ids=[f"dataset={d.get_path().stem}" for d in DATASETS]
 )
-@pytest.mark.parametrize("directed", DIRECTED_GRAPH_OPTIONS)
-@pytest.mark.parametrize("subset_size", SUBSET_SIZE_OPTIONS)
-@pytest.mark.parametrize("normalized", NORMALIZED_OPTIONS)
-@pytest.mark.parametrize("result_dtype", RESULT_DTYPE_OPTIONS)
+@pytest.mark.parametrize("directed", IS_DIRECTED)
+@pytest.mark.parametrize("subset_size", SUBSET_SIZES)
+@pytest.mark.parametrize("normalized", IS_NORMALIZED)
+@pytest.mark.parametrize("result_dtype", RESULT_DTYPES)
 def test_mg_edge_betweenness_centrality(
     graph_file,
     directed,
diff --git a/python/cugraph/cugraph/tests/centrality/test_betweenness_centrality_mg.py b/python/cugraph/cugraph/tests/centrality/test_betweenness_centrality_mg.py
index 60d344eb0fd..48fbe796bb4 100644
--- a/python/cugraph/cugraph/tests/centrality/test_betweenness_centrality_mg.py
+++ b/python/cugraph/cugraph/tests/centrality/test_betweenness_centrality_mg.py
@@ -15,13 +15,11 @@
 
 import pytest
 
-import dask_cudf
 import cupy
 import cudf
 import cugraph
 import cugraph.dask as dcg
-from cugraph.testing import utils
-from pylibcugraph.testing import gen_fixture_params_product
+from cugraph.datasets import karate, dolphins
 
 
 # =============================================================================
@@ -33,145 +31,102 @@ def setup_function():
     gc.collect()
 
 
-IS_DIRECTED = [True, False]
+# =============================================================================
+# Parameters
+# =============================================================================
 
+DATASETS = [karate, dolphins]
+IS_DIRECTED = [True, False]
+IS_NORMALIZED = [True, False]
+ENDPOINTS = [True, False]
+SUBSET_SEEDS = [42, None]
+SUBSET_SIZES = [None, 15]
+VERTEX_LIST_TYPES = [list, cudf]
 
 # =============================================================================
-# Pytest fixtures
+# Helper functions
 # =============================================================================
 
-datasets = utils.DATASETS_UNDIRECTED
-
-fixture_params = gen_fixture_params_product(
-    (datasets, "graph_file"),
-    ([False, True], "normalized"),
-    ([False, True], "endpoints"),
-    ([42, None], "subset_seed"),
-    ([None, 15], "subset_size"),
-    (IS_DIRECTED, "directed"),
-    ([list, cudf], "vertex_list_type"),
-)
-
-
-@pytest.fixture(scope="module", params=fixture_params)
-def input_combo(request):
-    """
-    Simply return the current combination of params as a dictionary for use in
-    tests or other parameterized fixtures.
-    """
-    parameters = dict(
-        zip(
-            (
-                "graph_file",
-                "normalized",
-                "endpoints",
-                "subset_seed",
-                "subset_size",
-                "directed",
-                "vertex_list_type",
-            ),
-            request.param,
-        )
+
+def get_sg_graph(dataset, directed):
+    G = dataset.get_graph(create_using=cugraph.Graph(directed=directed))
+
+    return G
+
+
+def get_mg_graph(dataset, directed):
+    ddf = dataset.get_dask_edgelist()
+    dg = cugraph.Graph(directed=directed)
+    dg.from_dask_cudf_edgelist(
+        ddf,
+        source="src",
+        destination="dst",
+        edge_attr="wgt",
+        renumber=True,
+        store_transposed=True,
     )
 
-    return parameters
+    return dg
 
 
-@pytest.fixture(scope="module")
-def input_expected_output(input_combo):
-    """
-    This fixture returns the inputs and expected results from the
-    betweenness_centrality algo based on cuGraph betweenness_centrality) which can
-    be used for validation.
-    """
+# =============================================================================
+# Tests
+# =============================================================================
 
-    input_data_path = input_combo["graph_file"]
-    normalized = input_combo["normalized"]
-    endpoints = input_combo["endpoints"]
-    random_state = input_combo["subset_seed"]
-    subset_size = input_combo["subset_size"]
-    directed = input_combo["directed"]
-    vertex_list_type = input_combo["vertex_list_type"]
 
-    G = utils.generate_cugraph_graph_from_file(input_data_path, directed=directed)
+@pytest.mark.mg
+@pytest.mark.parametrize("dataset", DATASETS)
+@pytest.mark.parametrize("directed", IS_DIRECTED)
+@pytest.mark.parametrize("normalized", IS_NORMALIZED)
+@pytest.mark.parametrize("endpoint", ENDPOINTS)
+@pytest.mark.parametrize("subset_seed", SUBSET_SEEDS)
+@pytest.mark.parametrize("subset_size", SUBSET_SIZES)
+@pytest.mark.parametrize("v_list_type", VERTEX_LIST_TYPES)
+def test_dask_mg_betweenness_centrality(
+    dataset,
+    directed,
+    normalized,
+    endpoint,
+    subset_seed,
+    subset_size,
+    v_list_type,
+    dask_client,
+    benchmark,
+):
+    g = get_sg_graph(dataset, directed)
+    dataset.unload()
+    dg = get_mg_graph(dataset, directed)
+    random_state = subset_seed
 
     if subset_size is None:
         k = subset_size
     elif isinstance(subset_size, int):
         # Select random vertices
-        k = G.select_random_vertices(
+        k = g.select_random_vertices(
             random_state=random_state, num_vertices=subset_size
         )
-        if vertex_list_type is list:
+        if v_list_type is list:
             k = k.to_arrow().to_pylist()
 
         print("the seeds are \n", k)
-        if vertex_list_type is int:
+        if v_list_type is int:
             # This internally sample k vertices in betweenness centrality.
             # Since the nodes that will be sampled by each implementation will
             # be random, therefore sample all vertices which will make the test
             # consistent.
-            k = len(G.nodes())
-
-    input_combo["k"] = k
+            k = len(g.nodes())
 
     sg_cugraph_bc = cugraph.betweenness_centrality(
-        G, k=k, normalized=normalized, endpoints=endpoints, random_state=random_state
+        g, k=k, normalized=normalized, endpoints=endpoint, random_state=random_state
     )
-    # Save the results back to the input_combo dictionary to prevent redundant
-    # cuGraph runs. Other tests using the input_combo fixture will look for
-    # them, and if not present they will have to re-run the same cuGraph call.
     sg_cugraph_bc = sg_cugraph_bc.sort_values("vertex").reset_index(drop=True)
 
-    input_combo["sg_cugraph_results"] = sg_cugraph_bc
-    chunksize = dcg.get_chunksize(input_data_path)
-    ddf = dask_cudf.read_csv(
-        input_data_path,
-        chunksize=chunksize,
-        delimiter=" ",
-        names=["src", "dst", "value"],
-        dtype=["int32", "int32", "float32"],
-    )
-
-    dg = cugraph.Graph(directed=directed)
-    dg.from_dask_cudf_edgelist(
-        ddf,
-        source="src",
-        destination="dst",
-        edge_attr="value",
-        renumber=True,
-        store_transposed=True,
-    )
-
-    input_combo["MGGraph"] = dg
-
-    return input_combo
-
-
-# =============================================================================
-# Tests
-# =============================================================================
-
-
-# @pytest.mark.skipif(
-#    is_single_gpu(), reason="skipping MG testing on Single GPU system"
-# )
-
-
-@pytest.mark.mg
-def test_dask_mg_betweenness_centrality(dask_client, benchmark, input_expected_output):
-
-    dg = input_expected_output["MGGraph"]
-    k = input_expected_output["k"]
-    endpoints = input_expected_output["endpoints"]
-    normalized = input_expected_output["normalized"]
-    random_state = input_expected_output["subset_seed"]
     mg_bc_results = benchmark(
         dcg.betweenness_centrality,
         dg,
         k=k,
         normalized=normalized,
-        endpoints=endpoints,
+        endpoints=endpoint,
         random_state=random_state,
     )
 
@@ -179,11 +134,9 @@ def test_dask_mg_betweenness_centrality(dask_client, benchmark, input_expected_o
         mg_bc_results.compute().sort_values("vertex").reset_index(drop=True)
     )["betweenness_centrality"].to_cupy()
 
-    sg_bc_results = (
-        input_expected_output["sg_cugraph_results"]
-        .sort_values("vertex")
-        .reset_index(drop=True)
-    )["betweenness_centrality"].to_cupy()
+    sg_bc_results = (sg_cugraph_bc.sort_values("vertex").reset_index(drop=True))[
+        "betweenness_centrality"
+    ].to_cupy()
 
     diff = cupy.isclose(mg_bc_results, sg_bc_results)
 
diff --git a/python/cugraph/cugraph/tests/centrality/test_degree_centrality_mg.py b/python/cugraph/cugraph/tests/centrality/test_degree_centrality_mg.py
index 18a9941ab06..8606649c745 100644
--- a/python/cugraph/cugraph/tests/centrality/test_degree_centrality_mg.py
+++ b/python/cugraph/cugraph/tests/centrality/test_degree_centrality_mg.py
@@ -15,13 +15,12 @@
 
 import pytest
 
-import cudf
-import dask_cudf
 import cugraph
 from cugraph.dask.common.mg_utils import is_single_gpu
-from cugraph.testing.utils import RAPIDS_DATASET_ROOT_DIR_PATH
+from cugraph.datasets import karate_asymmetric, polbooks, email_Eu_core
 from cudf.testing import assert_series_equal
 
+
 # =============================================================================
 # Pytest Setup / Teardown - called for each test function
 # =============================================================================
@@ -31,45 +30,55 @@ def setup_function():
     gc.collect()
 
 
+# =============================================================================
+# Parameters
+# =============================================================================
+
+
+DATASETS = [karate_asymmetric, polbooks, email_Eu_core]
 IS_DIRECTED = [True, False]
 
-DATA_PATH = [
-    (RAPIDS_DATASET_ROOT_DIR_PATH / "karate-asymmetric.csv").as_posix(),
-    (RAPIDS_DATASET_ROOT_DIR_PATH / "polbooks.csv").as_posix(),
-    (RAPIDS_DATASET_ROOT_DIR_PATH / "email-Eu-core.csv").as_posix(),
-]
 
+# =============================================================================
+# Helper functions
+# =============================================================================
 
-@pytest.mark.mg
-@pytest.mark.skipif(is_single_gpu(), reason="skipping MG testing on Single GPU system")
-@pytest.mark.parametrize("directed", IS_DIRECTED)
-@pytest.mark.parametrize("data_file", DATA_PATH)
-def test_dask_mg_degree(dask_client, directed, data_file):
-
-    input_data_path = data_file
-    chunksize = cugraph.dask.get_chunksize(input_data_path)
-
-    ddf = dask_cudf.read_csv(
-        input_data_path,
-        chunksize=chunksize,
-        delimiter=" ",
-        names=["src", "dst", "value"],
-        dtype=["int32", "int32", "float32"],
-    )
 
-    df = cudf.read_csv(
-        input_data_path,
-        delimiter=" ",
-        names=["src", "dst", "value"],
-        dtype=["int32", "int32", "float32"],
-    )
+def get_sg_graph(dataset, directed):
+    G = dataset.get_graph(create_using=cugraph.Graph(directed=directed))
+
+    return G
 
+
+def get_mg_graph(dataset, directed):
+    ddf = dataset.get_dask_edgelist()
     dg = cugraph.Graph(directed=directed)
-    dg.from_dask_cudf_edgelist(ddf, "src", "dst")
+    dg.from_dask_cudf_edgelist(
+        ddf,
+        source="src",
+        destination="dst",
+        edge_attr="wgt",
+        renumber=True,
+        store_transposed=True,
+    )
+
+    return dg
+
+
+# =============================================================================
+# Tests
+# =============================================================================
+
+
+@pytest.mark.mg
+@pytest.mark.skipif(is_single_gpu(), reason="skipping MG testing on Single GPU system")
+@pytest.mark.parametrize("dataset", DATASETS)
+@pytest.mark.parametrize("directed", IS_DIRECTED)
+def test_dask_mg_degree(dask_client, dataset, directed):
+    dg = get_mg_graph(dataset, directed)
     dg.compute_renumber_edge_list()
 
-    g = cugraph.Graph(directed=directed)
-    g.from_cudf_edgelist(df, "src", "dst")
+    g = get_sg_graph(dataset, directed)
 
     merge_df_in_degree = (
         dg.in_degree()

From 2ad3aeb2e66a668d93a18a3f0fe8d7e3e6bed40a Mon Sep 17 00:00:00 2001
From: Ralph Liu <ralphl@nvidia.com>
Date: Wed, 13 Mar 2024 12:06:14 -0700
Subject: [PATCH 10/20] Add call to unload internal dataset edge list

---
 .../test_batch_betweenness_centrality_mg.py   |  11 +-
 ...st_batch_edge_betweenness_centrality_mg.py |  10 +-
 .../test_betweenness_centrality_mg.py         |   5 +
 .../centrality/test_degree_centrality_mg.py   |   5 +
 .../test_edge_betweenness_centrality_mg.py    | 229 +++++++-----------
 .../test_eigenvector_centrality_mg.py         |  50 ++--
 .../centrality/test_katz_centrality_mg.py     |  71 +++---
 7 files changed, 170 insertions(+), 211 deletions(-)

diff --git a/python/cugraph/cugraph/tests/centrality/test_batch_betweenness_centrality_mg.py b/python/cugraph/cugraph/tests/centrality/test_batch_betweenness_centrality_mg.py
index 7050d0c9e55..1c73ebb0216 100644
--- a/python/cugraph/cugraph/tests/centrality/test_batch_betweenness_centrality_mg.py
+++ b/python/cugraph/cugraph/tests/centrality/test_batch_betweenness_centrality_mg.py
@@ -57,9 +57,7 @@ def setup_function():
 
 @pytest.mark.mg
 @pytest.mark.skipif(is_single_gpu(), reason="skipping MG testing on Single GPU system")
-@pytest.mark.parametrize(
-    "graph_file", DATASETS, ids=[f"dataset={d.get_path().stem}" for d in DATASETS]
-)
+@pytest.mark.parametrize("dataset", DATASETS)
 @pytest.mark.parametrize("directed", IS_DIRECTED)
 @pytest.mark.parametrize("subset_size", SUBSET_SIZES)
 @pytest.mark.parametrize("normalized", IS_NORMALIZED)
@@ -68,7 +66,7 @@ def setup_function():
 @pytest.mark.parametrize("subset_seed", SUBSET_SEEDS)
 @pytest.mark.parametrize("result_dtype", RESULT_DTYPES)
 def test_mg_betweenness_centrality(
-    graph_file,
+    dataset,
     directed,
     subset_size,
     normalized,
@@ -79,7 +77,7 @@ def test_mg_betweenness_centrality(
     dask_client,
 ):
     sorted_df = calc_betweenness_centrality(
-        graph_file,
+        dataset,
         directed=directed,
         normalized=normalized,
         k=subset_size,
@@ -95,3 +93,6 @@ def test_mg_betweenness_centrality(
         second_key="ref_bc",
         epsilon=DEFAULT_EPSILON,
     )
+
+    # Clean-up stored dataset edge-lists
+    dataset.unload()
diff --git a/python/cugraph/cugraph/tests/centrality/test_batch_edge_betweenness_centrality_mg.py b/python/cugraph/cugraph/tests/centrality/test_batch_edge_betweenness_centrality_mg.py
index 48364a4a79a..4530dd3da86 100644
--- a/python/cugraph/cugraph/tests/centrality/test_batch_edge_betweenness_centrality_mg.py
+++ b/python/cugraph/cugraph/tests/centrality/test_batch_edge_betweenness_centrality_mg.py
@@ -55,15 +55,13 @@ def setup_function():
 # FIXME: Fails for directed = False(bc score twice as much) and normalized = True.
 @pytest.mark.mg
 @pytest.mark.skipif(is_single_gpu(), reason="skipping MG testing on Single GPU system")
-@pytest.mark.parametrize(
-    "graph_file", DATASETS, ids=[f"dataset={d.get_path().stem}" for d in DATASETS]
-)
+@pytest.mark.parametrize("dataset", DATASETS)
 @pytest.mark.parametrize("directed", IS_DIRECTED)
 @pytest.mark.parametrize("subset_size", SUBSET_SIZES)
 @pytest.mark.parametrize("normalized", IS_NORMALIZED)
 @pytest.mark.parametrize("result_dtype", RESULT_DTYPES)
 def test_mg_edge_betweenness_centrality(
-    graph_file,
+    dataset,
     directed,
     subset_size,
     normalized,
@@ -71,7 +69,7 @@ def test_mg_edge_betweenness_centrality(
     dask_client,
 ):
     sorted_df = calc_edge_betweenness_centrality(
-        graph_file,
+        dataset,
         directed=directed,
         normalized=normalized,
         k=subset_size,
@@ -86,3 +84,5 @@ def test_mg_edge_betweenness_centrality(
         second_key="ref_bc",
         epsilon=DEFAULT_EPSILON,
     )
+    # Clean-up stored dataset edge-lists
+    dataset.unload()
diff --git a/python/cugraph/cugraph/tests/centrality/test_betweenness_centrality_mg.py b/python/cugraph/cugraph/tests/centrality/test_betweenness_centrality_mg.py
index 48fbe796bb4..c94c2dcaff6 100644
--- a/python/cugraph/cugraph/tests/centrality/test_betweenness_centrality_mg.py
+++ b/python/cugraph/cugraph/tests/centrality/test_betweenness_centrality_mg.py
@@ -49,12 +49,14 @@ def setup_function():
 
 
 def get_sg_graph(dataset, directed):
+    dataset.unload()
     G = dataset.get_graph(create_using=cugraph.Graph(directed=directed))
 
     return G
 
 
 def get_mg_graph(dataset, directed):
+    dataset.unload()
     ddf = dataset.get_dask_edgelist()
     dg = cugraph.Graph(directed=directed)
     dg.from_dask_cudf_edgelist(
@@ -141,3 +143,6 @@ def test_dask_mg_betweenness_centrality(
     diff = cupy.isclose(mg_bc_results, sg_bc_results)
 
     assert diff.all()
+
+    # Clean-up stored dataset edge-lists
+    dataset.unload()
diff --git a/python/cugraph/cugraph/tests/centrality/test_degree_centrality_mg.py b/python/cugraph/cugraph/tests/centrality/test_degree_centrality_mg.py
index 8606649c745..68daff9238c 100644
--- a/python/cugraph/cugraph/tests/centrality/test_degree_centrality_mg.py
+++ b/python/cugraph/cugraph/tests/centrality/test_degree_centrality_mg.py
@@ -45,12 +45,14 @@ def setup_function():
 
 
 def get_sg_graph(dataset, directed):
+    dataset.unload()
     G = dataset.get_graph(create_using=cugraph.Graph(directed=directed))
 
     return G
 
 
 def get_mg_graph(dataset, directed):
+    dataset.unload()
     ddf = dataset.get_dask_edgelist()
     dg = cugraph.Graph(directed=directed)
     dg.from_dask_cudf_edgelist(
@@ -116,3 +118,6 @@ def test_dask_mg_degree(dask_client, dataset, directed):
         check_names=False,
         check_dtype=False,
     )
+
+    # Clean-up stored dataset edge-lists
+    dataset.unload()
diff --git a/python/cugraph/cugraph/tests/centrality/test_edge_betweenness_centrality_mg.py b/python/cugraph/cugraph/tests/centrality/test_edge_betweenness_centrality_mg.py
index 6c066a947ac..c3a559da5c9 100644
--- a/python/cugraph/cugraph/tests/centrality/test_edge_betweenness_centrality_mg.py
+++ b/python/cugraph/cugraph/tests/centrality/test_edge_betweenness_centrality_mg.py
@@ -14,14 +14,9 @@
 import gc
 import pytest
 
-import dask_cudf
-from pylibcugraph.testing.utils import gen_fixture_params_product
-from cugraph.datasets import karate, dolphins
-
 import cugraph
 import cugraph.dask as dcg
-
-# from cugraph.dask.common.mg_utils import is_single_gpu
+from cugraph.datasets import karate, dolphins
 
 
 # =============================================================================
@@ -33,79 +28,34 @@ def setup_function():
     gc.collect()
 
 
-IS_DIRECTED = [True, False]
-INCLUDE_WEIGHTS = [False, True]
-INCLUDE_EDGE_IDS = [False, True]
-NORMALIZED_OPTIONS = [False, True]
-SUBSET_SIZE_OPTIONS = [4, None]
-
-
-# email_Eu_core is too expensive to test
-datasets = [karate, dolphins]
-
-
 # =============================================================================
-# Pytest fixtures
+# Parameters
 # =============================================================================
 
 
-fixture_params = gen_fixture_params_product(
-    (datasets, "graph_file"),
-    (IS_DIRECTED, "directed"),
-    (INCLUDE_WEIGHTS, "include_weights"),
-    (INCLUDE_EDGE_IDS, "include_edgeids"),
-    (NORMALIZED_OPTIONS, "normalized"),
-    (SUBSET_SIZE_OPTIONS, "subset_size"),
-)
-
-
-@pytest.fixture(scope="module", params=fixture_params)
-def input_combo(request):
-    """
-    Simply return the current combination of params as a dictionary for use in
-    tests or other parameterized fixtures.
-    """
-    parameters = dict(
-        zip(
-            (
-                "graph_file",
-                "directed",
-                "include_weights",
-                "include_edge_ids",
-                "normalized",
-                "subset_size",
-                "subset_seed",
-            ),
-            request.param,
-        )
-    )
+DATASETS = [karate, dolphins]
+IS_DIRECTED = [True, False]
+IS_WEIGHTED = [True, False]
+INCLUDE_EDGE_IDS = [True, False]
+IS_NORMALIZED = [True, False]
+SUBSET_SIZES = [4, None]
 
-    return parameters
 
+# =============================================================================
+# Helper functions
+# =============================================================================
 
-@pytest.fixture(scope="module")
-def input_expected_output(input_combo):
-    """
-    This fixture returns the inputs and expected results from the edge
-    betweenness centrality algo.
-    (based on cuGraph edge betweenness centrality) which can be used
-    for validation.
-    """
-    directed = input_combo["directed"]
-    normalized = input_combo["normalized"]
-    k = input_combo["subset_size"]
-    subset_seed = 42
-    edge_ids = input_combo["include_edge_ids"]
-    weight = input_combo["include_weights"]
 
-    df = input_combo["graph_file"].get_edgelist()
+def get_sg_graph(dataset, directed, edge_ids):
+    dataset.unload()
+    df = dataset.get_edgelist()
     if edge_ids:
         if not directed:
             # Edge ids not supported for undirected graph
-            return
-        dtype = df.dtypes[0]
+            return None
+        dtype = df.dtypes.iloc[0]
         edge_id = "edge_id"
-        df["edge_id"] = df.index
+        df[edge_id] = df.index
         df = df.astype(dtype)
 
     else:
@@ -115,30 +65,13 @@ def input_expected_output(input_combo):
     G.from_cudf_edgelist(
         df, source="src", destination="dst", weight="wgt", edge_id=edge_id
     )
-    if isinstance(k, int):
-        k = G.select_random_vertices(subset_seed, k)
 
-    input_combo["k"] = k
-    # Save the results back to the input_combo dictionary to prevent redundant
-    # cuGraph runs. Other tests using the input_combo fixture will look for
-    # them, and if not present they will have to re-run the same cuGraph call.
-    sg_cugraph_edge_bc = (
-        cugraph.edge_betweenness_centrality(G, k, normalized)
-        .sort_values(["src", "dst"])
-        .reset_index(drop=True)
-    )
+    return G
 
-    input_data_path = input_combo["graph_file"].get_path()
 
-    input_combo["sg_cugraph_results"] = sg_cugraph_edge_bc
-    chunksize = dcg.get_chunksize(input_data_path)
-    ddf = dask_cudf.read_csv(
-        input_data_path,
-        chunksize=chunksize,
-        delimiter=" ",
-        names=["src", "dst", "value"],
-        dtype=["int32", "int32", "float32"],
-    )
+def get_mg_graph(dataset, directed, edge_ids, weight):
+    dataset.unload()
+    ddf = dataset.get_dask_edgelist()
 
     if weight:
         weight = ddf
@@ -154,20 +87,16 @@ def input_expected_output(input_combo):
         edge_id = None
 
     dg = cugraph.Graph(directed=directed)
-
     dg.from_dask_cudf_edgelist(
         ddf,
         source="src",
         destination="dst",
-        weight="value",
+        weight="wgt",
         edge_id=edge_id,
         renumber=True,
     )
 
-    input_combo["MGGraph"] = dg
-    input_combo["include_weights"] = weight
-
-    return input_combo
+    return dg, weight
 
 
 # =============================================================================
@@ -175,57 +104,79 @@ def input_expected_output(input_combo):
 # =============================================================================
 
 
-# @pytest.mark.skipif(
-#    is_single_gpu(), reason="skipping MG testing on Single GPU system"
-# )
 @pytest.mark.mg
+@pytest.mark.parametrize("dataset", DATASETS)
+@pytest.mark.parametrize("directed", IS_DIRECTED)
+@pytest.mark.parametrize("weighted", IS_WEIGHTED)
+@pytest.mark.parametrize("edge_ids", INCLUDE_EDGE_IDS)
+@pytest.mark.parametrize("normalized", IS_NORMALIZED)
+@pytest.mark.parametrize("subset_size", SUBSET_SIZES)
 def test_dask_mg_edge_betweenness_centrality(
-    dask_client, benchmark, input_expected_output
+    dask_client,
+    dataset,
+    directed,
+    weighted,
+    edge_ids,
+    normalized,
+    subset_size,
+    benchmark,
 ):
-    if input_expected_output is not None:
-        dg = input_expected_output["MGGraph"]
-        k = input_expected_output["k"]
-        normalized = input_expected_output["normalized"]
-        weight = input_expected_output["include_weights"]
-        if weight is not None:
-            with pytest.raises(NotImplementedError):
-                result_edge_bc = benchmark(
-                    dcg.edge_betweenness_centrality, dg, k, normalized, weight=weight
-                )
-
-        else:
+    g = get_sg_graph(dataset, directed, edge_ids)
+
+    if g is None:
+        pytest.skip("Edge_ids not supported for undirected graph")
+
+    dg, weight = get_mg_graph(dataset, directed, edge_ids, weighted)
+    subset_seed = 42
+
+    k = subset_size
+    if isinstance(k, int):
+        k = g.select_random_vertices(subset_seed, k)
+
+    sg_cugraph_edge_bc = (
+        cugraph.edge_betweenness_centrality(g, k, normalized)
+        .sort_values(["src", "dst"])
+        .reset_index(drop=True)
+    )
+
+    if weight is not None:
+        with pytest.raises(NotImplementedError):
             result_edge_bc = benchmark(
                 dcg.edge_betweenness_centrality, dg, k, normalized, weight=weight
             )
-            result_edge_bc = (
-                result_edge_bc.compute()
-                .sort_values(["src", "dst"])
-                .reset_index(drop=True)
-                .rename(columns={"betweenness_centrality": "mg_betweenness_centrality"})
-            )
 
-            if len(result_edge_bc.columns) > 3:
-                result_edge_bc = result_edge_bc.rename(
-                    columns={"edge_id": "mg_edge_id"}
-                )
+    else:
+        result_edge_bc = benchmark(
+            dcg.edge_betweenness_centrality, dg, k, normalized, weight=weight
+        )
+        result_edge_bc = (
+            result_edge_bc.compute()
+            .sort_values(["src", "dst"])
+            .reset_index(drop=True)
+            .rename(columns={"betweenness_centrality": "mg_betweenness_centrality"})
+        )
 
-            expected_output = input_expected_output["sg_cugraph_results"].reset_index(
-                drop=True
-            )
-            result_edge_bc["betweenness_centrality"] = expected_output[
-                "betweenness_centrality"
-            ]
-            if len(expected_output.columns) > 3:
-                result_edge_bc["edge_id"] = expected_output["edge_id"]
-                edge_id_diff = result_edge_bc.query("mg_edge_id != edge_id")
-                assert len(edge_id_diff) == 0
-
-            edge_bc_diffs1 = result_edge_bc.query(
-                "mg_betweenness_centrality - betweenness_centrality > 0.01"
-            )
-            edge_bc_diffs2 = result_edge_bc.query(
-                "betweenness_centrality - mg_betweenness_centrality < -0.01"
-            )
+        if len(result_edge_bc.columns) > 3:
+            result_edge_bc = result_edge_bc.rename(columns={"edge_id": "mg_edge_id"})
+
+        expected_output = sg_cugraph_edge_bc.reset_index(drop=True)
+        result_edge_bc["betweenness_centrality"] = expected_output[
+            "betweenness_centrality"
+        ]
+        if len(expected_output.columns) > 3:
+            result_edge_bc["edge_id"] = expected_output["edge_id"]
+            edge_id_diff = result_edge_bc.query("mg_edge_id != edge_id")
+            assert len(edge_id_diff) == 0
+
+        edge_bc_diffs1 = result_edge_bc.query(
+            "mg_betweenness_centrality - betweenness_centrality > 0.01"
+        )
+        edge_bc_diffs2 = result_edge_bc.query(
+            "betweenness_centrality - mg_betweenness_centrality > 0.01"
+        )
+
+        assert len(edge_bc_diffs1) == 0
+        assert len(edge_bc_diffs2) == 0
 
-            assert len(edge_bc_diffs1) == 0
-            assert len(edge_bc_diffs2) == 0
+    # Clean-up stored dataset edge-lists
+    dataset.unload()
diff --git a/python/cugraph/cugraph/tests/centrality/test_eigenvector_centrality_mg.py b/python/cugraph/cugraph/tests/centrality/test_eigenvector_centrality_mg.py
index e2ce7d2c341..60d39273777 100644
--- a/python/cugraph/cugraph/tests/centrality/test_eigenvector_centrality_mg.py
+++ b/python/cugraph/cugraph/tests/centrality/test_eigenvector_centrality_mg.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2022-2023, NVIDIA CORPORATION.
+# Copyright (c) 2022-2024, NVIDIA CORPORATION.
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
@@ -16,11 +16,10 @@
 import pytest
 
 import cudf
-import dask_cudf
 import cugraph
 import cugraph.dask as dcg
 from cugraph.dask.common.mg_utils import is_single_gpu
-from cugraph.testing.utils import DATASETS
+from cugraph.datasets import karate_disjoint, dolphins, netscience
 
 
 # =============================================================================
@@ -32,28 +31,33 @@ def setup_function():
     gc.collect()
 
 
+# =============================================================================
+# Parameters
+# =============================================================================
+
+
+DATASETS = [karate_disjoint, dolphins, netscience]
 IS_DIRECTED = [True, False]
 
 
+# =============================================================================
+# Tests
+# =============================================================================
+
+
 @pytest.mark.mg
 @pytest.mark.skipif(is_single_gpu(), reason="skipping MG testing on Single GPU system")
+@pytest.mark.parametrize("dataset", DATASETS)
 @pytest.mark.parametrize("directed", IS_DIRECTED)
-@pytest.mark.parametrize("input_data_path", DATASETS)
-def test_dask_mg_eigenvector_centrality(dask_client, directed, input_data_path):
-    input_data_path = input_data_path.as_posix()
+def test_dask_mg_eigenvector_centrality(dask_client, dataset, directed):
+    input_data_path = dataset.get_path()
     print(f"dataset={input_data_path}")
-    chunksize = dcg.get_chunksize(input_data_path)
-    ddf = dask_cudf.read_csv(
-        input_data_path,
-        chunksize=chunksize,
-        delimiter=" ",
-        names=["src", "dst", "value"],
-        dtype=["int32", "int32", "float32"],
-    )
+    ddf = dataset.get_dask_edgelist()
     dg = cugraph.Graph(directed=True)
     dg.from_dask_cudf_edgelist(ddf, "src", "dst", store_transposed=True)
     mg_res = dcg.eigenvector_centrality(dg, tol=1e-6)
     mg_res = mg_res.compute()
+
     import networkx as nx
     from cugraph.testing import utils
 
@@ -84,20 +88,15 @@ def test_dask_mg_eigenvector_centrality(dask_client, directed, input_data_path):
             err = err + 1
     assert err == 0
 
+    # Clean-up stored dataset edge-lists
+    dataset.unload()
+
 
 @pytest.mark.mg
 def test_dask_mg_eigenvector_centrality_transposed_false(dask_client):
-    input_data_path = DATASETS[0]
+    dataset = DATASETS[0]
 
-    chunksize = dcg.get_chunksize(input_data_path)
-
-    ddf = dask_cudf.read_csv(
-        input_data_path,
-        chunksize=chunksize,
-        delimiter=" ",
-        names=["src", "dst", "value"],
-        dtype=["int32", "int32", "float32"],
-    )
+    ddf = dataset.get_dask_edgelist()
 
     dg = cugraph.Graph(directed=True)
     dg.from_dask_cudf_edgelist(ddf, "src", "dst", store_transposed=False)
@@ -110,3 +109,6 @@ def test_dask_mg_eigenvector_centrality_transposed_false(dask_client):
 
     with pytest.warns(UserWarning, match=warning_msg):
         dcg.eigenvector_centrality(dg)
+
+    # Clean-up stored dataset edge-lists
+    dataset.unload()
diff --git a/python/cugraph/cugraph/tests/centrality/test_katz_centrality_mg.py b/python/cugraph/cugraph/tests/centrality/test_katz_centrality_mg.py
index 72b81ce50bb..d1a899eba06 100644
--- a/python/cugraph/cugraph/tests/centrality/test_katz_centrality_mg.py
+++ b/python/cugraph/cugraph/tests/centrality/test_katz_centrality_mg.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2020-2023, NVIDIA CORPORATION.
+# Copyright (c) 2020-2024, NVIDIA CORPORATION.
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
@@ -16,11 +16,10 @@
 import pytest
 
 import cudf
-import dask_cudf
 import cugraph
 import cugraph.dask as dcg
 from cugraph.dask.common.mg_utils import is_single_gpu
-from cugraph.testing.utils import RAPIDS_DATASET_ROOT_DIR_PATH
+from cugraph.datasets import karate
 
 
 # =============================================================================
@@ -32,25 +31,30 @@ def setup_function():
     gc.collect()
 
 
+# =============================================================================
+# Parameters
+# =============================================================================
+
+
+DATASETS = [karate]
 IS_DIRECTED = [True, False]
 
 
+# =============================================================================
+# Tests
+# =============================================================================
+
+
 @pytest.mark.mg
 @pytest.mark.skipif(is_single_gpu(), reason="skipping MG testing on Single GPU system")
+@pytest.mark.parametrize("dataset", DATASETS)
 @pytest.mark.parametrize("directed", IS_DIRECTED)
-def test_dask_mg_katz_centrality(dask_client, directed):
+def test_dask_mg_katz_centrality(dask_client, dataset, directed):
 
-    input_data_path = (RAPIDS_DATASET_ROOT_DIR_PATH / "karate.csv").as_posix()
+    input_data_path = dataset.get_path()
     print(f"dataset={input_data_path}")
-    chunksize = dcg.get_chunksize(input_data_path)
-
-    ddf = dask_cudf.read_csv(
-        input_data_path,
-        chunksize=chunksize,
-        delimiter=" ",
-        names=["src", "dst", "value"],
-        dtype=["int32", "int32", "float32"],
-    )
+
+    ddf = dataset.get_dask_edgelist()
 
     dg = cugraph.Graph(directed=True)
     dg.from_dask_cudf_edgelist(ddf, "src", "dst", store_transposed=True)
@@ -92,22 +96,16 @@ def test_dask_mg_katz_centrality(dask_client, directed):
             err = err + 1
     assert err == 0
 
+    # Clean-up stored dataset edge-lists
+    dataset.unload()
+
 
 @pytest.mark.mg
 @pytest.mark.skipif(is_single_gpu(), reason="skipping MG testing on Single GPU system")
+@pytest.mark.parametrize("dataset", DATASETS)
 @pytest.mark.parametrize("directed", IS_DIRECTED)
-def test_dask_mg_katz_centrality_nstart(dask_client, directed):
-    input_data_path = (RAPIDS_DATASET_ROOT_DIR_PATH / "karate.csv").as_posix()
-    print(f"dataset={input_data_path}")
-    chunksize = dcg.get_chunksize(input_data_path)
-
-    ddf = dask_cudf.read_csv(
-        input_data_path,
-        chunksize=chunksize,
-        delimiter=" ",
-        names=["src", "dst", "value"],
-        dtype=["int32", "int32", "float32"],
-    )
+def test_dask_mg_katz_centrality_nstart(dask_client, dataset, directed):
+    ddf = dataset.get_dask_edgelist()
 
     dg = cugraph.Graph(directed=True)
     dg.from_dask_cudf_edgelist(ddf, "src", "dst", store_transposed=True)
@@ -139,20 +137,14 @@ def test_dask_mg_katz_centrality_nstart(dask_client, directed):
             err = err + 1
     assert err == 0
 
+    # Clean-up stored dataset edge-lists
+    dataset.unload()
 
-@pytest.mark.mg
-def test_dask_mg_katz_centrality_transposed_false(dask_client):
-    input_data_path = (RAPIDS_DATASET_ROOT_DIR_PATH / "karate.csv").as_posix()
 
-    chunksize = dcg.get_chunksize(input_data_path)
-
-    ddf = dask_cudf.read_csv(
-        input_data_path,
-        chunksize=chunksize,
-        delimiter=" ",
-        names=["src", "dst", "value"],
-        dtype=["int32", "int32", "float32"],
-    )
+@pytest.mark.mg
+@pytest.mark.parametrize("dataset", DATASETS)
+def test_dask_mg_katz_centrality_transposed_false(dask_client, dataset):
+    ddf = dataset.get_dask_edgelist()
 
     dg = cugraph.Graph(directed=True)
     dg.from_dask_cudf_edgelist(ddf, "src", "dst", store_transposed=False)
@@ -165,3 +157,6 @@ def test_dask_mg_katz_centrality_transposed_false(dask_client):
 
     with pytest.warns(UserWarning, match=warning_msg):
         dcg.katz_centrality(dg)
+
+    # Clean-up stored dataset edge-lists
+    dataset.unload()

From 5b8000603ebc918ebe05e6d6893b7ee7365b3c2c Mon Sep 17 00:00:00 2001
From: Ralph Liu <ralphl@nvidia.com>
Date: Wed, 13 Mar 2024 14:43:10 -0700
Subject: [PATCH 11/20] Unload stored dataset edge-list before eigenvector MG tests

---
 .../cugraph/tests/centrality/test_eigenvector_centrality_mg.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/python/cugraph/cugraph/tests/centrality/test_eigenvector_centrality_mg.py b/python/cugraph/cugraph/tests/centrality/test_eigenvector_centrality_mg.py
index 60d39273777..8cd77fb5e24 100644
--- a/python/cugraph/cugraph/tests/centrality/test_eigenvector_centrality_mg.py
+++ b/python/cugraph/cugraph/tests/centrality/test_eigenvector_centrality_mg.py
@@ -52,6 +52,7 @@ def setup_function():
 def test_dask_mg_eigenvector_centrality(dask_client, dataset, directed):
     input_data_path = dataset.get_path()
     print(f"dataset={input_data_path}")
+    dataset.unload()
     ddf = dataset.get_dask_edgelist()
     dg = cugraph.Graph(directed=True)
     dg.from_dask_cudf_edgelist(ddf, "src", "dst", store_transposed=True)
@@ -96,8 +97,8 @@ def test_dask_mg_eigenvector_centrality(dask_client, dataset, directed):
 def test_dask_mg_eigenvector_centrality_transposed_false(dask_client):
     dataset = DATASETS[0]
 
+    dataset.unload()
     ddf = dataset.get_dask_edgelist()
-
     dg = cugraph.Graph(directed=True)
     dg.from_dask_cudf_edgelist(ddf, "src", "dst", store_transposed=False)
 

From e62cad98b2a3874074cb936e80330b7392a78c63 Mon Sep 17 00:00:00 2001
From: Ralph Liu <ralphl@nvidia.com>
Date: Wed, 13 Mar 2024 14:44:18 -0700
Subject: [PATCH 12/20] Unload stored dataset edge-list before Katz MG tests

---
 .../cugraph/tests/centrality/test_katz_centrality_mg.py    | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/python/cugraph/cugraph/tests/centrality/test_katz_centrality_mg.py b/python/cugraph/cugraph/tests/centrality/test_katz_centrality_mg.py
index d1a899eba06..ebbe5974814 100644
--- a/python/cugraph/cugraph/tests/centrality/test_katz_centrality_mg.py
+++ b/python/cugraph/cugraph/tests/centrality/test_katz_centrality_mg.py
@@ -50,12 +50,11 @@ def setup_function():
 @pytest.mark.parametrize("dataset", DATASETS)
 @pytest.mark.parametrize("directed", IS_DIRECTED)
 def test_dask_mg_katz_centrality(dask_client, dataset, directed):
-
     input_data_path = dataset.get_path()
     print(f"dataset={input_data_path}")
 
+    dataset.unload()
     ddf = dataset.get_dask_edgelist()
-
     dg = cugraph.Graph(directed=True)
     dg.from_dask_cudf_edgelist(ddf, "src", "dst", store_transposed=True)
 
@@ -105,8 +104,8 @@ def test_dask_mg_katz_centrality(dask_client, dataset, directed):
 @pytest.mark.parametrize("dataset", DATASETS)
 @pytest.mark.parametrize("directed", IS_DIRECTED)
 def test_dask_mg_katz_centrality_nstart(dask_client, dataset, directed):
+    dataset.unload()
     ddf = dataset.get_dask_edgelist()
-
     dg = cugraph.Graph(directed=True)
     dg.from_dask_cudf_edgelist(ddf, "src", "dst", store_transposed=True)
 
@@ -144,8 +143,8 @@ def test_dask_mg_katz_centrality_nstart(dask_client, dataset, directed):
 @pytest.mark.mg
 @pytest.mark.parametrize("dataset", DATASETS)
 def test_dask_mg_katz_centrality_transposed_false(dask_client, dataset):
+    dataset.unload()
     ddf = dataset.get_dask_edgelist()
-
     dg = cugraph.Graph(directed=True)
     dg.from_dask_cudf_edgelist(ddf, "src", "dst", store_transposed=False)
 

From 945c866760b532e9c1d79f81257f7a5f9ec890a7 Mon Sep 17 00:00:00 2001
From: Ralph Liu <ralphl@nvidia.com>
Date: Fri, 15 Mar 2024 08:00:33 -0700
Subject: [PATCH 13/20] Update comms and community MG tests

---
 .../cugraph/tests/comms/test_comms_mg.py      |  99 ++++++--------
 .../community/test_induced_subgraph_mg.py     |  20 ++-
 .../cugraph/tests/community/test_leiden_mg.py | 126 +++++-------------
 .../tests/community/test_louvain_mg.py        | 125 ++++-------------
 .../tests/community/test_triangle_count_mg.py | 125 +++++++----------
 5 files changed, 156 insertions(+), 339 deletions(-)

diff --git a/python/cugraph/cugraph/tests/comms/test_comms_mg.py b/python/cugraph/cugraph/tests/comms/test_comms_mg.py
index 747ef935e01..18d4db2d77f 100644
--- a/python/cugraph/cugraph/tests/comms/test_comms_mg.py
+++ b/python/cugraph/cugraph/tests/comms/test_comms_mg.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2020-2023, NVIDIA CORPORATION.
+# Copyright (c) 2020-2024, NVIDIA CORPORATION.
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
@@ -16,10 +16,9 @@
 import pytest
 import cugraph.dask as dcg
 
-import cudf
-import dask_cudf
 import cugraph
-from cugraph.testing.utils import RAPIDS_DATASET_ROOT_DIR_PATH
+from cugraph.datasets import karate, dolphins
+
 
 # =============================================================================
 # Pytest Setup / Teardown - called for each test function
@@ -30,12 +29,37 @@ def setup_function():
     gc.collect()
 
 
+# =============================================================================
+# Parameters
+# =============================================================================
+
+
+DATASETS = [karate, dolphins]
 IS_DIRECTED = [True, False]
 
 
-# @pytest.mark.skipif(
-#    is_single_gpu(), reason="skipping MG testing on Single GPU system"
-# )
+# =============================================================================
+# Helper Functions
+# =============================================================================
+
+
+def get_pagerank_result(dataset, is_mg):
+    """Return the cugraph.pagerank result for an MG or SG graph"""
+    dataset.unload()
+
+    if is_mg:
+        dg = dataset.get_dask_graph(store_transposed=True)
+        return dcg.pagerank(dg).compute()
+    else:
+        g = dataset.get_graph(store_transposed=True)
+        return cugraph.pagerank(g)
+
+
+# =============================================================================
+# Tests
+# =============================================================================
+
+
 @pytest.mark.mg
 @pytest.mark.parametrize("directed", IS_DIRECTED)
 def test_dask_mg_pagerank(dask_client, directed):
@@ -43,62 +67,17 @@ def test_dask_mg_pagerank(dask_client, directed):
     # Initialize and run pagerank on two distributed graphs
     # with same communicator
 
-    input_data_path1 = (RAPIDS_DATASET_ROOT_DIR_PATH / "karate.csv").as_posix()
+    input_data_path1 = karate.get_path()
     print(f"dataset1={input_data_path1}")
-    chunksize1 = dcg.get_chunksize(input_data_path1)
+    result_pr1 = get_pagerank_result(karate, is_mg=True)
 
-    input_data_path2 = (RAPIDS_DATASET_ROOT_DIR_PATH / "dolphins.csv").as_posix()
+    input_data_path2 = dolphins.get_path()
     print(f"dataset2={input_data_path2}")
-    chunksize2 = dcg.get_chunksize(input_data_path2)
-
-    ddf1 = dask_cudf.read_csv(
-        input_data_path1,
-        chunksize=chunksize1,
-        delimiter=" ",
-        names=["src", "dst", "value"],
-        dtype=["int32", "int32", "float32"],
-    )
-
-    dg1 = cugraph.Graph(directed=directed)
-    dg1.from_dask_cudf_edgelist(ddf1, "src", "dst")
-
-    result_pr1 = dcg.pagerank(dg1).compute()
-
-    ddf2 = dask_cudf.read_csv(
-        input_data_path2,
-        chunksize=chunksize2,
-        delimiter=" ",
-        names=["src", "dst", "value"],
-        dtype=["int32", "int32", "float32"],
-    )
-
-    dg2 = cugraph.Graph(directed=directed)
-    dg2.from_dask_cudf_edgelist(ddf2, "src", "dst")
-
-    result_pr2 = dcg.pagerank(dg2).compute()
+    result_pr2 = get_pagerank_result(dolphins, is_mg=True)
 
     # Calculate single GPU pagerank for verification of results
-    df1 = cudf.read_csv(
-        input_data_path1,
-        delimiter=" ",
-        names=["src", "dst", "value"],
-        dtype=["int32", "int32", "float32"],
-    )
-
-    g1 = cugraph.Graph(directed=directed)
-    g1.from_cudf_edgelist(df1, "src", "dst")
-    expected_pr1 = cugraph.pagerank(g1)
-
-    df2 = cudf.read_csv(
-        input_data_path2,
-        delimiter=" ",
-        names=["src", "dst", "value"],
-        dtype=["int32", "int32", "float32"],
-    )
-
-    g2 = cugraph.Graph(directed=directed)
-    g2.from_cudf_edgelist(df2, "src", "dst")
-    expected_pr2 = cugraph.pagerank(g2)
+    expected_pr1 = get_pagerank_result(karate, is_mg=False)
+    expected_pr2 = get_pagerank_result(dolphins, is_mg=False)
 
     # Compare and verify pagerank results
 
@@ -134,3 +113,7 @@ def test_dask_mg_pagerank(dask_client, directed):
             err2 = err2 + 1
     print("Mismatches in ", input_data_path2, ": ", err2)
     assert err1 == err2 == 0
+
+    # Clean-up stored dataset edge-lists
+    karate.unload()
+    dolphins.unload()
diff --git a/python/cugraph/cugraph/tests/community/test_induced_subgraph_mg.py b/python/cugraph/cugraph/tests/community/test_induced_subgraph_mg.py
index 45ec8eca0e8..9e199840fbb 100644
--- a/python/cugraph/cugraph/tests/community/test_induced_subgraph_mg.py
+++ b/python/cugraph/cugraph/tests/community/test_induced_subgraph_mg.py
@@ -17,7 +17,6 @@
 
 import cugraph
 import cugraph.dask as dcg
-import dask_cudf
 from cudf.testing.testing import assert_frame_equal
 from cugraph.dask.common.mg_utils import is_single_gpu
 from cugraph.datasets import karate, dolphins, email_Eu_core
@@ -36,32 +35,28 @@ def setup_function():
 # Parameters
 # =============================================================================
 
+
 DATASETS = [karate, dolphins, email_Eu_core]
 IS_DIRECTED = [True, False]
 NUM_VERTICES = [2, 5, 10, 20]
 OFFSETS = [None]
 
+
 # =============================================================================
 # Helper functions
 # =============================================================================
 
 
 def get_sg_graph(dataset, directed):
+    dataset.unload()
     G = dataset.get_graph(create_using=cugraph.Graph(directed=directed))
 
     return G
 
 
 def get_mg_graph(dataset, directed):
-    input_data_path = dataset.get_path()
-    blocksize = dcg.get_chunksize(input_data_path)
-    ddf = dask_cudf.read_csv(
-        input_data_path,
-        blocksize=blocksize,
-        delimiter=dataset.metadata["delim"],
-        names=dataset.metadata["col_names"],
-        dtype=dataset.metadata["col_types"],
-    )
+    dataset.unload()
+    ddf = dataset.get_dask_edgelist()
     dg = cugraph.Graph(directed=directed)
     dg.from_dask_cudf_edgelist(
         ddf,
@@ -108,7 +103,7 @@ def test_mg_induced_subgraph(
 
     # FIXME: This parameter is not yet tested
     # mg_offsets = mg_offsets.compute().reset_index(drop=True)
-    mg_df, mg_offsets = result_induced_subgraph
+    mg_df, _ = result_induced_subgraph
 
     if mg_df is not None and sg_induced_subgraph is not None:
         # FIXME: 'edges()' or 'view_edgelist()' takes half the edges out if
@@ -126,3 +121,6 @@ def test_mg_induced_subgraph(
         # of all the vertices and ensure that there is None
         assert sg_induced_subgraph is None
         assert mg_df is None
+
+    # Clean-up stored dataset edge-lists
+    dataset.unload()
diff --git a/python/cugraph/cugraph/tests/community/test_leiden_mg.py b/python/cugraph/cugraph/tests/community/test_leiden_mg.py
index 69fccdae260..4ed7244fe29 100644
--- a/python/cugraph/cugraph/tests/community/test_leiden_mg.py
+++ b/python/cugraph/cugraph/tests/community/test_leiden_mg.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2020-2023, NVIDIA CORPORATION.
+# Copyright (c) 2020-2024, NVIDIA CORPORATION.
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
@@ -13,123 +13,58 @@
 
 import pytest
 
-
-import dask_cudf
 import cugraph
 import cugraph.dask as dcg
-from cugraph.testing import utils
-
+from cugraph.datasets import karate_asymmetric, karate, dolphins
 
-try:
-    from rapids_pytest_benchmark import setFixtureParamNames
-except ImportError:
-    print(
-        "\n\nWARNING: rapids_pytest_benchmark is not installed, "
-        "falling back to pytest_benchmark fixtures.\n"
-    )
 
-    # if rapids_pytest_benchmark is not available, just perfrom time-only
-    # benchmarking and replace the util functions with nops
-    import pytest_benchmark
+# =============================================================================
+# Parameters
+# =============================================================================
 
-    gpubenchmark = pytest_benchmark.plugin.benchmark
 
-    def setFixtureParamNames(*args, **kwargs):
-        pass
+DATASETS = [karate, dolphins]
+DATASETS_ASYMMETRIC = [karate_asymmetric]
 
 
 # =============================================================================
-# Parameters
+# Helper Functions
 # =============================================================================
-DATASETS_ASYMMETRIC = [utils.RAPIDS_DATASET_ROOT_DIR_PATH / "karate-asymmetric.csv"]
-
-
-###############################################################################
-# Fixtures
-# @pytest.mark.skipif(
-#    is_single_gpu(), reason="skipping MG testing on Single GPU system"
-# )
-@pytest.fixture(
-    scope="module",
-    params=DATASETS_ASYMMETRIC,
-    ids=[f"dataset={d.as_posix()}" for d in DATASETS_ASYMMETRIC],
-)
-def daskGraphFromDataset(request, dask_client):
-    """
-    Returns a new dask dataframe created from the dataset file param.
-    This creates a directed Graph.
-    """
-    # Since parameterized fixtures do not assign param names to param values,
-    # manually call the helper to do so.
-    setFixtureParamNames(request, ["dataset"])
-    dataset = request.param
-
-    chunksize = dcg.get_chunksize(dataset)
-    ddf = dask_cudf.read_csv(
-        dataset,
-        chunksize=chunksize,
-        delimiter=" ",
-        names=["src", "dst", "value"],
-        dtype=["int32", "int32", "float32"],
-    )
-
-    dg = cugraph.Graph(directed=True)
-    dg.from_dask_cudf_edgelist(ddf, "src", "dst", "value")
-    return dg
 
 
-@pytest.fixture(
-    scope="module",
-    params=utils.DATASETS_UNDIRECTED,
-    ids=[f"dataset={d.as_posix()}" for d in utils.DATASETS_UNDIRECTED],
-)
-def uddaskGraphFromDataset(request, dask_client):
-    """
-    Returns a new dask dataframe created from the dataset file param.
-    This creates an undirected Graph.
-    """
-    # Since parameterized fixtures do not assign param names to param
-    # values, manually call the helper to do so.
-    setFixtureParamNames(request, ["dataset"])
-    dataset = request.param
-
-    chunksize = dcg.get_chunksize(dataset)
-    ddf = dask_cudf.read_csv(
-        dataset,
-        chunksize=chunksize,
-        delimiter=" ",
-        names=["src", "dst", "value"],
-        dtype=["int32", "int32", "float32"],
-    )
-
-    dg = cugraph.Graph(directed=False)
-    dg.from_dask_cudf_edgelist(ddf, "src", "dst", "value")
+def get_mg_graph(dataset, directed):
+    """Returns an MG graph"""
+    ddf = dataset.get_dask_edgelist()
+
+    dg = cugraph.Graph(directed=directed)
+    dg.from_dask_cudf_edgelist(ddf, "src", "dst", "wgt")
     return dg
 
 
-###############################################################################
+# =============================================================================
 # Tests
-# @pytest.mark.skipif(
-#    is_single_gpu(), reason="skipping MG testing on Single GPU system"
-# )
+# =============================================================================
 # FIXME: Implement more robust tests
+
+
 @pytest.mark.mg
-def test_mg_leiden_with_edgevals_directed_graph(daskGraphFromDataset):
+@pytest.mark.parametrize("dataset", DATASETS_ASYMMETRIC)
+def test_mg_leiden_with_edgevals_directed_graph(dask_client, dataset):
+    dg = get_mg_graph(dataset, directed=True)
     # Directed graphs are not supported by Leiden and a ValueError should be
     # raised
     with pytest.raises(ValueError):
-        parts, mod = dcg.leiden(daskGraphFromDataset)
+        parts, mod = dcg.leiden(dg)
+
+    # Clean-up stored dataset edge-lists
+    dataset.unload()
 
 
-###############################################################################
-# Tests
-# @pytest.mark.skipif(
-#    is_single_gpu(), reason="skipping MG testing on Single GPU system"
-# )
-# FIXME: Implement more robust tests
 @pytest.mark.mg
-def test_mg_leiden_with_edgevals_undirected_graph(uddaskGraphFromDataset):
-    parts, mod = dcg.leiden(uddaskGraphFromDataset)
+@pytest.mark.parametrize("dataset", DATASETS)
+def test_mg_leiden_with_edgevals_undirected_graph(dask_client, dataset):
+    dg = get_mg_graph(dataset, directed=False)
+    parts, mod = dcg.leiden(dg)
 
     # FIXME: either call Nx with the same dataset and compare results, or
     # hardcode golden results to compare to.
@@ -137,3 +72,6 @@ def test_mg_leiden_with_edgevals_undirected_graph(uddaskGraphFromDataset):
     print(parts.compute())
     print(mod)
     print()
+
+    # Clean-up stored dataset edge-lists
+    dataset.unload()
diff --git a/python/cugraph/cugraph/tests/community/test_louvain_mg.py b/python/cugraph/cugraph/tests/community/test_louvain_mg.py
index 5318262fe26..ce89f7f62a2 100644
--- a/python/cugraph/cugraph/tests/community/test_louvain_mg.py
+++ b/python/cugraph/cugraph/tests/community/test_louvain_mg.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2020-2023, NVIDIA CORPORATION.
+# Copyright (c) 2020-2024, NVIDIA CORPORATION.
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
@@ -14,122 +14,44 @@
 import pytest
 
 import cugraph.dask as dcg
+from cugraph.datasets import karate_asymmetric, karate, dolphins
 
-import cugraph
-import dask_cudf
-from cugraph.testing import utils
+from test_leiden_mg import get_mg_graph
 
 
-try:
-    from rapids_pytest_benchmark import setFixtureParamNames
-except ImportError:
-    print(
-        "\n\nWARNING: rapids_pytest_benchmark is not installed, "
-        "falling back to pytest_benchmark fixtures.\n"
-    )
-
-    # if rapids_pytest_benchmark is not available, just perfrom time-only
-    # benchmarking and replace the util functions with nops
-    import pytest_benchmark
+# =============================================================================
+# Parameters
+# =============================================================================
 
-    gpubenchmark = pytest_benchmark.plugin.benchmark
 
-    def setFixtureParamNames(*args, **kwargs):
-        pass
+DATASETS_ASYMMETRIC = [karate_asymmetric]
+DATASETS = [karate, dolphins]
 
 
 # =============================================================================
-# Parameters
-# =============================================================================
-DATASETS_ASYMMETRIC = [utils.RAPIDS_DATASET_ROOT_DIR_PATH / "karate-asymmetric.csv"]
-
-
-###############################################################################
-# Fixtures
-# @pytest.mark.skipif(
-#    is_single_gpu(), reason="skipping MG testing on Single GPU system"
-# )
-@pytest.fixture(
-    scope="module",
-    params=DATASETS_ASYMMETRIC,
-    ids=[f"dataset={d.as_posix()}" for d in DATASETS_ASYMMETRIC],
-)
-def daskGraphFromDataset(request, dask_client):
-    """
-    Returns a new dask dataframe created from the dataset file param.
-    This creates a directed Graph.
-    """
-    # Since parameterized fixtures do not assign param names to param values,
-    # manually call the helper to do so.
-    setFixtureParamNames(request, ["dataset"])
-    dataset = request.param
-
-    chunksize = dcg.get_chunksize(dataset)
-    ddf = dask_cudf.read_csv(
-        dataset,
-        chunksize=chunksize,
-        delimiter=" ",
-        names=["src", "dst", "value"],
-        dtype=["int32", "int32", "float32"],
-    )
-
-    dg = cugraph.Graph(directed=True)
-    dg.from_dask_cudf_edgelist(ddf, "src", "dst", "value")
-    return dg
-
-
-@pytest.fixture(
-    scope="module",
-    params=utils.DATASETS_UNDIRECTED,
-    ids=[f"dataset={d.as_posix()}" for d in utils.DATASETS_UNDIRECTED],
-)
-def uddaskGraphFromDataset(request, dask_client):
-    """
-    Returns a new dask dataframe created from the dataset file param.
-    This creates an undirected Graph.
-    """
-    # Since parameterized fixtures do not assign param names to param
-    # values, manually call the helper to do so.
-    setFixtureParamNames(request, ["dataset"])
-    dataset = request.param
-
-    chunksize = dcg.get_chunksize(dataset)
-    ddf = dask_cudf.read_csv(
-        dataset,
-        chunksize=chunksize,
-        delimiter=" ",
-        names=["src", "dst", "value"],
-        dtype=["int32", "int32", "float32"],
-    )
-
-    dg = cugraph.Graph(directed=False)
-    dg.from_dask_cudf_edgelist(ddf, "src", "dst", "value")
-    return dg
-
-
-###############################################################################
 # Tests
-# @pytest.mark.skipif(
-#    is_single_gpu(), reason="skipping MG testing on Single GPU system"
-# )
+# =============================================================================
 # FIXME: Implement more robust tests
+
+
 @pytest.mark.mg
-def test_mg_louvain_with_edgevals_directed_graph(daskGraphFromDataset):
+@pytest.mark.parametrize("dataset", DATASETS_ASYMMETRIC)
+def test_mg_louvain_with_edgevals_directed_graph(dask_client, dataset):
+    dg = get_mg_graph(dataset, directed=True)
     # Directed graphs are not supported by Louvain and a ValueError should be
     # raised
     with pytest.raises(ValueError):
-        parts, mod = dcg.louvain(daskGraphFromDataset)
+        parts, mod = dcg.louvain(dg)
+
+    # Clean-up stored dataset edge-lists
+    dataset.unload()
 
 
-###############################################################################
-# Tests
-# @pytest.mark.skipif(
-#    is_single_gpu(), reason="skipping MG testing on Single GPU system"
-# )
-# FIXME: Implement more robust tests
 @pytest.mark.mg
-def test_mg_louvain_with_edgevals_undirected_graph(uddaskGraphFromDataset):
-    parts, mod = dcg.louvain(uddaskGraphFromDataset)
+@pytest.mark.parametrize("dataset", DATASETS)
+def test_mg_louvain_with_edgevals_undirected_graph(dask_client, dataset):
+    dg = get_mg_graph(dataset, directed=False)
+    parts, mod = dcg.louvain(dg)
 
     # FIXME: either call Nx with the same dataset and compare results, or
     # hardcode golden results to compare to.
@@ -137,3 +59,6 @@ def test_mg_louvain_with_edgevals_undirected_graph(uddaskGraphFromDataset):
     print(parts.compute())
     print(mod)
     print()
+
+    # Clean-up stored dataset edge-lists
+    dataset.unload()
diff --git a/python/cugraph/cugraph/tests/community/test_triangle_count_mg.py b/python/cugraph/cugraph/tests/community/test_triangle_count_mg.py
index 0f7bb14581f..02723d75527 100644
--- a/python/cugraph/cugraph/tests/community/test_triangle_count_mg.py
+++ b/python/cugraph/cugraph/tests/community/test_triangle_count_mg.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2022-2023, NVIDIA CORPORATION.
+# Copyright (c) 2022-2024, NVIDIA CORPORATION.
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
@@ -16,115 +16,85 @@
 import random
 import pytest
 
-import cudf
-import dask_cudf
 import cugraph
 import cugraph.dask as dcg
-from cugraph.testing import utils
-from pylibcugraph.testing.utils import gen_fixture_params_product
+from cugraph.datasets import karate, dolphins
 
 
 # =============================================================================
 # Pytest Setup / Teardown - called for each test function
 # =============================================================================
+
+
 def setup_function():
     gc.collect()
 
 
 # =============================================================================
-# Pytest fixtures
+# Parameters
 # =============================================================================
-datasets = utils.DATASETS_UNDIRECTED
-fixture_params = gen_fixture_params_product(
-    (datasets, "graph_file"),
-    ([True, False], "start_list"),
-)
-
-
-@pytest.fixture(scope="module", params=fixture_params)
-def input_combo(request):
-    """
-    Simply return the current combination of params as a dictionary for use in
-    tests or other parameterized fixtures.
-    """
-    parameters = dict(zip(("graph_file", "start_list", "edgevals"), request.param))
-
-    return parameters
-
-
-@pytest.fixture(scope="module")
-def input_expected_output(dask_client, input_combo):
-    """
-    This fixture returns the inputs and expected results from the triangle
-    count algo.
-    """
-    start_list = input_combo["start_list"]
-    input_data_path = input_combo["graph_file"]
-    G = utils.generate_cugraph_graph_from_file(
-        input_data_path, directed=False, edgevals=True
-    )
 
-    input_combo["SGGraph"] = G
 
-    if start_list:
+DATASETS = [karate, dolphins]
+START_LIST = [True, False]
+
+
+# =============================================================================
+# Helper Functions
+# =============================================================================
+
+
+def get_sg_graph(dataset, directed, start):
+    dataset.unload()
+    G = dataset.get_graph(create_using=cugraph.Graph(directed=directed))
+    if start:
         # sample k nodes from the cuGraph graph
-        k = random.randint(1, 10)
-        srcs = G.view_edge_list()[G.source_columns]
-        dsts = G.view_edge_list()[G.destination_columns]
-        nodes = cudf.concat([srcs, dsts]).drop_duplicates()
-        start_list = nodes.sample(k)
+        start = G.select_random_vertices(num_vertices=random.randint(1, 10))
     else:
-        start_list = None
+        start = None
 
-    sg_triangle_results = cugraph.triangle_count(G, start_list)
-    sg_triangle_results = sg_triangle_results.sort_values("vertex").reset_index(
-        drop=True
-    )
+    return G, start
 
-    input_combo["sg_triangle_results"] = sg_triangle_results
-    input_combo["start_list"] = start_list
-
-    # Creating an edgelist from a dask cudf dataframe
-    chunksize = dcg.get_chunksize(input_data_path)
-    ddf = dask_cudf.read_csv(
-        input_data_path,
-        chunksize=chunksize,
-        delimiter=" ",
-        names=["src", "dst", "value"],
-        dtype=["int32", "int32", "float32"],
-    )
 
-    dg = cugraph.Graph(directed=False)
+def get_mg_graph(dataset, directed):
+    dataset.unload()
+    ddf = dataset.get_dask_edgelist()
+    dg = cugraph.Graph(directed=directed)
     dg.from_dask_cudf_edgelist(
-        ddf, source="src", destination="dst", edge_attr="value", renumber=True
+        ddf, source="src", destination="dst", edge_attr="wgt", renumber=True
     )
 
-    input_combo["MGGraph"] = dg
-
-    return input_combo
+    return dg
 
 
 # =============================================================================
 # Tests
 # =============================================================================
+
+
 @pytest.mark.mg
-def test_sg_triangles(dask_client, benchmark, input_expected_output):
+@pytest.mark.parametrize("dataset", DATASETS)
+@pytest.mark.parametrize("start", START_LIST)
+def test_sg_triangles(dask_client, dataset, start, benchmark):
     # This test is only for benchmark purposes.
     sg_triangle_results = None
-    G = input_expected_output["SGGraph"]
-    start_list = input_expected_output["start_list"]
-    sg_triangle_results = benchmark(cugraph.triangle_count, G, start_list)
+    G, start = get_sg_graph(dataset, False, start)
+
+    sg_triangle_results = benchmark(cugraph.triangle_count, G, start)
+    sg_triangle_results.sort_values("vertex").reset_index(drop=True)
     assert sg_triangle_results is not None
+    # Clean-up stored dataset edge-lists
+    dataset.unload()
 
 
 @pytest.mark.mg
-def test_triangles(dask_client, benchmark, input_expected_output):
-
-    dg = input_expected_output["MGGraph"]
-    start_list = input_expected_output["start_list"]
-
-    result_counts = benchmark(dcg.triangle_count, dg, start_list)
+@pytest.mark.parametrize("dataset", DATASETS)
+@pytest.mark.parametrize("start", START_LIST)
+def test_triangles(dask_client, dataset, start, benchmark):
+    G, start = get_sg_graph(dataset, False, start)
+    dg = get_mg_graph(dataset, False)
 
+    result_counts = benchmark(dcg.triangle_count, dg, start)
     result_counts = (
         result_counts.drop_duplicates()
         .compute()
@@ -132,8 +102,9 @@ def test_triangles(dask_client, benchmark, input_expected_output):
         .reset_index(drop=True)
         .rename(columns={"counts": "mg_counts"})
     )
-
-    expected_output = input_expected_output["sg_triangle_results"]
+    expected_output = (
+        cugraph.triangle_count(G, start).sort_values("vertex").reset_index(drop=True)
+    )
 
     # Update the mg triangle count with sg triangle count results
     # for easy comparison using cuDF DataFrame methods.
@@ -141,3 +112,5 @@ def test_triangles(dask_client, benchmark, input_expected_output):
     counts_diffs = result_counts.query("mg_counts != sg_counts")
 
     assert len(counts_diffs) == 0
+    # Clean-up stored dataset edge-lists
+    dataset.unload()

From 2eef14ef5f7a4ef1281292fc710e3a37c182f33f Mon Sep 17 00:00:00 2001
From: Ralph Liu <ralphl@nvidia.com>
Date: Thu, 4 Apr 2024 09:23:24 -0700
Subject: [PATCH 14/20] Refactor test_connectivity_mg

---
 .../tests/components/test_connectivity_mg.py  | 54 ++++++++-----------
 1 file changed, 23 insertions(+), 31 deletions(-)

diff --git a/python/cugraph/cugraph/tests/components/test_connectivity_mg.py b/python/cugraph/cugraph/tests/components/test_connectivity_mg.py
index 217c9f0f09f..b1f571cd896 100644
--- a/python/cugraph/cugraph/tests/components/test_connectivity_mg.py
+++ b/python/cugraph/cugraph/tests/components/test_connectivity_mg.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2020-2023, NVIDIA CORPORATION.
+# Copyright (c) 2020-2024, NVIDIA CORPORATION.
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
@@ -15,11 +15,9 @@
 
 import pytest
 
-import cudf
-import dask_cudf
 import cugraph
 import cugraph.dask as dcg
-from cugraph.testing.utils import RAPIDS_DATASET_ROOT_DIR_PATH
+from cugraph.datasets import netscience
 
 
 # =============================================================================
@@ -31,41 +29,35 @@ def setup_function():
     gc.collect()
 
 
+# =============================================================================
+# Parameters
+# =============================================================================
+
+
+DATASETS = [netscience]
 # Directed graph is not currently supported
 IS_DIRECTED = [False, True]
 
 
-# @pytest.mark.skipif(
-#    is_single_gpu(), reason="skipping MG testing on Single GPU system"
-# )
+# =============================================================================
+# Tests
+# =============================================================================
+
+
 @pytest.mark.mg
+@pytest.mark.parametrize("dataset", DATASETS)
 @pytest.mark.parametrize("directed", IS_DIRECTED)
-def test_dask_mg_wcc(dask_client, directed):
+def test_dask_mg_wcc(dask_client, directed, dataset):
 
-    input_data_path = (RAPIDS_DATASET_ROOT_DIR_PATH / "netscience.csv").as_posix()
+    input_data_path = dataset.get_path()
     print(f"dataset={input_data_path}")
-    chunksize = dcg.get_chunksize(input_data_path)
-
-    ddf = dask_cudf.read_csv(
-        input_data_path,
-        chunksize=chunksize,
-        delimiter=" ",
-        names=["src", "dst", "value"],
-        dtype=["int32", "int32", "float32"],
-    )
-
-    df = cudf.read_csv(
-        input_data_path,
-        delimiter=" ",
-        names=["src", "dst", "value"],
-        dtype=["int32", "int32", "float32"],
-    )
-
-    g = cugraph.Graph(directed=directed)
-    g.from_cudf_edgelist(df, "src", "dst", renumber=True)
-
-    dg = cugraph.Graph(directed=directed)
-    dg.from_dask_cudf_edgelist(ddf, "src", "dst")
+    create_using = cugraph.Graph(directed=directed)
+
+    g = dataset.get_graph(create_using=create_using)
+    dataset.unload()
+
+    dg = dataset.get_dask_graph(create_using=create_using)
+    dataset.unload()
 
     if not directed:
         expected_dist = cugraph.weakly_connected_components(g)

From 90c3a1772564f66988ba22e7fab43f4f3b13c2ea Mon Sep 17 00:00:00 2001
From: Ralph Liu <ralphl@nvidia.com>
Date: Thu, 4 Apr 2024 09:23:41 -0700
Subject: [PATCH 15/20] Refactor core tests

---
 .../cugraph/tests/core/test_core_number_mg.py | 126 ++++----------
 .../cugraph/tests/core/test_k_core_mg.py      | 162 ++++++------------
 2 files changed, 86 insertions(+), 202 deletions(-)

diff --git a/python/cugraph/cugraph/tests/core/test_core_number_mg.py b/python/cugraph/cugraph/tests/core/test_core_number_mg.py
index 23214b5f51b..b52711c3c75 100644
--- a/python/cugraph/cugraph/tests/core/test_core_number_mg.py
+++ b/python/cugraph/cugraph/tests/core/test_core_number_mg.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2022-2023, NVIDIA CORPORATION.
+# Copyright (c) 2022-2024, NVIDIA CORPORATION.
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
@@ -15,107 +15,67 @@
 
 import pytest
 
-import dask_cudf
 import cugraph
 import cugraph.dask as dcg
-from cugraph.testing import utils
-from pylibcugraph.testing.utils import gen_fixture_params_product
+from cugraph.datasets import karate, dolphins, karate_asymmetric
 
 
 # =============================================================================
 # Pytest Setup / Teardown - called for each test function
 # =============================================================================
+
+
 def setup_function():
     gc.collect()
 
 
 # =============================================================================
-# Pytest fixtures
+# Parameters
 # =============================================================================
-datasets = utils.DATASETS_UNDIRECTED
-degree_type = ["incoming", "outgoing", "bidirectional"]
-
-fixture_params = gen_fixture_params_product(
-    (datasets, "graph_file"),
-    (degree_type, "degree_type"),
-)
-
-
-@pytest.fixture(scope="module", params=fixture_params)
-def input_combo(request):
-    """
-    Simply return the current combination of params as a dictionary for use in
-    tests or other parameterized fixtures.
-    """
-    parameters = dict(zip(("graph_file", "degree_type"), request.param))
-
-    return parameters
-
-
-@pytest.fixture(scope="module")
-def input_expected_output(dask_client, input_combo):
-    """
-    This fixture returns the inputs and expected results from the Core number
-    algo.
-    """
-    degree_type = input_combo["degree_type"]
-    input_data_path = input_combo["graph_file"]
-    G = utils.generate_cugraph_graph_from_file(
-        input_data_path, directed=False, edgevals=True
-    )
 
-    input_combo["SGGraph"] = G
 
-    sg_core_number_results = cugraph.core_number(G, degree_type)
-    sg_core_number_results = sg_core_number_results.sort_values("vertex").reset_index(
-        drop=True
-    )
+DATASETS = [karate, dolphins]
+DEGREE_TYPE = ["incoming", "outgoing", "bidirectional"]
 
-    input_combo["sg_core_number_results"] = sg_core_number_results
-    input_combo["degree_type"] = degree_type
-
-    # Creating an edgelist from a dask cudf dataframe
-    chunksize = dcg.get_chunksize(input_data_path)
-    ddf = dask_cudf.read_csv(
-        input_data_path,
-        chunksize=chunksize,
-        delimiter=" ",
-        names=["src", "dst", "value"],
-        dtype=["int32", "int32", "float32"],
-    )
 
-    dg = cugraph.Graph(directed=False)
-    dg.from_dask_cudf_edgelist(
-        ddf, source="src", destination="dst", edge_attr="value", renumber=True
-    )
+# =============================================================================
+# Helper Functions
+# =============================================================================
 
-    input_combo["MGGraph"] = dg
 
-    return input_combo
+def get_sg_results(dataset, degree_type):
+    dataset.unload()
+    G = dataset.get_graph(create_using=cugraph.Graph(directed=False))
+    res = cugraph.core_number(G, degree_type)
+    res = res.sort_values("vertex").reset_index(drop=True)
+    return res
 
 
 # =============================================================================
 # Tests
 # =============================================================================
+
+
 @pytest.mark.mg
-def test_sg_core_number(dask_client, benchmark, input_expected_output):
+@pytest.mark.parametrize("dataset", DATASETS)
+@pytest.mark.parametrize("degree_type", DEGREE_TYPE)
+def test_sg_core_number(dask_client, dataset, degree_type, benchmark):
     # This test is only for benchmark purposes.
     sg_core_number_results = None
-    G = input_expected_output["SGGraph"]
-    degree_type = input_expected_output["degree_type"]
-
+    G = dataset.get_graph(create_using=cugraph.Graph(directed=False))
+    dataset.unload()
     sg_core_number_results = benchmark(cugraph.core_number, G, degree_type)
     assert sg_core_number_results is not None
 
 
 @pytest.mark.mg
-def test_core_number(dask_client, benchmark, input_expected_output):
-
-    dg = input_expected_output["MGGraph"]
-    degree_type = input_expected_output["degree_type"]
+@pytest.mark.parametrize("dataset", DATASETS)
+@pytest.mark.parametrize("degree_type", DEGREE_TYPE)
+def test_core_number(dask_client, dataset, degree_type, benchmark):
+    dataset.unload()
+    dg = dataset.get_dask_graph(create_using=cugraph.Graph(directed=False))
 
     result_core_number = benchmark(dcg.core_number, dg, degree_type)
-
     result_core_number = (
         result_core_number.drop_duplicates()
         .compute()
@@ -124,7 +84,7 @@ def test_core_number(dask_client, benchmark, input_expected_output):
         .rename(columns={"core_number": "mg_core_number"})
     )
 
-    expected_output = input_expected_output["sg_core_number_results"]
+    expected_output = get_sg_results(dataset, degree_type)
 
     # Update the mg core number with sg core number results
     # for easy comparison using cuDF DataFrame methods.
@@ -132,33 +92,13 @@ def test_core_number(dask_client, benchmark, input_expected_output):
     counts_diffs = result_core_number.query("mg_core_number != sg_core_number")
 
     assert len(counts_diffs) == 0
+    dataset.unload()
 
 
 @pytest.mark.mg
-def test_core_number_invalid_input(input_expected_output):
-    input_data_path = (
-        utils.RAPIDS_DATASET_ROOT_DIR_PATH / "karate-asymmetric.csv"
-    ).as_posix()
-
-    chunksize = dcg.get_chunksize(input_data_path)
-    ddf = dask_cudf.read_csv(
-        input_data_path,
-        chunksize=chunksize,
-        delimiter=" ",
-        names=["src", "dst", "value"],
-        dtype=["int32", "int32", "float32"],
-    )
-
-    dg = cugraph.Graph(directed=True)
-    dg.from_dask_cudf_edgelist(
-        ddf,
-        source="src",
-        destination="dst",
-        edge_attr="value",
-        renumber=True,
-    )
-
+def test_core_number_invalid_input():
+    dg = karate_asymmetric.get_graph(create_using=cugraph.Graph(directed=True))
     invalid_degree_type = 3
-    dg = input_expected_output["MGGraph"]
+
     with pytest.raises(ValueError):
         dcg.core_number(dg, invalid_degree_type)
diff --git a/python/cugraph/cugraph/tests/core/test_k_core_mg.py b/python/cugraph/cugraph/tests/core/test_k_core_mg.py
index 32c4f4553a2..98ab58c3656 100644
--- a/python/cugraph/cugraph/tests/core/test_k_core_mg.py
+++ b/python/cugraph/cugraph/tests/core/test_k_core_mg.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2023, NVIDIA CORPORATION.
+# Copyright (c) 2024, NVIDIA CORPORATION.
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
@@ -15,58 +15,40 @@
 
 import pytest
 
-import dask_cudf
 import cugraph
 import cugraph.dask as dcg
-from cugraph.testing import utils
+from cugraph.datasets import karate, dolphins
 from cudf.testing.testing import assert_frame_equal
 from cugraph.structure.symmetrize import symmetrize_df
-from pylibcugraph.testing import gen_fixture_params_product
 
 
 # =============================================================================
 # Pytest Setup / Teardown - called for each test function
 # =============================================================================
+
+
 def setup_function():
     gc.collect()
 
 
 # =============================================================================
-# Pytest fixtures
+# Parameters
+# =============================================================================
+
+
+DATASETS = [karate, dolphins]
+CORE_NUMBER = [True, False]
+DEGREE_TYPE = ["bidirectional", "outgoing", "incoming"]
+
+
+# =============================================================================
+# Helper Functions
 # =============================================================================
-datasets = utils.DATASETS_UNDIRECTED
-
-core_number = [True, False]
-degree_type = ["bidirectional", "outgoing", "incoming"]
-
-fixture_params = gen_fixture_params_product(
-    (datasets, "graph_file"), (core_number, "core_number"), (degree_type, "degree_type")
-)
-
-
-@pytest.fixture(scope="module", params=fixture_params)
-def input_combo(request):
-    """
-    Simply return the current combination of params as a dictionary for use in
-    tests or other parameterized fixtures.
-    """
-    parameters = dict(zip(("graph_file", "core_number", "degree_type"), request.param))
-
-    return parameters
-
-
-@pytest.fixture(scope="module")
-def input_expected_output(dask_client, input_combo):
-    """
-    This fixture returns the inputs and expected results from the Core number
-    algo.
-    """
-    core_number = input_combo["core_number"]
-    degree_type = input_combo["degree_type"]
-    input_data_path = input_combo["graph_file"]
-    G = utils.generate_cugraph_graph_from_file(
-        input_data_path, directed=False, edgevals=True
-    )
+
+
+def get_sg_results(dataset, core_number, degree_type):
+    dataset.unload()
+    G = dataset.get_graph(create_using=cugraph.Graph(directed=False))
 
     if core_number:
         # compute the core_number
@@ -74,78 +56,60 @@ def input_expected_output(dask_client, input_combo):
     else:
         core_number = None
 
-    input_combo["core_number"] = core_number
-
-    input_combo["SGGraph"] = G
-
     sg_k_core_graph = cugraph.k_core(
         G, core_number=core_number, degree_type=degree_type
     )
-    sg_k_core_results = sg_k_core_graph.view_edge_list()
+    res = sg_k_core_graph.view_edge_list()
     # FIXME: The result will come asymetric. Symmetrize the results
     srcCol = sg_k_core_graph.source_columns
     dstCol = sg_k_core_graph.destination_columns
     wgtCol = sg_k_core_graph.weight_column
-    sg_k_core_results = (
-        symmetrize_df(sg_k_core_results, srcCol, dstCol, wgtCol)
+    res = (
+        symmetrize_df(res, srcCol, dstCol, wgtCol)
         .sort_values([srcCol, dstCol])
         .reset_index(drop=True)
     )
-
-    input_combo["sg_k_core_results"] = sg_k_core_results
-
-    # Creating an edgelist from a dask cudf dataframe
-    chunksize = dcg.get_chunksize(input_data_path)
-    ddf = dask_cudf.read_csv(
-        input_data_path,
-        chunksize=chunksize,
-        delimiter=" ",
-        names=["src", "dst", "value"],
-        dtype=["int32", "int32", "float32"],
-    )
-
-    dg = cugraph.Graph(directed=False)
-    # FIXME: False when renumbering (C++ and python renumbering)
-    dg.from_dask_cudf_edgelist(
-        ddf,
-        source="src",
-        destination="dst",
-        edge_attr="value",
-        renumber=True,
-    )
-
-    input_combo["MGGraph"] = dg
-
-    return input_combo
+    return res, core_number
 
 
 # =============================================================================
 # Tests
 # =============================================================================
+
+
 @pytest.mark.mg
-def test_sg_k_core(dask_client, benchmark, input_expected_output):
+@pytest.mark.parametrize("dataset", DATASETS)
+@pytest.mark.parametrize("core_number", CORE_NUMBER)
+@pytest.mark.parametrize("degree_type", DEGREE_TYPE)
+def test_sg_k_core(dask_client, dataset, core_number, degree_type, benchmark):
     # This test is only for benchmark purposes.
     sg_k_core = None
-    G = input_expected_output["SGGraph"]
-    core_number = input_expected_output["core_number"]
-    degree_type = input_expected_output["degree_type"]
-
+    dataset.unload()
+    G = dataset.get_graph(create_using=cugraph.Graph(directed=False))
+    if core_number:
+        # compute the core_number
+        core_number = cugraph.core_number(G, degree_type=degree_type)
+    else:
+        core_number = None
     sg_k_core = benchmark(
         cugraph.k_core, G, core_number=core_number, degree_type=degree_type
     )
     assert sg_k_core is not None
+    dataset.unload()
 
 
 @pytest.mark.mg
-def test_dask_mg_k_core(dask_client, benchmark, input_expected_output):
-
-    dg = input_expected_output["MGGraph"]
-    core_number = input_expected_output["core_number"]
+@pytest.mark.parametrize("dataset", DATASETS)
+@pytest.mark.parametrize("core_number", CORE_NUMBER)
+@pytest.mark.parametrize("degree_type", DEGREE_TYPE)
+def test_dask_mg_k_core(dask_client, dataset, core_number, degree_type, benchmark):
+    expected_k_core_results, core_number = get_sg_results(
+        dataset, core_number, degree_type
+    )
 
+    dataset.unload()
+    dg = dataset.get_dask_graph(create_using=cugraph.Graph(directed=False))
     k_core_results = benchmark(dcg.k_core, dg, core_number=core_number)
-
-    expected_k_core_results = input_expected_output["sg_k_core_results"]
-
     k_core_results = (
         k_core_results.compute()
         .sort_values(["src", "dst"])
@@ -156,40 +120,20 @@ def test_dask_mg_k_core(dask_client, benchmark, input_expected_output):
     assert_frame_equal(
         expected_k_core_results, k_core_results, check_dtype=False, check_like=True
     )
+    dataset.unload()
 
 
 @pytest.mark.mg
 def test_dask_mg_k_core_invalid_input(dask_client):
-    input_data_path = datasets[0]
-    chunksize = dcg.get_chunksize(input_data_path)
-    ddf = dask_cudf.read_csv(
-        input_data_path,
-        chunksize=chunksize,
-        delimiter=" ",
-        names=["src", "dst", "value"],
-        dtype=["int32", "int32", "float32"],
-    )
+    dataset = DATASETS[0]
+    dataset.unload()
+    dg = dataset.get_dask_graph(create_using=cugraph.Graph(directed=True))
 
-    dg = cugraph.Graph(directed=True)
-    dg.from_dask_cudf_edgelist(
-        ddf,
-        source="src",
-        destination="dst",
-        edge_attr="value",
-        renumber=True,
-        store_transposed=True,
-    )
     with pytest.raises(ValueError):
         dcg.k_core(dg)
 
-    dg = cugraph.Graph(directed=False)
-    dg.from_dask_cudf_edgelist(
-        ddf,
-        source="src",
-        destination="dst",
-        edge_attr="value",
-        store_transposed=True,
-    )
+    dataset.unload()
+    dg = dataset.get_dask_graph(create_using=cugraph.Graph(directed=False))
 
     degree_type = "invalid"
     with pytest.raises(ValueError):

From 5658418728985fad263aad1a11880550279d3a2f Mon Sep 17 00:00:00 2001
From: Ralph Liu <ralphl@nvidia.com>
Date: Tue, 9 Apr 2024 12:58:01 -0700
Subject: [PATCH 16/20] Updates to internals MG tests

---
 .../tests/internals/test_renumber_mg.py       | 98 ++++++++++---------
 .../internals/test_replicate_edgelist_mg.py   | 71 +++++---------
 .../tests/internals/test_symmetrize_mg.py     |  4 +-
 3 files changed, 79 insertions(+), 94 deletions(-)

diff --git a/python/cugraph/cugraph/tests/internals/test_renumber_mg.py b/python/cugraph/cugraph/tests/internals/test_renumber_mg.py
index e9521f16594..64917d0c747 100644
--- a/python/cugraph/cugraph/tests/internals/test_renumber_mg.py
+++ b/python/cugraph/cugraph/tests/internals/test_renumber_mg.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2019-2023, NVIDIA CORPORATION.
+# Copyright (c) 2019-2024, NVIDIA CORPORATION.
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
@@ -24,33 +24,61 @@
 import dask_cudf
 import cugraph.dask as dcg
 import cugraph
+from cugraph.datasets import karate, karate_disjoint
 from cugraph.testing import utils
 from cugraph.structure.number_map import NumberMap
 from cugraph.dask.common.mg_utils import is_single_gpu
-from cugraph.testing.utils import RAPIDS_DATASET_ROOT_DIR_PATH
 from cudf.testing import assert_frame_equal, assert_series_equal
 
 
 # =============================================================================
 # Pytest Setup / Teardown - called for each test function
 # =============================================================================
+
+
 def setup_function():
     gc.collect()
 
 
+# =============================================================================
+# Parameters
+# =============================================================================
+
+
+DATASETS = [karate]
+DATASETS_UNRENUMBERED = [karate_disjoint]
 IS_DIRECTED = [True, False]
 
 
+# =============================================================================
+# Helper Functions
+# =============================================================================
+
+
+def get_sg_graph(dataset, directed):
+    dataset.unload()
+    g = dataset.get_graph(create_using=cugraph.Graph(directed=directed))
+
+    return g
+
+
+def get_mg_graph(dataset, directed):
+    dataset.unload()
+    dg = dataset.get_dask_graph(create_using=cugraph.Graph(directed=directed))
+
+    return dg
+
+
+# =============================================================================
+# Tests
+# =============================================================================
+
+
 @pytest.mark.mg
 @pytest.mark.skipif(is_single_gpu(), reason="skipping MG testing on Single GPU system")
-@pytest.mark.parametrize(
-    "graph_file",
-    utils.DATASETS_UNRENUMBERED,
-    ids=[f"dataset={d.as_posix()}" for d in utils.DATASETS_UNRENUMBERED],
-)
-def test_mg_renumber(graph_file, dask_client):
-
-    M = utils.read_csv_for_nx(graph_file)
+@pytest.mark.parametrize("dataset", DATASETS_UNRENUMBERED)
+def test_mg_renumber(dataset, dask_client):
+    M = utils.read_csv_for_nx(dataset.get_path())
     sources = cudf.Series(M["0"])
     destinations = cudf.Series(M["1"])
 
@@ -96,13 +124,9 @@ def test_mg_renumber(graph_file, dask_client):
 
 @pytest.mark.mg
 @pytest.mark.skipif(is_single_gpu(), reason="skipping MG testing on Single GPU system")
-@pytest.mark.parametrize(
-    "graph_file",
-    utils.DATASETS_UNRENUMBERED,
-    ids=[f"dataset={d.as_posix()}" for d in utils.DATASETS_UNRENUMBERED],
-)
-def test_mg_renumber_add_internal_vertex_id(graph_file, dask_client):
-    M = utils.read_csv_for_nx(graph_file)
+@pytest.mark.parametrize("dataset", DATASETS_UNRENUMBERED)
+def test_mg_renumber_add_internal_vertex_id(dataset, dask_client):
+    M = utils.read_csv_for_nx(dataset.get_path())
     sources = cudf.Series(M["0"])
     destinations = cudf.Series(M["1"])
 
@@ -131,33 +155,13 @@ def test_mg_renumber_add_internal_vertex_id(graph_file, dask_client):
 
 @pytest.mark.mg
 @pytest.mark.skipif(is_single_gpu(), reason="skipping MG testing on Single GPU system")
+@pytest.mark.parametrize("dataset", DATASETS)
 @pytest.mark.parametrize("directed", IS_DIRECTED)
-def test_dask_mg_pagerank(dask_client, directed):
+def test_dask_mg_pagerank(dask_client, dataset, directed):
     pandas.set_option("display.max_rows", 10000)
 
-    input_data_path = (RAPIDS_DATASET_ROOT_DIR_PATH / "karate.csv").as_posix()
-    chunksize = dcg.get_chunksize(input_data_path)
-
-    ddf = dask_cudf.read_csv(
-        input_data_path,
-        chunksize=chunksize,
-        delimiter=" ",
-        names=["src", "dst", "value"],
-        dtype=["int32", "int32", "float32"],
-    )
-
-    df = cudf.read_csv(
-        input_data_path,
-        delimiter=" ",
-        names=["src", "dst", "value"],
-        dtype=["int32", "int32", "float32"],
-    )
-
-    g = cugraph.Graph(directed=directed)
-    g.from_cudf_edgelist(df, "src", "dst")
-
-    dg = cugraph.Graph(directed=directed)
-    dg.from_dask_cudf_edgelist(ddf, "src", "dst")
+    g = get_sg_graph(dataset, directed)
+    dg = get_mg_graph(dataset, directed)
 
     expected_pr = cugraph.pagerank(g)
     result_pr = dcg.pagerank(dg).compute()
@@ -178,20 +182,18 @@ def test_dask_mg_pagerank(dask_client, directed):
     print("Mismatches:", err)
     assert err == 0
 
+    dataset.unload()
+
 
 @pytest.mark.mg
 @pytest.mark.skipif(is_single_gpu(), reason="skipping MG testing on Single GPU system")
-@pytest.mark.parametrize(
-    "graph_file",
-    utils.DATASETS_UNRENUMBERED,
-    ids=[f"dataset={d.as_posix()}" for d in utils.DATASETS_UNRENUMBERED],
-)
-def test_mg_renumber_common_col_names(graph_file, dask_client):
+@pytest.mark.parametrize("dataset", DATASETS_UNRENUMBERED)
+def test_mg_renumber_common_col_names(dataset, dask_client):
     """
     Ensure that commonly-used column names in the input do not conflict with
     names used internally by NumberMap.
     """
-    M = utils.read_csv_for_nx(graph_file)
+    M = utils.read_csv_for_nx(dataset.get_path())
     sources = cudf.Series(M["0"])
     destinations = cudf.Series(M["1"])
 
diff --git a/python/cugraph/cugraph/tests/internals/test_replicate_edgelist_mg.py b/python/cugraph/cugraph/tests/internals/test_replicate_edgelist_mg.py
index 3bdb5c079ef..09936e954e8 100644
--- a/python/cugraph/cugraph/tests/internals/test_replicate_edgelist_mg.py
+++ b/python/cugraph/cugraph/tests/internals/test_replicate_edgelist_mg.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2022-2023, NVIDIA CORPORATION.
+# Copyright (c) 2022-2024, NVIDIA CORPORATION.
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
@@ -17,73 +17,54 @@
 
 import dask_cudf
 import numpy as np
-from cugraph.testing import UNDIRECTED_DATASETS, karate_disjoint
-
+from cugraph.datasets import karate, dolphins, karate_disjoint
 from cugraph.structure.replicate_edgelist import replicate_edgelist
 from cudf.testing.testing import assert_frame_equal
-from pylibcugraph.testing.utils import gen_fixture_params_product
 
 
 # =============================================================================
 # Pytest Setup / Teardown - called for each test function
 # =============================================================================
+
+
 def setup_function():
     gc.collect()
 
 
+# =============================================================================
+# Parameters
+# =============================================================================
+
+
 edgeWeightCol = "weights"
 edgeIdCol = "edge_id"
 edgeTypeCol = "edge_type"
 srcCol = "src"
 dstCol = "dst"
 
-
-input_data = UNDIRECTED_DATASETS + [karate_disjoint]
-datasets = [pytest.param(d) for d in input_data]
-
-fixture_params = gen_fixture_params_product(
-    (datasets, "graph_file"),
-    ([True, False], "distributed"),
-    ([True, False], "use_weights"),
-    ([True, False], "use_edge_ids"),
-    ([True, False], "use_edge_type_ids"),
-)
-
-
-@pytest.fixture(scope="module", params=fixture_params)
-def input_combo(request):
-    """
-    Simply return the current combination of params as a dictionary for use in
-    tests or other parameterized fixtures.
-    """
-    return dict(
-        zip(
-            (
-                "graph_file",
-                "use_weights",
-                "use_edge_ids",
-                "use_edge_type_ids",
-                "distributed",
-            ),
-            request.param,
-        )
-    )
+DATASETS = [karate, dolphins, karate_disjoint]
+IS_DISTRIBUTED = [True, False]
+USE_WEIGHTS = [True, False]
+USE_EDGE_IDS = [True, False]
+USE_EDGE_TYPE_IDS = [True, False]
 
 
 # =============================================================================
 # Tests
 # =============================================================================
-# @pytest.mark.skipif(
-#    is_single_gpu(), reason="skipping MG testing on Single GPU system"
-# )
-@pytest.mark.mg
-def test_mg_replicate_edgelist(dask_client, input_combo):
-    df = input_combo["graph_file"].get_edgelist()
-    distributed = input_combo["distributed"]
 
-    use_weights = input_combo["use_weights"]
-    use_edge_ids = input_combo["use_edge_ids"]
-    use_edge_type_ids = input_combo["use_edge_type_ids"]
+
+@pytest.mark.mg
+@pytest.mark.parametrize("dataset", DATASETS)
+@pytest.mark.parametrize("distributed", IS_DISTRIBUTED)
+@pytest.mark.parametrize("use_weights", USE_WEIGHTS)
+@pytest.mark.parametrize("use_edge_ids", USE_EDGE_IDS)
+@pytest.mark.parametrize("use_edge_type_ids", USE_EDGE_TYPE_IDS)
+def test_mg_replicate_edgelist(
+    dask_client, dataset, distributed, use_weights, use_edge_ids, use_edge_type_ids
+):
+    dataset.unload()
+    df = dataset.get_edgelist()
 
     columns = [srcCol, dstCol]
     weight = None
diff --git a/python/cugraph/cugraph/tests/internals/test_symmetrize_mg.py b/python/cugraph/cugraph/tests/internals/test_symmetrize_mg.py
index 05cc06e6282..913443fe400 100644
--- a/python/cugraph/cugraph/tests/internals/test_symmetrize_mg.py
+++ b/python/cugraph/cugraph/tests/internals/test_symmetrize_mg.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2022-2023, NVIDIA CORPORATION.
+# Copyright (c) 2022-2024, NVIDIA CORPORATION.
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
@@ -25,6 +25,8 @@
 # =============================================================================
 # Pytest Setup / Teardown - called for each test function
 # =============================================================================
+
+
 def setup_function():
     gc.collect()
 

From 5d79f42b75bf04e791945b4fbcbe0b8e27c96bce Mon Sep 17 00:00:00 2001
From: Ralph Liu <ralphl@nvidia.com>
Date: Tue, 7 May 2024 17:13:29 -0700
Subject: [PATCH 17/20] Remove calls to unload() in MG tests

---
 .../test_batch_betweenness_centrality_mg.py          |  3 ---
 .../test_batch_edge_betweenness_centrality_mg.py     |  2 --
 .../centrality/test_betweenness_centrality_mg.py     |  6 ------
 .../tests/centrality/test_degree_centrality_mg.py    |  5 -----
 .../test_edge_betweenness_centrality_mg.py           |  5 -----
 .../centrality/test_eigenvector_centrality_mg.py     |  8 --------
 .../tests/centrality/test_katz_centrality_mg.py      | 12 ------------
 python/cugraph/cugraph/tests/comms/test_comms_mg.py  |  5 -----
 .../tests/community/test_induced_subgraph_mg.py      |  5 -----
 .../cugraph/tests/community/test_leiden_mg.py        |  6 ------
 .../cugraph/tests/community/test_louvain_mg.py       |  6 ------
 .../tests/community/test_triangle_count_mg.py        |  6 ------
 .../cugraph/tests/components/test_connectivity_mg.py |  3 ---
 .../cugraph/tests/core/test_core_number_mg.py        |  4 ----
 python/cugraph/cugraph/tests/core/test_k_core_mg.py  |  7 -------
 15 files changed, 83 deletions(-)

diff --git a/python/cugraph/cugraph/tests/centrality/test_batch_betweenness_centrality_mg.py b/python/cugraph/cugraph/tests/centrality/test_batch_betweenness_centrality_mg.py
index 1c73ebb0216..9f0980d4199 100644
--- a/python/cugraph/cugraph/tests/centrality/test_batch_betweenness_centrality_mg.py
+++ b/python/cugraph/cugraph/tests/centrality/test_batch_betweenness_centrality_mg.py
@@ -93,6 +93,3 @@ def test_mg_betweenness_centrality(
         second_key="ref_bc",
         epsilon=DEFAULT_EPSILON,
     )
-
-    # Clean-up stored dataset edge-lists
-    dataset.unload()
diff --git a/python/cugraph/cugraph/tests/centrality/test_batch_edge_betweenness_centrality_mg.py b/python/cugraph/cugraph/tests/centrality/test_batch_edge_betweenness_centrality_mg.py
index 4530dd3da86..4764c01f0fc 100644
--- a/python/cugraph/cugraph/tests/centrality/test_batch_edge_betweenness_centrality_mg.py
+++ b/python/cugraph/cugraph/tests/centrality/test_batch_edge_betweenness_centrality_mg.py
@@ -84,5 +84,3 @@ def test_mg_edge_betweenness_centrality(
         second_key="ref_bc",
         epsilon=DEFAULT_EPSILON,
     )
-    # Clean-up stored dataset edge-lists
-    dataset.unload()
diff --git a/python/cugraph/cugraph/tests/centrality/test_betweenness_centrality_mg.py b/python/cugraph/cugraph/tests/centrality/test_betweenness_centrality_mg.py
index c94c2dcaff6..35e199093ce 100644
--- a/python/cugraph/cugraph/tests/centrality/test_betweenness_centrality_mg.py
+++ b/python/cugraph/cugraph/tests/centrality/test_betweenness_centrality_mg.py
@@ -49,14 +49,12 @@ def setup_function():
 
 
 def get_sg_graph(dataset, directed):
-    dataset.unload()
     G = dataset.get_graph(create_using=cugraph.Graph(directed=directed))
 
     return G
 
 
 def get_mg_graph(dataset, directed):
-    dataset.unload()
     ddf = dataset.get_dask_edgelist()
     dg = cugraph.Graph(directed=directed)
     dg.from_dask_cudf_edgelist(
@@ -96,7 +94,6 @@ def test_dask_mg_betweenness_centrality(
     benchmark,
 ):
     g = get_sg_graph(dataset, directed)
-    dataset.unload()
     dg = get_mg_graph(dataset, directed)
     random_state = subset_seed
 
@@ -143,6 +140,3 @@ def test_dask_mg_betweenness_centrality(
     diff = cupy.isclose(mg_bc_results, sg_bc_results)
 
     assert diff.all()
-
-    # Clean-up stored dataset edge-lists
-    dataset.unload()
diff --git a/python/cugraph/cugraph/tests/centrality/test_degree_centrality_mg.py b/python/cugraph/cugraph/tests/centrality/test_degree_centrality_mg.py
index 68daff9238c..8606649c745 100644
--- a/python/cugraph/cugraph/tests/centrality/test_degree_centrality_mg.py
+++ b/python/cugraph/cugraph/tests/centrality/test_degree_centrality_mg.py
@@ -45,14 +45,12 @@ def setup_function():
 
 
 def get_sg_graph(dataset, directed):
-    dataset.unload()
     G = dataset.get_graph(create_using=cugraph.Graph(directed=directed))
 
     return G
 
 
 def get_mg_graph(dataset, directed):
-    dataset.unload()
     ddf = dataset.get_dask_edgelist()
     dg = cugraph.Graph(directed=directed)
     dg.from_dask_cudf_edgelist(
@@ -118,6 +116,3 @@ def test_dask_mg_degree(dask_client, dataset, directed):
         check_names=False,
         check_dtype=False,
     )
-
-    # Clean-up stored dataset edge-lists
-    dataset.unload()
diff --git a/python/cugraph/cugraph/tests/centrality/test_edge_betweenness_centrality_mg.py b/python/cugraph/cugraph/tests/centrality/test_edge_betweenness_centrality_mg.py
index 80acfe1c4ad..5b83a05e2a2 100644
--- a/python/cugraph/cugraph/tests/centrality/test_edge_betweenness_centrality_mg.py
+++ b/python/cugraph/cugraph/tests/centrality/test_edge_betweenness_centrality_mg.py
@@ -47,7 +47,6 @@ def setup_function():
 
 
 def get_sg_graph(dataset, directed, edge_ids):
-    dataset.unload()
     df = dataset.get_edgelist()
     if edge_ids:
         if not directed:
@@ -71,7 +70,6 @@ def get_sg_graph(dataset, directed, edge_ids):
 
 
 def get_mg_graph(dataset, directed, edge_ids, weight):
-    dataset.unload()
     ddf = dataset.get_dask_edgelist()
 
     if weight:
@@ -178,6 +176,3 @@ def test_dask_mg_edge_betweenness_centrality(
 
         assert len(edge_bc_diffs1) == 0
         assert len(edge_bc_diffs2) == 0
-
-    # Clean-up stored dataset edge-lists
-    dataset.unload()
diff --git a/python/cugraph/cugraph/tests/centrality/test_eigenvector_centrality_mg.py b/python/cugraph/cugraph/tests/centrality/test_eigenvector_centrality_mg.py
index 8cd77fb5e24..3a840c82e95 100644
--- a/python/cugraph/cugraph/tests/centrality/test_eigenvector_centrality_mg.py
+++ b/python/cugraph/cugraph/tests/centrality/test_eigenvector_centrality_mg.py
@@ -52,7 +52,6 @@ def setup_function():
 def test_dask_mg_eigenvector_centrality(dask_client, dataset, directed):
     input_data_path = dataset.get_path()
     print(f"dataset={input_data_path}")
-    dataset.unload()
     ddf = dataset.get_dask_edgelist()
     dg = cugraph.Graph(directed=True)
     dg.from_dask_cudf_edgelist(ddf, "src", "dst", store_transposed=True)
@@ -89,15 +88,11 @@ def test_dask_mg_eigenvector_centrality(dask_client, dataset, directed):
             err = err + 1
     assert err == 0
 
-    # Clean-up stored dataset edge-lists
-    dataset.unload()
-
 
 @pytest.mark.mg
 def test_dask_mg_eigenvector_centrality_transposed_false(dask_client):
     dataset = DATASETS[0]
 
-    dataset.unload()
     ddf = dataset.get_dask_edgelist()
     dg = cugraph.Graph(directed=True)
     dg.from_dask_cudf_edgelist(ddf, "src", "dst", store_transposed=False)
@@ -110,6 +105,3 @@ def test_dask_mg_eigenvector_centrality_transposed_false(dask_client):
 
     with pytest.warns(UserWarning, match=warning_msg):
         dcg.eigenvector_centrality(dg)
-
-    # Clean-up stored dataset edge-lists
-    dataset.unload()
diff --git a/python/cugraph/cugraph/tests/centrality/test_katz_centrality_mg.py b/python/cugraph/cugraph/tests/centrality/test_katz_centrality_mg.py
index ebbe5974814..5dcbd8173df 100644
--- a/python/cugraph/cugraph/tests/centrality/test_katz_centrality_mg.py
+++ b/python/cugraph/cugraph/tests/centrality/test_katz_centrality_mg.py
@@ -53,7 +53,6 @@ def test_dask_mg_katz_centrality(dask_client, dataset, directed):
     input_data_path = dataset.get_path()
     print(f"dataset={input_data_path}")
 
-    dataset.unload()
     ddf = dataset.get_dask_edgelist()
     dg = cugraph.Graph(directed=True)
     dg.from_dask_cudf_edgelist(ddf, "src", "dst", store_transposed=True)
@@ -95,16 +94,12 @@ def test_dask_mg_katz_centrality(dask_client, dataset, directed):
             err = err + 1
     assert err == 0
 
-    # Clean-up stored dataset edge-lists
-    dataset.unload()
-
 
 @pytest.mark.mg
 @pytest.mark.skipif(is_single_gpu(), reason="skipping MG testing on Single GPU system")
 @pytest.mark.parametrize("dataset", DATASETS)
 @pytest.mark.parametrize("directed", IS_DIRECTED)
 def test_dask_mg_katz_centrality_nstart(dask_client, dataset, directed):
-    dataset.unload()
     ddf = dataset.get_dask_edgelist()
     dg = cugraph.Graph(directed=True)
     dg.from_dask_cudf_edgelist(ddf, "src", "dst", store_transposed=True)
@@ -136,14 +131,10 @@ def test_dask_mg_katz_centrality_nstart(dask_client, dataset, directed):
             err = err + 1
     assert err == 0
 
-    # Clean-up stored dataset edge-lists
-    dataset.unload()
-
 
 @pytest.mark.mg
 @pytest.mark.parametrize("dataset", DATASETS)
 def test_dask_mg_katz_centrality_transposed_false(dask_client, dataset):
-    dataset.unload()
     ddf = dataset.get_dask_edgelist()
     dg = cugraph.Graph(directed=True)
     dg.from_dask_cudf_edgelist(ddf, "src", "dst", store_transposed=False)
@@ -156,6 +147,3 @@ def test_dask_mg_katz_centrality_transposed_false(dask_client, dataset):
 
     with pytest.warns(UserWarning, match=warning_msg):
         dcg.katz_centrality(dg)
-
-    # Clean-up stored dataset edge-lists
-    dataset.unload()
diff --git a/python/cugraph/cugraph/tests/comms/test_comms_mg.py b/python/cugraph/cugraph/tests/comms/test_comms_mg.py
index 18d4db2d77f..d096eb7e5c2 100644
--- a/python/cugraph/cugraph/tests/comms/test_comms_mg.py
+++ b/python/cugraph/cugraph/tests/comms/test_comms_mg.py
@@ -45,7 +45,6 @@ def setup_function():
 
 def get_pagerank_result(dataset, is_mg):
     """Return the cugraph.pagerank result for an MG or SG graph"""
-    dataset.unload()
 
     if is_mg:
         dg = dataset.get_dask_graph(store_transposed=True)
@@ -113,7 +112,3 @@ def test_dask_mg_pagerank(dask_client, directed):
             err2 = err2 + 1
     print("Mismatches in ", input_data_path2, ": ", err2)
     assert err1 == err2 == 0
-
-    # Clean-up stored dataset edge-lists
-    karate.unload()
-    dolphins.unload()
diff --git a/python/cugraph/cugraph/tests/community/test_induced_subgraph_mg.py b/python/cugraph/cugraph/tests/community/test_induced_subgraph_mg.py
index 9e199840fbb..311fd7a24bc 100644
--- a/python/cugraph/cugraph/tests/community/test_induced_subgraph_mg.py
+++ b/python/cugraph/cugraph/tests/community/test_induced_subgraph_mg.py
@@ -48,14 +48,12 @@ def setup_function():
 
 
 def get_sg_graph(dataset, directed):
-    dataset.unload()
     G = dataset.get_graph(create_using=cugraph.Graph(directed=directed))
 
     return G
 
 
 def get_mg_graph(dataset, directed):
-    dataset.unload()
     ddf = dataset.get_dask_edgelist()
     dg = cugraph.Graph(directed=directed)
     dg.from_dask_cudf_edgelist(
@@ -121,6 +119,3 @@ def test_mg_induced_subgraph(
         # of all the vertices and ensure that there is None
         assert sg_induced_subgraph is None
         assert mg_df is None
-
-    # Clean-up stored dataset edge-lists
-    dataset.unload()
diff --git a/python/cugraph/cugraph/tests/community/test_leiden_mg.py b/python/cugraph/cugraph/tests/community/test_leiden_mg.py
index 4ed7244fe29..5bbb513a615 100644
--- a/python/cugraph/cugraph/tests/community/test_leiden_mg.py
+++ b/python/cugraph/cugraph/tests/community/test_leiden_mg.py
@@ -56,9 +56,6 @@ def test_mg_leiden_with_edgevals_directed_graph(dask_client, dataset):
     with pytest.raises(ValueError):
         parts, mod = dcg.leiden(dg)
 
-    # Clean-up stored dataset edge-lists
-    dataset.unload()
-
 
 @pytest.mark.mg
 @pytest.mark.parametrize("dataset", DATASETS)
@@ -72,6 +69,3 @@ def test_mg_leiden_with_edgevals_undirected_graph(dask_client, dataset):
     print(parts.compute())
     print(mod)
     print()
-
-    # Clean-up stored dataset edge-lists
-    dataset.unload()
diff --git a/python/cugraph/cugraph/tests/community/test_louvain_mg.py b/python/cugraph/cugraph/tests/community/test_louvain_mg.py
index ce89f7f62a2..0dff7f1c8b0 100644
--- a/python/cugraph/cugraph/tests/community/test_louvain_mg.py
+++ b/python/cugraph/cugraph/tests/community/test_louvain_mg.py
@@ -43,9 +43,6 @@ def test_mg_louvain_with_edgevals_directed_graph(dask_client, dataset):
     with pytest.raises(ValueError):
         parts, mod = dcg.louvain(dg)
 
-    # Clean-up stored dataset edge-lists
-    dataset.unload()
-
 
 @pytest.mark.mg
 @pytest.mark.parametrize("dataset", DATASETS)
@@ -59,6 +56,3 @@ def test_mg_louvain_with_edgevals_undirected_graph(dask_client, dataset):
     print(parts.compute())
     print(mod)
     print()
-
-    # Clean-up stored dataset edge-lists
-    dataset.unload()
diff --git a/python/cugraph/cugraph/tests/community/test_triangle_count_mg.py b/python/cugraph/cugraph/tests/community/test_triangle_count_mg.py
index 02723d75527..e2c47af8a1b 100644
--- a/python/cugraph/cugraph/tests/community/test_triangle_count_mg.py
+++ b/python/cugraph/cugraph/tests/community/test_triangle_count_mg.py
@@ -45,7 +45,6 @@ def setup_function():
 
 
 def get_sg_graph(dataset, directed, start):
-    dataset.unload()
     G = dataset.get_graph(create_using=cugraph.Graph(directed=directed))
     if start:
         # sample k nodes from the cuGraph graph
@@ -57,7 +56,6 @@ def get_sg_graph(dataset, directed, start):
 
 
 def get_mg_graph(dataset, directed):
-    dataset.unload()
     ddf = dataset.get_dask_edgelist()
     dg = cugraph.Graph(directed=directed)
     dg.from_dask_cudf_edgelist(
@@ -83,8 +81,6 @@ def test_sg_triangles(dask_client, dataset, start, benchmark):
     sg_triangle_results = benchmark(cugraph.triangle_count, G, start)
     sg_triangle_results.sort_values("vertex").reset_index(drop=True)
     assert sg_triangle_results is not None
-    # Clean-up stored dataset edge-lists
-    dataset.unload()
 
 
 @pytest.mark.mg
@@ -112,5 +108,3 @@ def test_triangles(dask_client, dataset, start, benchmark):
     counts_diffs = result_counts.query("mg_counts != sg_counts")
 
     assert len(counts_diffs) == 0
-    # Clean-up stored dataset edge-lists
-    dataset.unload()
diff --git a/python/cugraph/cugraph/tests/components/test_connectivity_mg.py b/python/cugraph/cugraph/tests/components/test_connectivity_mg.py
index b1f571cd896..d1f6ddae604 100644
--- a/python/cugraph/cugraph/tests/components/test_connectivity_mg.py
+++ b/python/cugraph/cugraph/tests/components/test_connectivity_mg.py
@@ -54,10 +54,7 @@ def test_dask_mg_wcc(dask_client, directed, dataset):
     create_using = cugraph.Graph(directed=directed)
 
     g = dataset.get_graph(create_using=create_using)
-    dataset.unload()
-
     dg = dataset.get_dask_graph(create_using=create_using)
-    dataset.unload()
 
     if not directed:
         expected_dist = cugraph.weakly_connected_components(g)
diff --git a/python/cugraph/cugraph/tests/core/test_core_number_mg.py b/python/cugraph/cugraph/tests/core/test_core_number_mg.py
index b52711c3c75..a85181da7c1 100644
--- a/python/cugraph/cugraph/tests/core/test_core_number_mg.py
+++ b/python/cugraph/cugraph/tests/core/test_core_number_mg.py
@@ -44,7 +44,6 @@ def setup_function():
 
 
 def get_sg_results(dataset, degree_type):
-    dataset.unload()
     G = dataset.get_graph(create_using=cugraph.Graph(directed=False))
     res = cugraph.core_number(G, degree_type)
     res = res.sort_values("vertex").reset_index(drop=True)
@@ -63,7 +62,6 @@ def test_sg_core_number(dask_client, dataset, degree_type, benchmark):
     # This test is only for benchmark purposes.
     sg_core_number_results = None
     G = dataset.get_graph(create_using=cugraph.Graph(directed=False))
-    dataset.unload()
     sg_core_number_results = benchmark(cugraph.core_number, G, degree_type)
     assert sg_core_number_results is not None
 
@@ -72,7 +70,6 @@ def test_sg_core_number(dask_client, dataset, degree_type, benchmark):
 @pytest.mark.parametrize("dataset", DATASETS)
 @pytest.mark.parametrize("degree_type", DEGREE_TYPE)
 def test_core_number(dask_client, dataset, degree_type, benchmark):
-    dataset.unload()
     dg = dataset.get_dask_graph(create_using=cugraph.Graph(directed=False))
 
     result_core_number = benchmark(dcg.core_number, dg, degree_type)
@@ -92,7 +89,6 @@ def test_core_number(dask_client, dataset, degree_type, benchmark):
     counts_diffs = result_core_number.query("mg_core_number != sg_core_number")
 
     assert len(counts_diffs) == 0
-    dataset.unload()
 
 
 @pytest.mark.mg
diff --git a/python/cugraph/cugraph/tests/core/test_k_core_mg.py b/python/cugraph/cugraph/tests/core/test_k_core_mg.py
index 98ab58c3656..3e8f97f6b1d 100644
--- a/python/cugraph/cugraph/tests/core/test_k_core_mg.py
+++ b/python/cugraph/cugraph/tests/core/test_k_core_mg.py
@@ -47,7 +47,6 @@ def setup_function():
 
 
 def get_sg_results(dataset, core_number, degree_type):
-    dataset.unload()
     G = dataset.get_graph(create_using=cugraph.Graph(directed=False))
 
     if core_number:
@@ -84,7 +83,6 @@ def get_sg_results(dataset, core_number, degree_type):
 def test_sg_k_core(dask_client, dataset, core_number, degree_type, benchmark):
     # This test is only for benchmark purposes.
     sg_k_core = None
-    dataset.unload()
     G = dataset.get_graph(create_using=cugraph.Graph(directed=False))
     if core_number:
         # compute the core_number
@@ -95,7 +93,6 @@ def test_sg_k_core(dask_client, dataset, core_number, degree_type, benchmark):
         cugraph.k_core, G, core_number=core_number, degree_type=degree_type
     )
     assert sg_k_core is not None
-    dataset.unload()
 
 
 @pytest.mark.mg
@@ -107,7 +104,6 @@ def test_dask_mg_k_core(dask_client, dataset, core_number, degree_type, benchmar
         dataset, core_number, degree_type
     )
 
-    dataset.unload()
     dg = dataset.get_dask_graph(create_using=cugraph.Graph(directed=False))
     k_core_results = benchmark(dcg.k_core, dg, core_number=core_number)
     k_core_results = (
@@ -120,19 +116,16 @@ def test_dask_mg_k_core(dask_client, dataset, core_number, degree_type, benchmar
     assert_frame_equal(
         expected_k_core_results, k_core_results, check_dtype=False, check_like=True
     )
-    dataset.unload()
 
 
 @pytest.mark.mg
 def test_dask_mg_k_core_invalid_input(dask_client):
     dataset = DATASETS[0]
-    dataset.unload()
     dg = dataset.get_dask_graph(create_using=cugraph.Graph(directed=True))
 
     with pytest.raises(ValueError):
         dcg.k_core(dg)
 
-    dataset.unload()
     dg = dataset.get_dask_graph(create_using=cugraph.Graph(directed=False))
 
     degree_type = "invalid"

From b55e935a67cd41fa777a388edc1855e207f3f549 Mon Sep 17 00:00:00 2001
From: Ralph Liu <ralphl@nvidia.com>
Date: Wed, 8 May 2024 11:26:03 -0700
Subject: [PATCH 18/20] Add MG vs. SG Check

---
 .../centrality/test_betweenness_centrality.py | 21 ++++++++++++-------
 1 file changed, 13 insertions(+), 8 deletions(-)

diff --git a/python/cugraph/cugraph/tests/centrality/test_betweenness_centrality.py b/python/cugraph/cugraph/tests/centrality/test_betweenness_centrality.py
index db34c68a054..ff8859a01b1 100644
--- a/python/cugraph/cugraph/tests/centrality/test_betweenness_centrality.py
+++ b/python/cugraph/cugraph/tests/centrality/test_betweenness_centrality.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2020-2023, NVIDIA CORPORATION.:
+# Copyright (c) 2020-2024, NVIDIA CORPORATION.
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
@@ -111,11 +111,18 @@ def calc_betweenness_centrality(
     else:
         edge_attr = None
 
-    G = graph_file.get_graph(
-        download=True,
-        create_using=cugraph.Graph(directed=directed),
-        ignore_weights=not edgevals,
-    )
+    G = None
+    if multi_gpu_batch:
+        G = graph_file.get_dask_graph(
+            create_using=cugraph.Graph(directed=directed), ignore_weights=not edgevals
+        )
+        G.enable_batch()
+    else:
+        G = graph_file.get_graph(
+            download=True,
+            create_using=cugraph.Graph(directed=directed),
+            ignore_weights=not edgevals,
+        )
 
     M = G.to_pandas_edgelist().rename(
         columns={"src": "0", "dst": "1", "wgt": edge_attr}
@@ -130,8 +137,6 @@ def calc_betweenness_centrality(
     )
 
     assert G is not None and Gnx is not None
-    if multi_gpu_batch:
-        G.enable_batch()
 
     calc_func = None
     if k is not None and seed is not None:

From b8e22178b21d3670eaa5dcca4f3004b6583a9a91 Mon Sep 17 00:00:00 2001
From: Ralph Liu <ralphl@nvidia.com>
Date: Thu, 9 May 2024 08:14:45 -0700
Subject: [PATCH 19/20] Style: remove extra blank line in test_connectivity_mg.py

---
 python/cugraph/cugraph/tests/components/test_connectivity_mg.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/python/cugraph/cugraph/tests/components/test_connectivity_mg.py b/python/cugraph/cugraph/tests/components/test_connectivity_mg.py
index 5cb13433d5b..d1f6ddae604 100644
--- a/python/cugraph/cugraph/tests/components/test_connectivity_mg.py
+++ b/python/cugraph/cugraph/tests/components/test_connectivity_mg.py
@@ -56,7 +56,6 @@ def test_dask_mg_wcc(dask_client, directed, dataset):
     g = dataset.get_graph(create_using=create_using)
     dg = dataset.get_dask_graph(create_using=create_using)
 
-
     if not directed:
         expected_dist = cugraph.weakly_connected_components(g)
         result_dist = dcg.weakly_connected_components(dg)

From 264e37ed5e7ecebfb6e9dc1b943abf4432e02b31 Mon Sep 17 00:00:00 2001
From: Ralph Liu <ralphl@nvidia.com>
Date: Fri, 10 May 2024 09:32:27 -0700
Subject: [PATCH 20/20] Add MG graph helper to test_connectivity_mg.py

---
 .../tests/components/test_connectivity_mg.py  | 24 +++++++++++++++----
 1 file changed, 19 insertions(+), 5 deletions(-)

diff --git a/python/cugraph/cugraph/tests/components/test_connectivity_mg.py b/python/cugraph/cugraph/tests/components/test_connectivity_mg.py
index d1f6ddae604..4ab251c0e29 100644
--- a/python/cugraph/cugraph/tests/components/test_connectivity_mg.py
+++ b/python/cugraph/cugraph/tests/components/test_connectivity_mg.py
@@ -39,6 +39,21 @@ def setup_function():
 IS_DIRECTED = [False, True]
 
 
+# =============================================================================
+# Helper
+# =============================================================================
+
+
+def get_mg_graph(dataset, directed):
+    """Returns an MG graph"""
+    ddf = dataset.get_dask_edgelist()
+
+    dg = cugraph.Graph(directed=directed)
+    dg.from_dask_cudf_edgelist(ddf, "src", "dst", "wgt")
+
+    return dg
+
+
 # =============================================================================
 # Tests
 # =============================================================================
@@ -47,15 +62,14 @@ def setup_function():
 @pytest.mark.mg
 @pytest.mark.parametrize("dataset", DATASETS)
 @pytest.mark.parametrize("directed", IS_DIRECTED)
-def test_dask_mg_wcc(dask_client, directed, dataset):
-
+def test_dask_mg_wcc(dask_client, dataset, directed):
     input_data_path = dataset.get_path()
     print(f"dataset={input_data_path}")
-    create_using = cugraph.Graph(directed=directed)
 
-    g = dataset.get_graph(create_using=create_using)
-    dg = dataset.get_dask_graph(create_using=create_using)
+    g = dataset.get_graph(create_using=cugraph.Graph(directed=directed))
+    dg = get_mg_graph(dataset, directed)
 
+    # breakpoint()
     if not directed:
         expected_dist = cugraph.weakly_connected_components(g)
         result_dist = dcg.weakly_connected_components(dg)