Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

22.02 dep fix #1974

Merged
merged 9 commits into from
Dec 2, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ __pycache__
.cache
.coverage
.vscode
.lock
*.swp
*.pytest_cache
DartConfiguration.tcl
Expand All @@ -32,6 +33,9 @@ dist/
cugraph.egg-info/
python/build
python/cugraph/bindings/*.cpp

## pylibcugraph build directories & artifacts
python/pylibcugraph/pylibcugraph.egg-info

## Patching
*.diff
Expand Down Expand Up @@ -82,6 +86,9 @@ python/_external_repositories/

# created by Dask tests
python/dask-worker-space
python/cugraph/dask-worker-space
python/cugraph/cugraph/dask-worker-space
python/cugraph/cugraph/tests/dask-worker-space
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

These 3 are helpful because I won't have to exclude them myself before pushing a commit


# Sphinx docs & build artifacts
docs/cugraph/source/api_docs/api/*
9 changes: 4 additions & 5 deletions SOURCEBUILD.md
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,8 @@ conda env create --name cugraph_dev --file conda/environments/cugraph_dev_cuda11
# for CUDA 11.4
conda env create --name cugraph_dev --file conda/environments/cugraph_dev_cuda11.4.yml

# for CUDA 11.5
conda env create --name cugraph_dev --file conda/environments/cugraph_dev_cuda11.5.yml

# activate the environment
conda activate cugraph_dev
Expand All @@ -65,11 +67,8 @@ conda deactivate

```bash

# for CUDA 11.0
conda env update --name cugraph_dev --file conda/environments/cugraph_dev_cuda11.0.yml

# for CUDA 11.2
conda env update --name cugraph_dev --file conda/environments/cugraph_dev_cuda11.2.yml
# Where XXX is the CUDA 11 version
conda env update --name cugraph_dev --file conda/environments/cugraph_dev_cuda11.XXX.yml

conda activate cugraph_dev
```
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -318,6 +318,6 @@ def _initialize_vertices_from_identifiers_list(G, identifiers):
if G.renumbered:
vertices = G.lookup_internal_vertex_id(
cudf.Series(vertices)
).to_array()
).to_numpy()

return vertices
2 changes: 1 addition & 1 deletion python/cugraph/cugraph/components/connectivity.py
Original file line number Diff line number Diff line change
Expand Up @@ -89,7 +89,7 @@ def _convert_df_to_output_type(df, input_type, return_labels):
if is_cp_matrix_type(input_type):
labels = cp.fromDlpack(sorted_df["labels"].to_dlpack())
else:
labels = sorted_df["labels"].to_array()
labels = sorted_df["labels"].to_numpy()
return (n_components, labels)
else:
return n_components
Expand Down
4 changes: 2 additions & 2 deletions python/cugraph/cugraph/tests/test_balanced_cut.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ def cugraph_call(G, partitions):
score = cugraph.analyzeClustering_edge_cut(
G, partitions, df, 'vertex', 'cluster'
)
return set(df["vertex"].to_array()), score
return set(df["vertex"].to_numpy()), score


def random_call(G, partitions):
Expand Down Expand Up @@ -149,7 +149,7 @@ def test_edge_cut_clustering_with_edgevals_nx(graph_file, partitions):
G, partitions, gdf, 'vertex', 'cluster'
)

df = set(gdf["vertex"].to_array())
df = set(gdf["vertex"].to_numpy())

Gcu = cugraph.utilities.convert_from_nx(G)
rand_vid, rand_score = random_call(Gcu, partitions)
Expand Down
6 changes: 3 additions & 3 deletions python/cugraph/cugraph/tests/test_bfs.py
Original file line number Diff line number Diff line change
Expand Up @@ -202,14 +202,14 @@ def _compare_bfs(cugraph_df, nx_distances, source):
cu_distances = {
vertex: dist
for vertex, dist in zip(
cugraph_df["vertex"].to_array(), cugraph_df["distance"].to_array()
cugraph_df["vertex"].to_numpy(), cugraph_df["distance"].to_numpy()
)
}
cu_predecessors = {
vertex: dist
for vertex, dist in zip(
cugraph_df["vertex"].to_array(),
cugraph_df["predecessor"].to_array()
cugraph_df["vertex"].to_numpy(),
cugraph_df["predecessor"].to_numpy()
)
}

Expand Down
85 changes: 0 additions & 85 deletions python/cugraph/cugraph/tests/test_bfs_bsp.py

This file was deleted.

2 changes: 2 additions & 0 deletions python/cugraph/cugraph/tests/test_connectivity.py
Original file line number Diff line number Diff line change
Expand Up @@ -355,6 +355,8 @@ def test_strong_cc(gpubenchmark, dataset_nxresults_strong,
cugraph.strongly_connected_components,
input_G_or_matrix)

assert isinstance(input_G_or_matrix, cugraph_input_type)

# while cugraph returns a component label for each vertex;
cg_n_components = len(cugraph_labels)

Expand Down
28 changes: 14 additions & 14 deletions python/cugraph/cugraph/tests/test_graph.py
Original file line number Diff line number Diff line change
Expand Up @@ -146,8 +146,8 @@ def has_pair(first_arr, second_arr, first, second):

def check_all_two_hops(df, M):
num_verts = len(M.indptr) - 1
first_arr = df["first"].to_array()
second_arr = df["second"].to_array()
first_arr = df["first"].to_numpy()
second_arr = df["second"].to_numpy()
for start in range(num_verts):
for idx in range(M.indptr[start], M.indptr[start + 1]):
mid = M.indices[idx]
Expand All @@ -173,7 +173,7 @@ def test_add_edge_list_to_adj_list(graph_file):
indices_exp = M.indices

# cugraph add_edge_list to_adj_list call
G = cugraph.DiGraph()
G = cugraph.Graph(directed=True)
G.from_cudf_edgelist(cu_M, source="0", destination="1", renumber=False)
offsets_cu, indices_cu, values_cu = G.view_adj_list()
compare_series(offsets_cu, offsets_exp)
Expand All @@ -198,7 +198,7 @@ def test_add_adj_list_to_edge_list(graph_file):
destinations_exp = cudf.Series(Mcoo.col)

# cugraph add_adj_list to_edge_list call
G = cugraph.DiGraph()
G = cugraph.Graph(directed=True)
G.from_cudf_adjlist(offsets, indices, None)

edgelist = G.view_edge_list()
Expand All @@ -219,7 +219,7 @@ def test_view_edge_list_from_adj_list(graph_file):

offsets = cudf.Series(Mcsr.indptr)
indices = cudf.Series(Mcsr.indices)
G = cugraph.DiGraph()
G = cugraph.Graph(directed=True)
G.from_cudf_adjlist(offsets, indices, None)
edgelist_df = G.view_edge_list()
Mcoo = Mcsr.tocoo()
Expand All @@ -245,7 +245,7 @@ def test_delete_edge_list_delete_adj_list(graph_file):
indices = cudf.Series(Mcsr.indices)

# cugraph delete_adj_list delete_edge_list call
G = cugraph.DiGraph()
G = cugraph.Graph(directed=True)
G.from_cudf_edgelist(df, source="src", destination="dst")
G.delete_edge_list()
with pytest.raises(Exception):
Expand Down Expand Up @@ -273,7 +273,7 @@ def test_add_edge_or_adj_list_after_add_edge_or_adj_list(graph_file):
offsets = cudf.Series(Mcsr.indptr)
indices = cudf.Series(Mcsr.indices)

G = cugraph.DiGraph()
G = cugraph.Graph(directed=True)

# If cugraph has at least one graph representation, adding a new graph
# should fail to prevent a single graph object storing two different
Expand Down Expand Up @@ -370,10 +370,10 @@ def test_view_edge_list_for_Graph(graph_file):
# Compare nx and cugraph edges when viewing edgelist
# assert cu_edge_list.equals(nx_edge_list)
assert (
cu_edge_list["src"].to_array() == nx_edge_list["src"].to_array()
cu_edge_list["src"].to_numpy() == nx_edge_list["src"].to_numpy()
).all()
assert (
cu_edge_list["dst"].to_array() == nx_edge_list["dst"].to_array()
cu_edge_list["dst"].to_numpy() == nx_edge_list["dst"].to_numpy()
).all()


Expand Down Expand Up @@ -416,7 +416,7 @@ def test_consolidation(graph_file):
def test_two_hop_neighbors(graph_file):
cu_M = utils.read_csv_file(graph_file)

G = cugraph.DiGraph()
G = cugraph.Graph(directed=True)
G.from_cudf_edgelist(cu_M, source="0", destination="1", edge_attr="2")

df = G.get_two_hop_neighbors()
Expand All @@ -436,7 +436,7 @@ def test_degree_functionality(graph_file):
M = utils.read_csv_for_nx(graph_file)
cu_M = utils.read_csv_file(graph_file)

G = cugraph.DiGraph()
G = cugraph.Graph(directed=True)
G.from_cudf_edgelist(cu_M, source="0", destination="1", edge_attr="2")

Gnx = nx.from_pandas_edgelist(
Expand Down Expand Up @@ -474,7 +474,7 @@ def test_degrees_functionality(graph_file):
M = utils.read_csv_for_nx(graph_file)
cu_M = utils.read_csv_file(graph_file)

G = cugraph.DiGraph()
G = cugraph.Graph(directed=True)
G.from_cudf_edgelist(cu_M, source="0", destination="1", edge_attr="2")

Gnx = nx.from_pandas_edgelist(
Expand Down Expand Up @@ -509,7 +509,7 @@ def test_number_of_vertices(graph_file):
raise TypeError("Could not read the input graph")

# cugraph add_edge_list
G = cugraph.DiGraph()
G = cugraph.Graph(directed=True)
G.from_cudf_edgelist(cu_M, source="0", destination="1")
Gnx = nx.from_pandas_edgelist(
M, source="0", target="1", create_using=nx.DiGraph()
Expand Down Expand Up @@ -557,7 +557,7 @@ def test_to_undirected(graph_file):
assert len(cu_M) == len(M)

# cugraph add_edge_list
DiG = cugraph.DiGraph()
DiG = cugraph.Graph(directed=True)
DiG.from_cudf_edgelist(cu_M, source="0", destination="1")

DiGnx = nx.from_pandas_edgelist(
Expand Down
6 changes: 3 additions & 3 deletions python/cugraph/cugraph/tests/test_jaccard.py
Original file line number Diff line number Diff line change
Expand Up @@ -82,9 +82,9 @@ def cugraph_call(benchmark_callable, cu_M, edgevals=False):
df = df.sort_values(["source", "destination"]).reset_index(drop=True)

return (
df["source"].to_array(),
df["destination"].to_array(),
df["jaccard_coeff"].to_array(),
df["source"].to_numpy(),
df["destination"].to_numpy(),
df["jaccard_coeff"].to_numpy(),
)


Expand Down
2 changes: 1 addition & 1 deletion python/cugraph/cugraph/tests/test_multigraph.py
Original file line number Diff line number Diff line change
Expand Up @@ -113,7 +113,7 @@ def test_multigraph_sssp(graph_file):
)
nx_paths = nx.single_source_dijkstra_path_length(Gnx, 0)

cu_dist = cu_paths.sort_values(by='vertex')['distance'].to_array()
cu_dist = cu_paths.sort_values(by='vertex')['distance'].to_numpy()
nx_dist = [i[1] for i in sorted(nx_paths.items())]

assert (cu_dist == nx_dist).all()
2 changes: 1 addition & 1 deletion python/cugraph/cugraph/tests/test_overlap.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,7 @@ def cugraph_call(benchmark_callable, cu_M, pairs, edgevals=False):
# cugraph Overlap Call
df = benchmark_callable(cugraph.overlap, G, pairs)
df = df.sort_values(by=["source", "destination"])
return df["overlap_coeff"].to_array()
return df["overlap_coeff"].to_numpy()


def intersection(a, b, M):
Expand Down
4 changes: 2 additions & 2 deletions python/cugraph/cugraph/tests/test_pagerank.py
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,7 @@ def cugraph_call(G, max_iter, tol, alpha, personalization, nstart):

df = df.sort_values("vertex").reset_index(drop=True)

pr_scores = df["pagerank"].to_array()
pr_scores = df["pagerank"].to_numpy()
for i, rank in enumerate(pr_scores):
sorted_pr.append((i, rank))

Expand Down Expand Up @@ -310,7 +310,7 @@ def test_pagerank_multi_column(

df = df.sort_values("0_vertex").reset_index(drop=True)

pr_scores = df["pagerank"].to_array()
pr_scores = df["pagerank"].to_numpy()
for i, rank in enumerate(pr_scores):
cugraph_pr.append((i, rank))

Expand Down
12 changes: 6 additions & 6 deletions python/cugraph/cugraph/tests/test_random_walks.py
Original file line number Diff line number Diff line change
Expand Up @@ -92,7 +92,7 @@ def check_random_walks(path_data, seeds, df_G=None):
offsets_idx = 0
next_path_idx = 0
v_paths = path_data[0]
sizes = path_data[2].to_array().tolist()
sizes = path_data[2].to_numpy().tolist()

for s in sizes:
for i in range(next_path_idx, next_path_idx+s-1):
Expand Down Expand Up @@ -156,12 +156,12 @@ def test_random_walks_coalesced(

# Check path query output
df = cugraph.rw_path(len(seeds), path_data[2])
v_offsets = [0] + path_data[2].cumsum()[:-1].to_array().tolist()
w_offsets = [0] + (path_data[2]-1).cumsum()[:-1].to_array().tolist()
v_offsets = [0] + path_data[2].cumsum()[:-1].to_numpy().tolist()
w_offsets = [0] + (path_data[2]-1).cumsum()[:-1].to_numpy().tolist()

assert_series_equal(df['weight_sizes'], path_data[2]-1, check_names=False)
assert df['vertex_offsets'].to_array().tolist() == v_offsets
assert df['weight_offsets'].to_array().tolist() == w_offsets
assert df['vertex_offsets'].to_numpy().tolist() == v_offsets
assert df['weight_offsets'].to_numpy().tolist() == w_offsets


@pytest.mark.parametrize("graph_file", utils.DATASETS_SMALL)
Expand Down Expand Up @@ -208,7 +208,7 @@ def test_random_walks(
edge_attr="weight")

k = random.randint(1, 10)
start_vertices = random.sample(G.nodes().to_array().tolist(), k)
start_vertices = random.sample(G.nodes().to_numpy().tolist(), k)

seeds = cudf.DataFrame()
seeds['v'] = start_vertices
Expand Down
Loading