diff --git a/README.md b/README.md index 62059e9c7b6..77377fe2bbc 100644 --- a/README.md +++ b/README.md @@ -86,7 +86,6 @@ As of Release 0.18 - including 0.18 nightly | | Renumbering | Single-GPU | multiple columns, any data type | | | Symmetrize | Multi-GPU | | | Other | | | | -| | Hungarian Algorithm | Single-GPU | | | | Minimum Spanning Tree | Single-GPU | | | | Maximum Spanning Tree | Single-GPU | | | | | diff --git a/docs/source/api.rst b/docs/source/api.rst index dcdf3e6ff33..b02f8f488c5 100644 --- a/docs/source/api.rst +++ b/docs/source/api.rst @@ -48,6 +48,13 @@ Katz Centrality :undoc-members: +Katz Centrality (MG) +-------------------- + +.. automodule:: cugraph.dask.centrality.katz_centrality + :members: + :undoc-members: + Community ========= @@ -86,6 +93,14 @@ Louvain :members: :undoc-members: +Louvain (MG) +------------ + +.. automodule:: cugraph.dask.community.louvain + :members: + :undoc-members: + + Spectral Clustering ------------------- @@ -148,6 +163,17 @@ Force Atlas 2 :undoc-members: +Linear Assignment +================= + +Hungarian +------------- + +.. automodule:: cugraph.linear_assignment.hungarian + :members: + :undoc-members: + + Link Analysis ============= @@ -165,6 +191,13 @@ Pagerank :members: :undoc-members: +Pagerank (MG) +--------- + +.. automodule:: cugraph.dask.link_analysis.pagerank + :members: pagerank + :undoc-members: + Link Prediction =============== @@ -202,6 +235,13 @@ Breadth-first-search :members: :undoc-members: +Breadth-first-search (MG) +-------------------- + +.. automodule:: cugraph.dask.traversal.bfs + :members: + :undoc-members: + Single-source-shortest-path --------------------------- @@ -209,6 +249,13 @@ Single-source-shortest-path :members: :undoc-members: +Single-source-shortest-path (MG) +--------------------------- + +.. automodule:: cugraph.dask.traversal.sssp + :members: + :undoc-members: + Tree ========= @@ -227,3 +274,18 @@ Maximum Spanning Tree :members: :undoc-members: + +DASK MG Helper functions +=========================== + +.. automodule:: cugraph.comms.comms + :members: initialize + :undoc-members: + +.. automodule:: cugraph.comms.comms + :members: destroy + :undoc-members: + +.. automodule:: cugraph.dask.common.read_utils + :members: get_chunksize + :undoc-members: diff --git a/docs/source/dask-cugraph.rst b/docs/source/dask-cugraph.rst index b27ad382809..51487bfbf05 100644 --- a/docs/source/dask-cugraph.rst +++ b/docs/source/dask-cugraph.rst @@ -13,58 +13,41 @@ With cuGraph and Dask, whether you’re using a single NVIDIA GPU or multiple no If your graph comfortably fits in memory on a single GPU, you would want to use the single-GPU version of cuGraph. If you want to distribute your workflow across multiple GPUs and have more data than you can fit in memory on a single GPU, you would want to use cuGraph's multi-GPU features. +Example +======== -Distributed Graph Algorithms ----------------------------- +.. code-block:: python -.. automodule:: cugraph.dask.link_analysis.pagerank - :members: pagerank - :undoc-members: + from dask.distributed import Client, wait + from dask_cuda import LocalCUDACluster + import cugraph.comms as Comms + import cugraph.dask as dask_cugraph -.. automodule:: cugraph.dask.traversal.bfs - :members: bfs - :undoc-members: + cluster = LocalCUDACluster() + client = Client(cluster) + Comms.initialize(p2p=True) + # Helper function to set the reader chunk size to automatically get one partition per GPU + chunksize = dask_cugraph.get_chunksize(input_data_path) -Helper functions ----------------- + # Multi-GPU CSV reader + e_list = dask_cudf.read_csv(input_data_path, + chunksize = chunksize, + delimiter=' ', + names=['src', 'dst'], + dtype=['int32', 'int32']) -.. automodule:: cugraph.comms.comms - :members: initialize - :undoc-members: + G = cugraph.DiGraph() + G.from_dask_cudf_edgelist(e_list, source='src', destination='dst') -.. automodule:: cugraph.comms.comms - :members: destroy - :undoc-members: + # now run PageRank + pr_df = dask_cugraph.pagerank(G, tol=1e-4) -.. automodule:: cugraph.dask.common.read_utils - :members: get_chunksize - :undoc-members: + # All done, clean up + Comms.destroy() + client.close() + cluster.close() -Consolidation -============= -cuGraph can transparently interpret the Dask cuDF Dataframe as a regular Dataframe when loading the edge list. This is particularly helpful for workflows extracting a single GPU sized edge list from a distributed dataset. From there any existing single GPU feature will just work on this input. +| -For instance, consolidation allows leveraging Dask cuDF CSV reader to load file(s) on multiple GPUs and consolidate this input to a single GPU graph. Reading is often the time and memory bottleneck, with this feature users can call the Multi-GPU version of the reader without changing anything else. - -Batch Processing -================ - -cuGraph can leverage multi GPUs to increase processing speed for graphs that fit on a single GPU, providing faster analytics on such graphs. -You will be able to use the Graph the same way as you used to in a Single GPU environment, but analytics that support batch processing will automatically use the GPUs available to the dask client. -For example, Betweenness Centrality scores can be slow to obtain depending on the number of vertices used in the approximation. Thank to Multi GPUs Batch Processing, -you can create Single GPU graph as you would regularly do it using cuDF CSV reader, enable Batch analytics on it, and obtain scores much faster as each GPU will handle a sub-set of the sources. -In order to use Batch Analytics you need to set up a Dask Cluster and Client in addition to the cuGraph communicator, then you can simply call `enable_batch()` on you graph, and algorithms supporting batch processing will use multiple GPUs. - -Algorithms supporting Batch Processing --------------------------------------- -.. automodule:: cugraph.centrality - :members: betweenness_centrality - :undoc-members: - :noindex: - -.. automodule:: cugraph.centrality - :members: edge_betweenness_centrality - :undoc-members: - :noindex: diff --git a/python/cugraph/bsp/traversal/bfs_bsp.py b/python/cugraph/bsp/traversal/bfs_bsp.py index 28a71631443..9a2fd48e201 100644 --- a/python/cugraph/bsp/traversal/bfs_bsp.py +++ b/python/cugraph/bsp/traversal/bfs_bsp.py @@ -1,4 +1,4 @@ -# Copyright (c) 2019-2020, NVIDIA CORPORATION. +# Copyright (c) 2019-2021, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at @@ -11,7 +11,7 @@ # See the License for the specific language governing permissions and # limitations under the License. - +import warnings import cudf from collections import OrderedDict @@ -59,6 +59,12 @@ def bfs_df_pregel(_df, start, src_col='src', dst_col='dst', copy_data=True): """ + warnings.warn( + "This feature is deprecated and will be" + "dropped from cuGraph in release 0.20.", + FutureWarning, + ) + # extract the src and dst into a dataframe that can be modified if copy_data: coo_data = _df[[src_col, dst_col]] diff --git a/python/cugraph/community/egonet.py b/python/cugraph/community/egonet.py index 9ff12158b13..ca3c6149ece 100644 --- a/python/cugraph/community/egonet.py +++ b/python/cugraph/community/egonet.py @@ -74,6 +74,17 @@ def ego_graph(G, n, radius=1, center=True, undirected=False, distance=None): G_ego : cuGraph.Graph or networkx.Graph A graph descriptor with a minimum spanning tree or forest. The networkx graph will not have all attributes copied over + + Examples + -------- + >>> M = cudf.read_csv('datasets/karate.csv', + delimiter = ' ', + dtype=['int32', 'int32', 'float32'], + header=None) + >>> G = cugraph.Graph() + >>> G.from_cudf_edgelist(M, source='0', destination='1') + >>> ego_graph = cugraph.ego_graph(G, seed, radius=2) + """ (G, input_type) = ensure_cugraph_obj(G, nx_weight_attr="weight") diff --git a/python/cugraph/components/connectivity.py b/python/cugraph/components/connectivity.py index 7c68afd7ced..72f33ebfcbb 100644 --- a/python/cugraph/components/connectivity.py +++ b/python/cugraph/components/connectivity.py @@ -1,4 +1,4 @@ -# Copyright (c) 2019-2020, NVIDIA CORPORATION. +# Copyright (c) 2019-2021, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at @@ -378,7 +378,7 @@ def connected_components(G, header=None) >>> G = cugraph.Graph() >>> G.from_cudf_edgelist(M, source='0', destination='1', edge_attr=None) - >>> df = cugraph.strongly_connected_components(G) + >>> df = cugraph.connected_components(G, connection="weak") """ if connection == "weak": return weakly_connected_components(G, directed, diff --git a/python/cugraph/dask/centrality/katz_centrality.py b/python/cugraph/dask/centrality/katz_centrality.py index cf6ad95f974..e690e291928 100644 --- a/python/cugraph/dask/centrality/katz_centrality.py +++ b/python/cugraph/dask/centrality/katz_centrality.py @@ -1,4 +1,4 @@ -# Copyright (c) 2019-2020, NVIDIA CORPORATION. +# Copyright (c) 2019-2021, NVIDIA CORPORATION. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -115,7 +115,8 @@ def katz_centrality(input_graph, Examples -------- >>> import cugraph.dask as dcg - >>> Comms.initialize(p2p=True) + >>> ... Init a DASK Cluster + >> see https://docs.rapids.ai/api/cugraph/stable/dask-cugraph.html >>> chunksize = dcg.get_chunksize(input_data_path) >>> ddf = dask_cudf.read_csv(input_data_path, chunksize=chunksize, delimiter=' ', @@ -125,7 +126,6 @@ def katz_centrality(input_graph, >>> dg.from_dask_cudf_edgelist(ddf, source='src', destination='dst', edge_attr='value') >>> pr = dcg.katz_centrality(dg) - >>> Comms.destroy() """ nstart = None diff --git a/python/cugraph/dask/community/louvain.py b/python/cugraph/dask/community/louvain.py index 11ecb78375f..495061c0f81 100644 --- a/python/cugraph/dask/community/louvain.py +++ b/python/cugraph/dask/community/louvain.py @@ -1,4 +1,4 @@ -# Copyright (c) 2019-2020, NVIDIA CORPORATION. +# Copyright (c) 2019-2021, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at @@ -55,7 +55,8 @@ def louvain(input_graph, max_iter=100, resolution=1.0): Examples -------- >>> import cugraph.dask as dcg - >>> Comms.initialize(p2p=True) + >>> ... Init a DASK Cluster + >> see https://docs.rapids.ai/api/cugraph/stable/dask-cugraph.html >>> chunksize = dcg.get_chunksize(input_data_path) >>> ddf = dask_cudf.read_csv('datasets/karate.csv', chunksize=chunksize, delimiter=' ', diff --git a/python/cugraph/dask/link_analysis/pagerank.py b/python/cugraph/dask/link_analysis/pagerank.py index 1e9d79e0aa6..d8a76f1231e 100644 --- a/python/cugraph/dask/link_analysis/pagerank.py +++ b/python/cugraph/dask/link_analysis/pagerank.py @@ -1,4 +1,4 @@ -# Copyright (c) 2019-2020, NVIDIA CORPORATION. +# Copyright (c) 2019-2021, NVIDIA CORPORATION. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -105,7 +105,8 @@ def pagerank(input_graph, Examples -------- >>> import cugraph.dask as dcg - >>> Comms.initialize(p2p=True) + >>> ... Init a DASK Cluster + >> see https://docs.rapids.ai/api/cugraph/stable/dask-cugraph.html >>> chunksize = dcg.get_chunksize(input_data_path) >>> ddf = dask_cudf.read_csv(input_data_path, chunksize=chunksize, delimiter=' ', @@ -115,7 +116,6 @@ def pagerank(input_graph, >>> dg.from_dask_cudf_edgelist(ddf, source='src', destination='dst', edge_attr='value') >>> pr = dcg.pagerank(dg) - >>> Comms.destroy() """ from cugraph.structure.graph import null_check diff --git a/python/cugraph/dask/traversal/bfs.py b/python/cugraph/dask/traversal/bfs.py index 7a2c50a3bc0..51e0dc0de5d 100644 --- a/python/cugraph/dask/traversal/bfs.py +++ b/python/cugraph/dask/traversal/bfs.py @@ -1,4 +1,4 @@ -# Copyright (c) 2019-2020, NVIDIA CORPORATION. +# Copyright (c) 2019-2021, NVIDIA CORPORATION. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -76,7 +76,8 @@ def bfs(graph, Examples -------- >>> import cugraph.dask as dcg - >>> Comms.initialize(p2p=True) + >>> ... Init a DASK Cluster + >> see https://docs.rapids.ai/api/cugraph/stable/dask-cugraph.html >>> chunksize = dcg.get_chunksize(input_data_path) >>> ddf = dask_cudf.read_csv(input_data_path, chunksize=chunksize, delimiter=' ', @@ -85,7 +86,6 @@ def bfs(graph, >>> dg = cugraph.DiGraph() >>> dg.from_dask_cudf_edgelist(ddf, 'src', 'dst') >>> df = dcg.bfs(dg, 0) - >>> Comms.destroy() """ client = default_client() diff --git a/python/cugraph/dask/traversal/sssp.py b/python/cugraph/dask/traversal/sssp.py index ce0c7908664..52f2b9b256c 100644 --- a/python/cugraph/dask/traversal/sssp.py +++ b/python/cugraph/dask/traversal/sssp.py @@ -1,4 +1,4 @@ -# Copyright (c) 2019-2020, NVIDIA CORPORATION. +# Copyright (c) 2019-2021, NVIDIA CORPORATION. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -76,7 +76,8 @@ def sssp(graph, Examples -------- >>> import cugraph.dask as dcg - >>> Comms.initialize(p2p=True) + >>> ... Init a DASK Cluster + >> see https://docs.rapids.ai/api/cugraph/stable/dask-cugraph.html >>> chunksize = dcg.get_chunksize(input_data_path) >>> ddf = dask_cudf.read_csv(input_data_path, chunksize=chunksize, delimiter=' ', @@ -85,7 +86,6 @@ def sssp(graph, >>> dg = cugraph.DiGraph() >>> dg.from_dask_cudf_edgelist(ddf, 'src', 'dst') >>> df = dcg.sssp(dg, 0) - >>> Comms.destroy() """ client = default_client() diff --git a/python/cugraph/tree/minimum_spanning_tree.py b/python/cugraph/tree/minimum_spanning_tree.py index 25a365665df..45e996aa083 100644 --- a/python/cugraph/tree/minimum_spanning_tree.py +++ b/python/cugraph/tree/minimum_spanning_tree.py @@ -1,4 +1,4 @@ -# Copyright (c) 2019-2020, NVIDIA CORPORATION. +# Copyright (c) 2019-2021, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at @@ -17,7 +17,7 @@ from cugraph.utilities import cugraph_to_nx -def minimum_spanning_tree_subgraph(G): +def _minimum_spanning_tree_subgraph(G): mst_subgraph = Graph() if type(G) is not Graph: raise Exception("input graph must be undirected") @@ -32,7 +32,7 @@ def minimum_spanning_tree_subgraph(G): return mst_subgraph -def maximum_spanning_tree_subgraph(G): +def _maximum_spanning_tree_subgraph(G): mst_subgraph = Graph() if type(G) is not Graph: raise Exception("input graph must be undirected") @@ -68,28 +68,33 @@ def minimum_spanning_tree( ---------- G : cuGraph.Graph or networkx.Graph cuGraph graph descriptor with connectivity information. + weight : string default to the weights in the graph, if the graph edges do not have a weight attribute a default weight of 1 will be used. + algorithm : string Default to 'boruvka'. The parallel algorithm to use when finding a minimum spanning tree. + ignore_nan : bool Default to False + Returns ------- G_mst : cuGraph.Graph or networkx.Graph A graph descriptor with a minimum spanning tree or forest. The networkx graph will not have all attributes copied over + """ G, isNx = check_nx_graph(G) if isNx is True: - mst = minimum_spanning_tree_subgraph(G) + mst = _minimum_spanning_tree_subgraph(G) return cugraph_to_nx(mst) else: - return minimum_spanning_tree_subgraph(G) + return _minimum_spanning_tree_subgraph(G) def maximum_spanning_tree( @@ -103,25 +108,30 @@ def maximum_spanning_tree( ---------- G : cuGraph.Graph or networkx.Graph cuGraph graph descriptor with connectivity information. + weight : string default to the weights in the graph, if the graph edges do not have a weight attribute a default weight of 1 will be used. + algorithm : string Default to 'boruvka'. The parallel algorithm to use when finding a maximum spanning tree. + ignore_nan : bool Default to False + Returns ------- G_mst : cuGraph.Graph or networkx.Graph A graph descriptor with a maximum spanning tree or forest. The networkx graph will not have all attributes copied over + """ G, isNx = check_nx_graph(G) if isNx is True: - mst = maximum_spanning_tree_subgraph(G) + mst = _maximum_spanning_tree_subgraph(G) return cugraph_to_nx(mst) else: - return maximum_spanning_tree_subgraph(G) + return _maximum_spanning_tree_subgraph(G)