diff --git a/docs/cugraph/source/api_docs/centrality.rst b/docs/cugraph/source/api_docs/centrality.rst index 81c4577fa87..4001328ee81 100644 --- a/docs/cugraph/source/api_docs/centrality.rst +++ b/docs/cugraph/source/api_docs/centrality.rst @@ -10,15 +10,15 @@ Betweenness Centrality .. autosummary:: :toctree: api/ - cugraph.centrality.betweenness_centrality.betweenness_centrality - cugraph.centrality.betweenness_centrality.edge_betweenness_centrality + cugraph.betweenness_centrality + cugraph.edge_betweenness_centrality Katz Centrality --------------- .. autosummary:: :toctree: api/ - cugraph.centrality.katz_centrality.katz_centrality + cugraph.katz_centrality Katz Centrality (MG) -------------------- diff --git a/docs/cugraph/source/api_docs/community.rst b/docs/cugraph/source/api_docs/community.rst index 98292e6fb71..23db12c44c7 100644 --- a/docs/cugraph/source/api_docs/community.rst +++ b/docs/cugraph/source/api_docs/community.rst @@ -10,15 +10,15 @@ EgoNet .. autosummary:: :toctree: api/ - cugraph.community.egonet.batched_ego_graphs - cugraph.community.egonet.ego_graph + cugraph.batched_ego_graphs + cugraph.ego_graph Ensemble clustering for graphs (ECG) ------------------------------------ .. autosummary:: :toctree: api/ - cugraph.community.ecg.ecg + cugraph.ecg K-Truss @@ -26,15 +26,15 @@ K-Truss .. autosummary:: :toctree: api/ - cugraph.community.ktruss_subgraph.k_truss - cugraph.community.ktruss_subgraph.ktruss_subgraph + cugraph.k_truss + cugraph.ktruss_subgraph Leiden ------ .. autosummary:: :toctree: api/ - cugraph.community.leiden.leiden + cugraph.leiden Louvain @@ -42,7 +42,7 @@ Louvain .. autosummary:: :toctree: api/ - cugraph.community.louvain.louvain + cugraph.louvain Louvain (MG) @@ -58,11 +58,11 @@ Spectral Clustering .. 
autosummary:: :toctree: api/ - cugraph.community.spectral_clustering.analyzeClustering_edge_cut - cugraph.community.spectral_clustering.analyzeClustering_modularity - cugraph.community.spectral_clustering.analyzeClustering_ratio_cut - cugraph.community.spectral_clustering.spectralBalancedCutClustering - cugraph.community.spectral_clustering.spectralModularityMaximizationClustering + cugraph.analyzeClustering_edge_cut + cugraph.analyzeClustering_modularity + cugraph.analyzeClustering_ratio_cut + cugraph.spectralBalancedCutClustering + cugraph.spectralModularityMaximizationClustering Subgraph Extraction @@ -70,7 +70,7 @@ Subgraph Extraction .. autosummary:: :toctree: api/ - cugraph.community.subgraph_extraction.subgraph + cugraph.subgraph Triangle Counting @@ -78,4 +78,4 @@ Triangle Counting .. autosummary:: :toctree: api/ - cugraph.community.triangle_count.triangles + cugraph.triangles diff --git a/docs/cugraph/source/api_docs/components.rst b/docs/cugraph/source/api_docs/components.rst index 0f7d3a9c2cb..cf2f09efab2 100644 --- a/docs/cugraph/source/api_docs/components.rst +++ b/docs/cugraph/source/api_docs/components.rst @@ -10,9 +10,9 @@ Connected Components .. autosummary:: :toctree: api/ - cugraph.components.connectivity.connected_components - cugraph.components.connectivity.strongly_connected_components - cugraph.components.connectivity.weakly_connected_components + cugraph.connected_components + cugraph.strongly_connected_components + cugraph.weakly_connected_components Connected Components (MG) diff --git a/docs/cugraph/source/api_docs/cores.rst b/docs/cugraph/source/api_docs/cores.rst index 2f0761597e2..0e48c584b4d 100644 --- a/docs/cugraph/source/api_docs/cores.rst +++ b/docs/cugraph/source/api_docs/cores.rst @@ -10,7 +10,7 @@ Core Number .. autosummary:: :toctree: api/ - cugraph.cores.core_number.core_number + cugraph.core_number K-Core @@ -18,4 +18,4 @@ K-Core .. 
autosummary:: :toctree: api/ - cugraph.cores.k_core.k_core + cugraph.k_core diff --git a/docs/cugraph/source/api_docs/graph_implementation.rst b/docs/cugraph/source/api_docs/graph_implementation.rst new file mode 100644 index 00000000000..e96c49c91bc --- /dev/null +++ b/docs/cugraph/source/api_docs/graph_implementation.rst @@ -0,0 +1,38 @@ +==================== +Graph Implementation +==================== +.. currentmodule:: cugraph.structure.graph_implementation.simpleGraphImpl + + +Graph Implementation +------------------------- +.. autosummary:: + :toctree: api/ + + view_edge_list + delete_edge_list + view_adj_list + view_transposed_adj_list + delete_adj_list + + enable_batch + batch_adjlists + batch_edgelists + batch_enabled + batch_transposed_adjlists + + get_two_hop_neighbors + number_of_vertices + number_of_nodes + number_of_edges + in_degree + out_degree + degree + degrees + has_edge + has_node + has_self_loop + edges + nodes + neighbors + vertex_column_size \ No newline at end of file diff --git a/docs/cugraph/source/api_docs/index.rst b/docs/cugraph/source/api_docs/index.rst index 9da23c396af..e7f94d7e19d 100644 --- a/docs/cugraph/source/api_docs/index.rst +++ b/docs/cugraph/source/api_docs/index.rst @@ -10,6 +10,7 @@ This page provides a list of all publicly accessible modules, methods and classe :caption: API Documentation structure + graph_implementation centrality community components diff --git a/docs/cugraph/source/api_docs/layout.rst b/docs/cugraph/source/api_docs/layout.rst index fc81df72221..b3943e4d399 100644 --- a/docs/cugraph/source/api_docs/layout.rst +++ b/docs/cugraph/source/api_docs/layout.rst @@ -10,5 +10,5 @@ Force Atlas 2 .. 
autosummary:: :toctree: api/ - cugraph.layout.force_atlas2.force_atlas2 + cugraph.force_atlas2 diff --git a/docs/cugraph/source/api_docs/linear_assignment.rst b/docs/cugraph/source/api_docs/linear_assignment.rst index 147597879d9..383bb4d4322 100644 --- a/docs/cugraph/source/api_docs/linear_assignment.rst +++ b/docs/cugraph/source/api_docs/linear_assignment.rst @@ -10,4 +10,5 @@ Hungarian .. autosummary:: :toctree: api/ - cugraph.linear_assignment.hungarian + cugraph.hungarian + cugraph.dense_hungarian diff --git a/docs/cugraph/source/api_docs/link_analysis.rst b/docs/cugraph/source/api_docs/link_analysis.rst index 162163afd2f..c19a36aedbe 100644 --- a/docs/cugraph/source/api_docs/link_analysis.rst +++ b/docs/cugraph/source/api_docs/link_analysis.rst @@ -10,7 +10,7 @@ HITS .. autosummary:: :toctree: api/ - cugraph.link_analysis.hits.hits + cugraph.hits Pagerank @@ -18,7 +18,7 @@ Pagerank .. autosummary:: :toctree: api/ - cugraph.link_analysis.pagerank.pagerank + cugraph.pagerank Pagerank (MG) ------------- diff --git a/docs/cugraph/source/api_docs/link_prediction.rst b/docs/cugraph/source/api_docs/link_prediction.rst index f1c713f6895..7c7b34f2f7c 100644 --- a/docs/cugraph/source/api_docs/link_prediction.rst +++ b/docs/cugraph/source/api_docs/link_prediction.rst @@ -10,9 +10,9 @@ Jaccard Coefficient .. autosummary:: :toctree: api/ - cugraph.link_prediction.jaccard.jaccard - cugraph.link_prediction.jaccard.jaccard_coefficient - cugraph.link_prediction.wjaccard.jaccard_w + cugraph.jaccard + cugraph.jaccard_coefficient + cugraph.jaccard_w Overlap Coefficient @@ -20,7 +20,16 @@ Overlap Coefficient .. autosummary:: :toctree: api/ - cugraph.link_prediction.overlap.overlap - cugraph.link_prediction.overlap.overlap_coefficient - cugraph.link_prediction.woverlap.overlap_w + cugraph.overlap + cugraph.overlap_coefficient + cugraph.overlap_w + +Sorensen Coefficient +-------------------- +.. 
autosummary:: + :toctree: api/ + + cugraph.sorensen + cugraph.sorensen_coefficient + cugraph.sorensen_w diff --git a/docs/cugraph/source/api_docs/pylibcugraph.rst b/docs/cugraph/source/api_docs/pylibcugraph.rst index ffc2e84e316..9317274cf8c 100644 --- a/docs/cugraph/source/api_docs/pylibcugraph.rst +++ b/docs/cugraph/source/api_docs/pylibcugraph.rst @@ -11,5 +11,5 @@ Methods .. autosummary:: :toctree: api/ - pylibcugraph.components.strongly_connected_components - pylibcugraph.components.weakly_connected_components + pylibcugraph.strongly_connected_components + pylibcugraph.weakly_connected_components diff --git a/docs/cugraph/source/api_docs/sampling.rst b/docs/cugraph/source/api_docs/sampling.rst index de7f707fb5f..6255c981a28 100644 --- a/docs/cugraph/source/api_docs/sampling.rst +++ b/docs/cugraph/source/api_docs/sampling.rst @@ -10,4 +10,4 @@ Random Walks .. autosummary:: :toctree: api/ - cugraph.sampling.random_walks.random_walks + cugraph.random_walks diff --git a/docs/cugraph/source/api_docs/structure.rst b/docs/cugraph/source/api_docs/structure.rst index 7b4e58aec51..f2d49781423 100644 --- a/docs/cugraph/source/api_docs/structure.rst +++ b/docs/cugraph/source/api_docs/structure.rst @@ -29,6 +29,7 @@ Adding Data Graph.add_internal_vertex_id Graph.add_nodes_from Graph.clear + Graph.unrenumber Checks ------ @@ -52,9 +53,9 @@ Symmetrize .. autosummary:: :toctree: api/ - cugraph.structure.symmetrize.symmetrize - cugraph.structure.symmetrize.symmetrize_ddf - cugraph.structure.symmetrize.symmetrize_df + cugraph.symmetrize + cugraph.symmetrize_ddf + cugraph.symmetrize_df Conversion from Other Formats @@ -62,22 +63,23 @@ Conversion from Other Formats .. 
autosummary:: :toctree: api/ - cugraph.structure.convert_matrix.from_adjlist - cugraph.structure.convert_matrix.from_cudf_edgelist - cugraph.structure.convert_matrix.from_edgelist - cugraph.structure.convert_matrix.from_numpy_array - cugraph.structure.convert_matrix.from_numpy_matrix - cugraph.structure.convert_matrix.from_pandas_adjacency - cugraph.structure.convert_matrix.from_pandas_edgelist - cugraph.structure.convert_matrix.to_numpy_array - cugraph.structure.convert_matrix.to_numpy_matrix - cugraph.structure.convert_matrix.to_pandas_adjacency - cugraph.structure.convert_matrix.to_pandas_edgelist + cugraph.from_adjlist + cugraph.from_cudf_edgelist + cugraph.from_edgelist + cugraph.from_numpy_array + cugraph.from_numpy_matrix + cugraph.from_pandas_adjacency + cugraph.from_pandas_edgelist + cugraph.to_numpy_array + cugraph.to_numpy_matrix + cugraph.to_pandas_adjacency + cugraph.to_pandas_edgelist Other ----------------------------- .. autosummary:: :toctree: api/ - Graph.unrenumber - cugraph.structure.hypergraph.hypergraph \ No newline at end of file + cugraph.hypergraph + cugraph.structure.shuffle + cugraph.structure.NumberMap diff --git a/docs/cugraph/source/api_docs/traversal.rst b/docs/cugraph/source/api_docs/traversal.rst index 3eb32e2cffa..d1736799e99 100644 --- a/docs/cugraph/source/api_docs/traversal.rst +++ b/docs/cugraph/source/api_docs/traversal.rst @@ -10,8 +10,8 @@ Breadth-first-search .. autosummary:: :toctree: api/ - cugraph.traversal.bfs.bfs - cugraph.traversal.bfs.bfs_edges + cugraph.bfs + cugraph.bfs_edges Breadth-first-search (MG) ------------------------- @@ -26,10 +26,10 @@ Single-source-shortest-path .. 
autosummary:: :toctree: api/ - cugraph.traversal.sssp.filter_unreachable - cugraph.traversal.sssp.shortest_path - cugraph.traversal.sssp.shortest_path_length - cugraph.traversal.sssp.sssp + cugraph.filter_unreachable + cugraph.shortest_path + cugraph.shortest_path_length + cugraph.sssp Single-source-shortest-path (MG) -------------------------------- diff --git a/python/cugraph/cugraph/centrality/betweenness_centrality.py b/python/cugraph/cugraph/centrality/betweenness_centrality.py index 4cc7b767340..bbffd123d11 100644 --- a/python/cugraph/cugraph/centrality/betweenness_centrality.py +++ b/python/cugraph/cugraph/centrality/betweenness_centrality.py @@ -62,8 +62,7 @@ def betweenness_centrality( defined as a list will be used assources for traversals inside the algorithm. - normalized : bool, optional - Default is True. + normalized : bool, optional (default=True) If true, the betweenness values are normalized by __2 / ((n - 1) * (n - 2))__ for undirected Graphs, and __1 / ((n - 1) * (n - 2))__ for directed Graphs @@ -185,7 +184,6 @@ def edge_betweenness_centrality( sources for traversals inside the algorithm. normalized : bool, optional (default=True) - Default is True. If true, the betweenness values are normalized by 2 / (n * (n - 1)) for undirected Graphs, and 1 / (n * (n - 1)) for directed Graphs diff --git a/python/cugraph/cugraph/components/connectivity.py b/python/cugraph/cugraph/components/connectivity.py index efba6232f03..c1601cd42bf 100644 --- a/python/cugraph/cugraph/components/connectivity.py +++ b/python/cugraph/cugraph/components/connectivity.py @@ -117,17 +117,17 @@ def weakly_connected_components(G, The adjacency list will be computed if not already present. The number of vertices should fit into a 32b int. - directed : bool, optional + directed : bool, optional (default=True) NOTE For non-Graph-type (eg. sparse matrix) values of G only. Raises TypeError if used with a Graph object. 
- If True (default), then convert the input matrix to directed - cugraph.Graph and only move from point i to point j along paths - csgraph[i, j]. If False, then find the shortest path on an undirected - graph: the algorithm can progress from point i to j along csgraph[i, j] - or csgraph[j, i]. + If True, then convert the input matrix to a cugraph.DiGraph + and only move from point i to point j along paths csgraph[i, j]. If + False, then find the shortest path on an undirected graph: the + algorithm can progress from point i to j along csgraph[i, j] or + csgraph[j, i]. connection : str, optional (default=None) @@ -135,13 +135,13 @@ def weakly_connected_components(G, (eg. sparse matrix) values of G only (raises TypeError if used with a Graph object), and can only be set to "weak" for this API. - return_labels : bool, optional + return_labels : bool, optional (default=True) NOTE For non-Graph-type (eg. sparse matrix) values of G only. Raises TypeError if used with a Graph object. - If True (default), then return the labels for each of the connected + If True, then return the labels for each of the connected components. Returns @@ -215,17 +215,17 @@ def strongly_connected_components(G, The adjacency list will be computed if not already present. The number of vertices should fit into a 32b int. - directed : bool, optional + directed : bool, optional (default=True) NOTE For non-Graph-type (eg. sparse matrix) values of G only. Raises TypeError if used with a Graph object. - If True (default), then convert the input matrix to directed - cugraph.Graph and only move from point i to point j along paths - csgraph[i, j]. If False, then find the shortest path on an undirected - graph: the algorithm can progress from point i to j along csgraph[i, j] - or csgraph[j, i]. + If True, then convert the input matrix to a cugraph.DiGraph + and only move from point i to point j along paths csgraph[i, j]. 
If + False, then find the shortest path on an undirected graph: the + algorithm can progress from point i to j along csgraph[i, j] or + csgraph[j, i]. connection : str, optional (default=None) @@ -233,13 +233,13 @@ def strongly_connected_components(G, (eg. sparse matrix) values of G only (raises TypeError if used with a Graph object), and can only be set to "strong" for this API. - return_labels : bool, optional + return_labels : bool, optional (default=True) NOTE For non-Graph-type (eg. sparse matrix) values of G only. Raises TypeError if used with a Graph object. - If True (default), then return the labels for each of the connected + If True, then return the labels for each of the connected components. Returns @@ -314,30 +314,30 @@ def connected_components(G, The adjacency list will be computed if not already present. The number of vertices should fit into a 32b int. - directed : bool, optional + directed : bool, optional (default=True) NOTE For non-Graph-type (eg. sparse matrix) values of G only. Raises TypeError if used with a Graph object. - If True (default), then convert the input matrix to directed - cugraph.Graph and only move from point i to point j along paths - csgraph[i, j]. If False, then find the shortest path on an undirected - graph: the algorithm can progress from point i to j along csgraph[i, j] - or csgraph[j, i]. + If True, then convert the input matrix to a cugraph.DiGraph + and only move from point i to point j along paths csgraph[i, j]. If + False, then find the shortest path on an undirected graph: the + algorithm can progress from point i to j along csgraph[i, j] or + csgraph[j, i]. connection : str, optional (default='weak') [‘weak’|’strong’]. Return either weakly or strongly connected components. - return_labels : bool, optional + return_labels : bool, optional (default=True) NOTE For non-Graph-type (eg. sparse matrix) values of G only. Raises TypeError if used with a Graph object. 
- If True (default), then return the labels for each of the connected + If True, then return the labels for each of the connected components. Returns diff --git a/python/cugraph/cugraph/cores/core_number.py b/python/cugraph/cugraph/cores/core_number.py index ec4d1a2832d..34464e024ae 100644 --- a/python/cugraph/cugraph/cores/core_number.py +++ b/python/cugraph/cugraph/cores/core_number.py @@ -50,7 +50,7 @@ def core_number(G): ... dtype=['int32', 'int32', 'float32'], header=None) >>> G = cugraph.Graph() >>> G.from_cudf_edgelist(gdf, source='0', destination='1') - >>> cn = cugraph.core_number(G) + >>> df = cugraph.core_number(G) """ diff --git a/python/cugraph/cugraph/layout/force_atlas2.py b/python/cugraph/cugraph/layout/force_atlas2.py index fca633c66a6..ec05a3b8482 100644 --- a/python/cugraph/cugraph/layout/force_atlas2.py +++ b/python/cugraph/cugraph/layout/force_atlas2.py @@ -120,6 +120,16 @@ def on_train_end(self, positions): pos : cudf.DataFrame GPU data frame of size V containing three columns: the vertex identifiers and the x and y positions. + + Examples + -------- + >>> gdf = cudf.read_csv(datasets_path / 'karate.csv', delimiter=' ', + ... dtype=['int32', 'int32', 'float32'], + ... header=None) + >>> G = cugraph.Graph() + >>> G.from_cudf_edgelist(gdf, source='0', destination='1') + >>> pos = cugraph.force_atlas2(G) + """ input_graph, isNx = ensure_cugraph_obj_for_nx(input_graph) diff --git a/python/cugraph/cugraph/linear_assignment/lap.py b/python/cugraph/cugraph/linear_assignment/lap.py index cb6b4653a11..77cbf2f9260 100644 --- a/python/cugraph/cugraph/linear_assignment/lap.py +++ b/python/cugraph/cugraph/linear_assignment/lap.py @@ -55,6 +55,7 @@ def hungarian(G, workers, epsilon=None): ------- cost : matches costs.dtype The cost of the overall assignment + df : cudf.DataFrame df['vertex'][i] gives the vertex id of the i'th vertex. Only vertices in the workers list are defined in this column. 
@@ -63,12 +64,9 @@ def hungarian(G, workers, epsilon=None): Examples -------- - >>> # Download dataset from https://github.com/rapidsai/cugraph/datasets/.. - >>> # M = cudf.read_csv(datasets_path / 'bipartite.csv', delimiter=' ', - >>> # dtype=['int32', 'int32', 'float32'], header=None) - >>> # G = cugraph.Graph() - >>> # G.from_cudf_edgelist(M, source='0', destination='1', edge_attr='2') - >>> # cost, df = cugraph.hungarian(G, workers) + >>> workers, G, costs = cugraph.utils.create_random_bipartite(5, 5, + ... 100, float) + >>> cost, df = cugraph.hungarian(G, workers) """ # FIXME: Create bipartite.csv and uncomment out the above example @@ -108,10 +106,13 @@ def dense_hungarian(costs, num_rows, num_columns, epsilon=None): graph. Each row represents a worker, each column represents a task, cost[i][j] represents the cost of worker i performing task j. + num_rows : int Number of rows in the matrix + num_columns : int Number of columns in the matrix + epsilon : float or double (matching weight type in graph) Used for determining when value is close enough to zero to consider 0. Defaults (if not specified) to 1e-6 in the C++ code. Unused for @@ -121,11 +122,17 @@ def dense_hungarian(costs, num_rows, num_columns, epsilon=None): ------- cost : matches costs.dtype The cost of the overall assignment + assignment : cudf.Series - assignment[i] gives the vertex id of the task assigned to the + assignment[i] gives the vertex id of the task assigned to the worker i - FIXME: Update this with a real example... + Examples + -------- + >>> workers, G, costs = cugraph.utils.create_random_bipartite(5, 5, + ... 
100, float) + >>> costs_flattened = cudf.Series(costs.flatten()) + >>> cost, assignment = cugraph.dense_hungarian(costs_flattened, 5, 5) """ diff --git a/python/cugraph/cugraph/link_prediction/overlap.py b/python/cugraph/cugraph/link_prediction/overlap.py index dcfd30a8a5b..816c580747b 100644 --- a/python/cugraph/cugraph/link_prediction/overlap.py +++ b/python/cugraph/cugraph/link_prediction/overlap.py @@ -21,7 +21,7 @@ def overlap_coefficient(G, ebunch=None): """ - NetworkX similar API. See 'jaccard' for a description + For NetworkX Compatibility. See `overlap` """ vertex_pair = None diff --git a/python/cugraph/cugraph/link_prediction/sorensen.py b/python/cugraph/cugraph/link_prediction/sorensen.py index afae4afbdf2..4a88f6b1558 100644 --- a/python/cugraph/cugraph/link_prediction/sorensen.py +++ b/python/cugraph/cugraph/link_prediction/sorensen.py @@ -96,6 +96,7 @@ def sorensen(input_graph, vertex_pair=None): def sorensen_coefficient(G, ebunch=None): """ + For NetworkX Compatibility. See `sorensen` Parameters ---------- diff --git a/python/cugraph/cugraph/structure/graph_implementation/simpleGraph.py b/python/cugraph/cugraph/structure/graph_implementation/simpleGraph.py index 5313a48cdd0..23054151d1a 100644 --- a/python/cugraph/cugraph/structure/graph_implementation/simpleGraph.py +++ b/python/cugraph/cugraph/structure/graph_implementation/simpleGraph.py @@ -305,11 +305,13 @@ def view_adj_list(self): vertices). The gdf column contains the offsets for the vertices in this graph. Offsets are in the range [0, E] (E: number of edges). + index_col : cudf.Series This cudf.Series wraps a gdf_column of size E (E: number of edges). The gdf column contains the destination index for each edge. Destination indices are in the range [0, V) (V: number of vertices). + value_col : cudf.Series or ``None`` This pointer is ``None`` for unweighted graphs. 
For weighted graphs, this cudf.Series wraps a gdf_column of size E diff --git a/python/cugraph/cugraph/structure/number_map.py b/python/cugraph/cugraph/structure/number_map.py index b0fed9b174b..d587bf92263 100644 --- a/python/cugraph/cugraph/structure/number_map.py +++ b/python/cugraph/cugraph/structure/number_map.py @@ -328,15 +328,18 @@ def to_internal_vertex_id(self, df, col_names=None): """ Given a collection of external vertex ids, return the internal vertex ids + Parameters ---------- df: cudf.DataFrame, cudf.Series, dask_cudf.DataFrame, dask_cudf.Series Contains a list of external vertex identifiers that will be converted into internal vertex identifiers + col_names: (optional) list of strings This list of 1 or more strings contain the names of the columns that uniquely identify an external vertex identifier + Returns --------- vertex_ids : cudf.Series or dask_cudf.Series @@ -373,24 +376,29 @@ def add_internal_vertex_id( If a series-type input is provided then the series will be in a column named '0'. Otherwise the input column names in the DataFrame will be preserved. + Parameters ---------- df: cudf.DataFrame, cudf.Series, dask_cudf.DataFrame, dask_cudf.Series Contains a list of external vertex identifiers that will be converted into internal vertex identifiers - id_column_name: (optional) string + + id_column_name: string, optional (default="id") The name to be applied to the column containing the id - (defaults to 'id') - col_names: (optional) list of strings + + col_names: list of strings, optional (default=None) This list of 1 or more strings contain the names of the columns that uniquely identify an external vertex identifier - drop: (optional) boolean + + drop: boolean, optional (default=False) If True, drop the column names specified in col_names from - the returned DataFrame. Defaults to False. - preserve_order: (optional) boolean + the returned DataFrame. 
+ + preserve_order: boolean, optional (default=False) If True, do extra sorting work to preserve the order - of the input DataFrame. Defaults to False. + of the input DataFrame. + Returns --------- df : cudf.DataFrame or dask_cudf.DataFrame @@ -443,16 +451,20 @@ def from_internal_vertex_id( in a column labeled 'id'. If df is a dataframe type object then internal_column_name should identify which column corresponds the the internal vertex id that should be converted - internal_column_name: (optional) string + + internal_column_name: string, optional (default=None) Name of the column containing the internal vertex id. If df is a series then this parameter is ignored. If df is a DataFrame this parameter is required. - external_column_names: (optional) string or list of strings + + external_column_names: string or list of str, optional (default=None) Name of the columns that define an external vertex id. If not specified, columns will be labeled '0', '1,', ..., 'n-1' - drop: (optional) boolean + + drop: boolean, optional (default=False) If True the internal column name will be dropped from the - DataFrame. Defaults to False. + DataFrame. + Returns --------- df : cudf.DataFrame or dask_cudf.DataFrame @@ -656,6 +668,7 @@ def unrenumber(self, df, column_name, preserve_order=False, n-1 with a suffix of _column_name. Note that this function does not guarantee order or partitioning in multi-GPU mode. 
+ Parameters ---------- df: cudf.DataFrame or dask_cudf.DataFrame diff --git a/python/cugraph/cugraph/utilities/utils.py b/python/cugraph/cugraph/utilities/utils.py index 49647e3101e..332e09a545a 100644 --- a/python/cugraph/cugraph/utilities/utils.py +++ b/python/cugraph/cugraph/utilities/utils.py @@ -446,3 +446,33 @@ def import_optional(mod, default_mod_class=MissingModule): return importlib.import_module(mod) except ModuleNotFoundError: return default_mod_class(mod_name=mod) + + +def create_random_bipartite(v1, v2, size, dtype): + # Creates a full bipartite graph + import numpy as np + from cugraph.structure import Graph + + df1 = cudf.DataFrame() + df1['src'] = cudf.Series(range(0, v1, 1)) + df1['key'] = 1 + + df2 = cudf.DataFrame() + df2['dst'] = cudf.Series(range(v1, v1+v2, 1)) + df2['key'] = 1 + + edges = df1.merge(df2, on='key')[['src', 'dst']] + edges = edges.sort_values(['src', 'dst']).reset_index() + + # Generate edge weights + a = np.random.randint(1, high=size, size=(v1, v2)).astype(dtype) + edges['weight'] = a.flatten() + + g = Graph() + g.from_cudf_edgelist(edges, + source='src', + destination='dst', + edge_attr='weight', + renumber=False) + + return df1['src'], g, a