rapidsai · rapids-bot · Jun 2, 2021 · May 3, 2021 · May 4, 2021 · May 4, 2021
@@ -451,8 +451,9 @@
    "metadata": {},
    "outputs": [],
    "source": [
+    "pr_df.rename(columns={'pagerank': 'weight'}, inplace=True)",
     "# Call weighted Jaccard using the Pagerank scores as weights:\n",
-    "wdf = cugraph.jaccard_w(G, pr_df['pagerank'])"
+    "wdf = cugraph.jaccard_w(G, pr_df)"
    ]
   },
   {

@@ -190,8 +190,8 @@ def analyzeClustering_modularity(G, n_clusters, clustering,
         Specifies the number of clusters in the given clustering
     clustering : cudf.DataFrame
         The cluster assignment to analyze.
-    vertex_col_name : str
-        The name of the column in the clustering dataframe identifying
+    vertex_col_name : str or list of str
+        The name(s) of the column(s) in the clustering dataframe identifying
         the external vertex id
     cluster_col_name : str
         The name of the column in the clustering dataframe identifying
@@ -213,8 +213,10 @@ def analyzeClustering_modularity(G, n_clusters, clustering,
     >>> df = cugraph.spectralBalancedCutClustering(G, 5)
     >>> score = cugraph.analyzeClustering_modularity(G, 5, df)
     """
-
-    if type(vertex_col_name) is not str:
+    if type(vertex_col_name) is list:
+        if not all(isinstance(name, str) for name in vertex_col_name):
+            raise Exception("vertex_col_name must be list of string")
+    elif type(vertex_col_name) is not str:
         raise Exception("vertex_col_name must be a string")
 
     if type(cluster_col_name) is not str:
@@ -224,11 +226,11 @@ def analyzeClustering_modularity(G, n_clusters, clustering,
 
     if G.renumbered:
         clustering = G.add_internal_vertex_id(clustering,
-                                              vertex_col_name,
+                                              'vertex',
                                               vertex_col_name,
                                               drop=True)
 
-    clustering = clustering.sort_values(vertex_col_name)
+    clustering = clustering.sort_values('vertex')
 
     score = spectral_clustering_wrapper.analyzeClustering_modularity(
         G, n_clusters, clustering[cluster_col_name]
@@ -277,8 +279,10 @@ def analyzeClustering_edge_cut(G, n_clusters, clustering,
     >>> df = cugraph.spectralBalancedCutClustering(G, 5)
     >>> score = cugraph.analyzeClustering_edge_cut(G, 5, df)
     """
-
-    if type(vertex_col_name) is not str:
+    if type(vertex_col_name) is list:
+        if not all(isinstance(name, str) for name in vertex_col_name):
+            raise Exception("vertex_col_name must be list of string")
+    elif type(vertex_col_name) is not str:
         raise Exception("vertex_col_name must be a string")
 
     if type(cluster_col_name) is not str:
@@ -288,11 +292,11 @@ def analyzeClustering_edge_cut(G, n_clusters, clustering,
 
     if G.renumbered:
         clustering = G.add_internal_vertex_id(clustering,
-                                              vertex_col_name,
+                                              'vertex',
                                               vertex_col_name,
                                               drop=True)
 
-    clustering = clustering.sort_values(vertex_col_name).reset_index(drop=True)
+    clustering = clustering.sort_values('vertex').reset_index(drop=True)
 
     score = spectral_clustering_wrapper.analyzeClustering_edge_cut(
         G, n_clusters, clustering[cluster_col_name]
@@ -339,20 +343,22 @@ def analyzeClustering_ratio_cut(G, n_clusters, clustering,
     >>> score = cugraph.analyzeClustering_ratio_cut(G, 5, df,
     >>>   'vertex', 'cluster')
     """
-
-    if type(vertex_col_name) is not str:
+    if type(vertex_col_name) is list:
+        if not all(isinstance(name, str) for name in vertex_col_name):
+            raise Exception("vertex_col_name must be list of string")
+    elif type(vertex_col_name) is not str:
         raise Exception("vertex_col_name must be a string")
 
     if type(cluster_col_name) is not str:
         raise Exception("cluster_col_name must be a string")
 
     if G.renumbered:
         clustering = G.add_internal_vertex_id(clustering,
-                                              vertex_col_name,
+                                              'vertex',
                                               vertex_col_name,
                                               drop=True)
 
-    clustering = clustering.sort_values(vertex_col_name)
+    clustering = clustering.sort_values('vertex')
 
     score = spectral_clustering_wrapper.analyzeClustering_ratio_cut(
         G, n_clusters, clustering[cluster_col_name]

@@ -12,7 +12,6 @@
 # limitations under the License.
 
 from cugraph.layout import force_atlas2_wrapper
-from cugraph.structure.graph_classes import null_check
 
 
 def force_atlas2(
@@ -109,13 +108,14 @@ def on_train_end(self, positions):
     """
 
     if pos_list is not None:
-        null_check(pos_list["vertex"])
-        null_check(pos_list["x"])
-        null_check(pos_list["y"])
         if input_graph.renumbered is True:
+            if input_graph.vertex_column_size() > 1:
+                cols = pos_list.columns[:-2].to_list()
+            else:
+                cols = 'vertex'
             pos_list = input_graph.add_internal_vertex_id(pos_list,
                                                           "vertex",
-                                                          "vertex")
+                                                          cols)
 
     if prevent_overlapping:
         raise Exception("Feature not supported")

@@ -12,7 +12,6 @@
 # limitations under the License.
 
 from cugraph.link_analysis import pagerank_wrapper
-from cugraph.structure.graph_classes import null_check
 import cugraph
 
 
@@ -67,6 +66,10 @@ def pagerank(
             Subset of vertices of graph for initial guess for pagerank values
         nstart['values'] : cudf.Series
             Pagerank values for vertices
+    weight : str
+        The attribute column to be used as edge weights if Graph is a NetworkX
+        Graph. This parameter is here for NetworkX compatibility and is ignored
+        in case of a cugraph.Graph
     dangling : dict
         This parameter is here for NetworkX compatibility and ignored
 
@@ -94,17 +97,23 @@ def pagerank(
     G, isNx = cugraph.utilities.check_nx_graph(G, weight)
 
     if personalization is not None:
-        null_check(personalization["vertex"])
-        null_check(personalization["values"])
         if G.renumbered is True:
+            if len(G.renumber_map.implementation.col_names) > 1:
+                cols = personalization.columns[:-1].to_list()
+            else:
+                cols = 'vertex'
             personalization = G.add_internal_vertex_id(
-                personalization, "vertex", "vertex"
+                personalization, "vertex", cols
             )
 
     if nstart is not None:
         if G.renumbered is True:
+            if len(G.renumber_map.implementation.col_names) > 1:
+                cols = nstart.columns[:-1].to_list()
+            else:
+                cols = 'vertex'
             nstart = G.add_internal_vertex_id(
-                nstart, "vertex", "vertex"
+                nstart, "vertex", cols
             )
 
     df = pagerank_wrapper.pagerank(

@@ -13,10 +13,11 @@
 
 import pandas as pd
 import cudf
-from cugraph.structure.graph_classes import Graph, null_check
+from cugraph.structure.graph_classes import Graph
 from cugraph.link_prediction import jaccard_wrapper
 from cugraph.utilities import check_nx_graph
 from cugraph.utilities import df_edge_score_to_dictionary
+from cugraph.utilities import renumber_vertex_pair
 
 
 def jaccard(input_graph, vertex_pair=None):
@@ -108,15 +109,8 @@ def jaccard(input_graph, vertex_pair=None):
     if type(input_graph) is not Graph:
         raise Exception("input graph must be undirected")
 
-    # FIXME: Add support for multi-column vertices
     if type(vertex_pair) == cudf.DataFrame:
-        for col in vertex_pair.columns:
-            null_check(vertex_pair[col])
-            if input_graph.renumbered:
-                vertex_pair = input_graph.add_internal_vertex_id(
-                    vertex_pair, col, col
-                )
-
+        vertex_pair = renumber_vertex_pair(input_graph, vertex_pair)
     elif vertex_pair is None:
         pass
     else:

@@ -13,10 +13,10 @@
 
 import pandas as pd
 from cugraph.link_prediction import overlap_wrapper
-from cugraph.structure.graph_classes import null_check
 import cudf
 from cugraph.utilities import check_nx_graph
 from cugraph.utilities import df_edge_score_to_dictionary
+from cugraph.utilities import renumber_vertex_pair
 
 
 def overlap_coefficient(G, ebunch=None):
@@ -91,14 +91,8 @@ def overlap(input_graph, vertex_pair=None):
     >>> df = cugraph.overlap(G)
     """
 
-    # FIXME: Add support for multi-column vertices
     if type(vertex_pair) == cudf.DataFrame:
-        for col in vertex_pair.columns:
-            null_check(vertex_pair[col])
-            if input_graph.renumbered:
-                vertex_pair = input_graph.add_internal_vertex_id(
-                    vertex_pair, col, col,
-                )
+        vertex_pair = renumber_vertex_pair(input_graph, vertex_pair)
     elif vertex_pair is None:
         pass
     else:

@@ -68,8 +68,9 @@ def overlap(input_graph, weights_arr=None, vertex_pair=None):
         df = cudf.DataFrame()
         df['overlap_coeff'] = result
 
-        first = vertex_pair['first']
-        second = vertex_pair['second']
+        cols = vertex_pair.columns.to_list()
+        first = vertex_pair[cols[0]]
+        second = vertex_pair[cols[1]]
 
         # FIXME: multi column support
         df['source'] = first

@@ -11,9 +11,11 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-from cugraph.structure.graph_classes import Graph, null_check
+from cugraph.structure.graph_classes import Graph
 from cugraph.link_prediction import jaccard_wrapper
 import cudf
+import numpy as np
+from cugraph.utilities import renumber_vertex_pair
 
 
 def jaccard_w(input_graph, weights, vertex_pair=None):
@@ -35,8 +37,15 @@ def jaccard_w(input_graph, weights, vertex_pair=None):
         as an edge list (edge weights are not used for this algorithm). The
         adjacency list will be computed if not already present.
 
-    weights : cudf.Series
+    weights : cudf.DataFrame
         Specifies the weights to be used for each vertex.
+        Vertex should be represented by multiple columns for multi-column
+        vertices.
+
+        weights['vertex'] : cudf.Series
+            Contains the vertex identifiers
+        weights['weight'] : cudf.Series
+            Contains the weights of vertices
 
     vertex_pair : cudf.DataFrame
         A GPU dataframe consisting of two columns representing pairs of
@@ -70,20 +79,28 @@ def jaccard_w(input_graph, weights, vertex_pair=None):
     if type(input_graph) is not Graph:
         raise Exception("input graph must be undirected")
 
-    # FIXME: Add support for multi-column vertices
     if type(vertex_pair) == cudf.DataFrame:
-        for col in vertex_pair.columns:
-            null_check(vertex_pair[col])
-            if input_graph.renumbered:
-                vertex_pair = input_graph.add_internal_vertex_id(
-                    vertex_pair, col, col,
-                )
+        vertex_pair = renumber_vertex_pair(input_graph, vertex_pair)
     elif vertex_pair is None:
         pass
     else:
         raise ValueError("vertex_pair must be a cudf dataframe")
 
-    df = jaccard_wrapper.jaccard(input_graph, weights, vertex_pair)
+    if input_graph.renumbered:
+        vertex_size = input_graph.vertex_column_size()
+        if vertex_size == 1:
+            weights = input_graph.add_internal_vertex_id(
+                weights, 'vertex', 'vertex'
+            )
+        else:
+            cols = weights.columns[:vertex_size].to_list()
+            weights = input_graph.add_internal_vertex_id(
+                weights, 'vertex', cols
+            )
+    jaccard_weights = cudf.Series(np.ones(len(weights)))
+    for i in range(len(weights)):
+        jaccard_weights[weights['vertex'].iloc[i]] = weights['weight'].iloc[i]
+    df = jaccard_wrapper.jaccard(input_graph, jaccard_weights, vertex_pair)
 
     if input_graph.renumbered:
         df = input_graph.unrenumber(df, "source")

@@ -12,8 +12,9 @@
 # limitations under the License.
 
 from cugraph.link_prediction import overlap_wrapper
-from cugraph.structure.graph_classes import null_check
 import cudf
+import numpy as np
+from cugraph.utilities import renumber_vertex_pair
 
 
 def overlap_w(input_graph, weights, vertex_pair=None):
@@ -67,20 +68,33 @@ def overlap_w(input_graph, weights, vertex_pair=None):
     >>> G.from_cudf_edgelist(M, source='0', destination='1')
     >>> df = cugraph.overlap_w(G, M[2])
     """
-    # FIXME: Add support for multi-column vertices
+
     if type(vertex_pair) == cudf.DataFrame:
-        for col in vertex_pair.columns:
-            null_check(vertex_pair[col])
-            if input_graph.renumbered:
-                vertex_pair = input_graph.add_internal_vertex_id(
-                    vertex_pair, col, col
-                )
+        vertex_pair = renumber_vertex_pair(input_graph, vertex_pair)
     elif vertex_pair is None:
         pass
     else:
         raise ValueError("vertex_pair must be a cudf dataframe")
 
-    df = overlap_wrapper.overlap(input_graph, weights, vertex_pair)
+    if input_graph.renumbered:
+        vertex_size = input_graph.vertex_column_size()
+        if vertex_size == 1:
+            weights = input_graph.add_internal_vertex_id(
+                weights, 'vertex', 'vertex'
+            )
+        else:
+            cols = weights.columns[:vertex_size].to_list()
+            weights = input_graph.add_internal_vertex_id(
+                weights, 'vertex', cols
+            )
+
+    overlap_weights = cudf.Series(np.ones(len(weights)))
+    for i in range(len(weights)):
+        overlap_weights[weights['vertex'].iloc[i]] = weights['weight'].iloc[i]
+
+    overlap_weights = overlap_weights.astype('float32')
+
+    df = overlap_wrapper.overlap(input_graph, overlap_weights, vertex_pair)
 
     if input_graph.renumbered:
         df = input_graph.unrenumber(df, "source")

@@ -472,3 +472,9 @@ def compute_renumber_edge_list(self, transposed=False):
             self.edgelist = self.EdgeList(renumbered_ddf)
             self.renumber_map = number_map
             self.properties.store_transposed = transposed
+
+    def vertex_column_size(self):  # vertex column count; 1 if not renumbered
+        if self.properties.renumbered:
+            return self.renumber_map.vertex_column_size()  # ask the map
+        else:
+            return 1  # no renumbering: report a single column
@@ -823,3 +823,9 @@ def neighbors(self, n):
             return self.renumber_map.from_internal_vertex_id(neighbors)["0"]
         else:
             return neighbors
+
+    def vertex_column_size(self):  # vertex column count; 1 if not renumbered
+        if self.properties.renumbered:
+            return self.renumber_map.vertex_column_size()  # ask the map
+        else:
+            return 1  # no renumbering: report a single column
@@ -671,3 +671,6 @@ def unrenumber(self, df, column_name, preserve_order=False,
             return df, col_names
         else:
             return df
+
+    def vertex_column_size(self):  # length of implementation.col_names
+        return len(self.implementation.col_names)