From 77bf63b076ee6f10fe4b599acd30406264851f49 Mon Sep 17 00:00:00 2001
From: Rick Ratzel <rratzel@nvidia.com>
Date: Fri, 28 Jan 2022 00:55:50 -0600
Subject: [PATCH 1/6] Updates to experimental_warning_wrapper() to better
 handle classes by wrapping in a class wrapper instead of a function (where
 the return type is no longer a class type, and isinstance() unexpectedly
 fails). Added tests.

---
 python/cugraph/cugraph/utilities/api_tools.py | 77 +++++++++++++------
 python/pylibcugraph/pylibcugraph/graphs.pxd   |  9 ++-
 python/pylibcugraph/pylibcugraph/graphs.pyx   |  2 +-
 python/pylibcugraph/pylibcugraph/pagerank.pyx |  4 +-
 python/pylibcugraph/pylibcugraph/sssp.pyx     |  4 +-
 .../pylibcugraph/tests/test_utils.py          | 53 +++++++++++++
 .../pylibcugraph/utilities/api_tools.py       | 76 ++++++++++++------
 7 files changed, 170 insertions(+), 55 deletions(-)
 create mode 100644 python/pylibcugraph/pylibcugraph/tests/test_utils.py

diff --git a/python/cugraph/cugraph/utilities/api_tools.py b/python/cugraph/cugraph/utilities/api_tools.py
index e0281d86e5c..eea347a0b40 100644
--- a/python/cugraph/cugraph/utilities/api_tools.py
+++ b/python/cugraph/cugraph/utilities/api_tools.py
@@ -14,27 +14,34 @@
 import functools
 import warnings
 import inspect
+import types
 
 experimental_prefix = "EXPERIMENTAL"
 
+# FIXME: this utility is copied from pylibcugraph. Remove this copy and have
+# cugraph code call the version in pylibcugraph.
 
-def experimental_warning_wrapper(obj, make_public_name=True):
+def experimental_warning_wrapper(obj):
     """
-    Return a callable obj wrapped in a callable the prints a warning about it
-    being "experimental" (an object that is in the public API but subject to
-    change or removal) prior to calling it and returning its value.
-
-    If make_public_name is False, the object's name used in the warning message
-    is left unmodified. If True (default), any leading __ and/or EXPERIMENTAL
-    string are removed from the name used in warning messages. This allows an
-    object to be named with a "private" name in the public API so it can remain
-    hidden while it is still experimental, but have a public name within the
-    experimental namespace so it can be easily discovered and used.
+    Return a callable obj wrapped in a callable the prints a warning about
+    it being "experimental" (an object that is in the public API but subject
+    to change or removal) prior to calling it and returning its value.
+
+    The name used in the warning message is the object's name with any
+    leading EXPERIMENTAL string and/or __ removed, i.e. its public-facing
+    name. This allows an object to be named with a "private" name in the
+    public API so it can remain hidden while it is still experimental, but
+    have a public name within the experimental namespace so it can be easily
+    discovered and used.
     """
-    obj_name = obj.__qualname__
-    if make_public_name:
-        obj_name = obj_name.lstrip(experimental_prefix)
-        obj_name = obj_name.lstrip("__")
+    obj_type = type(obj)
+    if obj_type not in [type, types.FunctionType, types.BuiltinFunctionType]:
+        raise TypeError("obj must be a class or a function type, got "
+                        f"{obj_type}")
+
+    obj_name = obj.__name__
+    obj_name = obj_name.lstrip(experimental_prefix)
+    obj_name = obj_name.lstrip("__")
 
     # Assume the caller of this function is the module containing the
     # experimental obj and try to get its namespace name. Default to no
@@ -42,17 +49,41 @@ def experimental_warning_wrapper(obj, make_public_name=True):
     call_stack = inspect.stack()
     calling_frame = call_stack[1].frame
     ns_name = calling_frame.f_locals.get("__name__")
-    if ns_name is not None:
-        ns_name += "."
-    else:
-        ns_name = ""
+    dot = "." if ns_name is not None else ""
+
+    warning_msg = (f"{ns_name}{dot}{obj_name} is experimental and will "
+                   "change or be removed in a future release.")
+
+    # If obj is a class, create a wrapper class which 1) inherits from the
+    # incoming class, and 2) has a ctor that simply prints the warning and
+    # assigns self to an instance of the incoming class. Ideally a wrapper
+    # around __init__ would be created and assigned to the class as the new
+    # __init__, but #2 is necessary since assigning attributes cannot be done to
+    # a builtin type (such as what a class defined in cython produces).
+    if obj_type is type:
+        class WarningWrapperClass(obj):
+            def __init__(self, *args, **kwargs):
+                warnings.warn(warning_msg, PendingDeprecationWarning)
+                # cython classes do not have a standard __init__, but assigning
+                # to self works instead.
+                if type(obj.__init__) is types.FunctionType:
+                    super(WarningWrapperClass, self).__init__(*args, **kwargs)
+                else:
+                    self = obj(*args, **kwargs)
+        WarningWrapperClass.__module__ = ns_name
+        WarningWrapperClass.__qualname__ = obj_name
+        WarningWrapperClass.__name__ = obj_name
 
-    warning_msg = (f"{ns_name}{obj_name} is experimental and will change "
-                   "or be removed in a future release.")
+        return WarningWrapperClass
 
+    # If this point is reached, the incoming obj is a function so wrap it and
+    # return the wrapper (which is also a function type).
     @functools.wraps(obj)
-    def callable_warning_wrapper(*args, **kwargs):
+    def warning_wrapper_function(*args, **kwargs):
         warnings.warn(warning_msg, PendingDeprecationWarning)
         return obj(*args, **kwargs)
+    warning_wrapper_function.__module__ = ns_name
+    warning_wrapper_function.__qualname__ = obj_name
+    warning_wrapper_function.__name__ = obj_name
 
-    return callable_warning_wrapper
+    return warning_wrapper_function
diff --git a/python/pylibcugraph/pylibcugraph/graphs.pxd b/python/pylibcugraph/pylibcugraph/graphs.pxd
index 9da256f9928..63cbb01f547 100644
--- a/python/pylibcugraph/pylibcugraph/graphs.pxd
+++ b/python/pylibcugraph/pylibcugraph/graphs.pxd
@@ -19,11 +19,14 @@ from pylibcugraph._cugraph_c.graph cimport (
 )
 
 
-cdef class EXPERIMENTAL__Graph:
+# Base class allowing functions to accept either SGGraph or MGGraph
+# This is not visible in python
+cdef class _GPUGraph:
     cdef cugraph_graph_t* c_graph_ptr
 
-cdef class EXPERIMENTAL__SGGraph(EXPERIMENTAL__Graph):
+cdef class EXPERIMENTAL__SGGraph(_GPUGraph):
     pass
 
-# cdef class EXPERIMENTAL__MGGraph(EXPERIMENTAL__Graph):
+# Not yet supported
+# cdef class EXPERIMENTAL__MGGraph(_GPUGraph):
 #     pass
diff --git a/python/pylibcugraph/pylibcugraph/graphs.pyx b/python/pylibcugraph/pylibcugraph/graphs.pyx
index 381191c3e51..e4c60dc125c 100644
--- a/python/pylibcugraph/pylibcugraph/graphs.pyx
+++ b/python/pylibcugraph/pylibcugraph/graphs.pyx
@@ -48,7 +48,7 @@ from pylibcugraph.utils cimport (
 )
 
 
-cdef class EXPERIMENTAL__SGGraph(EXPERIMENTAL__Graph):
+cdef class EXPERIMENTAL__SGGraph(_GPUGraph):
     """
     RAII-stye Graph class for use with single-GPU APIs that manages the
     individual create/free calls and the corresponding cugraph_graph_t pointer.
diff --git a/python/pylibcugraph/pylibcugraph/pagerank.pyx b/python/pylibcugraph/pylibcugraph/pagerank.pyx
index a1b5a704693..b2aca789c15 100644
--- a/python/pylibcugraph/pylibcugraph/pagerank.pyx
+++ b/python/pylibcugraph/pylibcugraph/pagerank.pyx
@@ -48,7 +48,7 @@ from pylibcugraph.resource_handle cimport (
     EXPERIMENTAL__ResourceHandle,
 )
 from pylibcugraph.graphs cimport (
-    EXPERIMENTAL__Graph,
+    _GPUGraph,
 )
 from pylibcugraph.utils cimport (
     assert_success,
@@ -58,7 +58,7 @@ from pylibcugraph.utils cimport (
 
 
 def EXPERIMENTAL__pagerank(EXPERIMENTAL__ResourceHandle resource_handle,
-                           EXPERIMENTAL__Graph graph,
+                           _GPUGraph graph,
                            precomputed_vertex_out_weight_sums,
                            double alpha,
                            double epsilon,
diff --git a/python/pylibcugraph/pylibcugraph/sssp.pyx b/python/pylibcugraph/pylibcugraph/sssp.pyx
index af3eed36186..1d8aa7e5937 100644
--- a/python/pylibcugraph/pylibcugraph/sssp.pyx
+++ b/python/pylibcugraph/pylibcugraph/sssp.pyx
@@ -49,7 +49,7 @@ from pylibcugraph.resource_handle cimport (
     EXPERIMENTAL__ResourceHandle,
 )
 from pylibcugraph.graphs cimport (
-    EXPERIMENTAL__Graph,
+    _GPUGraph,
 )
 from pylibcugraph.utils cimport (
     assert_success,
@@ -58,7 +58,7 @@ from pylibcugraph.utils cimport (
 
 
 def EXPERIMENTAL__sssp(EXPERIMENTAL__ResourceHandle resource_handle,
-                       EXPERIMENTAL__Graph graph,
+                       _GPUGraph graph,
                        size_t source,
                        double cutoff,
                        bool_t compute_predecessors,
diff --git a/python/pylibcugraph/pylibcugraph/tests/test_utils.py b/python/pylibcugraph/pylibcugraph/tests/test_utils.py
new file mode 100644
index 00000000000..3156daa80ba
--- /dev/null
+++ b/python/pylibcugraph/pylibcugraph/tests/test_utils.py
@@ -0,0 +1,53 @@
+# Copyright (c) 2022, NVIDIA CORPORATION.
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import types
+
+import pytest
+
+
+def test_experimental_warning_wrapper_for_funcs():
+    from pylibcugraph.utilities.api_tools import experimental_warning_wrapper
+
+    def EXPERIMENTAL__func(a, b):
+        return a - b
+
+    exp_func = experimental_warning_wrapper(EXPERIMENTAL__func)
+
+    with pytest.warns(PendingDeprecationWarning):
+        assert 1 == exp_func(3, 2)
+
+
+def test_experimental_warning_wrapper_for_classes():
+    from pylibcugraph.utilities.api_tools import experimental_warning_wrapper
+
+    class EXPERIMENTAL__klass:
+        def __init__(self, a, b):
+            self.r = a - b
+
+    exp_klass = experimental_warning_wrapper(EXPERIMENTAL__klass)
+
+    with pytest.warns(PendingDeprecationWarning):
+        k = exp_klass(3, 2)
+        assert 1 == k.r
+        assert isinstance(k, exp_klass)
+        assert k.__class__.__name__ == "klass"
+
+
+def test_experimental_warning_wrapper_for_unsupported_type():
+    from pylibcugraph.utilities.api_tools import experimental_warning_wrapper
+
+    # A module type should not be allowed to be wrapped
+    mod = types.ModuleType("modname")
+    with pytest.raises(TypeError):
+        exp_mod = experimental_warning_wrapper(mod)
diff --git a/python/pylibcugraph/pylibcugraph/utilities/api_tools.py b/python/pylibcugraph/pylibcugraph/utilities/api_tools.py
index e0281d86e5c..f581d95c84b 100644
--- a/python/pylibcugraph/pylibcugraph/utilities/api_tools.py
+++ b/python/pylibcugraph/pylibcugraph/utilities/api_tools.py
@@ -14,27 +14,31 @@
 import functools
 import warnings
 import inspect
+import types
 
 experimental_prefix = "EXPERIMENTAL"
 
-
-def experimental_warning_wrapper(obj, make_public_name=True):
+def experimental_warning_wrapper(obj):
     """
-    Return a callable obj wrapped in a callable the prints a warning about it
-    being "experimental" (an object that is in the public API but subject to
-    change or removal) prior to calling it and returning its value.
-
-    If make_public_name is False, the object's name used in the warning message
-    is left unmodified. If True (default), any leading __ and/or EXPERIMENTAL
-    string are removed from the name used in warning messages. This allows an
-    object to be named with a "private" name in the public API so it can remain
-    hidden while it is still experimental, but have a public name within the
-    experimental namespace so it can be easily discovered and used.
+    Return a callable obj wrapped in a callable the prints a warning about
+    it being "experimental" (an object that is in the public API but subject
+    to change or removal) prior to calling it and returning its value.
+
+    The name used in the warning message is the object's name with any
+    leading EXPERIMENTAL string and/or __ removed, i.e. its public-facing
+    name. This allows an object to be named with a "private" name in the
+    public API so it can remain hidden while it is still experimental, but
+    have a public name within the experimental namespace so it can be easily
+    discovered and used.
     """
-    obj_name = obj.__qualname__
-    if make_public_name:
-        obj_name = obj_name.lstrip(experimental_prefix)
-        obj_name = obj_name.lstrip("__")
+    obj_type = type(obj)
+    if obj_type not in [type, types.FunctionType, types.BuiltinFunctionType]:
+        raise TypeError("obj must be a class or a function type, got "
+                        f"{obj_type}")
+
+    obj_name = obj.__name__
+    obj_name = obj_name.lstrip(experimental_prefix)
+    obj_name = obj_name.lstrip("__")
 
     # Assume the caller of this function is the module containing the
     # experimental obj and try to get its namespace name. Default to no
@@ -42,17 +46,41 @@ def experimental_warning_wrapper(obj, make_public_name=True):
     call_stack = inspect.stack()
     calling_frame = call_stack[1].frame
     ns_name = calling_frame.f_locals.get("__name__")
-    if ns_name is not None:
-        ns_name += "."
-    else:
-        ns_name = ""
+    dot = "." if ns_name is not None else ""
+
+    warning_msg = (f"{ns_name}{dot}{obj_name} is experimental and will "
+                   "change or be removed in a future release.")
+
+    # If obj is a class, create a wrapper class which 1) inherits from the
+    # incoming class, and 2) has a ctor that simply prints the warning and
+    # assigns self to an instance of the incoming class. Ideally a wrapper
+    # around __init__ would be created and assigned to the class as the new
+    # __init__, but #2 is necessary since assigning attributes cannot be done to
+    # a builtin type (such as what a class defined in cython produces).
+    if obj_type is type:
+        class WarningWrapperClass(obj):
+            def __init__(self, *args, **kwargs):
+                warnings.warn(warning_msg, PendingDeprecationWarning)
+                # cython classes do not have a standard __init__, but assigning
+                # to self works instead.
+                if type(obj.__init__) is types.FunctionType:
+                    super(WarningWrapperClass, self).__init__(*args, **kwargs)
+                else:
+                    self = obj(*args, **kwargs)
+        WarningWrapperClass.__module__ = ns_name
+        WarningWrapperClass.__qualname__ = obj_name
+        WarningWrapperClass.__name__ = obj_name
 
-    warning_msg = (f"{ns_name}{obj_name} is experimental and will change "
-                   "or be removed in a future release.")
+        return WarningWrapperClass
 
+    # If this point is reached, the incoming obj is a function so wrap it and
+    # return the wrapper (which is also a function type).
     @functools.wraps(obj)
-    def callable_warning_wrapper(*args, **kwargs):
+    def warning_wrapper_function(*args, **kwargs):
         warnings.warn(warning_msg, PendingDeprecationWarning)
         return obj(*args, **kwargs)
+    warning_wrapper_function.__module__ = ns_name
+    warning_wrapper_function.__qualname__ = obj_name
+    warning_wrapper_function.__name__ = obj_name
 
-    return callable_warning_wrapper
+    return warning_wrapper_function

From 7c5b5e2acba593222320cabd0f51b6ac0a5c42c1 Mon Sep 17 00:00:00 2001
From: Rick Ratzel <rratzel@nvidia.com>
Date: Fri, 28 Jan 2022 09:09:50 -0600
Subject: [PATCH 2/6] Updated comments, flake8 fixes.

---
 python/cugraph/cugraph/utilities/api_tools.py | 22 +++++++++++--------
 .../pylibcugraph/tests/test_utils.py          |  2 +-
 .../pylibcugraph/utilities/api_tools.py       | 22 +++++++++++--------
 3 files changed, 27 insertions(+), 19 deletions(-)

diff --git a/python/cugraph/cugraph/utilities/api_tools.py b/python/cugraph/cugraph/utilities/api_tools.py
index eea347a0b40..26e3b0a1d62 100644
--- a/python/cugraph/cugraph/utilities/api_tools.py
+++ b/python/cugraph/cugraph/utilities/api_tools.py
@@ -18,6 +18,7 @@
 
 experimental_prefix = "EXPERIMENTAL"
 
+
 # FIXME: this utility is copied from pylibcugraph. Remove this copy and have
 # cugraph code call the version in pylibcugraph.
 
@@ -56,17 +57,19 @@ def experimental_warning_wrapper(obj):
 
     # If obj is a class, create a wrapper class which 1) inherits from the
     # incoming class, and 2) has a ctor that simply prints the warning and
-    # assigns self to an instance of the incoming class. Ideally a wrapper
-    # around __init__ would be created and assigned to the class as the new
-    # __init__, but #2 is necessary since assigning attributes cannot be done to
-    # a builtin type (such as what a class defined in cython produces).
+    # calls the base class ctor. A wrapper class is needed so the new type
+    # matches the incoming type.
+    # Ideally a wrapper function would be created and assigned to the class as
+    # the new __init__, but #2 is necessary since assigning attributes cannot
+    # be done to a builtin type (such as a class defined in cython).
     if obj_type is type:
         class WarningWrapperClass(obj):
             def __init__(self, *args, **kwargs):
                 warnings.warn(warning_msg, PendingDeprecationWarning)
-                # cython classes do not have a standard __init__, but assigning
-                # to self works instead.
-                if type(obj.__init__) is types.FunctionType:
+                # call base class __init__ for python, but cython classes do
+                # not have a standard callable __init__ and assigning to self
+                # works instead.
+                if isinstance(obj.__init__, types.FunctionType):
                     super(WarningWrapperClass, self).__init__(*args, **kwargs)
                 else:
                     self = obj(*args, **kwargs)
@@ -76,8 +79,9 @@ def __init__(self, *args, **kwargs):
 
         return WarningWrapperClass
 
-    # If this point is reached, the incoming obj is a function so wrap it and
-    # return the wrapper (which is also a function type).
+    # If this point is reached, the incoming obj is a function so simply wrap
+    # it and return the wrapper. Since the wrapper is a function type, it will
+    # match the incoming obj type.
     @functools.wraps(obj)
     def warning_wrapper_function(*args, **kwargs):
         warnings.warn(warning_msg, PendingDeprecationWarning)
diff --git a/python/pylibcugraph/pylibcugraph/tests/test_utils.py b/python/pylibcugraph/pylibcugraph/tests/test_utils.py
index 3156daa80ba..036a62b9c1e 100644
--- a/python/pylibcugraph/pylibcugraph/tests/test_utils.py
+++ b/python/pylibcugraph/pylibcugraph/tests/test_utils.py
@@ -50,4 +50,4 @@ def test_experimental_warning_wrapper_for_unsupported_type():
     # A module type should not be allowed to be wrapped
     mod = types.ModuleType("modname")
     with pytest.raises(TypeError):
-        exp_mod = experimental_warning_wrapper(mod)
+        experimental_warning_wrapper(mod)
diff --git a/python/pylibcugraph/pylibcugraph/utilities/api_tools.py b/python/pylibcugraph/pylibcugraph/utilities/api_tools.py
index f581d95c84b..e869e766c11 100644
--- a/python/pylibcugraph/pylibcugraph/utilities/api_tools.py
+++ b/python/pylibcugraph/pylibcugraph/utilities/api_tools.py
@@ -18,6 +18,7 @@
 
 experimental_prefix = "EXPERIMENTAL"
 
+
 def experimental_warning_wrapper(obj):
     """
     Return a callable obj wrapped in a callable the prints a warning about
@@ -53,17 +54,19 @@ def experimental_warning_wrapper(obj):
 
     # If obj is a class, create a wrapper class which 1) inherits from the
     # incoming class, and 2) has a ctor that simply prints the warning and
-    # assigns self to an instance of the incoming class. Ideally a wrapper
-    # around __init__ would be created and assigned to the class as the new
-    # __init__, but #2 is necessary since assigning attributes cannot be done to
-    # a builtin type (such as what a class defined in cython produces).
+    # calls the base class ctor. A wrapper class is needed so the new type
+    # matches the incoming type.
+    # Ideally a wrapper function would be created and assigned to the class as
+    # the new __init__, but #2 is necessary since assigning attributes cannot
+    # be done to a builtin type (such as a class defined in cython).
     if obj_type is type:
         class WarningWrapperClass(obj):
             def __init__(self, *args, **kwargs):
                 warnings.warn(warning_msg, PendingDeprecationWarning)
-                # cython classes do not have a standard __init__, but assigning
-                # to self works instead.
-                if type(obj.__init__) is types.FunctionType:
+                # call base class __init__ for python, but cython classes do
+                # not have a standard callable __init__ and assigning to self
+                # works instead.
+                if isinstance(obj.__init__, types.FunctionType):
                     super(WarningWrapperClass, self).__init__(*args, **kwargs)
                 else:
                     self = obj(*args, **kwargs)
@@ -73,8 +76,9 @@ def __init__(self, *args, **kwargs):
 
         return WarningWrapperClass
 
-    # If this point is reached, the incoming obj is a function so wrap it and
-    # return the wrapper (which is also a function type).
+    # If this point is reached, the incoming obj is a function so simply wrap
+    # it and return the wrapper. Since the wrapper is a function type, it will
+    # match the incoming obj type.
     @functools.wraps(obj)
     def warning_wrapper_function(*args, **kwargs):
         warnings.warn(warning_msg, PendingDeprecationWarning)

From 9c270fb7cf317bc2c2962dac6a81721a40cf14ef Mon Sep 17 00:00:00 2001
From: Rick Ratzel <rratzel@nvidia.com>
Date: Wed, 2 Feb 2022 13:48:36 -0600
Subject: [PATCH 3/6] Made PropertyGraph column name constants public for use
 by clients, changed num_vertices property impl for better performance, added
 get_vertices(), allow a None type_name for add_*_data(), updated docstrings,
 changed the edge_data added to Graphs to be a DataFrame for efficiency, added
 tests and benchmarks, refactored and added test fixtures, updated pytest.ini
 for new benchmarks.

---
 .../cugraph/structure/property_graph.py       | 255 +++++-----
 .../cugraph/tests/test_property_graph.py      | 457 +++++++++++++++---
 python/cugraph/pytest.ini                     |  28 ++
 3 files changed, 567 insertions(+), 173 deletions(-)

diff --git a/python/cugraph/cugraph/structure/property_graph.py b/python/cugraph/cugraph/structure/property_graph.py
index 4712b4f1067..14f18aa8f50 100644
--- a/python/cugraph/cugraph/structure/property_graph.py
+++ b/python/cugraph/cugraph/structure/property_graph.py
@@ -11,7 +11,6 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-
 import cudf
 
 import cugraph
@@ -59,12 +58,12 @@ class EXPERIMENTAL__PropertyGraph:
     algorithm results with corresponding properties.
     """
     # column name constants used in internal DataFrames
-    __vertex_col_name = "_VERTEX_"
-    __src_col_name = "_SRC_"
-    __dst_col_name = "_DST_"
-    __type_col_name = "_TYPE_"
-    __edge_id_col_name = "_EDGE_ID_"
-    __vertex_id_col_name = "_VERTEX_ID_"
+    vertex_col_name = "_VERTEX_"
+    src_col_name = "_SRC_"
+    dst_col_name = "_DST_"
+    type_col_name = "_TYPE_"
+    edge_id_col_name = "_EDGE_ID_"
+    vertex_id_col_name = "_VERTEX_ID_"
 
     def __init__(self):
         # The dataframe containing the properties for each vertex.
@@ -133,31 +132,24 @@ def __init__(self):
         # incrementing this counter.
         self.__last_edge_id = None
 
+        # Cached property values
+        self.__num_vertices = None
+
     # PropertyGraph read-only attributes
     @property
     def num_vertices(self):
-        # Create a Series of the appropriate type (cudf.Series, pandas.Series,
-        # etc.) based on the type currently in use, then use it to gather all
-        # unique vertices.
-        vpd = self.__vertex_prop_dataframe
-        epd = self.__edge_prop_dataframe
-        if (vpd is None) and (epd is None):
-            return 0
+        if self.__num_vertices is not None:
+            return self.__num_vertices
+
+        self.__num_vertices = 0
+        vert_sers = self.__get_all_vertices_series()
+        if vert_sers:
+            if self.__series_type is cudf.Series:
+                self.__num_vertices = cudf.concat(vert_sers).nunique()
+            else:
+                self.__num_vertices = pd.concat(vert_sers).nunique()
 
-        # Assume __series_type is set if this point reached!
-        verts = self.__series_type(dtype="object")
-        if vpd is not None:
-            verts = verts.append(vpd[self.__vertex_col_name])
-        if epd is not None:
-            # pandas.Series.unique() can return an ndarray, which cannot be
-            # appended to a Series. Always construct an appropriate series_type
-            # from the unique values prior to appending.
-            verts = verts.append(
-                self.__series_type(epd[self.__src_col_name].unique()))
-            verts = verts.append(
-                self.__series_type(epd[self.__dst_col_name].unique()))
-            verts = verts.unique()
-        return len(verts)
+        return self.__num_vertices
 
     @property
     def num_edges(self):
@@ -166,12 +158,19 @@ def num_edges(self):
         else:
             return 0
 
+    @property
+    def edges(self):
+        if self.__edge_prop_dataframe is not None:
+            return self.__edge_prop_dataframe[[self.src_col_name,
+                                               self.dst_col_name]]
+        return None
+
     @property
     def vertex_property_names(self):
         if self.__vertex_prop_dataframe is not None:
             props = list(self.__vertex_prop_dataframe.columns)
-            props.remove(self.__vertex_col_name)
-            props.remove(self.__type_col_name)  # should "type" be removed?
+            props.remove(self.vertex_col_name)
+            props.remove(self.type_col_name)  # should "type" be removed?
             return props
         return []
 
@@ -179,10 +178,10 @@ def vertex_property_names(self):
     def edge_property_names(self):
         if self.__edge_prop_dataframe is not None:
             props = list(self.__edge_prop_dataframe.columns)
-            props.remove(self.__src_col_name)
-            props.remove(self.__dst_col_name)
-            props.remove(self.__edge_id_col_name)
-            props.remove(self.__type_col_name)  # should "type" be removed?
+            props.remove(self.src_col_name)
+            props.remove(self.dst_col_name)
+            props.remove(self.edge_id_col_name)
+            props.remove(self.type_col_name)  # should "type" be removed?
             return props
         return []
 
@@ -195,6 +194,15 @@ def _vertex_prop_dataframe(self):
     def _edge_prop_dataframe(self):
         return self.__edge_prop_dataframe
 
+    def get_vertices(self, selection=None):
+        vert_sers = self.__get_all_vertices_series()
+        if vert_sers:
+            if self.__series_type is cudf.Series:
+                return self.__series_type(cudf.concat(vert_sers).unique())
+            else:
+                return self.__series_type(pd.concat(vert_sers).unique())
+        return self.__series_type()
+
     def add_vertex_data(self,
                         dataframe,
                         vertex_id_column,
@@ -235,7 +243,7 @@ def add_vertex_data(self,
         if vertex_id_column not in dataframe.columns:
             raise ValueError(f"{vertex_id_column} is not a column in "
                              f"dataframe: {dataframe.columns}")
-        if type(type_name) is not str:
+        if (type_name is not None) and not(isinstance(type_name, str)):
             raise TypeError("type_name must be a string, got: "
                             f"{type(type_name)}")
         if property_columns:
@@ -259,9 +267,13 @@ def add_vertex_data(self,
                                 "the PropertyGraph was already initialized "
                                 f"using type {self.__dataframe_type}")
 
+        # Clear the cached value for num_vertices since more could be added in
+        # this method.
+        self.__num_vertices = None
+
         # Initialize the __vertex_prop_dataframe if necessary using the same
         # type as the incoming dataframe.
-        default_vertex_columns = [self.__vertex_col_name, self.__type_col_name]
+        default_vertex_columns = [self.vertex_col_name, self.type_col_name]
         if self.__vertex_prop_dataframe is None:
             self.__vertex_prop_dataframe = \
                 self.__dataframe_type(columns=default_vertex_columns)
@@ -271,7 +283,7 @@ def add_vertex_data(self,
             # https://github.com/rapidsai/cudf/issues/9981)
             self.__update_dataframe_dtypes(
                 self.__vertex_prop_dataframe,
-                {self.__vertex_col_name: dataframe[vertex_id_column].dtype})
+                {self.vertex_col_name: dataframe[vertex_id_column].dtype})
 
         # Ensure that both the predetermined vertex ID column name and vertex
         # type column name are present for proper merging.
@@ -280,9 +292,9 @@ def add_vertex_data(self,
         # columns. The copied DataFrame is then merged (another copy) and then
         # deleted when out-of-scope.
         tmp_df = dataframe.copy(deep=True)
-        tmp_df[self.__vertex_col_name] = tmp_df[vertex_id_column]
+        tmp_df[self.vertex_col_name] = tmp_df[vertex_id_column]
         # FIXME: handle case of a type_name column already being in tmp_df
-        tmp_df[self.__type_col_name] = type_name
+        tmp_df[self.type_col_name] = type_name
 
         if property_columns:
             # all columns
@@ -352,7 +364,7 @@ def add_edge_data(self,
         if invalid_columns:
             raise ValueError("vertex_id_columns contains column(s) not found "
                              f"in dataframe: {list(invalid_columns)}")
-        if type(type_name) is not str:
+        if (type_name is not None) and not(isinstance(type_name, str)):
             raise TypeError("type_name must be a string, got: "
                             f"{type(type_name)}")
         if property_columns:
@@ -376,10 +388,14 @@ def add_edge_data(self,
                                 "the PropertyGraph was already initialized "
                                 f"using type {self.__dataframe_type}")
 
-        default_edge_columns = [self.__src_col_name,
-                                self.__dst_col_name,
-                                self.__edge_id_col_name,
-                                self.__type_col_name]
+        # Clear the cached value for num_vertices since more could be added in
+        # this method.
+        self.__num_vertices = None
+
+        default_edge_columns = [self.src_col_name,
+                                self.dst_col_name,
+                                self.edge_id_col_name,
+                                self.type_col_name]
         if self.__edge_prop_dataframe is None:
             self.__edge_prop_dataframe = \
                 self.__dataframe_type(columns=default_edge_columns)
@@ -389,18 +405,18 @@ def add_edge_data(self,
             # https://github.com/rapidsai/cudf/issues/9981)
             self.__update_dataframe_dtypes(
                 self.__edge_prop_dataframe,
-                {self.__src_col_name: dataframe[vertex_id_columns[0]].dtype,
-                 self.__dst_col_name: dataframe[vertex_id_columns[1]].dtype,
-                 self.__edge_id_col_name: "Int64"})
+                {self.src_col_name: dataframe[vertex_id_columns[0]].dtype,
+                 self.dst_col_name: dataframe[vertex_id_columns[1]].dtype,
+                 self.edge_id_col_name: "Int64"})
 
         # NOTE: This copies the incoming DataFrame in order to add the new
         # columns. The copied DataFrame is then merged (another copy) and then
         # deleted when out-of-scope.
         tmp_df = dataframe.copy(deep=True)
-        tmp_df[self.__src_col_name] = tmp_df[vertex_id_columns[0]]
-        tmp_df[self.__dst_col_name] = tmp_df[vertex_id_columns[1]]
+        tmp_df[self.src_col_name] = tmp_df[vertex_id_columns[0]]
+        tmp_df[self.dst_col_name] = tmp_df[vertex_id_columns[1]]
         # FIXME: handle case of a type_name column already being in tmp_df
-        tmp_df[self.__type_col_name] = type_name
+        tmp_df[self.type_col_name] = type_name
 
         if property_columns:
             # all columns
@@ -463,11 +479,11 @@ def select_vertices(self, expr, from_previous_selection=None):
             previously_selected_rows = self.__vertex_prop_dataframe[
                 from_previous_selection.vertex_selections]
             verts_from_previously_selected_rows = \
-                previously_selected_rows[self.__vertex_col_name]
+                previously_selected_rows[self.vertex_col_name]
             # get all the rows from the entire __vertex_prop_dataframe that
             # contain those verts
             rows_with_verts = \
-                self.__vertex_prop_dataframe[self.__vertex_col_name]\
+                self.__vertex_prop_dataframe[self.vertex_col_name]\
                     .isin(verts_from_previously_selected_rows)
             rows_to_eval = self.__vertex_prop_dataframe[rows_with_verts]
             locals = dict([(n, rows_to_eval[n])
@@ -520,7 +536,7 @@ def select_edges(self, expr):
             edge_selection_series=selected_col)
 
     def extract_subgraph(self,
-                         create_using=None,
+                         create_using=cugraph.Graph,
                          selection=None,
                          edge_weight_property=None,
                          default_edge_weight=None,
@@ -555,7 +571,9 @@ def extract_subgraph(self,
 
         Returns
         -------
-        None
+        A Graph instance of the same type as create_using containing only the
+        vertices and edges resulting from applying the selection to the set of
+        vertex and edge property data.
 
         Examples
         --------
@@ -570,8 +588,8 @@ def extract_subgraph(self,
         # vertices assume the original dtypes in the user input have been
         # preserved. However, merge operations on the DataFrames can change
         # dtypes (eg. int64 to float64 in order to add NaN entries). This
-        # should not be a problem since this the conversions do not change
-        # the values.
+        # should not be a problem since the conversions do not change the
+        # values.
         if (selection is not None) and \
            (selection.vertex_selections is not None):
             selected_vertex_dataframe = \
@@ -592,10 +610,10 @@ def extract_subgraph(self,
         # selected verts in both src and dst
         if (selected_vertex_dataframe is not None) and \
            not(selected_vertex_dataframe.empty):
-            selected_verts = selected_vertex_dataframe[self.__vertex_col_name]
-            has_srcs = selected_edge_dataframe[self.__src_col_name]\
+            selected_verts = selected_vertex_dataframe[self.vertex_col_name]
+            has_srcs = selected_edge_dataframe[self.src_col_name]\
                 .isin(selected_verts)
-            has_dsts = selected_edge_dataframe[self.__dst_col_name]\
+            has_dsts = selected_edge_dataframe[self.dst_col_name]\
                 .isin(selected_verts)
             edges = selected_edge_dataframe[has_srcs & has_dsts]
         else:
@@ -668,13 +686,15 @@ def annotate_dataframe(self, df, G, edge_vertex_id_columns):
             raise TypeError(f"df type {df_type} does not match DataFrame type "
                             f"{self.__dataframe_type} used in PropertyGraph")
 
-        # FIXME: check that G has edge_data attr
-
         # Add the src, dst, edge_id info from the Graph to a DataFrame
-        edge_info_df = self.__dataframe_type(columns=[self.__src_col_name,
-                                                      self.__dst_col_name,
-                                                      self.__edge_id_col_name],
-                                             data=G.edge_data)
+        # edge_info_df = self.__dataframe_type(columns=[self.src_col_name,
+        #                                               self.dst_col_name,
+        #                                               self.edge_id_col_name],
+        #                                      data=G.edge_data)
+        if hasattr(G, "edge_data"):
+            edge_info_df = G.edge_data
+        else:
+            raise AttributeError("Graph G does not have attribute 'edge_data'")
 
         # New result includes only properties from the src/dst edges identified
         # by edge IDs. All other data in df is merged based on src/dst values.
@@ -683,12 +703,12 @@ def annotate_dataframe(self, df, G, edge_vertex_id_columns):
                                            how="inner")
 
         # FIXME: also allow edge ID col to be passed in and renamed.
-        new_df = df.rename(columns={src_col_name: self.__src_col_name,
-                                    dst_col_name: self.__dst_col_name})
+        new_df = df.rename(columns={src_col_name: self.src_col_name,
+                                    dst_col_name: self.dst_col_name})
         new_df = new_df.merge(edge_props_df)
         # restore the original src/dst column names
-        new_df.rename(columns={self.__src_col_name: src_col_name,
-                               self.__dst_col_name: dst_col_name},
+        new_df.rename(columns={self.src_col_name: src_col_name,
+                               self.dst_col_name: dst_col_name},
                       inplace=True)
 
         # restore the original dtypes
@@ -700,31 +720,9 @@ def annotate_dataframe(self, df, G, edge_vertex_id_columns):
         # columns from edge types not included in the edges in df.
         return new_df
 
-    @classmethod
-    def get_edge_tuples(cls, edge_prop_df):
-        """
-        Returns a list of (src vertex, dst vertex, edge_id) tuples present in
-        edge_prop_df.
-        """
-        if cls.__src_col_name not in edge_prop_df.columns:
-            raise ValueError(f"column {cls.__src_col_name} missing from "
-                             "edge_prop_df")
-        if cls.__dst_col_name not in edge_prop_df.columns:
-            raise ValueError(f"column {cls.__dst_col_name} missing from "
-                             "edge_prop_df")
-        if cls.__edge_id_col_name not in edge_prop_df.columns:
-            raise ValueError(f"column {cls.__edge_id_col_name} missing "
-                             "from edge_prop_df")
-        src = edge_prop_df[cls.__src_col_name]
-        dst = edge_prop_df[cls.__dst_col_name]
-        edge_id = edge_prop_df[cls.__edge_id_col_name]
-        retlist = [(src.iloc[i], dst.iloc[i], edge_id.iloc[i])
-                   for i in range(len(src))]
-        return retlist
-
-    @classmethod
-    def edge_props_to_graph(cls, edge_prop_df,
-                            create_using=None,
+    def edge_props_to_graph(self,
+                            edge_prop_df,
+                            create_using,
                             edge_weight_property=None,
                             allow_multi_edges=False):
         """
@@ -737,12 +735,12 @@ def edge_props_to_graph(cls, edge_prop_df,
                              "edge_prop_df")
 
         # Set up the new Graph to return
-        if create_using is None:
-            G = cugraph.Graph()
-        elif isinstance(create_using, cugraph.Graph):
+        if isinstance(create_using, cugraph.Graph):
             # FIXME: extract more attrs from the create_using instance
             attrs = {"directed": create_using.is_directed()}
             G = type(create_using)(**attrs)
+        # FIXME: this allows anything to be instantiated and does not check
+        # that the type is a valid Graph type.
         elif type(create_using) is type(type):
             G = create_using()
         else:
@@ -754,20 +752,20 @@ def edge_props_to_graph(cls, edge_prop_df,
         # non-MultiGraphs would result in ambiguous edge properties.
         # FIXME: make allow_multi_edges accept "auto" for use with MultiGraph
         if (allow_multi_edges is False) and \
-           cls.has_duplicate_edges(edge_prop_df):
+           self.has_duplicate_edges(edge_prop_df):
             if create_using:
                 if type(create_using) is type:
                     t = create_using.__name__
                 else:
                     t = type(create_using).__name__
-                msg = f"{t} graph type specified by create_using"
+                msg = f"'{t}' graph type specified by create_using"
             else:
                 msg = "default Graph graph type"
             raise RuntimeError("query resulted in duplicate edges which "
-                               f"cannot be represented with a {msg}")
+                               f"cannot be represented with the {msg}")
 
-        create_args = {"source": cls.__src_col_name,
-                       "destination": cls.__dst_col_name,
+        create_args = {"source": self.src_col_name,
+                       "destination": self.dst_col_name,
                        "edge_attr": edge_weight_property,
                        "renumber": True,
                        }
@@ -776,15 +774,12 @@ def edge_props_to_graph(cls, edge_prop_df,
         else:
             G.from_pandas_edgelist(edge_prop_df, **create_args)
 
-        # Set the edge_data on the resulting Graph to the list of edge tuples,
-        # which includes the unique edge IDs. Edge IDs are needed for future
-        # calls to annotate_dataframe() in order to apply properties from the
-        # correct edges.
-        # FIXME: this could be a very large list of tuples if the number of
-        # edges in G is large (eg. a large MNMG graph that cannot fit in host
-        # memory). Consider adding the edge IDs to the edgelist DataFrame in G
-        # instead.
-        G.edge_data = cls.get_edge_tuples(edge_prop_df)
+        # Set the edge_data on the resulting Graph to a DataFrame containing
+        # the edges and the edge ID for each. Edge IDs are needed for future
+        # calls to annotate_dataframe() in order to associate edges with their
+        # properties, since the PG can contain multiple edges between vertices
+        # with different properties.
+        G.edge_data = self.__create_property_lookup_table(edge_prop_df)
         # FIXME: also add vertex_data
 
         return G
@@ -794,14 +789,27 @@ def has_duplicate_edges(cls, df):
         """
         Return True if df has >1 of the same src, dst pair
         """
+        # FIXME: this can be very expensive for large DataFrames
         if df.empty:
             return False
 
         def has_duplicate_dst(df):
-            return df[cls.__dst_col_name].nunique() != \
-                df[cls.__dst_col_name].size
+            return df[cls.dst_col_name].nunique() != \
+                df[cls.dst_col_name].size
 
-        return df.groupby(cls.__src_col_name).apply(has_duplicate_dst).any()
+        return df.groupby(cls.src_col_name).apply(has_duplicate_dst).any()
+
+    def __create_property_lookup_table(self, edge_prop_df):
+        """
+        Returns a DataFrame containing the src vertex, dst vertex, and edge_id
+        values from edge_prop_df.
+        """
+        src = edge_prop_df[self.src_col_name]
+        dst = edge_prop_df[self.dst_col_name]
+        edge_id = edge_prop_df[self.edge_id_col_name]
+        return self.__dataframe_type({self.src_col_name: src,
+                                      self.dst_col_name: dst,
+                                      self.edge_id_col_name: edge_id})
 
     def __add_edge_ids(self):
         """
@@ -809,7 +817,7 @@ def __add_edge_ids(self):
         incremented by 1 for each edge.
         """
         prev_eid = -1 if self.__last_edge_id is None else self.__last_edge_id
-        nans = self.__edge_prop_dataframe[self.__edge_id_col_name].isna()
+        nans = self.__edge_prop_dataframe[self.edge_id_col_name].isna()
 
         if nans.any():
             indices = nans.index[nans]
@@ -818,11 +826,26 @@ def __add_edge_ids(self):
             new_eids = self.__series_type(
                 range(starting_eid, starting_eid + num_indices))
 
-            self.__edge_prop_dataframe[self.__edge_id_col_name]\
+            self.__edge_prop_dataframe[self.edge_id_col_name]\
                 .iloc[indices] = new_eids
 
             self.__last_edge_id = starting_eid + num_indices - 1
 
+    def __get_all_vertices_series(self):
+        """
+        Return a list of all Series objects that contain vertices from all
+        tables.
+        """
+        vpd = self.__vertex_prop_dataframe
+        epd = self.__edge_prop_dataframe
+        vert_sers = []
+        if vpd is not None:
+            vert_sers.append(vpd[self.vertex_col_name])
+        if epd is not None:
+            vert_sers.append(epd[self.src_col_name])
+            vert_sers.append(epd[self.dst_col_name])
+        return vert_sers
+
     @staticmethod
     def __get_new_column_dtypes(from_df, to_df):
         """
diff --git a/python/cugraph/cugraph/tests/test_property_graph.py b/python/cugraph/cugraph/tests/test_property_graph.py
index 0578a7780de..8cced223bf7 100644
--- a/python/cugraph/cugraph/tests/test_property_graph.py
+++ b/python/cugraph/cugraph/tests/test_property_graph.py
@@ -11,14 +11,28 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
+import time
 import gc
 
 import pytest
 import pandas as pd
+import numpy as np
 import cudf
 from cudf.testing import assert_frame_equal, assert_series_equal
 
+# If the rapids-pytest-benchmark plugin is installed, the "gpubenchmark"
+# fixture will be available automatically. Check that this fixture is available
+# by trying to import rapids_pytest_benchmark, and if that fails, set
+# "gpubenchmark" to the standard "benchmark" fixture provided by
+# pytest-benchmark.
+try:
+    import rapids_pytest_benchmark  # noqa: F401
+except ImportError:
+    import pytest_benchmark
+    gpubenchmark = pytest_benchmark.plugin.benchmark
+
 import cugraph
+from cugraph.generators import rmat
 from cugraph.tests import utils
 
 # =============================================================================
@@ -107,21 +121,26 @@ def setup_function():
 df_types = [cudf.DataFrame, pd.DataFrame]
 
 
-def df_type_id(dft):
+def df_type_id(dataframe_type):
+    """
+    Return a string that describes the dataframe_type, used for test output.
+    """
     s = "df_type="
-    if dft == cudf.DataFrame:
+    if dataframe_type == cudf.DataFrame:
         return s+"cudf.DataFrame"
-    if dft == pd.DataFrame:
+    if dataframe_type == pd.DataFrame:
         return s+"pandas.DataFrame"
     return s+"?"
 
 
-@pytest.fixture(scope="module",
-                params=utils.genFixtureParamsProduct((df_types, df_type_id))
-                )
-def property_graph_instance(request):
+df_types_fixture_params = utils.genFixtureParamsProduct((df_types, df_type_id))
+
+
+@pytest.fixture(scope="module", params=df_types_fixture_params)
+def dataset1_PropertyGraph(request):
     """
-    FIXME: fill this in
+    Fixture which returns an instance of a PropertyGraph with vertex and edge
+    data added from dataset1, parameterized for different DataFrame types.
     """
     dataframe_type = request.param[0]
     from cugraph.experimental import PropertyGraph
@@ -177,8 +196,74 @@ def property_graph_instance(request):
     return pG
 
 
-###############################################################################
+@pytest.fixture(scope="module", params=df_types_fixture_params)
+def cyber_PropertyGraph(request):
+    """
+    Fixture which returns an instance of a PropertyGraph with vertex and edge
+    data added from the cyber.csv dataset, parameterized for different
+    DataFrame types.
+    """
+    from cugraph.experimental import PropertyGraph
+
+    dataframe_type = request.param[0]
+    cyber_csv = utils.RAPIDS_DATASET_ROOT_DIR_PATH/"cyber.csv"
+    source_col_name = "srcip"
+    dest_col_name = "dstip"
+
+    if dataframe_type is pd.DataFrame:
+        read_csv = pd.read_csv
+    else:
+        read_csv = cudf.read_csv
+    df = read_csv(cyber_csv, delimiter=",",
+                  dtype={"idx": "int32",
+                         source_col_name: "str",
+                         dest_col_name: "str"},
+                  header=0)
+
+    pG = PropertyGraph()
+    pG.add_edge_data(df, (source_col_name, dest_col_name))
+
+    return pG
+
+
+@pytest.fixture(scope="module", params=df_types_fixture_params)
+def rmat_PropertyGraph():
+    """
+    Fixture which uses the RMAT generator to generate a cuDF DataFrame
+    edgelist, then uses it to add vertex and edge data to a PropertyGraph
+    instance, then returns the (PropertyGraph, DataFrame) instances in a tuple.
+    """
+    from cugraph.experimental import PropertyGraph
+
+    source_col_name = "src"
+    dest_col_name = "dst"
+    weight_col_name = "weight"
+    scale = 20
+    edgefactor = 16
+    seed = 42
+    df = rmat(scale,
+              (2**scale)*edgefactor,
+              0.57,  # from Graph500
+              0.19,  # from Graph500
+              0.19,  # from Graph500
+              seed,
+              clip_and_flip=False,
+              scramble_vertex_ids=True,
+              create_using=None,  # None == return edgelist
+              mg=False
+              )
+    rng = np.random.default_rng(seed)
+    df[weight_col_name] = rng.random(size=len(df))
+
+    pG = PropertyGraph()
+    pG.add_edge_data(df, (source_col_name, dest_col_name))
+
+    return (pG, df)
+
+
+# =============================================================================
 # Tests
+# =============================================================================
 @pytest.mark.parametrize("df_type", df_types, ids=df_type_id)
 def test_add_vertex_data(df_type):
     """
@@ -202,6 +287,60 @@ def test_add_vertex_data(df_type):
     assert sorted(pG.vertex_property_names) == sorted(expected_props)
 
 
+@pytest.mark.parametrize("df_type", df_types, ids=df_type_id)
+def test_num_vertices(df_type):
+    """
+    Ensures num_vertices is correct after various additions of specific data.
+    """
+    from cugraph.experimental import PropertyGraph
+
+    merchants = dataset1["merchants"]
+    merchants_df = df_type(columns=merchants[0],
+                           data=merchants[1])
+
+    pG = PropertyGraph()
+    pG.add_vertex_data(merchants_df,
+                       type_name="merchants",
+                       vertex_id_column="merchant_id",
+                       property_columns=None)
+
+    # Test caching - the second retrieval should always be faster
+    st = time.time()
+    assert pG.num_vertices == 5
+    compute_time = time.time() - st
+    assert pG.num_edges == 0
+
+    st = time.time()
+    assert pG.num_vertices == 5
+    cache_retrieval_time = time.time() - st
+    assert cache_retrieval_time < compute_time
+
+    users = dataset1["users"]
+    users_df = df_type(columns=users[0], data=users[1])
+
+    pG.add_vertex_data(users_df,
+                       type_name="users",
+                       vertex_id_column="user_id",
+                       property_columns=None)
+
+    assert pG.num_vertices == 9
+    assert pG.num_edges == 0
+
+    # The taxpayers table does not add new vertices, it only adds properties to
+    # vertices already present in the merchants and users tables.
+    taxpayers = dataset1["taxpayers"]
+    taxpayers_df = df_type(columns=taxpayers[0],
+                           data=taxpayers[1])
+
+    pG.add_vertex_data(taxpayers_df,
+                       type_name="taxpayers",
+                       vertex_id_column="payer_id",
+                       property_columns=None)
+
+    assert pG.num_vertices == 9
+    assert pG.num_edges == 0
+
+
 @pytest.mark.parametrize("df_type", df_types, ids=df_type_id)
 def test_null_data(df_type):
     """
@@ -365,9 +504,9 @@ def test_add_edge_data_bad_args():
                          property_columns="time")
 
 
-def test_extract_subgraph_vertex_prop_condition_only(property_graph_instance):
+def test_extract_subgraph_vertex_prop_condition_only(dataset1_PropertyGraph):
 
-    pG = property_graph_instance
+    pG = dataset1_PropertyGraph
 
     selection = pG.select_vertices("(_TYPE_=='taxpayers') & (amount<100)")
     G = pG.extract_subgraph(selection=selection,
@@ -386,12 +525,15 @@ def test_extract_subgraph_vertex_prop_condition_only(property_graph_instance):
     assert_frame_equal(expected_edgelist, actual_edgelist, check_like=True)
 
 
-def test_extract_subgraph_vertex_edge_prop_condition(property_graph_instance):
-    pG = property_graph_instance
+def test_extract_subgraph_vertex_edge_prop_condition(dataset1_PropertyGraph):
+    from cugraph.experimental import PropertyGraph
+
+    pG = dataset1_PropertyGraph
+    tcn = PropertyGraph.type_col_name
 
     selection = pG.select_vertices("(user_location==47906) | "
                                    "(user_location==78750)")
-    selection += pG.select_edges("_TYPE_=='referrals'")
+    selection += pG.select_edges(f"{tcn}=='referrals'")
     G = pG.extract_subgraph(selection=selection,
                             create_using=DiGraph_inst,
                             edge_weight_property="stars")
@@ -407,10 +549,13 @@ def test_extract_subgraph_vertex_edge_prop_condition(property_graph_instance):
     assert_frame_equal(expected_edgelist, actual_edgelist, check_like=True)
 
 
-def test_extract_subgraph_edge_prop_condition_only(property_graph_instance):
-    pG = property_graph_instance
+def test_extract_subgraph_edge_prop_condition_only(dataset1_PropertyGraph):
+    from cugraph.experimental import PropertyGraph
 
-    selection = pG.select_edges("_TYPE_=='transactions'")
+    pG = dataset1_PropertyGraph
+    tcn = PropertyGraph.type_col_name
+
+    selection = pG.select_edges(f"{tcn} =='transactions'")
     G = pG.extract_subgraph(selection=selection,
                             create_using=DiGraph_inst)
 
@@ -431,27 +576,33 @@ def test_extract_subgraph_edge_prop_condition_only(property_graph_instance):
     assert_frame_equal(expected_edgelist, actual_edgelist, check_like=True)
 
 
-def test_extract_subgraph_unweighted(property_graph_instance):
+def test_extract_subgraph_unweighted(dataset1_PropertyGraph):
     """
     Ensure a subgraph is unweighted if the edge_weight_property is None.
     """
-    pG = property_graph_instance
+    from cugraph.experimental import PropertyGraph
 
-    selection = pG.select_edges("_TYPE_=='transactions'")
+    pG = dataset1_PropertyGraph
+    tcn = PropertyGraph.type_col_name
+
+    selection = pG.select_edges(f"{tcn} == 'transactions'")
     G = pG.extract_subgraph(selection=selection,
                             create_using=DiGraph_inst)
 
     assert G.is_weighted() is False
 
 
-def test_extract_subgraph_specific_query(property_graph_instance):
+def test_extract_subgraph_specific_query(dataset1_PropertyGraph):
     """
     Graph of only transactions after time 1639085000 for merchant_id 4 (should
     be a graph of 2 vertices, 1 edge)
     """
-    pG = property_graph_instance
+    from cugraph.experimental import PropertyGraph
 
-    selection = pG.select_edges("(_TYPE_=='transactions') & "
+    pG = dataset1_PropertyGraph
+    tcn = PropertyGraph.type_col_name
+
+    selection = pG.select_edges(f"({tcn}=='transactions') & "
                                 "(merchant_id==4) & "
                                 "(time>1639085000)")
     G = pG.extract_subgraph(selection=selection,
@@ -469,28 +620,34 @@ def test_extract_subgraph_specific_query(property_graph_instance):
     assert_frame_equal(expected_edgelist, actual_edgelist, check_like=True)
 
 
-def test_edge_props_to_graph(property_graph_instance):
+def test_edge_props_to_graph(dataset1_PropertyGraph):
     """
     Access the property DataFrames directly and use them to perform a more
     complex query, then call edge_props_to_graph() to create the corresponding
     graph.
     """
-    pG = property_graph_instance
+    from cugraph.experimental import PropertyGraph
+
+    pG = dataset1_PropertyGraph
+    vcn = PropertyGraph.vertex_col_name
+    tcn = PropertyGraph.type_col_name
+    scn = PropertyGraph.src_col_name
+    dcn = PropertyGraph.dst_col_name
 
     # Select referrals from only taxpayers who are users (should be 1)
 
     # Find the list of vertices that are both users and taxpayers
     def contains_both(df):
-        return (df["_TYPE_"] == "taxpayers").any() and \
-            (df["_TYPE_"] == "users").any()
-    verts = pG._vertex_prop_dataframe.groupby("_VERTEX_")\
+        return (df[tcn] == "taxpayers").any() and \
+            (df[tcn] == "users").any()
+    verts = pG._vertex_prop_dataframe.groupby(vcn)\
                                      .apply(contains_both)
     verts = verts[verts].keys()  # get an array of only verts that have both
 
     # Find the "referral" edge_props containing only those verts
-    referrals = pG._edge_prop_dataframe["_TYPE_"] == "referrals"
-    srcs = pG._edge_prop_dataframe[referrals]["_SRC_"].isin(verts)
-    dsts = pG._edge_prop_dataframe[referrals]["_DST_"].isin(verts)
+    referrals = pG._edge_prop_dataframe[tcn] == "referrals"
+    srcs = pG._edge_prop_dataframe[referrals][scn].isin(verts)
+    dsts = pG._edge_prop_dataframe[referrals][dcn].isin(verts)
     matching_edges = (srcs & dsts)
     indices = matching_edges.index[matching_edges]
     edge_props = pG._edge_prop_dataframe.loc[indices]
@@ -508,18 +665,21 @@ def contains_both(df):
     assert_frame_equal(expected_edgelist, actual_edgelist, check_like=True)
 
 
-def test_select_vertices_from_previous_selection(property_graph_instance):
+def test_select_vertices_from_previous_selection(dataset1_PropertyGraph):
     """
     Ensures that the intersection of vertices of multiple types (only vertices
     that are both type A and type B) can be selected.
     """
-    pG = property_graph_instance
+    from cugraph.experimental import PropertyGraph
+
+    pG = dataset1_PropertyGraph
+    tcn = PropertyGraph.type_col_name
 
     # Select referrals from only taxpayers who are users (should be 1)
-    selection = pG.select_vertices("_TYPE_ == 'taxpayers'")
-    selection = pG.select_vertices("_TYPE_ == 'users'",
+    selection = pG.select_vertices(f"{tcn} == 'taxpayers'")
+    selection = pG.select_vertices(f"{tcn} == 'users'",
                                    from_previous_selection=selection)
-    selection += pG.select_edges("_TYPE_ == 'referrals'")
+    selection += pG.select_edges(f"{tcn} == 'referrals'")
     G = pG.extract_subgraph(create_using=DiGraph_inst, selection=selection)
 
     expected_edgelist = cudf.DataFrame({"src": [89021], "dst": [78634]})
@@ -555,7 +715,8 @@ def test_extract_subgraph_graph_without_vert_props():
                      vertex_id_columns=("user_id_1", "user_id_2"),
                      property_columns=None)
 
-    G = pG.extract_subgraph(selection=pG.select_edges("_SRC_ == 89216"),
+    scn = PropertyGraph.src_col_name
+    G = pG.extract_subgraph(selection=pG.select_edges(f"{scn} == 89216"),
                             create_using=DiGraph_inst,
                             edge_weight_property="relationship_type",
                             default_edge_weight=0)
@@ -572,11 +733,11 @@ def test_extract_subgraph_graph_without_vert_props():
     assert_frame_equal(expected_edgelist, actual_edgelist, check_like=True)
 
 
-def test_extract_subgraph_no_edges(property_graph_instance):
+def test_extract_subgraph_no_edges(dataset1_PropertyGraph):
     """
     Valid query that only matches a single vertex.
     """
-    pG = property_graph_instance
+    pG = dataset1_PropertyGraph
 
     selection = pG.select_vertices("(_TYPE_=='merchants') & (merchant_id==86)")
     G = pG.extract_subgraph(selection=selection)
@@ -584,11 +745,11 @@ def test_extract_subgraph_no_edges(property_graph_instance):
     assert len(G.edgelist.edgelist_df) == 0
 
 
-def test_extract_subgraph_no_query(property_graph_instance):
+def test_extract_subgraph_no_query(dataset1_PropertyGraph):
     """
     Call extract with no args, should result in the entire property graph.
     """
-    pG = property_graph_instance
+    pG = dataset1_PropertyGraph
 
     G = pG.extract_subgraph(create_using=DiGraph_inst, allow_multi_edges=True)
 
@@ -602,16 +763,20 @@ def test_extract_subgraph_no_query(property_graph_instance):
     assert len(G.edgelist.edgelist_df) == num_edges
 
 
-def test_extract_subgraph_multi_edges(property_graph_instance):
+def test_extract_subgraph_multi_edges(dataset1_PropertyGraph):
     """
     Ensure an exception is thrown if a graph is attempted to be extracted with
     multi edges.
     NOTE: an option to allow multi edges when create_using is
     MultiGraph will be provided in the future.
     """
-    pG = property_graph_instance
+    from cugraph.experimental import PropertyGraph
+
+    pG = dataset1_PropertyGraph
+    tcn = PropertyGraph.type_col_name
+
     # referrals has multiple edges
-    selection = pG.select_edges("_TYPE_ == 'referrals'")
+    selection = pG.select_edges(f"{tcn} == 'referrals'")
 
     # FIXME: use a better exception
     with pytest.raises(RuntimeError):
@@ -619,8 +784,11 @@ def test_extract_subgraph_multi_edges(property_graph_instance):
                             create_using=DiGraph_inst)
 
 
-def test_extract_subgraph_bad_args(property_graph_instance):
-    pG = property_graph_instance
+def test_extract_subgraph_bad_args(dataset1_PropertyGraph):
+    from cugraph.experimental import PropertyGraph
+
+    pG = dataset1_PropertyGraph
+    tcn = PropertyGraph.type_col_name
 
     # non-PropertySelection selection
     with pytest.raises(TypeError):
@@ -629,7 +797,7 @@ def test_extract_subgraph_bad_args(property_graph_instance):
                             edge_weight_property="stars",
                             default_edge_weight=1.0)
 
-    selection = pG.select_edges("_TYPE_=='referrals'")
+    selection = pG.select_edges(f"{tcn}=='referrals'")
     # bad create_using type
     with pytest.raises(TypeError):
         pG.extract_subgraph(selection=selection,
@@ -648,14 +816,17 @@ def test_extract_subgraph_bad_args(property_graph_instance):
                             edge_weight_property="card_type")
 
 
-def test_extract_subgraph_default_edge_weight(property_graph_instance):
+def test_extract_subgraph_default_edge_weight(dataset1_PropertyGraph):
     """
     Ensure the default_edge_weight value is added to edges with missing
     properties used for weights.
     """
-    pG = property_graph_instance
+    from cugraph.experimental import PropertyGraph
 
-    selection = pG.select_edges("_TYPE_=='transactions'")
+    pG = dataset1_PropertyGraph
+    tcn = PropertyGraph.type_col_name
+
+    selection = pG.select_edges(f"{tcn}=='transactions'")
     G = pG.extract_subgraph(create_using=DiGraph_inst,
                             selection=selection,
                             edge_weight_property="volume",
@@ -685,12 +856,15 @@ def test_extract_subgraph_default_edge_weight(property_graph_instance):
     assert_frame_equal(expected_edgelist, actual_edgelist, check_like=True)
 
 
-def test_graph_edge_data_added(property_graph_instance):
+def test_graph_edge_data_added(dataset1_PropertyGraph):
     """
     Ensures the subgraph returned from extract_subgraph() has the edge_data
     attribute added which contains the proper edge IDs.
     """
-    pG = property_graph_instance
+    from cugraph.experimental import PropertyGraph
+
+    pG = dataset1_PropertyGraph
+    eicn = PropertyGraph.edge_id_col_name
 
     expected_num_edges = \
         len(dataset1["transactions"][-1]) + \
@@ -703,16 +877,15 @@ def test_graph_edge_data_added(property_graph_instance):
     # meta-data, which includes edge IDs.
     G = pG.extract_subgraph(create_using=DiGraph_inst, allow_multi_edges=True)
 
-    # G.edge_data should be set to a list of tuples of (src, dst, edge_id) for
-    # each edge in the graph.
+    # G.edge_data should be set to a DataFrame with rows for each graph edge.
     assert len(G.edge_data) == expected_num_edges
-    edge_ids = sorted([d[-1] for d in G.edge_data])
+    edge_ids = sorted(G.edge_data[eicn].values)
 
     assert edge_ids[0] == 0
     assert edge_ids[-1] == (expected_num_edges - 1)
 
 
-def test_annotate_dataframe(property_graph_instance):
+def test_annotate_dataframe(dataset1_PropertyGraph):
     """
     FIXME: Add tests for:
     properties list
@@ -720,7 +893,7 @@ def test_annotate_dataframe(property_graph_instance):
     copy=False
     invalid args raise correct exceptions
     """
-    pG = property_graph_instance
+    pG = dataset1_PropertyGraph
 
     selection = pG.select_edges("(_TYPE_ == 'referrals') & (stars > 3)")
     G = pG.extract_subgraph(selection=selection,
@@ -800,3 +973,173 @@ def test_different_vertex_edge_input_dataframe_types():
     pG.add_edge_data(df, type_name="bar", vertex_id_columns=("a", "b"))
     with pytest.raises(TypeError):
         pG.add_edge_data(pdf, type_name="bar", vertex_id_columns=("a", "b"))
+
+
+def test_get_vertices(dataset1_PropertyGraph):
+    """
+    Test that get_vertices() returns the correct set of vertices without
+    duplicates.
+    """
+    pG = dataset1_PropertyGraph
+
+    (merchants, users, taxpayers,
+     transactions, relationships, referrals) = dataset1.values()
+
+    expected_vertices = set([t[0] for t in merchants[1]] +
+                            [t[0] for t in users[1]] +
+                            [t[0] for t in taxpayers[1]])
+
+    assert sorted(pG.get_vertices().values) == sorted(expected_vertices)
+
+
+def test_get_edges(dataset1_PropertyGraph):
+    """
+    Test that pG.edges contains the correct set of edges (as src/dst
+    columns).
+    """
+    from cugraph.experimental import PropertyGraph
+
+    pG = dataset1_PropertyGraph
+
+    (merchants, users, taxpayers,
+     transactions, relationships, referrals) = dataset1.values()
+
+    expected_edges = \
+        [(src, dst) for (src, dst, _, _, _, _) in transactions[1]] + \
+        [(src, dst) for (src, dst, _) in relationships[1]] + \
+        [(src, dst) for (src, dst, _, _) in referrals[1]]
+
+    actual_edges = pG.edges
+
+    assert len(expected_edges) == len(actual_edges)
+    for i in range(len(expected_edges)):
+        src = actual_edges[PropertyGraph.src_col_name].iloc[i]
+        dst = actual_edges[PropertyGraph.dst_col_name].iloc[i]
+        assert (src, dst) in expected_edges
+
+
+@pytest.mark.skip(reason="unfinished")
+def test_extract_subgraph_with_vertex_ids():
+    """
+    FIXME: add a PropertyGraph API that makes it easy to support the common use
+    case of extracting a subgraph containing only specific vertex IDs. This is
+    currently done in the bench_extract_subgraph_for_* tests below, but could
+    be made easier for users to do.
+    """
+    raise NotImplementedError
+
+
+@pytest.mark.skip(reason="unfinished")
+def test_dgl_use_case():
+    """
+    FIXME: add a test demonstrating typical DGL use cases
+    """
+    raise NotImplementedError
+
+
+# =============================================================================
+# Benchmarks
+# =============================================================================
+def bench_num_vertices(gpubenchmark, dataset1_PropertyGraph):
+    pG = dataset1_PropertyGraph
+
+    def get_num_vertices():
+        return pG.num_vertices
+
+    assert gpubenchmark(get_num_vertices) == 9
+
+
+def bench_get_vertices(gpubenchmark, dataset1_PropertyGraph):
+    pG = dataset1_PropertyGraph
+
+    gpubenchmark(pG.get_vertices)
+
+
+def bench_extract_subgraph_for_cyber(gpubenchmark, cyber_PropertyGraph):
+    from cugraph.experimental import PropertyGraph
+
+    pG = cyber_PropertyGraph
+    scn = PropertyGraph.src_col_name
+    dcn = PropertyGraph.dst_col_name
+
+    # Create a Graph containing only specific src or dst vertices
+    verts = ["10.40.182.3", "10.40.182.255", "59.166.0.9", "59.166.0.8"]
+    selected_edges = \
+        pG.select_edges(f"{scn}.isin({verts}) | {dcn}.isin({verts})")
+    gpubenchmark(pG.extract_subgraph,
+                 create_using=cugraph.Graph(directed=True),
+                 selection=selected_edges,
+                 default_edge_weight=1.0,
+                 allow_multi_edges=True)
+
+
+def bench_extract_subgraph_for_cyber_detect_duplicate_edges(
+        gpubenchmark, cyber_PropertyGraph):
+    from cugraph.experimental import PropertyGraph
+
+    pG = cyber_PropertyGraph
+    scn = PropertyGraph.src_col_name
+    dcn = PropertyGraph.dst_col_name
+
+    # Create a Graph containing only specific src or dst vertices
+    verts = ["10.40.182.3", "10.40.182.255", "59.166.0.9", "59.166.0.8"]
+    selected_edges = \
+        pG.select_edges(f"{scn}.isin({verts}) | {dcn}.isin({verts})")
+
+    def func():
+        with pytest.raises(RuntimeError):
+            pG.extract_subgraph(create_using=cugraph.Graph(directed=True),
+                                selection=selected_edges,
+                                default_edge_weight=1.0,
+                                allow_multi_edges=False)
+
+    gpubenchmark(func)
+
+
+def bench_extract_subgraph_for_rmat(gpubenchmark, rmat_PropertyGraph):
+    from cugraph.experimental import PropertyGraph
+
+    (pG, generated_df) = rmat_PropertyGraph
+    scn = PropertyGraph.src_col_name
+    dcn = PropertyGraph.dst_col_name
+
+    verts = []
+    for i in range(0, 10000, 10):
+        verts.append(generated_df["src"].iloc[i])
+
+    selected_edges = \
+        pG.select_edges(f"{scn}.isin({verts}) | {dcn}.isin({verts})")
+    gpubenchmark(pG.extract_subgraph,
+                 create_using=cugraph.Graph(directed=True),
+                 selection=selected_edges,
+                 default_edge_weight=1.0,
+                 allow_multi_edges=True)
+
+
+# This test runs for *minutes* with the current implementation, and since
+# benchmarking can call it multiple times per run, the overall time for this
+# test can be ~20 minutes.
+@pytest.mark.slow
+def bench_extract_subgraph_for_rmat_detect_duplicate_edges(
+        gpubenchmark, rmat_PropertyGraph):
+    from cugraph.experimental import PropertyGraph
+
+    (pG, generated_df) = rmat_PropertyGraph
+    scn = PropertyGraph.src_col_name
+    dcn = PropertyGraph.dst_col_name
+
+    verts = []
+    for i in range(0, 10000, 10):
+        verts.append(generated_df["src"].iloc[i])
+
+    selected_edges = \
+        pG.select_edges(f"{scn}.isin({verts}) | {dcn}.isin({verts})")
+
+    def func():
+        with pytest.raises(RuntimeError):
+            pG.extract_subgraph(create_using=cugraph.Graph(directed=True),
+                                selection=selected_edges,
+                                default_edge_weight=1.0,
+                                allow_multi_edges=False)
+
+    gpubenchmark(func)
diff --git a/python/cugraph/pytest.ini b/python/cugraph/pytest.ini
index 046f972801c..0da378d3d13 100644
--- a/python/cugraph/pytest.ini
+++ b/python/cugraph/pytest.ini
@@ -1,9 +1,24 @@
+# Copyright (c) 2021-2022, NVIDIA CORPORATION.
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
 [pytest]
 addopts =
            --benchmark-warmup=off
            --benchmark-max-time=0
            --benchmark-min-rounds=1
            --benchmark-columns="mean, rounds"
+           ## do not run the slow tests/benchmarks by default
+           -m "not slow"
            ## for use with rapids-pytest-benchmark plugin
            #--benchmark-gpu-disable
            ## for use with pytest-cov plugin
@@ -24,3 +39,16 @@ markers =
           cugraph_types: use cuGraph input types
           nx_types: use NetworkX input types
           matrix_types: use SciPy/CuPy matrix input types
+          slow: slow-running tests/benchmarks
+
+python_classes =
+          Bench*
+          Test*
+
+python_files =
+          bench_*
+          test_*
+
+python_functions =
+          bench_*
+          test_*

From f9e619a34a4626e93a359f56e61b77a555bae6ae Mon Sep 17 00:00:00 2001
From: Rick Ratzel <rratzel@nvidia.com>
Date: Wed, 2 Feb 2022 13:54:42 -0600
Subject: [PATCH 4/6] removed dead code, fixed comment.

---
 python/cugraph/cugraph/structure/property_graph.py | 7 +------
 1 file changed, 1 insertion(+), 6 deletions(-)

diff --git a/python/cugraph/cugraph/structure/property_graph.py b/python/cugraph/cugraph/structure/property_graph.py
index 14f18aa8f50..0a0cea61fea 100644
--- a/python/cugraph/cugraph/structure/property_graph.py
+++ b/python/cugraph/cugraph/structure/property_graph.py
@@ -678,7 +678,7 @@ def annotate_dataframe(self, df, G, edge_vertex_id_columns):
         --------
         >>>
         """
-        # FIXME: all check args
+        # FIXME: check all args
         (src_col_name, dst_col_name) = edge_vertex_id_columns
 
         df_type = type(df)
@@ -686,11 +686,6 @@ def annotate_dataframe(self, df, G, edge_vertex_id_columns):
             raise TypeError(f"df type {df_type} does not match DataFrame type "
                             f"{self.__dataframe_type} used in PropertyGraph")
 
-        # Add the src, dst, edge_id info from the Graph to a DataFrame
-        # edge_info_df = self.__dataframe_type(columns=[self.src_col_name,
-        #                                               self.dst_col_name,
-        #                                               self.edge_id_col_name],
-        #                                      data=G.edge_data)
         if hasattr(G, "edge_data"):
             edge_info_df = G.edge_data
         else:

From ff86defd66e34bc0172059860970db1cff1d58f9 Mon Sep 17 00:00:00 2001
From: Rick Ratzel <rratzel@nvidia.com>
Date: Sun, 13 Feb 2022 23:01:18 -0600
Subject: [PATCH 5/6] Allowed default_edge_weight to be used to add an edge
 weight value on extracted Graphs even when a weight property wasn't
 specified.

---
 .../cugraph/structure/property_graph.py       | 74 ++++++++++++-------
 .../cugraph/tests/test_property_graph.py      | 13 ++++
 2 files changed, 61 insertions(+), 26 deletions(-)

diff --git a/python/cugraph/cugraph/structure/property_graph.py b/python/cugraph/cugraph/structure/property_graph.py
index de941bb940b..14c6b14d299 100644
--- a/python/cugraph/cugraph/structure/property_graph.py
+++ b/python/cugraph/cugraph/structure/property_graph.py
@@ -64,6 +64,7 @@ class EXPERIMENTAL__PropertyGraph:
     type_col_name = "_TYPE_"
     edge_id_col_name = "_EDGE_ID_"
     vertex_id_col_name = "_VERTEX_ID_"
+    weight_col_name = "_WEIGHT_"
 
     def __init__(self):
         # The dataframe containing the properties for each vertex.
@@ -605,7 +606,7 @@ def extract_subgraph(self,
             selected_vertex_dataframe = \
                 self.__vertex_prop_dataframe[selection.vertex_selections]
         else:
-            selected_vertex_dataframe = self.__vertex_prop_dataframe
+            selected_vertex_dataframe = None
 
         if (selection is not None) and \
            (selection.edge_selections is not None):
@@ -629,25 +630,6 @@ def extract_subgraph(self,
         else:
             edges = selected_edge_dataframe
 
-        if edge_weight_property:
-            if edge_weight_property not in edges.columns:
-                raise ValueError("edge_weight_property "
-                                 f'"{edge_weight_property}" was not found in '
-                                 "the properties of the subgraph")
-
-            # Ensure a valid edge_weight_property can be used for applying
-            # weights to the subgraph, and if a default_edge_weight was
-            # specified, apply it to all NAs in the weight column.
-            prop_col = edges[edge_weight_property]
-            if prop_col.count() != prop_col.size:
-                if default_edge_weight is None:
-                    raise ValueError("edge_weight_property "
-                                     f'"{edge_weight_property}" '
-                                     "contains NA values in the subgraph and "
-                                     "default_edge_weight is not set")
-                else:
-                    prop_col.fillna(default_edge_weight, inplace=True)
-
         # The __*_prop_dataframes have likely been merged several times and
         # possibly had their dtypes converted in order to accommodate NaN
         # values. Restore the original dtypes in the resulting edges df prior
@@ -658,6 +640,7 @@ def extract_subgraph(self,
             edges,
             create_using=create_using,
             edge_weight_property=edge_weight_property,
+            default_edge_weight=default_edge_weight,
             allow_multi_edges=allow_multi_edges)
 
     def annotate_dataframe(self, df, G, edge_vertex_id_columns):
@@ -729,15 +712,42 @@ def edge_props_to_graph(self,
                             edge_prop_df,
                             create_using,
                             edge_weight_property=None,
+                            default_edge_weight=None,
                             allow_multi_edges=False):
         """
         Create and return a Graph from the edges in edge_prop_df.
         """
-        if edge_weight_property and \
-           (edge_weight_property not in edge_prop_df.columns):
-            raise ValueError("edge_weight_property "
-                             f'"{edge_weight_property}" was not found in '
-                             "edge_prop_df")
+        # FIXME: validate default_edge_weight (0 is falsy in the elif below)
+
+        if edge_weight_property:
+            if edge_weight_property not in edge_prop_df.columns:
+                raise ValueError("edge_weight_property "
+                                 f'"{edge_weight_property}" was not found in '
+                                 "edge_prop_df")
+
+            # Ensure a valid edge_weight_property can be used for applying
+            # weights to the subgraph, and if a default_edge_weight was
+            # specified, apply it to all NAs in the weight column.
+            prop_col = edge_prop_df[edge_weight_property]
+            if prop_col.count() != prop_col.size:
+                if default_edge_weight is None:
+                    raise ValueError("edge_weight_property "
+                                     f'"{edge_weight_property}" '
+                                     "contains NA values in the subgraph and "
+                                     "default_edge_weight is not set")
+                else:
+                    prop_col.fillna(default_edge_weight, inplace=True)
+            edge_attr = edge_weight_property
+
+        # If a default_edge_weight was specified but an edge_weight_property was
+        # not, a new edge weight column must be added.
+        elif default_edge_weight:
+            edge_attr = self.__gen_unique_name(edge_prop_df.columns,
+                                               prefix=self.weight_col_name)
+            edge_prop_df[edge_attr] = default_edge_weight
+
+        else:
+            edge_attr = None
 
         # Set up the new Graph to return
         if isinstance(create_using, cugraph.Graph):
@@ -771,7 +781,7 @@ def edge_props_to_graph(self,
 
         create_args = {"source": self.src_col_name,
                        "destination": self.dst_col_name,
-                       "edge_attr": edge_weight_property,
+                       "edge_attr": edge_attr,
                        "renumber": True,
                        }
         if type(edge_prop_df) is cudf.DataFrame:
@@ -851,6 +861,18 @@ def __get_all_vertices_series(self):
             vert_sers.append(epd[self.dst_col_name])
         return vert_sers
 
+    @staticmethod
+    def __gen_unique_name(current_names, prefix="col"):
+        """
+        Helper function to generate a currently unused name.
+        """
+        name = prefix
+        counter = 2
+        while name in current_names:
+            name = f"{prefix}{counter}"
+            counter += 1
+        return name
+
     @staticmethod
     def __get_new_column_dtypes(from_df, to_df):
         """
diff --git a/python/cugraph/cugraph/tests/test_property_graph.py b/python/cugraph/cugraph/tests/test_property_graph.py
index 8cced223bf7..e71cffdf825 100644
--- a/python/cugraph/cugraph/tests/test_property_graph.py
+++ b/python/cugraph/cugraph/tests/test_property_graph.py
@@ -856,6 +856,19 @@ def test_extract_subgraph_default_edge_weight(dataset1_PropertyGraph):
     assert_frame_equal(expected_edgelist, actual_edgelist, check_like=True)
 
 
+def test_extract_subgraph_default_edge_weight_no_property(
+        dataset1_PropertyGraph):
+    """
+    Ensure default_edge_weight can be used to provide an edge value when a
+    property for the edge weight is not specified.
+    """
+    pG = dataset1_PropertyGraph
+    edge_weight = 99.2
+    G = pG.extract_subgraph(allow_multi_edges=True,
+                            default_edge_weight=edge_weight)
+    assert (G.edgelist.edgelist_df["weights"] == edge_weight).all()
+
+
 def test_graph_edge_data_added(dataset1_PropertyGraph):
     """
     Ensures the subgraph returned from extract_subgraph() has the edge_data

From f031fc8a79232f6dab70f4143e7356c2b9c46433 Mon Sep 17 00:00:00 2001
From: Rick Ratzel <rratzel@nvidia.com>
Date: Mon, 14 Feb 2022 11:22:23 -0600
Subject: [PATCH 6/6] flake8 fixes, changed arg name to better indicate that
 names should be passed in and not column/series objects.

---
 .../cugraph/structure/property_graph.py       | 46 +++++------
 .../cugraph/cugraph/tests/test_graph_store.py |  8 +-
 .../cugraph/tests/test_property_graph.py      | 76 +++++++++----------
 3 files changed, 65 insertions(+), 65 deletions(-)

diff --git a/python/cugraph/cugraph/structure/property_graph.py b/python/cugraph/cugraph/structure/property_graph.py
index 14c6b14d299..4169537efe8 100644
--- a/python/cugraph/cugraph/structure/property_graph.py
+++ b/python/cugraph/cugraph/structure/property_graph.py
@@ -216,7 +216,7 @@ def vertices_ids(self):
 
     def add_vertex_data(self,
                         dataframe,
-                        vertex_id_column,
+                        vertex_col_name,
                         type_name=None,
                         property_columns=None
                         ):
@@ -228,7 +228,7 @@ def add_vertex_data(self,
         dataframe : DataFrame-compatible instance
             A DataFrame instance with a compatible Pandas-like DataFrame
             interface.
-        vertex_id_column : string
+        vertex_col_name : string
             The column name that contains the values to be used as vertex IDs.
         type_name : string
             The name to be assigned to the type of property being added. For
@@ -251,8 +251,8 @@ def add_vertex_data(self,
         if type(dataframe) not in _dataframe_types:
             raise TypeError("dataframe must be one of the following types: "
                             f"{_dataframe_types}, got: {type(dataframe)}")
-        if vertex_id_column not in dataframe.columns:
-            raise ValueError(f"{vertex_id_column} is not a column in "
+        if vertex_col_name not in dataframe.columns:
+            raise ValueError(f"{vertex_col_name} is not a column in "
                              f"dataframe: {dataframe.columns}")
         if (type_name is not None) and not(isinstance(type_name, str)):
             raise TypeError("type_name must be a string, got: "
@@ -294,7 +294,7 @@ def add_vertex_data(self,
             # https://github.com/rapidsai/cudf/issues/9981)
             self.__update_dataframe_dtypes(
                 self.__vertex_prop_dataframe,
-                {self.vertex_col_name: dataframe[vertex_id_column].dtype})
+                {self.vertex_col_name: dataframe[vertex_col_name].dtype})
 
         # Ensure that both the predetermined vertex ID column name and vertex
         # type column name are present for proper merging.
@@ -303,7 +303,7 @@ def add_vertex_data(self,
         # columns. The copied DataFrame is then merged (another copy) and then
         # deleted when out-of-scope.
         tmp_df = dataframe.copy(deep=True)
-        tmp_df[self.vertex_col_name] = tmp_df[vertex_id_column]
+        tmp_df[self.vertex_col_name] = tmp_df[vertex_col_name]
         # FIXME: handle case of a type_name column already being in tmp_df
         tmp_df[self.type_col_name] = type_name
 
@@ -332,7 +332,7 @@ def add_vertex_data(self,
 
     def add_edge_data(self,
                       dataframe,
-                      vertex_id_columns,
+                      vertex_col_names,
                       type_name=None,
                       property_columns=None
                       ):
@@ -344,7 +344,7 @@ def add_edge_data(self,
         dataframe : DataFrame-compatible instance
             A DataFrame instance with a compatible Pandas-like DataFrame
             interface.
-        vertex_id_columns : list of strings
+        vertex_col_names : list of strings
             The column names that contain the values to be used as the source
             and destination vertex IDs for the edges.
         type_name : string
@@ -368,12 +368,12 @@ def add_edge_data(self,
         if type(dataframe) not in _dataframe_types:
             raise TypeError("dataframe must be one of the following types: "
                             f"{_dataframe_types}, got: {type(dataframe)}")
-        if type(vertex_id_columns) not in [list, tuple]:
-            raise TypeError("vertex_id_columns must be a list or tuple, got: "
-                            f"{type(vertex_id_columns)}")
-        invalid_columns = set(vertex_id_columns).difference(dataframe.columns)
+        if type(vertex_col_names) not in [list, tuple]:
+            raise TypeError("vertex_col_names must be a list or tuple, got: "
+                            f"{type(vertex_col_names)}")
+        invalid_columns = set(vertex_col_names).difference(dataframe.columns)
         if invalid_columns:
-            raise ValueError("vertex_id_columns contains column(s) not found "
+            raise ValueError("vertex_col_names contains column(s) not found "
                              f"in dataframe: {list(invalid_columns)}")
         if (type_name is not None) and not(isinstance(type_name, str)):
             raise TypeError("type_name must be a string, got: "
@@ -416,16 +416,16 @@ def add_edge_data(self,
             # https://github.com/rapidsai/cudf/issues/9981)
             self.__update_dataframe_dtypes(
                 self.__edge_prop_dataframe,
-                {self.src_col_name: dataframe[vertex_id_columns[0]].dtype,
-                 self.dst_col_name: dataframe[vertex_id_columns[1]].dtype,
+                {self.src_col_name: dataframe[vertex_col_names[0]].dtype,
+                 self.dst_col_name: dataframe[vertex_col_names[1]].dtype,
                  self.edge_id_col_name: "Int64"})
 
         # NOTE: This copies the incoming DataFrame in order to add the new
         # columns. The copied DataFrame is then merged (another copy) and then
         # deleted when out-of-scope.
         tmp_df = dataframe.copy(deep=True)
-        tmp_df[self.src_col_name] = tmp_df[vertex_id_columns[0]]
-        tmp_df[self.dst_col_name] = tmp_df[vertex_id_columns[1]]
+        tmp_df[self.src_col_name] = tmp_df[vertex_col_names[0]]
+        tmp_df[self.dst_col_name] = tmp_df[vertex_col_names[1]]
         # FIXME: handle case of a type_name column already being in tmp_df
         tmp_df[self.type_col_name] = type_name
 
@@ -643,21 +643,21 @@ def extract_subgraph(self,
             default_edge_weight=default_edge_weight,
             allow_multi_edges=allow_multi_edges)
 
-    def annotate_dataframe(self, df, G, edge_vertex_id_columns):
+    def annotate_dataframe(self, df, G, edge_vertex_col_names):
         """
         Add properties to df that represent the vertices and edges in graph G.
 
         Parameters
         ----------
         df : cudf.DataFrame or pandas.DataFrame
-            A DataFrame containing edges identified by edge_vertex_id_columns
+            A DataFrame containing edges identified by edge_vertex_col_names
             which will have properties for those edges added to it.
         G : cugraph.Graph (or subclass of) instance.
             Graph containing the edges specified in df. The Graph instance must
             have been generated from a prior call to extract_subgraph() in
             order to have the edge meta-data used to look up the correct
             properties.
-        edge_vertex_id_columns : tuple of strings
+        edge_vertex_col_names : tuple of strings
             The column names in df that represent the source and destination
             vertices, used for identifying edges.
 
@@ -672,7 +672,7 @@ def annotate_dataframe(self, df, G, edge_vertex_id_columns):
         >>>
         """
         # FIXME: check all args
-        (src_col_name, dst_col_name) = edge_vertex_id_columns
+        (src_col_name, dst_col_name) = edge_vertex_col_names
 
         df_type = type(df)
         if df_type is not self.__dataframe_type:
@@ -739,8 +739,8 @@ def edge_props_to_graph(self,
                     prop_col.fillna(default_edge_weight, inplace=True)
             edge_attr = edge_weight_property
 
-        # If a default_edge_weight was specified but an edge_weight_property was
-        # not, a new edge weight column must be added.
+        # If a default_edge_weight was specified but an edge_weight_property
+        # was not, a new edge weight column must be added.
         elif default_edge_weight:
             edge_attr = self.__gen_unique_name(edge_prop_df.columns,
                                                prefix=self.weight_col_name)
diff --git a/python/cugraph/cugraph/tests/test_graph_store.py b/python/cugraph/cugraph/tests/test_graph_store.py
index 25790662119..5f783c3baa8 100644
--- a/python/cugraph/cugraph/tests/test_graph_store.py
+++ b/python/cugraph/cugraph/tests/test_graph_store.py
@@ -50,7 +50,7 @@ def test_using_pgraph(graph_file):
     pG = PropertyGraph()
     pG.add_edge_data(cu_M,
                      type_name="edge",
-                     vertex_id_columns=("0", "1"),
+                     vertex_col_names=("0", "1"),
                      property_columns=None)
 
     gstore = cugraph.gnn.CuGraphStore(graph=pG)
@@ -70,7 +70,7 @@ def test_node_data_pg(graph_file):
         pG = PropertyGraph()
         pG.add_edge_data(cu_M,
                          type_name="edge",
-                         vertex_id_columns=("0", "1"),
+                         vertex_col_names=("0", "1"),
                          property_columns=None)
 
         gstore = cugraph.gnn.CuGraphStore(graph=pG)
@@ -91,7 +91,7 @@ def test_egonet(graph_file):
     pG = PropertyGraph()
     pG.add_edge_data(cu_M,
                      type_name="edge",
-                     vertex_id_columns=("0", "1"),
+                     vertex_col_names=("0", "1"),
                      property_columns=None)
 
     gstore = cugraph.gnn.CuGraphStore(graph=pG)
@@ -117,7 +117,7 @@ def test_workflow(graph_file):
     pg = PropertyGraph()
     pg.add_edge_data(cu_M,
                      type_name="edge",
-                     vertex_id_columns=("0", "1"),
+                     vertex_col_names=("0", "1"),
                      property_columns=["2"])
 
     gstore = cugraph.gnn.CuGraphStore(graph=pg)
diff --git a/python/cugraph/cugraph/tests/test_property_graph.py b/python/cugraph/cugraph/tests/test_property_graph.py
index e71cffdf825..6643e3c3c46 100644
--- a/python/cugraph/cugraph/tests/test_property_graph.py
+++ b/python/cugraph/cugraph/tests/test_property_graph.py
@@ -158,39 +158,39 @@ def dataset1_PropertyGraph(request):
     # relationships, and referrals.
 
     # property_columns=None (the default) means all columns except
-    # vertex_id_column will be used as properties for the vertices/edges.
+    # vertex_col_name will be used as properties for the vertices/edges.
 
     pG.add_vertex_data(dataframe_type(columns=merchants[0],
                                       data=merchants[1]),
                        type_name="merchants",
-                       vertex_id_column="merchant_id",
+                       vertex_col_name="merchant_id",
                        property_columns=None)
     pG.add_vertex_data(dataframe_type(columns=users[0],
                                       data=users[1]),
                        type_name="users",
-                       vertex_id_column="user_id",
+                       vertex_col_name="user_id",
                        property_columns=None)
     pG.add_vertex_data(dataframe_type(columns=taxpayers[0],
                                       data=taxpayers[1]),
                        type_name="taxpayers",
-                       vertex_id_column="payer_id",
+                       vertex_col_name="payer_id",
                        property_columns=None)
 
     pG.add_edge_data(dataframe_type(columns=transactions[0],
                                     data=transactions[1]),
                      type_name="transactions",
-                     vertex_id_columns=("user_id", "merchant_id"),
+                     vertex_col_names=("user_id", "merchant_id"),
                      property_columns=None)
     pG.add_edge_data(dataframe_type(columns=relationships[0],
                                     data=relationships[1]),
                      type_name="relationships",
-                     vertex_id_columns=("user_id_1", "user_id_2"),
+                     vertex_col_names=("user_id_1", "user_id_2"),
                      property_columns=None)
     pG.add_edge_data(dataframe_type(columns=referrals[0],
                                     data=referrals[1]),
                      type_name="referrals",
-                     vertex_id_columns=("user_id_1",
-                                        "user_id_2"),
+                     vertex_col_names=("user_id_1",
+                                       "user_id_2"),
                      property_columns=None)
 
     return pG
@@ -278,7 +278,7 @@ def test_add_vertex_data(df_type):
     pG = PropertyGraph()
     pG.add_vertex_data(merchants_df,
                        type_name="merchants",
-                       vertex_id_column="merchant_id",
+                       vertex_col_name="merchant_id",
                        property_columns=None)
 
     assert pG.num_vertices == 5
@@ -301,7 +301,7 @@ def test_num_vertices(df_type):
     pG = PropertyGraph()
     pG.add_vertex_data(merchants_df,
                        type_name="merchants",
-                       vertex_id_column="merchant_id",
+                       vertex_col_name="merchant_id",
                        property_columns=None)
 
     # Test caching - the second retrieval should always be faster
@@ -320,7 +320,7 @@ def test_num_vertices(df_type):
 
     pG.add_vertex_data(users_df,
                        type_name="users",
-                       vertex_id_column="user_id",
+                       vertex_col_name="user_id",
                        property_columns=None)
 
     assert pG.num_vertices == 9
@@ -334,7 +334,7 @@ def test_num_vertices(df_type):
 
     pG.add_vertex_data(taxpayers_df,
                        type_name="taxpayers",
-                       vertex_id_column="payer_id",
+                       vertex_col_name="payer_id",
                        property_columns=None)
 
     assert pG.num_vertices == 9
@@ -370,7 +370,7 @@ def test_add_vertex_data_prop_columns(df_type):
     pG = PropertyGraph()
     pG.add_vertex_data(merchants_df,
                        type_name="merchants",
-                       vertex_id_column="merchant_id",
+                       vertex_col_name="merchant_id",
                        property_columns=expected_props)
 
     assert pG.num_vertices == 5
@@ -393,28 +393,28 @@ def test_add_vertex_data_bad_args():
     with pytest.raises(TypeError):
         pG.add_vertex_data(42,
                            type_name="merchants",
-                           vertex_id_column="merchant_id",
+                           vertex_col_name="merchant_id",
                            property_columns=None)
     with pytest.raises(TypeError):
         pG.add_vertex_data(merchants_df,
                            type_name=42,
-                           vertex_id_column="merchant_id",
+                           vertex_col_name="merchant_id",
                            property_columns=None)
     with pytest.raises(ValueError):
         pG.add_vertex_data(merchants_df,
                            type_name="merchants",
-                           vertex_id_column="bad_column_name",
+                           vertex_col_name="bad_column_name",
                            property_columns=None)
     with pytest.raises(ValueError):
         pG.add_vertex_data(merchants_df,
                            type_name="merchants",
-                           vertex_id_column="merchant_id",
+                           vertex_col_name="merchant_id",
                            property_columns=["bad_column_name",
                                              "merchant_name"])
     with pytest.raises(TypeError):
         pG.add_vertex_data(merchants_df,
                            type_name="merchants",
-                           vertex_id_column="merchant_id",
+                           vertex_col_name="merchant_id",
                            property_columns="merchant_name")
 
 
@@ -432,7 +432,7 @@ def test_add_edge_data(df_type):
     pG = PropertyGraph()
     pG.add_edge_data(transactions_df,
                      type_name="transactions",
-                     vertex_id_columns=("user_id", "merchant_id"),
+                     vertex_col_names=("user_id", "merchant_id"),
                      property_columns=None)
 
     assert pG.num_vertices == 7
@@ -457,7 +457,7 @@ def test_add_edge_data_prop_columns(df_type):
     pG = PropertyGraph()
     pG.add_edge_data(transactions_df,
                      type_name="transactions",
-                     vertex_id_columns=("user_id", "merchant_id"),
+                     vertex_col_names=("user_id", "merchant_id"),
                      property_columns=expected_props)
 
     assert pG.num_vertices == 7
@@ -480,27 +480,27 @@ def test_add_edge_data_bad_args():
     with pytest.raises(TypeError):
         pG.add_edge_data(42,
                          type_name="transactions",
-                         vertex_id_columns=("user_id", "merchant_id"),
+                         vertex_col_names=("user_id", "merchant_id"),
                          property_columns=None)
     with pytest.raises(TypeError):
         pG.add_edge_data(transactions_df,
                          type_name=42,
-                         vertex_id_columns=("user_id", "merchant_id"),
+                         vertex_col_names=("user_id", "merchant_id"),
                          property_columns=None)
     with pytest.raises(ValueError):
         pG.add_edge_data(transactions_df,
                          type_name="transactions",
-                         vertex_id_columns=("user_id", "bad_column"),
+                         vertex_col_names=("user_id", "bad_column"),
                          property_columns=None)
     with pytest.raises(ValueError):
         pG.add_edge_data(transactions_df,
                          type_name="transactions",
-                         vertex_id_columns=("user_id", "merchant_id"),
+                         vertex_col_names=("user_id", "merchant_id"),
                          property_columns=["bad_column_name", "time"])
     with pytest.raises(TypeError):
         pG.add_edge_data(transactions_df,
                          type_name="transactions",
-                         vertex_id_columns=("user_id", "merchant_id"),
+                         vertex_col_names=("user_id", "merchant_id"),
                          property_columns="time")
 
 
@@ -707,12 +707,12 @@ def test_extract_subgraph_graph_without_vert_props():
     pG.add_edge_data(cudf.DataFrame(columns=transactions[0],
                                     data=transactions[1]),
                      type_name="transactions",
-                     vertex_id_columns=("user_id", "merchant_id"),
+                     vertex_col_names=("user_id", "merchant_id"),
                      property_columns=None)
     pG.add_edge_data(cudf.DataFrame(columns=relationships[0],
                                     data=relationships[1]),
                      type_name="relationships",
-                     vertex_id_columns=("user_id_1", "user_id_2"),
+                     vertex_col_names=("user_id_1", "user_id_2"),
                      property_columns=None)
 
     scn = PropertyGraph.src_col_name
@@ -925,7 +925,7 @@ def test_annotate_dataframe(dataset1_PropertyGraph):
                                 inplace=True, ignore_index=True)
 
     new_algo_result = pG.annotate_dataframe(
-        algo_result, G, edge_vertex_id_columns=("from", "to"))
+        algo_result, G, edge_vertex_col_names=("from", "to"))
     expected_algo_result = df_type({"from": srcs, "to": dsts,
                                     "result": range(len(srcs)),
                                     "merchant_id": mids,
@@ -961,31 +961,31 @@ def test_different_vertex_edge_input_dataframe_types():
     from cugraph.experimental import PropertyGraph
 
     pG = PropertyGraph()
-    pG.add_vertex_data(df, type_name="foo", vertex_id_column="a")
+    pG.add_vertex_data(df, type_name="foo", vertex_col_name="a")
     with pytest.raises(TypeError):
-        pG.add_edge_data(pdf, type_name="bar", vertex_id_columns=("a", "b"))
+        pG.add_edge_data(pdf, type_name="bar", vertex_col_names=("a", "b"))
 
     pG = PropertyGraph()
-    pG.add_vertex_data(pdf, type_name="foo", vertex_id_column="a")
+    pG.add_vertex_data(pdf, type_name="foo", vertex_col_name="a")
     with pytest.raises(TypeError):
-        pG.add_edge_data(df, type_name="bar", vertex_id_columns=("a", "b"))
+        pG.add_edge_data(df, type_name="bar", vertex_col_names=("a", "b"))
 
     # Different order
     pG = PropertyGraph()
-    pG.add_edge_data(df, type_name="bar", vertex_id_columns=("a", "b"))
+    pG.add_edge_data(df, type_name="bar", vertex_col_names=("a", "b"))
     with pytest.raises(TypeError):
-        pG.add_vertex_data(pdf, type_name="foo", vertex_id_column="a")
+        pG.add_vertex_data(pdf, type_name="foo", vertex_col_name="a")
 
     # Same API call, different types
     pG = PropertyGraph()
-    pG.add_vertex_data(df, type_name="foo", vertex_id_column="a")
+    pG.add_vertex_data(df, type_name="foo", vertex_col_name="a")
     with pytest.raises(TypeError):
-        pG.add_vertex_data(pdf, type_name="foo", vertex_id_column="a")
+        pG.add_vertex_data(pdf, type_name="foo", vertex_col_name="a")
 
     pG = PropertyGraph()
-    pG.add_edge_data(df, type_name="bar", vertex_id_columns=("a", "b"))
+    pG.add_edge_data(df, type_name="bar", vertex_col_names=("a", "b"))
     with pytest.raises(TypeError):
-        pG.add_edge_data(pdf, type_name="bar", vertex_id_columns=("a", "b"))
+        pG.add_edge_data(pdf, type_name="bar", vertex_col_names=("a", "b"))
 
 
 def test_get_vertices(dataset1_PropertyGraph):