rapidsai · rapids-bot · Jun 6, 2024 · Jun 3, 2024 · Jun 3, 2024 · Jun 3, 2024
@@ -20,7 +20,7 @@ Graph Neural Networks API Documentation
 
 .. toctree::
     :maxdepth: 3
-    :caption: Graph Nerual Networks API Documentation
+    :caption: Graph Neural Networks API Documentation
 
     cugraph-dgl/cugraph_dgl.rst
     cugraph-pyg/cugraph_pyg.rst

@@ -201,7 +201,11 @@ def cast(cls, *args, **kwargs):
 
 class DaskGraphStore:
     """
-    Duck-typed version of PyG's GraphStore and FeatureStore.
+    Duck-typed version of PyG's GraphStore and FeatureStore that uses
+    Dask to distribute the graph structure across GPUs and a
+    cugraph.gnn.FeatureStore to store node/edge features.  Supports
+    single-node/single-GPU, single-node/multi-GPU, and multi-node/multi-GPU
+    configurations.  Supports both homogeneous and heterogeneous graphs.
     """
 
     # TODO allow (and possibly require) separate stores for node, edge attrs

@@ -36,6 +36,9 @@ class TensorDictFeatureStore(
     """
 
     def __init__(self):
+        """
+        Constructs an empty TensorDictFeatureStore.
+        """
         super().__init__()
 
         self.__features = {}
@@ -141,15 +144,24 @@ class WholeFeatureStore(
     distributed, and avoids data replication across workers.
 
     Data should be sliced before being passed into this feature store.
-    That means each worker should have its own partition.
+    That means each worker should have its own partition and put_tensor
+    should be called for each worker's local partition.  When calling
+    get_tensor, multi_get_tensor, etc., the entire tensor can be accessed
+    regardless of which worker's partition the desired slice of the tensor
+    is on.
     """
 
     def __init__(self, memory_type="distributed", location="cpu"):
         """
+        Constructs an empty WholeFeatureStore.
+
         Parameters
         ----------
         memory_type: str (optional, default='distributed')
-            The memory type of this store.
+            The memory type of this store.  Options are
+            'distributed', 'chunked', and 'continuous'.
+            For more information consult the WholeGraph
+            documentation.
         location: str(optional, default='cpu')
             The location ('cpu' or 'cuda') where data is stored.
         """

@@ -39,12 +39,23 @@ class GraphStore(
     else torch_geometric.data.GraphStore
 ):
     """
-    This object uses lazy graph creation.  Users can repeatedly call
-    put_edge_index, and the tensors won't be converted into a cuGraph
-    graph until one is needed (i.e. when creating a loader).
+    cuGraph-backed PyG GraphStore implementation that distributes
+    the graph across workers.  This object uses lazy graph creation.
+    Users can repeatedly call put_edge_index, and the tensors won't
+    be converted into a cuGraph graph until one is needed
+    (i.e. when creating a loader). Supports
+    single-node/single-GPU, single-node/multi-GPU, and
+    multi-node/multi-GPU graph storage.
+
+    Each worker should have a slice of the graph locally, and
+    call put_edge_index with its slice.
     """
 
     def __init__(self, is_multi_gpu: bool = False):
+        """
+        Constructs a new, empty GraphStore object.  This object
+        represents one slice of a graph on a particular worker.
+        """
         self.__edge_indices = tensordict.TensorDict({}, batch_size=(2,))
         self.__sizes = {}
         self.__graph = None

@@ -42,6 +42,10 @@
 
 
 class BulkSampleLoader:
+    """
+    Iterator that executes sampling using Dask and cuGraph and
+    loads sampled minibatches from disk.
+    """
 
     __ex_parquet_file = re.compile(r"batch=([0-9]+)\-([0-9]+)\.parquet")
 
@@ -488,6 +492,11 @@ def __iter__(self):
 
 
 class DaskNeighborLoader:
+    """
+    Duck-typed version of the PyG NeighborLoader interface that uses
+    Dask to sample nodes using the uniform neighbor sampling algorithm.
+    """
+
     def __init__(
         self,
         data: Union[DaskGraphStore, Tuple[DaskGraphStore, DaskGraphStore]],
@@ -496,6 +505,8 @@ def __init__(
         **kwargs,
     ):
         """
+        Constructs a new DaskNeighborLoader object.
+
         Parameters
         ----------
         data: DaskGraphStore or (DaskGraphStore, DaskGraphStore)

@@ -28,10 +28,10 @@
 
 class NeighborLoader(NodeLoader):
     """
+    Duck-typed version of torch_geometric.loader.NeighborLoader.
+
     Node loader that implements the neighbor sampling
     algorithm used in GraphSAGE.
-
-    Duck-typed version of torch_geometric.loader.NeighborLoader
     """
 
     def __init__(

@@ -24,7 +24,10 @@
 
 class NodeLoader:
     """
-    Duck-typed version of torch_geometric.loader.NodeLoader
+    Duck-typed version of torch_geometric.loader.NodeLoader.
+    Loads samples from batches of input nodes using a
+    `~cugraph_pyg.sampler.BaseSampler.sample_from_nodes`
+    function.
     """
 
     def __init__(

@@ -21,6 +21,12 @@
 
 
 class SampleIterator:
+    """
+    Iterator that combines output graphs with their
+    features to produce final output minibatches
+    that can be fed into a GNN model.
+    """
+
     def __init__(
         self,
         data: Tuple[
@@ -33,6 +39,18 @@ def __init__(
             ]
         ],
     ):
+        """
+        Constructs a new SampleIterator.
+
+        Parameters
+        ----------
+        data: Tuple[torch_geometric.data.FeatureStore, torch_geometric.data.GraphStore]
+            The original graph that samples were generated from, as a
+            FeatureStore/GraphStore tuple.
+        output_iter: Iterator[Union["torch_geometric.sampler.HeteroSamplerOutput",
+        "torch_geometric.sampler.SamplerOutput"]]
+            An iterator over outputted sampling results.
+        """
         self.__feature_store, self.__graph_store = data
         self.__output_iter = output_iter
 
@@ -114,7 +132,20 @@ def __iter__(self):
 
 
 class SampleReader:
+    """
+    Iterator that processes results from the cuGraph distributed sampler.
+    """
+
     def __init__(self, base_reader: DistSampleReader):
+        """
+        Constructs a new SampleReader.
+
+        Parameters
+        ----------
+        base_reader: DistSampleReader
+            The reader responsible for loading saved samples produced by
+            the cuGraph distributed sampler.
+        """
         self.__base_reader = base_reader
         self.__num_samples_remaining = 0
         self.__index = 0
@@ -150,7 +181,21 @@ def __iter__(self):
 
 
 class HomogeneousSampleReader(SampleReader):
+    """
+    Subclass of SampleReader that reads homogeneous output samples
+    produced by the cuGraph distributed sampler.
+    """
+
     def __init__(self, base_reader: DistSampleReader):
+        """
+        Constructs a new HomogeneousSampleReader.
+
+        Parameters
+        ----------
+        base_reader: DistSampleReader
+            The reader responsible for loading saved samples produced by
+            the cuGraph distributed sampler.
+        """
         super().__init__(base_reader)
 
     def __decode_csc(self, raw_sample_data: Dict[str, "torch.Tensor"], index: int):