Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[DOC] Minor Improvements to cuGraph-PyG Documentation #4460

Merged
merged 8 commits into from
Jun 6, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion docs/cugraph/source/api_docs/index.rst
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ Graph Neural Networks API Documentation

.. toctree::
:maxdepth: 3
:caption: Graph Nerual Networks API Documentation
:caption: Graph Neural Networks API Documentation

cugraph-dgl/cugraph_dgl.rst
cugraph-pyg/cugraph_pyg.rst
Expand Down
6 changes: 5 additions & 1 deletion python/cugraph-pyg/cugraph_pyg/data/dask_graph_store.py
Original file line number Diff line number Diff line change
Expand Up @@ -201,7 +201,11 @@ def cast(cls, *args, **kwargs):

class DaskGraphStore:
"""
Duck-typed version of PyG's GraphStore and FeatureStore.
Duck-typed version of PyG's GraphStore and FeatureStore that uses
Dask to distribute the graph structure across GPUs and a
cugraph.gnn.FeatureStore to store node/edge features. Supports
single-node/single-GPU, single-node/multi-GPU, and multi-node/multi-GPU
configurations. Supports both homogeneous and heterogeneous graphs.
"""

# TODO allow (and possibly require) separate stores for node, edge attrs
Expand Down
16 changes: 14 additions & 2 deletions python/cugraph-pyg/cugraph_pyg/data/feature_store.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,9 @@ class TensorDictFeatureStore(
"""

def __init__(self):
"""
Constructs an empty TensorDictFeatureStore.
"""
super().__init__()

self.__features = {}
Expand Down Expand Up @@ -141,15 +144,24 @@ class WholeFeatureStore(
distributed, and avoids data replication across workers.

Data should be sliced before being passed into this feature store.
That means each worker should have its own partition.
That means each worker should have its own partition and put_tensor
should be called for each worker's local partition. When calling
get_tensor, multi_get_tensor, etc., the entire tensor can be accessed
regardless of which worker's partition the desired slice of the tensor
is on.
"""

def __init__(self, memory_type="distributed", location="cpu"):
"""
Constructs an empty WholeFeatureStore.

Parameters
----------
memory_type: str (optional, default='distributed')
The memory type of this store.
The memory type of this store. Options are
'distributed', 'chunked', and 'continuous'.
For more information consult the WholeGraph
documentation.
location: str (optional, default='cpu')
The location ('cpu' or 'cuda') where data is stored.
"""
Expand Down
17 changes: 14 additions & 3 deletions python/cugraph-pyg/cugraph_pyg/data/graph_store.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,12 +39,23 @@ class GraphStore(
else torch_geometric.data.GraphStore
):
"""
This object uses lazy graph creation. Users can repeatedly call
put_edge_index, and the tensors won't be converted into a cuGraph
graph until one is needed (i.e. when creating a loader).
cuGraph-backed PyG GraphStore implementation that distributes
the graph across workers. This object uses lazy graph creation.
Users can repeatedly call put_edge_index, and the tensors won't
be converted into a cuGraph graph until one is needed
(i.e. when creating a loader). Supports
single-node/single-GPU, single-node/multi-GPU, and
multi-node/multi-GPU graph storage.

Each worker should have a slice of the graph locally, and
call put_edge_index with its slice.
"""

def __init__(self, is_multi_gpu: bool = False):
"""
Constructs a new, empty GraphStore object. This object
represents one slice of a graph on a particular worker.
"""
self.__edge_indices = tensordict.TensorDict({}, batch_size=(2,))
self.__sizes = {}
self.__graph = None
Expand Down
11 changes: 11 additions & 0 deletions python/cugraph-pyg/cugraph_pyg/loader/dask_node_loader.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,10 @@


class BulkSampleLoader:
"""
Iterator that executes sampling using Dask and cuGraph and
loads sampled minibatches from disk.
"""

__ex_parquet_file = re.compile(r"batch=([0-9]+)\-([0-9]+)\.parquet")

Expand Down Expand Up @@ -488,6 +492,11 @@ def __iter__(self):


class DaskNeighborLoader:
"""
Duck-typed version of the PyG NeighborLoader interface that uses
Dask to sample nodes using the uniform neighbor sampling algorithm.
"""

def __init__(
self,
data: Union[DaskGraphStore, Tuple[DaskGraphStore, DaskGraphStore]],
Expand All @@ -496,6 +505,8 @@ def __init__(
**kwargs,
):
"""
Constructs a new DaskNeighborLoader object.

Parameters
----------
data: DaskGraphStore or (DaskGraphStore, DaskGraphStore)
Expand Down
4 changes: 2 additions & 2 deletions python/cugraph-pyg/cugraph_pyg/loader/neighbor_loader.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,10 +28,10 @@

class NeighborLoader(NodeLoader):
"""
Duck-typed version of torch_geometric.loader.NeighborLoader

Node loader that implements the neighbor sampling
algorithm used in GraphSAGE.

Duck-typed version of torch_geometric.loader.NeighborLoader
"""

def __init__(
Expand Down
5 changes: 4 additions & 1 deletion python/cugraph-pyg/cugraph_pyg/loader/node_loader.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,10 @@

class NodeLoader:
"""
Duck-typed version of torch_geometric.loader.NodeLoader
Duck-typed version of torch_geometric.loader.NodeLoader.
Loads samples from batches of input nodes using a
`~cugraph_pyg.sampler.BaseSampler.sample_from_nodes`
function.
"""

def __init__(
Expand Down
45 changes: 45 additions & 0 deletions python/cugraph-pyg/cugraph_pyg/sampler/sampler.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,12 @@


class SampleIterator:
"""
Iterator that combines output graphs with their
features to produce final output minibatches
that can be fed into a GNN model.
"""

def __init__(
self,
data: Tuple[
Expand All @@ -33,6 +39,18 @@ def __init__(
]
],
):
"""
Constructs a new SampleIterator.

Parameters
----------
data: Tuple[torch_geometric.data.FeatureStore, torch_geometric.data.GraphStore]
The original graph that samples were generated from, as a
FeatureStore/GraphStore tuple.
output_iter: Iterator[Union["torch_geometric.sampler.HeteroSamplerOutput",
"torch_geometric.sampler.SamplerOutput"]]
An iterator over outputted sampling results.
"""
self.__feature_store, self.__graph_store = data
self.__output_iter = output_iter

Expand Down Expand Up @@ -114,7 +132,20 @@ def __iter__(self):


class SampleReader:
"""
Iterator that processes results from the cuGraph distributed sampler.
"""

def __init__(self, base_reader: DistSampleReader):
"""
Constructs a new SampleReader.

Parameters
----------
base_reader: DistSampleReader
The reader responsible for loading saved samples produced by
the cuGraph distributed sampler.
"""
self.__base_reader = base_reader
self.__num_samples_remaining = 0
self.__index = 0
Expand Down Expand Up @@ -150,7 +181,21 @@ def __iter__(self):


class HomogeneousSampleReader(SampleReader):
"""
Subclass of SampleReader that reads homogeneous output samples
produced by the cuGraph distributed sampler.
"""

def __init__(self, base_reader: DistSampleReader):
"""
Constructs a new HomogeneousSampleReader.

Parameters
----------
base_reader: DistSampleReader
The reader responsible for loading saved samples produced by
the cuGraph distributed sampler.
"""
super().__init__(base_reader)

def __decode_csc(self, raw_sample_data: Dict[str, "torch.Tensor"], index: int):
Expand Down
Loading