From dfabfb78952257ff9141d92d90a1c50b8f9dc421 Mon Sep 17 00:00:00 2001
From: Alex Barghi <105237337+alexbarghi-nv@users.noreply.github.com>
Date: Fri, 12 Apr 2024 13:00:37 -0400
Subject: [PATCH 1/3] Test GNN Examples and Add CUDA 12 Testing (#4317)
Tests GNN examples, which were previously excluded from CI. Also updates the GNN packages to support testing with CUDA 12 in pip (not conda). Also fixes a bug in `graph_sage_sg.py` and updates `graph_sage_mg.py` to quit gracefully if dask hasn't been started.
Authors:
- Alex Barghi (https://github.com/alexbarghi-nv)
Approvers:
- Jake Awe (https://github.com/AyodeAwe)
- Rick Ratzel (https://github.com/rlratzel)
URL: https://github.com/rapidsai/cugraph/pull/4317
---
ci/build_docs.sh | 23 ++++--
ci/run_cugraph_pyg_pytests.sh | 6 ++
ci/test_python.sh | 32 +++++---
ci/test_wheel_cugraph-pyg.sh | 46 ++++++-----
.../all_cuda-118_arch-x86_64.yaml | 1 -
.../all_cuda-122_arch-x86_64.yaml | 1 -
dependencies.yaml | 1 -
.../conda/cugraph_dgl_dev_cuda-118.yaml | 1 -
.../conda/cugraph_pyg_dev_cuda-118.yaml | 1 -
.../cugraph_pyg/examples/graph_sage_mg.py | 31 ++++---
.../cugraph_pyg/examples/graph_sage_sg.py | 82 ++++++++++---------
11 files changed, 130 insertions(+), 95 deletions(-)
diff --git a/ci/build_docs.sh b/ci/build_docs.sh
index 9b72c98f1bb..089558bd452 100755
--- a/ci/build_docs.sh
+++ b/ci/build_docs.sh
@@ -20,24 +20,33 @@ rapids-logger "Downloading artifacts from previous jobs"
CPP_CHANNEL=$(rapids-download-conda-from-s3 cpp)
PYTHON_CHANNEL=$(rapids-download-conda-from-s3 python)
+if [[ "${RAPIDS_CUDA_VERSION}" == "11.8.0" ]]; then
+ CONDA_CUDA_VERSION="11.8"
+ DGL_CHANNEL="dglteam/label/cu118"
+else
+ CONDA_CUDA_VERSION="12.1"
+ DGL_CHANNEL="dglteam/label/cu121"
+fi
+
rapids-mamba-retry install \
--channel "${CPP_CHANNEL}" \
--channel "${PYTHON_CHANNEL}" \
+ --channel conda-forge \
+ --channel pyg \
+ --channel nvidia \
+ --channel "${DGL_CHANNEL}" \
libcugraph \
pylibcugraph \
cugraph \
cugraph-pyg \
+ cugraph-dgl \
cugraph-service-server \
cugraph-service-client \
libcugraph_etl \
pylibcugraphops \
- pylibwholegraph
-
-# This command installs `cugraph-dgl` without its dependencies
-# since this package can currently only run in `11.6` CTK environments
-# due to the dependency version specifications in its conda recipe.
-rapids-logger "Install cugraph-dgl"
-rapids-mamba-retry install "${PYTHON_CHANNEL}/linux-64/cugraph-dgl-*.tar.bz2"
+ pylibwholegraph \
+ pytorch \
+ "cuda-version=${CONDA_CUDA_VERSION}"
export RAPIDS_VERSION="$(rapids-version)"
export RAPIDS_VERSION_MAJOR_MINOR="$(rapids-version-major-minor)"
diff --git a/ci/run_cugraph_pyg_pytests.sh b/ci/run_cugraph_pyg_pytests.sh
index 47ed6ba0008..0acc8aa462a 100755
--- a/ci/run_cugraph_pyg_pytests.sh
+++ b/ci/run_cugraph_pyg_pytests.sh
@@ -7,3 +7,9 @@ set -euo pipefail
cd "$(dirname "$(realpath "${BASH_SOURCE[0]}")")"/../python/cugraph-pyg/cugraph_pyg
pytest --cache-clear --ignore=tests/mg "$@" .
+
+# Test examples
+for e in "$(pwd)"/examples/*.py; do
+ rapids-logger "running example $e"
+ python $e
+done
diff --git a/ci/test_python.sh b/ci/test_python.sh
index a9198306c2f..dfcf6816dec 100755
--- a/ci/test_python.sh
+++ b/ci/test_python.sh
@@ -154,8 +154,7 @@ if [[ "${RAPIDS_CUDA_VERSION}" == "11.8.0" ]]; then
rapids-mamba-retry install \
--channel "${CPP_CHANNEL}" \
--channel "${PYTHON_CHANNEL}" \
- --channel pytorch \
- --channel pytorch-nightly \
+ --channel conda-forge \
--channel dglteam/label/cu118 \
--channel nvidia \
libcugraph \
@@ -165,7 +164,7 @@ if [[ "${RAPIDS_CUDA_VERSION}" == "11.8.0" ]]; then
cugraph-dgl \
'dgl>=1.1.0.cu*,<=2.0.0.cu*' \
'pytorch>=2.0' \
- 'pytorch-cuda>=11.8'
+ 'cuda-version=11.8'
rapids-print-env
@@ -191,33 +190,43 @@ fi
if [[ "${RAPIDS_CUDA_VERSION}" == "11.8.0" ]]; then
if [[ "${RUNNER_ARCH}" != "ARM64" ]]; then
- rapids-mamba-retry env create --yes -f env.yaml -n test_cugraph_pyg
+ rapids-mamba-retry env create --force -f env.yaml -n test_cugraph_pyg
# Temporarily allow unbound variables for conda activation.
set +u
conda activate test_cugraph_pyg
set -u
+ # TODO re-enable logic once CUDA 12 is testable
+ #if [[ "${RAPIDS_CUDA_VERSION}" == "11.8.0" ]]; then
+ CONDA_CUDA_VERSION="11.8"
+ PYG_URL="https://data.pyg.org/whl/torch-2.1.0+cu118.html"
+ #else
+ # CONDA_CUDA_VERSION="12.1"
+ # PYG_URL="https://data.pyg.org/whl/torch-2.1.0+cu121.html"
+ #fi
+
# Will automatically install built dependencies of cuGraph-PyG
rapids-mamba-retry install \
--channel "${CPP_CHANNEL}" \
--channel "${PYTHON_CHANNEL}" \
--channel pytorch \
- --channel nvidia \
--channel pyg \
- --channel rapidsai-nightly \
+ --channel nvidia \
"cugraph-pyg" \
- "pytorch>=2.0,<2.1" \
- "pytorch-cuda=11.8"
+ "pytorch=2.1.0" \
+ "pytorch-cuda=${CONDA_CUDA_VERSION}"
# Install pyg dependencies (which requires pip)
+
+ pip install ogb
pip install \
pyg_lib \
torch_scatter \
torch_sparse \
torch_cluster \
torch_spline_conv \
- -f https://data.pyg.org/whl/torch-2.0.0+cu118.html
+ -f ${PYG_URL}
rapids-print-env
@@ -235,12 +244,11 @@ if [[ "${RAPIDS_CUDA_VERSION}" == "11.8.0" ]]; then
conda deactivate
conda activate test
set -u
-
else
rapids-logger "skipping cugraph_pyg pytest on ARM64"
fi
else
- rapids-logger "skipping cugraph_pyg pytest on CUDA != 11.8"
+ rapids-logger "skipping cugraph_pyg pytest on CUDA!=11.8"
fi
# test cugraph-equivariant
@@ -253,7 +261,7 @@ if [[ "${RAPIDS_CUDA_VERSION}" == "11.8.0" ]]; then
rapids-mamba-retry install \
--channel "${CPP_CHANNEL}" \
--channel "${PYTHON_CHANNEL}" \
- --channel pytorch \
+ --channel conda-forge \
--channel nvidia \
cugraph-equivariant
pip install e3nn==0.5.1
diff --git a/ci/test_wheel_cugraph-pyg.sh b/ci/test_wheel_cugraph-pyg.sh
index 50cbfb3e1fe..ad615f0b3ff 100755
--- a/ci/test_wheel_cugraph-pyg.sh
+++ b/ci/test_wheel_cugraph-pyg.sh
@@ -25,25 +25,33 @@ python -m pip install $(ls ./dist/${python_package_name}*.whl)[test]
export RAPIDS_DATASET_ROOT_DIR="$(realpath datasets)"
if [[ "${CUDA_VERSION}" == "11.8.0" ]]; then
- rapids-logger "Installing PyTorch and PyG dependencies"
PYTORCH_URL="https://download.pytorch.org/whl/cu118"
- rapids-retry python -m pip install torch==2.1.0 --index-url ${PYTORCH_URL}
- rapids-retry python -m pip install torch-geometric==2.4.0
- rapids-retry python -m pip install \
- pyg_lib \
- torch_scatter \
- torch_sparse \
- torch_cluster \
- torch_spline_conv \
- -f https://data.pyg.org/whl/torch-2.1.0+cu118.html
-
- rapids-logger "pytest cugraph-pyg (single GPU)"
- pushd python/cugraph-pyg/cugraph_pyg
- python -m pytest \
- --cache-clear \
- --ignore=tests/mg \
- tests
- popd
+ PYG_URL="https://data.pyg.org/whl/torch-2.1.0+cu118.html"
else
- rapids-logger "skipping cugraph-pyg wheel test on CUDA!=11.8"
+ PYTORCH_URL="https://download.pytorch.org/whl/cu121"
+ PYG_URL="https://data.pyg.org/whl/torch-2.1.0+cu121.html"
fi
+rapids-logger "Installing PyTorch and PyG dependencies"
+rapids-retry python -m pip install torch==2.1.0 --index-url ${PYTORCH_URL}
+rapids-retry python -m pip install torch-geometric==2.4.0
+rapids-retry python -m pip install \
+ ogb \
+ pyg_lib \
+ torch_scatter \
+ torch_sparse \
+ torch_cluster \
+ torch_spline_conv \
+ -f ${PYG_URL}
+
+rapids-logger "pytest cugraph-pyg (single GPU)"
+pushd python/cugraph-pyg/cugraph_pyg
+python -m pytest \
+ --cache-clear \
+ --ignore=tests/mg \
+ tests
+# Test examples
+for e in "$(pwd)"/examples/*.py; do
+ rapids-logger "running example $e"
+ python $e
+done
+popd
diff --git a/conda/environments/all_cuda-118_arch-x86_64.yaml b/conda/environments/all_cuda-118_arch-x86_64.yaml
index f656df9e32b..f3aeedad641 100644
--- a/conda/environments/all_cuda-118_arch-x86_64.yaml
+++ b/conda/environments/all_cuda-118_arch-x86_64.yaml
@@ -4,7 +4,6 @@ channels:
- rapidsai
- rapidsai-nightly
- dask/label/dev
-- pytorch
- pyg
- dglteam/label/cu118
- conda-forge
diff --git a/conda/environments/all_cuda-122_arch-x86_64.yaml b/conda/environments/all_cuda-122_arch-x86_64.yaml
index 2bd0746e49c..4d64ce33a4e 100644
--- a/conda/environments/all_cuda-122_arch-x86_64.yaml
+++ b/conda/environments/all_cuda-122_arch-x86_64.yaml
@@ -4,7 +4,6 @@ channels:
- rapidsai
- rapidsai-nightly
- dask/label/dev
-- pytorch
- pyg
- dglteam/label/cu118
- conda-forge
diff --git a/dependencies.yaml b/dependencies.yaml
index 85fb1344ce9..3fa19eb238a 100644
--- a/dependencies.yaml
+++ b/dependencies.yaml
@@ -285,7 +285,6 @@ channels:
- rapidsai
- rapidsai-nightly
- dask/label/dev
- - pytorch
- pyg
- dglteam/label/cu118
- conda-forge
diff --git a/python/cugraph-dgl/conda/cugraph_dgl_dev_cuda-118.yaml b/python/cugraph-dgl/conda/cugraph_dgl_dev_cuda-118.yaml
index feafe57246c..bc8bf776a1e 100644
--- a/python/cugraph-dgl/conda/cugraph_dgl_dev_cuda-118.yaml
+++ b/python/cugraph-dgl/conda/cugraph_dgl_dev_cuda-118.yaml
@@ -4,7 +4,6 @@ channels:
- rapidsai
- rapidsai-nightly
- dask/label/dev
-- pytorch
- pyg
- dglteam/label/cu118
- conda-forge
diff --git a/python/cugraph-pyg/conda/cugraph_pyg_dev_cuda-118.yaml b/python/cugraph-pyg/conda/cugraph_pyg_dev_cuda-118.yaml
index 81c53044a1a..94e9f1decbd 100644
--- a/python/cugraph-pyg/conda/cugraph_pyg_dev_cuda-118.yaml
+++ b/python/cugraph-pyg/conda/cugraph_pyg_dev_cuda-118.yaml
@@ -4,7 +4,6 @@ channels:
- rapidsai
- rapidsai-nightly
- dask/label/dev
-- pytorch
- pyg
- dglteam/label/cu118
- conda-forge
diff --git a/python/cugraph-pyg/cugraph_pyg/examples/graph_sage_mg.py b/python/cugraph-pyg/cugraph_pyg/examples/graph_sage_mg.py
index 4ca573504a1..80d683e6c79 100644
--- a/python/cugraph-pyg/cugraph_pyg/examples/graph_sage_mg.py
+++ b/python/cugraph-pyg/cugraph_pyg/examples/graph_sage_mg.py
@@ -17,6 +17,7 @@
import time
import argparse
import gc
+import warnings
import torch
import numpy as np
@@ -405,7 +406,8 @@ def parse_args():
"--dask_scheduler_file",
type=str,
help="The path to the dask scheduler file",
- required=True,
+ required=False,
+ default=None,
)
return parser.parse_args()
@@ -413,19 +415,24 @@ def parse_args():
def main():
args = parse_args()
+ if args.dask_scheduler_file is None:
+ warnings.warn(
+ "You must provide the dask scheduler file " "to run this example. Exiting."
+ )
- torch_devices = [int(d) for d in args.torch_devices.split(",")]
-
- train_args = (
- torch_devices,
- args.torch_manager_ip,
- args.torch_manager_port,
- args.dask_scheduler_file,
- args.num_epochs,
- args.features_on_gpu,
- )
+ else:
+ torch_devices = [int(d) for d in args.torch_devices.split(",")]
+
+ train_args = (
+ torch_devices,
+ args.torch_manager_ip,
+ args.torch_manager_port,
+ args.dask_scheduler_file,
+ args.num_epochs,
+ args.features_on_gpu,
+ )
- tmp.spawn(train, args=train_args, nprocs=len(torch_devices))
+ tmp.spawn(train, args=train_args, nprocs=len(torch_devices))
if __name__ == "__main__":
diff --git a/python/cugraph-pyg/cugraph_pyg/examples/graph_sage_sg.py b/python/cugraph-pyg/cugraph_pyg/examples/graph_sage_sg.py
index 9c96a707e4d..58a403084df 100644
--- a/python/cugraph-pyg/cugraph_pyg/examples/graph_sage_sg.py
+++ b/python/cugraph-pyg/cugraph_pyg/examples/graph_sage_sg.py
@@ -97,10 +97,13 @@ def train(device: int, features_device: Union[str, int] = "cpu", num_epochs=2) -
num_papers = data[0]["num_nodes_dict"]["paper"]
train_perc = 0.1
+
train_nodes = torch.randperm(num_papers)
train_nodes = train_nodes[: int(train_perc * num_papers)]
+
train_mask = torch.full((num_papers,), -1, device=device)
train_mask[train_nodes] = 1
+
fs.add_data(train_mask, "paper", "train")
cugraph_store = CuGraphStore(fs, G, N)
@@ -128,47 +131,46 @@ def train(device: int, features_device: Union[str, int] = "cpu", num_epochs=2) -
# barrier() cannot do this since the number of ops per rank is
# different. It essentially acts like barrier would if the
# number of ops per rank was the same.
- for epoch in range(num_epochs):
- for iter_i, hetero_data in enumerate(cugraph_bulk_loader):
- num_batches += 1
- if iter_i % 20 == 0:
- print(f"iteration {iter_i}")
-
- # train
- train_mask = hetero_data.train_dict["paper"]
- y_true = hetero_data.y_dict["paper"]
-
- y_pred = model(
- hetero_data.x_dict["paper"].to(device).to(torch.float32),
- hetero_data.edge_index_dict[("paper", "cites", "paper")].to(device),
- (len(y_true), len(y_true)),
- )
-
- y_true = F.one_hot(
- y_true[train_mask].to(torch.int64), num_classes=349
- ).to(torch.float32)
-
- y_pred = y_pred[train_mask]
-
- loss = F.cross_entropy(y_pred, y_true)
-
- optimizer.zero_grad()
- loss.backward()
- optimizer.step()
- total_loss += loss.item()
-
- del y_true
- del y_pred
- del loss
- del hetero_data
- gc.collect()
-
- end_time_train = time.perf_counter_ns()
- print(
- f"epoch {epoch} time: "
- f"{(end_time_train - start_time_train) / 1e9:3.4f} s"
+ for iter_i, hetero_data in enumerate(cugraph_bulk_loader):
+ num_batches += 1
+ if iter_i % 20 == 0:
+ print(f"iteration {iter_i}")
+
+ # train
+ train_mask = hetero_data.train_dict["paper"]
+ y_true = hetero_data.y_dict["paper"]
+
+ y_pred = model(
+ hetero_data.x_dict["paper"].to(device).to(torch.float32),
+ hetero_data.edge_index_dict[("paper", "cites", "paper")].to(device),
+ (len(y_true), len(y_true)),
)
- print(f"loss after epoch {epoch}: {total_loss / num_batches}")
+
+ y_true = F.one_hot(y_true[train_mask].to(torch.int64), num_classes=349).to(
+ torch.float32
+ )
+
+ y_pred = y_pred[train_mask]
+
+ loss = F.cross_entropy(y_pred, y_true)
+
+ optimizer.zero_grad()
+ loss.backward()
+ optimizer.step()
+ total_loss += loss.item()
+
+ del y_true
+ del y_pred
+ del loss
+ del hetero_data
+ gc.collect()
+
+ end_time_train = time.perf_counter_ns()
+ print(
+ f"epoch {epoch} time: "
+ f"{(end_time_train - start_time_train) / 1e9:3.4f} s"
+ )
+ print(f"loss after epoch {epoch}: {total_loss / num_batches}")
def parse_args():
From 3e9e7ac8892f70791578493bd16673240c8e8088 Mon Sep 17 00:00:00 2001
From: Don Acosta <97529984+acostadon@users.noreply.github.com>
Date: Fri, 12 Apr 2024 17:25:04 -0400
Subject: [PATCH 2/3] Notebook fixes to formulas and outputs (#4272)
Authors:
- Don Acosta (https://github.com/acostadon)
Approvers:
- Brad Rees (https://github.com/BradReesWork)
URL: https://github.com/rapidsai/cugraph/pull/4272
---
.../source/installation/source_build.md | 17 ++++++++++---
.../algorithms/centrality/Betweenness.ipynb | 2 +-
.../algorithms/centrality/Eigenvector.ipynb | 9 +++----
notebooks/algorithms/centrality/Katz.ipynb | 18 ++++++-------
.../algorithms/link_analysis/Pagerank.ipynb | 25 ++++++++++++++++---
5 files changed, 50 insertions(+), 21 deletions(-)
diff --git a/docs/cugraph/source/installation/source_build.md b/docs/cugraph/source/installation/source_build.md
index 1a129d45295..49af1375781 100644
--- a/docs/cugraph/source/installation/source_build.md
+++ b/docs/cugraph/source/installation/source_build.md
@@ -13,8 +13,8 @@ __Compilers:__
__CUDA:__
* CUDA 11.2+
-* NVIDIA driver 450.80.02+
-* Pascal architecture or better
+* NVIDIA driver 470.42.01 or newer
+* NVIDIA GPU, Volta architecture or later, with [compute capability](https://developer.nvidia.com/cuda-gpus) 7.0+
Further details and download links for these prerequisites are available on the
[RAPIDS System Requirements page](https://docs.rapids.ai/install#system-req).
@@ -42,6 +42,10 @@ files](https://github.com/rapidsai/cugraph/blob/main/conda/environments).
# for CUDA 11.x
conda env create --name cugraph_dev --file $CUGRAPH_HOME/conda/environments/all_cuda-118_arch-x86_64.yaml
+# for CUDA 12.x
+conda env create --name cugraph_dev --file $CUGRAPH_HOME/conda/environments/all_cuda-122_arch-x86_64.yaml
+
+
# activate the environment
conda activate cugraph_dev
@@ -55,6 +59,13 @@ The environment can be updated as cugraph adds/removes/updates its dependencies.
# for CUDA 11.x
conda env update --name cugraph_dev --file $CUGRAPH_HOME/conda/environments/all_cuda-118_arch-x86_64.yaml
conda activate cugraph_dev
+
+# for CUDA 12.x
+conda env update --name cugraph_dev --file $CUGRAPH_HOME/conda/environments/all_cuda-122_arch-x86_64.yaml
+conda activate cugraph_dev
+
+
+
```
### Build and Install
@@ -167,7 +178,7 @@ Run either the C++ or the Python tests with datasets
make test
```
-Note: This conda installation only applies to Linux and Python versions 3.8/3.10.
+Note: This conda installation only applies to Linux and Python versions 3.8/3.11.
### (OPTIONAL) Set environment variable on activation
diff --git a/notebooks/algorithms/centrality/Betweenness.ipynb b/notebooks/algorithms/centrality/Betweenness.ipynb
index 29ad37ec254..79da4434b3f 100644
--- a/notebooks/algorithms/centrality/Betweenness.ipynb
+++ b/notebooks/algorithms/centrality/Betweenness.ipynb
@@ -37,7 +37,7 @@
"source": [
"Betweenness centrality of a node 𝑣 is the sum of the fraction of all-pairs shortest paths that pass through 𝑣\n",
"\n",
- "\n"
+ "$c_B(v) =\\sum_{s,t \\in V} \\frac{\\sigma(s, t|v)}{\\sigma(s, t)}$"
]
},
{
diff --git a/notebooks/algorithms/centrality/Eigenvector.ipynb b/notebooks/algorithms/centrality/Eigenvector.ipynb
index 2d06aa39708..957b517fc31 100644
--- a/notebooks/algorithms/centrality/Eigenvector.ipynb
+++ b/notebooks/algorithms/centrality/Eigenvector.ipynb
@@ -34,13 +34,12 @@
"cell_type": "markdown",
"metadata": {},
"source": [
- "The eigenvector centrality for node i is the\n",
- "i-th element of the vector x defined by the eigenvector equation.\n",
+ "For a graph with vertex set $V$ where $A = (a_{v,t})$ is the adjacency matrix, the Eigenvector centrality score of vertex $v$ is defined as\n",
"\n",
- "\n",
"\n",
+ "$x_v = \\frac{1}{\\lambda}{\\sum_{t \\in M(v)}}x_t = \\frac{1}{\\lambda}{\\sum_{t \\in V}{a_{v,t}x_t}}$\n",
"\n",
- "Where M(v) is the adjacency list for the set of vertices(v) and λ is a constant.\n",
+ "Where $M(v)$ is the neighbor set for $v$ and λ is a constant.\n",
"\n",
"[Learn more about EigenVector Centrality](https://www.sci.unich.it/~francesc/teaching/network/eigenvector.html)\n"
]
@@ -314,7 +313,7 @@
"metadata": {},
"source": [
"___\n",
- "Copyright (c) 2022-2023, NVIDIA CORPORATION.\n",
+ "Copyright (c) 2022-2024, NVIDIA CORPORATION.\n",
"\n",
"Licensed under the Apache License, Version 2.0 (the \"License\"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0\n",
"\n",
diff --git a/notebooks/algorithms/centrality/Katz.ipynb b/notebooks/algorithms/centrality/Katz.ipynb
index 08ee42df788..650ef447492 100755
--- a/notebooks/algorithms/centrality/Katz.ipynb
+++ b/notebooks/algorithms/centrality/Katz.ipynb
@@ -35,14 +35,14 @@
"\n",
" G: cugraph.Graph object\n",
" alpha: float, Attenuation factor. default is 0.1\n",
- " max_iter: int, The maximum number of iterations before an answer is returned. \n",
- " This can be used to limit the execution time and do an early exit \n",
- " before the solver reaches the convergence tolerance. If this value \n",
+ " max_iter: int, The maximum number of iterations before an answer is returned.\n",
+ " This can be used to limit the execution time and do an early exit\n",
+ " before the solver reaches the convergence tolerance. If this value\n",
" is lower or equal to 0 cuGraph will use the default value, which is 100\n",
- " tol: float, Set the tolerance the approximation, this parameter should be a small \n",
- " magnitude value. The lower the tolerance the better the approximation. If \n",
- " this value is 0.0f, cuGraph will use the default value which is 0.00001. \n",
- " Setting too small a tolerance can lead to non-convergence due to numerical \n",
+ " tol: float, Set the tolerance of the approximation, this parameter should be a small\n",
+ " magnitude value. The lower the tolerance the better the approximation. If\n",
+ " this value is 0.0f, cuGraph will use the default value which is 0.00001.\n",
+ " Setting too small a tolerance can lead to non-convergence due to numerical\n",
" roundoff. Usually values between 0.01 and 0.00001 are acceptable.\n",
" nstart: cuDataFrame, GPU Dataframe containing the initial guess for katz centrality. \n",
" Default is None\n",
@@ -58,8 +58,8 @@
"\n",
"\n",
"The value of _alpha_ should be
\n",
- "\n",
- "currently the user is responsible for setting alpha appropiatly. \n",
+ "$\\alpha < \\frac{1}{\\lambda _{max} }$\n",
+ "currently the user is responsible for setting alpha appropriately.\n",
"\n",
"### _NOTICE_\n",
"There is a difference between how cuGraph and how NetworkX computes the Katz centrality score. That difference leads to the scores not matching. cuGraph does not currently support the 'beta' and 'weight' parameters as seen in the corresponding networkX call. The cuGraph implementation is based on a relaxed version of Katz defined by Foster with a reduced computational complexity of O(n+m)\n",
diff --git a/notebooks/algorithms/link_analysis/Pagerank.ipynb b/notebooks/algorithms/link_analysis/Pagerank.ipynb
index 0e7d5b134a7..eeb3e8303f5 100755
--- a/notebooks/algorithms/link_analysis/Pagerank.ipynb
+++ b/notebooks/algorithms/link_analysis/Pagerank.ipynb
@@ -44,6 +44,22 @@
" * For more advanced renumbering support, see the examples in `structure/renumber.ipynb` and `structure/renumber-2.ipynb`\n"
]
},
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "The formula for Pagerank is:\n",
+ "\n",
+ "$PR(A) = \\frac{1 - d}{N} + d \\times \\left( \\sum_{i=1}^{n} \\frac{PR(T_i)}{C(T_i)} \\right)$\n",
+ "\n",
+ "In the formula:\n",
+ "* $PR(A)$ is the pagerank of A\n",
+ "* $d$ is the damping factor, which represents the probability of following an outgoing edge. A common default is 0.85\n",
+ "* $N$ is the total number of webpages\n",
+ "* $T_1, \\dots, T_n$ are the $n$ pages containing links to page A\n",
+ "* $C(T_i)$ is the number of links from node $T_i$"
+ ]
+ },
{
"attachments": {},
"cell_type": "markdown",
@@ -306,7 +322,7 @@
"source": [
"----\n",
"\n",
- "a PageRank score of _0.10047_ is very low, which can be an indication that there is no more central vertex than any other. Rather than just looking at the top score, let's look at the top three vertices and see if there are any insights that can be inferred. \n",
+ "a PageRank score of _0.100917324_ is very low, which can be an indication that there is no more central vertex than any other. Rather than just looking at the top score, let's look at the top three vertices and see if there are any insights that can be inferred. \n",
"\n",
"Since this is a very small graph, let's just sort and get the first three records"
]
@@ -353,6 +369,9 @@
"metadata": {},
"outputs": [],
"source": [
+ "# shift the vertex IDs to start at 1 instead of 0 so they match the diagram\n",
+ "d['vertex'] = d['vertex']+1\n",
+ "\n",
"d.sort_values('out_degree', ascending=False).head(3)"
]
},
@@ -428,7 +447,7 @@
"metadata": {},
"source": [
"___\n",
- "Copyright (c) 2019-2023, NVIDIA CORPORATION.\n",
+ "Copyright (c) 2019-2024, NVIDIA CORPORATION.\n",
"\n",
"Licensed under the Apache License, Version 2.0 (the \"License\"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0\n",
"\n",
@@ -453,7 +472,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
- "version": "3.9.13"
+ "version": "3.11.8"
},
"vscode": {
"interpreter": {
From 2578421adf0f5e09b45c39bd4aa751218fdaee71 Mon Sep 17 00:00:00 2001
From: Alex Barghi <105237337+alexbarghi-nv@users.noreply.github.com>
Date: Sun, 14 Apr 2024 16:23:24 -0400
Subject: [PATCH 3/3] [FIX] Remove Unnecessary CUDA Check Causing Deprecation
Warning (#4239)
There is an unnecessary cuda check made in the cuGraph-PyG `CuGraphStore` object causing a deprecation warning. This PR removes the check.
Authors:
- Alex Barghi (https://github.com/alexbarghi-nv)
Approvers:
- Rick Ratzel (https://github.com/rlratzel)
URL: https://github.com/rapidsai/cugraph/pull/4239
---
python/cugraph-pyg/cugraph_pyg/data/cugraph_store.py | 4 +---
1 file changed, 1 insertion(+), 3 deletions(-)
diff --git a/python/cugraph-pyg/cugraph_pyg/data/cugraph_store.py b/python/cugraph-pyg/cugraph_pyg/data/cugraph_store.py
index df16fc9fd6c..354eea8ee6b 100644
--- a/python/cugraph-pyg/cugraph_pyg/data/cugraph_store.py
+++ b/python/cugraph-pyg/cugraph_pyg/data/cugraph_store.py
@@ -336,9 +336,7 @@ def __del__(self):
def __make_offsets(self, input_dict):
offsets = {}
offsets["stop"] = [input_dict[v] for v in sorted(input_dict.keys())]
- offsets["stop"] = torch.tensor(offsets["stop"])
- if torch.has_cuda:
- offsets["stop"] = offsets["stop"].cuda()
+ offsets["stop"] = torch.tensor(offsets["stop"]).cuda()
cumsum = offsets["stop"].cumsum(0)
offsets["start"] = cumsum - offsets["stop"]