Skip to content

Commit

Permalink
Merge branch 'master' into update-cugraph-relgraphconv
Browse files Browse the repository at this point in the history
  • Loading branch information
tingyu66 authored Feb 15, 2023
2 parents a03d0b3 + 40c968b commit 474f8d2
Show file tree
Hide file tree
Showing 9 changed files with 196 additions and 21 deletions.
3 changes: 1 addition & 2 deletions python/dgl/nn/pytorch/conv/egatconv.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,8 +22,7 @@ class EGATConv(nn.Module):
f_{ij}^{\prime} &= \mathrm{LeakyReLU}\left(A [ h_{i} \| f_{ij} \| h_{j}]\right)
where :math:`f_{ij}^{\prime}` are edge features, :math:`\mathrm{A}` is weight matrix and
:math: `\vec{F}` is weight vector. After that, resulting node features
:math:`\vec{F}` is weight vector. After that, resulting node features
:math:`h_{i}^{\prime}` are updated in the same way as in regular GAT.
Parameters
Expand Down
2 changes: 1 addition & 1 deletion python/dgl/transforms/functional.py
Original file line number Diff line number Diff line change
Expand Up @@ -3508,7 +3508,7 @@ def radius_graph(x, r, p=2, self_loop=False,
distances = th.cdist(x, x, p=p, compute_mode=compute_mode)

if not self_loop:
distances.fill_diagonal_(r + 1e-4)
distances.fill_diagonal_(r + 1)

edges = th.nonzero(distances <= r, as_tuple=True)

Expand Down
112 changes: 112 additions & 0 deletions script/create_dev_conda_env.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,112 @@
#!/bin/bash

# Comma-separated list of CUDA versions accepted by the -g flag; the value
# given on the command line is checked against this list with validate().
readonly CUDA_VERSIONS="10.2,11.3,11.6,11.7"
# PyTorch release pinned for the dev environment. A "+cpu" or "+cu<digits>"
# suffix is appended to it before it replaces __TORCH_VERSION__ in the conda
# yaml template.
# NOTE(review): confirm that a wheel of this release is actually published for
# every CUDA version listed above (e.g. "1.12.0+cu117") -- TODO verify.
readonly TORCH_VERSION="1.12.0"

# Print the command-line help (synopsis, examples and option list) to stdout.
# The here-document delimiter is unquoted, so $0 and ${CUDA_VERSIONS} are
# expanded when the text is printed.
usage() {
cat << EOF
usage: bash $0 OPTIONS
examples:
bash $0 -c
bash $0 -g 11.7
Create a developement environment for DGL developers.
OPTIONS:
-h Show this message.
-c Create dev environment in CPU mode.
-g Create dev environment in GPU mode with specified CUDA version,
supported: ${CUDA_VERSIONS}.
EOF
}

# Check whether a value is a member of a comma-separated list.
#
# Arguments:
#   $1 - comma-separated list of allowed values, e.g. "10.2,11.3".
#   $2 - candidate value to look up.
# Returns:
#   0 if $2 is literally equal to one of the list entries, 1 otherwise
#   (an empty candidate never matches).
validate() {
  local candidate="$2"
  local -a allowed
  local entry

  # An empty candidate is never a valid choice.
  [[ -n "${candidate}" ]] || return 1

  # Split on commas into an array; unlike an unquoted expansion this does not
  # subject the entries to pathname expansion.
  IFS=',' read -r -a allowed <<< "$1"

  for entry in "${allowed[@]}"; do
    # The right-hand side MUST be quoted: an unquoted word after == inside
    # [[ ]] is treated as a glob pattern, which would let inputs such as "*"
    # or "11.?" pass validation.
    if [[ "${entry}" == "${candidate}" ]]; then
      return 0
    fi
  done
  return 1
}

# Ask the user whether to proceed and read one line from stdin.
# Returns normally only when the answer is exactly "yes"; any other answer
# (including end of input) terminates the whole script with exit status 0.
confirm() {
  echo "Continue? [yes/no]:"
  read confirm
  case "${confirm}" in
    yes)
      return 0
      ;;
    *)
      exit 0
      ;;
  esac
}

# Parse flags.
# -c selects CPU mode, -g <version> selects GPU mode, -h prints the help.
# Any other flag (getopts reports it as "?") prints the help and fails.
while getopts "cg:h" flag; do
  case "${flag}" in
    c)
      cpu=1
      ;;
    g)
      gpu="${OPTARG}"
      ;;
    h)
      usage
      exit 0
      ;;
    *)
      usage
      exit 1
      ;;
  esac
done

# CPU and GPU modes are mutually exclusive.
if [[ -n "${gpu}" && "${cpu}" -eq 1 ]]; then
  echo "Only one mode can be specified."
  exit 1
fi

# At least one mode is required.
if [[ -z "${gpu}" && -z "${cpu}" ]]; then
  usage
  exit 1
fi

# Set up CPU mode.
if [[ "${cpu}" -eq 1 ]]; then
  torchversion="${TORCH_VERSION}+cpu"
  name="dgl-dev-cpu"
fi

# Set up GPU mode.
if [[ -n "${gpu}" ]]; then
  # Quote both arguments so the user-supplied version is passed through
  # verbatim (no word splitting or pathname expansion).
  if ! validate "${CUDA_VERSIONS}" "${gpu}"; then
    echo "Error: Invalid CUDA version."
    usage
    exit 1
  fi

  echo "Confirm the installed CUDA version matches the specified one."
  confirm

  # Strip the separators from the CUDA version, e.g. "11.7" -> "cu117".
  torchversion="${TORCH_VERSION}+cu${gpu//[-._]/}"
  name="dgl-dev-gpu"
fi

echo "Confirm you are excuting the script from your DGL root directory."
echo "Current working directory: ${PWD}"
confirm

# Prepare the conda environment yaml file.
template="script/dgl_dev.yml.template"
if [[ ! -f "${template}" ]]; then
  echo "Error: ${template} not found under ${PWD}." >&2
  exit 1
fi

# mktemp creates a fresh, unpredictable, private directory; a name derived
# from ${RANDOM} has only 32768 possible values and could collide with (or be
# pre-created as) an existing directory.
tmpdir=$(mktemp -d "${TMPDIR:-/tmp}/dgl_dev.XXXXXX") || {
  echo "Error: failed to create a temporary directory." >&2
  exit 1
}
# Clean up the temporary directory on every exit path, including the case
# where confirm terminates the script after a "no" answer.
trap 'rm -rf "${tmpdir}"' EXIT
env_file="${tmpdir}/dgl_dev.yml"

# Escape the characters that are special in a sed replacement ("\" and "&")
# as well as the "|" delimiter, so that any working directory is substituted
# literally.
escaped_home=$(printf '%s' "${PWD}" | sed -e 's/[\\&|]/\\&/g')

# Render the template in a single pass. Writing to a new file instead of
# using "sed -i" keeps the command portable between GNU and BSD sed.
if ! sed -e "s|__NAME__|${name}|g" \
         -e "s|__TORCH_VERSION__|${torchversion}|g" \
         -e "s|__DGL_HOME__|${escaped_home}|g" \
         "${template}" > "${env_file}"; then
  echo "Error: failed to generate ${env_file}." >&2
  exit 1
fi

# Ask for final confirmation.
echo "--------------------------------------------------"
cat "${env_file}"
echo "--------------------------------------------------"
echo "Create a conda enviroment with the config?"
confirm

# Create conda environment and report its failure to the caller instead of
# always exiting successfully.
conda env create -f "${env_file}"
status=$?
if [[ ${status} -ne 0 ]]; then
  echo "Error: conda env create failed with exit code ${status}." >&2
fi

# The temporary yaml file is removed by the EXIT trap installed above.
exit "${status}"
29 changes: 29 additions & 0 deletions script/dgl_dev.yml.template
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
# Conda environment template for DGL development.
# script/create_dev_conda_env.sh copies this file and replaces the __NAME__,
# __TORCH_VERSION__ and __DGL_HOME__ placeholders with sed before passing the
# result to `conda env create -f`.
# NOTE(review): nesting indentation is not visible in this view; the entries
# after `- pip:` presumably belong to the pip section -- confirm against the
# checked-in file.
# __NAME__ becomes "dgl-dev-cpu" or "dgl-dev-gpu".
name: __NAME__
dependencies:
- python=3.7.0
- pip
- pip:
# Presumably needed so pip can find the "+cpu" / "+cu<digits>" torch builds
# requested below -- TODO confirm.
- --find-links https://download.pytorch.org/whl/torch_stable.html
- cython
- filelock
- matplotlib
- networkx
- nltk
- nose
- numpy
- ogb
- pandas
- psutil
- pyarrow
- pydantic
- pytest
- pyyaml
- rdflib
- requests[security]
- scikit-learn
- scipy
# __TORCH_VERSION__ becomes "<TORCH_VERSION>+cpu" or "<TORCH_VERSION>+cu<digits>".
- torch==__TORCH_VERSION__
- torchmetrics
- tqdm
variables:
# __DGL_HOME__ becomes the directory the script was run from (${PWD}).
DGL_HOME: __DGL_HOME__
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,6 @@
os.path.dirname(os.path.relpath(__file__)),
"..",
"..",
"..",
"examples",
"sparse",
)
Expand Down
2 changes: 2 additions & 0 deletions tests/scripts/task_example_test.bat
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,8 @@ SET DGL_LIBRARY_PATH=!CD!\build
SET PYTHONPATH=!CD!\python;!PYTHONPATH!
SET DGL_DOWNLOAD_DIR=!CD!

python -m pytest -v --junitxml=pytest_backend.xml --durations=100 tests\examples || GOTO :FAIL

PUSHD !GCN_EXAMPLE_DIR!
python pagerank.py || GOTO :FAIL
python gcn\train.py --dataset cora || GOTO :FAIL
Expand Down
2 changes: 2 additions & 0 deletions tests/scripts/task_example_test.sh
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,8 @@ export DGL_DOWNLOAD_DIR=${PWD}

# test

python3 -m pytest -v --junitxml=pytest_backend.xml --durations=100 tests/examples || fail "sparse examples on $1"

pushd $GCN_EXAMPLE_DIR> /dev/null

python3 pagerank.py || fail "run pagerank.py on $1"
Expand Down
63 changes: 47 additions & 16 deletions tests/tools/utils.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
import os
import json
import logging
import numpy as np
import torch
import os

import dgl
import numpy as np
import torch
from distpartitioning import array_readwriter
from distpartitioning.array_readwriter.parquet import ParquetArrayParser
from files import setdir
Expand All @@ -16,12 +16,16 @@ def _chunk_numpy_array(arr, fmt_meta, chunk_sizes, path_fmt, vector_rows=False):

for j, n in enumerate(chunk_sizes):
path = os.path.abspath(path_fmt % j)
arr_chunk = arr[offset: offset + n]
arr_chunk = arr[offset : offset + n]
shape = arr_chunk.shape
logging.info("Chunking %d-%d" % (offset, offset + n))
# If requested we write multi-column arrays as single-column vector Parquet files
array_parser = array_readwriter.get_array_parser(**fmt_meta)
if isinstance(array_parser, ParquetArrayParser) and len(shape) > 1 and shape[1] > 1:
if (
isinstance(array_parser, ParquetArrayParser)
and len(shape) > 1
and shape[1] > 1
):
array_parser.write(path, arr_chunk, vector_rows=vector_rows)
else:
array_parser.write(path, arr_chunk)
Expand Down Expand Up @@ -83,8 +87,15 @@ def _init(g, num_chunks, key, kwargs=None):


def _chunk_graph(
g, name, ndata_paths, edata_paths, num_chunks, data_fmt, edges_format,
vector_rows=False, **kwargs
g,
name,
ndata_paths,
edata_paths,
num_chunks,
data_fmt,
edges_format,
vector_rows=False,
**kwargs,
):
# First deal with ndata and edata that are homogeneous
# (i.e. not a dict-of-dict)
Expand Down Expand Up @@ -139,16 +150,24 @@ def _chunk_graph(
k: v for k, v in zip(g.canonical_etypes, num_edges_per_chunk)
}

idxes_etypestr = {
idx: (etype, etypestrs[etype])
for idx, etype in enumerate(g.canonical_etypes)
}
idxes = np.arange(len(idxes_etypestr))

# Split edge index
metadata["edges"] = {}
with setdir("edge_index"):
for etype in g.canonical_etypes:
etypestr = etypestrs[etype]
np.random.shuffle(idxes)
for idx in idxes:
etype = idxes_etypestr[idx][0]
etypestr = idxes_etypestr[idx][1]
logging.info("Chunking edge index for %s" % etypestr)
edges_meta = {}
if edges_format == 'csv':
if edges_format == "csv":
fmt_meta = {"name": edges_format, "delimiter": " "}
elif edges_format == 'parquet':
elif edges_format == "parquet":
fmt_meta = {"name": edges_format}
else:
raise RuntimeError(f"Invalid edges_fmt: {edges_format}")
Expand Down Expand Up @@ -259,7 +278,7 @@ def chunk_graph(
num_chunks,
output_path,
data_fmt="numpy",
edges_fmt='csv',
edges_fmt="csv",
vector_rows=False,
**kwargs,
):
Expand Down Expand Up @@ -302,14 +321,26 @@ def chunk_graph(
edata[key] = os.path.abspath(edata[key])
with setdir(output_path):
_chunk_graph(
g, name, ndata_paths, edata_paths, num_chunks, data_fmt, edges_fmt,
vector_rows, **kwargs
g,
name,
ndata_paths,
edata_paths,
num_chunks,
data_fmt,
edges_fmt,
vector_rows,
**kwargs,
)


def create_chunked_dataset(
root_dir, num_chunks, data_fmt="numpy", edges_fmt='csv',
vector_rows=False, **kwargs):
root_dir,
num_chunks,
data_fmt="numpy",
edges_fmt="csv",
vector_rows=False,
**kwargs,
):
"""
This function creates a sample dataset, based on MAG240 dataset.
Expand Down
3 changes: 2 additions & 1 deletion tools/distpartitioning/dataset_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -529,7 +529,8 @@ def get_dataset(input_dir, graph_name, rank, world_size, num_parts, schema_map):
]:
edge_datadict[col] = []

for etype_name, etype_info in edge_data.items():
for etype_name, etype_id in etype_name_idmap.items():
etype_info = edge_data[etype_name]
edge_info = etype_info[constants.STR_DATA]

# edgetype strings are in canonical format, src_node_type:edge_type:dst_node_type
Expand Down

0 comments on commit 474f8d2

Please sign in to comment.