Merge pull request #5 from rapidsai/branch-0.13

pull branch0.13
rapidsai · Feb 21, 2020 · 2825742 · 2825742
2 parents b6e5289 + 7334dc8
commit 2825742
Show file tree

Hide file tree

Showing 118 changed files with 2,563,090 additions and 799 deletions.
diff --git a/.github/ISSUE_TEMPLATE/enhancement-request.md b/.github/ISSUE_TEMPLATE/enhancement-request.md
@@ -0,0 +1,17 @@
+---
+name: Enhancement request
+about: 'Suggest an improvement to a feature'
+title: "[ENH]"
+labels: "? - Needs Triage, Enhancement"
+assignees: ''
+
+---
+
+**Describe the solution you'd like**
+A clear and concise description of what you want to happen.
+
+**Describe alternatives you've considered**
+A clear and concise description of any alternative solutions or features you've considered.
+
+**Additional context**
+Add any other context or screenshots about the feature request here.
diff --git a/.github/ISSUE_TEMPLATE/question.md b/.github/ISSUE_TEMPLATE/question.md
@@ -0,0 +1,10 @@
+---
+name: Question
+about: Ask a Question
+title: "[QST]"
+labels: "? - Needs Triage, question"
+assignees: ''
+
+---
+
+Ask a question that could be converted into a feature or enhancement
diff --git a/.github/workflows/new-issues-to-triage-projects.yml b/.github/workflows/new-issues-to-triage-projects.yml
@@ -0,0 +1,35 @@
+name: Auto Assign New Issues to Triage Project
+
+on:
+  issues:
+    types: [opened]
+
+env:
+  GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+
+jobs:
+  assign_one_project:
+    runs-on: ubuntu-latest
+    name: Assign New Issues to Triage Project
+    steps:
+    - name: Process bug issues
+      uses: docker://takanabe/github-actions-automate-projects:v0.0.1
+      if: contains(github.event.issue.labels.*.name, 'bug') && contains(github.event.issue.labels.*.name, '? - Needs Triage')
+      env:
+        GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+        GITHUB_PROJECT_URL: https://github.com/rapidsai/cugraph/projects/4
+        GITHUB_PROJECT_COLUMN_NAME: 'Needs prioritizing'
+    - name: Process feature issues
+      uses: docker://takanabe/github-actions-automate-projects:v0.0.1
+      if: contains(github.event.issue.labels.*.name, 'feature request') && contains(github.event.issue.labels.*.name, '? - Needs Triage')
+      env:
+        GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+        GITHUB_PROJECT_URL: https://github.com/rapidsai/cugraph/projects/1
+        GITHUB_PROJECT_COLUMN_NAME: 'Needs prioritizing'
+    - name: Process other issues
+      uses: docker://takanabe/github-actions-automate-projects:v0.0.1
+      if: contains(github.event.issue.labels.*.name, '? - Needs Triage') && (!contains(github.event.issue.labels.*.name, 'bug') && !contains(github.event.issue.labels.*.name, 'feature request'))
+      env:
+        GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+        GITHUB_PROJECT_URL: https://github.com/rapidsai/cugraph/projects/5
+        GITHUB_PROJECT_COLUMN_NAME: 'Needs prioritizing'
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -1,21 +1,54 @@
-# cuGraph 0.12.0 (Date TBD)
+
+# cuGraph 0.13.0 (Date TBD)
+
+## New Features
+
+## Improvements
+- PR #688 Cleanup datasets after testing on gpuCI
+- PR #694 Replace the expensive cudaGetDeviceProperties call in triangle counting with cheaper cudaDeviceGetAttribute calls
+- PR #701 Add option to filter datasets and tests when run from CI
+- PR #715 Added new YML file for CUDA 10.2
+- PR #719 Updated docs to remove CUDA 9.2 and add CUDA 10.2
+- PR #720 Updated error messages
+- PR #723 Added notebook testing to gpuCI gpu build
+
+## Bug Fixes
+- PR #697 Updated versions in conda environments.
+- PR #692 Add check after opening golden result files in C++ Katz Centrality tests.
+- PR #702 Add libcypher include path to target_include_directories
+- PR #716 Fixed bug due to disappearing get_column_data_ptr function in cudf
+- PR #726 Fixed SSSP notebook issues in last cell
+
+# cuGraph 0.12.0 (04 Feb 2020)
 
 ## New Features
 - PR #628 Add (Di)Graph constructor from Multi(Di)Graph
 - PR #630 Added ECG clustering
 - PR #636 Added Multi-column renumbering support
 
 ## Improvements
-- PR #640 remove gdf_column in sssp 
-- PR #629 get rid of gdf_column in pagerank 
+- PR #640 remove gdf_column in sssp
+- PR #629 get rid of gdf_column in pagerank
 - PR #641 Add codeowners
 - PR #646 Skipping all tests in test_bfs_bsp.py since SG BFS is not formally supported
 - PR #652 Remove gdf_column in BFS
+- PR #660 enable auto renumbering
+- PR #664 Added support for Louvain early termination.
+- PR #667 Drop `cython` from run requirements in conda recipe
+- PR #666 Incorporate multicolumn renumbering in python graph class for Multi(Di)Graph
+- PR #685 Avoid deep copy in index reset
+
 ## Bug Fixes
 - PR #634 renumber vertex ids passed in analytics
 - PR #649 Change variable names in wjaccard and woverlap to avoid exception
 - PR #651 fix cudf error in katz wrapper and test nstart
 - PR #663 Replaced use of cudf._lib.gdf_dtype_from_value based on cudf refactoring
+- PR #670 Use cudf pandas version
+- PR #672 fix snmg pagerank based on cudf Buffer changes
+- PR #681 fix column length mismatch cudf issue
+- PR #684 Deprecated cudf calls
+- PR #686 Balanced cut fix
+
 
 # cuGraph 0.11.0 (11 Dec 2019)
 

diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
@@ -64,12 +64,12 @@ The following instructions are tested on Linux systems.
 Compiler requirement:
 
 * `gcc`     version 5.4+
-* `nvcc`    version 9.2
+* `nvcc`    version 10.0+
 * `cmake`   version 3.12
 
 CUDA requirement:
 
-* CUDA 9.2+
+* CUDA 10.0+
 * NVIDIA driver 396.44+
 * Pascal architecture or better
 
@@ -115,14 +115,15 @@ To install cuGraph from source, ensure the dependencies are met and follow the s
 
 ```bash
 # create the conda environment (assuming in base `cugraph` directory)
-# for CUDA 9.2
-conda env create --name cugraph_dev --file conda/environments/cugraph_dev92.yml
 
 # for CUDA 10
-conda env create --name cugraph_dev --file conda/environments/cugraph_dev_cuda10.yml
+conda env create --name cugraph_dev --file conda/environments/cugraph_dev_cuda10.0.yml
 
 # for CUDA 10.1
-conda env create --name cugraph_dev --file conda/environments/cugraph_dev_cuda101.yml
+conda env create --name cugraph_dev --file conda/environments/cugraph_dev_cuda10.1.yml
+
+# for CUDA 10.2
+conda env create --name cugraph_dev --file conda/environments/cugraph_dev_cuda10.2.yml
 
 # activate the environment
 conda activate cugraph_dev
@@ -135,14 +136,15 @@ conda deactivate
 
 
 ```bash
-# for CUDA 9.2
-conda env update --name cugraph_dev --file conda/environments/cugraph_dev.yml
 
 # for CUDA 10
-conda env update --name cugraph_dev --file conda/environments/cugraph_dev_cuda10.yml
+conda env update --name cugraph_dev --file conda/environments/cugraph_dev_cuda10.0.yml
 
 # for CUDA 10.1
-conda env update --name cugraph_dev --file conda/environments/cugraph_dev_cuda101.yml
+conda env update --name cugraph_dev --file conda/environments/cugraph_dev_cuda10.1.yml
+
+# for CUDA 10.2
+conda env update --name cugraph_dev --file conda/environments/cugraph_dev_cuda10.2.yml
 
 conda activate cugraph_dev
 ```
@@ -273,8 +275,8 @@ Next the env_vars.sh file needs to be edited
 vi ./etc/conda/activate.d/env_vars.sh
 
 #!/bin/bash
-export PATH=/usr/local/cuda-10.0/bin:$PATH # or cuda-9.2 if using CUDA 9.2
-export LD_LIBRARY_PATH=/usr/local/cuda-10.0/lib64:$LD_LIBRARY_PATH # or cuda-9.2 if using CUDA 9.2
+export PATH=/usr/local/cuda-10.0/bin:$PATH # or cuda-10.2 if using CUDA 10.2
+export LD_LIBRARY_PATH=/usr/local/cuda-10.0/lib64:$LD_LIBRARY_PATH # or cuda-10.2 if using CUDA 10.2
 ```
 
 ```

diff --git a/README.md b/README.md
@@ -1,5 +1,7 @@
 # <div align="left"><img src="img/rapids_logo.png" width="90px"/>&nbsp;cuGraph - GPU Graph Analytics</div>
 
+[![Build Status](https://gpuci.gpuopenanalytics.com/job/rapidsai/job/gpuci/job/cugraph/job/branches/job/cugraph-branch-pipeline/badge/icon)](https://gpuci.gpuopenanalytics.com/job/rapidsai/job/gpuci/job/cugraph/job/branches/job/cugraph-branch-pipeline/)
+
 The [RAPIDS](https://rapids.ai) cuGraph library is a collection of graph analytics that process data found in GPU Dataframes - see [cuDF](https://github.com/rapidsai/cudf).  cuGraph aims to provide a NetworkX-like API that will be familiar to data scientists, so they can now build GPU-accelerated workflows more easily.
 
  For more project details, see [rapids.ai](https://rapids.ai/).
@@ -47,6 +49,7 @@ for i in range(len(gdf_page)):
 | Spectral Clustering - Balanced-Cut            | Single-GPU |                              |
 | Spectral Clustering - Modularity Maximization | Single-GPU |                              |
 | Louvain                                       | Single-GPU |                              |
+| Ensemble Clustering for Graphs (ECG)          | Single-GPU |                              |
 | Renumbering                                   | Single-GPU |                              |
 | Basic Graph Statistics                        | Single-GPU |                              |
 | Weakly Connected Components                   | Single-GPU |                              |
@@ -65,7 +68,7 @@ The current version of cuGraph has some limitations:
 
 cuGraph provides the renumber function to mitigate this problem. Input vertex IDs for the renumber function can be either 32-bit or 64-bit integers, can be non-contiguous, and can start from an arbitrary number. The renumber function maps the provided input vertex IDs to 32-bit contiguous integers starting from 0. cuGraph still requires the renumbered vertex IDs to be representable in 32-bit integers. These limitations are being addressed and will be fixed soon.
 
-Release 0.11 includes a new 'Graph' class that could cause errors to existing code.  Please see the [Trainsition Guide](TRANSITIONGUIDE.md)
+Release 0.11 includes a new 'Graph' class that could cause errors to existing code.  Please see the [Transition Guide](TRANSITIONGUIDE.md)
 
 
 
@@ -95,14 +98,15 @@ It is easy to install cuGraph using conda. You can get a minimal conda installat
 Install and update cuGraph using the conda command:
 
 ```bash
-# CUDA 9.2
-conda install -c nvidia -c rapidsai -c numba -c conda-forge -c defaults cugraph cudatoolkit=9.2
 
 # CUDA 10.0
 conda install -c nvidia -c rapidsai -c numba -c conda-forge -c defaults cugraph cudatoolkit=10.0
 
 # CUDA 10.1
 conda install -c nvidia -c rapidsai -c numba -c conda-forge -c defaults cugraph cudatoolkit=10.1
+
+# CUDA 10.2
+conda install -c nvidia -c rapidsai -c numba -c conda-forge -c defaults cugraph cudatoolkit=10.2
 ```
 
 Note: This conda installation only applies to Linux and Python versions 3.6/3.7.

diff --git a/TRANSITIONGUIDE.md b/TRANSITIONGUIDE.md
@@ -1,3 +1,22 @@
+# 0.12
+
+## Python API
+
+### Loading an edge list
+Renumbering is now enabled by default in `from_cudf_edgelist`. 
+The renumbering feature allows us to hide the fact that we need vertices to be integers starting at 0. The auto-renumbering feature converts the data into the proper data type required by the underlying implementation. All algorithms accepting vertex identifiers (like the source vertex for SSSP) now automatically accept the user's original identifiers too. On output, results are transparently un-renumbered, i.e. the internal IDs are converted back to the original identifiers.
+
+## C++ API
+Pagerank, BFS, and SSSP have dropped the `gdf_column` dependency in favor of basic types and templates  
+
+Example : 
+```
+// 0.11 API 
+cugraph::pagerank(cugraph::Graph, gdf_column *pagerank, ...) 
+// 0.12 API 
+cugraph::pagerank<int,float>(cugraph::Graph, float *pagerank, ...)
+```
+
 # 0.11
 
 ## Python API

diff --git a/ci/cpu/prebuild.sh b/ci/cpu/prebuild.sh
@@ -3,7 +3,7 @@
 export BUILD_CUGRAPH=1
 export BUILD_LIBCUGRAPH=1
 
-if [[ "$CUDA" == "9.2" ]]; then
+if [[ "$CUDA" == "10.0" ]]; then
     export UPLOAD_CUGRAPH=1
 else
     export UPLOAD_CUGRAPH=0

diff --git a/ci/getGTestTimes.sh b/ci/getGTestTimes.sh
@@ -0,0 +1,55 @@
+#!/bin/bash
+
+# Print gtest results sorted by runtime, slowest first. The results are printed
+# two ways: first all tests sorted by runtime, then all tests grouped by test
+# binary with the tests sorted by runtime within each group. The summed runtime
+# of all reported tests is printed last.
+#
+# Usage: getGTestTimes.sh [FILE...]
+#   FILE  captured test run output. When no FILE is given the output is read
+#         from stdin, so test output can be piped straight into this script.
+#
+# Only "[       OK ]" result lines are used. Each one is attributed to the test
+# binary named by the most recent "Running GoogleTest <binary>" line.
+
+awk 'function print_sorted(data,    sortcmd) {
+        # Sort descending on the number that follows the first "(" of each
+        # line, i.e. the elapsed time in a result such as "Suite.Test (12 ms)".
+        sortcmd = "sort -r -t\\( -nk2"
+        # Flush buffered output first so it stays ahead of the lines sort writes.
+        fflush()
+        # Feed sort through a pipe rather than building a shell "echo" command:
+        # the data is then never parsed by the shell and cannot exceed the
+        # command-line length limit on large test runs.
+        printf "%s\n", data | sortcmd
+        close(sortcmd)
+     }
+     /^Running GoogleTest .+$/ {
+        testbinary = $3
+     }
+     /^\[       OK \].+$/ {
+        # The next-to-last field is "(<time>"; strip the leading parenthesis.
+        testtime = substr($(NF-1),2)
+        # Keep what follows the 13-character "[       OK ] " prefix.
+        newtestdata = testbinary ":" substr($0,14)
+        alltestdata = alltestdata newtestdata "\n"
+        testdata[testbinary] = testdata[testbinary] newtestdata "\n"
+        totaltime = totaltime + testtime
+     }
+     END {
+        # Print all tests sorted by time
+        print_sorted(alltestdata)
+        print "\n================================================================================"
+        # Print test binaries with tests sorted by time
+        print "Tests grouped by test binary:"
+        for (testbinary in testdata) {
+           print testbinary
+           print_sorted(testdata[testbinary])
+        }
+        print "\n================================================================================"
+        # Adding 0 forces a numeric value, so an input without any passing test
+        # reports "0 milliseconds" instead of " milliseconds".
+        totaltime += 0
+        print totaltime " milliseconds = " totaltime/60000 " minutes"
+     }
+' "$@"
diff --git a/ci/gpu/build.sh b/ci/gpu/build.sh
@@ -4,19 +4,31 @@
 # cuGraph GPU build & testscript for CI  #
 ##########################################
 set -e
+set -o pipefail
 NUMARGS=$#
 ARGS=$*
 
-# Logger function for build status output
-function logger() {
+function logger {
   echo -e "\n>>>> $@\n"
 }
 
-# Arg parsing function
 function hasArg {
     (( ${NUMARGS} != 0 )) && (echo " ${ARGS} " | grep -q " $1 ")
 }
 
+# Remove datasets and temp files; ${WORKSPACE:?} aborts instead of running "rm -rf /datasets/..." when WORKSPACE is unset or empty
+function cleanup {
+  logger "Removing datasets and temp files..."
+  rm -f testoutput.txt
+  rm -rf -- "${WORKSPACE:?}/datasets/test" "${WORKSPACE:?}/datasets/benchmark"
+}
+
+# Set cleanup trap for Jenkins
+if [ ! -z "$JENKINS_HOME" ] ; then
+  logger "Jenkins environment detected, setting cleanup trap..."
+  trap cleanup EXIT
+fi
+
 # Set path, build parallel level, and CUDA version
 export PATH=/conda/bin:/usr/local/cuda/bin:$PATH
 export PARALLEL_LEVEL=4
@@ -54,7 +66,9 @@ conda install -c nvidia -c rapidsai -c rapidsai-nightly -c conda-forge -c defaul
       distributed>=2.1.0 \
       dask-cudf=${MINOR_VERSION} \
       dask-cuda=${MINOR_VERSION} \
-      libcypher-parser
+      libcypher-parser \
+      ipython=7.3* \
+      jupyterlab
 
 # Install the master version of dask and distributed
 logger "pip install git+https://github.com/dask/distributed.git --upgrade --no-deps"
@@ -87,25 +101,21 @@ else
     logger "Check GPU usage..."
     nvidia-smi
 
-    logger "Download datasets..."
-    cd $WORKSPACE/datasets
-    source ./get_test_data.sh
-
-    logger "GoogleTest for libcugraph..."
-    cd $WORKSPACE/cpp/build
-    export GTEST_OUTPUT="xml:${WORKSPACE}/test-results/"
-    for gt in gtests/*; do
-        test_name=`basename $gt`
-        logger "Running GoogleTest $test_name"
-        # FIXME: remove this ASAP
-        if [[ ${gt} == "gtests/SNMG_SPMV_TEST" ]]; then
-            ${gt} --gtest_filter=-hibench_test/Tests_MGSpmv_hibench.CheckFP32_hibench*
-        else
-            ${gt}
-        fi
-    done
-
-    logger "Python py.test for cuGraph..."
-    cd $WORKSPACE/python
-    py.test --cache-clear --junitxml=${WORKSPACE}/junit-cugraph.xml -v
+    # PR builds pass --quick so that large datasets are not downloaded and the
+    # slow tests using them are skipped; an empty flag must add no argument.
+    # See: https://docs.rapids.ai/maintainers/gpuci/#environment-variables
+    TEST_MODE_FLAG=""
+    if [[ "$BUILD_MODE" == "pull-request" ]]; then
+        TEST_MODE_FLAG="--quick"
+    fi
+    "${WORKSPACE}/ci/test.sh" ${TEST_MODE_FLAG:+"${TEST_MODE_FLAG}"} | tee testoutput.txt
+    echo -e "\nTOP 20 SLOWEST TESTS:\n"
+    # Wrap in echo to prevent non-zero exit since this command is non-essential
+    echo "$("${WORKSPACE}/ci/getGTestTimes.sh" testoutput.txt | head -20)"
+
+    # Convert the log to junit XML even when a notebook fails, then propagate the failure
+    nbtest_status=0
+    "${WORKSPACE}/ci/gpu/test-notebooks.sh" 2>&1 | tee nbtest.log || nbtest_status=$?
+    python "${WORKSPACE}/ci/utils/nbtestlog2junitxml.py" nbtest.log
+    (( nbtest_status == 0 )) || exit "${nbtest_status}"
 fi