diff --git a/build.sh b/build.sh index 32c15b647ab..151fcb4dcb6 100755 --- a/build.sh +++ b/build.sh @@ -53,7 +53,7 @@ BUILD_DIRS="${LIBCUGRAPH_BUILD_DIR} ${LIBCUGRAPH_ETL_BUILD_DIR} ${CUGRAPH_BUILD_ VERBOSE_FLAG="" CMAKE_VERBOSE_OPTION="" BUILD_TYPE=Release -INSTALL_TARGET=install +INSTALL_TARGET="--target install" BUILD_CPP_TESTS=ON BUILD_CPP_MG_TESTS=OFF BUILD_ALL_GPU_ARCH=0 @@ -198,7 +198,7 @@ if buildAll || hasArg libcugraph; then -DBUILD_TESTS=${BUILD_CPP_TESTS} \ -DBUILD_CUGRAPH_MG_TESTS=${BUILD_CPP_MG_TESTS} \ ${CMAKE_VERBOSE_OPTION} - cmake --build "${LIBCUGRAPH_BUILD_DIR}" -j${PARALLEL_LEVEL} --target ${INSTALL_TARGET} ${VERBOSE_FLAG} + cmake --build "${LIBCUGRAPH_BUILD_DIR}" -j${PARALLEL_LEVEL} ${INSTALL_TARGET} ${VERBOSE_FLAG} fi # Configure, build, and install libcugraph_etl @@ -220,7 +220,7 @@ if buildAll || hasArg libcugraph_etl; then -DBUILD_CUGRAPH_MG_TESTS=${BUILD_CPP_MG_TESTS} \ ${CMAKE_VERBOSE_OPTION} \ ${REPODIR}/cpp/libcugraph_etl - cmake --build "${LIBCUGRAPH_ETL_BUILD_DIR}" -j${PARALLEL_LEVEL} --target ${INSTALL_TARGET} ${VERBOSE_FLAG} + cmake --build "${LIBCUGRAPH_ETL_BUILD_DIR}" -j${PARALLEL_LEVEL} ${INSTALL_TARGET} ${VERBOSE_FLAG} fi # Build, and install pylibcugraph diff --git a/ci/release/update-version.sh b/ci/release/update-version.sh index 1dc87a922d7..4b5ccd96715 100755 --- a/ci/release/update-version.sh +++ b/ci/release/update-version.sh @@ -1,5 +1,5 @@ #!/bin/bash -# Copyright (c) 2018-2021, NVIDIA CORPORATION. +# Copyright (c) 2018-2022, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
# You may obtain a copy of the License at @@ -50,6 +50,7 @@ sed_runner 's/version = .*/version = '"'${NEXT_SHORT_TAG}'"'/g' docs/cugraph/sou sed_runner 's/release = .*/release = '"'${NEXT_FULL_TAG}'"'/g' docs/cugraph/source/conf.py for FILE in conda/environments/*.yml; do + sed_runner "s/libcugraphops=${CURRENT_SHORT_TAG}/libcugraphops=${NEXT_SHORT_TAG}/g" ${FILE}; sed_runner "s/cudf=${CURRENT_SHORT_TAG}/cudf=${NEXT_SHORT_TAG}/g" ${FILE}; sed_runner "s/rmm=${CURRENT_SHORT_TAG}/rmm=${NEXT_SHORT_TAG}/g" ${FILE}; sed_runner "s/dask-cuda=${CURRENT_SHORT_TAG}/dask-cuda=${NEXT_SHORT_TAG}/g" ${FILE}; diff --git a/conda/recipes/libcugraph/meta.yaml b/conda/recipes/libcugraph/meta.yaml index d7c7ee0778f..cdd40dbe6fd 100644 --- a/conda/recipes/libcugraph/meta.yaml +++ b/conda/recipes/libcugraph/meta.yaml @@ -49,6 +49,7 @@ requirements: - {{ pin_compatible('cudatoolkit', max_pin='x', min_pin='x') }} - nccl>=2.9.9 - ucx-proc=*=gpu + - libcugraphops {{ minor_version }}.* - libcusolver>=11.2.1 about: diff --git a/conda/recipes/libcugraph_etl/meta.yaml b/conda/recipes/libcugraph_etl/meta.yaml index 795b8d56b4f..18426907c8c 100644 --- a/conda/recipes/libcugraph_etl/meta.yaml +++ b/conda/recipes/libcugraph_etl/meta.yaml @@ -37,7 +37,6 @@ requirements: - cmake>=3.20.1 - doxygen>=1.8.11 - cudatoolkit {{ cuda_version }}.* - - libcugraphops {{ minor_version }}.* # needed for cmake to find transitive deps - libcudf {{ minor_version }}.* - libcugraph {{ minor_version }}.* run: diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index 477906c14f7..ed7005cc769 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -181,6 +181,7 @@ add_library(cugraph SHARED src/community/legacy/triangles_counting.cu src/community/legacy/extract_subgraph_by_vertex.cu src/community/legacy/egonet.cu + src/sampling/neighborhood.cu src/sampling/random_walks.cu src/sampling/detail/gather_utils_impl.cu src/cores/legacy/core_number.cu @@ -285,6 +286,7 @@ target_include_directories(cugraph # - link libraries 
------------------------------------------------------------- target_link_libraries(cugraph PUBLIC + cugraphops::cugraphops raft::raft PRIVATE cugraph::cuHornet diff --git a/cpp/cmake/thirdparty/get_libcugraphops.cmake b/cpp/cmake/thirdparty/get_libcugraphops.cmake index 2b257d87394..f2abfb7f4fd 100644 --- a/cpp/cmake/thirdparty/get_libcugraphops.cmake +++ b/cpp/cmake/thirdparty/get_libcugraphops.cmake @@ -24,9 +24,15 @@ function(find_and_configure_cugraphops) HEADER_NAMES graph/sampling.h LIBRARY_NAMES cugraph-ops++ INCLUDE_SUFFIXES cugraph-ops + BUILD_EXPORT_SET cugraph-exports + INSTALL_EXPORT_SET cugraph-exports ) - rapids_find_package(cugraphops REQUIRED) + rapids_find_package(cugraphops + REQUIRED + BUILD_EXPORT_SET cugraph-exports + INSTALL_EXPORT_SET cugraph-exports + ) endfunction() diff --git a/cpp/include/cugraph/algorithms.hpp b/cpp/include/cugraph/algorithms.hpp index 5e5562c4095..6be1d8e81ca 100644 --- a/cpp/include/cugraph/algorithms.hpp +++ b/cpp/include/cugraph/algorithms.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2021, NVIDIA CORPORATION. + * Copyright (c) 2020-2022, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -24,6 +24,8 @@ #include #include +#include + #include namespace cugraph { @@ -1416,6 +1418,68 @@ random_walks(raft::handle_t const& handle, bool use_padding = false, std::unique_ptr sampling_strategy = nullptr); +/** + * @brief generate sub-sampled graph as an adjacency list (CSR format) given input graph, + * list of vertices and sample size per vertex. The output graph consists of the given + * vertices with each vertex having at most `sampling_size` neighbors from the original graph + * + * @tparam graph_t Type of input graph/view (typically, graph_view_t, non-transposed and + * single-gpu). + * @param handle RAFT handle object to encapsulate resources (e.g. 
CUDA stream, communicator, and + * handles to various CUDA libraries) to run graph algorithms. + * @param rng The Rng (stateful) instance holding pseudo-random number generator state. + * @param graph Graph (view) object to sub-sample. + * @param ptr_d_start Device pointer to set of starting vertex indices for the sub-sampling. + * @param num_start_vertices = number(vertices) to use for sub-sampling. + * @param sampling_size = max number of neighbors per output vertex. + * @param sampling_algo = the sampling algorithm (algo R/algo L/etc.) used to produce outputs. + * @return std::tuple, + * rmm::device_uvector> + * Tuple consisting of two arrays representing the offsets and indices of + * the sub-sampled graph. + */ +template +std::tuple, + rmm::device_uvector> +sample_neighbors_adjacency_list(raft::handle_t const& handle, + ops::gnn::graph::Rng& rng, + graph_t const& graph, + typename graph_t::vertex_type const* ptr_d_start, + size_t num_start_vertices, + size_t sampling_size, + ops::gnn::graph::SamplingAlgoT sampling_algo); + +/** + * @brief generate sub-sampled graph as an edge list (COO format) given input graph, + * list of vertices and sample size per vertex. The output graph consists of the given + * vertices with each vertex having at most `sampling_size` neighbors from the original graph + * + * @tparam graph_t Type of input graph/view (typically, graph_view_t, non-transposed and + * single-gpu). + * @param handle RAFT handle object to encapsulate resources (e.g. CUDA stream, communicator, and + * handles to various CUDA libraries) to run graph algorithms. + * @param rng The Rng (stateful) instance holding pseudo-random number generator state. + * @param graph Graph (view) object to sub-sample. + * @param ptr_d_start Device pointer to set of starting vertex indices for the sub-sampling. + * @param num_start_vertices = number(vertices) to use for sub-sampling. + * @param sampling_size = max number of neighbors per output vertex. 
+ * @param sampling_algo = the sampling algorithm (algo R/algo L/etc.) used to produce outputs. + * @return std::tuple, + * rmm::device_uvector> + * Tuple consisting of two arrays representing the source and destination nodes of + * the sub-sampled graph. + */ +template +std::tuple, + rmm::device_uvector> +sample_neighbors_edgelist(raft::handle_t const& handle, + ops::gnn::graph::Rng& rng, + graph_t const& graph, + typename graph_t::vertex_type const* ptr_d_start, + size_t num_start_vertices, + size_t sampling_size, + ops::gnn::graph::SamplingAlgoT sampling_algo); + /** * @brief Finds (weakly-connected-)component IDs of each vertices in the input graph. * diff --git a/cpp/src/sampling/neighborhood.cu b/cpp/src/sampling/neighborhood.cu new file mode 100644 index 00000000000..e555846362f --- /dev/null +++ b/cpp/src/sampling/neighborhood.cu @@ -0,0 +1,115 @@ +/* + * Copyright (c) 2021-2022, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include + +#include + +#include + +namespace cugraph { + +template +std::tuple, + rmm::device_uvector> +sample_neighbors_adjacency_list(raft::handle_t const& handle, + ops::gnn::graph::Rng& rng, + graph_t const& graph, + typename graph_t::vertex_type const* ptr_d_start, + size_t num_start_vertices, + size_t sampling_size, + ops::gnn::graph::SamplingAlgoT sampling_algo) +{ + const auto [ops_graph, max_degree] = detail::get_graph_and_max_degree(graph); + return ops::gnn::graph::uniform_sample_csr(rng, + ops_graph, + ptr_d_start, + num_start_vertices, + sampling_size, + sampling_algo, + max_degree, + handle.get_stream()); +} + +template +std::tuple, + rmm::device_uvector> +sample_neighbors_edgelist(raft::handle_t const& handle, + ops::gnn::graph::Rng& rng, + graph_t const& graph, + typename graph_t::vertex_type const* ptr_d_start, + size_t num_start_vertices, + size_t sampling_size, + ops::gnn::graph::SamplingAlgoT sampling_algo) +{ + const auto [ops_graph, max_degree] = detail::get_graph_and_max_degree(graph); + return ops::gnn::graph::uniform_sample_coo(rng, + ops_graph, + ptr_d_start, + num_start_vertices, + sampling_size, + sampling_algo, + max_degree, + handle.get_stream()); +} + +// template explicit instantiation directives (EIDir's): +// +// CSR SG FP32{ +template std::tuple, rmm::device_uvector> +sample_neighbors_adjacency_list>( + raft::handle_t const& handle, + ops::gnn::graph::Rng& rng, + graph_view_t const& gview, + int32_t const* ptr_d_start, + size_t num_start_vertices, + size_t sampling_size, + ops::gnn::graph::SamplingAlgoT sampling_algo); + +template std::tuple, rmm::device_uvector> +sample_neighbors_adjacency_list>( + raft::handle_t const& handle, + ops::gnn::graph::Rng& rng, + graph_view_t const& gview, + int64_t const* ptr_d_start, + size_t num_start_vertices, + size_t sampling_size, + ops::gnn::graph::SamplingAlgoT sampling_algo); +//} +// +// COO SG FP32{ +template std::tuple, rmm::device_uvector> +sample_neighbors_edgelist>( + 
raft::handle_t const& handle, + ops::gnn::graph::Rng& rng, + graph_view_t const& gview, + int32_t const* ptr_d_start, + size_t num_start_vertices, + size_t sampling_size, + ops::gnn::graph::SamplingAlgoT sampling_algo); + +template std::tuple, rmm::device_uvector> +sample_neighbors_edgelist>( + raft::handle_t const& handle, + ops::gnn::graph::Rng& rng, + graph_view_t const& gview, + int64_t const* ptr_d_start, + size_t num_start_vertices, + size_t sampling_size, + ops::gnn::graph::SamplingAlgoT sampling_algo); +//} + +} // namespace cugraph diff --git a/cpp/src/utilities/cugraph_ops_utils.hpp b/cpp/src/utilities/cugraph_ops_utils.hpp new file mode 100644 index 00000000000..bacf3f95278 --- /dev/null +++ b/cpp/src/utilities/cugraph_ops_utils.hpp @@ -0,0 +1,53 @@ +/* + * Copyright (c) 2021-2022, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#pragma once + +#include + +#include + +#include + +namespace cugraph { +namespace detail { + +template +ops::gnn::graph::fg_csr get_graph( + graph_view_t const& gview) +{ + ops::gnn::graph::fg_csr graph; + graph.n_nodes = gview.get_number_of_vertices(); + graph.n_indices = gview.get_number_of_edges(); + // FIXME: this is evil and is just temporary until we have a matching type in cugraph-ops + // or we change the type accepted by the functions calling into cugraph-ops + graph.offsets = const_cast(gview.get_matrix_partition_view().get_offsets()); + graph.indices = const_cast(gview.get_matrix_partition_view().get_indices()); + return graph; +} + +template +std::tuple, NodeTypeT> get_graph_and_max_degree( + graph_view_t const& gview) +{ + // FIXME this is sufficient for now, but if there is a fast (cached) way + // of getting max degree, use that instead + auto max_degree = std::numeric_limits::max(); + return std::make_tuple(get_graph(gview), max_degree); +} + +} // namespace detail +} // namespace cugraph