From 11b3ecf4c89364709dc7a8ee12ca190450421a49 Mon Sep 17 00:00:00 2001 From: Robert Maynard Date: Thu, 16 May 2024 23:59:46 -0400 Subject: [PATCH 1/5] ConfigureCUDA.cmake now sets CUVS_ prefixed variables (#66) This makes sure that CUVS uses the compile flags that are required for RAPIDS C++ projects. Authors: - Robert Maynard (https://github.com/robertmaynard) Approvers: - Bradley Dice (https://github.com/bdice) - Corey J. Nolet (https://github.com/cjnolet) URL: https://github.com/rapidsai/cuvs/pull/66 --- cpp/cmake/modules/ConfigureCUDA.cmake | 32 +++++++++++++-------------- cpp/src/neighbors/ivf_pq_index.cpp | 6 ++--- cpp/test/neighbors/ann_ivf_pq.cuh | 4 ++-- 3 files changed, 21 insertions(+), 21 deletions(-) diff --git a/cpp/cmake/modules/ConfigureCUDA.cmake b/cpp/cmake/modules/ConfigureCUDA.cmake index ea8a077b0..213448291 100644 --- a/cpp/cmake/modules/ConfigureCUDA.cmake +++ b/cpp/cmake/modules/ConfigureCUDA.cmake @@ -1,5 +1,5 @@ # ============================================================================= -# Copyright (c) 2018-2023, NVIDIA CORPORATION. +# Copyright (c) 2018-2024, NVIDIA CORPORATION. # # Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except # in compliance with the License. You may obtain a copy of the License at @@ -13,45 +13,45 @@ # ============================================================================= if(DISABLE_DEPRECATION_WARNINGS) - list(APPEND RAFT_CXX_FLAGS -Wno-deprecated-declarations) - list(APPEND RAFT_CUDA_FLAGS -Xcompiler=-Wno-deprecated-declarations) + list(APPEND CUVS_CXX_FLAGS -Wno-deprecated-declarations) + list(APPEND CUVS_CUDA_FLAGS -Xcompiler=-Wno-deprecated-declarations) endif() # Be very strict when compiling with GCC as host compiler (and thus more lenient when compiling with # clang) if(CMAKE_COMPILER_IS_GNUCXX) - list(APPEND RAFT_CXX_FLAGS -Wall -Werror -Wno-unknown-pragmas -Wno-error=deprecated-declarations) - list(APPEND RAFT_CUDA_FLAGS -Xcompiler=-Wall,-Werror,-Wno-error=deprecated-declarations) + list(APPEND CUVS_CXX_FLAGS -Wall -Werror -Wno-unknown-pragmas -Wno-error=deprecated-declarations) + list(APPEND CUVS_CUDA_FLAGS -Xcompiler=-Wall,-Werror,-Wno-error=deprecated-declarations) # set warnings as errors if(CMAKE_CUDA_COMPILER_VERSION VERSION_GREATER_EQUAL 11.2.0) - list(APPEND RAFT_CUDA_FLAGS -Werror=all-warnings) + list(APPEND CUVS_CUDA_FLAGS -Werror=all-warnings) endif() endif() if(CUDA_LOG_COMPILE_TIME) - list(APPEND RAFT_CUDA_FLAGS "--time=nvcc_compile_log.csv") + list(APPEND CUVS_CUDA_FLAGS "--time=nvcc_compile_log.csv") endif() -list(APPEND RAFT_CUDA_FLAGS --expt-extended-lambda --expt-relaxed-constexpr) -list(APPEND RAFT_CXX_FLAGS "-DCUDA_API_PER_THREAD_DEFAULT_STREAM") -list(APPEND RAFT_CUDA_FLAGS "-DCUDA_API_PER_THREAD_DEFAULT_STREAM") +list(APPEND CUVS_CUDA_FLAGS --expt-extended-lambda --expt-relaxed-constexpr) +list(APPEND CUVS_CXX_FLAGS "-DCUDA_API_PER_THREAD_DEFAULT_STREAM") +list(APPEND CUVS_CUDA_FLAGS "-DCUDA_API_PER_THREAD_DEFAULT_STREAM") # make sure we produce smallest binary size -list(APPEND RAFT_CUDA_FLAGS -Xfatbin=-compress-all) +list(APPEND CUVS_CUDA_FLAGS -Xfatbin=-compress-all) # Option to enable line info in CUDA device compilation to allow introspection when profiling / # memchecking if(CUDA_ENABLE_LINEINFO) - list(APPEND RAFT_CUDA_FLAGS -lineinfo) + list(APPEND CUVS_CUDA_FLAGS -lineinfo) endif() if(OpenMP_FOUND) - list(APPEND RAFT_CUDA_FLAGS -Xcompiler=${OpenMP_CXX_FLAGS}) + list(APPEND CUVS_CUDA_FLAGS -Xcompiler=${OpenMP_CXX_FLAGS}) endif() # Debug options if(CMAKE_BUILD_TYPE MATCHES Debug) - message(VERBOSE "RAFT: Building with debugging flags") - list(APPEND RAFT_CUDA_FLAGS -G -Xcompiler=-rdynamic) - list(APPEND RAFT_CUDA_FLAGS -Xptxas --suppress-stack-size-warning) + message(VERBOSE "cuVS: Building with debugging flags") + list(APPEND CUVS_CUDA_FLAGS -G -Xcompiler=-rdynamic) + list(APPEND CUVS_CUDA_FLAGS -Xptxas --suppress-stack-size-warning) endif() diff --git a/cpp/src/neighbors/ivf_pq_index.cpp b/cpp/src/neighbors/ivf_pq_index.cpp index 1e1919867..fcb2c976d 100644 --- a/cpp/src/neighbors/ivf_pq_index.cpp +++ b/cpp/src/neighbors/ivf_pq_index.cpp @@ -47,13 +47,13 @@ index::index(raft::resources const& handle, pq_bits_(pq_bits), pq_dim_(pq_dim == 0 ? calculate_pq_dim(dim) : pq_dim), conservative_memory_allocation_(conservative_memory_allocation), - pq_centers_{raft::make_device_mdarray(handle, make_pq_centers_extents())}, lists_{n_lists}, - rotation_matrix_{ - raft::make_device_matrix(handle, this->rot_dim(), this->dim())}, list_sizes_{raft::make_device_vector(handle, n_lists)}, + pq_centers_{raft::make_device_mdarray(handle, make_pq_centers_extents())}, centers_{raft::make_device_matrix(handle, n_lists, this->dim_ext())}, centers_rot_{raft::make_device_matrix(handle, n_lists, this->rot_dim())}, + rotation_matrix_{ + raft::make_device_matrix(handle, this->rot_dim(), this->dim())}, data_ptrs_{raft::make_device_vector(handle, n_lists)}, inds_ptrs_{raft::make_device_vector(handle, n_lists)}, accum_sorted_sizes_{raft::make_host_vector(n_lists + 1)} diff --git a/cpp/test/neighbors/ann_ivf_pq.cuh b/cpp/test/neighbors/ann_ivf_pq.cuh index 19442e0b5..0a17aec12 100644 --- a/cpp/test/neighbors/ann_ivf_pq.cuh +++ b/cpp/test/neighbors/ann_ivf_pq.cuh @@ -354,8 +354,8 @@ class ivf_pq_test : public ::testing::TestWithParam { cuvs::Compare{})); // Another test with the API that take list_data directly - auto list_data = index->lists()[label]->data.view(); - uint32_t n_take = 4; + [[maybe_unused]] auto list_data = index->lists()[label]->data.view(); + uint32_t n_take = 4; ASSERT_TRUE(row_offset + n_take < n_rows); auto codes2 = raft::make_device_matrix(handle_, n_take, index->pq_dim()); ivf_pq::helpers::codepacker::unpack_list_data( From 7c1054227d3e64e73da6509562e238b2a4e1686c Mon Sep 17 00:00:00 2001 From: Micka Date: Fri, 17 May 2024 19:35:16 +0200 Subject: [PATCH 2/5] Fix IVF-PQ helper functions (#116) Some IVF-PQ helper functions needed for FAISS were missing. Authors: - Micka (https://github.com/lowener) Approvers: - Corey J. Nolet (https://github.com/cjnolet) URL: https://github.com/rapidsai/cuvs/pull/116 --- .gitignore | 2 + cpp/include/cuvs/neighbors/ivf_pq.hpp | 672 ++++++++++++++++++ cpp/include/cuvs/neighbors/ivf_pq_helpers.hpp | 273 ------- cpp/src/neighbors/detail/ann_utils.cuh | 4 +- cpp/src/neighbors/ivf_flat_index.cpp | 4 +- cpp/src/neighbors/ivf_pq/ivf_pq_build.cuh | 15 +- .../neighbors/ivf_pq/ivf_pq_build_common.cu | 251 +++++-- cpp/src/neighbors/ivf_pq/ivf_pq_search.cuh | 4 +- cpp/test/neighbors/ann_ivf_pq.cuh | 35 +- docs/source/cpp_api/neighbors_ivf_pq.rst | 12 + 10 files changed, 900 insertions(+), 372 deletions(-) delete mode 100644 cpp/include/cuvs/neighbors/ivf_pq_helpers.hpp diff --git a/.gitignore b/.gitignore index 68996dbdf..4b6f46320 100644 --- a/.gitignore +++ b/.gitignore @@ -26,6 +26,8 @@ dask-worker-space/ *.bin bench/ann/data temporary_*.json +rust/target/ +rust/Cargo.lock ## scikit-build _skbuild diff --git a/cpp/include/cuvs/neighbors/ivf_pq.hpp b/cpp/include/cuvs/neighbors/ivf_pq.hpp index f013615de..c6b1b2ee7 100644 --- a/cpp/include/cuvs/neighbors/ivf_pq.hpp +++ b/cpp/include/cuvs/neighbors/ivf_pq.hpp @@ -1064,4 +1064,676 @@ void deserialize(raft::resources const& handle, * @} */ +namespace helpers { +/** + * @defgroup ivf_pq_cpp_helpers IVF-PQ helper methods + * @{ + */ +namespace codepacker { +/** + * @addtogroup ivf_pq_cpp_helpers + * @{ + */ +/** + * @brief Unpack `n_take` consecutive records of a single list (cluster) in the compressed index + * starting at given `offset`. + * + * Bit compression is removed, which means output will have pq_dim dimensional vectors (one code per + * byte, instead of ceildiv(pq_dim * pq_bits, 8) bytes of pq codes). + * + * Usage example: + * @code{.cpp} + * auto list_data = index.lists()[label]->data.view(); + * // allocate the buffer for the output + * uint32_t n_take = 4; + * auto codes = raft::make_device_matrix(res, n_take, index.pq_dim()); + * uint32_t offset = 0; + * // unpack n_take elements from the list + * ivf_pq::helpers::codepacker::unpack(res, list_data, index.pq_bits(), offset, codes.view()); + * @endcode + * + * @param[in] res raft resource + * @param[in] list_data block to read from + * @param[in] pq_bits bit length of encoded vector elements + * @param[in] offset + * How many records in the list to skip. + * @param[out] codes + * the destination buffer [n_take, index.pq_dim()]. + * The length `n_take` defines how many records to unpack, + * it must be smaller than the list size. + */ +void unpack( + raft::resources const& res, + raft::device_mdspan::list_extents, raft::row_major> + list_data, + uint32_t pq_bits, + uint32_t offset, + raft::device_matrix_view codes); + +/** + * @brief Unpack `n_rows` consecutive records of a single list (cluster) in the compressed index + * starting at given `offset`. The output codes of a single vector are contiguous, not expanded to + * one code per byte, which means the output has ceildiv(pq_dim * pq_bits, 8) bytes per PQ encoded + * vector. + * + * Usage example: + * @code{.cpp} + * raft::resources res; + * auto list_data = index.lists()[label]->data.view(); + * // allocate the buffer for the output + * uint32_t n_rows = 4; + * auto codes = raft::make_device_matrix( + * res, n_rows, raft::ceildiv(index.pq_dim() * index.pq_bits(), 8)); + * uint32_t offset = 0; + * // unpack n_rows elements from the list + * ivf_pq::helpers::codepacker::unpack_contiguous( + * res, list_data, index.pq_bits(), offset, n_rows, index.pq_dim(), codes.data_handle()); + * @endcode + * + * @param[in] res raft resource + * @param[in] list_data block to read from + * @param[in] pq_bits bit length of encoded vector elements + * @param[in] offset + * How many records in the list to skip. + * @param[in] n_rows How many records to unpack + * @param[in] pq_dim The dimensionality of the PQ compressed records + * @param[out] codes + * the destination buffer [n_rows, ceildiv(pq_dim * pq_bits, 8)]. + * The length `n_rows` defines how many records to unpack, + * it must be smaller than the list size. + */ +void unpack_contiguous( + raft::resources const& res, + raft::device_mdspan::list_extents, raft::row_major> + list_data, + uint32_t pq_bits, + uint32_t offset, + uint32_t n_rows, + uint32_t pq_dim, + uint8_t* codes); + +/** + * Write flat PQ codes into an existing list by the given offset. + * + * NB: no memory allocation happens here; the list must fit the data (offset + n_vec). + * + * Usage example: + * @code{.cpp} + * auto list_data = index.lists()[label]->data.view(); + * // allocate the buffer for the input codes + * auto codes = raft::make_device_matrix(res, n_vec, index.pq_dim()); + * ... prepare n_vecs to pack into the list in codes ... + * // write codes into the list starting from the 42nd position + * ivf_pq::helpers::codepacker::pack( + * res, make_const_mdspan(codes.view()), index.pq_bits(), 42, list_data); + * @endcode + * + * @param[in] res raft resource + * @param[in] codes flat PQ codes, one code per byte [n_vec, pq_dim] + * @param[in] pq_bits bit length of encoded vector elements + * @param[in] offset how many records to skip before writing the data into the list + * @param[in] list_data block to write into + */ +void pack(raft::resources const& res, + raft::device_matrix_view codes, + uint32_t pq_bits, + uint32_t offset, + raft::device_mdspan::list_extents, raft::row_major> + list_data); + +/** + * Write flat PQ codes into an existing list by the given offset. The input codes of a single vector + * are contiguous (not expanded to one code per byte). + * + * NB: no memory allocation happens here; the list must fit the data (offset + n_rows records). + * + * Usage example: + * @code{.cpp} + * raft::resources res; + * auto list_data = index.lists()[label]->data.view(); + * // allocate the buffer for the input codes + * auto codes = raft::make_device_matrix( + * res, n_rows, raft::ceildiv(index.pq_dim() * index.pq_bits(), 8)); + * ... prepare compressed vectors to pack into the list in codes ... + * // write codes into the list starting from the 42nd position. If the current size of the list + * // is greater than 42, this will overwrite the codes starting at this offset. + * ivf_pq::helpers::codepacker::pack_contiguous( + * res, codes.data_handle(), n_rows, index.pq_dim(), index.pq_bits(), 42, list_data); + * @endcode + * + * @param[in] res raft resource + * @param[in] codes flat PQ codes, [n_vec, ceildiv(pq_dim * pq_bits, 8)] + * @param[in] n_rows number of records + * @param[in] pq_dim + * @param[in] pq_bits bit length of encoded vector elements + * @param[in] offset how many records to skip before writing the data into the list + * @param[in] list_data block to write into + */ +void pack_contiguous( + raft::resources const& res, + const uint8_t* codes, + uint32_t n_rows, + uint32_t pq_dim, + uint32_t pq_bits, + uint32_t offset, + raft::device_mdspan::list_extents, raft::row_major> + list_data); + +/** + * Write flat PQ codes into an existing list by the given offset. + * + * The list is identified by its label. + * + * NB: no memory allocation happens here; the list must fit the data (offset + n_vec). + * + * Usage example: + * @code{.cpp} + * // We will write into the 137th cluster + * uint32_t label = 137; + * // allocate the buffer for the input codes + * auto codes = raft::make_device_matrix(res, n_vec, index.pq_dim()); + * ... prepare n_vecs to pack into the list in codes ... + * // write codes into the list starting from the 42nd position + * ivf_pq::helpers::codepacker::pack_list_data(res, &index, codes_to_pack, label, 42); + * @endcode + * + * @param[in] res raft resource + * @param[inout] index IVF-PQ index. + * @param[in] codes flat PQ codes, one code per byte [n_rows, pq_dim] + * @param[in] label The id of the list (cluster) into which we write. + * @param[in] offset how many records to skip before writing the data into the list + */ +void pack_list_data(raft::resources const& res, + index* index, + raft::device_matrix_view codes, + uint32_t label, + uint32_t offset); + +/** + * Write flat PQ codes into an existing list by the given offset. Use this when the input + * vectors are PQ encoded and not expanded to one code per byte. + * + * The list is identified by its label. + * + * NB: no memory allocation happens here; the list into which the vectors are packed must fit offset + * + n_rows rows. + * + * Usage example: + * @code{.cpp} + * using namespace cuvs::neighbors; + * raft::resources res; + * // use default index parameters + * ivf_pq::index_params index_params; + * // create and fill the index from a [N, D] dataset + * auto index = ivf_pq::build(res, index_params, dataset, N, D); + * // allocate the buffer for n_rows input codes. Each vector occupies + * // raft::ceildiv(index.pq_dim() * index.pq_bits(), 8) bytes because + * // codes are compressed and without gaps. + * auto codes = raft::make_device_matrix( + * res, n_rows, raft::ceildiv(index.pq_dim() * index.pq_bits(), 8)); + * ... prepare the compressed vectors to pack into the list in codes ... + * // the first n_rows codes in the fourth IVF list are to be overwritten. + * uint32_t label = 3; + * // write codes into the list starting from the 0th position + * ivf_pq::helpers::codepacker::pack_contiguous_list_data( + * res, &index, codes.data_handle(), n_rows, label, 0); + * @endcode + * + * @param[in] res raft resource + * @param[inout] index pointer to IVF-PQ index + * @param[in] codes flat contiguous PQ codes [n_rows, ceildiv(pq_dim * pq_bits, 8)] + * @param[in] n_rows how many records to pack + * @param[in] label The id of the list (cluster) into which we write. + * @param[in] offset how many records to skip before writing the data into the list + */ +void pack_contiguous_list_data(raft::resources const& res, + index* index, + uint8_t* codes, + uint32_t n_rows, + uint32_t label, + uint32_t offset); + +/** + * @brief Unpack `n_take` consecutive records of a single list (cluster) in the compressed index + * starting at given `offset`, one code per byte (independently of pq_bits). + * + * Usage example: + * @code{.cpp} + * // We will unpack the fourth cluster + * uint32_t label = 3; + * // Get the list size + * uint32_t list_size = 0; + * raft::copy(&list_size, index.list_sizes().data_handle() + label, 1, + * resource::get_cuda_stream(res)); resource::sync_stream(res); + * // allocate the buffer for the output + * auto codes = raft::make_device_matrix(res, list_size, index.pq_dim()); + * // unpack the whole list + * ivf_pq::helpers::codepacker::unpack_list_data(res, index, codes.view(), label, 0); + * @endcode + * + * @param[in] res + * @param[in] index + * @param[out] out_codes + * the destination buffer [n_take, index.pq_dim()]. + * The length `n_take` defines how many records to unpack, + * it must be smaller than the list size. + * @param[in] label + * The id of the list (cluster) to decode. + * @param[in] offset + * How many records in the list to skip. + */ +void unpack_list_data(raft::resources const& res, + const index& index, + raft::device_matrix_view out_codes, + uint32_t label, + uint32_t offset); + +/** + * @brief Unpack a series of records of a single list (cluster) in the compressed index + * by their in-list offsets, one code per byte (independently of pq_bits). + * + * Usage example: + * @code{.cpp} + * // We will unpack the fourth cluster + * uint32_t label = 3; + * // Create the selection vector + * auto selected_indices = raft::make_device_vector(res, 4); + * ... fill the indices ... + * resource::sync_stream(res); + * // allocate the buffer for the output + * auto codes = raft::make_device_matrix(res, selected_indices.size(), index.pq_dim()); + * // decode the whole list + * ivf_pq::helpers::codepacker::unpack_list_data( + * res, index, selected_indices.view(), codes.view(), label); + * @endcode + * + * @param[in] res raft resource + * @param[in] index IVF-PQ index (passed by reference) + * @param[in] in_cluster_indices + * The offsets of the selected indices within the cluster. + * @param[out] out_codes + * the destination buffer [n_take, index.pq_dim()]. + * The length `n_take` defines how many records to unpack, + * it must be smaller than the list size. + * @param[in] label + * The id of the list (cluster) to decode. + */ +void unpack_list_data(raft::resources const& res, + const index& index, + raft::device_vector_view in_cluster_indices, + raft::device_matrix_view out_codes, + uint32_t label); + +/** + * @brief Unpack `n_rows` consecutive PQ encoded vectors of a single list (cluster) in the + * compressed index starting at given `offset`, not expanded to one code per byte. Each code in the + * output buffer occupies ceildiv(index.pq_dim() * index.pq_bits(), 8) bytes. + * + * Usage example: + * @code{.cpp} + * raft::resources res; + * // We will unpack the whole fourth cluster + * uint32_t label = 3; + * // Get the list size + * uint32_t list_size = 0; + * raft::update_host(&list_size, index.list_sizes().data_handle() + label, 1, + * raft::resource::get_cuda_stream(res)); + * raft::resource::sync_stream(res); + * // allocate the buffer for the output + * auto codes = raft::make_device_matrix(res, list_size, raft::ceildiv(index.pq_dim() * + * index.pq_bits(), 8)); + * // unpack the whole list + * ivf_pq::helpers::codepacker::unpack_list_data(res, index, codes.data_handle(), list_size, + * label, 0); + * @endcode + * + * @param[in] res raft resource + * @param[in] index IVF-PQ index (passed by reference) + * @param[out] out_codes + * the destination buffer [n_rows, ceildiv(index.pq_dim() * index.pq_bits(), 8)]. + * The length `n_rows` defines how many records to unpack, + * offset + n_rows must be smaller than or equal to the list size. + * @param[in] n_rows how many codes to unpack + * @param[in] label + * The id of the list (cluster) to decode. + * @param[in] offset + * How many records in the list to skip. + */ +void unpack_contiguous_list_data(raft::resources const& res, + const index& index, + uint8_t* out_codes, + uint32_t n_rows, + uint32_t label, + uint32_t offset); + +/** + * @brief Decode `n_take` consecutive records of a single list (cluster) in the compressed index + * starting at given `offset`. + * + * Usage example: + * @code{.cpp} + * // We will reconstruct the fourth cluster + * uint32_t label = 3; + * // Get the list size + * uint32_t list_size = 0; + * raft::copy(&list_size, index.list_sizes().data_handle() + label, 1, + * resource::get_cuda_stream(res)); resource::sync_stream(res); + * // allocate the buffer for the output + * auto decoded_vectors = raft::make_device_matrix(res, list_size, index.dim()); + * // decode the whole list + * ivf_pq::helpers::codepacker::reconstruct_list_data(res, index, decoded_vectors.view(), label, + * 0); + * @endcode + * + * @param[in] res + * @param[in] index + * @param[out] out_vectors + * the destination buffer [n_take, index.dim()]. + * The length `n_take` defines how many records to reconstruct, + * it must be smaller than the list size. + * @param[in] label + * The id of the list (cluster) to decode. + * @param[in] offset + * How many records in the list to skip. + */ +void reconstruct_list_data(raft::resources const& res, + const index& index, + raft::device_matrix_view out_vectors, + uint32_t label, + uint32_t offset); + +void reconstruct_list_data(raft::resources const& res, + const index& index, + raft::device_matrix_view out_vectors, + uint32_t label, + uint32_t offset); + +void reconstruct_list_data(raft::resources const& res, + const index& index, + raft::device_matrix_view out_vectors, + uint32_t label, + uint32_t offset); + +/** + * @brief Decode a series of records of a single list (cluster) in the compressed index + * by their in-list offsets. + * + * Usage example: + * @code{.cpp} + * // We will reconstruct the fourth cluster + * uint32_t label = 3; + * // Create the selection vector + * auto selected_indices = raft::make_device_vector(res, 4); + * ... fill the indices ... + * resource::sync_stream(res); + * // allocate the buffer for the output + * auto decoded_vectors = raft::make_device_matrix( + * res, selected_indices.size(), index.dim()); + * // decode the whole list + * ivf_pq::helpers::codepacker::reconstruct_list_data( + * res, index, selected_indices.view(), decoded_vectors.view(), label); + * @endcode + * + * @param[in] res + * @param[in] index + * @param[in] in_cluster_indices + * The offsets of the selected indices within the cluster. + * @param[out] out_vectors + * the destination buffer [n_take, index.dim()]. + * The length `n_take` defines how many records to reconstruct, + * it must be smaller than the list size. + * @param[in] label + * The id of the list (cluster) to decode. + */ +void reconstruct_list_data(raft::resources const& res, + const index& index, + raft::device_vector_view in_cluster_indices, + raft::device_matrix_view out_vectors, + uint32_t label); +void reconstruct_list_data(raft::resources const& res, + const index& index, + raft::device_vector_view in_cluster_indices, + raft::device_matrix_view out_vectors, + uint32_t label); +void reconstruct_list_data(raft::resources const& res, + const index& index, + raft::device_vector_view in_cluster_indices, + raft::device_matrix_view out_vectors, + uint32_t label); + +/** + * @brief Extend one list of the index in-place, by the list label, skipping the classification and + * encoding steps. + * + * Usage example: + * @code{.cpp} + * // We will extend the fourth cluster + * uint32_t label = 3; + * // We will fill 4 new vectors + * uint32_t n_vec = 4; + * // Indices of the new vectors + * auto indices = raft::make_device_vector(res, n_vec); + * ... fill the indices ... + * auto new_codes = raft::make_device_matrix new_codes( + * res, n_vec, index.pq_dim()); + * ... fill codes ... + * // extend list with new codes + * ivf_pq::helpers::codepacker::extend_list_with_codes( + * res, &index, codes.view(), indices.view(), label); + * @endcode + * + * @param[in] res + * @param[inout] index + * @param[in] new_codes flat PQ codes, one code per byte [n_rows, index.pq_dim()] + * @param[in] new_indices source indices [n_rows] + * @param[in] label the id of the target list (cluster). + */ +void extend_list_with_codes( + raft::resources const& res, + index* index, + raft::device_matrix_view new_codes, + raft::device_vector_view new_indices, + uint32_t label); + +/** + * @brief Extend one list of the index in-place, by the list label, skipping the classification + * step. + * + * Usage example: + * @code{.cpp} + * // We will extend the fourth cluster + * uint32_t label = 3; + * // We will extend with 4 new vectors + * uint32_t n_vec = 4; + * // Indices of the new vectors + * auto indices = raft::make_device_vector(res, n_vec); + * ... fill the indices ... + * auto new_vectors = raft::make_device_matrix new_codes( + * res, n_vec, index.dim()); + * ... fill vectors ... + * // extend list with new vectors + * ivf_pq::helpers::codepacker::extend_list( + * res, &index, new_vectors.view(), indices.view(), label); + * @endcode + * + * + * @param[in] res + * @param[inout] index + * @param[in] new_vectors data to encode [n_rows, index.dim()] + * @param[in] new_indices source indices [n_rows] + * @param[in] label the id of the target list (cluster). + */ +void extend_list(raft::resources const& res, + index* index, + raft::device_matrix_view new_vectors, + raft::device_vector_view new_indices, + uint32_t label); +void extend_list(raft::resources const& res, + index* index, + raft::device_matrix_view new_vectors, + raft::device_vector_view new_indices, + uint32_t label); +void extend_list(raft::resources const& res, + index* index, + raft::device_matrix_view new_vectors, + raft::device_vector_view new_indices, + uint32_t label); + +/** + * @} + */ +}; // namespace codepacker + +/** + * @brief Remove all data from a single list (cluster) in the index. + * + * Usage example: + * @code{.cpp} + * // We will erase the fourth cluster (label = 3) + * ivf_pq::helpers::erase_list(res, &index, 3); + * @endcode + * + * + * @param[in] res + * @param[inout] index + * @param[in] label the id of the target list (cluster). + */ +void erase_list(raft::resources const& res, index* index, uint32_t label); + +/** + * @brief Public helper API to reset the data and indices ptrs, and the list sizes. Useful for + * externally modifying the index without going through the build stage. The data and indices of the + * IVF lists will be lost. + * + * Usage example: + * @code{.cpp} + * raft::resources res; + * using namespace cuvs::neighbors; + * // use default index parameters + * ivf_pq::index_params index_params; + * // initialize an empty index + * ivf_pq::index index(res, index_params, D); + * // reset the index's state and list sizes + * ivf_pq::helpers::reset_index(res, &index); + * @endcode + * + * @param[in] res raft resource + * @param[inout] index pointer to IVF-PQ index + */ +void reset_index(const raft::resources& res, index* index); + +/** + * @brief Public helper API exposing the computation of the index's rotation matrix. + * NB: This is to be used only when the rotation matrix is not already computed through + * cuvs::neighbors::ivf_pq::build. + * + * Usage example: + * @code{.cpp} + * raft::resources res; + * // use default index parameters + * ivf_pq::index_params index_params; + * // force random rotation + * index_params.force_random_rotation = true; + * // initialize an empty index + * cuvs::neighbors::ivf_pq::index index(res, index_params, D); + * // reset the index + * reset_index(res, &index); + * // compute the rotation matrix with random_rotation + * cuvs::neighbors::ivf_pq::helpers::make_rotation_matrix( + * res, &index, index_params.force_random_rotation); + * @endcode + * + * @param[in] res raft resource + * @param[inout] index pointer to IVF-PQ index + * @param[in] force_random_rotation whether to apply a random rotation matrix on the input data. See + * cuvs::neighbors::ivf_pq::index_params for more details. + */ +void make_rotation_matrix(raft::resources const& res, + index* index, + bool force_random_rotation); + +/** + * @brief Public helper API for externally modifying the index's IVF centroids. + * NB: The index must be reset before this. Use raft::neighbors::ivf_pq::extend to construct IVF + lists according to new centroids. + * + * Usage example: + * @code{.cpp} + * raft::resources res; + * // allocate the buffer for the input centers + * auto cluster_centers = raft::make_device_matrix(res, index.n_lists(), + index.dim()); + * ... prepare ivf centroids in cluster_centers ... + * // reset the index + * reset_index(res, &index); + * // recompute the state of the index + * cuvs::neighbors::ivf_pq::helpers::recompute_internal_state(res, index); + * // Write the IVF centroids + * cuvs::neighbors::ivf_pq::helpers::set_centers( + res, + &index, + cluster_centers); + * @endcode + * + * @param[in] res raft resource + * @param[inout] index pointer to IVF-PQ index + * @param[in] cluster_centers new cluster centers [index.n_lists(), index.dim()] + */ +void set_centers(raft::resources const& res, + index* index, + raft::device_matrix_view cluster_centers); +/** + * @brief Helper exposing the re-computation of list sizes and related arrays if IVF lists have been + * modified. + * + * Usage example: + * @code{.cpp} + * using namespace cuvs::neighbors; + * raft::resources res; + * // use default index parameters + * ivf_pq::index_params index_params; + * // initialize an empty index + * ivf_pq::index index(res, index_params, D); + * ivf_pq::helpers::reset_index(res, &index); + * // resize the first IVF list to hold 5 records + * auto spec = list_spec{ + * index->pq_bits(), index->pq_dim(), index->conservative_memory_allocation()}; + * uint32_t new_size = 5; + * ivf::resize_list(res, list, spec, new_size, 0); + * raft::update_device(index.list_sizes(), &new_size, 1, stream); + * // recompute the internal state of the index + * ivf_pq::helpers::recompute_internal_state(res, &index); + * @endcode + * + * @param[in] res raft resource + * @param[inout] index pointer to IVF-PQ index + */ +void recompute_internal_state(const raft::resources& res, index* index); + +/** + * @brief Public helper API for fetching a trained index's IVF centroids into a buffer that may be + * allocated on either host or device. + * + * Usage example: + * @code{.cpp} + * raft::resources res; + * // allocate the buffer for the output centers + * auto cluster_centers = raft::make_device_matrix( + * res, index.n_lists(), index.dim()); + * // Extract the IVF centroids into the buffer + * cuvs::neighbors::ivf_pq::helpers::extract_centers(res, index, cluster_centers.data_handle()); + * @endcode + * + * @param[in] res raft resource + * @param[in] index IVF-PQ index (passed by reference) + * @param[out] cluster_centers IVF cluster centers [index.n_lists(), index.dim] + */ +void extract_centers(raft::resources const& res, + const index& index, + raft::device_matrix_view cluster_centers); +/** + * @} + */ +} // namespace helpers + } // namespace cuvs::neighbors::ivf_pq diff --git a/cpp/include/cuvs/neighbors/ivf_pq_helpers.hpp b/cpp/include/cuvs/neighbors/ivf_pq_helpers.hpp deleted file mode 100644 index 39e6eb54f..000000000 --- a/cpp/include/cuvs/neighbors/ivf_pq_helpers.hpp +++ /dev/null @@ -1,273 +0,0 @@ -/* - * raft::copyright (c) 2022-2024, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a raft::copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#pragma once - -#include -#include -#include - -#include -#include -#include -#include -#include -#include - -#include -#include -#include - -#include -#include - -namespace cuvs::neighbors::ivf_pq::helpers { - -namespace codepacker { -/** - * Unpack flat PQ codes from an existing list by the given offset. - * - * @param[in] res - * @param[in] index - * @param[out] out_codes flat PQ codes, one code per byte [n_rows, pq_dim] - * @param[in] label - * @param[in] offset_or_indices how many records in the list to skip or the exact indices. - */ -void unpack_list_data(raft::resources const& res, - const index& index, - raft::device_matrix_view out_codes, - uint32_t label, - std::variant offset_or_indices); -/** - * Write flat PQ codes into an existing list by the given offset. - * - * NB: no memory allocation happens here; the list must fit the data (offset + n_rows). - * - * @param[in] res - * @param[in] index - * @param[out] new_codes - * @param[in] label - * @param[in] offset_or_indices how many records in the list to skip or the exact indices. - */ -void pack_list_data(raft::resources const& res, - index* index, - raft::device_matrix_view new_codes, - uint32_t label, - std::variant offset_or_indices); - -}; // namespace codepacker - -/** - * @brief Fill-in a random orthogonal transformation matrix. - * - * @param handle - * @param force_random_rotation - * @param n_rows - * @param n_cols - * @param[out] rotation_matrix device pointer to a row-major matrix of size [n_rows, n_cols]. - * @param rng random number generator state - */ -void make_rotation_matrix(raft::resources const& handle, - bool force_random_rotation, - uint32_t n_rows, - uint32_t n_cols, - float* rotation_matrix, - raft::random::RngState rng = raft::random::RngState(7ULL)); - -/** - * @brief Set cluster centers on an index - * - * @param handle - * @param index - * @param cluster_centers - */ -void set_centers(raft::resources const& handle, - index* index, - const float* cluster_centers); - -/** - * @brief Decode `n_take` consecutive records of a single list (cluster) in the compressed index - * starting at given `offset`. - * - * Usage example: - * @code{.cpp} - * // We will reconstruct the fourth cluster - * uint32_t label = 3; - * // Get the list size - * uint32_t list_size = 0; - * raft::copy(&list_size, index.list_sizes().data_handle() + label, 1, - * resource::get_cuda_stream(res)); resource::sync_stream(res); - * // allocate the buffer for the output - * auto decoded_vectors = raft::make_device_matrix(res, list_size, index.dim()); - * // decode the whole list - * ivf_pq::helpers::reconstruct_list_data(res, index, decoded_vectors.view(), label, 0); - * @endcode - * - * @param[in] res - * @param[in] index - * @param[out] out_vectors - * the destination buffer [n_take, index.dim()]. - * The length `n_take` defines how many records to reconstruct, - * it must be smaller than the list size. - * @param[in] label - * The id of the list (cluster) to decode. - * @param[in] offset_or_indices - * How many records in the list to skip. - */ -void reconstruct_list_data(raft::resources const& res, - const index& index, - raft::device_matrix_view out_vectors, - uint32_t label, - std::variant offset_or_indices); -void reconstruct_list_data(raft::resources const& res, - const index& index, - raft::device_matrix_view out_vectors, - uint32_t label, - std::variant offset_or_indices); -void reconstruct_list_data(raft::resources const& res, - const index& index, - raft::device_matrix_view out_vectors, - uint32_t label, - std::variant offset_or_indices); - -/** - * Write flat PQ codes into an existing list by the given offset. The input codes of a single vector - * are contiguous (not expanded to one code per byte). - * - * NB: no memory allocation happens here; the list must fit the data (offset + n_rows records). - * - * Usage example: - * @code{.cpp} - * raft::resources res; - * auto list_data = index.lists()[label]->data.view(); - * // allocate the buffer for the input codes - * auto codes = raft::make_device_matrix( - * res, n_rows, raft::ceildiv(index.pq_dim() * index.pq_bits(), 8)); - * ... prepare compressed vectors to pack into the list in codes ... - * // write codes into the list starting from the 42nd position. If the current size of the list - * // is greater than 42, this will overwrite the codes starting at this offset. - * ivf_pq::helpers::codepacker::pack_contiguous( - * res, codes.data_handle(), n_rows, index.pq_dim(), index.pq_bits(), 42, list_data); - * @endcode - * - * @param[in] res raft resource - * @param[inout] index - * @param[in] new_codes flat PQ codes, [n_vec, ceildiv(pq_dim * pq_bits, 8)] - * @param[in] n_rows number of records - * @param[in] label The id of the list (cluster) to decode. - * @param[in] offset_or_indices how many records in the list to skip or the exact indices. - */ -void pack_contiguous_list_data(raft::resources const& res, - index* index, - const uint8_t* new_codes, - uint32_t n_rows, - uint32_t label, - std::variant offset_or_indices); - -/** - * @brief Extend one list of the index in-place, by the list label, skipping the classification and - * encoding steps. - * - * Usage example: - * @code{.cpp} - * // We will extend the fourth cluster - * uint32_t label = 3; - * // We will fill 4 new vectors - * uint32_t n_vec = 4; - * // Indices of the new vectors - * auto indices = raft::make_device_vector(res, n_vec); - * ... fill the indices ... - * auto new_codes = raft::make_device_matrix new_codes( - * res, n_vec, index.pq_dim()); - * ... fill codes ... - * // extend list with new codes - * ivf_pq::helpers::extend_list_with_codes( - * res, &index, codes.view(), indices.view(), label); - * @endcode - * - * @param[in] res - * @param[inout] index - * @param[in] new_codes flat PQ codes, one code per byte [n_rows, index.pq_dim()] - * @param[in] new_indices source indices [n_rows] - * @param[in] label the id of the target list (cluster). - */ -void extend_list_with_codes( - raft::resources const& res, - index* index, - raft::device_matrix_view new_codes, - raft::device_vector_view new_indices, - uint32_t label); - -/** - * @brief Extend one list of the index in-place, by the list label, skipping the classification - * step. - * - * Usage example: - * @code{.cpp} - * // We will extend the fourth cluster - * uint32_t label = 3; - * // We will extend with 4 new vectors - * uint32_t n_vec = 4; - * // Indices of the new vectors - * auto indices = raft::make_device_vector(res, n_vec); - * ... fill the indices ... - * auto new_vectors = raft::make_device_matrix new_codes( - * res, n_vec, index.dim()); - * ... fill vectors ... - * // extend list with new vectors - * ivf_pq::helpers::extend_list( - * res, &index, new_vectors.view(), indices.view(), label); - * @endcode - * - * - * @param[in] res - * @param[inout] index - * @param[in] new_vectors data to encode [n_rows, index.dim()] - * @param[in] new_indices source indices [n_rows] - * @param[in] label the id of the target list (cluster). - */ -void extend_list(raft::resources const& res, - index* index, - raft::device_matrix_view new_vectors, - raft::device_vector_view new_indices, - uint32_t label); -void extend_list(raft::resources const& res, - index* index, - raft::device_matrix_view new_vectors, - raft::device_vector_view new_indices, - uint32_t label); -void extend_list(raft::resources const& res, - index* index, - raft::device_matrix_view new_vectors, - raft::device_vector_view new_indices, - uint32_t label); -/** - * @brief Remove all data from a single list (cluster) in the index. - * - * Usage example: - * @code{.cpp} - * // We will erase the fourth cluster (label = 3) - * ivf_pq::helpers::erase_list(res, &index, 3); - * @endcode - * - * - * @param[in] res - * @param[inout] index - * @param[in] label the id of the target list (cluster). - */ -void erase_list(raft::resources const& res, index* index, uint32_t label); - -} // namespace cuvs::neighbors::ivf_pq::helpers diff --git a/cpp/src/neighbors/detail/ann_utils.cuh b/cpp/src/neighbors/detail/ann_utils.cuh index 222b4c6a7..c634fb146 100644 --- a/cpp/src/neighbors/detail/ann_utils.cuh +++ b/cpp/src/neighbors/detail/ann_utils.cuh @@ -1,9 +1,9 @@ /* - * raft::copyright (c) 2022-2024, NVIDIA CORPORATION. + * Copyright (c) 2022-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. - * You may obtain a raft::copy of the License at + * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * diff --git a/cpp/src/neighbors/ivf_flat_index.cpp b/cpp/src/neighbors/ivf_flat_index.cpp index b2fbbfc12..b25bb051a 100644 --- a/cpp/src/neighbors/ivf_flat_index.cpp +++ b/cpp/src/neighbors/ivf_flat_index.cpp @@ -41,10 +41,10 @@ index::index(raft::resources const& res, metric_(metric), adaptive_centers_(adaptive_centers), conservative_memory_allocation_{conservative_memory_allocation}, - centers_(raft::make_device_matrix(res, n_lists, dim)), - center_norms_(std::nullopt), lists_{n_lists}, list_sizes_{raft::make_device_vector(res, n_lists)}, + centers_(raft::make_device_matrix(res, n_lists, dim)), + center_norms_(std::nullopt), data_ptrs_{raft::make_device_vector(res, n_lists)}, inds_ptrs_{raft::make_device_vector(res, n_lists)}, accum_sorted_sizes_{raft::make_host_vector(n_lists + 1)} diff --git a/cpp/src/neighbors/ivf_pq/ivf_pq_build.cuh b/cpp/src/neighbors/ivf_pq/ivf_pq_build.cuh index b843ea067..07e3c497c 100644 --- a/cpp/src/neighbors/ivf_pq/ivf_pq_build.cuh +++ b/cpp/src/neighbors/ivf_pq/ivf_pq_build.cuh @@ -1,9 +1,9 @@ /* - * raft::copyright (c) 2022-2024, NVIDIA CORPORATION. + * Copyright (c) 2022-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. - * You may obtain a raft::copy of the License at + * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * @@ -24,7 +24,6 @@ #include #include #include -#include #include "../detail/ann_utils.cuh" // utils::mapping @@ -1393,7 +1392,7 @@ void extend_list_with_codes( // Allocate memory and write indices auto offset = extend_list_prepare(res, index, new_indices, label); // Pack the data - helpers::codepacker::pack_list_data(res, index, new_codes, label, offset); + pack_list_data(res, index, new_codes, label, offset); // Update the pointers and the sizes ivf::detail::recompute_internal_state(res, *index); } @@ -1780,13 +1779,9 @@ auto build(raft::resources const& handle, utils::mapping()); // Make rotation matrix - helpers::make_rotation_matrix(handle, - params.force_random_rotation, - index.rot_dim(), - index.dim(), - index.rotation_matrix().data_handle()); + helpers::make_rotation_matrix(handle, &index, params.force_random_rotation); - helpers::set_centers(handle, &index, cluster_centers); + helpers::set_centers(handle, &index, raft::make_const_mdspan(centers_view)); // Train PQ codebooks switch (index.codebook_kind()) { diff --git a/cpp/src/neighbors/ivf_pq/ivf_pq_build_common.cu b/cpp/src/neighbors/ivf_pq/ivf_pq_build_common.cu index b08ebb17e..db488b9c0 100644 --- a/cpp/src/neighbors/ivf_pq/ivf_pq_build_common.cu +++ b/cpp/src/neighbors/ivf_pq/ivf_pq_build_common.cu @@ -1,9 +1,9 @@ /* - * raft::copyright (c) 2022-2024, NVIDIA CORPORATION. + * Copyright (c) 2022-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. - * You may obtain a raft::copy of the License at + * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * @@ -24,104 +24,153 @@ namespace cuvs::neighbors::ivf_pq::helpers { namespace codepacker { -void unpack_list_data(raft::resources const& res, - const index& index, - raft::device_matrix_view out_codes, - uint32_t label, - std::variant offset_or_indices) +void unpack( + raft::resources const& res, + raft::device_mdspan::list_extents, raft::row_major> + list_data, + uint32_t pq_bits, + uint32_t offset, + raft::device_matrix_view codes) { - detail::unpack_list_data(res, index, out_codes, label, offset_or_indices); + detail::unpack_list_data(codes, list_data, offset, pq_bits, raft::resource::get_cuda_stream(res)); +} + +void unpack_contiguous( + raft::resources const& res, + raft::device_mdspan::list_extents, raft::row_major> + list_data, + uint32_t pq_bits, + uint32_t offset, + uint32_t n_rows, + uint32_t pq_dim, + uint8_t* codes) +{ + detail::unpack_contiguous_list_data( + codes, list_data, n_rows, pq_dim, offset, pq_bits, raft::resource::get_cuda_stream(res)); +} +void pack(raft::resources const& res, + raft::device_matrix_view codes, + uint32_t pq_bits, + uint32_t offset, + raft::device_mdspan::list_extents, raft::row_major> + list_data) +{ + detail::pack_list_data(list_data, codes, offset, pq_bits, raft::resource::get_cuda_stream(res)); +} + +void pack_contiguous( + raft::resources const& res, + const uint8_t* codes, + uint32_t n_rows, + uint32_t pq_dim, + uint32_t pq_bits, + uint32_t offset, + raft::device_mdspan::list_extents, raft::row_major> + list_data) +{ + detail::pack_contiguous_list_data( + list_data, codes, n_rows, pq_dim, offset, pq_bits, raft::resource::get_cuda_stream(res)); } void pack_list_data(raft::resources const& res, index* index, - raft::device_matrix_view new_codes, + raft::device_matrix_view codes, uint32_t label, - std::variant offset_or_indices) + uint32_t offset) { - detail::pack_list_data(res, index, new_codes, label, offset_or_indices); + detail::pack_list_data(res, index, codes, label, offset); } -}; // namespace codepacker +void pack_contiguous_list_data(raft::resources const& res, + index* index, + uint8_t* codes, + uint32_t n_rows, + uint32_t label, + uint32_t offset) +{ + detail::pack_contiguous_list_data(res, index, codes, n_rows, label, offset); +} -void make_rotation_matrix(raft::resources const& handle, - bool force_random_rotation, - uint32_t n_rows, - uint32_t n_cols, - float* rotation_matrix, - raft::random::RngState rng) +void unpack_list_data(raft::resources const& res, + const index& index, + raft::device_matrix_view out_codes, + uint32_t label, + uint32_t offset) { - raft::common::nvtx::range fun_scope( - "ivf_pq::make_rotation_matrix(%u * %u)", n_rows, n_cols); - auto stream = raft::resource::get_cuda_stream(handle); - bool inplace = n_rows == n_cols; - uint32_t n = std::max(n_rows, n_cols); - if (force_random_rotation || !inplace) { - rmm::device_uvector buf(inplace ? 0 : n * n, stream); - float* mat = inplace ? rotation_matrix : buf.data(); - raft::random::normal(handle, rng, mat, n * n, 0.0f, 1.0f); - raft::linalg::detail::qrGetQ_inplace(handle, mat, n, n, stream); - if (!inplace) { - RAFT_CUDA_TRY(cudaMemcpy2DAsync(rotation_matrix, - sizeof(float) * n_cols, - mat, - sizeof(float) * n, - sizeof(float) * n_cols, - n_rows, - cudaMemcpyDefault, - stream)); - } - } else { - uint32_t stride = n + 1; - auto rotation_matrix_view = - raft::make_device_vector_view(rotation_matrix, n * n); - raft::linalg::map_offset(handle, rotation_matrix_view, [stride] __device__(uint32_t i) { - return static_cast(i % stride == 0u); - }); - } + detail::unpack_list_data(res, index, out_codes, label, offset); } -void set_centers(raft::resources const& handle, index* index, const float* cluster_centers) +void unpack_list_data(raft::resources const& res, + const index& index, + raft::device_vector_view in_cluster_indices, + raft::device_matrix_view out_codes, + uint32_t label) +{ + detail::unpack_list_data(res, index, out_codes, label, in_cluster_indices.data_handle()); +} + +void unpack_contiguous_list_data(raft::resources const& res, + const index& index, + uint8_t* out_codes, + uint32_t n_rows, + uint32_t label, + uint32_t offset) { - detail::set_centers(handle, index, cluster_centers); + detail::unpack_contiguous_list_data(res, index, out_codes, n_rows, label, offset); } void reconstruct_list_data(raft::resources const& res, const index& index, raft::device_matrix_view out_vectors, uint32_t label, - std::variant offset_or_indices) + uint32_t offset) { - detail::reconstruct_list_data(res, index, out_vectors, label, offset_or_indices); + detail::reconstruct_list_data(res, index, out_vectors, label, offset); } void reconstruct_list_data(raft::resources const& res, const index& index, raft::device_matrix_view out_vectors, uint32_t label, - std::variant offset_or_indices) + uint32_t offset) { - detail::reconstruct_list_data(res, index, out_vectors, label, offset_or_indices); + detail::reconstruct_list_data(res, index, out_vectors, label, offset); } void reconstruct_list_data(raft::resources const& res, const index& index, raft::device_matrix_view out_vectors, uint32_t label, - std::variant offset_or_indices) + uint32_t offset) { - detail::reconstruct_list_data( - res, index, out_vectors, label, offset_or_indices); + detail::reconstruct_list_data(res, index, out_vectors, label, offset); } -void pack_contiguous_list_data(raft::resources const& res, - index* index, - const uint8_t* new_codes, - uint32_t n_rows, - uint32_t label, - std::variant offset_or_indices) +void reconstruct_list_data(raft::resources const& res, + const index& index, + raft::device_vector_view in_cluster_indices, + raft::device_matrix_view out_vectors, + uint32_t label) { - detail::pack_contiguous_list_data( - res, index, new_codes, n_rows, label, offset_or_indices); + detail::reconstruct_list_data( + res, index, out_vectors, label, in_cluster_indices.data_handle()); +} +void reconstruct_list_data(raft::resources const& res, + const index& index, + raft::device_vector_view in_cluster_indices, + raft::device_matrix_view out_vectors, + uint32_t label) +{ + detail::reconstruct_list_data( + res, index, out_vectors, label, in_cluster_indices.data_handle()); +} +void reconstruct_list_data(raft::resources const& res, + const index& index, + raft::device_vector_view in_cluster_indices, + raft::device_matrix_view out_vectors, + uint32_t label) +{ + detail::reconstruct_list_data( + res, index, out_vectors, label, in_cluster_indices.data_handle()); } void extend_list_with_codes( @@ -159,9 +208,85 @@ void extend_list(raft::resources const& res, detail::extend_list(res, index, new_vectors, new_indices, label); } +}; // namespace codepacker + void erase_list(raft::resources const& res, index* index, uint32_t label) { detail::erase_list(res, index, label); } +void reset_index(const raft::resources& res, index* index) +{ + auto stream = raft::resource::get_cuda_stream(res); + + cuvs::spatial::knn::detail::utils::memzero( + index->accum_sorted_sizes().data_handle(), index->accum_sorted_sizes().size(), stream); + cuvs::spatial::knn::detail::utils::memzero( + index->list_sizes().data_handle(), index->list_sizes().size(), stream); + cuvs::spatial::knn::detail::utils::memzero( + index->data_ptrs().data_handle(), index->data_ptrs().size(), stream); + cuvs::spatial::knn::detail::utils::memzero( + index->inds_ptrs().data_handle(), index->inds_ptrs().size(), stream); +} + +void make_rotation_matrix(raft::resources const& handle, + bool force_random_rotation, + uint32_t n_rows, + uint32_t n_cols, + float* rotation_matrix, + raft::random::RngState rng = raft::random::RngState(7ULL)) +{ + raft::common::nvtx::range fun_scope( + "ivf_pq::make_rotation_matrix(%u * %u)", n_rows, n_cols); + auto stream = raft::resource::get_cuda_stream(handle); + bool inplace = n_rows == n_cols; + uint32_t n = std::max(n_rows, n_cols); + if (force_random_rotation || !inplace) { + rmm::device_uvector buf(inplace ? 0 : n * n, stream); + float* mat = inplace ? rotation_matrix : buf.data(); + raft::random::normal(handle, rng, mat, n * n, 0.0f, 1.0f); + raft::linalg::detail::qrGetQ_inplace(handle, mat, n, n, stream); + if (!inplace) { + RAFT_CUDA_TRY(cudaMemcpy2DAsync(rotation_matrix, + sizeof(float) * n_cols, + mat, + sizeof(float) * n, + sizeof(float) * n_cols, + n_rows, + cudaMemcpyDefault, + stream)); + } + } else { + uint32_t stride = n + 1; + auto rotation_matrix_view = + raft::make_device_vector_view(rotation_matrix, n * n); + raft::linalg::map_offset(handle, rotation_matrix_view, [stride] __device__(uint32_t i) { + return static_cast(i % stride == 0u); + }); + } +} + +void make_rotation_matrix(raft::resources const& res, + index* index, + bool force_random_rotation) +{ + make_rotation_matrix(res, + force_random_rotation, + index->rot_dim(), + index->dim(), + index->rotation_matrix().data_handle()); +} + +void set_centers(raft::resources const& handle, + index* index, + raft::device_matrix_view cluster_centers) +{ + RAFT_EXPECTS(cluster_centers.extent(0) == index->n_lists(), + "Number of rows in the new centers must be equal to the number of IVF lists"); + RAFT_EXPECTS(cluster_centers.extent(1) == index->dim(), + "Number of columns in the new cluster centers and index dim are different"); + RAFT_EXPECTS(index->size() == 0, "Index must be empty"); + detail::set_centers(handle, index, cluster_centers.data_handle()); +} + } // namespace cuvs::neighbors::ivf_pq::helpers diff --git a/cpp/src/neighbors/ivf_pq/ivf_pq_search.cuh b/cpp/src/neighbors/ivf_pq/ivf_pq_search.cuh index 5c77e0008..ed28969a5 100644 --- a/cpp/src/neighbors/ivf_pq/ivf_pq_search.cuh +++ b/cpp/src/neighbors/ivf_pq/ivf_pq_search.cuh @@ -1,9 +1,9 @@ /* - * raft::copyright (c) 2022-2024, NVIDIA CORPORATION. + * Copyright (c) 2022-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. - * You may obtain a raft::copy of the License at + * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * diff --git a/cpp/test/neighbors/ann_ivf_pq.cuh b/cpp/test/neighbors/ann_ivf_pq.cuh index 0a17aec12..985134221 100644 --- a/cpp/test/neighbors/ann_ivf_pq.cuh +++ b/cpp/test/neighbors/ann_ivf_pq.cuh @@ -19,12 +19,12 @@ #include "ann_utils.cuh" #include "naive_knn.cuh" #include -#include #include #include #include #include +#include #include namespace cuvs::neighbors::ivf_pq { @@ -263,7 +263,8 @@ class ivf_pq_test : public ::testing::TestWithParam { auto rec_data = raft::make_device_matrix(handle_, n_take, dim); auto orig_data = raft::make_device_matrix(handle_, n_take, dim); - ivf_pq::helpers::reconstruct_list_data(handle_, index, rec_data.view(), label, n_skip); + ivf_pq::helpers::codepacker::reconstruct_list_data( + handle_, index, rec_data.view(), label, n_skip); raft::matrix::gather(database.data(), IdxT{dim}, @@ -288,11 +289,13 @@ class ivf_pq_test : public ::testing::TestWithParam { auto indices = raft::make_device_vector(handle_, n_rows); raft::copy(indices.data_handle(), old_list->indices.data_handle(), n_rows, stream_); - ivf_pq::helpers::reconstruct_list_data(handle_, *index, vectors_1.view(), label, uint32_t(0)); + ivf_pq::helpers::codepacker::reconstruct_list_data( + handle_, *index, vectors_1.view(), label, uint32_t(0)); ivf_pq::helpers::erase_list(handle_, index, label); // NB: passing the type parameter because const->non-const implicit conversion of the mdspans // breaks type inference - ivf_pq::helpers::extend_list(handle_, index, vectors_1.view(), indices.view(), label); + ivf_pq::helpers::codepacker::extend_list( + handle_, index, vectors_1.view(), indices.view(), label); auto& new_list = index->lists()[label]; ASSERT_NE(old_list.get(), new_list.get()) @@ -300,7 +303,8 @@ class ivf_pq_test : public ::testing::TestWithParam { "corresponding cluster."; auto vectors_2 = raft::make_device_matrix(handle_, n_rows, index->dim()); - ivf_pq::helpers::reconstruct_list_data(handle_, *index, vectors_2.view(), label, uint32_t(0)); + ivf_pq::helpers::codepacker::reconstruct_list_data( + handle_, *index, vectors_2.view(), label, uint32_t(0)); // The code search is unstable, and there's high chance of repeating values of the lvl-2 codes. // Hence, encoding-decoding chain often leads to altering both the PQ codes and the // reconstructed data. @@ -322,7 +326,8 @@ class ivf_pq_test : public ::testing::TestWithParam { ivf_pq::helpers::codepacker::unpack_list_data( handle_, *index, codes.view(), label, uint32_t(0)); ivf_pq::helpers::erase_list(handle_, index, label); - ivf_pq::helpers::extend_list_with_codes(handle_, index, codes.view(), indices.view(), label); + ivf_pq::helpers::codepacker::extend_list_with_codes( + handle_, index, codes.view(), indices.view(), label); auto& new_list = index->lists()[label]; ASSERT_NE(old_list.get(), new_list.get()) @@ -358,22 +363,12 @@ class ivf_pq_test : public ::testing::TestWithParam { uint32_t n_take = 4; ASSERT_TRUE(row_offset + n_take < n_rows); auto codes2 = raft::make_device_matrix(handle_, n_take, index->pq_dim()); - ivf_pq::helpers::codepacker::unpack_list_data( - // handle_, list_data, index->pq_bits(), row_offset, codes2.view()); - handle_, - *index, - codes2.view(), - label, - uint32_t(row_offset)); + ivf_pq::helpers::codepacker::unpack( + handle_, list_data, index->pq_bits(), row_offset, codes2.view()); // Write it back - ivf_pq::helpers::codepacker::pack_list_data( - // handle_, make_const_mdspan(codes2.view()), index->pq_bits(), row_offset, list_data); - handle_, - index, - make_const_mdspan(codes2.view()), - label, - uint32_t(row_offset)); + ivf_pq::helpers::codepacker::pack( + handle_, make_const_mdspan(codes2.view()), index->pq_bits(), row_offset, list_data); ASSERT_TRUE(cuvs::devArrMatch(old_list->data.data_handle(), new_list->data.data_handle(), list_data_size, diff --git a/docs/source/cpp_api/neighbors_ivf_pq.rst b/docs/source/cpp_api/neighbors_ivf_pq.rst index 0d4d7061a..cc515682b 100644 --- a/docs/source/cpp_api/neighbors_ivf_pq.rst +++ b/docs/source/cpp_api/neighbors_ivf_pq.rst @@ -66,3 +66,15 @@ Index serialize :project: cuvs :members: :content-only: + +Helper Methods +--------------- + +Additional helper functions for manipulating the underlying data of an IVF-PQ index, unpacking records, and writing PQ codes into an existing IVF list. + +namespace *cuvs::neighbors::ivf_pq::helpers* + +.. doxygengroup:: ivf_pq_cpp_helpers + :project: cuvs + :members: + :content-only: From 85aa026ed65f929f983d7dd4a3974ddb17e9226d Mon Sep 17 00:00:00 2001 From: Ben Frederickson Date: Fri, 17 May 2024 15:28:42 -0700 Subject: [PATCH 3/5] Speed-up rust build (#138) We are using cmake-rs to link the c++ cmake build and the rust build. This had the side effect of causing us to always recompile all of libcuvs with the rust bindings. With the migration of the libraft ann code to cuvs, this has led to the rust bindings being the bottleneck in building. Get around this by creating a new dummy cmake target that depends on cuvs, and having this target only build libcuvs if needed. Authors: - Ben Frederickson (https://github.com/benfred) Approvers: - Corey J. Nolet (https://github.com/cjnolet) URL: https://github.com/rapidsai/cuvs/pull/138 --- rust/cuvs-sys/CMakeLists.txt | 65 ++++++++++++++++++++++++++++++++++++ rust/cuvs-sys/build.rs | 13 +------- 2 files changed, 66 insertions(+), 12 deletions(-) create mode 100644 rust/cuvs-sys/CMakeLists.txt diff --git a/rust/cuvs-sys/CMakeLists.txt b/rust/cuvs-sys/CMakeLists.txt new file mode 100644 index 000000000..e2ca3a444 --- /dev/null +++ b/rust/cuvs-sys/CMakeLists.txt @@ -0,0 +1,65 @@ +# ============================================================================= +# Copyright (c) 2024, NVIDIA CORPORATION. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except +# in compliance with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software distributed under the License +# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express +# or implied. See the License for the specific language governing permissions and limitations under +# the License. +# ============================================================================= + +cmake_minimum_required(VERSION 3.26.4 FATAL_ERROR) + +include(../../rapids_config.cmake) +include(rapids-cmake) +include(rapids-cpm) +include(rapids-export) +include(rapids-find) +rapids_cpm_init() + +# we want to use the already built libcuvs if its available, but the rust cmake-rs project doesn't +# support anything like find_package https://github.com/rust-lang/cmake-rs/issues/111 instead we're +# adding an extra level of indirection here - cmake-rs will attempt to build this project, and we'll +# using the existing libcuvs if its already built, and only fall back to building libcuvs if it +# isn't + +project( + cuvs-rs + VERSION "${RAPIDS_VERSION}" + LANGUAGES CXX CUDA +) + +option(FIND_CUVS_CPP "Search for existing CUVS C++ installations before defaulting to local files" + ON +) + +# If the user requested it we attempt to find CUVS. +if(FIND_CUVS_CPP) + find_package(cuvs "${RAPIDS_VERSION}" REQUIRED COMPONENTS compiled) + if(NOT TARGET cuvs::cuvs) + message( + FATAL_ERROR + "Building against a preexisting libcuvs library requires the compiled libcuvs to have been built!" + ) + + endif() +else() + set(cuvs_FOUND OFF) +endif() + +if(NOT cuvs_FOUND) + set(BUILD_TESTS OFF) + set(BUILD_C_LIBRARY ON) + add_subdirectory(../../cpp cuvs-cpp EXCLUDE_FROM_ALL) +endif() + +include(../../cpp/cmake/thirdparty/get_dlpack.cmake) + +# add a dummy target here, +add_library(cuvs-rust INTERFACE) +target_link_libraries(cuvs-rust INTERFACE cuvs::cuvs) +install(TARGETS cuvs-rust) diff --git a/rust/cuvs-sys/build.rs b/rust/cuvs-sys/build.rs index 3dcf185de..bcb0a88c7 100644 --- a/rust/cuvs-sys/build.rs +++ b/rust/cuvs-sys/build.rs @@ -18,20 +18,9 @@ use std::env; use std::io::BufRead; use std::path::PathBuf; -/* - TODO: - * would be nice to use already built versions of libcuvs_c / libcuvs - if they already existed, but this might not be possible here using cmake-rs - (https://github.com/rust-lang/cmake-rs/issues/111) - * figure out how this works with rust packaging: does the c++ code - need to be in a subdirectory? If so would a symlink work here - should we be using static linking ? -*/ fn main() { // build the cuvs c-api library with cmake, and link it into this crate - let cuvs_build = cmake::Config::new("../../cpp") - .configure_arg("-DBUILD_TESTS:BOOL=OFF") - .configure_arg("-DBUILD_C_LIBRARY:BOOL=ON") + let cuvs_build = cmake::Config::new(".") .build(); println!( From cb581ac648f4d829ffd85385b8161edde13dcbaf Mon Sep 17 00:00:00 2001 From: Jake Awe <50372925+AyodeAwe@users.noreply.github.com> Date: Mon, 20 May 2024 09:21:59 -0500 Subject: [PATCH 4/5] Adds missing files to `update-version.sh` (#69) Co-authored-by: Corey J. Nolet Co-authored-by: Ray Douglass <3107146+raydouglass@users.noreply.github.com> --- ci/release/update-version.sh | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/ci/release/update-version.sh b/ci/release/update-version.sh index b8f15aba7..0082461c5 100755 --- a/ci/release/update-version.sh +++ b/ci/release/update-version.sh @@ -37,7 +37,7 @@ function sed_runner() { sed -i.bak ''"$1"'' $2 && rm -f ${2}.bak } -sed_runner "s/set(RAPIDS_VERSION .*)/set(RAPIDS_VERSION \"${NEXT_SHORT_TAG}\")/g" cpp/examples/cmake/thirdparty/fetch_rapids.cmake +sed_runner "s/set(RAPIDS_VERSION .*)/set(RAPIDS_VERSION \"${NEXT_SHORT_TAG}\")/g" examples/cmake/thirdparty/fetch_rapids.cmake # Centralized version file update echo "${NEXT_FULL_TAG}" > VERSION @@ -47,6 +47,7 @@ DEPENDENCIES=( cuvs cuvs-cu11 cuvs-cu12 + pylibraft pylibraft-cu11 pylibraft-cu12 rmm @@ -68,19 +69,13 @@ for FILE in python/*/pyproject.toml; do sed_runner "/\"ucx-py==/ s/==.*\"/==${NEXT_UCX_PY_SHORT_TAG_PEP440}.*\"/g" ${FILE} done -sed_runner "/^ucx_py_version:$/ {n;s/.*/ - \"${NEXT_UCX_PY_VERSION}\"/}" conda/recipes/raft-dask/conda_build_config.yaml - for FILE in .github/workflows/*.yaml; do sed_runner "/shared-workflows/ s/@.*/@branch-${NEXT_SHORT_TAG}/g" "${FILE}" done -sed_runner "/^set(CUVS_VERSION/ s|\".*\"|\"${NEXT_SHORT_TAG}\"|g" docs/source/build.md -sed_runner "s|branch-[0-9][0-9].[0-9][0-9]|branch-${NEXT_SHORT_TAG}|g" docs/source/build.md sed_runner "/rapidsai\/raft/ s|branch-[0-9][0-9].[0-9][0-9]|branch-${NEXT_SHORT_TAG}|g" docs/source/developer_guide.md -sed_runner "s|:[0-9][0-9].[0-9][0-9]|:${NEXT_SHORT_TAG}|g" docs/source/raft_ann_benchmarks.md - -sed_runner "s|branch-[0-9][0-9].[0-9][0-9]|branch-${NEXT_SHORT_TAG}|g" README.md +sed_runner "s|=[0-9][0-9].[0-9][0-9]|=${NEXT_SHORT_TAG}|g" README.md # rust can't handle leading 0's in the major/minor/patch version - remove NEXT_FULL_RUST_TAG=$(printf "%d.%d.%d" $((10#$NEXT_MAJOR)) $((10#$NEXT_MINOR)) $((10#$NEXT_PATCH))) @@ -90,6 +85,7 @@ sed_runner "s/version = \".*\"/version = \"${NEXT_FULL_RUST_TAG}\"/g" rust/Cargo find .devcontainer/ -type f -name devcontainer.json -print0 | while IFS= read -r -d '' filename; do sed_runner "s@rapidsai/devcontainers:[0-9.]*@rapidsai/devcontainers:${NEXT_SHORT_TAG}@g" "${filename}" sed_runner "s@rapidsai/devcontainers/features/ucx:[0-9.]*@rapidsai/devcontainers/features/ucx:${NEXT_SHORT_TAG_PEP440}@" "${filename}" + sed_runner "s@rapidsai/devcontainers/features/cuda:[0-9.]*@rapidsai/devcontainers/features/cuda:${NEXT_SHORT_TAG_PEP440}@" "${filename}" sed_runner "s@rapidsai/devcontainers/features/rapids-build-utils:[0-9.]*@rapidsai/devcontainers/features/rapids-build-utils:${NEXT_SHORT_TAG_PEP440}@" "${filename}" sed_runner "s@rapids-\${localWorkspaceFolderBasename}-${CURRENT_SHORT_TAG}@rapids-\${localWorkspaceFolderBasename}-${NEXT_SHORT_TAG}@g" "${filename}" done From b757c19b3a02eb8c4e5cebbbb148cbfc09825cb9 Mon Sep 17 00:00:00 2001 From: "Corey J. Nolet" Date: Tue, 21 May 2024 13:29:45 -0400 Subject: [PATCH 5/5] Removing `libraft.so` from libcuvs dependencies (#132) Unfortunately, libraft.so is still a dependency of pylibraft and so it's needed in order to use pylibraft. We'll work on that during the 24.08 release cycle. For now, libraft.so is no longer a required dependency to build `libcuvs` or any of its wrappers. Authors: - Corey J. Nolet (https://github.com/cjnolet) - Micka (https://github.com/lowener) Approvers: - Divye Gala (https://github.com/divyegala) URL: https://github.com/rapidsai/cuvs/pull/132 --- .pre-commit-config.yaml | 1 - cpp/CMakeLists.txt | 25 +- cpp/cmake/thirdparty/get_raft.cmake | 15 +- cpp/include/cuvs/cluster/agglomerative.hpp | 2 +- cpp/include/cuvs/cluster/kmeans.hpp | 2 +- .../distance/{distance_types.h => distance.h} | 0 .../{pairwise_distance.hpp => distance.hpp} | 55 ++- cpp/include/cuvs/distance/distance_types.hpp | 63 ---- cpp/include/cuvs/neighbors/ann_types.hpp | 90 ----- cpp/include/cuvs/neighbors/brute_force.h | 2 +- cpp/include/cuvs/neighbors/brute_force.hpp | 6 +- cpp/include/cuvs/neighbors/cagra.hpp | 18 +- .../neighbors/{dataset.hpp => common.hpp} | 315 +++++++++++++++++- cpp/include/cuvs/neighbors/ivf_flat.h | 2 +- cpp/include/cuvs/neighbors/ivf_flat.hpp | 13 +- cpp/include/cuvs/neighbors/ivf_list.hpp | 123 ------- cpp/include/cuvs/neighbors/ivf_pq.h | 2 +- cpp/include/cuvs/neighbors/ivf_pq.hpp | 11 +- cpp/include/cuvs/neighbors/nn_descent.hpp | 12 +- cpp/include/cuvs/neighbors/sample_filter.hpp | 171 ---------- .../cuvs_internal/neighbors/naive_knn.cuh | 2 +- .../cuvs_internal/neighbors/refine_helper.cuh | 2 +- cpp/src/cluster/detail/connectivities.cuh | 3 +- cpp/src/cluster/detail/kmeans.cuh | 2 +- cpp/src/cluster/detail/kmeans_balanced.cuh | 2 +- cpp/src/cluster/detail/kmeans_common.cuh | 2 +- cpp/src/distance/detail/distance.cuh | 2 +- cpp/src/distance/detail/fused_distance_nn.cuh | 2 +- .../distance/detail/kernels/gram_matrix.cuh | 2 +- .../detail/kernels/kernel_factory.cuh | 2 +- cpp/src/distance/distance-ext.cuh | 12 +- cpp/src/distance/distance-inl.cuh | 2 +- cpp/src/distance/pairwise_distance.cu | 3 +- cpp/src/neighbors/brute_force.cu | 6 +- cpp/src/neighbors/cagra.cuh | 4 +- cpp/src/neighbors/cagra_c.cpp | 4 +- cpp/src/neighbors/detail/ann_utils.cuh | 2 +- .../neighbors/detail/cagra/cagra_build.cuh | 2 +- .../neighbors/detail/cagra/cagra_search.cuh | 4 +- .../detail/cagra/compute_distance.hpp | 2 +- .../detail/cagra/compute_distance_vpq.cuh | 2 +- cpp/src/neighbors/detail/cagra/factory.cuh | 2 +- .../detail/cagra/search_multi_cta.cuh | 2 +- .../detail/cagra/search_multi_cta_inst.cuh | 2 +- .../cagra/search_multi_cta_kernel-ext.cuh | 4 +- .../cagra/search_multi_cta_kernel-inl.cuh | 4 +- .../detail/cagra/search_multi_kernel.cuh | 4 +- .../neighbors/detail/cagra/search_plan.cuh | 4 +- .../detail/cagra/search_single_cta_inst.cuh | 2 +- .../cagra/search_single_cta_kernel-ext.cuh | 2 +- .../cagra/search_single_cta_kernel-inl.cuh | 4 +- .../neighbors/detail/dataset_serialize.hpp | 2 +- cpp/src/neighbors/detail/knn_brute_force.cuh | 2 +- cpp/src/neighbors/detail/refine_common.hpp | 2 +- cpp/src/neighbors/detail/refine_device.cuh | 2 +- cpp/src/neighbors/detail/refine_host-ext.hpp | 8 +- cpp/src/neighbors/detail/vpq_dataset.cuh | 2 +- cpp/src/neighbors/ivf_common.cuh | 2 +- cpp/src/neighbors/ivf_flat/ivf_flat_build.cuh | 4 +- .../ivf_flat/ivf_flat_interleaved_scan.cuh | 4 +- .../neighbors/ivf_flat/ivf_flat_search.cuh | 16 +- .../neighbors/ivf_flat/ivf_flat_serialize.cuh | 2 +- cpp/src/neighbors/ivf_flat_index.cpp | 2 +- cpp/src/neighbors/ivf_list.cuh | 2 +- cpp/src/neighbors/ivf_pq/ivf_pq_build.cuh | 4 +- .../neighbors/ivf_pq/ivf_pq_build_common.cu | 4 +- .../neighbors/ivf_pq/ivf_pq_codepacking.cuh | 2 +- .../ivf_pq/ivf_pq_compute_similarity.cuh | 8 +- .../ivf_pq/ivf_pq_compute_similarity_impl.cuh | 8 +- cpp/src/neighbors/ivf_pq/ivf_pq_list.cuh | 2 +- cpp/src/neighbors/ivf_pq/ivf_pq_search.cuh | 2 +- cpp/src/neighbors/ivf_pq/ivf_pq_serialize.cuh | 2 +- cpp/src/neighbors/ivf_pq_index.cpp | 2 +- cpp/src/neighbors/refine-ext.cuh | 10 +- cpp/src/neighbors/sample_filter.cuh | 2 +- cpp/src/neighbors/vpq_dataset.cuh | 2 +- cpp/test/CMakeLists.txt | 1 + cpp/test/cluster/linkage.cu | 2 +- cpp/test/distance/distance_base.cuh | 4 +- cpp/test/neighbors/ann_cagra.cuh | 2 +- cpp/test/neighbors/ann_ivf_pq.cuh | 2 +- cpp/test/neighbors/ann_utils.cuh | 2 +- cpp/test/neighbors/brute_force.cu | 2 +- cpp/test/neighbors/naive_knn.cuh | 2 +- python/cuvs/cuvs/distance_type.pxd | 2 +- 85 files changed, 519 insertions(+), 620 deletions(-) rename cpp/include/cuvs/distance/{distance_types.h => distance.h} (100%) rename cpp/include/cuvs/distance/{pairwise_distance.hpp => distance.hpp} (81%) delete mode 100644 cpp/include/cuvs/distance/distance_types.hpp delete mode 100644 cpp/include/cuvs/neighbors/ann_types.hpp rename cpp/include/cuvs/neighbors/{dataset.hpp => common.hpp} (53%) delete mode 100644 cpp/include/cuvs/neighbors/ivf_list.hpp delete mode 100644 cpp/include/cuvs/neighbors/sample_filter.hpp diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 576b53907..b1de648b0 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -114,7 +114,6 @@ repos: setup[.]cfg$ exclude: | (?x) - cpp/include/cuvs/neighbors/detail/faiss_select/| docs/source/sphinxext/github_link\.py| cpp/cmake/modules/FindAVX\.cmake| diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index 5715ee8a2..b972fc122 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -447,14 +447,29 @@ target_include_directories( "$" ) +rapids_find_package( + OpenMP REQUIRED + BUILD_EXPORT_SET cuvs-exports + INSTALL_EXPORT_SET cuvs-exports +) + if(NOT BUILD_CPU_ONLY) + + set(CUVS_CUSOLVER_DEPENDENCY CUDA::cusolver${_ctk_static_suffix}) + set(CUVS_CUBLAS_DEPENDENCY CUDA::cublas${_ctk_static_suffix}) + set(CUVS_CURAND_DEPENDENCY CUDA::curand${_ctk_static_suffix}) + set(CUVS_CUSPARSE_DEPENDENCY CUDA::cusparse${_ctk_static_suffix}) + + set(CUVS_CTK_MATH_DEPENDENCIES ${CUVS_CUBLAS_DEPENDENCY} ${CUVS_CUSOLVER_DEPENDENCY} + ${CUVS_CUSPARSE_DEPENDENCY} ${CUVS_CURAND_DEPENDENCY} + ) + # Keep cuVS as lightweight as possible. Only CUDA libs and rmm should be used in global target. target_link_libraries( cuvs - PUBLIC rmm::rmm $<$>:raft::raft> - $<$>:raft::compiled> - PRIVATE $<$:raft::raft> - $<$:raft::compiled_static> nvidia::cutlass::cutlass + PUBLIC rmm::rmm raft::raft + PRIVATE nvidia::cutlass::cutlass ${CUVS_CTK_MATH_DEPENDENCIES} + $ ) endif() @@ -559,7 +574,7 @@ if(BUILD_C_LIBRARY) target_link_libraries( cuvs_c PUBLIC cuvs::cuvs - PRIVATE $<$:raft::raft> + PRIVATE raft::raft ${CUVS_CTK_MATH_DEPENDENCIES} ) # ensure CUDA symbols aren't relocated to the middle of the debug build binaries diff --git a/cpp/cmake/thirdparty/get_raft.cmake b/cpp/cmake/thirdparty/get_raft.cmake index e3c2de320..44825f91c 100644 --- a/cpp/cmake/thirdparty/get_raft.cmake +++ b/cpp/cmake/thirdparty/get_raft.cmake @@ -17,7 +17,7 @@ set(RAFT_FORK "rapidsai") set(RAFT_PINNED_TAG "branch-${RAPIDS_VERSION_MAJOR_MINOR}") function(find_and_configure_raft) - set(oneValueArgs VERSION FORK PINNED_TAG COMPILE_LIBRARY USE_RAFT_STATIC ENABLE_NVTX ENABLE_MNMG_DEPENDENCIES) + set(oneValueArgs VERSION FORK PINNED_TAG USE_RAFT_STATIC ENABLE_NVTX ENABLE_MNMG_DEPENDENCIES) cmake_parse_arguments(PKG "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN} ) @@ -31,16 +31,6 @@ function(find_and_configure_raft) set(RAFT_COMPONENTS "") - if(PKG_COMPILE_LIBRARY) - if(NOT PKG_USE_RAFT_STATIC) - string(APPEND RAFT_COMPONENTS " compiled") - set(RAFT_COMPILED_LIB raft::compiled PARENT_SCOPE) - else() - string(APPEND RAFT_COMPONENTS " compiled_static") - set(RAFT_COMPILED_LIB raft::compiled_static PARENT_SCOPE) - endif() - endif() - if(PKG_ENABLE_MNMG_DEPENDENCIES) string(APPEND RAFT_COMPONENTS " distributed") endif() @@ -62,7 +52,7 @@ function(find_and_configure_raft) "BUILD_PRIMS_BENCH OFF" "BUILD_ANN_BENCH OFF" "RAFT_NVTX ${PKG_ENABLE_NVTX}" - "RAFT_COMPILE_LIBRARY ${PKG_COMPILE_LIBRARY}" + "RAFT_COMPILE_LIBRARY OFF" ) endfunction() @@ -72,7 +62,6 @@ endfunction() find_and_configure_raft(VERSION ${RAFT_VERSION}.00 FORK ${RAFT_FORK} PINNED_TAG ${RAFT_PINNED_TAG} - COMPILE_LIBRARY ON ENABLE_MNMG_DEPENDENCIES OFF ENABLE_NVTX OFF USE_RAFT_STATIC ${CUVS_USE_RAFT_STATIC} diff --git a/cpp/include/cuvs/cluster/agglomerative.hpp b/cpp/include/cuvs/cluster/agglomerative.hpp index 5da03c4d1..658f5628c 100644 --- a/cpp/include/cuvs/cluster/agglomerative.hpp +++ b/cpp/include/cuvs/cluster/agglomerative.hpp @@ -16,7 +16,7 @@ #pragma once -#include +#include #include #include #include diff --git a/cpp/include/cuvs/cluster/kmeans.hpp b/cpp/include/cuvs/cluster/kmeans.hpp index 471a4d12d..7a372b1bc 100644 --- a/cpp/include/cuvs/cluster/kmeans.hpp +++ b/cpp/include/cuvs/cluster/kmeans.hpp @@ -14,7 +14,7 @@ * limitations under the License. */ #pragma once -#include +#include #include #include #include diff --git a/cpp/include/cuvs/distance/distance_types.h b/cpp/include/cuvs/distance/distance.h similarity index 100% rename from cpp/include/cuvs/distance/distance_types.h rename to cpp/include/cuvs/distance/distance.h diff --git a/cpp/include/cuvs/distance/pairwise_distance.hpp b/cpp/include/cuvs/distance/distance.hpp similarity index 81% rename from cpp/include/cuvs/distance/pairwise_distance.hpp rename to cpp/include/cuvs/distance/distance.hpp index 5f774e3bc..5786b0a32 100644 --- a/cpp/include/cuvs/distance/pairwise_distance.hpp +++ b/cpp/include/cuvs/distance/distance.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2024, NVIDIA CORPORATION. + * Copyright (c) 2021-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -16,13 +16,54 @@ #pragma once +#include "distance.h" + #include -#include #include #include namespace cuvs::distance { +using DistanceType = cuvsDistanceType; + +/** + * Whether minimal distance corresponds to similar elements (using the given metric). + */ +inline bool is_min_close(DistanceType metric) +{ + bool select_min; + switch (metric) { + case DistanceType::InnerProduct: + // Similarity metrics have the opposite meaning, i.e. nearest neighbors are those with larger + // similarity (See the same logic at cpp/include/raft/sparse/spatial/detail/knn.cuh:362 + // {perform_k_selection}) + select_min = false; + break; + default: select_min = true; + } + return select_min; +} + +namespace kernels { +enum KernelType { LINEAR, POLYNOMIAL, RBF, TANH }; + +/** + * Parameters for kernel matrices. + * The following kernels are implemented: + * - LINEAR \f[ K(x_1,x_2) = , \f] where \f$< , >\f$ is the dot product + * - POLYNOMIAL \f[ K(x_1, x_2) = (\gamma + \mathrm{coef0})^\mathrm{degree} \f] + * - RBF \f[ K(x_1, x_2) = \exp(- \gamma |x_1-x_2|^2) \f] + * - TANH \f[ K(x_1, x_2) = \tanh(\gamma + \mathrm{coef0}) \f] + */ +struct KernelParams { + // Kernel function parameters + KernelType kernel; //!< Type of the kernel function + int degree; //!< Degree of polynomial kernel (ignored by others) + double gamma; //!< multiplier in the + double coef0; //!< additive constant in poly and tanh kernels +}; +} // end namespace kernels + /** * @defgroup pairwise_distance Pairwise Distances API * @{ @@ -37,7 +78,7 @@ namespace cuvs::distance { * @code{.cpp} * #include * #include - * #include + * #include * * raft::resources handle; * int n_samples = 5000; @@ -81,7 +122,7 @@ void pairwise_distance( * @code{.cpp} * #include * #include - * #include + * #include * * raft::resources handle; * int n_samples = 5000; @@ -125,7 +166,7 @@ void pairwise_distance( * @code{.cpp} * #include * #include - * #include + * #include * * raft::resources handle; * int n_samples = 5000; @@ -168,7 +209,7 @@ void pairwise_distance( * @code{.cpp} * #include * #include - * #include + * #include * * raft::resources handle; * int n_samples = 5000; @@ -205,4 +246,4 @@ void pairwise_distance( /** @} */ // end group pairwise_distance_runtime -} // namespace cuvs::distance +}; // namespace cuvs::distance diff --git a/cpp/include/cuvs/distance/distance_types.hpp b/cpp/include/cuvs/distance/distance_types.hpp deleted file mode 100644 index 8d6eb7137..000000000 --- a/cpp/include/cuvs/distance/distance_types.hpp +++ /dev/null @@ -1,63 +0,0 @@ -/* - * Copyright (c) 2021-2023, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#pragma once - -#include "distance_types.h" - -namespace cuvs::distance { - -using DistanceType = cuvsDistanceType; - -/** - * Whether minimal distance corresponds to similar elements (using the given metric). - */ -inline bool is_min_close(DistanceType metric) -{ - bool select_min; - switch (metric) { - case DistanceType::InnerProduct: - // Similarity metrics have the opposite meaning, i.e. nearest neighbors are those with larger - // similarity (See the same logic at cpp/include/raft/sparse/spatial/detail/knn.cuh:362 - // {perform_k_selection}) - select_min = false; - break; - default: select_min = true; - } - return select_min; -} - -namespace kernels { -enum KernelType { LINEAR, POLYNOMIAL, RBF, TANH }; - -/** - * Parameters for kernel matrices. - * The following kernels are implemented: - * - LINEAR \f[ K(x_1,x_2) = , \f] where \f$< , >\f$ is the dot product - * - POLYNOMIAL \f[ K(x_1, x_2) = (\gamma + \mathrm{coef0})^\mathrm{degree} \f] - * - RBF \f[ K(x_1, x_2) = \exp(- \gamma |x_1-x_2|^2) \f] - * - TANH \f[ K(x_1, x_2) = \tanh(\gamma + \mathrm{coef0}) \f] - */ -struct KernelParams { - // Kernel function parameters - KernelType kernel; //!< Type of the kernel function - int degree; //!< Degree of polynomial kernel (ignored by others) - double gamma; //!< multiplier in the - double coef0; //!< additive constant in poly and tanh kernels -}; -} // end namespace kernels - -}; // namespace cuvs::distance diff --git a/cpp/include/cuvs/neighbors/ann_types.hpp b/cpp/include/cuvs/neighbors/ann_types.hpp deleted file mode 100644 index e9b7bd903..000000000 --- a/cpp/include/cuvs/neighbors/ann_types.hpp +++ /dev/null @@ -1,90 +0,0 @@ -/* - * Copyright (c) 2022-2023, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#pragma once - -#include -#include - -namespace cuvs::neighbors::ann { - -/** Parameters for VPQ compression. */ -struct vpq_params { - /** - * The bit length of the vector element after compression by PQ. - * - * Possible values: [4, 5, 6, 7, 8]. - * - * Hint: the smaller the 'pq_bits', the smaller the index size and the better the search - * performance, but the lower the recall. - */ - uint32_t pq_bits = 8; - /** - * The dimensionality of the vector after compression by PQ. - * When zero, an optimal value is selected using a heuristic. - * - * TODO: at the moment `dim` must be a multiple `pq_dim`. - */ - uint32_t pq_dim = 0; - /** - * Vector Quantization (VQ) codebook size - number of "coarse cluster centers". - * When zero, an optimal value is selected using a heuristic. - */ - uint32_t vq_n_centers = 0; - /** The number of iterations searching for kmeans centers (both VQ & PQ phases). */ - uint32_t kmeans_n_iters = 25; - /** - * The fraction of data to use during iterative kmeans building (VQ phase). - * When zero, an optimal value is selected using a heuristic. - */ - double vq_kmeans_trainset_fraction = 0; - /** - * The fraction of data to use during iterative kmeans building (PQ phase). - * When zero, an optimal value is selected using a heuristic. - */ - double pq_kmeans_trainset_fraction = 0; -}; - -/** - * @defgroup ann_types Approximate Nearest Neighbors Types - * @{ - */ - -/** The base for approximate KNN index structures. */ -struct index {}; - -/** The base for KNN index parameters. */ -struct index_params { - /** Distance type. */ - cuvs::distance::DistanceType metric = cuvs::distance::DistanceType::L2Expanded; - /** The argument used by some distance metrics. */ - float metric_arg = 2.0f; - /** - * Whether to add the dataset content to the index, i.e.: - * - * - `true` means the index is filled with the dataset vectors and ready to search after calling - * `build`. - * - `false` means `build` only trains the underlying model (e.g. quantizer or clustering), but - * the index is left empty; you'd need to call `extend` on the index afterwards to populate it. - */ - bool add_data_on_build = true; -}; - -struct search_params {}; - -/** @} */ // end group ann_types - -}; // namespace cuvs::neighbors::ann diff --git a/cpp/include/cuvs/neighbors/brute_force.h b/cpp/include/cuvs/neighbors/brute_force.h index 145bb5555..e285eae37 100644 --- a/cpp/include/cuvs/neighbors/brute_force.h +++ b/cpp/include/cuvs/neighbors/brute_force.h @@ -17,7 +17,7 @@ #pragma once #include -#include +#include #include #include diff --git a/cpp/include/cuvs/neighbors/brute_force.hpp b/cpp/include/cuvs/neighbors/brute_force.hpp index 755a94122..755c8cfdb 100644 --- a/cpp/include/cuvs/neighbors/brute_force.hpp +++ b/cpp/include/cuvs/neighbors/brute_force.hpp @@ -16,8 +16,8 @@ #pragma once -#include "ann_types.hpp" -#include +#include "common.hpp" +#include #include #include #include @@ -37,7 +37,7 @@ namespace cuvs::neighbors::brute_force { * @tparam T data element type */ template -struct index : cuvs::neighbors::ann::index { +struct index : cuvs::neighbors::index { public: index(const index&) = delete; index(index&&) = default; diff --git a/cpp/include/cuvs/neighbors/cagra.hpp b/cpp/include/cuvs/neighbors/cagra.hpp index d094e6afa..9d976d28e 100644 --- a/cpp/include/cuvs/neighbors/cagra.hpp +++ b/cpp/include/cuvs/neighbors/cagra.hpp @@ -16,9 +16,9 @@ #pragma once -#include "ann_types.hpp" -#include -#include +#include "common.hpp" +#include +#include #include #include #include @@ -47,7 +47,7 @@ enum class graph_build_algo { NN_DESCENT }; -struct index_params : ann::index_params { +struct index_params : cuvs::neighbors::index_params { /** Degree of input graph for pruning. */ size_t intermediate_graph_degree = 128; /** Degree of output graph. */ @@ -61,7 +61,7 @@ struct index_params : ann::index_params { * * NOTE: this is experimental new API, consider it unsafe. */ - std::optional compression = std::nullopt; + std::optional compression = std::nullopt; }; /** @@ -84,7 +84,7 @@ enum class search_algo { enum class hash_mode { HASH, SMALL, AUTO }; -struct search_params : ann::search_params { +struct search_params : cuvs::neighbors::search_params { /** Maximum number of queries to search at the same time (batch size). Auto select when 0.*/ size_t max_queries = 0; @@ -149,7 +149,7 @@ static_assert(std::is_aggregate_v); * */ template -struct index : ann::index { +struct index : cuvs::neighbors::index { static_assert(!raft::is_narrowing_v, "IdxT must be able to represent all values of uint32_t"); @@ -207,7 +207,7 @@ struct index : ann::index { /** Construct an empty index. */ index(raft::resources const& res, cuvs::distance::DistanceType metric = cuvs::distance::DistanceType::L2Expanded) - : ann::index(), + : cuvs::neighbors::index(), metric_(metric), graph_(raft::make_device_matrix(res, 0, 0)), dataset_(new cuvs::neighbors::empty_dataset(0)) @@ -274,7 +274,7 @@ struct index : ann::index { raft::mdspan, raft::row_major, data_accessor> dataset, raft::mdspan, raft::row_major, graph_accessor> knn_graph) - : ann::index(), + : cuvs::neighbors::index(), metric_(metric), graph_(raft::make_device_matrix(res, 0, 0)), dataset_(make_aligned_dataset(res, dataset, 16)) diff --git a/cpp/include/cuvs/neighbors/dataset.hpp b/cpp/include/cuvs/neighbors/common.hpp similarity index 53% rename from cpp/include/cuvs/neighbors/dataset.hpp rename to cpp/include/cuvs/neighbors/common.hpp index 79c3f644f..45fa1a107 100644 --- a/cpp/include/cuvs/neighbors/dataset.hpp +++ b/cpp/include/cuvs/neighbors/common.hpp @@ -13,9 +13,11 @@ * See the License for the specific language governing permissions and * limitations under the License. */ + #pragma once -#include +#include +#include #include #include #include @@ -23,6 +25,9 @@ #include // get_device_for_address #include // rounding up +#include +#include + #include #include #include @@ -33,6 +38,72 @@ namespace cuvs::neighbors { +/** Parameters for VPQ compression. */ +struct vpq_params { + /** + * The bit length of the vector element after compression by PQ. + * + * Possible values: [4, 5, 6, 7, 8]. + * + * Hint: the smaller the 'pq_bits', the smaller the index size and the better the search + * performance, but the lower the recall. + */ + uint32_t pq_bits = 8; + /** + * The dimensionality of the vector after compression by PQ. + * When zero, an optimal value is selected using a heuristic. + * + * TODO: at the moment `dim` must be a multiple `pq_dim`. + */ + uint32_t pq_dim = 0; + /** + * Vector Quantization (VQ) codebook size - number of "coarse cluster centers". + * When zero, an optimal value is selected using a heuristic. + */ + uint32_t vq_n_centers = 0; + /** The number of iterations searching for kmeans centers (both VQ & PQ phases). */ + uint32_t kmeans_n_iters = 25; + /** + * The fraction of data to use during iterative kmeans building (VQ phase). + * When zero, an optimal value is selected using a heuristic. + */ + double vq_kmeans_trainset_fraction = 0; + /** + * The fraction of data to use during iterative kmeans building (PQ phase). + * When zero, an optimal value is selected using a heuristic. + */ + double pq_kmeans_trainset_fraction = 0; +}; + +/** + * @defgroup neighbors_index Approximate Nearest Neighbors Types + * @{ + */ + +/** The base for approximate KNN index structures. */ +struct index {}; + +/** The base for KNN index parameters. */ +struct index_params { + /** Distance type. */ + cuvs::distance::DistanceType metric = cuvs::distance::DistanceType::L2Expanded; + /** The argument used by some distance metrics. */ + float metric_arg = 2.0f; + /** + * Whether to add the dataset content to the index, i.e.: + * + * - `true` means the index is filled with the dataset vectors and ready to search after calling + * `build`. + * - `false` means `build` only trains the underlying model (e.g. quantizer or clustering), but + * the index is left empty; you'd need to call `extend` on the index afterwards to populate it. + */ + bool add_data_on_build = true; +}; + +struct search_params {}; + +/** @} */ // end group neighbors_index + /** Two-dimensional dataset; maybe owning, maybe compressed, maybe strided. */ template struct dataset { @@ -207,10 +278,6 @@ auto make_aligned_dataset(const raft::resources& res, const SrcT& src, uint32_t raft::round_up_safe(src.extent(1) * kSize, std::lcm(align_bytes, kSize)) / kSize; return make_strided_dataset(res, src, required_stride); } - -/** Parameters for VPQ compression. */ -using vpq_params = cuvs::neighbors::ann::vpq_params; - /** * @brief VPQ compressed dataset. * @@ -295,4 +362,240 @@ struct vpq_dataset : public dataset { } }; -} // namespace cuvs::neighbors +namespace filtering { + +/* A filter that filters nothing. This is the default behavior. */ +struct none_ivf_sample_filter { + inline _RAFT_HOST_DEVICE bool operator()( + // query index + const uint32_t query_ix, + // the current inverted list index + const uint32_t cluster_ix, + // the index of the current sample inside the current inverted list + const uint32_t sample_ix) const; +}; + +/* A filter that filters nothing. This is the default behavior. */ +struct none_cagra_sample_filter { + inline _RAFT_HOST_DEVICE bool operator()( + // query index + const uint32_t query_ix, + // the index of the current sample + const uint32_t sample_ix) const; +}; + +/** + * @brief Filter used to convert the cluster index and sample index + * of an IVF search into a sample index. This can be used as an + * intermediate filter. + * + * @tparam index_t Indexing type + * @tparam filter_t + */ +template +struct ivf_to_sample_filter { + const index_t* const* inds_ptrs_; + const filter_t next_filter_; + + ivf_to_sample_filter(const index_t* const* inds_ptrs, const filter_t next_filter); + + /** If the original filter takes three arguments, then don't modify the arguments. + * If the original filter takes two arguments, then we are using `inds_ptr_` to obtain the sample + * index. + */ + inline _RAFT_HOST_DEVICE bool operator()( + // query index + const uint32_t query_ix, + // the current inverted list index + const uint32_t cluster_ix, + // the index of the current sample inside the current inverted list + const uint32_t sample_ix) const; +}; + +/** + * @brief Filter an index with a bitset + * + * @tparam index_t Indexing type + */ +template +struct bitset_filter { + // View of the bitset to use as a filter + const cuvs::core::bitset_view bitset_view_; + + bitset_filter(const cuvs::core::bitset_view bitset_for_filtering); + inline _RAFT_HOST_DEVICE bool operator()( + // query index + const uint32_t query_ix, + // the index of the current sample + const uint32_t sample_ix) const; +}; + +/** + * If the filtering depends on the index of a sample, then the following + * filter template can be used: + * + * template + * struct index_ivf_sample_filter { + * using index_type = IdxT; + * + * const index_type* const* inds_ptr = nullptr; + * + * index_ivf_sample_filter() {} + * index_ivf_sample_filter(const index_type* const* _inds_ptr) + * : inds_ptr{_inds_ptr} {} + * index_ivf_sample_filter(const index_ivf_sample_filter&) = default; + * index_ivf_sample_filter(index_ivf_sample_filter&&) = default; + * index_ivf_sample_filter& operator=(const index_ivf_sample_filter&) = default; + * index_ivf_sample_filter& operator=(index_ivf_sample_filter&&) = default; + * + * inline _RAFT_HOST_DEVICE bool operator()( + * const uint32_t query_ix, + * const uint32_t cluster_ix, + * const uint32_t sample_ix) const { + * index_type database_idx = inds_ptr[cluster_ix][sample_ix]; + * + * // return true or false, depending on the database_idx + * return true; + * } + * }; + * + * Initialize it as: + * using filter_type = index_ivf_sample_filter; + * filter_type filter(cuvs_ivfpq_index.inds_ptrs().data_handle()); + * + * Use it as: + * cuvs::neighbors::ivf_pq::search_with_filtering( + * ...regular parameters here..., + * filter + * ); + * + * Another example would be the following filter that greenlights samples according + * to a contiguous bit mask vector. + * + * template + * struct bitmask_ivf_sample_filter { + * using index_type = IdxT; + * + * const index_type* const* inds_ptr = nullptr; + * const uint64_t* const bit_mask_ptr = nullptr; + * const int64_t bit_mask_stride_64 = 0; + * + * bitmask_ivf_sample_filter() {} + * bitmask_ivf_sample_filter( + * const index_type* const* _inds_ptr, + * const uint64_t* const _bit_mask_ptr, + * const int64_t _bit_mask_stride_64) + * : inds_ptr{_inds_ptr}, + * bit_mask_ptr{_bit_mask_ptr}, + * bit_mask_stride_64{_bit_mask_stride_64} {} + * bitmask_ivf_sample_filter(const bitmask_ivf_sample_filter&) = default; + * bitmask_ivf_sample_filter(bitmask_ivf_sample_filter&&) = default; + * bitmask_ivf_sample_filter& operator=(const bitmask_ivf_sample_filter&) = default; + * bitmask_ivf_sample_filter& operator=(bitmask_ivf_sample_filter&&) = default; + * + * inline _RAFT_HOST_DEVICE bool operator()( + * const uint32_t query_ix, + * const uint32_t cluster_ix, + * const uint32_t sample_ix) const { + * const index_type database_idx = inds_ptr[cluster_ix][sample_ix]; + * const uint64_t bit_mask_element = + * bit_mask_ptr[query_ix * bit_mask_stride_64 + database_idx / 64]; + * const uint64_t masked_bool = + * bit_mask_element & (1ULL << (uint64_t)(database_idx % 64)); + * const bool is_bit_set = (masked_bool != 0); + * + * return is_bit_set; + * } + * }; + */ +} // namespace filtering + +namespace ivf { + +/** + * Default value filled in the `indices` array. + * One may encounter it trying to access a record within a list that is outside of the + * `size` bound or whenever the list is allocated but not filled-in yet. + */ +template +constexpr static IdxT kInvalidRecord = + (std::is_signed_v ? IdxT{0} : std::numeric_limits::max()) - 1; + +/** The data for a single IVF list. */ +template