diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index 8ede967..229eb5b 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -545,8 +545,9 @@ target_link_options(cuvs PRIVATE "${CMAKE_CURRENT_BINARY_DIR}/fatbin.ld") # * cuvs_c ------------------------------------------------------------------------------- if(BUILD_C_LIBRARY) add_library( - cuvs_c SHARED src/core/c_api.cpp src/neighbors/brute_force_c.cpp src/neighbors/ivf_flat_c.cpp - src/neighbors/ivf_pq_c.cpp src/neighbors/cagra_c.cpp + cuvs_c SHARED + src/core/c_api.cpp src/neighbors/brute_force_c.cpp src/neighbors/ivf_flat_c.cpp + src/neighbors/ivf_pq_c.cpp src/neighbors/cagra_c.cpp src/distance/pairwise_distance_c.cpp ) add_library(cuvs::c_api ALIAS cuvs_c) diff --git a/cpp/include/cuvs/distance/pairwise_distance.h b/cpp/include/cuvs/distance/pairwise_distance.h new file mode 100644 index 0000000..a9e49b3 --- /dev/null +++ b/cpp/include/cuvs/distance/pairwise_distance.h @@ -0,0 +1,62 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +#pragma once +#include + +#include +#include + +#ifdef __cplusplus +extern "C" { +#endif + +/** + * @brief Compute pairwise distances for two matrices + * + * + * Usage example: + * @code{.c} + * #include + * #include + * + * // Create cuvsResources_t + * cuvsResources_t res; + * cuvsError_t res_create_status = cuvsResourcesCreate(&res); + * + * // Assume a populated `DLManagedTensor` type here + * DLManagedTensor x; + * DLManagedTensor y; + * DLManagedTensor dist; + * + * cuvsPairwiseDistance(res, &x, &y, &dist, L2SqrtUnexpanded, 2.0); + * @endcode + * + * @param[in] res cuvs resources object for managing expensive resources + * @param[in] x first set of points (size n*k) + * @param[in] y second set of points (size m*k) + * @param[out] dist output distance matrix (size n*m) + * @param[in] metric distance to evaluate + * @param[in] metric_arg metric argument (used for Minkowski distance) + */ +cuvsError_t cuvsPairwiseDistance(cuvsResources_t res, + DLManagedTensor* x, + DLManagedTensor* y, + DLManagedTensor* dist, + cuvsDistanceType metric, + float metric_arg); +#ifdef __cplusplus +} +#endif diff --git a/cpp/include/cuvs/neighbors/cagra.h b/cpp/include/cuvs/neighbors/cagra.h index 3a1a8c9..727c39c 100644 --- a/cpp/include/cuvs/neighbors/cagra.h +++ b/cpp/include/cuvs/neighbors/cagra.h @@ -356,7 +356,8 @@ cuvsError_t cuvsCagraBuild(cuvsResources_t res, * cuvsError_t params_create_status = cuvsCagraSearchParamsCreate(¶ms); * * // Search the `index` built using `cuvsCagraBuild` - * cuvsError_t search_status = cuvsCagraSearch(res, params, index, queries, neighbors, distances); + * cuvsError_t search_status = cuvsCagraSearch(res, params, index, &queries, &neighbors, + * &distances); * * // de-allocate `params` and `res` * cuvsError_t params_destroy_status = cuvsCagraSearchParamsDestroy(params); diff --git a/cpp/src/distance/pairwise_distance_c.cpp b/cpp/src/distance/pairwise_distance_c.cpp new file mode 100644 index 0000000..d457198 --- /dev/null +++ 
b/cpp/src/distance/pairwise_distance_c.cpp @@ -0,0 +1,83 @@ + +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include + +#include +#include +#include + +#include +#include +#include +#include + +namespace { + +template +void _pairwise_distance(cuvsResources_t res, + DLManagedTensor* x_tensor, + DLManagedTensor* y_tensor, + DLManagedTensor* distances_tensor, + cuvsDistanceType metric, + float metric_arg) +{ + auto res_ptr = reinterpret_cast(res); + + using mdspan_type = raft::device_matrix_view; + using distances_mdspan_type = raft::device_matrix_view; + + auto x_mds = cuvs::core::from_dlpack(x_tensor); + auto y_mds = cuvs::core::from_dlpack(y_tensor); + auto distances_mds = cuvs::core::from_dlpack(distances_tensor); + + cuvs::distance::pairwise_distance(*res_ptr, x_mds, y_mds, distances_mds, metric, metric_arg); +} +} // namespace + +extern "C" cuvsError_t cuvsPairwiseDistance(cuvsResources_t res, + DLManagedTensor* x_tensor, + DLManagedTensor* y_tensor, + DLManagedTensor* distances_tensor, + cuvsDistanceType metric, + float metric_arg) +{ + return cuvs::core::translate_exceptions([=] { + // Read each tensor's own dtype so the checks below really cover all three arguments. + auto x_dt = x_tensor->dl_tensor.dtype; + auto y_dt = y_tensor->dl_tensor.dtype; + auto dist_dt = distances_tensor->dl_tensor.dtype; + + if ((x_dt.code != kDLFloat) || (y_dt.code != kDLFloat) || (dist_dt.code != kDLFloat)) { + RAFT_FAIL("Inputs to cuvsPairwiseDistance must all be floating point tensors"); + } + + if 
((x_dt.bits != y_dt.bits) || (x_dt.bits != dist_dt.bits)) { + RAFT_FAIL("Inputs to cuvsPairwiseDistance must all have the same dtype"); + } + + // Dispatch on the element width, which the check above guarantees is shared. + if (x_dt.bits == 32) { + _pairwise_distance(res, x_tensor, y_tensor, distances_tensor, metric, metric_arg); + } else if (x_dt.bits == 64) { + _pairwise_distance(res, x_tensor, y_tensor, distances_tensor, metric, metric_arg); + } else { + RAFT_FAIL("Unsupported DLtensor dtype: %d and bits: %d", x_dt.code, x_dt.bits); + } + }); +} diff --git a/cpp/test/CMakeLists.txt b/cpp/test/CMakeLists.txt index fac4c62..8d53233 100644 --- a/cpp/test/CMakeLists.txt +++ b/cpp/test/CMakeLists.txt @@ -178,6 +178,10 @@ endif() if(BUILD_C_TESTS) ConfigureTest(NAME INTEROP_TEST PATH test/core/interop.cu C_LIB) + ConfigureTest( + NAME DISTANCE_C_TEST PATH test/distance/run_pairwise_distance_c.c + test/distance/pairwise_distance_c.cu C_LIB + ) ConfigureTest( NAME BRUTEFORCE_C_TEST PATH test/neighbors/run_brute_force_c.c test/neighbors/brute_force_c.cu diff --git a/cpp/test/distance/pairwise_distance_c.cu b/cpp/test/distance/pairwise_distance_c.cu new file mode 100644 index 0000000..c79dca4 --- /dev/null +++ b/cpp/test/distance/pairwise_distance_c.cu @@ -0,0 +1,64 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include + +#include +#include +#include +#include + +#include + +extern "C" void run_pairwise_distance(int64_t n_rows, + int64_t n_queries, + int64_t n_dim, + float* index_data, + float* query_data, + float* distances_data, + cuvsDistanceType metric); + +template +void generate_random_data(T* devPtr, size_t size) +{ + raft::handle_t handle; + raft::random::RngState r(1234ULL); + raft::random::uniform(handle, r, devPtr, size, T(0.1), T(2.0)); +}; + +TEST(PairwiseDistanceC, Distance) +{ + int64_t n_rows = 8096; + int64_t n_queries = 128; + int64_t n_dim = 32; + + cuvsDistanceType metric = L2Expanded; + + float *index_data, *query_data, *distances_data; + cudaMalloc(&index_data, sizeof(float) * n_rows * n_dim); + cudaMalloc(&query_data, sizeof(float) * n_queries * n_dim); + cudaMalloc(&distances_data, sizeof(float) * n_queries * n_rows); + + generate_random_data(index_data, n_rows * n_dim); + generate_random_data(query_data, n_queries * n_dim); + + run_pairwise_distance(n_rows, n_queries, n_dim, index_data, query_data, distances_data, metric); + + // delete device memory + cudaFree(index_data); + cudaFree(query_data); + cudaFree(distances_data); +} diff --git a/cpp/test/distance/run_pairwise_distance_c.c b/cpp/test/distance/run_pairwise_distance_c.c new file mode 100644 index 0000000..bbcf0d7 --- /dev/null +++ b/cpp/test/distance/run_pairwise_distance_c.c @@ -0,0 +1,74 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ +#include +#include + +/* Wraps the raw device buffers in DLPack tensors and calls cuvsPairwiseDistance on them. + * The parameter list must match the extern "C" declaration in pairwise_distance_c.cu. + */ +void run_pairwise_distance(int64_t n_rows, + int64_t n_queries, + int64_t n_dim, + float* index_data, + float* query_data, + float* distances_data, + cuvsDistanceType metric) +{ + // create cuvsResources_t + cuvsResources_t res; + cuvsResourcesCreate(&res); + + // create dataset DLTensor + DLManagedTensor dataset_tensor; + dataset_tensor.dl_tensor.data = index_data; + dataset_tensor.dl_tensor.device.device_type = kDLCUDA; + dataset_tensor.dl_tensor.ndim = 2; + dataset_tensor.dl_tensor.dtype.code = kDLFloat; + dataset_tensor.dl_tensor.dtype.bits = 32; + dataset_tensor.dl_tensor.dtype.lanes = 1; + int64_t dataset_shape[2] = {n_rows, n_dim}; + dataset_tensor.dl_tensor.shape = dataset_shape; + dataset_tensor.dl_tensor.strides = NULL; + + // create queries DLTensor + DLManagedTensor queries_tensor; + queries_tensor.dl_tensor.data = (void*)query_data; + queries_tensor.dl_tensor.device.device_type = kDLCUDA; + queries_tensor.dl_tensor.ndim = 2; + queries_tensor.dl_tensor.dtype.code = kDLFloat; + queries_tensor.dl_tensor.dtype.bits = 32; + queries_tensor.dl_tensor.dtype.lanes = 1; + int64_t queries_shape[2] = {n_queries, n_dim}; + queries_tensor.dl_tensor.shape = queries_shape; + queries_tensor.dl_tensor.strides = NULL; + + // create distances DLTensor + DLManagedTensor distances_tensor; + distances_tensor.dl_tensor.data = (void*)distances_data; + distances_tensor.dl_tensor.device.device_type = kDLCUDA; + distances_tensor.dl_tensor.ndim = 2; + distances_tensor.dl_tensor.dtype.code = kDLFloat; + distances_tensor.dl_tensor.dtype.bits = 32; + distances_tensor.dl_tensor.dtype.lanes = 1; + int64_t distances_shape[2] = {n_rows, n_queries}; + 
distances_tensor.dl_tensor.shape = distances_shape; + distances_tensor.dl_tensor.strides = NULL; + + // run pairwise distances + cuvsPairwiseDistance(res, &dataset_tensor, 
&queries_tensor, &distances_tensor, metric, 2.0); + + cuvsResourcesDestroy(res); +} diff --git a/docs/source/cpp_api/distance.rst b/docs/source/cpp_api/distance.rst index c1b8c61..2e83da7 100644 --- a/docs/source/cpp_api/distance.rst +++ b/docs/source/cpp_api/distance.rst @@ -11,7 +11,7 @@ distances have been highly optimized and support a wide assortment of different Distance Types -------------- -``#include `` +``#include `` namespace *cuvs::distance* @@ -22,11 +22,11 @@ namespace *cuvs::distance* Pairwise Distances ------------------ -``include `` +``include `` namespace *cuvs::distance* .. doxygengroup:: pairwise_distance :project: cuvs :members: - :content-only: \ No newline at end of file + :content-only: diff --git a/python/cuvs/CMakeLists.txt b/python/cuvs/CMakeLists.txt index db6f256..d48cc74 100644 --- a/python/cuvs/CMakeLists.txt +++ b/python/cuvs/CMakeLists.txt @@ -81,6 +81,7 @@ endif() rapids_cython_init() add_subdirectory(cuvs/common) +add_subdirectory(cuvs/distance) add_subdirectory(cuvs/neighbors) if(DEFINED cython_lib_dir) diff --git a/python/cuvs/cuvs/distance/CMakeLists.txt b/python/cuvs/cuvs/distance/CMakeLists.txt new file mode 100644 index 0000000..363778a --- /dev/null +++ b/python/cuvs/cuvs/distance/CMakeLists.txt @@ -0,0 +1,24 @@ +# ============================================================================= +# Copyright (c) 2024, NVIDIA CORPORATION. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except +# in compliance with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software distributed under the License +# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express +# or implied. See the License for the specific language governing permissions and limitations under +# the License. 
+# ============================================================================= + +# Set the list of Cython files to build +set(cython_sources distance.pyx) +set(linked_libraries cuvs::cuvs cuvs::c_api) + +# Build all of the Cython targets +rapids_cython_create_modules( + CXX + SOURCE_FILES "${cython_sources}" + LINKED_LIBRARIES "${linked_libraries}" ASSOCIATED_TARGETS cuvs MODULE_PREFIX distance_ +) diff --git a/python/cuvs/cuvs/distance/__init__.pxd b/python/cuvs/cuvs/distance/__init__.pxd new file mode 100644 index 0000000..e69de29 diff --git a/python/cuvs/cuvs/distance/__init__.py b/python/cuvs/cuvs/distance/__init__.py new file mode 100644 index 0000000..5c985e7 --- /dev/null +++ b/python/cuvs/cuvs/distance/__init__.py @@ -0,0 +1,17 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. + +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at + +# http://www.apache.org/licenses/LICENSE-2.0 + +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from .distance import DISTANCE_TYPES, pairwise_distance + +__all__ = ["DISTANCE_TYPES", "pairwise_distance"] diff --git a/python/cuvs/cuvs/distance/distance.pxd b/python/cuvs/cuvs/distance/distance.pxd new file mode 100644 index 0000000..e40d2c4 --- /dev/null +++ b/python/cuvs/cuvs/distance/distance.pxd @@ -0,0 +1,29 @@ +# +# Copyright (c) 2024, NVIDIA CORPORATION. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +# cython: language_level=3 + +from cuvs.common.c_api cimport cuvsError_t, cuvsResources_t +from cuvs.common.cydlpack cimport DLDataType, DLManagedTensor +from cuvs.distance_type cimport cuvsDistanceType + + +cdef extern from "cuvs/distance/pairwise_distance.h" nogil: + cuvsError_t cuvsPairwiseDistance(cuvsResources_t res, + DLManagedTensor* x, + DLManagedTensor* y, + DLManagedTensor* distances, + cuvsDistanceType metric, + float metric_arg) except + diff --git a/python/cuvs/cuvs/distance/distance.pyx b/python/cuvs/cuvs/distance/distance.pyx new file mode 100644 index 0000000..eb34366 --- /dev/null +++ b/python/cuvs/cuvs/distance/distance.pyx @@ -0,0 +1,139 @@ +# +# Copyright (c) 2024, NVIDIA CORPORATION. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# +# cython: language_level=3 + +import numpy as np + +from cuvs.common.exceptions import check_cuvs +from cuvs.common.resources import auto_sync_resources + +from cuvs.common cimport cydlpack + +from pylibraft.common import auto_convert_output, device_ndarray +from pylibraft.common.cai_wrapper import wrap_array + +DISTANCE_TYPES = { + "l2": cuvsDistanceType.L2SqrtExpanded, + "sqeuclidean": cuvsDistanceType.L2Expanded, + "euclidean": cuvsDistanceType.L2SqrtExpanded, + "l1": cuvsDistanceType.L1, + "cityblock": cuvsDistanceType.L1, + "inner_product": cuvsDistanceType.InnerProduct, + "chebyshev": cuvsDistanceType.Linf, + "canberra": cuvsDistanceType.Canberra, + "cosine": cuvsDistanceType.CosineExpanded, + "lp": cuvsDistanceType.LpUnexpanded, + "correlation": cuvsDistanceType.CorrelationExpanded, + "jaccard": cuvsDistanceType.JaccardExpanded, + "hellinger": cuvsDistanceType.HellingerExpanded, + "braycurtis": cuvsDistanceType.BrayCurtis, + "jensenshannon": cuvsDistanceType.JensenShannon, + "hamming": cuvsDistanceType.HammingUnexpanded, + "kl_divergence": cuvsDistanceType.KLDivergence, + "minkowski": cuvsDistanceType.LpUnexpanded, + "russellrao": cuvsDistanceType.RusselRaoExpanded, + "dice": cuvsDistanceType.DiceExpanded, +} + +SUPPORTED_DISTANCES = ["euclidean", "l1", "cityblock", "l2", "inner_product", + "chebyshev", "minkowski", "canberra", "kl_divergence", + "correlation", "russellrao", "hellinger", "lp", + "hamming", "jensenshannon", "cosine", "sqeuclidean"] + + +@auto_sync_resources +@auto_convert_output +def pairwise_distance(X, Y, out=None, metric="euclidean", metric_arg=2.0, + resources=None): + """ + Compute pairwise distances between X and Y + + Valid values for metric: + ["euclidean", "l2", "l1", "cityblock", "inner_product", + "chebyshev", "canberra", "lp", "hellinger", "jensenshannon", + "kl_divergence", "russellrao", "minkowski", "correlation", + "cosine", "hamming", "sqeuclidean"] + + Parameters + ---------- + + X : CUDA array interface 
compliant matrix shape (m, k) + Y : CUDA 
array interface compliant matrix shape (n, k) + out : Optional writable CUDA array interface matrix shape (m, n) + metric : string denoting the metric type (default="euclidean") + metric_arg : metric parameter (currently used only for "minkowski") + {resources_docstring} + + Examples + -------- + + >>> import cupy as cp + >>> from cuvs.distance import pairwise_distance + >>> n_samples = 5000 + >>> n_features = 50 + >>> in1 = cp.random.random_sample((n_samples, n_features), + ... dtype=cp.float32) + >>> in2 = cp.random.random_sample((n_samples, n_features), + ... dtype=cp.float32) + >>> output = pairwise_distance(in1, in2, metric="euclidean") + """ + + cdef cuvsResources_t res = resources.get_c_obj() + + x_cai = wrap_array(X) + y_cai = wrap_array(Y) + + m = x_cai.shape[0] + n = y_cai.shape[0] + + x_k = x_cai.shape[1] + y_k = y_cai.shape[1] + + if x_k != y_k: + raise ValueError("Inputs must have same number of columns. 
" + "a=%s, b=%s" % (x_k, y_k)) + + if metric not in SUPPORTED_DISTANCES: + raise ValueError("metric %s is not supported" % metric) + + if out is None: + out = device_ndarray.empty((m, n), dtype=y_cai.dtype) + out_cai = wrap_array(out) + + cdef cuvsDistanceType distance_type = DISTANCE_TYPES[metric] + + x_dt = x_cai.dtype + y_dt = y_cai.dtype + d_dt = out_cai.dtype + + if x_dt != y_dt or x_dt != d_dt: + raise ValueError("Inputs must have the same dtypes") + + cdef cydlpack.DLManagedTensor* x_dlpack = \ + cydlpack.dlpack_c(x_cai) + cdef cydlpack.DLManagedTensor* y_dlpack = \ + cydlpack.dlpack_c(y_cai) + cdef cydlpack.DLManagedTensor* out_dlpack = \ + cydlpack.dlpack_c(out_cai) + + check_cuvs(cuvsPairwiseDistance(res, + x_dlpack, + y_dlpack, + out_dlpack, + distance_type, + metric_arg)) + + return out diff --git a/python/cuvs/cuvs/neighbors/brute_force/brute_force.pyx b/python/cuvs/cuvs/neighbors/brute_force/brute_force.pyx index 226ee23..490f1d3 100644 --- a/python/cuvs/cuvs/neighbors/brute_force/brute_force.pyx +++ b/python/cuvs/cuvs/neighbors/brute_force/brute_force.pyx @@ -31,9 +31,10 @@ from cuvs.distance_type cimport cuvsDistanceType from pylibraft.common import auto_convert_output, cai_wrapper, device_ndarray from pylibraft.common.cai_wrapper import wrap_array from pylibraft.common.interruptible import cuda_interruptible -from pylibraft.distance.pairwise_distance import DISTANCE_TYPES from pylibraft.neighbors.common import _check_input_array +from cuvs.distance import DISTANCE_TYPES + from cuvs.common.c_api cimport cuvsResources_t from cuvs.common.exceptions import check_cuvs @@ -194,7 +195,6 @@ def search(Index index, _check_input_array(distances_cai, [np.dtype('float32')], exp_rows=n_queries, exp_cols=k) - cdef cuvsError_t search_status cdef cydlpack.DLManagedTensor* queries_dlpack = \ cydlpack.dlpack_c(queries_cai) cdef cydlpack.DLManagedTensor* neighbors_dlpack = \ diff --git a/python/cuvs/cuvs/neighbors/ivf_flat/ivf_flat.pyx 
b/python/cuvs/cuvs/neighbors/ivf_flat/ivf_flat.pyx index 870e941..daa7230 100644 --- a/python/cuvs/cuvs/neighbors/ivf_flat/ivf_flat.pyx 
+++ b/python/cuvs/cuvs/neighbors/ivf_flat/ivf_flat.pyx @@ -30,9 +30,10 @@ from cuvs.distance_type cimport cuvsDistanceType from pylibraft.common import auto_convert_output, cai_wrapper, device_ndarray from pylibraft.common.cai_wrapper import wrap_array from pylibraft.common.interruptible import cuda_interruptible -from pylibraft.distance.pairwise_distance import DISTANCE_TYPES from pylibraft.neighbors.common import _check_input_array +from cuvs.distance import DISTANCE_TYPES + from libc.stdint cimport ( int8_t, int64_t, diff --git a/python/cuvs/cuvs/neighbors/ivf_pq/ivf_pq.pyx b/python/cuvs/cuvs/neighbors/ivf_pq/ivf_pq.pyx index a1ca676..81baed1 100644 --- a/python/cuvs/cuvs/neighbors/ivf_pq/ivf_pq.pyx +++ b/python/cuvs/cuvs/neighbors/ivf_pq/ivf_pq.pyx @@ -30,9 +30,10 @@ from cuvs.distance_type cimport cuvsDistanceType from pylibraft.common import auto_convert_output, cai_wrapper, device_ndarray from pylibraft.common.cai_wrapper import wrap_array from pylibraft.common.interruptible import cuda_interruptible -from pylibraft.distance.pairwise_distance import DISTANCE_TYPES from pylibraft.neighbors.common import _check_input_array +from cuvs.distance import DISTANCE_TYPES + from libc.stdint cimport ( int8_t, int64_t, diff --git a/python/cuvs/cuvs/test/test_distance.py b/python/cuvs/cuvs/test/test_distance.py new file mode 100644 index 0000000..681217f --- /dev/null +++ b/python/cuvs/cuvs/test/test_distance.py @@ -0,0 +1,79 @@ +# Copyright (c) 2022-2023, NVIDIA CORPORATION. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. +# + +import numpy as np +import pytest +from pylibraft.common import device_ndarray +from scipy.spatial.distance import cdist + +from cuvs.distance import pairwise_distance + + +@pytest.mark.parametrize("n_rows", [50, 100]) +@pytest.mark.parametrize("n_cols", [10, 50]) +@pytest.mark.parametrize( + "metric", + [ + "euclidean", + "cityblock", + "chebyshev", + "canberra", + "correlation", + "hamming", + "jensenshannon", + "russellrao", + "cosine", + "sqeuclidean", + "inner_product", + ], +) +@pytest.mark.parametrize("inplace", [True, False]) +@pytest.mark.parametrize("dtype", [np.float32, np.float64]) +def test_distance(n_rows, n_cols, inplace, metric, dtype): + input1 = np.random.random_sample((n_rows, n_cols)) + input1 = np.asarray(input1).astype(dtype) + + # RussellRao expects boolean arrays + if metric == "russellrao": + input1[input1 < 0.5] = 0 + input1[input1 >= 0.5] = 1 + + # JensenShannon expects probability arrays + elif metric == "jensenshannon": + norm = np.sum(input1, axis=1) + input1 = (input1.T / norm).T + + output = np.zeros((n_rows, n_rows), dtype=dtype) + + if metric == "inner_product": + expected = np.matmul(input1, input1.T) + else: + expected = cdist(input1, input1, metric) + + input1_device = device_ndarray(input1) + output_device = device_ndarray(output) if inplace else None + + ret_output = pairwise_distance( + input1_device, + input1_device, + output_device, + metric, + ) + + output_device = ret_output if not inplace else output_device + + actual = output_device.copy_to_host() + + assert np.allclose(expected, actual, atol=1e-3, rtol=1e-3) diff --git a/python/cuvs/cuvs/test/test_doctests.py b/python/cuvs/cuvs/test/test_doctests.py index 68b2861..64b0c5d 100644 --- a/python/cuvs/cuvs/test/test_doctests.py +++ b/python/cuvs/cuvs/test/test_doctests.py @@ -20,6 +20,7 @@ import pytest +import cuvs.distance import cuvs.neighbors # Code adapted 
from https://github.com/rapidsai/cudf/blob/branch-23.02/python/cudf/cudf/tests/test_doctests.py # noqa @@ -93,6 +94,7 @@ def _find_doctests_in_obj(obj, finder=None, criteria=None): DOC_STRINGS.extend(_find_doctests_in_obj(cuvs.neighbors.brute_force)) DOC_STRINGS.extend(_find_doctests_in_obj(cuvs.neighbors.ivf_flat)) DOC_STRINGS.extend(_find_doctests_in_obj(cuvs.common)) +DOC_STRINGS.extend(_find_doctests_in_obj(cuvs.distance)) @pytest.mark.parametrize( diff --git a/rust/cuvs-sys/cuvs_c_wrapper.h b/rust/cuvs-sys/cuvs_c_wrapper.h index c6ab02d..0c66928 100644 --- a/rust/cuvs-sys/cuvs_c_wrapper.h +++ b/rust/cuvs-sys/cuvs_c_wrapper.h @@ -17,6 +17,7 @@ // wrapper file containing all the C-API's we should automatically be creating rust // bindings for #include +#include #include #include #include diff --git a/rust/cuvs/src/distance/mod.rs b/rust/cuvs/src/distance/mod.rs new file mode 100644 index 0000000..0377f3e --- /dev/null +++ b/rust/cuvs/src/distance/mod.rs @@ -0,0 +1,93 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + + +use std::io::{stderr, Write}; + +use crate::distance_type::DistanceType; +use crate::dlpack::ManagedTensor; +use crate::error::{check_cuvs, Result}; +use crate::resources::Resources; + +/// Compute pairwise distances between X and Y +/// +/// # Arguments +/// +/// * `res` - Resources to use +/// * `x` - A matrix in device memory - shape (m, k) +/// * `y` - A matrix in device memory - shape (n, k) +/// * `distances` - A matrix in device memory that receives the output distances - shape (m, n) +/// * `metric` - DistanceType to evaluate between the rows of `x` and `y` +/// * `metric_arg` - Optional value of `p` for Minkowski distances (2.0 is used when `None`) +/// +/// # Errors +/// +/// Propagates whatever error `check_cuvs` reports for the `cuvsPairwiseDistance` call. +pub fn pairwise_distance( + res: &Resources, + x: &ManagedTensor, + y: &ManagedTensor, + distances: &ManagedTensor, + metric: DistanceType, + metric_arg: Option<f32>, +) -> Result<()> { + // SAFETY: `res`, `x`, `y` and `distances` are live borrows, so the handle and tensor + // pointers passed below stay valid for the duration of the call. + // NOTE(review): assumes the C API does not retain these pointers after returning. 
+ unsafe { + check_cuvs(ffi::cuvsPairwiseDistance( + res.0, + x.as_ptr(), + y.as_ptr(), + distances.as_ptr(), + metric, + metric_arg.unwrap_or(2.0), + )) + } +} + +#[cfg(test)] +mod tests { + use super::*; + use ndarray_rand::rand_distr::Uniform; + use ndarray_rand::RandomExt; + + #[test] + fn test_pairwise_distance() { + let res = Resources::new().unwrap(); + + // Create a new random dataset to index + let n_datapoints = 256; + let n_features = 16; + let dataset = + ndarray::Array::<f32, _>::random((n_datapoints, n_features), Uniform::new(0., 1.0)); + let dataset_device = ManagedTensor::from(&dataset).to_device(&res).unwrap(); + + let mut distances_host = ndarray::Array::<f32, _>::zeros((n_datapoints, n_datapoints)); + let distances = ManagedTensor::from(&distances_host) + .to_device(&res) + .unwrap(); + + pairwise_distance(&res, &dataset_device, &dataset_device, &distances, DistanceType::L2Expanded, + None).unwrap(); + + // Copy back to host memory + distances.to_host(&res, &mut distances_host).unwrap(); + + // Self distance should be 0 + assert_eq!(distances_host[[0, 0]], 0.0); + } +} diff --git a/rust/cuvs/src/lib.rs b/rust/cuvs/src/lib.rs index 
6ed450c..71a387b 100644 --- a/rust/cuvs/src/lib.rs +++ b/rust/cuvs/src/lib.rs @@ -20,6 +20,7 @@ //! approximate nearest neighbors search on the GPU. pub mod brute_force; pub mod cagra; +pub mod distance; pub mod distance_type; pub mod ivf_flat; mod dlpack;