From a9a4d0a57966db6866f2ac9a34477834d11426ec Mon Sep 17 00:00:00 2001 From: Ben Frederickson Date: Tue, 21 May 2024 16:45:48 -0700 Subject: [PATCH 1/6] Add pairwise_distance api's for C, Python and Rust --- cpp/CMakeLists.txt | 5 +- cpp/include/cuvs/distance/distance.h | 37 +++++ cpp/include/cuvs/neighbors/cagra.h | 3 +- cpp/src/distance/pairwise_distance_c.cpp | 72 +++++++++ cpp/test/CMakeLists.txt | 4 + cpp/test/distance/pairwise_distance_c.cu | 64 ++++++++ cpp/test/distance/run_pairwise_distance_c.c | 72 +++++++++ docs/source/cpp_api/distance.rst | 6 +- python/cuvs/CMakeLists.txt | 1 + python/cuvs/cuvs/distance/CMakeLists.txt | 24 +++ python/cuvs/cuvs/distance/__init__.pxd | 0 python/cuvs/cuvs/distance/__init__.py | 19 +++ python/cuvs/cuvs/distance/distance.pxd | 52 +++++++ python/cuvs/cuvs/distance/distance.pyx | 139 ++++++++++++++++++ .../neighbors/brute_force/brute_force.pyx | 4 +- .../cuvs/cuvs/neighbors/ivf_flat/ivf_flat.pyx | 3 +- python/cuvs/cuvs/neighbors/ivf_pq/ivf_pq.pyx | 3 +- python/cuvs/cuvs/test/test_distance.py | 79 ++++++++++ python/cuvs/cuvs/test/test_doctests.py | 2 + rust/cuvs-sys/cuvs_c_wrapper.h | 1 + rust/cuvs/src/distance/mod.rs | 87 +++++++++++ rust/cuvs/src/lib.rs | 1 + 22 files changed, 668 insertions(+), 10 deletions(-) create mode 100644 cpp/src/distance/pairwise_distance_c.cpp create mode 100644 cpp/test/distance/pairwise_distance_c.cu create mode 100644 cpp/test/distance/run_pairwise_distance_c.c create mode 100644 python/cuvs/cuvs/distance/CMakeLists.txt create mode 100644 python/cuvs/cuvs/distance/__init__.pxd create mode 100644 python/cuvs/cuvs/distance/__init__.py create mode 100644 python/cuvs/cuvs/distance/distance.pxd create mode 100644 python/cuvs/cuvs/distance/distance.pyx create mode 100644 python/cuvs/cuvs/test/test_distance.py create mode 100644 rust/cuvs/src/distance/mod.rs diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index b972fc122..f35362b65 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -546,8 +546,9 @@ target_link_options(cuvs PRIVATE "${CMAKE_CURRENT_BINARY_DIR}/fatbin.ld") # * cuvs_c ------------------------------------------------------------------------------- if(BUILD_C_LIBRARY) add_library( - cuvs_c SHARED src/core/c_api.cpp src/neighbors/brute_force_c.cpp src/neighbors/ivf_flat_c.cpp - src/neighbors/ivf_pq_c.cpp src/neighbors/cagra_c.cpp + cuvs_c SHARED + src/core/c_api.cpp src/neighbors/brute_force_c.cpp src/neighbors/ivf_flat_c.cpp + src/neighbors/ivf_pq_c.cpp src/neighbors/cagra_c.cpp src/distance/pairwise_distance_c.cpp ) add_library(cuvs::c_api ALIAS cuvs_c) diff --git a/cpp/include/cuvs/distance/distance.h b/cpp/include/cuvs/distance/distance.h index 550221e8e..577f13ce8 100644 --- a/cpp/include/cuvs/distance/distance.h +++ b/cpp/include/cuvs/distance/distance.h @@ -14,6 +14,9 @@ * limitations under the License. */ #pragma once +#include + +#include #ifdef __cplusplus extern "C" { @@ -66,6 +69,40 @@ typedef enum { Precomputed = 100 } cuvsDistanceType; +/** + * @brief Compute pairwise distances for two matrices + * + * + * Usage example: + * @code{.c} + * #include + * #include + * + * // Create cuvsResources_t + * cuvsResources_t res; + * cuvsError_t res_create_status = cuvsResourcesCreate(&res); + * + * // Assume a populated `DLManagedTensor` type here + * DLManagedTensor x; + * DLManagedTensor y; + * DLManagedTensor dist; + * + * cuvsPairwiseDistance(handle, &x, &y, &dist, L2SqrtUnexpanded, 2.0); + * @endcode + * + * @param[in] handle raft handle for managing expensive resources + * @param[in] x first set of points (size n*k) + * @param[in] y second set of points (size m*k) + * @param[out] dist output distance matrix (size n*m) + * @param[in] metric distance to evaluate + * @param[in] metric_arg metric argument (used for Minkowski distance) + */ +cuvsError_t cuvsPairwiseDistance(cuvsResources_t res, + DLManagedTensor* x, + DLManagedTensor* y, + DLManagedTensor* distances, + cuvsDistanceType metric, + float metric_arg); #ifdef __cplusplus } #endif diff --git a/cpp/include/cuvs/neighbors/cagra.h b/cpp/include/cuvs/neighbors/cagra.h index 3a1a8c9fe..727c39c6e 100644 --- a/cpp/include/cuvs/neighbors/cagra.h +++ b/cpp/include/cuvs/neighbors/cagra.h @@ -356,7 +356,8 @@ cuvsError_t cuvsCagraBuild(cuvsResources_t res, * cuvsError_t params_create_status = cuvsCagraSearchParamsCreate(¶ms); * * // Search the `index` built using `cuvsCagraBuild` - * cuvsError_t search_status = cuvsCagraSearch(res, params, index, queries, neighbors, distances); + * cuvsError_t search_status = cuvsCagraSearch(res, params, index, &queries, &neighbors, + * &distances); * * // de-allocate `params` and `res` * cuvsError_t params_destroy_status = cuvsCagraSearchParamsDestroy(params); diff --git a/cpp/src/distance/pairwise_distance_c.cpp b/cpp/src/distance/pairwise_distance_c.cpp new file mode 100644 index 000000000..ffa5924b8 --- /dev/null +++ b/cpp/src/distance/pairwise_distance_c.cpp @@ -0,0 +1,72 @@ + +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include + +#include +#include +#include + +#include +#include +#include +#include +#include + +namespace { + +template +void _pairwise_distance(cuvsResources_t res, + DLManagedTensor* x_tensor, + DLManagedTensor* y_tensor, + DLManagedTensor* distances_tensor, + cuvsDistanceType metric, + float metric_arg) +{ + auto res_ptr = reinterpret_cast(res); + + using mdspan_type = raft::device_matrix_view; + using distances_mdspan_type = raft::device_matrix_view; + + auto x_mds = cuvs::core::from_dlpack(x_tensor); + auto y_mds = cuvs::core::from_dlpack(y_tensor); + auto distances_mds = cuvs::core::from_dlpack(distances_tensor); + + cuvs::distance::pairwise_distance(*res_ptr, x_mds, y_mds, distances_mds, metric, metric_arg); +} +} // namespace + +extern "C" cuvsError_t cuvsPairwiseDistance(cuvsResources_t res, + DLManagedTensor* x_tensor, + DLManagedTensor* y_tensor, + DLManagedTensor* distances_tensor, + cuvsDistanceType metric, + float metric_arg) +{ + return cuvs::core::translate_exceptions([=] { + auto x = x_tensor->dl_tensor; + + if (x.dtype.code == kDLFloat && x.dtype.bits == 32) { + _pairwise_distance(res, x_tensor, y_tensor, distances_tensor, metric, metric_arg); + } else if (x.dtype.code == kDLFloat && x.dtype.bits == 64) { + _pairwise_distance(res, x_tensor, y_tensor, distances_tensor, metric, metric_arg); + } else { + RAFT_FAIL("Unsupported x DLtensor dtype: %d and bits: %d", x.dtype.code, x.dtype.bits); + } + }); +} diff --git a/cpp/test/CMakeLists.txt b/cpp/test/CMakeLists.txt index 903c28f8d..4d4cd40bf 100644 --- a/cpp/test/CMakeLists.txt +++ b/cpp/test/CMakeLists.txt @@ -175,6 +175,10 @@ endif() if(BUILD_C_TESTS) ConfigureTest(NAME INTEROP_TEST PATH test/core/interop.cu C_LIB) + ConfigureTest( + NAME DISTANCE_C_TEST PATH test/distance/run_pairwise_distance_c.c + test/distance/pairwise_distance_c.cu C_LIB + ) ConfigureTest( NAME BRUTEFORCE_C_TEST PATH test/neighbors/run_brute_force_c.c test/neighbors/brute_force_c.cu diff --git a/cpp/test/distance/pairwise_distance_c.cu b/cpp/test/distance/pairwise_distance_c.cu new file mode 100644 index 000000000..c79dca4af --- /dev/null +++ b/cpp/test/distance/pairwise_distance_c.cu @@ -0,0 +1,64 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include + +#include +#include +#include +#include + +#include + +extern "C" void run_pairwise_distance(int64_t n_rows, + int64_t n_queries, + int64_t n_dim, + float* index_data, + float* query_data, + float* distances_data, + cuvsDistanceType metric); + +template +void generate_random_data(T* devPtr, size_t size) +{ + raft::handle_t handle; + raft::random::RngState r(1234ULL); + raft::random::uniform(handle, r, devPtr, size, T(0.1), T(2.0)); +}; + +TEST(PairwiseDistanceC, Distance) +{ + int64_t n_rows = 8096; + int64_t n_queries = 128; + int64_t n_dim = 32; + + cuvsDistanceType metric = L2Expanded; + + float *index_data, *query_data, *distances_data; + cudaMalloc(&index_data, sizeof(float) * n_rows * n_dim); + cudaMalloc(&query_data, sizeof(float) * n_queries * n_dim); + cudaMalloc(&distances_data, sizeof(float) * n_queries * n_rows); + + generate_random_data(index_data, n_rows * n_dim); + generate_random_data(query_data, n_queries * n_dim); + + run_pairwise_distance(n_rows, n_queries, n_dim, index_data, query_data, distances_data, metric); + + // delete device memory + cudaFree(index_data); + cudaFree(query_data); + cudaFree(distances_data); +} diff --git a/cpp/test/distance/run_pairwise_distance_c.c b/cpp/test/distance/run_pairwise_distance_c.c new file mode 100644 index 000000000..b8f40c972 --- /dev/null +++ b/cpp/test/distance/run_pairwise_distance_c.c @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include + +void run_pairwise_distance(int64_t n_rows, + int64_t n_queries, + int64_t n_dim, + float* index_data, + float* query_data, + float* distances_data, + int64_t* neighbors_data, + cuvsDistanceType metric) +{ + // create cuvsResources_t + cuvsResources_t res; + cuvsResourcesCreate(&res); + + // create dataset DLTensor + DLManagedTensor dataset_tensor; + dataset_tensor.dl_tensor.data = index_data; + dataset_tensor.dl_tensor.device.device_type = kDLCUDA; + dataset_tensor.dl_tensor.ndim = 2; + dataset_tensor.dl_tensor.dtype.code = kDLFloat; + dataset_tensor.dl_tensor.dtype.bits = 32; + dataset_tensor.dl_tensor.dtype.lanes = 1; + int64_t dataset_shape[2] = {n_rows, n_dim}; + dataset_tensor.dl_tensor.shape = dataset_shape; + dataset_tensor.dl_tensor.strides = NULL; + + // create queries DLTensor + DLManagedTensor queries_tensor; + queries_tensor.dl_tensor.data = (void*)query_data; + queries_tensor.dl_tensor.device.device_type = kDLCUDA; + queries_tensor.dl_tensor.ndim = 2; + queries_tensor.dl_tensor.dtype.code = kDLFloat; + queries_tensor.dl_tensor.dtype.bits = 32; + queries_tensor.dl_tensor.dtype.lanes = 1; + int64_t queries_shape[2] = {n_queries, n_dim}; + queries_tensor.dl_tensor.shape = queries_shape; + queries_tensor.dl_tensor.strides = NULL; + + // create distances DLTensor + DLManagedTensor distances_tensor; + distances_tensor.dl_tensor.data = (void*)distances_data; + distances_tensor.dl_tensor.device.device_type = kDLCUDA; + distances_tensor.dl_tensor.ndim = 2; + distances_tensor.dl_tensor.dtype.code = kDLFloat; + distances_tensor.dl_tensor.dtype.bits = 32; + distances_tensor.dl_tensor.dtype.lanes = 1; + int64_t distances_shape[2] = {n_rows, n_queries}; + distances_tensor.dl_tensor.shape = distances_shape; + distances_tensor.dl_tensor.strides = NULL; + + // run pairwise distances + cuvsPairwiseDistance(res, &dataset_tensor, &queries_tensor, &distances_tensor, metric, 2.0); + + cuvsResourcesDestroy(res); +} diff --git a/docs/source/cpp_api/distance.rst b/docs/source/cpp_api/distance.rst index c1b8c619d..2e83da7b3 100644 --- a/docs/source/cpp_api/distance.rst +++ b/docs/source/cpp_api/distance.rst @@ -11,7 +11,7 @@ distances have been highly optimized and support a wide assortment of different Distance Types -------------- -``#include `` +``#include `` namespace *cuvs::distance* @@ -22,11 +22,11 @@ namespace *cuvs::distance* Pairwise Distances ------------------ -``include `` +``include `` namespace *cuvs::distance* .. doxygengroup:: pairwise_distance :project: cuvs :members: - :content-only: \ No newline at end of file + :content-only: diff --git a/python/cuvs/CMakeLists.txt b/python/cuvs/CMakeLists.txt index db6f25675..d48cc7405 100644 --- a/python/cuvs/CMakeLists.txt +++ b/python/cuvs/CMakeLists.txt @@ -81,6 +81,7 @@ endif() rapids_cython_init() add_subdirectory(cuvs/common) +add_subdirectory(cuvs/distance) add_subdirectory(cuvs/neighbors) if(DEFINED cython_lib_dir) diff --git a/python/cuvs/cuvs/distance/CMakeLists.txt b/python/cuvs/cuvs/distance/CMakeLists.txt new file mode 100644 index 000000000..363778a9c --- /dev/null +++ b/python/cuvs/cuvs/distance/CMakeLists.txt @@ -0,0 +1,24 @@ +# ============================================================================= +# Copyright (c) 2024, NVIDIA CORPORATION. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except +# in compliance with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software distributed under the License +# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express +# or implied. See the License for the specific language governing permissions and limitations under +# the License. +# ============================================================================= + +# Set the list of Cython files to build +set(cython_sources distance.pyx) +set(linked_libraries cuvs::cuvs cuvs::c_api) + +# Build all of the Cython targets +rapids_cython_create_modules( + CXX + SOURCE_FILES "${cython_sources}" + LINKED_LIBRARIES "${linked_libraries}" ASSOCIATED_TARGETS cuvs MODULE_PREFIX distance_ +) diff --git a/python/cuvs/cuvs/distance/__init__.pxd b/python/cuvs/cuvs/distance/__init__.pxd new file mode 100644 index 000000000..e69de29bb diff --git a/python/cuvs/cuvs/distance/__init__.py b/python/cuvs/cuvs/distance/__init__.py new file mode 100644 index 000000000..aa29c5f76 --- /dev/null +++ b/python/cuvs/cuvs/distance/__init__.py @@ -0,0 +1,19 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. + +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at + +# http://www.apache.org/licenses/LICENSE-2.0 + +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# TODO: import distance_types PWD etc + +from .distance import DISTANCE_TYPES, pairwise_distance + +__all__ = ["DISTANCE_TYPES", "pairwise_distance"] diff --git a/python/cuvs/cuvs/distance/distance.pxd b/python/cuvs/cuvs/distance/distance.pxd new file mode 100644 index 000000000..e44235af6 --- /dev/null +++ b/python/cuvs/cuvs/distance/distance.pxd @@ -0,0 +1,52 @@ +# +# Copyright (c) 2024, NVIDIA CORPORATION. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +# cython: language_level=3 + + +from cuvs.common.c_api cimport cuvsError_t, cuvsResources_t +from cuvs.common.cydlpack cimport DLDataType, DLManagedTensor + + +cdef extern from "cuvs/distance/distance.h" nogil: + ctypedef enum cuvsDistanceType: + L2Expanded + L2SqrtExpanded + CosineExpanded + L1 + L2Unexpanded + L2SqrtUnexpanded + InnerProduct + Linf + Canberra + LpUnexpanded + CorrelationExpanded + JaccardExpanded + HellingerExpanded + Haversine + BrayCurtis + JensenShannon + HammingUnexpanded + KLDivergence + RusselRaoExpanded + DiceExpanded + Precomputed + + cuvsError_t cuvsPairwiseDistance(cuvsResources_t res, + DLManagedTensor* x, + DLManagedTensor* y, + DLManagedTensor* distances, + cuvsDistanceType metric, + float metric_arg) except + diff --git a/python/cuvs/cuvs/distance/distance.pyx b/python/cuvs/cuvs/distance/distance.pyx new file mode 100644 index 000000000..eb34366e4 --- /dev/null +++ b/python/cuvs/cuvs/distance/distance.pyx @@ -0,0 +1,139 @@ +# +# Copyright (c) 2024, NVIDIA CORPORATION. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +# cython: language_level=3 + +import numpy as np + +from cuvs.common.exceptions import check_cuvs +from cuvs.common.resources import auto_sync_resources + +from cuvs.common cimport cydlpack + +from pylibraft.common import auto_convert_output, device_ndarray +from pylibraft.common.cai_wrapper import wrap_array + +DISTANCE_TYPES = { + "l2": cuvsDistanceType.L2SqrtExpanded, + "sqeuclidean": cuvsDistanceType.L2Expanded, + "euclidean": cuvsDistanceType.L2SqrtExpanded, + "l1": cuvsDistanceType.L1, + "cityblock": cuvsDistanceType.L1, + "inner_product": cuvsDistanceType.InnerProduct, + "chebyshev": cuvsDistanceType.Linf, + "canberra": cuvsDistanceType.Canberra, + "cosine": cuvsDistanceType.CosineExpanded, + "lp": cuvsDistanceType.LpUnexpanded, + "correlation": cuvsDistanceType.CorrelationExpanded, + "jaccard": cuvsDistanceType.JaccardExpanded, + "hellinger": cuvsDistanceType.HellingerExpanded, + "braycurtis": cuvsDistanceType.BrayCurtis, + "jensenshannon": cuvsDistanceType.JensenShannon, + "hamming": cuvsDistanceType.HammingUnexpanded, + "kl_divergence": cuvsDistanceType.KLDivergence, + "minkowski": cuvsDistanceType.LpUnexpanded, + "russellrao": cuvsDistanceType.RusselRaoExpanded, + "dice": cuvsDistanceType.DiceExpanded, +} + +SUPPORTED_DISTANCES = ["euclidean", "l1", "cityblock", "l2", "inner_product", + "chebyshev", "minkowski", "canberra", "kl_divergence", + "correlation", "russellrao", "hellinger", "lp", + "hamming", "jensenshannon", "cosine", "sqeuclidean"] + + +@auto_sync_resources +@auto_convert_output +def pairwise_distance(X, Y, out=None, metric="euclidean", metric_arg=2.0, + resources=None): + """ + Compute pairwise distances between X and Y + + Valid values for metric: + ["euclidean", "l2", "l1", "cityblock", "inner_product", + "chebyshev", "canberra", "lp", "hellinger", "jensenshannon", + "kl_divergence", "russellrao", "minkowski", "correlation", + "cosine"] + + Parameters + ---------- + + X : CUDA array interface compliant matrix shape (m, k) + Y : CUDA array interface compliant matrix shape (n, k) + out : Optional writable CUDA array interface matrix shape (m, n) + metric : string denoting the metric type (default="euclidean") + metric_arg : metric parameter (currently used only for "minkowski") + {resources_docstring} + + Examples + -------- + + >>> import cupy as cp + >>> from cuvs.distance import pairwise_distance + >>> n_samples = 5000 + >>> n_features = 50 + >>> in1 = cp.random.random_sample((n_samples, n_features), + ... dtype=cp.float32) + >>> in2 = cp.random.random_sample((n_samples, n_features), + ... dtype=cp.float32) + >>> output = pairwise_distance(in1, in2, metric="euclidean") + """ + + cdef cuvsResources_t res = resources.get_c_obj() + + x_cai = wrap_array(X) + y_cai = wrap_array(Y) + + m = x_cai.shape[0] + n = y_cai.shape[0] + + if out is None: + out = device_ndarray.empty((m, n), dtype=y_cai.dtype) + out_cai = wrap_array(out) + + x_k = x_cai.shape[1] + y_k = y_cai.shape[1] + + if x_k != y_k: + raise ValueError("Inputs must have same number of columns. " + "a=%s, b=%s" % (x_k, y_k)) + + if metric not in SUPPORTED_DISTANCES: + raise ValueError("metric %s is not supported" % metric) + + cdef cuvsDistanceType distance_type = DISTANCE_TYPES[metric] + + x_dt = x_cai.dtype + y_dt = y_cai.dtype + d_dt = out_cai.dtype + + if x_dt != y_dt or x_dt != d_dt: + raise ValueError("Inputs must have the same dtypes") + + cdef cydlpack.DLManagedTensor* x_dlpack = \ + cydlpack.dlpack_c(x_cai) + cdef cydlpack.DLManagedTensor* y_dlpack = \ + cydlpack.dlpack_c(y_cai) + cdef cydlpack.DLManagedTensor* out_dlpack = \ + cydlpack.dlpack_c(out_cai) + + check_cuvs(cuvsPairwiseDistance(res, + x_dlpack, + y_dlpack, + out_dlpack, + distance_type, + metric_arg)) + + return out diff --git a/python/cuvs/cuvs/neighbors/brute_force/brute_force.pyx b/python/cuvs/cuvs/neighbors/brute_force/brute_force.pyx index 226ee23d6..490f1d3ac 100644 --- a/python/cuvs/cuvs/neighbors/brute_force/brute_force.pyx +++ b/python/cuvs/cuvs/neighbors/brute_force/brute_force.pyx @@ -31,9 +31,10 @@ from cuvs.distance_type cimport cuvsDistanceType from pylibraft.common import auto_convert_output, cai_wrapper, device_ndarray from pylibraft.common.cai_wrapper import wrap_array from pylibraft.common.interruptible import cuda_interruptible -from pylibraft.distance.pairwise_distance import DISTANCE_TYPES from pylibraft.neighbors.common import _check_input_array +from cuvs.distance import DISTANCE_TYPES + from cuvs.common.c_api cimport cuvsResources_t from cuvs.common.exceptions import check_cuvs @@ -194,7 +195,6 @@ def search(Index index, _check_input_array(distances_cai, [np.dtype('float32')], exp_rows=n_queries, exp_cols=k) - cdef cuvsError_t search_status cdef cydlpack.DLManagedTensor* queries_dlpack = \ cydlpack.dlpack_c(queries_cai) cdef cydlpack.DLManagedTensor* neighbors_dlpack = \ diff --git a/python/cuvs/cuvs/neighbors/ivf_flat/ivf_flat.pyx b/python/cuvs/cuvs/neighbors/ivf_flat/ivf_flat.pyx index 870e9412a..daa723099 100644 --- a/python/cuvs/cuvs/neighbors/ivf_flat/ivf_flat.pyx +++ b/python/cuvs/cuvs/neighbors/ivf_flat/ivf_flat.pyx @@ -30,9 +30,10 @@ from cuvs.distance_type cimport cuvsDistanceType from pylibraft.common import auto_convert_output, cai_wrapper, device_ndarray from pylibraft.common.cai_wrapper import wrap_array from pylibraft.common.interruptible import cuda_interruptible -from pylibraft.distance.pairwise_distance import DISTANCE_TYPES from pylibraft.neighbors.common import _check_input_array +from cuvs.distance import DISTANCE_TYPES + from libc.stdint cimport ( int8_t, int64_t, diff --git a/python/cuvs/cuvs/neighbors/ivf_pq/ivf_pq.pyx b/python/cuvs/cuvs/neighbors/ivf_pq/ivf_pq.pyx index a1ca6768a..81baed1f0 100644 --- a/python/cuvs/cuvs/neighbors/ivf_pq/ivf_pq.pyx +++ b/python/cuvs/cuvs/neighbors/ivf_pq/ivf_pq.pyx @@ -30,9 +30,10 @@ from cuvs.distance_type cimport cuvsDistanceType from pylibraft.common import auto_convert_output, cai_wrapper, device_ndarray from pylibraft.common.cai_wrapper import wrap_array from pylibraft.common.interruptible import cuda_interruptible -from pylibraft.distance.pairwise_distance import DISTANCE_TYPES from pylibraft.neighbors.common import _check_input_array +from cuvs.distance import DISTANCE_TYPES + from libc.stdint cimport ( int8_t, int64_t, diff --git a/python/cuvs/cuvs/test/test_distance.py b/python/cuvs/cuvs/test/test_distance.py new file mode 100644 index 000000000..681217fc8 --- /dev/null +++ b/python/cuvs/cuvs/test/test_distance.py @@ -0,0 +1,79 @@ +# Copyright (c) 2022-2023, NVIDIA CORPORATION. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +import numpy as np +import pytest +from pylibraft.common import device_ndarray +from scipy.spatial.distance import cdist + +from cuvs.distance import pairwise_distance + + +@pytest.mark.parametrize("n_rows", [50, 100]) +@pytest.mark.parametrize("n_cols", [10, 50]) +@pytest.mark.parametrize( + "metric", + [ + "euclidean", + "cityblock", + "chebyshev", + "canberra", + "correlation", + "hamming", + "jensenshannon", + "russellrao", + "cosine", + "sqeuclidean", + "inner_product", + ], +) +@pytest.mark.parametrize("inplace", [True, False]) +@pytest.mark.parametrize("dtype", [np.float32, np.float64]) +def test_distance(n_rows, n_cols, inplace, metric, dtype): + input1 = np.random.random_sample((n_rows, n_cols)) + input1 = np.asarray(input1).astype(dtype) + + # RussellRao expects boolean arrays + if metric == "russellrao": + input1[input1 < 0.5] = 0 + input1[input1 >= 0.5] = 1 + + # JensenShannon expects probability arrays + elif metric == "jensenshannon": + norm = np.sum(input1, axis=1) + input1 = (input1.T / norm).T + + output = np.zeros((n_rows, n_rows), dtype=dtype) + + if metric == "inner_product": + expected = np.matmul(input1, input1.T) + else: + expected = cdist(input1, input1, metric) + + input1_device = device_ndarray(input1) + output_device = device_ndarray(output) if inplace else None + + ret_output = pairwise_distance( + input1_device, + input1_device, + output_device, + metric, + ) + + output_device = ret_output if not inplace else output_device + + actual = output_device.copy_to_host() + + assert np.allclose(expected, actual, atol=1e-3, rtol=1e-3) diff --git a/python/cuvs/cuvs/test/test_doctests.py b/python/cuvs/cuvs/test/test_doctests.py index 68b2861df..64b0c5da6 100644 --- a/python/cuvs/cuvs/test/test_doctests.py +++ b/python/cuvs/cuvs/test/test_doctests.py @@ -20,6 +20,7 @@ import pytest +import cuvs.distance import cuvs.neighbors # Code adapted from https://github.com/rapidsai/cudf/blob/branch-23.02/python/cudf/cudf/tests/test_doctests.py # noqa @@ -93,6 +94,7 @@ def _find_doctests_in_obj(obj, finder=None, criteria=None): DOC_STRINGS.extend(_find_doctests_in_obj(cuvs.neighbors.brute_force)) DOC_STRINGS.extend(_find_doctests_in_obj(cuvs.neighbors.ivf_flat)) DOC_STRINGS.extend(_find_doctests_in_obj(cuvs.common)) +DOC_STRINGS.extend(_find_doctests_in_obj(cuvs.distance)) @pytest.mark.parametrize( diff --git a/rust/cuvs-sys/cuvs_c_wrapper.h b/rust/cuvs-sys/cuvs_c_wrapper.h index c6ab02da0..e38bc6e7a 100644 --- a/rust/cuvs-sys/cuvs_c_wrapper.h +++ b/rust/cuvs-sys/cuvs_c_wrapper.h @@ -17,6 +17,7 @@ // wrapper file containing all the C-API's we should automatically be creating rust // bindings for #include +#include #include #include #include diff --git a/rust/cuvs/src/distance/mod.rs b/rust/cuvs/src/distance/mod.rs new file mode 100644 index 000000000..0377f3e32 --- /dev/null +++ b/rust/cuvs/src/distance/mod.rs @@ -0,0 +1,87 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + + +use std::io::{stderr, Write}; + +use crate::distance_type::DistanceType; +use crate::dlpack::ManagedTensor; +use crate::error::{check_cuvs, Result}; +use crate::resources::Resources; + +/// Compute pairwise distances between X and Y +/// +/// # Arguments +/// +/// * `res` - Resources to use +/// * `x` - A matrix in device memory - shape (m, k) +/// * `y` - A matrix in device memory - shape (n, k) +/// * `distances` - A matrix in device memory that receives the output distances - shape (m, n) +/// * `metric` - DistanceType to use for building the index +/// * `metric_arg` - Optional value of `p` for Minkowski distances +pub fn pairwise_distance( + res: &Resources, + x: &ManagedTensor, + y: &ManagedTensor, + distances: &ManagedTensor, + metric: DistanceType, + metric_arg: Option, +) -> Result<()> { + unsafe { + check_cuvs(ffi::cuvsPairwiseDistance( + res.0, + x.as_ptr(), + y.as_ptr(), + distances.as_ptr(), + metric, + metric_arg.unwrap_or(2.0), + )) + } +} + +#[cfg(test)] +mod tests { + use super::*; + use ndarray::s; + use ndarray_rand::rand_distr::Uniform; + use ndarray_rand::RandomExt; + + #[test] + fn test_pairwise_distance() { + let res = Resources::new().unwrap(); + + // Create a new random dataset to index + let n_datapoints = 256; + let n_features = 16; + let dataset = + ndarray::Array::::random((n_datapoints, n_features), Uniform::new(0., 1.0)); + let dataset_device = ManagedTensor::from(&dataset).to_device(&res).unwrap(); + + let mut distances_host = ndarray::Array::::zeros((n_datapoints, n_datapoints)); + let distances = ManagedTensor::from(&distances_host) + .to_device(&res) + .unwrap(); + + pairwise_distance(&res, &dataset_device, &dataset_device, &distances, DistanceType::L2Expanded, + None).unwrap(); + + // Copy back to host memory + distances.to_host(&res, &mut distances_host).unwrap(); + + // Self distance should be 0 + assert_eq!(distances_host[[0, 0]], 0.0); + } +} diff --git a/rust/cuvs/src/lib.rs b/rust/cuvs/src/lib.rs index 6ed450c03..71a387b2c 100644 --- a/rust/cuvs/src/lib.rs +++ b/rust/cuvs/src/lib.rs @@ -20,6 +20,7 @@ //! approximate nearest neighbors search on the GPU. pub mod brute_force; pub mod cagra; +pub mod distance; pub mod distance_type; pub mod ivf_flat; mod dlpack; From 799a42df803a9df8e3c3e1976bd7374773a48e1e Mon Sep 17 00:00:00 2001 From: Ben Frederickson Date: Tue, 21 May 2024 16:51:04 -0700 Subject: [PATCH 2/6] remove comment --- python/cuvs/cuvs/distance/__init__.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/python/cuvs/cuvs/distance/__init__.py b/python/cuvs/cuvs/distance/__init__.py index aa29c5f76..5c985e7b1 100644 --- a/python/cuvs/cuvs/distance/__init__.py +++ b/python/cuvs/cuvs/distance/__init__.py @@ -12,8 +12,6 @@ # See the License for the specific language governing permissions and # limitations under the License. -# TODO: import distance_types PWD etc - from .distance import DISTANCE_TYPES, pairwise_distance __all__ = ["DISTANCE_TYPES", "pairwise_distance"] From c95753f376c3059169224474be3892c07ca2a59c Mon Sep 17 00:00:00 2001 From: Ben Frederickson Date: Tue, 21 May 2024 17:24:57 -0700 Subject: [PATCH 3/6] move pairwise_distance c-api to its own file The C++ api includes the distance.h, and having a dlpack.h include breaks the build since we only add for C code --- cpp/include/cuvs/distance/distance.h | 37 ----------- cpp/include/cuvs/distance/pairwise_distance.h | 62 +++++++++++++++++++ python/cuvs/cuvs/distance/distance.pxd | 27 +------- rust/cuvs-sys/cuvs_c_wrapper.h | 2 +- 4 files changed, 65 insertions(+), 63 deletions(-) create mode 100644 cpp/include/cuvs/distance/pairwise_distance.h diff --git a/cpp/include/cuvs/distance/distance.h b/cpp/include/cuvs/distance/distance.h index 577f13ce8..550221e8e 100644 --- a/cpp/include/cuvs/distance/distance.h +++ b/cpp/include/cuvs/distance/distance.h @@ -14,9 +14,6 @@ * limitations under the License. */ #pragma once -#include - -#include #ifdef __cplusplus extern "C" { @@ -69,40 +66,6 @@ typedef enum { Precomputed = 100 } cuvsDistanceType; -/** - * @brief Compute pairwise distances for two matrices - * - * - * Usage example: - * @code{.c} - * #include - * #include - * - * // Create cuvsResources_t - * cuvsResources_t res; - * cuvsError_t res_create_status = cuvsResourcesCreate(&res); - * - * // Assume a populated `DLManagedTensor` type here - * DLManagedTensor x; - * DLManagedTensor y; - * DLManagedTensor dist; - * - * cuvsPairwiseDistance(handle, &x, &y, &dist, L2SqrtUnexpanded, 2.0); - * @endcode - * - * @param[in] handle raft handle for managing expensive resources - * @param[in] x first set of points (size n*k) - * @param[in] y second set of points (size m*k) - * @param[out] dist output distance matrix (size n*m) - * @param[in] metric distance to evaluate - * @param[in] metric_arg metric argument (used for Minkowski distance) - */ -cuvsError_t cuvsPairwiseDistance(cuvsResources_t res, - DLManagedTensor* x, - DLManagedTensor* y, - DLManagedTensor* distances, - cuvsDistanceType metric, - float metric_arg); #ifdef __cplusplus } #endif diff --git a/cpp/include/cuvs/distance/pairwise_distance.h b/cpp/include/cuvs/distance/pairwise_distance.h new file mode 100644 index 000000000..84a694072 --- /dev/null +++ b/cpp/include/cuvs/distance/pairwise_distance.h @@ -0,0 +1,62 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#pragma once +#include + +#include +#include + +#ifdef __cplusplus +extern "C" { +#endif + +/** + * @brief Compute pairwise distances for two matrices + * + * + * Usage example: + * @code{.c} + * #include + * #include + * + * // Create cuvsResources_t + * cuvsResources_t res; + * cuvsError_t res_create_status = cuvsResourcesCreate(&res); + * + * // Assume a populated `DLManagedTensor` type here + * DLManagedTensor x; + * DLManagedTensor y; + * DLManagedTensor dist; + * + * cuvsPairwiseDistance(handle, &x, &y, &dist, L2SqrtUnexpanded, 2.0); + * @endcode + * + * @param[in] handle raft handle for managing expensive resources + * @param[in] x first set of points (size n*k) + * @param[in] y second set of points (size m*k) + * @param[out] dist output distance matrix (size n*m) + * @param[in] metric distance to evaluate + * @param[in] metric_arg metric argument (used for Minkowski distance) + */ +cuvsError_t cuvsPairwiseDistance(cuvsResources_t res, + DLManagedTensor* x, + DLManagedTensor* y, + DLManagedTensor* distances, + cuvsDistanceType metric, + float metric_arg); +#ifdef __cplusplus +} +#endif diff --git a/python/cuvs/cuvs/distance/distance.pxd b/python/cuvs/cuvs/distance/distance.pxd index e44235af6..e40d2c425 100644 --- a/python/cuvs/cuvs/distance/distance.pxd +++ b/python/cuvs/cuvs/distance/distance.pxd @@ -15,35 +15,12 @@ # # cython: language_level=3 - from cuvs.common.c_api cimport cuvsError_t, cuvsResources_t from cuvs.common.cydlpack cimport DLDataType, DLManagedTensor +from cuvs.distance_type cimport cuvsDistanceType -cdef extern from "cuvs/distance/distance.h" nogil: - ctypedef enum cuvsDistanceType: - L2Expanded - L2SqrtExpanded - CosineExpanded - L1 - L2Unexpanded - L2SqrtUnexpanded - InnerProduct - Linf - Canberra - LpUnexpanded - CorrelationExpanded - JaccardExpanded - HellingerExpanded - Haversine - BrayCurtis - JensenShannon - HammingUnexpanded - KLDivergence - RusselRaoExpanded - DiceExpanded - Precomputed - +cdef extern from "cuvs/distance/pairwise_distance.h" nogil: cuvsError_t cuvsPairwiseDistance(cuvsResources_t res, DLManagedTensor* x, DLManagedTensor* y, diff --git a/rust/cuvs-sys/cuvs_c_wrapper.h b/rust/cuvs-sys/cuvs_c_wrapper.h index e38bc6e7a..0c66928bc 100644 --- a/rust/cuvs-sys/cuvs_c_wrapper.h +++ b/rust/cuvs-sys/cuvs_c_wrapper.h @@ -17,7 +17,7 @@ // wrapper file containing all the C-API's we should automatically be creating rust // bindings for #include -#include +#include #include #include #include From 0a80db54aa460059964efb22f69e0620dc3c80ae Mon Sep 17 00:00:00 2001 From: Ben Frederickson Date: Tue, 21 May 2024 17:28:38 -0700 Subject: [PATCH 4/6] fix --- cpp/test/distance/run_pairwise_distance_c.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/cpp/test/distance/run_pairwise_distance_c.c b/cpp/test/distance/run_pairwise_distance_c.c index b8f40c972..bbcf0d7b7 100644 --- a/cpp/test/distance/run_pairwise_distance_c.c +++ b/cpp/test/distance/run_pairwise_distance_c.c @@ -13,8 +13,8 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - -#include +#include +#include void run_pairwise_distance(int64_t n_rows, int64_t n_queries, From aead431144ced16f0c414fdff25bf4a0cdbfce76 Mon Sep 17 00:00:00 2001 From: Ben Frederickson Date: Tue, 21 May 2024 19:45:07 -0700 Subject: [PATCH 5/6] fix docs --- cpp/include/cuvs/distance/pairwise_distance.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/cpp/include/cuvs/distance/pairwise_distance.h b/cpp/include/cuvs/distance/pairwise_distance.h index 84a694072..a9e49b359 100644 --- a/cpp/include/cuvs/distance/pairwise_distance.h +++ b/cpp/include/cuvs/distance/pairwise_distance.h @@ -41,10 +41,10 @@ extern "C" { * DLManagedTensor y; * DLManagedTensor dist; * - * cuvsPairwiseDistance(handle, &x, &y, &dist, L2SqrtUnexpanded, 2.0); + * cuvsPairwiseDistance(res, &x, &y, &dist, L2SqrtUnexpanded, 2.0); * @endcode * - * @param[in] handle raft handle for managing expensive resources + * @param[in] res cuvs resources object for managing expensive resources * @param[in] x first set of points (size n*k) * @param[in] y second set of points (size m*k) * @param[out] dist output distance matrix (size n*m) @@ -54,7 +54,7 @@ extern "C" { cuvsError_t cuvsPairwiseDistance(cuvsResources_t res, DLManagedTensor* x, DLManagedTensor* y, - DLManagedTensor* distances, + DLManagedTensor* dist, cuvsDistanceType metric, float metric_arg); #ifdef __cplusplus From d04668301139e5d8782b5952616113eb6f3e1016 Mon Sep 17 00:00:00 2001 From: Ben Frederickson Date: Wed, 22 May 2024 14:48:53 -0700 Subject: [PATCH 6/6] better validation of input tensors --- cpp/src/distance/pairwise_distance_c.cpp | 19 ++++++++++++++----- 1 file changed, 14 insertions(+), 5 deletions(-) diff --git a/cpp/src/distance/pairwise_distance_c.cpp b/cpp/src/distance/pairwise_distance_c.cpp index ffa5924b8..d457198a2 100644 --- a/cpp/src/distance/pairwise_distance_c.cpp +++ b/cpp/src/distance/pairwise_distance_c.cpp @@ -25,7 +25,6 @@ #include #include #include -#include #include namespace { @@ -59,14 +58,24 @@ extern "C" cuvsError_t cuvsPairwiseDistance(cuvsResources_t res, float metric_arg) { return cuvs::core::translate_exceptions([=] { - auto x = x_tensor->dl_tensor; + auto x_dt = x_tensor->dl_tensor.dtype; + auto y_dt = x_tensor->dl_tensor.dtype; + auto dist_dt = x_tensor->dl_tensor.dtype; - if (x.dtype.code == kDLFloat && x.dtype.bits == 32) { + if ((x_dt.code != kDLFloat) || (y_dt.code != kDLFloat) || (dist_dt.code != kDLFloat)) { + RAFT_FAIL("Inputs to cuvsPairwiseDistance must all be floating point tensors"); + } + + if ((x_dt.bits != y_dt.bits) || (x_dt.bits != dist_dt.bits)) { + RAFT_FAIL("Inputs to cuvsPairwiseDistance must all have the same dtype"); + } + + if (x_dt.bits == 32) { _pairwise_distance(res, x_tensor, y_tensor, distances_tensor, metric, metric_arg); - } else if (x.dtype.code == kDLFloat && x.dtype.bits == 64) { + } else if (x_dt.bits == 64) { _pairwise_distance(res, x_tensor, y_tensor, distances_tensor, metric, metric_arg); } else { - RAFT_FAIL("Unsupported x DLtensor dtype: %d and bits: %d", x.dtype.code, x.dtype.bits); + RAFT_FAIL("Unsupported DLtensor dtype: %d and bits: %d", x_dt.code, x_dt.bits); } }); }