diff --git a/conda/faiss-gpu-raft/meta.yaml b/conda/faiss-gpu-raft/meta.yaml index 1dde8e9868..8b6d974e19 100644 --- a/conda/faiss-gpu-raft/meta.yaml +++ b/conda/faiss-gpu-raft/meta.yaml @@ -112,7 +112,7 @@ outputs: - pytorch - pytorch-cuda {{ cuda_constraints }} commands: - - python -X faulthandler -m unittest discover -v -s tests/ -p "test_*" + - python -X faulthandler -m unittest discover -v -s tests/ -p "(?!.*test_external_module\.py)test_.*py" - python -X faulthandler -m unittest discover -v -s tests/ -p "torch_*" - cp tests/common_faiss_tests.py faiss/gpu/test - python -X faulthandler -m unittest discover -v -s faiss/gpu/test/ -p "test_*" diff --git a/conda/faiss-gpu/meta.yaml b/conda/faiss-gpu/meta.yaml index 05f7b59008..86937d318c 100644 --- a/conda/faiss-gpu/meta.yaml +++ b/conda/faiss-gpu/meta.yaml @@ -101,7 +101,7 @@ outputs: - pytorch - pytorch-cuda {{ cuda_constraints }} commands: - - python -X faulthandler -m unittest discover -v -s tests/ -p "test_*" + - python -X faulthandler -m unittest discover -v -s tests/ -p "(?!.*test_external_module\.py)test_.*py" - python -X faulthandler -m unittest discover -v -s tests/ -p "torch_*" - cp tests/common_faiss_tests.py faiss/gpu/test - python -X faulthandler -m unittest discover -v -s faiss/gpu/test/ -p "test_*" diff --git a/conda/faiss/meta.yaml b/conda/faiss/meta.yaml index 79e7be953e..e937a6557c 100644 --- a/conda/faiss/meta.yaml +++ b/conda/faiss/meta.yaml @@ -86,7 +86,7 @@ outputs: - scipy - pytorch commands: - - python -X faulthandler -m unittest discover -v -s tests/ -p "test_*" + - python -X faulthandler -m unittest discover -v -s tests/ -p "(?!.*test_external_module\.py)test_.*py" - python -X faulthandler -m unittest discover -v -s tests/ -p "torch_*" - sh test_cpu_dispatch.sh # [linux64] files: diff --git a/faiss/python/CMakeLists.txt b/faiss/python/CMakeLists.txt index aea99af795..38e79f768c 100644 --- a/faiss/python/CMakeLists.txt +++ b/faiss/python/CMakeLists.txt @@ -61,6 +61,7 @@ 
configure_swigfaiss(swigfaiss.swig) configure_swigfaiss(swigfaiss_avx2.swig) configure_swigfaiss(swigfaiss_avx512.swig) configure_swigfaiss(swigfaiss_sve.swig) +configure_swigfaiss(faiss_example_external_module.swig) if(TARGET faiss) # Manually add headers as extra dependencies of swigfaiss. @@ -74,6 +75,8 @@ if(TARGET faiss) "${faiss_SOURCE_DIR}/faiss/${h}") list(APPEND SWIG_MODULE_swigfaiss_sve_EXTRA_DEPS "${faiss_SOURCE_DIR}/faiss/${h}") + list(APPEND SWIG_MODULE_faiss_example_external_module_EXTRA_DEPS + "${faiss_SOURCE_DIR}/faiss/${h}") endforeach() if(FAISS_ENABLE_ROCM) foreach(h ${FAISS_GPU_HEADERS}) @@ -83,6 +86,8 @@ if(TARGET faiss) "${faiss_SOURCE_DIR}/faiss/gpu-rocm/${h}") list(APPEND SWIG_MODULE_swigfaiss_avx512_EXTRA_DEPS "${faiss_SOURCE_DIR}/faiss/gpu-rocm/${h}") + list(APPEND SWIG_MODULE_faiss_example_external_module_EXTRA_DEPS + "${faiss_SOURCE_DIR}/faiss/gpu-rocm/${h}") endforeach() else() foreach(h ${FAISS_GPU_HEADERS}) @@ -94,6 +99,8 @@ if(TARGET faiss) "${faiss_SOURCE_DIR}/faiss/gpu/${h}") list(APPEND SWIG_MODULE_swigfaiss_sve_EXTRA_DEPS "${faiss_SOURCE_DIR}/faiss/gpu/${h}") + list(APPEND SWIG_MODULE_faiss_example_external_module_EXTRA_DEPS + "${faiss_SOURCE_DIR}/faiss/gpu/${h}") endforeach() endif() else() @@ -152,18 +159,29 @@ if(NOT FAISS_OPT_LEVEL STREQUAL "sve") set_target_properties(swigfaiss_sve PROPERTIES EXCLUDE_FROM_ALL TRUE) endif() +set_property(SOURCE faiss_example_external_module.swig + PROPERTY SWIG_MODULE_NAME faiss_example_external_module) +swig_add_library(faiss_example_external_module + TYPE SHARED + LANGUAGE python + SOURCES faiss_example_external_module.swig +) +set_property(TARGET faiss_example_external_module PROPERTY SWIG_COMPILE_OPTIONS -doxygen) + if(NOT WIN32) # NOTE: Python does not recognize the dylib extension. 
set_target_properties(swigfaiss PROPERTIES SUFFIX .so) set_target_properties(swigfaiss_avx2 PROPERTIES SUFFIX .so) set_target_properties(swigfaiss_avx512 PROPERTIES SUFFIX .so) set_target_properties(swigfaiss_sve PROPERTIES SUFFIX .so) + set_target_properties(faiss_example_external_module PROPERTIES SUFFIX .so) else() # we need bigobj for the swig wrapper target_compile_options(swigfaiss PRIVATE /bigobj) target_compile_options(swigfaiss_avx2 PRIVATE /bigobj) target_compile_options(swigfaiss_avx512 PRIVATE /bigobj) target_compile_options(swigfaiss_sve PRIVATE /bigobj) + target_compile_options(faiss_example_external_module PRIVATE /bigobj) endif() if(FAISS_ENABLE_GPU) @@ -171,6 +189,7 @@ if(FAISS_ENABLE_GPU) target_link_libraries(swigfaiss PRIVATE hip::host) target_link_libraries(swigfaiss_avx2 PRIVATE hip::host) target_link_libraries(swigfaiss_avx512 PRIVATE hip::host) + target_link_libraries(faiss_example_external_module PRIVATE hip::host) else() find_package(CUDAToolkit REQUIRED) if(FAISS_ENABLE_RAFT) @@ -221,12 +240,21 @@ target_link_libraries(swigfaiss_sve PRIVATE OpenMP::OpenMP_CXX ) +target_link_libraries(faiss_example_external_module PRIVATE + Python::Module + Python::NumPy + OpenMP::OpenMP_CXX + swigfaiss + faiss +) + # Hack so that python_callbacks.h can be included as # `#include `. target_include_directories(swigfaiss PRIVATE ${PROJECT_SOURCE_DIR}/../..) target_include_directories(swigfaiss_avx2 PRIVATE ${PROJECT_SOURCE_DIR}/../..) target_include_directories(swigfaiss_avx512 PRIVATE ${PROJECT_SOURCE_DIR}/../..) target_include_directories(swigfaiss_sve PRIVATE ${PROJECT_SOURCE_DIR}/../..) +target_include_directories(faiss_example_external_module PRIVATE ${PROJECT_SOURCE_DIR}/../..) 
find_package(Python REQUIRED COMPONENTS Development NumPy @@ -252,6 +280,7 @@ target_link_libraries(swigfaiss PRIVATE faiss_python_callbacks) target_link_libraries(swigfaiss_avx2 PRIVATE faiss_python_callbacks) target_link_libraries(swigfaiss_avx512 PRIVATE faiss_python_callbacks) target_link_libraries(swigfaiss_sve PRIVATE faiss_python_callbacks) +target_link_libraries(faiss_example_external_module PRIVATE faiss_python_callbacks) configure_file(setup.py setup.py COPYONLY) configure_file(__init__.py __init__.py COPYONLY) diff --git a/faiss/python/faiss_example_external_module.swig b/faiss/python/faiss_example_external_module.swig new file mode 100644 index 0000000000..b26b9b2fff --- /dev/null +++ b/faiss/python/faiss_example_external_module.swig @@ -0,0 +1,133 @@ + +%module faiss_example_external_module; + + +// Put C++ includes here +%{ + +#include +#include + +%} + +#pragma SWIG nowarn=322 + +typedef unsigned char uint8_t; +typedef unsigned short uint16_t; +typedef unsigned int uint32_t; + +typedef signed char int8_t; +typedef short int16_t; +typedef int int32_t; + +#ifdef SWIGWORDSIZE64 +typedef unsigned long uint64_t; +typedef long int64_t; +#else +typedef unsigned long long uint64_t; +typedef long long int64_t; +#endif + +typedef uint64_t size_t; + +// This means: assume what's declared in these .h files is provided +// by the Faiss module. +%import(module="faiss") "faiss/MetricType.h" +%import(module="faiss") "faiss/impl/IDSelector.h" + +// functions to be parsed here + +// This is important to release GIL and do Faiss exception handling +%exception { + Py_BEGIN_ALLOW_THREADS + try { + $action + } catch(faiss::FaissException & e) { + PyEval_RestoreThread(_save); + + if (PyErr_Occurred()) { + // some previous code already set the error type. 
+ } else { + PyErr_SetString(PyExc_RuntimeError, e.what()); + } + SWIG_fail; + } catch(std::bad_alloc & ba) { + PyEval_RestoreThread(_save); + PyErr_SetString(PyExc_MemoryError, "std::bad_alloc"); + SWIG_fail; + } + Py_END_ALLOW_THREADS +} + + +// any class or function declared below will be made available +// in the module. +%inline %{ + +struct IDSelectorModulo : faiss::IDSelector { + int mod; + + IDSelectorModulo(int mod): mod(mod) {} + + bool is_member(faiss::idx_t id) const { + return id % mod == 0; + } + + ~IDSelectorModulo() override {} +}; + +faiss::idx_t sum_of_idx(size_t n, const faiss::idx_t *tab) { + faiss::idx_t sum = 0; + for(size_t i = 0; i < n; i++) { + sum += tab[i]; + } + return sum; +} + +float sum_of_float32(size_t n, const float *tab) { + float sum = 0; + for(size_t i = 0; i < n; i++) { + sum += tab[i]; + } + return sum; +} + +double sum_of_float64(size_t n, const double *tab) { + double sum = 0; + for(size_t i = 0; i < n; i++) { + sum += tab[i]; + } + return sum; +} + +%} + +/********************************************** + * To test if passing a swig_ptr on all array types works + **********************************************/ + +%define SUM_OF_TYPE(ty) + +%inline %{ + +ty##_t sum_of_##ty (size_t n, const ty##_t * tab) { + ty##_t sum = 0; + for(size_t i = 0; i < n; i++) { + sum += tab[i]; + } + return sum; +} + +%} + +%enddef + +SUM_OF_TYPE(uint8); +SUM_OF_TYPE(uint16); +SUM_OF_TYPE(uint32); +SUM_OF_TYPE(uint64); + +SUM_OF_TYPE(int8); +SUM_OF_TYPE(int16); +SUM_OF_TYPE(int32); +SUM_OF_TYPE(int64); diff --git a/faiss/python/setup.py b/faiss/python/setup.py index 46cacc0514..b009a4474d 100644 --- a/faiss/python/setup.py +++ b/faiss/python/setup.py @@ -4,10 +4,12 @@ # LICENSE file in the root directory of this source tree. 
from __future__ import print_function -from setuptools import setup, find_packages + import os -import shutil import platform +import shutil + +from setuptools import find_packages, setup # make the faiss python package dir shutil.rmtree("faiss", ignore_errors=True) @@ -20,25 +22,32 @@ shutil.copyfile("extra_wrappers.py", "faiss/extra_wrappers.py") shutil.copyfile("array_conversions.py", "faiss/array_conversions.py") -ext = ".pyd" if platform.system() == 'Windows' else ".so" -prefix = "Release/" * (platform.system() == 'Windows') +ext = ".pyd" if platform.system() == "Windows" else ".so" +prefix = "Release/" * (platform.system() == "Windows") swigfaiss_generic_lib = f"{prefix}_swigfaiss{ext}" swigfaiss_avx2_lib = f"{prefix}_swigfaiss_avx2{ext}" swigfaiss_avx512_lib = f"{prefix}_swigfaiss_avx512{ext}" callbacks_lib = f"{prefix}libfaiss_python_callbacks{ext}" swigfaiss_sve_lib = f"{prefix}_swigfaiss_sve{ext}" +faiss_example_external_module_lib = f"{prefix}_faiss_example_external_module{ext}" found_swigfaiss_generic = os.path.exists(swigfaiss_generic_lib) found_swigfaiss_avx2 = os.path.exists(swigfaiss_avx2_lib) found_swigfaiss_avx512 = os.path.exists(swigfaiss_avx512_lib) found_callbacks = os.path.exists(callbacks_lib) found_swigfaiss_sve = os.path.exists(swigfaiss_sve_lib) +found_faiss_example_external_module_lib = os.path.exists( + faiss_example_external_module_lib +) -assert (found_swigfaiss_generic or found_swigfaiss_avx2 or found_swigfaiss_avx512 or found_swigfaiss_sve), \ - f"Could not find {swigfaiss_generic_lib} or " \ - f"{swigfaiss_avx2_lib} or {swigfaiss_avx512_lib} or {swigfaiss_sve_lib}. " \ +assert ( + found_swigfaiss_generic or found_swigfaiss_avx2 or found_swigfaiss_avx512 or found_swigfaiss_sve +), ( + f"Could not find {swigfaiss_generic_lib} or " + f"{swigfaiss_avx2_lib} or {swigfaiss_avx512_lib} or {swigfaiss_sve_lib}. " f"Faiss may not be compiled yet." 
+) if found_swigfaiss_generic: print(f"Copying {swigfaiss_generic_lib}") @@ -64,7 +73,17 @@ shutil.copyfile("swigfaiss_sve.py", "faiss/swigfaiss_sve.py") shutil.copyfile(swigfaiss_sve_lib, f"faiss/_swigfaiss_sve{ext}") -long_description=""" +if found_faiss_example_external_module_lib: + print(f"Copying {faiss_example_external_module_lib}") + shutil.copyfile( + "faiss_example_external_module.py", "faiss/faiss_example_external_module.py" + ) + shutil.copyfile( + faiss_example_external_module_lib, + f"faiss/_faiss_example_external_module{ext}", + ) + +long_description = """ Faiss is a library for efficient similarity search and clustering of dense vectors. It contains algorithms that search in sets of vectors of any size, up to ones that possibly do not fit in RAM. It also contains supporting @@ -73,20 +92,19 @@ are implemented on the GPU. It is developed by Facebook AI Research. """ setup( - name='faiss', - version='1.9.0', - description='A library for efficient similarity search and clustering of dense vectors', + name="faiss", + version="1.9.0", + description="A library for efficient similarity search and clustering of dense vectors", long_description=long_description, - url='https://github.com/facebookresearch/faiss', - author='Matthijs Douze, Jeff Johnson, Herve Jegou, Lucas Hosseini', - author_email='matthijs@meta.com', - license='MIT', - keywords='search nearest neighbors', - - install_requires=['numpy', 'packaging'], - packages=['faiss', 'faiss.contrib', 'faiss.contrib.torch'], + url="https://github.com/facebookresearch/faiss", + author="Matthijs Douze, Jeff Johnson, Herve Jegou, Lucas Hosseini", + author_email="matthijs@meta.com", + license="MIT", + keywords="search nearest neighbors", + install_requires=["numpy", "packaging"], + packages=["faiss", "faiss.contrib", "faiss.contrib.torch"], package_data={ - 'faiss': ['*.so', '*.pyd'], + "faiss": ["*.so", "*.pyd"], }, zip_safe=False, ) diff --git a/faiss/python/swigfaiss.swig b/faiss/python/swigfaiss.swig index 
4d44fb650b..d20966bac3 100644 --- a/faiss/python/swigfaiss.swig +++ b/faiss/python/swigfaiss.swig @@ -33,7 +33,24 @@ #pragma SWIG nowarn=512 #pragma SWIG nowarn=362 -%include +// we need explicit control of these typedefs... +// %include +typedef unsigned char uint8_t; +typedef unsigned short uint16_t; +typedef unsigned int uint32_t; + +// char != unsigned char AND char != signed char so be explicit +typedef signed char int8_t; +typedef short int16_t; +typedef int int32_t; + +#ifdef SWIGWORDSIZE64 +typedef unsigned long uint64_t; +typedef long int64_t; +#else +typedef unsigned long long uint64_t; +typedef long long int64_t; +#endif typedef uint64_t size_t; @@ -239,10 +256,15 @@ namespace std { // primitive array types %template(Float32Vector) std::vector; %template(Float64Vector) std::vector; + +// weird interaction within C++ between char and signed char +%ignore Int8Vector::swap; + %template(Int8Vector) std::vector; %template(Int16Vector) std::vector; %template(Int32Vector) std::vector; %template(Int64Vector) std::vector; + %template(UInt8Vector) std::vector; %template(UInt16Vector) std::vector; %template(UInt32Vector) std::vector; @@ -1086,6 +1108,13 @@ void *memcpy(void *dest, const void *src, size_t n); #ifdef SWIGPYTHON +// transfer SWIG flag to C++ +#ifdef SWIGWORDSIZE64 +%{ +#define SWIGWORDSIZE64_CPP +%} +#endif + %{ PyObject *swig_ptr (PyObject *a) { @@ -1120,7 +1149,7 @@ PyObject *swig_ptr (PyObject *a) return SWIG_NewPointerObj(data, SWIGTYPE_p_unsigned_char, 0); } if(PyArray_TYPE(ao) == NPY_INT8) { - return SWIG_NewPointerObj(data, SWIGTYPE_p_char, 0); + return SWIG_NewPointerObj(data, SWIGTYPE_p_signed_char, 0); } if(PyArray_TYPE(ao) == NPY_UINT16) { return SWIG_NewPointerObj(data, SWIGTYPE_p_unsigned_short, 0); @@ -1141,14 +1170,14 @@ PyObject *swig_ptr (PyObject *a) // Convert npy64 either long or long long and it depends on how compiler define int64_t. // In the 64bit machine, typically the int64_t should be long but it is not hold for Apple osx. 
// In this case, we want to convert npy64 to long_Long in osx -#if __SIZEOF_LONG__ == 8 && !defined(__APPLE__) +#ifdef SWIGWORDSIZE64_CPP return SWIG_NewPointerObj(data, SWIGTYPE_p_unsigned_long, 0); #else return SWIG_NewPointerObj(data, SWIGTYPE_p_unsigned_long_long, 0); #endif } if(PyArray_TYPE(ao) == NPY_INT64) { -#if __SIZEOF_LONG__ == 8 && !defined(__APPLE__) +#ifdef SWIGWORDSIZE64_CPP return SWIG_NewPointerObj(data, SWIGTYPE_p_long, 0); #else return SWIG_NewPointerObj(data, SWIGTYPE_p_long_long, 0); @@ -1205,8 +1234,8 @@ PyObject * rev_swig_ptr(ctype *src, size_t size); REV_SWIG_PTR(float, NPY_FLOAT32); REV_SWIG_PTR(double, NPY_FLOAT64); -REV_SWIG_PTR(unsigned char, NPY_UINT8); -REV_SWIG_PTR(char, NPY_INT8); +REV_SWIG_PTR(uint8_t, NPY_UINT8); +REV_SWIG_PTR(int8_t, NPY_INT8); REV_SWIG_PTR(unsigned short, NPY_UINT16); REV_SWIG_PTR(short, NPY_INT16); REV_SWIG_PTR(int, NPY_INT32); diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index c41edf0cca..faa1493c22 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -88,6 +88,8 @@ if(FAISS_OPT_LEVEL STREQUAL "sve") target_link_libraries(faiss_test PRIVATE faiss_sve) endif() +target_link_libraries(faiss_test PUBLIC faiss_example_external_module) + include(FetchContent) FetchContent_Declare( googletest diff --git a/tests/test_external_module.py b/tests/test_external_module.py new file mode 100644 index 0000000000..5ea6d328ff --- /dev/null +++ b/tests/test_external_module.py @@ -0,0 +1,61 @@ +import unittest + +import faiss + +import faiss.faiss_example_external_module as external_module + +import numpy as np + + +class TestCustomIDSelector(unittest.TestCase): + """test if we can construct a custom IDSelector""" + + def test_IDSelector(self): + ids = external_module.IDSelectorModulo(3) + self.assertFalse(ids.is_member(1)) + self.assertTrue(ids.is_member(3)) + + +class TestArrayConversions(unittest.TestCase): + + def test_idx_array(self): + tab = np.arange(10).astype("int64") + new_sum = 
external_module.sum_of_idx(len(tab), faiss.swig_ptr(tab)) + self.assertEqual(new_sum, tab.sum()) + + def do_array_test(self, ty): + tab = np.arange(10).astype(ty) + func = getattr(external_module, "sum_of_" + ty) + print("perceived type", faiss.swig_ptr(tab)) + new_sum = func(len(tab), faiss.swig_ptr(tab)) + self.assertEqual(new_sum, tab.sum()) + + def test_sum_uint8(self): + self.do_array_test("uint8") + + def test_sum_uint16(self): + self.do_array_test("uint16") + + def test_sum_uint32(self): + self.do_array_test("uint32") + + def test_sum_uint64(self): + self.do_array_test("uint64") + + def test_sum_int8(self): + self.do_array_test("int8") + + def test_sum_int16(self): + self.do_array_test("int16") + + def test_sum_int32(self): + self.do_array_test("int32") + + def test_sum_int64(self): + self.do_array_test("int64") + + def test_sum_float32(self): + self.do_array_test("float32") + + def test_sum_float64(self): + self.do_array_test("float64")