forked from erikbern/ann-benchmarks
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Yitao
committed
Jul 10, 2024
1 parent
cf50605
commit 4f8bb26
Showing
33 changed files
with
3,845 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,46 @@ | ||
# Prerequisites | ||
*.d | ||
|
||
# Compiled Object files | ||
*.slo | ||
*.lo | ||
*.o | ||
*.obj | ||
|
||
# Precompiled Headers | ||
*.gch | ||
*.pch | ||
|
||
# Compiled Dynamic libraries | ||
*.so | ||
*.dylib | ||
*.dll | ||
|
||
# Fortran module files | ||
*.mod | ||
*.smod | ||
|
||
# Compiled Static libraries | ||
*.lai | ||
*.la | ||
*.a | ||
*.lib | ||
|
||
# Executables | ||
*.exe | ||
*.out | ||
*.app | ||
|
||
# Directories | ||
build/ | ||
|
||
# Language related | ||
.clangd | ||
compile_commands.json | ||
|
||
# data | ||
output.bin | ||
|
||
build | ||
|
||
.cache |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,23 @@ | ||
{ | ||
// Use IntelliSense to learn about possible attributes. | ||
// Hover to view descriptions of existing attributes. | ||
// For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387 | ||
"version": "0.2.0", | ||
"configurations": [ | ||
{ | ||
"type": "lldb", | ||
"request": "launch", | ||
"name": "Debug", | ||
"program": "${workspaceFolder}/build/pyglass/Debug/main", | ||
"args": [ | ||
"/home/ann-benchmark/dataset/sift/sift_base.fvecs", | ||
"/home/ann-benchmark/dataset/sift/sift_query.fvecs", | ||
"/home/ann-benchmark/dataset/sift/sift_groundtruth.ivecs", | ||
"10", | ||
"20", | ||
"15", | ||
], | ||
"cwd": "${workspaceFolder}" | ||
} | ||
] | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,72 @@ | ||
{ | ||
"files.associations": { | ||
"cctype": "cpp", | ||
"clocale": "cpp", | ||
"cmath": "cpp", | ||
"cstdarg": "cpp", | ||
"cstddef": "cpp", | ||
"cstdio": "cpp", | ||
"cstdlib": "cpp", | ||
"cstring": "cpp", | ||
"ctime": "cpp", | ||
"cwchar": "cpp", | ||
"cwctype": "cpp", | ||
"array": "cpp", | ||
"atomic": "cpp", | ||
"bit": "cpp", | ||
"*.tcc": "cpp", | ||
"bitset": "cpp", | ||
"charconv": "cpp", | ||
"chrono": "cpp", | ||
"compare": "cpp", | ||
"concepts": "cpp", | ||
"condition_variable": "cpp", | ||
"cstdint": "cpp", | ||
"deque": "cpp", | ||
"list": "cpp", | ||
"map": "cpp", | ||
"set": "cpp", | ||
"string": "cpp", | ||
"unordered_map": "cpp", | ||
"unordered_set": "cpp", | ||
"vector": "cpp", | ||
"exception": "cpp", | ||
"algorithm": "cpp", | ||
"functional": "cpp", | ||
"iterator": "cpp", | ||
"memory": "cpp", | ||
"memory_resource": "cpp", | ||
"numeric": "cpp", | ||
"optional": "cpp", | ||
"random": "cpp", | ||
"ratio": "cpp", | ||
"string_view": "cpp", | ||
"system_error": "cpp", | ||
"tuple": "cpp", | ||
"type_traits": "cpp", | ||
"utility": "cpp", | ||
"format": "cpp", | ||
"fstream": "cpp", | ||
"future": "cpp", | ||
"initializer_list": "cpp", | ||
"iomanip": "cpp", | ||
"iosfwd": "cpp", | ||
"iostream": "cpp", | ||
"istream": "cpp", | ||
"limits": "cpp", | ||
"mutex": "cpp", | ||
"new": "cpp", | ||
"numbers": "cpp", | ||
"ostream": "cpp", | ||
"semaphore": "cpp", | ||
"span": "cpp", | ||
"sstream": "cpp", | ||
"stdexcept": "cpp", | ||
"stop_token": "cpp", | ||
"streambuf": "cpp", | ||
"thread": "cpp", | ||
"cinttypes": "cpp", | ||
"typeinfo": "cpp", | ||
"variant": "cpp" | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,55 @@ | ||
# Top-level build script for the HybridSearch project.
#
# It fixes the C++ standard, fetches the {fmt} library, assembles the global
# compiler flags and then descends into the pyglass/ subdirectory.
cmake_minimum_required(VERSION 3.17)
project(HybridSearch)

# Build as C++20 and fail instead of silently decaying to an older standard.
set(CMAKE_CXX_STANDARD 20)
set(CMAKE_CXX_STANDARD_REQUIRED ON)
# Emit compile_commands.json for code-intelligence tools (e.g. clangd).
set(CMAKE_EXPORT_COMPILE_COMMANDS ON)

include(FetchContent)

# DOWNLOAD_EXTRACT_TIMESTAMP only exists since CMake 3.24. Older releases do
# not recognise the keyword and would treat it as an additional URL entry, so
# it is passed only when the running CMake understands it.
set(hybridsearch_fmt_timestamp_args "")
if(CMAKE_VERSION VERSION_GREATER_EQUAL "3.24")
  set(hybridsearch_fmt_timestamp_args DOWNLOAD_EXTRACT_TIMESTAMP true)
endif()

FetchContent_Declare(
  fmt
  URL https://github.com/fmtlib/fmt/releases/download/10.2.1/fmt-10.2.1.zip
  ${hybridsearch_fmt_timestamp_args}
)
# fmt is added before the flags below are appended, so it is configured with
# the flags that were in effect up to this point.
FetchContent_MakeAvailable(fmt)

# The flags are kept in CMAKE_CXX_FLAGS (rather than on targets) because the
# targets are defined inside pyglass/, which inherits this variable.
# NOTE: CMAKE_BUILD_TYPE is empty under multi-config generators and when the
# user does not set it; both cases take the optimised (else) branch.
if("${CMAKE_BUILD_TYPE}" STREQUAL "Debug")
  string(APPEND CMAKE_CXX_FLAGS
    " -O0 -march=native -pthread -g -Wall -mavx2 -mfma")
else()
  # AVX2 build. -fopenmp itself is supplied by find_package(OpenMP) below.
  string(APPEND CMAKE_CXX_FLAGS
    " -DNDEBUG -march=native -mtune=native -O3 -mavx2 -pthread -mfma -msse2"
    " -ftree-vectorize -fno-builtin-malloc -fno-builtin-calloc"
    " -fno-builtin-realloc -fno-builtin-free -fopenmp-simd -funroll-loops"
    " -Wfatal-errors -DUSE_AVX2")
endif()

# REQUIRED aborts configuration when OpenMP is missing, so no extra
# "found" check is needed before using the result.
find_package(OpenMP REQUIRED)
string(APPEND CMAKE_CXX_FLAGS " ${OpenMP_CXX_FLAGS}")

add_subdirectory(pyglass)
|
||
# include_directories("pyglass") | ||
# add_subdirectory("pyglass") | ||
|
||
# option(BUILD_TEST "Build the executable test" OFF) | ||
# option(BUILD_LIB "Build the shared library" ON) | ||
|
||
# if (BUILD_LIB) | ||
# set(LIBRARY_NAME hybrid_search) | ||
|
||
# # Add the source files | ||
# set(SOURCES baseline.cpp) | ||
|
||
# # Build the shared library | ||
# add_library(${LIBRARY_NAME} SHARED ${SOURCES}) | ||
# target_link_libraries(${LIBRARY_NAME} glass) | ||
# endif() | ||
|
||
# if (BUILD_TEST) | ||
# add_executable(test_alaya test_alaya.cpp) | ||
# endif() | ||
|
||
# add_executable(hybrid_search baseline.cpp) | ||
# target_link_libraries(hybrid_search glass) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,18 @@ | ||
FROM ann-benchmarks | ||
# FROM docker.io/library/ubuntu:24.04 | ||
|
||
# RUN apt-get update | ||
|
||
# RUN apt-get install -y python3-numpy python3-scipy python3-pip build-essential git | ||
# RUN pip3 install -U pip | ||
# RUN python3 --version | grep 'Python 3.10' | ||
# WORKDIR /home/app | ||
# COPY requirements.txt run_algorithm.py ./ | ||
# RUN pip3 install -r requirements.txt | ||
# RUN apt update | ||
# RUN apt install -y software-properties-common | ||
# RUN add-apt-repository ppa:ubuntu-toolchain-r/test | ||
# RUN apt-get update | ||
# RUN apt-get install -y gcc-13 g++-13 --fix-missing | ||
|
||
COPY ./ann_benchmarks/algorithms/alaya/libstdc++.so.6 /usr/lib/x86_64-linux-gnu/ |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,60 @@ | ||
# SIGMOD Programming Contest 2024: Hybrid Vector Search | ||
|
||
This repository contains open-source code for the [SIGMOD 2024 Programming Contest](http://sigmodcontest2024.eastus.cloudapp.azure.com/index.shtml), which challenges participants to design and implement efficient and scalable algorithms for **Hybrid Vector Search** in high-dimensional space. | ||
|
||
# Getting Started | ||
|
||
## Prerequisites | ||
|
||
- CMake >= 3.17 | ||
- G++ >= 9.4.0 | ||
- OpenMP >= 4.0 | ||
|
||
## Quick start | ||
We have provided a script (`run.sh`) for compiling and running. | ||
``` | ||
sh ./run.sh | ||
``` | ||
|
||
## Build | ||
Clone this repository to your local computer: | ||
``` | ||
git clone https://github.com/KevinZeng08/sigmod-2024-contest.git | ||
cd sigmod-2024-contest | ||
``` | ||
Create the build directory for compilation. | ||
``` | ||
rm -rf build | ||
mkdir build | ||
cd build | ||
``` | ||
The **dataset path** is **hardcoded** in `baseline.cpp`, as in the contest's [example solution](http://sigmodcontest2024.eastus.cloudapp.azure.com/baseline/baseline.tar.gz). | ||
``` | ||
cmake -DCMAKE_BUILD_TYPE=Release .. | ||
make -j$(nproc) | ||
``` | ||
|
||
## Run | ||
``` | ||
cd .. | ||
./build/hybrid_search | ||
``` | ||
|
||
# Team: Alaya (Southern University of Science and Technology, Zhejiang University) | ||
- Members: | ||
|
||
| Name | Email | Institutions | | ||
| ----------- | ----------- | -------------- | | ||
| Long Xiang | [email protected] | Southern University of Science and Technology | | ||
| Bowen Zeng | [email protected] | Zhejiang University| | ||
| Yu Lei | [email protected] | Zhejiang University | | ||
| Yujun He | [email protected] | Southern University of Science and Technology | | ||
| Weijian Chen | [email protected] | Southern University of Science and Technology | | ||
| Yitao Zheng | [email protected] | Southern University of Science and Technology | | ||
| Yanqi Chen | [email protected] | Southern University of Science and Technology | | ||
|
||
# Copyright | ||
|
||
Some source code is adapted from [pyglass](https://github.com/zilliztech/pyglass/tree/master). | ||
|
||
pyglass is released under the MIT license. |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,74 @@ | ||
from module_bace import BaseANN | ||
|
||
import numpy | ||
import ctypes | ||
import random | ||
import time | ||
|
||
|
||
# Load the native search library and declare the C signatures that ctypes
# should enforce when the two entry points are called.
# NOTE(review): the path is relative to the current working directory, not to
# this file -- confirm that callers always start from the expected directory.
c_module = ctypes.CDLL('../build/libhybrid_search.so')

_FLOAT_PTR = ctypes.POINTER(ctypes.c_float)
_UINT32_PTR = ctypes.POINTER(ctypes.c_uint32)

# fit(float *data, int rows, int cols)
# NOTE(review): restype is declared as void*; callers in this file ignore the
# returned value -- confirm against the C declaration.
c_fit = c_module.fit
c_fit.restype = ctypes.c_void_p
c_fit.argtypes = [_FLOAT_PTR, ctypes.c_int, ctypes.c_int]

# batch_query(float *queries, int rows, int cols, int n, uint32_t *out)
c_batch_query = c_module.batch_query
c_batch_query.restype = ctypes.c_void_p
c_batch_query.argtypes = [
    _FLOAT_PTR,
    ctypes.c_int,
    ctypes.c_int,
    ctypes.c_int,
    _UINT32_PTR,
]
|
||
|
||
class Alaya(BaseANN):
    """Adapter that forwards index building and batch search to the native
    library through the module-level ``c_fit`` / ``c_batch_query`` bindings.

    Only batch querying is supported; ``query`` raises ``NotImplementedError``.
    """

    def __init__(self, metric, test_param=0):
        """Store the metric and the test parameter and build the display name.

        Args:
            metric: Stored as ``self._metric``; not otherwise used in this class.
            test_param: Integer embedded in ``self.name``.
        """
        self._test_param = test_param
        self._metric = metric
        self.name = "Alaya(test_param=%d)" % self._test_param

    def fit(self, X: numpy.ndarray) -> None:
        """Pass the 2-D dataset ``X`` to the native ``fit`` function.

        Args:
            X: Array with exactly two dimensions (rows, cols).

        Raises:
            ValueError: If ``X.shape`` does not unpack into two values.
        """
        print('fit starts')
        print(X)
        rows, cols = X.shape
        # The native side receives a float*; force float32 and C-contiguous
        # layout so that e.g. float64 input is converted instead of having its
        # bytes reinterpreted.
        X_flat = numpy.ascontiguousarray(X, dtype=numpy.float32).reshape(-1)
        X_ctypes = X_flat.ctypes.data_as(ctypes.POINTER(ctypes.c_float))

        # X_flat stays referenced until the call returns.
        # NOTE(review): if the native side keeps the pointer after returning,
        # the caller must keep the data alive -- confirm in the C++ code.
        c_fit(X_ctypes, rows, cols)

        print('fit done')

    def query(self, v: numpy.ndarray, n: int):
        """Single-vector search is not provided by this wrapper."""
        raise NotImplementedError

    def batch_query(self, X: numpy.ndarray, n: int) -> None:
        """Run the native ``batch_query`` for all rows of ``X``.

        A buffer of ``rows * n`` uint32 values is handed to the native call and
        afterwards exposed as the flat array ``self.res``.

        Args:
            X: Array with exactly two dimensions (rows, cols).
            n: Number of result slots reserved per query row.

        Raises:
            ValueError: If ``X.shape`` does not unpack into two values.
        """
        print('batch_query starts')
        print(X)
        rows, cols = X.shape
        # Same float32 / contiguity guarantee as in fit().
        X_flat = numpy.ascontiguousarray(X, dtype=numpy.float32).reshape(-1)
        X_ctypes = X_flat.ctypes.data_as(ctypes.POINTER(ctypes.c_float))
        A_ctypes = (ctypes.c_uint32 * (rows * n))()

        c_batch_query(X_ctypes, rows, cols, n, A_ctypes)

        # frombuffer wraps the ctypes buffer without copying it.
        # NOTE(review): the result is left flat (rows * n,); presumably the
        # layout is one run of n entries per query row -- confirm before
        # reshaping to (rows, n).
        self.res = numpy.frombuffer(A_ctypes, dtype=numpy.uint32)

        print('batch_query done')
        print(self.res)
|
||
|
||
def main():
    """Smoke-test the wrapper on random data.

    Builds an index over 100000 random 100-dimensional float32 vectors, runs a
    batch of 5 queries asking for 100 results each, and checks that the result
    buffer has one slot per (query, result) pair.
    """
    num_points, num_queries, dim, k = 100000, 5, 100, 100

    # The data comes from numpy's generator, so that is the one to seed
    # (seeding the stdlib `random` module has no effect on numpy.random).
    numpy.random.seed(int(time.time()) % (2 ** 32))

    # One vectorised draw instead of one RNG call per row.
    dataset = numpy.random.uniform(0, 1000000, (num_points, dim)).astype(numpy.float32)
    algo = Alaya(0)
    algo.fit(dataset)

    queries = numpy.random.uniform(0, 1000000, (num_queries, dim)).astype(numpy.float32)
    algo.batch_query(queries, k)

    # The previous check compared against a (5, 2) placeholder array with `==`,
    # which can never succeed for a 500-element result. Random inputs have no
    # known expected neighbours, so only the result size is verified here.
    assert algo.res.shape == (num_queries * k,)


if __name__ == '__main__':
    main()
|
||
|
||
|
||
|
||
|
||
|
Oops, something went wrong.