Skip to content

Commit

Permalink
Udpate alaya
Browse files Browse the repository at this point in the history
  • Loading branch information
Yitao committed Jul 10, 2024
1 parent cf50605 commit 4f8bb26
Show file tree
Hide file tree
Showing 33 changed files with 3,845 additions and 0 deletions.
46 changes: 46 additions & 0 deletions ann_benchmarks/algorithms/alaya/.gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
# Prerequisites
*.d

# Compiled Object files
*.slo
*.lo
*.o
*.obj

# Precompiled Headers
*.gch
*.pch

# Compiled Dynamic libraries
*.so
*.dylib
*.dll

# Fortran module files
*.mod
*.smod

# Compiled Static libraries
*.lai
*.la
*.a
*.lib

# Executables
*.exe
*.out
*.app

# Directories
build/

# Language related
.clangd
compile_commands.json

# data
output.bin

build

.cache
23 changes: 23 additions & 0 deletions ann_benchmarks/algorithms/alaya/.vscode/launch.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
{
// Use IntelliSense to learn about possible attributes.
// Hover to view descriptions of existing attributes.
// For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387
"version": "0.2.0",
"configurations": [
{
"type": "lldb",
"request": "launch",
"name": "Debug",
"program": "${workspaceFolder}/build/pyglass/Debug/main",
"args": [
"/home/ann-benchmark/dataset/sift/sift_base.fvecs",
"/home/ann-benchmark/dataset/sift/sift_query.fvecs",
"/home/ann-benchmark/dataset/sift/sift_groundtruth.ivecs",
"10",
"20",
"15"
],
"cwd": "${workspaceFolder}"
}
]
}
72 changes: 72 additions & 0 deletions ann_benchmarks/algorithms/alaya/.vscode/settings.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,72 @@
{
"files.associations": {
"cctype": "cpp",
"clocale": "cpp",
"cmath": "cpp",
"cstdarg": "cpp",
"cstddef": "cpp",
"cstdio": "cpp",
"cstdlib": "cpp",
"cstring": "cpp",
"ctime": "cpp",
"cwchar": "cpp",
"cwctype": "cpp",
"array": "cpp",
"atomic": "cpp",
"bit": "cpp",
"*.tcc": "cpp",
"bitset": "cpp",
"charconv": "cpp",
"chrono": "cpp",
"compare": "cpp",
"concepts": "cpp",
"condition_variable": "cpp",
"cstdint": "cpp",
"deque": "cpp",
"list": "cpp",
"map": "cpp",
"set": "cpp",
"string": "cpp",
"unordered_map": "cpp",
"unordered_set": "cpp",
"vector": "cpp",
"exception": "cpp",
"algorithm": "cpp",
"functional": "cpp",
"iterator": "cpp",
"memory": "cpp",
"memory_resource": "cpp",
"numeric": "cpp",
"optional": "cpp",
"random": "cpp",
"ratio": "cpp",
"string_view": "cpp",
"system_error": "cpp",
"tuple": "cpp",
"type_traits": "cpp",
"utility": "cpp",
"format": "cpp",
"fstream": "cpp",
"future": "cpp",
"initializer_list": "cpp",
"iomanip": "cpp",
"iosfwd": "cpp",
"iostream": "cpp",
"istream": "cpp",
"limits": "cpp",
"mutex": "cpp",
"new": "cpp",
"numbers": "cpp",
"ostream": "cpp",
"semaphore": "cpp",
"span": "cpp",
"sstream": "cpp",
"stdexcept": "cpp",
"stop_token": "cpp",
"streambuf": "cpp",
"thread": "cpp",
"cinttypes": "cpp",
"typeinfo": "cpp",
"variant": "cpp"
}
}
55 changes: 55 additions & 0 deletions ann_benchmarks/algorithms/alaya/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
cmake_minimum_required(VERSION 3.17)
project(HybridSearch)

# Compile everything as C++20 and fail instead of silently falling back to an
# older standard when the compiler cannot provide it.
set(CMAKE_CXX_STANDARD_REQUIRED ON)
set(CMAKE_CXX_STANDARD 20)

# Write compile_commands.json into the build tree for code-intelligence tools.
set(CMAKE_EXPORT_COMPILE_COMMANDS ON)

include(FetchContent)

# Download the {fmt} formatting library (release 10.2.1) at configure time.
#
# DOWNLOAD_EXTRACT_TIMESTAMP was introduced in CMake 3.24, but this project
# accepts CMake 3.17 and newer. Older versions do not know the keyword and
# would treat it (and its value) as additional URL entries, so it is only
# forwarded when the running CMake understands it.
set(fmt_fetch_options "")
if(CMAKE_VERSION VERSION_GREATER_EQUAL "3.24")
  list(APPEND fmt_fetch_options DOWNLOAD_EXTRACT_TIMESTAMP true)
endif()

FetchContent_Declare(
  fmt
  URL https://github.com/fmtlib/fmt/releases/download/10.2.1/fmt-10.2.1.zip
  ${fmt_fetch_options}
)
FetchContent_MakeAvailable(fmt)

# Global C++ flags, selected by build type. They are appended to
# CMAKE_CXX_FLAGS (rather than attached to a target) so that they also apply to
# targets created in subdirectories added after this point.
#
# NOTE(review): -march=native ties the produced binaries to the build machine's
# CPU; confirm this is intended wherever the artifacts are shipped elsewhere.
if("${CMAKE_BUILD_TYPE}" STREQUAL "Debug")
  # Unoptimised build with debug symbols and warnings enabled.
  string(APPEND CMAKE_CXX_FLAGS
    " -O0 -march=native -pthread -g -Wall -mavx2 -mfma")
else()
  # Every other build type (including an empty CMAKE_BUILD_TYPE) gets the
  # optimised AVX2 configuration; USE_AVX2 is defined only on this path.
  string(APPEND CMAKE_CXX_FLAGS
    " -DNDEBUG -march=native -mtune=native -O3 -mavx2 -pthread -mfma -msse2"
    " -ftree-vectorize -fno-builtin-malloc -fno-builtin-calloc"
    " -fno-builtin-realloc -fno-builtin-free -fopenmp -fopenmp-simd"
    " -funroll-loops -Wfatal-errors -DUSE_AVX2")
endif()

# OpenMP is mandatory: configuration stops here when it cannot be located.
find_package(OpenMP REQUIRED)
if(OPENMP_FOUND)
  # Add the compiler-specific OpenMP switches to the global C++ flags.
  string(APPEND CMAKE_CXX_FLAGS " ${OpenMP_CXX_FLAGS}")
endif()

# Process pyglass/CMakeLists.txt; the targets defined there are built with the
# global C++ flags configured earlier in this file.
add_subdirectory(pyglass)

# The remainder of this file is disabled legacy configuration kept for
# reference only; none of it is evaluated.

# include_directories("pyglass")
# add_subdirectory("pyglass")

# option(BUILD_TEST "Build the executable test" OFF)
# option(BUILD_LIB "Build the shared library" ON)

# if (BUILD_LIB)
# set(LIBRARY_NAME hybrid_search)

# # Add the source files
# set(SOURCES baseline.cpp)

# # Compile them into a shared library
# add_library(${LIBRARY_NAME} SHARED ${SOURCES})
# target_link_libraries(${LIBRARY_NAME} glass)
# endif()

# if (BUILD_TEST)
# add_executable(test_alaya test_alaya.cpp)
# endif()

# add_executable(hybrid_search baseline.cpp)
# target_link_libraries(hybrid_search glass)
18 changes: 18 additions & 0 deletions ann_benchmarks/algorithms/alaya/Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
# Image for the alaya algorithm, layered on top of the `ann-benchmarks` image.
FROM ann-benchmarks
# FROM docker.io/library/ubuntu:24.04

# Disabled from-scratch setup (Python tooling plus a GCC 13 toolchain), kept
# for reference only; none of the commented RUN/COPY/WORKDIR lines execute.

# RUN apt-get update

# RUN apt-get install -y python3-numpy python3-scipy python3-pip build-essential git
# RUN pip3 install -U pip
# RUN python3 --version | grep 'Python 3.10'
# WORKDIR /home/app
# COPY requirements.txt run_algorithm.py ./
# RUN pip3 install -r requirements.txt
# RUN apt update
# RUN apt install -y software-properties-common
# RUN add-apt-repository ppa:ubuntu-toolchain-r/test
# RUN apt-get update
# RUN apt-get install -y gcc-13 g++13 --fix-missing

# Copy the libstdc++.so.6 shipped in the alaya directory into the image's
# system library directory.
# NOTE(review): presumably this replaces the base image's libstdc++ because the
# prebuilt alaya binaries need a newer C++ runtime -- confirm. The path is
# relative to the build context, so the image must be built from the
# repository root.
COPY ./ann_benchmarks/algorithms/alaya/libstdc++.so.6 /usr/lib/x86_64-linux-gnu/
60 changes: 60 additions & 0 deletions ann_benchmarks/algorithms/alaya/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
# SIGMOD Programming Contest 2024: Hybrid Vector Search

This repository contains the open-source code for the [SIGMOD 2024 Programming Contest](http://sigmodcontest2024.eastus.cloudapp.azure.com/index.shtml), which challenges participants to design and implement efficient and scalable algorithms for **Hybrid Vector Search** in high-dimensional space.

# Getting Started

## Prerequisites

- CMake >= 3.17
- G++ >= 9.4.0
- OpenMP >= 4.0

## Quick start
We have provided a script (`run.sh`) for compiling and running.
```
sh ./run.sh
```

## Build
Clone this repository to your local computer:
```
git clone https://github.com/KevinZeng08/sigmod-2024-contest.git
cd sigmod-2024-contest
```
Create the build directory for compilation.
```
rm -rf build
mkdir build
cd build
```
The **dataset path** is **hardcoded** in `baseline.cpp`, following the contest's [example solution](http://sigmodcontest2024.eastus.cloudapp.azure.com/baseline/baseline.tar.gz). Configure and build with:
```
cmake -DCMAKE_BUILD_TYPE=Release ..
make -j$(nproc)
```

## Run
```
cd ..
./build/hybrid_search
```

# Team: Alaya (Southern University of Science and Technology, Zhejiang University)
- Members:

| Name | Email | Institutions |
| ----------- | ----------- | -------------- |
| Long Xiang | [email protected] | Southern University of Science and Technology |
| Bowen Zeng | [email protected] | Zhejiang University|
| Yu Lei | [email protected] | Zhejiang University |
| Yujun He | [email protected] | Southern University of Science and Technology |
| Weijian Chen | [email protected] | Southern University of Science and Technology |
| Yitao Zheng | [email protected] | Southern University of Science and Technology |
| Yanqi Chen | [email protected] | Southern University of Science and Technology |

# Copyright

Some of the source code is adapted from [pyglass](https://github.com/zilliztech/pyglass/tree/master).

pyglass is released under the MIT License.
74 changes: 74 additions & 0 deletions ann_benchmarks/algorithms/alaya/ann_test/module.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,74 @@
import ctypes
import os
import random
import time

import numpy

from module_bace import BaseANN


# Load the native library built from this project.
#
# The library is looked up relative to this file first, so the module can be
# imported from any working directory. The original working-directory-relative
# path is kept as a fallback for existing workflows.
_LIB_RELATIVE_PATH = os.path.join('..', 'build', 'libhybrid_search.so')
_lib_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), _LIB_RELATIVE_PATH)
if not os.path.exists(_lib_path):
    _lib_path = '../build/libhybrid_search.so'

c_module = ctypes.CDLL(_lib_path)

# fit(float *data, int rows, int cols)
# NOTE(review): the return type is declared as a pointer, but callers in this
# file ignore the result -- confirm against the C signature.
c_fit = c_module.fit
c_fit.argtypes = [ctypes.POINTER(ctypes.c_float), ctypes.c_int, ctypes.c_int]
c_fit.restype = ctypes.c_void_p

# batch_query(float *queries, int rows, int cols, int n, uint32_t *out)
# The final argument is an output buffer allocated by the caller.
c_batch_query = c_module.batch_query
c_batch_query.argtypes = [
    ctypes.POINTER(ctypes.c_float),
    ctypes.c_int,
    ctypes.c_int,
    ctypes.c_int,
    ctypes.POINTER(ctypes.c_uint32),
]
c_batch_query.restype = ctypes.c_void_p


class Alaya(BaseANN):
    """Thin Python wrapper around the native `fit` / `batch_query` functions.

    Data is handed to the native library as a raw ``float*``, so every array
    is converted to a C-contiguous float32 buffer before the call.
    """

    def __init__(self, metric, test_param=0):
        """Store the configuration and build the display name.

        Args:
            metric: Distance metric identifier; only stored, not used here.
            test_param: Integer tuning parameter, included in ``self.name``.
        """
        self._test_param = test_param
        self._metric = metric
        self.name = "Alaya(test_param=%d)" % self._test_param

    def fit(self, X: numpy.ndarray):
        """Pass the dataset to the native ``fit`` function.

        Args:
            X: 2-D array with one vector per row.

        Raises:
            ValueError: If ``X`` is not 2-D (from unpacking ``X.shape``).
        """
        print('fit starts')
        print(X)
        rows, cols = X.shape
        # Reinterpreting a non-float32 buffer as float* would silently feed
        # garbage to the native code, so enforce dtype and layout here.
        data = numpy.ascontiguousarray(X, dtype=numpy.float32)
        # NOTE(review): the native side may keep the pointer rather than copy
        # the data; hold a reference so the buffer outlives this call.
        self._fit_data = data
        X_ctypes = data.ctypes.data_as(ctypes.POINTER(ctypes.c_float))

        c_fit(X_ctypes, rows, cols)

        print('fit done')

    def query(self, v: numpy.ndarray, n: int):
        """Single-vector queries are not supported; use ``batch_query``."""
        raise NotImplementedError

    def batch_query(self, X: numpy.ndarray, n: int) -> None:
        """Run the native ``batch_query`` and store the result in ``self.res``.

        Args:
            X: 2-D array with one query vector per row.
            n: Number of result ids requested per query.

        ``self.res`` is a flat uint32 array of ``rows * n`` entries
        (presumably ``n`` consecutive ids per query -- confirm against the
        native implementation).
        """
        print('batch_query starts')
        print(X)
        rows, cols = X.shape
        queries = numpy.ascontiguousarray(X, dtype=numpy.float32)
        X_ctypes = queries.ctypes.data_as(ctypes.POINTER(ctypes.c_float))
        # Output buffer the native code fills in.
        A_ctypes = (ctypes.c_uint32 * (rows * n))()

        c_batch_query(X_ctypes, rows, cols, n, A_ctypes)

        self.res = numpy.frombuffer(A_ctypes, dtype=numpy.uint32)

        print('batch_query done')
        print(self.res)


def main():
    """Smoke-test the wrapper on random data.

    Builds an index over 100000 random 100-dimensional vectors, runs 5 queries
    asking for 100 results each, and checks that the result buffer has the
    expected number of entries.
    """
    num_points, num_queries, dim, k = 100000, 5, 100, 100

    # The data comes from numpy's generator, which is seeded from OS entropy
    # by default; seeding the stdlib `random` module had no effect on it.
    dataset = numpy.random.uniform(0, 1000000, (num_points, dim)).astype(numpy.float32)
    algo = Alaya(0)
    algo.fit(dataset)

    queries = numpy.random.uniform(0, 1000000, (num_queries, dim)).astype(numpy.float32)
    algo.batch_query(queries, k)

    # Random data has no known ground truth, so only the shape of the answer
    # can be checked. The former comparison against a (5, 2) array could never
    # succeed against a result of 5 * 100 entries.
    res = numpy.asarray(algo.res)
    assert res.size == num_queries * k, (
        "expected %d result ids, got %d" % (num_queries * k, res.size))
    assert res.reshape(num_queries, k).shape == (num_queries, k)


if __name__ == '__main__':
    main()






Loading

0 comments on commit 4f8bb26

Please sign in to comment.