# diff --git a/ann_benchmarks/algorithms/nsg/CMakeLists.txt b/ann_benchmarks/algorithms/nsg/CMakeLists.txt index e69de29bb..84ba572b1 100644 --- a/ann_benchmarks/algorithms/nsg/CMakeLists.txt +++ b/ann_benchmarks/algorithms/nsg/CMakeLists.txt @@ -0,0 +1,17 @@
# Top-level build script for the efanna2e / NSG sources: sets the global
# include path and compiler flags, then descends into src/ and tests/.
cmake_minimum_required(VERSION 2.8)

project(efanna2e)
include_directories(${PROJECT_SOURCE_DIR}/include)

# OpenMP is mandatory: configuration aborts when the compiler lacks it.
find_package(OpenMP)
if (OPENMP_FOUND)
    set (CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${OpenMP_C_FLAGS}")
    set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${OpenMP_CXX_FLAGS}")
else()
    message(FATAL_ERROR "no OpenMP support")
endif()

set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fno-builtin-malloc -fno-builtin-calloc -fno-builtin-realloc -fno-builtin-free")

# Compile-time flags only. The former "-lboost" entry was a linker option
# handed to the compile step, where it has no effect; the only Boost facility
# used by the headers in this patch is boost::dynamic_bitset.
# NOTE(review): if a compiled Boost component is ever needed, link it with
# target_link_libraries() on the consuming target instead.
add_definitions (-std=c++17 -O3 -march=native -Wall -DINFO)

add_subdirectory(src)
add_subdirectory(tests)
# diff --git a/ann_benchmarks/algorithms/nsg/include/efanna2e/distance.h b/ann_benchmarks/algorithms/nsg/include/efanna2e/distance.h new file mode 100644 index 000000000..aa7f8f499 --- /dev/null +++ b/ann_benchmarks/algorithms/nsg/include/efanna2e/distance.h @@ -0,0 +1,328 @@
//
// Created by 付聪 on 2017/6/21.
//

#ifndef EFANNA2E_DISTANCE_H
#define EFANNA2E_DISTANCE_H

// SIMD intrinsics are pulled in only when the target actually enables them;
// every other configuration uses the portable scalar loops below.
#if defined(__AVX__) || defined(__SSE2__)
#include <immintrin.h>
#endif

namespace efanna2e {

  /// Identifiers for the distance computations an index can be configured with.
  /// The numeric values are spelled out explicitly and must stay stable.
  enum Metric{
    L2 = 0,
    INNER_PRODUCT = 1,
    FAST_L2 = 2,
    PQ = 3
  };

  namespace detail {

    // Thin wrappers so the kernels below are written once for AVX and SSE2.
    // All loads are unaligned loads: callers pass arbitrary float pointers.
#if defined(__AVX__)
    #define EFANNA2E_DISTANCE_SIMD 1
    typedef __m256 simd_vec;
    const unsigned kSimdLanes = 8;
    inline simd_vec simd_zero() { return _mm256_setzero_ps(); }
    inline simd_vec simd_load(const float* p) { return _mm256_loadu_ps(p); }
    inline simd_vec simd_add(simd_vec x, simd_vec y) { return _mm256_add_ps(x, y); }
    inline simd_vec simd_sub(simd_vec x, simd_vec y) { return _mm256_sub_ps(x, y); }
    inline simd_vec simd_mul(simd_vec x, simd_vec y) { return _mm256_mul_ps(x, y); }
    /// Adds the eight lanes of `v` from lane 0 upwards.
    inline float simd_hsum(simd_vec v) {
      float lanes[8];
      _mm256_storeu_ps(lanes, v);
      return lanes[0] + lanes[1] + lanes[2] + lanes[3] + lanes[4] + lanes[5] + lanes[6] + lanes[7];
    }
#elif defined(__SSE2__)
    #define EFANNA2E_DISTANCE_SIMD 1
    typedef __m128 simd_vec;
    const unsigned kSimdLanes = 4;
    inline simd_vec simd_zero() { return _mm_setzero_ps(); }
    inline simd_vec simd_load(const float* p) { return _mm_loadu_ps(p); }
    inline simd_vec simd_add(simd_vec x, simd_vec y) { return _mm_add_ps(x, y); }
    inline simd_vec simd_sub(simd_vec x, simd_vec y) { return _mm_sub_ps(x, y); }
    inline simd_vec simd_mul(simd_vec x, simd_vec y) { return _mm_mul_ps(x, y); }
    /// Adds the four lanes of `v` from lane 0 upwards.
    inline float simd_hsum(simd_vec v) {
      float lanes[4];
      _mm_storeu_ps(lanes, v);
      return lanes[0] + lanes[1] + lanes[2] + lanes[3];
    }
#endif

    /// Sum over i in [0, size) of (a[i] - b[i])^2.
    /// Exactly `size` elements of each input are read; zero-padded inputs
    /// therefore produce the same sum as their unpadded prefix.
    inline float squared_l2(const float* a, const float* b, unsigned size) {
      float result = 0;
      unsigned i = 0;
#ifdef EFANNA2E_DISTANCE_SIMD
      simd_vec acc = simd_zero();
      // `size - i` cannot wrap because i never exceeds size.
      for (; size - i >= kSimdLanes; i += kSimdLanes) {
        const simd_vec diff = simd_sub(simd_load(a + i), simd_load(b + i));
        acc = simd_add(acc, simd_mul(diff, diff));
      }
      result = simd_hsum(acc);
#endif
      // Scalar tail (and the whole computation when no SIMD is available).
      for (; i < size; ++i) {
        const float diff = a[i] - b[i];
        result += diff * diff;
      }
      return result;
    }

    /// Sum over i in [0, size) of a[i] * b[i]; reads exactly `size` elements.
    inline float dot(const float* a, const float* b, unsigned size) {
      float result = 0;
      unsigned i = 0;
#ifdef EFANNA2E_DISTANCE_SIMD
      simd_vec acc = simd_zero();
      for (; size - i >= kSimdLanes; i += kSimdLanes) {
        acc = simd_add(acc, simd_mul(simd_load(a + i), simd_load(b + i)));
      }
      result = simd_hsum(acc);
#endif
      for (; i < size; ++i) {
        result += a[i] * b[i];
      }
      return result;
    }

  }  // namespace detail

  /// Abstract interface: a scalar score for two float arrays of equal length.
  class Distance {
  public:
    /// @param a      first array, at least `length` floats
    /// @param b      second array, at least `length` floats
    /// @param length number of elements to compare
    virtual float compare(const float* a, const float* b, unsigned length) const = 0;
    virtual ~Distance() {}
  };

  /// Squared Euclidean distance (no square root is taken).
  class DistanceL2 : public Distance{
  public:
    float compare(const float* a, const float* b, unsigned size) const override {
      return detail::squared_l2(a, b, size);
    }
  };

  /// Plain inner product of the two arrays.
  class DistanceInnerProduct : public Distance{
  public:
    float compare(const float* a, const float* b, unsigned size) const override {
      return detail::dot(a, b, size);
    }
  };

  /// Inner-product based score that takes a precomputed squared norm.
  class DistanceFastL2 : public DistanceInnerProduct{
  public:
    /// Sum of squares of the first `size` elements of `a`.
    float norm(const float* a, unsigned size) const{
      return detail::dot(a, a, size);
    }
    using DistanceInnerProduct::compare;
    /// Returns `norm - 2 * <a, b>`.
    /// NOTE(review): the original carried a "not implement" remark on this
    /// overload; the arithmetic is kept exactly as it was written.
    float compare(const float* a, const float* b, float norm, unsigned size) const {
      float result = -2 * DistanceInnerProduct::compare(a, b, size);
      result += norm;
      return result;
    }
  };
}



#endif //EFANNA2E_DISTANCE_H
// diff --git a/ann_benchmarks/algorithms/nsg/include/efanna2e/exceptions.h b/ann_benchmarks/algorithms/nsg/include/efanna2e/exceptions.h new file mode 100644 index 000000000..25c1515dc --- /dev/null +++ b/ann_benchmarks/algorithms/nsg/include/efanna2e/exceptions.h @@ -0,0 +1,21 @@
//
// Copyright (c) 2017 ZJULearning. All rights reserved.
//
// This source code is licensed under the MIT license.
//

#ifndef EFANNA2E_EXCEPTIONS_H
#define EFANNA2E_EXCEPTIONS_H

#include <stdexcept>

namespace efanna2e {

/// Thrown to signal a code path that has no implementation yet.
class NotImplementedException : public std::logic_error {
 public:
  NotImplementedException() : std::logic_error("Function not yet implemented.") {}
};

}

#endif //EFANNA2E_EXCEPTIONS_H
// diff --git a/ann_benchmarks/algorithms/nsg/include/efanna2e/index.h b/ann_benchmarks/algorithms/nsg/include/efanna2e/index.h new file mode 100644 index 000000000..39a353229 --- /dev/null +++ b/ann_benchmarks/algorithms/nsg/include/efanna2e/index.h @@ -0,0 +1,56 @@
//
// Copyright (c) 2017 ZJULearning. All rights reserved.
//
// This source code is licensed under the MIT license.
+// + +#ifndef EFANNA2E_INDEX_H +#define EFANNA2E_INDEX_H + +#include +#include +#include +#include +#include "distance.h" +#include "parameters.h" + +namespace efanna2e { + +class Index { + public: + Index(const size_t dimension, const size_t n, Metric metric); + + + virtual ~Index(); + + virtual void Build(size_t n, const float *data, const Parameters ¶meters) = 0; + + virtual void Search( + const float *query, + const float *x, + size_t k, + const Parameters ¶meters, + unsigned *indices) = 0; + + virtual void Save(const char *filename) = 0; + + virtual void Load(const char *filename) = 0; + + inline bool HasBuilt() const { return has_built; } + + inline size_t GetDimension() const { return dimension_; }; + + inline size_t GetSizeOfDataset() const { return nd_; } + + inline const float *GetDataset() const { return data_; } + protected: + const size_t dimension_; + const float *data_ = nullptr; + size_t nd_; + bool has_built; + Distance* distance_ = nullptr; +}; + +} + +#endif //EFANNA2E_INDEX_H diff --git a/ann_benchmarks/algorithms/nsg/include/efanna2e/index_graph.h b/ann_benchmarks/algorithms/nsg/include/efanna2e/index_graph.h new file mode 100644 index 000000000..9196de30a --- /dev/null +++ b/ann_benchmarks/algorithms/nsg/include/efanna2e/index_graph.h @@ -0,0 +1,76 @@ +// +// Copyright (c) 2017 ZJULearning. All rights reserved. +// +// This source code is licensed under the MIT license. 
+// + +#ifndef EFANNA2E_INDEX_GRAPH_H +#define EFANNA2E_INDEX_GRAPH_H + +#include +#include +#include +#include +#include +#include "util.h" +#include "parameters.h" +#include "neighbor.h" +#include "index.h" + + +namespace efanna2e { + +class IndexGraph : public Index { + public: + explicit IndexGraph(const size_t dimension, const size_t n, Metric m, Index *initializer); + + + virtual ~IndexGraph(); + + virtual void Save(const char *filename)override; + virtual void Load(const char *filename)override; + + + virtual void Build(size_t n, const float *data, const Parameters ¶meters) override; + + virtual void Search( + const float *query, + const float *x, + size_t k, + const Parameters ¶meters, + unsigned *indices) override; + + void GraphAdd(const float* data, unsigned n, unsigned dim, const Parameters ¶meters); + void RefineGraph(const float* data, const Parameters ¶meters); + + protected: + typedef std::vector KNNGraph; + typedef std::vector > CompactGraph; + typedef std::vector LockGraph; + + Index *initializer_; + KNNGraph graph_; + CompactGraph final_graph_; + + + + private: + void InitializeGraph(const Parameters ¶meters); + void InitializeGraph_Refine(const Parameters ¶meters); + void NNDescent(const Parameters ¶meters); + void join(); + void update(const Parameters ¶meters); + void generate_control_set(std::vector &c, + std::vector > &v, + unsigned N); + void eval_recall(std::vector& ctrl_points, std::vector > &acc_eval_set); + void get_neighbor_to_add(const float* point, const Parameters ¶meters, LockGraph& g, + std::mt19937& rng, std::vector& retset, unsigned n_total); + void compact_to_Lockgraph(LockGraph &g); + void parallel_graph_insert(unsigned id, Neighbor nn, LockGraph& g, size_t K); + +}; + +} + +#endif //EFANNA2E_INDEX_GRAPH_H diff --git a/ann_benchmarks/algorithms/nsg/include/efanna2e/index_nsg.h b/ann_benchmarks/algorithms/nsg/include/efanna2e/index_nsg.h new file mode 100644 index 000000000..98c38985d --- /dev/null +++ 
b/ann_benchmarks/algorithms/nsg/include/efanna2e/index_nsg.h @@ -0,0 +1,85 @@ +#ifndef EFANNA2E_INDEX_NSG_H +#define EFANNA2E_INDEX_NSG_H + +#include "util.h" +#include "parameters.h" +#include "neighbor.h" +#include "index.h" +#include +#include +#include +#include +#include +#include + +namespace efanna2e { + +class IndexNSG : public Index { + public: + explicit IndexNSG(const size_t dimension, const size_t n, Metric m, Index *initializer); + + + virtual ~IndexNSG(); + + virtual void Save(const char *filename)override; + virtual void Load(const char *filename)override; + + + virtual void Build(size_t n, const float *data, const Parameters ¶meters) override; + + virtual void Search( + const float *query, + const float *x, + size_t k, + const Parameters ¶meters, + unsigned *indices) override; + void SearchWithOptGraph( + const float *query, + size_t K, + const Parameters ¶meters, + unsigned *indices); + void OptimizeGraph(float* data); + + protected: + typedef std::vector > CompactGraph; + typedef std::vector LockGraph; + typedef std::vector KNNGraph; + + CompactGraph final_graph_; + + Index *initializer_ = nullptr; + void init_graph(const Parameters ¶meters); + void get_neighbors( + const float *query, + const Parameters ¶meter, + std::vector &retset, + std::vector &fullset); + void get_neighbors( + const float *query, + const Parameters ¶meter, + boost::dynamic_bitset<>& flags, + std::vector &retset, + std::vector &fullset); + //void add_cnn(unsigned des, Neighbor p, unsigned range, LockGraph& cut_graph_); + void InterInsert(unsigned n, unsigned range, std::vector& locks, SimpleNeighbor* cut_graph_); + void sync_prune(unsigned q, std::vector& pool, const Parameters ¶meter, boost::dynamic_bitset<>& flags, SimpleNeighbor* cut_graph_); + void Link(const Parameters ¶meters, SimpleNeighbor* cut_graph_); + void Load_nn_graph(const char *filename); + void tree_grow(const Parameters ¶meter); + void DFS(boost::dynamic_bitset<> &flag, unsigned root, unsigned &cnt); + void 
findroot(boost::dynamic_bitset<> &flag, unsigned &root, const Parameters ¶meter); + + + private: + unsigned width; + unsigned ep_; + std::vector locks; + char* opt_graph_ = nullptr; + size_t node_size; + size_t data_len; + size_t neighbor_len; + KNNGraph nnd_graph; +}; +} + +#endif //EFANNA2E_INDEX_NSG_H diff --git a/ann_benchmarks/algorithms/nsg/include/efanna2e/index_random.h b/ann_benchmarks/algorithms/nsg/include/efanna2e/index_random.h new file mode 100644 index 000000000..d235a6352 --- /dev/null +++ b/ann_benchmarks/algorithms/nsg/include/efanna2e/index_random.h @@ -0,0 +1,35 @@ +// +// Copyright (c) 2017 ZJULearning. All rights reserved. +// +// This source code is licensed under the MIT license. +// + +#ifndef EFANNA2E_INDEX_RANDOM_H +#define EFANNA2E_INDEX_RANDOM_H + +#include "index.h" +#include "util.h" + +namespace efanna2e { + +class IndexRandom : public Index { +public: + IndexRandom(const size_t dimension, const size_t n); + virtual ~IndexRandom(); + std::mt19937 rng; + void Save(const char *filename)override{} + void Load(const char *filename)override{} + virtual void Build(size_t n, const float *data, const Parameters ¶meters) override; + + virtual void Search( + const float *query, + const float *x, + size_t k, + const Parameters ¶meters, + unsigned *indices) override ; + +}; + +} + +#endif //EFANNA2E_INDEX_RANDOM_H diff --git a/ann_benchmarks/algorithms/nsg/include/efanna2e/neighbor.h b/ann_benchmarks/algorithms/nsg/include/efanna2e/neighbor.h new file mode 100644 index 000000000..d4fab7fd3 --- /dev/null +++ b/ann_benchmarks/algorithms/nsg/include/efanna2e/neighbor.h @@ -0,0 +1,139 @@ +// +// Copyright (c) 2017 ZJULearning. All rights reserved. +// +// This source code is licensed under the MIT license. 
+// + +#ifndef EFANNA2E_GRAPH_H +#define EFANNA2E_GRAPH_H + +#include +#include +#include + +namespace efanna2e { + +struct Neighbor { + unsigned id; + float distance; + bool flag; + + Neighbor() = default; + Neighbor(unsigned id, float distance, bool f) : id{id}, distance{distance}, flag(f) {} + + inline bool operator<(const Neighbor &other) const { + return distance < other.distance; + } +}; + +typedef std::lock_guard LockGuard; +struct nhood{ + std::mutex lock; + std::vector pool; + unsigned M; + + std::vector nn_old; + std::vector nn_new; + std::vector rnn_old; + std::vector rnn_new; + + nhood(){} + nhood(unsigned l, unsigned s, std::mt19937 &rng, unsigned N){ + M = s; + nn_new.resize(s * 2); + GenRandom(rng, &nn_new[0], (unsigned)nn_new.size(), N); + nn_new.reserve(s * 2); + pool.reserve(l); + } + + nhood(const nhood &other){ + M = other.M; + std::copy(other.nn_new.begin(), other.nn_new.end(), std::back_inserter(nn_new)); + nn_new.reserve(other.nn_new.capacity()); + pool.reserve(other.pool.capacity()); + } + void insert (unsigned id, float dist) { + LockGuard guard(lock); + if (dist > pool.front().distance) return; + for(unsigned i=0; i + void join (C callback) const { + for (unsigned const i: nn_new) { + for (unsigned const j: nn_new) { + if (i < j) { + callback(i, j); + } + } + for (unsigned j: nn_old) { + callback(i, j); + } + } + } +}; + +struct LockNeighbor{ + std::mutex lock; + std::vector pool; +}; + +struct SimpleNeighbor{ + unsigned id; + float distance; + + SimpleNeighbor() = default; + SimpleNeighbor(unsigned id, float distance) : id{id}, distance{distance}{} + + inline bool operator<(const SimpleNeighbor &other) const { + return distance < other.distance; + } +}; +struct SimpleNeighbors{ + std::vector pool; +}; + +static inline int InsertIntoPool (Neighbor *addr, unsigned K, Neighbor nn) { + // find the location to insert + int left=0,right=K-1; + if(addr[left].distance>nn.distance){ + memmove((char *)&addr[left+1], &addr[left],K * 
sizeof(Neighbor)); + addr[left] = nn; + return left; + } + if(addr[right].distancenn.distance)right=mid; + else left=mid; + } + //check equal ID + + while (left > 0){ + if (addr[left].distance < nn.distance) break; + if (addr[left].id == nn.id) return K + 1; + left--; + } + if(addr[left].id == nn.id||addr[right].id==nn.id)return K+1; + memmove((char *)&addr[right+1], &addr[right],(K-right) * sizeof(Neighbor)); + addr[right]=nn; + return right; +} + +} + +#endif //EFANNA2E_GRAPH_H diff --git a/ann_benchmarks/algorithms/nsg/include/efanna2e/parameters.h b/ann_benchmarks/algorithms/nsg/include/efanna2e/parameters.h new file mode 100644 index 000000000..817666905 --- /dev/null +++ b/ann_benchmarks/algorithms/nsg/include/efanna2e/parameters.h @@ -0,0 +1,61 @@ +// +// Copyright (c) 2017 ZJULearning. All rights reserved. +// +// This source code is licensed under the MIT license. +// + +#ifndef EFANNA2E_PARAMETERS_H +#define EFANNA2E_PARAMETERS_H + +#include +#include +#include +namespace efanna2e { + +class Parameters { + public: + template + inline void Set(const std::string &name, const ParamType &value) { + std::stringstream sstream; + sstream << value; + params[name] = sstream.str(); + } + + template + inline ParamType Get(const std::string &name) const { + auto item = params.find(name); + if (item == params.end()) { + throw std::invalid_argument("Invalid parameter name."); + } else { + return ConvertStrToValue(item->second); + } + } + + template + inline ParamType Get(const std::string &name, const ParamType &default_value) { + try { + return Get(name); + } catch (std::invalid_argument e) { + return default_value; + } + } + private: + std::unordered_map params; + + template + inline ParamType ConvertStrToValue(const std::string &str) const { + std::stringstream sstream(str); + ParamType value; + if (!(sstream >> value) || !sstream.eof()) { + std::stringstream err; + err << "Failed to convert value '" << str << "' to type: " << typeid(value).name(); + throw 
std::runtime_error(err.str()); + } + return value; + } + +}; + +} + +#endif //EFANNA2E_PARAMETERS_H diff --git a/ann_benchmarks/algorithms/nsg/include/efanna2e/util.h b/ann_benchmarks/algorithms/nsg/include/efanna2e/util.h new file mode 100644 index 000000000..f6f563973 --- /dev/null +++ b/ann_benchmarks/algorithms/nsg/include/efanna2e/util.h @@ -0,0 +1,71 @@ +// +// Created by 付聪 on 2017/6/21. +// + +#ifndef EFANNA2E_UTIL_H +#define EFANNA2E_UTIL_H +#include +#include +#include +#include +#ifdef __APPLE__ +#else +#include +#endif +namespace efanna2e { + + static void GenRandom(std::mt19937 &rng, unsigned *addr, unsigned size, unsigned N) { + for (unsigned i = 0; i < size; ++i) { + addr[i] = rng() % (N - size); + } + std::sort(addr, addr + size); + for (unsigned i = 1; i < size; ++i) { + if (addr[i] <= addr[i - 1]) { + addr[i] = addr[i - 1] + 1; + } + } + unsigned off = rng() % N; + for (unsigned i = 0; i < size; ++i) { + addr[i] = (addr[i] + off) % N; + } + } + + inline float* data_align(float* data_ori, unsigned point_num, unsigned& dim){ + #ifdef __GNUC__ + #ifdef __AVX__ + #define DATA_ALIGN_FACTOR 8 + #else + #ifdef __SSE2__ + #define DATA_ALIGN_FACTOR 4 + #else + #define DATA_ALIGN_FACTOR 1 + #endif + #endif + #endif + + //std::cout << "align with : "< +#include +#include +#include + +#include + +#include + +#include +#include +#include +#include +#include + +namespace py = pybind11; +using namespace pybind11::literals; // needed to bring in _a literal + +inline void get_input_array_shapes(const py::buffer_info &buffer, size_t *rows, + size_t *features) { + if (buffer.ndim != 2 && buffer.ndim != 1) { + char msg[256]; + snprintf(msg, sizeof(msg), + "Input vector data wrong shape. Number of dimensions %d. 
Data " + "must be a 1D or 2D array.", + buffer.ndim); + } + if (buffer.ndim == 2) { + *rows = buffer.shape[0]; + *features = buffer.shape[1]; + } else { + *rows = 1; + *features = buffer.shape[0]; + } + +} + +void set_num_threads(int num_threads) { omp_set_num_threads(num_threads); } + +struct Index { + std::unique_ptr index = nullptr; + + typedef std::vector > CompactGraph; + + Index(int dim, efanna2e::Metric metric, + int R = 32, int L = 200 , int n) { + + efanna2e::IndexRandom init_index(dim , n); + + index = std::make_unique(dim, R, efanna2e::L2, (efanna2e::Index*)(&init_index)); + + } + + CompactGraph build(py::object input , py::object paras) { + py::array_t items(input); + auto buffer = items.request(); + size_t rows, features; + get_input_array_shapes(buffer, &rows, &features); + + py::array_t indices(paras); + efanna2e + + float *vector_data = (float *)items.data(0); + index->Build(vector_data, rows); + return Graph(index->GetGraph()); + } +}; diff --git a/ann_benchmarks/algorithms/nsg/python/setup.py b/ann_benchmarks/algorithms/nsg/python/setup.py new file mode 100644 index 000000000..9ef59dbee --- /dev/null +++ b/ann_benchmarks/algorithms/nsg/python/setup.py @@ -0,0 +1,117 @@ +import os +import sys +import platform + +import numpy as np +import pybind11 +import setuptools +from setuptools import Extension, setup +from setuptools.command.build_ext import build_ext + +__version__ = '1.0.3' + + +include_dirs = [ + pybind11.get_include(), + np.get_include(), +] + +# compatibility when run in python_bindings +bindings_dir = 'python' +if bindings_dir in os.path.basename(os.getcwd()): + source_files = ['./bindings.cc'] + include_dirs.extend(['../']) +else: + source_files = ['./python/bindings.cc'] + include_dirs.extend(['./']) + + +libraries = [] +extra_objects = [] + + +ext_modules = [ + Extension( + 'glassppy', + source_files, + include_dirs=include_dirs, + libraries=libraries, + language='c++', + extra_objects=extra_objects, + ), +] + + +def 
has_flag(compiler, flagname): + """Return a boolean indicating whether a flag name is supported on + the specified compiler. + """ + import tempfile + with tempfile.NamedTemporaryFile('w', suffix='.cpp') as f: + f.write('int main (int argc, char **argv) { return 0; }') + try: + compiler.compile([f.name], extra_postargs=[flagname]) + except setuptools.distutils.errors.CompileError: + return False + return True + + +def cpp_flag(compiler): + """Return the -std=c++[11/14/17/20] compiler flag. + """ + if has_flag(compiler, '-std=c++17'): + return '-std=c++17' + elif has_flag(compiler, '-std=c++14'): + return '-std=c++14' + elif has_flag(compiler, '-std=c++11'): + return '-std=c++11' + else: + raise RuntimeError('Unsupported compiler -- at least C++11 support ' + 'is needed!') + +class BuildExt(build_ext): + """A custom build extension for adding compiler-specific options.""" + c_opts = { + 'unix': "-Ofast -lrt -march=native -fpic -fopenmp -ftree-vectorize -ftree-vectorizer-verbose=0".split() + } + + link_opts = { + 'unix': [], + } + + c_opts['unix'].append("-fopenmp") + link_opts['unix'].extend(['-fopenmp', '-pthread']) + + def build_extensions(self): + ct = self.compiler.compiler_type + opts = self.c_opts.get(ct, []) + if ct == 'unix': + opts.append('-DVERSION_INFO="%s"' % + self.distribution.get_version()) + opts.append(cpp_flag(self.compiler)) + if has_flag(self.compiler, '-fvisibility=hidden'): + opts.append('-fvisibility=hidden') + elif ct == 'msvc': + opts.append('/DVERSION_INFO=\\"%s\\"' % + self.distribution.get_version()) + + for ext in self.extensions: + ext.extra_compile_args.extend(opts) + ext.extra_link_args.extend(self.link_opts.get(ct, [])) + + build_ext.build_extensions(self) + + +setup( + name='glassppy', + version=__version__, + description='Graph Library for Approximate Similarity Search', + author='', + long_description="""Graph Library for Approximate Similarity Search""", + ext_modules=ext_modules, + install_requires=['numpy'], + 
packages=['ann_dataset'], + cmdclass={'build_ext': BuildExt}, + zip_safe=False, +) + diff --git a/ann_benchmarks/algorithms/nsg/src/CMakeLists.txt b/ann_benchmarks/algorithms/nsg/src/CMakeLists.txt new file mode 100644 index 000000000..216c6c62e --- /dev/null +++ b/ann_benchmarks/algorithms/nsg/src/CMakeLists.txt @@ -0,0 +1,8 @@ +set(CMAKE_CXX_STANDARD 11) + +file(GLOB_RECURSE CPP_SOURCES *.cpp) + +add_library(${PROJECT_NAME} ${CPP_SOURCES}) +add_library(${PROJECT_NAME}_s STATIC ${CPP_SOURCES}) + +#install() \ No newline at end of file diff --git a/ann_benchmarks/algorithms/nsg/src/index.cpp b/ann_benchmarks/algorithms/nsg/src/index.cpp new file mode 100644 index 000000000..206e6a684 --- /dev/null +++ b/ann_benchmarks/algorithms/nsg/src/index.cpp @@ -0,0 +1,18 @@ +// +// Copyright (c) 2017 ZJULearning. All rights reserved. +// +// This source code is licensed under the MIT license. +// +#include +namespace efanna2e { +Index::Index(const size_t dimension, const size_t n, Metric metric = L2) + : dimension_ (dimension), nd_(n), has_built(false) { + switch (metric) { + case L2:distance_ = new DistanceL2(); + break; + default:distance_ = new DistanceL2(); + break; + } +} +Index::~Index() {} +} diff --git a/ann_benchmarks/algorithms/nsg/src/index_graph.cpp b/ann_benchmarks/algorithms/nsg/src/index_graph.cpp new file mode 100644 index 000000000..375dd993b --- /dev/null +++ b/ann_benchmarks/algorithms/nsg/src/index_graph.cpp @@ -0,0 +1,501 @@ +// +// Copyright (c) 2017 ZJULearning. All rights reserved. +// +// This source code is licensed under the MIT license. 
+// + +#include +#include +#include +#include +#include + +namespace efanna2e { +#define _CONTROL_NUM 100 +IndexGraph::IndexGraph(const size_t dimension, const size_t n, Metric m, Index *initializer) + : Index(dimension, n, m), + initializer_{initializer} { + assert(dimension == initializer->GetDimension()); +} +IndexGraph::~IndexGraph() {} + +void IndexGraph::join() { +#pragma omp parallel for default(shared) schedule(dynamic, 100) + for (unsigned n = 0; n < nd_; n++) { + graph_[n].join([&](unsigned i, unsigned j) { + if(i != j){ + float dist = distance_->compare(data_ + i * dimension_, data_ + j * dimension_, dimension_); + graph_[i].insert(j, dist); + graph_[j].insert(i, dist); + } + }); + } +} +void IndexGraph::update(const Parameters ¶meters) { + unsigned S = parameters.Get("S"); + unsigned R = parameters.Get("R"); + unsigned L = parameters.Get("L"); +#pragma omp parallel for + for (unsigned i = 0; i < nd_; i++) { + std::vector().swap(graph_[i].nn_new); + std::vector().swap(graph_[i].nn_old); + //std::vector().swap(graph_[i].rnn_new); + //std::vector().swap(graph_[i].rnn_old); + //graph_[i].nn_new.clear(); + //graph_[i].nn_old.clear(); + //graph_[i].rnn_new.clear(); + //graph_[i].rnn_old.clear(); + } +#pragma omp parallel for + for (unsigned n = 0; n < nd_; ++n) { + auto &nn = graph_[n]; + std::sort(nn.pool.begin(), nn.pool.end()); + if(nn.pool.size()>L)nn.pool.resize(L); + nn.pool.reserve(L); + unsigned maxl = std::min(nn.M + S, (unsigned) nn.pool.size()); + unsigned c = 0; + unsigned l = 0; + //std::sort(nn.pool.begin(), nn.pool.end()); + //if(n==0)std::cout << nn.pool[0].distance<<","<< nn.pool[1].distance<<","<< nn.pool[2].distance<< std::endl; + while ((l < maxl) && (c < S)) { + if (nn.pool[l].flag) ++c; + ++l; + } + nn.M = l; + } +#pragma omp parallel for + for (unsigned n = 0; n < nd_; ++n) { + auto &nnhd = graph_[n]; + auto &nn_new = nnhd.nn_new; + auto &nn_old = nnhd.nn_old; + for (unsigned l = 0; l < nnhd.M; ++l) { + auto &nn = nnhd.pool[l]; + auto 
&nhood_o = graph_[nn.id]; // nn on the other side of the edge + + if (nn.flag) { + nn_new.push_back(nn.id); + if (nn.distance > nhood_o.pool.back().distance) { + LockGuard guard(nhood_o.lock); + if(nhood_o.rnn_new.size() < R)nhood_o.rnn_new.push_back(n); + else{ + unsigned int pos = rand() % R; + nhood_o.rnn_new[pos] = n; + } + } + nn.flag = false; + } else { + nn_old.push_back(nn.id); + if (nn.distance > nhood_o.pool.back().distance) { + LockGuard guard(nhood_o.lock); + if(nhood_o.rnn_old.size() < R)nhood_o.rnn_old.push_back(n); + else{ + unsigned int pos = rand() % R; + nhood_o.rnn_old[pos] = n; + } + } + } + } + std::make_heap(nnhd.pool.begin(), nnhd.pool.end()); + } +#pragma omp parallel for + for (unsigned i = 0; i < nd_; ++i) { + auto &nn_new = graph_[i].nn_new; + auto &nn_old = graph_[i].nn_old; + auto &rnn_new = graph_[i].rnn_new; + auto &rnn_old = graph_[i].rnn_old; + if (R && rnn_new.size() > R) { + std::random_shuffle(rnn_new.begin(), rnn_new.end()); + rnn_new.resize(R); + } + nn_new.insert(nn_new.end(), rnn_new.begin(), rnn_new.end()); + if (R && rnn_old.size() > R) { + std::random_shuffle(rnn_old.begin(), rnn_old.end()); + rnn_old.resize(R); + } + nn_old.insert(nn_old.end(), rnn_old.begin(), rnn_old.end()); + if(nn_old.size() > R * 2){nn_old.resize(R * 2);nn_old.reserve(R*2);} + std::vector().swap(graph_[i].rnn_new); + std::vector().swap(graph_[i].rnn_old); + } +} + +void IndexGraph::NNDescent(const Parameters ¶meters) { + unsigned iter = parameters.Get("iter"); + std::mt19937 rng(rand()); + std::vector control_points(_CONTROL_NUM); + std::vector > acc_eval_set(_CONTROL_NUM); + GenRandom(rng, &control_points[0], control_points.size(), nd_); + generate_control_set(control_points, acc_eval_set, nd_); + for (unsigned it = 0; it < iter; it++) { + join(); + update(parameters); + //checkDup(); + eval_recall(control_points, acc_eval_set); + std::cout << "iter: " << it << std::endl; + } +} + +void IndexGraph::generate_control_set(std::vector &c, + std::vector 
> &v, + unsigned N){ +#pragma omp parallel for + for(unsigned i=0; i tmp; + for(unsigned j=0; jcompare(data_ + c[i] * dimension_, data_ + j * dimension_, dimension_); + tmp.push_back(Neighbor(j, dist, true)); + } + std::partial_sort(tmp.begin(), tmp.begin() + _CONTROL_NUM, tmp.end()); + for(unsigned j=0; j<_CONTROL_NUM; j++){ + v[i].push_back(tmp[j].id); + } + } +} + +void IndexGraph::eval_recall(std::vector& ctrl_points, std::vector > &acc_eval_set){ + float mean_acc=0; + for(unsigned i=0; i("L"); + const unsigned S = parameters.Get("S"); + + graph_.reserve(nd_); + std::mt19937 rng(rand()); + for (unsigned i = 0; i < nd_; i++) { + graph_.push_back(nhood(L, S, rng, (unsigned) nd_)); + } +#pragma omp parallel for + for (unsigned i = 0; i < nd_; i++) { + const float *query = data_ + i * dimension_; + std::vector tmp(S + 1); + initializer_->Search(query, data_, S + 1, parameters, tmp.data()); + + for (unsigned j = 0; j < S; j++) { + unsigned id = tmp[j]; + if (id == i)continue; + float dist = distance_->compare(data_ + i * dimension_, data_ + id * dimension_, (unsigned) dimension_); + + graph_[i].pool.push_back(Neighbor(id, dist, true)); + } + std::make_heap(graph_[i].pool.begin(), graph_[i].pool.end()); + graph_[i].pool.reserve(L); + } +} + +void IndexGraph::InitializeGraph_Refine(const Parameters ¶meters) { + assert(final_graph_.size() == nd_); + + const unsigned L = parameters.Get("L"); + const unsigned S = parameters.Get("S"); + + graph_.reserve(nd_); + std::mt19937 rng(rand()); + for (unsigned i = 0; i < nd_; i++) { + graph_.push_back(nhood(L, S, rng, (unsigned) nd_)); + } +#pragma omp parallel for + for (unsigned i = 0; i < nd_; i++) { + auto& ids = final_graph_[i]; + std::sort(ids.begin(), ids.end()); + + size_t K = ids.size(); + + for (unsigned j = 0; j < K; j++) { + unsigned id = ids[j]; + if (id == i || (j>0 &&id == ids[j-1]))continue; + float dist = distance_->compare(data_ + i * dimension_, data_ + id * dimension_, (unsigned) dimension_); + 
graph_[i].pool.push_back(Neighbor(id, dist, true)); + } + std::make_heap(graph_[i].pool.begin(), graph_[i].pool.end()); + graph_[i].pool.reserve(L); + std::vector().swap(ids); + } + CompactGraph().swap(final_graph_); +} + + +void IndexGraph::RefineGraph(const float* data, const Parameters ¶meters) { + data_ = data; + assert(initializer_->HasBuilt()); + + InitializeGraph_Refine(parameters); + NNDescent(parameters); + + final_graph_.reserve(nd_); + std::cout << nd_ << std::endl; + unsigned K = parameters.Get("K"); + for (unsigned i = 0; i < nd_; i++) { + std::vector tmp; + std::sort(graph_[i].pool.begin(), graph_[i].pool.end()); + for (unsigned j = 0; j < K; j++) { + tmp.push_back(graph_[i].pool[j].id); + } + tmp.reserve(K); + final_graph_.push_back(tmp); + std::vector().swap(graph_[i].pool); + std::vector().swap(graph_[i].nn_new); + std::vector().swap(graph_[i].nn_old); + std::vector().swap(graph_[i].rnn_new); + std::vector().swap(graph_[i].rnn_new); + } + std::vector().swap(graph_); + has_built = true; + +} + + +void IndexGraph::Build(size_t n, const float *data, const Parameters ¶meters) { + + //assert(initializer_->GetDataset() == data); + data_ = data; + assert(initializer_->HasBuilt()); + + InitializeGraph(parameters); + NNDescent(parameters); + //RefineGraph(parameters); + + final_graph_.reserve(nd_); + std::cout << nd_ << std::endl; + unsigned K = parameters.Get("K"); + for (unsigned i = 0; i < nd_; i++) { + std::vector tmp; + std::sort(graph_[i].pool.begin(), graph_[i].pool.end()); + for (unsigned j = 0; j < K; j++) { + tmp.push_back(graph_[i].pool[j].id); + } + tmp.reserve(K); + final_graph_.push_back(tmp); + std::vector().swap(graph_[i].pool); + std::vector().swap(graph_[i].nn_new); + std::vector().swap(graph_[i].nn_old); + std::vector().swap(graph_[i].rnn_new); + std::vector().swap(graph_[i].rnn_new); + } + std::vector().swap(graph_); + has_built = true; +} + +void IndexGraph::Search( + const float *query, + const float *x, + size_t K, + const Parameters 
¶meter, + unsigned *indices) { + const unsigned L = parameter.Get("L_search"); + + std::vector retset(L+1); + std::vector init_ids(L); + std::mt19937 rng(rand()); + GenRandom(rng, init_ids.data(), L, (unsigned)nd_); + + std::vector flags(nd_); + memset(flags.data(), 0, nd_ * sizeof(char)); + for(unsigned i=0; icompare(data_ + dimension_*id, query, (unsigned)dimension_); + retset[i]=Neighbor(id, dist, true); + } + + std::sort(retset.begin(), retset.begin()+L); + int k=0; + while(k < (int)L) { + int nk = L; + + if (retset[k].flag) { + retset[k].flag = false; + unsigned n = retset[k].id; + + for (unsigned m = 0; m < final_graph_[n].size(); ++m) { + unsigned id = final_graph_[n][m]; + if(flags[id])continue; + flags[id] = 1; + float dist = distance_->compare(query, data_ + dimension_ * id, (unsigned)dimension_); + if(dist >= retset[L-1].distance)continue; + Neighbor nn(id, dist, true); + int r = InsertIntoPool(retset.data(), L, nn); + + //if(L+1 < retset.size()) ++L; + if(r < nk)nk=r; + } + //lock to here + } + if(nk <= k)k = nk; + else ++k; + } + for(size_t i=0; i < K; i++){ + indices[i] = retset[i].id; + } +} + +void IndexGraph::Save(const char *filename) { + std::ofstream out(filename, std::ios::binary | std::ios::out); + assert(final_graph_.size() == nd_); + unsigned GK = (unsigned) final_graph_[0].size(); + for (unsigned i = 0; i < nd_; i++) { + out.write((char *) &GK, sizeof(unsigned)); + out.write((char *) final_graph_[i].data(), GK * sizeof(unsigned)); + } + out.close(); +} + +void IndexGraph::Load(const char *filename) { + std::ifstream in(filename, std::ios::binary); + unsigned k; + in.read((char*)&k,4); + in.seekg(0,std::ios::end); + std::ios::pos_type ss = in.tellg(); + size_t fsize = (size_t)ss; + size_t num = fsize / ((size_t)k + 1) / 4; + in.seekg(0,std::ios::beg); + + final_graph_.resize(num); + for(size_t i = 0; i < num; i++){ + in.seekg(4,std::ios::cur); + final_graph_[i].resize(k); + final_graph_[i].reserve(k); + in.read((char*)final_graph_[i].data(), 
k * sizeof(unsigned)); + } + in.close(); +} + +void IndexGraph::parallel_graph_insert(unsigned id, Neighbor nn, LockGraph& g, size_t K){ + LockGuard guard(g[id].lock); + size_t l = g[id].pool.size(); + if(l == 0)g[id].pool.push_back(nn); + else{ + g[id].pool.resize(l+1); + g[id].pool.reserve(l+1); + InsertIntoPool(g[id].pool.data(), (unsigned)l, nn); + if(g[id].pool.size() > K)g[id].pool.reserve(K); + } + +} + +void IndexGraph::GraphAdd(const float* data, unsigned n_new, unsigned dim, const Parameters ¶meters) { + data_ = data; + data += nd_ * dimension_; + assert(final_graph_.size() == nd_); + assert(dim == dimension_); + unsigned total = n_new + (unsigned)nd_; + LockGraph graph_tmp(total); + size_t K = final_graph_[0].size(); + compact_to_Lockgraph(graph_tmp); + unsigned seed = 19930808; +#pragma omp parallel + { + std::mt19937 rng(seed ^ omp_get_thread_num()); +#pragma omp for + for(unsigned i = 0; i < n_new; i++){ + std::vector res; + get_neighbor_to_add(data + i * dim, parameters, graph_tmp, rng, res, n_new); + + for(unsigned j=0; j& retset, + unsigned n_new){ + const unsigned L = parameters.Get("L_ADD"); + + retset.resize(L+1); + std::vector init_ids(L); + GenRandom(rng, init_ids.data(), L/2, n_new); + for(unsigned i=0; i flags(n_new + n_total); + memset(flags.data(), 0, n_total * sizeof(char)); + for(unsigned i=0; icompare(data_ + dimension_*id, point, (unsigned)dimension_); + retset[i]=Neighbor(id, dist, true); + } + + std::sort(retset.begin(), retset.begin()+L); + int k=0; + while(k < (int)L) { + int nk = L; + + if (retset[k].flag) { + retset[k].flag = false; + unsigned n = retset[k].id; + + LockGuard guard(g[n].lock);//lock start + for (unsigned m = 0; m < g[n].pool.size(); ++m) { + unsigned id = g[n].pool[m].id; + if(flags[id])continue; + flags[id] = 1; + float dist = distance_->compare(point, data_ + dimension_ * id, (unsigned)dimension_); + if(dist >= retset[L-1].distance)continue; + Neighbor nn(id, dist, true); + int r = InsertIntoPool(retset.data(), 
L, nn); + + //if(L+1 < retset.size()) ++L; + if(r < nk)nk=r; + } + //lock to here + } + if(nk <= k)k = nk; + else ++k; + } + + +} + +void IndexGraph::compact_to_Lockgraph(LockGraph &g){ + + //g.resize(final_graph_.size()); + for(unsigned i=0; icompare(data_ + i*dimension_, + data_ + final_graph_[i][j]*dimension_, (unsigned)dimension_); + g[i].pool.push_back(Neighbor(final_graph_[i][j], dist, true)); + } + std::vector().swap(final_graph_[i]); + } + CompactGraph().swap(final_graph_); +} + + +} diff --git a/ann_benchmarks/algorithms/nsg/src/index_nsg.cpp b/ann_benchmarks/algorithms/nsg/src/index_nsg.cpp new file mode 100644 index 000000000..2fd5cf938 --- /dev/null +++ b/ann_benchmarks/algorithms/nsg/src/index_nsg.cpp @@ -0,0 +1,701 @@ +#include "efanna2e/index_nsg.h" + +#include +#include +#include +#include +#include + +#include "efanna2e/exceptions.h" +#include "efanna2e/parameters.h" + +namespace efanna2e { +#define _CONTROL_NUM 100 +IndexNSG::IndexNSG(const size_t dimension, const size_t n, Metric m, + Index *initializer) + : Index(dimension, n, m), initializer_{initializer} {} + +IndexNSG::~IndexNSG() { + if (distance_ != nullptr) { + delete distance_; + distance_ = nullptr; + } + if (initializer_ != nullptr) { + delete initializer_; + initializer_ = nullptr; + } + if (opt_graph_ != nullptr) { + delete opt_graph_; + opt_graph_ = nullptr; + } +} + +void IndexNSG::Save(const char *filename) { + std::ofstream out(filename, std::ios::binary | std::ios::out); + assert(final_graph_.size() == nd_); + + out.write((char *)&width, sizeof(unsigned)); + out.write((char *)&ep_, sizeof(unsigned)); + for (unsigned i = 0; i < nd_; i++) { + unsigned GK = (unsigned)final_graph_[i].size(); + out.write((char *)&GK, sizeof(unsigned)); + out.write((char *)final_graph_[i].data(), GK * sizeof(unsigned)); + } + out.close(); +} + +void IndexNSG::Load(const char *filename) { + std::ifstream in(filename, std::ios::binary); + in.read((char *)&width, sizeof(unsigned)); + in.read((char *)&ep_, 
sizeof(unsigned)); + // width=100; + unsigned cc = 0; + while (!in.eof()) { + unsigned k; + in.read((char *)&k, sizeof(unsigned)); + if (in.eof()) break; + cc += k; + std::vector tmp(k); + in.read((char *)tmp.data(), k * sizeof(unsigned)); + final_graph_.push_back(tmp); + } + cc /= nd_; + // std::cout< &retset, + std::vector &fullset) { + unsigned L = parameter.Get("L"); + + retset.resize(L + 1); + std::vector init_ids(L); + // initializer_->Search(query, nullptr, L, parameter, init_ids.data()); + + boost::dynamic_bitset<> flags{nd_, 0}; + L = 0; + for (unsigned i = 0; i < init_ids.size() && i < final_graph_[ep_].size(); i++) { + init_ids[i] = final_graph_[ep_][i]; + flags[init_ids[i]] = true; + L++; + } + while (L < init_ids.size()) { + unsigned id = rand() % nd_; + if (flags[id]) continue; + init_ids[L] = id; + L++; + flags[id] = true; + } + + L = 0; + for (unsigned i = 0; i < init_ids.size(); i++) { + unsigned id = init_ids[i]; + if (id >= nd_) continue; + // std::cout<compare(data_ + dimension_ * (size_t)id, query, + (unsigned)dimension_); + retset[i] = Neighbor(id, dist, true); + // flags[id] = 1; + L++; + } + + std::sort(retset.begin(), retset.begin() + L); + int k = 0; + while (k < (int)L) { + int nk = L; + + if (retset[k].flag) { + retset[k].flag = false; + unsigned n = retset[k].id; + + for (unsigned m = 0; m < final_graph_[n].size(); ++m) { + unsigned id = final_graph_[n][m]; + if (flags[id]) continue; + flags[id] = 1; + + float dist = distance_->compare(query, data_ + dimension_ * (size_t)id, + (unsigned)dimension_); + Neighbor nn(id, dist, true); + fullset.push_back(nn); + if (dist >= retset[L - 1].distance) continue; + int r = InsertIntoPool(retset.data(), L, nn); + + if (L + 1 < retset.size()) ++L; + if (r < nk) nk = r; + } + } + if (nk <= k) + k = nk; + else + ++k; + } +} + +void IndexNSG::get_neighbors(const float *query, const Parameters ¶meter, + boost::dynamic_bitset<> &flags, + std::vector &retset, + std::vector &fullset) { + unsigned L = 
parameter.Get("L"); + + retset.resize(L + 1); + std::vector init_ids(L); + // initializer_->Search(query, nullptr, L, parameter, init_ids.data()); + + L = 0; + for (unsigned i = 0; i < init_ids.size() && i < final_graph_[ep_].size(); i++) { + init_ids[i] = final_graph_[ep_][i]; + flags[init_ids[i]] = true; + L++; + } + while (L < init_ids.size()) { + unsigned id = rand() % nd_; + if (flags[id]) continue; + init_ids[L] = id; + L++; + flags[id] = true; + } + + L = 0; + for (unsigned i = 0; i < init_ids.size(); i++) { + unsigned id = init_ids[i]; + if (id >= nd_) continue; + // std::cout<compare(data_ + dimension_ * (size_t)id, query, + (unsigned)dimension_); + retset[i] = Neighbor(id, dist, true); + fullset.push_back(retset[i]); + // flags[id] = 1; + L++; + } + + std::sort(retset.begin(), retset.begin() + L); + int k = 0; + while (k < (int)L) { + int nk = L; + + if (retset[k].flag) { + retset[k].flag = false; + unsigned n = retset[k].id; + + for (unsigned m = 0; m < final_graph_[n].size(); ++m) { + unsigned id = final_graph_[n][m]; + if (flags[id]) continue; + flags[id] = 1; + + float dist = distance_->compare(query, data_ + dimension_ * (size_t)id, + (unsigned)dimension_); + Neighbor nn(id, dist, true); + fullset.push_back(nn); + if (dist >= retset[L - 1].distance) continue; + int r = InsertIntoPool(retset.data(), L, nn); + + if (L + 1 < retset.size()) ++L; + if (r < nk) nk = r; + } + } + if (nk <= k) + k = nk; + else + ++k; + } +} + +void IndexNSG::init_graph(const Parameters ¶meters) { + float *center = new float[dimension_]; + for (unsigned j = 0; j < dimension_; j++) center[j] = 0; + for (unsigned i = 0; i < nd_; i++) { + for (unsigned j = 0; j < dimension_; j++) { + center[j] += data_[i * dimension_ + j]; + } + } + for (unsigned j = 0; j < dimension_; j++) { + center[j] /= nd_; + } + std::vector tmp, pool; + ep_ = rand() % nd_; // random initialize navigating point + get_neighbors(center, parameters, tmp, pool); + ep_ = tmp[0].id; + delete center; +} + +void 
IndexNSG::sync_prune(unsigned q, std::vector &pool, + const Parameters ¶meter, + boost::dynamic_bitset<> &flags, + SimpleNeighbor *cut_graph_) { + unsigned range = parameter.Get("R"); + unsigned maxc = parameter.Get("C"); + width = range; + unsigned start = 0; + + for (unsigned nn = 0; nn < final_graph_[q].size(); nn++) { + unsigned id = final_graph_[q][nn]; + if (flags[id]) continue; + float dist = + distance_->compare(data_ + dimension_ * (size_t)q, + data_ + dimension_ * (size_t)id, (unsigned)dimension_); + pool.push_back(Neighbor(id, dist, true)); + } + + std::sort(pool.begin(), pool.end()); + std::vector result; + if (pool[start].id == q) start++; + result.push_back(pool[start]); + + while (result.size() < range && (++start) < pool.size() && start < maxc) { + auto &p = pool[start]; + bool occlude = false; + for (unsigned t = 0; t < result.size(); t++) { + if (p.id == result[t].id) { + occlude = true; + break; + } + float djk = distance_->compare(data_ + dimension_ * (size_t)result[t].id, + data_ + dimension_ * (size_t)p.id, + (unsigned)dimension_); + if (djk < p.distance /* dik */) { + occlude = true; + break; + } + } + if (!occlude) result.push_back(p); + } + + SimpleNeighbor *des_pool = cut_graph_ + (size_t)q * (size_t)range; + for (size_t t = 0; t < result.size(); t++) { + des_pool[t].id = result[t].id; + des_pool[t].distance = result[t].distance; + } + if (result.size() < range) { + des_pool[result.size()].distance = -1; + } +} + +void IndexNSG::InterInsert(unsigned n, unsigned range, + std::vector &locks, + SimpleNeighbor *cut_graph_) { + SimpleNeighbor *src_pool = cut_graph_ + (size_t)n * (size_t)range; + for (size_t i = 0; i < range; i++) { + if (src_pool[i].distance == -1) break; + + SimpleNeighbor sn(n, src_pool[i].distance); + size_t des = src_pool[i].id; + SimpleNeighbor *des_pool = cut_graph_ + des * (size_t)range; + + std::vector temp_pool; + int dup = 0; + { + LockGuard guard(locks[des]); + for (size_t j = 0; j < range; j++) { + if 
(des_pool[j].distance == -1) break; + if (n == des_pool[j].id) { + dup = 1; + break; + } + temp_pool.push_back(des_pool[j]); + } + } + if (dup) continue; + + temp_pool.push_back(sn); + if (temp_pool.size() > range) { + std::vector result; + unsigned start = 0; + std::sort(temp_pool.begin(), temp_pool.end()); + result.push_back(temp_pool[start]); + while (result.size() < range && (++start) < temp_pool.size()) { + auto &p = temp_pool[start]; + bool occlude = false; + for (unsigned t = 0; t < result.size(); t++) { + if (p.id == result[t].id) { + occlude = true; + break; + } + float djk = distance_->compare(data_ + dimension_ * (size_t)result[t].id, + data_ + dimension_ * (size_t)p.id, + (unsigned)dimension_); + if (djk < p.distance /* dik */) { + occlude = true; + break; + } + } + if (!occlude) result.push_back(p); + } + { + LockGuard guard(locks[des]); + for (unsigned t = 0; t < result.size(); t++) { + des_pool[t] = result[t]; + } + } + } else { + LockGuard guard(locks[des]); + for (unsigned t = 0; t < range; t++) { + if (des_pool[t].distance == -1) { + des_pool[t] = sn; + if (t + 1 < range) des_pool[t + 1].distance = -1; + break; + } + } + } + } +} + +void IndexNSG::Link(const Parameters ¶meters, SimpleNeighbor *cut_graph_) { + /* + std::cout << " graph link" << std::endl; + unsigned progress=0; + unsigned percent = 100; + unsigned step_size = nd_/percent; + std::mutex progress_lock; + */ + unsigned range = parameters.Get("R"); + std::vector locks(nd_); + +#pragma omp parallel + { + // unsigned cnt = 0; + std::vector pool, tmp; + boost::dynamic_bitset<> flags{nd_, 0}; +#pragma omp for schedule(dynamic, 100) + for (unsigned n = 0; n < nd_; ++n) { + pool.clear(); + tmp.clear(); + flags.reset(); + get_neighbors(data_ + dimension_ * n, parameters, flags, tmp, pool); + sync_prune(n, pool, parameters, flags, cut_graph_); + /* + cnt++; + if(cnt % step_size == 0){ + LockGuard g(progress_lock); + std::cout<("nn_graph_path"); + unsigned range = parameters.Get("R"); + 
Load_nn_graph(nn_graph_path.c_str()); + data_ = data; + init_graph(parameters); + SimpleNeighbor *cut_graph_ = new SimpleNeighbor[nd_ * (size_t)range]; + Link(parameters, cut_graph_); + final_graph_.resize(nd_); + + for (size_t i = 0; i < nd_; i++) { + SimpleNeighbor *pool = cut_graph_ + i * (size_t)range; + unsigned pool_size = 0; + for (unsigned j = 0; j < range; j++) { + if (pool[j].distance == -1) break; + pool_size = j; + } + pool_size++; + final_graph_[i].resize(pool_size); + for (unsigned j = 0; j < pool_size; j++) { + final_graph_[i][j] = pool[j].id; + } + } + + tree_grow(parameters); + + unsigned max = 0, min = 1e6, avg = 0; + for (size_t i = 0; i < nd_; i++) { + auto size = final_graph_[i].size(); + max = max < size ? size : max; + min = min > size ? size : min; + avg += size; + } + avg /= 1.0 * nd_; + printf("Degree Statistics: Max = %d, Min = %d, Avg = %d\n", max, min, avg); + + has_built = true; + delete cut_graph_; +} + +void IndexNSG::Search(const float *query, const float *x, size_t K, + const Parameters ¶meters, unsigned *indices) { + const unsigned L = parameters.Get("L_search"); + data_ = x; + std::vector retset(L + 1); + std::vector init_ids(L); + boost::dynamic_bitset<> flags{nd_, 0}; + // std::mt19937 rng(rand()); + // GenRandom(rng, init_ids.data(), L, (unsigned) nd_); + + unsigned tmp_l = 0; + for (; tmp_l < L && tmp_l < final_graph_[ep_].size(); tmp_l++) { + init_ids[tmp_l] = final_graph_[ep_][tmp_l]; + flags[init_ids[tmp_l]] = true; + } + + while (tmp_l < L) { + unsigned id = rand() % nd_; + if (flags[id]) continue; + flags[id] = true; + init_ids[tmp_l] = id; + tmp_l++; + } + + for (unsigned i = 0; i < init_ids.size(); i++) { + unsigned id = init_ids[i]; + float dist = + distance_->compare(data_ + dimension_ * id, query, (unsigned)dimension_); + retset[i] = Neighbor(id, dist, true); + // flags[id] = true; + } + + std::sort(retset.begin(), retset.begin() + L); + int k = 0; + while (k < (int)L) { + int nk = L; + + if (retset[k].flag) { + 
retset[k].flag = false; + unsigned n = retset[k].id; + + for (unsigned m = 0; m < final_graph_[n].size(); ++m) { + unsigned id = final_graph_[n][m]; + if (flags[id]) continue; + flags[id] = 1; + float dist = + distance_->compare(query, data_ + dimension_ * id, (unsigned)dimension_); + if (dist >= retset[L - 1].distance) continue; + Neighbor nn(id, dist, true); + int r = InsertIntoPool(retset.data(), L, nn); + + if (r < nk) nk = r; + } + } + if (nk <= k) + k = nk; + else + ++k; + } + for (size_t i = 0; i < K; i++) { + indices[i] = retset[i].id; + } +} + +void IndexNSG::SearchWithOptGraph(const float *query, size_t K, + const Parameters ¶meters, unsigned *indices) { + unsigned L = parameters.Get("L_search"); + DistanceFastL2 *dist_fast = (DistanceFastL2 *)distance_; + + std::vector retset(L + 1); + std::vector init_ids(L); + // std::mt19937 rng(rand()); + // GenRandom(rng, init_ids.data(), L, (unsigned) nd_); + + boost::dynamic_bitset<> flags{nd_, 0}; + unsigned tmp_l = 0; + unsigned *neighbors = (unsigned *)(opt_graph_ + node_size * ep_ + data_len); + unsigned MaxM_ep = *neighbors; + neighbors++; + + for (; tmp_l < L && tmp_l < MaxM_ep; tmp_l++) { + init_ids[tmp_l] = neighbors[tmp_l]; + flags[init_ids[tmp_l]] = true; + } + + while (tmp_l < L) { + unsigned id = rand() % nd_; + if (flags[id]) continue; + flags[id] = true; + init_ids[tmp_l] = id; + tmp_l++; + } + + for (unsigned i = 0; i < init_ids.size(); i++) { + unsigned id = init_ids[i]; + if (id >= nd_) continue; + _mm_prefetch(opt_graph_ + node_size * id, _MM_HINT_T0); + } + L = 0; + for (unsigned i = 0; i < init_ids.size(); i++) { + unsigned id = init_ids[i]; + if (id >= nd_) continue; + float *x = (float *)(opt_graph_ + node_size * id); + float norm_x = *x; + x++; + float dist = dist_fast->compare(x, query, norm_x, (unsigned)dimension_); + retset[i] = Neighbor(id, dist, true); + flags[id] = true; + L++; + } + // std::cout<compare(query, data, norm, (unsigned)dimension_); + if (dist >= retset[L - 1].distance) 
continue; + Neighbor nn(id, dist, true); + int r = InsertIntoPool(retset.data(), L, nn); + + // if(L+1 < retset.size()) ++L; + if (r < nk) nk = r; + } + } + if (nk <= k) + k = nk; + else + ++k; + } + for (size_t i = 0; i < K; i++) { + indices[i] = retset[i].id; + } +} + +void IndexNSG::OptimizeGraph(float *data) { // use after build or load + + data_ = data; + data_len = (dimension_ + 1) * sizeof(float); + neighbor_len = (width + 1) * sizeof(unsigned); + node_size = data_len + neighbor_len; + opt_graph_ = (char *)malloc(node_size * nd_); + DistanceFastL2 *dist_fast = (DistanceFastL2 *)distance_; + for (unsigned i = 0; i < nd_; i++) { + char *cur_node_offset = opt_graph_ + i * node_size; + float cur_norm = dist_fast->norm(data_ + i * dimension_, dimension_); + std::memcpy(cur_node_offset, &cur_norm, sizeof(float)); + std::memcpy(cur_node_offset + sizeof(float), data_ + i * dimension_, + data_len - sizeof(float)); + + cur_node_offset += data_len; + unsigned k = final_graph_[i].size(); + std::memcpy(cur_node_offset, &k, sizeof(unsigned)); + std::memcpy(cur_node_offset + sizeof(unsigned), final_graph_[i].data(), + k * sizeof(unsigned)); + std::vector().swap(final_graph_[i]); + } + CompactGraph().swap(final_graph_); +} + +void IndexNSG::DFS(boost::dynamic_bitset<> &flag, unsigned root, unsigned &cnt) { + unsigned tmp = root; + std::stack s; + s.push(root); + if (!flag[root]) cnt++; + flag[root] = true; + while (!s.empty()) { + unsigned next = nd_ + 1; + for (unsigned i = 0; i < final_graph_[tmp].size(); i++) { + if (flag[final_graph_[tmp][i]] == false) { + next = final_graph_[tmp][i]; + break; + } + } + // std::cout << next <<":"< &flag, unsigned &root, + const Parameters ¶meter) { + unsigned id = nd_; + for (unsigned i = 0; i < nd_; i++) { + if (flag[i] == false) { + id = i; + break; + } + } + + if (id == nd_) return; // No Unlinked Node + + std::vector tmp, pool; + get_neighbors(data_ + dimension_ * id, parameter, tmp, pool); + std::sort(pool.begin(), pool.end()); + + 
unsigned found = 0; + for (unsigned i = 0; i < pool.size(); i++) { + if (flag[pool[i].id]) { + // std::cout << pool[i].id << '\n'; + root = pool[i].id; + found = 1; + break; + } + } + if (found == 0) { + while (true) { + unsigned rid = rand() % nd_; + if (flag[rid]) { + root = rid; + break; + } + } + } + final_graph_[root].push_back(id); +} +void IndexNSG::tree_grow(const Parameters ¶meter) { + unsigned root = ep_; + boost::dynamic_bitset<> flags{nd_, 0}; + unsigned unlinked_cnt = 0; + while (unlinked_cnt < nd_) { + DFS(flags, root, unlinked_cnt); + // std::cout << unlinked_cnt << '\n'; + if (unlinked_cnt >= nd_) break; + findroot(flags, root, parameter); + // std::cout << "new root"<<":"< width) { + width = final_graph_[i].size(); + } + } +} +} diff --git a/ann_benchmarks/algorithms/nsg/src/index_random.cpp b/ann_benchmarks/algorithms/nsg/src/index_random.cpp new file mode 100644 index 000000000..1be043c45 --- /dev/null +++ b/ann_benchmarks/algorithms/nsg/src/index_random.cpp @@ -0,0 +1,29 @@ +// +// Copyright (c) 2017 ZJULearning. All rights reserved. +// +// This source code is licensed under the MIT license. 
+// + +#include + + +namespace efanna2e { + +IndexRandom::IndexRandom(const size_t dimension, const size_t n):Index(dimension, n, L2){ + has_built = true; +} +IndexRandom::~IndexRandom() {} +void IndexRandom::Build(size_t n, const float *data, const Parameters ¶meters) { + data_ = data; + nd_ = n; + + // Do Nothing + + has_built = true; +} +void IndexRandom::Search(const float *query, const float *x, size_t k, const Parameters ¶meters, unsigned *indices) { + + GenRandom(rng, indices, k, nd_); +} + +} diff --git a/ann_benchmarks/algorithms/nsg/tests/CMakeLists.txt b/ann_benchmarks/algorithms/nsg/tests/CMakeLists.txt new file mode 100644 index 000000000..c6fb2fe1f --- /dev/null +++ b/ann_benchmarks/algorithms/nsg/tests/CMakeLists.txt @@ -0,0 +1,11 @@ +set(CMAKE_CXX_STANDARD 11) + +add_executable(test_nsg_index test_nsg_index.cpp) +target_link_libraries(test_nsg_index ${PROJECT_NAME} -ltcmalloc) + +add_executable(test_nsg_search test_nsg_search.cpp) +target_link_libraries(test_nsg_search ${PROJECT_NAME}) + +add_executable(test_nsg_optimized_search test_nsg_optimized_search.cpp) +target_link_libraries(test_nsg_optimized_search ${PROJECT_NAME} -ltcmalloc) + diff --git a/ann_benchmarks/algorithms/nsg/tests/test_nndescent.cpp b/ann_benchmarks/algorithms/nsg/tests/test_nndescent.cpp new file mode 100644 index 000000000..a6300d453 --- /dev/null +++ b/ann_benchmarks/algorithms/nsg/tests/test_nndescent.cpp @@ -0,0 +1,59 @@ +// +// Created by 付聪 on 2017/6/21. 
+// + +#include +#include +#include + + +void load_data(char* filename, float*& data, unsigned& num,unsigned& dim){// load data with sift10K pattern + std::ifstream in(filename, std::ios::binary); + if(!in.is_open()){std::cout<<"open file error"<("K", K); + paras.Set("L", L); + paras.Set("iter", iter); + paras.Set("S", S); + paras.Set("R", R); + + auto s = std::chrono::high_resolution_clock::now(); + index.Build(points_num, data_load, paras); + auto e = std::chrono::high_resolution_clock::now(); + std::chrono::duration diff = e-s; + std::cout <<"Time cost: "<< diff.count() << "\n"; + + index.Save(graph_filename); + + return 0; +} diff --git a/ann_benchmarks/algorithms/nsg/tests/test_nndescent_refine.cpp b/ann_benchmarks/algorithms/nsg/tests/test_nndescent_refine.cpp new file mode 100644 index 000000000..58ab99849 --- /dev/null +++ b/ann_benchmarks/algorithms/nsg/tests/test_nndescent_refine.cpp @@ -0,0 +1,64 @@ +// +// Created by 付聪 on 2017/6/26. +// + +#include +#include +#include + + +void load_data(char* filename, float*& data, unsigned& num,unsigned& dim){// load data with sift10K pattern + std::ifstream in(filename, std::ios::binary); + if(!in.is_open()){std::cout<<"open file error"<("K", K); + paras.Set("L", L); + paras.Set("iter", iter); + paras.Set("S", S); + paras.Set("R", R); + + auto s = std::chrono::high_resolution_clock::now(); + + index.RefineGraph(data_load, paras); + + auto e = std::chrono::high_resolution_clock::now(); + std::chrono::duration diff = e-s; + std::cout <<"Time cost: "<< diff.count() << "\n"; + + index.Save(graph_filename); + + return 0; +} diff --git a/ann_benchmarks/algorithms/nsg/tests/test_nsg_index.cpp b/ann_benchmarks/algorithms/nsg/tests/test_nsg_index.cpp new file mode 100644 index 000000000..ba544cb99 --- /dev/null +++ b/ann_benchmarks/algorithms/nsg/tests/test_nsg_index.cpp @@ -0,0 +1,63 @@ +// +// Created by 付聪 on 2017/6/21. 
+// + +#include +#include + +void load_data(char* filename, float*& data, unsigned& num, + unsigned& dim) { // load data with sift10K pattern + std::ifstream in(filename, std::ios::binary); + if (!in.is_open()) { + std::cout << "open file error" << std::endl; + exit(-1); + } + in.read((char*)&dim, 4); + in.seekg(0, std::ios::end); + std::ios::pos_type ss = in.tellg(); + size_t fsize = (size_t)ss; + num = (unsigned)(fsize / (dim + 1) / 4); + data = new float[(size_t)num * (size_t)dim]; + + in.seekg(0, std::ios::beg); + for (size_t i = 0; i < num; i++) { + in.seekg(4, std::ios::cur); + in.read((char*)(data + i * dim), dim * 4); + } + in.close(); +} +int main(int argc, char** argv) { + if (argc != 7) { + std::cout << argv[0] << " data_file nn_graph_path L R C save_graph_file" + << std::endl; + exit(-1); + } + float* data_load = NULL; + unsigned points_num, dim; + load_data(argv[1], data_load, points_num, dim); + + std::string nn_graph_path(argv[2]); + unsigned L = (unsigned)atoi(argv[3]); + unsigned R = (unsigned)atoi(argv[4]); + unsigned C = (unsigned)atoi(argv[5]); + + // data_load = efanna2e::data_align(data_load, points_num, dim);//one must + // align the data before build + efanna2e::IndexNSG index(dim, points_num, efanna2e::L2, nullptr); + + auto s = std::chrono::high_resolution_clock::now(); + efanna2e::Parameters paras; + paras.Set("L", L); + paras.Set("R", R); + paras.Set("C", C); + paras.Set("nn_graph_path", nn_graph_path); + + index.Build(points_num, data_load, paras); + auto e = std::chrono::high_resolution_clock::now(); + std::chrono::duration diff = e - s; + + std::cout << "indexing time: " << diff.count() << "\n"; + index.Save(argv[6]); + + return 0; +} diff --git a/ann_benchmarks/algorithms/nsg/tests/test_nsg_optimized_search.cpp b/ann_benchmarks/algorithms/nsg/tests/test_nsg_optimized_search.cpp new file mode 100644 index 000000000..766358675 --- /dev/null +++ b/ann_benchmarks/algorithms/nsg/tests/test_nsg_optimized_search.cpp @@ -0,0 +1,92 @@ +// +// 
Created by 付聪 on 2017/6/21. +// + +#include +#include +#include +#include + +void load_data(char* filename, float*& data, unsigned& num, + unsigned& dim) { // load data with sift10K pattern + std::ifstream in(filename, std::ios::binary); + if (!in.is_open()) { + std::cout << "open file error" << std::endl; + exit(-1); + } + in.read((char*)&dim, 4); + // std::cout<<"data dimension: "< >& results) { + std::ofstream out(filename, std::ios::binary | std::ios::out); + + for (unsigned i = 0; i < results.size(); i++) { + unsigned GK = (unsigned)results[i].size(); + out.write((char*)&GK, sizeof(unsigned)); + out.write((char*)results[i].data(), GK * sizeof(unsigned)); + } + out.close(); +} +int main(int argc, char** argv) { + if (argc != 7) { + std::cout << argv[0] + << " data_file query_file nsg_path search_L search_K result_path" + << std::endl; + exit(-1); + } + float* data_load = NULL; + unsigned points_num, dim; + load_data(argv[1], data_load, points_num, dim); + float* query_load = NULL; + unsigned query_num, query_dim; + load_data(argv[2], query_load, query_num, query_dim); + assert(dim == query_dim); + + unsigned L = (unsigned)atoi(argv[4]); + unsigned K = (unsigned)atoi(argv[5]); + + if (L < K) { + std::cout << "search_L cannot be smaller than search_K!" 
<< std::endl; + exit(-1); + } + + // data_load = efanna2e::data_align(data_load, points_num, dim);//one must + // align the data before build query_load = efanna2e::data_align(query_load, + // query_num, query_dim); + efanna2e::IndexNSG index(dim, points_num, efanna2e::FAST_L2, nullptr); + index.Load(argv[3]); + index.OptimizeGraph(data_load); + + efanna2e::Parameters paras; + paras.Set("L_search", L); + paras.Set("P_search", L); + + std::vector > res(query_num); + for (unsigned i = 0; i < query_num; i++) res[i].resize(K); + + auto s = std::chrono::high_resolution_clock::now(); + for (unsigned i = 0; i < query_num; i++) { + index.SearchWithOptGraph(query_load + i * dim, K, paras, res[i].data()); + } + auto e = std::chrono::high_resolution_clock::now(); + std::chrono::duration diff = e - s; + std::cout << "search time: " << diff.count() << "\n"; + + save_result(argv[6], res); + + return 0; +} diff --git a/ann_benchmarks/algorithms/nsg/tests/test_nsg_search.cpp b/ann_benchmarks/algorithms/nsg/tests/test_nsg_search.cpp new file mode 100644 index 000000000..b1f441b7c --- /dev/null +++ b/ann_benchmarks/algorithms/nsg/tests/test_nsg_search.cpp @@ -0,0 +1,88 @@ +// +// Created by 付聪 on 2017/6/21. 
+//
+
+// NOTE(review): the two project include targets below were lost when this
+// patch was extracted; they are restored on a best-effort basis - confirm.
+#include <efanna2e/index_nsg.h>
+#include <efanna2e/util.h>
+
+#include <chrono>
+#include <cstddef>
+#include <cstdlib>
+#include <fstream>
+#include <iostream>
+#include <vector>
+
+// Loads a vector file laid out as consecutive records of
+//   [4-byte dimension][dim * 4-byte float components]
+// into one freshly allocated, densely packed row-major buffer.
+//
+//   filename  path of the file to read
+//   data      receives a new[]-allocated array of num * dim floats
+//   num       receives the record count, derived from the file size
+//   dim       receives the dimension stored in the first record header
+//
+// Prints a message and calls exit(-1) when the file cannot be opened or read.
+void load_data(char* filename, float*& data, unsigned& num,
+               unsigned& dim) {  // load data with sift10K pattern
+  std::ifstream in(filename, std::ios::binary);
+  if (!in.is_open()) {
+    std::cout << "open file error" << std::endl;
+    exit(-1);
+  }
+  dim = 0;
+  in.read((char*)&dim, 4);
+  // A short read leaves the stream failed and dim meaningless; dim == 0 would
+  // make every record empty, so both are rejected up front.
+  if (!in || dim == 0) {
+    std::cout << "read file error" << std::endl;
+    exit(-1);
+  }
+  std::cout << "data dimension: " << dim << std::endl;
+  in.seekg(0, std::ios::end);
+  const std::streamoff file_end = in.tellg();
+  if (file_end < 0) {
+    std::cout << "read file error" << std::endl;
+    exit(-1);
+  }
+  // Widen before adding the header slot so dim + 1 cannot wrap to zero.
+  const unsigned long long record_bytes = ((unsigned long long)dim + 1) * 4;
+  num = (unsigned)((unsigned long long)file_end / record_bytes);
+  // new float[n] takes an element count, not a byte count: the former
+  // num * dim * sizeof(float) reserved four times the memory needed and was
+  // evaluated in 32-bit unsigned arithmetic, so it could also wrap.
+  data = new float[(size_t)num * (size_t)dim];
+
+  in.seekg(0, std::ios::beg);
+  for (size_t i = 0; i < num; i++) {
+    in.seekg(4, std::ios::cur);  // skip this record's dimension header
+    in.read((char*)(data + i * dim), (std::streamsize)dim * 4);
+  }
+  if (!in) {
+    std::cout << "read file error" << std::endl;
+    exit(-1);
+  }
+  in.close();
+}
+
+// Writes each result list to `filename` as [unsigned count][count ids].
+// Prints a message and calls exit(-1) if the file cannot be opened, instead
+// of silently producing no output.
+void save_result(char* filename, std::vector<std::vector<unsigned> >& results) {
+  std::ofstream out(filename, std::ios::binary | std::ios::out);
+  if (!out.is_open()) {
+    std::cout << "open file error" << std::endl;
+    exit(-1);
+  }
+
+  for (size_t i = 0; i < results.size(); i++) {
+    unsigned GK = (unsigned)results[i].size();
+    out.write((char*)&GK, sizeof(unsigned));
+    out.write((char*)results[i].data(), GK * sizeof(unsigned));
+  }
+  out.close();
+}
+
+// Usage: <prog> data_file query_file nsg_path search_L search_K result_path
+//
+// Loads base and query vectors, loads the graph from nsg_path, runs
+// IndexNSG::Search for every query against the base data, prints the elapsed
+// search time and stores the result ids in result_path.
+int main(int argc, char** argv) {
+  if (argc != 7) {
+    std::cout << argv[0]
+              << " data_file query_file nsg_path search_L search_K result_path"
+              << std::endl;
+    exit(-1);
+  }
+  float* data_load = NULL;
+  unsigned points_num, dim;
+  load_data(argv[1], data_load, points_num, dim);
+  float* query_load = NULL;
+  unsigned query_num, query_dim;
+  load_data(argv[2], query_load, query_num, query_dim);
+  // Checked at run time rather than with assert(): a mismatch would make the
+  // search loop index query_load with the wrong stride, and assert() vanishes
+  // under NDEBUG.
+  if (dim != query_dim) {
+    std::cout << "data and query dimensions differ" << std::endl;
+    exit(-1);
+  }
+
+  unsigned L = (unsigned)atoi(argv[4]);
+  unsigned K = (unsigned)atoi(argv[5]);
+
+  if (L < K) {
+    std::cout << "search_L cannot be smaller than search_K!" << std::endl;
+    exit(-1);
+  }
+
+  // data_load = efanna2e::data_align(data_load, points_num, dim);  // one must
+  // align the data before build
+  // query_load = efanna2e::data_align(query_load, query_num, query_dim);
+  efanna2e::IndexNSG index(dim, points_num, efanna2e::L2, nullptr);
+  index.Load(argv[3]);
+
+  efanna2e::Parameters paras;
+  paras.Set("L_search", L);
+  paras.Set("P_search", L);
+
+  // Result buffers are sized before the clock starts so the measurement covers
+  // the search calls only, not a vector allocation and copy per query.
+  std::vector<std::vector<unsigned> > res(query_num);
+  for (unsigned i = 0; i < query_num; i++) res[i].resize(K);
+
+  auto s = std::chrono::high_resolution_clock::now();
+  for (unsigned i = 0; i < query_num; i++) {
+    // size_t offset: i * dim in 32-bit unsigned can wrap for large query sets.
+    index.Search(query_load + (size_t)i * dim, data_load, K, paras,
+                 res[i].data());
+  }
+  auto e = std::chrono::high_resolution_clock::now();
+  std::chrono::duration<double> diff = e - s;
+  std::cout << "search time: " << diff.count() << "\n";
+
+  save_result(argv[6], res);
+
+  // NOTE(review): data_load / query_load are deliberately not delete[]d here;
+  // IndexNSG's ownership rules are not visible from this file, and the process
+  // ends right after this point.
+  return 0;
+}