Skip to content

Commit

Permalink
Prefilter with attributes before Search
Browse files Browse the repository at this point in the history
  • Loading branch information
vinh.nguyen1 committed Oct 23, 2024
1 parent fd6d784 commit 5d6a6bc
Show file tree
Hide file tree
Showing 35 changed files with 9,772 additions and 924 deletions.
2 changes: 1 addition & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -19,4 +19,4 @@
/tests/gtest/
faiss/python/swigfaiss_avx2.swig
faiss/python/swigfaiss_avx512.swig
faiss/python/swigfaiss_sve.swig
faiss/python/swigfaiss_sve.swig
118 changes: 117 additions & 1 deletion faiss/Index.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -38,13 +38,74 @@ void Index::assign(idx_t n, const float* x, idx_t* labels, idx_t k) const {
search(n, x, k, distances.data(), labels);
}

/// Base-class fallback for adding vectors together with one attribute array.
/// It ignores all of its arguments and raises a "not implemented" error;
/// presumably index types that support attributes override it.
void Index::add_with_one_attribute(idx_t /*n*/, const float* /*x*/, const float* /*attr*/) {
    FAISS_THROW_MSG("add with one attribute not implemented for this type of index");
}

/// Base-class fallback for adding vectors together with two attribute arrays.
/// It ignores all of its arguments and raises a "not implemented" error;
/// presumably index types that support attributes override it.
void Index::add_with_two_attribute(
        idx_t /*n*/, const float* /*x*/,
        const float* /*attr_first*/, const float* /*attr_second*/) {
    FAISS_THROW_MSG("add with two attribute not implemented for this type of index");
}

/**
 * Base-class fallback for a search restricted by one attribute range.
 *
 * None of the arguments are used: the function only raises a
 * "not implemented" error. All parameter names are commented out (including
 * `params`, which was previously named but unused) so the definition does
 * not trigger unused-parameter warnings and matches the other stubs in
 * this file.
 */
void Index::search_with_one_attribute(
        idx_t /*n*/,
        const float* /*x*/,
        const float /*lower_attribute*/,
        const float /*upper_attribute*/,
        idx_t /*k*/,
        float* /*distances*/,
        idx_t* /*labels*/,
        float* /*out_attrs*/,
        const SearchParameters* /*params*/) const {
    FAISS_THROW_MSG("search with one attribute not implemented");
}

/**
 * Base-class fallback for a search restricted by two attribute ranges.
 *
 * None of the arguments are used: the function only raises a
 * "not implemented" error. All parameter names are commented out (including
 * `params`, which was previously named but unused) so the definition does
 * not trigger unused-parameter warnings and matches the other stubs in
 * this file.
 */
void Index::search_with_two_attribute(
        idx_t /*n*/,
        const float* /*x*/,
        const float /*lower_attribute_first*/,
        const float /*upper_attribute_first*/,
        const float /*lower_attribute_second*/,
        const float /*upper_attribute_second*/,
        idx_t /*k*/,
        float* /*distances*/,
        idx_t* /*labels*/,
        float* /*out_attrs_first*/,
        float* /*out_attrs_second*/,
        const SearchParameters* /*params*/) const {
    FAISS_THROW_MSG("search with two attribute not implemented");
}

/// Base-class fallback for adding vectors with caller-supplied ids.
/// It ignores its arguments and raises a "not implemented" error.
void Index::add_with_ids(idx_t /*n*/, const float* /*x*/, const idx_t* /*xids*/) {
    FAISS_THROW_MSG("add_with_ids not implemented for this type of index");
}

/// Base-class fallback for adding vectors with caller-supplied ids and one
/// attribute array. It ignores its arguments and raises a "not implemented"
/// error.
void Index::add_with_ids_with_one_attribute(
        idx_t /*n*/, const float* /*x*/,
        const float* /*attr*/, const idx_t* /*xids*/) {
    FAISS_THROW_MSG("add_with_ids_with_one_attribute not implemented for this type of index");
}

/// Base-class fallback for adding vectors with caller-supplied ids and two
/// attribute arrays. It ignores its arguments and raises a "not implemented"
/// error.
void Index::add_with_ids_with_two_attribute(
        idx_t /*n*/, const float* /*x*/,
        const float* /*attr_first*/, const float* /*attr_second*/,
        const idx_t* /*xids*/) {
    FAISS_THROW_MSG("add_with_ids_with_two_attribute not implemented for this type of index");
}

size_t Index::remove_ids(const IDSelector& /*sel*/) {
FAISS_THROW_MSG("remove_ids not implemented for this type of index");
return -1;
Expand All @@ -54,6 +115,14 @@ void Index::reconstruct(idx_t, float*) const {
FAISS_THROW_MSG("reconstruct not implemented for this type of index");
}

/// Base-class fallback for reading back the single attribute of one entry.
/// It ignores its arguments and raises a "not implemented" error.
void Index::reconstruct_one_attribute(
        idx_t /*key*/,
        float* /*recons_attr*/) const {
    FAISS_THROW_MSG("reconstruct_one_attribute not implemented for this type of index");
}

/// Base-class fallback for reading back both attributes of one entry.
/// It ignores its arguments and raises a "not implemented" error.
void Index::reconstruct_two_attribute(
        idx_t /*key*/,
        float* /*recons_attr_first*/,
        float* /*recons_attr_second*/) const {
    FAISS_THROW_MSG("reconstruct_two_attribute not implemented for this type of index");
}

void Index::reconstruct_batch(idx_t n, const idx_t* keys, float* recons) const {
std::mutex exception_mutex;
std::string exception_string;
Expand All @@ -78,6 +147,20 @@ void Index::reconstruct_n(idx_t i0, idx_t ni, float* recons) const {
}
}

/**
 * Reconstruct the attribute of entries i0 .. i0 + ni - 1 by calling
 * reconstruct_one_attribute() once per entry.
 *
 * @param i0           id of the first entry
 * @param ni           number of entries to process
 * @param recons_attr  output array; one float is written per entry
 *                     (recons_attr[i] for entry i0 + i)
 *
 * If any per-entry call raises a std::exception, its message is re-raised
 * through FAISS_THROW_MSG after the loop has finished.
 */
void Index::reconstruct_n_one_attribute(idx_t i0, idx_t ni, float* recons_attr) const {
    // An exception must not leave an OpenMP parallel region, and the
    // base-class reconstruct_one_attribute() always throws. Capture the
    // message inside the loop and rethrow once the region is done.
    std::mutex exception_mutex;
    std::string exception_string;
#pragma omp parallel for if (ni > 1000)
    for (idx_t i = 0; i < ni; i++) {
        try {
            reconstruct_one_attribute(i0 + i, recons_attr + i);
        } catch (const std::exception& e) {
            std::lock_guard<std::mutex> lock(exception_mutex);
            exception_string = e.what();
        }
    }
    if (!exception_string.empty()) {
        FAISS_THROW_MSG(exception_string.c_str());
    }
}

/**
 * Reconstruct both attributes of entries i0 .. i0 + ni - 1 by calling
 * reconstruct_two_attribute() once per entry.
 *
 * @param i0                  id of the first entry
 * @param ni                  number of entries to process
 * @param recons_attr_first   output array for the first attribute; one
 *                            float is written per entry
 * @param recons_attr_second  output array for the second attribute; one
 *                            float is written per entry
 *
 * If any per-entry call raises a std::exception, its message is re-raised
 * through FAISS_THROW_MSG after the loop has finished.
 */
void Index::reconstruct_n_two_attribute(
        idx_t i0,
        idx_t ni,
        float* recons_attr_first,
        float* recons_attr_second) const {
    // An exception must not leave an OpenMP parallel region, and the
    // base-class reconstruct_two_attribute() always throws. Capture the
    // message inside the loop and rethrow once the region is done.
    std::mutex exception_mutex;
    std::string exception_string;
#pragma omp parallel for if (ni > 1000)
    for (idx_t i = 0; i < ni; i++) {
        try {
            reconstruct_two_attribute(
                    i0 + i, recons_attr_first + i, recons_attr_second + i);
        } catch (const std::exception& e) {
            std::lock_guard<std::mutex> lock(exception_mutex);
            exception_string = e.what();
        }
    }
    if (!exception_string.empty()) {
        FAISS_THROW_MSG(exception_string.c_str());
    }
}

void Index::search_and_reconstruct(
idx_t n,
const float* x,
Expand Down Expand Up @@ -126,14 +209,47 @@ size_t Index::sa_code_size() const {
FAISS_THROW_MSG("standalone codec not implemented for this type of index");
}

/// Base-class fallback for the one-attribute standalone codec size.
/// It never returns a value: it only raises a "not implemented" error.
size_t Index::sa_one_attribute_code_size() const
{
    FAISS_THROW_MSG("standalone codec for one attribute not implemented for this type of index");
}

/// Base-class fallback for the two-attribute standalone codec size.
/// It never returns a value: it only raises a "not implemented" error.
size_t Index::sa_two_attribute_code_size() const
{
    FAISS_THROW_MSG("standalone codec for two attribute not implemented for this type of index");
}

/// Base-class fallback for the standalone encoder. It ignores its arguments
/// and raises a "not implemented" error.
void Index::sa_encode(
        idx_t /*n*/,
        const float* /*x*/,
        uint8_t* /*bytes*/) const {
    FAISS_THROW_MSG("standalone codec not implemented for this type of index");
}

/// Base-class fallback for the one-attribute standalone encoder. It ignores
/// its arguments and raises a "not implemented" error.
void Index::sa_one_attribute_encode(
        idx_t /*n*/,
        const float* /*attr*/,
        uint8_t* /*bytes*/) const {
    FAISS_THROW_MSG("standalone codec for one attribute not implemented for this type of index");
}

/// Base-class fallback for the two-attribute standalone encoder. It ignores
/// its arguments and raises a "not implemented" error.
void Index::sa_two_attribute_encode(
        idx_t /*n*/,
        const float* /*attr_first*/,
        const float* /*attr_second*/,
        uint8_t* /*bytes_first*/,
        uint8_t* /*bytes_second*/) const {
    FAISS_THROW_MSG("standalone codec for two attribute not implemented for this type of index");
}

/// Base-class fallback for the standalone decoder. It ignores its arguments
/// and raises a "not implemented" error.
void Index::sa_decode(
        idx_t /*n*/,
        const uint8_t* /*bytes*/,
        float* /*x*/) const {
    FAISS_THROW_MSG("standalone codec not implemented for this type of index");
}

/// Base-class fallback for the one-attribute standalone decoder. It ignores
/// its arguments and raises a "not implemented" error.
void Index::sa_one_attribute_decode(
        idx_t /*n*/,
        const uint8_t* /*bytes*/,
        float* /*attr*/) const {
    FAISS_THROW_MSG("standalone codec for one attribute not implemented for this type of index");
}

/// Base-class fallback for the two-attribute standalone decoder. It ignores
/// its arguments and raises a "not implemented" error.
void Index::sa_two_attribute_decode(
        idx_t /*n*/,
        const uint8_t* /*bytes_first*/,
        const uint8_t* /*bytes_second*/,
        float* /*attr_first*/,
        float* /*attr_second*/) const {
    FAISS_THROW_MSG("standalone codec for two attribute not implemented for this type of index");
}

/// Base-class fallback: changes no state and only raises a
/// "not implemented" error.
void Index::set_is_include_one_attribute()
{
    FAISS_THROW_MSG("set_is_include_one_attribute not implemented for this type of index");
}

/// Base-class fallback: changes no state and only raises a
/// "not implemented" error.
void Index::set_is_include_two_attribute()
{
    FAISS_THROW_MSG("set_is_include_two_attribute not implemented for this type of index");
}


namespace {

// storage that explicitly reconstructs vectors before computing distances
Expand Down Expand Up @@ -182,4 +298,4 @@ void Index::check_compatible_for_merge(const Index& /* otherIndex */) const {
FAISS_THROW_MSG("check_compatible_for_merge() not implemented");
}

} // namespace faiss
} // namespace faiss
57 changes: 49 additions & 8 deletions faiss/Index.h
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
#include <typeinfo>

#define FAISS_VERSION_MAJOR 1
#define FAISS_VERSION_MINOR 9
#define FAISS_VERSION_MINOR 8
#define FAISS_VERSION_PATCH 0

// Macro to combine the version components into a single string
Expand Down Expand Up @@ -116,6 +116,8 @@ struct Index {
* @param x input matrix, size n * d
*/
virtual void add(idx_t n, const float* x) = 0;
/** Variant of add() that additionally receives one attribute array.
 *
 * The base-class definition in Index.cpp only raises a "not implemented"
 * error.
 *
 * @param n      number of vectors
 * @param x      input matrix, size n * d
 * @param attr   attribute values; presumably one float per vector
 *               (size n) -- TODO confirm against an implementing index
 */
virtual void add_with_one_attribute(idx_t n, const float* x, const float* attr);
/** Variant of add() that additionally receives two attribute arrays.
 *
 * The base-class definition in Index.cpp only raises a "not implemented"
 * error.
 *
 * @param n            number of vectors
 * @param x            input matrix, size n * d
 * @param attr_first   first attribute values; presumably size n -- TODO confirm
 * @param attr_second  second attribute values; presumably size n -- TODO confirm
 */
virtual void add_with_two_attribute(idx_t n, const float* x, const float* attr_first, const float* attr_second);

/** Same as add, but stores xids instead of sequential ids.
*
Expand All @@ -127,6 +129,8 @@ struct Index {
* @param xids if non-null, ids to store for the vectors (size n)
*/
virtual void add_with_ids(idx_t n, const float* x, const idx_t* xids);
/** Variant of add_with_ids() that additionally receives one attribute array.
 *
 * The base-class definition in Index.cpp only raises a "not implemented"
 * error.
 *
 * @param n      number of vectors
 * @param x      input vectors
 * @param attr   attribute values; presumably one float per vector
 *               (size n) -- TODO confirm
 * @param xids   ids to store for the vectors (size n)
 */
virtual void add_with_ids_with_one_attribute(idx_t n, const float* x, const float* attr, const idx_t* xids);
/** Variant of add_with_ids() that additionally receives two attribute arrays.
 *
 * The base-class definition in Index.cpp only raises a "not implemented"
 * error.
 *
 * @param n            number of vectors
 * @param x            input vectors
 * @param attr_first   first attribute values; presumably size n -- TODO confirm
 * @param attr_second  second attribute values; presumably size n -- TODO confirm
 * @param xids         ids to store for the vectors (size n)
 */
virtual void add_with_ids_with_two_attribute(idx_t n, const float* x, const float* attr_first, const float* attr_second, const idx_t* xids);

/** query n vectors of dimension d to the index.
*
Expand All @@ -147,6 +151,32 @@ struct Index {
idx_t* labels,
const SearchParameters* params = nullptr) const = 0;

/** Search variant that takes a lower and an upper bound on one attribute.
 *
 * The base-class definition in Index.cpp only raises a "not implemented"
 * error.
 *
 * NOTE(review): presumably only entries whose attribute lies between
 * lower_attribute and upper_attribute are candidates; whether the bounds
 * are inclusive is not visible here -- confirm in the implementing index.
 *
 * @param n                number of query vectors
 * @param x                query vectors
 * @param lower_attribute  lower bound of the attribute range
 * @param upper_attribute  upper bound of the attribute range
 * @param k                number of nearest neighbours
 * @param distances        output distances; presumably size n*k
 * @param labels           output labels; presumably size n*k
 * @param out_attrs        output attributes; presumably one per returned
 *                         neighbour (size n*k) -- TODO confirm
 * @param params           optional search parameters, may be nullptr
 */
virtual void search_with_one_attribute(
idx_t n,
const float* x,
const float lower_attribute,
const float upper_attribute,
idx_t k,
float* distances,
idx_t* labels,
float* out_attrs,
const SearchParameters* params = nullptr) const;


/** Search variant that takes lower and upper bounds on two attributes.
 *
 * The base-class definition in Index.cpp only raises a "not implemented"
 * error.
 *
 * NOTE(review): presumably only entries whose two attributes lie inside
 * their respective ranges are candidates; how the two ranges are combined
 * and whether the bounds are inclusive is not visible here -- confirm in
 * the implementing index.
 *
 * @param n                       number of query vectors
 * @param x                       query vectors
 * @param lower_attribute_first   lower bound for the first attribute
 * @param upper_attribute_first   upper bound for the first attribute
 * @param lower_attribute_second  lower bound for the second attribute
 * @param upper_attribute_second  upper bound for the second attribute
 * @param k                       number of nearest neighbours
 * @param distances               output distances; presumably size n*k
 * @param labels                  output labels; presumably size n*k
 * @param out_attrs_first         output first attributes; presumably size
 *                                n*k -- TODO confirm
 * @param out_attrs_second        output second attributes; presumably size
 *                                n*k -- TODO confirm
 * @param params                  optional search parameters, may be nullptr
 */
virtual void search_with_two_attribute(
idx_t n,
const float* x,
const float lower_attribute_first,
const float upper_attribute_first,
const float lower_attribute_second,
const float upper_attribute_second,
idx_t k,
float* distances,
idx_t* labels,
float* out_attrs_first,
float* out_attrs_second,
const SearchParameters* params = nullptr) const;

/** query n vectors of dimension d to the index.
*
* return all vectors with distance < radius. Note that many
Expand Down Expand Up @@ -174,8 +204,7 @@ struct Index {
* @param labels output labels of the NNs, size n*k
* @param k number of nearest neighbours
*/
virtual void assign(idx_t n, const float* x, idx_t* labels, idx_t k = 1)
const;
virtual void assign(idx_t n, const float* x, idx_t* labels, idx_t k = 1) const;

/// removes all elements from the database.
virtual void reset() = 0;
Expand All @@ -192,6 +221,8 @@ struct Index {
* @param recons reconstructed vector (size d)
*/
virtual void reconstruct(idx_t key, float* recons) const;
/** Attribute counterpart of reconstruct() for one attribute.
 *
 * The base-class definition in Index.cpp only raises a "not implemented"
 * error.
 *
 * @param key          id of the entry
 * @param recons_attr  output attribute; a single float per entry, as used
 *                     by reconstruct_n_one_attribute()
 */
virtual void reconstruct_one_attribute(idx_t key, float* recons_attr) const;
/** Attribute counterpart of reconstruct() for two attributes.
 *
 * The base-class definition in Index.cpp only raises a "not implemented"
 * error.
 *
 * @param key                 id of the entry
 * @param recons_attr_first   output first attribute (a single float per entry)
 * @param recons_attr_second  output second attribute (a single float per entry)
 */
virtual void reconstruct_two_attribute(idx_t key, float* recons_attr_first, float* recons_attr_second) const;

/** Reconstruct several stored vectors (or an approximation if lossy
* coding)
Expand All @@ -201,8 +232,7 @@ struct Index {
* @param keys ids of the vectors to reconstruct (size n)
* @param recons reconstructed vector (size n * d)
*/
virtual void reconstruct_batch(idx_t n, const idx_t* keys, float* recons)
const;
virtual void reconstruct_batch(idx_t n, const idx_t* keys, float* recons) const;

/** Reconstruct vectors i0 to i0 + ni - 1
*
Expand All @@ -212,6 +242,8 @@ struct Index {
* @param recons reconstructed vector (size ni * d)
*/
virtual void reconstruct_n(idx_t i0, idx_t ni, float* recons) const;
/** Reconstruct the attribute of entries i0 to i0 + ni - 1.
 *
 * The base-class definition calls reconstruct_one_attribute() once per
 * entry, writing entry i0 + i to recons_attr[i].
 *
 * @param i0           id of the first entry
 * @param ni           number of entries
 * @param recons_attr  output attributes (size ni)
 */
virtual void reconstruct_n_one_attribute(idx_t i0, idx_t ni, float* recons_attr) const;
/** Reconstruct both attributes of entries i0 to i0 + ni - 1.
 *
 * The base-class definition calls reconstruct_two_attribute() once per
 * entry, writing entry i0 + i to index i of each output array.
 *
 * @param i0                  id of the first entry
 * @param ni                  number of entries
 * @param recons_attr_first   output first attributes (size ni)
 * @param recons_attr_second  output second attributes (size ni)
 */
virtual void reconstruct_n_two_attribute(idx_t i0, idx_t ni, float* recons_attr_first, float* recons_attr_second) const;

/** Similar to search, but also reconstructs the stored vectors (or an
* approximation in the case of lossy coding) for the search results.
Expand Down Expand Up @@ -246,8 +278,7 @@ struct Index {
* @param residual output residual vector, size d
* @param key encoded index, as returned by search and assign
*/
virtual void compute_residual(const float* x, float* residual, idx_t key)
const;
virtual void compute_residual(const float* x, float* residual, idx_t key) const;

/** Computes a residual vector after indexing encoding (batch form).
* Equivalent to calling compute_residual for each vector.
Expand Down Expand Up @@ -280,6 +311,8 @@ struct Index {

/** size of the produced codes in bytes */
virtual size_t sa_code_size() const;
/** Code size for the one-attribute standalone codec; presumably in bytes
 * like sa_code_size() -- TODO confirm. The base-class definition in
 * Index.cpp only raises a "not implemented" error. */
virtual size_t sa_one_attribute_code_size() const;
/** Code size for the two-attribute standalone codec; presumably in bytes
 * like sa_code_size() -- TODO confirm. The base-class definition in
 * Index.cpp only raises a "not implemented" error. */
virtual size_t sa_two_attribute_code_size() const;

/** encode a set of vectors
*
Expand All @@ -288,6 +321,9 @@ struct Index {
* @param bytes output encoded vectors, size n * sa_code_size()
*/
virtual void sa_encode(idx_t n, const float* x, uint8_t* bytes) const;
/** Attribute counterpart of sa_encode() for one attribute.
 *
 * The base-class definition in Index.cpp only raises a "not implemented"
 * error.
 *
 * @param n      number of attribute values
 * @param attr   input attribute values
 * @param bytes  output codes; presumably n * sa_one_attribute_code_size()
 *               bytes -- TODO confirm
 */
virtual void sa_one_attribute_encode(idx_t n, const float* attr, uint8_t* bytes) const;
/** Attribute counterpart of sa_encode() for two attributes.
 *
 * The base-class definition in Index.cpp only raises a "not implemented"
 * error.
 *
 * @param n             number of entries
 * @param attr_first    input first attribute values
 * @param attr_second   input second attribute values
 * @param bytes_first   output codes for the first attribute
 * @param bytes_second  output codes for the second attribute
 *                      (NOTE(review): buffer sizes are not visible here)
 */
virtual void sa_two_attribute_encode(idx_t n, const float* attr_first, const float* attr_second,
uint8_t* bytes_first, uint8_t* bytes_second) const;

/** decode a set of vectors
*
Expand All @@ -296,6 +332,9 @@ struct Index {
* @param x output vectors, size n * d
*/
virtual void sa_decode(idx_t n, const uint8_t* bytes, float* x) const;
/** Attribute counterpart of sa_decode() for one attribute.
 *
 * The base-class definition in Index.cpp only raises a "not implemented"
 * error.
 *
 * @param n      number of attribute values
 * @param bytes  input codes; presumably n * sa_one_attribute_code_size()
 *               bytes -- TODO confirm
 * @param attr   output attribute values
 */
virtual void sa_one_attribute_decode(idx_t n, const uint8_t* bytes, float* attr) const;
/** Attribute counterpart of sa_decode() for two attributes.
 *
 * The base-class definition in Index.cpp only raises a "not implemented"
 * error.
 *
 * @param n             number of entries
 * @param bytes_first   input codes for the first attribute
 * @param bytes_second  input codes for the second attribute
 * @param attr_first    output first attribute values
 * @param attr_second   output second attribute values
 *                      (NOTE(review): buffer sizes are not visible here)
 */
virtual void sa_two_attribute_decode(idx_t n, const uint8_t* bytes_first, const uint8_t* bytes_second,
float* attr_first, float* attr_second) const;

/** moves the entries from another dataset to self.
* On output, other is empty.
Expand All @@ -307,8 +346,10 @@ struct Index {
* trained in the same way and have the same
* parameters). Otherwise throw. */
virtual void check_compatible_for_merge(const Index& otherIndex) const;
/** NOTE(review): presumably switches the index into a mode where one
 * attribute is stored alongside each vector -- confirm in the implementing
 * index. The base-class definition in Index.cpp only raises a
 * "not implemented" error. */
virtual void set_is_include_one_attribute();
/** NOTE(review): presumably switches the index into a mode where two
 * attributes are stored alongside each vector -- confirm in the
 * implementing index. The base-class definition in Index.cpp only raises a
 * "not implemented" error. */
virtual void set_is_include_two_attribute();
};

} // namespace faiss

#endif
#endif
Loading

0 comments on commit 5d6a6bc

Please sign in to comment.