Skip to content

Commit

Permalink
Add support for radial search in exact search
Browse files Browse the repository at this point in the history
When a threshold value is set, the knn plugin will not create a graph.
Hence, when a search request is triggered during that time, exact search
will return valid results. However, radial search was never included
as part of exact search. This breaks radial search when a threshold
is set and radial search is requested. In this commit, a new method
is introduced that accepts a min score and returns documents that are greater
than the min score, similar to how radial search is performed by native
engines. This search is independent of the engine, but radial search is
supported only for the FAISS engine out of all native engines.

Signed-off-by: Vijayan Balasubramanian <[email protected]>
  • Loading branch information
VijayanB committed Oct 1, 2024
1 parent 146eeee commit cbd28c4
Show file tree
Hide file tree
Showing 5 changed files with 167 additions and 31 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,7 @@ public class ExactSearcher {
public Map<Integer, Float> searchLeaf(final LeafReaderContext leafReaderContext, final ExactSearcherContext exactSearcherContext)
throws IOException {
KNNIterator iterator = getKNNIterator(leafReaderContext, exactSearcherContext);
if (exactSearcherContext.getRadius() != null) {
if (exactSearcherContext.getKnnQuery().getRadius() != null) {
return doRadialSearch(leafReaderContext, exactSearcherContext, iterator);
}
if (exactSearcherContext.getMatchedDocs() != null
Expand Down Expand Up @@ -258,7 +258,6 @@ public static class ExactSearcherContext {
*/
boolean useQuantizedVectorsForSearch;
int k;
Float radius;
BitSet matchedDocs;
KNNQuery knnQuery;
/**
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -201,8 +201,8 @@ private int[] bitSetToIntArray(final BitSet bitSet) {

private Map<Integer, Float> doExactSearch(final LeafReaderContext context, final BitSet acceptedDocs, int k) throws IOException {
final ExactSearcherContextBuilder exactSearcherContextBuilder = ExactSearcher.ExactSearcherContext.builder()
.k(k)
.isParentHits(true)
.k(k)
// setting to true, so that if quantization details are present we want to do search on the quantized
// vectors as this flow is used in first pass of search.
.useQuantizedVectorsForSearch(true)
Expand Down
79 changes: 79 additions & 0 deletions src/test/java/org/opensearch/knn/index/FaissIT.java
Original file line number Diff line number Diff line change
Expand Up @@ -1708,6 +1708,85 @@ public void testIVF_whenBinaryFormat_whenIVF_thenSuccess() {
validateGraphEviction();
}

/**
 * Runs radial search against a FAISS HNSW field on an index whose settings are built with
 * {@code buildKNNIndexSettings(-1)}, first by max distance and then by the equivalent min score,
 * and checks that every query yields at least one result in both modes.
 */
@SneakyThrows
public void testEndToEnd_whenDoRadiusSearch_whenNoGraphFileIsCreated_whenDistanceThreshold_thenSucceed() {
    SpaceType spaceType = SpaceType.L2;

    // Candidate HNSW parameters; one value of each is picked at random for the mapping.
    List<Integer> mCandidates = ImmutableList.of(16, 32, 64, 128);
    List<Integer> efConstructionCandidates = ImmutableList.of(16, 32, 64, 128);
    List<Integer> efSearchCandidates = ImmutableList.of(16, 32, 64, 128);

    Integer dimension = testData.indexData.vectors[0].length;
    final Settings indexSettings = buildKNNIndexSettings(-1);

    // Draw the random parameters in the same order in which the mapping consumes them.
    Integer m = mCandidates.get(random().nextInt(mCandidates.size()));
    Integer efConstruction = efConstructionCandidates.get(random().nextInt(efConstructionCandidates.size()));
    Integer efSearch = efSearchCandidates.get(random().nextInt(efSearchCandidates.size()));

    // Build the mapping and create the index
    XContentBuilder mappingBuilder = XContentFactory.jsonBuilder()
        .startObject()
        .startObject("properties")
        .startObject(FIELD_NAME)
        .field("type", "knn_vector")
        .field("dimension", dimension)
        .startObject(KNN_METHOD)
        .field(NAME, METHOD_HNSW)
        .field(METHOD_PARAMETER_SPACE_TYPE, spaceType.getValue())
        .field(KNN_ENGINE, KNNEngine.FAISS.getName())
        .startObject(PARAMETERS)
        .field(METHOD_PARAMETER_M, m)
        .field(METHOD_PARAMETER_EF_CONSTRUCTION, efConstruction)
        .field(KNNConstants.METHOD_PARAMETER_EF_SEARCH, efSearch)
        .endObject()
        .endObject()
        .endObject()
        .endObject()
        .endObject();
    createKnnIndex(INDEX_NAME, indexSettings, mappingBuilder.toString());

    // Ingest every test vector under its document id
    for (int docIdx = 0; docIdx < testData.indexData.docs.length; docIdx++) {
        String docId = Integer.toString(testData.indexData.docs[docIdx]);
        Object[] vector = Floats.asList(testData.indexData.vectors[docIdx]).toArray();
        addKnnDoc(INDEX_NAME, docId, FIELD_NAME, vector);
    }

    // All documents must be visible before searching
    refreshAllNonSystemIndices();
    assertEquals(testData.indexData.docs.length, getDocCount(INDEX_NAME));

    // Radial search by max distance
    float distance = 300000000000f;
    final List<List<KNNResult>> byDistance = validateRadiusSearchResults(
        INDEX_NAME,
        FIELD_NAME,
        testData.queries,
        distance,
        null,
        spaceType,
        null,
        null
    );
    assertFalse(byDistance.isEmpty());
    for (List<KNNResult> perQuery : byDistance) {
        assertFalse(perQuery.isEmpty());
    }

    // Radial search by the min score that corresponds to the same distance
    float score = spaceType.scoreTranslation(distance);
    final List<List<KNNResult>> byScore = validateRadiusSearchResults(
        INDEX_NAME,
        FIELD_NAME,
        testData.queries,
        null,
        score,
        spaceType,
        null,
        null
    );
    assertFalse(byScore.isEmpty());
    for (List<KNNResult> perQuery : byScore) {
        assertFalse(perQuery.isEmpty());
    }

    // Clean up
    deleteKNNIndex(INDEX_NAME);
}

@SneakyThrows
public void testQueryWithFilter_whenNonExistingFieldUsedInFilter_thenSuccessful() {
XContentBuilder builder = XContentFactory.jsonBuilder()
Expand Down
86 changes: 86 additions & 0 deletions src/test/java/org/opensearch/knn/index/OpenSearchIT.java
Original file line number Diff line number Diff line change
Expand Up @@ -814,6 +814,92 @@ public void testKNNIndex_whenBuildVectorDataStructureIsLessThanDocCount_thenBuil
deleteKNNIndex(indexName);
}

/*
 For this test case, we create an index with the setting build_vector_data_structure_threshold set to -1, index the
 test documents into an nmslib field and a faiss field, and perform a k=1 knn search on each field. The hits are
 expected to come from exact search, since no graph has been built yet. In the next step, we update the setting to 0,
 force merge to 1 segment, perform a k=10 knn search on both fields for every query, and confirm that exactly k hits
 are returned.
 NOTE(review): the method name mentions radial search, but only top-k queries are issued here - confirm intent.
 */
public void testKNNIndex_whenBuildVectorGraphThresholdIsProvidedEndToEnd_thenBuildGraphBasedOnSettingUsingRadialSearch()
    throws Exception {
    final String indexName = "test-index-1";
    final String fieldName1 = "test-field-1";
    final String fieldName2 = "test-field-2";

    final Integer dimension = testData.indexData.vectors[0].length;
    final Settings knnIndexSettings = buildKNNIndexSettings(-1);

    // Create an index with one nmslib-backed and one faiss-backed knn_vector field
    final XContentBuilder builder = XContentFactory.jsonBuilder()
        .startObject()
        .startObject("properties")
        .startObject(fieldName1)
        .field("type", "knn_vector")
        .field("dimension", dimension)
        .startObject(KNNConstants.KNN_METHOD)
        .field(KNNConstants.NAME, KNNConstants.METHOD_HNSW)
        .field(KNNConstants.KNN_ENGINE, KNNEngine.NMSLIB.getName())
        .startObject(KNNConstants.PARAMETERS)
        .endObject()
        .endObject()
        .endObject()
        .startObject(fieldName2)
        .field("type", "knn_vector")
        .field("dimension", dimension)
        .startObject(KNNConstants.KNN_METHOD)
        .field(KNNConstants.NAME, KNNConstants.METHOD_HNSW)
        .field(KNNConstants.KNN_ENGINE, KNNEngine.FAISS.getName())
        .startObject(KNNConstants.PARAMETERS)
        .endObject()
        .endObject()
        .endObject()
        .endObject()
        .endObject();

    createKnnIndex(indexName, knnIndexSettings, builder.toString());

    // Index the test data; the same vector is written to both fields
    for (int i = 0; i < testData.indexData.docs.length; i++) {
        addKnnDoc(
            indexName,
            Integer.toString(testData.indexData.docs[i]),
            ImmutableList.of(fieldName1, fieldName2),
            ImmutableList.of(
                Floats.asList(testData.indexData.vectors[i]).toArray(),
                Floats.asList(testData.indexData.vectors[i]).toArray()
            )
        );
    }

    refreshAllIndices();
    // Assert we have the right number of documents in the index
    assertEquals(testData.indexData.docs.length, getDocCount(indexName));

    // Compare against the requested k. The previous assertions compared each list's size with itself
    // and therefore could never fail.
    final int initialK = 1;
    final List<KNNResult> nmslibNeighbors = getResults(indexName, fieldName1, testData.queries[0], initialK);
    assertEquals("unexpected neighbors are returned", initialK, nmslibNeighbors.size());

    final List<KNNResult> faissNeighbors = getResults(indexName, fieldName2, testData.queries[0], initialK);
    assertEquals("unexpected neighbors are returned", initialK, faissNeighbors.size());

    // update build vector data structure setting
    updateIndexSettings(indexName, Settings.builder().put(KNNSettings.INDEX_KNN_BUILD_VECTOR_DATA_STRUCTURE_THRESHOLD, 0));
    forceMergeKnnIndex(indexName, 1);

    final int k = 10;
    for (int i = 0; i < testData.queries.length; i++) {
        // Search nmslib field
        final Response response = searchKNNIndex(indexName, new KNNQueryBuilder(fieldName1, testData.queries[i], k), k);
        final String responseBody = EntityUtils.toString(response.getEntity());
        final List<KNNResult> nmslibValidNeighbors = parseSearchResponse(responseBody, fieldName1);
        assertEquals(k, nmslibValidNeighbors.size());
        // Search faiss field
        final List<KNNResult> faissValidNeighbors = getResults(indexName, fieldName2, testData.queries[i], k);
        assertEquals(k, faissValidNeighbors.size());
    }

    // Delete index
    deleteKNNIndex(indexName);
}

private List<KNNResult> getResults(final String indexName, final String fieldName, final float[] vector, final int k)
throws IOException, ParseException {
final Response searchResponseField = searchKNNIndex(indexName, new KNNQueryBuilder(fieldName, vector, k), k);
Expand Down
28 changes: 0 additions & 28 deletions src/test/java/org/opensearch/knn/integ/BinaryIndexIT.java
Original file line number Diff line number Diff line change
Expand Up @@ -155,17 +155,6 @@ public void testFaissHnswBinary_whenBuildVectorGraphThresholdIsProvidedEndToEnd_
}
}

/**
 * Issues a min-score (radial) query against a FAISS HNSW binary index and expects it to be
 * rejected with a message stating that binary data does not support radial search.
 */
@SneakyThrows
public void testFaissHnswBinary_whenRadialSearch_thenThrowException() {
    // Set up the binary index
    createKnnHnswBinaryIndex(KNNEngine.FAISS, INDEX_NAME, FIELD_NAME, 16);

    // Run the radial query and capture the failure
    final float[] binaryQuery = new float[] { (byte) 0b10001111, (byte) 0b10000000 };
    final Exception thrown = expectThrows(Exception.class, () -> runRnnQuery(INDEX_NAME, FIELD_NAME, binaryQuery, 1, 4));

    // The raw message doubles as the assertion's failure description
    final boolean mentionsRadialSearch = thrown.getMessage().contains("Binary data type does not support radial search");
    assertTrue(thrown.getMessage(), mentionsRadialSearch);
}

private float getRecall(final Set<String> truth, final Set<String> result) {
// Count the number of relevant documents retrieved
result.retainAll(truth);
Expand All @@ -178,23 +167,6 @@ private float getRecall(final Set<String> truth, final Set<String> result) {
return (float) relevantRetrieved / totalRelevant;
}

/**
 * Builds a min-score (radial) knn query for the given field and vector, executes it against the
 * index, and parses the hits out of the response.
 *
 * @param indexName   index to search
 * @param fieldName   vector field to query and to read results from
 * @param queryVector query vector
 * @param minScore    min score set on the query
 * @param size        size argument passed to the search call
 * @return the parsed results for {@code fieldName}
 * @throws Exception if the search or the response parsing fails
 */
private List<KNNResult> runRnnQuery(
    final String indexName,
    final String fieldName,
    final float[] queryVector,
    final float minScore,
    final int size
) throws Exception {
    final String rnnQueryJson = KNNJsonQueryBuilder.builder()
        .fieldName(fieldName)
        .vector(ArrayUtils.toObject(queryVector))
        .minScore(minScore)
        .build()
        .getQueryString();
    final Response searchResponse = searchKNNIndex(indexName, rnnQueryJson, size);
    final String responseBody = EntityUtils.toString(searchResponse.getEntity());
    return parseSearchResponse(responseBody, fieldName);
}

private List<KNNResult> runKnnQuery(final String indexName, final String fieldName, final float[] queryVector, final int k)
throws Exception {
String query = KNNJsonQueryBuilder.builder()
Expand Down

0 comments on commit cbd28c4

Please sign in to comment.