Skip to content

Commit

Permalink
Fix doc ord bug (add leaf docBase to doc IDs) & flush writer multiple times in the test
Browse files (browse the repository at this point in the history)
  • Loading branch information
dungba88 committed Nov 21, 2024
1 parent b67637a commit d9f331f
Show file tree
Hide file tree
Showing 2 changed files with 13 additions and 5 deletions.
Original file line number | Diff line number | Diff line change
Expand Up @@ -73,7 +73,7 @@ public Query rewrite(IndexSearcher indexSearcher) throws IOException {
int docId = iterator.docID();
float[] vectorValue = floatVectorValues.vectorValue(docId);
float score = comparer.compare(vectorValue, target);
queue.insertWithOverflow(new ScoreDoc(docId, score));
queue.insertWithOverflow(new ScoreDoc(leaf.docBase + docId, score));
}
}
int i = 0;
Expand Down
Original file line number | Diff line number | Diff line change
Expand Up @@ -16,6 +16,7 @@
*/
package org.apache.lucene.search;

import java.util.Arrays;
import java.util.HashMap;
import java.util.Map;
import java.util.Random;
Expand Down Expand Up @@ -46,7 +47,6 @@ public class TestRerankKnnFloatVectorQuery extends LuceneTestCase {
VectorSimilarityFunction.COSINE;
private Directory directory;
private IndexWriterConfig config;
private static final int NUM_VECTORS = 1000;
private static final int VECTOR_DIMENSION = 128;

@Before
Expand All @@ -66,15 +66,22 @@ public void testTwoPhaseKnnVectorQuery() throws Exception {

Random random = random();

int numVectors = atLeast(1000);

// Step 1: Index random vectors in quantized format
try (IndexWriter writer = new IndexWriter(directory, config)) {
for (int i = 0; i < NUM_VECTORS; i++) {
for (int i = 0; i < numVectors; i++) {
float[] vector = randomFloatVector(VECTOR_DIMENSION, random);
Document doc = new Document();
doc.add(new IntField("id", i, Field.Store.YES));
doc.add(new KnnFloatVectorField(FIELD, vector, VECTOR_SIMILARITY_FUNCTION));
writer.addDocument(doc);
vectors.put(i, vector);

// flush to create multiple segments
if (random.nextInt(10) == 0) {
writer.flush();
}
}
}

Expand All @@ -93,10 +100,11 @@ public void testTwoPhaseKnnVectorQuery() throws Exception {
// Step 3: Verify that TopDocs scores match similarity with unquantized vectors
for (ScoreDoc scoreDoc : topDocs.scoreDocs) {
Document retrievedDoc = searcher.storedFields().document(scoreDoc.doc);
float[] docVector = vectors.get(retrievedDoc.getField("id").numericValue().intValue());
int id = retrievedDoc.getField("id").numericValue().intValue();
float[] docVector = vectors.get(id);
float expectedScore = VECTOR_SIMILARITY_FUNCTION.compare(targetVector, docVector);
Assert.assertEquals(
"Score does not match expected similarity for docId: " + scoreDoc.doc,
"Score does not match expected similarity for doc ord: " + scoreDoc.doc + ", id: " + id,
expectedScore,
scoreDoc.score,
1e-5);
Expand Down

0 comments on commit d9f331f

Please sign in to comment.