Skip to content

Commit

Permalink
KNN80DocValues should only be considered for BinaryDocValues fields
Browse files Browse the repository at this point in the history
Consider adding files from fields that has BinaryDocValues and doesn't
have filter values in producer.

Signed-off-by: Vijayan Balasubramanian <[email protected]>
  • Loading branch information
VijayanB committed Sep 25, 2024
1 parent 189562e commit 2c156b4
Show file tree
Hide file tree
Showing 4 changed files with 69 additions and 1 deletion.
3 changes: 2 additions & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -23,8 +23,9 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
### Enhancements
* Add short circuit if no live docs are in segments [#2059](https://github.com/opensearch-project/k-NN/pull/2059)
### Bug Fixes
* KNN80DocValues should only be considered for BinaryDocValues fields [#2059](https://github.com/opensearch-project/k-NN/pull/2059)
### Infrastructure
### Documentation
### Maintenance
* Remove benchmarks folder from k-NN repo [#2127](https://github.com/opensearch-project/k-NN/pull/2127)
* Remove benchmarks folder from k-NN repo [#2147](https://github.com/opensearch-project/k-NN/pull/2147)
### Refactoring
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
import lombok.extern.log4j.Log4j2;
import org.apache.lucene.codecs.DocValuesProducer;
import org.apache.lucene.index.BinaryDocValues;
import org.apache.lucene.index.DocValuesType;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.NumericDocValues;
import org.apache.lucene.index.SegmentReadState;
Expand Down Expand Up @@ -69,6 +70,13 @@ public KNN80DocValuesProducer(DocValuesProducer delegate, SegmentReadState state
if (!field.attributes().containsKey(KNN_FIELD)) {
continue;
}
// Only segments that contains BinaryDocValues and doesn't have vector values should be considered.
// By default, we don't create BinaryDocValues for knn field anymore. However, users can set doc_values = true
// to create binary doc values explicitly like any other field. Hence, we only want to include fields
// where approximate search is possible only by BinaryDocValues.
if (field.getDocValuesType() != DocValuesType.BINARY || field.hasVectorValues() == true) {
continue;
}
// Only Native Engine put into indexPathMap
KNNEngine knnEngine = getNativeKNNEngine(field);
if (knnEngine == null) {
Expand All @@ -77,6 +85,7 @@ public KNN80DocValuesProducer(DocValuesProducer delegate, SegmentReadState state
List<String> engineFiles = KNNCodecUtil.getEngineFiles(knnEngine.getExtension(), field.name, state.segmentInfo);
Path indexPath = PathUtils.get(directoryPath, engineFiles.get(0));
indexPathMap.putIfAbsent(field.getName(), indexPath.toString());

}
}

Expand Down
4 changes: 4 additions & 0 deletions src/test/java/org/opensearch/knn/index/OpenSearchIT.java
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
import java.util.Locale;
import lombok.SneakyThrows;
import org.junit.BeforeClass;
import org.junit.Ignore;
import org.opensearch.knn.KNNRestTestCase;
import org.opensearch.knn.KNNResult;
import org.apache.hc.core5.http.io.entity.EntityUtils;
Expand Down Expand Up @@ -483,6 +484,9 @@ public void testIndexingVectorValidation_updateVectorWithNull() throws Exception
assertArrayEquals(vectorForDocumentOne, vectorRestoreInitialValue);
}

// This doesn't work since indices that are created post 2.17 don't evict by default when indices are closed or deleted.
// Enable this PR once https://github.com/opensearch-project/k-NN/issues/2148 is resolved.
@Ignore
public void testCacheClear_whenCloseIndex() throws Exception {
String indexName = "test-index-1";
KNNEngine knnEngine1 = KNNEngine.NMSLIB;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
import org.apache.lucene.codecs.Codec;
import org.apache.lucene.codecs.DocValuesFormat;
import org.apache.lucene.codecs.DocValuesProducer;
import org.apache.lucene.index.DocValuesType;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.FieldInfos;
import org.apache.lucene.index.SegmentInfo;
Expand Down Expand Up @@ -127,4 +128,57 @@ public void testProduceKNNBinaryField_fromCodec_nmslibCurrent() throws IOExcepti
assertTrue(path.contains(segmentFiles.get(0)));
}

public void testProduceKNNBinaryField_whenFieldHasNonBinaryDocValues_thenSkipThoseField() throws IOException {
// Set information about the segment and the fields
DocValuesFormat mockDocValuesFormat = mock(DocValuesFormat.class);
Codec mockDelegateCodec = mock(Codec.class);
DocValuesProducer mockDocValuesProducer = mock(DocValuesProducer.class);
when(mockDelegateCodec.docValuesFormat()).thenReturn(mockDocValuesFormat);
when(mockDocValuesFormat.fieldsProducer(any())).thenReturn(mockDocValuesProducer);
when(mockDocValuesFormat.getName()).thenReturn("mockDocValuesFormat");
Codec codec = new KNN87Codec(mockDelegateCodec);

String segmentName = "_test";
int docsInSegment = 100;
String fieldName1 = String.format("test_field1%s", randomAlphaOfLength(4));
String fieldName2 = String.format("test_field2%s", randomAlphaOfLength(4));
List<String> segmentFiles = Arrays.asList(
String.format("%s_2011_%s%s", segmentName, fieldName1, KNNEngine.NMSLIB.getExtension()),
String.format("%s_165_%s%s", segmentName, fieldName2, KNNEngine.FAISS.getExtension())
);

KNNEngine knnEngine = KNNEngine.NMSLIB;
SpaceType spaceType = SpaceType.COSINESIMIL;
SegmentInfo segmentInfo = KNNCodecTestUtil.segmentInfoBuilder()
.directory(directory)
.segmentName(segmentName)
.docsInSegment(docsInSegment)
.codec(codec)
.build();

for (String name : segmentFiles) {
IndexOutput indexOutput = directory.createOutput(name, IOContext.DEFAULT);
indexOutput.close();
}
segmentInfo.setFiles(segmentFiles);

FieldInfo[] fieldInfoArray = new FieldInfo[] {
KNNCodecTestUtil.FieldInfoBuilder.builder(fieldName1)
.addAttribute(KNNVectorFieldMapper.KNN_FIELD, "true")
.addAttribute(KNNConstants.KNN_ENGINE, knnEngine.getName())
.addAttribute(KNNConstants.SPACE_TYPE, spaceType.getValue())
.docValuesType(DocValuesType.NONE)
.dvGen(-1)
.build() };

FieldInfos fieldInfos = new FieldInfos(fieldInfoArray);
SegmentReadState state = new SegmentReadState(directory, segmentInfo, fieldInfos, IOContext.DEFAULT);

DocValuesFormat docValuesFormat = codec.docValuesFormat();
assertTrue(docValuesFormat instanceof KNN80DocValuesFormat);
DocValuesProducer producer = docValuesFormat.fieldsProducer(state);
assertTrue(producer instanceof KNN80DocValuesProducer);
assertEquals(0, ((KNN80DocValuesProducer) producer).getOpenedIndexPath().size());
}

}

0 comments on commit 2c156b4

Please sign in to comment.