Skip to content

Commit

Permalink
Update settings from boolean to threshold
Browse files Browse the repository at this point in the history
If the threshold is less than zero, no graph will be built; for every other value,
the graph will only be built if the number of docs in the segment is greater than or equal to the threshold.

Signed-off-by: Vijayan Balasubramanian <[email protected]>
  • Loading branch information
VijayanB committed Sep 4, 2024
1 parent 530dda8 commit db80d04
Show file tree
Hide file tree
Showing 6 changed files with 83 additions and 52 deletions.
14 changes: 7 additions & 7 deletions src/main/java/org/opensearch/knn/index/KNNSettings.java
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,7 @@ public class KNNSettings {
* Settings name
*/
public static final String KNN_SPACE_TYPE = "index.knn.space_type";
public static final String KNN_BUILD_VECTOR_DATA_STRUCTURE = "index.knn.build_vector_data_structure";
public static final String INDEX_KNN_BUILD_VECTOR_DATA_STRUCTURE_THRESHOLD = "index.knn.build_vector_data_structure_threshold";
public static final String KNN_ALGO_PARAM_M = "index.knn.algo_param.m";
public static final String KNN_ALGO_PARAM_EF_CONSTRUCTION = "index.knn.algo_param.ef_construction";
public static final String KNN_ALGO_PARAM_EF_SEARCH = "index.knn.algo_param.ef_search";
Expand Down Expand Up @@ -98,7 +98,7 @@ public class KNNSettings {
*/
public static final boolean KNN_DEFAULT_FAISS_AVX2_DISABLED_VALUE = false;
public static final String INDEX_KNN_DEFAULT_SPACE_TYPE = "l2";
public static final boolean INDEX_KNN_DEFAULT_BUILD_VECTOR_DATA_STRUCTURE = true;
public static final Integer INDEX_KNN_DEFAULT_BUILD_VECTOR_DATA_STRUCTURE_THRESHOLD = 0;
public static final String INDEX_KNN_DEFAULT_SPACE_TYPE_FOR_BINARY = "hamming";
public static final Integer INDEX_KNN_DEFAULT_ALGO_PARAM_M = 16;
public static final Integer INDEX_KNN_DEFAULT_ALGO_PARAM_EF_SEARCH = 100;
Expand Down Expand Up @@ -139,12 +139,12 @@ public class KNNSettings {
);

/**
* build_vector_datastructure - This parameter determines whether to build vector data structure for knn fields during indexing
* build_vector_data_structure_threshold - This parameter determines when to build vector data structure for knn fields during indexing
* and merging.
*/
public static final Setting<Boolean> INDEX_KNN_BUILD_VECTOR_DATASTRUCTURE = Setting.boolSetting(
KNN_BUILD_VECTOR_DATA_STRUCTURE,
INDEX_KNN_DEFAULT_BUILD_VECTOR_DATA_STRUCTURE,
public static final Setting<Integer> INDEX_KNN_BUILD_VECTOR_DATA_STRUCTURE_THRESHOLD_SETTING = Setting.intSetting(
INDEX_KNN_BUILD_VECTOR_DATA_STRUCTURE_THRESHOLD,
INDEX_KNN_DEFAULT_BUILD_VECTOR_DATA_STRUCTURE_THRESHOLD,
IndexScope,
Dynamic
);
Expand Down Expand Up @@ -480,7 +480,7 @@ private Setting<?> getSetting(String key) {
public List<Setting<?>> getSettings() {
List<Setting<?>> settings = Arrays.asList(
INDEX_KNN_SPACE_TYPE,
INDEX_KNN_BUILD_VECTOR_DATASTRUCTURE,
INDEX_KNN_BUILD_VECTOR_DATA_STRUCTURE_THRESHOLD_SETTING,
INDEX_KNN_ALGO_PARAM_M_SETTING,
INDEX_KNN_ALGO_PARAM_EF_CONSTRUCTION_SETTING,
INDEX_KNN_ALGO_PARAM_EF_SEARCH_SETTING,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -123,18 +123,22 @@ public KnnVectorsFormat getKnnVectorsFormatForField(final String field) {
return vectorsFormatSupplier.apply(knnVectorsFormatParams);
}

boolean buildVectorDatastructureSetting = getBuildVectorDatastructureSetting(mapperService.get());
int buildVectorDatastructureThreshold = getBuildVectorDatastructureThresholdSetting(mapperService.get());
// All native engines to use NativeEngines990KnnVectorsFormat
return new NativeEngines990KnnVectorsFormat(
new Lucene99FlatVectorsFormat(FlatVectorScorerUtil.getLucene99FlatVectorsScorer()),
buildVectorDatastructureSetting
buildVectorDatastructureThreshold
);
}

private boolean getBuildVectorDatastructureSetting(final MapperService knnMapperService) {
IndexSettings indexSettings = knnMapperService.getIndexSettings();
Boolean buildVectorDatastructure = indexSettings.getValue(KNNSettings.INDEX_KNN_BUILD_VECTOR_DATASTRUCTURE);
return buildVectorDatastructure != null ? buildVectorDatastructure : true;
/**
 * Looks up the build-vector-data-structure threshold in the index settings of the given mapper service.
 *
 * @param knnMapperService mapper service whose index settings are consulted
 * @return the value returned by the index settings, or
 *         {@link KNNSettings#INDEX_KNN_DEFAULT_BUILD_VECTOR_DATA_STRUCTURE_THRESHOLD} when that value is null
 */
private int getBuildVectorDatastructureThresholdSetting(final MapperService knnMapperService) {
    final Integer configuredThreshold = knnMapperService.getIndexSettings()
        .getValue(KNNSettings.INDEX_KNN_BUILD_VECTOR_DATA_STRUCTURE_THRESHOLD_SETTING);
    // Fall back to the default when the lookup yields no value.
    if (configuredThreshold == null) {
        return KNNSettings.INDEX_KNN_DEFAULT_BUILD_VECTOR_DATA_STRUCTURE_THRESHOLD;
    }
    return configuredThreshold;
}

@Override
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
import org.apache.lucene.codecs.lucene99.Lucene99FlatVectorsFormat;
import org.apache.lucene.index.SegmentReadState;
import org.apache.lucene.index.SegmentWriteState;
import org.opensearch.knn.index.KNNSettings;

import java.io.IOException;

Expand All @@ -30,20 +31,20 @@ public class NativeEngines990KnnVectorsFormat extends KnnVectorsFormat {
/** The format for storing, reading, merging vectors on disk */
private static FlatVectorsFormat flatVectorsFormat;
private static final String FORMAT_NAME = "NativeEngines99KnnVectorsFormat";
private static boolean buildVectorDatastructure;
private static int buildVectorDatastructureThreshold;

/**
 * Creates the format with a {@link Lucene99FlatVectorsFormat} constructed from a
 * {@link DefaultFlatVectorScorer}, delegating to the single-argument constructor.
 */
public NativeEngines990KnnVectorsFormat() {
this(new Lucene99FlatVectorsFormat(new DefaultFlatVectorScorer()));
}

public NativeEngines990KnnVectorsFormat(final FlatVectorsFormat lucene99FlatVectorsFormat) {
this(lucene99FlatVectorsFormat, true);
public NativeEngines990KnnVectorsFormat(final FlatVectorsFormat flatVectorsFormat) {
this(flatVectorsFormat, KNNSettings.INDEX_KNN_DEFAULT_BUILD_VECTOR_DATA_STRUCTURE_THRESHOLD);
}

public NativeEngines990KnnVectorsFormat(final FlatVectorsFormat lucene99FlatVectorsFormat, boolean buildVectorDatastructureSetting) {
public NativeEngines990KnnVectorsFormat(final FlatVectorsFormat flatVectorsFormat, int buildVectorDatastructureThreshold) {
super(FORMAT_NAME);
flatVectorsFormat = lucene99FlatVectorsFormat;
buildVectorDatastructure = buildVectorDatastructureSetting;
NativeEngines990KnnVectorsFormat.flatVectorsFormat = flatVectorsFormat;
NativeEngines990KnnVectorsFormat.buildVectorDatastructureThreshold = buildVectorDatastructureThreshold;
}

/**
Expand All @@ -53,7 +54,7 @@ public NativeEngines990KnnVectorsFormat(final FlatVectorsFormat lucene99FlatVect
*/
@Override
public KnnVectorsWriter fieldsWriter(final SegmentWriteState state) throws IOException {
return new NativeEngines990KnnVectorsWriter(state, flatVectorsFormat.fieldsWriter(state), buildVectorDatastructure);
return new NativeEngines990KnnVectorsWriter(state, flatVectorsFormat.fieldsWriter(state), buildVectorDatastructureThreshold);
}

/**
Expand All @@ -72,8 +73,8 @@ public String toString() {
+ this.getClass().getSimpleName()
+ ", flatVectorsFormat="
+ flatVectorsFormat
+ ", buildVectorDatastructure"
+ buildVectorDatastructure
+ ", buildVectorDatastructureThreshold"
+ buildVectorDatastructureThreshold
+ ")";
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@

package org.opensearch.knn.index.codec.KNN990Codec;

import lombok.NonNull;
import lombok.RequiredArgsConstructor;
import lombok.extern.log4j.Log4j2;
import org.apache.lucene.codecs.KnnFieldVectorsWriter;
Expand Down Expand Up @@ -48,7 +49,8 @@ public class NativeEngines990KnnVectorsWriter extends KnnVectorsWriter {
private final SegmentWriteState segmentWriteState;
private final FlatVectorsWriter flatVectorsWriter;
private final List<NativeEngineFieldVectorsWriter<?>> fields = new ArrayList<>();
private final boolean buildVectorDataStructure;
@NonNull
private final Integer buildVectorDataStructureThreshold;
private boolean finished;
private final QuantizationService quantizationService = QuantizationService.getInstance();

Expand All @@ -74,10 +76,6 @@ public KnnFieldVectorsWriter<?> addField(final FieldInfo fieldInfo) throws IOExc
@Override
public void flush(int maxDoc, final Sorter.DocMap sortMap) throws IOException {
flatVectorsWriter.flush(maxDoc, sortMap);
if (!buildVectorDataStructure) {
log.info("Skip building vector data structure");
return;
}
for (final NativeEngineFieldVectorsWriter<?> field : fields) {
trainAndIndex(
field.getFieldInfo(),
Expand All @@ -92,10 +90,6 @@ public void flush(int maxDoc, final Sorter.DocMap sortMap) throws IOException {
public void mergeOneField(final FieldInfo fieldInfo, final MergeState mergeState) throws IOException {
// This will ensure that we are merging the FlatIndex during force merge.
flatVectorsWriter.mergeOneField(fieldInfo, mergeState);
if (!buildVectorDataStructure) {
log.info("Skip building vector data structure");
return;
}
// For merge, pick values from flat vector and reindex again. This will use the flush operation to create graphs
trainAndIndex(fieldInfo, this::getKNNVectorValuesForMerge, NativeIndexWriter::mergeIndex, mergeState);

Expand Down Expand Up @@ -237,6 +231,16 @@ private <T, C> void trainAndIndex(
: NativeIndexWriter.getWriter(fieldInfo, segmentWriteState);

knnVectorValues = vectorValuesRetriever.apply(vectorDataType, fieldInfo, VectorProcessingContext);
if (shouldSkipBuildingVectorDataStructure(knnVectorValues.totalLiveDocs())) {
return;
}
indexOperation.buildAndWrite(writer, knnVectorValues);
}

/**
 * Decides whether building the vector data structure is skipped for the given document count.
 *
 * @param docCount number of documents compared against the configured threshold
 * @return {@code true} when the threshold is negative or {@code docCount} is below the threshold;
 *         {@code false} otherwise
 */
private boolean shouldSkipBuildingVectorDataStructure(final long docCount) {
    final int threshold = buildVectorDataStructureThreshold;
    // A negative threshold always skips; otherwise skip only while docCount is below it.
    return threshold < 0 || docCount < threshold;
}
}
58 changes: 40 additions & 18 deletions src/test/java/org/opensearch/knn/index/OpenSearchIT.java
Original file line number Diff line number Diff line change
Expand Up @@ -484,48 +484,70 @@ public void testIndexingVectorValidation_updateVectorWithNull() throws Exception
assertArrayEquals(vectorForDocumentOne, vectorRestoreInitialValue);
}

public void testKNNIndex_whenBuildGraphParamIsPresent_thenSuccess() throws Exception {
final boolean buildVectorDataStructure = randomBoolean();
final Settings settings = Settings.builder().put(buildKNNIndexSettings(buildVectorDataStructure)).build();
public void testKNNIndex_whenBuildGraphThresholdIsPresent_thenGetThresholdValue() throws Exception {
final Integer buildVectorDataStructureThreshold = randomInt();
final Settings settings = Settings.builder().put(buildKNNIndexSettings(buildVectorDataStructureThreshold)).build();
final String knnIndexMapping = createKnnIndexMapping(FIELD_NAME, KNNEngine.getMaxDimensionByEngine(KNNEngine.DEFAULT));
final String indexName = "test-index-with-build-graph-settings";
createKnnIndex(indexName, settings, knnIndexMapping);
final String buildVectorDataStructureSetting = getIndexSettingByName(indexName, KNNSettings.KNN_BUILD_VECTOR_DATA_STRUCTURE);
assertNotNull("build_vector_data_structure index setting is not found", buildVectorDataStructureSetting);
assertEquals("incorrect setting for build_graph", buildVectorDataStructure, Boolean.valueOf(buildVectorDataStructureSetting));
final String buildVectorDataStructureThresholdSetting = getIndexSettingByName(
indexName,
KNNSettings.INDEX_KNN_BUILD_VECTOR_DATA_STRUCTURE_THRESHOLD
);
assertNotNull("build_vector_data_structure_threshold index setting is not found", buildVectorDataStructureThresholdSetting);
assertEquals(
"incorrect setting for build_vector_data_structure_threshold",
buildVectorDataStructureThreshold,
Integer.valueOf(buildVectorDataStructureThresholdSetting)
);
deleteKNNIndex(indexName);
}

public void testKNNIndex_whenBuildGraphParamIsNotAdded_thenShouldNotReturnSetting() throws Exception {
public void testKNNIndex_whenBuildThresholdIsNotProvided_thenShouldNotReturnSetting() throws Exception {
final String knnIndexMapping = createKnnIndexMapping(FIELD_NAME, KNNEngine.getMaxDimensionByEngine(KNNEngine.DEFAULT));
final String indexName = "test-index-with-build-graph-settings";
createKnnIndex(indexName, knnIndexMapping);
final String buildVectorDataStructureSetting = getIndexSettingByName(indexName, KNNSettings.KNN_BUILD_VECTOR_DATA_STRUCTURE);
assertNull("build_vector_data_structure index setting should not be added in index setting", buildVectorDataStructureSetting);
final String buildVectorDataStructureThresholdSetting = getIndexSettingByName(
indexName,
KNNSettings.INDEX_KNN_BUILD_VECTOR_DATA_STRUCTURE_THRESHOLD
);
assertNull(
"build_vector_data_structure_threshold index setting should not be added in index setting",
buildVectorDataStructureThresholdSetting
);
deleteKNNIndex(indexName);
}

public void testKNNIndex_whenGetIndexSettingIsCalled_thenBuildGraphEnabledAsDefault() throws Exception {
public void testKNNIndex_whenGetIndexSettingWithDefaultIsCalled_thenReturnDefaultBuildGraphThresholdValue() throws Exception {
final String knnIndexMapping = createKnnIndexMapping(FIELD_NAME, KNNEngine.getMaxDimensionByEngine(KNNEngine.DEFAULT));
final String indexName = "test-index-with-build-vector-graph-settings";
createKnnIndex(indexName, knnIndexMapping);
final String buildVectorDataStructureSetting = getIndexSettingByName(indexName, KNNSettings.KNN_BUILD_VECTOR_DATA_STRUCTURE, true);
assertNotNull("build_vector_data_structure index setting is not found", buildVectorDataStructureSetting);
final String buildVectorDataStructureThresholdSetting = getIndexSettingByName(
indexName,
KNNSettings.INDEX_KNN_BUILD_VECTOR_DATA_STRUCTURE_THRESHOLD,
true
);
assertNotNull("build_vector_data_structure index setting is not found", buildVectorDataStructureThresholdSetting);
assertEquals(
"incorrect default setting for build_vector_data_structure",
KNNSettings.INDEX_KNN_DEFAULT_BUILD_VECTOR_DATA_STRUCTURE,
Boolean.valueOf(buildVectorDataStructureSetting)
"incorrect default setting for build_vector_data_structure_threshold",
KNNSettings.INDEX_KNN_DEFAULT_BUILD_VECTOR_DATA_STRUCTURE_THRESHOLD,
Integer.valueOf(buildVectorDataStructureThresholdSetting)
);
deleteKNNIndex(indexName);
}

public void testKNNIndex_whenDisablingBuildGraphParameter_thenNoSearchResults() throws Exception {
/*
For this test case, we create an index with build_vector_data_structure_threshold set to -1, index a few documents, perform a knn search,
and confirm there are no hits since no graph was built. Next, we update the setting to 0, force merge to 1 segment, perform a knn search, and confirm the expected
hits are returned.
*/
public void testKNNIndex_whenBuildVectorGraphThresholdIsProvidedEndToEnd_thenBuildGraphBasedOnSetting() throws Exception {
final String indexName = "test-index-1";
final String fieldName1 = "test-field-1";
final String fieldName2 = "test-field-2";

final Integer dimension = testData.indexData.vectors[0].length;
final Settings knnIndexSettings = buildKNNIndexSettings(false);
final Settings knnIndexSettings = buildKNNIndexSettings(-1);

// Create an index
final XContentBuilder builder = XContentFactory.jsonBuilder()
Expand Down Expand Up @@ -580,7 +602,7 @@ public void testKNNIndex_whenDisablingBuildGraphParameter_thenNoSearchResults()
assertEquals("unexpected neighbors are returned", 0, faissNeighbors.size());

// update build vector data structure setting
updateIndexSettings(indexName, Settings.builder().put(KNNSettings.KNN_BUILD_VECTOR_DATA_STRUCTURE, true));
updateIndexSettings(indexName, Settings.builder().put(KNNSettings.INDEX_KNN_BUILD_VECTOR_DATA_STRUCTURE_THRESHOLD, 0));
forceMergeKnnIndex(indexName, 1);

final int k = 10;
Expand Down
6 changes: 3 additions & 3 deletions src/testFixtures/java/org/opensearch/knn/KNNRestTestCase.java
Original file line number Diff line number Diff line change
Expand Up @@ -99,7 +99,7 @@
import static org.opensearch.knn.TestUtils.computeGroundTruthValues;

import static org.opensearch.knn.common.KNNConstants.VECTOR_DATA_TYPE_FIELD;
import static org.opensearch.knn.index.KNNSettings.KNN_BUILD_VECTOR_DATA_STRUCTURE;
import static org.opensearch.knn.index.KNNSettings.INDEX_KNN_BUILD_VECTOR_DATA_STRUCTURE_THRESHOLD;
import static org.opensearch.knn.index.KNNSettings.KNN_INDEX;
import static org.opensearch.knn.index.SpaceType.L2;
import static org.opensearch.knn.index.memory.NativeMemoryCacheManager.GRAPH_COUNT;
Expand Down Expand Up @@ -763,12 +763,12 @@ protected Settings getKNNSegmentReplicatedIndexSettings() {
.build();
}

protected Settings buildKNNIndexSettings(boolean buildVectorDatastructure) {
protected Settings buildKNNIndexSettings(int buildVectorDatastructureThreshold) {
return Settings.builder()
.put("number_of_shards", 1)
.put("number_of_replicas", 0)
.put(KNN_INDEX, true)
.put(KNN_BUILD_VECTOR_DATA_STRUCTURE, buildVectorDatastructure)
.put(INDEX_KNN_BUILD_VECTOR_DATA_STRUCTURE_THRESHOLD, buildVectorDatastructureThreshold)
.build();
}

Expand Down

0 comments on commit db80d04

Please sign in to comment.