Skip to content

Commit

Permalink
Address feedback
Browse files Browse the repository at this point in the history
Signed-off-by: Vijayan Balasubramanian <[email protected]>
  • Loading branch information
VijayanB committed Aug 28, 2024
1 parent 5453a7b commit 530dda8
Show file tree
Hide file tree
Showing 7 changed files with 59 additions and 49 deletions.
2 changes: 1 addition & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
* Add support for byte vector with Faiss Engine HNSW algorithm [#1823](https://github.com/opensearch-project/k-NN/pull/1823)
### Enhancements
* Adds iterative graph build capability into a faiss index to improve the memory footprint during indexing and Integrates KNNVectorsFormat for native engines[#1950](https://github.com/opensearch-project/k-NN/pull/1950)
* Introduce new setting to configure whether to build graph or not during segment creation [#2007](https://github.com/opensearch-project/k-NN/pull/2007)
* Introduce new setting to configure whether to build vector data structure or not during segment creation [#2007](https://github.com/opensearch-project/k-NN/pull/2007)
### Bug Fixes
* Corrected search logic for scenario with non-existent fields in filter [#1874](https://github.com/opensearch-project/k-NN/pull/1874)
* Add script_fields context to KNNAllowlist [#1917] (https://github.com/opensearch-project/k-NN/pull/1917)
Expand Down
16 changes: 10 additions & 6 deletions src/main/java/org/opensearch/knn/index/KNNSettings.java
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,7 @@ public class KNNSettings {
* Settings name
*/
public static final String KNN_SPACE_TYPE = "index.knn.space_type";
public static final String KNN_BUILD_GRAPH = "index.knn.build_graph";
public static final String KNN_BUILD_VECTOR_DATA_STRUCTURE = "index.knn.build_vector_data_structure";
public static final String KNN_ALGO_PARAM_M = "index.knn.algo_param.m";
public static final String KNN_ALGO_PARAM_EF_CONSTRUCTION = "index.knn.algo_param.ef_construction";
public static final String KNN_ALGO_PARAM_EF_SEARCH = "index.knn.algo_param.ef_search";
Expand Down Expand Up @@ -98,7 +98,7 @@ public class KNNSettings {
*/
public static final boolean KNN_DEFAULT_FAISS_AVX2_DISABLED_VALUE = false;
public static final String INDEX_KNN_DEFAULT_SPACE_TYPE = "l2";
public static final boolean INDEX_KNN_DEFAULT_BUILD_GRAPH = true;
public static final boolean INDEX_KNN_DEFAULT_BUILD_VECTOR_DATA_STRUCTURE = true;
public static final String INDEX_KNN_DEFAULT_SPACE_TYPE_FOR_BINARY = "hamming";
public static final Integer INDEX_KNN_DEFAULT_ALGO_PARAM_M = 16;
public static final Integer INDEX_KNN_DEFAULT_ALGO_PARAM_EF_SEARCH = 100;
Expand Down Expand Up @@ -138,9 +138,13 @@ public class KNNSettings {
Setting.Property.Deprecated
);

public static final Setting<Boolean> INDEX_KNN_BUILD_GRAPH_SETTING = Setting.boolSetting(
KNN_BUILD_GRAPH,
INDEX_KNN_DEFAULT_BUILD_GRAPH,
/**
* build_vector_datastructure - This parameter determines whether to build vector data structure for knn fields during indexing
* and merging.
*/
public static final Setting<Boolean> INDEX_KNN_BUILD_VECTOR_DATASTRUCTURE = Setting.boolSetting(
KNN_BUILD_VECTOR_DATA_STRUCTURE,
INDEX_KNN_DEFAULT_BUILD_VECTOR_DATA_STRUCTURE,
IndexScope,
Dynamic
);
Expand Down Expand Up @@ -476,7 +480,7 @@ private Setting<?> getSetting(String key) {
public List<Setting<?>> getSettings() {
List<Setting<?>> settings = Arrays.asList(
INDEX_KNN_SPACE_TYPE,
INDEX_KNN_BUILD_GRAPH_SETTING,
INDEX_KNN_BUILD_VECTOR_DATASTRUCTURE,
INDEX_KNN_ALGO_PARAM_M_SETTING,
INDEX_KNN_ALGO_PARAM_EF_CONSTRUCTION_SETTING,
INDEX_KNN_ALGO_PARAM_EF_SEARCH_SETTING,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -123,22 +123,18 @@ public KnnVectorsFormat getKnnVectorsFormatForField(final String field) {
return vectorsFormatSupplier.apply(knnVectorsFormatParams);
}

boolean buildGraph = getBuildGraphSetting();
boolean buildVectorDatastructureSetting = getBuildVectorDatastructureSetting(mapperService.get());
// All native engines to use NativeEngines990KnnVectorsFormat
return new NativeEngines990KnnVectorsFormat(
new Lucene99FlatVectorsFormat(FlatVectorScorerUtil.getLucene99FlatVectorsScorer()),
buildGraph
buildVectorDatastructureSetting
);
}

private boolean getBuildGraphSetting() {
// Default should be true, only explicit false should disable building graph for
// native engine
if (mapperService.isEmpty()) {
return true;
}
IndexSettings indexSettings = mapperService.get().getIndexSettings();
return indexSettings.getValue(KNNSettings.INDEX_KNN_BUILD_GRAPH_SETTING);
private boolean getBuildVectorDatastructureSetting(final MapperService knnMapperService) {
IndexSettings indexSettings = knnMapperService.getIndexSettings();
Boolean buildVectorDatastructure = indexSettings.getValue(KNNSettings.INDEX_KNN_BUILD_VECTOR_DATASTRUCTURE);
return buildVectorDatastructure != null ? buildVectorDatastructure : true;
}

@Override
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ public class NativeEngines990KnnVectorsFormat extends KnnVectorsFormat {
/** The format for storing, reading, merging vectors on disk */
private static FlatVectorsFormat flatVectorsFormat;
private static final String FORMAT_NAME = "NativeEngines99KnnVectorsFormat";
private static boolean buildGraph;
private static boolean buildVectorDatastructure;

public NativeEngines990KnnVectorsFormat() {
this(new Lucene99FlatVectorsFormat(new DefaultFlatVectorScorer()));
Expand All @@ -40,10 +40,10 @@ public NativeEngines990KnnVectorsFormat(final FlatVectorsFormat lucene99FlatVect
this(lucene99FlatVectorsFormat, true);
}

public NativeEngines990KnnVectorsFormat(final FlatVectorsFormat lucene99FlatVectorsFormat, boolean buildGraphSetting) {
public NativeEngines990KnnVectorsFormat(final FlatVectorsFormat lucene99FlatVectorsFormat, boolean buildVectorDatastructureSetting) {
super(FORMAT_NAME);
flatVectorsFormat = lucene99FlatVectorsFormat;
buildGraph = buildGraphSetting;
buildVectorDatastructure = buildVectorDatastructureSetting;
}

/**
Expand All @@ -53,7 +53,7 @@ public NativeEngines990KnnVectorsFormat(final FlatVectorsFormat lucene99FlatVect
*/
@Override
public KnnVectorsWriter fieldsWriter(final SegmentWriteState state) throws IOException {
return new NativeEngines990KnnVectorsWriter(state, flatVectorsFormat.fieldsWriter(state), buildGraph);
return new NativeEngines990KnnVectorsWriter(state, flatVectorsFormat.fieldsWriter(state), buildVectorDatastructure);
}

/**
Expand All @@ -68,6 +68,12 @@ public KnnVectorsReader fieldsReader(final SegmentReadState state) throws IOExce

@Override
public String toString() {
return "NativeEngines99KnnVectorsFormat(name=" + this.getClass().getSimpleName() + ", flatVectorsFormat=" + flatVectorsFormat + ")";
return "NativeEngines99KnnVectorsFormat(name="
+ this.getClass().getSimpleName()
+ ", flatVectorsFormat="
+ flatVectorsFormat
+ ", buildVectorDatastructure"
+ buildVectorDatastructure
+ ")";
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@ public class NativeEngines990KnnVectorsWriter extends KnnVectorsWriter {
private final SegmentWriteState segmentWriteState;
private final FlatVectorsWriter flatVectorsWriter;
private final List<NativeEngineFieldVectorsWriter<?>> fields = new ArrayList<>();
private final boolean buildGraph;
private final boolean buildVectorDataStructure;
private boolean finished;
private final QuantizationService quantizationService = QuantizationService.getInstance();

Expand All @@ -74,7 +74,8 @@ public KnnFieldVectorsWriter<?> addField(final FieldInfo fieldInfo) throws IOExc
@Override
public void flush(int maxDoc, final Sorter.DocMap sortMap) throws IOException {
flatVectorsWriter.flush(maxDoc, sortMap);
if (!buildGraph) {
if (!buildVectorDataStructure) {
log.info("Skip building vector data structure");
return;
}
for (final NativeEngineFieldVectorsWriter<?> field : fields) {
Expand All @@ -91,7 +92,8 @@ public void flush(int maxDoc, final Sorter.DocMap sortMap) throws IOException {
public void mergeOneField(final FieldInfo fieldInfo, final MergeState mergeState) throws IOException {
// This will ensure that we are merging the FlatIndex during force merge.
flatVectorsWriter.mergeOneField(fieldInfo, mergeState);
if (!buildGraph) {
if (!buildVectorDataStructure) {
log.info("Skip building vector data structure");
return;
}
// For merge, pick values from flat vector and reindex again. This will use the flush operation to create graphs
Expand Down
40 changes: 20 additions & 20 deletions src/test/java/org/opensearch/knn/index/OpenSearchIT.java
Original file line number Diff line number Diff line change
Expand Up @@ -484,37 +484,37 @@ public void testIndexingVectorValidation_updateVectorWithNull() throws Exception
assertArrayEquals(vectorForDocumentOne, vectorRestoreInitialValue);
}

public void testKNNIndex_WhenBuildGraphParamIsPresent_thenSuccess() throws Exception {
final boolean buildGraph = false;
final Settings settings = Settings.builder().put(buildKNNSettingsWithBuildGraphProperty(buildGraph)).build();
public void testKNNIndex_whenBuildGraphParamIsPresent_thenSuccess() throws Exception {
final boolean buildVectorDataStructure = randomBoolean();
final Settings settings = Settings.builder().put(buildKNNIndexSettings(buildVectorDataStructure)).build();
final String knnIndexMapping = createKnnIndexMapping(FIELD_NAME, KNNEngine.getMaxDimensionByEngine(KNNEngine.DEFAULT));
final String indexName = "test-index-with-build-graph-settings";
createKnnIndex(indexName, settings, knnIndexMapping);
final String buildGraphValue = getIndexSettingByName(indexName, KNNSettings.KNN_BUILD_GRAPH);
assertNotNull("build_graph index setting is not found", buildGraphValue);
assertEquals("incorrect setting for build_graph", buildGraph, Boolean.valueOf(buildGraphValue));
final String buildVectorDataStructureSetting = getIndexSettingByName(indexName, KNNSettings.KNN_BUILD_VECTOR_DATA_STRUCTURE);
assertNotNull("build_vector_data_structure index setting is not found", buildVectorDataStructureSetting);
assertEquals("incorrect setting for build_graph", buildVectorDataStructure, Boolean.valueOf(buildVectorDataStructureSetting));
deleteKNNIndex(indexName);
}

public void testKNNIndex_WhenBuildGraphParamIsNotAdded_thenShouldNotReturnSetting() throws Exception {
public void testKNNIndex_whenBuildGraphParamIsNotAdded_thenShouldNotReturnSetting() throws Exception {
final String knnIndexMapping = createKnnIndexMapping(FIELD_NAME, KNNEngine.getMaxDimensionByEngine(KNNEngine.DEFAULT));
final String indexName = "test-index-with-build-graph-settings";
createKnnIndex(indexName, knnIndexMapping);
final String buildGraphValue = getIndexSettingByName(indexName, KNNSettings.KNN_BUILD_GRAPH);
assertNull("build_graph index setting should not be added in index setting", buildGraphValue);
final String buildVectorDataStructureSetting = getIndexSettingByName(indexName, KNNSettings.KNN_BUILD_VECTOR_DATA_STRUCTURE);
assertNull("build_vector_data_structure index setting should not be added in index setting", buildVectorDataStructureSetting);
deleteKNNIndex(indexName);
}

public void testKNNIndex_WhenGetIndexSettingIsCalled_thenBuildGraphEnabledAsDefault() throws Exception {
public void testKNNIndex_whenGetIndexSettingIsCalled_thenBuildGraphEnabledAsDefault() throws Exception {
final String knnIndexMapping = createKnnIndexMapping(FIELD_NAME, KNNEngine.getMaxDimensionByEngine(KNNEngine.DEFAULT));
final String indexName = "test-index-with-build-graph-settings";
final String indexName = "test-index-with-build-vector-graph-settings";
createKnnIndex(indexName, knnIndexMapping);
final String buildGraphValue = getIndexSettingByName(indexName, KNNSettings.KNN_BUILD_GRAPH, true);
assertNotNull("build_graph index setting is not found", buildGraphValue);
final String buildVectorDataStructureSetting = getIndexSettingByName(indexName, KNNSettings.KNN_BUILD_VECTOR_DATA_STRUCTURE, true);
assertNotNull("build_vector_data_structure index setting is not found", buildVectorDataStructureSetting);
assertEquals(
"incorrect default setting for build_graph",
KNNSettings.INDEX_KNN_DEFAULT_BUILD_GRAPH,
Boolean.valueOf(buildGraphValue)
"incorrect default setting for build_vector_data_structure",
KNNSettings.INDEX_KNN_DEFAULT_BUILD_VECTOR_DATA_STRUCTURE,
Boolean.valueOf(buildVectorDataStructureSetting)
);
deleteKNNIndex(indexName);
}
Expand All @@ -525,7 +525,7 @@ public void testKNNIndex_whenDisablingBuildGraphParameter_thenNoSearchResults()
final String fieldName2 = "test-field-2";

final Integer dimension = testData.indexData.vectors[0].length;
final Settings indexSettingsWithoutBuildingGraph = buildKNNSettingsWithBuildGraphProperty(false);
final Settings knnIndexSettings = buildKNNIndexSettings(false);

// Create an index
final XContentBuilder builder = XContentFactory.jsonBuilder()
Expand Down Expand Up @@ -554,7 +554,7 @@ public void testKNNIndex_whenDisablingBuildGraphParameter_thenNoSearchResults()
.endObject()
.endObject();

createKnnIndex(indexName, indexSettingsWithoutBuildingGraph, builder.toString());
createKnnIndex(indexName, knnIndexSettings, builder.toString());

// Index the test data
for (int i = 0; i < testData.indexData.docs.length; i++) {
Expand All @@ -579,8 +579,8 @@ public void testKNNIndex_whenDisablingBuildGraphParameter_thenNoSearchResults()
final List<KNNResult> faissNeighbors = getResults(indexName, fieldName2, testData.queries[0], 1);
assertEquals("unexpected neighbors are returned", 0, faissNeighbors.size());

// update build graph setting
updateIndexSettings(indexName, Settings.builder().put(KNNSettings.KNN_BUILD_GRAPH, true));
// update build vector data structure setting
updateIndexSettings(indexName, Settings.builder().put(KNNSettings.KNN_BUILD_VECTOR_DATA_STRUCTURE, true));
forceMergeKnnIndex(indexName, 1);

final int k = 10;
Expand Down
10 changes: 6 additions & 4 deletions src/testFixtures/java/org/opensearch/knn/KNNRestTestCase.java
Original file line number Diff line number Diff line change
Expand Up @@ -99,6 +99,8 @@
import static org.opensearch.knn.TestUtils.computeGroundTruthValues;

import static org.opensearch.knn.common.KNNConstants.VECTOR_DATA_TYPE_FIELD;
import static org.opensearch.knn.index.KNNSettings.KNN_BUILD_VECTOR_DATA_STRUCTURE;
import static org.opensearch.knn.index.KNNSettings.KNN_INDEX;
import static org.opensearch.knn.index.SpaceType.L2;
import static org.opensearch.knn.index.memory.NativeMemoryCacheManager.GRAPH_COUNT;
import static org.opensearch.knn.index.engine.KNNEngine.FAISS;
Expand Down Expand Up @@ -761,12 +763,12 @@ protected Settings getKNNSegmentReplicatedIndexSettings() {
.build();
}

protected Settings buildKNNSettingsWithBuildGraphProperty(boolean buildGraph) {
protected Settings buildKNNIndexSettings(boolean buildVectorDatastructure) {
return Settings.builder()
.put("number_of_shards", 1)
.put("number_of_replicas", 1)
.put("index.knn", true)
.put("index.knn.build_graph", buildGraph)
.put("number_of_replicas", 0)
.put(KNN_INDEX, true)
.put(KNN_BUILD_VECTOR_DATA_STRUCTURE, buildVectorDatastructure)
.build();
}

Expand Down

0 comments on commit 530dda8

Please sign in to comment.