From d6a4a05b2e68eefb47d61b6e8ea6192354b93c2a Mon Sep 17 00:00:00 2001 From: Vijayan Balasubramanian Date: Tue, 27 Aug 2024 15:39:06 -0700 Subject: [PATCH] Address feedback Signed-off-by: Vijayan Balasubramanian --- CHANGELOG.md | 2 +- .../org/opensearch/knn/index/KNNSettings.java | 16 +++++--- .../codec/BasePerFieldKnnVectorsFormat.java | 16 +++----- .../NativeEngines990KnnVectorsFormat.java | 16 +++++--- .../NativeEngines990KnnVectorsWriter.java | 7 ++-- .../opensearch/knn/index/OpenSearchIT.java | 40 +++++++++---------- .../org/opensearch/knn/KNNRestTestCase.java | 10 +++-- 7 files changed, 58 insertions(+), 49 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index a4bf45d43..758071887 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -21,7 +21,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), * Add support for byte vector with Faiss Engine HNSW algorithm [#1823](https://github.com/opensearch-project/k-NN/pull/1823) ### Enhancements * Adds iterative graph build capability into a faiss index to improve the memory footprint during indexing and Integrates KNNVectorsFormat for native engines[#1950](https://github.com/opensearch-project/k-NN/pull/1950) -* Introduce new setting to configure whether to build graph or not during segment creation [#2007](https://github.com/opensearch-project/k-NN/pull/2007) +* Introduce new setting to configure whether to build vector data structure or not during segment creation [#2007](https://github.com/opensearch-project/k-NN/pull/2007) ### Bug Fixes * Corrected search logic for scenario with non-existent fields in filter [#1874](https://github.com/opensearch-project/k-NN/pull/1874) * Add script_fields context to KNNAllowlist [#1917] (https://github.com/opensearch-project/k-NN/pull/1917) diff --git a/src/main/java/org/opensearch/knn/index/KNNSettings.java b/src/main/java/org/opensearch/knn/index/KNNSettings.java index 81b23f097..d50478e59 100644 --- a/src/main/java/org/opensearch/knn/index/KNNSettings.java +++ b/src/main/java/org/opensearch/knn/index/KNNSettings.java @@ -65,7 +65,7 @@ public class KNNSettings { * Settings name */ public static final String KNN_SPACE_TYPE = "index.knn.space_type"; - public static final String KNN_BUILD_GRAPH = "index.knn.build_graph"; + public static final String KNN_BUILD_VECTOR_DATA_STRUCTURE = "index.knn.build_vector_data_structure"; public static final String KNN_ALGO_PARAM_M = "index.knn.algo_param.m"; public static final String KNN_ALGO_PARAM_EF_CONSTRUCTION = "index.knn.algo_param.ef_construction"; public static final String KNN_ALGO_PARAM_EF_SEARCH = "index.knn.algo_param.ef_search"; @@ -98,7 +98,7 @@ public class KNNSettings { */ public static final boolean KNN_DEFAULT_FAISS_AVX2_DISABLED_VALUE = false; public static final String INDEX_KNN_DEFAULT_SPACE_TYPE = "l2"; - public static final boolean INDEX_KNN_DEFAULT_BUILD_GRAPH = true; + public static final boolean INDEX_KNN_DEFAULT_BUILD_VECTOR_DATA_STRUCTURE = true; public static final String INDEX_KNN_DEFAULT_SPACE_TYPE_FOR_BINARY = "hamming"; public static final Integer INDEX_KNN_DEFAULT_ALGO_PARAM_M = 16; public static final Integer INDEX_KNN_DEFAULT_ALGO_PARAM_EF_SEARCH = 100; @@ -138,9 +138,13 @@ public class KNNSettings { Setting.Property.Deprecated ); - public static final Setting INDEX_KNN_BUILD_GRAPH_SETTING = Setting.boolSetting( - KNN_BUILD_GRAPH, - INDEX_KNN_DEFAULT_BUILD_GRAPH, + /** + * build_vector_datastructure - This parameter determines whether to build vector data structure for knn fields during indexing + * and merging. + */ + public static final Setting INDEX_KNN_BUILD_VECTOR_DATASTRUCTURE = Setting.boolSetting( + KNN_BUILD_VECTOR_DATA_STRUCTURE, + INDEX_KNN_DEFAULT_BUILD_VECTOR_DATA_STRUCTURE, IndexScope, Dynamic ); @@ -476,7 +480,7 @@ private Setting getSetting(String key) { public List> getSettings() { List> settings = Arrays.asList( INDEX_KNN_SPACE_TYPE, - INDEX_KNN_BUILD_GRAPH_SETTING, + INDEX_KNN_BUILD_VECTOR_DATASTRUCTURE, INDEX_KNN_ALGO_PARAM_M_SETTING, INDEX_KNN_ALGO_PARAM_EF_CONSTRUCTION_SETTING, INDEX_KNN_ALGO_PARAM_EF_SEARCH_SETTING, diff --git a/src/main/java/org/opensearch/knn/index/codec/BasePerFieldKnnVectorsFormat.java b/src/main/java/org/opensearch/knn/index/codec/BasePerFieldKnnVectorsFormat.java index 1265289bb..24c23efad 100644 --- a/src/main/java/org/opensearch/knn/index/codec/BasePerFieldKnnVectorsFormat.java +++ b/src/main/java/org/opensearch/knn/index/codec/BasePerFieldKnnVectorsFormat.java @@ -123,22 +123,18 @@ public KnnVectorsFormat getKnnVectorsFormatForField(final String field) { return vectorsFormatSupplier.apply(knnVectorsFormatParams); } - boolean buildGraph = getBuildGraphSetting(); + boolean buildVectorDatastructureSetting = getBuildVectorDatastructureSetting(mapperService.get()); // All native engines to use NativeEngines990KnnVectorsFormat return new NativeEngines990KnnVectorsFormat( new Lucene99FlatVectorsFormat(FlatVectorScorerUtil.getLucene99FlatVectorsScorer()), - buildGraph + buildVectorDatastructureSetting ); } - private boolean getBuildGraphSetting() { - // Default should be true, only explicit false should disable building graph for - // native engine - if (mapperService.isEmpty()) { - return true; - } - IndexSettings indexSettings = mapperService.get().getIndexSettings(); - return indexSettings.getValue(KNNSettings.INDEX_KNN_BUILD_GRAPH_SETTING); + private boolean getBuildVectorDatastructureSetting(final MapperService knnMapperService) { + IndexSettings indexSettings = knnMapperService.getIndexSettings(); + Boolean buildVectorDatastructure = indexSettings.getValue(KNNSettings.INDEX_KNN_BUILD_VECTOR_DATASTRUCTURE); + return buildVectorDatastructure != null ? buildVectorDatastructure : true; } @Override diff --git a/src/main/java/org/opensearch/knn/index/codec/KNN990Codec/NativeEngines990KnnVectorsFormat.java b/src/main/java/org/opensearch/knn/index/codec/KNN990Codec/NativeEngines990KnnVectorsFormat.java index f7b74da97..a62a43d25 100644 --- a/src/main/java/org/opensearch/knn/index/codec/KNN990Codec/NativeEngines990KnnVectorsFormat.java +++ b/src/main/java/org/opensearch/knn/index/codec/KNN990Codec/NativeEngines990KnnVectorsFormat.java @@ -30,7 +30,7 @@ public class NativeEngines990KnnVectorsFormat extends KnnVectorsFormat { /** The format for storing, reading, merging vectors on disk */ private static FlatVectorsFormat flatVectorsFormat; private static final String FORMAT_NAME = "NativeEngines99KnnVectorsFormat"; - private static boolean buildGraph; + private static boolean buildVectorDatastructure; public NativeEngines990KnnVectorsFormat() { this(new Lucene99FlatVectorsFormat(new DefaultFlatVectorScorer())); @@ -40,10 +40,10 @@ public NativeEngines990KnnVectorsFormat(final FlatVectorsFormat lucene99FlatVect this(lucene99FlatVectorsFormat, true); } - public NativeEngines990KnnVectorsFormat(final FlatVectorsFormat lucene99FlatVectorsFormat, boolean buildGraphSetting) { + public NativeEngines990KnnVectorsFormat(final FlatVectorsFormat lucene99FlatVectorsFormat, boolean buildVectorDatastructureSetting) { super(FORMAT_NAME); flatVectorsFormat = lucene99FlatVectorsFormat; - buildGraph = buildGraphSetting; + buildVectorDatastructure = buildVectorDatastructureSetting; } /** @@ -53,7 +53,7 @@ public NativeEngines990KnnVectorsFormat(final FlatVectorsFormat lucene99FlatVect */ @Override public KnnVectorsWriter fieldsWriter(final SegmentWriteState state) throws IOException { - return new NativeEngines990KnnVectorsWriter(state, flatVectorsFormat.fieldsWriter(state), buildGraph); + return new NativeEngines990KnnVectorsWriter(state, flatVectorsFormat.fieldsWriter(state), buildVectorDatastructure); } /** @@ -68,6 +68,12 @@ public KnnVectorsReader fieldsReader(final SegmentReadState state) throws IOExce @Override public String toString() { - return "NativeEngines99KnnVectorsFormat(name=" + this.getClass().getSimpleName() + ", flatVectorsFormat=" + flatVectorsFormat + ")"; + return "NativeEngines99KnnVectorsFormat(name=" + + this.getClass().getSimpleName() + + ", flatVectorsFormat=" + + flatVectorsFormat + + ", buildVectorDatastructure" + + buildVectorDatastructure + + ")"; } } diff --git a/src/main/java/org/opensearch/knn/index/codec/KNN990Codec/NativeEngines990KnnVectorsWriter.java b/src/main/java/org/opensearch/knn/index/codec/KNN990Codec/NativeEngines990KnnVectorsWriter.java index 736438bec..baeb3ba2a 100644 --- a/src/main/java/org/opensearch/knn/index/codec/KNN990Codec/NativeEngines990KnnVectorsWriter.java +++ b/src/main/java/org/opensearch/knn/index/codec/KNN990Codec/NativeEngines990KnnVectorsWriter.java @@ -48,7 +48,7 @@ public class NativeEngines990KnnVectorsWriter extends KnnVectorsWriter { private final SegmentWriteState segmentWriteState; private final FlatVectorsWriter flatVectorsWriter; private final List> fields = new ArrayList<>(); - private final boolean buildGraph; + private final boolean buildVectorDatastructure; private boolean finished; private final QuantizationService quantizationService = QuantizationService.getInstance(); @@ -74,7 +74,8 @@ public KnnFieldVectorsWriter addField(final FieldInfo fieldInfo) throws IOExc @Override public void flush(int maxDoc, final Sorter.DocMap sortMap) throws IOException { flatVectorsWriter.flush(maxDoc, sortMap); - if (!buildGraph) { + if (!buildVectorDatastructure) { + log.info("Skip building vector datastructure"); return; } for (final NativeEngineFieldVectorsWriter field : fields) { @@ -91,7 +92,7 @@ public void flush(int maxDoc, final Sorter.DocMap sortMap) throws IOException { public void mergeOneField(final FieldInfo fieldInfo, final MergeState mergeState) throws IOException { // This will ensure that we are merging the FlatIndex during force merge. flatVectorsWriter.mergeOneField(fieldInfo, mergeState); - if (!buildGraph) { + if (!buildVectorDatastructure) { return; } // For merge, pick values from flat vector and reindex again. This will use the flush operation to create graphs diff --git a/src/test/java/org/opensearch/knn/index/OpenSearchIT.java b/src/test/java/org/opensearch/knn/index/OpenSearchIT.java index e241663d7..7eb8e5dec 100644 --- a/src/test/java/org/opensearch/knn/index/OpenSearchIT.java +++ b/src/test/java/org/opensearch/knn/index/OpenSearchIT.java @@ -484,37 +484,37 @@ public void testIndexingVectorValidation_updateVectorWithNull() throws Exception assertArrayEquals(vectorForDocumentOne, vectorRestoreInitialValue); } - public void testKNNIndex_WhenBuildGraphParamIsPresent_thenSuccess() throws Exception { - final boolean buildGraph = false; - final Settings settings = Settings.builder().put(buildKNNSettingsWithBuildGraphProperty(buildGraph)).build(); + public void testKNNIndex_whenBuildGraphParamIsPresent_thenSuccess() throws Exception { + final boolean buildVectorDataStructure = randomBoolean(); + final Settings settings = Settings.builder().put(buildKNNIndexSettings(buildVectorDataStructure)).build(); final String knnIndexMapping = createKnnIndexMapping(FIELD_NAME, KNNEngine.getMaxDimensionByEngine(KNNEngine.DEFAULT)); final String indexName = "test-index-with-build-graph-settings"; createKnnIndex(indexName, settings, knnIndexMapping); - final String buildGraphValue = getIndexSettingByName(indexName, KNNSettings.KNN_BUILD_GRAPH); - assertNotNull("build_graph index setting is not found", buildGraphValue); - assertEquals("incorrect setting for build_graph", buildGraph, Boolean.valueOf(buildGraphValue)); + final String buildVectorDataStructureSetting = getIndexSettingByName(indexName, KNNSettings.KNN_BUILD_VECTOR_DATA_STRUCTURE); + assertNotNull("build_vector_data_structure index setting is not found", buildVectorDataStructureSetting); + assertEquals("incorrect setting for build_graph", buildVectorDataStructure, Boolean.valueOf(buildVectorDataStructureSetting)); deleteKNNIndex(indexName); } - public void testKNNIndex_WhenBuildGraphParamIsNotAdded_thenShouldNotReturnSetting() throws Exception { + public void testKNNIndex_whenBuildGraphParamIsNotAdded_thenShouldNotReturnSetting() throws Exception { final String knnIndexMapping = createKnnIndexMapping(FIELD_NAME, KNNEngine.getMaxDimensionByEngine(KNNEngine.DEFAULT)); final String indexName = "test-index-with-build-graph-settings"; createKnnIndex(indexName, knnIndexMapping); - final String buildGraphValue = getIndexSettingByName(indexName, KNNSettings.KNN_BUILD_GRAPH); - assertNull("build_graph index setting should not be added in index setting", buildGraphValue); + final String buildVectorDataStructureSetting = getIndexSettingByName(indexName, KNNSettings.KNN_BUILD_VECTOR_DATA_STRUCTURE); + assertNull("build_vector_data_structure index setting should not be added in index setting", buildVectorDataStructureSetting); deleteKNNIndex(indexName); } - public void testKNNIndex_WhenGetIndexSettingIsCalled_thenBuildGraphEnabledAsDefault() throws Exception { + public void testKNNIndex_whenGetIndexSettingIsCalled_thenBuildGraphEnabledAsDefault() throws Exception { final String knnIndexMapping = createKnnIndexMapping(FIELD_NAME, KNNEngine.getMaxDimensionByEngine(KNNEngine.DEFAULT)); - final String indexName = "test-index-with-build-graph-settings"; + final String indexName = "test-index-with-build-vector-graph-settings"; createKnnIndex(indexName, knnIndexMapping); - final String buildGraphValue = getIndexSettingByName(indexName, KNNSettings.KNN_BUILD_GRAPH, true); - assertNotNull("build_graph index setting is not found", buildGraphValue); + final String buildVectorDataStructureSetting = getIndexSettingByName(indexName, KNNSettings.KNN_BUILD_VECTOR_DATA_STRUCTURE, true); + assertNotNull("build_vector_data_structure index setting is not found", buildVectorDataStructureSetting); assertEquals( - "incorrect default setting for build_graph", - KNNSettings.INDEX_KNN_DEFAULT_BUILD_GRAPH, - Boolean.valueOf(buildGraphValue) + "incorrect default setting for build_vector_data_structure", + KNNSettings.INDEX_KNN_DEFAULT_BUILD_VECTOR_DATA_STRUCTURE, + Boolean.valueOf(buildVectorDataStructureSetting) ); deleteKNNIndex(indexName); } @@ -525,7 +525,7 @@ public void testKNNIndex_whenDisablingBuildGraphParameter_thenNoSearchResults() final String fieldName2 = "test-field-2"; final Integer dimension = testData.indexData.vectors[0].length; - final Settings indexSettingsWithoutBuildingGraph = buildKNNSettingsWithBuildGraphProperty(false); + final Settings knnIndexSettings = buildKNNIndexSettings(false); // Create an index final XContentBuilder builder = XContentFactory.jsonBuilder() @@ -554,7 +554,7 @@ public void testKNNIndex_whenDisablingBuildGraphParameter_thenNoSearchResults() .endObject() .endObject(); - createKnnIndex(indexName, indexSettingsWithoutBuildingGraph, builder.toString()); + createKnnIndex(indexName, knnIndexSettings, builder.toString()); // Index the test data for (int i = 0; i < testData.indexData.docs.length; i++) { @@ -579,8 +579,8 @@ public void testKNNIndex_whenDisablingBuildGraphParameter_thenNoSearchResults() final List faissNeighbors = getResults(indexName, fieldName2, testData.queries[0], 1); assertEquals("unexpected neighbors are returned", 0, faissNeighbors.size()); - // update build graph setting - updateIndexSettings(indexName, Settings.builder().put(KNNSettings.KNN_BUILD_GRAPH, true)); + // update build vector data structure setting + updateIndexSettings(indexName, Settings.builder().put(KNNSettings.KNN_BUILD_VECTOR_DATA_STRUCTURE, true)); forceMergeKnnIndex(indexName, 1); final int k = 10; diff --git a/src/testFixtures/java/org/opensearch/knn/KNNRestTestCase.java b/src/testFixtures/java/org/opensearch/knn/KNNRestTestCase.java index 1e1a2c27b..c54d44f5f 100644 --- a/src/testFixtures/java/org/opensearch/knn/KNNRestTestCase.java +++ b/src/testFixtures/java/org/opensearch/knn/KNNRestTestCase.java @@ -99,6 +99,8 @@ import static org.opensearch.knn.TestUtils.computeGroundTruthValues; import static org.opensearch.knn.common.KNNConstants.VECTOR_DATA_TYPE_FIELD; +import static org.opensearch.knn.index.KNNSettings.KNN_BUILD_VECTOR_DATA_STRUCTURE; +import static org.opensearch.knn.index.KNNSettings.KNN_INDEX; import static org.opensearch.knn.index.SpaceType.L2; import static org.opensearch.knn.index.memory.NativeMemoryCacheManager.GRAPH_COUNT; import static org.opensearch.knn.index.engine.KNNEngine.FAISS; @@ -761,12 +763,12 @@ protected Settings getKNNSegmentReplicatedIndexSettings() { .build(); } - protected Settings buildKNNSettingsWithBuildGraphProperty(boolean buildGraph) { + protected Settings buildKNNIndexSettings(boolean buildVectorDatastructure) { return Settings.builder() .put("number_of_shards", 1) - .put("number_of_replicas", 1) - .put("index.knn", true) - .put("index.knn.build_graph", buildGraph) + .put("number_of_replicas", 0) + .put(KNN_INDEX, true) + .put(KNN_BUILD_VECTOR_DATA_STRUCTURE, buildVectorDatastructure) .build(); }