From c1c5776f90f0ae4236112b281346617dc0342781 Mon Sep 17 00:00:00 2001 From: Tejas Shah Date: Thu, 3 Oct 2024 17:13:25 -0700 Subject: [PATCH] Preloads .vec and .vex files LuceneFlatVectorReader uses IOContext.Random to open the read. IOContext.Random tells the kernel not to read pages ahead into physical memory. This causes an increase in merge time due to an increase in read ops at runtime. The preload setting signals the kernel to preload the files when the reader is opened. Signed-off-by: Tejas Shah --- CHANGELOG.md | 1 + .../knn/index/engine/KNNLibrary.java | 3 +-- .../knn/index/engine/lucene/Lucene.java | 6 ----- .../org/opensearch/knn/plugin/KNNPlugin.java | 17 ++++++++++++++ .../knn/index/engine/faiss/FaissTests.java | 9 ++++++++ .../knn/index/engine/nmslib/NMSLibTests.java | 21 ++++++++++++++++++ .../opensearch/knn/plugin/KNNPluginTests.java | 22 +++++++++++++++++++ 7 files changed, 71 insertions(+), 8 deletions(-) create mode 100644 src/test/java/org/opensearch/knn/index/engine/nmslib/NMSLibTests.java create mode 100644 src/test/java/org/opensearch/knn/plugin/KNNPluginTests.java diff --git a/CHANGELOG.md b/CHANGELOG.md index fa86cbe3f1..0708814ebc 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,6 +10,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), * Introducing a loading layer in FAISS [#2033](https://github.com/opensearch-project/k-NN/issues/2033) ### Bug Fixes * Add DocValuesProducers for releasing memory when close index [#1946](https://github.com/opensearch-project/k-NN/pull/1946) +* Preloads vec and vex files to address regression in force merge latencies [#2186](https://github.com/opensearch-project/k-NN/pull/2186) ### Infrastructure * Removed JDK 11 and 17 version from CI runs [#1921](https://github.com/opensearch-project/k-NN/pull/1921) ### Documentation diff --git a/src/main/java/org/opensearch/knn/index/engine/KNNLibrary.java b/src/main/java/org/opensearch/knn/index/engine/KNNLibrary.java index
cf7c4ad82f..066665e5d7 100644 --- a/src/main/java/org/opensearch/knn/index/engine/KNNLibrary.java +++ b/src/main/java/org/opensearch/knn/index/engine/KNNLibrary.java @@ -8,7 +8,6 @@ import org.opensearch.common.ValidationException; import org.opensearch.knn.index.SpaceType; -import java.util.Collections; import java.util.List; /** @@ -137,6 +136,6 @@ KNNLibraryIndexingContext getKNNLibraryIndexingContext( * @return list of file extensions that will be read/write with mmap */ default List mmapFileExtensions() { - return Collections.emptyList(); + return List.of("vec", "vex"); } } diff --git a/src/main/java/org/opensearch/knn/index/engine/lucene/Lucene.java b/src/main/java/org/opensearch/knn/index/engine/lucene/Lucene.java index db516d309a..294f9eb66a 100644 --- a/src/main/java/org/opensearch/knn/index/engine/lucene/Lucene.java +++ b/src/main/java/org/opensearch/knn/index/engine/lucene/Lucene.java @@ -15,7 +15,6 @@ import org.opensearch.knn.index.engine.MethodResolver; import org.opensearch.knn.index.engine.ResolvedMethodContext; -import java.util.List; import java.util.Map; import java.util.function.Function; @@ -89,11 +88,6 @@ public Float scoreToRadialThreshold(Float score, SpaceType spaceType) { return score; } - @Override - public List mmapFileExtensions() { - return List.of("vec", "vex"); - } - @Override public ResolvedMethodContext resolveMethod( KNNMethodContext knnMethodContext, diff --git a/src/main/java/org/opensearch/knn/plugin/KNNPlugin.java b/src/main/java/org/opensearch/knn/plugin/KNNPlugin.java index ff079031f9..7200f7ca76 100644 --- a/src/main/java/org/opensearch/knn/plugin/KNNPlugin.java +++ b/src/main/java/org/opensearch/knn/plugin/KNNPlugin.java @@ -13,6 +13,7 @@ import org.opensearch.index.engine.EngineFactory; import org.opensearch.indices.SystemIndexDescriptor; import org.opensearch.knn.index.KNNCircuitBreaker; +import org.opensearch.knn.index.engine.KNNEngine; import org.opensearch.knn.plugin.search.KNNConcurrentSearchRequestDecider; import 
org.opensearch.knn.index.util.KNNClusterUtil; import org.opensearch.knn.index.query.KNNQueryBuilder; @@ -110,6 +111,7 @@ import java.util.Map; import java.util.Optional; import java.util.function.Supplier; +import java.util.stream.Collectors; import static java.util.Collections.singletonList; import static org.opensearch.knn.common.KNNConstants.KNN_THREAD_POOL_PREFIX; @@ -352,6 +354,21 @@ public Collection getSystemIndexDescriptors(Settings sett return ImmutableList.of(new SystemIndexDescriptor(MODEL_INDEX_NAME, "Index for storing models used for k-NN indices")); } + /** + * Plugin can provide additional node settings, that includes new settings or overrides for existing one from core. + * + * @return settings that are set by plugin + */ + @Override + public Settings additionalSettings() { + final List mmapFileExtensions = Arrays.stream(KNNEngine.values()) + .flatMap(engine -> engine.mmapFileExtensions().stream()) + .distinct() + .collect(Collectors.toList()); + + return Settings.builder().putList(IndexModule.INDEX_STORE_PRE_LOAD_SETTING.getKey(), mmapFileExtensions).build(); + } + @Override public Optional getConcurrentSearchRequestDeciderFactory() { return Optional.of(new KNNConcurrentSearchRequestDecider.Factory()); diff --git a/src/test/java/org/opensearch/knn/index/engine/faiss/FaissTests.java b/src/test/java/org/opensearch/knn/index/engine/faiss/FaissTests.java index 75da6811e7..34aac62329 100644 --- a/src/test/java/org/opensearch/knn/index/engine/faiss/FaissTests.java +++ b/src/test/java/org/opensearch/knn/index/engine/faiss/FaissTests.java @@ -23,6 +23,7 @@ import java.io.IOException; import java.util.HashMap; +import java.util.List; import java.util.Locale; import java.util.Map; @@ -367,4 +368,12 @@ public void testMethodAsMapBuilder() throws IOException { assertEquals(expectedKNNMethodContext.getVectorValidator(), actualKNNLibraryIndexingContext.getVectorValidator()); } + public void testMmapFileExtensions() { + final List mMapExtensions = 
Faiss.INSTANCE.mmapFileExtensions(); + assertNotNull(mMapExtensions); + final List expectedSettings = List.of("vex", "vec"); + assertTrue(expectedSettings.containsAll(mMapExtensions)); + assertTrue(mMapExtensions.containsAll(expectedSettings)); + } + } diff --git a/src/test/java/org/opensearch/knn/index/engine/nmslib/NMSLibTests.java b/src/test/java/org/opensearch/knn/index/engine/nmslib/NMSLibTests.java new file mode 100644 index 0000000000..c76147d011 --- /dev/null +++ b/src/test/java/org/opensearch/knn/index/engine/nmslib/NMSLibTests.java @@ -0,0 +1,21 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.knn.index.engine.nmslib; + +import org.opensearch.knn.KNNTestCase; + +import java.util.List; + +public class NMSLibTests extends KNNTestCase { + + public void testMmapFileExtensions() { + final List mmapExtensions = Nmslib.INSTANCE.mmapFileExtensions(); + assertNotNull(mmapExtensions); + final List expectedSettings = List.of("vex", "vec"); + assertTrue(expectedSettings.containsAll(mmapExtensions)); + assertTrue(mmapExtensions.containsAll(expectedSettings)); + } +} diff --git a/src/test/java/org/opensearch/knn/plugin/KNNPluginTests.java b/src/test/java/org/opensearch/knn/plugin/KNNPluginTests.java new file mode 100644 index 0000000000..84219c75a8 --- /dev/null +++ b/src/test/java/org/opensearch/knn/plugin/KNNPluginTests.java @@ -0,0 +1,22 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.knn.plugin; + +import org.opensearch.common.settings.Settings; +import org.opensearch.test.OpenSearchTestCase; + +import java.io.IOException; +import java.util.List; + +public class KNNPluginTests extends OpenSearchTestCase { + + public void testKNNPlugin_additionalSettings() throws IOException { + try (KNNPlugin knnPlugin = new KNNPlugin()) { + Settings additionalSettings = knnPlugin.additionalSettings(); + assertEquals(List.of("vec", "vex"), 
additionalSettings.getAsList("index.store.preload")); + } + } +}