From 9ec325b4b42ea45b5aa3454d4ee35b0fae7611a5 Mon Sep 17 00:00:00 2001 From: Tejas Shah Date: Thu, 3 Oct 2024 17:13:25 -0700 Subject: [PATCH] Preloads .vec and .vex files LuceneFlatVectorReader uses IOContext.Random to open the read. IOContext.Random indicates the kernel to not read ahead the pages on to physical memory. This causes an increase in merge time due to increase of read ops at runtime. The preload settings signals the kernal to preload the files when the reader is opened Signed-off-by: Tejas Shah --- CHANGELOG.md | 1 + .../knn/index/engine/KNNLibrary.java | 3 +- .../knn/index/engine/lucene/Lucene.java | 6 --- .../org/opensearch/knn/plugin/KNNPlugin.java | 26 +++++++++++ .../knn/index/engine/faiss/FaissTests.java | 9 ++++ .../knn/index/engine/nmslib/NMSLibTests.java | 21 +++++++++ .../opensearch/knn/plugin/KNNPluginTests.java | 45 +++++++++++++++++++ 7 files changed, 103 insertions(+), 8 deletions(-) create mode 100644 src/test/java/org/opensearch/knn/index/engine/nmslib/NMSLibTests.java create mode 100644 src/test/java/org/opensearch/knn/plugin/KNNPluginTests.java diff --git a/CHANGELOG.md b/CHANGELOG.md index c11d258e8..f72024c95 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,6 +9,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), ### Enhancements ### Bug Fixes * Add DocValuesProducers for releasing memory when close index [#1946](https://github.com/opensearch-project/k-NN/pull/1946) +* Prelaods vec and vex files to address regression in force merge latencies [#2186](https://github.com/opensearch-project/k-NN/pull/2186) ### Infrastructure ### Documentation ### Maintenance diff --git a/src/main/java/org/opensearch/knn/index/engine/KNNLibrary.java b/src/main/java/org/opensearch/knn/index/engine/KNNLibrary.java index cf7c4ad82..066665e5d 100644 --- a/src/main/java/org/opensearch/knn/index/engine/KNNLibrary.java +++ b/src/main/java/org/opensearch/knn/index/engine/KNNLibrary.java @@ -8,7 +8,6 @@ import org.opensearch.common.ValidationException; import org.opensearch.knn.index.SpaceType; -import java.util.Collections; import java.util.List; /** @@ -137,6 +136,6 @@ KNNLibraryIndexingContext getKNNLibraryIndexingContext( * @return list of file extensions that will be read/write with mmap */ default List mmapFileExtensions() { - return Collections.emptyList(); + return List.of("vec", "vex"); } } diff --git a/src/main/java/org/opensearch/knn/index/engine/lucene/Lucene.java b/src/main/java/org/opensearch/knn/index/engine/lucene/Lucene.java index db516d309..294f9eb66 100644 --- a/src/main/java/org/opensearch/knn/index/engine/lucene/Lucene.java +++ b/src/main/java/org/opensearch/knn/index/engine/lucene/Lucene.java @@ -15,7 +15,6 @@ import org.opensearch.knn.index.engine.MethodResolver; import org.opensearch.knn.index.engine.ResolvedMethodContext; -import java.util.List; import java.util.Map; import java.util.function.Function; @@ -89,11 +88,6 @@ public Float scoreToRadialThreshold(Float score, SpaceType spaceType) { return score; } - @Override - public List mmapFileExtensions() { - return List.of("vec", "vex"); - } - @Override public ResolvedMethodContext resolveMethod( KNNMethodContext knnMethodContext, diff --git a/src/main/java/org/opensearch/knn/plugin/KNNPlugin.java b/src/main/java/org/opensearch/knn/plugin/KNNPlugin.java index cae8960bb..42c6516a4 100644 --- a/src/main/java/org/opensearch/knn/plugin/KNNPlugin.java +++ b/src/main/java/org/opensearch/knn/plugin/KNNPlugin.java @@ -11,6 +11,7 @@ import org.opensearch.core.action.ActionResponse; import org.opensearch.index.codec.CodecServiceFactory; import org.opensearch.index.engine.EngineFactory; +import org.opensearch.index.shard.IndexSettingProvider; import org.opensearch.indices.SystemIndexDescriptor; import org.opensearch.knn.index.KNNCircuitBreaker; import org.opensearch.knn.plugin.search.KNNConcurrentSearchRequestDecider; @@ -368,14 +369,39 @@ public Settings additionalSettings() { // that are set here. final List engineSettings = Arrays.stream(KNNEngine.values()) .flatMap(engine -> engine.mmapFileExtensions().stream()) + .distinct() .collect(Collectors.toList()); + final List combinedSettings = Stream.concat( IndexModule.INDEX_STORE_HYBRID_MMAP_EXTENSIONS.getDefault(Settings.EMPTY).stream(), engineSettings.stream() ).collect(Collectors.toList()); + return Settings.builder().putList(IndexModule.INDEX_STORE_HYBRID_MMAP_EXTENSIONS.getKey(), combinedSettings).build(); } + @Override + public Collection getAdditionalIndexSettingProviders() { + IndexSettingProvider preloadMmapFiles = new IndexSettingProvider() { + @Override + public Settings getAdditionalIndexSettings(String indexName, boolean isDataStreamIndex, Settings templateAndRequestSettings) { + + if (templateAndRequestSettings.getAsBoolean(KNNSettings.KNN_INDEX, Boolean.FALSE)) { + final List mmapFileExtensions = Arrays.stream(KNNEngine.values()) + .flatMap(engine -> engine.mmapFileExtensions().stream()) + .distinct() + .collect(Collectors.toList()); + + return Settings.builder().putList(IndexModule.INDEX_STORE_PRE_LOAD_SETTING.getKey(), mmapFileExtensions).build(); + } + + return Settings.EMPTY; + } + }; + + return List.of(preloadMmapFiles); + } + @Override public Optional getConcurrentSearchRequestDeciderFactory() { return Optional.of(new KNNConcurrentSearchRequestDecider.Factory()); diff --git a/src/test/java/org/opensearch/knn/index/engine/faiss/FaissTests.java b/src/test/java/org/opensearch/knn/index/engine/faiss/FaissTests.java index 75da6811e..34aac6232 100644 --- a/src/test/java/org/opensearch/knn/index/engine/faiss/FaissTests.java +++ b/src/test/java/org/opensearch/knn/index/engine/faiss/FaissTests.java @@ -23,6 +23,7 @@ import java.io.IOException; import java.util.HashMap; +import java.util.List; import java.util.Locale; import java.util.Map; @@ -367,4 +368,12 @@ public void testMethodAsMapBuilder() throws IOException { assertEquals(expectedKNNMethodContext.getVectorValidator(), actualKNNLibraryIndexingContext.getVectorValidator()); } + public void testMmapFileExtensions() { + final List mMapExtensions = Faiss.INSTANCE.mmapFileExtensions(); + assertNotNull(mMapExtensions); + final List expectedSettings = List.of("vex", "vec"); + assertTrue(expectedSettings.containsAll(mMapExtensions)); + assertTrue(mMapExtensions.containsAll(expectedSettings)); + } + } diff --git a/src/test/java/org/opensearch/knn/index/engine/nmslib/NMSLibTests.java b/src/test/java/org/opensearch/knn/index/engine/nmslib/NMSLibTests.java new file mode 100644 index 000000000..c76147d01 --- /dev/null +++ b/src/test/java/org/opensearch/knn/index/engine/nmslib/NMSLibTests.java @@ -0,0 +1,21 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.knn.index.engine.nmslib; + +import org.opensearch.knn.KNNTestCase; + +import java.util.List; + +public class NMSLibTests extends KNNTestCase { + + public void testMmapFileExtensions() { + final List mmapExtensions = Nmslib.INSTANCE.mmapFileExtensions(); + assertNotNull(mmapExtensions); + final List expectedSettings = List.of("vex", "vec"); + assertTrue(expectedSettings.containsAll(mmapExtensions)); + assertTrue(mmapExtensions.containsAll(expectedSettings)); + } +} diff --git a/src/test/java/org/opensearch/knn/plugin/KNNPluginTests.java b/src/test/java/org/opensearch/knn/plugin/KNNPluginTests.java new file mode 100644 index 000000000..e2d95c6d7 --- /dev/null +++ b/src/test/java/org/opensearch/knn/plugin/KNNPluginTests.java @@ -0,0 +1,45 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.knn.plugin; + +import org.opensearch.common.settings.Settings; +import org.opensearch.knn.index.KNNSettings; +import org.opensearch.test.OpenSearchTestCase; + +import java.io.IOException; +import java.util.List; + +public class KNNPluginTests extends OpenSearchTestCase { + + public void testKNNPlugin_additionalSettings() throws IOException { + try (KNNPlugin knnPlugin = new KNNPlugin()) { + Settings additionalSettings = knnPlugin.additionalSettings(); + List expectedHybrid = List.of("nvd", "dvd", "tim", "tip", "dim", "kdd", "kdi", "cfs", "doc", "vec", "vex"); + + Settings settings = Settings.builder().putList("index.store.hybrid.mmap.extensions", expectedHybrid).build(); + assertEquals(settings, additionalSettings); + } + } + + public void testKNNPlugin_additionalIndexProviderSettings() throws IOException { + try (KNNPlugin knnPlugin = new KNNPlugin()) { + Settings additionalSettings = knnPlugin.getAdditionalIndexSettingProviders() + .iterator() + .next() + .getAdditionalIndexSettings("index", false, Settings.builder().put(KNNSettings.KNN_INDEX, Boolean.TRUE).build()); + + Settings settings = Settings.builder().putList("index.store.preload", List.of("vec", "vex")).build(); + assertEquals(settings, additionalSettings); + + additionalSettings = knnPlugin.getAdditionalIndexSettingProviders() + .iterator() + .next() + .getAdditionalIndexSettings("index", false, Settings.builder().build()); + + assertEquals(Settings.EMPTY, additionalSettings); + } + } +}