From 2a61fcda4ceef36829bbfa2a1748f2c5b6bbe7df Mon Sep 17 00:00:00 2001 From: luyuncheng Date: Sun, 7 Jul 2024 23:44:48 +0800 Subject: [PATCH] Squash Commit Signed-off-by: luyuncheng --- CHANGELOG.md | 1 + .../org/opensearch/knn/index/KNNSettings.java | 24 +- .../knn/index/KNNVectorDVLeafFieldData.java | 40 ++ .../knn/index/fetch/KNNFetchSubPhase.java | 259 ++++++++ .../org/opensearch/knn/plugin/KNNPlugin.java | 7 + .../knn/index/KNNSyntheticSourceIT.java | 600 ++++++++++++++++++ .../index/KNNVectorDVLeafFieldDataTests.java | 6 + .../index/fetch/KNNFetchSubPhaseTests.java | 165 +++++ .../org/opensearch/knn/KNNRestTestCase.java | 20 + 9 files changed, 1121 insertions(+), 1 deletion(-) create mode 100644 src/main/java/org/opensearch/knn/index/fetch/KNNFetchSubPhase.java create mode 100644 src/test/java/org/opensearch/knn/index/KNNSyntheticSourceIT.java create mode 100644 src/test/java/org/opensearch/knn/index/fetch/KNNFetchSubPhaseTests.java diff --git a/CHANGELOG.md b/CHANGELOG.md index 70f787cff..ef3728e9d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), ## [Unreleased 3.0](https://github.com/opensearch-project/k-NN/compare/2.x...HEAD) ### Features +* Reuse KNNVectorFieldData for reduce disk usage [#1571](https://github.com/opensearch-project/k-NN/pull/1571) ### Enhancements ### Bug Fixes ### Infrastructure diff --git a/src/main/java/org/opensearch/knn/index/KNNSettings.java b/src/main/java/org/opensearch/knn/index/KNNSettings.java index 88a396f44..8994df29e 100644 --- a/src/main/java/org/opensearch/knn/index/KNNSettings.java +++ b/src/main/java/org/opensearch/knn/index/KNNSettings.java @@ -21,6 +21,7 @@ import org.opensearch.core.common.unit.ByteSizeValue; import org.opensearch.common.unit.TimeValue; import org.opensearch.index.IndexModule; +import org.opensearch.index.IndexSettings; import org.opensearch.knn.index.memory.NativeMemoryCacheManager; import org.opensearch.knn.index.memory.NativeMemoryCacheManagerDto; import org.opensearch.knn.index.util.IndexHyperParametersUtil; @@ -80,6 +81,7 @@ public class KNNSettings { public static final String MODEL_CACHE_SIZE_LIMIT = "knn.model.cache.size.limit"; public static final String ADVANCED_FILTERED_EXACT_SEARCH_THRESHOLD = "index.knn.advanced.filtered_exact_search_threshold"; public static final String KNN_FAISS_AVX2_DISABLED = "knn.faiss.avx2.disabled"; + public static final String KNN_SYNTHETIC_SOURCE_ENABLED = "index.knn.synthetic_source.enabled"; /** * Default setting values @@ -252,6 +254,13 @@ public class KNNSettings { NodeScope ); + public static final Setting KNN_SYNTHETIC_SOURCE_ENABLED_SETTING = Setting.boolSetting( + KNN_SYNTHETIC_SOURCE_ENABLED, + false, + IndexScope, + Dynamic + ); + /** * Dynamic settings */ @@ -369,6 +378,10 @@ private Setting getSetting(String key) { return KNN_VECTOR_STREAMING_MEMORY_LIMIT_PCT_SETTING; } + if (KNN_SYNTHETIC_SOURCE_ENABLED.equals(key)) { + return KNN_SYNTHETIC_SOURCE_ENABLED_SETTING; + } + throw new IllegalArgumentException("Cannot find setting by key [" + key + "]"); } @@ -387,7 +400,8 @@ public List> getSettings() { MODEL_CACHE_SIZE_LIMIT_SETTING, ADVANCED_FILTERED_EXACT_SEARCH_THRESHOLD_SETTING, KNN_FAISS_AVX2_DISABLED_SETTING, - KNN_VECTOR_STREAMING_MEMORY_LIMIT_PCT_SETTING + KNN_VECTOR_STREAMING_MEMORY_LIMIT_PCT_SETTING, + KNN_SYNTHETIC_SOURCE_ENABLED_SETTING ); return Stream.concat(settings.stream(), dynamicCacheSettings.values().stream()).collect(Collectors.toList()); } @@ -432,6 +446,14 @@ public static Integer getFilteredExactSearchThreshold(final String indexName) { .getAsInt(ADVANCED_FILTERED_EXACT_SEARCH_THRESHOLD, ADVANCED_FILTERED_EXACT_SEARCH_THRESHOLD_DEFAULT_VALUE); } + /** + * check this index enabled/disabled synthetic source + * @param indexSettings settings + */ + public static boolean isKNNSyntheticSourceEnabled(IndexSettings indexSettings) { + return indexSettings.getValue(KNN_SYNTHETIC_SOURCE_ENABLED_SETTING); + } + public void initialize(Client client, ClusterService clusterService) { this.client = client; this.clusterService = clusterService; diff --git a/src/main/java/org/opensearch/knn/index/KNNVectorDVLeafFieldData.java b/src/main/java/org/opensearch/knn/index/KNNVectorDVLeafFieldData.java index 85f037c0f..70092d742 100644 --- a/src/main/java/org/opensearch/knn/index/KNNVectorDVLeafFieldData.java +++ b/src/main/java/org/opensearch/knn/index/KNNVectorDVLeafFieldData.java @@ -5,6 +5,7 @@ package org.opensearch.knn.index; +import org.apache.lucene.index.BinaryDocValues; import org.apache.lucene.index.DocValues; import org.apache.lucene.index.FieldInfo; import org.apache.lucene.index.LeafReader; @@ -12,6 +13,8 @@ import org.opensearch.index.fielddata.LeafFieldData; import org.opensearch.index.fielddata.ScriptDocValues; import org.opensearch.index.fielddata.SortedBinaryDocValues; +import org.opensearch.index.mapper.DocValueFetcher; +import org.opensearch.search.DocValueFormat; import java.io.IOException; @@ -70,4 +73,41 @@ public ScriptDocValues getScriptValues() { public SortedBinaryDocValues getBytesValues() { throw new UnsupportedOperationException("knn vector field '" + fieldName + "' doesn't support sorting"); } + + @Override + public DocValueFetcher.Leaf getLeafValueFetcher(DocValueFormat format) { + final BinaryDocValues binaryDocValues; + + try { + binaryDocValues = DocValues.getBinary(reader, fieldName); + } catch (IOException e) { + throw new IllegalStateException("Cannot load KNNDocValues from lucene", e); + } + + return new DocValueFetcher.Leaf() { + float[] floats; + boolean docExists = false; + + @Override + public boolean advanceExact(int docId) throws IOException { + if (binaryDocValues.advanceExact(docId)) { + docExists = true; + floats = vectorDataType.getVectorFromBytesRef(binaryDocValues.binaryValue()); + return docExists; + } + docExists = false; + return docExists; + } + + @Override + public int docValueCount() throws IOException { + return 1; + } + + @Override + public Object nextValue() throws IOException { + return floats; + } + }; + } } diff --git a/src/main/java/org/opensearch/knn/index/fetch/KNNFetchSubPhase.java b/src/main/java/org/opensearch/knn/index/fetch/KNNFetchSubPhase.java new file mode 100644 index 000000000..edb7f8ff5 --- /dev/null +++ b/src/main/java/org/opensearch/knn/index/fetch/KNNFetchSubPhase.java @@ -0,0 +1,259 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + * + * Modifications Copyright OpenSearch Contributors. See + * GitHub history for details. + */ + +package org.opensearch.knn.index.fetch; + +import lombok.AllArgsConstructor; +import lombok.Getter; +import lombok.extern.log4j.Log4j2; +import org.apache.lucene.index.LeafReaderContext; +import org.apache.lucene.search.DocIdSetIterator; +import org.apache.lucene.search.Query; +import org.apache.lucene.search.ScoreMode; +import org.apache.lucene.search.Scorer; +import org.apache.lucene.search.Weight; +import org.apache.lucene.util.BitSet; +import org.opensearch.common.io.stream.BytesStreamOutput; +import org.opensearch.common.lucene.search.Queries; +import org.opensearch.common.xcontent.XContentType; +import org.opensearch.core.common.bytes.BytesReference; +import org.opensearch.core.xcontent.XContentBuilder; +import org.opensearch.index.IndexSettings; +import org.opensearch.index.mapper.DocValueFetcher; +import org.opensearch.index.mapper.DocumentMapper; +import org.opensearch.index.mapper.MappedFieldType; +import org.opensearch.index.mapper.MapperService; +import org.opensearch.index.mapper.ObjectMapper; +import org.opensearch.index.mapper.ValueFetcher; +import org.opensearch.knn.index.KNNSettings; +import org.opensearch.knn.index.mapper.KNNVectorFieldMapper; +import org.opensearch.search.SearchHit; +import org.opensearch.search.fetch.FetchContext; +import org.opensearch.search.fetch.FetchSubPhase; +import org.opensearch.search.fetch.FetchSubPhaseProcessor; +import org.opensearch.search.internal.ContextIndexSearcher; +import org.opensearch.search.lookup.SourceLookup; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import java.util.Map; + +import static org.opensearch.knn.common.KNNConstants.BYTES_PER_KILOBYTES; + +/** + * Fetch sub phase which pull data from doc values. + * and fulfill the value into source map + */ +@Log4j2 +public class KNNFetchSubPhase implements FetchSubPhase { + + @Override + public FetchSubPhaseProcessor getProcessor(FetchContext fetchContext) throws IOException { + IndexSettings indexSettings = fetchContext.getIndexSettings(); + if (!KNNSettings.isKNNSyntheticSourceEnabled(indexSettings)) { + log.debug("Synthetic is disabled for index: {}", fetchContext.getIndexName()); + return null; + } + MapperService mapperService = fetchContext.mapperService(); + + List fields = new ArrayList<>(); + for (MappedFieldType mappedFieldType : mapperService.fieldTypes()) { + if (mappedFieldType != null && mappedFieldType instanceof KNNVectorFieldMapper.KNNVectorFieldType) { + String fieldName = mappedFieldType.name(); + ValueFetcher fetcher = new DocValueFetcher( + mappedFieldType.docValueFormat(null, null), + fetchContext.searchLookup().doc().getForField(mappedFieldType) + ); + fields.add(new DocValueField(fieldName, fetcher)); + } + } + return new KNNFetchSubPhaseProcessor(fetchContext, fields); + } + + @AllArgsConstructor + @Getter + class KNNFetchSubPhaseProcessor implements FetchSubPhaseProcessor { + + private final FetchContext fetchContext; + private final List fields; + + @Override + public void setNextReader(LeafReaderContext readerContext) throws IOException { + for (DocValueField f : fields) { + f.fetcher.setNextReader(readerContext); + } + } + + @Override + public void process(HitContext hitContext) throws IOException { + MapperService mapperService = fetchContext.mapperService(); + final boolean hasNested = mapperService.hasNested(); + SearchHit hit = hitContext.hit(); + Map maps = hit.getSourceAsMap(); + if (maps == null) { + // when source is disabled, return + return; + } + + if (hasNested) { + syntheticNestedFieldWithDocValues(mapperService, hitContext, maps); + } + for (DocValueField f : fields) { + if (maps.containsKey(f.field)) { + continue; + } + List docValuesSource = f.fetcher.fetchValues(hitContext.sourceLookup()); + if (docValuesSource.size() > 0) { + maps.put(f.field, docValuesSource.get(0)); + } + } + BytesStreamOutput streamOutput = new BytesStreamOutput(BYTES_PER_KILOBYTES); + XContentBuilder builder = new XContentBuilder(XContentType.JSON.xContent(), streamOutput); + builder.value(maps); + hitContext.hit().sourceRef(BytesReference.bytes(builder)); + } + + protected void syntheticNestedFieldWithDocValues(MapperService mapperService, HitContext hitContext, Map sourceMaps) + throws IOException { + DocumentMapper documentMapper = mapperService.documentMapper(); + Map mapperMap = documentMapper.objectMappers(); + + for (ObjectMapper objectMapper : mapperMap.values()) { + if (objectMapper == null) { + continue; + } + if (!objectMapper.nested().isNested()) { + continue; + } + String path = objectMapper.fullPath(); + for (DocValueField f : fields) { + if (!checkNestedField(path, f, sourceMaps)) { + continue; + } + // nested array in one nested path + Object nestedObj = sourceMaps.get(path); + ArrayList nestedDocList = (ArrayList) nestedObj; + + log.debug( + "object mapper: nested:" + + objectMapper.nested().isNested() + + " Value:" + + objectMapper.fullPath() + + " field:" + + f.field + ); + + innerProcessOneNestedField(objectMapper, hitContext, nestedDocList, f, path); + } + } + } + + private void innerProcessOneNestedField( + ObjectMapper objectMapper, + HitContext hitContext, + ArrayList nestedDocList, + DocValueField f, + String path + ) throws IOException { + + BitSet parentBits = getParentDocBitSet(hitContext); + DocIdSetIterator childIter = getChildDocIdSetIterator(objectMapper, hitContext); + LeafReaderContext subReaderContext = hitContext.readerContext(); + + SearchHit hit = hitContext.hit(); + int currentParent = hit.docId() - subReaderContext.docBase; + int previousParent = parentBits.prevSetBit(currentParent - 1); + int childDocId = childIter.advance(previousParent + 1); + SourceLookup nestedVecSourceLookup = new SourceLookup(); + + // when nested field only have vector field and exclude source, list is empty + boolean isEmpty = nestedDocList.isEmpty(); + + for (int offset = 0; childDocId < currentParent && childDocId != DocIdSetIterator.NO_MORE_DOCS; childDocId = childIter + .nextDoc(), offset++) { + nestedVecSourceLookup.setSegmentAndDocument(subReaderContext, childDocId); + List nestedVecDocValuesSource = f.fetcher.fetchValues(nestedVecSourceLookup); + if (nestedVecDocValuesSource == null || nestedVecDocValuesSource.isEmpty()) { + continue; + } + if (isEmpty) { + nestedDocList.add(new HashMap()); + } + if (offset < nestedDocList.size()) { + Object sourceObj = nestedDocList.get(offset); + if (sourceObj instanceof Map) { + Map sourceMap = (Map) sourceObj; + String suffix = f.field.substring(path.length() + 1); + sourceMap.put(suffix, nestedVecDocValuesSource.get(0)); + } + } else { + /** + * TODO nested field partial doc only have vector and source exclude + * this source map nestedDocList would out-of-order, can not fill the vector into right offset + * "nested_field" : [ + * {"nested_vector": [2.6, 2.6]}, + * {"nested_numeric": 2, "nested_vector": [3.1, 2.3]} + * ] + */ + throw new UnsupportedOperationException( + String.format("\"Nested Path \"%s\" in Field \"%s\" with _ID \"%s\" can not be empty\"", path, f.field, hit.getId()) + ); + } + } + } + + private BitSet getParentDocBitSet(HitContext hitContext) throws IOException { + Query parentFilter = Queries.newNonNestedFilter(); + LeafReaderContext subReaderContext = hitContext.readerContext(); + BitSet parentBits = fetchContext.getQueryShardContext().bitsetFilter(parentFilter).getBitSet(subReaderContext); + return parentBits; + } + + private DocIdSetIterator getChildDocIdSetIterator(ObjectMapper objectMapper, HitContext hitContext) throws IOException { + Query childFilter = objectMapper.nestedTypeFilter(); + ContextIndexSearcher searcher = fetchContext.searcher(); + LeafReaderContext subReaderContext = hitContext.readerContext(); + final Weight childWeight = searcher.createWeight(searcher.rewrite(childFilter), ScoreMode.COMPLETE_NO_SCORES, 1f); + Scorer childScorer = childWeight.scorer(subReaderContext); + DocIdSetIterator childIter = childScorer.iterator(); + return childIter; + } + + private boolean checkNestedField(String path, DocValueField f, Map sourceMaps) { + if (!f.field.startsWith(path)) { + return false; + } + if (!sourceMaps.containsKey(path)) { + return false; + } + + // path to nested field: + Object nestedObj = sourceMaps.get(path); + if (!(nestedObj instanceof ArrayList)) { + return false; + } + return true; + } + } + + @Getter + public static class DocValueField { + private final String field; + private final ValueFetcher fetcher; + + DocValueField(String field, ValueFetcher fetcher) { + this.field = field; + this.fetcher = fetcher; + } + } +} diff --git a/src/main/java/org/opensearch/knn/plugin/KNNPlugin.java b/src/main/java/org/opensearch/knn/plugin/KNNPlugin.java index f898b622e..618f72e4d 100644 --- a/src/main/java/org/opensearch/knn/plugin/KNNPlugin.java +++ b/src/main/java/org/opensearch/knn/plugin/KNNPlugin.java @@ -14,6 +14,7 @@ import org.opensearch.indices.SystemIndexDescriptor; import org.opensearch.knn.index.KNNCircuitBreaker; import org.opensearch.knn.index.KNNClusterUtil; +import org.opensearch.knn.index.fetch.KNNFetchSubPhase; import org.opensearch.knn.index.mapper.KNNVectorFieldMapperUtil; import org.opensearch.knn.index.query.KNNQueryBuilder; import org.opensearch.knn.index.KNNSettings; @@ -96,6 +97,7 @@ import org.opensearch.script.ScriptContext; import org.opensearch.script.ScriptEngine; import org.opensearch.script.ScriptService; +import org.opensearch.search.fetch.FetchSubPhase; import org.opensearch.threadpool.ExecutorBuilder; import org.opensearch.threadpool.FixedExecutorBuilder; import org.opensearch.threadpool.ThreadPool; @@ -176,6 +178,11 @@ public List> getQueries() { return singletonList(new QuerySpec<>(KNNQueryBuilder.NAME, KNNQueryBuilder::new, KNNQueryBuilder::fromXContent)); } + @Override + public List getFetchSubPhases(FetchPhaseConstructionContext context) { + return singletonList(new KNNFetchSubPhase()); + } + @Override public Collection createComponents( Client client, diff --git a/src/test/java/org/opensearch/knn/index/KNNSyntheticSourceIT.java b/src/test/java/org/opensearch/knn/index/KNNSyntheticSourceIT.java new file mode 100644 index 000000000..fda3ceed0 --- /dev/null +++ b/src/test/java/org/opensearch/knn/index/KNNSyntheticSourceIT.java @@ -0,0 +1,600 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + * + * Modifications Copyright OpenSearch Contributors. See + * GitHub history for details. + */ + +package org.opensearch.knn.index; + +import org.apache.hc.core5.http.ParseException; +import org.apache.hc.core5.http.io.entity.EntityUtils; +import org.opensearch.client.Request; +import org.opensearch.client.Response; +import org.opensearch.client.ResponseException; +import org.opensearch.common.settings.Settings; +import org.opensearch.common.xcontent.XContentFactory; +import org.opensearch.core.rest.RestStatus; +import org.opensearch.core.xcontent.XContentBuilder; +import org.opensearch.knn.KNNRestTestCase; +import org.opensearch.knn.common.KNNConstants; +import org.opensearch.knn.index.query.KNNQueryBuilder; +import org.opensearch.knn.index.util.KNNEngine; + +import java.io.IOException; + +import static org.opensearch.knn.common.Constants.FIELD_FILTER; +import static org.opensearch.knn.common.Constants.FIELD_TERM; +import static org.opensearch.knn.common.KNNConstants.K; +import static org.opensearch.knn.common.KNNConstants.KNN; +import static org.opensearch.knn.common.KNNConstants.MIN_SCORE; +import static org.opensearch.knn.common.KNNConstants.PATH; +import static org.opensearch.knn.common.KNNConstants.QUERY; +import static org.opensearch.knn.common.KNNConstants.TYPE_NESTED; +import static org.opensearch.knn.common.KNNConstants.VECTOR; + +public class KNNSyntheticSourceIT extends KNNRestTestCase { + + static final String fieldName = "test-field-1"; + static final String nestedPath = "nested-field"; + static final String nestedFieldName = "test-nested-field-1"; + static final String nestedField = nestedPath + "." + nestedFieldName; + + public void testSyntheticSourceSearch_whenEnabledSynthetic_thenReturnSource() throws IOException, ParseException { + String indexNameWithSynthetic = "test-index-synthetic"; + + // Create an index + XContentBuilder builder = constructMappingBuilder(); + + String mapping = builder.toString(); + Settings indexSettingWithSynthetic = Settings.builder() + .put("number_of_shards", 1) + .put("number_of_replicas", 0) + .put("index.knn.synthetic_source.enabled", true) + .put("index.knn", true) + .build(); + + createKnnIndex(indexNameWithSynthetic, indexSettingWithSynthetic, mapping); + + Float[] vector = { 6.0f, 6.0f }; + addKnnDoc(indexNameWithSynthetic, "1", fieldName, vector); + float[] queryVector = { 6.0f, 6.0f }; + + Response responseWithSynthetic = searchKNNIndex(indexNameWithSynthetic, new KNNQueryBuilder(fieldName, queryVector, 10), 10); + String resp1 = EntityUtils.toString(responseWithSynthetic.getEntity()); + assertTrue(resp1.contains("\"test-field-1\":[6.0,6.0]")); + } + + public void testSyntheticSourceSearch_whenDisabledSynthetic_thenReturnNoSource() throws IOException, ParseException { + String indexNameWithoutSynthetic = "test-index-no-synthetic"; + + // Create an index + XContentBuilder builder = constructMappingBuilder(); + + String mapping = builder.toString(); + Settings indexSettingWithoutSynthetic = Settings.builder() + .put("number_of_shards", 1) + .put("number_of_replicas", 0) + .put("index.knn.synthetic_source.enabled", false) + .put("index.knn", true) + .build(); + + createKnnIndex(indexNameWithoutSynthetic, indexSettingWithoutSynthetic, mapping); + + Float[] vector = { 6.0f, 6.0f }; + addKnnDoc(indexNameWithoutSynthetic, "1", fieldName, vector); + float[] queryVector = { 6.0f, 6.0f }; + + Response responseWithoutSynthetic = searchKNNIndex(indexNameWithoutSynthetic, new KNNQueryBuilder(fieldName, queryVector, 10), 10); + String resp2 = EntityUtils.toString(responseWithoutSynthetic.getEntity()); + assertFalse(resp2.contains("\"test-field-1\":[6.0,6.0]")); + } + + public void testSyntheticSourceReindex_whenEnabledSynthetic_thenSuccess() throws IOException, ParseException { + String indexNameWithSynthetic = "test-index-synthetic"; + String reindexNameWithSynthetic = "test-reindex-synthetic"; + + // Create an index + XContentBuilder builder = constructMappingBuilder(); + + String mapping = builder.toString(); + Settings indexSettingWithSynthetic = Settings.builder() + .put("number_of_shards", 1) + .put("number_of_replicas", 0) + .put("index.knn.synthetic_source.enabled", true) + .put("index.knn", true) + .build(); + + createKnnIndex(indexNameWithSynthetic, indexSettingWithSynthetic, mapping); + createKnnIndex(reindexNameWithSynthetic, indexSettingWithSynthetic, mapping); + + Float[] vector = { 6.0f, 6.0f }; + addKnnDoc(indexNameWithSynthetic, "1", fieldName, vector); + float[] queryVector = { 6.0f, 6.0f }; + + doReindex(indexNameWithSynthetic, reindexNameWithSynthetic); + + Response responseWithSynthetic = searchKNNIndex(reindexNameWithSynthetic, new KNNQueryBuilder(fieldName, queryVector, 10), 10); + String resp1 = EntityUtils.toString(responseWithSynthetic.getEntity()); + assertTrue(resp1.contains("\"test-field-1\":[6.0,6.0]")); + } + + public void testSyntheticSourceReindex_whenDisableSynthetic_thenFailed() throws IOException, ParseException { + String indexNameWithoutSynthetic = "test-index-no-synthetic"; + String reindexNameWithoutSynthetic = "test-reindex-no-synthetic"; + String fieldName = "test-field-1"; + + // Create an index + XContentBuilder builder = constructMappingBuilder(); + + String mapping = builder.toString(); + Settings indexSettingWithoutSynthetic = Settings.builder() + .put("number_of_shards", 1) + .put("number_of_replicas", 0) + .put("index.knn.synthetic_source.enabled", false) + .put("index.knn", true) + .build(); + + createKnnIndex(indexNameWithoutSynthetic, indexSettingWithoutSynthetic, mapping); + createKnnIndex(reindexNameWithoutSynthetic, indexSettingWithoutSynthetic, mapping); + + Float[] vector = { 6.0f, 6.0f }; + addKnnDoc(indexNameWithoutSynthetic, "1", fieldName, vector); + float[] queryVector = { 6.0f, 6.0f }; + + doReindex(indexNameWithoutSynthetic, reindexNameWithoutSynthetic); + + Response responseWithoutSynthetic = searchKNNIndex( + reindexNameWithoutSynthetic, + new KNNQueryBuilder(fieldName, queryVector, 10), + 10 + ); + String resp2 = EntityUtils.toString(responseWithoutSynthetic.getEntity()); + assertFalse(resp2.contains("\"test-field-1\":[6.0,6.0]")); + } + + public void testNestedFieldSyntheticSourceSearch_whenEnabledSynthetic_thenReturnSourceSuccess() throws IOException, ParseException { + String indexNameWithSynthetic = "test-index-nested-field-synthetic"; + // Create index + XContentBuilder builder = constructNestedMappingBuilder(); + + String mapping = builder.toString(); + Settings indexSettingWithSynthetic = Settings.builder() + .put("number_of_shards", 1) + .put("number_of_replicas", 0) + .put("index.knn.synthetic_source.enabled", true) + .put("index.knn", true) + .build(); + + createKnnIndex(indexNameWithSynthetic, indexSettingWithSynthetic, mapping); + + Float[] vector = { 6.0f, 6.0f }; + addKnnDocWithTwoNestedField(indexNameWithSynthetic, "1", nestedField, vector, vector); + + Response responseWithSynthetic = queryNestedField(indexNameWithSynthetic, 10, vector); + String resp1 = EntityUtils.toString(responseWithSynthetic.getEntity()); + assertTrue(resp1.contains("\"nested-field\":[{\"test-nested-field-1\":[6.0,6.0]},{\"test-nested-field-1\":[6.0,6.0]}]")); + } + + public void testNestedFieldSyntheticSourceSearch_whenDisabledSynthetic_thenReturnNothingSuccess() throws IOException, ParseException { + String indexNameWithSynthetic = "test-index-nested-field-synthetic"; + // Create index + XContentBuilder builder = constructNestedMappingBuilder(); + + String mapping = builder.toString(); + Settings indexSettingWithSynthetic = Settings.builder() + .put("number_of_shards", 1) + .put("number_of_replicas", 0) + .put("index.knn.synthetic_source.enabled", false) + .put("index.knn", true) + .build(); + + createKnnIndex(indexNameWithSynthetic, indexSettingWithSynthetic, mapping); + + Float[] vector = { 6.0f, 6.0f }; + addKnnDocWithTwoNestedField(indexNameWithSynthetic, "1", nestedField, vector, vector); + + Response responseWithSynthetic = queryNestedField(indexNameWithSynthetic, 10, vector); + String resp1 = EntityUtils.toString(responseWithSynthetic.getEntity()); + assertFalse(resp1.contains("\"nested-field\":[{\"test-nested-field-1\":[6.0,6.0]},{\"test-nested-field-1\":[6.0,6.0]}]")); + } + + public void testMultiNestedField_whenEnabledSynthetic_thenReturnSuccess() throws IOException, ParseException { + String indexNameWithSynthetic = "test-index-nested-field-synthetic"; + // Create index + XContentBuilder builder = constructNestedMappingBuilder(); + + String mapping = builder.toString(); + Settings indexSettingWithSynthetic = Settings.builder() + .put("number_of_shards", 1) + .put("number_of_replicas", 0) + .put("index.knn.synthetic_source.enabled", true) + .put("index.knn", true) + .build(); + + createKnnIndex(indexNameWithSynthetic, indexSettingWithSynthetic, mapping); + + /* + "nested_field" : [ + {"nested_numeric": 1, "nested_vector": [2.6, 2.6]}, + {"nested_numeric": 2, "nested_vector": [3.1, 2.3]} + ] + */ + Float[] vector = { 6.0f, 6.0f }; + String[] fieldParts = nestedField.split("\\."); + XContentBuilder docBuilder = XContentFactory.jsonBuilder().startObject(); + docBuilder.startArray(fieldParts[0]); + docBuilder.startObject(); + docBuilder.field("nested_numeric", 1.0); + docBuilder.field(fieldParts[1], vector); + docBuilder.endObject(); + docBuilder.startObject(); + docBuilder.field("nested_numeric", 2.0); + docBuilder.field(fieldParts[1], vector); + docBuilder.endObject(); + docBuilder.endArray(); + docBuilder.endObject(); + + addKnnDocWithBuilder(indexNameWithSynthetic, "1", docBuilder); + + Response responseWithSynthetic = queryNestedField(indexNameWithSynthetic, 10, vector, null, null, null, RestStatus.OK); + + String resp1 = EntityUtils.toString(responseWithSynthetic.getEntity()); + assertTrue( + resp1.contains( + "{\"nested-field\":[{\"nested_numeric\":1.0,\"test-nested-field-1\":[6.0,6.0]},{\"nested_numeric\":2.0,\"test-nested-field-1\":[6.0,6.0]}]}}]}" + ) + ); + } + + public void testMultiNestedFieldWithNull_whenEnabledSynthetic_thenReturnFailed() throws IOException, ParseException { + String indexNameWithSynthetic = "test-index-nested-field-synthetic"; + // Create index + XContentBuilder builder = constructNestedMappingBuilder(); + + String mapping = builder.toString(); + Settings indexSettingWithSynthetic = Settings.builder() + .put("number_of_shards", 1) + .put("number_of_replicas", 0) + .put("index.knn.synthetic_source.enabled", true) + .put("index.knn", true) + .build(); + + createKnnIndex(indexNameWithSynthetic, indexSettingWithSynthetic, mapping); + + /* + "nested_field" : [ + {"nested_vector": [2.6, 2.6]}, + {"nested_numeric": 2, "nested_vector": [3.1, 2.3]} + ] + */ + Float[] vector = { 6.0f, 6.0f }; + String[] fieldParts = nestedField.split("\\."); + XContentBuilder docBuilder = XContentFactory.jsonBuilder().startObject(); + docBuilder.startArray(fieldParts[0]); + docBuilder.startObject(); + docBuilder.field(fieldParts[1], vector); + docBuilder.endObject(); + docBuilder.startObject(); + docBuilder.field("nested_numeric", 2.0); + docBuilder.field(fieldParts[1], vector); + docBuilder.endObject(); + docBuilder.endArray(); + docBuilder.endObject(); + + addKnnDocWithBuilder(indexNameWithSynthetic, "1", docBuilder); + + try { + Response responseWithSynthetic = queryNestedField( + indexNameWithSynthetic, + 10, + vector, + null, + null, + null, + RestStatus.INTERNAL_SERVER_ERROR + ); + + if (responseWithSynthetic != null) { + // need throw exception + assertFalse(true); + } + } catch (ResponseException ex) { + assertTrue( + ex.toString() + .contains( + "Nested Path \\\"nested-field\\\" in Field \\\"" + + "nested-field.test-nested-field-1\\\" with _ID \\\"1\\\" can not be empty" + ) + ); + } + } + + public void testSyntheticSourceUpdate_whenEnabledSynthetic_thenReturnSource() throws IOException, ParseException { + String indexNameWithSynthetic = "test-index-synthetic"; + String fieldName = "test-field-1"; + Integer dimension = 2; + + KNNMethod hnswMethod = KNNEngine.FAISS.getMethod(KNNConstants.METHOD_HNSW); + SpaceType spaceType = SpaceType.L2; + + // Create an index + XContentBuilder builder = XContentFactory.jsonBuilder() + .startObject() + .startObject("_source") + .startArray("excludes") + .value(fieldName) + .endArray() + .endObject() + .startObject("properties") + .startObject(fieldName) + .field("type", "knn_vector") + .field("dimension", dimension) + .startObject(KNNConstants.KNN_METHOD) + .field(KNNConstants.NAME, hnswMethod.getMethodComponent().getName()) + .field(KNNConstants.METHOD_PARAMETER_SPACE_TYPE, spaceType.getValue()) + .field(KNNConstants.KNN_ENGINE, KNNEngine.FAISS.getName()) + .endObject() + .endObject() + .endObject() + .endObject(); + + String mapping = builder.toString(); + Settings indexSettingWithSynthetic = Settings.builder() + .put("number_of_shards", 1) + .put("number_of_replicas", 0) + .put("index.knn.synthetic_source.enabled", true) + .put("index.knn", true) + .build(); + + createKnnIndex(indexNameWithSynthetic, indexSettingWithSynthetic, mapping); + + Float[] vector = { 6.0f, 6.0f }; + addKnnDoc(indexNameWithSynthetic, "1", fieldName, vector); + float[] queryVector = { 6.0f, 6.0f }; + + Response responseWithSynthetic = searchKNNIndex(indexNameWithSynthetic, new KNNQueryBuilder(fieldName, queryVector, 10), 10); + String resp1 = EntityUtils.toString(responseWithSynthetic.getEntity()); + assertTrue(resp1.contains("\"test-field-1\":[6.0,6.0]")); + + Float[] vector2 = { 8.0f, 8.0f }; + updateKnnDoc(indexNameWithSynthetic, "1", fieldName, vector2); + float[] queryVector2 = { 8.0f, 8.0f }; + Response responseAfterUpdate = searchKNNIndex(indexNameWithSynthetic, new KNNQueryBuilder(fieldName, queryVector2, 10), 10); + String respUpdate = EntityUtils.toString(responseAfterUpdate.getEntity()); + assertTrue(respUpdate.contains("\"test-field-1\":[8.0,8.0]")); + } + + public void testSyntheticSourceUpdateOtherField_whenEnabledSynthetic_thenReturnNothing() throws IOException, ParseException { + String indexNameWithSynthetic = "test-index-synthetic"; + String fieldName = "test-field-1"; + String fieldName2 = "test-field-2"; + Integer dimension = 2; + + KNNMethod hnswMethod = KNNEngine.FAISS.getMethod(KNNConstants.METHOD_HNSW); + SpaceType spaceType = SpaceType.L2; + + // Create an index + XContentBuilder builder = XContentFactory.jsonBuilder() + .startObject() + .startObject("_source") + .startArray("excludes") + .value(fieldName) + .endArray() + .endObject() + .startObject("properties") + .startObject(fieldName) + .field("type", "knn_vector") + .field("dimension", dimension) + .startObject(KNNConstants.KNN_METHOD) + .field(KNNConstants.NAME, hnswMethod.getMethodComponent().getName()) + .field(KNNConstants.METHOD_PARAMETER_SPACE_TYPE, spaceType.getValue()) + .field(KNNConstants.KNN_ENGINE, KNNEngine.FAISS.getName()) + .endObject() + .endObject() + .startObject(fieldName2) + .field("type", "knn_vector") + .field("dimension", dimension) + .startObject(KNNConstants.KNN_METHOD) + .field(KNNConstants.NAME, hnswMethod.getMethodComponent().getName()) + .field(KNNConstants.METHOD_PARAMETER_SPACE_TYPE, spaceType.getValue()) + .field(KNNConstants.KNN_ENGINE, KNNEngine.FAISS.getName()) + .endObject() + .endObject() + .endObject() + .endObject(); + + String mapping = builder.toString(); + Settings indexSettingWithSynthetic = Settings.builder() + .put("number_of_shards", 1) + .put("number_of_replicas", 0) + .put("index.knn.synthetic_source.enabled", true) + .put("index.knn", true) + .build(); + + createKnnIndex(indexNameWithSynthetic, indexSettingWithSynthetic, mapping); + + Float[] vector = { 6.0f, 6.0f }; + addKnnDoc(indexNameWithSynthetic, "1", fieldName, vector); + float[] queryVector = { 6.0f, 6.0f }; + + Response responseWithSynthetic = searchKNNIndex(indexNameWithSynthetic, new KNNQueryBuilder(fieldName, queryVector, 10), 10); + String resp1 = EntityUtils.toString(responseWithSynthetic.getEntity()); + assertTrue(resp1.contains("\"test-field-1\":[6.0,6.0]")); + + Float[] vector2 = { 8.0f, 8.0f }; + updateKnnDoc(indexNameWithSynthetic, "1", fieldName2, vector2); + float[] queryVector2 = { 8.0f, 8.0f }; + Response responseAfterUpdate = searchKNNIndex(indexNameWithSynthetic, new KNNQueryBuilder(fieldName2, queryVector2, 10), 10); + String respUpdate = EntityUtils.toString(responseAfterUpdate.getEntity()); + assertTrue(respUpdate.contains("\"test-field-2\":[8.0,8.0]")); + assertFalse(respUpdate.contains("\"test-field-1\"")); + } + + private Response queryNestedField(final String index, final int k, final Object[] vector) throws IOException { + return queryNestedField(index, k, vector, null, null, null, RestStatus.OK); + } + + private Response queryNestedField( + final String index, + final Integer k, + final Object[] vector, + final String filterName, + final String filterValue, + final Float minScore, + RestStatus Expectstatus + ) throws IOException { + XContentBuilder builder = XContentFactory.jsonBuilder().startObject().startObject(QUERY); + builder.startObject(TYPE_NESTED); + builder.field(PATH, nestedPath); + builder.startObject(QUERY).startObject(KNN).startObject(nestedPath + "." + nestedFieldName); + builder.field(VECTOR, vector); + if (minScore != null) { + builder.field(MIN_SCORE, minScore); + } else if (k != null) { + builder.field(K, k); + } else { + throw new IllegalArgumentException("k or minScore must be provided in the query"); + } + if (filterName != null && filterValue != null) { + builder.startObject(FIELD_FILTER); + builder.startObject(FIELD_TERM); + builder.field(filterName, filterValue); + builder.endObject(); + builder.endObject(); + } + builder.endObject().endObject().endObject().endObject().endObject().endObject(); + String requestStr = builder.toString(); + Request request = new Request("POST", "/" + index + "/_search"); + request.setJsonEntity(requestStr); + Response response; + + response = client().performRequest(request); + assertEquals(request.getEndpoint() + ": failed", Expectstatus, RestStatus.fromCode(response.getStatusLine().getStatusCode())); + + return response; + } + + /** + * Add a single KNN Doc to an index with two nested vector field + * + * @param index name of the index + * @param docId id of the document + * @param nestedFieldPath path of the nested field, e.g. "my_nested_field.my_vector" + * @param vector1 vector to add + * @param vector2 vector to add + * + */ + private void addKnnDocWithTwoNestedField(String index, String docId, String nestedFieldPath, Object[] vector1, Object[] vector2) + throws IOException { + String[] fieldParts = nestedFieldPath.split("\\."); + + XContentBuilder builder = XContentFactory.jsonBuilder().startObject(); + builder.startArray(fieldParts[0]); + builder.startObject(); + builder.field(fieldParts[1], vector1); + builder.endObject(); + builder.startObject(); + builder.field(fieldParts[1], vector2); + builder.endObject(); + + builder.endArray(); + builder.endObject(); + addKnnDocWithBuilder(index, docId, builder); + } + + private void addKnnDocWithBuilder(String index, String docId, XContentBuilder builder) throws IOException { + + Request request = new Request("POST", "/" + index + "/_doc/" + docId + "?refresh=true"); + String docStr = builder.toString(); + request.setJsonEntity(docStr); + client().performRequest(request); + + request = new Request("POST", "/" + index + "/_refresh"); + Response response = client().performRequest(request); + assertEquals(request.getEndpoint() + ": failed", RestStatus.OK, RestStatus.fromCode(response.getStatusLine().getStatusCode())); + } + + private XContentBuilder constructNestedMappingBuilder() throws IOException { + Integer dimension = 2; + + KNNMethod hnswMethod = KNNEngine.FAISS.getMethod(KNNConstants.METHOD_HNSW); + SpaceType spaceType = SpaceType.L2; + /* + "mappings":{ + "_source":{ + "excludes":[nestedFieldName] + }, + "properties:{ + "nestedField":{ + "type":"nested", + "properties":{ + "nestedFieldName":{ + "type":"knn_vector", + "dimension":2 + } + } + } + } + } + */ + XContentBuilder builder = XContentFactory.jsonBuilder() + .startObject() + .startObject("_source") + .startArray("excludes") + .value(nestedField) + .endArray() + .endObject() + .startObject("properties") + .startObject(nestedPath) + .field("type", "nested") + .startObject("properties") + .startObject(nestedFieldName) + .field("type", "knn_vector") + .field("dimension", dimension) + .startObject(KNNConstants.KNN_METHOD) + .field(KNNConstants.NAME, hnswMethod.getMethodComponent().getName()) + .field(KNNConstants.METHOD_PARAMETER_SPACE_TYPE, spaceType.getValue()) + .field(KNNConstants.KNN_ENGINE, KNNEngine.FAISS.getName()) + .endObject() + .endObject() + .endObject() + .endObject() + .endObject() + .endObject(); + return builder; + } + + private XContentBuilder constructMappingBuilder() throws IOException { + Integer dimension = 2; + + KNNMethod hnswMethod = KNNEngine.FAISS.getMethod(KNNConstants.METHOD_HNSW); + SpaceType spaceType = SpaceType.L2; + + // Create an index + XContentBuilder builder = XContentFactory.jsonBuilder() + .startObject() + .startObject("_source") + .startArray("excludes") + .value(fieldName) + .endArray() + .endObject() + .startObject("properties") + .startObject(fieldName) + .field("type", "knn_vector") + .field("dimension", dimension) + .startObject(KNNConstants.KNN_METHOD) + .field(KNNConstants.NAME, hnswMethod.getMethodComponent().getName()) + .field(KNNConstants.METHOD_PARAMETER_SPACE_TYPE, spaceType.getValue()) + .field(KNNConstants.KNN_ENGINE, KNNEngine.FAISS.getName()) + .endObject() + .endObject() + .endObject() + .endObject(); + return builder; + } +} diff --git a/src/test/java/org/opensearch/knn/index/KNNVectorDVLeafFieldDataTests.java b/src/test/java/org/opensearch/knn/index/KNNVectorDVLeafFieldDataTests.java index cbe11dd6b..dcb736eb0 100644 --- a/src/test/java/org/opensearch/knn/index/KNNVectorDVLeafFieldDataTests.java +++ b/src/test/java/org/opensearch/knn/index/KNNVectorDVLeafFieldDataTests.java @@ -17,6 +17,7 @@ import org.apache.lucene.index.LeafReaderContext; import org.apache.lucene.store.Directory; import org.opensearch.index.fielddata.ScriptDocValues; +import org.opensearch.search.DocValueFormat; import org.junit.Before; import java.io.IOException; @@ -96,4 +97,9 @@ public void testGetBytesValues() { KNNVectorDVLeafFieldData leafFieldData = new KNNVectorDVLeafFieldData(leafReaderContext.reader(), "", VectorDataType.FLOAT); expectThrows(UnsupportedOperationException.class, () -> leafFieldData.getBytesValues()); } + + public void testGetLeafValueFetcher() { + KNNVectorDVLeafFieldData leafFieldData = new KNNVectorDVLeafFieldData(leafReaderContext.reader(), "", VectorDataType.FLOAT); + assertNotNull(leafFieldData.getLeafValueFetcher(DocValueFormat.RAW)); + } } diff --git a/src/test/java/org/opensearch/knn/index/fetch/KNNFetchSubPhaseTests.java b/src/test/java/org/opensearch/knn/index/fetch/KNNFetchSubPhaseTests.java new file mode 100644 index 000000000..1c4d331b9 --- /dev/null +++ b/src/test/java/org/opensearch/knn/index/fetch/KNNFetchSubPhaseTests.java @@ -0,0 +1,165 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + * + * Modifications Copyright OpenSearch Contributors. See + * GitHub history for details. + */ + +package org.opensearch.knn.index.fetch; + +import org.apache.lucene.index.LeafReaderContext; +import org.opensearch.common.settings.Settings; +import org.opensearch.common.xcontent.XContentFactory; +import org.opensearch.core.common.bytes.BytesReference; +import org.opensearch.core.xcontent.XContentBuilder; +import org.opensearch.index.IndexService; +import org.opensearch.index.IndexSettings; +import org.opensearch.index.engine.Engine; +import org.opensearch.index.fieldvisitor.FieldsVisitor; +import org.opensearch.index.mapper.MapperService; +import org.opensearch.index.query.QueryShardContext; +import org.opensearch.index.shard.IndexShard; +import org.opensearch.knn.KNNSingleNodeTestCase; +import org.opensearch.knn.common.KNNConstants; +import org.opensearch.knn.index.KNNMethod; +import org.opensearch.knn.index.SpaceType; +import org.opensearch.knn.index.util.KNNEngine; +import org.opensearch.search.SearchHit; +import org.opensearch.search.fetch.FetchContext; +import org.opensearch.search.fetch.FetchSubPhase; +import org.opensearch.search.fetch.FetchSubPhaseProcessor; +import org.opensearch.search.lookup.SearchLookup; + +import java.io.IOException; +import java.util.Collections; +import java.util.List; +import java.util.concurrent.ExecutionException; + +import static java.util.Collections.emptyMap; +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.when; +import static org.opensearch.knn.index.KNNSettings.KNN_SYNTHETIC_SOURCE_ENABLED_SETTING; + +public class KNNFetchSubPhaseTests extends KNNSingleNodeTestCase { + static final String testIndexName = "test-index"; + static final String fieldName = "test-field-1"; + + public void testSyntheticSourceSettingDisabled() throws IOException { + FetchContext fetchContext = mock(FetchContext.class); + IndexSettings indexSettings = mock(IndexSettings.class); + when(indexSettings.getValue(KNN_SYNTHETIC_SOURCE_ENABLED_SETTING)).thenReturn(false); + when(fetchContext.getIndexSettings()).thenReturn(indexSettings); + KNNFetchSubPhase phase = new KNNFetchSubPhase(); + FetchSubPhaseProcessor processor = phase.getProcessor(fetchContext); + assertNull(processor); + } + + public void testKNNFetchSubPhaseGetProcessor() throws IOException, ExecutionException, InterruptedException { + XContentBuilder mapping = constructMappingBuilder(); + IndexService indexService = createIndex(testIndexName, constructSettings(), "_doc", mapping); + addKnnDoc(testIndexName, "1", fieldName, new Float[] { 2.5F, 3.5F }); + + IndexSettings indexSettings = indexService.getIndexSettings(); + MapperService mapperService = indexService.mapperService(); + FetchContext fetchContext = mock(FetchContext.class); + when(fetchContext.mapperService()).thenReturn(mapperService); + when(fetchContext.getIndexSettings()).thenReturn(indexSettings); + + QueryShardContext queryShardContext = indexService.newQueryShardContext(0, null, System::currentTimeMillis, null); + SearchLookup searchLookup = queryShardContext.newFetchLookup(); + when(fetchContext.searchLookup()).thenReturn(searchLookup); + + KNNFetchSubPhase phase = new KNNFetchSubPhase(); + FetchSubPhaseProcessor processor = phase.getProcessor(fetchContext); + assertNotNull(processor); + assertTrue(processor instanceof KNNFetchSubPhase.KNNFetchSubPhaseProcessor); + KNNFetchSubPhase.KNNFetchSubPhaseProcessor fetchProcessor = (KNNFetchSubPhase.KNNFetchSubPhaseProcessor) processor; + assertNotNull(fetchProcessor.getFields()); + assertEquals(fetchProcessor.getFields().get(0).getField(), fieldName); + } + + public void testKNNFetchSubPhaseProcessorProcessValue() throws IOException, ExecutionException, InterruptedException { + XContentBuilder mapping = constructMappingBuilder(); + IndexService indexService = createIndex(testIndexName, constructSettings(), "_doc", mapping); + addKnnDoc(testIndexName, "1", fieldName, new Float[] { 2.5F, 3.5F }); + + IndexSettings indexSettings = indexService.getIndexSettings(); + MapperService mapperService = indexService.mapperService(); + FetchContext fetchContext = mock(FetchContext.class); + when(fetchContext.mapperService()).thenReturn(mapperService); + when(fetchContext.getIndexSettings()).thenReturn(indexSettings); + + IndexShard indexShard = indexService.getShard(0); + Engine.Searcher searcher = indexShard.acquireSearcher("Test"); + QueryShardContext queryShardContext = indexService.newQueryShardContext(0, searcher, System::currentTimeMillis, null); + SearchLookup searchLookup = queryShardContext.newFetchLookup(); + when(fetchContext.searchLookup()).thenReturn(searchLookup); + + KNNFetchSubPhase phase = new KNNFetchSubPhase(); + FetchSubPhaseProcessor processor = phase.getProcessor(fetchContext); + + List listLeafReadContext = queryShardContext.getIndexReader().leaves(); + LeafReaderContext leafReaderContext = listLeafReadContext.get(0); + FieldsVisitor fieldsVisitor = new FieldsVisitor(true); + leafReaderContext.reader().storedFields().document(0, fieldsVisitor); + + final SearchHit searchHit = new SearchHit(0, "1", Collections.emptyMap(), emptyMap()); + + FetchSubPhase.HitContext hitContext = new FetchSubPhase.HitContext(searchHit, leafReaderContext, 0, searchLookup.source()); + BytesReference bytesReference = fieldsVisitor.source(); + hitContext.sourceLookup().setSource(bytesReference); + hitContext.hit().sourceRef(bytesReference); + + String preSource = hitContext.hit().getSourceAsString(); + assertNotNull(preSource); + assertFalse(preSource.contains("test-field-1")); + processor.setNextReader(leafReaderContext); + processor.process(hitContext); + String afterSource = hitContext.hit().getSourceAsString(); + assertTrue(afterSource.contains("\"test-field-1\":[2.5,3.5]")); + searcher.close(); + } + + private Settings constructSettings() throws IOException { + Settings indexSettingWithSynthetic = Settings.builder() + .put("number_of_shards", 1) + .put("number_of_replicas", 0) + .put("index.knn.synthetic_source.enabled", true) + .put("index.knn", true) + .build(); + return indexSettingWithSynthetic; + } + + private XContentBuilder constructMappingBuilder() throws IOException { + Integer dimension = 2; + + KNNMethod hnswMethod = KNNEngine.FAISS.getMethod(KNNConstants.METHOD_HNSW); + SpaceType spaceType = SpaceType.L2; + + // Create an index + XContentBuilder builder = XContentFactory.jsonBuilder() + .startObject() + .startObject("_source") + .startArray("excludes") + .value(fieldName) + .endArray() + .endObject() + .startObject("properties") + .startObject(fieldName) + .field("type", "knn_vector") + .field("dimension", dimension) + .startObject(KNNConstants.KNN_METHOD) + .field(KNNConstants.NAME, hnswMethod.getMethodComponent().getName()) + .field(KNNConstants.METHOD_PARAMETER_SPACE_TYPE, spaceType.getValue()) + .field(KNNConstants.KNN_ENGINE, KNNEngine.FAISS.getName()) + .endObject() + .endObject() + .endObject() + .endObject(); + return builder; + } +} diff --git a/src/testFixtures/java/org/opensearch/knn/KNNRestTestCase.java b/src/testFixtures/java/org/opensearch/knn/KNNRestTestCase.java index 860cd2efa..c867f924d 100644 --- a/src/testFixtures/java/org/opensearch/knn/KNNRestTestCase.java +++ b/src/testFixtures/java/org/opensearch/knn/KNNRestTestCase.java @@ -644,6 +644,26 @@ protected Map getKnnDoc(final String index, final String docId) return docMap; } + protected Map doReindex(final String sourceIndex, final String destinationIndex) throws IOException { + Request request = new Request("POST", "/_reindex?refresh=true"); + request.setJsonEntity( + "{\n" + + " \"source\":{\n" + + " \"index\":\"" + + sourceIndex + + "\"\n" + + " },\n" + + " \"dest\":{\n" + + " \"index\":\"" + + destinationIndex + + "\"\n" + + " }\n" + + "}" + ); + Map response = entityAsMap(client().performRequest(request)); + return response; + } + /** * Utility to update settings */