opensearch-project · jmazanec15 · Aug 10, 2024 · Aug 7, 2024 · Aug 9, 2024 · Aug 9, 2024
@@ -31,4 +31,5 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
 * Refactor method structure and definitions [#1920](https://github.com/opensearch-project/k-NN/pull/1920)
 * Refactor KNNVectorFieldType from KNNVectorFieldMapper to a separate class for better readability. [#1931](https://github.com/opensearch-project/k-NN/pull/1931)
 * Generalize lib interface to return context objects [#1925](https://github.com/opensearch-project/k-NN/pull/1925)
-* Move k search k-NN query to re-write phase of vector search query for Native Engines [#1877](https://github.com/opensearch-project/k-NN/pull/1877)
+* Move k search k-NN query to re-write phase of vector search query for Native Engines [#1877](https://github.com/opensearch-project/k-NN/pull/1877)
+* Restructure mappers to better handle null cases and avoid branching in parsing [#1939](https://github.com/opensearch-project/k-NN/pull/1939)
@@ -13,6 +13,8 @@
 import org.opensearch.knn.index.codec.params.KNNScalarQuantizedVectorsFormatParams;
 import org.opensearch.knn.index.codec.params.KNNVectorsFormatParams;
 import org.opensearch.knn.index.engine.KNNEngine;
+import org.opensearch.knn.index.engine.KNNMethodContext;
+import org.opensearch.knn.index.mapper.KNNMappingConfig;
 import org.opensearch.knn.index.mapper.KNNVectorFieldType;
 
 import java.util.Optional;
@@ -66,16 +68,19 @@ public KnnVectorsFormat getKnnVectorsFormatForField(final String field) {
             );
             return defaultFormatSupplier.get();
         }
-        var type = (KNNVectorFieldType) mapperService.orElseThrow(
+        KNNVectorFieldType mappedFieldType = (KNNVectorFieldType) mapperService.orElseThrow(
             () -> new IllegalStateException(
                 String.format("Cannot read field type for field [%s] because mapper service is not available", field)
             )
         ).fieldType(field);
-        var params = type.getKnnMethodContext().getMethodComponentContext().getParameters();
 
-        if (type.getKnnMethodContext().getKnnEngine() == KNNEngine.LUCENE
-            && params != null
-            && params.containsKey(METHOD_ENCODER_PARAMETER)) {
+        KNNMappingConfig knnMappingConfig = mappedFieldType.getKnnMappingConfig();
+        KNNMethodContext knnMethodContext = knnMappingConfig.getKnnMethodContext()
+            .orElseThrow(() -> new IllegalArgumentException("KNN method context cannot be empty"));
+
+        var params = knnMethodContext.getMethodComponentContext().getParameters();
+
+        if (knnMethodContext.getKnnEngine() == KNNEngine.LUCENE && params != null && params.containsKey(METHOD_ENCODER_PARAMETER)) {
             KNNScalarQuantizedVectorsFormatParams knnScalarQuantizedVectorsFormatParams = new KNNScalarQuantizedVectorsFormatParams(
                 params,
                 defaultMaxConnections,

@@ -21,7 +21,6 @@
 import org.opensearch.knn.index.codec.transfer.VectorTransferByte;
 import org.opensearch.knn.index.codec.transfer.VectorTransferFloat;
 import org.opensearch.knn.jni.JNIService;
-import org.opensearch.knn.index.SpaceType;
 import org.opensearch.knn.index.codec.util.KNNCodecUtil;
 import org.opensearch.knn.index.engine.KNNEngine;
 import org.opensearch.knn.indices.Model;
@@ -57,7 +56,6 @@
 
 import static org.apache.lucene.codecs.CodecUtil.FOOTER_MAGIC;
 import static org.opensearch.knn.common.KNNConstants.MODEL_ID;
-import static org.opensearch.knn.common.KNNConstants.PARAMETERS;
 import static org.opensearch.knn.index.codec.util.KNNCodecUtil.buildEngineFileName;
 import static org.opensearch.knn.index.codec.util.KNNCodecUtil.calculateArraySize;
 import static org.opensearch.knn.index.engine.faiss.Faiss.FAISS_BINARY_INDEX_DESCRIPTION_PREFIX;
@@ -214,35 +212,21 @@ private void createKNNIndexFromTemplate(Model model, KNNCodecUtil.Pair pair, KNN
 
     private void createKNNIndexFromScratch(FieldInfo fieldInfo, KNNCodecUtil.Pair pair, KNNEngine knnEngine, String indexPath)
         throws IOException {
-        Map<String, Object> parameters = new HashMap<>();
         Map<String, String> fieldAttributes = fieldInfo.attributes();
         String parametersString = fieldAttributes.get(KNNConstants.PARAMETERS);
-
-        // parametersString will be null when legacy mapper is used
         if (parametersString == null) {
-            parameters.put(KNNConstants.SPACE_TYPE, fieldAttributes.getOrDefault(KNNConstants.SPACE_TYPE, SpaceType.DEFAULT.getValue()));
-
-            String efConstruction = fieldAttributes.get(KNNConstants.HNSW_ALGO_EF_CONSTRUCTION);
-            Map<String, Object> algoParams = new HashMap<>();
-            if (efConstruction != null) {
-                algoParams.put(KNNConstants.METHOD_PARAMETER_EF_CONSTRUCTION, Integer.parseInt(efConstruction));
-            }
-
-            String m = fieldAttributes.get(KNNConstants.HNSW_ALGO_M);
-            if (m != null) {
-                algoParams.put(KNNConstants.METHOD_PARAMETER_M, Integer.parseInt(m));
-            }
-            parameters.put(PARAMETERS, algoParams);
-        } else {
-            parameters.putAll(
-                XContentHelper.createParser(
-                    NamedXContentRegistry.EMPTY,
-                    DeprecationHandler.THROW_UNSUPPORTED_OPERATION,
-                    new BytesArray(parametersString),
-                    MediaTypeRegistry.getDefaultMediaType()
-                ).map()
+            throw new IllegalStateException(
+                "Parameter string is not set when creating a KNN index. We should not enter this state. It is likely a bug with respect to removal of Legacy field mapper."
             );
         }
+        Map<String, Object> parameters = new HashMap<>(
+            XContentHelper.createParser(
+                NamedXContentRegistry.EMPTY,
+                DeprecationHandler.THROW_UNSUPPORTED_OPERATION,
+                new BytesArray(parametersString),
+                MediaTypeRegistry.getDefaultMediaType()
+            ).map()
+        );
 
         // Update index description of Faiss for binary data type
         if (KNNEngine.FAISS == knnEngine

@@ -9,7 +9,6 @@
 import lombok.Getter;
 import lombok.NonNull;
 import lombok.Setter;
-import org.opensearch.Version;
 import org.opensearch.common.ValidationException;
 import org.opensearch.core.common.io.stream.StreamInput;
 import org.opensearch.core.common.io.stream.StreamOutput;
@@ -20,7 +19,6 @@
 import org.opensearch.index.mapper.MapperParsingException;
 
 import java.io.IOException;
-import java.util.Collections;
 import java.util.HashMap;
 import java.util.Map;
 import java.util.stream.Collectors;
@@ -29,7 +27,6 @@
 import org.opensearch.knn.training.VectorSpaceInfo;
 
 import static org.opensearch.knn.common.KNNConstants.KNN_ENGINE;
-import static org.opensearch.knn.common.KNNConstants.METHOD_HNSW;
 import static org.opensearch.knn.common.KNNConstants.METHOD_PARAMETER_SPACE_TYPE;
 import static org.opensearch.knn.common.KNNConstants.NAME;
 import static org.opensearch.knn.common.KNNConstants.PARAMETERS;
@@ -42,21 +39,6 @@
 @Getter
 public class KNNMethodContext implements ToXContentFragment, Writeable {
 
-    private static KNNMethodContext defaultInstance = null;
-
-    /**
-     * This is used only for testing
-     * @return default KNNMethodContext for testing
-     */
-    public static synchronized KNNMethodContext getDefault() {
-        if (defaultInstance == null) {
-            MethodComponentContext methodComponentContext = new MethodComponentContext(METHOD_HNSW, Collections.emptyMap());
-            methodComponentContext.setIndexVersion(Version.CURRENT);
-            defaultInstance = new KNNMethodContext(KNNEngine.DEFAULT, SpaceType.DEFAULT, methodComponentContext);
-        }
-        return defaultInstance;
-    }
-
     @NonNull
     private final KNNEngine knnEngine;
     @NonNull

@@ -0,0 +1,97 @@
+/*
+ * Copyright OpenSearch Contributors
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+package org.opensearch.knn.index.mapper;
+
+import org.apache.lucene.document.FieldType;
+import org.opensearch.Version;
+import org.opensearch.common.Explicit;
+import org.opensearch.knn.index.VectorDataType;
+
+import java.util.Map;
+import java.util.Optional;
+
+/**
+ * Mapper used when you dont want to build an underlying KNN struct - you just want to
+ * store vectors as doc values
+ */
+public class FlatVectorFieldMapper extends KNNVectorFieldMapper {
+
+    private final PerDimensionValidator perDimensionValidator;
+
+    public static FlatVectorFieldMapper createFieldMapper(
+        String fullname,
+        String simpleName,
+        Map<String, String> metaValue,
+        VectorDataType vectorDataType,
+        Integer dimension,
+        MultiFields multiFields,
+        CopyTo copyTo,
+        Explicit<Boolean> ignoreMalformed,
+        boolean stored,
+        boolean hasDocValues,
+        Version indexCreatedVersion
+    ) {
+        final KNNVectorFieldType mappedFieldType = new KNNVectorFieldType(fullname, metaValue, vectorDataType, new KNNMappingConfig() {
+            @Override
+            public Optional<Integer> getDimension() {
+                return Optional.of(dimension);
+            }
+        });
+        return new FlatVectorFieldMapper(
+            simpleName,
+            mappedFieldType,
+            multiFields,
+            copyTo,
+            ignoreMalformed,
+            stored,
+            hasDocValues,
+            indexCreatedVersion
+        );
+    }
+
+    private FlatVectorFieldMapper(
+        String simpleName,
+        KNNVectorFieldType mappedFieldType,
+        MultiFields multiFields,
+        CopyTo copyTo,
+        Explicit<Boolean> ignoreMalformed,
+        boolean stored,
+        boolean hasDocValues,
+        Version indexCreatedVersion
+    ) {
+        super(simpleName, mappedFieldType, multiFields, copyTo, ignoreMalformed, stored, hasDocValues, indexCreatedVersion, null);
+        this.perDimensionValidator = selectPerDimensionValidator(vectorDataType);
+        this.fieldType = new FieldType(KNNVectorFieldMapper.Defaults.FIELD_TYPE);
+        this.fieldType.freeze();
+    }
+
+    private PerDimensionValidator selectPerDimensionValidator(VectorDataType vectorDataType) {
+        if (VectorDataType.BINARY == vectorDataType) {
+            return PerDimensionValidator.DEFAULT_BIT_VALIDATOR;
+        }
+
+        if (VectorDataType.BYTE == vectorDataType) {
+            return PerDimensionValidator.DEFAULT_BYTE_VALIDATOR;
+        }
+
+        return PerDimensionValidator.DEFAULT_FLOAT_VALIDATOR;
+    }
+
+    @Override
+    protected VectorValidator getVectorValidator() {
+        return VectorValidator.NOOP_VECTOR_VALIDATOR;
+    }
+
+    @Override
+    protected PerDimensionValidator getPerDimensionValidator() {
+        return perDimensionValidator;
+    }
+
+    @Override
+    protected PerDimensionProcessor getPerDimensionProcessor() {
+        return PerDimensionProcessor.NOOP_PROCESSOR;
+    }
+}
@@ -0,0 +1,40 @@
+/*
+ * Copyright OpenSearch Contributors
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+package org.opensearch.knn.index.mapper;
+
+import org.opensearch.knn.index.engine.KNNMethodContext;
+
+import java.util.Optional;
+
+/**
+ * Class holds information about how the ANN indices are created. The design of this class ensures that we do not
+ * accidentally configure an index that has multiple ways it can be created. This class is immutable.
+ */
+public interface KNNMappingConfig {
+    /**
+     *
+     * @return Optional containing the modelId if created from model, otherwise empty
+     */
+    default Optional<String> getModelId() {
+        return Optional.empty();
+    }
+
+    /**
+     *
+     * @return Optional containing the KNNMethodContext if created from method, otherwise empty
+     */
+    default Optional<KNNMethodContext> getKnnMethodContext() {
+        return Optional.empty();
+    }
+
+    /**
+     *
+     * @return the dimension of the index; for model based indices, it will be null
+     */
+    default Optional<Integer> getDimension() {
+        return Optional.empty();
+    }
+}