Skip to content

Commit

Permalink
Add spaceType as a top level parameter while creating vector field. (o…
Browse files Browse the repository at this point in the history
…pensearch-project#2044)

* Add spaceType as a top level parameter while creating vector field.

Signed-off-by: Navneet Verma <[email protected]>

* fix release notes

Signed-off-by: John Mazanec <[email protected]>

* Remove commented out code

Signed-off-by: John Mazanec <[email protected]>

---------

Signed-off-by: Navneet Verma <[email protected]>
Signed-off-by: John Mazanec <[email protected]>
Co-authored-by: John Mazanec <[email protected]>
  • Loading branch information
navneet1v and jmazanec15 authored Sep 5, 2024
1 parent 589a27b commit cb9ba71
Show file tree
Hide file tree
Showing 12 changed files with 240 additions and 45 deletions.
3 changes: 2 additions & 1 deletion release-notes/opensearch-knn.release-notes-2.17.0.0.md
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ Compatible with OpenSearch 2.17.0
* Add support for byte vector with Faiss Engine HNSW algorithm [#1823](https://github.com/opensearch-project/k-NN/pull/1823)
* Add support for byte vector with Faiss Engine IVF algorithm [#2002](https://github.com/opensearch-project/k-NN/pull/2002)
* Add mode/compression configuration support for disk-based vector search [#2034](https://github.com/opensearch-project/k-NN/pull/2034)
* Add spaceType as a top level optional parameter while creating vector field. [#2044](https://github.com/opensearch-project/k-NN/pull/2044)
### Enhancements
* Adds iterative graph build capability into a faiss index to improve the memory footprint during indexing and integrates KNNVectorsFormat for native engines [#1950](https://github.com/opensearch-project/k-NN/pull/1950)
### Bug Fixes
Expand All @@ -32,4 +33,4 @@ Compatible with OpenSearch 2.17.0
* Added Quantization Framework and implemented 1Bit and multibit quantizer[#1889](https://github.com/opensearch-project/k-NN/issues/1889)
* Encapsulate dimension, vector data type validation/processing inside Library [#1957](https://github.com/opensearch-project/k-NN/pull/1957)
* Add quantization state cache [#1960](https://github.com/opensearch-project/k-NN/pull/1960)
* Add quantization state reader and writer [#1997](https://github.com/opensearch-project/k-NN/pull/1997)
* Add quantization state reader and writer [#1997](https://github.com/opensearch-project/k-NN/pull/1997)
3 changes: 3 additions & 0 deletions src/main/java/org/opensearch/knn/common/KNNConstants.java
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,8 @@ public class KNNConstants {
public static final String METHOD_IVF = "ivf";
public static final String METHOD_PARAMETER_NLIST = "nlist";
public static final String METHOD_PARAMETER_SPACE_TYPE = "space_type"; // used for mapping parameter
// used for defining the top-level parameter
public static final String TOP_LEVEL_PARAMETER_SPACE_TYPE = METHOD_PARAMETER_SPACE_TYPE;
public static final String COMPOUND_EXTENSION = "c";
public static final String MODEL = "model";
public static final String MODELS = "models";
Expand Down Expand Up @@ -72,6 +74,7 @@ public class KNNConstants {
public static final String MODEL_VECTOR_DATA_TYPE_KEY = VECTOR_DATA_TYPE_FIELD;
public static final VectorDataType DEFAULT_VECTOR_DATA_TYPE_FIELD = VectorDataType.FLOAT;
public static final String MINIMAL_MODE_AND_COMPRESSION_FEATURE = "mode_and_compression_feature";
public static final String TOP_LEVEL_SPACE_TYPE_FEATURE = "top_level_space_type_feature";

public static final String RADIAL_SEARCH_KEY = "radial_search";
public static final String QUANTIZATION_STATE_FILE_SUFFIX = "osknnqstate";
Expand Down
12 changes: 11 additions & 1 deletion src/main/java/org/opensearch/knn/index/SpaceType.java
Original file line number Diff line number Diff line change
Expand Up @@ -11,10 +11,12 @@

package org.opensearch.knn.index;

import java.util.Arrays;
import java.util.Locale;

import java.util.HashSet;
import java.util.Set;
import java.util.stream.Collectors;

import static org.opensearch.knn.common.KNNVectorUtil.isZeroVector;

Expand Down Expand Up @@ -149,6 +151,12 @@ public KNNVectorSimilarityFunction getKnnVectorSimilarityFunction() {
public static SpaceType DEFAULT = L2;
public static SpaceType DEFAULT_BINARY = HAMMING;

// String values of every space type except UNDEFINED; listed in the error message raised when an
// unknown space name is requested.
private static final String[] VALID_VALUES = Arrays.stream(SpaceType.values())
    .filter(candidate -> SpaceType.UNDEFINED != candidate)
    .map(candidate -> candidate.getValue())
    .collect(Collectors.toList())
    .toArray(new String[0]);

private final String value;

SpaceType(String value) {
Expand Down Expand Up @@ -221,7 +229,9 @@ public static SpaceType getSpace(String spaceTypeName) {
return currentSpaceType;
}
}
throw new IllegalArgumentException("Unable to find space: " + spaceTypeName);
throw new IllegalArgumentException(
String.format(Locale.ROOT, "Unable to find space: %s . Valid values are: %s", spaceTypeName, Arrays.toString(VALID_VALUES))
);
}

/**
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -161,6 +161,14 @@ public static class Builder extends ParametrizedFieldMapper.Builder {
CompressionLevel.NAMES_ARRAY
).acceptsNull();

// Top-level space type mapping parameter, registered under KNNConstants.TOP_LEVEL_PARAMETER_SPACE_TYPE.
// Its default is the string value of SpaceType.UNDEFINED. The validator resolves the supplied name
// through SpaceType::getSpace, which throws IllegalArgumentException for a name it cannot find.
protected final Parameter<String> topLevelSpaceType = Parameter.stringParam(
KNNConstants.TOP_LEVEL_PARAMETER_SPACE_TYPE,
false, // NOTE(review): presumably the "updateable" flag of Parameter.stringParam — confirm
m -> toType(m).originalMappingParameters.getTopLevelSpaceType(),
SpaceType.UNDEFINED.getValue()
).setValidator(SpaceType::getSpace);

protected final Parameter<Map<String, String>> meta = Parameter.metaParam();

protected ModelDao modelDao;
Expand All @@ -187,7 +195,18 @@ public Builder(

@Override
protected List<Parameter<?>> getParameters() {
return Arrays.asList(stored, hasDocValues, dimension, vectorDataType, meta, knnMethodContext, modelId, mode, compressionLevel);
return Arrays.asList(
stored,
hasDocValues,
dimension,
vectorDataType,
meta,
knnMethodContext,
modelId,
mode,
compressionLevel,
topLevelSpaceType
);
}

protected Explicit<Boolean> ignoreMalformed(BuilderContext context) {
Expand Down Expand Up @@ -346,13 +365,31 @@ public Mapper.Builder<?> parse(String name, Map<String, Object> node, ParserCont
validateFromModel(builder);
} else {
validateMode(builder);
validateSpaceType(builder);
resolveKNNMethodComponents(builder, parserContext);
validateFromKNNMethod(builder);
}

return builder;
}

/**
 * Cross-checks the space type declared inside "method" against the top level space type.
 * When no method context is configured there is nothing to compare and the check is skipped.
 *
 * @param builder builder holding the parsed mapping parameters
 * @throws MapperParsingException if both space types are defined and they differ
 */
private void validateSpaceType(KNNVectorFieldMapper.Builder builder) {
    final KNNMethodContext methodContext = builder.knnMethodContext.get();
    if (methodContext == null) {
        // No "method" in the mapping, so there is no second space type to conflict with.
        return;
    }

    final SpaceType methodSpaceType = methodContext.getSpaceType();
    final SpaceType declaredTopLevel = SpaceType.getSpace(builder.topLevelSpaceType.get());

    // A conflict is only possible when both places carry an explicit (non-UNDEFINED) value.
    final boolean bothDefined = declaredTopLevel != SpaceType.UNDEFINED && methodSpaceType != SpaceType.UNDEFINED;
    if (bothDefined && declaredTopLevel != methodSpaceType) {
        throw new MapperParsingException(
            "Space type in \"method\" and top level space type should be same or one of them should be defined"
        );
    }
}

private void validateMode(KNNVectorFieldMapper.Builder builder) {
boolean isKNNMethodContextConfigured = builder.originalParameters.getKnnMethodContext() != null;
boolean isModeConfigured = builder.mode.isConfigured() || builder.compressionLevel.isConfigured();
Expand Down Expand Up @@ -386,6 +423,11 @@ private void validateFromModel(KNNVectorFieldMapper.Builder builder) {
if (builder.dimension.getValue() == UNSET_MODEL_DIMENSION_IDENTIFIER && builder.modelId.get() == null) {
throw new IllegalArgumentException(String.format(Locale.ROOT, "Dimension value missing for vector: %s", builder.name()));
}
// ensure model and top level spaceType is not defined
if (builder.modelId.get() != null && SpaceType.getSpace(builder.topLevelSpaceType.get()) != SpaceType.UNDEFINED) {
throw new IllegalArgumentException("TopLevel Space type and model can not be both specified in the " + "mapping");
}

validateCompressionAndModeNotSet(builder, builder.name(), "model");
}

Expand Down Expand Up @@ -439,36 +481,64 @@ private void resolveKNNMethodComponents(KNNVectorFieldMapper.Builder builder, Pa
// Configure method from map or legacy
if (builder.originalParameters.isLegacyMapping()) {
builder.originalParameters.setResolvedKnnMethodContext(
createKNNMethodContextFromLegacy(parserContext.getSettings(), parserContext.indexVersionCreated())
createKNNMethodContextFromLegacy(
parserContext.getSettings(),
parserContext.indexVersionCreated(),
SpaceType.getSpace(builder.topLevelSpaceType.get())
)
);
} else if (Mode.isConfigured(Mode.fromName(builder.mode.get()))
|| CompressionLevel.isConfigured(CompressionLevel.fromName(builder.compressionLevel.get()))) {
// We don't need to resolve the space type here; whatever default we are using will be passed down
// while resolving KNNMethodContext for the mode and compression, and then when we resolve the
// spaceType we will set the correct spaceType.
builder.originalParameters.setResolvedKnnMethodContext(
ModeBasedResolver.INSTANCE.resolveKNNMethodContext(
builder.knnMethodConfigContext.getMode(),
builder.knnMethodConfigContext.getCompressionLevel(),
false
false,
SpaceType.getSpace(builder.originalParameters.getTopLevelSpaceType())
)
);
}
setDefaultSpaceType(builder.originalParameters.getResolvedKnnMethodContext(), builder.originalParameters.getVectorDataType());
// this function should now correct the space type for the above resolved context too, if spaceType was
// not provided.
setSpaceType(
builder.originalParameters.getResolvedKnnMethodContext(),
builder.originalParameters.getVectorDataType(),
builder.topLevelSpaceType.get()
);
}

/**
 * Reports whether k-NN is disabled for the given settings.
 *
 * @param settings settings to inspect
 * @return true when the k-NN index setting is absent from the settings, or present with a false value
 */
private boolean isKNNDisabled(Settings settings) {
    if (!KNNSettings.IS_KNN_INDEX_SETTING.exists(settings)) {
        // The setting was never provided, which counts as disabled.
        return true;
    }
    return !KNNSettings.IS_KNN_INDEX_SETTING.get(settings);
}

private void setDefaultSpaceType(final KNNMethodContext knnMethodContext, final VectorDataType vectorDataType) {
private void setSpaceType(
final KNNMethodContext knnMethodContext,
final VectorDataType vectorDataType,
final String topLevelSpaceType
) {
// Now KNNMethodContext should never be null. Because only case it could be null is flatMapper which is
// already handled
if (knnMethodContext == null) {
return;
throw new IllegalArgumentException("KNNMethodContext cannot be null");
}

final SpaceType topLevelSpaceTypeEnum = SpaceType.getSpace(topLevelSpaceType);
// Now set the spaceType for KNNMethodContext
if (SpaceType.UNDEFINED == knnMethodContext.getSpaceType()) {
if (VectorDataType.BINARY == vectorDataType) {
knnMethodContext.setSpaceType(SpaceType.DEFAULT_BINARY);
// We are handling the case when top level space type is defined but method level spaceType is not
// defined.
if (topLevelSpaceTypeEnum != SpaceType.UNDEFINED) {
knnMethodContext.setSpaceType(topLevelSpaceTypeEnum);
} else {
knnMethodContext.setSpaceType(SpaceType.DEFAULT);
// If both spaceTypes are undefined then put the default spaceType based on datatype
if (VectorDataType.BINARY == vectorDataType) {
knnMethodContext.setSpaceType(SpaceType.DEFAULT_BINARY);
} else {
knnMethodContext.setSpaceType(SpaceType.DEFAULT);
}
}
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -193,10 +193,18 @@ private static int getEfConstruction(Settings indexSettings, Version indexVersio
return Integer.parseInt(efConstruction);
}

static KNNMethodContext createKNNMethodContextFromLegacy(Settings indexSettings, Version indexCreatedVersion) {
static KNNMethodContext createKNNMethodContextFromLegacy(
Settings indexSettings,
Version indexCreatedVersion,
SpaceType topLevelSpaceType
) {
// If top level spaceType is set then use that spaceType otherwise default to spaceType from index-settings
final SpaceType finalSpaceToSet = topLevelSpaceType != SpaceType.UNDEFINED
? topLevelSpaceType
: KNNVectorFieldMapperUtil.getSpaceType(indexSettings);
return new KNNMethodContext(
KNNEngine.NMSLIB,
KNNVectorFieldMapperUtil.getSpaceType(indexSettings),
finalSpaceToSet,
new MethodComponentContext(
METHOD_HNSW,
Map.of(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -59,15 +59,19 @@ private ModeBasedResolver() {}
* @param requiresTraining whether config requires training
* @return {@link KNNMethodContext}
*/
public KNNMethodContext resolveKNNMethodContext(Mode mode, CompressionLevel compressionLevel, boolean requiresTraining) {
public KNNMethodContext resolveKNNMethodContext(
Mode mode,
CompressionLevel compressionLevel,
boolean requiresTraining,
SpaceType spaceType
) {
if (requiresTraining) {
return resolveWithTraining(mode, compressionLevel);
return resolveWithTraining(mode, compressionLevel, spaceType);
}

return resolveWithoutTraining(mode, compressionLevel);
return resolveWithoutTraining(mode, compressionLevel, spaceType);
}

private KNNMethodContext resolveWithoutTraining(Mode mode, CompressionLevel compressionLevel) {
private KNNMethodContext resolveWithoutTraining(Mode mode, CompressionLevel compressionLevel, final SpaceType spaceType) {
CompressionLevel resolvedCompressionLevel = resolveCompressionLevel(mode, compressionLevel);
MethodComponentContext encoderContext = resolveEncoder(resolvedCompressionLevel);

Expand All @@ -76,7 +80,7 @@ private KNNMethodContext resolveWithoutTraining(Mode mode, CompressionLevel comp
if (encoderContext != null) {
return new KNNMethodContext(
knnEngine,
SpaceType.DEFAULT,
spaceType,
new MethodComponentContext(
METHOD_HNSW,
Map.of(
Expand All @@ -96,7 +100,7 @@ private KNNMethodContext resolveWithoutTraining(Mode mode, CompressionLevel comp
if (knnEngine == KNNEngine.FAISS) {
return new KNNMethodContext(
knnEngine,
SpaceType.DEFAULT,
spaceType,
new MethodComponentContext(
METHOD_HNSW,
Map.of(
Expand All @@ -113,7 +117,7 @@ private KNNMethodContext resolveWithoutTraining(Mode mode, CompressionLevel comp

return new KNNMethodContext(
knnEngine,
SpaceType.DEFAULT,
spaceType,
new MethodComponentContext(
METHOD_HNSW,
Map.of(
Expand All @@ -126,13 +130,13 @@ private KNNMethodContext resolveWithoutTraining(Mode mode, CompressionLevel comp
);
}

private KNNMethodContext resolveWithTraining(Mode mode, CompressionLevel compressionLevel) {
private KNNMethodContext resolveWithTraining(Mode mode, CompressionLevel compressionLevel, SpaceType spaceType) {
CompressionLevel resolvedCompressionLevel = resolveCompressionLevel(mode, compressionLevel);
MethodComponentContext encoderContext = resolveEncoder(resolvedCompressionLevel);
if (encoderContext != null) {
return new KNNMethodContext(
KNNEngine.FAISS,
SpaceType.DEFAULT,
spaceType,
new MethodComponentContext(
METHOD_IVF,
Map.of(
Expand All @@ -149,7 +153,7 @@ private KNNMethodContext resolveWithTraining(Mode mode, CompressionLevel compres

return new KNNMethodContext(
KNNEngine.FAISS,
SpaceType.DEFAULT,
spaceType,
new MethodComponentContext(
METHOD_IVF,
Map.of(METHOD_PARAMETER_NLIST, METHOD_PARAMETER_NLIST_DEFAULT, METHOD_PARAMETER_NPROBES, METHOD_PARAMETER_NPROBES_DEFAULT)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@ public final class OriginalMappingParameters {
private final String mode;
private final String compressionLevel;
private final String modelId;
private final String topLevelSpaceType;

/**
* Initialize the parameters from the builder
Expand All @@ -56,6 +57,7 @@ public OriginalMappingParameters(KNNVectorFieldMapper.Builder builder) {
this.mode = builder.mode.get();
this.compressionLevel = builder.compressionLevel.get();
this.modelId = builder.modelId.get();
this.topLevelSpaceType = builder.topLevelSpaceType.get();
}

/**
Expand Down
2 changes: 2 additions & 0 deletions src/main/java/org/opensearch/knn/index/util/IndexUtil.java
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,7 @@ public class IndexUtil {
private static final Version MINIMAL_SUPPORTED_VERSION_FOR_MODEL_VECTOR_DATA_TYPE = Version.V_2_16_0;
private static final Version MINIMAL_RESCORE_FEATURE = Version.V_2_17_0;
private static final Version MINIMAL_MODE_AND_COMPRESSION_FEATURE = Version.V_2_17_0;
private static final Version MINIMAL_TOP_LEVEL_SPACE_TYPE_FEATURE = Version.V_2_17_0;
// public so neural search can access it
public static final Map<String, Version> minimalRequiredVersionMap = initializeMinimalRequiredVersionMap();
public static final Set<VectorDataType> VECTOR_DATA_TYPES_NOT_SUPPORTING_ENCODERS = Set.of(VectorDataType.BINARY, VectorDataType.BYTE);
Expand Down Expand Up @@ -390,6 +391,7 @@ private static Map<String, Version> initializeMinimalRequiredVersionMap() {
put(KNNConstants.MODEL_VECTOR_DATA_TYPE_KEY, MINIMAL_SUPPORTED_VERSION_FOR_MODEL_VECTOR_DATA_TYPE);
put(RESCORE_PARAMETER, MINIMAL_RESCORE_FEATURE);
put(KNNConstants.MINIMAL_MODE_AND_COMPRESSION_FEATURE, MINIMAL_MODE_AND_COMPRESSION_FEATURE);
put(KNNConstants.TOP_LEVEL_SPACE_TYPE_FEATURE, MINIMAL_TOP_LEVEL_SPACE_TYPE_FEATURE);
}
};

Expand Down
Loading

0 comments on commit cb9ba71

Please sign in to comment.