Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add spaceType as a top level parameter while creating vector field. #2044

Merged
merged 3 commits into from
Sep 5, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion release-notes/opensearch-knn.release-notes-2.17.0.0.md
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ Compatible with OpenSearch 2.17.0
* Add support for byte vector with Faiss Engine HNSW algorithm [#1823](https://github.com/opensearch-project/k-NN/pull/1823)
* Add support for byte vector with Faiss Engine IVF algorithm [#2002](https://github.com/opensearch-project/k-NN/pull/2002)
* Add mode/compression configuration support for disk-based vector search [#2034](https://github.com/opensearch-project/k-NN/pull/2034)
* Add spaceType as a top level optional parameter while creating vector field. [#2044](https://github.com/opensearch-project/k-NN/pull/2044)
### Enhancements
* Adds iterative graph build capability into a faiss index to improve the memory footprint during indexing and integrates KNNVectorsFormat for native engines [#1950](https://github.com/opensearch-project/k-NN/pull/1950)
### Bug Fixes
Expand All @@ -32,4 +33,4 @@ Compatible with OpenSearch 2.17.0
* Added Quantization Framework and implemented 1Bit and multibit quantizer [#1889](https://github.com/opensearch-project/k-NN/issues/1889)
* Encapsulate dimension, vector data type validation/processing inside Library [#1957](https://github.com/opensearch-project/k-NN/pull/1957)
* Add quantization state cache [#1960](https://github.com/opensearch-project/k-NN/pull/1960)
* Add quantization state reader and writer [#1997](https://github.com/opensearch-project/k-NN/pull/1997)
* Add quantization state reader and writer [#1997](https://github.com/opensearch-project/k-NN/pull/1997)
3 changes: 3 additions & 0 deletions src/main/java/org/opensearch/knn/common/KNNConstants.java
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,8 @@ public class KNNConstants {
public static final String METHOD_IVF = "ivf";
public static final String METHOD_PARAMETER_NLIST = "nlist";
public static final String METHOD_PARAMETER_SPACE_TYPE = "space_type"; // used for mapping parameter
// used for defining the top-level parameter
public static final String TOP_LEVEL_PARAMETER_SPACE_TYPE = METHOD_PARAMETER_SPACE_TYPE;
public static final String COMPOUND_EXTENSION = "c";
public static final String MODEL = "model";
public static final String MODELS = "models";
Expand Down Expand Up @@ -72,6 +74,7 @@ public class KNNConstants {
public static final String MODEL_VECTOR_DATA_TYPE_KEY = VECTOR_DATA_TYPE_FIELD;
public static final VectorDataType DEFAULT_VECTOR_DATA_TYPE_FIELD = VectorDataType.FLOAT;
public static final String MINIMAL_MODE_AND_COMPRESSION_FEATURE = "mode_and_compression_feature";
public static final String TOP_LEVEL_SPACE_TYPE_FEATURE = "top_level_space_type_feature";

public static final String RADIAL_SEARCH_KEY = "radial_search";
public static final String QUANTIZATION_STATE_FILE_SUFFIX = "osknnqstate";
Expand Down
12 changes: 11 additions & 1 deletion src/main/java/org/opensearch/knn/index/SpaceType.java
Original file line number Diff line number Diff line change
Expand Up @@ -11,10 +11,12 @@

package org.opensearch.knn.index;

import java.util.Arrays;
import java.util.Locale;

import java.util.HashSet;
import java.util.Set;
import java.util.stream.Collectors;

import static org.opensearch.knn.common.KNNVectorUtil.isZeroVector;

Expand Down Expand Up @@ -149,6 +151,12 @@ public KNNVectorSimilarityFunction getKnnVectorSimilarityFunction() {
public static SpaceType DEFAULT = L2;
public static SpaceType DEFAULT_BINARY = HAMMING;

// String values of every space type except UNDEFINED; printed in the error message raised
// when a space type name cannot be resolved.
private static final String[] VALID_VALUES = Arrays.stream(SpaceType.values())
    .filter(candidate -> SpaceType.UNDEFINED != candidate)
    .map(SpaceType::getValue)
    .collect(Collectors.toList())
    .toArray(new String[0]);

private final String value;

SpaceType(String value) {
Expand Down Expand Up @@ -221,7 +229,9 @@ public static SpaceType getSpace(String spaceTypeName) {
return currentSpaceType;
}
}
throw new IllegalArgumentException("Unable to find space: " + spaceTypeName);
throw new IllegalArgumentException(
String.format(Locale.ROOT, "Unable to find space: %s . Valid values are: %s", spaceTypeName, Arrays.toString(VALID_VALUES))
);
}

/**
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -161,6 +161,14 @@ public static class Builder extends ParametrizedFieldMapper.Builder {
CompressionLevel.NAMES_ARRAY
).acceptsNull();

// Top-level space type mapping parameter, registered under KNNConstants.TOP_LEVEL_PARAMETER_SPACE_TYPE.
// NOTE(review): the second argument (false) is presumably the "updateable" flag of Parameter.stringParam — confirm.
// The initializer reads the value stored in the mapper's original mapping parameters; when the parameter
// is not supplied it falls back to the string value of SpaceType.UNDEFINED.
protected final Parameter<String> topLevelSpaceType = Parameter.stringParam(
KNNConstants.TOP_LEVEL_PARAMETER_SPACE_TYPE,
false,
m -> toType(m).originalMappingParameters.getTopLevelSpaceType(),
SpaceType.UNDEFINED.getValue()
// Validation delegates to SpaceType.getSpace, which throws IllegalArgumentException for an unknown name.
).setValidator(SpaceType::getSpace);

protected final Parameter<Map<String, String>> meta = Parameter.metaParam();

protected ModelDao modelDao;
Expand All @@ -187,7 +195,18 @@ public Builder(

@Override
protected List<Parameter<?>> getParameters() {
return Arrays.asList(stored, hasDocValues, dimension, vectorDataType, meta, knnMethodContext, modelId, mode, compressionLevel);
return Arrays.asList(
stored,
hasDocValues,
dimension,
vectorDataType,
meta,
knnMethodContext,
modelId,
mode,
compressionLevel,
topLevelSpaceType
);
}

protected Explicit<Boolean> ignoreMalformed(BuilderContext context) {
Expand Down Expand Up @@ -346,13 +365,31 @@ public Mapper.Builder<?> parse(String name, Map<String, Object> node, ParserCont
validateFromModel(builder);
} else {
validateMode(builder);
validateSpaceType(builder);
resolveKNNMethodComponents(builder, parserContext);
validateFromKNNMethod(builder);
}

return builder;
}

/**
 * Checks that the top level space type and the space type of the configured method context do not
 * contradict each other. Nothing is checked when no method context is configured.
 *
 * @param builder field mapper builder holding the parsed mapping parameters
 * @throws MapperParsingException if both space types are defined and they differ
 */
private void validateSpaceType(KNNVectorFieldMapper.Builder builder) {
    final KNNMethodContext methodContext = builder.knnMethodContext.get();
    // Without a method context there is only one possible source of the space type.
    if (methodContext == null) {
        return;
    }

    final SpaceType methodSpace = methodContext.getSpaceType();
    final SpaceType topLevelSpace = SpaceType.getSpace(builder.topLevelSpaceType.get());

    // A conflict is only possible when both places define a space type.
    final boolean bothDefined = SpaceType.UNDEFINED != topLevelSpace && SpaceType.UNDEFINED != methodSpace;
    if (bothDefined && topLevelSpace != methodSpace) {
        throw new MapperParsingException(
            "Space type in \"method\" and top level space type should be same or one of them should be defined"
        );
    }
}

private void validateMode(KNNVectorFieldMapper.Builder builder) {
boolean isKNNMethodContextConfigured = builder.originalParameters.getKnnMethodContext() != null;
boolean isModeConfigured = builder.mode.isConfigured() || builder.compressionLevel.isConfigured();
Expand Down Expand Up @@ -386,6 +423,11 @@ private void validateFromModel(KNNVectorFieldMapper.Builder builder) {
if (builder.dimension.getValue() == UNSET_MODEL_DIMENSION_IDENTIFIER && builder.modelId.get() == null) {
throw new IllegalArgumentException(String.format(Locale.ROOT, "Dimension value missing for vector: %s", builder.name()));
}
// ensure model and top level spaceType is not defined
if (builder.modelId.get() != null && SpaceType.getSpace(builder.topLevelSpaceType.get()) != SpaceType.UNDEFINED) {
throw new IllegalArgumentException("TopLevel Space type and model can not be both specified in the " + "mapping");
}

validateCompressionAndModeNotSet(builder, builder.name(), "model");
}

Expand Down Expand Up @@ -439,36 +481,64 @@ private void resolveKNNMethodComponents(KNNVectorFieldMapper.Builder builder, Pa
// Configure method from map or legacy
if (builder.originalParameters.isLegacyMapping()) {
builder.originalParameters.setResolvedKnnMethodContext(
createKNNMethodContextFromLegacy(parserContext.getSettings(), parserContext.indexVersionCreated())
createKNNMethodContextFromLegacy(
parserContext.getSettings(),
parserContext.indexVersionCreated(),
SpaceType.getSpace(builder.topLevelSpaceType.get())
)
);
} else if (Mode.isConfigured(Mode.fromName(builder.mode.get()))
|| CompressionLevel.isConfigured(CompressionLevel.fromName(builder.compressionLevel.get()))) {
// We don't need to resolve the space type here: whatever space type we currently have is passed down
// while resolving the KNNMethodContext for the mode and compression, and the correct spaceType is set
// afterwards when the space type itself is resolved.
builder.originalParameters.setResolvedKnnMethodContext(
ModeBasedResolver.INSTANCE.resolveKNNMethodContext(
builder.knnMethodConfigContext.getMode(),
builder.knnMethodConfigContext.getCompressionLevel(),
false
false,
SpaceType.getSpace(builder.originalParameters.getTopLevelSpaceType())
)
);
}
setDefaultSpaceType(builder.originalParameters.getResolvedKnnMethodContext(), builder.originalParameters.getVectorDataType());
// this function should now correct the space type for the above resolved context too, if spaceType was
// not provided.
setSpaceType(
builder.originalParameters.getResolvedKnnMethodContext(),
builder.originalParameters.getVectorDataType(),
builder.topLevelSpaceType.get()
);
}

private boolean isKNNDisabled(Settings settings) {
boolean isSettingPresent = KNNSettings.IS_KNN_INDEX_SETTING.exists(settings);
return !isSettingPresent || !KNNSettings.IS_KNN_INDEX_SETTING.get(settings);
}

private void setDefaultSpaceType(final KNNMethodContext knnMethodContext, final VectorDataType vectorDataType) {
private void setSpaceType(
final KNNMethodContext knnMethodContext,
final VectorDataType vectorDataType,
final String topLevelSpaceType
) {
// KNNMethodContext should never be null at this point: the only case where it could be null is the
// flat mapper, which is already handled
if (knnMethodContext == null) {
return;
throw new IllegalArgumentException("KNNMethodContext cannot be null");
}

final SpaceType topLevelSpaceTypeEnum = SpaceType.getSpace(topLevelSpaceType);
// Now set the spaceType for KNNMethodContext
if (SpaceType.UNDEFINED == knnMethodContext.getSpaceType()) {
if (VectorDataType.BINARY == vectorDataType) {
knnMethodContext.setSpaceType(SpaceType.DEFAULT_BINARY);
// We are handling the case when top level space type is defined but method level spaceType is not
// defined.
if (topLevelSpaceTypeEnum != SpaceType.UNDEFINED) {
knnMethodContext.setSpaceType(topLevelSpaceTypeEnum);
} else {
knnMethodContext.setSpaceType(SpaceType.DEFAULT);
// If both spaceTypes are undefined then put the default spaceType based on datatype
if (VectorDataType.BINARY == vectorDataType) {
knnMethodContext.setSpaceType(SpaceType.DEFAULT_BINARY);
} else {
knnMethodContext.setSpaceType(SpaceType.DEFAULT);
}
}
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -193,10 +193,18 @@ private static int getEfConstruction(Settings indexSettings, Version indexVersio
return Integer.parseInt(efConstruction);
}

static KNNMethodContext createKNNMethodContextFromLegacy(Settings indexSettings, Version indexCreatedVersion) {
static KNNMethodContext createKNNMethodContextFromLegacy(
Settings indexSettings,
Version indexCreatedVersion,
SpaceType topLevelSpaceType
) {
// If top level spaceType is set then use that spaceType otherwise default to spaceType from index-settings
final SpaceType finalSpaceToSet = topLevelSpaceType != SpaceType.UNDEFINED
? topLevelSpaceType
: KNNVectorFieldMapperUtil.getSpaceType(indexSettings);
return new KNNMethodContext(
KNNEngine.NMSLIB,
KNNVectorFieldMapperUtil.getSpaceType(indexSettings),
finalSpaceToSet,
new MethodComponentContext(
METHOD_HNSW,
Map.of(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -59,15 +59,19 @@ private ModeBasedResolver() {}
* @param requiresTraining whether config requires training
* @return {@link KNNMethodContext}
*/
public KNNMethodContext resolveKNNMethodContext(Mode mode, CompressionLevel compressionLevel, boolean requiresTraining) {
public KNNMethodContext resolveKNNMethodContext(
Mode mode,
CompressionLevel compressionLevel,
boolean requiresTraining,
SpaceType spaceType
) {
if (requiresTraining) {
return resolveWithTraining(mode, compressionLevel);
return resolveWithTraining(mode, compressionLevel, spaceType);
}

return resolveWithoutTraining(mode, compressionLevel);
return resolveWithoutTraining(mode, compressionLevel, spaceType);
}

private KNNMethodContext resolveWithoutTraining(Mode mode, CompressionLevel compressionLevel) {
private KNNMethodContext resolveWithoutTraining(Mode mode, CompressionLevel compressionLevel, final SpaceType spaceType) {
CompressionLevel resolvedCompressionLevel = resolveCompressionLevel(mode, compressionLevel);
MethodComponentContext encoderContext = resolveEncoder(resolvedCompressionLevel);

Expand All @@ -76,7 +80,7 @@ private KNNMethodContext resolveWithoutTraining(Mode mode, CompressionLevel comp
if (encoderContext != null) {
return new KNNMethodContext(
knnEngine,
SpaceType.DEFAULT,
spaceType,
new MethodComponentContext(
METHOD_HNSW,
Map.of(
Expand All @@ -96,7 +100,7 @@ private KNNMethodContext resolveWithoutTraining(Mode mode, CompressionLevel comp
if (knnEngine == KNNEngine.FAISS) {
return new KNNMethodContext(
knnEngine,
SpaceType.DEFAULT,
spaceType,
new MethodComponentContext(
METHOD_HNSW,
Map.of(
Expand All @@ -113,7 +117,7 @@ private KNNMethodContext resolveWithoutTraining(Mode mode, CompressionLevel comp

return new KNNMethodContext(
knnEngine,
SpaceType.DEFAULT,
spaceType,
new MethodComponentContext(
METHOD_HNSW,
Map.of(
Expand All @@ -126,13 +130,13 @@ private KNNMethodContext resolveWithoutTraining(Mode mode, CompressionLevel comp
);
}

private KNNMethodContext resolveWithTraining(Mode mode, CompressionLevel compressionLevel) {
private KNNMethodContext resolveWithTraining(Mode mode, CompressionLevel compressionLevel, SpaceType spaceType) {
CompressionLevel resolvedCompressionLevel = resolveCompressionLevel(mode, compressionLevel);
MethodComponentContext encoderContext = resolveEncoder(resolvedCompressionLevel);
if (encoderContext != null) {
return new KNNMethodContext(
KNNEngine.FAISS,
SpaceType.DEFAULT,
spaceType,
new MethodComponentContext(
METHOD_IVF,
Map.of(
Expand All @@ -149,7 +153,7 @@ private KNNMethodContext resolveWithTraining(Mode mode, CompressionLevel compres

return new KNNMethodContext(
KNNEngine.FAISS,
SpaceType.DEFAULT,
spaceType,
new MethodComponentContext(
METHOD_IVF,
Map.of(METHOD_PARAMETER_NLIST, METHOD_PARAMETER_NLIST_DEFAULT, METHOD_PARAMETER_NPROBES, METHOD_PARAMETER_NPROBES_DEFAULT)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@ public final class OriginalMappingParameters {
private final String mode;
private final String compressionLevel;
private final String modelId;
private final String topLevelSpaceType;

/**
* Initialize the parameters from the builder
Expand All @@ -56,6 +57,7 @@ public OriginalMappingParameters(KNNVectorFieldMapper.Builder builder) {
this.mode = builder.mode.get();
this.compressionLevel = builder.compressionLevel.get();
this.modelId = builder.modelId.get();
this.topLevelSpaceType = builder.topLevelSpaceType.get();
}

/**
Expand Down
2 changes: 2 additions & 0 deletions src/main/java/org/opensearch/knn/index/util/IndexUtil.java
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,7 @@ public class IndexUtil {
private static final Version MINIMAL_SUPPORTED_VERSION_FOR_MODEL_VECTOR_DATA_TYPE = Version.V_2_16_0;
private static final Version MINIMAL_RESCORE_FEATURE = Version.V_2_17_0;
private static final Version MINIMAL_MODE_AND_COMPRESSION_FEATURE = Version.V_2_17_0;
private static final Version MINIMAL_TOP_LEVEL_SPACE_TYPE_FEATURE = Version.V_2_17_0;
// public so neural search can access it
public static final Map<String, Version> minimalRequiredVersionMap = initializeMinimalRequiredVersionMap();
public static final Set<VectorDataType> VECTOR_DATA_TYPES_NOT_SUPPORTING_ENCODERS = Set.of(VectorDataType.BINARY, VectorDataType.BYTE);
Expand Down Expand Up @@ -390,6 +391,7 @@ private static Map<String, Version> initializeMinimalRequiredVersionMap() {
put(KNNConstants.MODEL_VECTOR_DATA_TYPE_KEY, MINIMAL_SUPPORTED_VERSION_FOR_MODEL_VECTOR_DATA_TYPE);
put(RESCORE_PARAMETER, MINIMAL_RESCORE_FEATURE);
put(KNNConstants.MINIMAL_MODE_AND_COMPRESSION_FEATURE, MINIMAL_MODE_AND_COMPRESSION_FEATURE);
put(KNNConstants.TOP_LEVEL_SPACE_TYPE_FEATURE, MINIMAL_TOP_LEVEL_SPACE_TYPE_FEATURE);
}
};

Expand Down
Loading
Loading