Skip to content
This repository has been archived by the owner on Aug 2, 2022. It is now read-only.

Commit

Permalink
FEAT: support cosine similarity (#90)
Browse files Browse the repository at this point in the history
  • Loading branch information
chenqi0805 authored Apr 27, 2020
1 parent 3cbcc9f commit 1c80b62
Show file tree
Hide file tree
Showing 14 changed files with 248 additions and 43 deletions.
55 changes: 55 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -135,6 +135,57 @@ POST /myindex/_search
}
```

## Cosine Similarity Usage (experimental)

* Creating KNN index with cosine similarity space type

```
PUT /myindex
{
"settings" : {
"index": {
"knn": true,
"knn.space_type": "cosinesimil"
}
},
"mappings": {
"properties": {
"my_vector1": {
"type": "knn_vector",
"dimension": 2
}
}
}
}
```

* Indexing sample docs to KNN index

```
PUT /myindex/_doc/2?refresh=true
{
"my_vector1" : [1.5, 2.5],
"price":10
}
```

* Querying K-Nearest neighbors

```
POST /myindex/_search
{
"size" : 10,
"query": {
"knn": {
"my_vector1": {
"vector": [15, 25],
"k": 2
}
}
}
}
```

## Java Native library usage
For plugin installations from an archive (.zip), make sure that the ```.so``` file (Linux) or the ```.jnilib``` file (macOS) is present in the Java library path. You can do this either by copying the .so/.jnilib file to $ES_HOME or by manually adding ```-Djava.library.path=<path_to_lib_files>``` to the ```jvm.options``` file.

Expand All @@ -145,6 +196,9 @@ You must provide index-level settings when you create the index. If you don't pr
##### index.knn
This setting indicates whether the index uses the KNN Codec or not. Possible values are *true*, *false*. Default value is *false*.

##### index.knn.space_type
This setting specifies the similarity metric used to compare vectors. Supported values are *l2* and *cosinesimil*: *l2* is the Euclidean distance metric, and *cosinesimil* is cosine similarity. The default value is *l2*.

##### index.knn.algo_param.m
This setting is an HNSW parameter that represents "the number of bi-directional links created for every new element during construction. Reasonable range for M is 2-100. Higher M work better on datasets with high intrinsic dimensionality and/or high recall, while low M work better for datasets with low intrinsic dimensionality and/or low recalls. The parameter also determines the algorithm's memory consumption, which is roughly M * 8-10 bytes per stored element." [nmslib/hnswlib](https://github.com/nmslib/hnswlib/blob/master/ALGO_PARAMS.md) The default value is *16*.

Expand All @@ -160,6 +214,7 @@ PUT /my_index/_settings
{
"index" : {
"knn": true,
"knn.space_type": "l2",
"knn.algo_param.m": 18,
"knn.algo_param.ef_search" : 20,
"knn.algo_param.ef_construction" : 40
Expand Down
Binary file modified buildSrc/libKNNIndexV1_7_3_6.jnilib
Binary file not shown.
Binary file modified buildSrc/libKNNIndexV1_7_3_6.so
Binary file not shown.
Original file line number Diff line number Diff line change
Expand Up @@ -41,9 +41,9 @@ using similarity::KNNQueue;
extern "C"

struct IndexWrapper {
IndexWrapper() {
space.reset(SpaceFactoryRegistry<float>::Instance().CreateSpace("l2", AnyParams()));
index.reset(MethodFactoryRegistry<float>::Instance().CreateMethod(false, "hnsw", "l2", *space, data));
IndexWrapper(string spaceType) {
space.reset(SpaceFactoryRegistry<float>::Instance().CreateSpace(spaceType, AnyParams()));
index.reset(MethodFactoryRegistry<float>::Instance().CreateMethod(false, "hnsw", spaceType, *space, data));
}
std::unique_ptr<Space<float>> space;
std::unique_ptr<Index<float>> index;
Expand Down Expand Up @@ -85,15 +85,19 @@ void catch_cpp_exception_and_throw_java(JNIEnv* env)
}
}

JNIEXPORT void JNICALL Java_com_amazon_opendistroforelasticsearch_knn_index_v1736_KNNIndex_saveIndex(JNIEnv* env, jclass cls, jintArray ids, jobjectArray vectors, jstring indexPath, jobjectArray algoParams)
JNIEXPORT void JNICALL Java_com_amazon_opendistroforelasticsearch_knn_index_v1736_KNNIndex_saveIndex(JNIEnv* env, jclass cls, jintArray ids, jobjectArray vectors, jstring indexPath, jobjectArray algoParams, jstring spaceType)
{
Space<float>* space = NULL;
ObjectVector dataset;
Index<float>* index = NULL;
int* object_ids = NULL;

try {
space = SpaceFactoryRegistry<float>::Instance().CreateSpace("l2", AnyParams());
const char *spaceTypeCStr = env->GetStringUTFChars(spaceType, 0);
string spaceTypeString(spaceTypeCStr);
env->ReleaseStringUTFChars(spaceType, spaceTypeCStr);
has_exception_in_stack(env);
space = SpaceFactoryRegistry<float>::Instance().CreateSpace(spaceTypeString, AnyParams());
object_ids = env->GetIntArrayElements(ids, 0);
for (int i = 0; i < env->GetArrayLength(vectors); i++) {
jfloatArray vectorArray = (jfloatArray)env->GetObjectArrayElement(vectors, i);
Expand All @@ -103,7 +107,7 @@ JNIEXPORT void JNICALL Java_com_amazon_opendistroforelasticsearch_knn_index_v173
}
// free up memory
env->ReleaseIntArrayElements(ids, object_ids, 0);
index = MethodFactoryRegistry<float>::Instance().CreateMethod(false, "hnsw", "l2", *space, dataset);
index = MethodFactoryRegistry<float>::Instance().CreateMethod(false, "hnsw", spaceTypeString, *space, dataset);

int paramsCount = env->GetArrayLength(algoParams);
vector<string> paramsList;
Expand Down Expand Up @@ -171,7 +175,7 @@ JNIEXPORT jobjectArray JNICALL Java_com_amazon_opendistroforelasticsearch_knn_in
return NULL;
}

JNIEXPORT jlong JNICALL Java_com_amazon_opendistroforelasticsearch_knn_index_v1736_KNNIndex_init(JNIEnv* env, jclass cls, jstring indexPath, jobjectArray algoParams)
JNIEXPORT jlong JNICALL Java_com_amazon_opendistroforelasticsearch_knn_index_v1736_KNNIndex_init(JNIEnv* env, jclass cls, jstring indexPath, jobjectArray algoParams, jstring spaceType)
{
IndexWrapper *indexWrapper = NULL;
try {
Expand All @@ -181,7 +185,11 @@ JNIEXPORT jlong JNICALL Java_com_amazon_opendistroforelasticsearch_knn_index_v17
has_exception_in_stack(env);

// Load index from file (may throw)
IndexWrapper *indexWrapper = new IndexWrapper();
const char *spaceTypeCStr = env->GetStringUTFChars(spaceType, 0);
string spaceTypeString(spaceTypeCStr);
env->ReleaseStringUTFChars(spaceType, spaceTypeCStr);
has_exception_in_stack(env);
IndexWrapper *indexWrapper = new IndexWrapper(spaceTypeString);
indexWrapper->index->LoadIndex(indexPathString);

// Parse and set query params
Expand Down

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Original file line number Diff line number Diff line change
Expand Up @@ -196,7 +196,7 @@ public KNNIndexCacheEntry loadIndex(String indexPathUrl, String indexName) throw
// the entry
fileWatcher.init();

final KNNIndex knnIndex = KNNIndex.loadIndex(indexPathUrl, getQueryParams(indexName));
final KNNIndex knnIndex = KNNIndex.loadIndex(indexPathUrl, getQueryParams(indexName), KNNSettings.getSpaceType(indexName));

// TODO verify that this is safe - ideally we'd explicitly ensure that the FileWatcher is only checked
// after the guava cache has finished loading the key to avoid a race condition where the watcher
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -31,11 +31,13 @@
import org.elasticsearch.monitor.jvm.JvmInfo;
import org.elasticsearch.monitor.os.OsProbe;

import java.security.InvalidParameterException;
import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Objects;
import java.util.Set;
import java.util.concurrent.ConcurrentHashMap;
import java.util.stream.Collectors;
import java.util.stream.Stream;
Expand Down Expand Up @@ -63,6 +65,7 @@ public class KNNSettings {
/**
* Settings name
*/
public static final String KNN_SPACE_TYPE = "index.knn.space_type";
public static final String KNN_ALGO_PARAM_M = "index.knn.algo_param.m";
public static final String KNN_ALGO_PARAM_EF_CONSTRUCTION = "index.knn.algo_param.ef_construction";
public static final String KNN_ALGO_PARAM_EF_SEARCH = "index.knn.algo_param.ef_search";
Expand All @@ -80,6 +83,11 @@ public class KNNSettings {
* Settings Definition
*/

/**
 * Space type used to compare vectors in a k-NN index ({@code index.knn.space_type}).
 * The default is taken from {@link SpaceTypes#l2} so it stays in sync with the fallback
 * used by {@code getSpaceType}; values are checked by {@link SpaceTypeValidator}.
 */
public static final Setting<String> INDEX_KNN_SPACE_TYPE = Setting.simpleString(KNN_SPACE_TYPE,
        SpaceTypes.l2.getValue(),
        new SpaceTypeValidator(),
        IndexScope);

/**
* M - the number of bi-directional links created for every new element during construction.
* Reasonable range for M is 2-100. Higher M work better on datasets with high intrinsic
Expand Down Expand Up @@ -252,7 +260,8 @@ public Setting<?> getSetting(String key) {
}

public List<Setting<?>> getSettings() {
List<Setting<?>> settings = Arrays.asList(INDEX_KNN_ALGO_PARAM_M_SETTING,
List<Setting<?>> settings = Arrays.asList(INDEX_KNN_SPACE_TYPE,
INDEX_KNN_ALGO_PARAM_M_SETTING,
INDEX_KNN_ALGO_PARAM_EF_CONSTRUCTION_SETTING,
INDEX_KNN_ALGO_PARAM_EF_SEARCH_SETTING,
KNN_ALGO_PARAM_INDEX_THREAD_QTY_SETTING,
Expand Down Expand Up @@ -357,6 +366,16 @@ public static int getEfSearchParam(String index) {
return getIndexSettingValue(index, KNN_ALGO_PARAM_EF_SEARCH, 512);
}

/**
 * Reads the space type configured for an index from the current cluster state.
 * Falls back to {@code SpaceTypes.l2} when the setting is not present.
 *
 * @param index Name of the index
 * @return spaceType value, normalized to lower case
 */
public static String getSpaceType(String index) {
    String spaceType = KNNSettings.state().clusterService.state().getMetaData()
            .index(index).getSettings().get(KNN_SPACE_TYPE, SpaceTypes.l2.getValue());
    // SpaceTypeValidator accepts the value case-insensitively, so the stored setting may be
    // mixed case. Normalize it here (locale-independently) so every caller sees the same
    // canonical name regardless of how the setting was spelled.
    return spaceType.toLowerCase(java.util.Locale.ROOT);
}

public static int getIndexSettingValue(String index, String settingName, int defaultValue) {
return KNNSettings.state().clusterService.state().getMetaData()
.index(index).getSettings()
Expand All @@ -367,4 +386,14 @@ public void setClusterService(ClusterService clusterService) {
this.clusterService = clusterService;
}

/**
 * Validator for the space type setting: accepts any value whose lower-cased form is one of
 * {@link SpaceTypes#getValues()} and rejects everything else, including {@code null}.
 */
static class SpaceTypeValidator implements Setting.Validator<String> {

    private final Set<String> types = SpaceTypes.getValues();

    /**
     * @param value candidate space type
     * @throws InvalidParameterException if {@code value} is null or not a supported space type
     */
    @Override
    public void validate(String value) {
        // Locale.ROOT keeps the comparison independent of the JVM default locale
        // (e.g. the Turkish dotless-i mapping would otherwise break "COSINESIMIL").
        if (value == null || !types.contains(value.toLowerCase(java.util.Locale.ROOT))) {
            throw new InvalidParameterException(
                    String.format(java.util.Locale.ROOT, "Unsupported space type: %s", value));
        }
    }
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -94,6 +94,11 @@ public Builder ignoreMalformed(boolean ignoreMalformed) {
return builder;
}

/**
 * Stores the space type as an attribute on {@code Defaults.FIELD_TYPE}.
 *
 * @param key attribute name under which the space type is stored
 * @param paramValue space type; it is lower-cased before being stored
 * @return the {@code builder} reference, for call chaining
 */
public Builder spaceTypeParam(String key, String paramValue) {
    // Locale.ROOT: the lower-casing must not depend on the JVM default locale.
    // NOTE(review): the attribute is written to Defaults.FIELD_TYPE, which looks like a
    // shared default instance -- confirm this is not leaking across indices.
    Defaults.FIELD_TYPE.putAttribute(key, paramValue.toLowerCase(java.util.Locale.ROOT));
    return builder;
}

public Builder algoParams(String key, int paramValue) {
Defaults.FIELD_TYPE.putAttribute(key, String.valueOf(paramValue));
return builder;
Expand Down Expand Up @@ -135,6 +140,8 @@ public static class TypeParser implements Mapper.TypeParser {
public Mapper.Builder<?, ?> parse(String name, Map<String, Object> node, ParserContext parserContext)
throws MapperParsingException {
Builder builder = new KNNVectorFieldMapper.Builder(name);
builder.spaceTypeParam(KNNConstants.SPACE_TYPE, parserContext.mapperService().getIndexSettings().getValue(
KNNSettings.INDEX_KNN_SPACE_TYPE));
builder.algoParams(KNNConstants.HNSW_ALGO_M, parserContext.mapperService().getIndexSettings().getValue(
KNNSettings.INDEX_KNN_ALGO_PARAM_M_SETTING));
builder.algoParams(KNNConstants.HNSW_ALGO_EF_CONSTRUCTION, parserContext.mapperService().getIndexSettings()
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
/*
* Copyright 2019 Amazon.com, Inc. or its affiliates. All Rights Reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License").
* You may not use this file except in compliance with the License.
* A copy of the License is located at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* or in the "license" file accompanying this file. This file is distributed
* on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
* express or implied. See the License for the specific language governing
* permissions and limitations under the License.
*/

package com.amazon.opendistroforelasticsearch.knn.index;

import java.util.HashSet;
import java.util.Set;

/**
 * Enum contains space types for k-NN similarity search
 */
public enum SpaceTypes {
    l2("l2"),
    cosinesimil("cosinesimil");

    // Enum constants are shared singletons, so their state must be immutable.
    private final String value;

    SpaceTypes(String value) {
        this.value = value;
    }

    /**
     * Get space type
     *
     * @return the string value of this space type
     */
    public String getValue() {
        return value;
    }

    /**
     * Get all space types
     *
     * @return a new set containing the string value of every space type
     */
    public static Set<String> getValues() {
        Set<String> values = new HashSet<>();

        for (SpaceTypes spaceType : SpaceTypes.values()) {
            values.add(spaceType.getValue());
        }
        return values;
    }
}
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@

package com.amazon.opendistroforelasticsearch.knn.index.codec.KNN80Codec;

import com.amazon.opendistroforelasticsearch.knn.index.SpaceTypes;
import com.amazon.opendistroforelasticsearch.knn.index.codec.KNNCodecUtil;
import com.amazon.opendistroforelasticsearch.knn.plugin.stats.KNNCounter;
import org.apache.logging.log4j.LogManager;
Expand Down Expand Up @@ -96,11 +97,13 @@ public void addKNNBinaryField(FieldInfo field, DocValuesProducer valuesProducer)

// Pass the path for the nms library to save the file
String tempIndexPath = indexPath + TEMP_SUFFIX;
String[] algoParams = getKNNIndexParams(field.attributes());
Map<String, String> fieldAttributes = field.attributes();
String spaceType = fieldAttributes.getOrDefault(KNNConstants.SPACE_TYPE, SpaceTypes.l2.getValue());
String[] algoParams = getKNNIndexParams(fieldAttributes);
AccessController.doPrivileged(
new PrivilegedAction<Void>() {
public Void run() {
KNNIndex.saveIndex(pair.docs, pair.vectors, tempIndexPath, algoParams);
KNNIndex.saveIndex(pair.docs, pair.vectors, tempIndexPath, algoParams, spaceType);
return null;
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
package com.amazon.opendistroforelasticsearch.knn.index.util;

public class KNNConstants {
public static final String SPACE_TYPE = "spaceType";
public static final String HNSW_ALGO_M = "M";
public static final String HNSW_ALGO_EF_CONSTRUCTION = "efConstruction";
public static final String HNSW_ALGO_EF_SEARCH = "efSearch";
Expand Down
Loading

0 comments on commit 1c80b62

Please sign in to comment.