-
Notifications
You must be signed in to change notification settings - Fork 25k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Add support for configuring HNSW parameters
This PR extends the dense_vector type to allow configuring HNSW params in `index_options`: `m` – max number of connections for each node, `ef_construction` – number of candidate neighbors to track while searching the graph for each newly inserted node. ``` "mappings": { "properties": { "my_vector": { "type": "dense_vector", "dims": 128, "index": true, "similarity": "l2_norm", "index_options": { "type" : "hnsw", "m" : 15, "ef_construction" : 50 } } } } ``` `index_options` is an object, and all parameters underneath it are optional. If `m` or `ef_construction` are not provided, the default values from the current codec will be used. Relates to #78473
- Loading branch information
1 parent
0089bd0
commit 5004872
Showing
10 changed files
with
252 additions
and
23 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
128 changes: 128 additions & 0 deletions
128
server/src/main/java/org/elasticsearch/index/mapper/VectorFieldMapper.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,128 @@ | ||
/* | ||
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one | ||
* or more contributor license agreements. Licensed under the Elastic License | ||
* 2.0 and the Server Side Public License, v 1; you may not use this file except | ||
* in compliance with, at your election, the Elastic License 2.0 or the Server | ||
* Side Public License, v 1. | ||
*/ | ||
package org.elasticsearch.index.mapper; | ||
|
||
import org.apache.lucene.codecs.KnnVectorsFormat; | ||
import org.apache.lucene.codecs.lucene90.Lucene90HnswVectorsFormat; | ||
import org.elasticsearch.common.xcontent.support.XContentMapValues; | ||
import org.elasticsearch.xcontent.ToXContent; | ||
import org.elasticsearch.xcontent.XContentBuilder; | ||
|
||
import java.io.IOException; | ||
import java.util.Map; | ||
import java.util.Objects; | ||
|
||
import static org.apache.lucene.codecs.lucene90.Lucene90HnswVectorsFormat.DEFAULT_MAX_CONN; | ||
import static org.apache.lucene.codecs.lucene90.Lucene90HnswVectorsFormat.DEFAULT_BEAM_WIDTH; | ||
|
||
/** | ||
* Field mapper for a vector field for ann search. | ||
*/ | ||
|
||
public abstract class VectorFieldMapper extends FieldMapper { | ||
public static final IndexOptions DEFAULT_INDEX_OPTIONS = new HNSWIndexOptions(DEFAULT_MAX_CONN, DEFAULT_BEAM_WIDTH); | ||
protected final IndexOptions indexOptions; | ||
|
||
protected VectorFieldMapper(String simpleName, MappedFieldType mappedFieldType, MultiFields multiFields, CopyTo copyTo, | ||
IndexOptions indexOptions) { | ||
super(simpleName, mappedFieldType, multiFields, copyTo); | ||
this.indexOptions = indexOptions; | ||
} | ||
|
||
/** | ||
* Returns the knn vectors format that is customly set up for this field or {@code null} if | ||
* the format is not set up or if the set up format matches the default format. | ||
* @return the knn vectors format for the field, or {@code null} if the default format should be used | ||
*/ | ||
public KnnVectorsFormat getKnnVectorsFormatForField() { | ||
if (indexOptions == null && indexOptions == DEFAULT_INDEX_OPTIONS) { | ||
return null; | ||
} else { | ||
HNSWIndexOptions hnswIndexOptions = (HNSWIndexOptions) indexOptions; | ||
return new Lucene90HnswVectorsFormat(hnswIndexOptions.m, hnswIndexOptions.efConstruction); | ||
} | ||
} | ||
|
||
public static IndexOptions parseVectorIndexOptions(String fieldName, Object propNode) { | ||
if (propNode == null) { | ||
return null; | ||
} | ||
Map<?, ?> indexOptionsMap = (Map<?, ?>) propNode; | ||
String type = XContentMapValues.nodeStringValue(indexOptionsMap.remove("type"), "hnsw"); | ||
if (type.equals("hnsw")) { | ||
return HNSWIndexOptions.parseIndexOptions(fieldName, indexOptionsMap); | ||
} else { | ||
throw new MapperParsingException("Unknown vector index options type [" + type + "] for field [" + fieldName + "]"); | ||
} | ||
} | ||
|
||
public abstract static class IndexOptions implements ToXContent { | ||
protected final String type; | ||
public IndexOptions(String type) { | ||
this.type = type; | ||
} | ||
} | ||
|
||
public static class HNSWIndexOptions extends IndexOptions { | ||
private final int m; | ||
private final int efConstruction; | ||
|
||
public HNSWIndexOptions(int m, int efConstruction) { | ||
super("hnsw"); | ||
this.m = m; | ||
this.efConstruction = efConstruction; | ||
} | ||
|
||
public int m() { | ||
return m; | ||
} | ||
|
||
public int efConstruction() { | ||
return efConstruction; | ||
} | ||
|
||
public static IndexOptions parseIndexOptions(String fieldName, Map<?, ?> indexOptionsMap) { | ||
int m = XContentMapValues.nodeIntegerValue(indexOptionsMap.remove("m"), DEFAULT_MAX_CONN); | ||
int efConstruction = XContentMapValues.nodeIntegerValue(indexOptionsMap.remove("ef_construction"), DEFAULT_BEAM_WIDTH); | ||
MappingParser.checkNoRemainingFields(fieldName, indexOptionsMap); | ||
if (m == DEFAULT_MAX_CONN && efConstruction == DEFAULT_BEAM_WIDTH) { | ||
return VectorFieldMapper.DEFAULT_INDEX_OPTIONS; | ||
} else { | ||
return new HNSWIndexOptions(m, efConstruction); | ||
} | ||
} | ||
|
||
@Override | ||
public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException { | ||
builder.startObject(); | ||
builder.field("type", type); | ||
builder.field("m", m); | ||
builder.field("ef_construction", efConstruction); | ||
builder.endObject(); | ||
return builder; | ||
} | ||
|
||
@Override | ||
public boolean equals(Object o) { | ||
if (this == o) return true; | ||
if (o == null || getClass() != o.getClass()) return false; | ||
HNSWIndexOptions that = (HNSWIndexOptions) o; | ||
return m == that.m && efConstruction == that.efConstruction; | ||
} | ||
|
||
@Override | ||
public int hashCode() { | ||
return Objects.hash(type, m, efConstruction); | ||
} | ||
|
||
@Override | ||
public String toString() { | ||
return "{type=" + type + ", m=" + m + ", ef_construction=" + efConstruction + " }"; | ||
} | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.