Skip to content

Commit

Permalink
Adding new bbq index types behind a feature flag
Browse files Browse the repository at this point in the history
  • Loading branch information
benwtrent committed Oct 9, 2024
1 parent f6bf506 commit 7649ff8
Show file tree
Hide file tree
Showing 8 changed files with 543 additions and 19 deletions.
Original file line number Diff line number Diff line change
@@ -0,0 +1,144 @@
setup:
- requires:
cluster_features: "mapper.vectors.bbq"
reason: 'kNN float to better-binary quantization is required'
- do:
indices.create:
index: bbq_hnsw
body:
settings:
index:
number_of_shards: 1
mappings:
properties:
name:
type: keyword
vector:
type: dense_vector
dims: 65
index: true
similarity: l2_norm
index_options:
type: bbq_hnsw
another_vector:
type: dense_vector
dims: 65
index: true
similarity: l2_norm
index_options:
type: bbq_hnsw

- do:
index:
index: bbq_hnsw
id: "1"
body:
name: cow.jpg
vector: [230.0, 300.33, -34.8988, 15.555, -200.0, 230.0, 300.33, -34.8988, 15.555, -200.0, 230.0, 300.33, -34.8988, 15.555, -200.0, 230.0, 300.33, -34.8988, 15.555, -200.0, 230.0, 300.33, -34.8988, 15.555, -200.0, 230.0, 300.33, -34.8988, 15.555, -200.0, 230.0, 300.33, -34.8988, 15.555, -200.0, 230.0, 300.33, -34.8988, 15.555, -200.0, 230.0, 300.33, -34.8988, 15.555, -200.0, 230.0, 300.33, -34.8988, 15.555, -200.0, 230.0, 300.33, -34.8988, 15.555, -200.0, 230.0, 300.33, -34.8988, 15.555, -200.0, 230.0, 300.33, -34.8988, 15.555, -200.0]
another_vector: [130.0, 115.0, -1.02, 15.555, -100.0, 130.0, 115.0, -1.02, 15.555, -100.0, 130.0, 115.0, -1.02, 15.555, -100.0, 130.0, 115.0, -1.02, 15.555, -100.0, 130.0, 115.0, -1.02, 15.555, -100.0, 130.0, 115.0, -1.02, 15.555, -100.0, 130.0, 115.0, -1.02, 15.555, -100.0, 130.0, 115.0, -1.02, 15.555, -100.0, 130.0, 115.0, -1.02, 15.555, -100.0, 130.0, 115.0, -1.02, 15.555, -100.0, 130.0, 115.0, -1.02, 15.555, -100.0, 130.0, 115.0, -1.02, 15.555, -100.0, 130.0, 115.0, -1.02, 15.555, -100.0]
# Flush in order to provoke a merge later
- do:
indices.flush:
index: bbq_hnsw

- do:
index:
index: bbq_hnsw
id: "2"
body:
name: moose.jpg
vector: [-0.5, 100.0, -13, 14.8, -156.0, -0.5, 100.0, -13, 14.8, -156.0, -0.5, 100.0, -13, 14.8, -156.0, -0.5, 100.0, -13, 14.8, -156.0, -0.5, 100.0, -13, 14.8, -156.0, -0.5, 100.0, -13, 14.8, -156.0, -0.5, 100.0, -13, 14.8, -156.0, -0.5, 100.0, -13, 14.8, -156.0, -0.5, 100.0, -13, 14.8, -156.0, -0.5, 100.0, -13, 14.8, -156.0, -0.5, 100.0, -13, 14.8, -156.0, -0.5, 100.0, -13, 14.8, -156.0, -0.5, 100.0, -13, 14.8, -156.0]
another_vector: [-0.5, 50.0, -1, 1, 120, -0.5, 50.0, -1, 1, 120, -0.5, 50.0, -1, 1, 120, -0.5, 50.0, -1, 1, 120, -0.5, 50.0, -1, 1, 120, -0.5, 50.0, -1, 1, 120, -0.5, 50.0, -1, 1, 120, -0.5, 50.0, -1, 1, 120, -0.5, 50.0, -1, 1, 120, -0.5, 50.0, -1, 1, 120, -0.5, 50.0, -1, 1, 120, -0.5, 50.0, -1, 1, 120, -0.5, 50.0, -1, 1, 120]
# Flush in order to provoke a merge later
- do:
indices.flush:
index: bbq_hnsw

- do:
index:
index: bbq_hnsw
id: "3"
body:
name: rabbit.jpg
vector: [0.5, 111.3, -13.0, 14.8, -156.0, 0.5, 111.3, -13.0, 14.8, -156.0, 0.5, 111.3, -13.0, 14.8, -156.0, 0.5, 111.3, -13.0, 14.8, -156.0, 0.5, 111.3, -13.0, 14.8, -156.0, 0.5, 111.3, -13.0, 14.8, -156.0, 0.5, 111.3, -13.0, 14.8, -156.0, 0.5, 111.3, -13.0, 14.8, -156.0, 0.5, 111.3, -13.0, 14.8, -156.0, 0.5, 111.3, -13.0, 14.8, -156.0, 0.5, 111.3, -13.0, 14.8, -156.0, 0.5, 111.3, -13.0, 14.8, -156.0, 0.5, 111.3, -13.0, 14.8, -156.0]
another_vector: [-0.5, 11.0, 0, 12, 111.0, -0.5, 11.0, 0, 12, 111.0, -0.5, 11.0, 0, 12, 111.0, -0.5, 11.0, 0, 12, 111.0, -0.5, 11.0, 0, 12, 111.0, -0.5, 11.0, 0, 12, 111.0, -0.5, 11.0, 0, 12, 111.0, -0.5, 11.0, 0, 12, 111.0, -0.5, 11.0, 0, 12, 111.0, -0.5, 11.0, 0, 12, 111.0, -0.5, 11.0, 0, 12, 111.0, -0.5, 11.0, 0, 12, 111.0, -0.5, 11.0, 0, 12, 111.0]
# Flush in order to provoke a merge later
- do:
indices.flush:
index: bbq_hnsw

- do:
indices.forcemerge:
index: bbq_hnsw
max_num_segments: 1
---
"Test bad quantization parameters":
# Negative cases: each index creation below is expected to be rejected with bad_request.
# Case 1: bbq_hnsw index_options combined with element_type: byte.
- do:
catch: bad_request
indices.create:
index: bad_bbq_hnsw
body:
mappings:
properties:
vector:
type: dense_vector
dims: 64
element_type: byte
index: true
index_options:
type: bbq_hnsw

# Case 2: bbq_hnsw index_options supplied on a vector field declared with index: false.
- do:
catch: bad_request
indices.create:
index: bad_bbq_hnsw
body:
mappings:
properties:
vector:
type: dense_vector
dims: 64
index: false
index_options:
type: bbq_hnsw
---
"Test few dimensions fail indexing":
# verify index creation fails when the mapping declares too few dimensions (42)
- do:
catch: bad_request
indices.create:
index: bad_bbq_hnsw
body:
mappings:
properties:
vector:
type: dense_vector
dims: 42
index: true
index_options:
type: bbq_hnsw

# create an index with no explicit dims; creation itself is expected to succeed
- do:
indices.create:
index: dynamic_dim_bbq_hnsw
body:
mappings:
properties:
vector:
type: dense_vector
index: true
similarity: l2_norm
index_options:
type: bbq_hnsw

# verify indexing fails for a vector with too few dimensions (5 elements)
- do:
catch: bad_request
index:
index: dynamic_dim_bbq_hnsw
body:
vector: [1.0, 2.0, 3.0, 4.0, 5.0]

# verify a 64-element vector can still be indexed after the rejected document above
- do:
index:
index: dynamic_dim_bbq_hnsw
body:
vector: [1.0, 2.0, 3.0, 4.0, 1.0, 2.0, 3.0, 4.0, 1.0, 2.0, 3.0, 4.0, 1.0, 2.0, 3.0, 4.0, 1.0, 2.0, 3.0, 4.0, 1.0, 2.0, 3.0, 4.0, 1.0, 2.0, 3.0, 4.0, 1.0, 2.0, 3.0, 4.0, 1.0, 2.0, 3.0, 4.0, 1.0, 2.0, 3.0, 4.0, 1.0, 2.0, 3.0, 4.0, 1.0, 2.0, 3.0, 4.0, 1.0, 2.0, 3.0, 4.0, 1.0, 2.0, 3.0, 4.0, 1.0, 2.0, 3.0, 4.0, 1.0, 2.0, 3.0, 4.0]
Original file line number Diff line number Diff line change
@@ -0,0 +1,150 @@
setup:
- requires:
cluster_features: "mapper.vectors.bbq"
reason: 'kNN float to better-binary quantization is required'
- do:
indices.create:
index: bbq_flat
body:
settings:
index:
number_of_shards: 1
mappings:
properties:
name:
type: keyword
vector:
type: dense_vector
dims: 65
index: true
similarity: l2_norm
index_options:
type: bbq_flat
another_vector:
type: dense_vector
dims: 65
index: true
similarity: l2_norm
index_options:
type: bbq_flat

- do:
index:
index: bbq_flat
id: "1"
body:
name: cow.jpg
vector: [230.0, 300.33, -34.8988, 15.555, -200.0, 230.0, 300.33, -34.8988, 15.555, -200.0, 230.0, 300.33, -34.8988, 15.555, -200.0, 230.0, 300.33, -34.8988, 15.555, -200.0, 230.0, 300.33, -34.8988, 15.555, -200.0, 230.0, 300.33, -34.8988, 15.555, -200.0, 230.0, 300.33, -34.8988, 15.555, -200.0, 230.0, 300.33, -34.8988, 15.555, -200.0, 230.0, 300.33, -34.8988, 15.555, -200.0, 230.0, 300.33, -34.8988, 15.555, -200.0, 230.0, 300.33, -34.8988, 15.555, -200.0, 230.0, 300.33, -34.8988, 15.555, -200.0, 230.0, 300.33, -34.8988, 15.555, -200.0]
another_vector: [130.0, 115.0, -1.02, 15.555, -100.0, 130.0, 115.0, -1.02, 15.555, -100.0, 130.0, 115.0, -1.02, 15.555, -100.0, 130.0, 115.0, -1.02, 15.555, -100.0, 130.0, 115.0, -1.02, 15.555, -100.0, 130.0, 115.0, -1.02, 15.555, -100.0, 130.0, 115.0, -1.02, 15.555, -100.0, 130.0, 115.0, -1.02, 15.555, -100.0, 130.0, 115.0, -1.02, 15.555, -100.0, 130.0, 115.0, -1.02, 15.555, -100.0, 130.0, 115.0, -1.02, 15.555, -100.0, 130.0, 115.0, -1.02, 15.555, -100.0, 130.0, 115.0, -1.02, 15.555, -100.0]
# Flush in order to provoke a merge later
- do:
indices.flush:
index: bbq_flat

- do:
index:
index: bbq_flat
id: "2"
body:
name: moose.jpg
vector: [-0.5, 100.0, -13, 14.8, -156.0, -0.5, 100.0, -13, 14.8, -156.0, -0.5, 100.0, -13, 14.8, -156.0, -0.5, 100.0, -13, 14.8, -156.0, -0.5, 100.0, -13, 14.8, -156.0, -0.5, 100.0, -13, 14.8, -156.0, -0.5, 100.0, -13, 14.8, -156.0, -0.5, 100.0, -13, 14.8, -156.0, -0.5, 100.0, -13, 14.8, -156.0, -0.5, 100.0, -13, 14.8, -156.0, -0.5, 100.0, -13, 14.8, -156.0, -0.5, 100.0, -13, 14.8, -156.0, -0.5, 100.0, -13, 14.8, -156.0]
another_vector: [-0.5, 50.0, -1, 1, 120, -0.5, 50.0, -1, 1, 120, -0.5, 50.0, -1, 1, 120, -0.5, 50.0, -1, 1, 120, -0.5, 50.0, -1, 1, 120, -0.5, 50.0, -1, 1, 120, -0.5, 50.0, -1, 1, 120, -0.5, 50.0, -1, 1, 120, -0.5, 50.0, -1, 1, 120, -0.5, 50.0, -1, 1, 120, -0.5, 50.0, -1, 1, 120, -0.5, 50.0, -1, 1, 120, -0.5, 50.0, -1, 1, 120]
# Flush in order to provoke a merge later
- do:
indices.flush:
index: bbq_flat

- do:
index:
index: bbq_flat
id: "3"
body:
name: rabbit.jpg
vector: [0.5, 111.3, -13.0, 14.8, -156.0, 0.5, 111.3, -13.0, 14.8, -156.0, 0.5, 111.3, -13.0, 14.8, -156.0, 0.5, 111.3, -13.0, 14.8, -156.0, 0.5, 111.3, -13.0, 14.8, -156.0, 0.5, 111.3, -13.0, 14.8, -156.0, 0.5, 111.3, -13.0, 14.8, -156.0, 0.5, 111.3, -13.0, 14.8, -156.0, 0.5, 111.3, -13.0, 14.8, -156.0, 0.5, 111.3, -13.0, 14.8, -156.0, 0.5, 111.3, -13.0, 14.8, -156.0, 0.5, 111.3, -13.0, 14.8, -156.0, 0.5, 111.3, -13.0, 14.8, -156.0]
another_vector: [-0.5, 11.0, 0, 12, 111.0, -0.5, 11.0, 0, 12, 111.0, -0.5, 11.0, 0, 12, 111.0, -0.5, 11.0, 0, 12, 111.0, -0.5, 11.0, 0, 12, 111.0, -0.5, 11.0, 0, 12, 111.0, -0.5, 11.0, 0, 12, 111.0, -0.5, 11.0, 0, 12, 111.0, -0.5, 11.0, 0, 12, 111.0, -0.5, 11.0, 0, 12, 111.0, -0.5, 11.0, 0, 12, 111.0, -0.5, 11.0, 0, 12, 111.0, -0.5, 11.0, 0, 12, 111.0]
# Flush in order to provoke a merge later
- do:
indices.flush:
index: bbq_flat

- do:
indices.forcemerge:
index: bbq_flat
max_num_segments: 1
---
"Test bad parameters":
# Negative cases: each index creation below is expected to be rejected with bad_request.
# Case 1: bbq_flat index_options carrying an extra `m` setting.
# NOTE(review): presumably rejected because `m` is an HNSW-only option — confirm against the mapper.
- do:
catch: bad_request
indices.create:
index: bad_bbq_flat
body:
mappings:
properties:
vector:
type: dense_vector
dims: 64
index: true
index_options:
type: bbq_flat
m: 42

# Case 2: bbq_flat index_options combined with element_type: byte.
- do:
catch: bad_request
indices.create:
index: bad_bbq_flat
body:
mappings:
properties:
vector:
type: dense_vector
dims: 64
element_type: byte
index: true
index_options:
type: bbq_flat
---
"Test few dimensions fail indexing":
# verify index creation fails
- do:
catch: bad_request
indices.create:
index: bad_bbq_flat
body:
mappings:
properties:
vector:
type: dense_vector
dims: 42
index: true
similarity: l2_norm
index_options:
type: bbq_flat

# create an index with no explicit dims (dynamic dimensions); creation itself is expected to succeed
- do:
indices.create:
index: dynamic_dim_bbq_flat
body:
mappings:
properties:
vector:
type: dense_vector
index: true
similarity: l2_norm
index_options:
type: bbq_flat

# verify indexing fails for a vector with too few dimensions (5 elements)
- do:
catch: bad_request
index:
index: dynamic_dim_bbq_flat
body:
vector: [1.0, 2.0, 3.0, 4.0, 5.0]

# verify that we can index a 64-element vector after the too-few-dimensions failure
- do:
index:
index: dynamic_dim_bbq_flat
body:
vector: [1.0, 2.0, 3.0, 4.0, 1.0, 2.0, 3.0, 4.0, 1.0, 2.0, 3.0, 4.0, 1.0, 2.0, 3.0, 4.0, 1.0, 2.0, 3.0, 4.0, 1.0, 2.0, 3.0, 4.0, 1.0, 2.0, 3.0, 4.0, 1.0, 2.0, 3.0, 4.0, 1.0, 2.0, 3.0, 4.0, 1.0, 2.0, 3.0, 4.0, 1.0, 2.0, 3.0, 4.0, 1.0, 2.0, 3.0, 4.0, 1.0, 2.0, 3.0, 4.0, 1.0, 2.0, 3.0, 4.0, 1.0, 2.0, 3.0, 4.0, 1.0, 2.0, 3.0, 4.0]
7 changes: 4 additions & 3 deletions server/src/main/java/module-info.java
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,6 @@
* License v3.0 only", or the "Server Side Public License, v 1".
*/

import org.elasticsearch.index.codec.tsdb.ES87TSDBDocValuesFormat;
import org.elasticsearch.plugins.internal.RestExtension;

/** The Elasticsearch Server Module. */
Expand Down Expand Up @@ -446,14 +445,16 @@
org.elasticsearch.index.codec.bloomfilter.ES85BloomFilterPostingsFormat,
org.elasticsearch.index.codec.bloomfilter.ES87BloomFilterPostingsFormat,
org.elasticsearch.index.codec.postings.ES812PostingsFormat;
provides org.apache.lucene.codecs.DocValuesFormat with ES87TSDBDocValuesFormat;
provides org.apache.lucene.codecs.DocValuesFormat with org.elasticsearch.index.codec.tsdb.ES87TSDBDocValuesFormat;
provides org.apache.lucene.codecs.KnnVectorsFormat
with
org.elasticsearch.index.codec.vectors.ES813FlatVectorFormat,
org.elasticsearch.index.codec.vectors.ES813Int8FlatVectorFormat,
org.elasticsearch.index.codec.vectors.ES814HnswScalarQuantizedVectorsFormat,
org.elasticsearch.index.codec.vectors.ES815HnswBitVectorsFormat,
org.elasticsearch.index.codec.vectors.ES815BitFlatVectorFormat;
org.elasticsearch.index.codec.vectors.ES815BitFlatVectorFormat,
org.elasticsearch.index.codec.vectors.ES816BinaryQuantizedVectorsFormat,
org.elasticsearch.index.codec.vectors.ES816HnswBinaryQuantizedVectorsFormat;

provides org.apache.lucene.codecs.Codec
with
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@

package org.elasticsearch.index.mapper;

import org.elasticsearch.common.util.set.Sets;
import org.elasticsearch.features.FeatureSpecification;
import org.elasticsearch.features.NodeFeature;
import org.elasticsearch.index.IndexSettings;
Expand All @@ -28,7 +29,7 @@ public class MapperFeatures implements FeatureSpecification {

@Override
public Set<NodeFeature> getFeatures() {
return Set.of(
Set<NodeFeature> features = Set.of(
BWC_WORKAROUND_9_0,
IgnoredSourceFieldMapper.TRACK_IGNORED_SOURCE,
PassThroughObjectMapper.PASS_THROUGH_PRIORITY,
Expand All @@ -54,6 +55,11 @@ public Set<NodeFeature> getFeatures() {
TimeSeriesRoutingHashFieldMapper.TS_ROUTING_HASH_FIELD_PARSES_BYTES_REF,
FlattenedFieldMapper.IGNORE_ABOVE_WITH_ARRAYS_SUPPORT
);
// BBQ is currently behind a feature flag for testing
if (DenseVectorFieldMapper.BBQ_FEATURE_FLAG.isEnabled()) {
return Sets.union(features, Set.of(DenseVectorFieldMapper.BBQ_FORMAT));
}
return features;
}

@Override
Expand Down
Loading

0 comments on commit 7649ff8

Please sign in to comment.