Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Even better(er) binary quantization #117994

Merged
Show file tree
Hide file tree
Changes from 7 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions docs/changelog/117994.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
pr: 117994
summary: Even better(er) binary quantization
area: Vector Search
type: enhancement
issues: []
Original file line number Diff line number Diff line change
Expand Up @@ -11,20 +11,11 @@ setup:
number_of_shards: 1
mappings:
properties:
name:
type: keyword
vector:
type: dense_vector
dims: 64
index: true
similarity: l2_norm
index_options:
type: bbq_hnsw
another_vector:
type: dense_vector
dims: 64
index: true
similarity: l2_norm
similarity: max_inner_product
index_options:
type: bbq_hnsw

Expand All @@ -33,9 +24,14 @@ setup:
index: bbq_hnsw
id: "1"
body:
name: cow.jpg
vector: [300.33, -34.8988, 15.555, -200.0, 230.0, 300.33, -34.8988, 15.555, -200.0, 230.0, 300.33, -34.8988, 15.555, -200.0, 230.0, 300.33, -34.8988, 15.555, -200.0, 230.0, 300.33, -34.8988, 15.555, -200.0, 230.0, 300.33, -34.8988, 15.555, -200.0, 230.0, 300.33, -34.8988, 15.555, -200.0, 230.0, 300.33, -34.8988, 15.555, -200.0, 230.0, 300.33, -34.8988, 15.555, -200.0, 230.0, 300.33, -34.8988, 15.555, -200.0, 230.0, 300.33, -34.8988, 15.555, -200.0, 230.0, 300.33, -34.8988, 15.555, -200.0, 230.0, 300.33, -34.8988, 15.555, -200.0]
another_vector: [115.0, -1.02, 15.555, -100.0, 130.0, 115.0, -1.02, 15.555, -100.0, 130.0, 115.0, -1.02, 15.555, -100.0, 130.0, 115.0, -1.02, 15.555, -100.0, 130.0, 115.0, -1.02, 15.555, -100.0, 130.0, 115.0, -1.02, 15.555, -100.0, 130.0, 115.0, -1.02, 15.555, -100.0, 130.0, 115.0, -1.02, 15.555, -100.0, 130.0, 115.0, -1.02, 15.555, -100.0, 130.0, 115.0, -1.02, 15.555, -100.0, 130.0, 115.0, -1.02, 15.555, -100.0, 130.0, 115.0, -1.02, 15.555, -100.0, 130.0, 115.0, -1.02, 15.555, -100.0]
vector: [0.077, 0.32 , -0.205, 0.63 , 0.032, 0.201, 0.167, -0.313,
0.176, 0.531, -0.375, 0.334, -0.046, 0.078, -0.349, 0.272,
0.307, -0.083, 0.504, 0.255, -0.404, 0.289, -0.226, -0.132,
-0.216, 0.49 , 0.039, 0.507, -0.307, 0.107, 0.09 , -0.265,
-0.285, 0.336, -0.272, 0.369, -0.282, 0.086, -0.132, 0.475,
-0.224, 0.203, 0.439, 0.064, 0.246, -0.396, 0.297, 0.242,
-0.028, 0.321, -0.022, -0.009, -0.001 , 0.031, -0.533, 0.45,
-0.683, 1.331, 0.194, -0.157, -0.1 , -0.279, -0.098, -0.176]
# Flush in order to provoke a merge later
- do:
indices.flush:
Expand All @@ -46,9 +42,14 @@ setup:
index: bbq_hnsw
id: "2"
body:
name: moose.jpg
vector: [100.0, -13, 14.8, -156.0, -0.5, 100.0, -13, 14.8, -156.0, -0.5, 100.0, -13, 14.8, -156.0, -0.5, 100.0, -13, 14.8, -156.0, -0.5, 100.0, -13, 14.8, -156.0, -0.5, 100.0, -13, 14.8, -156.0, -0.5, 100.0, -13, 14.8, -156.0, -0.5, 100.0, -13, 14.8, -156.0, -0.5, 100.0, -13, 14.8, -156.0, -0.5, 100.0, -13, 14.8, -156.0, -0.5, 100.0, -13, 14.8, -156.0, -0.5, 100.0, -13, 14.8, -156.0, -0.5, 100.0, -13, 14.8, -156.0]
another_vector: [50.0, -1, 1, 120, -0.5, 50.0, -1, 1, 120, -0.5, 50.0, -1, 1, 120, -0.5, 50.0, -1, 1, 120, -0.5, 50.0, -1, 1, 120, -0.5, 50.0, -1, 1, 120, -0.5, 50.0, -1, 1, 120, -0.5, 50.0, -1, 1, 120, -0.5, 50.0, -1, 1, 120, -0.5, 50.0, -1, 1, 120, -0.5, 50.0, -1, 1, 120, -0.5, 50.0, -1, 1, 120, -0.5, 50.0, -1, 1, 120]
vector: [0.196, 0.514, 0.039, 0.555, -0.042, 0.242, 0.463, -0.348,
-0.08 , 0.442, -0.067, -0.05 , -0.001, 0.298, -0.377, 0.048,
0.307, 0.159, 0.278, 0.119, -0.057, 0.333, -0.289, -0.438,
-0.014, 0.361, -0.169, 0.292, -0.229, 0.123, 0.031, -0.138,
-0.139, 0.315, -0.216, 0.322, -0.445, -0.059, 0.071, 0.429,
-0.602, -0.142, 0.11 , 0.192, 0.259, -0.241, 0.181, -0.166,
0.082, 0.107, -0.05 , 0.155, 0.011, 0.161, -0.486, 0.569,
-0.489, 0.901, 0.208, 0.011, -0.209, -0.153, -0.27 , -0.013]
# Flush in order to provoke a merge later
- do:
indices.flush:
Expand All @@ -60,8 +61,14 @@ setup:
id: "3"
body:
name: rabbit.jpg
vector: [111.3, -13.0, 14.8, -156.0, 0.5, 111.3, -13.0, 14.8, -156.0, 0.5, 111.3, -13.0, 14.8, -156.0, 0.5, 111.3, -13.0, 14.8, -156.0, 0.5, 111.3, -13.0, 14.8, -156.0, 0.5, 111.3, -13.0, 14.8, -156.0, 0.5, 111.3, -13.0, 14.8, -156.0, 0.5, 111.3, -13.0, 14.8, -156.0, 0.5, 111.3, -13.0, 14.8, -156.0, 0.5, 111.3, -13.0, 14.8, -156.0, 0.5, 111.3, -13.0, 14.8, -156.0, 0.5, 111.3, -13.0, 14.8, -156.0, 0.5, 111.3, -13.0, 14.8, -156.0]
another_vector: [11.0, 0, 12, 111.0, -0.5, 11.0, 0, 12, 111.0, -0.5, 11.0, 0, 12, 111.0, -0.5, 11.0, 0, 12, 111.0, -0.5, 11.0, 0, 12, 111.0, -0.5, 11.0, 0, 12, 111.0, -0.5, 11.0, 0, 12, 111.0, -0.5, 11.0, 0, 12, 111.0, -0.5, 11.0, 0, 12, 111.0, -0.5, 11.0, 0, 12, 111.0, -0.5, 11.0, 0, 12, 111.0, -0.5, 11.0, 0, 12, 111.0, -0.5, 11.0, 0, 12, 111.0]
vector: [0.139, 0.178, -0.117, 0.399, 0.014, -0.139, 0.347, -0.33 ,
0.139, 0.34 , -0.052, -0.052, -0.249, 0.327, -0.288, 0.049,
0.464, 0.338, 0.516, 0.247, -0.104, 0.259, -0.209, -0.246,
-0.11 , 0.323, 0.091, 0.442, -0.254, 0.195, -0.109, -0.058,
-0.279, 0.402, -0.107, 0.308, -0.273, 0.019, 0.082, 0.399,
-0.658, -0.03 , 0.276, 0.041, 0.187, -0.331, 0.165, 0.017,
0.171, -0.203, -0.198, 0.115, -0.007, 0.337, -0.444, 0.615,
-0.657, 1.285, 0.2 , -0.062, 0.038, 0.089, -0.068, -0.058]
# Flush in order to provoke a merge later
- do:
indices.flush:
Expand All @@ -73,20 +80,33 @@ setup:
max_num_segments: 1
---
"Test knn search":
- requires:
capabilities:
- method: POST
path: /_search
capabilities: [ optimized_scalar_quantization_bbq ]
test_runner_features: capabilities
reason: "BBQ scoring improved and changed with optimized_scalar_quantization_bbq"
- do:
search:
index: bbq_hnsw
body:
knn:
field: vector
query_vector: [ 90.0, -10, 14.8, -156.0, -0.5, 90.0, -10, 14.8, -156.0, -0.5, 90.0, -10, 14.8, -156.0, -0.5, 90.0, -10, 14.8, -156.0, -0.5, 90.0, -10, 14.8, -156.0, -0.5, 90.0, -10, 14.8, -156.0, -0.5, 90.0, -10, 14.8, -156.0, -0.5, 90.0, -10, 14.8, -156.0, -0.5, 90.0, -10, 14.8, -156.0, -0.5, 90.0, -10, 14.8, -156.0, -0.5, 90.0, -10, 14.8, -156.0, -0.5, 90.0, -10, 14.8, -156.0, -0.5, 90.0, -10, 14.8, -156.0]
query_vector: [0.128, 0.067, -0.08 , 0.395, -0.11 , -0.259, 0.473, -0.393,
0.292, 0.571, -0.491, 0.444, -0.288, 0.198, -0.343, 0.015,
0.232, 0.088, 0.228, 0.151, -0.136, 0.236, -0.273, -0.259,
-0.217, 0.359, -0.207, 0.352, -0.142, 0.192, -0.061, -0.17 ,
-0.343, 0.189, -0.221, 0.32 , -0.301, -0.1 , 0.005, 0.232,
-0.344, 0.136, 0.252, 0.157, -0.13 , -0.244, 0.193, -0.034,
-0.12 , -0.193, -0.102, 0.252, -0.185, -0.167, -0.575, 0.582,
-0.426, 0.983, 0.212, 0.204, 0.03 , -0.276, -0.425, -0.158]
k: 3
num_candidates: 3

# Depending on how things are distributed, docs 2 and 3 might be swapped
# here we verify that our last hit is always the worst one
- match: { hits.hits.2._id: "1" }

- match: { hits.hits.0._id: "1" }
- match: { hits.hits.1._id: "3" }
- match: { hits.hits.2._id: "2" }
---
"Test bad quantization parameters":
- do:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -11,20 +11,11 @@ setup:
number_of_shards: 1
mappings:
properties:
name:
type: keyword
vector:
type: dense_vector
dims: 64
index: true
similarity: l2_norm
index_options:
type: bbq_flat
another_vector:
type: dense_vector
dims: 64
index: true
similarity: l2_norm
similarity: max_inner_product
index_options:
type: bbq_flat

Expand All @@ -33,9 +24,14 @@ setup:
index: bbq_flat
id: "1"
body:
name: cow.jpg
vector: [300.33, -34.8988, 15.555, -200.0, 230.0, 300.33, -34.8988, 15.555, -200.0, 230.0, 300.33, -34.8988, 15.555, -200.0, 230.0, 300.33, -34.8988, 15.555, -200.0, 230.0, 300.33, -34.8988, 15.555, -200.0, 230.0, 300.33, -34.8988, 15.555, -200.0, 230.0, 300.33, -34.8988, 15.555, -200.0, 230.0, 300.33, -34.8988, 15.555, -200.0, 230.0, 300.33, -34.8988, 15.555, -200.0, 230.0, 300.33, -34.8988, 15.555, -200.0, 230.0, 300.33, -34.8988, 15.555, -200.0, 230.0, 300.33, -34.8988, 15.555, -200.0, 230.0, 300.33, -34.8988, 15.555, -200.0]
another_vector: [115.0, -1.02, 15.555, -100.0, 130.0, 115.0, -1.02, 15.555, -100.0, 130.0, 115.0, -1.02, 15.555, -100.0, 130.0, 115.0, -1.02, 15.555, -100.0, 130.0, 115.0, -1.02, 15.555, -100.0, 130.0, 115.0, -1.02, 15.555, -100.0, 130.0, 115.0, -1.02, 15.555, -100.0, 130.0, 115.0, -1.02, 15.555, -100.0, 130.0, 115.0, -1.02, 15.555, -100.0, 130.0, 115.0, -1.02, 15.555, -100.0, 130.0, 115.0, -1.02, 15.555, -100.0, 130.0, 115.0, -1.02, 15.555, -100.0, 130.0, 115.0, -1.02, 15.555, -100.0]
vector: [0.077, 0.32 , -0.205, 0.63 , 0.032, 0.201, 0.167, -0.313,
0.176, 0.531, -0.375, 0.334, -0.046, 0.078, -0.349, 0.272,
0.307, -0.083, 0.504, 0.255, -0.404, 0.289, -0.226, -0.132,
-0.216, 0.49 , 0.039, 0.507, -0.307, 0.107, 0.09 , -0.265,
-0.285, 0.336, -0.272, 0.369, -0.282, 0.086, -0.132, 0.475,
-0.224, 0.203, 0.439, 0.064, 0.246, -0.396, 0.297, 0.242,
-0.028, 0.321, -0.022, -0.009, -0.001 , 0.031, -0.533, 0.45,
-0.683, 1.331, 0.194, -0.157, -0.1 , -0.279, -0.098, -0.176]
# Flush in order to provoke a merge later
- do:
indices.flush:
Expand All @@ -46,9 +42,14 @@ setup:
index: bbq_flat
id: "2"
body:
name: moose.jpg
vector: [100.0, -13, 14.8, -156.0, -0.5, 100.0, -13, 14.8, -156.0, -0.5, 100.0, -13, 14.8, -156.0, -0.5, 100.0, -13, 14.8, -156.0, -0.5, 100.0, -13, 14.8, -156.0, -0.5, 100.0, -13, 14.8, -156.0, -0.5, 100.0, -13, 14.8, -156.0, -0.5, 100.0, -13, 14.8, -156.0, -0.5, 100.0, -13, 14.8, -156.0, -0.5, 100.0, -13, 14.8, -156.0, -0.5, 100.0, -13, 14.8, -156.0, -0.5, 100.0, -13, 14.8, -156.0, -0.5, 100.0, -13, 14.8, -156.0]
another_vector: [50.0, -1, 1, 120, -0.5, 50.0, -1, 1, 120, -0.5, 50.0, -1, 1, 120, -0.5, 50.0, -1, 1, 120, -0.5, 50.0, -1, 1, 120, -0.5, 50.0, -1, 1, 120, -0.5, 50.0, -1, 1, 120, -0.5, 50.0, -1, 1, 120, -0.5, 50.0, -1, 1, 120, -0.5, 50.0, -1, 1, 120, -0.5, 50.0, -1, 1, 120, -0.5, 50.0, -1, 1, 120, -0.5, 50.0, -1, 1, 120]
vector: [0.196, 0.514, 0.039, 0.555, -0.042, 0.242, 0.463, -0.348,
-0.08 , 0.442, -0.067, -0.05 , -0.001, 0.298, -0.377, 0.048,
0.307, 0.159, 0.278, 0.119, -0.057, 0.333, -0.289, -0.438,
-0.014, 0.361, -0.169, 0.292, -0.229, 0.123, 0.031, -0.138,
-0.139, 0.315, -0.216, 0.322, -0.445, -0.059, 0.071, 0.429,
-0.602, -0.142, 0.11 , 0.192, 0.259, -0.241, 0.181, -0.166,
0.082, 0.107, -0.05 , 0.155, 0.011, 0.161, -0.486, 0.569,
-0.489, 0.901, 0.208, 0.011, -0.209, -0.153, -0.27 , -0.013]
# Flush in order to provoke a merge later
- do:
indices.flush:
Expand All @@ -59,9 +60,14 @@ setup:
index: bbq_flat
id: "3"
body:
name: rabbit.jpg
vector: [111.3, -13.0, 14.8, -156.0, 0.5, 111.3, -13.0, 14.8, -156.0, 0.5, 111.3, -13.0, 14.8, -156.0, 0.5, 111.3, -13.0, 14.8, -156.0, 0.5, 111.3, -13.0, 14.8, -156.0, 0.5, 111.3, -13.0, 14.8, -156.0, 0.5, 111.3, -13.0, 14.8, -156.0, 0.5, 111.3, -13.0, 14.8, -156.0, 0.5, 111.3, -13.0, 14.8, -156.0, 0.5, 111.3, -13.0, 14.8, -156.0, 0.5, 111.3, -13.0, 14.8, -156.0, 0.5, 111.3, -13.0, 14.8, -156.0, 0.5, 111.3, -13.0, 14.8, -156.0]
another_vector: [11.0, 0, 12, 111.0, -0.5, 11.0, 0, 12, 111.0, -0.5, 11.0, 0, 12, 111.0, -0.5, 11.0, 0, 12, 111.0, -0.5, 11.0, 0, 12, 111.0, -0.5, 11.0, 0, 12, 111.0, -0.5, 11.0, 0, 12, 111.0, -0.5, 11.0, 0, 12, 111.0, -0.5, 11.0, 0, 12, 111.0, -0.5, 11.0, 0, 12, 111.0, -0.5, 11.0, 0, 12, 111.0, -0.5, 11.0, 0, 12, 111.0, -0.5, 11.0, 0, 12, 111.0]
vector: [0.139, 0.178, -0.117, 0.399, 0.014, -0.139, 0.347, -0.33 ,
0.139, 0.34 , -0.052, -0.052, -0.249, 0.327, -0.288, 0.049,
0.464, 0.338, 0.516, 0.247, -0.104, 0.259, -0.209, -0.246,
-0.11 , 0.323, 0.091, 0.442, -0.254, 0.195, -0.109, -0.058,
-0.279, 0.402, -0.107, 0.308, -0.273, 0.019, 0.082, 0.399,
-0.658, -0.03 , 0.276, 0.041, 0.187, -0.331, 0.165, 0.017,
0.171, -0.203, -0.198, 0.115, -0.007, 0.337, -0.444, 0.615,
-0.657, 1.285, 0.2 , -0.062, 0.038, 0.089, -0.068, -0.058]
# Flush in order to provoke a merge later
- do:
indices.flush:
Expand All @@ -73,19 +79,33 @@ setup:
max_num_segments: 1
---
"Test knn search":
- requires:
capabilities:
- method: POST
path: /_search
capabilities: [ optimized_scalar_quantization_bbq ]
test_runner_features: capabilities
reason: "BBQ scoring improved and changed with optimized_scalar_quantization_bbq"
- do:
search:
index: bbq_flat
body:
knn:
field: vector
query_vector: [ 90.0, -10, 14.8, -156.0, -0.5, 90.0, -10, 14.8, -156.0, -0.5, 90.0, -10, 14.8, -156.0, -0.5, 90.0, -10, 14.8, -156.0, -0.5, 90.0, -10, 14.8, -156.0, -0.5, 90.0, -10, 14.8, -156.0, -0.5, 90.0, -10, 14.8, -156.0, -0.5, 90.0, -10, 14.8, -156.0, -0.5, 90.0, -10, 14.8, -156.0, -0.5, 90.0, -10, 14.8, -156.0, -0.5, 90.0, -10, 14.8, -156.0, -0.5, 90.0, -10, 14.8, -156.0, -0.5, 90.0, -10, 14.8, -156.0]
query_vector: [0.128, 0.067, -0.08 , 0.395, -0.11 , -0.259, 0.473, -0.393,
0.292, 0.571, -0.491, 0.444, -0.288, 0.198, -0.343, 0.015,
0.232, 0.088, 0.228, 0.151, -0.136, 0.236, -0.273, -0.259,
-0.217, 0.359, -0.207, 0.352, -0.142, 0.192, -0.061, -0.17 ,
-0.343, 0.189, -0.221, 0.32 , -0.301, -0.1 , 0.005, 0.232,
-0.344, 0.136, 0.252, 0.157, -0.13 , -0.244, 0.193, -0.034,
-0.12 , -0.193, -0.102, 0.252, -0.185, -0.167, -0.575, 0.582,
-0.426, 0.983, 0.212, 0.204, 0.03 , -0.276, -0.425, -0.158]
k: 3
num_candidates: 3

# Depending on how things are distributed, docs 2 and 3 might be swapped
# here we verify that our last hit is always the worst one
- match: { hits.hits.2._id: "1" }
- match: { hits.hits.0._id: "1" }
- match: { hits.hits.1._id: "3" }
- match: { hits.hits.2._id: "2" }
---
"Test bad parameters":
- do:
Expand Down
4 changes: 3 additions & 1 deletion server/src/main/java/module-info.java
Original file line number Diff line number Diff line change
Expand Up @@ -458,7 +458,9 @@
org.elasticsearch.index.codec.vectors.ES815HnswBitVectorsFormat,
org.elasticsearch.index.codec.vectors.ES815BitFlatVectorFormat,
org.elasticsearch.index.codec.vectors.es816.ES816BinaryQuantizedVectorsFormat,
org.elasticsearch.index.codec.vectors.es816.ES816HnswBinaryQuantizedVectorsFormat;
org.elasticsearch.index.codec.vectors.es816.ES816HnswBinaryQuantizedVectorsFormat,
org.elasticsearch.index.codec.vectors.es818.ES818BinaryQuantizedVectorsFormat,
org.elasticsearch.index.codec.vectors.es818.ES818HnswBinaryQuantizedVectorsFormat;

provides org.apache.lucene.codecs.Codec
with
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,20 @@ public static boolean isUnitVector(float[] v) {
return Math.abs(l1norm - 1.0d) <= EPSILON;
}

/**
 * Packs a vector of single-bit values into bytes, eight values per byte.
 * The first value of each group of eight lands in the most significant bit
 * of its output byte. When the input length is not a multiple of eight, the
 * unused low bits of the final byte are left as zero.
 *
 * @param vector input values; each element is asserted to be 0 or 1, and only
 *               its lowest bit is used
 * @param packed destination array; one byte is written per (possibly partial)
 *               group of eight inputs, and bytes past that are left untouched
 */
public static void packAsBinary(byte[] vector, byte[] packed) {
    int cursor = 0;
    int slot = 0;
    while (cursor < vector.length) {
        int bits = 0;
        int shift = 7;
        // Fill from the top bit down until the byte is full or the input is exhausted.
        while (shift >= 0 && cursor < vector.length) {
            assert vector[cursor] == 0 || vector[cursor] == 1;
            bits |= (vector[cursor] & 1) << shift;
            cursor++;
            shift--;
        }
        assert slot < packed.length;
        packed[slot] = (byte) bits;
        slot++;
    }
}

/**
 * Rounds {@code value} up to a multiple of {@code bucket} using integer
 * arithmetic: adds {@code bucket - 1}, divides by {@code bucket} (truncating),
 * then multiplies back by {@code bucket}.
 *
 * @param value  the quantity to round
 * @param bucket the step size; a value of zero makes the division throw
 *               {@link ArithmeticException}
 * @return {@code ((value + bucket - 1) / bucket) * bucket}
 */
public static int discretize(int value, int bucket) {
    final int padded = value + (bucket - 1);
    final int wholeBuckets = padded / bucket;
    return wholeBuckets * bucket;
}
Expand Down
Loading