Adds new max_inner_product vector similarity function (elastic#99527)

Adds new max_inner_product vector similarity function. This differs from dot_product in the following ways: (1) it doesn't require vectors to be normalized; (2) it scales the similarity between vectors differently to prevent negative scores.
ChrisHegarty · Sep 20, 2023 · dee85de · dee85de
1 parent 270de88
commit dee85de
Show file tree

Hide file tree

Showing 4 changed files with 107 additions and 1 deletion.
diff --git a/docs/changelog/99527.yaml b/docs/changelog/99527.yaml
@@ -0,0 +1,5 @@
+pr: 99445
+summary: Add new max_inner_product vector similarity function 
+area: Vector Search
+type: enhancement
+issues: []
diff --git a/docs/reference/mapping/types/dense-vector.asciidoc b/docs/reference/mapping/types/dense-vector.asciidoc
@@ -159,7 +159,7 @@ distance) between the vectors. The document `_score` is computed as
 `1 / (1 + l2_norm(query, vector)^2)`.
 
 `dot_product`:::
-Computes the dot product of two vectors. This option provides an optimized way
+Computes the dot product of two unit vectors. This option provides an optimized way
 to perform cosine similarity. The constraints and computed score are defined
 by `element_type`.
 +
@@ -181,6 +181,14 @@ original vectors and cannot normalize them in advance. The document `_score`
 is computed as `(1 + cosine(query, vector)) / 2`. The `cosine` similarity does
 not allow vectors with zero magnitude, since cosine is not defined in this
 case.
+
+`max_inner_product`:::
+Computes the maximum inner product of two vectors. This is similar to `dot_product`,
+but doesn't require vectors to be normalized. This means that each vector's magnitude
+can significantly affect the score. The document `_score` is adjusted to prevent negative
+values. For `max_inner_product` values `< 0`, the `_score` is
+`1 / (1 + -1 * max_inner_product(query, vector))`. For non-negative `max_inner_product` results
+the `_score` is calculated as `max_inner_product(query, vector) + 1`.
 ====
 
 NOTE: Although they are conceptually related, the `similarity` parameter is

diff --git a/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/search.vectors/40_knn_search.yml b/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/search.vectors/40_knn_search.yml
@@ -368,3 +368,88 @@ setup:
             filter: {"term": {"name": "cow.jpg"}}
 
   - length: {hits.hits: 0}
+---
+"Knn search with mip":
+  - skip:
+      version: ' - 8.10.99'
+      reason: 'mip similarity added in 8.11'
+      features: close_to
+
+  - do:
+      indices.create:
+        index: mip
+        body:
+          mappings:
+            properties:
+              name:
+                type: keyword
+              vector:
+                type: dense_vector
+                dims: 5
+                index: true
+                similarity: max_inner_product
+
+  - do:
+      index:
+        index: mip
+        id: "1"
+        body:
+          name: cow.jpg
+          vector: [230.0, 300.33, -34.8988, 15.555, -200.0]
+
+  - do:
+      index:
+        index: mip
+        id: "2"
+        body:
+          name: moose.jpg
+          vector: [-0.5, 100.0, -13, 14.8, -156.0]
+
+  - do:
+      index:
+        index: mip
+        id: "3"
+        body:
+          name: rabbit.jpg
+          vector: [0.5, 111.3, -13.0, 14.8, -156.0]
+
+  - do:
+      indices.refresh: {}
+
+  - do:
+      search:
+        index: mip
+        body:
+          fields: [ "name" ]
+          knn:
+            num_candidates: 3
+            k: 3
+            field: vector
+            query_vector: [-0.5, 90.0, -10, 14.8, -156.0]
+
+
+  - length: {hits.hits: 3}
+  - match: {hits.hits.0._id: "1"}
+  - close_to: {hits.hits.0._score: {value: 58694.902, error: 0.01}}
+  - match: {hits.hits.1._id: "3"}
+  - close_to: {hits.hits.1._score: {value: 34702.79, error: 0.01}}
+  - match: {hits.hits.2._id: "2"}
+  - close_to: {hits.hits.2._score: {value: 33686.29, error: 0.01}}
+
+  - do:
+      search:
+        index: mip
+        body:
+          fields: [ "name" ]
+          knn:
+            num_candidates: 3
+            k: 3
+            field: vector
+            query_vector: [-0.5, 90.0, -10, 14.8, -156.0]
+            filter: { "term": { "name": "moose.jpg" } }
+
+
+
+  - length: {hits.hits: 1}
+  - match: {hits.hits.0._id: "2"}
+  - close_to: {hits.hits.0._score: {value: 33686.29, error: 0.01}}
diff --git a/server/src/main/java/org/elasticsearch/index/mapper/vectors/DenseVectorFieldMapper.java b/server/src/main/java/org/elasticsearch/index/mapper/vectors/DenseVectorFieldMapper.java
@@ -661,6 +661,14 @@ float score(float similarity, ElementType elementType, int dim) {
                     case FLOAT -> (1 + similarity) / 2f;
                 };
             }
+        },
+        MAX_INNER_PRODUCT(VectorSimilarityFunction.MAXIMUM_INNER_PRODUCT) {
+            @Override
+            float score(float similarity, ElementType elementType, int dim) {
+                return switch (elementType) {
+                    case BYTE, FLOAT -> similarity < 0 ? 1 / (1 + -1 * similarity) : similarity + 1;
+                };
+            }
         };
 
         public final VectorSimilarityFunction function;