From 5901b4286ec4a91018c8e6a538a5e73216b734a3 Mon Sep 17 00:00:00 2001 From: Julie Tibshirani Date: Fri, 29 Mar 2019 11:29:54 -0700 Subject: [PATCH] Update vector similarity examples to avoid negative scores. (#40493) Negative scores are no longer allowed, but the cosine similarity between two vectors lies in the range [-1, 1], and dot products can also be negative. This commit updates the documentation with an example of how to avoid negative scores. --- .../query-dsl/script-score-query.asciidoc | 33 +++++++++++++------ 1 file changed, 23 insertions(+), 10 deletions(-) diff --git a/docs/reference/query-dsl/script-score-query.asciidoc b/docs/reference/query-dsl/script-score-query.asciidoc index 56c4f7c41b8ee..f3f0bc8af48c9 100644 --- a/docs/reference/query-dsl/script-score-query.asciidoc +++ b/docs/reference/query-dsl/script-score-query.asciidoc @@ -36,6 +36,10 @@ GET /_search // CONSOLE // TEST[setup:twitter] +NOTE: The values returned from `script_score` cannot be negative. In general, +Lucene requires the scores produced by queries to be non-negative in order to +support certain search optimizations. + ==== Accessing the score of a document within a script Within a script, you can @@ -92,9 +96,9 @@ cosine similarity between a given query vector and document vectors. "match_all": {} }, "script": { - "source": "cosineSimilarity(params.queryVector, doc['my_dense_vector'])", + "source": "cosineSimilarity(params.query_vector, doc['my_dense_vector']) + 1.0", <1> "params": { - "queryVector": [4, 3.4, -0.2] <1> + "query_vector": [4, 3.4, -0.2] <2> } } } @@ -102,7 +106,8 @@ cosine similarity between a given query vector and document vectors. } -------------------------------------------------- // NOTCONSOLE -<1> To take advantage of the script optimizations, provide a query vector as a script parameter. +<1> The script adds 1.0 to the cosine similarity to prevent the score from being negative. 
+<2> To take advantage of the script optimizations, provide a query vector as a script parameter. Similarly, for sparse_vector fields, `cosineSimilaritySparse` calculates cosine similarity between a given query vector and document vectors. @@ -116,9 +121,9 @@ between a given query vector and document vectors. "match_all": {} }, "script": { - "source": "cosineSimilaritySparse(params.queryVector, doc['my_sparse_vector'])", + "source": "cosineSimilaritySparse(params.query_vector, doc['my_sparse_vector']) + 1.0", "params": { - "queryVector": {"2": 0.5, "10" : 111.3, "50": -1.3, "113": 14.8, "4545": 156.0} + "query_vector": {"2": 0.5, "10" : 111.3, "50": -1.3, "113": 14.8, "4545": 156.0} } } } @@ -139,9 +144,12 @@ dot product between a given query vector and document vectors. "match_all": {} }, "script": { - "source": "dotProduct(params.queryVector, doc['my_dense_vector'])", + "source": """ + double value = dotProduct(params.query_vector, doc['my_dense_vector']); + return sigmoid(1, Math.E, -value); <1> + """, "params": { - "queryVector": [4, 3.4, -0.2] + "query_vector": [4, 3.4, -0.2] } } } @@ -150,6 +158,8 @@ dot product between a given query vector and document vectors. -------------------------------------------------- // NOTCONSOLE +<1> Using the standard sigmoid function prevents scores from being negative. + Similarly, for sparse_vector fields, `dotProductSparse` calculates dot product between a given query vector and document vectors. @@ -162,9 +172,12 @@ between a given query vector and document vectors. "match_all": {} }, "script": { - "source": "dotProductSparse(params.queryVector, doc['my_sparse_vector'])", - "params": { - "queryVector": {"2": 0.5, "10" : 111.3, "50": -1.3, "113": 14.8, "4545": 156.0} + "source": """ + double value = dotProductSparse(params.query_vector, doc['my_sparse_vector']); + return sigmoid(1, Math.E, -value); + """, + "params": { + "query_vector": {"2": 0.5, "10" : 111.3, "50": -1.3, "113": 14.8, "4545": 156.0} } } }