From d0a1675d1a1ef5f5202960977cdce15e5b7d2926 Mon Sep 17 00:00:00 2001
From: Liam Thompson <32779855+leemthompo@users.noreply.github.com>
Date: Mon, 12 Aug 2024 16:45:25 +0100
Subject: [PATCH] [DOCS] Add retriever examples, semantic reranking step-by-step guide (#111793)

---
 docs/reference/search/retriever.asciidoc      | 297 +++++++++++++++---
 .../semantic-reranking.asciidoc               |   2 +-
 2 files changed, 251 insertions(+), 48 deletions(-)

diff --git a/docs/reference/search/retriever.asciidoc b/docs/reference/search/retriever.asciidoc
index 1b7376c21daab..bf97da15a1ccf 100644
--- a/docs/reference/search/retriever.asciidoc
+++ b/docs/reference/search/retriever.asciidoc
@@ -77,23 +77,48 @@ Collapses the top documents by a specified key into a single top document per ke
When a retriever tree contains a compound retriever (a retriever with two or
more child retrievers) *only* the query element is allowed.

-===== Example
+[discrete]
+[[standard-retriever-example]]
+==== Example

[source,js]
----
-GET /index/_search
+GET /restaurants/_search
{
-  "retriever": {
-    "standard": {
-      "query" { ... },
-      "filter" { ... },
-      "min_score": ...
+  "retriever": { <1>
+    "standard": { <2>
+      "query": { <3>
+        "bool": { <4>
+          "should": [ <5>
+            {
+              "match": { <6>
+                "region": "Austria"
+              }
+            }
+          ],
+          "filter": [ <7>
+            {
+              "term": { <8>
+                "year": "2019" <9>
+              }
+            }
+          ]
        }
-  },
-  "size": ...
+      }
+    }
+  }
}
----
// NOTCONSOLE
+<1> Opens the `retriever` object.
+<2> The `standard` retriever is used for defining traditional {es} queries.
+<3> The entry point for defining the search query.
+<4> The `bool` object allows for combining multiple query clauses logically.
+<5> The `should` array indicates conditions under which a document will match. Documents matching these conditions will have a higher relevancy score.
+<6> The `match` object finds documents where the `region` field contains the word "Austria."
+<7> The `filter` array provides filtering conditions that must be met but do not contribute to the relevancy score.
+<8> The `term` object is used for exact matches, in this case, filtering documents by the `year` field.
+<9> The exact value to match in the `year` field.

[[knn-retriever]]
==== kNN Retriever
@@ -142,29 +167,39 @@ include::{es-ref-dir}/rest-api/common-parms.asciidoc[tag=knn-similarity]
The parameters `query_vector` and `query_vector_builder` cannot be used together.

-===== Example:
+[discrete]
+[[knn-retriever-example]]
+==== Example

[source,js]
----
-GET /index/_search
+GET my-embeddings/_search
{
-  "retriever": {
-    "knn": {
-      "field": ...,
-      "query_vector": ...,
-      "k": ...,
-      "num_candidates": ...
-    }
+  "retriever": {
+    "knn": { <1>
+      "field": "vector", <2>
+      "query_vector": [10, 22, 77], <3>
+      "k": 10, <4>
+      "num_candidates": 10 <5>
    }
+  }
}
----
// NOTCONSOLE
+<1> Configuration for k-nearest neighbor (knn) search, which is based on vector similarity.
+<2> Specifies the field name that contains the vectors.
+<3> The query vector against which document vectors are compared in the `knn` search.
+<4> The number of nearest neighbors to return as top hits. This value must be less than or equal to `num_candidates`.
+<5> The size of the initial candidate set from which the final `k` nearest neighbors are selected.
+
[[rrf-retriever]]
==== RRF Retriever
-An <> retriever returns top documents based on the RRF formula
+An <> retriever returns top documents based on the RRF formula,
equally weighting two or more child retrievers.
+Reciprocal rank fusion (RRF) is a method for combining multiple result
+sets with different relevance indicators into a single result set.
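+
+For reference, the standard reciprocal rank fusion formula that this retriever applies can be sketched as
+latexmath:[\mathrm{score}(d) = \sum_{i=1}^{n} \frac{1}{k + \mathrm{rank}_i(d)}],
+where `n` is the number of child retrievers, `k` is the `rank_constant` parameter, and `rank_i(d)` is the 1-based position of document `d` in the result list of child retriever `i`; retrievers that do not return `d` contribute nothing to the sum.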

===== Parameters
@@ -180,26 +215,103 @@ An RRF retriever is a compound retriever. Child retrievers may not use elements
that are restricted by having a compound retriever as part of the retriever tree.

-===== Example
+[discrete]
+[[rrf-retriever-example-hybrid]]
+==== Example: Hybrid search
+
+A simple hybrid search example (lexical search + dense vector search) combining a `standard` retriever with a `knn` retriever using RRF:

[source,js]
----
-GET /index/_search
+GET /restaurants/_search
{
-  "retriever": {
-    "rrf": {
-      "retrievers": [
-        {
-          "standard" { ... }
-        },
-        {
-          "knn": { ... }
-        }
-      ],
-      "rank_constant": ...
-      "rank_window_size": ...
+  "retriever": {
+    "rrf": { <1>
+      "retrievers": [ <2>
+        {
+          "standard": { <3>
+            "query": {
+              "multi_match": {
+                "query": "San Francisco",
+                "fields": [
+                  "city",
+                  "region"
+                ]
+              }
+            }
+          }
+        },
+        {
+          "knn": { <4>
+            "field": "vector",
+            "query_vector": [10, 22, 77],
+            "k": 10,
+            "num_candidates": 10
+          }
        }
+      ],
+      "rank_constant": 1, <5>
+      "rank_window_size": 50 <6>
    }
+  }
+}
+----
+// NOTCONSOLE
+<1> Defines a retriever tree with an RRF retriever.
+<2> The sub-retriever array.
+<3> The first sub-retriever is a `standard` retriever.
+<4> The second sub-retriever is a `knn` retriever.
+<5> The rank constant for the RRF retriever.
+<6> The rank window size for the RRF retriever.
+
+[discrete]
+[[rrf-retriever-example-hybrid-sparse]]
+==== Example: Hybrid search with sparse vectors
+
+A more complex hybrid search example (lexical search + ELSER sparse vector search + dense vector search) using RRF:
+
+[source,js]
+----
+GET movies/_search
+{
+  "retriever": {
+    "rrf": {
+      "retrievers": [
+        {
+          "standard": {
+            "query": {
+              "sparse_vector": {
+                "field": "plot_embedding",
+                "inference_id": "my-elser-model",
+                "query": "films that explore psychological depths"
+              }
+            }
+          }
+        },
+        {
+          "standard": {
+            "query": {
+              "multi_match": {
+                "query": "crime",
+                "fields": [
+                  "plot",
+                  "title"
+                ]
+              }
+            }
+          }
+        },
+        {
+          "knn": {
+            "field": "vector",
+            "query_vector": [10, 22, 77],
+            "k": 10,
+            "num_candidates": 10
+          }
+        }
+      ]
+    }
+  }
}
----
// NOTCONSOLE
@@ -207,7 +319,7 @@ GET /index/_search
[[text-similarity-reranker-retriever]]
==== Text Similarity Re-ranker Retriever

-The `text_similarity_reranker` is a type of retriever that enhances search results by re-ranking documents based on semantic similarity to a specified inference text, using a machine learning model.
+The `text_similarity_reranker` retriever uses an NLP model to improve search results by reordering the top-k documents based on their semantic similarity to the query.

[TIP]
====
Refer to <> for a high level overview of semantic reranking.
====

===== Prerequisites

To use `text_similarity_reranker` you must first set up a `rerank` task using the <>.
-The `rerank` task should be set up with a machine learning model that can compute text similarity.
+The `rerank` task should be set up with a machine learning model that can compute text similarity. Refer to {ml-docs}/ml-nlp-model-ref.html#ml-nlp-model-ref-text-similarity[the Elastic NLP model reference] for a list of third-party text similarity models supported by {es}.

Currently you can:

* Integrate directly with the <> using the `rerank` task type
* Integrate directly with the <> using the `rerank` task type
-* Upload a model to {es} with {eland-docs}/machine-learning.html#ml-nlp-pytorch[Eland]
+* Upload a model to {es} with {eland-docs}/machine-learning.html#ml-nlp-pytorch[Eland] using the `text_similarity` NLP task type.
** Then set up an <> with the `rerank` task type
+** Refer to the <> on this page for a step-by-step guide.

===== Parameters
@@ -257,27 +370,117 @@ Sets a minimum threshold score for including documents in the re-ranked results.
A text similarity re-ranker retriever is a compound retriever. Child retrievers may not use elements
that are restricted by having a compound retriever as part of the retriever tree.

-===== Example
+[discrete]
+[[text-similarity-reranker-retriever-example-cohere]]
+==== Example: Cohere Rerank
+
+This example enables out-of-the-box semantic search by reranking top documents using the Cohere Rerank API. This approach eliminates the need to generate and store embeddings for all indexed documents.
+This requires a <> using the `rerank` task type.

[source,js]
----
GET /index/_search
{
-  "retriever": {
-    "text_similarity_reranker": {
-      "retriever": {
-        "standard": { ... }
-      },
-      "field": "text",
-      "inference_id": "my-cohere-rerank-model",
-      "inference_text": "Most famous landmark in Paris",
-      "rank_window_size": 100,
-      "min_score": 0.5
+  "retriever": {
+    "text_similarity_reranker": {
+      "retriever": {
+        "standard": {
+          "query": {
+            "match_phrase": {
+              "text": "landmark in Paris"
+            }
+          }
+        }
+      },
+      "field": "text",
+      "inference_id": "my-cohere-rerank-model",
+      "inference_text": "Most famous landmark in Paris",
+      "rank_window_size": 100,
+      "min_score": 0.5
+    }
+  }
+}
+----
+// NOTCONSOLE
+
+[discrete]
+[[text-similarity-reranker-retriever-example-eland]]
+==== Example: Semantic reranking with a Hugging Face model
+
+The following example uses the `cross-encoder/ms-marco-MiniLM-L-6-v2` model from Hugging Face to rerank search results based on semantic similarity.
+The model must be uploaded to {es} using https://www.elastic.co/guide/en/elasticsearch/client/eland/current/machine-learning.html#ml-nlp-pytorch[Eland].
+
+[TIP]
+====
+Refer to {ml-docs}/ml-nlp-model-ref.html#ml-nlp-model-ref-text-similarity[the Elastic NLP model reference] for a list of third-party text similarity models supported by {es}.
+====
+
+Follow these steps to load the model and create a semantic reranker.
+
+. Install Eland using `pip`
++
+[source,sh]
+----
+python -m pip install eland[pytorch]
+----
++
+. Upload the model to {es} using Eland. This example assumes you have an Elastic Cloud deployment and an API key. Refer to the https://www.elastic.co/guide/en/elasticsearch/client/eland/current/machine-learning.html#ml-nlp-pytorch-auth[Eland documentation] for more authentication options.
++
+[source,sh]
+----
+eland_import_hub_model \
+  --cloud-id $CLOUD_ID \
+  --es-api-key $ES_API_KEY \
+  --hub-model-id cross-encoder/ms-marco-MiniLM-L-6-v2 \
+  --task-type text_similarity \
+  --clear-previous \
+  --start
+----
++
+. Create an inference endpoint for the `rerank` task
++
+[source,js]
+----
+PUT _inference/rerank/my-msmarco-minilm-model
+{
+  "service": "elasticsearch",
+  "service_settings": {
+    "num_allocations": 1,
+    "num_threads": 1,
+    "model_id": "cross-encoder__ms-marco-minilm-l-6-v2"
+  }
+}
+----
+// NOTCONSOLE
++
+. Define a `text_similarity_reranker` retriever.
++
+[source,js]
+----
+POST movies/_search
+{
+  "retriever": {
+    "text_similarity_reranker": {
+      "retriever": {
+        "standard": {
+          "query": {
+            "match": {
+              "genre": "drama"
+            }
+          }
        }
+      },
+      "field": "plot",
+      "inference_id": "my-msmarco-minilm-model",
+      "inference_text": "films that explore psychological depths"
    }
+  }
}
----
// NOTCONSOLE
++
+This retriever uses a standard `match` query to search the `movies` index for films tagged with the genre "drama".
+It then re-ranks the results based on semantic similarity to the text in the `inference_text` parameter, using the model we uploaded to {es}.

==== Using `from` and `size` with a retriever tree

diff --git a/docs/reference/search/search-your-data/retrievers-reranking/semantic-reranking.asciidoc b/docs/reference/search/search-your-data/retrievers-reranking/semantic-reranking.asciidoc
index f25741fca0b8f..add2d7455983e 100644
--- a/docs/reference/search/search-your-data/retrievers-reranking/semantic-reranking.asciidoc
+++ b/docs/reference/search/search-your-data/retrievers-reranking/semantic-reranking.asciidoc
@@ -94,7 +94,7 @@ Currently you can:
** Integrate directly with the <> using the `rerank` task type
** Integrate directly with the <> using the `rerank` task type
-** Upload a model to {es} from Hugging Face with {eland-docs}/machine-learning.html#ml-nlp-pytorch[Eland]
+** Upload a model to {es} from Hugging Face with {eland-docs}/machine-learning.html#ml-nlp-pytorch[Eland]. You'll need to use the `text_similarity` NLP task type when loading the model. Refer to {ml-docs}/ml-nlp-model-ref.html#ml-nlp-model-ref-text-similarity[the Elastic NLP model reference] for a list of third-party text similarity models supported by {es} for semantic reranking.
*** Then set up an <> with the `rerank` task type
. *Create a `rerank` task using the <>*.
The Inference API creates an inference endpoint and configures your chosen machine learning model to perform the reranking task.
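+
+For illustration, a minimal request that creates such a `rerank` endpoint backed by the Cohere service might look like the following sketch (the endpoint name, the `rerank-english-v3.0` model, and the placeholder API key are assumptions; adjust them for your own setup):
+
+[source,js]
+----
+PUT _inference/rerank/my-cohere-rerank-model
+{
+  "service": "cohere",
+  "service_settings": {
+    "api_key": "<COHERE_API_KEY>",
+    "model_id": "rerank-english-v3.0"
+  }
+}
+----
+// NOTCONSOLE
+
+The resulting endpoint can then be referenced from a `text_similarity_reranker` retriever through its `inference_id`, as in the `text_similarity_reranker` Cohere Rerank example above.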