From 88cf14c3bcbbe3cec175ecd0acba661617ecd677 Mon Sep 17 00:00:00 2001 From: John Mazanec Date: Mon, 16 Sep 2024 09:58:34 -0400 Subject: [PATCH] Add documentation changes for disk-based k-NN (#8246) * Add space type as top level Signed-off-by: John Mazanec * Add new rescore parameter Signed-off-by: John Mazanec * Add new rescore parameter Signed-off-by: John Mazanec * add docs for compression and mode Signed-off-by: John Mazanec * Clean up compression docs Signed-off-by: John Mazanec * Doc review Signed-off-by: Fanit Kolchina * Update a few things Signed-off-by: John Mazanec * Doc review Signed-off-by: Fanit Kolchina * Apply suggestions from code review Co-authored-by: Nathan Bower Signed-off-by: kolchfa-aws <105444904+kolchfa-aws@users.noreply.github.com> --------- Signed-off-by: John Mazanec Signed-off-by: Fanit Kolchina Signed-off-by: kolchfa-aws <105444904+kolchfa-aws@users.noreply.github.com> Co-authored-by: Fanit Kolchina Co-authored-by: kolchfa-aws <105444904+kolchfa-aws@users.noreply.github.com> Co-authored-by: Nathan Bower Signed-off-by: Noah Staveley --- .../styles/Vocab/OpenSearch/Words/accept.txt | 1 + .../supported-field-types/knn-vector.md | 123 ++++++++++++++---- _query-dsl/specialized/neural.md | 2 + _search-plugins/knn/api.md | 7 +- _search-plugins/knn/approximate-knn.md | 72 +++++++++- _search-plugins/knn/knn-index.md | 11 +- .../knn/knn-vector-quantization.md | 8 +- _search-plugins/knn/nested-search-knn.md | 4 +- _search-plugins/knn/performance-tuning.md | 4 +- _search-plugins/knn/radial-search-knn.md | 2 +- _search-plugins/vector-search.md | 4 +- 11 files changed, 190 insertions(+), 48 deletions(-) diff --git a/.github/vale/styles/Vocab/OpenSearch/Words/accept.txt b/.github/vale/styles/Vocab/OpenSearch/Words/accept.txt index d0d1c308eb..ff634490fc 100644 --- a/.github/vale/styles/Vocab/OpenSearch/Words/accept.txt +++ b/.github/vale/styles/Vocab/OpenSearch/Words/accept.txt @@ -105,6 +105,7 @@ p\d{2} [Rr]eprovision(ed|ing)? [Rr]erank(er|ed|ing)? 
[Rr]epo +[Rr]escor(e|ed|ing)? [Rr]ewriter [Rr]ollout [Rr]ollup diff --git a/_field-types/supported-field-types/knn-vector.md b/_field-types/supported-field-types/knn-vector.md index f0dc831268..4c00b94de8 100644 --- a/_field-types/supported-field-types/knn-vector.md +++ b/_field-types/supported-field-types/knn-vector.md @@ -22,8 +22,7 @@ PUT test-index { "settings": { "index": { - "knn": true, - "knn.algo_param.ef_search": 100 + "knn": true } }, "mappings": { @@ -31,14 +30,10 @@ PUT test-index "my_vector": { "type": "knn_vector", "dimension": 3, + "space_type": "l2", "method": { "name": "hnsw", - "space_type": "l2", - "engine": "lucene", - "parameters": { - "ef_construction": 128, - "m": 24 - } + "engine": "faiss" } } } @@ -47,6 +42,92 @@ PUT test-index ``` {% include copy-curl.html %} +## Vector workload modes + +Vector search involves trade-offs between low-latency and low-cost search. Specify the `mode` mapping parameter of the `knn_vector` type to indicate which search mode you want to prioritize. The `mode` dictates the default values for k-NN parameters. You can further fine-tune your index by overriding the default parameter values in the k-NN field mapping. + +The following modes are currently supported. + +| Mode | Default engine | Description | +|:---|:---|:---| +| `in_memory` (Default) | `nmslib` | Prioritizes low-latency search. This mode uses the `nmslib` engine without any quantization applied. It is configured with the default parameter values for vector search in OpenSearch. | +| `on_disk` | `faiss` | Prioritizes low-cost vector search while maintaining strong recall. By default, the `on_disk` mode uses quantization and rescoring to execute a two-pass approach to retrieve the top neighbors. The `on_disk` mode supports only `float` vector types. 
| +To create a k-NN index that uses the `on_disk` mode for low-cost search, send the following request: + +```json +PUT test-index +{ + "settings": { + "index": { + "knn": true + } + }, + "mappings": { + "properties": { + "my_vector": { + "type": "knn_vector", + "dimension": 3, + "space_type": "l2", + "mode": "on_disk" + } + } + } +} +``` +{% include copy-curl.html %} + +## Compression levels + +The `compression_level` mapping parameter selects a quantization encoder that reduces vector memory consumption by the given factor. The following table lists the available `compression_level` values. + +| Compression level | Supported engines | +|:------------------|:-------------------------------| +| `1x` | `faiss`, `lucene`, and `nmslib` | +| `2x` | `faiss` | +| `4x` | `lucene` | +| `8x` | `faiss` | +| `16x` | `faiss` | +| `32x` | `faiss` | + +For example, if a `compression_level` of `32x` is passed for a `float32` index of 768-dimensional vectors, the per-vector memory is reduced from `4 * 768 = 3072` bytes to `3072 / 32 = 96` bytes. Internally, binary quantization (which maps a `float` to a `bit`) may be used to achieve this compression. + +If you set the `compression_level` parameter, then you cannot specify an `encoder` in the `method` mapping. Compression levels greater than `1x` are only supported for `float` vector types. +{: .note} + +The following table lists the default `compression_level` values for the available workload modes. + +| Mode | Default compression level | +|:------------------|:-------------------------------| +| `in_memory` | `1x` | +| `on_disk` | `32x` | + + +To create a vector field with a `compression_level` of `16x`, specify the `compression_level` parameter in the mappings. 
This parameter overrides the default compression level for the `on_disk` mode from `32x` to `16x`, producing higher recall and accuracy at the expense of a larger memory footprint: + +```json +PUT test-index +{ + "settings": { + "index": { + "knn": true + } + }, + "mappings": { + "properties": { + "my_vector": { + "type": "knn_vector", + "dimension": 3, + "space_type": "l2", + "mode": "on_disk", + "compression_level": "16x" + } + } + } +} +``` +{% include copy-curl.html %} + ## Method definitions [Method definitions]({{site.url}}{{site.baseurl}}/search-plugins/knn/knn-index#method-definitions) are used when the underlying [approximate k-NN]({{site.url}}{{site.baseurl}}/search-plugins/knn/approximate-knn/) algorithm does not require training. For example, the following `knn_vector` field specifies that *nmslib*'s implementation of *hnsw* should be used for approximate k-NN search. During indexing, *nmslib* will build the corresponding *hnsw* segment files. @@ -55,13 +136,13 @@ PUT test-index "my_vector": { "type": "knn_vector", "dimension": 4, + "space_type": "l2", "method": { "name": "hnsw", - "space_type": "l2", "engine": "nmslib", "parameters": { - "ef_construction": 128, - "m": 24 + "ef_construction": 100, + "m": 16 } } } @@ -73,6 +154,7 @@ Model IDs are used when the underlying Approximate k-NN algorithm requires a tra model contains the information needed to initialize the native library segment files. ```json +"my_vector": { "type": "knn_vector", "model_id": "my-model" } @@ -80,6 +162,7 @@ model contains the information needed to initialize the native library segment f However, if you intend to use Painless scripting or a k-NN score script, you only need to pass the dimension. 
```json +"my_vector": { "type": "knn_vector", "dimension": 128 } @@ -123,13 +206,13 @@ PUT test-index "type": "knn_vector", "dimension": 3, "data_type": "byte", + "space_type": "l2", "method": { "name": "hnsw", - "space_type": "l2", "engine": "lucene", "parameters": { - "ef_construction": 128, - "m": 24 + "ef_construction": 100, + "m": 16 } } } @@ -465,14 +548,10 @@ PUT /test-binary-hnsw "type": "knn_vector", "dimension": 8, "data_type": "binary", + "space_type": "hamming", "method": { "name": "hnsw", - "space_type": "hamming", - "engine": "faiss", - "parameters": { - "ef_construction": 128, - "m": 24 - } + "engine": "faiss" } } } @@ -695,12 +774,12 @@ POST _plugins/_knn/models/test-binary-model/_train "dimension": 8, "description": "model with binary data", "data_type": "binary", + "space_type": "hamming", "method": { "name": "ivf", "engine": "faiss", - "space_type": "hamming", "parameters": { - "nlist": 1, + "nlist": 16, "nprobes": 1 } } diff --git a/_query-dsl/specialized/neural.md b/_query-dsl/specialized/neural.md index 14b930cdb6..6cd534b87f 100644 --- a/_query-dsl/specialized/neural.md +++ b/_query-dsl/specialized/neural.md @@ -35,6 +35,8 @@ Field | Data type | Required/Optional | Description `min_score` | Float | Optional | The minimum score threshold for the search results. Only one variable, either `k`, `min_score`, or `max_distance`, can be specified. For more information, see [k-NN radial search]({{site.url}}{{site.baseurl}}/search-plugins/knn/radial-search-knn/). `max_distance` | Float | Optional | The maximum distance threshold for the search results. Only one variable, either `k`, `min_score`, or `max_distance`, can be specified. For more information, see [k-NN radial search]({{site.url}}{{site.baseurl}}/search-plugins/knn/radial-search-knn/). `filter` | Object | Optional | A query that can be used to reduce the number of documents considered. 
For more information about filter usage, see [k-NN search with filters]({{site.url}}{{site.baseurl}}/search-plugins/knn/filter-search-knn/). **Important**: Filter can only be used with the `faiss` or `lucene` engines. +`method_parameters` | Object | Optional | Parameters passed to the k-NN index during search. See [Additional query parameters]({{site.url}}{{site.baseurl}}/search-plugins/knn/approximate-knn/#additional-query-parameters). +`rescore` | Object | Optional | Parameters for configuring rescoring functionality for k-NN indexes built using quantization. See [Rescoring]({{site.url}}{{site.baseurl}}/search-plugins/knn/approximate-knn/#rescoring-quantized-results-using-full-precision). + #### Example request diff --git a/_search-plugins/knn/api.md b/_search-plugins/knn/api.md index c7314f7ae2..1a6c970640 100644 --- a/_search-plugins/knn/api.md +++ b/_search-plugins/knn/api.md @@ -234,7 +234,7 @@ Response field | Description `timestamp` | The date and time when the model was created. `description` | A user-provided description of the model. `error` | An error message explaining why the model is in a failed state. -`space_type` | The space type for which this model is trained, for example, Euclidean or cosine. +`space_type` | The space type for which this model is trained, for example, Euclidean or cosine. Note: This value can also be set at the top level of the request. `dimension` | The dimensionality of the vector space for which this model is designed. `engine` | The native library used to create the model, either `faiss` or `nmslib`. @@ -351,6 +351,7 @@ Request parameter | Description `search_size` | The training data is pulled from the training index using scroll queries. This parameter defines the number of results to return per scroll query. Default is `10000`. Optional. `description` | A user-provided description of the model. Optional. `method` | The configuration of the approximate k-NN method used for search operations. 
For more information about the available methods, see [k-NN index method definitions]({{site.url}}{{site.baseurl}}/search-plugins/knn/knn-index#method-definitions). The method requires training to be valid. +`space_type` | The space type for which this model is trained, for example, Euclidean or cosine. Note: This value can also be set in the `method` parameter. #### Usage @@ -365,10 +366,10 @@ POST /_plugins/_knn/models/{model_id}/_train?preference={node_id} "max_training_vector_count": 1200, "search_size": 100, "description": "My model", + "space_type": "l2", "method": { "name":"ivf", "engine":"faiss", - "space_type": "l2", "parameters":{ "nlist":128, "encoder":{ @@ -395,10 +396,10 @@ POST /_plugins/_knn/models/_train?preference={node_id} "max_training_vector_count": 1200, "search_size": 100, "description": "My model", + "space_type": "l2", "method": { "name":"ivf", "engine":"faiss", - "space_type": "l2", "parameters":{ "nlist":128, "encoder":{ diff --git a/_search-plugins/knn/approximate-knn.md b/_search-plugins/knn/approximate-knn.md index a73844513e..f8921033e0 100644 --- a/_search-plugins/knn/approximate-knn.md +++ b/_search-plugins/knn/approximate-knn.md @@ -49,9 +49,9 @@ PUT my-knn-index-1 "my_vector1": { "type": "knn_vector", "dimension": 2, + "space_type": "l2", "method": { "name": "hnsw", - "space_type": "l2", "engine": "nmslib", "parameters": { "ef_construction": 128, @@ -62,9 +62,9 @@ PUT my-knn-index-1 "my_vector2": { "type": "knn_vector", "dimension": 4, + "space_type": "innerproduct", "method": { "name": "hnsw", - "space_type": "innerproduct", "engine": "faiss", "parameters": { "ef_construction": 256, @@ -199,10 +199,10 @@ POST /_plugins/_knn/models/my-model/_train "training_field": "train-field", "dimension": 4, "description": "My model description", + "space_type": "l2", "method": { "name": "ivf", "engine": "faiss", - "space_type": "l2", "parameters": { "nlist": 4, "nprobes": 2 @@ -308,6 +308,72 @@ Engine | Notes :--- | :--- `faiss` | If `nprobes` 
is present in a query, it overrides the value provided when creating the index. +### Rescoring quantized results using full precision + +Quantization can be used to significantly reduce the memory footprint of a k-NN index. For more information about quantization, see [k-NN vector quantization]({{site.url}}{{site.baseurl}}/search-plugins/knn/knn-vector-quantization). Because some vector representation is lost during quantization, the computed distances will be approximate. This causes the overall recall of the search to decrease. + +To improve recall while maintaining the memory savings of quantization, you can use a two-phase search approach. In the first phase, `oversample_factor * k` results are retrieved from an index using quantized vectors and the scores are approximated. In the second phase, the full-precision vectors of those `oversample_factor * k` results are loaded into memory from disk, and scores are recomputed against the full-precision query vector. The results are then reduced to the top k. + +The default rescoring behavior is determined by the `mode` and `compression_level` of the backing k-NN vector field: + +- For `in_memory` mode, no rescoring is applied by default. +- For `on_disk` mode, default rescoring is based on the configured `compression_level`. Each `compression_level` provides a default `oversample_factor`, specified in the following table. 
+ +| Compression level | Default rescore `oversample_factor` | +|:------------------|:----------------------------------| +| `32x` (default) | 3.0 | +| `16x` | 2.0 | +| `8x` | 2.0 | +| `4x` | No default rescoring | +| `2x` | No default rescoring | + +To explicitly apply rescoring, provide the `rescore` parameter in a query on a quantized index and specify the `oversample_factor`: + +```json +GET my-knn-index-1/_search +{ + "size": 2, + "query": { + "knn": { + "target-field": { + "vector": [2, 3, 5, 6], + "k": 2, + "rescore" : { + "oversample_factor": 1.2 + } + } + } + } +} +``` +{% include copy-curl.html %} + +Alternatively, set the `rescore` parameter to `true` to use a default `oversample_factor` of `1.0`: + +```json +GET my-knn-index-1/_search +{ + "size": 2, + "query": { + "knn": { + "target-field": { + "vector": [2, 3, 5, 6], + "k": 2, + "rescore" : true + } + } + } +} +``` +{% include copy-curl.html %} + +The `oversample_factor` is a floating-point number between 1.0 and 100.0, inclusive. The number of results in the first pass is calculated as `oversample_factor * k` and is guaranteed to be between 100 and 10,000, inclusive. If the calculated number of results is smaller than 100, then the number of results is set to 100. If the calculated number of results is greater than 10,000, then the number of results is set to 10,000. + +Rescoring is only supported for the `faiss` engine. + +Rescoring is not needed if quantization is not used because the scores returned are already fully precise. +{: .note} + ### Using approximate k-NN with filters To learn about using filters with k-NN search, see [k-NN search with filters]({{site.url}}{{site.baseurl}}/search-plugins/knn/filter-search-knn/). 
diff --git a/_search-plugins/knn/knn-index.md b/_search-plugins/knn/knn-index.md index 5bb7257898..15d660ca00 100644 --- a/_search-plugins/knn/knn-index.md +++ b/_search-plugins/knn/knn-index.md @@ -25,9 +25,9 @@ PUT /test-index "my_vector1": { "type": "knn_vector", "dimension": 3, + "space_type": "l2", "method": { "name": "hnsw", - "space_type": "l2", "engine": "lucene", "parameters": { "ef_construction": 128, @@ -83,7 +83,7 @@ A method definition will always contain the name of the method, the space_type t Mapping parameter | Required | Default | Updatable | Description :--- | :--- | :--- | :--- | :--- `name` | true | n/a | false | The identifier for the nearest neighbor method. -`space_type` | false | l2 | false | The vector space used to calculate the distance between vectors. +`space_type` | false | l2 | false | The vector space used to calculate the distance between vectors. Note: This value can also be specified at the top level of the mapping. `engine` | false | nmslib | false | The approximate k-NN library to use for indexing and search. The available libraries are faiss, nmslib, and Lucene. `parameters` | false | null | false | The parameters used for the nearest neighbor method. 
@@ -168,7 +168,6 @@ An index created in OpenSearch version 2.11 or earlier will still use the old `e "method": { "name":"hnsw", "engine":"lucene", - "space_type": "l2", "parameters":{ "m":2048, "ef_construction": 245 @@ -186,7 +185,6 @@ The following example method definition specifies the `hnsw` method and a `pq` e "method": { "name":"hnsw", "engine":"faiss", - "space_type": "l2", "parameters":{ "encoder":{ "name":"pq", @@ -232,7 +230,6 @@ The following example uses the `ivf` method without specifying an encoder (by d "method": { "name":"ivf", "engine":"faiss", - "space_type": "l2", "parameters":{ "nlist": 4, "nprobes": 2 @@ -246,7 +243,6 @@ The following example uses the `ivf` method with a `pq` encoder: "method": { "name":"ivf", "engine":"faiss", - "space_type": "l2", "parameters":{ "encoder":{ "name":"pq", @@ -265,7 +261,6 @@ The following example uses the `hnsw` method without specifying an encoder (by d "method": { "name":"hnsw", "engine":"faiss", - "space_type": "l2", "parameters":{ "ef_construction": 256, "m": 8 @@ -279,7 +274,6 @@ The following example uses the `hnsw` method with an `sq` encoder of type `fp16` "method": { "name":"hnsw", "engine":"faiss", - "space_type": "l2", "parameters":{ "encoder": { "name": "sq", @@ -300,7 +294,6 @@ The following example uses the `ivf` method with an `sq` encoder of type `fp16`: "method": { "name":"ivf", "engine":"faiss", - "space_type": "l2", "parameters":{ "encoder": { "name": "sq", diff --git a/_search-plugins/knn/knn-vector-quantization.md b/_search-plugins/knn/knn-vector-quantization.md index 5675d57eab..fbdcb4ad2e 100644 --- a/_search-plugins/knn/knn-vector-quantization.md +++ b/_search-plugins/knn/knn-vector-quantization.md @@ -40,10 +40,10 @@ PUT /test-index "my_vector1": { "type": "knn_vector", "dimension": 2, + "space_type": "l2", "method": { "name": "hnsw", "engine": "lucene", - "space_type": "l2", "parameters": { "encoder": { "name": "sq" @@ -85,10 +85,10 @@ PUT /test-index "my_vector1": { "type": 
"knn_vector", "dimension": 2, + "space_type": "l2", "method": { "name": "hnsw", "engine": "lucene", - "space_type": "l2", "parameters": { "encoder": { "name": "sq", @@ -150,10 +150,10 @@ PUT /test-index "my_vector1": { "type": "knn_vector", "dimension": 3, + "space_type": "l2", "method": { "name": "hnsw", "engine": "faiss", - "space_type": "l2", "parameters": { "encoder": { "name": "sq" @@ -194,10 +194,10 @@ PUT /test-index "my_vector1": { "type": "knn_vector", "dimension": 3, + "space_type": "l2", "method": { "name": "hnsw", "engine": "faiss", - "space_type": "l2", "parameters": { "encoder": { "name": "sq", diff --git a/_search-plugins/knn/nested-search-knn.md b/_search-plugins/knn/nested-search-knn.md index d947ebc6e6..bbba6c9c1e 100644 --- a/_search-plugins/knn/nested-search-knn.md +++ b/_search-plugins/knn/nested-search-knn.md @@ -38,9 +38,9 @@ PUT my-knn-index-1 "my_vector": { "type": "knn_vector", "dimension": 3, + "space_type": "l2", "method": { "name": "hnsw", - "space_type": "l2", "engine": "lucene", "parameters": { "ef_construction": 100, @@ -324,9 +324,9 @@ PUT my-knn-index-1 "my_vector": { "type": "knn_vector", "dimension": 3, + "space_type": "l2", "method": { "name": "hnsw", - "space_type": "l2", "engine": "lucene", "parameters": { "ef_construction": 100, diff --git a/_search-plugins/knn/performance-tuning.md b/_search-plugins/knn/performance-tuning.md index 123b1daef1..77f44dee93 100644 --- a/_search-plugins/knn/performance-tuning.md +++ b/_search-plugins/knn/performance-tuning.md @@ -59,9 +59,9 @@ The `_source` field contains the original JSON document body that was passed at "location": { "type": "knn_vector", "dimension": 2, + "space_type": "l2", "method": { "name": "hnsw", - "space_type": "l2", "engine": "faiss" } } @@ -85,9 +85,9 @@ In OpenSearch 2.15 or later, you can further improve indexing speed and reduce d "location": { "type": "knn_vector", "dimension": 2, + "space_type": "l2", "method": { "name": "hnsw", - "space_type": "l2", "engine": 
"faiss" } } diff --git a/_search-plugins/knn/radial-search-knn.md b/_search-plugins/knn/radial-search-knn.md index 1a4a223294..e5449a0993 100644 --- a/_search-plugins/knn/radial-search-knn.md +++ b/_search-plugins/knn/radial-search-knn.md @@ -53,9 +53,9 @@ PUT knn-index-test "my_vector": { "type": "knn_vector", "dimension": 2, + "space_type": "l2", "method": { "name": "hnsw", - "space_type": "l2", "engine": "faiss", "parameters": { "ef_construction": 100, diff --git a/_search-plugins/vector-search.md b/_search-plugins/vector-search.md index cc298786a3..f19030bf90 100644 --- a/_search-plugins/vector-search.md +++ b/_search-plugins/vector-search.md @@ -37,9 +37,9 @@ PUT test-index "my_vector1": { "type": "knn_vector", "dimension": 1024, + "space_type": "l2", "method": { "name": "hnsw", - "space_type": "l2", "engine": "nmslib", "parameters": { "ef_construction": 128, @@ -131,9 +131,9 @@ PUT /hotels-index "location": { "type": "knn_vector", "dimension": 2, + "space_type": "l2", "method": { "name": "hnsw", - "space_type": "l2", "engine": "lucene", "parameters": { "ef_construction": 100,