From a799183a6fef64aaf1e571585dc62e40a6051718 Mon Sep 17 00:00:00 2001 From: Heemin Kim Date: Tue, 18 Jun 2024 10:21:31 -0700 Subject: [PATCH 1/3] Add documentation for innerHit on knn nested field Signed-off-by: Heemin Kim --- _search-plugins/knn/nested-search-knn.md | 147 +++++++++++++++++++++-- 1 file changed, 136 insertions(+), 11 deletions(-) diff --git a/_search-plugins/knn/nested-search-knn.md b/_search-plugins/knn/nested-search-knn.md index 49465edea5..63ab00679e 100644 --- a/_search-plugins/knn/nested-search-knn.md +++ b/_search-plugins/knn/nested-search-knn.md @@ -48,6 +48,10 @@ PUT my-knn-index-1 "m": 16 } } + }, + "color": { + "type": "text", + "index": false } } } @@ -62,9 +66,9 @@ After you create the index, add some data to it: ```json PUT _bulk?refresh=true { "index": { "_index": "my-knn-index-1", "_id": "1" } } -{"nested_field":[{"my_vector":[1,1,1]},{"my_vector":[2,2,2]},{"my_vector":[3,3,3]}]} +{"nested_field":[{"my_vector":[1,1,1], "color": "blue"},{"my_vector":[2,2,2], "color": "yellow"},{"my_vector":[3,3,3], "color": "white"}]} { "index": { "_index": "my-knn-index-1", "_id": "2" } } -{"nested_field":[{"my_vector":[10,10,10]},{"my_vector":[20,20,20]},{"my_vector":[30,30,30]}]} +{"nested_field":[{"my_vector":[10,10,10], "color": "red"},{"my_vector":[20,20,20], "color": "green"},{"my_vector":[30,30,30], "color": "black"}]} ``` {% include copy-curl.html %} @@ -94,7 +98,7 @@ Even though all three vectors nearest to the query vector are in document 1, the ```json { - "took": 23, + "took": 5, "timed_out": false, "_shards": { "total": 1, @@ -107,12 +111,12 @@ Even though all three vectors nearest to the query vector are in document 1, the "value": 2, "relation": "eq" }, - "max_score": 1, + "max_score": 1.0, "hits": [ { "_index": "my-knn-index-1", "_id": "1", - "_score": 1, + "_score": 1.0, "_source": { "nested_field": [ { @@ -120,21 +124,24 @@ Even though all three vectors nearest to the query vector are in document 1, the 1, 1, 1 - ] + ], + "color": 
"blue" }, { "my_vector": [ 2, 2, 2 - ] + ], + "color": "yellow" }, { "my_vector": [ 3, 3, 3 - ] + ], + "color": "white" } ] } @@ -150,21 +157,24 @@ Even though all three vectors nearest to the query vector are in document 1, the 10, 10, 10 - ] + ], + "color": "red" }, { "my_vector": [ 20, 20, 20 - ] + ], + "color": "green" }, { "my_vector": [ 30, 30, 30 - ] + ], + "color": "black" } ] } @@ -174,6 +184,121 @@ Even though all three vectors nearest to the query vector are in document 1, the } ``` +## InnerHit with nested fields +If you want to retrieve only the matched documents in nested fields, you can use `inner_hits` in your query. +The following example returns only the `color` field of the matched documents within the nested fields. +```json +GET my-knn-index-1/_search +{ + "_source": false, + "query": { + "nested": { + "path": "nested_field", + "query": { + "knn": { + "nested_field.my_vector": { + "vector": [1,1,1], + "k": 2 + } + } + }, + "inner_hits": { + "_source": false, + "fields":["nested_field.color"] + } + } + } +} +``` +{% include copy-curl.html %} + +`inner_hits` contains only the matched documents from the nested fields. 
+```json +{ + "took": 4, + "timed_out": false, + "_shards": { + "total": 1, + "successful": 1, + "skipped": 0, + "failed": 0 + }, + "hits": { + "total": { + "value": 2, + "relation": "eq" + }, + "max_score": 1.0, + "hits": [ + { + "_index": "my-knn-index-1", + "_id": "1", + "_score": 1.0, + "inner_hits": { + "nested_field": { + "hits": { + "total": { + "value": 1, + "relation": "eq" + }, + "max_score": 1.0, + "hits": [ + { + "_index": "my-knn-index-1", + "_id": "1", + "_nested": { + "field": "nested_field", + "offset": 0 + }, + "_score": 1.0, + "fields": { + "nested_field.color": [ + "blue" + ] + } + } + ] + } + } + } + }, + { + "_index": "my-knn-index-1", + "_id": "2", + "_score": 0.0040983604, + "inner_hits": { + "nested_field": { + "hits": { + "total": { + "value": 1, + "relation": "eq" + }, + "max_score": 0.0040983604, + "hits": [ + { + "_index": "my-knn-index-1", + "_id": "2", + "_nested": { + "field": "nested_field", + "offset": 0 + }, + "_score": 0.0040983604, + "fields": { + "nested_field.color": [ + "red" + ] + } + } + ] + } + } + } + } + ] + } +} +``` + ## k-NN search with filtering on nested fields You can apply a filter to a k-NN search with nested fields. A filter can be applied to either a top-level field or a field inside a nested field. 
From 35b3d5dc6bc84f2121b72789e057b69f50ab55af Mon Sep 17 00:00:00 2001 From: Fanit Kolchina Date: Tue, 18 Jun 2024 14:22:55 -0400 Subject: [PATCH 2/3] Doc review Signed-off-by: Fanit Kolchina --- _search-plugins/knn/nested-search-knn.md | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/_search-plugins/knn/nested-search-knn.md b/_search-plugins/knn/nested-search-knn.md index 63ab00679e..c3d1c85303 100644 --- a/_search-plugins/knn/nested-search-knn.md +++ b/_search-plugins/knn/nested-search-knn.md @@ -184,9 +184,10 @@ Even though all three vectors nearest to the query vector are in document 1, the } ``` -## InnerHit with nested fields -If you want to retrieve only the matched documents in nested fields, you can use `inner_hits` in your query. -The following example returns only the `color` field of the matched documents within the nested fields. +## Inner hits + +When you retrieve documents based on matches in nested fields, by default, the response does not contain information about which inner objects matched the query. Thus, it is not apparent why the document is a match. To include information about the matching nested fields in the response, you can provide the `inner_hits` object in your query. To return only certain fields of the matching documents within `inner_hits`, specify the document fields in the `fields` array. The following example returns only the `color` inner field of the `nested_field`: + ```json GET my-knn-index-1/_search { @@ -212,7 +213,8 @@ GET my-knn-index-1/_search ``` {% include copy-curl.html %} -`inner_hits` contains only the matched documents from the nested fields. +The response contains matching documents. 
For each matching document, the `inner_hits` object lists the nested objects that matched the query, and the `fields` array of each inner hit contains only the requested `nested_field.color` field: + ```json { "took": 4, "timed_out": false, "_shards": { "total": 1, From cdcb566a1011b4e43ecb0c740f5e5352673a8bd3 Mon Sep 17 00:00:00 2001 From: Fanit Kolchina Date: Tue, 18 Jun 2024 14:50:41 -0400 Subject: [PATCH 3/3] Explain excluding source Signed-off-by: Fanit Kolchina --- _search-plugins/knn/nested-search-knn.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/_search-plugins/knn/nested-search-knn.md b/_search-plugins/knn/nested-search-knn.md index c3d1c85303..bdc1045387 100644 --- a/_search-plugins/knn/nested-search-knn.md +++ b/_search-plugins/knn/nested-search-knn.md @@ -186,7 +186,7 @@ Even though all three vectors nearest to the query vector are in document 1, the ## Inner hits -When you retrieve documents based on matches in nested fields, by default, the response does not contain information about which inner objects matched the query. Thus, it is not apparent why the document is a match. To include information about the matching nested fields in the response, you can provide the `inner_hits` object in your query. To return only certain fields of the matching documents within `inner_hits`, specify the document fields in the `fields` array. The following example returns only the `color` inner field of the `nested_field`: +When you retrieve documents based on matches in nested fields, by default, the response does not contain information about which inner objects matched the query. Thus, it is not apparent why the document is a match. To include information about the matching nested fields in the response, you can provide the `inner_hits` object in your query. To return only certain fields of the matching documents within `inner_hits`, specify the document fields in the `fields` array. Generally, you should also exclude `_source` from the results (by setting `"_source": false`) to avoid returning the whole document. 
The following example returns only the `color` field of each matching object in `nested_field`: ```json GET my-knn-index-1/_search