From 6d3db820de72297680d870e0f8db27b5b7e11bcd Mon Sep 17 00:00:00 2001 From: James Rodewig <40268737+jrodewig@users.noreply.github.com> Date: Fri, 31 Jul 2020 09:37:11 -0400 Subject: [PATCH 1/4] [DOCS] Move search pagination content to one page --- .../bucket/composite-aggregation.asciidoc | 2 +- docs/reference/docs/delete-by-query.asciidoc | 2 +- docs/reference/docs/update-by-query.asciidoc | 2 +- docs/reference/how-to/general.asciidoc | 2 +- docs/reference/index-modules.asciidoc | 2 +- .../modules/cross-cluster-search.asciidoc | 2 +- docs/reference/redirects.asciidoc | 16 ++++++++++++--- docs/reference/search/async-search.asciidoc | 2 +- .../search/clear-scroll-api.asciidoc | 2 +- ...iidoc => paginate-search-results.asciidoc} | 11 ++++++---- docs/reference/search/request-body.asciidoc | 10 ++++++++-- .../search/request/collapse.asciidoc | 4 ++-- docs/reference/search/request/scroll.asciidoc | 15 +++++++++----- .../search/request/search-after.asciidoc | 7 ++++--- docs/reference/search/request/sort.asciidoc | 2 +- docs/reference/search/scroll-api.asciidoc | 10 +++++----- .../search/search-your-data.asciidoc | 2 +- docs/reference/search/search.asciidoc | 20 +++++++++---------- docs/reference/sql/endpoints/rest.asciidoc | 2 +- .../sql/endpoints/translate.asciidoc | 2 +- 20 files changed, 71 insertions(+), 46 deletions(-) rename docs/reference/search/{request/from-size.asciidoc => paginate-search-results.asciidoc} (89%) diff --git a/docs/reference/aggregations/bucket/composite-aggregation.asciidoc b/docs/reference/aggregations/bucket/composite-aggregation.asciidoc index 350ae13c75607..c5dad2f111c8a 100644 --- a/docs/reference/aggregations/bucket/composite-aggregation.asciidoc +++ b/docs/reference/aggregations/bucket/composite-aggregation.asciidoc @@ -6,7 +6,7 @@ A multi-bucket aggregation that creates composite buckets from different sources Unlike the other `multi-bucket` aggregation the `composite` aggregation can be used to paginate **all** buckets from a 
multi-level aggregation efficiently. This aggregation provides a way to stream **all** buckets of a specific aggregation similarly to what -<> does for documents. +<> does for documents. The composite buckets are built from the combinations of the values extracted/created for each document and each combination is considered as diff --git a/docs/reference/docs/delete-by-query.asciidoc b/docs/reference/docs/delete-by-query.asciidoc index b09a351f502bf..7eb77b1ff4d32 100644 --- a/docs/reference/docs/delete-by-query.asciidoc +++ b/docs/reference/docs/delete-by-query.asciidoc @@ -207,7 +207,7 @@ include::{es-repo-dir}/rest-api/common-parms.asciidoc[tag=routing] `scroll`:: (Optional, <>) Period to retain the <> for scrolling. See -<>. +<>. include::{es-repo-dir}/rest-api/common-parms.asciidoc[tag=scroll_size] diff --git a/docs/reference/docs/update-by-query.asciidoc b/docs/reference/docs/update-by-query.asciidoc index 1bdfa5874035a..48506deb330bf 100644 --- a/docs/reference/docs/update-by-query.asciidoc +++ b/docs/reference/docs/update-by-query.asciidoc @@ -204,7 +204,7 @@ include::{es-repo-dir}/rest-api/common-parms.asciidoc[tag=routing] `scroll`:: (Optional, <>) Period to retain the <> for scrolling. See -<>. +<>. include::{es-repo-dir}/rest-api/common-parms.asciidoc[tag=scroll_size] diff --git a/docs/reference/how-to/general.asciidoc b/docs/reference/how-to/general.asciidoc index ac6760ee8d2ad..4a9331194d902 100644 --- a/docs/reference/how-to/general.asciidoc +++ b/docs/reference/how-to/general.asciidoc @@ -9,7 +9,7 @@ Elasticsearch is designed as a search engine, which makes it very good at getting back the top documents that match a query. However, it is not as good for workloads that fall into the database domain, such as retrieving all documents that match a particular query. If you need to do this, make sure to -use the <> API. +use the <> API. 
[discrete] [[maximum-document-size]] diff --git a/docs/reference/index-modules.asciidoc b/docs/reference/index-modules.asciidoc index e833ab333fd81..0f1f5e88ac679 100644 --- a/docs/reference/index-modules.asciidoc +++ b/docs/reference/index-modules.asciidoc @@ -155,7 +155,7 @@ specific index module: The maximum value of `from + size` for searches to this index. Defaults to `10000`. Search requests take heap memory and time proportional to `from + size` and this limits that memory. See - <> or <> for a more efficient alternative + <> or <> for a more efficient alternative to raising this. `index.max_inner_result_window`:: diff --git a/docs/reference/modules/cross-cluster-search.asciidoc b/docs/reference/modules/cross-cluster-search.asciidoc index 4eb88f361a639..38e71158512ba 100644 --- a/docs/reference/modules/cross-cluster-search.asciidoc +++ b/docs/reference/modules/cross-cluster-search.asciidoc @@ -286,7 +286,7 @@ handling network delays: By default, {es} reduces the number of network roundtrips between remote clusters. This reduces the impact of network delays on search speed. However, {es} can't reduce network roundtrips for large search requests, such as those -including a <> or +including a <> or <>. + See <> to learn how this option works. diff --git a/docs/reference/redirects.asciidoc b/docs/reference/redirects.asciidoc index 1330d0c35fc13..ea7bcab7800b3 100644 --- a/docs/reference/redirects.asciidoc +++ b/docs/reference/redirects.asciidoc @@ -117,11 +117,11 @@ See <>. [role="exclude",id="search-request-scroll"] === Scroll parameter for request body search API -See <>. +See <>. [role="exclude",id="search-request-search-after"] === Search after parameter for request body search API -See <>. +See <>. [role="exclude",id="search-request-search-type"] === Search type parameter for request body search API @@ -874,7 +874,7 @@ See <>. [role="exclude",id="scan-scroll"] === Scan and scroll -See <>. +See <>. 
[role="exclude",id="mapping-dynamic-mapping"] === Dynamic mapping @@ -1001,6 +1001,16 @@ See <>. See <>. +[role="exclude",id="request-body-search-scroll"] +==== Scroll + +See <>. + +[role="exclude",id="request-body-search-search-after"] +==== Search After + +See <>. + [role="exclude",id="request-body-search-sort"] ==== Sort diff --git a/docs/reference/search/async-search.asciidoc b/docs/reference/search/async-search.asciidoc index 3c788f704bc96..72241e2387848 100644 --- a/docs/reference/search/async-search.asciidoc +++ b/docs/reference/search/async-search.asciidoc @@ -129,7 +129,7 @@ query get skipped. * `ccs_minimize_roundtrips` defaults to `false`, which is also the only supported value -WARNING: Async search does not support <> +WARNING: Async search does not support <> nor search requests that only include the <>. {ccs} is supported only with <> set to `false`. diff --git a/docs/reference/search/clear-scroll-api.asciidoc b/docs/reference/search/clear-scroll-api.asciidoc index 6e5325d658153..6892beebe7372 100644 --- a/docs/reference/search/clear-scroll-api.asciidoc +++ b/docs/reference/search/clear-scroll-api.asciidoc @@ -5,7 +5,7 @@ ++++ Clears the search context and results for a -<>. +<>. //// [source,console] diff --git a/docs/reference/search/request/from-size.asciidoc b/docs/reference/search/paginate-search-results.asciidoc similarity index 89% rename from docs/reference/search/request/from-size.asciidoc rename to docs/reference/search/paginate-search-results.asciidoc index 3848727131b9c..ef76ad55ed6af 100644 --- a/docs/reference/search/request/from-size.asciidoc +++ b/docs/reference/search/paginate-search-results.asciidoc @@ -1,6 +1,5 @@ -[discrete] [[paginate-search-results]] -=== Paginate search results +== Paginate search results By default, the search API returns the top 10 matching documents. @@ -40,10 +39,14 @@ results must then be combined and sorted to ensure that the overall sort order is correct. 
As an alternative to deep paging, we recommend using -<> or the -<> parameter. +<> or the +<> parameter. WARNING: {es} uses Lucene's internal doc IDs as tie-breakers. These internal doc IDs can be completely different across replicas of the same data. When paginating, you might occasionally see that documents with the same sort values are not ordered consistently. + +include::request/scroll.asciidoc[] + +include::request/search-after.asciidoc[] \ No newline at end of file diff --git a/docs/reference/search/request-body.asciidoc b/docs/reference/search/request-body.asciidoc index 89c918ff55c8a..28958861d0abc 100644 --- a/docs/reference/search/request-body.asciidoc +++ b/docs/reference/search/request-body.asciidoc @@ -139,9 +139,15 @@ include::request/rescore.asciidoc[] include::request/script-fields.asciidoc[] -include::request/scroll.asciidoc[] +[[request-body-search-scroll]] +==== Scroll -include::request/search-after.asciidoc[] +See <>. + +[[request-body-search-search-after]] +==== Search After + +See <>. include::request/search-type.asciidoc[] diff --git a/docs/reference/search/request/collapse.asciidoc b/docs/reference/search/request/collapse.asciidoc index 9780116f61ab9..b97c4044ec0c0 100644 --- a/docs/reference/search/request/collapse.asciidoc +++ b/docs/reference/search/request/collapse.asciidoc @@ -116,8 +116,8 @@ The `max_concurrent_group_searches` request parameter can be used to control the maximum number of concurrent searches allowed in this phase. The default is based on the number of data nodes and the default search thread pool size. -WARNING: `collapse` cannot be used in conjunction with <>, -<> or <>. +WARNING: `collapse` cannot be used in conjunction with <>, +<> or <>. 
[discrete] [[second-level-of-collapsing]] diff --git a/docs/reference/search/request/scroll.asciidoc b/docs/reference/search/request/scroll.asciidoc index 9c66c61e99a51..f1dfd27a08ccc 100644 --- a/docs/reference/search/request/scroll.asciidoc +++ b/docs/reference/search/request/scroll.asciidoc @@ -1,5 +1,6 @@ -[[request-body-search-scroll]] -==== Scroll +[discrete] +[[scroll-search-results]] +=== Scroll search results While a `search` request returns a single ``page'' of results, the `scroll` API can be used to retrieve large numbers of results (or even all results) @@ -102,8 +103,9 @@ GET /_search?scroll=1m -------------------------------------------------- // TEST[setup:twitter] +[discrete] [[scroll-search-context]] -===== Keeping the search context alive +==== Keeping the search context alive A scroll returns all the documents which matched the search at the time of the initial search request. It ignores any subsequent changes to these documents. @@ -149,7 +151,9 @@ You can check how many search contexts are open with the GET /_nodes/stats/indices/search --------------------------------------- -===== Clear scroll API +[discrete] +[[clear-scroll]] +==== Clear scroll API Search context are automatically removed when the `scroll` timeout has been exceeded. 
However keeping scrolls open has a cost, as discussed in the @@ -196,8 +200,9 @@ DELETE /_search/scroll/DXF1ZXJ5QW5kRmV0Y2gBAAAAAAAAAD4WYm9laVYtZndUQlNsdDcwakFMN --------------------------------------- // TEST[catch:missing] +[discrete] [[sliced-scroll]] -===== Sliced Scroll +==== Sliced Scroll For scroll queries that return a lot of documents it is possible to split the scroll in multiple slices which can be consumed independently: diff --git a/docs/reference/search/request/search-after.asciidoc b/docs/reference/search/request/search-after.asciidoc index 3196ac6dae2be..4e02abec363ae 100644 --- a/docs/reference/search/request/search-after.asciidoc +++ b/docs/reference/search/request/search-after.asciidoc @@ -1,9 +1,10 @@ -[[request-body-search-search-after]] -==== Search After +[discrete] +[[search-after]] +=== Search after Pagination of results can be done by using the `from` and `size` but the cost becomes prohibitive when the deep pagination is reached. The `index.max_result_window` which defaults to 10,000 is a safeguard, search requests take heap memory and time proportional to `from + size`. -The <> api is recommended for efficient deep scrolling but scroll contexts are costly and it is not +The <> API is recommended for efficient deep scrolling but scroll contexts are costly and it is not recommended to use it for real time user requests. The `search_after` parameter circumvents this problem by providing a live cursor. The idea is to use the results from the previous page to help the retrieval of the next page. diff --git a/docs/reference/search/request/sort.asciidoc b/docs/reference/search/request/sort.asciidoc index 9a3043b33f07d..cc54d21e59d08 100644 --- a/docs/reference/search/request/sort.asciidoc +++ b/docs/reference/search/request/sort.asciidoc @@ -46,7 +46,7 @@ GET /my-index-000001/_search NOTE: `_doc` has no real use-case besides being the most efficient sort order. 
So if you don't care about the order in which documents are returned, then you -should sort by `_doc`. This especially helps when <>. +should sort by `_doc`. This especially helps when <>. [discrete] === Sort Values diff --git a/docs/reference/search/scroll-api.asciidoc b/docs/reference/search/scroll-api.asciidoc index 6b16e63993005..6c3f9d86de199 100644 --- a/docs/reference/search/scroll-api.asciidoc +++ b/docs/reference/search/scroll-api.asciidoc @@ -4,7 +4,7 @@ Scroll ++++ -Retrieves the next batch of results for a <>. //// @@ -48,7 +48,7 @@ deprecated:[7.0.0] ==== {api-description-title} You can use the scroll API to retrieve large sets of results from a single -<> request. +<> request. The scroll API requires a scroll ID. To get a scroll ID, submit a <> request that includes an argument for the @@ -63,7 +63,7 @@ next batch of results for the request. You can also use the scroll API to specify a new `scroll` parameter that extends or shortens the retention period for the search context. -See <>. +See <>. IMPORTANT: Results from a scrolling search reflect the state of the index at the time of the initial search request. Subsequent indexing or document changes only @@ -86,7 +86,7 @@ the <>. `scroll`:: (Optional, <>) Period to retain the <> for scrolling. See -<>. +<>. + This value overrides the duration set by the original search API request's `scroll` parameter. @@ -118,7 +118,7 @@ Defaults to `false`. `scroll`:: (Optional, <>) Period to retain the <> for scrolling. See -<>. +<>. + This value overrides the duration set by the original search API request's `scroll` parameter. 
diff --git a/docs/reference/search/search-your-data.asciidoc b/docs/reference/search/search-your-data.asciidoc index 0babfb0c1491c..ab1cbef4736f6 100644 --- a/docs/reference/search/search-your-data.asciidoc +++ b/docs/reference/search/search-your-data.asciidoc @@ -290,10 +290,10 @@ GET /*/_search } ---- -include::request/from-size.asciidoc[] include::search-fields.asciidoc[] include::request/collapse.asciidoc[] include::request/highlighting.asciidoc[] +include::paginate-search-results.asciidoc[] include::request/sort.asciidoc[] include::{es-repo-dir}/async-search.asciidoc[] include::{es-repo-dir}/modules/cross-cluster-search.asciidoc[] diff --git a/docs/reference/search/search.asciidoc b/docs/reference/search/search.asciidoc index 35273b794e32c..6ff5b5c099676 100644 --- a/docs/reference/search/search.asciidoc +++ b/docs/reference/search/search.asciidoc @@ -101,8 +101,8 @@ results must then be combined and sorted to ensure that the overall order is correct. As an alternative to deep paging, we recommend using -<> or the -<> parameter. +<> or the +<> parameter. -- `ignore_throttled`:: @@ -159,7 +159,7 @@ include::{es-repo-dir}/rest-api/common-parms.asciidoc[tag=routing] `scroll`:: (Optional, <>) Period to retain the <> for scrolling. See -<>. +<>. + By default, this value cannot exceed `1d` (24 hours). You can change this limit using the `search.max_keep_alive` cluster-level setting. @@ -185,8 +185,8 @@ results must then be combined and sorted to ensure that the overall order is correct. As an alternative to deep paging, we recommend using -<> or the -<> parameter. +<> or the +<> parameter. If the <> is specified, this value cannot be `0`. @@ -322,8 +322,8 @@ results must then be combined and sorted to ensure that the overall order is correct. As an alternative to deep paging, we recommend using -<> or the -<> parameter. +<> or the +<> parameter. 
-- [[request-body-search-query]] @@ -351,8 +351,8 @@ results must then be combined and sorted to ensure that the overall order is correct. As an alternative to deep paging, we recommend using -<> or the -<> parameter. +<> or the +<> parameter. If the <> is specified, this value cannot be `0`. @@ -428,7 +428,7 @@ Identifier for the search and its <>. + You can use this scroll ID with the <> to retrieve the next batch of search results for the request. See -<>. +<>. + This parameter is only returned if the <> is specified in the request. diff --git a/docs/reference/sql/endpoints/rest.asciidoc b/docs/reference/sql/endpoints/rest.asciidoc index 779d95ea9c576..f7bbf3ce6f83c 100644 --- a/docs/reference/sql/endpoints/rest.asciidoc +++ b/docs/reference/sql/endpoints/rest.asciidoc @@ -337,7 +337,7 @@ Which looks like: Note that the `columns` object is only part of the first page. You've reached the last page when there is no `cursor` returned -in the results. Like Elasticsearch's <>, +in the results. Like Elasticsearch's <>, SQL may keep state in Elasticsearch to support the cursor. Unlike scroll, receiving the last page is enough to guarantee that the Elasticsearch state is cleared. diff --git a/docs/reference/sql/endpoints/translate.asciidoc b/docs/reference/sql/endpoints/translate.asciidoc index 1312aea8ecaec..fdccbf00956b4 100644 --- a/docs/reference/sql/endpoints/translate.asciidoc +++ b/docs/reference/sql/endpoints/translate.asciidoc @@ -49,7 +49,7 @@ Which returns: -------------------------------------------------- Which is the request that SQL will run to provide the results. -In this case, SQL will use the <> +In this case, SQL will use the <> API. If the result contained an aggregation then SQL would use the normal <> API. 
From a6b520915e97d9e1d98998b78e850eaff7d38dbd Mon Sep 17 00:00:00 2001 From: James Rodewig <40268737+jrodewig@users.noreply.github.com> Date: Fri, 31 Jul 2020 09:47:15 -0400 Subject: [PATCH 2/4] add xref --- docs/reference/search/paginate-search-results.asciidoc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/reference/search/paginate-search-results.asciidoc b/docs/reference/search/paginate-search-results.asciidoc index ef76ad55ed6af..6e23966418ca4 100644 --- a/docs/reference/search/paginate-search-results.asciidoc +++ b/docs/reference/search/paginate-search-results.asciidoc @@ -1,7 +1,7 @@ [[paginate-search-results]] == Paginate search results -By default, the search API returns the top 10 matching documents. +By default, the <<search-search,search API>> returns the top 10 matching documents. To paginate through a larger set of results, you can use the search API's `size` and `from` parameters. The `size` parameter is the number of matching documents From 43f20445cfe8632b989f6f9a9450b812d986759f Mon Sep 17 00:00:00 2001 From: James Rodewig <40268737+jrodewig@users.noreply.github.com> Date: Fri, 31 Jul 2020 10:07:44 -0400 Subject: [PATCH 3/4] fix anchor --- docs/reference/search/request/scroll.asciidoc | 2 +- .../src/main/resources/rest-api-spec/api/clear_scroll.json | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/reference/search/request/scroll.asciidoc b/docs/reference/search/request/scroll.asciidoc index f1dfd27a08ccc..bd065035c379e 100644 --- a/docs/reference/search/request/scroll.asciidoc +++ b/docs/reference/search/request/scroll.asciidoc @@ -152,7 +152,7 @@ GET /_nodes/stats/indices/search --------------------------------------- [discrete] -[[clear-scroll]] +[[_clear_scroll_api]] ==== Clear scroll API Search context are automatically removed when the `scroll` timeout has been diff --git a/rest-api-spec/src/main/resources/rest-api-spec/api/clear_scroll.json b/rest-api-spec/src/main/resources/rest-api-spec/api/clear_scroll.json index 
f484c94246c7b..b0e50045cd7cd 100644 --- a/rest-api-spec/src/main/resources/rest-api-spec/api/clear_scroll.json +++ b/rest-api-spec/src/main/resources/rest-api-spec/api/clear_scroll.json @@ -1,7 +1,7 @@ { "clear_scroll":{ "documentation":{ - "url":"https://www.elastic.co/guide/en/elasticsearch/reference/master/search-request-body.html#_clear_scroll_api", + "url":"https://www.elastic.co/guide/en/elasticsearch/reference/current/clear-scroll-api.html", "description":"Explicitly clears the search context for a scroll." }, "stability":"stable", From a15a2273fda91f96166225b3b4795fe84efddc2a Mon Sep 17 00:00:00 2001 From: James Rodewig <40268737+jrodewig@users.noreply.github.com> Date: Fri, 31 Jul 2020 10:22:21 -0400 Subject: [PATCH 4/4] fix anchor --- docs/reference/search/request-body.asciidoc | 5 +++++ docs/reference/search/request/scroll.asciidoc | 4 ++-- 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/docs/reference/search/request-body.asciidoc b/docs/reference/search/request-body.asciidoc index 28958861d0abc..ea1436fb7e34b 100644 --- a/docs/reference/search/request-body.asciidoc +++ b/docs/reference/search/request-body.asciidoc @@ -144,6 +144,11 @@ include::request/script-fields.asciidoc[] See <>. +[[_clear_scroll_api]] +===== Clear scroll API + +See <<clear-scroll>>. + [[request-body-search-search-after]] ==== Search After diff --git a/docs/reference/search/request/scroll.asciidoc b/docs/reference/search/request/scroll.asciidoc index bd065035c379e..071cc4fd2f7e7 100644 --- a/docs/reference/search/request/scroll.asciidoc +++ b/docs/reference/search/request/scroll.asciidoc @@ -152,8 +152,8 @@ GET /_nodes/stats/indices/search --------------------------------------- [discrete] -[[_clear_scroll_api]] -==== Clear scroll API +[[clear-scroll]] +==== Clear scroll Search context are automatically removed when the `scroll` timeout has been exceeded. However keeping scrolls open has a cost, as discussed in the