From 3c42613f7befc3743aab45e8ca88531f9fd15151 Mon Sep 17 00:00:00 2001 From: James Rodewig <40268737+jrodewig@users.noreply.github.com> Date: Wed, 26 Aug 2020 10:49:49 -0400 Subject: [PATCH 1/6] [DOCS] Add PIT to search after docs --- docs/reference/search/scroll-api.asciidoc | 4 + .../paginate-search-results.asciidoc | 287 +++++++++++------- docs/reference/search/search.asciidoc | 70 +---- 3 files changed, 185 insertions(+), 176 deletions(-) diff --git a/docs/reference/search/scroll-api.asciidoc b/docs/reference/search/scroll-api.asciidoc index 253b2d94bf5a4..50396f233e181 100644 --- a/docs/reference/search/scroll-api.asciidoc +++ b/docs/reference/search/scroll-api.asciidoc @@ -4,6 +4,10 @@ Scroll ++++ +IMPORTANT: We no longer recommend using the scroll API for deep pagination. If +you need to preserve the index state while paging through more than 10,000 hits, +use the <> parameter with a point in time (PIT). + Retrieves the next batch of results for a <>. diff --git a/docs/reference/search/search-your-data/paginate-search-results.asciidoc b/docs/reference/search/search-your-data/paginate-search-results.asciidoc index b39806c2df254..4b53967a77a75 100644 --- a/docs/reference/search/search-your-data/paginate-search-results.asciidoc +++ b/docs/reference/search/search-your-data/paginate-search-results.asciidoc @@ -1,18 +1,10 @@ [[paginate-search-results]] == Paginate search results -By default, the <> returns the top 10 matching documents. - -To paginate through a larger set of results, you can use the search API's `size` -and `from` parameters. The `size` parameter is the number of matching documents -to return. The `from` parameter is a zero-indexed offset from the beginning of -the complete result set that indicates the document you want to start with. - -The following search API request sets the `from` offset to `5`, meaning the -request offsets, or skips, the first five matching documents. 
- -The `size` parameter is `20`, meaning the request can return up to 20 documents, -starting at the offset. +By default, searches return the top 10 matching hits. To page through a larger +set of results, you can use the <>'s `from` and `size` +parameters. The `from` parameter defines the number of hits to skip, defaulting +to `0`. The `size` parameter is the maximum number of hits to return. [source,console] ---- @@ -28,29 +20,178 @@ GET /_search } ---- -By default, you cannot page through more than 10,000 documents using the `from` -and `size` parameters. This limit is set using the -<> index setting. +By default, you cannot use `from` and `size` to page through more than 10,000 +hits. Using `from` and `size` requires each searched shard to load hits for +previous pages into memory. This can significantly increase memory and CPU +usage, resulting in degraded performance or node failures. + +If you need to page through more than 10,000 hits, use the +<> parameter. If needed, you can use the +<> index setting to change +the 10,000 hit safeguard. + +WARNING: {es} uses Lucene's internal doc IDs as tie-breakers. These internal doc +IDs can be completely different across replicas of the same data. When paging +search hits, you might occasionally see that documents with the same sort values +are not ordered consistently. + +[discrete] +[[search-after]] +=== Search after + +You can use the `search_after` parameter to retrieve the next page of hits +using a set of <> from the previous page. + +Using `search_after` requires multiple search requests with the same `query` and +`sort` values. If a <> occurs between these requests, +the order of your results may change, resulting in inconsistent pages. To +prevent this, you can create a <> to +preserve the current index state for a specified period. + +IMPORTANT: We don't recommend creating PITs for each user request. Each PIT +requires additional disk space and file handles. 
If you want to paginate search +results for user requests, skip this step and use `search_after` without a PIT. + +[source,console] +---- +POST /my-index-000001/_pit?keep_alive=1m +---- +// TEST[setup:my_index] + +The API returns a PIT ID. + +[source,console-result] +---- +{ + "id": "46ToAwMDaWR4BXV1aWQxAgZub2RlXzEAAAAAAAAAAAEBYQNpZHkFdXVpZDIrBm5vZGVfMwAAAAAAAAAAKgFjA2lkeQV1dWlkMioGbm9kZV8yAAAAAAAAAAAMAWICBXV1aWQyAAAFdXVpZDEAAQltYXRjaF9hbGw_gAAAAA==" +} +---- +// TESTRESPONSE[s/"id": "46ToAwMDaWR4BXV1aWQxAgZub2RlXzEAAAAAAAAAAAEBYQNpZHkFdXVpZDIrBm5vZGVfMwAAAAAAAAAAKgFjA2lkeQV1dWlkMioGbm9kZV8yAAAAAAAAAAAMAWICBXV1aWQyAAAFdXVpZDEAAQltYXRjaF9hbGw_gAAAAA=="/"id": $body.id/] + +To get the first page of results, submit a search request with a `sort` +argument. If using a PIT, include the PIT ID as the `pit.id` argument. + +IMPORTANT: We recommend you include a tiebreaker field in your `sort`. This +tiebreaker field should contain a unique value for each document. If you don't +include a tiebreaker field, your paged results could miss or duplicate hits. + +[source,console] +---- +GET /my-index-000001/_search +{ + "size": 10000, + "query": { + "match" : { + "user.id" : "elkbee" + } + }, + "pit": { + "id": "46ToAwMDaWR4BXV1aWQxAgZub2RlXzEAAAAAAAAAAAEBYQNpZHkFdXVpZDIrBm5vZGVfMwAAAAAAAAAAKgFjA2lkeQV1dWlkMioGbm9kZV8yAAAAAAAAAAAMAWICBXV1aWQyAAAFdXVpZDEAAQltYXRjaF9hbGw_gAAAAA==", <1> + "keep_alive": "1m" + }, + "sort": [ <2> + {"@timestamp": "asc"}, + {"tie_breaker_id": "asc"} + ] +} +---- +// TEST[catch:missing] + +<1> The PIT ID. For user requests, omit this parameter. +<2> Sorts hits for the search. + +The search response includes an array of `sort` values for each hit. If you used +a PIT, the response's `pit_id` contains an updated PIT ID. 
+ +[source,console-result] +---- +{ + "pit_id" : "46ToAwEPbXktaW5kZXgtMDAwMDAxFnVzaTVuenpUVGQ2TFNheUxVUG5LVVEAFldicVdzOFFtVHZTZDFoWWowTGkwS0EAAAAAAAAAAAQURzZzcUszUUJ5U1NMX3Jyak5ET0wBFnVzaTVuenpUVGQ2TFNheUxVUG5LVVEAAA==", <1> + "took" : 17, + "timed_out" : false, + "_shards" : ..., + "hits" : { + "total" : ..., + "max_score" : null, + "hits" : [ + ... + { + "_index" : "my-index-000001", + "_id" : "FaslK3QBySSL_rrj9zM5", + "_score" : null, + "_source" : ..., + "sort" : [ <2> + 4098435132000, + "FaslK3QBySSL_rrj9zM5" + ] + } + ] + } +} +---- +// TESTRESPONSE[skip: unable to access PIT ID] + +<1> Updated `id` for the point in time. +<2> Sort values for the last returned hit. + +To get the next page of results, rerun the previous search using the last hit's +sort values as the `search_after` argument. If using a PIT, use the updated PIT +ID as the `pit.id`. The search's `query` and `sort` must remain unchanged. If +provided, the `from` argument must be `0` (default) or `-1`. + +[source,console] +---- +GET /my-index-000001/_search +{ + "size": 10000, + "query": { + "match" : { + "user.id" : "elkbee" + } + }, + "pit": { + "id": "46ToAwEPbXktaW5kZXgtMDAwMDAxFnVzaTVuenpUVGQ2TFNheUxVUG5LVVEAFldicVdzOFFtVHZTZDFoWWowTGkwS0EAAAAAAAAAAAQURzZzcUszUUJ5U1NMX3Jyak5ET0wBFnVzaTVuenpUVGQ2TFNheUxVUG5LVVEAAA==", <1> + "keep_alive": "1m" + }, + "sort": [ + {"@timestamp": "asc"}, + {"tie_breaker_id": "asc"} + ], + "search_after": [ <2> + 4098435132000, + "FaslK3QBySSL_rrj9zM5" + ] +} +---- +// TEST[catch:missing] + +<1> PIT ID returned by the previous search. +<2> Sort values from the previous search's last hit. -Deep paging or requesting many results at once can result in slow searches. -Results are sorted before being returned. Because search requests usually span -multiple shards, each shard must generate its own sorted results. These separate -results must then be combined and sorted to ensure that the overall sort order -is correct. 
+You can repeat this process to get additional pages of results. If using a PIT, +you can extend the PIT's retention period each search request using the +`keep_alive` parameter. -As an alternative to deep paging, we recommend using -<> or the -<> parameter. +When you're finished, we recommend deleting your PIT. + +[source,console] +---- +DELETE /_pit +{ + "id" : "46ToAwEPbXktaW5kZXgtMDAwMDAxFnVzaTVuenpUVGQ2TFNheUxVUG5LVVEAFldicVdzOFFtVHZTZDFoWWowTGkwS0EAAAAAAAAAAAQURzZzcUszUUJ5U1NMX3Jyak5ET0wBFnVzaTVuenpUVGQ2TFNheUxVUG5LVVEAAA==" +} +---- +// TEST[catch:missing] -WARNING: {es} uses Lucene's internal doc IDs as tie-breakers. These internal -doc IDs can be completely different across replicas of the same -data. When paginating, you might occasionally see that documents with the same -sort values are not ordered consistently. [discrete] [[scroll-search-results]] === Scroll search results +IMPORTANT: We no longer recommend using the scroll API for deep pagination. If +you need to preserve the index state while paging through more than 10,000 hits, +use the <> parameter with a point in time (PIT). + While a `search` request returns a single ``page'' of results, the `scroll` API can be used to retrieve large numbers of results (or even all results) from a single search request, in much the same way as you would use a cursor @@ -125,13 +266,13 @@ POST /_search/scroll for another `1m`. <3> The `scroll_id` parameter -The `size` parameter allows you to configure the maximum number of hits to be -returned with each batch of results. Each call to the `scroll` API returns the -next batch of results until there are no more results left to return, ie the +The `size` parameter allows you to configure the maximum number of hits to be +returned with each batch of results. Each call to the `scroll` API returns the +next batch of results until there are no more results left to return, ie the `hits` array is empty. 
-IMPORTANT: The initial search request and each subsequent scroll request each -return a `_scroll_id`. While the `_scroll_id` may change between requests, it doesn’t +IMPORTANT: The initial search request and each subsequent scroll request each +return a `_scroll_id`. While the `_scroll_id` may change between requests, it doesn’t always change — in any case, only the most recently received `_scroll_id` should be used. NOTE: If the request specifies aggregations, only the initial search response @@ -340,85 +481,3 @@ For append only time-based indices, the `timestamp` field can be used safely. NOTE: By default the maximum number of slices allowed per scroll is limited to 1024. You can update the `index.max_slices_per_scroll` index setting to bypass this limit. - -[discrete] -[[search-after]] -=== Search after - -Pagination of results can be done by using the `from` and `size` but the cost becomes prohibitive when the deep pagination is reached. -The `index.max_result_window` which defaults to 10,000 is a safeguard, search requests take heap memory and time proportional to `from + size`. -The <> API is recommended for efficient deep scrolling but scroll contexts are costly and it is not -recommended to use it for real time user requests. -The `search_after` parameter circumvents this problem by providing a live cursor. -The idea is to use the results from the previous page to help the retrieval of the next page. 
- -Suppose that the query to retrieve the first page looks like this: - -[source,console] --------------------------------------------------- -GET my-index-000001/_search -{ - "size": 10, - "query": { - "match" : { - "message" : "foo" - } - }, - "sort": [ - {"@timestamp": "asc"}, - {"tie_breaker_id": "asc"} <1> - ] -} --------------------------------------------------- -// TEST[setup:my_index] -// TEST[s/"tie_breaker_id": "asc"/"tie_breaker_id": {"unmapped_type": "keyword"}/] - -<1> A copy of the `_id` field with `doc_values` enabled - -[IMPORTANT] -A field with one unique value per document should be used as the tiebreaker -of the sort specification. Otherwise the sort order for documents that have -the same sort values would be undefined and could lead to missing or duplicate -results. The <> has a unique value per document -but it is not recommended to use it as a tiebreaker directly. -Beware that `search_after` looks for the first document which fully or partially -matches tiebreaker's provided value. Therefore if a document has a tiebreaker value of -`"654323"` and you `search_after` for `"654"` it would still match that document -and return results found after it. -<> are disabled on this field so sorting on it requires -to load a lot of data in memory. Instead it is advised to duplicate (client side - or with a <>) the content -of the <> in another field that has -<> enabled and to use this new field as the tiebreaker -for the sort. - -The result from the above request includes an array of `sort values` for each document. -These `sort values` can be used in conjunction with the `search_after` parameter to start returning results "after" any -document in the result list. 
-For instance we can use the `sort values` of the last document and pass it to `search_after` to retrieve the next page of results: - -[source,console] --------------------------------------------------- -GET my-index-000001/_search -{ - "size": 10, - "query": { - "match" : { - "message" : "foo" - } - }, - "search_after": [1463538857, "654323"], - "sort": [ - {"@timestamp": "asc"}, - {"tie_breaker_id": "asc"} - ] -} --------------------------------------------------- -// TEST[setup:my_index] -// TEST[s/"tie_breaker_id": "asc"/"tie_breaker_id": {"unmapped_type": "keyword"}/] - -NOTE: The parameter `from` must be set to 0 (or -1) when `search_after` is used. - -`search_after` is not a solution to jump freely to a random page but rather to scroll many queries in parallel. -It is very similar to the `scroll` API but unlike it, the `search_after` parameter is stateless, it is always resolved against the latest - version of the searcher. For this reason the sort order may change during a walk depending on the updates and deletes of your index. diff --git a/docs/reference/search/search.asciidoc b/docs/reference/search/search.asciidoc index 4133f860e8174..abfa4b578d29c 100644 --- a/docs/reference/search/search.asciidoc +++ b/docs/reference/search/search.asciidoc @@ -89,21 +89,9 @@ computation as part of a hit. Defaults to `false`. include::{es-repo-dir}/rest-api/common-parms.asciidoc[tag=from] + --- -By default, you cannot page through more than 10,000 documents using the `from` -and `size` parameters. This limit is set using the -<> index setting. - -Deep paging or requesting many results at once can result in slow searches. -Results are sorted before being returned. Because search requests usually span -multiple shards, each shard must generate its own sorted results. These separate -results must then be combined and sorted to ensure that the overall order is -correct. 
- -As an alternative to deep paging, we recommend using -<> or the +By default, you cannot page through more than 10,000 hits using the `from` and +`size` parameters. To page through more hits, use the <> parameter. --- `ignore_throttled`:: (Optional, boolean) If `true`, concrete, expanded or aliased indices will be @@ -229,25 +217,10 @@ last modification of each hit. See <>. `size`:: (Optional, integer) Defines the number of hits to return. Defaults to `10`. + --- -By default, you cannot page through more than 10,000 documents using the `from` -and `size` parameters. This limit is set using the -<> index setting. - -Deep paging or requesting many results at once can result in slow searches. -Results are sorted before being returned. Because search requests usually span -multiple shards, each shard must generate its own sorted results. These separate -results must then be combined and sorted to ensure that the overall order is -correct. - -As an alternative to deep paging, we recommend using -<> or the +By default, you cannot page through more than 10,000 hits using the `from` and +`size` parameters. To page through more hits, use the <> parameter. -If the <> is specified, this -value cannot be `0`. --- - `sort`:: (Optional, string) A comma-separated list of : pairs. @@ -366,21 +339,9 @@ computation as part of a hit. Defaults to `false`. include::{es-repo-dir}/rest-api/common-parms.asciidoc[tag=from] + --- -By default, you cannot page through more than 10,000 documents using the `from` -and `size` parameters. This limit is set using the -<> index setting. - -Deep paging or requesting many results at once can result in slow searches. -Results are sorted before being returned. Because search requests usually span -multiple shards, each shard must generate its own sorted results. These separate -results must then be combined and sorted to ensure that the overall order is -correct. 
- -As an alternative to deep paging, we recommend using -<> or the +By default, you cannot page through more than 10,000 hits using the `from` and +`size` parameters. To page through more hits, use the <> parameter. --- `indices_boost`:: (Optional, array of objects) @@ -419,25 +380,10 @@ last modification of each hit. See <>. `size`:: (Optional, integer) The number of hits to return. Needs to be non-negative and defaults to `10`. + --- -By default, you cannot page through more than 10,000 documents using the `from` -and `size` parameters. This limit is set using the -<> index setting. - -Deep paging or requesting many results at once can result in slow searches. -Results are sorted before being returned. Because search requests usually span -multiple shards, each shard must generate its own sorted results. These separate -results must then be combined and sorted to ensure that the overall order is -correct. - -As an alternative to deep paging, we recommend using -<> or the +By default, you cannot page through more than 10,000 hits using the `from` and +`size` parameters. To page through more hits, use the <> parameter. -If the <> is specified, this -value cannot be `0`. 
--- - `_source`:: (Optional) Indicates which <> are returned for matching From ede7d9c8816b2df0bafb9880a9e5e1c5ec5cee03 Mon Sep 17 00:00:00 2001 From: James Rodewig <40268737+jrodewig@users.noreply.github.com> Date: Wed, 26 Aug 2020 16:05:42 -0400 Subject: [PATCH 2/6] Editorial changes --- .../paginate-search-results.asciidoc | 37 +++++++++---------- 1 file changed, 18 insertions(+), 19 deletions(-) diff --git a/docs/reference/search/search-your-data/paginate-search-results.asciidoc b/docs/reference/search/search-your-data/paginate-search-results.asciidoc index 4b53967a77a75..46e960d5fe43a 100644 --- a/docs/reference/search/search-your-data/paginate-search-results.asciidoc +++ b/docs/reference/search/search-your-data/paginate-search-results.asciidoc @@ -20,15 +20,14 @@ GET /_search } ---- -By default, you cannot use `from` and `size` to page through more than 10,000 -hits. Using `from` and `size` requires each searched shard to load hits for -previous pages into memory. This can significantly increase memory and CPU -usage, resulting in degraded performance or node failures. - -If you need to page through more than 10,000 hits, use the -<> parameter. If needed, you can use the -<> index setting to change -the 10,000 hit safeguard. +By default, you cannot use `from` and `size` to page through more than +10,000 hits. This limit is a safeguard set using the +<> index setting. +Using `from` and `size` requires each searched shard to load the hits for +all previous pages into memory. For large result sets, this can +significantly increase memory and CPU usage, resulting in degraded +performance or node failures. If you need to page through more than +10,000 hits, use the <> parameter instead. WARNING: {es} uses Lucene's internal doc IDs as tie-breakers. These internal doc IDs can be completely different across replicas of the same data. When paging @@ -44,13 +43,13 @@ using a set of <> from the previous page. 
Using `search_after` requires multiple search requests with the same `query` and `sort` values. If a <> occurs between these requests, -the order of your results may change, resulting in inconsistent pages. To +the order of your results may change, causing inconsistent results across pages. To prevent this, you can create a <> to -preserve the current index state for a specified period. +preserve the current index state over multiple searches. IMPORTANT: We don't recommend creating PITs for each user request. Each PIT requires additional disk space and file handles. If you want to paginate search -results for user requests, skip this step and use `search_after` without a PIT. +results for user requests, use `search_after` without a PIT. [source,console] ---- @@ -69,7 +68,7 @@ The API returns a PIT ID. // TESTRESPONSE[s/"id": "46ToAwMDaWR4BXV1aWQxAgZub2RlXzEAAAAAAAAAAAEBYQNpZHkFdXVpZDIrBm5vZGVfMwAAAAAAAAAAKgFjA2lkeQV1dWlkMioGbm9kZV8yAAAAAAAAAAAMAWICBXV1aWQyAAAFdXVpZDEAAQltYXRjaF9hbGw_gAAAAA=="/"id": $body.id/] To get the first page of results, submit a search request with a `sort` -argument. If using a PIT, include the PIT ID as the `pit.id` argument. +argument. If using a PIT, specify the PIT ID in the `pit.id` parameter. IMPORTANT: We recommend you include a tiebreaker field in your `sort`. This tiebreaker field should contain a unique value for each document. If you don't @@ -101,7 +100,7 @@ GET /my-index-000001/_search <2> Sorts hits for the search. The search response includes an array of `sort` values for each hit. If you used -a PIT, the response's `pit_id` contains an updated PIT ID. +a PIT, the response's `pit_id` parameter contains an updated PIT ID. [source,console-result] ---- @@ -135,9 +134,9 @@ a PIT, the response's `pit_id` contains an updated PIT ID. <2> Sort values for the last returned hit. To get the next page of results, rerun the previous search using the last hit's -sort values as the `search_after` argument. 
If using a PIT, use the updated PIT -ID as the `pit.id`. The search's `query` and `sort` must remain unchanged. If -provided, the `from` argument must be `0` (default) or `-1`. +sort values as the `search_after` argument. If using a PIT, use the latest PIT +ID in the `pit.id` parameter. The search's `query` and `sort` arguments must +remain unchanged. If provided, the `from` argument must be `0` (default) or `-1`. [source,console] ---- @@ -169,8 +168,8 @@ GET /my-index-000001/_search <2> Sort values from the previous search's last hit. You can repeat this process to get additional pages of results. If using a PIT, -you can extend the PIT's retention period each search request using the -`keep_alive` parameter. +you can extend the PIT's retention period using the +`keep_alive` parameter of each search request. When you're finished, we recommend deleting your PIT. From 6017c316b7e7bf651a64bd7ea34ee7f6aeb2fb18 Mon Sep 17 00:00:00 2001 From: James Rodewig <40268737+jrodewig@users.noreply.github.com> Date: Wed, 26 Aug 2020 18:38:20 -0400 Subject: [PATCH 3/6] minor wording fixes --- .../paginate-search-results.asciidoc | 26 +++++++++++-------- 1 file changed, 15 insertions(+), 11 deletions(-) diff --git a/docs/reference/search/search-your-data/paginate-search-results.asciidoc b/docs/reference/search/search-your-data/paginate-search-results.asciidoc index 46e960d5fe43a..c23401b077dd9 100644 --- a/docs/reference/search/search-your-data/paginate-search-results.asciidoc +++ b/docs/reference/search/search-your-data/paginate-search-results.asciidoc @@ -4,7 +4,8 @@ By default, searches return the top 10 matching hits. To page through a larger set of results, you can use the <>'s `from` and `size` parameters. The `from` parameter defines the number of hits to skip, defaulting -to `0`. The `size` parameter is the maximum number of hits to return. +to `0`. The `size` parameter is the maximum number of hits to return. Together, +these two parameters define a page of results. 
[source,console] ---- @@ -20,14 +21,17 @@ GET /_search } ---- -By default, you cannot use `from` and `size` to page through more than -10,000 hits. This limit is a safeguard set using the -<> index setting. -Using `from` and `size` requires each searched shard to load the hits for -all previous pages into memory. For large result sets, this can -significantly increase memory and CPU usage, resulting in degraded -performance or node failures. If you need to page through more than -10,000 hits, use the <> parameter instead. +Avoid using `from` and `size` to page deeply or request many results at once. +Search requests usually span multiple shards. Each shard must load its requested +hits and the hits for any previous pages into memory. For deep pages or large +sets of results, these operations can significantly increase memory and CPU +usage, resulting in degraded performance or node failures. + +By default, you cannot use `from` and `size` to page through more than 10,000 +hits. This limit is a safeguard set by the +<> index setting. If you need +to page through more than 10,000 hits, use the <> +parameter instead. WARNING: {es} uses Lucene's internal doc IDs as tie-breakers. These internal doc IDs can be completely different across replicas of the same data. When paging @@ -45,7 +49,7 @@ Using `search_after` requires multiple search requests with the same `query` and `sort` values. If a <> occurs between these requests, the order of your results may change, causing inconsistent results across pages. To prevent this, you can create a <> to -preserve the current index state over multiple searches. +preserve the current index state over your searches. IMPORTANT: We don't recommend creating PITs for each user request. Each PIT requires additional disk space and file handles. If you want to paginate search @@ -96,7 +100,7 @@ GET /my-index-000001/_search ---- // TEST[catch:missing] -<1> The PIT ID. For user requests, omit this parameter. +<1> PIT ID for the search. 
<2> Sorts hits for the search. The search response includes an array of `sort` values for each hit. If you used From 4b5b859dd1694f26df83373b1ddcc46bb5db1e87 Mon Sep 17 00:00:00 2001 From: James Rodewig <40268737+jrodewig@users.noreply.github.com> Date: Wed, 26 Aug 2020 18:43:10 -0400 Subject: [PATCH 4/6] missing word --- .../search-your-data/paginate-search-results.asciidoc | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/docs/reference/search/search-your-data/paginate-search-results.asciidoc b/docs/reference/search/search-your-data/paginate-search-results.asciidoc index c23401b077dd9..8eec4882c9eab 100644 --- a/docs/reference/search/search-your-data/paginate-search-results.asciidoc +++ b/docs/reference/search/search-your-data/paginate-search-results.asciidoc @@ -21,11 +21,11 @@ GET /_search } ---- -Avoid using `from` and `size` to page deeply or request many results at once. -Search requests usually span multiple shards. Each shard must load its requested -hits and the hits for any previous pages into memory. For deep pages or large -sets of results, these operations can significantly increase memory and CPU -usage, resulting in degraded performance or node failures. +Avoid using `from` and `size` to page too deeply or request too many results at +once. Search requests usually span multiple shards. Each shard must load its +requested hits and the hits for any previous pages into memory. For deep pages +or large sets of results, these operations can significantly increase memory and +CPU usage, resulting in degraded performance or node failures. By default, you cannot use `from` and `size` to page through more than 10,000 hits. 
This limit is a safeguard set by the From 9cdf464ac02e556f630ffca3d2e8bc9abea2f0ba Mon Sep 17 00:00:00 2001 From: James Rodewig <40268737+jrodewig@users.noreply.github.com> Date: Thu, 3 Sep 2020 09:09:27 -0400 Subject: [PATCH 5/6] Update docs/reference/search/search-your-data/paginate-search-results.asciidoc Co-authored-by: Jim Ferenczi --- .../search/search-your-data/paginate-search-results.asciidoc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/reference/search/search-your-data/paginate-search-results.asciidoc b/docs/reference/search/search-your-data/paginate-search-results.asciidoc index 8eec4882c9eab..21b499cb90160 100644 --- a/docs/reference/search/search-your-data/paginate-search-results.asciidoc +++ b/docs/reference/search/search-your-data/paginate-search-results.asciidoc @@ -175,7 +175,7 @@ You can repeat this process to get additional pages of results. If using a PIT, you can extend the PIT's retention period using the `keep_alive` parameter of each search request. -When you're finished, we recommend deleting your PIT. +When you're finished, you should delete your PIT. [source,console] ---- From df7ca438f5ecda1ab49f40a84ec2716f5280edcf Mon Sep 17 00:00:00 2001 From: James Rodewig <40268737+jrodewig@users.noreply.github.com> Date: Thu, 3 Sep 2020 09:11:19 -0400 Subject: [PATCH 6/6] Remove admon re: user requests --- .../search/search-your-data/paginate-search-results.asciidoc | 4 ---- 1 file changed, 4 deletions(-) diff --git a/docs/reference/search/search-your-data/paginate-search-results.asciidoc b/docs/reference/search/search-your-data/paginate-search-results.asciidoc index 21b499cb90160..708451945e8a9 100644 --- a/docs/reference/search/search-your-data/paginate-search-results.asciidoc +++ b/docs/reference/search/search-your-data/paginate-search-results.asciidoc @@ -51,10 +51,6 @@ the order of your results may change, causing inconsistent results across pages. 
prevent this, you can create a <<point-in-time-api,point in time (PIT)>> to preserve the current index state over your searches. -IMPORTANT: We don't recommend creating PITs for each user request. Each PIT -requires additional disk space and file handles. If you want to paginate search -results for user requests, use `search_after` without a PIT. - [source,console] ---- POST /my-index-000001/_pit?keep_alive=1m