
[search] Limit the size of the result window
Requesting a million hits, or page 100,000, is always a bad idea, but users
may not be aware of this. This adds a per-index limit on the maximum size +
from that can be requested, which defaults to 10,000.

This should not interfere with deep-scrolling.

Closes #9311
nik9000 committed Sep 10, 2015
1 parent f2ae12e commit 7b0a876
Showing 7 changed files with 145 additions and 46 deletions.
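
Before the diffs, a quick illustration of the new knob. This is a hedged sketch, not code from this commit: the Client reference, the index name "big-index", and the value 50000 are all placeholders; only index.max_result_window and its 10,000 default come from this change.

import org.elasticsearch.client.Client;
import org.elasticsearch.common.settings.Settings;

// Sketch: opting an index into a larger result window at creation time.
void createIndexWithLargerWindow(Client client) {
    client.admin().indices().prepareCreate("big-index")
            .setSettings(Settings.builder()
                    .put("index.max_result_window", 50000)) // default is 10000
            .get();
}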
@@ -90,6 +90,7 @@
import org.elasticsearch.indices.store.IndicesStore;
import org.elasticsearch.indices.ttl.IndicesTTLService;
import org.elasticsearch.search.SearchService;
import org.elasticsearch.search.internal.DefaultSearchContext;
import org.elasticsearch.threadpool.ThreadPool;
import org.elasticsearch.transport.TransportService;

@@ -275,6 +276,7 @@ private void registerBuiltinIndexSettings() {
registerIndexDynamicSetting(IndicesRequestCache.INDEX_CACHE_REQUEST_ENABLED, Validator.BOOLEAN);
registerIndexDynamicSetting(IndicesRequestCache.DEPRECATED_INDEX_CACHE_REQUEST_ENABLED, Validator.BOOLEAN);
registerIndexDynamicSetting(UnassignedInfo.INDEX_DELAYED_NODE_LEFT_TIMEOUT_SETTING, Validator.TIME);
registerIndexDynamicSetting(DefaultSearchContext.MAX_RESULT_WINDOW, Validator.POSITIVE_INTEGER);
}

public void registerIndexDynamicSetting(String setting, Validator validator) {
@@ -78,6 +78,20 @@
*
*/
public class DefaultSearchContext extends SearchContext {
/**
* Index setting describing the maximum value of from + size on a query.
*/
public static final String MAX_RESULT_WINDOW = "index.max_result_window";
public static class Defaults {
/**
* Default maximum value of from + size on a query. 10,000 was chosen as
* a conservative default as it is sure to not cause trouble. Users can
* certainly profile their cluster and decide to set it to 100,000
* safely. 1,000,000 is probably way too high for any cluster to set
* safely.
*/
public static final int MAX_RESULT_WINDOW = 10000;
}

private final long id;
private final ShardSearchRequest request;
@@ -172,12 +186,20 @@ public void doClose() {
*/
@Override
public void preProcess() {
-        if (!(from() == -1 && size() == -1)) {
-            // from and size have been set.
-            int numHits = from() + size();
-            if (numHits < 0) {
-                String msg = "Result window is too large, from + size must be less than or equal to: [" + Integer.MAX_VALUE + "] but was [" + (((long) from()) + ((long) size())) + "]";
-                throw new QueryPhaseExecutionException(this, msg);
+        if (scrollContext == null) {
+            long from = from() == -1 ? 0 : from();
+            long size = size() == -1 ? 10 : size();
+            long resultWindow = from + size;
+            // We need settingsService's view of the settings because it's dynamic;
+            // indexService's isn't.
+            int maxResultWindow = indexService.settingsService().getSettings().getAsInt(MAX_RESULT_WINDOW, Defaults.MAX_RESULT_WINDOW);
+
+            if (resultWindow > maxResultWindow) {
+                throw new QueryPhaseExecutionException(this,
+                        "Result window is too large, from + size must be less than or equal to: [" + maxResultWindow + "] but was ["
+                        + resultWindow + "]. See the scroll api for a more efficient way to request large data sets. "
+                        + "This limit can be set by changing the [" + DefaultSearchContext.MAX_RESULT_WINDOW
+                        + "] index level parameter.");
}
}

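A side note on the new check above: from and size are widened to long before summing because the old int addition could overflow, which is exactly what the removed numHits < 0 test was guarding against. A tiny self-contained illustration:

// Demonstrates why preProcess now sums from + size as a long.
public class ResultWindowOverflowDemo {
    public static void main(String[] args) {
        int from = Integer.MAX_VALUE;
        int size = 10;
        // int addition wraps around; the old code detected this via numHits < 0:
        System.out.println(from + size);               // prints -2147483639
        // widening to long keeps the true sum, as the new check does:
        System.out.println((long) from + (long) size); // prints 2147483657
    }
}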
@@ -74,27 +74,27 @@ public void testSimpleScrollQueryThenFetch() throws Exception {
.execute().actionGet();
try {
long counter = 0;

assertThat(searchResponse.getHits().getTotalHits(), equalTo(100l));
assertThat(searchResponse.getHits().hits().length, equalTo(35));
for (SearchHit hit : searchResponse.getHits()) {
assertThat(((Number) hit.sortValues()[0]).longValue(), equalTo(counter++));
}

searchResponse = client().prepareSearchScroll(searchResponse.getScrollId())
.setScroll(TimeValue.timeValueMinutes(2))
.execute().actionGet();

assertThat(searchResponse.getHits().getTotalHits(), equalTo(100l));
assertThat(searchResponse.getHits().hits().length, equalTo(35));
for (SearchHit hit : searchResponse.getHits()) {
assertThat(((Number) hit.sortValues()[0]).longValue(), equalTo(counter++));
}

searchResponse = client().prepareSearchScroll(searchResponse.getScrollId())
.setScroll(TimeValue.timeValueMinutes(2))
.execute().actionGet();

assertThat(searchResponse.getHits().getTotalHits(), equalTo(100l));
assertThat(searchResponse.getHits().hits().length, equalTo(30));
for (SearchHit hit : searchResponse.getHits()) {
@@ -133,47 +133,47 @@ public void testSimpleScrollQueryThenFetchSmallSizeUnevenDistribution() throws Exception {
.execute().actionGet();
try {
long counter = 0;

assertThat(searchResponse.getHits().getTotalHits(), equalTo(100l));
assertThat(searchResponse.getHits().hits().length, equalTo(3));
for (SearchHit hit : searchResponse.getHits()) {
assertThat(((Number) hit.sortValues()[0]).longValue(), equalTo(counter++));
}

for (int i = 0; i < 32; i++) {
searchResponse = client().prepareSearchScroll(searchResponse.getScrollId())
.setScroll(TimeValue.timeValueMinutes(2))
.execute().actionGet();

assertThat(searchResponse.getHits().getTotalHits(), equalTo(100l));
assertThat(searchResponse.getHits().hits().length, equalTo(3));
for (SearchHit hit : searchResponse.getHits()) {
assertThat(((Number) hit.sortValues()[0]).longValue(), equalTo(counter++));
}
}

// and now, the last one is one
searchResponse = client().prepareSearchScroll(searchResponse.getScrollId())
.setScroll(TimeValue.timeValueMinutes(2))
.execute().actionGet();

assertThat(searchResponse.getHits().getTotalHits(), equalTo(100l));
assertThat(searchResponse.getHits().hits().length, equalTo(1));
for (SearchHit hit : searchResponse.getHits()) {
assertThat(((Number) hit.sortValues()[0]).longValue(), equalTo(counter++));
}

// and the last is zero
searchResponse = client().prepareSearchScroll(searchResponse.getScrollId())
.setScroll(TimeValue.timeValueMinutes(2))
.execute().actionGet();

assertThat(searchResponse.getHits().getTotalHits(), equalTo(100l));
assertThat(searchResponse.getHits().hits().length, equalTo(0));
for (SearchHit hit : searchResponse.getHits()) {
assertThat(((Number) hit.sortValues()[0]).longValue(), equalTo(counter++));
}

} finally {
clearScroll(searchResponse.getScrollId());
}
@@ -212,7 +212,7 @@ public void testScrollAndUpdateIndex() throws Exception {
}
searchResponse = client().prepareSearchScroll(searchResponse.getScrollId()).setScroll(TimeValue.timeValueMinutes(2)).execute().actionGet();
} while (searchResponse.getHits().hits().length > 0);

client().admin().indices().prepareRefresh().execute().actionGet();
assertThat(client().prepareCount().setQuery(matchAllQuery()).execute().actionGet().getCount(), equalTo(500l));
assertThat(client().prepareCount().setQuery(termQuery("message", "test")).execute().actionGet().getCount(), equalTo(0l));
@@ -410,9 +410,7 @@ public void testSimpleScrollQueryThenFetch_clearAllScrollIds() throws Exception {
assertThrows(internalCluster().transportClient().prepareSearchScroll(searchResponse2.getScrollId()).setScroll(TimeValue.timeValueMinutes(2)), RestStatus.NOT_FOUND);
}

-    @Test
-    // https://github.com/elasticsearch/elasticsearch/issues/4156
-    public void testDeepPaginationWithOneDocIndexAndDoNotBlowUp() throws Exception {
+    public void testDeepScrollingDoesNotBlowUp() throws Exception {
client().prepareIndex("index", "type", "1")
.setSource("field", "value")
.setRefresh(true)
@@ -424,7 +422,7 @@ public void testDeepPaginationWithOneDocIndexAndDoNotBlowUp() throws Exception {
.setQuery(QueryBuilders.matchAllQuery())
.setSize(Integer.MAX_VALUE);

-        if (searchType == SearchType.SCAN || searchType != SearchType.COUNT && randomBoolean()) {
+        if (searchType == SearchType.SCAN || searchType != SearchType.COUNT) {
builder.setScroll("1m");
}

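Since both the commit message and the new error point users at scrolling, here is a minimal scroll loop in the same test-client idiom as the tests above; the index name and the 100-hit batch size are assumptions, not part of this change:

// Sketch: retrieving a large result set via the scroll api instead of a
// huge from/size window. "big-index" and the batch size are placeholders.
SearchResponse response = client().prepareSearch("big-index")
        .setQuery(QueryBuilders.matchAllQuery())
        .setSize(100)                             // per-batch size, well under the window
        .setScroll(TimeValue.timeValueMinutes(2))
        .execute().actionGet();
while (response.getHits().hits().length > 0) {
    // ... consume response.getHits() ...
    response = client().prepareSearchScroll(response.getScrollId())
            .setScroll(TimeValue.timeValueMinutes(2))
            .execute().actionGet();
}
clearScroll(response.getScrollId()); // free the server-side scroll context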
@@ -22,11 +22,13 @@
import org.apache.lucene.util.Constants;
import org.elasticsearch.action.index.IndexRequestBuilder;
import org.elasticsearch.action.search.SearchPhaseExecutionException;
import org.elasticsearch.action.search.SearchRequestBuilder;
import org.elasticsearch.action.search.SearchResponse;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.common.xcontent.XContentFactory;
import org.elasticsearch.index.query.QueryBuilders;
import org.elasticsearch.search.internal.DefaultSearchContext;
import org.elasticsearch.test.ESIntegTestCase;
-import org.junit.Test;

import java.util.ArrayList;
import java.util.List;
@@ -38,12 +40,12 @@
import static org.elasticsearch.common.xcontent.XContentFactory.jsonBuilder;
import static org.elasticsearch.index.query.QueryBuilders.boolQuery;
import static org.elasticsearch.index.query.QueryBuilders.rangeQuery;
-import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.*;
+import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertAcked;
+import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertHitCount;
+import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertNoFailures;
import static org.hamcrest.Matchers.containsString;

public class SimpleSearchIT extends ESIntegTestCase {

-    @Test
public void testSearchNullIndex() {
try {
client().prepareSearch((String) null).setQuery(QueryBuilders.termQuery("_id", "XXX1")).execute().actionGet();
@@ -60,7 +62,6 @@ public void testSearchNullIndex() {
}
}

-    @Test
public void testSearchRandomPreference() throws InterruptedException, ExecutionException {
createIndex("test");
indexRandom(true, client().prepareIndex("test", "type", "1").setSource("field", "value"),
@@ -84,8 +85,7 @@ public void testSearchRandomPreference() throws InterruptedException, ExecutionException {
}
}

-    @Test
-    public void simpleIpTests() throws Exception {
+    public void testSimpleIp() throws Exception {
createIndex("test");

client().admin().indices().preparePutMapping("test").setType("type1")
@@ -104,8 +104,7 @@ public void simpleIpTests() throws Exception {
assertHitCount(search, 1l);
}

-    @Test
-    public void simpleIdTests() {
+    public void testSimpleId() {
createIndex("test");

client().prepareIndex("test", "type", "XXX1").setSource("field", "value").setRefresh(true).execute().actionGet();
@@ -124,8 +123,7 @@ public void simpleIdTests() {
assertHitCount(searchResponse, 1l);
}

-    @Test
-    public void simpleDateRangeTests() throws Exception {
+    public void testSimpleDateRange() throws Exception {
createIndex("test");
client().prepareIndex("test", "type1", "1").setSource("field", "2010-01-05T02:00").execute().actionGet();
client().prepareIndex("test", "type1", "2").setSource("field", "2010-01-06T02:00").execute().actionGet();
@@ -150,9 +148,8 @@ public void simpleDateRangeTests() throws Exception {
searchResponse = client().prepareSearch("test").setQuery(QueryBuilders.queryStringQuery("field:[2010-01-03||+2d TO 2010-01-04||+2d/d]")).execute().actionGet();
assertHitCount(searchResponse, 2l);
}

-    @Test
-    public void localeDependentDateTests() throws Exception {
+    public void testLocaleDependentDate() throws Exception {
assumeFalse("Locales are buggy on JDK9EA", Constants.JRE_IS_MINIMUM_JAVA9 && systemPropertyAsBoolean("tests.security.manager", false));
assertAcked(prepareCreate("test")
.addMapping("type1",
@@ -189,8 +186,7 @@ public void localeDependentDateTests() throws Exception {
}
}

-    @Test
-    public void simpleTerminateAfterCountTests() throws Exception {
+    public void testSimpleTerminateAfterCount() throws Exception {
prepareCreate("test").setSettings(
SETTING_NUMBER_OF_SHARDS, 1,
SETTING_NUMBER_OF_REPLICAS, 0).get();
@@ -225,16 +221,76 @@ public void simpleTerminateAfterCountTests() throws Exception {
assertFalse(searchResponse.isTerminatedEarly());
}

-    @Test
-    public void testInsaneFrom() throws Exception {
+    public void testInsaneFromAndSize() throws Exception {
createIndex("idx");
indexRandom(true, client().prepareIndex("idx", "type").setSource("{}"));

assertWindowFails(client().prepareSearch("idx").setFrom(Integer.MAX_VALUE));
assertWindowFails(client().prepareSearch("idx").setSize(Integer.MAX_VALUE));
}

public void testTooLargeFromAndSize() throws Exception {
createIndex("idx");
indexRandom(true, client().prepareIndex("idx", "type").setSource("{}"));

assertWindowFails(client().prepareSearch("idx").setFrom(DefaultSearchContext.Defaults.MAX_RESULT_WINDOW));
assertWindowFails(client().prepareSearch("idx").setSize(DefaultSearchContext.Defaults.MAX_RESULT_WINDOW + 1));
assertWindowFails(client().prepareSearch("idx").setSize(DefaultSearchContext.Defaults.MAX_RESULT_WINDOW)
.setFrom(DefaultSearchContext.Defaults.MAX_RESULT_WINDOW));
}

public void testLargeFromAndSizeSucceeds() throws Exception {
createIndex("idx");
indexRandom(true, client().prepareIndex("idx", "type").setSource("{}"));

assertHitCount(client().prepareSearch("idx").setFrom(DefaultSearchContext.Defaults.MAX_RESULT_WINDOW - 10).get(), 1);
assertHitCount(client().prepareSearch("idx").setSize(DefaultSearchContext.Defaults.MAX_RESULT_WINDOW).get(), 1);
assertHitCount(client().prepareSearch("idx").setSize(DefaultSearchContext.Defaults.MAX_RESULT_WINDOW / 2)
.setFrom(DefaultSearchContext.Defaults.MAX_RESULT_WINDOW / 2 - 1).get(), 1);
}

public void testTooLargeFromAndSizeOkBySetting() throws Exception {
prepareCreate("idx").setSettings(DefaultSearchContext.MAX_RESULT_WINDOW, DefaultSearchContext.Defaults.MAX_RESULT_WINDOW * 2).get();
indexRandom(true, client().prepareIndex("idx", "type").setSource("{}"));

assertHitCount(client().prepareSearch("idx").setFrom(DefaultSearchContext.Defaults.MAX_RESULT_WINDOW).get(), 1);
assertHitCount(client().prepareSearch("idx").setSize(DefaultSearchContext.Defaults.MAX_RESULT_WINDOW + 1).get(), 1);
assertHitCount(client().prepareSearch("idx").setSize(DefaultSearchContext.Defaults.MAX_RESULT_WINDOW)
.setFrom(DefaultSearchContext.Defaults.MAX_RESULT_WINDOW).get(), 1);
}

public void testTooLargeFromAndSizeOkByDynamicSetting() throws Exception {
createIndex("idx");
assertAcked(client().admin().indices().prepareUpdateSettings("idx")
.setSettings(
Settings.builder().put(DefaultSearchContext.MAX_RESULT_WINDOW, DefaultSearchContext.Defaults.MAX_RESULT_WINDOW * 2))
.get());
indexRandom(true, client().prepareIndex("idx", "type").setSource("{}"));

assertHitCount(client().prepareSearch("idx").setFrom(DefaultSearchContext.Defaults.MAX_RESULT_WINDOW).get(), 1);
assertHitCount(client().prepareSearch("idx").setSize(DefaultSearchContext.Defaults.MAX_RESULT_WINDOW + 1).get(), 1);
assertHitCount(client().prepareSearch("idx").setSize(DefaultSearchContext.Defaults.MAX_RESULT_WINDOW)
.setFrom(DefaultSearchContext.Defaults.MAX_RESULT_WINDOW).get(), 1);
}

public void testTooLargeFromAndSizeBackwardsCompatibilityRecommendation() throws Exception {
prepareCreate("idx").setSettings(DefaultSearchContext.MAX_RESULT_WINDOW, Integer.MAX_VALUE).get();
indexRandom(true, client().prepareIndex("idx", "type").setSource("{}"));

assertHitCount(client().prepareSearch("idx").setFrom(DefaultSearchContext.Defaults.MAX_RESULT_WINDOW * 10).get(), 1);
assertHitCount(client().prepareSearch("idx").setSize(DefaultSearchContext.Defaults.MAX_RESULT_WINDOW * 10).get(), 1);
assertHitCount(client().prepareSearch("idx").setSize(DefaultSearchContext.Defaults.MAX_RESULT_WINDOW * 10)
.setFrom(DefaultSearchContext.Defaults.MAX_RESULT_WINDOW * 10).get(), 1);
}

private void assertWindowFails(SearchRequestBuilder search) {
try {
-            client().prepareSearch("idx").setFrom(Integer.MAX_VALUE).get();
+            search.get();
fail();
} catch (SearchPhaseExecutionException e) {
-            assertThat(e.toString(), containsString("Result window is too large, from + size must be less than or equal to:"));
+            assertThat(e.toString(), containsString("Result window is too large, from + size must be less than or equal to: ["
+                    + DefaultSearchContext.Defaults.MAX_RESULT_WINDOW));
assertThat(e.toString(), containsString("See the scroll api for a more efficient way to request large data sets"));
}
}
}
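
For application code, the failure mode these tests assert looks roughly like the following. A hedged sketch: the index name and from value are placeholders, but the message fragments are the ones produced by DefaultSearchContext above.

// Sketch: surfacing the new limit to callers (names are placeholders).
try {
    client().prepareSearch("idx")
            .setFrom(20000) // from + size above the default 10000 window
            .setSize(10)
            .get();
} catch (SearchPhaseExecutionException e) {
    // e.toString() contains "Result window is too large, from + size must be
    // less than or equal to: [10000]" and suggests the scroll api; callers
    // can fall back to a scroll here or surface the error to the user.
}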
10 changes: 8 additions & 2 deletions docs/reference/index-modules.asciidoc
@@ -98,6 +98,14 @@ specific index module:
index visible to search. Defaults to `1s`. Can be set to `-1` to disable
refresh.

`index.max_result_window`::

The maximum value of `from + size` for searches to this index. Defaults to
`10000`. Search requests take heap memory and time proportional to
`from + size` and this limits that memory. See
{ref}/search-request-scroll.html[Scroll] for a more efficient alternative
to raising this.

`index.blocks.read_only`::

Set to `true` to make the index and index metadata read only, `false` to
@@ -184,5 +192,3 @@
include::index-modules/store.asciidoc[]

include::index-modules/translog.asciidoc[]


10 changes: 10 additions & 0 deletions docs/reference/migration/migrate_2_1.asciidoc
@@ -26,6 +26,16 @@ Scroll requests sorted by `_doc` have been optimized to more efficiently resume
from where the previous request stopped, so this will have the same performance
characteristics as the former `scan` search type.

==== from + size limits

Elasticsearch will now return an error message if a query's `from` + `size` is
more than the `index.max_result_window` parameter. This parameter defaults to
10,000, which is safe for almost all clusters. Higher values can consume
significant chunks of heap memory per search and per shard executing the
search. It's safest to leave this value as it is and use the scroll api for any
deep scrolling, but the setting is dynamic, so it can be raised or lowered as
needed.
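
To make that last sentence concrete, a dynamic update mirrors testTooLargeFromAndSizeOkByDynamicSetting above; the index name and new value here are placeholders:

// Sketch: raising the window on a live index without a restart or reindex.
client.admin().indices().prepareUpdateSettings("idx")
        .setSettings(Settings.builder()
                .put(DefaultSearchContext.MAX_RESULT_WINDOW, 20000))
        .get();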

=== Update changes

==== Updates now `detect_noop` by default