Skip to content

Commit

Permalink
Ignore fields with no content when querying wildcard fields (elastic#81985)
Browse files Browse the repository at this point in the history

The query_string, simple_query_string, combined_fields and multi_match
queries all allow you to query a large number of fields, based on wildcard field name
matches. By default, the wildcard match is *, meaning that these queries will try
to match against every single field in your index. This can cause problems if you
have a very large number of fields defined, and your elasticsearch instance has a
fairly low maximum query clause count.

In many cases, users may have many more fields defined in their mappings than are
actually populated in their index. For example, indexes using ECS mappings may
well only use a small subset of these mapped fields for their data. In these situations,
we can put a limit on the number of fields being searched by doing a quick check of
the Lucene index metadata to see if a mapped field actually has content in the index;
if it doesn't exist, we can trivially skip it.

This commit adds a check to QueryParserHelper.resolveMappingField() that strips
out fields with no content if the field name to resolve contains a wildcard. The check
is delegated down to MappedFieldType and by default returns `true`, but the standard
indexable field types (numeric, text, keyword, range, etc) will check their fieldnames
against the names in the underlying lucene FieldInfos and return `false` if they do not
appear there.
  • Loading branch information
romseygeek authored Jan 18, 2022
1 parent 7be74a8 commit d11973b
Show file tree
Hide file tree
Showing 31 changed files with 1,040 additions and 535 deletions.
Original file line number Diff line number Diff line change
@@ -0,0 +1,200 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0 and the Server Side Public License, v 1; you may not use this file except
* in compliance with, at your election, the Elastic License 2.0 or the Server
* Side Public License, v 1.
*/

package org.elasticsearch.benchmark.search;

import org.apache.logging.log4j.util.Strings;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.store.ByteBuffersDirectory;
import org.apache.lucene.store.Directory;
import org.elasticsearch.Version;
import org.elasticsearch.cluster.ClusterModule;
import org.elasticsearch.cluster.metadata.IndexMetadata;
import org.elasticsearch.common.bytes.BytesArray;
import org.elasticsearch.common.compress.CompressedXContent;
import org.elasticsearch.common.io.stream.NamedWriteableRegistry;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.core.internal.io.IOUtils;
import org.elasticsearch.index.IndexSettings;
import org.elasticsearch.index.analysis.AnalyzerScope;
import org.elasticsearch.index.analysis.IndexAnalyzers;
import org.elasticsearch.index.analysis.NamedAnalyzer;
import org.elasticsearch.index.fielddata.IndexFieldDataCache;
import org.elasticsearch.index.mapper.IdFieldMapper;
import org.elasticsearch.index.mapper.MapperRegistry;
import org.elasticsearch.index.mapper.MapperService;
import org.elasticsearch.index.mapper.ParsedDocument;
import org.elasticsearch.index.mapper.SourceToParse;
import org.elasticsearch.index.query.SearchExecutionContext;
import org.elasticsearch.index.search.QueryParserHelper;
import org.elasticsearch.index.shard.IndexShard;
import org.elasticsearch.index.similarity.SimilarityService;
import org.elasticsearch.indices.IndicesModule;
import org.elasticsearch.indices.breaker.NoneCircuitBreakerService;
import org.elasticsearch.script.Script;
import org.elasticsearch.script.ScriptCompiler;
import org.elasticsearch.script.ScriptContext;
import org.elasticsearch.xcontent.NamedXContentRegistry;
import org.elasticsearch.xcontent.XContentType;
import org.openjdk.jmh.annotations.Benchmark;
import org.openjdk.jmh.annotations.BenchmarkMode;
import org.openjdk.jmh.annotations.Fork;
import org.openjdk.jmh.annotations.Measurement;
import org.openjdk.jmh.annotations.Mode;
import org.openjdk.jmh.annotations.OutputTimeUnit;
import org.openjdk.jmh.annotations.Scope;
import org.openjdk.jmh.annotations.Setup;
import org.openjdk.jmh.annotations.State;
import org.openjdk.jmh.annotations.TearDown;
import org.openjdk.jmh.annotations.Warmup;

import java.io.IOException;
import java.io.UncheckedIOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.Map;
import java.util.concurrent.TimeUnit;

/**
 * JMH benchmark for {@link QueryParserHelper#resolveMappingFields}, measuring how quickly a
 * wildcard field pattern ({@code "*"}) is expanded against a mapping in which only a subset
 * of the mapped fields actually contain indexed content. The index is built so that roughly
 * half of the mapped {@code long} fields are never populated, exercising the "skip fields
 * with no content" fast path.
 */
@Fork(1)
@Warmup(iterations = 5)
@Measurement(iterations = 5)
@State(Scope.Benchmark)
@OutputTimeUnit(TimeUnit.MICROSECONDS)
@BenchmarkMode(Mode.AverageTime)
public class QueryParserHelperBenchmark {

    private static final int NUMBER_OF_MAPPING_FIELDS = 1000;

    private Directory directory;
    private IndexReader indexReader;
    private MapperService mapperService;

    /**
     * Builds a mapping with {@code NUMBER_OF_MAPPING_FIELDS} long fields (field0..fieldN-1),
     * indexes 2000 documents that populate only a subset of them, and opens a reader over
     * the resulting in-memory index.
     */
    @Setup
    public void setup() throws IOException {
        // pre: set up MapperService and SearchExecutionContext
        List<String> fields = new ArrayList<>();
        for (int i = 0; i < NUMBER_OF_MAPPING_FIELDS; i++) {
            // NOTE(review): String.format without an explicit Locale uses the default
            // locale for %d; assumed harmless here since field indices are small ASCII
            // integers in practice — confirm if benchmarks run under exotic locales.
            fields.add(String.format("""
                "field%d":{"type":"long"}""", i));
        }
        String mappings = """
            {"_doc":{"properties":{""" + Strings.join(fields, ',') + "}}}";

        mapperService = createMapperService(mappings);
        IndexWriterConfig iwc = new IndexWriterConfig(IndexShard.buildIndexAnalyzer(mapperService));
        directory = new ByteBuffersDirectory();
        // try-with-resources: the writer must be closed even if document parsing or
        // addDocument throws, otherwise the directory lock and writer resources leak.
        try (IndexWriter iw = new IndexWriter(directory, iwc)) {
            for (int i = 0; i < 2000; i++) {
                ParsedDocument doc = mapperService.documentMapper().parse(buildDoc(i));
                iw.addDocument(doc.rootDoc());
                // periodic commits produce multiple segments, closer to a real index
                if (i % 100 == 0) {
                    iw.commit();
                }
            }
        }

        indexReader = DirectoryReader.open(directory);
    }

    /**
     * Builds the JSON source for one document. Even-numbered fields are never populated;
     * fields whose index is a multiple of 3 are additionally skipped for the first half of
     * the docIds, so the index ends up with mapped-but-empty fields for the benchmark to skip.
     */
    private SourceToParse buildDoc(int docId) {
        List<String> fields = new ArrayList<>();
        for (int i = 0; i < NUMBER_OF_MAPPING_FIELDS; i++) {
            if (i % 2 == 0) continue;
            if (i % 3 == 0 && (docId < (NUMBER_OF_MAPPING_FIELDS / 2))) continue;
            fields.add(String.format("""
                "field%d":1""", i));
        }
        String source = "{" + String.join(",", fields) + "}";
        return new SourceToParse("" + docId, new BytesArray(source), XContentType.JSON);
    }

    /** Releases the reader and directory opened in {@link #setup()}. */
    @TearDown
    public void tearDown() {
        IOUtils.closeWhileHandlingException(indexReader, directory);
    }

    /**
     * Measures wildcard field expansion. The assert (active when run with -ea) checks that
     * expansion returned something but filtered out at least some of the empty mapped fields.
     */
    @Benchmark
    public void expand() {
        Map<String, Float> fields = QueryParserHelper.resolveMappingFields(buildSearchExecutionContext(), Map.of("*", 1f));
        assert fields.size() > 0 && fields.size() < NUMBER_OF_MAPPING_FIELDS;
    }

    /**
     * Builds a minimal {@link SearchExecutionContext} over the benchmark index; most
     * collaborators that field expansion does not touch are passed as null.
     */
    protected SearchExecutionContext buildSearchExecutionContext() {
        final SimilarityService similarityService = new SimilarityService(mapperService.getIndexSettings(), null, Map.of());
        final long nowInMillis = 1;
        return new SearchExecutionContext(
            0,
            0,
            mapperService.getIndexSettings(),
            null,
            (ft, idxName, lookup) -> ft.fielddataBuilder(idxName, lookup)
                .build(new IndexFieldDataCache.None(), new NoneCircuitBreakerService()),
            mapperService,
            mapperService.mappingLookup(),
            similarityService,
            null,
            new NamedXContentRegistry(ClusterModule.getNamedXWriteables()),
            new NamedWriteableRegistry(ClusterModule.getNamedWriteables()),
            null,
            new IndexSearcher(indexReader),
            () -> nowInMillis,
            null,
            null,
            () -> true,
            null,
            Collections.emptyMap()
        );
    }

    /**
     * Creates a standalone {@link MapperService} (no node, no cluster) and merges the given
     * mappings into it. Script compilation is unsupported — the benchmark mappings use none.
     */
    protected final MapperService createMapperService(String mappings) {
        Settings settings = Settings.builder()
            .put("index.number_of_replicas", 0)
            .put("index.number_of_shards", 1)
            .put("index.version.created", Version.CURRENT)
            .build();
        IndexMetadata meta = IndexMetadata.builder("index").settings(settings).build();
        IndexSettings indexSettings = new IndexSettings(meta, settings);
        MapperRegistry mapperRegistry = new IndicesModule(Collections.emptyList()).getMapperRegistry();

        SimilarityService similarityService = new SimilarityService(indexSettings, null, Map.of());
        MapperService mapperService = new MapperService(
            indexSettings,
            new IndexAnalyzers(
                Map.of("default", new NamedAnalyzer("default", AnalyzerScope.INDEX, new StandardAnalyzer())),
                Map.of(),
                Map.of()
            ),
            new NamedXContentRegistry(ClusterModule.getNamedXWriteables()),
            similarityService,
            mapperRegistry,
            () -> { throw new UnsupportedOperationException(); },
            new IdFieldMapper(() -> true),
            new ScriptCompiler() {
                @Override
                public <T> T compile(Script script, ScriptContext<T> scriptContext) {
                    throw new UnsupportedOperationException();
                }
            }
        );

        try {
            mapperService.merge("_doc", new CompressedXContent(mappings), MapperService.MergeReason.MAPPING_UPDATE);
            return mapperService;
        } catch (IOException e) {
            throw new UncheckedIOException(e);
        }
    }

}
Original file line number Diff line number Diff line change
Expand Up @@ -214,6 +214,11 @@ public String typeName() {
return CONTENT_TYPE;
}

@Override
public boolean mayExistInIndex(SearchExecutionContext context) {
    // Ask the context whether this field name actually occurs in the index
    // (per the commit description, this checks the Lucene FieldInfos), so that
    // wildcard field expansion can skip mapped-but-empty fields.
    return context.fieldExistsInIndex(name());
}

@Override
public Query termQuery(Object value, SearchExecutionContext context) {
failIfNotIndexedNorDocValuesFallback(context);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -436,6 +436,11 @@ boolean termLengthWithinBounds(int length) {
return length >= minChars - 1 && length <= maxChars;
}

@Override
public boolean mayExistInIndex(SearchExecutionContext context) {
    // Unconditionally false: this internal subfield type should never be picked
    // up by wildcard field expansion, regardless of index content.
    return false;
}

@Override
public Query prefixQuery(
String value,
Expand Down Expand Up @@ -569,6 +574,11 @@ void setPrefixFieldType(PrefixFieldType prefixFieldType) {
this.prefixFieldType = prefixFieldType;
}

@Override
public boolean mayExistInIndex(SearchExecutionContext context) {
    // Unconditionally false: this internal prefix subfield is an implementation
    // detail and must be excluded from wildcard field expansion.
    return false;
}

@Override
public ValueFetcher valueFetcher(SearchExecutionContext context, String format) {
// Because this internal field is modelled as a multi-field, SourceValueFetcher will look up its
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,18 +8,14 @@

package org.elasticsearch.search.query;

import org.apache.lucene.search.IndexSearcher;
import org.elasticsearch.ExceptionsHelper;
import org.elasticsearch.action.index.IndexRequestBuilder;
import org.elasticsearch.action.search.SearchResponse;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.index.mapper.MapperService;
import org.elasticsearch.index.query.Operator;
import org.elasticsearch.index.query.QueryStringQueryBuilder;
import org.elasticsearch.search.SearchHit;
import org.elasticsearch.search.SearchHits;
import org.elasticsearch.test.ESIntegTestCase;
import org.elasticsearch.xcontent.XContentBuilder;
import org.elasticsearch.xcontent.XContentType;
import org.junit.Before;

Expand All @@ -31,10 +27,8 @@

import static org.elasticsearch.index.query.QueryBuilders.queryStringQuery;
import static org.elasticsearch.test.StreamsUtils.copyToStringFromClasspath;
import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertAcked;
import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertHitCount;
import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertNoFailures;
import static org.elasticsearch.xcontent.XContentFactory.jsonBuilder;
import static org.hamcrest.Matchers.containsInAnyOrder;
import static org.hamcrest.Matchers.containsString;
import static org.hamcrest.Matchers.equalTo;
Expand Down Expand Up @@ -234,90 +228,6 @@ public void testAllFieldsWithSpecifiedLeniency() throws IOException {
assertThat(e.getCause().getMessage(), containsString("unit [D] not supported for date math [-2D]"));
}

/**
 * Checks that wildcard field expansion respects the maximum clause count: expanding up to
 * the limit works (including when unmapped or unpopulated fields are filtered out), while
 * patterns matching more populated fields than the limit fail with a clear error.
 */
public void testLimitOnExpandedFields() throws Exception {

    final int maxClauseCount = randomIntBetween(50, 100);

    // Mapping with 2 * maxClauseCount text fields: field_A0..field_A(n-1), field_B0..field_B(n-1)
    XContentBuilder builder = jsonBuilder();
    builder.startObject();
    {
        builder.startObject("_doc");
        {
            builder.startObject("properties");
            {
                for (int i = 0; i < maxClauseCount; i++) {
                    builder.startObject("field_A" + i).field("type", "text").endObject();
                    builder.startObject("field_B" + i).field("type", "text").endObject();
                }
                builder.endObject();
            }
            builder.endObject();
        }
        builder.endObject();
    }

    // Raise the total-fields limit so the mapping above is accepted
    assertAcked(
        prepareCreate("testindex").setSettings(
            Settings.builder().put(MapperService.INDEX_MAPPING_TOTAL_FIELDS_LIMIT_SETTING.getKey(), maxClauseCount + 100)
        ).setMapping(builder)
    );

    // Only field_A0 is ever populated; every other mapped field stays empty
    client().prepareIndex("testindex").setId("1").setSource("field_A0", "foo bar baz").get();
    refresh();

    // setMaxClauseCount mutates JVM-global static state — must be restored in finally
    int originalMaxClauses = IndexSearcher.getMaxClauseCount();
    try {

        IndexSearcher.setMaxClauseCount(maxClauseCount);

        // single field shouldn't trigger the limit
        doAssertOneHitForQueryString("field_A0:foo");
        // expanding to the limit should work
        doAssertOneHitForQueryString("field_A\\*:foo");

        // adding a non-existing field on top shouldn't overshoot the limit
        doAssertOneHitForQueryString("field_A\\*:foo unmapped:something");

        // the following should exceed the limit
        doAssertLimitExceededException("foo", IndexSearcher.getMaxClauseCount() * 2, "*");
        doAssertLimitExceededException("*:foo", IndexSearcher.getMaxClauseCount() * 2, "*");
        doAssertLimitExceededException("field_\\*:foo", IndexSearcher.getMaxClauseCount() * 2, "field_*");

    } finally {
        IndexSearcher.setMaxClauseCount(originalMaxClauses);
    }
}

/**
 * Runs the given query string against the test index and asserts exactly one hit;
 * randomly also sets an explicit {@code "*"} default field to cover both expansion paths.
 */
private void doAssertOneHitForQueryString(String queryString) {
    QueryStringQueryBuilder queryBuilder = queryStringQuery(queryString);
    if (randomBoolean()) {
        queryBuilder.defaultField("*");
    }
    assertHitCount(client().prepareSearch("testindex").setQuery(queryBuilder).get(), 1);
}

/**
 * Runs the given query string and asserts it fails with an IllegalArgumentException whose
 * message reports the offending field pattern, the clause limit, and the actual match count.
 */
private void doAssertLimitExceededException(String queryString, int exceedingFieldCount, String inputFieldPattern) {
    Exception thrown = expectThrows(Exception.class, () -> {
        QueryStringQueryBuilder queryBuilder = queryStringQuery(queryString);
        if (randomBoolean()) {
            queryBuilder.defaultField("*");
        }
        client().prepareSearch("testindex").setQuery(queryBuilder).get();
    });
    String expectedFragment = "field expansion for ["
        + inputFieldPattern
        + "] matches too many fields, limit: "
        + IndexSearcher.getMaxClauseCount()
        + ", got: "
        + exceedingFieldCount;
    assertThat(ExceptionsHelper.unwrap(thrown, IllegalArgumentException.class).getMessage(), containsString(expectedFragment));
}

public void testFieldAlias() throws Exception {
List<IndexRequestBuilder> indexRequests = new ArrayList<>();
indexRequests.add(client().prepareIndex("test").setId("1").setSource("f3", "text", "f2", "one"));
Expand Down
Loading

0 comments on commit d11973b

Please sign in to comment.