forked from elastic/elasticsearch
-
Notifications
You must be signed in to change notification settings - Fork 2
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Ignore fields with no content when querying wildcard fields (elastic#…
…81985) The query_string, simple_query_string, combined_fields and multi_match queries all allow you to query a large number of fields, based on wildcard field name matches. By default, the wildcard match is *, meaning that these queries will try to match against every single field in your index. This can cause problems if you have a very large number of fields defined, and your elasticsearch instance has a fairly low maximum query clause count. In many cases, users may have many more fields defined in their mappings than are actually populated in their index. For example, indexes using ECS mappings may well only use a small subset of these mapped fields for their data. In these situations, we can put a limit on the number of fields being searched by doing a quick check of the Lucene index metadata to see if a mapped field actually has content in the index; if it doesn't exist, we can trivially skip it. This commit adds a check to QueryParserHelper.resolveMappingField() that strips out fields with no content if the field name to resolve contains a wildcard. The check is delegated down to MappedFieldType and by default returns `true`, but the standard indexable field types (numeric, text, keyword, range, etc) will check their field names against the names in the underlying lucene FieldInfos and return `false` if they do not appear there.
- Loading branch information
1 parent
7be74a8
commit d11973b
Showing
31 changed files
with
1,040 additions
and
535 deletions.
There are no files selected for viewing
200 changes: 200 additions & 0 deletions
200
benchmarks/src/main/java/org/elasticsearch/benchmark/search/QueryParserHelperBenchmark.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,200 @@ | ||
/* | ||
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one | ||
* or more contributor license agreements. Licensed under the Elastic License | ||
* 2.0 and the Server Side Public License, v 1; you may not use this file except | ||
* in compliance with, at your election, the Elastic License 2.0 or the Server | ||
* Side Public License, v 1. | ||
*/ | ||
|
||
package org.elasticsearch.benchmark.search; | ||
|
||
import org.apache.logging.log4j.util.Strings; | ||
import org.apache.lucene.analysis.standard.StandardAnalyzer; | ||
import org.apache.lucene.index.DirectoryReader; | ||
import org.apache.lucene.index.IndexReader; | ||
import org.apache.lucene.index.IndexWriter; | ||
import org.apache.lucene.index.IndexWriterConfig; | ||
import org.apache.lucene.search.IndexSearcher; | ||
import org.apache.lucene.store.ByteBuffersDirectory; | ||
import org.apache.lucene.store.Directory; | ||
import org.elasticsearch.Version; | ||
import org.elasticsearch.cluster.ClusterModule; | ||
import org.elasticsearch.cluster.metadata.IndexMetadata; | ||
import org.elasticsearch.common.bytes.BytesArray; | ||
import org.elasticsearch.common.compress.CompressedXContent; | ||
import org.elasticsearch.common.io.stream.NamedWriteableRegistry; | ||
import org.elasticsearch.common.settings.Settings; | ||
import org.elasticsearch.core.internal.io.IOUtils; | ||
import org.elasticsearch.index.IndexSettings; | ||
import org.elasticsearch.index.analysis.AnalyzerScope; | ||
import org.elasticsearch.index.analysis.IndexAnalyzers; | ||
import org.elasticsearch.index.analysis.NamedAnalyzer; | ||
import org.elasticsearch.index.fielddata.IndexFieldDataCache; | ||
import org.elasticsearch.index.mapper.IdFieldMapper; | ||
import org.elasticsearch.index.mapper.MapperRegistry; | ||
import org.elasticsearch.index.mapper.MapperService; | ||
import org.elasticsearch.index.mapper.ParsedDocument; | ||
import org.elasticsearch.index.mapper.SourceToParse; | ||
import org.elasticsearch.index.query.SearchExecutionContext; | ||
import org.elasticsearch.index.search.QueryParserHelper; | ||
import org.elasticsearch.index.shard.IndexShard; | ||
import org.elasticsearch.index.similarity.SimilarityService; | ||
import org.elasticsearch.indices.IndicesModule; | ||
import org.elasticsearch.indices.breaker.NoneCircuitBreakerService; | ||
import org.elasticsearch.script.Script; | ||
import org.elasticsearch.script.ScriptCompiler; | ||
import org.elasticsearch.script.ScriptContext; | ||
import org.elasticsearch.xcontent.NamedXContentRegistry; | ||
import org.elasticsearch.xcontent.XContentType; | ||
import org.openjdk.jmh.annotations.Benchmark; | ||
import org.openjdk.jmh.annotations.BenchmarkMode; | ||
import org.openjdk.jmh.annotations.Fork; | ||
import org.openjdk.jmh.annotations.Measurement; | ||
import org.openjdk.jmh.annotations.Mode; | ||
import org.openjdk.jmh.annotations.OutputTimeUnit; | ||
import org.openjdk.jmh.annotations.Scope; | ||
import org.openjdk.jmh.annotations.Setup; | ||
import org.openjdk.jmh.annotations.State; | ||
import org.openjdk.jmh.annotations.TearDown; | ||
import org.openjdk.jmh.annotations.Warmup; | ||
|
||
import java.io.IOException; | ||
import java.io.UncheckedIOException; | ||
import java.util.ArrayList; | ||
import java.util.Collections; | ||
import java.util.List; | ||
import java.util.Map; | ||
import java.util.concurrent.TimeUnit; | ||
|
||
@Fork(1) | ||
@Warmup(iterations = 5) | ||
@Measurement(iterations = 5) | ||
@State(Scope.Benchmark) | ||
@OutputTimeUnit(TimeUnit.MICROSECONDS) | ||
@BenchmarkMode(Mode.AverageTime) | ||
public class QueryParserHelperBenchmark { | ||
|
||
private static final int NUMBER_OF_MAPPING_FIELDS = 1000; | ||
|
||
private Directory directory; | ||
private IndexReader indexReader; | ||
private MapperService mapperService; | ||
|
||
@Setup | ||
public void setup() throws IOException { | ||
// pre: set up MapperService and SearchExecutionContext | ||
List<String> fields = new ArrayList<>(); | ||
for (int i = 0; i < NUMBER_OF_MAPPING_FIELDS; i++) { | ||
fields.add(String.format(""" | ||
"field%d":{"type":"long"}""", i)); | ||
} | ||
String mappings = """ | ||
{"_doc":{"properties":{""" + Strings.join(fields, ',') + "}}}"; | ||
|
||
mapperService = createMapperService(mappings); | ||
IndexWriterConfig iwc = new IndexWriterConfig(IndexShard.buildIndexAnalyzer(mapperService)); | ||
directory = new ByteBuffersDirectory(); | ||
IndexWriter iw = new IndexWriter(directory, iwc); | ||
|
||
for (int i = 0; i < 2000; i++) { | ||
ParsedDocument doc = mapperService.documentMapper().parse(buildDoc(i)); | ||
iw.addDocument(doc.rootDoc()); | ||
if (i % 100 == 0) { | ||
iw.commit(); | ||
} | ||
} | ||
iw.close(); | ||
|
||
indexReader = DirectoryReader.open(directory); | ||
} | ||
|
||
private SourceToParse buildDoc(int docId) { | ||
List<String> fields = new ArrayList<>(); | ||
for (int i = 0; i < NUMBER_OF_MAPPING_FIELDS; i++) { | ||
if (i % 2 == 0) continue; | ||
if (i % 3 == 0 && (docId < (NUMBER_OF_MAPPING_FIELDS / 2))) continue; | ||
fields.add(String.format(""" | ||
"field%d":1""", i)); | ||
} | ||
String source = "{" + String.join(",", fields) + "}"; | ||
return new SourceToParse("" + docId, new BytesArray(source), XContentType.JSON); | ||
} | ||
|
||
@TearDown | ||
public void tearDown() { | ||
IOUtils.closeWhileHandlingException(indexReader, directory); | ||
} | ||
|
||
@Benchmark | ||
public void expand() { | ||
Map<String, Float> fields = QueryParserHelper.resolveMappingFields(buildSearchExecutionContext(), Map.of("*", 1f)); | ||
assert fields.size() > 0 && fields.size() < NUMBER_OF_MAPPING_FIELDS; | ||
} | ||
|
||
protected SearchExecutionContext buildSearchExecutionContext() { | ||
final SimilarityService similarityService = new SimilarityService(mapperService.getIndexSettings(), null, Map.of()); | ||
final long nowInMillis = 1; | ||
return new SearchExecutionContext( | ||
0, | ||
0, | ||
mapperService.getIndexSettings(), | ||
null, | ||
(ft, idxName, lookup) -> ft.fielddataBuilder(idxName, lookup) | ||
.build(new IndexFieldDataCache.None(), new NoneCircuitBreakerService()), | ||
mapperService, | ||
mapperService.mappingLookup(), | ||
similarityService, | ||
null, | ||
new NamedXContentRegistry(ClusterModule.getNamedXWriteables()), | ||
new NamedWriteableRegistry(ClusterModule.getNamedWriteables()), | ||
null, | ||
new IndexSearcher(indexReader), | ||
() -> nowInMillis, | ||
null, | ||
null, | ||
() -> true, | ||
null, | ||
Collections.emptyMap() | ||
); | ||
} | ||
|
||
protected final MapperService createMapperService(String mappings) { | ||
Settings settings = Settings.builder() | ||
.put("index.number_of_replicas", 0) | ||
.put("index.number_of_shards", 1) | ||
.put("index.version.created", Version.CURRENT) | ||
.build(); | ||
IndexMetadata meta = IndexMetadata.builder("index").settings(settings).build(); | ||
IndexSettings indexSettings = new IndexSettings(meta, settings); | ||
MapperRegistry mapperRegistry = new IndicesModule(Collections.emptyList()).getMapperRegistry(); | ||
|
||
SimilarityService similarityService = new SimilarityService(indexSettings, null, Map.of()); | ||
MapperService mapperService = new MapperService( | ||
indexSettings, | ||
new IndexAnalyzers( | ||
Map.of("default", new NamedAnalyzer("default", AnalyzerScope.INDEX, new StandardAnalyzer())), | ||
Map.of(), | ||
Map.of() | ||
), | ||
new NamedXContentRegistry(ClusterModule.getNamedXWriteables()), | ||
similarityService, | ||
mapperRegistry, | ||
() -> { throw new UnsupportedOperationException(); }, | ||
new IdFieldMapper(() -> true), | ||
new ScriptCompiler() { | ||
@Override | ||
public <T> T compile(Script script, ScriptContext<T> scriptContext) { | ||
throw new UnsupportedOperationException(); | ||
} | ||
} | ||
); | ||
|
||
try { | ||
mapperService.merge("_doc", new CompressedXContent(mappings), MapperService.MergeReason.MAPPING_UPDATE); | ||
return mapperService; | ||
} catch (IOException e) { | ||
throw new UncheckedIOException(e); | ||
} | ||
} | ||
|
||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.