From 9750c8f26f3f032199f636e2b4ee398d3f2d6101 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christoph=20B=C3=BCscher?= Date: Tue, 16 Mar 2021 10:28:41 +0100 Subject: [PATCH] Improve lookup for "include_unmapped" field pattern (#69984) Currently we use a CharacterRunAutomaton build from all field pattern that have the "include_unmapped" option set. This can lead to unnecessarily large automata for cases where the user unessesarily list all known fields and adds the "include_unmapped" option. We only really need the automaton for pattern that contain wildcards and can look up any other field path directly and only need to do so if the field path isn't indeed mapped. Relates to #69983 --- .../search/fetch/subphase/FieldFetcher.java | 110 +++++++++++------- .../fetch/subphase/FieldFetcherTests.java | 18 +++ 2 files changed, 88 insertions(+), 40 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/search/fetch/subphase/FieldFetcher.java b/server/src/main/java/org/elasticsearch/search/fetch/subphase/FieldFetcher.java index 6dad206e5c255..fe69da746fb2d 100644 --- a/server/src/main/java/org/elasticsearch/search/fetch/subphase/FieldFetcher.java +++ b/server/src/main/java/org/elasticsearch/search/fetch/subphase/FieldFetcher.java @@ -13,6 +13,7 @@ import org.elasticsearch.common.Nullable; import org.elasticsearch.common.document.DocumentField; import org.elasticsearch.common.regex.Regex; +import org.elasticsearch.common.xcontent.support.XContentMapValues; import org.elasticsearch.index.mapper.MappedFieldType; import org.elasticsearch.index.mapper.NestedValueFetcher; import org.elasticsearch.index.mapper.ObjectMapper; @@ -38,6 +39,11 @@ */ public class FieldFetcher { + /** + * Default maximum number of states in the automaton that looks up unmapped fields. + */ + private static final int AUTOMATON_MAX_DETERMINIZED_STATES = 100000; + public static FieldFetcher create(SearchExecutionContext context, Collection fieldAndFormats) { Set nestedMappingPaths = context.hasNested() @@ -115,23 +121,33 @@ private static FieldFetcher create(SearchExecutionContext context, } CharacterRunAutomaton unmappedFieldsFetchAutomaton = null; - if (unmappedFetchPattern.isEmpty() == false) { + // We separate the "include_unmapped" field patters with wildcards from the rest in order to use less + // space in the lookup automaton + Map> partitions = unmappedFetchPattern.stream() + .collect(Collectors.partitioningBy((s -> Regex.isSimpleMatchPattern(s)))); + List unmappedWildcardPattern = partitions.get(true); + List unmappedConcreteFields = partitions.get(false); + if (unmappedWildcardPattern.isEmpty() == false) { unmappedFieldsFetchAutomaton = new CharacterRunAutomaton( - Regex.simpleMatchToAutomaton(unmappedFetchPattern.toArray(new String[unmappedFetchPattern.size()])) + Regex.simpleMatchToAutomaton(unmappedWildcardPattern.toArray(new String[unmappedWildcardPattern.size()])), + AUTOMATON_MAX_DETERMINIZED_STATES ); } - return new FieldFetcher(fieldContexts, unmappedFieldsFetchAutomaton); + return new FieldFetcher(fieldContexts, unmappedFieldsFetchAutomaton, unmappedConcreteFields); } private final Map fieldContexts; private final CharacterRunAutomaton unmappedFieldsFetchAutomaton; + private final List unmappedConcreteFields; private FieldFetcher( Map fieldContexts, - @Nullable CharacterRunAutomaton unmappedFieldsFetchAutomaton + @Nullable CharacterRunAutomaton unmappedFieldsFetchAutomaton, + @Nullable List unmappedConcreteFields ) { this.fieldContexts = fieldContexts; this.unmappedFieldsFetchAutomaton = unmappedFieldsFetchAutomaton; + this.unmappedConcreteFields = unmappedConcreteFields; } public Map fetch(SourceLookup sourceLookup) throws IOException { @@ -145,51 +161,65 @@ public Map fetch(SourceLookup sourceLookup) throws IOExce documentFields.put(field, new DocumentField(field, parsedValues)); } } - if (this.unmappedFieldsFetchAutomaton != null) { - collectUnmapped(documentFields, sourceLookup.source(), "", 0); - } + collectUnmapped(documentFields, sourceLookup.source(), "", 0); return documentFields; } private void collectUnmapped(Map documentFields, Map source, String parentPath, int lastState) { - for (String key : source.keySet()) { - Object value = source.get(key); - String currentPath = parentPath + key; - if (this.fieldContexts.containsKey(currentPath)) { - continue; - } - int currentState = step(this.unmappedFieldsFetchAutomaton, key, lastState); - if (currentState == -1) { - // current path doesn't match any fields pattern - continue; - } - if (value instanceof Map) { - // one step deeper into source tree - collectUnmapped( - documentFields, - (Map) value, - currentPath + ".", - step(this.unmappedFieldsFetchAutomaton, ".", currentState) - ); - } else if (value instanceof List) { - // iterate through list values - collectUnmappedList(documentFields, (List) value, currentPath, currentState); - } else { - // we have a leaf value - if (this.unmappedFieldsFetchAutomaton.isAccept(currentState)) { - if (value != null) { - DocumentField currentEntry = documentFields.get(currentPath); - if (currentEntry == null) { - List list = new ArrayList<>(); - list.add(value); - documentFields.put(currentPath, new DocumentField(currentPath, list)); - } else { - currentEntry.getValues().add(value); + // lookup field patterns containing wildcards + if (this.unmappedFieldsFetchAutomaton != null) { + for (String key : source.keySet()) { + Object value = source.get(key); + String currentPath = parentPath + key; + if (this.fieldContexts.containsKey(currentPath)) { + continue; + } + int currentState = step(this.unmappedFieldsFetchAutomaton, key, lastState); + if (currentState == -1) { + // current path doesn't match any fields pattern + continue; + } + if (value instanceof Map) { + // one step deeper into source tree + collectUnmapped( + documentFields, + (Map) value, + currentPath + ".", + step(this.unmappedFieldsFetchAutomaton, ".", currentState) + ); + } else if (value instanceof List) { + // iterate through list values + collectUnmappedList(documentFields, (List) value, currentPath, currentState); + } else { + // we have a leaf value + if (this.unmappedFieldsFetchAutomaton.isAccept(currentState)) { + if (value != null) { + DocumentField currentEntry = documentFields.get(currentPath); + if (currentEntry == null) { + List list = new ArrayList<>(); + list.add(value); + documentFields.put(currentPath, new DocumentField(currentPath, list)); + } else { + currentEntry.getValues().add(value); + } } } } } } + + // lookup concrete fields + if (this.unmappedConcreteFields != null) { + for (String path : unmappedConcreteFields) { + if (this.fieldContexts.containsKey(path)) { + continue; // this is actually a mapped field + } + List values = XContentMapValues.extractRawValues(path, source); + if (values.isEmpty() == false) { + documentFields.put(path, new DocumentField(path, values)); + } + } + } } private void collectUnmappedList(Map documentFields, Iterable iterable, String parentPath, int lastState) { diff --git a/server/src/test/java/org/elasticsearch/search/fetch/subphase/FieldFetcherTests.java b/server/src/test/java/org/elasticsearch/search/fetch/subphase/FieldFetcherTests.java index 59f99b5cd12d8..d4185c81d6c64 100644 --- a/server/src/test/java/org/elasticsearch/search/fetch/subphase/FieldFetcherTests.java +++ b/server/src/test/java/org/elasticsearch/search/fetch/subphase/FieldFetcherTests.java @@ -8,6 +8,7 @@ package org.elasticsearch.search.fetch.subphase; +import org.apache.lucene.util.automaton.TooComplexToDeterminizeException; import org.elasticsearch.Version; import org.elasticsearch.cluster.metadata.IndexMetadata; import org.elasticsearch.common.Strings; @@ -844,6 +845,23 @@ public void testLastFormatWins() throws IOException { assertThat(fields.get("date_field").getValues().get(1), equalTo("12")); } + /** + * Field patterns retrieved with "include_unmapped" use an automaton with a maximal allowed size internally. + * This test checks we have a bound in place to avoid misuse of this with exceptionally large field patterns + */ + public void testTooManyUnmappedFieldWildcardPattern() throws IOException { + MapperService mapperService = createMapperService(); + + XContentBuilder source = XContentFactory.jsonBuilder().startObject().field("a", "foo").endObject(); + + List fieldAndFormatList = new ArrayList<>(); + boolean includeUnmapped = true; + for (int i = 0; i < 1000; i++) { + fieldAndFormatList.add(new FieldAndFormat(randomAlphaOfLength(150) + "*", null, includeUnmapped)); + } + expectThrows(TooComplexToDeterminizeException.class, () -> fetchFields(mapperService, source, fieldAndFormatList)); + } + private List fieldAndFormatList(String name, String format, boolean includeUnmapped) { return Collections.singletonList(new FieldAndFormat(name, format, includeUnmapped)); }