diff --git a/server/src/main/java/org/elasticsearch/search/fetch/subphase/FieldFetcher.java b/server/src/main/java/org/elasticsearch/search/fetch/subphase/FieldFetcher.java index 6dad206e5c255..fe69da746fb2d 100644 --- a/server/src/main/java/org/elasticsearch/search/fetch/subphase/FieldFetcher.java +++ b/server/src/main/java/org/elasticsearch/search/fetch/subphase/FieldFetcher.java @@ -13,6 +13,7 @@ import org.elasticsearch.common.Nullable; import org.elasticsearch.common.document.DocumentField; import org.elasticsearch.common.regex.Regex; +import org.elasticsearch.common.xcontent.support.XContentMapValues; import org.elasticsearch.index.mapper.MappedFieldType; import org.elasticsearch.index.mapper.NestedValueFetcher; import org.elasticsearch.index.mapper.ObjectMapper; @@ -38,6 +39,11 @@ */ public class FieldFetcher { + /** + * Default maximum number of states in the automaton that looks up unmapped fields. + */ + private static final int AUTOMATON_MAX_DETERMINIZED_STATES = 100000; + public static FieldFetcher create(SearchExecutionContext context, Collection fieldAndFormats) { Set nestedMappingPaths = context.hasNested() @@ -115,23 +121,33 @@ private static FieldFetcher create(SearchExecutionContext context, } CharacterRunAutomaton unmappedFieldsFetchAutomaton = null; - if (unmappedFetchPattern.isEmpty() == false) { + // We separate the "include_unmapped" field patters with wildcards from the rest in order to use less + // space in the lookup automaton + Map> partitions = unmappedFetchPattern.stream() + .collect(Collectors.partitioningBy((s -> Regex.isSimpleMatchPattern(s)))); + List unmappedWildcardPattern = partitions.get(true); + List unmappedConcreteFields = partitions.get(false); + if (unmappedWildcardPattern.isEmpty() == false) { unmappedFieldsFetchAutomaton = new CharacterRunAutomaton( - Regex.simpleMatchToAutomaton(unmappedFetchPattern.toArray(new String[unmappedFetchPattern.size()])) + Regex.simpleMatchToAutomaton(unmappedWildcardPattern.toArray(new String[unmappedWildcardPattern.size()])), + AUTOMATON_MAX_DETERMINIZED_STATES ); } - return new FieldFetcher(fieldContexts, unmappedFieldsFetchAutomaton); + return new FieldFetcher(fieldContexts, unmappedFieldsFetchAutomaton, unmappedConcreteFields); } private final Map fieldContexts; private final CharacterRunAutomaton unmappedFieldsFetchAutomaton; + private final List unmappedConcreteFields; private FieldFetcher( Map fieldContexts, - @Nullable CharacterRunAutomaton unmappedFieldsFetchAutomaton + @Nullable CharacterRunAutomaton unmappedFieldsFetchAutomaton, + @Nullable List unmappedConcreteFields ) { this.fieldContexts = fieldContexts; this.unmappedFieldsFetchAutomaton = unmappedFieldsFetchAutomaton; + this.unmappedConcreteFields = unmappedConcreteFields; } public Map fetch(SourceLookup sourceLookup) throws IOException { @@ -145,51 +161,65 @@ public Map fetch(SourceLookup sourceLookup) throws IOExce documentFields.put(field, new DocumentField(field, parsedValues)); } } - if (this.unmappedFieldsFetchAutomaton != null) { - collectUnmapped(documentFields, sourceLookup.source(), "", 0); - } + collectUnmapped(documentFields, sourceLookup.source(), "", 0); return documentFields; } private void collectUnmapped(Map documentFields, Map source, String parentPath, int lastState) { - for (String key : source.keySet()) { - Object value = source.get(key); - String currentPath = parentPath + key; - if (this.fieldContexts.containsKey(currentPath)) { - continue; - } - int currentState = step(this.unmappedFieldsFetchAutomaton, key, lastState); - if (currentState == -1) { - // current path doesn't match any fields pattern - continue; - } - if (value instanceof Map) { - // one step deeper into source tree - collectUnmapped( - documentFields, - (Map) value, - currentPath + ".", - step(this.unmappedFieldsFetchAutomaton, ".", currentState) - ); - } else if (value instanceof List) { - // iterate through list values - collectUnmappedList(documentFields, (List) value, currentPath, currentState); - } else { - // we have a leaf value - if (this.unmappedFieldsFetchAutomaton.isAccept(currentState)) { - if (value != null) { - DocumentField currentEntry = documentFields.get(currentPath); - if (currentEntry == null) { - List list = new ArrayList<>(); - list.add(value); - documentFields.put(currentPath, new DocumentField(currentPath, list)); - } else { - currentEntry.getValues().add(value); + // lookup field patterns containing wildcards + if (this.unmappedFieldsFetchAutomaton != null) { + for (String key : source.keySet()) { + Object value = source.get(key); + String currentPath = parentPath + key; + if (this.fieldContexts.containsKey(currentPath)) { + continue; + } + int currentState = step(this.unmappedFieldsFetchAutomaton, key, lastState); + if (currentState == -1) { + // current path doesn't match any fields pattern + continue; + } + if (value instanceof Map) { + // one step deeper into source tree + collectUnmapped( + documentFields, + (Map) value, + currentPath + ".", + step(this.unmappedFieldsFetchAutomaton, ".", currentState) + ); + } else if (value instanceof List) { + // iterate through list values + collectUnmappedList(documentFields, (List) value, currentPath, currentState); + } else { + // we have a leaf value + if (this.unmappedFieldsFetchAutomaton.isAccept(currentState)) { + if (value != null) { + DocumentField currentEntry = documentFields.get(currentPath); + if (currentEntry == null) { + List list = new ArrayList<>(); + list.add(value); + documentFields.put(currentPath, new DocumentField(currentPath, list)); + } else { + currentEntry.getValues().add(value); + } } } } } } + + // lookup concrete fields + if (this.unmappedConcreteFields != null) { + for (String path : unmappedConcreteFields) { + if (this.fieldContexts.containsKey(path)) { + continue; // this is actually a mapped field + } + List values = XContentMapValues.extractRawValues(path, source); + if (values.isEmpty() == false) { + documentFields.put(path, new DocumentField(path, values)); + } + } + } } private void collectUnmappedList(Map documentFields, Iterable iterable, String parentPath, int lastState) { diff --git a/server/src/test/java/org/elasticsearch/search/fetch/subphase/FieldFetcherTests.java b/server/src/test/java/org/elasticsearch/search/fetch/subphase/FieldFetcherTests.java index 59f99b5cd12d8..d4185c81d6c64 100644 --- a/server/src/test/java/org/elasticsearch/search/fetch/subphase/FieldFetcherTests.java +++ b/server/src/test/java/org/elasticsearch/search/fetch/subphase/FieldFetcherTests.java @@ -8,6 +8,7 @@ package org.elasticsearch.search.fetch.subphase; +import org.apache.lucene.util.automaton.TooComplexToDeterminizeException; import org.elasticsearch.Version; import org.elasticsearch.cluster.metadata.IndexMetadata; import org.elasticsearch.common.Strings; @@ -844,6 +845,23 @@ public void testLastFormatWins() throws IOException { assertThat(fields.get("date_field").getValues().get(1), equalTo("12")); } + /** + * Field patterns retrieved with "include_unmapped" use an automaton with a maximal allowed size internally. + * This test checks we have a bound in place to avoid misuse of this with exceptionally large field patterns + */ + public void testTooManyUnmappedFieldWildcardPattern() throws IOException { + MapperService mapperService = createMapperService(); + + XContentBuilder source = XContentFactory.jsonBuilder().startObject().field("a", "foo").endObject(); + + List fieldAndFormatList = new ArrayList<>(); + boolean includeUnmapped = true; + for (int i = 0; i < 1000; i++) { + fieldAndFormatList.add(new FieldAndFormat(randomAlphaOfLength(150) + "*", null, includeUnmapped)); + } + expectThrows(TooComplexToDeterminizeException.class, () -> fetchFields(mapperService, source, fieldAndFormatList)); + } + private List fieldAndFormatList(String name, String format, boolean includeUnmapped) { return Collections.singletonList(new FieldAndFormat(name, format, includeUnmapped)); }