Skip to content

Commit

Permalink
Improve lookup for "include_unmapped" field pattern (#69984)
Browse files Browse the repository at this point in the history
Currently we use a CharacterRunAutomaton built from all field patterns that have
the "include_unmapped" option set. This can lead to unnecessarily large automata
for cases where the user unnecessarily lists all known fields and adds the
"include_unmapped" option. We only really need the automaton for patterns that
contain wildcards; any other field path can be looked up directly, and we only
need to do so if the field path isn't in fact mapped.

Relates to #69983
  • Loading branch information
Christoph Büscher committed Mar 16, 2021
1 parent 62f9b3e commit 9750c8f
Show file tree
Hide file tree
Showing 2 changed files with 88 additions and 40 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
import org.elasticsearch.common.Nullable;
import org.elasticsearch.common.document.DocumentField;
import org.elasticsearch.common.regex.Regex;
import org.elasticsearch.common.xcontent.support.XContentMapValues;
import org.elasticsearch.index.mapper.MappedFieldType;
import org.elasticsearch.index.mapper.NestedValueFetcher;
import org.elasticsearch.index.mapper.ObjectMapper;
Expand All @@ -38,6 +39,11 @@
*/
public class FieldFetcher {

/**
* Maximum number of determinized states handed to the {@link CharacterRunAutomaton} that looks up
* unmapped fields requested via wildcard patterns.
*/
private static final int AUTOMATON_MAX_DETERMINIZED_STATES = 100000;

public static FieldFetcher create(SearchExecutionContext context,
Collection<FieldAndFormat> fieldAndFormats) {
Set<String> nestedMappingPaths = context.hasNested()
Expand Down Expand Up @@ -115,23 +121,33 @@ private static FieldFetcher create(SearchExecutionContext context,
}

CharacterRunAutomaton unmappedFieldsFetchAutomaton = null;
if (unmappedFetchPattern.isEmpty() == false) {
// We separate the "include_unmapped" field patterns with wildcards from the rest in order to use less
// space in the lookup automaton
Map<Boolean, List<String>> partitions = unmappedFetchPattern.stream()
.collect(Collectors.partitioningBy((s -> Regex.isSimpleMatchPattern(s))));
List<String> unmappedWildcardPattern = partitions.get(true);
List<String> unmappedConcreteFields = partitions.get(false);
if (unmappedWildcardPattern.isEmpty() == false) {
unmappedFieldsFetchAutomaton = new CharacterRunAutomaton(
Regex.simpleMatchToAutomaton(unmappedFetchPattern.toArray(new String[unmappedFetchPattern.size()]))
Regex.simpleMatchToAutomaton(unmappedWildcardPattern.toArray(new String[unmappedWildcardPattern.size()])),
AUTOMATON_MAX_DETERMINIZED_STATES
);
}
return new FieldFetcher(fieldContexts, unmappedFieldsFetchAutomaton);
return new FieldFetcher(fieldContexts, unmappedFieldsFetchAutomaton, unmappedConcreteFields);
}

private final Map<String, FieldContext> fieldContexts;
private final CharacterRunAutomaton unmappedFieldsFetchAutomaton;
private final List<String> unmappedConcreteFields;

private FieldFetcher(
Map<String, FieldContext> fieldContexts,
@Nullable CharacterRunAutomaton unmappedFieldsFetchAutomaton
@Nullable CharacterRunAutomaton unmappedFieldsFetchAutomaton,
@Nullable List<String> unmappedConcreteFields
) {
this.fieldContexts = fieldContexts;
this.unmappedFieldsFetchAutomaton = unmappedFieldsFetchAutomaton;
this.unmappedConcreteFields = unmappedConcreteFields;
}

public Map<String, DocumentField> fetch(SourceLookup sourceLookup) throws IOException {
Expand All @@ -145,51 +161,65 @@ public Map<String, DocumentField> fetch(SourceLookup sourceLookup) throws IOExce
documentFields.put(field, new DocumentField(field, parsedValues));
}
}
if (this.unmappedFieldsFetchAutomaton != null) {
collectUnmapped(documentFields, sourceLookup.source(), "", 0);
}
collectUnmapped(documentFields, sourceLookup.source(), "", 0);
return documentFields;
}

private void collectUnmapped(Map<String, DocumentField> documentFields, Map<String, Object> source, String parentPath, int lastState) {
for (String key : source.keySet()) {
Object value = source.get(key);
String currentPath = parentPath + key;
if (this.fieldContexts.containsKey(currentPath)) {
continue;
}
int currentState = step(this.unmappedFieldsFetchAutomaton, key, lastState);
if (currentState == -1) {
// current path doesn't match any fields pattern
continue;
}
if (value instanceof Map) {
// one step deeper into source tree
collectUnmapped(
documentFields,
(Map<String, Object>) value,
currentPath + ".",
step(this.unmappedFieldsFetchAutomaton, ".", currentState)
);
} else if (value instanceof List) {
// iterate through list values
collectUnmappedList(documentFields, (List<?>) value, currentPath, currentState);
} else {
// we have a leaf value
if (this.unmappedFieldsFetchAutomaton.isAccept(currentState)) {
if (value != null) {
DocumentField currentEntry = documentFields.get(currentPath);
if (currentEntry == null) {
List<Object> list = new ArrayList<>();
list.add(value);
documentFields.put(currentPath, new DocumentField(currentPath, list));
} else {
currentEntry.getValues().add(value);
// lookup field patterns containing wildcards
if (this.unmappedFieldsFetchAutomaton != null) {
for (String key : source.keySet()) {
Object value = source.get(key);
String currentPath = parentPath + key;
if (this.fieldContexts.containsKey(currentPath)) {
continue;
}
int currentState = step(this.unmappedFieldsFetchAutomaton, key, lastState);
if (currentState == -1) {
// current path doesn't match any fields pattern
continue;
}
if (value instanceof Map) {
// one step deeper into source tree
collectUnmapped(
documentFields,
(Map<String, Object>) value,
currentPath + ".",
step(this.unmappedFieldsFetchAutomaton, ".", currentState)
);
} else if (value instanceof List) {
// iterate through list values
collectUnmappedList(documentFields, (List<?>) value, currentPath, currentState);
} else {
// we have a leaf value
if (this.unmappedFieldsFetchAutomaton.isAccept(currentState)) {
if (value != null) {
DocumentField currentEntry = documentFields.get(currentPath);
if (currentEntry == null) {
List<Object> list = new ArrayList<>();
list.add(value);
documentFields.put(currentPath, new DocumentField(currentPath, list));
} else {
currentEntry.getValues().add(value);
}
}
}
}
}
}

// lookup concrete fields
if (this.unmappedConcreteFields != null) {
for (String path : unmappedConcreteFields) {
if (this.fieldContexts.containsKey(path)) {
continue; // this is actually a mapped field
}
List<Object> values = XContentMapValues.extractRawValues(path, source);
if (values.isEmpty() == false) {
documentFields.put(path, new DocumentField(path, values));
}
}
}
}

private void collectUnmappedList(Map<String, DocumentField> documentFields, Iterable<?> iterable, String parentPath, int lastState) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@

package org.elasticsearch.search.fetch.subphase;

import org.apache.lucene.util.automaton.TooComplexToDeterminizeException;
import org.elasticsearch.Version;
import org.elasticsearch.cluster.metadata.IndexMetadata;
import org.elasticsearch.common.Strings;
Expand Down Expand Up @@ -844,6 +845,23 @@ public void testLastFormatWins() throws IOException {
assertThat(fields.get("date_field").getValues().get(1), equalTo("12"));
}

/**
 * Wildcard patterns requested with "include_unmapped" are looked up through an automaton whose size is bounded.
 * This test verifies that an exceptionally large set of such patterns is rejected rather than accepted.
 */
public void testTooManyUnmappedFieldWildcardPattern() throws IOException {
    final MapperService mapperService = createMapperService();
    final XContentBuilder source = XContentFactory.jsonBuilder().startObject().field("a", "foo").endObject();

    // 1000 long random prefixes, each followed by a wildcard, all flagged as "include_unmapped"
    final List<FieldAndFormat> wildcardPatterns = new ArrayList<>();
    for (int remaining = 1000; remaining > 0; remaining--) {
        String pattern = randomAlphaOfLength(150) + "*";
        wildcardPatterns.add(new FieldAndFormat(pattern, null, true));
    }

    expectThrows(
        TooComplexToDeterminizeException.class,
        () -> fetchFields(mapperService, source, wildcardPatterns)
    );
}

/**
 * Builds a single-element list holding one field request.
 *
 * @param name            field name or pattern to fetch
 * @param format          optional format passed through to the {@code FieldAndFormat}
 * @param includeUnmapped value of the "include_unmapped" flag for the request
 * @return a list containing exactly one {@code FieldAndFormat}
 */
private List<FieldAndFormat> fieldAndFormatList(String name, String format, boolean includeUnmapped) {
    FieldAndFormat request = new FieldAndFormat(name, format, includeUnmapped);
    return Collections.singletonList(request);
}
Expand Down

0 comments on commit 9750c8f

Please sign in to comment.