-
Notifications
You must be signed in to change notification settings - Fork 24.9k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
WIP: Support unmapped fields in search 'fields' option #64651
Changes from 7 commits
37265db
33e1a8b
9f39f91
f50bdc2
c94b191
9d6fd43
976fde1
822fb02
9511279
7ec4341
4d74f2d
4b5232b
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -20,7 +20,9 @@ | |
package org.elasticsearch.search.fetch.subphase; | ||
|
||
import org.apache.lucene.index.LeafReaderContext; | ||
import org.apache.lucene.util.automaton.CharacterRunAutomaton; | ||
import org.elasticsearch.common.document.DocumentField; | ||
import org.elasticsearch.common.regex.Regex; | ||
import org.elasticsearch.index.mapper.MappedFieldType; | ||
import org.elasticsearch.index.mapper.ValueFetcher; | ||
import org.elasticsearch.index.query.QueryShardContext; | ||
|
@@ -31,6 +33,7 @@ | |
import java.util.ArrayList; | ||
import java.util.Collection; | ||
import java.util.HashMap; | ||
import java.util.HashSet; | ||
import java.util.List; | ||
import java.util.Map; | ||
import java.util.Set; | ||
|
@@ -42,12 +45,18 @@ | |
public class FieldFetcher { | ||
public static FieldFetcher create(QueryShardContext context, | ||
SearchLookup searchLookup, | ||
Collection<FieldAndFormat> fieldAndFormats) { | ||
Collection<FieldAndFormat> fieldAndFormats, | ||
boolean includeUnmapped) { | ||
|
||
List<FieldContext> fieldContexts = new ArrayList<>(); | ||
String[] originalPattern = new String[fieldAndFormats.size()]; | ||
Set<String> mappedToExclude = new HashSet<>(); | ||
int i = 0; | ||
|
||
for (FieldAndFormat fieldAndFormat : fieldAndFormats) { | ||
String fieldPattern = fieldAndFormat.field; | ||
originalPattern[i] = fieldAndFormat.field; | ||
i++; | ||
String format = fieldAndFormat.format; | ||
|
||
Collection<String> concreteFields = context.simpleMatchToIndexNames(fieldPattern); | ||
|
@@ -57,17 +66,29 @@ public static FieldFetcher create(QueryShardContext context, | |
continue; | ||
} | ||
ValueFetcher valueFetcher = ft.valueFetcher(context, searchLookup, format); | ||
mappedToExclude.add(field); | ||
fieldContexts.add(new FieldContext(field, valueFetcher)); | ||
} | ||
} | ||
|
||
return new FieldFetcher(fieldContexts); | ||
CharacterRunAutomaton pathAutomaton = new CharacterRunAutomaton(Regex.simpleMatchToAutomaton(originalPattern)); | ||
return new FieldFetcher(fieldContexts, includeUnmapped, pathAutomaton, mappedToExclude); | ||
} | ||
|
||
private final List<FieldContext> fieldContexts; | ||
|
||
private FieldFetcher(List<FieldContext> fieldContexts) { | ||
private final boolean includeUnmapped; | ||
private final CharacterRunAutomaton pathAutomaton; | ||
private final Set<String> mappedToExclude; | ||
|
||
/**
 * Creates a fetcher; instances are obtained through the static {@code create} factory.
 *
 * @param fieldContexts   one entry per concrete mapped field, pairing the field name with its value fetcher
 * @param includeUnmapped whether {@code fetch} should additionally walk the source for unmapped fields
 * @param pathAutomaton   automaton built from the requested field patterns, used to match source paths
 * @param mappedToExclude names of the mapped fields that already have a value fetcher
 */
private FieldFetcher(
    List<FieldContext> fieldContexts,
    boolean includeUnmapped,
    CharacterRunAutomaton pathAutomaton,
    Set<String> mappedToExclude
) {
    // State used for the unmapped-field walk over the source.
    this.pathAutomaton = pathAutomaton;
    this.mappedToExclude = mappedToExclude;
    this.includeUnmapped = includeUnmapped;
    // State used for fetching mapped fields.
    this.fieldContexts = fieldContexts;
}
|
||
public Map<String, DocumentField> fetch(SourceLookup sourceLookup, Set<String> ignoredFields) throws IOException { | ||
|
@@ -85,9 +106,70 @@ public Map<String, DocumentField> fetch(SourceLookup sourceLookup, Set<String> i | |
documentFields.put(field, new DocumentField(field, parsedValues)); | ||
} | ||
} | ||
if (includeUnmapped) { | ||
collect(documentFields, sourceLookup.loadSourceIfNeeded(), "", 0); | ||
} | ||
return documentFields; | ||
} | ||
|
||
private void collect(Map<String, DocumentField> documentFields, Map<String, Object> source, String parentPath, int lastState) { | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. This turned out to be a bit complex; I'm not sure my idea to do it all in one pass was a good one :) Maybe we could start by re-using the source filtering logic, optimizing later if we see a performance reason? There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. After looking at what the source filtering code does, I actually find this solution less complicated. We are doing less work here than in the source filtering code, where whole sub-trees of the source can be filtered out and the exclusion logic is more complicated. These two methods, which pull the data out of the source map while keeping track of where they are in the source tree via the inclusion pattern automaton, are now recursive but not that long, and they are implementation details that we don't need to expose. On the other hand, using the source filtering code, which was initially written for another purpose, might lead to problems when that code needs to be altered in the future. Having our "own" logic here doesn't seem like too much overhead to me, tbh. |
||
for (String key : source.keySet()) { | ||
Object value = source.get(key); | ||
String currentPath = parentPath + key; | ||
int currentState = step(this.pathAutomaton, key, lastState); | ||
if (currentState == -1) { | ||
// path doesn't match any fields pattern | ||
continue; | ||
} | ||
if (value instanceof Map) { | ||
// one step deeper into source tree | ||
collect(documentFields, (Map<String, Object>) value, currentPath + ".", step(this.pathAutomaton, ".", currentState)); | ||
} else if (value instanceof List) { | ||
// iterate through list values | ||
List<Object> list = collectList(documentFields, (List<?>) value, currentPath, currentState); | ||
if (list.isEmpty() == false) { | ||
documentFields.put(currentPath, new DocumentField(currentPath, list)); | ||
} | ||
} else { | ||
// we have a leaf value | ||
if (this.pathAutomaton.isAccept(currentState) && this.mappedToExclude.contains(currentPath) == false) { | ||
if (value != null) { | ||
DocumentField currentEntry = documentFields.get(currentPath); | ||
if (currentEntry == null) { | ||
List<Object> list = new ArrayList<>(); | ||
list.add(value); | ||
documentFields.put(currentPath, new DocumentField(currentPath, list)); | ||
} else { | ||
currentEntry.getValues().add(value); | ||
} | ||
} | ||
} | ||
} | ||
} | ||
} | ||
|
||
private List<Object> collectList(Map<String, DocumentField> documentFields, Iterable<?> iterable, String parentPath, int lastState) { | ||
List<Object> list = new ArrayList<>(); | ||
for (Object value : iterable) { | ||
if (value instanceof Map) { | ||
collect(documentFields, (Map<String, Object>) value, parentPath + ".", step(this.pathAutomaton, ".", lastState)); | ||
} else if (value instanceof List) { | ||
// weird case, but can happen for objects with "enabled" : "false" | ||
list.add(collectList(documentFields, (List<?>) value, parentPath, lastState)); | ||
} else if (this.pathAutomaton.isAccept(lastState)) { | ||
list.add(value); | ||
} | ||
} | ||
return list; | ||
} | ||
|
||
private static int step(CharacterRunAutomaton automaton, String key, int state) { | ||
for (int i = 0; state != -1 && i < key.length(); ++i) { | ||
state = automaton.step(state, key.charAt(i)); | ||
} | ||
return state; | ||
} | ||
|
||
public void setNextReader(LeafReaderContext readerContext) { | ||
for (FieldContext field : fieldContexts) { | ||
field.valueFetcher.setNextReader(readerContext); | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Something to think about: if we end up making this configurable through a flag like `include_unmapped`, we could introduce a top-level parameter as we do here, or instead support the flag alongside each field pattern (so it'd be part of `FieldAndFormat`).