Skip to content

Commit

Permalink
[7.x][ML] Exclude nested fields in data frame analytics (elastic#71400)…
Browse files Browse the repository at this point in the history
… (elastic#71415)

Previously, the destination index was sorted, which meant it could
not have `nested` fields. Since this has changed, `nested` fields
may be present. These were handled incorrectly: the `_explain` API
reported that they could be included in the analysis, which
is not the case.

This commit fixes this issue by detecting `nested` fields and the children
of those `nested` fields and excluding them from the analysis. A
`nested` field may contain multiple inner fields. To avoid noise
in the API response, we collapse them into a single entry with the
path to the top-level nested field.

Backport of elastic#71400
  • Loading branch information
dimitris-athanasiou authored Apr 7, 2021
1 parent 754cda8 commit 955bd8d
Show file tree
Hide file tree
Showing 3 changed files with 164 additions and 28 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,7 @@
import java.util.List;
import java.util.Map;
import java.util.Objects;
import java.util.Optional;
import java.util.Set;
import java.util.TreeSet;
import java.util.stream.Collectors;
Expand All @@ -65,6 +66,7 @@ public class ExtractedFieldsDetector {
private final int docValueFieldsLimit;
private final FieldCapabilitiesResponse fieldCapabilitiesResponse;
private final Map<String, Long> cardinalitiesForFieldsWithConstraints;
private final List<String> topNestedFieldPrefixes;

ExtractedFieldsDetector(DataFrameAnalyticsConfig config,
int docValueFieldsLimit,
Expand All @@ -74,6 +76,26 @@ public class ExtractedFieldsDetector {
this.docValueFieldsLimit = docValueFieldsLimit;
this.fieldCapabilitiesResponse = Objects.requireNonNull(fieldCapabilitiesResponse);
this.cardinalitiesForFieldsWithConstraints = Objects.requireNonNull(cardinalitiesForFieldsWithConstraints);
this.topNestedFieldPrefixes = findTopNestedFieldPrefixes(fieldCapabilitiesResponse);
}

/**
 * Computes the prefixes (field name followed by a dot) of the outermost nested fields.
 *
 * Every field in the given response whose mapping type is exactly nested contributes a
 * prefix. Prefixes that themselves start with another collected prefix are dropped, so
 * only the top-most nested field of each hierarchy is kept.
 *
 * Note that {@code getMappingTypes} is an instance method; it is called here for each
 * field name found in the response passed as argument.
 *
 * @param fieldCapabilitiesResponse the field capabilities whose field names are inspected
 * @return an unmodifiable, sorted list of top level nested field prefixes
 */
private List<String> findTopNestedFieldPrefixes(FieldCapabilitiesResponse fieldCapabilitiesResponse) {
    List<String> nestedPrefixes = new ArrayList<>();
    for (String field : fieldCapabilitiesResponse.get().keySet()) {
        if (isNested(getMappingTypes(field))) {
            nestedPrefixes.add(field + ".");
        }
    }
    // Sorting places every child prefix after the prefix of its enclosing nested field,
    // so comparing against the last kept prefix is enough to detect children.
    Collections.sort(nestedPrefixes);

    List<String> topLevelPrefixes = new ArrayList<>();
    String lastKeptPrefix = null;
    for (String candidate : nestedPrefixes) {
        boolean isChildOfLastKept = lastKeptPrefix != null && candidate.startsWith(lastKeptPrefix);
        if (isChildOfLastKept == false) {
            topLevelPrefixes.add(candidate);
            lastKeptPrefix = candidate;
        }
    }
    return Collections.unmodifiableList(topLevelPrefixes);
}

public Tuple<ExtractedFields, List<FieldSelection>> detect() {
Expand Down Expand Up @@ -139,7 +161,14 @@ private void validateFieldsRequireForProcessors(Set<String> processorFields) {
}
removeObjects(fieldsForProcessor);
if (fieldsForProcessor.size() < processorFields.size()) {
throw ExceptionsHelper.badRequestException("fields for feature_processors must not be objects");
throw ExceptionsHelper.badRequestException("fields for feature_processors must not be objects or nested");
}
for (String field : fieldsForProcessor) {
Optional<String> matchingNestedFieldPattern = findMatchingNestedFieldPattern(field);
if (matchingNestedFieldPattern.isPresent()) {
throw ExceptionsHelper.badRequestException("nested fields [{}] cannot be used in a feature_processor",
matchingNestedFieldPattern.get());
}
}
Collection<String> errorFields = new ArrayList<>();
for (String fieldName : fieldsForProcessor) {
Expand Down Expand Up @@ -190,7 +219,7 @@ private void removeObjects(Set<String> fields) {
while (fieldsIterator.hasNext()) {
String field = fieldsIterator.next();
Set<String> types = getMappingTypes(field);
if (isObject(types)) {
if (isObject(types) || isNested(types)) {
fieldsIterator.remove();
}
}
Expand All @@ -210,6 +239,11 @@ private void addExcludedField(String field, String reason, Set<FieldSelection> f
fieldSelection.add(FieldSelection.excluded(field, getMappingTypes(field), reason));
}

/**
 * Adds an excluded entry for a nested field pattern to the given field selection.
 *
 * The entry carries the nested content type as its only mapping type and a fixed
 * reason stating that nested fields are not supported.
 *
 * @param pattern the nested field pattern to report as excluded
 * @param fieldSelection the set the excluded entry is added to
 */
private void addExcludedNestedPattern(String pattern, Set<FieldSelection> fieldSelection) {
    Set<String> nestedType = Collections.singleton(ObjectMapper.NESTED_CONTENT_TYPE);
    FieldSelection excludedPattern = FieldSelection.excluded(pattern, nestedType, "nested fields are not supported");
    fieldSelection.add(excludedPattern);
}

private Set<String> getMappingTypes(String field) {
Map<String, FieldCapabilities> fieldCaps = fieldCapabilitiesResponse.getField(field);
return fieldCaps == null ? Collections.emptySet() : fieldCaps.keySet();
Expand All @@ -223,6 +257,11 @@ private void removeFieldsWithIncompatibleTypes(Set<String> fields, Set<FieldSele
addExcludedField(field, "unsupported type; supported types are " + getSupportedTypes(), fieldSelection);
fieldsIterator.remove();
}
Optional<String> matchingNestedFieldPattern = findMatchingNestedFieldPattern(field);
if (matchingNestedFieldPattern.isPresent()) {
addExcludedNestedPattern(matchingNestedFieldPattern.get(), fieldSelection);
fieldsIterator.remove();
}
}
}

Expand Down Expand Up @@ -257,6 +296,10 @@ private Set<String> getSupportedTypes() {
return supportedTypes;
}

/**
 * Looks up the top level nested field prefix that the given field starts with.
 *
 * @param field the field name to check
 * @return the first matching prefix with {@code *} appended, or an empty optional
 *         when the field does not start with any of the known nested prefixes
 */
private Optional<String> findMatchingNestedFieldPattern(String field) {
    for (String nestedPrefix : topNestedFieldPrefixes) {
        if (field.startsWith(nestedPrefix)) {
            return Optional.of(nestedPrefix + "*");
        }
    }
    return Optional.empty();
}

private void includeAndExcludeFields(Set<String> fields, Set<FieldSelection> fieldSelection) {
FetchSourceContext analyzedFields = config.getAnalyzedFields();
if (analyzedFields == null) {
Expand Down Expand Up @@ -294,10 +337,10 @@ private void includeAndExcludeFields(Set<String> fields, Set<FieldSelection> fie

private void checkIncludesExcludesAreNotObjects(FetchSourceContext analyzedFields) {
List<String> objectFields = Stream.concat(Arrays.stream(analyzedFields.includes()), Arrays.stream(analyzedFields.excludes()))
.filter(field -> isObject(getMappingTypes(field)))
.filter(field -> isObject(getMappingTypes(field)) || isNested(getMappingTypes(field)))
.collect(Collectors.toList());
if (objectFields.isEmpty() == false) {
throw ExceptionsHelper.badRequestException("{} must not include or exclude object fields: {}",
throw ExceptionsHelper.badRequestException("{} must not include or exclude object or nested fields: {}",
DataFrameAnalyticsConfig.ANALYZED_FIELDS.getPreferredName(), objectFields);
}
}
Expand All @@ -317,10 +360,15 @@ private void applyIncludesExcludes(Set<String> fields, Set<String> includes, Set
}
} else {
fieldsIterator.remove();
if (hasCompatibleType(field)) {
addExcludedField(field, "field not in includes list", fieldSelection);
} else {
if (hasCompatibleType(field) == false) {
addExcludedField(field, "unsupported type; supported types are " + getSupportedTypes(), fieldSelection);
} else {
Optional<String> matchingNestedFieldPattern = findMatchingNestedFieldPattern(field);
if (matchingNestedFieldPattern.isPresent()) {
addExcludedNestedPattern(matchingNestedFieldPattern.get(), fieldSelection);
} else {
addExcludedField(field, "field not in includes list", fieldSelection);
}
}
}
}
Expand All @@ -337,6 +385,10 @@ private void checkFieldsHaveCompatibleTypes(Set<String> fields) {
throw ExceptionsHelper.badRequestException("field [{}] has unsupported type {}. Supported types are {}.", field,
fieldCaps.keySet(), getSupportedTypes());
}
Optional<String> matchingNestedFieldPattern = findMatchingNestedFieldPattern(field);
if (matchingNestedFieldPattern.isPresent()) {
throw ExceptionsHelper.badRequestException("nested fields [{}] are not supported", matchingNestedFieldPattern.get());
}
}
}

Expand Down Expand Up @@ -601,7 +653,11 @@ private static boolean isBoolean(Set<String> types) {
return types.size() == 1 && types.contains(BooleanFieldMapper.CONTENT_TYPE);
}

private boolean isObject(Set<String> types) {
private static boolean isObject(Set<String> types) {
return types.size() == 1 && types.contains(ObjectMapper.CONTENT_TYPE);
}

/**
 * Tells whether the given mapping types consist of exactly one type and that type is
 * the nested content type.
 *
 * @param types the mapping types of a field
 * @return {@code true} if the only type present is the nested content type
 */
private static boolean isNested(Set<String> types) {
    if (types.size() != 1) {
        return false;
    }
    return types.contains(ObjectMapper.NESTED_CONTENT_TYPE);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -205,13 +205,16 @@ private boolean isMultiField(String field, String parent) {
return false;
}
Map<String, FieldCapabilities> parentFieldCaps = fieldsCapabilities.getField(parent);
if (parentFieldCaps == null || (parentFieldCaps.size() == 1 && parentFieldCaps.containsKey("object"))) {
// We check if the parent is an object which is indicated by field caps containing an "object" entry.
// If an object, it's not a multi field
if (parentFieldCaps == null || (parentFieldCaps.size() == 1 && isNestedOrObject(parentFieldCaps))) {
// We check if the parent is an object or nested field. If so, it's not a multi field.
return false;
}
return true;
}

/**
 * Tells whether the given field capabilities contain an entry keyed by
 * {@code "object"} or by {@code "nested"}.
 *
 * @param fieldCaps the capabilities of a field, keyed by type name
 * @return {@code true} if either key is present
 */
private static boolean isNestedOrObject(Map<String, FieldCapabilities> fieldCaps) {
    if (fieldCaps.containsKey("object")) {
        return true;
    }
    return fieldCaps.containsKey("nested");
}
}

/**
Expand Down
Loading

0 comments on commit 955bd8d

Please sign in to comment.