Skip to content

Commit

Permalink
[ML] verify that there are no duplicate leaf fields in aggs (#41895)
Browse files Browse the repository at this point in the history
* [ML] verify that there are no duplicate leaf fields in aggs

* addressing pr comments

* addressing PR comments

* optmizing duplication check
  • Loading branch information
benwtrent authored May 9, 2019
1 parent 3911770 commit 0531987
Show file tree
Hide file tree
Showing 4 changed files with 172 additions and 3 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,8 @@
import java.io.IOException;
import java.util.Objects;

import static org.elasticsearch.action.ValidateActions.addValidationError;

public class PutDataFrameTransformAction extends Action<AcknowledgedResponse> {

public static final PutDataFrameTransformAction INSTANCE = new PutDataFrameTransformAction();
Expand Down Expand Up @@ -53,7 +55,11 @@ public static Request fromXContent(final XContentParser parser, final String id)

@Override
public ActionRequestValidationException validate() {
return null;
ActionRequestValidationException validationException = null;
for(String failure : config.getPivotConfig().aggFieldValidation()) {
validationException = addValidationError(failure, validationException);
}
return validationException;
}

@Override
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,11 +13,17 @@
import org.elasticsearch.common.xcontent.ToXContentObject;
import org.elasticsearch.common.xcontent.XContentBuilder;
import org.elasticsearch.common.xcontent.XContentParser;
import org.elasticsearch.search.aggregations.AggregationBuilder;
import org.elasticsearch.search.aggregations.PipelineAggregationBuilder;
import org.elasticsearch.search.aggregations.bucket.composite.CompositeAggregationBuilder;
import org.elasticsearch.xpack.core.dataframe.DataFrameField;
import org.elasticsearch.xpack.core.dataframe.utils.ExceptionsHelper;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.List;
import java.util.Map.Entry;
import java.util.Objects;

Expand Down Expand Up @@ -141,7 +147,63 @@ public boolean isValid() {
return groups.isValid() && aggregationConfig.isValid();
}

public List<String> aggFieldValidation() {
if ((aggregationConfig.isValid() && groups.isValid()) == false) {
return Collections.emptyList();
}
List<String> usedNames = new ArrayList<>();
// TODO this will need to change once we allow multi-bucket aggs + field merging
aggregationConfig.getAggregatorFactories().forEach(agg -> addAggNames(agg, usedNames));
aggregationConfig.getPipelineAggregatorFactories().forEach(agg -> addAggNames(agg, usedNames));
usedNames.addAll(groups.getGroups().keySet());
return aggFieldValidation(usedNames);
}

public static PivotConfig fromXContent(final XContentParser parser, boolean lenient) throws IOException {
return lenient ? LENIENT_PARSER.apply(parser, null) : STRICT_PARSER.apply(parser, null);
}

/**
* Does the following checks:
*
* - determines if there are any full duplicate names between the aggregation names and the group by names.
* - finds if there are conflicting name paths that could cause a failure later when the config is started.
*
* Examples showing conflicting field name paths:
*
* aggName1: foo.bar.baz
* aggName2: foo.bar
*
* This should fail as aggName1 will cause foo.bar to be an object, causing a conflict with the use of foo.bar in aggName2.
* @param usedNames The aggregation and group_by names
* @return List of validation failure messages
*/
static List<String> aggFieldValidation(List<String> usedNames) {
if (usedNames == null || usedNames.isEmpty()) {
return Collections.emptyList();
}
List<String> validationFailures = new ArrayList<>();

usedNames.sort(String::compareTo);
for (int i = 0; i < usedNames.size() - 1; i++) {
if (usedNames.get(i+1).startsWith(usedNames.get(i) + ".")) {
validationFailures.add("field [" + usedNames.get(i) + "] cannot be both an object and a field");
}
if (usedNames.get(i+1).equals(usedNames.get(i))) {
validationFailures.add("duplicate field [" + usedNames.get(i) + "] detected");
}
}
return validationFailures;
}


private static void addAggNames(AggregationBuilder aggregationBuilder, Collection<String> names) {
names.add(aggregationBuilder.getName());
aggregationBuilder.getSubAggregations().forEach(agg -> addAggNames(agg, names));
aggregationBuilder.getPipelineAggregations().forEach(agg -> addAggNames(agg, names));
}

private static void addAggNames(PipelineAggregationBuilder pipelineAggregationBuilder, Collection<String> names) {
names.add(pipelineAggregationBuilder.getName());
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -6,13 +6,20 @@

package org.elasticsearch.xpack.core.dataframe.transforms.pivot;

import org.elasticsearch.common.Strings;
import org.elasticsearch.common.io.stream.Writeable.Reader;
import org.elasticsearch.common.xcontent.DeprecationHandler;
import org.elasticsearch.common.xcontent.XContentParser;
import org.elasticsearch.common.xcontent.XContentType;
import org.elasticsearch.xpack.core.dataframe.transforms.AbstractSerializingDataFrameTestCase;

import java.io.IOException;
import java.util.Arrays;
import java.util.List;

import static org.hamcrest.CoreMatchers.is;
import static org.hamcrest.Matchers.containsInAnyOrder;
import static org.hamcrest.Matchers.empty;

public class PivotConfigTests extends AbstractSerializingDataFrameTestCase<PivotConfig> {

Expand Down Expand Up @@ -103,7 +110,7 @@ public void testEmptyGroupBy() throws IOException {
assertFalse(pivotConfig.isValid());
}

public void testMissingGroupBy() throws IOException {
public void testMissingGroupBy() {
String pivot = "{"
+ " \"aggs\": {"
+ " \"avg\": {"
Expand All @@ -114,7 +121,7 @@ public void testMissingGroupBy() throws IOException {
expectThrows(IllegalArgumentException.class, () -> createPivotConfigFromString(pivot, false));
}

public void testDoubleAggs() throws IOException {
public void testDoubleAggs() {
String pivot = "{"
+ " \"group_by\": {"
+ " \"id\": {"
Expand All @@ -136,6 +143,68 @@ public void testDoubleAggs() throws IOException {
expectThrows(IllegalArgumentException.class, () -> createPivotConfigFromString(pivot, false));
}

public void testValidAggNames() throws IOException {
String pivotAggs = "{"
+ " \"group_by\": {"
+ " \"user.id.field\": {"
+ " \"terms\": {"
+ " \"field\": \"id\""
+ "} } },"
+ " \"aggs\": {"
+ " \"avg.field.value\": {"
+ " \"avg\": {"
+ " \"field\": \"points\""
+ "} } } }";
PivotConfig pivotConfig = createPivotConfigFromString(pivotAggs, true);
assertTrue(pivotConfig.isValid());
List<String> fieldValidation = pivotConfig.aggFieldValidation();
assertTrue(fieldValidation.isEmpty());
}

public void testAggNameValidationsWithoutIssues() {
String prefix = randomAlphaOfLength(10) + "1";
String prefix2 = randomAlphaOfLength(10) + "2";
String nestedField1 = randomAlphaOfLength(10) + "3";
String nestedField2 = randomAlphaOfLength(10) + "4";

assertThat(PivotConfig.aggFieldValidation(Arrays.asList(prefix + nestedField1 + nestedField2,
prefix + nestedField1,
prefix,
prefix2)), is(empty()));

assertThat(PivotConfig.aggFieldValidation(
Arrays.asList(
dotJoin(prefix, nestedField1, nestedField2),
dotJoin(nestedField1, nestedField2),
nestedField2,
prefix2)), is(empty()));
}

public void testAggNameValidationsWithDuplicatesAndNestingIssues() {
String prefix = randomAlphaOfLength(10) + "1";
String prefix2 = randomAlphaOfLength(10) + "2";
String nestedField1 = randomAlphaOfLength(10) + "3";
String nestedField2 = randomAlphaOfLength(10) + "4";

List<String> failures = PivotConfig.aggFieldValidation(
Arrays.asList(
dotJoin(prefix, nestedField1, nestedField2),
dotJoin(prefix, nestedField2),
dotJoin(prefix, nestedField1),
dotJoin(prefix2, nestedField1),
dotJoin(prefix2, nestedField1),
prefix2));

assertThat(failures,
containsInAnyOrder("duplicate field [" + dotJoin(prefix2, nestedField1) + "] detected",
"field [" + prefix2 + "] cannot be both an object and a field",
"field [" + dotJoin(prefix, nestedField1) + "] cannot be both an object and a field"));
}

private static String dotJoin(String... fields) {
return Strings.arrayToDelimitedString(fields, ".");
}

private PivotConfig createPivotConfigFromString(String json, boolean lenient) throws IOException {
final XContentParser parser = XContentType.JSON.xContent().createParser(xContentRegistry(),
DeprecationHandler.THROW_UNSUPPORTED_OPERATION, json);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -302,3 +302,35 @@ setup:
"aggs": {"avg_response": {"avg": {"field": "responsetime"}}}
}
}
---
"Test creation failures due to duplicate and conflicting field names":
- do:
catch: /duplicate field \[airline\] detected/
data_frame.put_data_frame_transform:
transform_id: "duplicate-field-transform"
body: >
{
"source": {
"index": "source-index"
},
"dest": { "index": "dest-index" },
"pivot": {
"group_by": { "airline": {"terms": {"field": "airline"}}},
"aggs": {"airline": {"avg": {"field": "responsetime"}}}
}
}
- do:
catch: /field \[airline\] cannot be both an object and a field/
data_frame.put_data_frame_transform:
transform_id: "duplicate-field-transform"
body: >
{
"source": {
"index": "source-index"
},
"dest": { "index": "dest-index" },
"pivot": {
"group_by": { "airline": {"terms": {"field": "airline"}}},
"aggs": {"airline.responsetime": {"avg": {"field": "responsetime"}}}
}
}

0 comments on commit 0531987

Please sign in to comment.