Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix sorting and ambiguous join issue #1127

Merged
merged 14 commits into from
Jan 10, 2020
Merged
Show file tree
Hide file tree
Changes from 8 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
111 changes: 111 additions & 0 deletions elide-core/src/main/java/com/yahoo/elide/utils/JoinTrieNode.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,111 @@
/*
* Copyright 2019, Yahoo Inc.
* Licensed under the Apache License, Version 2.0
* See LICENSE file in project root for terms.
*/
package com.yahoo.elide.utils;

import com.yahoo.elide.core.EntityDictionary;
import com.yahoo.elide.core.Path;

import javafx.util.Pair;
import lombok.Data;

import java.util.ArrayDeque;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Queue;
import java.util.Set;
import java.util.function.BiFunction;

/**
* This is a structure for storing and de-duplicating elide join paths.
* Basically, it is a Trie which uses relationship field names to navigate through the path.
*/
@Data
public class JoinTrieNode {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Let's add a unit test class (JoinTrieNodeTest) to get test coverage to a high percentage.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

👍

private final Class<?> type;
private final Map<String, JoinTrieNode> fields = new HashMap<>();

public JoinTrieNode(Class<?> type) {
this.type = type;
}

public void addPaths(Set<Path> paths, EntityDictionary dictionary) {
paths.forEach(path -> addPath(path, dictionary));
}

/**
* Add all path elements into this Trie, starting from the root.
*
* @param path full Elide join path, i.e. <code>foo.bar.baz</code>
* @param dictionary dictionary to use.
*/
public void addPath(Path path, EntityDictionary dictionary) {
JoinTrieNode node = this;

for (Path.PathElement pathElement : path.getPathElements()) {
Class<?> entityClass = pathElement.getType();
String fieldName = pathElement.getFieldName();

if (!dictionary.isRelation(entityClass, fieldName)) {
break;
}

if (!fields.containsKey(fieldName)) {
node.addField(fieldName, new JoinTrieNode(pathElement.getFieldType()));

}

node = fields.get(fieldName);
}
}

/**
* Attach a field to this node.
*
* @param fieldName field name
* @param node field node
*/
private void addField(String fieldName, JoinTrieNode node) {
fields.put(fieldName, node);
}

/**
* Traverse this Trie and project the result into a list in level-first-order.
* This previous result-node pair would be carried through the traversal.
*
* @param generator function that generate new results from previous result-node pair and new trie field
* @param traverser function that carry previous result for next level traversal
* @param identity initial result value
* @param <T> type of each individual result
* @return resulted projected in a list in level-first-order.
*/
public <T> List<T> levelOrderedTraverse(
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think this function should take no functional arguments, and instead just return a list or a stream of Paths.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

My opinion is that if we already have a list of Paths, then we don't need to use this Trie.
The purpose of this Trie is to deduplicate common path elements between multiple lists of Paths. So once it is deduplicated, the method can just provide function interfaces for Trie to traverse itself.

BiFunction<Pair<JoinTrieNode, T>, Map.Entry<String, JoinTrieNode>, T> generator,
BiFunction<Pair<JoinTrieNode, T>, Map.Entry<String, JoinTrieNode>, T> traverser,
T identity
) {
// node-result pairs queue
Queue<Pair<JoinTrieNode, T>> todo = new ArrayDeque<>();

todo.add(new Pair<>(this, identity));
List<T> results = new ArrayList<>();

while (!todo.isEmpty()) {
Pair<JoinTrieNode, T> parentResult = todo.remove();

parentResult.getKey().getFields().entrySet().forEach(childField -> {
results.add(generator.apply(parentResult, childField));

if (childField.getValue().getFields().size() > 0) {
todo.add(new Pair<>(childField.getValue(), traverser.apply(parentResult, childField)));
}
});
}

return results;
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
/*
* Copyright 2015, Yahoo Inc.
* Licensed under the Apache License, Version 2.0
* See LICENSE file in project root for terms.
*/
package com.yahoo.elide.utils;

import static org.junit.jupiter.api.Assertions.assertEquals;

import com.yahoo.elide.core.EntityDictionary;
import com.yahoo.elide.core.Path;

import example.Book;
import example.Editor;
import example.Publisher;
import org.junit.jupiter.api.BeforeAll;
import org.junit.jupiter.api.Test;

import java.util.HashMap;
import java.util.List;
import java.util.Map;

public class JoinTrieNodeTest {
private static EntityDictionary dictionary;

@BeforeAll
public static void init() {
dictionary = new EntityDictionary(new HashMap<>());
dictionary.bindEntity(Book.class);
dictionary.bindEntity(Publisher.class);
dictionary.bindEntity(Editor.class);
}

@Test
public void testAddPath() {
Path path = new Path(Book.class, dictionary, "publisher.editor.id");
JoinTrieNode node = new JoinTrieNode(Book.class);
node.addPath(path, dictionary);

Map<String, JoinTrieNode> firstLevel = node.getFields();
assertEquals(1, firstLevel.size());
assertEquals(Publisher.class, firstLevel.get("publisher").getType());

Map<String, JoinTrieNode> secondLevel = firstLevel.get("publisher").getFields();
assertEquals(1, secondLevel.size());
assertEquals(Editor.class, secondLevel.get("editor").getType());

Map<String, JoinTrieNode> thirdLevel = secondLevel.get("editor").getFields();
assertEquals(0, thirdLevel.size());
}

@Test
public void testTraversal() {
Path path = new Path(Book.class, dictionary, "publisher.editor.id");
JoinTrieNode node = new JoinTrieNode(Book.class);
node.addPath(path, dictionary);

List<String> results = node.levelOrderedTraverse(
(parentResult, childEntry) -> parentResult.getValue() + "." + childEntry.getKey(),
(parentResult, childEntry) -> parentResult.getValue() + "." + childEntry.getKey(),
"book"
);

assertEquals(2, results.size());
assertEquals("book.publisher", results.get(0));
assertEquals("book.publisher.editor", results.get(1));
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -64,8 +64,8 @@ public void close() throws IOException {
@VisibleForTesting
Query buildQuery(EntityProjection entityProjection, RequestScope scope) {
Table table = queryEngine.getTable(entityProjection.getType());

AggregationDataStoreHelper agHelper = new AggregationDataStoreHelper(table, entityProjection);
return agHelper.getQuery();
EntityProjectionTranslator translator = new EntityProjectionTranslator(table,
entityProjection, scope.getDictionary());
return translator.getQuery();
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -5,13 +5,9 @@
*/
package com.yahoo.elide.datastores.aggregation;

import com.yahoo.elide.core.Path;
import com.yahoo.elide.core.EntityDictionary;
import com.yahoo.elide.core.exceptions.InvalidOperationException;
import com.yahoo.elide.core.filter.FilterPredicate;
import com.yahoo.elide.core.filter.expression.AndFilterExpression;
import com.yahoo.elide.core.filter.expression.FilterExpression;
import com.yahoo.elide.core.filter.expression.NotFilterExpression;
import com.yahoo.elide.core.filter.expression.OrFilterExpression;
import com.yahoo.elide.datastores.aggregation.filter.visitor.FilterConstraints;
import com.yahoo.elide.datastores.aggregation.filter.visitor.SplitFilterExpressionVisitor;
import com.yahoo.elide.datastores.aggregation.metadata.metric.MetricFunctionInvocation;
Expand All @@ -24,6 +20,7 @@
import com.yahoo.elide.datastores.aggregation.query.Query;
import com.yahoo.elide.datastores.aggregation.query.TimeDimensionProjection;
import com.yahoo.elide.request.Argument;
import com.yahoo.elide.request.Attribute;
import com.yahoo.elide.request.EntityProjection;
import com.yahoo.elide.request.Relationship;

Expand All @@ -39,27 +36,28 @@
/**
* Helper for Aggregation Data Store which does the work associated with extracting {@link Query}.
*/
public class AggregationDataStoreHelper {
public class EntityProjectionTranslator {
private AnalyticView queriedTable;

private EntityProjection entityProjection;
private Set<ColumnProjection> dimensionProjections;
private Set<TimeDimensionProjection> timeDimensions;
private List<MetricFunctionInvocation> metrics;
private FilterExpression whereFilter;
private FilterExpression havingFilter;
private EntityDictionary dictionary;

public AggregationDataStoreHelper(Table table, EntityProjection entityProjection) {
public EntityProjectionTranslator(Table table, EntityProjection entityProjection, EntityDictionary dictionary) {
if (!(table instanceof AnalyticView)) {
throw new InvalidOperationException("Queried table is not analyticView: " + table.getName());
}

this.queriedTable = (AnalyticView) table;
this.entityProjection = entityProjection;

this.dictionary = dictionary;
dimensionProjections = resolveNonTimeDimensions();
timeDimensions = resolveTimeDimensions();
metrics = resolveMetrics();

splitFilters();
}

Expand All @@ -68,7 +66,7 @@ public AggregationDataStoreHelper(Table table, EntityProjection entityProjection
* @return {@link Query} query object with all the parameters provided by user.
*/
public Query getQuery() {
return Query.builder()
Query query = Query.builder()
.analyticView(queriedTable)
.metrics(metrics)
.groupByDimensions(dimensionProjections)
Expand All @@ -78,6 +76,9 @@ public Query getQuery() {
.sorting(entityProjection.getSorting())
.pagination(entityProjection.getPagination())
.build();
QueryValidator validator = new QueryValidator(query, getAllFields(), dictionary);
validator.validate();
return query;
}

/**
Expand All @@ -94,10 +95,6 @@ private void splitFilters() {
FilterConstraints constraints = filterExpression.accept(visitor);
whereFilter = constraints.getWhereExpression();
havingFilter = constraints.getHavingExpression();

if (havingFilter != null) {
validateHavingClause(havingFilter);
}
}

/**
Expand All @@ -117,26 +114,28 @@ private Set<TimeDimensionProjection> resolveTimeDimensions() {
.findAny()
.orElse(null);

TimeDimensionGrain grain;
TimeDimensionGrain resolvedGrain;
if (grainArgument == null) {
//The first grain is the default.
grain = timeDim.getSupportedGrains().stream()
resolvedGrain = timeDim.getSupportedGrains().stream()
.findFirst()
.orElseThrow(() -> new IllegalStateException(
String.format("Requested default grain, no grain defined on %s",
timeDimAttr.getName())));
} else {
String requestedGrainName = grainArgument.getValue().toString();

grain = timeDim.getSupportedGrains().stream()
.filter(g ->
g.getGrain().name().toLowerCase(Locale.ENGLISH).equals(requestedGrainName))
resolvedGrain = timeDim.getSupportedGrains().stream()
.filter(supportedGrain -> supportedGrain.getGrain().name().toLowerCase(Locale.ENGLISH)
.equals(requestedGrainName))
.findFirst()
.orElseThrow(() -> new InvalidOperationException(
String.format("Invalid grain %s", requestedGrainName)));
String.format("Unsupported grain %s for field %s",
requestedGrainName,
timeDimAttr.getName())));
}

return ColumnProjection.toProjection(timeDim, grain.getGrain(), timeDimAttr.getAlias());
return ColumnProjection.toProjection(timeDim, resolvedGrain.getGrain(), timeDimAttr.getAlias());
})
.collect(Collectors.toCollection(LinkedHashSet::new));
}
Expand Down Expand Up @@ -187,55 +186,21 @@ private Set<String> getRelationships() {
}

/**
* Validate the having clause before execution. Having clause is not as flexible as where clause,
* the fields in having clause must be either or these two:
* 1. A grouped by dimension in this query
* 2. An aggregated metric in this query
*
* All grouped by dimensions are defined in the entity bean, so the last entity class of a filter path
* must match entity class of the query.
*
* @param havingClause having clause generated from this query
* Gets attribute names from {@link EntityProjection}.
* @return relationships list of {@link Attribute} names
*/
private void validateHavingClause(FilterExpression havingClause) {
// TODO: support having clause for alias
if (havingClause instanceof FilterPredicate) {
Path path = ((FilterPredicate) havingClause).getPath();
Path.PathElement last = path.lastElement().get();
Class<?> cls = last.getType();
String fieldName = last.getFieldName();

if (cls != queriedTable.getCls()) {
throw new InvalidOperationException(
String.format(
"Classes don't match when try filtering on %s in having clause of %s.",
cls.getSimpleName(),
queriedTable.getCls().getSimpleName()));
}

if (queriedTable.isMetric(fieldName)) {
if (metrics.stream().noneMatch(m -> m.getAlias().equals(fieldName))) {
throw new InvalidOperationException(
String.format(
"Metric field %s must be aggregated before filtering in having clause.",
fieldName));
}
} else {
if (dimensionProjections.stream().noneMatch(dim -> dim.getAlias().equals(fieldName))) {
throw new InvalidOperationException(
String.format(
"Dimension field %s must be grouped before filtering in having clause.",
fieldName));
}
}
} else if (havingClause instanceof AndFilterExpression) {
validateHavingClause(((AndFilterExpression) havingClause).getLeft());
validateHavingClause(((AndFilterExpression) havingClause).getRight());
} else if (havingClause instanceof OrFilterExpression) {
validateHavingClause(((OrFilterExpression) havingClause).getLeft());
validateHavingClause(((OrFilterExpression) havingClause).getRight());
} else if (havingClause instanceof NotFilterExpression) {
validateHavingClause(((NotFilterExpression) havingClause).getNegated());
}
private Set<String> getAttributes() {
return entityProjection.getAttributes().stream()
.map(Attribute::getName).collect(Collectors.toCollection(LinkedHashSet::new));
}

/**
* Helper method to get all field names from the {@link EntityProjection}.
* @return allFields set of all field names
*/
private Set<String> getAllFields() {
Set<String> allFields = getAttributes();
allFields.addAll(getRelationships());
return allFields;
}
}
Loading