Skip to content

Commit

Permalink
ESQL: Remove aliasing inside Eval
Browse files Browse the repository at this point in the history
Evals that introduce aliased can be simplified by extracting them into a
 project (and thus signaling there's no underlying processing).

The following eval:
  eval x = a + 1, y = x, z = y + 1, y = z, w = y + 1
can be converted into:
  eval x = a + 1, z = a + 1 + 1, w = a + 1 + 1 | project x, z, z as y, w

Fix #100174
  • Loading branch information
costin committed Oct 4, 2023
1 parent 2e86d25 commit c7ccdfc
Show file tree
Hide file tree
Showing 6 changed files with 452 additions and 61 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -93,6 +93,16 @@ x:integer | y:integer | x2:integer | y2:integer
4 | 4 | 5 | 6
;

duplicateProjectEval
from employees | eval y = languages, x = languages | keep x, y | eval x2 = x + 1 | eval y2 = y + 2 | limit 3;

x:integer | y:integer | x2:integer | y2:integer
2 | 2 | 3 | 4
5 | 5 | 6 | 7
4 | 4 | 5 | 6
;


renameWithFilterPushedToES
from employees | sort emp_no | rename emp_no as x | keep languages, first_name, last_name, x | where x > 10030 and x < 10040 | limit 5;

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -86,8 +86,13 @@ protected List<Batch<LogicalPlan>> batches() {
}

protected static List<Batch<LogicalPlan>> rules() {
var substitutions = new Batch<>("Substitutions", Limiter.ONCE, new SubstituteSurrogates(), new ReplaceRegexMatch()
// new ReplaceTextFieldAttributesWithTheKeywordSubfield()
var substitutions = new Batch<>(
"Substitutions",
Limiter.ONCE,
new SubstituteSurrogates(),
new ReplaceRegexMatch(),
new ReplaceAliasingEvalWithProject()
// new ReplaceTextFieldAttributesWithTheKeywordSubfield()
);

var operators = new Batch<>(
Expand Down Expand Up @@ -850,4 +855,86 @@ protected Expression regexToEquals(RegexMatch<?> regexMatch, Literal literal) {
return new Equals(regexMatch.source(), regexMatch.field(), literal);
}
}

/**
* Replace aliasing evals (eval x=a) with a projection which can be further combined / simplified.
* The rule gets applied only if there's another project (Project/Stats) above it.
*
* Needs to take into account shadowing of potentially intermediate fields:
* eval x = a + 1, y = x, z = y + 1, y = z, w = y + 1
* The output should be
* eval x = a + 1, z = a + 1 + 1, w = a + 1 + 1
* project x, z, z as y, w
*/
static class ReplaceAliasingEvalWithProject extends Rule<LogicalPlan, LogicalPlan> {

@Override
public LogicalPlan apply(LogicalPlan logicalPlan) {
Holder<Boolean> enabled = new Holder<>(false);

return logicalPlan.transformDown(p -> {
// found projection, turn enable flag on
if (p instanceof Aggregate || p instanceof Project) {
enabled.set(true);
}
if (enabled.get() && p instanceof Eval eval) {
p = rule(eval);
}

return p;
});
}

private LogicalPlan rule(Eval eval) {
LogicalPlan plan = eval;

// holds simple aliases such as b = a, c = b, d = c
AttributeMap<Expression> basicAliases = new AttributeMap<>();
// same as above but keeps the original expression
AttributeMap<NamedExpression> basicAliasSources = new AttributeMap<>();

List<Alias> keptFields = new ArrayList<>();

var fields = eval.fields();
for (int i = 0, size = fields.size(); i < size; i++) {
Alias field = fields.get(i);
Expression child = field.child();
var attribute = field.toAttribute();
// put the aliases in a separate map to separate the underlying resolve from other aliases
if (child instanceof Attribute) {
basicAliases.put(attribute, child);
basicAliasSources.put(attribute, field);
} else {
// be lazy and start replacing name aliases only if needed
if (basicAliases.size() > 0) {
// update the child through the field
field = (Alias) field.transformUp(e -> basicAliases.resolve(e, e));
}
keptFields.add(field);
}
}

// at least one alias encountered, move it into a project
if (basicAliases.size() > 0) {
// preserve the eval output (takes care of shadowing and order) but replace the basic aliases
List<NamedExpression> projections = new ArrayList<>(eval.output());
// replace the removed aliases with their initial definition - however use the output to preserve the shadowing
for (int i = projections.size() - 1; i >= 0; i--) {
NamedExpression project = projections.get(i);
projections.set(i, basicAliasSources.getOrDefault(project, project));
}

LogicalPlan child = eval.child();
if (keptFields.size() > 0) {
// replace the eval with just the kept fields
child = new Eval(eval.source(), eval.child(), keptFields);
}
// put the projection in place
plan = new Project(eval.source(), child, projections);
}

return plan;
}

}
}
Original file line number Diff line number Diff line change
Expand Up @@ -208,27 +208,31 @@ public void testCountFieldWithFilter() {
}

/**
* Expects - for now
* Expects
* LimitExec[500[INTEGER]]
* \_AggregateExec[[],[COUNT(hidden_s{r}#8) AS c],FINAL,null]
* \_AggregateExec[[],[COUNT(salary{f}#20) AS c],FINAL,null]
* \_ExchangeExec[[count{r}#25, seen{r}#26],true]
* \_AggregateExec[[],[COUNT(hidden_s{r}#8) AS c],PARTIAL,8]
* \_EvalExec[[salary{f}#20 AS s, s{r}#3 AS hidden_s]]
* \_FieldExtractExec[salary{f}#20]
* \_EsQueryExec[test], query[{"esql_single_value":{"field":"emp_no","next":{"range":{"emp_no":{"lt":10050,"boost":1.0}}}}}]
* [_doc{f}#42], limit[], sort[] estimatedRowSize[16]
* \_EsStatsQueryExec[test], stats[Stat[name=salary, type=COUNT, query={
* "exists" : {
* "field" : "salary",
* "boost" : 1.0
* }
*/
// TODO: the eval is not yet optimized away
public void testCountFieldWithEval() {
var plan = plan("""
from test | eval s = salary | rename s as sr | eval hidden_s = sr | rename emp_no as e | where e < 10050
| stats c = count(hidden_s)
""", IS_SV_STATS);

var limit = as(plan, LimitExec.class);
var agg = as(limit.child(), AggregateExec.class);
var exg = as(agg.child(), ExchangeExec.class);
agg = as(exg.child(), AggregateExec.class);
var eval = as(agg.child(), EvalExec.class);
var esStatsQuery = as(exg.child(), EsStatsQueryExec.class);

assertThat(esStatsQuery.limit(), is(nullValue()));
assertThat(Expressions.names(esStatsQuery.output()), contains("count", "seen"));
var stat = as(esStatsQuery.stats().get(0), Stat.class);
assertThat(stat.query(), is(QueryBuilders.existsQuery("salary")));
}

// optimized doesn't know yet how to push down count over field
Expand Down
Loading

0 comments on commit c7ccdfc

Please sign in to comment.