Skip to content

Commit

Permalink
Extend join optimizations based on empty source.
Browse files Browse the repository at this point in the history
  • Loading branch information
ahmadghazal64 authored and rschlussel committed Apr 23, 2021
1 parent 6c8b45c commit 69a5216
Show file tree
Hide file tree
Showing 3 changed files with 166 additions and 10 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -15,21 +15,43 @@

import com.facebook.presto.matching.Captures;
import com.facebook.presto.matching.Pattern;
import com.facebook.presto.spi.plan.Assignments;
import com.facebook.presto.spi.plan.PlanNode;
import com.facebook.presto.spi.plan.ProjectNode;
import com.facebook.presto.spi.plan.ValuesNode;
import com.facebook.presto.spi.relation.VariableReferenceExpression;
import com.facebook.presto.sql.planner.iterative.Rule;
import com.facebook.presto.sql.planner.plan.JoinNode;

import java.util.Collection;
import java.util.Collections;

import static com.facebook.presto.SystemSessionProperties.isEmptyJoinOptimization;
import static com.facebook.presto.spi.plan.ProjectNode.Locality.LOCAL;
import static com.facebook.presto.sql.planner.optimizations.QueryCardinalityUtil.isAtMost;
import static com.facebook.presto.sql.planner.plan.Patterns.join;
import static com.facebook.presto.sql.relational.Expressions.constantNull;

public class EliminateEmptyJoins
implements Rule<JoinNode>
{
private static final Pattern<JoinNode> PATTERN = join();

// Build assignment list for the new Project node as: X=X if X is from non-empty child and X=null otherwise.
public static Assignments buildAssignments(Collection<VariableReferenceExpression> variables, PlanNode nonEmptyChild)
{
Assignments.Builder builder = Assignments.builder();
for (VariableReferenceExpression variable : variables) {
if (nonEmptyChild.getOutputVariables().contains(variable)) {
builder.put(variable, variable);
}
else {
builder.put(variable, constantNull(variable.getType()));
}
}
return builder.build();
}

@Override
public Pattern<JoinNode> getPattern()
{
Expand All @@ -48,11 +70,44 @@ public Result apply(JoinNode joinNode, Captures captures, Context context)
leftChildEmpty = isAtMost(context.getLookup().resolve(joinNode.getLeft()), context.getLookup(), 0);
rightChildEmpty = isAtMost(context.getLookup().resolve(joinNode.getRight()), context.getLookup(), 0);

if ((!leftChildEmpty && !rightChildEmpty) || joinNode.getType() != JoinNode.Type.INNER) {
return Result.empty();
/*
Prune joins with one or more empty sources in the following cases. The pruning is done by replacing the whole join by empty values node.
1. Both left and right children are empty. This works for all type of joins including outer joins.
2. One of the left and right are empty and join is inner.
3. Left child empty and left outer join.
4. Right child empty and right outer join.
*/
if ((leftChildEmpty && rightChildEmpty) ||
((leftChildEmpty || rightChildEmpty) && joinNode.getType() == JoinNode.Type.INNER)
|| (leftChildEmpty && joinNode.getType() == JoinNode.Type.LEFT)
|| (rightChildEmpty && joinNode.getType() == JoinNode.Type.RIGHT)) {
return Result.ofPlanNode(
new ValuesNode(joinNode.getId(), joinNode.getOutputVariables(), Collections.emptyList()));
}

return Result.ofPlanNode(
new ValuesNode(joinNode.getId(), joinNode.getOutputVariables(), Collections.emptyList()));
/*
This covers the cases where the whole join can not be pruned for outer join cases.
In this case, we optimize the join using a projection over the non-empty child.
The follwoing are 4 scenarios:
1. S1 left outer join S2 and S2 is empty. The join is rewritten as Projection over S1 with null values for fields of S2. For example,
"select t1.X, dt.Y from t1 left outer (select * from t2 where 1=0) is rewritten as select t1.X, null as Y from t1
2. S1 right outer join S2 and S1 is empty. Similiar to #1.
3. S1 full outer join S2 and S1 is empty. This is can be reduce to S2 left outer join S1 and S1 is empty. Same logic of #1 is used.
4. S1 full outer join S2 and S2 is empty. Similiar to #3 and full outer join is reduced to S1 left outer join S2. Same logic is #1.
*/
if (leftChildEmpty || rightChildEmpty) {
PlanNode nonEmptyChild;
if (leftChildEmpty) {
nonEmptyChild = joinNode.getRight();
}
else {
nonEmptyChild = joinNode.getLeft();
}
Assignments.Builder newProjections = Assignments.builder()
.putAll(buildAssignments(joinNode.getOutputVariables(), nonEmptyChild));

return Result.ofPlanNode(new ProjectNode(joinNode.getId(), nonEmptyChild, newProjections.build(), LOCAL));
}
return Result.empty();
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -1075,6 +1075,57 @@ public void testEmptyJoins()
applyEmptyJoinOptimization, true,
output(values("orderkey_0")));

// Empty left child with left outer join
assertPlanWithSession(
"WITH DT AS (SELECT orderkey FROM (select custkey C from orders limit 0) left outer join orders on orderkey=C) SELECT * FROM DT LIMIT 2",
applyEmptyJoinOptimization, true,
output(limit(2, values("orderkey_0"))));

// 3 way join with empty non-null producing side for outer join
assertPlanWithSession(
"WITH DT AS (SELECT orderkey FROM (select custkey C from orders limit 0) left outer join orders on orderkey=C " +
" left outer join customer C2 on C2.custkey = C) " +
" SELECT * FROM DT LIMIT 2",
applyEmptyJoinOptimization, true,
output(limit(2, values("orderkey_0"))));

// Empty right child with right outer join
assertPlanWithSession(
"WITH DT AS (SELECT orderkey FROM orders right outer join (select custkey C from orders limit 0) on orderkey=C) SELECT * FROM DT LIMIT 2",
applyEmptyJoinOptimization, true,
output(limit(2, values("orderkey_0"))));

// Empty right child with no projections and left outer join
assertPlanWithSession(
"WITH DT AS (SELECT orderkey FROM orders left outer join (select custkey C from orders limit 0) on orderkey=C) SELECT * FROM DT",
applyEmptyJoinOptimization, true,
output(node(TableScanNode.class)));

// Empty left child with projections and right outer join
assertPlanWithSession(
"WITH DT AS (SELECT C, orderkey FROM (select custkey C from orders limit 0) right outer join orders on orderkey=C) SELECT * FROM DT",
applyEmptyJoinOptimization, true,
output(project(node(TableScanNode.class))));

// Empty right child with projections and left outer join
assertPlanWithSession(
"WITH DT AS (SELECT orderkey, C FROM orders left outer join (select custkey C from orders limit 0) on orderkey=C) SELECT * FROM DT",
applyEmptyJoinOptimization, true,
output(project(node(TableScanNode.class))));

// Empty right child with projections and full outer join
assertPlanWithSession(
"WITH DT AS (SELECT orderkey, C FROM orders full outer join (select custkey C from orders limit 0) on orderkey=C) SELECT * FROM DT",
applyEmptyJoinOptimization, true,
output(project(node(TableScanNode.class))));

// Both Left and Right child empty and full outer join.
assertPlanWithSession(
"SELECt orderkey,custkey FROM (SELECT orderkey FROM orders where 1=0) full outer join (select custkey from orders where 1=0) on orderkey=custkey",
applyEmptyJoinOptimization, true,
output(
values("orderkey_0", "custkey_0")));

// Negative tests. Both children are not empty
assertPlanWithSession(
"SELECT orderkey FROM (select custkey as C from orders where 1>0) join orders on orderkey=C",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,7 @@
import java.util.Set;
import java.util.stream.IntStream;

import static com.facebook.presto.SystemSessionProperties.OPTIMIZE_JOINS_WITH_EMPTY_SOURCES;
import static com.facebook.presto.common.type.BigintType.BIGINT;
import static com.facebook.presto.common.type.BooleanType.BOOLEAN;
import static com.facebook.presto.common.type.DecimalType.createDecimalType;
Expand Down Expand Up @@ -167,22 +168,71 @@ public void testSelectLargeInterval()
@Test
public void testEmptyJoins()
{
// Empty predicate
assertQuery("select 1 from (select * from orders where 1 = 0) DT join customer on DT.custkey=customer.custkey",
Session sessionWithEmptyJoin = Session.builder(getSession())
.setSystemProperty(OPTIMIZE_JOINS_WITH_EMPTY_SOURCES, "true")
.build();
Session sessionWithoutEmptyJoin = Session.builder(getSession())
.setSystemProperty(OPTIMIZE_JOINS_WITH_EMPTY_SOURCES, "false")
.build();
emptyJoinQueries(sessionWithEmptyJoin);
emptyJoinQueries(sessionWithoutEmptyJoin);
}

private void emptyJoinQueries(Session session)
{
// Empty predicate and inner join
assertQuery(session, "select 1 from (select * from orders where 1 = 0) DT join customer on DT.custkey=customer.custkey",
"select 1 from orders where 1 =0");

// Empty non-null producing side for outer join.
assertQuery(session, "select 1 from (select * from orders where 1 = 0) DT left outer join customer on DT.custkey=customer.custkey",
"select 1 from orders where 1 =0");

// 3 way join with empty non-null producing side for outer join
assertQuery(session, "select 1 from (select * from orders where 1 = 0) DT"
+ " left outer join customer C1 on DT.custkey=C1.custkey"
+ " left outer join customer C2 on C1.custkey=C2.custkey",
"select 1 from orders where 1 =0");

// Zero limit
assertQuery("select 1 from (select * from orders LIMIT 0) DT join customer on DT.custkey=customer.custkey",
assertQuery(session, "select 1 from (select * from orders LIMIT 0) DT join customer on DT.custkey=customer.custkey",
"select 1 from orders where 1 =0");

// Negative test.
assertQuery("select 1 from (select * from orders) DT join customer on DT.custkey=customer.custkey",
assertQuery(session, "select 1 from (select * from orders) DT join customer on DT.custkey=customer.custkey",
"select 1 from orders");

// Empty null producing side for outer join.
assertQuery(session, "select 1 from (select * from orders) ORD left outer join (select custkey from customer where 1=0) " +
"CUST on ORD.custkey=CUST.custkey",
"select 1 from orders");

// Empty null producing side for outer join. Optimization TODO.
assertQuery("select 1 from (select * from orders) ORD left outer join (select custkey from customer where 1=0) " +
// Empty null producing side for left outer join with constant field.
assertQuery(session, "select One from (select * from orders) ORD left outer join (select 1 as One, custkey from customer where 1=0) " +
"CUST on ORD.custkey=CUST.custkey",
"select null as One from orders");

// Empty null producing side for right outer join with constant field.
assertQuery(session, "select One from (select 1 as One, custkey from customer where 1=0) CUST right outer join (select * from orders) ORD " +
" ON ORD.custkey=CUST.custkey",
"select null as One from orders");

// 3 way join with mix of left and right outer joins. DT left outer join C1 right outer join O2.
// DT is empty which produces DT right outer join O2 which produce O2 as final result.
assertQuery(session, "select 1 from (select * from orders where 1 = 0) DT"
+ " left outer join customer C1 on DT.custkey=C1.custkey"
+ " right outer join orders O2 on C1.custkey=O2.custkey",
"select 1 from orders");

// Empty side for full outer join.
assertQuery(session, "select 1 from (select * from orders) ORD full outer join (select custkey from customer where 1=0) " +
"CUST on ORD.custkey=CUST.custkey",
"select 1 from orders");

// Empty side for full outer join as input to aggregation.
assertQuery(session, "select count(*), orderkey from (select * from orders) ORD full outer join (select custkey from customer where 1=0) " +
"CUST on ORD.custkey=CUST.custkey group by orderkey order by orderkey",
"select count(*), orderkey from orders group by orderkey order by orderkey");
}

@Test
Expand Down

0 comments on commit 69a5216

Please sign in to comment.