Skip to content

Commit

Permalink
[SPARK-20175][SQL] Exists should not be evaluated in Join operator
Browse files Browse the repository at this point in the history
## What changes were proposed in this pull request?

Similar to `ListQuery`, `Exists` should not be evaluated in `Join` operator too.

## How was this patch tested?

Jenkins tests.

Please review http://spark.apache.org/contributing.html before opening a pull request.

Author: Liang-Chi Hsieh <[email protected]>

Closes #17491 from viirya/dont-push-exists-to-join.
  • Loading branch information
viirya authored and cloud-fan committed Apr 11, 2017
1 parent c870698 commit cd91f96
Show file tree
Hide file tree
Showing 2 changed files with 12 additions and 1 deletion.
Original file line number Diff line number Diff line change
Expand Up @@ -92,11 +92,12 @@ trait PredicateHelper {
protected def canEvaluateWithinJoin(expr: Expression): Boolean = expr match {
// Non-deterministic expressions are not allowed as join conditions.
case e if !e.deterministic => false
case l: ListQuery =>
case _: ListQuery | _: Exists =>
// A ListQuery defines the query which we want to search in an IN subquery expression.
// Currently the only way to evaluate an IN subquery is to convert it to a
// LeftSemi/LeftAnti/ExistenceJoin by `RewritePredicateSubquery` rule.
// It cannot be evaluated as part of a Join operator.
// An Exists shouldn't be push into a Join operator too.
false
case e: SubqueryExpression =>
// non-correlated subquery will be replaced as literal
Expand Down
10 changes: 10 additions & 0 deletions sql/core/src/test/scala/org/apache/spark/sql/SubquerySuite.scala
Original file line number Diff line number Diff line change
Expand Up @@ -844,4 +844,14 @@ class SubquerySuite extends QueryTest with SharedSQLContext {
Row(0) :: Row(1) :: Nil)
}
}

test("ListQuery and Exists should work even no correlated references") {
checkAnswer(
sql("select * from l, r where l.a = r.c AND (r.d in (select d from r) OR l.a >= 1)"),
Row(2, 1.0, 2, 3.0) :: Row(2, 1.0, 2, 3.0) :: Row(2, 1.0, 2, 3.0) ::
Row(2, 1.0, 2, 3.0) :: Row(3.0, 3.0, 3, 2.0) :: Row(6, null, 6, null) :: Nil)
checkAnswer(
sql("select * from l, r where l.a = r.c + 1 AND (exists (select * from r) OR l.a = r.c)"),
Row(3, 3.0, 2, 3.0) :: Row(3, 3.0, 2, 3.0) :: Nil)
}
}

0 comments on commit cd91f96

Please sign in to comment.