Skip to content

Commit

Permalink
[SPARK-23095][SQL] Decorrelation of scalar subquery fails with java.u…
Browse files Browse the repository at this point in the history
…til.NoSuchElementException

## What changes were proposed in this pull request?
The following SQL query, which contains a correlated scalar subquery, fails with a map lookup exception (java.util.NoSuchElementException: key not found).
``` SQL
SELECT t1a
FROM   t1
WHERE  t1a = (SELECT   count(*)
              FROM     t2
              WHERE    t2c = t1c
              HAVING   count(*) >= 1)
```
```
key not found: ExprId(278,786682bb-41f9-4bd5-a397-928272cc8e4e)
java.util.NoSuchElementException: key not found: ExprId(278,786682bb-41f9-4bd5-a397-928272cc8e4e)
        at scala.collection.MapLike$class.default(MapLike.scala:228)
        at scala.collection.AbstractMap.default(Map.scala:59)
        at scala.collection.MapLike$class.apply(MapLike.scala:141)
        at scala.collection.AbstractMap.apply(Map.scala:59)
        at org.apache.spark.sql.catalyst.optimizer.RewriteCorrelatedScalarSubquery$.org$apache$spark$sql$catalyst$optimizer$RewriteCorrelatedScalarSubquery$$evalSubqueryOnZeroTups(subquery.scala:378)
        at org.apache.spark.sql.catalyst.optimizer.RewriteCorrelatedScalarSubquery$$anonfun$org$apache$spark$sql$catalyst$optimizer$RewriteCorrelatedScalarSubquery$$constructLeftJoins$1.apply(subquery.scala:430)
        at org.apache.spark.sql.catalyst.optimizer.RewriteCorrelatedScalarSubquery$$anonfun$org$apache$spark$sql$catalyst$optimizer$RewriteCorrelatedScalarSubquery$$constructLeftJoins$1.apply(subquery.scala:426)
```

In this case, after statically evaluating the HAVING clause "count(*) >= 1"
against the binding of the aggregation result on empty input, we determine
that this query does not have the count bug. We should simply have
evalSubqueryOnZeroTups return an empty value (None).
(Please fill in changes proposed in this fix)

## How was this patch tested?
A new test was added in the Subquery bucket.

Author: Dilip Biswal <[email protected]>

Closes #20283 from dilipbiswal/scalar-count-defect.

(cherry picked from commit 0c2ba42)
Signed-off-by: gatorsmile <[email protected]>
  • Loading branch information
dilipbiswal authored and gatorsmile committed Jan 17, 2018
1 parent 7022ef8 commit d09eecc
Show file tree
Hide file tree
Showing 3 changed files with 49 additions and 23 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -335,13 +335,14 @@ object RewriteCorrelatedScalarSubquery extends Rule[LogicalPlan] {
case ne => (ne.exprId, evalAggOnZeroTups(ne))
}.toMap

case _ => sys.error(s"Unexpected operator in scalar subquery: $lp")
case _ =>
sys.error(s"Unexpected operator in scalar subquery: $lp")
}

val resultMap = evalPlan(plan)

// By convention, the scalar subquery result is the leftmost field.
resultMap(plan.output.head.exprId)
resultMap.getOrElse(plan.output.head.exprId, None)
}

/**
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -173,6 +173,16 @@ WHERE t1a = (SELECT max(t2a)
HAVING count(*) >= 0)
OR t1i > '2014-12-31';

-- TC 02.03.01
SELECT t1a
FROM t1
WHERE t1a = (SELECT max(t2a)
FROM t2
WHERE t2c = t1c
GROUP BY t2c
HAVING count(*) >= 1)
OR t1i > '2014-12-31';

-- TC 02.04
-- t1 on the right of an outer join
-- can be reduced to inner join
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
-- Automatically generated by SQLQueryTestSuite
-- Number of queries: 26
-- Number of queries: 29


-- !query 0
Expand Down Expand Up @@ -293,20 +293,35 @@ val1d


-- !query 19
SELECT t1a
FROM t1
WHERE t1a = (SELECT max(t2a)
FROM t2
WHERE t2c = t1c
GROUP BY t2c
HAVING count(*) >= 1)
OR t1i > '2014-12-31'
-- !query 19 schema
struct<t1a:string>
-- !query 19 output
val1c
val1d

-- !query 22
SELECT count(t1a)
FROM t1 RIGHT JOIN t2
ON t1d = t2d
WHERE t1a < (SELECT max(t2a)
FROM t2
WHERE t2c = t1c
GROUP BY t2c)
-- !query 19 schema
-- !query 22 schema
struct<count(t1a):bigint>
-- !query 19 output
-- !query 22 output
7


-- !query 20
-- !query 23
SELECT t1a
FROM t1
WHERE t1b <= (SELECT max(t2b)
Expand All @@ -317,14 +332,14 @@ AND t1b >= (SELECT min(t2b)
FROM t2
WHERE t2c = t1c
GROUP BY t2c)
-- !query 20 schema
-- !query 23 schema
struct<t1a:string>
-- !query 20 output
-- !query 23 output
val1b
val1c


-- !query 21
-- !query 24
SELECT t1a
FROM t1
WHERE t1a <= (SELECT max(t2a)
Expand All @@ -338,14 +353,14 @@ WHERE t1a >= (SELECT min(t2a)
FROM t2
WHERE t2c = t1c
GROUP BY t2c)
-- !query 21 schema
-- !query 24 schema
struct<t1a:string>
-- !query 21 output
-- !query 24 output
val1b
val1c


-- !query 22
-- !query 25
SELECT t1a
FROM t1
WHERE t1a <= (SELECT max(t2a)
Expand All @@ -359,9 +374,9 @@ WHERE t1a >= (SELECT min(t2a)
FROM t2
WHERE t2c = t1c
GROUP BY t2c)
-- !query 22 schema
-- !query 25 schema
struct<t1a:string>
-- !query 22 output
-- !query 25 output
val1a
val1a
val1b
Expand All @@ -372,7 +387,7 @@ val1d
val1d


-- !query 23
-- !query 26
SELECT t1a
FROM t1
WHERE t1a <= (SELECT max(t2a)
Expand All @@ -386,16 +401,16 @@ WHERE t1a >= (SELECT min(t2a)
FROM t2
WHERE t2c = t1c
GROUP BY t2c)
-- !query 23 schema
-- !query 26 schema
struct<t1a:string>
-- !query 23 output
-- !query 26 output
val1a
val1b
val1c
val1d


-- !query 24
-- !query 27
SELECT t1a
FROM t1
WHERE t1a <= (SELECT max(t2a)
Expand All @@ -409,22 +424,22 @@ WHERE t1a >= (SELECT min(t2a)
FROM t2
WHERE t2c = t1c
GROUP BY t2c)
-- !query 24 schema
-- !query 27 schema
struct<t1a:string>
-- !query 24 output
-- !query 27 output
val1a


-- !query 25
-- !query 28
SELECT t1a
FROM t1
GROUP BY t1a, t1c
HAVING max(t1b) <= (SELECT max(t2b)
FROM t2
WHERE t2c = t1c
GROUP BY t2c)
-- !query 25 schema
-- !query 28 schema
struct<t1a:string>
-- !query 25 output
-- !query 28 output
val1b
val1c

0 comments on commit d09eecc

Please sign in to comment.