-
Notifications
You must be signed in to change notification settings - Fork 28.4k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
[SPARK-18863][SQL] Output non-aggregate expressions without GROUP BY …
…in a subquery does not yield an error ## What changes were proposed in this pull request? This PR will report proper error messages when a subquery expression contains an invalid plan. This problem is fixed by calling CheckAnalysis for the plan inside a subquery. ## How was this patch tested? Existing tests and two new test cases on 2 forms of subquery, namely, scalar subquery and in/exists subquery. ```` -- TC 01.01 -- The column t2b in the SELECT of the subquery is invalid -- because it is neither an aggregate function nor a GROUP BY column. select t1a, t2b from t1, t2 where t1b = t2c and t2b = (select max(avg) from (select t2b, avg(t2b) avg from t2 where t2a = t1.t1b ) ) ; -- TC 01.02 -- Invalid due to the column t2b not being part of the output from table t2. select * from t1 where t1a in (select min(t2a) from t2 group by t2c having t2c in (select max(t3c) from t3 group by t3b having t3b > t2b )) ; ```` Author: Nattavut Sutyanyong <[email protected]> Closes #16572 from nsyca/18863.
- Loading branch information
1 parent
0e821ec
commit f1ddca5
Showing
4 changed files
with
168 additions
and
51 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
42 changes: 42 additions & 0 deletions
42
sql/core/src/test/resources/sql-tests/inputs/subquery/negative-cases/invalid-correlation.sql
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,42 @@ | ||
-- The test file contains negative test cases | ||
-- of invalid queries where error messages are expected. | ||
|
||
-- Fixture t1: one-row temporary view (t1a, t1b, t1c); the outer table of | ||
-- both TC 01.01 and TC 01.02. | ||
create temporary view t1 as select * from values | ||
(1, 2, 3) | ||
as t1(t1a, t1b, t1c); | ||
|
||
-- Fixture t2: one-row temporary view (t2a, t2b, t2c); joined with t1 in | ||
-- TC 01.01 and read by the subqueries of both TC 01.01 and TC 01.02. | ||
create temporary view t2 as select * from values | ||
(1, 0, 1) | ||
as t2(t2a, t2b, t2c); | ||
|
||
-- Fixture t3: one-row temporary view (t3a, t3b, t3c); read only by the | ||
-- innermost subquery of TC 01.02. | ||
create temporary view t3 as select * from values | ||
(3, 1, 2) | ||
as t3(t3a, t3b, t3c); | ||
|
||
-- TC 01.01 | ||
-- Scalar-subquery case. The column t2b in the SELECT of the innermost | ||
-- subquery is invalid because it is neither an aggregate function nor a | ||
-- GROUP BY column: that subquery selects t2b next to avg(t2b) and has no | ||
-- GROUP BY clause. The innermost subquery is correlated to the outer query | ||
-- through t1.t1b. | ||
-- Expected result: the query fails with an AnalysisException. | ||
select t1a, t2b | ||
from t1, t2 | ||
where t1b = t2c | ||
and t2b = (select max(avg) | ||
from (select t2b, avg(t2b) avg | ||
from t2 | ||
where t2a = t1.t1b | ||
) | ||
) | ||
; | ||
|
||
-- TC 01.02 | ||
-- IN-subquery case. Invalid because column t2b is not part of the output of | ||
-- the grouped subquery over t2 (only min(t2a) and the grouping column t2c | ||
-- are available there), yet the HAVING clause of the innermost subquery | ||
-- references it. | ||
-- Expected result: the query fails with an AnalysisException. | ||
select * | ||
from t1 | ||
where t1a in (select min(t2a) | ||
from t2 | ||
group by t2c | ||
having t2c in (select max(t3c) | ||
from t3 | ||
group by t3b | ||
having t3b > t2b )) | ||
; | ||
|
66 changes: 66 additions & 0 deletions
66
.../src/test/resources/sql-tests/results/subquery/negative-cases/invalid-correlation.sql.out
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,66 @@ | ||
-- Automatically generated by SQLQueryTestSuite | ||
-- Number of queries: 5 | ||
|
||
|
||
-- !query 0 | ||
create temporary view t1 as select * from values | ||
(1, 2, 3) | ||
as t1(t1a, t1b, t1c) | ||
-- !query 0 schema | ||
struct<> | ||
-- !query 0 output | ||
|
||
|
||
|
||
-- !query 1 | ||
create temporary view t2 as select * from values | ||
(1, 0, 1) | ||
as t2(t2a, t2b, t2c) | ||
-- !query 1 schema | ||
struct<> | ||
-- !query 1 output | ||
|
||
|
||
|
||
-- !query 2 | ||
create temporary view t3 as select * from values | ||
(3, 1, 2) | ||
as t3(t3a, t3b, t3c) | ||
-- !query 2 schema | ||
struct<> | ||
-- !query 2 output | ||
|
||
|
||
|
||
-- !query 3 | ||
select t1a, t2b | ||
from t1, t2 | ||
where t1b = t2c | ||
and t2b = (select max(avg) | ||
from (select t2b, avg(t2b) avg | ||
from t2 | ||
where t2a = t1.t1b | ||
) | ||
) | ||
-- !query 3 schema | ||
struct<> | ||
-- !query 3 output | ||
org.apache.spark.sql.AnalysisException | ||
expression 't2.`t2b`' is neither present in the group by, nor is it an aggregate function. Add to group by or wrap in first() (or first_value) if you don't care which value you get.; | ||
|
||
|
||
-- !query 4 | ||
select * | ||
from t1 | ||
where t1a in (select min(t2a) | ||
from t2 | ||
group by t2c | ||
having t2c in (select max(t3c) | ||
from t3 | ||
group by t3b | ||
having t3b > t2b )) | ||
-- !query 4 schema | ||
struct<> | ||
-- !query 4 output | ||
org.apache.spark.sql.AnalysisException | ||
resolved attribute(s) t2b#x missing from min(t2a)#x,t2c#x in operator !Filter predicate-subquery#x [(t2c#x = max(t3c)#x) && (t3b#x > t2b#x)]; |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters