Skip to content

Commit

Permalink
[SPARK-28284][SQL][PYTHON][TESTS] Convert and port 'join-empty-relati…
Browse files Browse the repository at this point in the history
…on.sql' into UDF test base

## What changes were proposed in this pull request?

This PR adds some tests converted from `join-empty-relation.sql` to test UDFs. Please see contribution guide of this umbrella ticket - [SPARK-27921](https://issues.apache.org/jira/browse/SPARK-27921).

<details><summary>Diff comparing to 'join-empty-relation.sql'</summary>
<p>

```diff
diff --git a/sql/core/src/test/resources/sql-tests/results/join-empty-relation.sql.out b/sql/core/src/test/resources/sql-tests/results/udf/udf-join-empty-relation.sql.out
index 857073a827..e79d01fb14 100644
--- a/sql/core/src/test/resources/sql-tests/results/join-empty-relation.sql.out
+++ b/sql/core/src/test/resources/sql-tests/results/udf/udf-join-empty-relation.sql.out
 -27,111 +27,111  struct<>

 -- !query 3
-SELECT * FROM t1 INNER JOIN empty_table
+SELECT udf(t1.a), udf(empty_table.a) FROM t1 INNER JOIN empty_table ON (udf(t1.a) = udf(udf(empty_table.a)))
 -- !query 3 schema
-struct<a:int,a:int>
+struct<CAST(udf(cast(a as string)) AS INT):int,CAST(udf(cast(a as string)) AS INT):int>
 -- !query 3 output

 -- !query 4
-SELECT * FROM t1 CROSS JOIN empty_table
+SELECT udf(t1.a), udf(udf(empty_table.a)) FROM t1 CROSS JOIN empty_table ON (udf(udf(t1.a)) = udf(empty_table.a))
 -- !query 4 schema
-struct<a:int,a:int>
+struct<CAST(udf(cast(a as string)) AS INT):int,CAST(udf(cast(cast(udf(cast(a as string)) as int) as string)) AS INT):int>
 -- !query 4 output

 -- !query 5
-SELECT * FROM t1 LEFT OUTER JOIN empty_table
+SELECT udf(udf(t1.a)), empty_table.a FROM t1 LEFT OUTER JOIN empty_table ON (udf(t1.a) = udf(empty_table.a))
 -- !query 5 schema
-struct<a:int,a:int>
+struct<CAST(udf(cast(cast(udf(cast(a as string)) as int) as string)) AS INT):int,a:int>
 -- !query 5 output
 1	NULL

 -- !query 6
-SELECT * FROM t1 RIGHT OUTER JOIN empty_table
+SELECT udf(t1.a), udf(empty_table.a) FROM t1 RIGHT OUTER JOIN empty_table ON (udf(t1.a) = udf(empty_table.a))
 -- !query 6 schema
-struct<a:int,a:int>
+struct<CAST(udf(cast(a as string)) AS INT):int,CAST(udf(cast(a as string)) AS INT):int>
 -- !query 6 output

 -- !query 7
-SELECT * FROM t1 FULL OUTER JOIN empty_table
+SELECT udf(t1.a), empty_table.a FROM t1 FULL OUTER JOIN empty_table ON (udf(t1.a) = udf(empty_table.a))
 -- !query 7 schema
-struct<a:int,a:int>
+struct<CAST(udf(cast(a as string)) AS INT):int,a:int>
 -- !query 7 output
 1	NULL

 -- !query 8
-SELECT * FROM t1 LEFT SEMI JOIN empty_table
+SELECT udf(udf(t1.a)) FROM t1 LEFT SEMI JOIN empty_table ON (udf(t1.a) = udf(udf(empty_table.a)))
 -- !query 8 schema
-struct<a:int>
+struct<CAST(udf(cast(cast(udf(cast(a as string)) as int) as string)) AS INT):int>
 -- !query 8 output

 -- !query 9
-SELECT * FROM t1 LEFT ANTI JOIN empty_table
+SELECT udf(t1.a) FROM t1 LEFT ANTI JOIN empty_table ON (udf(t1.a) = udf(empty_table.a))
 -- !query 9 schema
-struct<a:int>
+struct<CAST(udf(cast(a as string)) AS INT):int>
 -- !query 9 output
 1

 -- !query 10
-SELECT * FROM empty_table INNER JOIN t1
+SELECT udf(empty_table.a), udf(t1.a) FROM empty_table INNER JOIN t1 ON (udf(udf(empty_table.a)) = udf(t1.a))
 -- !query 10 schema
-struct<a:int,a:int>
+struct<CAST(udf(cast(a as string)) AS INT):int,CAST(udf(cast(a as string)) AS INT):int>
 -- !query 10 output

 -- !query 11
-SELECT * FROM empty_table CROSS JOIN t1
+SELECT udf(empty_table.a), udf(udf(t1.a)) FROM empty_table CROSS JOIN t1 ON (udf(empty_table.a) = udf(udf(t1.a)))
 -- !query 11 schema
-struct<a:int,a:int>
+struct<CAST(udf(cast(a as string)) AS INT):int,CAST(udf(cast(cast(udf(cast(a as string)) as int) as string)) AS INT):int>
 -- !query 11 output

 -- !query 12
-SELECT * FROM empty_table LEFT OUTER JOIN t1
+SELECT udf(udf(empty_table.a)), udf(t1.a) FROM empty_table LEFT OUTER JOIN t1 ON (udf(empty_table.a) = udf(t1.a))
 -- !query 12 schema
-struct<a:int,a:int>
+struct<CAST(udf(cast(cast(udf(cast(a as string)) as int) as string)) AS INT):int,CAST(udf(cast(a as string)) AS INT):int>
 -- !query 12 output

 -- !query 13
-SELECT * FROM empty_table RIGHT OUTER JOIN t1
+SELECT empty_table.a, udf(t1.a) FROM empty_table RIGHT OUTER JOIN t1 ON (udf(empty_table.a) = udf(t1.a))
 -- !query 13 schema
-struct<a:int,a:int>
+struct<a:int,CAST(udf(cast(a as string)) AS INT):int>
 -- !query 13 output
 NULL	1

 -- !query 14
-SELECT * FROM empty_table FULL OUTER JOIN t1
+SELECT empty_table.a, udf(udf(t1.a)) FROM empty_table FULL OUTER JOIN t1 ON (udf(empty_table.a) = udf(t1.a))
 -- !query 14 schema
-struct<a:int,a:int>
+struct<a:int,CAST(udf(cast(cast(udf(cast(a as string)) as int) as string)) AS INT):int>
 -- !query 14 output
 NULL	1

 -- !query 15
-SELECT * FROM empty_table LEFT SEMI JOIN t1
+SELECT udf(udf(empty_table.a)) FROM empty_table LEFT SEMI JOIN t1 ON (udf(empty_table.a) = udf(udf(t1.a)))
 -- !query 15 schema
-struct<a:int>
+struct<CAST(udf(cast(cast(udf(cast(a as string)) as int) as string)) AS INT):int>
 -- !query 15 output

 -- !query 16
-SELECT * FROM empty_table LEFT ANTI JOIN t1
+SELECT empty_table.a FROM empty_table LEFT ANTI JOIN t1 ON (udf(empty_table.a) = udf(t1.a))
 -- !query 16 schema
 struct<a:int>
 -- !query 16 output
 -139,56 +139,56  struct<a:int>

 -- !query 17
-SELECT * FROM empty_table INNER JOIN empty_table
+SELECT udf(empty_table.a) FROM empty_table INNER JOIN empty_table AS empty_table2 ON (udf(empty_table.a) = udf(udf(empty_table2.a)))
 -- !query 17 schema
-struct<a:int,a:int>
+struct<CAST(udf(cast(a as string)) AS INT):int>
 -- !query 17 output

 -- !query 18
-SELECT * FROM empty_table CROSS JOIN empty_table
+SELECT udf(udf(empty_table.a)) FROM empty_table CROSS JOIN empty_table AS empty_table2 ON (udf(udf(empty_table.a)) = udf(empty_table2.a))
 -- !query 18 schema
-struct<a:int,a:int>
+struct<CAST(udf(cast(cast(udf(cast(a as string)) as int) as string)) AS INT):int>
 -- !query 18 output

 -- !query 19
-SELECT * FROM empty_table LEFT OUTER JOIN empty_table
+SELECT udf(empty_table.a) FROM empty_table LEFT OUTER JOIN empty_table AS empty_table2 ON (udf(empty_table.a) = udf(empty_table2.a))
 -- !query 19 schema
-struct<a:int,a:int>
+struct<CAST(udf(cast(a as string)) AS INT):int>
 -- !query 19 output

 -- !query 20
-SELECT * FROM empty_table RIGHT OUTER JOIN empty_table
+SELECT udf(udf(empty_table.a)) FROM empty_table RIGHT OUTER JOIN empty_table AS empty_table2 ON (udf(empty_table.a) = udf(udf(empty_table2.a)))
 -- !query 20 schema
-struct<a:int,a:int>
+struct<CAST(udf(cast(cast(udf(cast(a as string)) as int) as string)) AS INT):int>
 -- !query 20 output

 -- !query 21
-SELECT * FROM empty_table FULL OUTER JOIN empty_table
+SELECT udf(empty_table.a) FROM empty_table FULL OUTER JOIN empty_table AS empty_table2 ON (udf(empty_table.a) = udf(empty_table2.a))
 -- !query 21 schema
-struct<a:int,a:int>
+struct<CAST(udf(cast(a as string)) AS INT):int>
 -- !query 21 output

 -- !query 22
-SELECT * FROM empty_table LEFT SEMI JOIN empty_table
+SELECT udf(udf(empty_table.a)) FROM empty_table LEFT SEMI JOIN empty_table AS empty_table2 ON (udf(empty_table.a) = udf(empty_table2.a))
 -- !query 22 schema
-struct<a:int>
+struct<CAST(udf(cast(cast(udf(cast(a as string)) as int) as string)) AS INT):int>
 -- !query 22 output

 -- !query 23
-SELECT * FROM empty_table LEFT ANTI JOIN empty_table
+SELECT udf(empty_table.a) FROM empty_table LEFT ANTI JOIN empty_table AS empty_table2 ON (udf(empty_table.a) = udf(empty_table2.a))
 -- !query 23 schema
-struct<a:int>
+struct<CAST(udf(cast(a as string)) AS INT):int>
 -- !query 23 output

```
</p>
</details>

## How was this patch tested?

Tested as guided in [SPARK-27921](https://issues.apache.org/jira/browse/SPARK-27921).

Closes apache#25127 from imback82/join-empty-relation-sql.

Authored-by: Terry Kim <[email protected]>
Signed-off-by: HyukjinKwon <[email protected]>
  • Loading branch information
imback82 authored and HyukjinKwon committed Jul 19, 2019
1 parent 52ddf03 commit 453cbf3
Show file tree
Hide file tree
Showing 2 changed files with 229 additions and 0 deletions.
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
-- List of configuration the test suite is run against:
--SET spark.sql.autoBroadcastJoinThreshold=10485760
--SET spark.sql.autoBroadcastJoinThreshold=-1,spark.sql.join.preferSortMergeJoin=true
--SET spark.sql.autoBroadcastJoinThreshold=-1,spark.sql.join.preferSortMergeJoin=false

-- This test file was converted from join-empty-relation.sql.

CREATE TEMPORARY VIEW t1 AS SELECT * FROM VALUES (1) AS GROUPING(a);
CREATE TEMPORARY VIEW t2 AS SELECT * FROM VALUES (1) AS GROUPING(a);

CREATE TEMPORARY VIEW empty_table as SELECT a FROM t2 WHERE false;

SELECT udf(t1.a), udf(empty_table.a) FROM t1 INNER JOIN empty_table ON (udf(t1.a) = udf(udf(empty_table.a)));
SELECT udf(t1.a), udf(udf(empty_table.a)) FROM t1 CROSS JOIN empty_table ON (udf(udf(t1.a)) = udf(empty_table.a));
SELECT udf(udf(t1.a)), empty_table.a FROM t1 LEFT OUTER JOIN empty_table ON (udf(t1.a) = udf(empty_table.a));
SELECT udf(t1.a), udf(empty_table.a) FROM t1 RIGHT OUTER JOIN empty_table ON (udf(t1.a) = udf(empty_table.a));
SELECT udf(t1.a), empty_table.a FROM t1 FULL OUTER JOIN empty_table ON (udf(t1.a) = udf(empty_table.a));
SELECT udf(udf(t1.a)) FROM t1 LEFT SEMI JOIN empty_table ON (udf(t1.a) = udf(udf(empty_table.a)));
SELECT udf(t1.a) FROM t1 LEFT ANTI JOIN empty_table ON (udf(t1.a) = udf(empty_table.a));

SELECT udf(empty_table.a), udf(t1.a) FROM empty_table INNER JOIN t1 ON (udf(udf(empty_table.a)) = udf(t1.a));
SELECT udf(empty_table.a), udf(udf(t1.a)) FROM empty_table CROSS JOIN t1 ON (udf(empty_table.a) = udf(udf(t1.a)));
SELECT udf(udf(empty_table.a)), udf(t1.a) FROM empty_table LEFT OUTER JOIN t1 ON (udf(empty_table.a) = udf(t1.a));
SELECT empty_table.a, udf(t1.a) FROM empty_table RIGHT OUTER JOIN t1 ON (udf(empty_table.a) = udf(t1.a));
SELECT empty_table.a, udf(udf(t1.a)) FROM empty_table FULL OUTER JOIN t1 ON (udf(empty_table.a) = udf(t1.a));
SELECT udf(udf(empty_table.a)) FROM empty_table LEFT SEMI JOIN t1 ON (udf(empty_table.a) = udf(udf(t1.a)));
SELECT empty_table.a FROM empty_table LEFT ANTI JOIN t1 ON (udf(empty_table.a) = udf(t1.a));

SELECT udf(empty_table.a) FROM empty_table INNER JOIN empty_table AS empty_table2 ON (udf(empty_table.a) = udf(udf(empty_table2.a)));
SELECT udf(udf(empty_table.a)) FROM empty_table CROSS JOIN empty_table AS empty_table2 ON (udf(udf(empty_table.a)) = udf(empty_table2.a));
SELECT udf(empty_table.a) FROM empty_table LEFT OUTER JOIN empty_table AS empty_table2 ON (udf(empty_table.a) = udf(empty_table2.a));
SELECT udf(udf(empty_table.a)) FROM empty_table RIGHT OUTER JOIN empty_table AS empty_table2 ON (udf(empty_table.a) = udf(udf(empty_table2.a)));
SELECT udf(empty_table.a) FROM empty_table FULL OUTER JOIN empty_table AS empty_table2 ON (udf(empty_table.a) = udf(empty_table2.a));
SELECT udf(udf(empty_table.a)) FROM empty_table LEFT SEMI JOIN empty_table AS empty_table2 ON (udf(empty_table.a) = udf(empty_table2.a));
SELECT udf(empty_table.a) FROM empty_table LEFT ANTI JOIN empty_table AS empty_table2 ON (udf(empty_table.a) = udf(empty_table2.a));
Original file line number Diff line number Diff line change
@@ -0,0 +1,194 @@
-- Automatically generated by SQLQueryTestSuite
-- Number of queries: 24


-- !query 0
CREATE TEMPORARY VIEW t1 AS SELECT * FROM VALUES (1) AS GROUPING(a)
-- !query 0 schema
struct<>
-- !query 0 output



-- !query 1
CREATE TEMPORARY VIEW t2 AS SELECT * FROM VALUES (1) AS GROUPING(a)
-- !query 1 schema
struct<>
-- !query 1 output



-- !query 2
CREATE TEMPORARY VIEW empty_table as SELECT a FROM t2 WHERE false
-- !query 2 schema
struct<>
-- !query 2 output



-- !query 3
SELECT udf(t1.a), udf(empty_table.a) FROM t1 INNER JOIN empty_table ON (udf(t1.a) = udf(udf(empty_table.a)))
-- !query 3 schema
struct<CAST(udf(cast(a as string)) AS INT):int,CAST(udf(cast(a as string)) AS INT):int>
-- !query 3 output



-- !query 4
SELECT udf(t1.a), udf(udf(empty_table.a)) FROM t1 CROSS JOIN empty_table ON (udf(udf(t1.a)) = udf(empty_table.a))
-- !query 4 schema
struct<CAST(udf(cast(a as string)) AS INT):int,CAST(udf(cast(cast(udf(cast(a as string)) as int) as string)) AS INT):int>
-- !query 4 output



-- !query 5
SELECT udf(udf(t1.a)), empty_table.a FROM t1 LEFT OUTER JOIN empty_table ON (udf(t1.a) = udf(empty_table.a))
-- !query 5 schema
struct<CAST(udf(cast(cast(udf(cast(a as string)) as int) as string)) AS INT):int,a:int>
-- !query 5 output
1 NULL


-- !query 6
SELECT udf(t1.a), udf(empty_table.a) FROM t1 RIGHT OUTER JOIN empty_table ON (udf(t1.a) = udf(empty_table.a))
-- !query 6 schema
struct<CAST(udf(cast(a as string)) AS INT):int,CAST(udf(cast(a as string)) AS INT):int>
-- !query 6 output



-- !query 7
SELECT udf(t1.a), empty_table.a FROM t1 FULL OUTER JOIN empty_table ON (udf(t1.a) = udf(empty_table.a))
-- !query 7 schema
struct<CAST(udf(cast(a as string)) AS INT):int,a:int>
-- !query 7 output
1 NULL


-- !query 8
SELECT udf(udf(t1.a)) FROM t1 LEFT SEMI JOIN empty_table ON (udf(t1.a) = udf(udf(empty_table.a)))
-- !query 8 schema
struct<CAST(udf(cast(cast(udf(cast(a as string)) as int) as string)) AS INT):int>
-- !query 8 output



-- !query 9
SELECT udf(t1.a) FROM t1 LEFT ANTI JOIN empty_table ON (udf(t1.a) = udf(empty_table.a))
-- !query 9 schema
struct<CAST(udf(cast(a as string)) AS INT):int>
-- !query 9 output
1


-- !query 10
SELECT udf(empty_table.a), udf(t1.a) FROM empty_table INNER JOIN t1 ON (udf(udf(empty_table.a)) = udf(t1.a))
-- !query 10 schema
struct<CAST(udf(cast(a as string)) AS INT):int,CAST(udf(cast(a as string)) AS INT):int>
-- !query 10 output



-- !query 11
SELECT udf(empty_table.a), udf(udf(t1.a)) FROM empty_table CROSS JOIN t1 ON (udf(empty_table.a) = udf(udf(t1.a)))
-- !query 11 schema
struct<CAST(udf(cast(a as string)) AS INT):int,CAST(udf(cast(cast(udf(cast(a as string)) as int) as string)) AS INT):int>
-- !query 11 output



-- !query 12
SELECT udf(udf(empty_table.a)), udf(t1.a) FROM empty_table LEFT OUTER JOIN t1 ON (udf(empty_table.a) = udf(t1.a))
-- !query 12 schema
struct<CAST(udf(cast(cast(udf(cast(a as string)) as int) as string)) AS INT):int,CAST(udf(cast(a as string)) AS INT):int>
-- !query 12 output



-- !query 13
SELECT empty_table.a, udf(t1.a) FROM empty_table RIGHT OUTER JOIN t1 ON (udf(empty_table.a) = udf(t1.a))
-- !query 13 schema
struct<a:int,CAST(udf(cast(a as string)) AS INT):int>
-- !query 13 output
NULL 1


-- !query 14
SELECT empty_table.a, udf(udf(t1.a)) FROM empty_table FULL OUTER JOIN t1 ON (udf(empty_table.a) = udf(t1.a))
-- !query 14 schema
struct<a:int,CAST(udf(cast(cast(udf(cast(a as string)) as int) as string)) AS INT):int>
-- !query 14 output
NULL 1


-- !query 15
SELECT udf(udf(empty_table.a)) FROM empty_table LEFT SEMI JOIN t1 ON (udf(empty_table.a) = udf(udf(t1.a)))
-- !query 15 schema
struct<CAST(udf(cast(cast(udf(cast(a as string)) as int) as string)) AS INT):int>
-- !query 15 output



-- !query 16
SELECT empty_table.a FROM empty_table LEFT ANTI JOIN t1 ON (udf(empty_table.a) = udf(t1.a))
-- !query 16 schema
struct<a:int>
-- !query 16 output



-- !query 17
SELECT udf(empty_table.a) FROM empty_table INNER JOIN empty_table AS empty_table2 ON (udf(empty_table.a) = udf(udf(empty_table2.a)))
-- !query 17 schema
struct<CAST(udf(cast(a as string)) AS INT):int>
-- !query 17 output



-- !query 18
SELECT udf(udf(empty_table.a)) FROM empty_table CROSS JOIN empty_table AS empty_table2 ON (udf(udf(empty_table.a)) = udf(empty_table2.a))
-- !query 18 schema
struct<CAST(udf(cast(cast(udf(cast(a as string)) as int) as string)) AS INT):int>
-- !query 18 output



-- !query 19
SELECT udf(empty_table.a) FROM empty_table LEFT OUTER JOIN empty_table AS empty_table2 ON (udf(empty_table.a) = udf(empty_table2.a))
-- !query 19 schema
struct<CAST(udf(cast(a as string)) AS INT):int>
-- !query 19 output



-- !query 20
SELECT udf(udf(empty_table.a)) FROM empty_table RIGHT OUTER JOIN empty_table AS empty_table2 ON (udf(empty_table.a) = udf(udf(empty_table2.a)))
-- !query 20 schema
struct<CAST(udf(cast(cast(udf(cast(a as string)) as int) as string)) AS INT):int>
-- !query 20 output



-- !query 21
SELECT udf(empty_table.a) FROM empty_table FULL OUTER JOIN empty_table AS empty_table2 ON (udf(empty_table.a) = udf(empty_table2.a))
-- !query 21 schema
struct<CAST(udf(cast(a as string)) AS INT):int>
-- !query 21 output



-- !query 22
SELECT udf(udf(empty_table.a)) FROM empty_table LEFT SEMI JOIN empty_table AS empty_table2 ON (udf(empty_table.a) = udf(empty_table2.a))
-- !query 22 schema
struct<CAST(udf(cast(cast(udf(cast(a as string)) as int) as string)) AS INT):int>
-- !query 22 output



-- !query 23
SELECT udf(empty_table.a) FROM empty_table LEFT ANTI JOIN empty_table AS empty_table2 ON (udf(empty_table.a) = udf(empty_table2.a))
-- !query 23 schema
struct<CAST(udf(cast(a as string)) AS INT):int>
-- !query 23 output

0 comments on commit 453cbf3

Please sign in to comment.