-
Notifications
You must be signed in to change notification settings - Fork 28.4k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
[SPARK-28169][SQL] Convert scan predicate condition to CNF
### What changes were proposed in this pull request? Spark can't push down scan predicate condition of **Or**: Such as if I have a table `default.test`, it's partition col is `dt`, If we use query : ``` select * from default.test where dt=20190625 or (dt = 20190626 and id in (1,2,3) ) ``` In this case, Spark will resolve **Or** condition as one expression, and since this expr has reference of "id", then it can't been push down. Base on pr #28733, In my PR , for SQL like `select * from default.test` `where dt = 20190626 or (dt = 20190627 and xxx="a") ` For this condition `dt = 20190626 or (dt = 20190627 and xxx="a" )`, it will been converted to CNF ``` (dt = 20190626 or dt = 20190627) and (dt = 20190626 or xxx = "a" ) ``` then condition `dt = 20190626 or dt = 20190627` will be push down when partition pruning ### Why are the changes needed? Optimize partition pruning ### Does this PR introduce _any_ user-facing change? NO ### How was this patch tested? Added UT Closes #28805 from AngersZhuuuu/cnf-for-partition-pruning. Lead-authored-by: angerszhu <[email protected]> Co-authored-by: AngersZhuuuu <[email protected]> Signed-off-by: Wenchen Fan <[email protected]>
- Loading branch information
1 parent
a4ba344
commit 15fb5d7
Showing
9 changed files
with
152 additions
and
27 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
76 changes: 76 additions & 0 deletions
76
sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/PrunePartitionSuiteBase.scala
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,76 @@ | ||
/* | ||
* Licensed to the Apache Software Foundation (ASF) under one or more | ||
* contributor license agreements. See the NOTICE file distributed with | ||
* this work for additional information regarding copyright ownership. | ||
* The ASF licenses this file to You under the Apache License, Version 2.0 | ||
* (the "License"); you may not use this file except in compliance with | ||
* the License. You may obtain a copy of the License at | ||
* | ||
* http://www.apache.org/licenses/LICENSE-2.0 | ||
* | ||
* Unless required by applicable law or agreed to in writing, software | ||
* distributed under the License is distributed on an "AS IS" BASIS, | ||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
* See the License for the specific language governing permissions and | ||
* limitations under the License. | ||
*/ | ||
|
||
package org.apache.spark.sql.hive.execution | ||
|
||
import org.apache.spark.sql.QueryTest | ||
import org.apache.spark.sql.execution.SparkPlan | ||
import org.apache.spark.sql.hive.test.TestHiveSingleton | ||
import org.apache.spark.sql.test.SQLTestUtils | ||
|
||
abstract class PrunePartitionSuiteBase extends QueryTest with SQLTestUtils with TestHiveSingleton { | ||
|
||
protected def format: String | ||
|
||
test("SPARK-28169: Convert scan predicate condition to CNF") { | ||
withTempView("temp") { | ||
withTable("t") { | ||
sql( | ||
s""" | ||
|CREATE TABLE t(i INT, p STRING) | ||
|USING $format | ||
|PARTITIONED BY (p)""".stripMargin) | ||
|
||
spark.range(0, 1000, 1).selectExpr("id as col") | ||
.createOrReplaceTempView("temp") | ||
|
||
for (part <- Seq(1, 2, 3, 4)) { | ||
sql( | ||
s""" | ||
|INSERT OVERWRITE TABLE t PARTITION (p='$part') | ||
|SELECT col FROM temp""".stripMargin) | ||
} | ||
|
||
assertPrunedPartitions( | ||
"SELECT * FROM t WHERE p = '1' OR (p = '2' AND i = 1)", 2) | ||
This comment has been minimized.
Sorry, something went wrong.
This comment has been minimized.
Sorry, something went wrong.
AngersZhuuuu
Author
Contributor
|
||
assertPrunedPartitions( | ||
"SELECT * FROM t WHERE (p = '1' AND i = 2) OR (i = 1 OR p = '2')", 4) | ||
assertPrunedPartitions( | ||
"SELECT * FROM t WHERE (p = '1' AND i = 2) OR (p = '3' AND i = 3 )", 2) | ||
assertPrunedPartitions( | ||
"SELECT * FROM t WHERE (p = '1' AND i = 2) OR (p = '2' OR p = '3')", 3) | ||
assertPrunedPartitions( | ||
"SELECT * FROM t", 4) | ||
assertPrunedPartitions( | ||
"SELECT * FROM t WHERE p = '1' AND i = 2", 1) | ||
assertPrunedPartitions( | ||
""" | ||
|SELECT i, COUNT(1) FROM ( | ||
|SELECT * FROM t WHERE p = '1' OR (p = '2' AND i = 1) | ||
|) tmp GROUP BY i | ||
""".stripMargin, 2) | ||
} | ||
} | ||
} | ||
|
||
protected def assertPrunedPartitions(query: String, expected: Long): Unit = { | ||
val plan = sql(query).queryExecution.sparkPlan | ||
assert(getScanExecPartitionSize(plan) == expected) | ||
} | ||
|
||
protected def getScanExecPartitionSize(plan: SparkPlan): Long | ||
} |
@AngersZhuuuu It seems the pushed partition filters are not in
PartitionFilters
: