From 4b5e1fe94c65f0feb77d849e7defa42b6007628d Mon Sep 17 00:00:00 2001 From: Zhongshuai Pei <799203320@qq.com> Date: Mon, 11 May 2015 19:22:44 -0700 Subject: [PATCH] [SPARK-7437] [SQL] Fold "literal in (item1, item2, ..., literal, ...)" into true or false directly SQL ``` select key from src where 3 in (4, 5); ``` Before ``` == Optimized Logical Plan == Project [key#12] Filter 3 INSET (5,4) MetastoreRelation default, src, None ``` After ``` == Optimized Logical Plan == LocalRelation [key#228], [] ``` Author: Zhongshuai Pei <799203320@qq.com> Author: DoingDone9 <799203320@qq.com> Closes #5972 from DoingDone9/InToFalse and squashes the following commits: 4c722a2 [Zhongshuai Pei] Update predicates.scala abe2bbb [Zhongshuai Pei] Update Optimizer.scala fa461a5 [Zhongshuai Pei] Update Optimizer.scala e34c28a [Zhongshuai Pei] Update predicates.scala 24739bd [Zhongshuai Pei] Update ConstantFoldingSuite.scala f4dbf50 [Zhongshuai Pei] Update ConstantFoldingSuite.scala 35ceb7a [Zhongshuai Pei] Update Optimizer.scala 36c194e [Zhongshuai Pei] Update Optimizer.scala 2e8f6ca [Zhongshuai Pei] Update Optimizer.scala 14952e2 [Zhongshuai Pei] Merge pull request #13 from apache/master f03fe7f [Zhongshuai Pei] Merge pull request #12 from apache/master f12fa50 [Zhongshuai Pei] Merge pull request #10 from apache/master f61210c [Zhongshuai Pei] Merge pull request #9 from apache/master 34b1a9a [Zhongshuai Pei] Merge pull request #8 from apache/master 802261c [DoingDone9] Merge pull request #7 from apache/master d00303b [DoingDone9] Merge pull request #6 from apache/master 98b134f [DoingDone9] Merge pull request #5 from apache/master 161cae3 [DoingDone9] Merge pull request #4 from apache/master c87e8b6 [DoingDone9] Merge pull request #3 from apache/master cb1852d [DoingDone9] Merge pull request #2 from apache/master c3f046f [DoingDone9] Merge pull request #1 from apache/master --- .../sql/catalyst/expressions/predicates.scala | 1 + .../sql/catalyst/optimizer/Optimizer.scala | 4 +-- .../optimizer/ConstantFoldingSuite.scala | 33 +++++++++++++++++++ 3 files changed, 36 insertions(+), 2 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/predicates.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/predicates.scala index 50b0f3ee5f93f..1d72a9eb834b9 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/predicates.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/predicates.scala @@ -110,6 +110,7 @@ case class InSet(value: Expression, hset: Set[Any]) override def children: Seq[Expression] = value :: Nil + override def foldable: Boolean = value.foldable override def nullable: Boolean = true // TODO: Figure out correct nullability semantics of IN. override def toString: String = s"$value INSET ${hset.mkString("(", ",", ")")}" diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala index d7b2f203a6934..1ee5fb245fbb2 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala @@ -46,13 +46,13 @@ object DefaultOptimizer extends Optimizer { CombineLimits) :: Batch("ConstantFolding", FixedPoint(100), NullPropagation, + OptimizeIn, ConstantFolding, LikeSimplification, BooleanSimplification, SimplifyFilters, SimplifyCasts, - SimplifyCaseConversionExpressions, - OptimizeIn) :: + SimplifyCaseConversionExpressions) :: Batch("Decimal Optimizations", FixedPoint(100), DecimalAggregates) :: Batch("LocalRelation", FixedPoint(100), diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/ConstantFoldingSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/ConstantFoldingSuite.scala index 6b7d9a85c341b..5697c2272b8e8 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/ConstantFoldingSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/ConstantFoldingSuite.scala @@ -35,6 +35,7 @@ class ConstantFoldingSuite extends PlanTest { Batch("AnalysisNodes", Once, EliminateSubQueries) :: Batch("ConstantFolding", Once, + OptimizeIn, ConstantFolding, BooleanSimplification) :: Nil } @@ -247,4 +248,36 @@ class ConstantFoldingSuite extends PlanTest { comparePlans(optimized, correctAnswer) } + + test("Constant folding test: Fold In(v, list) into true or false") { + var originalQuery = + testRelation + .select('a) + .where(In(Literal(1), Seq(Literal(1), Literal(2)))) + + var optimized = Optimize.execute(originalQuery.analyze) + + var correctAnswer = + testRelation + .select('a) + .where(Literal(true)) + .analyze + + comparePlans(optimized, correctAnswer) + + originalQuery = + testRelation + .select('a) + .where(In(Literal(1), Seq(Literal(1), 'a.attr))) + + optimized = Optimize.execute(originalQuery.analyze) + + correctAnswer = + testRelation + .select('a) + .where(Literal(true)) + .analyze + + comparePlans(optimized, correctAnswer) + } }