Skip to content

Commit

Permalink
address comments
Browse files Browse the repository at this point in the history
  • Loading branch information
mgaido91 committed May 24, 2018
1 parent 0c484f1 commit a0af525
Show file tree
Hide file tree
Showing 3 changed files with 55 additions and 30 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ package org.apache.spark.sql.catalyst.expressions
* by `hashCode`.
* - [[EqualTo]] and [[EqualNullSafe]] are reordered by `hashCode`.
* - Other comparisons ([[GreaterThan]], [[LessThan]]) are reversed by `hashCode`.
* - Elements in [[In]] are reordered by `hashCode`.
*/
object Canonicalize {
def execute(e: Expression): Expression = {
Expand Down Expand Up @@ -86,8 +87,7 @@ object Canonicalize {
case Not(LessThanOrEqual(l, r)) => GreaterThan(l, r)

// order the list in the In operator
case In(value, list) =>
In(value, list.sortBy(_.semanticHash()))
case In(value, list) => In(value, list.sortBy(_.hashCode()))

case _ => e
}
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package org.apache.spark.sql.catalyst.expressions

import org.apache.spark.SparkFunSuite
import org.apache.spark.sql.catalyst.dsl.plans._
import org.apache.spark.sql.catalyst.plans.logical.Range

class CanonicalizeSuite extends SparkFunSuite {

  test("SPARK-24276: IN expression with different order are semantically equal") {
    // A trivial one-row Range plan supplies an attribute to build IN predicates against.
    val rangePlan = Range(1, 1, 1, 1)
    val id = rangePlan.output.head

    // Same literal elements in two orders, plus a list with an extra element.
    val inAsc = In(id, Seq(Literal(1), Literal(2)))
    val inDesc = In(id, Seq(Literal(2), Literal(1)))
    val inLarger = In(id, Seq(Literal(1), Literal(2), Literal(3)))

    // Reordered element lists must canonicalize to the same form; a different
    // element list must not.
    assert(inAsc.canonicalized.semanticHash() == inDesc.canonicalized.semanticHash())
    assert(inAsc.canonicalized.semanticHash() != inLarger.canonicalized.semanticHash())

    assert(rangePlan.where(inAsc).sameResult(rangePlan.where(inDesc)))
    assert(!rangePlan.where(inAsc).sameResult(rangePlan.where(inLarger)))

    // The same guarantees should hold when the IN elements are non-literal
    // expressions (here: array constructors).
    def arrayIn(elems: Seq[Int]*): In =
      In(id, elems.map(e => CreateArray(e.map(Literal(_)))))

    val arraysA = arrayIn(Seq(1, 2), Seq(2, 1))
    val arraysB = arrayIn(Seq(2, 1), Seq(1, 2))
    val arraysC = arrayIn(Seq(1, 2), Seq(3, 1))

    assert(arraysA.canonicalized.semanticHash() == arraysB.canonicalized.semanticHash())
    assert(arraysA.canonicalized.semanticHash() != arraysC.canonicalized.semanticHash())

    assert(rangePlan.where(arraysA).sameResult(rangePlan.where(arraysB)))
    assert(!rangePlan.where(arraysA).sameResult(rangePlan.where(arraysC)))
  }
}
28 changes: 0 additions & 28 deletions sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala
Original file line number Diff line number Diff line change
Expand Up @@ -2266,34 +2266,6 @@ class DataFrameSuite extends QueryTest with SharedSQLContext {
checkAnswer(df, df.collect())
}

// End-to-end variant of the SPARK-24276 check: verifies at the executed-plan
// level that IN predicates whose literal lists differ only in order compare
// as sameResult, while a list with different elements does not.
// NOTE(review): this is the test version removed by this commit in favor of
// the unit-level CanonicalizeSuite added above; kept here only as diff context.
test("SPARK-24276: IN returns sameResult if the order of literals is different") {
val df = spark.range(1)
// p1/p2 use the same IN elements in different orders; p3 adds an element.
val p1 = df.where($"id".isin(1, 2))
val p2 = df.where($"id".isin(2, 1))
val p3 = df.where($"id".isin(1, 2, 3))

// Executed plans of reordered IN lists must be recognized as equivalent.
assert(p1.queryExecution.executedPlan.sameResult(p2.queryExecution.executedPlan))
assert(!p1.queryExecution.executedPlan.sameResult(p3.queryExecution.executedPlan))

// Canonicalized logical plans must hash identically for reordered lists only.
val h1 = p1.queryExecution.logical.canonicalized.semanticHash()
val h2 = p2.queryExecution.logical.canonicalized.semanticHash()
val h3 = p3.queryExecution.logical.canonicalized.semanticHash()
assert(h1 == h2)
assert(h1 != h3)

// Repeat the checks with non-literal IN elements (array constructors) via SQL.
Seq(Array(1, 2)).toDF("id").createOrReplaceTempView("t")
val arrays1 = sql("select * from t where id in (array(1, 2), array(2, 1))")
val arrays2 = sql("select * from t where id in (array(2, 1), array(1, 2))")
val arrays3 = sql("select * from t where id in (array(1, 2), array(3, 1))")
assert(arrays1.queryExecution.executedPlan.sameResult(arrays2.queryExecution.executedPlan))
assert(!arrays1.queryExecution.executedPlan.sameResult(arrays3.queryExecution.executedPlan))
val arraysHash1 = arrays1.queryExecution.logical.canonicalized.semanticHash()
val arraysHash2 = arrays2.queryExecution.logical.canonicalized.semanticHash()
val arraysHash3 = arrays3.queryExecution.logical.canonicalized.semanticHash()
assert(arraysHash1 == arraysHash2)
assert(arraysHash1 != arraysHash3)
}

test("SPARK-24313: access map with binary keys") {
val mapWithBinaryKey = map(lit(Array[Byte](1.toByte)), lit(1))
checkAnswer(spark.range(1).select(mapWithBinaryKey.getItem(Array[Byte](1.toByte))), Row(1))
Expand Down

0 comments on commit a0af525

Please sign in to comment.