apache · lw-lin · Oct 13, 2016 · Oct 18, 2016 · Nov 5, 2016 · Dec 1, 2016
diff --git a/...t/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateOrdering.scala b/...t/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateOrdering.scala
@@ -72,7 +72,7 @@ object GenerateOrdering extends CodeGenerator[Seq[SortOrder], Ordering[InternalR
    * Generates the code for ordering based on the given order.
    */
   def genComparisons(ctx: CodegenContext, ordering: Seq[SortOrder]): String = {
-    val comparisons = ordering.map { order =>
+    def comparisons(orderingGroup: Seq[SortOrder]) = orderingGroup.map { order =>
       val eval = order.child.genCode(ctx)
       val asc = order.isAscending
       val isNullA = ctx.freshName("isNullA")
@@ -118,7 +118,42 @@ object GenerateOrdering extends CodeGenerator[Seq[SortOrder], Ordering[InternalR
           }
       """
     }.mkString("\n")
-    comparisons
+
+    /*
+     * HotSpot does not JIT-compile methods whose bytecode exceeds 8000 bytes, so each
+     * generated comparison function should stay below that limit. Assuming around 170 bytes
+     * per ordering comparison and a budget of 7000 bytes, that gives 7000 / 170, roughly 40.
+     */
+    val numberOfComparisonsThreshold = 40
+
+    if (ordering.size <= numberOfComparisonsThreshold) {
+      comparisons(ordering)
+    } else {
+      val groupedOrderingItr = ordering.grouped(numberOfComparisonsThreshold)
+      val funcNamePrefix = ctx.freshName("compare")
+      val funcNames = groupedOrderingItr.zipWithIndex.map { case (orderingGroup, i) =>
+        val funcName = s"${funcNamePrefix}_$i"
+        val funcCode =
+          s"""
+             |private int $funcName(InternalRow a, InternalRow b) {
+             |  InternalRow ${ctx.INPUT_ROW} = null;  // Holds current row being evaluated.
+             |  ${comparisons(orderingGroup)}
+             |  return 0;
+             |}
+          """.stripMargin
+        ctx.addNewFunction(funcName, funcCode)
+        funcName
+      }
+
+      funcNames.zipWithIndex.map { case (funcName, i) =>
+        s"""
+           |int comp_$i = ${funcName}(a, b);
+           |if (comp_$i != 0) {
+           |  return comp_$i;
+           |}
+        """.stripMargin
+      }.mkString
+    }
   }
 
   protected def create(ordering: Seq[SortOrder]): BaseOrdering = {

diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/OrderingSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/OrderingSuite.scala
@@ -127,4 +127,17 @@ class OrderingSuite extends SparkFunSuite with ExpressionEvalHelper {
       }
     }
   }
+
+  test("SPARK-16845: GeneratedClass$SpecificOrdering grows beyond 64 KB") {
+    // Every comparison in the generated ordering uses the same ascending string literal.
+    val literalOrder = Literal("abc").asc
+    // Ordering sizes to generate, in this sequence:
+    //   40    - was passing prior to SPARK-16845, and should keep passing after it
+    //   450   - was FAILING prior to SPARK-16845, but should be passing after it
+    //   10000 - verifies we support up to 10000 comparisons, which should be sufficient
+    val orderingSizes = Seq(40, 450, 10000)
+    orderingSizes.foreach { size =>
+      GenerateOrdering.generate(Array.fill(size)(literalOrder))
+    }
+  }
 }