From ecc6720ed96c750333173fc53a81e850abcd9013 Mon Sep 17 00:00:00 2001 From: Liwei Lin Date: Thu, 13 Oct 2016 16:16:29 +0800 Subject: [PATCH 1/6] [SPARK-16845][SQL] `GeneratedClass$SpecificOrdering` grows beyond 64 KB --- .../expressions/codegen/CodeGenerator.scala | 1 - .../codegen/GenerateOrdering.scala | 43 +++++++++++++++++-- .../catalyst/expressions/OrderingSuite.scala | 13 ++++++ 3 files changed, 53 insertions(+), 4 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala index 6cab50ae1bf8d..f4d3434c75430 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala @@ -537,7 +537,6 @@ class CodegenContext { val funcCode: String = s""" public int $compareFunc(InternalRow a, InternalRow b) { - InternalRow i = null; $comparisons return 0; } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateOrdering.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateOrdering.scala index 1cef95654a17b..f15b576a841b7 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateOrdering.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateOrdering.scala @@ -72,7 +72,7 @@ object GenerateOrdering extends CodeGenerator[Seq[SortOrder], Ordering[InternalR * Generates the code for ordering based on the given order. */ def genComparisons(ctx: CodegenContext, ordering: Seq[SortOrder]): String = { - val comparisons = ordering.map { order => + def comparisons(orderingGroup: Seq[SortOrder]) = orderingGroup.map { order => val eval = order.child.genCode(ctx) val asc = order.isAscending val isNullA = ctx.freshName("isNullA") @@ -118,7 +118,45 @@ object GenerateOrdering extends CodeGenerator[Seq[SortOrder], Ordering[InternalR } """ }.mkString("\n") - comparisons + + /* + * 40 = 7000 bytes / 170 (around 170 bytes per ordering comparison). + * The maximum byte code size to be compiled for HotSpot is 8000 bytes. + * We should keep less than 8000 bytes. + */ + val numberOfComparisonsThreshold = 40 + + if (ordering.size <= numberOfComparisonsThreshold) { + s""" + | InternalRow ${ctx.INPUT_ROW} = null; // Holds current row being evaluated. + | ${comparisons(ordering)} + """.stripMargin + } else { + val groupedOrderingItr = ordering.grouped(numberOfComparisonsThreshold) + var groupedOrderingLength = 0 + groupedOrderingItr.zipWithIndex.foreach { case (orderingGroup, i) => + groupedOrderingLength += 1 + val funcName = s"compare_$i" + val funcCode = + s""" + |private int $funcName(InternalRow a, InternalRow b) { + | InternalRow ${ctx.INPUT_ROW} = null; // Holds current row being evaluated. + | ${comparisons(orderingGroup)} + | return 0; + |} + """.stripMargin + ctx.addNewFunction(funcName, funcCode) + } + + (0 to groupedOrderingLength - 1).map { i => + s""" + |int comp_$i = compare_$i(a, b); + |if (comp_$i != 0) { + | return comp_$i; + |} + """.stripMargin + }.mkString + } } protected def create(ordering: Seq[SortOrder]): BaseOrdering = { @@ -142,7 +180,6 @@ object GenerateOrdering extends CodeGenerator[Seq[SortOrder], Ordering[InternalR ${ctx.declareAddedFunctions()} public int compare(InternalRow a, InternalRow b) { - InternalRow ${ctx.INPUT_ROW} = null; // Holds current row being evaluated. $comparisons return 0; } diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/OrderingSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/OrderingSuite.scala index 8cc2ab46c0c85..072d3cb0fd031 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/OrderingSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/OrderingSuite.scala @@ -127,4 +127,17 @@ class OrderingSuite extends SparkFunSuite with ExpressionEvalHelper { } } } + + test("SPARK-16845: GeneratedClass$SpecificOrdering grows beyond 64 KB") { + val sortOrder = Literal("abc").asc + + // this is passing prior to SPARK-16845, and it should also be passing after SPARK-16845 + GenerateOrdering.generate(Array.fill(40)(sortOrder)) + + // this is FAILING prior to SPARK-16845, but it should be passing after SPARK-16845 + GenerateOrdering.generate(Array.fill(450)(sortOrder)) + + // verify that we can support up to 10000 ordering comparisons, which should be sufficient + GenerateOrdering.generate(Array.fill(10000)(sortOrder)) + } } From 1ae9935b963b298459c09ab54b3f39f532230fef Mon Sep 17 00:00:00 2001 From: Liwei Lin Date: Tue, 18 Oct 2016 15:17:38 +0800 Subject: [PATCH 2/6] Address comments --- .../expressions/codegen/GenerateOrdering.scala | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateOrdering.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateOrdering.scala index f15b576a841b7..51a0770c6c1fd 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateOrdering.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateOrdering.scala @@ -133,10 +133,9 @@ object GenerateOrdering extends CodeGenerator[Seq[SortOrder], Ordering[InternalR """.stripMargin } else { val groupedOrderingItr = ordering.grouped(numberOfComparisonsThreshold) - var groupedOrderingLength = 0 - groupedOrderingItr.zipWithIndex.foreach { case (orderingGroup, i) => - groupedOrderingLength += 1 - val funcName = s"compare_$i" + val funcNamePrefix = ctx.freshName("compare") + val funcNames = groupedOrderingItr.zipWithIndex.map { case (orderingGroup, i) => + val funcName = s"${funcNamePrefix}_$i" val funcCode = s""" |private int $funcName(InternalRow a, InternalRow b) { @@ -146,11 +145,12 @@ object GenerateOrdering extends CodeGenerator[Seq[SortOrder], Ordering[InternalR |} """.stripMargin ctx.addNewFunction(funcName, funcCode) + funcName } - (0 to groupedOrderingLength - 1).map { i => + funcNames.zipWithIndex.map { case (funcName, i) => s""" - |int comp_$i = compare_$i(a, b); + |int comp_$i = ${funcName}(a, b); |if (comp_$i != 0) { | return comp_$i; |} From 33b5fd86b2933ca359cb943ddf21f9bc714d38bf Mon Sep 17 00:00:00 2001 From: Liwei Lin Date: Sat, 5 Nov 2016 21:46:15 +0800 Subject: [PATCH 3/6] Address more comments --- .../sql/catalyst/expressions/codegen/CodeGenerator.scala | 1 + .../sql/catalyst/expressions/codegen/GenerateOrdering.scala | 6 ++---- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala index f4d3434c75430..6cab50ae1bf8d 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala @@ -537,6 +537,7 @@ class CodegenContext { val funcCode: String = s""" public int $compareFunc(InternalRow a, InternalRow b) { + InternalRow i = null; $comparisons return 0; } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateOrdering.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateOrdering.scala index 51a0770c6c1fd..af8b75b14c377 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateOrdering.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateOrdering.scala @@ -127,10 +127,7 @@ object GenerateOrdering extends CodeGenerator[Seq[SortOrder], Ordering[InternalR val numberOfComparisonsThreshold = 40 if (ordering.size <= numberOfComparisonsThreshold) { - s""" - | InternalRow ${ctx.INPUT_ROW} = null; // Holds current row being evaluated. - | ${comparisons(ordering)} - """.stripMargin + comparisons(ordering) } else { val groupedOrderingItr = ordering.grouped(numberOfComparisonsThreshold) val funcNamePrefix = ctx.freshName("compare") @@ -180,6 +177,7 @@ object GenerateOrdering extends CodeGenerator[Seq[SortOrder], Ordering[InternalR ${ctx.declareAddedFunctions()} public int compare(InternalRow a, InternalRow b) { + InternalRow ${ctx.INPUT_ROW} = null; // Holds current row being evaluated. $comparisons return 0; } From 0aedc47fa94366a39e66da82b70c79dccac7d375 Mon Sep 17 00:00:00 2001 From: Takuya UESHIN Date: Fri, 2 Dec 2016 00:16:37 +0900 Subject: [PATCH 4/6] Refactor `ctx.splitExpressions()`. --- .../expressions/codegen/CodeGenerator.scala | 16 +++-- .../codegen/GenerateOrdering.scala | 60 ++++++++----------- .../catalyst/expressions/OrderingSuite.scala | 4 +- 3 files changed, 37 insertions(+), 43 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala index 6cab50ae1bf8d..eb9c2578a6d2f 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala @@ -610,8 +610,13 @@ class CodegenContext { splitExpressions(expressions, "apply", ("InternalRow", row) :: Nil) } - private def splitExpressions( - expressions: Seq[String], funcName: String, arguments: Seq[(String, String)]): String = { + def splitExpressions( + expressions: Seq[String], + funcName: String, + arguments: Seq[(String, String)], + returnType: String = "void", + makeSplitFunction: String => String = identity, + foldFunctions: Seq[String] => String = _.mkString("", ";\n", ";")): String = { val blocks = new ArrayBuffer[String]() val blockBuilder = new StringBuilder() for (code <- expressions) { @@ -632,18 +637,19 @@ class CodegenContext { blocks.head } else { val func = freshName(funcName) + val argString = arguments.map { case (t, name) => s"$t $name" }.mkString(", ") val functions = blocks.zipWithIndex.map { case (body, i) => val name = s"${func}_$i" val code = s""" - |private void $name(${arguments.map { case (t, name) => s"$t $name" }.mkString(", ")}) { - | $body + |private $returnType $name($argString) { + | ${makeSplitFunction(body)} |} """.stripMargin addNewFunction(name, code) name } - functions.map(name => s"$name(${arguments.map(_._2).mkString(", ")});").mkString("\n") + foldFunctions(functions.map(name => s"$name(${arguments.map(_._2).mkString(", ")})")) } } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateOrdering.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateOrdering.scala index af8b75b14c377..99844d69a39fb 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateOrdering.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateOrdering.scala @@ -72,7 +72,7 @@ object GenerateOrdering extends CodeGenerator[Seq[SortOrder], Ordering[InternalR * Generates the code for ordering based on the given order. */ def genComparisons(ctx: CodegenContext, ordering: Seq[SortOrder]): String = { - def comparisons(orderingGroup: Seq[SortOrder]) = orderingGroup.map { order => + val comparisons = ordering.map { order => val eval = order.child.genCode(ctx) val asc = order.isAscending val isNullA = ctx.freshName("isNullA") @@ -117,43 +117,31 @@ object GenerateOrdering extends CodeGenerator[Seq[SortOrder], Ordering[InternalR } } """ - }.mkString("\n") - - /* - * 40 = 7000 bytes / 170 (around 170 bytes per ordering comparison). - * The maximum byte code size to be compiled for HotSpot is 8000 bytes. - * We should keep less than 8000 bytes. - */ - val numberOfComparisonsThreshold = 40 - - if (ordering.size <= numberOfComparisonsThreshold) { - comparisons(ordering) - } else { - val groupedOrderingItr = ordering.grouped(numberOfComparisonsThreshold) - val funcNamePrefix = ctx.freshName("compare") - val funcNames = groupedOrderingItr.zipWithIndex.map { case (orderingGroup, i) => - val funcName = s"${funcNamePrefix}_$i" - val funcCode = - s""" - |private int $funcName(InternalRow a, InternalRow b) { - | InternalRow ${ctx.INPUT_ROW} = null; // Holds current row being evaluated. - | ${comparisons(orderingGroup)} - | return 0; - |} - """.stripMargin - ctx.addNewFunction(funcName, funcCode) - funcName - } + } - funcNames.zipWithIndex.map { case (funcName, i) => + ctx.splitExpressions( + expressions = comparisons, + funcName = "compare", + arguments = Seq(("InternalRow", "a"), ("InternalRow", "b")), + returnType = "int", + makeSplitFunction = { body => s""" - |int comp_$i = ${funcName}(a, b); - |if (comp_$i != 0) { - | return comp_$i; - |} - """.stripMargin - }.mkString - } + InternalRow ${ctx.INPUT_ROW} = null; // Holds current row being evaluated. + $body + return 0; + """ + }, + foldFunctions = { funCalls => + val comp = ctx.freshName("comp") + funCalls.zipWithIndex.map { case (funCall, i) => + s""" + int ${comp}_$i = $funCall; + if (${comp}_$i != 0) { + return ${comp}_$i; + } + """ + }.mkString + }) } protected def create(ordering: Seq[SortOrder]): BaseOrdering = { diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/OrderingSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/OrderingSuite.scala index 072d3cb0fd031..c0bdd37d2a760 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/OrderingSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/OrderingSuite.scala @@ -137,7 +137,7 @@ class OrderingSuite extends SparkFunSuite with ExpressionEvalHelper { // this is FAILING prior to SPARK-16845, but it should be passing after SPARK-16845 GenerateOrdering.generate(Array.fill(450)(sortOrder)) - // verify that we can support up to 10000 ordering comparisons, which should be sufficient - GenerateOrdering.generate(Array.fill(10000)(sortOrder)) + // verify that we can support up to 5000 ordering comparisons, which should be sufficient + GenerateOrdering.generate(Array.fill(5000)(sortOrder)) } } From 3d31cb3e6309950bb723a1b83a004caab493c5e7 Mon Sep 17 00:00:00 2001 From: Takuya Ueshin Date: Tue, 13 Dec 2016 07:50:58 +0000 Subject: [PATCH 5/6] Add a comment for `splitExpressions`. --- .../catalyst/expressions/codegen/CodeGenerator.scala | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala index eb9c2578a6d2f..d6659ddbe89f3 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala @@ -610,6 +610,17 @@ class CodegenContext { splitExpressions(expressions, "apply", ("InternalRow", row) :: Nil) } + /** + * Splits the generated code of expressions into multiple functions, because function has + * 64kb code size limit in JVM + * + * @param expressions the codes to evaluate expressions. + * @param funcName the split function name base. + * @param arguments the list of (type, name) of the arguments of the split function. + * @param returnType the return type of the split function. + * @param makeSplitFunction makes split function body, e.g. add preparation or cleanup. + * @param foldFunctions folds the split function calls. + */ def splitExpressions( expressions: Seq[String], funcName: String, From 4aef473b70ef20d7a93fd974a3d844e49ca6ef9e Mon Sep 17 00:00:00 2001 From: Liwei Lin Date: Tue, 10 Jan 2017 15:37:25 +0800 Subject: [PATCH 6/6] @cloud-fan's comments --- .../catalyst/expressions/codegen/GenerateOrdering.scala | 8 ++++---- .../spark/sql/catalyst/expressions/OrderingSuite.scala | 3 --- 2 files changed, 4 insertions(+), 7 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateOrdering.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateOrdering.scala index 99844d69a39fb..b7335f12b64b1 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateOrdering.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateOrdering.scala @@ -132,12 +132,12 @@ object GenerateOrdering extends CodeGenerator[Seq[SortOrder], Ordering[InternalR """ }, foldFunctions = { funCalls => - val comp = ctx.freshName("comp") funCalls.zipWithIndex.map { case (funCall, i) => + val comp = ctx.freshName("comp") s""" - int ${comp}_$i = $funCall; - if (${comp}_$i != 0) { - return ${comp}_$i; + int $comp = $funCall; + if ($comp != 0) { + return $comp; } """ }.mkString diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/OrderingSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/OrderingSuite.scala index c0bdd37d2a760..190fab5d249bb 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/OrderingSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/OrderingSuite.scala @@ -134,9 +134,6 @@ class OrderingSuite extends SparkFunSuite with ExpressionEvalHelper { // this is passing prior to SPARK-16845, and it should also be passing after SPARK-16845 GenerateOrdering.generate(Array.fill(40)(sortOrder)) - // this is FAILING prior to SPARK-16845, but it should be passing after SPARK-16845 - GenerateOrdering.generate(Array.fill(450)(sortOrder)) - // verify that we can support up to 5000 ordering comparisons, which should be sufficient GenerateOrdering.generate(Array.fill(5000)(sortOrder)) }