From 06b0cee8d1cd4f157595be9b9a038d55a3c4fa3c Mon Sep 17 00:00:00 2001 From: Liwei Lin Date: Tue, 10 Jan 2017 19:35:46 +0800 Subject: [PATCH] [SPARK-16845][SQL] `GeneratedClass$SpecificOrdering` grows beyond 64 KB ## What changes were proposed in this pull request? Prior to this patch, we'll generate `compare(...)` for `GeneratedClass$SpecificOrdering` like below, leading to Janino exceptions saying the code grows beyond 64 KB. ``` scala /* 005 */ class SpecificOrdering extends o.a.s.sql.catalyst.expressions.codegen.BaseOrdering { /* ..... */ ... /* 10969 */ private int compare(InternalRow a, InternalRow b) { /* 10970 */ InternalRow i = null; // Holds current row being evaluated. /* 10971 */ /* 1.... */ code for comparing field0 /* 1.... */ code for comparing field1 /* 1.... */ ... /* 1.... */ code for comparing field449 /* 15012 */ /* 15013 */ return 0; /* 15014 */ } /* 15015 */ } ``` This patch would break `compare(...)` into smaller `compare_xxx(...)` methods when necessary; then we'll get generated `compare(...)` like: ``` scala /* 001 */ public SpecificOrdering generate(Object[] references) { /* 002 */ return new SpecificOrdering(references); /* 003 */ } /* 004 */ /* 005 */ class SpecificOrdering extends o.a.s.sql.catalyst.expressions.codegen.BaseOrdering { /* 006 */ /* 007 */ ... /* 1.... */ /* 11290 */ private int compare_0(InternalRow a, InternalRow b) { /* 11291 */ InternalRow i = null; // Holds current row being evaluated. /* 11292 */ /* 11293 */ i = a; /* 11294 */ boolean isNullA; /* 11295 */ UTF8String primitiveA; /* 11296 */ { /* 11297 */ /* 11298 */ Object obj = ((Expression) references[0]).eval(null); /* 11299 */ UTF8String value = (UTF8String) obj; /* 11300 */ isNullA = false; /* 11301 */ primitiveA = value; /* 11302 */ } /* 11303 */ i = b; /* 11304 */ boolean isNullB; /* 11305 */ UTF8String primitiveB; /* 11306 */ { /* 11307 */ /* 11308 */ Object obj = ((Expression) references[0]).eval(null); /* 11309 */ UTF8String value = (UTF8String) obj; /* 11310 */ isNullB = false; /* 11311 */ primitiveB = value; /* 11312 */ } /* 11313 */ if (isNullA && isNullB) { /* 11314 */ // Nothing /* 11315 */ } else if (isNullA) { /* 11316 */ return -1; /* 11317 */ } else if (isNullB) { /* 11318 */ return 1; /* 11319 */ } else { /* 11320 */ int comp = primitiveA.compare(primitiveB); /* 11321 */ if (comp != 0) { /* 11322 */ return comp; /* 11323 */ } /* 11324 */ } /* 11325 */ /* 11326 */ /* 11327 */ i = a; /* 11328 */ boolean isNullA1; /* 11329 */ UTF8String primitiveA1; /* 11330 */ { /* 11331 */ /* 11332 */ Object obj1 = ((Expression) references[1]).eval(null); /* 11333 */ UTF8String value1 = (UTF8String) obj1; /* 11334 */ isNullA1 = false; /* 11335 */ primitiveA1 = value1; /* 11336 */ } /* 11337 */ i = b; /* 11338 */ boolean isNullB1; /* 11339 */ UTF8String primitiveB1; /* 11340 */ { /* 11341 */ /* 11342 */ Object obj1 = ((Expression) references[1]).eval(null); /* 11343 */ UTF8String value1 = (UTF8String) obj1; /* 11344 */ isNullB1 = false; /* 11345 */ primitiveB1 = value1; /* 11346 */ } /* 11347 */ if (isNullA1 && isNullB1) { /* 11348 */ // Nothing /* 11349 */ } else if (isNullA1) { /* 11350 */ return -1; /* 11351 */ } else if (isNullB1) { /* 11352 */ return 1; /* 11353 */ } else { /* 11354 */ int comp = primitiveA1.compare(primitiveB1); /* 11355 */ if (comp != 0) { /* 11356 */ return comp; /* 11357 */ } /* 11358 */ } /* 1.... */ /* 1.... */ ... /* 1.... */ /* 12652 */ return 0; /* 12653 */ } /* 1.... */ /* 1.... */ ... /* 15387 */ /* 15388 */ public int compare(InternalRow a, InternalRow b) { /* 15389 */ /* 15390 */ int comp_0 = compare_0(a, b); /* 15391 */ if (comp_0 != 0) { /* 15392 */ return comp_0; /* 15393 */ } /* 15394 */ /* 15395 */ int comp_1 = compare_1(a, b); /* 15396 */ if (comp_1 != 0) { /* 15397 */ return comp_1; /* 15398 */ } /* 1.... */ /* 1.... */ ... /* 1.... */ /* 15450 */ return 0; /* 15451 */ } /* 15452 */ } ``` ## How was this patch tested? - a new added test case which - would fail prior to this patch - would pass with this patch - ordering correctness should already be covered by existing tests like those in `OrderingSuite` ## Acknowledgement A major part of this PR - the refactoring work of `splitExpression()` - has been done by ueshin. Author: Liwei Lin Author: Takuya UESHIN Author: Takuya Ueshin Closes #15480 from lw-lin/spec-ordering-64k-. --- .../expressions/codegen/CodeGenerator.scala | 27 +++++++++++++++---- .../codegen/GenerateOrdering.scala | 27 +++++++++++++++++-- .../catalyst/expressions/OrderingSuite.scala | 10 +++++++ 3 files changed, 57 insertions(+), 7 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala index d7746ca7a052e..f8f868b59b967 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala @@ -640,8 +640,24 @@ class CodegenContext { splitExpressions(expressions, "apply", ("InternalRow", row) :: Nil) } - private def splitExpressions( - expressions: Seq[String], funcName: String, arguments: Seq[(String, String)]): String = { + /** + * Splits the generated code of expressions into multiple functions, because function has + * 64kb code size limit in JVM + * + * @param expressions the codes to evaluate expressions. + * @param funcName the split function name base. + * @param arguments the list of (type, name) of the arguments of the split function. + * @param returnType the return type of the split function. + * @param makeSplitFunction makes split function body, e.g. add preparation or cleanup. + * @param foldFunctions folds the split function calls. + */ + def splitExpressions( + expressions: Seq[String], + funcName: String, + arguments: Seq[(String, String)], + returnType: String = "void", + makeSplitFunction: String => String = identity, + foldFunctions: Seq[String] => String = _.mkString("", ";\n", ";")): String = { val blocks = new ArrayBuffer[String]() val blockBuilder = new StringBuilder() for (code <- expressions) { @@ -662,18 +678,19 @@ class CodegenContext { blocks.head } else { val func = freshName(funcName) + val argString = arguments.map { case (t, name) => s"$t $name" }.mkString(", ") val functions = blocks.zipWithIndex.map { case (body, i) => val name = s"${func}_$i" val code = s""" - |private void $name(${arguments.map { case (t, name) => s"$t $name" }.mkString(", ")}) { - | $body + |private $returnType $name($argString) { + | ${makeSplitFunction(body)} |} """.stripMargin addNewFunction(name, code) name } - functions.map(name => s"$name(${arguments.map(_._2).mkString(", ")});").mkString("\n") + foldFunctions(functions.map(name => s"$name(${arguments.map(_._2).mkString(", ")})")) } } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateOrdering.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateOrdering.scala index 1cef95654a17b..b7335f12b64b1 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateOrdering.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateOrdering.scala @@ -117,8 +117,31 @@ object GenerateOrdering extends CodeGenerator[Seq[SortOrder], Ordering[InternalR } } """ - }.mkString("\n") - comparisons + } + + ctx.splitExpressions( + expressions = comparisons, + funcName = "compare", + arguments = Seq(("InternalRow", "a"), ("InternalRow", "b")), + returnType = "int", + makeSplitFunction = { body => + s""" + InternalRow ${ctx.INPUT_ROW} = null; // Holds current row being evaluated. + $body + return 0; + """ + }, + foldFunctions = { funCalls => + funCalls.zipWithIndex.map { case (funCall, i) => + val comp = ctx.freshName("comp") + s""" + int $comp = $funCall; + if ($comp != 0) { + return $comp; + } + """ + }.mkString + }) } protected def create(ordering: Seq[SortOrder]): BaseOrdering = { diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/OrderingSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/OrderingSuite.scala index 8cc2ab46c0c85..190fab5d249bb 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/OrderingSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/OrderingSuite.scala @@ -127,4 +127,14 @@ class OrderingSuite extends SparkFunSuite with ExpressionEvalHelper { } } } + + test("SPARK-16845: GeneratedClass$SpecificOrdering grows beyond 64 KB") { + val sortOrder = Literal("abc").asc + + // this is passing prior to SPARK-16845, and it should also be passing after SPARK-16845 + GenerateOrdering.generate(Array.fill(40)(sortOrder)) + + // verify that we can support up to 5000 ordering comparisons, which should be sufficient + GenerateOrdering.generate(Array.fill(5000)(sortOrder)) + } }