Skip to content

Commit

Permalink
[SPARK-16845][SQL] GeneratedClass$SpecificOrdering grows beyond 64 KB
Browse files Browse the repository at this point in the history
Prior to this patch, we'll generate `compare(...)` for `GeneratedClass$SpecificOrdering` like below, leading to Janino exceptions saying the code grows beyond 64 KB.

``` scala
/* 005 */ class SpecificOrdering extends o.a.s.sql.catalyst.expressions.codegen.BaseOrdering {
/* ..... */   ...
/* 10969 */   private int compare(InternalRow a, InternalRow b) {
/* 10970 */     InternalRow i = null;  // Holds current row being evaluated.
/* 10971 */
/* 1.... */     code for comparing field0
/* 1.... */     code for comparing field1
/* 1.... */     ...
/* 1.... */     code for comparing field449
/* 15012 */
/* 15013 */     return 0;
/* 15014 */   }
/* 15015 */ }
```

This patch would break `compare(...)` into smaller `compare_xxx(...)` methods when necessary; then we'll get generated `compare(...)` like:

``` scala
/* 001 */ public SpecificOrdering generate(Object[] references) {
/* 002 */   return new SpecificOrdering(references);
/* 003 */ }
/* 004 */
/* 005 */ class SpecificOrdering extends o.a.s.sql.catalyst.expressions.codegen.BaseOrdering {
/* 006 */
/* 007 */     ...
/* 1.... */
/* 11290 */   private int compare_0(InternalRow a, InternalRow b) {
/* 11291 */     InternalRow i = null;  // Holds current row being evaluated.
/* 11292 */
/* 11293 */     i = a;
/* 11294 */     boolean isNullA;
/* 11295 */     UTF8String primitiveA;
/* 11296 */     {
/* 11297 */
/* 11298 */       Object obj = ((Expression) references[0]).eval(null);
/* 11299 */       UTF8String value = (UTF8String) obj;
/* 11300 */       isNullA = false;
/* 11301 */       primitiveA = value;
/* 11302 */     }
/* 11303 */     i = b;
/* 11304 */     boolean isNullB;
/* 11305 */     UTF8String primitiveB;
/* 11306 */     {
/* 11307 */
/* 11308 */       Object obj = ((Expression) references[0]).eval(null);
/* 11309 */       UTF8String value = (UTF8String) obj;
/* 11310 */       isNullB = false;
/* 11311 */       primitiveB = value;
/* 11312 */     }
/* 11313 */     if (isNullA && isNullB) {
/* 11314 */       // Nothing
/* 11315 */     } else if (isNullA) {
/* 11316 */       return -1;
/* 11317 */     } else if (isNullB) {
/* 11318 */       return 1;
/* 11319 */     } else {
/* 11320 */       int comp = primitiveA.compare(primitiveB);
/* 11321 */       if (comp != 0) {
/* 11322 */         return comp;
/* 11323 */       }
/* 11324 */     }
/* 11325 */
/* 11326 */
/* 11327 */     i = a;
/* 11328 */     boolean isNullA1;
/* 11329 */     UTF8String primitiveA1;
/* 11330 */     {
/* 11331 */
/* 11332 */       Object obj1 = ((Expression) references[1]).eval(null);
/* 11333 */       UTF8String value1 = (UTF8String) obj1;
/* 11334 */       isNullA1 = false;
/* 11335 */       primitiveA1 = value1;
/* 11336 */     }
/* 11337 */     i = b;
/* 11338 */     boolean isNullB1;
/* 11339 */     UTF8String primitiveB1;
/* 11340 */     {
/* 11341 */
/* 11342 */       Object obj1 = ((Expression) references[1]).eval(null);
/* 11343 */       UTF8String value1 = (UTF8String) obj1;
/* 11344 */       isNullB1 = false;
/* 11345 */       primitiveB1 = value1;
/* 11346 */     }
/* 11347 */     if (isNullA1 && isNullB1) {
/* 11348 */       // Nothing
/* 11349 */     } else if (isNullA1) {
/* 11350 */       return -1;
/* 11351 */     } else if (isNullB1) {
/* 11352 */       return 1;
/* 11353 */     } else {
/* 11354 */       int comp = primitiveA1.compare(primitiveB1);
/* 11355 */       if (comp != 0) {
/* 11356 */         return comp;
/* 11357 */       }
/* 11358 */     }
/* 1.... */
/* 1.... */   ...
/* 1.... */
/* 12652 */     return 0;
/* 12653 */   }
/* 1.... */
/* 1.... */   ...
/* 15387 */
/* 15388 */   public int compare(InternalRow a, InternalRow b) {
/* 15389 */
/* 15390 */     int comp_0 = compare_0(a, b);
/* 15391 */     if (comp_0 != 0) {
/* 15392 */       return comp_0;
/* 15393 */     }
/* 15394 */
/* 15395 */     int comp_1 = compare_1(a, b);
/* 15396 */     if (comp_1 != 0) {
/* 15397 */       return comp_1;
/* 15398 */     }
/* 1.... */
/* 1.... */     ...
/* 1.... */
/* 15450 */     return 0;
/* 15451 */   }
/* 15452 */ }
```
- a new added test case which
  - would fail prior to this patch
  - would pass with this patch
- ordering correctness should already be covered by existing tests like those in `OrderingSuite`

A major part of this PR - the refactoring work of `splitExpression()` - has been done by ueshin.

Author: Liwei Lin <[email protected]>
Author: Takuya UESHIN <[email protected]>
Author: Takuya Ueshin <[email protected]>

Closes apache#15480 from lw-lin/spec-ordering-64k-.
  • Loading branch information
lw-lin authored and ueshin committed Mar 3, 2017
1 parent 7380188 commit 399d950
Show file tree
Hide file tree
Showing 3 changed files with 62 additions and 7 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -620,6 +620,27 @@ class CodegenContext {
// Cannot split these expressions because they are not created from a row object.
return expressions.mkString("\n")
}
splitExpressions(expressions, "apply", ("InternalRow", row) :: Nil)
}

/**
* Splits the generated code of expressions into multiple functions, because function has
* 64kb code size limit in JVM
*
* @param expressions the codes to evaluate expressions.
* @param funcName the split function name base.
* @param arguments the list of (type, name) of the arguments of the split function.
* @param returnType the return type of the split function.
* @param makeSplitFunction makes split function body, e.g. add preparation or cleanup.
* @param foldFunctions folds the split function calls.
*/
def splitExpressions(
expressions: Seq[String],
funcName: String,
arguments: Seq[(String, String)],
returnType: String = "void",
makeSplitFunction: String => String = identity,
foldFunctions: Seq[String] => String = _.mkString("", ";\n", ";")): String = {
val blocks = new ArrayBuffer[String]()
val blockBuilder = new StringBuilder()
for (code <- expressions) {
Expand All @@ -639,19 +660,20 @@ class CodegenContext {
// inline execution if only one block
blocks.head
} else {
val apply = freshName("apply")
val func = freshName(funcName)
val argString = arguments.map { case (t, name) => s"$t $name" }.mkString(", ")
val functions = blocks.zipWithIndex.map { case (body, i) =>
val name = s"${apply}_$i"
val name = s"${func}_$i"
val code = s"""
|private void $name(InternalRow $row) {
| $body
|private $returnType $name($argString) {
| ${makeSplitFunction(body)}
|}
""".stripMargin
addNewFunction(name, code)
name
}

functions.map(name => s"$name($row);").mkString("\n")
foldFunctions(functions.map(name => s"$name(${arguments.map(_._2).mkString(", ")})"))
}
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -109,8 +109,31 @@ object GenerateOrdering extends CodeGenerator[Seq[SortOrder], Ordering[InternalR
}
}
"""
}.mkString("\n")
comparisons
}

ctx.splitExpressions(
expressions = comparisons,
funcName = "compare",
arguments = Seq(("InternalRow", "a"), ("InternalRow", "b")),
returnType = "int",
makeSplitFunction = { body =>
s"""
InternalRow ${ctx.INPUT_ROW} = null; // Holds current row being evaluated.
$body
return 0;
"""
},
foldFunctions = { funCalls =>
funCalls.zipWithIndex.map { case (funCall, i) =>
val comp = ctx.freshName("comp")
s"""
int $comp = $funCall;
if ($comp != 0) {
return $comp;
}
"""
}.mkString
})
}

protected def create(ordering: Seq[SortOrder]): BaseOrdering = {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -127,4 +127,14 @@ class OrderingSuite extends SparkFunSuite with ExpressionEvalHelper {
}
}
}

test("SPARK-16845: GeneratedClass$SpecificOrdering grows beyond 64 KB") {
val sortOrder = Literal("abc").asc

// this is passing prior to SPARK-16845, and it should also be passing after SPARK-16845
GenerateOrdering.generate(Array.fill(40)(sortOrder))

// verify that we can support up to 5000 ordering comparisons, which should be sufficient
GenerateOrdering.generate(Array.fill(5000)(sortOrder))
}
}

0 comments on commit 399d950

Please sign in to comment.