-
Notifications
You must be signed in to change notification settings - Fork 28.5k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[SPARK-18467][SQL] Extracts method for preparing arguments from StaticInvoke, Invoke and NewInstance and modify to short circuit if arguments have null when needNullCheck == true
.
#15901
Changes from 21 commits
9bf9aa9
28f6200
2f30f53
9ac3f28
5ad6966
a0ac177
757d33e
240fde4
6f6e0b3
2bd6e50
bcb93db
d448b60
8894a96
ca4558f
4d9a037
ebb1241
99a59b2
243888a
e12a9bd
2c52d91
43d2693
bd9c09f
501095f
1baac55
831c521
0b210f8
f8acda6
fe2871c
c88a1ff
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -32,6 +32,79 @@ import org.apache.spark.sql.catalyst.expressions.codegen.{CodegenContext, ExprCo | |
import org.apache.spark.sql.catalyst.util.{ArrayBasedMapData, GenericArrayData} | ||
import org.apache.spark.sql.types._ | ||
|
||
/** | ||
* Common base class for [[StaticInvoke]], [[Invoke]], and [[NewInstance]]. | ||
*/ | ||
trait InvokeLike extends Expression with NonSQLExpression { | ||
|
||
def arguments: Seq[Expression] | ||
|
||
def propagateNull: Boolean | ||
|
||
protected lazy val needNullCheck: Boolean = propagateNull && arguments.exists(_.nullable) | ||
|
||
/** | ||
* Prepares the code for the arguments. | ||
* | ||
* - generates the code for each argument. | ||
* - uses ctx.splitExpressions() so as not to exceed the 64kb JVM limit while preparing arguments. | ||
* - avoids nullability checks that are not needed because the expression is not | ||
* nullable. | ||
* - when needNullCheck == true, short-circuits as soon as one of the arguments is found to be | ||
* null, because preparing the rest of the arguments can be skipped in that case. | ||
* | ||
* @param ctx a [[CodegenContext]] | ||
* @param ev an [[ExprCode]] with unique terms. | ||
* @return (code to prepare arguments, argument string, result of argument null check) | ||
*/ | ||
def prepareArguments(ctx: CodegenContext, ev: ExprCode): (String, String, String) = { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Thanks, removed. |
||
|
||
val resultIsNull = if (needNullCheck) { | ||
val resultIsNull = ctx.freshName("resultIsNull") | ||
ctx.addMutableState("boolean", resultIsNull, "") | ||
resultIsNull | ||
} else { | ||
"false" | ||
} | ||
val argValues = arguments.zipWithIndex.map { case (e, i) => | ||
val argValue = ctx.freshName("argValue") | ||
ctx.addMutableState(ctx.javaType(e.dataType), argValue, "") | ||
argValue | ||
} | ||
|
||
val argCodes = if (needNullCheck) { | ||
val reset = s"$resultIsNull = false;" | ||
val argCodes = arguments.zipWithIndex.map { case (e, i) => | ||
val expr = e.genCode(ctx) | ||
val updateResultIsNull = if (e.nullable) { | ||
s"$resultIsNull = ${expr.isNull};" | ||
} else { | ||
"" | ||
} | ||
s""" | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. how about:
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Thanks, I'll use it. |
||
if (!$resultIsNull) { | ||
${expr.code} | ||
$updateResultIsNull | ||
${argValues(i)} = ${expr.value}; | ||
} | ||
""" | ||
} | ||
reset +: argCodes | ||
} else { | ||
arguments.zipWithIndex.map { case (e, i) => | ||
val expr = e.genCode(ctx) | ||
s""" | ||
${expr.code} | ||
${argValues(i)} = ${expr.value}; | ||
""" | ||
} | ||
} | ||
val argCode = ctx.splitExpressions(ctx.INPUT_ROW, argCodes) | ||
|
||
(argCode, argValues.mkString(", "), resultIsNull) | ||
} | ||
} | ||
|
||
/** | ||
* Invokes a static function, returning the result. By default, any of the arguments being null | ||
* will result in returning null instead of calling the function. | ||
|
@@ -50,7 +123,7 @@ case class StaticInvoke( | |
dataType: DataType, | ||
functionName: String, | ||
arguments: Seq[Expression] = Nil, | ||
propagateNull: Boolean = true) extends Expression with NonSQLExpression { | ||
propagateNull: Boolean = true) extends InvokeLike { | ||
|
||
val objectName = staticObject.getName.stripSuffix("$") | ||
|
||
|
@@ -62,16 +135,10 @@ case class StaticInvoke( | |
|
||
override def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = { | ||
val javaType = ctx.javaType(dataType) | ||
val argGen = arguments.map(_.genCode(ctx)) | ||
val argString = argGen.map(_.value).mkString(", ") | ||
|
||
val callFunc = s"$objectName.$functionName($argString)" | ||
val (argCode, argString, resultIsNull) = prepareArguments(ctx, ev) | ||
|
||
val setIsNull = if (propagateNull && arguments.nonEmpty) { | ||
s"boolean ${ev.isNull} = ${argGen.map(_.isNull).mkString(" || ")};" | ||
} else { | ||
s"boolean ${ev.isNull} = false;" | ||
} | ||
val callFunc = s"$objectName.$functionName($argString)" | ||
|
||
// If the function can return null, we do an extra check to make sure our null bit is still set | ||
// correctly. | ||
|
@@ -82,8 +149,8 @@ case class StaticInvoke( | |
} | ||
|
||
val code = s""" | ||
${argGen.map(_.code).mkString("\n")} | ||
$setIsNull | ||
$argCode | ||
boolean ${ev.isNull} = $resultIsNull; | ||
final $javaType ${ev.value} = ${ev.isNull} ? ${ctx.defaultValue(dataType)} : $callFunc; | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yes, that's right. I modified. Thanks. |
||
$postNullCheck | ||
""" | ||
|
@@ -103,13 +170,15 @@ case class StaticInvoke( | |
* @param functionName The name of the method to call. | ||
* @param dataType The expected return type of the function. | ||
* @param arguments An optional list of expressions, whose evaluation will be passed to the function. | ||
* @param propagateNull When true, and any of the arguments is null, null will be returned instead | ||
* of calling the function. | ||
*/ | ||
case class Invoke( | ||
targetObject: Expression, | ||
functionName: String, | ||
dataType: DataType, | ||
arguments: Seq[Expression] = Nil, | ||
propagateNull: Boolean = true) extends Expression with NonSQLExpression { | ||
propagateNull: Boolean = true) extends InvokeLike { | ||
|
||
override def nullable: Boolean = true | ||
override def children: Seq[Expression] = targetObject +: arguments | ||
|
@@ -131,8 +200,8 @@ case class Invoke( | |
override def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = { | ||
val javaType = ctx.javaType(dataType) | ||
val obj = targetObject.genCode(ctx) | ||
val argGen = arguments.map(_.genCode(ctx)) | ||
val argString = argGen.map(_.value).mkString(", ") | ||
|
||
val (argCode, argString, resultIsNull) = prepareArguments(ctx, ev) | ||
|
||
val returnPrimitive = method.isDefined && method.get.getReturnType.isPrimitive | ||
val needTryCatch = method.isDefined && method.get.getExceptionTypes.nonEmpty | ||
|
@@ -164,23 +233,20 @@ case class Invoke( | |
""" | ||
} | ||
|
||
val setIsNull = if (propagateNull && arguments.nonEmpty) { | ||
s"boolean ${ev.isNull} = ${obj.isNull} || ${argGen.map(_.isNull).mkString(" || ")};" | ||
} else { | ||
s"boolean ${ev.isNull} = ${obj.isNull};" | ||
} | ||
|
||
// If the function can return null, we do an extra check to make sure our null bit is still set | ||
// correctly. | ||
val postNullCheck = if (ctx.defaultValue(dataType) == "null") { | ||
s"${ev.isNull} = ${ev.value} == null;" | ||
} else { | ||
"" | ||
} | ||
|
||
val code = s""" | ||
${obj.code} | ||
${argGen.map(_.code).mkString("\n")} | ||
$setIsNull | ||
if (!${obj.isNull}) { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Anyway, I think this new code looks clearer than before, what do you think? @ueshin There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yes, I agree with you. Thank you for your suggestion. |
||
$argCode | ||
} | ||
boolean ${ev.isNull} = ${obj.isNull} || $resultIsNull; | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. this assumes
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I'm afraid not because if evaluating arguments is split to some methods, There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I know There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I modified the code as you mentioned. But I'm sorry, I didn't understand what you want to do.
Could you let me know if you have other thoughts? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
This is what I mean. We can generate code first and see if it exceeds the 64kb limitation, so logically we do have a chance to use local variables. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I see. Let's see if it exceeds the limit or not, using local variables. |
||
$javaType ${ev.value} = ${ctx.defaultValue(dataType)}; | ||
if (!${ev.isNull}) { | ||
$evaluate | ||
|
@@ -223,10 +289,10 @@ case class NewInstance( | |
arguments: Seq[Expression], | ||
propagateNull: Boolean, | ||
dataType: DataType, | ||
outerPointer: Option[() => AnyRef]) extends Expression with NonSQLExpression { | ||
outerPointer: Option[() => AnyRef]) extends InvokeLike { | ||
private val className = cls.getName | ||
|
||
override def nullable: Boolean = propagateNull | ||
override def nullable: Boolean = needNullCheck | ||
|
||
override def children: Seq[Expression] = arguments | ||
|
||
|
@@ -245,51 +311,35 @@ case class NewInstance( | |
|
||
override def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = { | ||
val javaType = ctx.javaType(dataType) | ||
val argIsNulls = ctx.freshName("argIsNulls") | ||
ctx.addMutableState("boolean[]", argIsNulls, | ||
s"$argIsNulls = new boolean[${arguments.size}];") | ||
val argValues = arguments.zipWithIndex.map { case (e, i) => | ||
val argValue = ctx.freshName("argValue") | ||
ctx.addMutableState(ctx.javaType(e.dataType), argValue, "") | ||
argValue | ||
} | ||
|
||
val argCodes = arguments.zipWithIndex.map { case (e, i) => | ||
val expr = e.genCode(ctx) | ||
expr.code + s""" | ||
$argIsNulls[$i] = ${expr.isNull}; | ||
${argValues(i)} = ${expr.value}; | ||
""" | ||
} | ||
val argCode = ctx.splitExpressions(ctx.INPUT_ROW, argCodes) | ||
val (argCode, argString, resultIsNull) = prepareArguments(ctx, ev) | ||
|
||
val outer = outerPointer.map(func => Literal.fromObject(func()).genCode(ctx)) | ||
|
||
var isNull = ev.isNull | ||
val setIsNull = if (propagateNull && arguments.nonEmpty) { | ||
s""" | ||
boolean $isNull = false; | ||
for (int idx = 0; idx < ${arguments.length}; idx++) { | ||
if ($argIsNulls[idx]) { $isNull = true; break; } | ||
} | ||
""" | ||
val prepareIsNull = if (needNullCheck) { | ||
s"boolean $isNull = $resultIsNull;" | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. one more optimization: we can just write There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Ah, I see what you mean, modified. Thanks. |
||
} else { | ||
isNull = "false" | ||
"" | ||
} | ||
|
||
val constructorCall = outer.map { gen => | ||
s"""${gen.value}.new ${cls.getSimpleName}(${argValues.mkString(", ")})""" | ||
s"${gen.value}.new ${cls.getSimpleName}($argString)" | ||
}.getOrElse { | ||
s"new $className(${argValues.mkString(", ")})" | ||
s"new $className($argString)" | ||
} | ||
|
||
val code = s""" | ||
$argCode | ||
${outer.map(_.code).getOrElse("")} | ||
$setIsNull | ||
final $javaType ${ev.value} = $isNull ? ${ctx.defaultValue(javaType)} : $constructorCall; | ||
""" | ||
$prepareIsNull | ||
""" + | ||
(if (needNullCheck) { | ||
s"final $javaType ${ev.value} = $isNull ? ${ctx.defaultValue(javaType)} : $constructorCall;" | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. If There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. @viirya I think we do need because if There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Or am I misunderstanding what you mean..? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. yeah, I see. This is correct. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Ah, I see. Thanks. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. is it necessary? I'm pretty sure javac can optimize There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Do we use janino instead of javac? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. sorry I mean "java compiler"... I think janino is smart enough about it. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yeah. I go to check janino source code. It has the optimization: https://github.com/janino-compiler/janino/blob/22a7787e62037049e337cb1ce4064c29a4856022/janino/src/main/java/org/codehaus/janino/UnitCompiler.java#L4247. There was a problem hiding this comment. 
Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I see, updated. Thanks. |
||
} else { | ||
s"final $javaType ${ev.value} = $constructorCall;" | ||
}) | ||
ev.copy(code = code, isNull = isNull) | ||
} | ||
|
||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Add comment for this method? It is not simple enough to skip the comment.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
+1