-
Notifications
You must be signed in to change notification settings - Fork 28.5k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[SPARK-18467][SQL] Extract a method for preparing arguments from StaticInvoke, Invoke and NewInstance, and short-circuit if any argument is null when needNullCheck == true
.
#15901
Changes from 3 commits
9bf9aa9
28f6200
2f30f53
9ac3f28
5ad6966
a0ac177
757d33e
240fde4
6f6e0b3
2bd6e50
bcb93db
d448b60
8894a96
ca4558f
4d9a037
ebb1241
99a59b2
243888a
e12a9bd
2c52d91
43d2693
bd9c09f
501095f
1baac55
831c521
0b210f8
f8acda6
fe2871c
c88a1ff
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -32,6 +32,73 @@ import org.apache.spark.sql.catalyst.expressions.codegen.{CodegenContext, ExprCo | |
import org.apache.spark.sql.catalyst.util.{ArrayBasedMapData, GenericArrayData} | ||
import org.apache.spark.sql.types._ | ||
|
||
/** | ||
* Common base class for [[StaticInvoke]], [[Invoke]], and [[NewInstance]]. | ||
*/ | ||
trait InvokeLike extends Expression { | ||
|
||
def arguments: Seq[Expression] | ||
|
||
def propagateNull: Boolean | ||
|
||
def prepareArguments(ctx: CodegenContext, ev: ExprCode): (String, String, String) = { | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Could you add a comment for this method? It is not simple enough to leave undocumented. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. +1 There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Thanks, removed. |
||
|
||
val argsHaveNull = | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. This variable name looks like a boolean flag; can you think of a better name? |
||
if (propagateNull && arguments.exists(_.nullable)) { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. +1 |
||
val argsHaveNull = ctx.freshName("argsHaveNull") | ||
ctx.addMutableState("boolean", argsHaveNull, "") | ||
argsHaveNull | ||
} else { | ||
"" | ||
} | ||
val argValues = arguments.zipWithIndex.map { case (e, i) => | ||
val argValue = ctx.freshName("argValue") | ||
ctx.addMutableState(ctx.javaType(e.dataType), argValue, "") | ||
argValue | ||
} | ||
|
||
val argCodes = | ||
if (propagateNull && arguments.exists(_.nullable)) { | ||
s"$argsHaveNull = false;" +: | ||
arguments.zipWithIndex.map { case (e, i) => | ||
val expr = e.genCode(ctx) | ||
s""" | ||
if (!$argsHaveNull) { | ||
${expr.code} | ||
""" + | ||
(if (e.nullable) { | ||
s""" | ||
$argsHaveNull = ${expr.isNull}; | ||
${argValues(i)} = ${expr.value}; | ||
""" | ||
} else { | ||
s"${argValues(i)} = ${expr.value};" | ||
}) + | ||
""" | ||
} | ||
""" | ||
} | ||
} else { | ||
arguments.zipWithIndex.map { case (e, i) => | ||
val expr = e.genCode(ctx) | ||
s""" | ||
${expr.code} | ||
${argValues(i)} = ${expr.value}; | ||
""" | ||
} | ||
} | ||
val argCode = ctx.splitExpressions(ctx.INPUT_ROW, argCodes) | ||
|
||
val setIsNull = if (propagateNull && arguments.exists(_.nullable)) { | ||
s"${ev.isNull} = ${ev.isNull} || $argsHaveNull;" | ||
} else { | ||
"" | ||
} | ||
|
||
(argCode, argValues.mkString(", "), setIsNull) | ||
} | ||
} | ||
|
||
/** | ||
* Invokes a static function, returning the result. By default, any of the arguments being null | ||
* will result in returning null instead of calling the function. | ||
|
@@ -50,7 +117,7 @@ case class StaticInvoke( | |
dataType: DataType, | ||
functionName: String, | ||
arguments: Seq[Expression] = Nil, | ||
propagateNull: Boolean = true) extends Expression with NonSQLExpression { | ||
propagateNull: Boolean = true) extends Expression with InvokeLike with NonSQLExpression { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. why not make There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Makes sense, I'll modify them. |
||
|
||
val objectName = staticObject.getName.stripSuffix("$") | ||
|
||
|
@@ -62,16 +129,10 @@ case class StaticInvoke( | |
|
||
override def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = { | ||
val javaType = ctx.javaType(dataType) | ||
val argGen = arguments.map(_.genCode(ctx)) | ||
val argString = argGen.map(_.value).mkString(", ") | ||
|
||
val callFunc = s"$objectName.$functionName($argString)" | ||
val (argCode, argString, setIsNull) = prepareArguments(ctx, ev) | ||
|
||
val setIsNull = if (propagateNull && arguments.nonEmpty) { | ||
s"boolean ${ev.isNull} = ${argGen.map(_.isNull).mkString(" || ")};" | ||
} else { | ||
s"boolean ${ev.isNull} = false;" | ||
} | ||
val callFunc = s"$objectName.$functionName($argString)" | ||
|
||
// If the function can return null, we do an extra check to make sure our null bit is still set | ||
// correctly. | ||
|
@@ -82,7 +143,8 @@ case class StaticInvoke( | |
} | ||
|
||
val code = s""" | ||
${argGen.map(_.code).mkString("\n")} | ||
$argCode | ||
boolean ${ev.isNull} = false; | ||
$setIsNull | ||
final $javaType ${ev.value} = ${ev.isNull} ? ${ctx.defaultValue(dataType)} : $callFunc; | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yes, that's right. I modified. Thanks. |
||
$postNullCheck | ||
|
@@ -109,7 +171,7 @@ case class Invoke( | |
functionName: String, | ||
dataType: DataType, | ||
arguments: Seq[Expression] = Nil, | ||
propagateNull: Boolean = true) extends Expression with NonSQLExpression { | ||
propagateNull: Boolean = true) extends Expression with InvokeLike with NonSQLExpression { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The comment of |
||
|
||
override def nullable: Boolean = true | ||
override def children: Seq[Expression] = targetObject +: arguments | ||
|
@@ -131,8 +193,8 @@ case class Invoke( | |
override def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = { | ||
val javaType = ctx.javaType(dataType) | ||
val obj = targetObject.genCode(ctx) | ||
val argGen = arguments.map(_.genCode(ctx)) | ||
val argString = argGen.map(_.value).mkString(", ") | ||
|
||
val (argCode, argString, setIsNull) = prepareArguments(ctx, ev) | ||
|
||
val returnPrimitive = method.isDefined && method.get.getReturnType.isPrimitive | ||
val needTryCatch = method.isDefined && method.get.getExceptionTypes.nonEmpty | ||
|
@@ -164,12 +226,6 @@ case class Invoke( | |
""" | ||
} | ||
|
||
val setIsNull = if (propagateNull && arguments.nonEmpty) { | ||
s"boolean ${ev.isNull} = ${obj.isNull} || ${argGen.map(_.isNull).mkString(" || ")};" | ||
} else { | ||
s"boolean ${ev.isNull} = ${obj.isNull};" | ||
} | ||
|
||
// If the function can return null, we do an extra check to make sure our null bit is still set | ||
// correctly. | ||
val postNullCheck = if (ctx.defaultValue(dataType) == "null") { | ||
|
@@ -179,7 +235,8 @@ case class Invoke( | |
} | ||
val code = s""" | ||
${obj.code} | ||
${argGen.map(_.code).mkString("\n")} | ||
$argCode | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. we don't need to evaluate the arguments if There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. @cloud-fan Thanks, I updated. |
||
boolean ${ev.isNull} = ${obj.isNull}; | ||
$setIsNull | ||
$javaType ${ev.value} = ${ctx.defaultValue(dataType)}; | ||
if (!${ev.isNull}) { | ||
|
@@ -223,10 +280,10 @@ case class NewInstance( | |
arguments: Seq[Expression], | ||
propagateNull: Boolean, | ||
dataType: DataType, | ||
outerPointer: Option[() => AnyRef]) extends Expression with NonSQLExpression { | ||
outerPointer: Option[() => AnyRef]) extends Expression with InvokeLike with NonSQLExpression { | ||
private val className = cls.getName | ||
|
||
override def nullable: Boolean = propagateNull | ||
override def nullable: Boolean = propagateNull && arguments.exists(_.nullable) | ||
|
||
override def children: Seq[Expression] = arguments | ||
|
||
|
@@ -245,51 +302,36 @@ case class NewInstance( | |
|
||
override def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = { | ||
val javaType = ctx.javaType(dataType) | ||
val argIsNulls = ctx.freshName("argIsNulls") | ||
ctx.addMutableState("boolean[]", argIsNulls, | ||
s"$argIsNulls = new boolean[${arguments.size}];") | ||
val argValues = arguments.zipWithIndex.map { case (e, i) => | ||
val argValue = ctx.freshName("argValue") | ||
ctx.addMutableState(ctx.javaType(e.dataType), argValue, "") | ||
argValue | ||
} | ||
|
||
val argCodes = arguments.zipWithIndex.map { case (e, i) => | ||
val expr = e.genCode(ctx) | ||
expr.code + s""" | ||
$argIsNulls[$i] = ${expr.isNull}; | ||
${argValues(i)} = ${expr.value}; | ||
""" | ||
} | ||
val argCode = ctx.splitExpressions(ctx.INPUT_ROW, argCodes) | ||
val (argCode, argString, setIsNull) = prepareArguments(ctx, ev) | ||
|
||
val outer = outerPointer.map(func => Literal.fromObject(func()).genCode(ctx)) | ||
|
||
var isNull = ev.isNull | ||
val setIsNull = if (propagateNull && arguments.nonEmpty) { | ||
s""" | ||
boolean $isNull = false; | ||
for (int idx = 0; idx < ${arguments.length}; idx++) { | ||
if ($argIsNulls[idx]) { $isNull = true; break; } | ||
} | ||
""" | ||
val prepareIsNull = if (propagateNull && arguments.exists(_.nullable)) { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. use There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Oops, I missed it, fixed. |
||
s"boolean $isNull = false;" | ||
} else { | ||
isNull = "false" | ||
"" | ||
} | ||
|
||
val constructorCall = outer.map { gen => | ||
s"""${gen.value}.new ${cls.getSimpleName}(${argValues.mkString(", ")})""" | ||
s"${gen.value}.new ${cls.getSimpleName}($argString)" | ||
}.getOrElse { | ||
s"new $className(${argValues.mkString(", ")})" | ||
s"new $className($argString)" | ||
} | ||
|
||
val code = s""" | ||
$argCode | ||
${outer.map(_.code).getOrElse("")} | ||
$prepareIsNull | ||
$setIsNull | ||
final $javaType ${ev.value} = $isNull ? ${ctx.defaultValue(javaType)} : $constructorCall; | ||
""" | ||
""" + | ||
(if (propagateNull && arguments.exists(_.nullable)) { | ||
s"final $javaType ${ev.value} = $isNull ? ${ctx.defaultValue(javaType)} : $constructorCall;" | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. If There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. @viirya I think we do need it, because if There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Or am I misunderstanding what you mean? There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Yeah, I see. This is correct. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Ah, I see. Thanks. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Is it necessary? I'm pretty sure javac can optimize There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Do we use Janino instead of javac? There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Sorry, I meant "Java compiler"... I think Janino is smart enough about it. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Yeah. I went and checked the Janino source code. It has the optimization: https://github.com/janino-compiler/janino/blob/22a7787e62037049e337cb1ce4064c29a4856022/janino/src/main/java/org/codehaus/janino/UnitCompiler.java#L4247. There was a problem hiding this comment. 
Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I see, updated. Thanks. |
||
} else { | ||
s"final $javaType ${ev.value} = $constructorCall;" | ||
}) | ||
ev.copy(code = code, isNull = isNull) | ||
} | ||
|
||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Does it need to extend `Expression`?