Skip to content
This repository has been archived by the owner on Sep 18, 2023. It is now read-only.

Commit

Permalink
[NSE-581] Improve GetArrayItem(Split()) performance (#933)
Browse files Browse the repository at this point in the history
* implement splitpart

Signed-off-by: Yuan Zhou <[email protected]>

* Recognize GetArrayItem with StringSplit and replace them with ColumnarStringSplitPart

* Fix found issues

* move code to spark321 shim layer

Signed-off-by: Yuan Zhou <[email protected]>

* remove splitpart

Signed-off-by: Yuan Zhou <[email protected]>

* fix dataType check

Signed-off-by: Yuan Zhou <[email protected]>

* adding FloatType in ColumnarLiteral

Signed-off-by: Yuan Zhou <[email protected]>

* Revert "adding FloatType in ColumnarLiteral"

This reverts commit 4cda7d5.

Co-authored-by: philo <[email protected]>
  • Loading branch information
zhouyuan and PHILO-HE authored May 25, 2022
1 parent d263ec5 commit 47af257
Show file tree
Hide file tree
Showing 2 changed files with 36 additions and 6 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -387,6 +387,26 @@ object ColumnarExpressionConverter extends Logging {
r.scale,
convertBoundRefToAttrRef = convertBoundRefToAttrRef),
expr)
// Rewrites GetArrayItem(StringSplit(str, regex, limit), ordinal) into a single ternary
// columnar operator, so no intermediate array has to be produced.
case getArrayItem: GetArrayItem =>
getArrayItem.child match {
case strSplit: StringSplit =>
// The three operands handed to the ternary operator are the converted str, regex and
// ordinal expressions, in that order.
ColumnarTernaryOperator.create(
replaceWithColumnarExpression(
strSplit.str,
attributeSeq,
convertBoundRefToAttrRef = convertBoundRefToAttrRef),
// attributeSeq is passed only for str; regex and ordinal rely on the callee's default.
replaceWithColumnarExpression(
strSplit.regex,
convertBoundRefToAttrRef = convertBoundRefToAttrRef),
replaceWithColumnarExpression(
getArrayItem.ordinal,
convertBoundRefToAttrRef = convertBoundRefToAttrRef),
// The "original" expression is a synthetic StringSplit whose third argument is the
// ordinal; strSplit.limit is not forwarded anywhere in this arm.
// NOTE(review): presumably only equivalent when limit does not truncate the split
// (e.g. limit = -1) - confirm, and confirm the index base the native side expects.
new StringSplit(strSplit.str, strSplit.regex, getArrayItem.ordinal))
case other =>
// Any other child of GetArrayItem is rejected with an UnsupportedOperationException.
throw new UnsupportedOperationException(
s" --> ${other.getClass} | ${other} is not currently" +
s" supported as child of GetArrayItem.")
}
case b: BinaryExpression =>
logInfo(s"${expr.getClass} ${expr} is supported, no_cal is $check_if_no_calculation.")
ColumnarBinaryExpression.create(
Expand Down Expand Up @@ -464,6 +484,15 @@ object ColumnarExpressionConverter extends Logging {
return true
case c: Concat =>
c.children.map(containsSubquery).exists(_ == true)
case getArrayItem: GetArrayItem =>
  // This arm is matched ahead of the generic BinaryExpression arm, so both operands of
  // GetArrayItem must be inspected here: the array-producing child and the ordinal.
  getArrayItem.child match {
    case strSplit: StringSplit =>
      // Check every StringSplit input, then the ordinal, which the generic
      // BinaryExpression arm would otherwise have covered as the right operand.
      strSplit.children.exists(containsSubquery) ||
        containsSubquery(getArrayItem.ordinal)
    case other =>
      // Only split-then-index is supported; any other child is rejected.
      throw new UnsupportedOperationException(
        s" --> ${other.getClass} | ${other} is not currently" +
        s" supported as child of GetArrayItem.")
  }
case b: BinaryExpression =>
containsSubquery(b.left) || containsSubquery(b.right)
case s: String2TrimExpression =>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,7 @@ class ColumnarSubString(str: Expression, pos: Expression, len: Expression, origi
}

// A general StringSplit is not functionally ready: it needs array type support.
class ColumnarStringSplit(child: Expression, regex: Expression,
class ColumnarStringSplitPart(child: Expression, regex: Expression,
limit: Expression, original: Expression)
extends StringSplit(child: Expression,
regex: Expression, limit: Expression)
Expand All @@ -86,11 +86,12 @@ class ColumnarStringSplit(child: Expression, regex: Expression,
val supportedTypes = List(
StringType
)
if (supportedTypes.indexOf(child.dataType) == -1) {
if (supportedTypes.indexOf(dataType) == -1) {
throw new UnsupportedOperationException(
s"${child.dataType} is not supported in ColumnarStringSplit.")
s"${child} | ${child.dataType} is not supported in ColumnarStringSplitPart.")
}
}
override def dataType: DataType = StringType

override def doColumnarCodeGen(args: java.lang.Object)
: (TreeNode, ArrowType) = {
Expand All @@ -101,7 +102,7 @@ class ColumnarStringSplit(child: Expression, regex: Expression,
val (limit_node, limitType): (TreeNode, ArrowType) =
limit.asInstanceOf[ColumnarExpression].doColumnarCodeGen(args)

val resultType = new ArrowType.Bool()
val resultType = new ArrowType.Utf8()
val funcNode =
TreeBuilder.makeFunction(
"split_part", Lists.newArrayList(child_node, regex_node,
Expand Down Expand Up @@ -271,8 +272,8 @@ object ColumnarTernaryOperator {
case ss: Substring =>
new ColumnarSubString(src, arg1, arg2, ss)
// Currently not supported.
// case a: StringSplit =>
// new ColumnarStringSplit(str, a.regex, a.limit, a)
case ssp: StringSplit =>
new ColumnarStringSplitPart(src, arg1, arg2, ssp)
case st: StringTranslate =>
new ColumnarStringTranslate(src, arg1, arg2, st)
case sl: StringLocate =>
Expand Down

0 comments on commit 47af257

Please sign in to comment.