Skip to content
This repository has been archived by the owner on Sep 18, 2023. It is now read-only.

[NSE-581] Improve GetArrayItem(Split()) performance #933

Merged
merged 8 commits into from
May 25, 2022
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -338,6 +338,21 @@ object ColumnarExpressionConverter extends Logging {
sr.replaceExpr,
convertBoundRefToAttrRef = convertBoundRefToAttrRef),
expr)
case sr: StringSplitPart =>
check_if_no_calculation = false
logWarning(s"${expr.getClass} ${expr} is supported, no_cal is $check_if_no_calculation.")
ColumnarTernaryOperator.create(
replaceWithColumnarExpression(
sr.str,
attributeSeq,
convertBoundRefToAttrRef = convertBoundRefToAttrRef),
replaceWithColumnarExpression(
sr.regex,
convertBoundRefToAttrRef = convertBoundRefToAttrRef),
replaceWithColumnarExpression(
sr.limit,
convertBoundRefToAttrRef = convertBoundRefToAttrRef),
expr)
case u: UnaryExpression =>
logInfo(s"${expr.getClass} ${expr} is supported, no_cal is $check_if_no_calculation.")
if (!u.isInstanceOf[CheckOverflow] || !u.child.isInstanceOf[Divide]) {
Expand Down Expand Up @@ -387,6 +402,29 @@ object ColumnarExpressionConverter extends Logging {
r.scale,
convertBoundRefToAttrRef = convertBoundRefToAttrRef),
expr)
case getArrayItem: GetArrayItem =>
getArrayItem.child match {
case strSplit: StringSplit =>
ColumnarTernaryOperator.create(
replaceWithColumnarExpression(
strSplit.str,
attributeSeq,
convertBoundRefToAttrRef = convertBoundRefToAttrRef),
replaceWithColumnarExpression(
strSplit.regex,
convertBoundRefToAttrRef = convertBoundRefToAttrRef),
// replaceWithColumnarExpression(
// strSplit.limit,
// convertBoundRefToAttrRef = convertBoundRefToAttrRef),
replaceWithColumnarExpression(
getArrayItem.ordinal,
convertBoundRefToAttrRef = convertBoundRefToAttrRef),
new StringSplitPart(strSplit.str, strSplit.regex, getArrayItem.ordinal, null))
case other =>
throw new UnsupportedOperationException(
s" --> ${other.getClass} | ${other} is not currently" +
s" supported as child of GetArrayItem.")
}
case b: BinaryExpression =>
logInfo(s"${expr.getClass} ${expr} is supported, no_cal is $check_if_no_calculation.")
ColumnarBinaryExpression.create(
Expand Down Expand Up @@ -441,6 +479,15 @@ object ColumnarExpressionConverter extends Logging {
return true
case c: Concat =>
c.children.map(containsSubquery).exists(_ == true)
case getArrayItem: GetArrayItem =>
getArrayItem.child match {
case strSplit: StringSplit =>
strSplit.children.map(containsSubquery).exists(_ == true)
case other =>
throw new UnsupportedOperationException(
s" --> ${other.getClass} | ${other} is not currently" +
s" supported as child of GetArrayItem.")
}
case b: BinaryExpression =>
containsSubquery(b.left) || containsSubquery(b.right)
case s: String2TrimExpression =>
Expand All @@ -460,6 +507,10 @@ object ColumnarExpressionConverter extends Logging {
containsSubquery(sr.srcExpr) ||
containsSubquery(sr.searchExpr) ||
containsSubquery(sr.replaceExpr)
case sr: StringSplitPart =>
containsSubquery(sr.str) ||
containsSubquery(sr.regex) ||
containsSubquery(sr.limit)
case expr =>
throw new UnsupportedOperationException(
s" --> ${expr.getClass} | ${expr} is not currently supported.")
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,7 @@ class ColumnarSubString(str: Expression, pos: Expression, len: Expression, origi
}

// StringSplit is not functionally ready; it needs array type support.
class ColumnarStringSplit(child: Expression, regex: Expression,
class ColumnarStringSplitPart(child: Expression, regex: Expression,
limit: Expression, original: Expression)
extends StringSplit(child: Expression,
regex: Expression, limit: Expression)
Expand Down Expand Up @@ -101,7 +101,7 @@ class ColumnarStringSplit(child: Expression, regex: Expression,
val (limit_node, limitType): (TreeNode, ArrowType) =
limit.asInstanceOf[ColumnarExpression].doColumnarCodeGen(args)

val resultType = new ArrowType.Bool()
val resultType = new ArrowType.Utf8()
val funcNode =
TreeBuilder.makeFunction(
"split_part", Lists.newArrayList(child_node, regex_node,
Expand Down Expand Up @@ -271,8 +271,8 @@ object ColumnarTernaryOperator {
case ss: Substring =>
new ColumnarSubString(src, arg1, arg2, ss)
// Currently not supported.
// case a: StringSplit =>
// new ColumnarStringSplit(str, a.regex, a.limit, a)
case ssp: StringSplitPart =>
new ColumnarStringSplitPart(src, arg1, arg2, ssp)
case st: StringTranslate =>
new ColumnarStringTranslate(src, arg1, arg2, st)
case sl: StringLocate =>
Expand Down
Loading