From 47af257666aa28e347792737b0d6c64efb35eb94 Mon Sep 17 00:00:00 2001 From: Yuan Date: Wed, 25 May 2022 18:49:14 +0800 Subject: [PATCH] [NSE-581] Improve GetArrayItem(Split()) performance (#933) * implement splitpart Signed-off-by: Yuan Zhou * Recognize GetArrayItem with StringSplit and replace them with ColumnarStringSplitPart * Fix found issues * move code to spark321 shim layer Signed-off-by: Yuan Zhou * remove splitpart Signed-off-by: Yuan Zhou * fix dataType check Signed-off-by: Yuan Zhou * adding FloatType in ColumnarLiteral Signed-off-by: Yuan Zhou * Revert "adding FloatType in ColumnarLiteral" This reverts commit 4cda7d585e2573d0bb900667391da5b11cb4a48a. Co-authored-by: philo --- .../ColumnarExpressionConverter.scala | 29 +++++++++++++++++++ .../expression/ColumnarTernaryOperator.scala | 13 +++++---- 2 files changed, 36 insertions(+), 6 deletions(-) diff --git a/native-sql-engine/core/src/main/scala/com/intel/oap/expression/ColumnarExpressionConverter.scala b/native-sql-engine/core/src/main/scala/com/intel/oap/expression/ColumnarExpressionConverter.scala index 9206be146..a9832f2c1 100644 --- a/native-sql-engine/core/src/main/scala/com/intel/oap/expression/ColumnarExpressionConverter.scala +++ b/native-sql-engine/core/src/main/scala/com/intel/oap/expression/ColumnarExpressionConverter.scala @@ -387,6 +387,26 @@ object ColumnarExpressionConverter extends Logging { r.scale, convertBoundRefToAttrRef = convertBoundRefToAttrRef), expr) + case getArrayItem: GetArrayItem => + getArrayItem.child match { + case strSplit: StringSplit => + ColumnarTernaryOperator.create( + replaceWithColumnarExpression( + strSplit.str, + attributeSeq, + convertBoundRefToAttrRef = convertBoundRefToAttrRef), + replaceWithColumnarExpression( + strSplit.regex, + convertBoundRefToAttrRef = convertBoundRefToAttrRef), + replaceWithColumnarExpression( + getArrayItem.ordinal, + convertBoundRefToAttrRef = convertBoundRefToAttrRef), + new StringSplit(strSplit.str, strSplit.regex, getArrayItem.ordinal)) + case other => + throw new UnsupportedOperationException( + s" --> ${other.getClass} | ${other} is not currently" + + s" supported as child of GetArrayItem.") + } case b: BinaryExpression => logInfo(s"${expr.getClass} ${expr} is supported, no_cal is $check_if_no_calculation.") ColumnarBinaryExpression.create( @@ -464,6 +484,15 @@ object ColumnarExpressionConverter extends Logging { return true case c: Concat => c.children.map(containsSubquery).exists(_ == true) + case getArrayItem: GetArrayItem => + getArrayItem.child match { + case strSplit: StringSplit => + strSplit.children.map(containsSubquery).exists(_ == true) + case other => + throw new UnsupportedOperationException( + s" --> ${other.getClass} | ${other} is not currently" + + s" supported as child of GetArrayItem.") + } case b: BinaryExpression => containsSubquery(b.left) || containsSubquery(b.right) case s: String2TrimExpression => diff --git a/native-sql-engine/core/src/main/scala/com/intel/oap/expression/ColumnarTernaryOperator.scala b/native-sql-engine/core/src/main/scala/com/intel/oap/expression/ColumnarTernaryOperator.scala index 423251ba6..284c518cd 100644 --- a/native-sql-engine/core/src/main/scala/com/intel/oap/expression/ColumnarTernaryOperator.scala +++ b/native-sql-engine/core/src/main/scala/com/intel/oap/expression/ColumnarTernaryOperator.scala @@ -73,7 +73,7 @@ class ColumnarSubString(str: Expression, pos: Expression, len: Expression, origi } // StringSplit, not functionality ready, need array type support. -class ColumnarStringSplit(child: Expression, regex: Expression, +class ColumnarStringSplitPart(child: Expression, regex: Expression, limit: Expression, original: Expression) extends StringSplit(child: Expression, regex: Expression, limit: Expression) @@ -86,11 +86,12 @@ class ColumnarStringSplit(child: Expression, regex: Expression, val supportedTypes = List( StringType ) - if (supportedTypes.indexOf(child.dataType) == -1) { + if (supportedTypes.indexOf(dataType) == -1) { throw new UnsupportedOperationException( - s"${child.dataType} is not supported in ColumnarStringSplit.") + s"${child} | ${child.dataType} is not supported in ColumnarStringSplitPart.") } } + override def dataType: DataType = StringType override def doColumnarCodeGen(args: java.lang.Object) : (TreeNode, ArrowType) = { @@ -101,7 +102,7 @@ class ColumnarStringSplit(child: Expression, regex: Expression, val (limit_node, limitType): (TreeNode, ArrowType) = limit.asInstanceOf[ColumnarExpression].doColumnarCodeGen(args) - val resultType = new ArrowType.Bool() + val resultType = new ArrowType.Utf8() val funcNode = TreeBuilder.makeFunction( "split_part", Lists.newArrayList(child_node, regex_node, @@ -271,8 +272,8 @@ object ColumnarTernaryOperator { case ss: Substring => new ColumnarSubString(src, arg1, arg2, ss) // Currently not supported. -// case a: StringSplit => -// new ColumnarStringSplit(str, a.regex, a.limit, a) + case ssp: StringSplit => + new ColumnarStringSplitPart(src, arg1, arg2, ssp) case st: StringTranslate => new ColumnarStringTranslate(src, arg1, arg2, st) case sl: StringLocate =>