From 5f322feebc3e0c21a2fd289b22f62702c2834ab6 Mon Sep 17 00:00:00 2001 From: Yuan Zhou Date: Thu, 9 Jun 2022 23:26:03 +0800 Subject: [PATCH 1/6] implement lpad/rpad Signed-off-by: Yuan Zhou --- .../ColumnarExpressionConverter.scala | 20 +++++ .../expression/ColumnarTernaryOperator.scala | 74 +++++++++++++++++++ 2 files changed, 94 insertions(+) diff --git a/native-sql-engine/core/src/main/scala/com/intel/oap/expression/ColumnarExpressionConverter.scala b/native-sql-engine/core/src/main/scala/com/intel/oap/expression/ColumnarExpressionConverter.scala index 383d2fbe1..d8c7a3ff2 100644 --- a/native-sql-engine/core/src/main/scala/com/intel/oap/expression/ColumnarExpressionConverter.scala +++ b/native-sql-engine/core/src/main/scala/com/intel/oap/expression/ColumnarExpressionConverter.scala @@ -323,6 +323,26 @@ object ColumnarExpressionConverter extends Logging { convertBoundRefToAttrRef = convertBoundRefToAttrRef), expr ) + case slpad: StringLPad => + ColumnarTernaryOperator.create( + replaceWithColumnarExpression(slpad.str, attributeSeq, + convertBoundRefToAttrRef = convertBoundRefToAttrRef), + replaceWithColumnarExpression(slpad.len, attributeSeq, + convertBoundRefToAttrRef = convertBoundRefToAttrRef), + replaceWithColumnarExpression(slpad.pad, attributeSeq, + convertBoundRefToAttrRef = convertBoundRefToAttrRef), + expr + ) + case srpad: StringRPad => + ColumnarTernaryOperator.create( + replaceWithColumnarExpression(srpad.str, attributeSeq, + convertBoundRefToAttrRef = convertBoundRefToAttrRef), + replaceWithColumnarExpression(srpad.len, attributeSeq, + convertBoundRefToAttrRef = convertBoundRefToAttrRef), + replaceWithColumnarExpression(srpad.pad, attributeSeq, + convertBoundRefToAttrRef = convertBoundRefToAttrRef), + expr + ) case sr: StringReplace => check_if_no_calculation = false logInfo(s"${expr.getClass} ${expr} is supported, no_cal is $check_if_no_calculation.") diff --git 
a/native-sql-engine/core/src/main/scala/com/intel/oap/expression/ColumnarTernaryOperator.scala b/native-sql-engine/core/src/main/scala/com/intel/oap/expression/ColumnarTernaryOperator.scala
index bec56bb8f..34dfea96c 100644
--- a/native-sql-engine/core/src/main/scala/com/intel/oap/expression/ColumnarTernaryOperator.scala
+++ b/native-sql-engine/core/src/main/scala/com/intel/oap/expression/ColumnarTernaryOperator.scala
@@ -203,6 +203,76 @@ class ColumnarRegExpExtract(subject: Expression, regexp: Expression, idx: Expres
   }
 }
 
+// Columnar StringLPad: builds an "lpad" function node over (str, len, pad) with a Utf8 result.
+class ColumnarStringLPad(str: Expression, len: Expression, pad: Expression,
+    original: Expression) extends StringLPad(str: Expression,
+    len: Expression, pad: Expression) with ColumnarExpression {
+
+  buildCheck
+
+  // Validates inputs at construction: str must be StringType and pad must be a Literal.
+  def buildCheck: Unit = {
+    val supportedType = List(StringType)
+    if (supportedType.indexOf(str.dataType) == -1) {
+      throw new RuntimeException("Only string type is expected!")
+    }
+
+    if (!pad.isInstanceOf[Literal]) {
+      throw new UnsupportedOperationException("Only literal pad" +
+        " is supported in ColumnarStringLPad by now!")
+    }
+  }
+
+  override def supportColumnarCodegen(args: java.lang.Object): Boolean = false
+
+  override def doColumnarCodeGen(args: Object): (TreeNode, ArrowType) = {
+    val (str_node, _): (TreeNode, ArrowType) =
+      str.asInstanceOf[ColumnarExpression].doColumnarCodeGen(args)
+    val (len_node, _): (TreeNode, ArrowType) =
+      len.asInstanceOf[ColumnarExpression].doColumnarCodeGen(args)
+    val (pad_node, _): (TreeNode, ArrowType) =
+      pad.asInstanceOf[ColumnarExpression].doColumnarCodeGen(args)
+    val resultType = new ArrowType.Utf8()
+    (TreeBuilder.makeFunction("lpad",
+      Lists.newArrayList(str_node, len_node, pad_node), resultType), resultType)
+  }
+}
+
+// Columnar StringRPad: builds an "rpad" function node over (str, len, pad) with a Utf8 result.
+class ColumnarStringRPad(str: Expression, len: Expression, pad: Expression,
+    original: Expression) extends StringRPad(str: Expression,
+    len: Expression, pad: Expression) with ColumnarExpression {
+
+  buildCheck
+
+  // Validates inputs at construction: str must be StringType and pad must be a Literal.
+  def buildCheck: Unit = {
+    val supportedType = List(StringType)
+    if (supportedType.indexOf(str.dataType) == -1) {
+      throw new RuntimeException("Only string type is expected!")
+    }
+
+    if (!pad.isInstanceOf[Literal]) {
+      throw new UnsupportedOperationException("Only literal pad" +
+        " is supported in ColumnarStringRPad by now!")
+    }
+  }
+
+  override def supportColumnarCodegen(args: java.lang.Object): Boolean = false
+
+  override def doColumnarCodeGen(args: Object): (TreeNode, ArrowType) = {
+    val (str_node, _): (TreeNode, ArrowType) =
+      str.asInstanceOf[ColumnarExpression].doColumnarCodeGen(args)
+    val (len_node, _): (TreeNode, ArrowType) =
+      len.asInstanceOf[ColumnarExpression].doColumnarCodeGen(args)
+    val (pad_node, _): (TreeNode, ArrowType) =
+      pad.asInstanceOf[ColumnarExpression].doColumnarCodeGen(args)
+    val resultType = new ArrowType.Utf8()
+    (TreeBuilder.makeFunction("rpad",
+      Lists.newArrayList(str_node, len_node, pad_node), resultType), resultType)
+  }
+}
+
 class ColumnarSubstringIndex(strExpr: Expression, delimExpr: Expression, countExpr: Expression,
     original: Expression) extends SubstringIndex(strExpr, delimExpr, countExpr)
     with ColumnarExpression {
@@ -310,6 +380,10 @@ object ColumnarTernaryOperator {
         new ColumnarStringLocate(src, arg1, arg2, sl)
       case re: RegExpExtract =>
         new ColumnarRegExpExtract(src, arg1, arg2, re)
+      case slpad: StringLPad =>
+        new ColumnarStringLPad(src, arg1, arg2, slpad)
+      case srpad: StringRPad =>
+        new ColumnarStringRPad(src, arg1, arg2, srpad)
       case substrIndex: SubstringIndex =>
         new ColumnarSubstringIndex(src, arg1, arg2, substrIndex)
       case _: StringReplace =>
From 8a512853341287fa4e0b7229ebdbaac64b31101d Mon Sep 17 00:00:00 2001
From: Yuan Zhou
Date: Sun, 12 Jun 2022 17:13:19 +0800
Subject: [PATCH 2/6] check arrow

Signed-off-by: Yuan Zhou
---
 arrow-data-source/script/build_arrow.sh | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arrow-data-source/script/build_arrow.sh b/arrow-data-source/script/build_arrow.sh
index
d8ec40128..68e78837e 100755 --- a/arrow-data-source/script/build_arrow.sh +++ b/arrow-data-source/script/build_arrow.sh @@ -62,7 +62,7 @@ echo "ARROW_SOURCE_DIR=${ARROW_SOURCE_DIR}" echo "ARROW_INSTALL_DIR=${ARROW_INSTALL_DIR}" mkdir -p $ARROW_SOURCE_DIR mkdir -p $ARROW_INSTALL_DIR -git clone https://github.com/oap-project/arrow.git --branch arrow-4.0.0-oap $ARROW_SOURCE_DIR +git clone https://github.com/zhouyuan/arrow.git --branch wip_lpad $ARROW_SOURCE_DIR pushd $ARROW_SOURCE_DIR cmake ./cpp \ From 34bdf92fd62c62978247a30b015141de415bf4df Mon Sep 17 00:00:00 2001 From: philo Date: Mon, 13 Jun 2022 17:02:22 +0800 Subject: [PATCH 3/6] Let ColumnarIn fallback for non-literal type --- .../scala/com/intel/oap/expression/ColumnarInOperator.scala | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/native-sql-engine/core/src/main/scala/com/intel/oap/expression/ColumnarInOperator.scala b/native-sql-engine/core/src/main/scala/com/intel/oap/expression/ColumnarInOperator.scala index 77328a709..d30d3cda9 100644 --- a/native-sql-engine/core/src/main/scala/com/intel/oap/expression/ColumnarInOperator.scala +++ b/native-sql-engine/core/src/main/scala/com/intel/oap/expression/ColumnarInOperator.scala @@ -48,6 +48,11 @@ class ColumnarIn(value: Expression, list: Seq[Expression], original: Expression) throw new UnsupportedOperationException( s"${value.dataType} is not supported in ColumnarIn.") } + if (list.map(_.isInstanceOf[Literal]).exists(_ == false)) { + throw new UnsupportedOperationException( + "Only Literal Type is supported for the input list!" 
+ ) + } } override def doColumnarCodeGen(args: java.lang.Object): (TreeNode, ArrowType) = { From e7c9787da3851bb9459ceb4ad9290d9894cf4753 Mon Sep 17 00:00:00 2001 From: philo Date: Mon, 13 Jun 2022 17:09:58 +0800 Subject: [PATCH 4/6] Check subquery --- .../intel/oap/expression/ColumnarExpressionConverter.scala | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/native-sql-engine/core/src/main/scala/com/intel/oap/expression/ColumnarExpressionConverter.scala b/native-sql-engine/core/src/main/scala/com/intel/oap/expression/ColumnarExpressionConverter.scala index d8c7a3ff2..d049e076e 100644 --- a/native-sql-engine/core/src/main/scala/com/intel/oap/expression/ColumnarExpressionConverter.scala +++ b/native-sql-engine/core/src/main/scala/com/intel/oap/expression/ColumnarExpressionConverter.scala @@ -551,6 +551,10 @@ object ColumnarExpressionConverter extends Logging { containsSubquery(sr.replaceExpr) case conv: Conv => conv.children.map(containsSubquery).exists(_ == true) + case lpad: StringLPad => + lpad.children.map(containsSubquery).exists(_ == true) + case rpad: StringRPad => + rpad.children.map(containsSubquery).exists(_ == true) case expr: ScalaUDF if (expr.udfName match { case Some(name) => ColumnarUDF.isSupportedUDF(name) From 0dcf87bd07e8124ef22de2f88f04c2a4192388b4 Mon Sep 17 00:00:00 2001 From: philo Date: Mon, 13 Jun 2022 18:40:37 +0800 Subject: [PATCH 5/6] Check supportColumnarCodegen for SMJ's join key and condition --- .../execution/ColumnarSortMergeJoinExec.scala | 26 ++++++++++++++++--- 1 file changed, 23 insertions(+), 3 deletions(-) diff --git a/native-sql-engine/core/src/main/scala/com/intel/oap/execution/ColumnarSortMergeJoinExec.scala b/native-sql-engine/core/src/main/scala/com/intel/oap/execution/ColumnarSortMergeJoinExec.scala index d0e4d018c..a6f2290fa 100644 --- a/native-sql-engine/core/src/main/scala/com/intel/oap/execution/ColumnarSortMergeJoinExec.scala +++ 
b/native-sql-engine/core/src/main/scala/com/intel/oap/execution/ColumnarSortMergeJoinExec.scala
@@ -348,7 +348,15 @@ case class ColumnarSortMergeJoinExec(
     // build check for condition
     val conditionExpr: Expression = condition.orNull
     if (conditionExpr != null) {
-      ColumnarExpressionConverter.replaceWithColumnarExpression(conditionExpr)
+      val columnarConditionExpr =
+        ColumnarExpressionConverter.replaceWithColumnarExpression(conditionExpr)
+      val supportCodegen =
+        columnarConditionExpr.asInstanceOf[ColumnarExpression].supportColumnarCodegen(null)
+      // Columnar SMJ only has codegen version of implementation.
+      if (!supportCodegen) {
+        throw new UnsupportedOperationException(
+          "Condition expression is not fully supporting codegen!")
+      }
     }
     // build check types
     for (attr <- left.output) {
@@ -372,12 +380,24 @@ case class ColumnarSortMergeJoinExec(
     // build check for expr
     if (leftKeys != null) {
       for (expr <- leftKeys) {
-        ColumnarExpressionConverter.replaceWithColumnarExpression(expr)
+        // Columnar SMJ only has a codegen implementation, so a left join key that
+        // does not support columnar codegen is rejected here.
+        val columnarExpr = ColumnarExpressionConverter.replaceWithColumnarExpression(expr)
+        if (!columnarExpr.asInstanceOf[ColumnarExpression].supportColumnarCodegen(null)) {
+          throw new UnsupportedOperationException(
+            "Left join key expression is not fully supporting codegen!")
+        }
       }
     }
     if (rightKeys != null) {
       for (expr <- rightKeys) {
-        ColumnarExpressionConverter.replaceWithColumnarExpression(expr)
+        // Columnar SMJ only has a codegen implementation, so a right join key that
+        // does not support columnar codegen is rejected here.
+        val columnarExpr = ColumnarExpressionConverter.replaceWithColumnarExpression(expr)
+        if (!columnarExpr.asInstanceOf[ColumnarExpression].supportColumnarCodegen(null)) {
+          throw new UnsupportedOperationException(
+            "Right join key expression is not fully supporting codegen!")
+        }
       }
     }
   }
From dd295e9c7cb80268d5bf2ed52c4912dce0e1a2ec Mon Sep 17 00:00:00 2001
From: Yuan Zhou
Date: Tue, 14 Jun 2022 23:36:56 +0800
Subject: [PATCH 6/6] Revert "check arrow"

This reverts commit 8a512853341287fa4e0b7229ebdbaac64b31101d.
--- arrow-data-source/script/build_arrow.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arrow-data-source/script/build_arrow.sh b/arrow-data-source/script/build_arrow.sh index 68e78837e..d8ec40128 100755 --- a/arrow-data-source/script/build_arrow.sh +++ b/arrow-data-source/script/build_arrow.sh @@ -62,7 +62,7 @@ echo "ARROW_SOURCE_DIR=${ARROW_SOURCE_DIR}" echo "ARROW_INSTALL_DIR=${ARROW_INSTALL_DIR}" mkdir -p $ARROW_SOURCE_DIR mkdir -p $ARROW_INSTALL_DIR -git clone https://github.com/zhouyuan/arrow.git --branch wip_lpad $ARROW_SOURCE_DIR +git clone https://github.com/oap-project/arrow.git --branch arrow-4.0.0-oap $ARROW_SOURCE_DIR pushd $ARROW_SOURCE_DIR cmake ./cpp \