Skip to content
This repository has been archived by the owner on Sep 18, 2023. It is now read-only.

Commit

Permalink
[NSE-163] refresh unit tests (#201)
Browse files Browse the repository at this point in the history
* turn on wscg, window, SMJ and refresh ut

* fix "cannot create columnar reader" error

* support case-when with multiple branches

* fix in/inset with null

* refine in/inset

* adding scala ut

Signed-off-by: Yuan Zhou <[email protected]>

* enable ut on Github

Co-authored-by: Yuan Zhou <[email protected]>
  • Loading branch information
rui-mo and zhouyuan authored Apr 1, 2021
1 parent 7e2a35a commit e80ad10
Show file tree
Hide file tree
Showing 229 changed files with 1,926 additions and 1,356 deletions.
47 changes: 47 additions & 0 deletions .github/workflows/unittests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,53 @@ jobs:
cd src
ctest -R
scala-unit-test:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v2
- name: Set up JDK 1.8
uses: actions/setup-java@v1
with:
java-version: 1.8
- run: sudo swapoff -a
- run: free
- run: sudo apt-get update
- run: sudo apt-get install cmake
- run: sudo apt-get install libboost-all-dev
- name: Install Googletest
run: |
sudo apt-get install libgtest-dev
cd /usr/src/gtest
sudo cmake CMakeLists.txt -DBUILD_SHARED_LIBS=ON -DCMAKE_INSTALL_PREFIX=/usr/local
sudo make
sudo apt-get install google-mock
- name: Install Spark
run: |
cd /tmp
wget http://archive.apache.org/dist/spark/spark-3.0.0/spark-3.0.0-bin-hadoop2.7.tgz
tar -xf spark-3.0.0-bin-hadoop2.7.tgz
- name: Install OAP optimized Arrow (C++ libs)
run: |
cd /tmp
git clone https://github.com/oap-project/arrow.git
cd arrow && git checkout arrow-3.0.0-oap && cd cpp
mkdir build && cd build
cmake .. -DARROW_JNI=ON -DARROW_GANDIVA_JAVA=ON -DARROW_GANDIVA=ON -DARROW_PARQUET=ON -DARROW_HDFS=ON -DARROW_FILESYSTEM=ON -DARROW_WITH_SNAPPY=ON -DARROW_JSON=ON -DARROW_DATASET=ON -DARROW_WITH_LZ4=ON -DGTEST_ROOT=/usr/src/gtest && make -j2
sudo make install
cd ../../java
mvn clean install -B -Dorg.slf4j.simpleLogger.log.org.apache.maven.cli.transfer.Slf4jMavenTransferListener=warn -P arrow-jni -am -Darrow.cpp.build.dir=/tmp/arrow/cpp/build/release/ -DskipTests -Dcheckstyle.skip
- name: Run unit tests
run: |
cd arrow-data-source
mvn clean install -DskipTests
cd ..
mvn clean package -am -pl native-sql-engine/core -DskipTests -Dbuild_arrow=OFF
cd native-sql-engine/core/
mvn test -DmembersOnlySuites=org.apache.spark.sql.travis -am -DfailIfNoTests=false -Dexec.skip=true -DargLine="-Dspark.test.home=/tmp/spark-3.0.0-bin-hadoop2.7" &> log-file.log
echo '#!/bin/bash' > grep.sh
echo "module_tested=0; module_should_test=1; tests_total=0; while read -r line; do num=\$(echo \"\$line\" | grep -o -E '[0-9]+'); tests_total=\$((tests_total+num)); done <<<\"\$(grep \"Total number of tests run:\" log-file.log)\"; succeed_total=0; while read -r line; do [[ \$line =~ [^0-9]*([0-9]+)\, ]]; num=\${BASH_REMATCH[1]}; succeed_total=\$((succeed_total+num)); let module_tested++; done <<<\"\$(grep \"succeeded\" log-file.log)\"; if test \$tests_total -eq \$succeed_total -a \$module_tested -eq \$module_should_test; then echo \"All unit tests succeed\"; else echo \"Unit tests failed\"; exit 1; fi" >> grep.sh
bash grep.sh
formatting-check:
name: Formatting Check
runs-on: ubuntu-latest
Expand Down
1 change: 1 addition & 0 deletions arrow-data-source/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -158,6 +158,7 @@
<artifactId>scalastyle-maven-plugin</artifactId>
<version>1.0.0</version>
<configuration>
<skip>true</skip>
<verbose>false</verbose>
<failOnViolation>true</failOnViolation>
<includeTestSourceDirectory>false</includeTestSourceDirectory>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -52,24 +52,25 @@ case class ColumnarGuardRule(conf: SparkConf) extends Rule[SparkPlan] {
val enableColumnarSort = columnarConf.enableColumnarSort
val enableColumnarWindow = columnarConf.enableColumnarWindow
val enableColumnarSortMergeJoin = columnarConf.enableColumnarSortMergeJoin
val testing = columnarConf.isTesting

private def tryConvertToColumnar(plan: SparkPlan): Boolean = {
try {
val columnarPlan = plan match {
case plan: BatchScanExec =>
if (testing) {
// disable ColumnarBatchScanExec according to config
return false
}
new ColumnarBatchScanExec(plan.output, plan.scan)
case plan: FileSourceScanExec =>
if (plan.supportsColumnar) {
logWarning(
s"FileSourceScanExec ${plan.nodeName} supports columnar, " +
s"may causing columnar conversion exception")
return false
}
plan
case plan: InMemoryTableScanExec =>
if (plan.supportsColumnar) {
logWarning(
s"InMemoryTableScanExec ${plan.nodeName} supports columnar, " +
s"may causing columnar conversion exception")
return false
}
plan
case plan: ProjectExec =>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -59,13 +59,8 @@ case class ColumnarPreOverrides(conf: SparkConf) extends Rule[SparkPlan] {
logDebug(s"Columnar Processing for ${actualPlan.getClass} is under RowGuard.")
actualPlan.withNewChildren(actualPlan.children.map(replaceWithColumnarPlan))
case plan: BatchScanExec =>
if (testing) {
// disable ColumnarBatchScanExec according to config
plan
} else {
logDebug(s"Columnar Processing for ${plan.getClass} is currently supported.")
new ColumnarBatchScanExec(plan.output, plan.scan)
}
logDebug(s"Columnar Processing for ${plan.getClass} is currently supported.")
new ColumnarBatchScanExec(plan.output, plan.scan)
case plan: ProjectExec =>
val columnarChild = replaceWithColumnarPlan(plan.child)
logDebug(s"Columnar Processing for ${plan.getClass} is currently supported.")
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -203,6 +203,15 @@ class ColumnarEqualNull(left: Expression, right: Expression, original: Expressio
}
}

// EqualNullSafe: returns true if both are null, false if one of the them is null.
val resultType = new ArrowType.Bool()
val leftIsnotnullNode = TreeBuilder.makeFunction(
"isnotnull", Lists.newArrayList(left_node), resultType)
val rightIsnotnullNode = TreeBuilder.makeFunction(
"isnotnull", Lists.newArrayList(right_node), resultType)
val trueNode = TreeBuilder.makeLiteral(true.asInstanceOf[java.lang.Boolean])
val falseNode = TreeBuilder.makeLiteral(false.asInstanceOf[java.lang.Boolean])

var function = "equal"
val nanCheck = ColumnarPluginConfig.getConf.enableColumnarNaNCheck
if (nanCheck) {
Expand All @@ -212,9 +221,14 @@ class ColumnarEqualNull(left: Expression, right: Expression, original: Expressio
case _ =>
}
}
val resultType = new ArrowType.Bool()
val funcNode =
TreeBuilder.makeFunction(function, Lists.newArrayList(left_node, right_node), resultType)
val cmpNode = TreeBuilder.makeFunction(
function, Lists.newArrayList(left_node, right_node), resultType)
val funcNode = TreeBuilder.makeIf(
leftIsnotnullNode,
TreeBuilder.makeIf(rightIsnotnullNode, cmpNode, falseNode, resultType),
TreeBuilder.makeIf(rightIsnotnullNode, falseNode, trueNode, resultType),
resultType)

(funcNode, resultType)
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -61,15 +61,12 @@ class ColumnarCaseWhen(
logInfo(s"children: ${branches.flatMap(b => b._1 :: b._2 :: Nil) ++ elseValue}")
logInfo(s"branches: $branches")
logInfo(s"else: $elseValue")
var i = 0
val size = branches.size
//TODO(): handle leveled branches

val i = 0
val exprs = branches.flatMap(b => b._1 :: b._2 :: Nil) ++ elseValue
val exprList = { exprs.filter(expr => !expr.isInstanceOf[Literal]) }
val inputAttributes = exprList.toList.map(expr => ConverterUtils.getResultAttrFromExpr(expr))

var colCondExpr = branches(i)._1
val colCondExpr = branches(i)._1
val (cond_node, condType): (TreeNode, ArrowType) =
colCondExpr.asInstanceOf[ColumnarExpression].doColumnarCodeGen(args)

Expand All @@ -81,21 +78,42 @@ class ColumnarCaseWhen(
val (ret_node, retType): (TreeNode, ArrowType) =
colRetExpr.asInstanceOf[ColumnarExpression].doColumnarCodeGen(args)

val elseValueExpr = elseValue.getOrElse(null)
val (else_node, elseType): (TreeNode, ArrowType) = if (elseValueExpr != null) {
var colElseValueExpr = ColumnarExpressionConverter.replaceWithColumnarExpression(elseValueExpr)
if (rename && colElseValueExpr.isInstanceOf[AttributeReference]) {
colElseValueExpr = new ColumnarBoundReference(inputAttributes.indexOf(colElseValueExpr),
colElseValueExpr.dataType, colElseValueExpr.nullable)
val funcNode = TreeBuilder.makeIf(cond_node, ret_node,
elseNode(args, i + 1, inputAttributes, retType), retType)
(funcNode, retType)
}

def elseNode(args: java.lang.Object, idx: Int,
inputAttributes: List[AttributeReference], retType: ArrowType): TreeNode = {
if (idx == branches.size) {
val elseValueExpr = elseValue.orNull
val (else_node, elseType): (TreeNode, ArrowType) = if (elseValueExpr != null) {
var colElseValueExpr = ColumnarExpressionConverter.replaceWithColumnarExpression(elseValueExpr)
if (rename && colElseValueExpr.isInstanceOf[AttributeReference]) {
colElseValueExpr = new ColumnarBoundReference(inputAttributes.indexOf(colElseValueExpr),
colElseValueExpr.dataType, colElseValueExpr.nullable)
}
colElseValueExpr.asInstanceOf[ColumnarExpression].doColumnarCodeGen(args)
} else {
(TreeBuilder.makeNull(retType), retType)
}
colElseValueExpr.asInstanceOf[ColumnarExpression].doColumnarCodeGen(args)
} else {
(TreeBuilder.makeNull(retType), retType)
return else_node
}
val colCondExpr = branches(idx)._1
val (cond_node, condType): (TreeNode, ArrowType) =
colCondExpr.asInstanceOf[ColumnarExpression].doColumnarCodeGen(args)

val funcNode = TreeBuilder.makeIf(cond_node, ret_node, else_node, retType)
(funcNode, retType)
var colRetExpr = branches(idx)._2
if (rename && colRetExpr.isInstanceOf[AttributeReference]) {
colRetExpr = new ColumnarBoundReference(inputAttributes.indexOf(colRetExpr),
colRetExpr.dataType, colRetExpr.nullable)
}
val (ret_node, ret_type): (TreeNode, ArrowType) =
colRetExpr.asInstanceOf[ColumnarExpression].doColumnarCodeGen(args)

val funcNode = TreeBuilder.makeIf(cond_node, ret_node,
elseNode(args, idx + 1, inputAttributes, retType), retType)
funcNode
}
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -55,42 +55,88 @@ class ColumnarIn(value: Expression, list: Seq[Expression], original: Expression)
value.asInstanceOf[ColumnarExpression].doColumnarCodeGen(args)

val resultType = new ArrowType.Bool()
var inNode: TreeNode = null
var has_null = false

if (value.dataType == StringType) {
val newlist :List[String]= list.toList.map (expr => {
expr.asInstanceOf[Literal].value.toString
});
val tlist = Lists.newArrayList(newlist:_*);

val funcNode = TreeBuilder.makeInExpressionString(value_node, Sets.newHashSet(tlist))
(funcNode, resultType)
var newlist: List[String] = List()
list.toList.foreach (expr => {
val item = expr.asInstanceOf[Literal].value
if (item != null) {
newlist = newlist :+ item.toString
} else {
has_null = true
}
})
val tlist = Lists.newArrayList(newlist:_*)
inNode = TreeBuilder.makeInExpressionString(value_node, Sets.newHashSet(tlist))
} else if (value.dataType == IntegerType) {
val newlist :List[Integer]= list.toList.map (expr => {
expr.asInstanceOf[Literal].value.asInstanceOf[Integer]
});
val tlist = Lists.newArrayList(newlist:_*);

val funcNode = TreeBuilder.makeInExpressionInt32(value_node, Sets.newHashSet(tlist))
(funcNode, resultType)
var newlist: List[Integer] = List()
list.toList.foreach (expr => {
val item = expr.asInstanceOf[Literal].value
if (item != null) {
newlist = newlist :+ item.asInstanceOf[Integer]
} else {
has_null = true
}
})
val tlist = Lists.newArrayList(newlist:_*)
inNode = TreeBuilder.makeInExpressionInt32(value_node, Sets.newHashSet(tlist))
} else if (value.dataType == LongType) {
val newlist :List[java.lang.Long]= list.toList.map (expr => {
expr.asInstanceOf[Literal].value.asInstanceOf[java.lang.Long]
});
val tlist = Lists.newArrayList(newlist:_*);

val funcNode = TreeBuilder.makeInExpressionBigInt(value_node, Sets.newHashSet(tlist))
(funcNode, resultType)
var newlist: List[java.lang.Long] = List()
list.toList.foreach (expr => {
val item = expr.asInstanceOf[Literal].value
if (item != null) {
newlist = newlist :+ item.asInstanceOf[java.lang.Long]
} else {
has_null = true
}
})
val tlist = Lists.newArrayList(newlist:_*)
inNode = TreeBuilder.makeInExpressionBigInt(value_node, Sets.newHashSet(tlist))
} else if (value.dataType == DateType) {
val newlist :List[Integer]= list.toList.map (expr => {
expr.asInstanceOf[Literal].value.asInstanceOf[Integer]
});
var newlist: List[Integer] = List()
list.toList.foreach (expr => {
val item = expr.asInstanceOf[Literal].value
if (item != null) {
newlist = newlist :+ item.asInstanceOf[Integer]
} else {
has_null = true
}
})
val tlist = Lists.newArrayList(newlist:_*);
val cast_func = TreeBuilder.makeFunction("castINT", Lists.newArrayList(value_node), new ArrowType.Int(32, true))
val cast_func = TreeBuilder.makeFunction("castINT",
Lists.newArrayList(value_node),
new ArrowType.Int(32, true))
inNode = TreeBuilder.makeInExpressionInt32(cast_func, Sets.newHashSet(tlist))
} else {
throw new UnsupportedOperationException(
s"not currently supported: ${value.dataType}.")
}

val funcNode = TreeBuilder.makeInExpressionInt32(cast_func, Sets.newHashSet(tlist))
/** Null should be specially handled:
TRUE is returned when the non-NULL value in question is found in the list
FALSE is returned when the non-NULL value is not found in the list and the list does not contain NULL values
NULL is returned when the value is NULL, or the non-NULL value is not found in the list and the list contains at least one NULL value
*/
val isnotnullNode = TreeBuilder.makeFunction(
"isnotnull", Lists.newArrayList(value_node), resultType)
val trueNode =
TreeBuilder.makeLiteral(true.asInstanceOf[java.lang.Boolean])
val falseNode =
TreeBuilder.makeLiteral(false.asInstanceOf[java.lang.Boolean])
val nullNode = TreeBuilder.makeNull(resultType)

if (!has_null) {
val funcNode = TreeBuilder.makeIf(
isnotnullNode, inNode, nullNode, resultType)
(funcNode, resultType)
} else {
throw new UnsupportedOperationException(s"not currently supported: ${value.dataType}.")
val isnotnullBranch =
TreeBuilder.makeIf(inNode, trueNode, nullNode, resultType)
val funcNode = TreeBuilder.makeIf(
isnotnullNode, isnotnullBranch, nullNode, resultType)
(funcNode, resultType)
}
}
}
Expand Down
Loading

0 comments on commit e80ad10

Please sign in to comment.