Skip to content
This repository has been archived by the owner on Sep 18, 2023. It is now read-only.

Commit

Permalink
[NSE-29] Adding non-codegen framework for multiple-key sort (#44)
Browse files Browse the repository at this point in the history
* support multi-key sort without projection

* optimize the std function

* remove IsNull check if null count is zero

* use optimized std function

* add NaN support in non-codegen sort
  • Loading branch information
rui-mo authored Feb 4, 2021
1 parent 3d0c72c commit faeebb2
Show file tree
Hide file tree
Showing 8 changed files with 2,099 additions and 72 deletions.
2 changes: 2 additions & 0 deletions core/src/main/scala/com/intel/oap/ColumnarPluginConfig.scala
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,8 @@ case class ColumnarNumaBindingInfo(
class ColumnarPluginConfig(conf: SQLConf) {
val enableColumnarSort: Boolean =
conf.getConfString("spark.sql.columnar.sort", "false").toBoolean
val enableColumnarCodegenSort: Boolean =
conf.getConfString("spark.sql.columnar.codegen.sort", "true").toBoolean
val enableColumnarNaNCheck: Boolean =
conf.getConfString("spark.sql.columnar.nanCheck", "false").toBoolean
val enableColumnarBroadcastJoin: Boolean =
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -233,6 +233,7 @@ object ColumnarSorter extends Logging {
result_type: Int = 0): TreeNode = {
logInfo(s"ColumnarSorter sortOrder is ${sortOrder}, outputAttributes is ${outputAttributes}")
val NaNCheck = ColumnarPluginConfig.getConf.enableColumnarNaNCheck
val codegen = ColumnarPluginConfig.getConf.enableColumnarCodegenSort
/////////////// Prepare ColumnarSorter //////////////
val outputFieldList: List[Field] = outputAttributes.toList.map(expr => {
val attr = ConverterUtils.getAttrFromExpr(expr)
Expand Down Expand Up @@ -322,6 +323,11 @@ object ColumnarSorter extends Logging {
TreeBuilder.makeLiteral(NaNCheck.asInstanceOf[java.lang.Boolean])),
new ArrowType.Int(32, true) /*dummy ret type, won't be used*/ )

val codegen_node = TreeBuilder.makeFunction(
"codegen",
Lists.newArrayList(TreeBuilder.makeLiteral(codegen.asInstanceOf[java.lang.Boolean])),
new ArrowType.Int(32, true) /*dummy ret type, won't be used*/ )

val result_type_node = TreeBuilder.makeFunction(
"result_type",
Lists.newArrayList(TreeBuilder.makeLiteral(result_type.asInstanceOf[Integer])),
Expand All @@ -337,6 +343,7 @@ object ColumnarSorter extends Logging {
dir_node,
nulls_order_node,
NaN_check_node,
codegen_node,
result_type_node),
new ArrowType.Int(32, true) /*dummy ret type, won't be used*/ )

Expand Down
22 changes: 14 additions & 8 deletions cpp/src/codegen/arrow_compute/expr_visitor_impl.h
Original file line number Diff line number Diff line change
Expand Up @@ -630,14 +630,19 @@ class SortArraysToIndicesVisitorImpl : public ExprVisitorImpl {
nulls_order_.push_back(order_val);
}
// fifth child specifies whether to check NaN when sorting
auto function_node = std::dynamic_pointer_cast<gandiva::FunctionNode>(children[4]);
auto NaN_check_node =
std::dynamic_pointer_cast<gandiva::LiteralNode>(function_node->children()[0]);
NaN_check_ = arrow::util::get<bool>(NaN_check_node->holder());

if (children.size() == 6) {
auto nan_func_node = std::dynamic_pointer_cast<gandiva::FunctionNode>(children[4]);
auto NaN_lit_node =
std::dynamic_pointer_cast<gandiva::LiteralNode>(nan_func_node->children()[0]);
NaN_check_ = arrow::util::get<bool>(NaN_lit_node->holder());
// sixth child specifies whether to do codegen for multiple-key sort
auto codegen_func_node =
std::dynamic_pointer_cast<gandiva::FunctionNode>(children[5]);
auto codegen_lit_node =
std::dynamic_pointer_cast<gandiva::LiteralNode>(codegen_func_node->children()[0]);
do_codegen_ = arrow::util::get<bool>(codegen_lit_node->holder());
if (children.size() == 7) {
auto type_node = std::dynamic_pointer_cast<gandiva::LiteralNode>(
std::dynamic_pointer_cast<gandiva::FunctionNode>(children[5])->children()[0]);
std::dynamic_pointer_cast<gandiva::FunctionNode>(children[6])->children()[0]);
result_type_ = arrow::util::get<int>(type_node->holder());
}
result_schema_ = arrow::schema(ret_fields);
Expand All @@ -659,7 +664,7 @@ class SortArraysToIndicesVisitorImpl : public ExprVisitorImpl {
}
RETURN_NOT_OK(extra::SortArraysToIndicesKernel::Make(
&p_->ctx_, result_schema_, sort_key_node_, key_field_list_, sort_directions_,
nulls_order_, NaN_check_, result_type_, &kernel_));
nulls_order_, NaN_check_, do_codegen_, result_type_, &kernel_));
p_->signature_ = kernel_->GetSignature();
initialized_ = true;
finish_return_type_ = ArrowComputeResultType::BatchIterator;
Expand Down Expand Up @@ -711,6 +716,7 @@ class SortArraysToIndicesVisitorImpl : public ExprVisitorImpl {
std::vector<bool> sort_directions_;
std::vector<bool> nulls_order_;
bool NaN_check_;
bool do_codegen_;
int result_type_ = 0;
std::shared_ptr<arrow::Schema> result_schema_;
};
Expand Down
Loading

0 comments on commit faeebb2

Please sign in to comment.