Skip to content
This repository has been archived by the owner on Sep 18, 2023. It is now read-only.

Commit

Permalink
[NSE-1071] Add tiny optimizations for hash aggregation functions (#1072)
Browse files Browse the repository at this point in the history
* Optimize code logic by considering input null count

* Pass input list by const reference

* Remove pieces of unnecessary code
  • Loading branch information
PHILO-HE authored Aug 21, 2022
1 parent 25fa211 commit 49d5985
Show file tree
Hide file tree
Showing 2 changed files with 42 additions and 40 deletions.
78 changes: 40 additions & 38 deletions native-sql-engine/cpp/src/codegen/arrow_compute/ext/actions_impl.cc
Original file line number Diff line number Diff line change
Expand Up @@ -50,14 +50,14 @@ struct FindAccumulatorType<I, arrow::enable_if_floating_point<I>> {
using Type = arrow::DoubleType;
};

arrow::Status ActionBase::Submit(ArrayList in, int max_group_id,
arrow::Status ActionBase::Submit(const ArrayList& in, int max_group_id,
std::function<arrow::Status(int)>* on_valid,
std::function<arrow::Status()>* on_null) {
return arrow::Status::NotImplemented("ActionBase Submit is abstract.");
}

arrow::Status ActionBase::Submit(
std::vector<std::shared_ptr<arrow::Array>> in,
const std::vector<std::shared_ptr<arrow::Array>>& in,
std::function<arrow::Status(uint64_t, uint64_t)>* on_valid,
std::function<arrow::Status()>* on_null) {
return arrow::Status::NotImplemented("ActionBase Submit is abstract.");
Expand Down Expand Up @@ -143,7 +143,7 @@ class UniqueAction : public ActionBase {
#endif
}

arrow::Status Submit(ArrayList in_list, int max_group_id,
arrow::Status Submit(const ArrayList& in_list, int max_group_id,
std::function<arrow::Status(int)>* on_valid,
std::function<arrow::Status()>* on_null) override {
// resize result data
Expand Down Expand Up @@ -302,7 +302,7 @@ class CountAction : public ActionBase {
#endif
}

arrow::Status Submit(ArrayList in_list, int max_group_id,
arrow::Status Submit(const ArrayList& in_list, int max_group_id,
std::function<arrow::Status(int)>* on_valid,
std::function<arrow::Status()>* on_null) override {
// resize result data
Expand Down Expand Up @@ -465,7 +465,7 @@ class CountDistinctAction : public ActionBase {
#endif
}
std::string getName() { return "CountDistinctAction"; }
arrow::Status Submit(ArrayList in_list, int max_group_id,
arrow::Status Submit(const ArrayList& in_list, int max_group_id,
std::function<arrow::Status(int)>* on_valid,
std::function<arrow::Status()>* on_null) override {
// resize result data
Expand Down Expand Up @@ -616,7 +616,7 @@ class CountLiteralAction : public ActionBase {
#endif
}

arrow::Status Submit(ArrayList in_list, int max_group_id,
arrow::Status Submit(const ArrayList& in_list, int max_group_id,
std::function<arrow::Status(int)>* on_valid,
std::function<arrow::Status()>* on_null) override {
// resize result data
Expand Down Expand Up @@ -740,7 +740,7 @@ class MinAction<DataType, CType, precompile::enable_if_number<DataType>>
#endif
}

arrow::Status Submit(ArrayList in_list, int max_group_id,
arrow::Status Submit(const ArrayList& in_list, int max_group_id,
std::function<arrow::Status(int)>* on_valid,
std::function<arrow::Status()>* on_null) override {
// resize result data
Expand Down Expand Up @@ -1022,7 +1022,7 @@ class MinAction<DataType, CType, precompile::enable_if_decimal<DataType>>
#endif
}

arrow::Status Submit(ArrayList in_list, int max_group_id,
arrow::Status Submit(const ArrayList& in_list, int max_group_id,
std::function<arrow::Status(int)>* on_valid,
std::function<arrow::Status()>* on_null) override {
// resize result data
Expand Down Expand Up @@ -1179,7 +1179,7 @@ class MinAction<DataType, CType, precompile::enable_if_string_like<DataType>>
#endif
}

arrow::Status Submit(ArrayList in_list, int max_group_id,
arrow::Status Submit(const ArrayList& in_list, int max_group_id,
std::function<arrow::Status(int)>* on_valid,
std::function<arrow::Status()>* on_null) override {
// resize result data
Expand Down Expand Up @@ -1350,7 +1350,7 @@ class MaxAction<DataType, CType, precompile::enable_if_number<DataType>>
#endif
}

arrow::Status Submit(ArrayList in_list, int max_group_id,
arrow::Status Submit(const ArrayList& in_list, int max_group_id,
std::function<arrow::Status(int)>* on_valid,
std::function<arrow::Status()>* on_null) override {
// resize result data
Expand Down Expand Up @@ -1627,7 +1627,7 @@ class MaxAction<DataType, CType, precompile::enable_if_decimal<DataType>>
#endif
}

arrow::Status Submit(ArrayList in_list, int max_group_id,
arrow::Status Submit(const ArrayList& in_list, int max_group_id,
std::function<arrow::Status(int)>* on_valid,
std::function<arrow::Status()>* on_null) override {
// resize result data
Expand Down Expand Up @@ -1783,7 +1783,7 @@ class MaxAction<DataType, CType, precompile::enable_if_string_like<DataType>>
#endif
}

arrow::Status Submit(ArrayList in_list, int max_group_id,
arrow::Status Submit(const ArrayList& in_list, int max_group_id,
std::function<arrow::Status(int)>* on_valid,
std::function<arrow::Status()>* on_null) override {
// resize result data
Expand Down Expand Up @@ -1956,7 +1956,7 @@ class SumAction<DataType, CType, ResDataType, ResCType,
#endif
}

arrow::Status Submit(ArrayList in_list, int max_group_id,
arrow::Status Submit(const ArrayList& in_list, int max_group_id,
std::function<arrow::Status(int)>* on_valid,
std::function<arrow::Status()>* on_null) override {
// resize result data
Expand Down Expand Up @@ -2117,7 +2117,7 @@ class SumAction<DataType, CType, ResDataType, ResCType,

int RequiredColNum() { return 1; }

arrow::Status Submit(ArrayList in_list, int max_group_id,
arrow::Status Submit(const ArrayList& in_list, int max_group_id,
std::function<arrow::Status(int)>* on_valid,
std::function<arrow::Status()>* on_null) override {
// resize result data
Expand Down Expand Up @@ -2284,7 +2284,7 @@ class SumActionPartial<DataType, CType, ResDataType, ResCType,

int RequiredColNum() { return 1; }

arrow::Status Submit(ArrayList in_list, int max_group_id,
arrow::Status Submit(const ArrayList& in_list, int max_group_id,
std::function<arrow::Status(int)>* on_valid,
std::function<arrow::Status()>* on_null) override {
// resize result data
Expand All @@ -2302,7 +2302,7 @@ class SumActionPartial<DataType, CType, ResDataType, ResCType,
// prepare evaluate lambda
if (in_null_count_) {
*on_valid = [this](int dest_group_id) {
const bool is_null = in_null_count_ > 0 && in_->IsNull(row_id);
const bool is_null = in_->IsNull(row_id);
if (!is_null) {
cache_validity_[dest_group_id] = true;
cache_[dest_group_id] += data_[row_id];
Expand Down Expand Up @@ -2450,7 +2450,7 @@ class SumActionPartial<DataType, CType, ResDataType, ResCType,
#endif
}

arrow::Status Submit(ArrayList in_list, int max_group_id,
arrow::Status Submit(const ArrayList& in_list, int max_group_id,
std::function<arrow::Status(int)>* on_valid,
std::function<arrow::Status()>* on_null) override {
// resize result data
Expand All @@ -2467,7 +2467,7 @@ class SumActionPartial<DataType, CType, ResDataType, ResCType,
// prepare evaluate lambda
if (in_null_count_) {
*on_valid = [this](int dest_group_id) {
const bool is_null = in_null_count_ > 0 && in_->IsNull(row_id);
const bool is_null = in_->IsNull(row_id);
if (!is_null) {
cache_validity_[dest_group_id] = true;
cache_[dest_group_id] += in_->GetView(row_id);
Expand Down Expand Up @@ -2621,7 +2621,7 @@ class AvgAction<DataType, CType, ResDataType, ResCType,
#endif
}

arrow::Status Submit(ArrayList in_list, int max_group_id,
arrow::Status Submit(const ArrayList& in_list, int max_group_id,
std::function<arrow::Status(int)>* on_valid,
std::function<arrow::Status()>* on_null) override {
// resize result data
Expand Down Expand Up @@ -2792,7 +2792,7 @@ class AvgAction<DataType, CType, ResDataType, ResCType,
#endif
}

arrow::Status Submit(ArrayList in_list, int max_group_id,
arrow::Status Submit(const ArrayList& in_list, int max_group_id,
std::function<arrow::Status(int)>* on_valid,
std::function<arrow::Status()>* on_null) override {
// resize result data
Expand Down Expand Up @@ -2979,7 +2979,7 @@ class SumCountAction<DataType, CType, ResDataType, ResCType,
#endif
}

arrow::Status Submit(ArrayList in_list, int max_group_id,
arrow::Status Submit(const ArrayList& in_list, int max_group_id,
std::function<arrow::Status(int)>* on_valid,
std::function<arrow::Status()>* on_null) override {
// resize result data
Expand All @@ -3005,7 +3005,7 @@ class SumCountAction<DataType, CType, ResDataType, ResCType,
};
} else {
*on_valid = [this](int dest_group_id) {
const bool is_null = in_null_count_ > 0 && in_->IsNull(row_id);
const bool is_null = in_->IsNull(row_id);
if (!is_null) {
cache_sum_[dest_group_id] += data_[row_id];
cache_count_[dest_group_id] += 1;
Expand Down Expand Up @@ -3168,7 +3168,7 @@ class SumCountAction<DataType, CType, ResDataType, ResCType,
#endif
}

arrow::Status Submit(ArrayList in_list, int max_group_id,
arrow::Status Submit(const ArrayList& in_list, int max_group_id,
std::function<arrow::Status(int)>* on_valid,
std::function<arrow::Status()>* on_null) override {
// resize result data
Expand All @@ -3185,7 +3185,7 @@ class SumCountAction<DataType, CType, ResDataType, ResCType,
row_id = 0;
if (in_null_count_) {
*on_valid = [this](int dest_group_id) {
const bool is_null = in_null_count_ > 0 && in_->IsNull(row_id);
const bool is_null = in_->IsNull(row_id);
if (!is_null) {
cache_sum_[dest_group_id] += in_->GetView(row_id);
cache_count_[dest_group_id] += 1;
Expand Down Expand Up @@ -3353,7 +3353,7 @@ class SumCountMergeAction<DataType, CType, ResDataType, ResCType,
#endif
}

arrow::Status Submit(ArrayList in_list, int max_group_id,
arrow::Status Submit(const ArrayList& in_list, int max_group_id,
std::function<arrow::Status(int)>* on_valid,
std::function<arrow::Status()>* on_null) override {
// resize result data
Expand Down Expand Up @@ -3534,7 +3534,7 @@ class SumCountMergeAction<DataType, CType, ResDataType, ResCType,
#endif
}

arrow::Status Submit(ArrayList in_list, int max_group_id,
arrow::Status Submit(const ArrayList& in_list, int max_group_id,
std::function<arrow::Status(int)>* on_valid,
std::function<arrow::Status()>* on_null) override {
// resize result data
Expand Down Expand Up @@ -3704,7 +3704,7 @@ class AvgByCountAction<DataType, CType, ResDataType, ResCType,
#endif
}

arrow::Status Submit(ArrayList in_list, int max_group_id,
arrow::Status Submit(const ArrayList& in_list, int max_group_id,
std::function<arrow::Status(int)>* on_valid,
std::function<arrow::Status()>* on_null) override {
// resize result data
Expand Down Expand Up @@ -3884,7 +3884,7 @@ class AvgByCountAction<DataType, CType, ResDataType, ResCType,
#endif
}

arrow::Status Submit(ArrayList in_list, int max_group_id,
arrow::Status Submit(const ArrayList& in_list, int max_group_id,
std::function<arrow::Status(int)>* on_valid,
std::function<arrow::Status()>* on_null) override {
// resize result data
Expand Down Expand Up @@ -4079,7 +4079,7 @@ class StddevSampPartialAction<DataType, CType, ResDataType, ResCType,
#endif
}

arrow::Status Submit(ArrayList in_list, int max_group_id,
arrow::Status Submit(const ArrayList& in_list, int max_group_id,
std::function<arrow::Status(int)>* on_valid,
std::function<arrow::Status()>* on_null) override {
// resize result data
Expand Down Expand Up @@ -4313,7 +4313,7 @@ class StddevSampPartialAction<DataType, CType, ResDataType, ResCType,
#endif
}

arrow::Status Submit(ArrayList in_list, int max_group_id,
arrow::Status Submit(const ArrayList& in_list, int max_group_id,
std::function<arrow::Status(int)>* on_valid,
std::function<arrow::Status()>* on_null) override {
// resize result data
Expand Down Expand Up @@ -4540,7 +4540,7 @@ class StddevSampFinalAction<DataType, CType, ResDataType, ResCType,
#endif
}

arrow::Status Submit(ArrayList in_list, int max_group_id,
arrow::Status Submit(const ArrayList& in_list, int max_group_id,
std::function<arrow::Status(int)>* on_valid,
std::function<arrow::Status()>* on_null) override {
// resize result data
Expand Down Expand Up @@ -4747,7 +4747,7 @@ class StddevSampFinalAction<DataType, CType, ResDataType, ResCType,
#endif
}

arrow::Status Submit(ArrayList in_list, int max_group_id,
arrow::Status Submit(const ArrayList& in_list, int max_group_id,
std::function<arrow::Status(int)>* on_valid,
std::function<arrow::Status()>* on_null) override {
// resize result data
Expand Down Expand Up @@ -4986,7 +4986,7 @@ class FirstPartialAction<DataType, CType, ResDataType, ResCType,
#endif
}

arrow::Status Submit(ArrayList in_list, int max_group_id,
arrow::Status Submit(const ArrayList& in_list, int max_group_id,
std::function<arrow::Status(int)>* on_valid,
std::function<arrow::Status()>* on_null) override {
// resize result data
Expand Down Expand Up @@ -5059,8 +5059,9 @@ class FirstPartialAction<DataType, CType, ResDataType, ResCType,
return arrow::Status::OK();
}
auto input_array = std::make_shared<ArrayType>(in[0]);
int in_null_count = input_array->null_count();
for (int id = 0; id < input_array->length(); id++) {
if (input_array->IsNull(id)) {
if (in_null_count > 0 && input_array->IsNull(id)) {
if (ignore_nulls_) {
continue;
} else {
Expand Down Expand Up @@ -5224,7 +5225,7 @@ class FirstPartialAction<DataType, CType, ResDataType, ResCType,
#endif
}

arrow::Status Submit(ArrayList in_list, int max_group_id,
arrow::Status Submit(const ArrayList& in_list, int max_group_id,
std::function<arrow::Status(int)>* on_valid,
std::function<arrow::Status()>* on_null) override {
// resize result data
Expand Down Expand Up @@ -5296,8 +5297,9 @@ class FirstPartialAction<DataType, CType, ResDataType, ResCType,
return arrow::Status::OK();
}
auto input_array = std::make_shared<ArrayType>(in[0]);
int in_null_count = input_array->null_count();
for (int id = 0; id < input_array->length(); id++) {
if (input_array->IsNull(id)) {
if (in_null_count > 0 && input_array->IsNull(id)) {
if (ignore_nulls_) {
continue;
} else {
Expand Down Expand Up @@ -5459,7 +5461,7 @@ class FirstFinalAction<DataType, CType, ResDataType, ResCType,
#endif
}

arrow::Status Submit(ArrayList in_list, int max_group_id,
arrow::Status Submit(const ArrayList& in_list, int max_group_id,
std::function<arrow::Status(int)>* on_valid,
std::function<arrow::Status()>* on_null) override {
// resize result data
Expand Down Expand Up @@ -5719,7 +5721,7 @@ class FirstFinalAction<DataType, CType, ResDataType, ResCType,
#endif
}

arrow::Status Submit(ArrayList in_list, int max_group_id,
arrow::Status Submit(const ArrayList& in_list, int max_group_id,
std::function<arrow::Status(int)>* on_valid,
std::function<arrow::Status()>* on_null) override {
// resize result data
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -40,10 +40,10 @@ class ActionBase {
public:
virtual ~ActionBase() {}

virtual arrow::Status Submit(ArrayList in, int max_group_id,
virtual arrow::Status Submit(const ArrayList& in, int max_group_id,
std::function<arrow::Status(int)>* on_valid,
std::function<arrow::Status()>* on_null);
virtual arrow::Status Submit(std::vector<std::shared_ptr<arrow::Array>> in,
virtual arrow::Status Submit(const std::vector<std::shared_ptr<arrow::Array>>& in,
std::function<arrow::Status(uint64_t, uint64_t)>* on_valid,
std::function<arrow::Status()>* on_null);
virtual arrow::Status Submit(const std::shared_ptr<arrow::Array>& in,
Expand Down

0 comments on commit 49d5985

Please sign in to comment.