Skip to content
This repository has been archived by the owner on Sep 18, 2023. It is now read-only.

[NSE-173] fix incorrect result of q69 #174

Merged
merged 1 commit into from
Mar 22, 2021
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
77 changes: 26 additions & 51 deletions native-sql-engine/cpp/src/codegen/arrow_compute/ext/sort_kernel.cc
Original file line number Diff line number Diff line change
Expand Up @@ -1028,7 +1028,7 @@ class SortInplaceKernel : public SortArraysToIndicesKernel::Impl {
std::string ToString() override { return "SortArraysToIndicesResultIterator"; }

bool HasNext() override {
if (total_offset_ >= total_length_) {
if (offset_ >= total_length_) {
return false;
}
return true;
Expand All @@ -1054,21 +1054,25 @@ class SortInplaceKernel : public SortArraysToIndicesKernel::Impl {
for (auto& data : out_data_.child_data) {
data = std::make_shared<arrow::ArrayData>();
}
// decide null_count
if (null_first) {
if ((offset + length) > null_total_) {
out_data_.null_count =
(null_total_ - offset > 0) ? (null_total_ - offset) : 0;
} else {
out_data_.null_count = length;
}
// decide null_count of this sliced array
if (null_total == 0) {
out_data_.null_count = 0;
} else {
auto valid_total = total_length - null_total_;
if ((offset + length) < valid_total) {
out_data_.null_count = 0;
if (null_first) {
if ((offset + length) > null_total_) {
out_data_.null_count =
(null_total_ - offset > 0) ? (null_total_ - offset) : 0;
} else {
out_data_.null_count = length;
}
} else {
out_data_.null_count =
(offset - valid_total) > 0 ? length : (offset + length - valid_total);
auto valid_total = total_length - null_total_;
if ((offset + length) < valid_total) {
out_data_.null_count = 0;
} else {
out_data_.null_count =
(offset - valid_total) > 0 ? length : (offset + length - valid_total);
}
}
}
}
Expand Down Expand Up @@ -1115,47 +1119,20 @@ class SortInplaceKernel : public SortArraysToIndicesKernel::Impl {
return arrow::Status::OK();
}

arrow::Status SliceBitmapImpl(const Bitmap& bitmap,
std::shared_ptr<arrow::Buffer>* out) {
auto length = bitmap.range.length;
auto offset = bitmap.range.offset;
ARROW_ASSIGN_OR_RAISE(*out, AllocateBitmap(length, pool_));
uint8_t* dst = (*out)->mutable_data();

int64_t bitmap_offset = 0;
if (bitmap.AllSet()) {
arrow::BitUtil::SetBitsTo(dst, offset, length, true);
} else {
arrow::internal::CopyBitmap(bitmap.data, offset, length, dst, bitmap_offset);
}

// finally (if applicable) zero out any trailing bits
if (auto preceding_bits = arrow::BitUtil::kPrecedingBitmask[length_ % 8]) {
dst[length_ / 8] &= preceding_bits;
}
return arrow::Status::OK();
}

arrow::Status SliceBitmap(const std::shared_ptr<arrow::Buffer>& buffer,
std::shared_ptr<arrow::Buffer>* out) {
Range range(size * offset_, size * length_);
Range range(offset_, length_);
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

this seems like a big change, does this mean only q69 use sortinplace?

Bitmap bitmap = Bitmap(buffer, range);

auto length = bitmap.range.length;
auto offset = bitmap.range.offset;
ARROW_ASSIGN_OR_RAISE(*out, AllocateBitmap(length, pool_));
uint8_t* dst = (*out)->mutable_data();

int64_t bitmap_offset = 0;
if (bitmap.AllSet()) {
arrow::BitUtil::SetBitsTo(dst, offset, length, true);
} else {
arrow::internal::CopyBitmap(bitmap.data, offset, length, dst, bitmap_offset);
}

// finally (if applicable) zero out any trailing bits
if (auto preceding_bits = arrow::BitUtil::kPrecedingBitmask[length_ % 8]) {
dst[length_ / 8] &= preceding_bits;
arrow::internal::CopyBitmap(bitmap.data, offset, length, dst, 0);
}
return arrow::Status::OK();
}
Expand All @@ -1170,17 +1147,16 @@ class SortInplaceKernel : public SortArraysToIndicesKernel::Impl {
};

arrow::Status Next(std::shared_ptr<arrow::RecordBatch>* out) {
auto length = (total_length_ - total_offset_) > batch_size_
? batch_size_
: (total_length_ - total_offset_);
auto length = (total_length_ - offset_) > batch_size_ ? batch_size_
: (total_length_ - offset_);
arrow::ArrayData result_data = *result_arr_->data();
arrow::ArrayData out_data;
SliceImpl<CTYPE>(result_data, ctx_->memory_pool(), length, total_offset_,
nulls_total_, nulls_first_, total_length_)
SliceImpl<CTYPE>(result_data, ctx_->memory_pool(), length, offset_, nulls_total_,
nulls_first_, total_length_)
.Slice(&out_data);
std::shared_ptr<arrow::Array> out_0 =
MakeArray(std::make_shared<arrow::ArrayData>(std::move(out_data)));
total_offset_ += length;
offset_ += length;
*out = arrow::RecordBatch::Make(result_schema_, length, {out_0});
return arrow::Status::OK();
}
Expand All @@ -1189,8 +1165,7 @@ class SortInplaceKernel : public SortArraysToIndicesKernel::Impl {
using ArrayType_0 = typename arrow::TypeTraits<DATATYPE>::ArrayType;
using BuilderType_0 = typename arrow::TypeTraits<DATATYPE>::BuilderType;
std::shared_ptr<arrow::Array> result_arr_;
uint64_t total_offset_ = 0;
uint64_t valid_offset_ = 0;
uint64_t offset_ = 0;
const uint64_t total_length_;
const uint64_t nulls_total_;
std::shared_ptr<arrow::Schema> result_schema_;
Expand Down