From 1494bf8aeeb98a1612effa24f84efa8dcacc9e6c Mon Sep 17 00:00:00 2001 From: Rui Mo Date: Fri, 19 Mar 2021 03:49:53 +0000 Subject: [PATCH] fix copy bitmap in InplaceSort --- .../codegen/arrow_compute/ext/sort_kernel.cc | 77 +++++++------------ 1 file changed, 26 insertions(+), 51 deletions(-) diff --git a/native-sql-engine/cpp/src/codegen/arrow_compute/ext/sort_kernel.cc b/native-sql-engine/cpp/src/codegen/arrow_compute/ext/sort_kernel.cc index d3b7a8914..38c8aac12 100644 --- a/native-sql-engine/cpp/src/codegen/arrow_compute/ext/sort_kernel.cc +++ b/native-sql-engine/cpp/src/codegen/arrow_compute/ext/sort_kernel.cc @@ -1028,7 +1028,7 @@ class SortInplaceKernel : public SortArraysToIndicesKernel::Impl { std::string ToString() override { return "SortArraysToIndicesResultIterator"; } bool HasNext() override { - if (total_offset_ >= total_length_) { + if (offset_ >= total_length_) { return false; } return true; @@ -1054,21 +1054,25 @@ class SortInplaceKernel : public SortArraysToIndicesKernel::Impl { for (auto& data : out_data_.child_data) { data = std::make_shared(); } - // decide null_count - if (null_first) { - if ((offset + length) > null_total_) { - out_data_.null_count = - (null_total_ - offset > 0) ? (null_total_ - offset) : 0; - } else { - out_data_.null_count = length; - } + // decide null_count of this sliced array + if (null_total == 0) { + out_data_.null_count = 0; } else { - auto valid_total = total_length - null_total_; - if ((offset + length) < valid_total) { - out_data_.null_count = 0; + if (null_first) { + if ((offset + length) > null_total_) { + out_data_.null_count = + (null_total_ - offset > 0) ? (null_total_ - offset) : 0; + } else { + out_data_.null_count = length; + } } else { - out_data_.null_count = - (offset - valid_total) > 0 ? length : (offset + length - valid_total); + auto valid_total = total_length - null_total_; + if ((offset + length) < valid_total) { + out_data_.null_count = 0; + } else { + out_data_.null_count = + (offset - valid_total) > 0 ? length : (offset + length - valid_total); + } } } } @@ -1115,30 +1119,9 @@ class SortInplaceKernel : public SortArraysToIndicesKernel::Impl { return arrow::Status::OK(); } - arrow::Status SliceBitmapImpl(const Bitmap& bitmap, - std::shared_ptr* out) { - auto length = bitmap.range.length; - auto offset = bitmap.range.offset; - ARROW_ASSIGN_OR_RAISE(*out, AllocateBitmap(length, pool_)); - uint8_t* dst = (*out)->mutable_data(); - - int64_t bitmap_offset = 0; - if (bitmap.AllSet()) { - arrow::BitUtil::SetBitsTo(dst, offset, length, true); - } else { - arrow::internal::CopyBitmap(bitmap.data, offset, length, dst, bitmap_offset); - } - - // finally (if applicable) zero out any trailing bits - if (auto preceding_bits = arrow::BitUtil::kPrecedingBitmask[length_ % 8]) { - dst[length_ / 8] &= preceding_bits; - } - return arrow::Status::OK(); - } - arrow::Status SliceBitmap(const std::shared_ptr& buffer, std::shared_ptr* out) { - Range range(size * offset_, size * length_); + Range range(offset_, length_); Bitmap bitmap = Bitmap(buffer, range); auto length = bitmap.range.length; @@ -1146,16 +1129,10 @@ class SortInplaceKernel : public SortArraysToIndicesKernel::Impl { ARROW_ASSIGN_OR_RAISE(*out, AllocateBitmap(length, pool_)); uint8_t* dst = (*out)->mutable_data(); - int64_t bitmap_offset = 0; if (bitmap.AllSet()) { arrow::BitUtil::SetBitsTo(dst, offset, length, true); } else { - arrow::internal::CopyBitmap(bitmap.data, offset, length, dst, bitmap_offset); - } - - // finally (if applicable) zero out any trailing bits - if (auto preceding_bits = arrow::BitUtil::kPrecedingBitmask[length_ % 8]) { - dst[length_ / 8] &= preceding_bits; + arrow::internal::CopyBitmap(bitmap.data, offset, length, dst, 0); } return arrow::Status::OK(); } @@ -1170,17 +1147,16 @@ class SortInplaceKernel : public SortArraysToIndicesKernel::Impl { }; arrow::Status Next(std::shared_ptr* out) { - auto length = (total_length_ - total_offset_) > batch_size_ - ? batch_size_ - : (total_length_ - total_offset_); + auto length = (total_length_ - offset_) > batch_size_ ? batch_size_ + : (total_length_ - offset_); arrow::ArrayData result_data = *result_arr_->data(); arrow::ArrayData out_data; - SliceImpl(result_data, ctx_->memory_pool(), length, total_offset_, - nulls_total_, nulls_first_, total_length_) + SliceImpl(result_data, ctx_->memory_pool(), length, offset_, nulls_total_, + nulls_first_, total_length_) .Slice(&out_data); std::shared_ptr out_0 = MakeArray(std::make_shared(std::move(out_data))); - total_offset_ += length; + offset_ += length; *out = arrow::RecordBatch::Make(result_schema_, length, {out_0}); return arrow::Status::OK(); } @@ -1189,8 +1165,7 @@ class SortInplaceKernel : public SortArraysToIndicesKernel::Impl { using ArrayType_0 = typename arrow::TypeTraits::ArrayType; using BuilderType_0 = typename arrow::TypeTraits::BuilderType; std::shared_ptr result_arr_; - uint64_t total_offset_ = 0; - uint64_t valid_offset_ = 0; + uint64_t offset_ = 0; const uint64_t total_length_; const uint64_t nulls_total_; std::shared_ptr result_schema_;