From 18f90fb80024a034205b06819b12dc8bacd26766 Mon Sep 17 00:00:00 2001 From: Panchen Xue Date: Thu, 18 Jan 2018 17:56:47 -0500 Subject: [PATCH] ARROW-1945: [C++] Add data_capacity_ to track capacity of value data data_capacity_ represents the indicated capacity for value_data_builder and it is always smaller than or equal to the actual capacity of underlying value_data_builder (data_capacity_ <= value_data_builder.capacity()). That's because when we say: ReserveData(capacity); The new capacity is max(data_capacity_, data length + capacity), and data_capacity_ is set to be equal to new capacity but underlying buffer size is set to BitUtil::RoundUpToMultipleOf64(new capacity) to ensure that the capacity of the buffer is a multiple of 64 bytes as defined in Layout.md. That's why data_capacity_ is needed to show the indicated capacity of the BinaryBuilder, just like ArrayBuilder::capacity_ indicates the indicated capacity of ArrayBuilder. A safety check is added in BinaryBuilder::Append() to update data_capacity_ if data length is greater than data_capacity_. The reason is that data_capacity is updated in ResearveData(). But if users make mistakes to append too much data, data length might be larger than data_capacity_ (data length <= actual capacity of underlying value_data_builder). If this happens data_capacity_ is set equal to data length to avoid confusion. --- cpp/src/arrow/builder.cc | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/cpp/src/arrow/builder.cc b/cpp/src/arrow/builder.cc index a6be2e707a223..63588867dfce0 100644 --- a/cpp/src/arrow/builder.cc +++ b/cpp/src/arrow/builder.cc @@ -1208,7 +1208,7 @@ ArrayBuilder* ListBuilder::value_builder() const { // String and binary BinaryBuilder::BinaryBuilder(const std::shared_ptr& type, MemoryPool* pool) - : ArrayBuilder(type, pool), offsets_builder_(pool), value_data_builder_(pool) {} + : ArrayBuilder(type, pool), offsets_builder_(pool), value_data_builder_(pool), data_capacity_(0) {} BinaryBuilder::BinaryBuilder(MemoryPool* pool) : BinaryBuilder(binary(), pool) {} @@ -1227,11 +1227,15 @@ Status BinaryBuilder::Resize(int64_t capacity) { } Status BinaryBuilder::ReserveData(int64_t capacity) { - if(value_data_length() + capacity > std::numeric_limits::max()) { + if (value_data_length() + capacity > data_capacity_) { + if (value_data_length() + capacity > std::numeric_limits::max()) { return Status::Invalid("Cannot reserve capacity larger than 2^31 - 1 in length for binary data"); - } - - return value_data_builder_.Resize(value_data_length() + capacity); + } + + RETURN_NOT_OK(value_data_builder_.Resize(value_data_length() + capacity)); + data_capacity_ = value_data_length() + capacity; + } + return Status::OK(); } Status BinaryBuilder::AppendNextOffset() { @@ -1249,6 +1253,9 @@ Status BinaryBuilder::Append(const uint8_t* value, int32_t length) { RETURN_NOT_OK(Reserve(1)); RETURN_NOT_OK(AppendNextOffset()); RETURN_NOT_OK(value_data_builder_.Append(value, length)); + if (data_capacity_ < value_data_length()) { + data_capacity_ = value_data_length(); + } UnsafeAppendToBitmap(true); return Status::OK(); }