Skip to content

Commit

Permalink
ARROW-2004: [C++] Add shrink_to_fit parameter to BufferBuilder::Resiz…
Browse files Browse the repository at this point in the history
…e, add Reserve method

I also relaxed the requirement to pass `const uint8_t*` so that one can pass `const void*` when writing to a `BufferBuilder`. This will not affect any downstream users

Author: Wes McKinney <[email protected]>

Closes #1486 from wesm/ARROW-2004 and squashes the following commits:

2d6660a [Wes McKinney] Add shrink_to_fit parameter to BufferBuilder::Resize, add Reserve method, relax pointer type in Append
  • Loading branch information
wesm committed Jan 18, 2018
1 parent 1ffce26 commit 58a24c5
Show file tree
Hide file tree
Showing 2 changed files with 55 additions and 11 deletions.
25 changes: 25 additions & 0 deletions cpp/src/arrow/buffer-test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -194,4 +194,29 @@ TEST(TestBuffer, SliceMutableBuffer) {
ASSERT_TRUE(slice->Equals(expected));
}

TEST(TestBufferBuilder, ResizeReserve) {
const std::string data = "some data";
auto data_ptr = data.c_str();

BufferBuilder builder;

ASSERT_OK(builder.Append(data_ptr, 9));
ASSERT_EQ(9, builder.length());

ASSERT_OK(builder.Resize(128));
ASSERT_EQ(128, builder.capacity());

// Do not shrink to fit
ASSERT_OK(builder.Resize(64, false));
ASSERT_EQ(128, builder.capacity());

// Shrink to fit
ASSERT_OK(builder.Resize(64));
ASSERT_EQ(64, builder.capacity());

// Reserve elements
ASSERT_OK(builder.Reserve(60));
ASSERT_EQ(128, builder.capacity());
}

} // namespace arrow
41 changes: 30 additions & 11 deletions cpp/src/arrow/buffer.h
Original file line number Diff line number Diff line change
Expand Up @@ -25,20 +25,20 @@
#include <string>
#include <type_traits>

#include "arrow/memory_pool.h"
#include "arrow/status.h"
#include "arrow/util/bit-util.h"
#include "arrow/util/macros.h"
#include "arrow/util/visibility.h"

namespace arrow {

class MemoryPool;

// ----------------------------------------------------------------------
// Buffer classes

/// Immutable API for a chunk of bytes which may or may not be owned by the
/// class instance.
/// \class Buffer
/// \brief Object containing a pointer to a piece of contiguous memory with a
/// particular size. Base class does not own its memory
///
/// Buffers have two related notions of length: size and capacity. Size is
/// the number of bytes that might have valid data. Capacity is the number
Expand Down Expand Up @@ -133,7 +133,8 @@ ARROW_EXPORT
std::shared_ptr<Buffer> SliceMutableBuffer(const std::shared_ptr<Buffer>& buffer,
const int64_t offset, const int64_t length);

/// A Buffer whose contents can be mutated. May or may not own its data.
/// \class MutableBuffer
/// \brief A Buffer whose contents can be mutated. May or may not own its data.
class ARROW_EXPORT MutableBuffer : public Buffer {
public:
MutableBuffer(uint8_t* data, const int64_t size) : Buffer(data, size) {
Expand All @@ -148,6 +149,8 @@ class ARROW_EXPORT MutableBuffer : public Buffer {
MutableBuffer() : Buffer(NULLPTR, 0) {}
};

/// \class ResizableBuffer
/// \brief A mutable buffer that can be resized
class ARROW_EXPORT ResizableBuffer : public MutableBuffer {
public:
/// Change buffer reported size to indicated size, allocating memory if
Expand Down Expand Up @@ -190,13 +193,22 @@ class ARROW_EXPORT PoolBuffer : public ResizableBuffer {
MemoryPool* pool_;
};

/// \class BufferBuilder
/// \brief A class for incrementally building a contiguous chunk of in-memory data
class ARROW_EXPORT BufferBuilder {
public:
explicit BufferBuilder(MemoryPool* pool)
explicit BufferBuilder(MemoryPool* pool ARROW_MEMORY_POOL_DEFAULT)
: pool_(pool), data_(NULLPTR), capacity_(0), size_(0) {}

/// Resizes the buffer to the nearest multiple of 64 bytes per Layout.md
Status Resize(const int64_t elements) {
/// \brief Resizes the buffer to the nearest multiple of 64 bytes
///
/// \param elements the new capacity of the of the builder. Will be rounded
/// up to a multiple of 64 bytes for padding
/// \param shrink_to_fit if new capacity smaller than existing size,
/// reallocate internal buffer. Set to false to avoid reallocations when
/// shrinking the builder
/// \return Status
Status Resize(const int64_t elements, bool shrink_to_fit = true) {
// Resize(0) is a no-op
if (elements == 0) {
return Status::OK();
Expand All @@ -205,7 +217,7 @@ class ARROW_EXPORT BufferBuilder {
buffer_ = std::make_shared<PoolBuffer>(pool_);
}
int64_t old_capacity = capacity_;
RETURN_NOT_OK(buffer_->Resize(elements));
RETURN_NOT_OK(buffer_->Resize(elements, shrink_to_fit));
capacity_ = buffer_->capacity();
data_ = buffer_->mutable_data();
if (capacity_ > old_capacity) {
Expand All @@ -214,7 +226,14 @@ class ARROW_EXPORT BufferBuilder {
return Status::OK();
}

Status Append(const uint8_t* data, int64_t length) {
/// \brief Ensure that builder can accommodate the additional number of bytes
/// without the need to perform allocations
///
/// \param size number of additional bytes to make space for
/// \return Status
Status Reserve(const int64_t size) { return Resize(size_ + size, false); }

Status Append(const void* data, int64_t length) {
if (capacity_ < length + size_) {
int64_t new_capacity = BitUtil::NextPower2(length + size_);
RETURN_NOT_OK(Resize(new_capacity));
Expand Down Expand Up @@ -248,7 +267,7 @@ class ARROW_EXPORT BufferBuilder {
}

// Unsafe methods don't check existing size
void UnsafeAppend(const uint8_t* data, int64_t length) {
void UnsafeAppend(const void* data, int64_t length) {
memcpy(data_ + size_, data, static_cast<size_t>(length));
size_ += length;
}
Expand Down

0 comments on commit 58a24c5

Please sign in to comment.