Skip to content

Commit

Permalink
Add a container for temporary memory (#7500)
Browse files Browse the repository at this point in the history
Summary:
Adds Scratch, a container for reusable, raw, SIMD-padded memory. Use it as working memory in operators instead of temporary std::vectors; reusing the buffers saves mallocs and frees.

Pull Request resolved: #7500

Reviewed By: Yuhta

Differential Revision: D51167868

Pulled By: oerling

fbshipit-source-id: a86a344771967077fa5f07a43403e6ec9697c860
  • Loading branch information
Orri Erling authored and facebook-github-bot committed Nov 27, 2023
1 parent 70320dd commit faa9a3f
Show file tree
Hide file tree
Showing 3 changed files with 234 additions and 0 deletions.
159 changes: 159 additions & 0 deletions velox/common/base/Scratch.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,159 @@
/*
* Copyright (c) Facebook, Inc. and its affiliates.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#pragma once

#include <algorithm>
#include <cstdlib>
#include <memory>
#include <new>
#include <utility>

#include "velox/common/base/RawVector.h"

/// A utility for reusable scoped temporary scratch areas.
namespace facebook::velox {

/// A collection of temporary reusable scratch vectors. The vectors are accessed
/// via the ScratchPtr scoped lease. The vectors are padded so that their last
/// element can be written at full SIMD width, as with raw_vector.
class Scratch {
public:
using Item = raw_vector<char>;

Scratch() = default;
Scratch(const Scratch& other) = delete;

~Scratch() {
reserve(0);
::free(items_);
items_ = nullptr;
capacity_ = 0;
fill_ = 0;
}
void operator=(const Scratch& other) = delete;

/// Returns the next reusable scratch vector or makes a new one.
Item get() {
if (fill_ == 0) {
return Item();
}
auto temp = std::move(items_[fill_ - 1]);
--fill_;
retainedSize_ -= temp.capacity();
return temp;
}

void release(Item&& item) {
retainedSize_ += item.capacity();
if (fill_ == capacity_) {
reserve(std::max(16, 2 * capacity_));
}
items_[fill_++] = std::move(item);
}

void trim() {
reserve(0);
retainedSize_ = 0;
}

size_t retainedSize() {
return retainedSize_;
}

private:
void reserve(int32_t newCapacity) {
VELOX_CHECK_LE(fill_, capacity_);
// Delete the items above the new capacity.
for (auto i = newCapacity; i < fill_; ++i) {
std::destroy_at(&items_[i]);
}
if (newCapacity > capacity_) {
Item* newItems =
reinterpret_cast<Item*>(::malloc(sizeof(Item) * newCapacity));
if (fill_ > 0) {
memcpy(newItems, items_, fill_ * sizeof(Item));
}
memset(newItems + fill_, 0, (newCapacity - fill_) * sizeof(Item));
free(items_);
items_ = newItems;
capacity_ = newCapacity;
}
fill_ = std::min(fill_, newCapacity);
}

Item* items_{nullptr};
int32_t fill_{0};
int32_t capacity_{0};
// The total size held. If too large from outlier use cases, 'this' should be
// trimmed.
int64_t retainedSize_{0};
};

/// Scoped lease of a temporary area of T backed by a Scratch. A request for
/// at most 'inlineSize' elements is served from storage embedded in the
/// ScratchPtr itself (typically on the stack) and never touches the Scratch.
/// The embedded storage is followed by simd::kPadding bytes so that a full
/// width SIMD store at the end of the area stays inside the object. Larger
/// requests borrow a vector from the Scratch and hand it back on destruction.
template <typename T, int32_t inlineSize = 0>
class ScratchPtr {
 public:
  ScratchPtr(Scratch& scratch) : scratch_(&scratch) {}

  ScratchPtr(const ScratchPtr& other) = delete;
  ScratchPtr(ScratchPtr&& other) = delete;

  /// Gives the borrowed vector, if any, back to the Scratch. Inline areas
  /// have nothing to return.
  inline ~ScratchPtr() {
    if (data_.data() == nullptr) {
      return;
    }
    scratch_->release(std::move(data_));
  }

  void operator=(ScratchPtr&& other) = delete;
  void operator=(const ScratchPtr& other) = delete;

  /// Hands out a writable, uninitialized area of at least 'size' elements of
  /// T. simd::kPadding bytes follow the last element, so any element may be
  /// written with a full width SIMD store. Only one call is allowed over the
  /// lifetime of 'this'.
  T* get(int32_t size) {
    VELOX_CHECK_NULL(ptr_);
    size_ = size;
    if (size > inlineSize) {
      // Too large for the embedded storage: borrow from the Scratch.
      data_ = scratch_->get();
      data_.resize(size * sizeof(T));
      ptr_ = reinterpret_cast<T*>(data_.data());
    } else {
      ptr_ = inline_;
    }
    return ptr_;
  }

  /// Returns the area handed out by the earlier get(int32_t) call.
  T* get() const {
    VELOX_DCHECK_NOT_NULL(ptr_);
    return ptr_;
  }

  /// Returns the element count passed to the earlier get(int32_t) call.
  int32_t size() const {
    return size_;
  }

 private:
  // The Scratch that borrowed vectors come from and go back to.
  Scratch* scratch_{nullptr};
  // The borrowed vector. Stays empty when the inline storage is used.
  raw_vector<char> data_;
  // Start of the leased area, nullptr until get(int32_t) is called.
  T* ptr_{nullptr};
  // Element count of the leased area.
  int32_t size_{0};
  // Embedded storage for requests of up to 'inlineSize' elements.
  T inline_[inlineSize];
  // Trailer after 'inline_' for full width SIMD stores. Empty if there is no
  // inline storage.
  char padding_[inlineSize == 0 ? 0 : simd::kPadding];
};

} // namespace facebook::velox
1 change: 1 addition & 0 deletions velox/common/base/tests/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ add_executable(
FsTest.cpp
RangeTest.cpp
RawVectorTest.cpp
ScratchTest.cpp
RuntimeMetricsTest.cpp
ScopedLockTest.cpp
SemaphoreTest.cpp
Expand Down
74 changes: 74 additions & 0 deletions velox/common/base/tests/ScratchTest.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,74 @@
/*
* Copyright (c) Facebook, Inc. and its affiliates.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#include "velox/common/base/Scratch.h"

#include <gtest/gtest.h>

using namespace facebook::velox;

// Checks the retained size accounting of Scratch as ScratchPtr leases are
// taken and returned, including the inline case that never touches Scratch.
TEST(ScratchTest, basic) {
  Scratch scratch;
  {
    ScratchPtr<int32_t> ints(scratch);
    ScratchPtr<int64_t> longs(scratch);
    auto tempInts = ints.get(1000);
    auto tempLongs = longs.get(2000);
    std::fill(tempInts, tempInts + 1000, -1);
    std::fill(tempLongs, tempLongs + 2000, -1);
    // Both vectors are leased out, nothing is held by 'scratch'.
    EXPECT_EQ(0, scratch.retainedSize());
  }
  // Both leases ended, so their vectors are now held by 'scratch'.
  EXPECT_EQ(20352, scratch.retainedSize());
  {
    ScratchPtr<int32_t> ints(scratch);
    ScratchPtr<int64_t> longs(scratch);
    auto tempLongs = longs.get(2000);
    auto tempInts = ints.get(1000);
    std::fill(tempInts, tempInts + 1000, -1);
    // Write the whole int64_t area. Only 1000 elements were requested for
    // 'tempInts', so it must not be written past that.
    std::fill(tempLongs, tempLongs + 2000, -1);
    EXPECT_EQ(0, scratch.retainedSize());
  }
  // The scratch vectors were acquired in a different order, so the smaller got
  // resized to the larger size.
  EXPECT_EQ(32640, scratch.retainedSize());
  scratch.trim();
  EXPECT_EQ(0, scratch.retainedSize());
  {
    ScratchPtr<int32_t, 10> ints(scratch);
    // The size is the inline size, nothing gets returned to 'scratch'.
    auto temp = ints.get(10);
    temp[0] = 1;
  }
  EXPECT_EQ(0, scratch.retainedSize());
}

// Leases more vectors than the initial slot capacity of Scratch, returns them
// all, and leases them all again.
TEST(ScratchTest, large) {
  constexpr int32_t kSize = 100;
  constexpr int32_t kNumElements = 1000;
  Scratch scratch;
  std::vector<std::unique_ptr<ScratchPtr<int32_t>>> pointers;
  for (auto i = 0; i < kSize; ++i) {
    pointers.push_back(std::make_unique<ScratchPtr<int32_t>>(scratch));
    pointers.back()->get(kNumElements);
  }
  pointers.clear();
  // 'kSize' vectors of 'kNumElements' int32_t each were returned. Every vector
  // holds at least the requested number of bytes.
  EXPECT_LE(kSize * kNumElements * sizeof(int32_t), scratch.retainedSize());
  for (auto i = 0; i < kSize; ++i) {
    pointers.push_back(std::make_unique<ScratchPtr<int32_t>>(scratch));
    pointers.back()->get(kNumElements);
  }
  // All retained vectors are leased out again.
  EXPECT_EQ(0, scratch.retainedSize());
}

0 comments on commit faa9a3f

Please sign in to comment.