Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

DenseSet::Grow optimization #3894

Open
wants to merge 5 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
187 changes: 108 additions & 79 deletions src/core/dense_set.cc
Original file line number Diff line number Diff line change
Expand Up @@ -90,6 +90,11 @@ DenseSet::DenseSet(MemoryResource* mr) : entries_(mr) {
}

DenseSet::~DenseSet() {
while (cached_link_num) {
mr()->deallocate(cached_links[--cached_link_num], sizeof(DenseLinkKey), alignof(DenseLinkKey));
--num_links_;
}

// We can not call Clear from the base class because it internally calls ObjDelete which is
// a virtual function. Therefore, destructor of the derived classes must clean up the table.
CHECK(entries_.empty());
Expand Down Expand Up @@ -306,6 +311,73 @@ void DenseSet::ClearBatch(unsigned len, ClearItem* items) {
len = dest_id;
}
}

void DenseSet::GrowBatch(uint32_t len, GrowItem* items,
std::vector<DensePtr, DensePtrAllocator>* new_entries) {
while (len) {
unsigned dest_id = 0;
for (uint32_t i = 0; i < len; ++i) {
auto& item = items[i];
if (!item.obj.IsEmpty()) {
DensePtr* curr = &item.obj;

if (ExpireIfNeeded(nullptr, curr)) {
// if curr has disappeared due to expiry and prev was converted from Link to a
// regular DensePtr
}

if (curr->IsEmpty())
continue;
void* ptr = curr->GetObject();

DCHECK(ptr != nullptr && ObjectAllocSize(ptr));

uint32_t bid = BucketId(ptr, 0);

// if the item does not move from the current chain, ensure
// it is not marked as displaced and move to the next item in the chain

auto dest = new_entries->begin() + bid;
DensePtr dptr = *curr;

if (curr->IsObject()) {
curr->Reset(); // reset the original placeholder (.next or root)

DVLOG(2) << " Pushing to " << bid << " " << dptr.GetObject();
DCHECK_EQ(BucketId(dptr.GetObject(), 0), bid);
PushFront(dest, dptr);

dest->ClearDisplaced();

continue;
} // if IsObject

*curr = *dptr.Next();
if (curr->IsLink()) {
PREFETCH_READ(curr->AsLink());
}

PREFETCH_READ(curr->Raw());
DCHECK(!curr->IsEmpty());

PushFront(dest, dptr);
dest->ClearDisplaced();
} else {
auto link = item.ptr.AsLink();
PREFETCH_READ(link->next.Raw());
PREFETCH_READ(link->Raw());

item.obj = item.ptr;
item.ptr.Reset();
}

items[dest_id++] = {item.ptr, item.obj};
}
// update the length of the batch for the next iteration.
len = dest_id;
}
}

bool DenseSet::NoItemBelongsBucket(uint32_t bid) const {
auto& entries = const_cast<DenseSet*>(this)->entries_;
DensePtr* curr = &entries[bid];
Expand Down Expand Up @@ -365,10 +437,8 @@ void DenseSet::Reserve(size_t sz) {

sz = absl::bit_ceil(sz);
if (sz > entries_.size()) {
size_t prev_size = entries_.size();
entries_.resize(sz);
capacity_log_ = absl::bit_width(sz) - 1;
Grow(prev_size);
capacity_log_ = absl::bit_width(sz) - 1; // TODO move into Grow()
Grow(sz);
}
}

Expand Down Expand Up @@ -408,78 +478,35 @@ void DenseSet::Fill(DenseSet* other) const {
CloneBatch(len, arr, other);
}

void DenseSet::Grow(size_t prev_size) {
void DenseSet::Grow(size_t new_size) {
decltype(entries_) ne(new_size, entries_.get_allocator());
const auto kMaxBatchLen = 32;
GrowItem items[kMaxBatchLen];
uint32_t len = 0;
// perform rehashing of items in the set
for (long i = prev_size - 1; i >= 0; --i) {
DensePtr* curr = &entries_[i];
DensePtr* prev = nullptr;

while (true) {
if (ExpireIfNeeded(prev, curr)) {
// if curr has disappeared due to expiry and prev was converted from Link to a
// regular DensePtr
if (prev && !prev->IsLink())
break;
}

if (curr->IsEmpty())
break;
void* ptr = curr->GetObject();

DCHECK(ptr != nullptr && ObjectAllocSize(ptr));

uint32_t bid = BucketId(ptr, 0);

// if the item does not move from the current chain, ensure
// it is not marked as displaced and move to the next item in the chain
if (bid == i) {
curr->ClearDisplaced();
prev = curr;
curr = curr->Next();
if (curr == nullptr)
break;
for (auto& entry : entries_) {
PREFETCH_READ(entry.Raw());
if (!entry.IsEmpty()) {
auto& item = items[len++];
if (entry.IsLink()) {
item.ptr = entry;
item.obj.Reset();
} else {
// if the entry is in the wrong chain remove it and
// add it to the correct chain. This will also correct
// displaced entries
auto dest = entries_.begin() + bid;
DensePtr dptr = *curr;

if (curr->IsObject()) {
curr->Reset(); // reset the original placeholder (.next or root)

if (prev) {
DCHECK(prev->IsLink());

DenseLinkKey* plink = prev->AsLink();
DCHECK(&plink->next == curr);

// we want to make *prev a DensePtr instead of DenseLink and we
// want to deallocate the link.
DensePtr tmp = DensePtr::From(plink);
DCHECK(ObjectAllocSize(tmp.GetObject()));

FreeLink(plink);
*prev = tmp;
}

DVLOG(2) << " Pushing to " << bid << " " << dptr.GetObject();
DCHECK_EQ(BucketId(dptr.GetObject(), 0), bid);
PushFront(dest, dptr);

dest->ClearDisplaced();

break;
} // if IsObject

*curr = *dptr.Next();
DCHECK(!curr->IsEmpty());
item.ptr.Reset();
item.obj = entry;
}

PushFront(dest, dptr);
dest->ClearDisplaced();
if (len == kMaxBatchLen) {
GrowBatch(len, items, &ne);
len = 0;
}
}
}
if (len) {
GrowBatch(len, items, &ne);
}
// TODO add DCHECK that entries_ is empty
entries_ = std::move(ne);
}

// Assumes that the object does not exist in the set.
Expand Down Expand Up @@ -511,11 +538,8 @@ void DenseSet::AddUnique(void* obj, bool has_ttl, uint64_t hashcode) {
break;
}

size_t prev_size = entries_.size();
entries_.resize(prev_size * 2);
++capacity_log_;

Grow(prev_size);
++capacity_log_; // TODO move into Grow()
Grow(entries_.size() * 2);
bucket_id = BucketId(hashcode);
}

Expand Down Expand Up @@ -779,13 +803,18 @@ uint32_t DenseSet::Scan(uint32_t cursor, const ItemCb& cb) const {
}

auto DenseSet::NewLink(void* data, DensePtr next) -> DenseLinkKey* {
LinkAllocator la(mr());
DenseLinkKey* lk = la.allocate(1);
la.construct(lk);
DenseLinkKey* lk = nullptr;
if (cached_link_num) {
lk = cached_links[--cached_link_num];
} else {
LinkAllocator la(mr());
lk = la.allocate(1);
la.construct(lk);
++num_links_;
}

lk->next = next;
lk->SetObject(data);
++num_links_;

return lk;
}
Expand Down
23 changes: 19 additions & 4 deletions src/core/dense_set.h
Original file line number Diff line number Diff line change
Expand Up @@ -341,6 +341,13 @@ class DenseSet {
using ClearItem = CloneItem;
void ClearBatch(unsigned len, ClearItem* items);

struct GrowItem {
DensePtr ptr;
DensePtr obj;
};
void GrowBatch(uint32_t len, GrowItem* items,
std::vector<DensePtr, DensePtrAllocator>* new_entries);

MemoryResource* mr() {
return entries_.get_allocator().resource();
}
Expand All @@ -360,7 +367,7 @@ class DenseSet {
// Return if bucket has no item which is not displaced and right/left bucket has no displaced item
// belong to given bid
bool NoItemBelongsBucket(uint32_t bid) const;
void Grow(size_t prev_size);
void Grow(size_t new_size);

// ============ Pseudo Linked List Functions for interacting with Chains ==================
size_t PushFront(ChainVectorIterator, void* obj, bool has_ttl);
Expand All @@ -383,9 +390,13 @@ class DenseSet {
DenseLinkKey* NewLink(void* data, DensePtr next);

inline void FreeLink(DenseLinkKey* plink) {
// deallocate the link if it is no longer a link as it is now in an empty list
mr()->deallocate(plink, sizeof(DenseLinkKey), alignof(DenseLinkKey));
--num_links_;
if (cached_link_num < max_cached_links) {
cached_links[cached_link_num++] = plink;
} else {
// deallocate the link if it is no longer a link as it is now in an empty list
mr()->deallocate(plink, sizeof(DenseLinkKey), alignof(DenseLinkKey));
--num_links_;
}
}

// Returns true if *node was deleted.
Expand Down Expand Up @@ -413,6 +424,10 @@ class DenseSet {
uint32_t time_now_ = 0;

mutable bool expiration_used_ = false;

static constexpr uint32_t max_cached_links = 8;
DenseLinkKey* cached_links[max_cached_links];
uint32_t cached_link_num = 0;
};

inline void* DenseSet::FindInternal(const void* obj, uint64_t hashcode, uint32_t cookie) const {
Expand Down
22 changes: 22 additions & 0 deletions src/core/string_set_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -574,4 +574,26 @@ void BM_AddMany(benchmark::State& state) {
}
BENCHMARK(BM_AddMany);

void BM_Grow(benchmark::State& state) {
vector<string> strs;
mt19937 generator(0);
StringSet ss;
unsigned elems = 2 << 15;
for (size_t i = 0; i < elems; ++i) {
strs.push_back(random_string(generator, 16));
}
ss.Reserve(elems);

while (state.KeepRunning()) {
state.PauseTiming();
ss.Clear();
ss.Reserve(elems);
ss.AddMany(absl::MakeSpan(strs), UINT32_MAX);
CHECK_EQ(ss.UpperBoundSize(), elems);
state.ResumeTiming();
ss.Add(random_string(generator, 16));
}
}
BENCHMARK(BM_Grow);

} // namespace dfly
Loading