-
Notifications
You must be signed in to change notification settings - Fork 949
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
chore(tiering): Faster small bins serialization #2 #3396
base: main
Are you sure you want to change the base?
Changes from 1 commit
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -32,6 +32,7 @@ extern "C" { | |
#include "base/logging.h" | ||
#include "core/bloom.h" | ||
#include "core/json/json_object.h" | ||
#include "core/overloaded.h" | ||
#include "core/sorted_map.h" | ||
#include "core/string_map.h" | ||
#include "core/string_set.h" | ||
|
@@ -48,6 +49,7 @@ extern "C" { | |
#include "server/server_state.h" | ||
#include "server/set_family.h" | ||
#include "server/tiering/common.h" // for _KB literal | ||
#include "server/tiering/disk_storage.h" | ||
#include "server/transaction.h" | ||
#include "strings/human_readable.h" | ||
|
||
|
@@ -387,6 +389,7 @@ class RdbLoaderBase::OpaqueObjLoader { | |
void operator()(const LzfString& lzfstr); | ||
void operator()(const unique_ptr<LoadTrace>& ptr); | ||
void operator()(const RdbSBF& src); | ||
void operator()(const RdbTieredSegment& segmnet); | ||
|
||
std::error_code ec() const { | ||
return ec_; | ||
|
@@ -481,6 +484,10 @@ void RdbLoaderBase::OpaqueObjLoader::operator()(const RdbSBF& src) { | |
pv_->SetSBF(sbf); | ||
} | ||
|
||
void RdbLoaderBase::OpaqueObjLoader::operator()(const RdbTieredSegment& src) { | ||
CHECK(false) << "unreachable"; | ||
} | ||
|
||
void RdbLoaderBase::OpaqueObjLoader::CreateSet(const LoadTrace* ltrace) { | ||
size_t len = ltrace->blob_count(); | ||
|
||
|
@@ -1385,6 +1392,9 @@ error_code RdbLoaderBase::ReadObj(int rdbtype, OpaqueObj* dest) { | |
case RDB_TYPE_SBF: | ||
iores = ReadSBF(); | ||
break; | ||
case RDB_TYPE_TIERED_SEGMENT: | ||
iores = ReadTieredSegment(); | ||
break; | ||
default: | ||
LOG(ERROR) << "Unsupported rdb type " << rdbtype; | ||
|
||
|
@@ -1878,6 +1888,14 @@ auto RdbLoaderBase::ReadSBF() -> io::Result<OpaqueObj> { | |
return OpaqueObj{std::move(res), RDB_TYPE_SBF}; | ||
} | ||
|
||
auto RdbLoaderBase::ReadTieredSegment() -> io::Result<OpaqueObj> { | ||
RdbTieredSegment segment; | ||
SET_OR_UNEXPECT(LoadLen(nullptr), segment.offset); | ||
SET_OR_UNEXPECT(LoadLen(nullptr), segment.length); | ||
SET_OR_UNEXPECT(LoadLen(nullptr), segment.enc_mask); | ||
return OpaqueObj{segment, RDB_TYPE_TIERED_SEGMENT}; | ||
}; | ||
|
||
template <typename T> io::Result<T> RdbLoaderBase::FetchInt() { | ||
auto ec = EnsureRead(sizeof(T)); | ||
if (ec) | ||
|
@@ -1924,6 +1942,18 @@ RdbLoader::RdbLoader(Service* service) | |
} | ||
|
||
RdbLoader::~RdbLoader() { | ||
for (auto& [_, page] : small_items_pages_) { | ||
if (!holds_alternative<tiering::DiskSegment>(page)) | ||
continue; | ||
auto segment = get<tiering::DiskSegment>(page); | ||
EngineShard::tlocal()->tiered_storage()->BorrowStorage().MarkAsFree(segment); | ||
} | ||
|
||
for (auto& [_, items] : small_items_) { | ||
for (Item* item : items) | ||
delete item; | ||
} | ||
|
||
while (true) { | ||
Item* item = item_queue_.Pop(); | ||
if (item == nullptr) | ||
|
@@ -2117,6 +2147,11 @@ error_code RdbLoader::Load(io::Source* src) { | |
continue; | ||
} | ||
|
||
if (type == RDB_OPCODE_TIERED_PAGE) { | ||
RETURN_ON_ERR(LoadTieredPage()); | ||
continue; | ||
} | ||
|
||
if (!rdbIsObjectTypeDF(type)) { | ||
return RdbError(errc::invalid_rdb_type); | ||
} | ||
|
@@ -2126,6 +2161,11 @@ error_code RdbLoader::Load(io::Source* src) { | |
settings.Reset(); | ||
} // main load loop | ||
|
||
// Flush all small items | ||
HandleSmallItems(true); | ||
|
||
FlushAllShards(); | ||
|
||
DVLOG(1) << "RdbLoad loop finished"; | ||
|
||
if (stop_early_) { | ||
|
@@ -2348,6 +2388,38 @@ error_code RdbLoaderBase::HandleJournalBlob(Service* service) { | |
return std::error_code{}; | ||
} | ||
|
||
error_code RdbLoader::LoadTieredPage() { | ||
size_t offset; | ||
SET_OR_RETURN(LoadLen(nullptr), offset); | ||
|
||
std::string page; | ||
SET_OR_RETURN(FetchGenericString(), page); | ||
|
||
// If tiering is enabled, try saving the received page on disk | ||
// Fall back to memory in case of errors | ||
if (EngineShard::tlocal() && EngineShard::tlocal()->tiered_storage()) { | ||
auto& storage = EngineShard::tlocal()->tiered_storage()->BorrowStorage(); | ||
|
||
util::fb2::Done done; | ||
std::error_code ec; | ||
auto cb = [this, offset, &ec, &done](io::Result<tiering::DiskSegment> res) { | ||
if (res.has_value()) | ||
small_items_pages_[offset] = res.value(); | ||
else | ||
ec = res.error(); | ||
done.Notify(); | ||
}; | ||
ec = storage.Stash(io::Buffer(page), {}, cb); | ||
|
||
done.Wait(); | ||
if (!ec) | ||
return {}; | ||
} | ||
|
||
small_items_pages_[offset] = page; | ||
return {}; | ||
} | ||
Comment on lines
+2399
to
+2422
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I really wish to use "virtual" keys to store pages. This would not require having custom code for their serialization, as well custom code for loading and stashing them. We could simply rely on the tiered storage for doing this There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Second, a missing optimization is that we don't re-use the offloaded page, we only use it to read from it There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The idea would be to create a mapping table between new and old offsets and re-create those offsets in small bins There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. you can add TODOs to detail your ideas.. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
I want to use the existing code for serialization and offloading, so pages will be "just values" and managed automatically |
||
|
||
error_code RdbLoader::HandleAux() { | ||
/* AUX: generic string-string fields. Use to add state to RDB | ||
* which is backward compatible. Implementations of RDB loading | ||
|
@@ -2531,20 +2603,37 @@ error_code RdbLoader::LoadKeyValPair(int type, ObjSettings* settings) { | |
|
||
item->is_sticky = settings->is_sticky; | ||
|
||
ShardId sid = Shard(item->key, shard_set->size()); | ||
item->expire_ms = settings->expiretime; | ||
|
||
auto& out_buf = shard_buf_[sid]; | ||
std::move(cleanup).Cancel(); | ||
|
||
if (item->val.rdb_type == RDB_TYPE_TIERED_SEGMENT) { | ||
auto segment = get<RdbTieredSegment>(item->val.obj); | ||
{ | ||
size_t offset = segment.offset / tiering::kPageSize * tiering::kPageSize; | ||
auto& items = small_items_[offset]; | ||
small_items_sizes_.erase({items.size(), offset}); | ||
items.push_back(item); | ||
small_items_sizes_.insert({items.size(), offset}); | ||
} | ||
HandleSmallItems(false); // don't force flush | ||
return kOk; | ||
} | ||
|
||
Add(item); | ||
return kOk; | ||
} | ||
|
||
void RdbLoader::Add(Item* item) { | ||
ShardId sid = Shard(item->key, shard_set->size()); | ||
|
||
auto& out_buf = shard_buf_[sid]; | ||
out_buf.emplace_back(item); | ||
std::move(cleanup).Cancel(); | ||
|
||
constexpr size_t kBufSize = 128; | ||
if (out_buf.size() >= kBufSize) { | ||
FlushShardAsync(sid); | ||
} | ||
|
||
return kOk; | ||
} | ||
|
||
void RdbLoader::LoadScriptFromAux(string&& body) { | ||
|
@@ -2559,6 +2648,42 @@ void RdbLoader::LoadScriptFromAux(string&& body) { | |
} | ||
} | ||
|
||
void RdbLoader::HandleSmallItems(bool flush) { | ||
while (!small_items_.empty() && (flush || small_items_.size() > 1000)) { | ||
auto [_, offset] = small_items_sizes_.extract(small_items_sizes_.begin()).value(); | ||
auto node = small_items_.extract(offset); | ||
|
||
auto page_reader = [](tiering::DiskSegment segment) { | ||
auto& store = EngineShard::tlocal()->tiered_storage()->BorrowStorage(); | ||
util::fb2::Future<std::string> f; | ||
store.Read(segment, [f](io::Result<std::string_view> result) mutable { | ||
CHECK(result.has_value()); // TODO | ||
f.Resolve(string{result.value()}); | ||
}); | ||
return f.Get(); | ||
}; | ||
string page = visit(Overloaded{[](const string& s) { return s; }, page_reader}, | ||
small_items_pages_[offset]); | ||
|
||
for (Item* item : node.mapped()) { | ||
RdbTieredSegment segment = get<RdbTieredSegment>(item->val.obj); | ||
|
||
CompactObj co; | ||
co.SetEncodingMask(segment.enc_mask); | ||
co.Materialize({page.data() + (segment.offset - offset), segment.length}, true); | ||
|
||
VLOG(0) << "Loaded " << co.ToString(); | ||
|
||
base::PODArray<char> arr(co.Size(), nullptr); | ||
co.GetString(arr.data()); | ||
|
||
item->val.rdb_type = RDB_TYPE_STRING; | ||
item->val.obj = std::move(arr); | ||
Add(item); | ||
} | ||
} | ||
} | ||
|
||
void RdbLoader::LoadSearchIndexDefFromAux(string&& def) { | ||
facade::CapturingReplyBuilder crb{}; | ||
ConnectionContext cntx{nullptr, nullptr, &crb}; | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LOG(FATAL)