Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix max_caps possibly not being updated after GC #4323

Merged
merged 9 commits into from
Mar 21, 2022
Merged
Show file tree
Hide file tree
Changes from 7 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
53 changes: 40 additions & 13 deletions dbms/src/Storages/Page/V3/BlobStore.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -192,18 +192,36 @@ PageEntriesEdit BlobStore::write(DB::WriteBatch & wb, const WriteLimiterPtr & wr

void BlobStore::remove(const PageEntriesV3 & del_entries)
{
std::set<BlobFileId> blob_updated;
for (const auto & entry : del_entries)
{
blob_updated.insert(entry.file_id);
// External page size is 0
if (entry.size == 0)
{
continue;
// throw Exception(fmt::format("Invaild entry. entry size 0. [id={}] [offset={}]",
// entry.file_id,
// entry.offset));
}
removePosFromStats(entry.file_id, entry.offset, entry.size);
}

// After we remove postion of blob, we need recalculate the blob.
for (const auto & blob_id : blob_updated)
{
const auto & stat = blob_stats.blobIdToStat(blob_id,
/*restore_if_not_exist*/ false,
/*ignore_not_exist*/ true);

// Some of blob may been removed.
// So if we can't use id find blob, just ignore it.
if (stat)
{
LOG_FMT_TRACE(log, "Blob begin to recalculate capability [blob_id={}]", blob_id);
{
auto lock = stat->lock();
stat->recalculateCapacity();
}
}
}
}

std::pair<BlobFileId, BlobFileOffset> BlobStore::getPosFromStats(size_t size)
Expand All @@ -213,7 +231,7 @@ std::pair<BlobFileId, BlobFileOffset> BlobStore::getPosFromStats(size_t size)
auto lock_stat = [size, this, &stat]() -> std::lock_guard<std::mutex> {
auto lock_stats = blob_stats.lock();
BlobFileId blob_file_id = INVALID_BLOBFILE_ID;
std::tie(stat, blob_file_id) = blob_stats.chooseStat(size, config.file_limit_size, lock_stats);
std::tie(stat, blob_file_id) = blob_stats.chooseStat(size, lock_stats);

// No valid stat for putting data with `size`, create a new one
if (stat == nullptr)
Expand Down Expand Up @@ -743,13 +761,12 @@ void BlobStore::BlobStats::restoreByEntry(const PageEntryV3 & entry)
/// Recalculate the space-map statistics of every stat in `stats_map` and
/// restore `roll_id` to one past the largest blob file id seen.
void BlobStore::BlobStats::restore()
{
    BlobFileId max_restored_file_id = 0;
    for (const auto & stat : stats_map)
    {
        stat->recalculateSpaceMap();
        max_restored_file_id = std::max(stat->id, max_restored_file_id);
    }

    // restore `roll_id`
    roll_id = max_restored_file_id + 1;
}
Expand Down Expand Up @@ -832,7 +849,7 @@ void BlobStore::BlobStats::eraseStat(BlobFileId blob_file_id, const std::lock_gu
eraseStat(std::move(stat), lock);
}

std::pair<BlobStatPtr, BlobFileId> BlobStore::BlobStats::chooseStat(size_t buf_size, UInt64 file_limit_size, const std::lock_guard<std::mutex> &)
std::pair<BlobStatPtr, BlobFileId> BlobStore::BlobStats::chooseStat(size_t buf_size, const std::lock_guard<std::mutex> &)
{
BlobStatPtr stat_ptr = nullptr;
double smallest_valid_rate = 2;
Expand All @@ -847,10 +864,9 @@ std::pair<BlobStatPtr, BlobFileId> BlobStore::BlobStats::chooseStat(size_t buf_s
{
if (!stat->isReadOnly()
&& stat->sm_max_caps >= buf_size
&& stat->sm_total_size + buf_size < file_limit_size
&& stat->sm_valid_rate < smallest_valid_rate)
{
smallest_valid_rate = stat->sm_valid_size;
smallest_valid_rate = stat->sm_valid_rate;
stat_ptr = stat;
}
}
Expand All @@ -863,7 +879,7 @@ std::pair<BlobStatPtr, BlobFileId> BlobStore::BlobStats::chooseStat(size_t buf_s
return std::make_pair(stat_ptr, INVALID_BLOBFILE_ID);
}

BlobStatPtr BlobStore::BlobStats::blobIdToStat(BlobFileId file_id, bool restore_if_not_exist)
BlobStatPtr BlobStore::BlobStats::blobIdToStat(BlobFileId file_id, bool restore_if_not_exist, bool ignore_not_exist)
{
auto guard = lock();
for (auto & stat : stats_map)
Expand All @@ -880,9 +896,14 @@ BlobStatPtr BlobStore::BlobStats::blobIdToStat(BlobFileId file_id, bool restore_
return createStatNotCheckingRoll(file_id, guard);
}

throw Exception(fmt::format("Can't find BlobStat with [blob_id={}]",
file_id),
ErrorCodes::LOGICAL_ERROR);
if (!ignore_not_exist)
{
throw Exception(fmt::format("Can't find BlobStat with [blob_id={}]",
file_id),
ErrorCodes::LOGICAL_ERROR);
}

return nullptr;
}

/*********************
Expand Down Expand Up @@ -960,6 +981,12 @@ void BlobStore::BlobStats::BlobStat::recalculateSpaceMap()
sm_total_size = total_size;
sm_valid_size = valid_size;
sm_valid_rate = valid_size * 1.0 / total_size;
recalculateCapacity();
jiaqizho marked this conversation as resolved.
Show resolved Hide resolved
}

/// Refresh `sm_max_caps` with the value reported by the space map's
/// `updateAccurateMaxCapacity()`.
void BlobStore::BlobStats::BlobStat::recalculateCapacity()
{
    const auto accurate_max_cap = smap->updateAccurateMaxCapacity();
    sm_max_caps = accurate_max_cap;
}

} // namespace PS::V3
Expand Down
10 changes: 8 additions & 2 deletions dbms/src/Storages/Page/V3/BlobStore.h
Original file line number Diff line number Diff line change
Expand Up @@ -110,6 +110,12 @@ class BlobStore : private Allocator<false>
* We still need to recalculate a `sm_total_size`/`sm_valid_size`/`sm_valid_rate`.
*/
void recalculateSpaceMap();

/**
 * The `sm_max_caps` is not accurate after GC removes out-of-date data, or after restoring from disk.
 * Caller should call this function to update the `sm_max_caps` so that we can reuse the space in this BlobStat.
 * The new value is taken from the space map's `updateAccurateMaxCapacity()`.
 */
void recalculateCapacity();
};

using BlobStatPtr = std::shared_ptr<BlobStat>;
Expand Down Expand Up @@ -141,9 +147,9 @@ class BlobStore : private Allocator<false>
* The `INVALID_BLOBFILE_ID` means that you don't need create a new `BlobFile`.
*
*/
std::pair<BlobStatPtr, BlobFileId> chooseStat(size_t buf_size, UInt64 file_limit_size, const std::lock_guard<std::mutex> &);
std::pair<BlobStatPtr, BlobFileId> chooseStat(size_t buf_size, const std::lock_guard<std::mutex> &);

BlobStatPtr blobIdToStat(BlobFileId file_id, bool restore_if_not_exist = false);
BlobStatPtr blobIdToStat(BlobFileId file_id, bool restore_if_not_exist = false, bool ignore_not_exist = false);

std::list<BlobStatPtr> getStats() const
{
Expand Down
4 changes: 4 additions & 0 deletions dbms/src/Storages/Page/V3/spacemap/SpaceMap.h
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,10 @@ class SpaceMap
*/
virtual UInt64 getRightMargin() = 0;

/**
 * Recompute and return the accurate max capacity of the space map,
 * i.e. the size of the largest free range; 0 when there is no free range.
 * Despite reading like a getter this is an update: the RBTree and STDMap
 * implementations also refresh their cached "biggest range" hints.
 */
virtual UInt64 updateAccurateMaxCapacity() = 0;

/**
* Return the size of file and the size contains valid data.
Expand Down
62 changes: 53 additions & 9 deletions dbms/src/Storages/Page/V3/spacemap/SpaceMapRBTree.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -650,6 +650,34 @@ std::pair<UInt64, UInt64> RBTreeSpaceMap::searchInsertOffset(size_t size)
return std::make_pair(offset, max_cap);
}

/// Walk every node of the tree, find the entry with the largest `count`,
/// record it in `biggest_range` / `biggest_cap` and return that count.
/// Returns 0 when the tree has no node.
UInt64 RBTreeSpaceMap::updateAccurateMaxCapacity()
{
    struct rb_node * cursor = rb_tree_first(&rb_tree->root);
    if (cursor == nullptr)
    {
        // Empty tree: report 0. The cached `biggest_range` / `biggest_cap`
        // are deliberately left as they were on this path.
        return 0;
    }

    UInt64 widest_start = 0;
    UInt64 widest_count = 0;
    while (cursor != nullptr)
    {
        struct SmapRbEntry * range = node_to_entry(cursor);
        // Strict comparison: among equally sized entries the first one visited wins.
        if (range->count > widest_count)
        {
            widest_count = range->count;
            widest_start = range->start;
        }
        cursor = rb_tree_next(cursor);
    }

    biggest_cap = widest_count;
    biggest_range = widest_start;
    return widest_count;
}

std::pair<UInt64, UInt64> RBTreeSpaceMap::getSizes() const
{
struct rb_node * node = rb_tree_last(&rb_tree->root);
Expand All @@ -660,18 +688,34 @@ std::pair<UInt64, UInt64> RBTreeSpaceMap::getSizes() const
}

auto * entry = node_to_entry(node);
UInt64 total_size = entry->start - start;
UInt64 last_node_size = entry->count;
UInt64 valid_size = 0;

for (node = rb_tree_first(&rb_tree->root); node != nullptr; node = rb_tree_next(node))
if (entry->start + entry->count != end)
{
entry = node_to_entry(node);
valid_size += entry->count;
UInt64 total_size = end;
UInt64 valid_size = 0;
for (node = rb_tree_first(&rb_tree->root); node != nullptr; node = rb_tree_next(node))
{
entry = node_to_entry(node);
valid_size += entry->count;
}

valid_size = total_size - valid_size;
return std::make_pair(total_size, valid_size);
}
valid_size = total_size - (valid_size - last_node_size);
else
{
UInt64 total_size = entry->start - start;
UInt64 last_node_size = entry->count;
UInt64 valid_size = 0;

return std::make_pair(total_size, valid_size);
for (node = rb_tree_first(&rb_tree->root); node != nullptr; node = rb_tree_next(node))
{
entry = node_to_entry(node);
valid_size += entry->count;
}
valid_size = total_size - (valid_size - last_node_size);

return std::make_pair(total_size, valid_size);
}
}

UInt64 RBTreeSpaceMap::getRightMargin()
Expand Down
2 changes: 2 additions & 0 deletions dbms/src/Storages/Page/V3/spacemap/SpaceMapRBTree.h
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,8 @@ class RBTreeSpaceMap

std::pair<UInt64, UInt64> searchInsertOffset(size_t size) override;

UInt64 updateAccurateMaxCapacity() override;

std::pair<UInt64, UInt64> getSizes() const override;

UInt64 getRightMargin() override;
Expand Down
49 changes: 42 additions & 7 deletions dbms/src/Storages/Page/V3/spacemap/SpaceMapSTDMap.h
Original file line number Diff line number Diff line change
Expand Up @@ -64,17 +64,33 @@ class STDMapSpaceMap
}

const auto & last_free_block = free_map.rbegin();
UInt64 total_size = last_free_block->first - start;
UInt64 last_free_block_size = last_free_block->second;

UInt64 valid_size = 0;
for (const auto & free_block : free_map)
if (last_free_block->first + last_free_block->second != end)
{
valid_size += free_block.second;
UInt64 total_size = end;
UInt64 valid_size = 0;
for (const auto & free_block : free_map)
{
valid_size += free_block.second;
}

valid_size = total_size - valid_size;
jiaqizho marked this conversation as resolved.
Show resolved Hide resolved
return std::make_pair(total_size, valid_size);
}
valid_size = total_size - (valid_size - last_free_block_size);
else
{
UInt64 total_size = last_free_block->first - start;
UInt64 last_free_block_size = last_free_block->second;

return std::make_pair(total_size, valid_size);
UInt64 valid_size = 0;
for (const auto & free_block : free_map)
{
valid_size += free_block.second;
}
valid_size = total_size - (valid_size - last_free_block_size);
JaySon-Huang marked this conversation as resolved.
Show resolved Hide resolved

return std::make_pair(total_size, valid_size);
}
}

UInt64 getRightMargin() override
Expand Down Expand Up @@ -243,6 +259,25 @@ class STDMapSpaceMap
return std::make_pair(offset, hint_biggest_cap);
}

/// Scan every block in `free_map`, remember the largest one in
/// `hint_biggest_offset` / `hint_biggest_cap` and return its size.
/// With no free block both hints become 0 and 0 is returned.
UInt64 updateAccurateMaxCapacity() override
{
    UInt64 widest_offset = 0;
    UInt64 widest_size = 0;

    for (auto it = free_map.begin(); it != free_map.end(); ++it)
    {
        // Strict comparison: among equally sized blocks the first one visited wins.
        if (it->second > widest_size)
        {
            widest_offset = it->first;
            widest_size = it->second;
        }
    }

    hint_biggest_cap = widest_size;
    hint_biggest_offset = widest_offset;
    return widest_size;
}

bool markFreeImpl(UInt64 offset, size_t length) override
{
auto it = free_map.find(offset);
Expand Down
32 changes: 24 additions & 8 deletions dbms/src/Storages/Page/V3/tests/gtest_blob_store.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -123,17 +123,17 @@ TEST_F(BlobStoreStatsTest, testStat)

BlobStats stats(logger, config);

std::tie(stat, blob_file_id) = stats.chooseStat(10, BLOBFILE_LIMIT_SIZE, stats.lock());
std::tie(stat, blob_file_id) = stats.chooseStat(10, stats.lock());
ASSERT_EQ(blob_file_id, 1);
ASSERT_FALSE(stat);

// still 0
std::tie(stat, blob_file_id) = stats.chooseStat(10, BLOBFILE_LIMIT_SIZE, stats.lock());
std::tie(stat, blob_file_id) = stats.chooseStat(10, stats.lock());
ASSERT_EQ(blob_file_id, 1);
ASSERT_FALSE(stat);

stats.createStat(0, stats.lock());
std::tie(stat, blob_file_id) = stats.chooseStat(10, BLOBFILE_LIMIT_SIZE, stats.lock());
std::tie(stat, blob_file_id) = stats.chooseStat(10, stats.lock());
ASSERT_EQ(blob_file_id, INVALID_BLOBFILE_ID);
ASSERT_TRUE(stat);

Expand Down Expand Up @@ -210,7 +210,7 @@ TEST_F(BlobStoreStatsTest, testFullStats)
ASSERT_LE(stat->sm_valid_rate, 1);

// Won't choose full one
std::tie(stat, blob_file_id) = stats.chooseStat(100, BLOBFILE_LIMIT_SIZE, stats.lock());
std::tie(stat, blob_file_id) = stats.chooseStat(100, stats.lock());
ASSERT_EQ(blob_file_id, 2);
ASSERT_FALSE(stat);

Expand All @@ -228,7 +228,7 @@ TEST_F(BlobStoreStatsTest, testFullStats)

// Then choose stat , it should return the stat id 3
// Stat which id is 2 is full.
std::tie(stat, blob_file_id) = stats.chooseStat(100, BLOBFILE_LIMIT_SIZE, stats.lock());
std::tie(stat, blob_file_id) = stats.chooseStat(100, stats.lock());
ASSERT_EQ(blob_file_id, 3);
ASSERT_FALSE(stat);
}
Expand Down Expand Up @@ -257,6 +257,7 @@ TEST_F(BlobStoreTest, Restore)
try
{
const auto file_provider = DB::tests::TiFlashTestEnv::getContext().getFileProvider();
config.file_limit_size = 2560;
auto blob_store = BlobStore(file_provider, path, config);

BlobFileId file_id1 = 10;
Expand Down Expand Up @@ -287,9 +288,24 @@ try
blob_store.blob_stats.restore();
}

auto blob_need_gc = blob_store.getGCStats();
ASSERT_EQ(blob_need_gc.size(), 1);
EXPECT_EQ(blob_need_gc[0], 12);
// check spacemap updated
{
for (const auto & stat : blob_store.blob_stats.getStats())
{
if (stat->id == file_id1)
{
ASSERT_EQ(stat->sm_total_size, 2560);
ASSERT_EQ(stat->sm_valid_size, 640);
ASSERT_EQ(stat->sm_max_caps, 1024);
}
else if (stat->id == file_id2)
{
ASSERT_EQ(stat->sm_total_size, 2560);
ASSERT_EQ(stat->sm_valid_size, 512);
ASSERT_EQ(stat->sm_max_caps, 2048);
}
}
}
}
CATCH

Expand Down
Loading