Skip to content

Commit

Permalink
Fix blobstore truncate size may not right (pingcap#5127) (pingcap#5147)
Browse files Browse the repository at this point in the history
  • Loading branch information
ti-chi-bot authored Aug 18, 2022
1 parent 6feacc2 commit e7deb68
Show file tree
Hide file tree
Showing 8 changed files with 125 additions and 29 deletions.
55 changes: 41 additions & 14 deletions dbms/src/Storages/Page/V3/BlobStore.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -846,8 +846,8 @@ struct BlobStoreGCInfo
toTypeString("Read-Only Blob", 0),
toTypeString("No GC Blob", 1),
toTypeString("Full GC Blob", 2),
toTypeString("Truncated Blob", 3),
toTypeString("Big Blob", 4));
toTypeString("Big Blob", 3),
toTypeTruncateString("Truncated Blob"));
}

void appendToReadOnlyBlob(const BlobFileId blob_id, double valid_rate)
Expand All @@ -865,23 +865,24 @@ struct BlobStoreGCInfo
blob_gc_info[2].emplace_back(std::make_pair(blob_id, valid_rate));
}

void appendToTruncatedBlob(const BlobFileId blob_id, double valid_rate)
void appendToBigBlob(const BlobFileId blob_id, double valid_rate)
{
blob_gc_info[3].emplace_back(std::make_pair(blob_id, valid_rate));
}

void appendToBigBlob(const BlobFileId blob_id, double valid_rate)
void appendToTruncatedBlob(const BlobFileId blob_id, UInt64 origin_size, UInt64 truncated_size, double valid_rate)
{
blob_gc_info[4].emplace_back(std::make_pair(blob_id, valid_rate));
blob_gc_truncate_info.emplace_back(std::make_tuple(blob_id, origin_size, truncated_size, valid_rate));
}

private:
// 1. read only blob
// 2. no need gc blob
// 3. full gc blob
// 4. need truncate blob
// 5. big blob
std::vector<std::pair<BlobFileId, double>> blob_gc_info[5];
// 4. big blob
std::vector<std::pair<BlobFileId, double>> blob_gc_info[4];

std::vector<std::tuple<BlobFileId, UInt64, UInt64, double>> blob_gc_truncate_info;

String toTypeString(const std::string_view prefix, const size_t index) const
{
Expand All @@ -906,6 +907,32 @@ struct BlobStoreGCInfo

return fmt_buf.toString();
}

String toTypeTruncateString(const std::string_view prefix) const
{
FmtBuffer fmt_buf;
if (blob_gc_truncate_info.empty())
{
fmt_buf.fmtAppend("{}: [null]", prefix);
}
else
{
fmt_buf.fmtAppend("{}: [", prefix);
fmt_buf.joinStr(
blob_gc_truncate_info.begin(),
blob_gc_truncate_info.end(),
[](const auto arg, FmtBuffer & fb) {
fb.fmtAppend("{} origin: {} truncate: {} rate: {:.2f}", //
std::get<0>(arg), // blob id
std::get<1>(arg), // origin size
std::get<2>(arg), // truncated size
std::get<3>(arg)); // valid rate
},
", ");
fmt_buf.append("]");
}
return fmt_buf.toString();
}
};

std::vector<BlobFileId> BlobStore::getGCStats()
Expand Down Expand Up @@ -935,7 +962,7 @@ std::vector<BlobFileId> BlobStore::getGCStats()
}

auto lock = stat->lock();
auto right_margin = stat->smap->getRightMargin();
auto right_margin = stat->smap->getUsedBoundary();

// Avoid divide by zero
if (right_margin == 0)
Expand All @@ -948,14 +975,13 @@ std::vector<BlobFileId> BlobStore::getGCStats()
stat->sm_valid_rate));
}

LOG_FMT_TRACE(log, "Current blob is empty [blob_id={}, total size(all invalid)={}] [valid_rate={}].", stat->id, stat->sm_total_size, stat->sm_valid_rate);

// If current blob empty, the size of in disk blob may not empty
// So we need truncate current blob, and let it be reused.
auto blobfile = getBlobFile(stat->id);
LOG_FMT_TRACE(log, "Truncate empty blob file [blob_id={}] to 0.", stat->id);
LOG_FMT_INFO(log, "Current blob file is empty, truncated to zero [blob_id={}] [total_size={}] [valid_rate={}]", stat->id, stat->sm_total_size, stat->sm_valid_rate);
blobfile->truncate(right_margin);
blobstore_gc_info.appendToTruncatedBlob(stat->id, stat->sm_valid_rate);
blobstore_gc_info.appendToTruncatedBlob(stat->id, stat->sm_total_size, right_margin, stat->sm_valid_rate);
stat->sm_total_size = right_margin;
continue;
}

Expand Down Expand Up @@ -996,9 +1022,10 @@ std::vector<BlobFileId> BlobStore::getGCStats()
auto blobfile = getBlobFile(stat->id);
LOG_FMT_TRACE(log, "Truncate blob file [blob_id={}] [origin size={}] [truncated size={}]", stat->id, stat->sm_total_size, right_margin);
blobfile->truncate(right_margin);
blobstore_gc_info.appendToTruncatedBlob(stat->id, stat->sm_total_size, right_margin, stat->sm_valid_rate);

stat->sm_total_size = right_margin;
stat->sm_valid_rate = stat->sm_valid_size * 1.0 / stat->sm_total_size;
blobstore_gc_info.appendToTruncatedBlob(stat->id, stat->sm_valid_rate);
}
}
}
Expand Down
2 changes: 1 addition & 1 deletion dbms/src/Storages/Page/V3/PageDirectory.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1237,7 +1237,7 @@ bool PageDirectory::tryDumpSnapshot(const ReadLimiterPtr & read_limiter, const W
// `being_ref_count` by the function `createSnapshot()`.
assert(!files_snap.persisted_log_files.empty()); // should not be empty when `needSave` return true
auto log_num = files_snap.persisted_log_files.rbegin()->log_num;
auto identifier = fmt::format("{}_dump_{}", wal->name(), log_num);
auto identifier = fmt::format("{}.dump_{}", wal->name(), log_num);
auto snapshot_reader = wal->createReaderForFiles(identifier, files_snap.persisted_log_files, read_limiter);
PageDirectoryFactory factory;
// we just use the `collapsed_dir` to dump edit of the snapshot, should never call functions like `apply` that
Expand Down
6 changes: 4 additions & 2 deletions dbms/src/Storages/Page/V3/spacemap/SpaceMap.h
Original file line number Diff line number Diff line change
Expand Up @@ -95,9 +95,11 @@ class SpaceMap
virtual std::tuple<UInt64, UInt64, bool> searchInsertOffset(size_t size) = 0;

/**
* Get the offset of the last free block. `[margin_offset, +∞)` is not used at all.
* Get the used boundary of this SpaceMap.
* The return value (`used_boundary`) means that `[used_bounary + 1, +∞)` is safe to be truncated.
* If the `used_boundary` is equal to the `end` of this SpaceMap, it means that there is no space to be truncated.
*/
virtual UInt64 getRightMargin() = 0;
virtual UInt64 getUsedBoundary() = 0;

/**
* Get the accurate max capacity of the space map.
Expand Down
2 changes: 1 addition & 1 deletion dbms/src/Storages/Page/V3/spacemap/SpaceMapBig.h
Original file line number Diff line number Diff line change
Expand Up @@ -74,7 +74,7 @@ class BigSpaceMap
return std::make_pair(size_in_used, size_in_used);
}

UInt64 getRightMargin() override
UInt64 getUsedBoundary() override
{
return end;
}
Expand Down
28 changes: 21 additions & 7 deletions dbms/src/Storages/Page/V3/spacemap/SpaceMapRBTree.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -84,7 +84,7 @@ static void rb_get_new_entry(struct SmapRbEntry ** entry, UInt64 start, UInt64 c
{
struct SmapRbEntry * new_entry;

new_entry = static_cast<struct SmapRbEntry *>(calloc(1, sizeof(struct SmapRbEntry)));
new_entry = static_cast<struct SmapRbEntry *>(calloc(1, sizeof(struct SmapRbEntry))); // NOLINT
if (new_entry == nullptr)
{
return;
Expand Down Expand Up @@ -115,7 +115,7 @@ inline static void rb_free_entry(struct RbPrivate * private_data, struct SmapRbE
private_data->read_index_next = nullptr;
}

free(entry);
free(entry); // NOLINT
}


Expand Down Expand Up @@ -419,7 +419,7 @@ std::shared_ptr<RBTreeSpaceMap> RBTreeSpaceMap::create(UInt64 start, UInt64 end)
{
auto ptr = std::shared_ptr<RBTreeSpaceMap>(new RBTreeSpaceMap(start, end));

ptr->rb_tree = static_cast<struct RbPrivate *>(calloc(1, sizeof(struct RbPrivate)));
ptr->rb_tree = static_cast<struct RbPrivate *>(calloc(1, sizeof(struct RbPrivate))); // NOLINT
if (ptr->rb_tree == nullptr)
{
return nullptr;
Expand All @@ -435,7 +435,7 @@ std::shared_ptr<RBTreeSpaceMap> RBTreeSpaceMap::create(UInt64 start, UInt64 end)
if (!rb_insert_entry(start, end, ptr->rb_tree, ptr->log))
{
LOG_FMT_ERROR(ptr->log, "Erorr happend, when mark all space free. [start={}] , [end={}]", start, end);
free(ptr->rb_tree);
free(ptr->rb_tree); // NOLINT
return nullptr;
}
return ptr;
Expand All @@ -451,7 +451,7 @@ static void rb_free_tree(struct rb_root * root)
next = rb_tree_next(node);
entry = node_to_entry(node);
rb_node_remove(node, root);
free(entry);
free(entry); // NOLINT
}
}

Expand All @@ -460,7 +460,7 @@ void RBTreeSpaceMap::freeSmap()
if (rb_tree)
{
rb_free_tree(&rb_tree->root);
free(rb_tree);
free(rb_tree); // NOLINT
}
}

Expand Down Expand Up @@ -734,7 +734,7 @@ std::pair<UInt64, UInt64> RBTreeSpaceMap::getSizes() const
}
}

UInt64 RBTreeSpaceMap::getRightMargin()
UInt64 RBTreeSpaceMap::getUsedBoundary()
{
struct rb_node * node = rb_tree_last(&rb_tree->root);
if (node == nullptr)
Expand All @@ -743,6 +743,20 @@ UInt64 RBTreeSpaceMap::getRightMargin()
}

auto * entry = node_to_entry(node);

// If the `offset+size` of the last free node is not equal to `end`, it means the range `[last_node.offset, end)` is marked as used,
// then we should return `end` as the used boundary.
//
// eg.
// 1. The spacemap manage a space of `[0, 100]`
// 2. A span {offset=90, size=10} is marked as used, then the free range in SpaceMap is `[0, 90)`
// 3. The return value should be 100
if (entry->start + entry->count != end)
{
return end;
}

// Else we should return the offset of last free node
return entry->start;
}

Expand Down
2 changes: 1 addition & 1 deletion dbms/src/Storages/Page/V3/spacemap/SpaceMapRBTree.h
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ class RBTreeSpaceMap

std::pair<UInt64, UInt64> getSizes() const override;

UInt64 getRightMargin() override;
UInt64 getUsedBoundary() override;

protected:
RBTreeSpaceMap(UInt64 start, UInt64 end)
Expand Down
22 changes: 19 additions & 3 deletions dbms/src/Storages/Page/V3/spacemap/SpaceMapSTDMap.h
Original file line number Diff line number Diff line change
Expand Up @@ -111,13 +111,29 @@ class STDMapSpaceMap
}
}

UInt64 getRightMargin() override
UInt64 getUsedBoundary() override
{
if (free_map.empty())
{
return end - start;
return end;
}
return free_map.rbegin()->first;

const auto & last_node_it = free_map.rbegin();

// If the `offset+size` of the last free node is not equal to `end`, it means the range `[last_node.offset, end)` is marked as used,
// then we should return `end` as the used boundary.
//
// eg.
// 1. The spacemap manage a space of `[0, 100]`
// 2. A span {offset=90, size=10} is marked as used, then the free range in SpaceMap is `[0, 90)`
// 3. The return value should be 100
if (last_node_it->first + last_node_it->second != end)
{
return end;
}

// Else we should return the offset of last free node
return last_node_it->first;
}

bool isMarkUnused(UInt64 offset, size_t length) override
Expand Down
37 changes: 37 additions & 0 deletions dbms/src/Storages/Page/V3/tests/gtest_free_map.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -427,6 +427,43 @@ TEST_P(SpaceMapTest, TestGetMaxCap)
}
}


TEST_P(SpaceMapTest, TestGetUsedBoundary)
{
{
auto smap = SpaceMap::createSpaceMap(test_type, 0, 100);
ASSERT_TRUE(smap->markUsed(50, 10));
ASSERT_EQ(smap->getUsedBoundary(), 60);
ASSERT_TRUE(smap->markUsed(80, 10));
ASSERT_EQ(smap->getUsedBoundary(), 90);

ASSERT_TRUE(smap->markUsed(90, 10));
ASSERT_EQ(smap->getUsedBoundary(), 100);
}

{
auto smap = SpaceMap::createSpaceMap(test_type, 0, 100);
ASSERT_TRUE(smap->markUsed(90, 10));
ASSERT_EQ(smap->getUsedBoundary(), 100);

ASSERT_TRUE(smap->markUsed(20, 10));
ASSERT_EQ(smap->getUsedBoundary(), 100);

ASSERT_TRUE(smap->markFree(90, 10));
ASSERT_EQ(smap->getUsedBoundary(), 30);

ASSERT_TRUE(smap->markUsed(90, 10));
ASSERT_EQ(smap->getUsedBoundary(), 100);
}

{
auto smap = SpaceMap::createSpaceMap(test_type, 0, 100);
ASSERT_EQ(smap->getUsedBoundary(), 0);
ASSERT_TRUE(smap->markUsed(0, 100));
ASSERT_EQ(smap->getUsedBoundary(), 100);
}
}

INSTANTIATE_TEST_CASE_P(
Type,
SpaceMapTest,
Expand Down

0 comments on commit e7deb68

Please sign in to comment.