diff --git a/dbms/src/Storages/Page/V3/BlobStore.cpp b/dbms/src/Storages/Page/V3/BlobStore.cpp index 4a203199473..0c443011f81 100644 --- a/dbms/src/Storages/Page/V3/BlobStore.cpp +++ b/dbms/src/Storages/Page/V3/BlobStore.cpp @@ -835,8 +835,8 @@ struct BlobStoreGCInfo toTypeString("Read-Only Blob", 0), toTypeString("No GC Blob", 1), toTypeString("Full GC Blob", 2), - toTypeString("Truncated Blob", 3), - toTypeString("Big Blob", 4)); + toTypeString("Big Blob", 3), + toTypeTruncateString("Truncated Blob")); } void appendToReadOnlyBlob(const BlobFileId blob_id, double valid_rate) @@ -854,23 +854,24 @@ struct BlobStoreGCInfo blob_gc_info[2].emplace_back(std::make_pair(blob_id, valid_rate)); } - void appendToTruncatedBlob(const BlobFileId blob_id, double valid_rate) + void appendToBigBlob(const BlobFileId blob_id, double valid_rate) { blob_gc_info[3].emplace_back(std::make_pair(blob_id, valid_rate)); } - void appendToBigBlob(const BlobFileId blob_id, double valid_rate) + void appendToTruncatedBlob(const BlobFileId blob_id, UInt64 origin_size, UInt64 truncated_size, double valid_rate) { - blob_gc_info[4].emplace_back(std::make_pair(blob_id, valid_rate)); + blob_gc_truncate_info.emplace_back(std::make_tuple(blob_id, origin_size, truncated_size, valid_rate)); } private: // 1. read only blob // 2. no need gc blob // 3. full gc blob - // 4. need truncate blob - // 5. big blob - std::vector> blob_gc_info[5]; + // 4. big blob + std::vector> blob_gc_info[4]; + + std::vector> blob_gc_truncate_info; String toTypeString(const std::string_view prefix, const size_t index) const { @@ -895,6 +896,32 @@ struct BlobStoreGCInfo return fmt_buf.toString(); } + + String toTypeTruncateString(const std::string_view prefix) const + { + FmtBuffer fmt_buf; + if (blob_gc_truncate_info.empty()) + { + fmt_buf.fmtAppend("{}: [null]", prefix); + } + else + { + fmt_buf.fmtAppend("{}: [", prefix); + fmt_buf.joinStr( + blob_gc_truncate_info.begin(), + blob_gc_truncate_info.end(), + [](const auto arg, FmtBuffer & fb) { + fb.fmtAppend("{} origin: {} truncate: {} rate: {:.2f}", // + std::get<0>(arg), // blob id + std::get<1>(arg), // origin size + std::get<2>(arg), // truncated size + std::get<3>(arg)); // valid rate + }, + ", "); + fmt_buf.append("]"); + } + return fmt_buf.toString(); + } }; std::vector BlobStore::getGCStats() @@ -924,7 +951,7 @@ std::vector BlobStore::getGCStats() } auto lock = stat->lock(); - auto right_margin = stat->smap->getRightMargin(); + auto right_margin = stat->smap->getUsedBoundary(); // Avoid divide by zero if (right_margin == 0) @@ -937,14 +964,13 @@ std::vector BlobStore::getGCStats() stat->sm_valid_rate)); } - LOG_FMT_TRACE(log, "Current blob is empty [blob_id={}, total size(all invalid)={}] [valid_rate={}].", stat->id, stat->sm_total_size, stat->sm_valid_rate); - // If current blob empty, the size of in disk blob may not empty // So we need truncate current blob, and let it be reused. auto blobfile = getBlobFile(stat->id); - LOG_FMT_TRACE(log, "Truncate empty blob file [blob_id={}] to 0.", stat->id); + LOG_FMT_INFO(log, "Current blob file is empty, truncated to zero [blob_id={}] [total_size={}] [valid_rate={}]", stat->id, stat->sm_total_size, stat->sm_valid_rate); blobfile->truncate(right_margin); - blobstore_gc_info.appendToTruncatedBlob(stat->id, stat->sm_valid_rate); + blobstore_gc_info.appendToTruncatedBlob(stat->id, stat->sm_total_size, right_margin, stat->sm_valid_rate); + stat->sm_total_size = right_margin; continue; } @@ -985,9 +1011,10 @@ std::vector BlobStore::getGCStats() auto blobfile = getBlobFile(stat->id); LOG_FMT_TRACE(log, "Truncate blob file [blob_id={}] [origin size={}] [truncated size={}]", stat->id, stat->sm_total_size, right_margin); blobfile->truncate(right_margin); + blobstore_gc_info.appendToTruncatedBlob(stat->id, stat->sm_total_size, right_margin, stat->sm_valid_rate); + stat->sm_total_size = right_margin; stat->sm_valid_rate = stat->sm_valid_size * 1.0 / stat->sm_total_size; - blobstore_gc_info.appendToTruncatedBlob(stat->id, stat->sm_valid_rate); } } } diff --git a/dbms/src/Storages/Page/V3/PageDirectory.cpp b/dbms/src/Storages/Page/V3/PageDirectory.cpp index e9b754854b8..5eb275f5af5 100644 --- a/dbms/src/Storages/Page/V3/PageDirectory.cpp +++ b/dbms/src/Storages/Page/V3/PageDirectory.cpp @@ -1223,7 +1223,7 @@ bool PageDirectory::tryDumpSnapshot(const ReadLimiterPtr & read_limiter, const W // `being_ref_count` by the function `createSnapshot()`. assert(!files_snap.persisted_log_files.empty()); // should not be empty when `needSave` return true auto log_num = files_snap.persisted_log_files.rbegin()->log_num; - auto identifier = fmt::format("{}_dump_{}", wal->name(), log_num); + auto identifier = fmt::format("{}.dump_{}", wal->name(), log_num); auto snapshot_reader = wal->createReaderForFiles(identifier, files_snap.persisted_log_files, read_limiter); PageDirectoryFactory factory; // we just use the `collapsed_dir` to dump edit of the snapshot, should never call functions like `apply` that diff --git a/dbms/src/Storages/Page/V3/spacemap/SpaceMap.h b/dbms/src/Storages/Page/V3/spacemap/SpaceMap.h index ae44b608de0..d230b2f3e35 100644 --- a/dbms/src/Storages/Page/V3/spacemap/SpaceMap.h +++ b/dbms/src/Storages/Page/V3/spacemap/SpaceMap.h @@ -95,9 +95,11 @@ class SpaceMap virtual std::tuple searchInsertOffset(size_t size) = 0; /** - * Get the offset of the last free block. `[margin_offset, +∞)` is not used at all. + * Get the used boundary of this SpaceMap. + * The return value (`used_boundary`) means that `[used_bounary + 1, +∞)` is safe to be truncated. + * If the `used_boundary` is equal to the `end` of this SpaceMap, it means that there is no space to be truncated. */ - virtual UInt64 getRightMargin() = 0; + virtual UInt64 getUsedBoundary() = 0; /** * Get the accurate max capacity of the space map. diff --git a/dbms/src/Storages/Page/V3/spacemap/SpaceMapBig.h b/dbms/src/Storages/Page/V3/spacemap/SpaceMapBig.h index 22128a09f30..81c2a5cb786 100644 --- a/dbms/src/Storages/Page/V3/spacemap/SpaceMapBig.h +++ b/dbms/src/Storages/Page/V3/spacemap/SpaceMapBig.h @@ -74,7 +74,7 @@ class BigSpaceMap return std::make_pair(size_in_used, size_in_used); } - UInt64 getRightMargin() override + UInt64 getUsedBoundary() override { return end; } diff --git a/dbms/src/Storages/Page/V3/spacemap/SpaceMapRBTree.cpp b/dbms/src/Storages/Page/V3/spacemap/SpaceMapRBTree.cpp index 54275574060..4bd53b93e07 100644 --- a/dbms/src/Storages/Page/V3/spacemap/SpaceMapRBTree.cpp +++ b/dbms/src/Storages/Page/V3/spacemap/SpaceMapRBTree.cpp @@ -84,7 +84,7 @@ static void rb_get_new_entry(struct SmapRbEntry ** entry, UInt64 start, UInt64 c { struct SmapRbEntry * new_entry; - new_entry = static_cast(calloc(1, sizeof(struct SmapRbEntry))); + new_entry = static_cast(calloc(1, sizeof(struct SmapRbEntry))); // NOLINT if (new_entry == nullptr) { return; @@ -115,7 +115,7 @@ inline static void rb_free_entry(struct RbPrivate * private_data, struct SmapRbE private_data->read_index_next = nullptr; } - free(entry); + free(entry); // NOLINT } @@ -419,7 +419,7 @@ std::shared_ptr RBTreeSpaceMap::create(UInt64 start, UInt64 end) { auto ptr = std::shared_ptr(new RBTreeSpaceMap(start, end)); - ptr->rb_tree = static_cast(calloc(1, sizeof(struct RbPrivate))); + ptr->rb_tree = static_cast(calloc(1, sizeof(struct RbPrivate))); // NOLINT if (ptr->rb_tree == nullptr) { return nullptr; @@ -435,7 +435,7 @@ std::shared_ptr RBTreeSpaceMap::create(UInt64 start, UInt64 end) if (!rb_insert_entry(start, end, ptr->rb_tree, ptr->log)) { LOG_FMT_ERROR(ptr->log, "Erorr happend, when mark all space free. [start={}] , [end={}]", start, end); - free(ptr->rb_tree); + free(ptr->rb_tree); // NOLINT return nullptr; } return ptr; @@ -451,7 +451,7 @@ static void rb_free_tree(struct rb_root * root) next = rb_tree_next(node); entry = node_to_entry(node); rb_node_remove(node, root); - free(entry); + free(entry); // NOLINT } } @@ -460,7 +460,7 @@ void RBTreeSpaceMap::freeSmap() if (rb_tree) { rb_free_tree(&rb_tree->root); - free(rb_tree); + free(rb_tree); // NOLINT } } @@ -734,7 +734,7 @@ std::pair RBTreeSpaceMap::getSizes() const } } -UInt64 RBTreeSpaceMap::getRightMargin() +UInt64 RBTreeSpaceMap::getUsedBoundary() { struct rb_node * node = rb_tree_last(&rb_tree->root); if (node == nullptr) @@ -743,6 +743,20 @@ UInt64 RBTreeSpaceMap::getRightMargin() } auto * entry = node_to_entry(node); + + // If the `offset+size` of the last free node is not equal to `end`, it means the range `[last_node.offset, end)` is marked as used, + // then we should return `end` as the used boundary. + // + // eg. + // 1. The spacemap manage a space of `[0, 100]` + // 2. A span {offset=90, size=10} is marked as used, then the free range in SpaceMap is `[0, 90)` + // 3. The return value should be 100 + if (entry->start + entry->count != end) + { + return end; + } + + // Else we should return the offset of last free node return entry->start; } diff --git a/dbms/src/Storages/Page/V3/spacemap/SpaceMapRBTree.h b/dbms/src/Storages/Page/V3/spacemap/SpaceMapRBTree.h index 0393fda081b..04691007a47 100644 --- a/dbms/src/Storages/Page/V3/spacemap/SpaceMapRBTree.h +++ b/dbms/src/Storages/Page/V3/spacemap/SpaceMapRBTree.h @@ -46,7 +46,7 @@ class RBTreeSpaceMap std::pair getSizes() const override; - UInt64 getRightMargin() override; + UInt64 getUsedBoundary() override; protected: RBTreeSpaceMap(UInt64 start, UInt64 end) diff --git a/dbms/src/Storages/Page/V3/spacemap/SpaceMapSTDMap.h b/dbms/src/Storages/Page/V3/spacemap/SpaceMapSTDMap.h index b6ff8797f0f..41ddd77d03a 100644 --- a/dbms/src/Storages/Page/V3/spacemap/SpaceMapSTDMap.h +++ b/dbms/src/Storages/Page/V3/spacemap/SpaceMapSTDMap.h @@ -111,13 +111,29 @@ class STDMapSpaceMap } } - UInt64 getRightMargin() override + UInt64 getUsedBoundary() override { if (free_map.empty()) { - return end - start; + return end; } - return free_map.rbegin()->first; + + const auto & last_node_it = free_map.rbegin(); + + // If the `offset+size` of the last free node is not equal to `end`, it means the range `[last_node.offset, end)` is marked as used, + // then we should return `end` as the used boundary. + // + // eg. + // 1. The spacemap manage a space of `[0, 100]` + // 2. A span {offset=90, size=10} is marked as used, then the free range in SpaceMap is `[0, 90)` + // 3. The return value should be 100 + if (last_node_it->first + last_node_it->second != end) + { + return end; + } + + // Else we should return the offset of last free node + return last_node_it->first; } bool isMarkUnused(UInt64 offset, size_t length) override diff --git a/dbms/src/Storages/Page/V3/tests/gtest_free_map.cpp b/dbms/src/Storages/Page/V3/tests/gtest_free_map.cpp index f7120f000b2..faec139920b 100644 --- a/dbms/src/Storages/Page/V3/tests/gtest_free_map.cpp +++ b/dbms/src/Storages/Page/V3/tests/gtest_free_map.cpp @@ -427,6 +427,43 @@ TEST_P(SpaceMapTest, TestGetMaxCap) } } + +TEST_P(SpaceMapTest, TestGetUsedBoundary) +{ + { + auto smap = SpaceMap::createSpaceMap(test_type, 0, 100); + ASSERT_TRUE(smap->markUsed(50, 10)); + ASSERT_EQ(smap->getUsedBoundary(), 60); + ASSERT_TRUE(smap->markUsed(80, 10)); + ASSERT_EQ(smap->getUsedBoundary(), 90); + + ASSERT_TRUE(smap->markUsed(90, 10)); + ASSERT_EQ(smap->getUsedBoundary(), 100); + } + + { + auto smap = SpaceMap::createSpaceMap(test_type, 0, 100); + ASSERT_TRUE(smap->markUsed(90, 10)); + ASSERT_EQ(smap->getUsedBoundary(), 100); + + ASSERT_TRUE(smap->markUsed(20, 10)); + ASSERT_EQ(smap->getUsedBoundary(), 100); + + ASSERT_TRUE(smap->markFree(90, 10)); + ASSERT_EQ(smap->getUsedBoundary(), 30); + + ASSERT_TRUE(smap->markUsed(90, 10)); + ASSERT_EQ(smap->getUsedBoundary(), 100); + } + + { + auto smap = SpaceMap::createSpaceMap(test_type, 0, 100); + ASSERT_EQ(smap->getUsedBoundary(), 0); + ASSERT_TRUE(smap->markUsed(0, 100)); + ASSERT_EQ(smap->getUsedBoundary(), 100); + } +} + INSTANTIATE_TEST_CASE_P( Type, SpaceMapTest,