From 17a164054a2e35de2c1fb605535a84a0f6498ff5 Mon Sep 17 00:00:00 2001 From: JaySon-Huang Date: Tue, 24 May 2022 14:15:54 +0800 Subject: [PATCH 01/15] Create another directory instance for dumping log files snapshot --- dbms/src/Storages/Page/V3/PageDirectory.cpp | 15 ++++- dbms/src/Storages/Page/V3/PageDirectory.h | 2 +- .../Storages/Page/V3/PageDirectoryFactory.cpp | 9 ++- .../Storages/Page/V3/PageDirectoryFactory.h | 3 + dbms/src/Storages/Page/V3/PageStorageImpl.cpp | 2 +- dbms/src/Storages/Page/V3/WALStore.cpp | 18 ++++-- dbms/src/Storages/Page/V3/WALStore.h | 21 ++++++- .../Storages/Page/V3/tests/entries_helper.h | 13 ++-- .../Page/V3/tests/gtest_page_directory.cpp | 62 ++++++++++++++++++- 9 files changed, 121 insertions(+), 24 deletions(-) diff --git a/dbms/src/Storages/Page/V3/PageDirectory.cpp b/dbms/src/Storages/Page/V3/PageDirectory.cpp index 64a3fead674..db4995f538b 100644 --- a/dbms/src/Storages/Page/V3/PageDirectory.cpp +++ b/dbms/src/Storages/Page/V3/PageDirectory.cpp @@ -19,6 +19,7 @@ #include #include #include +#include #include #include #include @@ -1191,15 +1192,25 @@ PageDirectory::getEntriesByBlobIds(const std::vector & blob_ids) con return std::make_pair(std::move(blob_versioned_entries), total_page_size); } -bool PageDirectory::tryDumpSnapshot(const WriteLimiterPtr & write_limiter) +bool PageDirectory::tryDumpSnapshot(const ReadLimiterPtr & read_limiter, const WriteLimiterPtr & write_limiter) { bool done_any_io = false; // In order not to make read amplification too high, only apply compact logs when ... auto files_snap = wal->getFilesSnapshot(); if (files_snap.needSave(max_persisted_log_files)) { + // Read from `file_snap` to create an edit for snapshot. + assert(!files_snap.persisted_log_files.empty()); // should not be empty when `needSave` return true + auto log_num = files_snap.persisted_log_files.rbegin()->log_num; + auto identifier = fmt::format("{}_dump_{}", wal->name(), log_num); + auto snapshot_reader = wal->createReaderForFiles(identifier, files_snap.persisted_log_files, read_limiter); + PageDirectoryFactory factory; + PageDirectoryPtr collapsed_dir = factory.createFromReader( + identifier, + std::move(snapshot_reader), + /*wal=*/nullptr); // The records persisted in `files_snap` is older than or equal to all records in `edit` - auto edit = dumpSnapshotToEdit(); + auto edit = collapsed_dir->dumpSnapshotToEdit(); done_any_io = wal->saveSnapshot(std::move(files_snap), std::move(edit), write_limiter); } return done_any_io; diff --git a/dbms/src/Storages/Page/V3/PageDirectory.h b/dbms/src/Storages/Page/V3/PageDirectory.h index a3c6b079fee..39b5a05a40a 100644 --- a/dbms/src/Storages/Page/V3/PageDirectory.h +++ b/dbms/src/Storages/Page/V3/PageDirectory.h @@ -358,7 +358,7 @@ class PageDirectory void gcApply(PageEntriesEdit && migrated_edit, const WriteLimiterPtr & write_limiter = nullptr); - bool tryDumpSnapshot(const WriteLimiterPtr & write_limiter = nullptr); + bool tryDumpSnapshot(const ReadLimiterPtr & read_limiter = nullptr, const WriteLimiterPtr & write_limiter = nullptr); PageEntriesV3 gcInMemEntries(); diff --git a/dbms/src/Storages/Page/V3/PageDirectoryFactory.cpp b/dbms/src/Storages/Page/V3/PageDirectoryFactory.cpp index 0592d1ddaa8..94ec218ece1 100644 --- a/dbms/src/Storages/Page/V3/PageDirectoryFactory.cpp +++ b/dbms/src/Storages/Page/V3/PageDirectoryFactory.cpp @@ -31,7 +31,12 @@ namespace PS::V3 PageDirectoryPtr PageDirectoryFactory::create(String storage_name, FileProviderPtr & file_provider, PSDiskDelegatorPtr & delegator, WALStore::Config config) { auto [wal, reader] = WALStore::create(storage_name, file_provider, delegator, config); - PageDirectoryPtr dir = std::make_unique(std::move(storage_name), std::move(wal), config.max_persisted_log_files); + return createFromReader(storage_name, reader, std::move(wal)); +} + +PageDirectoryPtr PageDirectoryFactory::createFromReader(String storage_name, WALStoreReaderPtr reader, WALStorePtr wal) +{ + PageDirectoryPtr dir = std::make_unique(storage_name, std::move(wal)); loadFromDisk(dir, std::move(reader)); // Reset the `sequence` to the maximum of persisted. @@ -40,7 +45,7 @@ PageDirectoryPtr PageDirectoryFactory::create(String storage_name, FileProviderP // After restoring from the disk, we need cleanup all invalid entries in memory, or it will // try to run GC again on some entries that are already marked as invalid in BlobStore. dir->gcInMemEntries(); - LOG_FMT_INFO(DB::Logger::get("PageDirectoryFactory"), "PageDirectory restored [max_page_id={}] [max_applied_ver={}]", dir->getMaxId(), dir->sequence); + LOG_FMT_INFO(DB::Logger::get("PageDirectoryFactory", storage_name), "PageDirectory restored [max_page_id={}] [max_applied_ver={}]", dir->getMaxId(), dir->sequence); if (blob_stats) { diff --git a/dbms/src/Storages/Page/V3/PageDirectoryFactory.h b/dbms/src/Storages/Page/V3/PageDirectoryFactory.h index e4b76bfba0d..a93f0dbac48 100644 --- a/dbms/src/Storages/Page/V3/PageDirectoryFactory.h +++ b/dbms/src/Storages/Page/V3/PageDirectoryFactory.h @@ -17,6 +17,7 @@ #include #include #include +#include namespace DB { @@ -47,6 +48,8 @@ class PageDirectoryFactory PageDirectoryPtr create(String storage_name, FileProviderPtr & file_provider, PSDiskDelegatorPtr & delegator, WALStore::Config config); + PageDirectoryPtr createFromReader(String storage_name, WALStoreReaderPtr reader, WALStorePtr wal); + // just for test PageDirectoryPtr createFromEdit(String storage_name, FileProviderPtr & file_provider, PSDiskDelegatorPtr & delegator, const PageEntriesEdit & edit); diff --git a/dbms/src/Storages/Page/V3/PageStorageImpl.cpp b/dbms/src/Storages/Page/V3/PageStorageImpl.cpp index 58fe4b4dd4c..cfa07199637 100644 --- a/dbms/src/Storages/Page/V3/PageStorageImpl.cpp +++ b/dbms/src/Storages/Page/V3/PageStorageImpl.cpp @@ -289,7 +289,7 @@ bool PageStorageImpl::gcImpl(bool /*not_skip*/, const WriteLimiterPtr & write_li // 1. Do the MVCC gc, clean up expired snapshot. // And get the expired entries. - if (page_directory->tryDumpSnapshot(write_limiter)) + if (page_directory->tryDumpSnapshot(read_limiter, write_limiter)) { GET_METRIC(tiflash_storage_page_gc_count, type_v3_mvcc_dumped).Increment(); } diff --git a/dbms/src/Storages/Page/V3/WALStore.cpp b/dbms/src/Storages/Page/V3/WALStore.cpp index 1f1eaf3bc33..9566a64a0a5 100644 --- a/dbms/src/Storages/Page/V3/WALStore.cpp +++ b/dbms/src/Storages/Page/V3/WALStore.cpp @@ -13,6 +13,7 @@ // limitations under the License. #include +#include #include #include #include @@ -46,7 +47,7 @@ std::pair WALStore::create( auto reader = WALStoreReader::create(storage_name, provider, delegator, - static_cast(config.wal_recover_mode.get())); + config.getRecoverMode()); // Create a new LogFile for writing new logs auto last_log_num = reader->lastLogNum() + 1; // TODO reuse old file return { @@ -54,17 +55,23 @@ std::pair WALStore::create( reader}; } +WALStoreReaderPtr WALStore::createReaderForFiles(const String & identifier, const LogFilenameSet & log_filenames, const ReadLimiterPtr & read_limiter) +{ + return WALStoreReader::create(identifier, provider, log_filenames, config.getRecoverMode(), read_limiter); +} + WALStore::WALStore( - String storage_name, + String storage_name_, const PSDiskDelegatorPtr & delegator_, const FileProviderPtr & provider_, Format::LogNumberType last_log_num_, WALStore::Config config_) - : delegator(delegator_) + : storage_name(std::move(storage_name_)) + , delegator(delegator_) , provider(provider_) , last_log_num(last_log_num_) , wal_paths_index(0) - , logger(Logger::get("WALStore", std::move(storage_name))) + , logger(Logger::get("WALStore", storage_name)) , config(config_) { } @@ -186,7 +193,7 @@ bool WALStore::saveSnapshot(FilesSnapshot && files_snap, PageEntriesEdit && dire LOG_FMT_INFO(logger, "Saving directory snapshot"); - // Use {largest_log_num + 1, 1} to save the `edit` + // Use {largest_log_num, 1} to save the `edit` const auto log_num = files_snap.persisted_log_files.rbegin()->log_num; // Create a temporary file for saving directory snapshot auto [compact_log, log_filename] = createLogWriter({log_num, 1}, /*manual_flush*/ true); @@ -221,6 +228,7 @@ bool WALStore::saveSnapshot(FilesSnapshot && files_snap, PageEntriesEdit && dire { #ifndef ARCHIVE_COMPACTED_LOGS f.remove(); + // TODO: remove encryption path from provider #else const Poco::Path archive_path(delegator->defaultPath(), "archive"); Poco::File archive_dir(archive_path); diff --git a/dbms/src/Storages/Page/V3/WALStore.h b/dbms/src/Storages/Page/V3/WALStore.h index 039903a8608..1ab15007384 100644 --- a/dbms/src/Storages/Page/V3/WALStore.h +++ b/dbms/src/Storages/Page/V3/WALStore.h @@ -88,28 +88,41 @@ class WALStore SettingUInt64 roll_size = PAGE_META_ROLL_SIZE; SettingUInt64 wal_recover_mode = 0; SettingUInt64 max_persisted_log_files = MAX_PERSISTED_LOG_FILES; + + static WALRecoveryMode getRecoverMode() + { + // return static_cast(wal_recover_mode.get()); + // Now we only use this mode + return WALRecoveryMode::TolerateCorruptedTailRecords; + } }; constexpr static const char * wal_folder_prefix = "/wal"; static std::pair create( - String storage_name, + String storage_name_, FileProviderPtr & provider, PSDiskDelegatorPtr & delegator, WALStore::Config config); + WALStoreReaderPtr createReaderForFiles(const String & identifier, const LogFilenameSet & log_filenames, const ReadLimiterPtr & read_limiter); + void apply(PageEntriesEdit & edit, const PageVersion & version, const WriteLimiterPtr & write_limiter = nullptr); void apply(const PageEntriesEdit & edit, const WriteLimiterPtr & write_limiter = nullptr); struct FilesSnapshot { Format::LogNumberType current_writting_log_num; + // The log files to generate snapshot from. Sorted by . + // If the WAL log file is not inited, it is an empty set. LogFilenameSet persisted_log_files; + // Note that persisted_log_files should not be empty for needSave() == true, + // cause we get the largest log num from persisted_log_files as the new + // file name. bool needSave(const size_t & max_size) const { - // TODO: Make it configurable and check the reasonable of this number return persisted_log_files.size() > max_size; } }; @@ -121,6 +134,8 @@ class WALStore PageEntriesEdit && directory_snap, const WriteLimiterPtr & write_limiter = nullptr); + const String & name() { return storage_name; } + private: WALStore( String storage_name, @@ -134,6 +149,8 @@ class WALStore const std::pair & new_log_lvl, bool manual_flush); +private: + const String storage_name; PSDiskDelegatorPtr delegator; FileProviderPtr provider; mutable std::mutex log_file_mutex; diff --git a/dbms/src/Storages/Page/V3/tests/entries_helper.h b/dbms/src/Storages/Page/V3/tests/entries_helper.h index cce59919ec8..24be99a53b2 100644 --- a/dbms/src/Storages/Page/V3/tests/entries_helper.h +++ b/dbms/src/Storages/Page/V3/tests/entries_helper.h @@ -25,6 +25,7 @@ #include #include #include +#include namespace DB { @@ -221,7 +222,9 @@ inline ::testing::AssertionResult getEntryNotExist( String error; try { - auto id_entry = dir->get(page_id, snap); + auto id_entry = dir->getOrNull(page_id, snap); + if (!id_entry.second.isValid()) + return ::testing::AssertionSuccess(); error = fmt::format( "Expect entry [id={}] from {} with snap{} not exist, but got <{}.{}, {}>", page_id_expr, @@ -231,14 +234,6 @@ inline ::testing::AssertionResult getEntryNotExist( id_entry.first.low, toDebugString(id_entry.second)); } - catch (DB::Exception & ex) - { - if (ex.code() == ErrorCodes::PS_ENTRY_NOT_EXISTS || ex.code() == ErrorCodes::PS_ENTRY_NO_VALID_VERSION) - return ::testing::AssertionSuccess(); - else - error = ex.displayText(); - return ::testing::AssertionFailure(::testing::Message(error.c_str())); - } catch (...) { error = getCurrentExceptionMessage(true); diff --git a/dbms/src/Storages/Page/V3/tests/gtest_page_directory.cpp b/dbms/src/Storages/Page/V3/tests/gtest_page_directory.cpp index dfa33824473..e9be29286af 100644 --- a/dbms/src/Storages/Page/V3/tests/gtest_page_directory.cpp +++ b/dbms/src/Storages/Page/V3/tests/gtest_page_directory.cpp @@ -50,12 +50,17 @@ class PageDirectoryTest : public DB::base::TiFlashStorageTestBasic { auto path = getTemporaryPath(); dropDataOnDisk(path); + dir = restoreFromDisk(); + } + static PageDirectoryPtr restoreFromDisk() + { + auto path = getTemporaryPath(); auto ctx = DB::tests::TiFlashTestEnv::getContext(); FileProviderPtr provider = ctx.getFileProvider(); PSDiskDelegatorPtr delegator = std::make_shared(path); PageDirectoryFactory factory; - dir = factory.create("PageDirectoryTest", provider, delegator, WALStore::Config()); + return factory.create("PageDirectoryTest", provider, delegator, WALStore::Config()); } protected: @@ -1286,6 +1291,60 @@ class PageDirectoryGCTest : public PageDirectoryTest dir->apply(std::move(edit)); \ } +TEST_F(PageDirectoryGCTest, ManyEditsAndDumpSnapshot) +{ + PageId page_id0 = 50; + PageId page_id1 = 51; + PageId page_id2 = 52; + PageId page_id3 = 53; + + PageEntryV3 last_entry_for_0; + constexpr size_t num_edits_test = 50000; + for (size_t i = 0; i < num_edits_test; ++i) + { + { + INSERT_ENTRY(page_id0, i); + last_entry_for_0 = entry_vi; + } + { + INSERT_ENTRY(page_id1, i); + } + } + INSERT_DELETE(page_id1); + EXPECT_TRUE(dir->tryDumpSnapshot()); + dir.reset(); + + dir = restoreFromDisk(); + { + auto snap = dir->createSnapshot(); + ASSERT_SAME_ENTRY(dir->get(page_id0, snap).second, last_entry_for_0); + EXPECT_ENTRY_NOT_EXIST(dir, page_id1, snap); + } + + PageEntryV3 last_entry_for_2; + for (size_t i = 0; i < num_edits_test; ++i) + { + { + INSERT_ENTRY(page_id2, i); + last_entry_for_2 = entry_vi; + } + { + INSERT_ENTRY(page_id3, i); + } + } + INSERT_DELETE(page_id3); + EXPECT_TRUE(dir->tryDumpSnapshot()); + + dir = restoreFromDisk(); + { + auto snap = dir->createSnapshot(); + ASSERT_SAME_ENTRY(dir->get(page_id0, snap).second, last_entry_for_0); + EXPECT_ENTRY_NOT_EXIST(dir, page_id1, snap); + ASSERT_SAME_ENTRY(dir->get(page_id2, snap).second, last_entry_for_2); + EXPECT_ENTRY_NOT_EXIST(dir, page_id3, snap); + } +} + TEST_F(PageDirectoryGCTest, GCPushForward) try { @@ -1931,7 +1990,6 @@ try auto s0 = dir->createSnapshot(); auto edit = dir->dumpSnapshotToEdit(s0); - edit.size(); auto restore_from_edit = [](const PageEntriesEdit & edit) { auto deseri_edit = DB::PS::V3::ser::deserializeFrom(DB::PS::V3::ser::serializeTo(edit)); auto ctx = DB::tests::TiFlashTestEnv::getContext(); From 0dd68451a05633529b2f59c696755e33e07b9660 Mon Sep 17 00:00:00 2001 From: JaySon-Huang Date: Tue, 24 May 2022 14:42:56 +0800 Subject: [PATCH 02/15] Remove encryption info for WAL log file --- dbms/src/Encryption/MockKeyManager.cpp | 6 ++++++ dbms/src/Encryption/MockKeyManager.h | 11 ++++++++--- dbms/src/Storages/Page/V3/WALStore.cpp | 6 ++++-- dbms/src/Storages/Page/V3/tests/gtest_wal_store.cpp | 10 +++++----- 4 files changed, 23 insertions(+), 10 deletions(-) diff --git a/dbms/src/Encryption/MockKeyManager.cpp b/dbms/src/Encryption/MockKeyManager.cpp index d125961fd06..bbaeb37848a 100644 --- a/dbms/src/Encryption/MockKeyManager.cpp +++ b/dbms/src/Encryption/MockKeyManager.cpp @@ -13,8 +13,10 @@ // limitations under the License. #include +#include #include #include +#include #include #include @@ -40,12 +42,14 @@ MockKeyManager::MockKeyManager(EncryptionMethod method_, const String & key_, co , key{key_} , iv{iv} , encryption_enabled{encryption_enabled_} + , logger(DB::Logger::get("MockKeyManager")) {} FileEncryptionInfo MockKeyManager::newFile(const String & fname) { if (encryption_enabled) { + LOG_FMT_TRACE(logger, "Create mock encryption [file={}]", fname); files.emplace_back(fname); } return getFile(fname); @@ -64,6 +68,7 @@ void MockKeyManager::deleteFile(const String & fname, bool throw_on_error) { if (*iter == fname) { + LOG_FMT_TRACE(logger, "Delete mock encryption [file={}]", fname); files.erase(iter); break; } @@ -80,6 +85,7 @@ void MockKeyManager::linkFile(const String & src_fname, const String & dst_fname { throw DB::Exception(fmt::format("Can't find file which name is {}", src_fname), DB::ErrorCodes::LOGICAL_ERROR); } + LOG_FMT_TRACE(logger, "Link mock encryption file [src_file={}] [dst_file={}]", src_fname, dst_fname); files.emplace_back(dst_fname); } } diff --git a/dbms/src/Encryption/MockKeyManager.h b/dbms/src/Encryption/MockKeyManager.h index 914e6ab1fe4..268bb00d129 100644 --- a/dbms/src/Encryption/MockKeyManager.h +++ b/dbms/src/Encryption/MockKeyManager.h @@ -20,12 +20,15 @@ namespace DB { -class MockKeyManager : public KeyManager +class Logger; +using LoggerPtr = std::shared_ptr; + +class MockKeyManager final : public KeyManager { public: - ~MockKeyManager() = default; + ~MockKeyManager() override = default; - MockKeyManager(bool encryption_enabled_ = true); + explicit MockKeyManager(bool encryption_enabled_ = true); MockKeyManager(EncryptionMethod method_, const String & key_, const String & iv, bool encryption_enabled_ = true); @@ -50,5 +53,7 @@ class MockKeyManager : public KeyManager String key; String iv; bool encryption_enabled; + + LoggerPtr logger; }; } // namespace DB diff --git a/dbms/src/Storages/Page/V3/WALStore.cpp b/dbms/src/Storages/Page/V3/WALStore.cpp index 9566a64a0a5..9d56938b091 100644 --- a/dbms/src/Storages/Page/V3/WALStore.cpp +++ b/dbms/src/Storages/Page/V3/WALStore.cpp @@ -224,11 +224,13 @@ bool WALStore::saveSnapshot(FilesSnapshot && files_snap, PageEntriesEdit && dire // Remove compacted log files. for (const auto & filename : files_snap.persisted_log_files) { - if (auto f = Poco::File(filename.fullname(LogFileStage::Normal)); f.exists()) + const auto log_fullname = filename.fullname(LogFileStage::Normal); + if (auto f = Poco::File(log_fullname); f.exists()) { #ifndef ARCHIVE_COMPACTED_LOGS f.remove(); - // TODO: remove encryption path from provider + // remove encryption path from provider + provider->deleteEncryptionInfo(EncryptionPath(log_fullname, "")); #else const Poco::Path archive_path(delegator->defaultPath(), "archive"); Poco::File archive_dir(archive_path); diff --git a/dbms/src/Storages/Page/V3/tests/gtest_wal_store.cpp b/dbms/src/Storages/Page/V3/tests/gtest_wal_store.cpp index 89c4e54f7e7..6d47adabbc5 100644 --- a/dbms/src/Storages/Page/V3/tests/gtest_wal_store.cpp +++ b/dbms/src/Storages/Page/V3/tests/gtest_wal_store.cpp @@ -596,11 +596,12 @@ TEST_P(WALStoreTest, ManyEdits) try { auto ctx = DB::tests::TiFlashTestEnv::getContext(); - auto provider = ctx.getFileProvider(); + auto enc_key_manager = std::make_shared(/*encryption_enabled_=*/true); + auto enc_provider = std::make_shared(enc_key_manager, true); auto path = getTemporaryPath(); // Stage 1. empty - auto [wal, reader] = WALStore::create(getCurrentTestName(), provider, delegator, config); + auto [wal, reader] = WALStore::create(getCurrentTestName(), enc_provider, delegator, config); ASSERT_NE(wal, nullptr); std::mt19937 rd; @@ -633,7 +634,7 @@ try size_t num_edits_read = 0; size_t num_pages_read = 0; - std::tie(wal, reader) = WALStore::create(getCurrentTestName(), provider, delegator, config); + std::tie(wal, reader) = WALStore::create(getCurrentTestName(), enc_provider, delegator, config); while (reader->remained()) { auto [ok, edit] = reader->next(); @@ -653,8 +654,7 @@ try LOG_FMT_INFO(&Poco::Logger::get("WALStoreTest"), "Done test for {} persist pages in {} edits", num_pages_read, num_edits_test); // Test for save snapshot (with encryption) - auto enc_key_manager = std::make_shared(/*encryption_enabled_=*/true); - auto enc_provider = std::make_shared(enc_key_manager, true); + LogFilenameSet persisted_log_files = WALStoreReader::listAllFiles(delegator, log); WALStore::FilesSnapshot file_snap{.current_writting_log_num = 100, // just a fake value .persisted_log_files = persisted_log_files}; From 09c067c3f716524fe340f30b84bf12023448e7b8 Mon Sep 17 00:00:00 2001 From: JaySon-Huang Date: Tue, 24 May 2022 14:43:21 +0800 Subject: [PATCH 03/15] cleanup useless code --- dbms/src/Storages/Page/V3/WALStore.cpp | 12 ------------ 1 file changed, 12 deletions(-) diff --git a/dbms/src/Storages/Page/V3/WALStore.cpp b/dbms/src/Storages/Page/V3/WALStore.cpp index 9d56938b091..da8b800bc5b 100644 --- a/dbms/src/Storages/Page/V3/WALStore.cpp +++ b/dbms/src/Storages/Page/V3/WALStore.cpp @@ -219,27 +219,15 @@ bool WALStore::saveSnapshot(FilesSnapshot && files_snap, PageEntriesEdit && dire true); LOG_FMT_INFO(logger, "Rename log file to normal done [fullname={}]", normal_fullname); - // #define ARCHIVE_COMPACTED_LOGS // keep for debug - // Remove compacted log files. for (const auto & filename : files_snap.persisted_log_files) { const auto log_fullname = filename.fullname(LogFileStage::Normal); if (auto f = Poco::File(log_fullname); f.exists()) { -#ifndef ARCHIVE_COMPACTED_LOGS f.remove(); // remove encryption path from provider provider->deleteEncryptionInfo(EncryptionPath(log_fullname, "")); -#else - const Poco::Path archive_path(delegator->defaultPath(), "archive"); - Poco::File archive_dir(archive_path); - if (!archive_dir.exists()) - archive_dir.createDirectory(); - auto dest = archive_path.toString() + "/" + filename.filename(LogFileStage::Normal); - f.moveTo(dest); - LOG_FMT_INFO(logger, "archive {} to {}", filename.fullname(LogFileStage::Normal), dest); -#endif } } From 796e861e03be89ede4aeb81314de327dd8718f86 Mon Sep 17 00:00:00 2001 From: JaySon-Huang Date: Tue, 24 May 2022 14:55:52 +0800 Subject: [PATCH 04/15] Check the recover mode --- dbms/src/Storages/Page/PageStorage.h | 2 +- dbms/src/Storages/Page/V3/PageStorageImpl.h | 4 ++-- dbms/src/Storages/Page/V3/WALStore.h | 17 ++++++++++++++++- 3 files changed, 19 insertions(+), 4 deletions(-) diff --git a/dbms/src/Storages/Page/PageStorage.h b/dbms/src/Storages/Page/PageStorage.h index 479c368a585..fed04f6d2d9 100644 --- a/dbms/src/Storages/Page/PageStorage.h +++ b/dbms/src/Storages/Page/PageStorage.h @@ -140,7 +140,7 @@ class PageStorage : private boost::noncopyable SettingUInt64 blob_block_alignment_bytes = 0; SettingUInt64 wal_roll_size = PAGE_META_ROLL_SIZE; - SettingUInt64 wal_recover_mode = 0; + SettingUInt64 wal_recover_mode = 0x00; SettingUInt64 wal_max_persisted_log_files = MAX_PERSISTED_LOG_FILES; void reload(const Config & rhs) diff --git a/dbms/src/Storages/Page/V3/PageStorageImpl.h b/dbms/src/Storages/Page/V3/PageStorageImpl.h index 082adb8df34..50d160e81da 100644 --- a/dbms/src/Storages/Page/V3/PageStorageImpl.h +++ b/dbms/src/Storages/Page/V3/PageStorageImpl.h @@ -34,7 +34,7 @@ class PageStorageImpl : public DB::PageStorage const Config & config_, const FileProviderPtr & file_provider_); - ~PageStorageImpl(); + ~PageStorageImpl() override; static BlobStore::Config parseBlobConfig(const Config & config) { @@ -54,8 +54,8 @@ class PageStorageImpl : public DB::PageStorage WALStore::Config wal_config; wal_config.roll_size = config.wal_roll_size; - wal_config.wal_recover_mode = config.wal_recover_mode; wal_config.max_persisted_log_files = config.wal_max_persisted_log_files; + wal_config.setRecoverMode(config.wal_recover_mode); return wal_config; } diff --git a/dbms/src/Storages/Page/V3/WALStore.h b/dbms/src/Storages/Page/V3/WALStore.h index 1ab15007384..8e96ec6ea98 100644 --- a/dbms/src/Storages/Page/V3/WALStore.h +++ b/dbms/src/Storages/Page/V3/WALStore.h @@ -86,9 +86,24 @@ class WALStore struct Config { SettingUInt64 roll_size = PAGE_META_ROLL_SIZE; - SettingUInt64 wal_recover_mode = 0; SettingUInt64 max_persisted_log_files = MAX_PERSISTED_LOG_FILES; + private: + SettingUInt64 wal_recover_mode = 0; + + public: + void setRecoverMode(UInt64 recover_mode) + { + if (recover_mode != static_cast(WALRecoveryMode::TolerateCorruptedTailRecords) + && recover_mode != static_cast(WALRecoveryMode::AbsoluteConsistency) + && recover_mode != static_cast(WALRecoveryMode::PointInTimeRecovery) + && recover_mode != static_cast(WALRecoveryMode::SkipAnyCorruptedRecords)) + { + throw Exception("Unknow recover mode [num={}]", recover_mode); + } + wal_recover_mode = recover_mode; + } + static WALRecoveryMode getRecoverMode() { // return static_cast(wal_recover_mode.get()); From 1e3bea55569535a9c0edaf940c107ca0b7a41c8e Mon Sep 17 00:00:00 2001 From: JaySon-Huang Date: Tue, 24 May 2022 15:13:14 +0800 Subject: [PATCH 05/15] Update comment --- dbms/src/Storages/Page/PageStorage.h | 2 +- dbms/src/Storages/Page/V3/PageDirectory.cpp | 5 ++++- dbms/src/Storages/Page/V3/WALStore.h | 14 ++++++-------- 3 files changed, 11 insertions(+), 10 deletions(-) diff --git a/dbms/src/Storages/Page/PageStorage.h b/dbms/src/Storages/Page/PageStorage.h index fed04f6d2d9..e52c6ce6701 100644 --- a/dbms/src/Storages/Page/PageStorage.h +++ b/dbms/src/Storages/Page/PageStorage.h @@ -140,7 +140,7 @@ class PageStorage : private boost::noncopyable SettingUInt64 blob_block_alignment_bytes = 0; SettingUInt64 wal_roll_size = PAGE_META_ROLL_SIZE; - SettingUInt64 wal_recover_mode = 0x00; + SettingUInt64 wal_recover_mode = 0x00; // WALRecoveryMode::TolerateCorruptedTailRecords SettingUInt64 wal_max_persisted_log_files = MAX_PERSISTED_LOG_FILES; void reload(const Config & rhs) diff --git a/dbms/src/Storages/Page/V3/PageDirectory.cpp b/dbms/src/Storages/Page/V3/PageDirectory.cpp index db4995f538b..f4afc3b8740 100644 --- a/dbms/src/Storages/Page/V3/PageDirectory.cpp +++ b/dbms/src/Storages/Page/V3/PageDirectory.cpp @@ -1199,7 +1199,10 @@ bool PageDirectory::tryDumpSnapshot(const ReadLimiterPtr & read_limiter, const W auto files_snap = wal->getFilesSnapshot(); if (files_snap.needSave(max_persisted_log_files)) { - // Read from `file_snap` to create an edit for snapshot. + // To prevent writes from affecting dumping snapshot (and vice versa), old log files + // are read from disk and a temporary PageDirectory is generated for dumping snapshot. + // The main reason write affect dumping snapshot is that we can not get a read-only + // `being_ref_count` by the function `createSnapshot()`. assert(!files_snap.persisted_log_files.empty()); // should not be empty when `needSave` return true auto log_num = files_snap.persisted_log_files.rbegin()->log_num; auto identifier = fmt::format("{}_dump_{}", wal->name(), log_num); diff --git a/dbms/src/Storages/Page/V3/WALStore.h b/dbms/src/Storages/Page/V3/WALStore.h index 8e96ec6ea98..833f1182029 100644 --- a/dbms/src/Storages/Page/V3/WALStore.h +++ b/dbms/src/Storages/Page/V3/WALStore.h @@ -94,21 +94,19 @@ class WALStore public: void setRecoverMode(UInt64 recover_mode) { - if (recover_mode != static_cast(WALRecoveryMode::TolerateCorruptedTailRecords) - && recover_mode != static_cast(WALRecoveryMode::AbsoluteConsistency) - && recover_mode != static_cast(WALRecoveryMode::PointInTimeRecovery) - && recover_mode != static_cast(WALRecoveryMode::SkipAnyCorruptedRecords)) + if (unlikely(recover_mode != static_cast(WALRecoveryMode::TolerateCorruptedTailRecords) + && recover_mode != static_cast(WALRecoveryMode::AbsoluteConsistency) + && recover_mode != static_cast(WALRecoveryMode::PointInTimeRecovery) + && recover_mode != static_cast(WALRecoveryMode::SkipAnyCorruptedRecords))) { throw Exception("Unknow recover mode [num={}]", recover_mode); } wal_recover_mode = recover_mode; } - static WALRecoveryMode getRecoverMode() + WALRecoveryMode getRecoverMode() { - // return static_cast(wal_recover_mode.get()); - // Now we only use this mode - return WALRecoveryMode::TolerateCorruptedTailRecords; + return static_cast(wal_recover_mode.get()); } }; From 74ccd9a76b3bfa80016d812bf9083ba6de365e2f Mon Sep 17 00:00:00 2001 From: JaySon-Huang Date: Tue, 24 May 2022 15:14:59 +0800 Subject: [PATCH 06/15] Use provider->deleteRegularFile --- dbms/src/Storages/Page/V3/WALStore.cpp | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/dbms/src/Storages/Page/V3/WALStore.cpp b/dbms/src/Storages/Page/V3/WALStore.cpp index da8b800bc5b..c7f11ee8b3c 100644 --- a/dbms/src/Storages/Page/V3/WALStore.cpp +++ b/dbms/src/Storages/Page/V3/WALStore.cpp @@ -223,12 +223,7 @@ bool WALStore::saveSnapshot(FilesSnapshot && files_snap, PageEntriesEdit && dire for (const auto & filename : files_snap.persisted_log_files) { const auto log_fullname = filename.fullname(LogFileStage::Normal); - if (auto f = Poco::File(log_fullname); f.exists()) - { - f.remove(); - // remove encryption path from provider - provider->deleteEncryptionInfo(EncryptionPath(log_fullname, "")); - } + provider->deleteRegularFile(log_fullname, EncryptionPath(log_fullname, "")); } FmtBuffer fmt_buf; From 8e51c2b5bdf9c2cf45eb7bbe91f063ca6eed2305 Mon Sep 17 00:00:00 2001 From: jiaqizho Date: Mon, 23 May 2022 18:38:02 +0800 Subject: [PATCH 07/15] PageDirectory: need do gc twice in restore. --- .../Storages/Page/V3/PageDirectoryFactory.cpp | 16 ++++++++++ .../Page/V3/tests/gtest_page_directory.cpp | 29 +++++++++++++++++++ .../Page/V3/tests/gtest_page_storage.cpp | 29 +++++++++++++++++++ 3 files changed, 74 insertions(+) diff --git a/dbms/src/Storages/Page/V3/PageDirectoryFactory.cpp b/dbms/src/Storages/Page/V3/PageDirectoryFactory.cpp index 483c5073ab5..9ae4289181d 100644 --- a/dbms/src/Storages/Page/V3/PageDirectoryFactory.cpp +++ b/dbms/src/Storages/Page/V3/PageDirectoryFactory.cpp @@ -47,6 +47,22 @@ PageDirectoryPtr PageDirectoryFactory::createFromReader(String storage_name, WAL dir->gcInMemEntries(); LOG_FMT_INFO(DB::Logger::get("PageDirectoryFactory", storage_name), "PageDirectory restored [max_page_id={}] [max_applied_ver={}]", dir->getMaxId(), dir->sequence); + // MVCC gc must do twice when restore + // Because the first time call MVCC GC may not clean the deleted entries if being reference. + // But it will clean up the deleted reference. + // ex. + // P 1 + // R 2 -> 1 + // D 1 + // D 2 + // First time call gcInMemEntries(): + // - P 1 entry 0 being_ref_count is not zero, won't be cleaned. + // - R 2 entry 0 being_ref_count is zero, deref the P 1, make it being_ref_count is 0. Also R2 will be clean up. + // + // Second time call gcInMemEntries(): + // - P 1 being_ref_count is 0, it will be clean. + dir->gcInMemEntries(); + if (blob_stats) { // After all entries restored to `mvcc_table_directory`, only apply diff --git a/dbms/src/Storages/Page/V3/tests/gtest_page_directory.cpp b/dbms/src/Storages/Page/V3/tests/gtest_page_directory.cpp index 4d65de74648..faae7847111 100644 --- a/dbms/src/Storages/Page/V3/tests/gtest_page_directory.cpp +++ b/dbms/src/Storages/Page/V3/tests/gtest_page_directory.cpp @@ -2272,6 +2272,35 @@ try } CATCH +TEST_F(PageDirectoryGCTest, testGCTwice) +try +{ + PageEntryV3 entry_50_1{.file_id = 1, .size = 7890, .tag = 0, .offset = 0x123, .checksum = 0x4567}; + PageEntryV3 entry_50_2{.file_id = 2, .size = 7890, .tag = 0, .offset = 0x123, .checksum = 0x4567}; + + auto restore_from_edit = [](const PageEntriesEdit & edit) { + auto ctx = ::DB::tests::TiFlashTestEnv::getContext(); + auto provider = ctx.getFileProvider(); + auto path = getTemporaryPath(); + PSDiskDelegatorPtr delegator = std::make_shared(path); + PageDirectoryFactory factory; + auto d = factory.createFromEdit(getCurrentTestName(), provider, delegator, edit); + return d; + }; + + { + PageEntriesEdit edit; + edit.put(50, entry_50_1); + edit.put(50, entry_50_2); + edit.ref(51, 50); + edit.del(50); + edit.del(51); + auto page_ids = restore_from_edit(edit)->getAllPageIds(); + ASSERT_EQ(page_ids.size(), 0); + } +} +CATCH + #undef INSERT_ENTRY_TO #undef INSERT_ENTRY #undef INSERT_ENTRY_ACQ_SNAP diff --git a/dbms/src/Storages/Page/V3/tests/gtest_page_storage.cpp b/dbms/src/Storages/Page/V3/tests/gtest_page_storage.cpp index 498fd4124e5..5095b6a7a56 100644 --- a/dbms/src/Storages/Page/V3/tests/gtest_page_storage.cpp +++ b/dbms/src/Storages/Page/V3/tests/gtest_page_storage.cpp @@ -1380,5 +1380,34 @@ try } CATCH +TEST_F(PageStorageTest, testGCTwice) +try +{ + // Make it in log_1_0 + { + WriteBatch batch; + batch.putExternal(1, 0); + page_storage->write(std::move(batch)); + } + + page_storage = reopenWithConfig(config); + + // Make it in log_2_0 + { + WriteBatch batch; + batch.putExternal(1, 0); + batch.putRefPage(2, 1); + batch.delPage(1); + batch.delPage(2); + page_storage->write(std::move(batch)); + } + page_storage = reopenWithConfig(config); + + auto alive_ids = page_storage->getAliveExternalPageIds(TEST_NAMESPACE_ID); + ASSERT_EQ(alive_ids.size(), 0); +} +CATCH + + } // namespace PS::V3::tests } // namespace DB From 40f83ada754614a557e91a1b6a58b44aa06f17b9 Mon Sep 17 00:00:00 2001 From: JaySon-Huang Date: Tue, 24 May 2022 16:07:22 +0800 Subject: [PATCH 08/15] Remove useless code Signed-off-by: JaySon-Huang --- .../Storages/Page/V3/PageDirectoryFactory.cpp | 16 ---------------- .../Page/V3/tests/gtest_page_directory.cpp | 5 +++-- 2 files changed, 3 insertions(+), 18 deletions(-) diff --git a/dbms/src/Storages/Page/V3/PageDirectoryFactory.cpp b/dbms/src/Storages/Page/V3/PageDirectoryFactory.cpp index 9ae4289181d..483c5073ab5 100644 --- a/dbms/src/Storages/Page/V3/PageDirectoryFactory.cpp +++ b/dbms/src/Storages/Page/V3/PageDirectoryFactory.cpp @@ -47,22 +47,6 @@ PageDirectoryPtr PageDirectoryFactory::createFromReader(String storage_name, WAL dir->gcInMemEntries(); LOG_FMT_INFO(DB::Logger::get("PageDirectoryFactory", storage_name), "PageDirectory restored [max_page_id={}] [max_applied_ver={}]", dir->getMaxId(), dir->sequence); - // MVCC gc must do twice when restore - // Because the first time call MVCC GC may not clean the deleted entries if being reference. - // But it will clean up the deleted reference. - // ex. - // P 1 - // R 2 -> 1 - // D 1 - // D 2 - // First time call gcInMemEntries(): - // - P 1 entry 0 being_ref_count is not zero, won't be cleaned. - // - R 2 entry 0 being_ref_count is zero, deref the P 1, make it being_ref_count is 0. Also R2 will be clean up. - // - // Second time call gcInMemEntries(): - // - P 1 being_ref_count is 0, it will be clean. - dir->gcInMemEntries(); - if (blob_stats) { // After all entries restored to `mvcc_table_directory`, only apply diff --git a/dbms/src/Storages/Page/V3/tests/gtest_page_directory.cpp b/dbms/src/Storages/Page/V3/tests/gtest_page_directory.cpp index faae7847111..a83d9630ae2 100644 --- a/dbms/src/Storages/Page/V3/tests/gtest_page_directory.cpp +++ b/dbms/src/Storages/Page/V3/tests/gtest_page_directory.cpp @@ -2272,7 +2272,7 @@ try } CATCH -TEST_F(PageDirectoryGCTest, testGCTwice) +TEST_F(PageDirectoryGCTest, cleanAfterDecreaseRef) try { PageEntryV3 entry_50_1{.file_id = 1, .size = 7890, .tag = 0, .offset = 0x123, .checksum = 0x4567}; @@ -2295,7 +2295,8 @@ try edit.ref(51, 50); edit.del(50); edit.del(51); - auto page_ids = restore_from_edit(edit)->getAllPageIds(); + auto restored_dir = restore_from_edit(edit); + auto page_ids = restored_dir->getAllPageIds(); ASSERT_EQ(page_ids.size(), 0); } } From 3ec076a45784d634b029671cad3244a5f710780d Mon Sep 17 00:00:00 2001 From: JaySon-Huang Date: Tue, 24 May 2022 16:22:57 +0800 Subject: [PATCH 09/15] Address comment --- dbms/src/Storages/Page/V3/PageDirectory.cpp | 6 ++++-- dbms/src/Storages/Page/V3/tests/gtest_page_directory.cpp | 2 +- dbms/src/Storages/Page/V3/tests/gtest_page_storage.cpp | 2 +- 3 files changed, 6 insertions(+), 4 deletions(-) diff --git a/dbms/src/Storages/Page/V3/PageDirectory.cpp b/dbms/src/Storages/Page/V3/PageDirectory.cpp index f4afc3b8740..d944fe422d3 100644 --- a/dbms/src/Storages/Page/V3/PageDirectory.cpp +++ b/dbms/src/Storages/Page/V3/PageDirectory.cpp @@ -1208,13 +1208,15 @@ bool PageDirectory::tryDumpSnapshot(const ReadLimiterPtr & read_limiter, const W auto identifier = fmt::format("{}_dump_{}", wal->name(), log_num); auto snapshot_reader = wal->createReaderForFiles(identifier, files_snap.persisted_log_files, read_limiter); PageDirectoryFactory factory; + // we just use the `collapsed_dir` to dump edit of the snapshot, should never call functions like `apply` that + // persist new logs into disk. So we pass `nullptr` as `wal` to the factory. PageDirectoryPtr collapsed_dir = factory.createFromReader( identifier, std::move(snapshot_reader), /*wal=*/nullptr); // The records persisted in `files_snap` is older than or equal to all records in `edit` - auto edit = collapsed_dir->dumpSnapshotToEdit(); - done_any_io = wal->saveSnapshot(std::move(files_snap), std::move(edit), write_limiter); + auto edit_from_disk = collapsed_dir->dumpSnapshotToEdit(); + done_any_io = wal->saveSnapshot(std::move(files_snap), std::move(edit_from_disk), write_limiter); } return done_any_io; } diff --git a/dbms/src/Storages/Page/V3/tests/gtest_page_directory.cpp b/dbms/src/Storages/Page/V3/tests/gtest_page_directory.cpp index a83d9630ae2..6e2b0efa1ea 100644 --- a/dbms/src/Storages/Page/V3/tests/gtest_page_directory.cpp +++ b/dbms/src/Storages/Page/V3/tests/gtest_page_directory.cpp @@ -2272,7 +2272,7 @@ try } CATCH -TEST_F(PageDirectoryGCTest, cleanAfterDecreaseRef) +TEST_F(PageDirectoryGCTest, CleanAfterDecreaseRef) try { PageEntryV3 entry_50_1{.file_id = 1, .size = 7890, .tag = 0, .offset = 0x123, .checksum = 0x4567}; diff --git a/dbms/src/Storages/Page/V3/tests/gtest_page_storage.cpp b/dbms/src/Storages/Page/V3/tests/gtest_page_storage.cpp index 5095b6a7a56..ce2ba0adaf4 100644 --- a/dbms/src/Storages/Page/V3/tests/gtest_page_storage.cpp +++ b/dbms/src/Storages/Page/V3/tests/gtest_page_storage.cpp @@ -1380,7 +1380,7 @@ try } CATCH -TEST_F(PageStorageTest, testGCTwice) +TEST_F(PageStorageTest, CleanAfterDecreaseRef) try { // Make it in log_1_0 From 114054ba04c305d2cde80ab4d96ee3f05fe6f4cf Mon Sep 17 00:00:00 2001 From: lidezhu Date: Tue, 24 May 2022 16:42:29 +0800 Subject: [PATCH 10/15] change the relative order of deleting file and deleting encryption info --- dbms/src/Encryption/FileProvider.cpp | 2 +- dbms/src/Encryption/FileProvider.h | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/dbms/src/Encryption/FileProvider.cpp b/dbms/src/Encryption/FileProvider.cpp index b4666cf6a71..f44ae7f410d 100644 --- a/dbms/src/Encryption/FileProvider.cpp +++ b/dbms/src/Encryption/FileProvider.cpp @@ -142,8 +142,8 @@ void FileProvider::deleteRegularFile(const String & file_path_, const Encryption { throw DB::TiFlashException("File: " + data_file.path() + " is not a regular file", Errors::Encryption::Internal); } - key_manager->deleteFile(encryption_path_.full_path, true); data_file.remove(false); + key_manager->deleteFile(encryption_path_.full_path, true); } } diff --git a/dbms/src/Encryption/FileProvider.h b/dbms/src/Encryption/FileProvider.h index 79eec8f632b..1d23af7a2db 100644 --- a/dbms/src/Encryption/FileProvider.h +++ b/dbms/src/Encryption/FileProvider.h @@ -67,6 +67,7 @@ class FileProvider // If dir_path_as_encryption_path is true, use dir_path_ as EncryptionPath // If false, use every file's path inside dir_path_ as EncryptionPath + // Note this method is not atomic, and after calling it, the files in dir_path_ cannot be read again. void deleteDirectory( const String & dir_path_, bool dir_path_as_encryption_path = false, From e3980a6b4cd298c8868c54e9b0a87d3cbe4f70be Mon Sep 17 00:00:00 2001 From: JaySon-Huang Date: Tue, 24 May 2022 16:45:11 +0800 Subject: [PATCH 11/15] Fix FileProvider::deleteRegularFile --- dbms/src/Encryption/FileProvider.cpp | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/dbms/src/Encryption/FileProvider.cpp b/dbms/src/Encryption/FileProvider.cpp index f44ae7f410d..f2f96fa8568 100644 --- a/dbms/src/Encryption/FileProvider.cpp +++ b/dbms/src/Encryption/FileProvider.cpp @@ -142,6 +142,10 @@ void FileProvider::deleteRegularFile(const String & file_path_, const Encryption { throw DB::TiFlashException("File: " + data_file.path() + " is not a regular file", Errors::Encryption::Internal); } + // Remove the file on disk before removing the encryption key. Or we may leave an encrypted file without the encryption key + // and the encrypted file can not be read. + // In the worst case that TiFlash crash between removing the file on disk and removing the encryption key, we may leave + // the encryption key not deleted. However, this is a rare case and won't cause serious problem. data_file.remove(false); key_manager->deleteFile(encryption_path_.full_path, true); } From ff79bcf030c18d07d769b7bf4436fc71533c7225 Mon Sep 17 00:00:00 2001 From: JaySon-Huang Date: Tue, 24 May 2022 16:52:04 +0800 Subject: [PATCH 12/15] fix --- dbms/src/Storages/Page/V3/WALStore.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dbms/src/Storages/Page/V3/WALStore.h b/dbms/src/Storages/Page/V3/WALStore.h index 833f1182029..292cb7a6e5d 100644 --- a/dbms/src/Storages/Page/V3/WALStore.h +++ b/dbms/src/Storages/Page/V3/WALStore.h @@ -134,7 +134,7 @@ class WALStore // Note that persisted_log_files should not be empty for needSave() == true, // cause we get the largest log num from persisted_log_files as the new // file name. - bool needSave(const size_t & max_size) const + bool needSave(const size_t max_size) const { return persisted_log_files.size() > max_size; } From 158403c56d15550a6255711b00c300f354efc821 Mon Sep 17 00:00:00 2001 From: JaySon-Huang Date: Tue, 24 May 2022 17:18:29 +0800 Subject: [PATCH 13/15] pick the fix on GlobalStoragePool::gc --- dbms/src/Storages/DeltaMerge/StoragePool.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dbms/src/Storages/DeltaMerge/StoragePool.cpp b/dbms/src/Storages/DeltaMerge/StoragePool.cpp index a040c5b6c6a..fa765cd9b1d 100644 --- a/dbms/src/Storages/DeltaMerge/StoragePool.cpp +++ b/dbms/src/Storages/DeltaMerge/StoragePool.cpp @@ -130,7 +130,7 @@ void GlobalStoragePool::restore() bool GlobalStoragePool::gc() { - return gc(Settings(), true, DELTA_MERGE_GC_PERIOD); + return gc(global_context.getSettingsRef(), true, DELTA_MERGE_GC_PERIOD); } bool GlobalStoragePool::gc(const Settings & settings, bool immediately, const Seconds & try_gc_period) From 6bc27c0b7b3e203e5df1235b400fe9a90778813b Mon Sep 17 00:00:00 2001 From: JaySon-Huang Date: Tue, 24 May 2022 18:21:14 +0800 Subject: [PATCH 14/15] fix ut regression --- dbms/src/Storages/Page/V3/tests/entries_helper.h | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/dbms/src/Storages/Page/V3/tests/entries_helper.h b/dbms/src/Storages/Page/V3/tests/entries_helper.h index 24be99a53b2..19e42755dae 100644 --- a/dbms/src/Storages/Page/V3/tests/entries_helper.h +++ b/dbms/src/Storages/Page/V3/tests/entries_helper.h @@ -234,6 +234,14 @@ inline ::testing::AssertionResult getEntryNotExist( id_entry.first.low, toDebugString(id_entry.second)); } + catch (DB::Exception & ex) + { + if (ex.code() == ErrorCodes::PS_ENTRY_NOT_EXISTS || ex.code() == ErrorCodes::PS_ENTRY_NO_VALID_VERSION) + return ::testing::AssertionSuccess(); + else + error = ex.displayText(); + return ::testing::AssertionFailure(::testing::Message(error.c_str())); + } catch (...) { error = getCurrentExceptionMessage(true); From 377d31c02ff7be12a809b143c6a4db503870e78f Mon Sep 17 00:00:00 2001 From: JaySon-Huang Date: Tue, 24 May 2022 19:30:30 +0800 Subject: [PATCH 15/15] address comment --- dbms/src/Storages/Page/PageStorage.h | 3 +- dbms/src/Storages/Page/V3/WALStore.h | 41 +--------------- dbms/src/Storages/Page/WALRecoveryMode.h | 61 ++++++++++++++++++++++++ 3 files changed, 65 insertions(+), 40 deletions(-) create mode 100644 dbms/src/Storages/Page/WALRecoveryMode.h diff --git a/dbms/src/Storages/Page/PageStorage.h b/dbms/src/Storages/Page/PageStorage.h index e52c6ce6701..78d800dc3af 100644 --- a/dbms/src/Storages/Page/PageStorage.h +++ b/dbms/src/Storages/Page/PageStorage.h @@ -23,6 +23,7 @@ #include #include #include +#include #include #include #include @@ -140,7 +141,7 @@ class PageStorage : private boost::noncopyable SettingUInt64 blob_block_alignment_bytes = 0; SettingUInt64 wal_roll_size = PAGE_META_ROLL_SIZE; - SettingUInt64 wal_recover_mode = 0x00; // WALRecoveryMode::TolerateCorruptedTailRecords + SettingUInt64 wal_recover_mode = static_cast(WALRecoveryMode::TolerateCorruptedTailRecords); SettingUInt64 wal_max_persisted_log_files = MAX_PERSISTED_LOG_FILES; void reload(const Config & rhs) diff --git a/dbms/src/Storages/Page/V3/WALStore.h b/dbms/src/Storages/Page/V3/WALStore.h index 292cb7a6e5d..f1ea00d3562 100644 --- a/dbms/src/Storages/Page/V3/WALStore.h +++ b/dbms/src/Storages/Page/V3/WALStore.h @@ -20,6 +20,7 @@ #include #include #include +#include #include #include @@ -34,45 +35,7 @@ class PSDiskDelegator; using PSDiskDelegatorPtr = std::shared_ptr; namespace PS::V3 { -enum class WALRecoveryMode : UInt8 -{ - // Original levelDB recovery - // - // We tolerate the last record in any log to be incomplete due to a crash - // while writing it. Zeroed bytes from preallocation are also tolerated in the - // trailing data of any log. - // - // Use case: Applications for which updates, once applied, must not be rolled - // back even after a crash-recovery. In this recovery mode, RocksDB guarantees - // this as long as `WritableFile::Append()` writes are durable. In case the - // user needs the guarantee in more situations (e.g., when - // `WritableFile::Append()` writes to page cache, but the user desires this - // guarantee in face of power-loss crash-recovery), RocksDB offers various - // mechanisms to additionally invoke `WritableFile::Sync()` in order to - // strengthen the guarantee. - // - // This differs from `kPointInTimeRecovery` in that, in case a corruption is - // detected during recovery, this mode will refuse to open the DB. Whereas, - // `kPointInTimeRecovery` will stop recovery just before the corruption since - // that is a valid point-in-time to which to recover. - TolerateCorruptedTailRecords = 0x00, - // Recover from clean shutdown - // We don't expect to find any corruption in the WAL - // Use case : This is ideal for unit tests and rare applications that - // can require high consistency guarantee - AbsoluteConsistency = 0x01, - // Recover to point-in-time consistency (default) - // We stop the WAL playback on discovering WAL inconsistency - // Use case : Ideal for systems that have disk controller cache like - // hard disk, SSD without super capacitor that store related data - PointInTimeRecovery = 0x02, - // Recovery after a disaster - // We ignore any corruption in the WAL and try to salvage as much data as - // possible - // Use case : Ideal for last ditch effort to recover data or systems that - // operate with low grade unrelated data - SkipAnyCorruptedRecords = 0x03, -}; + class WALStore; using WALStorePtr = std::unique_ptr; diff --git a/dbms/src/Storages/Page/WALRecoveryMode.h b/dbms/src/Storages/Page/WALRecoveryMode.h new file mode 100644 index 00000000000..740c9ed37a5 --- /dev/null +++ b/dbms/src/Storages/Page/WALRecoveryMode.h @@ -0,0 +1,61 @@ +// Copyright 2022 PingCAP, Ltd. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once +#include + +namespace DB +{ + +enum class WALRecoveryMode : UInt8 +{ + // Original levelDB recovery + // + // We tolerate the last record in any log to be incomplete due to a crash + // while writing it. Zeroed bytes from preallocation are also tolerated in the + // trailing data of any log. + // + // Use case: Applications for which updates, once applied, must not be rolled + // back even after a crash-recovery. In this recovery mode, RocksDB guarantees + // this as long as `WritableFile::Append()` writes are durable. In case the + // user needs the guarantee in more situations (e.g., when + // `WritableFile::Append()` writes to page cache, but the user desires this + // guarantee in face of power-loss crash-recovery), RocksDB offers various + // mechanisms to additionally invoke `WritableFile::Sync()` in order to + // strengthen the guarantee. + // + // This differs from `kPointInTimeRecovery` in that, in case a corruption is + // detected during recovery, this mode will refuse to open the DB. Whereas, + // `kPointInTimeRecovery` will stop recovery just before the corruption since + // that is a valid point-in-time to which to recover. + TolerateCorruptedTailRecords = 0x00, + // Recover from clean shutdown + // We don't expect to find any corruption in the WAL + // Use case : This is ideal for unit tests and rare applications that + // can require high consistency guarantee + AbsoluteConsistency = 0x01, + // Recover to point-in-time consistency (default) + // We stop the WAL playback on discovering WAL inconsistency + // Use case : Ideal for systems that have disk controller cache like + // hard disk, SSD without super capacitor that store related data + PointInTimeRecovery = 0x02, + // Recovery after a disaster + // We ignore any corruption in the WAL and try to salvage as much data as + // possible + // Use case : Ideal for last ditch effort to recover data or systems that + // operate with low grade unrelated data + SkipAnyCorruptedRecords = 0x03, +}; + +} // namespace DB