Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix restore problems in PageStorage. #4350

Merged
merged 9 commits into from
Mar 21, 2022
16 changes: 16 additions & 0 deletions dbms/src/Storages/Page/V3/PageDirectory.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -355,6 +355,22 @@ std::optional<PageEntryV3> VersionedPageEntries::getEntry(UInt64 seq) const
return std::nullopt;
}

std::optional<PageEntryV3> VersionedPageEntries::getLastEntry() const
{
auto page_lock = acquireLock();
if (type == EditRecordType::VAR_ENTRY)
{
for (auto it_r = entries.rbegin(); it_r != entries.rend(); it_r++)
{
if (it_r->second.isEntry())
{
return it_r->second.entry;
}
}
}
return std::nullopt;
}

Int64 VersionedPageEntries::incrRefCount(const PageVersionType & ver)
{
auto page_lock = acquireLock();
Expand Down
2 changes: 2 additions & 0 deletions dbms/src/Storages/Page/V3/PageDirectory.h
Original file line number Diff line number Diff line change
Expand Up @@ -176,6 +176,8 @@ class VersionedPageEntries

std::optional<PageEntryV3> getEntry(UInt64 seq) const;

std::optional<PageEntryV3> getLastEntry() const;

/**
* If there are entries point to file in `blob_ids`, take out the <page_id, ver, entry> and
* store them into `blob_versioned_entries`.
Expand Down
42 changes: 35 additions & 7 deletions dbms/src/Storages/Page/V3/PageDirectoryFactory.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -27,8 +27,10 @@ PageDirectoryPtr PageDirectoryFactory::create(FileProviderPtr & file_provider, P
auto [wal, reader] = WALStore::create(file_provider, delegator);
PageDirectoryPtr dir = std::make_unique<PageDirectory>(std::move(wal));
loadFromDisk(dir, std::move(reader));
// Reset the `sequence` to the maximum of persisted.
dir->sequence = max_applied_ver.sequence;
jiaqizho marked this conversation as resolved.
Show resolved Hide resolved

// After restoring from the disk, we need cleanup all invalid entries in memory, or it will
// try to run GC again on some entries that are already marked as invalid in BlobStore.
dir->gcInMemEntries();

if (blob_stats)
{
Expand All @@ -39,7 +41,36 @@ PageDirectoryPtr PageDirectoryFactory::create(FileProviderPtr & file_provider, P
for (const auto & [page_id, entries] : dir->mvcc_table_directory)
{
(void)page_id;
if (auto entry = entries->getEntry(max_applied_ver.sequence); entry)

// We can't use getEntry(max_seq) to get the entry.
// Otherwise, It is likely to cause data loss.
// for example:
//
// page id 4927
// {type:5, create_ver: <0,0>, is_deleted: false, delete_ver: <0,0>, ori_page_id: 0.0, being_ref_count: 1, num_entries: 2}
// entry 0
// sequence: 1802
// epoch: 0
// is del: false
// blob id: 5
// offset: 77661628
// size: 2381165
// crc: 0x1D1BEF504F12D3A0
// field offset size: 0
// entry 1
// sequence: 2121
// epoch: 0
// is del: true
// blob id: 0
// offset: 0
// size: 0
// crc: 0x0
// field offset size: 0
// page id 5819
// {type:6, create_ver: <2090,0>, is_deleted: false, delete_ver: <0,0>, ori_page_id: 0.4927, being_ref_count: 1, num_entries: 0}
//
// After getEntry, page id `4927` won't be restore by BlobStore.
JaySon-Huang marked this conversation as resolved.
Show resolved Hide resolved
if (auto entry = entries->getLastEntry(); entry)
{
blob_stats->restoreByEntry(*entry);
}
Expand All @@ -60,8 +91,7 @@ PageDirectoryPtr PageDirectoryFactory::createFromEdit(FileProviderPtr & file_pro
loadEdit(dir, edit);
if (blob_stats)
blob_stats->restore();
// Reset the `sequence` to the maximum of persisted.
dir->sequence = max_applied_ver.sequence;

return dir;
}

Expand All @@ -71,8 +101,6 @@ void PageDirectoryFactory::loadEdit(const PageDirectoryPtr & dir, const PageEntr

for (const auto & r : edit.getRecords())
{
if (max_applied_ver < r.version)
max_applied_ver = r.version;
max_applied_page_id = std::max(r.page_id, max_applied_page_id);

auto [iter, created] = mvcc_table_directory.insert(std::make_pair(r.page_id, nullptr));
Expand Down
1 change: 0 additions & 1 deletion dbms/src/Storages/Page/V3/PageDirectoryFactory.h
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,6 @@ using WALStoreReaderPtr = std::shared_ptr<WALStoreReader>;
class PageDirectoryFactory
{
public:
PageVersionType max_applied_ver;
PageIdV3Internal max_applied_page_id;

PageDirectoryFactory & setBlobStore(BlobStore & blob_store)
Expand Down
1 change: 1 addition & 0 deletions dbms/src/Storages/Page/V3/PageStorageImpl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@ PageStorageImpl::~PageStorageImpl() = default;

void PageStorageImpl::restore()
{
// TODO: clean up blobstore.
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What thing do you plan to optimize by adding this "TODO" comment? Can you explain more?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

There is such an extreme example.
Before shutdown:

blob 1 : valid entries 10
blob 2 : valid entries 100
blob 3 : valid entries 100

after shutdown, Then we do the restore:

blob 1 : all of entries is invalid 
blob 2 : valid entries less than 100, but still exist valid entry
blob 3 : valid entries less than 100, but still exist valid entry

Then blob 1 won't restored, because it has not any valid entries.
But at this time, we also won't remove it in the disk. Because it is not created on memory. So we need add a scan after we restored pagestorage. Then removed blob 1.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Get it

// TODO: Speedup restoring
PageDirectoryFactory factory;
page_directory = factory
Expand Down
40 changes: 40 additions & 0 deletions dbms/src/Storages/Page/V3/tests/gtest_page_storage.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -905,5 +905,45 @@ try
}
CATCH


TEST_F(PageStorageTest, readRefAfterRestore)
try
{
const size_t buf_sz = 1024;
char c_buff[buf_sz];

for (size_t i = 0; i < buf_sz; ++i)
{
c_buff[i] = i % 0xff;
}

{
WriteBatch batch;
batch.putPage(1, 0, std::make_shared<ReadBufferFromMemory>(c_buff, buf_sz), buf_sz, PageFieldSizes{{32, 64, 79, 128, 196, 256, 269}});
batch.putRefPage(3, 1);
batch.delPage(1);
batch.putPage(4, 0, std::make_shared<ReadBufferFromMemory>(c_buff, buf_sz), buf_sz, {});
page_storage->write(std::move(batch));
}

page_storage = reopenWithConfig(config);

{
WriteBatch batch;
memset(c_buff, 0, buf_sz);
batch.putPage(5, 0, std::make_shared<ReadBufferFromMemory>(c_buff, buf_sz), buf_sz, {});
page_storage->write(std::move(batch));
}

std::vector<PageStorage::PageReadFields> fields;
PageStorage::PageReadFields field;
field.first = 3;
field.second = {0, 1, 2, 3, 4, 5, 6};
fields.emplace_back(field);

ASSERT_NO_THROW(page_storage->read(fields));
}
CATCH

} // namespace PS::V3::tests
} // namespace DB