Skip to content

Commit

Permalink
Include checksum in approximate memory usage and update some comments.
Browse files Browse the repository at this point in the history
  • Loading branch information
cbi42 committed Apr 18, 2023
1 parent e3a39a5 commit 9d8e70a
Show file tree
Hide file tree
Showing 3 changed files with 119 additions and 8 deletions.
16 changes: 13 additions & 3 deletions table/block_based/block.cc
Original file line number Diff line number Diff line change
Expand Up @@ -1091,7 +1091,8 @@ void Block::InitializeDataBlockProtectionInfo(uint8_t protection_bytes_per_key,
num_keys = iter->NumberOfKeys(block_restart_interval_);
}
if (iter->status().ok()) {
kv_checksum_ = new char[(size_t)num_keys * protection_bytes_per_key];
checksum_size_ = num_keys * protection_bytes_per_key;
kv_checksum_ = new char[(size_t)checksum_size_];
size_t i = 0;
iter->SeekToFirst();
while (iter->Valid()) {
Expand All @@ -1116,6 +1117,12 @@ void Block::InitializeIndexBlockProtectionInfo(uint8_t protection_bytes_per_key,
bool index_has_first_key) {
protection_bytes_per_key_ = 0;
if (num_restarts_ > 0 && protection_bytes_per_key > 0) {
// Note that `global_seqno` and `key_includes_seq` are hardcoded here. They
// do not impact how the index block is parsed. During checksum
// construction/verification, we use the entire key buffer from
// raw_key_.GetKey() returned by iter->key() as the `key` part of key-value
checksum, and the content of this buffer does not change for different
// values of `global_seqno` or `key_includes_seq`.
std::unique_ptr<IndexBlockIter> iter{NewIndexIterator(
raw_ucmp, kDisableGlobalSequenceNumber /* global_seqno */, nullptr,
nullptr /* Statistics */, true /* total_order_seek */,
Expand All @@ -1130,7 +1137,8 @@ void Block::InitializeIndexBlockProtectionInfo(uint8_t protection_bytes_per_key,
num_keys = iter->NumberOfKeys(block_restart_interval_);
}
if (iter->status().ok()) {
kv_checksum_ = new char[(size_t)num_keys * protection_bytes_per_key];
checksum_size_ = num_keys * protection_bytes_per_key;
kv_checksum_ = new char[(size_t)checksum_size_];
iter->SeekToFirst();
size_t i = 0;
while (iter->Valid()) {
Expand Down Expand Up @@ -1163,7 +1171,8 @@ void Block::InitializeMetaIndexBlockProtectionInfo(
num_keys = iter->NumberOfKeys(block_restart_interval_);
}
if (iter->status().ok()) {
kv_checksum_ = new char[(size_t)num_keys * protection_bytes_per_key];
checksum_size_ = num_keys * protection_bytes_per_key;
kv_checksum_ = new char[(size_t)checksum_size_];
iter->SeekToFirst();
size_t i = 0;
while (iter->Valid()) {
Expand Down Expand Up @@ -1275,6 +1284,7 @@ size_t Block::ApproximateMemoryUsage() const {
if (read_amp_bitmap_) {
usage += read_amp_bitmap_->ApproximateMemoryUsage();
}
usage += checksum_size_;
return usage;
}

Expand Down
1 change: 1 addition & 0 deletions table/block_based/block.h
Original file line number Diff line number Diff line change
Expand Up @@ -278,6 +278,7 @@ class Block {
std::unique_ptr<BlockReadAmpBitmap> read_amp_bitmap_;
DataBlockHashIndex data_block_hash_index_;
char* kv_checksum_{nullptr};
uint32_t checksum_size_{0};
// Used by block iterators to calculate current key index within a block
uint32_t block_restart_interval_{0};
uint8_t protection_bytes_per_key_{0};
Expand Down
110 changes: 105 additions & 5 deletions table/block_based/block_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -746,11 +746,6 @@ TEST_F(BlockPerKVChecksumTest, UnsupportedOptionValue) {
TEST_F(BlockPerKVChecksumTest, InitializeProtectionInfo) {
// Make sure that the checksum construction code path does not break
// when the block is itself already corrupted.
std::string invalid_content = "1";
Slice raw_block = invalid_content;
BlockContents contents;
contents.data = raw_block;

Options options = Options();
BlockBasedTableOptions tbo;
uint8_t protection_bytes_per_key = 8;
Expand All @@ -759,13 +754,21 @@ TEST_F(BlockPerKVChecksumTest, InitializeProtectionInfo) {
protection_bytes_per_key, options.comparator};

{
std::string invalid_content = "1";
Slice raw_block = invalid_content;
BlockContents contents;
contents.data = raw_block;
std::unique_ptr<Block_kData> data_block;
create_context.Create(&data_block, std::move(contents));
std::unique_ptr<DataBlockIter> iter{data_block->NewDataIterator(
options.comparator, kDisableGlobalSequenceNumber)};
ASSERT_TRUE(iter->status().IsCorruption());
}
{
std::string invalid_content = "1";
Slice raw_block = invalid_content;
BlockContents contents;
contents.data = raw_block;
std::unique_ptr<Block_kIndex> index_block;
create_context.Create(&index_block, std::move(contents));
std::unique_ptr<IndexBlockIter> iter{index_block->NewIndexIterator(
Expand All @@ -774,13 +777,110 @@ TEST_F(BlockPerKVChecksumTest, InitializeProtectionInfo) {
ASSERT_TRUE(iter->status().IsCorruption());
}
{
std::string invalid_content = "1";
Slice raw_block = invalid_content;
BlockContents contents;
contents.data = raw_block;
std::unique_ptr<Block_kMetaIndex> meta_block;
create_context.Create(&meta_block, std::move(contents));
std::unique_ptr<MetaBlockIter> iter{meta_block->NewMetaIterator(true)};
ASSERT_TRUE(iter->status().IsCorruption());
}
}

TEST_F(BlockPerKVChecksumTest, ApproximateMemory) {
  // Tests that ApproximateMemoryUsage() includes memory used by block kv
  // checksum. For each block type (data, meta index, index), two blocks are
  // built from identical contents: one with per key-value protection enabled
  // and one without. The reported memory usage must differ by exactly
  // `protection_bytes_per_key * kNumRecords` bytes.
  const int kNumRecords = 20;
  std::vector<std::string> keys;
  std::vector<std::string> values;
  GenerateRandomKVs(&keys, &values, 0, kNumRecords, 1 /* step */,
                    24 /* padding_size */);

  // The builder is kept at test scope and replaced on every call of the
  // content generators below.
  // NOTE(review): presumably the Slice returned by Finish() references memory
  // owned by the builder, so a block created from an earlier call must not
  // read its contents after a later call replaced the builder -- confirm.
  std::unique_ptr<BlockBuilder> builder;
  auto generate_block_content = [&]() {
    builder = std::make_unique<BlockBuilder>(16 /* restart_interval */);
    for (int i = 0; i < kNumRecords; ++i) {
      builder->Add(keys[i], values[i]);
    }
    Slice raw_block = builder->Finish();
    BlockContents contents;
    contents.data = raw_block;
    return contents;
  };

  Options options = Options();
  BlockBasedTableOptions tbo;
  uint8_t protection_bytes_per_key = 8;
  // Context that creates blocks carrying per key-value checksums.
  BlockCreateContext with_checksum_create_context{
      &tbo,
      nullptr /* statistics */,
      false /* using_zstd */,
      protection_bytes_per_key,
      options.comparator,
      true /* index_value_is_full */};
  // Baseline context: 0 protection bytes per key, i.e. no checksums.
  BlockCreateContext create_context{
      &tbo, nullptr /* statistics */, false /* using_zstd */,
      0,    options.comparator,       true /* index_value_is_full */};
  // Expected extra memory: one checksum of `protection_bytes_per_key` bytes
  // for each of the `kNumRecords` entries.
  const size_t checksum_size = protection_bytes_per_key * kNumRecords;

  {
    // Data block.
    std::unique_ptr<Block_kData> data_block;
    create_context.Create(&data_block, generate_block_content());
    std::unique_ptr<Block_kData> with_checksum_data_block;
    with_checksum_create_context.Create(&with_checksum_data_block,
                                        generate_block_content());
    ASSERT_EQ(with_checksum_data_block->ApproximateMemoryUsage() -
                  data_block->ApproximateMemoryUsage(),
              checksum_size);
  }

  {
    // Meta index block. Use the meta index block type so that its checksum
    // initialization path is exercised instead of repeating the data block
    // check above.
    std::unique_ptr<Block_kMetaIndex> meta_block;
    create_context.Create(&meta_block, generate_block_content());
    std::unique_ptr<Block_kMetaIndex> with_checksum_meta_block;
    with_checksum_create_context.Create(&with_checksum_meta_block,
                                        generate_block_content());
    ASSERT_EQ(with_checksum_meta_block->ApproximateMemoryUsage() -
                  meta_block->ApproximateMemoryUsage(),
              checksum_size);
  }

  {
    // Index block has different contents.
    std::vector<std::string> separators;
    std::vector<BlockHandle> block_handles;
    std::vector<std::string> first_keys;
    GenerateRandomIndexEntries(&separators, &block_handles, &first_keys,
                               kNumRecords);
    auto generate_index_content = [&]() {
      builder = std::make_unique<BlockBuilder>(16 /* restart_interval */);
      for (int i = 0; i < kNumRecords; ++i) {
        IndexValue entry(block_handles[i], first_keys[i]);
        std::string encoded_entry;
        // Intentionally left empty; only the fully encoded entry is filled in.
        // NOTE(review): presumably ignored by the builder because it is
        // constructed without value delta encoding -- confirm.
        std::string delta_encoded_entry;
        entry.EncodeTo(&encoded_entry, false, nullptr);
        const Slice delta_encoded_entry_slice(delta_encoded_entry);
        builder->Add(separators[i], encoded_entry, &delta_encoded_entry_slice);
      }
      Slice raw_block = builder->Finish();
      BlockContents contents;
      contents.data = raw_block;
      return contents;
    };

    std::unique_ptr<Block_kIndex> index_block;
    create_context.Create(&index_block, generate_index_content());
    std::unique_ptr<Block_kIndex> with_checksum_index_block;
    with_checksum_create_context.Create(&with_checksum_index_block,
                                        generate_index_content());
    ASSERT_EQ(with_checksum_index_block->ApproximateMemoryUsage() -
                  index_block->ApproximateMemoryUsage(),
              checksum_size);
  }
}

std::string GetDataBlockIndexTypeStr(
BlockBasedTableOptions::DataBlockIndexType t) {
return t == BlockBasedTableOptions::DataBlockIndexType::kDataBlockBinarySearch
Expand Down

0 comments on commit 9d8e70a

Please sign in to comment.