Skip to content

Commit

Permalink
Write properties metablock last in block-based tables (#4158)
Browse files Browse the repository at this point in the history
Summary:
The properties meta-block should come at the end since we always need to
read it when opening a file, unlike index/filter/other meta-blocks, which
are sometimes read depending on the user's configuration. This ordering
will allow us (in a future PR) to do a small readahead at the end of the file
to read properties and meta-index blocks with one I/O.

The bulk of this PR is a refactoring of the `BlockBasedTableBuilder::Finish`
function. It was previously too large with inconsistent error handling, which
made it difficult to change. So I broke it up into one function per meta-block
write, and tried to make error handling consistent within those functions.
Then reordering the metablocks was trivial -- just reorder the calls to these
helper functions.
Pull Request resolved: #4158

Differential Revision: D8921705

Pulled By: ajkr

fbshipit-source-id: 96c9cc3182eb1adf11af46adab79dbeba7b12fcc
  • Loading branch information
ajkr authored and facebook-github-bot committed Jul 20, 2018
1 parent 2736752 commit ab35505
Show file tree
Hide file tree
Showing 3 changed files with 269 additions and 161 deletions.
323 changes: 171 additions & 152 deletions table/block_based_table_builder.cc
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,6 @@
#include "table/filter_block.h"
#include "table/format.h"
#include "table/full_filter_block.h"
#include "table/meta_blocks.h"
#include "table/table_builder.h"

#include "util/string_util.h"
Expand Down Expand Up @@ -668,184 +667,204 @@ Status BlockBasedTableBuilder::InsertBlockInCache(const Slice& block_contents,
return Status::OK();
}

Status BlockBasedTableBuilder::Finish() {
Rep* r = rep_;
bool empty_data_block = r->data_block.empty();
Flush();
assert(!r->closed);
r->closed = true;

// To make sure properties block is able to keep the accurate size of index
// block, we will finish writing all index entries here and flush them
// to storage after metaindex block is written.
if (ok() && !empty_data_block) {
r->index_builder->AddIndexEntry(
&r->last_key, nullptr /* no next data block */, r->pending_handle);
}

BlockHandle filter_block_handle, metaindex_block_handle, index_block_handle,
compression_dict_block_handle, range_del_block_handle;

// Write filter block
bool empty_filter_block = (r->filter_builder == nullptr ||
r->filter_builder->NumAdded() == 0);
// Writes the filter block(s) and records the filter's location in the
// metaindex.
//
// Nothing is written when the builder status is not ok(), when there is no
// filter builder, or when the filter builder reports zero added entries.
// Otherwise filter_builder->Finish() is called repeatedly while it reports
// Status::Incomplete(); each returned slice is written uncompressed and its
// size is added to props.filter_size. Finally the handle of the last block
// written is registered under the key "<filter block prefix><policy name>".
//
// meta_index_builder: receives the key -> filter_block_handle mapping.
//
// NOTE(review): this listing is a rendered diff without +/- markers. The lines
// that reference `r->` look like the pre-refactor versions of the adjacent
// `rep_->` lines rather than part of this helper -- confirm against the real
// file before relying on the exact statement sequence shown here.
void BlockBasedTableBuilder::WriteFilterBlock(
MetaIndexBuilder* meta_index_builder) {
BlockHandle filter_block_handle;
bool empty_filter_block = (rep_->filter_builder == nullptr ||
rep_->filter_builder->NumAdded() == 0);
if (ok() && !empty_filter_block) {
// Seed with Incomplete so the Finish() loop below runs at least once.
Status s = Status::Incomplete();
while (s.IsIncomplete()) {
Slice filter_content = r->filter_builder->Finish(filter_block_handle, &s);
// ok() is re-evaluated on every iteration, so the loop also ends as soon as
// the builder status turns bad.
while (ok() && s.IsIncomplete()) {
// The handle of the previously written block is passed back into Finish().
Slice filter_content = rep_->filter_builder->Finish(filter_block_handle, &s);
assert(s.ok() || s.IsIncomplete());
r->props.filter_size += filter_content.size();
rep_->props.filter_size += filter_content.size();
WriteRawBlock(filter_content, kNoCompression, &filter_block_handle);
}
}
if (ok() && !empty_filter_block) {
// Add mapping from "<filter_block_prefix>.Name" to location
// of filter data.
std::string key;
if (rep_->filter_builder->IsBlockBased()) {
key = BlockBasedTable::kFilterBlockPrefix;
} else {
key = rep_->table_options.partition_filters
? BlockBasedTable::kPartitionedFilterBlockPrefix
: BlockBasedTable::kFullFilterBlockPrefix;
}
key.append(rep_->table_options.filter_policy->Name());
meta_index_builder->Add(key, filter_block_handle);
}
}

// Finishes the index builder, writes any index meta-blocks plus the index
// block itself, and reports the handle of the last index block written
// through `index_block_handle`.
//
// - A status from index_builder->Finish() that is neither ok nor Incomplete is
//   stored in rep_->status (only while the builder is still ok()).
// - Each entry of index_blocks.meta_blocks is written with WriteBlock() and
//   added to `meta_index_builder`; the loop stops at the first failed write.
// - The index block is written with WriteBlock() when
//   table_options.enable_index_compression is set, otherwise uncompressed via
//   WriteRawBlock().
// - While Finish() keeps reporting Incomplete, it is called again with the
//   previous handle and the resulting block is written the same way.
//
// meta_index_builder: receives the index meta-block name -> handle mappings.
// index_block_handle: out-parameter, overwritten by every index block write.
//
// NOTE(review): this listing is a rendered diff without +/- markers. Lines
// that reference `r->`, `meta_index_builder.` (dot access), `return <status>`
// or `empty_filter_block` look like leftovers of the pre-refactor Finish()
// body rather than part of this helper -- confirm against the real file.
void BlockBasedTableBuilder::WriteIndexBlock(
MetaIndexBuilder* meta_index_builder, BlockHandle* index_block_handle) {
IndexBuilder::IndexBlocks index_blocks;
auto index_builder_status = r->index_builder->Finish(&index_blocks);
auto index_builder_status = rep_->index_builder->Finish(&index_blocks);
if (index_builder_status.IsIncomplete()) {
// If we have more than one index partition then meta_blocks are not
// supported for the index. Currently meta_blocks are used only by
// HashIndexBuilder which is not multi-partition.
assert(index_blocks.meta_blocks.empty());
} else if (!index_builder_status.ok()) {
return index_builder_status;
} else if (ok() && !index_builder_status.ok()) {
// Record the index builder failure in the builder-wide status.
rep_->status = index_builder_status;
}

// Write meta blocks and metaindex block with the following order.
// 1. [meta block: filter]
// 2. [meta block: properties]
// 3. [meta block: compression dictionary]
// 4. [meta block: range deletion tombstone]
// 5. [metaindex block]
// write meta blocks
MetaIndexBuilder meta_index_builder;
for (const auto& item : index_blocks.meta_blocks) {
BlockHandle block_handle;
WriteBlock(item.second, &block_handle, false /* is_data_block */);
meta_index_builder.Add(item.first, block_handle);
}

if (ok()) {
if (!empty_filter_block) {
// Add mapping from "<filter_block_prefix>.Name" to location
// of filter data.
std::string key;
if (r->filter_builder->IsBlockBased()) {
key = BlockBasedTable::kFilterBlockPrefix;
} else {
key = r->table_options.partition_filters
? BlockBasedTable::kPartitionedFilterBlockPrefix
: BlockBasedTable::kFullFilterBlockPrefix;
for (const auto& item : index_blocks.meta_blocks) {
BlockHandle block_handle;
WriteBlock(item.second, &block_handle, false /* is_data_block */);
// Do not register a handle for a meta-block whose write failed.
if (!ok()) {
break;
}
key.append(r->table_options.filter_policy->Name());
meta_index_builder.Add(key, filter_block_handle);
meta_index_builder->Add(item.first, block_handle);
}
}
if (ok()) {
if (rep_->table_options.enable_index_compression) {
WriteBlock(index_blocks.index_block_contents, index_block_handle, false);
} else {
WriteRawBlock(index_blocks.index_block_contents, kNoCompression,
index_block_handle);
}
}
// If there are more index partitions, finish them and write them out
Status s = index_builder_status;
while (ok() && s.IsIncomplete()) {
s = rep_->index_builder->Finish(&index_blocks, *index_block_handle);
if (!s.ok() && !s.IsIncomplete()) {
rep_->status = s;
return;
}
if (rep_->table_options.enable_index_compression) {
WriteBlock(index_blocks.index_block_contents, index_block_handle, false);
} else {
WriteRawBlock(index_blocks.index_block_contents, kNoCompression,
index_block_handle);
}
// The last index_block_handle will be for the partition index block
}
}

// Write properties and compression dictionary blocks.
{
PropertyBlockBuilder property_block_builder;
r->props.column_family_id = r->column_family_id;
r->props.column_family_name = r->column_family_name;
r->props.filter_policy_name = r->table_options.filter_policy != nullptr ?
r->table_options.filter_policy->Name() : "";
r->props.index_size =
r->index_builder->EstimatedSize() + kBlockTrailerSize;
r->props.comparator_name = r->ioptions.user_comparator != nullptr
? r->ioptions.user_comparator->Name()
: "nullptr";
r->props.merge_operator_name = r->ioptions.merge_operator != nullptr
? r->ioptions.merge_operator->Name()
// Fills in the table properties, writes the properties block uncompressed,
// and registers it in the metaindex under kPropertiesBlock.
//
// The whole body is skipped when the builder is not ok(). The properties
// populated here are copied from rep_ (column family id/name, creation and
// oldest-key times), derived from the configured options (filter policy,
// comparator, merge operator, prefix extractor and collector factory names,
// compression type) or queried from the index builder (estimated index size,
// partition count and top-level index size for kTwoLevelIndexSearch).
// The handle is added to `meta_index_builder` only if the builder is still
// ok() after the write.
//
// meta_index_builder: receives the kPropertiesBlock -> handle mapping.
//
// NOTE(review): this listing is a rendered diff without +/- markers. Lines
// that reference `r->`, `meta_index_builder.` (dot access),
// `compression_dict_block_handle`, `range_del_block_handle` or
// `metaindex_block_handle` look like leftovers of the pre-refactor Finish()
// body rather than part of this helper -- confirm against the real file.
void BlockBasedTableBuilder::WritePropertiesBlock(
MetaIndexBuilder* meta_index_builder) {
BlockHandle properties_block_handle;
if (ok()) {
PropertyBlockBuilder property_block_builder;
rep_->props.column_family_id = rep_->column_family_id;
rep_->props.column_family_name = rep_->column_family_name;
// An unset filter policy is recorded as the empty string, whereas the other
// optional components below are recorded as the literal "nullptr".
rep_->props.filter_policy_name = rep_->table_options.filter_policy != nullptr
? rep_->table_options.filter_policy->Name()
: "";
rep_->props.index_size = rep_->index_builder->EstimatedSize() + kBlockTrailerSize;
rep_->props.comparator_name = rep_->ioptions.user_comparator != nullptr
? rep_->ioptions.user_comparator->Name()
: "nullptr";
rep_->props.merge_operator_name = rep_->ioptions.merge_operator != nullptr
? rep_->ioptions.merge_operator->Name()
: "nullptr";
rep_->props.compression_name =
CompressionTypeToString(rep_->compression_ctx.type());
rep_->props.prefix_extractor_name = rep_->moptions.prefix_extractor != nullptr
? rep_->moptions.prefix_extractor->Name()
: "nullptr";
r->props.compression_name =
CompressionTypeToString(r->compression_ctx.type());
r->props.prefix_extractor_name =
r->moptions.prefix_extractor != nullptr
? r->moptions.prefix_extractor->Name()
: "nullptr";

std::string property_collectors_names = "[";
for (size_t i = 0;
i < r->ioptions.table_properties_collector_factories.size(); ++i) {
if (i != 0) {
property_collectors_names += ",";
}
property_collectors_names +=
r->ioptions.table_properties_collector_factories[i]->Name();
}
property_collectors_names += "]";
r->props.property_collectors_names = property_collectors_names;
if (r->table_options.index_type ==
BlockBasedTableOptions::kTwoLevelIndexSearch) {
assert(r->p_index_builder_ != nullptr);
r->props.index_partitions = r->p_index_builder_->NumPartitions();
r->props.top_level_index_size =
r->p_index_builder_->EstimateTopLevelIndexSize(r->offset);
}
r->props.index_key_is_user_key =
!r->index_builder->seperator_is_key_plus_seq();
r->props.creation_time = r->creation_time;
r->props.oldest_key_time = r->oldest_key_time;

// Add basic properties
property_block_builder.AddTableProperty(r->props);

// Add user collected properties
NotifyCollectTableCollectorsOnFinish(r->table_properties_collectors,
r->ioptions.info_log,
&property_block_builder);

BlockHandle properties_block_handle;
WriteRawBlock(
property_block_builder.Finish(),
kNoCompression,
&properties_block_handle
);
meta_index_builder.Add(kPropertiesBlock, properties_block_handle);

// Write compression dictionary block
if (r->compression_dict && r->compression_dict->size()) {
WriteRawBlock(*r->compression_dict, kNoCompression,
&compression_dict_block_handle);
meta_index_builder.Add(kCompressionDictBlock,
compression_dict_block_handle);

// Build a "[name1,name2,...]" list of the configured collector factories.
std::string property_collectors_names = "[";
for (size_t i = 0;
i < rep_->ioptions.table_properties_collector_factories.size(); ++i) {
if (i != 0) {
property_collectors_names += ",";
}
} // end of properties/compression dictionary block writing
property_collectors_names +=
rep_->ioptions.table_properties_collector_factories[i]->Name();
}
property_collectors_names += "]";
rep_->props.property_collectors_names = property_collectors_names;
// Partition statistics are only recorded for the two-level index type.
if (rep_->table_options.index_type ==
BlockBasedTableOptions::kTwoLevelIndexSearch) {
assert(rep_->p_index_builder_ != nullptr);
rep_->props.index_partitions = rep_->p_index_builder_->NumPartitions();
rep_->props.top_level_index_size =
rep_->p_index_builder_->EstimateTopLevelIndexSize(rep_->offset);
}
rep_->props.index_key_is_user_key =
!rep_->index_builder->seperator_is_key_plus_seq();
rep_->props.creation_time = rep_->creation_time;
rep_->props.oldest_key_time = rep_->oldest_key_time;

// Add basic properties
property_block_builder.AddTableProperty(rep_->props);

if (ok() && !r->range_del_block.empty()) {
WriteRawBlock(r->range_del_block.Finish(), kNoCompression,
&range_del_block_handle);
meta_index_builder.Add(kRangeDelBlock, range_del_block_handle);
} // range deletion tombstone meta block
} // meta blocks
// Add user collected properties
NotifyCollectTableCollectorsOnFinish(rep_->table_properties_collectors,
rep_->ioptions.info_log,
&property_block_builder);

// Write index block
WriteRawBlock(property_block_builder.Finish(), kNoCompression,
&properties_block_handle);
}
if (ok()) {
// flush the meta index block
WriteRawBlock(meta_index_builder.Finish(), kNoCompression,
&metaindex_block_handle);
meta_index_builder->Add(kPropertiesBlock, properties_block_handle);
}
}

if (r->table_options.enable_index_compression) {
WriteBlock(index_blocks.index_block_contents, &index_block_handle, false);
} else {
WriteRawBlock(index_blocks.index_block_contents, kNoCompression,
&index_block_handle);
// Writes the compression dictionary as an uncompressed meta-block and
// registers it in the metaindex under kCompressionDictBlock.
//
// Does nothing when rep_->compression_dict is null or empty. The write and
// the metaindex registration are each guarded by ok(), so a failed write does
// not register a handle.
//
// meta_index_builder: receives the kCompressionDictBlock -> handle mapping.
//
// NOTE(review): this listing is a rendered diff without +/- markers. The lines
// dealing with index partitions (`index_builder_status`, `r->index_builder`,
// `index_blocks`, `return s`) look like leftovers of the pre-refactor Finish()
// body rather than part of this helper -- confirm against the real file.
void BlockBasedTableBuilder::WriteCompressionDictBlock(
MetaIndexBuilder* meta_index_builder) {
if (rep_->compression_dict && rep_->compression_dict->size()) {
BlockHandle compression_dict_block_handle;
if (ok()) {
WriteRawBlock(*rep_->compression_dict, kNoCompression,
&compression_dict_block_handle);
}
// If there are more index partitions, finish them and write them out
Status& s = index_builder_status;
while (s.IsIncomplete()) {
s = r->index_builder->Finish(&index_blocks, index_block_handle);
if (!s.ok() && !s.IsIncomplete()) {
return s;
}
if (r->table_options.enable_index_compression) {
WriteBlock(index_blocks.index_block_contents, &index_block_handle,
false);
} else {
WriteRawBlock(index_blocks.index_block_contents, kNoCompression,
&index_block_handle);
}
// The last index_block_handle will be for the partition index block
// Re-check ok() so the handle is only registered after a successful write.
if (ok()) {
meta_index_builder->Add(kCompressionDictBlock,
compression_dict_block_handle);
}
}
}

// Writes the range deletion tombstone meta-block, if there is one, and
// registers its location in the metaindex under kRangeDelBlock.
//
// Does nothing when the builder is not ok() or when rep_->range_del_block is
// empty. The block is written uncompressed.
//
// meta_index_builder: receives the kRangeDelBlock -> handle mapping, but only
//                     once the block has been written with the builder still
//                     ok().
void BlockBasedTableBuilder::WriteRangeDelBlock(
    MetaIndexBuilder* meta_index_builder) {
  if (!ok() || rep_->range_del_block.empty()) {
    return;
  }

  BlockHandle range_del_block_handle;
  WriteRawBlock(rep_->range_del_block.Finish(), kNoCompression,
                &range_del_block_handle);

  // Mirror the sibling meta-block helpers, which re-check ok() after the
  // write: do not register a handle for a block that failed to be written.
  if (ok()) {
    meta_index_builder->Add(kRangeDelBlock, range_del_block_handle);
  }
}

Status BlockBasedTableBuilder::Finish() {
Rep* r = rep_;
bool empty_data_block = r->data_block.empty();
Flush();
assert(!r->closed);
r->closed = true;

// To make sure properties block is able to keep the accurate size of index
// block, we will finish writing all index entries first.
if (ok() && !empty_data_block) {
r->index_builder->AddIndexEntry(
&r->last_key, nullptr /* no next data block */, r->pending_handle);
}

// Write meta blocks and metaindex block with the following order.
// 1. [meta block: filter]
// 2. [meta block: index]
// 3. [meta block: compression dictionary]
// 4. [meta block: range deletion tombstone]
// 5. [meta block: properties]
// 6. [metaindex block]
BlockHandle metaindex_block_handle, index_block_handle;
MetaIndexBuilder meta_index_builder;
WriteFilterBlock(&meta_index_builder);
WriteIndexBlock(&meta_index_builder, &index_block_handle);
WriteCompressionDictBlock(&meta_index_builder);
WriteRangeDelBlock(&meta_index_builder);
WritePropertiesBlock(&meta_index_builder);
if (ok()) {
// flush the meta index block
WriteRawBlock(meta_index_builder.Finish(), kNoCompression,
&metaindex_block_handle);
}

// Write footer
if (ok()) {
Expand Down
9 changes: 9 additions & 0 deletions table/block_based_table_builder.h
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
#include "rocksdb/listener.h"
#include "rocksdb/options.h"
#include "rocksdb/status.h"
#include "table/meta_blocks.h"
#include "table/table_builder.h"
#include "util/compression.h"

Expand Down Expand Up @@ -106,6 +107,14 @@ class BlockBasedTableBuilder : public TableBuilder {
Status InsertBlockInCache(const Slice& block_contents,
const CompressionType type,
const BlockHandle* handle);

// Helpers used by Finish() to write one meta-block each. Every helper that
// writes a meta-block records its location in `meta_index_builder`.

// Writes the filter block(s), if a non-empty filter was built.
void WriteFilterBlock(MetaIndexBuilder* meta_index_builder);
// Writes the index meta-blocks and index block(s); `index_block_handle`
// receives the handle of the last index block written.
void WriteIndexBlock(MetaIndexBuilder* meta_index_builder,
BlockHandle* index_block_handle);
// Populates the table properties and writes the properties block.
void WritePropertiesBlock(MetaIndexBuilder* meta_index_builder);
// Writes the compression dictionary block, if a non-empty dictionary exists.
void WriteCompressionDictBlock(MetaIndexBuilder* meta_index_builder);
// Writes the range deletion tombstone block, if it is non-empty.
void WriteRangeDelBlock(MetaIndexBuilder* meta_index_builder);

struct Rep;
class BlockBasedTablePropertiesCollectorFactory;
class BlockBasedTablePropertiesCollector;
Expand Down
Loading

0 comments on commit ab35505

Please sign in to comment.