From 5d054235bc575d4cc0f2b2868b13eccd3706c050 Mon Sep 17 00:00:00 2001 From: Hui Xiao Date: Thu, 4 May 2023 18:31:41 -0700 Subject: [PATCH] Address comment --- HISTORY.md | 2 +- db/builder.cc | 2 +- db/compaction/compaction_outputs.cc | 2 +- db/db_impl/db_impl_compaction_flush.cc | 4 ++-- db/db_impl/db_impl_experimental.cc | 2 +- db/db_impl/db_impl_open.cc | 4 ++-- db/experimental.cc | 2 +- db/external_sst_file_ingestion_job.cc | 2 +- db/flush_job.cc | 2 +- db/import_column_family_job.cc | 2 +- db/repair.cc | 5 ++-- db/table_cache.cc | 6 +---- db/version_edit.cc | 20 ++++++++-------- db/version_edit.h | 14 +++++------ db/version_set.cc | 16 ++++++------- file/prefetch_test.cc | 1 + include/rocksdb/table_properties.h | 6 ++--- .../block_based/block_based_table_builder.cc | 10 ++++---- table/block_based/block_based_table_builder.h | 4 ++-- .../block_based/block_based_table_factory.cc | 3 +-- table/block_based/block_based_table_factory.h | 2 +- table/block_based/block_based_table_reader.cc | 23 ++++++------------- table/block_based/block_based_table_reader.h | 6 ++--- table/block_fetcher_test.cc | 8 +++---- table/meta_blocks.cc | 5 ++-- table/table_builder.h | 11 ++++----- table/table_properties.cc | 3 +-- 27 files changed, 73 insertions(+), 94 deletions(-) diff --git a/HISTORY.md b/HISTORY.md index 5c59c6618611..96170b41ba9b 100644 --- a/HISTORY.md +++ b/HISTORY.md @@ -11,7 +11,7 @@ * Delete an empty WAL file on DB open if the log number is less than the min log number to keep ### Performance Improvements -* Record the starting offset of block-based table's tail (i.e, all blocks after data blocks till the end) in manifest and use it to prefetch the tail more accurately during table open instead of relying on heuristics (#11406).Heuristics will still be used for now as a fallback for backward compatibility. +* Improved the I/O efficiency of prefetching SST metadata by recording more information in the DB manifest. Opening files written with previous versions will still rely on heuristics for how much to prefetch (#11406). ## 8.2.0 (04/24/2023) ### Public API Changes diff --git a/db/builder.cc b/db/builder.cc index c2e99fb66409..a99bb57e8c22 100644 --- a/db/builder.cc +++ b/db/builder.cc @@ -291,7 +291,7 @@ Status BuildTable( if (s.ok() && !empty) { uint64_t file_size = builder->FileSize(); meta->fd.file_size = file_size; - meta->tail_start_offset = builder->GetTailStartOffset(); + meta->tail_size = builder->GetTailSize(); meta->marked_for_compaction = builder->NeedCompact(); assert(meta->fd.GetFileSize() > 0); tp = builder diff --git a/db/compaction/compaction_outputs.cc b/db/compaction/compaction_outputs.cc index e1d54ecb3fcf..f6e0dbd7d43d 100644 --- a/db/compaction/compaction_outputs.cc +++ b/db/compaction/compaction_outputs.cc @@ -43,7 +43,7 @@ Status CompactionOutputs::Finish(const Status& intput_status, const uint64_t current_bytes = builder_->FileSize(); if (s.ok()) { meta->fd.file_size = current_bytes; - meta->tail_start_offset = builder_->GetTailStartOffset(); + meta->tail_size = builder_->GetTailSize(); meta->marked_for_compaction = builder_->NeedCompact(); } current_output().finished = true; diff --git a/db/db_impl/db_impl_compaction_flush.cc b/db/db_impl/db_impl_compaction_flush.cc index dfe41ded8302..39a8339199ee 100644 --- a/db/db_impl/db_impl_compaction_flush.cc +++ b/db/db_impl/db_impl_compaction_flush.cc @@ -1758,7 +1758,7 @@ Status DBImpl::ReFitLevel(ColumnFamilyData* cfd, int level, int target_level) { f->marked_for_compaction, f->temperature, f->oldest_blob_file_number, f->oldest_ancester_time, f->file_creation_time, f->epoch_number, f->file_checksum, f->file_checksum_func_name, f->unique_id, - f->compensated_range_deletion_size, f->tail_start_offset); + f->compensated_range_deletion_size, f->tail_size); } ROCKS_LOG_DEBUG(immutable_db_options_.info_log, "[%s] Apply version edit:\n%s", cfd->GetName().c_str(), @@ -3457,7 +3457,7 @@ Status DBImpl::BackgroundCompaction(bool* made_progress, f->oldest_blob_file_number, f->oldest_ancester_time, f->file_creation_time, f->epoch_number, f->file_checksum, f->file_checksum_func_name, f->unique_id, - f->compensated_range_deletion_size, f->tail_start_offset); + f->compensated_range_deletion_size, f->tail_size); ROCKS_LOG_BUFFER( log_buffer, diff --git a/db/db_impl/db_impl_experimental.cc b/db/db_impl/db_impl_experimental.cc index 9354e22f5207..8d958ffc17de 100644 --- a/db/db_impl/db_impl_experimental.cc +++ b/db/db_impl/db_impl_experimental.cc @@ -138,7 +138,7 @@ Status DBImpl::PromoteL0(ColumnFamilyHandle* column_family, int target_level) { f->oldest_blob_file_number, f->oldest_ancester_time, f->file_creation_time, f->epoch_number, f->file_checksum, f->file_checksum_func_name, f->unique_id, - f->compensated_range_deletion_size, f->tail_start_offset); + f->compensated_range_deletion_size, f->tail_size); } status = versions_->LogAndApply(cfd, *cfd->GetLatestMutableCFOptions(), diff --git a/db/db_impl/db_impl_open.cc b/db/db_impl/db_impl_open.cc index ee51c41578fa..5bad1e12bdfc 100644 --- a/db/db_impl/db_impl_open.cc +++ b/db/db_impl/db_impl_open.cc @@ -617,7 +617,7 @@ Status DBImpl::Recover( f->file_creation_time, f->epoch_number, f->file_checksum, f->file_checksum_func_name, f->unique_id, f->compensated_range_deletion_size, - f->tail_start_offset); + f->tail_size); ROCKS_LOG_WARN(immutable_db_options_.info_log, "[%s] Moving #%" PRIu64 " from from_level-%d to from_level-%d %" PRIu64 @@ -1681,7 +1681,7 @@ Status DBImpl::WriteLevel0TableForRecovery(int job_id, ColumnFamilyData* cfd, meta.oldest_blob_file_number, meta.oldest_ancester_time, meta.file_creation_time, meta.epoch_number, meta.file_checksum, meta.file_checksum_func_name, meta.unique_id, - meta.compensated_range_deletion_size, meta.tail_start_offset); + meta.compensated_range_deletion_size, meta.tail_size); for (const auto& blob : blob_file_additions) { edit->AddBlobFile(blob); diff --git a/db/experimental.cc b/db/experimental.cc index f2b8854a3741..cb5fb3179452 100644 --- a/db/experimental.cc +++ b/db/experimental.cc @@ -102,7 +102,7 @@ Status UpdateManifestForFilesState( lf->oldest_blob_file_number, lf->oldest_ancester_time, lf->file_creation_time, lf->epoch_number, lf->file_checksum, lf->file_checksum_func_name, lf->unique_id, - lf->compensated_range_deletion_size, lf->tail_start_offset); + lf->compensated_range_deletion_size, lf->tail_size); } } } else { diff --git a/db/external_sst_file_ingestion_job.cc b/db/external_sst_file_ingestion_job.cc index f77727845cde..07b4582ee517 100644 --- a/db/external_sst_file_ingestion_job.cc +++ b/db/external_sst_file_ingestion_job.cc @@ -473,7 +473,7 @@ Status ExternalSstFileIngestionJob::Run() { ? kReservedEpochNumberForFileIngestedBehind : cfd_->NewEpochNumber(), f.file_checksum, f.file_checksum_func_name, f.unique_id, 0, - f.table_properties.tail_start_offset); + f.table_properties.tail_size); f_metadata.temperature = f.file_temperature; edit_.AddFile(f.picked_level, f_metadata); } diff --git a/db/flush_job.cc b/db/flush_job.cc index 4c687a9e42b8..8b152ee3e0a5 100644 --- a/db/flush_job.cc +++ b/db/flush_job.cc @@ -1008,7 +1008,7 @@ Status FlushJob::WriteLevel0Table() { meta_.file_creation_time, meta_.epoch_number, meta_.file_checksum, meta_.file_checksum_func_name, meta_.unique_id, meta_.compensated_range_deletion_size, - meta_.tail_start_offset); + meta_.tail_size); edit_->SetBlobFileAdditions(std::move(blob_file_additions)); } // Piggyback FlushJobInfo on the first first flushed memtable. diff --git a/db/import_column_family_job.cc b/db/import_column_family_job.cc index 5428d7631de2..aebbc547528e 100644 --- a/db/import_column_family_job.cc +++ b/db/import_column_family_job.cc @@ -146,7 +146,7 @@ Status ImportColumnFamilyJob::Run() { file_metadata.temperature, kInvalidBlobFileNumber, oldest_ancester_time, current_time, file_metadata.epoch_number, kUnknownFileChecksum, kUnknownFileChecksumFuncName, f.unique_id, 0, - f.table_properties.tail_start_offset); + f.table_properties.tail_size); s = dummy_version_builder.Apply(&dummy_version_edit); } if (s.ok()) { diff --git a/db/repair.cc b/db/repair.cc index 8dbacb41e4c4..451b345e29b2 100644 --- a/db/repair.cc +++ b/db/repair.cc @@ -552,7 +552,7 @@ class Repairer { t->meta.oldest_ancester_time = props->creation_time; } if (status.ok()) { - t->meta.tail_start_offset = props->tail_start_offset; + t->meta.tail_size = props->tail_size; } ColumnFamilyData* cfd = nullptr; if (status.ok()) { @@ -685,8 +685,7 @@ class Repairer { table->meta.oldest_ancester_time, table->meta.file_creation_time, table->meta.epoch_number, table->meta.file_checksum, table->meta.file_checksum_func_name, table->meta.unique_id, - table->meta.compensated_range_deletion_size, - table->meta.tail_start_offset); + table->meta.compensated_range_deletion_size, table->meta.tail_size); } s = dummy_version_builder.Apply(&dummy_edit); if (s.ok()) { diff --git a/db/table_cache.cc b/db/table_cache.cc index 560f253847c2..73fa07c6d54d 100644 --- a/db/table_cache.cc +++ b/db/table_cache.cc @@ -146,11 +146,7 @@ Status TableCache::GetTableReader( false /* force_direct_prefetch */, level, block_cache_tracer_, max_file_size_for_l0_meta_pin, db_session_id_, file_meta.fd.GetNumber(), expected_unique_id, - file_meta.fd.largest_seqno, file_meta.tail_start_offset, - file_meta.num_entries > 0 && - (file_meta.num_entries == file_meta.num_range_deletions) - ? true - : false /* contain_no_data_block */), + file_meta.fd.largest_seqno, file_meta.tail_size), std::move(file_reader), file_meta.fd.GetFileSize(), table_reader, prefetch_index_and_filter_in_cache); TEST_SYNC_POINT("TableCache::GetTableReader:0"); diff --git a/db/version_edit.cc b/db/version_edit.cc index c371228aa59b..4f1ae80d21e5 100644 --- a/db/version_edit.cc +++ b/db/version_edit.cc @@ -238,11 +238,11 @@ bool VersionEdit::EncodeTo(std::string* dst) const { f.compensated_range_deletion_size); PutLengthPrefixedSlice(dst, Slice(compensated_range_deletion_size)); } - if (f.tail_start_offset) { - PutVarint32(dst, NewFileCustomTag::kTailStartOffset); - std::string varint_tail_start_offset; - PutVarint64(&varint_tail_start_offset, f.tail_start_offset); - PutLengthPrefixedSlice(dst, Slice(varint_tail_start_offset)); + if (f.tail_size) { + PutVarint32(dst, NewFileCustomTag::kTailSize); + std::string varint_tail_size; + PutVarint64(&varint_tail_size, f.tail_size); + PutLengthPrefixedSlice(dst, Slice(varint_tail_size)); } TEST_SYNC_POINT_CALLBACK("VersionEdit::EncodeTo:NewFile4:CustomizeFields", @@ -422,8 +422,8 @@ const char* VersionEdit::DecodeNewFile4From(Slice* input) { return "Invalid compensated range deletion size"; } break; - case kTailStartOffset: - if (!GetVarint64(&field, &f.tail_start_offset)) { + case kTailSize: + if (!GetVarint64(&field, &f.tail_size)) { return "invalid tail start offset"; } break; @@ -862,8 +862,8 @@ std::string VersionEdit::DebugString(bool hex_key) const { InternalUniqueIdToExternal(&id); r.append(UniqueIdToHumanString(EncodeUniqueIdBytes(&id))); } - r.append(" tail start offset:"); - AppendNumberTo(&r, f.tail_start_offset); + r.append(" tail size:"); + AppendNumberTo(&r, f.tail_size); } for (const auto& blob_file_addition : blob_file_additions_) { @@ -979,7 +979,7 @@ std::string VersionEdit::DebugJSON(int edit_num, bool hex_key) const { // permanent jw << "Temperature" << static_cast(f.temperature); } - jw << "TailStartOffset" << f.tail_start_offset; + jw << "TailSize" << f.tail_size; jw.EndArrayedObject(); } diff --git a/db/version_edit.h b/db/version_edit.h index d017ef9be24c..07e8f37742c5 100644 --- a/db/version_edit.h +++ b/db/version_edit.h @@ -90,7 +90,7 @@ enum NewFileCustomTag : uint32_t { kUniqueId = 12, kEpochNumber = 13, kCompensatedRangeDeletionSize = 14, - kTailStartOffset = 15, + kTailSize = 15, // If this bit for the custom tag is set, opening DB should fail if // we don't know this field. @@ -239,9 +239,9 @@ struct FileMetaData { // SST unique id UniqueId64x2 unique_id{}; - // Offset where the "tail" part of SST file starts + // Size of the "tail" part of a SST file // "Tail" refers to all blocks after data blocks till the end of the SST file - uint64_t tail_start_offset = 0; + uint64_t tail_size = 0; FileMetaData() = default; @@ -255,7 +255,7 @@ struct FileMetaData { const std::string& _file_checksum_func_name, UniqueId64x2 _unique_id, const uint64_t _compensated_range_deletion_size, - uint64_t _tail_start_offset) + uint64_t _tail_size) : fd(file, file_path_id, file_size, smallest_seq, largest_seq), smallest(smallest_key), largest(largest_key), @@ -269,7 +269,7 @@ struct FileMetaData { file_checksum(_file_checksum), file_checksum_func_name(_file_checksum_func_name), unique_id(std::move(_unique_id)), - tail_start_offset(_tail_start_offset) { + tail_size(_tail_size) { TEST_SYNC_POINT_CALLBACK("FileMetaData::FileMetaData", this); } @@ -454,7 +454,7 @@ class VersionEdit { const std::string& file_checksum_func_name, const UniqueId64x2& unique_id, const uint64_t compensated_range_deletion_size, - uint64_t tail_start_offset) { + uint64_t tail_size) { assert(smallest_seqno <= largest_seqno); new_files_.emplace_back( level, @@ -463,7 +463,7 @@ class VersionEdit { temperature, oldest_blob_file_number, oldest_ancester_time, file_creation_time, epoch_number, file_checksum, file_checksum_func_name, unique_id, - compensated_range_deletion_size, tail_start_offset)); + compensated_range_deletion_size, tail_size)); if (!HasLastSequence() || largest_seqno > GetLastSequence()) { SetLastSequence(largest_seqno); } diff --git a/db/version_set.cc b/db/version_set.cc index 392081535aa5..436389b81b1e 100644 --- a/db/version_set.cc +++ b/db/version_set.cc @@ -6346,14 +6346,14 @@ Status VersionSet::WriteCurrentStateToManifest( for (const auto& f : level_files) { assert(f); - edit.AddFile( - level, f->fd.GetNumber(), f->fd.GetPathId(), f->fd.GetFileSize(), - f->smallest, f->largest, f->fd.smallest_seqno, - f->fd.largest_seqno, f->marked_for_compaction, f->temperature, - f->oldest_blob_file_number, f->oldest_ancester_time, - f->file_creation_time, f->epoch_number, f->file_checksum, - f->file_checksum_func_name, f->unique_id, - f->compensated_range_deletion_size, f->tail_start_offset); + edit.AddFile(level, f->fd.GetNumber(), f->fd.GetPathId(), + f->fd.GetFileSize(), f->smallest, f->largest, + f->fd.smallest_seqno, f->fd.largest_seqno, + f->marked_for_compaction, f->temperature, + f->oldest_blob_file_number, f->oldest_ancester_time, + f->file_creation_time, f->epoch_number, f->file_checksum, + f->file_checksum_func_name, f->unique_id, + f->compensated_range_deletion_size, f->tail_size); } } diff --git a/file/prefetch_test.cc b/file/prefetch_test.cc index b1caccc67fc4..053caacd09e1 100644 --- a/file/prefetch_test.cc +++ b/file/prefetch_test.cc @@ -261,6 +261,7 @@ TEST_P(PrefetchTest, BlockBasedTableTailPrefetch) { Status s = TryReopen(options); if (use_direct_io && (s.IsNotSupported() || s.IsInvalidArgument())) { // If direct IO is not supported, skip the test + ROCKSDB_GTEST_BYPASS("Direct IO is not supported"); return; } else { ASSERT_OK(s); diff --git a/include/rocksdb/table_properties.h b/include/rocksdb/table_properties.h index ab259f930be4..6d32418e525d 100644 --- a/include/rocksdb/table_properties.h +++ b/include/rocksdb/table_properties.h @@ -70,7 +70,7 @@ struct TablePropertiesNames { static const std::string kSlowCompressionEstimatedDataSize; static const std::string kFastCompressionEstimatedDataSize; static const std::string kSequenceNumberTimeMapping; - static const std::string kTailStartOffset; + static const std::string kTailSize; }; // `TablePropertiesCollector` provides the mechanism for users to collect @@ -240,9 +240,9 @@ struct TableProperties { // 0 means not exists. uint64_t external_sst_file_global_seqno_offset = 0; - // Offset where the "tail" part of SST file starts + // Size of the "tail" part of a SST file // "Tail" refers to all blocks after data blocks till the end of the SST file - uint64_t tail_start_offset = 0; + uint64_t tail_size = 0; // DB identity // db_id is an identifier generated the first time the DB is created diff --git a/table/block_based/block_based_table_builder.cc b/table/block_based/block_based_table_builder.cc index 309f392a004d..181ca5b3a671 100644 --- a/table/block_based/block_based_table_builder.cc +++ b/table/block_based/block_based_table_builder.cc @@ -1908,9 +1908,8 @@ Status BlockBasedTableBuilder::Finish() { } } - if (ok()) { - r->props.tail_start_offset = r->offset; - } + uint64_t tail_start_offset = r->offset; + // Write meta blocks, metaindex block and footer in the following order. // 1. [meta block: filter] // 2. [meta block: index] @@ -1938,6 +1937,7 @@ Status BlockBasedTableBuilder::Finish() { r->SetStatus(r->CopyIOStatus()); Status ret_status = r->CopyStatus(); assert(!ret_status.ok() || io_status().ok()); + r->props.tail_size = r->offset - tail_start_offset; return ret_status; } @@ -1971,8 +1971,8 @@ uint64_t BlockBasedTableBuilder::EstimatedFileSize() const { } } -uint64_t BlockBasedTableBuilder::GetTailStartOffset() const { - return rep_->props.tail_start_offset; +uint64_t BlockBasedTableBuilder::GetTailSize() const { + return rep_->props.tail_size; } bool BlockBasedTableBuilder::NeedCompact() const { diff --git a/table/block_based/block_based_table_builder.h b/table/block_based/block_based_table_builder.h index f5bf48b87368..3949474c5809 100644 --- a/table/block_based/block_based_table_builder.h +++ b/table/block_based/block_based_table_builder.h @@ -91,9 +91,9 @@ class BlockBasedTableBuilder : public TableBuilder { // is enabled. uint64_t EstimatedFileSize() const override; - // Get the offset where the "tail" part of a SST file starts. "Tail" refers to + // Get the size of the "tail" part of a SST file. "Tail" refers to // all blocks after data blocks till the end of the SST file. - uint64_t GetTailStartOffset() const override; + uint64_t GetTailSize() const override; bool NeedCompact() const override; diff --git a/table/block_based/block_based_table_factory.cc b/table/block_based/block_based_table_factory.cc index 1302ad29fbe9..653c222d5e49 100644 --- a/table/block_based/block_based_table_factory.cc +++ b/table/block_based/block_based_table_factory.cc @@ -568,8 +568,7 @@ Status BlockBasedTableFactory::NewTableReader( ro, table_reader_options.ioptions, table_reader_options.env_options, table_options_, table_reader_options.internal_comparator, std::move(file), file_size, table_reader_options.block_protection_bytes_per_key, - table_reader, table_reader_options.tail_start_offset, - table_reader_options.contain_no_data_block, table_reader_cache_res_mgr_, + table_reader, table_reader_options.tail_size, table_reader_cache_res_mgr_, table_reader_options.prefix_extractor, prefetch_index_and_filter_in_cache, table_reader_options.skip_filters, table_reader_options.level, table_reader_options.immortal, table_reader_options.largest_seqno, diff --git a/table/block_based/block_based_table_factory.h b/table/block_based/block_based_table_factory.h index f43183caf281..1f787697772e 100644 --- a/table/block_based/block_based_table_factory.h +++ b/table/block_based/block_based_table_factory.h @@ -29,7 +29,7 @@ class RandomAccessFileReader; class WritableFileWriter; // TODO: deprecate this class as it can be replaced with -// `FileMetaData::tail_start_offset` +// `FileMetaData::tail_size` // // A class used to track actual bytes written from the tail in the recent SST // file opens, and provide a suggestion for following open. diff --git a/table/block_based/block_based_table_reader.cc b/table/block_based/block_based_table_reader.cc index 1a53a77d07d2..a5e8e701467a 100644 --- a/table/block_based/block_based_table_reader.cc +++ b/table/block_based/block_based_table_reader.cc @@ -561,8 +561,7 @@ Status BlockBasedTable::Open( const InternalKeyComparator& internal_comparator, std::unique_ptr&& file, uint64_t file_size, uint8_t block_protection_bytes_per_key, - std::unique_ptr* table_reader, uint64_t tail_start_offset, - bool contain_no_data_block, + std::unique_ptr* table_reader, uint64_t tail_size, std::shared_ptr table_reader_cache_res_mgr, const std::shared_ptr& prefix_extractor, const bool prefetch_index_and_filter_in_cache, const bool skip_filters, @@ -594,8 +593,8 @@ Status BlockBasedTable::Open( if (!ioptions.allow_mmap_reads) { s = PrefetchTail(ro, file.get(), file_size, force_direct_prefetch, tail_prefetch_stats, prefetch_all, preload_all, - &prefetch_buffer, ioptions.stats, tail_start_offset, - contain_no_data_block, ioptions.logger); + &prefetch_buffer, ioptions.stats, tail_size, + ioptions.logger); // Return error in prefetch path to users. if (!s.ok()) { return s; @@ -810,20 +809,12 @@ Status BlockBasedTable::PrefetchTail( bool force_direct_prefetch, TailPrefetchStats* tail_prefetch_stats, const bool prefetch_all, const bool preload_all, std::unique_ptr* prefetch_buffer, Statistics* stats, - uint64_t tail_start_offset, bool contain_no_data_block, - Logger* const logger) { - assert(tail_start_offset <= file_size); + uint64_t tail_size, Logger* const logger) { + assert(tail_size <= file_size); size_t tail_prefetch_size = 0; - if (tail_start_offset != 0) { - tail_prefetch_size = file_size - tail_start_offset; - } else if (contain_no_data_block) { - tail_prefetch_size = file_size - tail_start_offset; - ROCKS_LOG_WARN(logger, - "Tail prefetch size %zu is calculated based on tail start " - "offset being 0 and the " - "file does not contain any data blocks", - tail_prefetch_size); + if (tail_size != 0) { + tail_prefetch_size = tail_size; } else { if (tail_prefetch_stats != nullptr) { // Multiple threads may get a 0 (no history) when running in parallel, diff --git a/table/block_based/block_based_table_reader.h b/table/block_based/block_based_table_reader.h index 71c8a8fd3ec0..0f14e05dd143 100644 --- a/table/block_based/block_based_table_reader.h +++ b/table/block_based/block_based_table_reader.h @@ -99,8 +99,7 @@ class BlockBasedTable : public TableReader { const InternalKeyComparator& internal_key_comparator, std::unique_ptr&& file, uint64_t file_size, uint8_t block_protection_bytes_per_key, - std::unique_ptr* table_reader, uint64_t tail_start_offset, - bool contain_no_data_block, + std::unique_ptr* table_reader, uint64_t tail_size, std::shared_ptr table_reader_cache_res_mgr = nullptr, const std::shared_ptr& prefix_extractor = nullptr, @@ -461,8 +460,7 @@ class BlockBasedTable : public TableReader { bool force_direct_prefetch, TailPrefetchStats* tail_prefetch_stats, const bool prefetch_all, const bool preload_all, std::unique_ptr* prefetch_buffer, Statistics* stats, - uint64_t tail_start_offset, bool contain_no_data_block, - Logger* const logger); + uint64_t tail_size, Logger* const logger); Status ReadMetaIndexBlock(const ReadOptions& ro, FilePrefetchBuffer* prefetch_buffer, std::unique_ptr* metaindex_block, diff --git a/table/block_fetcher_test.cc b/table/block_fetcher_test.cc index 9d09b2ae5404..c2f6552cc4b8 100644 --- a/table/block_fetcher_test.cc +++ b/table/block_fetcher_test.cc @@ -266,10 +266,10 @@ class BlockFetcherTest : public testing::Test { const auto* table_options = table_factory_.GetOptions(); ASSERT_NE(table_options, nullptr); - ASSERT_OK(BlockBasedTable::Open( - ro, ioptions, EnvOptions(), *table_options, comparator, std::move(file), - file_size, 0 /* block_protection_bytes_per_key */, &table_reader, - 0 /* tail_start_offset */, false /* contain_no_data_block */)); + ASSERT_OK(BlockBasedTable::Open(ro, ioptions, EnvOptions(), *table_options, + comparator, std::move(file), file_size, + 0 /* block_protection_bytes_per_key */, + &table_reader, 0 /* tail_size */)); table->reset(reinterpret_cast(table_reader.release())); } diff --git a/table/meta_blocks.cc b/table/meta_blocks.cc index 6fea536d624b..69a1357c06ca 100644 --- a/table/meta_blocks.cc +++ b/table/meta_blocks.cc @@ -115,7 +115,7 @@ void PropertyBlockBuilder::AddTableProperty(const TableProperties& props) { Add(TablePropertiesNames::kFastCompressionEstimatedDataSize, props.fast_compression_estimated_data_size); } - Add(TablePropertiesNames::kTailStartOffset, props.tail_start_offset); + Add(TablePropertiesNames::kTailSize, props.tail_size); if (!props.db_id.empty()) { Add(TablePropertiesNames::kDbId, props.db_id); } @@ -308,8 +308,7 @@ Status ReadTablePropertiesHelper( &new_table_properties->slow_compression_estimated_data_size}, {TablePropertiesNames::kFastCompressionEstimatedDataSize, &new_table_properties->fast_compression_estimated_data_size}, - {TablePropertiesNames::kTailStartOffset, - &new_table_properties->tail_start_offset}, + {TablePropertiesNames::kTailSize, &new_table_properties->tail_size}, }; std::string last_key; diff --git a/table/table_builder.h b/table/table_builder.h index d8a62df7b1a1..6d98bce7078a 100644 --- a/table/table_builder.h +++ b/table/table_builder.h @@ -43,7 +43,7 @@ struct TableReaderOptions { size_t _max_file_size_for_l0_meta_pin = 0, const std::string& _cur_db_session_id = "", uint64_t _cur_file_num = 0, UniqueId64x2 _unique_id = {}, SequenceNumber _largest_seqno = 0, - uint64_t _tail_start_offset = 0, bool _contain_no_data_block = false) + uint64_t _tail_size = 0) : ioptions(_ioptions), prefix_extractor(_prefix_extractor), env_options(_env_options), @@ -59,8 +59,7 @@ struct TableReaderOptions { cur_file_num(_cur_file_num), unique_id(_unique_id), block_protection_bytes_per_key(_block_protection_bytes_per_key), - tail_start_offset(_tail_start_offset), - contain_no_data_block(_contain_no_data_block) {} + tail_size(_tail_size) {} const ImmutableOptions& ioptions; const std::shared_ptr& prefix_extractor; @@ -93,9 +92,7 @@ struct TableReaderOptions { uint8_t block_protection_bytes_per_key; - uint64_t tail_start_offset; - - bool contain_no_data_block; + uint64_t tail_size; }; struct TableBuilderOptions { @@ -207,7 +204,7 @@ class TableBuilder { // is enabled. virtual uint64_t EstimatedFileSize() const { return FileSize(); } - virtual uint64_t GetTailStartOffset() const { return 0; } + virtual uint64_t GetTailSize() const { return 0; } // If the user defined table properties collector suggest the file to // be further compacted. diff --git a/table/table_properties.cc b/table/table_properties.cc index 819d4da2ad8f..15d747bef8bf 100644 --- a/table/table_properties.cc +++ b/table/table_properties.cc @@ -303,8 +303,7 @@ const std::string TablePropertiesNames::kFastCompressionEstimatedDataSize = "rocksdb.sample_for_compression.fast.data.size"; const std::string TablePropertiesNames::kSequenceNumberTimeMapping = "rocksdb.seqno.time.map"; -const std::string TablePropertiesNames::kTailStartOffset = - "rocksdb.tail.start.offset"; +const std::string TablePropertiesNames::kTailSize = "rocksdb.tail.size"; #ifndef NDEBUG // WARNING: TEST_SetRandomTableProperties assumes the following layout of