Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Much better stats for seeks and prefix filtering #11460

Closed
wants to merge 4 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions HISTORY.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,10 +7,12 @@
* Introduced a new option `CompactionOptionsFIFO::file_temperature_age_thresholds` that allows FIFO compaction to compact files to different temperatures based on key age (#11428).
* Added a new ticker stat to count how many times RocksDB detected a corruption while verifying a block checksum: `BLOCK_CHECKSUM_MISMATCH_COUNT`.
* New statistics `rocksdb.file.read.db.open.micros` that measures read time of block-based SST tables or blob files during db open.
* New statistics tickers for various iterator seek behaviors and relevant filtering, as \*`_LEVEL_SEEK_`\*. (#11460)

### Public API Changes
* EXPERIMENTAL: Add new API `DB::ClipColumnFamily` to clip the key in CF to a certain range. It will physically delete all keys outside the range including tombstones.
* Add `MakeSharedCache()` construction functions to various cache Options objects, and deprecated the `NewWhateverCache()` functions with long parameter lists.
* Changed the meaning of various Bloom filter stats (prefix vs. whole key), with iterator-related filtering only being tracked in the new \*`_LEVEL_SEEK_`\* stats. (#11460)

### Behavior changes
* For x86, CPU features are no longer detected at runtime nor in build scripts, but in source code using common preprocessor defines. This will likely unlock some small performance improvements on some newer hardware, but could hurt performance of the kCRC32c checksum, which is no longer the default, on some "portable" builds. See PR #11419 for details.
Expand Down
465 changes: 249 additions & 216 deletions db/db_bloom_filter_test.cc

Large diffs are not rendered by default.

88 changes: 55 additions & 33 deletions db/db_test2.cc
Original file line number Diff line number Diff line change
Expand Up @@ -5992,17 +5992,15 @@ TEST_F(DBTest2, ChangePrefixExtractor) {
iterator->Seek("xa");
ASSERT_TRUE(iterator->Valid());
ASSERT_EQ("xb", iterator->key().ToString());
// It's a bug that the counter BLOOM_FILTER_PREFIX_CHECKED is not
// correct in this case. So don't check counters in this case.
if (expect_filter_check) {
ASSERT_EQ(0, TestGetTickerCount(options, BLOOM_FILTER_PREFIX_CHECKED));
EXPECT_EQ(0, PopTicker(options, NON_LAST_LEVEL_SEEK_FILTER_MATCH));
}

iterator->Seek("xz");
ASSERT_TRUE(iterator->Valid());
ASSERT_EQ("xz1", iterator->key().ToString());
if (expect_filter_check) {
ASSERT_EQ(0, TestGetTickerCount(options, BLOOM_FILTER_PREFIX_CHECKED));
EXPECT_EQ(0, PopTicker(options, NON_LAST_LEVEL_SEEK_FILTER_MATCH));
}
}

Expand All @@ -6020,7 +6018,7 @@ TEST_F(DBTest2, ChangePrefixExtractor) {
ASSERT_TRUE(iterator->Valid());
ASSERT_EQ("xb", iterator->key().ToString());
if (expect_filter_check) {
ASSERT_EQ(0, TestGetTickerCount(options, BLOOM_FILTER_PREFIX_CHECKED));
EXPECT_EQ(0, PopTicker(options, NON_LAST_LEVEL_SEEK_FILTER_MATCH));
}
}

Expand All @@ -6034,14 +6032,14 @@ TEST_F(DBTest2, ChangePrefixExtractor) {
ASSERT_TRUE(iterator->Valid());
ASSERT_EQ("xb", iterator->key().ToString());
if (expect_filter_check) {
ASSERT_EQ(0, TestGetTickerCount(options, BLOOM_FILTER_PREFIX_CHECKED));
EXPECT_EQ(0, PopTicker(options, NON_LAST_LEVEL_SEEK_FILTER_MATCH));
}

iterator->Seek("xx0");
ASSERT_TRUE(iterator->Valid());
ASSERT_EQ("xx1", iterator->key().ToString());
if (expect_filter_check) {
ASSERT_EQ(1, TestGetTickerCount(options, BLOOM_FILTER_PREFIX_CHECKED));
EXPECT_EQ(1, PopTicker(options, NON_LAST_LEVEL_SEEK_FILTER_MATCH));
}
}

Expand All @@ -6059,21 +6057,21 @@ TEST_F(DBTest2, ChangePrefixExtractor) {
ASSERT_TRUE(iterator->Valid());
ASSERT_EQ("xb", iterator->key().ToString());
if (expect_filter_check) {
ASSERT_EQ(2, TestGetTickerCount(options, BLOOM_FILTER_PREFIX_CHECKED));
EXPECT_EQ(1, PopTicker(options, NON_LAST_LEVEL_SEEK_FILTER_MATCH));
}

iterator->Seek("xg");
ASSERT_TRUE(iterator->Valid());
ASSERT_EQ("xx1", iterator->key().ToString());
if (expect_filter_check) {
ASSERT_EQ(3, TestGetTickerCount(options, BLOOM_FILTER_PREFIX_CHECKED));
EXPECT_EQ(1, PopTicker(options, NON_LAST_LEVEL_SEEK_FILTER_MATCH));
}

iterator->Seek("xz");
ASSERT_TRUE(iterator->Valid());
ASSERT_EQ("xz1", iterator->key().ToString());
if (expect_filter_check) {
ASSERT_EQ(4, TestGetTickerCount(options, BLOOM_FILTER_PREFIX_CHECKED));
EXPECT_EQ(1, PopTicker(options, NON_LAST_LEVEL_SEEK_FILTER_MATCH));
}

ASSERT_OK(iterator->status());
Expand All @@ -6085,14 +6083,14 @@ TEST_F(DBTest2, ChangePrefixExtractor) {
ASSERT_TRUE(iterator->Valid());
ASSERT_EQ("xb", iterator->key().ToString());
if (expect_filter_check) {
ASSERT_EQ(5, TestGetTickerCount(options, BLOOM_FILTER_PREFIX_CHECKED));
EXPECT_EQ(1, PopTicker(options, NON_LAST_LEVEL_SEEK_FILTER_MATCH));
}

iterator->Seek("xx0");
ASSERT_TRUE(iterator->Valid());
ASSERT_EQ("xx1", iterator->key().ToString());
if (expect_filter_check) {
ASSERT_EQ(6, TestGetTickerCount(options, BLOOM_FILTER_PREFIX_CHECKED));
EXPECT_EQ(1, PopTicker(options, NON_LAST_LEVEL_SEEK_FILTER_MATCH));
}

ASSERT_OK(iterator->status());
Expand All @@ -6106,7 +6104,7 @@ TEST_F(DBTest2, ChangePrefixExtractor) {
ASSERT_TRUE(iterator->Valid());
ASSERT_EQ("xb", iterator->key().ToString());
if (expect_filter_check) {
ASSERT_EQ(7, TestGetTickerCount(options, BLOOM_FILTER_PREFIX_CHECKED));
EXPECT_EQ(1, PopTicker(options, NON_LAST_LEVEL_SEEK_FILTER_MATCH));
}
ASSERT_OK(iterator->status());
}
Expand Down Expand Up @@ -6180,13 +6178,19 @@ TEST_F(DBTest2, AutoPrefixMode1) {
ro.total_order_seek = false;
ro.auto_prefix_mode = true;

const auto stat = BLOOM_FILTER_PREFIX_CHECKED;
const auto hit_stat = options.num_levels == 1
? LAST_LEVEL_SEEK_FILTER_MATCH
: NON_LAST_LEVEL_SEEK_FILTER_MATCH;
const auto miss_stat = options.num_levels == 1
? LAST_LEVEL_SEEK_FILTERED
: NON_LAST_LEVEL_SEEK_FILTERED;
{
std::unique_ptr<Iterator> iterator(db_->NewIterator(ro));
iterator->Seek("b1");
ASSERT_TRUE(iterator->Valid());
ASSERT_EQ("x1", iterator->key().ToString());
EXPECT_EQ(0, TestGetAndResetTickerCount(options, stat));
EXPECT_EQ(0, TestGetAndResetTickerCount(options, hit_stat));
EXPECT_EQ(0, TestGetAndResetTickerCount(options, miss_stat));
ASSERT_OK(iterator->status());
}

Expand All @@ -6198,7 +6202,8 @@ TEST_F(DBTest2, AutoPrefixMode1) {
std::unique_ptr<Iterator> iterator(db_->NewIterator(ro));
iterator->Seek("b1");
ASSERT_FALSE(iterator->Valid());
EXPECT_EQ(1, TestGetAndResetTickerCount(options, stat));
EXPECT_EQ(0, TestGetAndResetTickerCount(options, hit_stat));
EXPECT_EQ(1, TestGetAndResetTickerCount(options, miss_stat));
ASSERT_OK(iterator->status());
}

Expand All @@ -6208,7 +6213,8 @@ TEST_F(DBTest2, AutoPrefixMode1) {
iterator->Seek("b1");
ASSERT_TRUE(iterator->Valid());
ASSERT_EQ("x1", iterator->key().ToString());
EXPECT_EQ(0, TestGetAndResetTickerCount(options, stat));
EXPECT_EQ(0, TestGetAndResetTickerCount(options, hit_stat));
EXPECT_EQ(0, TestGetAndResetTickerCount(options, miss_stat));
ASSERT_OK(iterator->status());
}

Expand All @@ -6217,7 +6223,8 @@ TEST_F(DBTest2, AutoPrefixMode1) {
std::unique_ptr<Iterator> iterator(db_->NewIterator(ro));
iterator->Seek("b1");
ASSERT_FALSE(iterator->Valid());
EXPECT_EQ(1, TestGetAndResetTickerCount(options, stat));
EXPECT_EQ(0, TestGetAndResetTickerCount(options, hit_stat));
EXPECT_EQ(1, TestGetAndResetTickerCount(options, miss_stat));
ASSERT_OK(iterator->status());
}

Expand All @@ -6226,7 +6233,8 @@ TEST_F(DBTest2, AutoPrefixMode1) {
std::unique_ptr<Iterator> iterator(db_->NewIterator(ro));
iterator->Seek("b1");
ASSERT_FALSE(iterator->Valid());
EXPECT_EQ(0, TestGetAndResetTickerCount(options, stat));
EXPECT_EQ(0, TestGetAndResetTickerCount(options, hit_stat));
EXPECT_EQ(0, TestGetAndResetTickerCount(options, miss_stat));
ASSERT_OK(iterator->status());
}

Expand All @@ -6237,25 +6245,29 @@ TEST_F(DBTest2, AutoPrefixMode1) {
ub = "b9";
iterator->Seek("b1");
ASSERT_FALSE(iterator->Valid());
EXPECT_EQ(1, TestGetAndResetTickerCount(options, stat));
EXPECT_EQ(0, TestGetAndResetTickerCount(options, hit_stat));
EXPECT_EQ(1, TestGetAndResetTickerCount(options, miss_stat));
ASSERT_OK(iterator->status());

ub = "z";
iterator->Seek("b1");
ASSERT_TRUE(iterator->Valid());
ASSERT_EQ("x1", iterator->key().ToString());
EXPECT_EQ(0, TestGetAndResetTickerCount(options, stat));
EXPECT_EQ(0, TestGetAndResetTickerCount(options, hit_stat));
EXPECT_EQ(0, TestGetAndResetTickerCount(options, miss_stat));

ub = "c";
iterator->Seek("b1");
ASSERT_FALSE(iterator->Valid());
EXPECT_EQ(1, TestGetAndResetTickerCount(options, stat));
EXPECT_EQ(0, TestGetAndResetTickerCount(options, hit_stat));
EXPECT_EQ(1, TestGetAndResetTickerCount(options, miss_stat));

ub = "b9";
iterator->SeekForPrev("b1");
ASSERT_TRUE(iterator->Valid());
ASSERT_EQ("a1", iterator->key().ToString());
EXPECT_EQ(0, TestGetAndResetTickerCount(options, stat));
EXPECT_EQ(0, TestGetAndResetTickerCount(options, hit_stat));
EXPECT_EQ(0, TestGetAndResetTickerCount(options, miss_stat));

ub = "zz";
iterator->SeekToLast();
Expand Down Expand Up @@ -6287,50 +6299,57 @@ TEST_F(DBTest2, AutoPrefixMode1) {
ub = "b1";
iterator->Seek("b9");
ASSERT_FALSE(iterator->Valid());
EXPECT_EQ(1, TestGetAndResetTickerCount(options, stat));
EXPECT_EQ(0, TestGetAndResetTickerCount(options, hit_stat));
EXPECT_EQ(1, TestGetAndResetTickerCount(options, miss_stat));
ASSERT_OK(iterator->status());

ub = "b1";
iterator->Seek("z");
ASSERT_TRUE(iterator->Valid());
ASSERT_EQ("y1", iterator->key().ToString());
EXPECT_EQ(0, TestGetAndResetTickerCount(options, stat));
EXPECT_EQ(0, TestGetAndResetTickerCount(options, hit_stat));
EXPECT_EQ(0, TestGetAndResetTickerCount(options, miss_stat));

ub = "b1";
iterator->Seek("c");
ASSERT_FALSE(iterator->Valid());
EXPECT_EQ(0, TestGetAndResetTickerCount(options, stat));
EXPECT_EQ(0, TestGetAndResetTickerCount(options, hit_stat));
EXPECT_EQ(0, TestGetAndResetTickerCount(options, miss_stat));

ub = "b";
iterator->Seek("c9");
ASSERT_FALSE(iterator->Valid());
// Fails if ReverseBytewiseComparator::IsSameLengthImmediateSuccessor
// is "correctly" implemented.
EXPECT_EQ(0, TestGetAndResetTickerCount(options, stat));
EXPECT_EQ(0, TestGetAndResetTickerCount(options, hit_stat));
EXPECT_EQ(0, TestGetAndResetTickerCount(options, miss_stat));

ub = "a";
iterator->Seek("b9");
// Fails if ReverseBytewiseComparator::IsSameLengthImmediateSuccessor
// is "correctly" implemented.
ASSERT_TRUE(iterator->Valid());
ASSERT_EQ("a1", iterator->key().ToString());
EXPECT_EQ(0, TestGetAndResetTickerCount(options, stat));
EXPECT_EQ(0, TestGetAndResetTickerCount(options, hit_stat));
EXPECT_EQ(0, TestGetAndResetTickerCount(options, miss_stat));

ub = "b";
iterator->Seek("a");
ASSERT_FALSE(iterator->Valid());
// Fails if ReverseBytewiseComparator::IsSameLengthImmediateSuccessor
// matches BytewiseComparator::IsSameLengthImmediateSuccessor. Upper
// comparing before seek key prevents a real bug from surfacing.
EXPECT_EQ(0, TestGetAndResetTickerCount(options, stat));
EXPECT_EQ(0, TestGetAndResetTickerCount(options, hit_stat));
EXPECT_EQ(0, TestGetAndResetTickerCount(options, miss_stat));

ub = "b1";
iterator->SeekForPrev("b9");
ASSERT_TRUE(iterator->Valid());
// Fails if ReverseBytewiseComparator::IsSameLengthImmediateSuccessor
// is "correctly" implemented.
ASSERT_EQ("x1", iterator->key().ToString());
EXPECT_EQ(0, TestGetAndResetTickerCount(options, stat));
EXPECT_EQ(0, TestGetAndResetTickerCount(options, hit_stat));
EXPECT_EQ(0, TestGetAndResetTickerCount(options, miss_stat));

ub = "a";
iterator->SeekToLast();
Expand Down Expand Up @@ -6372,7 +6391,8 @@ TEST_F(DBTest2, AutoPrefixMode1) {
std::unique_ptr<Iterator> iterator(db_->NewIterator(ro));
iterator->Seek(Slice(a_end_stuff, 2));
ASSERT_FALSE(iterator->Valid());
EXPECT_EQ(1, TestGetAndResetTickerCount(options, stat));
EXPECT_EQ(0, TestGetAndResetTickerCount(options, hit_stat));
EXPECT_EQ(1, TestGetAndResetTickerCount(options, miss_stat));
ASSERT_OK(iterator->status());

// test, cannot be validly optimized with auto_prefix_mode
Expand All @@ -6382,7 +6402,8 @@ TEST_F(DBTest2, AutoPrefixMode1) {
iterator->Seek(Slice(a_end_stuff, 2));
// !!! BUG !!! See "BUG" section of auto_prefix_mode.
ASSERT_FALSE(iterator->Valid());
EXPECT_EQ(1, TestGetAndResetTickerCount(options, stat));
EXPECT_EQ(0, TestGetAndResetTickerCount(options, hit_stat));
EXPECT_EQ(1, TestGetAndResetTickerCount(options, miss_stat));
ASSERT_OK(iterator->status());

// To prove that is the wrong result, now use total order seek
Expand All @@ -6393,7 +6414,8 @@ TEST_F(DBTest2, AutoPrefixMode1) {
iterator->Seek(Slice(a_end_stuff, 2));
ASSERT_TRUE(iterator->Valid());
ASSERT_EQ("b", iterator->key().ToString());
EXPECT_EQ(0, TestGetAndResetTickerCount(options, stat));
EXPECT_EQ(0, TestGetAndResetTickerCount(options, hit_stat));
EXPECT_EQ(0, TestGetAndResetTickerCount(options, miss_stat));
ASSERT_OK(iterator->status());
}
} while (ChangeOptions(kSkipPlainTable));
Expand Down
4 changes: 4 additions & 0 deletions db/db_test_util.h
Original file line number Diff line number Diff line change
Expand Up @@ -1318,6 +1318,10 @@ class DBTestBase : public testing::Test {
Tickers ticker_type) {
return options.statistics->getAndResetTickerCount(ticker_type);
}
// Shorthand for TestGetAndResetTickerCount: forwards `ticker_type` to
// options.statistics->getAndResetTickerCount() and hands back its result,
// so tests can check a ticker's delta since the previous call in one line.
uint64_t PopTicker(const Options& options, Tickers ticker_type) {
  const uint64_t popped_count =
      options.statistics->getAndResetTickerCount(ticker_type);
  return popped_count;
}

// Note: reverting this setting within the same test run is not yet
// supported
Expand Down
6 changes: 4 additions & 2 deletions include/rocksdb/perf_context.h
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,8 @@ namespace ROCKSDB_NAMESPACE {
// Break down performance counters by level and store per-level perf context in
// PerfContextByLevel
struct PerfContextByLevelBase {
// These Bloom stats apply to point reads (Get/MultiGet) for whole key and
// prefix filters.
// # of times bloom filter has avoided file reads, i.e., negatives.
uint64_t bloom_filter_useful = 0;
// # of times bloom FullFilter has not avoided the reads.
Expand Down Expand Up @@ -217,9 +219,9 @@ struct PerfContextBase {
uint64_t bloom_memtable_hit_count;
// total number of mem table bloom misses
uint64_t bloom_memtable_miss_count;
// total number of SST table bloom hits
// total number of SST bloom hits
uint64_t bloom_sst_hit_count;
// total number of SST table bloom misses
// total number of SST bloom misses
uint64_t bloom_sst_miss_count;

// Time spent waiting on key locks in transaction lock manager.
Expand Down
39 changes: 35 additions & 4 deletions include/rocksdb/statistics.h
Original file line number Diff line number Diff line change
Expand Up @@ -157,11 +157,16 @@ enum Tickers : uint32_t {

NUMBER_MERGE_FAILURES,

// number of times bloom was checked before creating iterator on a
// file, and the number of times the check was useful in avoiding
// iterator creation (and thus likely IOPs).
// Prefix filter stats when used for point lookups (Get / MultiGet).
// (For prefix filter stats on iterators, see *_LEVEL_SEEK_*.)
// Checked: filter was queried
BLOOM_FILTER_PREFIX_CHECKED,
// Useful: filter returned false so prevented accessing data+index blocks
BLOOM_FILTER_PREFIX_USEFUL,
// True positive: found a key matching the point query. When another key
// with the same prefix matches, it is considered a false positive by
// these statistics even though the filter returned a true positive.
BLOOM_FILTER_PREFIX_TRUE_POSITIVE,

// Number of times we had to reseek inside an iteration to skip
// over large number of keys with same userkey.
Expand Down Expand Up @@ -394,6 +399,32 @@ enum Tickers : uint32_t {
NON_LAST_LEVEL_READ_BYTES,
NON_LAST_LEVEL_READ_COUNT,

// Statistics on iterator Seek() (and variants) for each sorted run. I.e. a
// single user Seek() can result in many sorted run Seek()s.
// The stats are split between last level and non-last level.
// Filtered: a filter such as prefix Bloom filter indicates the Seek() would
// not find anything relevant, so avoided a likely access to data+index
// blocks.
LAST_LEVEL_SEEK_FILTERED,
// Filter match: a filter such as prefix Bloom filter was queried but did
// not filter out the seek.
LAST_LEVEL_SEEK_FILTER_MATCH,
// At least one data block was accessed for a Seek() (or variant) on a
// sorted run.
LAST_LEVEL_SEEK_DATA,
// At least one value() was accessed for the seek (suggesting it was useful),
// and no filter such as prefix Bloom was queried.
LAST_LEVEL_SEEK_DATA_USEFUL_NO_FILTER,
// At least one value() was accessed for the seek (suggesting it was useful),
// after querying a filter such as prefix Bloom.
LAST_LEVEL_SEEK_DATA_USEFUL_FILTER_MATCH,
// The same set of stats, but for non-last level seeks.
NON_LAST_LEVEL_SEEK_FILTERED,
NON_LAST_LEVEL_SEEK_FILTER_MATCH,
NON_LAST_LEVEL_SEEK_DATA,
NON_LAST_LEVEL_SEEK_DATA_USEFUL_NO_FILTER,
NON_LAST_LEVEL_SEEK_DATA_USEFUL_FILTER_MATCH,

// Number of block checksum verifications
BLOCK_CHECKSUM_COMPUTE_COUNT,
// Number of times RocksDB detected a corruption while verifying a block
Expand Down Expand Up @@ -666,7 +697,7 @@ class Statistics : public Customizable {
virtual void histogramData(uint32_t type,
HistogramData* const data) const = 0;
virtual std::string getHistogramString(uint32_t /*type*/) const { return ""; }
virtual void recordTick(uint32_t tickerType, uint64_t count = 0) = 0;
virtual void recordTick(uint32_t tickerType, uint64_t count = 1) = 0;
virtual void setTickerCount(uint32_t tickerType, uint64_t count) = 0;
virtual uint64_t getAndResetTickerCount(uint32_t tickerType) = 0;
virtual void reportTimeToHistogram(uint32_t histogramType, uint64_t time) {
Expand Down
Loading