From 6e35aae63cf33a8e5bbe43f293e410e1999f3610 Mon Sep 17 00:00:00 2001 From: Peter Dillinger Date: Thu, 18 May 2023 14:33:34 -0700 Subject: [PATCH 1/3] [WIP] Much better stats for seeks and prefix filtering Summary: We want to know more about opportunities for better range filters, and the effectiveness of our own range filters. Currently the stats are very limited, essentially logging just hits and misses against prefix filters for range scans in BLOOM_FILTER_PREFIX_* without tracking the false positive rate. Perhaps confusingly, when prefix filters are used for point queries, the stats are currently going into the non-PREFIX tickers. This change does several things: * Introduce new stat tickers for seeks, *LEVEL_SEEK* * Most importantly, allows us to see opportunities for range filtering. Specifically, we can count how many times a seek in an SST file accesses at least one data block, and how many times at least one value() is then accessed. If a data block was accessed but no value(), we can generally assume that the key(s) seen was(were) not of interest so could have been filtered with the right kind of filter, avoiding the data block access. * We can get the same level of detail when a filter (for now, prefix Bloom/ribbon) is used, or not. Specifically, we can infer a false positive rate for prefix filters (not available before) from the seek "false positive" rate: when a data block is accessed but no value() is called. (There can be other explanations for a seek false positive, but in typical iterator usage it would indicate a filter false positive.) * The stats are divided between last level and non-last levels, to help understand potential tiered storage use cases. * The old BLOOM_FILTER_PREFIX_* stats have a different meaning: no longer referring to iterators but to point queries using prefix filters. BLOOM_FILTER_PREFIX_TRUE_POSITIVE is added for computing the prefix false positive rate on point queries, which can be due to filter false positives as well as different keys with the same prefix. * Similarly, the non-PREFIX BLOOM_FILTER stats are now for whole key filtering only. Test Plan: unit tests updated (TODO: finish) Performance test shows a consistent small improvement with these changes, both with clang and with gcc. I'm not sure why, but I'll take it. TODO: details --- db/db_bloom_filter_test.cc | 266 +++++++++--------- db/db_test_util.h | 4 + include/rocksdb/statistics.h | 39 ++- monitoring/statistics.cc | 17 ++ .../block_based/block_based_table_iterator.cc | 45 ++- .../block_based/block_based_table_iterator.h | 42 ++- table/block_based/block_based_table_reader.cc | 63 +++-- table/block_based/block_based_table_reader.h | 6 +- .../block_based_table_reader_sync_and_async.h | 10 +- 9 files changed, 308 insertions(+), 184 deletions(-) diff --git a/db/db_bloom_filter_test.cc b/db/db_bloom_filter_test.cc index c4591920d4e..3dd0824f4b1 100644 --- a/db/db_bloom_filter_test.cc +++ b/db/db_bloom_filter_test.cc @@ -209,57 +209,35 @@ TEST_F(DBBloomFilterTest, GetFilterByPrefixBloomCustomPrefixExtractor) { ASSERT_OK(dbfull()->Flush(fo)); ASSERT_EQ("foo", Get("barbarbar")); - ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 0); - ASSERT_EQ( - 0, - (*(get_perf_context()->level_to_perf_context))[0].bloom_filter_useful); + ASSERT_EQ(PopTicker(options, BLOOM_FILTER_PREFIX_USEFUL), 0); + ASSERT_EQ("foo2", Get("barbarbar2")); - ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 0); - ASSERT_EQ( - 0, - (*(get_perf_context()->level_to_perf_context))[0].bloom_filter_useful); + ASSERT_EQ(PopTicker(options, BLOOM_FILTER_PREFIX_USEFUL), 0); + ASSERT_EQ("NOT_FOUND", Get("barbarbar3")); - ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 0); - ASSERT_EQ( - 0, - (*(get_perf_context()->level_to_perf_context))[0].bloom_filter_useful); + ASSERT_EQ(PopTicker(options, BLOOM_FILTER_PREFIX_USEFUL), 0); ASSERT_EQ("NOT_FOUND", Get("barfoofoo")); - ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 1); - ASSERT_EQ( - 1, - (*(get_perf_context()->level_to_perf_context))[0].bloom_filter_useful); + ASSERT_EQ(PopTicker(options, BLOOM_FILTER_PREFIX_USEFUL), 1); ASSERT_EQ("NOT_FOUND", Get("foobarbar")); - ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 2); - ASSERT_EQ( - 2, - (*(get_perf_context()->level_to_perf_context))[0].bloom_filter_useful); + ASSERT_EQ(PopTicker(options, BLOOM_FILTER_PREFIX_USEFUL), 1); ro.total_order_seek = true; // NOTE: total_order_seek no longer affects Get() ASSERT_EQ("NOT_FOUND", Get("foobarbar")); - ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 3); - ASSERT_EQ( - 3, - (*(get_perf_context()->level_to_perf_context))[0].bloom_filter_useful); + ASSERT_EQ(PopTicker(options, BLOOM_FILTER_PREFIX_USEFUL), 1); // No bloom on extractor changed ASSERT_OK(db_->SetOptions({{"prefix_extractor", "capped:10"}})); ASSERT_EQ("NOT_FOUND", Get("foobarbar")); - ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 3); - ASSERT_EQ( - 3, - (*(get_perf_context()->level_to_perf_context))[0].bloom_filter_useful); + ASSERT_EQ(PopTicker(options, BLOOM_FILTER_PREFIX_USEFUL), 0); // No bloom on extractor changed, after re-open options.prefix_extractor.reset(NewCappedPrefixTransform(10)); Reopen(options); ASSERT_EQ("NOT_FOUND", Get("foobarbar")); - ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 3); - ASSERT_EQ( - 3, - (*(get_perf_context()->level_to_perf_context))[0].bloom_filter_useful); + ASSERT_EQ(PopTicker(options, BLOOM_FILTER_PREFIX_USEFUL), 0); get_perf_context()->Reset(); } @@ -294,33 +272,27 @@ TEST_F(DBBloomFilterTest, GetFilterByPrefixBloom) { ASSERT_OK(dbfull()->Flush(fo)); ASSERT_EQ("foo", Get("barbarbar")); - ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 0); + ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_USEFUL), 0); ASSERT_EQ("foo2", Get("barbarbar2")); - ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 0); + ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_USEFUL), 0); ASSERT_EQ("NOT_FOUND", Get("barbarbar3")); - ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 0); + ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_USEFUL), 0); ASSERT_EQ("NOT_FOUND", Get("barfoofoo")); - ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 1); + ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_USEFUL), 1); ASSERT_EQ("NOT_FOUND", Get("foobarbar")); - ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 2); + ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_USEFUL), 2); ro.total_order_seek = true; // NOTE: total_order_seek no longer affects Get() ASSERT_EQ("NOT_FOUND", Get("foobarbar")); - ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 3); - ASSERT_EQ( - 3, - (*(get_perf_context()->level_to_perf_context))[0].bloom_filter_useful); + ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_USEFUL), 3); // No bloom on extractor changed ASSERT_OK(db_->SetOptions({{"prefix_extractor", "capped:10"}})); ASSERT_EQ("NOT_FOUND", Get("foobarbar")); - ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 3); - ASSERT_EQ( - 3, - (*(get_perf_context()->level_to_perf_context))[0].bloom_filter_useful); + ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_USEFUL), 3); get_perf_context()->Reset(); } @@ -358,11 +330,11 @@ TEST_F(DBBloomFilterTest, WholeKeyFilterProp) { Reopen(options); ASSERT_EQ("NOT_FOUND", Get("foo")); - ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 0); + ASSERT_EQ(PopTicker(options, BLOOM_FILTER_PREFIX_USEFUL), 0); ASSERT_EQ("NOT_FOUND", Get("bar")); - ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 1); + ASSERT_EQ(PopTicker(options, BLOOM_FILTER_PREFIX_USEFUL), 1); ASSERT_EQ("foo", Get("foobar")); - ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 1); + ASSERT_EQ(PopTicker(options, BLOOM_FILTER_PREFIX_USEFUL), 0); // Reopen with whole key filtering enabled and prefix extractor // NULL. Bloom filter should be off for both of whole key and @@ -372,13 +344,17 @@ TEST_F(DBBloomFilterTest, WholeKeyFilterProp) { options.prefix_extractor.reset(); Reopen(options); - ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 1); + ASSERT_EQ(PopTicker(options, BLOOM_FILTER_PREFIX_USEFUL), 0); + ASSERT_EQ(PopTicker(options, BLOOM_FILTER_USEFUL), 0); ASSERT_EQ("NOT_FOUND", Get("foo")); - ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 1); + ASSERT_EQ(PopTicker(options, BLOOM_FILTER_PREFIX_USEFUL), 0); + ASSERT_EQ(PopTicker(options, BLOOM_FILTER_USEFUL), 0); ASSERT_EQ("NOT_FOUND", Get("bar")); - ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 1); + ASSERT_EQ(PopTicker(options, BLOOM_FILTER_PREFIX_USEFUL), 0); + ASSERT_EQ(PopTicker(options, BLOOM_FILTER_USEFUL), 0); ASSERT_EQ("foo", Get("foobar")); - ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 1); + ASSERT_EQ(PopTicker(options, BLOOM_FILTER_PREFIX_USEFUL), 0); + ASSERT_EQ(PopTicker(options, BLOOM_FILTER_USEFUL), 0); // Write DB with only full key filtering. ASSERT_OK(dbfull()->Put(wo, "foobar", "foo")); // Needs insert some keys to make sure files are not filtered out by key @@ -394,13 +370,17 @@ TEST_F(DBBloomFilterTest, WholeKeyFilterProp) { options.table_factory.reset(NewBlockBasedTableFactory(bbto)); Reopen(options); - ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 1); + ASSERT_EQ(PopTicker(options, BLOOM_FILTER_PREFIX_USEFUL), 0); + ASSERT_EQ(PopTicker(options, BLOOM_FILTER_USEFUL), 0); ASSERT_EQ("NOT_FOUND", Get("foo")); - ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 1); + ASSERT_EQ(PopTicker(options, BLOOM_FILTER_PREFIX_USEFUL), 0); + ASSERT_EQ(PopTicker(options, BLOOM_FILTER_USEFUL), 0); ASSERT_EQ("NOT_FOUND", Get("bar")); - ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 1); + ASSERT_EQ(PopTicker(options, BLOOM_FILTER_PREFIX_USEFUL), 0); + ASSERT_EQ(PopTicker(options, BLOOM_FILTER_USEFUL), 0); ASSERT_EQ("foo", Get("foobar")); - ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 1); + ASSERT_EQ(PopTicker(options, BLOOM_FILTER_PREFIX_USEFUL), 0); + ASSERT_EQ(PopTicker(options, BLOOM_FILTER_USEFUL), 0); // Try to create a DB with mixed files: ASSERT_OK(dbfull()->Put(wo, "foobar", "foo")); @@ -426,15 +406,16 @@ TEST_F(DBBloomFilterTest, WholeKeyFilterProp) { // Now we have two files: // File 1: An older file with prefix bloom. // File 2: A newer file with whole bloom filter. - ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 1); + // IAMHERE + ASSERT_EQ(PopTicker(options, BLOOM_FILTER_USEFUL), 0); ASSERT_EQ("NOT_FOUND", Get("foo")); - ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 2); + ASSERT_EQ(PopTicker(options, BLOOM_FILTER_USEFUL), 1); ASSERT_EQ("NOT_FOUND", Get("bar")); - ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 3); + ASSERT_EQ(PopTicker(options, BLOOM_FILTER_USEFUL), 1); ASSERT_EQ("foo", Get("foobar")); - ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 4); + ASSERT_EQ(PopTicker(options, BLOOM_FILTER_USEFUL), 1); ASSERT_EQ("bar", Get("barfoo")); - ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 4); + ASSERT_EQ(PopTicker(options, BLOOM_FILTER_USEFUL), 0); // Reopen with the same setting: only whole key is used Reopen(options); @@ -696,11 +677,8 @@ TEST_P(DBBloomFilterTestWithParam, SkipFilterOnEssentiallyZeroBpk) { PutAndGetFn(); // Verify filter is accessed (and constructed) - EXPECT_EQ(TestGetAndResetTickerCount(options, BLOOM_FILTER_FULL_POSITIVE), - maxKey * 2); - EXPECT_EQ( - TestGetAndResetTickerCount(options, BLOOM_FILTER_FULL_TRUE_POSITIVE), - maxKey); + EXPECT_EQ(PopTicker(options, BLOOM_FILTER_FULL_POSITIVE), maxKey * 2); + EXPECT_EQ(PopTicker(options, BLOOM_FILTER_FULL_TRUE_POSITIVE), maxKey); props.clear(); ASSERT_TRUE(db_->GetMapProperty(kAggTableProps, &props)); EXPECT_NE(props["filter_size"], "0"); @@ -715,11 +693,8 @@ TEST_P(DBBloomFilterTestWithParam, SkipFilterOnEssentiallyZeroBpk) { GetFn(); // Verify filter is accessed - EXPECT_EQ(TestGetAndResetTickerCount(options, BLOOM_FILTER_FULL_POSITIVE), - maxKey * 2); - EXPECT_EQ( - TestGetAndResetTickerCount(options, BLOOM_FILTER_FULL_TRUE_POSITIVE), - maxKey); + EXPECT_EQ(PopTicker(options, BLOOM_FILTER_FULL_POSITIVE), maxKey * 2); + EXPECT_EQ(PopTicker(options, BLOOM_FILTER_FULL_TRUE_POSITIVE), maxKey); // But new filters are not generated (configuration details unknown) DestroyAndReopen(options); @@ -864,9 +839,8 @@ TEST_F(DBBloomFilterTest, BloomFilterCompatibility) { ASSERT_EQ("val", Get(prefix + "Z")); // Filter positive // Filter negative, with high probability ASSERT_EQ("NOT_FOUND", Get(prefix + "Q")); - EXPECT_EQ(TestGetAndResetTickerCount(options, BLOOM_FILTER_FULL_POSITIVE), - 2); - EXPECT_EQ(TestGetAndResetTickerCount(options, BLOOM_FILTER_USEFUL), 1); + EXPECT_EQ(PopTicker(options, BLOOM_FILTER_FULL_POSITIVE), 2); + EXPECT_EQ(PopTicker(options, BLOOM_FILTER_USEFUL), 1); } } } @@ -1705,7 +1679,7 @@ TEST_F(DBBloomFilterTest, ContextCustomFilterPolicy) { } // Since we have two tables / two filters, we might have Bloom checks on // our queries, but no more than one "useful" per query on a found key. - EXPECT_LE(TestGetAndResetTickerCount(options, BLOOM_FILTER_USEFUL), maxKey); + EXPECT_LE(PopTicker(options, BLOOM_FILTER_USEFUL), maxKey); // Check that we have two filters, each about // fifo: 0.12% FP rate (15 bits per key) @@ -1714,8 +1688,7 @@ TEST_F(DBBloomFilterTest, ContextCustomFilterPolicy) { ASSERT_EQ("NOT_FOUND", Get(1, Key(i + 33333))); } { - auto useful_count = - TestGetAndResetTickerCount(options, BLOOM_FILTER_USEFUL); + auto useful_count = PopTicker(options, BLOOM_FILTER_USEFUL); EXPECT_GE(useful_count, maxKey * 2 * (fifo ? 0.9980 : 0.975)); EXPECT_LE(useful_count, maxKey * 2 * (fifo ? 0.9995 : 0.98)); } @@ -1732,8 +1705,7 @@ TEST_F(DBBloomFilterTest, ContextCustomFilterPolicy) { ASSERT_EQ("NOT_FOUND", Get(1, Key(i + 33333))); } { - auto useful_count = - TestGetAndResetTickerCount(options, BLOOM_FILTER_USEFUL); + auto useful_count = PopTicker(options, BLOOM_FILTER_USEFUL); EXPECT_GE(useful_count, maxKey * 0.90); EXPECT_LE(useful_count, maxKey * 0.91); } @@ -2027,13 +1999,14 @@ TEST_P(DBBloomFilterTestVaryPrefixAndFormatVer, PartitionedMultiGet) { std::array statuses; std::array values; - TestGetAndResetTickerCount(options, BLOCK_CACHE_FILTER_HIT); - TestGetAndResetTickerCount(options, BLOCK_CACHE_FILTER_MISS); - TestGetAndResetTickerCount(options, BLOOM_FILTER_PREFIX_USEFUL); - TestGetAndResetTickerCount(options, BLOOM_FILTER_USEFUL); - TestGetAndResetTickerCount(options, BLOOM_FILTER_PREFIX_CHECKED); - TestGetAndResetTickerCount(options, BLOOM_FILTER_FULL_POSITIVE); - TestGetAndResetTickerCount(options, BLOOM_FILTER_FULL_TRUE_POSITIVE); + PopTicker(options, BLOCK_CACHE_FILTER_HIT); + PopTicker(options, BLOCK_CACHE_FILTER_MISS); + PopTicker(options, BLOOM_FILTER_PREFIX_USEFUL); + PopTicker(options, BLOOM_FILTER_USEFUL); + PopTicker(options, BLOOM_FILTER_PREFIX_CHECKED); + PopTicker(options, BLOOM_FILTER_FULL_POSITIVE); + PopTicker(options, BLOOM_FILTER_FULL_TRUE_POSITIVE); + PopTicker(options, BLOOM_FILTER_PREFIX_TRUE_POSITIVE); // Check that initial clump of keys only loads one partition filter from // block cache. @@ -2063,26 +2036,22 @@ TEST_P(DBBloomFilterTestVaryPrefixAndFormatVer, PartitionedMultiGet) { } // Confirm correct Bloom stats (no FPs) - uint64_t filter_useful = TestGetAndResetTickerCount( - options, - use_prefix_ ? BLOOM_FILTER_PREFIX_USEFUL : BLOOM_FILTER_USEFUL); + uint64_t filter_useful = + PopTicker(options, use_prefix_ ? BLOOM_FILTER_PREFIX_USEFUL + : BLOOM_FILTER_USEFUL); uint64_t filter_checked = - TestGetAndResetTickerCount(options, use_prefix_ - ? BLOOM_FILTER_PREFIX_CHECKED - : BLOOM_FILTER_FULL_POSITIVE) + + PopTicker(options, use_prefix_ ? BLOOM_FILTER_PREFIX_CHECKED + : BLOOM_FILTER_FULL_POSITIVE) + (use_prefix_ ? 0 : filter_useful); EXPECT_EQ(filter_useful, number_not_found); EXPECT_EQ(filter_checked, Q); - if (!use_prefix_) { - EXPECT_EQ( - TestGetAndResetTickerCount(options, BLOOM_FILTER_FULL_TRUE_POSITIVE), - Q - number_not_found); - } + EXPECT_EQ(PopTicker(options, use_prefix_ ? BLOOM_FILTER_PREFIX_TRUE_POSITIVE + : BLOOM_FILTER_FULL_TRUE_POSITIVE), + Q - number_not_found); // Confirm no duplicate loading same filter partition - uint64_t filter_accesses = - TestGetAndResetTickerCount(options, BLOCK_CACHE_FILTER_HIT) + - TestGetAndResetTickerCount(options, BLOCK_CACHE_FILTER_MISS); + uint64_t filter_accesses = PopTicker(options, BLOCK_CACHE_FILTER_HIT) + + PopTicker(options, BLOCK_CACHE_FILTER_MISS); if (stride == 1) { EXPECT_EQ(filter_accesses, 1); } else { @@ -2118,26 +2087,22 @@ TEST_P(DBBloomFilterTestVaryPrefixAndFormatVer, PartitionedMultiGet) { } // Confirm correct Bloom stats (might see some FPs) - uint64_t filter_useful = TestGetAndResetTickerCount( - options, - use_prefix_ ? BLOOM_FILTER_PREFIX_USEFUL : BLOOM_FILTER_USEFUL); + uint64_t filter_useful = + PopTicker(options, use_prefix_ ? BLOOM_FILTER_PREFIX_USEFUL + : BLOOM_FILTER_USEFUL); uint64_t filter_checked = - TestGetAndResetTickerCount(options, use_prefix_ - ? BLOOM_FILTER_PREFIX_CHECKED - : BLOOM_FILTER_FULL_POSITIVE) + + PopTicker(options, use_prefix_ ? BLOOM_FILTER_PREFIX_CHECKED + : BLOOM_FILTER_FULL_POSITIVE) + (use_prefix_ ? 0 : filter_useful); EXPECT_GE(filter_useful, number_not_found - 2); // possible FP EXPECT_EQ(filter_checked, Q); - if (!use_prefix_) { - EXPECT_EQ( - TestGetAndResetTickerCount(options, BLOOM_FILTER_FULL_TRUE_POSITIVE), - Q - number_not_found); - } + EXPECT_EQ(PopTicker(options, use_prefix_ ? BLOOM_FILTER_PREFIX_TRUE_POSITIVE + : BLOOM_FILTER_FULL_TRUE_POSITIVE), + Q - number_not_found); // Confirm no duplicate loading of same filter partition - uint64_t filter_accesses = - TestGetAndResetTickerCount(options, BLOCK_CACHE_FILTER_HIT) + - TestGetAndResetTickerCount(options, BLOCK_CACHE_FILTER_MISS); + uint64_t filter_accesses = PopTicker(options, BLOCK_CACHE_FILTER_HIT) + + PopTicker(options, BLOCK_CACHE_FILTER_MISS); if (filter_accesses == 2) { // Spanned across partitions. ++found_spanning; @@ -2636,6 +2601,9 @@ int CountIter(std::unique_ptr& iter, const Slice& key) { int count = 0; for (iter->Seek(key); iter->Valid(); iter->Next()) { count++; + // Access key & value as if we were using them + (void)iter->key(); + (void)iter->value(); } EXPECT_OK(iter->status()); return count; @@ -2675,6 +2643,12 @@ TEST_F(DBBloomFilterTest, DynamicBloomFilterUpperBound) { read_options.iterate_upper_bound = &upper_bound; std::unique_ptr iter(db_->NewIterator(read_options)); ASSERT_EQ(CountIter(iter, "abcd0000"), 4); + ASSERT_EQ(TestGetTickerCount(options, NON_LAST_LEVEL_SEEK_FILTER_MATCH), + 1); + ASSERT_EQ(TestGetTickerCount(options, NON_LAST_LEVEL_SEEK_FILTERED), 0); + ASSERT_EQ(TestGetTickerCount( + options, NON_LAST_LEVEL_SEEK_DATA_USEFUL_FILTER_MATCH), + 1); } { Slice upper_bound("abcdzzzz"); @@ -2683,8 +2657,9 @@ TEST_F(DBBloomFilterTest, DynamicBloomFilterUpperBound) { read_options.iterate_upper_bound = &upper_bound; std::unique_ptr iter(db_->NewIterator(read_options)); ASSERT_EQ(CountIter(iter, "abcd0000"), 4); - ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_CHECKED), 2); - ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_USEFUL), 0); + ASSERT_EQ(TestGetTickerCount(options, NON_LAST_LEVEL_SEEK_FILTER_MATCH), + 2); + ASSERT_EQ(TestGetTickerCount(options, NON_LAST_LEVEL_SEEK_FILTERED), 0); } ASSERT_OK(dbfull()->SetOptions({{"prefix_extractor", "fixed:5"}})); ASSERT_EQ(dbfull()->GetOptions().prefix_extractor->AsString(), @@ -2698,8 +2673,9 @@ TEST_F(DBBloomFilterTest, DynamicBloomFilterUpperBound) { std::unique_ptr iter(db_->NewIterator(read_options)); ASSERT_EQ(CountIter(iter, "abcdxx00"), 4); // should check bloom filter since upper bound meets requirement - ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_CHECKED), 3); - ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_USEFUL), 0); + ASSERT_EQ(TestGetTickerCount(options, NON_LAST_LEVEL_SEEK_FILTER_MATCH), + 3); + ASSERT_EQ(TestGetTickerCount(options, NON_LAST_LEVEL_SEEK_FILTERED), 0); } { // [abcdxx01, abcey) is not valid bound since upper bound is too long for @@ -2711,8 +2687,9 @@ TEST_F(DBBloomFilterTest, DynamicBloomFilterUpperBound) { std::unique_ptr iter(db_->NewIterator(read_options)); ASSERT_EQ(CountIter(iter, "abcdxx01"), 4); // should skip bloom filter since upper bound is too long - ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_CHECKED), 3); - ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_USEFUL), 0); + ASSERT_EQ(TestGetTickerCount(options, NON_LAST_LEVEL_SEEK_FILTER_MATCH), + 3); + ASSERT_EQ(TestGetTickerCount(options, NON_LAST_LEVEL_SEEK_FILTERED), 0); } { // [abcdxx02, abcdy) is a valid bound since the prefix is the same @@ -2724,8 +2701,9 @@ TEST_F(DBBloomFilterTest, DynamicBloomFilterUpperBound) { ASSERT_EQ(CountIter(iter, "abcdxx02"), 4); // should check bloom filter since upper bound matches transformed seek // key - ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_CHECKED), 4); - ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_USEFUL), 0); + ASSERT_EQ(TestGetTickerCount(options, NON_LAST_LEVEL_SEEK_FILTER_MATCH), + 4); + ASSERT_EQ(TestGetTickerCount(options, NON_LAST_LEVEL_SEEK_FILTERED), 0); } { // [aaaaaaaa, abce) is not a valid bound since 1) they don't share the @@ -2737,8 +2715,9 @@ TEST_F(DBBloomFilterTest, DynamicBloomFilterUpperBound) { std::unique_ptr iter(db_->NewIterator(read_options)); ASSERT_EQ(CountIter(iter, "aaaaaaaa"), 0); // should skip bloom filter since mismatch is found - ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_CHECKED), 4); - ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_USEFUL), 0); + ASSERT_EQ(TestGetTickerCount(options, NON_LAST_LEVEL_SEEK_FILTER_MATCH), + 4); + ASSERT_EQ(TestGetTickerCount(options, NON_LAST_LEVEL_SEEK_FILTERED), 0); } ASSERT_OK(dbfull()->SetOptions({{"prefix_extractor", "fixed:3"}})); { @@ -2750,8 +2729,9 @@ TEST_F(DBBloomFilterTest, DynamicBloomFilterUpperBound) { read_options.iterate_upper_bound = &upper_bound; std::unique_ptr iter(db_->NewIterator(read_options)); ASSERT_EQ(CountIter(iter, "abc"), 4); - ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_CHECKED), 4); - ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_USEFUL), 0); + ASSERT_EQ(TestGetTickerCount(options, NON_LAST_LEVEL_SEEK_FILTER_MATCH), + 4); + ASSERT_EQ(TestGetTickerCount(options, NON_LAST_LEVEL_SEEK_FILTERED), 0); } // Same with re-open options.prefix_extractor.reset(NewFixedPrefixTransform(3)); @@ -2763,8 +2743,9 @@ TEST_F(DBBloomFilterTest, DynamicBloomFilterUpperBound) { read_options.iterate_upper_bound = &upper_bound; std::unique_ptr iter(db_->NewIterator(read_options)); ASSERT_EQ(CountIter(iter, "abc"), 4); - ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_CHECKED), 4); - ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_USEFUL), 0); + ASSERT_EQ(TestGetTickerCount(options, NON_LAST_LEVEL_SEEK_FILTER_MATCH), + 4); + ASSERT_EQ(TestGetTickerCount(options, NON_LAST_LEVEL_SEEK_FILTERED), 0); } // Set back to capped:4 and verify BF is always read options.prefix_extractor.reset(NewCappedPrefixTransform(4)); @@ -2776,8 +2757,9 @@ TEST_F(DBBloomFilterTest, DynamicBloomFilterUpperBound) { read_options.iterate_upper_bound = &upper_bound; std::unique_ptr iter(db_->NewIterator(read_options)); ASSERT_EQ(CountIter(iter, "abc"), 0); - ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_CHECKED), 5); - ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_USEFUL), 1); + ASSERT_EQ(TestGetTickerCount(options, NON_LAST_LEVEL_SEEK_FILTER_MATCH), + 4); + ASSERT_EQ(TestGetTickerCount(options, NON_LAST_LEVEL_SEEK_FILTERED), 1); } // Same if there's a problem initally loading prefix transform SyncPoint::GetInstance()->SetCallBack( @@ -2792,8 +2774,9 @@ TEST_F(DBBloomFilterTest, DynamicBloomFilterUpperBound) { read_options.iterate_upper_bound = &upper_bound; std::unique_ptr iter(db_->NewIterator(read_options)); ASSERT_EQ(CountIter(iter, "abc"), 0); - ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_CHECKED), 6); - ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_USEFUL), 2); + ASSERT_EQ(TestGetTickerCount(options, NON_LAST_LEVEL_SEEK_FILTER_MATCH), + 4); + ASSERT_EQ(TestGetTickerCount(options, NON_LAST_LEVEL_SEEK_FILTERED), 2); } SyncPoint::GetInstance()->DisableProcessing(); } @@ -2828,7 +2811,8 @@ TEST_F(DBBloomFilterTest, DynamicBloomFilterMultipleSST) { dbfull()->Flush(FlushOptions()); std::unique_ptr iter_old(db_->NewIterator(read_options)); ASSERT_EQ(CountIter(iter_old, "foo"), 4); - ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_CHECKED), 1); + ASSERT_EQ(PopTicker(options, NON_LAST_LEVEL_SEEK_FILTERED), 0); + ASSERT_EQ(PopTicker(options, NON_LAST_LEVEL_SEEK_FILTER_MATCH), 1); ASSERT_OK(dbfull()->SetOptions({{"prefix_extractor", "capped:3"}})); ASSERT_EQ(dbfull()->GetOptions().prefix_extractor->AsString(), @@ -2836,10 +2820,11 @@ TEST_F(DBBloomFilterTest, DynamicBloomFilterMultipleSST) { read_options.iterate_upper_bound = &upper_bound; std::unique_ptr iter(db_->NewIterator(read_options)); ASSERT_EQ(CountIter(iter, "foo"), 2); - ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_CHECKED), 2); + ASSERT_EQ(PopTicker(options, NON_LAST_LEVEL_SEEK_FILTERED), 0); + ASSERT_EQ(PopTicker(options, NON_LAST_LEVEL_SEEK_FILTER_MATCH), 1); ASSERT_EQ(CountIter(iter, "gpk"), 0); - ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_CHECKED), 2); - ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_USEFUL), 0); + ASSERT_EQ(PopTicker(options, NON_LAST_LEVEL_SEEK_FILTERED), 0); + ASSERT_EQ(PopTicker(options, NON_LAST_LEVEL_SEEK_FILTER_MATCH), 0); // second SST with capped:3 BF ASSERT_OK(Put("foo3", "bar3")); @@ -2851,14 +2836,15 @@ TEST_F(DBBloomFilterTest, DynamicBloomFilterMultipleSST) { // BF is cappped:3 now std::unique_ptr iter_tmp(db_->NewIterator(read_options)); ASSERT_EQ(CountIter(iter_tmp, "foo"), 4); - ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_CHECKED), 4); - ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_USEFUL), 0); + ASSERT_EQ(PopTicker(options, NON_LAST_LEVEL_SEEK_FILTERED), 0); + ASSERT_EQ(PopTicker(options, NON_LAST_LEVEL_SEEK_FILTER_MATCH), 2); ASSERT_EQ(CountIter(iter_tmp, "gpk"), 0); // both counters are incremented because BF is "not changed" for 1 of the // 2 SST files, so filter is checked once and found no match. - ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_CHECKED), 5); - ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_USEFUL), 1); + ASSERT_EQ(PopTicker(options, NON_LAST_LEVEL_SEEK_FILTERED), 1); + ASSERT_EQ(PopTicker(options, NON_LAST_LEVEL_SEEK_FILTER_MATCH), 0); } + // IAMHERE ASSERT_OK(dbfull()->SetOptions({{"prefix_extractor", "fixed:2"}})); ASSERT_EQ(dbfull()->GetOptions().prefix_extractor->AsString(), diff --git a/db/db_test_util.h b/db/db_test_util.h index 50bfc78ee44..5b35c9907bf 100644 --- a/db/db_test_util.h +++ b/db/db_test_util.h @@ -1318,6 +1318,10 @@ class DBTestBase : public testing::Test { Tickers ticker_type) { return options.statistics->getAndResetTickerCount(ticker_type); } + // Short name for TestGetAndResetTickerCount + uint64_t PopTicker(const Options& options, Tickers ticker_type) { + return options.statistics->getAndResetTickerCount(ticker_type); + } // Note: reverting this setting within the same test run is not yet // supported diff --git a/include/rocksdb/statistics.h b/include/rocksdb/statistics.h index e93e3f16045..cfeaa5a53e0 100644 --- a/include/rocksdb/statistics.h +++ b/include/rocksdb/statistics.h @@ -157,11 +157,16 @@ enum Tickers : uint32_t { NUMBER_MERGE_FAILURES, - // number of times bloom was checked before creating iterator on a - // file, and the number of times the check was useful in avoiding - // iterator creation (and thus likely IOPs). + // Prefix filter stats when used for point lookups (Get / MultiGet). + // (For prefix filter stats on iterators, see *_LEVEL_SEEK_*.) + // Checked: filter was queried BLOOM_FILTER_PREFIX_CHECKED, + // Useful: filter returned false so prevented accessing data+index blocks BLOOM_FILTER_PREFIX_USEFUL, + // True positive: found a key matching the point query. When another key + // with the same prefix matches, it is considered a false positive by + // these statistics even though the filter returned a true positive. + BLOOM_FILTER_PREFIX_TRUE_POSITIVE, // Number of times we had to reseek inside an iteration to skip // over large number of keys with same userkey. @@ -394,6 +399,32 @@ enum Tickers : uint32_t { NON_LAST_LEVEL_READ_BYTES, NON_LAST_LEVEL_READ_COUNT, + // Statistics on iterator Seek() (and variants) for each sorted run. I.e. a + // single user Seek() can result in many sorted run Seek()s. + // The stats are split between last level and non-last level. + // Filtered: a filter such as prefix Bloom filter indicate the Seek() would + // not find anything relevant, so avoided a likely access to data+index + // blocks. + LAST_LEVEL_SEEK_FILTERED, + // Filter match: a filter such as prefix Bloom filter was queried but did + // not filter out the seek. + LAST_LEVEL_SEEK_FILTER_MATCH, + // At least one data block was accessed for a Seek() (or variant) on a + // sorted run. + LAST_LEVEL_SEEK_DATA, + // At least one value() was accessed for the seek (suggesting it was useful), + // and no filter such as prefix Bloom was queried. + LAST_LEVEL_SEEK_DATA_USEFUL_NO_FILTER, + // At least one value() was accessed for the seek (suggesting it was useful), + // after querying a filter such as prefix Bloom. + LAST_LEVEL_SEEK_DATA_USEFUL_FILTER_MATCH, + // The same set of stats, but for non-last level seeks. + NON_LAST_LEVEL_SEEK_FILTERED, + NON_LAST_LEVEL_SEEK_FILTER_MATCH, + NON_LAST_LEVEL_SEEK_DATA, + NON_LAST_LEVEL_SEEK_DATA_USEFUL_NO_FILTER, + NON_LAST_LEVEL_SEEK_DATA_USEFUL_FILTER_MATCH, + // Number of block checksum verifications BLOCK_CHECKSUM_COMPUTE_COUNT, // Number of times RocksDB detected a corruption while verifying a block @@ -666,7 +697,7 @@ class Statistics : public Customizable { virtual void histogramData(uint32_t type, HistogramData* const data) const = 0; virtual std::string getHistogramString(uint32_t /*type*/) const { return ""; } - virtual void recordTick(uint32_t tickerType, uint64_t count = 0) = 0; + virtual void recordTick(uint32_t tickerType, uint64_t count = 1) = 0; virtual void setTickerCount(uint32_t tickerType, uint64_t count) = 0; virtual uint64_t getAndResetTickerCount(uint32_t tickerType) = 0; virtual void reportTimeToHistogram(uint32_t histogramType, uint64_t time) { diff --git a/monitoring/statistics.cc b/monitoring/statistics.cc index 65bc575446b..0e9c2250f21 100644 --- a/monitoring/statistics.cc +++ b/monitoring/statistics.cc @@ -83,6 +83,8 @@ const std::vector> TickersNameMap = { {NUMBER_MERGE_FAILURES, "rocksdb.number.merge.failures"}, {BLOOM_FILTER_PREFIX_CHECKED, "rocksdb.bloom.filter.prefix.checked"}, {BLOOM_FILTER_PREFIX_USEFUL, "rocksdb.bloom.filter.prefix.useful"}, + {BLOOM_FILTER_PREFIX_TRUE_POSITIVE, + "rocksdb.bloom.filter.prefix.true.positive"}, {NUMBER_OF_RESEEKS_IN_ITERATION, "rocksdb.number.reseeks.iteration"}, {GET_UPDATES_SINCE_CALLS, "rocksdb.getupdatessince.calls"}, {WAL_FILE_SYNCED, "rocksdb.wal.synced"}, @@ -204,6 +206,21 @@ const std::vector> TickersNameMap = { {LAST_LEVEL_READ_COUNT, "rocksdb.last.level.read.count"}, {NON_LAST_LEVEL_READ_BYTES, "rocksdb.non.last.level.read.bytes"}, {NON_LAST_LEVEL_READ_COUNT, "rocksdb.non.last.level.read.count"}, + {LAST_LEVEL_SEEK_FILTERED, "rocksdb.last.level.seek.filtered"}, + {LAST_LEVEL_SEEK_FILTER_MATCH, "rocksdb.last.level.seek.filter.match"}, + {LAST_LEVEL_SEEK_DATA, "rocksdb.last.level.seek.data"}, + {LAST_LEVEL_SEEK_DATA_USEFUL_NO_FILTER, + "rocksdb.last.level.seek.data.useful.no.filter"}, + {LAST_LEVEL_SEEK_DATA_USEFUL_FILTER_MATCH, + "rocksdb.last.level.seek.data.useful.filter.match"}, + {NON_LAST_LEVEL_SEEK_FILTERED, "rocksdb.non.last.level.seek.filtered"}, + {NON_LAST_LEVEL_SEEK_FILTER_MATCH, + "rocksdb.non.last.level.seek.filter.match"}, + {NON_LAST_LEVEL_SEEK_DATA, "rocksdb.non.last.level.seek.data"}, + {NON_LAST_LEVEL_SEEK_DATA_USEFUL_NO_FILTER, + "rocksdb.non.last.level.seek.data.useful.no.filter"}, + {NON_LAST_LEVEL_SEEK_DATA_USEFUL_FILTER_MATCH, + "rocksdb.non.last.level.seek.data.useful.filter.match"}, {BLOCK_CHECKSUM_COMPUTE_COUNT, "rocksdb.block.checksum.compute.count"}, {BLOCK_CHECKSUM_MISMATCH_COUNT, "rocksdb.block.checksum.mismatch.count"}, {MULTIGET_COROUTINE_COUNT, "rocksdb.multiget.coroutine.count"}, diff --git a/table/block_based/block_based_table_iterator.cc b/table/block_based/block_based_table_iterator.cc index d2605670fc3..57744a5877a 100644 --- a/table/block_based/block_based_table_iterator.cc +++ b/table/block_based/block_based_table_iterator.cc @@ -26,10 +26,22 @@ void BlockBasedTableIterator::SeekImpl(const Slice* target, is_out_of_bound_ = false; is_at_first_key_from_index_ = false; - if (target && !CheckPrefixMayMatch(*target, IterDirection::kForward)) { + seek_stat_state_ = kNone; + bool filter_checked = false; + if (target && + !CheckPrefixMayMatch(*target, IterDirection::kForward, &filter_checked)) { ResetDataIter(); + RecordTick(table_->GetStatistics(), is_last_level_ + ? LAST_LEVEL_SEEK_FILTERED + : NON_LAST_LEVEL_SEEK_FILTERED); return; } + if (filter_checked) { + seek_stat_state_ = kFilterUsed; + RecordTick(table_->GetStatistics(), is_last_level_ + ? LAST_LEVEL_SEEK_FILTER_MATCH + : NON_LAST_LEVEL_SEEK_FILTER_MATCH); + } bool need_seek_index = true; if (block_iter_points_to_real_block_ && block_iter_.Valid()) { @@ -125,12 +137,23 @@ void BlockBasedTableIterator::SeekImpl(const Slice* target, void BlockBasedTableIterator::SeekForPrev(const Slice& target) { is_out_of_bound_ = false; is_at_first_key_from_index_ = false; + seek_stat_state_ = kNone; + bool filter_checked = false; // For now totally disable prefix seek in auto prefix mode because we don't // have logic - if (!CheckPrefixMayMatch(target, IterDirection::kBackward)) { + if (!CheckPrefixMayMatch(target, IterDirection::kBackward, &filter_checked)) { ResetDataIter(); + RecordTick(table_->GetStatistics(), is_last_level_ + ? LAST_LEVEL_SEEK_FILTERED + : NON_LAST_LEVEL_SEEK_FILTERED); return; } + if (filter_checked) { + seek_stat_state_ = kFilterUsed; + RecordTick(table_->GetStatistics(), is_last_level_ + ? LAST_LEVEL_SEEK_FILTER_MATCH + : NON_LAST_LEVEL_SEEK_FILTER_MATCH); + } SavePrevIndexValue(); @@ -185,6 +208,7 @@ void BlockBasedTableIterator::SeekForPrev(const Slice& target) { void BlockBasedTableIterator::SeekToLast() { is_out_of_bound_ = false; is_at_first_key_from_index_ = false; + seek_stat_state_ = kNone; SavePrevIndexValue(); index_iter_->SeekToLast(); if (!index_iter_->Valid()) { @@ -266,6 +290,14 @@ void BlockBasedTableIterator::InitDataBlock() { /*for_compaction=*/is_for_compaction, /*async_read=*/false, s); block_iter_points_to_real_block_ = true; CheckDataBlockWithinUpperBound(); + if (!is_for_compaction && + (seek_stat_state_ & kDataBlockReadSinceLastSeek) == 0) { + RecordTick(table_->GetStatistics(), is_last_level_ + ? LAST_LEVEL_SEEK_DATA + : NON_LAST_LEVEL_SEEK_DATA); + seek_stat_state_ = static_cast( + seek_stat_state_ | kDataBlockReadSinceLastSeek | kReportOnUseful); + } } } @@ -320,6 +352,15 @@ void BlockBasedTableIterator::AsyncInitDataBlock(bool is_first_pass) { } block_iter_points_to_real_block_ = true; CheckDataBlockWithinUpperBound(); + + if (!is_for_compaction && + (seek_stat_state_ & kDataBlockReadSinceLastSeek) == 0) { + RecordTick(table_->GetStatistics(), is_last_level_ + ? LAST_LEVEL_SEEK_DATA + : NON_LAST_LEVEL_SEEK_DATA); + seek_stat_state_ = static_cast( + seek_stat_state_ | kDataBlockReadSinceLastSeek | kReportOnUseful); + } async_read_in_progress_ = false; } diff --git a/table/block_based/block_based_table_iterator.h b/table/block_based/block_based_table_iterator.h index a2918b24866..6ea53f3315c 100644 --- a/table/block_based/block_based_table_iterator.h +++ b/table/block_based/block_based_table_iterator.h @@ -41,7 +41,8 @@ class BlockBasedTableIterator : public InternalIteratorBase { block_iter_points_to_real_block_(false), check_filter_(check_filter), need_upper_bound_check_(need_upper_bound_check), - async_read_in_progress_(false) {} + async_read_in_progress_(false), + is_last_level_(table->IsLastLevel()) {} ~BlockBasedTableIterator() {} @@ -88,6 +89,18 @@ class BlockBasedTableIterator : public InternalIteratorBase { assert(!is_at_first_key_from_index_); assert(Valid()); + if (seek_stat_state_ & kReportOnUseful) { + bool filter_used = (seek_stat_state_ & kFilterUsed) != 0; + RecordTick( + table_->GetStatistics(), + filter_used + ? (is_last_level_ ? LAST_LEVEL_SEEK_DATA_USEFUL_FILTER_MATCH + : NON_LAST_LEVEL_SEEK_DATA_USEFUL_FILTER_MATCH) + : (is_last_level_ ? LAST_LEVEL_SEEK_DATA_USEFUL_NO_FILTER + : NON_LAST_LEVEL_SEEK_DATA_USEFUL_NO_FILTER)); + seek_stat_state_ = kDataBlockReadSinceLastSeek; + } + return block_iter_.value(); } Status status() const override { @@ -204,12 +217,24 @@ class BlockBasedTableIterator : public InternalIteratorBase { // bound. // If the boundary key hasn't been checked against the upper bound, // kUnknown can be used. - enum class BlockUpperBound { + enum class BlockUpperBound : uint8_t { kUpperBoundInCurBlock, kUpperBoundBeyondCurBlock, kUnknown, }; + // State bits for collecting stats on seeks and whether they returned useful + // results. + enum SeekStatState : uint8_t { + kNone = 0, + // Most recent seek checked prefix filter (or similar future feature) + kFilterUsed = 1 << 0, + // Already recorded that a data block was accessed since the last seek. + kDataBlockReadSinceLastSeek = 1 << 1, + // Have not yet recorded that a value() was accessed. + kReportOnUseful = 1 << 2, + }; + const BlockBasedTable* table_; const ReadOptions& read_options_; const InternalKeyComparator& icomp_; @@ -240,6 +265,9 @@ class BlockBasedTableIterator : public InternalIteratorBase { bool async_read_in_progress_; + mutable SeekStatState seek_stat_state_ = SeekStatState::kNone; + bool is_last_level_; + // If `target` is null, seek to first. void SeekImpl(const Slice* target, bool async_prefetch); @@ -257,16 +285,18 @@ class BlockBasedTableIterator : public InternalIteratorBase { // we need to check and update data_block_within_upper_bound_ accordingly. void CheckDataBlockWithinUpperBound(); - bool CheckPrefixMayMatch(const Slice& ikey, IterDirection direction) { + bool CheckPrefixMayMatch(const Slice& ikey, IterDirection direction, + bool* filter_checked) { if (need_upper_bound_check_ && direction == IterDirection::kBackward) { // Upper bound check isn't sufficient for backward direction to // guarantee the same result as total order, so disable prefix // check. return true; } - if (check_filter_ && !table_->PrefixRangeMayMatch( - ikey, read_options_, prefix_extractor_, - need_upper_bound_check_, &lookup_context_)) { + if (check_filter_ && + !table_->PrefixRangeMayMatch(ikey, read_options_, prefix_extractor_, + need_upper_bound_check_, &lookup_context_, + filter_checked)) { // TODO remember the iterator is invalidated because of prefix // match. This can avoid the upper level file iterator to falsely // believe the position is the end of the SST file and move to diff --git a/table/block_based/block_based_table_reader.cc b/table/block_based/block_based_table_reader.cc index a5e8e701467..49d9a29a365 100644 --- a/table/block_based/block_based_table_reader.cc +++ b/table/block_based/block_based_table_reader.cc @@ -1816,8 +1816,8 @@ BlockBasedTable::PartitionedIndexIteratorState::NewSecondaryIterator( bool BlockBasedTable::PrefixRangeMayMatch( const Slice& internal_key, const ReadOptions& read_options, const SliceTransform* options_prefix_extractor, - const bool need_upper_bound_check, - BlockCacheLookupContext* lookup_context) const { + const bool need_upper_bound_check, BlockCacheLookupContext* lookup_context, + bool* filter_checked) const { if (!rep_->filter_policy) { return true; } @@ -1842,7 +1842,7 @@ bool BlockBasedTable::PrefixRangeMayMatch( bool may_match = true; FilterBlockReader* const filter = rep_->filter.get(); - bool filter_checked = false; + *filter_checked = false; if (filter != nullptr) { const bool no_io = read_options.read_tier == kBlockCacheTier; @@ -1850,18 +1850,10 @@ bool BlockBasedTable::PrefixRangeMayMatch( may_match = filter->RangeMayExist( read_options.iterate_upper_bound, user_key_without_ts, prefix_extractor, rep_->internal_comparator.user_comparator(), const_ikey_ptr, - &filter_checked, need_upper_bound_check, no_io, lookup_context, + filter_checked, need_upper_bound_check, no_io, lookup_context, read_options); } - if (filter_checked) { - Statistics* statistics = rep_->ioptions.stats; - RecordTick(statistics, BLOOM_FILTER_PREFIX_CHECKED); - if (!may_match) { - RecordTick(statistics, BLOOM_FILTER_PREFIX_USEFUL); - } - } - return may_match; } @@ -1877,6 +1869,13 @@ bool BlockBasedTable::PrefixExtractorChanged( } } +Statistics* BlockBasedTable::GetStatistics() const { + return rep_->ioptions.stats; +} +bool BlockBasedTable::IsLastLevel() const { + return rep_->level == rep_->ioptions.num_levels - 1; +} + InternalIterator* BlockBasedTable::NewIterator( const ReadOptions& read_options, const SliceTransform* prefix_extractor, Arena* arena, bool skip_filters, TableReaderCaller caller, @@ -1937,18 +1936,24 @@ bool BlockBasedTable::FullFilterKeyMayMatch( if (rep_->whole_key_filtering) { may_match = filter->KeyMayMatch(user_key_without_ts, no_io, const_ikey_ptr, get_context, lookup_context, read_options); + if (may_match) { + RecordTick(rep_->ioptions.stats, BLOOM_FILTER_FULL_POSITIVE); + PERF_COUNTER_BY_LEVEL_ADD(bloom_filter_full_positive, 1, rep_->level); + } else { + RecordTick(rep_->ioptions.stats, BLOOM_FILTER_USEFUL); + PERF_COUNTER_BY_LEVEL_ADD(bloom_filter_useful, 1, rep_->level); + } } else if (!PrefixExtractorChanged(prefix_extractor) && - prefix_extractor->InDomain(user_key_without_ts) && - !filter->PrefixMayMatch( - prefix_extractor->Transform(user_key_without_ts), no_io, - const_ikey_ptr, get_context, lookup_context, read_options)) { + prefix_extractor->InDomain(user_key_without_ts)) { // FIXME ^^^: there should be no reason for Get() to depend on current // prefix_extractor at all. It should always use table_prefix_extractor. - may_match = false; - } - if (may_match) { - RecordTick(rep_->ioptions.stats, BLOOM_FILTER_FULL_POSITIVE); - PERF_COUNTER_BY_LEVEL_ADD(bloom_filter_full_positive, 1, rep_->level); + may_match = filter->PrefixMayMatch( + prefix_extractor->Transform(user_key_without_ts), no_io, const_ikey_ptr, + get_context, lookup_context, read_options); + RecordTick(rep_->ioptions.stats, BLOOM_FILTER_PREFIX_CHECKED); + if (!may_match) { + RecordTick(rep_->ioptions.stats, BLOOM_FILTER_PREFIX_USEFUL); + } } return may_match; } @@ -2100,10 +2105,7 @@ Status BlockBasedTable::Get(const ReadOptions& read_options, const Slice& key, FullFilterKeyMayMatch(filter, key, no_io, prefix_extractor, get_context, &lookup_context, read_options); TEST_SYNC_POINT("BlockBasedTable::Get:AfterFilterMatch"); - if (!may_match) { - RecordTick(rep_->ioptions.stats, BLOOM_FILTER_USEFUL); - PERF_COUNTER_BY_LEVEL_ADD(bloom_filter_useful, 1, rep_->level); - } else { + if (may_match) { IndexBlockIter iiter_on_stack; // if prefix_extractor found in block differs from options, disable // BlockPrefixIndex. Only do this check when index_type is kHashSearch. @@ -2218,10 +2220,15 @@ Status BlockBasedTable::Get(const ReadOptions& read_options, const Slice& key, } } if (matched && filter != nullptr) { - RecordTick(rep_->ioptions.stats, BLOOM_FILTER_FULL_TRUE_POSITIVE); - PERF_COUNTER_BY_LEVEL_ADD(bloom_filter_full_true_positive, 1, - rep_->level); + if (rep_->whole_key_filtering) { + RecordTick(rep_->ioptions.stats, BLOOM_FILTER_FULL_TRUE_POSITIVE); + PERF_COUNTER_BY_LEVEL_ADD(bloom_filter_full_true_positive, 1, + rep_->level); + } else { + RecordTick(rep_->ioptions.stats, BLOOM_FILTER_PREFIX_TRUE_POSITIVE); + } } + if (s.ok() && !iiter->status().IsNotFound()) { s = iiter->status(); } diff --git a/table/block_based/block_based_table_reader.h b/table/block_based/block_based_table_reader.h index 0f14e05dd14..8cdda064251 100644 --- a/table/block_based/block_based_table_reader.h +++ b/table/block_based/block_based_table_reader.h @@ -117,7 +117,8 @@ class BlockBasedTable : public TableReader { const ReadOptions& read_options, const SliceTransform* options_prefix_extractor, const bool need_upper_bound_check, - BlockCacheLookupContext* lookup_context) const; + BlockCacheLookupContext* lookup_context, + bool* filter_checked) const; // Returns a new iterator over the table contents. // The result of NewIterator() is initially invalid (caller must @@ -248,6 +249,9 @@ class BlockBasedTable : public TableReader { bool redundant, Statistics* const statistics); + Statistics* GetStatistics() const; + bool IsLastLevel() const; + // Get the size to read from storage for a BlockHandle. size_t because we // are about to load into memory. static inline size_t BlockSizeWithTrailer(const BlockHandle& handle) { diff --git a/table/block_based/block_based_table_reader_sync_and_async.h b/table/block_based/block_based_table_reader_sync_and_async.h index e033b688b3f..857ae854d41 100644 --- a/table/block_based/block_based_table_reader_sync_and_async.h +++ b/table/block_based/block_based_table_reader_sync_and_async.h @@ -732,9 +732,13 @@ DEFINE_SYNC_AND_ASYNC(void, BlockBasedTable::MultiGet) } while (iiter->Valid()); if (matched && filter != nullptr) { - RecordTick(rep_->ioptions.stats, BLOOM_FILTER_FULL_TRUE_POSITIVE); - PERF_COUNTER_BY_LEVEL_ADD(bloom_filter_full_true_positive, 1, - rep_->level); + if (rep_->whole_key_filtering) { + RecordTick(rep_->ioptions.stats, BLOOM_FILTER_FULL_TRUE_POSITIVE); + PERF_COUNTER_BY_LEVEL_ADD(bloom_filter_full_true_positive, 1, + rep_->level); + } else { + RecordTick(rep_->ioptions.stats, BLOOM_FILTER_PREFIX_TRUE_POSITIVE); + } } if (s.ok() && !iiter->status().IsNotFound()) { s = iiter->status(); From 4a913b17344a48bf1c472d7b3253dba394724021 Mon Sep 17 00:00:00 2001 From: Peter Dillinger Date: Fri, 19 May 2023 09:50:52 -0700 Subject: [PATCH 2/3] Fix unit tests, restore good PerfContext stats --- db/db_bloom_filter_test.cc | 301 ++++++++++-------- db/db_test2.cc | 88 +++-- include/rocksdb/perf_context.h | 6 +- table/block_based/block_based_table_reader.cc | 20 +- .../block_based_table_reader_sync_and_async.h | 5 +- util/slice_transform_test.cc | 19 +- 6 files changed, 265 insertions(+), 174 deletions(-) diff --git a/db/db_bloom_filter_test.cc b/db/db_bloom_filter_test.cc index 3dd0824f4b1..79211860979 100644 --- a/db/db_bloom_filter_test.cc +++ b/db/db_bloom_filter_test.cc @@ -43,6 +43,16 @@ const std::string kStandard128Ribbon = test::Standard128RibbonFilterPolicy::kClassName(); const std::string kAutoBloom = BloomFilterPolicy::kClassName(); const std::string kAutoRibbon = RibbonFilterPolicy::kClassName(); + +template +T Pop(T& var) { + auto rv = var; + var = 0; + return rv; +} +PerfContextByLevel& GetLevelPerfContext(uint32_t level) { + return (*(get_perf_context()->level_to_perf_context))[level]; +} } // anonymous namespace // DB tests related to bloom filter. @@ -209,35 +219,43 @@ TEST_F(DBBloomFilterTest, GetFilterByPrefixBloomCustomPrefixExtractor) { ASSERT_OK(dbfull()->Flush(fo)); ASSERT_EQ("foo", Get("barbarbar")); - ASSERT_EQ(PopTicker(options, BLOOM_FILTER_PREFIX_USEFUL), 0); + EXPECT_EQ(PopTicker(options, BLOOM_FILTER_PREFIX_USEFUL), 0); + EXPECT_EQ(Pop(GetLevelPerfContext(0).bloom_filter_useful), 0); ASSERT_EQ("foo2", Get("barbarbar2")); - ASSERT_EQ(PopTicker(options, BLOOM_FILTER_PREFIX_USEFUL), 0); + EXPECT_EQ(PopTicker(options, BLOOM_FILTER_PREFIX_USEFUL), 0); + EXPECT_EQ(Pop(GetLevelPerfContext(0).bloom_filter_useful), 0); ASSERT_EQ("NOT_FOUND", Get("barbarbar3")); - ASSERT_EQ(PopTicker(options, BLOOM_FILTER_PREFIX_USEFUL), 0); + EXPECT_EQ(PopTicker(options, BLOOM_FILTER_PREFIX_USEFUL), 0); + EXPECT_EQ(Pop(GetLevelPerfContext(0).bloom_filter_useful), 0); ASSERT_EQ("NOT_FOUND", Get("barfoofoo")); - ASSERT_EQ(PopTicker(options, BLOOM_FILTER_PREFIX_USEFUL), 1); + EXPECT_EQ(PopTicker(options, BLOOM_FILTER_PREFIX_USEFUL), 1); + EXPECT_EQ(Pop(GetLevelPerfContext(0).bloom_filter_useful), 1); ASSERT_EQ("NOT_FOUND", Get("foobarbar")); - ASSERT_EQ(PopTicker(options, BLOOM_FILTER_PREFIX_USEFUL), 1); + EXPECT_EQ(PopTicker(options, BLOOM_FILTER_PREFIX_USEFUL), 1); + EXPECT_EQ(Pop(GetLevelPerfContext(0).bloom_filter_useful), 1); ro.total_order_seek = true; // NOTE: total_order_seek no longer affects Get() ASSERT_EQ("NOT_FOUND", Get("foobarbar")); - ASSERT_EQ(PopTicker(options, BLOOM_FILTER_PREFIX_USEFUL), 1); + EXPECT_EQ(PopTicker(options, BLOOM_FILTER_PREFIX_USEFUL), 1); + EXPECT_EQ(Pop(GetLevelPerfContext(0).bloom_filter_useful), 1); // No bloom on extractor changed ASSERT_OK(db_->SetOptions({{"prefix_extractor", "capped:10"}})); ASSERT_EQ("NOT_FOUND", Get("foobarbar")); - ASSERT_EQ(PopTicker(options, BLOOM_FILTER_PREFIX_USEFUL), 0); + EXPECT_EQ(PopTicker(options, BLOOM_FILTER_PREFIX_USEFUL), 0); + EXPECT_EQ(Pop(GetLevelPerfContext(0).bloom_filter_useful), 0); // No bloom on extractor changed, after re-open options.prefix_extractor.reset(NewCappedPrefixTransform(10)); Reopen(options); ASSERT_EQ("NOT_FOUND", Get("foobarbar")); - ASSERT_EQ(PopTicker(options, BLOOM_FILTER_PREFIX_USEFUL), 0); + EXPECT_EQ(PopTicker(options, BLOOM_FILTER_PREFIX_USEFUL), 0); + EXPECT_EQ(Pop(GetLevelPerfContext(0).bloom_filter_useful), 0); get_perf_context()->Reset(); } @@ -272,27 +290,32 @@ TEST_F(DBBloomFilterTest, GetFilterByPrefixBloom) { ASSERT_OK(dbfull()->Flush(fo)); ASSERT_EQ("foo", Get("barbarbar")); - ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_USEFUL), 0); + EXPECT_EQ(PopTicker(options, BLOOM_FILTER_PREFIX_USEFUL), 0); ASSERT_EQ("foo2", Get("barbarbar2")); - ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_USEFUL), 0); + EXPECT_EQ(PopTicker(options, BLOOM_FILTER_PREFIX_USEFUL), 0); ASSERT_EQ("NOT_FOUND", Get("barbarbar3")); - ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_USEFUL), 0); + EXPECT_EQ(PopTicker(options, BLOOM_FILTER_PREFIX_USEFUL), 0); + EXPECT_EQ(Pop(GetLevelPerfContext(0).bloom_filter_useful), 0); ASSERT_EQ("NOT_FOUND", Get("barfoofoo")); - ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_USEFUL), 1); + EXPECT_EQ(PopTicker(options, BLOOM_FILTER_PREFIX_USEFUL), 1); + EXPECT_EQ(Pop(GetLevelPerfContext(0).bloom_filter_useful), 1); ASSERT_EQ("NOT_FOUND", Get("foobarbar")); - ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_USEFUL), 2); + EXPECT_EQ(PopTicker(options, BLOOM_FILTER_PREFIX_USEFUL), 1); + EXPECT_EQ(Pop(GetLevelPerfContext(0).bloom_filter_useful), 1); ro.total_order_seek = true; // NOTE: total_order_seek no longer affects Get() ASSERT_EQ("NOT_FOUND", Get("foobarbar")); - ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_USEFUL), 3); + EXPECT_EQ(PopTicker(options, BLOOM_FILTER_PREFIX_USEFUL), 1); + EXPECT_EQ(Pop(GetLevelPerfContext(0).bloom_filter_useful), 1); // No bloom on extractor changed ASSERT_OK(db_->SetOptions({{"prefix_extractor", "capped:10"}})); ASSERT_EQ("NOT_FOUND", Get("foobarbar")); - ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_USEFUL), 3); + EXPECT_EQ(PopTicker(options, BLOOM_FILTER_PREFIX_USEFUL), 0); + EXPECT_EQ(Pop(GetLevelPerfContext(0).bloom_filter_useful), 0); get_perf_context()->Reset(); } @@ -329,12 +352,17 @@ TEST_F(DBBloomFilterTest, WholeKeyFilterProp) { ASSERT_OK(dbfull()->Flush(fo)); Reopen(options); + EXPECT_EQ(PopTicker(options, BLOOM_FILTER_PREFIX_USEFUL), 0); + EXPECT_EQ(PopTicker(options, BLOOM_FILTER_USEFUL), 0); ASSERT_EQ("NOT_FOUND", Get("foo")); - ASSERT_EQ(PopTicker(options, BLOOM_FILTER_PREFIX_USEFUL), 0); + EXPECT_EQ(PopTicker(options, BLOOM_FILTER_PREFIX_USEFUL), 0); + EXPECT_EQ(PopTicker(options, BLOOM_FILTER_USEFUL), 0); ASSERT_EQ("NOT_FOUND", Get("bar")); - ASSERT_EQ(PopTicker(options, BLOOM_FILTER_PREFIX_USEFUL), 1); + EXPECT_EQ(PopTicker(options, BLOOM_FILTER_PREFIX_USEFUL), 1); + EXPECT_EQ(PopTicker(options, BLOOM_FILTER_USEFUL), 0); ASSERT_EQ("foo", Get("foobar")); - ASSERT_EQ(PopTicker(options, BLOOM_FILTER_PREFIX_USEFUL), 0); + EXPECT_EQ(PopTicker(options, BLOOM_FILTER_PREFIX_USEFUL), 0); + EXPECT_EQ(PopTicker(options, BLOOM_FILTER_USEFUL), 0); // Reopen with whole key filtering enabled and prefix extractor // NULL. Bloom filter should be off for both of whole key and @@ -344,17 +372,17 @@ TEST_F(DBBloomFilterTest, WholeKeyFilterProp) { options.prefix_extractor.reset(); Reopen(options); - ASSERT_EQ(PopTicker(options, BLOOM_FILTER_PREFIX_USEFUL), 0); - ASSERT_EQ(PopTicker(options, BLOOM_FILTER_USEFUL), 0); + EXPECT_EQ(PopTicker(options, BLOOM_FILTER_PREFIX_USEFUL), 0); + EXPECT_EQ(PopTicker(options, BLOOM_FILTER_USEFUL), 0); ASSERT_EQ("NOT_FOUND", Get("foo")); - ASSERT_EQ(PopTicker(options, BLOOM_FILTER_PREFIX_USEFUL), 0); - ASSERT_EQ(PopTicker(options, BLOOM_FILTER_USEFUL), 0); + EXPECT_EQ(PopTicker(options, BLOOM_FILTER_PREFIX_USEFUL), 0); + EXPECT_EQ(PopTicker(options, BLOOM_FILTER_USEFUL), 0); ASSERT_EQ("NOT_FOUND", Get("bar")); - ASSERT_EQ(PopTicker(options, BLOOM_FILTER_PREFIX_USEFUL), 0); - ASSERT_EQ(PopTicker(options, BLOOM_FILTER_USEFUL), 0); + EXPECT_EQ(PopTicker(options, BLOOM_FILTER_PREFIX_USEFUL), 0); + EXPECT_EQ(PopTicker(options, BLOOM_FILTER_USEFUL), 0); ASSERT_EQ("foo", Get("foobar")); - ASSERT_EQ(PopTicker(options, BLOOM_FILTER_PREFIX_USEFUL), 0); - ASSERT_EQ(PopTicker(options, BLOOM_FILTER_USEFUL), 0); + EXPECT_EQ(PopTicker(options, BLOOM_FILTER_PREFIX_USEFUL), 0); + EXPECT_EQ(PopTicker(options, BLOOM_FILTER_USEFUL), 0); // Write DB with only full key filtering. ASSERT_OK(dbfull()->Put(wo, "foobar", "foo")); // Needs insert some keys to make sure files are not filtered out by key @@ -370,17 +398,17 @@ TEST_F(DBBloomFilterTest, WholeKeyFilterProp) { options.table_factory.reset(NewBlockBasedTableFactory(bbto)); Reopen(options); - ASSERT_EQ(PopTicker(options, BLOOM_FILTER_PREFIX_USEFUL), 0); - ASSERT_EQ(PopTicker(options, BLOOM_FILTER_USEFUL), 0); + EXPECT_EQ(PopTicker(options, BLOOM_FILTER_PREFIX_USEFUL), 0); + EXPECT_EQ(PopTicker(options, BLOOM_FILTER_USEFUL), 0); ASSERT_EQ("NOT_FOUND", Get("foo")); - ASSERT_EQ(PopTicker(options, BLOOM_FILTER_PREFIX_USEFUL), 0); - ASSERT_EQ(PopTicker(options, BLOOM_FILTER_USEFUL), 0); + EXPECT_EQ(PopTicker(options, BLOOM_FILTER_PREFIX_USEFUL), 0); + EXPECT_EQ(PopTicker(options, BLOOM_FILTER_USEFUL), 0); ASSERT_EQ("NOT_FOUND", Get("bar")); - ASSERT_EQ(PopTicker(options, BLOOM_FILTER_PREFIX_USEFUL), 0); - ASSERT_EQ(PopTicker(options, BLOOM_FILTER_USEFUL), 0); + EXPECT_EQ(PopTicker(options, BLOOM_FILTER_PREFIX_USEFUL), 0); + EXPECT_EQ(PopTicker(options, BLOOM_FILTER_USEFUL), 0); ASSERT_EQ("foo", Get("foobar")); - ASSERT_EQ(PopTicker(options, BLOOM_FILTER_PREFIX_USEFUL), 0); - ASSERT_EQ(PopTicker(options, BLOOM_FILTER_USEFUL), 0); + EXPECT_EQ(PopTicker(options, BLOOM_FILTER_PREFIX_USEFUL), 0); + EXPECT_EQ(PopTicker(options, BLOOM_FILTER_USEFUL), 0); // Try to create a DB with mixed files: ASSERT_OK(dbfull()->Put(wo, "foobar", "foo")); @@ -404,62 +432,81 @@ TEST_F(DBBloomFilterTest, WholeKeyFilterProp) { ASSERT_OK(Flush()); // Now we have two files: - // File 1: An older file with prefix bloom. + // File 1: An older file with prefix bloom (disabled) // File 2: A newer file with whole bloom filter. - // IAMHERE - ASSERT_EQ(PopTicker(options, BLOOM_FILTER_USEFUL), 0); + EXPECT_EQ(PopTicker(options, BLOOM_FILTER_PREFIX_USEFUL), 0); + EXPECT_EQ(PopTicker(options, BLOOM_FILTER_USEFUL), 0); ASSERT_EQ("NOT_FOUND", Get("foo")); - ASSERT_EQ(PopTicker(options, BLOOM_FILTER_USEFUL), 1); + EXPECT_EQ(PopTicker(options, BLOOM_FILTER_PREFIX_USEFUL), 0); + EXPECT_EQ(PopTicker(options, BLOOM_FILTER_USEFUL), 1); ASSERT_EQ("NOT_FOUND", Get("bar")); - ASSERT_EQ(PopTicker(options, BLOOM_FILTER_USEFUL), 1); + EXPECT_EQ(PopTicker(options, BLOOM_FILTER_PREFIX_USEFUL), 0); + EXPECT_EQ(PopTicker(options, BLOOM_FILTER_USEFUL), 1); ASSERT_EQ("foo", Get("foobar")); - ASSERT_EQ(PopTicker(options, BLOOM_FILTER_USEFUL), 1); + EXPECT_EQ(PopTicker(options, BLOOM_FILTER_PREFIX_USEFUL), 0); + EXPECT_EQ(PopTicker(options, BLOOM_FILTER_USEFUL), 1); ASSERT_EQ("bar", Get("barfoo")); - ASSERT_EQ(PopTicker(options, BLOOM_FILTER_USEFUL), 0); + EXPECT_EQ(PopTicker(options, BLOOM_FILTER_PREFIX_USEFUL), 0); + EXPECT_EQ(PopTicker(options, BLOOM_FILTER_USEFUL), 0); // Reopen with the same setting: only whole key is used Reopen(options); - ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 4); + EXPECT_EQ(PopTicker(options, BLOOM_FILTER_PREFIX_USEFUL), 0); + EXPECT_EQ(PopTicker(options, BLOOM_FILTER_USEFUL), 0); ASSERT_EQ("NOT_FOUND", Get("foo")); - ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 5); + EXPECT_EQ(PopTicker(options, BLOOM_FILTER_PREFIX_USEFUL), 0); + EXPECT_EQ(PopTicker(options, BLOOM_FILTER_USEFUL), 1); ASSERT_EQ("NOT_FOUND", Get("bar")); - ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 6); + EXPECT_EQ(PopTicker(options, BLOOM_FILTER_PREFIX_USEFUL), 0); + EXPECT_EQ(PopTicker(options, BLOOM_FILTER_USEFUL), 1); ASSERT_EQ("foo", Get("foobar")); - ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 7); + EXPECT_EQ(PopTicker(options, BLOOM_FILTER_PREFIX_USEFUL), 0); + EXPECT_EQ(PopTicker(options, BLOOM_FILTER_USEFUL), 1); ASSERT_EQ("bar", Get("barfoo")); - ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 7); + EXPECT_EQ(PopTicker(options, BLOOM_FILTER_PREFIX_USEFUL), 0); + EXPECT_EQ(PopTicker(options, BLOOM_FILTER_USEFUL), 0); // Restart with both filters are allowed options.prefix_extractor.reset(NewFixedPrefixTransform(3)); bbto.whole_key_filtering = true; options.table_factory.reset(NewBlockBasedTableFactory(bbto)); Reopen(options); - ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 7); + EXPECT_EQ(PopTicker(options, BLOOM_FILTER_PREFIX_USEFUL), 0); + EXPECT_EQ(PopTicker(options, BLOOM_FILTER_USEFUL), 0); // File 1 will has it filtered out. // File 2 will not, as prefix `foo` exists in the file. ASSERT_EQ("NOT_FOUND", Get("foo")); - ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 8); + EXPECT_EQ(PopTicker(options, BLOOM_FILTER_PREFIX_USEFUL), 0); + EXPECT_EQ(PopTicker(options, BLOOM_FILTER_USEFUL), 1); ASSERT_EQ("NOT_FOUND", Get("bar")); - ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 10); + EXPECT_EQ(PopTicker(options, BLOOM_FILTER_PREFIX_USEFUL), 1); + EXPECT_EQ(PopTicker(options, BLOOM_FILTER_USEFUL), 1); ASSERT_EQ("foo", Get("foobar")); - ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 11); + EXPECT_EQ(PopTicker(options, BLOOM_FILTER_PREFIX_USEFUL), 0); + EXPECT_EQ(PopTicker(options, BLOOM_FILTER_USEFUL), 1); ASSERT_EQ("bar", Get("barfoo")); - ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 11); + EXPECT_EQ(PopTicker(options, BLOOM_FILTER_PREFIX_USEFUL), 0); + EXPECT_EQ(PopTicker(options, BLOOM_FILTER_USEFUL), 0); // Restart with only prefix bloom is allowed. options.prefix_extractor.reset(NewFixedPrefixTransform(3)); bbto.whole_key_filtering = false; options.table_factory.reset(NewBlockBasedTableFactory(bbto)); Reopen(options); - ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 11); + EXPECT_EQ(PopTicker(options, BLOOM_FILTER_PREFIX_USEFUL), 0); + EXPECT_EQ(PopTicker(options, BLOOM_FILTER_USEFUL), 0); ASSERT_EQ("NOT_FOUND", Get("foo")); - ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 11); + EXPECT_EQ(PopTicker(options, BLOOM_FILTER_PREFIX_USEFUL), 0); + EXPECT_EQ(PopTicker(options, BLOOM_FILTER_USEFUL), 0); ASSERT_EQ("NOT_FOUND", Get("bar")); - ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 12); + EXPECT_EQ(PopTicker(options, BLOOM_FILTER_PREFIX_USEFUL), 1); + EXPECT_EQ(PopTicker(options, BLOOM_FILTER_USEFUL), 0); ASSERT_EQ("foo", Get("foobar")); - ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 12); + EXPECT_EQ(PopTicker(options, BLOOM_FILTER_PREFIX_USEFUL), 0); + EXPECT_EQ(PopTicker(options, BLOOM_FILTER_USEFUL), 0); ASSERT_EQ("bar", Get("barfoo")); - ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 12); + EXPECT_EQ(PopTicker(options, BLOOM_FILTER_PREFIX_USEFUL), 0); + EXPECT_EQ(PopTicker(options, BLOOM_FILTER_USEFUL), 0); uint64_t bloom_filter_useful_all_levels = 0; for (auto& kv : (*(get_perf_context()->level_to_perf_context))) { if (kv.second.bloom_filter_useful > 0) { @@ -758,9 +805,7 @@ TEST_F(DBBloomFilterTest, BloomFilterRate) { ASSERT_EQ("NOT_FOUND", Get(1, Key(i + 33333))); } ASSERT_GE(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), maxKey * 0.98); - ASSERT_GE( - (*(get_perf_context()->level_to_perf_context))[0].bloom_filter_useful, - maxKey * 0.98); + ASSERT_GE(GetLevelPerfContext(0).bloom_filter_useful, maxKey * 0.98); get_perf_context()->Reset(); } } @@ -2811,8 +2856,8 @@ TEST_F(DBBloomFilterTest, DynamicBloomFilterMultipleSST) { dbfull()->Flush(FlushOptions()); std::unique_ptr iter_old(db_->NewIterator(read_options)); ASSERT_EQ(CountIter(iter_old, "foo"), 4); - ASSERT_EQ(PopTicker(options, NON_LAST_LEVEL_SEEK_FILTERED), 0); - ASSERT_EQ(PopTicker(options, NON_LAST_LEVEL_SEEK_FILTER_MATCH), 1); + EXPECT_EQ(PopTicker(options, NON_LAST_LEVEL_SEEK_FILTERED), 0); + EXPECT_EQ(PopTicker(options, NON_LAST_LEVEL_SEEK_FILTER_MATCH), 1); ASSERT_OK(dbfull()->SetOptions({{"prefix_extractor", "capped:3"}})); ASSERT_EQ(dbfull()->GetOptions().prefix_extractor->AsString(), @@ -2820,11 +2865,11 @@ TEST_F(DBBloomFilterTest, DynamicBloomFilterMultipleSST) { read_options.iterate_upper_bound = &upper_bound; std::unique_ptr iter(db_->NewIterator(read_options)); ASSERT_EQ(CountIter(iter, "foo"), 2); - ASSERT_EQ(PopTicker(options, NON_LAST_LEVEL_SEEK_FILTERED), 0); - ASSERT_EQ(PopTicker(options, NON_LAST_LEVEL_SEEK_FILTER_MATCH), 1); + EXPECT_EQ(PopTicker(options, NON_LAST_LEVEL_SEEK_FILTERED), 0); + EXPECT_EQ(PopTicker(options, NON_LAST_LEVEL_SEEK_FILTER_MATCH), 1); ASSERT_EQ(CountIter(iter, "gpk"), 0); - ASSERT_EQ(PopTicker(options, NON_LAST_LEVEL_SEEK_FILTERED), 0); - ASSERT_EQ(PopTicker(options, NON_LAST_LEVEL_SEEK_FILTER_MATCH), 0); + EXPECT_EQ(PopTicker(options, NON_LAST_LEVEL_SEEK_FILTERED), 0); + EXPECT_EQ(PopTicker(options, NON_LAST_LEVEL_SEEK_FILTER_MATCH), 0); // second SST with capped:3 BF ASSERT_OK(Put("foo3", "bar3")); @@ -2836,15 +2881,14 @@ TEST_F(DBBloomFilterTest, DynamicBloomFilterMultipleSST) { // BF is cappped:3 now std::unique_ptr iter_tmp(db_->NewIterator(read_options)); ASSERT_EQ(CountIter(iter_tmp, "foo"), 4); - ASSERT_EQ(PopTicker(options, NON_LAST_LEVEL_SEEK_FILTERED), 0); - ASSERT_EQ(PopTicker(options, NON_LAST_LEVEL_SEEK_FILTER_MATCH), 2); + EXPECT_EQ(PopTicker(options, NON_LAST_LEVEL_SEEK_FILTERED), 0); + EXPECT_EQ(PopTicker(options, NON_LAST_LEVEL_SEEK_FILTER_MATCH), 2); ASSERT_EQ(CountIter(iter_tmp, "gpk"), 0); // both counters are incremented because BF is "not changed" for 1 of the // 2 SST files, so filter is checked once and found no match. - ASSERT_EQ(PopTicker(options, NON_LAST_LEVEL_SEEK_FILTERED), 1); - ASSERT_EQ(PopTicker(options, NON_LAST_LEVEL_SEEK_FILTER_MATCH), 0); + EXPECT_EQ(PopTicker(options, NON_LAST_LEVEL_SEEK_FILTERED), 1); + EXPECT_EQ(PopTicker(options, NON_LAST_LEVEL_SEEK_FILTER_MATCH), 0); } - // IAMHERE ASSERT_OK(dbfull()->SetOptions({{"prefix_extractor", "fixed:2"}})); ASSERT_EQ(dbfull()->GetOptions().prefix_extractor->AsString(), @@ -2860,33 +2904,34 @@ TEST_F(DBBloomFilterTest, DynamicBloomFilterMultipleSST) { std::unique_ptr iter_tmp(db_->NewIterator(read_options)); ASSERT_EQ(CountIter(iter_tmp, "foo"), 9); // the first and last BF are checked - ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_CHECKED), 7); - ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_USEFUL), 1); + EXPECT_EQ(PopTicker(options, NON_LAST_LEVEL_SEEK_FILTERED), 0); + EXPECT_EQ(PopTicker(options, NON_LAST_LEVEL_SEEK_FILTER_MATCH), 2); ASSERT_EQ(CountIter(iter_tmp, "gpk"), 0); // only last BF is checked and not found - ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_CHECKED), 8); - ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_USEFUL), 2); + EXPECT_EQ(PopTicker(options, NON_LAST_LEVEL_SEEK_FILTERED), 1); + EXPECT_EQ(PopTicker(options, NON_LAST_LEVEL_SEEK_FILTER_MATCH), 0); } - // iter_old can only see the first SST, so checked plus 1 + // iter_old can only see the first SST ASSERT_EQ(CountIter(iter_old, "foo"), 4); - ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_CHECKED), 9); - // iter was created after the first setoptions call so only full filter - // will check the filter + EXPECT_EQ(PopTicker(options, NON_LAST_LEVEL_SEEK_FILTERED), 0); + EXPECT_EQ(PopTicker(options, NON_LAST_LEVEL_SEEK_FILTER_MATCH), 1); + // same with iter, but different prefix extractor ASSERT_EQ(CountIter(iter, "foo"), 2); - ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_CHECKED), 10); + EXPECT_EQ(PopTicker(options, NON_LAST_LEVEL_SEEK_FILTERED), 0); + EXPECT_EQ(PopTicker(options, NON_LAST_LEVEL_SEEK_FILTER_MATCH), 1); { // keys in all three SSTs are visible to iterator // The range of [foo, foz90000] is compatible with (fixed:1) and (fixed:2) - // so +2 for checked counter std::unique_ptr iter_all(db_->NewIterator(read_options)); ASSERT_EQ(CountIter(iter_all, "foo"), 9); - ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_CHECKED), 12); - ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_USEFUL), 2); + EXPECT_EQ(PopTicker(options, NON_LAST_LEVEL_SEEK_FILTERED), 0); + EXPECT_EQ(PopTicker(options, NON_LAST_LEVEL_SEEK_FILTER_MATCH), 2); ASSERT_EQ(CountIter(iter_all, "gpk"), 0); - ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_CHECKED), 13); - ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_USEFUL), 3); + // FIXME? isn't seek key out of SST range? + EXPECT_EQ(PopTicker(options, NON_LAST_LEVEL_SEEK_FILTERED), 1); + EXPECT_EQ(PopTicker(options, NON_LAST_LEVEL_SEEK_FILTER_MATCH), 0); } ASSERT_OK(dbfull()->SetOptions({{"prefix_extractor", "capped:3"}})); ASSERT_EQ(dbfull()->GetOptions().prefix_extractor->AsString(), @@ -2896,11 +2941,12 @@ TEST_F(DBBloomFilterTest, DynamicBloomFilterMultipleSST) { ASSERT_EQ(CountIter(iter_all, "foo"), 6); // all three SST are checked because the current options has the same as // the remaining SST (capped:3) - ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_CHECKED), 16); - ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_USEFUL), 3); + EXPECT_EQ(PopTicker(options, NON_LAST_LEVEL_SEEK_FILTERED), 0); + EXPECT_EQ(PopTicker(options, NON_LAST_LEVEL_SEEK_FILTER_MATCH), 3); ASSERT_EQ(CountIter(iter_all, "gpk"), 0); - ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_CHECKED), 17); - ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_USEFUL), 4); + // FIXME? isn't seek key out of SST range? + EXPECT_EQ(PopTicker(options, NON_LAST_LEVEL_SEEK_FILTERED), 1); + EXPECT_EQ(PopTicker(options, NON_LAST_LEVEL_SEEK_FILTER_MATCH), 0); } // TODO(Zhongyi): Maybe also need to add Get calls to test point look up? } @@ -3002,13 +3048,13 @@ TEST_F(DBBloomFilterTest, DynamicBloomFilterOptions) { { std::unique_ptr iter(db_->NewIterator(read_options)); ASSERT_EQ(CountIter(iter, "foo"), 12); - ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_CHECKED), 3); - ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_USEFUL), 0); + EXPECT_EQ(PopTicker(options, NON_LAST_LEVEL_SEEK_FILTERED), 0); + EXPECT_EQ(PopTicker(options, NON_LAST_LEVEL_SEEK_FILTER_MATCH), 3); } std::unique_ptr iter_old(db_->NewIterator(read_options)); ASSERT_EQ(CountIter(iter_old, "foo"), 12); - ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_CHECKED), 6); - ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_USEFUL), 0); + EXPECT_EQ(PopTicker(options, NON_LAST_LEVEL_SEEK_FILTERED), 0); + EXPECT_EQ(PopTicker(options, NON_LAST_LEVEL_SEEK_FILTER_MATCH), 3); ASSERT_OK(dbfull()->SetOptions({{"prefix_extractor", "capped:3"}})); ASSERT_EQ(dbfull()->GetOptions().prefix_extractor->AsString(), @@ -3017,17 +3063,18 @@ TEST_F(DBBloomFilterTest, DynamicBloomFilterOptions) { std::unique_ptr iter(db_->NewIterator(read_options)); // "fp*" should be skipped ASSERT_EQ(CountIter(iter, "foo"), 9); - ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_CHECKED), 6); - ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_USEFUL), 0); + EXPECT_EQ(PopTicker(options, NON_LAST_LEVEL_SEEK_FILTERED), 0); + EXPECT_EQ(PopTicker(options, NON_LAST_LEVEL_SEEK_FILTER_MATCH), 0); } // iterator created before should not be affected and see all keys ASSERT_EQ(CountIter(iter_old, "foo"), 12); - ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_CHECKED), 9); - ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_USEFUL), 0); + EXPECT_EQ(PopTicker(options, NON_LAST_LEVEL_SEEK_FILTERED), 0); + EXPECT_EQ(PopTicker(options, NON_LAST_LEVEL_SEEK_FILTER_MATCH), 3); ASSERT_EQ(CountIter(iter_old, "abc"), 0); - ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_CHECKED), 12); - ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_USEFUL), 3); + // FIXME? isn't seek key out of SST range? + EXPECT_EQ(PopTicker(options, NON_LAST_LEVEL_SEEK_FILTERED), 3); + EXPECT_EQ(PopTicker(options, NON_LAST_LEVEL_SEEK_FILTER_MATCH), 0); } } @@ -3122,19 +3169,19 @@ class FixedSuffix4Transform : public SliceTransform { std::pair GetBloomStat(const Options& options, bool sst) { if (sst) { - return { - options.statistics->getAndResetTickerCount(BLOOM_FILTER_PREFIX_CHECKED), - options.statistics->getAndResetTickerCount(BLOOM_FILTER_PREFIX_USEFUL)}; + return {options.statistics->getAndResetTickerCount( + NON_LAST_LEVEL_SEEK_FILTER_MATCH), + options.statistics->getAndResetTickerCount( + NON_LAST_LEVEL_SEEK_FILTERED)}; } else { auto hit = std::exchange(get_perf_context()->bloom_memtable_hit_count, 0); auto miss = std::exchange(get_perf_context()->bloom_memtable_miss_count, 0); - return {hit + miss, miss}; + return {hit, miss}; } } -std::pair CheckedAndUseful(uint64_t checked, - uint64_t useful) { - return {checked, useful}; +std::pair HitAndMiss(uint64_t hits, uint64_t misses) { + return {hits, misses}; } } // anonymous namespace @@ -3172,27 +3219,27 @@ TEST_F(DBBloomFilterTest, WeirdPrefixExtractorWithFilter1) { if (flushed) { // TODO: support auto_prefix_mode in memtable? read_options.auto_prefix_mode = true; } - EXPECT_EQ(GetBloomStat(options, flushed), CheckedAndUseful(0, 0)); + EXPECT_EQ(GetBloomStat(options, flushed), HitAndMiss(0, 0)); { Slice ub("999aaaa"); read_options.iterate_upper_bound = &ub; std::unique_ptr iter(db_->NewIterator(read_options)); EXPECT_EQ(CountIter(iter, "aaaa"), 3); - EXPECT_EQ(GetBloomStat(options, flushed), CheckedAndUseful(1, 0)); + EXPECT_EQ(GetBloomStat(options, flushed), HitAndMiss(1, 0)); } { Slice ub("999abaa"); read_options.iterate_upper_bound = &ub; std::unique_ptr iter(db_->NewIterator(read_options)); EXPECT_EQ(CountIter(iter, "abaa"), 1); - EXPECT_EQ(GetBloomStat(options, flushed), CheckedAndUseful(1, 0)); + EXPECT_EQ(GetBloomStat(options, flushed), HitAndMiss(1, 0)); } { Slice ub("999acaa"); read_options.iterate_upper_bound = &ub; std::unique_ptr iter(db_->NewIterator(read_options)); EXPECT_EQ(CountIter(iter, "acaa"), 0); - EXPECT_EQ(GetBloomStat(options, flushed), CheckedAndUseful(1, 1)); + EXPECT_EQ(GetBloomStat(options, flushed), HitAndMiss(0, 1)); } { Slice ub("zzzz"); @@ -3200,7 +3247,7 @@ TEST_F(DBBloomFilterTest, WeirdPrefixExtractorWithFilter1) { std::unique_ptr iter(db_->NewIterator(read_options)); EXPECT_EQ(CountIter(iter, "baa"), 3); if (flushed) { // TODO: fix memtable case - EXPECT_EQ(GetBloomStat(options, flushed), CheckedAndUseful(0, 0)); + EXPECT_EQ(GetBloomStat(options, flushed), HitAndMiss(0, 0)); } } } @@ -3246,13 +3293,13 @@ TEST_F(DBBloomFilterTest, WeirdPrefixExtractorWithFilter2) { get_perf_context()->bloom_memtable_hit_count = 0; get_perf_context()->bloom_memtable_miss_count = 0; } - EXPECT_EQ(GetBloomStat(options, flushed), CheckedAndUseful(0, 0)); + EXPECT_EQ(GetBloomStat(options, flushed), HitAndMiss(0, 0)); { Slice ub("aaaa000"); read_options.iterate_upper_bound = &ub; std::unique_ptr iter(db_->NewIterator(read_options)); EXPECT_EQ(CountIter(iter, "aaaa999"), 3); - EXPECT_EQ(GetBloomStat(options, flushed), CheckedAndUseful(1, 0)); + EXPECT_EQ(GetBloomStat(options, flushed), HitAndMiss(1, 0)); } { // Note: prefix does work as upper bound @@ -3260,7 +3307,7 @@ TEST_F(DBBloomFilterTest, WeirdPrefixExtractorWithFilter2) { read_options.iterate_upper_bound = &ub; std::unique_ptr iter(db_->NewIterator(read_options)); EXPECT_EQ(CountIter(iter, "aaaa999"), 3); - EXPECT_EQ(GetBloomStat(options, flushed), CheckedAndUseful(1, 0)); + EXPECT_EQ(GetBloomStat(options, flushed), HitAndMiss(1, 0)); } { // Note: prefix does not work here as seek key @@ -3268,28 +3315,28 @@ TEST_F(DBBloomFilterTest, WeirdPrefixExtractorWithFilter2) { read_options.iterate_upper_bound = &ub; std::unique_ptr iter(db_->NewIterator(read_options)); EXPECT_EQ(CountIter(iter, "aaaa"), 0); - EXPECT_EQ(GetBloomStat(options, flushed), CheckedAndUseful(1, 0)); + EXPECT_EQ(GetBloomStat(options, flushed), HitAndMiss(1, 0)); } { Slice ub("aaba000"); read_options.iterate_upper_bound = &ub; std::unique_ptr iter(db_->NewIterator(read_options)); EXPECT_EQ(CountIter(iter, "aaba999"), 1); - EXPECT_EQ(GetBloomStat(options, flushed), CheckedAndUseful(1, 0)); + EXPECT_EQ(GetBloomStat(options, flushed), HitAndMiss(1, 0)); } { Slice ub("aaca000"); read_options.iterate_upper_bound = &ub; std::unique_ptr iter(db_->NewIterator(read_options)); EXPECT_EQ(CountIter(iter, "aaca999"), 0); - EXPECT_EQ(GetBloomStat(options, flushed), CheckedAndUseful(1, 1)); + EXPECT_EQ(GetBloomStat(options, flushed), HitAndMiss(0, 1)); } { Slice ub("aaaz"); read_options.iterate_upper_bound = &ub; std::unique_ptr iter(db_->NewIterator(read_options)); EXPECT_EQ(CountIter(iter, "zzz"), 5); - EXPECT_EQ(GetBloomStat(options, flushed), CheckedAndUseful(0, 0)); + EXPECT_EQ(GetBloomStat(options, flushed), HitAndMiss(0, 0)); } { // Note: prefix does work here as seek key, but only finds key equal @@ -3299,7 +3346,7 @@ TEST_F(DBBloomFilterTest, WeirdPrefixExtractorWithFilter2) { read_options.prefix_same_as_start = true; std::unique_ptr iter(db_->NewIterator(read_options)); EXPECT_EQ(CountIter(iter, "qqqq"), 1); - EXPECT_EQ(GetBloomStat(options, flushed), CheckedAndUseful(1, 0)); + EXPECT_EQ(GetBloomStat(options, flushed), HitAndMiss(1, 0)); } } } @@ -3390,13 +3437,13 @@ TEST_F(DBBloomFilterTest, WeirdPrefixExtractorWithFilter3) { get_perf_context()->bloom_memtable_hit_count = 0; get_perf_context()->bloom_memtable_miss_count = 0; } - EXPECT_EQ(GetBloomStat(options, flushed), CheckedAndUseful(0, 0)); + EXPECT_EQ(GetBloomStat(options, flushed), HitAndMiss(0, 0)); { Slice ub("aaaa999"); read_options.iterate_upper_bound = &ub; std::unique_ptr iter(db_->NewIterator(read_options)); EXPECT_EQ(CountIter(iter, "aaaa000"), 3); - EXPECT_EQ(GetBloomStat(options, flushed), CheckedAndUseful(1, 0)); + EXPECT_EQ(GetBloomStat(options, flushed), HitAndMiss(1, 0)); } { // Note: prefix as seek key is not bloom-optimized @@ -3406,28 +3453,28 @@ TEST_F(DBBloomFilterTest, WeirdPrefixExtractorWithFilter3) { read_options.iterate_upper_bound = &ub; std::unique_ptr iter(db_->NewIterator(read_options)); EXPECT_EQ(CountIter(iter, "aaaa"), 3); - EXPECT_EQ(GetBloomStat(options, flushed), CheckedAndUseful(0, 0)); + EXPECT_EQ(GetBloomStat(options, flushed), HitAndMiss(0, 0)); } { Slice ub("aaba9"); read_options.iterate_upper_bound = &ub; std::unique_ptr iter(db_->NewIterator(read_options)); EXPECT_EQ(CountIter(iter, "aaba0"), 1); - EXPECT_EQ(GetBloomStat(options, flushed), CheckedAndUseful(1, 0)); + EXPECT_EQ(GetBloomStat(options, flushed), HitAndMiss(1, 0)); } { Slice ub("aaca9"); read_options.iterate_upper_bound = &ub; std::unique_ptr iter(db_->NewIterator(read_options)); EXPECT_EQ(CountIter(iter, "aaca0"), 0); - EXPECT_EQ(GetBloomStat(options, flushed), CheckedAndUseful(1, 1)); + EXPECT_EQ(GetBloomStat(options, flushed), HitAndMiss(0, 1)); } { Slice ub("qqqq9"); read_options.iterate_upper_bound = &ub; std::unique_ptr iter(db_->NewIterator(read_options)); EXPECT_EQ(CountIter(iter, "qqqq0"), 1); - EXPECT_EQ(GetBloomStat(options, flushed), CheckedAndUseful(1, 0)); + EXPECT_EQ(GetBloomStat(options, flushed), HitAndMiss(1, 0)); } { // Note: prefix as seek key is not bloom-optimized @@ -3435,7 +3482,7 @@ TEST_F(DBBloomFilterTest, WeirdPrefixExtractorWithFilter3) { read_options.iterate_upper_bound = &ub; std::unique_ptr iter(db_->NewIterator(read_options)); EXPECT_EQ(CountIter(iter, "qqqq"), weird_comparator ? 7 : 2); - EXPECT_EQ(GetBloomStat(options, flushed), CheckedAndUseful(0, 0)); + EXPECT_EQ(GetBloomStat(options, flushed), HitAndMiss(0, 0)); } { // Note: prefix as seek key is not bloom-optimized @@ -3443,14 +3490,14 @@ TEST_F(DBBloomFilterTest, WeirdPrefixExtractorWithFilter3) { read_options.iterate_upper_bound = &ub; std::unique_ptr iter(db_->NewIterator(read_options)); EXPECT_EQ(CountIter(iter, "zzzz"), weird_comparator ? 8 : 1); - EXPECT_EQ(GetBloomStat(options, flushed), CheckedAndUseful(0, 0)); + EXPECT_EQ(GetBloomStat(options, flushed), HitAndMiss(0, 0)); } { Slice ub("zzzz9"); read_options.iterate_upper_bound = &ub; std::unique_ptr iter(db_->NewIterator(read_options)); EXPECT_EQ(CountIter(iter, "aab"), weird_comparator ? 6 : 5); - EXPECT_EQ(GetBloomStat(options, flushed), CheckedAndUseful(0, 0)); + EXPECT_EQ(GetBloomStat(options, flushed), HitAndMiss(0, 0)); } } } diff --git a/db/db_test2.cc b/db/db_test2.cc index 4b25e16a597..91ded289058 100644 --- a/db/db_test2.cc +++ b/db/db_test2.cc @@ -5992,17 +5992,15 @@ TEST_F(DBTest2, ChangePrefixExtractor) { iterator->Seek("xa"); ASSERT_TRUE(iterator->Valid()); ASSERT_EQ("xb", iterator->key().ToString()); - // It's a bug that the counter BLOOM_FILTER_PREFIX_CHECKED is not - // correct in this case. So don't check counters in this case. if (expect_filter_check) { - ASSERT_EQ(0, TestGetTickerCount(options, BLOOM_FILTER_PREFIX_CHECKED)); + EXPECT_EQ(0, PopTicker(options, NON_LAST_LEVEL_SEEK_FILTER_MATCH)); } iterator->Seek("xz"); ASSERT_TRUE(iterator->Valid()); ASSERT_EQ("xz1", iterator->key().ToString()); if (expect_filter_check) { - ASSERT_EQ(0, TestGetTickerCount(options, BLOOM_FILTER_PREFIX_CHECKED)); + EXPECT_EQ(0, PopTicker(options, NON_LAST_LEVEL_SEEK_FILTER_MATCH)); } } @@ -6020,7 +6018,7 @@ TEST_F(DBTest2, ChangePrefixExtractor) { ASSERT_TRUE(iterator->Valid()); ASSERT_EQ("xb", iterator->key().ToString()); if (expect_filter_check) { - ASSERT_EQ(0, TestGetTickerCount(options, BLOOM_FILTER_PREFIX_CHECKED)); + EXPECT_EQ(0, PopTicker(options, NON_LAST_LEVEL_SEEK_FILTER_MATCH)); } } @@ -6034,14 +6032,14 @@ TEST_F(DBTest2, ChangePrefixExtractor) { ASSERT_TRUE(iterator->Valid()); ASSERT_EQ("xb", iterator->key().ToString()); if (expect_filter_check) { - ASSERT_EQ(0, TestGetTickerCount(options, BLOOM_FILTER_PREFIX_CHECKED)); + EXPECT_EQ(0, PopTicker(options, NON_LAST_LEVEL_SEEK_FILTER_MATCH)); } iterator->Seek("xx0"); ASSERT_TRUE(iterator->Valid()); ASSERT_EQ("xx1", iterator->key().ToString()); if (expect_filter_check) { - ASSERT_EQ(1, TestGetTickerCount(options, BLOOM_FILTER_PREFIX_CHECKED)); + EXPECT_EQ(1, PopTicker(options, NON_LAST_LEVEL_SEEK_FILTER_MATCH)); } } @@ -6059,21 +6057,21 @@ TEST_F(DBTest2, ChangePrefixExtractor) { ASSERT_TRUE(iterator->Valid()); ASSERT_EQ("xb", iterator->key().ToString()); if (expect_filter_check) { - ASSERT_EQ(2, TestGetTickerCount(options, BLOOM_FILTER_PREFIX_CHECKED)); + EXPECT_EQ(1, PopTicker(options, NON_LAST_LEVEL_SEEK_FILTER_MATCH)); } iterator->Seek("xg"); ASSERT_TRUE(iterator->Valid()); ASSERT_EQ("xx1", iterator->key().ToString()); if (expect_filter_check) { - ASSERT_EQ(3, TestGetTickerCount(options, BLOOM_FILTER_PREFIX_CHECKED)); + EXPECT_EQ(1, PopTicker(options, NON_LAST_LEVEL_SEEK_FILTER_MATCH)); } iterator->Seek("xz"); ASSERT_TRUE(iterator->Valid()); ASSERT_EQ("xz1", iterator->key().ToString()); if (expect_filter_check) { - ASSERT_EQ(4, TestGetTickerCount(options, BLOOM_FILTER_PREFIX_CHECKED)); + EXPECT_EQ(1, PopTicker(options, NON_LAST_LEVEL_SEEK_FILTER_MATCH)); } ASSERT_OK(iterator->status()); @@ -6085,14 +6083,14 @@ TEST_F(DBTest2, ChangePrefixExtractor) { ASSERT_TRUE(iterator->Valid()); ASSERT_EQ("xb", iterator->key().ToString()); if (expect_filter_check) { - ASSERT_EQ(5, TestGetTickerCount(options, BLOOM_FILTER_PREFIX_CHECKED)); + EXPECT_EQ(1, PopTicker(options, NON_LAST_LEVEL_SEEK_FILTER_MATCH)); } iterator->Seek("xx0"); ASSERT_TRUE(iterator->Valid()); ASSERT_EQ("xx1", iterator->key().ToString()); if (expect_filter_check) { - ASSERT_EQ(6, TestGetTickerCount(options, BLOOM_FILTER_PREFIX_CHECKED)); + EXPECT_EQ(1, PopTicker(options, NON_LAST_LEVEL_SEEK_FILTER_MATCH)); } ASSERT_OK(iterator->status()); @@ -6106,7 +6104,7 @@ TEST_F(DBTest2, ChangePrefixExtractor) { ASSERT_TRUE(iterator->Valid()); ASSERT_EQ("xb", iterator->key().ToString()); if (expect_filter_check) { - ASSERT_EQ(7, TestGetTickerCount(options, BLOOM_FILTER_PREFIX_CHECKED)); + EXPECT_EQ(1, PopTicker(options, NON_LAST_LEVEL_SEEK_FILTER_MATCH)); } ASSERT_OK(iterator->status()); } @@ -6180,13 +6178,19 @@ TEST_F(DBTest2, AutoPrefixMode1) { ro.total_order_seek = false; ro.auto_prefix_mode = true; - const auto stat = BLOOM_FILTER_PREFIX_CHECKED; + const auto hit_stat = options.num_levels == 1 + ? LAST_LEVEL_SEEK_FILTER_MATCH + : NON_LAST_LEVEL_SEEK_FILTER_MATCH; + const auto miss_stat = options.num_levels == 1 + ? LAST_LEVEL_SEEK_FILTERED + : NON_LAST_LEVEL_SEEK_FILTERED; { std::unique_ptr iterator(db_->NewIterator(ro)); iterator->Seek("b1"); ASSERT_TRUE(iterator->Valid()); ASSERT_EQ("x1", iterator->key().ToString()); - EXPECT_EQ(0, TestGetAndResetTickerCount(options, stat)); + EXPECT_EQ(0, TestGetAndResetTickerCount(options, hit_stat)); + EXPECT_EQ(0, TestGetAndResetTickerCount(options, miss_stat)); ASSERT_OK(iterator->status()); } @@ -6198,7 +6202,8 @@ TEST_F(DBTest2, AutoPrefixMode1) { std::unique_ptr iterator(db_->NewIterator(ro)); iterator->Seek("b1"); ASSERT_FALSE(iterator->Valid()); - EXPECT_EQ(1, TestGetAndResetTickerCount(options, stat)); + EXPECT_EQ(0, TestGetAndResetTickerCount(options, hit_stat)); + EXPECT_EQ(1, TestGetAndResetTickerCount(options, miss_stat)); ASSERT_OK(iterator->status()); } @@ -6208,7 +6213,8 @@ TEST_F(DBTest2, AutoPrefixMode1) { iterator->Seek("b1"); ASSERT_TRUE(iterator->Valid()); ASSERT_EQ("x1", iterator->key().ToString()); - EXPECT_EQ(0, TestGetAndResetTickerCount(options, stat)); + EXPECT_EQ(0, TestGetAndResetTickerCount(options, hit_stat)); + EXPECT_EQ(0, TestGetAndResetTickerCount(options, miss_stat)); ASSERT_OK(iterator->status()); } @@ -6217,7 +6223,8 @@ TEST_F(DBTest2, AutoPrefixMode1) { std::unique_ptr iterator(db_->NewIterator(ro)); iterator->Seek("b1"); ASSERT_FALSE(iterator->Valid()); - EXPECT_EQ(1, TestGetAndResetTickerCount(options, stat)); + EXPECT_EQ(0, TestGetAndResetTickerCount(options, hit_stat)); + EXPECT_EQ(1, TestGetAndResetTickerCount(options, miss_stat)); ASSERT_OK(iterator->status()); } @@ -6226,7 +6233,8 @@ TEST_F(DBTest2, AutoPrefixMode1) { std::unique_ptr iterator(db_->NewIterator(ro)); iterator->Seek("b1"); ASSERT_FALSE(iterator->Valid()); - EXPECT_EQ(0, TestGetAndResetTickerCount(options, stat)); + EXPECT_EQ(0, TestGetAndResetTickerCount(options, hit_stat)); + EXPECT_EQ(0, TestGetAndResetTickerCount(options, miss_stat)); ASSERT_OK(iterator->status()); } @@ -6237,25 +6245,29 @@ TEST_F(DBTest2, AutoPrefixMode1) { ub = "b9"; iterator->Seek("b1"); ASSERT_FALSE(iterator->Valid()); - EXPECT_EQ(1, TestGetAndResetTickerCount(options, stat)); + EXPECT_EQ(0, TestGetAndResetTickerCount(options, hit_stat)); + EXPECT_EQ(1, TestGetAndResetTickerCount(options, miss_stat)); ASSERT_OK(iterator->status()); ub = "z"; iterator->Seek("b1"); ASSERT_TRUE(iterator->Valid()); ASSERT_EQ("x1", iterator->key().ToString()); - EXPECT_EQ(0, TestGetAndResetTickerCount(options, stat)); + EXPECT_EQ(0, TestGetAndResetTickerCount(options, hit_stat)); + EXPECT_EQ(0, TestGetAndResetTickerCount(options, miss_stat)); ub = "c"; iterator->Seek("b1"); ASSERT_FALSE(iterator->Valid()); - EXPECT_EQ(1, TestGetAndResetTickerCount(options, stat)); + EXPECT_EQ(0, TestGetAndResetTickerCount(options, hit_stat)); + EXPECT_EQ(1, TestGetAndResetTickerCount(options, miss_stat)); ub = "b9"; iterator->SeekForPrev("b1"); ASSERT_TRUE(iterator->Valid()); ASSERT_EQ("a1", iterator->key().ToString()); - EXPECT_EQ(0, TestGetAndResetTickerCount(options, stat)); + EXPECT_EQ(0, TestGetAndResetTickerCount(options, hit_stat)); + EXPECT_EQ(0, TestGetAndResetTickerCount(options, miss_stat)); ub = "zz"; iterator->SeekToLast(); @@ -6287,26 +6299,30 @@ TEST_F(DBTest2, AutoPrefixMode1) { ub = "b1"; iterator->Seek("b9"); ASSERT_FALSE(iterator->Valid()); - EXPECT_EQ(1, TestGetAndResetTickerCount(options, stat)); + EXPECT_EQ(0, TestGetAndResetTickerCount(options, hit_stat)); + EXPECT_EQ(1, TestGetAndResetTickerCount(options, miss_stat)); ASSERT_OK(iterator->status()); ub = "b1"; iterator->Seek("z"); ASSERT_TRUE(iterator->Valid()); ASSERT_EQ("y1", iterator->key().ToString()); - EXPECT_EQ(0, TestGetAndResetTickerCount(options, stat)); + EXPECT_EQ(0, TestGetAndResetTickerCount(options, hit_stat)); + EXPECT_EQ(0, TestGetAndResetTickerCount(options, miss_stat)); ub = "b1"; iterator->Seek("c"); ASSERT_FALSE(iterator->Valid()); - EXPECT_EQ(0, TestGetAndResetTickerCount(options, stat)); + EXPECT_EQ(0, TestGetAndResetTickerCount(options, hit_stat)); + EXPECT_EQ(0, TestGetAndResetTickerCount(options, miss_stat)); ub = "b"; iterator->Seek("c9"); ASSERT_FALSE(iterator->Valid()); // Fails if ReverseBytewiseComparator::IsSameLengthImmediateSuccessor // is "correctly" implemented. - EXPECT_EQ(0, TestGetAndResetTickerCount(options, stat)); + EXPECT_EQ(0, TestGetAndResetTickerCount(options, hit_stat)); + EXPECT_EQ(0, TestGetAndResetTickerCount(options, miss_stat)); ub = "a"; iterator->Seek("b9"); @@ -6314,7 +6330,8 @@ TEST_F(DBTest2, AutoPrefixMode1) { // is "correctly" implemented. ASSERT_TRUE(iterator->Valid()); ASSERT_EQ("a1", iterator->key().ToString()); - EXPECT_EQ(0, TestGetAndResetTickerCount(options, stat)); + EXPECT_EQ(0, TestGetAndResetTickerCount(options, hit_stat)); + EXPECT_EQ(0, TestGetAndResetTickerCount(options, miss_stat)); ub = "b"; iterator->Seek("a"); @@ -6322,7 +6339,8 @@ TEST_F(DBTest2, AutoPrefixMode1) { // Fails if ReverseBytewiseComparator::IsSameLengthImmediateSuccessor // matches BytewiseComparator::IsSameLengthImmediateSuccessor. Upper // comparing before seek key prevents a real bug from surfacing. - EXPECT_EQ(0, TestGetAndResetTickerCount(options, stat)); + EXPECT_EQ(0, TestGetAndResetTickerCount(options, hit_stat)); + EXPECT_EQ(0, TestGetAndResetTickerCount(options, miss_stat)); ub = "b1"; iterator->SeekForPrev("b9"); @@ -6330,7 +6348,8 @@ TEST_F(DBTest2, AutoPrefixMode1) { // Fails if ReverseBytewiseComparator::IsSameLengthImmediateSuccessor // is "correctly" implemented. ASSERT_EQ("x1", iterator->key().ToString()); - EXPECT_EQ(0, TestGetAndResetTickerCount(options, stat)); + EXPECT_EQ(0, TestGetAndResetTickerCount(options, hit_stat)); + EXPECT_EQ(0, TestGetAndResetTickerCount(options, miss_stat)); ub = "a"; iterator->SeekToLast(); @@ -6372,7 +6391,8 @@ TEST_F(DBTest2, AutoPrefixMode1) { std::unique_ptr iterator(db_->NewIterator(ro)); iterator->Seek(Slice(a_end_stuff, 2)); ASSERT_FALSE(iterator->Valid()); - EXPECT_EQ(1, TestGetAndResetTickerCount(options, stat)); + EXPECT_EQ(0, TestGetAndResetTickerCount(options, hit_stat)); + EXPECT_EQ(1, TestGetAndResetTickerCount(options, miss_stat)); ASSERT_OK(iterator->status()); // test, cannot be validly optimized with auto_prefix_mode @@ -6382,7 +6402,8 @@ TEST_F(DBTest2, AutoPrefixMode1) { iterator->Seek(Slice(a_end_stuff, 2)); // !!! BUG !!! See "BUG" section of auto_prefix_mode. ASSERT_FALSE(iterator->Valid()); - EXPECT_EQ(1, TestGetAndResetTickerCount(options, stat)); + EXPECT_EQ(0, TestGetAndResetTickerCount(options, hit_stat)); + EXPECT_EQ(1, TestGetAndResetTickerCount(options, miss_stat)); ASSERT_OK(iterator->status()); // To prove that is the wrong result, now use total order seek @@ -6393,7 +6414,8 @@ TEST_F(DBTest2, AutoPrefixMode1) { iterator->Seek(Slice(a_end_stuff, 2)); ASSERT_TRUE(iterator->Valid()); ASSERT_EQ("b", iterator->key().ToString()); - EXPECT_EQ(0, TestGetAndResetTickerCount(options, stat)); + EXPECT_EQ(0, TestGetAndResetTickerCount(options, hit_stat)); + EXPECT_EQ(0, TestGetAndResetTickerCount(options, miss_stat)); ASSERT_OK(iterator->status()); } } while (ChangeOptions(kSkipPlainTable)); diff --git a/include/rocksdb/perf_context.h b/include/rocksdb/perf_context.h index f0edc4291fd..46266abbaab 100644 --- a/include/rocksdb/perf_context.h +++ b/include/rocksdb/perf_context.h @@ -25,6 +25,8 @@ namespace ROCKSDB_NAMESPACE { // Break down performance counters by level and store per-level perf context in // PerfContextByLevel struct PerfContextByLevelBase { + // These Bloom stats apply to point reads (Get/MultiGet) for whole key and + // prefix filters. // # of times bloom filter has avoided file reads, i.e., negatives. uint64_t bloom_filter_useful = 0; // # of times bloom FullFilter has not avoided the reads. @@ -217,9 +219,9 @@ struct PerfContextBase { uint64_t bloom_memtable_hit_count; // total number of mem table bloom misses uint64_t bloom_memtable_miss_count; - // total number of SST table bloom hits + // total number of SST bloom hits uint64_t bloom_sst_hit_count; - // total number of SST table bloom misses + // total number of SST bloom misses uint64_t bloom_sst_miss_count; // Time spent waiting on key locks in transaction lock manager. diff --git a/table/block_based/block_based_table_reader.cc b/table/block_based/block_based_table_reader.cc index 49d9a29a365..a1519e21cc7 100644 --- a/table/block_based/block_based_table_reader.cc +++ b/table/block_based/block_based_table_reader.cc @@ -1951,8 +1951,13 @@ bool BlockBasedTable::FullFilterKeyMayMatch( prefix_extractor->Transform(user_key_without_ts), no_io, const_ikey_ptr, get_context, lookup_context, read_options); RecordTick(rep_->ioptions.stats, BLOOM_FILTER_PREFIX_CHECKED); - if (!may_match) { + if (may_match) { + // Includes prefix stats + PERF_COUNTER_BY_LEVEL_ADD(bloom_filter_full_positive, 1, rep_->level); + } else { RecordTick(rep_->ioptions.stats, BLOOM_FILTER_PREFIX_USEFUL); + // Includes prefix stats + PERF_COUNTER_BY_LEVEL_ADD(bloom_filter_useful, 1, rep_->level); } } return may_match; @@ -1989,10 +1994,18 @@ void BlockBasedTable::FullFilterKeysMayMatch( read_options); RecordTick(rep_->ioptions.stats, BLOOM_FILTER_PREFIX_CHECKED, before_keys); uint64_t after_keys = range->KeysLeft(); + if (after_keys) { + // Includes prefix stats + PERF_COUNTER_BY_LEVEL_ADD(bloom_filter_full_positive, after_keys, + rep_->level); + } uint64_t filtered_keys = before_keys - after_keys; if (filtered_keys) { RecordTick(rep_->ioptions.stats, BLOOM_FILTER_PREFIX_USEFUL, filtered_keys); + // Includes prefix stats + PERF_COUNTER_BY_LEVEL_ADD(bloom_filter_useful, filtered_keys, + rep_->level); } } } @@ -2222,11 +2235,12 @@ Status BlockBasedTable::Get(const ReadOptions& read_options, const Slice& key, if (matched && filter != nullptr) { if (rep_->whole_key_filtering) { RecordTick(rep_->ioptions.stats, BLOOM_FILTER_FULL_TRUE_POSITIVE); - PERF_COUNTER_BY_LEVEL_ADD(bloom_filter_full_true_positive, 1, - rep_->level); } else { RecordTick(rep_->ioptions.stats, BLOOM_FILTER_PREFIX_TRUE_POSITIVE); } + // Includes prefix stats + PERF_COUNTER_BY_LEVEL_ADD(bloom_filter_full_true_positive, 1, + rep_->level); } if (s.ok() && !iiter->status().IsNotFound()) { diff --git a/table/block_based/block_based_table_reader_sync_and_async.h b/table/block_based/block_based_table_reader_sync_and_async.h index 857ae854d41..0cb251afa52 100644 --- a/table/block_based/block_based_table_reader_sync_and_async.h +++ b/table/block_based/block_based_table_reader_sync_and_async.h @@ -734,11 +734,12 @@ DEFINE_SYNC_AND_ASYNC(void, BlockBasedTable::MultiGet) if (matched && filter != nullptr) { if (rep_->whole_key_filtering) { RecordTick(rep_->ioptions.stats, BLOOM_FILTER_FULL_TRUE_POSITIVE); - PERF_COUNTER_BY_LEVEL_ADD(bloom_filter_full_true_positive, 1, - rep_->level); } else { RecordTick(rep_->ioptions.stats, BLOOM_FILTER_PREFIX_TRUE_POSITIVE); } + // Includes prefix stats + PERF_COUNTER_BY_LEVEL_ADD(bloom_filter_full_true_positive, 1, + rep_->level); } if (s.ok() && !iiter->status().IsNotFound()) { s = iiter->status(); diff --git a/util/slice_transform_test.cc b/util/slice_transform_test.cc index 64ac8bb1f32..18b0ea51f32 100644 --- a/util/slice_transform_test.cc +++ b/util/slice_transform_test.cc @@ -91,8 +91,8 @@ class SliceTransformDBTest : public testing::Test { }; namespace { -uint64_t TestGetTickerCount(const Options& options, Tickers ticker_type) { - return options.statistics->getTickerCount(ticker_type); +uint64_t PopTicker(const Options& options, Tickers ticker_type) { + return options.statistics->getAndResetTickerCount(ticker_type); } } // namespace @@ -121,28 +121,33 @@ TEST_F(SliceTransformDBTest, CapPrefix) { ASSERT_OK(iter->status()); ASSERT_TRUE(iter->Valid()); ASSERT_EQ(iter->value().ToString(), "bar"); - ASSERT_EQ(TestGetTickerCount(last_options_, BLOOM_FILTER_PREFIX_USEFUL), 0U); + EXPECT_EQ(PopTicker(last_options_, NON_LAST_LEVEL_SEEK_FILTER_MATCH), 1U); + EXPECT_EQ(PopTicker(last_options_, NON_LAST_LEVEL_SEEK_FILTERED), 0U); iter->Seek("foo2"); ASSERT_OK(iter->status()); ASSERT_TRUE(!iter->Valid()); - ASSERT_EQ(TestGetTickerCount(last_options_, BLOOM_FILTER_PREFIX_USEFUL), 1U); + EXPECT_EQ(PopTicker(last_options_, NON_LAST_LEVEL_SEEK_FILTER_MATCH), 0U); + EXPECT_EQ(PopTicker(last_options_, NON_LAST_LEVEL_SEEK_FILTERED), 1U); iter->Seek("barbarbar"); ASSERT_OK(iter->status()); ASSERT_TRUE(iter->Valid()); ASSERT_EQ(iter->value().ToString(), "foo"); - ASSERT_EQ(TestGetTickerCount(last_options_, BLOOM_FILTER_PREFIX_USEFUL), 1U); + EXPECT_EQ(PopTicker(last_options_, NON_LAST_LEVEL_SEEK_FILTER_MATCH), 1U); + EXPECT_EQ(PopTicker(last_options_, NON_LAST_LEVEL_SEEK_FILTERED), 0U); iter->Seek("barfoofoo"); ASSERT_OK(iter->status()); ASSERT_TRUE(!iter->Valid()); - ASSERT_EQ(TestGetTickerCount(last_options_, BLOOM_FILTER_PREFIX_USEFUL), 2U); + EXPECT_EQ(PopTicker(last_options_, NON_LAST_LEVEL_SEEK_FILTER_MATCH), 0U); + EXPECT_EQ(PopTicker(last_options_, NON_LAST_LEVEL_SEEK_FILTERED), 1U); iter->Seek("foobarbar"); ASSERT_OK(iter->status()); ASSERT_TRUE(!iter->Valid()); - ASSERT_EQ(TestGetTickerCount(last_options_, BLOOM_FILTER_PREFIX_USEFUL), 3U); + EXPECT_EQ(PopTicker(last_options_, NON_LAST_LEVEL_SEEK_FILTER_MATCH), 0U); + EXPECT_EQ(PopTicker(last_options_, NON_LAST_LEVEL_SEEK_FILTERED), 1U); } } // namespace ROCKSDB_NAMESPACE From 9595914d8f5d0c20e31a7adbd255672428378a01 Mon Sep 17 00:00:00 2001 From: Peter Dillinger Date: Fri, 19 May 2023 10:51:40 -0700 Subject: [PATCH 3/3] Update HISTORY --- HISTORY.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/HISTORY.md b/HISTORY.md index 51533d204ed..2dff045667e 100644 --- a/HISTORY.md +++ b/HISTORY.md @@ -7,10 +7,12 @@ * Introduced a new option `CompactionOptionsFIFO::file_temperature_age_thresholds` that allows FIFO compaction to compact files to different temperatures based on key age (#11428). * Added a new ticker stat to count how many times RocksDB detected a corruption while verifying a block checksum: `BLOCK_CHECKSUM_MISMATCH_COUNT`. * New statistics `rocksdb.file.read.db.open.micros` that measures read time of block-based SST tables or blob files during db open. +* New statistics tickers for various iterator seek behaviors and relevant filtering, as \*`_LEVEL_SEEK_`\*. (#11460) ### Public API Changes * EXPERIMENTAL: Add new API `DB::ClipColumnFamily` to clip the key in CF to a certain range. It will physically deletes all keys outside the range including tombstones. * Add `MakeSharedCache()` construction functions to various cache Options objects, and deprecated the `NewWhateverCache()` functions with long parameter lists. +* Changed the meaning of various Bloom filter stats (prefix vs. whole key), with iterator-related filtering only being tracked in the new \*`_LEVEL_SEEK_`\*. stats. (#11460) ### Behavior changes * For x86, CPU features are no longer detected at runtime nor in build scripts, but in source code using common preprocessor defines. This will likely unlock some small performance improvements on some newer hardware, but could hurt performance of the kCRC32c checksum, which is no longer the default, on some "portable" builds. See PR #11419 for details.