Skip to content

Commit

Permalink
Skip sstables that are entirely covered by a range tombstone
Browse files Browse the repository at this point in the history
During iteration skip sstables that are entirely covered by a range
tombstone. The check to do this is loose: we check that an sstable's
[smallest_key,largest_key] (both endpoints inclusive, matching the sstable's largest_key metadata) at largest_seqno is covered by a range
tombstone.
  • Loading branch information
petermattis committed Jul 13, 2018
1 parent 92b9e84 commit 9460832
Show file tree
Hide file tree
Showing 5 changed files with 172 additions and 1 deletion.
19 changes: 19 additions & 0 deletions db/db_range_del_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -813,6 +813,25 @@ TEST_F(DBRangeDelTest, IteratorIgnoresRangeDeletions) {
db_->ReleaseSnapshot(snapshot);
}

TEST_F(DBRangeDelTest, IteratorCoveredSst) {
  // TODO(peter): generate multiple sstables with some being covered
  // by tombstones and some that aren't.
  //
  // Flush a single key into an sstable and compact it down, then cover the
  // entire key space with a range tombstone. Iteration should surface no
  // keys: the sstable is wholly covered and can be skipped.
  ASSERT_OK(db_->Put(WriteOptions(), "key", "val"));
  ASSERT_OK(db_->Flush(FlushOptions()));
  ASSERT_OK(db_->CompactRange(CompactRangeOptions(), nullptr, nullptr));
  ASSERT_OK(db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(), "a", "z"));

  ReadOptions read_opts;
  auto* iter = db_->NewIterator(read_opts);

  int count = 0;
  for (iter->SeekToFirst(); iter->Valid(); iter->Next()) {
    ++count;
  }
  // Distinguish "no keys" from "iteration failed": the loop above exits on
  // either condition, so verify the iterator terminated without error.
  ASSERT_OK(iter->status());
  ASSERT_EQ(0, count);
  delete iter;
}

#ifndef ROCKSDB_UBSAN_RUN
TEST_F(DBRangeDelTest, TailingIteratorRangeTombstoneUnsupported) {
db_->Put(WriteOptions(), "key", "val");
Expand Down
59 changes: 59 additions & 0 deletions db/range_del_aggregator.cc
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,15 @@ class UncollapsedRangeDelMap : public RangeDelMap {
return false;
}

// Not supported for uncollapsed maps. The only client of this method,
// iteration, uses collapsed maps, so reaching this is a programming error:
// log and abort rather than silently returning a wrong answer.
bool ShouldDeleteRange(const Slice& start, const Slice& end,
                       SequenceNumber seqno) {
  // Bug fix: the diagnostic previously lacked a trailing newline, which can
  // leave the message glued to subsequent stderr output.
  fprintf(stderr,
          "UncollapsedRangeDelMap::ShouldDeleteRange unimplemented\n");
  abort();
  return false;  // Unreachable (abort() is noreturn); satisfies the signature.
}

bool IsRangeOverlapped(const Slice& start, const Slice& end) {
for (const auto& tombstone : rep_) {
if (ucmp_->Compare(start, tombstone.end_key_) < 0 &&
Expand Down Expand Up @@ -224,6 +233,44 @@ class CollapsedRangeDelMap : public RangeDelMap {
return parsed.sequence < iter_->second;
}

// Returns true iff the internal-key range [start, end] (both endpoints
// inclusive, per the RangeDelAggregator contract) is entirely covered by
// range tombstones newer than `seqno`.
bool ShouldDeleteRange(const Slice& start, const Slice& end,
                       SequenceNumber seqno) {
  ParsedInternalKey parsed_start;
  if (!ParseInternalKey(start, &parsed_start)) {
    assert(false);
  }
  ParsedInternalKey parsed_end;
  if (!ParseInternalKey(end, &parsed_end)) {
    assert(false);
  }
  // An inverted range (start > end) can never be covered.
  if (ucmp_->Compare(parsed_start.user_key, parsed_end.user_key) > 0) {
    return false;
  }

  // Position on the last interval whose start key is <= parsed_start.
  auto iter = rep_.upper_bound(parsed_start.user_key);
  if (iter == rep_.begin()) {
    // before start of deletion intervals
    return false;
  }
  --iter;
  if (ucmp_->Compare(parsed_start.user_key, iter->first) < 0) {
    return false;
  }
  // Loop looking for a tombstone that is older than the range
  // sequence number, or we determine that our range is completely
  // covered by newer tombstones.
  for (; iter != rep_.end(); ++iter) {
    if (ucmp_->Compare(parsed_end.user_key, iter->first) < 0) {
      // The inclusive end key falls strictly before the next interval
      // begins, so every interval walked so far covered the range with
      // tombstones newer than seqno.
      return true;
    }
    if (seqno >= iter->second) {
      // Tombstone is older than range sequence number.
      // NOTE(review): in a collapsed map an entry with an older/zero seqno
      // presumably also marks a gap between tombstones, so this check
      // appears to catch uncovered gaps as well — confirm against the
      // collapsed-map representation.
      return false;
    }
  }
  return false;
}

bool IsRangeOverlapped(const Slice&, const Slice&) {
// Unimplemented because the only client of this method, file ingestion,
// uses uncollapsed maps.
Expand Down Expand Up @@ -409,6 +456,18 @@ bool RangeDelAggregator::ShouldDeleteImpl(const ParsedInternalKey& parsed,
return tombstone_map.ShouldDelete(parsed, mode);
}

// Returns whether the range [start, end] at `seqno` is fully covered by
// newer tombstones, delegating to the tombstone map for that snapshot
// stripe. With no tombstones at all the range trivially survives.
bool RangeDelAggregator::ShouldDeleteRange(
    const Slice& start, const Slice& end, SequenceNumber seqno) {
  if (rep_ == nullptr) {
    return false;
  }
  auto& tombstone_map = GetRangeDelMap(seqno);
  return !tombstone_map.IsEmpty() &&
         tombstone_map.ShouldDeleteRange(start, end, seqno);
}

bool RangeDelAggregator::IsRangeOverlapped(const Slice& start,
const Slice& end) {
// Unimplemented because the only client of this method, file ingestion,
Expand Down
9 changes: 9 additions & 0 deletions db/range_del_aggregator.h
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,8 @@ class RangeDelMap {

virtual bool ShouldDelete(const ParsedInternalKey& parsed,
RangeDelPositioningMode mode) = 0;
// Returns true if the internal-key range [start, end] (both endpoints
// inclusive) is entirely covered by range tombstones newer than `seqno`.
virtual bool ShouldDeleteRange(const Slice& start, const Slice& end,
                               SequenceNumber seqno) = 0;
virtual bool IsRangeOverlapped(const Slice& start, const Slice& end) = 0;
virtual void InvalidatePosition() = 0;

Expand Down Expand Up @@ -127,6 +129,13 @@ class RangeDelAggregator {
bool ShouldDeleteImpl(const Slice& internal_key,
RangeDelPositioningMode mode);

// Return true if one or more tombstones that are newer than seqno fully
// cover the range [start,end] (both endpoints are inclusive). Beware the
// inclusive endpoint which differs from most other key ranges, but matches
// the largest_key metadata for an sstable.
bool ShouldDeleteRange(const Slice& start, const Slice& end,
SequenceNumber seqno);

// Checks whether range deletions cover any keys between `start` and `end`,
// inclusive.
//
Expand Down
61 changes: 61 additions & 0 deletions db/range_del_aggregator_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,12 @@ struct ExpectedPoint {
SequenceNumber seq;
};

// A user-key range [begin, end] (inclusive) together with the sequence
// number at which it is read; used to exercise ShouldDeleteRange().
struct ExpectedRange {
  Slice begin;          // first user key of the range
  Slice end;            // last user key of the range (inclusive)
  SequenceNumber seq;   // sequence number the range is read at
};

enum Direction {
kForward,
kReverse,
Expand Down Expand Up @@ -121,6 +127,25 @@ void VerifyRangeDels(
}
}

bool ShouldDeleteRange(const std::vector<RangeTombstone>& range_dels,
const ExpectedRange& expected_range) {
RangeDelAggregator range_del_agg(icmp, {} /* snapshots */, true);
std::vector<std::string> keys, values;
for (const auto& range_del : range_dels) {
auto key_and_value = range_del.Serialize();
keys.push_back(key_and_value.first.Encode().ToString());
values.push_back(key_and_value.second.ToString());
}
std::unique_ptr<test::VectorIterator> range_del_iter(
new test::VectorIterator(keys, values));
range_del_agg.AddTombstones(std::move(range_del_iter));

std::string begin, end;
AppendInternalKey(&begin, {expected_range.begin, expected_range.seq, kTypeValue});
AppendInternalKey(&end, {expected_range.end, expected_range.seq, kTypeValue});
return range_del_agg.ShouldDeleteRange(begin, end, expected_range.seq);
}

} // anonymous namespace

TEST_F(RangeDelAggregatorTest, Empty) { VerifyRangeDels({}, {{"a", 0}}, {}); }
Expand Down Expand Up @@ -275,6 +300,42 @@ TEST_F(RangeDelAggregatorTest, MergingIteratorSeek) {
{{"c", "d", 20}, {"e", "f", 20}, {"f", "g", 10}});
}

TEST_F(RangeDelAggregatorTest, ShouldDeleteRange) {
  // [a,b] read at seqno 9 lies inside [a,c)@10: covered.
  ASSERT_TRUE(ShouldDeleteRange(
      {{"a", "c", 10}},
      {"a", "b", 9}));
  // Single-key range [a,a] is covered as well.
  ASSERT_TRUE(ShouldDeleteRange(
      {{"a", "c", 10}},
      {"a", "a", 9}));
  // Inverted range (start > end) is never considered covered.
  ASSERT_FALSE(ShouldDeleteRange(
      {{"a", "c", 10}},
      {"b", "a", 9}));
  // Range seqno equal to the tombstone's is not strictly older: not covered.
  ASSERT_FALSE(ShouldDeleteRange(
      {{"a", "c", 10}},
      {"a", "b", 10}));
  // The range's end key "c" is inclusive but the tombstone ends (exclusive)
  // at "c": not covered.
  ASSERT_FALSE(ShouldDeleteRange(
      {{"a", "c", 10}},
      {"a", "c", 9}));
  // The range starts at "a", before the tombstone begins at "b": not covered.
  ASSERT_FALSE(ShouldDeleteRange(
      {{"b", "c", 10}},
      {"a", "b", 9}));
  // Two adjacent tombstones, both newer than seqno 9, jointly cover [a,c].
  ASSERT_TRUE(ShouldDeleteRange(
      {{"a", "b", 10}, {"b", "d", 20}},
      {"a", "c", 9}));
  // The first tombstone (seqno 10) is older than the range's seqno 15.
  ASSERT_FALSE(ShouldDeleteRange(
      {{"a", "b", 10}, {"b", "d", 20}},
      {"a", "c", 15}));
  // The gap [b,c) between the tombstones leaves part of [a,d] uncovered.
  ASSERT_FALSE(ShouldDeleteRange(
      {{"a", "b", 10}, {"c", "e", 20}},
      {"a", "d", 9}));
  // [c,d] lies wholly within [c,e)@20, which is newer than seqno 15.
  ASSERT_TRUE(ShouldDeleteRange(
      {{"a", "b", 10}, {"c", "e", 20}},
      {"c", "d", 15}));
  // Same seqno as the covering tombstone: not strictly older, not covered.
  ASSERT_FALSE(ShouldDeleteRange(
      {{"a", "b", 10}, {"c", "e", 20}},
      {"c", "d", 20}));
}

} // namespace rocksdb

int main(int argc, char** argv) {
Expand Down
25 changes: 24 additions & 1 deletion db/version_set.cc
Original file line number Diff line number Diff line change
Expand Up @@ -540,6 +540,14 @@ class LevelIterator final : public InternalIterator {
InternalIterator* NewFileIterator() {
assert(file_index_ < flevel_->num_files);
auto file_meta = flevel_->files[file_index_];
// Check to see if every key in the sstable is covered by a range
// tombstone. SkipEmptyFile{Forward,Backward} will take care of
// skipping over an "empty" file if we return null.
if (range_del_agg_->ShouldDeleteRange(file_meta.smallest_key, file_meta.largest_key,
file_meta.file_metadata->largest_seqno)) {
return nullptr;
}

if (should_sample_) {
sample_file_read_inc(file_meta.file_metadata);
}
Expand Down Expand Up @@ -574,6 +582,21 @@ void LevelIterator::Seek(const Slice& target) {

InitFileIterator(new_file_index);
if (file_iter_.iter() != nullptr) {
// TODO(peter): Rather the seeking to target, we could use
// RangeDelAggregator to find the first key within the file that
// is not covered by a range tombstone. Something like:
//
// first_non_deleted = range_del_agg_->SeekForward(
// smallest_key, largest_key, largest_seqno);
// if (target < first_non_deleted) {
// target = first_non_deleted;
// }
//
// This optimization applies to SeekForPrev, Next and Prev as
// well. For Next and Prev we'd want some way to keep track of the
// current tombstone. Perhaps a RangeDelAggregator::Iterator,
// though that would need to be stable in the presence of
// modifications to RangeDelAggregator tombstone_maps.
file_iter_.Seek(target);
}
SkipEmptyFileForward();
Expand All @@ -588,8 +611,8 @@ void LevelIterator::SeekForPrev(const Slice& target) {
InitFileIterator(new_file_index);
if (file_iter_.iter() != nullptr) {
file_iter_.SeekForPrev(target);
SkipEmptyFileBackward();
}
SkipEmptyFileBackward();
}

void LevelIterator::SeekToFirst() {
Expand Down

0 comments on commit 9460832

Please sign in to comment.