Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Truncate range tombstones by leveraging InternalKeys #4432

Closed
Closed
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
51 changes: 50 additions & 1 deletion db/compaction.cc
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,55 @@ void Compaction::GetBoundaryKeys(
}
}

std::vector<CompactionInputFiles> Compaction::PopulateWithAtomicBoundaries(
VersionStorageInfo* vstorage, std::vector<CompactionInputFiles> inputs) {
const Comparator* ucmp = vstorage->InternalComparator()->user_comparator();
for (size_t i = 0; i < inputs.size(); i++) {
if (inputs[i].level == 0 || inputs[i].files.empty()) {
continue;
}
inputs[i].atomic_compaction_unit_boundaries.reserve(inputs[i].files.size());
AtomicCompactionUnitBoundary cur_boundary;
uint64_t cur_end_key_footer = 0;
size_t first_atomic_idx = 0;
auto add_unit_boundary = [&](size_t to) {
if (first_atomic_idx == to) return;
for (size_t k = first_atomic_idx; k < to; k++) {
inputs[i].atomic_compaction_unit_boundaries.push_back(cur_boundary);
}
first_atomic_idx = to;
};
for (size_t j = 0; j < inputs[i].files.size(); j++) {
const auto* f = inputs[i].files[j];
const Slice& start_user_key = f->smallest.user_key();
const Slice& end_user_key = f->largest.user_key();
if (first_atomic_idx == j) {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This can only happen when j == 0, right?

// First file in an atomic compaction unit.
cur_boundary.smallest = start_user_key;
cur_boundary.largest = end_user_key;
} else if (ucmp->Compare(cur_boundary.largest, start_user_key) == 0 &&
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Do you think it's possible to reuse sstableKeyCompare here?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes it is possible! Thanks for pointing that out.

cur_end_key_footer !=
PackSequenceAndType(kMaxSequenceNumber,
kTypeRangeDeletion)) {
// SSTs overlap but the end key of the previous file was not
// artificially extended by a range tombstone. Extend the current
// boundary.
cur_boundary.largest = end_user_key;
} else {
// Atomic compaction unit has ended.
add_unit_boundary(j);
cur_boundary.smallest = start_user_key;
cur_boundary.largest = end_user_key;
}
cur_end_key_footer = ExtractInternalKeyFooter(f->largest.Encode());
}
add_unit_boundary(inputs[i].files.size());
assert(inputs[i].files.size() ==
inputs[i].atomic_compaction_unit_boundaries.size());
}
return inputs;
}

// helper function to determine if compaction is creating files at the
// bottommost level
bool Compaction::IsBottommostLevel(
Expand Down Expand Up @@ -155,7 +204,7 @@ Compaction::Compaction(VersionStorageInfo* vstorage,
output_compression_(_compression),
output_compression_opts_(_compression_opts),
deletion_compaction_(_deletion_compaction),
inputs_(std::move(_inputs)),
inputs_(PopulateWithAtomicBoundaries(vstorage, std::move(_inputs))),
grandparents_(std::move(_grandparents)),
score_(_score),
bottommost_level_(IsBottommostLevel(output_level_, vstorage, inputs_)),
Expand Down
25 changes: 25 additions & 0 deletions db/compaction.h
Original file line number Diff line number Diff line change
Expand Up @@ -15,11 +15,23 @@

namespace rocksdb {

// An AtomicCompactionUnitBoundary represents a range of keys [smallest,
// largest] that exactly spans one ore more neighbouring SSTs on the same
// level. Every pair of SSTs in this range "overlap" (i.e., the largest
// user key of one file is the smallest user key of the next file). These
// boundaries are propagated down to RangeDelAggregator during compaction
// to provide safe truncation boundaries for range tombstones.
struct AtomicCompactionUnitBoundary {
Slice smallest;
Slice largest;
};

// The structure that manages compaction input files associated
// with the same physical level.
struct CompactionInputFiles {
int level;
std::vector<FileMetaData*> files;
std::vector<AtomicCompactionUnitBoundary> atomic_compaction_unit_boundaries;
inline bool empty() const { return files.empty(); }
inline size_t size() const { return files.size(); }
inline void clear() { files.clear(); }
Expand Down Expand Up @@ -96,6 +108,12 @@ class Compaction {
return inputs_[compaction_input_level][i];
}

const std::vector<AtomicCompactionUnitBoundary>* boundaries(
size_t compaction_input_level) const {
assert(compaction_input_level < inputs_.size());
return &inputs_[compaction_input_level].atomic_compaction_unit_boundaries;
}

// Returns the list of file meta data of the specified compaction
// input level.
// REQUIREMENT: "compaction_input_level" must be >= 0 and
Expand Down Expand Up @@ -262,6 +280,13 @@ class Compaction {
const std::vector<CompactionInputFiles>& inputs,
Slice* smallest_key, Slice* largest_key);

// Get the atomic file boundaries for all files in the compaction. Necessary
// in order to avoid the scenario described in
// https://github.com/facebook/rocksdb/pull/4432#discussion_r221072219 and plumb
// down appropriate key boundaries to RangeDelAggregator during compaction.
static std::vector<CompactionInputFiles> PopulateWithAtomicBoundaries(
VersionStorageInfo* vstorage, std::vector<CompactionInputFiles> inputs);

// helper function to determine if compaction with inputs and storage is
// bottommost
static bool IsBottommostLevel(
Expand Down
Loading