Skip to content

Commit

Permalink
Merge #42582
Browse files Browse the repository at this point in the history
42582: storage: Update MVCC on {Rocks,Pebble} to support ignored seqnum ranges and rollbacks r=itsbilal a=itsbilal

Builds on Raphael's PR #42152 and adds two commits. The first adds the Pebble MVCC scanner part of the ignore-list functionality described in #41612, and restructures the code in `mvccResolveWriteIntent` for better readability and to make it usable in the intent-history-cleanup-only case. The second improves testing.

Fixes #41612.

Co-authored-by: Raphael 'kena' Poss <[email protected]>
Co-authored-by: Bilal Akhtar <[email protected]>
  • Loading branch information
3 people committed Jan 11, 2020
2 parents 51253e4 + 29f34cd commit 9a89b2e
Show file tree
Hide file tree
Showing 47 changed files with 4,200 additions and 1,281 deletions.
14 changes: 14 additions & 0 deletions c-deps/libroach/include/libroach.h
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,19 @@ typedef struct {
DBStatus status;
} DBIterState;

// A DBIgnoredSeqNumRange is an alias for the Go struct
// IgnoredSeqNumRange. It must have exactly the same memory
// layout.
//
// It describes one span of transaction sequence numbers that should
// be treated as ignored. mvccScanner::seqNumIsIgnored considers a
// sequence number covered when start_seqnum <= seq <= end_seqnum,
// i.e. both bounds are inclusive.
typedef struct {
// Lowest sequence number covered by the range (inclusive).
int32_t start_seqnum;
// Highest sequence number covered by the range (inclusive).
int32_t end_seqnum;
} DBIgnoredSeqNumRange;

// A DBIgnoredSeqNums is a (pointer, length) view over an array of
// DBIgnoredSeqNumRange entries. The MVCC scanner walks it from index
// len - 1 down to 0 and relies on the entries being sorted in seqnum
// order, non-overlapping and non-contiguous.
// NOTE(review): ownership of the backing array is not visible here;
// presumably it is owned by the caller that fills in DBTxn -- confirm.
typedef struct {
// First element of the array of ignored ranges.
DBIgnoredSeqNumRange* ranges;
// Number of elements in `ranges`.
int len;
} DBIgnoredSeqNums;

typedef struct DBCache DBCache;
typedef struct DBEngine DBEngine;
typedef struct DBIterator DBIterator;
Expand Down Expand Up @@ -328,6 +341,7 @@ typedef struct {
uint32_t epoch;
int32_t sequence;
DBTimestamp max_timestamp;
DBIgnoredSeqNums ignored_seqnums;
} DBTxn;

typedef struct {
Expand Down
80 changes: 72 additions & 8 deletions c-deps/libroach/mvcc.h
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,7 @@ template <bool reverse> class mvccScanner {
txn_epoch_(txn.epoch),
txn_sequence_(txn.sequence),
txn_max_timestamp_(txn.max_timestamp),
txn_ignored_seqnums_(txn.ignored_seqnums),
inconsistent_(inconsistent),
tombstones_(tombstones),
check_uncertainty_(timestamp < txn.max_timestamp),
Expand Down Expand Up @@ -163,27 +164,89 @@ template <bool reverse> class mvccScanner {
return results_;
}

bool seqNumIsIgnored(int32_t sequence) const {
// The ignored seqnum ranges are guaranteed to be
// non-overlapping, non-contiguous, and guaranteed to be
// sorted in seqnum order. We're going to look from the end to
// see if the current intent seqnum is ignored.
//
// TODO(nvanbenschoten): this can use use binary search to improve
// the complexity. Worth looking into if this loop takes a while, due to
// long lists of ignored sequence where the ones near the specified sequence
// number are near the start. Until then, the current implementation is
// simpler and correct.
for (int i = txn_ignored_seqnums_.len - 1; i >= 0; i--) {
if (sequence < txn_ignored_seqnums_.ranges[i].start_seqnum) {
// The history entry's sequence number is lower/older than
// the current ignored range. Go to the previous range
// and try again.
continue;
}

// Here we have a range where the start seqnum is lower than the current
// intent seqnum. Does it include it?
if (sequence > txn_ignored_seqnums_.ranges[i].end_seqnum) {
// Here we have a range where the current history entry's seqnum
// is higher than the range's end seqnum. Given that the
// ranges are storted, we're guaranteed that there won't
// be any further overlapping range at a lower value of i.
return false;
}
// Yes, it's included. We're going to skip over this
// intent seqnum and retry the search above.
return true;
}

// Exhausted the ignore list. Not ignored.
return false;
}

bool getFromIntentHistory() {
cockroach::storage::engine::enginepb::MVCCMetadata_SequencedIntent readIntent;
readIntent.set_sequence(txn_sequence_);

auto end = meta_.intent_history().end();
cockroach::storage::engine::enginepb::MVCCMetadata_SequencedIntent intent;

// Look for the intent with the sequence number less than or equal to the
// read sequence. To do so, search using upper_bound, which returns an
// iterator pointing to the first element in the range [first, last) that is
// greater than value, or last if no such element is found. Then, return the
// previous value.
auto up = std::upper_bound(
meta_.intent_history().begin(), meta_.intent_history().end(), readIntent,
meta_.intent_history().begin(), end, readIntent,
[](const cockroach::storage::engine::enginepb::MVCCMetadata_SequencedIntent& a,
const cockroach::storage::engine::enginepb::MVCCMetadata_SequencedIntent& b) -> bool {
return a.sequence() < b.sequence();
return a.sequence() < b.sequence();
});
while (up != meta_.intent_history().begin()) {
const auto intent_pos = up - 1;
// Here we have found a history entry with the highest seqnum that's
// equal or lower to the txn seqnum.
//
// However this entry may also be part of an ignored range
// (partially rolled back). We'll check this next. If it is,
// we'll try the previous sequence in the intent history.
if (seqNumIsIgnored(intent_pos->sequence())) {
// This entry was part of an ignored range. Iterate back in intent
// history to the previous sequence, and check if that one is
// ignored.
up--;
continue;
}
// This history entry has not been ignored, so we're going to select
// this version.
intent = *intent_pos;
break;
}

if (up == meta_.intent_history().begin()) {
// It is possible that no intent exists such that the sequence is less
// than the read sequence. In this case, we cannot read a value from the
// intent history.
return false;
// It is possible that no intent exists such that the sequence is less
// than the read sequence. In this case, we cannot read a value from the
// intent history.
return false;
}
const auto intent = *(up - 1);

rocksdb::Slice value = intent.value();
if (value.size() > 0 || tombstones_) {
kvs_->Put(cur_raw_key_, value);
Expand Down Expand Up @@ -301,7 +364,7 @@ template <bool reverse> class mvccScanner {
}

if (txn_epoch_ == meta_.txn().epoch()) {
if (txn_sequence_ >= meta_.txn().sequence()) {
if (txn_sequence_ >= meta_.txn().sequence() && !seqNumIsIgnored(meta_.txn().sequence())) {
// 8. We're reading our own txn's intent at an equal or higher sequence.
// Note that we read at the intent timestamp, not at our read timestamp
// as the intent timestamp may have been pushed forward by another
Expand Down Expand Up @@ -663,6 +726,7 @@ template <bool reverse> class mvccScanner {
const uint32_t txn_epoch_;
const int32_t txn_sequence_;
const DBTimestamp txn_max_timestamp_;
const DBIgnoredSeqNums txn_ignored_seqnums_;
const bool inconsistent_;
const bool tombstones_;
const bool check_uncertainty_;
Expand Down
Loading

0 comments on commit 9a89b2e

Please sign in to comment.