Skip to content

Commit

Permalink
engineccl: ignore intents beneath start in MVCCIncrementalIterator
Browse files Browse the repository at this point in the history
As determined in cockroachdb#28358, using time-bound iterators is rife with
pitfalls. Specifically, the keys returned outside of the time bounds
might be wildly inconsistent. A iteration over time bounds [ts3, ts4]
might observe an intent at time ts2 or ts5 as still pending when in fact
it was resolved, just in an SST that was not considered by the iterator.
The only guarantee is that the snapshot of keys within the [ts3, ts4]
time bounds is consistent. (Currently this isn't quite true, thanks to
another bug, but this is the guarantee that time-bound iterators should
be providing.)

MVCCIncrementalIterator mostly handled these pitfalls correctly. It
properly ignored all non-metadata keys outside of the timestamp bounds,
as well as metadata keys (i.e., intents) above the upper timestamp
bound. It was not, however, ignoring intents beneath the lower timestamp
bound. Since these intents might be inconsistent (i.e., they might
already be resolved), the iterator must ignore them.

The most problematic symptom was that ExportRequests, which use an
MVCCIncrementalIterator, could get stuck trying to resolve an
already-resolved intent. The ExportRequest would return a
WriteIntentError because it would observe the intent and not its
resolution, but resolving the intent would be a no-op because the intent
was, in fact, already resolved, and so retrying the ExportRequest would
run into the same problem. The situation would eventually unstick when a
RocksDB compaction rearranged SSTs such that the ExportRequest observed
both the intent and its resolution.

Note that 1eb3b2a disabled the use of time-bound iterators in all
other code paths that would have had a similar problem.

Release note: None
  • Loading branch information
benesch committed Nov 16, 2018
1 parent 7f9b9b1 commit 765df40
Show file tree
Hide file tree
Showing 2 changed files with 24 additions and 9 deletions.
2 changes: 1 addition & 1 deletion pkg/ccl/storageccl/engineccl/mvcc.go
Original file line number Diff line number Diff line change
Expand Up @@ -151,7 +151,7 @@ func (i *MVCCIncrementalIterator) advance() {

metaTimestamp := hlc.Timestamp(i.meta.Timestamp)
if i.meta.Txn != nil {
if !i.endTime.Less(metaTimestamp) {
if i.startTime.Less(metaTimestamp) && !i.endTime.Less(metaTimestamp) {
i.err = &roachpb.WriteIntentError{
Intents: []roachpb.Intent{{
Span: roachpb.Span{Key: i.iter.Key().Key},
Expand Down
31 changes: 23 additions & 8 deletions pkg/ccl/storageccl/engineccl/mvcc_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,7 @@ func assertEqualKVs(
expected []engine.MVCCKeyValue,
) func(*testing.T) {
return func(t *testing.T) {
t.Helper()
iter := NewMVCCIncrementalIterator(e, IterOptions{
StartTime: startTime,
EndTime: endTime,
Expand Down Expand Up @@ -180,11 +181,18 @@ func TestMVCCIterateIncremental(t *testing.T) {
if err := engine.MVCCPut(ctx, e, nil, txn2.TxnMeta.Key, txn2.TxnMeta.Timestamp, txn2Val, &txn2); err != nil {
t.Fatal(err)
}
t.Run("intents1",
t.Run("intents",
iterateExpectErr(e, fn, testKey1, testKey1.PrefixEnd(), tsMin, tsMax, "conflicting intents"))
t.Run("intents2",
t.Run("intents",
iterateExpectErr(e, fn, testKey2, testKey2.PrefixEnd(), tsMin, tsMax, "conflicting intents"))
t.Run("intents3", assertEqualKVs(e, fn, keyMin, keyMax, tsMin, ts3, kvs(kv1_3Deleted, kv2_2_2)))
t.Run("intents",
iterateExpectErr(e, fn, keyMin, keyMax, tsMin, ts4, "conflicting intents"))
// Intents above the upper time bound or beneath the lower time bound must
// be ignored (#28358). Note that the lower time bound is exclusive while
// the upper time bound is inclusive.
t.Run("intents", assertEqualKVs(e, fn, keyMin, keyMax, tsMin, ts3, kvs(kv1_3Deleted, kv2_2_2)))
t.Run("intents", assertEqualKVs(e, fn, keyMin, keyMax, ts4, tsMax, kvs()))
t.Run("intents", assertEqualKVs(e, fn, keyMin, keyMax, ts4.Next(), tsMax, kvs()))

intent1 := roachpb.Intent{Span: roachpb.Span{Key: testKey1}, Txn: txn1.TxnMeta, Status: roachpb.COMMITTED}
if err := engine.MVCCResolveWriteIntent(ctx, e, nil, intent1); err != nil {
Expand All @@ -194,7 +202,7 @@ func TestMVCCIterateIncremental(t *testing.T) {
if err := engine.MVCCResolveWriteIntent(ctx, e, nil, intent2); err != nil {
t.Fatal(err)
}
t.Run("intents4", assertEqualKVs(e, fn, keyMin, keyMax, tsMin, tsMax, kvs(kv1_4_4, kv2_2_2)))
t.Run("intents", assertEqualKVs(e, fn, keyMin, keyMax, tsMin, tsMax, kvs(kv1_4_4, kv2_2_2)))
})

t.Run("iterFn=Next", func(t *testing.T) {
Expand Down Expand Up @@ -252,11 +260,18 @@ func TestMVCCIterateIncremental(t *testing.T) {
if err := engine.MVCCPut(ctx, e, nil, txn2.TxnMeta.Key, txn2.TxnMeta.Timestamp, txn2Val, &txn2); err != nil {
t.Fatal(err)
}
t.Run("intents1",
t.Run("intents",
iterateExpectErr(e, fn, testKey1, testKey1.PrefixEnd(), tsMin, tsMax, "conflicting intents"))
t.Run("intents2",
t.Run("intents",
iterateExpectErr(e, fn, testKey2, testKey2.PrefixEnd(), tsMin, tsMax, "conflicting intents"))
t.Run("intents3", assertEqualKVs(e, fn, keyMin, keyMax, tsMin, ts3, kvs(kv1_3Deleted, kv1_2_2, kv1_1_1, kv2_2_2)))
t.Run("intents",
iterateExpectErr(e, fn, keyMin, keyMax, tsMin, ts4, "conflicting intents"))
// Intents above the upper time bound or beneath the lower time bound must
// be ignored (#28358). Note that the lower time bound is exclusive while
// the upper time bound is inclusive.
t.Run("intents", assertEqualKVs(e, fn, keyMin, keyMax, tsMin, ts3, kvs(kv1_3Deleted, kv1_2_2, kv1_1_1, kv2_2_2)))
t.Run("intents", assertEqualKVs(e, fn, keyMin, keyMax, ts4, tsMax, kvs()))
t.Run("intents", assertEqualKVs(e, fn, keyMin, keyMax, ts4.Next(), tsMax, kvs()))

intent1 := roachpb.Intent{Span: roachpb.Span{Key: testKey1}, Txn: txn1.TxnMeta, Status: roachpb.COMMITTED}
if err := engine.MVCCResolveWriteIntent(ctx, e, nil, intent1); err != nil {
Expand All @@ -266,7 +281,7 @@ func TestMVCCIterateIncremental(t *testing.T) {
if err := engine.MVCCResolveWriteIntent(ctx, e, nil, intent2); err != nil {
t.Fatal(err)
}
t.Run("intents4", assertEqualKVs(e, fn, keyMin, keyMax, tsMin, tsMax, kvs(kv1_4_4, kv1_3Deleted, kv1_2_2, kv1_1_1, kv2_2_2)))
t.Run("intents", assertEqualKVs(e, fn, keyMin, keyMax, tsMin, tsMax, kvs(kv1_4_4, kv1_3Deleted, kv1_2_2, kv1_1_1, kv2_2_2)))
})
}

Expand Down

0 comments on commit 765df40

Please sign in to comment.