diff --git a/pkg/storage/engine/mvcc.go b/pkg/storage/engine/mvcc.go index 16198f3d8853..d676687b5d53 100644 --- a/pkg/storage/engine/mvcc.go +++ b/pkg/storage/engine/mvcc.go @@ -303,32 +303,48 @@ func updateStatsOnResolve( commit bool, ) enginepb.MVCCStats { var ms enginepb.MVCCStats + + // NB: this is logging for ongoing work on #20554. + if false { + defer func() { + log.Infof(context.TODO(), "onResolve\n"+ + "orig: ts=%d metaKeySize=%d metaValSize=%d KeyBytes=%d ValBytes=%d\n"+ + "meta: ts=%d metaKeySize=%d metaValSize=%d KeyBytes=%d ValBytes=%d\n"+ + "%+v", + orig.Timestamp.WallTime, origMetaKeySize, origMetaValSize, orig.KeyBytes, orig.ValBytes, + meta.Timestamp.WallTime, metaKeySize, metaValSize, meta.KeyBytes, meta.ValBytes, + &ms) + }() + } + // In this case, we're only removing the contribution from having the // meta key around from orig.Timestamp to meta.Timestamp. ms.AgeTo(orig.Timestamp.WallTime) sys := isSysLocal(key) - // Always zero. - keyDiff := metaKeySize - origMetaKeySize - // This is going to be nonpositive: the old meta key was - // real, the new one is implicit. - valDiff := metaValSize - origMetaValSize - if sys { - ms.SysBytes += keyDiff + valDiff + ms.SysBytes += metaKeySize + metaValSize - origMetaValSize - origMetaKeySize + ms.AgeTo(meta.Timestamp.WallTime) } else { if !meta.Deleted { - ms.LiveBytes += keyDiff + valDiff + ms.LiveBytes += metaKeySize + metaValSize - origMetaValSize - origMetaKeySize } - ms.KeyBytes += keyDiff - ms.ValBytes += valDiff + // At orig.Timestamp, the original meta key disappears. + ms.KeyBytes -= origMetaKeySize + orig.KeyBytes + ms.ValBytes -= origMetaValSize + orig.ValBytes + // If committing, subtract out intent counts. if commit { ms.IntentBytes -= (meta.KeyBytes + meta.ValBytes) ms.IntentCount-- } + + ms.AgeTo(meta.Timestamp.WallTime) + + // At meta.Timestamp, the new meta key appears. + ms.KeyBytes += metaKeySize + meta.KeyBytes + ms.ValBytes += metaValSize + meta.ValBytes } - ms.AgeTo(meta.Timestamp.WallTime) return ms } diff --git a/pkg/storage/engine/mvcc_stats_test.go b/pkg/storage/engine/mvcc_stats_test.go index 7d788ff98e04..6f2d754fe791 100644 --- a/pkg/storage/engine/mvcc_stats_test.go +++ b/pkg/storage/engine/mvcc_stats_test.go @@ -34,50 +34,117 @@ import ( "github.com/cockroachdb/cockroach/pkg/util/uuid" ) -func verifyStats(debug string, ms, expMS *enginepb.MVCCStats, t *testing.T) { +// assertEq compares the given ms and expMS and errors when they don't match. It +// also recomputes the stats over the whole engine with all known +// implementations and errors on mismatch with any of them. +func assertEq(t *testing.T, engine Engine, debug string, ms, expMS *enginepb.MVCCStats) { t.Helper() + if !ms.Equal(expMS) { t.Errorf("%s: diff(ms, expMS) = %s", debug, pretty.Diff(ms, expMS)) } + + it := engine.NewIterator(false) + defer it.Close() + from, to := MVCCKey{}, MVCCKey{Key: roachpb.KeyMax} + + for _, mvccStatsTest := range mvccStatsTests { + compMS, err := mvccStatsTest.fn(it, from, to, ms.LastUpdateNanos) + if err != nil { + t.Fatal(err) + } + if !compMS.Equal(*ms) { + t.Errorf("%s: diff(ms, %s) = %s", debug, mvccStatsTest.name, pretty.Diff(*ms, compMS)) + } + } + + if t.Failed() { + t.FailNow() + } } -// TestMVCCStatsBasic writes a value, then deletes it as an intent via -// a transaction, then resolves the intent, manually verifying the -// mvcc stats at each step. -func TestMVCCStatsBasic(t *testing.T) { +// TestMVCCStatsResolveMovesTimestamp exercises the case in which an intent has its timestamp changed +// and commits. +func TestMVCCStatsResolveMovesTimestamp(t *testing.T) { defer leaktest.AfterTest(t)() engine := createTestEngine() defer engine.Close() - ms := &enginepb.MVCCStats{} + ctx := context.Background() + aggMS := &enginepb.MVCCStats{} - assertEq := func(debug string, ms, expMS *enginepb.MVCCStats) { - t.Helper() + assertEq(t, engine, "initially", aggMS, &enginepb.MVCCStats{}) - verifyStats(debug, ms, expMS, t) + key := roachpb.Key("a") + ts1 := hlc.Timestamp{WallTime: 1E9} + // Put a value. + value := roachpb.MakeValueFromString("value") + if err := MVCCPut(context.Background(), engine, aggMS, key, ts1, value, nil); err != nil { + t.Fatal(err) + } - it := engine.NewIterator(false) - defer it.Close() - from, to := MVCCKey{}, MVCCKey{Key: roachpb.KeyMax} + mKeySize := int64(mvccKey(key).EncodedSize()) // 2 + vKeySize := mvccVersionTimestampSize // 12 + vValSize := int64(len(value.RawBytes)) // 10 - for _, mvccStatsTest := range mvccStatsTests { - compMS, err := mvccStatsTest.fn(it, from, to, ms.LastUpdateNanos) - if err != nil { - t.Fatal(err) - } - if !compMS.Equal(*ms) { - // TODO(tschottdorf): This unfortunately fails. Upgrade this to - // t.Errorf when the computation has been fixed. - t.Logf("%s: diff(ms, %s) = %s", debug, mvccStatsTest.name, pretty.Diff(*ms, compMS)) - } - } + log.Infof(ctx, "mKeySize=%d vKeySize=%d vValSize=%d", mKeySize, vKeySize, vValSize) - if t.Failed() { - t.FailNow() - } + expMS := enginepb.MVCCStats{ + LiveBytes: mKeySize + vKeySize + vValSize, // 24 + LiveCount: 1, + KeyBytes: mKeySize + vKeySize, // 14 + KeyCount: 1, + ValBytes: vValSize, // 10 + ValCount: 1, + LastUpdateNanos: 1E9, } + assertEq(t, engine, "after put", aggMS, &expMS) + + // Delete the value at ts=3. We'll commit this at ts=4 later. + ts3 := hlc.Timestamp{WallTime: 3 * 1E9} + txn := &roachpb.Transaction{TxnMeta: enginepb.TxnMeta{ID: uuid.MakeV4(), Timestamp: ts3}} + txn.Timestamp.Forward(ts3) + if err := MVCCDelete(context.Background(), engine, aggMS, key, ts3, txn); err != nil { + t.Fatal(err) + } + + // Now commit the value, but with a timestamp gap (i.e. this is a + // push-commit as it would happen for a SNAPSHOT txn) + ts4 := hlc.Timestamp{WallTime: 4 * 1E9} + txn.Status = roachpb.COMMITTED + txn.Timestamp.Forward(ts4) + if err := MVCCResolveWriteIntent(context.Background(), engine, aggMS, roachpb.Intent{Span: roachpb.Span{Key: key}, Status: txn.Status, Txn: txn.TxnMeta}); err != nil { + t.Fatal(err) + } + + expAggMS := enginepb.MVCCStats{ + LastUpdateNanos: 4E9, + LiveBytes: 0, + LiveCount: 0, + KeyCount: 1, + ValCount: 2, + // The implicit meta record (deletion tombstone) counts for len("a")+1=2. + // Two versioned keys count for 2*vKeySize. + KeyBytes: mKeySize + 2*vKeySize, + ValBytes: vValSize, // the initial write (10)... + GCBytesAge: (vValSize + vKeySize) * 3, // ... along with the value (12) aged over 3s, a total of 66 + } + + assertEq(t, engine, "after committing", aggMS, &expAggMS) +} - assertEq("initially", ms, &enginepb.MVCCStats{}) +// TestMVCCStatsBasic writes a value, then deletes it as an intent via +// a transaction, then resolves the intent, manually verifying the +// mvcc stats at each step. +func TestMVCCStatsBasic(t *testing.T) { + defer leaktest.AfterTest(t)() + engine := createTestEngine() + defer engine.Close() + + ctx := context.Background() + ms := &enginepb.MVCCStats{} + + assertEq(t, engine, "initially", ms, &enginepb.MVCCStats{}) // Verify size of mvccVersionTimestampSize. ts := hlc.Timestamp{WallTime: 1 * 1E9} @@ -105,7 +172,7 @@ func TestMVCCStatsBasic(t *testing.T) { ValCount: 1, LastUpdateNanos: 1E9, } - assertEq("after put", ms, &expMS) + assertEq(t, engine, "after put", ms, &expMS) if e, a := int64(0), ms.GCBytes(); e != a { t.Fatalf("GCBytes: expected %d, got %d", e, a) } @@ -137,7 +204,7 @@ func TestMVCCStatsBasic(t *testing.T) { GCBytesAge: vValSize + vKeySize, // immediately recognizes GC'able bytes from old value at 1E9 LastUpdateNanos: 2E9, } - assertEq("after delete", ms, &expMS2) + assertEq(t, engine, "after delete", ms, &expMS2) // This is expMS2.KeyBytes + expMS2.ValBytes - expMS2.LiveBytes expGC2 := mKeySize + vKeySize + v2KeySize + m2ValSize + vValSize + v2ValSize if a := ms.GCBytes(); expGC2 != a { @@ -152,7 +219,7 @@ func TestMVCCStatsBasic(t *testing.T) { } // Stats should equal same as before the deletion after aborting the intent. expMS.LastUpdateNanos = 2E9 - assertEq("after abort", ms, &expMS) + assertEq(t, engine, "after abort", ms, &expMS) // Re-delete, but this time, we're going to commit it. txn.Status = roachpb.PENDING @@ -164,7 +231,7 @@ func TestMVCCStatsBasic(t *testing.T) { // GCBytesAge will now count the deleted value from ts=1E9 to ts=3E9. expMS2.GCBytesAge = (vValSize + vKeySize) * 2 expMS2.LastUpdateNanos = 3E9 - assertEq("after 2nd delete", ms, &expMS2) // should be same as before. + assertEq(t, engine, "after 2nd delete", ms, &expMS2) // should be same as before. if a := ms.GCBytes(); expGC2 != a { t.Fatalf("GCBytes: expected %d, got %d", expGC2, a) } @@ -203,7 +270,7 @@ func TestMVCCStatsBasic(t *testing.T) { } expGC3 := expGC2 // no change, didn't delete anything - assertEq("after 2nd put", ms, &expMS3) + assertEq(t, engine, "after 2nd put", ms, &expMS3) if a := ms.GCBytes(); expGC3 != a { t.Fatalf("GCBytes: expected %d, got %d", expGC3, a) } @@ -223,21 +290,14 @@ func TestMVCCStatsBasic(t *testing.T) { IntentBytes: vKey2Size + vVal2Size, IntentCount: 1, // The commit turned the explicit deletion intent meta back into an - // implicit one; so we see the originally written value which is now 3s - // old, plus the implicit meta key contribution (basically the key - // prefix) along with the deletion tombstone kv pair. - // You could also write this as - // (vValSize+vKeySize)*2 + (expGC3 - m2ValSize) - // as we do in the second commit. - // - // TODO(tschottdorf): the computation here is wrong; the result should be: - // GCBytesAge: (vValSize + vKeySize) * 3, - // The new deletion tombstone entry is at t=4 and so it contributes nothing to - // GCBytesAge just yet. - GCBytesAge: (vValSize+vKeySize)*3 + (mKeySize + v2ValSize + v2KeySize), + // implicit one, so we see the originally written value which is now 3s + // old. There is no contribution from the deletion tombstone yet as it + // moved from 3s (inside the meta) to 4s (implicit meta), the current + // time. + GCBytesAge: (vValSize + vKeySize) * 3, LastUpdateNanos: 4E9, } - assertEq("after first commit", ms, &expMS4) + assertEq(t, engine, "after first commit", ms, &expMS4) // With commit of the deletion intent, what really happens is that the // explicit meta (carrying the intent) becomes implicit (so its key @@ -258,10 +318,10 @@ func TestMVCCStatsBasic(t *testing.T) { LiveBytes: mKey2Size + vKey2Size + vVal2Size, LiveCount: 1, IntentAge: 0, - GCBytesAge: (vValSize+vKeySize)*2 + (expGC3 - m2ValSize), + GCBytesAge: (vValSize + vKeySize) * 3, // unchanged; still at 4s LastUpdateNanos: 4E9, } - assertEq("after second commit", ms, &expMS4) + assertEq(t, engine, "after second commit", ms, &expMS4) if a := ms.GCBytes(); expGC4 != a { // no change here t.Fatalf("GCBytes: expected %d, got %d", expGC4, a) } @@ -278,7 +338,7 @@ func TestMVCCStatsBasic(t *testing.T) { // The age increases: 6 seconds for each key2 and key. expMS5.GCBytesAge += (vKey2Size+vVal2Size)*6 + expGC4*6 expMS5.LastUpdateNanos = 10E9 - assertEq("after overwrite", ms, &expMS5) + assertEq(t, engine, "after overwrite", ms, &expMS5) // Write a transaction record which is a system-local key. txnKey := keys.TransactionKey(txn.Key, txn.ID) @@ -291,7 +351,7 @@ func TestMVCCStatsBasic(t *testing.T) { expMS6 := expMS5 expMS6.SysBytes += txnKeySize + txnValSize expMS6.SysCount++ - assertEq("after sys-local key", ms, &expMS6) + assertEq(t, engine, "after sys-local key", ms, &expMS6) } var mvccStatsTests = []struct { @@ -321,112 +381,101 @@ func TestMVCCStatsWithRandomRuns(t *testing.T) { seed := randutil.NewPseudoSeed() log.Infof(context.Background(), "using pseudo random number generator with seed %d", seed) - for _, mvccStatsTest := range mvccStatsTests { - t.Run(mvccStatsTest.name, func(t *testing.T) { - rng := rand.New(rand.NewSource(seed)) + rng := rand.New(rand.NewSource(seed)) - engine := createTestEngine() - defer engine.Close() - - ms := &enginepb.MVCCStats{} + engine := createTestEngine() + defer engine.Close() - // Now, generate a random sequence of puts, deletes and resolves. - // Each put and delete may or may not involve a txn. Resolves may - // either commit or abort. - keys := map[int32][]byte{} - var lastWT int64 - for i := int32(0); i < int32(1000); i++ { - // Create random future timestamp, up to a few seconds ahead. - ts := hlc.Timestamp{WallTime: lastWT + int64(rng.Float32()*4E9), Logical: int32(rng.Int())} - lastWT = ts.WallTime + ms := &enginepb.MVCCStats{} - if log.V(1) { - log.Infof(context.Background(), "*** cycle %d @ %s", i, ts) - } - // Manually advance aggregate intent age based on one extra second of simulation. - // Same for aggregate gc'able bytes age. - key := []byte(fmt.Sprintf("%s-%d", randutil.RandBytes(rng, int(rng.Int31n(32))), i)) - keys[i] = key - - var txn *roachpb.Transaction - if rng.Int31n(2) == 0 { // create a txn with 50% prob - txn = &roachpb.Transaction{TxnMeta: enginepb.TxnMeta{ID: uuid.MakeV4(), Timestamp: ts}} - } - // With 25% probability, put a new value; otherwise, delete an earlier - // key. Because an earlier step in this process may have itself been - // a delete, we could end up deleting a non-existent key, which is good; - // we don't mind testing that case as well. - isDelete := rng.Int31n(4) == 0 - if i > 0 && isDelete { - idx := rng.Int31n(i) - if log.V(1) { - log.Infof(context.Background(), "*** DELETE index %d", idx) - } - if err := MVCCDelete(context.Background(), engine, ms, keys[idx], ts, txn); err != nil { - // Abort any write intent on an earlier, unresolved txn. - if wiErr, ok := err.(*roachpb.WriteIntentError); ok { - wiErr.Intents[0].Status = roachpb.ABORTED - if log.V(1) { - log.Infof(context.Background(), "*** ABORT index %d", idx) - } - // Note that this already incorporates committing an intent - // at a later time (since we use a potentially later ts here - // for the resolution). - if err := MVCCResolveWriteIntent(context.Background(), engine, ms, wiErr.Intents[0]); err != nil { - t.Fatal(err) - } - // Now, re-delete. - if log.V(1) { - log.Infof(context.Background(), "*** RE-DELETE index %d", idx) - } - if err := MVCCDelete(context.Background(), engine, ms, keys[idx], ts, txn); err != nil { - t.Fatal(err) - } - } else { - t.Fatal(err) - } - } - } else { - rngVal := roachpb.MakeValueFromBytes(randutil.RandBytes(rng, int(rng.Int31n(128)))) + // Now, generate a random sequence of puts, deletes and resolves. + // Each put and delete may or may not involve a txn. Resolves may + // either commit or abort. + keys := map[int32][]byte{} + var lastWT int64 + for i := int32(0); i < int32(1000); i++ { + // Create random future timestamp, up to a few seconds ahead. + ts := hlc.Timestamp{WallTime: lastWT + int64(rng.Float32()*4E9), Logical: int32(rng.Int())} + lastWT = ts.WallTime + + if log.V(1) { + log.Infof(context.Background(), "*** cycle %d @ %s", i, ts) + } + // Manually advance aggregate intent age based on one extra second of simulation. + // Same for aggregate gc'able bytes age. + key := []byte(fmt.Sprintf("%s-%d", randutil.RandBytes(rng, int(rng.Int31n(32))), i)) + keys[i] = key + + var txn *roachpb.Transaction + if rng.Int31n(2) == 0 { // create a txn with 50% prob + txn = &roachpb.Transaction{TxnMeta: enginepb.TxnMeta{ID: uuid.MakeV4(), Timestamp: ts}} + } + // With 25% probability, put a new value; otherwise, delete an earlier + // key. Because an earlier step in this process may have itself been + // a delete, we could end up deleting a non-existent key, which is good; + // we don't mind testing that case as well. + isDelete := rng.Int31n(4) == 0 + if i > 0 && isDelete { + idx := rng.Int31n(i) + if log.V(1) { + log.Infof(context.Background(), "*** DELETE index %d", idx) + } + if err := MVCCDelete(context.Background(), engine, ms, keys[idx], ts, txn); err != nil { + // Abort any write intent on an earlier, unresolved txn. + if wiErr, ok := err.(*roachpb.WriteIntentError); ok { + wiErr.Intents[0].Status = roachpb.ABORTED if log.V(1) { - log.Infof(context.Background(), "*** PUT index %d; TXN=%t", i, txn != nil) + log.Infof(context.Background(), "*** ABORT index %d", idx) } - if err := MVCCPut(context.Background(), engine, ms, key, ts, rngVal, txn); err != nil { + // Note that this already incorporates committing an intent + // at a later time (since we use a potentially later ts here + // for the resolution). + if err := MVCCResolveWriteIntent(context.Background(), engine, ms, wiErr.Intents[0]); err != nil { t.Fatal(err) } - } - if !isDelete && txn != nil && rng.Int31n(2) == 0 { // resolve txn with 50% prob - // TODO(tschottdorf): need to simulate resolving at a pushed timestamp. - txn.Status = roachpb.COMMITTED - if rng.Int31n(10) == 0 { // abort txn with 10% prob - txn.Status = roachpb.ABORTED - } + // Now, re-delete. if log.V(1) { - log.Infof(context.Background(), "*** RESOLVE index %d; COMMIT=%t", i, txn.Status == roachpb.COMMITTED) + log.Infof(context.Background(), "*** RE-DELETE index %d", idx) } - if err := MVCCResolveWriteIntent(context.Background(), engine, ms, roachpb.Intent{Span: roachpb.Span{Key: key}, Status: txn.Status, Txn: txn.TxnMeta}); err != nil { + if err := MVCCDelete(context.Background(), engine, ms, keys[idx], ts, txn); err != nil { t.Fatal(err) } + } else { + t.Fatal(err) } + } + } else { + rngVal := roachpb.MakeValueFromBytes(randutil.RandBytes(rng, int(rng.Int31n(128)))) + if log.V(1) { + log.Infof(context.Background(), "*** PUT index %d; TXN=%t", i, txn != nil) + } + if err := MVCCPut(context.Background(), engine, ms, key, ts, rngVal, txn); err != nil { + t.Fatal(err) + } + } + if !isDelete && txn != nil && rng.Int31n(2) == 0 { // resolve txn with 50% prob + // TODO(tschottdorf): need to simulate resolving at a pushed timestamp. + txn.Status = roachpb.COMMITTED + if rng.Int31n(10) == 0 { // abort txn with 10% prob + txn.Status = roachpb.ABORTED + } + if log.V(1) { + log.Infof(context.Background(), "*** RESOLVE index %d; COMMIT=%t", i, txn.Status == roachpb.COMMITTED) + } + if err := MVCCResolveWriteIntent(context.Background(), engine, ms, roachpb.Intent{Span: roachpb.Span{Key: key}, Status: txn.Status, Txn: txn.TxnMeta}); err != nil { + t.Fatal(err) + } + } - ms.AgeTo(ts.WallTime) // a noop may not have updated the stats - // Every 10th step, verify the stats via manual engine scan. - if i%10 == 0 { - // Compute the stats manually. - iter := engine.NewIterator(false) - expMS, err := mvccStatsTest.fn(iter, mvccKey(roachpb.KeyMin), - mvccKey(roachpb.KeyMax), ts.WallTime) - iter.Close() - if err != nil { - t.Fatal(err) - } - verifyStats(fmt.Sprintf("cycle %d", i), ms, &expMS, t) - if t.Failed() { - t.Fatal("giving up") - } - } + ms.AgeTo(ts.WallTime) // a noop may not have updated the stats + // Every 10th step, verify the stats via manual engine scan. + if i%10 == 0 { + // Recompute the stats and compare. + assertEq(t, engine, fmt.Sprintf("cycle %d", i), ms, ms) + if t.Failed() { + t.Fatal("giving up") } - }) + } } } diff --git a/pkg/storage/engine/mvcc_test.go b/pkg/storage/engine/mvcc_test.go index 7ee9dc22c170..639a53c67434 100644 --- a/pkg/storage/engine/mvcc_test.go +++ b/pkg/storage/engine/mvcc_test.go @@ -3514,7 +3514,7 @@ func TestMVCCGarbageCollect(t *testing.T) { if err != nil { t.Fatal(err) } - verifyStats("verification", ms, &expMS, t) + assertEq(t, engine, "verification", ms, &expMS) }) } }