Skip to content

Commit

Permalink
Merge #88969 #89207
Browse files Browse the repository at this point in the history
88969: sql: address minor typos in recent overflow fix r=mgartner a=mgartner

This commit fixes minor typos introduced in #88199.

Release note: None

89207: kvserver: rm consistency check diff report in tests r=erikgrinaker a=pavelkalinnikov

This change removes the `BadChecksumReportDiff` testing knob.

Previously, a node that runs a consistency check would report any diffs through this callback to tests. However, now the plan is to move away from computing the diff, and instead leave storage engine checkpoints on each replica so that later they can be analysed/diffed by tooling.

When this plan is implemented, the initiating node will no longer be able to report diffs into the testing knob. This commit proactively refactors a test in such a way that it doesn't need the knob, and verifies the diff by reading from the storage directly. This approximates what we will do with tooling too.

Touches #21128

Release note: None

Co-authored-by: Marcus Gartner <[email protected]>
Co-authored-by: Pavel Kalinnikov <[email protected]>
  • Loading branch information
3 people committed Oct 4, 2022
3 parents 60928a6 + cebb8ab + 706823e commit 619ca6e
Show file tree
Hide file tree
Showing 10 changed files with 91 additions and 115 deletions.
9 changes: 3 additions & 6 deletions pkg/kv/kvserver/client_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -46,14 +46,11 @@ func getArgs(key roachpb.Key) *roachpb.GetRequest {
}
}

// putArgs returns a PutRequest and PutResponse pair addressed to
// the default replica for the specified key / value.
// putArgs returns a PutRequest for the specified key / value.
func putArgs(key roachpb.Key, value []byte) *roachpb.PutRequest {
return &roachpb.PutRequest{
RequestHeader: roachpb.RequestHeader{
Key: key,
},
Value: roachpb.MakeValueFromBytes(value),
RequestHeader: roachpb.RequestHeader{Key: key},
Value: roachpb.MakeValueFromBytes(value),
}
}

Expand Down
150 changes: 60 additions & 90 deletions pkg/kv/kvserver/consistency_queue_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,6 @@
package kvserver_test

import (
"bytes"
"context"
"fmt"
io "io"
Expand All @@ -22,7 +21,6 @@ import (
"time"

"github.com/cockroachdb/cockroach/pkg/base"
"github.com/cockroachdb/cockroach/pkg/keys"
"github.com/cockroachdb/cockroach/pkg/kv"
"github.com/cockroachdb/cockroach/pkg/kv/kvserver"
"github.com/cockroachdb/cockroach/pkg/kv/kvserver/kvserverbase"
Expand Down Expand Up @@ -239,59 +237,17 @@ func TestCheckConsistencyInconsistent(t *testing.T) {
stickyEngineRegistry := server.NewStickyInMemEnginesRegistry()
defer stickyEngineRegistry.CloseAllStickyInMemEngines()

// The cluster has 3 node, with 1 store per node. The test writes a few KVs to
// a range, which gets replicated to all 3 stores. Then it manually replaces
// an entry in s2. The consistency check must detect this and terminate n2/s2.
const numStores = 3
testKnobs := kvserver.StoreTestingKnobs{
DisableConsistencyQueue: true,
}

testKnobs := kvserver.StoreTestingKnobs{DisableConsistencyQueue: true}
var tc *testcluster.TestCluster

// s1 will report a diff with inconsistent key "e", and only s2 has that
// write (s3 agrees with s1).
diffKey := []byte("e")
var diffTimestamp hlc.Timestamp
notifyReportDiff := make(chan struct{})
testKnobs.ConsistencyTestingKnobs.BadChecksumReportDiff =
func(s roachpb.StoreIdent, diff kvserver.ReplicaSnapshotDiffSlice) {
rangeDesc := tc.LookupRangeOrFatal(t, diffKey)
repl, err := tc.FindRangeLeaseHolder(rangeDesc, nil)
require.NoError(t, err)
// Servers start at 0, but NodeID starts at 1.
store, err := tc.Servers[repl.NodeID-1].Stores().GetStore(repl.StoreID)
require.NoError(t, err)
if !assert.Equal(t, *store.Ident, s) {
return
}
if !assert.Len(t, diff, 1) {
return
}
d := diff[0]
if d.LeaseHolder || !bytes.Equal(diffKey, d.Key) || diffTimestamp != d.Timestamp {
t.Errorf("diff = %v", d)
}

// mock this out for a consistent string below.
diff[0].Timestamp = hlc.Timestamp{Logical: 987, WallTime: 123}

act := diff.String()

exp := `--- leaseholder
+++ follower
+0.000000123,987 "e"
+ ts:1970-01-01 00:00:00.000000123 +0000 UTC
+ value:"\x00\x00\x00\x00\x01T"
+ raw mvcc_key/value: 6500000000000000007b000003db0d 000000000154
`
assert.Equal(t, exp, act)
close(notifyReportDiff)
}
// s2 (index 1) will panic.
notifyFatal := make(chan struct{})
testKnobs.ConsistencyTestingKnobs.OnBadChecksumFatal = func(s roachpb.StoreIdent) {
store := tc.GetFirstStoreFromServer(t, 1)
if !assert.Equal(t, *store.Ident, s) {
return
}
store := tc.GetFirstStoreFromServer(t, 1) // only s2 must terminate
require.Equal(t, *store.Ident, s)
close(notifyFatal)
}

Expand All @@ -315,27 +271,23 @@ func TestCheckConsistencyInconsistent(t *testing.T) {
})
defer tc.Stopper().Stop(context.Background())

store := tc.GetFirstStoreFromServer(t, 0)
// Write something to the DB.
pArgs := putArgs([]byte("a"), []byte("b"))
if _, err := kv.SendWrapped(context.Background(), store.DB().NonTransactionalSender(), pArgs); err != nil {
t.Fatal(err)
}
pArgs = putArgs([]byte("c"), []byte("d"))
if _, err := kv.SendWrapped(context.Background(), store.DB().NonTransactionalSender(), pArgs); err != nil {
t.Fatal(err)
store := tc.GetFirstStoreFromServer(t, 0)
for k, v := range map[string]string{"a": "b", "c": "d"} {
_, err := kv.SendWrapped(context.Background(), store.DB().NonTransactionalSender(),
putArgs([]byte(k), []byte(v)))
require.NoError(t, err.GoError())
}

runConsistencyCheck := func() *roachpb.CheckConsistencyResponse {
checkArgs := roachpb.CheckConsistencyRequest{
RequestHeader: roachpb.RequestHeader{
// span of keys that include "a" & "c".
req := roachpb.CheckConsistencyRequest{
RequestHeader: roachpb.RequestHeader{ // keys span that includes "a" & "c"
Key: []byte("a"),
EndKey: []byte("z"),
},
Mode: roachpb.ChecksumMode_CHECK_VIA_QUEUE,
}
resp, err := kv.SendWrapped(context.Background(), store.DB().NonTransactionalSender(), &checkArgs)
resp, err := kv.SendWrapped(context.Background(), store.DB().NonTransactionalSender(), &req)
require.NoError(t, err.GoError())
return resp.(*roachpb.CheckConsistencyResponse)
}
Expand All @@ -355,17 +307,18 @@ func TestCheckConsistencyInconsistent(t *testing.T) {
}

// Run the check the first time, it shouldn't find anything.
respOK := runConsistencyCheck()
assert.Len(t, respOK.Result, 1)
assert.Equal(t, roachpb.CheckConsistencyResponse_RANGE_CONSISTENT, respOK.Result[0].Status)
resp := runConsistencyCheck()
require.Len(t, resp.Result, 1)
assert.Equal(t, roachpb.CheckConsistencyResponse_RANGE_CONSISTENT, resp.Result[0].Status)
select {
case <-notifyReportDiff:
t.Fatal("unexpected diff")
case <-notifyFatal:
t.Fatal("unexpected panic")
default:
case <-time.After(time.Second): // the panic may come asynchronously
// TODO(pavelkalinnikov): track the full lifecycle of the check in testKnobs
// on each replica, to make checks more reliable. E.g., instead of waiting
// for panic, ensure the task has started, wait until it exits, and only
// then check whether it panicked.
}

// No checkpoints should have been created.
for i := 0; i < numStores; i++ {
assert.Empty(t, onDiskCheckpointPaths(i))
Expand All @@ -375,28 +328,29 @@ func TestCheckConsistencyInconsistent(t *testing.T) {
store1 := tc.GetFirstStoreFromServer(t, 1)
var val roachpb.Value
val.SetInt(42)
diffTimestamp = tc.Server(0).Clock().Now()
// Put an inconsistent key "e" to s2, and have s1 and s3 still agree.
require.NoError(t, storage.MVCCPut(context.Background(), store1.Engine(), nil,
diffKey, diffTimestamp, hlc.ClockTimestamp{}, val, nil))
roachpb.Key("e"), tc.Server(0).Clock().Now(), hlc.ClockTimestamp{}, val, nil))

// Run consistency check again, this time it should find something.
resp := runConsistencyCheck()

select {
case <-notifyReportDiff:
case <-time.After(5 * time.Second):
t.Fatal("CheckConsistency() failed to report a diff as expected")
}
resp = runConsistencyCheck()
select {
case <-notifyFatal:
case <-time.After(5 * time.Second):
t.Fatal("CheckConsistency() failed to panic as expected")
}

// Checkpoints should have been created on all stores and they're not empty.
require.Len(t, resp.Result, 1)
assert.Equal(t, roachpb.CheckConsistencyResponse_RANGE_INCONSISTENT, resp.Result[0].Status)
assert.Contains(t, resp.Result[0].Detail, `[minority]`)
assert.Contains(t, resp.Result[0].Detail, `stats`)

// Checkpoints should have been created on all stores. Load replica snapshots
// from them.
snaps := make([]roachpb.RaftSnapshotData, numStores)
for i := 0; i < numStores; i++ {
cps := onDiskCheckpointPaths(i)
assert.Len(t, cps, 1)
require.Len(t, cps, 1)
metric := tc.GetFirstStoreFromServer(t, i).Metrics().RdbCheckpoints
testutils.SucceedsSoon(t, func() error {
if got, want := metric.Value(), int64(1); got != want {
Expand All @@ -405,25 +359,41 @@ func TestCheckConsistencyInconsistent(t *testing.T) {
return nil
})

// Create a new store on top of checkpoint location inside existing in mem
// Create a new store on top of checkpoint location inside existing in-mem
// VFS to verify its contents.
fs, err := stickyEngineRegistry.GetUnderlyingFS(base.StoreSpec{StickyInMemoryEngineID: strconv.FormatInt(int64(i), 10)})
assert.NoError(t, err)
cpEng := storage.InMemFromFS(context.Background(), fs, cps[0], storage.CacheSize(1<<20))
defer cpEng.Close()

// The range is specified using only global keys, since the implementation
// may use an intentInterleavingIter.
ms, err := storage.ComputeStats(cpEng, keys.LocalMax, roachpb.KeyMax, 0 /* nowNanos */)
assert.NoError(t, err)
// Find the problematic range in the storage.
var desc *roachpb.RangeDescriptor
require.NoError(t, kvserver.IterateRangeDescriptorsFromDisk(context.Background(), cpEng,
func(rd roachpb.RangeDescriptor) error {
if rd.RangeID == resp.Result[0].RangeID {
desc = &rd
}
return nil
}))
require.NotNil(t, desc)

assert.NotZero(t, ms.KeyBytes)
// Load the content of the problematic range.
snap, err := kvserver.LoadRaftSnapshotDataForTesting(context.Background(), *desc, cpEng)
require.NoError(t, err)
snaps[i] = snap
}

assert.Len(t, resp.Result, 1)
assert.Equal(t, roachpb.CheckConsistencyResponse_RANGE_INCONSISTENT, resp.Result[0].Status)
assert.Contains(t, resp.Result[0].Detail, `[minority]`)
assert.Contains(t, resp.Result[0].Detail, `stats`)
assert.Empty(t, kvserver.DiffRange(&snaps[0], &snaps[2])) // s1 and s3 agree
diff := kvserver.DiffRange(&snaps[0], &snaps[1])
diff[0].Timestamp = hlc.Timestamp{Logical: 987, WallTime: 123} // for determinism
wantDiff := `--- leaseholder
+++ follower
+0.000000123,987 "e"
+ ts:1970-01-01 00:00:00.000000123 +0000 UTC
+ value:"\x00\x00\x00\x00\x01T"
+ raw mvcc_key/value: 6500000000000000007b000003db0d 000000000154
`
assert.Equal(t, wantDiff, diff.String())

// A death rattle should have been written on s2 (store index 1).
eng := store1.Engine()
Expand Down
19 changes: 15 additions & 4 deletions pkg/kv/kvserver/replica_consistency.go
Original file line number Diff line number Diff line change
Expand Up @@ -149,10 +149,7 @@ func (r *Replica) checkConsistencyImpl(
minoritySnap := results[shaToIdxs[minoritySHA][0]].Response.Snapshot
curSnap := results[shaToIdxs[sha][0]].Response.Snapshot
if sha != minoritySHA && minoritySnap != nil && curSnap != nil {
diff := diffRange(curSnap, minoritySnap)
if report := r.store.cfg.TestingKnobs.ConsistencyTestingKnobs.BadChecksumReportDiff; report != nil {
report(*r.store.Ident, diff)
}
diff := DiffRange(curSnap, minoritySnap)
buf.Printf("====== diff(%x, [minority]) ======\n%v", redact.Safe(sha), diff)
}
}
Expand Down Expand Up @@ -494,6 +491,20 @@ type replicaHash struct {
PersistedMS, RecomputedMS enginepb.MVCCStats
}

// LoadRaftSnapshotDataForTesting returns all the KV data of the given range.
// Only for testing.
func LoadRaftSnapshotDataForTesting(
ctx context.Context, rd roachpb.RangeDescriptor, store storage.Reader,
) (roachpb.RaftSnapshotData, error) {
var r *Replica
var snap roachpb.RaftSnapshotData
lim := quotapool.NewRateLimiter("test", 1<<20, 1<<20)
if _, err := r.sha512(ctx, rd, store, &snap, roachpb.ChecksumMode_CHECK_FULL, lim); err != nil {
return roachpb.RaftSnapshotData{}, err
}
return snap, nil
}

// sha512 computes the SHA512 hash of all the replica data at the snapshot.
// It will dump all the kv data into snapshot if it is provided.
func (*Replica) sha512(
Expand Down
4 changes: 2 additions & 2 deletions pkg/kv/kvserver/replica_consistency_diff.go
Original file line number Diff line number Diff line change
Expand Up @@ -79,8 +79,8 @@ func (rsds ReplicaSnapshotDiffSlice) String() string {
return redact.StringWithoutMarkers(rsds)
}

// diffs the two kv dumps between the lease holder and the replica.
func diffRange(l, r *roachpb.RaftSnapshotData) ReplicaSnapshotDiffSlice {
// DiffRange diffs the two KV dumps between the leaseholder and the replica.
func DiffRange(l, r *roachpb.RaftSnapshotData) ReplicaSnapshotDiffSlice {
if l == nil || r == nil {
return nil
}
Expand Down
6 changes: 3 additions & 3 deletions pkg/kv/kvserver/replica_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -7559,7 +7559,7 @@ func TestDiffRange(t *testing.T) {
// TODO(tschottdorf): this test should really pass the data through a
// RocksDB engine to verify that the original snapshots sort correctly.

require.Empty(t, diffRange(nil, nil))
require.Empty(t, DiffRange(nil, nil))

timestamp := hlc.Timestamp{WallTime: 1729, Logical: 1}
value := []byte("foo")
Expand Down Expand Up @@ -7598,7 +7598,7 @@ func TestDiffRange(t *testing.T) {
}

// No diff works.
require.Empty(t, diffRange(leaderSnapshot, leaderSnapshot))
require.Empty(t, DiffRange(leaderSnapshot, leaderSnapshot))

replicaSnapshot := &roachpb.RaftSnapshotData{
KV: []roachpb.RaftSnapshotData_KeyValue{
Expand Down Expand Up @@ -7650,7 +7650,7 @@ func TestDiffRange(t *testing.T) {
{LeaseHolder: false, Key: []byte("X"), EndKey: []byte("Z"), Timestamp: timestamp.Add(0, 1)},
{LeaseHolder: true, Key: []byte("X"), EndKey: []byte("Z"), Timestamp: timestamp},
}
diff := diffRange(leaderSnapshot, replicaSnapshot)
diff := DiffRange(leaderSnapshot, replicaSnapshot)
require.Equal(t, eDiff, diff)

// Assert the stringifed output. This is what the consistency checker
Expand Down
4 changes: 1 addition & 3 deletions pkg/kv/kvserver/store.go
Original file line number Diff line number Diff line change
Expand Up @@ -1133,9 +1133,7 @@ type ConsistencyTestingKnobs struct {
// If non-nil, OnBadChecksumFatal is called by CheckConsistency() (instead of
// calling log.Fatal) on a checksum mismatch.
OnBadChecksumFatal func(roachpb.StoreIdent)
// If non-nil, BadChecksumReportDiff is called by CheckConsistency() on a
// checksum mismatch to report the diff between snapshots.
BadChecksumReportDiff func(roachpb.StoreIdent, ReplicaSnapshotDiffSlice)

ConsistencyQueueResultHook func(response roachpb.CheckConsistencyResponse)
}

Expand Down
2 changes: 1 addition & 1 deletion pkg/sql/logictest/testdata/logic_test/overflow
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,6 @@ true
# constant addend to the right side of the comparison when the subtraction would
# overflow.
query B
SELECT i + 1000 > -9223372036854775800 FROM t88128
SELECT i + (-1000) < 9223372036854775800 FROM t88128
----
true
6 changes: 3 additions & 3 deletions pkg/sql/opt/norm/comp_funcs.go
Original file line number Diff line number Diff line change
Expand Up @@ -37,16 +37,16 @@ func (c *CustomFuncs) CommuteInequality(

// FoldBinaryCheckOverflow attempts to evaluate a binary expression with
// constant inputs. The only operations supported are plus and minus. It returns
// a constant expression as if all the following criteria are met:
// a constant expression if all the following criteria are met:
//
// 1. The right datum is an integer, float, decimal, or interval. This
// restriction can be lifted for any type that we can construct a zero value
// of. The zero value of the right type is required in order to check for
// overflow/underflow (see #4).
// overflow/underflow (see #5).
// 2. An overload function for the given operator and input types exists and
// has an appropriate volatility.
// 3. The result type of the overload is equivalent to the type of left. This
// is required in order to check for overflow/underflow (see #4).
// is required in order to check for overflow/underflow (see #5).
// 4. The evaluation causes no error.
// 5. The evaluation does not overflow or underflow.
//
Expand Down
4 changes: 2 additions & 2 deletions pkg/sql/opt/norm/rules/comp.opt
Original file line number Diff line number Diff line change
Expand Up @@ -32,11 +32,11 @@
# 1. $leftRight is an integer, float, decimal, or interval. This restriction
# can be lifted for any type that we can construct a zero value of. The
# zero value of the right type is required in order to check for
# overflow/underflow (see #4).
# overflow/underflow (see #5).
# 2. A Minus overload for the given input types exists and has an appropriate
# volatility.
# 3. The result type of the overload is equivalent to the type of $right. This
# is required in order to check for overflow/underflow (see #4).
# is required in order to check for overflow/underflow (see #5).
# 4. The evaluation of the Minus operator causes no error.
# 5. The evaluation of the Minus operator does not overflow or underflow.
#
Expand Down
2 changes: 1 addition & 1 deletion pkg/sql/sem/tree/datum.go
Original file line number Diff line number Diff line change
Expand Up @@ -3031,7 +3031,7 @@ type DInterval struct {
duration.Duration
}

// DZeroInterval is the zero-valued DTimestamp.
// DZeroInterval is the zero-valued DInterval.
var DZeroInterval = &DInterval{}

// AsDInterval attempts to retrieve a DInterval from an Expr, panicking if the
Expand Down

0 comments on commit 619ca6e

Please sign in to comment.