From 6515f70c27d5c24a148a263daebfe1e4d3b45243 Mon Sep 17 00:00:00 2001 From: Tobias Schottdorf Date: Mon, 17 Jul 2017 13:41:00 -0400 Subject: [PATCH] storage: in race build, assert HardState integrity Motivated by #16749. Added an assertion that catches HardState clobbering. Now ``` make stressrace PKG=./pkg/storage/ TESTS=TestStoreRangeSplitRaceUninitializedRHS ``` fails immediately with ``` clobbered hard state: [Term: 8 != 9 Commit: 10 != 0] previously: raftpb.HardState{ Term: 0x9, Vote: 0x2, Commit: 0x0, XXX_unrecognized: nil, } overwritten with: raftpb.HardState{ Term: 0x8, Vote: 0x2, Commit: 0xa, XXX_unrecognized: nil, } ``` which is fixed in the next commit in this PR. --- pkg/storage/replica.go | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/pkg/storage/replica.go b/pkg/storage/replica.go index 5f7aa53e035b..b0c21daf66fb 100644 --- a/pkg/storage/replica.go +++ b/pkg/storage/replica.go @@ -4392,10 +4392,33 @@ func (r *Replica) applyRaftCommand( writer.Close() start := timeutil.Now() + + var assertHS *raftpb.HardState + if util.RaceEnabled && rResult.Split != nil { + oldHS, err := loadHardState(ctx, r.store.Engine(), rResult.Split.RightDesc.RangeID) + if err != nil { + log.Fatalf(ctx, "unable to load HardState: %s", err) + } + assertHS = &oldHS + } if err := batch.Commit(false); err != nil { return enginepb.MVCCStats{}, roachpb.NewError(NewReplicaCorruptionError( errors.Wrap(err, "could not commit batch"))) } + + if assertHS != nil { + // Load the HardState that was just committed (if any). + newHS, err := loadHardState(ctx, r.store.Engine(), rResult.Split.RightDesc.RangeID) + if err != nil { + panic(err) + } + // Assert that nothing moved "backwards". + if newHS.Term < assertHS.Term || (newHS.Term == assertHS.Term && newHS.Commit < assertHS.Commit) { + log.Fatalf(ctx, "clobbered hard state: %s\n\npreviously: %s\noverwritten with: %s", + pretty.Diff(newHS, *assertHS), pretty.Sprint(*assertHS), pretty.Sprint(newHS)) + } + } + elapsed := timeutil.Since(start) r.store.metrics.RaftCommandCommitLatency.RecordValue(elapsed.Nanoseconds()) return rResult.Delta, nil