Skip to content

Commit

Permalink
tests: Fix member id in CORRUPT alarm
Browse files Browse the repository at this point in the history
Signed-off-by: Marek Siarkowicz <[email protected]>
  • Loading branch information
serathius committed Sep 7, 2022
1 parent 5660bf0 commit 2ddb9e0
Show file tree
Hide file tree
Showing 4 changed files with 30 additions and 24 deletions.
27 changes: 13 additions & 14 deletions server/etcdserver/corrupt.go
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,7 @@ type Hasher interface {
MemberId() types.ID
PeerHashByRev(int64) []*peerHashKVResp
LinearizableReadNotify(context.Context) error
TriggerCorruptAlarm(uint64)
TriggerCorruptAlarm(types.ID)
}

func newCorruptionChecker(lg *zap.Logger, s *EtcdServer, storage mvcc.HashStorage) *corruptionChecker {
Expand All @@ -83,7 +83,7 @@ func (h hasherAdapter) PeerHashByRev(rev int64) []*peerHashKVResp {
return h.EtcdServer.getPeerHashKVs(rev)
}

func (h hasherAdapter) TriggerCorruptAlarm(memberID uint64) {
func (h hasherAdapter) TriggerCorruptAlarm(memberID types.ID) {
h.EtcdServer.triggerCorruptAlarm(memberID)
}

Expand Down Expand Up @@ -189,7 +189,7 @@ func (cm *corruptionChecker) PeriodicCheck() error {
}

alarmed := false
mismatch := func(id uint64) {
mismatch := func(id types.ID) {
if alarmed {
return
}
Expand All @@ -207,7 +207,7 @@ func (cm *corruptionChecker) PeriodicCheck() error {
zap.Int64("compact-revision-2", h2.CompactRevision),
zap.Uint32("hash-2", h2.Hash),
)
mismatch(uint64(cm.hasher.MemberId()))
mismatch(cm.hasher.MemberId())
}

checkedCount := 0
Expand All @@ -216,17 +216,16 @@ func (cm *corruptionChecker) PeriodicCheck() error {
continue
}
checkedCount++
id := p.resp.Header.MemberId

// leader expects follower's latest revision less than or equal to leader's
if p.resp.Header.Revision > rev2 {
cm.lg.Warn(
"revision from follower must be less than or equal to leader's",
zap.Int64("leader-revision", rev2),
zap.Int64("follower-revision", p.resp.Header.Revision),
zap.String("follower-peer-id", types.ID(id).String()),
zap.String("follower-peer-id", p.id.String()),
)
mismatch(id)
mismatch(p.id)
}

// leader expects follower's latest compact revision less than or equal to leader's
Expand All @@ -235,9 +234,9 @@ func (cm *corruptionChecker) PeriodicCheck() error {
"compact revision from follower must be less than or equal to leader's",
zap.Int64("leader-compact-revision", h2.CompactRevision),
zap.Int64("follower-compact-revision", p.resp.CompactRevision),
zap.String("follower-peer-id", types.ID(id).String()),
zap.String("follower-peer-id", p.id.String()),
)
mismatch(id)
mismatch(p.id)
}

// follower's compact revision is leader's old one, then hashes must match
Expand All @@ -248,9 +247,9 @@ func (cm *corruptionChecker) PeriodicCheck() error {
zap.Uint32("leader-hash", h.Hash),
zap.Int64("follower-compact-revision", p.resp.CompactRevision),
zap.Uint32("follower-hash", p.resp.Hash),
zap.String("follower-peer-id", types.ID(id).String()),
zap.String("follower-peer-id", p.id.String()),
)
mismatch(id)
mismatch(p.id)
}
}
cm.lg.Info("finished peer corruption check", zap.Int("number-of-peers-checked", checkedCount))
Expand All @@ -277,7 +276,7 @@ func (cm *corruptionChecker) CompactHashCheck() {

// follower's compact revision is leader's old one, then hashes must match
if p.resp.Hash != hash.Hash {
cm.hasher.TriggerCorruptAlarm(uint64(p.id))
cm.hasher.TriggerCorruptAlarm(p.id)
cm.lg.Error("failed compaction hash check",
zap.Int64("revision", hash.Revision),
zap.Int64("leader-compact-revision", hash.CompactRevision),
Expand Down Expand Up @@ -335,9 +334,9 @@ func (cm *corruptionChecker) uncheckedRevisions() []mvcc.KeyValueHash {
return hashes
}

func (s *EtcdServer) triggerCorruptAlarm(id uint64) {
func (s *EtcdServer) triggerCorruptAlarm(id types.ID) {
a := &pb.AlarmRequest{
MemberID: id,
MemberID: uint64(id),
Action: pb.AlarmRequest_ACTIVATE,
Alarm: pb.AlarmType_CORRUPT,
}
Expand Down
12 changes: 6 additions & 6 deletions server/etcdserver/corrupt_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -161,15 +161,15 @@ func TestPeriodicCheck(t *testing.T) {
{
name: "Peer with newer revision",
hasher: fakeHasher{
peerHashes: []*peerHashKVResp{{resp: &pb.HashKVResponse{Header: &pb.ResponseHeader{Revision: 1, MemberId: 42}}}},
peerHashes: []*peerHashKVResp{{peerInfo: peerInfo{id: 42}, resp: &pb.HashKVResponse{Header: &pb.ResponseHeader{Revision: 1}}}},
},
expectActions: []string{"HashByRev(0)", "PeerHashByRev(0)", "ReqTimeout()", "LinearizableReadNotify()", "HashByRev(0)", "TriggerCorruptAlarm(42)"},
expectCorrupt: true,
},
{
name: "Peer with newer compact revision",
hasher: fakeHasher{
peerHashes: []*peerHashKVResp{{resp: &pb.HashKVResponse{Header: &pb.ResponseHeader{Revision: 10, MemberId: 88}, CompactRevision: 2}}},
peerHashes: []*peerHashKVResp{{peerInfo: peerInfo{id: 88}, resp: &pb.HashKVResponse{Header: &pb.ResponseHeader{Revision: 10}, CompactRevision: 2}}},
},
expectActions: []string{"HashByRev(0)", "PeerHashByRev(0)", "ReqTimeout()", "LinearizableReadNotify()", "HashByRev(0)", "TriggerCorruptAlarm(88)"},
expectCorrupt: true,
Expand All @@ -186,7 +186,7 @@ func TestPeriodicCheck(t *testing.T) {
name: "Peer with different hash and same compact revision as first local",
hasher: fakeHasher{
hashByRevResponses: []hashByRev{{hash: mvcc.KeyValueHash{Hash: 1, CompactRevision: 1}, revision: 1}, {hash: mvcc.KeyValueHash{Hash: 2, CompactRevision: 2}, revision: 2}},
peerHashes: []*peerHashKVResp{{resp: &pb.HashKVResponse{Header: &pb.ResponseHeader{Revision: 1, MemberId: 666}, CompactRevision: 1, Hash: 2}}},
peerHashes: []*peerHashKVResp{{peerInfo: peerInfo{id: 666}, resp: &pb.HashKVResponse{Header: &pb.ResponseHeader{Revision: 1}, CompactRevision: 1, Hash: 2}}},
},
expectActions: []string{"HashByRev(0)", "PeerHashByRev(1)", "ReqTimeout()", "LinearizableReadNotify()", "HashByRev(0)", "TriggerCorruptAlarm(666)"},
expectCorrupt: true,
Expand All @@ -195,8 +195,8 @@ func TestPeriodicCheck(t *testing.T) {
name: "Multiple corrupted peers trigger one alarm",
hasher: fakeHasher{
peerHashes: []*peerHashKVResp{
{resp: &pb.HashKVResponse{Header: &pb.ResponseHeader{Revision: 10, MemberId: 88}, CompactRevision: 2}},
{resp: &pb.HashKVResponse{Header: &pb.ResponseHeader{Revision: 10, MemberId: 89}, CompactRevision: 2}},
{peerInfo: peerInfo{id: 88}, resp: &pb.HashKVResponse{Header: &pb.ResponseHeader{Revision: 10}, CompactRevision: 2}},
{peerInfo: peerInfo{id: 89}, resp: &pb.HashKVResponse{Header: &pb.ResponseHeader{Revision: 10}, CompactRevision: 2}},
},
},
expectActions: []string{"HashByRev(0)", "PeerHashByRev(0)", "ReqTimeout()", "LinearizableReadNotify()", "HashByRev(0)", "TriggerCorruptAlarm(88)"},
Expand Down Expand Up @@ -371,7 +371,7 @@ func (f *fakeHasher) LinearizableReadNotify(ctx context.Context) error {
return f.linearizableReadNotify
}

func (f *fakeHasher) TriggerCorruptAlarm(memberId uint64) {
func (f *fakeHasher) TriggerCorruptAlarm(memberId types.ID) {
f.actions = append(f.actions, fmt.Sprintf("TriggerCorruptAlarm(%d)", memberId))
f.alarmTriggered = true
}
12 changes: 10 additions & 2 deletions tests/e2e/corrupt_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -119,6 +119,15 @@ func TestPeriodicCheckDetectsCorruption(t *testing.T) {
assert.NoError(t, err, "error on put")
}

members, err := cc.MemberList()
assert.NoError(t, err, "error on member list")
var memberID uint64
for _, m := range members.Members {
if m.Name == epc.procs[0].Config().name {
memberID = m.ID
}
}
assert.NotZero(t, memberID, "member not found")
epc.procs[0].Stop()
err = testutil.CorruptBBolt(datadir.ToBackendFileName(epc.procs[0].Config().dataDirPath))
assert.NoError(t, err)
Expand All @@ -128,8 +137,7 @@ func TestPeriodicCheckDetectsCorruption(t *testing.T) {
time.Sleep(checkTime * 11 / 10)
alarmResponse, err := cc.AlarmList()
assert.NoError(t, err, "error on alarm list")
// TODO: Investigate why MemberID is 0?
assert.Equal(t, []*etcdserverpb.AlarmMember{{Alarm: etcdserverpb.AlarmType_CORRUPT, MemberID: 0}}, alarmResponse.Alarms)
assert.Equal(t, []*etcdserverpb.AlarmMember{{Alarm: etcdserverpb.AlarmType_CORRUPT, MemberID: memberID}}, alarmResponse.Alarms)
}

func TestCompactHashCheckDetectCorruption(t *testing.T) {
Expand Down
3 changes: 1 addition & 2 deletions tests/integration/corrupt_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -96,8 +96,7 @@ func TestPeriodicCheckDetectsCorruption(t *testing.T) {
time.Sleep(50 * time.Millisecond)
alarmResponse, err := cc.AlarmList(ctx)
assert.NoError(t, err, "error on alarm list")
// TODO: Investigate why MemberID is 0?
assert.Equal(t, []*etcdserverpb.AlarmMember{{Alarm: etcdserverpb.AlarmType_CORRUPT, MemberID: 0}}, alarmResponse.Alarms)
assert.Equal(t, []*etcdserverpb.AlarmMember{{Alarm: etcdserverpb.AlarmType_CORRUPT, MemberID: uint64(clus.Members[0].ID())}}, alarmResponse.Alarms)
}

func TestCompactHashCheck(t *testing.T) {
Expand Down

0 comments on commit 2ddb9e0

Please sign in to comment.