NRG (2.11): Start catchup from n.commit & fix AppendEntry is stored at seq=ae.pindex+1 (#5987)
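The second half of the title refers to the invariant that an append entry whose previous index is pindex is stored at WAL sequence pindex+1. A rough sketch of that relationship, using the helper names that appear in the tests below (the exact load/store semantics are an assumption here, not a restatement of the raft internals):

// Illustrative sketch only: store one append entry and read it back at
// the +1 offset the title describes. Helper names come from the tests below.
rn.Lock()
ae := rn.buildAppendEntry(entries) // ae.pindex is the previous log index
ae.buf, err = ae.encode(scratch[:])
require_NoError(t, err)
err = rn.storeToWAL(ae) // expected to land at WAL seq ae.pindex+1
rn.Unlock()
require_NoError(t, err)

got, err := rn.loadEntry(ae.pindex + 1)
require_NoError(t, err)
require_Equal(t, got.pindex, ae.pindex)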
Changes from 6 commits: 05bff04, eb44111, b8dd252, 2fc9d6f, 0012d42, b62c058, 18db9aa
@@ -4237,3 +4237,162 @@ func TestJetStreamClusterHardKillAfterStreamAdd(t *testing.T) {
	_, err = js.StreamInfo("TEST")
	require_NoError(t, err)
}

func TestJetStreamClusterDesyncAfterPublishToLeaderWithoutQuorum(t *testing.T) {
	c := createJetStreamClusterExplicit(t, "R3S", 3)
	defer c.shutdown()

	nc, js := jsClientConnect(t, c.randomServer())
	defer nc.Close()

	si, err := js.AddStream(&nats.StreamConfig{
		Name:     "TEST",
		Subjects: []string{"foo"},
		Replicas: 3,
	})
	require_NoError(t, err)

	streamLeader := si.Cluster.Leader
	streamLeaderServer := c.serverByName(streamLeader)
	nc.Close()
	nc, js = jsClientConnect(t, streamLeaderServer)
	defer nc.Close()

	servers := slices.DeleteFunc([]string{"S-1", "S-2", "S-3"}, func(s string) bool {
		return s == streamLeader
	})

	// Stop followers so further publishes will not have quorum.
	followerName1 := servers[0]
	followerName2 := servers[1]
	followerServer1 := c.serverByName(followerName1)
	followerServer2 := c.serverByName(followerName2)
	followerServer1.Shutdown()
	followerServer2.Shutdown()
	followerServer1.WaitForShutdown()
	followerServer2.WaitForShutdown()

	// Although this request will time out, it will be added to the stream leader's WAL.
	_, err = js.Publish("foo", []byte("first"))
	require_NotNil(t, err)
	require_Equal(t, err, nats.ErrTimeout)

	// Now shut down the leader as well.
	nc.Close()
	streamLeaderServer.Shutdown()
	streamLeaderServer.WaitForShutdown()

	// Only restart the (previous) followers.
	followerServer1 = c.restartServer(followerServer1)
Review comment: Why is one server variable reassigned and not the other?

Reply: It's used below to have a connection to that server: nc, js = jsClientConnect(t, followerServer1). The connection could be to either server, as long as it's not the (previous) leader, so only this one variable is used to set up the connection.
	c.restartServer(followerServer2)
	c.waitOnStreamLeader(globalAccountName, "TEST")

	nc, js = jsClientConnect(t, followerServer1)
	defer nc.Close()

	// Publishing a message will now have quorum.
	pubAck, err := js.Publish("foo", []byte("first, this is a retry"))
	require_NoError(t, err)
	require_Equal(t, pubAck.Sequence, 1)

	// Bring up the previous stream leader.
	c.restartServer(streamLeaderServer)
	c.waitOnAllCurrent()
	c.waitOnStreamLeader(globalAccountName, "TEST")

	// Check all servers ended up with the last published message, which had quorum.
	for _, s := range c.servers {
		c.waitOnStreamCurrent(s, globalAccountName, "TEST")

		acc, err := s.lookupAccount(globalAccountName)
		require_NoError(t, err)
		mset, err := acc.lookupStream("TEST")
		require_NoError(t, err)
		state := mset.state()
		require_Equal(t, state.Msgs, 1)
		require_Equal(t, state.Bytes, 55)
	}
}

func TestJetStreamClusterPreserveWALDuringCatchupWithMatchingTerm(t *testing.T) {
	c := createJetStreamClusterExplicit(t, "R3S", 3)
	defer c.shutdown()

	nc, js := jsClientConnect(t, c.randomServer())
	defer nc.Close()

	_, err := js.AddStream(&nats.StreamConfig{
		Name:     "TEST",
		Subjects: []string{"foo.>"},
		Replicas: 3,
	})
	nc.Close()
	require_NoError(t, err)

	// Pick one server that will only store a part of the messages in its WAL.
	rs := c.randomNonStreamLeader(globalAccountName, "TEST")
	ts := time.Now().UnixNano()

	var scratch [1024]byte
Review comment: This bit may use an explanation... maybe. I actually am not sure. Your inner loop goes to 3, but then you have a break at 1.

Reply: That description is correct. Two servers will have 3 uncommitted entries, and one server will have 2 uncommitted entries, so it needs to catch up for that third one. Have moved that condition for that one server up, so it's a bit clearer it gets 2 iterations of that loop.
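A sketch of what the reworked loop could look like with that condition moved up (illustrative only, an assumption rather than the committed diff): the per-server entry count is decided before the loop, so the lagging server visibly stores just 2 entries.

// Sketch: fix the number of uncommitted entries per server up front
// instead of breaking out of the loop partway through.
entriesToStore := uint64(3)
if s.Name() == rs.Name() {
	// This server stays one entry behind and must catch up later.
	entriesToStore = 2
}
for i := uint64(0); i < entriesToStore; i++ {
	esm := encodeStreamMsgAllowCompress("foo", "_INBOX.foo", nil, nil, i, ts, true)
	entries := []*Entry{newEntry(EntryNormal, esm)}
	rn.Lock()
	ae := rn.buildAppendEntry(entries)
	ae.buf, err = ae.encode(scratch[:])
	require_NoError(t, err)
	err = rn.storeToWAL(ae)
	rn.Unlock()
	require_NoError(t, err)
}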
	for _, s := range c.servers {
		for _, n := range s.raftNodes {
			rn := n.(*raft)
			if rn.accName == globalAccountName {
				for i := uint64(0); i < 3; i++ {
					esm := encodeStreamMsgAllowCompress("foo", "_INBOX.foo", nil, nil, i, ts, true)
					entries := []*Entry{newEntry(EntryNormal, esm)}
					rn.Lock()
					ae := rn.buildAppendEntry(entries)
					ae.buf, err = ae.encode(scratch[:])
					require_NoError(t, err)
					err = rn.storeToWAL(ae)
					rn.Unlock()
					require_NoError(t, err)

					// One server will be behind and need to catchup.
					if s.Name() == rs.Name() && i >= 1 {
						break
					}
				}
			}
		}
	}
	// Restart all.
	c.stopAll()
	c.restartAll()
	c.waitOnAllCurrent()
	c.waitOnStreamLeader(globalAccountName, "TEST")

	// Check all servers ended up with all published messages, which had quorum.
	for _, s := range c.servers {
		c.waitOnStreamCurrent(s, globalAccountName, "TEST")

		acc, err := s.lookupAccount(globalAccountName)
		require_NoError(t, err)
		mset, err := acc.lookupStream("TEST")
		require_NoError(t, err)
		state := mset.state()
		require_Equal(t, state.Msgs, 3)
		require_Equal(t, state.Bytes, 99)
	}

	// Check that the first two published messages came from our WAL, and
	// the last came from a catchup by another leader.
Review comment: It seems to me you are doing the same check for all 3 entries you look at, this comment is maybe outdated?

Reply: There are 3 checks, 2x require_Equal and 1x require_NotEqual.
	for _, n := range rs.raftNodes {
		rn := n.(*raft)
		if rn.accName == globalAccountName {
			ae, err := rn.loadEntry(2)
			require_NoError(t, err)
			require_Equal(t, ae.leader, rn.ID())

			ae, err = rn.loadEntry(3)
			require_NoError(t, err)
			require_Equal(t, ae.leader, rn.ID())

			ae, err = rn.loadEntry(4)
			require_NoError(t, err)
			require_NotEqual(t, ae.leader, rn.ID())
		}
	}
}
Review comment: Could set a shorter timeout to make the test faster? (not sure what the default is)

Reply: Done, the default seemed to be 5 seconds, lowered to 1s.
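For reference, a minimal sketch of how the publish timeout could be lowered with the Go client (assumed usage, not necessarily the exact change in this PR): either context-wide via nats.MaxWait, or per call via nats.AckWait.

// Context-wide: replace the default 5s JetStream API timeout with 1s.
js, err := nc.JetStream(nats.MaxWait(time.Second))
require_NoError(t, err)

// Per publish: wait at most 1s for the pub ack before timing out.
_, err = js.Publish("foo", []byte("first"), nats.AckWait(time.Second))
require_NotNil(t, err)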