-
Notifications
You must be signed in to change notification settings - Fork 36
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Add cleanup when connection state is RESELECT_REQUESTED (using begin server) #1471
Changes from 16 commits
cd44a94
e1a9ddb
073cc78
35481c5
d80fb5a
b331833
4870ab1
5405eb5
c7d769e
a70747d
0f57f88
c328cc8
8a4b0a3
04b03b2
e3913c0
06183b2
8102231
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,327 @@ | ||
// Copyright (c) 2023 Cisco and/or its affiliates. | ||
// | ||
// SPDX-License-Identifier: Apache-2.0 | ||
// | ||
// Licensed under the Apache License, Version 2.0 (the "License"); | ||
// you may not use this file except in compliance with the License. | ||
// You may obtain a copy of the License at: | ||
// | ||
// http://www.apache.org/licenses/LICENSE-2.0 | ||
// | ||
// Unless required by applicable law or agreed to in writing, software | ||
// distributed under the License is distributed on an "AS IS" BASIS, | ||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
// See the License for the specific language governing permissions and | ||
// limitations under the License. | ||
|
||
package nsmgr_test | ||
|
||
import ( | ||
"context" | ||
"testing" | ||
|
||
"github.com/stretchr/testify/require" | ||
"go.uber.org/goleak" | ||
|
||
"github.com/networkservicemesh/api/pkg/api/registry" | ||
|
||
"github.com/networkservicemesh/sdk/pkg/networkservice/chains/client" | ||
"github.com/networkservicemesh/sdk/pkg/networkservice/chains/nsmgr" | ||
"github.com/networkservicemesh/sdk/pkg/networkservice/utils/count" | ||
"github.com/networkservicemesh/sdk/pkg/tools/sandbox" | ||
) | ||
|
||
// Even if NSMgr has restarted, | ||
// we expect that all other apps should get a Close call | ||
func TestReselect_NsmgrRestart(t *testing.T) { | ||
var samples = []struct { | ||
name string | ||
nodeNum int | ||
restartLocal bool | ||
restartRemote bool | ||
}{ | ||
{ | ||
name: "Local", | ||
nodeNum: 1, | ||
}, | ||
{ | ||
name: "Remote_RestartLocal", | ||
nodeNum: 2, | ||
restartLocal: true, | ||
}, | ||
{ | ||
name: "Remote_RestartRemote", | ||
nodeNum: 2, | ||
restartRemote: true, | ||
}, | ||
{ | ||
name: "Remote_RestartBoth", | ||
nodeNum: 2, | ||
restartLocal: true, | ||
restartRemote: true, | ||
}, | ||
} | ||
|
||
for _, sample := range samples { | ||
t.Run(sample.name, func(t *testing.T) { | ||
// nolint:scopelint | ||
testReselectWithNsmgrRestart(t, sample.nodeNum, sample.restartLocal, sample.restartRemote) | ||
}) | ||
} | ||
} | ||
|
||
func testReselectWithNsmgrRestart(t *testing.T, nodeNum int, restartLocal, restartRemote bool) { | ||
t.Cleanup(func() { goleak.VerifyNone(t) }) | ||
ctx, cancel := context.WithTimeout(context.Background(), timeout) | ||
|
||
// in this test we add counters to apps in chain | ||
// to make sure that in each app Close call goes through the whole chain, | ||
// without stopping on an error mid-chain | ||
var counterFwd []*count.Server | ||
for i := 0; i < nodeNum; i++ { | ||
counterFwd = append(counterFwd, new(count.Server)) | ||
} | ||
|
||
defer cancel() | ||
domain := sandbox.NewBuilder(ctx, t). | ||
SetNodesCount(nodeNum). | ||
SetNSMgrProxySupplier(nil). | ||
SetRegistryProxySupplier(nil). | ||
SetNodeSetup(func(ctx context.Context, node *sandbox.Node, i int) { | ||
node.NewNSMgr(ctx, "nsmgr", nil, sandbox.GenerateTestToken, nsmgr.NewServer) | ||
node.NewForwarder(ctx, ®istry.NetworkServiceEndpoint{ | ||
Name: sandbox.UniqueName("forwarder"), | ||
NetworkServiceNames: []string{"forwarder"}, | ||
}, sandbox.GenerateTestToken, counterFwd[i]) | ||
}). | ||
Build() | ||
|
||
nsRegistryClient := domain.NewNSRegistryClient(ctx, sandbox.GenerateTestToken) | ||
|
||
nsReg, err := nsRegistryClient.Register(ctx, defaultRegistryService(t.Name())) | ||
require.NoError(t, err) | ||
|
||
nseReg := defaultRegistryEndpoint(nsReg.Name) | ||
|
||
counterNse := new(count.Server) | ||
nse := domain.Nodes[nodeNum-1].NewEndpoint(ctx, nseReg, sandbox.GenerateTestToken, counterNse) | ||
|
||
request := defaultRequest(nsReg.Name) | ||
|
||
counterClient := new(count.Client) | ||
nsc := domain.Nodes[0].NewClient(ctx, sandbox.GenerateTestToken, client.WithAdditionalFunctionality(counterClient)) | ||
|
||
conn, err := nsc.Request(ctx, request.Clone()) | ||
require.NoError(t, err) | ||
|
||
if restartLocal { | ||
domain.Nodes[0].NSMgr.Restart() | ||
} | ||
if restartRemote { | ||
domain.Nodes[1].NSMgr.Restart() | ||
} | ||
|
||
nse.Cancel() | ||
|
||
nseReg2 := defaultRegistryEndpoint(nsReg.Name) | ||
nseReg2.Name += "-2" | ||
domain.Nodes[nodeNum-1].NewEndpoint(ctx, nseReg2, sandbox.GenerateTestToken, counterNse) | ||
|
||
// Wait for heal to finish successfully | ||
require.Eventually(t, checkSecondRequestsReceived(counterNse.UniqueRequests), timeout, tick) | ||
// Client should try to close connection before reselect | ||
require.Equal(t, 1, counterClient.UniqueCloses()) | ||
// Forwarder(s) should get a Close, even though NSMgr(s) restarted and didn't pass the Close | ||
for i := 0; i < nodeNum; i++ { | ||
require.Equal(t, 1, counterFwd[i].Closes()) | ||
} | ||
// Old NSE died, new NSE should not get a Close call | ||
require.Equal(t, 0, counterNse.Closes()) | ||
|
||
// Refresh shouldn't cause Close calls | ||
request.Connection = conn | ||
_, err = nsc.Request(ctx, request.Clone()) | ||
require.NoError(t, err) | ||
require.Equal(t, 0, counterNse.Closes()) | ||
for i := 0; i < nodeNum; i++ { | ||
require.Equal(t, 1, counterFwd[i].Closes()) | ||
} | ||
|
||
clientCloses := counterClient.Closes() | ||
// Close should still be able to pass though the whole connection path | ||
_, err = nsc.Close(ctx, conn) | ||
require.NoError(t, err) | ||
require.Equal(t, clientCloses+1, counterClient.Closes()) | ||
require.Equal(t, 1, counterNse.Closes()) | ||
for i := 0; i < nodeNum; i++ { | ||
require.Equal(t, 1, counterFwd[i].UniqueCloses(), i) | ||
require.Equal(t, 2, counterFwd[i].Closes(), i) | ||
} | ||
} | ||
|
||
// Even if Local forwarder has restarted, | ||
// we expect that all other apps should get a Close call. | ||
func TestReselect_LocalForwarderRestart(t *testing.T) { | ||
var samples = []struct { | ||
name string | ||
nodeNum int | ||
}{ | ||
{ | ||
name: "Local", | ||
nodeNum: 1, | ||
}, | ||
{ | ||
name: "Remote", | ||
nodeNum: 2, | ||
}, | ||
} | ||
|
||
for _, sample := range samples { | ||
t.Run(sample.name, func(t *testing.T) { | ||
// nolint:scopelint | ||
testReselectWithLocalForwarderRestart(t, sample.nodeNum) | ||
}) | ||
} | ||
} | ||
|
||
func testReselectWithLocalForwarderRestart(t *testing.T, nodeNum int) { | ||
t.Cleanup(func() { goleak.VerifyNone(t) }) | ||
ctx, cancel := context.WithTimeout(context.Background(), timeout) | ||
|
||
// in this test we add counters to apps in chain | ||
// to make sure that in each app Close call goes through the whole chain, | ||
// without stopping on an error mid-chain | ||
var counterFwd []*count.Server | ||
for i := 0; i < nodeNum; i++ { | ||
counterFwd = append(counterFwd, new(count.Server)) | ||
} | ||
|
||
defer cancel() | ||
domain := sandbox.NewBuilder(ctx, t). | ||
SetNodesCount(nodeNum). | ||
SetNSMgrProxySupplier(nil). | ||
SetRegistryProxySupplier(nil). | ||
SetNodeSetup(func(ctx context.Context, node *sandbox.Node, i int) { | ||
node.NewNSMgr(ctx, "nsmgr", nil, sandbox.GenerateTestToken, nsmgr.NewServer) | ||
node.NewForwarder(ctx, ®istry.NetworkServiceEndpoint{ | ||
Name: sandbox.UniqueName("forwarder"), | ||
NetworkServiceNames: []string{"forwarder"}, | ||
}, sandbox.GenerateTestToken, counterFwd[i]) | ||
}). | ||
Build() | ||
|
||
nsRegistryClient := domain.NewNSRegistryClient(ctx, sandbox.GenerateTestToken) | ||
|
||
nsReg, err := nsRegistryClient.Register(ctx, defaultRegistryService(t.Name())) | ||
require.NoError(t, err) | ||
|
||
nseReg := defaultRegistryEndpoint(nsReg.Name) | ||
|
||
counterNse := new(count.Server) | ||
nse := domain.Nodes[nodeNum-1].NewEndpoint(ctx, nseReg, sandbox.GenerateTestToken, counterNse) | ||
|
||
request := defaultRequest(nsReg.Name) | ||
|
||
counterClient := new(count.Client) | ||
nsc := domain.Nodes[0].NewClient(ctx, sandbox.GenerateTestToken, client.WithAdditionalFunctionality(counterClient)) | ||
|
||
conn, err := nsc.Request(ctx, request.Clone()) | ||
require.NoError(t, err) | ||
|
||
for _, fwd := range domain.Nodes[0].Forwarders { | ||
fwd.Restart() | ||
} | ||
|
||
nse.Cancel() | ||
|
||
nseReg2 := defaultRegistryEndpoint(nsReg.Name) | ||
nseReg2.Name += "-2" | ||
domain.Nodes[nodeNum-1].NewEndpoint(ctx, nseReg2, sandbox.GenerateTestToken, counterNse) | ||
|
||
// Wait for heal to finish successfully | ||
require.Eventually(t, checkSecondRequestsReceived(counterNse.UniqueRequests), timeout, tick) | ||
// Client should try to close connection before reselect | ||
require.Equal(t, 1, counterClient.UniqueCloses()) | ||
// local Forwarder has restarted, new forwarder should not get a Close call | ||
require.Equal(t, 0, counterFwd[0].Closes()) | ||
if nodeNum > 1 { | ||
// remote forwarder should get Close | ||
require.Equal(t, 1, counterFwd[1].Closes()) | ||
} | ||
require.Equal(t, 0, counterNse.Closes()) | ||
|
||
// Refresh shouldn't cause any Close calls | ||
request.Connection = conn | ||
_, err = nsc.Request(ctx, request.Clone()) | ||
require.NoError(t, err) | ||
require.Equal(t, 0, counterNse.Closes()) | ||
require.Equal(t, 0, counterFwd[0].Closes()) | ||
if nodeNum > 1 { | ||
require.Equal(t, 1, counterFwd[1].Closes()) | ||
} | ||
|
||
clientCloses := counterClient.Closes() | ||
// Close should still be able to pass though the whole connection path | ||
_, err = nsc.Close(ctx, conn) | ||
require.NoError(t, err) | ||
require.Equal(t, clientCloses+1, counterClient.Closes()) | ||
require.Equal(t, 1, counterNse.Closes()) | ||
require.Equal(t, 1, counterFwd[0].Closes()) | ||
if nodeNum > 1 { | ||
require.Equal(t, 2, counterFwd[1].Closes()) | ||
} | ||
} | ||
|
||
// If registry died, NSMgr and Forwarder | ||
// will not be able to query it to get URLs to next app | ||
// but we still expect Close call to finish successfully | ||
func TestReselect_Close_RegistryDied(t *testing.T) { | ||
t.Cleanup(func() { goleak.VerifyNone(t) }) | ||
ctx, cancel := context.WithTimeout(context.Background(), timeout) | ||
|
||
// in this test we add counters to apps in chain | ||
// to make sure that in each app Close call goes through the whole chain, | ||
// without stopping on an error mid-chain | ||
counterFwd := new(count.Server) | ||
|
||
defer cancel() | ||
domain := sandbox.NewBuilder(ctx, t). | ||
SetNSMgrProxySupplier(nil). | ||
SetRegistryProxySupplier(nil). | ||
SetNSMgrSupplier(nil). | ||
SetNodeSetup(func(ctx context.Context, node *sandbox.Node, _ int) { | ||
node.NewNSMgr(ctx, "nsmgr", nil, sandbox.GenerateTestToken, nsmgr.NewServer) | ||
node.NewForwarder(ctx, ®istry.NetworkServiceEndpoint{ | ||
Name: sandbox.UniqueName("forwarder"), | ||
NetworkServiceNames: []string{"forwarder"}, | ||
}, sandbox.GenerateTestToken, counterFwd) | ||
}). | ||
Build() | ||
|
||
nsRegistryClient := domain.NewNSRegistryClient(ctx, sandbox.GenerateTestToken) | ||
|
||
nsReg, err := nsRegistryClient.Register(ctx, defaultRegistryService(t.Name())) | ||
require.NoError(t, err) | ||
|
||
nseReg := defaultRegistryEndpoint(nsReg.Name) | ||
|
||
counterNse := new(count.Server) | ||
domain.Nodes[0].NewEndpoint(ctx, nseReg, sandbox.GenerateTestToken, counterNse) | ||
|
||
request := defaultRequest(nsReg.Name) | ||
|
||
counterClient := new(count.Client) | ||
nsc := domain.Nodes[0].NewClient(ctx, sandbox.GenerateTestToken, client.WithAdditionalFunctionality(counterClient)) | ||
|
||
conn, err := nsc.Request(ctx, request.Clone()) | ||
require.NoError(t, err) | ||
|
||
domain.Registry.Cancel() | ||
|
||
_, err = nsc.Close(ctx, conn) | ||
require.NoError(t, err) | ||
|
||
require.Equal(t, 1, counterClient.Closes()) | ||
require.Equal(t, 1, counterFwd.Closes()) | ||
require.Equal(t, 1, counterNse.Closes()) | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -217,15 +217,15 @@ func Test_DiscoverForwarder_ChangeForwarderOnDeath_LostHeal(t *testing.T) { | |
require.Eventually(t, checkSecondRequestsReceived(counter.Requests), timeout, tick) | ||
require.Equal(t, 1, counter.UniqueRequests()) | ||
require.Equal(t, 2, counter.Requests()) | ||
require.Equal(t, 0, counter.Closes()) | ||
require.Equal(t, 1, counter.Closes()) | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Why do we change existing tests? There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. This test is designed to test what happens if there were network issues during Close. |
||
|
||
// check different forwarder selected | ||
request.Connection = conn | ||
conn, err = nsc.Request(ctx, request.Clone()) | ||
require.NoError(t, err) | ||
require.Equal(t, 1, counter.UniqueRequests()) | ||
require.Equal(t, 3, counter.Requests()) | ||
require.Equal(t, 0, counter.Closes()) | ||
require.Equal(t, 1, counter.Closes()) | ||
require.NotEqual(t, selectedFwd, conn.GetPath().GetPathSegments()[2].Name) | ||
} | ||
|
||
|
@@ -283,21 +283,21 @@ func Test_DiscoverForwarder_ChangeRemoteForwarderOnDeath(t *testing.T) { | |
|
||
selectedFwd := conn.GetPath().GetPathSegments()[4].Name | ||
|
||
domain.Nodes[1].Forwarders[selectedFwd].Cancel() | ||
|
||
domain.Registry.Restart() | ||
|
||
domain.Nodes[1].Forwarders[selectedFwd].Cancel() | ||
|
||
require.Eventually(t, checkSecondRequestsReceived(counter.Requests), timeout, tick) | ||
require.Equal(t, 1, counter.UniqueRequests()) | ||
require.Equal(t, 2, counter.Requests()) | ||
require.Equal(t, 0, counter.Closes()) | ||
require.Equal(t, 1, counter.Closes()) | ||
|
||
// check different forwarder selected | ||
request.Connection = conn | ||
conn, err = nsc.Request(ctx, request.Clone()) | ||
require.NoError(t, err) | ||
require.Equal(t, 1, counter.UniqueRequests()) | ||
require.Equal(t, 3, counter.Requests()) | ||
require.Equal(t, 0, counter.Closes()) | ||
require.Equal(t, 1, counter.Closes()) | ||
require.NotEqual(t, selectedFwd, conn.GetPath().GetPathSegments()[4].Name) | ||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Why do we change this?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Previously, Close never reached the NSE because the nsmgr had restarted, but now the Close call is automatically re-issued when the reselect request reaches the forwarder.