Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add cleanup when connection state is RESELECT_REQUESTED (using begin server) #1471

Merged
merged 17 commits into from
Jul 3, 2023
Merged
Show file tree
Hide file tree
Changes from 16 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion pkg/networkservice/chains/nsmgr/heal_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -459,7 +459,7 @@ func testNSMGRHealNSMgr(t *testing.T, nodeNum int, restored bool) {

if restored {
require.Equal(t, 3, counter.Requests())
require.Equal(t, 1, counter.Closes())
require.Equal(t, 2, counter.Closes())
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why do we change this?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Previously, Close never reached the NSE because the nsmgr had restarted; now the Close call is automatically re-issued when the reselect request reaches the forwarder.

} else {
require.Equal(t, 2, counter.UniqueRequests())
require.Equal(t, closes+1, counter.UniqueCloses())
Expand Down
327 changes: 327 additions & 0 deletions pkg/networkservice/chains/nsmgr/reselect_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,327 @@
// Copyright (c) 2023 Cisco and/or its affiliates.
//
// SPDX-License-Identifier: Apache-2.0
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at:
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package nsmgr_test

import (
"context"
"testing"

"github.com/stretchr/testify/require"
"go.uber.org/goleak"

"github.com/networkservicemesh/api/pkg/api/registry"

"github.com/networkservicemesh/sdk/pkg/networkservice/chains/client"
"github.com/networkservicemesh/sdk/pkg/networkservice/chains/nsmgr"
"github.com/networkservicemesh/sdk/pkg/networkservice/utils/count"
"github.com/networkservicemesh/sdk/pkg/tools/sandbox"
)

// TestReselect_NsmgrRestart runs the reselect scenario once per entry of the
// table below, varying the number of nodes and which NSMgr(s) get restarted.
// Even if an NSMgr has restarted, all other apps are expected to get a Close call.
func TestReselect_NsmgrRestart(t *testing.T) {
	type testCase struct {
		name          string
		nodeNum       int
		restartLocal  bool
		restartRemote bool
	}

	cases := []testCase{
		{name: "Local", nodeNum: 1},
		{name: "Remote_RestartLocal", nodeNum: 2, restartLocal: true},
		{name: "Remote_RestartRemote", nodeNum: 2, restartRemote: true},
		{name: "Remote_RestartBoth", nodeNum: 2, restartLocal: true, restartRemote: true},
	}

	for idx := range cases {
		// Per-iteration copy: the subtest closure never shares a loop variable.
		tc := cases[idx]
		t.Run(tc.name, func(t *testing.T) {
			testReselectWithNsmgrRestart(t, tc.nodeNum, tc.restartLocal, tc.restartRemote)
		})
	}
}

// testReselectWithNsmgrRestart builds a sandbox domain with nodeNum nodes, connects
// a client on the first node to an endpoint on the last node, optionally restarts the
// NSMgr on node 0 (restartLocal) and/or on node 1 (restartRemote), and then cancels the
// endpoint while registering a replacement one so that the connection gets healed.
//
// Close counters are attached to the client, to every forwarder and to the endpoint,
// and are checked after the heal, after a refresh and after an explicit Close.
func testReselectWithNsmgrRestart(t *testing.T, nodeNum int, restartLocal, restartRemote bool) {
	t.Cleanup(func() { goleak.VerifyNone(t) })

	ctx, cancel := context.WithTimeout(context.Background(), timeout)
	defer cancel()

	// Counters are added to the apps in the chain to make sure that in each app
	// the Close call goes through the whole chain, without stopping on an error mid-chain.
	var fwdCounters []*count.Server
	for len(fwdCounters) < nodeNum {
		fwdCounters = append(fwdCounters, new(count.Server))
	}

	// requireFwdCloses asserts that every forwarder has seen exactly want Close calls.
	requireFwdCloses := func(want int) {
		for idx := 0; idx < nodeNum; idx++ {
			require.Equal(t, want, fwdCounters[idx].Closes())
		}
	}

	setupNode := func(ctx context.Context, node *sandbox.Node, i int) {
		node.NewNSMgr(ctx, "nsmgr", nil, sandbox.GenerateTestToken, nsmgr.NewServer)

		fwdReg := &registry.NetworkServiceEndpoint{
			Name:                sandbox.UniqueName("forwarder"),
			NetworkServiceNames: []string{"forwarder"},
		}
		node.NewForwarder(ctx, fwdReg, sandbox.GenerateTestToken, fwdCounters[i])
	}

	builder := sandbox.NewBuilder(ctx, t).
		SetNodesCount(nodeNum).
		SetNSMgrProxySupplier(nil).
		SetRegistryProxySupplier(nil).
		SetNodeSetup(setupNode)
	domain := builder.Build()

	registryClient := domain.NewNSRegistryClient(ctx, sandbox.GenerateTestToken)

	service, err := registryClient.Register(ctx, defaultRegistryService(t.Name()))
	require.NoError(t, err)

	// The endpoint always lives on the last node.
	endpointNode := domain.Nodes[nodeNum-1]
	firstEndpointReg := defaultRegistryEndpoint(service.Name)
	nseCounter := new(count.Server)
	firstEndpoint := endpointNode.NewEndpoint(ctx, firstEndpointReg, sandbox.GenerateTestToken, nseCounter)

	request := defaultRequest(service.Name)

	clientCounter := new(count.Client)
	nsc := domain.Nodes[0].NewClient(ctx, sandbox.GenerateTestToken, client.WithAdditionalFunctionality(clientCounter))

	conn, err := nsc.Request(ctx, request.Clone())
	require.NoError(t, err)

	if restartLocal {
		domain.Nodes[0].NSMgr.Restart()
	}
	if restartRemote {
		domain.Nodes[1].NSMgr.Restart()
	}

	// Take the original endpoint down and bring up a replacement that shares the same counter.
	firstEndpoint.Cancel()

	secondEndpointReg := defaultRegistryEndpoint(service.Name)
	secondEndpointReg.Name += "-2"
	endpointNode.NewEndpoint(ctx, secondEndpointReg, sandbox.GenerateTestToken, nseCounter)

	// Wait for heal to finish successfully
	require.Eventually(t, checkSecondRequestsReceived(nseCounter.UniqueRequests), timeout, tick)
	// Client should try to close connection before reselect
	require.Equal(t, 1, clientCounter.UniqueCloses())
	// Forwarder(s) should get a Close, even though NSMgr(s) restarted and didn't pass the Close
	requireFwdCloses(1)
	// Old NSE died, new NSE should not get a Close call
	require.Equal(t, 0, nseCounter.Closes())

	// Refresh shouldn't cause Close calls
	request.Connection = conn
	_, err = nsc.Request(ctx, request.Clone())
	require.NoError(t, err)
	require.Equal(t, 0, nseCounter.Closes())
	requireFwdCloses(1)

	closesBefore := clientCounter.Closes()
	// Close should still be able to pass through the whole connection path
	_, err = nsc.Close(ctx, conn)
	require.NoError(t, err)
	require.Equal(t, closesBefore+1, clientCounter.Closes())
	require.Equal(t, 1, nseCounter.Closes())
	for idx := 0; idx < nodeNum; idx++ {
		require.Equal(t, 1, fwdCounters[idx].UniqueCloses(), idx)
		require.Equal(t, 2, fwdCounters[idx].Closes(), idx)
	}
}

// TestReselect_LocalForwarderRestart runs the forwarder-restart reselect scenario
// for a single-node and a two-node domain.
// Even if the local forwarder has restarted, all other apps are expected to get a Close call.
func TestReselect_LocalForwarderRestart(t *testing.T) {
	type testCase struct {
		name    string
		nodeNum int
	}

	cases := []testCase{
		{name: "Local", nodeNum: 1},
		{name: "Remote", nodeNum: 2},
	}

	for idx := range cases {
		// Per-iteration copy: the subtest closure never shares a loop variable.
		tc := cases[idx]
		t.Run(tc.name, func(t *testing.T) {
			testReselectWithLocalForwarderRestart(t, tc.nodeNum)
		})
	}
}

// testReselectWithLocalForwarderRestart builds a sandbox domain with nodeNum nodes,
// connects a client on the first node to an endpoint on the last node, restarts every
// forwarder of node 0, and then cancels the endpoint while registering a replacement
// one so that the connection gets healed.
//
// Close counters are attached to the client, to every forwarder and to the endpoint,
// and are checked after the heal, after a refresh and after an explicit Close.
func testReselectWithLocalForwarderRestart(t *testing.T, nodeNum int) {
	t.Cleanup(func() { goleak.VerifyNone(t) })

	ctx, cancel := context.WithTimeout(context.Background(), timeout)
	defer cancel()

	// Counters are added to the apps in the chain to make sure that in each app
	// the Close call goes through the whole chain, without stopping on an error mid-chain.
	var fwdCounters []*count.Server
	for len(fwdCounters) < nodeNum {
		fwdCounters = append(fwdCounters, new(count.Server))
	}

	// requireFwdCloses asserts the Close count of the local forwarder and,
	// when a second node exists, of the remote forwarder.
	requireFwdCloses := func(wantLocal, wantRemote int) {
		require.Equal(t, wantLocal, fwdCounters[0].Closes())
		if nodeNum > 1 {
			require.Equal(t, wantRemote, fwdCounters[1].Closes())
		}
	}

	setupNode := func(ctx context.Context, node *sandbox.Node, i int) {
		node.NewNSMgr(ctx, "nsmgr", nil, sandbox.GenerateTestToken, nsmgr.NewServer)

		fwdReg := &registry.NetworkServiceEndpoint{
			Name:                sandbox.UniqueName("forwarder"),
			NetworkServiceNames: []string{"forwarder"},
		}
		node.NewForwarder(ctx, fwdReg, sandbox.GenerateTestToken, fwdCounters[i])
	}

	builder := sandbox.NewBuilder(ctx, t).
		SetNodesCount(nodeNum).
		SetNSMgrProxySupplier(nil).
		SetRegistryProxySupplier(nil).
		SetNodeSetup(setupNode)
	domain := builder.Build()

	registryClient := domain.NewNSRegistryClient(ctx, sandbox.GenerateTestToken)

	service, err := registryClient.Register(ctx, defaultRegistryService(t.Name()))
	require.NoError(t, err)

	// The endpoint always lives on the last node.
	endpointNode := domain.Nodes[nodeNum-1]
	firstEndpointReg := defaultRegistryEndpoint(service.Name)
	nseCounter := new(count.Server)
	firstEndpoint := endpointNode.NewEndpoint(ctx, firstEndpointReg, sandbox.GenerateTestToken, nseCounter)

	request := defaultRequest(service.Name)

	clientCounter := new(count.Client)
	nsc := domain.Nodes[0].NewClient(ctx, sandbox.GenerateTestToken, client.WithAdditionalFunctionality(clientCounter))

	conn, err := nsc.Request(ctx, request.Clone())
	require.NoError(t, err)

	// Restart every forwarder registered on the client's node.
	for _, localFwd := range domain.Nodes[0].Forwarders {
		localFwd.Restart()
	}

	// Take the original endpoint down and bring up a replacement that shares the same counter.
	firstEndpoint.Cancel()

	secondEndpointReg := defaultRegistryEndpoint(service.Name)
	secondEndpointReg.Name += "-2"
	endpointNode.NewEndpoint(ctx, secondEndpointReg, sandbox.GenerateTestToken, nseCounter)

	// Wait for heal to finish successfully
	require.Eventually(t, checkSecondRequestsReceived(nseCounter.UniqueRequests), timeout, tick)
	// Client should try to close connection before reselect
	require.Equal(t, 1, clientCounter.UniqueCloses())
	// Local forwarder has restarted, so the new forwarder should not get a Close call,
	// while the remote forwarder (if any) should get one.
	requireFwdCloses(0, 1)
	require.Equal(t, 0, nseCounter.Closes())

	// Refresh shouldn't cause any Close calls
	request.Connection = conn
	_, err = nsc.Request(ctx, request.Clone())
	require.NoError(t, err)
	require.Equal(t, 0, nseCounter.Closes())
	requireFwdCloses(0, 1)

	closesBefore := clientCounter.Closes()
	// Close should still be able to pass through the whole connection path
	_, err = nsc.Close(ctx, conn)
	require.NoError(t, err)
	require.Equal(t, closesBefore+1, clientCounter.Closes())
	require.Equal(t, 1, nseCounter.Closes())
	requireFwdCloses(1, 2)
}

// TestReselect_Close_RegistryDied establishes a connection in a sandbox domain,
// cancels the domain registry and then closes the connection.
//
// Once the registry has died, NSMgr and Forwarder are not able to query it for the
// URL of the next app, but the Close call is still expected to finish successfully
// and to be counted exactly once by the client, the forwarder and the endpoint.
func TestReselect_Close_RegistryDied(t *testing.T) {
	t.Cleanup(func() { goleak.VerifyNone(t) })

	ctx, cancel := context.WithTimeout(context.Background(), timeout)
	defer cancel()

	// Counters are added to the apps in the chain to make sure that in each app
	// the Close call goes through the whole chain, without stopping on an error mid-chain.
	fwdCounter := new(count.Server)
	nseCounter := new(count.Server)
	clientCounter := new(count.Client)

	setupNode := func(ctx context.Context, node *sandbox.Node, _ int) {
		node.NewNSMgr(ctx, "nsmgr", nil, sandbox.GenerateTestToken, nsmgr.NewServer)

		fwdReg := &registry.NetworkServiceEndpoint{
			Name:                sandbox.UniqueName("forwarder"),
			NetworkServiceNames: []string{"forwarder"},
		}
		node.NewForwarder(ctx, fwdReg, sandbox.GenerateTestToken, fwdCounter)
	}

	builder := sandbox.NewBuilder(ctx, t).
		SetNSMgrProxySupplier(nil).
		SetRegistryProxySupplier(nil).
		SetNSMgrSupplier(nil).
		SetNodeSetup(setupNode)
	domain := builder.Build()

	registryClient := domain.NewNSRegistryClient(ctx, sandbox.GenerateTestToken)

	service, err := registryClient.Register(ctx, defaultRegistryService(t.Name()))
	require.NoError(t, err)

	endpointReg := defaultRegistryEndpoint(service.Name)
	domain.Nodes[0].NewEndpoint(ctx, endpointReg, sandbox.GenerateTestToken, nseCounter)

	request := defaultRequest(service.Name)

	nsc := domain.Nodes[0].NewClient(ctx, sandbox.GenerateTestToken, client.WithAdditionalFunctionality(clientCounter))

	conn, err := nsc.Request(ctx, request.Clone())
	require.NoError(t, err)

	// Take the registry down before closing the connection.
	domain.Registry.Cancel()

	_, err = nsc.Close(ctx, conn)
	require.NoError(t, err)

	require.Equal(t, 1, clientCounter.Closes())
	require.Equal(t, 1, fwdCounter.Closes())
	require.Equal(t, 1, nseCounter.Closes())
}
12 changes: 6 additions & 6 deletions pkg/networkservice/chains/nsmgr/select_forwarder_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -217,15 +217,15 @@ func Test_DiscoverForwarder_ChangeForwarderOnDeath_LostHeal(t *testing.T) {
require.Eventually(t, checkSecondRequestsReceived(counter.Requests), timeout, tick)
require.Equal(t, 1, counter.UniqueRequests())
require.Equal(t, 2, counter.Requests())
require.Equal(t, 0, counter.Closes())
require.Equal(t, 1, counter.Closes())
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why do we change existing tests?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This test is designed to test what happens if there were network issues during Close.
Network issues don't matter for Close anymore, as long as we go through the same apps.
Maybe we should just delete this test.


// check different forwarder selected
request.Connection = conn
conn, err = nsc.Request(ctx, request.Clone())
require.NoError(t, err)
require.Equal(t, 1, counter.UniqueRequests())
require.Equal(t, 3, counter.Requests())
require.Equal(t, 0, counter.Closes())
require.Equal(t, 1, counter.Closes())
require.NotEqual(t, selectedFwd, conn.GetPath().GetPathSegments()[2].Name)
}

Expand Down Expand Up @@ -283,21 +283,21 @@ func Test_DiscoverForwarder_ChangeRemoteForwarderOnDeath(t *testing.T) {

selectedFwd := conn.GetPath().GetPathSegments()[4].Name

domain.Nodes[1].Forwarders[selectedFwd].Cancel()

domain.Registry.Restart()

domain.Nodes[1].Forwarders[selectedFwd].Cancel()

require.Eventually(t, checkSecondRequestsReceived(counter.Requests), timeout, tick)
require.Equal(t, 1, counter.UniqueRequests())
require.Equal(t, 2, counter.Requests())
require.Equal(t, 0, counter.Closes())
require.Equal(t, 1, counter.Closes())

// check different forwarder selected
request.Connection = conn
conn, err = nsc.Request(ctx, request.Clone())
require.NoError(t, err)
require.Equal(t, 1, counter.UniqueRequests())
require.Equal(t, 3, counter.Requests())
require.Equal(t, 0, counter.Closes())
require.Equal(t, 1, counter.Closes())
require.NotEqual(t, selectedFwd, conn.GetPath().GetPathSegments()[4].Name)
}
Loading