From 0cf65652b2537486e03faeff5b7c9ccc1520be18 Mon Sep 17 00:00:00 2001 From: WVerlaek Date: Wed, 5 Jan 2022 15:39:34 +0100 Subject: [PATCH] e2e test to reproduce issue#2397 --- test/e2e/fleet_test.go | 46 ++++++++++++++++++++++++++++----- test/e2e/framework/framework.go | 32 +++++++++++++++++++++++ 2 files changed, 71 insertions(+), 7 deletions(-) diff --git a/test/e2e/fleet_test.go b/test/e2e/fleet_test.go index 9ce1f79521..c7ff98a4eb 100644 --- a/test/e2e/fleet_test.go +++ b/test/e2e/fleet_test.go @@ -223,14 +223,16 @@ func TestFleetRollingUpdate(t *testing.T) { t.Parallel() ctx := context.Background() // Use scaleFleetPatch (true) or scaleFleetSubresource (false) - fixtures := []bool{true, false} - maxSurge := []string{"25%", "10%"} + fixtures := []bool{true} //, false} // TODO Enable these again + maxSurge := []string{"25%"} //, "10%"} // TODO + doCycle := true // TODO: fixture? for _, usePatch := range fixtures { for _, maxSurgeParam := range maxSurge { usePatch := usePatch maxSurgeParam := maxSurgeParam - t.Run(fmt.Sprintf("Use fleet Patch %t %s", usePatch, maxSurgeParam), func(t *testing.T) { + doCycleParam := doCycle + t.Run(fmt.Sprintf("Use fleet Patch %t %s cycle %t", usePatch, maxSurgeParam, doCycleParam), func(t *testing.T) { t.Parallel() client := framework.AgonesClient.AgonesV1() @@ -267,10 +269,33 @@ func TestFleetRollingUpdate(t *testing.T) { flt, err = client.Fleets(framework.Namespace).Get(ctx, flt.ObjectMeta.GetName(), metav1.GetOptions{}) assert.NoError(t, err) + done := make(chan bool, 1) + defer close(done) + if doCycleParam { + // Repeatedly cycle allocations to keep ~half of the GameServers Allocated, spread over both GSSets. + // Simulates a rolling update on a live Fleet that continuously receives new allocations, + // and reproduces an issue where this causes a rolling update to get stuck. 
+ const halfScale = targetScale / 2 + go framework.CycleAllocations(t, flt, time.Second*3, time.Second*halfScale*3, done) + + // Wait for at least half of the fleet to have been cycled (either Allocated or shutting down) + // before updating the fleet; the test fails if that state is never reached. + framework.AssertFleetCondition(t, flt, func(entry *logrus.Entry, fleet *agonesv1.Fleet) bool { + return fleet.Status.ReadyReplicas < halfScale + }) + } + // Change ContainerPort to trigger creating a new GSSet - fltCopy := flt.DeepCopy() - fltCopy.Spec.Template.Spec.Ports[0].ContainerPort++ - flt, err = client.Fleets(framework.Namespace).Update(ctx, fltCopy, metav1.UpdateOptions{}) + err = retry.RetryOnConflict(retry.DefaultBackoff, func() error { + flt, err = client.Fleets(framework.Namespace).Get(ctx, flt.GetName(), metav1.GetOptions{}) + if err != nil { + return err + } + fltCopy := flt.DeepCopy() + fltCopy.Spec.Template.Spec.Ports[0].ContainerPort++ + flt, err = client.Fleets(framework.Namespace).Update(ctx, fltCopy, metav1.UpdateOptions{}) + return err + }) assert.NoError(t, err) selector := labels.SelectorFromSet(labels.Set{agonesv1.FleetNameLabel: flt.ObjectMeta.Name}) @@ -308,7 +333,9 @@ func TestFleetRollingUpdate(t *testing.T) { assert.Nil(t, err) expectedTotal := targetScale + maxSurge + maxUnavailable + shift - if len(list.Items) > expectedTotal { + if len(list.Items) > expectedTotal && !doCycleParam { + // This fails when Allocation cycling is enabled as there's a number of additional gameservers + // shutting down. err = fmt.Errorf("new replicas should be less than target + maxSurge + maxUnavailable + shift. Replicas: %d, Expected: %d", len(list.Items), expectedTotal) } if err != nil { @@ -324,6 +351,11 @@ func TestFleetRollingUpdate(t *testing.T) { assert.NoError(t, err) + // Stop cycling Allocations. + // The AssertFleetCondition calls below will wait until the Allocation cycling has + // fully stopped (when all Allocated GameServers are shut down). 
+ done <- true + // scale down, with allocation const scaleDownTarget = 1 if usePatch { diff --git a/test/e2e/framework/framework.go b/test/e2e/framework/framework.go index bbfdd556c3..7bc09b6a16 100644 --- a/test/e2e/framework/framework.go +++ b/test/e2e/framework/framework.go @@ -278,6 +278,44 @@ func (f *Framework) WaitForGameServerState(t *testing.T, gs *agonesv1.GameServer state, gs.Namespace, gs.Name) } +// GetGameServer fetches the named GameServer from the given namespace and asserts via require.NoError that the lookup succeeded. +func (f *Framework) GetGameServer(t *testing.T, namespace string, name string) *agonesv1.GameServer { + gs, err := f.AgonesClient.AgonesV1().GameServers(namespace).Get(context.Background(), name, metav1.GetOptions{}) + require.NoError(t, err, "failed to get gameserver: %s/%s", namespace, name) + return gs +} + +// CycleAllocations repeatedly Allocates a GameServer in the Fleet (if one is available), once every specified period. +// Each Allocated GameServer gets deleted allocDuration after it was Allocated. +// GameServers will continue to be Allocated until a message is passed to the done channel (or the channel is closed). +func (f *Framework) CycleAllocations(t *testing.T, flt *agonesv1.Fleet, period time.Duration, allocDuration time.Duration, done <-chan bool) { + ticker := time.NewTicker(period) + // Release the ticker's resources once cycling stops. + defer ticker.Stop() + for { + select { + case <-done: + return + case <-ticker.C: + gsa := GetAllocation(flt) + gsa, err := f.AgonesClient.AllocationV1().GameServerAllocations(flt.Namespace).Create(context.Background(), gsa, metav1.CreateOptions{}) + if err != nil || gsa.Status.State != allocationv1.GameServerAllocationAllocated { + continue + } + + // Deallocate after allocDuration. + go func(gsa *allocationv1.GameServerAllocation) { + time.Sleep(allocDuration) + err := f.AgonesClient.AgonesV1().GameServers(gsa.Namespace).Delete(context.Background(), gsa.Status.GameServerName, metav1.DeleteOptions{}) + // Report with t.Errorf rather than require: FailNow must only be called from the goroutine running the test. + if err != nil { + t.Errorf("failed to delete gameserver %s/%s: %v", gsa.Namespace, gsa.Status.GameServerName, err) + } + }(gsa) + } + } +} + // AssertFleetCondition waits for the Fleet to be in a specific condition or fails the test if the condition can't be met in 5 minutes. 
func (f *Framework) AssertFleetCondition(t *testing.T, flt *agonesv1.Fleet, condition func(*logrus.Entry, *agonesv1.Fleet) bool) { err := f.WaitForFleetCondition(t, flt, condition)