From ee1c3ddd80cc7325178a26d491c348563cb141f6 Mon Sep 17 00:00:00 2001 From: Mark Mandel Date: Wed, 2 Oct 2019 17:01:03 -0700 Subject: [PATCH] GameServer container restart before Ready, move to Unhealthy state After This brings our implementation in line with what our health checking documentation states that we do. This is done by implementing extra checks in the HealthController to determine if it's appropriate to move to Unhealthy rather than allow a restart to occur. Replaced PR #1069 Closes #956 --- pkg/apis/agones/v1/gameserver.go | 23 +++++ pkg/apis/agones/v1/gameserver_test.go | 27 ++++++ pkg/gameservers/controller.go | 26 +++-- pkg/gameservers/controller_test.go | 49 +++++++++- pkg/gameservers/health.go | 48 +++++++++ pkg/gameservers/health_test.go | 135 +++++++++++++++++++++++++- pkg/sdk/sdk.pb.go | 10 +- test/e2e/gameserver_test.go | 104 ++++++++++++++++++++ 8 files changed, 406 insertions(+), 16 deletions(-) diff --git a/pkg/apis/agones/v1/gameserver.go b/pkg/apis/agones/v1/gameserver.go index 173a236633..30dfce7565 100644 --- a/pkg/apis/agones/v1/gameserver.go +++ b/pkg/apis/agones/v1/gameserver.go @@ -91,6 +91,10 @@ const ( // DevAddressAnnotation is an annotation to indicate that a GameServer hosted outside of Agones. // A locally hosted GameServer is not managed by Agones it is just simply registered. DevAddressAnnotation = "agones.dev/dev-address" + // GameServerReadyContainerIDAnnotation is an annotation that is set when the GameServer + // becomes ready, so we can track when restarts should occur and when a GameServer + // should be moved to Unhealthy. 
+ GameServerReadyContainerIDAnnotation = agones.GroupName + "/ready-container-id" ) var ( @@ -425,6 +429,25 @@ func (gs *GameServer) IsBeingDeleted() bool { return !gs.ObjectMeta.DeletionTimestamp.IsZero() || gs.Status.State == GameServerStateShutdown } +// IsBeforeReady returns true if the GameServer Status has yet to move to or past the Ready +// state in its lifecycle, such as Allocated or Reserved, or any of the Error/Unhealthy states +func (gs *GameServer) IsBeforeReady() bool { + switch gs.Status.State { + case GameServerStatePortAllocation: + return true + case GameServerStateCreating: + return true + case GameServerStateStarting: + return true + case GameServerStateScheduled: + return true + case GameServerStateRequestReady: + return true + } + + return false +} + // FindGameServerContainer returns the container that is specified in // gameServer.Spec.Container. Returns the index and the value. // Returns an error if not found diff --git a/pkg/apis/agones/v1/gameserver_test.go b/pkg/apis/agones/v1/gameserver_test.go index 5a6e140213..a4487e3a6f 100644 --- a/pkg/apis/agones/v1/gameserver_test.go +++ b/pkg/apis/agones/v1/gameserver_test.go @@ -629,6 +629,33 @@ func TestGameServerIsDeletable(t *testing.T) { assert.True(t, gs.IsDeletable()) } +func TestGameServerIsBeforeReady(t *testing.T) { + fixtures := []struct { + state GameServerState + expected bool + }{ + {GameServerStatePortAllocation, true}, + {GameServerStateCreating, true}, + {GameServerStateStarting, true}, + {GameServerStateScheduled, true}, + {GameServerStateRequestReady, true}, + {GameServerStateReady, false}, + {GameServerStateShutdown, false}, + {GameServerStateError, false}, + {GameServerStateUnhealthy, false}, + {GameServerStateReserved, false}, + {GameServerStateAllocated, false}, + } + + for _, test := range fixtures { + t.Run(string(test.state), func(t *testing.T) { + gs := &GameServer{Status: GameServerStatus{State: test.state}} + assert.Equal(t, test.expected, gs.IsBeforeReady()) + }) + 
} + +} + func TestGameServerApplyToPodGameServerContainer(t *testing.T) { t.Parallel() diff --git a/pkg/gameservers/controller.go b/pkg/gameservers/controller.go index e7ad9e8223..515762feaf 100644 --- a/pkg/gameservers/controller.go +++ b/pkg/gameservers/controller.go @@ -762,26 +762,38 @@ func (c *Controller) syncGameServerRequestReadyState(gs *agonesv1.GameServer) (* gsCopy := gs.DeepCopy() + pod, err := c.gameServerPod(gs) + // NotFound should never happen, and if it does -- something bad happened, + // so go into workerqueue backoff. + if err != nil { + return nil, err + } + // if the address hasn't been populated, and the Ready request comes // before the controller has had a chance to do it, then // do it here instead addressPopulated := false if gs.Status.NodeName == "" { addressPopulated = true - pod, err := c.gameServerPod(gs) - // NotFound should never happen, and if it does -- something bad happened, - // so go into workerqueue backoff. - if err != nil { - return nil, err - } gsCopy, err = c.applyGameServerAddressAndPort(gsCopy, pod) if err != nil { return gs, err } } + // track the ready gameserver container, so we can determine that after this point, we should move to Unhealthy + // if there is a container crash/restart after we move to Ready + for _, cs := range pod.Status.ContainerStatuses { + if cs.Name == gs.Spec.Container { + if _, ok := gs.ObjectMeta.Annotations[agonesv1.GameServerReadyContainerIDAnnotation]; !ok { + gsCopy.ObjectMeta.Annotations[agonesv1.GameServerReadyContainerIDAnnotation] = cs.ContainerID + } + break + } + } + gsCopy.Status.State = agonesv1.GameServerStateReady - gs, err := c.gameServerGetter.GameServers(gs.ObjectMeta.Namespace).Update(gsCopy) + gs, err = c.gameServerGetter.GameServers(gs.ObjectMeta.Namespace).Update(gsCopy) if err != nil { return gs, errors.Wrapf(err, "error setting Ready, Port and address on GameServer %s Status", gs.ObjectMeta.Name) } diff --git a/pkg/gameservers/controller_test.go 
b/pkg/gameservers/controller_test.go index 92222b0c55..674e00c92b 100644 --- a/pkg/gameservers/controller_test.go +++ b/pkg/gameservers/controller_test.go @@ -919,6 +919,7 @@ func TestControllerApplyGameServerAddressAndPort(t *testing.T) { func TestControllerSyncGameServerRequestReadyState(t *testing.T) { t.Parallel() + containerID := "1234" t.Run("GameServer with ReadyRequest State", func(t *testing.T) { c, m := newFakeController() @@ -928,6 +929,9 @@ func TestControllerSyncGameServerRequestReadyState(t *testing.T) { gsFixture.ApplyDefaults() gsFixture.Status.NodeName = "node" pod, err := gsFixture.Pod() + pod.Status.ContainerStatuses = []corev1.ContainerStatus{ + {Name: gsFixture.Spec.Container, ContainerID: containerID}, + } assert.Nil(t, err) gsUpdated := false @@ -939,6 +943,7 @@ func TestControllerSyncGameServerRequestReadyState(t *testing.T) { ua := action.(k8stesting.UpdateAction) gs := ua.GetObject().(*agonesv1.GameServer) assert.Equal(t, agonesv1.GameServerStateReady, gs.Status.State) + assert.Equal(t, containerID, gs.Annotations[agonesv1.GameServerReadyContainerIDAnnotation]) return true, gs, nil }) @@ -946,7 +951,7 @@ func TestControllerSyncGameServerRequestReadyState(t *testing.T) { defer cancel() gs, err := c.syncGameServerRequestReadyState(gsFixture) - assert.Nil(t, err, "should not error") + assert.NoError(t, err, "should not error") assert.True(t, gsUpdated, "GameServer wasn't updated") assert.Equal(t, agonesv1.GameServerStateReady, gs.Status.State) agtesting.AssertEventContains(t, m.FakeRecorder.Events, "SDK.Ready() complete") @@ -961,6 +966,9 @@ func TestControllerSyncGameServerRequestReadyState(t *testing.T) { pod, err := gsFixture.Pod() assert.Nil(t, err) pod.Spec.NodeName = nodeFixtureName + pod.Status.ContainerStatuses = []corev1.ContainerStatus{ + {Name: gsFixture.Spec.Container, ContainerID: containerID}, + } gsUpdated := false ipFixture := "12.12.12.12" @@ -978,6 +986,7 @@ func TestControllerSyncGameServerRequestReadyState(t *testing.T) { 
ua := action.(k8stesting.UpdateAction) gs := ua.GetObject().(*agonesv1.GameServer) assert.Equal(t, agonesv1.GameServerStateReady, gs.Status.State) + assert.Equal(t, containerID, gs.Annotations[agonesv1.GameServerReadyContainerIDAnnotation]) return true, gs, nil }) @@ -996,6 +1005,44 @@ func TestControllerSyncGameServerRequestReadyState(t *testing.T) { agtesting.AssertEventContains(t, m.FakeRecorder.Events, "SDK.Ready() complete") }) + t.Run("GameServer with a GameServerReadyContainerIDAnnotation already", func(t *testing.T) { + c, m := newFakeController() + + gsFixture := &agonesv1.GameServer{ObjectMeta: metav1.ObjectMeta{Name: "test", Namespace: "default"}, + Spec: newSingleContainerSpec(), Status: agonesv1.GameServerStatus{State: agonesv1.GameServerStateRequestReady}} + gsFixture.ApplyDefaults() + gsFixture.Status.NodeName = "node" + gsFixture.Annotations[agonesv1.GameServerReadyContainerIDAnnotation] = "4321" + pod, err := gsFixture.Pod() + pod.Status.ContainerStatuses = []corev1.ContainerStatus{ + {Name: gsFixture.Spec.Container, ContainerID: containerID}, + } + assert.Nil(t, err) + gsUpdated := false + + m.KubeClient.AddReactor("list", "pods", func(action k8stesting.Action) (bool, runtime.Object, error) { + return true, &corev1.PodList{Items: []corev1.Pod{*pod}}, nil + }) + m.AgonesClient.AddReactor("update", "gameservers", func(action k8stesting.Action) (bool, runtime.Object, error) { + gsUpdated = true + ua := action.(k8stesting.UpdateAction) + gs := ua.GetObject().(*agonesv1.GameServer) + assert.Equal(t, agonesv1.GameServerStateReady, gs.Status.State) + assert.NotEqual(t, containerID, gs.Annotations[agonesv1.GameServerReadyContainerIDAnnotation]) + + return true, gs, nil + }) + + _, cancel := agtesting.StartInformers(m, c.podSynced) + defer cancel() + + gs, err := c.syncGameServerRequestReadyState(gsFixture) + assert.NoError(t, err, "should not error") + assert.True(t, gsUpdated, "GameServer wasn't updated") + assert.Equal(t, agonesv1.GameServerStateReady, 
gs.Status.State) + agtesting.AssertEventContains(t, m.FakeRecorder.Events, "SDK.Ready() complete") + }) + for _, s := range []agonesv1.GameServerState{"Unknown", agonesv1.GameServerStateUnhealthy} { name := fmt.Sprintf("GameServer with %s state", s) t.Run(name, func(t *testing.T) { diff --git a/pkg/gameservers/health.go b/pkg/gameservers/health.go index 1e4a795ddd..1cc6a94554 100644 --- a/pkg/gameservers/health.go +++ b/pkg/gameservers/health.go @@ -190,6 +190,10 @@ func (hc *HealthController) syncGameServer(key string) error { return nil } + if skip, err := hc.skipUnhealthy(gs); err != nil || skip { + return err + } + hc.loggerForGameServer(gs).Info("Issue with GameServer pod, marking as GameServerStateUnhealthy") gsCopy := gs.DeepCopy() gsCopy.Status.State = agonesv1.GameServerStateUnhealthy @@ -202,3 +206,47 @@ func (hc *HealthController) syncGameServer(key string) error { return nil } + +// skipUnhealthy determines if it's appropriate to not move to Unhealthy when a Pod's +// gameserver container has crashed, or let it restart as per usual K8s operations. +// It does this by checking a combination of the current GameServer state and annotation data that stores +// which container instance was live if the GameServer has been marked as Ready. 
+// The logic is as follows: +// - If the GameServer is not yet Ready, allow it to restart (return true) +// - If the GameServer is in a state past Ready, move to Unhealthy +func (hc *HealthController) skipUnhealthy(gs *agonesv1.GameServer) (bool, error) { + pod, err := hc.podLister.Pods(gs.ObjectMeta.Namespace).Get(gs.ObjectMeta.Name) + if err != nil { + // Pod doesn't exist, so the GameServer is definitely not healthy + if k8serrors.IsNotFound(err) { + return false, nil + } + // if it's something else, go back into the queue + return false, errors.Wrapf(err, "error retrieving Pod %s for GameServer to check status", gs.ObjectMeta.Name) + } + if !metav1.IsControlledBy(pod, gs) { + // This is not the Pod we are looking for 🤖 + return false, nil + } + if gs.IsBeforeReady() { + return hc.failedContainer(pod), nil + } + + // finally, we need to check whether the container failure happened before or after the gameserver was ready. + for _, cs := range pod.Status.ContainerStatuses { + if cs.Name == gs.Spec.Container { + if cs.State.Terminated != nil { + return false, nil + } + if cs.LastTerminationState.Terminated != nil { + // if the current container is running, and is the ready container, then we know this is some + // other pod update, and we previously had a restart before we got to being Ready, and therefore + // shouldn't move to Unhealthy. 
+ return cs.ContainerID == gs.Annotations[agonesv1.GameServerReadyContainerIDAnnotation], nil + } + break + } + } + + return false, nil +} diff --git a/pkg/gameservers/health_test.go b/pkg/gameservers/health_test.go index 6d110d338a..702a72f3af 100644 --- a/pkg/gameservers/health_test.go +++ b/pkg/gameservers/health_test.go @@ -79,6 +79,112 @@ func TestHealthUnschedulableWithNoFreePorts(t *testing.T) { assert.False(t, hc.unschedulableWithNoFreePorts(pod)) } +func TestHealthControllerSkipUnhealthy(t *testing.T) { + t.Parallel() + + fixtures := map[string]struct { + setup func(*agonesv1.GameServer, *corev1.Pod) + expected bool + }{ + "scheduled and terminated container": { + setup: func(gs *agonesv1.GameServer, pod *corev1.Pod) { + gs.Status.State = agonesv1.GameServerStateScheduled + pod.Status.ContainerStatuses = []corev1.ContainerStatus{{ + Name: gs.Spec.Container, + State: corev1.ContainerState{Terminated: &corev1.ContainerStateTerminated{}}, + }} + }, + expected: true, + }, + "after ready and terminated container": { + setup: func(gs *agonesv1.GameServer, pod *corev1.Pod) { + gs.Status.State = agonesv1.GameServerStateReady + pod.Status.ContainerStatuses = []corev1.ContainerStatus{{ + Name: gs.Spec.Container, + State: corev1.ContainerState{Terminated: &corev1.ContainerStateTerminated{}}, + }} + }, + expected: false, + }, + "before ready, with no terminated container": { + setup: func(gs *agonesv1.GameServer, pod *corev1.Pod) { + gs.Status.State = agonesv1.GameServerStateScheduled + }, + expected: false, + }, + "after ready, with no terminated container": { + setup: func(gs *agonesv1.GameServer, pod *corev1.Pod) { + gs.Status.State = agonesv1.GameServerStateAllocated + }, + expected: false, + }, + "before ready, with a LastTerminated container": { + setup: func(gs *agonesv1.GameServer, pod *corev1.Pod) { + gs.Status.State = agonesv1.GameServerStateScheduled + pod.Status.ContainerStatuses = []corev1.ContainerStatus{{ + Name: gs.Spec.Container, + 
LastTerminationState: corev1.ContainerState{Terminated: &corev1.ContainerStateTerminated{}}, + }} + }, + expected: true, + }, + "after ready, with a LastTerminated container, not matching": { + setup: func(gs *agonesv1.GameServer, pod *corev1.Pod) { + gs.Status.State = agonesv1.GameServerStateReady + gs.Annotations[agonesv1.GameServerReadyContainerIDAnnotation] = "4321" + pod.Status.ContainerStatuses = []corev1.ContainerStatus{{ + ContainerID: "1234", + Name: gs.Spec.Container, + LastTerminationState: corev1.ContainerState{Terminated: &corev1.ContainerStateTerminated{}}, + }} + }, + expected: false, + }, + "after ready, with a LastTerminated container, matching": { + setup: func(gs *agonesv1.GameServer, pod *corev1.Pod) { + gs.Status.State = agonesv1.GameServerStateReserved + gs.Annotations[agonesv1.GameServerReadyContainerIDAnnotation] = "1234" + pod.Status.ContainerStatuses = []corev1.ContainerStatus{{ + ContainerID: "1234", + Name: gs.Spec.Container, + LastTerminationState: corev1.ContainerState{Terminated: &corev1.ContainerStateTerminated{}}, + }} + }, + expected: true, + }, + "pod is missing!": { + setup: func(server *agonesv1.GameServer, pod *corev1.Pod) { + pod.ObjectMeta.Name = "missing" + }, + expected: false, + }, + } + + for k, v := range fixtures { + t.Run(k, func(t *testing.T) { + m := agtesting.NewMocks() + hc := NewHealthController(healthcheck.NewHandler(), m.KubeClient, m.AgonesClient, m.KubeInformerFactory, m.AgonesInformerFactory) + gs := &agonesv1.GameServer{ObjectMeta: metav1.ObjectMeta{Name: "test", Namespace: defaultNs}, Spec: newSingleContainerSpec()} + gs.ApplyDefaults() + pod, err := gs.Pod() + assert.NoError(t, err) + + v.setup(gs, pod) + + m.KubeClient.AddReactor("list", "pods", func(action k8stesting.Action) (bool, runtime.Object, error) { + return true, &corev1.PodList{Items: []corev1.Pod{*pod}}, nil + }) + + _, cancel := agtesting.StartInformers(m, hc.podSynced) + defer cancel() + + result, err := hc.skipUnhealthy(gs) + 
assert.NoError(t, err) + assert.Equal(t, v.expected, result) + }) + } +} + func TestHealthControllerSyncGameServer(t *testing.T) { t.Parallel() @@ -86,8 +192,9 @@ func TestHealthControllerSyncGameServer(t *testing.T) { updated bool } fixtures := map[string]struct { - state agonesv1.GameServerState - expected expected + state agonesv1.GameServerState + podStatus *corev1.PodStatus + expected expected }{ "started": { state: agonesv1.GameServerStateStarting, @@ -119,6 +226,18 @@ func TestHealthControllerSyncGameServer(t *testing.T) { updated: true, }, }, + "container failed before ready": { + state: agonesv1.GameServerStateStarting, + podStatus: &corev1.PodStatus{ContainerStatuses: []corev1.ContainerStatus{ + {Name: "container", State: corev1.ContainerState{Terminated: &corev1.ContainerStateTerminated{}}}}}, + expected: expected{updated: false}, + }, + "container failed after ready": { + state: agonesv1.GameServerStateAllocated, + podStatus: &corev1.PodStatus{ContainerStatuses: []corev1.ContainerStatus{ + {Name: "container", State: corev1.ContainerState{Terminated: &corev1.ContainerStateTerminated{}}}}}, + expected: expected{updated: true}, + }, } for name, test := range fixtures { @@ -133,6 +252,16 @@ func TestHealthControllerSyncGameServer(t *testing.T) { got := false updated := false + m.KubeClient.AddReactor("list", "pods", func(action k8stesting.Action) (bool, runtime.Object, error) { + list := &corev1.PodList{Items: []corev1.Pod{}} + if test.podStatus != nil { + pod, err := gs.Pod() + assert.NoError(t, err) + pod.Status = *test.podStatus + list.Items = append(list.Items, *pod) + } + return true, list, nil + }) m.AgonesClient.AddReactor("list", "gameservers", func(action k8stesting.Action) (bool, runtime.Object, error) { got = true return true, &agonesv1.GameServerList{Items: []agonesv1.GameServer{gs}}, nil @@ -145,7 +274,7 @@ func TestHealthControllerSyncGameServer(t *testing.T) { return true, gsObj, nil }) - _, cancel := agtesting.StartInformers(m) + _, cancel 
:= agtesting.StartInformers(m, hc.gameServerSynced, hc.podSynced) defer cancel() err := hc.syncGameServer("default/test") diff --git a/pkg/sdk/sdk.pb.go b/pkg/sdk/sdk.pb.go index 015876a7d9..c05497dd43 100644 --- a/pkg/sdk/sdk.pb.go +++ b/pkg/sdk/sdk.pb.go @@ -18,13 +18,13 @@ package sdk -import proto "github.com/golang/protobuf/proto" -import fmt "fmt" -import math "math" -import _ "google.golang.org/genproto/googleapis/api/annotations" - import ( + fmt "fmt" + math "math" + + proto "github.com/golang/protobuf/proto" context "golang.org/x/net/context" + _ "google.golang.org/genproto/googleapis/api/annotations" grpc "google.golang.org/grpc" ) diff --git a/test/e2e/gameserver_test.go b/test/e2e/gameserver_test.go index 2d744d8aba..c2f2928398 100644 --- a/test/e2e/gameserver_test.go +++ b/test/e2e/gameserver_test.go @@ -213,6 +213,110 @@ func TestGameServerUnhealthyAfterDeletingPod(t *testing.T) { assert.NoError(t, err) } +func TestGameServerRestartBeforeReadyCrash(t *testing.T) { + t.Parallel() + logger := logrus.WithField("test", t.Name()) + + gs := defaultGameServer(defaultNs) + // give some buffer with gameservers crashing and coming back + gs.Spec.Health.PeriodSeconds = 60 * 60 + gs.Spec.Template.Spec.Containers[0].Env = append(gs.Spec.Template.Spec.Containers[0].Env, corev1.EnvVar{Name: "READY", Value: "FALSE"}) + gsClient := framework.AgonesClient.AgonesV1().GameServers(defaultNs) + newGs, err := gsClient.Create(gs) + if !assert.NoError(t, err) { + assert.Fail(t, "could not create the gameserver") + } + defer gsClient.Delete(newGs.ObjectMeta.Name, nil) // nolint: errcheck + + logger.Info("Waiting for us to have an address to send things to") + newGs, err = framework.WaitForGameServerState(newGs, agonesv1.GameServerStateScheduled, time.Minute) + assert.NoError(t, err) + + logger.WithField("gs", newGs.ObjectMeta.Name).Info("GameServer created") + + address := fmt.Sprintf("%s:%d", newGs.Status.Address, newGs.Status.Ports[0].Port) + logger.WithField("address", 
address).Info("Dialing UDP message to address") + + messageAndWait := func(gs *agonesv1.GameServer, msg string, check func(gs *agonesv1.GameServer, pod *corev1.Pod) bool) error { + return wait.PollImmediate(3*time.Second, 3*time.Minute, func() (bool, error) { + gs, err := gsClient.Get(gs.ObjectMeta.Name, metav1.GetOptions{}) + if err != nil { + logger.WithError(err).Warn("could not get gameserver") + return true, err + } + pod, err := framework.KubeClient.CoreV1().Pods(defaultNs).Get(newGs.ObjectMeta.Name, metav1.GetOptions{}) + if err != nil { + logger.WithError(err).Warn("could not get pod for gameserver") + return true, err + } + + if check(gs, pod) { + return true, nil + } + + // create a connection each time, as weird stuff happens if the receiver isn't up and running. + conn, err := net.Dial("udp", address) + assert.NoError(t, err) + defer conn.Close() // nolint: errcheck + // doing this last, so that there is a short delay between the msg being sent, and the check. + logger.WithField("gs", gs.ObjectMeta.Name).WithField("msg", msg).Info("sending message") + if _, err = conn.Write([]byte(msg)); err != nil { + logger.WithError(err).WithField("gs", gs.ObjectMeta.Name). + WithField("state", gs.Status.State).Info("error sending packet") + } + return false, nil + }) + } + + logger.Info("crashing, and waiting to see restart") + err = messageAndWait(newGs, "CRASH", func(gs *agonesv1.GameServer, pod *corev1.Pod) bool { + for _, c := range pod.Status.ContainerStatuses { + if c.Name == newGs.Spec.Container && c.RestartCount > 0 { + logger.Info("successfully crashed. Moving on!") + return true + } + } + return false + }) + assert.NoError(t, err) + + // check that the GameServer is not in an unhealthy state. 
If it does happen, it should happen pretty quick + newGs, err = framework.WaitForGameServerState(newGs, agonesv1.GameServerStateUnhealthy, 5*time.Second) + // should be an error, as the state should not occur + if !assert.Error(t, err) { + assert.FailNow(t, "GameServer should not be Unhealthy") + } + assert.Contains(t, err.Error(), "waiting for GameServer") + + // ping READY until it doesn't fail anymore - since it may take a while + // for this to come back up -- or we could get a delayed CRASH, so we have to + // wait for the process to restart again to fire the SDK.Ready() + logger.Info("marking GameServer as ready") + err = messageAndWait(newGs, "READY", func(gs *agonesv1.GameServer, pod *corev1.Pod) bool { + if gs.Status.State == agonesv1.GameServerStateReady { + logger.Info("ready! Moving On!") + return true + } + return false + }) + if err != nil { + assert.Failf(t, "Could not make GameServer Ready: %v", err.Error()) + } + // now crash, should be unhealthy, since it's after being Ready + logger.Info("crashing again, should be unhealthy") + // retry on crash, as with the restarts, sometimes Go takes a moment to send this through. + err = messageAndWait(newGs, "CRASH", func(gs *agonesv1.GameServer, pod *corev1.Pod) bool { + logger.WithField("gs", gs.ObjectMeta.Name).WithField("state", gs.Status.State). + Info("checking final crash state") + if gs.Status.State == agonesv1.GameServerStateUnhealthy { + logger.Info("Unhealthy! We are done!") + return true + } + return false + }) + assert.NoError(t, err) +} + func TestGameServerUnhealthyAfterReadyCrash(t *testing.T) { t.Parallel()