Skip to content

Commit

Permalink
Revert UnHealthy standalone GameServers to not be deleted (#763)
Browse files Browse the repository at this point in the history
This PR does several things:

- Revert back to standalone GameServers that are unhealthy to not be
  deleted. This should be a better debugging experience when trying to
  determine why a live GameServer is moving to Unhealthy - as it stays
  persistent outside a Fleet lifecycle
- Attempt to fix the bug wherein Unhealthy GameServers where leaking
  when shutting down GameServers that where in fleets - especially
  at scale
- Add more testing around shutdown scenarios, such as Shutdown,
  Unhealthy, pod deletion and more.
  • Loading branch information
markmandel authored May 16, 2019
1 parent a08d6c2 commit fd1bf69
Show file tree
Hide file tree
Showing 25 changed files with 234 additions and 112 deletions.
2 changes: 1 addition & 1 deletion build/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,7 @@ KIND_PROFILE ?= agones
KIND_CONTAINER_NAME=kind-$(KIND_PROFILE)-control-plane

# Game Server image to use while doing end-to-end tests
GS_TEST_IMAGE ?= gcr.io/agones-images/udp-server:0.8
GS_TEST_IMAGE ?= gcr.io/agones-images/udp-server:0.9

# Directory that this Makefile is in.
mkfile_path := $(abspath $(lastword $(MAKEFILE_LIST)))
Expand Down
2 changes: 1 addition & 1 deletion examples/fleet.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -67,4 +67,4 @@ spec:
spec:
containers:
- name: simple-udp
image: gcr.io/agones-images/udp-server:0.8
image: gcr.io/agones-images/udp-server:0.9
2 changes: 1 addition & 1 deletion examples/simple-udp/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ REPOSITORY = gcr.io/agones-images

mkfile_path := $(abspath $(lastword $(MAKEFILE_LIST)))
project_path := $(dir $(mkfile_path))
server_tag = $(REPOSITORY)/udp-server:0.8
server_tag = $(REPOSITORY)/udp-server:0.9
root_path = $(realpath $(project_path)/../..)

# _____ _
Expand Down
2 changes: 1 addition & 1 deletion examples/simple-udp/fleet-distributed.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ spec:
spec:
containers:
- name: simple-udp
image: gcr.io/agones-images/udp-server:0.8
image: gcr.io/agones-images/udp-server:0.9
resources:
requests:
memory: "32Mi"
Expand Down
2 changes: 1 addition & 1 deletion examples/simple-udp/fleet.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ spec:
spec:
containers:
- name: simple-udp
image: gcr.io/agones-images/udp-server:0.8
image: gcr.io/agones-images/udp-server:0.9
resources:
requests:
memory: "64Mi"
Expand Down
2 changes: 1 addition & 1 deletion examples/simple-udp/gameserver.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ spec:
spec:
containers:
- name: simple-udp
image: gcr.io/agones-images/udp-server:0.8
image: gcr.io/agones-images/udp-server:0.9
resources:
requests:
memory: "32Mi"
Expand Down
2 changes: 1 addition & 1 deletion examples/simple-udp/gameserverset.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -31,4 +31,4 @@ spec:
spec:
containers:
- name: simple-udp
image: gcr.io/agones-images/udp-server:0.8
image: gcr.io/agones-images/udp-server:0.9
2 changes: 2 additions & 0 deletions examples/simple-udp/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -85,6 +85,8 @@ func readWriteLoop(conn net.PacketConn, stop chan struct{}, s *sdk.SDK) {
switch parts[0] {
// shuts down the gameserver
case "EXIT":
// respond here, as we os.Exit() before we get to below
respond(conn, sender, "ACK: "+txt+"\n")
exit(s)

// turns off the health pings
Expand Down
2 changes: 1 addition & 1 deletion install/helm/agones/templates/NOTES.txt
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ spec:
spec:
containers:
- name: simple-udp
image: gcr.io/agones-images/udp-server:0.8
image: gcr.io/agones-images/udp-server:0.9

Finally don't forget to explore our documentation and usage guides on how to develop and host dedicated game servers on top of Agones. :

Expand Down
3 changes: 1 addition & 2 deletions pkg/gameservers/controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -729,8 +729,7 @@ func (c *Controller) syncGameServerRequestReadyState(gs *v1alpha1.GameServer) (*

// syncGameServerShutdownState deletes the GameServer (and therefore the backing Pod) if it is in shutdown state
func (c *Controller) syncGameServerShutdownState(gs *v1alpha1.GameServer) error {
if !gs.ObjectMeta.DeletionTimestamp.IsZero() ||
(gs.Status.State != v1alpha1.GameServerStateShutdown && gs.Status.State != v1alpha1.GameServerStateUnhealthy) {
if !(gs.Status.State == v1alpha1.GameServerStateShutdown && gs.ObjectMeta.DeletionTimestamp.IsZero()) {
return nil
}

Expand Down
3 changes: 1 addition & 2 deletions pkg/gameservers/health.go
Original file line number Diff line number Diff line change
Expand Up @@ -164,8 +164,7 @@ func (hc *HealthController) syncGameServer(key string) error {
}

// at this point we don't care, we're already Unhealthy / deleting
if !gs.ObjectMeta.DeletionTimestamp.IsZero() || gs.Status.State == v1alpha1.GameServerStateShutdown ||
gs.Status.State == v1alpha1.GameServerStateUnhealthy {
if gs.IsBeingDeleted() || gs.Status.State == v1alpha1.GameServerStateUnhealthy {
return nil
}

Expand Down
2 changes: 1 addition & 1 deletion pkg/gameserversets/controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -467,7 +467,7 @@ func computeReconciliationAction(strategy apis.SchedulingStrategy, list []*v1alp
toDelete = append(toDelete, potentialDeletions[0:deleteCount]...)
}

if deleteCount > maxDeletions {
if len(toDelete) > maxDeletions {
toDelete = toDelete[0:maxDeletions]
partialReconciliation = true
}
Expand Down
38 changes: 38 additions & 0 deletions pkg/gameserversets/controller_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -157,6 +157,44 @@ func TestComputeReconciliationAction(t *testing.T) {
wantNumServersToAdd: 0,
wantIsPartial: true,
},
{
desc: "DeletingUnhealthyGameServers",
list: []*v1alpha1.GameServer{
gsWithState(v1alpha1.GameServerStateReady),
gsWithState(v1alpha1.GameServerStateUnhealthy),
gsWithState(v1alpha1.GameServerStateUnhealthy),
},
targetReplicaCount: 3,
wantNumServersToAdd: 2,
wantNumServersToDelete: 2,
},
{
desc: "DeletingErrorGameServers",
list: []*v1alpha1.GameServer{
gsWithState(v1alpha1.GameServerStateReady),
gsWithState(v1alpha1.GameServerStateError),
gsWithState(v1alpha1.GameServerStateError),
},
targetReplicaCount: 3,
wantNumServersToAdd: 2,
wantNumServersToDelete: 2,
},
{
desc: "DeletingPartialGameServers",
list: []*v1alpha1.GameServer{
gsWithState(v1alpha1.GameServerStateReady),
gsWithState(v1alpha1.GameServerStateUnhealthy),
gsWithState(v1alpha1.GameServerStateError),
gsWithState(v1alpha1.GameServerStateUnhealthy),
gsWithState(v1alpha1.GameServerStateError),
gsWithState(v1alpha1.GameServerStateUnhealthy),
gsWithState(v1alpha1.GameServerStateError),
},
targetReplicaCount: 3,
wantNumServersToAdd: 2,
wantNumServersToDelete: 3,
wantIsPartial: true,
},
}

for _, tc := range cases {
Expand Down
8 changes: 7 additions & 1 deletion pkg/sdkserver/sdkserver.go
Original file line number Diff line number Diff line change
Expand Up @@ -252,6 +252,12 @@ func (s *SDKServer) updateState() error {
return err
}

// if we are currently in shutdown/being deleted, there is no escaping
if gs.IsBeingDeleted() {
s.logger.Info("GameServerState being shutdown. Skipping update.")
return nil
}

// if the state is currently unhealthy, you can't go back to Ready
if gs.Status.State == stablev1alpha1.GameServerStateUnhealthy {
s.logger.Info("GameServerState already unhealthy. Skipping update.")
Expand Down Expand Up @@ -484,7 +490,7 @@ func (s *SDKServer) sendGameServerUpdate(gs *stablev1alpha1.GameServer) {
func (s *SDKServer) runHealth() {
s.checkHealth()
if !s.healthy() {
s.logger.WithField("gameServerName", s.gameServerName).Info("being marked as not healthy")
s.logger.WithField("gameServerName", s.gameServerName).Info("has failed health check")
s.enqueueState(stablev1alpha1.GameServerStateUnhealthy)
}
}
Expand Down
79 changes: 52 additions & 27 deletions pkg/sdkserver/sdkserver_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -295,37 +295,62 @@ func TestSDKServerSyncGameServer(t *testing.T) {
func TestSidecarUpdateState(t *testing.T) {
t.Parallel()

t.Run("ignore state change when unhealthy", func(t *testing.T) {
m := agtesting.NewMocks()
sc, err := defaultSidecar(m)
assert.Nil(t, err)
sc.gsState = v1alpha1.GameServerStateReady
fixtures := map[string]struct {
f func(gs *v1alpha1.GameServer)
}{
"unhealthy": {
f: func(gs *v1alpha1.GameServer) {
gs.Status.State = v1alpha1.GameServerStateUnhealthy
},
},
"shutdown": {
f: func(gs *v1alpha1.GameServer) {
gs.Status.State = v1alpha1.GameServerStateShutdown
},
},
"deleted": {
f: func(gs *v1alpha1.GameServer) {
now := metav1.Now()
gs.ObjectMeta.DeletionTimestamp = &now
},
},
}

updated := false
for k, v := range fixtures {
t.Run(k, func(t *testing.T) {
m := agtesting.NewMocks()
sc, err := defaultSidecar(m)
assert.Nil(t, err)
sc.gsState = v1alpha1.GameServerStateReady

m.AgonesClient.AddReactor("list", "gameservers", func(action k8stesting.Action) (bool, runtime.Object, error) {
gs := v1alpha1.GameServer{
ObjectMeta: metav1.ObjectMeta{Name: sc.gameServerName, Namespace: sc.namespace},
Status: v1alpha1.GameServerStatus{
State: v1alpha1.GameServerStateUnhealthy,
},
}
return true, &v1alpha1.GameServerList{Items: []v1alpha1.GameServer{gs}}, nil
})
m.AgonesClient.AddReactor("update", "gameservers", func(action k8stesting.Action) (bool, runtime.Object, error) {
updated = true
return true, nil, nil
})
updated := false

stop := make(chan struct{})
defer close(stop)
sc.informerFactory.Start(stop)
assert.True(t, cache.WaitForCacheSync(stop, sc.gameServerSynced))
m.AgonesClient.AddReactor("list", "gameservers", func(action k8stesting.Action) (bool, runtime.Object, error) {
gs := v1alpha1.GameServer{
ObjectMeta: metav1.ObjectMeta{Name: sc.gameServerName, Namespace: sc.namespace},
Status: v1alpha1.GameServerStatus{},
}

err = sc.updateState()
assert.Nil(t, err)
assert.False(t, updated)
})
// apply mutation
v.f(&gs)

return true, &v1alpha1.GameServerList{Items: []v1alpha1.GameServer{gs}}, nil
})
m.AgonesClient.AddReactor("update", "gameservers", func(action k8stesting.Action) (bool, runtime.Object, error) {
updated = true
return true, nil, nil
})

stop := make(chan struct{})
defer close(stop)
sc.informerFactory.Start(stop)
assert.True(t, cache.WaitForCacheSync(stop, sc.gameServerSynced))

err = sc.updateState()
assert.Nil(t, err)
assert.False(t, updated)
})
}
}

func TestSidecarHealthLastUpdated(t *testing.T) {
Expand Down
2 changes: 1 addition & 1 deletion site/content/en/docs/Advanced/limiting-resources.md
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ spec:
spec:
containers:
- name: simple-udp
image: gcr.io/agones-images/udp-server:0.8
image: gcr.io/agones-images/udp-server:0.9
resources:
limit:
cpu: "250m" #this is our limit here
Expand Down
4 changes: 2 additions & 2 deletions site/content/en/docs/Advanced/scheduling-and-autoscaling.md
Original file line number Diff line number Diff line change
Expand Up @@ -80,7 +80,7 @@ spec:
spec:
containers:
- name: simple-udp
image: gcr.io/agones-images/udp-server:0.8
image: gcr.io/agones-images/udp-server:0.9
```
This is the *default* Fleet scheduling strategy. It is designed for dynamic Kubernetes environments, wherein you wish
Expand Down Expand Up @@ -135,7 +135,7 @@ spec:
spec:
containers:
- name: simple-udp
image: gcr.io/agones-images/udp-server:0.8
image: gcr.io/agones-images/udp-server:0.9
```

This Fleet scheduling strategy is designed for static Kubernetes environments, such as when you are running Kubernetes
Expand Down
2 changes: 1 addition & 1 deletion site/content/en/docs/Advanced/service-accounts.md
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ spec:
serviceAccountName: my-special-service-account # a custom service account
containers:
- name: simple-udp
image: gcr.io/agones-images/udp-server:0.8
image: gcr.io/agones-images/udp-server:0.9
```
If a service account is configured, the mounted key is not overwritten, as it assumed that you want to have full control
Expand Down
4 changes: 2 additions & 2 deletions site/content/en/docs/Getting Started/create-fleet.md
Original file line number Diff line number Diff line change
Expand Up @@ -111,7 +111,7 @@ Spec:
Creation Timestamp: <nil>
Spec:
Containers:
Image: gcr.io/agones-images/udp-server:0.8
Image: gcr.io/agones-images/udp-server:0.9
Name: simple-udp
Resources:
Status:
Expand Down Expand Up @@ -308,7 +308,7 @@ status:
creationTimestamp: null
spec:
containers:
- image: gcr.io/agones-images/udp-server:0.8
- image: gcr.io/agones-images/udp-server:0.9
name: simple-udp
resources: {}
status:
Expand Down
2 changes: 1 addition & 1 deletion site/content/en/docs/Getting Started/create-gameserver.md
Original file line number Diff line number Diff line change
Expand Up @@ -107,7 +107,7 @@ Spec:
Creation Timestamp: <nil>
Spec:
Containers:
Image: gcr.io/agones-images/udp-server:0.8
Image: gcr.io/agones-images/udp-server:0.9
Name: simple-udp
Resources:
Limits:
Expand Down
8 changes: 4 additions & 4 deletions site/content/en/docs/Guides/access-api.md
Original file line number Diff line number Diff line change
Expand Up @@ -90,7 +90,7 @@ func main() {
Spec: v1alpha1.GameServerSpec{
Template: corev1.PodTemplateSpec{
Spec: corev1.PodSpec{
Containers: []corev1.Container{{Name: "udp-server", Image: "gcr.io/agones-images/udp-server:0.8"}},
Containers: []corev1.Container{{Name: "udp-server", Image: "gcr.io/agones-images/udp-server:0.9"}},
},
},
},
Expand Down Expand Up @@ -178,7 +178,7 @@ $ curl http://localhost:8001/apis/stable.agones.dev/v1alpha1/namespaces/default/
"kind": "GameServer",
"metadata": {
"annotations": {
"kubectl.kubernetes.io/last-applied-configuration": "{\"apiVersion\":\"stable.agones.dev/v1alpha1\",\"kind\":\"GameServer\",\"metadata\":{\"annotations\":{},\"name\":\"simple-udp\",\"namespace\":\"default\"},\"spec\":{\"containerPort\":7654,\"hostPort\":7777,\"portPolicy\":\"static\",\"template\":{\"spec\":{\"containers\":[{\"image\":\"gcr.io/agones-images/udp-server:0.8\",\"name\":\"simple-udp\"}]}}}}\n"
"kubectl.kubernetes.io/last-applied-configuration": "{\"apiVersion\":\"stable.agones.dev/v1alpha1\",\"kind\":\"GameServer\",\"metadata\":{\"annotations\":{},\"name\":\"simple-udp\",\"namespace\":\"default\"},\"spec\":{\"containerPort\":7654,\"hostPort\":7777,\"portPolicy\":\"static\",\"template\":{\"spec\":{\"containers\":[{\"image\":\"gcr.io/agones-images/udp-server:0.9\",\"name\":\"simple-udp\"}]}}}}\n"
},
"clusterName": "",
"creationTimestamp": "2018-03-02T21:41:05Z",
Expand Down Expand Up @@ -210,7 +210,7 @@ $ curl http://localhost:8001/apis/stable.agones.dev/v1alpha1/namespaces/default/
"spec": {
"containers": [
{
"image": "gcr.io/agones-images/udp-server:0.8",
"image": "gcr.io/agones-images/udp-server:0.9",
"name": "simple-udp",
"resources": {}
}
Expand Down Expand Up @@ -317,7 +317,7 @@ $ curl -d '{"apiVersion":"stable.agones.dev/v1alpha1","kind":"FleetAllocation","
"spec": {
"containers": [
{
"image": "gcr.io/agones-images/udp-server:0.8",
"image": "gcr.io/agones-images/udp-server:0.9",
"name": "simple-udp",
"resources": {}
}
Expand Down
Loading

0 comments on commit fd1bf69

Please sign in to comment.