From 3c38876382d505244af68489211d8916dce07596 Mon Sep 17 00:00:00 2001 From: Zach Loafman Date: Wed, 2 Nov 2022 17:58:55 -0700 Subject: [PATCH] Sync Pod host ports back to GameServer in GCP (#2782) This is the start of the implementation for #2777: * Most of this is mechanical and implements a thin cloud product abstraction layer in pkg/cloud, instantiated with New(product). The product abstraction provides a single function so far: SyncPodPortsToGameServer. * SyncPodPortsToGameServer is inserted as a hook while syncing IP/ports, to let different cloud providers handle port allocation slightly differently (in this case, GKE Autopilot) * In GKE Autopilot, we look for a JSON string like `{"min":7000,"max":8000,"portsAssigned":{"7001":7737,"7002":7738}}` as an indication that the host ports were reassigned (per policy). As a side note to anyone watching, this is currently an unreleased feature. If we see this, we use the provided mapping to map the host ports in the GameServer.Spec. With this change, it's possible to launch a GameServer and get a healthy GameServer Pod by adding the following annotation: ``` annotations: cluster-autoscaler.kubernetes.io/safe-to-evict: "true" autopilot.gke.io/host-port-assignment: '{"min": 7000, "max": 8000}' ``` If this PR causes any issues, the cloud product auto detection can be disabled by setting `agones.cloudProduct=generic`, or forced to GKE Autopilot using `agones.cloudProduct=gke-autopilot`. In a future PR, I will add the host-port-assignment annotation automatically on Autopilot Co-authored-by: Mark Mandel --- cmd/controller/main.go | 17 ++- install/helm/agones/templates/controller.yaml | 2 + .../templates/serviceaccounts/controller.yaml | 5 + install/helm/agones/values.yaml | 1 + install/yaml/install.yaml | 5 + pkg/cloudproduct/cloudproduct.go | 84 +++++++++++++ pkg/cloudproduct/doc.go | 16 +++ pkg/cloudproduct/generic/doc.go | 17 +++ pkg/cloudproduct/generic/generic.go | 25 ++++ pkg/cloudproduct/gke/doc.go | 16 +++ pkg/cloudproduct/gke/gke.go | 84 +++++++++++++ pkg/cloudproduct/gke/gke_test.go | 112 ++++++++++++++++++ pkg/gameservers/controller.go | 13 +- pkg/gameservers/controller_test.go | 3 +- pkg/gameservers/gameservers.go | 7 +- pkg/gameservers/gameservers_test.go | 56 ++++++--- pkg/gameservers/migration.go | 8 +- pkg/gameservers/migration_test.go | 7 +- 18 files changed, 448 insertions(+), 30 deletions(-) create mode 100644 pkg/cloudproduct/cloudproduct.go create mode 100644 pkg/cloudproduct/doc.go create mode 100644 pkg/cloudproduct/generic/doc.go create mode 100644 pkg/cloudproduct/generic/generic.go create mode 100644 pkg/cloudproduct/gke/doc.go create mode 100644 pkg/cloudproduct/gke/gke.go create mode 100644 pkg/cloudproduct/gke/gke_test.go diff --git a/cmd/controller/main.go b/cmd/controller/main.go index 43db1c972a..7a54cdd2d4 100644 --- a/cmd/controller/main.go +++ b/cmd/controller/main.go @@ -28,6 +28,7 @@ import ( agonesv1 "agones.dev/agones/pkg/apis/agones/v1" "agones.dev/agones/pkg/client/clientset/versioned" "agones.dev/agones/pkg/client/informers/externalversions" + "agones.dev/agones/pkg/cloudproduct" "agones.dev/agones/pkg/fleetautoscalers" "agones.dev/agones/pkg/fleets" "agones.dev/agones/pkg/gameserverallocations" @@ -78,6 +79,7 @@ const ( logSizeLimitMBFlag = "log-size-limit-mb" kubeconfigFlag = "kubeconfig" allocationBatchWaitTime = "allocation-batch-wait-time" + cloudProductFlag = "cloud-product" defaultResync = 30 * time.Second ) @@ -105,6 +107,7 @@ func setupLogging(logDir string, logSizeLimitMB int) { // main starts the operator for the gameserver CRD func main() { + ctx := signals.NewSigKillContext() ctlConf := parseEnvFlags() if ctlConf.LogDir != "" { @@ -154,6 +157,11 @@ func main() { logger.WithError(err).Fatal("Could not create the agones api clientset") } + cloudProduct, err := cloudproduct.New(ctx, ctlConf.CloudProduct, kubeClient) + if err != nil { + logger.WithError(err).Fatal("Could not initialize cloud provider") + } + // https server and the items that share the Mux for routing httpsServer := https.NewServer(ctlConf.CertFile, ctlConf.KeyFile) wh := webhooks.NewWebHook(httpsServer.Mux) @@ -207,7 +215,7 @@ func main() { ctlConf.MinPort, ctlConf.MaxPort, ctlConf.SidecarImage, ctlConf.AlwaysPullSidecar, ctlConf.SidecarCPURequest, ctlConf.SidecarCPULimit, ctlConf.SidecarMemoryRequest, ctlConf.SidecarMemoryLimit, ctlConf.SdkServiceAccount, - kubeClient, kubeInformerFactory, extClient, agonesClient, agonesInformerFactory) + kubeClient, kubeInformerFactory, extClient, agonesClient, agonesInformerFactory, cloudProduct) gsSetController := gameserversets.NewController(wh, health, gsCounter, kubeClient, extClient, agonesClient, agonesInformerFactory) fleetController := fleets.NewController(wh, health, kubeClient, extClient, agonesClient, agonesInformerFactory) @@ -219,8 +227,6 @@ func main() { rs = append(rs, httpsServer, gsCounter, gsController, gsSetController, fleetController, fasController, gasController, server) - ctx := signals.NewSigKillContext() - kubeInformerFactory.Start(ctx.Done()) agonesInformerFactory.Start(ctx.Done()) @@ -264,6 +270,7 @@ func parseEnvFlags() config { viper.SetDefault(logDirFlag, "") viper.SetDefault(logLevelFlag, "Info") viper.SetDefault(logSizeLimitMBFlag, 10000) // 10 GB, will be split into 100 MB chunks + viper.SetDefault(cloudProductFlag, cloudproduct.AutoDetect) pflag.String(sidecarImageFlag, viper.GetString(sidecarImageFlag), "Flag to overwrite the GameServer sidecar image that is used. Can also use SIDECAR env variable") pflag.String(sidecarCPULimitFlag, viper.GetString(sidecarCPULimitFlag), "Flag to overwrite the GameServer sidecar container's cpu limit. Can also use SIDECAR_CPU_LIMIT env variable") @@ -288,6 +295,7 @@ func parseEnvFlags() config { pflag.Int32(logSizeLimitMBFlag, 1000, "Log file size limit in MB") pflag.String(logLevelFlag, viper.GetString(logLevelFlag), "Agones Log level") pflag.Duration(allocationBatchWaitTime, viper.GetDuration(allocationBatchWaitTime), "Flag to configure the waiting period between allocations batches") + pflag.String(cloudProductFlag, viper.GetString(cloudProductFlag), "Cloud product. Set to 'auto' to auto-detect, set to 'generic' to force generic behavior, set to 'gke-autopilot' for GKE Autopilot. Can also use CLOUD_PRODUCT env variable.") runtime.FeaturesBindFlags() pflag.Parse() @@ -315,6 +323,7 @@ func parseEnvFlags() config { runtime.Must(viper.BindEnv(logDirFlag)) runtime.Must(viper.BindEnv(logSizeLimitMBFlag)) runtime.Must(viper.BindEnv(allocationBatchWaitTime)) + runtime.Must(viper.BindEnv(cloudProductFlag)) runtime.Must(viper.BindPFlags(pflag.CommandLine)) runtime.Must(runtime.FeaturesBindEnv()) @@ -364,6 +373,7 @@ func parseEnvFlags() config { LogSizeLimitMB: int(viper.GetInt32(logSizeLimitMBFlag)), StackdriverLabels: viper.GetString(stackdriverLabels), AllocationBatchWaitTime: viper.GetDuration(allocationBatchWaitTime), + CloudProduct: viper.GetString(cloudProductFlag), } } @@ -392,6 +402,7 @@ type config struct { LogLevel string LogSizeLimitMB int AllocationBatchWaitTime time.Duration + CloudProduct string } // validate ensures the ctlConfig data is valid. diff --git a/install/helm/agones/templates/controller.yaml b/install/helm/agones/templates/controller.yaml index 43a341e81e..0347ccb6c9 100644 --- a/install/helm/agones/templates/controller.yaml +++ b/install/helm/agones/templates/controller.yaml @@ -115,6 +115,8 @@ spec: value: {{ .Values.agones.featureGates | quote }} - name: ALLOCATION_BATCH_WAIT_TIME value: {{ .Values.agones.controller.allocationBatchWaitTime | quote }} + - name: CLOUD_PRODUCT + value: {{ .Values.agones.cloudProduct | quote }} {{- if .Values.agones.controller.persistentLogs }} - name: LOG_DIR value: "/home/agones/logs" diff --git a/install/helm/agones/templates/serviceaccounts/controller.yaml b/install/helm/agones/templates/serviceaccounts/controller.yaml index e77e4d6ee7..6476b9c0cd 100644 --- a/install/helm/agones/templates/serviceaccounts/controller.yaml +++ b/install/helm/agones/templates/serviceaccounts/controller.yaml @@ -50,6 +50,11 @@ rules: - apiGroups: [""] resources: ["nodes", "secrets"] verbs: ["list", "watch"] +{{- if eq .Values.agones.cloudProduct "auto" }} +- apiGroups: ["admissionregistration.k8s.io"] # only needed for cloudProduct detection + resources: ["mutatingwebhookconfigurations"] + verbs: ["get"] +{{- end}} - apiGroups: ["apiextensions.k8s.io"] resources: ["customresourcedefinitions"] verbs: ["get"] diff --git a/install/helm/agones/values.yaml b/install/helm/agones/values.yaml index 748b81317e..7aec49498d 100644 --- a/install/helm/agones/values.yaml +++ b/install/helm/agones/values.yaml @@ -44,6 +44,7 @@ agones: annotations: {} createPriorityClass: true priorityClassName: agones-system + cloudProduct: "auto" controller: resources: {} # requests: diff --git a/install/yaml/install.yaml b/install/yaml/install.yaml index 6de6741a8e..f2ef648c32 100644 --- a/install/yaml/install.yaml +++ b/install/yaml/install.yaml @@ -14342,6 +14342,9 @@ rules: - apiGroups: [""] resources: ["nodes", "secrets"] verbs: ["list", "watch"] +- apiGroups: ["admissionregistration.k8s.io"] # only needed for cloudProduct detection + resources: ["mutatingwebhookconfigurations"] + verbs: ["get"] - apiGroups: ["apiextensions.k8s.io"] resources: ["customresourcedefinitions"] verbs: ["get"] @@ -14726,6 +14729,8 @@ spec: value: "" - name: ALLOCATION_BATCH_WAIT_TIME value: "500ms" + - name: CLOUD_PRODUCT + value: "auto" - name: LOG_DIR value: "/home/agones/logs" - name: LOG_SIZE_LIMIT_MB diff --git a/pkg/cloudproduct/cloudproduct.go b/pkg/cloudproduct/cloudproduct.go new file mode 100644 index 0000000000..a965632ee2 --- /dev/null +++ b/pkg/cloudproduct/cloudproduct.go @@ -0,0 +1,84 @@ +// Copyright 2022 Google LLC All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package cloudproduct + +import ( + "context" + "fmt" + + agonesv1 "agones.dev/agones/pkg/apis/agones/v1" + "agones.dev/agones/pkg/cloudproduct/generic" + "agones.dev/agones/pkg/cloudproduct/gke" + "agones.dev/agones/pkg/util/runtime" + corev1 "k8s.io/api/core/v1" + "k8s.io/client-go/kubernetes" +) + +// CloudProduct provides a generic interface that abstracts cloud product +// specific functionality. Users should call New() to instantiate a +// specific cloud product interface. +type CloudProduct interface { + // SyncPodPortsToGameServer runs after a Pod has been assigned to a Node and before we sync + // Pod host ports to the GameServer status. + SyncPodPortsToGameServer(*agonesv1.GameServer, *corev1.Pod) error +} + +const ( + // If --cloud-product=auto, auto-detect + AutoDetect = "auto" + + genericProduct = "generic" +) + +var ( + logger = runtime.NewLoggerWithSource("cloudproduct") + productDetectors = []func(context.Context, *kubernetes.Clientset) string{gke.Detect} +) + +// New instantiates a new CloudProduct interface by product name. +func New(ctx context.Context, product string, kc *kubernetes.Clientset) (CloudProduct, error) { + product = autoDetect(ctx, product, kc) + + switch product { + case "gke-autopilot": + return gke.Autopilot() + case genericProduct: + return generic.New() + } + return nil, fmt.Errorf("unknown cloud product: %q", product) +} + +func autoDetect(ctx context.Context, product string, kc *kubernetes.Clientset) string { + if product != AutoDetect { + logger.Infof("Cloud product forced to %q, skipping auto-detection", product) + return product + } + for _, detect := range productDetectors { + product = detect(ctx, kc) + if product != "" { + logger.Infof("Cloud product detected as %q", product) + return product + } + } + logger.Infof("Cloud product defaulted to %q", genericProduct) + return genericProduct +} + +// MustNewGeneric returns the "generic" cloud product, panicking if an error is encountered. +func MustNewGeneric(ctx context.Context) CloudProduct { + c, err := New(ctx, genericProduct, nil) + runtime.Must(err) + return c +} diff --git a/pkg/cloudproduct/doc.go b/pkg/cloudproduct/doc.go new file mode 100644 index 0000000000..6319845bb4 --- /dev/null +++ b/pkg/cloudproduct/doc.go @@ -0,0 +1,16 @@ +// Copyright 2022 Google LLC All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Package cloudproduct provides an abstraction layer to product specific functionality +package cloudproduct diff --git a/pkg/cloudproduct/generic/doc.go b/pkg/cloudproduct/generic/doc.go new file mode 100644 index 0000000000..8a6c4c8395 --- /dev/null +++ b/pkg/cloudproduct/generic/doc.go @@ -0,0 +1,17 @@ +// Copyright 2022 Google LLC All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Package generic provides the generic cloud provider. The generic cloud provider +// should be usable on all clouds, but provides no special functionality. +package generic diff --git a/pkg/cloudproduct/generic/generic.go b/pkg/cloudproduct/generic/generic.go new file mode 100644 index 0000000000..c593ea16f5 --- /dev/null +++ b/pkg/cloudproduct/generic/generic.go @@ -0,0 +1,25 @@ +// Copyright 2022 Google LLC All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +package generic + +import ( + agonesv1 "agones.dev/agones/pkg/apis/agones/v1" + corev1 "k8s.io/api/core/v1" +) + +func New() (*generic, error) { return &generic{}, nil } + +type generic struct{} + +func (*generic) SyncPodPortsToGameServer(*agonesv1.GameServer, *corev1.Pod) error { return nil } diff --git a/pkg/cloudproduct/gke/doc.go b/pkg/cloudproduct/gke/doc.go new file mode 100644 index 0000000000..e4aa662c55 --- /dev/null +++ b/pkg/cloudproduct/gke/doc.go @@ -0,0 +1,16 @@ +// Copyright 2022 Google LLC All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Package gke provides the GKE cloud product interfaces for GKE Standard and Autopilot +package gke diff --git a/pkg/cloudproduct/gke/gke.go b/pkg/cloudproduct/gke/gke.go new file mode 100644 index 0000000000..f2afda1f00 --- /dev/null +++ b/pkg/cloudproduct/gke/gke.go @@ -0,0 +1,84 @@ +// Copyright 2022 Google LLC All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +package gke + +import ( + "context" + "encoding/json" + + agonesv1 "agones.dev/agones/pkg/apis/agones/v1" + "agones.dev/agones/pkg/util/runtime" + "cloud.google.com/go/compute/metadata" + "github.com/pkg/errors" + corev1 "k8s.io/api/core/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/client-go/kubernetes" +) + +const ( + workloadDefaulterWebhook = "workload-defaulter.config.common-webhooks.networking.gke.io" + noWorkloadDefaulter = "failed to get MutatingWebhookConfigurations/workload-defaulter.config.common-webhooks.networking.gke.io (error expected if not on GKE Autopilot)" + hostPortAssignmentAnnotation = "autopilot.gke.io/host-port-assignment" +) + +var logger = runtime.NewLoggerWithSource("gke") + +type gkeAutopilot struct{} + +// hostPortAssignment is the JSON structure of the `host-port-assignment` annotation +type hostPortAssignment struct { + Min int32 `json:"min,omitempty"` + Max int32 `json:"max,omitempty"` + PortsAssigned map[int32]int32 `json:"portsAssigned,omitempty"` // old -> new +} + +func Detect(ctx context.Context, kc *kubernetes.Clientset) string { + if !metadata.OnGCE() { + return "" + } + // Look for the workload defaulter - this is the current best method to detect Autopilot + if _, err := kc.AdmissionregistrationV1().MutatingWebhookConfigurations().Get( + ctx, workloadDefaulterWebhook, metav1.GetOptions{}); err != nil { + logger.WithError(err).WithField("reason", noWorkloadDefaulter).Info( + "Assuming GKE Standard and defaulting to generic provider") + return "" // GKE standard, but we don't need an interface for it just yet. + } + logger.Info("Running on GKE Autopilot (skip detection with --cloud-product=gke-autopilot)") + return "gke-autopilot" +} + +func Autopilot() (*gkeAutopilot, error) { return &gkeAutopilot{}, nil } + +func (*gkeAutopilot) SyncPodPortsToGameServer(gs *agonesv1.GameServer, pod *corev1.Pod) error { + // If applyGameServerAddressAndPort has already filled in Status, SyncPodPortsToGameServer + // has already run. Skip syncing from the Pod again - this avoids having to reason + // about whether we're re-applying the old->new mapping. + if len(gs.Status.Ports) == len(gs.Spec.Ports) { + return nil + } + annotation, ok := pod.ObjectMeta.Annotations[hostPortAssignmentAnnotation] + if !ok { + return nil + } + var hpa hostPortAssignment + if err := json.Unmarshal([]byte(annotation), &hpa); err != nil { + return errors.Wrapf(err, "could not unmarshal annotation %s (value %q)", hostPortAssignmentAnnotation, annotation) + } + for i, p := range gs.Spec.Ports { + if newPort, ok := hpa.PortsAssigned[p.HostPort]; ok { + gs.Spec.Ports[i].HostPort = newPort + } + } + return nil +} diff --git a/pkg/cloudproduct/gke/gke_test.go b/pkg/cloudproduct/gke/gke_test.go new file mode 100644 index 0000000000..d833f06cff --- /dev/null +++ b/pkg/cloudproduct/gke/gke_test.go @@ -0,0 +1,112 @@ +// Copyright 2022 Google LLC All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +package gke + +import ( + "testing" + + agonesv1 "agones.dev/agones/pkg/apis/agones/v1" + "github.com/google/go-cmp/cmp" + "github.com/stretchr/testify/assert" + corev1 "k8s.io/api/core/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" +) + +func TestSyncPodPortsToGameServer(t *testing.T) { + assignmentAnnotation := map[string]string{hostPortAssignmentAnnotation: `{"min":7000,"max":8000,"portsAssigned":{"7001":7737,"7002":7738}}`} + badAnnotation := map[string]string{hostPortAssignmentAnnotation: `good luck parsing this as JSON`} + for name, tc := range map[string]struct { + gs *agonesv1.GameServer + pod *corev1.Pod + wantGS *agonesv1.GameServer + wantErr bool + }{ + "no ports => no change": { + gs: &agonesv1.GameServer{}, + pod: testPod(nil), + wantGS: &agonesv1.GameServer{}, + }, + "no annotation => no change": { + gs: testGameServer([]int32{7777}, nil), + pod: testPod(nil), + wantGS: testGameServer([]int32{7777}, nil), + }, + "annotation => ports mapped": { + gs: testGameServer([]int32{7002, 7001, 7002}, nil), + pod: testPod(assignmentAnnotation), + wantGS: testGameServer([]int32{7738, 7737, 7738}, nil), + }, + "annotation, but ports already assigned => ports mapped": { + gs: testGameServer([]int32{7001, 7002}, []int32{7001, 7002}), + pod: testPod(assignmentAnnotation), + wantGS: testGameServer([]int32{7001, 7002}, []int32{7001, 7002}), + }, + "bad annotation": { + gs: testGameServer([]int32{7002, 7001, 7002}, nil), + pod: testPod(badAnnotation), + wantErr: true, + }, + } { + t.Run(name, func(t *testing.T) { + oldPod := tc.pod.DeepCopy() + err := (&gkeAutopilot{}).SyncPodPortsToGameServer(tc.gs, tc.pod) + if tc.wantErr { + assert.NotNil(t, err) + return + } + if assert.NoError(t, err) { + if diff := cmp.Diff(tc.wantGS, tc.gs); diff != "" { + t.Errorf("GameServer diff (-want +got):\n%s", diff) + } + if diff := cmp.Diff(oldPod, tc.pod); diff != "" { + t.Errorf("Pod was modified (-old +new):\n%s", diff) + } + } + }) + } +} + +func testPod(annotations map[string]string) *corev1.Pod { + return &corev1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Name: "best-game-server", + Namespace: "best-game", + Annotations: annotations, + }, + TypeMeta: metav1.TypeMeta{Kind: "Pod"}, + } +} + +func testGameServer(portSpecIn []int32, portStatusIn []int32) *agonesv1.GameServer { + var portSpec []agonesv1.GameServerPort + for _, port := range portSpecIn { + portSpec = append(portSpec, agonesv1.GameServerPort{HostPort: port}) + } + var portStatus []agonesv1.GameServerStatusPort + for _, port := range portStatusIn { + portStatus = append(portStatus, agonesv1.GameServerStatusPort{Port: port}) + } + return &agonesv1.GameServer{ + ObjectMeta: metav1.ObjectMeta{ + Name: "best-game-server", + Namespace: "best-game", + }, + Spec: agonesv1.GameServerSpec{ + Ports: portSpec, + }, + Status: agonesv1.GameServerStatus{ + Ports: portStatus, + }, + } +} diff --git a/pkg/gameservers/controller.go b/pkg/gameservers/controller.go index 1d4b54b611..057e9b3db5 100644 --- a/pkg/gameservers/controller.go +++ b/pkg/gameservers/controller.go @@ -28,6 +28,7 @@ import ( getterv1 "agones.dev/agones/pkg/client/clientset/versioned/typed/agones/v1" "agones.dev/agones/pkg/client/informers/externalversions" listerv1 "agones.dev/agones/pkg/client/listers/agones/v1" + "agones.dev/agones/pkg/cloudproduct" "agones.dev/agones/pkg/util/crd" "agones.dev/agones/pkg/util/logfields" "agones.dev/agones/pkg/util/runtime" @@ -88,6 +89,7 @@ type Controller struct { creationWorkerQueue *workerqueue.WorkerQueue // handles creation only deletionWorkerQueue *workerqueue.WorkerQueue // handles deletion only recorder record.EventRecorder + cloudProduct cloudproduct.CloudProduct } // NewController returns a new gameserver crd controller @@ -106,7 +108,9 @@ func NewController( kubeInformerFactory informers.SharedInformerFactory, extClient extclientset.Interface, agonesClient versioned.Interface, - agonesInformerFactory externalversions.SharedInformerFactory) *Controller { + agonesInformerFactory externalversions.SharedInformerFactory, + cloudProduct cloudproduct.CloudProduct, +) *Controller { pods := kubeInformerFactory.Core().V1().Pods() gameServers := agonesInformerFactory.Agones().V1().GameServers() @@ -131,8 +135,9 @@ func NewController( nodeSynced: kubeInformerFactory.Core().V1().Nodes().Informer().HasSynced, portAllocator: NewPortAllocator(minPort, maxPort, kubeInformerFactory, agonesInformerFactory), healthController: NewHealthController(health, kubeClient, agonesClient, kubeInformerFactory, agonesInformerFactory), - migrationController: NewMigrationController(health, kubeClient, agonesClient, kubeInformerFactory, agonesInformerFactory), + migrationController: NewMigrationController(health, kubeClient, agonesClient, kubeInformerFactory, agonesInformerFactory, cloudProduct), missingPodController: NewMissingPodController(health, kubeClient, agonesClient, kubeInformerFactory, agonesInformerFactory), + cloudProduct: cloudProduct, } c.baseLogger = runtime.NewLoggerWithType(c) @@ -765,7 +770,7 @@ func (c *Controller) syncGameServerStartingState(ctx context.Context, gs *agones return gs, errors.Wrapf(err, "error retrieving node %s for Pod %s", pod.Spec.NodeName, pod.ObjectMeta.Name) } gsCopy := gs.DeepCopy() - gsCopy, err = applyGameServerAddressAndPort(gsCopy, node, pod) + gsCopy, err = applyGameServerAddressAndPort(gsCopy, node, pod, c.cloudProduct.SyncPodPortsToGameServer) if err != nil { return gs, err } @@ -816,7 +821,7 @@ func (c *Controller) syncGameServerRequestReadyState(ctx context.Context, gs *ag if err != nil { return gs, errors.Wrapf(err, "error retrieving node %s for Pod %s", pod.Spec.NodeName, pod.ObjectMeta.Name) } - gsCopy, err = applyGameServerAddressAndPort(gsCopy, node, pod) + gsCopy, err = applyGameServerAddressAndPort(gsCopy, node, pod, c.cloudProduct.SyncPodPortsToGameServer) if err != nil { return gs, err } diff --git a/pkg/gameservers/controller_test.go b/pkg/gameservers/controller_test.go index e881b50432..8e6939c879 100644 --- a/pkg/gameservers/controller_test.go +++ b/pkg/gameservers/controller_test.go @@ -26,6 +26,7 @@ import ( "agones.dev/agones/pkg/apis/agones" agonesv1 "agones.dev/agones/pkg/apis/agones/v1" + "agones.dev/agones/pkg/cloudproduct" agtesting "agones.dev/agones/pkg/testing" "agones.dev/agones/pkg/util/webhooks" "github.com/heptiolabs/healthcheck" @@ -1946,7 +1947,7 @@ func newFakeController() (*Controller, agtesting.Mocks) { 10, 20, "sidecar:dev", false, resource.MustParse("0.05"), resource.MustParse("0.1"), resource.MustParse("50Mi"), resource.MustParse("100Mi"), "sdk-service-account", - m.KubeClient, m.KubeInformerFactory, m.ExtClient, m.AgonesClient, m.AgonesInformerFactory) + m.KubeClient, m.KubeInformerFactory, m.ExtClient, m.AgonesClient, m.AgonesInformerFactory, cloudproduct.MustNewGeneric(context.Background())) c.recorder = m.FakeRecorder return c, m } diff --git a/pkg/gameservers/gameservers.go b/pkg/gameservers/gameservers.go index 4b0eb24639..6aba3da5ad 100644 --- a/pkg/gameservers/gameservers.go +++ b/pkg/gameservers/gameservers.go @@ -72,7 +72,7 @@ func address(node *corev1.Node) (string, error) { // applyGameServerAddressAndPort gathers the address and port details from the node and pod // and applies them to the GameServer that is passed in, and returns it. -func applyGameServerAddressAndPort(gs *agonesv1.GameServer, node *corev1.Node, pod *corev1.Pod) (*agonesv1.GameServer, error) { +func applyGameServerAddressAndPort(gs *agonesv1.GameServer, node *corev1.Node, pod *corev1.Pod, syncPodPortsToGameServer func(*agonesv1.GameServer, *corev1.Pod) error) (*agonesv1.GameServer, error) { addr, err := address(node) if err != nil { return gs, errors.Wrapf(err, "error getting external address for GameServer %s", gs.ObjectMeta.Name) @@ -80,6 +80,11 @@ func applyGameServerAddressAndPort(gs *agonesv1.GameServer, node *corev1.Node, p gs.Status.Address = addr gs.Status.NodeName = pod.Spec.NodeName + + if err := syncPodPortsToGameServer(gs, pod); err != nil { + return gs, errors.Wrapf(err, "cloud product error syncing ports on GameServer %s", gs.ObjectMeta.Name) + } + // HostPort is always going to be populated, even when dynamic // This will be a double up of information, but it will be easier to read gs.Status.Ports = make([]agonesv1.GameServerStatusPort, len(gs.Spec.Ports)) diff --git a/pkg/gameservers/gameservers_test.go b/pkg/gameservers/gameservers_test.go index d35c5b054e..124880fd72 100644 --- a/pkg/gameservers/gameservers_test.go +++ b/pkg/gameservers/gameservers_test.go @@ -107,23 +107,45 @@ func TestAddress(t *testing.T) { func TestApplyGameServerAddressAndPort(t *testing.T) { t.Parallel() - t.Run("OK scenario", func(t *testing.T) { - gsFixture := &agonesv1.GameServer{ObjectMeta: metav1.ObjectMeta{Name: "test", Namespace: "default"}, - Spec: newSingleContainerSpec(), Status: agonesv1.GameServerStatus{State: agonesv1.GameServerStateRequestReady}} - gsFixture.ApplyDefaults() - node := &corev1.Node{ObjectMeta: metav1.ObjectMeta{Name: nodeFixtureName}, Status: corev1.NodeStatus{Addresses: []corev1.NodeAddress{{Address: ipFixture, Type: corev1.NodeExternalIP}}}} - pod, err := gsFixture.Pod() - require.NoError(t, err) - pod.Spec.NodeName = node.ObjectMeta.Name + noopMod := func(*corev1.Pod) {} + noopSyncer := func(*agonesv1.GameServer, *corev1.Pod) error { return nil } + for name, tc := range map[string]struct { + podMod func(*corev1.Pod) + podSyncer func(*agonesv1.GameServer, *corev1.Pod) error + wantHostPort int32 + }{ + "normal": {noopMod, noopSyncer, 9999}, + "host ports changed after create": { + podMod: func(pod *corev1.Pod) { + pod.Spec.Containers[0].Ports[0].HostPort = 9876 + }, + podSyncer: func(gs *agonesv1.GameServer, pod *corev1.Pod) error { + gs.Spec.Ports[0].HostPort = pod.Spec.Containers[0].Ports[0].HostPort + return nil + }, + wantHostPort: 9876, + }, + } { + t.Run(name, func(t *testing.T) { + gsFixture := &agonesv1.GameServer{ObjectMeta: metav1.ObjectMeta{Name: "test", Namespace: "default"}, + Spec: newSingleContainerSpec(), Status: agonesv1.GameServerStatus{State: agonesv1.GameServerStateRequestReady}} + gsFixture.ApplyDefaults() + node := &corev1.Node{ObjectMeta: metav1.ObjectMeta{Name: nodeFixtureName}, Status: corev1.NodeStatus{Addresses: []corev1.NodeAddress{{Address: ipFixture, Type: corev1.NodeExternalIP}}}} + pod, err := gsFixture.Pod() + require.NoError(t, err) + pod.Spec.NodeName = node.ObjectMeta.Name + tc.podMod(pod) - gs, err := applyGameServerAddressAndPort(gsFixture, node, pod) - require.NoError(t, err) - if assert.NotEmpty(t, gs.Spec.Ports) { - assert.Equal(t, gs.Spec.Ports[0].HostPort, gs.Status.Ports[0].Port) - } - assert.Equal(t, ipFixture, gs.Status.Address) - assert.Equal(t, node.ObjectMeta.Name, gs.Status.NodeName) - }) + gs, err := applyGameServerAddressAndPort(gsFixture, node, pod, tc.podSyncer) + require.NoError(t, err) + if assert.NotEmpty(t, gs.Spec.Ports) { + assert.Equal(t, tc.wantHostPort, gs.Status.Ports[0].Port) + assert.Equal(t, gs.Spec.Ports[0].HostPort, gs.Status.Ports[0].Port) + } + assert.Equal(t, ipFixture, gs.Status.Address) + assert.Equal(t, node.ObjectMeta.Name, gs.Status.NodeName) + }) + } t.Run("No IP specified, err expected", func(t *testing.T) { gsFixture := &agonesv1.GameServer{ObjectMeta: metav1.ObjectMeta{Name: "test", Namespace: "default"}, @@ -134,7 +156,7 @@ func TestApplyGameServerAddressAndPort(t *testing.T) { require.NoError(t, err) pod.Spec.NodeName = node.ObjectMeta.Name - _, err = applyGameServerAddressAndPort(gsFixture, node, pod) + _, err = applyGameServerAddressAndPort(gsFixture, node, pod, noopSyncer) if assert.Error(t, err) { assert.Equal(t, "error getting external address for GameServer test: Could not find an address for Node: node1", err.Error()) } diff --git a/pkg/gameservers/migration.go b/pkg/gameservers/migration.go index 1f4579482b..49447a67f1 100644 --- a/pkg/gameservers/migration.go +++ b/pkg/gameservers/migration.go @@ -23,6 +23,7 @@ import ( getterv1 "agones.dev/agones/pkg/client/clientset/versioned/typed/agones/v1" "agones.dev/agones/pkg/client/informers/externalversions" listerv1 "agones.dev/agones/pkg/client/listers/agones/v1" + "agones.dev/agones/pkg/cloudproduct" "agones.dev/agones/pkg/util/logfields" "agones.dev/agones/pkg/util/runtime" "agones.dev/agones/pkg/util/workerqueue" @@ -55,6 +56,7 @@ type MigrationController struct { nodeSynced cache.InformerSynced workerqueue *workerqueue.WorkerQueue recorder record.EventRecorder + cloudProduct cloudproduct.CloudProduct } // NewMigrationController returns a MigrationController @@ -62,7 +64,8 @@ func NewMigrationController(health healthcheck.Handler, kubeClient kubernetes.Interface, agonesClient versioned.Interface, kubeInformerFactory informers.SharedInformerFactory, - agonesInformerFactory externalversions.SharedInformerFactory) *MigrationController { + agonesInformerFactory externalversions.SharedInformerFactory, + cloudProduct cloudproduct.CloudProduct) *MigrationController { podInformer := kubeInformerFactory.Core().V1().Pods().Informer() gameserverInformer := agonesInformerFactory.Agones().V1().GameServers() @@ -74,6 +77,7 @@ func NewMigrationController(health healthcheck.Handler, gameServerLister: gameserverInformer.Lister(), nodeLister: kubeInformerFactory.Core().V1().Nodes().Lister(), nodeSynced: kubeInformerFactory.Core().V1().Nodes().Informer().HasSynced, + cloudProduct: cloudProduct, } mc.baseLogger = runtime.NewLoggerWithType(mc) @@ -187,7 +191,7 @@ func (mc *MigrationController) syncGameServer(ctx context.Context, key string) e // If the GameServer has yet to become ready, we will reapply the Address and Port // otherwise, we move it to Unhealthy so that a new GameServer will be recreated. if gsCopy.IsBeforeReady() { - gsCopy, err = applyGameServerAddressAndPort(gsCopy, node, pod) + gsCopy, err = applyGameServerAddressAndPort(gsCopy, node, pod, mc.cloudProduct.SyncPodPortsToGameServer) if err != nil { return err } diff --git a/pkg/gameservers/migration_test.go b/pkg/gameservers/migration_test.go index a571dc4842..77e03e6a70 100644 --- a/pkg/gameservers/migration_test.go +++ b/pkg/gameservers/migration_test.go @@ -20,6 +20,7 @@ import ( "time" agonesv1 "agones.dev/agones/pkg/apis/agones/v1" + "agones.dev/agones/pkg/cloudproduct" agtesting "agones.dev/agones/pkg/testing" "github.com/heptiolabs/healthcheck" "github.com/stretchr/testify/assert" @@ -192,7 +193,8 @@ func TestMigrationControllerSyncGameServer(t *testing.T) { for k, v := range fixtures { t.Run(k, func(t *testing.T) { m := agtesting.NewMocks() - c := NewMigrationController(healthcheck.NewHandler(), m.KubeClient, m.AgonesClient, m.KubeInformerFactory, m.AgonesInformerFactory) + c := NewMigrationController(healthcheck.NewHandler(), m.KubeClient, m.AgonesClient, m.KubeInformerFactory, m.AgonesInformerFactory, + cloudproduct.MustNewGeneric(context.Background())) c.recorder = m.FakeRecorder gs := &agonesv1.GameServer{ObjectMeta: metav1.ObjectMeta{Name: "test", Namespace: "default"}, @@ -243,7 +245,8 @@ func TestMigrationControllerSyncGameServer(t *testing.T) { func TestMigrationControllerRun(t *testing.T) { m := agtesting.NewMocks() - c := NewMigrationController(healthcheck.NewHandler(), m.KubeClient, m.AgonesClient, m.KubeInformerFactory, m.AgonesInformerFactory) + c := NewMigrationController(healthcheck.NewHandler(), m.KubeClient, m.AgonesClient, m.KubeInformerFactory, m.AgonesInformerFactory, + cloudproduct.MustNewGeneric(context.Background())) gs := &agonesv1.GameServer{ObjectMeta: metav1.ObjectMeta{Name: "test", Namespace: "default"}, Spec: newSingleContainerSpec(), Status: agonesv1.GameServerStatus{}} gs.ApplyDefaults()