From 087ca0b677f0feee0e9a05c4baebdeecdec2795a Mon Sep 17 00:00:00 2001 From: Gabe brown Date: Wed, 3 Oct 2018 17:34:14 -0700 Subject: [PATCH 01/25] Add AWS install guide --- install/README.md | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) diff --git a/install/README.md b/install/README.md index 492a136717..2b696a6045 100644 --- a/install/README.md +++ b/install/README.md @@ -15,6 +15,10 @@ In this quickstart, we will create a Kubernetes cluster, and populate it with th 1. [Installing Minikube](#installing-minikube) 1. [Creating an agones profile](#creating-an-agones-profile) 1. [Starting Minikube](#starting-minikube) +1. [Setting up an Amazon Web Services EKS cluster](#setting-up-an-amazon-web-services-eks-cluster) + 1. [Create EKS Instance](#create-eks-instance) + 1. [Ensure VPC CNI 1.2 is Running](#ensure-vpc-cni-12-is-running) + 1. [Follow Normal Instructions to Install](#follow-normal-instructions-to-install) 1. [Setting up an Azure Kubernetes Service (AKS) cluster](#setting-up-an-azure-kubernetes-service-aks-cluster) 1. [Choosing your shell](#choosing-your-shell) 1. [Creating the AKS cluster](#creating-the-aks-cluster) @@ -174,6 +178,31 @@ minikube start --kubernetes-version v1.10.0 --vm-driver virtualbox \ > the --bootstrapper=localkube is required since we aren't using the `default` profile. ([bug](https://github.com/kubernetes/minikube/issues/2717)) +# Setting up an Amazon Web Services EKS cluster + +## Create EKS Instance + +Create your EKS instance using the [Getting Started Guide](https://docs.aws.amazon.com/eks/latest/userguide/getting-started.html). + +## Ensure VPC CNI 1.2 is Running + +EKS does not use the normal Kubernetes networking since it is [incompatible with Amazon VPC networking](https://www.contino.io/insights/kubernetes-is-hard-why-eks-makes-it-easier-for-network-and-security-architects). + +In a console, run this command to get your current cni version + +```bash +kubectl describe daemonset aws-node --namespace kube-system | grep Image | cut -d "/" -f 2 +``` +Output should be `amazon-k8s-cni:1.2.0` or newer. To upgrade to version 1.2, run the following command. + +```bash +kubectl apply -f https://raw.githubusercontent.com/aws/amazon-vpc-cni-k8s/master/config/v1.2/aws-k8s-cni.yaml +``` + +## Follow Normal Instructions to Install + +Continue to [Installing Agones](#installing-agones). + # Setting up an Azure Kubernetes Service (AKS) Cluster Follow these steps to create a cluster and install Agones directly on [Azure Kubernetes Service (AKS) ](https://docs.microsoft.com/azure/aks/). From df8294676936d8693e1b663ee40b563b5a808ce6 Mon Sep 17 00:00:00 2001 From: Mark Mandel Date: Wed, 3 Oct 2018 17:31:07 -0700 Subject: [PATCH 02/25] Update instructions for Minikube 0.29.0 Changed the documentation to work for minikube 0.29.0, as well as updating the development tools. 
--- build/Makefile | 5 ++--- build/README.md | 3 +-- install/README.md | 8 +++----- 3 files changed, 6 insertions(+), 10 deletions(-) diff --git a/build/Makefile b/build/Makefile index 62506d71f5..897d968f05 100644 --- a/build/Makefile +++ b/build/Makefile @@ -421,9 +421,8 @@ minikube-test-cluster: DOCKER_RUN_ARGS+=--network=host -v $(minikube_cert_mount) minikube-test-cluster: $(ensure-build-image) minikube-agones-profile # localkube bootstrapper fixes issues with profiles $(MINIKUBE) start --kubernetes-version v1.10.0 --vm-driver $(MINIKUBE_DRIVER) \ - --bootstrapper=localkube \ - --extra-config=apiserver.Admission.PluginNames=NamespaceLifecycle,LimitRanger,ServiceAccount,PersistentVolumeLabel,DefaultStorageClass,DefaultTolerationSeconds,MutatingAdmissionWebhook,ValidatingAdmissionWebhook,ResourceQuota \ - --extra-config=apiserver.Authorization.Mode=RBAC + --extra-config=apiserver.admission-control=NamespaceLifecycle,LimitRanger,ServiceAccount,DefaultStorageClass,DefaultTolerationSeconds,MutatingAdmissionWebhook,ValidatingAdmissionWebhook,ResourceQuota \ + --extra-config=apiserver.authorization-mode=RBAC # wait until the master is up until docker run --rm $(common_mounts) $(DOCKER_RUN_ARGS) $(build_tag) kubectl cluster-info; \ do \ diff --git a/build/README.md b/build/README.md index 59e31f908b..ae7498b7fa 100644 --- a/build/README.md +++ b/build/README.md @@ -191,8 +191,7 @@ This will setup a [Minikube](https://github.com/kubernetes/minikube) cluster, ru Because Minikube runs on a virtualisation layer on the host, some of the standard build and development Make targets need to be replaced by Minikube specific targets. -> We recommend installing version [0.28.0 of minikube](https://github.com/kubernetes/minikube/releases/tag/v0.28.0), -due to issues with other versions +> We recommend installing version [0.29.0 of minikube](https://github.com/kubernetes/minikube/releases/tag/v0.29.0). First, [install Minikube](https://github.com/kubernetes/minikube#installation), which may also require you to install a virtualisation solution, such as [VirtualBox](https://www.virtualbox.org) as well. diff --git a/install/README.md b/install/README.md index 2b696a6045..37e3925842 100644 --- a/install/README.md +++ b/install/README.md @@ -153,8 +153,7 @@ a virtualisation solution, such as [VirtualBox][vb] as well. [minikube]: https://github.com/kubernetes/minikube#installation [vb]: https://www.virtualbox.org -> We recommend installing version [0.28.0 of minikube](https://github.com/kubernetes/minikube/releases/tag/v0.28.0), -due to issues with other versions +> We recommend installing version [0.29.0 of minikube](https://github.com/kubernetes/minikube/releases/tag/v0.29.0). 
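
+
+You can check which version of minikube you have installed with:
+
+```bash
+minikube version
+```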
## Creating an `agones` profile @@ -171,9 +170,8 @@ replaced by a [vm-driver](https://github.com/kubernetes/minikube#requirements) o ```bash minikube start --kubernetes-version v1.10.0 --vm-driver virtualbox \ - --bootstrapper=localkube \ - --extra-config=apiserver.Admission.PluginNames=NamespaceLifecycle,LimitRanger,ServiceAccount,PersistentVolumeLabel,DefaultStorageClass,DefaultTolerationSeconds,MutatingAdmissionWebhook,ValidatingAdmissionWebhook,ResourceQuota \ - --extra-config=apiserver.Authorization.Mode=RBAC + --extra-config=apiserver.admission-control=NamespaceLifecycle,LimitRanger,ServiceAccount,DefaultStorageClass,DefaultTolerationSeconds,MutatingAdmissionWebhook,ValidatingAdmissionWebhook,ResourceQuota \ + --extra-config=apiserver.authorization-mode=RBAC ``` > the --bootstrapper=localkube is required since we aren't using the `default` profile. ([bug](https://github.com/kubernetes/minikube/issues/2717)) From aafa8063ccfd9d7e84cb856671f701df437fd0b5 Mon Sep 17 00:00:00 2001 From: Maxim Makarov Date: Thu, 4 Oct 2018 17:38:40 +0300 Subject: [PATCH 03/25] Fix typo --- install/helm/README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/install/helm/README.md b/install/helm/README.md index 6cf9ef2c09..40e8b25692 100644 --- a/install/helm/README.md +++ b/install/helm/README.md @@ -108,7 +108,7 @@ Specify each parameter using the `--set key=value[,key=value]` argument to `helm ```bash $ helm install --name my-release --namespace agones-system \ - --set agones.namespace=mynamespace,gameservers.minPort=1000,gamesevers.maxPort=5000 agones + --set agones.namespace=mynamespace,gameservers.minPort=1000,gameservers.maxPort=5000 agones ``` The above command sets the namespace where Agones is deployed to `mynamespace`. Additionally Agones will use a dynamic port allocation range of 1000-5000. 
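
Alternatively, the same parameters can be supplied through a YAML file passed with `-f`. A sketch (`my-values.yaml` is a hypothetical file; its keys mirror the `--set` flags above):

```yaml
# my-values.yaml - hypothetical values file mirroring the --set example
agones:
  namespace: mynamespace
gameservers:
  minPort: 1000
  maxPort: 5000
```

```bash
$ helm install --name my-release --namespace agones-system -f my-values.yaml agones
```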
@@ -130,4 +130,4 @@ For most use cases the controller would have required a restart anyway (eg: con

## Confirm Agones is running

-To confirm Agones is up and running, [go to the next section](../README.md#confirming-agones-started-successfully)
\ No newline at end of file
+To confirm Agones is up and running, [go to the next section](../README.md#confirming-agones-started-successfully)

From e309bba9da46dd1808960bdf18927dd27c1b8f24 Mon Sep 17 00:00:00 2001
From: Mark Mandel
Date: Sat, 6 Oct 2018 12:04:17 -0700
Subject: [PATCH 04/25] Fix for flaky test TestControllerAddress

---
 pkg/gameservers/controller_test.go | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/pkg/gameservers/controller_test.go b/pkg/gameservers/controller_test.go
index 09cd98a52b..3bac274f3b 100644
--- a/pkg/gameservers/controller_test.go
+++ b/pkg/gameservers/controller_test.go
@@ -949,7 +949,10 @@ func TestControllerAddress(t *testing.T) {
 			return true, &corev1.NodeList{Items: []corev1.Node{fixture.node}}, nil
 		})
 
-	_, cancel := agtesting.StartInformers(mocks, c.gameServerSynced)
+	v1 := mocks.KubeInformerFactory.Core().V1()
+	nodeSynced := v1.Nodes().Informer().HasSynced
+	podSynced := v1.Pods().Informer().HasSynced
+	_, cancel := agtesting.StartInformers(mocks, c.gameServerSynced, podSynced, nodeSynced)
 	defer cancel()
 
 	addr, err := c.address(&pod)

From cd874a2e1a8427fcb5311dbc0ed686f66ff8bfb4 Mon Sep 17 00:00:00 2001
From: slartibaartfast
Date: Mon, 10 Sep 2018 12:36:00 -0400
Subject: [PATCH 05/25] Add example allocator service, docs

---
 README.md                                    |   9 +-
 docs/access_api.md                           |  29 +-
 docs/create_allocator_service.md             | 333 ++++++++++++++++++
 docs/create_fleet.md                         |  18 +-
 docs/edit_first_game_server.md               |  11 +-
 examples/allocator-service/README.md         |  10 +
 .../allocator-service/allocator-ingress.yaml |  18 +
 .../allocator-service/allocator-service.yaml |  61 ++++
 examples/allocator-service/dockerfile        |  24 ++
 examples/allocator-service/main.go           | 184 ++++++++++
 .../allocator-service/service-account.yaml   |  45 +++
 11 files changed, 714 insertions(+), 28 deletions(-)
 create mode 100644 docs/create_allocator_service.md
 create mode 100644 examples/allocator-service/README.md
 create mode 100644 examples/allocator-service/allocator-ingress.yaml
 create mode 100644 examples/allocator-service/allocator-service.yaml
 create mode 100644 examples/allocator-service/dockerfile
 create mode 100644 examples/allocator-service/main.go
 create mode 100644 examples/allocator-service/service-account.yaml

diff --git a/README.md b/README.md
index 84c0cdc711..01ec90caf1 100644
--- a/README.md
+++ b/README.md
@@ -18,14 +18,14 @@ This software is currently alpha, and subject to change. Not to be used in produ
- Client SDKs for integration with dedicated game servers to work with Agones.

## Why does this project exist?
-For more details on why this project was written, read the 
+For more details on why this project was written, read the
[announcement blog post](https://cloudplatform.googleblog.com/2018/03/introducing-Agones-open-source-multiplayer-dedicated-game-server-hosting-built-on-Kubernetes.html).
## Requirements - Kubernetes cluster version 1.9+ - [Minikube](https://github.com/kubernetes/minikube), [Google Kubernetes Engine](https://cloud.google.com/kubernetes-engine/) and [Azure Kubernetes Service](https://azure.microsoft.com/en-us/services/kubernetes-service/) have been tested - If you are creating and managing your own Kubernetes cluster, the - [MutatingAdmissionWebhook](https://kubernetes.io/docs/admin/admission-controllers/#mutatingadmissionwebhook-beta-in-19), and + [MutatingAdmissionWebhook](https://kubernetes.io/docs/admin/admission-controllers/#mutatingadmissionwebhook-beta-in-19), and [ValidatingAdmissionWebhook](https://kubernetes.io/docs/admin/admission-controllers/#validatingadmissionwebhook-alpha-in-18-beta-in-19) admission controllers are required. We also recommend following the @@ -52,6 +52,9 @@ Documentation and usage guides on how to develop and host dedicated game servers - [GameServer Health Checking](./docs/health_checking.md) - [Accessing Agones via the Kubernetes API](./docs/access_api.md) +### Tutorials + - [Create an Allocator Service (Go)](./docs/create_allocator_service.md) - Learn to programmatically access Agones via the API + ### Reference - [Game Server Specification](./docs/gameserver_spec.md) - [Fleet Specification](./docs/fleet_spec.md) @@ -80,7 +83,7 @@ Please read the [contributing](CONTRIBUTING.md) guide for directions on submitti See the [Developing, Testing and Building Agones](build/README.md) documentation for developing, testing and building Agones from source. -The [Release Process](docs/governance/release_process.md) documentation displays the project's upcoming release calendar and release process. +The [Release Process](docs/governance/release_process.md) documentation displays the project's upcoming release calendar and release process. Agones is in active development - we would love your help in shaping its future! diff --git a/docs/access_api.md b/docs/access_api.md index d18e82bc97..320375500f 100644 --- a/docs/access_api.md +++ b/docs/access_api.md @@ -1,15 +1,15 @@ # Accessing Agones via the Kubernetes API It's likely that we will want to programmatically interact with Agones. Everything that can be done -via the `kubectl` and yaml configurations can also be done via +via the `kubectl` and yaml configurations can also be done via the [Kubernetes API](https://kubernetes.io/docs/concepts/overview/kubernetes-api/). Installing Agones creates several [Custom Resource Definitions (CRD)](https://kubernetes.io/docs/concepts/api-extension/custom-resources), which can be accessed and manipulated through the Kubernetes API. -Kubernetes has multiple [client libraries](https://kubernetes.io/docs/reference/client-libraries/), however, -at time of writing, only -the [Go](https://github.com/kubernetes/client-go) and +Kubernetes has multiple [client libraries](https://kubernetes.io/docs/reference/client-libraries/), however, +at time of writing, only +the [Go](https://github.com/kubernetes/client-go) and [Python](https://github.com/kubernetes-client/python/) clients are documented to support accessing CRDs. This can be found in the [Accessing a custom resource](https://kubernetes.io/docs/concepts/api-extension/custom-resources/#accessing-a-custom-resource) @@ -34,21 +34,21 @@ If you plan to run your code in the same cluster as the Agones install, have a l [in cluster configuration](https://github.com/kubernetes/client-go/tree/master/examples/in-cluster-client-configuration) example from the Kubernetes Client. 
-If you plan to run your code outside the Kubernetes cluster as your Agones install, +If you plan to run your code outside the Kubernetes cluster as your Agones install, look at the [out of cluster configuration](https://github.com/kubernetes/client-go/tree/master/examples/out-of-cluster-client-configuration) example from the Kubernetes client. ### Example The following is an example of a in-cluster configuration, that creates a `Clientset` for Agones -and then creates a `GameServer`. +and then creates a `GameServer`. ```go package main import ( "fmt" - + "agones.dev/agones/pkg/apis/stable/v1alpha1" "agones.dev/agones/pkg/client/clientset/versioned" corev1 "k8s.io/api/core/v1" @@ -62,9 +62,9 @@ func main() { if err != nil { logger.WithError(err).Fatal("Could not create in cluster config") } - + // Access to standard Kubernetes resources through the Kubernetes Clientset - // We don't actually need this for this example, but it's just here for + // We don't actually need this for this example, but it's just here for // illustrative purposes kubeClient, err := kubernetes.NewForConfig(config) if err != nil { @@ -92,7 +92,7 @@ func main() { if err != nil { panic(err) } - + fmt.Printf("New game servers' name is: %s", newGS.ObjectMeta.Name) } @@ -103,8 +103,8 @@ func main() { If there isn't a client written in your preferred language, it is always possible to communicate directly with Kubernetes API to interact with Agones. -The Kubernetes API can be authenticated and exposed locally through the -[`kubectl proxy`](https://kubernetes.io/docs/tasks/access-kubernetes-api/http-proxy-access-api/) +The Kubernetes API can be authenticated and exposed locally through the +[`kubectl proxy`](https://kubernetes.io/docs/tasks/access-kubernetes-api/http-proxy-access-api/) For example: @@ -339,3 +339,8 @@ The [Verb Resources](https://github.com/kubernetes/community/blob/master/contrib section provide the more details on the API conventions that are used in the Kubernetes API. It may also be useful to look at the [API patterns for standard Kubernetes resources](https://v1-10.docs.kubernetes.io/docs/reference/generated/kubernetes-api/v1.10/#-strong-write-operations-strong--54). + + +## Next Steps + +Learn how to interact with Agones programmatically through the API while creating an [Allocator Service](./create_allocator_service.md). diff --git a/docs/create_allocator_service.md b/docs/create_allocator_service.md new file mode 100644 index 0000000000..64de616686 --- /dev/null +++ b/docs/create_allocator_service.md @@ -0,0 +1,333 @@ +# Tutorial Create an Allocator Service + +This tutorial describes how to interact programmatically with the [Agones API](https://godoc.org/agones.dev/agones/pkg/client/clientset/versioned/typed/stable/v1alpha1). To do this, we will implement a [Service](https://kubernetes.io/docs/concepts/services-networking/service/) which allocates a Game Server on demand by calling the Create() method of the FleetAllocationInterface. After creating the fleet allocation, we will return the JSON encoded GameServerStatus of the allocated GameServer. + +The type of service we will be learning about could be used by a game client to connect directly to a dedicated Game Server, as part of a larger system, such as a matchmaker service, or in conjunction with a database of level transition data. We will be using the service as a vehicle with which to execute the API calls found in our main.go file. 
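
+
+At its core, the service boils down to a single Agones API call. As a preview (a sketch only; the full, runnable version, including the client setup and the constants used here, is in main.go below):
+
+```go
+// Ask Agones to allocate a GameServer out of the simple-udp fleet.
+fa := &v1alpha1.FleetAllocation{
+	ObjectMeta: v1.ObjectMeta{GenerateName: "simple-udp-", Namespace: "default"},
+	Spec:       v1alpha1.FleetAllocationSpec{FleetName: "simple-udp"},
+}
+newFleetAllocation, err := agonesClient.StableV1alpha1().FleetAllocations("default").Create(fa)
+```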
+
+## Objectives
+- Create a secure allocator service
+- Deploy the service to [GKE](https://cloud.google.com/kubernetes-engine/)
+- Allocate a Game Server from a Fleet using the Agones API
+
+## Prerequisites
+1. [Docker](https://www.docker.com/get-started/)
+2. Agones installed on GKE, running a simple-udp fleet
+3. kubectl properly configured
+4. A local copy of the [allocator service](https://github.com/GoogleCloudPlatform/agones/tree/master/examples/allocator-service)
+5. A repository for Docker images, such as [Docker Hub](https://hub.docker.com/) or [GC Container Registry](https://cloud.google.com/container-registry/)
+
+
+>NOTE: Agones requires Kubernetes version 1.9 with role-based access controls (RBAC) and MutatingAdmissionWebhook features activated. To check your version, enter `kubectl version`.
+
+To install on GKE, follow the install instructions (if you haven't already) at
+[Setting up a Google Kubernetes Engine (GKE) cluster](../install/README.md#setting-up-a-google-kubernetes-engine-gke-cluster). Also complete the "Enabling creation of RBAC resources" and "Installing Agones" sets of instructions on the same page.
+
+While not required, you may wish to review the [Create a Game Server](./create_gameserver.md), [Create Game Server Fleet](./create_fleet.md), and/or [Edit a Game Server](./edit_first_game_server.md) quickstarts.
+
+
+### 1. Build and Push the Service
+Change directories to your local agones/examples/allocator-service directory and build a new Docker image. The multi-stage Dockerfile will pull down all the dependencies for you and build the executable. For example, where USER is your username, REPO is your repository, and TAG is your tag:
+```
+docker build -t [USER]/allocator-service:[TAG] .
+```
+
+Push it to your repository:
+```
+docker push [USER]/allocator-service:[TAG]
+```
+
+Edit allocator-service.yaml to point to your new image:
+```
+containers:
+- name: fleet-allocator
+  image: [REPO]/[USER]/allocator-service:[TAG]
+  imagePullPolicy: Always
+```
+
+
+### 2. Create Firewall Rules
+
+Let's create some [firewall](https://kubernetes.io/docs/tasks/access-application-cluster/configure-cloud-provider-firewall/) rules that will be used by Kubernetes health checks and the Ingress which we will create shortly.
+
+First, we will make one for the HTTPS health checks that will be sent to our service by running:
+```
+gcloud compute firewall-rules create fleet-allocator-healthcheck \
+  --allow tcp \
+  --source-ranges 130.211.0.0/22,35.191.0.0/16 \
+  --target-tags fleet-allocator \
+  --description "Firewall to allow health check of fleet allocator service"
+```
+
+The output should be something like:
+```
+Creating firewall...done.
+NAME                         NETWORK  DIRECTION  PRIORITY  ALLOW  DENY  DISABLED
+fleet-allocator-healthcheck  default  INGRESS    1000      tcp          False
+```
+
+Create a firewall rule for nodePort traffic on the range of ports used by Ingress services of type NodePort. We are using NodePort because it supports TLS.
+```
+gcloud compute firewall-rules create nodeport-rule \
+  --allow=tcp:30000-32767 \
+  --target-tags fleet-allocator \
+  --description "Firewall to allow nodePort traffic of fleet allocator service"
+```
+
+The output should be something like:
+```
+Creating firewall...done.
+NAME           NETWORK  DIRECTION  PRIORITY  ALLOW            DENY  DISABLED
+nodeport-rule  default  INGRESS    1000      tcp:30000-32767        False
+```
+
+
+### 3. 
Make It Secure
+Let's keep security in mind from the beginning by creating a certificate, key and secret for the allocator service, and another set for the web server.
+
+Pick a more permanent location for the files if you like - /tmp may be purged depending on your operating system.
+
+Create a public/private key pair for the allocator service:
+```
+openssl req -x509 -nodes -days 365 -newkey rsa:2048 -keyout /tmp/allocsvc.key -out /tmp/allocsvc.crt -subj "/CN=my-allocator/O=my-allocator"
+```
+
+The output should be something like:
+```
+Generating a 2048 bit RSA private key
+....................................................+++
+......................+++
+writing new private key to '/tmp/allocsvc.key'
+-----
+```
+
+Create a public/private key pair that will be bound to the pod and used by the web server:
+```
+openssl req -x509 -nodes -days 365 -newkey rsa:2048 -keyout /tmp/tls.key -out /tmp/tls.crt -subj "/CN=my-allocator-w3/O=my-allocator-w3"
+```
+
+The output should be something like:
+```
+Generating a 2048 bit RSA private key
+....................................................+++
+......................+++
+writing new private key to '/tmp/tls.key'
+-----
+```
+
+
+### 4. Create Kubernetes Secrets
+
+The allocatorsecret will allow the service to use TLS for connections with workers.
+
+Create the [secret](https://kubernetes.io/docs/concepts/configuration/secret/) by running this command:
+```
+kubectl create secret tls allocatorsecret --cert=/tmp/allocsvc.crt --key=/tmp/allocsvc.key
+```
+
+The output should be something like:
+```
+secret "allocatorsecret" created
+```
+
+The allocatorw3secret will let data be served by the web server over HTTPS.
+
+Create the secret by running this command:
+```
+kubectl create secret tls allocatorw3secret --cert=/tmp/tls.crt --key=/tmp/tls.key
+```
+The output should be something like:
+```
+secret "allocatorw3secret" created
+```
+
+See that the secrets exist by running:
+```
+kubectl get secrets
+```
+
+The output should contain the secrets:
+```
+NAME                 TYPE                DATA      AGE
+...
+allocatorsecret      kubernetes.io/tls   2         29s
+allocatorw3secret    kubernetes.io/tls   2         15s
+...
+```
+
+
+### 5. Create the Service Account
+This service will interact with Agones via the Agones API by using a [service account](https://kubernetes.io/docs/tasks/configure-pod-container/configure-service-account/) named fleet-allocator. Specifically, the fleet-allocator service account is granted permissions to perform create operations against FleetAllocation objects, and get operations against Fleet objects.
+
+Create the service account by changing directories to your local agones/examples/allocator-service directory and running this command:
+```
+kubectl create -f service-account.yaml
+```
+
+The output should look like this:
+```
+role "fleet-allocator" created
+serviceaccount "fleet-allocator" created
+rolebinding "fleet-allocator" created
+```
+
+
+### 6. Define and Deploy the Service
+The service definition defines a [nodePort](https://kubernetes.io/docs/concepts/services-networking/service/#nodeport) service which uses HTTPS, and sets up ports and names. The deployment describes the number of replicas we would like, which account to use, which image to use, and defines a health check.
+
+Define and deploy the service by running this command:
+```
+kubectl create -f allocator-service.yaml
+```
+
+The output should look like this:
+```
+service "fleet-allocator-backend" created
+deployment "fleet-allocator" created
+```
+
+
+### 7. 
Deploy the Ingress Resource
+This [Ingress](https://kubernetes.io/docs/concepts/services-networking/ingress/) directs traffic to the allocator service using an ephemeral IP address. The allocator service pod needs to exist and the readiness probe should be passing health checks before the ingress is created.
+
+Deploy the Ingress with this command:
+```
+kubectl apply -f allocator-ingress.yaml
+```
+
+The output should look like this:
+```
+ingress "fleet-allocator-ingress" created
+```
+
+
+### 8. Retrieve the Ephemeral Public IP Address
+After deployment, it will take about a minute for the IP address to be present, and up to 10 minutes before it can start returning data.
+
+Run this command to get the IP address:
+```
+kubectl get ingress fleet-allocator-ingress
+```
+
+The output should look something like this:
+```
+NAME                      HOSTS     ADDRESS          PORTS     AGE
+fleet-allocator-ingress   *         35.186.225.103   80, 443   1m
+```
+
+To learn more about the status of the ingress, run:
+```
+kubectl get ingress fleet-allocator-ingress -o yaml
+```
+
+When the output shows the ingress.kubernetes.io/backends as 'HEALTHY' rather than 'UNHEALTHY' or 'UNKNOWN', it is probably ready.
+```
+apiVersion: extensions/v1beta1
+kind: Ingress
+metadata:
+  annotations:
+    ingress.kubernetes.io/backends: '{"k8s-be-30021--7e98a70481f48a13":"HEALTHY"}'
+    ingress.kubernetes.io/https-forwarding-rule: k8s-fws-default-fleet-allocator-ingress--7e98a70481f48a13
+    ingress.kubernetes.io/https-target-proxy: k8s-tps-default-fleet-allocator-ingress--7e98a70481f48a13
+    ingress.kubernetes.io/ssl-cert: k8s-ssl-1ab99915a1f6b5f1-b2a9924cee73d20a--7e98a70481f48a13
+    ingress.kubernetes.io/url-map: k8s-um-default-fleet-allocator-ingress--7e98a70481f48a13
+    kubectl.kubernetes.io/last-applied-configuration: |
+      {"apiVersion":"extensions/v1beta1","kind":"Ingress","metadata":{"annotations":{"kubernetes.io/ingress.allow-http":"false","kubernetes.io/ingress.class":"gce"},"labels":{"app":"fleet-allocator"},"name":"fleet-allocator-ingress","namespace":"default"},"spec":{"backend":{"serviceName":"fleet-allocator-backend","servicePort":8000},"tls":[{"secretName":"allocatorsecret"}]}}
+    kubernetes.io/ingress.allow-http: "false"
+    kubernetes.io/ingress.class: gce
+  creationTimestamp: 2018-09-23T19:13:36Z
+  generation: 1
+  labels:
+    app: fleet-allocator
+  name: fleet-allocator-ingress
+  namespace: default
+  resourceVersion: "4086"
+  selfLink: /apis/extensions/v1beta1/namespaces/default/ingresses/fleet-allocator-ingress
+  uid: c5a149b3-bf64-11e8-8a6e-42010a8e013f
+spec:
+  backend:
+    serviceName: fleet-allocator-backend
+    servicePort: 8000
+  tls:
+  - secretName: allocatorsecret
+status:
+  loadBalancer:
+    ingress:
+    - ip: 35.186.225.103
+```
+
+### 9. Check Game Servers
+Let's make sure that we have one or more Game Servers in a ready state by running this command:
+```
+kubectl get gs -o=custom-columns=NAME:.metadata.name,STATUS:.status.state,IP:.status.address,PORT:.status.ports
+```
+
+For a fleet of 2 replicas, you should see 2 Game Servers with a Status of Ready:
+```
+NAME                     STATUS    IP               PORT
+simple-udp-s2snf-765bc   Ready     35.231.204.26    [map[name:default port:7260]]
+simple-udp-s2snf-vf6l8   Ready     35.196.162.169   [map[name:default port:7591]]
+```
+
+If there is no fleet, please review [Create Game Server Fleet](../docs/create_fleet.md).
+
+
+### 10. Allocate a Game Server
+Now that the ingress has been created, let's allocate a Game Server by passing in our user and key to the /address endpoint. 
This will call the allocate() function in main.go, which will return a JSON string of the GameServerStatus of an allocated GameServer, or an error. The service uses Basic Auth to provide some security as to who can allocate GameServer resources, and the generated key is in main.go, in the function basicAuth(). Read the comments and code in main.go for a more detailed explanation of each function and method. + +Allocate a Game Server by running this command: +``` +curl -k -u v1GameClientKey:EAEC945C371B2EC361DE399C2F11E https://[the.ip.address]/address +``` + +The output should show the JSON of the GameServerStatus, similar to this: +``` +{"status":{"state":"Allocated","ports":[{"name":"default","port":7260}],"address":"35.231.204.26","nodeName":"gke-agones-simple-udp-cluste-default-pool-e03a9bde-000f"}} +``` + +You may need to wait a few moments longer if the output has ssl errors like this: +``` +curl: (35) error:14094410:SSL routines:ssl3_read_bytes:sslv3 alert handshake failure +``` + +Check the Game Servers again, and notice the Allocated Status. You should see something like this: +``` +NAME STATUS IP PORT +simple-udp-s2snf-765bc Allocated 35.231.204.26 [map[name:default port:7260]] +simple-udp-s2snf-vf6l8 Ready 35.196.162.169 [map[name:default port:7591]] +``` + +Congratulations, your call to the API has allocated a Game Server from your simple-udp Fleet! + + +### 11. Cleanup +You can delete the allocator service and associated resources with the following commands. + +Delete the Ingress +``` +kubectl delete ingress fleet-allocator-ingress +``` + +Delete the Service +``` +kubectl delete -f allocator-service.yaml +``` + +Delete the Service Account +``` +kubectl delete -f service-account.yaml +``` + +Delete the health check and firewall rules +``` +gcloud compute health-checks delete fleet-allocator-healthcheck +gcloud compute firewall-rules delete fleet-allocator-healthcheck +gcloud compute health-checks delete nodeport-rule +gcloud compute firewall-rules delete nodeport-rule +``` + + +### Next Steps +- Customize the service by changing the constants and service key in main.go +- Make the [IP Address Permanent](https://cloud.google.com/kubernetes-engine/docs/tutorials/configuring-domain-name-static-ip) +- Create an A record that points to your permanent IP address +- [Create a Fleet Autoscaler](./create_fleetautoscaler.md) diff --git a/docs/create_fleet.md b/docs/create_fleet.md index 615d096ad5..d879473ffb 100644 --- a/docs/create_fleet.md +++ b/docs/create_fleet.md @@ -1,6 +1,6 @@ # Quickstart Create a Game Server Fleet -This guide covers how you can quickly get started using Agones to create a Fleet +This guide covers how you can quickly get started using Agones to create a Fleet of warm GameServers ready for you to allocate out of and play on! ## Prerequisites @@ -44,7 +44,7 @@ fleet "simple-udp" created This has created a Fleet record inside Kubernetes, which in turn creates two warm [GameServers](gameserver_spec.md) to be available to being allocated for usage for a game session. - + ``` kubectl get fleet ``` @@ -55,7 +55,7 @@ NAME AGE simple-udp 5m ``` -You can also see the GameServers that have been created by the Fleet by running `kubectl get gameservers`, +You can also see the GameServers that have been created by the Fleet by running `kubectl get gameservers`, the GameServer will be prefixed by `simple-udp`. ``` @@ -289,7 +289,7 @@ grow and shrink. ### 6. 
Connect to the GameServer

Since we've only got one allocation, we'll just grab the details of the IP and port of the
-only allocated `GameServer`: 
+only allocated `GameServer`:

```
kubectl get $(kubectl get fleetallocation -o name) -o jsonpath='{.status.GameServer.status.address}:{.status.GameServer.status.ports[0].port}'
```

If you run `kubectl describe gs | grep State` again - either the GameServer will be replaced with a new `Ready` `GameServer`, or it will be in `Shutdown` state, on the way to being deleted.

Since we are running a `Fleet`, Agones will always do its best to ensure there are always the configured number
-of `GameServers` in the pool in either a `Ready` or `Allocated` state.
+of `GameServers` in the pool in either a `Ready` or `Allocated` state.

### 7. Deploy a new version of the GameServer on the Fleet

We can also change the configuration of the `GameServer` of the running `Fleet`, and have the changes
roll out, without interrupting the currently `Allocated` `GameServers`.

Let's also allocate ourselves a `GameServer`:

```
kubectl create -f https://raw.githubusercontent.com/GoogleCloudPlatform/agones/master/examples/simple-udp/fleetallocation.yaml -o yaml
```

-We should now have four `Ready` `GameServers` and one `Allocated`.
+We should now have four `Ready` `GameServers` and one `Allocated`.

We can check this by running `kubectl get gs -o=custom-columns=NAME:.metadata.name,STATUS:.status.state,IP:.status.address,PORT:.status.ports`.

with a Container Port of `6000`.

> NOTE: This will make it such that you can no longer connect to the simple-udp game server.

-Run `watch kubectl get gs -o=custom-columns=NAME:.metadata.name,STATUS:.status.state,CONTAINERPORT:.spec.ports[0].containerPort`
+Run `watch kubectl get gs -o=custom-columns=NAME:.metadata.name,STATUS:.status.state,CONTAINERPORT:.spec.ports[0].containerPort` 
until you can see that there is one of `7654`, which is the `Allocated` `GameServer`, and four instances of `6000`, which is the new configuration.

You have now deployed a new version of your game!

## Next Steps

-You can now create a fleet autoscaler to automatically resize your fleet based on the actual usage.
+You can now create a fleet autoscaler to automatically resize your fleet based on the actual usage. 

See [Create a Fleet Autoscaler](./create_fleetautoscaler.md). Or if you want to try to use your own GameServer container, make sure you have properly integrated the [Agones SDK](../sdks/).
+
+If you would like to learn how to programmatically allocate a Game Server from the fleet using the Agones API, see the [Allocator Service](./create_allocator_service.md) tutorial.
diff --git a/docs/edit_first_game_server.md b/docs/edit_first_game_server.md
index e4dc8ee211..84df7a715d 100644
--- a/docs/edit_first_game_server.md
+++ b/docs/edit_first_game_server.md
@@ -1,11 +1,12 @@
-# Getting Started
+# Quickstart Edit a Game Server

The following guide is for developers without Docker or Kubernetes experience, that want to use the simple-udp example as a starting point for a custom game server.

This guide addresses Google Kubernetes Engine and Minikube. We would welcome a Pull Request to expand this to include other platforms as well.

## Prerequisites
-1. Downland and install Golang from https://golang.org/dl/.
-2. Install Docker from https://www.docker.com/get-docker.
-3. Install Agones on GKE or Minikube.
+1. A [Go](https://golang.org/dl/) environment
+2. [Docker](https://www.docker.com/get-started/)
+3. Agones installed on GKE or Minikube
+4. 
kubectl properly configured To install on GKE, follow the install instructions (if you haven't already) at [Setting up a Google Kubernetes Engine (GKE) cluster](../install/README.md#setting-up-a-google-kubernetes-engine-gke-cluster). Also complete the "Enabling creation of RBAC resources" and "Installing Agones" sets of instructions on the same page. @@ -14,7 +15,7 @@ To install locally on Minikube, read [Setting up a Minikube cluster](../install/ ## Modify the code and push another new image -### Modify the simple-udp example source source code +### Modify the simple-udp example source code Modify the main.go file. For example: Change main.go line 92: diff --git a/examples/allocator-service/README.md b/examples/allocator-service/README.md new file mode 100644 index 0000000000..2be0823a9b --- /dev/null +++ b/examples/allocator-service/README.md @@ -0,0 +1,10 @@ +# Simple Allocator Service + +This service provides an example of using the [Agones API](https://godoc.org/agones.dev/agones/pkg/client/clientset/versioned/typed/stable/v1alpha1) to allocate a GameServer from a Fleet, and is used in the [Create an Allocator Service (Go)](../../docs/create_allocator_service.md) tutorial. + +## Allocator Service +The service exposes an endpoint which allows client calls to FleetAllocationInterface.Create() over a secure connection. It also provides examples of how to create a service account with the least necessary privileges, how to create an Ingress, and how services can use secrets specific to their respective accounts. + +When the endpoint is called and a GameServer is allocated, it returns the JSON encoded GameServerStatus of the freshly allocated GameServer. + +To learn how to deploy this allocator service to GKE, please see the tutorial [Create an Allocator Service (Go)](../../docs/create_allocator_service.md). 
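
+
+## Example Request
+Once the service is deployed behind the ingress described in the tutorial, the endpoint can be exercised with Basic Auth, using the example key from main.go (substitute your own ingress IP address):
+
+```
+curl -k -u v1GameClientKey:EAEC945C371B2EC361DE399C2F11E https://[the.ip.address]/address
+```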
diff --git a/examples/allocator-service/allocator-ingress.yaml b/examples/allocator-service/allocator-ingress.yaml new file mode 100644 index 0000000000..8dfc495ff9 --- /dev/null +++ b/examples/allocator-service/allocator-ingress.yaml @@ -0,0 +1,18 @@ +# Create a single service Ingress resource +apiVersion: extensions/v1beta1 +kind: Ingress +metadata: + name: fleet-allocator-ingress + labels: + app: fleet-allocator + namespace: default + annotations: + kubernetes.io/ingress.class: "gce" + kubernetes.io/ingress.allow-http: "false" + #kubernetes.io/ingress.global-static-ip-name: "allocator-static-ip" +spec: + tls: + - secretName: allocatorsecret + backend: + serviceName: fleet-allocator-backend + servicePort: 8000 diff --git a/examples/allocator-service/allocator-service.yaml b/examples/allocator-service/allocator-service.yaml new file mode 100644 index 0000000000..23cc3a5159 --- /dev/null +++ b/examples/allocator-service/allocator-service.yaml @@ -0,0 +1,61 @@ +# Define a Service for the fleet-allocator +apiVersion: v1 +kind: Service +metadata: + name: fleet-allocator-backend + annotations: + service.alpha.kubernetes.io/app-protocols: '{"https":"HTTPS"}' + labels: + app: fleet-allocator +spec: + type: NodePort + selector: + app: fleet-allocator + ports: + - port: 8000 + protocol: TCP + name: https + targetPort: fleet-allocator # retrieve port from deployment config + +--- +# Deploy a pod to run the fleet-allocator code +apiVersion: apps/v1 +kind: Deployment +metadata: + name: fleet-allocator + namespace: default + labels: + app: fleet-allocator +spec: + replicas: 1 + selector: + matchLabels: + app: fleet-allocator + template: + metadata: + labels: + app: fleet-allocator + spec: + serviceAccount: fleet-allocator + volumes: + - name: secret-volume + secret: + secretName: allocatorw3secret + containers: + - name: fleet-allocator + image: [REPO]/[USER]/allocator-service:[TAG] + #image: index.docker.io/exampleuser/allocator-service:simple-udp-5 + imagePullPolicy: Always + ports: + - name: fleet-allocator + containerPort: 8000 + livenessProbe: + httpGet: + scheme: HTTPS + path: /healthz + port: 8000 + initialDelaySeconds: 3 + periodSeconds: 5 + volumeMounts: + - mountPath: /home/service/certs + name: secret-volume diff --git a/examples/allocator-service/dockerfile b/examples/allocator-service/dockerfile new file mode 100644 index 0000000000..2209bdc68a --- /dev/null +++ b/examples/allocator-service/dockerfile @@ -0,0 +1,24 @@ +# Gather dependencies and build the executable +FROM golang:1.10.3 as builder + +WORKDIR /go/src/agones.dev +RUN git clone https://github.com/GoogleCloudPlatform/agones.git + +WORKDIR /go/src/agones.dev/agones/examples/allocator-service +ADD ./main.go . +RUN CGO_ENABLED=0 GOOS=linux go build -a -installsuffix cgo -o service . 
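
+# (CGO_ENABLED=0 above produces a statically linked binary, which is what
+# allows the executable to run on the minimal Alpine image in the next stage.)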
+ + +# Create the final image that will run the allocator service +FROM alpine:3.8 +RUN apk add --update ca-certificates +RUN adduser -D service + +COPY --from=builder /go/src/agones.dev/agones/examples/allocator-service \ + /home/service + +RUN chown -R service /home/service && \ + chmod o+x /home/service/service + +USER service +ENTRYPOINT /home/service/service diff --git a/examples/allocator-service/main.go b/examples/allocator-service/main.go new file mode 100644 index 0000000000..2a53a00772 --- /dev/null +++ b/examples/allocator-service/main.go @@ -0,0 +1,184 @@ +package main + +import ( + "encoding/json" + "errors" + "io" + "net/http" + + "agones.dev/agones/pkg/apis/stable/v1alpha1" + "agones.dev/agones/pkg/client/clientset/versioned" + "agones.dev/agones/pkg/util/runtime" // for the logger + "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/client-go/rest" +) + +// Constants which define the fleet and namespace we are using +const namespace = "default" +const fleetname = "simple-udp" +const generatename = "simple-udp-" + +// Variables for the logger and Agones Clientset +var ( + logger = runtime.NewLoggerWithSource("main") + agonesClient = getAgonesClient() +) + +// A handler for the web server +type handler func(w http.ResponseWriter, r *http.Request) + +// The structure of the json response +type result struct { + Status v1alpha1.GameServerStatus `json:"status"` +} + +// Main will set up an http server and three endpoints +func main() { + // Serve 200 status on / for k8s health checks + http.HandleFunc("/", handleRoot) + + // Serve 200 status on /healthz for k8s health checks + http.HandleFunc("/healthz", handleHealthz) + + // Return the GameServerStatus of the allocated replica to the authorized client + http.HandleFunc("/address", getOnly(basicAuth(handleAddress))) + + // Run the HTTP server using the bound certificate and key for TLS + if err := http.ListenAndServeTLS(":8000", "/home/service/certs/tls.crt", "/home/service/certs/tls.key", nil); err != nil { + logger.WithError(err).Fatal("HTTPS server failed to run") + } else { + logger.Info("HTTPS server is running on port 8000") + } +} + +// Set up our client which we will use to call the API +func getAgonesClient() *versioned.Clientset { + // Create the in-cluster config + config, err := rest.InClusterConfig() + if err != nil { + logger.WithError(err).Fatal("Could not create in cluster config") + } + + // Access to the Agones resources through the Agones Clientset + agonesClient, err := versioned.NewForConfig(config) + if err != nil { + logger.WithError(err).Fatal("Could not create the agones api clientset") + } else { + logger.Info("Created the agones api clientset") + } + return agonesClient +} + +// Limit verbs the web server handles +func getOnly(h handler) handler { + return func(w http.ResponseWriter, r *http.Request) { + if r.Method == "GET" { + h(w, r) + return + } + http.Error(w, "Get Only", http.StatusMethodNotAllowed) + } +} + +// Let the web server do basic authentication +func basicAuth(pass handler) handler { + return func(w http.ResponseWriter, r *http.Request) { + key, value, _ := r.BasicAuth() + if key != "v1GameClientKey" || value != "EAEC945C371B2EC361DE399C2F11E" { + http.Error(w, "authorization failed", http.StatusUnauthorized) + return + } + pass(w, r) + } +} + +// Let / return Healthy and status code 200 +func handleRoot(w http.ResponseWriter, r *http.Request) { + w.Header().Set("Content-Type", "application/json") + w.WriteHeader(http.StatusOK) + _, err := io.WriteString(w, "Healthy") + if err != nil { + 
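// Note: Fatal exits the process; that keeps this example terse, but a
+		// production service would more likely log the error and continue.
+		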
logger.WithError(err).Fatal("Error writing string Healthy from /")
+	}
+}
+
+// Let /healthz return Healthy and status code 200
+func handleHealthz(w http.ResponseWriter, r *http.Request) {
+	w.Header().Set("Content-Type", "application/json")
+	w.WriteHeader(http.StatusOK)
+	_, err := io.WriteString(w, "Healthy")
+	if err != nil {
+		logger.WithError(err).Fatal("Error writing string Healthy from /healthz")
+	}
+}
+
+// Let /address return the GameServerStatus
+func handleAddress(w http.ResponseWriter, r *http.Request) {
+	status, err := allocate()
+	if err != nil {
+		http.Error(w, err.Error(), http.StatusInternalServerError)
+		return
+	}
+	w.Header().Set("Content-Type", "application/json")
+	result, _ := json.Marshal(&result{status})
+	_, err = io.WriteString(w, string(result))
+	if err != nil {
+		logger.WithError(err).Fatal("Error writing json from /address")
+	}
+}
+
+// Return the number of ready game servers available to this fleet for allocation
+func checkReadyReplicas() int32 {
+	// Get a FleetInterface for this namespace
+	fleetInterface := agonesClient.StableV1alpha1().Fleets(namespace)
+	// Get our fleet
+	fleet, err := fleetInterface.Get(fleetname, v1.GetOptions{})
+	if err != nil {
+		logger.WithError(err).Info("Get fleet failed")
+	}
+
+	return fleet.Status.ReadyReplicas
+}
+
+// Move a replica from ready to allocated and return the GameServerStatus
+func allocate() (v1alpha1.GameServerStatus, error) {
+	var result v1alpha1.GameServerStatus
+
+	// Log the values used in the fleet allocation
+	logger.WithField("namespace", namespace).Info("namespace for fa")
+	logger.WithField("generatename", generatename).Info("generatename for fa")
+	logger.WithField("fleetname", fleetname).Info("fleetname for fa")
+
+	// Find out how many ready replicas the fleet has - we need at least one
+	readyReplicas := checkReadyReplicas()
+	logger.WithField("readyReplicas", readyReplicas).Info("number of ready replicas")
+
+	// Log and return an error if there are no ready replicas
+	if readyReplicas < 1 {
+		logger.WithField("fleetname", fleetname).Info("Insufficient ready replicas, cannot create fleet allocation")
+		return result, errors.New("Insufficient ready replicas, cannot create fleet allocation")
+	}
+
+	// Get a FleetAllocationInterface for this namespace
+	fleetAllocationInterface := agonesClient.StableV1alpha1().FleetAllocations(namespace)
+
+	// Define the fleet allocation using the constants set earlier
+	fa := &v1alpha1.FleetAllocation{
+		ObjectMeta: v1.ObjectMeta{
+			GenerateName: generatename, Namespace: namespace,
+		},
+		Spec: v1alpha1.FleetAllocationSpec{FleetName: fleetname},
+	}
+
+	// Create a new fleet allocation
+	newFleetAllocation, err := fleetAllocationInterface.Create(fa)
+	if err != nil {
+		// Log and return the error if the call to Create fails
+		logger.WithError(err).Info("Failed to create fleet allocation")
+		return result, errors.New("Failed to create fleet allocation")
+	}
+
+	// Log the GameServer.Status of the new allocation, then return those values
+	logger.Info("New GameServer allocated: ", newFleetAllocation.Status.GameServer.Status)
+	result = newFleetAllocation.Status.GameServer.Status
+	return result, nil
+}
diff --git a/examples/allocator-service/service-account.yaml b/examples/allocator-service/service-account.yaml
new file mode 100644
index 0000000000..b46b228913
--- /dev/null
+++ b/examples/allocator-service/service-account.yaml
@@ -0,0 +1,45 @@
+# Create a Role in the default namespace that grants access to the agones api
+apiVersion: rbac.authorization.k8s.io/v1
+kind: 
Role
+metadata:
+  name: fleet-allocator
+  namespace: default
+  labels:
+    app: fleet-allocator
+rules:
+- apiGroups: [""]
+  resources: ["events"]
+  verbs: ["create"]
+- apiGroups: ["stable.agones.dev"]
+  resources: ["fleetallocations"]
+  verbs: ["create"]
+- apiGroups: ["stable.agones.dev"]
+  resources: ["fleets"]
+  verbs: ["get"]
+
+---
+# Create a ServiceAccount that will be bound to the above role
+apiVersion: v1
+kind: ServiceAccount
+metadata:
+  name: fleet-allocator
+  namespace: default
+  labels:
+    app: fleet-allocator
+
+---
+# Bind the fleet-allocator ServiceAccount to the fleet-allocator Role
+apiVersion: rbac.authorization.k8s.io/v1
+kind: RoleBinding
+metadata:
+  name: fleet-allocator
+  namespace: default
+  labels:
+    app: fleet-allocator
+subjects:
+- kind: ServiceAccount
+  name: fleet-allocator
+roleRef:
+  apiGroup: rbac.authorization.k8s.io
+  kind: Role
+  name: fleet-allocator

From cd0c726c966db7b537ec1407bf8d239da98ddb95 Mon Sep 17 00:00:00 2001
From: Mark Mandel
Date: Mon, 8 Oct 2018 15:14:21 -0700
Subject: [PATCH 06/25] Make WaitForFleetCondition take up to 5 minutes

Should fix flakiness in autoscaler tests.
---
 test/e2e/framework/framework.go | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/test/e2e/framework/framework.go b/test/e2e/framework/framework.go
index 94233f1eb2..d4df574eb7 100644
--- a/test/e2e/framework/framework.go
+++ b/test/e2e/framework/framework.go
@@ -21,13 +21,12 @@ import (
 	"net"
 	"time"
 
-	"k8s.io/apimachinery/pkg/labels"
-
 	"agones.dev/agones/pkg/apis/stable/v1alpha1"
 	"agones.dev/agones/pkg/client/clientset/versioned"
 	"github.com/pkg/errors"
 	"github.com/sirupsen/logrus"
 	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
+	"k8s.io/apimachinery/pkg/labels"
 	"k8s.io/apimachinery/pkg/util/wait"
 	"k8s.io/client-go/kubernetes"
 
@@ -119,7 +118,7 @@ func (f *Framework) WaitForGameServerState(gs *v1alpha1.GameServer, state v1alph
 
 // WaitForFleetCondition waits for the Fleet to be in a specific condition
 func (f *Framework) WaitForFleetCondition(flt *v1alpha1.Fleet, condition func(fleet *v1alpha1.Fleet) bool) error {
-	err := wait.PollImmediate(2*time.Second, 120*time.Second, func() (bool, error) {
+	err := wait.PollImmediate(2*time.Second, 5*time.Minute, func() (bool, error) {
 		fleet, err := f.AgonesClient.StableV1alpha1().Fleets(flt.ObjectMeta.Namespace).Get(flt.ObjectMeta.Name, metav1.GetOptions{})
 		if err != nil {
 			return true, err

From 0675b2364ebbb5bd06d2898356047608be294c87 Mon Sep 17 00:00:00 2001
From: Mark Mandel
Date: Mon, 8 Oct 2018 15:36:57 -0700
Subject: [PATCH 07/25] Release 0.5.0.rc changes

Includes an updated Changelog generator, to fix previous issues. 
--- CHANGELOG.md | 102 +++++++++++++++++++++++++++++---- build/Makefile | 10 ++-- docs/create_fleetautoscaler.md | 2 + 3 files changed, 98 insertions(+), 16 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index cd1a73821f..d8454b3d1f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,6 +1,65 @@ -# Change Log +# Changelog + +## [v0.5.0.rc](https://github.com/GoogleCloudPlatform/agones/tree/v0.5.0.rc) (2018-10-09) + +[Full Changelog](https://github.com/GoogleCloudPlatform/agones/compare/v0.4.0...v0.5.0.rc) + +**Implemented enhancements:** + +- GameServer Safe Manual Cluster Node Scaling [\#365](https://github.com/GoogleCloudPlatform/agones/issues/365) +- Improve support for developing in custom environments [\#348](https://github.com/GoogleCloudPlatform/agones/issues/348) +- Agones helm repo [\#285](https://github.com/GoogleCloudPlatform/agones/issues/285) +- Add Amazon EKS Agones Setup Instructions [\#372](https://github.com/GoogleCloudPlatform/agones/pull/372) ([GabeBigBoxVR](https://github.com/GabeBigBoxVR)) +- Agones stable helm repository [\#361](https://github.com/GoogleCloudPlatform/agones/pull/361) ([Kuqd](https://github.com/Kuqd)) +- Improve support for custom dev environments [\#349](https://github.com/GoogleCloudPlatform/agones/pull/349) ([victor-prodan](https://github.com/victor-prodan)) +- FleetAutoScaler v0 [\#340](https://github.com/GoogleCloudPlatform/agones/pull/340) ([victor-prodan](https://github.com/victor-prodan)) +- Forces restart when using tls generation. [\#338](https://github.com/GoogleCloudPlatform/agones/pull/338) ([Kuqd](https://github.com/Kuqd)) + +**Fixed bugs:** + +- Fix loophole in game server initialization [\#354](https://github.com/GoogleCloudPlatform/agones/issues/354) +- Health messages logged with wrong severity [\#335](https://github.com/GoogleCloudPlatform/agones/issues/335) +- Helm upgrade and SSL certificates [\#309](https://github.com/GoogleCloudPlatform/agones/issues/309) +- Fix for race condition: Allocation of Deleting GameServers Possible [\#367](https://github.com/GoogleCloudPlatform/agones/pull/367) ([markmandel](https://github.com/markmandel)) +- Map level to severity for stackdriver [\#363](https://github.com/GoogleCloudPlatform/agones/pull/363) ([Kuqd](https://github.com/Kuqd)) +- Add ReadTimeout for e2e tests, otherwise this can hang forever. 
[\#359](https://github.com/GoogleCloudPlatform/agones/pull/359) ([markmandel](https://github.com/markmandel)) +- Fixes race condition bug with Pod not being scheduled before Ready\(\) [\#357](https://github.com/GoogleCloudPlatform/agones/pull/357) ([markmandel](https://github.com/markmandel)) +- Allocation is broken when using the generated go client [\#347](https://github.com/GoogleCloudPlatform/agones/pull/347) ([markmandel](https://github.com/markmandel)) + +**Security fixes:** + +- \[Vuln\] Update to Alpine 3.8.1 [\#355](https://github.com/GoogleCloudPlatform/agones/issues/355) +- Update Alpine version to 3.8.1 [\#364](https://github.com/GoogleCloudPlatform/agones/pull/364) ([fooock](https://github.com/fooock)) + +**Closed issues:** + +- C++ SDK no destructor body [\#366](https://github.com/GoogleCloudPlatform/agones/issues/366) +- Release 0.4.0 [\#341](https://github.com/GoogleCloudPlatform/agones/issues/341) +- Update "Developing, Testing and Building Agones" tutorial with how to push updates to your test cluster [\#308](https://github.com/GoogleCloudPlatform/agones/issues/308) +- Use revive instead of gometalinter [\#237](https://github.com/GoogleCloudPlatform/agones/issues/237) +- Integrate a spell and/or grammar check into build system [\#187](https://github.com/GoogleCloudPlatform/agones/issues/187) +- Helm package CI [\#153](https://github.com/GoogleCloudPlatform/agones/issues/153) +- Use functional parameters in Controller creation [\#104](https://github.com/GoogleCloudPlatform/agones/issues/104) + +**Merged pull requests:** + +- Make WaitForFleetCondition take up to 5 minutes [\#377](https://github.com/GoogleCloudPlatform/agones/pull/377) ([markmandel](https://github.com/markmandel)) +- Fix for flaky test TestControllerAddress [\#376](https://github.com/GoogleCloudPlatform/agones/pull/376) ([markmandel](https://github.com/markmandel)) +- Fix typo [\#374](https://github.com/GoogleCloudPlatform/agones/pull/374) ([Maxpain177](https://github.com/Maxpain177)) +- Update instructions for Minikube 0.29.0 [\#373](https://github.com/GoogleCloudPlatform/agones/pull/373) ([markmandel](https://github.com/markmandel)) +- Update README.md [\#371](https://github.com/GoogleCloudPlatform/agones/pull/371) ([mohammedfakhar](https://github.com/mohammedfakhar)) +- Remove c++ sdk destructor causing linker errors [\#369](https://github.com/GoogleCloudPlatform/agones/pull/369) ([nikibobi](https://github.com/nikibobi)) +- Update README.md [\#362](https://github.com/GoogleCloudPlatform/agones/pull/362) ([mohammedfakhar](https://github.com/mohammedfakhar)) +- Upgrade GKE version and increase test cluster size [\#360](https://github.com/GoogleCloudPlatform/agones/pull/360) ([markmandel](https://github.com/markmandel)) +- Fix typo in sdk readme which said only two sdks [\#356](https://github.com/GoogleCloudPlatform/agones/pull/356) ([ReDucTor](https://github.com/ReDucTor)) +- Add allocator service example and documentation [\#353](https://github.com/GoogleCloudPlatform/agones/pull/353) ([slartibaartfast](https://github.com/slartibaartfast)) +- Adding goimports back into the build shell. 
[\#352](https://github.com/GoogleCloudPlatform/agones/pull/352) ([markmandel](https://github.com/markmandel)) +- e2e tests for Fleet Scaling and Updates [\#351](https://github.com/GoogleCloudPlatform/agones/pull/351) ([markmandel](https://github.com/markmandel)) +- Switch to golangci-lint [\#346](https://github.com/GoogleCloudPlatform/agones/pull/346) ([Kuqd](https://github.com/Kuqd)) +- Prepare for next release - 0.5.0.rc [\#343](https://github.com/GoogleCloudPlatform/agones/pull/343) ([markmandel](https://github.com/markmandel)) ## [v0.4.0](https://github.com/GoogleCloudPlatform/agones/tree/v0.4.0) (2018-09-04) + [Full Changelog](https://github.com/GoogleCloudPlatform/agones/compare/v0.4.0.rc...v0.4.0) **Closed issues:** @@ -9,11 +68,13 @@ **Merged pull requests:** +- Release 0.4.0 [\#342](https://github.com/GoogleCloudPlatform/agones/pull/342) ([markmandel](https://github.com/markmandel)) - Fix yaml file paths [\#339](https://github.com/GoogleCloudPlatform/agones/pull/339) ([oskoi](https://github.com/oskoi)) - Add Troubleshooting section to Build doc [\#337](https://github.com/GoogleCloudPlatform/agones/pull/337) ([victor-prodan](https://github.com/victor-prodan)) - Preparing for 0.4.0 release next week. [\#333](https://github.com/GoogleCloudPlatform/agones/pull/333) ([markmandel](https://github.com/markmandel)) ## [v0.4.0.rc](https://github.com/GoogleCloudPlatform/agones/tree/v0.4.0.rc) (2018-08-28) + [Full Changelog](https://github.com/GoogleCloudPlatform/agones/compare/v0.3.0...v0.4.0.rc) **Implemented enhancements:** @@ -25,9 +86,9 @@ - Ability to pass `GameServer` yaml/json to local sdk server [\#328](https://github.com/GoogleCloudPlatform/agones/pull/328) ([markmandel](https://github.com/markmandel)) - Move Status \> Address & Ports population to `Creating` state processing [\#326](https://github.com/GoogleCloudPlatform/agones/pull/326) ([markmandel](https://github.com/markmandel)) - Implement SDK SetLabel and SetAnnotation functionality [\#323](https://github.com/GoogleCloudPlatform/agones/pull/323) ([markmandel](https://github.com/markmandel)) -- Metadata propagation from fleet allocation to game server [\#312](https://github.com/GoogleCloudPlatform/agones/pull/312) ([victor-prodan](https://github.com/victor-prodan)) -- Features/e2e [\#315](https://github.com/GoogleCloudPlatform/agones/pull/315) ([Kuqd](https://github.com/Kuqd)) - Implements SDK callback for GameServer updates [\#316](https://github.com/GoogleCloudPlatform/agones/pull/316) ([markmandel](https://github.com/markmandel)) +- Features/e2e [\#315](https://github.com/GoogleCloudPlatform/agones/pull/315) ([Kuqd](https://github.com/Kuqd)) +- Metadata propagation from fleet allocation to game server [\#312](https://github.com/GoogleCloudPlatform/agones/pull/312) ([victor-prodan](https://github.com/victor-prodan)) **Fixed bugs:** @@ -57,6 +118,7 @@ - Update to move from release to the next version \(0.4.0.rc\) [\#306](https://github.com/GoogleCloudPlatform/agones/pull/306) ([markmandel](https://github.com/markmandel)) ## [v0.3.0](https://github.com/GoogleCloudPlatform/agones/tree/v0.3.0) (2018-07-26) + [Full Changelog](https://github.com/GoogleCloudPlatform/agones/compare/v0.3.0.rc...v0.3.0) **Fixed bugs:** @@ -73,15 +135,19 @@ - Move back to 0.3.0 [\#292](https://github.com/GoogleCloudPlatform/agones/pull/292) ([markmandel](https://github.com/markmandel)) ## [v0.3.0.rc](https://github.com/GoogleCloudPlatform/agones/tree/v0.3.0.rc) (2018-07-17) + [Full 
Changelog](https://github.com/GoogleCloudPlatform/agones/compare/v0.2.0...v0.3.0.rc) +**Breaking changes:** + +- \[Breaking Change\] Multiple port support for `GameServer` [\#283](https://github.com/GoogleCloudPlatform/agones/pull/283) ([markmandel](https://github.com/markmandel)) + **Implemented enhancements:** - Expose SDK Sidecar GRPC Server as HTTP+JSON [\#240](https://github.com/GoogleCloudPlatform/agones/issues/240) - supporting multiple ports [\#151](https://github.com/GoogleCloudPlatform/agones/issues/151) - Support Cluster Node addition/deletion [\#60](https://github.com/GoogleCloudPlatform/agones/issues/60) - SDK `GameServer\(\)` function for retrieving backing GameServer configuration [\#288](https://github.com/GoogleCloudPlatform/agones/pull/288) ([markmandel](https://github.com/markmandel)) -- \[Breaking Change\] Multiple port support for `GameServer` [\#283](https://github.com/GoogleCloudPlatform/agones/pull/283) ([markmandel](https://github.com/markmandel)) - Move cluster node addition/removal out of "experimental" [\#271](https://github.com/GoogleCloudPlatform/agones/pull/271) ([markmandel](https://github.com/markmandel)) - added information about Agones running on Azure Kubernetes Service [\#269](https://github.com/GoogleCloudPlatform/agones/pull/269) ([dgkanatsios](https://github.com/dgkanatsios)) - Expose SDK-Server at HTTP+JSON [\#265](https://github.com/GoogleCloudPlatform/agones/pull/265) ([markmandel](https://github.com/markmandel)) @@ -89,6 +155,7 @@ **Fixed bugs:** +- Error running make install with GKE [\#258](https://github.com/GoogleCloudPlatform/agones/issues/258) - Minikube does not start with 0.26.x [\#192](https://github.com/GoogleCloudPlatform/agones/issues/192) - Forgot to update the k8s client-go codegen. [\#281](https://github.com/GoogleCloudPlatform/agones/pull/281) ([markmandel](https://github.com/markmandel)) - Fix bug with hung GameServer resource on Kubernetes 1.10 [\#278](https://github.com/GoogleCloudPlatform/agones/pull/278) ([markmandel](https://github.com/markmandel)) @@ -97,10 +164,12 @@ **Closed issues:** - Agones on Azure AKS [\#254](https://github.com/GoogleCloudPlatform/agones/issues/254) +- Release v0.2.0 [\#242](https://github.com/GoogleCloudPlatform/agones/issues/242) - helm namespace [\#212](https://github.com/GoogleCloudPlatform/agones/issues/212) **Merged pull requests:** +- Release 0.3.0.rc [\#291](https://github.com/GoogleCloudPlatform/agones/pull/291) ([markmandel](https://github.com/markmandel)) - Update README.md with information about Public IPs on AKS [\#289](https://github.com/GoogleCloudPlatform/agones/pull/289) ([dgkanatsios](https://github.com/dgkanatsios)) - fix yaml install link [\#286](https://github.com/GoogleCloudPlatform/agones/pull/286) ([nikibobi](https://github.com/nikibobi)) - install.yaml now installs by default in agones-system [\#284](https://github.com/GoogleCloudPlatform/agones/pull/284) ([Kuqd](https://github.com/Kuqd)) @@ -126,25 +195,27 @@ - use the helm --namespace convention [\#250](https://github.com/GoogleCloudPlatform/agones/pull/250) ([Kuqd](https://github.com/Kuqd)) - fix podspec template broken link to documentation [\#247](https://github.com/GoogleCloudPlatform/agones/pull/247) ([Kuqd](https://github.com/Kuqd)) - Make Cloud Builder Faster [\#245](https://github.com/GoogleCloudPlatform/agones/pull/245) ([markmandel](https://github.com/markmandel)) +- Increment base version [\#244](https://github.com/GoogleCloudPlatform/agones/pull/244) ([markmandel](https://github.com/markmandel)) - Lock 
protoc-gen-go to 1.0 release [\#241](https://github.com/GoogleCloudPlatform/agones/pull/241) ([markmandel](https://github.com/markmandel)) ## [v0.2.0](https://github.com/GoogleCloudPlatform/agones/tree/v0.2.0) (2018-06-06) + [Full Changelog](https://github.com/GoogleCloudPlatform/agones/compare/v0.2.0.rc...v0.2.0) **Closed issues:** -- Release v0.2.0 [\#242](https://github.com/GoogleCloudPlatform/agones/issues/242) - Release v0.2.0.rc [\#231](https://github.com/GoogleCloudPlatform/agones/issues/231) **Merged pull requests:** - Release 0.2.0 [\#243](https://github.com/GoogleCloudPlatform/agones/pull/243) ([markmandel](https://github.com/markmandel)) - Adding my streaming development to contributing [\#239](https://github.com/GoogleCloudPlatform/agones/pull/239) ([markmandel](https://github.com/markmandel)) +- Updates to release process [\#235](https://github.com/GoogleCloudPlatform/agones/pull/235) ([markmandel](https://github.com/markmandel)) - Adding a README.md file for the simple-udp to help developer to get start [\#234](https://github.com/GoogleCloudPlatform/agones/pull/234) ([g-ericso](https://github.com/g-ericso)) - Revert install configuration back to 0.2.0 [\#233](https://github.com/GoogleCloudPlatform/agones/pull/233) ([markmandel](https://github.com/markmandel)) -- Increment base version [\#244](https://github.com/GoogleCloudPlatform/agones/pull/244) ([markmandel](https://github.com/markmandel)) ## [v0.2.0.rc](https://github.com/GoogleCloudPlatform/agones/tree/v0.2.0.rc) (2018-05-30) + [Full Changelog](https://github.com/GoogleCloudPlatform/agones/compare/v0.1...v0.2.0.rc) **Implemented enhancements:** @@ -181,6 +252,10 @@ - Point the install.yaml to the release-0.1 branch [\#189](https://github.com/GoogleCloudPlatform/agones/pull/189) ([markmandel](https://github.com/markmandel)) - Fixed missing links in documentation. [\#166](https://github.com/GoogleCloudPlatform/agones/pull/166) ([fooock](https://github.com/fooock)) +**Security fixes:** + +- RBAC: controller doesn't need fleet create [\#202](https://github.com/GoogleCloudPlatform/agones/pull/202) ([markmandel](https://github.com/markmandel)) + **Closed issues:** - helm RBAC on/off [\#211](https://github.com/GoogleCloudPlatform/agones/issues/211) @@ -194,7 +269,6 @@ **Merged pull requests:** -- Updates to release process [\#235](https://github.com/GoogleCloudPlatform/agones/pull/235) ([markmandel](https://github.com/markmandel)) - Release v0.2.0.rc [\#232](https://github.com/GoogleCloudPlatform/agones/pull/232) ([markmandel](https://github.com/markmandel)) - do-release release registry and upstream push [\#228](https://github.com/GoogleCloudPlatform/agones/pull/228) ([markmandel](https://github.com/markmandel)) - Archive C++ src on build and release [\#227](https://github.com/GoogleCloudPlatform/agones/pull/227) ([markmandel](https://github.com/markmandel)) @@ -212,7 +286,6 @@ - Clearer docs on developing and building from source [\#206](https://github.com/GoogleCloudPlatform/agones/pull/206) ([markmandel](https://github.com/markmandel)) - Add formatting guidelines to CONTRIBUTING.md [\#205](https://github.com/GoogleCloudPlatform/agones/pull/205) ([enocom](https://github.com/enocom)) - Fleet docs: Some missing pieces. [\#204](https://github.com/GoogleCloudPlatform/agones/pull/204) ([markmandel](https://github.com/markmandel)) -- RBAC: controller doesn't need fleet create [\#202](https://github.com/GoogleCloudPlatform/agones/pull/202) ([markmandel](https://github.com/markmandel)) - Release version, and twitter badges. 
[\#201](https://github.com/GoogleCloudPlatform/agones/pull/201) ([markmandel](https://github.com/markmandel)) - Typo in GameServer json [\#200](https://github.com/GoogleCloudPlatform/agones/pull/200) ([markmandel](https://github.com/markmandel)) - Install docs: minikube 0.25.2 and k8s 1.9.4 [\#195](https://github.com/GoogleCloudPlatform/agones/pull/195) ([markmandel](https://github.com/markmandel)) @@ -246,6 +319,9 @@ - Centralise the canonical import paths and more package docs [\#130](https://github.com/GoogleCloudPlatform/agones/pull/130) ([markmandel](https://github.com/markmandel)) ## [v0.1](https://github.com/GoogleCloudPlatform/agones/tree/v0.1) (2018-03-06) + +[Full Changelog](https://github.com/GoogleCloudPlatform/agones/compare/20f6ab798a49e3629d5f6651201504ff49ea251a...v0.1) + **Implemented enhancements:** - The local mode of the agon sidecar listen to localhost only [\#62](https://github.com/GoogleCloudPlatform/agones/issues/62) @@ -297,6 +373,11 @@ - Use the preferred ENTRYPOINT format [\#43](https://github.com/GoogleCloudPlatform/agones/pull/43) ([markmandel](https://github.com/markmandel)) - Update Kubernetes dependencies to release branch [\#24](https://github.com/GoogleCloudPlatform/agones/pull/24) ([markmandel](https://github.com/markmandel)) +**Security fixes:** + +- Switch to RBAC [\#57](https://github.com/GoogleCloudPlatform/agones/issues/57) +- Upgrade to Go 1.9.4 [\#81](https://github.com/GoogleCloudPlatform/agones/pull/81) ([markmandel](https://github.com/markmandel)) + **Closed issues:** - `make do-release` target [\#115](https://github.com/GoogleCloudPlatform/agones/issues/115) @@ -305,7 +386,6 @@ - Health check should be moved out of `gameservers/controller.go` [\#88](https://github.com/GoogleCloudPlatform/agones/issues/88) - Add archiving the sdk-server binaries into gcs into the cloudbuild.yaml [\#87](https://github.com/GoogleCloudPlatform/agones/issues/87) - Upgrade to Go 1.9.3 [\#63](https://github.com/GoogleCloudPlatform/agones/issues/63) -- Switch to RBAC [\#57](https://github.com/GoogleCloudPlatform/agones/issues/57) - Building Agon on Windows [\#47](https://github.com/GoogleCloudPlatform/agones/issues/47) - Building Agones on macOS [\#46](https://github.com/GoogleCloudPlatform/agones/issues/46) - Write documentation for creating a GameServer [\#45](https://github.com/GoogleCloudPlatform/agones/issues/45) @@ -350,7 +430,6 @@ - Update everything to be Kubernetes 1.9+ [\#85](https://github.com/GoogleCloudPlatform/agones/pull/85) ([markmandel](https://github.com/markmandel)) - Expand on contributing documentation. [\#84](https://github.com/GoogleCloudPlatform/agones/pull/84) ([markmandel](https://github.com/markmandel)) - Remove entrypoints in makefile. [\#82](https://github.com/GoogleCloudPlatform/agones/pull/82) ([Kuqd](https://github.com/Kuqd)) -- Upgrade to Go 1.9.4 [\#81](https://github.com/GoogleCloudPlatform/agones/pull/81) ([markmandel](https://github.com/markmandel)) - Update to client-go release 1.6 [\#80](https://github.com/GoogleCloudPlatform/agones/pull/80) ([markmandel](https://github.com/markmandel)) - Setup for social/get involved section. [\#79](https://github.com/GoogleCloudPlatform/agones/pull/79) ([markmandel](https://github.com/markmandel)) - Changing name from Agon =\> Agones. 
[\#78](https://github.com/GoogleCloudPlatform/agones/pull/78) ([markmandel](https://github.com/markmandel)) @@ -361,7 +440,6 @@ - Update Xonotic demo to use dynamic ports [\#72](https://github.com/GoogleCloudPlatform/agones/pull/72) ([markmandel](https://github.com/markmandel)) - Basic structure for better documentation [\#68](https://github.com/GoogleCloudPlatform/agones/pull/68) ([markmandel](https://github.com/markmandel)) - Update gke-test-cluster admin password to new minimum length 16 chars. [\#65](https://github.com/GoogleCloudPlatform/agones/pull/65) ([dzlier-gcp](https://github.com/dzlier-gcp)) -- Bring rbac up to date with master [\#64](https://github.com/GoogleCloudPlatform/agones/pull/64) ([dzlier-gcp](https://github.com/dzlier-gcp)) - Output the stack error as an actual array [\#61](https://github.com/GoogleCloudPlatform/agones/pull/61) ([markmandel](https://github.com/markmandel)) - Update documentation [\#53](https://github.com/GoogleCloudPlatform/agones/pull/53) ([Kuqd](https://github.com/Kuqd)) - Correct maximum parameter typo [\#52](https://github.com/GoogleCloudPlatform/agones/pull/52) ([Kuqd](https://github.com/Kuqd)) @@ -378,4 +456,4 @@ -\* *This Change Log was automatically generated by [github_changelog_generator](https://github.com/skywinder/Github-Changelog-Generator)* \ No newline at end of file +\* *This Changelog was automatically generated by [github_changelog_generator](https://github.com/github-changelog-generator/github-changelog-generator)* diff --git a/build/Makefile b/build/Makefile index 897d968f05..1e3f534b03 100644 --- a/build/Makefile +++ b/build/Makefile @@ -292,11 +292,13 @@ push-build-image: # generate a changelog using github-changelog-generator gen-changelog: RELEASE_VERSION ?= $(base_version) gen-changelog: - @read -p 'Github Token: ' TOKEN && \ - docker run -it --rm -v "$(agones_path)":/usr/local/src/your-app ferrarimarco/github-changelog-generator:1.14.3 \ - GoogleCloudPlatform/agones --bug-labels=kind/bug --enhancement-labels=kind/feature \ + read -p 'Github Token: ' TOKEN && \ + docker run -it --rm -v "$(agones_path)":/project markmandel/github-changelog-generator \ + --user=GoogleCloudPlatform --project=agones \ + --bug-labels=kind/bug --enhancement-labels=kind/feature \ + --breaking-labels=kind/breaking --security-labels=area/security \ --future-release "v$(RELEASE_VERSION)" \ - --token=$$TOKEN + --token $$TOKEN # Creates a release. Version defaults to the base_version # - Checks out a release branch diff --git a/docs/create_fleetautoscaler.md b/docs/create_fleetautoscaler.md index 5472446b7e..9df6806747 100644 --- a/docs/create_fleetautoscaler.md +++ b/docs/create_fleetautoscaler.md @@ -1,5 +1,7 @@ # Quickstart Create a Fleet Autoscaler +⚠️⚠️⚠️ **This is currently a release candidate feature** ⚠️⚠️⚠️ + This guide covers how you can quickly get started using Agones to create a Fleet Autoscaler to manage your fleet size automatically, based on actual load. From 0b9f91a69c1ea75056ed654fb71ae900cbb3dfcd Mon Sep 17 00:00:00 2001 From: Mark Mandel Date: Tue, 9 Oct 2018 14:19:46 -0700 Subject: [PATCH 08/25] Post 0.5.0-rc updates Includes move to -rc suffix, so that we're semantically versioned. Also fix for the Changelog. 
---
 CHANGELOG.md                               |  1 -
 docs/governance/templates/release.md       |  2 +-
 docs/governance/templates/release_issue.md | 12 ++++----
 install/helm/agones/Chart.yaml             |  4 +--
 install/helm/agones/values.yaml            |  2 +-
 install/yaml/install.yaml                  | 34 +++++++++++-----------
 6 files changed, 27 insertions(+), 28 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index d8454b3d1f..b2a1023ffc 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -6,7 +6,6 @@
 
 **Implemented enhancements:**
 
-- GameServer Safe Manual Cluster Node Scaling [\#365](https://github.com/GoogleCloudPlatform/agones/issues/365)
 - Improve support for developing in custom environments [\#348](https://github.com/GoogleCloudPlatform/agones/issues/348)
 - Agones helm repo [\#285](https://github.com/GoogleCloudPlatform/agones/issues/285)
 - Add Amazon EKS Agones Setup Instructions [\#372](https://github.com/GoogleCloudPlatform/agones/pull/372) ([GabeBigBoxVR](https://github.com/GabeBigBoxVR))

diff --git a/docs/governance/templates/release.md b/docs/governance/templates/release.md
index fbda46ce87..2771f30ad0 100644
--- a/docs/governance/templates/release.md
+++ b/docs/governance/templates/release.md
@@ -22,6 +22,6 @@ Images available with this release:
 
 Helm chart available with this release:
 
-- [`helm install agones/agones --version {example-version}](https://agones.dev/chart/stable/agones-{example-version}.tgz)
+- [`helm install agones/agones --version {version}`](https://agones.dev/chart/stable/agones-{version}.tgz)
 
 > Make sure to add our stable helm repository using `helm repo add https://agones.dev/chart/stable`

diff --git a/docs/governance/templates/release_issue.md b/docs/governance/templates/release_issue.md
index 37cefba81e..440aa2b85a 100644
--- a/docs/governance/templates/release_issue.md
+++ b/docs/governance/templates/release_issue.md
@@ -9,9 +9,9 @@ and copy it into a release issue. Fill in relevant values, found inside {}
 - [ ] Review that closed issues have been applied to the current milestone.
 - [ ] Ensure the next version milestone is created.
 - [ ] Any issues in the current milestone that are not closed, move to next milestone.
-- [ ] Run `make gen-changelog` to generate the CHANGELOG.md (if release candidate `make gen-changelog RELEASE_VERSION={version}.rc`)
-- [ ] Ensure the [helm `tag` value][values] is correct (should be the {version} if a full release, {version}.rc if release candidate)
-- [ ] Ensure the [helm `Chart` version values][chart] are correct (should be the {version} if a full release, {version}.rc if release candidate)
+- [ ] Run `make gen-changelog` to generate the CHANGELOG.md (if release candidate `make gen-changelog RELEASE_VERSION={version}-rc`)
+- [ ] Ensure the [helm `tag` value][values] is correct (should be the {version} if a full release, {version}-rc if release candidate)
+- [ ] Ensure the [helm `Chart` version values][chart] are correct (should be the {version} if a full release, {version}-rc if release candidate)
 - [ ] Run `make gen-install`
 - [ ] Ensure all example images exist on gcr.io/agones-images
 - [ ] If full release, update documentation with updated example images tags
@@ -21,7 +21,7 @@ and copy it into a release issue.
Fill in relevant values, found inside {}
 - [ ] Create PR with these changes, and merge them with approval
 - [ ] Confirm local git remote `upstream` points at `git@github.com:GoogleCloudPlatform/agones.git`
 - [ ] Run `git checkout master && git reset --hard upstream/master` to ensure your code is in line with upstream (unless this is a hotfix, then do the same, but for the release branch)
-- [ ] Run `make do-release`. (if release candidate `make do-release RELEASE_VERSION={version}.rc`) to create and push the docker images and helm chart.
+- [ ] Run `make do-release`. (if release candidate `make do-release RELEASE_VERSION={version}-rc`) to create and push the docker images and helm chart.
 - [ ] Do a `helm repo add agones https://agones.dev/chart/stable` and verify that the new version is available via the command `helm search agones/`
 - [ ] Do a `helm install` and a smoke test to confirm everything is working.
 - [ ] Create a release with the [release template][release-template]
@@ -29,8 +29,8 @@ and copy it into a release issue. Fill in relevant values, found inside {}
 - [ ] Attach all assets found in the `release` folder to the release.
 - [ ] Send an email to the [mailing list][list] with the release details (copy-paste the github release)
 - [ ] If full release, then increment the `base_version` in [`build/Makefile`][build-makefile]
-- [ ] Ensure the [helm `tag` value][values] is set to the next version (should be the {version} if a full release, {version}.rc if release candidate)
-- [ ] Ensure the [helm `Chart` version values][chart] is set to the next version (should be the {version} if a full release, {version}.rc if release candidate)
+- [ ] Ensure the [helm `tag` value][values] is set to the next version (should be the {version} if a full release, {version}-rc if release candidate)
+- [ ] Ensure the [helm `Chart` version values][chart] is set to the next version (should be the {version} if a full release, {version}-rc if release candidate)
 - [ ] Run `make gen-install`
 - [ ] Create PR with these changes, and merge them with approval
 - [ ] Close this issue.

diff --git a/install/helm/agones/Chart.yaml b/install/helm/agones/Chart.yaml
index 6569c20329..2d92df9bea 100644
--- a/install/helm/agones/Chart.yaml
+++ b/install/helm/agones/Chart.yaml
@@ -15,8 +15,8 @@
 # Declare variables to be passed into your templates.
 apiVersion: v1
-appVersion: "0.5.0.rc"
-version: 0.5.0.rc
+appVersion: "0.5.0"
+version: 0.5.0
 name: agones
 description: a library for hosting, running and scaling dedicated game servers on Kubernetes.
keywords: diff --git a/install/helm/agones/values.yaml b/install/helm/agones/values.yaml index 10117c16dc..b503551037 100644 --- a/install/helm/agones/values.yaml +++ b/install/helm/agones/values.yaml @@ -31,7 +31,7 @@ agones: timeoutSeconds: 1 image: registry: gcr.io/agones-images - tag: 0.5.0.rc + tag: 0.5.0 controller: name: agones-controller pullPolicy: IfNotPresent diff --git a/install/yaml/install.yaml b/install/yaml/install.yaml index d6fb245972..b96b6fd492 100644 --- a/install/yaml/install.yaml +++ b/install/yaml/install.yaml @@ -21,7 +21,7 @@ metadata: namespace: agones-system labels: app: agones - chart: agones-0.5.0.rc + chart: agones-0.5.0 release: agones-manual heritage: Tiller --- @@ -32,7 +32,7 @@ metadata: namespace: agones-system labels: app: agones - chart: agones-0.5.0.rc + chart: agones-0.5.0 release: agones-manual heritage: Tiller rules: @@ -62,7 +62,7 @@ metadata: namespace: agones-system labels: app: agones - chart: agones-0.5.0.rc + chart: agones-0.5.0 release: agones-manual heritage: Tiller subjects: @@ -96,7 +96,7 @@ metadata: namespace: default labels: app: agones - chart: agones-0.5.0.rc + chart: agones-0.5.0 release: agones-manual heritage: Tiller --- @@ -107,7 +107,7 @@ metadata: namespace: agones-system labels: app: agones - chart: agones-0.5.0.rc + chart: agones-0.5.0 release: agones-manual heritage: Tiller rules: @@ -125,7 +125,7 @@ metadata: namespace: default labels: app: agones - chart: agones-0.5.0.rc + chart: agones-0.5.0 release: agones-manual heritage: Tiller subjects: @@ -160,7 +160,7 @@ metadata: labels: component: crd app: agones - chart: agones-0.5.0.rc + chart: agones-0.5.0 release: agones-manual heritage: Tiller spec: @@ -341,7 +341,7 @@ metadata: labels: component: crd app: agones - chart: agones-0.5.0.rc + chart: agones-0.5.0 release: agones-manual heritage: Tiller spec: @@ -390,7 +390,7 @@ metadata: labels: component: crd app: agones - chart: agones-0.5.0.rc + chart: agones-0.5.0 release: agones-manual heritage: Tiller spec: @@ -457,7 +457,7 @@ metadata: labels: component: crd app: agones - chart: agones-0.5.0.rc + chart: agones-0.5.0 release: agones-manual heritage: Tiller spec: @@ -622,7 +622,7 @@ metadata: labels: component: crd app: agones - chart: agones-0.5.0.rc + chart: agones-0.5.0 release: agones-manual heritage: Tiller spec: @@ -799,7 +799,7 @@ metadata: labels: component: controller app: agones - chart: agones-0.5.0.rc + chart: agones-0.5.0 release: agones-manual heritage: Tiller spec: @@ -832,7 +832,7 @@ metadata: labels: component: controller app: agones - chart: agones-0.5.0.rc + chart: agones-0.5.0 release: agones-manual heritage: Tiller spec: @@ -856,7 +856,7 @@ spec: serviceAccountName: agones-controller containers: - name: agones-controller - image: "gcr.io/agones-images/agones-controller:0.5.0.rc" + image: "gcr.io/agones-images/agones-controller:0.5.0" imagePullPolicy: IfNotPresent env: - name: ALWAYS_PULL_SIDECAR # set the sidecar imagePullPolicy to Always @@ -868,7 +868,7 @@ spec: - name: MAX_PORT value: "8000" - name: SIDECAR # overwrite the GameServer sidecar image that is used - value: "gcr.io/agones-images/agones-sdk:0.5.0.rc" + value: "gcr.io/agones-images/agones-sdk:0.5.0" livenessProbe: httpGet: path: /live @@ -944,7 +944,7 @@ metadata: labels: component: controller app: agones - chart: agones-0.5.0.rc + chart: agones-0.5.0 release: agones-manual heritage: Tiller webhooks: @@ -976,7 +976,7 @@ metadata: namespace: agones-system labels: app: agones-manual - chart: "agones-0.5.0.rc" + chart: "agones-0.5.0" release: 
"agones-manual" heritage: "Tiller" type: Opaque From d6858e26ddb4454e1484e839fe7be7a994be1b76 Mon Sep 17 00:00:00 2001 From: Mark Mandel Date: Tue, 9 Oct 2018 17:33:42 -0700 Subject: [PATCH 09/25] Spec docs for FleetAutoscaler Moved this out of #370 so people can review it during the RC cycle. --- README.md | 1 + docs/fleetautoscaler_spec.md | 44 ++++++++++++++++++++++++++++++++++++ 2 files changed, 45 insertions(+) create mode 100644 docs/fleetautoscaler_spec.md diff --git a/README.md b/README.md index 01ec90caf1..cdb1f090ff 100644 --- a/README.md +++ b/README.md @@ -58,6 +58,7 @@ Documentation and usage guides on how to develop and host dedicated game servers ### Reference - [Game Server Specification](./docs/gameserver_spec.md) - [Fleet Specification](./docs/fleet_spec.md) +- [Fleet Autoscaler Specification](./docs/fleetautoscaler_spec.md) ### Examples - [Full GameServer Configuration](./examples/gameserver.yaml) diff --git a/docs/fleetautoscaler_spec.md b/docs/fleetautoscaler_spec.md new file mode 100644 index 0000000000..2e382a1d6e --- /dev/null +++ b/docs/fleetautoscaler_spec.md @@ -0,0 +1,44 @@ +# Fleet Autoscaler Specification + +⚠️⚠️⚠️ **This is currently a release candidate feature** ⚠️⚠️⚠️ + +A `FleetAutoscaler`'s job is to automatically scale up and down a `Fleet` in response to demand. + +A full `FleetAutoscaler` specification is available below and in the +[example folder](../examples/fleetautoscaler.yaml) for reference : + +```yaml +apiVersion: "stable.agones.dev/v1alpha1" +kind: FleetAutoscaler +metadata: + name: fleet-autoscaler-example +spec: + + fleetName: fleet-example + policy: + type: Buffer + buffer: + bufferSize: 5 + minReplicas: 10 + maxReplicas: 20 +``` + +Since Agones defines a new +[Custom Resources Definition (CRD)](https://kubernetes.io/docs/concepts/api-extension/custom-resources/) +we can define a new resource using the kind `FleetAutoscaler` with the custom group `stable.agones.dev` and API +version `v1alpha1`. + +The `spec` field is the actual `FleetAutoscaler` specification and it is composed as follows: + +- `fleetName` is name of the fleet to attach to and control. Must be an existing `Fleet` in the same namespace + as this `FleetAutoscaler`. +- `policy` is the autoscaling policy + - `type` is type of the policy. For now, only "Buffer" is available + - `buffer` parameters of the buffer policy + - `bufferSize` is the size of a buffer of "ready" game server instances + The FleetAutoscaler will scale the fleet up and down trying to maintain this buffer, + as instances are being allocated or terminated + it can be specified either in absolute (i.e. 5) or percentage format (i.e. 5%) + - `minReplicas` is the minimum fleet size to be set by this FleetAutoscaler. + if not specified, the minimum fleet size will be bufferSize + - `maxReplicas` is the maximum fleet size that can be set by this FleetAutoscaler. Required. \ No newline at end of file From fe5dc0dad1c4cae18adc928d9e931e81ad0817b5 Mon Sep 17 00:00:00 2001 From: Mark Mandel Date: Thu, 11 Oct 2018 11:07:08 -0700 Subject: [PATCH 10/25] Troubleshooting guide for issues with Agones. 
From fe5dc0dad1c4cae18adc928d9e931e81ad0817b5 Mon Sep 17 00:00:00 2001
From: Mark Mandel
Date: Thu, 11 Oct 2018 11:07:08 -0700
Subject: [PATCH 10/25] Troubleshooting guide for issues with Agones.

Fixes #282
---
 README.md               |  1 +
 docs/troubleshooting.md | 58 +++++++++++++++++++++++++++++++++++++++++
 2 files changed, 59 insertions(+)
 create mode 100644 docs/troubleshooting.md

diff --git a/README.md b/README.md
index cdb1f090ff..c0ebb533ee 100644
--- a/README.md
+++ b/README.md
@@ -51,6 +51,7 @@ Documentation and usage guides on how to develop and host dedicated game servers
   - [Integrating the Game Server SDK](sdks)
   - [GameServer Health Checking](./docs/health_checking.md)
   - [Accessing Agones via the Kubernetes API](./docs/access_api.md)
+  - [Troubleshooting](./docs/troubleshooting.md)
 
 ### Tutorials
 - [Create an Allocator Service (Go)](./docs/create_allocator_service.md) - Learn to programmatically access Agones via the API

diff --git a/docs/troubleshooting.md b/docs/troubleshooting.md
new file mode 100644
index 0000000000..975dc1361f
--- /dev/null
+++ b/docs/troubleshooting.md
@@ -0,0 +1,58 @@
+# Troubleshooting
+
+Troubleshooting guides and steps.
+
+Table of Contents
+=================
+
+* [How do I see the logs for Agones?](#how-do-i-see-the-logs-for-agones)
+* [I uninstalled Agones before deleting all my GameServers and now they won't delete](#i-uninstalled-agones-before-deleting-all-my-gameservers-and-now-they-wont-delete)
+* [I'm getting Forbidden errors when trying to install Agones](#im-getting-forbidden-errors-when-trying-to-install-agones)
+
+## How do I see the logs for Agones?
+
+If something is going wrong and you want to see the logs for Agones, there are potentially two places you will want to
+check:
+
+1. The controller: assuming you installed Agones in the `agones-system` namespace, you will find that there
+is a single pod called `agones-controller-<hash>` (where `<hash>` is the unique code that Kubernetes generates)
+that exists there, that you can get the logs from. This is the main
+controller for Agones, and should be the first place to check when things go wrong.
+
+   To get the logs from this controller run:
+   `kubectl logs --namespace=agones-system agones-controller-<hash>`
+2. The SDK server sidecar: Agones runs a small [gRPC](https://grpc.io/) + HTTP server for the SDK in a container in the
+same network namespace as the game server container, which the SDK connects to.
+The logs from this SDK server are also useful for tracking down issues, especially if you are having trouble with a
+particular `GameServer`.
+   1. To find the `Pod` for the `GameServer`, look for the pod with a name that is prefixed with the name of the
+   owning `GameServer`. For example, if you have a `GameServer` named `simple-udp`, its pod could potentially be named
+   `simple-udp-dnbwj`.
+   2. To get the logs from that `Pod`, we need to specify that we want the logs from the `agones-gameserver-sidecar`
+   container. To do that, run the following:
+   `kubectl logs simple-udp-dnbwj -c agones-gameserver-sidecar`
+
+Agones uses JSON structured logging, therefore errors will be visible through the `"severity":"error"` key and value.
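+
+For example, assuming `jq` is installed, and that the controller `Deployment` is named `agones-controller`
+(both assumptions can be verified with `kubectl get deployments --namespace=agones-system`), the structured
+logs can be filtered down to just the errors:
+
+```bash
+# tail the controller logs and keep only JSON entries whose severity is "error"
+kubectl logs --namespace=agones-system deployment/agones-controller --tail=200 \
+  | jq -c 'select(.severity == "error")'
+```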
+
+## I uninstalled Agones before deleting all my `GameServers` and now they won't delete
+
+Agones `GameServers` use [Finalizers](https://kubernetes.io/docs/tasks/access-kubernetes-api/custom-resources/custom-resource-definitions/#finalizers)
+to manage garbage collection of the `GameServers`. This means that if the Agones controller
+doesn't remove the finalizer for you (i.e. if it has been uninstalled), it can be tricky to remove them all.
+
+Thankfully, if we create a patch to remove the finalizers from all GameServers, we can delete them with impunity.
+
+A quick one-liner to do this:
+
+`kubectl get gameserver -o name | xargs -n1 -P1 -I{} kubectl patch {} --type=merge -p '{"metadata": {"finalizers": []}}'`
+
+Once this is done, you can `kubectl delete gs --all` and clean everything up (if it's not gone already).
+
+## I'm getting Forbidden errors when trying to install Agones
+
+Some troubleshooting steps:
+
+1. Run `kubectl describe clusterrolebinding cluster-admin-binding` and make sure your email is in there. This may be
+_case-sensitive_, so you may need to compare it to the case you used.
+1. In the [GKE tutorial](../install/README.md#enabling-creation-of-rbac-resources) `gcloud config get-value account`
+will return a lowercase email address, so if you are using a CamelCase email, you may want to type that in manually.

From cafcf04ab30b93004afe3f5f67c6f771b2f82f48 Mon Sep 17 00:00:00 2001
From: Mark Mandel
Date: Tue, 16 Oct 2018 09:23:56 -0700
Subject: [PATCH 11/25] Change for the 0.5.0 release.

---
 CHANGELOG.md                            | 23 +++++++++++++++++++++--
 docs/access_api.md                      |  8 ++++----
 docs/create_fleet.md                    |  4 ++--
 docs/create_fleetautoscaler.md          |  2 --
 docs/create_gameserver.md               |  2 +-
 docs/fleetautoscaler_spec.md            |  2 --
 docs/gameserver_spec.md                 |  2 +-
 install/README.md                       |  4 ++--
 install/helm/agones/templates/NOTES.txt |  2 +-
 9 files changed, 32 insertions(+), 17 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index b2a1023ffc..357816f6da 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,8 +1,26 @@
 # Changelog
 
-## [v0.5.0.rc](https://github.com/GoogleCloudPlatform/agones/tree/v0.5.0.rc) (2018-10-09)
+## [v0.5.0](https://github.com/GoogleCloudPlatform/agones/tree/v0.5.0) (2018-10-16)
 
-[Full Changelog](https://github.com/GoogleCloudPlatform/agones/compare/v0.4.0...v0.5.0.rc)
+[Full Changelog](https://github.com/GoogleCloudPlatform/agones/compare/v0.5.0-rc...v0.5.0)
+
+**Fixed bugs:**
+
+- Tutorial fails @ Step 5 due to RBAC issues if you have capital letters in your gcloud account name [\#282](https://github.com/GoogleCloudPlatform/agones/issues/282)
+
+**Closed issues:**
+
+- Release 0.5.0.rc [\#378](https://github.com/GoogleCloudPlatform/agones/issues/378)
+
+**Merged pull requests:**
+
+- Troubleshooting guide for issues with Agones.
[\#384](https://github.com/GoogleCloudPlatform/agones/pull/384) ([markmandel](https://github.com/markmandel)) +- Spec docs for FleetAutoscaler [\#381](https://github.com/GoogleCloudPlatform/agones/pull/381) ([markmandel](https://github.com/markmandel)) +- Post 0.5.0-rc updates [\#380](https://github.com/GoogleCloudPlatform/agones/pull/380) ([markmandel](https://github.com/markmandel)) + +## [v0.5.0-rc](https://github.com/GoogleCloudPlatform/agones/tree/v0.5.0-rc) (2018-10-09) + +[Full Changelog](https://github.com/GoogleCloudPlatform/agones/compare/v0.4.0...v0.5.0-rc) **Implemented enhancements:** @@ -42,6 +60,7 @@ **Merged pull requests:** +- Release 0.5.0.rc changes [\#379](https://github.com/GoogleCloudPlatform/agones/pull/379) ([markmandel](https://github.com/markmandel)) - Make WaitForFleetCondition take up to 5 minutes [\#377](https://github.com/GoogleCloudPlatform/agones/pull/377) ([markmandel](https://github.com/markmandel)) - Fix for flaky test TestControllerAddress [\#376](https://github.com/GoogleCloudPlatform/agones/pull/376) ([markmandel](https://github.com/markmandel)) - Fix typo [\#374](https://github.com/GoogleCloudPlatform/agones/pull/374) ([Maxpain177](https://github.com/Maxpain177)) diff --git a/docs/access_api.md b/docs/access_api.md index 320375500f..dc6a1a515a 100644 --- a/docs/access_api.md +++ b/docs/access_api.md @@ -83,7 +83,7 @@ func main() { Spec: v1alpha1.GameServerSpec{ Template: corev1.PodTemplateSpec{ Spec: corev1.PodSpec{ - Containers: []corev1.Container{{Name: "udp-server", Image: "gcr.io/agones-images/udp-server:0.3"}}, + Containers: []corev1.Container{{Name: "udp-server", Image: "gcr.io/agones-images/udp-server:0.4"}}, }, }, }, @@ -171,7 +171,7 @@ $ curl http://localhost:8001/apis/stable.agones.dev/v1alpha1/namespaces/default/ "kind": "GameServer", "metadata": { "annotations": { - "kubectl.kubernetes.io/last-applied-configuration": "{\"apiVersion\":\"stable.agones.dev/v1alpha1\",\"kind\":\"GameServer\",\"metadata\":{\"annotations\":{},\"name\":\"simple-udp\",\"namespace\":\"default\"},\"spec\":{\"containerPort\":7654,\"hostPort\":7777,\"portPolicy\":\"static\",\"template\":{\"spec\":{\"containers\":[{\"image\":\"gcr.io/agones-images/udp-server:0.3\",\"name\":\"simple-udp\"}]}}}}\n" + "kubectl.kubernetes.io/last-applied-configuration": "{\"apiVersion\":\"stable.agones.dev/v1alpha1\",\"kind\":\"GameServer\",\"metadata\":{\"annotations\":{},\"name\":\"simple-udp\",\"namespace\":\"default\"},\"spec\":{\"containerPort\":7654,\"hostPort\":7777,\"portPolicy\":\"static\",\"template\":{\"spec\":{\"containers\":[{\"image\":\"gcr.io/agones-images/udp-server:0.4\",\"name\":\"simple-udp\"}]}}}}\n" }, "clusterName": "", "creationTimestamp": "2018-03-02T21:41:05Z", @@ -203,7 +203,7 @@ $ curl http://localhost:8001/apis/stable.agones.dev/v1alpha1/namespaces/default/ "spec": { "containers": [ { - "image": "gcr.io/agones-images/udp-server:0.3", + "image": "gcr.io/agones-images/udp-server:0.4", "name": "simple-udp", "resources": {} } @@ -311,7 +311,7 @@ $ curl -d '{"apiVersion":"stable.agones.dev/v1alpha1","kind":"FleetAllocation"," "spec": { "containers": [ { - "image": "gcr.io/agones-images/udp-server:0.3", + "image": "gcr.io/agones-images/udp-server:0.4", "name": "simple-udp", "resources": {} } diff --git a/docs/create_fleet.md b/docs/create_fleet.md index d879473ffb..b6dfcde34e 100644 --- a/docs/create_fleet.md +++ b/docs/create_fleet.md @@ -109,7 +109,7 @@ Spec: Creation Timestamp: Spec: Containers: - Image: gcr.io/agones-images/udp-server:0.3 + Image: 
gcr.io/agones-images/udp-server:0.4 Name: simple-udp Resources: Status: @@ -225,7 +225,7 @@ status: creationTimestamp: null spec: containers: - - image: gcr.io/agones-images/udp-server:0.3 + - image: gcr.io/agones-images/udp-server:0.4 name: simple-udp resources: {} status: diff --git a/docs/create_fleetautoscaler.md b/docs/create_fleetautoscaler.md index 9df6806747..5472446b7e 100644 --- a/docs/create_fleetautoscaler.md +++ b/docs/create_fleetautoscaler.md @@ -1,7 +1,5 @@ # Quickstart Create a Fleet Autoscaler -⚠️⚠️⚠️ **This is currently a release candidate feature** ⚠️⚠️⚠️ - This guide covers how you can quickly get started using Agones to create a Fleet Autoscaler to manage your fleet size automatically, based on actual load. diff --git a/docs/create_gameserver.md b/docs/create_gameserver.md index 9b1b9153b5..724559ee42 100644 --- a/docs/create_gameserver.md +++ b/docs/create_gameserver.md @@ -104,7 +104,7 @@ Spec: Creation Timestamp: Spec: Containers: - Image: gcr.io/agones-images/udp-server:0.3 + Image: gcr.io/agones-images/udp-server:0.4 Name: simple-udp Resources: Status: diff --git a/docs/fleetautoscaler_spec.md b/docs/fleetautoscaler_spec.md index 2e382a1d6e..f74073ea51 100644 --- a/docs/fleetautoscaler_spec.md +++ b/docs/fleetautoscaler_spec.md @@ -1,7 +1,5 @@ # Fleet Autoscaler Specification -⚠️⚠️⚠️ **This is currently a release candidate feature** ⚠️⚠️⚠️ - A `FleetAutoscaler`'s job is to automatically scale up and down a `Fleet` in response to demand. A full `FleetAutoscaler` specification is available below and in the diff --git a/docs/gameserver_spec.md b/docs/gameserver_spec.md index 81c3609f20..b13cea16c9 100644 --- a/docs/gameserver_spec.md +++ b/docs/gameserver_spec.md @@ -2,7 +2,7 @@ Like any other Kubernetes resource you describe a GameServer's desired state via a specification written in YAML or JSON to the Kubernetes API. The Agones controller will then change the actual state to the desired state. -A full GameServer specification is available below and in the [example folder](https://github.com/GoogleCloudPlatform/agones/blob/release-0.4.0/examples/gameserver.yaml) for reference : +A full GameServer specification is available below and in the [example folder](https://github.com/GoogleCloudPlatform/agones/blob/release-0.5.0/examples/gameserver.yaml) for reference : ``` apiVersion: "stable.agones.dev/v1alpha1" diff --git a/install/README.md b/install/README.md index 37e3925842..b11c62a6a6 100644 --- a/install/README.md +++ b/install/README.md @@ -288,11 +288,11 @@ This will install Agones in your cluster. ## Install with YAML We can install Agones to the cluster using the -[install.yaml](https://github.com/GoogleCloudPlatform/agones/blob/release-0.4.0/install/yaml/install.yaml) file. +[install.yaml](https://github.com/GoogleCloudPlatform/agones/blob/release-0.5.0/install/yaml/install.yaml) file. ```bash kubectl create namespace agones-system -kubectl apply -f https://github.com/GoogleCloudPlatform/agones/raw/release-0.4.0/install/yaml/install.yaml +kubectl apply -f https://github.com/GoogleCloudPlatform/agones/raw/release-0.5.0/install/yaml/install.yaml ``` You can also find the install.yaml in the latest `agones-install` zip from the [releases](https://github.com/GoogleCloudPlatform/agones/releases) archive. 
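As a quick sanity check after applying the release-0.5.0 manifests above, it is worth confirming which controller image actually landed in the cluster. A minimal sketch, assuming the `agones-system` namespace and the `agones-controller` deployment name used in the install.yaml earlier in this series:

```bash
# should report gcr.io/agones-images/agones-controller:0.5.0 from the manifests above
kubectl describe deployment agones-controller --namespace agones-system | grep Image

# and the controller pod should be Running
kubectl get pods --namespace agones-system
```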
diff --git a/install/helm/agones/templates/NOTES.txt b/install/helm/agones/templates/NOTES.txt
index f97246e0af..697f51444d 100644
--- a/install/helm/agones/templates/NOTES.txt
+++ b/install/helm/agones/templates/NOTES.txt
@@ -19,7 +19,7 @@ spec:
     spec:
       containers:
       - name: simple-udp
-        image: gcr.io/agones-images/udp-server:0.2
+        image: gcr.io/agones-images/udp-server:0.4
 
 Finally don't forget to explore our documentation and usage guides on how to develop and host dedicated game servers on top of Agones. :

From a30763772855f457f0ad10f2365afe21c733433c Mon Sep 17 00:00:00 2001
From: Mark Mandel
Date: Tue, 16 Oct 2018 12:29:22 -0700
Subject: [PATCH 12/25] Updates for 0.6.0

---
 build/Makefile                  |  2 +-
 install/helm/agones/Chart.yaml  |  4 ++--
 install/helm/agones/values.yaml |  2 +-
 install/yaml/install.yaml       | 34 ++++++++++++++++-----------------
 4 files changed, 21 insertions(+), 21 deletions(-)

diff --git a/build/Makefile b/build/Makefile
index 1e3f534b03..a70deca136 100644
--- a/build/Makefile
+++ b/build/Makefile
@@ -24,7 +24,7 @@
 #
 
 # base version target. This is usually the next release.
-base_version = 0.5.0
+base_version = 0.6.0
 
 # agones image release registry
 release_registry = gcr.io/agones-images

diff --git a/install/helm/agones/Chart.yaml b/install/helm/agones/Chart.yaml
index 2d92df9bea..5959c36867 100644
--- a/install/helm/agones/Chart.yaml
+++ b/install/helm/agones/Chart.yaml
@@ -15,8 +15,8 @@
 # Declare variables to be passed into your templates.
 apiVersion: v1
-appVersion: "0.5.0"
-version: 0.5.0
+appVersion: "0.6.0-rc"
+version: 0.6.0-rc
 name: agones
 description: a library for hosting, running and scaling dedicated game servers on Kubernetes.
 keywords:

diff --git a/install/helm/agones/values.yaml b/install/helm/agones/values.yaml
index b503551037..80f4a74af9 100644
--- a/install/helm/agones/values.yaml
+++ b/install/helm/agones/values.yaml
@@ -31,7 +31,7 @@ agones:
       timeoutSeconds: 1
   image:
     registry: gcr.io/agones-images
-    tag: 0.5.0
+    tag: 0.6.0-rc
   controller:
     name: agones-controller
     pullPolicy: IfNotPresent

diff --git a/install/yaml/install.yaml b/install/yaml/install.yaml
index b96b6fd492..2914616756 100644
--- a/install/yaml/install.yaml
+++ b/install/yaml/install.yaml
@@ -21,7 +21,7 @@ metadata:
   namespace: agones-system
   labels:
     app: agones
-    chart: agones-0.5.0
+    chart: agones-0.6.0-rc
     release: agones-manual
     heritage: Tiller
 ---
@@ -32,7 +32,7 @@ metadata:
   namespace: agones-system
   labels:
     app: agones
-    chart: agones-0.5.0
+    chart: agones-0.6.0-rc
     release: agones-manual
     heritage: Tiller
 rules:
@@ -62,7 +62,7 @@ metadata:
   namespace: agones-system
   labels:
     app: agones
-    chart: agones-0.5.0
+    chart: agones-0.6.0-rc
     release: agones-manual
     heritage: Tiller
 subjects:
@@ -96,7 +96,7 @@ metadata:
   namespace: default
   labels:
     app: agones
-    chart: agones-0.5.0
+    chart: agones-0.6.0-rc
     release: agones-manual
     heritage: Tiller
 ---
@@ -107,7 +107,7 @@ metadata:
   namespace: agones-system
   labels:
     app: agones
-    chart: agones-0.5.0
+    chart: agones-0.6.0-rc
     release: agones-manual
     heritage: Tiller
 rules:
@@ -125,7 +125,7 @@ metadata:
   namespace: default
   labels:
     app: agones
-    chart: agones-0.5.0
+    chart: agones-0.6.0-rc
     release: agones-manual
     heritage: Tiller
 subjects:
@@ -160,7 +160,7 @@ metadata:
   labels:
     component: crd
     app: agones
-    chart: agones-0.5.0
+    chart: agones-0.6.0-rc
     release: agones-manual
     heritage: Tiller
 spec:
@@ -341,7 +341,7 @@ metadata:
   labels:
     component: crd
     app: agones
-    chart: agones-0.5.0
+    chart: agones-0.6.0-rc
     release: agones-manual
     heritage: Tiller
 spec:
@@ -390,7 +390,7 @@
metadata: labels: component: crd app: agones - chart: agones-0.5.0 + chart: agones-0.6.0-rc release: agones-manual heritage: Tiller spec: @@ -457,7 +457,7 @@ metadata: labels: component: crd app: agones - chart: agones-0.5.0 + chart: agones-0.6.0-rc release: agones-manual heritage: Tiller spec: @@ -622,7 +622,7 @@ metadata: labels: component: crd app: agones - chart: agones-0.5.0 + chart: agones-0.6.0-rc release: agones-manual heritage: Tiller spec: @@ -799,7 +799,7 @@ metadata: labels: component: controller app: agones - chart: agones-0.5.0 + chart: agones-0.6.0-rc release: agones-manual heritage: Tiller spec: @@ -832,7 +832,7 @@ metadata: labels: component: controller app: agones - chart: agones-0.5.0 + chart: agones-0.6.0-rc release: agones-manual heritage: Tiller spec: @@ -856,7 +856,7 @@ spec: serviceAccountName: agones-controller containers: - name: agones-controller - image: "gcr.io/agones-images/agones-controller:0.5.0" + image: "gcr.io/agones-images/agones-controller:0.6.0-rc" imagePullPolicy: IfNotPresent env: - name: ALWAYS_PULL_SIDECAR # set the sidecar imagePullPolicy to Always @@ -868,7 +868,7 @@ spec: - name: MAX_PORT value: "8000" - name: SIDECAR # overwrite the GameServer sidecar image that is used - value: "gcr.io/agones-images/agones-sdk:0.5.0" + value: "gcr.io/agones-images/agones-sdk:0.6.0-rc" livenessProbe: httpGet: path: /live @@ -944,7 +944,7 @@ metadata: labels: component: controller app: agones - chart: agones-0.5.0 + chart: agones-0.6.0-rc release: agones-manual heritage: Tiller webhooks: @@ -976,7 +976,7 @@ metadata: namespace: agones-system labels: app: agones-manual - chart: "agones-0.5.0" + chart: "agones-0.6.0-rc" release: "agones-manual" heritage: "Tiller" type: Opaque From 9ff820c861c06a4717fa11580f8dc275ab18481d Mon Sep 17 00:00:00 2001 From: Mark Mandel Date: Thu, 11 Oct 2018 12:38:03 -0700 Subject: [PATCH 13/25] Update to Go 1.11.1 --- build/build-image/Dockerfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/build/build-image/Dockerfile b/build/build-image/Dockerfile index 67288c8793..988d932dcc 100644 --- a/build/build-image/Dockerfile +++ b/build/build-image/Dockerfile @@ -24,7 +24,7 @@ RUN apt-get update && \ # install go WORKDIR /usr/local -ENV GO_VERSION=1.10.3 +ENV GO_VERSION=1.11.1 ENV GOPATH /go RUN wget -q https://redirector.gvt1.com/edgedl/go/go${GO_VERSION}.linux-amd64.tar.gz && \ tar -xzf go${GO_VERSION}.linux-amd64.tar.gz && rm go${GO_VERSION}.linux-amd64.tar.gz && mkdir ${GOPATH} From 9e94becb0543daecde29257382fdc5602fe5878e Mon Sep 17 00:00:00 2001 From: Mark Mandel Date: Thu, 11 Oct 2018 11:08:06 -0700 Subject: [PATCH 14/25] Mount point for helm config This allows repository information to be saved between invocations of the build image. --- build/Makefile | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/build/Makefile b/build/Makefile index a70deca136..93e69cd89a 100644 --- a/build/Makefile +++ b/build/Makefile @@ -58,10 +58,13 @@ agones_path := $(realpath $(build_path)/..) 
kubeconfig_path := $(dir $(KUBECONFIG)) kubeconfig_file := $(notdir $(KUBECONFIG)) +helm_path := ~/.helm + agones_package = agones.dev/agones mount_path = /go/src/$(agones_package) common_mounts = -v $(build_path)/.config/gcloud:/root/.config/gcloud \ -v $(kubeconfig_path):/root/.kube \ + -v $(helm_path):/root/.helm \ -v $(agones_path):$(mount_path) build_tag = agones-build:$(build_version) @@ -271,6 +274,7 @@ clean-build-image: ensure-build-config: -mkdir -p $(kubeconfig_path) -mkdir -p $(build_path)/.config/gcloud + -mkdir -p $(helm_path) # create the build image if it doesn't exist ensure-build-image: ensure-build-config From 1923a41679abec3ff458e9fc9392671cfd3d3ce8 Mon Sep 17 00:00:00 2001 From: Mark Mandel Date: Tue, 2 Oct 2018 09:26:24 -0700 Subject: [PATCH 15/25] Prioritise Allocation from Nodes with Allocated/Ready GameServers One of the first parts for Node autoscaling (#368) - make sure we essentially bin pack our allocated game servers. This change makes allocation first prioritise allocation from `Nodes` that already have the most `Allocated` `GameServers`, and then in the case of a tie, to the `Nodes` that have the most `Ready` `GameServers`. This sets us up for the next part, such that when we scale down a Fleet, it removes `GameServers` from `Nodes` that have the least `GameServers` on them. --- README.md | 3 + docs/create_fleetautoscaler.md | 2 + docs/fleet_spec.md | 6 ++ docs/scheduling_autoscaling.md | 113 ++++++++++++++++++++++ examples/fleet.yaml | 7 ++ pkg/apis/stable/v1alpha1/fleet.go | 22 +++++ pkg/apis/stable/v1alpha1/fleet_test.go | 2 + pkg/fleetallocation/controller.go | 34 +++---- pkg/fleetallocation/controller_test.go | 95 +++++++++++++++++++ pkg/fleetallocation/find.go | 88 +++++++++++++++++ pkg/fleetallocation/find_test.go | 125 +++++++++++++++++++++++++ test/e2e/fleet_test.go | 41 ++++---- 12 files changed, 505 insertions(+), 33 deletions(-) create mode 100644 docs/scheduling_autoscaling.md create mode 100644 pkg/fleetallocation/find.go create mode 100644 pkg/fleetallocation/find_test.go diff --git a/README.md b/README.md index c0ebb533ee..41b0e90f8f 100644 --- a/README.md +++ b/README.md @@ -69,6 +69,9 @@ Documentation and usage guides on how to develop and host dedicated game servers - [CPP Simple](./examples/cpp-simple) (C++) - C++ example that starts up, stays healthy and then shuts down after 60 seconds. - [Xonotic](./examples/xonotic) - Wraps the SDK around the open source FPS game [Xonotic](http://www.xonotic.org) and hosts it on Agones. +### Advanced +- [Scheduling and Autoscaling](./docs/scheduling_autoscaling.md) + ## Get involved - [Slack](https://join.slack.com/t/agones/shared_invite/enQtMzE5NTE0NzkyOTk1LWQ2ZmY1Mjc4ZDQ4NDJhOGYxYTY2NTY0NjUwNjliYzVhMWFjYjMxM2RlMjg3NGU0M2E0YTYzNDIxNDMyZGNjMjU) diff --git a/docs/create_fleetautoscaler.md b/docs/create_fleetautoscaler.md index 5472446b7e..22577d53c6 100644 --- a/docs/create_fleetautoscaler.md +++ b/docs/create_fleetautoscaler.md @@ -251,4 +251,6 @@ simple-udp-mzhrl-zg9rq Ready 10.30.64.99 [map[name:default port:7745]] ## Next Steps +Read the advanced [Scheduling and Autoscaling](scheduling_autoscaling.md) guide, for more details on autoscaling. + If you want to use your own GameServer container make sure you have properly integrated the [Agones SDK](../sdks/). 
\ No newline at end of file

diff --git a/docs/fleet_spec.md b/docs/fleet_spec.md
index 042e017dcf..53909e18af 100644
--- a/docs/fleet_spec.md
+++ b/docs/fleet_spec.md
@@ -15,6 +15,7 @@ metadata:
   name: fleet-example
 spec:
   replicas: 2
+  scheduling: Packed
   strategy:
     type: RollingUpdate
     rollingUpdate:
@@ -53,6 +54,11 @@ This is a very common pattern in the Kubernetes ecosystem.
 The `spec` field is the actual `Fleet` specification and it is composed as follows:
 
 - `replicas` is the number of `GameServers` to keep Ready or Allocated in this Fleet
+- `scheduling` (⚠️⚠️⚠️ **This is currently a development feature and has not been released** ⚠️⚠️⚠️) defines how GameServers are organised across the cluster. Currently only affects Allocation, but will expand
+  in future releases. Options include:
+  "Packed" (default) is aimed at dynamic Kubernetes clusters, such as cloud providers, wherein we want to bin pack
+  resources. "Distributed" is aimed at static Kubernetes clusters, wherein we want to distribute resources across the entire
+  cluster. See [Scheduling and Autoscaling](scheduling_autoscaling.md) for more details.
 - `strategy` is the `GameServer` replacement strategy for when the `GameServer` template is edited.
   - `type` is the replacement strategy for when the GameServer template is changed. Default option is "RollingUpdate", but "Recreate" is also available.
     - `RollingUpdate` will increment by `maxSurge` value on each iteration, while decrementing by `maxUnavailable` on each iteration, until all GameServers have been switched from one version to another.

diff --git a/docs/scheduling_autoscaling.md b/docs/scheduling_autoscaling.md
new file mode 100644
index 0000000000..e6747e2448
--- /dev/null
+++ b/docs/scheduling_autoscaling.md
@@ -0,0 +1,113 @@
+# Scheduling and Autoscaling
+
+⚠️⚠️⚠️ **This is currently a development feature and has not been released** ⚠️⚠️⚠️
+
+> Autoscaling is currently ongoing work within Agones. The work you see here is just the beginning.
+
+
+Table of Contents
+=================
+
+* [Fleet Autoscaling](#fleet-autoscaling)
+* [Autoscaling Concepts](#autoscaling-concepts)
+  * [Allocation Scheduling](#allocation-scheduling)
+* [Fleet Scheduling](#fleet-scheduling)
+  * [Packed](#packed)
+    * [Allocation Scheduling Strategy](#allocation-scheduling-strategy)
+  * [Distributed](#distributed)
+    * [Allocation Scheduling Strategy](#allocation-scheduling-strategy-1)
+
+Scheduling and autoscaling go hand in hand, as where in the cluster `GameServers` are provisioned
+impacts how to autoscale fleets up and down (or if you would even want to).
+
+## Fleet Autoscaling
+
+Fleet autoscaling is currently the only type of autoscaling that exists in Agones. It is also only available as a simple
+buffer autoscaling strategy. Have a look at the [Create a Fleet Autoscaler](create_fleetautoscaler.md) quickstart,
+and the [Fleet Autoscaler Specification](fleetautoscaler_spec.md) for details.
+
+Node scaling, and more sophisticated fleet autoscaling, will be coming in future releases ([design](https://github.com/GoogleCloudPlatform/agones/issues/368)).
+
+## Autoscaling Concepts
+
+To facilitate autoscaling, we need to combine several concepts and pieces of functionality, described below.
+
+### Allocation Scheduling
+
+Allocation scheduling refers to the order in which `GameServers`, and specifically their backing `Pods`, are chosen
+from across the Kubernetes cluster within a given `Fleet` when [allocation](./create_fleet.md#4-allocate-a-game-server-from-the-fleet) occurs.
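+
+As a worked example of the difference (the node names here are illustrative): if node A is running two `Allocated`
+and one `Ready` `GameServer`, and node B is running three `Ready` `GameServers`, then under the "Packed" strategy
+described below the next allocation will come from node A, since it already has the most `Allocated` `GameServers`,
+while under "Distributed" it will come from node B, which has the fewest.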
+
+## Fleet Scheduling
+
+There are two scheduling strategies for Fleets - each designed for different types of Kubernetes environments.
+
+### Packed
+
+```yaml
+apiVersion: "stable.agones.dev/v1alpha1"
+kind: Fleet
+metadata:
+  name: simple-udp
+spec:
+  replicas: 100
+  scheduling: Packed
+  template:
+    spec:
+      ports:
+      - containerPort: 7654
+      template:
+        spec:
+          containers:
+          - name: simple-udp
+            image: gcr.io/agones-images/udp-server:0.4
+```
+
+This is the *default* Fleet scheduling strategy. It is designed for dynamic Kubernetes environments, wherein you wish
+to scale up and down as load increases or decreases, such as in a Cloud environment where you are paying
+for the infrastructure you use.
+
+It attempts to _pack_ as much as possible into the smallest set of nodes, to make
+scaling infrastructure down as easy as possible.
+
+Currently, Allocation scheduling is the only aspect this strategy affects, but in future releases it will
+also affect `GameServer` `Pod` scheduling, and `Fleet` scale down scheduling as well.
+
+#### Allocation Scheduling Strategy
+
+Under the "Packed" strategy, allocation will prioritise allocating `GameServers` on Nodes that already have
+allocated `GameServers` running on them.
+
+### Distributed
+
+```yaml
+apiVersion: "stable.agones.dev/v1alpha1"
+kind: Fleet
+metadata:
+  name: simple-udp
+spec:
+  replicas: 100
+  scheduling: Distributed
+  template:
+    spec:
+      ports:
+      - containerPort: 7654
+      template:
+        spec:
+          containers:
+          - name: simple-udp
+            image: gcr.io/agones-images/udp-server:0.4
+```
+
+This Fleet scheduling strategy is designed for static Kubernetes environments, such as when you are running Kubernetes
+on bare metal, and the cluster size rarely changes, if at all.
+
+This attempts to distribute the load across the entire cluster as much as possible, to take advantage of the static
+size of the cluster.
+
+Currently, the only thing the scheduling strategy affects is Allocation scheduling, but in future releases it will
+also affect `GameServer` `Pod` scheduling, and `Fleet` scale down scheduling as well.
+
+#### Allocation Scheduling Strategy
+
+Under the "Distributed" strategy, allocation will prioritise allocating `GameServers` to nodes that have the fewest
+allocated `GameServers` on them.

diff --git a/examples/fleet.yaml b/examples/fleet.yaml
index ebcb2e71c9..7ef1eec9c5 100644
--- a/examples/fleet.yaml
+++ b/examples/fleet.yaml
@@ -27,6 +27,13 @@ spec:
   # the number of GameServers to keep Ready or Allocated in this Fleet
   replicas: 2
+  # defines how GameServers are organised across the cluster. Currently only affects Allocation, but will expand
+  # in future releases.
Options include: + # "Packed" (default) is aimed at dynamic Kubernetes clusters, such as cloud providers, wherein we want to bin pack + # resources + # "Distributed" is aimed at static Kubernetes clusters, wherein we want to distribute resources across the entire + # cluster + scheduling: Packed # a GameServer template - see: # https://github.com/GoogleCloudPlatform/agones/blob/master/docs/gameserver_spec.md for all the options strategy: diff --git a/pkg/apis/stable/v1alpha1/fleet.go b/pkg/apis/stable/v1alpha1/fleet.go index a74c05d520..cefe2dc254 100644 --- a/pkg/apis/stable/v1alpha1/fleet.go +++ b/pkg/apis/stable/v1alpha1/fleet.go @@ -22,11 +22,27 @@ import ( ) const ( + // Packed scheduling strategy will prioritise allocating GameServers + // on Nodes with the most Allocated, and then Ready GameServers + // to bin pack as many Allocated GameServers on a single node. + // This is most useful for dynamic Kubernetes clusters - such as on Cloud Providers. + // In future versions, this will also impact Fleet scale down, and Pod Scheduling. + Packed SchedulingStrategy = "Packed" + + // Distributed scheduling strategy will prioritise allocating GameServers + // on Nodes with the least Allocated, and then Ready GameServers + // to distribute Allocated GameServers across many nodes. + // This is most useful for statically sized Kubernetes clusters - such as on physical hardware. + // In future versions, this will also impact Fleet scale down, and Pod Scheduling. + Distributed SchedulingStrategy = "Distributed" + // FleetGameServerSetLabel is the label that the name of the Fleet // is set to on the GameServerSet the Fleet controls FleetGameServerSetLabel = stable.GroupName + "/fleet" ) +type SchedulingStrategy string + // +genclient // +genclient:noStatus // +k8s:deepcopy-gen:interfaces=k8s.io/apimachinery/pkg/runtime.Object @@ -56,6 +72,8 @@ type FleetSpec struct { Replicas int32 `json:"replicas"` // Deployment strategy Strategy appsv1.DeploymentStrategy `json:"strategy"` + // Scheduling strategy. Defaults to "Packed". 
+ Scheduling SchedulingStrategy `json:"scheduling"` // Template the GameServer template to apply for this Fleet Template GameServerTemplateSpec `json:"template"` } @@ -105,6 +123,10 @@ func (f *Fleet) ApplyDefaults() { f.Spec.Strategy.Type = appsv1.RollingUpdateDeploymentStrategyType } + if f.Spec.Scheduling == "" { + f.Spec.Scheduling = Packed + } + if f.Spec.Strategy.Type == appsv1.RollingUpdateDeploymentStrategyType { if f.Spec.Strategy.RollingUpdate == nil { f.Spec.Strategy.RollingUpdate = &appsv1.RollingUpdateDeployment{} diff --git a/pkg/apis/stable/v1alpha1/fleet_test.go b/pkg/apis/stable/v1alpha1/fleet_test.go index 46e13eb3a0..f2581c854b 100644 --- a/pkg/apis/stable/v1alpha1/fleet_test.go +++ b/pkg/apis/stable/v1alpha1/fleet_test.go @@ -60,11 +60,13 @@ func TestFleetApplyDefaults(t *testing.T) { // gate assert.EqualValues(t, "", f.Spec.Strategy.Type) + assert.EqualValues(t, "", f.Spec.Scheduling) f.ApplyDefaults() assert.Equal(t, appsv1.RollingUpdateDeploymentStrategyType, f.Spec.Strategy.Type) assert.Equal(t, "25%", f.Spec.Strategy.RollingUpdate.MaxUnavailable.String()) assert.Equal(t, "25%", f.Spec.Strategy.RollingUpdate.MaxSurge.String()) + assert.Equal(t, Packed, f.Spec.Scheduling) } func TestFleetUpperBoundReplicas(t *testing.T) { diff --git a/pkg/fleetallocation/controller.go b/pkg/fleetallocation/controller.go index 2666011b4f..3edbcd6a48 100644 --- a/pkg/fleetallocation/controller.go +++ b/pkg/fleetallocation/controller.go @@ -20,7 +20,7 @@ import ( "sync" "agones.dev/agones/pkg/apis/stable" - stablev1alpha1 "agones.dev/agones/pkg/apis/stable/v1alpha1" + "agones.dev/agones/pkg/apis/stable/v1alpha1" "agones.dev/agones/pkg/client/clientset/versioned" getterv1alpha1 "agones.dev/agones/pkg/client/clientset/versioned/typed/stable/v1alpha1" "agones.dev/agones/pkg/client/informers/externalversions" @@ -95,7 +95,7 @@ func NewController( eventBroadcaster.StartRecordingToSink(&typedcorev1.EventSinkImpl{Interface: kubeClient.CoreV1().Events("")}) c.recorder = eventBroadcaster.NewRecorder(scheme.Scheme, corev1.EventSource{Component: "fleetallocation-controller"}) - kind := stablev1alpha1.Kind("FleetAllocation") + kind := v1alpha1.Kind("FleetAllocation") wh.AddHandler("/mutate", kind, admv1beta1.Create, c.creationMutationHandler) wh.AddHandler("/validate", kind, admv1beta1.Create, c.creationValidationHandler) wh.AddHandler("/validate", kind, admv1beta1.Update, c.mutationValidationHandler) @@ -120,7 +120,7 @@ func (c *Controller) Run(workers int, stop <-chan struct{}) error { func (c *Controller) creationMutationHandler(review admv1beta1.AdmissionReview) (admv1beta1.AdmissionReview, error) { c.logger.WithField("review", review).Info("creationMutationHandler") obj := review.Request.Object - fa := &stablev1alpha1.FleetAllocation{} + fa := &v1alpha1.FleetAllocation{} err := json.Unmarshal(obj.Raw, fa) if err != nil { @@ -157,10 +157,10 @@ func (c *Controller) creationMutationHandler(review admv1beta1.AdmissionReview) } // When a GameServer is deleted, the FleetAllocation should go with it - ref := metav1.NewControllerRef(gs, stablev1alpha1.SchemeGroupVersion.WithKind("GameServer")) + ref := metav1.NewControllerRef(gs, v1alpha1.SchemeGroupVersion.WithKind("GameServer")) fa.ObjectMeta.OwnerReferences = append(fa.ObjectMeta.OwnerReferences, *ref) - fa.Status = stablev1alpha1.FleetAllocationStatus{GameServer: gs} + fa.Status = v1alpha1.FleetAllocationStatus{GameServer: gs} newFA, err := json.Marshal(fa) if err != nil { @@ -191,7 +191,7 @@ func (c *Controller) creationMutationHandler(review 
admv1beta1.AdmissionReview) func (c *Controller) creationValidationHandler(review admv1beta1.AdmissionReview) (admv1beta1.AdmissionReview, error) { c.logger.WithField("review", review).Info("creationValidationHandler") obj := review.Request.Object - fa := &stablev1alpha1.FleetAllocation{} + fa := &v1alpha1.FleetAllocation{} if err := json.Unmarshal(obj.Raw, fa); err != nil { return review, errors.Wrapf(err, "error unmarshalling original FleetAllocation json: %s", obj.Raw) } @@ -225,8 +225,8 @@ func (c *Controller) creationValidationHandler(review admv1beta1.AdmissionReview func (c *Controller) mutationValidationHandler(review admv1beta1.AdmissionReview) (admv1beta1.AdmissionReview, error) { c.logger.WithField("review", review).Info("mutationValidationHandler") - newFA := &stablev1alpha1.FleetAllocation{} - oldFA := &stablev1alpha1.FleetAllocation{} + newFA := &v1alpha1.FleetAllocation{} + oldFA := &v1alpha1.FleetAllocation{} if err := json.Unmarshal(review.Request.Object.Raw, newFA); err != nil { return review, errors.Wrapf(err, "error unmarshalling new FleetAllocation json: %s", review.Request.Object.Raw) @@ -256,8 +256,8 @@ func (c *Controller) mutationValidationHandler(review admv1beta1.AdmissionReview } // allocate allocated a GameServer from a given Fleet -func (c *Controller) allocate(f *stablev1alpha1.Fleet, fam *stablev1alpha1.FleetAllocationMeta) (*stablev1alpha1.GameServer, error) { - var allocation *stablev1alpha1.GameServer +func (c *Controller) allocate(f *v1alpha1.Fleet, fam *v1alpha1.FleetAllocationMeta) (*v1alpha1.GameServer, error) { + var allocation *v1alpha1.GameServer // can only allocate one at a time, as we don't want two separate processes // trying to allocate the same GameServer to different clients c.allocationMutex.Lock() @@ -272,11 +272,11 @@ func (c *Controller) allocate(f *stablev1alpha1.Fleet, fam *stablev1alpha1.Fleet return allocation, err } - for _, gs := range gsList { - if gs.Status.State == stablev1alpha1.Ready && gs.ObjectMeta.DeletionTimestamp.IsZero() { - allocation = gs - break - } + switch f.Spec.Scheduling { + case v1alpha1.Packed: + allocation = findReadyGameServerForAllocation(gsList, packedComparator) + case v1alpha1.Distributed: + allocation = findReadyGameServerForAllocation(gsList, distributedComparator) } if allocation == nil { @@ -284,7 +284,7 @@ func (c *Controller) allocate(f *stablev1alpha1.Fleet, fam *stablev1alpha1.Fleet } gsCopy := allocation.DeepCopy() - gsCopy.Status.State = stablev1alpha1.Allocated + gsCopy.Status.State = v1alpha1.Allocated if fam != nil { c.patchMetadata(gsCopy, fam) @@ -300,7 +300,7 @@ func (c *Controller) allocate(f *stablev1alpha1.Fleet, fam *stablev1alpha1.Fleet } // patch the labels and annotations of an allocated GameServer with metadata from a FleetAllocation -func (c *Controller) patchMetadata(gs *stablev1alpha1.GameServer, fam *stablev1alpha1.FleetAllocationMeta) { +func (c *Controller) patchMetadata(gs *v1alpha1.GameServer, fam *v1alpha1.FleetAllocationMeta) { // patch ObjectMeta labels if fam.Labels != nil { if gs.ObjectMeta.Labels == nil { diff --git a/pkg/fleetallocation/controller_test.go b/pkg/fleetallocation/controller_test.go index 51e2a37e37..ea4c518ac9 100644 --- a/pkg/fleetallocation/controller_test.go +++ b/pkg/fleetallocation/controller_test.go @@ -141,6 +141,8 @@ func TestControllerMutationValidationHandler(t *testing.T) { } func TestControllerAllocate(t *testing.T) { + t.Parallel() + f, gsSet, gsList := defaultFixtures(4) c, m := newFakeController() n := metav1.Now() @@ -210,6 +212,98 @@ 
func TestControllerAllocate(t *testing.T) { assert.False(t, updated) } +func TestControllerAllocatePriority(t *testing.T) { + t.Parallel() + + n1 := "node1" + n2 := "node2" + + run := func(t *testing.T, name string, test func(t *testing.T, c *Controller, fleet *v1alpha1.Fleet)) { + f, gsSet, gsList := defaultFixtures(4) + c, m := newFakeController() + + gsList[0].Status.NodeName = n1 + gsList[1].Status.NodeName = n2 + gsList[2].Status.NodeName = n1 + gsList[3].Status.NodeName = n1 + + m.AgonesClient.AddReactor("list", "fleets", func(action k8stesting.Action) (bool, runtime.Object, error) { + return true, &v1alpha1.FleetList{Items: []v1alpha1.Fleet{*f}}, nil + }) + m.AgonesClient.AddReactor("list", "gameserversets", func(action k8stesting.Action) (bool, runtime.Object, error) { + return true, &v1alpha1.GameServerSetList{Items: []v1alpha1.GameServerSet{*gsSet}}, nil + }) + m.AgonesClient.AddReactor("list", "gameservers", func(action k8stesting.Action) (bool, runtime.Object, error) { + return true, &v1alpha1.GameServerList{Items: gsList}, nil + }) + + gsWatch := watch.NewFake() + m.AgonesClient.AddWatchReactor("gameservers", k8stesting.DefaultWatchReactor(gsWatch, nil)) + m.AgonesClient.AddReactor("update", "gameservers", func(action k8stesting.Action) (bool, runtime.Object, error) { + ua := action.(k8stesting.UpdateAction) + gs := ua.GetObject().(*v1alpha1.GameServer) + gsWatch.Modify(gs) + return true, gs, nil + }) + + _, cancel := agtesting.StartInformers(m) + defer cancel() + + t.Run(name, func(t *testing.T) { + test(t, c, f) + }) + } + + run(t, "packed", func(t *testing.T, c *Controller, f *v1alpha1.Fleet) { + // priority should be node1, then node2 + gs, err := c.allocate(f, nil) + assert.Nil(t, err) + assert.Equal(t, n1, gs.Status.NodeName) + + gs, err = c.allocate(f, nil) + assert.Nil(t, err) + assert.Equal(t, n1, gs.Status.NodeName) + + gs, err = c.allocate(f, nil) + assert.Nil(t, err) + assert.Equal(t, n1, gs.Status.NodeName) + + gs, err = c.allocate(f, nil) + assert.Nil(t, err) + assert.Equal(t, n2, gs.Status.NodeName) + + // should have none left + _, err = c.allocate(f, nil) + assert.NotNil(t, err) + }) + + run(t, "distributed", func(t *testing.T, c *Controller, f *v1alpha1.Fleet) { + // make a copy, to avoid the race check + f = f.DeepCopy() + f.Spec.Scheduling = v1alpha1.Distributed + // should go node2, then node1 + gs, err := c.allocate(f, nil) + assert.Nil(t, err) + assert.Equal(t, n2, gs.Status.NodeName) + + gs, err = c.allocate(f, nil) + assert.Nil(t, err) + assert.Equal(t, n1, gs.Status.NodeName) + + gs, err = c.allocate(f, nil) + assert.Nil(t, err) + assert.Equal(t, n1, gs.Status.NodeName) + + gs, err = c.allocate(f, nil) + assert.Nil(t, err) + assert.Equal(t, n1, gs.Status.NodeName) + + // should have none left + _, err = c.allocate(f, nil) + assert.NotNil(t, err) + }) +} + func TestControllerAllocateMutex(t *testing.T) { t.Parallel() @@ -270,6 +364,7 @@ func defaultFixtures(gsLen int) (*v1alpha1.Fleet, *v1alpha1.GameServerSet, []v1a Template: v1alpha1.GameServerTemplateSpec{}, }, } + f.ApplyDefaults() gsSet := f.GameServerSet() gsSet.ObjectMeta.Name = "gsSet1" var gsList []v1alpha1.GameServer diff --git a/pkg/fleetallocation/find.go b/pkg/fleetallocation/find.go new file mode 100644 index 0000000000..ba1a8501af --- /dev/null +++ b/pkg/fleetallocation/find.go @@ -0,0 +1,88 @@ +// Copyright 2018 Google Inc. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package fleetallocation
+
+import (
+ "agones.dev/agones/pkg/apis/stable/v1alpha1"
+)
+
+// nodeCount is just a convenience data structure for
+// keeping relevant GameServer counts about Nodes
+type nodeCount struct {
+ ready int64
+ allocated int64
+}
+
+// findReadyGameServerForAllocation is an O(n) implementation to find a GameServer with priority
+// defined in the comparator function.
+func findReadyGameServerForAllocation(gsList []*v1alpha1.GameServer, comparator func(bestCount, currentCount *nodeCount) bool) *v1alpha1.GameServer {
+ counts := map[string]*nodeCount{}
+ // track potential gameservers, one for each node
+ allocatableGameServers := map[string]*v1alpha1.GameServer{}
+
+ // count up the number of allocated and ready game servers that exist
+ // also, since we're already looping through, track one Ready GameServer
+ // per node, so we can use that as a short list to allocate from
+ for _, gs := range gsList {
+ if gs.DeletionTimestamp.IsZero() &&
+ (gs.Status.State == v1alpha1.Allocated || gs.Status.State == v1alpha1.Ready) {
+ _, ok := counts[gs.Status.NodeName]
+ if !ok {
+ counts[gs.Status.NodeName] = &nodeCount{}
+ }
+
+ if gs.Status.State == v1alpha1.Allocated {
+ counts[gs.Status.NodeName].allocated++
+ } else if gs.Status.State == v1alpha1.Ready {
+ counts[gs.Status.NodeName].ready++
+ allocatableGameServers[gs.Status.NodeName] = gs
+ }
+ }
+ }
+
+ // track the best node count
+ var bestCount *nodeCount
+ // the current GameServer from the node with the most GameServers (allocated, ready)
+ var bestGS *v1alpha1.GameServer
+
+ for nodeName, count := range counts {
+ // count.ready > 0: no reason to check if we don't have ready GameServers on this node
+ // bestGS == nil: if there is no best GameServer, then this node & GameServer is always the best
+ if count.ready > 0 && (bestGS == nil || comparator(bestCount, count)) {
+ bestCount = count
+ bestGS = allocatableGameServers[nodeName]
+ }
+ }
+
+ return bestGS
+}
+
+// packedComparator prioritises Nodes with GameServers that are allocated, and then Nodes with the most
+// Ready GameServers -- this will bin pack allocated game servers together.
+func packedComparator(bestCount, currentCount *nodeCount) bool {
+ if currentCount.allocated == bestCount.allocated && currentCount.ready > bestCount.ready {
+ return true
+ } else if currentCount.allocated > bestCount.allocated {
+ return true
+ }
+
+ return false
+}
+
+// distributedComparator is the inverse of the packed comparator,
+// looking to distribute allocated gameservers on as many nodes as possible.
+func distributedComparator(bestCount, currentCount *nodeCount) bool {
+ return !packedComparator(bestCount, currentCount)
+}
diff --git a/pkg/fleetallocation/find_test.go b/pkg/fleetallocation/find_test.go
new file mode 100644
index 0000000000..ce80877b34
--- /dev/null
+++ b/pkg/fleetallocation/find_test.go
@@ -0,0 +1,125 @@
+// Copyright 2018 Google Inc. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package fleetallocation + +import ( + "testing" + + "agones.dev/agones/pkg/apis/stable/v1alpha1" + "github.com/stretchr/testify/assert" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" +) + +func TestFindPackedReadyGameServer(t *testing.T) { + t.Parallel() + + t.Run("test one", func(t *testing.T) { + n := metav1.Now() + + gsList := []*v1alpha1.GameServer{ + {ObjectMeta: metav1.ObjectMeta{Name: "gs6", DeletionTimestamp: &n}, Status: v1alpha1.GameServerStatus{NodeName: "node1", State: v1alpha1.Ready}}, + {ObjectMeta: metav1.ObjectMeta{Name: "gs1"}, Status: v1alpha1.GameServerStatus{NodeName: "node1", State: v1alpha1.Ready}}, + {ObjectMeta: metav1.ObjectMeta{Name: "gs2"}, Status: v1alpha1.GameServerStatus{NodeName: "node2", State: v1alpha1.Ready}}, + {ObjectMeta: metav1.ObjectMeta{Name: "gs3"}, Status: v1alpha1.GameServerStatus{NodeName: "node1", State: v1alpha1.Allocated}}, + {ObjectMeta: metav1.ObjectMeta{Name: "gs4"}, Status: v1alpha1.GameServerStatus{NodeName: "node1", State: v1alpha1.Allocated}}, + {ObjectMeta: metav1.ObjectMeta{Name: "gs5"}, Status: v1alpha1.GameServerStatus{NodeName: "node1", State: v1alpha1.Error}}, + } + + gs := findReadyGameServerForAllocation(gsList, packedComparator) + assert.Equal(t, "node1", gs.Status.NodeName) + assert.Equal(t, v1alpha1.Ready, gs.Status.State) + // mock that the first game server is allocated + gsList[1].Status.State = v1alpha1.Allocated + gs = findReadyGameServerForAllocation(gsList, packedComparator) + assert.Equal(t, "node2", gs.Status.NodeName) + assert.Equal(t, v1alpha1.Ready, gs.Status.State) + gsList[2].Status.State = v1alpha1.Allocated + gs = findReadyGameServerForAllocation(gsList, packedComparator) + assert.Nil(t, gs) + }) + + t.Run("allocation trap", func(t *testing.T) { + gsList := []*v1alpha1.GameServer{ + {ObjectMeta: metav1.ObjectMeta{Name: "gs1"}, Status: v1alpha1.GameServerStatus{NodeName: "node1", State: v1alpha1.Allocated}}, + {ObjectMeta: metav1.ObjectMeta{Name: "gs2"}, Status: v1alpha1.GameServerStatus{NodeName: "node1", State: v1alpha1.Allocated}}, + {ObjectMeta: metav1.ObjectMeta{Name: "gs3"}, Status: v1alpha1.GameServerStatus{NodeName: "node1", State: v1alpha1.Allocated}}, + {ObjectMeta: metav1.ObjectMeta{Name: "gs4"}, Status: v1alpha1.GameServerStatus{NodeName: "node1", State: v1alpha1.Allocated}}, + {ObjectMeta: metav1.ObjectMeta{Name: "gs5"}, Status: v1alpha1.GameServerStatus{NodeName: "node2", State: v1alpha1.Ready}}, + {ObjectMeta: metav1.ObjectMeta{Name: "gs6"}, Status: v1alpha1.GameServerStatus{NodeName: "node2", State: v1alpha1.Ready}}, + {ObjectMeta: metav1.ObjectMeta{Name: "gs7"}, Status: v1alpha1.GameServerStatus{NodeName: "node2", State: v1alpha1.Ready}}, + {ObjectMeta: metav1.ObjectMeta{Name: "gs8"}, Status: v1alpha1.GameServerStatus{NodeName: "node2", State: v1alpha1.Ready}}, + } + + gs := findReadyGameServerForAllocation(gsList, packedComparator) + assert.Equal(t, "node2", gs.Status.NodeName) + assert.Equal(t, v1alpha1.Ready, gs.Status.State) + }) +} + +func TestFindDistributedReadyGameServer(t *testing.T) { + t.Parallel() + + n := metav1.Now() + gsList := []*v1alpha1.GameServer{ + 
{ObjectMeta: metav1.ObjectMeta{Name: "gs6", DeletionTimestamp: &n}, Status: v1alpha1.GameServerStatus{NodeName: "node3", State: v1alpha1.Ready}}, + {ObjectMeta: metav1.ObjectMeta{Name: "gs1"}, Status: v1alpha1.GameServerStatus{NodeName: "node1", State: v1alpha1.Ready}}, + {ObjectMeta: metav1.ObjectMeta{Name: "gs2"}, Status: v1alpha1.GameServerStatus{NodeName: "node1", State: v1alpha1.Ready}}, + {ObjectMeta: metav1.ObjectMeta{Name: "gs3"}, Status: v1alpha1.GameServerStatus{NodeName: "node1", State: v1alpha1.Ready}}, + {ObjectMeta: metav1.ObjectMeta{Name: "gs4"}, Status: v1alpha1.GameServerStatus{NodeName: "node1", State: v1alpha1.Error}}, + {ObjectMeta: metav1.ObjectMeta{Name: "gs5"}, Status: v1alpha1.GameServerStatus{NodeName: "node2", State: v1alpha1.Ready}}, + {ObjectMeta: metav1.ObjectMeta{Name: "gs6"}, Status: v1alpha1.GameServerStatus{NodeName: "node2", State: v1alpha1.Ready}}, + {ObjectMeta: metav1.ObjectMeta{Name: "gs7"}, Status: v1alpha1.GameServerStatus{NodeName: "node3", State: v1alpha1.Ready}}, + } + + gs := findReadyGameServerForAllocation(gsList, distributedComparator) + assert.Equal(t, "node3", gs.Status.NodeName) + assert.Equal(t, v1alpha1.Ready, gs.Status.State) + + gsList[7].Status.State = v1alpha1.Allocated + + gs = findReadyGameServerForAllocation(gsList, distributedComparator) + assert.Equal(t, "node2", gs.Status.NodeName) + assert.Equal(t, v1alpha1.Ready, gs.Status.State) + + gsList[5].Status.State = v1alpha1.Allocated + assert.Equal(t, "node2", gsList[5].Status.NodeName) + + gs = findReadyGameServerForAllocation(gsList, distributedComparator) + assert.Equal(t, "node1", gs.Status.NodeName) + assert.Equal(t, v1alpha1.Ready, gs.Status.State) + + gsList[1].Status.State = v1alpha1.Allocated + + gs = findReadyGameServerForAllocation(gsList, distributedComparator) + assert.Equal(t, "node2", gs.Status.NodeName) + assert.Equal(t, v1alpha1.Ready, gs.Status.State) + + gsList[6].Status.State = v1alpha1.Allocated + + gs = findReadyGameServerForAllocation(gsList, distributedComparator) + assert.Equal(t, "node1", gs.Status.NodeName) + assert.Equal(t, v1alpha1.Ready, gs.Status.State) + + gsList[2].Status.State = v1alpha1.Allocated + + gs = findReadyGameServerForAllocation(gsList, distributedComparator) + assert.Equal(t, "node1", gs.Status.NodeName) + assert.Equal(t, v1alpha1.Ready, gs.Status.State) + + gsList[3].Status.State = v1alpha1.Allocated + + gs = findReadyGameServerForAllocation(gsList, distributedComparator) + assert.Nil(t, gs) +} diff --git a/test/e2e/fleet_test.go b/test/e2e/fleet_test.go index f655756108..9480bcfaae 100644 --- a/test/e2e/fleet_test.go +++ b/test/e2e/fleet_test.go @@ -39,25 +39,34 @@ const ( func TestCreateFleetAndAllocate(t *testing.T) { t.Parallel() - fleets := framework.AgonesClient.StableV1alpha1().Fleets(defaultNs) - flt, err := fleets.Create(defaultFleet()) - if assert.Nil(t, err) { - defer fleets.Delete(flt.ObjectMeta.Name, nil) // nolint:errcheck - } + fixtures := []v1alpha1.SchedulingStrategy{v1alpha1.Packed, v1alpha1.Distributed} + + for _, strategy := range fixtures { + t.Run(string(strategy), func(t *testing.T) { + fleets := framework.AgonesClient.StableV1alpha1().Fleets(defaultNs) + fleet := defaultFleet() + fleet.Spec.Scheduling = strategy + flt, err := fleets.Create(fleet) + if assert.Nil(t, err) { + defer fleets.Delete(flt.ObjectMeta.Name, nil) // nolint:errcheck + } - err = framework.WaitForFleetCondition(flt, e2e.FleetReadyCount(flt.Spec.Replicas)) - assert.Nil(t, err, "fleet not ready") + err = framework.WaitForFleetCondition(flt, 
e2e.FleetReadyCount(flt.Spec.Replicas))
+ assert.Nil(t, err, "fleet not ready")

- fa := &v1alpha1.FleetAllocation{
- ObjectMeta: metav1.ObjectMeta{GenerateName: "allocatioon-", Namespace: defaultNs},
- Spec: v1alpha1.FleetAllocationSpec{
- FleetName: flt.ObjectMeta.Name,
- },
- }
+ fa := &v1alpha1.FleetAllocation{
+ ObjectMeta: metav1.ObjectMeta{GenerateName: "allocation-", Namespace: defaultNs},
+ Spec: v1alpha1.FleetAllocationSpec{
+ FleetName: flt.ObjectMeta.Name,
+ },
+ }

- fa, err = framework.AgonesClient.StableV1alpha1().FleetAllocations(defaultNs).Create(fa)
- assert.Nil(t, err)
- assert.Equal(t, v1alpha1.Allocated, fa.Status.GameServer.Status.State)
+ fa, err = framework.AgonesClient.StableV1alpha1().FleetAllocations(defaultNs).Create(fa)
+ assert.Nil(t, err)
+ assert.Equal(t, v1alpha1.Allocated, fa.Status.GameServer.Status.State)
+ })
+
+ }
}

func TestScaleFleetUpAndDownWithAllocation(t *testing.T) {

From c30c70cbdf12173cb92371380060bcac0e40c6b6 Mon Sep 17 00:00:00 2001
From: Mark Mandel
Date: Wed, 17 Oct 2018 11:21:51 -0700
Subject: [PATCH 16/25] Specify CPU Request for the SDK Server Sidecar

This provides the mechanism (and defaults) for being able to set both the CPU
request and CPU limits for the SDK Server `GameServer` sidecar.

I've only set the Request level, as it seems that the major issue is not CPU
usage, but actually how the scheduler allots space for the sidecar (by default
100m/0.1 vCPU is allotted to each container). After discussion, the CPU request
has been set to 30m, but is also configurable via the helm chart.

I've not set a CPU limit, as I found when setting a low (<= 20m) CPU limit on
the sidecar, it mostly stopped working. But if people want to experiment with
this, it is also configurable via the Helm chart.

Closes #344
---
 README.md | 1 +
 cmd/controller/main.go | 94 +++++++++++--------
 docs/limiting_resources.md | 57 +++++++++++
 install/helm/README.md | 4 +
 install/helm/agones/templates/controller.yaml | 12 ++-
 install/helm/agones/values.yaml | 2 +
 install/yaml/install.yaml | 12 ++-
 pkg/gameservers/controller.go | 22 ++++-
 pkg/gameservers/controller_test.go | 5 +-
 pkg/testing/controller.go | 3 +-
 pkg/util/webhooks/webhooks_test.go | 5 +-
 pkg/util/workerqueue/workerqueue_test.go | 5 +-
 12 files changed, 162 insertions(+), 60 deletions(-)
 create mode 100644 docs/limiting_resources.md

diff --git a/README.md b/README.md
index 41b0e90f8f..e3927788f6 100644
--- a/README.md
+++ b/README.md
@@ -71,6 +71,7 @@ Documentation and usage guides on how to develop and host dedicated game servers
 ### Advanced
 - [Scheduling and Autoscaling](./docs/scheduling_autoscaling.md)
+- [Limiting CPU/Memory](./docs/limiting_resources.md)

 ## Get involved

diff --git a/cmd/controller/main.go b/cmd/controller/main.go
index dbb73378e5..be83cb7f38 100644
--- a/cmd/controller/main.go
+++ b/cmd/controller/main.go
@@ -40,20 +40,23 @@ import (
 "github.com/spf13/pflag"
 "github.com/spf13/viper"
 extclientset "k8s.io/apiextensions-apiserver/pkg/client/clientset/clientset"
+ "k8s.io/apimachinery/pkg/api/resource"
 "k8s.io/client-go/informers"
 "k8s.io/client-go/kubernetes"
 "k8s.io/client-go/rest"
)

const (
- sidecarFlag = "sidecar"
- pullSidecarFlag = "always-pull-sidecar"
- minPortFlag = "min-port"
- maxPortFlag = "max-port"
- certFileFlag = "cert-file"
- keyFileFlag = "key-file"
- workers = 2
- defaultResync = 30 * time.Second
+ sidecarImageFlag = "sidecar-image"
+ sidecarCPURequestFlag = "sidecar-cpu-request"
+ sidecarCPULimitFlag = "sidecar-cpu-limit"
+ pullSidecarFlag = "always-pull-sidecar"
+
minPortFlag = "min-port" + maxPortFlag = "max-port" + certFileFlag = "cert-file" + keyFileFlag = "key-file" + workers = 2 + defaultResync = 30 * time.Second ) var ( @@ -63,6 +66,9 @@ var ( // main starts the operator for the gameserver CRD func main() { ctlConf := parseEnvFlags() + logger.WithField("version", pkg.Version). + WithField("ctlConf", ctlConf).Info("starting gameServer operator...") + if err := ctlConf.validate(); err != nil { logger.WithError(err).Fatal("Could not create controller from environment or flags") } @@ -88,14 +94,15 @@ func main() { } health := healthcheck.NewHandler() - wh := webhooks.NewWebHook(ctlConf.certFile, ctlConf.keyFile) + wh := webhooks.NewWebHook(ctlConf.CertFile, ctlConf.KeyFile) agonesInformerFactory := externalversions.NewSharedInformerFactory(agonesClient, defaultResync) kubeInformationFactory := informers.NewSharedInformerFactory(kubeClient, defaultResync) allocationMutex := &sync.Mutex{} gsController := gameservers.NewController(wh, health, allocationMutex, - ctlConf.minPort, ctlConf.maxPort, ctlConf.sidecarImage, ctlConf.alwaysPullSidecar, + ctlConf.MinPort, ctlConf.MaxPort, ctlConf.SidecarImage, ctlConf.AlwaysPullSidecar, + ctlConf.SidecarCPURequest, ctlConf.SidecarCPULimit, kubeClient, kubeInformationFactory, extClient, agonesClient, agonesInformerFactory) gsSetController := gameserversets.NewController(wh, health, allocationMutex, kubeClient, extClient, agonesClient, agonesInformerFactory) @@ -132,12 +139,16 @@ func parseEnvFlags() config { } base := filepath.Dir(exec) - viper.SetDefault(sidecarFlag, "gcr.io/agones-images/agones-sdk:"+pkg.Version) + viper.SetDefault(sidecarImageFlag, "gcr.io/agones-images/agones-sdk:"+pkg.Version) + viper.SetDefault(sidecarCPURequestFlag, "0") + viper.SetDefault(sidecarCPULimitFlag, "0") viper.SetDefault(pullSidecarFlag, false) viper.SetDefault(certFileFlag, filepath.Join(base, "certs/server.crt")) viper.SetDefault(keyFileFlag, filepath.Join(base, "certs/server.key")) - pflag.String(sidecarFlag, viper.GetString(sidecarFlag), "Flag to overwrite the GameServer sidecar image that is used. Can also use SIDECAR env variable") + pflag.String(sidecarImageFlag, viper.GetString(sidecarImageFlag), "Flag to overwrite the GameServer sidecar image that is used. Can also use SIDECAR env variable") + pflag.String(sidecarCPULimitFlag, viper.GetString(sidecarCPULimitFlag), "Flag to overwrite the GameServer sidecar container's cpu limit. Can also use SIDECAR_CPU_LIMIT env variable") + pflag.String(sidecarCPURequestFlag, viper.GetString(sidecarCPURequestFlag), "Flag to overwrite the GameServer sidecar container's cpu request. Can also use SIDECAR_CPU_REQUEST env variable") pflag.Bool(pullSidecarFlag, viper.GetBool(pullSidecarFlag), "For development purposes, set the sidecar image to have a ImagePullPolicy of Always. Can also use ALWAYS_PULL_SIDECAR env variable") pflag.Int32(minPortFlag, 0, "Required. The minimum port that that a GameServer can be allocated to. Can also use MIN_PORT env variable.") pflag.Int32(maxPortFlag, 0, "Required. The maximum port that that a GameServer can be allocated to. 
Can also use MAX_PORT env variable")
@@ -146,7 +157,9 @@ func parseEnvFlags() config {
 pflag.Parse()
 viper.SetEnvKeyReplacer(strings.NewReplacer("-", "_"))
- runtime.Must(viper.BindEnv(sidecarFlag))
+ runtime.Must(viper.BindEnv(sidecarImageFlag))
+ runtime.Must(viper.BindEnv(sidecarCPULimitFlag))
+ runtime.Must(viper.BindEnv(sidecarCPURequestFlag))
 runtime.Must(viper.BindEnv(pullSidecarFlag))
 runtime.Must(viper.BindEnv(minPortFlag))
 runtime.Must(viper.BindEnv(maxPortFlag))
@@ -154,47 +167,46 @@
 runtime.Must(viper.BindEnv(certFileFlag))
 runtime.Must(viper.BindPFlags(pflag.CommandLine))

- minPort := int32(viper.GetInt64(minPortFlag))
- maxPort := int32(viper.GetInt64(maxPortFlag))
- sidecarImage := viper.GetString(sidecarFlag)
- alwaysPullSidecar := viper.GetBool(pullSidecarFlag)
- keyFile := viper.GetString(keyFileFlag)
- certFile := viper.GetString(certFileFlag)
-
- logger.WithField(sidecarFlag, sidecarImage).
- WithField("minPort", minPort).
- WithField("maxPort", maxPort).
- WithField(keyFileFlag, keyFile).
- WithField(certFileFlag, certFile).
- WithField("alwaysPullSidecarImage", alwaysPullSidecar).
- WithField("Version", pkg.Version).Info("starting gameServer operator...")
+ request, err := resource.ParseQuantity(viper.GetString(sidecarCPURequestFlag))
+ if err != nil {
+ logger.WithError(err).Fatalf("could not parse %s", sidecarCPURequestFlag)
+ }
+
+ limit, err := resource.ParseQuantity(viper.GetString(sidecarCPULimitFlag))
+ if err != nil {
+ logger.WithError(err).Fatalf("could not parse %s", sidecarCPULimitFlag)
+ }

 return config{
- minPort: minPort,
- maxPort: maxPort,
- sidecarImage: sidecarImage,
- alwaysPullSidecar: alwaysPullSidecar,
- keyFile: keyFile,
- certFile: certFile,
+ MinPort: int32(viper.GetInt64(minPortFlag)),
+ MaxPort: int32(viper.GetInt64(maxPortFlag)),
+ SidecarImage: viper.GetString(sidecarImageFlag),
+ SidecarCPURequest: request,
+ SidecarCPULimit: limit,
+ AlwaysPullSidecar: viper.GetBool(pullSidecarFlag),
+ KeyFile: viper.GetString(keyFileFlag),
+ CertFile: viper.GetString(certFileFlag),
 }
}

// config stores all required configuration to create a game server controller.
type config struct {
- minPort int32
- maxPort int32
- sidecarImage string
- alwaysPullSidecar bool
- keyFile string
- certFile string
+ MinPort int32
+ MaxPort int32
+ SidecarImage string
+ SidecarCPURequest resource.Quantity
+ SidecarCPULimit resource.Quantity
+ AlwaysPullSidecar bool
+ KeyFile string
+ CertFile string
}

// validate ensures the ctlConfig data is valid.
func (c config) validate() error {
- if c.minPort <= 0 || c.maxPort <= 0 {
+ if c.MinPort <= 0 || c.MaxPort <= 0 {
 return errors.New("min Port and Max Port values are required")
 }
- if c.maxPort < c.minPort {
+ if c.MaxPort < c.MinPort {
 return errors.New("max Port cannot be set less that the Min Port")
 }
 return nil
diff --git a/docs/limiting_resources.md b/docs/limiting_resources.md
new file mode 100644
index 0000000000..6f3b3c7b3b
--- /dev/null
+++ b/docs/limiting_resources.md
@@ -0,0 +1,57 @@
+# Limiting CPU & Memory

+Kubernetes has built-in capabilities for requesting and limiting both CPU and Memory usage of running containers.
+
+As a short description:
+
+- CPU `Requests` are the share of CPU reserved for a container; they come into play when there is CPU congestion, and a container can burst above its request when spare CPU is available.
+- CPU `Limits` are hard limits on how much CPU time the particular container gets access to.
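+
+For instance, a container spec fragment that reserves a small slice of CPU but can burst up to a higher ceiling could look like this (a sketch - the 50m/250m values are purely illustrative):
+
+```yaml
+resources:
+  requests:
+    cpu: "50m"   # reserved for the container; used for scheduling and under congestion
+  limits:
+    cpu: "250m"  # hard ceiling the container cannot exceed
+```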
+
+This is useful for game servers, not just as a mechanism to distribute compute resources evenly, but also as a way
+to advise the Kubernetes scheduler how many game server processes it is able to fit into a given node in the cluster.
+
+It's worth reading the [Managing Compute Resources for Containers](https://kubernetes.io/docs/concepts/configuration/manage-compute-resources-container/)
+Kubernetes documentation for more details on "requests" and "limits" for both CPU and Memory, and how to configure them.
+
+## GameServers
+
+Since the `GameServer` specification provides a full [`PodSpecTemplate`](https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.10/#podtemplatespec-v1-core),
+we can take advantage of both resource limits and requests in our `GameServer` configurations.
+
+For example, to set a CPU limit of 250m/0.25 of a CPU on our `GameServer` configuration,
+we could do so as follows:
+
+```yaml
+apiVersion: "stable.agones.dev/v1alpha1"
+kind: GameServer
+metadata:
+  name: "simple-udp"
+spec:
+  ports:
+  - name: default
+    portPolicy: "dynamic"
+    containerPort: 7654
+  template:
+    spec:
+      containers:
+      - name: simple-udp
+        image: gcr.io/agones-images/udp-server:0.4
+        resources:
+          limits:
+            cpu: "250m" #this is our limit here
+```
+
+If you do not set a limit or request, the default is set by Kubernetes at a 100m CPU request.
+
+## SDK GameServer sidecar
+
+⚠️⚠️⚠️ **This is currently a development feature and has not been released** ⚠️⚠️⚠️
+
+You may also want to tweak the CPU request or limits on the SDK `GameServer` sidecar process that spins up alongside
+each game server container.
+
+You can do this through the [Helm configuration](../install/helm/README.md#configuration) when installing Agones.
+
+By default, this is set to having a CPU request value of 30m, with no hard CPU limit. This ensures that the sidecar always has enough CPU
+to function, but it is configurable in case a lower or higher value is required on your clusters, or if you desire
+a hard limit.
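+
+As a sketch of what that could look like (assuming a Helm 2 style install from the chart in `install/helm/agones` - the 50m/100m values are purely illustrative):
+
+```bash
+helm install --name my-release \
+  --set agones.image.sdk.cpuRequest=50m \
+  --set agones.image.sdk.cpuLimit=100m \
+  install/helm/agones
+```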
\ No newline at end of file diff --git a/install/helm/README.md b/install/helm/README.md index 40e8b25692..eb26ed1222 100644 --- a/install/helm/README.md +++ b/install/helm/README.md @@ -92,6 +92,8 @@ The following tables lists the configurable parameters of the Agones chart and t | `agones.image.controller.pullPolicy` | Image pull policy for the controller | `IfNotPresent` | | `agones.image.controller.pullSecret` | Image pull secret for the controller | `` | | `agones.image.sdk.name` | Image name for the sdk | `agones-sdk` | +| `agones.image.sdk.cpuRequest` | (⚠️ Development feature ⚠️) the [cpu request][constraints] for sdk server container | `30m` | +| `agones.image.sdk.cpuLimit` | (⚠️ Development feature ⚠️) the [cpu limit][constraints] for the sdk server container | `0` (none) | | `agones.image.sdk.alwaysPull` | Tells if the sdk image should always be pulled | `false` | | `agones.controller.healthCheck.http.port` | Port to use for liveness probe service | `8080` | | `agones.controller.healthCheck.initialDelaySeconds` | Initial delay before performing the first probe (in seconds) | `3` | @@ -104,6 +106,8 @@ The following tables lists the configurable parameters of the Agones chart and t | `gameservers.minPort` | Minimum port to use for dynamic port allocation | `7000` | | `gameservers.maxPort` | Maximum port to use for dynamic port allocation | `8000` | +[constraints]: https://kubernetes.io/docs/tasks/administer-cluster/manage-resources/cpu-constraint-namespace/ + Specify each parameter using the `--set key=value[,key=value]` argument to `helm install`. For example, ```bash diff --git a/install/helm/agones/templates/controller.yaml b/install/helm/agones/templates/controller.yaml index f6d22c40de..2e35d2cd3a 100644 --- a/install/helm/agones/templates/controller.yaml +++ b/install/helm/agones/templates/controller.yaml @@ -51,16 +51,20 @@ spec: image: "{{ .Values.agones.image.registry }}/{{ .Values.agones.image.controller.name}}:{{ .Values.agones.image.tag }}" imagePullPolicy: {{ .Values.agones.image.controller.pullPolicy }} env: - - name: ALWAYS_PULL_SIDECAR # set the sidecar imagePullPolicy to Always - value: {{ .Values.agones.image.sdk.alwaysPull | quote }} # minimum port that can be exposed to GameServer traffic - name: MIN_PORT - value: {{ .Values.gameservers.minPort | quote }} + value: {{ .Values.gameservers.minPort | quote }} # maximum port that can be exposed to GameServer traffic - name: MAX_PORT value: {{ .Values.gameservers.maxPort | quote }} - - name: SIDECAR # overwrite the GameServer sidecar image that is used + - name: SIDECAR_IMAGE # overwrite the GameServer sidecar image that is used value: "{{ .Values.agones.image.registry }}/{{ .Values.agones.image.sdk.name}}:{{ .Values.agones.image.tag }}" + - name: ALWAYS_PULL_SIDECAR # set the sidecar imagePullPolicy to Always + value: {{ .Values.agones.image.sdk.alwaysPull | quote }} + - name: SIDECAR_CPU_REQUEST + value: {{ .Values.agones.image.sdk.cpuRequest | quote }} + - name: SIDECAR_CPU_LIMIT + value: {{ .Values.agones.image.sdk.cpuLimit | quote }} livenessProbe: httpGet: path: /live diff --git a/install/helm/agones/values.yaml b/install/helm/agones/values.yaml index 80f4a74af9..2d0be0b6e5 100644 --- a/install/helm/agones/values.yaml +++ b/install/helm/agones/values.yaml @@ -37,6 +37,8 @@ agones: pullPolicy: IfNotPresent sdk: name: agones-sdk + cpuRequest: 30m + cpuLimit: 0 alwaysPull: false gameservers: diff --git a/install/yaml/install.yaml b/install/yaml/install.yaml index 2914616756..98d01371a9 100644 --- 
a/install/yaml/install.yaml
+++ b/install/yaml/install.yaml
@@ -859,16 +859,20 @@ spec:
 image: "gcr.io/agones-images/agones-controller:0.6.0-rc"
 imagePullPolicy: IfNotPresent
 env:
- - name: ALWAYS_PULL_SIDECAR # set the sidecar imagePullPolicy to Always
- value: "false"
 # minimum port that can be exposed to GameServer traffic
 - name: MIN_PORT
- value: "7000"
+ value: "7000"
 # maximum port that can be exposed to GameServer traffic
 - name: MAX_PORT
 value: "8000"
- - name: SIDECAR # overwrite the GameServer sidecar image that is used
+ - name: SIDECAR_IMAGE # overwrite the GameServer sidecar image that is used
 value: "gcr.io/agones-images/agones-sdk:0.6.0-rc"
+ - name: ALWAYS_PULL_SIDECAR # set the sidecar imagePullPolicy to Always
+ value: "false"
+ - name: SIDECAR_CPU_REQUEST
+ value: "30m"
+ - name: SIDECAR_CPU_LIMIT
+ value: "0"
 livenessProbe:
 httpGet:
 path: /live
diff --git a/pkg/gameservers/controller.go b/pkg/gameservers/controller.go
index c634378c13..8d2fe055d5 100644
--- a/pkg/gameservers/controller.go
+++ b/pkg/gameservers/controller.go
@@ -38,6 +38,7 @@ import (
 extclientset "k8s.io/apiextensions-apiserver/pkg/client/clientset/clientset"
 "k8s.io/apiextensions-apiserver/pkg/client/clientset/clientset/typed/apiextensions/v1beta1"
 k8serrors "k8s.io/apimachinery/pkg/api/errors"
+ "k8s.io/apimachinery/pkg/api/resource"
 metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
 "k8s.io/apimachinery/pkg/labels"
 "k8s.io/apimachinery/pkg/util/intstr"
@@ -59,6 +60,8 @@ type Controller struct {
 logger *logrus.Entry
 sidecarImage string
 alwaysPullSidecarImage bool
+ sidecarCPURequest resource.Quantity
+ sidecarCPULimit resource.Quantity
 crdGetter v1beta1.CustomResourceDefinitionInterface
 podGetter typedcorev1.PodsGetter
 podLister corelisterv1.PodLister
@@ -71,8 +74,9 @@ type Controller struct {
 healthController *HealthController
 workerqueue *workerqueue.WorkerQueue
 allocationMutex *sync.Mutex
- stop <-chan struct{}
- recorder record.EventRecorder
+ stop <-chan struct{}
+ recorder record.EventRecorder
}

// NewController returns a new gameserver crd controller
@@ -83,6 +87,8 @@ func NewController(
 minPort, maxPort int32,
 sidecarImage string,
 alwaysPullSidecarImage bool,
+ sidecarCPURequest resource.Quantity,
+ sidecarCPULimit resource.Quantity,
 kubeClient kubernetes.Interface,
 kubeInformerFactory informers.SharedInformerFactory,
 extClient extclientset.Interface,
@@ -95,6 +101,8 @@ func NewController(
 c := &Controller{
 sidecarImage: sidecarImage,
+ sidecarCPULimit: sidecarCPULimit,
+ sidecarCPURequest: sidecarCPURequest,
 alwaysPullSidecarImage: alwaysPullSidecarImage,
 allocationMutex: allocationMutex,
 crdGetter: extClient.ApiextensionsV1beta1().CustomResourceDefinitions(),
@@ -463,6 +471,7 @@ func (c *Controller) sidecar(gs *v1alpha1.GameServer) corev1.Container {
 },
 },
 },
+ Resources: corev1.ResourceRequirements{},
 LivenessProbe: &corev1.Probe{
 Handler: corev1.Handler{
 HTTPGet: &corev1.HTTPGetAction{
@@ -474,6 +483,15 @@
 PeriodSeconds: 3,
 },
 }
+
+ if !c.sidecarCPURequest.IsZero() {
+ sidecar.Resources.Requests = corev1.ResourceList{corev1.ResourceCPU: c.sidecarCPURequest}
+ }
+
+ if !c.sidecarCPULimit.IsZero() {
+ sidecar.Resources.Limits = corev1.ResourceList{corev1.ResourceCPU: c.sidecarCPULimit}
+ }
+
 if c.alwaysPullSidecarImage {
 sidecar.ImagePullPolicy = corev1.PullAlways
 }
diff --git a/pkg/gameservers/controller_test.go b/pkg/gameservers/controller_test.go
index 3bac274f3b..325cf5a89d 100644
---
a/pkg/gameservers/controller_test.go +++ b/pkg/gameservers/controller_test.go @@ -32,6 +32,7 @@ import ( admv1beta1 "k8s.io/api/admission/v1beta1" corev1 "k8s.io/api/core/v1" k8serrors "k8s.io/apimachinery/pkg/api/errors" + "k8s.io/apimachinery/pkg/api/resource" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/runtime" "k8s.io/apimachinery/pkg/runtime/schema" @@ -707,6 +708,8 @@ func TestControllerCreateGameServerPod(t *testing.T) { assert.Len(t, pod.Spec.Containers, 2, "Should have a sidecar container") assert.Equal(t, pod.Spec.Containers[1].Image, c.sidecarImage) + assert.Equal(t, pod.Spec.Containers[1].Resources.Limits.Cpu(), &c.sidecarCPULimit) + assert.Equal(t, pod.Spec.Containers[1].Resources.Requests.Cpu(), &c.sidecarCPURequest) assert.Len(t, pod.Spec.Containers[1].Env, 2, "2 env vars") assert.Equal(t, "GAMESERVER_NAME", pod.Spec.Containers[1].Env[0].Name) assert.Equal(t, fixture.ObjectMeta.Name, pod.Spec.Containers[1].Env[0].Value) @@ -1099,7 +1102,7 @@ func newFakeController() (*Controller, agtesting.Mocks) { wh := webhooks.NewWebHook("", "") c := NewController(wh, healthcheck.NewHandler(), &sync.Mutex{}, 10, 20, "sidecar:dev", false, - m.KubeClient, m.KubeInformationFactory, m.ExtClient, m.AgonesClient, m.AgonesInformerFactory) + resource.MustParse("0.05"), resource.MustParse("0.1"), m.KubeClient, m.KubeInformationFactory, m.ExtClient, m.AgonesClient, m.AgonesInformerFactory) c.recorder = m.FakeRecorder return c, m } diff --git a/pkg/testing/controller.go b/pkg/testing/controller.go index dddda75a9f..342a0eb7b1 100644 --- a/pkg/testing/controller.go +++ b/pkg/testing/controller.go @@ -16,9 +16,8 @@ package testing import ( "context" - "time" - gotesting "testing" + "time" agonesfake "agones.dev/agones/pkg/client/clientset/versioned/fake" "agones.dev/agones/pkg/client/informers/externalversions" diff --git a/pkg/util/webhooks/webhooks_test.go b/pkg/util/webhooks/webhooks_test.go index dda5bdec42..d0dded6b9f 100644 --- a/pkg/util/webhooks/webhooks_test.go +++ b/pkg/util/webhooks/webhooks_test.go @@ -15,13 +15,12 @@ package webhooks import ( + "bytes" + "encoding/json" "net/http" "net/http/httptest" "testing" - "bytes" - "encoding/json" - "github.com/stretchr/testify/assert" "k8s.io/api/admission/v1beta1" "k8s.io/apimachinery/pkg/apis/meta/v1" diff --git a/pkg/util/workerqueue/workerqueue_test.go b/pkg/util/workerqueue/workerqueue_test.go index 866f33093b..01e6657d1e 100644 --- a/pkg/util/workerqueue/workerqueue_test.go +++ b/pkg/util/workerqueue/workerqueue_test.go @@ -15,12 +15,11 @@ package workerqueue import ( - "testing" - "time" - "io/ioutil" "net/http" "net/http/httptest" + "testing" + "time" "github.com/heptiolabs/healthcheck" "github.com/sirupsen/logrus" From 8669cf44ff868c1f971219c313626abeaa2c9186 Mon Sep 17 00:00:00 2001 From: Mark Mandel Date: Fri, 19 Oct 2018 15:01:02 -0700 Subject: [PATCH 17/25] Set test clusters to base version. GKE will work out the rest. Apparently we now don't have to be brittle, and can set the base K8s version we want, and GKE will sort out the rest. Neat. Also updated the permissions to be inline with the documentation. 
--- build/gke-test-cluster/cluster-e2e.yml | 7 ++++--- build/gke-test-cluster/cluster.yml.jinja | 7 ++++--- 2 files changed, 8 insertions(+), 6 deletions(-) diff --git a/build/gke-test-cluster/cluster-e2e.yml b/build/gke-test-cluster/cluster-e2e.yml index dcda65e956..cae41645f9 100644 --- a/build/gke-test-cluster/cluster-e2e.yml +++ b/build/gke-test-cluster/cluster-e2e.yml @@ -19,7 +19,7 @@ resources: cluster: name: e2e-test-cluster description: End to end tests cluster for Agones - initialClusterVersion: 1.10.7-gke.1 + initialClusterVersion: "1.10" nodePools: - name: "default" initialNodeCount: 2 @@ -28,9 +28,10 @@ resources: tags: - game-server oauthScopes: - - https://www.googleapis.com/auth/devstorage.read_only - https://www.googleapis.com/auth/compute - - https://www.googleapis.com/auth/cloud-platform + - https://www.googleapis.com/auth/devstorage.read_only + - https://www.googleapis.com/auth/logging.write + - https://www.googleapis.com/auth/monitoring - name: game-server-firewall type: compute.beta.firewall properties: diff --git a/build/gke-test-cluster/cluster.yml.jinja b/build/gke-test-cluster/cluster.yml.jinja index e244715d65..2b4bff69c6 100644 --- a/build/gke-test-cluster/cluster.yml.jinja +++ b/build/gke-test-cluster/cluster.yml.jinja @@ -21,7 +21,7 @@ resources: cluster: name: {{ properties["cluster.name"] }} description: Test cluster for Agones - initialClusterVersion: 1.10.7-gke.1 + initialClusterVersion: "1.10" nodePools: - name: "default" initialNodeCount: {{ properties["cluster.nodePool.initialNodeCount"] }} @@ -30,9 +30,10 @@ resources: tags: - game-server oauthScopes: - - https://www.googleapis.com/auth/devstorage.read_only - https://www.googleapis.com/auth/compute - - https://www.googleapis.com/auth/cloud-platform + - https://www.googleapis.com/auth/devstorage.read_only + - https://www.googleapis.com/auth/logging.write + - https://www.googleapis.com/auth/monitoring masterAuth: username: admin password: supersecretpassword From 3038778add778ca8a67aea887d866daa9ca5a99b Mon Sep 17 00:00:00 2001 From: Mark Mandel Date: Sat, 20 Oct 2018 10:27:57 -0700 Subject: [PATCH 18/25] TOC for the SDK integration and tooling I did this because I thought the local tooling wasn't getting noticed, so a TOC would help bring that to attention, and also make the documentation easier to navigate. Also found some semantic issues with header depth that I also fixed. 
--- sdks/README.md | 30 ++++++++++++++++++++++++++---- 1 file changed, 26 insertions(+), 4 deletions(-) diff --git a/sdks/README.md b/sdks/README.md index 3728708d37..c4104726ee 100644 --- a/sdks/README.md +++ b/sdks/README.md @@ -1,5 +1,27 @@ # Agones Game Server Client SDKs + +Table of Contents +================= + + * [Overview](#overview) + * [Function Reference](#function-reference) + * [Ready()](#ready) + * [Health()](#health) + * [Shutdown()](#shutdown) + * [SetLabel(key, value)](#setlabelkey-value) + * [SetAnnotation(key, value)](#setannotationkey-value) + * [GameServer()](#gameserver) + * [WatchGameServer(function(gameserver){...})](#watchgameserverfunctiongameserver) + * [Local Development](#local-development) + * [Providing your own GameServer configuration for local development](#providing-your-own-gameserver-configuration-for-local-development) + * [Writing your own SDK](#writing-your-own-sdk) + * [gRPC Client Generation](#grpc-client-generation) + * [REST API Implementation](#rest-api-implementation) + * [Building the Local Tools](#building-the-local-tools) + +## Overview + The SDKs are integration points for game servers with Agones itself. They are required for a game server to work with Agones. @@ -148,17 +170,17 @@ $ ./sdk-server.linux.amd64 --local -f ../../../examples/simple-udp/gameserver.ya {"level":"info","msg":"Starting SDKServer grpc-gateway...","source":"main","time":"2018-08-25T17:56:39-07:00"} ``` -### Writing your own SDK +## Writing your own SDK If there isn't a SDK for the language and platform you are looking for, you have several options: -#### gRPC Client Generation +### gRPC Client Generation If client generation is well supported by [gRPC](https://grpc.io/docs/), then generate a client from the [sdk.proto](../sdk.proto), and look at the current [sdks](.) to see how the wrappers are implemented to make interaction with the SDK server simpler for the user. -#### REST API Implementation +### REST API Implementation If client generation is not well supported by gRPC, or if there are other complicating factors, implement the SDK through the [REST](../docs/sdk_rest_api.md) HTTP+JSON interface. This could be written by hand, or potentially generated from @@ -166,7 +188,7 @@ the [Swagger/OpenAPI Spec](../sdk.swagger.json). Finally, if you build something that would be usable by the community, please submit a pull request! -### Building the Local Tools +## Building the Local Tools If you wish to build the binaries for local development from source the `make` target `build-agones-sdk-binary` will compile the necessary binaries From d48e23d384a9d80cf9bf2b88f7950d7189ab630f Mon Sep 17 00:00:00 2001 From: Mark Mandel Date: Wed, 24 Oct 2018 21:44:14 -0700 Subject: [PATCH 19/25] Adding colour to the linter, because colours are pretty. --- build/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/build/Makefile b/build/Makefile index 93e69cd89a..f8cbd7dde2 100644 --- a/build/Makefile +++ b/build/Makefile @@ -191,7 +191,7 @@ build-controller-binary: $(ensure-build-image) # use LINT_TIMEOUT to manipulate the linter timeout lint: LINT_TIMEOUT ?= 15m lint: $(ensure-build-image) - docker run --rm $(common_mounts) -w $(mount_path) $(DOCKER_RUN_ARGS) $(build_tag) bash -c \ + docker run -t -e "TERM=xterm-256color" --rm $(common_mounts) -w $(mount_path) $(DOCKER_RUN_ARGS) $(build_tag) bash -c \ "golangci-lint run ./examples/... && golangci-lint run --deadline $(LINT_TIMEOUT) ./..." 
# Build the image for the gameserver controller

From 0fba4641e4b2735d2a5a48d812a42676adffe1af Mon Sep 17 00:00:00 2001
From: Mark Mandel
Date: Mon, 22 Oct 2018 14:04:39 -0700
Subject: [PATCH 20/25] Adding third party videos and presentations.

---
 README.md | 3 +++
 docs/videos_presentations.md | 11 +++++++++++
 2 files changed, 14 insertions(+)
 create mode 100644 docs/videos_presentations.md

diff --git a/README.md b/README.md
index e3927788f6..9d0a451241 100644
--- a/README.md
+++ b/README.md
@@ -73,6 +73,9 @@ Documentation and usage guides on how to develop and host dedicated game servers
 - [Scheduling and Autoscaling](./docs/scheduling_autoscaling.md)
 - [Limiting CPU/Memory](./docs/limiting_resources.md)

+### Third Party Content
+- [Videos and Presentations](./docs/videos_presentations.md)
+
 ## Get involved

diff --git a/docs/videos_presentations.md b/docs/videos_presentations.md
new file mode 100644
index 0000000000..4cffe74106
--- /dev/null
+++ b/docs/videos_presentations.md
@@ -0,0 +1,11 @@
+# Third Party Videos and Presentations
+
+Community-contributed videos and presentations on Agones.
+
+## Presentations
+
+- [Agones: Scaling Multiplayer Dedicated Game Servers with Kubernetes (Cloud Next '18)](https://www.youtube.com/watch?v=CLNpkjolxYA)
+- [Google Cloud Next '18 London: Carl Dionne, Development Director at Ubisoft Montreal](https://www.youtube.com/watch?v=5n-JlLA7bIc&feature=youtu.be&list=PLBgogxgQVM9s-lumfvF7qLSqqX5pT8G8Y&t=835)
+
+## Screencasts
+- Agones: How Do I Docker and Kubernetes? [Part 1](https://www.youtube.com/watch?v=i4_zaztK6mE), [Part 2](https://www.youtube.com/watch?v=ZZXLDnmO1TM)
\ No newline at end of file

From 8bbcecb870ea18c1658208852be7c935c8331018 Mon Sep 17 00:00:00 2001
From: Mark Mandel
Date: Mon, 22 Oct 2018 14:15:01 -0700
Subject: [PATCH 21/25] Make sure do-release always uses the release_registry

I often set the REGISTRY env var locally, so this catches me often when I do
releases.

---
 build/Makefile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/build/Makefile b/build/Makefile
index f8cbd7dde2..c9c40c9346 100644
--- a/build/Makefile
+++ b/build/Makefile
@@ -325,7 +325,7 @@ do-release:
 cp $(agones_path)/sdks/cpp/bin/agonessdk-$(RELEASE_VERSION)-src.zip $(agones_path)/release
 cd $(agones_path) && zip -r ./release/agones-install-$(RELEASE_VERSION).zip ./README.md ./install ./LICENSE
 $(MAKE) push-chart
- $(MAKE) gcloud-auth-docker push VERSION=$(RELEASE_VERSION)
+ $(MAKE) gcloud-auth-docker push REGISTRY=$(release_registry) VERSION=$(RELEASE_VERSION)
 git push -u upstream release-$(RELEASE_VERSION)
 @echo "Now go make the $(RELEASE_VERSION) release on Github!"

From 597344e26068bf6e55852247982c826c65582132 Mon Sep 17 00:00:00 2001
From: Mark Mandel
Date: Sun, 21 Oct 2018 18:44:18 -0700
Subject: [PATCH 22/25] This PR sets a preferredDuringSchedulingIgnoredDuringExecution
 PodAffinity with a HostName topology.

This does a pretty decent job of grouping together GameServer Pods. It does
tend to distribute more widely when large groups of GameServer Pods get
created, but it's worth experimenting with this first, before going down the
more risky route of a custom scheduler (in which we've already found some
issues).

We may also find that, as GameServers shut down at the end of sessions, they
start to group together when they reschedule, as at lower load the scheduler
tends to do a better job of packing.
Working towards #368
---
 docs/scheduling_autoscaling.md | 21 ++++-
 pkg/apis/stable/v1alpha1/fleet.go | 17 +---
 pkg/apis/stable/v1alpha1/fleet_test.go | 4 +-
 pkg/apis/stable/v1alpha1/gameserver.go | 86 ++++++++++++++++-----
 pkg/apis/stable/v1alpha1/gameserver_test.go | 70 ++++++++++++-----
 pkg/apis/stable/v1alpha1/gameserverset.go | 4 +
 pkg/apis/stable/v1alpha1/scheduling.go | 31 ++++++++
 pkg/gameservers/portallocator.go | 1 -
 8 files changed, 175 insertions(+), 59 deletions(-)
 create mode 100644 pkg/apis/stable/v1alpha1/scheduling.go

diff --git a/docs/scheduling_autoscaling.md b/docs/scheduling_autoscaling.md
index e6747e2448..ffd951c930 100644
--- a/docs/scheduling_autoscaling.md
+++ b/docs/scheduling_autoscaling.md
@@ -4,7 +4,6 @@
 > Autoscaling is currently ongoing work within Agones. The work you see here is just the beginning.
-
 Table of Contents
 =================
@@ -37,6 +36,12 @@ To facilitate autoscaling, we need to combine several piece of concepts and func
 Allocation scheduling refers to the order in which `GameServers`, and specifically their backing `Pods` are chosen from across the Kubernetes cluster within a given `Fleet` when [allocation](./create_fleet.md#4-allocate-a-game-server-from-the-fleet) occurs.
+### Pod Scheduling
+
+Each `GameServer` is backed by a Kubernetes [`Pod`](https://kubernetes.io/docs/concepts/workloads/pods/pod/). Pod scheduling
+refers to the strategy that determines which node in the Kubernetes cluster the Pod is assigned to when it is created.
+
 ## Fleet Scheduling
 There are two scheduling strategies for Fleets - each designed for different types of Kubernetes Environments.
@@ -77,6 +82,15 @@ also affect `GameServer` `Pod` scheduling, and `Fleet` scale down scheduling as
 Under the "Packed" strategy, allocation will prioritise allocating `GameServers` to nodes that are running on Nodes that already have allocated `GameServers` running on them.
+#### Pod Scheduling Strategy
+
+Under the "Packed" strategy, Pods will be scheduled using the [`PodAffinity`](https://kubernetes.io/docs/concepts/configuration/assign-pod-node/#inter-pod-affinity-and-anti-affinity-beta-feature)
+with a `preferredDuringSchedulingIgnoredDuringExecution` affinity with [hostname](https://kubernetes.io/docs/concepts/configuration/assign-pod-node/#interlude-built-in-node-labels)
+topology. This attempts to group `GameServer` Pods together on as few nodes in the cluster as possible.
+
+> The default Kubernetes scheduler doesn't do a perfect job of packing, but it's a good enough job for what we need -
+ at least at this stage.
+
 ### Distributed
 ```yaml
@@ -111,3 +125,8 @@
 Under the "Distributed" strategy, allocation will prioritise allocating `GameSerers` to nodes that have the least number of allocated `GameServers` on them.
+
+#### Pod Scheduling Strategy
+
+Under the "Distributed" strategy, `Pod` scheduling is provided by the default Kubernetes scheduler, which will attempt
+to distribute the `GameServer` `Pods` across as many nodes as possible.
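+
+For reference, the affinity that the "Packed" strategy injects into each backing `Pod` is roughly equivalent to the
+following snippet (a sketch - it assumes the Agones role label pair `stable.agones.dev/role: gameserver` that is applied
+to `GameServer` Pods):
+
+```yaml
+affinity:
+  podAffinity:
+    preferredDuringSchedulingIgnoredDuringExecution:
+    - weight: 100
+      podAffinityTerm:
+        topologyKey: kubernetes.io/hostname  # prefer co-locating on the same node hostname
+        labelSelector:
+          matchLabels:
+            stable.agones.dev/role: gameserver
+```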
\ No newline at end of file diff --git a/pkg/apis/stable/v1alpha1/fleet.go b/pkg/apis/stable/v1alpha1/fleet.go index cefe2dc254..b74ba5115c 100644 --- a/pkg/apis/stable/v1alpha1/fleet.go +++ b/pkg/apis/stable/v1alpha1/fleet.go @@ -22,20 +22,6 @@ import ( ) const ( - // Packed scheduling strategy will prioritise allocating GameServers - // on Nodes with the most Allocated, and then Ready GameServers - // to bin pack as many Allocated GameServers on a single node. - // This is most useful for dynamic Kubernetes clusters - such as on Cloud Providers. - // In future versions, this will also impact Fleet scale down, and Pod Scheduling. - Packed SchedulingStrategy = "Packed" - - // Distributed scheduling strategy will prioritise allocating GameServers - // on Nodes with the least Allocated, and then Ready GameServers - // to distribute Allocated GameServers across many nodes. - // This is most useful for statically sized Kubernetes clusters - such as on physical hardware. - // In future versions, this will also impact Fleet scale down, and Pod Scheduling. - Distributed SchedulingStrategy = "Distributed" - // FleetGameServerSetLabel is the label that the name of the Fleet // is set to on the GameServerSet the Fleet controls FleetGameServerSetLabel = stable.GroupName + "/fleet" @@ -93,7 +79,8 @@ func (f *Fleet) GameServerSet() *GameServerSet { gsSet := &GameServerSet{ ObjectMeta: *f.Spec.Template.ObjectMeta.DeepCopy(), Spec: GameServerSetSpec{ - Template: f.Spec.Template, + Template: f.Spec.Template, + Scheduling: f.Spec.Scheduling, }, } diff --git a/pkg/apis/stable/v1alpha1/fleet_test.go b/pkg/apis/stable/v1alpha1/fleet_test.go index f2581c854b..9860488193 100644 --- a/pkg/apis/stable/v1alpha1/fleet_test.go +++ b/pkg/apis/stable/v1alpha1/fleet_test.go @@ -31,7 +31,8 @@ func TestFleetGameServerSetGameServer(t *testing.T) { UID: "1234", }, Spec: FleetSpec{ - Replicas: 10, + Replicas: 10, + Scheduling: Packed, Template: GameServerTemplateSpec{ Spec: GameServerSpec{ Ports: []GameServerPort{{ContainerPort: 1234}}, @@ -51,6 +52,7 @@ func TestFleetGameServerSetGameServer(t *testing.T) { assert.Equal(t, f.ObjectMeta.Name+"-", gsSet.ObjectMeta.GenerateName) assert.Equal(t, f.ObjectMeta.Name, gsSet.ObjectMeta.Labels[FleetGameServerSetLabel]) assert.Equal(t, int32(0), gsSet.Spec.Replicas) + assert.Equal(t, f.Spec.Scheduling, gsSet.Spec.Scheduling) assert.Equal(t, f.Spec.Template, gsSet.Spec.Template) assert.True(t, v1.IsControlledBy(gsSet, &f)) } diff --git a/pkg/apis/stable/v1alpha1/gameserver.go b/pkg/apis/stable/v1alpha1/gameserver.go index aab38447bc..8e4c11cbb6 100644 --- a/pkg/apis/stable/v1alpha1/gameserver.go +++ b/pkg/apis/stable/v1alpha1/gameserver.go @@ -117,6 +117,8 @@ type GameServerSpec struct { Ports []GameServerPort `json:"ports"` // Health configures health checking Health Health `json:"health,omitempty"` + // Scheduling strategy. Defaults to "Packed". + Scheduling SchedulingStrategy `json:"scheduling"` // Template describes the Pod that will be created for the GameServer Template corev1.PodTemplateSpec `json:"template"` } @@ -182,6 +184,7 @@ func (gs *GameServer) ApplyDefaults() { gs.applyPortDefaults() gs.applyStateDefaults() gs.applyHealthDefaults() + gs.applySchedulingDefaults() } // applyContainerDefaults applues the container defaults @@ -230,6 +233,12 @@ func (gs *GameServer) applyPortDefaults() { } } +func (gs *GameServer) applySchedulingDefaults() { + if gs.Spec.Scheduling == "" { + gs.Spec.Scheduling = Packed + } +} + // Validate validates the GameServer configuration. 
// If a GameServer is invalid there will be > 0 values in // the returned array @@ -289,6 +298,38 @@ func (gs *GameServer) Pod(sidecars ...corev1.Container) (*corev1.Pod, error) { ObjectMeta: *gs.Spec.Template.ObjectMeta.DeepCopy(), Spec: *gs.Spec.Template.Spec.DeepCopy(), } + + gs.podObjectMeta(pod) + + if pod.Spec.ServiceAccountName == "" { + pod.Spec.ServiceAccountName = SidecarServiceAccountName + } + + i, gsContainer, err := gs.FindGameServerContainer() + // this shouldn't happen, but if it does. + if err != nil { + return pod, err + } + + for _, p := range gs.Spec.Ports { + cp := corev1.ContainerPort{ + ContainerPort: p.ContainerPort, + HostPort: p.HostPort, + Protocol: p.Protocol, + } + gsContainer.Ports = append(gsContainer.Ports, cp) + } + pod.Spec.Containers[i] = gsContainer + + pod.Spec.Containers = append(pod.Spec.Containers, sidecars...) + + gs.podScheduling(pod) + + return pod, nil +} + +// podObjectMeta configures the pod ObjectMeta details +func (gs *GameServer) podObjectMeta(pod *corev1.Pod) { // Switch to GenerateName, so that we always get a Unique name for the Pod, and there // can be no collisions pod.ObjectMeta.GenerateName = gs.ObjectMeta.Name + "-" @@ -296,11 +337,8 @@ func (gs *GameServer) Pod(sidecars ...corev1.Container) (*corev1.Pod, error) { // Pods for GameServers need to stay in the same namespace pod.ObjectMeta.Namespace = gs.ObjectMeta.Namespace // Make sure these are blank, just in case - pod.ResourceVersion = "" - if pod.Spec.ServiceAccountName == "" { - pod.Spec.ServiceAccountName = SidecarServiceAccountName - } - pod.UID = "" + pod.ObjectMeta.ResourceVersion = "" + pod.ObjectMeta.UID = "" if pod.ObjectMeta.Labels == nil { pod.ObjectMeta.Labels = make(map[string]string, 2) } @@ -312,28 +350,34 @@ func (gs *GameServer) Pod(sidecars ...corev1.Container) (*corev1.Pod, error) { pod.ObjectMeta.Labels[GameServerPodLabel] = gs.ObjectMeta.Name // store the GameServer container as an annotation, to make lookup at a Pod level easier pod.ObjectMeta.Annotations[GameServerContainerAnnotation] = gs.Spec.Container - ref := metav1.NewControllerRef(gs, SchemeGroupVersion.WithKind("GameServer")) pod.ObjectMeta.OwnerReferences = append(pod.ObjectMeta.OwnerReferences, *ref) +} - i, gsContainer, err := gs.FindGameServerContainer() - // this shouldn't happen, but if it does. - if err != nil { - return pod, err - } +// podScheduling applies the Fleet scheduling strategy to the passed in Pod +// this sets the a PreferredDuringSchedulingIgnoredDuringExecution for GameServer +// pods to a host topology. Basically doing a half decent job of packing GameServer +// pods together. +// TODO: update the scheduling doc +func (gs *GameServer) podScheduling(pod *corev1.Pod) { + if gs.Spec.Scheduling == Packed { + if pod.Spec.Affinity == nil { + pod.Spec.Affinity = &corev1.Affinity{} + } + if pod.Spec.Affinity.PodAffinity == nil { + pod.Spec.Affinity.PodAffinity = &corev1.PodAffinity{} + } - for _, p := range gs.Spec.Ports { - cp := corev1.ContainerPort{ - ContainerPort: p.ContainerPort, - HostPort: p.HostPort, - Protocol: p.Protocol, + wpat := corev1.WeightedPodAffinityTerm{ + Weight: 100, + PodAffinityTerm: corev1.PodAffinityTerm{ + TopologyKey: "kubernetes.io/hostname", + LabelSelector: &metav1.LabelSelector{MatchLabels: map[string]string{RoleLabel: GameServerLabelRole}}, + }, } - gsContainer.Ports = append(gsContainer.Ports, cp) - } - pod.Spec.Containers[i] = gsContainer - pod.Spec.Containers = append(pod.Spec.Containers, sidecars...) 
- return pod, nil + pod.Spec.Affinity.PodAffinity.PreferredDuringSchedulingIgnoredDuringExecution = append(pod.Spec.Affinity.PodAffinity.PreferredDuringSchedulingIgnoredDuringExecution, wpat) + } } // HasPortPolicy checks if there is a port with a given diff --git a/pkg/apis/stable/v1alpha1/gameserver_test.go b/pkg/apis/stable/v1alpha1/gameserver_test.go index 327e8a9b8d..48f4cdc6e6 100644 --- a/pkg/apis/stable/v1alpha1/gameserver_test.go +++ b/pkg/apis/stable/v1alpha1/gameserver_test.go @@ -53,10 +53,11 @@ func TestGameServerApplyDefaults(t *testing.T) { t.Parallel() type expected struct { - protocol corev1.Protocol - state State - policy PortPolicy - health Health + protocol corev1.Protocol + state State + policy PortPolicy + health Health + scheduling SchedulingStrategy } data := map[string]struct { gameServer GameServer @@ -74,9 +75,10 @@ func TestGameServerApplyDefaults(t *testing.T) { }, container: "testing", expected: expected{ - protocol: "UDP", - state: PortAllocation, - policy: Dynamic, + protocol: "UDP", + state: PortAllocation, + policy: Dynamic, + scheduling: Packed, health: Health{ Disabled: false, FailureThreshold: 3, @@ -109,9 +111,10 @@ func TestGameServerApplyDefaults(t *testing.T) { Status: GameServerStatus{State: "TestState"}}, container: "testing2", expected: expected{ - protocol: "TCP", - state: "TestState", - policy: Static, + protocol: "TCP", + state: "TestState", + policy: Static, + scheduling: Packed, health: Health{ Disabled: false, FailureThreshold: 10, @@ -129,9 +132,10 @@ func TestGameServerApplyDefaults(t *testing.T) { }, container: "testing", expected: expected{ - protocol: "UDP", - state: Creating, - policy: Static, + protocol: "UDP", + state: Creating, + policy: Static, + scheduling: Packed, health: Health{ Disabled: false, FailureThreshold: 3, @@ -150,9 +154,10 @@ func TestGameServerApplyDefaults(t *testing.T) { }, container: "testing", expected: expected{ - protocol: "UDP", - state: PortAllocation, - policy: Dynamic, + protocol: "UDP", + state: PortAllocation, + policy: Dynamic, + scheduling: Packed, health: Health{ Disabled: true, }, @@ -175,10 +180,11 @@ func TestGameServerApplyDefaults(t *testing.T) { }, container: "testing", expected: expected{ - protocol: corev1.ProtocolTCP, - state: Creating, - policy: Static, - health: Health{Disabled: true}, + protocol: corev1.ProtocolTCP, + state: Creating, + policy: Static, + scheduling: Packed, + health: Health{Disabled: true}, }, }, } @@ -193,6 +199,7 @@ func TestGameServerApplyDefaults(t *testing.T) { assert.Equal(t, test.expected.protocol, spec.Ports[0].Protocol) assert.Equal(t, test.expected.state, test.gameServer.Status.State) assert.Equal(t, test.expected.health, test.gameServer.Spec.Health) + assert.Equal(t, test.expected.scheduling, test.gameServer.Spec.Scheduling) }) } } @@ -278,6 +285,29 @@ func TestGameServerPod(t *testing.T) { assert.True(t, metav1.IsControlledBy(pod, fixture)) } +func TestGameServerPodScheduling(t *testing.T) { + fixture := &corev1.Pod{Spec: corev1.PodSpec{}} + + t.Run("packed", func(t *testing.T) { + gs := &GameServer{Spec: GameServerSpec{Scheduling: Packed}} + pod := fixture.DeepCopy() + gs.podScheduling(pod) + + assert.Len(t, pod.Spec.Affinity.PodAffinity.PreferredDuringSchedulingIgnoredDuringExecution, 1) + wpat := pod.Spec.Affinity.PodAffinity.PreferredDuringSchedulingIgnoredDuringExecution[0] + assert.Equal(t, int32(100), wpat.Weight) + assert.Contains(t, wpat.PodAffinityTerm.LabelSelector.String(), GameServerLabelRole) + assert.Contains(t, 
wpat.PodAffinityTerm.LabelSelector.String(), RoleLabel) + }) + + t.Run("distributed", func(t *testing.T) { + gs := &GameServer{Spec: GameServerSpec{Scheduling: Distributed}} + pod := fixture.DeepCopy() + gs.podScheduling(pod) + assert.Empty(t, pod.Spec.Affinity) + }) +} + func TestGameServerCountPorts(t *testing.T) { fixture := &GameServer{Spec: GameServerSpec{Ports: []GameServerPort{ {PortPolicy: Dynamic}, diff --git a/pkg/apis/stable/v1alpha1/gameserverset.go b/pkg/apis/stable/v1alpha1/gameserverset.go index 8cd01f3bb6..339eee9f10 100644 --- a/pkg/apis/stable/v1alpha1/gameserverset.go +++ b/pkg/apis/stable/v1alpha1/gameserverset.go @@ -56,6 +56,8 @@ type GameServerSetList struct { type GameServerSetSpec struct { // Replicas are the number of GameServers that should be in this set Replicas int32 `json:"replicas"` + // Scheduling strategy. Defaults to "Packed". + Scheduling SchedulingStrategy `json:"scheduling"` // Template the GameServer template to apply for this GameServerSet Template GameServerTemplateSpec `json:"template"` } @@ -93,6 +95,8 @@ func (gsSet *GameServerSet) GameServer() *GameServer { Spec: *gsSet.Spec.Template.Spec.DeepCopy(), } + gs.Spec.Scheduling = gsSet.Spec.Scheduling + // Switch to GenerateName, so that we always get a Unique name for the GameServer, and there // can be no collisions gs.ObjectMeta.GenerateName = gsSet.ObjectMeta.Name + "-" diff --git a/pkg/apis/stable/v1alpha1/scheduling.go b/pkg/apis/stable/v1alpha1/scheduling.go new file mode 100644 index 0000000000..13cf63d0c5 --- /dev/null +++ b/pkg/apis/stable/v1alpha1/scheduling.go @@ -0,0 +1,31 @@ +// Copyright 2018 Google Inc. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package v1alpha1 + +const ( + // Packed scheduling strategy will prioritise allocating GameServers + // on Nodes with the most Allocated, and then Ready GameServers + // to bin pack as many Allocated GameServers on a single node. + // This is most useful for dynamic Kubernetes clusters - such as on Cloud Providers. + // In future versions, this will also impact Fleet scale down, and Pod Scheduling. + Packed SchedulingStrategy = "Packed" + + // Distributed scheduling strategy will prioritise allocating GameServers + // on Nodes with the least Allocated, and then Ready GameServers + // to distribute Allocated GameServers across many nodes. + // This is most useful for statically sized Kubernetes clusters - such as on physical hardware. + // In future versions, this will also impact Fleet scale down, and Pod Scheduling. 
+	Distributed SchedulingStrategy = "Distributed"
+)
diff --git a/pkg/gameservers/portallocator.go b/pkg/gameservers/portallocator.go
index a924f3685c..37cc6957d9 100644
--- a/pkg/gameservers/portallocator.go
+++ b/pkg/gameservers/portallocator.go
@@ -97,7 +97,6 @@ func NewPortAllocator(minPort, maxPort int32,
 			DeleteFunc: pa.syncDeleteGameServer,
 		})
 
-	// Experimental support for node adding/removal
 	pa.nodeInformer.AddEventHandler(cache.ResourceEventHandlerFuncs{
 		AddFunc: func(obj interface{}) {
 			node := obj.(*corev1.Node)

From cccdc8abc39779c6ea50ee714d9b530487c89bf9 Mon Sep 17 00:00:00 2001
From: Mark Mandel
Date: Thu, 25 Oct 2018 16:28:08 -0700
Subject: [PATCH 23/25] Apparently patching events is a thing.

---
 install/helm/agones/templates/serviceaccounts/controller.yaml | 2 +-
 install/yaml/install.yaml                                     | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/install/helm/agones/templates/serviceaccounts/controller.yaml b/install/helm/agones/templates/serviceaccounts/controller.yaml
index 461a249a11..851d04c717 100644
--- a/install/helm/agones/templates/serviceaccounts/controller.yaml
+++ b/install/helm/agones/templates/serviceaccounts/controller.yaml
@@ -37,7 +37,7 @@ metadata:
 rules:
 - apiGroups: [""]
   resources: ["events"]
-  verbs: ["create"]
+  verbs: ["create", "patch"]
 - apiGroups: [""]
   resources: ["pods"]
   verbs: ["create", "delete", "list", "watch"]
diff --git a/install/yaml/install.yaml b/install/yaml/install.yaml
index 98d01371a9..d66c3c33f7 100644
--- a/install/yaml/install.yaml
+++ b/install/yaml/install.yaml
@@ -38,7 +38,7 @@ metadata:
 rules:
 - apiGroups: [""]
   resources: ["events"]
-  verbs: ["create"]
+  verbs: ["create", "patch"]
 - apiGroups: [""]
   resources: ["pods"]
   verbs: ["create", "delete", "list", "watch"]

From 860ebdafe82074c0fec470d9502aee8db3ebcf8f Mon Sep 17 00:00:00 2001
From: Mark Mandel
Date: Tue, 23 Oct 2018 09:12:02 -0700
Subject: [PATCH 24/25] Packed: Fleet scale down removes GameServers from least
 used Nodes

This implements the "Packed" strategy such that when a Fleet is scaled
down, it removes GameServers from the Nodes that have the least
GameServers running on them.

This also fixes up some issues with the OpenAPI validation.

This looks like it's working. Go team.
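
For clarity before the diff: here is a minimal, self-contained Go sketch of the
selection logic described above - count the Ready and Allocated GameServers on
each Node, sort the Nodes from least to most utilised, and collect Ready
GameServers until the scale down target is reached. The `gameServer` struct and
`leastFullNodesFirst` are simplified stand-ins, not the Agones API; the real
implementation lands in `pkg/gameserversets/gameserversets.go` below and
follows the same shape.

```go
package main

import (
	"fmt"
	"sort"
)

// gameServer is a simplified stand-in for the Agones GameServer type,
// carrying only the fields that scale down selection cares about.
type gameServer struct {
	Name     string
	NodeName string
	State    string // "Ready" or "Allocated"
}

// leastFullNodesFirst collects Ready gameServers from the Nodes hosting the
// fewest Ready+Allocated gameServers, stopping once at least limit have been
// gathered - so lightly used Nodes are emptied first and can be scaled away.
func leastFullNodesFirst(list []gameServer, limit int) []gameServer {
	if limit <= 0 {
		return nil
	}

	type node struct {
		total int
		ready []gameServer
	}
	nodes := map[string]*node{}
	var names []string

	// count up the gameServers per Node, tracking the deletable (Ready) ones
	for _, gs := range list {
		n, ok := nodes[gs.NodeName]
		if !ok {
			n = &node{}
			nodes[gs.NodeName] = n
			names = append(names, gs.NodeName)
		}
		n.total++
		if gs.State == "Ready" {
			n.ready = append(n.ready, gs)
		}
	}

	// sort the Nodes, least utilised first
	sort.Slice(names, func(i, j int) bool {
		return nodes[names[i]].total < nodes[names[j]].total
	})

	var result []gameServer
	for _, name := range names {
		result = append(result, nodes[name].ready...)
		if len(result) >= limit {
			break
		}
	}
	return result
}

func main() {
	list := []gameServer{
		{"gs1", "n1", "Ready"},
		{"gs2", "n2", "Ready"}, {"gs3", "n2", "Allocated"},
		{"gs4", "n3", "Ready"}, {"gs5", "n3", "Allocated"}, {"gs6", "n3", "Allocated"},
	}
	// n1 is the least utilised Node, so its Ready GameServer is removed first
	for _, gs := range leastFullNodesFirst(list, 2) {
		fmt.Println(gs.Name, "on", gs.NodeName) // gs1 on n1, then gs2 on n2
	}
}
```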
--- docs/scheduling_autoscaling.md | 48 +++++--- examples/simple-udp/fleet.yaml | 1 - .../crds/_gameserverspecvalidation.yaml | 31 +---- install/helm/agones/templates/crds/fleet.yaml | 6 + .../templates/crds/fleetautoscaler.yaml | 3 +- .../agones/templates/crds/gameserverset.yaml | 5 + install/yaml/install.yaml | 107 ++++-------------- pkg/apis/stable/v1alpha1/gameserver.go | 2 +- pkg/apis/stable/v1alpha1/gameserverset.go | 2 +- pkg/gameserversets/controller.go | 42 +++---- pkg/gameserversets/controller_test.go | 9 +- pkg/gameserversets/gameserversets.go | 67 ++++++++++- pkg/gameserversets/gameserversets_test.go | 37 ++++++ 13 files changed, 207 insertions(+), 153 deletions(-) diff --git a/docs/scheduling_autoscaling.md b/docs/scheduling_autoscaling.md index ffd951c930..1b0747dac0 100644 --- a/docs/scheduling_autoscaling.md +++ b/docs/scheduling_autoscaling.md @@ -7,14 +7,23 @@ Table of Contents ================= -* [Fleet Autoscaling](#fleet-autoscaling) -* [Autoscalng Concepts](#autoscalng-concepts) - * [Allocation Scheduling](#allocation-scheduling) -* [Fleet Scheduling](#fleet-scheduling) - * [Packed](#packed) - * [Allocation Scheduling Strategy](#allocation-scheduling-strategy) - * [Distributed](#distributed) - * [Allocation Scheduling Stategy](#allocation-scheduling-stategy) + * [Scheduling and Autoscaling](#scheduling-and-autoscaling) + * [Table of Contents](#table-of-contents) + * [Fleet Autoscaling](#fleet-autoscaling) + * [Autoscaling Concepts](#autoscaling-concepts) + * [Allocation Scheduling](#allocation-scheduling) + * [Pod Scheduling](#pod-scheduling) + * [Fleet Scale Down Strategy](#fleet-scale-down-strategy) + * [Fleet Scheduling](#fleet-scheduling) + * [Packed](#packed) + * [Allocation Scheduling Strategy](#allocation-scheduling-strategy) + * [Pod Scheduling Strategy](#pod-scheduling-strategy) + * [Fleet Scale Down Strategy](#fleet-scale-down-strategy-1) + * [Distributed](#distributed) + * [Allocation Scheduling Strategy](#allocation-scheduling-strategy-1) + * [Pod Scheduling Strategy](#pod-scheduling-strategy-1) + * [Fleet Scale Down Strategy](#fleet-scale-down-strategy-2) + Scheduling and autoscaling go hand in hand, as where in the cluster `GameServers` are provisioned impacts how to autoscale fleets up and down (or if you would even want to) @@ -42,6 +51,11 @@ Each `GameServer` is backed by a Kubernetes [`Pod`](https://kubernetes.io/docs/c refers to the strategy that is in place that determines which node in the Kubernetes cluster the Pod is assigned to, when it is created. +### Fleet Scale Down Strategy + +Fleet Scale Down strategy refers to the order in which the `GameServers` that belong to a `Fleet` are deleted, +when Fleets are shrunk in size. + ## Fleet Scheduling There are two scheduling strategies for Fleets - each designed for different types of Kubernetes Environments. @@ -74,8 +88,7 @@ for the infrastructure you use. It attempts to _pack_ as much as possible into the smallest set of nodes, to make scaling infrastructure down as easy as possible. -Currently, Allocation scheduling is the only aspect this strategy affects, but in future releases it will -also affect `GameServer` `Pod` scheduling, and `Fleet` scale down scheduling as well. +This affects Allocation Scheduling, Pod Scheduling and Fleet Scale Down Scheduling. #### Allocation Scheduling Strategy @@ -91,6 +104,11 @@ topology. 
This attempts to group together `GameServer` Pods within as few nodes as possible.
 
 > The default Kubernetes scheduler doesn't do a perfect job of packing, but it's a good enough job for what we need -
 at least at this stage.
 
+#### Fleet Scale Down Strategy
+
+With the "Packed" strategy, Fleets will remove `Ready` `GameServers` from Nodes with the _least_ number of `Ready` and
+`Allocated` `GameServers` on them, attempting to empty Nodes so that they can be safely removed.
+
 ### Distributed
 
 ```yaml
@@ -118,8 +136,7 @@ on bare metal, and the cluster size rarely changes, if at all.
 This attempts to distribute the load across the entire cluster as much as possible, to take advantage of the static
 size of the cluster.
 
-Currently, the only thing the scheduling strategy affects is Allocation scheduling, but in future releases it will
-also affect `GameServer` `Pod` scheduling, and `Fleet` scaledown scheduling as well.
+This affects Allocation Scheduling, Pod Scheduling and Fleet Scale Down Scheduling.
 
 #### Allocation Scheduling Strategy
 
@@ -129,4 +146,9 @@ number of allocated `GameServers` on them.
 #### Pod Scheduling Strategy
 
 Under the "Distributed" strategy, `Pod` scheduling is provided by the default Kubernetes scheduler, which will attempt
-to distribute the `GameServer` `Pods` across as many nodes as possible.
\ No newline at end of file
+to distribute the `GameServer` `Pods` across as many nodes as possible.
+
+#### Fleet Scale Down Strategy
+
+With the "Distributed" strategy, Fleets will remove `Ready` `GameServers` from Nodes at random, to ensure
+a distributed load is maintained.
\ No newline at end of file
diff --git a/examples/simple-udp/fleet.yaml b/examples/simple-udp/fleet.yaml
index f145f9e50d..7aac59c34b 100644
--- a/examples/simple-udp/fleet.yaml
+++ b/examples/simple-udp/fleet.yaml
@@ -25,7 +25,6 @@ spec:
     spec:
       ports:
       - name: default
-        portPolicy: "dynamic"
         containerPort: 7654
       template:
         spec:
diff --git a/install/helm/agones/templates/crds/_gameserverspecvalidation.yaml b/install/helm/agones/templates/crds/_gameserverspecvalidation.yaml
index 0fa0ff1ef9..75135d8a50 100644
--- a/install/helm/agones/templates/crds/_gameserverspecvalidation.yaml
+++ b/install/helm/agones/templates/crds/_gameserverspecvalidation.yaml
@@ -57,7 +57,7 @@ properties:
   ports:
     title: array of ports to expose on the game server container
     type: array
-    minItems: 0 # make this 1 in 0.4.0
+    minItems: 1
     required:
     - containerPort
     items:
@@ -91,34 +91,11 @@ properties:
       type: integer
       minimum: 1
       maximum: 65535
-  portPolicy: # remove this in 0.4.0
-    title: the port policy that will be applied to the game server
-    description: |
-      portPolicy has two options:
-      - "dynamic" (default) the system allocates a free hostPort for the gameserver, for game clients to connect to
-      - "static", user defines the hostPort that the game client will connect to. Then onus is on the user to ensure that the
-      port is available. When static is the policy specified, `hostPort` is required to be populated
+  scheduling:
     type: string
     enum:
-    - dynamic
-    - static
-  protocol: # remove this in 0.4.0
-    title: Protocol being used. Defaults to UDP. TCP is the only other option
-    type: string
-    enum:
-    - UDP
-    - TCP
-  containerPort: # remove this in 0.4.0
-    title: The port that is being opened on the game server process
-    type: integer
-    minimum: 1
-    maximum: 65535
-  hostPort: # remove this in 0.4.0
-    title: The port exposed on the host
-    description: Only required when `portPolicy` is "static". Overwritten when portPolicy is "dynamic".
- type: integer - minimum: 1 - maximum: 65535 + - Packed + - Distributed health: type: object title: Health checking for the running game server diff --git a/install/helm/agones/templates/crds/fleet.yaml b/install/helm/agones/templates/crds/fleet.yaml index f2719791fa..35773bd2e6 100644 --- a/install/helm/agones/templates/crds/fleet.yaml +++ b/install/helm/agones/templates/crds/fleet.yaml @@ -43,9 +43,15 @@ spec: replicas: type: integer minimum: 0 + scheduling: + type: string + enum: + - Packed + - Distributed strategy: properties: type: + type: string enum: - Recreate - RollingUpdate diff --git a/install/helm/agones/templates/crds/fleetautoscaler.yaml b/install/helm/agones/templates/crds/fleetautoscaler.yaml index 435d1a3b84..15c0e9a1cc 100644 --- a/install/helm/agones/templates/crds/fleetautoscaler.yaml +++ b/install/helm/agones/templates/crds/fleetautoscaler.yaml @@ -50,8 +50,9 @@ spec: - type properties: type: + type: string enum: - - Buffer + - Buffer buffer: required: - maxReplicas diff --git a/install/helm/agones/templates/crds/gameserverset.yaml b/install/helm/agones/templates/crds/gameserverset.yaml index 370b3a79d3..aa7fc55768 100644 --- a/install/helm/agones/templates/crds/gameserverset.yaml +++ b/install/helm/agones/templates/crds/gameserverset.yaml @@ -44,5 +44,10 @@ spec: replicas: type: integer minimum: 0 + scheduling: + type: string + enum: + - Packed + - Distributed template: {{- include "gameserver.validation" . | indent 14 }} \ No newline at end of file diff --git a/install/yaml/install.yaml b/install/yaml/install.yaml index d66c3c33f7..4609795a21 100644 --- a/install/yaml/install.yaml +++ b/install/yaml/install.yaml @@ -184,9 +184,15 @@ spec: replicas: type: integer minimum: 0 + scheduling: + type: string + enum: + - Packed + - Distributed strategy: properties: type: + type: string enum: - Recreate - RollingUpdate @@ -234,7 +240,7 @@ spec: ports: title: array of ports to expose on the game server container type: array - minItems: 0 # make this 1 in 0.4.0 + minItems: 1 required: - containerPort items: @@ -268,34 +274,11 @@ spec: type: integer minimum: 1 maximum: 65535 - portPolicy: # remove this in 0.4.0 - title: the port policy that will be applied to the game server - description: | - portPolicy has two options: - - "dynamic" (default) the system allocates a free hostPort for the gameserver, for game clients to connect to - - "static", user defines the hostPort that the game client will connect to. Then onus is on the user to ensure that the - port is available. When static is the policy specified, `hostPort` is required to be populated + scheduling: type: string enum: - - dynamic - - static - protocol: # remove this in 0.4.0 - title: Protocol being used. Defaults to UDP. TCP is the only other option - type: string - enum: - - UDP - - TCP - containerPort: # remove this in 0.4.0 - title: The port that is being opened on the game server process - type: integer - minimum: 1 - maximum: 65535 - hostPort: # remove this in 0.4.0 - title: The port exposed on the host - description: Only required when `portPolicy` is "static". Overwritten when portPolicy is "dynamic". 
- type: integer - minimum: 1 - maximum: 65535 + - Packed + - Distributed health: type: object title: Health checking for the running game server @@ -421,8 +404,9 @@ spec: - type properties: type: + type: string enum: - - Buffer + - Buffer buffer: required: - maxReplicas @@ -515,7 +499,7 @@ spec: ports: title: array of ports to expose on the game server container type: array - minItems: 0 # make this 1 in 0.4.0 + minItems: 1 required: - containerPort items: @@ -549,34 +533,11 @@ spec: type: integer minimum: 1 maximum: 65535 - portPolicy: # remove this in 0.4.0 - title: the port policy that will be applied to the game server - description: | - portPolicy has two options: - - "dynamic" (default) the system allocates a free hostPort for the gameserver, for game clients to connect to - - "static", user defines the hostPort that the game client will connect to. Then onus is on the user to ensure that the - port is available. When static is the policy specified, `hostPort` is required to be populated - type: string - enum: - - dynamic - - static - protocol: # remove this in 0.4.0 - title: Protocol being used. Defaults to UDP. TCP is the only other option + scheduling: type: string enum: - - UDP - - TCP - containerPort: # remove this in 0.4.0 - title: The port that is being opened on the game server process - type: integer - minimum: 1 - maximum: 65535 - hostPort: # remove this in 0.4.0 - title: The port exposed on the host - description: Only required when `portPolicy` is "static". Overwritten when portPolicy is "dynamic". - type: integer - minimum: 1 - maximum: 65535 + - Packed + - Distributed health: type: object title: Health checking for the running game server @@ -647,6 +608,11 @@ spec: replicas: type: integer minimum: 0 + scheduling: + type: string + enum: + - Packed + - Distributed template: required: - spec @@ -691,7 +657,7 @@ spec: ports: title: array of ports to expose on the game server container type: array - minItems: 0 # make this 1 in 0.4.0 + minItems: 1 required: - containerPort items: @@ -725,34 +691,11 @@ spec: type: integer minimum: 1 maximum: 65535 - portPolicy: # remove this in 0.4.0 - title: the port policy that will be applied to the game server - description: | - portPolicy has two options: - - "dynamic" (default) the system allocates a free hostPort for the gameserver, for game clients to connect to - - "static", user defines the hostPort that the game client will connect to. Then onus is on the user to ensure that the - port is available. When static is the policy specified, `hostPort` is required to be populated + scheduling: type: string enum: - - dynamic - - static - protocol: # remove this in 0.4.0 - title: Protocol being used. Defaults to UDP. TCP is the only other option - type: string - enum: - - UDP - - TCP - containerPort: # remove this in 0.4.0 - title: The port that is being opened on the game server process - type: integer - minimum: 1 - maximum: 65535 - hostPort: # remove this in 0.4.0 - title: The port exposed on the host - description: Only required when `portPolicy` is "static". Overwritten when portPolicy is "dynamic". 
- type: integer - minimum: 1 - maximum: 65535 + - Packed + - Distributed health: type: object title: Health checking for the running game server diff --git a/pkg/apis/stable/v1alpha1/gameserver.go b/pkg/apis/stable/v1alpha1/gameserver.go index 8e4c11cbb6..beddc69301 100644 --- a/pkg/apis/stable/v1alpha1/gameserver.go +++ b/pkg/apis/stable/v1alpha1/gameserver.go @@ -118,7 +118,7 @@ type GameServerSpec struct { // Health configures health checking Health Health `json:"health,omitempty"` // Scheduling strategy. Defaults to "Packed". - Scheduling SchedulingStrategy `json:"scheduling"` + Scheduling SchedulingStrategy `json:"scheduling,omitempty"` // Template describes the Pod that will be created for the GameServer Template corev1.PodTemplateSpec `json:"template"` } diff --git a/pkg/apis/stable/v1alpha1/gameserverset.go b/pkg/apis/stable/v1alpha1/gameserverset.go index 339eee9f10..87ab941e3a 100644 --- a/pkg/apis/stable/v1alpha1/gameserverset.go +++ b/pkg/apis/stable/v1alpha1/gameserverset.go @@ -57,7 +57,7 @@ type GameServerSetSpec struct { // Replicas are the number of GameServers that should be in this set Replicas int32 `json:"replicas"` // Scheduling strategy. Defaults to "Packed". - Scheduling SchedulingStrategy `json:"scheduling"` + Scheduling SchedulingStrategy `json:"scheduling,omitempty"` // Template the GameServer template to apply for this GameServerSet Template GameServerTemplateSpec `json:"template"` } diff --git a/pkg/gameserversets/controller.go b/pkg/gameserversets/controller.go index 1c919c98f5..30cd7f66fa 100644 --- a/pkg/gameserversets/controller.go +++ b/pkg/gameserversets/controller.go @@ -19,7 +19,7 @@ import ( "sync" "agones.dev/agones/pkg/apis/stable" - stablev1alpha1 "agones.dev/agones/pkg/apis/stable/v1alpha1" + "agones.dev/agones/pkg/apis/stable/v1alpha1" "agones.dev/agones/pkg/client/clientset/versioned" getterv1alpha1 "agones.dev/agones/pkg/client/clientset/versioned/typed/stable/v1alpha1" "agones.dev/agones/pkg/client/informers/externalversions" @@ -101,13 +101,13 @@ func NewController( eventBroadcaster.StartRecordingToSink(&typedcorev1.EventSinkImpl{Interface: kubeClient.CoreV1().Events("")}) c.recorder = eventBroadcaster.NewRecorder(scheme.Scheme, corev1.EventSource{Component: "gameserverset-controller"}) - wh.AddHandler("/validate", stablev1alpha1.Kind("GameServerSet"), admv1beta1.Update, c.updateValidationHandler) + wh.AddHandler("/validate", v1alpha1.Kind("GameServerSet"), admv1beta1.Update, c.updateValidationHandler) gsSetInformer.AddEventHandler(cache.ResourceEventHandlerFuncs{ AddFunc: c.workerqueue.Enqueue, UpdateFunc: func(oldObj, newObj interface{}) { - oldGss := oldObj.(*stablev1alpha1.GameServerSet) - newGss := newObj.(*stablev1alpha1.GameServerSet) + oldGss := oldObj.(*v1alpha1.GameServerSet) + newGss := newObj.(*v1alpha1.GameServerSet) if oldGss.Spec.Replicas != newGss.Spec.Replicas { c.workerqueue.Enqueue(newGss) } @@ -117,7 +117,7 @@ func NewController( gsInformer.AddEventHandler(cache.ResourceEventHandlerFuncs{ AddFunc: c.gameServerEventHandler, UpdateFunc: func(oldObj, newObj interface{}) { - gs := newObj.(*stablev1alpha1.GameServer) + gs := newObj.(*v1alpha1.GameServer) // ignore if already being deleted if gs.ObjectMeta.DeletionTimestamp == nil { c.gameServerEventHandler(gs) @@ -153,8 +153,8 @@ func (c *Controller) Run(workers int, stop <-chan struct{}) error { func (c *Controller) updateValidationHandler(review admv1beta1.AdmissionReview) (admv1beta1.AdmissionReview, error) { c.logger.WithField("review", 
review).Info("updateValidationHandler") - newGss := &stablev1alpha1.GameServerSet{} - oldGss := &stablev1alpha1.GameServerSet{} + newGss := &v1alpha1.GameServerSet{} + oldGss := &v1alpha1.GameServerSet{} newObj := review.Request.Object if err := json.Unmarshal(newObj.Raw, newGss); err != nil { @@ -190,7 +190,7 @@ func (c *Controller) updateValidationHandler(review admv1beta1.AdmissionReview) } func (c *Controller) gameServerEventHandler(obj interface{}) { - gs := obj.(*stablev1alpha1.GameServer) + gs := obj.(*v1alpha1.GameServer) ref := metav1.GetControllerOf(gs) if ref == nil { return @@ -243,7 +243,7 @@ func (c *Controller) syncGameServerSet(key string) error { if err := c.syncMoreGameServers(gsSet, diff); err != nil { return err } - if err := c.syncLessGameSevers(gsSet, diff); err != nil { + if err := c.syncLessGameServers(gsSet, diff); err != nil { return err } if err := c.syncGameServerSetState(gsSet, list); err != nil { @@ -254,9 +254,9 @@ func (c *Controller) syncGameServerSet(key string) error { } // syncUnhealthyGameServers deletes any unhealthy game servers (that are not already being deleted) -func (c *Controller) syncUnhealthyGameServers(gsSet *stablev1alpha1.GameServerSet, list []*stablev1alpha1.GameServer) error { +func (c *Controller) syncUnhealthyGameServers(gsSet *v1alpha1.GameServerSet, list []*v1alpha1.GameServer) error { for _, gs := range list { - if gs.Status.State == stablev1alpha1.Unhealthy && gs.ObjectMeta.DeletionTimestamp.IsZero() { + if gs.Status.State == v1alpha1.Unhealthy && gs.ObjectMeta.DeletionTimestamp.IsZero() { c.allocationMutex.Lock() err := c.gameServerGetter.GameServers(gs.ObjectMeta.Namespace).Delete(gs.ObjectMeta.Name, nil) c.allocationMutex.Unlock() @@ -271,7 +271,7 @@ func (c *Controller) syncUnhealthyGameServers(gsSet *stablev1alpha1.GameServerSe } // syncMoreGameServers adds diff more GameServers to the set -func (c *Controller) syncMoreGameServers(gsSet *stablev1alpha1.GameServerSet, diff int32) error { +func (c *Controller) syncMoreGameServers(gsSet *v1alpha1.GameServerSet, diff int32) error { if diff <= 0 { return nil } @@ -288,8 +288,8 @@ func (c *Controller) syncMoreGameServers(gsSet *stablev1alpha1.GameServerSet, di return nil } -// syncLessGameSevers removes Ready GameServers from the set of GameServers -func (c *Controller) syncLessGameSevers(gsSet *stablev1alpha1.GameServerSet, diff int32) error { +// syncLessGameServers removes Ready GameServers from the set of GameServers +func (c *Controller) syncLessGameServers(gsSet *v1alpha1.GameServerSet, diff int32) error { if diff >= 0 { return nil } @@ -321,12 +321,16 @@ func (c *Controller) syncLessGameSevers(gsSet *stablev1alpha1.GameServerSet, dif } } + if gsSet.Spec.Scheduling == v1alpha1.Packed { + list = filterGameServersOnLeastFullNodes(list, diff) + } + for _, gs := range list { if diff <= count { return nil } - if gs.Status.State != stablev1alpha1.Allocated { + if gs.Status.State != v1alpha1.Allocated { err := c.gameServerGetter.GameServers(gs.Namespace).Delete(gs.ObjectMeta.Name, nil) if err != nil { return errors.Wrapf(err, "error deleting gameserver for gameserverset %s", gsSet.ObjectMeta.Name) @@ -340,19 +344,19 @@ func (c *Controller) syncLessGameSevers(gsSet *stablev1alpha1.GameServerSet, dif } // syncGameServerSetState synchronises the GameServerSet State with active GameServer counts -func (c *Controller) syncGameServerSetState(gsSet *stablev1alpha1.GameServerSet, list []*stablev1alpha1.GameServer) error { +func (c *Controller) syncGameServerSetState(gsSet 
*v1alpha1.GameServerSet, list []*v1alpha1.GameServer) error { rc := int32(0) ac := int32(0) for _, gs := range list { switch gs.Status.State { - case stablev1alpha1.Ready: + case v1alpha1.Ready: rc++ - case stablev1alpha1.Allocated: + case v1alpha1.Allocated: ac++ } } - status := stablev1alpha1.GameServerSetStatus{ + status := v1alpha1.GameServerSetStatus{ Replicas: int32(len(list)), ReadyReplicas: rc, AllocatedReplicas: ac, diff --git a/pkg/gameserversets/controller_test.go b/pkg/gameserversets/controller_test.go index d7e8852ecb..e0df7ad29c 100644 --- a/pkg/gameserversets/controller_test.go +++ b/pkg/gameserversets/controller_test.go @@ -262,7 +262,7 @@ func TestSyncLessGameServers(t *testing.T) { list := createGameServers(gsSet, 11) - // make some as unhealthy + // mark some as Allocated list[0].Status.State = v1alpha1.Allocated list[3].Status.State = v1alpha1.Allocated @@ -301,7 +301,7 @@ func TestSyncLessGameServers(t *testing.T) { assert.Nil(t, err) assert.Len(t, list2, 11) - err = c.syncLessGameSevers(gsSet, int32(-expected)) + err = c.syncLessGameServers(gsSet, int32(-expected)) assert.Nil(t, err) // subtract one, because one is already deleted @@ -459,8 +459,9 @@ func defaultFixture() *v1alpha1.GameServerSet { gsSet := &v1alpha1.GameServerSet{ ObjectMeta: metav1.ObjectMeta{Namespace: "default", Name: "test", UID: "1234"}, Spec: v1alpha1.GameServerSetSpec{ - Replicas: 10, - Template: v1alpha1.GameServerTemplateSpec{}, + Replicas: 10, + Scheduling: v1alpha1.Packed, + Template: v1alpha1.GameServerTemplateSpec{}, }, } return gsSet diff --git a/pkg/gameserversets/gameserversets.go b/pkg/gameserversets/gameserversets.go index 9aa7eed853..26a601d3a7 100644 --- a/pkg/gameserversets/gameserversets.go +++ b/pkg/gameserversets/gameserversets.go @@ -15,22 +15,81 @@ package gameserversets import ( - stablev1alpha1 "agones.dev/agones/pkg/apis/stable/v1alpha1" + "sort" + + "agones.dev/agones/pkg/apis/stable/v1alpha1" listerv1alpha1 "agones.dev/agones/pkg/client/listers/stable/v1alpha1" "github.com/pkg/errors" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/labels" ) +// node is just a convenience data structure for +// keeping relevant GameServer information about Nodes +type node struct { + name string + total int64 + ready []*v1alpha1.GameServer +} + +// filterGameServersOnLeastFullNodes returns a limited list of GameServers, ordered by the nodes +// they are hosted on, with the least utilised Nodes being prioritised +func filterGameServersOnLeastFullNodes(gsList []*v1alpha1.GameServer, limit int32) []*v1alpha1.GameServer { + if limit <= 0 { + return nil + } + + nodeMap := map[string]*node{} + var nodeList []*node + + // count up the number of allocated and ready game servers that exist + // also, since we're already looping through, track all the deletable GameServers + // per node, so we can use this as a shortlist to delete from + for _, gs := range gsList { + if gs.DeletionTimestamp.IsZero() && + (gs.Status.State == v1alpha1.Allocated || gs.Status.State == v1alpha1.Ready) { + _, ok := nodeMap[gs.Status.NodeName] + if !ok { + node := &node{name: gs.Status.NodeName} + nodeMap[gs.Status.NodeName] = node + nodeList = append(nodeList, node) + } + + nodeMap[gs.Status.NodeName].total++ + if gs.Status.State == v1alpha1.Ready { + nodeMap[gs.Status.NodeName].ready = append(nodeMap[gs.Status.NodeName].ready, gs) + } + } + } + + // sort our nodes, least to most + sort.Slice(nodeList, func(i, j int) bool { + return nodeList[i].total < nodeList[j].total + }) + + // we need to get 
Ready GameServer until we equal or pass limit + result := make([]*v1alpha1.GameServer, 0, limit) + + for _, n := range nodeList { + result = append(result, n.ready...) + + if int32(len(result)) >= limit { + return result + } + } + + return result +} + // ListGameServersByGameServerSetOwner lists the GameServers for a given GameServerSet func ListGameServersByGameServerSetOwner(gameServerLister listerv1alpha1.GameServerLister, - gsSet *stablev1alpha1.GameServerSet) ([]*stablev1alpha1.GameServer, error) { - list, err := gameServerLister.List(labels.SelectorFromSet(labels.Set{stablev1alpha1.GameServerSetGameServerLabel: gsSet.ObjectMeta.Name})) + gsSet *v1alpha1.GameServerSet) ([]*v1alpha1.GameServer, error) { + list, err := gameServerLister.List(labels.SelectorFromSet(labels.Set{v1alpha1.GameServerSetGameServerLabel: gsSet.ObjectMeta.Name})) if err != nil { return list, errors.Wrapf(err, "error listing gameservers for gameserverset %s", gsSet.ObjectMeta.Name) } - var result []*stablev1alpha1.GameServer + var result []*v1alpha1.GameServer for _, gs := range list { if metav1.IsControlledBy(gs, gsSet) { result = append(result, gs) diff --git a/pkg/gameserversets/gameserversets_test.go b/pkg/gameserversets/gameserversets_test.go index d385842f53..fe0edec42d 100644 --- a/pkg/gameserversets/gameserversets_test.go +++ b/pkg/gameserversets/gameserversets_test.go @@ -26,6 +26,43 @@ import ( k8stesting "k8s.io/client-go/testing" ) +func TestFilterGameServersOnLeastFullNodes(t *testing.T) { + t.Parallel() + + gsList := []*v1alpha1.GameServer{ + {ObjectMeta: metav1.ObjectMeta{Name: "gs1"}, Status: v1alpha1.GameServerStatus{NodeName: "n1", State: v1alpha1.Ready}}, + {ObjectMeta: metav1.ObjectMeta{Name: "gs2"}, Status: v1alpha1.GameServerStatus{NodeName: "n1", State: v1alpha1.Starting}}, + {ObjectMeta: metav1.ObjectMeta{Name: "gs3"}, Status: v1alpha1.GameServerStatus{NodeName: "n2", State: v1alpha1.Ready}}, + {ObjectMeta: metav1.ObjectMeta{Name: "gs4"}, Status: v1alpha1.GameServerStatus{NodeName: "n2", State: v1alpha1.Ready}}, + {ObjectMeta: metav1.ObjectMeta{Name: "gs5"}, Status: v1alpha1.GameServerStatus{NodeName: "n3", State: v1alpha1.Allocated}}, + {ObjectMeta: metav1.ObjectMeta{Name: "gs6"}, Status: v1alpha1.GameServerStatus{NodeName: "n3", State: v1alpha1.Allocated}}, + {ObjectMeta: metav1.ObjectMeta{Name: "gs7"}, Status: v1alpha1.GameServerStatus{NodeName: "n3", State: v1alpha1.Ready}}, + } + + t.Run("normal", func(t *testing.T) { + limit := 4 + result := filterGameServersOnLeastFullNodes(gsList, int32(limit)) + assert.Len(t, result, limit) + assert.Equal(t, "gs1", result[0].Name) + assert.Equal(t, "n2", result[1].Status.NodeName) + assert.Equal(t, "n2", result[2].Status.NodeName) + assert.Equal(t, "n3", result[3].Status.NodeName) + }) + + t.Run("zero", func(t *testing.T) { + limit := 0 + result := filterGameServersOnLeastFullNodes(gsList, int32(limit)) + assert.Len(t, result, limit) + }) + + t.Run("negative", func(t *testing.T) { + limit := -1 + result := filterGameServersOnLeastFullNodes(gsList, int32(limit)) + assert.Len(t, result, 0) + assert.Empty(t, result) + }) +} + func TestListGameServersByGameServerSetOwner(t *testing.T) { t.Parallel() From 1c480402ccb2653415485f831c3d061b1175426f Mon Sep 17 00:00:00 2001 From: Mark Mandel Date: Mon, 22 Oct 2018 17:10:28 -0700 Subject: [PATCH 25/25] Update Helm to 2.11.0 This will likely fail on first PR, as we will need to update the Helm install on the e2e cluster. 
A good opportunity to delete and restart the e2e cluster anyway -- but will need to manage this around other PRs. Should enable #375 to pass tests as well. --- build/Makefile | 6 ++++-- build/build-image/Dockerfile | 5 ++--- build/e2e-image/Dockerfile | 6 +++--- build/e2e-image/entrypoint.sh | 2 +- build/gke-test-cluster/cluster-e2e.yml | 2 +- 5 files changed, 11 insertions(+), 10 deletions(-) diff --git a/build/Makefile b/build/Makefile index c9c40c9346..ff391b4216 100644 --- a/build/Makefile +++ b/build/Makefile @@ -385,8 +385,10 @@ gcloud-e2e-test-cluster: $(ensure-build-image) GCP_CLUSTER_NAME=e2e-test-cluster GCP_CLUSTER_ZONE=us-west1-c $(MAKE) gcloud-auth-cluster docker run --rm $(common_mounts) $(DOCKER_RUN_ARGS) $(build_tag) \ kubectl apply -f $(mount_path)/build/helm.yaml - docker run --rm $(common_mounts) $(DOCKER_RUN_ARGS) $(build_tag) helm init --service-account helm --wait && \ - helm install --wait --set Replicas=1,uiService.type=ClusterIP --name consul stable/consul + docker run --rm $(common_mounts) $(DOCKER_RUN_ARGS) $(build_tag) \ + helm init --service-account helm --wait + docker run --rm $(common_mounts) $(DOCKER_RUN_ARGS) $(build_tag) \ + helm install --wait --set Replicas=1,uiService.type=ClusterIP --name consul stable/consul # Deletes the gcloud e2e cluster and cleanup any left pvc volumes clean-gcloud-e2e-test-cluster: $(ensure-build-image) diff --git a/build/build-image/Dockerfile b/build/build-image/Dockerfile index 988d932dcc..d6d662c74b 100644 --- a/build/build-image/Dockerfile +++ b/build/build-image/Dockerfile @@ -49,7 +49,7 @@ RUN curl -LO https://storage.googleapis.com/kubernetes-release/release/v${KUBECT RUN echo "source <(kubectl completion bash)" >> /root/.bashrc # install Helm package manager -ENV HELM_VER 2.9.1 +ENV HELM_VER 2.11.0 ENV HELM_URL https://storage.googleapis.com/kubernetes-helm/helm-v${HELM_VER}-linux-amd64.tar.gz RUN curl -L ${HELM_URL} > /tmp/helm.tar.gz \ && tar -zxvf /tmp/helm.tar.gz -C /tmp \ @@ -69,8 +69,7 @@ RUN go get -u github.com/golang/dep/cmd/dep && \ go get -u golang.org/x/tools/cmd/goimports # install golang-ci linter -RUN curl -sfL https://install.goreleaser.com/github.com/golangci/golangci-lint.sh | \ - bash -s -- -b $GOPATH/bin v1.10.2 +RUN curl -sfL https://install.goreleaser.com/github.com/golangci/golangci-lint.sh | sh -s -- -b $GOPATH/bin v1.10.2 # install the release branch of the code generator tools RUN mkdir -p /go/src && cd /go/src && mkdir -p k8s.io && cd k8s.io && \ diff --git a/build/e2e-image/Dockerfile b/build/e2e-image/Dockerfile index dd54e5a64c..5a5812b1cb 100644 --- a/build/e2e-image/Dockerfile +++ b/build/e2e-image/Dockerfile @@ -1,12 +1,12 @@ FROM gcr.io/cloud-builders/gcloud-slim RUN apt-get update && \ - apt-get install -y wget psmisc make python jq zip && \ + apt-get install -y wget psmisc make gcc python jq zip && \ apt-get clean # install go WORKDIR /usr/local -ENV GO_VERSION=1.10.3 +ENV GO_VERSION=1.11.1 ENV GOPATH /go RUN wget -q https://redirector.gvt1.com/edgedl/go/go${GO_VERSION}.linux-amd64.tar.gz && \ tar -xzf go${GO_VERSION}.linux-amd64.tar.gz && rm go${GO_VERSION}.linux-amd64.tar.gz && mkdir ${GOPATH} @@ -20,7 +20,7 @@ RUN curl -LO https://storage.googleapis.com/kubernetes-release/release/v${KUBECT mv ./kubectl /usr/local/bin/kubectl # install Helm package manager -ENV HELM_VER 2.9.1 +ENV HELM_VER 2.11.0 ENV HELM_URL https://storage.googleapis.com/kubernetes-helm/helm-v${HELM_VER}-linux-amd64.tar.gz RUN curl -L ${HELM_URL} > /tmp/helm.tar.gz \ && tar -zxvf /tmp/helm.tar.gz -C /tmp \ diff 
--git a/build/e2e-image/entrypoint.sh b/build/e2e-image/entrypoint.sh index 4b871fec36..1bb67786dc 100644 --- a/build/e2e-image/entrypoint.sh +++ b/build/e2e-image/entrypoint.sh @@ -26,7 +26,7 @@ then fi gcloud container clusters get-credentials e2e-test-cluster \ --zone=us-west1-c --project=agones-images -kubectl port-forward statefulset/consul-consul 8500:8500 & +kubectl port-forward statefulset/consul 8500:8500 & echo "Waiting consul port-forward to launch on 8500..." timeout 60 bash -c 'until printf "" 2>>/dev/null >>/dev/tcp/$0/$1; do sleep 1; done' 127.0.0.1 8500 echo "consul port-forward launched. Starting e2e tests..." diff --git a/build/gke-test-cluster/cluster-e2e.yml b/build/gke-test-cluster/cluster-e2e.yml index cae41645f9..cd7cdfd975 100644 --- a/build/gke-test-cluster/cluster-e2e.yml +++ b/build/gke-test-cluster/cluster-e2e.yml @@ -22,7 +22,7 @@ resources: initialClusterVersion: "1.10" nodePools: - name: "default" - initialNodeCount: 2 + initialNodeCount: 4 config: machineType: n1-standard-4 tags:
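
As a closing aside: the bash `until printf ... >>/dev/tcp/...` loop in `entrypoint.sh`
above is just a poll-until-reachable check against the consul port-forward. A Go
equivalent of that same wait-for-port logic - illustrative only, and not part of
these patches - could look like this:

```go
package main

import (
	"fmt"
	"net"
	"time"
)

// waitForPort polls addr until a TCP connection succeeds or the deadline
// passes - the same check as the bash /dev/tcp loop in entrypoint.sh.
func waitForPort(addr string, timeout time.Duration) error {
	deadline := time.Now().Add(timeout)
	for time.Now().Before(deadline) {
		conn, err := net.DialTimeout("tcp", addr, time.Second)
		if err == nil {
			conn.Close() // the port is accepting connections, we're done
			return nil
		}
		time.Sleep(time.Second)
	}
	return fmt.Errorf("timed out waiting for %s", addr)
}

func main() {
	if err := waitForPort("127.0.0.1:8500", 60*time.Second); err != nil {
		fmt.Println(err)
		return
	}
	fmt.Println("consul port-forward is up")
}
```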