From d211b17e82c47d72f5d74c01950ffeee95930a88 Mon Sep 17 00:00:00 2001 From: Mark Mandel Date: Thu, 22 Oct 2020 14:01:31 -0700 Subject: [PATCH 1/2] Add QPS settings to Allocation endpoints Allocation endpoints were throttled to the default ~4qps for a Kubernetes client. Matching the controller settings on standard QPS and Burst to allow higher throughput. Closes #1852 --- cmd/allocator/main.go | 7 +++++-- cmd/allocator/metrics.go | 12 ++++++++++++ .../helm/agones/templates/service/allocation.yaml | 4 ++++ install/helm/agones/values.yaml | 2 ++ install/yaml/install.yaml | 4 ++++ .../en/docs/Installation/Install Agones/helm.md | 2 ++ 6 files changed, 29 insertions(+), 2 deletions(-) diff --git a/cmd/allocator/main.go b/cmd/allocator/main.go index f986633582..31ea24d80f 100644 --- a/cmd/allocator/main.go +++ b/cmd/allocator/main.go @@ -76,7 +76,7 @@ func main() { // http.DefaultServerMux is used for http connection, not for https http.Handle("/", health) - kubeClient, agonesClient, err := getClients() + kubeClient, agonesClient, err := getClients(conf) if err != nil { logger.WithError(err).Fatal("could not create clients") } @@ -312,13 +312,16 @@ func (h *serviceHandler) verifyClientCertificate(rawCerts [][]byte, verifiedChai } // Set up our client which we will use to call the API -func getClients() (*kubernetes.Clientset, *versioned.Clientset, error) { +func getClients(ctlConfig config) (*kubernetes.Clientset, *versioned.Clientset, error) { // Create the in-cluster config config, err := rest.InClusterConfig() if err != nil { return nil, nil, errors.New("Could not create in cluster config") } + config.QPS = float32(ctlConfig.APIServerSustainedQPS) + config.Burst = ctlConfig.APIServerBurstQPS + // Access to the Agones resources through the Agones Clientset kubeClient, err := kubernetes.NewForConfig(config) if err != nil { diff --git a/cmd/allocator/metrics.go b/cmd/allocator/metrics.go index d3c75ca8a6..ab540dacda 100644 --- a/cmd/allocator/metrics.go +++ 
b/cmd/allocator/metrics.go @@ -37,6 +37,8 @@ const ( tlsDisabledFlag = "disable-tls" remoteAllocationTimeoutFlag = "remote-allocation-timeout" totalRemoteAllocationTimeoutFlag = "total-remote-allocation-timeout" + apiServerSustainedQPSFlag = "api-server-qps" + apiServerBurstQPSFlag = "api-server-qps-burst" ) func init() { @@ -44,6 +46,8 @@ func init() { } type config struct { + APIServerSustainedQPS int + APIServerBurstQPS int TLSDisabled bool MTLSDisabled bool PrometheusMetrics bool @@ -56,6 +60,8 @@ type config struct { func parseEnvFlags() config { + viper.SetDefault(apiServerSustainedQPSFlag, 100) + viper.SetDefault(apiServerBurstQPSFlag, 200) viper.SetDefault(enablePrometheusMetricsFlag, true) viper.SetDefault(enableStackdriverMetricsFlag, false) viper.SetDefault(projectIDFlag, "") @@ -65,6 +71,8 @@ func parseEnvFlags() config { viper.SetDefault(remoteAllocationTimeoutFlag, 10*time.Second) viper.SetDefault(totalRemoteAllocationTimeoutFlag, 30*time.Second) + pflag.Int32(apiServerSustainedQPSFlag, 100, "Maximum sustained queries per second to send to the API server") + pflag.Int32(apiServerBurstQPSFlag, 200, "Maximum burst queries per second to send to the API server") pflag.Bool(enablePrometheusMetricsFlag, viper.GetBool(enablePrometheusMetricsFlag), "Flag to activate metrics of Agones. Can also use PROMETHEUS_EXPORTER env variable.") pflag.Bool(enableStackdriverMetricsFlag, viper.GetBool(enableStackdriverMetricsFlag), "Flag to activate stackdriver monitoring metrics for Agones. Can also use STACKDRIVER_EXPORTER env variable.") pflag.String(projectIDFlag, viper.GetString(projectIDFlag), "GCP ProjectID used for Stackdriver, if not specified ProjectID from Application Default Credentials would be used. 
Can also use GCP_PROJECT_ID env variable.") @@ -77,6 +85,8 @@ func parseEnvFlags() config { pflag.Parse() viper.SetEnvKeyReplacer(strings.NewReplacer("-", "_")) + runtime.Must(viper.BindEnv(apiServerSustainedQPSFlag)) + runtime.Must(viper.BindEnv(apiServerBurstQPSFlag)) runtime.Must(viper.BindEnv(enablePrometheusMetricsFlag)) runtime.Must(viper.BindEnv(enableStackdriverMetricsFlag)) runtime.Must(viper.BindEnv(projectIDFlag)) @@ -89,6 +99,8 @@ func parseEnvFlags() config { runtime.Must(runtime.ParseFeaturesFromEnv()) return config{ + APIServerSustainedQPS: int(viper.GetInt32(apiServerSustainedQPSFlag)), + APIServerBurstQPS: int(viper.GetInt32(apiServerBurstQPSFlag)), PrometheusMetrics: viper.GetBool(enablePrometheusMetricsFlag), Stackdriver: viper.GetBool(enableStackdriverMetricsFlag), GCPProjectID: viper.GetString(projectIDFlag), diff --git a/install/helm/agones/templates/service/allocation.yaml b/install/helm/agones/templates/service/allocation.yaml index 4353173c8e..400fe2baf7 100644 --- a/install/helm/agones/templates/service/allocation.yaml +++ b/install/helm/agones/templates/service/allocation.yaml @@ -123,6 +123,10 @@ spec: path: /ready port: 8080 env: + - name: API_SERVER_QPS + value: {{ .Values.agones.allocator.apiServerQPS | quote }} + - name: API_SERVER_QPS_BURST + value: {{ .Values.agones.allocator.apiServerQPSBurst | quote }} - name: PROMETHEUS_EXPORTER value: {{ .Values.agones.metrics.prometheusEnabled | quote }} - name: STACKDRIVER_EXPORTER diff --git a/install/helm/agones/values.yaml b/install/helm/agones/values.yaml index c3583e9b23..fa5b93a66f 100644 --- a/install/helm/agones/values.yaml +++ b/install/helm/agones/values.yaml @@ -108,6 +108,8 @@ agones: timeoutSeconds: 1 allocator: install: true + apiServerQPS: 400 + apiServerQPSBurst: 500 annotations: {} healthCheck: initialDelaySeconds: 3 diff --git a/install/yaml/install.yaml b/install/yaml/install.yaml index 61a34a4564..1dfdffc2f6 100644 --- a/install/yaml/install.yaml +++ 
b/install/yaml/install.yaml @@ -1569,6 +1569,10 @@ spec: path: /ready port: 8080 env: + - name: API_SERVER_QPS + value: "400" + - name: API_SERVER_QPS_BURST + value: "500" - name: PROMETHEUS_EXPORTER value: "true" - name: STACKDRIVER_EXPORTER diff --git a/site/content/en/docs/Installation/Install Agones/helm.md b/site/content/en/docs/Installation/Install Agones/helm.md index daa3ebd6d7..3560e581b4 100644 --- a/site/content/en/docs/Installation/Install Agones/helm.md +++ b/site/content/en/docs/Installation/Install Agones/helm.md @@ -248,6 +248,8 @@ The following tables lists the configurable parameters of the Agones chart and t | Parameter | Description | Default | | --------------------------------------------------- | ----------------------------------------------------------------------------------------------- | ---------------------- | +| `agones.allocator.apiServerQPS` | Maximum sustained queries per second that an allocator should be making against API Server | `400` | +| `agones.allocator.apiServerQPSBurst` | Maximum burst queries per second that an allocator should be making against API Server | `500` | | `agones.allocator.allocationTimeout` | Remote allocation call timeout. | `10s` | | `agones.allocator.remoteAllocationTimeout` | Total remote allocation timeout including retries. | `30s` | | `agones.controller.annotations` | [Annotations][annotations] added to the Agones controller pods | `{}` | From 6836e0e0eee84391e039c897e91f2bd09f1702fc Mon Sep 17 00:00:00 2001 From: Mark Mandel Date: Tue, 27 Oct 2020 16:43:26 -0700 Subject: [PATCH 2/2] Review updates. 
--- cmd/allocator/metrics.go | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/cmd/allocator/metrics.go b/cmd/allocator/metrics.go index ab540dacda..eeb9dcfdb7 100644 --- a/cmd/allocator/metrics.go +++ b/cmd/allocator/metrics.go @@ -60,8 +60,8 @@ type config struct { func parseEnvFlags() config { - viper.SetDefault(apiServerSustainedQPSFlag, 100) - viper.SetDefault(apiServerBurstQPSFlag, 200) + viper.SetDefault(apiServerSustainedQPSFlag, 400) + viper.SetDefault(apiServerBurstQPSFlag, 500) viper.SetDefault(enablePrometheusMetricsFlag, true) viper.SetDefault(enableStackdriverMetricsFlag, false) viper.SetDefault(projectIDFlag, "") @@ -71,8 +71,8 @@ func parseEnvFlags() config { viper.SetDefault(remoteAllocationTimeoutFlag, 10*time.Second) viper.SetDefault(totalRemoteAllocationTimeoutFlag, 30*time.Second) - pflag.Int32(apiServerSustainedQPSFlag, 100, "Maximum sustained queries per second to send to the API server") - pflag.Int32(apiServerBurstQPSFlag, 200, "Maximum burst queries per second to send to the API server") + pflag.Int32(apiServerSustainedQPSFlag, viper.GetInt32(apiServerSustainedQPSFlag), "Maximum sustained queries per second to send to the API server") + pflag.Int32(apiServerBurstQPSFlag, viper.GetInt32(apiServerBurstQPSFlag), "Maximum burst queries per second to send to the API server") pflag.Bool(enablePrometheusMetricsFlag, viper.GetBool(enablePrometheusMetricsFlag), "Flag to activate metrics of Agones. Can also use PROMETHEUS_EXPORTER env variable.") pflag.Bool(enableStackdriverMetricsFlag, viper.GetBool(enableStackdriverMetricsFlag), "Flag to activate stackdriver monitoring metrics for Agones. Can also use STACKDRIVER_EXPORTER env variable.") pflag.String(projectIDFlag, viper.GetString(projectIDFlag), "GCP ProjectID used for Stackdriver, if not specified ProjectID from Application Default Credentials would be used. Can also use GCP_PROJECT_ID env variable.")